u32 ring_datasize; /* < ring_size */
u32 ring_data_startoffset;
+ u32 priv_write_index;
+ u32 priv_read_index;
};
/*
*read = dsize - *write;
}
+static inline u32 hv_get_bytes_to_read(struct hv_ring_buffer_info *rbi)
+{
+ u32 read_loc, write_loc, dsize, read;
+
+ dsize = rbi->ring_datasize;
+ read_loc = rbi->ring_buffer->read_index;
+ write_loc = READ_ONCE(rbi->ring_buffer->write_index);
+
+ read = write_loc >= read_loc ? (write_loc - read_loc) :
+ (dsize - read_loc) + write_loc;
+
+ return read;
+}
+
+static inline u32 hv_get_bytes_to_write(struct hv_ring_buffer_info *rbi)
+{
+ u32 read_loc, write_loc, dsize, write;
+
+ dsize = rbi->ring_datasize;
+ read_loc = READ_ONCE(rbi->ring_buffer->read_index);
+ write_loc = rbi->ring_buffer->write_index;
+
+ write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
+ read_loc - write_loc;
+ return write;
+}
+
/*
* VMBUS version is 32 bit entity broken up into
* two 16 bit quantities: major_number. minor_number.
HV_SIGNAL_POLICY_EXPLICIT,
};
+enum hv_numa_policy {
+ HV_BALANCED = 0,
+ HV_LOCALIZED,
+};
+
enum vmbus_device_type {
HV_IDE = 0,
HV_SCSI,
};
struct vmbus_channel {
- /* Unique channel id */
- int id;
-
struct list_head listentry;
struct hv_device *device_obj;
* ring lock to preserve the current behavior.
*/
bool acquire_ring_lock;
+ /*
+ * For performance critical channels (storage, networking
+ * etc,), Hyper-V has a mechanism to enhance the throughput
+ * at the expense of latency:
+ * When the host is to be signaled, we just set a bit in a shared page
+ * and this bit will be inspected by the hypervisor within a certain
+ * window and if the bit is set, the host will be signaled. The window
+ * of time is the monitor latency - currently around 100 usecs. This
+ * mechanism improves throughput by:
+ *
+ * A) Making the host more efficient - each time it wakes up,
+ * potentially it will process morev number of packets. The
+ * monitor latency allows a batch to build up.
+ * B) By deferring the hypercall to signal, we will also minimize
+ * the interrupts.
+ *
+ * Clearly, these optimizations improve throughput at the expense of
+ * latency. Furthermore, since the channel is shared for both
+ * control and data messages, control messages currently suffer
+ * unnecessary latency adversley impacting performance and boot
+ * time. To fix this issue, permit tagging the channel as being
+ * in "low latency" mode. In this mode, we will bypass the monitor
+ * mechanism.
+ */
+ bool low_latency;
+
+ /*
+ * NUMA distribution policy:
+ * We support teo policies:
+ * 1) Balanced: Here all performance critical channels are
+ * distributed evenly amongst all the NUMA nodes.
+ * This policy will be the default policy.
+ * 2) Localized: All channels of a given instance of a
+ * performance critical service will be assigned CPUs
+ * within a selected NUMA node.
+ */
+ enum hv_numa_policy affinity_policy;
};
c->signal_policy = policy;
}
+static inline void set_channel_affinity_state(struct vmbus_channel *c,
+ enum hv_numa_policy policy)
+{
+ c->affinity_policy = policy;
+}
+
static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
{
c->batched_reading = state;
c->outbound.ring_buffer->pending_send_sz = size;
}
+static inline void set_low_latency_mode(struct vmbus_channel *c)
+{
+ c->low_latency = true;
+}
+
+static inline void clear_low_latency_mode(struct vmbus_channel *c)
+{
+ c->low_latency = false;
+}
+
void vmbus_onmessage(void *context);
int vmbus_request_offers(void);
resource_size_t min, resource_size_t max,
resource_size_t size, resource_size_t align,
bool fb_overlap_ok);
-
+void vmbus_free_mmio(resource_size_t start, resource_size_t size);
int vmbus_cpu_number_to_vp_number(int cpu_number);
u64 hv_do_hypercall(u64 control, void *input, void *output);
.guid = UUID_LE(0x44c4f61d, 0x4444, 0x4400, 0x9d, 0x52, \
0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f)
+/*
+ * Linux doesn't support the 3 devices: the first two are for
+ * Automatic Virtual Machine Activation, and the third is for
+ * Remote Desktop Virtualization.
+ * {f8e65716-3cb3-4a06-9a60-1889c5cccab5}
+ * {3375baf4-9e15-4b30-b765-67acb10d607b}
+ * {276aacf4-ac15-426c-98dd-7521ad3f01fe}
+ */
+
+#define HV_AVMA1_GUID \
+ .guid = UUID_LE(0xf8e65716, 0x3cb3, 0x4a06, 0x9a, 0x60, \
+ 0x18, 0x89, 0xc5, 0xcc, 0xca, 0xb5)
+
+#define HV_AVMA2_GUID \
+ .guid = UUID_LE(0x3375baf4, 0x9e15, 0x4b30, 0xb7, 0x65, \
+ 0x67, 0xac, 0xb1, 0x0d, 0x60, 0x7b)
+
+#define HV_RDV_GUID \
+ .guid = UUID_LE(0x276aacf4, 0xac15, 0x426c, 0x98, 0xdd, \
+ 0x75, 0x21, 0xad, 0x3f, 0x01, 0xfe)
+
/*
* Common header for Hyper-V ICs
*/
u8 flags;
} __packed;
+struct ictimesync_ref_data {
+ u64 parenttime;
+ u64 vmreferencetime;
+ u8 flags;
+ char leapflags;
+ char stratum;
+ u8 reserved[3];
+} __packed;
+
struct hyperv_service_callback {
u8 msg_type;
char *log_msg;
struct icmsg_negotiate *, u8 *, int,
int);
+void hv_event_tasklet_disable(struct vmbus_channel *channel);
+void hv_event_tasklet_enable(struct vmbus_channel *channel);
+
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
/*
int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id,
const uuid_le *shv_host_servie_id);
+void vmbus_set_event(struct vmbus_channel *channel);
+
+/* Get the start of the ring buffer. */
+static inline void *
+hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
+{
+ return (void *)ring_info->ring_buffer->buffer;
+}
+
+/*
+ * To optimize the flow management on the send-side,
+ * when the sender is blocked because of lack of
+ * sufficient space in the ring buffer, potential the
+ * consumer of the ring buffer can signal the producer.
+ * This is controlled by the following parameters:
+ *
+ * 1. pending_send_sz: This is the size in bytes that the
+ * producer is trying to send.
+ * 2. The feature bit feat_pending_send_sz set to indicate if
+ * the consumer of the ring will signal when the ring
+ * state transitions from being full to a state where
+ * there is room for the producer to send the pending packet.
+ */
+
+static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi)
+{
+ u32 cur_write_sz;
+ u32 pending_sz;
+
+ /*
+ * Issue a full memory barrier before making the signaling decision.
+ * Here is the reason for having this barrier:
+ * If the reading of the pend_sz (in this function)
+ * were to be reordered and read before we commit the new read
+ * index (in the calling function) we could
+ * have a problem. If the host were to set the pending_sz after we
+ * have sampled pending_sz and go to sleep before we commit the
+ * read index, we could miss sending the interrupt. Issue a full
+ * memory barrier to address this.
+ */
+ virt_mb();
+
+ pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
+ /* If the other end is not blocked on write don't bother. */
+ if (pending_sz == 0)
+ return false;
+
+ cur_write_sz = hv_get_bytes_to_write(rbi);
+
+ if (cur_write_sz >= pending_sz)
+ return true;
+
+ return false;
+}
+
+/*
+ * An API to support in-place processing of incoming VMBUS packets.
+ */
+#define VMBUS_PKT_TRAILER 8
+
+static inline struct vmpacket_descriptor *
+get_next_pkt_raw(struct vmbus_channel *channel)
+{
+ struct hv_ring_buffer_info *ring_info = &channel->inbound;
+ u32 read_loc = ring_info->priv_read_index;
+ void *ring_buffer = hv_get_ring_buffer(ring_info);
+ struct vmpacket_descriptor *cur_desc;
+ u32 packetlen;
+ u32 dsize = ring_info->ring_datasize;
+ u32 delta = read_loc - ring_info->ring_buffer->read_index;
+ u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta);
+
+ if (bytes_avail_toread < sizeof(struct vmpacket_descriptor))
+ return NULL;
+
+ if ((read_loc + sizeof(*cur_desc)) > dsize)
+ return NULL;
+
+ cur_desc = ring_buffer + read_loc;
+ packetlen = cur_desc->len8 << 3;
+
+ /*
+ * If the packet under consideration is wrapping around,
+ * return failure.
+ */
+ if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1))
+ return NULL;
+
+ return cur_desc;
+}
+
+/*
+ * A helper function to step through packets "in-place"
+ * This API is to be called after each successful call
+ * get_next_pkt_raw().
+ */
+static inline void put_pkt_raw(struct vmbus_channel *channel,
+ struct vmpacket_descriptor *desc)
+{
+ struct hv_ring_buffer_info *ring_info = &channel->inbound;
+ u32 read_loc = ring_info->priv_read_index;
+ u32 packetlen = desc->len8 << 3;
+ u32 dsize = ring_info->ring_datasize;
+
+ if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize)
+ BUG();
+ /*
+ * Include the packet trailer.
+ */
+ ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
+}
+
+/*
+ * This call commits the read index and potentially signals the host.
+ * Here is the pattern for using the "in-place" consumption APIs:
+ *
+ * while (get_next_pkt_raw() {
+ * process the packet "in-place";
+ * put_pkt_raw();
+ * }
+ * if (packets processed in place)
+ * commit_rd_index();
+ */
+static inline void commit_rd_index(struct vmbus_channel *channel)
+{
+ struct hv_ring_buffer_info *ring_info = &channel->inbound;
+ /*
+ * Make sure all reads are done before we update the read index since
+ * the writer may start writing to the read area once the read index
+ * is updated.
+ */
+ virt_rmb();
+ ring_info->ring_buffer->read_index = ring_info->priv_read_index;
+
+ if (hv_need_to_signal_on_read(ring_info))
+ vmbus_set_event(channel);
+}
+
+
#endif /* _HYPERV_H */