X-Git-Url: http://git.cascardo.info/?a=blobdiff_plain;f=include%2Flinux%2Fhyperv.h;h=cd184bdca58fdc28e0ae59432445248c588fd2fc;hb=5fdf4939dc66307daf30a3d5355a2bfb9d207676;hp=aa0fadce9308c3d03a090e84dced8fdde2afe5fa;hpb=6ea24cf79e055f0a62a64baa8587e2254a493c7b;p=cascardo%2Flinux.git diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index aa0fadce9308..cd184bdca58f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -126,6 +126,8 @@ struct hv_ring_buffer_info { u32 ring_datasize; /* < ring_size */ u32 ring_data_startoffset; + u32 priv_write_index; + u32 priv_read_index; }; /* @@ -151,6 +153,33 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi, *read = dsize - *write; } +static inline u32 hv_get_bytes_to_read(struct hv_ring_buffer_info *rbi) +{ + u32 read_loc, write_loc, dsize, read; + + dsize = rbi->ring_datasize; + read_loc = rbi->ring_buffer->read_index; + write_loc = READ_ONCE(rbi->ring_buffer->write_index); + + read = write_loc >= read_loc ? (write_loc - read_loc) : + (dsize - read_loc) + write_loc; + + return read; +} + +static inline u32 hv_get_bytes_to_write(struct hv_ring_buffer_info *rbi) +{ + u32 read_loc, write_loc, dsize, write; + + dsize = rbi->ring_datasize; + read_loc = READ_ONCE(rbi->ring_buffer->read_index); + write_loc = rbi->ring_buffer->write_index; + + write = write_loc >= read_loc ? dsize - (write_loc - read_loc) : + read_loc - write_loc; + return write; +} + /* * VMBUS version is 32 bit entity broken up into * two 16 bit quantities: major_number. minor_number. @@ -645,6 +674,11 @@ enum hv_signal_policy { HV_SIGNAL_POLICY_EXPLICIT, }; +enum hv_numa_policy { + HV_BALANCED = 0, + HV_LOCALIZED, +}; + enum vmbus_device_type { HV_IDE = 0, HV_SCSI, @@ -672,9 +706,6 @@ struct vmbus_device { }; struct vmbus_channel { - /* Unique channel id */ - int id; - struct list_head listentry; struct hv_device *device_obj; @@ -821,6 +852,43 @@ struct vmbus_channel { * ring lock to preserve the current behavior. */ bool acquire_ring_lock; + /* + * For performance critical channels (storage, networking + * etc,), Hyper-V has a mechanism to enhance the throughput + * at the expense of latency: + * When the host is to be signaled, we just set a bit in a shared page + * and this bit will be inspected by the hypervisor within a certain + * window and if the bit is set, the host will be signaled. The window + * of time is the monitor latency - currently around 100 usecs. This + * mechanism improves throughput by: + * + * A) Making the host more efficient - each time it wakes up, + * potentially it will process morev number of packets. The + * monitor latency allows a batch to build up. + * B) By deferring the hypercall to signal, we will also minimize + * the interrupts. + * + * Clearly, these optimizations improve throughput at the expense of + * latency. Furthermore, since the channel is shared for both + * control and data messages, control messages currently suffer + * unnecessary latency adversley impacting performance and boot + * time. To fix this issue, permit tagging the channel as being + * in "low latency" mode. In this mode, we will bypass the monitor + * mechanism. + */ + bool low_latency; + + /* + * NUMA distribution policy: + * We support teo policies: + * 1) Balanced: Here all performance critical channels are + * distributed evenly amongst all the NUMA nodes. + * This policy will be the default policy. + * 2) Localized: All channels of a given instance of a + * performance critical service will be assigned CPUs + * within a selected NUMA node. + */ + enum hv_numa_policy affinity_policy; }; @@ -841,6 +909,12 @@ static inline void set_channel_signal_state(struct vmbus_channel *c, c->signal_policy = policy; } +static inline void set_channel_affinity_state(struct vmbus_channel *c, + enum hv_numa_policy policy) +{ + c->affinity_policy = policy; +} + static inline void set_channel_read_state(struct vmbus_channel *c, bool state) { c->batched_reading = state; @@ -862,6 +936,16 @@ static inline void set_channel_pending_send_size(struct vmbus_channel *c, c->outbound.ring_buffer->pending_send_sz = size; } +static inline void set_low_latency_mode(struct vmbus_channel *c) +{ + c->low_latency = true; +} + +static inline void clear_low_latency_mode(struct vmbus_channel *c) +{ + c->low_latency = false; +} + void vmbus_onmessage(void *context); int vmbus_request_offers(void); @@ -1091,7 +1175,7 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, resource_size_t min, resource_size_t max, resource_size_t size, resource_size_t align, bool fb_overlap_ok); - +void vmbus_free_mmio(resource_size_t start, resource_size_t size); int vmbus_cpu_number_to_vp_number(int cpu_number); u64 hv_do_hypercall(u64 control, void *input, void *output); @@ -1227,6 +1311,27 @@ u64 hv_do_hypercall(u64 control, void *input, void *output); .guid = UUID_LE(0x44c4f61d, 0x4444, 0x4400, 0x9d, 0x52, \ 0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f) +/* + * Linux doesn't support the 3 devices: the first two are for + * Automatic Virtual Machine Activation, and the third is for + * Remote Desktop Virtualization. + * {f8e65716-3cb3-4a06-9a60-1889c5cccab5} + * {3375baf4-9e15-4b30-b765-67acb10d607b} + * {276aacf4-ac15-426c-98dd-7521ad3f01fe} + */ + +#define HV_AVMA1_GUID \ + .guid = UUID_LE(0xf8e65716, 0x3cb3, 0x4a06, 0x9a, 0x60, \ + 0x18, 0x89, 0xc5, 0xcc, 0xca, 0xb5) + +#define HV_AVMA2_GUID \ + .guid = UUID_LE(0x3375baf4, 0x9e15, 0x4b30, 0xb7, 0x65, \ + 0x67, 0xac, 0xb1, 0x0d, 0x60, 0x7b) + +#define HV_RDV_GUID \ + .guid = UUID_LE(0x276aacf4, 0xac15, 0x426c, 0x98, 0xdd, \ + 0x75, 0x21, 0xad, 0x3f, 0x01, 0xfe) + /* * Common header for Hyper-V ICs */ @@ -1315,6 +1420,15 @@ struct ictimesync_data { u8 flags; } __packed; +struct ictimesync_ref_data { + u64 parenttime; + u64 vmreferencetime; + u8 flags; + char leapflags; + char stratum; + u8 reserved[3]; +} __packed; + struct hyperv_service_callback { u8 msg_type; char *log_msg; @@ -1328,6 +1442,9 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *, struct icmsg_negotiate *, u8 *, int, int); +void hv_event_tasklet_disable(struct vmbus_channel *channel); +void hv_event_tasklet_enable(struct vmbus_channel *channel); + void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); /* @@ -1338,4 +1455,143 @@ extern __u32 vmbus_proto_version; int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, const uuid_le *shv_host_servie_id); +void vmbus_set_event(struct vmbus_channel *channel); + +/* Get the start of the ring buffer. */ +static inline void * +hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info) +{ + return (void *)ring_info->ring_buffer->buffer; +} + +/* + * To optimize the flow management on the send-side, + * when the sender is blocked because of lack of + * sufficient space in the ring buffer, potential the + * consumer of the ring buffer can signal the producer. + * This is controlled by the following parameters: + * + * 1. pending_send_sz: This is the size in bytes that the + * producer is trying to send. + * 2. The feature bit feat_pending_send_sz set to indicate if + * the consumer of the ring will signal when the ring + * state transitions from being full to a state where + * there is room for the producer to send the pending packet. + */ + +static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi) +{ + u32 cur_write_sz; + u32 pending_sz; + + /* + * Issue a full memory barrier before making the signaling decision. + * Here is the reason for having this barrier: + * If the reading of the pend_sz (in this function) + * were to be reordered and read before we commit the new read + * index (in the calling function) we could + * have a problem. If the host were to set the pending_sz after we + * have sampled pending_sz and go to sleep before we commit the + * read index, we could miss sending the interrupt. Issue a full + * memory barrier to address this. + */ + virt_mb(); + + pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz); + /* If the other end is not blocked on write don't bother. */ + if (pending_sz == 0) + return false; + + cur_write_sz = hv_get_bytes_to_write(rbi); + + if (cur_write_sz >= pending_sz) + return true; + + return false; +} + +/* + * An API to support in-place processing of incoming VMBUS packets. + */ +#define VMBUS_PKT_TRAILER 8 + +static inline struct vmpacket_descriptor * +get_next_pkt_raw(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; + void *ring_buffer = hv_get_ring_buffer(ring_info); + struct vmpacket_descriptor *cur_desc; + u32 packetlen; + u32 dsize = ring_info->ring_datasize; + u32 delta = read_loc - ring_info->ring_buffer->read_index; + u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); + + if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) + return NULL; + + if ((read_loc + sizeof(*cur_desc)) > dsize) + return NULL; + + cur_desc = ring_buffer + read_loc; + packetlen = cur_desc->len8 << 3; + + /* + * If the packet under consideration is wrapping around, + * return failure. + */ + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) + return NULL; + + return cur_desc; +} + +/* + * A helper function to step through packets "in-place" + * This API is to be called after each successful call + * get_next_pkt_raw(). + */ +static inline void put_pkt_raw(struct vmbus_channel *channel, + struct vmpacket_descriptor *desc) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; + u32 packetlen = desc->len8 << 3; + u32 dsize = ring_info->ring_datasize; + + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize) + BUG(); + /* + * Include the packet trailer. + */ + ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; +} + +/* + * This call commits the read index and potentially signals the host. + * Here is the pattern for using the "in-place" consumption APIs: + * + * while (get_next_pkt_raw() { + * process the packet "in-place"; + * put_pkt_raw(); + * } + * if (packets processed in place) + * commit_rd_index(); + */ +static inline void commit_rd_index(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + /* + * Make sure all reads are done before we update the read index since + * the writer may start writing to the read area once the read index + * is updated. + */ + virt_rmb(); + ring_info->ring_buffer->read_index = ring_info->priv_read_index; + + if (hv_need_to_signal_on_read(ring_info)) + vmbus_set_event(channel); +} + + #endif /* _HYPERV_H */