Line data Source code
1 : /*
2 : *------------------------------------------------------------------
3 : * vhost-user-output
4 : *
5 : * Copyright (c) 2014-2018 Cisco and/or its affiliates.
6 : * Licensed under the Apache License, Version 2.0 (the "License");
7 : * you may not use this file except in compliance with the License.
8 : * You may obtain a copy of the License at:
9 : *
10 : * http://www.apache.org/licenses/LICENSE-2.0
11 : *
12 : * Unless required by applicable law or agreed to in writing, software
13 : * distributed under the License is distributed on an "AS IS" BASIS,
14 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 : * See the License for the specific language governing permissions and
16 : * limitations under the License.
17 : *------------------------------------------------------------------
18 : */
19 :
20 : #include <stddef.h>
21 : #include <fcntl.h> /* for open */
22 : #include <sys/ioctl.h>
23 : #include <sys/socket.h>
24 : #include <sys/un.h>
25 : #include <sys/stat.h>
26 : #include <sys/types.h>
27 : #include <sys/uio.h> /* for iovec */
28 : #include <netinet/in.h>
29 : #include <sys/vfs.h>
30 :
31 : #include <linux/if_arp.h>
32 : #include <linux/if_tun.h>
33 :
34 : #include <vlib/vlib.h>
35 : #include <vlib/unix/unix.h>
36 :
37 : #include <vnet/ethernet/ethernet.h>
38 : #include <vnet/devices/devices.h>
39 : #include <vnet/feature/feature.h>
40 : #include <vnet/ip/ip_psh_cksum.h>
41 :
42 : #include <vhost/vhost_user.h>
43 : #include <vhost/vhost_user_inline.h>
44 :
45 : #include <vnet/gso/hdr_offset_parser.h>
46 : /*
47 : * On the transmit side, we keep processing the buffers from vlib in the while
48 : * loop and prepare the copy order to be executed later. However, the static
49 : * array in which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N
50 : * entries. In order not to corrupt memory, we have to do the copy when the
51 : * static array reaches the copy threshold. We need head room because the
52 : * inner loop for a maximum 64k frame may require more array entries: our
53 : * default buffer size is 2048 while the default desc len is likely 1536,
54 : * so while a jumbo frame takes fewer than 40 vlib buffers, it may take
55 : * twice as many descriptors for the same jumbo frame. Use 200 for the
56 : * extra head room.
57 : */
58 : #define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 200)
59 :
60 : extern vnet_device_class_t vhost_user_device_class;
61 :
62 : #define foreach_vhost_user_tx_func_error \
63 : _(NONE, "no error") \
64 : _(NOT_READY, "vhost vring not ready") \
65 : _(DOWN, "vhost interface is down") \
66 : _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \
67 : _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)") \
68 : _(MMAP_FAIL, "mmap failure") \
69 : _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")
70 :
71 : typedef enum
72 : {
73 : #define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
74 : foreach_vhost_user_tx_func_error
75 : #undef _
76 : VHOST_USER_TX_FUNC_N_ERROR,
77 : } vhost_user_tx_func_error_t;
78 :
79 : static __clib_unused char *vhost_user_tx_func_error_strings[] = {
80 : #define _(n,s) s,
81 : foreach_vhost_user_tx_func_error
82 : #undef _
83 : };
84 :
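 : /*
 :  * Format the interface name shown to the user; when the interface has been
 :  * renumbered, display the assigned show dev instance instead of the real one.
 :  */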
85 : static __clib_unused u8 *
86 4 : format_vhost_user_interface_name (u8 * s, va_list * args)
87 : {
88 4 : u32 i = va_arg (*args, u32);
89 4 : u32 show_dev_instance = ~0;
90 4 : vhost_user_main_t *vum = &vhost_user_main;
91 :
92 4 : if (i < vec_len (vum->show_dev_instance_by_real_dev_instance))
93 0 : show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];
94 :
95 4 : if (show_dev_instance != ~0)
96 0 : i = show_dev_instance;
97 :
98 4 : s = format (s, "VirtualEthernet0/0/%d", i);
99 4 : return s;
100 : }
101 :
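 : /*
 :  * Record a requested instance renumber; this only affects how the interface
 :  * name is formatted above, not the real dev_instance.
 :  */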
102 : static __clib_unused int
103 0 : vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
104 : {
105 : // FIXME: check if the new dev instance is already used
106 0 : vhost_user_main_t *vum = &vhost_user_main;
107 0 : vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
108 : hi->dev_instance);
109 :
110 0 : vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance,
111 : hi->dev_instance, ~0);
112 :
113 0 : vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] =
114 : new_dev_instance;
115 :
116 0 : vu_log_debug (vui, "renumbered vhost-user interface dev_instance %d to %d",
117 : hi->dev_instance, new_dev_instance);
118 :
119 0 : return 0;
120 : }
121 :
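 : /*
 :  * Capture a TX trace record for the split ring: note whether the first
 :  * descriptor is indirect, chained or standalone, and record its length.
 :  */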
122 : static_always_inline void
123 0 : vhost_user_tx_trace (vhost_trace_t * t,
124 : vhost_user_intf_t * vui, u16 qid,
125 : vlib_buffer_t * b, vhost_user_vring_t * rxvq)
126 : {
127 0 : vhost_user_main_t *vum = &vhost_user_main;
128 0 : u32 last_avail_idx = rxvq->last_avail_idx;
129 0 : u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask];
130 0 : vnet_virtio_vring_desc_t *hdr_desc = 0;
131 0 : u32 hint = 0;
132 :
133 0 : clib_memset (t, 0, sizeof (*t));
134 0 : t->device_index = vui - vum->vhost_user_interfaces;
135 0 : t->qid = qid;
136 :
137 0 : hdr_desc = &rxvq->desc[desc_current];
138 0 : if (rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
139 : {
140 0 : t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
141 : /* The header is in the first entry of the indirect table */
142 0 : hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint);
143 : }
144 0 : if (rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT)
145 : {
146 0 : t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
147 : }
148 0 : if (!(rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
149 0 : !(rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
150 : {
151 0 : t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
152 : }
153 :
154 0 : t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
155 0 : }
156 :
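 : /*
 :  * Execute the deferred copy list: map each guest physical destination to a
 :  * process address and copy the data, two entries per iteration with the next
 :  * pair prefetched. Returns non-zero if any guest address fails to map.
 :  */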
157 : static_always_inline u32
158 0 : vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
159 : u16 copy_len, u32 * map_hint)
160 : {
161 : void *dst0, *dst1, *dst2, *dst3;
162 0 : if (PREDICT_TRUE (copy_len >= 4))
163 : {
164 0 : if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint))))
165 0 : return 1;
166 0 : if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint))))
167 0 : return 1;
168 0 : while (PREDICT_TRUE (copy_len >= 4))
169 : {
170 0 : dst0 = dst2;
171 0 : dst1 = dst3;
172 :
173 0 : if (PREDICT_FALSE
174 : (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint))))
175 0 : return 1;
176 0 : if (PREDICT_FALSE
177 : (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint))))
178 0 : return 1;
179 :
180 0 : clib_prefetch_load ((void *) cpy[2].src);
181 0 : clib_prefetch_load ((void *) cpy[3].src);
182 :
183 0 : clib_memcpy_fast (dst0, (void *) cpy[0].src, cpy[0].len);
184 0 : clib_memcpy_fast (dst1, (void *) cpy[1].src, cpy[1].len);
185 :
186 0 : vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1);
187 0 : vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1);
188 0 : copy_len -= 2;
189 0 : cpy += 2;
190 : }
191 : }
192 0 : while (copy_len)
193 : {
194 0 : if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint))))
195 0 : return 1;
196 0 : clib_memcpy_fast (dst0, (void *) cpy->src, cpy->len);
197 0 : vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1);
198 0 : copy_len -= 1;
199 0 : cpy += 1;
200 : }
201 0 : return 0;
202 : }
203 :
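 : /*
 :  * Translate vlib checksum/GSO offload metadata into the virtio net header:
 :  * fill in the IPv4 header checksum, seed the L4 checksum with the pseudo
 :  * header checksum, and set csum_start/csum_offset and gso_type/gso_size
 :  * according to the features negotiated with the guest.
 :  */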
204 : static_always_inline void
205 0 : vhost_user_handle_tx_offload (vhost_user_intf_t *vui, vlib_buffer_t *b,
206 : vnet_virtio_net_hdr_t *hdr)
207 : {
208 0 : generic_header_offset_t gho = { 0 };
209 0 : int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
210 0 : int is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6;
211 0 : vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
212 0 : u16 psh_cksum = 0;
213 0 : ip4_header_t *ip4 = 0;
214 0 : ip6_header_t *ip6 = 0;
215 :
216 0 : ASSERT (!(is_ip4 && is_ip6));
217 0 : vnet_generic_header_offset_parser (b, &gho, 1 /* l2 */ , is_ip4, is_ip6);
218 0 : if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
219 : {
220 0 : ip4 =
221 0 : (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
222 0 : ip4->checksum = ip4_header_checksum (ip4);
223 0 : psh_cksum = ip4_pseudo_header_cksum (ip4);
224 : }
225 : else
226 : {
227 0 : ip6 = (ip6_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
228 0 : psh_cksum = ip6_pseudo_header_cksum (ip6);
229 : }
230 :
231 : /* checksum offload */
232 0 : if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
233 : {
234 0 : udp_header_t *udp =
235 0 : (udp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
236 0 : udp->checksum = psh_cksum;
237 0 : hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
238 0 : hdr->csum_start = gho.l4_hdr_offset;
239 0 : hdr->csum_offset = offsetof (udp_header_t, checksum);
240 : }
241 0 : else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
242 : {
243 0 : tcp_header_t *tcp =
244 0 : (tcp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
245 0 : tcp->checksum = psh_cksum;
246 0 : hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
247 0 : hdr->csum_start = gho.l4_hdr_offset;
248 0 : hdr->csum_offset = offsetof (tcp_header_t, checksum);
249 : }
250 :
251 : /* GSO offload */
252 0 : if (b->flags & VNET_BUFFER_F_GSO)
253 : {
254 0 : if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
255 : {
256 0 : if (is_ip4 &&
257 0 : (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4)))
258 : {
259 0 : hdr->gso_size = vnet_buffer2 (b)->gso_size;
260 0 : hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
261 : }
262 0 : else if (is_ip6 &&
263 0 : (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6)))
264 : {
265 0 : hdr->gso_size = vnet_buffer2 (b)->gso_size;
266 0 : hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
267 : }
268 : }
269 0 : else if ((vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO)) &&
270 0 : (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
271 : {
272 0 : hdr->gso_size = vnet_buffer2 (b)->gso_size;
273 0 : hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
274 : }
275 : }
276 0 : }
277 :
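 : /*
 :  * Hand processed packed-ring descriptors back to the guest: toggle the
 :  * AVAIL/USED flags based on the used wrap counter, writing the head
 :  * descriptor's flags last, then send an interrupt (call) if the guest has
 :  * not disabled events and the coalesce threshold is reached.
 :  */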
278 : static_always_inline void
279 0 : vhost_user_mark_desc_available (vlib_main_t * vm, vhost_user_intf_t * vui,
280 : vhost_user_vring_t * rxvq,
281 : u16 * n_descs_processed, u8 chained,
282 : vlib_frame_t * frame, u32 n_left)
283 : {
284 : u16 desc_idx, flags;
285 0 : vnet_virtio_vring_packed_desc_t *desc_table = rxvq->packed_desc;
286 0 : u16 last_used_idx = rxvq->last_used_idx;
287 :
288 0 : if (PREDICT_FALSE (*n_descs_processed == 0))
289 0 : return;
290 :
291 0 : if (rxvq->used_wrap_counter)
292 0 : flags = desc_table[last_used_idx & rxvq->qsz_mask].flags |
293 : (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
294 : else
295 0 : flags = desc_table[last_used_idx & rxvq->qsz_mask].flags &
296 : ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
297 :
298 0 : vhost_user_advance_last_used_idx (rxvq);
299 :
300 0 : for (desc_idx = 1; desc_idx < *n_descs_processed; desc_idx++)
301 : {
302 0 : if (rxvq->used_wrap_counter)
303 0 : desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags |=
304 : (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
305 : else
306 0 : desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &=
307 : ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
308 0 : vhost_user_advance_last_used_idx (rxvq);
309 : }
310 :
311 0 : desc_table[last_used_idx & rxvq->qsz_mask].flags = flags;
312 :
313 0 : *n_descs_processed = 0;
314 :
315 0 : if (chained)
316 : {
317 0 : vnet_virtio_vring_packed_desc_t *desc_table = rxvq->packed_desc;
318 :
319 0 : while (desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &
320 : VRING_DESC_F_NEXT)
321 0 : vhost_user_advance_last_used_idx (rxvq);
322 :
323 : /* Advance past the current chained table entries */
324 0 : vhost_user_advance_last_used_idx (rxvq);
325 : }
326 :
327 : /* interrupt (call) handling */
328 0 : if ((rxvq->callfd_idx != ~0) &&
329 0 : (rxvq->avail_event->flags != VRING_EVENT_F_DISABLE))
330 : {
331 0 : vhost_user_main_t *vum = &vhost_user_main;
332 :
333 0 : rxvq->n_since_last_int += frame->n_vectors - n_left;
334 0 : if (rxvq->n_since_last_int > vum->coalesce_frames)
335 0 : vhost_user_send_call (vm, vui, rxvq);
336 : }
337 : }
338 :
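 : /* Packed-ring variant of the TX trace capture above. */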
339 : static_always_inline void
340 0 : vhost_user_tx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
341 : u16 qid, vlib_buffer_t * b,
342 : vhost_user_vring_t * rxvq)
343 : {
344 0 : vhost_user_main_t *vum = &vhost_user_main;
345 0 : u32 last_avail_idx = rxvq->last_avail_idx;
346 0 : u32 desc_current = last_avail_idx & rxvq->qsz_mask;
347 0 : vnet_virtio_vring_packed_desc_t *hdr_desc = 0;
348 0 : u32 hint = 0;
349 :
350 0 : clib_memset (t, 0, sizeof (*t));
351 0 : t->device_index = vui - vum->vhost_user_interfaces;
352 0 : t->qid = qid;
353 :
354 0 : hdr_desc = &rxvq->packed_desc[desc_current];
355 0 : if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)
356 : {
357 0 : t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
358 : /* The header is in the first entry of the indirect table */
359 0 : hdr_desc = map_guest_mem (vui, rxvq->packed_desc[desc_current].addr,
360 : &hint);
361 : }
362 0 : if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT)
363 : {
364 0 : t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
365 : }
366 0 : if (!(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
367 0 : !(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
368 : {
369 0 : t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
370 : }
371 :
372 0 : t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
373 0 : }
374 :
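 : /*
 :  * TX path for packed rings: for each vlib buffer, claim guest descriptors
 :  * (direct, chained or indirect), queue copy orders for the virtio net header
 :  * and the payload, execute the copies in batches, and mark the descriptors
 :  * as used. Retries a few times when the ring runs out of descriptors.
 :  */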
375 : static_always_inline uword
376 0 : vhost_user_device_class_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
377 : vlib_frame_t *frame, vhost_user_intf_t *vui,
378 : vhost_user_vring_t *rxvq)
379 : {
380 0 : u32 *buffers = vlib_frame_vector_args (frame);
381 0 : u32 n_left = frame->n_vectors;
382 0 : vhost_user_main_t *vum = &vhost_user_main;
383 0 : u32 qid = rxvq->qid;
384 : u8 error;
385 0 : u32 thread_index = vm->thread_index;
386 0 : vhost_cpu_t *cpu = &vum->cpus[thread_index];
387 0 : u32 map_hint = 0;
388 0 : u8 retry = 8;
389 : u16 copy_len;
390 : u16 tx_headers_len;
391 : vnet_virtio_vring_packed_desc_t *desc_table;
392 : u32 or_flags;
393 : u16 desc_head, desc_index, desc_len;
394 : u16 n_descs_processed;
395 : u8 indirect, chained;
396 :
397 0 : retry:
398 0 : error = VHOST_USER_TX_FUNC_ERROR_NONE;
399 0 : tx_headers_len = 0;
400 0 : copy_len = 0;
401 0 : n_descs_processed = 0;
402 :
403 0 : while (n_left > 0)
404 : {
405 : vlib_buffer_t *b0, *current_b0;
406 : uword buffer_map_addr;
407 : u32 buffer_len;
408 : u16 bytes_left;
409 0 : u32 total_desc_len = 0;
410 0 : u16 n_entries = 0;
411 :
412 0 : indirect = 0;
413 0 : chained = 0;
414 0 : if (PREDICT_TRUE (n_left > 1))
415 0 : vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
416 :
417 0 : b0 = vlib_get_buffer (vm, buffers[0]);
418 0 : if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
419 : {
420 0 : cpu->current_trace = vlib_add_trace (vm, node, b0,
421 : sizeof (*cpu->current_trace));
422 0 : vhost_user_tx_trace_packed (cpu->current_trace, vui, qid / 2, b0,
423 : rxvq);
424 : }
425 :
426 0 : desc_table = rxvq->packed_desc;
427 0 : desc_head = desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
428 0 : if (PREDICT_FALSE (!vhost_user_packed_desc_available (rxvq, desc_head)))
429 : {
430 0 : error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
431 0 : goto done;
432 : }
433 : /*
434 : * Go deeper in case of an indirect descriptor.
435 : * To test it, turn off mrg_rxbuf.
436 : */
437 0 : if (desc_table[desc_head].flags & VRING_DESC_F_INDIRECT)
438 : {
439 0 : indirect = 1;
440 0 : if (PREDICT_FALSE (desc_table[desc_head].len <
441 : sizeof (vnet_virtio_vring_packed_desc_t)))
442 : {
443 0 : error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
444 0 : goto done;
445 : }
446 0 : n_entries = desc_table[desc_head].len >> 4;
447 0 : desc_table = map_guest_mem (vui, desc_table[desc_index].addr,
448 : &map_hint);
449 0 : if (PREDICT_FALSE (desc_table == 0))
450 : {
451 0 : error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
452 0 : goto done;
453 : }
454 0 : desc_index = 0;
455 : }
456 0 : else if (rxvq->packed_desc[desc_head].flags & VRING_DESC_F_NEXT)
457 0 : chained = 1;
458 :
459 0 : desc_len = vui->virtio_net_hdr_sz;
460 0 : buffer_map_addr = desc_table[desc_index].addr;
461 0 : buffer_len = desc_table[desc_index].len;
462 :
463 : /* Get a header from the header array */
464 0 : vnet_virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
465 0 : tx_headers_len++;
466 0 : hdr->hdr.flags = 0;
467 0 : hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
468 0 : hdr->num_buffers = 1;
469 :
470 0 : or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD);
471 :
472 : /* Guest supports csum offload and buffer requires checksum offload? */
473 0 : if (or_flags &&
474 0 : (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
475 0 : vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);
476 :
477 : /* Prepare a copy order executed later for the header */
478 0 : ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
479 0 : vhost_copy_t *cpy = &cpu->copy[copy_len];
480 0 : copy_len++;
481 0 : cpy->len = vui->virtio_net_hdr_sz;
482 0 : cpy->dst = buffer_map_addr;
483 0 : cpy->src = (uword) hdr;
484 :
485 0 : buffer_map_addr += vui->virtio_net_hdr_sz;
486 0 : buffer_len -= vui->virtio_net_hdr_sz;
487 0 : bytes_left = b0->current_length;
488 0 : current_b0 = b0;
489 : while (1)
490 0 : {
491 0 : if (buffer_len == 0)
492 : {
493 : /* Get new output */
494 0 : if (chained)
495 : {
496 : /*
497 : * Next one is chained
498 : * Test it with both indirect and mrg_rxbuf off
499 : */
500 0 : if (PREDICT_FALSE (!(desc_table[desc_index].flags &
501 : VRING_DESC_F_NEXT)))
502 : {
503 : /*
504 : * Last descriptor in chain.
505 : * Dequeue queued descriptors for this packet
506 : */
507 0 : vhost_user_dequeue_chained_descs (rxvq,
508 : &n_descs_processed);
509 0 : error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
510 0 : goto done;
511 : }
512 0 : vhost_user_advance_last_avail_idx (rxvq);
513 0 : desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
514 0 : n_descs_processed++;
515 0 : buffer_map_addr = desc_table[desc_index].addr;
516 0 : buffer_len = desc_table[desc_index].len;
517 0 : total_desc_len += desc_len;
518 0 : desc_len = 0;
519 : }
520 0 : else if (indirect)
521 : {
522 : /*
523 : * Indirect table
524 : * Test it with mrg_rxbuf off
525 : */
526 0 : if (PREDICT_TRUE (n_entries > 0))
527 0 : n_entries--;
528 : else
529 : {
530 : /* Dequeue queued descriptors for this packet */
531 0 : vhost_user_dequeue_chained_descs (rxvq,
532 : &n_descs_processed);
533 0 : error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
534 0 : goto done;
535 : }
536 0 : total_desc_len += desc_len;
537 0 : desc_index = (desc_index + 1) & rxvq->qsz_mask;
538 0 : buffer_map_addr = desc_table[desc_index].addr;
539 0 : buffer_len = desc_table[desc_index].len;
540 0 : desc_len = 0;
541 : }
542 0 : else if (vui->virtio_net_hdr_sz == 12)
543 : {
544 : /*
545 : * MRG is available
546 : * This is the default setting for the guest VM
547 : */
548 0 : vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
549 0 : &cpu->tx_headers[tx_headers_len - 1];
550 :
551 0 : desc_table[desc_index].len = desc_len;
552 0 : vhost_user_advance_last_avail_idx (rxvq);
553 0 : desc_head = desc_index =
554 0 : rxvq->last_avail_idx & rxvq->qsz_mask;
555 0 : hdr->num_buffers++;
556 0 : n_descs_processed++;
557 0 : desc_len = 0;
558 :
559 0 : if (PREDICT_FALSE (!vhost_user_packed_desc_available
560 : (rxvq, desc_index)))
561 : {
562 : /* Dequeue queued descriptors for this packet */
563 0 : vhost_user_dequeue_descs (rxvq, hdr,
564 : &n_descs_processed);
565 0 : error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
566 0 : goto done;
567 : }
568 :
569 0 : buffer_map_addr = desc_table[desc_index].addr;
570 0 : buffer_len = desc_table[desc_index].len;
571 : }
572 : else
573 : {
574 0 : error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
575 0 : goto done;
576 : }
577 : }
578 :
579 0 : ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
580 0 : vhost_copy_t *cpy = &cpu->copy[copy_len];
581 0 : copy_len++;
582 0 : cpy->len = bytes_left;
583 0 : cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
584 0 : cpy->dst = buffer_map_addr;
585 0 : cpy->src = (uword) vlib_buffer_get_current (current_b0) +
586 0 : current_b0->current_length - bytes_left;
587 :
588 0 : bytes_left -= cpy->len;
589 0 : buffer_len -= cpy->len;
590 0 : buffer_map_addr += cpy->len;
591 0 : desc_len += cpy->len;
592 :
593 0 : clib_prefetch_load (&rxvq->packed_desc);
594 :
595 : /* Check if vlib buffer has more data. If not, get more or break */
596 0 : if (PREDICT_TRUE (!bytes_left))
597 : {
598 0 : if (PREDICT_FALSE
599 : (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
600 : {
601 0 : current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
602 0 : bytes_left = current_b0->current_length;
603 : }
604 : else
605 : {
606 : /* End of packet */
607 0 : break;
608 : }
609 : }
610 : }
611 :
612 : /* Move from available to used ring */
613 0 : total_desc_len += desc_len;
614 0 : rxvq->packed_desc[desc_head].len = total_desc_len;
615 :
616 0 : vhost_user_advance_last_avail_table_idx (vui, rxvq, chained);
617 0 : n_descs_processed++;
618 :
619 0 : if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
620 0 : cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
621 :
622 0 : n_left--;
623 :
624 : /*
625 : * Do the copy periodically to prevent
626 : * cpu->copy array overflow and memory corruption
627 : */
628 0 : if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD) || chained)
629 : {
630 0 : if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
631 : &map_hint)))
632 0 : vlib_error_count (vm, node->node_index,
633 : VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
634 0 : copy_len = 0;
635 :
636 : /* give buffers back to driver */
637 0 : vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed,
638 : chained, frame, n_left);
639 : }
640 :
641 0 : buffers++;
642 : }
643 :
644 0 : done:
645 0 : if (PREDICT_TRUE (copy_len))
646 : {
647 0 : if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
648 : &map_hint)))
649 0 : vlib_error_count (vm, node->node_index,
650 : VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
651 :
652 0 : vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed,
653 : chained, frame, n_left);
654 : }
655 :
656 : /*
657 : * When n_left is set, error is always set to something too.
658 : * In case error is due to lack of remaining buffers, we go back up and
659 : * retry.
660 : * The idea is that it is better to waste some time on packets
661 : * that have been processed already than dropping them and getting fresh
662 : * packets, with a good likelihood that they would be dropped too.
663 : * This technique also gives more time to the VM driver to pick up packets.
664 : * In case the traffic flows from physical to virtual interfaces, this
665 : * technique will end up leveraging the physical NIC buffer in order to
666 : * absorb the VM's CPU jitter.
667 : */
668 0 : if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
669 : {
670 0 : retry--;
671 0 : goto retry;
672 : }
673 :
674 0 : clib_spinlock_unlock (&rxvq->vring_lock);
675 :
676 0 : if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
677 : {
678 0 : vlib_error_count (vm, node->node_index, error, n_left);
679 0 : vlib_increment_simple_counter
680 : (vnet_main.interface_main.sw_if_counters +
681 : VNET_INTERFACE_COUNTER_DROP, thread_index, vui->sw_if_index, n_left);
682 : }
683 :
684 0 : vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
685 0 : return frame->n_vectors;
686 : }
687 :
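 : /*
 :  * Device-class TX function. Validates that the interface and vring are
 :  * ready, dispatches to the packed-ring path when negotiated, and otherwise
 :  * copies each packet into split-ring descriptors behind a virtio net header
 :  * (using mergeable rx buffers when available), updates the used ring and
 :  * fires the interrupt (call) if the guest requested one.
 :  */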
688 2236 : VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm,
689 : vlib_node_runtime_t *
690 : node, vlib_frame_t * frame)
691 : {
692 0 : u32 *buffers = vlib_frame_vector_args (frame);
693 0 : u32 n_left = frame->n_vectors;
694 0 : vhost_user_main_t *vum = &vhost_user_main;
695 0 : vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
696 0 : vhost_user_intf_t *vui =
697 0 : pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
698 : u32 qid;
699 : vhost_user_vring_t *rxvq;
700 : u8 error;
701 0 : u32 thread_index = vm->thread_index;
702 0 : vhost_cpu_t *cpu = &vum->cpus[thread_index];
703 0 : u32 map_hint = 0;
704 0 : u8 retry = 8;
705 : u16 copy_len;
706 : u16 tx_headers_len;
707 : u32 or_flags;
708 0 : vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
709 :
710 0 : if (PREDICT_FALSE (!vui->admin_up))
711 : {
712 0 : error = VHOST_USER_TX_FUNC_ERROR_DOWN;
713 0 : goto done3;
714 : }
715 :
716 0 : if (PREDICT_FALSE (!vui->is_ready))
717 : {
718 0 : error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
719 0 : goto done3;
720 : }
721 :
722 0 : qid = VHOST_VRING_IDX_RX (tf->queue_id);
723 0 : rxvq = &vui->vrings[qid];
724 0 : ASSERT (tf->queue_id == rxvq->qid);
725 :
726 0 : if (PREDICT_FALSE (rxvq->avail == 0))
727 : {
728 0 : error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
729 0 : goto done3;
730 : }
731 0 : if (tf->shared_queue)
732 0 : clib_spinlock_lock (&rxvq->vring_lock);
733 :
734 0 : if (vhost_user_is_packed_ring_supported (vui))
735 0 : return (vhost_user_device_class_packed (vm, node, frame, vui, rxvq));
736 :
737 0 : retry:
738 0 : error = VHOST_USER_TX_FUNC_ERROR_NONE;
739 0 : tx_headers_len = 0;
740 0 : copy_len = 0;
741 0 : while (n_left > 0)
742 : {
743 : vlib_buffer_t *b0, *current_b0;
744 : u16 desc_head, desc_index, desc_len;
745 : vnet_virtio_vring_desc_t *desc_table;
746 : uword buffer_map_addr;
747 : u32 buffer_len;
748 : u16 bytes_left;
749 :
750 0 : if (PREDICT_TRUE (n_left > 1))
751 0 : vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
752 :
753 0 : b0 = vlib_get_buffer (vm, buffers[0]);
754 :
755 0 : if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
756 : {
757 0 : cpu->current_trace = vlib_add_trace (vm, node, b0,
758 : sizeof (*cpu->current_trace));
759 0 : vhost_user_tx_trace (cpu->current_trace, vui, qid / 2, b0, rxvq);
760 : }
761 :
762 0 : if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
763 : {
764 0 : error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
765 0 : goto done;
766 : }
767 :
768 0 : desc_table = rxvq->desc;
769 0 : desc_head = desc_index =
770 0 : rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
771 :
772 : /* Go deeper in case of an indirect descriptor.
773 : * I don't know of any driver providing indirect descriptors for RX. */
774 0 : if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
775 : {
776 0 : if (PREDICT_FALSE (rxvq->desc[desc_head].len <
777 : sizeof (vnet_virtio_vring_desc_t)))
778 : {
779 0 : error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
780 0 : goto done;
781 : }
782 0 : if (PREDICT_FALSE
783 : (!(desc_table =
784 : map_guest_mem (vui, rxvq->desc[desc_index].addr,
785 : &map_hint))))
786 : {
787 0 : error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
788 0 : goto done;
789 : }
790 0 : desc_index = 0;
791 : }
792 :
793 0 : desc_len = vui->virtio_net_hdr_sz;
794 0 : buffer_map_addr = desc_table[desc_index].addr;
795 0 : buffer_len = desc_table[desc_index].len;
796 :
797 : {
798 : // Get a header from the header array
799 0 : vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
800 0 : &cpu->tx_headers[tx_headers_len];
801 0 : tx_headers_len++;
802 0 : hdr->hdr.flags = 0;
803 0 : hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
804 0 : hdr->num_buffers = 1; //This is local, no need to check
805 :
806 0 : or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD);
807 :
808 : /* Guest supports csum offload and buffer requires checksum offload? */
809 0 : if (or_flags
810 0 : && (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
811 0 : vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);
812 :
813 : // Prepare a copy order executed later for the header
814 0 : ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
815 0 : vhost_copy_t *cpy = &cpu->copy[copy_len];
816 0 : copy_len++;
817 0 : cpy->len = vui->virtio_net_hdr_sz;
818 0 : cpy->dst = buffer_map_addr;
819 0 : cpy->src = (uword) hdr;
820 : }
821 :
822 0 : buffer_map_addr += vui->virtio_net_hdr_sz;
823 0 : buffer_len -= vui->virtio_net_hdr_sz;
824 0 : bytes_left = b0->current_length;
825 0 : current_b0 = b0;
826 : while (1)
827 : {
828 0 : if (buffer_len == 0)
829 : { //Get new output
830 0 : if (desc_table[desc_index].flags & VRING_DESC_F_NEXT)
831 : {
832 : //Next one is chained
833 0 : desc_index = desc_table[desc_index].next;
834 0 : buffer_map_addr = desc_table[desc_index].addr;
835 0 : buffer_len = desc_table[desc_index].len;
836 : }
837 0 : else if (vui->virtio_net_hdr_sz == 12) //MRG is available
838 : {
839 0 : vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
840 0 : &cpu->tx_headers[tx_headers_len - 1];
841 :
842 : //Move from available to used buffer
843 0 : rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id =
844 : desc_head;
845 0 : rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len =
846 : desc_len;
847 0 : vhost_user_log_dirty_ring (vui, rxvq,
848 : ring[rxvq->last_used_idx &
849 : rxvq->qsz_mask]);
850 :
851 0 : rxvq->last_avail_idx++;
852 0 : rxvq->last_used_idx++;
853 0 : hdr->num_buffers++;
854 0 : desc_len = 0;
855 :
856 0 : if (PREDICT_FALSE
857 : (rxvq->last_avail_idx == rxvq->avail->idx))
858 : {
859 : //Dequeue queued descriptors for this packet
860 0 : rxvq->last_used_idx -= hdr->num_buffers - 1;
861 0 : rxvq->last_avail_idx -= hdr->num_buffers - 1;
862 0 : error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
863 0 : goto done;
864 : }
865 :
866 0 : desc_table = rxvq->desc;
867 0 : desc_head = desc_index =
868 0 : rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
869 0 : if (PREDICT_FALSE
870 : (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
871 : {
872 : // It is seriously unlikely that a driver will put an indirect
873 : // descriptor after a non-indirect descriptor.
874 0 : if (PREDICT_FALSE (rxvq->desc[desc_head].len <
875 : sizeof (vnet_virtio_vring_desc_t)))
876 : {
877 0 : error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
878 0 : goto done;
879 : }
880 0 : if (PREDICT_FALSE
881 : (!(desc_table =
882 : map_guest_mem (vui,
883 : rxvq->desc[desc_index].addr,
884 : &map_hint))))
885 : {
886 0 : error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
887 0 : goto done;
888 : }
889 0 : desc_index = 0;
890 : }
891 0 : buffer_map_addr = desc_table[desc_index].addr;
892 0 : buffer_len = desc_table[desc_index].len;
893 : }
894 : else
895 : {
896 0 : error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
897 0 : goto done;
898 : }
899 : }
900 :
901 : {
902 0 : ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
903 0 : vhost_copy_t *cpy = &cpu->copy[copy_len];
904 0 : copy_len++;
905 0 : cpy->len = bytes_left;
906 0 : cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
907 0 : cpy->dst = buffer_map_addr;
908 0 : cpy->src = (uword) vlib_buffer_get_current (current_b0) +
909 0 : current_b0->current_length - bytes_left;
910 :
911 0 : bytes_left -= cpy->len;
912 0 : buffer_len -= cpy->len;
913 0 : buffer_map_addr += cpy->len;
914 0 : desc_len += cpy->len;
915 :
916 0 : clib_prefetch_load (&rxvq->desc);
917 : }
918 :
919 : // Check if vlib buffer has more data. If not, get more or break.
920 0 : if (PREDICT_TRUE (!bytes_left))
921 : {
922 0 : if (PREDICT_FALSE
923 : (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
924 : {
925 0 : current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
926 0 : bytes_left = current_b0->current_length;
927 : }
928 : else
929 : {
930 : //End of packet
931 0 : break;
932 : }
933 : }
934 : }
935 :
936 : //Move from available to used ring
937 0 : rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head;
938 0 : rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len;
939 0 : vhost_user_log_dirty_ring (vui, rxvq,
940 : ring[rxvq->last_used_idx & rxvq->qsz_mask]);
941 0 : rxvq->last_avail_idx++;
942 0 : rxvq->last_used_idx++;
943 :
944 0 : if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
945 : {
946 0 : cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
947 : }
948 :
949 0 : n_left--; //At the end for error counting when 'goto done' is invoked
950 :
951 : /*
952 : * Do the copy periodically to prevent
953 : * cpu->copy array overflow and memory corruption
954 : */
955 0 : if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
956 : {
957 0 : if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
958 : &map_hint)))
959 : {
960 0 : vlib_error_count (vm, node->node_index,
961 : VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
962 : }
963 0 : copy_len = 0;
964 :
965 : /* give buffers back to driver */
966 0 : CLIB_MEMORY_BARRIER ();
967 0 : rxvq->used->idx = rxvq->last_used_idx;
968 0 : vhost_user_log_dirty_ring (vui, rxvq, idx);
969 : }
970 0 : buffers++;
971 : }
972 :
973 0 : done:
974 : //Do the memory copies
975 0 : if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
976 : &map_hint)))
977 : {
978 0 : vlib_error_count (vm, node->node_index,
979 : VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
980 : }
981 :
982 0 : CLIB_MEMORY_BARRIER ();
983 0 : rxvq->used->idx = rxvq->last_used_idx;
984 0 : vhost_user_log_dirty_ring (vui, rxvq, idx);
985 :
986 : /*
987 : * When n_left is set, error is always set to something too.
988 : * In case error is due to lack of remaining buffers, we go back up and
989 : * retry.
990 : * The idea is that it is better to waste some time on packets
991 : * that have been processed already than dropping them and getting fresh
992 : * packets, with a good likelihood that they would be dropped too.
993 : * This technique also gives more time to the VM driver to pick up packets.
994 : * In case the traffic flows from physical to virtual interfaces, this
995 : * technique will end up leveraging the physical NIC buffer in order to
996 : * absorb the VM's CPU jitter.
997 : */
998 0 : if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
999 : {
1000 0 : retry--;
1001 0 : goto retry;
1002 : }
1003 :
1004 : /* interrupt (call) handling */
1005 0 : if ((rxvq->callfd_idx != ~0) &&
1006 0 : !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
1007 : {
1008 0 : rxvq->n_since_last_int += frame->n_vectors - n_left;
1009 :
1010 0 : if (rxvq->n_since_last_int > vum->coalesce_frames)
1011 0 : vhost_user_send_call (vm, vui, rxvq);
1012 : }
1013 :
1014 0 : clib_spinlock_unlock (&rxvq->vring_lock);
1015 :
1016 0 : done3:
1017 0 : if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
1018 : {
1019 0 : vlib_error_count (vm, node->node_index, error, n_left);
1020 0 : vlib_increment_simple_counter
1021 : (vnet_main.interface_main.sw_if_counters
1022 : + VNET_INTERFACE_COUNTER_DROP,
1023 : thread_index, vui->sw_if_index, n_left);
1024 : }
1025 :
1026 0 : vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
1027 0 : return frame->n_vectors;
1028 : }
1029 :
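 : /*
 :  * Handle rx-mode changes (polling/interrupt/adaptive) on a queue: keep the
 :  * per-thread polling queue count and the global interrupt queue count in
 :  * sync, and start or stop the interrupt coalescing timer as needed.
 :  */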
1030 : static __clib_unused clib_error_t *
1031 0 : vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
1032 : u32 qid, vnet_hw_if_rx_mode mode)
1033 : {
1034 0 : vlib_main_t *vm = vnm->vlib_main;
1035 0 : vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
1036 0 : vhost_user_main_t *vum = &vhost_user_main;
1037 0 : vhost_user_intf_t *vui =
1038 0 : pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
1039 0 : vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
1040 : vhost_cpu_t *cpu;
1041 :
1042 0 : if (mode == txvq->mode)
1043 0 : return 0;
1044 :
1045 0 : if ((mode != VNET_HW_IF_RX_MODE_POLLING) &&
1046 0 : (mode != VNET_HW_IF_RX_MODE_ADAPTIVE) &&
1047 : (mode != VNET_HW_IF_RX_MODE_INTERRUPT))
1048 : {
1049 0 : vu_log_err (vui, "unhandled mode %d changed for if %d queue %d", mode,
1050 : hw_if_index, qid);
1051 0 : return clib_error_return (0, "unsupported");
1052 : }
1053 :
1054 0 : if (txvq->thread_index == ~0)
1055 0 : return clib_error_return (0, "Queue initialization is not finished yet");
1056 :
1057 0 : cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
1058 0 : if ((mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
1059 : (mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
1060 : {
1061 0 : if (txvq->kickfd_idx == ~0)
1062 : {
1063 : // We cannot support interrupt mode if the driver opts out
1064 0 : return clib_error_return (0, "Driver does not support interrupt");
1065 : }
1066 0 : if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
1067 : {
1068 0 : ASSERT (cpu->polling_q_count != 0);
1069 0 : if (cpu->polling_q_count)
1070 0 : cpu->polling_q_count--;
1071 0 : vum->ifq_count++;
1072 : // Start the timer when the first interrupt-mode interface/queue
1073 : // is encountered
1074 0 : if ((vum->ifq_count == 1) &&
1075 0 : ((vum->coalesce_time > 0.0) || (vum->coalesce_frames > 0)))
1076 0 : vlib_process_signal_event (vm,
1077 0 : vhost_user_send_interrupt_node.index,
1078 : VHOST_USER_EVENT_START_TIMER, 0);
1079 : }
1080 : }
1081 0 : else if (mode == VNET_HW_IF_RX_MODE_POLLING)
1082 : {
1083 0 : if (((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
1084 0 : (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) && vum->ifq_count)
1085 : {
1086 0 : cpu->polling_q_count++;
1087 0 : vum->ifq_count--;
1088 : // Stop the timer if there is no more interrupt interface/queue
1089 0 : if (vum->ifq_count == 0)
1090 0 : vlib_process_signal_event (vm,
1091 0 : vhost_user_send_interrupt_node.index,
1092 : VHOST_USER_EVENT_STOP_TIMER, 0);
1093 : }
1094 : }
1095 :
1096 0 : txvq->mode = mode;
1097 0 : vhost_user_set_operation_mode (vui, txvq);
1098 :
1099 0 : return 0;
1100 : }
1101 :
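 : /*
 :  * Admin up/down handler: record the new admin state and propagate any
 :  * resulting link state change to the hardware interface flags.
 :  */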
1102 : static __clib_unused clib_error_t *
1103 6 : vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
1104 : u32 flags)
1105 : {
1106 6 : vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
1107 6 : vhost_user_main_t *vum = &vhost_user_main;
1108 6 : vhost_user_intf_t *vui =
1109 6 : pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
1110 : u8 link_old, link_new;
1111 :
1112 6 : link_old = vui_is_link_up (vui);
1113 :
1114 6 : vui->admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1115 :
1116 6 : link_new = vui_is_link_up (vui);
1117 :
1118 6 : if (link_old != link_new)
1119 0 : vnet_hw_interface_set_flags (vnm, vui->hw_if_index, link_new ?
1120 : VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
1121 :
1122 6 : return /* no error */ 0;
1123 : }
1124 :
1125 : /* *INDENT-OFF* */
1126 2239 : VNET_DEVICE_CLASS (vhost_user_device_class) = {
1127 : .name = "vhost-user",
1128 : .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
1129 : .tx_function_error_strings = vhost_user_tx_func_error_strings,
1130 : .format_device_name = format_vhost_user_interface_name,
1131 : .name_renumber = vhost_user_name_renumber,
1132 : .admin_up_down_function = vhost_user_interface_admin_up_down,
1133 : .rx_mode_change_function = vhost_user_interface_rx_mode_change,
1134 : .format_tx_trace = format_vhost_trace,
1135 : };
1136 :
1137 : /* *INDENT-ON* */
1138 :
1139 : /*
1140 : * fd.io coding-style-patch-verification: ON
1141 : *
1142 : * Local Variables:
1143 : * eval: (c-set-style "gnu")
1144 : * End:
1145 : */