LCOV - code coverage report
Current view: top level - plugins/vhost - vhost_user_output.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 20 529 3.8 %
Date: 2023-07-05 22:20:52 Functions: 8 20 40.0 %

          Line data    Source code
       1             : /*
       2             :  *------------------------------------------------------------------
       3             :  * vhost-user-output
       4             :  *
       5             :  * Copyright (c) 2014-2018 Cisco and/or its affiliates.
       6             :  * Licensed under the Apache License, Version 2.0 (the "License");
       7             :  * you may not use this file except in compliance with the License.
       8             :  * You may obtain a copy of the License at:
       9             :  *
      10             :  *     http://www.apache.org/licenses/LICENSE-2.0
      11             :  *
      12             :  * Unless required by applicable law or agreed to in writing, software
      13             :  * distributed under the License is distributed on an "AS IS" BASIS,
      14             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      15             :  * See the License for the specific language governing permissions and
      16             :  * limitations under the License.
      17             :  *------------------------------------------------------------------
      18             :  */
      19             : 
      20             : #include <stddef.h>
      21             : #include <fcntl.h>                /* for open */
      22             : #include <sys/ioctl.h>
      23             : #include <sys/socket.h>
      24             : #include <sys/un.h>
      25             : #include <sys/stat.h>
      26             : #include <sys/types.h>
      27             : #include <sys/uio.h>              /* for iovec */
      28             : #include <netinet/in.h>
      29             : #include <sys/vfs.h>
      30             : 
      31             : #include <linux/if_arp.h>
      32             : #include <linux/if_tun.h>
      33             : 
      34             : #include <vlib/vlib.h>
      35             : #include <vlib/unix/unix.h>
      36             : 
      37             : #include <vnet/ethernet/ethernet.h>
      38             : #include <vnet/devices/devices.h>
      39             : #include <vnet/feature/feature.h>
      40             : #include <vnet/ip/ip_psh_cksum.h>
      41             : 
      42             : #include <vhost/vhost_user.h>
      43             : #include <vhost/vhost_user_inline.h>
      44             : 
      45             : #include <vnet/gso/hdr_offset_parser.h>
      46             : /*
      47             :  * On the transmit side, we keep processing the buffers from vlib in the while
      48             :  * loop and prepare the copy order to be executed later. However, the static
      49             :  * array which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N
      50             :  * entries. In order to not corrupt memory, we have to do the copy when the
      51             :  * static array reaches the copy threshold. We subtract 40 in case the code
      52             :  * goes into the inner loop for a maximum of 64k frames which may require
      53             :  * more array entries. We subtract 200 because our default buffer size is
      54             :  * 2048 and the default desc len is likely 1536. While it takes less than 40
      55             :  * vlib buffers for the jumbo frame, it may take twice as much descriptors
      56             :  * for the same jumbo frame. Use 200 for the extra head room.
      57             :  */
      58             : #define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 200)
      59             : 
      60             : extern vnet_device_class_t vhost_user_device_class;
      61             : 
      62             : #define foreach_vhost_user_tx_func_error      \
      63             :   _(NONE, "no error")  \
      64             :   _(NOT_READY, "vhost vring not ready")  \
      65             :   _(DOWN, "vhost interface is down")  \
      66             :   _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)")  \
      67             :   _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)")  \
      68             :   _(MMAP_FAIL, "mmap failure") \
      69             :   _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")
      70             : 
      71             : typedef enum
      72             : {
      73             : #define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
      74             :   foreach_vhost_user_tx_func_error
      75             : #undef _
      76             :     VHOST_USER_TX_FUNC_N_ERROR,
      77             : } vhost_user_tx_func_error_t;
      78             : 
      79             : static __clib_unused char *vhost_user_tx_func_error_strings[] = {
      80             : #define _(n,s) s,
      81             :   foreach_vhost_user_tx_func_error
      82             : #undef _
      83             : };
      84             : 
      85             : static __clib_unused u8 *
      86           4 : format_vhost_user_interface_name (u8 * s, va_list * args)
      87             : {
      88           4 :   u32 i = va_arg (*args, u32);
      89           4 :   u32 show_dev_instance = ~0;
      90           4 :   vhost_user_main_t *vum = &vhost_user_main;
      91             : 
      92           4 :   if (i < vec_len (vum->show_dev_instance_by_real_dev_instance))
      93           0 :     show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];
      94             : 
      95           4 :   if (show_dev_instance != ~0)
      96           0 :     i = show_dev_instance;
      97             : 
      98           4 :   s = format (s, "VirtualEthernet0/0/%d", i);
      99           4 :   return s;
     100             : }
     101             : 
     102             : static __clib_unused int
     103           0 : vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
     104             : {
     105             :   // FIXME: check if the new dev instance is already used
     106           0 :   vhost_user_main_t *vum = &vhost_user_main;
     107           0 :   vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
     108             :                                               hi->dev_instance);
     109             : 
     110           0 :   vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance,
     111             :                            hi->dev_instance, ~0);
     112             : 
     113           0 :   vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] =
     114             :     new_dev_instance;
     115             : 
     116           0 :   vu_log_debug (vui, "renumbered vhost-user interface dev_instance %d to %d",
     117             :                 hi->dev_instance, new_dev_instance);
     118             : 
     119           0 :   return 0;
     120             : }
     121             : 
     122             : static_always_inline void
     123           0 : vhost_user_tx_trace (vhost_trace_t * t,
     124             :                      vhost_user_intf_t * vui, u16 qid,
     125             :                      vlib_buffer_t * b, vhost_user_vring_t * rxvq)
     126             : {
     127           0 :   vhost_user_main_t *vum = &vhost_user_main;
     128           0 :   u32 last_avail_idx = rxvq->last_avail_idx;
     129           0 :   u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask];
     130           0 :   vnet_virtio_vring_desc_t *hdr_desc = 0;
     131           0 :   u32 hint = 0;
     132             : 
     133           0 :   clib_memset (t, 0, sizeof (*t));
     134           0 :   t->device_index = vui - vum->vhost_user_interfaces;
     135           0 :   t->qid = qid;
     136             : 
     137           0 :   hdr_desc = &rxvq->desc[desc_current];
     138           0 :   if (rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
     139             :     {
     140           0 :       t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
     141             :       /* Header is the first here */
     142           0 :       hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint);
     143             :     }
     144           0 :   if (rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT)
     145             :     {
     146           0 :       t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
     147             :     }
     148           0 :   if (!(rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
     149           0 :       !(rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
     150             :     {
     151           0 :       t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
     152             :     }
     153             : 
     154           0 :   t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
     155           0 : }
     156             : 
     157             : static_always_inline u32
     158           0 : vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
     159             :                     u16 copy_len, u32 * map_hint)
     160             : {
     161             :   void *dst0, *dst1, *dst2, *dst3;
     162           0 :   if (PREDICT_TRUE (copy_len >= 4))
     163             :     {
     164           0 :       if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint))))
     165           0 :         return 1;
     166           0 :       if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint))))
     167           0 :         return 1;
     168           0 :       while (PREDICT_TRUE (copy_len >= 4))
     169             :         {
     170           0 :           dst0 = dst2;
     171           0 :           dst1 = dst3;
     172             : 
     173           0 :           if (PREDICT_FALSE
     174             :               (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint))))
     175           0 :             return 1;
     176           0 :           if (PREDICT_FALSE
     177             :               (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint))))
     178           0 :             return 1;
     179             : 
     180           0 :           clib_prefetch_load ((void *) cpy[2].src);
     181           0 :           clib_prefetch_load ((void *) cpy[3].src);
     182             : 
     183           0 :           clib_memcpy_fast (dst0, (void *) cpy[0].src, cpy[0].len);
     184           0 :           clib_memcpy_fast (dst1, (void *) cpy[1].src, cpy[1].len);
     185             : 
     186           0 :           vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1);
     187           0 :           vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1);
     188           0 :           copy_len -= 2;
     189           0 :           cpy += 2;
     190             :         }
     191             :     }
     192           0 :   while (copy_len)
     193             :     {
     194           0 :       if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint))))
     195           0 :         return 1;
     196           0 :       clib_memcpy_fast (dst0, (void *) cpy->src, cpy->len);
     197           0 :       vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1);
     198           0 :       copy_len -= 1;
     199           0 :       cpy += 1;
     200             :     }
     201           0 :   return 0;
     202             : }
     203             : 
     204             : static_always_inline void
     205           0 : vhost_user_handle_tx_offload (vhost_user_intf_t *vui, vlib_buffer_t *b,
     206             :                               vnet_virtio_net_hdr_t *hdr)
     207             : {
     208           0 :   generic_header_offset_t gho = { 0 };
     209           0 :   int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
     210           0 :   int is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6;
     211           0 :   vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
     212           0 :   u16 psh_cksum = 0;
     213           0 :   ip4_header_t *ip4 = 0;
     214           0 :   ip6_header_t *ip6 = 0;
     215             : 
     216           0 :   ASSERT (!(is_ip4 && is_ip6));
     217           0 :   vnet_generic_header_offset_parser (b, &gho, 1 /* l2 */ , is_ip4, is_ip6);
     218           0 :   if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
     219             :     {
     220           0 :       ip4 =
     221           0 :         (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
     222           0 :       ip4->checksum = ip4_header_checksum (ip4);
     223           0 :       psh_cksum = ip4_pseudo_header_cksum (ip4);
     224             :     }
     225             :   else
     226             :     {
     227           0 :       ip6 = (ip6_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
     228           0 :       psh_cksum = ip6_pseudo_header_cksum (ip6);
     229             :     }
     230             : 
     231             :   /* checksum offload */
     232           0 :   if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
     233             :     {
     234           0 :       udp_header_t *udp =
     235           0 :         (udp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
     236           0 :       udp->checksum = psh_cksum;
     237           0 :       hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
     238           0 :       hdr->csum_start = gho.l4_hdr_offset;
     239           0 :       hdr->csum_offset = offsetof (udp_header_t, checksum);
     240             :     }
     241           0 :   else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
     242             :     {
     243           0 :       tcp_header_t *tcp =
     244           0 :         (tcp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
     245           0 :       tcp->checksum = psh_cksum;
     246           0 :       hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
     247           0 :       hdr->csum_start = gho.l4_hdr_offset;
     248           0 :       hdr->csum_offset = offsetof (tcp_header_t, checksum);
     249             :     }
     250             : 
     251             :   /* GSO offload */
     252           0 :   if (b->flags & VNET_BUFFER_F_GSO)
     253             :     {
     254           0 :       if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
     255             :         {
     256           0 :           if (is_ip4 &&
     257           0 :               (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4)))
     258             :             {
     259           0 :               hdr->gso_size = vnet_buffer2 (b)->gso_size;
     260           0 :               hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
     261             :             }
     262           0 :           else if (is_ip6 &&
     263           0 :                    (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6)))
     264             :             {
     265           0 :               hdr->gso_size = vnet_buffer2 (b)->gso_size;
     266           0 :               hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
     267             :             }
     268             :         }
     269           0 :       else if ((vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO)) &&
     270           0 :                (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
     271             :         {
     272           0 :           hdr->gso_size = vnet_buffer2 (b)->gso_size;
     273           0 :           hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
     274             :         }
     275             :     }
     276           0 : }
     277             : 
     278             : static_always_inline void
     279           0 : vhost_user_mark_desc_available (vlib_main_t * vm, vhost_user_intf_t * vui,
     280             :                                 vhost_user_vring_t * rxvq,
     281             :                                 u16 * n_descs_processed, u8 chained,
     282             :                                 vlib_frame_t * frame, u32 n_left)
     283             : {
     284             :   u16 desc_idx, flags;
     285           0 :   vnet_virtio_vring_packed_desc_t *desc_table = rxvq->packed_desc;
     286           0 :   u16 last_used_idx = rxvq->last_used_idx;
     287             : 
     288           0 :   if (PREDICT_FALSE (*n_descs_processed == 0))
     289           0 :     return;
     290             : 
     291           0 :   if (rxvq->used_wrap_counter)
     292           0 :     flags = desc_table[last_used_idx & rxvq->qsz_mask].flags |
     293             :       (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
     294             :   else
     295           0 :     flags = desc_table[last_used_idx & rxvq->qsz_mask].flags &
     296             :       ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
     297             : 
     298           0 :   vhost_user_advance_last_used_idx (rxvq);
     299             : 
     300           0 :   for (desc_idx = 1; desc_idx < *n_descs_processed; desc_idx++)
     301             :     {
     302           0 :       if (rxvq->used_wrap_counter)
     303           0 :         desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags |=
     304             :           (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
     305             :       else
     306           0 :         desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &=
     307             :           ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
     308           0 :       vhost_user_advance_last_used_idx (rxvq);
     309             :     }
     310             : 
     311           0 :   desc_table[last_used_idx & rxvq->qsz_mask].flags = flags;
     312             : 
     313           0 :   *n_descs_processed = 0;
     314             : 
     315           0 :   if (chained)
     316             :     {
     317           0 :       vnet_virtio_vring_packed_desc_t *desc_table = rxvq->packed_desc;
     318             : 
     319           0 :       while (desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &
     320             :              VRING_DESC_F_NEXT)
     321           0 :         vhost_user_advance_last_used_idx (rxvq);
     322             : 
     323             :       /* Advance past the current chained table entries */
     324           0 :       vhost_user_advance_last_used_idx (rxvq);
     325             :     }
     326             : 
     327             :   /* interrupt (call) handling */
     328           0 :   if ((rxvq->callfd_idx != ~0) &&
     329           0 :       (rxvq->avail_event->flags != VRING_EVENT_F_DISABLE))
     330             :     {
     331           0 :       vhost_user_main_t *vum = &vhost_user_main;
     332             : 
     333           0 :       rxvq->n_since_last_int += frame->n_vectors - n_left;
     334           0 :       if (rxvq->n_since_last_int > vum->coalesce_frames)
     335           0 :         vhost_user_send_call (vm, vui, rxvq);
     336             :     }
     337             : }
     338             : 
     339             : static_always_inline void
     340           0 : vhost_user_tx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
     341             :                             u16 qid, vlib_buffer_t * b,
     342             :                             vhost_user_vring_t * rxvq)
     343             : {
     344           0 :   vhost_user_main_t *vum = &vhost_user_main;
     345           0 :   u32 last_avail_idx = rxvq->last_avail_idx;
     346           0 :   u32 desc_current = last_avail_idx & rxvq->qsz_mask;
     347           0 :   vnet_virtio_vring_packed_desc_t *hdr_desc = 0;
     348           0 :   u32 hint = 0;
     349             : 
     350           0 :   clib_memset (t, 0, sizeof (*t));
     351           0 :   t->device_index = vui - vum->vhost_user_interfaces;
     352           0 :   t->qid = qid;
     353             : 
     354           0 :   hdr_desc = &rxvq->packed_desc[desc_current];
     355           0 :   if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)
     356             :     {
     357           0 :       t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
     358             :       /* Header is the first here */
     359           0 :       hdr_desc = map_guest_mem (vui, rxvq->packed_desc[desc_current].addr,
     360             :                                 &hint);
     361             :     }
     362           0 :   if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT)
     363             :     {
     364           0 :       t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
     365             :     }
     366           0 :   if (!(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
     367           0 :       !(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
     368             :     {
     369           0 :       t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
     370             :     }
     371             : 
     372           0 :   t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
     373           0 : }
     374             : 
     375             : static_always_inline uword
     376           0 : vhost_user_device_class_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
     377             :                                 vlib_frame_t *frame, vhost_user_intf_t *vui,
     378             :                                 vhost_user_vring_t *rxvq)
     379             : {
     380           0 :   u32 *buffers = vlib_frame_vector_args (frame);
     381           0 :   u32 n_left = frame->n_vectors;
     382           0 :   vhost_user_main_t *vum = &vhost_user_main;
     383           0 :   u32 qid = rxvq->qid;
     384             :   u8 error;
     385           0 :   u32 thread_index = vm->thread_index;
     386           0 :   vhost_cpu_t *cpu = &vum->cpus[thread_index];
     387           0 :   u32 map_hint = 0;
     388           0 :   u8 retry = 8;
     389             :   u16 copy_len;
     390             :   u16 tx_headers_len;
     391             :   vnet_virtio_vring_packed_desc_t *desc_table;
     392             :   u32 or_flags;
     393             :   u16 desc_head, desc_index, desc_len;
     394             :   u16 n_descs_processed;
     395             :   u8 indirect, chained;
     396             : 
     397           0 : retry:
     398           0 :   error = VHOST_USER_TX_FUNC_ERROR_NONE;
     399           0 :   tx_headers_len = 0;
     400           0 :   copy_len = 0;
     401           0 :   n_descs_processed = 0;
     402             : 
     403           0 :   while (n_left > 0)
     404             :     {
     405             :       vlib_buffer_t *b0, *current_b0;
     406             :       uword buffer_map_addr;
     407             :       u32 buffer_len;
     408             :       u16 bytes_left;
     409           0 :       u32 total_desc_len = 0;
     410           0 :       u16 n_entries = 0;
     411             : 
     412           0 :       indirect = 0;
     413           0 :       chained = 0;
     414           0 :       if (PREDICT_TRUE (n_left > 1))
     415           0 :         vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
     416             : 
     417           0 :       b0 = vlib_get_buffer (vm, buffers[0]);
     418           0 :       if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
     419             :         {
     420           0 :           cpu->current_trace = vlib_add_trace (vm, node, b0,
     421             :                                                sizeof (*cpu->current_trace));
     422           0 :           vhost_user_tx_trace_packed (cpu->current_trace, vui, qid / 2, b0,
     423             :                                       rxvq);
     424             :         }
     425             : 
     426           0 :       desc_table = rxvq->packed_desc;
     427           0 :       desc_head = desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
     428           0 :       if (PREDICT_FALSE (!vhost_user_packed_desc_available (rxvq, desc_head)))
     429             :         {
     430           0 :           error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
     431           0 :           goto done;
     432             :         }
     433             :       /*
     434             :        * Go deeper in case of indirect descriptor.
     435             :        * To test it, turn off mrg_rxbuf.
     436             :        */
     437           0 :       if (desc_table[desc_head].flags & VRING_DESC_F_INDIRECT)
     438             :         {
     439           0 :           indirect = 1;
     440           0 :           if (PREDICT_FALSE (desc_table[desc_head].len <
     441             :                              sizeof (vnet_virtio_vring_packed_desc_t)))
     442             :             {
     443           0 :               error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
     444           0 :               goto done;
     445             :             }
     446           0 :           n_entries = desc_table[desc_head].len >> 4;
     447           0 :           desc_table = map_guest_mem (vui, desc_table[desc_index].addr,
     448             :                                       &map_hint);
     449           0 :           if (PREDICT_FALSE (desc_table == 0))
     450             :             {
     451           0 :               error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
     452           0 :               goto done;
     453             :             }
     454           0 :           desc_index = 0;
     455             :         }
     456           0 :       else if (rxvq->packed_desc[desc_head].flags & VRING_DESC_F_NEXT)
     457           0 :         chained = 1;
     458             : 
     459           0 :       desc_len = vui->virtio_net_hdr_sz;
     460           0 :       buffer_map_addr = desc_table[desc_index].addr;
     461           0 :       buffer_len = desc_table[desc_index].len;
     462             : 
     463             :       /* Get a header from the header array */
     464           0 :       vnet_virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
     465           0 :       tx_headers_len++;
     466           0 :       hdr->hdr.flags = 0;
     467           0 :       hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
     468           0 :       hdr->num_buffers = 1;
     469             : 
     470           0 :       or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD);
     471             : 
     472             :       /* Guest supports csum offload and buffer requires checksum offload? */
     473           0 :       if (or_flags &&
     474           0 :           (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
     475           0 :         vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);
     476             : 
     477             :       /* Prepare a copy order executed later for the header */
     478           0 :       ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
     479           0 :       vhost_copy_t *cpy = &cpu->copy[copy_len];
     480           0 :       copy_len++;
     481           0 :       cpy->len = vui->virtio_net_hdr_sz;
     482           0 :       cpy->dst = buffer_map_addr;
     483           0 :       cpy->src = (uword) hdr;
     484             : 
     485           0 :       buffer_map_addr += vui->virtio_net_hdr_sz;
     486           0 :       buffer_len -= vui->virtio_net_hdr_sz;
     487           0 :       bytes_left = b0->current_length;
     488           0 :       current_b0 = b0;
     489             :       while (1)
     490           0 :         {
     491           0 :           if (buffer_len == 0)
     492             :             {
     493             :               /* Get new output */
     494           0 :               if (chained)
     495             :                 {
     496             :                   /*
     497             :                    * Next one is chained
     498             :                    * Test it with both indirect and mrg_rxbuf off
     499             :                    */
     500           0 :                   if (PREDICT_FALSE (!(desc_table[desc_index].flags &
     501             :                                        VRING_DESC_F_NEXT)))
     502             :                     {
     503             :                       /*
     504             :                        * Last descriptor in chain.
     505             :                        * Dequeue queued descriptors for this packet
     506             :                        */
     507           0 :                       vhost_user_dequeue_chained_descs (rxvq,
     508             :                                                         &n_descs_processed);
     509           0 :                       error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
     510           0 :                       goto done;
     511             :                     }
     512           0 :                   vhost_user_advance_last_avail_idx (rxvq);
     513           0 :                   desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
     514           0 :                   n_descs_processed++;
     515           0 :                   buffer_map_addr = desc_table[desc_index].addr;
     516           0 :                   buffer_len = desc_table[desc_index].len;
     517           0 :                   total_desc_len += desc_len;
     518           0 :                   desc_len = 0;
     519             :                 }
     520           0 :               else if (indirect)
     521             :                 {
     522             :                   /*
     523             :                    * Indirect table
     524             :                    * Test it with mrg_rxnuf off
     525             :                    */
     526           0 :                   if (PREDICT_TRUE (n_entries > 0))
     527           0 :                     n_entries--;
     528             :                   else
     529             :                     {
     530             :                       /* Dequeue queued descriptors for this packet */
     531           0 :                       vhost_user_dequeue_chained_descs (rxvq,
     532             :                                                         &n_descs_processed);
     533           0 :                       error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
     534           0 :                       goto done;
     535             :                     }
     536           0 :                   total_desc_len += desc_len;
     537           0 :                   desc_index = (desc_index + 1) & rxvq->qsz_mask;
     538           0 :                   buffer_map_addr = desc_table[desc_index].addr;
     539           0 :                   buffer_len = desc_table[desc_index].len;
     540           0 :                   desc_len = 0;
     541             :                 }
     542           0 :               else if (vui->virtio_net_hdr_sz == 12)
     543             :                 {
     544             :                   /*
     545             :                    * MRG is available
     546             :                    * This is the default setting for the guest VM
     547             :                    */
     548           0 :                   vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
     549           0 :                     &cpu->tx_headers[tx_headers_len - 1];
     550             : 
     551           0 :                   desc_table[desc_index].len = desc_len;
     552           0 :                   vhost_user_advance_last_avail_idx (rxvq);
     553           0 :                   desc_head = desc_index =
     554           0 :                     rxvq->last_avail_idx & rxvq->qsz_mask;
     555           0 :                   hdr->num_buffers++;
     556           0 :                   n_descs_processed++;
     557           0 :                   desc_len = 0;
     558             : 
     559           0 :                   if (PREDICT_FALSE (!vhost_user_packed_desc_available
     560             :                                      (rxvq, desc_index)))
     561             :                     {
     562             :                       /* Dequeue queued descriptors for this packet */
     563           0 :                       vhost_user_dequeue_descs (rxvq, hdr,
     564             :                                                 &n_descs_processed);
     565           0 :                       error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
     566           0 :                       goto done;
     567             :                     }
     568             : 
     569           0 :                   buffer_map_addr = desc_table[desc_index].addr;
     570           0 :                   buffer_len = desc_table[desc_index].len;
     571             :                 }
     572             :               else
     573             :                 {
     574           0 :                   error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
     575           0 :                   goto done;
     576             :                 }
     577             :             }
     578             : 
     579           0 :           ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
     580           0 :           vhost_copy_t *cpy = &cpu->copy[copy_len];
     581           0 :           copy_len++;
     582           0 :           cpy->len = bytes_left;
     583           0 :           cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
     584           0 :           cpy->dst = buffer_map_addr;
     585           0 :           cpy->src = (uword) vlib_buffer_get_current (current_b0) +
     586           0 :             current_b0->current_length - bytes_left;
     587             : 
     588           0 :           bytes_left -= cpy->len;
     589           0 :           buffer_len -= cpy->len;
     590           0 :           buffer_map_addr += cpy->len;
     591           0 :           desc_len += cpy->len;
     592             : 
     593           0 :           clib_prefetch_load (&rxvq->packed_desc);
     594             : 
     595             :           /* Check if vlib buffer has more data. If not, get more or break */
     596           0 :           if (PREDICT_TRUE (!bytes_left))
     597             :             {
     598           0 :               if (PREDICT_FALSE
     599             :                   (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
     600             :                 {
     601           0 :                   current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
     602           0 :                   bytes_left = current_b0->current_length;
     603             :                 }
     604             :               else
     605             :                 {
     606             :                   /* End of packet */
     607           0 :                   break;
     608             :                 }
     609             :             }
     610             :         }
     611             : 
     612             :       /* Move from available to used ring */
     613           0 :       total_desc_len += desc_len;
     614           0 :       rxvq->packed_desc[desc_head].len = total_desc_len;
     615             : 
     616           0 :       vhost_user_advance_last_avail_table_idx (vui, rxvq, chained);
     617           0 :       n_descs_processed++;
     618             : 
     619           0 :       if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
     620           0 :         cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
     621             : 
     622           0 :       n_left--;
     623             : 
     624             :       /*
     625             :        * Do the copy periodically to prevent
     626             :        * cpu->copy array overflow and corrupt memory
     627             :        */
     628           0 :       if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD) || chained)
     629             :         {
     630           0 :           if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
     631             :                                                  &map_hint)))
     632           0 :             vlib_error_count (vm, node->node_index,
     633             :                               VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
     634           0 :           copy_len = 0;
     635             : 
     636             :           /* give buffers back to driver */
     637           0 :           vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed,
     638             :                                           chained, frame, n_left);
     639             :         }
     640             : 
     641           0 :       buffers++;
     642             :     }
     643             : 
     644           0 : done:
     645           0 :   if (PREDICT_TRUE (copy_len))
     646             :     {
     647           0 :       if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
     648             :                                              &map_hint)))
     649           0 :         vlib_error_count (vm, node->node_index,
     650             :                           VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
     651             : 
     652           0 :       vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed,
     653             :                                       chained, frame, n_left);
     654             :     }
     655             : 
     656             :   /*
     657             :    * When n_left is set, error is always set to something too.
     658             :    * In case error is due to lack of remaining buffers, we go back up and
     659             :    * retry.
     660             :    * The idea is that it is better to waste some time on packets
     661             :    * that have been processed already than dropping them and get
     662             :    * more fresh packets with a good likelyhood that they will be dropped too.
     663             :    * This technique also gives more time to VM driver to pick-up packets.
     664             :    * In case the traffic flows from physical to virtual interfaces, this
     665             :    * technique will end-up leveraging the physical NIC buffer in order to
     666             :    * absorb the VM's CPU jitter.
     667             :    */
     668           0 :   if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
     669             :     {
     670           0 :       retry--;
     671           0 :       goto retry;
     672             :     }
     673             : 
     674           0 :   clib_spinlock_unlock (&rxvq->vring_lock);
     675             : 
     676           0 :   if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
     677             :     {
     678           0 :       vlib_error_count (vm, node->node_index, error, n_left);
     679           0 :       vlib_increment_simple_counter
     680             :         (vnet_main.interface_main.sw_if_counters +
     681             :          VNET_INTERFACE_COUNTER_DROP, thread_index, vui->sw_if_index, n_left);
     682             :     }
     683             : 
     684           0 :   vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
     685           0 :   return frame->n_vectors;
     686             : }
     687             : 
     688        2236 : VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm,
     689             :                                                    vlib_node_runtime_t *
     690             :                                                    node, vlib_frame_t * frame)
     691             : {
     692           0 :   u32 *buffers = vlib_frame_vector_args (frame);
     693           0 :   u32 n_left = frame->n_vectors;
     694           0 :   vhost_user_main_t *vum = &vhost_user_main;
     695           0 :   vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
     696           0 :   vhost_user_intf_t *vui =
     697           0 :     pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
     698             :   u32 qid;
     699             :   vhost_user_vring_t *rxvq;
     700             :   u8 error;
     701           0 :   u32 thread_index = vm->thread_index;
     702           0 :   vhost_cpu_t *cpu = &vum->cpus[thread_index];
     703           0 :   u32 map_hint = 0;
     704           0 :   u8 retry = 8;
     705             :   u16 copy_len;
     706             :   u16 tx_headers_len;
     707             :   u32 or_flags;
     708           0 :   vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
     709             : 
     710           0 :   if (PREDICT_FALSE (!vui->admin_up))
     711             :     {
     712           0 :       error = VHOST_USER_TX_FUNC_ERROR_DOWN;
     713           0 :       goto done3;
     714             :     }
     715             : 
     716           0 :   if (PREDICT_FALSE (!vui->is_ready))
     717             :     {
     718           0 :       error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
     719           0 :       goto done3;
     720             :     }
     721             : 
     722           0 :   qid = VHOST_VRING_IDX_RX (tf->queue_id);
     723           0 :   rxvq = &vui->vrings[qid];
     724           0 :   ASSERT (tf->queue_id == rxvq->qid);
     725             : 
     726           0 :   if (PREDICT_FALSE (rxvq->avail == 0))
     727             :     {
     728           0 :       error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
     729           0 :       goto done3;
     730             :     }
     731           0 :   if (tf->shared_queue)
     732           0 :     clib_spinlock_lock (&rxvq->vring_lock);
     733             : 
     734           0 :   if (vhost_user_is_packed_ring_supported (vui))
     735           0 :     return (vhost_user_device_class_packed (vm, node, frame, vui, rxvq));
     736             : 
     737           0 : retry:
     738           0 :   error = VHOST_USER_TX_FUNC_ERROR_NONE;
     739           0 :   tx_headers_len = 0;
     740           0 :   copy_len = 0;
     741           0 :   while (n_left > 0)
     742             :     {
     743             :       vlib_buffer_t *b0, *current_b0;
     744             :       u16 desc_head, desc_index, desc_len;
     745             :       vnet_virtio_vring_desc_t *desc_table;
     746             :       uword buffer_map_addr;
     747             :       u32 buffer_len;
     748             :       u16 bytes_left;
     749             : 
     750           0 :       if (PREDICT_TRUE (n_left > 1))
     751           0 :         vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
     752             : 
     753           0 :       b0 = vlib_get_buffer (vm, buffers[0]);
     754             : 
     755           0 :       if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
     756             :         {
     757           0 :           cpu->current_trace = vlib_add_trace (vm, node, b0,
     758             :                                                sizeof (*cpu->current_trace));
     759           0 :           vhost_user_tx_trace (cpu->current_trace, vui, qid / 2, b0, rxvq);
     760             :         }
     761             : 
     762           0 :       if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
     763             :         {
     764           0 :           error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
     765           0 :           goto done;
     766             :         }
     767             : 
     768           0 :       desc_table = rxvq->desc;
     769           0 :       desc_head = desc_index =
     770           0 :         rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
     771             : 
     772             :       /* Go deeper in case of indirect descriptor
     773             :        * I don't know of any driver providing indirect for RX. */
     774           0 :       if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
     775             :         {
     776           0 :           if (PREDICT_FALSE (rxvq->desc[desc_head].len <
     777             :                              sizeof (vnet_virtio_vring_desc_t)))
     778             :             {
     779           0 :               error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
     780           0 :               goto done;
     781             :             }
     782           0 :           if (PREDICT_FALSE
     783             :               (!(desc_table =
     784             :                  map_guest_mem (vui, rxvq->desc[desc_index].addr,
     785             :                                 &map_hint))))
     786             :             {
     787           0 :               error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
     788           0 :               goto done;
     789             :             }
     790           0 :           desc_index = 0;
     791             :         }
     792             : 
     793           0 :       desc_len = vui->virtio_net_hdr_sz;
     794           0 :       buffer_map_addr = desc_table[desc_index].addr;
     795           0 :       buffer_len = desc_table[desc_index].len;
     796             : 
     797             :       {
     798             :         // Get a header from the header array
     799           0 :         vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
     800           0 :           &cpu->tx_headers[tx_headers_len];
     801           0 :         tx_headers_len++;
     802           0 :         hdr->hdr.flags = 0;
     803           0 :         hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
     804           0 :         hdr->num_buffers = 1;        //This is local, no need to check
     805             : 
     806           0 :         or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD);
     807             : 
     808             :         /* Guest supports csum offload and buffer requires checksum offload? */
     809           0 :         if (or_flags
     810           0 :             && (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
     811           0 :           vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);
     812             : 
     813             :         // Prepare a copy order executed later for the header
     814           0 :         ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
     815           0 :         vhost_copy_t *cpy = &cpu->copy[copy_len];
     816           0 :         copy_len++;
     817           0 :         cpy->len = vui->virtio_net_hdr_sz;
     818           0 :         cpy->dst = buffer_map_addr;
     819           0 :         cpy->src = (uword) hdr;
     820             :       }
     821             : 
     822           0 :       buffer_map_addr += vui->virtio_net_hdr_sz;
     823           0 :       buffer_len -= vui->virtio_net_hdr_sz;
     824           0 :       bytes_left = b0->current_length;
     825           0 :       current_b0 = b0;
     826             :       while (1)
     827             :         {
     828           0 :           if (buffer_len == 0)
     829             :             {                   //Get new output
     830           0 :               if (desc_table[desc_index].flags & VRING_DESC_F_NEXT)
     831             :                 {
     832             :                   //Next one is chained
     833           0 :                   desc_index = desc_table[desc_index].next;
     834           0 :                   buffer_map_addr = desc_table[desc_index].addr;
     835           0 :                   buffer_len = desc_table[desc_index].len;
     836             :                 }
     837           0 :               else if (vui->virtio_net_hdr_sz == 12) //MRG is available
     838             :                 {
     839           0 :                   vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
     840           0 :                     &cpu->tx_headers[tx_headers_len - 1];
     841             : 
     842             :                   //Move from available to used buffer
     843           0 :                   rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id =
     844             :                     desc_head;
     845           0 :                   rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len =
     846             :                     desc_len;
     847           0 :                   vhost_user_log_dirty_ring (vui, rxvq,
     848             :                                              ring[rxvq->last_used_idx &
     849             :                                                   rxvq->qsz_mask]);
     850             : 
     851           0 :                   rxvq->last_avail_idx++;
     852           0 :                   rxvq->last_used_idx++;
     853           0 :                   hdr->num_buffers++;
     854           0 :                   desc_len = 0;
     855             : 
     856           0 :                   if (PREDICT_FALSE
     857             :                       (rxvq->last_avail_idx == rxvq->avail->idx))
     858             :                     {
     859             :                       //Dequeue queued descriptors for this packet
     860           0 :                       rxvq->last_used_idx -= hdr->num_buffers - 1;
     861           0 :                       rxvq->last_avail_idx -= hdr->num_buffers - 1;
     862           0 :                       error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
     863           0 :                       goto done;
     864             :                     }
     865             : 
     866           0 :                   desc_table = rxvq->desc;
     867           0 :                   desc_head = desc_index =
     868           0 :                     rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
     869           0 :                   if (PREDICT_FALSE
     870             :                       (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
     871             :                     {
     872             :                       //It is seriously unlikely that a driver will put indirect descriptor
     873             :                       //after non-indirect descriptor.
     874           0 :                       if (PREDICT_FALSE (rxvq->desc[desc_head].len <
     875             :                                          sizeof (vnet_virtio_vring_desc_t)))
     876             :                         {
     877           0 :                           error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
     878           0 :                           goto done;
     879             :                         }
     880           0 :                       if (PREDICT_FALSE
     881             :                           (!(desc_table =
     882             :                              map_guest_mem (vui,
     883             :                                             rxvq->desc[desc_index].addr,
     884             :                                             &map_hint))))
     885             :                         {
     886           0 :                           error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
     887           0 :                           goto done;
     888             :                         }
     889           0 :                       desc_index = 0;
     890             :                     }
     891           0 :                   buffer_map_addr = desc_table[desc_index].addr;
     892           0 :                   buffer_len = desc_table[desc_index].len;
     893             :                 }
     894             :               else
     895             :                 {
     896           0 :                   error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
     897           0 :                   goto done;
     898             :                 }
     899             :             }
     900             : 
     901             :           {
     902           0 :             ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
     903           0 :             vhost_copy_t *cpy = &cpu->copy[copy_len];
     904           0 :             copy_len++;
     905           0 :             cpy->len = bytes_left;
     906           0 :             cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
     907           0 :             cpy->dst = buffer_map_addr;
     908           0 :             cpy->src = (uword) vlib_buffer_get_current (current_b0) +
     909           0 :               current_b0->current_length - bytes_left;
     910             : 
     911           0 :             bytes_left -= cpy->len;
     912           0 :             buffer_len -= cpy->len;
     913           0 :             buffer_map_addr += cpy->len;
     914           0 :             desc_len += cpy->len;
     915             : 
     916           0 :             clib_prefetch_load (&rxvq->desc);
     917             :           }
     918             : 
     919             :           // Check if vlib buffer has more data. If not, get more or break.
     920           0 :           if (PREDICT_TRUE (!bytes_left))
     921             :             {
     922           0 :               if (PREDICT_FALSE
     923             :                   (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
     924             :                 {
     925           0 :                   current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
     926           0 :                   bytes_left = current_b0->current_length;
     927             :                 }
     928             :               else
     929             :                 {
     930             :                   //End of packet
     931           0 :                   break;
     932             :                 }
     933             :             }
     934             :         }
     935             : 
     936             :       //Move from available to used ring
     937           0 :       rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head;
     938           0 :       rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len;
     939           0 :       vhost_user_log_dirty_ring (vui, rxvq,
     940             :                                  ring[rxvq->last_used_idx & rxvq->qsz_mask]);
     941           0 :       rxvq->last_avail_idx++;
     942           0 :       rxvq->last_used_idx++;
     943             : 
     944           0 :       if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
     945             :         {
     946           0 :           cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
     947             :         }
     948             : 
     949           0 :       n_left--;                 //At the end for error counting when 'goto done' is invoked
     950             : 
     951             :       /*
     952             :        * Do the copy periodically to prevent
     953             :        * cpu->copy array overflow and corrupt memory
     954             :        */
     955           0 :       if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
     956             :         {
     957           0 :           if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
     958             :                                                  &map_hint)))
     959             :             {
     960           0 :               vlib_error_count (vm, node->node_index,
     961             :                                 VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
     962             :             }
     963           0 :           copy_len = 0;
     964             : 
     965             :           /* give buffers back to driver */
     966           0 :           CLIB_MEMORY_BARRIER ();
     967           0 :           rxvq->used->idx = rxvq->last_used_idx;
     968           0 :           vhost_user_log_dirty_ring (vui, rxvq, idx);
     969             :         }
     970           0 :       buffers++;
     971             :     }
     972             : 
     973           0 : done:
     974             :   //Do the memory copies
     975           0 :   if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
     976             :                                          &map_hint)))
     977             :     {
     978           0 :       vlib_error_count (vm, node->node_index,
     979             :                         VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
     980             :     }
     981             : 
     982           0 :   CLIB_MEMORY_BARRIER ();
     983           0 :   rxvq->used->idx = rxvq->last_used_idx;
     984           0 :   vhost_user_log_dirty_ring (vui, rxvq, idx);
     985             : 
     986             :   /*
     987             :    * When n_left is set, error is always set to something too.
     988             :    * In case error is due to lack of remaining buffers, we go back up and
     989             :    * retry.
     990             :    * The idea is that it is better to waste some time on packets
     991             :    * that have been processed already than dropping them and get
     992             :    * more fresh packets with a good likelihood that they will be dropped too.
     993             :    * This technique also gives more time to VM driver to pick-up packets.
     994             :    * In case the traffic flows from physical to virtual interfaces, this
     995             :    * technique will end-up leveraging the physical NIC buffer in order to
     996             :    * absorb the VM's CPU jitter.
     997             :    */
     998           0 :   if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
     999             :     {
    1000           0 :       retry--;
    1001           0 :       goto retry;
    1002             :     }
    1003             : 
    1004             :   /* interrupt (call) handling */
    1005           0 :   if ((rxvq->callfd_idx != ~0) &&
    1006           0 :       !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
    1007             :     {
    1008           0 :       rxvq->n_since_last_int += frame->n_vectors - n_left;
    1009             : 
    1010           0 :       if (rxvq->n_since_last_int > vum->coalesce_frames)
    1011           0 :         vhost_user_send_call (vm, vui, rxvq);
    1012             :     }
    1013             : 
    1014           0 :   clib_spinlock_unlock (&rxvq->vring_lock);
    1015             : 
    1016           0 : done3:
    1017           0 :   if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
    1018             :     {
    1019           0 :       vlib_error_count (vm, node->node_index, error, n_left);
    1020           0 :       vlib_increment_simple_counter
    1021             :         (vnet_main.interface_main.sw_if_counters
    1022             :          + VNET_INTERFACE_COUNTER_DROP,
    1023             :          thread_index, vui->sw_if_index, n_left);
    1024             :     }
    1025             : 
    1026           0 :   vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
    1027           0 :   return frame->n_vectors;
    1028             : }
    1029             : 
    1030             : static __clib_unused clib_error_t *
    1031           0 : vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
    1032             :                                      u32 qid, vnet_hw_if_rx_mode mode)
    1033             : {
    1034           0 :   vlib_main_t *vm = vnm->vlib_main;
    1035           0 :   vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
    1036           0 :   vhost_user_main_t *vum = &vhost_user_main;
    1037           0 :   vhost_user_intf_t *vui =
    1038           0 :     pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
    1039           0 :   vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
    1040             :   vhost_cpu_t *cpu;
    1041             : 
    1042           0 :   if (mode == txvq->mode)
    1043           0 :     return 0;
    1044             : 
    1045           0 :   if ((mode != VNET_HW_IF_RX_MODE_POLLING) &&
    1046           0 :       (mode != VNET_HW_IF_RX_MODE_ADAPTIVE) &&
    1047             :       (mode != VNET_HW_IF_RX_MODE_INTERRUPT))
    1048             :     {
    1049           0 :       vu_log_err (vui, "unhandled mode %d changed for if %d queue %d", mode,
    1050             :                   hw_if_index, qid);
    1051           0 :       return clib_error_return (0, "unsupported");
    1052             :     }
    1053             : 
    1054           0 :   if (txvq->thread_index == ~0)
    1055           0 :     return clib_error_return (0, "Queue initialization is not finished yet");
    1056             : 
    1057           0 :   cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
    1058           0 :   if ((mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
    1059             :       (mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
    1060             :     {
    1061           0 :       if (txvq->kickfd_idx == ~0)
    1062             :         {
    1063             :           // We cannot support interrupt mode if the driver opts out
    1064           0 :           return clib_error_return (0, "Driver does not support interrupt");
    1065             :         }
    1066           0 :       if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
    1067             :         {
    1068           0 :           ASSERT (cpu->polling_q_count != 0);
    1069           0 :           if (cpu->polling_q_count)
    1070           0 :             cpu->polling_q_count--;
    1071           0 :           vum->ifq_count++;
    1072             :           // Start the timer if this is the first encounter on interrupt
    1073             :           // interface/queue
    1074           0 :           if ((vum->ifq_count == 1) &&
    1075           0 :               ((vum->coalesce_time > 0.0) || (vum->coalesce_frames > 0)))
    1076           0 :             vlib_process_signal_event (vm,
    1077           0 :                                        vhost_user_send_interrupt_node.index,
    1078             :                                        VHOST_USER_EVENT_START_TIMER, 0);
    1079             :         }
    1080             :     }
    1081           0 :   else if (mode == VNET_HW_IF_RX_MODE_POLLING)
    1082             :     {
    1083           0 :       if (((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
    1084           0 :            (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) && vum->ifq_count)
    1085             :         {
    1086           0 :           cpu->polling_q_count++;
    1087           0 :           vum->ifq_count--;
    1088             :           // Stop the timer if there is no more interrupt interface/queue
    1089           0 :           if (vum->ifq_count == 0)
    1090           0 :             vlib_process_signal_event (vm,
    1091           0 :                                        vhost_user_send_interrupt_node.index,
    1092             :                                        VHOST_USER_EVENT_STOP_TIMER, 0);
    1093             :         }
    1094             :     }
    1095             : 
    1096           0 :   txvq->mode = mode;
    1097           0 :   vhost_user_set_operation_mode (vui, txvq);
    1098             : 
    1099           0 :   return 0;
    1100             : }
    1101             : 
    1102             : static __clib_unused clib_error_t *
    1103           6 : vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
    1104             :                                     u32 flags)
    1105             : {
    1106           6 :   vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
    1107           6 :   vhost_user_main_t *vum = &vhost_user_main;
    1108           6 :   vhost_user_intf_t *vui =
    1109           6 :     pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
    1110             :   u8 link_old, link_new;
    1111             : 
    1112           6 :   link_old = vui_is_link_up (vui);
    1113             : 
    1114           6 :   vui->admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
    1115             : 
    1116           6 :   link_new = vui_is_link_up (vui);
    1117             : 
    1118           6 :   if (link_old != link_new)
    1119           0 :     vnet_hw_interface_set_flags (vnm, vui->hw_if_index, link_new ?
    1120             :                                  VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
    1121             : 
    1122           6 :   return /* no error */ 0;
    1123             : }
    1124             : 
    1125             : /* *INDENT-OFF* */
    1126        2239 : VNET_DEVICE_CLASS (vhost_user_device_class) = {
    1127             :   .name = "vhost-user",
    1128             :   .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
    1129             :   .tx_function_error_strings = vhost_user_tx_func_error_strings,
    1130             :   .format_device_name = format_vhost_user_interface_name,
    1131             :   .name_renumber = vhost_user_name_renumber,
    1132             :   .admin_up_down_function = vhost_user_interface_admin_up_down,
    1133             :   .rx_mode_change_function = vhost_user_interface_rx_mode_change,
    1134             :   .format_tx_trace = format_vhost_trace,
    1135             : };
    1136             : 
    1137             : /* *INDENT-ON* */
    1138             : 
    1139             : /*
    1140             :  * fd.io coding-style-patch-verification: ON
    1141             :  *
    1142             :  * Local Variables:
    1143             :  * eval: (c-set-style "gnu")
    1144             :  * End:
    1145             :  */

Generated by: LCOV version 1.14