LCOV - code coverage report
Current view: top level - plugins/af_packet - node.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 281 420 66.9 %
Date: 2023-07-05 22:20:52 Functions: 13 17 76.5 %

          Line data    Source code
       1             : /*
       2             :  *------------------------------------------------------------------
       3             :  * af_packet.c - linux kernel packet interface
       4             :  *
       5             :  * Copyright (c) 2016 Cisco and/or its affiliates.
       6             :  * Licensed under the Apache License, Version 2.0 (the "License");
       7             :  * you may not use this file except in compliance with the License.
       8             :  * You may obtain a copy of the License at:
       9             :  *
      10             :  *     http://www.apache.org/licenses/LICENSE-2.0
      11             :  *
      12             :  * Unless required by applicable law or agreed to in writing, software
      13             :  * distributed under the License is distributed on an "AS IS" BASIS,
      14             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      15             :  * See the License for the specific language governing permissions and
      16             :  * limitations under the License.
      17             :  *------------------------------------------------------------------
      18             :  */
      19             : 
      20             : #include <linux/if_packet.h>
      21             : 
      22             : #include <vlib/vlib.h>
      23             : #include <vlib/unix/unix.h>
      24             : #include <vnet/ip/ip.h>
      25             : #include <vnet/ethernet/ethernet.h>
      26             : #include <vnet/interface/rx_queue_funcs.h>
      27             : #include <vnet/feature/feature.h>
      28             : #include <vnet/ethernet/packet.h>
      29             : 
      30             : #include <af_packet/af_packet.h>
      31             : #include <vnet/devices/virtio/virtio_std.h>
      32             : 
      33             : #define foreach_af_packet_input_error                                         \
      34             :   _ (PARTIAL_PKT, "partial packet")                                           \
      35             :   _ (TIMEDOUT_BLK, "timed out block")                                         \
      36             :   _ (TOTAL_RECV_BLK, "total received block")
      37             : typedef enum
      38             : {
      39             : #define _(f,s) AF_PACKET_INPUT_ERROR_##f,
      40             :   foreach_af_packet_input_error
      41             : #undef _
      42             :     AF_PACKET_INPUT_N_ERROR,
      43             : } af_packet_input_error_t;
      44             : 
      45             : static char *af_packet_input_error_strings[] = {
      46             : #define _(n,s) s,
      47             :   foreach_af_packet_input_error
      48             : #undef _
      49             : };
      50             : 
      51             : typedef struct
      52             : {
      53             :   u32 next_index;
      54             :   u32 hw_if_index;
      55             :   u16 queue_id;
      56             :   int block;
      57             :   u32 pkt_num;
      58             :   void *block_start;
      59             :   block_desc_t bd;
      60             :   union
      61             :   {
      62             :     tpacket3_hdr_t tph3;
      63             :     tpacket2_hdr_t tph2;
      64             :   };
      65             :   vnet_virtio_net_hdr_t vnet_hdr;
      66             :   u8 is_v3;
      67             : } af_packet_input_trace_t;
      68             : 
      69             : static u8 *
      70           0 : format_af_packet_input_trace (u8 * s, va_list * args)
      71             : {
      72           0 :   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
      73           0 :   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
      74           0 :   af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *);
      75           0 :   u32 indent = format_get_indent (s);
      76             : 
      77           0 :   s = format (s, "af_packet: hw_if_index %d rx-queue %u next-index %d",
      78           0 :               t->hw_if_index, t->queue_id, t->next_index);
      79             : 
      80           0 :   if (t->is_v3)
      81             :     {
      82           0 :       s = format (
      83             :         s, "\n%Ublock %u:\n%Uaddress %p version %u seq_num %lu pkt_num %u",
      84             :         format_white_space, indent + 2, t->block, format_white_space,
      85             :         indent + 4, t->block_start, t->bd.version, t->bd.hdr.bh1.seq_num,
      86             :         t->pkt_num);
      87           0 :       s = format (
      88             :         s,
      89             :         "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
      90             :         "\n%Usec 0x%x nsec 0x%x vlan %U"
      91             : #ifdef TP_STATUS_VLAN_TPID_VALID
      92             :         " vlan_tpid %u"
      93             : #endif
      94             :         ,
      95             :         format_white_space, indent + 2, format_white_space, indent + 4,
      96           0 :         t->tph3.tp_status, t->tph3.tp_len, t->tph3.tp_snaplen, t->tph3.tp_mac,
      97           0 :         t->tph3.tp_net, format_white_space, indent + 4, t->tph3.tp_sec,
      98             :         t->tph3.tp_nsec, format_ethernet_vlan_tci, t->tph3.hv1.tp_vlan_tci
      99             : #ifdef TP_STATUS_VLAN_TPID_VALID
     100             :         ,
     101           0 :         t->tph3.hv1.tp_vlan_tpid
     102             : #endif
     103             :       );
     104             :     }
     105             :   else
     106             :     {
     107           0 :       s = format (
     108             :         s,
     109             :         "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
     110             :         "\n%Usec 0x%x nsec 0x%x vlan %U"
     111             : #ifdef TP_STATUS_VLAN_TPID_VALID
     112             :         " vlan_tpid %u"
     113             : #endif
     114             :         ,
     115             :         format_white_space, indent + 2, format_white_space, indent + 4,
     116           0 :         t->tph2.tp_status, t->tph2.tp_len, t->tph2.tp_snaplen, t->tph2.tp_mac,
     117           0 :         t->tph2.tp_net, format_white_space, indent + 4, t->tph2.tp_sec,
     118           0 :         t->tph2.tp_nsec, format_ethernet_vlan_tci, t->tph2.tp_vlan_tci
     119             : #ifdef TP_STATUS_VLAN_TPID_VALID
     120             :         ,
     121           0 :         t->tph2.tp_vlan_tpid
     122             : #endif
     123             :       );
     124             :     }
     125             : 
     126           0 :   s = format (s,
     127             :               "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u"
     128             :               "\n%Ugso_size %u csum_start %u csum_offset %u",
     129             :               format_white_space, indent + 2, format_white_space, indent + 4,
     130           0 :               t->vnet_hdr.flags, t->vnet_hdr.gso_type, t->vnet_hdr.hdr_len,
     131           0 :               format_white_space, indent + 4, t->vnet_hdr.gso_size,
     132           0 :               t->vnet_hdr.csum_start, t->vnet_hdr.csum_offset);
     133           0 :   return s;
     134             : }
     135             : 
     136             : always_inline void
     137    32777800 : buffer_add_to_chain (vlib_buffer_t *b, vlib_buffer_t *first_b,
     138             :                      vlib_buffer_t *prev_b, u32 bi)
     139             : {
     140             :   /* update first buffer */
     141    32777800 :   first_b->total_length_not_including_first_buffer += b->current_length;
     142             : 
     143             :   /* update previous buffer */
     144    32777800 :   prev_b->next_buffer = bi;
     145    32777800 :   prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
     146             : 
     147             :   /* update current buffer */
     148    32777800 :   b->next_buffer = ~0;
     149    32777800 : }
     150             : 
     151             : static_always_inline void
     152      885213 : fill_gso_offload (vlib_buffer_t *b, u32 gso_size, u8 l4_hdr_sz)
     153             : {
     154      885213 :   b->flags |= VNET_BUFFER_F_GSO;
     155      885213 :   vnet_buffer2 (b)->gso_size = gso_size;
     156      885213 :   vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_sz;
     157      885213 : }
     158             : 
     159             : static_always_inline void
     160     2266660 : fill_cksum_offload (vlib_buffer_t *b, u8 *l4_hdr_sz, u8 is_ip)
     161             : {
     162     2266660 :   vnet_buffer_oflags_t oflags = 0;
     163     2266660 :   u16 l2hdr_sz = 0;
     164     2266660 :   u16 ethertype = 0;
     165     2266660 :   u8 l4_proto = 0;
     166             : 
     167     2266660 :   if (is_ip)
     168             :     {
     169           0 :       switch (b->data[0] & 0xf0)
     170             :         {
     171           0 :         case 0x40:
     172           0 :           ethertype = ETHERNET_TYPE_IP4;
     173           0 :           break;
     174           0 :         case 0x60:
     175           0 :           ethertype = ETHERNET_TYPE_IP6;
     176           0 :           break;
     177             :         }
     178           0 :     }
     179             :   else
     180             :     {
     181     2266660 :       ethernet_header_t *eth = (ethernet_header_t *) b->data;
     182     2266660 :       ethertype = clib_net_to_host_u16 (eth->type);
     183     2266660 :       l2hdr_sz = sizeof (ethernet_header_t);
     184     2266660 :       if (ethernet_frame_is_tagged (ethertype))
     185             :         {
     186           0 :           ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eth + 1);
     187             : 
     188           0 :           ethertype = clib_net_to_host_u16 (vlan->type);
     189           0 :           l2hdr_sz += sizeof (*vlan);
     190           0 :           if (ethertype == ETHERNET_TYPE_VLAN)
     191             :             {
     192           0 :               vlan++;
     193           0 :               ethertype = clib_net_to_host_u16 (vlan->type);
     194           0 :               l2hdr_sz += sizeof (*vlan);
     195             :             }
     196             :         }
     197             :     }
     198             : 
     199     2266660 :   vnet_buffer (b)->l2_hdr_offset = 0;
     200     2266660 :   vnet_buffer (b)->l3_hdr_offset = l2hdr_sz;
     201             : 
     202     2266660 :   if (ethertype == ETHERNET_TYPE_IP4)
     203             :     {
     204     1243170 :       ip4_header_t *ip4 = (ip4_header_t *) (b->data + l2hdr_sz);
     205     1243170 :       vnet_buffer (b)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
     206     1243170 :       b->flags |= (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
     207             :                    VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
     208             :                    VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
     209             : 
     210     1243170 :       l4_proto = ip4->protocol;
     211             :     }
     212     1023490 :   else if (ethertype == ETHERNET_TYPE_IP6)
     213             :     {
     214     1023490 :       ip6_header_t *ip6 = (ip6_header_t *) (b->data + l2hdr_sz);
     215     1023490 :       b->flags |= (VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
     216             :                    VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
     217             :                    VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
     218     1023490 :       u16 ip6_hdr_len = sizeof (ip6_header_t);
     219             : 
     220     1023490 :       if (ip6_ext_hdr (ip6->protocol))
     221             :         {
     222           0 :           ip6_ext_header_t *p = (void *) (ip6 + 1);
     223           0 :           ip6_hdr_len += ip6_ext_header_len (p);
     224           0 :           while (ip6_ext_hdr (p->next_hdr))
     225             :             {
     226           0 :               ip6_hdr_len += ip6_ext_header_len (p);
     227           0 :               p = ip6_ext_next_header (p);
     228             :             }
     229           0 :           l4_proto = p->next_hdr;
     230             :         }
     231             :       else
     232     1023490 :         l4_proto = ip6->protocol;
     233     1023490 :       vnet_buffer (b)->l4_hdr_offset = l2hdr_sz + ip6_hdr_len;
     234             :     }
     235             : 
     236     2266660 :   if (l4_proto == IP_PROTOCOL_TCP)
     237             :     {
     238     2266660 :       oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
     239     2266660 :       tcp_header_t *tcp =
     240     2266660 :         (tcp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
     241     2266660 :       *l4_hdr_sz = tcp_header_bytes (tcp);
     242             :     }
     243           0 :   else if (l4_proto == IP_PROTOCOL_UDP)
     244             :     {
     245           0 :       oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
     246           0 :       *l4_hdr_sz = sizeof (udp_header_t);
     247             :     }
     248             : 
     249     2266660 :   if (oflags)
     250     2266660 :     vnet_buffer_offload_flags_set (b, oflags);
     251     2266660 : }
     252             : 
     253             : always_inline uword
     254      661027 : af_packet_v3_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
     255             :                               vlib_frame_t *frame, af_packet_if_t *apif,
     256             :                               u16 queue_id, u8 is_cksum_gso_enabled)
     257             : {
     258      661027 :   af_packet_main_t *apm = &af_packet_main;
     259      661027 :   af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id);
     260             :   tpacket3_hdr_t *tph;
     261             :   u32 next_index;
     262             :   u32 n_free_bufs;
     263      661027 :   u32 n_rx_packets = 0;
     264      661027 :   u32 n_rx_bytes = 0;
     265      661027 :   u32 timedout_blk = 0;
     266      661027 :   u32 total = 0;
     267      661027 :   u32 *to_next = 0;
     268      661027 :   u32 block = rx_queue->next_rx_block;
     269      661027 :   u32 block_nr = rx_queue->rx_req->req3.tp_block_nr;
     270      661027 :   u8 *block_start = 0;
     271      661027 :   uword n_trace = vlib_get_trace_count (vm, node);
     272      661027 :   u32 thread_index = vm->thread_index;
     273      661027 :   u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
     274      661027 :   u32 min_bufs = rx_queue->rx_req->req3.tp_frame_size / n_buffer_bytes;
     275      661027 :   u32 num_pkts = 0;
     276      661027 :   u32 rx_frame_offset = 0;
     277      661027 :   block_desc_t *bd = 0;
     278      661027 :   vlib_buffer_t bt = {};
     279      661027 :   u8 is_ip = (apif->mode == AF_PACKET_IF_MODE_IP);
     280             : 
     281      661027 :   if (is_ip)
     282           0 :     next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
     283             :   else
     284             :     {
     285      661027 :       next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
     286      661027 :       if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
     287           0 :         next_index = apif->per_interface_next_index;
     288             : 
     289             :       /* redirect if feature path enabled */
     290      661027 :       vnet_feature_start_device_input_x1 (apif->sw_if_index, &next_index, &bt);
     291             :     }
     292             : 
     293      661027 :   if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block]))
     294      661027 :          ->hdr.bh1.block_status &
     295             :        TP_STATUS_USER) != 0)
     296             :     {
     297      539407 :       u32 n_required = 0;
     298      539407 :       bd = (block_desc_t *) block_start;
     299             : 
     300      539407 :       if (PREDICT_FALSE (rx_queue->is_rx_pending))
     301             :         {
     302           0 :           num_pkts = rx_queue->num_rx_pkts;
     303           0 :           rx_frame_offset = rx_queue->rx_frame_offset;
     304           0 :           rx_queue->is_rx_pending = 0;
     305             :         }
     306             :       else
     307             :         {
     308      539407 :           num_pkts = bd->hdr.bh1.num_pkts;
     309      539407 :           rx_frame_offset = bd->hdr.bh1.offset_to_first_pkt;
     310      539407 :           total++;
     311             : 
     312      539407 :           if (TP_STATUS_BLK_TMO & bd->hdr.bh1.block_status)
     313       25548 :             timedout_blk++;
     314             :         }
     315             : 
     316      539407 :       n_required = clib_max (num_pkts, VLIB_FRAME_SIZE);
     317      539407 :       n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
     318      539407 :       if (PREDICT_FALSE (n_free_bufs < n_required))
     319             :         {
     320       65923 :           vec_validate (apm->rx_buffers[thread_index],
     321             :                         n_required + n_free_bufs - 1);
     322      131846 :           n_free_bufs += vlib_buffer_alloc (
     323       65923 :             vm, &apm->rx_buffers[thread_index][n_free_bufs], n_required);
     324       65923 :           vec_set_len (apm->rx_buffers[thread_index], n_free_bufs);
     325             :         }
     326             : 
     327     1084310 :       while (num_pkts && (n_free_bufs >= min_bufs))
     328             :         {
     329      544901 :           u32 next0 = next_index;
     330             :           u32 n_left_to_next;
     331             : 
     332      544901 :           vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
     333             : 
     334     8743470 :           while (num_pkts && n_left_to_next && (n_free_bufs >= min_bufs))
     335             :             {
     336     8198570 :               tph = (tpacket3_hdr_t *) (block_start + rx_frame_offset);
     337             : 
     338     8198570 :               if (num_pkts > 1)
     339     7659160 :                 CLIB_PREFETCH (block_start + rx_frame_offset +
     340             :                                  tph->tp_next_offset,
     341             :                                2 * CLIB_CACHE_LINE_BYTES, LOAD);
     342             : 
     343     8198570 :               vlib_buffer_t *b0 = 0, *first_b0 = 0, *prev_b0 = 0;
     344     8198570 :               vnet_virtio_net_hdr_t *vnet_hdr = 0;
     345     8198570 :               u32 data_len = tph->tp_snaplen;
     346     8198570 :               u32 offset = 0;
     347     8198570 :               u32 bi0 = ~0, first_bi0 = ~0;
     348     8198570 :               u8 l4_hdr_sz = 0;
     349             : 
     350     8198570 :               if (is_cksum_gso_enabled)
     351      464572 :                 vnet_hdr =
     352      464572 :                   (vnet_virtio_net_hdr_t *) ((u8 *) tph + tph->tp_mac -
     353             :                                              sizeof (vnet_virtio_net_hdr_t));
     354             : 
     355             :               // save current state and return
     356     8198570 :               if (PREDICT_FALSE (((data_len / n_buffer_bytes) + 1) >
     357             :                                  vec_len (apm->rx_buffers[thread_index])))
     358             :                 {
     359           0 :                   rx_queue->rx_frame_offset = rx_frame_offset;
     360           0 :                   rx_queue->num_rx_pkts = num_pkts;
     361           0 :                   rx_queue->is_rx_pending = 1;
     362           0 :                   vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     363           0 :                   goto done;
     364             :                 }
     365             : 
     366    28801600 :               while (data_len)
     367             :                 {
     368             :                   /* grab free buffer */
     369    20603100 :                   u32 last_empty_buffer =
     370    20603100 :                     vec_len (apm->rx_buffers[thread_index]) - 1;
     371    20603100 :                   bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
     372    20603100 :                   vec_set_len (apm->rx_buffers[thread_index],
     373             :                                last_empty_buffer);
     374    20603100 :                   n_free_bufs--;
     375             : 
     376             :                   /* copy data */
     377    20603100 :                   u32 bytes_to_copy =
     378             :                     data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
     379    20603100 :                   u32 vlan_len = 0;
     380    20603100 :                   u32 bytes_copied = 0;
     381             : 
     382    20603100 :                   b0 = vlib_get_buffer (vm, bi0);
     383    20603100 :                   b0->current_data = 0;
     384             : 
     385             :                   /* Kernel removes VLAN headers, so reconstruct VLAN */
     386    20603100 :                   if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID))
     387             :                     {
     388           0 :                       if (PREDICT_TRUE (offset == 0))
     389             :                         {
     390           0 :                           clib_memcpy_fast (vlib_buffer_get_current (b0),
     391           0 :                                             (u8 *) tph + tph->tp_mac,
     392             :                                             sizeof (ethernet_header_t));
     393             :                           ethernet_header_t *eth =
     394           0 :                             vlib_buffer_get_current (b0);
     395           0 :                           ethernet_vlan_header_t *vlan =
     396             :                             (ethernet_vlan_header_t *) (eth + 1);
     397           0 :                           vlan->priority_cfi_and_id =
     398           0 :                             clib_host_to_net_u16 (tph->hv1.tp_vlan_tci);
     399           0 :                           vlan->type = eth->type;
     400           0 :                           eth->type =
     401           0 :                             clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
     402           0 :                           vlan_len = sizeof (ethernet_vlan_header_t);
     403           0 :                           bytes_copied = sizeof (ethernet_header_t);
     404             :                         }
     405             :                     }
     406    41206100 :                   clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) +
     407    20603100 :                                       bytes_copied + vlan_len,
     408    20603100 :                                     (u8 *) tph + tph->tp_mac + offset +
     409             :                                       bytes_copied,
     410    20603100 :                                     (bytes_to_copy - bytes_copied));
     411             : 
     412             :                   /* fill buffer header */
     413    20603100 :                   b0->current_length = bytes_to_copy + vlan_len;
     414             : 
     415    20603100 :                   if (offset == 0)
     416             :                     {
     417     8198570 :                       b0->total_length_not_including_first_buffer = 0;
     418     8198570 :                       b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
     419     8198570 :                       vnet_buffer (b0)->sw_if_index[VLIB_RX] =
     420     8198570 :                         apif->sw_if_index;
     421     8198570 :                       vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~0;
     422     8198570 :                       first_b0 = b0;
     423     8198570 :                       first_bi0 = bi0;
     424     8198570 :                       if (is_cksum_gso_enabled)
     425             :                         {
     426      464572 :                           if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
     427      464123 :                             fill_cksum_offload (first_b0, &l4_hdr_sz, is_ip);
     428      464572 :                           if (vnet_hdr->gso_type & (VIRTIO_NET_HDR_GSO_TCPV4 |
     429             :                                                     VIRTIO_NET_HDR_GSO_TCPV6))
     430      242386 :                             fill_gso_offload (first_b0, vnet_hdr->gso_size,
     431             :                                               l4_hdr_sz);
     432             :                         }
     433             :                     }
     434             :                   else
     435    12404500 :                     buffer_add_to_chain (b0, first_b0, prev_b0, bi0);
     436             : 
     437    20603100 :                   prev_b0 = b0;
     438    20603100 :                   offset += bytes_to_copy;
     439    20603100 :                   data_len -= bytes_to_copy;
     440             :                 }
     441     8198570 :               n_rx_packets++;
     442     8198570 :               n_rx_bytes += tph->tp_snaplen;
     443     8198570 :               to_next[0] = first_bi0;
     444     8198570 :               to_next += 1;
     445     8198570 :               n_left_to_next--;
     446             : 
     447             :               /* drop partial packets */
     448     8198570 :               if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen))
     449             :                 {
     450           0 :                   next0 = VNET_DEVICE_INPUT_NEXT_DROP;
     451           0 :                   first_b0->error =
     452           0 :                     node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT];
     453             :                 }
     454             :               else
     455             :                 {
     456     8198570 :                   if (PREDICT_FALSE (apif->mode == AF_PACKET_IF_MODE_IP))
     457             :                     {
     458           0 :                       switch (first_b0->data[0] & 0xf0)
     459             :                         {
     460           0 :                         case 0x40:
     461           0 :                           next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
     462           0 :                           break;
     463           0 :                         case 0x60:
     464           0 :                           next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
     465           0 :                           break;
     466           0 :                         default:
     467           0 :                           next0 = VNET_DEVICE_INPUT_NEXT_DROP;
     468           0 :                           break;
     469             :                         }
     470           0 :                       if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
     471           0 :                         next0 = apif->per_interface_next_index;
     472             :                     }
     473             :                   else
     474             :                     {
     475             :                       /* copy feature arc data from template */
     476     8198570 :                       first_b0->current_config_index = bt.current_config_index;
     477     8198570 :                       vnet_buffer (first_b0)->feature_arc_index =
     478     8198570 :                         vnet_buffer (&bt)->feature_arc_index;
     479             :                     }
     480             :                 }
     481             : 
     482             :               /* trace */
     483     8198570 :               if (PREDICT_FALSE (n_trace > 0 &&
     484             :                                  vlib_trace_buffer (vm, node, next0, first_b0,
     485             :                                                     /* follow_chain */ 0)))
     486             :                 {
     487             :                   af_packet_input_trace_t *tr;
     488           0 :                   vlib_set_trace_count (vm, node, --n_trace);
     489           0 :                   tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
     490           0 :                   tr->is_v3 = 1;
     491           0 :                   tr->next_index = next0;
     492           0 :                   tr->hw_if_index = apif->hw_if_index;
     493           0 :                   tr->queue_id = queue_id;
     494           0 :                   tr->block = block;
     495           0 :                   tr->block_start = bd;
     496           0 :                   tr->pkt_num = bd->hdr.bh1.num_pkts - num_pkts;
     497           0 :                   clib_memcpy_fast (&tr->bd, bd, sizeof (block_desc_t));
     498           0 :                   clib_memcpy_fast (&tr->tph3, tph, sizeof (tpacket3_hdr_t));
     499           0 :                   if (is_cksum_gso_enabled)
     500           0 :                     clib_memcpy_fast (&tr->vnet_hdr, vnet_hdr,
     501             :                                       sizeof (vnet_virtio_net_hdr_t));
     502             :                   else
     503           0 :                     clib_memset_u8 (&tr->vnet_hdr, 0,
     504             :                                     sizeof (vnet_virtio_net_hdr_t));
     505             :                 }
     506             : 
     507             :               /* enque and take next packet */
     508     8198570 :               vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
     509             :                                                n_left_to_next, first_bi0,
     510             :                                                next0);
     511             : 
     512             :               /* next packet */
     513     8198570 :               num_pkts--;
     514     8198570 :               rx_frame_offset += tph->tp_next_offset;
     515             :             }
     516             : 
     517      544901 :           vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     518             :         }
     519             : 
     520      539407 :       if (PREDICT_TRUE (num_pkts == 0))
     521             :         {
     522      539407 :           bd->hdr.bh1.block_status = TP_STATUS_KERNEL;
     523      539407 :           block = (block + 1) % block_nr;
     524             :         }
     525             :       else
     526             :         {
     527           0 :           rx_queue->rx_frame_offset = rx_frame_offset;
     528           0 :           rx_queue->num_rx_pkts = num_pkts;
     529           0 :           rx_queue->is_rx_pending = 1;
     530             :         }
     531             :     }
     532             : 
     533      661027 :   rx_queue->next_rx_block = block;
     534             : 
     535      661027 : done:
     536             : 
     537      661027 :   if (apm->polling_count == 0)
     538             :     {
     539      661027 :       if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block]))
     540      661027 :              ->hdr.bh1.block_status &
     541             :            TP_STATUS_USER) != 0)
     542      512263 :         vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_POLLING);
     543             :       else
     544      148764 :         vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_INTERRUPT);
     545             :     }
     546             : 
     547      661027 :   vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TOTAL_RECV_BLK,
     548             :                     total);
     549      661027 :   vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TIMEDOUT_BLK,
     550             :                     timedout_blk);
     551             : 
     552      661027 :   vlib_increment_combined_counter
     553      661027 :     (vnet_get_main ()->interface_main.combined_sw_if_counters
     554             :      + VNET_INTERFACE_COUNTER_RX,
     555      661027 :      vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
     556             : 
     557      661027 :   vnet_device_increment_rx_packets (thread_index, n_rx_packets);
     558      661027 :   return n_rx_packets;
     559             : }
     560             : 
     561             : always_inline uword
     562      253929 : af_packet_v2_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
     563             :                               vlib_frame_t *frame, af_packet_if_t *apif,
     564             :                               u16 queue_id, u8 is_cksum_gso_enabled)
     565             : {
                         /*
                          * TPACKET_V2 receive path for one af_packet RX queue.
                          *
                          * Starting at rx_queue->next_rx_frame, every ring frame whose tp_status
                          * has TP_STATUS_USER set is copied (tp_snaplen bytes) into one or more
                          * vlib buffers popped from apm->rx_buffers[thread_index]; the first
                          * buffer of the resulting chain is enqueued to the next node and the
                          * ring frame is returned by writing TP_STATUS_KERNEL to tp_status.
                          *
                          * vm, node             - handed to the vlib frame/trace/buffer helpers.
                          * frame                - not referenced anywhere in this function.
                          * apif                 - interface state: mode, sw/hw interface index,
                          *                        per_interface_next_index and rx_queues.
                          * queue_id             - index into apif->rx_queues.
                          * is_cksum_gso_enabled - when non-zero, a vnet_virtio_net_hdr_t located
                          *                        immediately before tph->tp_mac is read and used
                          *                        to call the checksum / GSO offload helpers.
                          *
                          * Returns n_rx_packets, the number of packets enqueued by this call.
                          */
     566      253929 :   af_packet_main_t *apm = &af_packet_main;
     567      253929 :   af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id);
     568             :   tpacket2_hdr_t *tph;
     569             :   u32 next_index;
                         /* block is never modified below: rx_ring[0] is the base of all frames */
     570      253929 :   u32 block = 0;
     571             :   u32 rx_frame;
     572             :   u32 n_free_bufs;
     573      253929 :   u32 n_rx_packets = 0;
     574      253929 :   u32 n_rx_bytes = 0;
     575      253929 :   u32 *to_next = 0;
     576      253929 :   u32 frame_size = rx_queue->rx_req->req.tp_frame_size;
     577      253929 :   u32 frame_num = rx_queue->rx_req->req.tp_frame_nr;
     578      253929 :   u8 *block_start = rx_queue->rx_ring[block];
     579      253929 :   uword n_trace = vlib_get_trace_count (vm, node);
     580      253929 :   u32 thread_index = vm->thread_index;
     581      253929 :   u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
                         /*
                          * Number of default-sized buffers that fit in one ring frame; both
                          * receive loops only continue while n_free_bufs > min_bufs.
                          * Presumably this guarantees enough buffers to chain a full frame
                          * before a packet is started - confirm.
                          */
     582      253929 :   u32 min_bufs = rx_queue->rx_req->req.tp_frame_size / n_buffer_bytes;
     583      253929 :   u8 is_ip = (apif->mode == AF_PACKET_IF_MODE_IP);
                         /* template buffer: receives the feature-arc start state (non-IP mode) */
     584      253929 :   vlib_buffer_t bt = {};
     585             : 
     586      253929 :   if (is_ip)
     587             :     {
                             /* placeholder only: in IP mode next0 is re-derived per packet below */
     588           0 :       next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
     589             :     }
     590             :   else
     591             :     {
     592      253929 :       next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
     593      253929 :       if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
     594           0 :         next_index = apif->per_interface_next_index;
     595             : 
     596             :       /* redirect if feature path enabled */
     597      253929 :       vnet_feature_start_device_input_x1 (apif->sw_if_index, &next_index, &bt);
     598             :     }
     599             : 
                         /*
                          * Top up the per-thread buffer cache when it holds fewer than
                          * VLIB_FRAME_SIZE indices. vlib_buffer_alloc's return value is added to
                          * n_free_bufs and the vector length is then set to that count, so only
                          * the indices actually allocated remain in the vector.
                          */
     600      253929 :   n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
     601      253929 :   if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
     602             :     {
     603      113082 :       vec_validate (apm->rx_buffers[thread_index],
     604             :                     VLIB_FRAME_SIZE + n_free_bufs - 1);
     605      226164 :       n_free_bufs += vlib_buffer_alloc (
     606      113082 :         vm, &apm->rx_buffers[thread_index][n_free_bufs], VLIB_FRAME_SIZE);
     607      113082 :       vec_set_len (apm->rx_buffers[thread_index], n_free_bufs);
     608             :     }
     609             : 
                         /* resume at the frame where the previous call stopped */
     610      253929 :   rx_frame = rx_queue->next_rx_frame;
     611      253929 :   tph = (tpacket2_hdr_t *) (block_start + rx_frame * frame_size);
                         /* outer loop: one iteration per next-node frame obtained */
     612      510583 :   while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs))
     613             :     {
     614      256654 :       vlib_buffer_t *b0 = 0, *first_b0 = 0, *prev_b0 = 0;
     615      256654 :       u32 next0 = next_index;
     616             : 
     617             :       u32 n_left_to_next;
     618      256654 :       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
                             /* inner loop: one iteration per ring frame (i.e. per packet) */
     619     5330850 :       while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs) &&
     620             :              n_left_to_next)
     621             :         {
     622     5074200 :           vnet_virtio_net_hdr_t *vnet_hdr = 0;
     623     5074200 :           u32 data_len = tph->tp_snaplen;
     624     5074200 :           u32 offset = 0;
     625     5074200 :           u32 bi0 = 0, first_bi0 = 0;
     626     5074200 :           u8 l4_hdr_sz = 0;
     627             : 
                                 /* the virtio net header sits directly in front of the MAC header */
     628     5074200 :           if (is_cksum_gso_enabled)
     629     1803740 :             vnet_hdr =
     630     1803740 :               (vnet_virtio_net_hdr_t *) ((u8 *) tph + tph->tp_mac -
     631             :                                          sizeof (vnet_virtio_net_hdr_t));
                                 /* copy the packet in chunks of at most n_buffer_bytes, one per buffer */
     632    30521700 :           while (data_len)
     633             :             {
     634             :               /* grab free buffer */
                                     /* pops the last index of the per-thread cache vector */
     635    25447500 :               u32 last_empty_buffer =
     636    25447500 :                 vec_len (apm->rx_buffers[thread_index]) - 1;
     637    25447500 :               bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
     638    25447500 :               b0 = vlib_get_buffer (vm, bi0);
     639    25447500 :               vec_set_len (apm->rx_buffers[thread_index], last_empty_buffer);
     640    25447500 :               n_free_bufs--;
     641             : 
     642             :               /* copy data */
     643    25447500 :               u32 bytes_to_copy =
     644             :                 data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
     645    25447500 :               u32 vlan_len = 0;
     646    25447500 :               u32 bytes_copied = 0;
     647    25447500 :               b0->current_data = 0;
     648             :               /* Kernel removes VLAN headers, so reconstruct VLAN */
                                     /*
                                      * Only done for the first buffer of a packet (offset == 0): the
                                      * ethernet header is copied, a VLAN header carrying tp_vlan_tci is
                                      * written right after it, the original ethertype moves into the
                                      * VLAN header and the ethernet type becomes ETHERNET_TYPE_VLAN.
                                      * NOTE(review): current_length below becomes
                                      * bytes_to_copy + vlan_len, which exceeds n_buffer_bytes when
                                      * bytes_to_copy == n_buffer_bytes - confirm the buffer has room.
                                      */
     649    25447500 :               if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID))
     650             :                 {
     651           0 :                   if (PREDICT_TRUE (offset == 0))
     652             :                     {
     653           0 :                       clib_memcpy_fast (vlib_buffer_get_current (b0),
     654           0 :                                         (u8 *) tph + tph->tp_mac,
     655             :                                         sizeof (ethernet_header_t));
     656           0 :                       ethernet_header_t *eth = vlib_buffer_get_current (b0);
     657           0 :                       ethernet_vlan_header_t *vlan =
     658             :                         (ethernet_vlan_header_t *) (eth + 1);
     659           0 :                       vlan->priority_cfi_and_id =
     660           0 :                         clib_host_to_net_u16 (tph->tp_vlan_tci);
     661           0 :                       vlan->type = eth->type;
     662           0 :                       eth->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
     663           0 :                       vlan_len = sizeof (ethernet_vlan_header_t);
     664           0 :                       bytes_copied = sizeof (ethernet_header_t);
     665             :                     }
     666             :                 }
                                     /*
                                      * Copy the rest of this chunk; the destination is shifted by
                                      * vlan_len so the payload lands after the inserted VLAN header.
                                      */
     667    50895100 :               clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) +
     668    25447500 :                                   bytes_copied + vlan_len,
     669    25447500 :                                 (u8 *) tph + tph->tp_mac + offset +
     670             :                                   bytes_copied,
     671    25447500 :                                 (bytes_to_copy - bytes_copied));
     672             : 
     673             :               /* fill buffer header */
     674    25447500 :               b0->current_length = bytes_to_copy + vlan_len;
     675             : 
                                     /* offset == 0 identifies the first buffer of the packet */
     676    25447500 :               if (offset == 0)
     677             :                 {
     678     5074200 :                   b0->total_length_not_including_first_buffer = 0;
     679     5074200 :                   b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
     680     5074200 :                   vnet_buffer (b0)->sw_if_index[VLIB_RX] = apif->sw_if_index;
     681     5074200 :                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~0;
     682     5074200 :                   first_bi0 = bi0;
     683     5074200 :                   first_b0 = vlib_get_buffer (vm, first_bi0);
     684             : 
                                         /*
                                          * Offload metadata is driven by the virtio header flags;
                                          * l4_hdr_sz is filled by fill_cksum_offload and then passed
                                          * on to fill_gso_offload.
                                          */
     685     5074200 :                   if (is_cksum_gso_enabled)
     686             :                     {
     687     1803740 :                       if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
     688     1802540 :                         fill_cksum_offload (first_b0, &l4_hdr_sz, is_ip);
     689     1803740 :                       if (vnet_hdr->gso_type & (VIRTIO_NET_HDR_GSO_TCPV4 |
     690             :                                                 VIRTIO_NET_HDR_GSO_TCPV6))
     691      642827 :                         fill_gso_offload (first_b0, vnet_hdr->gso_size,
     692             :                                           l4_hdr_sz);
     693             :                     }
     694             :                 }
     695             :               else
                                       /* subsequent chunk: link b0 behind prev_b0 in first_b0's chain */
     696    20373300 :                 buffer_add_to_chain (b0, first_b0, prev_b0, bi0);
     697             : 
     698    25447500 :               prev_b0 = b0;
     699    25447500 :               offset += bytes_to_copy;
     700    25447500 :               data_len -= bytes_to_copy;
     701             :             }
                                 /* byte counter uses tp_snaplen; a reconstructed VLAN tag is not added */
     702     5074200 :           n_rx_packets++;
     703     5074200 :           n_rx_bytes += tph->tp_snaplen;
     704     5074200 :           to_next[0] = first_bi0;
     705     5074200 :           to_next += 1;
     706     5074200 :           n_left_to_next--;
     707             : 
     708             :           /* drop partial packets */
                                 /* tp_len != tp_snaplen means the ring frame holds a truncated capture */
     709     5074200 :           if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen))
     710             :             {
     711           0 :               next0 = VNET_DEVICE_INPUT_NEXT_DROP;
     712           0 :               first_b0->error =
     713           0 :                 node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT];
     714             :             }
     715             :           else
     716             :             {
     717     5074200 :               if (PREDICT_FALSE (is_ip))
     718             :                 {
                                         /*
                                          * IP mode: the high nibble of the first data byte selects
                                          * the next node. per_interface_next_index, when set,
                                          * overrides the result afterwards, including the DROP case.
                                          */
     719           0 :                   switch (first_b0->data[0] & 0xf0)
     720             :                     {
     721           0 :                     case 0x40:
     722           0 :                       next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
     723           0 :                       break;
     724           0 :                     case 0x60:
     725           0 :                       next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
     726           0 :                       break;
     727           0 :                     default:
     728           0 :                       next0 = VNET_DEVICE_INPUT_NEXT_DROP;
     729           0 :                       break;
     730             :                     }
     731           0 :                   if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
     732           0 :                     next0 = apif->per_interface_next_index;
     733             :                 }
     734             :               else
     735             :                 {
     736             :                   /* copy feature arc data from template */
     737     5074200 :                   first_b0->current_config_index = bt.current_config_index;
     738     5074200 :                   vnet_buffer (first_b0)->feature_arc_index =
     739     5074200 :                     vnet_buffer (&bt)->feature_arc_index;
     740             :                 }
     741             :             }
     742             : 
     743             :           /* trace */
                                 /* records the raw tpacket2 header and, if present, the virtio header */
     744     5074200 :           if (PREDICT_FALSE (n_trace > 0 &&
     745             :                              vlib_trace_buffer (vm, node, next0, first_b0,
     746             :                                                 /* follow_chain */ 0)))
     747             :             {
     748             :               af_packet_input_trace_t *tr;
     749           0 :               vlib_set_trace_count (vm, node, --n_trace);
     750           0 :               tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
     751           0 :               tr->is_v3 = 0;
     752           0 :               tr->next_index = next0;
     753           0 :               tr->hw_if_index = apif->hw_if_index;
     754           0 :               tr->queue_id = queue_id;
     755           0 :               clib_memcpy_fast (&tr->tph2, tph, sizeof (struct tpacket2_hdr));
     756           0 :               if (is_cksum_gso_enabled)
     757           0 :                 clib_memcpy_fast (&tr->vnet_hdr, vnet_hdr,
     758             :                                   sizeof (vnet_virtio_net_hdr_t));
     759             :               else
     760           0 :                 clib_memset_u8 (&tr->vnet_hdr, 0,
     761             :                                 sizeof (vnet_virtio_net_hdr_t));
     762             :             }
     763             : 
     764             :           /* enqueue and take next packet */
     765     5074200 :           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
     766             :                                            n_left_to_next, first_bi0, next0);
     767             : 
     768             :           /* next packet */
                                 /* give the ring frame back, then advance circularly over frame_num */
     769     5074200 :           tph->tp_status = TP_STATUS_KERNEL;
     770     5074200 :           rx_frame = (rx_frame + 1) % frame_num;
     771     5074200 :           tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
     772             :         }
     773             : 
     774      256654 :       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     775             :     }
     776             : 
                         /* remember where to resume on the next invocation */
     777      253929 :   rx_queue->next_rx_frame = rx_frame;
     778             : 
                         /*
                          * NOTE(review): the counter array is combined_sw_if_counters but it is
                          * indexed with apif->hw_if_index - confirm this is the intended index.
                          */
     779      253929 :   vlib_increment_combined_counter (
     780      253929 :     vnet_get_main ()->interface_main.combined_sw_if_counters +
     781             :       VNET_INTERFACE_COUNTER_RX,
     782      253929 :     vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
     783             : 
     784      253929 :   vnet_device_increment_rx_packets (thread_index, n_rx_packets);
     785      253929 :   return n_rx_packets;
     786             : }
     787             : 
     788             : always_inline uword
     789      914956 : af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
     790             :                            vlib_frame_t *frame, af_packet_if_t *apif,
     791             :                            u16 queue_id, u8 is_cksum_gso_enabled)
     792             : 
     793             : {
                         /*
                          * Dispatch to the receive routine matching the interface's ring
                          * version: TPACKET_V3 uses the v3 routine, every other value of
                          * apif->version uses the v2 routine. All arguments are forwarded
                          * unchanged and the callee's packet count is returned.
                          */
     794      914956 :   if (apif->version == TPACKET_V3)
     795      661027 :     return af_packet_v3_device_input_fn (vm, node, frame, apif, queue_id,
     796             :                                          is_cksum_gso_enabled);
     797             :   else
     798      253929 :     return af_packet_v2_device_input_fn (vm, node, frame, apif, queue_id,
     799             :                                          is_cksum_gso_enabled);
     800             : }
     801             : 
     802      630943 : VLIB_NODE_FN (af_packet_input_node) (vlib_main_t * vm,
     803             :                                      vlib_node_runtime_t * node,
     804             :                                      vlib_frame_t * frame)
     805             : {
                         /*
                          * Node function for af_packet_input_node.
                          *
                          * Iterates over the RX-queue poll vector returned by
                          * vnet_hw_if_get_rxq_poll_vector and, for each entry whose interface
                          * has is_admin_up set, runs af_packet_device_input_fn on that queue.
                          * Returns the sum of the packet counts reported by those calls.
                          */
     806      628707 :   u32 n_rx_packets = 0;
     807      628707 :   af_packet_main_t *apm = &af_packet_main;
     808             :   vnet_hw_if_rxq_poll_vector_t *pv;
     809      628707 :   pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
     810     1543710 :   for (int i = 0; i < vec_len (pv); i++)
     811             :     {
     812             :       af_packet_if_t *apif;
     813      915000 :       apif = vec_elt_at_index (apm->interfaces, pv[i].dev_instance);
                             /* interfaces that are not admin-up are skipped entirely */
     814      915000 :       if (apif->is_admin_up)
     815             :         {
                                 /*
                                  * The flag is passed as a literal 1 or 0 rather than forwarded
                                  * from apif; presumably this lets the always_inline callee be
                                  * specialised per constant value - confirm.
                                  */
     816      914956 :           if (apif->is_cksum_gso_enabled)
     817      499826 :             n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif,
     818      499826 :                                                        pv[i].queue_id, 1);
     819             :           else
     820      415130 :             n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif,
     821      415130 :                                                        pv[i].queue_id, 0);
     822             :         }
     823             :     }
     824      628707 :   return n_rx_packets;
     825             : }
     826             : 
     827      167480 : VLIB_REGISTER_NODE (af_packet_input_node) = {
                         /*
                          * Registration of the "af-packet-input" node: an input-type node
                          * declared as a sibling of "device-input", with trace support
                          * (formatted by format_af_packet_input_trace), an initial state of
                          * VLIB_NODE_STATE_INTERRUPT, and AF_PACKET_INPUT_N_ERROR error
                          * counters named by af_packet_input_error_strings.
                          */
     828             :   .name = "af-packet-input",
     829             :   .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
     830             :   .sibling_of = "device-input",
     831             :   .format_trace = format_af_packet_input_trace,
     832             :   .type = VLIB_NODE_TYPE_INPUT,
     833             :   .state = VLIB_NODE_STATE_INTERRUPT,
     834             :   .n_errors = AF_PACKET_INPUT_N_ERROR,
     835             :   .error_strings = af_packet_input_error_strings,
     836             : };
     837             : 
     838             : 
     839             : /*
     840             :  * fd.io coding-style-patch-verification: ON
     841             :  *
     842             :  * Local Variables:
     843             :  * eval: (c-set-style "gnu")
     844             :  * End:
     845             :  */

Generated by: LCOV version 1.14