LCOV - code coverage report
Current view: top level - plugins/af_xdp - device.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 6 447 1.3 %
Date: 2023-07-05 22:20:52 Functions: 5 28 17.9 %

          Line data    Source code
       1             : /*
       2             :  *------------------------------------------------------------------
       3             :  * Copyright (c) 2018 Cisco and/or its affiliates.
       4             :  * Licensed under the Apache License, Version 2.0 (the "License");
       5             :  * you may not use this file except in compliance with the License.
       6             :  * You may obtain a copy of the License at:
       7             :  *
       8             :  *     http://www.apache.org/licenses/LICENSE-2.0
       9             :  *
      10             :  * Unless required by applicable law or agreed to in writing, software
      11             :  * distributed under the License is distributed on an "AS IS" BASIS,
      12             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      13             :  * See the License for the specific language governing permissions and
      14             :  * limitations under the License.
      15             :  *------------------------------------------------------------------
      16             :  */
      17             : 
      18             : #include <stdio.h>
      19             : #include <net/if.h>
      20             : #include <sys/ioctl.h>
      21             : #include <linux/ethtool.h>
      22             : #include <linux/if_link.h>
      23             : #include <linux/sockios.h>
      24             : #include <linux/limits.h>
      25             : #include <bpf/bpf.h>
      26             : #include <vlib/vlib.h>
      27             : #include <vlib/unix/unix.h>
      28             : #include <vlib/pci/pci.h>
      29             : #include <vppinfra/linux/netns.h>
      30             : #include <vppinfra/linux/sysfs.h>
      31             : #include <vppinfra/unix.h>
      32             : #include <vnet/ethernet/ethernet.h>
      33             : #include <vnet/interface/rx_queue_funcs.h>
      34             : #include <vnet/interface/tx_queue_funcs.h>
      35             : #include "af_xdp.h"
      36             : 
      37             : #ifndef XDP_UMEM_MIN_CHUNK_SIZE
      38             : #define XDP_UMEM_MIN_CHUNK_SIZE 2048
      39             : #endif
      40             : 
      41             : af_xdp_main_t af_xdp_main;
      42             : 
      43             : typedef struct
      44             : {
      45             :   u32 prod;
      46             :   u32 cons;
      47             : } gdb_af_xdp_pair_t;
      48             : 
      49             : gdb_af_xdp_pair_t
      50           0 : gdb_af_xdp_get_prod (const struct xsk_ring_prod *prod)
      51             : {
      52           0 :   gdb_af_xdp_pair_t pair = { *prod->producer, *prod->consumer };
      53           0 :   return pair;
      54             : }
      55             : 
      56             : gdb_af_xdp_pair_t
      57           0 : gdb_af_xdp_get_cons (const struct xsk_ring_cons * cons)
      58             : {
      59           0 :   gdb_af_xdp_pair_t pair = { *cons->producer, *cons->consumer };
      60           0 :   return pair;
      61             : }
      62             : 
      63             : static clib_error_t *
      64           0 : af_xdp_mac_change (vnet_hw_interface_t * hw, const u8 * old, const u8 * new)
      65             : {
      66           0 :   af_xdp_main_t *am = &af_xdp_main;
      67           0 :   af_xdp_device_t *ad = vec_elt_at_index (am->devices, hw->dev_instance);
      68           0 :   errno_t err = memcpy_s (ad->hwaddr, sizeof (ad->hwaddr), new, 6);
      69           0 :   if (err)
      70           0 :     return clib_error_return_code (0, -err, CLIB_ERROR_ERRNO_VALID,
      71             :                                    "mac change failed");
      72           0 :   return 0;
      73             : }
      74             : 
      75             : static clib_error_t *
      76           0 : af_xdp_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
      77             :                            u32 frame_size)
      78             : {
      79           0 :   af_xdp_main_t *am = &af_xdp_main;
      80           0 :   af_xdp_device_t *ad = vec_elt_at_index (am->devices, hw->dev_instance);
      81           0 :   af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "set mtu not supported yet");
      82           0 :   return vnet_error (VNET_ERR_UNSUPPORTED, 0);
      83             : }
      84             : 
      85             : static u32
      86           0 : af_xdp_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
      87             : {
      88           0 :   af_xdp_main_t *am = &af_xdp_main;
      89           0 :   af_xdp_device_t *ad = vec_elt_at_index (am->devices, hw->dev_instance);
      90             : 
      91           0 :   switch (flags)
      92             :     {
      93           0 :     case 0:
      94           0 :       af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "set unicast not supported yet");
      95           0 :       return ~0;
      96           0 :     case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
      97           0 :       af_xdp_log (VLIB_LOG_LEVEL_ERR, ad,
      98             :                   "set promiscuous not supported yet");
      99           0 :       return ~0;
     100             :     }
     101             : 
     102           0 :   af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "unknown flag %x requested", flags);
     103           0 :   return ~0;
     104             : }
     105             : 
     106             : int
     107           0 : af_xdp_enter_netns (char *netns, int *fds)
     108             : {
     109           0 :   *fds = *(fds + 1) = -1;
     110           0 :   if (netns != NULL)
     111             :     {
     112           0 :       *fds = clib_netns_open (NULL /* self */);
     113           0 :       if ((*(fds + 1) = clib_netns_open ((u8 *) netns)) == -1)
     114           0 :         return VNET_API_ERROR_SYSCALL_ERROR_8;
     115           0 :       if (clib_setns (*(fds + 1)) == -1)
     116           0 :         return VNET_API_ERROR_SYSCALL_ERROR_9;
     117             :     }
     118           0 :   return 0;
     119             : }
     120             : 
     121             : void
     122           0 : af_xdp_cleanup_netns (int *fds)
     123             : {
     124           0 :   if (*fds != -1)
     125           0 :     close (*fds);
     126             : 
     127           0 :   if (*(fds + 1) != -1)
     128           0 :     close (*(fds + 1));
     129             : 
     130           0 :   *fds = *(fds + 1) = -1;
     131           0 : }
     132             : 
     133             : int
     134           0 : af_xdp_exit_netns (char *netns, int *fds)
     135             : {
     136           0 :   int ret = 0;
     137           0 :   if (netns != NULL)
     138             :     {
     139           0 :       if (*fds != -1)
     140           0 :         ret = clib_setns (*fds);
     141             : 
     142           0 :       af_xdp_cleanup_netns (fds);
     143             :     }
     144             : 
     145           0 :   return ret;
     146             : }
     147             : 
     148             : static int
     149           0 : af_xdp_remove_program (af_xdp_device_t *ad)
     150             : {
     151           0 :   u32 curr_prog_id = 0;
     152             :   int ret;
     153             :   int ns_fds[2];
     154             : 
     155           0 :   af_xdp_enter_netns (ad->netns, ns_fds);
     156           0 :   ret = bpf_xdp_query_id (ad->linux_ifindex, XDP_FLAGS_UPDATE_IF_NOEXIST,
     157             :                           &curr_prog_id);
     158           0 :   if (ret != 0)
     159             :     {
     160           0 :       af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "bpf_xdp_query_id failed\n");
     161           0 :       goto err0;
     162             :     }
     163             : 
     164           0 :   ret = bpf_xdp_detach (ad->linux_ifindex, XDP_FLAGS_UPDATE_IF_NOEXIST, NULL);
     165           0 :   if (ret != 0)
     166             :     {
     167           0 :       af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "bpf_xdp_detach failed\n");
     168           0 :       goto err0;
     169             :     }
     170           0 :   af_xdp_exit_netns (ad->netns, ns_fds);
     171           0 :   if (ad->bpf_obj)
     172           0 :     bpf_object__close (ad->bpf_obj);
     173             : 
     174           0 :   return 0;
     175             : 
     176           0 : err0:
     177           0 :   af_xdp_exit_netns (ad->netns, ns_fds);
     178           0 :   return ret;
     179             : }
     180             : 
     181             : void
     182           0 : af_xdp_delete_if (vlib_main_t * vm, af_xdp_device_t * ad)
     183             : {
     184           0 :   vnet_main_t *vnm = vnet_get_main ();
     185           0 :   af_xdp_main_t *axm = &af_xdp_main;
     186             :   struct xsk_socket **xsk;
     187             :   struct xsk_umem **umem;
     188             :   int i;
     189             : 
     190           0 :   if (ad->hw_if_index)
     191             :     {
     192           0 :       vnet_hw_interface_set_flags (vnm, ad->hw_if_index, 0);
     193           0 :       ethernet_delete_interface (vnm, ad->hw_if_index);
     194             :     }
     195             : 
     196           0 :   for (i = 0; i < ad->txq_num; i++)
     197           0 :     clib_spinlock_free (&vec_elt (ad->txqs, i).lock);
     198             : 
     199           0 :   vec_foreach (xsk, ad->xsk)
     200           0 :     xsk_socket__delete (*xsk);
     201             : 
     202           0 :   vec_foreach (umem, ad->umem)
     203           0 :     xsk_umem__delete (*umem);
     204             : 
     205           0 :   for (i = 0; i < ad->rxq_num; i++)
     206           0 :     clib_file_del_by_index (&file_main, vec_elt (ad->rxqs, i).file_index);
     207             : 
     208           0 :   if (af_xdp_remove_program (ad) != 0)
     209           0 :     af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "Error while removing XDP program.\n");
     210             : 
     211           0 :   vec_free (ad->xsk);
     212           0 :   vec_free (ad->umem);
     213           0 :   vec_free (ad->buffer_template);
     214           0 :   vec_free (ad->rxqs);
     215           0 :   vec_free (ad->txqs);
     216           0 :   vec_free (ad->name);
     217           0 :   vec_free (ad->linux_ifname);
     218           0 :   vec_free (ad->netns);
     219           0 :   clib_error_free (ad->error);
     220           0 :   pool_put (axm->devices, ad);
     221           0 : }
     222             : 
     223             : static int
     224           0 : af_xdp_load_program (af_xdp_create_if_args_t * args, af_xdp_device_t * ad)
     225             : {
     226             :   int fd;
     227             :   struct bpf_program *bpf_prog;
     228           0 :   struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
     229             : 
     230           0 :   if (setrlimit (RLIMIT_MEMLOCK, &r))
     231           0 :     af_xdp_log (VLIB_LOG_LEVEL_WARNING, ad,
     232             :                 "setrlimit(%s) failed: %s (errno %d)", ad->linux_ifname,
     233             :                 strerror (errno), errno);
     234             : 
     235           0 :   ad->bpf_obj = bpf_object__open_file (args->prog, NULL);
     236           0 :   if (libbpf_get_error (ad->bpf_obj))
     237             :     {
     238           0 :       args->rv = VNET_API_ERROR_SYSCALL_ERROR_5;
     239           0 :       args->error = clib_error_return_unix (
     240             :         0, "bpf_object__open_file(%s) failed", args->prog);
     241           0 :       goto err0;
     242             :     }
     243             : 
     244           0 :   bpf_prog = bpf_object__next_program (ad->bpf_obj, NULL);
     245           0 :   if (!bpf_prog)
     246           0 :     goto err1;
     247             : 
     248           0 :   bpf_program__set_type (bpf_prog, BPF_PROG_TYPE_XDP);
     249             : 
     250           0 :   if (bpf_object__load (ad->bpf_obj))
     251           0 :     goto err1;
     252             : 
     253           0 :   fd = bpf_program__fd (bpf_prog);
     254             : 
     255           0 :   if (bpf_xdp_attach (ad->linux_ifindex, fd, XDP_FLAGS_UPDATE_IF_NOEXIST,
     256             :                       NULL))
     257             :     {
     258           0 :       args->rv = VNET_API_ERROR_SYSCALL_ERROR_6;
     259           0 :       args->error = clib_error_return_unix (0, "bpf_xdp_attach(%s) failed",
     260             :                                             ad->linux_ifname);
     261           0 :       goto err1;
     262             :     }
     263             : 
     264           0 :   return 0;
     265             : 
     266           0 : err1:
     267           0 :   bpf_object__close (ad->bpf_obj);
     268           0 :   ad->bpf_obj = 0;
     269           0 : err0:
     270           0 :   return -1;
     271             : }
     272             : 
     273             : static int
     274           0 : af_xdp_create_queue (vlib_main_t *vm, af_xdp_create_if_args_t *args,
     275             :                      af_xdp_device_t *ad, int qid)
     276             : {
     277             :   struct xsk_umem **umem;
     278             :   struct xsk_socket **xsk;
     279             :   af_xdp_rxq_t *rxq;
     280             :   af_xdp_txq_t *txq;
     281             :   struct xsk_umem_config umem_config;
     282             :   struct xsk_socket_config sock_config;
     283             :   struct xdp_options opt;
     284             :   socklen_t optlen;
     285           0 :   const int is_rx = qid < ad->rxq_num;
     286           0 :   const int is_tx = qid < ad->txq_num;
     287             : 
     288           0 :   umem = vec_elt_at_index (ad->umem, qid);
     289           0 :   xsk = vec_elt_at_index (ad->xsk, qid);
     290           0 :   rxq = vec_elt_at_index (ad->rxqs, qid);
     291           0 :   txq = vec_elt_at_index (ad->txqs, qid);
     292             : 
     293             :   /*
     294             :    * fq and cq must always be allocated even if unused
     295             :    * whereas rx and tx indicates whether we want rxq, txq, or both
     296             :    */
     297           0 :   struct xsk_ring_cons *rx = is_rx ? &rxq->rx : 0;
     298           0 :   struct xsk_ring_prod *fq = &rxq->fq;
     299           0 :   struct xsk_ring_prod *tx = is_tx ? &txq->tx : 0;
     300           0 :   struct xsk_ring_cons *cq = &txq->cq;
     301             :   int fd;
     302             : 
     303           0 :   memset (&umem_config, 0, sizeof (umem_config));
     304           0 :   umem_config.fill_size = args->rxq_size;
     305           0 :   umem_config.comp_size = args->txq_size;
     306           0 :   umem_config.frame_size =
     307           0 :     sizeof (vlib_buffer_t) + vlib_buffer_get_default_data_size (vm);
     308           0 :   umem_config.frame_headroom = sizeof (vlib_buffer_t);
     309           0 :   umem_config.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG;
     310           0 :   if (xsk_umem__create
     311           0 :       (umem, uword_to_pointer (vm->buffer_main->buffer_mem_start, void *),
     312           0 :        vm->buffer_main->buffer_mem_size, fq, cq, &umem_config))
     313             :     {
     314           0 :       uword sys_page_size = clib_mem_get_page_size ();
     315           0 :       args->rv = VNET_API_ERROR_SYSCALL_ERROR_1;
     316           0 :       args->error = clib_error_return_unix (0, "xsk_umem__create() failed");
     317             :       /* this should mimic the Linux kernel net/xdp/xdp_umem.c:xdp_umem_reg()
     318             :        * check */
     319           0 :       if (umem_config.frame_size < XDP_UMEM_MIN_CHUNK_SIZE ||
     320           0 :           umem_config.frame_size > sys_page_size)
     321           0 :         args->error = clib_error_return (
     322             :           args->error,
     323             :           "(unsupported data-size? (should be between %d and %d))",
     324             :           XDP_UMEM_MIN_CHUNK_SIZE - sizeof (vlib_buffer_t),
     325             :           sys_page_size - sizeof (vlib_buffer_t));
     326           0 :       goto err0;
     327             :     }
     328             : 
     329           0 :   memset (&sock_config, 0, sizeof (sock_config));
     330           0 :   sock_config.rx_size = args->rxq_size;
     331           0 :   sock_config.tx_size = args->txq_size;
     332           0 :   sock_config.bind_flags = XDP_USE_NEED_WAKEUP;
     333           0 :   switch (args->mode)
     334             :     {
     335           0 :     case AF_XDP_MODE_AUTO:
     336           0 :       break;
     337           0 :     case AF_XDP_MODE_COPY:
     338           0 :       sock_config.bind_flags |= XDP_COPY;
     339           0 :       break;
     340           0 :     case AF_XDP_MODE_ZERO_COPY:
     341           0 :       sock_config.bind_flags |= XDP_ZEROCOPY;
     342           0 :       break;
     343             :     }
     344           0 :   if (args->prog)
     345           0 :     sock_config.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
     346           0 :   if (xsk_socket__create
     347           0 :       (xsk, ad->linux_ifname, qid, *umem, rx, tx, &sock_config))
     348             :     {
     349           0 :       args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
     350           0 :       args->error =
     351           0 :         clib_error_return_unix (0,
     352             :                                 "xsk_socket__create() failed (is linux netdev %s up?)",
     353             :                                 ad->linux_ifname);
     354           0 :       goto err1;
     355             :     }
     356             : 
     357           0 :   fd = xsk_socket__fd (*xsk);
     358           0 :   if (args->prog)
     359             :     {
     360             :       struct bpf_map *map =
     361           0 :         bpf_object__find_map_by_name (ad->bpf_obj, "xsks_map");
     362           0 :       int ret = xsk_socket__update_xskmap (*xsk, bpf_map__fd (map));
     363           0 :       if (ret)
     364             :         {
     365           0 :           args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
     366           0 :           args->error = clib_error_return_unix (
     367             :             0, "xsk_socket__update_xskmap %s qid %d return %d",
     368             :             ad->linux_ifname, qid, ret);
     369           0 :           goto err2;
     370             :         }
     371             :     }
     372           0 :   optlen = sizeof (opt);
     373             : #ifndef SOL_XDP
     374             : #define SOL_XDP 283
     375             : #endif
     376           0 :   if (getsockopt (fd, SOL_XDP, XDP_OPTIONS, &opt, &optlen))
     377             :     {
     378           0 :       args->rv = VNET_API_ERROR_SYSCALL_ERROR_4;
     379           0 :       args->error =
     380           0 :         clib_error_return_unix (0, "getsockopt(XDP_OPTIONS) failed");
     381           0 :       goto err2;
     382             :     }
     383           0 :   if (opt.flags & XDP_OPTIONS_ZEROCOPY)
     384           0 :     ad->flags |= AF_XDP_DEVICE_F_ZEROCOPY;
     385             : 
     386           0 :   rxq->xsk_fd = is_rx ? fd : -1;
     387             : 
     388           0 :   if (is_tx)
     389             :     {
     390           0 :       txq->xsk_fd = fd;
     391           0 :       clib_spinlock_init (&txq->lock);
     392           0 :       if (is_rx && (ad->flags & AF_XDP_DEVICE_F_SYSCALL_LOCK))
     393             :         {
     394             :           /* This is a shared rx+tx queue and we need to lock before syscalls.
     395             :            * Prior to Linux 5.6 there is a race condition preventing to call
     396             :            * poll() and sendto() concurrently on AF_XDP sockets. This was
     397             :            * fixed with commit 11cc2d21499cabe7e7964389634ed1de3ee91d33
     398             :            * to workaround this issue, we protect the syscalls with a
     399             :            * spinlock. Note that it also prevents to use interrupt mode in
     400             :            * multi workers setup, because in this case the poll() is done in
     401             :            * the framework w/o any possibility to protect it.
     402             :            * See
     403             :            * https://lore.kernel.org/bpf/BYAPR11MB365382C5DB1E5FCC53242609C1549@BYAPR11MB3653.namprd11.prod.outlook.com/
     404             :            */
     405           0 :           clib_spinlock_init (&rxq->syscall_lock);
     406           0 :           txq->syscall_lock = rxq->syscall_lock;
     407             :         }
     408             :     }
     409             :   else
     410             :     {
     411           0 :       txq->xsk_fd = -1;
     412             :     }
     413             : 
     414           0 :   return 0;
     415             : 
     416           0 : err2:
     417           0 :   xsk_socket__delete (*xsk);
     418           0 : err1:
     419           0 :   xsk_umem__delete (*umem);
     420           0 : err0:
     421           0 :   *umem = 0;
     422           0 :   *xsk = 0;
     423           0 :   return -1;
     424             : }
     425             : 
     426             : static int
     427           0 : af_xdp_get_numa (const char *ifname)
     428             : {
     429             :   char *path;
     430             :   clib_error_t *err;
     431             :   int numa;
     432             : 
     433           0 :   path =
     434           0 :     (char *) format (0, "/sys/class/net/%s/device/numa_node%c", ifname, 0);
     435           0 :   err = clib_sysfs_read (path, "%d", &numa);
     436           0 :   if (err || numa < 0)
     437           0 :     numa = 0;
     438             : 
     439           0 :   clib_error_free (err);
     440           0 :   vec_free (path);
     441           0 :   return numa;
     442             : }
     443             : 
     444             : static void
     445           0 : af_xdp_get_q_count (const char *ifname, int *rxq_num, int *txq_num)
     446             : {
     447           0 :   struct ethtool_channels ec = { .cmd = ETHTOOL_GCHANNELS };
     448           0 :   struct ifreq ifr = { .ifr_data = (void *) &ec };
     449             :   int fd, err;
     450             : 
     451           0 :   *rxq_num = *txq_num = 1;
     452             : 
     453           0 :   fd = socket (AF_INET, SOCK_DGRAM, 0);
     454           0 :   if (fd < 0)
     455           0 :     return;
     456             : 
     457           0 :   snprintf (ifr.ifr_name, sizeof (ifr.ifr_name), "%s", ifname);
     458           0 :   err = ioctl (fd, SIOCETHTOOL, &ifr);
     459             : 
     460           0 :   close (fd);
     461             : 
     462           0 :   if (err)
     463           0 :     return;
     464             : 
     465           0 :   *rxq_num = clib_max (ec.combined_count, ec.rx_count);
     466           0 :   *txq_num = clib_max (ec.combined_count, ec.tx_count);
     467             : }
     468             : 
     469             : static clib_error_t *
     470           0 : af_xdp_device_rxq_read_ready (clib_file_t * f)
     471             : {
     472           0 :   vnet_hw_if_rx_queue_set_int_pending (vnet_get_main (), f->private_data);
     473           0 :   return 0;
     474             : }
     475             : 
     476             : static clib_error_t *
     477           0 : af_xdp_device_set_rxq_mode (const af_xdp_device_t *ad, af_xdp_rxq_t *rxq,
     478             :                             const af_xdp_rxq_mode_t mode)
     479             : {
     480           0 :   clib_file_main_t *fm = &file_main;
     481             :   clib_file_update_type_t update;
     482             :   clib_file_t *f;
     483             : 
     484           0 :   if (rxq->mode == mode)
     485           0 :     return 0;
     486             : 
     487           0 :   switch (mode)
     488             :     {
     489           0 :     case AF_XDP_RXQ_MODE_POLLING:
     490           0 :       update = UNIX_FILE_UPDATE_DELETE;
     491           0 :       break;
     492           0 :     case AF_XDP_RXQ_MODE_INTERRUPT:
     493           0 :       if (ad->flags & AF_XDP_DEVICE_F_SYSCALL_LOCK)
     494           0 :         return clib_error_create (
     495             :           "kernel workaround incompatible with interrupt mode");
     496           0 :       update = UNIX_FILE_UPDATE_ADD;
     497           0 :       break;
     498           0 :     default:
     499           0 :       ASSERT (0);
     500           0 :       return clib_error_create ("unknown rxq mode %i", mode);
     501             :     }
     502             : 
     503           0 :   f = clib_file_get (fm, rxq->file_index);
     504           0 :   fm->file_update (f, update);
     505           0 :   rxq->mode = mode;
     506           0 :   return 0;
     507             : }
     508             : 
     509             : static u32
     510           0 : af_xdp_find_rxq_for_thread (vnet_main_t *vnm, const af_xdp_device_t *ad,
     511             :                             const u32 thread)
     512             : {
     513             :   u32 i;
     514           0 :   for (i = 0; i < ad->rxq_num; i++)
     515             :     {
     516           0 :       const u32 qid = vec_elt (ad->rxqs, i).queue_index;
     517           0 :       const u32 tid = vnet_hw_if_get_rx_queue (vnm, qid)->thread_index;
     518           0 :       if (tid == thread)
     519           0 :         return i;
     520             :     }
     521           0 :   return ~0;
     522             : }
     523             : 
     524             : static clib_error_t *
     525           0 : af_xdp_finalize_queues (vnet_main_t *vnm, af_xdp_device_t *ad,
     526             :                         const int n_vlib_mains)
     527             : {
     528           0 :   clib_error_t *err = 0;
     529             :   int i;
     530             : 
     531           0 :   for (i = 0; i < ad->rxq_num; i++)
     532             :     {
     533           0 :       af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, i);
     534           0 :       rxq->queue_index = vnet_hw_if_register_rx_queue (
     535             :         vnm, ad->hw_if_index, i, VNET_HW_IF_RXQ_THREAD_ANY);
     536           0 :       u8 *desc = format (0, "%U rxq %d", format_af_xdp_device_name,
     537             :                          ad->dev_instance, i);
     538           0 :       clib_file_t f = {
     539           0 :         .file_descriptor = rxq->xsk_fd,
     540           0 :         .private_data = rxq->queue_index,
     541             :         .read_function = af_xdp_device_rxq_read_ready,
     542             :         .description = desc,
     543             :       };
     544           0 :       rxq->file_index = clib_file_add (&file_main, &f);
     545           0 :       vnet_hw_if_set_rx_queue_file_index (vnm, rxq->queue_index,
     546           0 :                                           rxq->file_index);
     547           0 :       err = af_xdp_device_set_rxq_mode (ad, rxq, AF_XDP_RXQ_MODE_POLLING);
     548           0 :       if (err)
     549           0 :         return err;
     550             :     }
     551             : 
     552           0 :   for (i = 0; i < ad->txq_num; i++)
     553           0 :     vec_elt (ad->txqs, i).queue_index =
     554           0 :       vnet_hw_if_register_tx_queue (vnm, ad->hw_if_index, i);
     555             : 
     556             :   /* We set the rxq and txq of the same queue pair on the same thread
     557             :    * by default to avoid locking because of the syscall lock. */
     558           0 :   int last_qid = clib_min (ad->rxq_num, ad->txq_num - 1);
     559           0 :   for (i = 0; i < n_vlib_mains; i++)
     560             :     {
     561             :       /* search for the 1st rxq assigned on this thread, if any */
     562           0 :       u32 qid = af_xdp_find_rxq_for_thread (vnm, ad, i);
     563             :       /* if this rxq is combined with a txq, use it. Otherwise, we'll
     564             :        * assign txq in a round-robin fashion. We start from the 1st txq
     565             :        * not shared with a rxq if possible... */
     566           0 :       qid = qid < ad->txq_num ? qid : (last_qid++ % ad->txq_num);
     567           0 :       vnet_hw_if_tx_queue_assign_thread (
     568           0 :         vnm, vec_elt (ad->txqs, qid).queue_index, i);
     569             :     }
     570             : 
     571           0 :   vnet_hw_if_update_runtime_data (vnm, ad->hw_if_index);
     572           0 :   return 0;
     573             : }
     574             : 
     575             : void
     576           0 : af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
     577             : {
     578           0 :   vnet_main_t *vnm = vnet_get_main ();
     579           0 :   vlib_thread_main_t *tm = vlib_get_thread_main ();
     580           0 :   vnet_eth_interface_registration_t eir = {};
     581           0 :   af_xdp_main_t *am = &af_xdp_main;
     582             :   af_xdp_device_t *ad;
     583             :   vnet_sw_interface_t *sw;
     584             :   int rxq_num, txq_num, q_num;
     585             :   int ns_fds[2];
     586             :   int i, ret;
     587             : 
     588           0 :   args->rxq_size = args->rxq_size ? args->rxq_size : 2 * VLIB_FRAME_SIZE;
     589           0 :   args->txq_size = args->txq_size ? args->txq_size : 2 * VLIB_FRAME_SIZE;
     590           0 :   args->rxq_num = args->rxq_num ? args->rxq_num : 1;
     591             : 
     592           0 :   if (!args->linux_ifname)
     593             :     {
     594           0 :       args->rv = VNET_API_ERROR_INVALID_VALUE;
     595           0 :       args->error = clib_error_return (0, "missing host interface");
     596           0 :       goto err0;
     597             :     }
     598             : 
     599           0 :   if (args->rxq_size < VLIB_FRAME_SIZE || args->txq_size < VLIB_FRAME_SIZE ||
     600           0 :       args->rxq_size > 65535 || args->txq_size > 65535 ||
     601           0 :       !is_pow2 (args->rxq_size) || !is_pow2 (args->txq_size))
     602             :     {
     603           0 :       args->rv = VNET_API_ERROR_INVALID_VALUE;
     604           0 :       args->error =
     605           0 :         clib_error_return (0,
     606             :                            "queue size must be a power of two between %i and 65535",
     607             :                            VLIB_FRAME_SIZE);
     608           0 :       goto err0;
     609             :     }
     610             : 
     611           0 :   ret = af_xdp_enter_netns (args->netns, ns_fds);
     612           0 :   if (ret)
     613             :     {
     614           0 :       args->rv = ret;
     615           0 :       args->error = clib_error_return (0, "enter netns %s failed, ret %d",
     616             :                                        args->netns, args->rv);
     617           0 :       goto err0;
     618             :     }
     619             : 
     620           0 :   af_xdp_get_q_count (args->linux_ifname, &rxq_num, &txq_num);
     621           0 :   if (args->rxq_num > rxq_num && AF_XDP_NUM_RX_QUEUES_ALL != args->rxq_num)
     622             :     {
     623           0 :       args->rv = VNET_API_ERROR_INVALID_VALUE;
     624           0 :       args->error = clib_error_create ("too many rxq requested (%d > %d)",
     625             :                                        args->rxq_num, rxq_num);
     626           0 :       goto err1;
     627             :     }
     628           0 :   rxq_num = clib_min (rxq_num, args->rxq_num);
     629           0 :   txq_num = clib_min (txq_num, tm->n_vlib_mains);
     630             : 
     631           0 :   pool_get_zero (am->devices, ad);
     632             : 
     633           0 :   if (tm->n_vlib_mains > 1 &&
     634           0 :       0 == (args->flags & AF_XDP_CREATE_FLAGS_NO_SYSCALL_LOCK))
     635           0 :     ad->flags |= AF_XDP_DEVICE_F_SYSCALL_LOCK;
     636             : 
     637           0 :   ad->linux_ifname = (char *) format (0, "%s", args->linux_ifname);
     638           0 :   vec_validate (ad->linux_ifname, IFNAMSIZ - 1);     /* libbpf expects ifname to be at least IFNAMSIZ */
     639             : 
     640           0 :   if (args->netns)
     641           0 :     ad->netns = (char *) format (0, "%s%c", args->netns, 0);
     642             : 
     643           0 :   ad->linux_ifindex = if_nametoindex (ad->linux_ifname);
     644           0 :   if (!ad->linux_ifindex)
     645             :     {
     646           0 :       args->rv = VNET_API_ERROR_INVALID_VALUE;
     647           0 :       args->error = clib_error_return_unix (0, "if_nametoindex(%s) failed",
     648             :                                             ad->linux_ifname);
     649           0 :       ad->linux_ifindex = ~0;
     650           0 :       goto err1;
     651             :     }
     652             : 
     653           0 :   if (args->prog &&
     654           0 :       (af_xdp_remove_program (ad) || af_xdp_load_program (args, ad)))
     655           0 :     goto err2;
     656             : 
     657           0 :   q_num = clib_max (rxq_num, txq_num);
     658           0 :   ad->rxq_num = rxq_num;
     659           0 :   ad->txq_num = txq_num;
     660             : 
     661           0 :   vec_validate_aligned (ad->umem, q_num - 1, CLIB_CACHE_LINE_BYTES);
     662           0 :   vec_validate_aligned (ad->xsk, q_num - 1, CLIB_CACHE_LINE_BYTES);
     663           0 :   vec_validate_aligned (ad->rxqs, q_num - 1, CLIB_CACHE_LINE_BYTES);
     664           0 :   vec_validate_aligned (ad->txqs, q_num - 1, CLIB_CACHE_LINE_BYTES);
     665             : 
     666           0 :   for (i = 0; i < q_num; i++)
     667             :     {
     668           0 :       if (af_xdp_create_queue (vm, args, ad, i))
     669             :         {
     670             :           /*
     671             :            * queue creation failed
     672             :            * it is only a fatal error if we could not create the number of rx
     673             :            * queues requested explicitly by the user and the user did not
     674             :            * request 'max'
     675             :            * we might create less tx queues than workers but this is ok
     676             :            */
     677           0 :           af_xdp_log (VLIB_LOG_LEVEL_DEBUG, ad,
     678             :                       "create interface failed to create queue qid=%d", i);
     679             : 
     680             :           /* fixup vectors length */
     681           0 :           vec_set_len (ad->umem, i);
     682           0 :           vec_set_len (ad->xsk, i);
     683           0 :           vec_set_len (ad->rxqs, i);
     684           0 :           vec_set_len (ad->txqs, i);
     685             : 
     686           0 :           ad->rxq_num = clib_min (i, rxq_num);
     687           0 :           ad->txq_num = clib_min (i, txq_num);
     688             : 
     689           0 :           if (i == 0 ||
     690           0 :               (i < rxq_num && AF_XDP_NUM_RX_QUEUES_ALL != args->rxq_num))
     691             :             {
     692           0 :               ad->rxq_num = ad->txq_num = 0;
     693           0 :               goto err2; /* failed creating requested rxq: fatal error, bailing
     694             :                             out */
     695             :             }
     696             : 
     697             : 
     698           0 :           args->rv = 0;
     699           0 :           clib_error_free (args->error);
     700           0 :           break;
     701             :         }
     702             :     }
     703             : 
     704           0 :   if (af_xdp_exit_netns (args->netns, ns_fds))
     705             :     {
     706           0 :       args->rv = VNET_API_ERROR_SYSCALL_ERROR_10;
     707           0 :       args->error = clib_error_return (0, "exit netns failed");
     708           0 :       goto err2;
     709             :     }
     710             : 
     711           0 :   ad->dev_instance = ad - am->devices;
     712           0 :   ad->per_interface_next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
     713           0 :   ad->pool =
     714           0 :     vlib_buffer_pool_get_default_for_numa (vm,
     715           0 :                                            af_xdp_get_numa
     716           0 :                                            (ad->linux_ifname));
     717           0 :   if (!args->name)
     718             :     {
     719           0 :       char *ifname = ad->linux_ifname;
     720           0 :       if (args->netns != NULL && strncmp (args->netns, "pid:", 4) == 0)
     721             :         {
     722           0 :           ad->name =
     723           0 :             (char *) format (0, "%s/%u", ifname, atoi (args->netns + 4));
     724             :         }
     725             :       else
     726           0 :         ad->name = (char *) format (0, "%s/%d", ifname, ad->dev_instance);
     727             :     }
     728             :   else
     729           0 :     ad->name = (char *) format (0, "%s", args->name);
     730             : 
     731           0 :   ethernet_mac_address_generate (ad->hwaddr);
     732             : 
     733             :   /* create interface */
     734           0 :   eir.dev_class_index = af_xdp_device_class.index;
     735           0 :   eir.dev_instance = ad->dev_instance;
     736           0 :   eir.address = ad->hwaddr;
     737           0 :   eir.cb.flag_change = af_xdp_flag_change;
     738           0 :   eir.cb.set_max_frame_size = af_xdp_set_max_frame_size;
     739           0 :   ad->hw_if_index = vnet_eth_register_interface (vnm, &eir);
     740             : 
     741           0 :   sw = vnet_get_hw_sw_interface (vnm, ad->hw_if_index);
     742           0 :   args->sw_if_index = ad->sw_if_index = sw->sw_if_index;
     743             : 
     744           0 :   vnet_hw_if_set_caps (vnm, ad->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
     745             : 
     746           0 :   vnet_hw_if_set_input_node (vnm, ad->hw_if_index, af_xdp_input_node.index);
     747             : 
     748           0 :   args->error = af_xdp_finalize_queues (vnm, ad, tm->n_vlib_mains);
     749           0 :   if (args->error)
     750             :     {
     751           0 :       args->rv = VNET_API_ERROR_SYSCALL_ERROR_7;
     752           0 :       goto err2;
     753             :     }
     754             : 
     755             :   /* buffer template */
     756           0 :   vec_validate_aligned (ad->buffer_template, 1, CLIB_CACHE_LINE_BYTES);
     757           0 :   ad->buffer_template->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
     758           0 :   ad->buffer_template->ref_count = 1;
     759           0 :   vnet_buffer (ad->buffer_template)->sw_if_index[VLIB_RX] = ad->sw_if_index;
     760           0 :   vnet_buffer (ad->buffer_template)->sw_if_index[VLIB_TX] = (u32) ~ 0;
     761           0 :   ad->buffer_template->buffer_pool_index = ad->pool;
     762             : 
     763           0 :   return;
     764             : 
     765           0 : err2:
     766           0 :   af_xdp_delete_if (vm, ad);
     767           0 : err1:
     768           0 :   af_xdp_cleanup_netns (ns_fds);
     769           0 : err0:
     770           0 :   vlib_log_err (am->log_class, "%U", format_clib_error, args->error);
     771             : }
     772             : 
     773             : static clib_error_t *
     774           0 : af_xdp_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
     775             : {
     776           0 :   vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
     777           0 :   af_xdp_main_t *am = &af_xdp_main;
     778           0 :   af_xdp_device_t *ad = vec_elt_at_index (am->devices, hi->dev_instance);
     779           0 :   uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
     780             : 
     781           0 :   if (ad->flags & AF_XDP_DEVICE_F_ERROR)
     782           0 :     return clib_error_return (0, "device is in error state");
     783             : 
     784           0 :   if (is_up)
     785             :     {
     786           0 :       vnet_hw_interface_set_flags (vnm, ad->hw_if_index,
     787             :                                    VNET_HW_INTERFACE_FLAG_LINK_UP);
     788           0 :       ad->flags |= AF_XDP_DEVICE_F_ADMIN_UP;
     789           0 :       af_xdp_device_input_refill (ad);
     790             :     }
     791             :   else
     792             :     {
     793           0 :       vnet_hw_interface_set_flags (vnm, ad->hw_if_index, 0);
     794           0 :       ad->flags &= ~AF_XDP_DEVICE_F_ADMIN_UP;
     795             :     }
     796           0 :   return 0;
     797             : }
     798             : 
     799             : static clib_error_t *
     800           0 : af_xdp_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid,
     801             :                                  vnet_hw_if_rx_mode mode)
     802             : {
     803           0 :   af_xdp_main_t *am = &af_xdp_main;
     804           0 :   vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
     805           0 :   af_xdp_device_t *ad = pool_elt_at_index (am->devices, hw->dev_instance);
     806           0 :   af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, qid);
     807             : 
     808           0 :   switch (mode)
     809             :     {
     810           0 :     default:                         /* fallthrough */
     811             :     case VNET_HW_IF_RX_MODE_UNKNOWN: /* fallthrough */
     812             :     case VNET_HW_IF_NUM_RX_MODES:
     813           0 :       return clib_error_create ("uknown rx mode - doing nothing");
     814           0 :     case VNET_HW_IF_RX_MODE_DEFAULT: /* fallthrough */
     815             :     case VNET_HW_IF_RX_MODE_POLLING:
     816           0 :       return af_xdp_device_set_rxq_mode (ad, rxq, AF_XDP_RXQ_MODE_POLLING);
     817           0 :     case VNET_HW_IF_RX_MODE_INTERRUPT: /* fallthrough */
     818             :     case VNET_HW_IF_RX_MODE_ADAPTIVE:
     819           0 :       return af_xdp_device_set_rxq_mode (ad, rxq, AF_XDP_RXQ_MODE_INTERRUPT);
     820             :     }
     821             : 
     822             :   ASSERT (0 && "unreachable");
     823             :   return clib_error_create ("unreachable");
     824             : }
     825             : 
     826             : static void
     827           0 : af_xdp_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
     828             :                                 u32 node_index)
     829             : {
     830           0 :   af_xdp_main_t *am = &af_xdp_main;
     831           0 :   vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
     832           0 :   af_xdp_device_t *ad = pool_elt_at_index (am->devices, hw->dev_instance);
     833             : 
     834             :   /* Shut off redirection */
     835           0 :   if (node_index == ~0)
     836             :     {
     837           0 :       ad->per_interface_next_index = node_index;
     838           0 :       return;
     839             :     }
     840             : 
     841           0 :   ad->per_interface_next_index =
     842           0 :     vlib_node_add_next (vlib_get_main (), af_xdp_input_node.index,
     843             :                         node_index);
     844             : }
     845             : 
     846             : static char *af_xdp_tx_func_error_strings[] = {
     847             : #define _(n,s) s,
     848             :   foreach_af_xdp_tx_func_error
     849             : #undef _
     850             : };
     851             : 
     852             : static void
     853           0 : af_xdp_clear (u32 dev_instance)
     854             : {
     855           0 :   af_xdp_main_t *am = &af_xdp_main;
     856           0 :   af_xdp_device_t *ad = pool_elt_at_index (am->devices, dev_instance);
     857           0 :   clib_error_free (ad->error);
     858           0 : }
     859             : 
     860             : /* *INDENT-OFF* */
     861       10079 : VNET_DEVICE_CLASS (af_xdp_device_class) = {
     862             :   .name = "AF_XDP interface",
     863             :   .format_device = format_af_xdp_device,
     864             :   .format_device_name = format_af_xdp_device_name,
     865             :   .admin_up_down_function = af_xdp_interface_admin_up_down,
     866             :   .rx_mode_change_function = af_xdp_interface_rx_mode_change,
     867             :   .rx_redirect_to_node = af_xdp_set_interface_next_node,
     868             :   .tx_function_n_errors = AF_XDP_TX_N_ERROR,
     869             :   .tx_function_error_strings = af_xdp_tx_func_error_strings,
     870             :   .mac_addr_change_function = af_xdp_mac_change,
     871             :   .clear_counters = af_xdp_clear,
     872             : };
     873             : /* *INDENT-ON* */
     874             : 
     875             : clib_error_t *
     876         559 : af_xdp_init (vlib_main_t * vm)
     877             : {
     878         559 :   af_xdp_main_t *am = &af_xdp_main;
     879             : 
     880         559 :   am->log_class = vlib_log_register_class ("af_xdp", 0);
     881             : 
     882         559 :   return 0;
     883             : }
     884             : 
     885        1119 : VLIB_INIT_FUNCTION (af_xdp_init);
     886             : 
     887             : /*
     888             :  * fd.io coding-style-patch-verification: ON
     889             :  *
     890             :  * Local Variables:
     891             :  * eval: (c-set-style "gnu")
     892             :  * End:
     893             :  */

Generated by: LCOV version 1.14