LCOV - code coverage report
Current view: top level - vnet/unix - tuntap.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 44 364 12.1 %
Date: 2023-10-26 01:39:38 Functions: 19 26 73.1 %

          Line data    Source code
       1             : /*
       2             :  *------------------------------------------------------------------
       3             :  * tuntap.c - kernel stack (reverse) punt/inject path
       4             :  *
       5             :  * Copyright (c) 2009 Cisco and/or its affiliates.
       6             :  * Licensed under the Apache License, Version 2.0 (the "License");
       7             :  * you may not use this file except in compliance with the License.
       8             :  * You may obtain a copy of the License at:
       9             :  *
      10             :  *     http://www.apache.org/licenses/LICENSE-2.0
      11             :  *
      12             :  * Unless required by applicable law or agreed to in writing, software
      13             :  * distributed under the License is distributed on an "AS IS" BASIS,
      14             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      15             :  * See the License for the specific language governing permissions and
      16             :  * limitations under the License.
      17             :  *------------------------------------------------------------------
      18             :  */
      19             : /**
      20             :  * @file
      21             :  * @brief  TunTap Kernel stack (reverse) punt/inject path.
      22             :  *
      23             :  * This driver runs in one of two distinct modes:
      24             :  * - "punt/inject" mode, where we send pkts not otherwise processed
      25             :  * by the forwarding to the Linux kernel stack, and
      26             :  *
      27             :  * - "normal interface" mode, where we treat the Linux kernel stack
      28             :  * as a peer.
      29             :  *
      30             :  * By default, we select punt/inject mode.
      31             :  */
      32             : 
      33             : #include <fcntl.h>                /* for open */
      34             : #include <sys/ioctl.h>
      35             : #include <sys/socket.h>
      36             : #include <sys/stat.h>
      37             : #include <sys/types.h>
      38             : #include <sys/uio.h>              /* for iovec */
      39             : #include <netinet/in.h>
      40             : 
      41             : #include <linux/if_arp.h>
      42             : #include <linux/if_tun.h>
      43             : 
      44             : #include <vlib/vlib.h>
      45             : #include <vlib/unix/unix.h>
      46             : 
      47             : #include <vnet/ip/ip.h>
      48             : #include <vnet/fib/fib_table.h>
      49             : 
      50             : #include <vnet/ethernet/ethernet.h>
      51             : #include <vnet/devices/devices.h>
      52             : #include <vnet/feature/feature.h>
      53             : 
      54             : static vnet_device_class_t tuntap_dev_class;
      55             : static vnet_hw_interface_class_t tuntap_interface_class;
      56             : 
      57             : static void tuntap_punt_frame (vlib_main_t * vm,
      58             :                                vlib_node_runtime_t * node,
      59             :                                vlib_frame_t * frame);
      60             : static void tuntap_nopunt_frame (vlib_main_t * vm,
      61             :                                  vlib_node_runtime_t * node,
      62             :                                  vlib_frame_t * frame);
      63             : 
      64             : typedef struct
      65             : {
      66             :   u32 sw_if_index;
      67             :   u8 is_v6;
      68             :   u8 addr[16];
      69             : } subif_address_t;
      70             : 
      71             : /**
      72             :  * @brief TUNTAP per thread struct
      73             :  */
      74             : typedef struct
      75             : {
      76             :   /** Vector of VLIB rx buffers to use.  We allocate them in blocks
      77             :      of VLIB_FRAME_SIZE (256). */
      78             :   u32 *rx_buffers;
      79             : 
      80             :   /** Vector of iovecs for readv/writev calls. */
      81             :   struct iovec *iovecs;
      82             : } tuntap_per_thread_t;
      83             : 
      84             : /**
      85             :  * @brief TUNTAP node main state
      86             :  */
      87             : typedef struct
      88             : {
      89             :   /** per thread variables */
      90             :   tuntap_per_thread_t *threads;
      91             : 
      92             :   /** File descriptors for /dev/net/tun and provisioning socket. */
      93             :   int dev_net_tun_fd, dev_tap_fd;
      94             : 
      95             :   /** Create a "tap" [ethernet] encaps device */
      96             :   int is_ether;
      97             : 
      98             :   /** 1 if a "normal" routed intfc, 0 if a punt/inject interface */
      99             : 
     100             :   int have_normal_interface;
     101             : 
     102             :   /** tap device destination MAC address. Required, or Linux drops pkts */
     103             :   u8 ether_dst_mac[6];
     104             : 
     105             :   /** Interface MTU in bytes and # of default sized buffers. */
     106             :   u32 mtu_bytes, mtu_buffers;
     107             : 
     108             :   /** Linux interface name for tun device. */
     109             :   char *tun_name;
     110             : 
     111             :   /** Pool of subinterface addresses */
     112             :   subif_address_t *subifs;
     113             : 
     114             :   /** Hash for subif addresses */
     115             :   mhash_t subif_mhash;
     116             : 
     117             :   /** Unix file index */
     118             :   u32 clib_file_index;
     119             : 
     120             :   /** For the "normal" interface, if configured */
     121             :   u32 hw_if_index, sw_if_index;
     122             : 
     123             : } tuntap_main_t;
     124             : 
     125             : static tuntap_main_t tuntap_main = {
     126             :   .tun_name = "vnet",
     127             : 
     128             :   /** Suitable defaults for an Ethernet-like tun/tap device */
     129             :   .mtu_bytes = 4096 + 256,
     130             : };
     131             : 
     132             : /**
     133             :  * @brief tuntap_tx
     134             :  * @node tuntap-tx
     135             :  *
     136             :  * Output node, writes the buffers comprising the incoming frame
     137             :  * to the tun/tap device, aka hands them to the Linux kernel stack.
     138             :  *
     139             :  * @param *vm - vlib_main_t
     140             :  * @param *node - vlib_node_runtime_t
     141             :  * @param *frame - vlib_frame_t
     142             :  *
     143             :  * @return rc - uword
     144             :  *
     145             :  */
     146             : static uword
     147           0 : tuntap_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
     148             : {
     149           0 :   u32 *buffers = vlib_frame_vector_args (frame);
     150           0 :   uword n_packets = frame->n_vectors;
     151           0 :   tuntap_main_t *tm = &tuntap_main;
     152           0 :   vnet_main_t *vnm = vnet_get_main ();
     153           0 :   vnet_interface_main_t *im = &vnm->interface_main;
     154           0 :   u32 n_bytes = 0;
     155             :   int i;
     156           0 :   u16 thread_index = vm->thread_index;
     157             : 
     158           0 :   for (i = 0; i < n_packets; i++)
     159             :     {
     160             :       struct iovec *iov;
     161             :       vlib_buffer_t *b;
     162             :       uword l;
     163             : 
     164           0 :       b = vlib_get_buffer (vm, buffers[i]);
     165             : 
     166           0 :       if (tm->is_ether && (!tm->have_normal_interface))
     167             :         {
     168           0 :           vlib_buffer_reset (b);
     169           0 :           clib_memcpy_fast (vlib_buffer_get_current (b), tm->ether_dst_mac,
     170             :                             6);
     171             :         }
     172             : 
     173             :       /* Re-set iovecs if present. */
     174           0 :       if (tm->threads[thread_index].iovecs)
     175           0 :         vec_set_len (tm->threads[thread_index].iovecs, 0);
     176             : 
     177             :       /** VLIB buffer chain -> Unix iovec(s). */
     178           0 :       vec_add2 (tm->threads[thread_index].iovecs, iov, 1);
     179           0 :       iov->iov_base = b->data + b->current_data;
     180           0 :       iov->iov_len = l = b->current_length;
     181             : 
     182           0 :       if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
     183             :         {
     184             :           do
     185             :             {
     186           0 :               b = vlib_get_buffer (vm, b->next_buffer);
     187             : 
     188           0 :               vec_add2 (tm->threads[thread_index].iovecs, iov, 1);
     189             : 
     190           0 :               iov->iov_base = b->data + b->current_data;
     191           0 :               iov->iov_len = b->current_length;
     192           0 :               l += b->current_length;
     193             :             }
     194           0 :           while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
     195             :         }
     196             : 
     197           0 :       if (writev (tm->dev_net_tun_fd, tm->threads[thread_index].iovecs,
     198           0 :                   vec_len (tm->threads[thread_index].iovecs)) < l)
     199           0 :         clib_unix_warning ("writev");
     200             : 
     201           0 :       n_bytes += l;
     202             :     }
     203             : 
     204             :   /* Update tuntap interface output stats. */
     205           0 :   vlib_increment_combined_counter (im->combined_sw_if_counters
     206             :                                    + VNET_INTERFACE_COUNTER_TX,
     207             :                                    vm->thread_index,
     208             :                                    tm->sw_if_index, n_packets, n_bytes);
     209             : 
     210             : 
     211             :   /** The normal interface path flattens the buffer chain */
     212           0 :   if (tm->have_normal_interface)
     213           0 :     vlib_buffer_free_no_next (vm, buffers, n_packets);
     214             :   else
     215           0 :     vlib_buffer_free (vm, buffers, n_packets);
     216             : 
     217           0 :   return n_packets;
     218             : }
     219             : 
     220             : /* *INDENT-OFF* */
     221      183788 : VLIB_REGISTER_NODE (tuntap_tx_node,static) = {
     222             :   .function = tuntap_tx,
     223             :   .name = "tuntap-tx",
     224             :   .type = VLIB_NODE_TYPE_INTERNAL,
     225             :   .vector_size = 4,
     226             : };
     227             : /* *INDENT-ON* */
     228             : 
     229             : /**
     230             :  * @brief TUNTAP receive node
     231             :  * @node tuntap-rx
     232             :  *
     233             :  * @param *vm - vlib_main_t
     234             :  * @param *node - vlib_node_runtime_t
     235             :  * @param *frame - vlib_frame_t
     236             :  *
     237             :  * @return rc - uword
     238             :  *
     239             :  */
     240             : static uword
     241           0 : tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
     242             : {
     243           0 :   tuntap_main_t *tm = &tuntap_main;
     244             :   vlib_buffer_t *b;
     245             :   u32 bi;
     246           0 :   const uword buffer_size = vlib_buffer_get_default_data_size (vm);
     247           0 :   u16 thread_index = vm->thread_index;
     248             : 
     249             :   /** Make sure we have some RX buffers. */
     250             :   {
     251           0 :     uword n_left = vec_len (tm->threads[thread_index].rx_buffers);
     252             :     uword n_alloc;
     253             : 
     254           0 :     if (n_left < VLIB_FRAME_SIZE / 2)
     255             :       {
     256           0 :         if (!tm->threads[thread_index].rx_buffers)
     257           0 :           vec_alloc (tm->threads[thread_index].rx_buffers, VLIB_FRAME_SIZE);
     258             : 
     259           0 :         n_alloc =
     260           0 :           vlib_buffer_alloc (vm,
     261           0 :                              tm->threads[thread_index].rx_buffers + n_left,
     262             :                              VLIB_FRAME_SIZE - n_left);
     263           0 :         vec_set_len (tm->threads[thread_index].rx_buffers, n_left + n_alloc);
     264             :       }
     265             :   }
     266             : 
     267             :   /** Allocate RX buffers from end of rx_buffers.
     268             :      Turn them into iovecs to pass to readv. */
     269             :   {
     270           0 :     uword i_rx = vec_len (tm->threads[thread_index].rx_buffers) - 1;
     271             :     vlib_buffer_t *b;
     272             :     word i, n_bytes_left, n_bytes_in_packet;
     273             : 
     274             :     /** We should have enough buffers left for an MTU sized packet. */
     275           0 :     ASSERT (vec_len (tm->threads[thread_index].rx_buffers) >=
     276             :             tm->mtu_buffers);
     277             : 
     278           0 :     vec_validate (tm->threads[thread_index].iovecs, tm->mtu_buffers - 1);
     279           0 :     for (i = 0; i < tm->mtu_buffers; i++)
     280             :       {
     281             :         b =
     282           0 :           vlib_get_buffer (vm,
     283           0 :                            tm->threads[thread_index].rx_buffers[i_rx - i]);
     284           0 :         tm->threads[thread_index].iovecs[i].iov_base = b->data;
     285           0 :         tm->threads[thread_index].iovecs[i].iov_len = buffer_size;
     286             :       }
     287             : 
     288             :     n_bytes_left =
     289           0 :       readv (tm->dev_net_tun_fd, tm->threads[thread_index].iovecs,
     290           0 :              tm->mtu_buffers);
     291           0 :     n_bytes_in_packet = n_bytes_left;
     292           0 :     if (n_bytes_left <= 0)
     293             :       {
     294           0 :         if (errno != EAGAIN)
     295           0 :           clib_unix_warning ("readv %d", n_bytes_left);
     296           0 :         return 0;
     297             :       }
     298             : 
     299           0 :     bi = tm->threads[thread_index].rx_buffers[i_rx];
     300             : 
     301             :     while (1)
     302             :       {
     303           0 :         b = vlib_get_buffer (vm, tm->threads[thread_index].rx_buffers[i_rx]);
     304           0 :         b->flags = 0;
     305           0 :         b->current_data = 0;
     306           0 :         b->current_length =
     307           0 :           n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
     308             : 
     309           0 :         n_bytes_left -= buffer_size;
     310             : 
     311           0 :         if (n_bytes_left <= 0)
     312             :           {
     313           0 :             break;
     314             :           }
     315             : 
     316           0 :         i_rx--;
     317           0 :         b->flags |= VLIB_BUFFER_NEXT_PRESENT;
     318           0 :         b->next_buffer = tm->threads[thread_index].rx_buffers[i_rx];
     319             :       }
     320             : 
     321             :     /** Interface counters for tuntap interface. */
     322           0 :     vlib_increment_combined_counter
     323             :       (vnet_main.interface_main.combined_sw_if_counters
     324             :        + VNET_INTERFACE_COUNTER_RX,
     325             :        thread_index, tm->sw_if_index, 1, n_bytes_in_packet);
     326             : 
     327           0 :     vec_set_len (tm->threads[thread_index].rx_buffers, i_rx);
     328             :   }
     329             : 
     330           0 :   b = vlib_get_buffer (vm, bi);
     331             : 
     332             :   {
     333             :     u32 next_index;
     334           0 :     uword n_trace = vlib_get_trace_count (vm, node);
     335             : 
     336           0 :     vnet_buffer (b)->sw_if_index[VLIB_RX] = tm->sw_if_index;
     337           0 :     vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
     338             : 
     339           0 :     b->error = node->errors[0];
     340             : 
     341           0 :     if (tm->is_ether)
     342             :       {
     343           0 :         next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
     344             :       }
     345             :     else
     346           0 :       switch (b->data[0] & 0xf0)
     347             :         {
     348           0 :         case 0x40:
     349           0 :           next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
     350           0 :           break;
     351           0 :         case 0x60:
     352           0 :           next_index = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
     353           0 :           break;
     354           0 :         default:
     355           0 :           next_index = VNET_DEVICE_INPUT_NEXT_DROP;
     356           0 :           break;
     357             :         }
     358             : 
     359             :     /* The linux kernel couldn't care less if our interface is up */
     360           0 :     if (tm->have_normal_interface)
     361             :       {
     362           0 :         vnet_main_t *vnm = vnet_get_main ();
     363             :         vnet_sw_interface_t *si;
     364           0 :         si = vnet_get_sw_interface (vnm, tm->sw_if_index);
     365           0 :         if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
     366           0 :           next_index = VNET_DEVICE_INPUT_NEXT_DROP;
     367             :       }
     368             : 
     369           0 :     vnet_feature_start_device_input (tm->sw_if_index, &next_index, b);
     370             : 
     371           0 :     vlib_set_next_frame_buffer (vm, node, next_index, bi);
     372             : 
     373           0 :     if (PREDICT_FALSE (n_trace > 0 && vlib_trace_buffer (vm, node, next_index, b,    /* follow_chain */
     374             :                                                          1)))
     375           0 :       vlib_set_trace_count (vm, node, n_trace - 1);
     376             :   }
     377             : 
     378           0 :   return 1;
     379             : }
     380             : 
     381             : /**
     382             :  * @brief TUNTAP_RX error strings
     383             :  */
     384             : static char *tuntap_rx_error_strings[] = {
     385             :   "unknown packet type",
     386             : };
     387             : 
     388             : /* *INDENT-OFF* */
     389      183788 : VLIB_REGISTER_NODE (tuntap_rx_node,static) = {
     390             :   .function = tuntap_rx,
     391             :   .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
     392             :   .name = "tuntap-rx",
     393             :   .sibling_of = "device-input",
     394             :   .type = VLIB_NODE_TYPE_INPUT,
     395             :   .state = VLIB_NODE_STATE_INTERRUPT,
     396             :   .vector_size = 4,
     397             :   .n_errors = 1,
     398             :   .error_strings = tuntap_rx_error_strings,
     399             : };
     400             : /* *INDENT-ON* */
     401             : 
     402             : /**
     403             :  * @brief Gets called when file descriptor is ready from epoll.
     404             :  *
     405             :  * @param *uf - clib_file_t
     406             :  *
     407             :  * @return error - clib_error_t
     408             :  */
     409             : static clib_error_t *
     410           0 : tuntap_read_ready (clib_file_t * uf)
     411             : {
     412           0 :   vlib_main_t *vm = vlib_get_main ();
     413           0 :   vlib_node_set_interrupt_pending (vm, tuntap_rx_node.index);
     414           0 :   return 0;
     415             : }
     416             : 
     417             : /**
     418             :  * @brief Clean up the tun/tap device
     419             :  *
     420             :  * @param *vm - vlib_main_t
     421             :  *
     422             :  * @return error - clib_error_t
     423             :  *
     424             :  */
     425             : static clib_error_t *
     426         575 : tuntap_exit (vlib_main_t * vm)
     427             : {
     428         575 :   tuntap_main_t *tm = &tuntap_main;
     429             :   struct ifreq ifr;
     430             :   int sfd;
     431             : 
     432             :   /* Not present. */
     433         575 :   if (!tm->dev_net_tun_fd || tm->dev_net_tun_fd < 0)
     434         575 :     return 0;
     435             : 
     436           0 :   sfd = socket (AF_INET, SOCK_STREAM, 0);
     437           0 :   if (sfd < 0)
     438           0 :     clib_unix_warning ("provisioning socket");
     439             : 
     440           0 :   clib_memset (&ifr, 0, sizeof (ifr));
     441           0 :   strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name) - 1);
     442             : 
     443             :   /* get flags, modify to bring down interface... */
     444           0 :   if (ioctl (sfd, SIOCGIFFLAGS, &ifr) < 0)
     445           0 :     clib_unix_warning ("SIOCGIFFLAGS");
     446             : 
     447           0 :   ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
     448             : 
     449           0 :   if (ioctl (sfd, SIOCSIFFLAGS, &ifr) < 0)
     450           0 :     clib_unix_warning ("SIOCSIFFLAGS");
     451             : 
     452             :   /* Turn off persistence */
     453           0 :   if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 0) < 0)
     454           0 :     clib_unix_warning ("TUNSETPERSIST");
     455           0 :   close (tm->dev_tap_fd);
     456           0 :   if (tm->dev_net_tun_fd >= 0)
     457           0 :     close (tm->dev_net_tun_fd);
     458           0 :   if (sfd >= 0)
     459           0 :     close (sfd);
     460             : 
     461           0 :   return 0;
     462             : }
     463             : 
     464        2876 : VLIB_MAIN_LOOP_EXIT_FUNCTION (tuntap_exit);
     465             : 
     466             : /**
     467             :  * @brief CLI function for tun/tap config
     468             :  *
     469             :  * @param *vm - vlib_main_t
     470             :  * @param *input - unformat_input_t
     471             :  *
     472             :  * @return error - clib_error_t
     473             :  *
     474             :  */
     475             : static clib_error_t *
     476         575 : tuntap_config (vlib_main_t * vm, unformat_input_t * input)
     477             : {
     478         575 :   tuntap_main_t *tm = &tuntap_main;
     479         575 :   clib_error_t *error = 0;
     480             :   struct ifreq ifr;
     481             :   u8 *name;
     482         575 :   int flags = IFF_TUN | IFF_NO_PI;
     483         575 :   int is_enabled = 0, is_ether = 0, have_normal_interface = 0;
     484         575 :   const uword buffer_size = vlib_buffer_get_default_data_size (vm);
     485             : 
     486         575 :   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     487             :     {
     488           0 :       if (unformat (input, "mtu %d", &tm->mtu_bytes))
     489             :         ;
     490           0 :       else if (unformat (input, "enable"))
     491           0 :         is_enabled = 1;
     492           0 :       else if (unformat (input, "disable"))
     493           0 :         is_enabled = 0;
     494           0 :       else if (unformat (input, "ethernet") || unformat (input, "ether"))
     495           0 :         is_ether = 1;
     496           0 :       else if (unformat (input, "have-normal-interface") ||
     497           0 :                unformat (input, "have-normal"))
     498           0 :         have_normal_interface = 1;
     499           0 :       else if (unformat (input, "name %s", &name))
     500           0 :         tm->tun_name = (char *) name;
     501             :       else
     502           0 :         return clib_error_return (0, "unknown input `%U'",
     503             :                                   format_unformat_error, input);
     504             :     }
     505             : 
     506         575 :   tm->dev_net_tun_fd = -1;
     507         575 :   tm->dev_tap_fd = -1;
     508             : 
     509         575 :   if (is_enabled == 0)
     510         575 :     return 0;
     511             : 
     512           0 :   if (geteuid ())
     513             :     {
     514           0 :       clib_warning ("tuntap disabled: must be superuser");
     515           0 :       return 0;
     516             :     }
     517             : 
     518           0 :   tm->is_ether = is_ether;
     519           0 :   tm->have_normal_interface = have_normal_interface;
     520             : 
     521           0 :   if (is_ether)
     522           0 :     flags = IFF_TAP | IFF_NO_PI;
     523             : 
     524           0 :   if ((tm->dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0)
     525             :     {
     526           0 :       error = clib_error_return_unix (0, "open /dev/net/tun");
     527           0 :       goto done;
     528             :     }
     529             : 
     530           0 :   clib_memset (&ifr, 0, sizeof (ifr));
     531           0 :   strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name) - 1);
     532           0 :   ifr.ifr_flags = flags;
     533           0 :   if (ioctl (tm->dev_net_tun_fd, TUNSETIFF, (void *) &ifr) < 0)
     534             :     {
     535           0 :       error = clib_error_return_unix (0, "ioctl TUNSETIFF");
     536           0 :       goto done;
     537             :     }
     538             : 
     539             :   /* Make it persistent, at least until we split. */
     540           0 :   if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 1) < 0)
     541             :     {
     542           0 :       error = clib_error_return_unix (0, "TUNSETPERSIST");
     543           0 :       goto done;
     544             :     }
     545             : 
     546             :   /* Open a provisioning socket */
     547           0 :   if ((tm->dev_tap_fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0)
     548             :     {
     549           0 :       error = clib_error_return_unix (0, "socket");
     550           0 :       goto done;
     551             :     }
     552             : 
     553             :   /* Find the interface index. */
     554             :   {
     555             :     struct ifreq ifr;
     556             :     struct sockaddr_ll sll;
     557             : 
     558           0 :     clib_memset (&ifr, 0, sizeof (ifr));
     559           0 :     strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name) - 1);
     560           0 :     if (ioctl (tm->dev_tap_fd, SIOCGIFINDEX, &ifr) < 0)
     561             :       {
     562           0 :         error = clib_error_return_unix (0, "ioctl SIOCGIFINDEX");
     563           0 :         goto done;
     564             :       }
     565             : 
     566             :     /* Bind the provisioning socket to the interface. */
     567           0 :     clib_memset (&sll, 0, sizeof (sll));
     568           0 :     sll.sll_family = AF_PACKET;
     569           0 :     sll.sll_ifindex = ifr.ifr_ifindex;
     570           0 :     sll.sll_protocol = htons (ETH_P_ALL);
     571             : 
     572           0 :     if (bind (tm->dev_tap_fd, (struct sockaddr *) &sll, sizeof (sll)) < 0)
     573             :       {
     574           0 :         error = clib_error_return_unix (0, "bind");
     575           0 :         goto done;
     576             :       }
     577             :   }
     578             : 
     579             :   /* non-blocking I/O on /dev/tapX */
     580             :   {
     581           0 :     int one = 1;
     582           0 :     if (ioctl (tm->dev_net_tun_fd, FIONBIO, &one) < 0)
     583             :       {
     584           0 :         error = clib_error_return_unix (0, "ioctl FIONBIO");
     585           0 :         goto done;
     586             :       }
     587             :   }
     588             : 
     589           0 :   tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size;
     590             : 
     591           0 :   ifr.ifr_mtu = tm->mtu_bytes;
     592           0 :   if (ioctl (tm->dev_tap_fd, SIOCSIFMTU, &ifr) < 0)
     593             :     {
     594           0 :       error = clib_error_return_unix (0, "ioctl SIOCSIFMTU");
     595           0 :       goto done;
     596             :     }
     597             : 
     598             :   /* get flags, modify to bring up interface... */
     599           0 :   if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
     600             :     {
     601           0 :       error = clib_error_return_unix (0, "ioctl SIOCGIFFLAGS");
     602           0 :       goto done;
     603             :     }
     604             : 
     605           0 :   ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
     606             : 
     607           0 :   if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
     608             :     {
     609           0 :       error = clib_error_return_unix (0, "ioctl SIOCSIFFLAGS");
     610           0 :       goto done;
     611             :     }
     612             : 
     613           0 :   if (is_ether)
     614             :     {
     615           0 :       if (ioctl (tm->dev_tap_fd, SIOCGIFHWADDR, &ifr) < 0)
     616             :         {
     617           0 :           error = clib_error_return_unix (0, "ioctl SIOCGIFHWADDR");
     618           0 :           goto done;
     619             :         }
     620             :       else
     621           0 :         clib_memcpy_fast (tm->ether_dst_mac, ifr.ifr_hwaddr.sa_data, 6);
     622             :     }
     623             : 
     624           0 :   if (have_normal_interface)
     625             :     {
     626           0 :       vnet_main_t *vnm = vnet_get_main ();
     627           0 :       vnet_eth_interface_registration_t eir = {};
     628             : 
     629           0 :       eir.dev_class_index = tuntap_dev_class.index;
     630           0 :       eir.address = tm->ether_dst_mac;
     631           0 :       tm->hw_if_index = vnet_eth_register_interface (vnm, &eir);
     632             : 
     633           0 :       tm->sw_if_index = tm->hw_if_index;
     634           0 :       vm->os_punt_frame = tuntap_nopunt_frame;
     635             :     }
     636             :   else
     637             :     {
     638           0 :       vnet_main_t *vnm = vnet_get_main ();
     639             :       vnet_hw_interface_t *hi;
     640             : 
     641           0 :       vm->os_punt_frame = tuntap_punt_frame;
     642             : 
     643           0 :       tm->hw_if_index = vnet_register_interface
     644             :         (vnm, tuntap_dev_class.index, 0 /* device instance */ ,
     645             :          tuntap_interface_class.index, 0);
     646           0 :       hi = vnet_get_hw_interface (vnm, tm->hw_if_index);
     647           0 :       tm->sw_if_index = hi->sw_if_index;
     648             : 
     649             :       /* Interface is always up. */
     650           0 :       vnet_hw_interface_set_flags (vnm, tm->hw_if_index,
     651             :                                    VNET_HW_INTERFACE_FLAG_LINK_UP);
     652           0 :       vnet_sw_interface_set_flags (vnm, tm->sw_if_index,
     653             :                                    VNET_SW_INTERFACE_FLAG_ADMIN_UP);
     654             :     }
     655             : 
     656             :   {
     657           0 :     clib_file_t template = { 0 };
     658           0 :     template.read_function = tuntap_read_ready;
     659           0 :     template.file_descriptor = tm->dev_net_tun_fd;
     660           0 :     template.description = format (0, "vnet tuntap");
     661           0 :     tm->clib_file_index = clib_file_add (&file_main, &template);
     662             :   }
     663             : 
     664           0 : done:
     665           0 :   if (error)
     666             :     {
     667           0 :       if (tm->dev_net_tun_fd >= 0)
     668           0 :         close (tm->dev_net_tun_fd);
     669           0 :       if (tm->dev_tap_fd >= 0)
     670           0 :         close (tm->dev_tap_fd);
     671             :     }
     672             : 
     673           0 :   return error;
     674             : }
     675             : /* Hooks tuntap_config up under the name "tuntap" -- presumably the startup-config section it parses (confirm). */
     676        7514 : VLIB_CONFIG_FUNCTION (tuntap_config, "tuntap");
     677             : 
     678             : /**
     679             :  * @brief Add or Del IP4 address to tun/tap interface (punt/inject mode only)
     680             :  * @param *im - ip4_main_t, its fib_masks[] supplies the netmask
     681             :  * @param opaque - uword, not read here
     682             :  * @param sw_if_index - u32, interface the address belongs to
     683             :  * @param *address - ip4_address_t being added or deleted
     684             :  * @param address_length - u32 prefix length, indexes im->fib_masks[]
     685             :  * @param if_address_index - u32, not read here
     686             :  * @param is_delete - u32, non-zero deletes, zero adds
     687             :  */
     688             : void
     689        4838 : tuntap_ip4_add_del_interface_address (ip4_main_t * im,
     690             :                                       uword opaque,
     691             :                                       u32 sw_if_index,
     692             :                                       ip4_address_t * address,
     693             :                                       u32 address_length,
     694             :                                       u32 if_address_index, u32 is_delete)
     695             : {
     696        4838 :   tuntap_main_t *tm = &tuntap_main;
     697             :   struct ifreq ifr;
     698             :   subif_address_t subif_addr, *ap;
     699             :   uword *p;
     700             : 
     701             :   /** Tuntap disabled, or using a "normal" interface. */
     702        4838 :   if (tm->have_normal_interface || tm->dev_tap_fd < 0)
     703        4838 :     return;
     704             : 
     705             :   /* if the address is being applied to an interface that is not in
     706             :    * the same table/VRF as this tap, then ignore it.
     707             :    * If we don't do this, overlapping address spaces in the different tables
     708             :    * break the linux host's routing tables */
     709           0 :   if (fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
     710             :                                            sw_if_index) !=
     711           0 :       fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, tm->sw_if_index))
     712           0 :     return;
     713             : 
     714             :   /** See if we already know about this subif (lookup key is built in a zeroed struct) */
     715           0 :   clib_memset (&subif_addr, 0, sizeof (subif_addr));
     716           0 :   subif_addr.sw_if_index = sw_if_index;
     717           0 :   clib_memcpy_fast (&subif_addr.addr, address, sizeof (*address));
     718             : 
     719           0 :   p = mhash_get (&tm->subif_mhash, &subif_addr);
     720             :   /* Known address: reuse its pool slot. Otherwise allocate one -- this also happens when deleting an unknown address. */
     721           0 :   if (p)
     722           0 :     ap = pool_elt_at_index (tm->subifs, p[0]);
     723             :   else
     724             :     {
     725           0 :       pool_get (tm->subifs, ap);
     726           0 :       *ap = subif_addr;
     727           0 :       mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0);
     728             :     }
     729             : 
     730             :   /* Use subif pool index to select alias device ("<tun_name>:<pool index>"). */
     731           0 :   clib_memset (&ifr, 0, sizeof (ifr));
     732           0 :   snprintf (ifr.ifr_name, sizeof (ifr.ifr_name),
     733           0 :             "%s:%d", tm->tun_name, (int) (ap - tm->subifs));
     734             : 
     735             :   /* the tuntap punt/inject is enabled for IPv4 RX so long as
     736             :    * any vpp interface has an IPv4 address.
     737             :    * this is also ref counted.
     738             :    */
     739           0 :   ip4_sw_interface_enable_disable (tm->sw_if_index, !is_delete);
     740             : 
     741           0 :   if (!is_delete)
     742             :     {
     743             :       struct sockaddr_in *sin;
     744             : 
     745           0 :       sin = (struct sockaddr_in *) &ifr.ifr_addr;
     746             : 
     747             :       /* Set ipv4 address, netmask. ioctl failures are only warned about. */
     748           0 :       sin->sin_family = AF_INET;
     749           0 :       clib_memcpy_fast (&sin->sin_addr.s_addr, address, 4);
     750           0 :       if (ioctl (tm->dev_tap_fd, SIOCSIFADDR, &ifr) < 0)
     751           0 :         clib_unix_warning ("ioctl SIOCSIFADDR");
     752             :       /* Same sockaddr, now carrying the netmask; NOTE(review): address_length indexes fib_masks[] without a range check. */
     753           0 :       sin->sin_addr.s_addr = im->fib_masks[address_length];
     754           0 :       if (ioctl (tm->dev_tap_fd, SIOCSIFNETMASK, &ifr) < 0)
     755           0 :         clib_unix_warning ("ioctl SIOCSIFNETMASK");
     756             :     }
     757             :   else /* delete: forget the subif entry and free its pool slot; no address ioctl is issued */
     758             :     {
     759           0 :       mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */ );
     760           0 :       pool_put (tm->subifs, ap);
     761             :     }
     762             : 
     763             :   /* get flags, then bring the alias up (add) or take it down (delete)... */
     764           0 :   if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
     765           0 :     clib_unix_warning ("ioctl SIOCGIFFLAGS");
     766             :   /* NOTE(review): a failed SIOCGIFFLAGS is only warned about; the flags written below then start from whatever ifr already holds. */
     767           0 :   if (is_delete)
     768           0 :     ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
     769             :   else
     770           0 :     ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
     771             : 
     772           0 :   if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
     773           0 :     clib_unix_warning ("ioctl SIOCSIFFLAGS");
     774             : }
     775             : 
     776             : /**
     777             :  * @brief Local declaration working around a known include file bug:
     778             :  * including @c <linux/ipv6.h> causes multiple definitions if
     779             :  * @c <netinet/in.h> is also included.
     780             :  */
     781             : struct in6_ifreq
     782             : {
     783             :   struct in6_addr ifr6_addr;   /* IPv6 address to set or delete */
     784             :   u32 ifr6_prefixlen;          /* prefix length of that address */
     785             :   int ifr6_ifindex;            /* interface index, filled from ifr.ifr_ifindex by the user below */
     786             : };
     787             : 
     788             : /**
     789             :  * @brief Add or Del IP6 address to tun/tap interface (punt/inject mode only).
     790             :  *
     791             :  * Both the v6 interface address API and the way ifconfig
     792             :  * displays subinterfaces differ from their v4 counterparts.
     793             :  * The code given here seems to work but YMMV.
     794             :  * Failures of socket() and ioctl() are only logged as warnings; nothing is returned.
     795             :  * @param *im - ip6_main_t, not read here
     796             :  * @param opaque - uword, not read here
     797             :  * @param sw_if_index - u32, interface the address belongs to
     798             :  * @param *address - ip6_address_t being added or deleted
     799             :  * @param address_length - u32 prefix length passed to the kernel
     800             :  * @param if_address_index - u32, not read here
     801             :  * @param is_delete - u32, non-zero deletes, zero adds
     802             :  */
     803             : void
     804        4115 : tuntap_ip6_add_del_interface_address (ip6_main_t * im,
     805             :                                       uword opaque,
     806             :                                       u32 sw_if_index,
     807             :                                       ip6_address_t * address,
     808             :                                       u32 address_length,
     809             :                                       u32 if_address_index, u32 is_delete)
     810             : {
     811        4115 :   tuntap_main_t *tm = &tuntap_main;
     812             :   struct ifreq ifr;
     813             :   struct in6_ifreq ifr6;
     814             :   subif_address_t subif_addr, *ap;
     815             :   uword *p;
     816             : 
     817             :   /* Tuntap disabled, or using a "normal" interface. */
     818        4115 :   if (tm->have_normal_interface || tm->dev_tap_fd < 0)
     819        4115 :     return;
     820             : 
     821             :   /* if the address is being applied to an interface that is not in
     822             :    * the same table/VRF as this tap, then ignore it.
     823             :    * If we don't do this, overlapping address spaces in the different tables
     824             :    * break the linux host's routing tables */
     825           0 :   if (fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
     826             :                                            sw_if_index) !=
     827           0 :       fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, tm->sw_if_index))
     828           0 :     return;
     829             : 
     830             :   /* See if we already know about this subif (is_v6 is part of the lookup key) */
     831           0 :   clib_memset (&subif_addr, 0, sizeof (subif_addr));
     832           0 :   subif_addr.sw_if_index = sw_if_index;
     833           0 :   subif_addr.is_v6 = 1;
     834           0 :   clib_memcpy_fast (&subif_addr.addr, address, sizeof (*address));
     835             : 
     836           0 :   p = mhash_get (&tm->subif_mhash, &subif_addr);
     837             :   /* Known address: reuse its pool slot. Otherwise allocate one -- this also happens when deleting an unknown address. */
     838           0 :   if (p)
     839           0 :     ap = pool_elt_at_index (tm->subifs, p[0]);
     840             :   else
     841             :     {
     842           0 :       pool_get (tm->subifs, ap);
     843           0 :       *ap = subif_addr;
     844           0 :       mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0);
     845             :     }
     846             : 
     847             :   /* Use subif pool index to select alias device ("<tun_name>:<pool index>"). */
     848           0 :   clib_memset (&ifr, 0, sizeof (ifr));
     849           0 :   clib_memset (&ifr6, 0, sizeof (ifr6));
     850           0 :   snprintf (ifr.ifr_name, sizeof (ifr.ifr_name),
     851           0 :             "%s:%d", tm->tun_name, (int) (ap - tm->subifs));
     852             : 
     853             :   /* the tuntap punt/inject is enabled for IPv6 RX so long as
     854             :    * any vpp interface has an IPv6 address.
     855             :    * this is also ref counted.
     856             :    */
     857           0 :   ip6_sw_interface_enable_disable (tm->sw_if_index, !is_delete);
     858             : 
     859           0 :   if (!is_delete)
     860             :     {
     861           0 :       int sockfd = socket (AF_INET6, SOCK_STREAM, 0);
     862           0 :       if (sockfd < 0)
     863           0 :         clib_unix_warning ("get ifindex socket");
     864             :       /* NOTE(review): execution continues with sockfd < 0 if socket() failed; the ioctls below are still attempted on it. */
     865           0 :       if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0)
     866           0 :         clib_unix_warning ("get ifindex");
     867             :       /* Build the v6 request from the interface index just looked up (0 from the memset if the lookup failed). */
     868           0 :       ifr6.ifr6_ifindex = ifr.ifr_ifindex;
     869           0 :       ifr6.ifr6_prefixlen = address_length;
     870           0 :       clib_memcpy_fast (&ifr6.ifr6_addr, address, 16);
     871             : 
     872           0 :       if (ioctl (sockfd, SIOCSIFADDR, &ifr6) < 0)
     873           0 :         clib_unix_warning ("set address");
     874             : 
     875           0 :       if (sockfd >= 0)
     876           0 :         close (sockfd);
     877             :     }
     878             :   else
     879             :     {
     880           0 :       int sockfd = socket (AF_INET6, SOCK_STREAM, 0);
     881           0 :       if (sockfd < 0)
     882           0 :         clib_unix_warning ("get ifindex socket");
     883             :       /* Same lookup and request as the add path (same sockfd < 0 caveat); only the final ioctl differs. */
     884           0 :       if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0)
     885           0 :         clib_unix_warning ("get ifindex");
     886             : 
     887           0 :       ifr6.ifr6_ifindex = ifr.ifr_ifindex;
     888           0 :       ifr6.ifr6_prefixlen = address_length;
     889           0 :       clib_memcpy_fast (&ifr6.ifr6_addr, address, 16);
     890             : 
     891           0 :       if (ioctl (sockfd, SIOCDIFADDR, &ifr6) < 0)
     892           0 :         clib_unix_warning ("del address");
     893             : 
     894           0 :       if (sockfd >= 0)
     895           0 :         close (sockfd);
     896             :       /* Forget the subif entry and free its pool slot, whether or not the ioctl succeeded. */
     897           0 :       mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */ );
     898           0 :       pool_put (tm->subifs, ap);
     899             :     }
     900             : }
     901             : 
     902             : /**
     903             :  * @brief Punt a frame: TX it out the tun/tap, then free the frame
     904             :  *        (the return value of tuntap_tx() is ignored)
     905             :  * @param *vm - vlib_main_t
     906             :  * @param *node - vlib_node_runtime_t
     907             :  * @param *frame - vlib_frame_t, freed before returning
     908             :  *
     909             :  */
     910             : static void
     911           0 : tuntap_punt_frame (vlib_main_t * vm,
     912             :                    vlib_node_runtime_t * node, vlib_frame_t * frame)
     913             : {
     914           0 :   tuntap_tx (vm, node, frame);
     915           0 :   vlib_frame_free (vm, frame);
     916           0 : }
     917             : 
     918             : /**
     919             :  * @brief Drop a frame without sending it: free its buffers, then the frame
     920             :  *
     921             :  * @param *vm - vlib_main_t
     922             :  * @param *node - vlib_node_runtime_t, not read here
     923             :  * @param *frame - vlib_frame_t, freed before returning
     924             :  *
     925             :  */
     926             : static void
     927           0 : tuntap_nopunt_frame (vlib_main_t * vm,
     928             :                      vlib_node_runtime_t * node, vlib_frame_t * frame)
     929             : {
     930           0 :   u32 *buffers = vlib_frame_vector_args (frame);
     931           0 :   uword n_packets = frame->n_vectors;
     932           0 :   vlib_buffer_free (vm, buffers, n_packets);
     933           0 :   vlib_frame_free (vm, frame);
     934           0 : }
     935             : 
     936             : /* *INDENT-OFF* */
     937        8063 : VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = {
     938             :   .name = "tuntap",   /* hw class passed to vnet_register_interface() in punt/inject mode */
     939             :   .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
     940             : };
     941             : /* *INDENT-ON* */
     942             : 
     943             : /**
     944             :  * @brief Format tun/tap interface name as "tuntap-<n>"
     945             :  *
     946             :  * @param *s - u8 - formatter string
     947             :  * @param *args - va_list, a single u32 is read from it
     948             :  *
     949             :  * @return *s - u8 - formatted string
     950             :  *
     951             :  */
     952             : static u8 *
     953           0 : format_tuntap_interface_name (u8 * s, va_list * args)
     954             : {
     955           0 :   u32 i = va_arg (*args, u32);
     956             :   /* presumably the device instance supplied by the interface layer -- confirm */
     957           0 :   s = format (s, "tuntap-%d", i);
     958           0 :   return s;
     959             : }
     960             : 
     961             : /**
     962             :  * @brief Device-class TX: send via tuntap_tx() in normal mode, else just free the buffers
     963             :  *
     964             :  * @param *vm - vlib_main_t
     965             :  * @param *node - vlib_node_runtime_t
     966             :  * @param *frame - vlib_frame_t
     967             :  *
     968             :  * @return uword - tuntap_tx() result in normal interface mode, else the number of buffers freed
     969             :  *
     970             :  */
     971             : static uword
     972           0 : tuntap_intfc_tx (vlib_main_t * vm,
     973             :                  vlib_node_runtime_t * node, vlib_frame_t * frame)
     974             : {
     975           0 :   tuntap_main_t *tm = &tuntap_main;
     976           0 :   u32 *buffers = vlib_frame_vector_args (frame);
     977           0 :   uword n_buffers = frame->n_vectors;
     978             : 
     979             :   /* Normal interface transmit happens only on the normal interface... */
     980           0 :   if (tm->have_normal_interface)
     981           0 :     return tuntap_tx (vm, node, frame);
     982             :   /* ...otherwise nothing is sent from here; the buffers are freed and counted. */
     983           0 :   vlib_buffer_free (vm, buffers, n_buffers);
     984           0 :   return n_buffers;
     985             : }
     986             : 
     987             : /* *INDENT-OFF* */
     988       12095 : VNET_DEVICE_CLASS (tuntap_dev_class,static) = {
     989             :   .name = "tuntap",
     990             :   .tx_function = tuntap_intfc_tx,   /* sends in normal interface mode, frees buffers otherwise */
     991             :   .format_device_name = format_tuntap_interface_name,   /* yields "tuntap-<n>" */
     992             : };
     993             : /* *INDENT-ON* */
     994             : 
     995             : /**
     996             :  * @brief tun/tap init: set up the subif hash, address callbacks and tm->threads
     997             :  *
     998             :  * @param *vm - vlib_main_t, not read here
     999             :  *
    1000             :  * @return error - clib_error_t, always 0 here
    1001             :  *
    1002             :  */
    1003             : static clib_error_t *
    1004         575 : tuntap_init (vlib_main_t * vm)
    1005             : {
    1006         575 :   ip4_main_t *im4 = &ip4_main;
    1007         575 :   ip6_main_t *im6 = &ip6_main;
    1008             :   ip4_add_del_interface_address_callback_t cb4;
    1009             :   ip6_add_del_interface_address_callback_t cb6;
    1010         575 :   tuntap_main_t *tm = &tuntap_main;
    1011         575 :   vlib_thread_main_t *m = vlib_get_thread_main ();
    1012             :   /* NOTE(review): assumes mhash_init takes (value bytes, key bytes), i.e. u32-sized values keyed by subif_address_t -- confirm. */
    1013         575 :   mhash_init (&tm->subif_mhash, sizeof (u32), sizeof (subif_address_t));
    1014             :   /* Hook IPv4 interface address add/del. */
    1015         575 :   cb4.function = tuntap_ip4_add_del_interface_address;
    1016         575 :   cb4.function_opaque = 0;
    1017         575 :   vec_add1 (im4->add_del_interface_address_callbacks, cb4);
    1018             :   /* Hook IPv6 interface address add/del. */
    1019         575 :   cb6.function = tuntap_ip6_add_del_interface_address;
    1020         575 :   cb6.function_opaque = 0;
    1021         575 :   vec_add1 (im6->add_del_interface_address_callbacks, cb6);
    1022         575 :   vec_validate_aligned (tm->threads, m->n_vlib_mains - 1,
    1023             :                         CLIB_CACHE_LINE_BYTES);   /* tm->threads gets one slot per vlib main */
    1024             : 
    1025         575 :   return 0;
    1026             : }
    1027             : 
    1028             : /* *INDENT-OFF* */
    1029       81215 : VLIB_INIT_FUNCTION (tuntap_init) =
    1030             : {
    1031             :   .runs_after = VLIB_INITS("ip4_init"),   /* only ip4_init is listed, although tuntap_init also appends to ip6_main's callbacks */
    1032             : };
    1033             : /* *INDENT-ON* */
    1034             : 
    1035             : /*
    1036             :  * fd.io coding-style-patch-verification: ON
    1037             :  *
    1038             :  * Local Variables:
    1039             :  * eval: (c-set-style "gnu")
    1040             :  * End:
    1041             :  */

Generated by: LCOV version 1.14