LCOV - code coverage report
Current view: top level - plugins/vxlan - vxlan.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 273 517 52.8 %
Date: 2023-07-05 22:20:52 Functions: 32 46 69.6 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2015 Cisco and/or its affiliates.
       3             :  * Licensed under the Apache License, Version 2.0 (the "License");
       4             :  * you may not use this file except in compliance with the License.
       5             :  * You may obtain a copy of the License at:
       6             :  *
       7             :  *     http://www.apache.org/licenses/LICENSE-2.0
       8             :  *
       9             :  * Unless required by applicable law or agreed to in writing, software
      10             :  * distributed under the License is distributed on an "AS IS" BASIS,
      11             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12             :  * See the License for the specific language governing permissions and
      13             :  * limitations under the License.
      14             :  */
      15             : #include <vxlan/vxlan.h>
      16             : #include <vnet/ip/format.h>
      17             : #include <vnet/fib/fib_entry.h>
      18             : #include <vnet/fib/fib_table.h>
      19             : #include <vnet/fib/fib_entry_track.h>
      20             : #include <vnet/mfib/mfib_table.h>
      21             : #include <vnet/adj/adj_mcast.h>
      22             : #include <vnet/adj/rewrite.h>
      23             : #include <vnet/dpo/drop_dpo.h>
      24             : #include <vnet/interface.h>
      25             : #include <vnet/flow/flow.h>
      26             : #include <vnet/udp/udp_local.h>
      27             : #include <vlib/vlib.h>
      28             : 
/**
 * @file
 * @brief VXLAN.
 *
 * VXLAN provides the features needed to allow L2 bridge domains (BDs)
 * to span multiple servers. This is done by building an L2 overlay on
 * top of an L3 underlay network using VXLAN tunnels.
 *
 * This makes it possible for servers to be co-located in the same data
 * center or to be separated geographically, as long as they are reachable
 * through the underlay L3 network.
 *
 * Such an L2 overlay bridge domain is referred to as a VXLAN
 * (Virtual eXtensible Local Area Network) segment.
 */
      44             : 
      45             : 
/*
 * Single global instance of the VXLAN state. Code in this file reaches the
 * tunnel pool (tunnels), the mcast_shared hash, the instance_used hash and
 * the per-address-family tunnel key tables through it.
 */
vxlan_main_t vxlan_main;
      47             : 
      48             : static u32
      49         980 : vxlan_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
      50             : {
      51             :   /* nothing for now */
      52         980 :   return 0;
      53             : }
      54             : 
      55             : static clib_error_t *
      56           0 : vxlan_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
      57             :                               u32 frame_size)
      58             : {
      59             :   /* nothing for now */
      60           0 :   return 0;
      61             : }
      62             : 
      63             : static u8 *
      64           0 : format_decap_next (u8 * s, va_list * args)
      65             : {
      66           0 :   u32 next_index = va_arg (*args, u32);
      67             : 
      68           0 :   if (next_index == VXLAN_INPUT_NEXT_DROP)
      69           0 :     return format (s, "drop");
      70             :   else
      71           0 :     return format (s, "index %d", next_index);
      72             :   return s;
      73             : }
      74             : 
      75             : u8 *
      76         484 : format_vxlan_tunnel (u8 * s, va_list * args)
      77             : {
      78         484 :   vxlan_tunnel_t *t = va_arg (*args, vxlan_tunnel_t *);
      79             : 
      80         484 :   s = format (s,
      81             :               "[%d] instance %d src %U dst %U src_port %d dst_port %d vni %d "
      82             :               "fib-idx %d sw-if-idx %d ",
      83             :               t->dev_instance, t->user_instance, format_ip46_address, &t->src,
      84             :               IP46_TYPE_ANY, format_ip46_address, &t->dst, IP46_TYPE_ANY,
      85         484 :               t->src_port, t->dst_port, t->vni, t->encap_fib_index,
      86             :               t->sw_if_index);
      87             : 
      88         484 :   s = format (s, "encap-dpo-idx %d ", t->next_dpo.dpoi_index);
      89             : 
      90         484 :   if (PREDICT_FALSE (t->decap_next_index != VXLAN_INPUT_NEXT_L2_INPUT))
      91           0 :     s = format (s, "decap-next-%U ", format_decap_next, t->decap_next_index);
      92             : 
      93         484 :   if (PREDICT_FALSE (ip46_address_is_multicast (&t->dst)))
      94          22 :     s = format (s, "mcast-sw-if-idx %d ", t->mcast_sw_if_index);
      95             : 
      96         484 :   if (t->flow_index != ~0)
      97           0 :     s = format (s, "flow-index %d [%U]", t->flow_index,
      98             :                 format_flow_enabled_hw, t->flow_index);
      99             : 
     100         484 :   return s;
     101             : }
     102             : 
     103             : static u8 *
     104        3416 : format_vxlan_name (u8 * s, va_list * args)
     105             : {
     106        3416 :   u32 dev_instance = va_arg (*args, u32);
     107        3416 :   vxlan_main_t *vxm = &vxlan_main;
     108             :   vxlan_tunnel_t *t;
     109             : 
     110        3416 :   if (dev_instance == ~0)
     111           0 :     return format (s, "<cached-unused>");
     112             : 
     113        3416 :   if (dev_instance >= vec_len (vxm->tunnels))
     114           0 :     return format (s, "<improperly-referenced>");
     115             : 
     116        3416 :   t = pool_elt_at_index (vxm->tunnels, dev_instance);
     117             : 
     118        3416 :   return format (s, "vxlan_tunnel%d", t->user_instance);
     119             : }
     120             : 
     121             : static clib_error_t *
     122        5840 : vxlan_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
     123             : {
     124        5840 :   u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
     125             :     VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
     126        5840 :   vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
     127             : 
     128        5840 :   return /* no error */ 0;
     129             : }
     130             : 
     131             : /* *INDENT-OFF* */
     132        1119 : VNET_DEVICE_CLASS (vxlan_device_class, static) = {
     133             :   .name = "VXLAN",
     134             :   .format_device_name = format_vxlan_name,
     135             :   .format_tx_trace = format_vxlan_encap_trace,
     136             :   .admin_up_down_function = vxlan_interface_admin_up_down,
     137             : };
     138             : /* *INDENT-ON* */
     139             : 
     140             : static u8 *
     141           0 : format_vxlan_header_with_length (u8 * s, va_list * args)
     142             : {
     143           0 :   u32 dev_instance = va_arg (*args, u32);
     144           0 :   s = format (s, "unimplemented dev %u", dev_instance);
     145           0 :   return s;
     146             : }
     147             : 
     148             : /* *INDENT-OFF* */
     149        1119 : VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = {
     150             :   .name = "VXLAN",
     151             :   .format_header = format_vxlan_header_with_length,
     152             :   .build_rewrite = default_build_rewrite,
     153             : };
     154             : /* *INDENT-ON* */
     155             : 
     156             : static void
     157         719 : vxlan_tunnel_restack_dpo (vxlan_tunnel_t * t)
     158             : {
     159         719 :   u8 is_ip4 = ip46_address_is_ip4 (&t->dst);
     160         719 :   dpo_id_t dpo = DPO_INVALID;
     161         719 :   fib_forward_chain_type_t forw_type = is_ip4 ?
     162         719 :     FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
     163             : 
     164         719 :   fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo);
     165             : 
     166             :   /* vxlan uses the payload hash as the udp source port
     167             :    * hence the packet's hash is unknown
     168             :    * skip single bucket load balance dpo's */
     169        2118 :   while (DPO_LOAD_BALANCE == dpo.dpoi_type)
     170             :     {
     171             :       const load_balance_t *lb;
     172             :       const dpo_id_t *choice;
     173             : 
     174        1399 :       lb = load_balance_get (dpo.dpoi_index);
     175        1399 :       if (lb->lb_n_buckets > 1)
     176           0 :         break;
     177             : 
     178        1399 :       choice = load_balance_get_bucket_i (lb, 0);
     179             : 
     180        1399 :       if (DPO_RECEIVE == choice->dpoi_type)
     181           1 :         dpo_copy (&dpo, drop_dpo_get (choice->dpoi_proto));
     182             :       else
     183        1398 :         dpo_copy (&dpo, choice);
     184             :     }
     185             : 
     186         719 :   u32 encap_index = is_ip4 ?
     187         719 :     vxlan4_encap_node.index : vxlan6_encap_node.index;
     188         719 :   dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
     189         719 :   dpo_reset (&dpo);
     190         719 : }
     191             : 
     192             : static vxlan_tunnel_t *
     193         241 : vxlan_tunnel_from_fib_node (fib_node_t * node)
     194             : {
     195         241 :   ASSERT (FIB_NODE_TYPE_VXLAN_TUNNEL == node->fn_type);
     196         241 :   return ((vxlan_tunnel_t *) (((char *) node) -
     197             :                               STRUCT_OFFSET_OF (vxlan_tunnel_t, node)));
     198             : }
     199             : 
     200             : /**
     201             :  * Function definition to backwalk a FIB node -
     202             :  * Here we will restack the new dpo of VXLAN DIP to encap node.
     203             :  */
     204             : static fib_node_back_walk_rc_t
     205         241 : vxlan_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
     206             : {
     207         241 :   vxlan_tunnel_restack_dpo (vxlan_tunnel_from_fib_node (node));
     208         241 :   return (FIB_NODE_BACK_WALK_CONTINUE);
     209             : }
     210             : 
     211             : /**
     212             :  * Function definition to get a FIB node from its index
     213             :  */
     214             : static fib_node_t *
     215         241 : vxlan_tunnel_fib_node_get (fib_node_index_t index)
     216             : {
     217             :   vxlan_tunnel_t *t;
     218         241 :   vxlan_main_t *vxm = &vxlan_main;
     219             : 
     220         241 :   t = pool_elt_at_index (vxm->tunnels, index);
     221             : 
     222         241 :   return (&t->node);
     223             : }
     224             : 
     225             : /**
     226             :  * Function definition to inform the FIB node that its last lock has gone.
     227             :  */
     228             : static void
     229           0 : vxlan_tunnel_last_lock_gone (fib_node_t * node)
     230             : {
     231             :   /*
     232             :    * The VXLAN tunnel is a root of the graph. As such
     233             :    * it never has children and thus is never locked.
     234             :    */
     235           0 :   ASSERT (0);
     236           0 : }
     237             : 
     238             : /*
     239             :  * Virtual function table registered by VXLAN tunnels
     240             :  * for participation in the FIB object graph.
     241             :  */
     242             : const static fib_node_vft_t vxlan_vft = {
     243             :   .fnv_get = vxlan_tunnel_fib_node_get,
     244             :   .fnv_last_lock = vxlan_tunnel_last_lock_gone,
     245             :   .fnv_back_walk = vxlan_tunnel_back_walk,
     246             : };
     247             : 
/*
 * X-macro listing the fields that vxlan_tunnel_t and the add/del argument
 * structure have in common. vnet_vxlan_add_del_tunnel defines _(x) as
 * "t->x = a->x;" and expands this list to copy each field from the
 * arguments into a newly allocated tunnel.
 */
#define foreach_copy_field                                                    \
  _ (vni)                                                                     \
  _ (mcast_sw_if_index)                                                       \
  _ (encap_fib_index)                                                         \
  _ (decap_next_index)                                                        \
  _ (src)                                                                     \
  _ (dst)                                                                     \
  _ (src_port)                                                                \
  _ (dst_port)
     257             : 
     258             : static void
     259        2920 : vxlan_rewrite (vxlan_tunnel_t * t, bool is_ip6)
     260             : {
     261             :   union
     262             :   {
     263             :     ip4_vxlan_header_t h4;
     264             :     ip6_vxlan_header_t h6;
     265             :   } h;
     266        2920 :   int len = is_ip6 ? sizeof h.h6 : sizeof h.h4;
     267             : 
     268             :   udp_header_t *udp;
     269             :   vxlan_header_t *vxlan;
     270             :   /* Fixed portion of the (outer) ip header */
     271             : 
     272        2920 :   clib_memset (&h, 0, sizeof (h));
     273        2920 :   if (!is_ip6)
     274             :     {
     275        2675 :       ip4_header_t *ip = &h.h4.ip4;
     276        2675 :       udp = &h.h4.udp, vxlan = &h.h4.vxlan;
     277        2675 :       ip->ip_version_and_header_length = 0x45;
     278        2675 :       ip->ttl = 254;
     279        2675 :       ip->protocol = IP_PROTOCOL_UDP;
     280             : 
     281        2675 :       ip->src_address = t->src.ip4;
     282        2675 :       ip->dst_address = t->dst.ip4;
     283             : 
     284             :       /* we fix up the ip4 header length and checksum after-the-fact */
     285        2675 :       ip->checksum = ip4_header_checksum (ip);
     286             :     }
     287             :   else
     288             :     {
     289         245 :       ip6_header_t *ip = &h.h6.ip6;
     290         245 :       udp = &h.h6.udp, vxlan = &h.h6.vxlan;
     291         245 :       ip->ip_version_traffic_class_and_flow_label =
     292         245 :         clib_host_to_net_u32 (6 << 28);
     293         245 :       ip->hop_limit = 255;
     294         245 :       ip->protocol = IP_PROTOCOL_UDP;
     295             : 
     296         245 :       ip->src_address = t->src.ip6;
     297         245 :       ip->dst_address = t->dst.ip6;
     298             :     }
     299             : 
     300             :   /* UDP header, randomize src port on something, maybe? */
     301        2920 :   udp->src_port = clib_host_to_net_u16 (t->src_port);
     302        2920 :   udp->dst_port = clib_host_to_net_u16 (t->dst_port);
     303             : 
     304             :   /* VXLAN header */
     305        2920 :   vnet_set_vni_and_flags (vxlan, t->vni);
     306        2920 :   vnet_rewrite_set_data (*t, &h, len);
     307        2920 : }
     308             : 
     309             : static bool
     310        2920 : vxlan_decap_next_is_valid (vxlan_main_t * vxm, u32 is_ip6,
     311             :                            u32 decap_next_index)
     312             : {
     313        2920 :   vlib_main_t *vm = vxm->vlib_main;
     314        2920 :   u32 input_idx = (!is_ip6) ?
     315        2920 :     vxlan4_input_node.index : vxlan6_input_node.index;
     316        2920 :   vlib_node_runtime_t *r = vlib_node_get_runtime (vm, input_idx);
     317             : 
     318        2920 :   return decap_next_index < r->n_next_nodes;
     319             : }
     320             : 
/*
 * State shared by all tunnels using the same multicast destination: the
 * mfib entry index and the multicast adjacency index. The union lets both
 * be read and written as one u64, which is the form in which the pair is
 * stored as a value in the vxlan_main.mcast_shared hash.
 */
/* *INDENT-OFF* */
typedef CLIB_PACKED(union
{
  struct
  {
    fib_node_index_t mfib_entry_index;
    adj_index_t mcast_adj_index;
  };
  u64 as_u64;
}) mcast_shared_t;
/* *INDENT-ON* */
     332             : 
     333             : static inline mcast_shared_t
     334        4664 : mcast_shared_get (ip46_address_t * ip)
     335             : {
     336        4664 :   ASSERT (ip46_address_is_multicast (ip));
     337        4664 :   uword *p = hash_get_mem (vxlan_main.mcast_shared, ip);
     338        4664 :   ALWAYS_ASSERT (p);
     339        4664 :   mcast_shared_t ret = {.as_u64 = *p };
     340        4664 :   return ret;
     341             : }
     342             : 
     343             : static inline void
     344        2222 : mcast_shared_add (ip46_address_t * dst, fib_node_index_t mfei, adj_index_t ai)
     345             : {
     346        2222 :   mcast_shared_t new_ep = {
     347             :     .mcast_adj_index = ai,
     348             :     .mfib_entry_index = mfei,
     349             :   };
     350             : 
     351        2222 :   hash_set_mem_alloc (&vxlan_main.mcast_shared, dst, new_ep.as_u64);
     352        2222 : }
     353             : 
     354             : static inline void
     355        2222 : mcast_shared_remove (ip46_address_t * dst)
     356             : {
     357        2222 :   mcast_shared_t ep = mcast_shared_get (dst);
     358             : 
     359        2222 :   adj_unlock (ep.mcast_adj_index);
     360        2222 :   mfib_table_entry_delete_index (ep.mfib_entry_index, MFIB_SOURCE_VXLAN);
     361             : 
     362        2222 :   hash_unset_mem_free (&vxlan_main.mcast_shared, dst);
     363        2222 : }
     364             : 
     365        5840 : int vnet_vxlan_add_del_tunnel
     366             :   (vnet_vxlan_add_del_tunnel_args_t * a, u32 * sw_if_indexp)
     367             : {
     368        5840 :   vxlan_main_t *vxm = &vxlan_main;
     369        5840 :   vnet_main_t *vnm = vxm->vnet_main;
     370             :   vxlan_decap_info_t *p;
     371        5840 :   u32 sw_if_index = ~0;
     372             :   vxlan4_tunnel_key_t key4;
     373             :   vxlan6_tunnel_key_t key6;
     374        5840 :   u32 is_ip6 = a->is_ip6;
     375        5840 :   vlib_main_t *vm = vlib_get_main ();
     376             :   u8 hw_addr[6];
     377             : 
     378             :   /* Set udp-ports */
     379        5840 :   if (a->src_port == 0)
     380          32 :     a->src_port = is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
     381             : 
     382        5840 :   if (a->dst_port == 0)
     383          32 :     a->dst_port = is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
     384             : 
     385             :   int not_found;
     386        5840 :   if (!is_ip6)
     387             :     {
     388             :       /* ip4 mcast is indexed by mcast addr only */
     389        5350 :       key4.key[0] = ip46_address_is_multicast (&a->dst) ?
     390        5350 :                       a->dst.ip4.as_u32 :
     391         488 :                       a->dst.ip4.as_u32 | (((u64) a->src.ip4.as_u32) << 32);
     392        5350 :       key4.key[1] = ((u64) clib_host_to_net_u16 (a->src_port) << 48) |
     393       10700 :                     (((u64) a->encap_fib_index) << 32) |
     394        5350 :                     clib_host_to_net_u32 (a->vni << 8);
     395             :       not_found =
     396        5350 :         clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
     397        5350 :       p = (void *) &key4.value;
     398             :     }
     399             :   else
     400             :     {
     401         490 :       key6.key[0] = a->dst.ip6.as_u64[0];
     402         490 :       key6.key[1] = a->dst.ip6.as_u64[1];
     403         490 :       key6.key[2] = (((u64) clib_host_to_net_u16 (a->src_port) << 48) |
     404         980 :                      ((u64) a->encap_fib_index) << 32) |
     405         490 :                     clib_host_to_net_u32 (a->vni << 8);
     406             :       not_found =
     407         490 :         clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
     408         490 :       p = (void *) &key6.value;
     409             :     }
     410             : 
     411        5840 :   if (not_found)
     412        2920 :     p = 0;
     413             : 
     414        5840 :   if (a->is_add)
     415             :     {
     416        2920 :       l2input_main_t *l2im = &l2input_main;
     417             :       u32 dev_instance;         /* real dev instance tunnel index */
     418             :       u32 user_instance;        /* request and actual instance number */
     419             : 
     420             :       /* adding a tunnel: tunnel must not already exist */
     421        2920 :       if (p)
     422           0 :         return VNET_API_ERROR_TUNNEL_EXIST;
     423             : 
     424             :       /*if not set explicitly, default to l2 */
     425        2920 :       if (a->decap_next_index == ~0)
     426        2920 :         a->decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
     427        2920 :       if (!vxlan_decap_next_is_valid (vxm, is_ip6, a->decap_next_index))
     428           0 :         return VNET_API_ERROR_INVALID_DECAP_NEXT;
     429             : 
     430             :       vxlan_tunnel_t *t;
     431        2920 :       pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES);
     432        2920 :       clib_memset (t, 0, sizeof (*t));
     433        2920 :       dev_instance = t - vxm->tunnels;
     434             : 
     435             :       /* copy from arg structure */
     436             : #define _(x) t->x = a->x;
     437        2920 :       foreach_copy_field;
     438             : #undef _
     439             : 
     440        2920 :       vxlan_rewrite (t, is_ip6);
     441             :       /*
     442             :        * Reconcile the real dev_instance and a possible requested instance.
     443             :        */
     444        2920 :       user_instance = a->instance;
     445        2920 :       if (user_instance == ~0)
     446        2920 :         user_instance = dev_instance;
     447        2920 :       if (hash_get (vxm->instance_used, user_instance))
     448             :         {
     449           0 :           pool_put (vxm->tunnels, t);
     450           0 :           return VNET_API_ERROR_INSTANCE_IN_USE;
     451             :         }
     452             : 
     453        2920 :       hash_set (vxm->instance_used, user_instance, 1);
     454             : 
     455        2920 :       t->dev_instance = dev_instance;        /* actual */
     456        2920 :       t->user_instance = user_instance; /* name */
     457        2920 :       t->flow_index = ~0;
     458             : 
     459        2920 :       if (a->is_l3)
     460           0 :         t->hw_if_index =
     461           0 :           vnet_register_interface (vnm, vxlan_device_class.index, dev_instance,
     462             :                                    vxlan_hw_class.index, dev_instance);
     463             :       else
     464             :         {
     465        2920 :           vnet_eth_interface_registration_t eir = {};
     466        2920 :           f64 now = vlib_time_now (vm);
     467             :           u32 rnd;
     468        2920 :           rnd = (u32) (now * 1e6);
     469        2920 :           rnd = random_u32 (&rnd);
     470        2920 :           memcpy (hw_addr + 2, &rnd, sizeof (rnd));
     471        2920 :           hw_addr[0] = 2;
     472        2920 :           hw_addr[1] = 0xfe;
     473             : 
     474        2920 :           eir.dev_class_index = vxlan_device_class.index;
     475        2920 :           eir.dev_instance = dev_instance;
     476        2920 :           eir.address = hw_addr;
     477        2920 :           eir.cb.flag_change = vxlan_eth_flag_change;
     478        2920 :           eir.cb.set_max_frame_size = vxlan_eth_set_max_frame_size;
     479        2920 :           t->hw_if_index = vnet_eth_register_interface (vnm, &eir);
     480             :         }
     481             : 
     482        2920 :       vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
     483             : 
     484             :       /* Set vxlan tunnel output node */
     485        2920 :       u32 encap_index = !is_ip6 ?
     486        2920 :         vxlan4_encap_node.index : vxlan6_encap_node.index;
     487        2920 :       vnet_set_interface_output_node (vnm, t->hw_if_index, encap_index);
     488             : 
     489        2920 :       t->sw_if_index = sw_if_index = hi->sw_if_index;
     490             : 
     491             :       /* copy the key */
     492             :       int add_failed;
     493        2920 :       if (is_ip6)
     494             :         {
     495         245 :           key6.value = (u64) dev_instance;
     496         245 :           add_failed = clib_bihash_add_del_24_8 (&vxm->vxlan6_tunnel_by_key,
     497             :                                                  &key6, 1 /*add */ );
     498             :         }
     499             :       else
     500             :         {
     501        2675 :           vxlan_decap_info_t di = {.sw_if_index = t->sw_if_index, };
     502        2675 :           if (ip46_address_is_multicast (&t->dst))
     503        2431 :             di.local_ip = t->src.ip4;
     504             :           else
     505         244 :             di.next_index = t->decap_next_index;
     506        2675 :           key4.value = di.as_u64;
     507        2675 :           add_failed = clib_bihash_add_del_16_8 (&vxm->vxlan4_tunnel_by_key,
     508             :                                                  &key4, 1 /*add */ );
     509             :         }
     510             : 
     511        2920 :       if (add_failed)
     512             :         {
     513           0 :           if (a->is_l3)
     514           0 :             vnet_delete_hw_interface (vnm, t->hw_if_index);
     515             :           else
     516           0 :             ethernet_delete_interface (vnm, t->hw_if_index);
     517           0 :           hash_unset (vxm->instance_used, t->user_instance);
     518           0 :           pool_put (vxm->tunnels, t);
     519           0 :           return VNET_API_ERROR_INVALID_REGISTRATION;
     520             :         }
     521             : 
     522        3219 :       vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index,
     523             :                                ~0);
     524        2920 :       vxm->tunnel_index_by_sw_if_index[sw_if_index] = dev_instance;
     525             : 
     526             :       /* setup l2 input config with l2 feature and bd 0 to drop packet */
     527        2920 :       vec_validate (l2im->configs, sw_if_index);
     528        2920 :       l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
     529        2920 :       l2im->configs[sw_if_index].bd_index = 0;
     530             : 
     531        2920 :       vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
     532        2920 :       si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
     533        2920 :       vnet_sw_interface_set_flags (vnm, sw_if_index,
     534             :                                    VNET_SW_INTERFACE_FLAG_ADMIN_UP);
     535             : 
     536        2920 :       fib_node_init (&t->node, FIB_NODE_TYPE_VXLAN_TUNNEL);
     537             :       fib_prefix_t tun_dst_pfx;
     538        2920 :       vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
     539             : 
     540        2920 :       fib_protocol_t fp = fib_ip_proto (is_ip6);
     541        2920 :       fib_prefix_from_ip46_addr (fp, &t->dst, &tun_dst_pfx);
     542        2920 :       if (!ip46_address_is_multicast (&t->dst))
     543             :         {
     544             :           /* Unicast tunnel -
     545             :            * source the FIB entry for the tunnel's destination
     546             :            * and become a child thereof. The tunnel will then get poked
     547             :            * when the forwarding for the entry updates, and the tunnel can
     548             :            * re-stack accordingly
     549             :            */
     550         478 :           vtep_addr_ref (&vxm->vtep_table, t->encap_fib_index, &t->src);
     551         956 :           t->fib_entry_index = fib_entry_track (t->encap_fib_index,
     552             :                                                 &tun_dst_pfx,
     553             :                                                 FIB_NODE_TYPE_VXLAN_TUNNEL,
     554             :                                                 dev_instance,
     555         478 :                                                 &t->sibling_index);
     556         478 :           vxlan_tunnel_restack_dpo (t);
     557             :         }
     558             :       else
     559             :         {
     560             :           /* Multicast tunnel -
     561             :            * as the same mcast group can be used for multiple mcast tunnels
     562             :            * with different VNIs, create the output fib adjacency only if
     563             :            * it does not already exist
     564             :            */
     565        2442 :           if (vtep_addr_ref (&vxm->vtep_table,
     566        2442 :                              t->encap_fib_index, &t->dst) == 1)
     567             :             {
     568             :               fib_node_index_t mfei;
     569             :               adj_index_t ai;
     570        4444 :               fib_route_path_t path = {
     571        2222 :                 .frp_proto = fib_proto_to_dpo (fp),
     572             :                 .frp_addr = zero_addr,
     573             :                 .frp_sw_if_index = 0xffffffff,
     574             :                 .frp_fib_index = ~0,
     575             :                 .frp_weight = 1,
     576             :                 .frp_flags = FIB_ROUTE_PATH_LOCAL,
     577             :                 .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
     578             :               };
     579        2222 :               const mfib_prefix_t mpfx = {
     580             :                 .fp_proto = fp,
     581             :                 .fp_len = (is_ip6 ? 128 : 32),
     582             :                 .fp_grp_addr = tun_dst_pfx.fp_addr,
     583             :               };
     584             : 
     585             :               /*
     586             :                * Setup the (*,G) to receive traffic on the mcast group
     587             :                *  - the forwarding interface is for-us
     588             :                *  - the accepting interface is that from the API
     589             :                */
     590        2222 :               mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
     591             :                                             MFIB_SOURCE_VXLAN,
     592             :                                             MFIB_ENTRY_FLAG_NONE, &path);
     593             : 
     594        2222 :               path.frp_sw_if_index = a->mcast_sw_if_index;
     595        2222 :               path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
     596        2222 :               path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
     597        2222 :               mfei = mfib_table_entry_path_update (
     598        2222 :                 t->encap_fib_index, &mpfx, MFIB_SOURCE_VXLAN,
     599             :                 MFIB_ENTRY_FLAG_NONE, &path);
     600             : 
     601             :               /*
     602             :                * Create the mcast adjacency to send traffic to the group
     603             :                */
     604        2222 :               ai = adj_mcast_add_or_lock (fp,
     605        2222 :                                           fib_proto_to_link (fp),
     606             :                                           a->mcast_sw_if_index);
     607             : 
     608             :               /*
     609             :                * create a new end-point
     610             :                */
     611        2222 :               mcast_shared_add (&t->dst, mfei, ai);
     612             :             }
     613             : 
     614        2442 :           dpo_id_t dpo = DPO_INVALID;
     615        2442 :           mcast_shared_t ep = mcast_shared_get (&t->dst);
     616             : 
     617             :           /* Stack shared mcast dst mac addr rewrite on encap */
     618        2442 :           dpo_set (&dpo, DPO_ADJACENCY_MCAST,
     619        2442 :                    fib_proto_to_dpo (fp), ep.mcast_adj_index);
     620             : 
     621        2442 :           dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
     622        2442 :           dpo_reset (&dpo);
     623        2442 :           flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
     624             :         }
     625             : 
     626        2920 :       vnet_get_sw_interface (vnet_get_main (), sw_if_index)->flood_class =
     627             :         flood_class;
     628             :     }
     629             :   else
     630             :     {
     631             :       /* deleting a tunnel: tunnel must exist */
     632        2920 :       if (!p)
     633           0 :         return VNET_API_ERROR_NO_SUCH_ENTRY;
     634             : 
     635        2920 :       u32 instance = is_ip6 ? key6.value :
     636        2675 :         vxm->tunnel_index_by_sw_if_index[p->sw_if_index];
     637        2920 :       vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, instance);
     638             : 
     639        2920 :       sw_if_index = t->sw_if_index;
     640        2920 :       vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
     641             : 
     642        2920 :       vxm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
     643             : 
     644        2920 :       if (!is_ip6)
     645        2675 :         clib_bihash_add_del_16_8 (&vxm->vxlan4_tunnel_by_key, &key4,
     646             :                                   0 /*del */ );
     647             :       else
     648         245 :         clib_bihash_add_del_24_8 (&vxm->vxlan6_tunnel_by_key, &key6,
     649             :                                   0 /*del */ );
     650             : 
     651        2920 :       if (!ip46_address_is_multicast (&t->dst))
     652             :         {
     653         478 :           if (t->flow_index != ~0)
     654           0 :             vnet_flow_del (vnm, t->flow_index);
     655             : 
     656         478 :           vtep_addr_unref (&vxm->vtep_table, t->encap_fib_index, &t->src);
     657         478 :           fib_entry_untrack (t->fib_entry_index, t->sibling_index);
     658             :         }
     659        2442 :       else if (vtep_addr_unref (&vxm->vtep_table,
     660             :                                 t->encap_fib_index, &t->dst) == 0)
     661             :         {
     662        2222 :           mcast_shared_remove (&t->dst);
     663             :         }
     664             : 
     665        2920 :       vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, t->hw_if_index);
     666        2920 :       if (hw->dev_class_index == vxlan_device_class.index)
     667        2920 :         vnet_delete_hw_interface (vnm, t->hw_if_index);
     668             :       else
     669           0 :         ethernet_delete_interface (vnm, t->hw_if_index);
     670        2920 :       hash_unset (vxm->instance_used, t->user_instance);
     671             : 
     672        2920 :       fib_node_deinit (&t->node);
     673        2920 :       pool_put (vxm->tunnels, t);
     674             :     }
     675             : 
     676        5840 :   if (sw_if_indexp)
     677        5840 :     *sw_if_indexp = sw_if_index;
     678             : 
     679        5840 :   if (a->is_add)
     680             :     {
     681             :       /* register udp ports */
     682        2920 :       if (!is_ip6 && !udp_is_valid_dst_port (a->src_port, 1))
     683           6 :         udp_register_dst_port (vxm->vlib_main, a->src_port,
     684             :                                vxlan4_input_node.index, 1);
     685        2920 :       if (is_ip6 && !udp_is_valid_dst_port (a->src_port, 0))
     686           5 :         udp_register_dst_port (vxm->vlib_main, a->src_port,
     687             :                                vxlan6_input_node.index, 0);
     688             :     }
     689             : 
     690        5840 :   return 0;
     691             : }
     692             : 
     693             : static uword
     694           0 : get_decap_next_for_node (u32 node_index, u32 ipv4_set)
     695             : {
     696           0 :   vxlan_main_t *vxm = &vxlan_main;
     697           0 :   vlib_main_t *vm = vxm->vlib_main;
     698           0 :   uword input_node = (ipv4_set) ? vxlan4_input_node.index :
     699           0 :     vxlan6_input_node.index;
     700             : 
     701           0 :   return vlib_node_add_next (vm, input_node, node_index);
     702             : }
     703             : 
     704             : static uword
     705           0 : unformat_decap_next (unformat_input_t * input, va_list * args)
     706             : {
     707           0 :   u32 *result = va_arg (*args, u32 *);
     708           0 :   u32 ipv4_set = va_arg (*args, int);
     709           0 :   vxlan_main_t *vxm = &vxlan_main;
     710           0 :   vlib_main_t *vm = vxm->vlib_main;
     711             :   u32 node_index;
     712             :   u32 tmp;
     713             : 
     714           0 :   if (unformat (input, "l2"))
     715           0 :     *result = VXLAN_INPUT_NEXT_L2_INPUT;
     716           0 :   else if (unformat (input, "node %U", unformat_vlib_node, vm, &node_index))
     717           0 :     *result = get_decap_next_for_node (node_index, ipv4_set);
     718           0 :   else if (unformat (input, "%d", &tmp))
     719           0 :     *result = tmp;
     720             :   else
     721           0 :     return 0;
     722           0 :   return 1;
     723             : }
     724             : 
     725             : static clib_error_t *
     726           0 : vxlan_add_del_tunnel_command_fn (vlib_main_t * vm,
     727             :                                  unformat_input_t * input,
     728             :                                  vlib_cli_command_t * cmd)
     729             : {
     730           0 :   unformat_input_t _line_input, *line_input = &_line_input;
     731           0 :   ip46_address_t src = ip46_address_initializer, dst =
     732             :     ip46_address_initializer;
     733           0 :   u8 is_add = 1;
     734           0 :   u8 src_set = 0;
     735           0 :   u8 dst_set = 0;
     736           0 :   u8 grp_set = 0;
     737           0 :   u8 ipv4_set = 0;
     738           0 :   u8 ipv6_set = 0;
     739           0 :   u8 is_l3 = 0;
     740           0 :   u32 instance = ~0;
     741           0 :   u32 encap_fib_index = 0;
     742           0 :   u32 mcast_sw_if_index = ~0;
     743           0 :   u32 decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
     744           0 :   u32 vni = 0;
     745           0 :   u32 src_port = 0;
     746           0 :   u32 dst_port = 0;
     747             :   u32 table_id;
     748           0 :   clib_error_t *parse_error = NULL;
     749             : 
     750             :   /* Get a line of input. */
     751           0 :   if (!unformat_user (input, unformat_line_input, line_input))
     752           0 :     return 0;
     753             : 
     754           0 :   while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
     755             :     {
     756           0 :       if (unformat (line_input, "del"))
     757             :         {
     758           0 :           is_add = 0;
     759             :         }
     760           0 :       else if (unformat (line_input, "instance %d", &instance))
     761             :         ;
     762           0 :       else if (unformat (line_input, "src %U",
     763             :                          unformat_ip46_address, &src, IP46_TYPE_ANY))
     764             :         {
     765           0 :           src_set = 1;
     766           0 :           ip46_address_is_ip4 (&src) ? (ipv4_set = 1) : (ipv6_set = 1);
     767             :         }
     768           0 :       else if (unformat (line_input, "dst %U",
     769             :                          unformat_ip46_address, &dst, IP46_TYPE_ANY))
     770             :         {
     771           0 :           dst_set = 1;
     772           0 :           ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
     773             :         }
     774           0 :       else if (unformat (line_input, "group %U %U",
     775             :                          unformat_ip46_address, &dst, IP46_TYPE_ANY,
     776             :                          unformat_vnet_sw_interface,
     777             :                          vnet_get_main (), &mcast_sw_if_index))
     778             :         {
     779           0 :           grp_set = dst_set = 1;
     780           0 :           ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
     781             :         }
     782           0 :       else if (unformat (line_input, "encap-vrf-id %d", &table_id))
     783             :         {
     784             :           encap_fib_index =
     785           0 :             fib_table_find (fib_ip_proto (ipv6_set), table_id);
     786             :         }
     787           0 :       else if (unformat (line_input, "l3"))
     788           0 :         is_l3 = 1;
     789           0 :       else if (unformat (line_input, "decap-next %U", unformat_decap_next,
     790             :                          &decap_next_index, ipv4_set))
     791             :         ;
     792           0 :       else if (unformat (line_input, "vni %d", &vni))
     793             :         ;
     794           0 :       else if (unformat (line_input, "src_port %d", &src_port))
     795             :         ;
     796           0 :       else if (unformat (line_input, "dst_port %d", &dst_port))
     797             :         ;
     798             :       else
     799             :         {
     800           0 :           parse_error = clib_error_return (0, "parse error: '%U'",
     801             :                                            format_unformat_error, line_input);
     802           0 :           break;
     803             :         }
     804             :     }
     805             : 
     806           0 :   unformat_free (line_input);
     807             : 
     808           0 :   if (parse_error)
     809           0 :     return parse_error;
     810             : 
     811           0 :   if (is_l3 && decap_next_index == VXLAN_INPUT_NEXT_L2_INPUT)
     812             :     {
     813           0 :       vlib_node_t *node = vlib_get_node_by_name (
     814             :         vm, (u8 *) (ipv4_set ? "ip4-input" : "ip6-input"));
     815           0 :       decap_next_index = get_decap_next_for_node (node->index, ipv4_set);
     816             :     }
     817             : 
     818           0 :   if (encap_fib_index == ~0)
     819           0 :     return clib_error_return (0, "nonexistent encap-vrf-id %d", table_id);
     820             : 
     821           0 :   if (src_set == 0)
     822           0 :     return clib_error_return (0, "tunnel src address not specified");
     823             : 
     824           0 :   if (dst_set == 0)
     825           0 :     return clib_error_return (0, "tunnel dst address not specified");
     826             : 
     827           0 :   if (grp_set && !ip46_address_is_multicast (&dst))
     828           0 :     return clib_error_return (0, "tunnel group address not multicast");
     829             : 
     830           0 :   if (grp_set == 0 && ip46_address_is_multicast (&dst))
     831           0 :     return clib_error_return (0, "dst address must be unicast");
     832             : 
     833           0 :   if (grp_set && mcast_sw_if_index == ~0)
     834           0 :     return clib_error_return (0, "tunnel nonexistent multicast device");
     835             : 
     836           0 :   if (ipv4_set && ipv6_set)
     837           0 :     return clib_error_return (0, "both IPv4 and IPv6 addresses specified");
     838             : 
     839           0 :   if (ip46_address_cmp (&src, &dst) == 0)
     840           0 :     return clib_error_return (0, "src and dst addresses are identical");
     841             : 
     842           0 :   if (decap_next_index == ~0)
     843           0 :     return clib_error_return (0, "next node not found");
     844             : 
     845           0 :   if (vni == 0)
     846           0 :     return clib_error_return (0, "vni not specified");
     847             : 
     848           0 :   if (vni >> 24)
     849           0 :     return clib_error_return (0, "vni %d out of range", vni);
     850             : 
     851           0 :   vnet_vxlan_add_del_tunnel_args_t a = { .is_add = is_add,
     852             :                                          .is_ip6 = ipv6_set,
     853             :                                          .is_l3 = is_l3,
     854             :                                          .instance = instance,
     855             : #define _(x) .x = x,
     856             :                                          foreach_copy_field
     857             : #undef _
     858             :   };
     859             : 
     860             :   u32 tunnel_sw_if_index;
     861           0 :   int rv = vnet_vxlan_add_del_tunnel (&a, &tunnel_sw_if_index);
     862             : 
     863           0 :   switch (rv)
     864             :     {
     865           0 :     case 0:
     866           0 :       if (is_add)
     867           0 :         vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
     868             :                          vnet_get_main (), tunnel_sw_if_index);
     869           0 :       break;
     870             : 
     871           0 :     case VNET_API_ERROR_TUNNEL_EXIST:
     872           0 :       return clib_error_return (0, "tunnel already exists...");
     873             : 
     874           0 :     case VNET_API_ERROR_NO_SUCH_ENTRY:
     875           0 :       return clib_error_return (0, "tunnel does not exist...");
     876             : 
     877           0 :     case VNET_API_ERROR_INSTANCE_IN_USE:
     878           0 :       return clib_error_return (0, "Instance is in use");
     879             : 
     880           0 :     default:
     881           0 :       return clib_error_return
     882             :         (0, "vnet_vxlan_add_del_tunnel returned %d", rv);
     883             :     }
     884             : 
     885           0 :   return 0;
     886             : }
     887             : 
     888             : /*?
     889             :  * Add or delete a VXLAN Tunnel.
     890             :  *
     891             :  * VXLAN provides the features needed to allow L2 bridge domains (BDs)
     892             :  * to span multiple servers. This is done by building an L2 overlay on
     893             :  * top of an L3 network underlay using VXLAN tunnels.
     894             :  *
     895             :  * This makes it possible for servers to be co-located in the same data
     896             :  * center or be separated geographically as long as they are reachable
     897             :  * through the underlay L3 network.
     898             :  *
     899             :  * You can refer to this kind of L2 overlay bridge domain as a VXLAN
     900             :  * (Virtual eXtensible VLAN) segment.
     901             :  *
     902             :  * @cliexpar
     903             :  * Example of how to create a VXLAN Tunnel:
     904             :  * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id
     905             :  7}
     906             :  * Example of how to create a VXLAN Tunnel with a known name, vxlan_tunnel42:
     907             :  * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 instance 42}
     908             :  * Example of how to create a multicast VXLAN Tunnel with a known name,
     909             :  vxlan_tunnel23:
     910             :  * @cliexcmd{create vxlan tunnel src 10.0.3.1 group 239.1.1.1
     911             :  GigabitEthernet0/8/0 instance 23}
     912             :  * Example of how to create a VXLAN Tunnel with custom udp-ports:
     913             :  * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 src_port
     914             :  59000 dst_port 59001}
     915             :  * Example of how to delete a VXLAN Tunnel:
     916             :  * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del}
     917             :  ?*/
     918             : /* *INDENT-OFF* */
     919        5039 : VLIB_CLI_COMMAND (create_vxlan_tunnel_command, static) = {
     920             :   .path = "create vxlan tunnel",
     921             :   .short_help =
     922             :     "create vxlan tunnel src <local-vtep-addr>"
     923             :     " {dst <remote-vtep-addr>|group <mcast-vtep-addr> <intf-name>} vni <nn>"
     924             :     " [instance <id>]"
     925             :     " [encap-vrf-id <nn>] [decap-next [l2|node <name>]] [del] [l3]"
     926             :     " [src_port <local-vtep-udp-port>] [dst_port <remote-vtep-udp-port>]",
     927             :   .function = vxlan_add_del_tunnel_command_fn,
     928             : };
     929             : /* *INDENT-ON* */
     930             : 
     931             : static clib_error_t *
     932          22 : show_vxlan_tunnel_command_fn (vlib_main_t * vm,
     933             :                               unformat_input_t * input,
     934             :                               vlib_cli_command_t * cmd)
     935             : {
     936          22 :   vxlan_main_t *vxm = &vxlan_main;
     937             :   vxlan_tunnel_t *t;
     938          22 :   int raw = 0;
     939             : 
     940          22 :   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     941             :     {
     942           0 :       if (unformat (input, "raw"))
     943           0 :         raw = 1;
     944             :       else
     945           0 :         return clib_error_return (0, "parse error: '%U'",
     946             :                                   format_unformat_error, input);
     947             :     }
     948             : 
     949          22 :   if (pool_elts (vxm->tunnels) == 0)
     950           0 :     vlib_cli_output (vm, "No vxlan tunnels configured...");
     951             : 
     952             : /* *INDENT-OFF* */
     953         506 :   pool_foreach (t, vxm->tunnels)
     954             :    {
     955         484 :     vlib_cli_output (vm, "%U", format_vxlan_tunnel, t);
     956             :   }
     957             : /* *INDENT-ON* */
     958             : 
     959          22 :   if (raw)
     960             :     {
     961           0 :       vlib_cli_output (vm, "Raw IPv4 Hash Table:\n%U\n",
     962             :                        format_bihash_16_8, &vxm->vxlan4_tunnel_by_key,
     963             :                        1 /* verbose */ );
     964           0 :       vlib_cli_output (vm, "Raw IPv6 Hash Table:\n%U\n",
     965             :                        format_bihash_24_8, &vxm->vxlan6_tunnel_by_key,
     966             :                        1 /* verbose */ );
     967             :     }
     968             : 
     969          22 :   return 0;
     970             : }
     971             : 
     972             : /*?
     973             :  * Display all the VXLAN Tunnel entries.
     974             :  *
     975             :  * @cliexpar
     976             :  * Example of how to display the VXLAN Tunnel entries:
     977             :  * @cliexstart{show vxlan tunnel}
     978             :  * [0] src 10.0.3.1 dst 10.0.3.3 src_port 4789 dst_port 4789 vni 13
     979             :  encap_fib_index 0 sw_if_index 5 decap_next l2
     980             :  * @cliexend
     981             :  ?*/
     982             : /* *INDENT-OFF* */
     983        5039 : VLIB_CLI_COMMAND (show_vxlan_tunnel_command, static) = {
     984             :     .path = "show vxlan tunnel",
     985             :     .short_help = "show vxlan tunnel [raw]",
     986             :     .function = show_vxlan_tunnel_command_fn,
     987             : };
     988             : /* *INDENT-ON* */
     989             : 
     990             : 
     991             : void
     992           0 : vnet_int_vxlan_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
     993             : {
     994           0 :   vxlan_main_t *vxm = &vxlan_main;
     995             : 
     996           0 :   if (pool_is_free_index (vxm->vnet_main->interface_main.sw_interfaces,
     997             :                           sw_if_index))
     998           0 :     return;
     999             : 
    1000           0 :   is_enable = ! !is_enable;
    1001             : 
    1002           0 :   if (is_ip6)
    1003             :     {
    1004           0 :       if (clib_bitmap_get (vxm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index)
    1005           0 :           != is_enable)
    1006             :         {
    1007           0 :           vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-bypass",
    1008             :                                        sw_if_index, is_enable, 0, 0);
    1009           0 :           vxm->bm_ip6_bypass_enabled_by_sw_if =
    1010           0 :             clib_bitmap_set (vxm->bm_ip6_bypass_enabled_by_sw_if,
    1011             :                              sw_if_index, is_enable);
    1012             :         }
    1013             :     }
    1014             :   else
    1015             :     {
    1016           0 :       if (clib_bitmap_get (vxm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index)
    1017           0 :           != is_enable)
    1018             :         {
    1019           0 :           vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-bypass",
    1020             :                                        sw_if_index, is_enable, 0, 0);
    1021           0 :           vxm->bm_ip4_bypass_enabled_by_sw_if =
    1022           0 :             clib_bitmap_set (vxm->bm_ip4_bypass_enabled_by_sw_if,
    1023             :                              sw_if_index, is_enable);
    1024             :         }
    1025             :     }
    1026             : }
    1027             : 
    1028             : 
    1029             : static clib_error_t *
    1030           0 : set_ip_vxlan_bypass (u32 is_ip6,
    1031             :                      unformat_input_t * input, vlib_cli_command_t * cmd)
    1032             : {
    1033           0 :   unformat_input_t _line_input, *line_input = &_line_input;
    1034           0 :   vnet_main_t *vnm = vnet_get_main ();
    1035           0 :   clib_error_t *error = 0;
    1036             :   u32 sw_if_index, is_enable;
    1037             : 
    1038           0 :   sw_if_index = ~0;
    1039           0 :   is_enable = 1;
    1040             : 
    1041           0 :   if (!unformat_user (input, unformat_line_input, line_input))
    1042           0 :     return 0;
    1043             : 
    1044           0 :   while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
    1045             :     {
    1046           0 :       if (unformat_user
    1047             :           (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
    1048             :         ;
    1049           0 :       else if (unformat (line_input, "del"))
    1050           0 :         is_enable = 0;
    1051             :       else
    1052             :         {
    1053           0 :           error = unformat_parse_error (line_input);
    1054           0 :           goto done;
    1055             :         }
    1056             :     }
    1057             : 
    1058           0 :   if (~0 == sw_if_index)
    1059             :     {
    1060           0 :       error = clib_error_return (0, "unknown interface `%U'",
    1061             :                                  format_unformat_error, line_input);
    1062           0 :       goto done;
    1063             :     }
    1064             : 
    1065           0 :   vnet_int_vxlan_bypass_mode (sw_if_index, is_ip6, is_enable);
    1066             : 
    1067           0 : done:
    1068           0 :   unformat_free (line_input);
    1069             : 
    1070           0 :   return error;
    1071             : }
    1072             : 
    1073             : static clib_error_t *
    1074           0 : set_ip4_vxlan_bypass (vlib_main_t * vm,
    1075             :                       unformat_input_t * input, vlib_cli_command_t * cmd)
    1076             : {
    1077           0 :   return set_ip_vxlan_bypass (0, input, cmd);
    1078             : }
    1079             : 
    1080             : /*?
    1081             :  * This command adds the 'ip4-vxlan-bypass' graph node for a given interface.
    1082             :  * By adding the IPv4 vxlan-bypass graph node to an interface, the node checks
    1083             :  * for and validate input vxlan packet and bypass ip4-lookup, ip4-local,
    1084             :  * ip4-udp-lookup nodes to speedup vxlan packet forwarding. This node will
    1085             :  * cause extra overhead to for non-vxlan packets which is kept at a minimum.
    1086             :  *
    1087             :  * @cliexpar
    1088             :  * @parblock
    1089             :  * Example of graph node before ip4-vxlan-bypass is enabled:
    1090             :  * @cliexstart{show vlib graph ip4-vxlan-bypass}
    1091             :  *            Name                      Next                    Previous
    1092             :  * ip4-vxlan-bypass                error-drop [0]
    1093             :  *                                vxlan4-input [1]
    1094             :  *                                 ip4-lookup [2]
    1095             :  * @cliexend
    1096             :  *
    1097             :  * Example of how to enable ip4-vxlan-bypass on an interface:
    1098             :  * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0}
    1099             :  *
    1100             :  * Example of graph node after ip4-vxlan-bypass is enabled:
    1101             :  * @cliexstart{show vlib graph ip4-vxlan-bypass}
    1102             :  *            Name                      Next                    Previous
    1103             :  * ip4-vxlan-bypass                error-drop [0]               ip4-input
    1104             :  *                                vxlan4-input [1]        ip4-input-no-checksum
    1105             :  *                                 ip4-lookup [2]
    1106             :  * @cliexend
    1107             :  *
    1108             :  * Example of how to display the feature enabled on an interface:
    1109             :  * @cliexstart{show ip interface features GigabitEthernet2/0/0}
    1110             :  * IP feature paths configured on GigabitEthernet2/0/0...
    1111             :  * ...
    1112             :  * ipv4 unicast:
    1113             :  *   ip4-vxlan-bypass
    1114             :  *   ip4-lookup
    1115             :  * ...
    1116             :  * @cliexend
    1117             :  *
    1118             :  * Example of how to disable ip4-vxlan-bypass on an interface:
    1119             :  * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0 del}
    1120             :  * @endparblock
    1121             : ?*/
    1122             : /* *INDENT-OFF* */
    1123        5039 : VLIB_CLI_COMMAND (set_interface_ip_vxlan_bypass_command, static) = {
    1124             :   .path = "set interface ip vxlan-bypass",
    1125             :   .function = set_ip4_vxlan_bypass,
    1126             :   .short_help = "set interface ip vxlan-bypass <interface> [del]",
    1127             : };
    1128             : /* *INDENT-ON* */
    1129             : 
    1130             : static clib_error_t *
    1131           0 : set_ip6_vxlan_bypass (vlib_main_t * vm,
    1132             :                       unformat_input_t * input, vlib_cli_command_t * cmd)
    1133             : {
    1134           0 :   return set_ip_vxlan_bypass (1, input, cmd);
    1135             : }
    1136             : 
    1137             : /*?
    1138             :  * This command adds the 'ip6-vxlan-bypass' graph node for a given interface.
    1139             :  * By adding the IPv6 vxlan-bypass graph node to an interface, the node checks
    1140             :  * for and validate input vxlan packet and bypass ip6-lookup, ip6-local,
    1141             :  * ip6-udp-lookup nodes to speedup vxlan packet forwarding. This node will
    1142             :  * cause extra overhead to for non-vxlan packets which is kept at a minimum.
    1143             :  *
    1144             :  * @cliexpar
    1145             :  * @parblock
    1146             :  * Example of graph node before ip6-vxlan-bypass is enabled:
    1147             :  * @cliexstart{show vlib graph ip6-vxlan-bypass}
    1148             :  *            Name                      Next                    Previous
    1149             :  * ip6-vxlan-bypass                error-drop [0]
    1150             :  *                                vxlan6-input [1]
    1151             :  *                                 ip6-lookup [2]
    1152             :  * @cliexend
    1153             :  *
    1154             :  * Example of how to enable ip6-vxlan-bypass on an interface:
    1155             :  * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0}
    1156             :  *
    1157             :  * Example of graph node after ip6-vxlan-bypass is enabled:
    1158             :  * @cliexstart{show vlib graph ip6-vxlan-bypass}
    1159             :  *            Name                      Next                    Previous
    1160             :  * ip6-vxlan-bypass                error-drop [0]               ip6-input
    1161             :  *                                vxlan6-input [1]        ip4-input-no-checksum
    1162             :  *                                 ip6-lookup [2]
    1163             :  * @cliexend
    1164             :  *
    1165             :  * Example of how to display the feature enabled on an interface:
    1166             :  * @cliexstart{show ip interface features GigabitEthernet2/0/0}
    1167             :  * IP feature paths configured on GigabitEthernet2/0/0...
    1168             :  * ...
    1169             :  * ipv6 unicast:
    1170             :  *   ip6-vxlan-bypass
    1171             :  *   ip6-lookup
    1172             :  * ...
    1173             :  * @cliexend
    1174             :  *
    1175             :  * Example of how to disable ip6-vxlan-bypass on an interface:
    1176             :  * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0 del}
    1177             :  * @endparblock
    1178             : ?*/
    1179             : /* *INDENT-OFF* */
    1180        5039 : VLIB_CLI_COMMAND (set_interface_ip6_vxlan_bypass_command, static) = {
    1181             :   .path = "set interface ip6 vxlan-bypass",
    1182             :   .function = set_ip6_vxlan_bypass,
    1183             :   .short_help = "set interface ip6 vxlan-bypass <interface> [del]",
    1184             : };
    1185             : /* *INDENT-ON* */
    1186             : 
    1187             : int
    1188           0 : vnet_vxlan_add_del_rx_flow (u32 hw_if_index, u32 t_index, int is_add)
    1189             : {
    1190           0 :   vxlan_main_t *vxm = &vxlan_main;
    1191           0 :   vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
    1192           0 :   vnet_main_t *vnm = vnet_get_main ();
    1193           0 :   if (is_add)
    1194             :     {
    1195           0 :       if (t->flow_index == ~0)
    1196             :         {
    1197           0 :           vxlan_main_t *vxm = &vxlan_main;
    1198           0 :           vnet_flow_t flow = {
    1199             :             .actions =
    1200             :               VNET_FLOW_ACTION_REDIRECT_TO_NODE | VNET_FLOW_ACTION_MARK |
    1201             :               VNET_FLOW_ACTION_BUFFER_ADVANCE,
    1202           0 :             .mark_flow_id = t->dev_instance + vxm->flow_id_start,
    1203           0 :             .redirect_node_index = vxlan4_flow_input_node.index,
    1204             :             .buffer_advance = sizeof (ethernet_header_t),
    1205             :             .type = VNET_FLOW_TYPE_IP4_VXLAN,
    1206             :             .ip4_vxlan = {
    1207             :                           .protocol.prot = IP_PROTOCOL_UDP,
    1208             :                           .src_addr.addr = t->dst.ip4,
    1209             :                           .dst_addr.addr = t->src.ip4,
    1210             :                           .src_addr.mask.as_u32 = ~0,
    1211             :                           .dst_addr.mask.as_u32 = ~0,
    1212           0 :                           .dst_port.port = t->src_port,
    1213             :                           .dst_port.mask = 0xFF,
    1214           0 :                           .vni = t->vni,
    1215             :                           }
    1216             :             ,
    1217             :           };
    1218           0 :           vnet_flow_add (vnm, &flow, &t->flow_index);
    1219             :         }
    1220           0 :       return vnet_flow_enable (vnm, t->flow_index, hw_if_index);
    1221             :     }
    1222             :   /* flow index is removed when the tunnel is deleted */
    1223           0 :   return vnet_flow_disable (vnm, t->flow_index, hw_if_index);
    1224             : }
    1225             : 
    1226             : u32
    1227           0 : vnet_vxlan_get_tunnel_index (u32 sw_if_index)
    1228             : {
    1229           0 :   vxlan_main_t *vxm = &vxlan_main;
    1230             : 
    1231           0 :   if (sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index))
    1232           0 :     return ~0;
    1233           0 :   return vxm->tunnel_index_by_sw_if_index[sw_if_index];
    1234             : }
    1235             : 
    1236             : static clib_error_t *
    1237           0 : vxlan_offload_command_fn (vlib_main_t * vm,
    1238             :                           unformat_input_t * input, vlib_cli_command_t * cmd)
    1239             : {
    1240           0 :   unformat_input_t _line_input, *line_input = &_line_input;
    1241             : 
    1242             :   /* Get a line of input. */
    1243           0 :   if (!unformat_user (input, unformat_line_input, line_input))
    1244           0 :     return 0;
    1245             : 
    1246           0 :   vnet_main_t *vnm = vnet_get_main ();
    1247           0 :   u32 rx_sw_if_index = ~0;
    1248           0 :   u32 hw_if_index = ~0;
    1249           0 :   int is_add = 1;
    1250             : 
    1251           0 :   while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
    1252             :     {
    1253           0 :       if (unformat (line_input, "hw %U", unformat_vnet_hw_interface, vnm,
    1254             :                     &hw_if_index))
    1255           0 :         continue;
    1256           0 :       if (unformat (line_input, "rx %U", unformat_vnet_sw_interface, vnm,
    1257             :                     &rx_sw_if_index))
    1258           0 :         continue;
    1259           0 :       if (unformat (line_input, "del"))
    1260             :         {
    1261           0 :           is_add = 0;
    1262           0 :           continue;
    1263             :         }
    1264           0 :       return clib_error_return (0, "unknown input `%U'",
    1265             :                                 format_unformat_error, line_input);
    1266             :     }
    1267             : 
    1268           0 :   if (rx_sw_if_index == ~0)
    1269           0 :     return clib_error_return (0, "missing rx interface");
    1270           0 :   if (hw_if_index == ~0)
    1271           0 :     return clib_error_return (0, "missing hw interface");
    1272             : 
    1273           0 :   u32 t_index = vnet_vxlan_get_tunnel_index (rx_sw_if_index);;
    1274           0 :   if (t_index == ~0)
    1275           0 :     return clib_error_return (0, "%U is not a vxlan tunnel",
    1276             :                               format_vnet_sw_if_index_name, vnm,
    1277             :                               rx_sw_if_index);
    1278             : 
    1279           0 :   vxlan_main_t *vxm = &vxlan_main;
    1280           0 :   vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
    1281             : 
    1282           0 :   if (!ip46_address_is_ip4 (&t->dst))
    1283           0 :     return clib_error_return (0, "currently only IPV4 tunnels are supported");
    1284             : 
    1285           0 :   vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
    1286           0 :   ip4_main_t *im = &ip4_main;
    1287           0 :   u32 rx_fib_index =
    1288           0 :     vec_elt (im->fib_index_by_sw_if_index, hw_if->sw_if_index);
    1289             : 
    1290           0 :   if (t->encap_fib_index != rx_fib_index)
    1291           0 :     return clib_error_return (0, "interface/tunnel fib mismatch");
    1292             : 
    1293           0 :   if (vnet_vxlan_add_del_rx_flow (hw_if_index, t_index, is_add))
    1294           0 :     return clib_error_return (0, "error %s flow",
    1295             :                               is_add ? "enabling" : "disabling");
    1296             : 
    1297           0 :   return 0;
    1298             : }
    1299             : 
    1300             : /* *INDENT-OFF* */
    1301        5039 : VLIB_CLI_COMMAND (vxlan_offload_command, static) = {
    1302             :     .path = "set flow-offload vxlan",
    1303             :     .short_help =
    1304             :     "set flow-offload vxlan hw <interface-name> rx <tunnel-name> [del]",
    1305             :     .function = vxlan_offload_command_fn,
    1306             : };
    1307             : /* *INDENT-ON* */
    1308             : 
    1309             : #define VXLAN_HASH_NUM_BUCKETS (2 * 1024)
    1310             : #define VXLAN_HASH_MEMORY_SIZE (1 << 20)
    1311             : 
    1312             : clib_error_t *
    1313         559 : vxlan_init (vlib_main_t * vm)
    1314             : {
    1315         559 :   vxlan_main_t *vxm = &vxlan_main;
    1316             : 
    1317         559 :   vxm->vnet_main = vnet_get_main ();
    1318         559 :   vxm->vlib_main = vm;
    1319             : 
    1320         559 :   vnet_flow_get_range (vxm->vnet_main, "vxlan", 1024 * 1024,
    1321             :                        &vxm->flow_id_start);
    1322             : 
    1323         559 :   vxm->bm_ip4_bypass_enabled_by_sw_if = 0;
    1324         559 :   vxm->bm_ip6_bypass_enabled_by_sw_if = 0;
    1325             : 
    1326             :   /* initialize the ip6 hash */
    1327         559 :   clib_bihash_init_16_8 (&vxm->vxlan4_tunnel_by_key, "vxlan4",
    1328             :                          VXLAN_HASH_NUM_BUCKETS, VXLAN_HASH_MEMORY_SIZE);
    1329         559 :   clib_bihash_init_24_8 (&vxm->vxlan6_tunnel_by_key, "vxlan6",
    1330             :                          VXLAN_HASH_NUM_BUCKETS, VXLAN_HASH_MEMORY_SIZE);
    1331         559 :   vxm->vtep_table = vtep_table_create ();
    1332         559 :   vxm->mcast_shared = hash_create_mem (0,
    1333             :                                        sizeof (ip46_address_t),
    1334             :                                        sizeof (mcast_shared_t));
    1335             : 
    1336         559 :   fib_node_register_type (FIB_NODE_TYPE_VXLAN_TUNNEL, &vxlan_vft);
    1337             : 
    1338         559 :   return 0;
    1339             : }
    1340             : 
    1341        1119 : VLIB_INIT_FUNCTION (vxlan_init);
    1342             : 
    1343             : /*
    1344             :  * fd.io coding-style-patch-verification: ON
    1345             :  *
    1346             :  * Local Variables:
    1347             :  * eval: (c-set-style "gnu")
    1348             :  * End:
    1349             :  */

Generated by: LCOV version 1.14