LCOV - code coverage report
Current view: top level - plugins/lb - lb.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 588 729 80.7 %
Date: 2023-10-26 01:39:38 Functions: 37 43 86.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016 Cisco and/or its affiliates.
       3             :  * Licensed under the Apache License, Version 2.0 (the "License");
       4             :  * you may not use this file except in compliance with the License.
       5             :  * You may obtain a copy of the License at:
       6             :  *
       7             :  *     http://www.apache.org/licenses/LICENSE-2.0
       8             :  *
       9             :  * Unless required by applicable law or agreed to in writing, software
      10             :  * distributed under the License is distributed on an "AS IS" BASIS,
      11             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12             :  * See the License for the specific language governing permissions and
      13             :  * limitations under the License.
      14             :  */
      15             : 
      16             : #include <lb/lb.h>
      17             : #include <vnet/plugin/plugin.h>
      18             : #include <vpp/app/version.h>
      19             : #include <vnet/api_errno.h>
      20             : #include <vnet/udp/udp_local.h>
      21             : #include <vppinfra/lock.h>
      22             : 
      23             : // Minimum interval, in seconds, between two garbage-collection passes over the same VIP
      24             : #define LB_GARBAGE_RUN 60
      25             : 
      26             : // Grace period, in seconds, since an AS was last used; once it has elapsed, inter-core races on that AS are assumed not to occur
      27             : #define LB_CONCURRENCY_TIMEOUT 10
      28             : 
      29             : // FIB source for adding routes
      30             : static fib_source_t lb_fib_src;
      31             : 
      32             : lb_main_t lb_main; // Global load-balancer state; functions in this file alias it as 'lbm'
      33             : 
      34             : #define lb_get_writer_lock() clib_spinlock_lock (&lb_main.writer_lock) // Take lb_main.writer_lock
      35             : #define lb_put_writer_lock() clib_spinlock_unlock (&lb_main.writer_lock) // Release lb_main.writer_lock
      36             : 
      37             : static void lb_as_stack (lb_as_t *as); // Forward declaration; defined later in the file
      38             : 
      39             : 
      40             : const static char * const lb_dpo_gre4_ip4[] = { "lb4-gre4" , NULL };
      41             : const static char * const lb_dpo_gre4_ip6[] = { "lb6-gre4" , NULL };
      42             : const static char* const * const lb_dpo_gre4_nodes[DPO_PROTO_NUM] =
      43             :     {
      44             :         [DPO_PROTO_IP4]  = lb_dpo_gre4_ip4,
      45             :         [DPO_PROTO_IP6]  = lb_dpo_gre4_ip6,
      46             :     };
      47             : 
      48             : const static char * const lb_dpo_gre6_ip4[] = { "lb4-gre6" , NULL };
      49             : const static char * const lb_dpo_gre6_ip6[] = { "lb6-gre6" , NULL };
      50             : const static char* const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] =
      51             :     {
      52             :         [DPO_PROTO_IP4]  = lb_dpo_gre6_ip4,
      53             :         [DPO_PROTO_IP6]  = lb_dpo_gre6_ip6,
      54             :     };
      55             : 
      56             : const static char * const lb_dpo_gre4_ip4_port[] = { "lb4-gre4-port" , NULL };
      57             : const static char * const lb_dpo_gre4_ip6_port[] = { "lb6-gre4-port" , NULL };
      58             : const static char* const * const lb_dpo_gre4_port_nodes[DPO_PROTO_NUM] =
      59             :     {
      60             :         [DPO_PROTO_IP4]  = lb_dpo_gre4_ip4_port,
      61             :         [DPO_PROTO_IP6]  = lb_dpo_gre4_ip6_port,
      62             :     };
      63             : 
      64             : const static char * const lb_dpo_gre6_ip4_port[] = { "lb4-gre6-port" , NULL };
      65             : const static char * const lb_dpo_gre6_ip6_port[] = { "lb6-gre6-port" , NULL };
      66             : const static char* const * const lb_dpo_gre6_port_nodes[DPO_PROTO_NUM] =
      67             :     {
      68             :         [DPO_PROTO_IP4]  = lb_dpo_gre6_ip4_port,
      69             :         [DPO_PROTO_IP6]  = lb_dpo_gre6_ip6_port,
      70             :     };
      71             : 
      72             : const static char * const lb_dpo_l3dsr_ip4[] = {"lb4-l3dsr" , NULL};
      73             : const static char* const * const lb_dpo_l3dsr_nodes[DPO_PROTO_NUM] =
      74             :     {
      75             :         [DPO_PROTO_IP4]  = lb_dpo_l3dsr_ip4,
      76             :     };
      77             : 
      78             : const static char * const lb_dpo_l3dsr_ip4_port[] = {"lb4-l3dsr-port" , NULL};
      79             : const static char* const * const lb_dpo_l3dsr_port_nodes[DPO_PROTO_NUM] =
      80             :     {
      81             :         [DPO_PROTO_IP4]  = lb_dpo_l3dsr_ip4_port,
      82             :     };
      83             : 
      84             : const static char * const lb_dpo_nat4_ip4_port[] = { "lb4-nat4-port" , NULL };
      85             : const static char* const * const lb_dpo_nat4_port_nodes[DPO_PROTO_NUM] =
      86             :     {
      87             :         [DPO_PROTO_IP4]  = lb_dpo_nat4_ip4_port,
      88             :     };
      89             : 
      90             : const static char * const lb_dpo_nat6_ip6_port[] = { "lb6-nat6-port" , NULL };
      91             : const static char* const * const lb_dpo_nat6_port_nodes[DPO_PROTO_NUM] =
      92             :     {
      93             :         [DPO_PROTO_IP6]  = lb_dpo_nat6_ip6_port,
      94             :     };
      95             : 
      96          14 : u32 lb_hash_time_now(vlib_main_t * vm)
      97             : {
      98          14 :   return (u32) (vlib_time_now(vm) + 10000);
      99             : }
     100             : 
     101           0 : u8 *format_lb_main (u8 * s, va_list * args)
     102             : {
     103           0 :   vlib_thread_main_t *tm = vlib_get_thread_main();
     104           0 :   lb_main_t *lbm = &lb_main;
     105           0 :   s = format(s, "lb_main");
     106           0 :   s = format(s, " ip4-src-address: %U \n", format_ip4_address, &lbm->ip4_src_address);
     107           0 :   s = format(s, " ip6-src-address: %U \n", format_ip6_address, &lbm->ip6_src_address);
     108           0 :   s = format(s, " #vips: %u\n", pool_elts(lbm->vips));
     109           0 :   s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1);
     110             : 
     111             :   u32 thread_index;
     112           0 :   for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
     113           0 :     lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
     114           0 :     if (h) {
     115           0 :       s = format(s, "core %d\n", thread_index);
     116           0 :       s = format(s, "  timeout: %ds\n", h->timeout);
     117           0 :       s = format(s, "  usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())),  lb_hash_size(h));
     118             :     }
     119             :   }
     120             : 
     121           0 :   return s;
     122             : }
     123             : 
     124             : static char *lb_vip_type_strings[] = {
     125             :     [LB_VIP_TYPE_IP6_GRE6] = "ip6-gre6",
     126             :     [LB_VIP_TYPE_IP6_GRE4] = "ip6-gre4",
     127             :     [LB_VIP_TYPE_IP4_GRE6] = "ip4-gre6",
     128             :     [LB_VIP_TYPE_IP4_GRE4] = "ip4-gre4",
     129             :     [LB_VIP_TYPE_IP4_L3DSR] = "ip4-l3dsr",
     130             :     [LB_VIP_TYPE_IP4_NAT4] = "ip4-nat4",
     131             :     [LB_VIP_TYPE_IP6_NAT6] = "ip6-nat6",
     132             : };
     133             : 
     134        1506 : u8 *format_lb_vip_type (u8 * s, va_list * args)
     135             : {
     136        1506 :   lb_vip_type_t vipt = va_arg (*args, lb_vip_type_t);
     137             :   u32 i;
     138        5718 :   for (i=0; i<LB_VIP_N_TYPES; i++)
     139        5718 :     if (vipt == i)
     140        1506 :       return format(s, lb_vip_type_strings[i]);
     141           0 :   return format(s, "_WRONG_TYPE_");
     142             : }
     143             : 
     144           0 : uword unformat_lb_vip_type (unformat_input_t * input, va_list * args)
     145             : {
     146           0 :   lb_vip_type_t *vipt = va_arg (*args, lb_vip_type_t *);
     147             :   u32 i;
     148           0 :   for (i=0; i<LB_VIP_N_TYPES; i++)
     149           0 :     if (unformat(input, lb_vip_type_strings[i])) {
     150           0 :       *vipt = i;
     151           0 :       return 1;
     152             :     }
     153           0 :   return 0;
     154             : }
     155             : 
     156        1400 : u8 *format_lb_vip (u8 * s, va_list * args)
     157             : {
     158        1400 :   lb_vip_t *vip = va_arg (*args, lb_vip_t *);
     159        1400 :   s = format(s, "%U %U new_size:%u #as:%u%s",
     160        1400 :              format_lb_vip_type, vip->type,
     161        1400 :              format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY,
     162        1400 :              vip->new_flow_table_mask + 1,
     163        1400 :              pool_elts(vip->as_indexes),
     164        1400 :              (vip->flags & LB_VIP_FLAGS_USED)?"":" removed");
     165             : 
     166        1400 :   if (vip->port != 0)
     167             :     {
     168         800 :       s = format(s, "  protocol:%u port:%u ", vip->protocol, vip->port);
     169             :     }
     170             : 
     171        1400 :   if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
     172             :     {
     173         400 :       s = format(s, "  dscp:%u", vip->encap_args.dscp);
     174             :     }
     175        1000 :   else if ((vip->type == LB_VIP_TYPE_IP4_NAT4)
     176         900 :           || (vip->type == LB_VIP_TYPE_IP6_NAT6))
     177             :     {
     178         200 :       s = format (s, " type:%s port:%u target_port:%u",
     179         200 :          (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)?"clusterip":
     180             :              "nodeport",
     181         200 :          ntohs(vip->port), ntohs(vip->encap_args.target_port));
     182             :     }
     183             : 
     184        1400 :   return s;
     185             : }
     186             : 
     187        1400 : u8 *format_lb_as (u8 * s, va_list * args)
     188             : {
     189        1400 :   lb_as_t *as = va_arg (*args, lb_as_t *);
     190        1400 :   return format(s, "%U %s", format_ip46_address,
     191             :                 &as->address, IP46_TYPE_ANY,
     192        1400 :                 (as->flags & LB_AS_FLAGS_USED)?"used":"removed");
     193             : }
     194             : 
     195         106 : u8 *format_lb_vip_detailed (u8 * s, va_list * args)
     196             : {
     197         106 :   lb_main_t *lbm = &lb_main;
     198         106 :   lb_vip_t *vip = va_arg (*args, lb_vip_t *);
     199         106 :   u32 indent = format_get_indent (s);
     200             : 
     201             :   /* clang-format off */
     202         212 :   s = format(s, "%U %U [%lu] %U%s%s\n"
     203             :                    "%U  new_size:%u\n",
     204             :                   format_white_space, indent,
     205         106 :                   format_lb_vip_type, vip->type,
     206         106 :                   vip - lbm->vips,
     207         106 :                   format_ip46_prefix, &vip->prefix, (u32) vip->plen, IP46_TYPE_ANY,
     208         106 :                   lb_vip_is_src_ip_sticky (vip) ? " src_ip_sticky" : "",
     209         106 :                   (vip->flags & LB_VIP_FLAGS_USED)?"":" removed",
     210             :                   format_white_space, indent,
     211         106 :                   vip->new_flow_table_mask + 1);
     212             :   /* clang-format on */
     213             : 
     214         106 :   if (vip->port != 0)
     215             :     {
     216          55 :       s = format(s, "%U  protocol:%u port:%u\n",
     217             :                  format_white_space, indent,
     218          55 :                  vip->protocol, vip->port);
     219             :     }
     220             : 
     221         106 :   if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
     222             :     {
     223          35 :       s = format(s, "%U  dscp:%u\n",
     224             :                     format_white_space, indent,
     225          35 :                     vip->encap_args.dscp);
     226             :     }
     227          71 :   else if ((vip->type == LB_VIP_TYPE_IP4_NAT4)
     228          65 :           || (vip->type == LB_VIP_TYPE_IP6_NAT6))
     229             :     {
     230           7 :       s = format (s, "%U  type:%s port:%u target_port:%u",
     231             :          format_white_space, indent,
     232           7 :          (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)?"clusterip":
     233             :              "nodeport",
     234           7 :          ntohs(vip->port), ntohs(vip->encap_args.target_port));
     235             :     }
     236             : 
     237             :   //Print counters
     238         106 :   s = format(s, "%U  counters:\n",
     239             :              format_white_space, indent);
     240             :   u32 i;
     241         530 :   for (i=0; i<LB_N_VIP_COUNTERS; i++)
     242         424 :     s = format(s, "%U    %s: %Lu\n",
     243             :                format_white_space, indent,
     244             :                lbm->vip_counters[i].name,
     245         424 :                vlib_get_simple_counter(&lbm->vip_counters[i], vip - lbm->vips));
     246             : 
     247             : 
     248         106 :   s = format(s, "%U  #as:%u\n",
     249             :              format_white_space, indent,
     250         106 :              pool_elts(vip->as_indexes));
     251             : 
     252             :   //Let's count the buckets for each AS
     253         106 :   u32 *count = 0;
     254         106 :   vec_validate(count, pool_len(lbm->ass)); //Possibly big alloc for not much...
     255             :   lb_new_flow_entry_t *nfe;
     256      108650 :   vec_foreach(nfe, vip->new_flow_table)
     257      108544 :     count[nfe->as_index]++;
     258             : 
     259             :   lb_as_t *as;
     260             :   u32 *as_index;
     261         631 :   pool_foreach (as_index, vip->as_indexes) {
     262         525 :       as = &lbm->ass[*as_index];
     263         525 :       s = format(s, "%U    %U %u buckets   %Lu flows  dpo:%u %s\n",
     264             :                    format_white_space, indent,
     265             :                    format_ip46_address, &as->address, IP46_TYPE_ANY,
     266         525 :                    count[as - lbm->ass],
     267         525 :                    vlib_refcount_get(&lbm->as_refcount, as - lbm->ass),
     268             :                    as->dpo.dpoi_index,
     269         525 :                    (as->flags & LB_AS_FLAGS_USED)?"used":" removed");
     270             :   }
     271             : 
     272         106 :   vec_free(count);
     273         106 :   return s;
     274             : }
     275             : 
     276             : typedef struct { // Per-AS permutation state used while filling a VIP's new-flow table
     277             :   u32 as_index; // Index of the AS in lb_main.ass
     278             :   u32 last; // Next bucket this AS will try to claim
     279             :   u32 skip; // Stride added to 'last' after each attempt, masked to the table size
     280             : } lb_pseudorand_t;
     281             : 
     282         266 : static int lb_pseudorand_compare(void *a, void *b)
     283             : {
     284             :   lb_as_t *asa, *asb;
     285         266 :   lb_main_t *lbm = &lb_main;
     286         266 :   asa = &lbm->ass[((lb_pseudorand_t *)a)->as_index];
     287         266 :   asb = &lbm->ass[((lb_pseudorand_t *)b)->as_index];
     288         266 :   return memcmp(&asa->address, &asb->address, sizeof(asb->address));
     289             : }
     290             : 
     291        1604 : static void lb_vip_garbage_collection(lb_vip_t *vip)
     292             : {
     293        1604 :   lb_main_t *lbm = &lb_main;
     294             :   lb_snat4_key_t m_key4;
     295             :   clib_bihash_kv_8_8_t kv4, value4;
     296             :   lb_snat6_key_t m_key6;
     297             :   clib_bihash_kv_24_8_t kv6, value6;
     298        1604 :   lb_snat_mapping_t *m = 0;
     299        1604 :   CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
     300             : 
     301        1604 :   u32 now = (u32) vlib_time_now(vlib_get_main());
     302        1604 :   if (!clib_u32_loop_gt(now, vip->last_garbage_collection + LB_GARBAGE_RUN))
     303        1604 :     return;
     304             : 
     305           0 :   vip->last_garbage_collection = now;
     306             :   lb_as_t *as;
     307             :   u32 *as_index;
     308           0 :   pool_foreach (as_index, vip->as_indexes) {
     309           0 :       as = &lbm->ass[*as_index];
     310           0 :       if (!(as->flags & LB_AS_FLAGS_USED) && //Not used
     311           0 :           clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) &&
     312           0 :           (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0))
     313             :         { //Not referenced
     314             : 
     315           0 :           if (lb_vip_is_nat4_port(vip)) {
     316           0 :               m_key4.addr = as->address.ip4;
     317           0 :               m_key4.port = vip->encap_args.target_port;
     318           0 :               m_key4.protocol = 0;
     319           0 :               m_key4.fib_index = 0;
     320             : 
     321           0 :               kv4.key = m_key4.as_u64;
     322           0 :               if(!clib_bihash_search_8_8(&lbm->mapping_by_as4, &kv4, &value4))
     323           0 :                 m = pool_elt_at_index (lbm->snat_mappings, value4.value);
     324           0 :               ASSERT (m);
     325             : 
     326           0 :               kv4.value = m - lbm->snat_mappings;
     327           0 :               clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 0);
     328           0 :               pool_put (lbm->snat_mappings, m);
     329           0 :           } else if (lb_vip_is_nat6_port(vip)) {
     330           0 :               m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0];
     331           0 :               m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1];
     332           0 :               m_key6.port = vip->encap_args.target_port;
     333           0 :               m_key6.protocol = 0;
     334           0 :               m_key6.fib_index = 0;
     335             : 
     336           0 :               kv6.key[0] = m_key6.as_u64[0];
     337           0 :               kv6.key[1] = m_key6.as_u64[1];
     338           0 :               kv6.key[2] = m_key6.as_u64[2];
     339             : 
     340           0 :               if (!clib_bihash_search_24_8 (&lbm->mapping_by_as6, &kv6, &value6))
     341           0 :                 m = pool_elt_at_index (lbm->snat_mappings, value6.value);
     342           0 :               ASSERT (m);
     343             : 
     344           0 :               kv6.value = m - lbm->snat_mappings;
     345           0 :               clib_bihash_add_del_24_8(&lbm->mapping_by_as6, &kv6, 0);
     346           0 :               pool_put (lbm->snat_mappings, m);
     347             :           }
     348           0 :           fib_entry_child_remove(as->next_hop_fib_entry_index,
     349             :                                 as->next_hop_child_index);
     350           0 :           fib_table_entry_delete_index(as->next_hop_fib_entry_index,
     351             :                                        FIB_SOURCE_RR);
     352           0 :           as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID;
     353             : 
     354           0 :           pool_put(vip->as_indexes, as_index);
     355           0 :           pool_put(lbm->ass, as);
     356             :         }
     357             :   }
     358             : }
     359             : 
     360         174 : void lb_garbage_collection()
     361             : {
     362         174 :   lb_main_t *lbm = &lb_main;
     363         174 :   lb_get_writer_lock();
     364             :   lb_vip_t *vip;
     365         174 :   u32 *to_be_removed_vips = 0, *i;
     366        1623 :   pool_foreach (vip, lbm->vips) {
     367        1449 :       lb_vip_garbage_collection(vip);
     368             : 
     369        2555 :       if (!(vip->flags & LB_VIP_FLAGS_USED) &&
     370        1106 :           (pool_elts(vip->as_indexes) == 0)) {
     371           1 :         vec_add1(to_be_removed_vips, vip - lbm->vips);
     372             :       }
     373             :   }
     374             : 
     375         175 :   vec_foreach(i, to_be_removed_vips) {
     376           1 :     vip = &lbm->vips[*i];
     377           1 :     pool_put(lbm->vips, vip);
     378           1 :     pool_free(vip->as_indexes);
     379             :   }
     380             : 
     381         174 :   vec_free(to_be_removed_vips);
     382         174 :   lb_put_writer_lock();
     383         174 : }
     384             : 
     385         747 : static void lb_vip_update_new_flow_table(lb_vip_t *vip)
     386             : {
     387         747 :   lb_main_t *lbm = &lb_main;
     388             :   lb_new_flow_entry_t *old_table;
     389             :   u32 i, *as_index;
     390         747 :   lb_new_flow_entry_t *new_flow_table = 0;
     391             :   lb_as_t *as;
     392         747 :   lb_pseudorand_t *pr, *sort_arr = 0;
     393             : 
     394         747 :   CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock); // We must have the lock
     395             : 
     396             :   //Check if some AS is configured or not
     397         747 :   i = 0;
     398        1027 :   pool_foreach (as_index, vip->as_indexes) {
     399         407 :       as = &lbm->ass[*as_index];
     400         407 :       if (as->flags & LB_AS_FLAGS_USED) { //Not used anymore
     401         127 :         i = 1;
     402         127 :         goto out; //Not sure 'break' works in this macro-loop
     403             :       }
     404             :   }
     405             : 
     406         620 : out:
     407         747 :   if (i == 0) {
     408             :     //Only the default. i.e. no AS
     409         620 :     vec_validate(new_flow_table, vip->new_flow_table_mask);
     410       47275 :     for (i=0; i<vec_len(new_flow_table); i++)
     411       46655 :       new_flow_table[i].as_index = 0;
     412             : 
     413         620 :     goto finished;
     414             :   }
     415             : 
     416             :   //First, let's sort the ASs
     417         127 :   vec_validate (sort_arr, pool_elts (vip->as_indexes) - 1);
     418             : 
     419         127 :   i = 0;
     420         618 :   pool_foreach (as_index, vip->as_indexes) {
     421         491 :       as = &lbm->ass[*as_index];
     422         491 :       if (!(as->flags & LB_AS_FLAGS_USED)) //Not used anymore
     423         140 :         continue;
     424             : 
     425         351 :       sort_arr[i].as_index = as - lbm->ass;
     426         351 :       i++;
     427             :   }
     428         127 :   vec_set_len (sort_arr, i);
     429             : 
     430         127 :   vec_sort_with_function(sort_arr, lb_pseudorand_compare);
     431             : 
     432             :   //Now let's pseudo-randomly generate permutations
     433         478 :   vec_foreach(pr, sort_arr) {
     434         351 :     lb_as_t *as = &lbm->ass[pr->as_index];
     435             : 
     436         351 :     u64 seed = clib_xxhash(as->address.as_u64[0] ^
     437         351 :                            as->address.as_u64[1]);
     438             :     /* We have 2^n buckets.
     439             :      * skip must be prime with 2^n.
     440             :      * So skip must be odd.
     441             :      * MagLev actually state that M should be prime,
     442             :      * but this has a big computation cost (% operation).
     443             :      * Using 2^n is more better (& operation).
     444             :      */
     445         351 :     pr->skip = ((seed & 0xffffffff) | 1) & vip->new_flow_table_mask;
     446         351 :     pr->last = (seed >> 32) & vip->new_flow_table_mask;
     447             :   }
     448             : 
     449             :   //Let's create a new flow table
     450         127 :   vec_validate(new_flow_table, vip->new_flow_table_mask);
     451      130175 :   for (i=0; i<vec_len(new_flow_table); i++)
     452      130048 :     new_flow_table[i].as_index = 0;
     453             : 
     454         127 :   u32 done = 0;
     455             :   while (1) {
     456      193567 :     vec_foreach(pr, sort_arr) {
     457      174435 :       while (1) {
     458      304483 :         u32 last = pr->last;
     459      304483 :         pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask;
     460      304483 :         if (new_flow_table[last].as_index == 0) {
     461      130048 :           new_flow_table[last].as_index = pr->as_index;
     462      130048 :           break;
     463             :         }
     464             :       }
     465      130048 :       done++;
     466      130048 :       if (done == vec_len(new_flow_table))
     467         127 :         goto finished;
     468             :     }
     469             :   }
     470             : 
     471         747 : finished:
     472         747 :   vec_free(sort_arr);
     473             : 
     474         747 :   old_table = vip->new_flow_table;
     475         747 :   vip->new_flow_table = new_flow_table;
     476         747 :   vec_free(old_table);
     477         747 : }
     478             : 
     479           1 : int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
     480             :            u32 per_cpu_sticky_buckets, u32 flow_timeout)
     481             : {
     482           1 :   lb_main_t *lbm = &lb_main;
     483             : 
     484           1 :   if (!is_pow2(per_cpu_sticky_buckets))
     485           0 :     return VNET_API_ERROR_INVALID_MEMORY_SIZE;
     486             : 
     487           1 :   lb_get_writer_lock(); //Not exactly necessary but just a reminder that it exists for my future self
     488           1 :   lbm->ip4_src_address = *ip4_address;
     489           1 :   lbm->ip6_src_address = *ip6_address;
     490           1 :   lbm->per_cpu_sticky_buckets = per_cpu_sticky_buckets;
     491           1 :   lbm->flow_timeout = flow_timeout;
     492           1 :   lb_put_writer_lock();
     493           1 :   return 0;
     494             : }
     495             : 
     496             : 
     497             : 
     498             : static
     499         218 : int lb_vip_port_find_index(ip46_address_t *prefix, u8 plen,
     500             :                            u8 protocol, u16 port,
     501             :                            lb_lkp_type_t lkp_type,
     502             :                            u32 *vip_index)
     503             : {
     504         218 :   lb_main_t *lbm = &lb_main;
     505             :   lb_vip_t *vip;
     506             :   /* This must be called with the lock owned */
     507         218 :   CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
     508         218 :   ip46_prefix_normalize(prefix, plen);
     509        1848 :   pool_foreach (vip, lbm->vips) {
     510        1790 :       if ((vip->flags & LB_AS_FLAGS_USED) &&
     511         407 :           vip->plen == plen &&
     512         189 :           vip->prefix.as_u64[0] == prefix->as_u64[0] &&
     513         189 :           vip->prefix.as_u64[1] == prefix->as_u64[1])
     514             :         {
     515         189 :           if((lkp_type == LB_LKP_SAME_IP_PORT &&
     516         169 :                vip->protocol == protocol &&
     517         189 :                vip->port == port) ||
     518           1 :              (lkp_type == LB_LKP_ALL_PORT_IP &&
     519          32 :                vip->port == 0) ||
     520          19 :              (lkp_type == LB_LKP_DIFF_IP_PORT &&
     521          19 :                 (vip->protocol != protocol ||
     522          19 :                 vip->port != port) ) )
     523             :             {
     524         160 :               *vip_index = vip - lbm->vips;
     525         160 :               return 0;
     526             :             }
     527             :         }
     528             :   }
     529          58 :   return VNET_API_ERROR_NO_SUCH_ENTRY;
     530             : }
     531             : 
     532             : static
     533         174 : int lb_vip_port_find_index_with_lock(ip46_address_t *prefix, u8 plen,
     534             :                                      u8 protocol, u16 port, u32 *vip_index)
     535             : {
     536         174 :   return lb_vip_port_find_index(prefix, plen, protocol, port,
     537             :                                 LB_LKP_SAME_IP_PORT, vip_index);
     538             : }
     539             : 
     540             : static
     541           9 : int lb_vip_port_find_all_port_vip(ip46_address_t *prefix, u8 plen,
     542             :                                   u32 *vip_index)
     543             : {
     544           9 :   return lb_vip_port_find_index(prefix, plen, ~0, 0,
     545             :                                 LB_LKP_ALL_PORT_IP, vip_index);
     546             : }
     547             : 
     548             : /* Find out per-port-vip entry with different protocol and port */
     549             : static
     550          35 : int lb_vip_port_find_diff_port(ip46_address_t *prefix, u8 plen,
     551             :                                u8 protocol, u16 port, u32 *vip_index)
     552             : {
     553          35 :   return lb_vip_port_find_index(prefix, plen, protocol, port,
     554             :                                 LB_LKP_DIFF_IP_PORT, vip_index);
     555             : }
     556             : 
     557         157 : int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u8 protocol,
     558             :                       u16 port, u32 *vip_index)
     559             : {
     560             :   int ret;
     561         157 :   lb_get_writer_lock();
     562         157 :   ret = lb_vip_port_find_index_with_lock(prefix, plen,
     563             :                                          protocol, port, vip_index);
     564         157 :   lb_put_writer_lock();
     565         157 :   return ret;
     566             : }
     567             : 
     568         211 : static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index)
     569             : {
     570         211 :   lb_main_t *lbm = &lb_main;
     571             :   /* This must be called with the lock owned */
     572         211 :   CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
     573             :   lb_as_t *as;
     574             :   u32 *asi;
     575         631 :   pool_foreach (asi, vip->as_indexes) {
     576         560 :       as = &lbm->ass[*asi];
     577         560 :       if (as->vip_index == (vip - lbm->vips) &&
     578         560 :           as->address.as_u64[0] == address->as_u64[0] &&
     579         560 :           as->address.as_u64[1] == address->as_u64[1])
     580             :       {
     581         140 :         *as_index = as - lbm->ass;
     582         140 :         return 0;
     583             :       }
     584             :   }
     585          71 :   return -1;
     586             : }
     587             : 
     588          71 : int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
     589             : {
     590          71 :   lb_main_t *lbm = &lb_main;
     591          71 :   lb_get_writer_lock();
     592             :   lb_vip_t *vip;
     593          71 :   if (!(vip = lb_vip_get_by_index(vip_index))) {
     594           0 :     lb_put_writer_lock();
     595           0 :     return VNET_API_ERROR_NO_SUCH_ENTRY;
     596             :   }
     597             : 
     598          71 :   ip46_type_t type = lb_encap_is_ip4(vip)?IP46_TYPE_IP4:IP46_TYPE_IP6;
     599          71 :   u32 *to_be_added = 0;
     600          71 :   u32 *to_be_updated = 0;
     601             :   u32 i;
     602             :   u32 *ip;
     603             :   lb_snat_mapping_t *m;
     604             : 
     605             :   //Sanity check
     606         142 :   while (n--) {
     607             : 
     608          71 :     if (!lb_as_find_index_vip(vip, &addresses[n], &i)) {
     609           0 :       if (lbm->ass[i].flags & LB_AS_FLAGS_USED) {
     610           0 :         vec_free(to_be_added);
     611           0 :         vec_free(to_be_updated);
     612           0 :         lb_put_writer_lock();
     613           0 :         return VNET_API_ERROR_VALUE_EXIST;
     614             :       }
     615           0 :       vec_add1(to_be_updated, i);
     616           0 :       goto next;
     617             :     }
     618             : 
     619          71 :     if (ip46_address_type(&addresses[n]) != type) {
     620           0 :       vec_free(to_be_added);
     621           0 :       vec_free(to_be_updated);
     622           0 :       lb_put_writer_lock();
     623           0 :       return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
     624             :     }
     625             : 
     626          71 :     if (n) {
     627           0 :       u32 n2 = n;
     628           0 :       while(n2--) //Check for duplicates
     629           0 :         if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
     630           0 :             addresses[n2].as_u64[1] == addresses[n].as_u64[1])
     631           0 :           goto next;
     632             :     }
     633             : 
     634          71 :     vec_add1(to_be_added, n);
     635             : 
     636          71 : next:
     637          71 :     continue;
     638             :   }
     639             : 
     640             :   //Update reused ASs
     641          71 :   vec_foreach(ip, to_be_updated) {
     642           0 :     lbm->ass[*ip].flags = LB_AS_FLAGS_USED;
     643             :   }
     644          71 :   vec_free(to_be_updated);
     645             : 
     646             :   //Create those who have to be created
     647         142 :   vec_foreach(ip, to_be_added) {
     648             :     lb_as_t *as;
     649             :     u32 *as_index;
     650          71 :     pool_get(lbm->ass, as);
     651          71 :     as->address = addresses[*ip];
     652          71 :     as->flags = LB_AS_FLAGS_USED;
     653          71 :     as->vip_index = vip_index;
     654          71 :     pool_get(vip->as_indexes, as_index);
     655          71 :     *as_index = as - lbm->ass;
     656             : 
     657             :     /*
     658             :      * become a child of the FIB entry
     659             :      * so we are informed when its forwarding changes
     660             :      */
     661          71 :     fib_prefix_t nh = {};
     662          71 :     if (lb_encap_is_ip4(vip)) {
     663          45 :         nh.fp_addr.ip4 = as->address.ip4;
     664          45 :         nh.fp_len = 32;
     665          45 :         nh.fp_proto = FIB_PROTOCOL_IP4;
     666             :     } else {
     667          26 :         nh.fp_addr.ip6 = as->address.ip6;
     668          26 :         nh.fp_len = 128;
     669          26 :         nh.fp_proto = FIB_PROTOCOL_IP6;
     670             :     }
     671             : 
     672         142 :     as->next_hop_fib_entry_index =
     673          71 :         fib_table_entry_special_add(0,
     674             :                                     &nh,
     675             :                                     FIB_SOURCE_RR,
     676             :                                     FIB_ENTRY_FLAG_NONE);
     677         142 :     as->next_hop_child_index =
     678          71 :         fib_entry_child_add(as->next_hop_fib_entry_index,
     679          71 :                             lbm->fib_node_type,
     680          71 :                             as - lbm->ass);
     681             : 
     682          71 :     lb_as_stack(as);
     683             : 
     684          71 :     if ( lb_vip_is_nat4_port(vip) || lb_vip_is_nat6_port(vip) )
     685             :       {
     686             :         /* Add SNAT static mapping */
     687          10 :         pool_get (lbm->snat_mappings, m);
     688          10 :         clib_memset (m, 0, sizeof (*m));
     689          10 :         if (lb_vip_is_nat4_port(vip)) {
     690             :             lb_snat4_key_t m_key4;
     691             :             clib_bihash_kv_8_8_t kv4;
     692           5 :             m_key4.addr = as->address.ip4;
     693           5 :             m_key4.port = vip->encap_args.target_port;
     694           5 :             m_key4.protocol = 0;
     695           5 :             m_key4.fib_index = 0;
     696             : 
     697           5 :             if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
     698             :               {
     699           5 :                 m->src_ip.ip4 = vip->prefix.ip4;
     700             :               }
     701           0 :             else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT)
     702             :               {
     703           0 :                 m->src_ip.ip4 = lbm->ip4_src_address;
     704             :               }
     705           5 :             m->src_ip_is_ipv6 = 0;
     706           5 :             m->as_ip.ip4 = as->address.ip4;
     707           5 :             m->as_ip_is_ipv6 = 0;
     708           5 :             m->src_port = vip->port;
     709           5 :             m->target_port = vip->encap_args.target_port;
     710           5 :             m->vrf_id = 0;
     711           5 :             m->fib_index = 0;
     712             : 
     713           5 :             kv4.key = m_key4.as_u64;
     714           5 :             kv4.value = m - lbm->snat_mappings;
     715           5 :             clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 1);
     716             :         } else {
     717             :             lb_snat6_key_t m_key6;
     718             :             clib_bihash_kv_24_8_t kv6;
     719           5 :             m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0];
     720           5 :             m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1];
     721           5 :             m_key6.port = vip->encap_args.target_port;
     722           5 :             m_key6.protocol = 0;
     723           5 :             m_key6.fib_index = 0;
     724             : 
     725           5 :             if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
     726             :               {
     727           5 :                 m->src_ip.ip6.as_u64[0] = vip->prefix.ip6.as_u64[0];
     728           5 :                 m->src_ip.ip6.as_u64[1] = vip->prefix.ip6.as_u64[1];
     729             :               }
     730           0 :             else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT)
     731             :               {
     732           0 :                 m->src_ip.ip6.as_u64[0] = lbm->ip6_src_address.as_u64[0];
     733           0 :                 m->src_ip.ip6.as_u64[1] = lbm->ip6_src_address.as_u64[1];
     734             :               }
     735           5 :             m->src_ip_is_ipv6 = 1;
     736           5 :             m->as_ip.ip6.as_u64[0] = as->address.ip6.as_u64[0];
     737           5 :             m->as_ip.ip6.as_u64[1] = as->address.ip6.as_u64[1];
     738           5 :             m->as_ip_is_ipv6 = 1;
     739           5 :             m->src_port = vip->port;
     740           5 :             m->target_port = vip->encap_args.target_port;
     741           5 :             m->vrf_id = 0;
     742           5 :             m->fib_index = 0;
     743             : 
     744           5 :             kv6.key[0] = m_key6.as_u64[0];
     745           5 :             kv6.key[1] = m_key6.as_u64[1];
     746           5 :             kv6.key[2] = m_key6.as_u64[2];
     747           5 :             kv6.value = m - lbm->snat_mappings;
     748           5 :             clib_bihash_add_del_24_8(&lbm->mapping_by_as6, &kv6, 1);
     749             :         }
     750             :       }
     751             :   }
     752          71 :   vec_free(to_be_added);
     753             : 
     754             :   //Recompute flows
     755          71 :   lb_vip_update_new_flow_table(vip);
     756             : 
     757             :   //Garbage collection maybe
     758          71 :   lb_vip_garbage_collection(vip);
     759             : 
     760          71 :   lb_put_writer_lock();
     761          71 :   return 0;
     762             : }
     763             : 
     764             : int
     765          14 : lb_flush_vip_as (u32 vip_index, u32 as_index)
     766             : {
     767             :   u32 thread_index;
     768          14 :   vlib_thread_main_t *tm = vlib_get_thread_main();
     769          14 :   lb_main_t *lbm = &lb_main;
     770             : 
     771          28 :   for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
     772          14 :     lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
     773          14 :     if (h != NULL) {
     774             :         u32 i;
     775             :         lb_hash_bucket_t *b;
     776             : 
     777       71694 :         lb_hash_foreach_entry(h, b, i) {
     778       57344 :           if ((vip_index == ~0)
     779           0 :               || ((b->vip[i] == vip_index) && (as_index == ~0))
     780           0 :               || ((b->vip[i] == vip_index) && (b->value[i] == as_index)))
     781             :             {
     782       57344 :               vlib_refcount_add(&lbm->as_refcount, thread_index, b->value[i], -1);
     783       57344 :               vlib_refcount_add(&lbm->as_refcount, thread_index, 0, 1);
     784       57344 :               b->vip[i] = ~0;
     785       57344 :               b->value[i] = 0;
     786             :             }
     787             :         }
     788          14 :         if (vip_index == ~0)
     789             :           {
     790          14 :             lb_hash_free(h);
     791          14 :             lbm->per_cpu[thread_index].sticky_ht = 0;
     792             :           }
     793             :       }
     794             :     }
     795             : 
     796          14 :   return 0;
     797             : }
     798             : 
     799          84 : int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n,
     800             :                             u8 flush)
     801             : {
     802          84 :   lb_main_t *lbm = &lb_main;
     803          84 :   u32 now = (u32) vlib_time_now(vlib_get_main());
     804          84 :   u32 *ip = 0;
     805          84 :   u32 as_index = 0;
     806             : 
     807             :   lb_vip_t *vip;
     808          84 :   if (!(vip = lb_vip_get_by_index(vip_index))) {
     809           0 :     return VNET_API_ERROR_NO_SUCH_ENTRY;
     810             :   }
     811             : 
     812          84 :   u32 *indexes = NULL;
     813         224 :   while (n--) {
     814         140 :     if (lb_as_find_index_vip(vip, &addresses[n], &as_index)) {
     815           0 :       vec_free(indexes);
     816           0 :       return VNET_API_ERROR_NO_SUCH_ENTRY;
     817             :     }
     818             : 
     819         140 :     if (n) { //Check for duplicates
     820          56 :       u32 n2 = n - 1;
     821         140 :       while(n2--) {
     822          84 :         if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
     823          84 :             addresses[n2].as_u64[1] == addresses[n].as_u64[1])
     824           0 :           goto next;
     825             :       }
     826             :     }
     827             : 
     828         140 :     vec_add1(indexes, as_index);
     829         140 : next:
     830         140 :   continue;
     831             :   }
     832             : 
     833             :   //Garbage collection maybe
     834          84 :   lb_vip_garbage_collection(vip);
     835             : 
     836          84 :   if (indexes != NULL) {
     837         224 :     vec_foreach(ip, indexes) {
     838         140 :       lbm->ass[*ip].flags &= ~LB_AS_FLAGS_USED;
     839         140 :       lbm->ass[*ip].last_used = now;
     840             : 
     841         140 :       if(flush)
     842             :         {
     843             :           /* flush flow table for deleted ASs*/
     844           0 :           lb_flush_vip_as(vip_index, *ip);
     845             :         }
     846             :     }
     847             : 
     848             :     //Recompute flows
     849          84 :     lb_vip_update_new_flow_table(vip);
     850             :   }
     851             : 
     852          84 :   vec_free(indexes);
     853          84 :   return 0;
     854             : }
     855             : 
     856          70 : int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n, u8 flush)
     857             : {
     858          70 :   lb_get_writer_lock();
     859          70 :   int ret = lb_vip_del_ass_withlock(vip_index, addresses, n, flush);
     860          70 :   lb_put_writer_lock();
     861             : 
     862          70 :   return ret;
     863             : }
     864             : 
     865             : static int
     866           8 : lb_vip_prefix_index_alloc (lb_main_t *lbm)
     867             : {
     868             :   /*
     869             :    * Check for dynamically allocated instance number.
     870             :    */
     871             :   u32 bit;
     872             : 
     873           8 :   bit = clib_bitmap_first_clear (lbm->vip_prefix_indexes);
     874             : 
     875           8 :   lbm->vip_prefix_indexes = clib_bitmap_set(lbm->vip_prefix_indexes, bit, 1);
     876             : 
     877           8 :   return bit;
     878             : }
     879             : 
     880             : static int
     881           8 : lb_vip_prefix_index_free (lb_main_t *lbm, u32 instance)
     882             : {
     883             : 
     884           8 :   if (clib_bitmap_get (lbm->vip_prefix_indexes, instance) == 0)
     885             :     {
     886           0 :       return -1;
     887             :     }
     888             : 
     889           8 :   lbm->vip_prefix_indexes = clib_bitmap_set (lbm->vip_prefix_indexes,
     890             :                                              instance, 0);
     891             : 
     892           8 :   return 0;
     893             : }
     894             : 
     895             : /**
     896             :  * Add the VIP adjacency to the ip4 or ip6 fib
     897             :  */
     898          17 : static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip,
     899             :                                  u32 *vip_prefix_index)
     900             : {
     901          17 :   dpo_proto_t proto = 0;
     902          17 :   dpo_type_t dpo_type = 0;
     903          17 :   u32 vip_idx = 0;
     904             : 
     905          17 :   if (vip->port != 0)
     906             :     {
     907             :       /* for per-port vip, if VIP adjacency has been added,
     908             :        * no need to add adjacency. */
     909           9 :       if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen,
     910           9 :                                       vip->protocol, vip->port, &vip_idx))
     911             :         {
     912           1 :           lb_vip_t *exists_vip = lb_vip_get_by_index(vip_idx);
     913           1 :           *vip_prefix_index = exists_vip ? exists_vip->vip_prefix_index : ~0;
     914           1 :           return;
     915             :         }
     916             : 
     917             :       /* Allocate an index for per-port vip */
     918           8 :       *vip_prefix_index = lb_vip_prefix_index_alloc(lbm);
     919             :     }
     920             :   else
     921             :     {
     922           8 :       *vip_prefix_index = vip - lbm->vips;
     923             :     }
     924             : 
     925          16 :   dpo_id_t dpo = DPO_INVALID;
     926          16 :   fib_prefix_t pfx = {};
     927          16 :   if (lb_vip_is_ip4(vip->type)) {
     928           9 :       pfx.fp_addr.ip4 = vip->prefix.ip4;
     929           9 :       pfx.fp_len = vip->plen - 96;
     930           9 :       pfx.fp_proto = FIB_PROTOCOL_IP4;
     931           9 :       proto = DPO_PROTO_IP4;
     932             :   } else {
     933           7 :       pfx.fp_addr.ip6 = vip->prefix.ip6;
     934           7 :       pfx.fp_len = vip->plen;
     935           7 :       pfx.fp_proto = FIB_PROTOCOL_IP6;
     936           7 :       proto = DPO_PROTO_IP6;
     937             :   }
     938             : 
     939          16 :   if (lb_vip_is_gre4(vip))
     940           2 :     dpo_type = lbm->dpo_gre4_type;
     941          14 :   else if (lb_vip_is_gre6(vip))
     942           4 :     dpo_type = lbm->dpo_gre6_type;
     943          10 :   else if (lb_vip_is_gre4_port(vip))
     944           2 :     dpo_type = lbm->dpo_gre4_port_type;
     945           8 :   else if (lb_vip_is_gre6_port(vip))
     946           2 :     dpo_type = lbm->dpo_gre6_port_type;
     947           6 :   else if (lb_vip_is_l3dsr(vip))
     948           2 :     dpo_type = lbm->dpo_l3dsr_type;
     949           4 :   else if (lb_vip_is_l3dsr_port(vip))
     950           2 :     dpo_type = lbm->dpo_l3dsr_port_type;
     951           2 :   else if(lb_vip_is_nat4_port(vip))
     952           1 :     dpo_type = lbm->dpo_nat4_port_type;
     953           1 :   else if (lb_vip_is_nat6_port(vip))
     954           1 :     dpo_type = lbm->dpo_nat6_port_type;
     955             : 
     956          16 :   dpo_set(&dpo, dpo_type, proto, *vip_prefix_index);
     957          16 :   fib_table_entry_special_dpo_add(0,
     958             :                                   &pfx,
     959             :                                   lb_fib_src,
     960             :                                   FIB_ENTRY_FLAG_EXCLUSIVE,
     961             :                                   &dpo);
     962          16 :   dpo_reset(&dpo);
     963             : }
     964             : 
     965             : /**
     966             :  * Add the VIP filter entry
     967             :  */
     968           9 : static int lb_vip_add_port_filter(lb_main_t *lbm, lb_vip_t *vip,
     969             :                                   u32 vip_prefix_index, u32 vip_idx)
     970             : {
     971             :   vip_port_key_t key;
     972             :   clib_bihash_kv_8_8_t kv;
     973             : 
     974           9 :   key.vip_prefix_index = vip_prefix_index;
     975           9 :   key.protocol = vip->protocol;
     976           9 :   key.port = clib_host_to_net_u16(vip->port);
     977           9 :   key.rsv = 0;
     978             : 
     979           9 :   kv.key = key.as_u64;
     980           9 :   kv.value = vip_idx;
     981           9 :   clib_bihash_add_del_8_8(&lbm->vip_index_per_port, &kv, 1);
     982             : 
     983           9 :   return 0;
     984             : }
     985             : 
     986             : /**
     987             :  * Del the VIP filter entry
     988             :  */
     989           9 : static int lb_vip_del_port_filter(lb_main_t *lbm, lb_vip_t *vip)
     990             : {
     991             :   vip_port_key_t key;
     992             :   clib_bihash_kv_8_8_t kv, value;
     993           9 :   lb_vip_t *m = 0;
     994             : 
     995           9 :   key.vip_prefix_index = vip->vip_prefix_index;
     996           9 :   key.protocol = vip->protocol;
     997           9 :   key.port = clib_host_to_net_u16(vip->port);
     998           9 :   key.rsv = 0;
     999             : 
    1000           9 :   kv.key = key.as_u64;
    1001           9 :   if(clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) != 0)
    1002             :     {
    1003           0 :       clib_warning("looking up vip_index_per_port failed.");
    1004           0 :       return VNET_API_ERROR_NO_SUCH_ENTRY;
    1005             :     }
    1006           9 :   m = pool_elt_at_index (lbm->vips, value.value);
    1007           9 :   ASSERT (m);
    1008             : 
    1009           9 :   kv.value = m - lbm->vips;
    1010           9 :   clib_bihash_add_del_8_8(&lbm->vip_index_per_port, &kv, 0);
    1011             : 
    1012           9 :   return 0;
    1013             : }
    1014             : 
    1015             : /**
    1016             :  * Deletes the adjacency associated with the VIP
    1017             :  */
    1018          16 : static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip)
    1019             : {
    1020          16 :   fib_prefix_t pfx = {};
    1021          16 :   u32 vip_idx = 0;
    1022             : 
    1023          16 :   if (vip->port != 0)
    1024             :     {
    1025             :       /* If this vip adjacency is used by other per-port vip,
    1026             :        * no need to del this adjacency. */
    1027           9 :       if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen,
    1028           9 :                                       vip->protocol, vip->port, &vip_idx))
    1029             :         {
    1030           1 :           lb_put_writer_lock();
    1031           1 :           return;
    1032             :         }
    1033             : 
    1034             :       /* Return vip_prefix_index for per-port vip */
    1035           8 :       lb_vip_prefix_index_free(lbm, vip->vip_prefix_index);
    1036             : 
    1037             :     }
    1038             : 
    1039          15 :   if (lb_vip_is_ip4(vip->type)) {
    1040           9 :       pfx.fp_addr.ip4 = vip->prefix.ip4;
    1041           9 :       pfx.fp_len = vip->plen - 96;
    1042           9 :       pfx.fp_proto = FIB_PROTOCOL_IP4;
    1043             :   } else {
    1044           6 :       pfx.fp_addr.ip6 = vip->prefix.ip6;
    1045           6 :       pfx.fp_len = vip->plen;
    1046           6 :       pfx.fp_proto = FIB_PROTOCOL_IP6;
    1047             :   }
    1048          15 :   fib_table_entry_special_remove(0, &pfx, lb_fib_src);
    1049             : }
    1050             : 
    1051          17 : int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
    1052             : {
    1053          17 :   lb_main_t *lbm = &lb_main;
    1054          17 :   vlib_main_t *vm = vlib_get_main();
    1055             :   lb_vip_t *vip;
    1056          17 :   lb_vip_type_t type = args.type;
    1057          17 :   u32 vip_prefix_index = 0;
    1058             : 
    1059          17 :   lb_get_writer_lock();
    1060          17 :   ip46_prefix_normalize(&(args.prefix), args.plen);
    1061             : 
    1062          17 :   if (!lb_vip_port_find_index_with_lock(&(args.prefix), args.plen,
    1063          17 :                                          args.protocol, args.port,
    1064             :                                          vip_index))
    1065             :     {
    1066           0 :       lb_put_writer_lock();
    1067           0 :       return VNET_API_ERROR_VALUE_EXIST;
    1068             :     }
    1069             : 
    1070             :   /* Make sure we can't add a per-port VIP entry
    1071             :    * when there already is an all-port VIP for the same prefix. */
    1072          26 :   if ((args.port != 0) &&
    1073           9 :       !lb_vip_port_find_all_port_vip(&(args.prefix), args.plen, vip_index))
    1074             :     {
    1075           0 :       lb_put_writer_lock();
    1076           0 :       return VNET_API_ERROR_VALUE_EXIST;
    1077             :     }
    1078             : 
    1079             :   /* Make sure we can't add a all-port VIP entry
    1080             :    * when there already is an per-port VIP for the same prefix. */
    1081          25 :   if ((args.port == 0) &&
    1082           8 :       !lb_vip_port_find_diff_port(&(args.prefix), args.plen,
    1083           8 :                                   args.protocol, args.port, vip_index))
    1084             :     {
    1085           0 :       lb_put_writer_lock();
    1086           0 :       return VNET_API_ERROR_VALUE_EXIST;
    1087             :     }
    1088             : 
    1089             :   /* Make sure all VIP for a given prefix (using different ports) have the same type. */
    1090          26 :   if ((args.port != 0) &&
    1091           9 :       !lb_vip_port_find_diff_port(&(args.prefix), args.plen,
    1092           9 :                                   args.protocol, args.port, vip_index)
    1093           1 :       && (args.type != lbm->vips[*vip_index].type))
    1094             :     {
    1095           0 :       lb_put_writer_lock();
    1096           0 :       return VNET_API_ERROR_INVALID_ARGUMENT;
    1097             :     }
    1098             : 
    1099          17 :   if (!is_pow2(args.new_length)) {
    1100           0 :     lb_put_writer_lock();
    1101           0 :     return VNET_API_ERROR_INVALID_MEMORY_SIZE;
    1102             :   }
    1103             : 
    1104          17 :   if (ip46_prefix_is_ip4(&(args.prefix), args.plen) &&
    1105           8 :       !lb_vip_is_ip4(type)) {
    1106           0 :     lb_put_writer_lock();
    1107           0 :     return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
    1108             :   }
    1109             : 
    1110          17 :   if ((!ip46_prefix_is_ip4(&(args.prefix), args.plen)) &&
    1111           3 :       !lb_vip_is_ip6(type)) {
    1112           0 :     lb_put_writer_lock();
    1113           0 :     return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
    1114             :   }
    1115             : 
    1116          17 :   if ((type == LB_VIP_TYPE_IP4_L3DSR) &&
    1117           5 :       (args.encap_args.dscp >= 64) )
    1118             :     {
    1119           0 :       lb_put_writer_lock();
    1120           0 :       return VNET_API_ERROR_VALUE_EXIST;
    1121             :     }
    1122             : 
    1123             :   //Allocate
    1124          17 :   pool_get(lbm->vips, vip);
    1125             : 
    1126             :   //Init
    1127          17 :   memcpy (&(vip->prefix), &(args.prefix), sizeof(args.prefix));
    1128          17 :   vip->plen = args.plen;
    1129          17 :   if (args.port != 0)
    1130             :     {
    1131           9 :       vip->protocol = args.protocol;
    1132           9 :       vip->port = args.port;
    1133             :     }
    1134             :   else
    1135             :     {
    1136           8 :       vip->protocol = (u8)~0;
    1137           8 :       vip->port = 0;
    1138             :     }
    1139          17 :   vip->last_garbage_collection = (u32) vlib_time_now(vlib_get_main());
    1140          17 :   vip->type = args.type;
    1141             : 
    1142          17 :   if (args.type == LB_VIP_TYPE_IP4_L3DSR) {
    1143           5 :       vip->encap_args.dscp = args.encap_args.dscp;
    1144             :     }
    1145          12 :   else if ((args.type == LB_VIP_TYPE_IP4_NAT4)
    1146          11 :            ||(args.type == LB_VIP_TYPE_IP6_NAT6)) {
    1147           2 :       vip->encap_args.srv_type = args.encap_args.srv_type;
    1148           2 :       vip->encap_args.target_port =
    1149           2 :           clib_host_to_net_u16(args.encap_args.target_port);
    1150             :     }
    1151             : 
    1152          17 :   vip->flags = LB_VIP_FLAGS_USED;
    1153          17 :   if (args.src_ip_sticky)
    1154             :     {
    1155           2 :       vip->flags |= LB_VIP_FLAGS_SRC_IP_STICKY;
    1156             :     }
    1157          17 :   vip->as_indexes = 0;
    1158             : 
    1159             :   //Validate counters
    1160             :   u32 i;
    1161          85 :   for (i = 0; i < LB_N_VIP_COUNTERS; i++) {
    1162          68 :     vlib_validate_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
    1163          68 :     vlib_zero_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
    1164             :   }
    1165             : 
    1166             :   //Configure new flow table
    1167          17 :   vip->new_flow_table_mask = args.new_length - 1;
    1168          17 :   vip->new_flow_table = 0;
    1169             : 
    1170             :   //Update flow hash table
    1171          17 :   lb_vip_update_new_flow_table(vip);
    1172             : 
    1173             :   //Create adjacency to direct traffic
    1174          17 :   lb_vip_add_adjacency(lbm, vip, &vip_prefix_index);
    1175             : 
    1176          17 :   if ( (lb_vip_is_nat4_port(vip) || lb_vip_is_nat6_port(vip))
    1177           2 :       && (args.encap_args.srv_type == LB_SRV_TYPE_NODEPORT) )
    1178             :     {
    1179             :       u32 key;
    1180             :       uword * entry;
    1181             : 
    1182             :       //Create maping from nodeport to vip_index
    1183           0 :       key = clib_host_to_net_u16(args.port);
    1184           0 :       entry = hash_get_mem (lbm->vip_index_by_nodeport, &key);
    1185           0 :       if (entry) {
    1186           0 :         lb_put_writer_lock();
    1187           0 :         return VNET_API_ERROR_VALUE_EXIST;
    1188             :       }
    1189             : 
    1190           0 :       hash_set_mem (lbm->vip_index_by_nodeport, &key, vip - lbm->vips);
    1191             : 
    1192             :       /* receive packets destined to NodeIP:NodePort */
    1193           0 :       udp_register_dst_port (vm, args.port, lb4_nodeport_node.index, 1);
    1194           0 :       udp_register_dst_port (vm, args.port, lb6_nodeport_node.index, 0);
    1195             :     }
    1196             : 
    1197          17 :   *vip_index = vip - lbm->vips;
    1198             :   //Create per-port vip filtering table
    1199          17 :   if (args.port != 0)
    1200             :     {
    1201           9 :       lb_vip_add_port_filter(lbm, vip, vip_prefix_index, *vip_index);
    1202           9 :       vip->vip_prefix_index = vip_prefix_index;
    1203             :     }
    1204             : 
    1205          17 :   lb_put_writer_lock();
    1206          17 :   return 0;
    1207             : }
    1208             : 
    1209          16 : int lb_vip_del(u32 vip_index)
    1210             : {
    1211          16 :   lb_main_t *lbm = &lb_main;
    1212             :   lb_vip_t *vip;
    1213          16 :   int rv = 0;
    1214             : 
    1215             :   /* Does not remove default vip, i.e. vip_index = 0 */
    1216          16 :   if (vip_index == 0)
    1217           0 :     return VNET_API_ERROR_INVALID_VALUE;
    1218             : 
    1219          16 :   lb_get_writer_lock();
    1220          16 :   if (!(vip = lb_vip_get_by_index(vip_index))) {
    1221           0 :     lb_put_writer_lock();
    1222           0 :     return VNET_API_ERROR_NO_SUCH_ENTRY;
    1223             :   }
    1224             : 
    1225             :   //FIXME: This operation is actually not working
    1226             :   //We will need to remove state before performing this.
    1227             : 
    1228             :   {
    1229             :     //Remove all ASs
    1230          16 :     ip46_address_t *ass = 0;
    1231             :     lb_as_t *as;
    1232             :     u32 *as_index;
    1233             : 
    1234          86 :     pool_foreach (as_index, vip->as_indexes) {
    1235          70 :         as = &lbm->ass[*as_index];
    1236          70 :         vec_add1(ass, as->address);
    1237             :     }
    1238          16 :     if (vec_len(ass))
    1239          14 :       lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass), 0);
    1240          16 :     vec_free(ass);
    1241             :   }
    1242             : 
    1243             :   //Delete adjacency
    1244          16 :   lb_vip_del_adjacency(lbm, vip);
    1245             : 
    1246             :   //Delete per-port vip filtering entry
    1247          16 :   if (vip->port != 0)
    1248             :     {
    1249           9 :       rv = lb_vip_del_port_filter(lbm, vip);
    1250             :     }
    1251             : 
    1252             :   //Set the VIP as unused
    1253          16 :   vip->flags &= ~LB_VIP_FLAGS_USED;
    1254             : 
    1255          16 :   lb_put_writer_lock();
    1256          16 :   return rv;
    1257             : }
    1258             : 
    1259             : /* *INDENT-OFF* */
    1260             : VLIB_PLUGIN_REGISTER () = {
    1261             :     .version = VPP_BUILD_VER,
    1262             :     .description = "Load Balancer (LB)",
    1263             : };
    1264             : /* *INDENT-ON* */
    1265             : 
    1266           0 : u8 *format_lb_dpo (u8 * s, va_list * va)
    1267             : {
    1268           0 :   index_t index = va_arg (*va, index_t);
    1269           0 :   CLIB_UNUSED(u32 indent) = va_arg (*va, u32);
    1270           0 :   lb_main_t *lbm = &lb_main;
    1271           0 :   lb_vip_t *vip = pool_elt_at_index (lbm->vips, index);
    1272           0 :   return format (s, "%U", format_lb_vip, vip);
    1273             : }
    1274             : 
    1275          96 : static void lb_dpo_lock (dpo_id_t *dpo) {}
    1276          93 : static void lb_dpo_unlock (dpo_id_t *dpo) {}
    1277             : 
    1278             : static fib_node_t *
    1279         230 : lb_fib_node_get_node (fib_node_index_t index)
    1280             : {
    1281         230 :   lb_main_t *lbm = &lb_main;
    1282         230 :   lb_as_t *as = pool_elt_at_index (lbm->ass, index);
    1283         230 :   return (&as->fib_node);
    1284             : }
    1285             : 
    1286             : static void
    1287           0 : lb_fib_node_last_lock_gone (fib_node_t *node)
    1288             : {
    1289           0 : }
    1290             : 
    1291             : static lb_as_t *
    1292         230 : lb_as_from_fib_node (fib_node_t *node)
    1293             : {
    1294         230 :   return ((lb_as_t*)(((char*)node) -
    1295             :       STRUCT_OFFSET_OF(lb_as_t, fib_node)));
    1296             : }
    1297             : 
    1298             : static void
    1299         301 : lb_as_stack (lb_as_t *as)
    1300             : {
    1301         301 :   lb_main_t *lbm = &lb_main;
    1302         301 :   lb_vip_t *vip = &lbm->vips[as->vip_index];
    1303         301 :   dpo_type_t dpo_type = 0;
    1304             : 
    1305         301 :   if (lb_vip_is_gre4(vip))
    1306          55 :     dpo_type = lbm->dpo_gre4_type;
    1307         246 :   else if (lb_vip_is_gre6(vip))
    1308          41 :     dpo_type = lbm->dpo_gre6_type;
    1309         205 :   else if (lb_vip_is_gre4_port(vip))
    1310          45 :     dpo_type = lbm->dpo_gre4_port_type;
    1311         160 :   else if (lb_vip_is_gre6_port(vip))
    1312          30 :     dpo_type = lbm->dpo_gre6_port_type;
    1313         130 :   else if (lb_vip_is_l3dsr(vip))
    1314          55 :     dpo_type = lbm->dpo_l3dsr_type;
    1315          75 :   else if (lb_vip_is_l3dsr_port(vip))
    1316          55 :     dpo_type = lbm->dpo_l3dsr_port_type;
    1317          20 :   else if(lb_vip_is_nat4_port(vip))
    1318          15 :     dpo_type = lbm->dpo_nat4_port_type;
    1319           5 :   else if (lb_vip_is_nat6_port(vip))
    1320           5 :     dpo_type = lbm->dpo_nat6_port_type;
    1321             : 
    1322         301 :   dpo_stack(dpo_type,
    1323         301 :             lb_vip_is_ip4(vip->type)?DPO_PROTO_IP4:DPO_PROTO_IP6,
    1324             :             &as->dpo,
    1325             :             fib_entry_contribute_ip_forwarding(
    1326             :                 as->next_hop_fib_entry_index));
    1327         301 : }
    1328             : 
    1329             : static fib_node_back_walk_rc_t
    1330         230 : lb_fib_node_back_walk_notify (fib_node_t *node,
    1331             :                  fib_node_back_walk_ctx_t *ctx)
    1332             : {
    1333         230 :     lb_as_stack(lb_as_from_fib_node(node));
    1334         230 :     return (FIB_NODE_BACK_WALK_CONTINUE);
    1335             : }
    1336             : 
    1337           0 : int lb_nat4_interface_add_del (u32 sw_if_index, int is_del)
    1338             : {
    1339           0 :   if (is_del)
    1340             :     {
    1341           0 :       vnet_feature_enable_disable ("ip4-unicast", "lb-nat4-in2out",
    1342             :                                    sw_if_index, 0, 0, 0);
    1343             :     }
    1344             :   else
    1345             :     {
    1346           0 :       vnet_feature_enable_disable ("ip4-unicast", "lb-nat4-in2out",
    1347             :                                    sw_if_index, 1, 0, 0);
    1348             :     }
    1349             : 
    1350           0 :   return 0;
    1351             : }
    1352             : 
    1353           0 : int lb_nat6_interface_add_del (u32 sw_if_index, int is_del)
    1354             : {
    1355           0 :   if (is_del)
    1356             :     {
    1357           0 :       vnet_feature_enable_disable ("ip6-unicast", "lb-nat6-in2out",
    1358             :                                    sw_if_index, 0, 0, 0);
    1359             :     }
    1360             :   else
    1361             :     {
    1362           0 :       vnet_feature_enable_disable ("ip6-unicast", "lb-nat6-in2out",
    1363             :                                    sw_if_index, 1, 0, 0);
    1364             :     }
    1365             : 
    1366           0 :   return 0;
    1367             : }
    1368             : 
    1369             : clib_error_t *
    1370         575 : lb_init (vlib_main_t * vm)
    1371             : {
    1372         575 :   vlib_thread_main_t *tm = vlib_get_thread_main ();
    1373         575 :   lb_main_t *lbm = &lb_main;
    1374         575 :   lbm->vnet_main = vnet_get_main ();
    1375         575 :   lbm->vlib_main = vm;
    1376             : 
    1377             :   lb_vip_t *default_vip;
    1378             :   lb_as_t *default_as;
    1379         575 :   fib_node_vft_t lb_fib_node_vft = {
    1380             :       .fnv_get = lb_fib_node_get_node,
    1381             :       .fnv_last_lock = lb_fib_node_last_lock_gone,
    1382             :       .fnv_back_walk = lb_fib_node_back_walk_notify,
    1383             :   };
    1384         575 :   dpo_vft_t lb_vft = {
    1385             :       .dv_lock = lb_dpo_lock,
    1386             :       .dv_unlock = lb_dpo_unlock,
    1387             :       .dv_format = format_lb_dpo,
    1388             :   };
    1389             : 
    1390             :   //Allocate and init default VIP.
    1391         575 :   lbm->vips = 0;
    1392         575 :   pool_get(lbm->vips, default_vip);
    1393         575 :   default_vip->new_flow_table_mask = 0;
    1394         575 :   default_vip->prefix.ip6.as_u64[0] = 0xffffffffffffffffL;
    1395         575 :   default_vip->prefix.ip6.as_u64[1] = 0xffffffffffffffffL;
    1396         575 :   default_vip->protocol = ~0;
    1397         575 :   default_vip->port = 0;
    1398         575 :   default_vip->flags = LB_VIP_FLAGS_USED;
    1399             : 
    1400         575 :   lbm->per_cpu = 0;
    1401         575 :   vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1);
    1402         575 :   clib_spinlock_init (&lbm->writer_lock);
    1403         575 :   lbm->per_cpu_sticky_buckets = LB_DEFAULT_PER_CPU_STICKY_BUCKETS;
    1404         575 :   lbm->flow_timeout = LB_DEFAULT_FLOW_TIMEOUT;
    1405         575 :   lbm->ip4_src_address.as_u32 = 0xffffffff;
    1406         575 :   lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL;
    1407         575 :   lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL;
    1408         575 :   lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes);
    1409         575 :   lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes);
    1410         575 :   lbm->dpo_gre4_port_type = dpo_register_new_type(&lb_vft,
    1411             :                                                   lb_dpo_gre4_port_nodes);
    1412         575 :   lbm->dpo_gre6_port_type = dpo_register_new_type(&lb_vft,
    1413             :                                                   lb_dpo_gre6_port_nodes);
    1414         575 :   lbm->dpo_l3dsr_type = dpo_register_new_type(&lb_vft,
    1415             :                                               lb_dpo_l3dsr_nodes);
    1416         575 :   lbm->dpo_l3dsr_port_type = dpo_register_new_type(&lb_vft,
    1417             :                                                    lb_dpo_l3dsr_port_nodes);
    1418         575 :   lbm->dpo_nat4_port_type = dpo_register_new_type(&lb_vft,
    1419             :                                                   lb_dpo_nat4_port_nodes);
    1420         575 :   lbm->dpo_nat6_port_type = dpo_register_new_type(&lb_vft,
    1421             :                                                   lb_dpo_nat6_port_nodes);
    1422         575 :   lbm->fib_node_type = fib_node_register_new_type ("lb", &lb_fib_node_vft);
    1423             : 
    1424             :   //Init AS reference counters
    1425         575 :   vlib_refcount_init(&lbm->as_refcount);
    1426             : 
    1427             :   //Allocate and init default AS.
    1428         575 :   lbm->ass = 0;
    1429         575 :   pool_get(lbm->ass, default_as);
    1430         575 :   default_as->flags = 0;
    1431         575 :   default_as->dpo.dpoi_next_node = LB_NEXT_DROP;
    1432         575 :   default_as->vip_index = ~0;
    1433         575 :   default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL;
    1434         575 :   default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL;
    1435             : 
    1436             :   /* Generate a valid flow table for default VIP */
    1437         575 :   default_vip->as_indexes = NULL;
    1438         575 :   lb_get_writer_lock();
    1439         575 :   lb_vip_update_new_flow_table(default_vip);
    1440         575 :   lb_put_writer_lock();
    1441             : 
    1442             :   lbm->vip_index_by_nodeport
    1443         575 :     = hash_create_mem (0, sizeof(u16), sizeof (uword));
    1444             : 
    1445         575 :   clib_bihash_init_8_8 (&lbm->vip_index_per_port,
    1446             :                         "vip_index_per_port", LB_VIP_PER_PORT_BUCKETS,
    1447             :                         LB_VIP_PER_PORT_MEMORY_SIZE);
    1448             : 
    1449         575 :   clib_bihash_init_8_8 (&lbm->mapping_by_as4,
    1450             :                         "mapping_by_as4", LB_MAPPING_BUCKETS,
    1451             :                         LB_MAPPING_MEMORY_SIZE);
    1452             : 
    1453         575 :   clib_bihash_init_24_8 (&lbm->mapping_by_as6,
    1454             :                         "mapping_by_as6", LB_MAPPING_BUCKETS,
    1455             :                         LB_MAPPING_MEMORY_SIZE);
    1456             : 
    1457             : #define _(a,b,c) lbm->vip_counters[c].name = b;
    1458         575 :   lb_foreach_vip_counter
    1459             : #undef _
    1460             : 
    1461         575 :   lb_fib_src = fib_source_allocate("lb",
    1462             :                                    FIB_SOURCE_PRIORITY_HI,
    1463             :                                    FIB_SOURCE_BH_SIMPLE);
    1464             : 
    1465         575 :   return NULL;
    1466             : }
    1467             : 
    1468        1151 : VLIB_INIT_FUNCTION (lb_init);

Generated by: LCOV version 1.14