LCOV - code coverage report
Current view: top level - vnet/ip - ip_path_mtu.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 272 287 94.8 %
Date: 2023-10-26 01:39:38 Functions: 39 42 92.9 %

          Line data    Source code
       1             : /*
       2             :  *------------------------------------------------------------------
       3             :  * ip_path_mtu.c
       4             :  *
       5             :  * Copyright (c) 2021 Graphiant.
       6             :  * Licensed under the Apache License, Version 2.0 (the "License");
       7             :  * you may not use this file except in compliance with the License.
       8             :  * You may obtain a copy of the License at:
       9             :  *
      10             :  *     http://www.apache.org/licenses/LICENSE-2.0
      11             :  *
      12             :  * Unless required by applicable law or agreed to in writing, software
      13             :  * distributed under the License is distributed on an "AS IS" BASIS,
      14             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      15             :  * See the License for the specific language governing permissions and
      16             :  * limitations under the License.
      17             :  *------------------------------------------------------------------
      18             :  */
      19             : 
      20             : #include <vnet/ip/ip_path_mtu.h>
      21             : #include <vnet/ip/ip_frag.h>
      22             : #include <vnet/adj/adj_delegate.h>
      23             : #include <vnet/adj/adj_nbr.h>
      24             : #include <vnet/fib/fib_table.h>
      25             : #include <vnet/fib/fib_entry_track.h>
      26             : 
      27             : #include <vnet/dpo/drop_dpo.h>
      28             : 
      29             : /**
      30             :  * Path MTU
      31             :  *
      32             :  * A path is a peer. A peer is known by an IP address (in a table).
      33             :  * Insert a DPO in the forwarding chain for the peer to perform the
      34             :  * fragmentation.
      35             :  * For attached peers, all traffic will use the peer's adjacency, there
      36             :  * is already an MTU check in the adjacency (for the link's MTU) so as an
      37             :  * optimisation, instead of using a DPO, we add a delegate to the adjacency
      38             :  * to set the adjacency's MTU to the path MTU.
      39             :  */
      40             : 
      41             : /**
      42             :  * the logger
      43             :  */
      44             : static vlib_log_class_t ip_pmtu_logger;
      45             : 
      46             : static adj_delegate_type_t ip_pmtu_adj_delegate_type;
      47             : static fib_source_t ip_pmtu_source;
      48             : 
      49             : /**
      50             :  * DPO pool
      51             :  */
      52             : ip_pmtu_dpo_t *ip_pmtu_dpo_pool;
      53             : 
      54             : /**
      55             :  * DPO type registered for the path-MTU DPOs
      56             :  */
      57             : static dpo_type_t ip_pmtu_dpo_type;
      58             : 
      59             : /**
      60             :  * Fib node type for the tracker
      61             :  */
      62             : static fib_node_type_t ip_pmtu_fib_type;
      63             : 
      64             : /**
      65             :  * Path MTU tracker pool
      66             :  */
      67             : ip_pmtu_t *ip_pmtu_pool;
      68             : 
      69             : /**
      70             :  * Delegate added to adjacencies to track path MTU
      71             :  */
      72             : typedef struct ip_path_mtu_adj_delegate_t_
      73             : {
      74             :   u16 pmtu;
      75             : } ip_path_mtu_adj_delegate_t;
      76             : 
      77             : static ip_path_mtu_adj_delegate_t *ip_path_mtu_adj_delegate_pool;
      78             : 
      79             : /* DB of all FIB PMTU settings */
      80             : typedef struct ip_pmtu_key_t_
      81             : {
      82             :   ip46_address_t nh;
      83             :   u32 table_id;
      84             :   fib_protocol_t fproto;
      85             : } __clib_packed ip_pmtu_key_t;
      86             : 
      87             : static uword *ip_pmtu_db;
      88             : 
      89             : #define IP_PMTU_TRKR_DBG(_ipt, _fmt, _args...)                                \
      90             :   {                                                                           \
      91             :     vlib_log_debug (ip_pmtu_logger, "[%U]: " _fmt ": ", format_ip_pmtu,       \
      92             :                     _ipt - ip_pmtu_pool, ##_args);                            \
      93             :   }
      94             : #define IP_PMTU_DBG(_fmt, _args...)                                           \
      95             :   {                                                                           \
      96             :     vlib_log_debug (ip_pmtu_logger, _fmt ": ", ##_args);                      \
      97             :   }
      98             : 
      99             : static u8 *
     100          46 : format_ip_pmtu_flags (u8 *s, va_list *ap)
     101             : {
     102          46 :   ip_pmtu_flags_t f = va_arg (*ap, ip_pmtu_flags_t);
     103             : 
     104             :   if (0)
     105             :     ;
     106             : #define _(a, b, c) else if (f & IP_PMTU_FLAG_##a) s = format (s, "%s ", c);
     107          46 :   foreach_ip_pmtu_flag
     108             : #undef _
     109             : 
     110          46 :     return (s);
     111             : }
     112             : 
     113             : u32
     114         457 : ip_pmtu_get_table_id (const ip_pmtu_t *ipt)
     115             : {
     116             :   const fib_prefix_t *pfx;
     117             :   u32 fib_index;
     118             : 
     119         457 :   pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
     120         457 :   fib_index = fib_entry_get_fib_index (ipt->ipt_fib_entry);
     121             : 
     122         457 :   return (fib_table_get_table_id (fib_index, pfx->fp_proto));
     123             : }
     124             : 
     125             : void
     126         411 : ip_pmtu_get_ip (const ip_pmtu_t *ipt, ip_address_t *ip)
     127             : {
     128             :   const fib_prefix_t *pfx;
     129             : 
     130         411 :   pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
     131         411 :   ip_address_from_46 (&pfx->fp_addr, pfx->fp_proto, ip);
     132         411 : }
     133             : 
     134             : static u8 *
     135          46 : format_ip_pmtu (u8 *s, va_list *ap)
     136             : {
     137             :   ip_pmtu_t *ipt;
     138          46 :   index_t ipti = va_arg (*ap, index_t);
     139             :   const fib_prefix_t *pfx;
     140             :   u32 fib_index;
     141             : 
     142          46 :   ipt = pool_elt_at_index (ip_pmtu_pool, ipti);
     143          46 :   pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
     144          46 :   fib_index = fib_entry_get_fib_index (ipt->ipt_fib_entry);
     145             : 
     146             :   s =
     147          46 :     format (s, "[%d] [tbl:[%d:%d]] %U pmtu:[cfg:%d, oper:%d, parent:%d] [%U]",
     148             :             ipti, ip_pmtu_get_table_id (ipt), fib_index, format_fib_prefix,
     149          46 :             pfx, ipt->ipt_cfg_pmtu, ipt->ipt_oper_pmtu, ipt->ipt_parent_pmtu,
     150          46 :             format_ip_pmtu_flags, ipt->ipt_flags);
     151             : 
     152          46 :   return (s);
     153             : }
     154             : 
     155             : static u8 *
     156           2 : format_ip_path_mtu_adj_delegate (const adj_delegate_t *aed, u8 *s)
     157             : {
     158             :   ip_path_mtu_adj_delegate_t *ip_adj;
     159             : 
     160           2 :   ip_adj = pool_elt_at_index (ip_path_mtu_adj_delegate_pool, aed->ad_index);
     161             : 
     162           2 :   s = format (s, "IP path-MTU: %d", ip_adj->pmtu);
     163             : 
     164           2 :   return (s);
     165             : }
     166             : 
     167             : static void
     168       47098 : ip_pmtu_adj_delegate_adj_created (adj_index_t ai)
     169             : {
     170             :   ip_path_mtu_adj_delegate_t *ipp_ad;
     171             :   const ip_pmtu_t *ipt;
     172             :   ip_adjacency_t *adj;
     173             :   u32 table_id;
     174             :   uword *p;
     175             : 
     176       47098 :   adj = adj_get (ai);
     177             : 
     178       47098 :   switch (adj->lookup_next_index)
     179             :     {
     180       36897 :     case IP_LOOKUP_NEXT_DROP:
     181             :     case IP_LOOKUP_NEXT_PUNT:
     182             :     case IP_LOOKUP_NEXT_LOCAL:
     183             :     case IP_LOOKUP_NEXT_GLEAN:
     184             :     case IP_LOOKUP_NEXT_MCAST:
     185             :     case IP_LOOKUP_NEXT_BCAST:
     186             :     case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
     187             :     case IP_LOOKUP_NEXT_ICMP_ERROR:
     188             :     case IP_LOOKUP_N_NEXT:
     189       47095 :       return;
     190             : 
     191       10201 :     case IP_LOOKUP_NEXT_ARP:
     192             :     case IP_LOOKUP_NEXT_REWRITE:
     193             :     case IP_LOOKUP_NEXT_MIDCHAIN:
     194       10201 :       break;
     195             :     }
     196             : 
     197       10201 :   table_id = fib_table_get_table_id_for_sw_if_index (
     198       10201 :     adj->ia_nh_proto, adj->rewrite_header.sw_if_index);
     199             : 
     200       10201 :   ip_pmtu_key_t key = {
     201             :     .nh = adj->sub_type.nbr.next_hop,
     202             :     .table_id = table_id,
     203       10201 :     .fproto = adj->ia_nh_proto,
     204             :   };
     205             : 
     206       10201 :   p = hash_get_mem (ip_pmtu_db, &key);
     207             : 
     208       10201 :   if (NULL == p)
     209       10198 :     return;
     210             : 
     211           3 :   ipt = pool_elt_at_index (ip_pmtu_pool, p[0]);
     212             : 
     213           3 :   pool_get (ip_path_mtu_adj_delegate_pool, ipp_ad);
     214           3 :   ipp_ad->pmtu = ipt->ipt_cfg_pmtu;
     215             : 
     216           3 :   adj_delegate_add (adj, ip_pmtu_adj_delegate_type,
     217           3 :                     ipp_ad - ip_path_mtu_adj_delegate_pool);
     218             : 
     219           3 :   adj_nbr_set_mtu (ai, ipp_ad->pmtu);
     220             : 
     221           3 :   IP_PMTU_TRKR_DBG (ipt, "adj-added:", ai);
     222             : }
     223             : 
     224             : static void
     225           2 : ip_pmtu_adj_delegate_adj_deleted (adj_delegate_t *ad)
     226             : {
     227           2 :   pool_put_index (ip_path_mtu_adj_delegate_pool, ad->ad_index);
     228           2 : }
     229             : 
     230             : static void
     231          13 : ip_pmtu_adj_delegate_adj_modified (adj_delegate_t *ad)
     232             : {
     233             :   ip_path_mtu_adj_delegate_t *ipp_ad;
     234             : 
     235          13 :   ipp_ad = pool_elt_at_index (ip_path_mtu_adj_delegate_pool, ad->ad_index);
     236             : 
     237          13 :   adj_nbr_set_mtu (ad->ad_adj_index, ipp_ad->pmtu);
     238          13 : }
     239             : 
     240             : const adj_delegate_vft_t ip_path_adj_delegate_vft = {
     241             :   .adv_format = format_ip_path_mtu_adj_delegate,
     242             :   .adv_adj_deleted = ip_pmtu_adj_delegate_adj_deleted,
     243             :   .adv_adj_modified = ip_pmtu_adj_delegate_adj_modified,
     244             :   .adv_adj_created = ip_pmtu_adj_delegate_adj_created,
     245             : };
     246             : 
     247             : static bool
     248          47 : ip_path_mtu_value_invalid (u16 pmtu)
     249             : {
     250          47 :   return (pmtu == 0 || pmtu == 0xffff);
     251             : }
     252             : 
     253             : static adj_walk_rc_t
     254          17 : ip_ptmu_adj_walk_remove (adj_index_t ai, void *ctx)
     255             : {
     256             :   adj_delegate_t *ad;
     257             : 
     258          17 :   ad = adj_delegate_get (adj_get (ai), ip_pmtu_adj_delegate_type);
     259             : 
     260          17 :   if (ad)
     261             :     {
     262          17 :       adj_nbr_set_mtu (ai, 0);
     263             : 
     264          17 :       pool_put_index (ip_path_mtu_adj_delegate_pool, ad->ad_index);
     265          17 :       adj_delegate_remove (ai, ip_pmtu_adj_delegate_type);
     266             :     }
     267          17 :   return (ADJ_WALK_RC_CONTINUE);
     268             : }
     269             : 
     270             : static adj_walk_rc_t
     271          54 : ip_ptmu_adj_walk_update (adj_index_t ai, void *ctx)
     272             : {
     273             :   ip_path_mtu_adj_delegate_t *ipp_ad;
     274             :   adj_delegate_t *ad;
     275             :   u16 *pmtup;
     276             : 
     277          54 :   pmtup = ctx;
     278          54 :   ad = adj_delegate_get (adj_get (ai), ip_pmtu_adj_delegate_type);
     279             : 
     280          54 :   if (ad)
     281          37 :     ipp_ad = pool_elt_at_index (ip_path_mtu_adj_delegate_pool, ad->ad_index);
     282             :   else
     283             :     {
     284          17 :       pool_get (ip_path_mtu_adj_delegate_pool, ipp_ad);
     285             : 
     286          17 :       adj_delegate_add (adj_get (ai), ip_pmtu_adj_delegate_type,
     287          17 :                         ipp_ad - ip_path_mtu_adj_delegate_pool);
     288             :     }
     289             : 
     290          54 :   ipp_ad->pmtu = *pmtup;
     291             : 
     292          54 :   adj_nbr_set_mtu (ai, ipp_ad->pmtu);
     293             : 
     294          54 :   return (ADJ_WALK_RC_CONTINUE);
     295             : }
     296             : 
     297             : static ip_pmtu_dpo_t *
     298          11 : ip_pmtu_dpo_alloc (void)
     299             : {
     300          11 :   vlib_main_t *vm = vlib_get_main ();
     301          11 :   u8 need_barrier_sync = pool_get_will_expand (ip_pmtu_dpo_pool);
     302             :   ip_pmtu_dpo_t *ipm;
     303             : 
     304             : 
     305          11 :   if (need_barrier_sync)
     306           2 :     vlib_worker_thread_barrier_sync (vm);
     307             : 
     308          11 :   pool_get_aligned_zero (ip_pmtu_dpo_pool, ipm, sizeof (ip_pmtu_dpo_t));
     309             : 
     310          11 :   if (need_barrier_sync)
     311           2 :     vlib_worker_thread_barrier_release (vm);
     312             : 
     313          11 :   return (ipm);
     314             : }
     315             : 
     316             : static ip_pmtu_dpo_t *
     317          86 : ip_pmtu_dpo_get_from_dpo (const dpo_id_t *dpo)
     318             : {
     319          86 :   ASSERT (ip_pmtu_dpo_type == dpo->dpoi_type);
     320             : 
     321          86 :   return (ip_pmtu_dpo_get (dpo->dpoi_index));
     322             : }
     323             : 
     324             : static index_t
     325          11 : ip_pmtu_dpo_get_index (ip_pmtu_dpo_t *ipm)
     326             : {
     327          11 :   return (ipm - ip_pmtu_dpo_pool);
     328             : }
     329             : 
     330             : static void
     331          43 : ip_pmtu_dpo_lock (dpo_id_t *dpo)
     332             : {
     333             :   ip_pmtu_dpo_t *ipm;
     334             : 
     335          43 :   ipm = ip_pmtu_dpo_get_from_dpo (dpo);
     336          43 :   ipm->ipm_locks++;
     337          43 : }
     338             : 
     339             : static void
     340          43 : ip_pmtu_dpo_unlock (dpo_id_t *dpo)
     341             : {
     342             :   ip_pmtu_dpo_t *ipm;
     343             : 
     344          43 :   ipm = ip_pmtu_dpo_get_from_dpo (dpo);
     345          43 :   ipm->ipm_locks--;
     346             : 
     347          43 :   if (0 == ipm->ipm_locks)
     348             :     {
     349          11 :       dpo_reset (&ipm->ipm_dpo);
     350          11 :       pool_put (ip_pmtu_dpo_pool, ipm);
     351             :     }
     352          43 : }
     353             : 
     354             : static u32
     355           0 : ip_pmtu_dpo_get_urpf (const dpo_id_t *dpo)
     356             : {
     357             :   ip_pmtu_dpo_t *ipm;
     358             : 
     359           0 :   ipm = ip_pmtu_dpo_get_from_dpo (dpo);
     360             : 
     361           0 :   return (dpo_get_urpf (&ipm->ipm_dpo));
     362             : }
     363             : 
     364             : void
     365           5 : ip_pmtu_dpo_add_or_lock (u16 pmtu, const dpo_id_t *parent, dpo_id_t *dpo)
     366             : {
     367             :   ip_pmtu_dpo_t *ipm;
     368             : 
     369           5 :   ipm = ip_pmtu_dpo_alloc ();
     370             : 
     371           5 :   ipm->ipm_proto = parent->dpoi_proto;
     372           5 :   ipm->ipm_pmtu = pmtu;
     373             : 
     374           5 :   dpo_stack (ip_pmtu_dpo_type, ipm->ipm_proto, &ipm->ipm_dpo, parent);
     375           5 :   dpo_set (dpo, ip_pmtu_dpo_type, ipm->ipm_proto, ip_pmtu_dpo_get_index (ipm));
     376           5 : }
     377             : 
     378             : u8 *
     379           8 : format_ip_pmtu_dpo (u8 *s, va_list *ap)
     380             : {
     381           8 :   index_t index = va_arg (*ap, index_t);
     382           8 :   u32 indent = va_arg (*ap, u32);
     383           8 :   ip_pmtu_dpo_t *ipm = ip_pmtu_dpo_get (index);
     384             : 
     385           8 :   s = format (s, "ip-pmtu-dpo: %U, mtu:%d", format_dpo_proto, ipm->ipm_proto,
     386           8 :               ipm->ipm_pmtu);
     387           8 :   s = format (s, "\n%U", format_white_space, indent + 2);
     388           8 :   s = format (s, "%U", format_dpo_id, &ipm->ipm_dpo, indent + 4);
     389             : 
     390           8 :   return (s);
     391             : }
     392             : 
     393             : /**
     394             :  * Interpose a path MTU DPO
     395             :  */
     396             : static void
     397           6 : ip_pmtu_dpo_interpose (const dpo_id_t *original, const dpo_id_t *parent,
     398             :                        dpo_id_t *clone)
     399             : {
     400             :   ip_pmtu_dpo_t *ipm, *ipm_clone;
     401             : 
     402           6 :   ipm_clone = ip_pmtu_dpo_alloc ();
     403           6 :   ipm = ip_pmtu_dpo_get (original->dpoi_index);
     404             : 
     405           6 :   ipm_clone->ipm_proto = ipm->ipm_proto;
     406           6 :   ipm_clone->ipm_pmtu = ipm->ipm_pmtu;
     407             : 
     408           6 :   dpo_stack (ip_pmtu_dpo_type, ipm_clone->ipm_proto, &ipm_clone->ipm_dpo,
     409             :              parent);
     410           6 :   dpo_set (clone, ip_pmtu_dpo_type, ipm_clone->ipm_proto,
     411             :            ip_pmtu_dpo_get_index (ipm_clone));
     412           6 : }
     413             : 
     414             : static u16
     415           0 : ip_pmtu_dpo_get_mtu (const dpo_id_t *dpo)
     416             : {
     417             :   ip_pmtu_dpo_t *ipd;
     418             : 
     419           0 :   ipd = pool_elt_at_index (ip_pmtu_dpo_pool, dpo->dpoi_index);
     420             : 
     421           0 :   return (ipd->ipm_pmtu);
     422             : }
     423             : 
     424             : const static dpo_vft_t ip_pmtu_dpo_vft = {
     425             :   .dv_lock = ip_pmtu_dpo_lock,
     426             :   .dv_unlock = ip_pmtu_dpo_unlock,
     427             :   .dv_format = format_ip_pmtu_dpo,
     428             :   .dv_get_urpf = ip_pmtu_dpo_get_urpf,
     429             :   .dv_mk_interpose = ip_pmtu_dpo_interpose,
     430             :   .dv_get_mtu = ip_pmtu_dpo_get_mtu,
     431             : };
     432             : 
     433             : /**
     434             :  * @brief The per-protocol VLIB graph nodes that are assigned to a path-MTU
     435             :  *        DPO object.
     436             :  *
     437             :  * this means that these graph nodes are ones from which a path-MTU DPO is
     438             :  * the parent object in the DPO-graph.
     439             :  */
     440             : const static char *const ip_pmtu_dpo_ip4_nodes[] = {
     441             :   "ip4-pmtu-dpo",
     442             :   NULL,
     443             : };
     444             : 
     445             : const static char *const ip_pmtu_dpo_ip6_nodes[] = {
     446             :   "ip6-pmtu-dpo",
     447             :   NULL,
     448             : };
     449             : 
     450             : const static char *const *const ip_pmtu_dpo_nodes[DPO_PROTO_NUM] = {
     451             :   [DPO_PROTO_IP4] = ip_pmtu_dpo_ip4_nodes,
     452             :   [DPO_PROTO_IP6] = ip_pmtu_dpo_ip6_nodes,
     453             : };
     454             : 
     455             : static bool
     456          64 : ip_mtu_fib_entry_is_attached (fib_node_index_t fib_entry)
     457             : {
     458             :   const fib_prefix_t *pfx;
     459             :   u32 cover, fib_index;
     460             : 
     461          64 :   fib_index = fib_entry_get_fib_index (fib_entry);
     462          64 :   pfx = fib_entry_get_prefix (fib_entry);
     463             : 
     464             :   /*
     465             :    * If the tracked prefix's cover is attached, then all packets that
     466             :    * are forwarded to this neighbour will use the adjacency, this is a
     467             :    * more efficient place to perform the MTU check and fragging
     468             :    */
     469          64 :   cover = fib_table_get_less_specific (fib_index, pfx);
     470             : 
     471          75 :   return (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags (cover) ||
     472          11 :           FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags (fib_entry));
     473             : }
     474             : 
     475             : static index_t
     476          19 : ip_pmtu_alloc (u32 fib_index, const fib_prefix_t *pfx,
     477             :                const ip_pmtu_key_t *key, u16 pmtu)
     478             : {
     479          19 :   dpo_id_t ip_dpo = DPO_INVALID;
     480             :   ip_pmtu_t *ipt;
     481             :   fib_node_index_t cover;
     482             :   const dpo_id_t *lb_dpo;
     483             :   index_t ipti;
     484             : 
     485          19 :   pool_get (ip_pmtu_pool, ipt);
     486          19 :   fib_node_init (&(ipt->ipt_node), ip_pmtu_fib_type);
     487             : 
     488          19 :   ipti = ipt - ip_pmtu_pool;
     489          19 :   hash_set_mem_alloc (&ip_pmtu_db, key, ipti);
     490             : 
     491          19 :   ipt->ipt_cfg_pmtu = pmtu;
     492          38 :   ipt->ipt_fib_entry = fib_entry_track (fib_index, pfx, ip_pmtu_fib_type, ipti,
     493          19 :                                         &ipt->ipt_sibling);
     494             : 
     495             :   /*
     496             :    * If the tracked prefix's cover is attached, then all packets that
     497             :    * are forwarded to this neighbour will use the adjacency, this is a
     498             :    * more efficient place to perform the MTU check and fragging
     499             :    */
     500          19 :   cover = fib_table_get_less_specific (fib_index, pfx);
     501             : 
     502          19 :   if (ip_mtu_fib_entry_is_attached (ipt->ipt_fib_entry))
     503             :     {
     504             :       u32 sw_if_index;
     505             : 
     506          18 :       ipt->ipt_flags |= IP_PMTU_FLAG_ATTACHED;
     507          18 :       ipt->ipt_oper_pmtu = ipt->ipt_cfg_pmtu;
     508             : 
     509          18 :       sw_if_index = fib_entry_get_resolving_interface (cover);
     510             : 
     511             :       /* walk all adjs to add/update delegate */
     512          18 :       adj_nbr_walk_nh (sw_if_index, pfx->fp_proto, &pfx->fp_addr,
     513          18 :                        ip_ptmu_adj_walk_update, &ipt->ipt_oper_pmtu);
     514             :     }
     515             :   else
     516             :     {
     517           1 :       ipt->ipt_flags |= IP_PMTU_FLAG_REMOTE;
     518             : 
     519           1 :       lb_dpo = fib_entry_contribute_ip_forwarding (ipt->ipt_fib_entry);
     520             : 
     521           1 :       ipt->ipt_oper_pmtu = clib_min (dpo_get_mtu (lb_dpo), ipt->ipt_cfg_pmtu);
     522             : 
     523             :       /*
     524             :        * interpose a policy DPO from the nh so that MTU is applied
     525             :        */
     526           1 :       ip_pmtu_dpo_add_or_lock (ipt->ipt_oper_pmtu,
     527           1 :                                drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)),
     528             :                                &ip_dpo);
     529             : 
     530           1 :       fib_table_entry_special_dpo_add (fib_index, pfx, ip_pmtu_source,
     531             :                                        FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo);
     532           1 :       dpo_reset (&ip_dpo);
     533             :     }
     534             : 
     535          19 :   IP_PMTU_TRKR_DBG (ipt, "create");
     536             : 
     537          19 :   return (ipti);
     538             : }
     539             : 
     540             : static void
     541          45 : ip_pmtu_stack (ip_pmtu_t *ipt)
     542             : {
     543             :   bool was_attached, is_attached;
     544             :   const fib_prefix_t *pfx;
     545             :   u32 fib_index;
     546             : 
     547          45 :   pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
     548          45 :   fib_index = fib_entry_get_fib_index (ipt->ipt_fib_entry);
     549             : 
     550          45 :   was_attached = !!(ipt->ipt_flags & IP_PMTU_FLAG_ATTACHED);
     551          45 :   is_attached = ip_mtu_fib_entry_is_attached (ipt->ipt_fib_entry);
     552             : 
     553          45 :   if (was_attached && !is_attached)
     554             :     {
     555             :       /* transition from attached to remote - walk all adjs to remove delegate
     556             :        */
     557           1 :       adj_nbr_walk_nh (fib_entry_get_resolving_interface (ipt->ipt_fib_entry),
     558           1 :                        pfx->fp_proto, &pfx->fp_addr, ip_ptmu_adj_walk_remove,
     559           1 :                        &ipt->ipt_oper_pmtu);
     560           1 :       ipt->ipt_flags &= ~IP_PMTU_FLAG_ATTACHED;
     561             :     }
     562          45 :   if (!was_attached && is_attached)
     563             :     {
     564             :       /* transition from remote to attached - remove the DPO */
     565           1 :       fib_table_entry_special_remove (fib_index, pfx, ip_pmtu_source);
     566           1 :       ipt->ipt_flags &= ~IP_PMTU_FLAG_REMOTE;
     567             :     }
     568             : 
     569          45 :   if (is_attached)
     570             :     {
     571             :       /* walk all adjs to add/update delegate */
     572          39 :       ipt->ipt_oper_pmtu = ipt->ipt_cfg_pmtu;
     573          39 :       adj_nbr_walk_nh (fib_entry_get_resolving_interface (ipt->ipt_fib_entry),
     574          39 :                        pfx->fp_proto, &pfx->fp_addr, ip_ptmu_adj_walk_update,
     575          39 :                        &ipt->ipt_oper_pmtu);
     576          39 :       ipt->ipt_flags |= IP_PMTU_FLAG_ATTACHED;
     577             :     }
     578             :   else
     579             :     {
     580             :       const dpo_id_t *lb_dpo;
     581             :       u16 dpo_mtu;
     582             : 
     583           6 :       fib_table_entry_special_remove (fib_index, pfx, ip_pmtu_source);
     584             : 
     585           6 :       ipt->ipt_flags |= IP_PMTU_FLAG_REMOTE;
     586           6 :       lb_dpo = fib_entry_contribute_ip_forwarding (ipt->ipt_fib_entry);
     587           6 :       dpo_mtu = dpo_get_mtu (lb_dpo);
     588             : 
     589           6 :       ipt->ipt_oper_pmtu = clib_min (dpo_mtu, ipt->ipt_cfg_pmtu);
     590             : 
     591             :       /*
     592             :        * if the configured path-MTU is less than the egress/interface then
     593             :        * interpose a policy DPO from the nh so that MTU is applied
     594             :        */
     595           6 :       if (ipt->ipt_oper_pmtu < dpo_mtu)
     596             :         {
     597           4 :           dpo_id_t ip_dpo = DPO_INVALID;
     598             : 
     599           4 :           ip_pmtu_dpo_add_or_lock (
     600           4 :             ipt->ipt_oper_pmtu,
     601           4 :             drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)), &ip_dpo);
     602             : 
     603           4 :           fib_table_entry_special_dpo_update (
     604             :             fib_index, pfx, ip_pmtu_source, FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo);
     605           4 :           dpo_reset (&ip_dpo);
     606             :         }
     607             :     }
     608          45 :   IP_PMTU_TRKR_DBG (ipt, "stack");
     609          45 : }
     610             : 
     611             : static void
     612          10 : ip_pmtu_update (index_t ipti, u16 pmtu)
     613             : {
     614             :   ip_pmtu_t *ipt;
     615             : 
     616          10 :   ipt = pool_elt_at_index (ip_pmtu_pool, ipti);
     617          10 :   ipt->ipt_flags &= ~IP_PMTU_FLAG_STALE;
     618          10 :   ipt->ipt_cfg_pmtu = pmtu;
     619             : 
     620          10 :   ip_pmtu_stack (ipt);
     621          10 : }
     622             : 
                       /**
                        * Tear down a path-MTU tracker and return its element to the pool.
                        *
                        * For a tracker flagged IP_PMTU_FLAG_REMOTE the ip_pmtu_source entry
                        * for the tracked prefix is removed from the FIB table; otherwise the
                        * adjacencies for the prefix's address on the resolving interface are
                        * walked with ip_ptmu_adj_walk_remove. The tracked FIB entry is then
                        * untracked, @a key is removed from ip_pmtu_db and the element is put
                        * back into ip_pmtu_pool.
                        *
                        * @param ipti  index of the tracker in ip_pmtu_pool
                        * @param key   key under which the tracker is stored in ip_pmtu_db
                        * @return      @a ipti, unchanged
                        */
     623             : static index_t
     624          18 : ip_pmtu_destroy (index_t ipti, const ip_pmtu_key_t *key)
     625             : {
     626             :   ip_pmtu_t *ipt;
     627             :   const fib_prefix_t *pfx;
     628             : 
     629          18 :   ipt = pool_elt_at_index (ip_pmtu_pool, ipti);
                         /* NOTE(review): pfx is obtained from the tracked entry; presumably
                          * it must not be used after fib_entry_untrack() below - confirm */
     630          18 :   pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
     631             : 
     632          18 :   IP_PMTU_TRKR_DBG (ipt, "destroy");
     633             : 
     634          18 :   if (ipt->ipt_flags & IP_PMTU_FLAG_REMOTE)
     635           0 :     fib_table_entry_special_remove (
     636             :       fib_entry_get_fib_index (ipt->ipt_fib_entry), pfx, ip_pmtu_source);
     637             :   else
     638             :     /* remove the delegate from all the adjacencies */
     639          18 :     adj_nbr_walk_nh (fib_entry_get_resolving_interface (ipt->ipt_fib_entry),
     640          18 :                      pfx->fp_proto, &pfx->fp_addr, ip_ptmu_adj_walk_remove,
     641             :                      NULL);
     642             : 
     643             :   /*
     644             :    * Drop the fib entry we're tracking
     645             :    */
     646          18 :   fib_entry_untrack (ipt->ipt_fib_entry, ipt->ipt_sibling);
     647             : 
     648             :   /*
     649             :    * remove from DB and return to pool
     650             :    */
     651          18 :   hash_unset_mem_free (&ip_pmtu_db, key);
     652          18 :   pool_put (ip_pmtu_pool, ipt);
     653             : 
     654          18 :   return (ipti);
     655             : }
     656             : 
                       /**
                        * Add, update or remove the path MTU configured for a next-hop.
                        *
                        * The entry is identified by the address's FIB protocol, @a table_id
                        * and the address itself. When ip_path_mtu_value_invalid() accepts
                        * @a pmtu the entry is created (if absent) or updated (if present);
                        * when it rejects @a pmtu an existing entry is destroyed and a missing
                        * one is silently ignored.
                        *
                        * @param nh        next-hop (peer) address
                        * @param table_id  ID of the table the address lives in
                        * @param pmtu      path MTU to apply
                        * @return 0 on success, VNET_API_ERROR_NO_SUCH_TABLE when no FIB table
                        *         is found for @a table_id and the address's protocol
                        */
     657             : int
     658          47 : ip_path_mtu_update (const ip_address_t *nh, u32 table_id, u16 pmtu)
     659             : {
     660             :   fib_prefix_t pfx;
     661             :   u32 fib_index;
     662             :   uword *p;
     663             : 
     664          47 :   ip_address_to_fib_prefix (nh, &pfx);
     665          47 :   fib_index = fib_table_find (pfx.fp_proto, table_id);
     666             : 
     667          47 :   if (~0 == fib_index)
     668           0 :     return (VNET_API_ERROR_NO_SUCH_TABLE);
     669             : 
     670          47 :   ip_pmtu_key_t key = {
     671          47 :     .fproto = pfx.fp_proto,
     672             :     .table_id = table_id,
     673             :     .nh = pfx.fp_addr,
     674             :   };
     675             : 
                         /* p, when non-NULL, points at the pool index stored for this key */
     676          47 :   p = hash_get_mem (ip_pmtu_db, &key);
     677             : 
     678          47 :   if (!ip_path_mtu_value_invalid (pmtu))
     679             :     {
     680             :       /* Add or update of path MTU */
     681          29 :       if (NULL == p)
     682          19 :         ip_pmtu_alloc (fib_index, &pfx, &key, pmtu);
     683             :       else
     684          10 :         ip_pmtu_update (p[0], pmtu);
     685             :     }
     686             :   else
     687             :     {
                             /* invalid value requests removal; nothing to do if not present */
     688          18 :       if (NULL != p)
     689          18 :         ip_pmtu_destroy (p[0], &key);
     690             :     }
     691             : 
     692          47 :   return (0);
     693             : }
     694             : 
                       /**
                        * Walk callback that flags one tracker as stale.
                        *
                        * @param ipti  index of the tracker being visited
                        * @param ctx   unused
                        * @return      WALK_CONTINUE, so the walk visits every tracker
                        */
     695             : static walk_rc_t
     696           2 : ip_path_mtu_walk_mark (index_t ipti, void *ctx)
     697             : {
     698             :   ip_pmtu_t *ipt;
     699             : 
     700           2 :   ipt = ip_path_mtu_get (ipti);
     701             : 
     702           2 :   ipt->ipt_flags |= IP_PMTU_FLAG_STALE;
     703             : 
     704           2 :   return (WALK_CONTINUE);
     705             : }
     706             : 
                       /**
                        * Context passed to ip_path_mtu_walk_sweep().
                        */
     707             : typedef struct ip_path_mtu_walk_sweep_ctx_t_
     708             : {
                         /* vector of tracker indices found with the STALE flag set */
     709             :   index_t *indicies;
     710             : } ip_path_mtu_walk_sweep_ctx_t;
     711             : 
                       /**
                        * Walk callback that records the index of each stale tracker.
                        *
                        * @param ipti  index of the tracker being visited
                        * @param arg   pointer to an ip_path_mtu_walk_sweep_ctx_t whose
                        *              indicies vector receives @a ipti when the tracker has
                        *              IP_PMTU_FLAG_STALE set
                        * @return      WALK_CONTINUE, so the walk visits every tracker
                        */
     712             : static walk_rc_t
     713           2 : ip_path_mtu_walk_sweep (index_t ipti, void *arg)
     714             : {
     715           2 :   ip_path_mtu_walk_sweep_ctx_t *ctx = arg;
     716             :   ip_pmtu_t *ipt;
     717             : 
     718           2 :   ipt = ip_path_mtu_get (ipti);
     719             : 
     720           2 :   if (ipt->ipt_flags & IP_PMTU_FLAG_STALE)
     721             :     {
     722           1 :       vec_add1 (ctx->indicies, ipti);
     723             :     }
     724             : 
     725           2 :   return (WALK_CONTINUE);
     726             : }
     727             : 
                       /**
                        * Begin a replace cycle: flag every existing tracker as stale.
                        *
                        * ip_pmtu_update() clears the flag on any tracker that is updated
                        * afterwards; ip_path_mtu_replace_end() processes those still flagged.
                        *
                        * @return always 0
                        */
     728             : int
     729           2 : ip_path_mtu_replace_begin (void)
     730             : {
     731           2 :   IP_PMTU_DBG ("replace-begin");
     732             : 
     733           2 :   ip_path_mtu_walk (ip_path_mtu_walk_mark, NULL);
     734             : 
     735           2 :   return (0);
     736             : }
     737             : 
                       /**
                        * End a replace cycle: sweep the trackers that are still stale.
                        *
                        * The stale indices are first collected into a temporary vector, then
                        * ip_path_mtu_update() is called with a path MTU of 0 for each one,
                        * using the tracker's own address and table ID.
                        *
                        * @return always 0
                        */
     738             : int
     739           2 : ip_path_mtu_replace_end (void)
     740             : {
     741             :   index_t *ipti;
     742             : 
     743           2 :   IP_PMTU_DBG ("replace-end");
     744             : 
     745             :   /*
     746             :    * not safe to walk the pool whilst deleting, so create
     747             :    * temporary storage of stale entries
     748             :    */
     749           2 :   ip_path_mtu_walk_sweep_ctx_t ctx = {
     750             :     .indicies = NULL,
     751             :   };
     752             : 
     753           2 :   ip_path_mtu_walk (ip_path_mtu_walk_sweep, &ctx);
     754             : 
     755           3 :   vec_foreach (ipti, ctx.indicies)
     756             :     {
     757             :       ip_pmtu_t *ipt;
     758             :       ip_address_t ip;
     759             : 
     760           1 :       ipt = ip_path_mtu_get (*ipti);
     761           1 :       ip_pmtu_get_ip (ipt, &ip);
                             /* NOTE(review): presumably 0 is rejected by
                              * ip_path_mtu_value_invalid(), making this a removal - confirm */
     762           1 :       ip_path_mtu_update (&ip, ip_pmtu_get_table_id (ipt), 0);
     763             :     }
     764             : 
     765           2 :   vec_free (ctx.indicies);
     766             : 
     767           2 :   return (0);
     768             : }
     769             : 
                       /**
                        * Invoke a callback on every tracker index in ip_pmtu_pool.
                        *
                        * @param fn   callback invoked with each index and @a ctx; the walk
                        *             ends early when it returns WALK_STOP
                        * @param ctx  opaque pointer handed to @a fn unchanged
                        */
     770             : void
     771           4 : ip_path_mtu_walk (ip_path_mtu_walk_t fn, void *ctx)
     772             : {
     773             :   index_t ipmi;
     774             : 
     775           8 :   pool_foreach_index (ipmi, ip_pmtu_pool)
     776             :     {
     777           4 :       if (WALK_STOP == fn (ipmi, ctx))
     778           0 :         break;
     779             :     }
     780           4 : }
     781             : 
                       /**
                        * FIB node virtual function: map a tracker index to its fib_node_t.
                        *
                        * @param index  index of the tracker in ip_pmtu_pool
                        * @return       address of the tracker's embedded ipt_node member
                        */
     782             : static fib_node_t *
     783          35 : ip_pmtu_get_node (fib_node_index_t index)
     784             : {
     785             :   ip_pmtu_t *ipt;
     786             : 
     787          35 :   ipt = pool_elt_at_index (ip_pmtu_pool, index);
     788             : 
     789          35 :   return (&(ipt->ipt_node));
     790             : }
     791             : 
                       /**
                        * Recover the enclosing tracker from a pointer to its ipt_node member.
                        *
                        * @param node  pointer to the ipt_node member of an ip_pmtu_t
                        * @return      the ip_pmtu_t, computed by subtracting the member's
                        *              offset from @a node
                        */
     792             : static ip_pmtu_t *
     793          35 : ip_pmtu_get_from_node (fib_node_t *node)
     794             : {
     795             :   return (
     796          35 :     (ip_pmtu_t *) (((char *) node) - STRUCT_OFFSET_OF (ip_pmtu_t, ipt_node)));
     797             : }
     798             : 
                       /**
                        * FIB node virtual function for the last lock being released.
                        *
                        * Not expected to be reached for a path-MTU tracker, hence the
                        * unconditional assert.
                        *
                        * @param node  unused
                        */
     799             : static void
     800           0 : ip_pmtu_last_lock_gone (fib_node_t *node)
     801             : {
     802             :   /*
     803             :    * the lifetime of the entry is managed by the API.
     804             :    */
     805           0 :   ASSERT (0);
     806           0 : }
     807             : 
     808             : /*
     809             :  * A back walk has reached this path-MTU tracker.
                        *
                        * node is the tracker's embedded FIB node; ctx is unused. The tracker
                        * is re-stacked and FIB_NODE_BACK_WALK_CONTINUE is returned.
     810             :  */
     811             : static fib_node_back_walk_rc_t
     812          35 : ip_pmtu_back_walk_notify (fib_node_t *node, fib_node_back_walk_ctx_t *ctx)
     813             : {
     814             :   /*
     815             :    * re-stack the tracker now that the entry it tracks has changed
     816             :    */
     817          35 :   ip_pmtu_t *ipr = ip_pmtu_get_from_node (node);
     818             : 
     819          35 :   ip_pmtu_stack (ipr);
     820             : 
     821             :   /*
     822             :    * no need to propagate further up the graph, since there's nothing there
     823             :    */
     824          35 :   return (FIB_NODE_BACK_WALK_CONTINUE);
     825             : }
     826             : 
                       /**
                        * FIB node virtual function table for path-MTU trackers; registered
                        * under the name "ip-pmtu" in ip_path_module_init().
                        */
     827             : static const fib_node_vft_t ip_ptmu_fib_node_vft = {
     828             :   .fnv_get = ip_pmtu_get_node,
     829             :   .fnv_last_lock = ip_pmtu_last_lock_gone,
     830             :   .fnv_back_walk = ip_pmtu_back_walk_notify,
     831             : };
     832             : 
                       /**
                        * Module initialisation for path-MTU support.
                        *
                        * Registers the adjacency delegate type, allocates the "path-mtu" FIB
                        * source, registers the "ip-pmtu" FIB node type, creates the
                        * ip_pmtu_db hash (keyed by ip_pmtu_key_t, storing an index_t),
                        * registers the "ip"/"pmtu" log class and registers the DPO type.
                        *
                        * @param vm  unused
                        * @return    always NULL
                        */
     833             : static clib_error_t *
     834         575 : ip_path_module_init (vlib_main_t *vm)
     835             : {
     836         575 :   ip_pmtu_adj_delegate_type =
     837         575 :     adj_delegate_register_new_type (&ip_path_adj_delegate_vft);
     838         575 :   ip_pmtu_source = fib_source_allocate ("path-mtu", FIB_SOURCE_PRIORITY_HI,
     839             :                                         FIB_SOURCE_BH_SIMPLE);
     840         575 :   ip_pmtu_fib_type =
     841         575 :     fib_node_register_new_type ("ip-pmtu", &ip_ptmu_fib_node_vft);
     842             : 
     843         575 :   ip_pmtu_db = hash_create_mem (0, sizeof (ip_pmtu_key_t), sizeof (index_t));
     844         575 :   ip_pmtu_logger = vlib_log_register_class ("ip", "pmtu");
     845         575 :   ip_pmtu_dpo_type =
     846         575 :     dpo_register_new_type (&ip_pmtu_dpo_vft, ip_pmtu_dpo_nodes);
     847             : 
     848         575 :   return (NULL);
     849             : }
     850             : 
     851       46079 : VLIB_INIT_FUNCTION (ip_path_module_init);
     852             : 
                       /**
                        * CLI handler for "show ip pmtu".
                        *
                        * When the input starts with an integer it is taken as a tracker
                        * index: that tracker is printed with format_ip_pmtu if the index is
                        * in use, otherwise "entry <n> invalid" is printed. Without an
                        * integer every tracker in ip_pmtu_pool is printed.
                        *
                        * @param vm     vlib main, used for CLI output
                        * @param input  remaining command-line input
                        * @param cmd    unused
                        * @return       always NULL
                        */
     853             : static clib_error_t *
     854           2 : show_ip_pmtu_command (vlib_main_t *vm, unformat_input_t *input,
     855             :                       vlib_cli_command_t *cmd)
     856             : {
     857             :   index_t ipti;
     858             : 
     859           2 :   if (unformat (input, "%d", &ipti))
     860             :     {
     861             :       /*
     862             :        * show one in detail
     863             :        */
     864           0 :       if (!pool_is_free_index (ip_pmtu_pool, ipti))
     865           0 :         vlib_cli_output (vm, "%U", format_ip_pmtu, ipti);
     866             :       else
     867           0 :         vlib_cli_output (vm, "entry %d invalid", ipti);
     868             :     }
     869             :   else
     870             :     {
     871             :       /*
     872             :        * show all
     873             :        */
     874          19 :       pool_foreach_index (ipti, ip_pmtu_pool)
     875             :         {
     876          17 :           vlib_cli_output (vm, "%U", format_ip_pmtu, ipti);
     877             :         }
     878             :     }
     879             : 
     880           2 :   return (NULL);
     881             : }
     882             : 
                       /*
                        * CLI registration binding "show ip pmtu" to show_ip_pmtu_command().
                        *
                        * NOTE(review): the identifier show_fib_entry looks copied from the
                        * FIB CLI; it is declared static here, but a pmtu-specific name would
                        * be clearer - confirm before renaming. The short help also omits the
                        * optional index argument the handler accepts.
                        */
     883      285289 : VLIB_CLI_COMMAND (show_fib_entry, static) = {
     884             :   .path = "show ip pmtu",
     885             :   .function = show_ip_pmtu_command,
     886             :   .short_help = "show ip path MTU",
     887             : };
     888             : 
     889             : /*
     890             :  * fd.io coding-style-patch-verification: ON
     891             :  *
     892             :  * Local Variables:
     893             :  * eval: (c-set-style "gnu")
     894             :  * End:
     895             :  */

Generated by: LCOV version 1.14