Line data Source code
1 : /*
2 : *------------------------------------------------------------------
3 : * ip_path_mtu.c
4 : *
5 : * Copyright (c) 2021 Graphiant.
6 : * Licensed under the Apache License, Version 2.0 (the "License");
7 : * you may not use this file except in compliance with the License.
8 : * You may obtain a copy of the License at:
9 : *
10 : * http://www.apache.org/licenses/LICENSE-2.0
11 : *
12 : * Unless required by applicable law or agreed to in writing, software
13 : * distributed under the License is distributed on an "AS IS" BASIS,
14 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 : * See the License for the specific language governing permissions and
16 : * limitations under the License.
17 : *------------------------------------------------------------------
18 : */
19 :
20 : #include <vnet/ip/ip_path_mtu.h>
21 : #include <vnet/ip/ip_frag.h>
22 : #include <vnet/adj/adj_delegate.h>
23 : #include <vnet/adj/adj_nbr.h>
24 : #include <vnet/fib/fib_table.h>
25 : #include <vnet/fib/fib_entry_track.h>
26 :
27 : #include <vnet/dpo/drop_dpo.h>
28 :
29 : /**
30 : * Path MTU
31 : *
32 : * A path is a peer. A peer is known by an IP address (in a table).
33 : * Insert a DPO in the forwarding chain for the peer to perform the
34 : * fragmentation.
35 : * For attached peers, all traffic will use the peer's adjacency, there
36 : * is already an MTU check in the adjacency (for the link's MTU) so as an
37 : * optimisation, instead of using a DPO, we add a delegate to the adjacency
38 : * to set the adjacency's MTU to the path MTU.
39 : */
40 :
41 : /**
42 : * the logger
43 : */
44 : static vlib_log_class_t ip_pmtu_logger;
45 :
46 : static adj_delegate_type_t ip_pmtu_adj_delegate_type;
47 : static fib_source_t ip_pmtu_source;
48 :
49 : /**
50 : * DPO pool
51 : */
52 : ip_pmtu_dpo_t *ip_pmtu_dpo_pool;
53 :
54 : /**
55 : * DPO type registered for these GBP FWD
56 : */
57 : static dpo_type_t ip_pmtu_dpo_type;
58 :
59 : /**
60 : * Fib node type for the tracker
61 : */
62 : static fib_node_type_t ip_pmtu_fib_type;
63 :
64 : /**
65 : * Path MTU tracker pool
66 : */
67 : ip_pmtu_t *ip_pmtu_pool;
68 :
69 : /**
70 : * Delegate added to adjacencies to track path MTU
71 : */
72 : typedef struct ip_path_mtu_adj_delegate_t_
73 : {
74 : u16 pmtu;
75 : } ip_path_mtu_adj_delegate_t;
76 :
77 : static ip_path_mtu_adj_delegate_t *ip_path_mtu_adj_delegate_pool;
78 :
79 : /* DB of all FIB PMTU settings */
80 : typedef struct ip_pmtu_key_t_
81 : {
82 : ip46_address_t nh;
83 : u32 table_id;
84 : fib_protocol_t fproto;
85 : } __clib_packed ip_pmtu_key_t;
86 :
87 : static uword *ip_pmtu_db;
88 :
/**
 * Debug-log a message about one tracker.
 *
 * @param _ipt  pointer to an element of ip_pmtu_pool
 * @param _fmt  string-literal format, appended after the tracker description
 * @param _args optional arguments consumed by _fmt
 *
 * The body is wrapped in do/while(0) so the macro behaves as one statement
 * (safe inside an unbraced if/else). The tracker's pool index is cast to
 * index_t because format_ip_pmtu() reads it with va_arg (..., index_t).
 */
#define IP_PMTU_TRKR_DBG(_ipt, _fmt, _args...)                                \
  do                                                                          \
    {                                                                         \
      vlib_log_debug (ip_pmtu_logger, "[%U]: " _fmt ": ", format_ip_pmtu,     \
		      (index_t) ((_ipt) - ip_pmtu_pool), ##_args);            \
    }                                                                         \
  while (0)

/**
 * Debug-log a message that is not tied to a particular tracker.
 */
#define IP_PMTU_DBG(_fmt, _args...)                                           \
  do                                                                          \
    {                                                                         \
      vlib_log_debug (ip_pmtu_logger, _fmt ": ", ##_args);                    \
    }                                                                         \
  while (0)
98 :
99 : static u8 *
100 46 : format_ip_pmtu_flags (u8 *s, va_list *ap)
101 : {
102 46 : ip_pmtu_flags_t f = va_arg (*ap, ip_pmtu_flags_t);
103 :
104 : if (0)
105 : ;
106 : #define _(a, b, c) else if (f & IP_PMTU_FLAG_##a) s = format (s, "%s ", c);
107 46 : foreach_ip_pmtu_flag
108 : #undef _
109 :
110 46 : return (s);
111 : }
112 :
113 : u32
114 457 : ip_pmtu_get_table_id (const ip_pmtu_t *ipt)
115 : {
116 : const fib_prefix_t *pfx;
117 : u32 fib_index;
118 :
119 457 : pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
120 457 : fib_index = fib_entry_get_fib_index (ipt->ipt_fib_entry);
121 :
122 457 : return (fib_table_get_table_id (fib_index, pfx->fp_proto));
123 : }
124 :
125 : void
126 411 : ip_pmtu_get_ip (const ip_pmtu_t *ipt, ip_address_t *ip)
127 : {
128 : const fib_prefix_t *pfx;
129 :
130 411 : pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
131 411 : ip_address_from_46 (&pfx->fp_addr, pfx->fp_proto, ip);
132 411 : }
133 :
134 : static u8 *
135 46 : format_ip_pmtu (u8 *s, va_list *ap)
136 : {
137 : ip_pmtu_t *ipt;
138 46 : index_t ipti = va_arg (*ap, index_t);
139 : const fib_prefix_t *pfx;
140 : u32 fib_index;
141 :
142 46 : ipt = pool_elt_at_index (ip_pmtu_pool, ipti);
143 46 : pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
144 46 : fib_index = fib_entry_get_fib_index (ipt->ipt_fib_entry);
145 :
146 : s =
147 46 : format (s, "[%d] [tbl:[%d:%d]] %U pmtu:[cfg:%d, oper:%d, parent:%d] [%U]",
148 : ipti, ip_pmtu_get_table_id (ipt), fib_index, format_fib_prefix,
149 46 : pfx, ipt->ipt_cfg_pmtu, ipt->ipt_oper_pmtu, ipt->ipt_parent_pmtu,
150 46 : format_ip_pmtu_flags, ipt->ipt_flags);
151 :
152 46 : return (s);
153 : }
154 :
155 : static u8 *
156 2 : format_ip_path_mtu_adj_delegate (const adj_delegate_t *aed, u8 *s)
157 : {
158 : ip_path_mtu_adj_delegate_t *ip_adj;
159 :
160 2 : ip_adj = pool_elt_at_index (ip_path_mtu_adj_delegate_pool, aed->ad_index);
161 :
162 2 : s = format (s, "IP path-MTU: %d", ip_adj->pmtu);
163 :
164 2 : return (s);
165 : }
166 :
167 : static void
168 47098 : ip_pmtu_adj_delegate_adj_created (adj_index_t ai)
169 : {
170 : ip_path_mtu_adj_delegate_t *ipp_ad;
171 : const ip_pmtu_t *ipt;
172 : ip_adjacency_t *adj;
173 : u32 table_id;
174 : uword *p;
175 :
176 47098 : adj = adj_get (ai);
177 :
178 47098 : switch (adj->lookup_next_index)
179 : {
180 36897 : case IP_LOOKUP_NEXT_DROP:
181 : case IP_LOOKUP_NEXT_PUNT:
182 : case IP_LOOKUP_NEXT_LOCAL:
183 : case IP_LOOKUP_NEXT_GLEAN:
184 : case IP_LOOKUP_NEXT_MCAST:
185 : case IP_LOOKUP_NEXT_BCAST:
186 : case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
187 : case IP_LOOKUP_NEXT_ICMP_ERROR:
188 : case IP_LOOKUP_N_NEXT:
189 47095 : return;
190 :
191 10201 : case IP_LOOKUP_NEXT_ARP:
192 : case IP_LOOKUP_NEXT_REWRITE:
193 : case IP_LOOKUP_NEXT_MIDCHAIN:
194 10201 : break;
195 : }
196 :
197 10201 : table_id = fib_table_get_table_id_for_sw_if_index (
198 10201 : adj->ia_nh_proto, adj->rewrite_header.sw_if_index);
199 :
200 10201 : ip_pmtu_key_t key = {
201 : .nh = adj->sub_type.nbr.next_hop,
202 : .table_id = table_id,
203 10201 : .fproto = adj->ia_nh_proto,
204 : };
205 :
206 10201 : p = hash_get_mem (ip_pmtu_db, &key);
207 :
208 10201 : if (NULL == p)
209 10198 : return;
210 :
211 3 : ipt = pool_elt_at_index (ip_pmtu_pool, p[0]);
212 :
213 3 : pool_get (ip_path_mtu_adj_delegate_pool, ipp_ad);
214 3 : ipp_ad->pmtu = ipt->ipt_cfg_pmtu;
215 :
216 3 : adj_delegate_add (adj, ip_pmtu_adj_delegate_type,
217 3 : ipp_ad - ip_path_mtu_adj_delegate_pool);
218 :
219 3 : adj_nbr_set_mtu (ai, ipp_ad->pmtu);
220 :
221 3 : IP_PMTU_TRKR_DBG (ipt, "adj-added:", ai);
222 : }
223 :
224 : static void
225 2 : ip_pmtu_adj_delegate_adj_deleted (adj_delegate_t *ad)
226 : {
227 2 : pool_put_index (ip_path_mtu_adj_delegate_pool, ad->ad_index);
228 2 : }
229 :
230 : static void
231 13 : ip_pmtu_adj_delegate_adj_modified (adj_delegate_t *ad)
232 : {
233 : ip_path_mtu_adj_delegate_t *ipp_ad;
234 :
235 13 : ipp_ad = pool_elt_at_index (ip_path_mtu_adj_delegate_pool, ad->ad_index);
236 :
237 13 : adj_nbr_set_mtu (ad->ad_adj_index, ipp_ad->pmtu);
238 13 : }
239 :
240 : const adj_delegate_vft_t ip_path_adj_delegate_vft = {
241 : .adv_format = format_ip_path_mtu_adj_delegate,
242 : .adv_adj_deleted = ip_pmtu_adj_delegate_adj_deleted,
243 : .adv_adj_modified = ip_pmtu_adj_delegate_adj_modified,
244 : .adv_adj_created = ip_pmtu_adj_delegate_adj_created,
245 : };
246 :
247 : static bool
248 47 : ip_path_mtu_value_invalid (u16 pmtu)
249 : {
250 47 : return (pmtu == 0 || pmtu == 0xffff);
251 : }
252 :
253 : static adj_walk_rc_t
254 17 : ip_ptmu_adj_walk_remove (adj_index_t ai, void *ctx)
255 : {
256 : adj_delegate_t *ad;
257 :
258 17 : ad = adj_delegate_get (adj_get (ai), ip_pmtu_adj_delegate_type);
259 :
260 17 : if (ad)
261 : {
262 17 : adj_nbr_set_mtu (ai, 0);
263 :
264 17 : pool_put_index (ip_path_mtu_adj_delegate_pool, ad->ad_index);
265 17 : adj_delegate_remove (ai, ip_pmtu_adj_delegate_type);
266 : }
267 17 : return (ADJ_WALK_RC_CONTINUE);
268 : }
269 :
270 : static adj_walk_rc_t
271 54 : ip_ptmu_adj_walk_update (adj_index_t ai, void *ctx)
272 : {
273 : ip_path_mtu_adj_delegate_t *ipp_ad;
274 : adj_delegate_t *ad;
275 : u16 *pmtup;
276 :
277 54 : pmtup = ctx;
278 54 : ad = adj_delegate_get (adj_get (ai), ip_pmtu_adj_delegate_type);
279 :
280 54 : if (ad)
281 37 : ipp_ad = pool_elt_at_index (ip_path_mtu_adj_delegate_pool, ad->ad_index);
282 : else
283 : {
284 17 : pool_get (ip_path_mtu_adj_delegate_pool, ipp_ad);
285 :
286 17 : adj_delegate_add (adj_get (ai), ip_pmtu_adj_delegate_type,
287 17 : ipp_ad - ip_path_mtu_adj_delegate_pool);
288 : }
289 :
290 54 : ipp_ad->pmtu = *pmtup;
291 :
292 54 : adj_nbr_set_mtu (ai, ipp_ad->pmtu);
293 :
294 54 : return (ADJ_WALK_RC_CONTINUE);
295 : }
296 :
297 : static ip_pmtu_dpo_t *
298 11 : ip_pmtu_dpo_alloc (void)
299 : {
300 11 : vlib_main_t *vm = vlib_get_main ();
301 11 : u8 need_barrier_sync = pool_get_will_expand (ip_pmtu_dpo_pool);
302 : ip_pmtu_dpo_t *ipm;
303 :
304 :
305 11 : if (need_barrier_sync)
306 2 : vlib_worker_thread_barrier_sync (vm);
307 :
308 11 : pool_get_aligned_zero (ip_pmtu_dpo_pool, ipm, sizeof (ip_pmtu_dpo_t));
309 :
310 11 : if (need_barrier_sync)
311 2 : vlib_worker_thread_barrier_release (vm);
312 :
313 11 : return (ipm);
314 : }
315 :
316 : static ip_pmtu_dpo_t *
317 86 : ip_pmtu_dpo_get_from_dpo (const dpo_id_t *dpo)
318 : {
319 86 : ASSERT (ip_pmtu_dpo_type == dpo->dpoi_type);
320 :
321 86 : return (ip_pmtu_dpo_get (dpo->dpoi_index));
322 : }
323 :
324 : static index_t
325 11 : ip_pmtu_dpo_get_index (ip_pmtu_dpo_t *ipm)
326 : {
327 11 : return (ipm - ip_pmtu_dpo_pool);
328 : }
329 :
330 : static void
331 43 : ip_pmtu_dpo_lock (dpo_id_t *dpo)
332 : {
333 : ip_pmtu_dpo_t *ipm;
334 :
335 43 : ipm = ip_pmtu_dpo_get_from_dpo (dpo);
336 43 : ipm->ipm_locks++;
337 43 : }
338 :
339 : static void
340 43 : ip_pmtu_dpo_unlock (dpo_id_t *dpo)
341 : {
342 : ip_pmtu_dpo_t *ipm;
343 :
344 43 : ipm = ip_pmtu_dpo_get_from_dpo (dpo);
345 43 : ipm->ipm_locks--;
346 :
347 43 : if (0 == ipm->ipm_locks)
348 : {
349 11 : dpo_reset (&ipm->ipm_dpo);
350 11 : pool_put (ip_pmtu_dpo_pool, ipm);
351 : }
352 43 : }
353 :
354 : static u32
355 0 : ip_pmtu_dpo_get_urpf (const dpo_id_t *dpo)
356 : {
357 : ip_pmtu_dpo_t *ipm;
358 :
359 0 : ipm = ip_pmtu_dpo_get_from_dpo (dpo);
360 :
361 0 : return (dpo_get_urpf (&ipm->ipm_dpo));
362 : }
363 :
364 : void
365 5 : ip_pmtu_dpo_add_or_lock (u16 pmtu, const dpo_id_t *parent, dpo_id_t *dpo)
366 : {
367 : ip_pmtu_dpo_t *ipm;
368 :
369 5 : ipm = ip_pmtu_dpo_alloc ();
370 :
371 5 : ipm->ipm_proto = parent->dpoi_proto;
372 5 : ipm->ipm_pmtu = pmtu;
373 :
374 5 : dpo_stack (ip_pmtu_dpo_type, ipm->ipm_proto, &ipm->ipm_dpo, parent);
375 5 : dpo_set (dpo, ip_pmtu_dpo_type, ipm->ipm_proto, ip_pmtu_dpo_get_index (ipm));
376 5 : }
377 :
378 : u8 *
379 8 : format_ip_pmtu_dpo (u8 *s, va_list *ap)
380 : {
381 8 : index_t index = va_arg (*ap, index_t);
382 8 : u32 indent = va_arg (*ap, u32);
383 8 : ip_pmtu_dpo_t *ipm = ip_pmtu_dpo_get (index);
384 :
385 8 : s = format (s, "ip-pmtu-dpo: %U, mtu:%d", format_dpo_proto, ipm->ipm_proto,
386 8 : ipm->ipm_pmtu);
387 8 : s = format (s, "\n%U", format_white_space, indent + 2);
388 8 : s = format (s, "%U", format_dpo_id, &ipm->ipm_dpo, indent + 4);
389 :
390 8 : return (s);
391 : }
392 :
393 : /**
394 : * Interpose a path MTU DPO
395 : */
396 : static void
397 6 : ip_pmtu_dpo_interpose (const dpo_id_t *original, const dpo_id_t *parent,
398 : dpo_id_t *clone)
399 : {
400 : ip_pmtu_dpo_t *ipm, *ipm_clone;
401 :
402 6 : ipm_clone = ip_pmtu_dpo_alloc ();
403 6 : ipm = ip_pmtu_dpo_get (original->dpoi_index);
404 :
405 6 : ipm_clone->ipm_proto = ipm->ipm_proto;
406 6 : ipm_clone->ipm_pmtu = ipm->ipm_pmtu;
407 :
408 6 : dpo_stack (ip_pmtu_dpo_type, ipm_clone->ipm_proto, &ipm_clone->ipm_dpo,
409 : parent);
410 6 : dpo_set (clone, ip_pmtu_dpo_type, ipm_clone->ipm_proto,
411 : ip_pmtu_dpo_get_index (ipm_clone));
412 6 : }
413 :
414 : static u16
415 0 : ip_pmtu_dpo_get_mtu (const dpo_id_t *dpo)
416 : {
417 : ip_pmtu_dpo_t *ipd;
418 :
419 0 : ipd = pool_elt_at_index (ip_pmtu_dpo_pool, dpo->dpoi_index);
420 :
421 0 : return (ipd->ipm_pmtu);
422 : }
423 :
424 : const static dpo_vft_t ip_pmtu_dpo_vft = {
425 : .dv_lock = ip_pmtu_dpo_lock,
426 : .dv_unlock = ip_pmtu_dpo_unlock,
427 : .dv_format = format_ip_pmtu_dpo,
428 : .dv_get_urpf = ip_pmtu_dpo_get_urpf,
429 : .dv_mk_interpose = ip_pmtu_dpo_interpose,
430 : .dv_get_mtu = ip_pmtu_dpo_get_mtu,
431 : };
432 :
433 : /**
434 : * @brief The per-protocol VLIB graph nodes that are assigned to a glean
435 : * object.
436 : *
437 : * this means that these graph nodes are ones from which a glean is the
438 : * parent object in the DPO-graph.
439 : */
440 : const static char *const ip_pmtu_dpo_ip4_nodes[] = {
441 : "ip4-pmtu-dpo",
442 : NULL,
443 : };
444 :
445 : const static char *const ip_pmtu_dpo_ip6_nodes[] = {
446 : "ip6-pmtu-dpo",
447 : NULL,
448 : };
449 :
450 : const static char *const *const ip_pmtu_dpo_nodes[DPO_PROTO_NUM] = {
451 : [DPO_PROTO_IP4] = ip_pmtu_dpo_ip4_nodes,
452 : [DPO_PROTO_IP6] = ip_pmtu_dpo_ip6_nodes,
453 : };
454 :
455 : static bool
456 64 : ip_mtu_fib_entry_is_attached (fib_node_index_t fib_entry)
457 : {
458 : const fib_prefix_t *pfx;
459 : u32 cover, fib_index;
460 :
461 64 : fib_index = fib_entry_get_fib_index (fib_entry);
462 64 : pfx = fib_entry_get_prefix (fib_entry);
463 :
464 : /*
465 : * If the tracked prefix's cover is attached, then all packets that
466 : * are forwarded to this neighbour will use the adjacency, this is a
467 : * more efficient place to perform the MTU check and fragging
468 : */
469 64 : cover = fib_table_get_less_specific (fib_index, pfx);
470 :
471 75 : return (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags (cover) ||
472 11 : FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags (fib_entry));
473 : }
474 :
475 : static index_t
476 19 : ip_pmtu_alloc (u32 fib_index, const fib_prefix_t *pfx,
477 : const ip_pmtu_key_t *key, u16 pmtu)
478 : {
479 19 : dpo_id_t ip_dpo = DPO_INVALID;
480 : ip_pmtu_t *ipt;
481 : fib_node_index_t cover;
482 : const dpo_id_t *lb_dpo;
483 : index_t ipti;
484 :
485 19 : pool_get (ip_pmtu_pool, ipt);
486 19 : fib_node_init (&(ipt->ipt_node), ip_pmtu_fib_type);
487 :
488 19 : ipti = ipt - ip_pmtu_pool;
489 19 : hash_set_mem_alloc (&ip_pmtu_db, key, ipti);
490 :
491 19 : ipt->ipt_cfg_pmtu = pmtu;
492 38 : ipt->ipt_fib_entry = fib_entry_track (fib_index, pfx, ip_pmtu_fib_type, ipti,
493 19 : &ipt->ipt_sibling);
494 :
495 : /*
496 : * If the tracked prefix's cover is attached, then all packets that
497 : * are forwarded to this neighbour will use the adjacency, this is a
498 : * more efficient place to perform the MTU check and fragging
499 : */
500 19 : cover = fib_table_get_less_specific (fib_index, pfx);
501 :
502 19 : if (ip_mtu_fib_entry_is_attached (ipt->ipt_fib_entry))
503 : {
504 : u32 sw_if_index;
505 :
506 18 : ipt->ipt_flags |= IP_PMTU_FLAG_ATTACHED;
507 18 : ipt->ipt_oper_pmtu = ipt->ipt_cfg_pmtu;
508 :
509 18 : sw_if_index = fib_entry_get_resolving_interface (cover);
510 :
511 : /* walk all adjs to add/update delegate */
512 18 : adj_nbr_walk_nh (sw_if_index, pfx->fp_proto, &pfx->fp_addr,
513 18 : ip_ptmu_adj_walk_update, &ipt->ipt_oper_pmtu);
514 : }
515 : else
516 : {
517 1 : ipt->ipt_flags |= IP_PMTU_FLAG_REMOTE;
518 :
519 1 : lb_dpo = fib_entry_contribute_ip_forwarding (ipt->ipt_fib_entry);
520 :
521 1 : ipt->ipt_oper_pmtu = clib_min (dpo_get_mtu (lb_dpo), ipt->ipt_cfg_pmtu);
522 :
523 : /*
524 : * interpose a policy DPO from the nh so that MTU is applied
525 : */
526 1 : ip_pmtu_dpo_add_or_lock (ipt->ipt_oper_pmtu,
527 1 : drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)),
528 : &ip_dpo);
529 :
530 1 : fib_table_entry_special_dpo_add (fib_index, pfx, ip_pmtu_source,
531 : FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo);
532 1 : dpo_reset (&ip_dpo);
533 : }
534 :
535 19 : IP_PMTU_TRKR_DBG (ipt, "create");
536 :
537 19 : return (ipti);
538 : }
539 :
540 : static void
541 45 : ip_pmtu_stack (ip_pmtu_t *ipt)
542 : {
543 : bool was_attached, is_attached;
544 : const fib_prefix_t *pfx;
545 : u32 fib_index;
546 :
547 45 : pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
548 45 : fib_index = fib_entry_get_fib_index (ipt->ipt_fib_entry);
549 :
550 45 : was_attached = !!(ipt->ipt_flags & IP_PMTU_FLAG_ATTACHED);
551 45 : is_attached = ip_mtu_fib_entry_is_attached (ipt->ipt_fib_entry);
552 :
553 45 : if (was_attached && !is_attached)
554 : {
555 : /* transition from attached to remote - walk all adjs to remove delegate
556 : */
557 1 : adj_nbr_walk_nh (fib_entry_get_resolving_interface (ipt->ipt_fib_entry),
558 1 : pfx->fp_proto, &pfx->fp_addr, ip_ptmu_adj_walk_remove,
559 1 : &ipt->ipt_oper_pmtu);
560 1 : ipt->ipt_flags &= ~IP_PMTU_FLAG_ATTACHED;
561 : }
562 45 : if (!was_attached && is_attached)
563 : {
564 : /* transition from remote to attached - remove the DPO */
565 1 : fib_table_entry_special_remove (fib_index, pfx, ip_pmtu_source);
566 1 : ipt->ipt_flags &= ~IP_PMTU_FLAG_REMOTE;
567 : }
568 :
569 45 : if (is_attached)
570 : {
571 : /* walk all adjs to add/update delegate */
572 39 : ipt->ipt_oper_pmtu = ipt->ipt_cfg_pmtu;
573 39 : adj_nbr_walk_nh (fib_entry_get_resolving_interface (ipt->ipt_fib_entry),
574 39 : pfx->fp_proto, &pfx->fp_addr, ip_ptmu_adj_walk_update,
575 39 : &ipt->ipt_oper_pmtu);
576 39 : ipt->ipt_flags |= IP_PMTU_FLAG_ATTACHED;
577 : }
578 : else
579 : {
580 : const dpo_id_t *lb_dpo;
581 : u16 dpo_mtu;
582 :
583 6 : fib_table_entry_special_remove (fib_index, pfx, ip_pmtu_source);
584 :
585 6 : ipt->ipt_flags |= IP_PMTU_FLAG_REMOTE;
586 6 : lb_dpo = fib_entry_contribute_ip_forwarding (ipt->ipt_fib_entry);
587 6 : dpo_mtu = dpo_get_mtu (lb_dpo);
588 :
589 6 : ipt->ipt_oper_pmtu = clib_min (dpo_mtu, ipt->ipt_cfg_pmtu);
590 :
591 : /*
592 : * if the configured path-MTU is less that the egress/interface then
593 : * interpose a policy DPO from the nh so that MTU is applied
594 : */
595 6 : if (ipt->ipt_oper_pmtu < dpo_mtu)
596 : {
597 4 : dpo_id_t ip_dpo = DPO_INVALID;
598 :
599 4 : ip_pmtu_dpo_add_or_lock (
600 4 : ipt->ipt_oper_pmtu,
601 4 : drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)), &ip_dpo);
602 :
603 4 : fib_table_entry_special_dpo_update (
604 : fib_index, pfx, ip_pmtu_source, FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo);
605 4 : dpo_reset (&ip_dpo);
606 : }
607 : }
608 45 : IP_PMTU_TRKR_DBG (ipt, "stack");
609 45 : }
610 :
611 : static void
612 10 : ip_pmtu_update (index_t ipti, u16 pmtu)
613 : {
614 : ip_pmtu_t *ipt;
615 :
616 10 : ipt = pool_elt_at_index (ip_pmtu_pool, ipti);
617 10 : ipt->ipt_flags &= ~IP_PMTU_FLAG_STALE;
618 10 : ipt->ipt_cfg_pmtu = pmtu;
619 :
620 10 : ip_pmtu_stack (ipt);
621 10 : }
622 :
623 : static index_t
624 18 : ip_pmtu_destroy (index_t ipti, const ip_pmtu_key_t *key)
625 : {
626 : ip_pmtu_t *ipt;
627 : const fib_prefix_t *pfx;
628 :
629 18 : ipt = pool_elt_at_index (ip_pmtu_pool, ipti);
630 18 : pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
631 :
632 18 : IP_PMTU_TRKR_DBG (ipt, "destroy");
633 :
634 18 : if (ipt->ipt_flags & IP_PMTU_FLAG_REMOTE)
635 0 : fib_table_entry_special_remove (
636 : fib_entry_get_fib_index (ipt->ipt_fib_entry), pfx, ip_pmtu_source);
637 : else
638 : /* remove the delegate from all the adjacencies */
639 18 : adj_nbr_walk_nh (fib_entry_get_resolving_interface (ipt->ipt_fib_entry),
640 18 : pfx->fp_proto, &pfx->fp_addr, ip_ptmu_adj_walk_remove,
641 : NULL);
642 :
643 : /*
644 : * Drop the fib entry we're tracking
645 : */
646 18 : fib_entry_untrack (ipt->ipt_fib_entry, ipt->ipt_sibling);
647 :
648 : /*
649 : * remove from DB and return to pool
650 : */
651 18 : hash_unset_mem_free (&ip_pmtu_db, key);
652 18 : pool_put (ip_pmtu_pool, ipt);
653 :
654 18 : return (ipti);
655 : }
656 :
657 : int
658 47 : ip_path_mtu_update (const ip_address_t *nh, u32 table_id, u16 pmtu)
659 : {
660 : fib_prefix_t pfx;
661 : u32 fib_index;
662 : uword *p;
663 :
664 47 : ip_address_to_fib_prefix (nh, &pfx);
665 47 : fib_index = fib_table_find (pfx.fp_proto, table_id);
666 :
667 47 : if (~0 == fib_index)
668 0 : return (VNET_API_ERROR_NO_SUCH_TABLE);
669 :
670 47 : ip_pmtu_key_t key = {
671 47 : .fproto = pfx.fp_proto,
672 : .table_id = table_id,
673 : .nh = pfx.fp_addr,
674 : };
675 :
676 47 : p = hash_get_mem (ip_pmtu_db, &key);
677 :
678 47 : if (!ip_path_mtu_value_invalid (pmtu))
679 : {
680 : /* Add or update of path MTU */
681 29 : if (NULL == p)
682 19 : ip_pmtu_alloc (fib_index, &pfx, &key, pmtu);
683 : else
684 10 : ip_pmtu_update (p[0], pmtu);
685 : }
686 : else
687 : {
688 18 : if (NULL != p)
689 18 : ip_pmtu_destroy (p[0], &key);
690 : }
691 :
692 47 : return (0);
693 : }
694 :
695 : static walk_rc_t
696 2 : ip_path_mtu_walk_mark (index_t ipti, void *ctx)
697 : {
698 : ip_pmtu_t *ipt;
699 :
700 2 : ipt = ip_path_mtu_get (ipti);
701 :
702 2 : ipt->ipt_flags |= IP_PMTU_FLAG_STALE;
703 :
704 2 : return (WALK_CONTINUE);
705 : }
706 :
707 : typedef struct ip_path_mtu_walk_sweep_ctx_t_
708 : {
709 : index_t *indicies;
710 : } ip_path_mtu_walk_sweep_ctx_t;
711 :
712 : static walk_rc_t
713 2 : ip_path_mtu_walk_sweep (index_t ipti, void *arg)
714 : {
715 2 : ip_path_mtu_walk_sweep_ctx_t *ctx = arg;
716 : ip_pmtu_t *ipt;
717 :
718 2 : ipt = ip_path_mtu_get (ipti);
719 :
720 2 : if (ipt->ipt_flags & IP_PMTU_FLAG_STALE)
721 : {
722 1 : vec_add1 (ctx->indicies, ipti);
723 : }
724 :
725 2 : return (WALK_CONTINUE);
726 : }
727 :
728 : int
729 2 : ip_path_mtu_replace_begin (void)
730 : {
731 2 : IP_PMTU_DBG ("replace-begin");
732 :
733 2 : ip_path_mtu_walk (ip_path_mtu_walk_mark, NULL);
734 :
735 2 : return (0);
736 : }
737 :
738 : int
739 2 : ip_path_mtu_replace_end (void)
740 : {
741 : index_t *ipti;
742 :
743 2 : IP_PMTU_DBG ("replace-end");
744 :
745 : /*
746 : * not safe to walk the pool whilst deleting, so create
747 : * temporary storage of stale entries
748 : */
749 2 : ip_path_mtu_walk_sweep_ctx_t ctx = {
750 : .indicies = NULL,
751 : };
752 :
753 2 : ip_path_mtu_walk (ip_path_mtu_walk_sweep, &ctx);
754 :
755 3 : vec_foreach (ipti, ctx.indicies)
756 : {
757 : ip_pmtu_t *ipt;
758 : ip_address_t ip;
759 :
760 1 : ipt = ip_path_mtu_get (*ipti);
761 1 : ip_pmtu_get_ip (ipt, &ip);
762 1 : ip_path_mtu_update (&ip, ip_pmtu_get_table_id (ipt), 0);
763 : }
764 :
765 2 : vec_free (ctx.indicies);
766 :
767 2 : return (0);
768 : }
769 :
770 : void
771 4 : ip_path_mtu_walk (ip_path_mtu_walk_t fn, void *ctx)
772 : {
773 : index_t ipmi;
774 :
775 8 : pool_foreach_index (ipmi, ip_pmtu_pool)
776 : {
777 4 : if (WALK_STOP == fn (ipmi, ctx))
778 0 : break;
779 : }
780 4 : }
781 :
782 : static fib_node_t *
783 35 : ip_pmtu_get_node (fib_node_index_t index)
784 : {
785 : ip_pmtu_t *ipt;
786 :
787 35 : ipt = pool_elt_at_index (ip_pmtu_pool, index);
788 :
789 35 : return (&(ipt->ipt_node));
790 : }
791 :
792 : static ip_pmtu_t *
793 35 : ip_pmtu_get_from_node (fib_node_t *node)
794 : {
795 : return (
796 35 : (ip_pmtu_t *) (((char *) node) - STRUCT_OFFSET_OF (ip_pmtu_t, ipt_node)));
797 : }
798 :
799 : static void
800 0 : ip_pmtu_last_lock_gone (fib_node_t *node)
801 : {
802 : /*
803 : * the lifetime of the entry is managed by the API.
804 : */
805 0 : ASSERT (0);
806 0 : }
807 :
808 : /*
809 : * A back walk has reached this BIER entry
810 : */
811 : static fib_node_back_walk_rc_t
812 35 : ip_pmtu_back_walk_notify (fib_node_t *node, fib_node_back_walk_ctx_t *ctx)
813 : {
814 : /*
815 : * re-populate the ECMP tables with new choices
816 : */
817 35 : ip_pmtu_t *ipr = ip_pmtu_get_from_node (node);
818 :
819 35 : ip_pmtu_stack (ipr);
820 :
821 : /*
822 : * no need to propagate further up the graph, since there's nothing there
823 : */
824 35 : return (FIB_NODE_BACK_WALK_CONTINUE);
825 : }
826 :
827 : static const fib_node_vft_t ip_ptmu_fib_node_vft = {
828 : .fnv_get = ip_pmtu_get_node,
829 : .fnv_last_lock = ip_pmtu_last_lock_gone,
830 : .fnv_back_walk = ip_pmtu_back_walk_notify,
831 : };
832 :
833 : static clib_error_t *
834 575 : ip_path_module_init (vlib_main_t *vm)
835 : {
836 575 : ip_pmtu_adj_delegate_type =
837 575 : adj_delegate_register_new_type (&ip_path_adj_delegate_vft);
838 575 : ip_pmtu_source = fib_source_allocate ("path-mtu", FIB_SOURCE_PRIORITY_HI,
839 : FIB_SOURCE_BH_SIMPLE);
840 575 : ip_pmtu_fib_type =
841 575 : fib_node_register_new_type ("ip-pmtu", &ip_ptmu_fib_node_vft);
842 :
843 575 : ip_pmtu_db = hash_create_mem (0, sizeof (ip_pmtu_key_t), sizeof (index_t));
844 575 : ip_pmtu_logger = vlib_log_register_class ("ip", "pmtu");
845 575 : ip_pmtu_dpo_type =
846 575 : dpo_register_new_type (&ip_pmtu_dpo_vft, ip_pmtu_dpo_nodes);
847 :
848 575 : return (NULL);
849 : }
850 :
851 46079 : VLIB_INIT_FUNCTION (ip_path_module_init);
852 :
853 : static clib_error_t *
854 2 : show_ip_pmtu_command (vlib_main_t *vm, unformat_input_t *input,
855 : vlib_cli_command_t *cmd)
856 : {
857 : index_t ipti;
858 :
859 2 : if (unformat (input, "%d", &ipti))
860 : {
861 : /*
862 : * show one in detail
863 : */
864 0 : if (!pool_is_free_index (ip_pmtu_pool, ipti))
865 0 : vlib_cli_output (vm, "%U", format_ip_pmtu, ipti);
866 : else
867 0 : vlib_cli_output (vm, "entry %d invalid", ipti);
868 : }
869 : else
870 : {
871 : /*
872 : * show all
873 : */
874 19 : pool_foreach_index (ipti, ip_pmtu_pool)
875 : {
876 17 : vlib_cli_output (vm, "%U", format_ip_pmtu, ipti);
877 : }
878 : }
879 :
880 2 : return (NULL);
881 : }
882 :
883 285289 : VLIB_CLI_COMMAND (show_fib_entry, static) = {
884 : .path = "show ip pmtu",
885 : .function = show_ip_pmtu_command,
886 : .short_help = "show ip path MTU",
887 : };
888 :
889 : /*
890 : * fd.io coding-style-patch-verification: ON
891 : *
892 : * Local Variables:
893 : * eval: (c-set-style "gnu")
894 : * End:
895 : */
|