Line data Source code
1 : /*
2 : * Copyright (c) 2016 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 :
16 : #include <vnet/adj/adj_nbr.h>
17 : #include <vnet/adj/adj_internal.h>
18 : #include <vnet/ethernet/arp_packet.h>
19 : #include <vnet/fib/fib_walk.h>
20 :
21 : /*
22 : * Vector Hash tables of neighbour (traditional) adjacencies
23 : * Key: interface(for the vector index), address (and its proto),
24 : * link-type/ether-type.
25 : */
/*
 * One vector per next-hop protocol; each vector is indexed by sw_if_index
 * and holds that interface's hash table (NULL when it has none).
 * adj_nbr_insert() grows the vector and creates the table on demand;
 * adj_nbr_remove() frees a table once it becomes empty.
 */
26 : static uword **adj_nbr_tables[FIB_PROTOCOL_IP_MAX];
27 :
/*
 * Key of the per-interface hash tables: the neighbour's address plus the
 * link type. The tables are created with sizeof(adj_nbr_key_t) as the key
 * size, so the whole structure is the key; always fill it with
 * ADJ_NBR_SET_KEY.
 */
28 : typedef struct adj_nbr_key_t_
29 : {
30 : ip46_address_t ank_ip;
31 : u64 ank_linkt;
32 : } adj_nbr_key_t;
33 :
/*
 * Fill an adj_nbr_key_t from a link type and a next-hop address.
 *
 *  _key - an lvalue of type adj_nbr_key_t (any lvalue expression is fine)
 *  _lt  - the link type, stored in ank_linkt
 *  _nh  - pointer to the next-hop address, copied into ank_ip
 *
 * Wrapped in do/while(0) so that "ADJ_NBR_SET_KEY(...);" is exactly one
 * statement, including when used as the body of an if/else.
 */
#define ADJ_NBR_SET_KEY(_key, _lt, _nh)                 \
do {                                                    \
    ip46_address_copy(&(_key).ank_ip, (_nh));           \
    (_key).ank_linkt = (_lt);                           \
} while (0)
39 :
/*
 * Non-zero when interface _itf has a hash table for protocol _proto,
 * i.e. the index is inside the per-protocol vector and the slot is set.
 * NB: _itf is evaluated more than once.
 */
#define ADJ_NBR_ITF_OK(_proto, _itf)                        \
    ((vec_len(adj_nbr_tables[_proto]) > (_itf)) &&          \
     (adj_nbr_tables[_proto][(_itf)] != NULL))
43 :
/*
 * Guard used at the top of functions that index adj_nbr_tables[] by
 * next-hop protocol.
 *
 *  nh_proto - the protocol to validate; evaluated exactly once
 *  err      - value for the enclosing function to return on failure
 *             (leave empty in a function returning void)
 *
 * ASSERTs that the protocol is below FIB_PROTOCOL_IP_MAX; if the check
 * still fails at run time, a warning is logged and the enclosing
 * function returns err instead of indexing out of bounds.
 */
#define ADJ_NBR_ASSERT_NH_PROTO(nh_proto, err)          \
do {                                                    \
    const fib_protocol_t nh_proto__ = (nh_proto);       \
    ASSERT (nh_proto__ < FIB_PROTOCOL_IP_MAX);          \
    if (nh_proto__ >= FIB_PROTOCOL_IP_MAX)              \
    {                                                   \
        clib_warning ("BUG: protocol %d >= %d\n",       \
                      (int)nh_proto__,                  \
                      FIB_PROTOCOL_IP_MAX);             \
        return err;                                     \
    }                                                   \
} while (0)
56 :
57 : static void
58 10068 : adj_nbr_insert (fib_protocol_t nh_proto,
59 : vnet_link_t link_type,
60 : const ip46_address_t *nh_addr,
61 : u32 sw_if_index,
62 : adj_index_t adj_index)
63 : {
64 : adj_nbr_key_t kv;
65 :
66 10068 : ADJ_NBR_ASSERT_NH_PROTO (nh_proto,);
67 :
68 10068 : if (sw_if_index >= vec_len(adj_nbr_tables[nh_proto]))
69 : {
70 2271 : vec_validate(adj_nbr_tables[nh_proto], sw_if_index);
71 : }
72 10068 : if (NULL == adj_nbr_tables[nh_proto][sw_if_index])
73 : {
74 4360 : adj_nbr_tables[nh_proto][sw_if_index] =
75 4360 : hash_create_mem(0, sizeof(adj_nbr_key_t), sizeof(adj_index_t));
76 : }
77 :
78 10068 : ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
79 :
80 10068 : hash_set_mem_alloc (&adj_nbr_tables[nh_proto][sw_if_index],
81 : &kv, adj_index);
82 : }
83 :
84 : void
85 6077 : adj_nbr_remove (adj_index_t ai,
86 : fib_protocol_t nh_proto,
87 : vnet_link_t link_type,
88 : const ip46_address_t *nh_addr,
89 : u32 sw_if_index)
90 : {
91 : adj_nbr_key_t kv;
92 :
93 6077 : ADJ_NBR_ASSERT_NH_PROTO (nh_proto,);
94 :
95 6077 : if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
96 0 : return;
97 :
98 6077 : ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
99 :
100 6077 : hash_unset_mem_free(&adj_nbr_tables[nh_proto][sw_if_index], &kv);
101 :
102 6077 : if (0 == hash_elts(adj_nbr_tables[nh_proto][sw_if_index]))
103 : {
104 4020 : hash_free(adj_nbr_tables[nh_proto][sw_if_index]);
105 : }
106 : }
107 :
/*
 * Context for adj_nbr_get_n_adjs_walk(): 'linkt' is the link type to
 * match, 'count' is incremented once per visited adjacency whose
 * ia_link equals it.
 */
108 : typedef struct adj_nbr_get_n_adjs_walk_ctx_t_
109 : {
110 : vnet_link_t linkt;
111 : u32 count;
112 : } adj_nbr_get_n_adjs_walk_ctx_t;
113 :
114 : static adj_walk_rc_t
115 0 : adj_nbr_get_n_adjs_walk (adj_index_t ai,
116 : void *data)
117 : {
118 0 : adj_nbr_get_n_adjs_walk_ctx_t *ctx = data;
119 : const ip_adjacency_t *adj;
120 :
121 0 : adj = adj_get(ai);
122 :
123 0 : if (ctx->linkt == adj->ia_link)
124 0 : ctx->count++;
125 :
126 0 : return (ADJ_WALK_RC_CONTINUE);
127 : }
128 :
129 : u32
130 0 : adj_nbr_get_n_adjs (vnet_link_t link_type, u32 sw_if_index)
131 : {
132 0 : adj_nbr_get_n_adjs_walk_ctx_t ctx = {
133 : .linkt = link_type,
134 : };
135 : fib_protocol_t fproto;
136 :
137 0 : FOR_EACH_FIB_IP_PROTOCOL(fproto)
138 : {
139 0 : adj_nbr_walk (sw_if_index,
140 : fproto,
141 : adj_nbr_get_n_adjs_walk,
142 : &ctx);
143 : }
144 :
145 0 : return (ctx.count);
146 : }
147 :
148 : adj_index_t
149 172367 : adj_nbr_find (fib_protocol_t nh_proto,
150 : vnet_link_t link_type,
151 : const ip46_address_t *nh_addr,
152 : u32 sw_if_index)
153 : {
154 : adj_nbr_key_t kv;
155 : uword *p;
156 :
157 172367 : ADJ_NBR_ASSERT_NH_PROTO (nh_proto, ADJ_INDEX_INVALID);
158 :
159 172367 : ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
160 :
161 172367 : if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
162 4360 : return (ADJ_INDEX_INVALID);
163 :
164 168007 : p = hash_get_mem(adj_nbr_tables[nh_proto][sw_if_index], &kv);
165 :
166 168007 : if (p)
167 : {
168 39216 : return (p[0]);
169 : }
170 128791 : return (ADJ_INDEX_INVALID);
171 : }
172 :
173 : static inline u32
174 15612 : adj_get_nd_node (fib_protocol_t proto)
175 : {
176 15612 : switch (proto) {
177 8700 : case FIB_PROTOCOL_IP4:
178 8700 : return (ip4_arp_node.index);
179 6912 : case FIB_PROTOCOL_IP6:
180 6912 : return (ip6_discover_neighbor_node.index);
181 0 : case FIB_PROTOCOL_MPLS:
182 0 : break;
183 : }
184 0 : ASSERT(0);
185 0 : return (ip4_arp_node.index);
186 : }
187 :
188 : /**
189 : * @brief Check and set feature flags if o/p interface has any o/p features.
190 : */
191 : static void
192 10068 : adj_nbr_evaluate_feature (adj_index_t ai)
193 : {
194 : ip_adjacency_t *adj;
195 10068 : vnet_feature_main_t *fm = &feature_main;
196 : i16 feature_count;
197 : u8 arc_index;
198 : u32 sw_if_index;
199 :
200 10068 : adj = adj_get(ai);
201 :
202 10068 : switch (adj->ia_link)
203 : {
204 5535 : case VNET_LINK_IP4:
205 5535 : arc_index = ip4_main.lookup_main.output_feature_arc_index;
206 5535 : break;
207 4442 : case VNET_LINK_IP6:
208 4442 : arc_index = ip6_main.lookup_main.output_feature_arc_index;
209 4442 : break;
210 75 : case VNET_LINK_MPLS:
211 75 : arc_index = mpls_main.output_feature_arc_index;
212 75 : break;
213 16 : default:
214 16 : return;
215 : }
216 :
217 10052 : sw_if_index = adj->rewrite_header.sw_if_index;
218 10052 : if (vec_len(fm->feature_count_by_sw_if_index[arc_index]) > sw_if_index)
219 : {
220 835 : feature_count = fm->feature_count_by_sw_if_index[arc_index][sw_if_index];
221 835 : if (feature_count > 0)
222 : {
223 : vnet_feature_config_main_t *cm;
224 :
225 256 : adj->rewrite_header.flags |= VNET_REWRITE_HAS_FEATURES;
226 256 : cm = &fm->feature_config_mains[arc_index];
227 :
228 256 : adj->ia_cfg_index = vec_elt (cm->config_index_by_sw_if_index,
229 : sw_if_index);
230 : }
231 : }
232 10052 : return;
233 : }
234 :
235 : static ip_adjacency_t*
236 10068 : adj_nbr_alloc (fib_protocol_t nh_proto,
237 : vnet_link_t link_type,
238 : const ip46_address_t *nh_addr,
239 : u32 sw_if_index)
240 : {
241 : ip_adjacency_t *adj;
242 :
243 10068 : adj = adj_alloc(nh_proto);
244 :
245 10068 : adj_nbr_insert(nh_proto, link_type, nh_addr,
246 : sw_if_index,
247 : adj_get_index(adj));
248 :
249 : /*
250 : * since we just added the ADJ we have no rewrite string for it,
251 : * so its for ARP
252 : */
253 10068 : adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
254 10068 : adj->sub_type.nbr.next_hop = *nh_addr;
255 10068 : adj->ia_link = link_type;
256 10068 : adj->ia_nh_proto = nh_proto;
257 10068 : adj->rewrite_header.sw_if_index = sw_if_index;
258 10068 : vnet_rewrite_update_mtu(vnet_get_main(), adj->ia_link,
259 : &adj->rewrite_header);
260 :
261 10068 : adj_nbr_evaluate_feature (adj_get_index(adj));
262 10068 : return (adj);
263 : }
264 :
265 : void
266 87 : adj_nbr_set_mtu (adj_index_t adj_index, u16 mtu)
267 : {
268 : ip_adjacency_t *adj;
269 :
270 87 : ASSERT(ADJ_INDEX_INVALID != adj_index);
271 :
272 87 : adj = adj_get(adj_index);
273 :
274 87 : if (0 == mtu)
275 17 : vnet_rewrite_update_mtu(vnet_get_main(), adj->ia_link,
276 : &adj->rewrite_header);
277 : else
278 : {
279 70 : vnet_rewrite_update_mtu(vnet_get_main(), adj->ia_link,
280 : &adj->rewrite_header);
281 70 : adj->rewrite_header.max_l3_packet_bytes =
282 70 : clib_min (adj->rewrite_header.max_l3_packet_bytes, mtu);
283 : }
284 87 : }
285 :
286 : /*
287 : * adj_nbr_add_or_lock
288 : *
289 : * Add an adjacency for the neighbour requested.
290 : *
291 : * The key for an adj is:
292 : * - the Next-hops protocol (i.e. v4 or v6)
293 : * - the address of the next-hop
294 : * - the interface the next-hop is reachable through
295 : */
296 : adj_index_t
297 18754 : adj_nbr_add_or_lock (fib_protocol_t nh_proto,
298 : vnet_link_t link_type,
299 : const ip46_address_t *nh_addr,
300 : u32 sw_if_index)
301 : {
302 : adj_index_t adj_index;
303 :
304 18754 : adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
305 :
306 18754 : if (ADJ_INDEX_INVALID == adj_index)
307 : {
308 : ip_adjacency_t *adj;
309 : vnet_main_t *vnm;
310 :
311 10068 : vnm = vnet_get_main();
312 10068 : adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
313 10068 : adj_index = adj_get_index(adj);
314 10068 : adj_lock(adj_index);
315 :
316 10068 : if (ip46_address_is_equal(&ADJ_BCAST_ADDR, nh_addr))
317 : {
318 18 : adj->lookup_next_index = IP_LOOKUP_NEXT_BCAST;
319 : }
320 :
321 10068 : vnet_rewrite_init(vnm, sw_if_index, link_type,
322 : adj_get_nd_node(nh_proto),
323 : vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
324 : &adj->rewrite_header);
325 :
326 : /*
327 : * we need a rewrite where the destination IP address is converted
328 : * to the appropriate link-layer address. This is interface specific.
329 : * So ask the interface to do it.
330 : */
331 10068 : vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, adj_index);
332 10068 : adj_delegate_adj_created(adj_get(adj_index));
333 : }
334 : else
335 : {
336 8686 : adj_lock(adj_index);
337 : }
338 :
339 18754 : return (adj_index);
340 : }
341 :
342 : adj_index_t
343 0 : adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto,
344 : vnet_link_t link_type,
345 : const ip46_address_t *nh_addr,
346 : u32 sw_if_index,
347 : u8 *rewrite)
348 : {
349 : adj_index_t adj_index;
350 :
351 0 : adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
352 :
353 0 : if (ADJ_INDEX_INVALID == adj_index)
354 : {
355 : ip_adjacency_t *adj;
356 :
357 0 : adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
358 0 : adj->rewrite_header.sw_if_index = sw_if_index;
359 0 : adj_index = adj_get_index(adj);
360 : }
361 :
362 0 : adj_lock(adj_index);
363 0 : adj_nbr_update_rewrite(adj_index,
364 : ADJ_NBR_REWRITE_FLAG_COMPLETE,
365 : rewrite);
366 :
367 0 : adj_delegate_adj_created(adj_get(adj_index));
368 :
369 0 : return (adj_index);
370 : }
371 :
372 : /**
373 : * adj_nbr_update_rewrite
374 : *
375 : * Update the adjacency's rewrite string. A NULL string implies the
376 : * rewrite is reset (i.e. when ARP/ND entry is gone).
377 : * NB: the adj being updated may be handling traffic in the DP.
378 : */
379 : void
380 24890 : adj_nbr_update_rewrite (adj_index_t adj_index,
381 : adj_nbr_rewrite_flag_t flags,
382 : u8 *rewrite)
383 : {
384 : ip_adjacency_t *adj;
385 :
386 24890 : ASSERT(ADJ_INDEX_INVALID != adj_index);
387 :
388 24890 : adj = adj_get(adj_index);
389 :
390 24890 : if (flags & ADJ_NBR_REWRITE_FLAG_COMPLETE)
391 : {
392 : /*
393 : * update the adj's rewrite string and build the arc
394 : * from the rewrite node to the interface's TX node
395 : */
396 38692 : adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_REWRITE,
397 19346 : adj_get_rewrite_node(adj->ia_link),
398 : vnet_tx_node_index_for_sw_interface(
399 19346 : vnet_get_main(),
400 : adj->rewrite_header.sw_if_index),
401 : rewrite);
402 : }
403 : else
404 : {
405 11088 : adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_ARP,
406 5544 : adj_get_nd_node(adj->ia_nh_proto),
407 : vnet_tx_node_index_for_sw_interface(
408 5544 : vnet_get_main(),
409 : adj->rewrite_header.sw_if_index),
410 : rewrite);
411 : }
412 24890 : }
413 :
414 : /**
415 : * adj_nbr_update_rewrite_internal
416 : *
417 : * Update the adjacency's rewrite string. A NULL string implies the
418 : * rewrite is reset (i.e. when ARP/ND entry is gone).
419 : * NB: the adj being updated may be handling traffic in the DP.
420 : */
/*
 * Arguments, as used by the body below:
 *  adj            - the adjacency to modify; it is re-fetched by index
 *                   after the first back-walk
 *  adj_next_index - stored in adj->lookup_next_index
 *  this_node      - stored in adj->ia_node_index; the VLIB arc to
 *                   next_node is added from this node
 *  next_node      - node added as a next of this_node; the resulting arc
 *                   index is stored in the rewrite header
 *  rewrite        - the new rewrite vector, or NULL to clear the rewrite
 *                   data. A non-NULL vector is vec_free()'d here.
 */
421 : void
422 27504 : adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
423 : ip_lookup_next_t adj_next_index,
424 : u32 this_node,
425 : u32 next_node,
426 : u8 *rewrite)
427 : {
428 : ip_adjacency_t *walk_adj;
429 : adj_index_t walk_ai, ai;
430 : vlib_main_t * vm;
431 : u32 old_next;
432 : int do_walk;
433 :
434 27504 : vm = vlib_get_main();
/* remember the type before the update, to detect a type change */
435 27504 : old_next = adj->lookup_next_index;
436 :
/* by default the back-walk starts from the adj being updated */
437 27504 : ai = walk_ai = adj_get_index(adj);
438 27504 : if (VNET_LINK_MPLS == adj->ia_link)
439 : {
440 : /*
441 : * The link type MPLS has no children in the control plane graph, it only
442 : * has children in the data-plane graph. The backwalk is up the former.
443 : * So we need to walk from its IP cousin.
444 : */
/* the lookup can miss, leaving walk_ai == ADJ_INDEX_INVALID */
445 102 : walk_ai = adj_nbr_find(adj->ia_nh_proto,
446 102 : fib_proto_to_link(adj->ia_nh_proto),
447 102 : &adj->sub_type.nbr.next_hop,
448 : adj->rewrite_header.sw_if_index);
449 : }
450 :
451 : /*
452 : * Don't call the walk re-entrantly
453 : */
454 27504 : if (ADJ_INDEX_INVALID != walk_ai)
455 : {
456 27499 : walk_adj = adj_get(walk_ai);
457 27499 : if (ADJ_FLAG_SYNC_WALK_ACTIVE & walk_adj->ia_flags)
458 : {
459 54 : do_walk = 0;
460 : }
461 : else
462 : {
463 : /*
464 : * Prevent re-entrant walk of the same adj
465 : */
466 27445 : walk_adj->ia_flags |= ADJ_FLAG_SYNC_WALK_ACTIVE;
467 27445 : do_walk = 1;
468 : }
469 : }
470 : else
471 : {
472 5 : do_walk = 0;
473 : }
474 :
475 : /*
476 : * lock the adjacencies that are affected by updates this walk will provoke.
477 : * Since the aim of the walk is to update children to link to a different
478 : * DPO, this adj will no longer be in use and its lock count will drop to 0.
479 : * We don't want it to be deleted as part of this endeavour.
480 : */
/*
 * NOTE(review): walk_ai may be ADJ_INDEX_INVALID here and at the matching
 * adj_unlock() below - presumably adj_lock()/adj_unlock() ignore the
 * invalid index; confirm.
 */
481 27504 : adj_lock(ai);
482 27504 : adj_lock(walk_ai);
483 :
484 : /*
485 : * Updating a rewrite string is not atomic;
486 : * - the rewrite string is too long to write in one instruction
487 : * - when swapping from incomplete to complete, we also need to update
488 : * the VLIB graph next-index of the adj.
489 : * ideally we would only want to suspend forwarding via this adj whilst we
490 : * do this, but we do not have that level of granularity - it's suspend all
491 : * worker threads or nothing.
492 : * The other choices are:
493 : * - to mark the adj down and back walk so child load-balances drop this adj
494 : * from the set.
495 : * - update the next_node index of this adj to point to error-drop
496 : * both of which will mean for MAC change we will drop for this adj
497 : * which is not acceptable. However, when the adj changes type (from
498 : * complete to incomplete and vice-versa) the child DPOs, which have the
499 : * VLIB graph next node index, will be sending packets to the wrong graph
500 : * node. So from the options above, updating the next_node of the adj to
501 : * be drop will work, but it relies on each graph node v4/v6/mpls, rewrite/
502 : * arp/midchain always be valid w.r.t. a mis-match of adj type and node type
503 : * (i.e. a rewrite adj in the arp node). This is not enforceable. Getting it
504 : * wrong will lead to hard to find bugs since its a race condition. So we
505 : * choose the more reliable method of updating the children to use the drop,
506 : * then switching adj's type, then updating the children again. Did I mention
507 : * that this doesn't happen often...
508 : * So we need to distinguish between the two cases:
509 : * 1 - mac change
510 : * 2 - adj type change
511 : */
/* do_walk is only set when walk_ai is valid, so the last test is redundant */
512 27504 : if (do_walk &&
513 27445 : old_next != adj_next_index &&
514 : ADJ_INDEX_INVALID != walk_ai)
515 : {
516 : /*
517 : * the adj is changing type. we need to fix all children so that they
518 : * stack momentarily on a drop, while the adj changes. If we don't do
519 : * this the children will send packets to a VLIB graph node that does
520 : * not correspond to the adj's type - and it goes downhill from there.
521 : */
522 15184 : fib_node_back_walk_ctx_t bw_ctx = {
523 : .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_DOWN,
524 : /*
525 : * force this walk to be synchronous. if we don't and a node in the graph
526 : * (a heavily shared path-list) chooses to back-ground the walk (make it
527 : * async) then it will pause and we will do the adj update below, before
528 : * all the children are updated. not good.
529 : */
530 : .fnbw_flags = FIB_NODE_BW_FLAG_FORCE_SYNC,
531 : };
532 :
533 15184 : fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
534 : /*
535 : * fib_walk_sync may allocate a new adjacency and potentially cause a
536 : * realloc for adj_pool. When that happens, adj pointer is no longer
537 : * valid here. We refresh the adj pointer accordingly.
538 : */
539 15184 : adj = adj_get (ai);
540 : }
541 :
542 : /*
543 : * If we are just updating the MAC string of the adj (which we also can't
544 : * do atomically), then we need to stop packets switching through the adj.
545 : * We can't do that on a per-adj basis, so it's all the packets.
546 : * If we are updating the type, and we walked back to the children above,
547 : * then this barrier serves to flush the queues/frames.
548 : */
549 27504 : vlib_worker_thread_barrier_sync(vm);
550 :
/* everything up to the barrier release is done with the workers stopped */
551 27504 : adj->lookup_next_index = adj_next_index;
552 27504 : adj->ia_node_index = this_node;
553 :
554 27504 : if (NULL != rewrite)
555 : {
556 : /*
557 : * new rewrite provided.
558 : * fill in the adj's rewrite string, and build the VLIB graph arc.
559 : */
560 27245 : vnet_rewrite_set_data_internal(&adj->rewrite_header,
561 : sizeof(adj->rewrite_data),
562 : rewrite,
563 27245 : vec_len(rewrite));
/* the caller's vector has been consumed and is freed here */
564 27245 : vec_free(rewrite);
565 : }
566 : else
567 : {
568 259 : vnet_rewrite_clear_data_internal(&adj->rewrite_header,
569 : sizeof(adj->rewrite_data));
570 : }
571 27504 : adj->rewrite_header.next_index = vlib_node_add_next(vlib_get_main(),
572 : this_node,
573 : next_node);
574 :
575 : /*
576 : * done with the rewrite update - let the workers loose.
577 : */
578 27504 : vlib_worker_thread_barrier_release(vm);
579 :
/*
 * adj->lookup_next_index now holds adj_next_index, so this is the same
 * "type changed" test as the one before the barrier
 */
580 27504 : if (do_walk &&
581 27445 : (old_next != adj->lookup_next_index) &&
582 : (ADJ_INDEX_INVALID != walk_ai))
583 : {
584 : /*
585 : * backwalk to the children so they can stack on the now updated
586 : * adjacency
587 : */
588 15184 : fib_node_back_walk_ctx_t bw_ctx = {
589 : .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
590 : };
591 :
592 15184 : fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
593 : }
594 : /*
595 : * Prevent re-entrant walk of the same adj
596 : */
/* only the invocation that set the flag (do_walk == 1) clears it */
597 27504 : if (do_walk)
598 : {
599 27445 : walk_adj = adj_get(walk_ai);
600 27445 : walk_adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
601 : }
602 :
/* the adj is fetched by index again rather than trusting the pointer */
603 27504 : adj_delegate_adj_modified(adj_get(ai));
604 27504 : adj_unlock(ai);
605 27504 : adj_unlock(walk_ai);
606 27504 : }
607 :
608 : u32
609 13 : adj_nbr_db_size (void)
610 : {
611 : fib_protocol_t proto;
612 13 : u32 sw_if_index = 0;
613 13 : u64 count = 0;
614 :
615 39 : for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
616 : {
617 89 : vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
618 : {
619 63 : if (NULL != adj_nbr_tables[proto][sw_if_index])
620 : {
621 0 : count += hash_elts(adj_nbr_tables[proto][sw_if_index]);
622 : }
623 : }
624 : }
625 13 : return (count);
626 : }
627 :
628 : /**
629 : * @brief Walk all adjacencies on a link for a given next-hop protocol
630 : */
631 : void
632 339490 : adj_nbr_walk (u32 sw_if_index,
633 : fib_protocol_t adj_nh_proto,
634 : adj_walk_cb_t cb,
635 : void *ctx)
636 : {
637 : adj_index_t ai, *ais, *aip;
638 : adj_nbr_key_t *key;
639 :
640 636040 : ADJ_NBR_ASSERT_NH_PROTO (adj_nh_proto,);
641 :
642 339490 : if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
643 296550 : return;
644 :
645 42940 : ais = NULL;
646 :
647 : /* elements may be removed from the table during the walk, so
648 : * collect the set first then process them */
649 2840890 : hash_foreach_mem (key, ai, adj_nbr_tables[adj_nh_proto][sw_if_index],
650 : ({
651 : vec_add1(ais, ai);
652 : }));
653 :
654 92286 : vec_foreach(aip, ais)
655 : {
656 : /* An adj may be deleted during the walk so check first */
657 49346 : if (!pool_is_free_index(adj_pool, *aip))
658 49329 : cb(*aip, ctx);
659 : }
660 42940 : vec_free(ais);
661 : }
662 :
663 : /**
664 : * @brief Walk adjacencies on a link with a given v4 next-hop.
665 : * that is visit the adjacencies with different link types.
666 : */
667 : void
668 13628 : adj_nbr_walk_nh4 (u32 sw_if_index,
669 : const ip4_address_t *addr,
670 : adj_walk_cb_t cb,
671 : void *ctx)
672 : {
673 13628 : if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP4, sw_if_index))
674 0 : return;
675 :
676 13628 : ip46_address_t nh = {
677 : .ip4 = *addr,
678 : };
679 : vnet_link_t linkt;
680 : adj_index_t ai;
681 :
682 95396 : FOR_EACH_VNET_LINK(linkt)
683 : {
684 81768 : ai = adj_nbr_find (FIB_PROTOCOL_IP4, linkt, &nh, sw_if_index);
685 :
686 81768 : if (INDEX_INVALID != ai)
687 13628 : cb(ai, ctx);
688 : }
689 : }
690 :
691 : /**
692 : * @brief Walk adjacencies on a link with a given v6 next-hop.
693 : * that is visit the adjacencies with different link types.
694 : */
695 : void
696 10986 : adj_nbr_walk_nh6 (u32 sw_if_index,
697 : const ip6_address_t *addr,
698 : adj_walk_cb_t cb,
699 : void *ctx)
700 : {
701 10986 : if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP6, sw_if_index))
702 0 : return;
703 :
704 10986 : ip46_address_t nh = {
705 : .ip6 = *addr,
706 : };
707 : vnet_link_t linkt;
708 : adj_index_t ai;
709 :
710 76902 : FOR_EACH_VNET_LINK(linkt)
711 : {
712 65916 : ai = adj_nbr_find (FIB_PROTOCOL_IP6, linkt, &nh, sw_if_index);
713 :
714 65916 : if (INDEX_INVALID != ai)
715 10980 : cb(ai, ctx);
716 : }
717 : }
718 :
719 : /**
720 : * @brief Walk adjacencies on a link with a given next-hop.
721 : * that is visit the adjacencies with different link types.
722 : */
723 : void
724 24624 : adj_nbr_walk_nh (u32 sw_if_index,
725 : fib_protocol_t adj_nh_proto,
726 : const ip46_address_t *nh,
727 : adj_walk_cb_t cb,
728 : void *ctx)
729 : {
730 24624 : ADJ_NBR_ASSERT_NH_PROTO (adj_nh_proto,);
731 :
732 24624 : if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
733 10 : return;
734 :
735 24614 : switch (adj_nh_proto)
736 : {
737 13628 : case FIB_PROTOCOL_IP4:
738 13628 : adj_nbr_walk_nh4(sw_if_index, &nh->ip4, cb, ctx);
739 13628 : break;
740 10986 : case FIB_PROTOCOL_IP6:
741 10986 : adj_nbr_walk_nh6(sw_if_index, &nh->ip6, cb, ctx);
742 10986 : break;
743 0 : case FIB_PROTOCOL_MPLS:
744 0 : ASSERT(0);
745 0 : break;
746 : }
747 : }
748 :
749 : /**
750 : * Flags associated with the interface state walks
751 : */
752 : typedef enum adj_nbr_interface_flags_t_
753 : {
/*
 * Set by the SW handler when the admin-up flag is present and by the HW
 * handler when the link-up flag is present; clear means "down".
 */
754 : ADJ_NBR_INTERFACE_UP = (1 << 0),
755 : } adj_nbr_interface_flags_t;
756 :
757 : /**
758 : * Context for the state change walk of the DB
759 : */
/*
 * Filled in by the SW/HW interface state-change handlers and read by
 * adj_nbr_interface_state_change_one() to choose the back-walk reason
 * and flags.
 */
760 : typedef struct adj_nbr_interface_state_change_ctx_t_
761 : {
762 : /**
763 : * Flags on the interface
764 : */
765 : adj_nbr_interface_flags_t flags;
766 : } adj_nbr_interface_state_change_ctx_t;
767 :
768 : static adj_walk_rc_t
769 1997 : adj_nbr_interface_state_change_one (adj_index_t ai,
770 : void *arg)
771 : {
772 : /*
773 : * Back walk the graph to inform the forwarding entries
774 : * that this interface state has changed. Do this synchronously
775 : * since this is the walk that provides convergence
776 : */
777 1997 : adj_nbr_interface_state_change_ctx_t *ctx = arg;
778 3994 : fib_node_back_walk_ctx_t bw_ctx = {
779 1997 : .fnbw_reason = ((ctx->flags & ADJ_NBR_INTERFACE_UP) ?
780 1997 : FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
781 : FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
782 : /*
783 : * the force sync applies only as far as the first fib_entry.
784 : * And it's the fib_entry's we need to converge away from
785 : * the adjacencies on the now down link
786 : */
787 1997 : .fnbw_flags = (!(ctx->flags & ADJ_NBR_INTERFACE_UP) ?
788 1997 : FIB_NODE_BW_FLAG_FORCE_SYNC :
789 : FIB_NODE_BW_FLAG_NONE),
790 : };
791 : ip_adjacency_t *adj;
792 :
793 1997 : adj_lock (ai);
794 :
795 1997 : adj = adj_get(ai);
796 1997 : adj->ia_flags |= ADJ_FLAG_SYNC_WALK_ACTIVE;
797 1997 : fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
798 :
799 : /*
800 : * fib_walk_sync may allocate a new adjacency and potentially cause a
801 : * realloc for adj_pool. When that happens, adj pointer is no longer
802 : * valid here. We refresh the adj pointer accordingly.
803 : */
804 1997 : adj = adj_get(ai);
805 1997 : adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
806 :
807 1997 : adj_unlock (ai);
808 1997 : return (ADJ_WALK_RC_CONTINUE);
809 : }
810 :
811 : /**
812 : * @brief Registered function for SW interface state changes
813 : */
814 : static clib_error_t *
815 13268 : adj_nbr_sw_interface_state_change (vnet_main_t * vnm,
816 : u32 sw_if_index,
817 : u32 flags)
818 : {
819 : fib_protocol_t proto;
820 :
821 : /*
822 : * walk each adj on the interface and trigger a walk from that adj
823 : */
824 39804 : for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
825 : {
826 26536 : adj_nbr_interface_state_change_ctx_t ctx = {
827 : .flags = ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
828 26536 : ADJ_NBR_INTERFACE_UP :
829 : 0),
830 : };
831 :
832 26536 : adj_nbr_walk(sw_if_index, proto,
833 : adj_nbr_interface_state_change_one,
834 : &ctx);
835 : }
836 :
837 13268 : return (NULL);
838 : }
839 :
/*
 * Register adj_nbr_sw_interface_state_change() as an admin up/down
 * handler, with VNET_ITF_FUNC_PRIORITY_HIGH.
 */
840 1119 : VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION_PRIO(
841 : adj_nbr_sw_interface_state_change,
842 : VNET_ITF_FUNC_PRIORITY_HIGH);
843 :
844 : /**
845 : * @brief Invoked on each SW interface of a HW interface when the
846 : * HW interface state changes
847 : */
848 : static walk_rc_t
849 13182 : adj_nbr_hw_sw_interface_state_change (vnet_main_t * vnm,
850 : u32 sw_if_index,
851 : void *arg)
852 : {
853 13182 : adj_nbr_interface_state_change_ctx_t *ctx = arg;
854 : fib_protocol_t proto;
855 :
856 : /*
857 : * walk each adj on the interface and trigger a walk from that adj
858 : */
859 39546 : for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
860 : {
861 26364 : adj_nbr_walk(sw_if_index, proto,
862 : adj_nbr_interface_state_change_one,
863 : ctx);
864 : }
865 13182 : return (WALK_CONTINUE);
866 : }
867 :
868 : /**
869 : * @brief Registered callback for HW interface state changes
870 : */
871 : static clib_error_t *
872 13092 : adj_nbr_hw_interface_state_change (vnet_main_t * vnm,
873 : u32 hw_if_index,
874 : u32 flags)
875 : {
876 : /*
877 : * walk SW interface on the HW
878 : */
879 13092 : adj_nbr_interface_state_change_ctx_t ctx = {
880 : .flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ?
881 13092 : ADJ_NBR_INTERFACE_UP :
882 : 0),
883 : };
884 :
885 13092 : vnet_hw_interface_walk_sw(vnm, hw_if_index,
886 : adj_nbr_hw_sw_interface_state_change,
887 : &ctx);
888 :
889 13092 : return (NULL);
890 : }
891 :
/*
 * Register adj_nbr_hw_interface_state_change() as a link up/down
 * handler, with VNET_ITF_FUNC_PRIORITY_HIGH.
 */
892 1119 : VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION_PRIO(
893 : adj_nbr_hw_interface_state_change,
894 : VNET_ITF_FUNC_PRIORITY_HIGH);
895 :
896 : static adj_walk_rc_t
897 51 : adj_nbr_interface_delete_one (adj_index_t ai,
898 : void *arg)
899 : {
900 : /*
901 : * Back walk the graph to inform the forwarding entries
902 : * that this interface has been deleted.
903 : */
904 51 : fib_node_back_walk_ctx_t bw_ctx = {
905 : .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
906 : };
907 : ip_adjacency_t *adj;
908 :
909 51 : adj_lock(ai);
910 :
911 51 : adj = adj_get(ai);
912 51 : adj->ia_flags |= ADJ_FLAG_SYNC_WALK_ACTIVE;
913 51 : fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
914 :
915 : /*
916 : * fib_walk_sync may allocate a new adjacency and potentially cause a
917 : * realloc for adj_pool. When that happens, adj pointer is no longer
918 : * valid here. We refresh the adj pointer accordingly.
919 : */
920 51 : adj = adj_get(ai);
921 51 : adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
922 :
923 51 : adj_unlock(ai);
924 51 : return (ADJ_WALK_RC_CONTINUE);
925 : }
926 :
927 : /**
928 : * adj_nbr_interface_add_del
929 : *
930 : * Registered to receive interface Add and delete notifications
931 : */
932 : static clib_error_t *
933 11597 : adj_nbr_interface_add_del (vnet_main_t * vnm,
934 : u32 sw_if_index,
935 : u32 is_add)
936 : {
937 : fib_protocol_t proto;
938 :
939 11597 : if (is_add)
940 : {
941 : /*
942 : * not interested in interface additions. we will not back walk
943 : * to resolve paths through newly added interfaces. Why? The control
944 : * plane should have the brains to add interfaces first, then routes.
945 : * So the case where there are paths with a interface that matches
946 : * one just created is the case where the path resolved through an
947 : * interface that was deleted, and still has not been removed. The
948 : * new interface added, is NO GUARANTEE that the interface being
949 : * added now, even though it may have the same sw_if_index, is the
950 : * same interface that the path needs. So tough!
951 : * If the control plane wants these routes to resolve it needs to
952 : * remove and add them again.
953 : */
954 7418 : return (NULL);
955 : }
956 :
957 12537 : for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
958 : {
959 8358 : adj_nbr_walk(sw_if_index, proto,
960 : adj_nbr_interface_delete_one,
961 : NULL);
962 : }
963 :
964 4179 : return (NULL);
965 : }
966 :
/* Register adj_nbr_interface_add_del() for SW interface add/delete events. */
967 3363 : VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_nbr_interface_add_del);
968 :
969 :
970 : static adj_walk_rc_t
971 252 : adj_nbr_ethernet_mac_change_one (adj_index_t ai,
972 : void *arg)
973 : {
974 252 : vnet_update_adjacency_for_sw_interface(vnet_get_main(),
975 : adj_get_sw_if_index(ai),
976 : ai);
977 :
978 252 : return (ADJ_WALK_RC_CONTINUE);
979 : }
980 :
981 : /**
982 : * Callback function invoked when an interface's MAC Address changes
983 : */
984 : static void
985 29 : adj_nbr_ethernet_change_mac (ethernet_main_t * em,
986 : u32 sw_if_index, uword opaque)
987 : {
988 : fib_protocol_t proto;
989 :
990 87 : FOR_EACH_FIB_IP_PROTOCOL(proto)
991 : {
992 58 : adj_nbr_walk(sw_if_index, proto,
993 : adj_nbr_ethernet_mac_change_one,
994 : NULL);
995 : }
996 29 : }
997 :
998 : static adj_walk_rc_t
999 9 : adj_nbr_show_one (adj_index_t ai,
1000 : void *arg)
1001 : {
1002 9 : vlib_cli_output (arg, "[@%d] %U",
1003 : ai,
1004 : format_ip_adjacency, ai,
1005 : FORMAT_IP_ADJACENCY_NONE);
1006 :
1007 9 : return (ADJ_WALK_RC_CONTINUE);
1008 : }
1009 :
1010 : static clib_error_t *
1011 12 : adj_nbr_show (vlib_main_t * vm,
1012 : unformat_input_t * input,
1013 : vlib_cli_command_t * cmd)
1014 : {
1015 12 : adj_index_t ai = ADJ_INDEX_INVALID;
1016 12 : ip46_address_t nh = ip46_address_initializer;
1017 12 : u32 sw_if_index = ~0;
1018 :
1019 24 : while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1020 : {
1021 12 : if (unformat (input, "%U",
1022 : unformat_vnet_sw_interface, vnet_get_main(),
1023 : &sw_if_index))
1024 : ;
1025 6 : else if (unformat (input, "%U",
1026 : unformat_ip46_address, &nh, IP46_TYPE_ANY))
1027 : ;
1028 0 : else if (unformat (input, "%d", &ai))
1029 : ;
1030 : else
1031 0 : break;
1032 : }
1033 :
1034 12 : if (ADJ_INDEX_INVALID != ai)
1035 : {
1036 0 : vlib_cli_output (vm, "[@%d] %U",
1037 : ai,
1038 : format_ip_adjacency, ai,
1039 : FORMAT_IP_ADJACENCY_DETAIL);
1040 : }
1041 12 : else if (~0 != sw_if_index)
1042 : {
1043 : fib_protocol_t proto;
1044 :
1045 6 : if (ip46_address_is_zero(&nh))
1046 : {
1047 0 : for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
1048 : {
1049 0 : adj_nbr_walk(sw_if_index, proto,
1050 : adj_nbr_show_one,
1051 : vm);
1052 : }
1053 : }
1054 : else
1055 : {
1056 6 : proto = (ip46_address_is_ip4(&nh) ?
1057 6 : FIB_PROTOCOL_IP4 :
1058 : FIB_PROTOCOL_IP6);
1059 6 : adj_nbr_walk_nh(sw_if_index, proto, &nh,
1060 : adj_nbr_show_one,
1061 : vm);
1062 : }
1063 : }
1064 : else
1065 : {
1066 : fib_protocol_t proto;
1067 :
1068 18 : for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
1069 : {
1070 28 : vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
1071 : {
1072 16 : adj_nbr_walk(sw_if_index, proto,
1073 : adj_nbr_show_one,
1074 : vm);
1075 : }
1076 : }
1077 : }
1078 :
1079 12 : return 0;
1080 : }
1081 :
1082 : /*?
1083 : * Show all neighbour adjacencies.
1084 : * @cliexpar
1085 : * @cliexstart{sh adj nbr}
1086 : * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
1087 : * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
1088 : * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
1089 : * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
1090 : * @cliexend
1091 : ?*/
1092 272887 : VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
1093 : .path = "show adj nbr",
1094 : .short_help = "show adj nbr [<adj_index>] [interface]",
1095 : .function = adj_nbr_show,
1096 : };
1097 :
1098 : u8*
1099 2125 : format_adj_nbr_incomplete (u8* s, va_list *ap)
1100 : {
1101 2125 : index_t index = va_arg(*ap, index_t);
1102 2125 : CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
1103 2125 : vnet_main_t * vnm = vnet_get_main();
1104 2125 : ip_adjacency_t * adj = adj_get(index);
1105 :
1106 2125 : s = format (s, "arp-%U", format_vnet_link, adj->ia_link);
1107 2125 : s = format (s, ": via %U",
1108 : format_ip46_address, &adj->sub_type.nbr.next_hop,
1109 2125 : adj_proto_to_46(adj->ia_nh_proto));
1110 2125 : s = format (s, " %U",
1111 : format_vnet_sw_if_index_name,
1112 : vnm, adj->rewrite_header.sw_if_index);
1113 :
1114 2125 : return (s);
1115 : }
1116 :
1117 : u8*
1118 317953 : format_adj_nbr (u8* s, va_list *ap)
1119 : {
1120 317953 : index_t index = va_arg(*ap, index_t);
1121 317953 : CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
1122 317953 : ip_adjacency_t * adj = adj_get(index);
1123 :
1124 317953 : s = format (s, "%U", format_vnet_link, adj->ia_link);
1125 317953 : s = format (s, " via %U ",
1126 : format_ip46_address, &adj->sub_type.nbr.next_hop,
1127 317953 : adj_proto_to_46(adj->ia_nh_proto));
1128 317953 : s = format (s, "%U",
1129 : format_vnet_rewrite,
1130 : &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
1131 :
1132 317953 : return (s);
1133 : }
1134 :
1135 : static void
1136 103168 : adj_dpo_lock (dpo_id_t *dpo)
1137 : {
1138 103168 : adj_lock(dpo->dpoi_index);
1139 103168 : }
1140 : static void
1141 94703 : adj_dpo_unlock (dpo_id_t *dpo)
1142 : {
1143 94703 : adj_unlock(dpo->dpoi_index);
1144 94703 : }
1145 :
1146 : static void
1147 0 : adj_mem_show (void)
1148 : {
1149 0 : fib_show_memory_usage("Adjacency",
1150 0 : pool_elts(adj_pool),
1151 0 : pool_len(adj_pool),
1152 : sizeof(ip_adjacency_t));
1153 0 : }
1154 :
1155 : const static dpo_vft_t adj_nbr_dpo_vft = {
1156 : .dv_lock = adj_dpo_lock,
1157 : .dv_unlock = adj_dpo_unlock,
1158 : .dv_format = format_adj_nbr,
1159 : .dv_mem_show = adj_mem_show,
1160 : .dv_get_urpf = adj_dpo_get_urpf,
1161 : .dv_get_mtu = adj_dpo_get_mtu,
1162 : };
1163 : const static dpo_vft_t adj_nbr_incompl_dpo_vft = {
1164 : .dv_lock = adj_dpo_lock,
1165 : .dv_unlock = adj_dpo_unlock,
1166 : .dv_format = format_adj_nbr_incomplete,
1167 : .dv_get_urpf = adj_dpo_get_urpf,
1168 : .dv_get_mtu = adj_dpo_get_mtu,
1169 : };
1170 :
1171 : /**
1172 : * @brief The per-protocol VLIB graph nodes that are assigned to an adjacency
1173 : * object.
1174 : *
1175 : * this means that these graph nodes are ones from which a nbr is the
1176 : * parent object in the DPO-graph.
1177 : */
1178 : const static char* const nbr_ip4_nodes[] =
1179 : {
1180 : "ip4-rewrite",
1181 : NULL,
1182 : };
1183 : const static char* const nbr_ip6_nodes[] =
1184 : {
1185 : "ip6-rewrite",
1186 : NULL,
1187 : };
1188 : const static char* const nbr_mpls_nodes[] =
1189 : {
1190 : "mpls-output",
1191 : NULL,
1192 : };
1193 : const static char* const nbr_ethernet_nodes[] =
1194 : {
1195 : "adj-l2-rewrite",
1196 : NULL,
1197 : };
1198 : const static char* const * const nbr_nodes[DPO_PROTO_NUM] =
1199 : {
1200 : [DPO_PROTO_IP4] = nbr_ip4_nodes,
1201 : [DPO_PROTO_IP6] = nbr_ip6_nodes,
1202 : [DPO_PROTO_MPLS] = nbr_mpls_nodes,
1203 : [DPO_PROTO_ETHERNET] = nbr_ethernet_nodes,
1204 : };
1205 :
1206 : const static char* const nbr_incomplete_ip4_nodes[] =
1207 : {
1208 : "ip4-arp",
1209 : NULL,
1210 : };
1211 : const static char* const nbr_incomplete_ip6_nodes[] =
1212 : {
1213 : "ip6-discover-neighbor",
1214 : NULL,
1215 : };
1216 : const static char* const nbr_incomplete_mpls_nodes[] =
1217 : {
1218 : "mpls-adj-incomplete",
1219 : NULL,
1220 : };
1221 :
1222 : const static char* const * const nbr_incomplete_nodes[DPO_PROTO_NUM] =
1223 : {
1224 : [DPO_PROTO_IP4] = nbr_incomplete_ip4_nodes,
1225 : [DPO_PROTO_IP6] = nbr_incomplete_ip6_nodes,
1226 : [DPO_PROTO_MPLS] = nbr_incomplete_mpls_nodes,
1227 : };
1228 :
1229 : void
1230 559 : adj_nbr_module_init (void)
1231 : {
1232 559 : dpo_register(DPO_ADJACENCY,
1233 : &adj_nbr_dpo_vft,
1234 : nbr_nodes);
1235 559 : dpo_register(DPO_ADJACENCY_INCOMPLETE,
1236 : &adj_nbr_incompl_dpo_vft,
1237 : nbr_incomplete_nodes);
1238 :
1239 559 : ethernet_address_change_ctx_t ctx = {
1240 : .function = adj_nbr_ethernet_change_mac,
1241 : .function_opaque = 0,
1242 : };
1243 559 : vec_add1 (ethernet_main.address_change_callbacks, ctx);
1244 559 : }
|