Line data Source code
1 : /*
2 : * Copyright (c) 2017 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 : #ifndef __included_vnet_bonding_node_h__
16 : #define __included_vnet_bonding_node_h__
17 :
18 : #include <vlib/vlib.h>
19 : #include <vlib/unix/unix.h>
20 : #include <vppinfra/format.h>
21 : #include <vppinfra/hash.h>
22 : #include <vnet/ethernet/ethernet.h>
23 : #include <vnet/interface.h>
24 : #include <vnet/hash/hash.h>
25 :
/*
 * Fast/slow periodic timer values and the short/long timeout values
 * derived from them (each timeout is three times the matching periodic
 * value, i.e. 3.0 and 90.0).
 * NOTE(review): units are presumably seconds -- confirm against the users
 * of these macros.
 */
#define LACP_FAST_PERIODIC_TIMER        1.0
#define LACP_SHORT_TIMOUT_TIME          (LACP_FAST_PERIODIC_TIMER * 3)
#define LACP_SLOW_PERIODIC_TIMER        30.0
#define LACP_LONG_TIMOUT_TIME           (LACP_SLOW_PERIODIC_TIMER * 3)
30 :
/*
 * Fallback MIN, defined only when no earlier header already provides one.
 * The selected argument is evaluated twice, so do not pass expressions
 * with side effects.
 */
#ifndef MIN
#define MIN(x,y) (((x)<(y))?(x):(y))
#endif

/*
 * Evaluates to is_pow2 (a).
 * NOTE(review): presumably used to decide whether a modulo can be replaced
 * by a cheaper mask operation -- confirm against callers.
 */
#define BOND_MODULO_SHORTCUT(a) \
  (is_pow2 (a))
37 :
/*
 * X-macro table of bond modes. Each row is
 *   _ (numeric value, enum suffix, name string)
 * The name string is what unformat_bond_mode() parses and what
 * format_bond_mode() prints.
 */
#define foreach_bond_mode \
  _ (1, ROUND_ROBIN, "round-robin") \
  _ (2, ACTIVE_BACKUP, "active-backup") \
  _ (3, XOR, "xor") \
  _ (4, BROADCAST, "broadcast") \
  _ (5, LACP, "lacp")

/* Bond modes: one BOND_MODE_<suffix> enumerator per foreach_bond_mode row. */
typedef enum
{
#define _(v, f, s) BOND_MODE_##f = v,
  foreach_bond_mode
#undef _
} bond_mode_t;
51 :
/*
 * configurable load-balances
 *
 * Rows are _ (numeric value, enum suffix, name string, identifier).
 * This is the table unformat_bond_load_balance() walks, in the order
 * written here.
 * NOTE(review): "l2" is listed last, presumably so that "l23" is tried
 * before its prefix "l2" when parsing -- confirm before reordering.
 */
#define foreach_bond_lb \
  _ (2, L23, "l23", l23) \
  _ (1, L34 , "l34", l34) \
  _ (0, L2, "l2", l2)

/*
 * load-balance functions implemented in bond-output
 *
 * Same row layout as foreach_bond_lb. The fourth column is not used by
 * any expansion in this header.
 */
#define foreach_bond_lb_algo \
  _ (0, L2, "l2", l2) \
  _ (1, L34 , "l34", l34) \
  _ (2, L23, "l23", l23) \
  _ (3, RR, "round-robin", round_robin) \
  _ (4, BC, "broadcast", broadcast) \
  _ (5, AB, "active-backup", active_backup)

/* One BOND_LB_<suffix> enumerator per foreach_bond_lb_algo row. */
typedef enum
{
#define _(v, f, s, p) BOND_LB_##f = v,
  foreach_bond_lb_algo
#undef _
} bond_load_balance_t;
73 :
/*
 * Event codes for the "send GARP/NA" process.
 * NOTE(review): presumably signalled to bond_process_node -- confirm.
 */
typedef enum
{
  BOND_SEND_GARP_NA = 1,
} bond_send_garp_na_process_event_t;
78 :
/*
 * Argument block passed to bond_create_if().
 * Fields above the "return" marker are inputs; fields below it carry
 * results back to the caller.
 */
typedef struct
{
  /* requested interface id */
  u32 id;
  /* non-zero when hw_addr holds a caller-supplied address */
  u8 hw_addr_set;
  u8 hw_addr[6];
  /* presumably a BOND_MODE_* value -- confirm against bond_create_if() */
  u8 mode;
  /* presumably a BOND_LB_* value -- confirm against bond_create_if() */
  u8 lb;
  u8 numa_only;
  u8 gso;
  /* return */
  u32 sw_if_index;
  int rv;
  clib_error_t *error;
} bond_create_if_args_t;
93 :
/*
 * Argument block passed to bond_add_member().
 * Fields below the "return" marker carry results back to the caller.
 */
typedef struct
{
  /* member's sw_if_index */
  u32 member;
  /* bond's sw_if_index */
  u32 group;
  u8 is_passive;
  u8 is_long_timeout;
  /* return */
  int rv;
  clib_error_t *error;
} bond_add_member_args_t;
106 :
/*
 * Argument block passed to bond_detach_member().
 * Fields below the "return" marker carry results back to the caller.
 */
typedef struct
{
  /* NOTE(review): presumably the member's sw_if_index, as in
     bond_add_member_args_t -- confirm */
  u32 member;
  /* return */
  int rv;
  clib_error_t *error;
} bond_detach_member_args_t;
114 :
/*
 * Argument block passed to bond_set_intf_weight().
 * Fields below the "return" marker carry results back to the caller.
 */
typedef struct
{
  /* interface whose weight is being set */
  u32 sw_if_index;
  /* new weight value */
  u32 weight;
  /* return */
  int rv;
  clib_error_t *error;
} bond_set_intf_weight_args_t;
123 :
/**
 * BOND interface details struct.
 * Element type of the output produced by bond_dump_ifs().
 */
typedef struct
{
  u32 sw_if_index;
  u32 id;
  /* fixed-size name buffer (64 bytes) */
  u8 interface_name[64];
  u32 mode;
  u32 lb;
  u8 numa_only;
  /* NOTE(review): active_members / members are presumably counts, not
     index lists -- confirm against bond_dump_ifs() */
  u32 active_members;
  u32 members;
} bond_interface_details_t;
136 :
/**
 * Member interface details struct.
 * Element type of the output produced by bond_dump_member_ifs().
 */
typedef struct
{
  u32 sw_if_index;
  /* fixed-size name buffer (64 bytes) */
  u8 interface_name[64];
  u8 is_passive;
  u8 is_long_timeout;
  u8 is_local_numa;
  u32 weight;
  u32 active_members;
} member_interface_details_t;
148 :
/*
 * LACP port information record, declared with CLIB_PACKED.
 * Used for both the actor and the partner side in bond_if_t and
 * member_if_t. Field order and widths must not change.
 */
typedef CLIB_PACKED (struct
{
  u16 system_priority;
  /* 6-byte system identifier */
  u8 system[6];
  u16 key; u16 port_priority; u16 port_number;
  u8 state;
}) lacp_port_info_t;
156 :
/*
 * Per-port queue holding up to VLIB_FRAME_SIZE u32 entries; the struct
 * starts with a cache-line alignment mark.
 * NOTE(review): entries are presumably vlib buffer indices and n_buffers
 * the number of valid entries -- confirm against the output node.
 */
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  u32 buffers[VLIB_FRAME_SIZE];
  u32 n_buffers;
} bond_per_port_queue_t;
163 :
/*
 * Per-thread bonding state; bond_main_t keeps these in per_thread_data.
 * NOTE(review): per_port_queue is presumably a vector indexed by port and
 * data is opaque to this header -- confirm against users.
 */
typedef struct
{
  bond_per_port_queue_t *per_port_queue;
  void **data;
} bond_per_thread_data_t;
169 :
/*
 * State of one bonding interface. bond_main_t keeps these in its
 * `interfaces` pool.
 */
typedef struct
{
  u8 admin_up;
  u8 mode;
  u8 lb;

  /* the last member index for the rr lb */
  u32 lb_rr_last_index;

  /* Real device instance in interface vector */
  u32 dev_instance;

  /* Interface ID being shown to user */
  u32 id;

  u32 hw_if_index;
  u32 sw_if_index;

  /* Configured members */
  u32 *members;

  /* Members that are in DISTRIBUTING state */
  u32 *active_members;

  lacp_port_info_t partner;
  lacp_port_info_t actor;
  u8 individual_aggregator;

  /* When numa_only is set, lacp mode uses only the members on the local
     numa node, provided at least one such member exists; otherwise it
     behaves as usual. */
  u8 numa_only;
  u8 gso;

  /* How many members on local numa node are there in lacp mode? */
  word n_numa_members;

  u32 group;
  uword *port_number_bitmap;
  u8 use_custom_mac;
  u8 hw_address[6];

  clib_spinlock_t lockp;
  vnet_hash_fn_t hash_func;
} bond_if_t;
215 :
216 : typedef struct
217 : {
218 : u8 persistent_hw_address[6];
219 :
220 : /* neighbor's vlib software interface index */
221 : u32 sw_if_index;
222 :
223 : /* Neighbor time-to-live (usually 3s) */
224 : f32 ttl_in_seconds;
225 :
226 : /* 1 = interface is configured with long timeout (60s) */
227 : u8 is_long_timeout;
228 :
229 : /* 1 = debug is on; 0 = debug is off */
230 : u8 debug;
231 :
232 : /* tx packet template id for this neighbor */
233 : u8 packet_template_index;
234 :
235 : /* Info we actually keep about each neighbor */
236 :
237 : /* Jenkins hash optimization: avoid tlv scan, send short keepalive msg */
238 : u8 last_packet_signature_valid;
239 : uword last_packet_signature;
240 :
241 : /* last received lacp packet, for the J-hash optimization */
242 : u8 *last_rx_pkt;
243 :
244 : /* last marker packet */
245 : u8 *last_marker_pkt;
246 :
247 : /* neighbor vlib hw_if_index */
248 : u32 hw_if_index;
249 :
250 : /* weight -- valid only for active backup */
251 : u32 weight;
252 :
253 : /* actor does not initiate the protocol exchange */
254 : u8 is_passive;
255 :
256 : /* Partner port information */
257 : lacp_port_info_t partner;
258 : lacp_port_info_t partner_admin;;
259 :
260 : /* Actor port information */
261 : lacp_port_info_t actor;
262 : lacp_port_info_t actor_admin;
263 :
264 : /* Need To Transmit flag */
265 : u8 ntt;
266 :
267 : /* Link has been established and Aggregate Port is operable */
268 : u8 port_enabled;
269 :
270 : /* Initialization or reinitialization of the lacp protocol entity */
271 : u8 begin;
272 :
273 : /* Aggregation Port is operating the lacp */
274 : u8 lacp_enabled;
275 :
276 : /* MUX to indicate to the Selection Logic wait_while_timer expired */
277 : u8 ready_n;
278 :
279 : /* Selection Logic indicates al Aggregation Ports attached */
280 : u8 ready;
281 :
282 : /* Selection Logic selected an Aggregator */
283 : int selected;
284 :
285 : /* RX machine indicates an Aggregation Port in PORT_DISABLED state */
286 : u8 port_moved;
287 :
288 : /* timer used to detect whether received protocol information has expired */
289 : f64 current_while_timer;
290 :
291 : /* timer used to detect actor churn states */
292 : f64 actor_churn_timer;
293 :
294 : /* time last lacpdu was sent */
295 : f64 last_lacpdu_sent_time;
296 :
297 : /* time last lacpdu was received */
298 : f64 last_lacpdu_recd_time;
299 :
300 : /* time last marker pdu was sent */
301 : f64 last_marker_pdu_sent_time;
302 :
303 : /* time last marker pdu was received */
304 : f64 last_marker_pdu_recd_time;
305 :
306 : /* timer used to generate periodic transmission */
307 : f64 periodic_timer;
308 :
309 : /* timer used to detect partner churn states */
310 : f64 partner_churn_timer;
311 :
312 : /* provides hysteresis before performing an aggregation change */
313 : f64 wait_while_timer;
314 :
315 : /* Implemention variables, not in the spec */
316 : int rx_state;
317 : int tx_state;
318 : int mux_state;
319 : int ptx_state;
320 :
321 : /* actor admin key */
322 : u32 group;
323 :
324 : u32 marker_tx_id;
325 :
326 : u32 bif_dev_instance;
327 :
328 : u8 loopback_port;
329 :
330 : /* bond mode */
331 : u8 mode;
332 :
333 : /* good lacp pdu received */
334 : u64 pdu_received;
335 :
336 : /* bad lacp pdu received */
337 : u64 bad_pdu_received;
338 :
339 : /* pdu sent */
340 : u64 pdu_sent;
341 :
342 : /* good marker pdu received */
343 : u64 marker_pdu_received;
344 :
345 : /* bad marker pdu received */
346 : u64 marker_bad_pdu_received;
347 :
348 : /* pdu sent */
349 : u64 marker_pdu_sent;
350 :
351 : /* member is numa node */
352 : u8 is_local_numa;
353 : } member_if_t;
354 :
/*
 * Callback type stored in bond_main_t.lacp_enable_disable by
 * bond_register_callback(). Receives the vlib main, the bond interface,
 * the member interface and an enable flag.
 */
typedef void (*lacp_enable_disable_func) (vlib_main_t * vm, bond_if_t * bif,
					  member_if_t * mif, u8 enable);
357 :
/*
 * Pair of partner/actor state words; bond_main_t keeps these behind its
 * `stats` pointer.
 */
typedef struct
{
  u32 partner_state;
  u32 actor_state;
} lacp_stats_t;
363 :
/*
 * Global bonding state; the single instance is declared in this header as
 * `extern bond_main_t bond_main`.
 */
typedef struct
{
  /* pool of bonding interfaces */
  bond_if_t *interfaces;

  /* record used interface IDs */
  uword *id_used;

  /* pool of member interfaces */
  member_if_t *neighbors;

  /* rapidly find a bond by vlib software interface index */
  uword *bond_by_sw_if_index;

  /* convenience variables */
  vlib_main_t *vlib_main;
  vnet_main_t *vnet_main;

  /* lacp plugin is loaded */
  u8 lacp_plugin_loaded;

  /* callback installed by bond_register_callback() */
  lacp_enable_disable_func lacp_enable_disable;

  /* vector indexed by sw_if_index; an entry of 0 means "not a member",
     otherwise (entry >> 1) is the index into `neighbors`
     (see bond_get_member_by_sw_if_index) */
  uword *member_by_sw_if_index;

  bond_per_thread_data_t *per_thread_data;

  lacp_stats_t **stats;
} bond_main_t;
393 :
/*
 * bond packet trace capture: the ethernet header plus the sw_if_index
 * pair (interface and bond) recorded for a traced packet.
 */
typedef struct
{
  ethernet_header_t ethernet;
  u32 sw_if_index;
  u32 bond_sw_if_index;
} bond_packet_trace_t;
401 :
/*
 * Signature of a load-balance function: takes the vlib main, the node
 * runtime, the bond interface, one buffer and the member count, and
 * returns a u32.
 * NOTE(review): the return value is presumably the selected member index
 * -- confirm against the implementations.
 */
typedef u32 (*load_balance_func) (vlib_main_t * vm,
				  vlib_node_runtime_t * node, bond_if_t * bif,
				  vlib_buffer_t * b0, uword member_count);

/* Wrapper holding a single load_balance_func pointer. */
typedef struct
{
  load_balance_func load_balance;
} bond_load_balance_func_t;
410 :
/* Globals declared here and defined elsewhere in the bonding code. */
extern vlib_node_registration_t bond_input_node;
extern vlib_node_registration_t bond_process_node;
extern vnet_device_class_t bond_dev_class;
extern bond_main_t bond_main;

/* Prototypes only; the definitions are not in this header. */
void bond_disable_collecting_distributing (vlib_main_t * vm,
					   member_if_t * mif);
void bond_enable_collecting_distributing (vlib_main_t * vm,
					  member_if_t * mif);
u8 *format_bond_interface_name (u8 * s, va_list * args);

/* The *_args_t based calls return their status through the `rv` and
   `error` fields of the argument block. */
void bond_set_intf_weight (vlib_main_t * vm,
			   bond_set_intf_weight_args_t * args);
void bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args);
int bond_delete_if (vlib_main_t * vm, u32 sw_if_index);
void bond_add_member (vlib_main_t * vm, bond_add_member_args_t * args);
void bond_detach_member (vlib_main_t * vm, bond_detach_member_args_t * args);
int bond_dump_ifs (bond_interface_details_t ** out_bondids);
int bond_dump_member_ifs (member_interface_details_t ** out_memberids,
			  u32 bond_sw_if_index);
431 :
432 : static inline uword
433 0 : unformat_bond_mode (unformat_input_t * input, va_list * args)
434 : {
435 0 : u8 *r = va_arg (*args, u8 *);
436 :
437 : if (0);
438 : #define _(v, f, s) else if (unformat (input, s)) *r = BOND_MODE_##f;
439 0 : foreach_bond_mode
440 : #undef _
441 : else
442 0 : return 0;
443 :
444 0 : return 1;
445 : }
446 :
447 : static inline u8 *
448 0 : format_bond_mode (u8 * s, va_list * args)
449 : {
450 0 : u32 i = va_arg (*args, u32);
451 0 : u8 *t = 0;
452 :
453 0 : switch (i)
454 : {
455 : #define _(v, f, s) case BOND_MODE_##f: t = (u8 *) s; break;
456 0 : foreach_bond_mode
457 : #undef _
458 0 : default:
459 0 : return format (s, "unknown");
460 : }
461 0 : return format (s, "%s", t);
462 : }
463 :
464 : static inline uword
465 0 : unformat_bond_load_balance (unformat_input_t * input, va_list * args)
466 : {
467 0 : u8 *r = va_arg (*args, u8 *);
468 :
469 : if (0);
470 : #define _(v, f, s, p) else if (unformat (input, s)) *r = BOND_LB_##f;
471 0 : foreach_bond_lb
472 : #undef _
473 : else
474 0 : return 0;
475 :
476 0 : return 1;
477 : }
478 :
479 : static inline u8 *
480 0 : format_bond_load_balance (u8 * s, va_list * args)
481 : {
482 0 : u32 i = va_arg (*args, u32);
483 0 : u8 *t = 0;
484 :
485 0 : switch (i)
486 : {
487 : #define _(v, f, s, p) case BOND_LB_##f: t = (u8 *) s; break;
488 0 : foreach_bond_lb_algo
489 : #undef _
490 0 : default:
491 0 : return format (s, "unknown");
492 : }
493 0 : return format (s, "%s", t);
494 : }
495 :
496 : static inline void
497 559 : bond_register_callback (lacp_enable_disable_func func)
498 : {
499 559 : bond_main_t *bm = &bond_main;
500 :
501 559 : bm->lacp_plugin_loaded = 1;
502 559 : bm->lacp_enable_disable = func;
503 559 : }
504 :
505 : static inline bond_if_t *
506 30 : bond_get_bond_if_by_sw_if_index (u32 sw_if_index)
507 : {
508 30 : bond_main_t *bm = &bond_main;
509 : uword *p;
510 :
511 30 : p = hash_get (bm->bond_by_sw_if_index, sw_if_index);
512 30 : if (!p)
513 : {
514 0 : return 0;
515 : }
516 30 : return pool_elt_at_index (bm->interfaces, p[0]);
517 : }
518 :
519 : static inline bond_if_t *
520 162 : bond_get_bond_if_by_dev_instance (u32 dev_instance)
521 : {
522 162 : bond_main_t *bm = &bond_main;
523 :
524 162 : return pool_elt_at_index (bm->interfaces, dev_instance);
525 : }
526 :
527 : static inline member_if_t *
528 57038 : bond_get_member_by_sw_if_index (u32 sw_if_index)
529 : {
530 57038 : bond_main_t *bm = &bond_main;
531 57038 : member_if_t *mif = 0;
532 : uword p;
533 :
534 57038 : if (sw_if_index < vec_len (bm->member_by_sw_if_index))
535 : {
536 9005 : p = bm->member_by_sw_if_index[sw_if_index];
537 9005 : if (p)
538 128 : mif = pool_elt_at_index (bm->neighbors, p >> 1);
539 : }
540 :
541 57038 : return mif;
542 : }
543 :
544 : #endif /* __included_vnet_bonding_node_h__ */
545 :
546 : /*
547 : * fd.io coding-style-patch-verification: ON
548 : *
549 : * Local Variables:
550 : * eval: (c-set-style "gnu")
551 : * End:
552 : */
|