Line data Source code
1 : /*
2 : *------------------------------------------------------------------
3 : * Copyright (c) 2017 Cisco and/or its affiliates.
4 : * Licensed under the Apache License, Version 2.0 (the "License");
5 : * you may not use this file except in compliance with the License.
6 : * You may obtain a copy of the License at:
7 : *
8 : * http://www.apache.org/licenses/LICENSE-2.0
9 : *
10 : * Unless required by applicable law or agreed to in writing, software
11 : * distributed under the License is distributed on an "AS IS" BASIS,
12 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 : * See the License for the specific language governing permissions and
14 : * limitations under the License.
15 : *------------------------------------------------------------------
16 : */
17 :
18 : #define _GNU_SOURCE
19 : #include <stdint.h>
20 : #include <vnet/llc/llc.h>
21 : #include <vnet/snap/snap.h>
22 : #include <vnet/bonding/node.h>
23 :
24 : #ifndef CLIB_MARCH_VARIANT
25 : bond_main_t bond_main;
26 : #endif /* CLIB_MARCH_VARIANT */
27 :
/*
 * Error counters of the bond-input node, written as an X-macro list of
 * _(symbol, description) pairs.  The list is expanded twice below: once
 * into the enum of counter indices and once into the description table,
 * which keeps the two in the same order.
 */
#define foreach_bond_input_error                                   \
  _(NONE, "no error")                                              \
  _(IF_DOWN, "interface down")                                     \
  _(PASSIVE_IF, "traffic received on passive interface")           \
  _(PASS_THRU, "pass through (CDP, LLDP, slow protocols)")

/* Counter indices; BOND_INPUT_N_ERROR is the number of counters. */
typedef enum
{
#define _(sym, str) BOND_INPUT_ERROR_##sym,
  foreach_bond_input_error
#undef _
    BOND_INPUT_N_ERROR,
} bond_input_error_t;

/* Counter descriptions, indexed by bond_input_error_t. */
static char *bond_input_error_strings[] = {
#define _(sym, str) str,
  foreach_bond_input_error
#undef _
};
47 :
48 : static u8 *
49 2 : format_bond_input_trace (u8 * s, va_list * args)
50 : {
51 2 : CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
52 2 : CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
53 2 : bond_packet_trace_t *t = va_arg (*args, bond_packet_trace_t *);
54 :
55 2 : s = format (s, "src %U, dst %U, %U -> %U",
56 2 : format_ethernet_address, t->ethernet.src_address,
57 2 : format_ethernet_address, t->ethernet.dst_address,
58 : format_vnet_sw_if_index_name, vnet_get_main (),
59 : t->sw_if_index,
60 : format_vnet_sw_if_index_name, vnet_get_main (),
61 : t->bond_sw_if_index);
62 :
63 2 : return s;
64 : }
65 :
/*
 * Next-node indices of the bond-input node.  The only statically
 * registered next node is the drop node; BOND_INPUT_N_NEXT is the number
 * of entries and is used as .n_next_nodes in the node registration.
 */
typedef enum
{
  BOND_INPUT_NEXT_DROP,
  BOND_INPUT_N_NEXT,
} bond_input_next_t;

/*
 * Former name of the type above.  It said "output" although every
 * enumerator belongs to bond-input; the alias keeps old spellings valid.
 */
typedef bond_input_next_t bond_output_next_t;
71 :
72 : static_always_inline u8
73 0 : packet_is_cdp (ethernet_header_t * eth)
74 : {
75 : llc_header_t *llc;
76 : snap_header_t *snap;
77 :
78 0 : llc = (llc_header_t *) (eth + 1);
79 0 : snap = (snap_header_t *) (llc + 1);
80 :
81 0 : return ((eth->type == htons (ETHERNET_TYPE_CDP)) ||
82 0 : ((llc->src_sap == 0xAA) && (llc->control == 0x03) &&
83 0 : (snap->protocol == htons (0x2000)) &&
84 0 : (snap->oui[0] == 0) && (snap->oui[1] == 0) &&
85 0 : (snap->oui[2] == 0x0C)));
86 : }
87 :
88 : static inline void
89 22 : bond_sw_if_idx_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node,
90 : vlib_buffer_t * b, u32 bond_sw_if_index,
91 : u32 * n_rx_packets, u32 * n_rx_bytes)
92 : {
93 : u16 *ethertype_p, ethertype;
94 : ethernet_vlan_header_t *vlan;
95 22 : ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b);
96 :
97 22 : (*n_rx_packets)++;
98 22 : *n_rx_bytes += b->current_length;
99 22 : ethertype = clib_mem_unaligned (ð->type, u16);
100 22 : if (!ethernet_frame_is_tagged (ntohs (ethertype)))
101 : {
102 : // Let some layer2 packets pass through.
103 22 : if (PREDICT_TRUE ((ethertype != htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
104 : && !packet_is_cdp (eth)
105 : && (ethertype != htons (ETHERNET_TYPE_802_1_LLDP))))
106 : {
107 : /* Change the physical interface to bond interface */
108 0 : vnet_buffer (b)->sw_if_index[VLIB_RX] = bond_sw_if_index;
109 0 : return;
110 : }
111 : }
112 : else
113 : {
114 0 : vlan = (void *) (eth + 1);
115 0 : ethertype_p = &vlan->type;
116 0 : ethertype = clib_mem_unaligned (ethertype_p, u16);
117 0 : if (ethertype == ntohs (ETHERNET_TYPE_VLAN))
118 : {
119 0 : vlan++;
120 0 : ethertype_p = &vlan->type;
121 : }
122 0 : ethertype = clib_mem_unaligned (ethertype_p, u16);
123 0 : if (PREDICT_TRUE ((ethertype != htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
124 : && (ethertype != htons (ETHERNET_TYPE_CDP))
125 : && (ethertype != htons (ETHERNET_TYPE_802_1_LLDP))))
126 : {
127 : /* Change the physical interface to bond interface */
128 0 : vnet_buffer (b)->sw_if_index[VLIB_RX] = bond_sw_if_index;
129 0 : return;
130 : }
131 : }
132 :
133 22 : vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_PASS_THRU, 1);
134 22 : return;
135 : }
136 :
137 : static inline void
138 22 : bond_update_next (vlib_main_t * vm, vlib_node_runtime_t * node,
139 : u32 * last_member_sw_if_index, u32 member_sw_if_index,
140 : u32 * bond_sw_if_index, vlib_buffer_t * b,
141 : u32 * next_index, vlib_error_t * error)
142 : {
143 : member_if_t *mif;
144 : bond_if_t *bif;
145 :
146 22 : *next_index = BOND_INPUT_NEXT_DROP;
147 22 : *error = 0;
148 :
149 22 : if (PREDICT_TRUE (*last_member_sw_if_index == member_sw_if_index))
150 0 : goto next;
151 :
152 22 : *last_member_sw_if_index = member_sw_if_index;
153 :
154 22 : mif = bond_get_member_by_sw_if_index (member_sw_if_index);
155 22 : ALWAYS_ASSERT (mif);
156 :
157 22 : bif = bond_get_bond_if_by_dev_instance (mif->bif_dev_instance);
158 :
159 22 : ALWAYS_ASSERT (bif);
160 22 : ASSERT (vec_len (bif->members));
161 :
162 22 : if (PREDICT_FALSE (bif->admin_up == 0))
163 : {
164 0 : *bond_sw_if_index = member_sw_if_index;
165 0 : *error = node->errors[BOND_INPUT_ERROR_IF_DOWN];
166 : }
167 :
168 22 : if (PREDICT_FALSE ((bif->mode == BOND_MODE_ACTIVE_BACKUP) &&
169 : vec_len (bif->active_members) &&
170 : (member_sw_if_index != bif->active_members[0])))
171 : {
172 0 : *bond_sw_if_index = member_sw_if_index;
173 0 : *error = node->errors[BOND_INPUT_ERROR_PASSIVE_IF];
174 0 : return;
175 : }
176 :
177 22 : *bond_sw_if_index = bif->sw_if_index;
178 :
179 22 : next:
180 22 : vnet_feature_next (next_index, b);
181 : }
182 :
183 : static_always_inline void
184 0 : bond_update_next_x4 (vlib_buffer_t * b0, vlib_buffer_t * b1,
185 : vlib_buffer_t * b2, vlib_buffer_t * b3)
186 : {
187 : u32 tmp0, tmp1, tmp2, tmp3;
188 :
189 0 : tmp0 = tmp1 = tmp2 = tmp3 = BOND_INPUT_NEXT_DROP;
190 0 : vnet_feature_next (&tmp0, b0);
191 0 : vnet_feature_next (&tmp1, b1);
192 0 : vnet_feature_next (&tmp2, b2);
193 0 : vnet_feature_next (&tmp3, b3);
194 0 : }
195 :
196 2254 : VLIB_NODE_FN (bond_input_node) (vlib_main_t * vm,
197 : vlib_node_runtime_t * node,
198 : vlib_frame_t * frame)
199 : {
200 18 : u16 thread_index = vm->thread_index;
201 : u32 *from, n_left;
202 : vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
203 : u32 sw_if_indices[VLIB_FRAME_SIZE], *sw_if_index;
204 : u16 nexts[VLIB_FRAME_SIZE], *next;
205 18 : u32 last_member_sw_if_index = ~0;
206 18 : u32 bond_sw_if_index = 0;
207 18 : vlib_error_t error = 0;
208 18 : u32 next_index = 0;
209 18 : u32 n_rx_bytes = 0, n_rx_packets = 0;
210 :
211 : /* Vector of buffer / pkt indices we're supposed to process */
212 18 : from = vlib_frame_vector_args (frame);
213 :
214 : /* Number of buffers / pkts */
215 18 : n_left = frame->n_vectors;
216 :
217 18 : vlib_get_buffers (vm, from, bufs, n_left);
218 :
219 18 : b = bufs;
220 18 : next = nexts;
221 18 : sw_if_index = sw_if_indices;
222 :
223 18 : while (n_left >= 4)
224 : {
225 0 : u32 x = 0;
226 : /* Prefetch next iteration */
227 0 : if (PREDICT_TRUE (n_left >= 16))
228 : {
229 0 : vlib_prefetch_buffer_data (b[8], LOAD);
230 0 : vlib_prefetch_buffer_data (b[9], LOAD);
231 0 : vlib_prefetch_buffer_data (b[10], LOAD);
232 0 : vlib_prefetch_buffer_data (b[11], LOAD);
233 :
234 0 : vlib_prefetch_buffer_header (b[12], LOAD);
235 0 : vlib_prefetch_buffer_header (b[13], LOAD);
236 0 : vlib_prefetch_buffer_header (b[14], LOAD);
237 0 : vlib_prefetch_buffer_header (b[15], LOAD);
238 : }
239 :
240 0 : sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
241 0 : sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_RX];
242 0 : sw_if_index[2] = vnet_buffer (b[2])->sw_if_index[VLIB_RX];
243 0 : sw_if_index[3] = vnet_buffer (b[3])->sw_if_index[VLIB_RX];
244 :
245 0 : x |= sw_if_index[0] ^ last_member_sw_if_index;
246 0 : x |= sw_if_index[1] ^ last_member_sw_if_index;
247 0 : x |= sw_if_index[2] ^ last_member_sw_if_index;
248 0 : x |= sw_if_index[3] ^ last_member_sw_if_index;
249 :
250 0 : if (PREDICT_TRUE (x == 0))
251 : {
252 : /*
253 : * Optimize to call update_next only if there is a feature arc
254 : * after bond-input. Test feature count greater than 1 because
255 : * bond-input itself is a feature arc for this member interface.
256 : */
257 0 : ASSERT ((vnet_buffer (b[0])->feature_arc_index ==
258 : vnet_buffer (b[1])->feature_arc_index) &&
259 : (vnet_buffer (b[0])->feature_arc_index ==
260 : vnet_buffer (b[2])->feature_arc_index) &&
261 : (vnet_buffer (b[0])->feature_arc_index ==
262 : vnet_buffer (b[3])->feature_arc_index));
263 0 : if (PREDICT_FALSE (vnet_get_feature_count
264 : (vnet_buffer (b[0])->feature_arc_index,
265 : last_member_sw_if_index) > 1))
266 0 : bond_update_next_x4 (b[0], b[1], b[2], b[3]);
267 :
268 0 : next[0] = next[1] = next[2] = next[3] = next_index;
269 0 : if (next_index == BOND_INPUT_NEXT_DROP)
270 : {
271 0 : b[0]->error = error;
272 0 : b[1]->error = error;
273 0 : b[2]->error = error;
274 0 : b[3]->error = error;
275 : }
276 : else
277 : {
278 0 : bond_sw_if_idx_rewrite (vm, node, b[0], bond_sw_if_index,
279 : &n_rx_packets, &n_rx_bytes);
280 0 : bond_sw_if_idx_rewrite (vm, node, b[1], bond_sw_if_index,
281 : &n_rx_packets, &n_rx_bytes);
282 0 : bond_sw_if_idx_rewrite (vm, node, b[2], bond_sw_if_index,
283 : &n_rx_packets, &n_rx_bytes);
284 0 : bond_sw_if_idx_rewrite (vm, node, b[3], bond_sw_if_index,
285 : &n_rx_packets, &n_rx_bytes);
286 : }
287 : }
288 : else
289 : {
290 0 : bond_update_next (vm, node, &last_member_sw_if_index,
291 : sw_if_index[0], &bond_sw_if_index, b[0],
292 : &next_index, &error);
293 0 : next[0] = next_index;
294 0 : if (next_index == BOND_INPUT_NEXT_DROP)
295 0 : b[0]->error = error;
296 : else
297 0 : bond_sw_if_idx_rewrite (vm, node, b[0], bond_sw_if_index,
298 : &n_rx_packets, &n_rx_bytes);
299 :
300 0 : bond_update_next (vm, node, &last_member_sw_if_index,
301 0 : sw_if_index[1], &bond_sw_if_index, b[1],
302 : &next_index, &error);
303 0 : next[1] = next_index;
304 0 : if (next_index == BOND_INPUT_NEXT_DROP)
305 0 : b[1]->error = error;
306 : else
307 0 : bond_sw_if_idx_rewrite (vm, node, b[1], bond_sw_if_index,
308 : &n_rx_packets, &n_rx_bytes);
309 :
310 0 : bond_update_next (vm, node, &last_member_sw_if_index,
311 0 : sw_if_index[2], &bond_sw_if_index, b[2],
312 : &next_index, &error);
313 0 : next[2] = next_index;
314 0 : if (next_index == BOND_INPUT_NEXT_DROP)
315 0 : b[2]->error = error;
316 : else
317 0 : bond_sw_if_idx_rewrite (vm, node, b[2], bond_sw_if_index,
318 : &n_rx_packets, &n_rx_bytes);
319 :
320 0 : bond_update_next (vm, node, &last_member_sw_if_index,
321 0 : sw_if_index[3], &bond_sw_if_index, b[3],
322 : &next_index, &error);
323 0 : next[3] = next_index;
324 0 : if (next_index == BOND_INPUT_NEXT_DROP)
325 0 : b[3]->error = error;
326 : else
327 0 : bond_sw_if_idx_rewrite (vm, node, b[3], bond_sw_if_index,
328 : &n_rx_packets, &n_rx_bytes);
329 : }
330 :
331 : /* next */
332 0 : n_left -= 4;
333 0 : b += 4;
334 0 : sw_if_index += 4;
335 0 : next += 4;
336 : }
337 :
338 40 : while (n_left)
339 : {
340 22 : sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
341 22 : bond_update_next (vm, node, &last_member_sw_if_index, sw_if_index[0],
342 : &bond_sw_if_index, b[0], &next_index, &error);
343 22 : next[0] = next_index;
344 22 : if (next_index == BOND_INPUT_NEXT_DROP)
345 0 : b[0]->error = error;
346 : else
347 22 : bond_sw_if_idx_rewrite (vm, node, b[0], bond_sw_if_index,
348 : &n_rx_packets, &n_rx_bytes);
349 :
350 : /* next */
351 22 : n_left -= 1;
352 22 : b += 1;
353 22 : sw_if_index += 1;
354 22 : next += 1;
355 : }
356 :
357 18 : if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
358 : {
359 1 : n_left = frame->n_vectors; /* number of packets to process */
360 1 : b = bufs;
361 1 : sw_if_index = sw_if_indices;
362 : bond_packet_trace_t *t0;
363 :
364 2 : while (n_left)
365 : {
366 1 : if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
367 : {
368 1 : t0 = vlib_add_trace (vm, node, b[0], sizeof (*t0));
369 1 : t0->sw_if_index = sw_if_index[0];
370 1 : clib_memcpy_fast (&t0->ethernet, vlib_buffer_get_current (b[0]),
371 : sizeof (ethernet_header_t));
372 1 : t0->bond_sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
373 : }
374 : /* next */
375 1 : n_left--;
376 1 : b++;
377 1 : sw_if_index++;
378 : }
379 : }
380 :
381 : /* increase rx counters */
382 18 : vlib_increment_combined_counter
383 : (vnet_main.interface_main.combined_sw_if_counters +
384 : VNET_INTERFACE_COUNTER_RX, thread_index, bond_sw_if_index, n_rx_packets,
385 : n_rx_bytes);
386 :
387 18 : vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
388 18 : vlib_node_increment_counter (vm, bond_input_node.index,
389 18 : BOND_INPUT_ERROR_NONE, frame->n_vectors);
390 :
391 18 : return frame->n_vectors;
392 : }
393 :
394 : static clib_error_t *
395 559 : bond_input_init (vlib_main_t * vm)
396 : {
397 559 : return 0;
398 : }
399 :
400 : /* *INDENT-OFF* */
401 178120 : VLIB_REGISTER_NODE (bond_input_node) = {
402 : .name = "bond-input",
403 : .vector_size = sizeof (u32),
404 : .format_buffer = format_ethernet_header_with_length,
405 : .format_trace = format_bond_input_trace,
406 : .type = VLIB_NODE_TYPE_INTERNAL,
407 : .n_errors = BOND_INPUT_N_ERROR,
408 : .error_strings = bond_input_error_strings,
409 : .n_next_nodes = BOND_INPUT_N_NEXT,
410 : .next_nodes =
411 : {
412 : [BOND_INPUT_NEXT_DROP] = "error-drop"
413 : }
414 : };
415 :
416 31359 : VLIB_INIT_FUNCTION (bond_input_init);
417 :
418 70583 : VNET_FEATURE_INIT (bond_input, static) =
419 : {
420 : .arc_name = "device-input",
421 : .node_name = "bond-input",
422 : .runs_before = VNET_FEATURES ("ethernet-input"),
423 : };
424 : /* *INDENT-ON* */
425 :
426 : static clib_error_t *
427 13268 : bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
428 : {
429 13268 : bond_main_t *bm = &bond_main;
430 : member_if_t *mif;
431 13268 : vlib_main_t *vm = bm->vlib_main;
432 :
433 13268 : mif = bond_get_member_by_sw_if_index (sw_if_index);
434 13268 : if (mif)
435 : {
436 0 : if (mif->lacp_enabled)
437 0 : return 0;
438 :
439 : /* port_enabled is both admin up and hw link up */
440 0 : mif->port_enabled = ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) &&
441 0 : vnet_sw_interface_is_link_up (vnm, sw_if_index));
442 0 : if (mif->port_enabled == 0)
443 0 : bond_disable_collecting_distributing (vm, mif);
444 : else
445 0 : bond_enable_collecting_distributing (vm, mif);
446 : }
447 :
448 13268 : return 0;
449 : }
450 :
451 2801 : VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bond_sw_interface_up_down);
452 :
453 : static clib_error_t *
454 13092 : bond_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
455 : {
456 13092 : bond_main_t *bm = &bond_main;
457 : member_if_t *mif;
458 : vnet_sw_interface_t *sw;
459 13092 : vlib_main_t *vm = bm->vlib_main;
460 :
461 13092 : sw = vnet_get_hw_sw_interface (vnm, hw_if_index);
462 13092 : mif = bond_get_member_by_sw_if_index (sw->sw_if_index);
463 13092 : if (mif)
464 : {
465 2 : if (mif->lacp_enabled)
466 2 : return 0;
467 :
468 : /* port_enabled is both admin up and hw link up */
469 0 : mif->port_enabled = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) &&
470 0 : vnet_sw_interface_is_admin_up (vnm,
471 : sw->sw_if_index));
472 0 : if (mif->port_enabled == 0)
473 0 : bond_disable_collecting_distributing (vm, mif);
474 : else
475 0 : bond_enable_collecting_distributing (vm, mif);
476 : }
477 :
478 13090 : return 0;
479 : }
480 :
481 2801 : VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bond_hw_interface_up_down);
482 :
483 : /*
484 : * fd.io coding-style-patch-verification: ON
485 : *
486 : * Local Variables:
487 : * eval: (c-set-style "gnu")
488 : * End:
489 : */
|