Line data Source code
1 : /*
2 : * l2_input.c : layer 2 input packet processing
3 : *
4 : * Copyright (c) 2013 Cisco and/or its affiliates.
5 : * Licensed under the Apache License, Version 2.0 (the "License");
6 : * you may not use this file except in compliance with the License.
7 : * You may obtain a copy of the License at:
8 : *
9 : * http://www.apache.org/licenses/LICENSE-2.0
10 : *
11 : * Unless required by applicable law or agreed to in writing, software
12 : * distributed under the License is distributed on an "AS IS" BASIS,
13 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 : * See the License for the specific language governing permissions and
15 : * limitations under the License.
16 : */
17 :
18 : #include <vlib/vlib.h>
19 : #include <vnet/vnet.h>
20 : #include <vnet/pg/pg.h>
21 : #include <vnet/ethernet/ethernet.h>
22 : #include <vnet/ethernet/packet.h>
23 : #include <vnet/ip/ip4.h>
24 : #include <vnet/ip/ip6.h>
25 : #include <vnet/fib/fib_node.h>
26 : #include <vnet/ethernet/arp_packet.h>
27 : #include <vlib/cli.h>
28 : #include <vnet/l2/l2_input.h>
29 : #include <vnet/l2/l2_output.h>
30 : #include <vnet/l2/feat_bitmap.h>
31 : #include <vnet/l2/l2_bvi.h>
32 : #include <vnet/l2/l2_fib.h>
33 : #include <vnet/l2/l2_bd.h>
34 :
35 : #include <vppinfra/error.h>
36 : #include <vppinfra/hash.h>
37 : #include <vppinfra/cache.h>
38 :
39 : /**
40 : * @file
41 : * @brief Interface Input Mode (Layer 2 Cross-Connect or Bridge / Layer 3).
42 : *
43 : * This file contains the CLI Commands that modify the input mode of an
44 : * interface. For interfaces in a Layer 2 cross-connect, all packets
45 : * received on one interface will be transmitted to the other. For
46 : * interfaces in a bridge-domain, packets will be forwarded to other
47 : * interfaces in the same bridge-domain based on destination mac address.
48 : * For interfaces in Layer 3 mode, the packets will be routed.
49 : */
50 :
51 : typedef struct
52 : {
53 : /* per-pkt trace data */
54 : u8 dst_and_src[12];
55 : u32 sw_if_index;
56 : u32 feat_mask;
57 : } l2input_trace_t;
58 :
59 : /* packet trace format function */
60 : static u8 *
61 11141 : format_l2input_trace (u8 * s, va_list * args)
62 : {
63 11141 : CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64 11141 : CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65 11141 : l2input_trace_t *t = va_arg (*args, l2input_trace_t *);
66 :
67 11141 : s = format (s, "l2-input: sw_if_index %d dst %U src %U [%U]",
68 : t->sw_if_index,
69 11141 : format_ethernet_address, t->dst_and_src,
70 11141 : format_ethernet_address, t->dst_and_src + 6,
71 : format_l2_input_feature_bitmap, t->feat_mask, 0);
72 11141 : return s;
73 : }
74 :
75 : extern l2input_main_t l2input_main;
76 :
77 : #ifndef CLIB_MARCH_VARIANT
78 : l2input_main_t l2input_main;
79 : #endif /* CLIB_MARCH_VARIANT */
80 :
/*
 * X-macro list of the l2-input node counters.
 * Each entry is _(enum suffix, human readable description).
 */
#define foreach_l2input_error			\
_(L2INPUT, "L2 input packets")			\
_(DROP, "L2 input drops")

/* Counter indices, generated from the list above in declaration order. */
typedef enum
{
#define _(name,text) L2INPUT_ERROR_##name,
  foreach_l2input_error
#undef _
    L2INPUT_N_ERROR,
} l2input_error_t;

/* Counter descriptions, indexed by l2input_error_t. */
static char *l2input_error_strings[] = {
#define _(name,text) text,
  foreach_l2input_error
#undef _
};
98 :
/*
 * Next-node slots of the l2-input node.
 * The slot-to-node-name mapping is given in the node registration.
 */
typedef enum
{
  L2INPUT_NEXT_LEARN,		/* registered as "l2-learn" */
  L2INPUT_NEXT_FWD,		/* registered as "l2-fwd" */
  L2INPUT_NEXT_DROP,		/* registered as "error-drop" */
  L2INPUT_N_NEXT,		/* number of slots, not a slot itself */
} l2input_next_t;
106 :
107 : static_always_inline void
108 56217500 : classify_and_dispatch (l2input_main_t * msm, vlib_buffer_t * b0, u16 * next0)
109 : {
110 : /*
111 : * Load L2 input feature struct
112 : * Load bridge domain struct
113 : * Parse ethernet header to determine unicast/mcast/broadcast
114 : * take L2 input stat
115 : * classify packet as IP/UDP/TCP, control, other
116 : * mask feature bitmap
117 : * go to first node in bitmap
118 : * Later: optimize VTM
119 : *
120 : * For L2XC,
121 : * set tx sw-if-handle
122 : */
123 :
124 56217500 : u32 feat_mask = ~0;
125 56217500 : u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
126 56217500 : ethernet_header_t *h0 = vlib_buffer_get_current (b0);
127 :
128 : /* Get config for the input interface */
129 56217500 : l2_input_config_t *config = vec_elt_at_index (msm->configs, sw_if_index0);
130 :
131 : /* Save split horizon group */
132 56217500 : vnet_buffer (b0)->l2.shg = config->shg;
133 :
134 : /* determine layer2 kind for stat and mask */
135 56217500 : if (PREDICT_FALSE (ethernet_address_cast (h0->dst_address)))
136 : {
137 4825 : u8 *l3h0 = (u8 *) h0 + vnet_buffer (b0)->l2.l2_len;
138 :
139 : #define get_u16(addr) ( *((u16 *)(addr)) )
140 4825 : u16 ethertype = clib_net_to_host_u16 (get_u16 (l3h0 - 2));
141 4825 : u8 protocol = ((ip6_header_t *) l3h0)->protocol;
142 :
143 : /* Disable bridge forwarding (flooding will execute instead if not xconnect) */
144 4825 : feat_mask &=
145 : ~(L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD | L2INPUT_FEAT_UU_FWD);
146 :
147 4825 : if (ethertype != ETHERNET_TYPE_ARP)
148 4550 : feat_mask &= ~(L2INPUT_FEAT_ARP_UFWD);
149 :
150 : /* Disable ARP-term for non-ARP and non-ICMP6 packet */
151 4825 : if (ethertype != ETHERNET_TYPE_ARP &&
152 2250 : (ethertype != ETHERNET_TYPE_IP6 || protocol != IP_PROTOCOL_ICMP6))
153 3344 : feat_mask &= ~(L2INPUT_FEAT_ARP_TERM);
154 : /*
155 : * For packet from BVI - set SHG of ARP request or ICMPv6 neighbor
156 : * solicitation packet from BVI to 0 so it can also flood to VXLAN
157 : * tunnels or other ports with the same SHG as that of the BVI.
158 : */
159 1481 : else if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] ==
160 : L2INPUT_BVI))
161 : {
162 0 : if (ethertype == ETHERNET_TYPE_ARP)
163 : {
164 0 : ethernet_arp_header_t *arp0 = (ethernet_arp_header_t *) l3h0;
165 0 : if (arp0->opcode ==
166 0 : clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request))
167 0 : vnet_buffer (b0)->l2.shg = 0;
168 : }
169 : else /* must be ICMPv6 */
170 : {
171 0 : ip6_header_t *iph0 = (ip6_header_t *) l3h0;
172 : icmp6_neighbor_solicitation_or_advertisement_header_t *ndh0;
173 0 : ndh0 = ip6_next_header (iph0);
174 0 : if (ndh0->icmp.type == ICMP6_neighbor_solicitation)
175 0 : vnet_buffer (b0)->l2.shg = 0;
176 : }
177 : }
178 : }
179 : else
180 : {
181 : /*
182 : * For packet from BVI - set SHG of unicast packet from BVI to 0 so it
183 : * is not dropped on output to VXLAN tunnels or other ports with the
184 : * same SHG as that of the BVI.
185 : */
186 56212600 : if (PREDICT_FALSE (vnet_buffer (b0)->sw_if_index[VLIB_TX] ==
187 : L2INPUT_BVI))
188 527 : vnet_buffer (b0)->l2.shg = 0;
189 : }
190 :
191 :
192 56217500 : if (l2_input_is_bridge (config))
193 : {
194 : /* Do bridge-domain processing */
195 : /* save BD ID for next feature graph nodes */
196 56211500 : vnet_buffer (b0)->l2.bd_index = config->bd_index;
197 :
198 : /* Save bridge domain and interface seq_num */
199 112423000 : vnet_buffer (b0)->l2.l2fib_sn = l2_fib_mk_seq_num
200 56211500 : (config->bd_seq_num, config->seq_num);
201 56211500 : vnet_buffer (b0)->l2.bd_age = config->bd_mac_age;
202 :
203 : /*
204 : * Process bridge domain feature enables.
205 : * To perform learning/flooding/forwarding, the corresponding bit
206 : * must be enabled in both the input interface config and in the
207 : * bridge domain config. In the bd_bitmap, bits for features other
208 : * than learning/flooding/forwarding should always be set.
209 : */
210 56211500 : feat_mask = feat_mask & config->bd_feature_bitmap;
211 : }
212 5905 : else if (l2_input_is_xconnect (config))
213 : {
214 : /* Set the output interface */
215 5905 : vnet_buffer (b0)->sw_if_index[VLIB_TX] = config->output_sw_if_index;
216 : }
217 : else
218 0 : feat_mask = L2INPUT_FEAT_DROP;
219 :
220 : /* mask out features from bitmap using packet type and bd config */
221 56217500 : u32 feature_bitmap = config->feature_bitmap & feat_mask;
222 :
223 : /* save for next feature graph nodes */
224 56217500 : vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap;
225 :
226 : /* Determine the next node */
227 56217500 : *next0 = feat_bitmap_get_next_node_index (msm->feat_next_node_index,
228 : feature_bitmap);
229 56217500 : }
230 :
231 : static_always_inline uword
232 1071920 : l2input_node_inline (vlib_main_t * vm,
233 : vlib_node_runtime_t * node, vlib_frame_t * frame,
234 : int do_trace)
235 : {
236 : u32 n_left, *from;
237 1071920 : l2input_main_t *msm = &l2input_main;
238 1071920 : vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
239 1071920 : u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
240 :
241 1071920 : from = vlib_frame_vector_args (frame);
242 1071920 : n_left = frame->n_vectors; /* number of packets to process */
243 :
244 1071920 : vlib_get_buffers (vm, from, bufs, n_left);
245 :
246 2143840 : while (n_left > 0)
247 : {
248 13859000 : while (n_left >= 8)
249 : {
250 : u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
251 :
252 : /* Prefetch next iteration. */
253 : {
254 : /* Prefetch the buffer header for the N+2 loop iteration */
255 12787100 : clib_prefetch_store (b[4]);
256 12787100 : clib_prefetch_store (b[5]);
257 12787100 : clib_prefetch_store (b[6]);
258 12787100 : clib_prefetch_store (b[7]);
259 :
260 12787100 : clib_prefetch_store (b[4]->data);
261 12787100 : clib_prefetch_store (b[5]->data);
262 12787100 : clib_prefetch_store (b[6]->data);
263 12787100 : clib_prefetch_store (b[7]->data);
264 : }
265 :
266 12787100 : classify_and_dispatch (msm, b[0], &next[0]);
267 12787100 : classify_and_dispatch (msm, b[1], &next[1]);
268 12787100 : classify_and_dispatch (msm, b[2], &next[2]);
269 12787100 : classify_and_dispatch (msm, b[3], &next[3]);
270 :
271 12787100 : if (do_trace)
272 : {
273 : /* RX interface handles */
274 3188 : sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
275 3188 : sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_RX];
276 3188 : sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_RX];
277 3188 : sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_RX];
278 :
279 3188 : if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
280 : {
281 3188 : ethernet_header_t *h0 = vlib_buffer_get_current (b[0]);
282 : l2input_trace_t *t =
283 3188 : vlib_add_trace (vm, node, b[0], sizeof (*t));
284 3188 : t->sw_if_index = sw_if_index0;
285 3188 : t->feat_mask = vnet_buffer (b[0])->l2.feature_bitmap;
286 3188 : clib_memcpy_fast (t->dst_and_src, h0->dst_address,
287 : sizeof (h0->dst_address) +
288 : sizeof (h0->src_address));
289 : }
290 3188 : if (b[1]->flags & VLIB_BUFFER_IS_TRACED)
291 : {
292 3188 : ethernet_header_t *h1 = vlib_buffer_get_current (b[1]);
293 : l2input_trace_t *t =
294 3188 : vlib_add_trace (vm, node, b[1], sizeof (*t));
295 3188 : t->sw_if_index = sw_if_index1;
296 3188 : t->feat_mask = vnet_buffer (b[1])->l2.feature_bitmap;
297 3188 : clib_memcpy_fast (t->dst_and_src, h1->dst_address,
298 : sizeof (h1->dst_address) +
299 : sizeof (h1->src_address));
300 : }
301 3188 : if (b[2]->flags & VLIB_BUFFER_IS_TRACED)
302 : {
303 3188 : ethernet_header_t *h2 = vlib_buffer_get_current (b[2]);
304 : l2input_trace_t *t =
305 3188 : vlib_add_trace (vm, node, b[2], sizeof (*t));
306 3188 : t->sw_if_index = sw_if_index2;
307 3188 : t->feat_mask = vnet_buffer (b[2])->l2.feature_bitmap;
308 3188 : clib_memcpy_fast (t->dst_and_src, h2->dst_address,
309 : sizeof (h2->dst_address) +
310 : sizeof (h2->src_address));
311 : }
312 3188 : if (b[3]->flags & VLIB_BUFFER_IS_TRACED)
313 : {
314 3188 : ethernet_header_t *h3 = vlib_buffer_get_current (b[3]);
315 : l2input_trace_t *t =
316 3188 : vlib_add_trace (vm, node, b[3], sizeof (*t));
317 3188 : t->sw_if_index = sw_if_index3;
318 3188 : t->feat_mask = vnet_buffer (b[3])->l2.feature_bitmap;
319 3188 : clib_memcpy_fast (t->dst_and_src, h3->dst_address,
320 : sizeof (h3->dst_address) +
321 : sizeof (h3->src_address));
322 : }
323 : }
324 :
325 12787100 : b += 4;
326 12787100 : n_left -= 4;
327 12787100 : next += 4;
328 : }
329 :
330 6141160 : while (n_left > 0)
331 : {
332 5069240 : classify_and_dispatch (msm, b[0], &next[0]);
333 :
334 5069240 : if (do_trace && PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
335 : {
336 1563 : ethernet_header_t *h0 = vlib_buffer_get_current (b[0]);
337 : l2input_trace_t *t =
338 1563 : vlib_add_trace (vm, node, b[0], sizeof (*t));
339 1563 : t->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
340 1563 : t->feat_mask = vnet_buffer (b[0])->l2.feature_bitmap;
341 1563 : clib_memcpy_fast (t->dst_and_src, h0->dst_address,
342 : sizeof (h0->dst_address) +
343 : sizeof (h0->src_address));
344 : }
345 :
346 5069240 : b += 1;
347 5069240 : next += 1;
348 5069240 : n_left -= 1;
349 : }
350 : }
351 :
352 1071920 : vlib_node_increment_counter (vm, l2input_node.index,
353 1071920 : L2INPUT_ERROR_L2INPUT, frame->n_vectors);
354 :
355 1071920 : vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
356 :
357 1071920 : return frame->n_vectors;
358 : }
359 :
360 1074220 : VLIB_NODE_FN (l2input_node) (vlib_main_t * vm,
361 : vlib_node_runtime_t * node, vlib_frame_t * frame)
362 : {
363 1071920 : if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
364 445 : return l2input_node_inline (vm, node, frame, 1 /* do_trace */ );
365 1071480 : return l2input_node_inline (vm, node, frame, 0 /* do_trace */ );
366 : }
367 :
368 : /* *INDENT-OFF* */
369 183788 : VLIB_REGISTER_NODE (l2input_node) = {
370 : .name = "l2-input",
371 : .vector_size = sizeof (u32),
372 : .format_trace = format_l2input_trace,
373 : .format_buffer = format_ethernet_header_with_length,
374 : .type = VLIB_NODE_TYPE_INTERNAL,
375 :
376 : .n_errors = ARRAY_LEN(l2input_error_strings),
377 : .error_strings = l2input_error_strings,
378 :
379 : .n_next_nodes = L2INPUT_N_NEXT,
380 :
381 : /* edit / add dispositions here */
382 : .next_nodes = {
383 : [L2INPUT_NEXT_LEARN] = "l2-learn",
384 : [L2INPUT_NEXT_FWD] = "l2-fwd",
385 : [L2INPUT_NEXT_DROP] = "error-drop",
386 : },
387 : };
388 : /* *INDENT-ON* */
389 :
390 : /*
391 : * fd.io coding-style-patch-verification: ON
392 : *
393 : * Local Variables:
394 : * eval: (c-set-style "gnu")
395 : * End:
396 : */
|