Line data Source code
1 : /*
2 : * l2/l2_arp_term.c: IP v4 ARP L2 BD termination
3 : *
4 : * Copyright (c) 2010 Cisco and/or its affiliates.
5 : * Licensed under the Apache License, Version 2.0 (the "License");
6 : * you may not use this file except in compliance with the License.
7 : * You may obtain a copy of the License at:
8 : *
9 : * http://www.apache.org/licenses/LICENSE-2.0
10 : *
11 : * Unless required by applicable law or agreed to in writing, software
12 : * distributed under the License is distributed on an "AS IS" BASIS,
13 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 : * See the License for the specific language governing permissions and
15 : * limitations under the License.
16 : */
17 :
18 : #include <vlibmemory/api.h>
19 :
20 : #include <vnet/l2/l2_arp_term.h>
21 : #include <vnet/l2/l2_input.h>
22 : #include <vnet/l2/feat_bitmap.h>
23 :
24 : #include <vnet/ip/ip4_packet.h>
25 : #include <vnet/ip/ip6_packet.h>
26 : #include <vnet/ip/icmp6.h>
27 : #include <vnet/ip/ip6.h>
28 : #include <vnet/ip/ip.api_enum.h>
29 : #include <vnet/ip/format.h>
30 : #include <vnet/ethernet/arp_packet.h>
31 :
32 : static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 };
33 :
34 : l2_arp_term_main_t l2_arp_term_main;
35 :
36 : /*
37 : * ARP/ND Termination in a L2 Bridge Domain based on IP4/IP6 to MAC
38 : * hash tables mac_by_ip4 and mac_by_ip6 for each BD.
39 : */
/* Next-node slots declared by the arp-term-l2bd node registration. */
typedef enum
{
  ARP_TERM_NEXT_L2_OUTPUT,	/* locally generated ARP/ND reply */
  ARP_TERM_NEXT_DROP,		/* rejected ARP packet */
  ARP_TERM_N_NEXT,		/* number of slots, keep last */
} arp_term_next_t;
46 :
47 : u32 arp_term_next_node_index[32];
48 :
49 : typedef struct
50 : {
51 : u8 packet_data[64];
52 : } ethernet_arp_input_trace_t;
53 :
/*
 * X-macro listing every counter of the arp-term node as
 * _ (symbol, description).  It is expanded twice below: once into the
 * ETHERNET_ARP_ERROR_* enum and once into the matching string table, so
 * the two always stay in the same order.
 */
#define foreach_ethernet_arp_error                                            \
  _ (replies_sent, "ARP replies sent")                                        \
  _ (l2_type_not_ethernet, "L2 type not ethernet")                            \
  _ (l3_type_not_ip4, "L3 type not IP4")                                      \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet")      \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_dst_address_unset, "IP4 destination address is unset")                \
  _ (l3_src_address_is_local, "IP4 source address matches local interface")   \
  _ (l3_src_address_learned, "ARP request IP4 source address learned")        \
  _ (replies_received, "ARP replies received")                                \
  _ (opcode_not_request, "ARP opcode not request")                            \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent")                        \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr")     \
  _ (gratuitous_arp, "ARP probe or announcement dropped")                     \
  _ (interface_no_table, "Interface is not mapped to an IP table")            \
  _ (interface_not_ip_enabled, "Interface is not IP enabled")                 \
  _ (unnumbered_mismatch, "RX interface is unnumbered to different subnet")

/* Counter indexes; ETHERNET_ARP_N_ERROR is the number of counters. */
typedef enum
{
#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
  foreach_ethernet_arp_error
#undef _
    ETHERNET_ARP_N_ERROR,
} ethernet_arp_reply_error_t;

/* Human-readable counter descriptions, indexed by ETHERNET_ARP_ERROR_*. */
static char *ethernet_arp_error_strings[] = {
#define _(sym,string) string,
  foreach_ethernet_arp_error
#undef _
};
85 :
86 : static u8 *
87 198 : format_arp_term_input_trace (u8 * s, va_list * va)
88 : {
89 198 : CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
90 198 : CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
91 198 : ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
92 :
93 : /* arp-term trace data saved is either arp or ip6/icmp6 packet:
94 : - for arp, the 1st 16-bit field is hw type of value of 0x0001.
95 : - for ip6, the first nibble has value of 6. */
96 198 : s = format (s, "%U", t->packet_data[0] == 0 ?
97 : format_ethernet_arp_header : format_ip6_header,
98 198 : t->packet_data, sizeof (t->packet_data));
99 :
100 198 : return s;
101 : }
102 :
103 : void
104 4 : l2_arp_term_set_publisher_node (bool on)
105 : {
106 4 : l2_arp_term_main_t *l2am = &l2_arp_term_main;
107 :
108 4 : l2am->publish = on;
109 4 : }
110 :
111 : static int
112 20 : l2_arp_term_publish (l2_arp_term_publish_event_t * ctx)
113 : {
114 20 : l2_arp_term_main_t *l2am = &l2_arp_term_main;
115 :
116 20 : vec_add1 (l2am->publish_events, *ctx);
117 :
118 20 : vlib_process_signal_event (vlib_get_main (),
119 20 : l2_arp_term_process_node.index,
120 : L2_ARP_TERM_EVENT_PUBLISH, 0);
121 :
122 20 : return 0;
123 : }
124 :
125 : static inline void
126 153 : l2_arp_term_publish_v4_dp (u32 sw_if_index,
127 : const ethernet_arp_ip4_over_ethernet_address_t * a)
128 : {
129 153 : l2_arp_term_main_t *l2am = &l2_arp_term_main;
130 :
131 153 : if (!l2am->publish)
132 143 : return;
133 :
134 10 : l2_arp_term_publish_event_t args = {
135 : .sw_if_index = sw_if_index,
136 : .type = IP46_TYPE_IP4,
137 : .ip.ip4 = a->ip4,
138 : .mac = a->mac,
139 : };
140 :
141 10 : vl_api_rpc_call_main_thread (l2_arp_term_publish, (u8 *) & args,
142 : sizeof (args));
143 : }
144 :
145 : static inline void
146 165 : l2_arp_term_publish_v6_dp (u32 sw_if_index,
147 : const ip6_address_t * addr,
148 : const mac_address_t * mac)
149 : {
150 165 : l2_arp_term_main_t *l2am = &l2_arp_term_main;
151 :
152 165 : if (!l2am->publish)
153 155 : return;
154 :
155 10 : l2_arp_term_publish_event_t args = {
156 : .sw_if_index = sw_if_index,
157 : .type = IP46_TYPE_IP6,
158 : .ip.ip6 = *addr,
159 : .mac = *mac,
160 : };
161 :
162 10 : vl_api_rpc_call_main_thread (l2_arp_term_publish, (u8 *) & args,
163 : sizeof (args));
164 : }
165 :
166 : static inline int
167 165 : vnet_ip6_nd_term (vlib_main_t * vm,
168 : vlib_node_runtime_t * node,
169 : vlib_buffer_t * p0,
170 : ethernet_header_t * eth,
171 : ip6_header_t * ip, u32 sw_if_index, u16 bd_index)
172 : {
173 : icmp6_neighbor_solicitation_or_advertisement_header_t *ndh;
174 : mac_address_t mac;
175 :
176 165 : mac_address_from_bytes (&mac, eth->src_address);
177 165 : ndh = ip6_next_header (ip);
178 165 : if (ndh->icmp.type != ICMP6_neighbor_solicitation &&
179 0 : ndh->icmp.type != ICMP6_neighbor_advertisement)
180 0 : return 0;
181 :
182 165 : if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
183 : (p0->flags & VLIB_BUFFER_IS_TRACED)))
184 : {
185 165 : u8 *t0 = vlib_add_trace (vm, node, p0,
186 : sizeof (icmp6_input_trace_t));
187 165 : clib_memcpy (t0, ip, sizeof (icmp6_input_trace_t));
188 : }
189 :
190 : /* Check if anyone want ND events for L2 BDs */
191 165 : if (PREDICT_FALSE (!ip6_address_is_link_local_unicast (&ip->src_address)))
192 : {
193 165 : l2_arp_term_publish_v6_dp (sw_if_index, &ip->src_address, &mac);
194 : }
195 :
196 : /* Check if MAC entry exsist for solicited target IP */
197 165 : if (ndh->icmp.type == ICMP6_neighbor_solicitation)
198 : {
199 : icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *opt;
200 : l2_bridge_domain_t *bd_config;
201 : u8 *macp;
202 :
203 165 : opt = (void *) (ndh + 1);
204 165 : if ((opt->header.type !=
205 165 : ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address) ||
206 165 : (opt->header.n_data_u64s != 1))
207 0 : return 0; /* source link layer address option not present */
208 :
209 165 : bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
210 : macp =
211 330 : (u8 *) hash_get_mem (bd_config->mac_by_ip6, &ndh->target_address);
212 165 : if (macp)
213 : { /* found ip-mac entry, generate eighbor advertisement response */
214 : int bogus_length;
215 : vlib_node_runtime_t *error_node =
216 135 : vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
217 135 : ip->dst_address = ip->src_address;
218 135 : ip->src_address = ndh->target_address;
219 135 : ip->hop_limit = 255;
220 135 : opt->header.type =
221 : ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
222 135 : clib_memcpy (opt->ethernet_address, macp, 6);
223 135 : ndh->icmp.type = ICMP6_neighbor_advertisement;
224 135 : ndh->advertisement_flags = clib_host_to_net_u32
225 : (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED |
226 : ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
227 135 : ndh->icmp.checksum = 0;
228 135 : ndh->icmp.checksum =
229 135 : ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip, &bogus_length);
230 135 : clib_memcpy (eth->dst_address, eth->src_address, 6);
231 135 : clib_memcpy (eth->src_address, macp, 6);
232 135 : vlib_error_count (vm, error_node->node_index,
233 : ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX, 1);
234 135 : return 1;
235 : }
236 : }
237 :
238 30 : return 0;
239 :
240 : }
241 :
242 : static uword
243 18 : arp_term_l2bd (vlib_main_t * vm,
244 : vlib_node_runtime_t * node, vlib_frame_t * frame)
245 : {
246 18 : l2input_main_t *l2im = &l2input_main;
247 : u32 n_left_from, next_index, *from, *to_next;
248 18 : u32 n_replies_sent = 0;
249 18 : u16 last_bd_index = ~0;
250 18 : l2_bridge_domain_t *last_bd_config = 0;
251 : l2_input_config_t *cfg0;
252 :
253 18 : from = vlib_frame_vector_args (frame);
254 18 : n_left_from = frame->n_vectors;
255 18 : next_index = node->cached_next_index;
256 :
257 36 : while (n_left_from > 0)
258 : {
259 : u32 n_left_to_next;
260 :
261 18 : vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
262 :
263 336 : while (n_left_from > 0 && n_left_to_next > 0)
264 : {
265 : vlib_buffer_t *p0;
266 : ethernet_header_t *eth0;
267 : ethernet_arp_header_t *arp0;
268 : ip6_header_t *iph0;
269 : u8 *l3h0;
270 : u32 pi0, error0, next0, sw_if_index0;
271 : u16 ethertype0;
272 : u16 bd_index0;
273 : u32 ip0;
274 : u8 *macp0;
275 :
276 318 : pi0 = from[0];
277 318 : to_next[0] = pi0;
278 318 : from += 1;
279 318 : to_next += 1;
280 318 : n_left_from -= 1;
281 318 : n_left_to_next -= 1;
282 :
283 318 : p0 = vlib_get_buffer (vm, pi0);
284 : // Terminate only local (SHG == 0) ARP
285 318 : if (vnet_buffer (p0)->l2.shg != 0)
286 0 : goto next_l2_feature;
287 :
288 318 : eth0 = vlib_buffer_get_current (p0);
289 318 : l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len;
290 318 : ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
291 318 : arp0 = (ethernet_arp_header_t *) l3h0;
292 :
293 318 : if (p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)
294 0 : goto next_l2_feature;
295 :
296 318 : if (ethertype0 != ETHERNET_TYPE_ARP)
297 165 : goto check_ip6_nd;
298 :
299 153 : if ((arp0->opcode !=
300 153 : clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request)) &&
301 0 : (arp0->opcode !=
302 0 : clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)))
303 0 : goto check_ip6_nd;
304 :
305 : /* Must be ARP request/reply packet here */
306 153 : if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
307 : (p0->flags & VLIB_BUFFER_IS_TRACED)))
308 : {
309 153 : u8 *t0 = vlib_add_trace (vm, node, p0,
310 : sizeof (ethernet_arp_input_trace_t));
311 153 : clib_memcpy_fast (t0, l3h0,
312 : sizeof (ethernet_arp_input_trace_t));
313 : }
314 :
315 153 : error0 = 0;
316 153 : error0 =
317 153 : (arp0->l2_type !=
318 153 : clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
319 153 : ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
320 153 : error0 =
321 153 : (arp0->l3_type !=
322 153 : clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
323 153 : ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
324 :
325 153 : sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
326 :
327 153 : if (error0)
328 0 : goto drop;
329 :
330 : /* Trash ARP packets whose ARP-level source addresses do not
331 : match, or if requester address is mcast */
332 153 : if (PREDICT_FALSE
333 : (!ethernet_mac_address_equal (eth0->src_address,
334 : arp0->ip4_over_ethernet[0].
335 : mac.bytes))
336 153 : || ethernet_address_cast (arp0->ip4_over_ethernet[0].mac.bytes))
337 : {
338 : /* VRRP virtual MAC may be different to SMAC in ARP reply */
339 0 : if (clib_memcmp (arp0->ip4_over_ethernet[0].mac.bytes,
340 : vrrp_prefix, sizeof (vrrp_prefix)) != 0)
341 : {
342 0 : error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
343 0 : goto drop;
344 : }
345 : }
346 153 : if (PREDICT_FALSE
347 : (ip4_address_is_multicast (&arp0->ip4_over_ethernet[0].ip4)))
348 : {
349 0 : error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
350 0 : goto drop;
351 : }
352 :
353 : /* Check if anyone want ARP request events for L2 BDs */
354 153 : l2_arp_term_publish_v4_dp (sw_if_index0,
355 153 : &arp0->ip4_over_ethernet[0]);
356 :
357 : /* lookup BD mac_by_ip4 hash table for MAC entry */
358 153 : ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
359 153 : bd_index0 = vnet_buffer (p0)->l2.bd_index;
360 153 : if (PREDICT_FALSE ((bd_index0 != last_bd_index)
361 : || (last_bd_index == (u16) ~ 0)))
362 : {
363 10 : last_bd_index = bd_index0;
364 10 : last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0);
365 : }
366 153 : macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
367 :
368 153 : if (PREDICT_FALSE (!macp0))
369 27 : goto next_l2_feature; /* MAC not found */
370 126 : if (PREDICT_FALSE (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
371 : arp0->ip4_over_ethernet[1].ip4.as_u32))
372 0 : goto next_l2_feature; /* GARP */
373 :
374 : /* MAC found, send ARP reply -
375 : Convert ARP request packet to ARP reply */
376 126 : arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
377 126 : arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
378 126 : arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
379 126 : mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac, macp0);
380 126 : clib_memcpy_fast (eth0->dst_address, eth0->src_address, 6);
381 126 : clib_memcpy_fast (eth0->src_address, macp0, 6);
382 126 : n_replies_sent += 1;
383 :
384 261 : output_response:
385 : /* For BVI, need to use l2-fwd node to send ARP reply as
386 : l2-output node cannot output packet to BVI properly */
387 261 : cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0);
388 261 : if (PREDICT_FALSE (l2_input_is_bvi (cfg0)))
389 : {
390 0 : vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
391 0 : vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
392 0 : goto next_l2_feature;
393 : }
394 :
395 : /* Send ARP/ND reply back out input interface through l2-output */
396 261 : vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
397 261 : next0 = ARP_TERM_NEXT_L2_OUTPUT;
398 261 : vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
399 : to_next, n_left_to_next, pi0,
400 : next0);
401 261 : continue;
402 :
403 165 : check_ip6_nd:
404 : /* IP6 ND event notification or solicitation handling to generate
405 : local response instead of flooding */
406 165 : iph0 = (ip6_header_t *) l3h0;
407 165 : if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 &&
408 : iph0->protocol == IP_PROTOCOL_ICMP6 &&
409 : !ip6_address_is_unspecified
410 : (&iph0->src_address)))
411 : {
412 165 : sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
413 165 : if (vnet_ip6_nd_term
414 : (vm, node, p0, eth0, iph0, sw_if_index0,
415 165 : vnet_buffer (p0)->l2.bd_index))
416 135 : goto output_response;
417 : }
418 :
419 30 : next_l2_feature:
420 : {
421 57 : next0 = vnet_l2_feature_next (p0, arp_term_next_node_index,
422 : L2INPUT_FEAT_ARP_TERM);
423 57 : vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
424 : to_next, n_left_to_next,
425 : pi0, next0);
426 57 : continue;
427 : }
428 :
429 0 : drop:
430 0 : if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
431 0 : (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
432 0 : arp0->ip4_over_ethernet[1].ip4.as_u32))
433 : {
434 0 : error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
435 : }
436 0 : next0 = ARP_TERM_NEXT_DROP;
437 0 : p0->error = node->errors[error0];
438 :
439 0 : vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
440 : to_next, n_left_to_next, pi0,
441 : next0);
442 : }
443 :
444 18 : vlib_put_next_frame (vm, node, next_index, n_left_to_next);
445 : }
446 :
447 18 : vlib_error_count (vm, node->node_index,
448 : ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
449 18 : return frame->n_vectors;
450 : }
451 :
452 : /* *INDENT-OFF* */
453 178120 : VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
454 : .function = arp_term_l2bd,
455 : .name = "arp-term-l2bd",
456 : .vector_size = sizeof (u32),
457 : .n_errors = ETHERNET_ARP_N_ERROR,
458 : .error_strings = ethernet_arp_error_strings,
459 : .n_next_nodes = ARP_TERM_N_NEXT,
460 : .next_nodes = {
461 : [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
462 : [ARP_TERM_NEXT_DROP] = "error-drop",
463 : },
464 : .format_buffer = format_ethernet_arp_header,
465 : .format_trace = format_arp_term_input_trace,
466 : };
467 : /* *INDENT-ON* */
468 :
469 : clib_error_t *
470 559 : arp_term_init (vlib_main_t * vm)
471 : {
472 : // Initialize the feature next-node indexes
473 559 : feat_bitmap_init_next_nodes (vm,
474 : arp_term_l2bd_node.index,
475 : L2INPUT_N_FEAT,
476 : l2input_get_feat_names (),
477 : arp_term_next_node_index);
478 559 : return 0;
479 : }
480 :
481 15119 : VLIB_INIT_FUNCTION (arp_term_init);
482 :
483 : /*
484 : * fd.io coding-style-patch-verification: ON
485 : *
486 : * Local Variables:
487 : * eval: (c-set-style "gnu")
488 : * End:
489 : */
|