Line data Source code
1 : /*
2 : * Copyright (c) 2015 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 : #include <vxlan/vxlan.h>
16 : #include <vnet/ip/format.h>
17 : #include <vnet/fib/fib_entry.h>
18 : #include <vnet/fib/fib_table.h>
19 : #include <vnet/fib/fib_entry_track.h>
20 : #include <vnet/mfib/mfib_table.h>
21 : #include <vnet/adj/adj_mcast.h>
22 : #include <vnet/adj/rewrite.h>
23 : #include <vnet/dpo/drop_dpo.h>
24 : #include <vnet/interface.h>
25 : #include <vnet/flow/flow.h>
26 : #include <vnet/udp/udp_local.h>
27 : #include <vlib/vlib.h>
28 :
29 : /**
30 : * @file
31 : * @brief VXLAN.
32 : *
33 : * VXLAN provides the features needed to allow L2 bridge domains (BDs)
34 : * to span multiple servers. This is done by building an L2 overlay on
35 : * top of an L3 network underlay using VXLAN tunnels.
36 : *
37 : * This makes it possible for servers to be co-located in the same data
38 : * center or be separated geographically as long as they are reachable
39 : * through the underlay L3 network.
40 : *
41 : * You can refer to this kind of L2 overlay bridge domain as a VXLAN
42 : * (Virtual eXtensible VLAN) segment.
43 : */
44 :
45 :
46 : vxlan_main_t vxlan_main; /* the single global VXLAN state instance; the functions in this file reach it via &vxlan_main */
47 :
48 : static u32
49 980 : vxlan_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
50 : {
51 : /* nothing for now */
52 980 : return 0;
53 : }
54 :
55 : static clib_error_t *
56 0 : vxlan_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
57 : u32 frame_size)
58 : {
59 : /* nothing for now */
60 0 : return 0;
61 : }
62 :
63 : static u8 *
64 0 : format_decap_next (u8 * s, va_list * args)
65 : {
66 0 : u32 next_index = va_arg (*args, u32);
67 :
68 0 : if (next_index == VXLAN_INPUT_NEXT_DROP)
69 0 : return format (s, "drop");
70 : else
71 0 : return format (s, "index %d", next_index);
72 : return s;
73 : }
74 :
75 : u8 *
76 484 : format_vxlan_tunnel (u8 * s, va_list * args)
77 : {
78 484 : vxlan_tunnel_t *t = va_arg (*args, vxlan_tunnel_t *);
79 :
80 484 : s = format (s,
81 : "[%d] instance %d src %U dst %U src_port %d dst_port %d vni %d "
82 : "fib-idx %d sw-if-idx %d ",
83 : t->dev_instance, t->user_instance, format_ip46_address, &t->src,
84 : IP46_TYPE_ANY, format_ip46_address, &t->dst, IP46_TYPE_ANY,
85 484 : t->src_port, t->dst_port, t->vni, t->encap_fib_index,
86 : t->sw_if_index);
87 :
88 484 : s = format (s, "encap-dpo-idx %d ", t->next_dpo.dpoi_index);
89 :
90 484 : if (PREDICT_FALSE (t->decap_next_index != VXLAN_INPUT_NEXT_L2_INPUT))
91 0 : s = format (s, "decap-next-%U ", format_decap_next, t->decap_next_index);
92 :
93 484 : if (PREDICT_FALSE (ip46_address_is_multicast (&t->dst)))
94 22 : s = format (s, "mcast-sw-if-idx %d ", t->mcast_sw_if_index);
95 :
96 484 : if (t->flow_index != ~0)
97 0 : s = format (s, "flow-index %d [%U]", t->flow_index,
98 : format_flow_enabled_hw, t->flow_index);
99 :
100 484 : return s;
101 : }
102 :
103 : static u8 *
104 3416 : format_vxlan_name (u8 * s, va_list * args)
105 : {
106 3416 : u32 dev_instance = va_arg (*args, u32);
107 3416 : vxlan_main_t *vxm = &vxlan_main;
108 : vxlan_tunnel_t *t;
109 :
110 3416 : if (dev_instance == ~0)
111 0 : return format (s, "<cached-unused>");
112 :
113 3416 : if (dev_instance >= vec_len (vxm->tunnels))
114 0 : return format (s, "<improperly-referenced>");
115 :
116 3416 : t = pool_elt_at_index (vxm->tunnels, dev_instance);
117 :
118 3416 : return format (s, "vxlan_tunnel%d", t->user_instance);
119 : }
120 :
121 : static clib_error_t *
122 5840 : vxlan_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
123 : {
124 5840 : u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
125 : VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
126 5840 : vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
127 :
128 5840 : return /* no error */ 0;
129 : }
130 :
/*
 * Device class used by every VXLAN tunnel interface created in this file.
 * It supplies the interface-name formatter, the TX trace formatter and the
 * admin up/down handler.
 */
131 : /* *INDENT-OFF* */
132 1119 : VNET_DEVICE_CLASS (vxlan_device_class, static) = {
133 : .name = "VXLAN",
134 : .format_device_name = format_vxlan_name,
135 : .format_tx_trace = format_vxlan_encap_trace,
136 : .admin_up_down_function = vxlan_interface_admin_up_down,
137 : };
138 : /* *INDENT-ON* */
139 :
140 : static u8 *
141 0 : format_vxlan_header_with_length (u8 * s, va_list * args)
142 : {
143 0 : u32 dev_instance = va_arg (*args, u32);
144 0 : s = format (s, "unimplemented dev %u", dev_instance);
145 0 : return s;
146 : }
147 :
/*
 * Hardware interface class passed to vnet_register_interface() for tunnels
 * created in L3 mode (a->is_l3); other tunnels are registered through
 * vnet_eth_register_interface() instead.
 */
148 : /* *INDENT-OFF* */
149 1119 : VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = {
150 : .name = "VXLAN",
151 : .format_header = format_vxlan_header_with_length,
152 : .build_rewrite = default_build_rewrite,
153 : };
154 : /* *INDENT-ON* */
155 :
156 : static void
157 719 : vxlan_tunnel_restack_dpo (vxlan_tunnel_t * t)
158 : {
159 719 : u8 is_ip4 = ip46_address_is_ip4 (&t->dst);
160 719 : dpo_id_t dpo = DPO_INVALID;
161 719 : fib_forward_chain_type_t forw_type = is_ip4 ?
162 719 : FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
163 :
164 719 : fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo);
165 :
166 : /* vxlan uses the payload hash as the udp source port
167 : * hence the packet's hash is unknown
168 : * skip single bucket load balance dpo's */
169 2118 : while (DPO_LOAD_BALANCE == dpo.dpoi_type)
170 : {
171 : const load_balance_t *lb;
172 : const dpo_id_t *choice;
173 :
174 1399 : lb = load_balance_get (dpo.dpoi_index);
175 1399 : if (lb->lb_n_buckets > 1)
176 0 : break;
177 :
178 1399 : choice = load_balance_get_bucket_i (lb, 0);
179 :
180 1399 : if (DPO_RECEIVE == choice->dpoi_type)
181 1 : dpo_copy (&dpo, drop_dpo_get (choice->dpoi_proto));
182 : else
183 1398 : dpo_copy (&dpo, choice);
184 : }
185 :
186 719 : u32 encap_index = is_ip4 ?
187 719 : vxlan4_encap_node.index : vxlan6_encap_node.index;
188 719 : dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
189 719 : dpo_reset (&dpo);
190 719 : }
191 :
192 : static vxlan_tunnel_t *
193 241 : vxlan_tunnel_from_fib_node (fib_node_t * node)
194 : {
195 241 : ASSERT (FIB_NODE_TYPE_VXLAN_TUNNEL == node->fn_type);
196 241 : return ((vxlan_tunnel_t *) (((char *) node) -
197 : STRUCT_OFFSET_OF (vxlan_tunnel_t, node)));
198 : }
199 :
200 : /**
201 : * Function definition to backwalk a FIB node -
202 : * Here we will restack the new dpo of VXLAN DIP to encap node.
203 : */
204 : static fib_node_back_walk_rc_t
205 241 : vxlan_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
206 : {
207 241 : vxlan_tunnel_restack_dpo (vxlan_tunnel_from_fib_node (node));
208 241 : return (FIB_NODE_BACK_WALK_CONTINUE);
209 : }
210 :
211 : /**
212 : * Function definition to get a FIB node from its index
213 : */
214 : static fib_node_t *
215 241 : vxlan_tunnel_fib_node_get (fib_node_index_t index)
216 : {
217 : vxlan_tunnel_t *t;
218 241 : vxlan_main_t *vxm = &vxlan_main;
219 :
220 241 : t = pool_elt_at_index (vxm->tunnels, index);
221 :
222 241 : return (&t->node);
223 : }
224 :
225 : /**
226 : * FIB callback invoked when the last lock on the node has gone.
 * For VXLAN tunnels this is not expected to happen, so the body only asserts.
227 : */
228 : static void
229 0 : vxlan_tunnel_last_lock_gone (fib_node_t * node)
230 : {
231 : /*
232 : * The VXLAN tunnel is a root of the graph. As such
233 : * it never has children and thus is never locked.
 * Reaching this function therefore indicates a logic error.
234 : */
235 0 : ASSERT (0);
236 0 : }
237 :
238 : /*
239 : * Virtual function table registered by VXLAN tunnels
240 : * for participation in the FIB object graph.
 *
 * fnv_get maps a tunnel pool index to the FIB node embedded in the tunnel,
 * fnv_back_walk restacks the tunnel's encap DPO, and fnv_last_lock only
 * asserts because it is not expected to be called.
241 : */
242 : const static fib_node_vft_t vxlan_vft = {
243 : .fnv_get = vxlan_tunnel_fib_node_get,
244 : .fnv_last_lock = vxlan_tunnel_last_lock_gone,
245 : .fnv_back_walk = vxlan_tunnel_back_walk,
246 : };
247 :
/*
 * X-macro listing the fields that share a name between the add/del argument
 * structure and the tunnel. Users define _(x) before expanding it: once to
 * copy a->x into t->x, and once to build the argument structure from the
 * CLI handler's local variables of the same names.
 */
248 : #define foreach_copy_field \
249 : _ (vni) \
250 : _ (mcast_sw_if_index) \
251 : _ (encap_fib_index) \
252 : _ (decap_next_index) \
253 : _ (src) \
254 : _ (dst) \
255 : _ (src_port) \
256 : _ (dst_port)
257 :
258 : static void
259 2920 : vxlan_rewrite (vxlan_tunnel_t * t, bool is_ip6)
260 : {
261 : union
262 : {
263 : ip4_vxlan_header_t h4;
264 : ip6_vxlan_header_t h6;
265 : } h;
266 2920 : int len = is_ip6 ? sizeof h.h6 : sizeof h.h4;
267 :
268 : udp_header_t *udp;
269 : vxlan_header_t *vxlan;
270 : /* Fixed portion of the (outer) ip header */
271 :
272 2920 : clib_memset (&h, 0, sizeof (h));
273 2920 : if (!is_ip6)
274 : {
275 2675 : ip4_header_t *ip = &h.h4.ip4;
276 2675 : udp = &h.h4.udp, vxlan = &h.h4.vxlan;
277 2675 : ip->ip_version_and_header_length = 0x45;
278 2675 : ip->ttl = 254;
279 2675 : ip->protocol = IP_PROTOCOL_UDP;
280 :
281 2675 : ip->src_address = t->src.ip4;
282 2675 : ip->dst_address = t->dst.ip4;
283 :
284 : /* we fix up the ip4 header length and checksum after-the-fact */
285 2675 : ip->checksum = ip4_header_checksum (ip);
286 : }
287 : else
288 : {
289 245 : ip6_header_t *ip = &h.h6.ip6;
290 245 : udp = &h.h6.udp, vxlan = &h.h6.vxlan;
291 245 : ip->ip_version_traffic_class_and_flow_label =
292 245 : clib_host_to_net_u32 (6 << 28);
293 245 : ip->hop_limit = 255;
294 245 : ip->protocol = IP_PROTOCOL_UDP;
295 :
296 245 : ip->src_address = t->src.ip6;
297 245 : ip->dst_address = t->dst.ip6;
298 : }
299 :
300 : /* UDP header, randomize src port on something, maybe? */
301 2920 : udp->src_port = clib_host_to_net_u16 (t->src_port);
302 2920 : udp->dst_port = clib_host_to_net_u16 (t->dst_port);
303 :
304 : /* VXLAN header */
305 2920 : vnet_set_vni_and_flags (vxlan, t->vni);
306 2920 : vnet_rewrite_set_data (*t, &h, len);
307 2920 : }
308 :
309 : static bool
310 2920 : vxlan_decap_next_is_valid (vxlan_main_t * vxm, u32 is_ip6,
311 : u32 decap_next_index)
312 : {
313 2920 : vlib_main_t *vm = vxm->vlib_main;
314 2920 : u32 input_idx = (!is_ip6) ?
315 2920 : vxlan4_input_node.index : vxlan6_input_node.index;
316 2920 : vlib_node_runtime_t *r = vlib_node_get_runtime (vm, input_idx);
317 :
318 2920 : return decap_next_index < r->n_next_nodes;
319 : }
320 :
/*
 * State shared by all tunnels that use the same multicast destination:
 * the mfib entry index and the mcast adjacency index, overlaid with a u64
 * so that the pair can be stored as a single value in the
 * vxlan_main.mcast_shared hash.
 */
321 : /* *INDENT-OFF* */
322 : typedef CLIB_PACKED(union
323 : {
324 : struct
325 : {
326 : fib_node_index_t mfib_entry_index;
327 : adj_index_t mcast_adj_index;
328 : };
329 : u64 as_u64;
330 : }) mcast_shared_t;
331 : /* *INDENT-ON* */
332 :
333 : static inline mcast_shared_t
334 4664 : mcast_shared_get (ip46_address_t * ip)
335 : {
336 4664 : ASSERT (ip46_address_is_multicast (ip));
337 4664 : uword *p = hash_get_mem (vxlan_main.mcast_shared, ip);
338 4664 : ALWAYS_ASSERT (p);
339 4664 : mcast_shared_t ret = {.as_u64 = *p };
340 4664 : return ret;
341 : }
342 :
343 : static inline void
344 2222 : mcast_shared_add (ip46_address_t * dst, fib_node_index_t mfei, adj_index_t ai)
345 : {
346 2222 : mcast_shared_t new_ep = {
347 : .mcast_adj_index = ai,
348 : .mfib_entry_index = mfei,
349 : };
350 :
351 2222 : hash_set_mem_alloc (&vxlan_main.mcast_shared, dst, new_ep.as_u64);
352 2222 : }
353 :
354 : static inline void
355 2222 : mcast_shared_remove (ip46_address_t * dst)
356 : {
357 2222 : mcast_shared_t ep = mcast_shared_get (dst);
358 :
359 2222 : adj_unlock (ep.mcast_adj_index);
360 2222 : mfib_table_entry_delete_index (ep.mfib_entry_index, MFIB_SOURCE_VXLAN);
361 :
362 2222 : hash_unset_mem_free (&vxlan_main.mcast_shared, dst);
363 2222 : }
364 :
365 5840 : int vnet_vxlan_add_del_tunnel
366 : (vnet_vxlan_add_del_tunnel_args_t * a, u32 * sw_if_indexp)
367 : {
368 5840 : vxlan_main_t *vxm = &vxlan_main;
369 5840 : vnet_main_t *vnm = vxm->vnet_main;
370 : vxlan_decap_info_t *p;
371 5840 : u32 sw_if_index = ~0;
372 : vxlan4_tunnel_key_t key4;
373 : vxlan6_tunnel_key_t key6;
374 5840 : u32 is_ip6 = a->is_ip6;
375 5840 : vlib_main_t *vm = vlib_get_main ();
376 : u8 hw_addr[6];
377 :
378 : /* Set udp-ports */
379 5840 : if (a->src_port == 0)
380 32 : a->src_port = is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
381 :
382 5840 : if (a->dst_port == 0)
383 32 : a->dst_port = is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
384 :
385 : int not_found;
386 5840 : if (!is_ip6)
387 : {
388 : /* ip4 mcast is indexed by mcast addr only */
389 5350 : key4.key[0] = ip46_address_is_multicast (&a->dst) ?
390 5350 : a->dst.ip4.as_u32 :
391 488 : a->dst.ip4.as_u32 | (((u64) a->src.ip4.as_u32) << 32);
392 5350 : key4.key[1] = ((u64) clib_host_to_net_u16 (a->src_port) << 48) |
393 10700 : (((u64) a->encap_fib_index) << 32) |
394 5350 : clib_host_to_net_u32 (a->vni << 8);
395 : not_found =
396 5350 : clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
397 5350 : p = (void *) &key4.value;
398 : }
399 : else
400 : {
401 490 : key6.key[0] = a->dst.ip6.as_u64[0];
402 490 : key6.key[1] = a->dst.ip6.as_u64[1];
403 490 : key6.key[2] = (((u64) clib_host_to_net_u16 (a->src_port) << 48) |
404 980 : ((u64) a->encap_fib_index) << 32) |
405 490 : clib_host_to_net_u32 (a->vni << 8);
406 : not_found =
407 490 : clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
408 490 : p = (void *) &key6.value;
409 : }
410 :
411 5840 : if (not_found)
412 2920 : p = 0;
413 :
414 5840 : if (a->is_add)
415 : {
416 2920 : l2input_main_t *l2im = &l2input_main;
417 : u32 dev_instance; /* real dev instance tunnel index */
418 : u32 user_instance; /* request and actual instance number */
419 :
420 : /* adding a tunnel: tunnel must not already exist */
421 2920 : if (p)
422 0 : return VNET_API_ERROR_TUNNEL_EXIST;
423 :
424 : /*if not set explicitly, default to l2 */
425 2920 : if (a->decap_next_index == ~0)
426 2920 : a->decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
427 2920 : if (!vxlan_decap_next_is_valid (vxm, is_ip6, a->decap_next_index))
428 0 : return VNET_API_ERROR_INVALID_DECAP_NEXT;
429 :
430 : vxlan_tunnel_t *t;
431 2920 : pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES);
432 2920 : clib_memset (t, 0, sizeof (*t));
433 2920 : dev_instance = t - vxm->tunnels;
434 :
435 : /* copy from arg structure */
436 : #define _(x) t->x = a->x;
437 2920 : foreach_copy_field;
438 : #undef _
439 :
440 2920 : vxlan_rewrite (t, is_ip6);
441 : /*
442 : * Reconcile the real dev_instance and a possible requested instance.
443 : */
444 2920 : user_instance = a->instance;
445 2920 : if (user_instance == ~0)
446 2920 : user_instance = dev_instance;
447 2920 : if (hash_get (vxm->instance_used, user_instance))
448 : {
449 0 : pool_put (vxm->tunnels, t);
450 0 : return VNET_API_ERROR_INSTANCE_IN_USE;
451 : }
452 :
453 2920 : hash_set (vxm->instance_used, user_instance, 1);
454 :
455 2920 : t->dev_instance = dev_instance; /* actual */
456 2920 : t->user_instance = user_instance; /* name */
457 2920 : t->flow_index = ~0;
458 :
459 2920 : if (a->is_l3)
460 0 : t->hw_if_index =
461 0 : vnet_register_interface (vnm, vxlan_device_class.index, dev_instance,
462 : vxlan_hw_class.index, dev_instance);
463 : else
464 : {
465 2920 : vnet_eth_interface_registration_t eir = {};
466 2920 : f64 now = vlib_time_now (vm);
467 : u32 rnd;
468 2920 : rnd = (u32) (now * 1e6);
469 2920 : rnd = random_u32 (&rnd);
470 2920 : memcpy (hw_addr + 2, &rnd, sizeof (rnd));
471 2920 : hw_addr[0] = 2;
472 2920 : hw_addr[1] = 0xfe;
473 :
474 2920 : eir.dev_class_index = vxlan_device_class.index;
475 2920 : eir.dev_instance = dev_instance;
476 2920 : eir.address = hw_addr;
477 2920 : eir.cb.flag_change = vxlan_eth_flag_change;
478 2920 : eir.cb.set_max_frame_size = vxlan_eth_set_max_frame_size;
479 2920 : t->hw_if_index = vnet_eth_register_interface (vnm, &eir);
480 : }
481 :
482 2920 : vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
483 :
484 : /* Set vxlan tunnel output node */
485 2920 : u32 encap_index = !is_ip6 ?
486 2920 : vxlan4_encap_node.index : vxlan6_encap_node.index;
487 2920 : vnet_set_interface_output_node (vnm, t->hw_if_index, encap_index);
488 :
489 2920 : t->sw_if_index = sw_if_index = hi->sw_if_index;
490 :
491 : /* copy the key */
492 : int add_failed;
493 2920 : if (is_ip6)
494 : {
495 245 : key6.value = (u64) dev_instance;
496 245 : add_failed = clib_bihash_add_del_24_8 (&vxm->vxlan6_tunnel_by_key,
497 : &key6, 1 /*add */ );
498 : }
499 : else
500 : {
501 2675 : vxlan_decap_info_t di = {.sw_if_index = t->sw_if_index, };
502 2675 : if (ip46_address_is_multicast (&t->dst))
503 2431 : di.local_ip = t->src.ip4;
504 : else
505 244 : di.next_index = t->decap_next_index;
506 2675 : key4.value = di.as_u64;
507 2675 : add_failed = clib_bihash_add_del_16_8 (&vxm->vxlan4_tunnel_by_key,
508 : &key4, 1 /*add */ );
509 : }
510 :
511 2920 : if (add_failed)
512 : {
513 0 : if (a->is_l3)
514 0 : vnet_delete_hw_interface (vnm, t->hw_if_index);
515 : else
516 0 : ethernet_delete_interface (vnm, t->hw_if_index);
517 0 : hash_unset (vxm->instance_used, t->user_instance);
518 0 : pool_put (vxm->tunnels, t);
519 0 : return VNET_API_ERROR_INVALID_REGISTRATION;
520 : }
521 :
522 3219 : vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index,
523 : ~0);
524 2920 : vxm->tunnel_index_by_sw_if_index[sw_if_index] = dev_instance;
525 :
526 : /* setup l2 input config with l2 feature and bd 0 to drop packet */
527 2920 : vec_validate (l2im->configs, sw_if_index);
528 2920 : l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
529 2920 : l2im->configs[sw_if_index].bd_index = 0;
530 :
531 2920 : vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
532 2920 : si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
533 2920 : vnet_sw_interface_set_flags (vnm, sw_if_index,
534 : VNET_SW_INTERFACE_FLAG_ADMIN_UP);
535 :
536 2920 : fib_node_init (&t->node, FIB_NODE_TYPE_VXLAN_TUNNEL);
537 : fib_prefix_t tun_dst_pfx;
538 2920 : vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
539 :
540 2920 : fib_protocol_t fp = fib_ip_proto (is_ip6);
541 2920 : fib_prefix_from_ip46_addr (fp, &t->dst, &tun_dst_pfx);
542 2920 : if (!ip46_address_is_multicast (&t->dst))
543 : {
544 : /* Unicast tunnel -
545 : * source the FIB entry for the tunnel's destination
546 : * and become a child thereof. The tunnel will then get poked
547 : * when the forwarding for the entry updates, and the tunnel can
548 : * re-stack accordingly
549 : */
550 478 : vtep_addr_ref (&vxm->vtep_table, t->encap_fib_index, &t->src);
551 956 : t->fib_entry_index = fib_entry_track (t->encap_fib_index,
552 : &tun_dst_pfx,
553 : FIB_NODE_TYPE_VXLAN_TUNNEL,
554 : dev_instance,
555 478 : &t->sibling_index);
556 478 : vxlan_tunnel_restack_dpo (t);
557 : }
558 : else
559 : {
560 : /* Multicast tunnel -
561 : * as the same mcast group can be used for multiple mcast tunnels
562 : * with different VNIs, create the output fib adjacency only if
563 : * it does not already exist
564 : */
565 2442 : if (vtep_addr_ref (&vxm->vtep_table,
566 2442 : t->encap_fib_index, &t->dst) == 1)
567 : {
568 : fib_node_index_t mfei;
569 : adj_index_t ai;
570 4444 : fib_route_path_t path = {
571 2222 : .frp_proto = fib_proto_to_dpo (fp),
572 : .frp_addr = zero_addr,
573 : .frp_sw_if_index = 0xffffffff,
574 : .frp_fib_index = ~0,
575 : .frp_weight = 1,
576 : .frp_flags = FIB_ROUTE_PATH_LOCAL,
577 : .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
578 : };
579 2222 : const mfib_prefix_t mpfx = {
580 : .fp_proto = fp,
581 : .fp_len = (is_ip6 ? 128 : 32),
582 : .fp_grp_addr = tun_dst_pfx.fp_addr,
583 : };
584 :
585 : /*
586 : * Setup the (*,G) to receive traffic on the mcast group
587 : * - the forwarding interface is for-us
588 : * - the accepting interface is that from the API
589 : */
590 2222 : mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
591 : MFIB_SOURCE_VXLAN,
592 : MFIB_ENTRY_FLAG_NONE, &path);
593 :
594 2222 : path.frp_sw_if_index = a->mcast_sw_if_index;
595 2222 : path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
596 2222 : path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
597 2222 : mfei = mfib_table_entry_path_update (
598 2222 : t->encap_fib_index, &mpfx, MFIB_SOURCE_VXLAN,
599 : MFIB_ENTRY_FLAG_NONE, &path);
600 :
601 : /*
602 : * Create the mcast adjacency to send traffic to the group
603 : */
604 2222 : ai = adj_mcast_add_or_lock (fp,
605 2222 : fib_proto_to_link (fp),
606 : a->mcast_sw_if_index);
607 :
608 : /*
609 : * create a new end-point
610 : */
611 2222 : mcast_shared_add (&t->dst, mfei, ai);
612 : }
613 :
614 2442 : dpo_id_t dpo = DPO_INVALID;
615 2442 : mcast_shared_t ep = mcast_shared_get (&t->dst);
616 :
617 : /* Stack shared mcast dst mac addr rewrite on encap */
618 2442 : dpo_set (&dpo, DPO_ADJACENCY_MCAST,
619 2442 : fib_proto_to_dpo (fp), ep.mcast_adj_index);
620 :
621 2442 : dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
622 2442 : dpo_reset (&dpo);
623 2442 : flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
624 : }
625 :
626 2920 : vnet_get_sw_interface (vnet_get_main (), sw_if_index)->flood_class =
627 : flood_class;
628 : }
629 : else
630 : {
631 : /* deleting a tunnel: tunnel must exist */
632 2920 : if (!p)
633 0 : return VNET_API_ERROR_NO_SUCH_ENTRY;
634 :
635 2920 : u32 instance = is_ip6 ? key6.value :
636 2675 : vxm->tunnel_index_by_sw_if_index[p->sw_if_index];
637 2920 : vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, instance);
638 :
639 2920 : sw_if_index = t->sw_if_index;
640 2920 : vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
641 :
642 2920 : vxm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
643 :
644 2920 : if (!is_ip6)
645 2675 : clib_bihash_add_del_16_8 (&vxm->vxlan4_tunnel_by_key, &key4,
646 : 0 /*del */ );
647 : else
648 245 : clib_bihash_add_del_24_8 (&vxm->vxlan6_tunnel_by_key, &key6,
649 : 0 /*del */ );
650 :
651 2920 : if (!ip46_address_is_multicast (&t->dst))
652 : {
653 478 : if (t->flow_index != ~0)
654 0 : vnet_flow_del (vnm, t->flow_index);
655 :
656 478 : vtep_addr_unref (&vxm->vtep_table, t->encap_fib_index, &t->src);
657 478 : fib_entry_untrack (t->fib_entry_index, t->sibling_index);
658 : }
659 2442 : else if (vtep_addr_unref (&vxm->vtep_table,
660 : t->encap_fib_index, &t->dst) == 0)
661 : {
662 2222 : mcast_shared_remove (&t->dst);
663 : }
664 :
665 2920 : vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, t->hw_if_index);
666 2920 : if (hw->dev_class_index == vxlan_device_class.index)
667 2920 : vnet_delete_hw_interface (vnm, t->hw_if_index);
668 : else
669 0 : ethernet_delete_interface (vnm, t->hw_if_index);
670 2920 : hash_unset (vxm->instance_used, t->user_instance);
671 :
672 2920 : fib_node_deinit (&t->node);
673 2920 : pool_put (vxm->tunnels, t);
674 : }
675 :
676 5840 : if (sw_if_indexp)
677 5840 : *sw_if_indexp = sw_if_index;
678 :
679 5840 : if (a->is_add)
680 : {
681 : /* register udp ports */
682 2920 : if (!is_ip6 && !udp_is_valid_dst_port (a->src_port, 1))
683 6 : udp_register_dst_port (vxm->vlib_main, a->src_port,
684 : vxlan4_input_node.index, 1);
685 2920 : if (is_ip6 && !udp_is_valid_dst_port (a->src_port, 0))
686 5 : udp_register_dst_port (vxm->vlib_main, a->src_port,
687 : vxlan6_input_node.index, 0);
688 : }
689 :
690 5840 : return 0;
691 : }
692 :
693 : static uword
694 0 : get_decap_next_for_node (u32 node_index, u32 ipv4_set)
695 : {
696 0 : vxlan_main_t *vxm = &vxlan_main;
697 0 : vlib_main_t *vm = vxm->vlib_main;
698 0 : uword input_node = (ipv4_set) ? vxlan4_input_node.index :
699 0 : vxlan6_input_node.index;
700 :
701 0 : return vlib_node_add_next (vm, input_node, node_index);
702 : }
703 :
704 : static uword
705 0 : unformat_decap_next (unformat_input_t * input, va_list * args)
706 : {
707 0 : u32 *result = va_arg (*args, u32 *);
708 0 : u32 ipv4_set = va_arg (*args, int);
709 0 : vxlan_main_t *vxm = &vxlan_main;
710 0 : vlib_main_t *vm = vxm->vlib_main;
711 : u32 node_index;
712 : u32 tmp;
713 :
714 0 : if (unformat (input, "l2"))
715 0 : *result = VXLAN_INPUT_NEXT_L2_INPUT;
716 0 : else if (unformat (input, "node %U", unformat_vlib_node, vm, &node_index))
717 0 : *result = get_decap_next_for_node (node_index, ipv4_set);
718 0 : else if (unformat (input, "%d", &tmp))
719 0 : *result = tmp;
720 : else
721 0 : return 0;
722 0 : return 1;
723 : }
724 :
725 : static clib_error_t *
726 0 : vxlan_add_del_tunnel_command_fn (vlib_main_t * vm,
727 : unformat_input_t * input,
728 : vlib_cli_command_t * cmd)
729 : {
730 0 : unformat_input_t _line_input, *line_input = &_line_input;
731 0 : ip46_address_t src = ip46_address_initializer, dst =
732 : ip46_address_initializer;
733 0 : u8 is_add = 1;
734 0 : u8 src_set = 0;
735 0 : u8 dst_set = 0;
736 0 : u8 grp_set = 0;
737 0 : u8 ipv4_set = 0;
738 0 : u8 ipv6_set = 0;
739 0 : u8 is_l3 = 0;
740 0 : u32 instance = ~0;
741 0 : u32 encap_fib_index = 0;
742 0 : u32 mcast_sw_if_index = ~0;
743 0 : u32 decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
744 0 : u32 vni = 0;
745 0 : u32 src_port = 0;
746 0 : u32 dst_port = 0;
747 : u32 table_id;
748 0 : clib_error_t *parse_error = NULL;
749 :
750 : /* Get a line of input. */
751 0 : if (!unformat_user (input, unformat_line_input, line_input))
752 0 : return 0;
753 :
754 0 : while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
755 : {
756 0 : if (unformat (line_input, "del"))
757 : {
758 0 : is_add = 0;
759 : }
760 0 : else if (unformat (line_input, "instance %d", &instance))
761 : ;
762 0 : else if (unformat (line_input, "src %U",
763 : unformat_ip46_address, &src, IP46_TYPE_ANY))
764 : {
765 0 : src_set = 1;
766 0 : ip46_address_is_ip4 (&src) ? (ipv4_set = 1) : (ipv6_set = 1);
767 : }
768 0 : else if (unformat (line_input, "dst %U",
769 : unformat_ip46_address, &dst, IP46_TYPE_ANY))
770 : {
771 0 : dst_set = 1;
772 0 : ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
773 : }
774 0 : else if (unformat (line_input, "group %U %U",
775 : unformat_ip46_address, &dst, IP46_TYPE_ANY,
776 : unformat_vnet_sw_interface,
777 : vnet_get_main (), &mcast_sw_if_index))
778 : {
779 0 : grp_set = dst_set = 1;
780 0 : ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
781 : }
782 0 : else if (unformat (line_input, "encap-vrf-id %d", &table_id))
783 : {
784 : encap_fib_index =
785 0 : fib_table_find (fib_ip_proto (ipv6_set), table_id);
786 : }
787 0 : else if (unformat (line_input, "l3"))
788 0 : is_l3 = 1;
789 0 : else if (unformat (line_input, "decap-next %U", unformat_decap_next,
790 : &decap_next_index, ipv4_set))
791 : ;
792 0 : else if (unformat (line_input, "vni %d", &vni))
793 : ;
794 0 : else if (unformat (line_input, "src_port %d", &src_port))
795 : ;
796 0 : else if (unformat (line_input, "dst_port %d", &dst_port))
797 : ;
798 : else
799 : {
800 0 : parse_error = clib_error_return (0, "parse error: '%U'",
801 : format_unformat_error, line_input);
802 0 : break;
803 : }
804 : }
805 :
806 0 : unformat_free (line_input);
807 :
808 0 : if (parse_error)
809 0 : return parse_error;
810 :
811 0 : if (is_l3 && decap_next_index == VXLAN_INPUT_NEXT_L2_INPUT)
812 : {
813 0 : vlib_node_t *node = vlib_get_node_by_name (
814 : vm, (u8 *) (ipv4_set ? "ip4-input" : "ip6-input"));
815 0 : decap_next_index = get_decap_next_for_node (node->index, ipv4_set);
816 : }
817 :
818 0 : if (encap_fib_index == ~0)
819 0 : return clib_error_return (0, "nonexistent encap-vrf-id %d", table_id);
820 :
821 0 : if (src_set == 0)
822 0 : return clib_error_return (0, "tunnel src address not specified");
823 :
824 0 : if (dst_set == 0)
825 0 : return clib_error_return (0, "tunnel dst address not specified");
826 :
827 0 : if (grp_set && !ip46_address_is_multicast (&dst))
828 0 : return clib_error_return (0, "tunnel group address not multicast");
829 :
830 0 : if (grp_set == 0 && ip46_address_is_multicast (&dst))
831 0 : return clib_error_return (0, "dst address must be unicast");
832 :
833 0 : if (grp_set && mcast_sw_if_index == ~0)
834 0 : return clib_error_return (0, "tunnel nonexistent multicast device");
835 :
836 0 : if (ipv4_set && ipv6_set)
837 0 : return clib_error_return (0, "both IPv4 and IPv6 addresses specified");
838 :
839 0 : if (ip46_address_cmp (&src, &dst) == 0)
840 0 : return clib_error_return (0, "src and dst addresses are identical");
841 :
842 0 : if (decap_next_index == ~0)
843 0 : return clib_error_return (0, "next node not found");
844 :
845 0 : if (vni == 0)
846 0 : return clib_error_return (0, "vni not specified");
847 :
848 0 : if (vni >> 24)
849 0 : return clib_error_return (0, "vni %d out of range", vni);
850 :
851 0 : vnet_vxlan_add_del_tunnel_args_t a = { .is_add = is_add,
852 : .is_ip6 = ipv6_set,
853 : .is_l3 = is_l3,
854 : .instance = instance,
855 : #define _(x) .x = x,
856 : foreach_copy_field
857 : #undef _
858 : };
859 :
860 : u32 tunnel_sw_if_index;
861 0 : int rv = vnet_vxlan_add_del_tunnel (&a, &tunnel_sw_if_index);
862 :
863 0 : switch (rv)
864 : {
865 0 : case 0:
866 0 : if (is_add)
867 0 : vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
868 : vnet_get_main (), tunnel_sw_if_index);
869 0 : break;
870 :
871 0 : case VNET_API_ERROR_TUNNEL_EXIST:
872 0 : return clib_error_return (0, "tunnel already exists...");
873 :
874 0 : case VNET_API_ERROR_NO_SUCH_ENTRY:
875 0 : return clib_error_return (0, "tunnel does not exist...");
876 :
877 0 : case VNET_API_ERROR_INSTANCE_IN_USE:
878 0 : return clib_error_return (0, "Instance is in use");
879 :
880 0 : default:
881 0 : return clib_error_return
882 : (0, "vnet_vxlan_add_del_tunnel returned %d", rv);
883 : }
884 :
885 0 : return 0;
886 : }
887 :
888 : /*?
889 : * Add or delete a VXLAN Tunnel.
890 : *
891 : * VXLAN provides the features needed to allow L2 bridge domains (BDs)
892 : * to span multiple servers. This is done by building an L2 overlay on
893 : * top of an L3 network underlay using VXLAN tunnels.
894 : *
895 : * This makes it possible for servers to be co-located in the same data
896 : * center or be separated geographically as long as they are reachable
897 : * through the underlay L3 network.
898 : *
899 : * You can refer to this kind of L2 overlay bridge domain as a VXLAN
900 : * (Virtual eXtensible VLAN) segment.
901 : *
 * A non-zero vni that fits in 24 bits is required by the command handler.
 *
902 : * @cliexpar
903 : * Example of how to create a VXLAN Tunnel:
904 : * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id
905 : 7}
906 : * Example of how to create a VXLAN Tunnel with a known name, vxlan_tunnel42:
907 : * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 instance 42}
908 : * Example of how to create a multicast VXLAN Tunnel with a known name,
909 : vxlan_tunnel23:
910 : * @cliexcmd{create vxlan tunnel src 10.0.3.1 group 239.1.1.1
911 : GigabitEthernet0/8/0 vni 13 instance 23}
912 : * Example of how to create a VXLAN Tunnel with custom udp-ports:
913 : * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 src_port
914 : 59000 dst_port 59001}
915 : * Example of how to delete a VXLAN Tunnel:
916 : * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del}
917 : ?*/
918 : /* *INDENT-OFF* */
919 5039 : VLIB_CLI_COMMAND (create_vxlan_tunnel_command, static) = {
920 : .path = "create vxlan tunnel",
921 : .short_help =
922 : "create vxlan tunnel src <local-vtep-addr>"
923 : " {dst <remote-vtep-addr>|group <mcast-vtep-addr> <intf-name>} vni <nn>"
924 : " [instance <id>]"
925 : " [encap-vrf-id <nn>] [decap-next [l2|node <name>]] [del] [l3]"
926 : " [src_port <local-vtep-udp-port>] [dst_port <remote-vtep-udp-port>]",
927 : .function = vxlan_add_del_tunnel_command_fn,
928 : };
929 : /* *INDENT-ON* */
930 :
931 : static clib_error_t *
932 22 : show_vxlan_tunnel_command_fn (vlib_main_t * vm,
933 : unformat_input_t * input,
934 : vlib_cli_command_t * cmd)
935 : {
936 22 : vxlan_main_t *vxm = &vxlan_main;
937 : vxlan_tunnel_t *t;
938 22 : int raw = 0;
939 :
940 22 : while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
941 : {
942 0 : if (unformat (input, "raw"))
943 0 : raw = 1;
944 : else
945 0 : return clib_error_return (0, "parse error: '%U'",
946 : format_unformat_error, input);
947 : }
948 :
949 22 : if (pool_elts (vxm->tunnels) == 0)
950 0 : vlib_cli_output (vm, "No vxlan tunnels configured...");
951 :
952 : /* *INDENT-OFF* */
953 506 : pool_foreach (t, vxm->tunnels)
954 : {
955 484 : vlib_cli_output (vm, "%U", format_vxlan_tunnel, t);
956 : }
957 : /* *INDENT-ON* */
958 :
959 22 : if (raw)
960 : {
961 0 : vlib_cli_output (vm, "Raw IPv4 Hash Table:\n%U\n",
962 : format_bihash_16_8, &vxm->vxlan4_tunnel_by_key,
963 : 1 /* verbose */ );
964 0 : vlib_cli_output (vm, "Raw IPv6 Hash Table:\n%U\n",
965 : format_bihash_24_8, &vxm->vxlan6_tunnel_by_key,
966 : 1 /* verbose */ );
967 : }
968 :
969 22 : return 0;
970 : }
971 :
972 : /*?
973 : * Display all the VXLAN Tunnel entries.
974 : *
975 : * @cliexpar
976 : * Example of how to display the VXLAN Tunnel entries:
977 : * @cliexstart{show vxlan tunnel}
978 : * [0] src 10.0.3.1 dst 10.0.3.3 src_port 4789 dst_port 4789 vni 13
979 : encap_fib_index 0 sw_if_index 5 decap_next l2
980 : * @cliexend
981 : ?*/
982 : /* *INDENT-OFF* */
983 5039 : VLIB_CLI_COMMAND (show_vxlan_tunnel_command, static) = {
984 : .path = "show vxlan tunnel",
985 : .short_help = "show vxlan tunnel [raw]",
986 : .function = show_vxlan_tunnel_command_fn,
987 : };
988 : /* *INDENT-ON* */
989 :
990 :
991 : void
992 0 : vnet_int_vxlan_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
993 : {
994 0 : vxlan_main_t *vxm = &vxlan_main;
995 :
996 0 : if (pool_is_free_index (vxm->vnet_main->interface_main.sw_interfaces,
997 : sw_if_index))
998 0 : return;
999 :
1000 0 : is_enable = ! !is_enable;
1001 :
1002 0 : if (is_ip6)
1003 : {
1004 0 : if (clib_bitmap_get (vxm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index)
1005 0 : != is_enable)
1006 : {
1007 0 : vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-bypass",
1008 : sw_if_index, is_enable, 0, 0);
1009 0 : vxm->bm_ip6_bypass_enabled_by_sw_if =
1010 0 : clib_bitmap_set (vxm->bm_ip6_bypass_enabled_by_sw_if,
1011 : sw_if_index, is_enable);
1012 : }
1013 : }
1014 : else
1015 : {
1016 0 : if (clib_bitmap_get (vxm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index)
1017 0 : != is_enable)
1018 : {
1019 0 : vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-bypass",
1020 : sw_if_index, is_enable, 0, 0);
1021 0 : vxm->bm_ip4_bypass_enabled_by_sw_if =
1022 0 : clib_bitmap_set (vxm->bm_ip4_bypass_enabled_by_sw_if,
1023 : sw_if_index, is_enable);
1024 : }
1025 : }
1026 : }
1027 :
1028 :
1029 : static clib_error_t *
1030 0 : set_ip_vxlan_bypass (u32 is_ip6,
1031 : unformat_input_t * input, vlib_cli_command_t * cmd)
1032 : {
1033 0 : unformat_input_t _line_input, *line_input = &_line_input;
1034 0 : vnet_main_t *vnm = vnet_get_main ();
1035 0 : clib_error_t *error = 0;
1036 : u32 sw_if_index, is_enable;
1037 :
1038 0 : sw_if_index = ~0;
1039 0 : is_enable = 1;
1040 :
1041 0 : if (!unformat_user (input, unformat_line_input, line_input))
1042 0 : return 0;
1043 :
1044 0 : while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1045 : {
1046 0 : if (unformat_user
1047 : (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
1048 : ;
1049 0 : else if (unformat (line_input, "del"))
1050 0 : is_enable = 0;
1051 : else
1052 : {
1053 0 : error = unformat_parse_error (line_input);
1054 0 : goto done;
1055 : }
1056 : }
1057 :
1058 0 : if (~0 == sw_if_index)
1059 : {
1060 0 : error = clib_error_return (0, "unknown interface `%U'",
1061 : format_unformat_error, line_input);
1062 0 : goto done;
1063 : }
1064 :
1065 0 : vnet_int_vxlan_bypass_mode (sw_if_index, is_ip6, is_enable);
1066 :
1067 0 : done:
1068 0 : unformat_free (line_input);
1069 :
1070 0 : return error;
1071 : }
1072 :
1073 : static clib_error_t *
1074 0 : set_ip4_vxlan_bypass (vlib_main_t * vm,
1075 : unformat_input_t * input, vlib_cli_command_t * cmd)
1076 : {
1077 0 : return set_ip_vxlan_bypass (0, input, cmd);
1078 : }
1079 :
1080 : /*?
1081 : * This command adds the 'ip4-vxlan-bypass' graph node for a given interface.
1082 : * By adding the IPv4 vxlan-bypass graph node to an interface, the node checks
1083 : * for and validates input vxlan packets and bypasses ip4-lookup, ip4-local,
1084 : * ip4-udp-lookup nodes to speed up vxlan packet forwarding. This node will
1085 : * cause extra overhead for non-vxlan packets, which is kept at a minimum.
1086 : *
1087 : * @cliexpar
1088 : * @parblock
1089 : * Example of graph node before ip4-vxlan-bypass is enabled:
1090 : * @cliexstart{show vlib graph ip4-vxlan-bypass}
1091 : * Name Next Previous
1092 : * ip4-vxlan-bypass error-drop [0]
1093 : * vxlan4-input [1]
1094 : * ip4-lookup [2]
1095 : * @cliexend
1096 : *
1097 : * Example of how to enable ip4-vxlan-bypass on an interface:
1098 : * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0}
1099 : *
1100 : * Example of graph node after ip4-vxlan-bypass is enabled:
1101 : * @cliexstart{show vlib graph ip4-vxlan-bypass}
1102 : * Name Next Previous
1103 : * ip4-vxlan-bypass error-drop [0] ip4-input
1104 : * vxlan4-input [1] ip4-input-no-checksum
1105 : * ip4-lookup [2]
1106 : * @cliexend
1107 : *
1108 : * Example of how to display the feature enabled on an interface:
1109 : * @cliexstart{show ip interface features GigabitEthernet2/0/0}
1110 : * IP feature paths configured on GigabitEthernet2/0/0...
1111 : * ...
1112 : * ipv4 unicast:
1113 : * ip4-vxlan-bypass
1114 : * ip4-lookup
1115 : * ...
1116 : * @cliexend
1117 : *
1118 : * Example of how to disable ip4-vxlan-bypass on an interface:
1119 : * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0 del}
1120 : * @endparblock
1121 : ?*/
1122 : /* *INDENT-OFF* */
1123 5039 : VLIB_CLI_COMMAND (set_interface_ip_vxlan_bypass_command, static) = {
1124 : .path = "set interface ip vxlan-bypass",
1125 : .function = set_ip4_vxlan_bypass,
1126 : .short_help = "set interface ip vxlan-bypass <interface> [del]",
1127 : };
1128 : /* *INDENT-ON* */
1129 :
1130 : static clib_error_t *
1131 0 : set_ip6_vxlan_bypass (vlib_main_t * vm,
1132 : unformat_input_t * input, vlib_cli_command_t * cmd)
1133 : {
1134 0 : return set_ip_vxlan_bypass (1, input, cmd);
1135 : }
1136 :
1137 : /*?
1138 : * This command adds the 'ip6-vxlan-bypass' graph node for a given interface.
1139 : * By adding the IPv6 vxlan-bypass graph node to an interface, the node checks
1140 : * for and validates input vxlan packets and bypasses ip6-lookup, ip6-local,
1141 : * ip6-udp-lookup nodes to speed up vxlan packet forwarding. This node will
1142 : * cause extra overhead for non-vxlan packets, which is kept at a minimum.
1143 : *
1144 : * @cliexpar
1145 : * @parblock
1146 : * Example of graph node before ip6-vxlan-bypass is enabled:
1147 : * @cliexstart{show vlib graph ip6-vxlan-bypass}
1148 : * Name Next Previous
1149 : * ip6-vxlan-bypass error-drop [0]
1150 : * vxlan6-input [1]
1151 : * ip6-lookup [2]
1152 : * @cliexend
1153 : *
1154 : * Example of how to enable ip6-vxlan-bypass on an interface:
1155 : * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0}
1156 : *
1157 : * Example of graph node after ip6-vxlan-bypass is enabled:
1158 : * @cliexstart{show vlib graph ip6-vxlan-bypass}
1159 : * Name Next Previous
1160 : * ip6-vxlan-bypass error-drop [0] ip6-input
1161 : * vxlan6-input [1] ip4-input-no-checksum
1162 : * ip6-lookup [2]
1163 : * @cliexend
1164 : *
1165 : * Example of how to display the feature enabled on an interface:
1166 : * @cliexstart{show ip interface features GigabitEthernet2/0/0}
1167 : * IP feature paths configured on GigabitEthernet2/0/0...
1168 : * ...
1169 : * ipv6 unicast:
1170 : * ip6-vxlan-bypass
1171 : * ip6-lookup
1172 : * ...
1173 : * @cliexend
1174 : *
1175 : * Example of how to disable ip6-vxlan-bypass on an interface:
1176 : * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0 del}
1177 : * @endparblock
1178 : ?*/
1179 : /* *INDENT-OFF* */
1180 5039 : VLIB_CLI_COMMAND (set_interface_ip6_vxlan_bypass_command, static) = {
1181 : .path = "set interface ip6 vxlan-bypass",
1182 : .function = set_ip6_vxlan_bypass,
1183 : .short_help = "set interface ip6 vxlan-bypass <interface> [del]",
1184 : };
1185 : /* *INDENT-ON* */
1186 :
1187 : int
1188 0 : vnet_vxlan_add_del_rx_flow (u32 hw_if_index, u32 t_index, int is_add)
1189 : {
1190 0 : vxlan_main_t *vxm = &vxlan_main;
1191 0 : vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
1192 0 : vnet_main_t *vnm = vnet_get_main ();
1193 0 : if (is_add)
1194 : {
1195 0 : if (t->flow_index == ~0)
1196 : {
1197 0 : vxlan_main_t *vxm = &vxlan_main;
1198 0 : vnet_flow_t flow = {
1199 : .actions =
1200 : VNET_FLOW_ACTION_REDIRECT_TO_NODE | VNET_FLOW_ACTION_MARK |
1201 : VNET_FLOW_ACTION_BUFFER_ADVANCE,
1202 0 : .mark_flow_id = t->dev_instance + vxm->flow_id_start,
1203 0 : .redirect_node_index = vxlan4_flow_input_node.index,
1204 : .buffer_advance = sizeof (ethernet_header_t),
1205 : .type = VNET_FLOW_TYPE_IP4_VXLAN,
1206 : .ip4_vxlan = {
1207 : .protocol.prot = IP_PROTOCOL_UDP,
1208 : .src_addr.addr = t->dst.ip4,
1209 : .dst_addr.addr = t->src.ip4,
1210 : .src_addr.mask.as_u32 = ~0,
1211 : .dst_addr.mask.as_u32 = ~0,
1212 0 : .dst_port.port = t->src_port,
1213 : .dst_port.mask = 0xFF,
1214 0 : .vni = t->vni,
1215 : }
1216 : ,
1217 : };
1218 0 : vnet_flow_add (vnm, &flow, &t->flow_index);
1219 : }
1220 0 : return vnet_flow_enable (vnm, t->flow_index, hw_if_index);
1221 : }
1222 : /* flow index is removed when the tunnel is deleted */
1223 0 : return vnet_flow_disable (vnm, t->flow_index, hw_if_index);
1224 : }
1225 :
1226 : u32
1227 0 : vnet_vxlan_get_tunnel_index (u32 sw_if_index)
1228 : {
1229 0 : vxlan_main_t *vxm = &vxlan_main;
1230 :
1231 0 : if (sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index))
1232 0 : return ~0;
1233 0 : return vxm->tunnel_index_by_sw_if_index[sw_if_index];
1234 : }
1235 :
1236 : static clib_error_t *
1237 0 : vxlan_offload_command_fn (vlib_main_t * vm,
1238 : unformat_input_t * input, vlib_cli_command_t * cmd)
1239 : {
1240 0 : unformat_input_t _line_input, *line_input = &_line_input;
1241 :
1242 : /* Get a line of input. */
1243 0 : if (!unformat_user (input, unformat_line_input, line_input))
1244 0 : return 0;
1245 :
1246 0 : vnet_main_t *vnm = vnet_get_main ();
1247 0 : u32 rx_sw_if_index = ~0;
1248 0 : u32 hw_if_index = ~0;
1249 0 : int is_add = 1;
1250 :
1251 0 : while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1252 : {
1253 0 : if (unformat (line_input, "hw %U", unformat_vnet_hw_interface, vnm,
1254 : &hw_if_index))
1255 0 : continue;
1256 0 : if (unformat (line_input, "rx %U", unformat_vnet_sw_interface, vnm,
1257 : &rx_sw_if_index))
1258 0 : continue;
1259 0 : if (unformat (line_input, "del"))
1260 : {
1261 0 : is_add = 0;
1262 0 : continue;
1263 : }
1264 0 : return clib_error_return (0, "unknown input `%U'",
1265 : format_unformat_error, line_input);
1266 : }
1267 :
1268 0 : if (rx_sw_if_index == ~0)
1269 0 : return clib_error_return (0, "missing rx interface");
1270 0 : if (hw_if_index == ~0)
1271 0 : return clib_error_return (0, "missing hw interface");
1272 :
1273 0 : u32 t_index = vnet_vxlan_get_tunnel_index (rx_sw_if_index);;
1274 0 : if (t_index == ~0)
1275 0 : return clib_error_return (0, "%U is not a vxlan tunnel",
1276 : format_vnet_sw_if_index_name, vnm,
1277 : rx_sw_if_index);
1278 :
1279 0 : vxlan_main_t *vxm = &vxlan_main;
1280 0 : vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
1281 :
1282 0 : if (!ip46_address_is_ip4 (&t->dst))
1283 0 : return clib_error_return (0, "currently only IPV4 tunnels are supported");
1284 :
1285 0 : vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
1286 0 : ip4_main_t *im = &ip4_main;
1287 0 : u32 rx_fib_index =
1288 0 : vec_elt (im->fib_index_by_sw_if_index, hw_if->sw_if_index);
1289 :
1290 0 : if (t->encap_fib_index != rx_fib_index)
1291 0 : return clib_error_return (0, "interface/tunnel fib mismatch");
1292 :
1293 0 : if (vnet_vxlan_add_del_rx_flow (hw_if_index, t_index, is_add))
1294 0 : return clib_error_return (0, "error %s flow",
1295 : is_add ? "enabling" : "disabling");
1296 :
1297 0 : return 0;
1298 : }
1299 :
1300 : /* *INDENT-OFF* */
1301 5039 : VLIB_CLI_COMMAND (vxlan_offload_command, static) = {
1302 : .path = "set flow-offload vxlan",
1303 : .short_help =
1304 : "set flow-offload vxlan hw <interface-name> rx <tunnel-name> [del]",
1305 : .function = vxlan_offload_command_fn,
1306 : };
1307 : /* *INDENT-ON* */
1308 :
1309 : #define VXLAN_HASH_NUM_BUCKETS (2 * 1024)
1310 : #define VXLAN_HASH_MEMORY_SIZE (1 << 20)
1311 :
1312 : clib_error_t *
1313 559 : vxlan_init (vlib_main_t * vm)
1314 : {
1315 559 : vxlan_main_t *vxm = &vxlan_main;
1316 :
1317 559 : vxm->vnet_main = vnet_get_main ();
1318 559 : vxm->vlib_main = vm;
1319 :
1320 559 : vnet_flow_get_range (vxm->vnet_main, "vxlan", 1024 * 1024,
1321 : &vxm->flow_id_start);
1322 :
1323 559 : vxm->bm_ip4_bypass_enabled_by_sw_if = 0;
1324 559 : vxm->bm_ip6_bypass_enabled_by_sw_if = 0;
1325 :
1326 : /* initialize the ip6 hash */
1327 559 : clib_bihash_init_16_8 (&vxm->vxlan4_tunnel_by_key, "vxlan4",
1328 : VXLAN_HASH_NUM_BUCKETS, VXLAN_HASH_MEMORY_SIZE);
1329 559 : clib_bihash_init_24_8 (&vxm->vxlan6_tunnel_by_key, "vxlan6",
1330 : VXLAN_HASH_NUM_BUCKETS, VXLAN_HASH_MEMORY_SIZE);
1331 559 : vxm->vtep_table = vtep_table_create ();
1332 559 : vxm->mcast_shared = hash_create_mem (0,
1333 : sizeof (ip46_address_t),
1334 : sizeof (mcast_shared_t));
1335 :
1336 559 : fib_node_register_type (FIB_NODE_TYPE_VXLAN_TUNNEL, &vxlan_vft);
1337 :
1338 559 : return 0;
1339 : }
1340 :
1341 1119 : VLIB_INIT_FUNCTION (vxlan_init);
1342 :
1343 : /*
1344 : * fd.io coding-style-patch-verification: ON
1345 : *
1346 : * Local Variables:
1347 : * eval: (c-set-style "gnu")
1348 : * End:
1349 : */
|