Line data Source code
1 : /*
2 : * Copyright (c) 2015 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 : #ifndef included_map_h
16 : #define included_map_h
17 :
18 : #include <stdbool.h>
19 : #include <vppinfra/error.h>
20 : #include <vnet/vnet.h>
21 : #include <vnet/ip/ip.h>
22 : #include <vlib/vlib.h>
23 : #include <vnet/fib/fib_types.h>
24 : #include <vnet/fib/ip4_fib.h>
25 : #include <vnet/adj/adj.h>
26 : #include <vnet/dpo/load_balance.h>
27 : #include "lpm.h"
28 : #include <vppinfra/lock.h>
29 : #include <map/map.api_enum.h>
30 :
31 : #define MAP_SKIP_IP6_LOOKUP 1
32 :
33 : #define MAP_ERR_GOOD 0
34 : #define MAP_ERR_BAD_POOL_SIZE -1
35 : #define MAP_ERR_BAD_HT_RATIO -2
36 : #define MAP_ERR_BAD_LIFETIME -3
37 : #define MAP_ERR_BAD_BUFFERS -4
38 : #define MAP_ERR_BAD_BUFFERS_TOO_LARGE -5
39 : #define MAP_ERR_UNSUPPORTED -6
40 :
41 : int map_create_domain (ip4_address_t * ip4_prefix, u8 ip4_prefix_len,
42 : ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
43 : ip6_address_t * ip6_src, u8 ip6_src_len,
44 : u8 ea_bits_len, u8 psid_offset, u8 psid_length,
45 : u32 * map_domain_index, u16 mtu, u8 flags, u8 * tag);
46 : int map_delete_domain (u32 map_domain_index);
47 : int map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep,
48 : bool is_add);
49 : int map_if_enable_disable (bool is_enable, u32 sw_if_index,
50 : bool is_translation);
51 : u8 *format_map_trace (u8 * s, va_list * args);
52 :
53 : int map_param_set_fragmentation (bool inner, bool ignore_df);
54 : int map_param_set_icmp (ip4_address_t * ip4_err_relay_src);
55 : int map_param_set_icmp6 (u8 enable_unreachable);
56 : void map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6, bool is_del);
57 : int map_param_set_security_check (bool enable, bool fragments);
58 : int map_param_set_traffic_class (bool copy, u8 tc);
59 : int map_param_set_tcp (u16 tcp_mss);
60 :
61 :
/*
 * Per-domain flag bits, stored in map_domain_t.flags.
 * The packed attribute asks GCC/Clang to use the smallest integer type
 * able to hold the enumerators.
 */
62 : typedef enum
63 : {
64 : MAP_DOMAIN_PREFIX = 1 << 0, /* map_get_sfx() takes its "IPv4 prefix" branch when set */
65 : MAP_DOMAIN_TRANSLATION = 1 << 1, // The domain uses MAP-T
66 : MAP_DOMAIN_RFC6052 = 1 << 2, /* presumably RFC 6052 address embedding -- TODO confirm */
67 : } __attribute__ ((__packed__)) map_domain_flags_e;
68 :
69 : //#define IP6_MAP_T_OVERRIDE_TOS 0
70 :
71 : /*
72 : * This structure _MUST_ be no larger than a single cache line (64 bytes).
73 : * If more space is needed make a union of ip6_prefix and *rules, as
74 : * those are mutually exclusive.
75 : */
/*
 * Forwarding state of one MAP domain. Field order and sizes matter: the
 * STATIC_ASSERT following this definition requires the structure to fit
 * in CLIB_CACHE_LINE_BYTES.
 */
76 : typedef struct
77 : {
78 : /* Required for pool_get_aligned */
79 : CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
80 : ip6_address_t ip6_src; /* base address copied by ip4_map_t_embedded_address() */
81 : ip6_address_t ip6_prefix; /* as_u64[0] feeds map_get_pfx(), as_u64[1] feeds map_get_sfx() */
82 : ip6_address_t *rules; /* indexed by PSID; consulted only when ea_bits_len == 0 */
83 : u32 suffix_mask; /* applied to (addr >> suffix_shift) in map_get_pfx() */
84 : ip4_address_t ip4_prefix;
85 : u16 psid_mask; /* applied to (port >> psid_shift) to extract the PSID */
86 : u16 mtu;
87 : map_domain_flags_e flags;
88 : u8 ip6_prefix_len; /* 128 makes map_get_sfx() return ip6_prefix.as_u64[1] unchanged */
89 : u8 ip6_src_len; /* the embedded-address helpers assert 64 or 96 */
90 : u8 ea_bits_len; /* 0 selects the rules[] lookup when rules is set */
91 : u8 psid_offset;
92 : u8 psid_length; /* left shift applied to the IPv4 suffix when building EA bits */
93 :
94 : /* helpers */
95 : u8 psid_shift; /* right shift applied to the port before masking */
96 : u8 suffix_shift; /* right shift applied to the IPv4 address before masking */
97 : u8 ea_shift; /* left shift placing the EA bits in the upper 64 address bits */
98 :
99 : /* not used by forwarding */
100 : u8 ip4_prefix_len;
101 : } map_domain_t;
102 :
103 : STATIC_ASSERT ((sizeof (map_domain_t) <= CLIB_CACHE_LINE_BYTES),
104 : "MAP domain fits in one cacheline");
105 :
106 : /*
107 : * Extra data about a domain that doesn't need to be time/space critical.
108 : * This structure is in a vector parallel to the main map_domain_t,
109 : * and indexed by the same map-domain-index values.
110 : */
/* Non-forwarding, per-domain metadata; one entry per map domain index. */
111 : typedef struct
112 : {
113 : u8 *tag; /* Probably a user-assigned domain name; storage format not visible here -- TODO confirm. */
114 : } map_domain_extra_t;
115 :
116 : #define MAP_REASS_INDEX_NONE ((u16)0xffff)
117 :
118 : /*
119 : * MAP domain counters
120 : */
/*
 * Counter indices. MAP_DOMAIN_IPV4_FRAGMENT and MAP_DOMAIN_COUNTER_RX both
 * evaluate to 0: per the section comments below they index two different
 * counter sets (simple vs. combined), so the overlap is harmless.
 * MAP_N_DOMAIN_COUNTER evaluates to 2, the number of combined counters.
 */
121 : typedef enum
122 : {
123 : /* Simple counters */
124 : MAP_DOMAIN_IPV4_FRAGMENT = 0,
125 : /* Combined counters */
126 : MAP_DOMAIN_COUNTER_RX = 0,
127 : MAP_DOMAIN_COUNTER_TX,
128 : MAP_N_DOMAIN_COUNTER
129 : } map_domain_counter_t;
130 :
131 : #ifdef MAP_SKIP_IP6_LOOKUP
132 : /**
133 : * A pre-resolved next-hop
134 : */
135 : typedef struct map_main_pre_resolved_t_
136 : {
137 : /**
138 : * Linkage into the FIB graph
139 : */
140 : fib_node_t node;
141 :
142 : /**
143 : * The FIB entry index of the next-hop.
144 : * The lookup-bypass helpers treat FIB_NODE_INDEX_INVALID as "not configured".
145 : */
146 : fib_node_index_t fei;
147 :
148 : /**
149 : * This object's sibling index on the FIB entry's child dependency list
150 : */
151 : u32 sibling;
152 :
153 : /**
154 : * The Load-balance object to use to forward; the lookup-bypass helpers
155 : * copy dpo.dpoi_index into the buffer's TX adjacency index.
156 : */
157 : dpo_id_t dpo;
156 : } map_main_pre_resolved_t;
157 :
158 : /**
159 : * Pre-resolved next hops for v4 and v6. Why these are global and not
160 : * per-domain is beyond me.
161 : */
162 : extern map_main_pre_resolved_t pre_resolved[FIB_PROTOCOL_MAX];
163 : #endif
164 :
/* Global state of the MAP plugin; a single instance is declared as map_main. */
165 : typedef struct
166 : {
167 : /* pool of MAP domains */
168 : map_domain_t *domains;
169 : map_domain_extra_t *domain_extras;
170 :
171 : /* MAP Domain packet/byte counters indexed by map domain index */
172 : vlib_simple_counter_main_t *simple_domain_counters;
173 : vlib_combined_counter_main_t *domain_counters;
174 : volatile u32 *counter_lock; /* spin lock word; when NULL the lock/unlock helpers do nothing */
175 :
176 : /* API message id base */
177 : u16 msg_id_base;
178 :
179 : /* Traffic class: zero, copy (~0) or fixed value */
180 : u8 tc;
181 : bool tc_copy;
182 :
183 : bool sec_check; /* Inbound security check */
184 : bool sec_check_frag; /* Inbound security check for (subsequent) fragments */
185 : bool icmp6_enabled; /* Send destination unreachable for security check failure */
186 :
187 : u16 tcp_mss; /* TCP MSS clamp value */
188 :
189 : /* ICMPv6 -> ICMPv4 relay parameters */
190 : ip4_address_t icmp4_src_address;
191 : vlib_simple_counter_main_t icmp_relayed;
192 :
193 : /* convenience */
194 : vlib_main_t *vlib_main;
195 : vnet_main_t *vnet_main;
196 :
197 : bool frag_inner; /* Inner or outer fragmentation */
198 : bool frag_ignore_df; /* Fragment (outer) packet even if DF is set */
199 :
200 : /* Graph node state */
201 : uword *bm_trans_enabled_by_sw_if;
202 : uword *bm_encap_enabled_by_sw_if;
203 :
204 : /* Lookup tables */
205 : lpm_t *ip4_prefix_tbl; /* queried by ip4_map_get_domain() with a /32 key */
206 : lpm_t *ip6_prefix_tbl;
207 : lpm_t *ip6_src_prefix_tbl; /* queried by ip6_map_get_domain() with a /128 key */
208 :
209 : uword ip4_sv_reass_custom_next_index;
210 : } map_main_t;
211 :
212 : typedef vl_counter_map_enum_t map_error_t;
213 : u64 map_error_counter_get (u32 node_index, map_error_t map_error);
214 :
/* Per-packet trace record; filled in by map_add_trace(). */
215 : typedef struct
216 : {
217 : u32 map_domain_index; /* domain index passed to map_add_trace() */
218 : u16 port; /* port value passed to map_add_trace() */
219 : } map_trace_t;
220 :
221 : always_inline void
222 60 : map_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
223 : vlib_buffer_t * b, u32 map_domain_index, u16 port)
224 : {
225 60 : map_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
226 60 : tr->map_domain_index = map_domain_index;
227 60 : tr->port = port;
228 60 : }
229 :
230 : extern map_main_t map_main;
231 :
232 : extern vlib_node_registration_t ip4_map_node;
233 : extern vlib_node_registration_t ip6_map_node;
234 :
235 : extern vlib_node_registration_t ip4_map_t_node;
236 : extern vlib_node_registration_t ip4_map_t_fragmented_node;
237 : extern vlib_node_registration_t ip4_map_t_tcp_udp_node;
238 : extern vlib_node_registration_t ip4_map_t_icmp_node;
239 :
240 : extern vlib_node_registration_t ip6_map_t_node;
241 : extern vlib_node_registration_t ip6_map_t_fragmented_node;
242 : extern vlib_node_registration_t ip6_map_t_tcp_udp_node;
243 : extern vlib_node_registration_t ip6_map_t_icmp_node;
244 :
245 : /*
246 : * map_get_pfx
247 : */
248 : static_always_inline u64
249 67 : map_get_pfx (map_domain_t * d, u32 addr, u16 port)
250 : {
251 67 : u16 psid = (port >> d->psid_shift) & d->psid_mask;
252 :
253 67 : if (d->ea_bits_len == 0 && d->rules)
254 0 : return clib_net_to_host_u64 (d->rules[psid].as_u64[0]);
255 :
256 67 : u32 suffix = (addr >> d->suffix_shift) & d->suffix_mask;
257 67 : u64 ea =
258 67 : d->ea_bits_len == 0 ? 0 : (((u64) suffix << d->psid_length)) | psid;
259 :
260 67 : return clib_net_to_host_u64 (d->ip6_prefix.as_u64[0]) | ea << d->ea_shift;
261 : }
262 :
263 : static_always_inline u64
264 46 : map_get_pfx_net (map_domain_t * d, u32 addr, u16 port)
265 : {
266 46 : return clib_host_to_net_u64 (map_get_pfx (d, clib_net_to_host_u32 (addr),
267 46 : clib_net_to_host_u16 (port)));
268 : }
269 :
270 : /*
271 : * map_get_sfx
272 : */
273 : static_always_inline u64
274 60 : map_get_sfx (map_domain_t * d, u32 addr, u16 port)
275 : {
276 60 : u16 psid = (port >> d->psid_shift) & d->psid_mask;
277 :
278 : /* Shared 1:1 mode. */
279 60 : if (d->ea_bits_len == 0 && d->rules)
280 0 : return clib_net_to_host_u64 (d->rules[psid].as_u64[1]);
281 60 : if (d->ip6_prefix_len == 128)
282 0 : return clib_net_to_host_u64 (d->ip6_prefix.as_u64[1]);
283 :
284 60 : if (d->ip6_src_len == 96)
285 0 : return (clib_net_to_host_u64 (d->ip6_prefix.as_u64[1]) | addr);
286 :
287 : /* IPv4 prefix */
288 60 : if (d->flags & MAP_DOMAIN_PREFIX)
289 0 : return (u64) (addr & (0xFFFFFFFF << d->suffix_shift)) << 16;
290 :
291 : /* Shared or full IPv4 address */
292 60 : return ((u64) addr << 16) | psid;
293 : }
294 :
295 : static_always_inline u64
296 39 : map_get_sfx_net (map_domain_t * d, u32 addr, u16 port)
297 : {
298 39 : return clib_host_to_net_u64 (map_get_sfx (d, clib_net_to_host_u32 (addr),
299 39 : clib_net_to_host_u16 (port)));
300 : }
301 :
302 : static_always_inline u32
303 28 : map_get_ip4 (ip6_address_t * addr, u16 prefix_len)
304 : {
305 28 : ASSERT (prefix_len == 64 || prefix_len == 96);
306 28 : if (prefix_len == 96)
307 0 : return clib_host_to_net_u32 (clib_net_to_host_u64 (addr->as_u64[1]));
308 : else
309 28 : return clib_host_to_net_u32 (clib_net_to_host_u64 (addr->as_u64[1]) >>
310 : 16);
311 : }
312 :
313 : static_always_inline map_domain_t *
314 58 : ip4_map_get_domain (ip4_address_t * addr, u32 * map_domain_index, u8 * error)
315 : {
316 58 : map_main_t *mm = &map_main;
317 :
318 58 : u32 mdi = mm->ip4_prefix_tbl->lookup (mm->ip4_prefix_tbl, addr, 32);
319 58 : if (mdi == ~0)
320 : {
321 6 : *error = MAP_ERROR_NO_DOMAIN;
322 6 : return 0;
323 : }
324 52 : *map_domain_index = mdi;
325 52 : return pool_elt_at_index (mm->domains, mdi);
326 : }
327 :
328 : /*
329 : * Get the MAP domain from an IPv6 address.
330 : * If the IPv6 address or
331 : * prefix is shared the IPv4 address must be used.
332 : */
333 : static_always_inline map_domain_t *
334 23 : ip6_map_get_domain (ip6_address_t * addr, u32 * map_domain_index, u8 * error)
335 : {
336 23 : map_main_t *mm = &map_main;
337 : u32 mdi =
338 23 : mm->ip6_src_prefix_tbl->lookup (mm->ip6_src_prefix_tbl, addr, 128);
339 23 : if (mdi == ~0)
340 : {
341 1 : *error = MAP_ERROR_NO_DOMAIN;
342 1 : return 0;
343 : }
344 :
345 22 : *map_domain_index = mdi;
346 22 : return pool_elt_at_index (mm->domains, mdi);
347 : }
348 :
349 : clib_error_t *map_plugin_api_hookup (vlib_main_t * vm);
350 :
351 : void map_ip6_drop_pi (u32 pi);
352 :
353 : /*
354 : * Supports prefix of 96 or 64 (with u-octet)
355 : */
356 : static_always_inline void
357 21 : ip4_map_t_embedded_address (map_domain_t * d,
358 : ip6_address_t * ip6, const ip4_address_t * ip4)
359 : {
360 21 : ASSERT (d->ip6_src_len == 96 || d->ip6_src_len == 64); //No support for other lengths for now
361 21 : u8 offset = d->ip6_src_len == 64 ? 9 : 12;
362 21 : ip6->as_u64[0] = d->ip6_src.as_u64[0];
363 21 : ip6->as_u64[1] = d->ip6_src.as_u64[1];
364 21 : clib_memcpy_fast (&ip6->as_u8[offset], ip4, 4);
365 21 : }
366 :
367 : static_always_inline u32
368 28 : ip6_map_t_embedded_address (map_domain_t * d, ip6_address_t * addr)
369 : {
370 28 : ASSERT (d->ip6_src_len == 64 || d->ip6_src_len == 96);
371 : u32 x;
372 28 : u8 offset = d->ip6_src_len == 64 ? 9 : 12;
373 28 : clib_memcpy (&x, &addr->as_u8[offset], 4);
374 28 : return x;
375 : }
376 :
377 : static inline void
378 4120 : map_domain_counter_lock (map_main_t * mm)
379 : {
380 4120 : if (mm->counter_lock)
381 0 : while (clib_atomic_test_and_set (mm->counter_lock))
382 : /* zzzz */ ;
383 4120 : }
384 :
385 : static inline void
386 4120 : map_domain_counter_unlock (map_main_t * mm)
387 : {
388 4120 : if (mm->counter_lock)
389 0 : clib_atomic_release (mm->counter_lock);
390 4120 : }
391 :
392 :
393 : static_always_inline void
394 : map_send_all_to_node (vlib_main_t * vm, u32 * pi_vector,
395 : vlib_node_runtime_t * node, vlib_error_t * error,
396 : u32 next)
397 : {
398 : u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
399 : //Deal with fragments that are ready
400 : from = pi_vector;
401 : n_left_from = vec_len (pi_vector);
402 : next_index = node->cached_next_index;
403 : while (n_left_from > 0)
404 : {
405 : vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
406 : while (n_left_from > 0 && n_left_to_next > 0)
407 : {
408 : u32 pi0 = to_next[0] = from[0];
409 : from += 1;
410 : n_left_from -= 1;
411 : to_next += 1;
412 : n_left_to_next -= 1;
413 : vlib_buffer_t *p0 = vlib_get_buffer (vm, pi0);
414 : p0->error = *error;
415 : vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
416 : n_left_to_next, pi0, next);
417 : }
418 : vlib_put_next_frame (vm, node, next_index, n_left_to_next);
419 : }
420 : }
421 :
422 : static_always_inline void
423 5 : map_mss_clamping (tcp_header_t * tcp, ip_csum_t * sum, u16 mss_clamping)
424 : {
425 : u8 *data;
426 : u8 opt_len, opts_len, kind;
427 : u16 mss;
428 5 : u16 mss_value_net = clib_host_to_net_u16 (mss_clamping);
429 :
430 5 : if (!tcp_syn (tcp))
431 3 : return;
432 :
433 5 : opts_len = (tcp_doff (tcp) << 2) - sizeof (tcp_header_t);
434 5 : data = (u8 *) (tcp + 1);
435 5 : for (; opts_len > 0; opts_len -= opt_len, data += opt_len)
436 : {
437 3 : kind = data[0];
438 :
439 3 : if (kind == TCP_OPTION_EOL)
440 0 : break;
441 3 : else if (kind == TCP_OPTION_NOOP)
442 : {
443 0 : opt_len = 1;
444 0 : continue;
445 : }
446 : else
447 : {
448 3 : if (opts_len < 2)
449 0 : return;
450 3 : opt_len = data[1];
451 :
452 3 : if (opt_len < 2 || opt_len > opts_len)
453 0 : return;
454 : }
455 :
456 3 : if (kind == TCP_OPTION_MSS)
457 : {
458 3 : mss = *(u16 *) (data + 2);
459 3 : if (clib_net_to_host_u16 (mss) > mss_clamping)
460 : {
461 3 : *sum =
462 3 : ip_csum_update (*sum, mss, mss_value_net, ip4_header_t,
463 : length);
464 3 : clib_memcpy (data + 2, &mss_value_net, 2);
465 : }
466 3 : return;
467 : }
468 : }
469 : }
470 :
471 : static_always_inline bool
472 23 : ip4_map_ip6_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
473 : {
474 : #ifdef MAP_SKIP_IP6_LOOKUP
475 23 : if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP6].fei)
476 : {
477 4 : vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
478 4 : pre_resolved[FIB_PROTOCOL_IP6].dpo.dpoi_index;
479 4 : return (true);
480 : }
481 : #endif
482 19 : return (false);
483 : }
484 :
485 : static_always_inline bool
486 13 : ip6_map_ip4_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
487 : {
488 : #ifdef MAP_SKIP_IP6_LOOKUP
489 13 : if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP4].fei)
490 : {
491 1 : vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
492 1 : pre_resolved[FIB_PROTOCOL_IP4].dpo.dpoi_index;
493 1 : return (true);
494 : }
495 : #endif
496 12 : return (false);
497 : }
498 :
499 : #endif
500 : /*
501 : * fd.io coding-style-patch-verification: ON
502 : *
503 : * Local Variables:
504 : * eval: (c-set-style "gnu")
505 : * End:
506 : */
|