Line data Source code
1 : /*
2 : * Copyright (c) 2016 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 :
16 : #include <lb/lb.h>
17 : #include <vnet/plugin/plugin.h>
18 : #include <vpp/app/version.h>
19 : #include <vnet/api_errno.h>
20 : #include <vnet/udp/udp_local.h>
21 : #include <vppinfra/lock.h>
22 :
23 : //GC runs at most once every so many seconds
24 : #define LB_GARBAGE_RUN 60
25 :
26 : //After this many seconds, it is assumed that inter-core race conditions can no longer occur.
27 : #define LB_CONCURRENCY_TIMEOUT 10
28 :
29 : // FIB source for adding routes
30 : static fib_source_t lb_fib_src;
31 :
32 : lb_main_t lb_main;
33 :
34 : #define lb_get_writer_lock() clib_spinlock_lock (&lb_main.writer_lock)
35 : #define lb_put_writer_lock() clib_spinlock_unlock (&lb_main.writer_lock)
36 :
37 : static void lb_as_stack (lb_as_t *as);
38 :
39 :
40 : const static char * const lb_dpo_gre4_ip4[] = { "lb4-gre4" , NULL };
41 : const static char * const lb_dpo_gre4_ip6[] = { "lb6-gre4" , NULL };
42 : const static char* const * const lb_dpo_gre4_nodes[DPO_PROTO_NUM] =
43 : {
44 : [DPO_PROTO_IP4] = lb_dpo_gre4_ip4,
45 : [DPO_PROTO_IP6] = lb_dpo_gre4_ip6,
46 : };
47 :
48 : const static char * const lb_dpo_gre6_ip4[] = { "lb4-gre6" , NULL };
49 : const static char * const lb_dpo_gre6_ip6[] = { "lb6-gre6" , NULL };
50 : const static char* const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] =
51 : {
52 : [DPO_PROTO_IP4] = lb_dpo_gre6_ip4,
53 : [DPO_PROTO_IP6] = lb_dpo_gre6_ip6,
54 : };
55 :
56 : const static char * const lb_dpo_gre4_ip4_port[] = { "lb4-gre4-port" , NULL };
57 : const static char * const lb_dpo_gre4_ip6_port[] = { "lb6-gre4-port" , NULL };
58 : const static char* const * const lb_dpo_gre4_port_nodes[DPO_PROTO_NUM] =
59 : {
60 : [DPO_PROTO_IP4] = lb_dpo_gre4_ip4_port,
61 : [DPO_PROTO_IP6] = lb_dpo_gre4_ip6_port,
62 : };
63 :
64 : const static char * const lb_dpo_gre6_ip4_port[] = { "lb4-gre6-port" , NULL };
65 : const static char * const lb_dpo_gre6_ip6_port[] = { "lb6-gre6-port" , NULL };
66 : const static char* const * const lb_dpo_gre6_port_nodes[DPO_PROTO_NUM] =
67 : {
68 : [DPO_PROTO_IP4] = lb_dpo_gre6_ip4_port,
69 : [DPO_PROTO_IP6] = lb_dpo_gre6_ip6_port,
70 : };
71 :
72 : const static char * const lb_dpo_l3dsr_ip4[] = {"lb4-l3dsr" , NULL};
73 : const static char* const * const lb_dpo_l3dsr_nodes[DPO_PROTO_NUM] =
74 : {
75 : [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4,
76 : };
77 :
78 : const static char * const lb_dpo_l3dsr_ip4_port[] = {"lb4-l3dsr-port" , NULL};
79 : const static char* const * const lb_dpo_l3dsr_port_nodes[DPO_PROTO_NUM] =
80 : {
81 : [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_port,
82 : };
83 :
84 : const static char * const lb_dpo_nat4_ip4_port[] = { "lb4-nat4-port" , NULL };
85 : const static char* const * const lb_dpo_nat4_port_nodes[DPO_PROTO_NUM] =
86 : {
87 : [DPO_PROTO_IP4] = lb_dpo_nat4_ip4_port,
88 : };
89 :
90 : const static char * const lb_dpo_nat6_ip6_port[] = { "lb6-nat6-port" , NULL };
91 : const static char* const * const lb_dpo_nat6_port_nodes[DPO_PROTO_NUM] =
92 : {
93 : [DPO_PROTO_IP6] = lb_dpo_nat6_ip6_port,
94 : };
95 :
96 14 : u32 lb_hash_time_now(vlib_main_t * vm)
97 : {
98 14 : return (u32) (vlib_time_now(vm) + 10000);
99 : }
100 :
101 0 : u8 *format_lb_main (u8 * s, va_list * args)
102 : {
103 0 : vlib_thread_main_t *tm = vlib_get_thread_main();
104 0 : lb_main_t *lbm = &lb_main;
105 0 : s = format(s, "lb_main");
106 0 : s = format(s, " ip4-src-address: %U \n", format_ip4_address, &lbm->ip4_src_address);
107 0 : s = format(s, " ip6-src-address: %U \n", format_ip6_address, &lbm->ip6_src_address);
108 0 : s = format(s, " #vips: %u\n", pool_elts(lbm->vips));
109 0 : s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1);
110 :
111 : u32 thread_index;
112 0 : for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
113 0 : lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
114 0 : if (h) {
115 0 : s = format(s, "core %d\n", thread_index);
116 0 : s = format(s, " timeout: %ds\n", h->timeout);
117 0 : s = format(s, " usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())), lb_hash_size(h));
118 : }
119 : }
120 :
121 0 : return s;
122 : }
123 :
124 : static char *lb_vip_type_strings[] = {
125 : [LB_VIP_TYPE_IP6_GRE6] = "ip6-gre6",
126 : [LB_VIP_TYPE_IP6_GRE4] = "ip6-gre4",
127 : [LB_VIP_TYPE_IP4_GRE6] = "ip4-gre6",
128 : [LB_VIP_TYPE_IP4_GRE4] = "ip4-gre4",
129 : [LB_VIP_TYPE_IP4_L3DSR] = "ip4-l3dsr",
130 : [LB_VIP_TYPE_IP4_NAT4] = "ip4-nat4",
131 : [LB_VIP_TYPE_IP6_NAT6] = "ip6-nat6",
132 : };
133 :
134 1506 : u8 *format_lb_vip_type (u8 * s, va_list * args)
135 : {
136 1506 : lb_vip_type_t vipt = va_arg (*args, lb_vip_type_t);
137 : u32 i;
138 5718 : for (i=0; i<LB_VIP_N_TYPES; i++)
139 5718 : if (vipt == i)
140 1506 : return format(s, lb_vip_type_strings[i]);
141 0 : return format(s, "_WRONG_TYPE_");
142 : }
143 :
144 0 : uword unformat_lb_vip_type (unformat_input_t * input, va_list * args)
145 : {
146 0 : lb_vip_type_t *vipt = va_arg (*args, lb_vip_type_t *);
147 : u32 i;
148 0 : for (i=0; i<LB_VIP_N_TYPES; i++)
149 0 : if (unformat(input, lb_vip_type_strings[i])) {
150 0 : *vipt = i;
151 0 : return 1;
152 : }
153 0 : return 0;
154 : }
155 :
156 1400 : u8 *format_lb_vip (u8 * s, va_list * args)
157 : {
158 1400 : lb_vip_t *vip = va_arg (*args, lb_vip_t *);
159 1400 : s = format(s, "%U %U new_size:%u #as:%u%s",
160 1400 : format_lb_vip_type, vip->type,
161 1400 : format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY,
162 1400 : vip->new_flow_table_mask + 1,
163 1400 : pool_elts(vip->as_indexes),
164 1400 : (vip->flags & LB_VIP_FLAGS_USED)?"":" removed");
165 :
166 1400 : if (vip->port != 0)
167 : {
168 800 : s = format(s, " protocol:%u port:%u ", vip->protocol, vip->port);
169 : }
170 :
171 1400 : if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
172 : {
173 400 : s = format(s, " dscp:%u", vip->encap_args.dscp);
174 : }
175 1000 : else if ((vip->type == LB_VIP_TYPE_IP4_NAT4)
176 900 : || (vip->type == LB_VIP_TYPE_IP6_NAT6))
177 : {
178 200 : s = format (s, " type:%s port:%u target_port:%u",
179 200 : (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)?"clusterip":
180 : "nodeport",
181 200 : ntohs(vip->port), ntohs(vip->encap_args.target_port));
182 : }
183 :
184 1400 : return s;
185 : }
186 :
187 1400 : u8 *format_lb_as (u8 * s, va_list * args)
188 : {
189 1400 : lb_as_t *as = va_arg (*args, lb_as_t *);
190 1400 : return format(s, "%U %s", format_ip46_address,
191 : &as->address, IP46_TYPE_ANY,
192 1400 : (as->flags & LB_AS_FLAGS_USED)?"used":"removed");
193 : }
194 :
195 106 : u8 *format_lb_vip_detailed (u8 * s, va_list * args)
196 : {
197 106 : lb_main_t *lbm = &lb_main;
198 106 : lb_vip_t *vip = va_arg (*args, lb_vip_t *);
199 106 : u32 indent = format_get_indent (s);
200 :
201 : /* clang-format off */
202 212 : s = format(s, "%U %U [%lu] %U%s%s\n"
203 : "%U new_size:%u\n",
204 : format_white_space, indent,
205 106 : format_lb_vip_type, vip->type,
206 106 : vip - lbm->vips,
207 106 : format_ip46_prefix, &vip->prefix, (u32) vip->plen, IP46_TYPE_ANY,
208 106 : lb_vip_is_src_ip_sticky (vip) ? " src_ip_sticky" : "",
209 106 : (vip->flags & LB_VIP_FLAGS_USED)?"":" removed",
210 : format_white_space, indent,
211 106 : vip->new_flow_table_mask + 1);
212 : /* clang-format on */
213 :
214 106 : if (vip->port != 0)
215 : {
216 55 : s = format(s, "%U protocol:%u port:%u\n",
217 : format_white_space, indent,
218 55 : vip->protocol, vip->port);
219 : }
220 :
221 106 : if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
222 : {
223 35 : s = format(s, "%U dscp:%u\n",
224 : format_white_space, indent,
225 35 : vip->encap_args.dscp);
226 : }
227 71 : else if ((vip->type == LB_VIP_TYPE_IP4_NAT4)
228 65 : || (vip->type == LB_VIP_TYPE_IP6_NAT6))
229 : {
230 7 : s = format (s, "%U type:%s port:%u target_port:%u",
231 : format_white_space, indent,
232 7 : (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)?"clusterip":
233 : "nodeport",
234 7 : ntohs(vip->port), ntohs(vip->encap_args.target_port));
235 : }
236 :
237 : //Print counters
238 106 : s = format(s, "%U counters:\n",
239 : format_white_space, indent);
240 : u32 i;
241 530 : for (i=0; i<LB_N_VIP_COUNTERS; i++)
242 424 : s = format(s, "%U %s: %Lu\n",
243 : format_white_space, indent,
244 : lbm->vip_counters[i].name,
245 424 : vlib_get_simple_counter(&lbm->vip_counters[i], vip - lbm->vips));
246 :
247 :
248 106 : s = format(s, "%U #as:%u\n",
249 : format_white_space, indent,
250 106 : pool_elts(vip->as_indexes));
251 :
252 : //Let's count the buckets for each AS
253 106 : u32 *count = 0;
254 106 : vec_validate(count, pool_len(lbm->ass)); //Possibly big alloc for not much...
255 : lb_new_flow_entry_t *nfe;
256 108650 : vec_foreach(nfe, vip->new_flow_table)
257 108544 : count[nfe->as_index]++;
258 :
259 : lb_as_t *as;
260 : u32 *as_index;
261 631 : pool_foreach (as_index, vip->as_indexes) {
262 525 : as = &lbm->ass[*as_index];
263 525 : s = format(s, "%U %U %u buckets %Lu flows dpo:%u %s\n",
264 : format_white_space, indent,
265 : format_ip46_address, &as->address, IP46_TYPE_ANY,
266 525 : count[as - lbm->ass],
267 525 : vlib_refcount_get(&lbm->as_refcount, as - lbm->ass),
268 : as->dpo.dpoi_index,
269 525 : (as->flags & LB_AS_FLAGS_USED)?"used":" removed");
270 : }
271 :
272 106 : vec_free(count);
273 106 : return s;
274 : }
275 :
276 : typedef struct {
277 : u32 as_index;
278 : u32 last;
279 : u32 skip;
280 : } lb_pseudorand_t;
281 :
282 266 : static int lb_pseudorand_compare(void *a, void *b)
283 : {
284 : lb_as_t *asa, *asb;
285 266 : lb_main_t *lbm = &lb_main;
286 266 : asa = &lbm->ass[((lb_pseudorand_t *)a)->as_index];
287 266 : asb = &lbm->ass[((lb_pseudorand_t *)b)->as_index];
288 266 : return memcmp(&asa->address, &asb->address, sizeof(asb->address));
289 : }
290 :
291 1604 : static void lb_vip_garbage_collection(lb_vip_t *vip)
292 : {
293 1604 : lb_main_t *lbm = &lb_main;
294 : lb_snat4_key_t m_key4;
295 : clib_bihash_kv_8_8_t kv4, value4;
296 : lb_snat6_key_t m_key6;
297 : clib_bihash_kv_24_8_t kv6, value6;
298 1604 : lb_snat_mapping_t *m = 0;
299 1604 : CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
300 :
301 1604 : u32 now = (u32) vlib_time_now(vlib_get_main());
302 1604 : if (!clib_u32_loop_gt(now, vip->last_garbage_collection + LB_GARBAGE_RUN))
303 1604 : return;
304 :
305 0 : vip->last_garbage_collection = now;
306 : lb_as_t *as;
307 : u32 *as_index;
308 0 : pool_foreach (as_index, vip->as_indexes) {
309 0 : as = &lbm->ass[*as_index];
310 0 : if (!(as->flags & LB_AS_FLAGS_USED) && //Not used
311 0 : clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) &&
312 0 : (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0))
313 : { //Not referenced
314 :
315 0 : if (lb_vip_is_nat4_port(vip)) {
316 0 : m_key4.addr = as->address.ip4;
317 0 : m_key4.port = vip->encap_args.target_port;
318 0 : m_key4.protocol = 0;
319 0 : m_key4.fib_index = 0;
320 :
321 0 : kv4.key = m_key4.as_u64;
322 0 : if(!clib_bihash_search_8_8(&lbm->mapping_by_as4, &kv4, &value4))
323 0 : m = pool_elt_at_index (lbm->snat_mappings, value4.value);
324 0 : ASSERT (m);
325 :
326 0 : kv4.value = m - lbm->snat_mappings;
327 0 : clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 0);
328 0 : pool_put (lbm->snat_mappings, m);
329 0 : } else if (lb_vip_is_nat6_port(vip)) {
330 0 : m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0];
331 0 : m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1];
332 0 : m_key6.port = vip->encap_args.target_port;
333 0 : m_key6.protocol = 0;
334 0 : m_key6.fib_index = 0;
335 :
336 0 : kv6.key[0] = m_key6.as_u64[0];
337 0 : kv6.key[1] = m_key6.as_u64[1];
338 0 : kv6.key[2] = m_key6.as_u64[2];
339 :
340 0 : if (!clib_bihash_search_24_8 (&lbm->mapping_by_as6, &kv6, &value6))
341 0 : m = pool_elt_at_index (lbm->snat_mappings, value6.value);
342 0 : ASSERT (m);
343 :
344 0 : kv6.value = m - lbm->snat_mappings;
345 0 : clib_bihash_add_del_24_8(&lbm->mapping_by_as6, &kv6, 0);
346 0 : pool_put (lbm->snat_mappings, m);
347 : }
348 0 : fib_entry_child_remove(as->next_hop_fib_entry_index,
349 : as->next_hop_child_index);
350 0 : fib_table_entry_delete_index(as->next_hop_fib_entry_index,
351 : FIB_SOURCE_RR);
352 0 : as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID;
353 :
354 0 : pool_put(vip->as_indexes, as_index);
355 0 : pool_put(lbm->ass, as);
356 : }
357 : }
358 : }
359 :
360 174 : void lb_garbage_collection()
361 : {
362 174 : lb_main_t *lbm = &lb_main;
363 174 : lb_get_writer_lock();
364 : lb_vip_t *vip;
365 174 : u32 *to_be_removed_vips = 0, *i;
366 1623 : pool_foreach (vip, lbm->vips) {
367 1449 : lb_vip_garbage_collection(vip);
368 :
369 2555 : if (!(vip->flags & LB_VIP_FLAGS_USED) &&
370 1106 : (pool_elts(vip->as_indexes) == 0)) {
371 1 : vec_add1(to_be_removed_vips, vip - lbm->vips);
372 : }
373 : }
374 :
375 175 : vec_foreach(i, to_be_removed_vips) {
376 1 : vip = &lbm->vips[*i];
377 1 : pool_put(lbm->vips, vip);
378 1 : pool_free(vip->as_indexes);
379 : }
380 :
381 174 : vec_free(to_be_removed_vips);
382 174 : lb_put_writer_lock();
383 174 : }
384 :
385 747 : static void lb_vip_update_new_flow_table(lb_vip_t *vip)
386 : {
387 747 : lb_main_t *lbm = &lb_main;
388 : lb_new_flow_entry_t *old_table;
389 : u32 i, *as_index;
390 747 : lb_new_flow_entry_t *new_flow_table = 0;
391 : lb_as_t *as;
392 747 : lb_pseudorand_t *pr, *sort_arr = 0;
393 :
394 747 : CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock); // We must have the lock
395 :
396 : //Check if some AS is configured or not
397 747 : i = 0;
398 1027 : pool_foreach (as_index, vip->as_indexes) {
399 407 : as = &lbm->ass[*as_index];
400 407 : if (as->flags & LB_AS_FLAGS_USED) { //Not used anymore
401 127 : i = 1;
402 127 : goto out; //Not sure 'break' works in this macro-loop
403 : }
404 : }
405 :
406 620 : out:
407 747 : if (i == 0) {
408 : //Only the default. i.e. no AS
409 620 : vec_validate(new_flow_table, vip->new_flow_table_mask);
410 47275 : for (i=0; i<vec_len(new_flow_table); i++)
411 46655 : new_flow_table[i].as_index = 0;
412 :
413 620 : goto finished;
414 : }
415 :
416 : //First, let's sort the ASs
417 127 : vec_validate (sort_arr, pool_elts (vip->as_indexes) - 1);
418 :
419 127 : i = 0;
420 618 : pool_foreach (as_index, vip->as_indexes) {
421 491 : as = &lbm->ass[*as_index];
422 491 : if (!(as->flags & LB_AS_FLAGS_USED)) //Not used anymore
423 140 : continue;
424 :
425 351 : sort_arr[i].as_index = as - lbm->ass;
426 351 : i++;
427 : }
428 127 : vec_set_len (sort_arr, i);
429 :
430 127 : vec_sort_with_function(sort_arr, lb_pseudorand_compare);
431 :
432 : //Now let's pseudo-randomly generate permutations
433 478 : vec_foreach(pr, sort_arr) {
434 351 : lb_as_t *as = &lbm->ass[pr->as_index];
435 :
436 351 : u64 seed = clib_xxhash(as->address.as_u64[0] ^
437 351 : as->address.as_u64[1]);
438 : /* We have 2^n buckets.
439 : * skip must be prime with 2^n.
440 : * So skip must be odd.
441 : * MagLev actually state that M should be prime,
442 : * but this has a big computation cost (% operation).
443 : * Using 2^n is more better (& operation).
444 : */
445 351 : pr->skip = ((seed & 0xffffffff) | 1) & vip->new_flow_table_mask;
446 351 : pr->last = (seed >> 32) & vip->new_flow_table_mask;
447 : }
448 :
449 : //Let's create a new flow table
450 127 : vec_validate(new_flow_table, vip->new_flow_table_mask);
451 130175 : for (i=0; i<vec_len(new_flow_table); i++)
452 130048 : new_flow_table[i].as_index = 0;
453 :
454 127 : u32 done = 0;
455 : while (1) {
456 193567 : vec_foreach(pr, sort_arr) {
457 174435 : while (1) {
458 304483 : u32 last = pr->last;
459 304483 : pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask;
460 304483 : if (new_flow_table[last].as_index == 0) {
461 130048 : new_flow_table[last].as_index = pr->as_index;
462 130048 : break;
463 : }
464 : }
465 130048 : done++;
466 130048 : if (done == vec_len(new_flow_table))
467 127 : goto finished;
468 : }
469 : }
470 :
471 747 : finished:
472 747 : vec_free(sort_arr);
473 :
474 747 : old_table = vip->new_flow_table;
475 747 : vip->new_flow_table = new_flow_table;
476 747 : vec_free(old_table);
477 747 : }
478 :
479 1 : int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
480 : u32 per_cpu_sticky_buckets, u32 flow_timeout)
481 : {
482 1 : lb_main_t *lbm = &lb_main;
483 :
484 1 : if (!is_pow2(per_cpu_sticky_buckets))
485 0 : return VNET_API_ERROR_INVALID_MEMORY_SIZE;
486 :
487 1 : lb_get_writer_lock(); //Not exactly necessary but just a reminder that it exists for my future self
488 1 : lbm->ip4_src_address = *ip4_address;
489 1 : lbm->ip6_src_address = *ip6_address;
490 1 : lbm->per_cpu_sticky_buckets = per_cpu_sticky_buckets;
491 1 : lbm->flow_timeout = flow_timeout;
492 1 : lb_put_writer_lock();
493 1 : return 0;
494 : }
495 :
496 :
497 :
498 : static
499 218 : int lb_vip_port_find_index(ip46_address_t *prefix, u8 plen,
500 : u8 protocol, u16 port,
501 : lb_lkp_type_t lkp_type,
502 : u32 *vip_index)
503 : {
504 218 : lb_main_t *lbm = &lb_main;
505 : lb_vip_t *vip;
506 : /* This must be called with the lock owned */
507 218 : CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
508 218 : ip46_prefix_normalize(prefix, plen);
509 1848 : pool_foreach (vip, lbm->vips) {
510 1790 : if ((vip->flags & LB_AS_FLAGS_USED) &&
511 407 : vip->plen == plen &&
512 189 : vip->prefix.as_u64[0] == prefix->as_u64[0] &&
513 189 : vip->prefix.as_u64[1] == prefix->as_u64[1])
514 : {
515 189 : if((lkp_type == LB_LKP_SAME_IP_PORT &&
516 169 : vip->protocol == protocol &&
517 189 : vip->port == port) ||
518 1 : (lkp_type == LB_LKP_ALL_PORT_IP &&
519 32 : vip->port == 0) ||
520 19 : (lkp_type == LB_LKP_DIFF_IP_PORT &&
521 19 : (vip->protocol != protocol ||
522 19 : vip->port != port) ) )
523 : {
524 160 : *vip_index = vip - lbm->vips;
525 160 : return 0;
526 : }
527 : }
528 : }
529 58 : return VNET_API_ERROR_NO_SUCH_ENTRY;
530 : }
531 :
532 : static
533 174 : int lb_vip_port_find_index_with_lock(ip46_address_t *prefix, u8 plen,
534 : u8 protocol, u16 port, u32 *vip_index)
535 : {
536 174 : return lb_vip_port_find_index(prefix, plen, protocol, port,
537 : LB_LKP_SAME_IP_PORT, vip_index);
538 : }
539 :
540 : static
541 9 : int lb_vip_port_find_all_port_vip(ip46_address_t *prefix, u8 plen,
542 : u32 *vip_index)
543 : {
544 9 : return lb_vip_port_find_index(prefix, plen, ~0, 0,
545 : LB_LKP_ALL_PORT_IP, vip_index);
546 : }
547 :
548 : /* Find out per-port-vip entry with different protocol and port */
549 : static
550 35 : int lb_vip_port_find_diff_port(ip46_address_t *prefix, u8 plen,
551 : u8 protocol, u16 port, u32 *vip_index)
552 : {
553 35 : return lb_vip_port_find_index(prefix, plen, protocol, port,
554 : LB_LKP_DIFF_IP_PORT, vip_index);
555 : }
556 :
557 157 : int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u8 protocol,
558 : u16 port, u32 *vip_index)
559 : {
560 : int ret;
561 157 : lb_get_writer_lock();
562 157 : ret = lb_vip_port_find_index_with_lock(prefix, plen,
563 : protocol, port, vip_index);
564 157 : lb_put_writer_lock();
565 157 : return ret;
566 : }
567 :
568 211 : static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index)
569 : {
570 211 : lb_main_t *lbm = &lb_main;
571 : /* This must be called with the lock owned */
572 211 : CLIB_SPINLOCK_ASSERT_LOCKED (&lbm->writer_lock);
573 : lb_as_t *as;
574 : u32 *asi;
575 631 : pool_foreach (asi, vip->as_indexes) {
576 560 : as = &lbm->ass[*asi];
577 560 : if (as->vip_index == (vip - lbm->vips) &&
578 560 : as->address.as_u64[0] == address->as_u64[0] &&
579 560 : as->address.as_u64[1] == address->as_u64[1])
580 : {
581 140 : *as_index = as - lbm->ass;
582 140 : return 0;
583 : }
584 : }
585 71 : return -1;
586 : }
587 :
588 71 : int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
589 : {
590 71 : lb_main_t *lbm = &lb_main;
591 71 : lb_get_writer_lock();
592 : lb_vip_t *vip;
593 71 : if (!(vip = lb_vip_get_by_index(vip_index))) {
594 0 : lb_put_writer_lock();
595 0 : return VNET_API_ERROR_NO_SUCH_ENTRY;
596 : }
597 :
598 71 : ip46_type_t type = lb_encap_is_ip4(vip)?IP46_TYPE_IP4:IP46_TYPE_IP6;
599 71 : u32 *to_be_added = 0;
600 71 : u32 *to_be_updated = 0;
601 : u32 i;
602 : u32 *ip;
603 : lb_snat_mapping_t *m;
604 :
605 : //Sanity check
606 142 : while (n--) {
607 :
608 71 : if (!lb_as_find_index_vip(vip, &addresses[n], &i)) {
609 0 : if (lbm->ass[i].flags & LB_AS_FLAGS_USED) {
610 0 : vec_free(to_be_added);
611 0 : vec_free(to_be_updated);
612 0 : lb_put_writer_lock();
613 0 : return VNET_API_ERROR_VALUE_EXIST;
614 : }
615 0 : vec_add1(to_be_updated, i);
616 0 : goto next;
617 : }
618 :
619 71 : if (ip46_address_type(&addresses[n]) != type) {
620 0 : vec_free(to_be_added);
621 0 : vec_free(to_be_updated);
622 0 : lb_put_writer_lock();
623 0 : return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
624 : }
625 :
626 71 : if (n) {
627 0 : u32 n2 = n;
628 0 : while(n2--) //Check for duplicates
629 0 : if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
630 0 : addresses[n2].as_u64[1] == addresses[n].as_u64[1])
631 0 : goto next;
632 : }
633 :
634 71 : vec_add1(to_be_added, n);
635 :
636 71 : next:
637 71 : continue;
638 : }
639 :
640 : //Update reused ASs
641 71 : vec_foreach(ip, to_be_updated) {
642 0 : lbm->ass[*ip].flags = LB_AS_FLAGS_USED;
643 : }
644 71 : vec_free(to_be_updated);
645 :
646 : //Create those who have to be created
647 142 : vec_foreach(ip, to_be_added) {
648 : lb_as_t *as;
649 : u32 *as_index;
650 71 : pool_get(lbm->ass, as);
651 71 : as->address = addresses[*ip];
652 71 : as->flags = LB_AS_FLAGS_USED;
653 71 : as->vip_index = vip_index;
654 71 : pool_get(vip->as_indexes, as_index);
655 71 : *as_index = as - lbm->ass;
656 :
657 : /*
658 : * become a child of the FIB entry
659 : * so we are informed when its forwarding changes
660 : */
661 71 : fib_prefix_t nh = {};
662 71 : if (lb_encap_is_ip4(vip)) {
663 45 : nh.fp_addr.ip4 = as->address.ip4;
664 45 : nh.fp_len = 32;
665 45 : nh.fp_proto = FIB_PROTOCOL_IP4;
666 : } else {
667 26 : nh.fp_addr.ip6 = as->address.ip6;
668 26 : nh.fp_len = 128;
669 26 : nh.fp_proto = FIB_PROTOCOL_IP6;
670 : }
671 :
672 142 : as->next_hop_fib_entry_index =
673 71 : fib_table_entry_special_add(0,
674 : &nh,
675 : FIB_SOURCE_RR,
676 : FIB_ENTRY_FLAG_NONE);
677 142 : as->next_hop_child_index =
678 71 : fib_entry_child_add(as->next_hop_fib_entry_index,
679 71 : lbm->fib_node_type,
680 71 : as - lbm->ass);
681 :
682 71 : lb_as_stack(as);
683 :
684 71 : if ( lb_vip_is_nat4_port(vip) || lb_vip_is_nat6_port(vip) )
685 : {
686 : /* Add SNAT static mapping */
687 10 : pool_get (lbm->snat_mappings, m);
688 10 : clib_memset (m, 0, sizeof (*m));
689 10 : if (lb_vip_is_nat4_port(vip)) {
690 : lb_snat4_key_t m_key4;
691 : clib_bihash_kv_8_8_t kv4;
692 5 : m_key4.addr = as->address.ip4;
693 5 : m_key4.port = vip->encap_args.target_port;
694 5 : m_key4.protocol = 0;
695 5 : m_key4.fib_index = 0;
696 :
697 5 : if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
698 : {
699 5 : m->src_ip.ip4 = vip->prefix.ip4;
700 : }
701 0 : else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT)
702 : {
703 0 : m->src_ip.ip4 = lbm->ip4_src_address;
704 : }
705 5 : m->src_ip_is_ipv6 = 0;
706 5 : m->as_ip.ip4 = as->address.ip4;
707 5 : m->as_ip_is_ipv6 = 0;
708 5 : m->src_port = vip->port;
709 5 : m->target_port = vip->encap_args.target_port;
710 5 : m->vrf_id = 0;
711 5 : m->fib_index = 0;
712 :
713 5 : kv4.key = m_key4.as_u64;
714 5 : kv4.value = m - lbm->snat_mappings;
715 5 : clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 1);
716 : } else {
717 : lb_snat6_key_t m_key6;
718 : clib_bihash_kv_24_8_t kv6;
719 5 : m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0];
720 5 : m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1];
721 5 : m_key6.port = vip->encap_args.target_port;
722 5 : m_key6.protocol = 0;
723 5 : m_key6.fib_index = 0;
724 :
725 5 : if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
726 : {
727 5 : m->src_ip.ip6.as_u64[0] = vip->prefix.ip6.as_u64[0];
728 5 : m->src_ip.ip6.as_u64[1] = vip->prefix.ip6.as_u64[1];
729 : }
730 0 : else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT)
731 : {
732 0 : m->src_ip.ip6.as_u64[0] = lbm->ip6_src_address.as_u64[0];
733 0 : m->src_ip.ip6.as_u64[1] = lbm->ip6_src_address.as_u64[1];
734 : }
735 5 : m->src_ip_is_ipv6 = 1;
736 5 : m->as_ip.ip6.as_u64[0] = as->address.ip6.as_u64[0];
737 5 : m->as_ip.ip6.as_u64[1] = as->address.ip6.as_u64[1];
738 5 : m->as_ip_is_ipv6 = 1;
739 5 : m->src_port = vip->port;
740 5 : m->target_port = vip->encap_args.target_port;
741 5 : m->vrf_id = 0;
742 5 : m->fib_index = 0;
743 :
744 5 : kv6.key[0] = m_key6.as_u64[0];
745 5 : kv6.key[1] = m_key6.as_u64[1];
746 5 : kv6.key[2] = m_key6.as_u64[2];
747 5 : kv6.value = m - lbm->snat_mappings;
748 5 : clib_bihash_add_del_24_8(&lbm->mapping_by_as6, &kv6, 1);
749 : }
750 : }
751 : }
752 71 : vec_free(to_be_added);
753 :
754 : //Recompute flows
755 71 : lb_vip_update_new_flow_table(vip);
756 :
757 : //Garbage collection maybe
758 71 : lb_vip_garbage_collection(vip);
759 :
760 71 : lb_put_writer_lock();
761 71 : return 0;
762 : }
763 :
764 : int
765 14 : lb_flush_vip_as (u32 vip_index, u32 as_index)
766 : {
767 : u32 thread_index;
768 14 : vlib_thread_main_t *tm = vlib_get_thread_main();
769 14 : lb_main_t *lbm = &lb_main;
770 :
771 28 : for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
772 14 : lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
773 14 : if (h != NULL) {
774 : u32 i;
775 : lb_hash_bucket_t *b;
776 :
777 71694 : lb_hash_foreach_entry(h, b, i) {
778 57344 : if ((vip_index == ~0)
779 0 : || ((b->vip[i] == vip_index) && (as_index == ~0))
780 0 : || ((b->vip[i] == vip_index) && (b->value[i] == as_index)))
781 : {
782 57344 : vlib_refcount_add(&lbm->as_refcount, thread_index, b->value[i], -1);
783 57344 : vlib_refcount_add(&lbm->as_refcount, thread_index, 0, 1);
784 57344 : b->vip[i] = ~0;
785 57344 : b->value[i] = 0;
786 : }
787 : }
788 14 : if (vip_index == ~0)
789 : {
790 14 : lb_hash_free(h);
791 14 : lbm->per_cpu[thread_index].sticky_ht = 0;
792 : }
793 : }
794 : }
795 :
796 14 : return 0;
797 : }
798 :
799 84 : int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n,
800 : u8 flush)
801 : {
802 84 : lb_main_t *lbm = &lb_main;
803 84 : u32 now = (u32) vlib_time_now(vlib_get_main());
804 84 : u32 *ip = 0;
805 84 : u32 as_index = 0;
806 :
807 : lb_vip_t *vip;
808 84 : if (!(vip = lb_vip_get_by_index(vip_index))) {
809 0 : return VNET_API_ERROR_NO_SUCH_ENTRY;
810 : }
811 :
812 84 : u32 *indexes = NULL;
813 224 : while (n--) {
814 140 : if (lb_as_find_index_vip(vip, &addresses[n], &as_index)) {
815 0 : vec_free(indexes);
816 0 : return VNET_API_ERROR_NO_SUCH_ENTRY;
817 : }
818 :
819 140 : if (n) { //Check for duplicates
820 56 : u32 n2 = n - 1;
821 140 : while(n2--) {
822 84 : if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
823 84 : addresses[n2].as_u64[1] == addresses[n].as_u64[1])
824 0 : goto next;
825 : }
826 : }
827 :
828 140 : vec_add1(indexes, as_index);
829 140 : next:
830 140 : continue;
831 : }
832 :
833 : //Garbage collection maybe
834 84 : lb_vip_garbage_collection(vip);
835 :
836 84 : if (indexes != NULL) {
837 224 : vec_foreach(ip, indexes) {
838 140 : lbm->ass[*ip].flags &= ~LB_AS_FLAGS_USED;
839 140 : lbm->ass[*ip].last_used = now;
840 :
841 140 : if(flush)
842 : {
843 : /* flush flow table for deleted ASs*/
844 0 : lb_flush_vip_as(vip_index, *ip);
845 : }
846 : }
847 :
848 : //Recompute flows
849 84 : lb_vip_update_new_flow_table(vip);
850 : }
851 :
852 84 : vec_free(indexes);
853 84 : return 0;
854 : }
855 :
856 70 : int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n, u8 flush)
857 : {
858 70 : lb_get_writer_lock();
859 70 : int ret = lb_vip_del_ass_withlock(vip_index, addresses, n, flush);
860 70 : lb_put_writer_lock();
861 :
862 70 : return ret;
863 : }
864 :
865 : static int
866 8 : lb_vip_prefix_index_alloc (lb_main_t *lbm)
867 : {
868 : /*
869 : * Check for dynamically allocated instance number.
870 : */
871 : u32 bit;
872 :
873 8 : bit = clib_bitmap_first_clear (lbm->vip_prefix_indexes);
874 :
875 8 : lbm->vip_prefix_indexes = clib_bitmap_set(lbm->vip_prefix_indexes, bit, 1);
876 :
877 8 : return bit;
878 : }
879 :
880 : static int
881 8 : lb_vip_prefix_index_free (lb_main_t *lbm, u32 instance)
882 : {
883 :
884 8 : if (clib_bitmap_get (lbm->vip_prefix_indexes, instance) == 0)
885 : {
886 0 : return -1;
887 : }
888 :
889 8 : lbm->vip_prefix_indexes = clib_bitmap_set (lbm->vip_prefix_indexes,
890 : instance, 0);
891 :
892 8 : return 0;
893 : }
894 :
895 : /**
896 : * Add the VIP adjacency to the ip4 or ip6 fib
897 : */
898 17 : static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip,
899 : u32 *vip_prefix_index)
900 : {
901 17 : dpo_proto_t proto = 0;
902 17 : dpo_type_t dpo_type = 0;
903 17 : u32 vip_idx = 0;
904 :
905 17 : if (vip->port != 0)
906 : {
907 : /* for per-port vip, if VIP adjacency has been added,
908 : * no need to add adjacency. */
909 9 : if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen,
910 9 : vip->protocol, vip->port, &vip_idx))
911 : {
912 1 : lb_vip_t *exists_vip = lb_vip_get_by_index(vip_idx);
913 1 : *vip_prefix_index = exists_vip ? exists_vip->vip_prefix_index : ~0;
914 1 : return;
915 : }
916 :
917 : /* Allocate an index for per-port vip */
918 8 : *vip_prefix_index = lb_vip_prefix_index_alloc(lbm);
919 : }
920 : else
921 : {
922 8 : *vip_prefix_index = vip - lbm->vips;
923 : }
924 :
925 16 : dpo_id_t dpo = DPO_INVALID;
926 16 : fib_prefix_t pfx = {};
927 16 : if (lb_vip_is_ip4(vip->type)) {
928 9 : pfx.fp_addr.ip4 = vip->prefix.ip4;
929 9 : pfx.fp_len = vip->plen - 96;
930 9 : pfx.fp_proto = FIB_PROTOCOL_IP4;
931 9 : proto = DPO_PROTO_IP4;
932 : } else {
933 7 : pfx.fp_addr.ip6 = vip->prefix.ip6;
934 7 : pfx.fp_len = vip->plen;
935 7 : pfx.fp_proto = FIB_PROTOCOL_IP6;
936 7 : proto = DPO_PROTO_IP6;
937 : }
938 :
939 16 : if (lb_vip_is_gre4(vip))
940 2 : dpo_type = lbm->dpo_gre4_type;
941 14 : else if (lb_vip_is_gre6(vip))
942 4 : dpo_type = lbm->dpo_gre6_type;
943 10 : else if (lb_vip_is_gre4_port(vip))
944 2 : dpo_type = lbm->dpo_gre4_port_type;
945 8 : else if (lb_vip_is_gre6_port(vip))
946 2 : dpo_type = lbm->dpo_gre6_port_type;
947 6 : else if (lb_vip_is_l3dsr(vip))
948 2 : dpo_type = lbm->dpo_l3dsr_type;
949 4 : else if (lb_vip_is_l3dsr_port(vip))
950 2 : dpo_type = lbm->dpo_l3dsr_port_type;
951 2 : else if(lb_vip_is_nat4_port(vip))
952 1 : dpo_type = lbm->dpo_nat4_port_type;
953 1 : else if (lb_vip_is_nat6_port(vip))
954 1 : dpo_type = lbm->dpo_nat6_port_type;
955 :
956 16 : dpo_set(&dpo, dpo_type, proto, *vip_prefix_index);
957 16 : fib_table_entry_special_dpo_add(0,
958 : &pfx,
959 : lb_fib_src,
960 : FIB_ENTRY_FLAG_EXCLUSIVE,
961 : &dpo);
962 16 : dpo_reset(&dpo);
963 : }
964 :
965 : /**
966 : * Add the VIP filter entry
967 : */
968 9 : static int lb_vip_add_port_filter(lb_main_t *lbm, lb_vip_t *vip,
969 : u32 vip_prefix_index, u32 vip_idx)
970 : {
971 : vip_port_key_t key;
972 : clib_bihash_kv_8_8_t kv;
973 :
974 9 : key.vip_prefix_index = vip_prefix_index;
975 9 : key.protocol = vip->protocol;
976 9 : key.port = clib_host_to_net_u16(vip->port);
977 9 : key.rsv = 0;
978 :
979 9 : kv.key = key.as_u64;
980 9 : kv.value = vip_idx;
981 9 : clib_bihash_add_del_8_8(&lbm->vip_index_per_port, &kv, 1);
982 :
983 9 : return 0;
984 : }
985 :
986 : /**
987 : * Del the VIP filter entry
988 : */
989 9 : static int lb_vip_del_port_filter(lb_main_t *lbm, lb_vip_t *vip)
990 : {
991 : vip_port_key_t key;
992 : clib_bihash_kv_8_8_t kv, value;
993 9 : lb_vip_t *m = 0;
994 :
995 9 : key.vip_prefix_index = vip->vip_prefix_index;
996 9 : key.protocol = vip->protocol;
997 9 : key.port = clib_host_to_net_u16(vip->port);
998 9 : key.rsv = 0;
999 :
1000 9 : kv.key = key.as_u64;
1001 9 : if(clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) != 0)
1002 : {
1003 0 : clib_warning("looking up vip_index_per_port failed.");
1004 0 : return VNET_API_ERROR_NO_SUCH_ENTRY;
1005 : }
1006 9 : m = pool_elt_at_index (lbm->vips, value.value);
1007 9 : ASSERT (m);
1008 :
1009 9 : kv.value = m - lbm->vips;
1010 9 : clib_bihash_add_del_8_8(&lbm->vip_index_per_port, &kv, 0);
1011 :
1012 9 : return 0;
1013 : }
1014 :
1015 : /**
1016 : * Deletes the adjacency associated with the VIP
1017 : */
1018 16 : static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip)
1019 : {
1020 16 : fib_prefix_t pfx = {};
1021 16 : u32 vip_idx = 0;
1022 :
1023 16 : if (vip->port != 0)
1024 : {
1025 : /* If this vip adjacency is used by other per-port vip,
1026 : * no need to del this adjacency. */
1027 9 : if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen,
1028 9 : vip->protocol, vip->port, &vip_idx))
1029 : {
1030 1 : lb_put_writer_lock();
1031 1 : return;
1032 : }
1033 :
1034 : /* Return vip_prefix_index for per-port vip */
1035 8 : lb_vip_prefix_index_free(lbm, vip->vip_prefix_index);
1036 :
1037 : }
1038 :
1039 15 : if (lb_vip_is_ip4(vip->type)) {
1040 9 : pfx.fp_addr.ip4 = vip->prefix.ip4;
1041 9 : pfx.fp_len = vip->plen - 96;
1042 9 : pfx.fp_proto = FIB_PROTOCOL_IP4;
1043 : } else {
1044 6 : pfx.fp_addr.ip6 = vip->prefix.ip6;
1045 6 : pfx.fp_len = vip->plen;
1046 6 : pfx.fp_proto = FIB_PROTOCOL_IP6;
1047 : }
1048 15 : fib_table_entry_special_remove(0, &pfx, lb_fib_src);
1049 : }
1050 :
1051 17 : int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
1052 : {
1053 17 : lb_main_t *lbm = &lb_main;
1054 17 : vlib_main_t *vm = vlib_get_main();
1055 : lb_vip_t *vip;
1056 17 : lb_vip_type_t type = args.type;
1057 17 : u32 vip_prefix_index = 0;
1058 :
1059 17 : lb_get_writer_lock();
1060 17 : ip46_prefix_normalize(&(args.prefix), args.plen);
1061 :
1062 17 : if (!lb_vip_port_find_index_with_lock(&(args.prefix), args.plen,
1063 17 : args.protocol, args.port,
1064 : vip_index))
1065 : {
1066 0 : lb_put_writer_lock();
1067 0 : return VNET_API_ERROR_VALUE_EXIST;
1068 : }
1069 :
1070 : /* Make sure we can't add a per-port VIP entry
1071 : * when there already is an all-port VIP for the same prefix. */
1072 26 : if ((args.port != 0) &&
1073 9 : !lb_vip_port_find_all_port_vip(&(args.prefix), args.plen, vip_index))
1074 : {
1075 0 : lb_put_writer_lock();
1076 0 : return VNET_API_ERROR_VALUE_EXIST;
1077 : }
1078 :
1079 : /* Make sure we can't add a all-port VIP entry
1080 : * when there already is an per-port VIP for the same prefix. */
1081 25 : if ((args.port == 0) &&
1082 8 : !lb_vip_port_find_diff_port(&(args.prefix), args.plen,
1083 8 : args.protocol, args.port, vip_index))
1084 : {
1085 0 : lb_put_writer_lock();
1086 0 : return VNET_API_ERROR_VALUE_EXIST;
1087 : }
1088 :
1089 : /* Make sure all VIP for a given prefix (using different ports) have the same type. */
1090 26 : if ((args.port != 0) &&
1091 9 : !lb_vip_port_find_diff_port(&(args.prefix), args.plen,
1092 9 : args.protocol, args.port, vip_index)
1093 1 : && (args.type != lbm->vips[*vip_index].type))
1094 : {
1095 0 : lb_put_writer_lock();
1096 0 : return VNET_API_ERROR_INVALID_ARGUMENT;
1097 : }
1098 :
1099 17 : if (!is_pow2(args.new_length)) {
1100 0 : lb_put_writer_lock();
1101 0 : return VNET_API_ERROR_INVALID_MEMORY_SIZE;
1102 : }
1103 :
1104 17 : if (ip46_prefix_is_ip4(&(args.prefix), args.plen) &&
1105 8 : !lb_vip_is_ip4(type)) {
1106 0 : lb_put_writer_lock();
1107 0 : return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
1108 : }
1109 :
1110 17 : if ((!ip46_prefix_is_ip4(&(args.prefix), args.plen)) &&
1111 3 : !lb_vip_is_ip6(type)) {
1112 0 : lb_put_writer_lock();
1113 0 : return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
1114 : }
1115 :
1116 17 : if ((type == LB_VIP_TYPE_IP4_L3DSR) &&
1117 5 : (args.encap_args.dscp >= 64) )
1118 : {
1119 0 : lb_put_writer_lock();
1120 0 : return VNET_API_ERROR_VALUE_EXIST;
1121 : }
1122 :
1123 : //Allocate
1124 17 : pool_get(lbm->vips, vip);
1125 :
1126 : //Init
1127 17 : memcpy (&(vip->prefix), &(args.prefix), sizeof(args.prefix));
1128 17 : vip->plen = args.plen;
1129 17 : if (args.port != 0)
1130 : {
1131 9 : vip->protocol = args.protocol;
1132 9 : vip->port = args.port;
1133 : }
1134 : else
1135 : {
1136 8 : vip->protocol = (u8)~0;
1137 8 : vip->port = 0;
1138 : }
1139 17 : vip->last_garbage_collection = (u32) vlib_time_now(vlib_get_main());
1140 17 : vip->type = args.type;
1141 :
1142 17 : if (args.type == LB_VIP_TYPE_IP4_L3DSR) {
1143 5 : vip->encap_args.dscp = args.encap_args.dscp;
1144 : }
1145 12 : else if ((args.type == LB_VIP_TYPE_IP4_NAT4)
1146 11 : ||(args.type == LB_VIP_TYPE_IP6_NAT6)) {
1147 2 : vip->encap_args.srv_type = args.encap_args.srv_type;
1148 2 : vip->encap_args.target_port =
1149 2 : clib_host_to_net_u16(args.encap_args.target_port);
1150 : }
1151 :
1152 17 : vip->flags = LB_VIP_FLAGS_USED;
1153 17 : if (args.src_ip_sticky)
1154 : {
1155 2 : vip->flags |= LB_VIP_FLAGS_SRC_IP_STICKY;
1156 : }
1157 17 : vip->as_indexes = 0;
1158 :
1159 : //Validate counters
1160 : u32 i;
1161 85 : for (i = 0; i < LB_N_VIP_COUNTERS; i++) {
1162 68 : vlib_validate_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
1163 68 : vlib_zero_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
1164 : }
1165 :
1166 : //Configure new flow table
1167 17 : vip->new_flow_table_mask = args.new_length - 1;
1168 17 : vip->new_flow_table = 0;
1169 :
1170 : //Update flow hash table
1171 17 : lb_vip_update_new_flow_table(vip);
1172 :
1173 : //Create adjacency to direct traffic
1174 17 : lb_vip_add_adjacency(lbm, vip, &vip_prefix_index);
1175 :
1176 17 : if ( (lb_vip_is_nat4_port(vip) || lb_vip_is_nat6_port(vip))
1177 2 : && (args.encap_args.srv_type == LB_SRV_TYPE_NODEPORT) )
1178 : {
1179 : u32 key;
1180 : uword * entry;
1181 :
1182 : //Create maping from nodeport to vip_index
1183 0 : key = clib_host_to_net_u16(args.port);
1184 0 : entry = hash_get_mem (lbm->vip_index_by_nodeport, &key);
1185 0 : if (entry) {
1186 0 : lb_put_writer_lock();
1187 0 : return VNET_API_ERROR_VALUE_EXIST;
1188 : }
1189 :
1190 0 : hash_set_mem (lbm->vip_index_by_nodeport, &key, vip - lbm->vips);
1191 :
1192 : /* receive packets destined to NodeIP:NodePort */
1193 0 : udp_register_dst_port (vm, args.port, lb4_nodeport_node.index, 1);
1194 0 : udp_register_dst_port (vm, args.port, lb6_nodeport_node.index, 0);
1195 : }
1196 :
1197 17 : *vip_index = vip - lbm->vips;
1198 : //Create per-port vip filtering table
1199 17 : if (args.port != 0)
1200 : {
1201 9 : lb_vip_add_port_filter(lbm, vip, vip_prefix_index, *vip_index);
1202 9 : vip->vip_prefix_index = vip_prefix_index;
1203 : }
1204 :
1205 17 : lb_put_writer_lock();
1206 17 : return 0;
1207 : }
1208 :
1209 16 : int lb_vip_del(u32 vip_index)
1210 : {
1211 16 : lb_main_t *lbm = &lb_main;
1212 : lb_vip_t *vip;
1213 16 : int rv = 0;
1214 :
1215 : /* Does not remove default vip, i.e. vip_index = 0 */
1216 16 : if (vip_index == 0)
1217 0 : return VNET_API_ERROR_INVALID_VALUE;
1218 :
1219 16 : lb_get_writer_lock();
1220 16 : if (!(vip = lb_vip_get_by_index(vip_index))) {
1221 0 : lb_put_writer_lock();
1222 0 : return VNET_API_ERROR_NO_SUCH_ENTRY;
1223 : }
1224 :
1225 : //FIXME: This operation is actually not working
1226 : //We will need to remove state before performing this.
1227 :
1228 : {
1229 : //Remove all ASs
1230 16 : ip46_address_t *ass = 0;
1231 : lb_as_t *as;
1232 : u32 *as_index;
1233 :
1234 86 : pool_foreach (as_index, vip->as_indexes) {
1235 70 : as = &lbm->ass[*as_index];
1236 70 : vec_add1(ass, as->address);
1237 : }
1238 16 : if (vec_len(ass))
1239 14 : lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass), 0);
1240 16 : vec_free(ass);
1241 : }
1242 :
1243 : //Delete adjacency
1244 16 : lb_vip_del_adjacency(lbm, vip);
1245 :
1246 : //Delete per-port vip filtering entry
1247 16 : if (vip->port != 0)
1248 : {
1249 9 : rv = lb_vip_del_port_filter(lbm, vip);
1250 : }
1251 :
1252 : //Set the VIP as unused
1253 16 : vip->flags &= ~LB_VIP_FLAGS_USED;
1254 :
1255 16 : lb_put_writer_lock();
1256 16 : return rv;
1257 : }
1258 :
/* *INDENT-OFF* */
/* Plugin registration record: advertises the build version and a
 * human-readable description of this plugin. */
VLIB_PLUGIN_REGISTER () = {
    .version = VPP_BUILD_VER,
    .description = "Load Balancer (LB)",
};
/* *INDENT-ON* */
1265 :
1266 0 : u8 *format_lb_dpo (u8 * s, va_list * va)
1267 : {
1268 0 : index_t index = va_arg (*va, index_t);
1269 0 : CLIB_UNUSED(u32 indent) = va_arg (*va, u32);
1270 0 : lb_main_t *lbm = &lb_main;
1271 0 : lb_vip_t *vip = pool_elt_at_index (lbm->vips, index);
1272 0 : return format (s, "%U", format_lb_vip, vip);
1273 : }
1274 :
1275 96 : static void lb_dpo_lock (dpo_id_t *dpo) {}
1276 93 : static void lb_dpo_unlock (dpo_id_t *dpo) {}
1277 :
1278 : static fib_node_t *
1279 230 : lb_fib_node_get_node (fib_node_index_t index)
1280 : {
1281 230 : lb_main_t *lbm = &lb_main;
1282 230 : lb_as_t *as = pool_elt_at_index (lbm->ass, index);
1283 230 : return (&as->fib_node);
1284 : }
1285 :
1286 : static void
1287 0 : lb_fib_node_last_lock_gone (fib_node_t *node)
1288 : {
1289 0 : }
1290 :
1291 : static lb_as_t *
1292 230 : lb_as_from_fib_node (fib_node_t *node)
1293 : {
1294 230 : return ((lb_as_t*)(((char*)node) -
1295 : STRUCT_OFFSET_OF(lb_as_t, fib_node)));
1296 : }
1297 :
1298 : static void
1299 301 : lb_as_stack (lb_as_t *as)
1300 : {
1301 301 : lb_main_t *lbm = &lb_main;
1302 301 : lb_vip_t *vip = &lbm->vips[as->vip_index];
1303 301 : dpo_type_t dpo_type = 0;
1304 :
1305 301 : if (lb_vip_is_gre4(vip))
1306 55 : dpo_type = lbm->dpo_gre4_type;
1307 246 : else if (lb_vip_is_gre6(vip))
1308 41 : dpo_type = lbm->dpo_gre6_type;
1309 205 : else if (lb_vip_is_gre4_port(vip))
1310 45 : dpo_type = lbm->dpo_gre4_port_type;
1311 160 : else if (lb_vip_is_gre6_port(vip))
1312 30 : dpo_type = lbm->dpo_gre6_port_type;
1313 130 : else if (lb_vip_is_l3dsr(vip))
1314 55 : dpo_type = lbm->dpo_l3dsr_type;
1315 75 : else if (lb_vip_is_l3dsr_port(vip))
1316 55 : dpo_type = lbm->dpo_l3dsr_port_type;
1317 20 : else if(lb_vip_is_nat4_port(vip))
1318 15 : dpo_type = lbm->dpo_nat4_port_type;
1319 5 : else if (lb_vip_is_nat6_port(vip))
1320 5 : dpo_type = lbm->dpo_nat6_port_type;
1321 :
1322 301 : dpo_stack(dpo_type,
1323 301 : lb_vip_is_ip4(vip->type)?DPO_PROTO_IP4:DPO_PROTO_IP6,
1324 : &as->dpo,
1325 : fib_entry_contribute_ip_forwarding(
1326 : as->next_hop_fib_entry_index));
1327 301 : }
1328 :
1329 : static fib_node_back_walk_rc_t
1330 230 : lb_fib_node_back_walk_notify (fib_node_t *node,
1331 : fib_node_back_walk_ctx_t *ctx)
1332 : {
1333 230 : lb_as_stack(lb_as_from_fib_node(node));
1334 230 : return (FIB_NODE_BACK_WALK_CONTINUE);
1335 : }
1336 :
1337 0 : int lb_nat4_interface_add_del (u32 sw_if_index, int is_del)
1338 : {
1339 0 : if (is_del)
1340 : {
1341 0 : vnet_feature_enable_disable ("ip4-unicast", "lb-nat4-in2out",
1342 : sw_if_index, 0, 0, 0);
1343 : }
1344 : else
1345 : {
1346 0 : vnet_feature_enable_disable ("ip4-unicast", "lb-nat4-in2out",
1347 : sw_if_index, 1, 0, 0);
1348 : }
1349 :
1350 0 : return 0;
1351 : }
1352 :
1353 0 : int lb_nat6_interface_add_del (u32 sw_if_index, int is_del)
1354 : {
1355 0 : if (is_del)
1356 : {
1357 0 : vnet_feature_enable_disable ("ip6-unicast", "lb-nat6-in2out",
1358 : sw_if_index, 0, 0, 0);
1359 : }
1360 : else
1361 : {
1362 0 : vnet_feature_enable_disable ("ip6-unicast", "lb-nat6-in2out",
1363 : sw_if_index, 1, 0, 0);
1364 : }
1365 :
1366 0 : return 0;
1367 : }
1368 :
/**
 * Initialize the global load-balancer state (lb_main).
 *
 * Allocates the default VIP and default AS, sets configuration defaults,
 * registers the DPO types and the FIB node type, creates the lookup tables
 * (nodeport hash, per-port VIP bihash, NAT mapping bihashes), names the VIP
 * counters and allocates the FIB source used for VIP routes.
 *
 * @param vm vlib main, stored in lbm->vlib_main.
 * @return NULL in all cases.
 */
clib_error_t *
lb_init (vlib_main_t * vm)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  lb_main_t *lbm = &lb_main;
  lbm->vnet_main = vnet_get_main ();
  lbm->vlib_main = vm;

  lb_vip_t *default_vip;
  lb_as_t *default_as;
  /* Callbacks used by the FIB for nodes of the "lb" type. */
  fib_node_vft_t lb_fib_node_vft = {
      .fnv_get = lb_fib_node_get_node,
      .fnv_last_lock = lb_fib_node_last_lock_gone,
      .fnv_back_walk = lb_fib_node_back_walk_notify,
  };
  /* Callbacks shared by every DPO type registered below. */
  dpo_vft_t lb_vft = {
      .dv_lock = lb_dpo_lock,
      .dv_unlock = lb_dpo_unlock,
      .dv_format = format_lb_dpo,
  };

  //Allocate and init default VIP.
  //It is the first element taken from an empty pool.
  lbm->vips = 0;
  pool_get(lbm->vips, default_vip);
  default_vip->new_flow_table_mask = 0;
  default_vip->prefix.ip6.as_u64[0] = 0xffffffffffffffffL;
  default_vip->prefix.ip6.as_u64[1] = 0xffffffffffffffffL;
  default_vip->protocol = ~0;
  default_vip->port = 0;
  default_vip->flags = LB_VIP_FLAGS_USED;

  //Per-thread state (one slot per vlib main), writer lock and defaults
  lbm->per_cpu = 0;
  vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1);
  clib_spinlock_init (&lbm->writer_lock);
  lbm->per_cpu_sticky_buckets = LB_DEFAULT_PER_CPU_STICKY_BUCKETS;
  lbm->flow_timeout = LB_DEFAULT_FLOW_TIMEOUT;
  //Source addresses start out as all-ones
  lbm->ip4_src_address.as_u32 = 0xffffffff;
  lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL;
  lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL;
  //One DPO type per encapsulation variant, each with its own node names
  lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes);
  lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes);
  lbm->dpo_gre4_port_type = dpo_register_new_type(&lb_vft,
                                                  lb_dpo_gre4_port_nodes);
  lbm->dpo_gre6_port_type = dpo_register_new_type(&lb_vft,
                                                  lb_dpo_gre6_port_nodes);
  lbm->dpo_l3dsr_type = dpo_register_new_type(&lb_vft,
                                              lb_dpo_l3dsr_nodes);
  lbm->dpo_l3dsr_port_type = dpo_register_new_type(&lb_vft,
                                                   lb_dpo_l3dsr_port_nodes);
  lbm->dpo_nat4_port_type = dpo_register_new_type(&lb_vft,
                                                  lb_dpo_nat4_port_nodes);
  lbm->dpo_nat6_port_type = dpo_register_new_type(&lb_vft,
                                                  lb_dpo_nat6_port_nodes);
  lbm->fib_node_type = fib_node_register_new_type ("lb", &lb_fib_node_vft);

  //Init AS reference counters
  vlib_refcount_init(&lbm->as_refcount);

  //Allocate and init default AS.
  //Its DPO sends packets to the drop next node.
  lbm->ass = 0;
  pool_get(lbm->ass, default_as);
  default_as->flags = 0;
  default_as->dpo.dpoi_next_node = LB_NEXT_DROP;
  default_as->vip_index = ~0;
  default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL;
  default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL;

  /* Generate a valid flow table for default VIP */
  default_vip->as_indexes = NULL;
  lb_get_writer_lock();
  lb_vip_update_new_flow_table(default_vip);
  lb_put_writer_lock();

  //Nodeport -> VIP index map, keyed by a u16 stored in memory
  lbm->vip_index_by_nodeport
    = hash_create_mem (0, sizeof(u16), sizeof (uword));

  //Per-port VIP filter table
  clib_bihash_init_8_8 (&lbm->vip_index_per_port,
                        "vip_index_per_port", LB_VIP_PER_PORT_BUCKETS,
                        LB_VIP_PER_PORT_MEMORY_SIZE);

  //Mapping tables, one with 8-byte keys and one with 24-byte keys
  clib_bihash_init_8_8 (&lbm->mapping_by_as4,
                        "mapping_by_as4", LB_MAPPING_BUCKETS,
                        LB_MAPPING_MEMORY_SIZE);

  clib_bihash_init_24_8 (&lbm->mapping_by_as6,
                         "mapping_by_as6", LB_MAPPING_BUCKETS,
                         LB_MAPPING_MEMORY_SIZE);

  //Assign a name to every VIP counter listed by lb_foreach_vip_counter
#define _(a,b,c) lbm->vip_counters[c].name = b;
  lb_foreach_vip_counter
#undef _

  //FIB source used when adding and removing VIP routes
  lb_fib_src = fib_source_allocate("lb",
                                   FIB_SOURCE_PRIORITY_HI,
                                   FIB_SOURCE_BH_SIMPLE);

  return NULL;
}
1467 :
/* Register lb_init as a VLIB init function. */
VLIB_INIT_FUNCTION (lb_init);
|