Line data Source code
1 : /*
2 : * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 :
16 : /**
17 : * @file
18 : * @brief TCP host stack utilities
19 : */
20 :
21 : #include <vnet/tcp/tcp.h>
22 : #include <vnet/tcp/tcp_inlines.h>
23 : #include <vnet/session/session.h>
24 : #include <vnet/fib/fib.h>
25 : #include <vnet/dpo/load_balance.h>
26 : #include <math.h>
27 :
28 : #include <vlib/stats/stats.h>
29 :
30 : tcp_main_t tcp_main;
31 :
32 : typedef struct
33 : {
34 : fib_protocol_t nh_proto;
35 : vnet_link_t link_type;
36 : ip46_address_t ip;
37 : u32 sw_if_index;
38 : u8 is_add;
39 : } tcp_add_del_adj_args_t;
40 :
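/* Note: these adjacency helpers are only invoked for IPv6 link-local peers
 * (see the ip6_address_is_link_local_unicast() checks in
 * tcp_connection_cleanup and tcp_connection_init_vars), which is why the
 * delete path below hardcodes FIB_PROTOCOL_IP6 / VNET_LINK_IP6. */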
41 : static void
42 0 : tcp_add_del_adj_cb (tcp_add_del_adj_args_t * args)
43 : {
44 : u32 ai;
45 0 : if (args->is_add)
46 : {
47 0 : adj_nbr_add_or_lock (args->nh_proto, args->link_type, &args->ip,
48 : args->sw_if_index);
49 : }
50 : else
51 : {
52 0 : ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &args->ip,
53 : args->sw_if_index);
54 0 : if (ai != ADJ_INDEX_INVALID)
55 0 : adj_unlock (ai);
56 : }
57 0 : }
58 :
59 : static void
60 0 : tcp_add_del_adjacency (tcp_connection_t * tc, u8 is_add)
61 : {
62 0 : tcp_add_del_adj_args_t args = {
63 : .nh_proto = FIB_PROTOCOL_IP6,
64 : .link_type = VNET_LINK_IP6,
65 : .ip = tc->c_rmt_ip,
66 0 : .sw_if_index = tc->sw_if_index,
67 : .is_add = is_add
68 : };
69 0 : vlib_rpc_call_main_thread (tcp_add_del_adj_cb, (u8 *) & args,
70 : sizeof (args));
71 0 : }
72 :
73 : static void
74 267 : tcp_cc_init (tcp_connection_t * tc)
75 : {
76 : /* As per RFC 6582 initialize "recover" to iss */
77 267 : if (tcp_opts_sack_permitted (&tc->rcv_opts))
78 264 : tc->snd_congestion = tc->iss;
79 :
80 267 : tc->cc_algo->init (tc);
81 267 : }
82 :
83 : static void
84 134 : tcp_cc_cleanup (tcp_connection_t * tc)
85 : {
86 134 : if (tc->cc_algo->cleanup)
87 0 : tc->cc_algo->cleanup (tc);
88 134 : }
89 :
90 : void
91 1118 : tcp_cc_algo_register (tcp_cc_algorithm_type_e type,
92 : const tcp_cc_algorithm_t * vft)
93 : {
94 1118 : tcp_main_t *tm = vnet_get_tcp_main ();
95 1118 : vec_validate (tm->cc_algos, type);
96 :
97 1118 : tm->cc_algos[type] = *vft;
98 2236 : hash_set_mem (tm->cc_algo_by_name, vft->name, type);
99 1118 : }
100 :
101 : tcp_cc_algorithm_t *
102 171 : tcp_cc_algo_get (tcp_cc_algorithm_type_e type)
103 : {
104 171 : tcp_main_t *tm = vnet_get_tcp_main ();
105 171 : return &tm->cc_algos[type];
106 : }
107 :
108 : tcp_cc_algorithm_type_e
109 0 : tcp_cc_algo_new_type (const tcp_cc_algorithm_t * vft)
110 : {
111 0 : tcp_main_t *tm = vnet_get_tcp_main ();
112 0 : tcp_cc_algo_register (++tm->cc_last_type, vft);
113 0 : return tm->cc_last_type;
114 : }
115 :
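/* A minimal sketch of how an out-of-tree congestion control algorithm could
 * plug into the registration API above. Only the vft members this file
 * dereferences (name, init, cleanup) are filled in; a real
 * tcp_cc_algorithm_t carries additional callbacks, the function and variable
 * names are illustrative, and the window values are placeholders. */

static void
example_cc_init (tcp_connection_t * tc)
{
  tc->cwnd = 10 * tc->snd_mss;	/* placeholder initial window */
  tc->ssthresh = tc->snd_wnd;	/* placeholder slow-start threshold */
}

static const tcp_cc_algorithm_t example_cc_algo = {
  .name = "example-cc",
  .init = example_cc_init,
};

static clib_error_t *
example_cc_register (vlib_main_t * vm)
{
  /* Allocates a fresh algorithm type id and adds it to cc_algo_by_name so
   * it can later be selected via tcp_cc_algo_get () */
  tcp_cc_algo_new_type (&example_cc_algo);
  return 0;
}
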
116 : static u32
117 39 : tcp_connection_bind (u32 session_index, transport_endpoint_cfg_t *lcl)
118 : {
119 39 : tcp_main_t *tm = &tcp_main;
120 : tcp_connection_t *listener;
121 : void *iface_ip;
122 :
123 39 : pool_get (tm->listener_pool, listener);
124 39 : clib_memset (listener, 0, sizeof (*listener));
125 :
126 39 : listener->c_c_index = listener - tm->listener_pool;
127 39 : listener->c_lcl_port = lcl->port;
128 :
129 : /* If we are provided a sw_if_index, bind using one of its ips */
130 39 : if (ip_is_zero (&lcl->ip, 1) && lcl->sw_if_index != ENDPOINT_INVALID_INDEX)
131 : {
132 29 : if ((iface_ip = ip_interface_get_first_ip (lcl->sw_if_index,
133 29 : lcl->is_ip4)))
134 28 : ip_set (&lcl->ip, iface_ip, lcl->is_ip4);
135 : }
136 39 : ip_copy (&listener->c_lcl_ip, &lcl->ip, lcl->is_ip4);
137 39 : listener->c_is_ip4 = lcl->is_ip4;
138 39 : listener->c_proto = TRANSPORT_PROTO_TCP;
139 39 : listener->c_s_index = session_index;
140 39 : listener->c_fib_index = lcl->fib_index;
141 39 : listener->state = TCP_STATE_LISTEN;
142 39 : listener->cc_algo = tcp_cc_algo_get (tcp_cfg.cc_algo);
143 :
144 39 : tcp_connection_timers_init (listener);
145 :
146 : TCP_EVT (TCP_EVT_BIND, listener);
147 :
148 39 : return listener->c_c_index;
149 : }
150 :
151 : static u32
152 39 : tcp_session_bind (u32 session_index, transport_endpoint_cfg_t *tep)
153 : {
154 39 : return tcp_connection_bind (session_index, tep);
155 : }
156 :
157 : static void
158 33 : tcp_connection_unbind (u32 listener_index)
159 : {
160 33 : tcp_main_t *tm = vnet_get_tcp_main ();
161 : tcp_connection_t *tc;
162 :
163 33 : tc = pool_elt_at_index (tm->listener_pool, listener_index);
164 :
165 : TCP_EVT (TCP_EVT_UNBIND, tc);
166 :
167 : /* Poison the entry */
168 : if (CLIB_DEBUG > 0)
169 33 : clib_memset (tc, 0xFA, sizeof (*tc));
170 :
171 33 : pool_put_index (tm->listener_pool, listener_index);
172 33 : }
173 :
174 : static u32
175 33 : tcp_session_unbind (u32 listener_index)
176 : {
177 33 : tcp_connection_unbind (listener_index);
178 33 : return 0;
179 : }
180 :
181 : static transport_connection_t *
182 418 : tcp_session_get_listener (u32 listener_index)
183 : {
184 418 : tcp_main_t *tm = vnet_get_tcp_main ();
185 : tcp_connection_t *tc;
186 418 : tc = pool_elt_at_index (tm->listener_pool, listener_index);
187 418 : return &tc->connection;
188 : }
189 :
190 : static tcp_connection_t *
191 132 : tcp_half_open_connection_alloc (void)
192 : {
193 132 : return tcp_connection_alloc (transport_cl_thread ());
194 : }
195 :
196 : /**
197 : * Cleanup half-open connection
198 : *
199 : */
200 : static void
201 132 : tcp_half_open_connection_free (tcp_connection_t * tc)
202 : {
203 132 : ASSERT (vlib_get_thread_index () == tc->c_thread_index ||
204 : vlib_thread_is_main_w_barrier ());
205 132 : return tcp_connection_free (tc);
206 : }
207 :
208 : /**
209 : * Try to cleanup half-open connection
210 : *
211 : * If called from a thread that doesn't own tc, the call won't have any
212 : * effect.
213 : *
214 : * @param tc - connection to be cleaned up
215 : * @return non-zero if cleanup failed.
216 : */
217 : int
218 132 : tcp_half_open_connection_cleanup (tcp_connection_t * tc)
219 : {
220 : tcp_worker_ctx_t *wrk;
221 :
222 : /* Make sure this is the owning thread */
223 132 : if (tc->c_thread_index != vlib_get_thread_index ())
224 0 : return 1;
225 :
226 132 : session_half_open_delete_notify (&tc->connection);
227 132 : wrk = tcp_get_worker (tc->c_thread_index);
228 132 : tcp_timer_reset (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN);
229 132 : tcp_half_open_connection_free (tc);
230 132 : return 0;
231 : }
232 :
233 : /**
234 : * Cleans up connection state.
235 : *
236 : * No notifications.
237 : */
238 : void
239 134 : tcp_connection_cleanup (tcp_connection_t * tc)
240 : {
241 : TCP_EVT (TCP_EVT_DELETE, tc);
242 :
243 : /* Cleanup local endpoint if this was an active connect */
244 134 : if (!(tc->cfg_flags & TCP_CFG_F_NO_ENDPOINT))
245 134 : transport_release_local_endpoint (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip,
246 134 : tc->c_lcl_port);
247 :
248 : /* Check if connection is not yet fully established */
249 134 : if (tc->state == TCP_STATE_SYN_SENT)
250 : {
251 : /* Try to remove the half-open connection. If this is not the owning
252 : * thread, tc won't be removed. Retransmit or establish timers will
253 : * eventually expire and call cleanup again on the right thread. */
254 0 : if (tcp_half_open_connection_cleanup (tc))
255 0 : tc->flags |= TCP_CONN_HALF_OPEN_DONE;
256 : }
257 : else
258 : {
259 : /* Make sure all timers are cleared */
260 134 : tcp_connection_timers_reset (tc);
261 :
262 134 : if (!tc->c_is_ip4 && ip6_address_is_link_local_unicast (&tc->c_rmt_ip6))
263 0 : tcp_add_del_adjacency (tc, 0);
264 :
265 134 : tcp_cc_cleanup (tc);
266 134 : vec_free (tc->snd_sacks);
267 134 : vec_free (tc->snd_sacks_fl);
268 134 : vec_free (tc->rcv_opts.sacks);
269 134 : pool_free (tc->sack_sb.holes);
270 :
271 134 : if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
272 0 : tcp_bt_cleanup (tc);
273 :
274 134 : tcp_connection_free (tc);
275 : }
276 134 : }
277 :
278 : /**
279 : * Connection removal.
280 : *
281 : * This should be called only once the connection enters CLOSED state. Note
282 : * that it notifies the session of the removal event, so if the goal is to
283 : * just remove the connection, call tcp_connection_cleanup instead.
284 : */
285 : void
286 0 : tcp_connection_del (tcp_connection_t * tc)
287 : {
288 0 : session_transport_delete_notify (&tc->connection);
289 0 : tcp_connection_cleanup (tc);
290 0 : }
291 :
292 : tcp_connection_t *
293 269 : tcp_connection_alloc (u8 thread_index)
294 : {
295 269 : tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
296 : tcp_connection_t *tc;
297 :
298 269 : pool_get_aligned_safe (wrk->connections, tc, CLIB_CACHE_LINE_BYTES);
299 269 : clib_memset (tc, 0, sizeof (*tc));
300 269 : tc->c_c_index = tc - wrk->connections;
301 269 : tc->c_thread_index = thread_index;
302 269 : return tc;
303 : }
304 :
305 : tcp_connection_t *
306 132 : tcp_connection_alloc_w_base (u8 thread_index, tcp_connection_t **base)
307 : {
308 132 : tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
309 : tcp_connection_t *tc;
310 :
311 : /* Make sure connection is still valid if pool moves */
312 132 : if ((*base)->c_thread_index == thread_index)
313 : {
314 132 : u32 base_index = (*base)->c_c_index;
315 132 : pool_get_aligned_safe (wrk->connections, tc, CLIB_CACHE_LINE_BYTES);
316 132 : *base = tcp_connection_get (base_index, thread_index);
317 : }
318 : else
319 : {
320 0 : pool_get_aligned_safe (wrk->connections, tc, CLIB_CACHE_LINE_BYTES);
321 : }
322 132 : clib_memcpy_fast (tc, *base, sizeof (*tc));
323 132 : tc->c_c_index = tc - wrk->connections;
324 132 : tc->c_thread_index = thread_index;
325 132 : return tc;
326 : }
327 :
328 : void
329 266 : tcp_connection_free (tcp_connection_t * tc)
330 : {
331 266 : tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
332 : if (CLIB_DEBUG)
333 : {
334 266 : clib_memset (tc, 0xFA, sizeof (*tc));
335 266 : pool_put (wrk->connections, tc);
336 266 : return;
337 : }
338 : pool_put (wrk->connections, tc);
339 : }
340 :
341 : void
342 135 : tcp_program_cleanup (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
343 : {
344 : tcp_cleanup_req_t *req;
345 : clib_time_type_t now;
346 :
347 135 : now = tcp_time_now_us (tc->c_thread_index);
348 135 : clib_fifo_add2 (wrk->pending_cleanups, req);
349 135 : req->connection_index = tc->c_c_index;
350 135 : req->free_time = now + tcp_cfg.cleanup_time;
351 135 : }
352 :
353 : /**
354 : * Begin connection closing procedure.
355 : *
356 : * If at the end the connection is not in CLOSED state, it is not removed.
357 : * Instead, we rely on TCP to advance through the state machine to either
358 : * 1) LAST_ACK (passive close) whereby, when the last ACK is received,
359 : * tcp_connection_del is called. This notifies the session of the delete and
360 : * calls cleanup.
361 : * 2) TIME_WAIT (active close) whereby after 2MSL the 2MSL timer triggers
362 : * and cleanup is called.
363 : *
364 : */
365 : void
366 260 : tcp_connection_close (tcp_connection_t * tc)
367 : {
368 260 : tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
369 :
370 : TCP_EVT (TCP_EVT_CLOSE, tc);
371 :
372 : /* Send/Program FIN if needed and switch state */
373 260 : switch (tc->state)
374 : {
375 0 : case TCP_STATE_SYN_SENT:
376 : /* Try to cleanup. If not on the right thread, mark as half-open done.
377 : * Connection will be cleaned up when establish timer pops */
378 0 : tcp_connection_cleanup (tc);
379 0 : break;
380 0 : case TCP_STATE_SYN_RCVD:
381 0 : tcp_connection_timers_reset (tc);
382 0 : tcp_send_fin (tc);
383 0 : tcp_connection_set_state (tc, TCP_STATE_FIN_WAIT_1);
384 0 : tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
385 : tcp_cfg.finwait1_time);
386 0 : break;
387 133 : case TCP_STATE_ESTABLISHED:
388 : /* If closing with unread data, reset the connection */
389 133 : if (transport_max_rx_dequeue (&tc->connection))
390 : {
391 4 : tcp_send_reset (tc);
392 4 : tcp_connection_timers_reset (tc);
393 4 : tcp_connection_set_state (tc, TCP_STATE_CLOSED);
394 4 : session_transport_closed_notify (&tc->connection);
395 4 : tcp_program_cleanup (tcp_get_worker (tc->c_thread_index), tc);
396 4 : tcp_worker_stats_inc (wrk, rst_unread, 1);
397 4 : break;
398 : }
399 129 : if (!transport_max_tx_dequeue (&tc->connection))
400 127 : tcp_send_fin (tc);
401 : else
402 2 : tc->flags |= TCP_CONN_FINPNDG;
403 129 : tcp_connection_set_state (tc, TCP_STATE_FIN_WAIT_1);
404 : /* Set a timer in case the peer stops responding. Otherwise the
405 : * connection will be stuck here forever. */
406 129 : ASSERT (tc->timers[TCP_TIMER_WAITCLOSE] == TCP_TIMER_HANDLE_INVALID);
407 129 : tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
408 : tcp_cfg.finwait1_time);
409 129 : break;
410 127 : case TCP_STATE_CLOSE_WAIT:
411 127 : if (!transport_max_tx_dequeue (&tc->connection))
412 : {
413 125 : tcp_send_fin (tc);
414 125 : tcp_connection_timers_reset (tc);
415 125 : tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
416 125 : tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
417 : tcp_cfg.lastack_time);
418 : }
419 : else
420 2 : tc->flags |= TCP_CONN_FINPNDG;
421 127 : break;
422 0 : case TCP_STATE_FIN_WAIT_1:
423 0 : tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
424 : tcp_cfg.finwait1_time);
425 0 : break;
426 0 : case TCP_STATE_CLOSED:
427 : /* Cleanup should've been programmed already */
428 0 : break;
429 260 : default:
430 : TCP_DBG ("state: %u", tc->state);
431 : }
432 260 : }
433 :
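/* The two normal teardown paths driven by the switch above, assuming the
 * standard TCP state machine:
 *
 *   active close:  ESTABLISHED -> FIN_WAIT_1 -> FIN_WAIT_2 -> TIME_WAIT
 *                  -> cleanup after the waitclose/2MSL timer fires
 *   passive close: CLOSE_WAIT -> LAST_ACK -> CLOSED -> cleanup
 *
 * In both cases TCP_TIMER_WAITCLOSE bounds how long we wait for an
 * unresponsive peer. */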
434 : static void
435 0 : tcp_session_half_close (u32 conn_index, u32 thread_index)
436 : {
437 : tcp_worker_ctx_t *wrk;
438 : tcp_connection_t *tc;
439 :
440 0 : tc = tcp_connection_get (conn_index, thread_index);
441 0 : wrk = tcp_get_worker (tc->c_thread_index);
442 :
443 : /* If the connection is not in ESTABLISHED state, ignore it */
444 0 : if (tc->state != TCP_STATE_ESTABLISHED)
445 0 : return;
446 0 : if (!transport_max_tx_dequeue (&tc->connection))
447 0 : tcp_send_fin (tc);
448 : else
449 0 : tc->flags |= TCP_CONN_FINPNDG;
450 0 : tcp_connection_set_state (tc, TCP_STATE_FIN_WAIT_1);
451 : /* Set a timer in case the peer stops responding. Otherwise the
452 : * connection will be stuck here forever. */
453 0 : ASSERT (tc->timers[TCP_TIMER_WAITCLOSE] == TCP_TIMER_HANDLE_INVALID);
454 0 : tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
455 : tcp_cfg.finwait1_time);
456 : }
457 :
458 : static void
459 260 : tcp_session_close (u32 conn_index, u32 thread_index)
460 : {
461 : tcp_connection_t *tc;
462 260 : tc = tcp_connection_get (conn_index, thread_index);
463 260 : tcp_connection_close (tc);
464 260 : }
465 :
466 : static void
467 0 : tcp_session_cleanup (u32 conn_index, u32 thread_index)
468 : {
469 : tcp_connection_t *tc;
470 0 : tc = tcp_connection_get (conn_index, thread_index);
471 0 : if (!tc)
472 0 : return;
473 0 : tcp_connection_set_state (tc, TCP_STATE_CLOSED);
474 0 : tcp_connection_cleanup (tc);
475 : }
476 :
477 : static void
478 0 : tcp_session_cleanup_ho (u32 conn_index)
479 : {
480 : tcp_worker_ctx_t *wrk;
481 : tcp_connection_t *tc;
482 :
483 0 : tc = tcp_half_open_connection_get (conn_index);
484 0 : wrk = tcp_get_worker (tc->c_thread_index);
485 0 : tcp_timer_reset (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN);
486 0 : tcp_half_open_connection_free (tc);
487 0 : }
488 :
489 : static void
490 0 : tcp_session_reset (u32 conn_index, u32 thread_index)
491 : {
492 : tcp_connection_t *tc;
493 0 : tc = tcp_connection_get (conn_index, thread_index);
494 :
495 : /* For half-opens just cleanup */
496 0 : if (tc->state == TCP_STATE_SYN_SENT)
497 : {
498 0 : tcp_connection_cleanup (tc);
499 0 : return;
500 : }
501 :
502 0 : tcp_send_reset (tc);
503 0 : tcp_connection_timers_reset (tc);
504 0 : tcp_cong_recovery_off (tc);
505 0 : tcp_connection_set_state (tc, TCP_STATE_CLOSED);
506 0 : session_transport_closed_notify (&tc->connection);
507 0 : tcp_program_cleanup (tcp_get_worker (thread_index), tc);
508 : }
509 :
510 : /**
511 : * Initialize all connection timers as invalid
512 : */
513 : void
514 438 : tcp_connection_timers_init (tcp_connection_t * tc)
515 : {
516 : int i;
517 :
518 : /* Set all to invalid */
519 2190 : for (i = 0; i < TCP_N_TIMERS; i++)
520 : {
521 1752 : tc->timers[i] = TCP_TIMER_HANDLE_INVALID;
522 : }
523 :
524 438 : tc->rto = TCP_RTO_INIT;
525 438 : }
526 :
527 : /**
528 : * Stop all connection timers
529 : */
530 : void
531 652 : tcp_connection_timers_reset (tcp_connection_t * tc)
532 : {
533 652 : tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
534 : int i;
535 :
536 3260 : for (i = 0; i < TCP_N_TIMERS; i++)
537 2608 : tcp_timer_reset (&wrk->timer_wheel, tc, i);
538 652 : }
539 :
540 : #if 0
541 : typedef struct ip4_tcp_hdr
542 : {
543 : ip4_header_t ip;
544 : tcp_header_t tcp;
545 : } ip4_tcp_hdr_t;
546 :
547 : typedef struct ip6_tcp_hdr
548 : {
549 : ip6_header_t ip;
550 : tcp_header_t tcp;
551 : } ip6_tcp_hdr_t;
552 :
553 : static void
554 : tcp_connection_select_lb_bucket (tcp_connection_t * tc, const dpo_id_t * dpo,
555 : dpo_id_t * result)
556 : {
557 : const dpo_id_t *choice;
558 : load_balance_t *lb;
559 : int hash;
560 :
561 : lb = load_balance_get (dpo->dpoi_index);
562 : if (tc->c_is_ip4)
563 : {
564 : ip4_tcp_hdr_t hdr;
565 : clib_memset (&hdr, 0, sizeof (hdr));
566 : hdr.ip.protocol = IP_PROTOCOL_TCP;
567 : hdr.ip.address_pair.src.as_u32 = tc->c_lcl_ip.ip4.as_u32;
568 : hdr.ip.address_pair.dst.as_u32 = tc->c_rmt_ip.ip4.as_u32;
569 : hdr.tcp.src_port = tc->c_lcl_port;
570 : hdr.tcp.dst_port = tc->c_rmt_port;
571 : hash = ip4_compute_flow_hash (&hdr.ip, lb->lb_hash_config);
572 : }
573 : else
574 : {
575 : ip6_tcp_hdr_t hdr;
576 : clib_memset (&hdr, 0, sizeof (hdr));
577 : hdr.ip.protocol = IP_PROTOCOL_TCP;
578 : clib_memcpy_fast (&hdr.ip.src_address, &tc->c_lcl_ip.ip6,
579 : sizeof (ip6_address_t));
580 : clib_memcpy_fast (&hdr.ip.dst_address, &tc->c_rmt_ip.ip6,
581 : sizeof (ip6_address_t));
582 : hdr.tcp.src_port = tc->c_lcl_port;
583 : hdr.tcp.dst_port = tc->c_rmt_port;
584 : hash = ip6_compute_flow_hash (&hdr.ip, lb->lb_hash_config);
585 : }
586 : choice = load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
587 : dpo_copy (result, choice);
588 : }
589 :
590 : fib_node_index_t
591 : tcp_lookup_rmt_in_fib (tcp_connection_t * tc)
592 : {
593 : fib_prefix_t prefix;
594 : u32 fib_index;
595 :
596 : clib_memcpy_fast (&prefix.fp_addr, &tc->c_rmt_ip, sizeof (prefix.fp_addr));
597 : prefix.fp_proto = tc->c_is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
598 : prefix.fp_len = tc->c_is_ip4 ? 32 : 128;
599 : fib_index = fib_table_find (prefix.fp_proto, tc->c_fib_index);
600 : return fib_table_lookup (fib_index, &prefix);
601 : }
602 :
603 : static int
604 : tcp_connection_stack_on_fib_entry (tcp_connection_t * tc)
605 : {
606 : dpo_id_t choice = DPO_INVALID;
607 : u32 output_node_index;
608 : fib_entry_t *fe;
609 :
610 : fe = fib_entry_get (tc->c_rmt_fei);
611 : if (fe->fe_lb.dpoi_type != DPO_LOAD_BALANCE)
612 : return -1;
613 :
614 : tcp_connection_select_lb_bucket (tc, &fe->fe_lb, &choice);
615 :
616 : output_node_index =
617 : tc->c_is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
618 : dpo_stack_from_node (output_node_index, &tc->c_rmt_dpo, &choice);
619 : return 0;
620 : }
621 :
622 : /** Stack tcp connection on peer's fib entry.
623 : *
624 : * This ultimately populates the dpo the connection will use to send packets.
625 : */
626 : static void
627 : tcp_connection_fib_attach (tcp_connection_t * tc)
628 : {
629 : tc->c_rmt_fei = tcp_lookup_rmt_in_fib (tc);
630 :
631 : ASSERT (tc->c_rmt_fei != FIB_NODE_INDEX_INVALID);
632 :
633 : tcp_connection_stack_on_fib_entry (tc);
634 : }
635 : #endif /* 0 */
636 :
637 : /**
638 : * Generate random iss as per rfc6528
639 : */
640 : static u32
641 267 : tcp_generate_random_iss (tcp_connection_t * tc)
642 : {
643 267 : tcp_main_t *tm = &tcp_main;
644 : u64 tmp;
645 :
646 267 : if (tc->c_is_ip4)
647 265 : tmp = (u64) tc->c_lcl_ip.ip4.as_u32 << 32 | (u64) tc->c_rmt_ip.ip4.as_u32;
648 : else
649 2 : tmp = tc->c_lcl_ip.ip6.as_u64[0] ^ tc->c_lcl_ip.ip6.as_u64[1]
650 2 : ^ tc->c_rmt_ip.ip6.as_u64[0] ^ tc->c_rmt_ip.ip6.as_u64[1];
651 :
652 267 : tmp ^= tm->iss_seed.first | ((u64) tc->c_lcl_port << 16 | tc->c_rmt_port);
653 267 : tmp ^= tm->iss_seed.second;
654 267 : tmp = clib_xxhash (tmp) + clib_cpu_time_now ();
655 267 : return ((tmp >> 32) ^ (tmp & 0xffffffff));
656 : }
657 :
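/* For reference, RFC 6528 specifies ISN = M + F (localip, localport,
 * remoteip, remoteport, secretkey). In the routine above clib_xxhash () over
 * the 4-tuple and the two iss_seed words plays the role of F, while
 * clib_cpu_time_now () plays the role of the clock M. */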
658 : /**
659 : * Initialize max segment size we're able to process.
660 : *
661 : * The value is constrained by the output interface's MTU and by the size
662 : * of the IP and TCP headers (see RFC6691). It is also what we advertise
663 : * to our peer.
664 : */
665 : static void
666 534 : tcp_init_rcv_mss (tcp_connection_t * tc)
667 : {
668 : u8 ip_hdr_len;
669 :
670 : /* Already provided at connection init time */
671 534 : if (tc->mss)
672 267 : return;
673 :
674 267 : ip_hdr_len = tc->c_is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
675 267 : tc->mss = tcp_cfg.default_mtu - sizeof (tcp_header_t) - ip_hdr_len;
676 : }
677 :
678 : static void
679 267 : tcp_init_mss (tcp_connection_t * tc)
680 : {
681 267 : u16 default_min_mss = 536;
682 :
683 267 : tcp_init_rcv_mss (tc);
684 :
685 : /* TODO consider PMTU discovery */
686 267 : tc->snd_mss = clib_min (tc->rcv_opts.mss, tc->mss);
687 :
688 267 : if (tc->snd_mss < 45)
689 : {
690 : /* Assume that at least the min default mss works */
691 3 : tc->snd_mss = default_min_mss;
692 3 : tc->rcv_opts.mss = default_min_mss;
693 : }
694 :
695 : /* We should have enough space for 40 bytes of options */
696 267 : ASSERT (tc->snd_mss > 45);
697 :
698 : /* If we use timestamp option, account for it and make sure
699 : * the options are 4-byte aligned */
700 267 : if (tcp_opts_tstamp (&tc->rcv_opts))
701 264 : tc->snd_mss -= TCP_OPTION_LEN_TIMESTAMP + 2 /* alignment */;
702 267 : }
703 :
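/* Worked example, assuming the defaults from tcp_configuration_init
 * (default_mtu = 1500) and no MSS provided at connection init:
 *
 *   IPv4: mss = 1500 - 20 (tcp) - 20 (ip4) = 1460
 *   IPv6: mss = 1500 - 20 (tcp) - 40 (ip6) = 1440
 *
 * If the timestamp option was negotiated, snd_mss additionally loses
 * TCP_OPTION_LEN_TIMESTAMP + 2 bytes, i.e. 1460 - 12 = 1448 for IPv4,
 * assuming the standard 10-byte timestamp option. */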
704 : /**
705 : * Initialize connection send variables.
706 : */
707 : void
708 267 : tcp_init_snd_vars (tcp_connection_t * tc)
709 : {
710 : /*
711 : * We use the time to randomize the iss and to set up the initial
712 : * timestamp. Make sure it's updated, otherwise the syn and ack in the
713 : * handshake may make it look as if time flowed in the opposite
714 : * direction for us.
715 : */
716 267 : tcp_update_time_now (tcp_get_worker (vlib_get_thread_index ()));
717 :
718 267 : tcp_init_rcv_mss (tc);
719 : /*
720 : * In the special case of an early kill of a timewait socket, the iss will
721 : * already be initialized to ensure it is greater than the last incarnation
722 : * of the connection. See syn_during_timewait() for more details.
723 : */
724 267 : if (!tc->iss)
725 267 : tc->iss = tcp_generate_random_iss (tc);
726 267 : tc->snd_una = tc->iss;
727 267 : tc->snd_nxt = tc->iss + 1;
728 267 : tc->srtt = 0.1 * THZ; /* 100 ms */
729 :
730 267 : if (!tcp_cfg.csum_offload)
731 0 : tc->cfg_flags |= TCP_CFG_F_NO_CSUM_OFFLOAD;
732 267 : }
733 :
734 : void
735 267 : tcp_enable_pacing (tcp_connection_t * tc)
736 : {
737 : u32 byte_rate;
738 267 : byte_rate = tc->cwnd / (tc->srtt * TCP_TICK);
739 267 : transport_connection_tx_pacer_init (&tc->connection, byte_rate, tc->cwnd);
740 267 : tc->mrtt_us = (u32) ~ 0;
741 267 : }
742 :
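/* Worked example: srtt is kept in ticks and TCP_TICK converts it to seconds,
 * so a connection with cwnd = 14480 bytes and a smoothed RTT of 100 ms is
 * seeded with byte_rate = 14480 / 0.1 = 144800 bytes/s, i.e. the pacer
 * initially drains roughly one cwnd per smoothed RTT. */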
743 : /** Initialize tcp connection variables
744 : *
745 : * Should be called after having received a msg from the peer, i.e., a SYN or
746 : * a SYNACK, such that connection options have already been exchanged. */
747 : void
748 267 : tcp_connection_init_vars (tcp_connection_t * tc)
749 : {
750 267 : tcp_connection_timers_init (tc);
751 267 : tcp_init_mss (tc);
752 267 : scoreboard_init (&tc->sack_sb);
753 267 : if (tc->state == TCP_STATE_SYN_RCVD)
754 135 : tcp_init_snd_vars (tc);
755 :
756 267 : tcp_cc_init (tc);
757 :
758 267 : if (!tc->c_is_ip4 && ip6_address_is_link_local_unicast (&tc->c_rmt_ip6))
759 0 : tcp_add_del_adjacency (tc, 1);
760 :
761 : /* tcp_connection_fib_attach (tc); */
762 :
763 267 : if (transport_connection_is_tx_paced (&tc->connection)
764 267 : || tcp_cfg.enable_tx_pacing)
765 267 : tcp_enable_pacing (tc);
766 :
767 267 : if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
768 0 : tcp_bt_init (tc);
769 :
770 267 : if (!tcp_cfg.allow_tso)
771 267 : tc->cfg_flags |= TCP_CFG_F_NO_TSO;
772 :
773 267 : tc->start_ts = tcp_time_now_us (tc->c_thread_index);
774 267 : }
775 :
776 : static int
777 0 : tcp_alloc_custom_local_endpoint (ip46_address_t *lcl_addr, u16 *lcl_port,
778 : transport_endpoint_cfg_t *rmt)
779 : {
780 0 : tcp_main_t *tm = vnet_get_tcp_main ();
781 : int index, port;
782 :
783 0 : if (rmt->is_ip4)
784 : {
785 0 : index = tm->last_v4_addr_rotor++;
786 0 : if (tm->last_v4_addr_rotor >= vec_len (tcp_cfg.ip4_src_addrs))
787 0 : tm->last_v4_addr_rotor = 0;
788 0 : clib_memset (lcl_addr, 0, sizeof (*lcl_addr));
789 0 : lcl_addr->ip4.as_u32 = tcp_cfg.ip4_src_addrs[index].as_u32;
790 : }
791 : else
792 : {
793 0 : index = tm->last_v6_addr_rotor++;
794 0 : if (tm->last_v6_addr_rotor >= vec_len (tcp_cfg.ip6_src_addrs))
795 0 : tm->last_v6_addr_rotor = 0;
796 0 : clib_memcpy_fast (&lcl_addr->ip6, &tcp_cfg.ip6_src_addrs[index],
797 : sizeof (ip6_address_t));
798 : }
799 0 : port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr, rmt);
800 0 : if (port < 1)
801 0 : return SESSION_E_NOPORT;
802 0 : *lcl_port = port;
803 0 : return 0;
804 : }
805 :
806 : static int
807 133 : tcp_session_open (transport_endpoint_cfg_t * rmt)
808 : {
809 : tcp_connection_t *tc;
810 : ip46_address_t lcl_addr;
811 : u16 lcl_port;
812 : int rv;
813 :
814 : /*
815 : * Allocate local endpoint
816 : */
817 133 : if ((rmt->is_ip4 && vec_len (tcp_cfg.ip4_src_addrs))
818 133 : || (!rmt->is_ip4 && vec_len (tcp_cfg.ip6_src_addrs)))
819 0 : rv = tcp_alloc_custom_local_endpoint (&lcl_addr, &lcl_port, rmt);
820 : else
821 133 : rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_TCP, rmt, &lcl_addr,
822 : &lcl_port);
823 :
824 133 : if (rv)
825 1 : return rv;
826 :
827 : /*
828 : * Create connection and send SYN
829 : */
830 132 : tc = tcp_half_open_connection_alloc ();
831 132 : ip_copy (&tc->c_rmt_ip, &rmt->ip, rmt->is_ip4);
832 132 : ip_copy (&tc->c_lcl_ip, &lcl_addr, rmt->is_ip4);
833 132 : tc->c_rmt_port = rmt->port;
834 132 : tc->c_lcl_port = clib_host_to_net_u16 (lcl_port);
835 132 : tc->c_is_ip4 = rmt->is_ip4;
836 132 : tc->c_proto = TRANSPORT_PROTO_TCP;
837 132 : tc->c_fib_index = rmt->fib_index;
838 132 : tc->cc_algo = tcp_cc_algo_get (tcp_cfg.cc_algo);
839 : /* The other connection vars will be initialized after SYN ACK */
840 132 : tcp_connection_timers_init (tc);
841 132 : tc->mss = rmt->mss;
842 132 : if (rmt->peer.sw_if_index != ENDPOINT_INVALID_INDEX)
843 132 : tc->sw_if_index = rmt->peer.sw_if_index;
844 132 : tc->next_node_index = rmt->next_node_index;
845 132 : tc->next_node_opaque = rmt->next_node_opaque;
846 :
847 : TCP_EVT (TCP_EVT_OPEN, tc);
848 132 : tc->state = TCP_STATE_SYN_SENT;
849 132 : tcp_init_snd_vars (tc);
850 132 : tcp_send_syn (tc);
851 :
852 132 : return tc->c_c_index;
853 : }
854 :
855 : static u8 *
856 14 : format_tcp_session (u8 * s, va_list * args)
857 : {
858 14 : u32 tci = va_arg (*args, u32);
859 14 : u32 thread_index = va_arg (*args, u32);
860 14 : u32 verbose = va_arg (*args, u32);
861 : tcp_connection_t *tc;
862 :
863 14 : tc = tcp_connection_get (tci, thread_index);
864 14 : if (tc)
865 14 : s = format (s, "%U", format_tcp_connection, tc, verbose);
866 : else
867 0 : s = format (s, "empty\n");
868 14 : return s;
869 : }
870 :
871 : static u8 *
872 0 : format_tcp_listener_session (u8 * s, va_list * args)
873 : {
874 0 : u32 tci = va_arg (*args, u32);
875 0 : u32 __clib_unused thread_index = va_arg (*args, u32);
876 0 : u32 verbose = va_arg (*args, u32);
877 0 : tcp_connection_t *tc = tcp_listener_get (tci);
878 0 : s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_tcp_connection_id, tc);
879 0 : if (verbose)
880 0 : s = format (s, "%-" SESSION_CLI_STATE_LEN "U", format_tcp_state,
881 0 : tc->state);
882 0 : return s;
883 : }
884 :
885 : static u8 *
886 0 : format_tcp_half_open_session (u8 * s, va_list * args)
887 : {
888 0 : u32 tci = va_arg (*args, u32);
889 0 : u32 __clib_unused thread_index = va_arg (*args, u32);
890 0 : u32 verbose = va_arg (*args, u32);
891 : tcp_connection_t *tc;
892 0 : u8 *state = 0;
893 :
894 0 : tc = tcp_half_open_connection_get (tci);
895 0 : if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
896 0 : state = format (state, "%s", "CLOSED");
897 : else
898 0 : state = format (state, "%U", format_tcp_state, tc->state);
899 0 : s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_tcp_connection_id, tc);
900 0 : if (verbose)
901 0 : s = format (s, "%-" SESSION_CLI_STATE_LEN "v", state);
902 0 : vec_free (state);
903 0 : return s;
904 : }
905 :
906 : static transport_connection_t *
907 5581160 : tcp_session_get_transport (u32 conn_index, u32 thread_index)
908 : {
909 5581160 : tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index);
910 5581160 : if (PREDICT_FALSE (!tc))
911 0 : return 0;
912 5581160 : return &tc->connection;
913 : }
914 :
915 : static transport_connection_t *
916 264 : tcp_half_open_session_get_transport (u32 conn_index)
917 : {
918 264 : tcp_connection_t *tc = tcp_half_open_connection_get (conn_index);
919 264 : return &tc->connection;
920 : }
921 :
922 : static int
923 0 : tcp_set_attribute (tcp_connection_t *tc, transport_endpt_attr_t *attr)
924 : {
925 0 : int rv = 0;
926 :
927 0 : switch (attr->type)
928 : {
929 0 : case TRANSPORT_ENDPT_ATTR_NEXT_OUTPUT_NODE:
930 0 : tc->next_node_index = attr->next_output_node & 0xffffffff;
931 0 : tc->next_node_opaque = attr->next_output_node >> 32;
932 0 : break;
933 0 : case TRANSPORT_ENDPT_ATTR_MSS:
934 0 : tc->mss = attr->mss;
935 0 : tc->snd_mss = clib_min (tc->snd_mss, tc->mss);
936 0 : break;
937 0 : case TRANSPORT_ENDPT_ATTR_FLAGS:
938 0 : if (attr->flags & TRANSPORT_ENDPT_ATTR_F_CSUM_OFFLOAD)
939 0 : tc->cfg_flags |= TCP_CFG_F_NO_CSUM_OFFLOAD;
940 : else
941 0 : tc->cfg_flags &= ~TCP_CFG_F_NO_CSUM_OFFLOAD;
942 0 : if (attr->flags & TRANSPORT_ENDPT_ATTR_F_GSO)
943 : {
944 0 : if (!(tc->cfg_flags & TCP_CFG_F_TSO))
945 0 : tcp_check_gso (tc);
946 0 : tc->cfg_flags &= ~TCP_CFG_F_NO_TSO;
947 : }
948 : else
949 : {
950 0 : tc->cfg_flags |= TCP_CFG_F_NO_TSO;
951 0 : tc->cfg_flags &= ~TCP_CFG_F_TSO;
952 : }
953 0 : if (attr->flags & TRANSPORT_ENDPT_ATTR_F_RATE_SAMPLING)
954 : {
955 0 : if (!(tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE))
956 0 : tcp_bt_init (tc);
957 0 : tc->cfg_flags |= TCP_CFG_F_RATE_SAMPLE;
958 : }
959 : else
960 : {
961 0 : if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
962 0 : tcp_bt_cleanup (tc);
963 0 : tc->cfg_flags &= ~TCP_CFG_F_RATE_SAMPLE;
964 : }
965 0 : break;
966 0 : case TRANSPORT_ENDPT_ATTR_CC_ALGO:
967 0 : if (tc->cc_algo == tcp_cc_algo_get (attr->cc_algo))
968 0 : break;
969 0 : tcp_cc_cleanup (tc);
970 0 : tc->cc_algo = tcp_cc_algo_get (attr->cc_algo);
971 0 : tcp_cc_init (tc);
972 0 : break;
973 0 : default:
974 0 : rv = -1;
975 0 : break;
976 : }
977 :
978 0 : return rv;
979 : }
980 :
981 : static int
982 3 : tcp_get_attribute (tcp_connection_t *tc, transport_endpt_attr_t *attr)
983 : {
984 3 : int rv = 0;
985 : u64 non;
986 :
987 3 : switch (attr->type)
988 : {
989 0 : case TRANSPORT_ENDPT_ATTR_NEXT_OUTPUT_NODE:
990 0 : non = (u64) tc->next_node_opaque << 32 | tc->next_node_index;
991 0 : attr->next_output_node = non;
992 0 : break;
993 3 : case TRANSPORT_ENDPT_ATTR_MSS:
994 3 : attr->mss = tc->snd_mss;
995 3 : break;
996 0 : case TRANSPORT_ENDPT_ATTR_FLAGS:
997 0 : attr->flags = 0;
998 0 : if (!(tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
999 0 : attr->flags |= TRANSPORT_ENDPT_ATTR_F_CSUM_OFFLOAD;
1000 0 : if (tc->cfg_flags & TCP_CFG_F_TSO)
1001 0 : attr->flags |= TRANSPORT_ENDPT_ATTR_F_GSO;
1002 0 : if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1003 0 : attr->flags |= TRANSPORT_ENDPT_ATTR_F_RATE_SAMPLING;
1004 0 : break;
1005 0 : case TRANSPORT_ENDPT_ATTR_CC_ALGO:
1006 0 : attr->cc_algo = tc->cc_algo - tcp_main.cc_algos;
1007 0 : break;
1008 0 : default:
1009 0 : rv = -1;
1010 0 : break;
1011 : }
1012 :
1013 3 : return rv;
1014 : }
1015 :
1016 : static int
1017 3 : tcp_session_attribute (u32 conn_index, u32 thread_index, u8 is_get,
1018 : transport_endpt_attr_t *attr)
1019 : {
1020 3 : tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index);
1021 :
1022 3 : if (PREDICT_FALSE (!tc))
1023 0 : return -1;
1024 :
1025 3 : if (is_get)
1026 3 : return tcp_get_attribute (tc, attr);
1027 : else
1028 0 : return tcp_set_attribute (tc, attr);
1029 : }
1030 :
1031 : static u16
1032 0 : tcp_session_cal_goal_size (tcp_connection_t * tc)
1033 : {
1034 0 : u16 goal_size = tc->snd_mss;
1035 :
1036 0 : goal_size = tcp_cfg.max_gso_size - tc->snd_mss % tcp_cfg.max_gso_size;
1037 0 : goal_size = clib_min (goal_size, tc->snd_wnd / 2);
1038 :
1039 0 : return goal_size > tc->snd_mss ? goal_size : tc->snd_mss;
1040 : }
1041 :
1042 : always_inline u32
1043 99722 : tcp_round_snd_space (tcp_connection_t * tc, u32 snd_space)
1044 : {
1045 99722 : if (PREDICT_FALSE (tc->snd_wnd < tc->snd_mss))
1046 : {
1047 0 : return tc->snd_wnd <= snd_space ? tc->snd_wnd : 0;
1048 : }
1049 :
1050 : /* If not snd_wnd constrained and we can't write at least a segment,
1051 : * don't try at all */
1052 99722 : if (PREDICT_FALSE (snd_space < tc->snd_mss))
1053 2840 : return snd_space < tc->cwnd ? 0 : snd_space;
1054 :
1055 : /* round down to mss multiple */
1056 96882 : return snd_space - (snd_space % tc->snd_mss);
1057 : }
1058 :
1059 : /**
1060 : * Compute tx window session is allowed to fill.
1061 : *
1062 : * Takes into account available send space, snd_mss and the congestion
1063 : * state of the connection. If possible, the value returned is a multiple
1064 : * of snd_mss.
1065 : *
1066 : * @param tc tcp connection
1067 : * @return number of bytes session is allowed to write
1068 : */
1069 : static inline u32
1070 99722 : tcp_snd_space_inline (tcp_connection_t * tc)
1071 : {
1072 : int snd_space;
1073 :
1074 : /* Fast path is disabled when recovery is on. @ref tcp_session_custom_tx
1075 : * controls both retransmits and the sending of new data while congested
1076 : */
1077 99722 : if (PREDICT_FALSE (tcp_in_cong_recovery (tc)
1078 : || tc->state == TCP_STATE_CLOSED))
1079 0 : return 0;
1080 :
1081 99722 : snd_space = tcp_available_output_snd_space (tc);
1082 :
1083 : /* If we got dupacks or sacked bytes but we're not yet in recovery, try
1084 : * to force the peer to send enough dupacks to start retransmitting as
1085 : * per Limited Transmit (RFC3042)
1086 : */
1087 99722 : if (PREDICT_FALSE (tc->rcv_dupacks || tc->sack_sb.sacked_bytes))
1088 : {
1089 : int snt_limited, n_pkts;
1090 :
1091 0 : n_pkts = tcp_opts_sack_permitted (&tc->rcv_opts)
1092 0 : ? tc->sack_sb.reorder - 1 : 2;
1093 :
1094 0 : if ((seq_lt (tc->limited_transmit, tc->snd_nxt - n_pkts * tc->snd_mss)
1095 0 : || seq_gt (tc->limited_transmit, tc->snd_nxt)))
1096 0 : tc->limited_transmit = tc->snd_nxt;
1097 :
1098 0 : ASSERT (seq_leq (tc->limited_transmit, tc->snd_nxt));
1099 :
1100 0 : snt_limited = tc->snd_nxt - tc->limited_transmit;
1101 0 : snd_space = clib_max (n_pkts * tc->snd_mss - snt_limited, 0);
1102 : }
1103 99722 : return tcp_round_snd_space (tc, snd_space);
1104 : }
1105 :
1106 : u32
1107 0 : tcp_snd_space (tcp_connection_t * tc)
1108 : {
1109 0 : return tcp_snd_space_inline (tc);
1110 : }
1111 :
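/* Worked example for tcp_round_snd_space: with snd_mss = 1448 and an
 * unconstrained send window, an available send space of 10000 bytes is
 * rounded down to 6 * 1448 = 8688 bytes. Anything below one snd_mss is
 * reported as 0, except when cwnd itself has shrunk below snd_mss, in which
 * case the sub-mss space is returned as is. */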
1112 : static int
1113 70832 : tcp_session_send_params (transport_connection_t * trans_conn,
1114 : transport_send_params_t * sp)
1115 : {
1116 70832 : tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
1117 :
1118 : /* Ensure snd_mss accurately reflects the amount of data we can push
1119 : * in a segment. This also makes sure that options are updated according to
1120 : * the current state of the connection. */
1121 70832 : tcp_update_burst_snd_vars (tc);
1122 :
1123 70832 : if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_TSO))
1124 0 : sp->snd_mss = tcp_session_cal_goal_size (tc);
1125 : else
1126 70832 : sp->snd_mss = tc->snd_mss;
1127 :
1128 70832 : sp->snd_space = clib_min (tcp_snd_space_inline (tc),
1129 : tc->snd_wnd - (tc->snd_nxt - tc->snd_una));
1130 :
1131 70832 : ASSERT (seq_geq (tc->snd_nxt, tc->snd_una));
1132 : /* This still works if fast retransmit is on */
1133 70832 : sp->tx_offset = tc->snd_nxt - tc->snd_una;
1134 :
1135 70832 : sp->flags = sp->snd_space ? 0 : TRANSPORT_SND_F_DESCHED;
1136 :
1137 70832 : return 0;
1138 : }
1139 :
1140 : static void
1141 0 : tcp_timer_waitclose_handler (tcp_connection_t * tc)
1142 : {
1143 0 : tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1144 :
1145 0 : switch (tc->state)
1146 : {
1147 0 : case TCP_STATE_CLOSE_WAIT:
1148 0 : tcp_connection_timers_reset (tc);
1149 : /* App never returned with a close */
1150 0 : if (!(tc->flags & TCP_CONN_FINPNDG))
1151 : {
1152 0 : tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1153 0 : session_transport_closed_notify (&tc->connection);
1154 0 : tcp_program_cleanup (wrk, tc);
1155 0 : tcp_worker_stats_inc (wrk, to_closewait, 1);
1156 0 : break;
1157 : }
1158 :
1159 : /* Send FIN either way and switch to LAST_ACK. */
1160 0 : tcp_cong_recovery_off (tc);
1161 : /* Make sure we don't try to send unsent data */
1162 0 : tc->snd_nxt = tc->snd_una;
1163 0 : tcp_send_fin (tc);
1164 0 : tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
1165 0 : session_transport_closed_notify (&tc->connection);
1166 :
1167 : /* Make sure we don't wait in LAST ACK forever */
1168 0 : tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
1169 : tcp_cfg.lastack_time);
1170 0 : tcp_worker_stats_inc (wrk, to_closewait2, 1);
1171 :
1172 : /* Don't delete the connection yet */
1173 0 : break;
1174 0 : case TCP_STATE_FIN_WAIT_1:
1175 0 : tcp_connection_timers_reset (tc);
1176 0 : if (tc->flags & TCP_CONN_FINPNDG)
1177 : {
1178 : /* If FIN pending, we haven't sent everything, but we did try.
1179 : * Notify session layer that transport is closed. */
1180 0 : tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1181 0 : tcp_send_reset (tc);
1182 0 : tcp_program_cleanup (wrk, tc);
1183 : }
1184 : else
1185 : {
1186 : /* We've sent the fin but no progress. Close the connection and
1187 : * to make sure everything is flushed, setup a cleanup timer */
1188 0 : tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1189 0 : tcp_program_cleanup (wrk, tc);
1190 : }
1191 0 : session_transport_closed_notify (&tc->connection);
1192 0 : tcp_worker_stats_inc (wrk, to_finwait1, 1);
1193 0 : break;
1194 0 : case TCP_STATE_LAST_ACK:
1195 0 : tcp_connection_timers_reset (tc);
1196 0 : tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1197 0 : session_transport_closed_notify (&tc->connection);
1198 0 : tcp_program_cleanup (wrk, tc);
1199 0 : tcp_worker_stats_inc (wrk, to_lastack, 1);
1200 0 : break;
1201 0 : case TCP_STATE_CLOSING:
1202 0 : tcp_connection_timers_reset (tc);
1203 0 : tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1204 0 : session_transport_closed_notify (&tc->connection);
1205 0 : tcp_program_cleanup (wrk, tc);
1206 0 : tcp_worker_stats_inc (wrk, to_closing, 1);
1207 0 : break;
1208 0 : case TCP_STATE_FIN_WAIT_2:
1209 0 : tcp_send_reset (tc);
1210 0 : tcp_connection_timers_reset (tc);
1211 0 : tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1212 0 : session_transport_closed_notify (&tc->connection);
1213 0 : tcp_program_cleanup (wrk, tc);
1214 0 : tcp_worker_stats_inc (wrk, to_finwait2, 1);
1215 0 : break;
1216 0 : case TCP_STATE_TIME_WAIT:
1217 0 : tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1218 0 : tcp_program_cleanup (wrk, tc);
1219 0 : break;
1220 0 : default:
1221 0 : clib_warning ("waitclose in state: %U", format_tcp_state, tc->state);
1222 0 : break;
1223 : }
1224 0 : }
1225 :
1226 : /* *INDENT-OFF* */
1227 : static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
1228 : {
1229 : tcp_timer_retransmit_handler,
1230 : tcp_timer_persist_handler,
1231 : tcp_timer_waitclose_handler,
1232 : tcp_timer_retransmit_syn_handler,
1233 : };
1234 : /* *INDENT-ON* */
1235 :
1236 : static void
1237 74946300 : tcp_dispatch_pending_timers (tcp_worker_ctx_t * wrk)
1238 : {
1239 : u32 n_timers, connection_index, timer_id, thread_index, timer_handle;
1240 : tcp_connection_t *tc;
1241 : int i;
1242 :
1243 74946300 : if (!(n_timers = clib_fifo_elts (wrk->pending_timers)))
1244 74901800 : return;
1245 :
1246 198 : thread_index = wrk->vm->thread_index;
1247 210 : for (i = 0; i < clib_min (n_timers, wrk->max_timers_per_loop); i++)
1248 : {
1249 12 : clib_fifo_sub1 (wrk->pending_timers, timer_handle);
1250 12 : connection_index = timer_handle & 0x0FFFFFFF;
1251 12 : timer_id = timer_handle >> 28;
1252 :
1253 12 : if (PREDICT_TRUE (timer_id != TCP_TIMER_RETRANSMIT_SYN))
1254 12 : tc = tcp_connection_get (connection_index, thread_index);
1255 : else
1256 0 : tc = tcp_half_open_connection_get (connection_index);
1257 :
1258 12 : if (PREDICT_FALSE (!tc))
1259 0 : continue;
1260 :
1261 : /* Skip if the timer is not pending. Probably it was reset while
1262 : * waiting for dispatch */
1263 12 : if (PREDICT_FALSE (!(tc->pending_timers & (1 << timer_id))))
1264 0 : continue;
1265 :
1266 12 : tc->pending_timers &= ~(1 << timer_id);
1267 :
1268 : /* Skip timer if it was rearmed while pending dispatch */
1269 12 : if (PREDICT_FALSE (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID))
1270 0 : continue;
1271 :
1272 12 : (*timer_expiration_handlers[timer_id]) (tc);
1273 : }
1274 :
1275 198 : if (thread_index == 0 && clib_fifo_elts (wrk->pending_timers))
1276 0 : session_queue_run_on_main_thread (wrk->vm);
1277 : }
1278 :
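/* The timer handles dispatched above pack the connection index in the low 28
 * bits and the timer id in the top 4 bits. For example, a handle of
 * 0x30000007 decodes to timer_id 3 (TCP_TIMER_RETRANSMIT_SYN, looked up in
 * the half-open pool) for connection index 7. */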
1279 : static void
1280 74980300 : tcp_handle_cleanups (tcp_worker_ctx_t * wrk, clib_time_type_t now)
1281 : {
1282 74980300 : u32 thread_index = wrk->vm->thread_index;
1283 : tcp_cleanup_req_t *req;
1284 : tcp_connection_t *tc;
1285 :
1286 74980500 : while (clib_fifo_elts (wrk->pending_cleanups))
1287 : {
1288 1741580 : req = clib_fifo_head (wrk->pending_cleanups);
1289 1741580 : if (req->free_time > now)
1290 1741450 : break;
1291 134 : clib_fifo_sub2 (wrk->pending_cleanups, req);
1292 134 : tc = tcp_connection_get (req->connection_index, thread_index);
1293 134 : if (PREDICT_FALSE (!tc))
1294 0 : continue;
1295 134 : session_transport_delete_notify (&tc->connection);
1296 134 : tcp_connection_cleanup (tc);
1297 : }
1298 74950600 : }
1299 :
1300 : static void
1301 75003500 : tcp_update_time (f64 now, u8 thread_index)
1302 : {
1303 75003500 : tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
1304 :
1305 74994700 : tcp_set_time_now (wrk, now);
1306 74985200 : tcp_handle_cleanups (wrk, now);
1307 74948500 : tcp_timer_expire_timers (&wrk->timer_wheel, now);
1308 74948200 : tcp_dispatch_pending_timers (wrk);
1309 74901800 : }
1310 :
1311 : static void
1312 79 : tcp_session_flush_data (transport_connection_t * tconn)
1313 : {
1314 79 : tcp_connection_t *tc = (tcp_connection_t *) tconn;
1315 79 : if (tc->flags & TCP_CONN_PSH_PENDING)
1316 55 : return;
1317 24 : tc->flags |= TCP_CONN_PSH_PENDING;
1318 24 : tc->psh_seq = tc->snd_una + transport_max_tx_dequeue (tconn) - 1;
1319 : }
1320 :
1321 : static int
1322 0 : tcp_session_app_rx_evt (transport_connection_t *conn)
1323 : {
1324 0 : tcp_connection_t *tc = (tcp_connection_t *) conn;
1325 0 : u32 min_free, lo = 4 << 10, hi = 128 << 10;
1326 :
1327 0 : if (!(tc->flags & TCP_CONN_ZERO_RWND_SENT))
1328 0 : return 0;
1329 :
1330 0 : min_free = clib_clamp (transport_rx_fifo_size (conn) >> 3, lo, hi);
1331 0 : if (transport_max_rx_enqueue (conn) < min_free)
1332 : {
1333 0 : transport_rx_fifo_req_deq_ntf (conn);
1334 0 : return 0;
1335 : }
1336 :
1337 0 : tcp_send_ack (tc);
1338 :
1339 0 : return 0;
1340 : }
1341 :
1342 : /* *INDENT-OFF* */
1343 : const static transport_proto_vft_t tcp_proto = {
1344 : .enable = vnet_tcp_enable_disable,
1345 : .start_listen = tcp_session_bind,
1346 : .stop_listen = tcp_session_unbind,
1347 : .push_header = tcp_session_push_header,
1348 : .get_connection = tcp_session_get_transport,
1349 : .get_listener = tcp_session_get_listener,
1350 : .get_half_open = tcp_half_open_session_get_transport,
1351 : .attribute = tcp_session_attribute,
1352 : .connect = tcp_session_open,
1353 : .half_close = tcp_session_half_close,
1354 : .close = tcp_session_close,
1355 : .cleanup = tcp_session_cleanup,
1356 : .cleanup_ho = tcp_session_cleanup_ho,
1357 : .reset = tcp_session_reset,
1358 : .send_params = tcp_session_send_params,
1359 : .update_time = tcp_update_time,
1360 : .flush_data = tcp_session_flush_data,
1361 : .custom_tx = tcp_session_custom_tx,
1362 : .app_rx_evt = tcp_session_app_rx_evt,
1363 : .format_connection = format_tcp_session,
1364 : .format_listener = format_tcp_listener_session,
1365 : .format_half_open = format_tcp_half_open_session,
1366 : .transport_options = {
1367 : .name = "tcp",
1368 : .short_name = "T",
1369 : .tx_type = TRANSPORT_TX_PEEK,
1370 : .service_type = TRANSPORT_SERVICE_VC,
1371 : },
1372 : };
1373 : /* *INDENT-ON* */
1374 :
1375 : void
1376 40937 : tcp_connection_tx_pacer_update (tcp_connection_t * tc)
1377 : {
1378 40937 : if (!transport_connection_is_tx_paced (&tc->connection))
1379 0 : return;
1380 :
1381 40937 : f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
1382 :
1383 40937 : transport_connection_tx_pacer_update (&tc->connection,
1384 : tcp_cc_get_pacing_rate (tc),
1385 40937 : srtt * CLIB_US_TIME_FREQ);
1386 : }
1387 :
1388 : void
1389 0 : tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window,
1390 : u32 start_bucket)
1391 : {
1392 0 : f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
1393 0 : transport_connection_tx_pacer_reset (&tc->connection,
1394 : tcp_cc_get_pacing_rate (tc),
1395 : start_bucket,
1396 0 : srtt * CLIB_US_TIME_FREQ);
1397 0 : }
1398 :
1399 : void
1400 28890 : tcp_reschedule (tcp_connection_t * tc)
1401 : {
1402 28890 : if (tcp_in_cong_recovery (tc) || tcp_snd_space_inline (tc))
1403 27076 : transport_connection_reschedule (&tc->connection);
1404 28890 : }
1405 :
1406 : static void
1407 12 : tcp_expired_timers_dispatch (u32 * expired_timers)
1408 : {
1409 12 : u32 thread_index = vlib_get_thread_index (), n_left, max_per_loop;
1410 : u32 connection_index, timer_id, n_expired, max_loops;
1411 : tcp_worker_ctx_t *wrk;
1412 : tcp_connection_t *tc;
1413 : int i;
1414 :
1415 12 : wrk = tcp_get_worker (thread_index);
1416 12 : n_expired = vec_len (expired_timers);
1417 12 : tcp_worker_stats_inc (wrk, timer_expirations, n_expired);
1418 12 : n_left = clib_fifo_elts (wrk->pending_timers);
1419 :
1420 : /*
1421 : * Invalidate all timer handles before dispatching. This avoids dangling
1422 : * index references to timer wheel pool entries that have been freed.
1423 : */
1424 24 : for (i = 0; i < n_expired; i++)
1425 : {
1426 12 : connection_index = expired_timers[i] & 0x0FFFFFFF;
1427 12 : timer_id = expired_timers[i] >> 28;
1428 :
1429 12 : if (timer_id != TCP_TIMER_RETRANSMIT_SYN)
1430 12 : tc = tcp_connection_get (connection_index, thread_index);
1431 : else
1432 0 : tc = tcp_half_open_connection_get (connection_index);
1433 :
1434 : TCP_EVT (TCP_EVT_TIMER_POP, connection_index, timer_id);
1435 :
1436 12 : tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID;
1437 12 : tc->pending_timers |= (1 << timer_id);
1438 : }
1439 :
1440 12 : clib_fifo_add (wrk->pending_timers, expired_timers, n_expired);
1441 :
1442 12 : max_loops =
1443 12 : clib_max ((u32) 0.5 * TCP_TIMER_TICK * wrk->vm->loops_per_second, 1);
1444 12 : max_per_loop = clib_max ((n_left + n_expired) / max_loops, 10);
1445 12 : max_per_loop = clib_min (max_per_loop, VLIB_FRAME_SIZE);
1446 12 : wrk->max_timers_per_loop = clib_max (n_left ? wrk->max_timers_per_loop : 0,
1447 : max_per_loop);
1448 :
1449 12 : if (thread_index == 0)
1450 12 : session_queue_run_on_main_thread (wrk->vm);
1451 12 : }
1452 :
1453 : static void
1454 49 : tcp_initialize_iss_seed (tcp_main_t * tm)
1455 : {
1456 49 : u32 default_seed = random_default_seed ();
1457 49 : u64 time_now = clib_cpu_time_now ();
1458 :
1459 49 : tm->iss_seed.first = (u64) random_u32 (&default_seed) << 32;
1460 49 : tm->iss_seed.second = random_u64 (&time_now);
1461 49 : }
1462 :
1463 : static void
1464 25 : tcp_stats_collector_fn (vlib_stats_collector_data_t *d)
1465 : {
1466 25 : tcp_main_t *tm = vnet_get_tcp_main ();
1467 25 : counter_t **counters = d->entry->data;
1468 25 : counter_t *cb = counters[0];
1469 25 : tcp_wrk_stats_t acc = {};
1470 : tcp_worker_ctx_t *wrk;
1471 :
1472 71 : vec_foreach (wrk, tm->wrk_ctx)
1473 : {
1474 : #define _(name, type, str) acc.name += wrk->stats.name;
1475 46 : foreach_tcp_wrk_stat
1476 : #undef _
1477 : }
1478 :
1479 : #define _(name, type, str) cb[TCP_STAT_##name] = acc.name;
1480 25 : foreach_tcp_wrk_stat
1481 : #undef _
1482 25 : }
1483 :
1484 : static void
1485 49 : tcp_counters_init (tcp_main_t *tm)
1486 : {
1487 49 : vlib_stats_collector_reg_t r = {};
1488 : u32 idx;
1489 :
1490 49 : if (tm->counters_init)
1491 0 : return;
1492 :
1493 49 : r.entry_index = idx = vlib_stats_add_counter_vector ("/sys/tcp");
1494 49 : r.collect_fn = tcp_stats_collector_fn;
1495 49 : vlib_stats_validate (idx, 0, TCP_STAT_no_buffer);
1496 :
1497 : #define _(name, type, str) \
1498 : vlib_stats_add_symlink (idx, TCP_STAT_##name, "/sys/tcp/%s", \
1499 : CLIB_STRING_MACRO (name));
1500 49 : foreach_tcp_wrk_stat
1501 : #undef _
1502 :
1503 49 : vlib_stats_register_collector_fn (&r);
1504 :
1505 49 : tm->counters_init = 1;
1506 : }
1507 :
1508 : static clib_error_t *
1509 49 : tcp_main_enable (vlib_main_t * vm)
1510 : {
1511 49 : vlib_thread_main_t *vtm = vlib_get_thread_main ();
1512 : u32 num_threads, n_workers, prealloc_conn_per_wrk;
1513 : tcp_connection_t *tc __attribute__ ((unused));
1514 49 : tcp_main_t *tm = vnet_get_tcp_main ();
1515 : tcp_worker_ctx_t *wrk;
1516 49 : clib_error_t *error = 0;
1517 : int thread;
1518 :
1519 49 : if ((error = vlib_call_init_function (vm, ip_main_init)))
1520 0 : return error;
1521 49 : if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
1522 0 : return error;
1523 49 : if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
1524 0 : return error;
1525 :
1526 : /*
1527 : * Registrations
1528 : */
1529 :
1530 49 : ip4_register_protocol (IP_PROTOCOL_TCP, tcp4_input_node.index);
1531 49 : ip6_register_protocol (IP_PROTOCOL_TCP, tcp6_input_node.index);
1532 :
1533 : /*
1534 : * Initialize data structures
1535 : */
1536 :
1537 49 : num_threads = 1 /* main thread */ + vtm->n_threads;
1538 49 : vec_validate (tm->wrk_ctx, num_threads - 1);
1539 49 : n_workers = num_threads == 1 ? 1 : vtm->n_threads;
1540 49 : prealloc_conn_per_wrk = tcp_cfg.preallocated_connections / n_workers;
1541 :
1542 49 : wrk = &tm->wrk_ctx[0];
1543 98 : wrk->tco_next_node[0] = vlib_node_get_next (vm, session_queue_node.index,
1544 49 : tcp4_output_node.index);
1545 98 : wrk->tco_next_node[1] = vlib_node_get_next (vm, session_queue_node.index,
1546 49 : tcp6_output_node.index);
1547 :
1548 119 : for (thread = 0; thread < num_threads; thread++)
1549 : {
1550 70 : wrk = &tm->wrk_ctx[thread];
1551 :
1552 70 : vec_validate (wrk->pending_deq_acked, 255);
1553 70 : vec_validate (wrk->pending_disconnects, 255);
1554 70 : vec_validate (wrk->pending_resets, 255);
1555 70 : vec_reset_length (wrk->pending_deq_acked);
1556 70 : vec_reset_length (wrk->pending_disconnects);
1557 70 : vec_reset_length (wrk->pending_resets);
1558 70 : wrk->vm = vlib_get_main_by_index (thread);
1559 70 : wrk->max_timers_per_loop = 10;
1560 :
1561 70 : if (thread > 0)
1562 : {
1563 21 : wrk->tco_next_node[0] = tm->wrk_ctx[0].tco_next_node[0];
1564 21 : wrk->tco_next_node[1] = tm->wrk_ctx[0].tco_next_node[1];
1565 : }
1566 :
1567 : /*
1568 : * Preallocate connections. Assume that thread 0 won't
1569 : * use preallocated connections when running multi-core
1570 : */
1571 70 : if ((thread > 0 || num_threads == 1) && prealloc_conn_per_wrk)
1572 0 : pool_init_fixed (wrk->connections, prealloc_conn_per_wrk);
1573 :
1574 70 : tcp_timer_initialize_wheel (&wrk->timer_wheel,
1575 : tcp_expired_timers_dispatch,
1576 : vlib_time_now (vm));
1577 : }
1578 :
1579 49 : tcp_initialize_iss_seed (tm);
1580 :
1581 49 : tm->bytes_per_buffer = vlib_buffer_get_default_data_size (vm);
1582 49 : tm->cc_last_type = TCP_CC_LAST;
1583 :
1584 49 : tcp_counters_init (tm);
1585 :
1586 49 : return error;
1587 : }
1588 :
1589 : clib_error_t *
1590 57 : vnet_tcp_enable_disable (vlib_main_t * vm, u8 is_en)
1591 : {
1592 57 : if (is_en)
1593 : {
1594 49 : if (tcp_main.is_enabled)
1595 0 : return 0;
1596 :
1597 49 : return tcp_main_enable (vm);
1598 : }
1599 : else
1600 : {
1601 8 : tcp_main.is_enabled = 0;
1602 : }
1603 :
1604 8 : return 0;
1605 : }
1606 :
1607 : void
1608 4 : tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add)
1609 : {
1610 4 : tcp_main_t *tm = &tcp_main;
1611 4 : if (is_ip4)
1612 2 : tm->punt_unknown4 = is_add;
1613 : else
1614 2 : tm->punt_unknown6 = is_add;
1615 4 : }
1616 :
1617 : /**
1618 : * Initialize default values for tcp parameters
1619 : */
1620 : static void
1621 559 : tcp_configuration_init (void)
1622 : {
1623 : /* Initial wnd for SYN. Fifos are not allocated at that point so use some
1624 : * predefined value. For SYN-ACK we still want the scale to be computed in
1625 : * the same way */
1626 559 : tcp_cfg.max_rx_fifo = 32 << 20;
1627 559 : tcp_cfg.min_rx_fifo = 4 << 10;
1628 :
1629 559 : tcp_cfg.default_mtu = 1500;
1630 559 : tcp_cfg.initial_cwnd_multiplier = 0;
1631 559 : tcp_cfg.enable_tx_pacing = 1;
1632 559 : tcp_cfg.allow_tso = 0;
1633 559 : tcp_cfg.csum_offload = 1;
1634 559 : tcp_cfg.cc_algo = TCP_CC_CUBIC;
1635 559 : tcp_cfg.rwnd_min_update_ack = 1;
1636 559 : tcp_cfg.max_gso_size = TCP_MAX_GSO_SZ;
1637 :
1638 : /* Time constants defined as timer tick (100us) multiples */
1639 559 : tcp_cfg.closewait_time = 20000; /* 2s */
1640 559 : tcp_cfg.timewait_time = 100000; /* 10s */
1641 559 : tcp_cfg.finwait1_time = 600000; /* 60s */
1642 559 : tcp_cfg.lastack_time = 300000; /* 30s */
1643 559 : tcp_cfg.finwait2_time = 300000; /* 30s */
1644 559 : tcp_cfg.closing_time = 300000; /* 30s */
1645 559 : tcp_cfg.alloc_err_timeout = 1000; /* 100ms */
1646 :
1647 : /* This value is seconds */
1648 559 : tcp_cfg.cleanup_time = 0.1; /* 100ms */
1649 559 : }
1650 :
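/* These defaults can be overridden from the startup configuration's "tcp"
 * stanza. A sketch is shown below; the keys are parsed outside this file, so
 * treat the exact names as assumptions rather than a reference:
 *
 *   tcp {
 *     mtu 9000
 *     cc-algo cubic
 *     preallocated-connections 100000
 *   }
 */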
1651 : static clib_error_t *
1652 559 : tcp_init (vlib_main_t * vm)
1653 : {
1654 559 : tcp_main_t *tm = vnet_get_tcp_main ();
1655 559 : ip_main_t *im = &ip_main;
1656 : ip_protocol_info_t *pi;
1657 :
1658 : /* Session layer, and by implication tcp, are disabled by default */
1659 559 : tm->is_enabled = 0;
1660 :
1661 : /* Register with IP for header parsing */
1662 559 : pi = ip_get_protocol_info (im, IP_PROTOCOL_TCP);
1663 559 : if (pi == 0)
1664 0 : return clib_error_return (0, "TCP protocol info AWOL");
1665 559 : pi->format_header = format_tcp_header;
1666 559 : pi->unformat_pg_edit = unformat_pg_tcp_header;
1667 :
1668 : /* Register as transport with session layer */
1669 559 : transport_register_protocol (TRANSPORT_PROTO_TCP, &tcp_proto,
1670 : FIB_PROTOCOL_IP4, tcp4_output_node.index);
1671 559 : transport_register_protocol (TRANSPORT_PROTO_TCP, &tcp_proto,
1672 : FIB_PROTOCOL_IP6, tcp6_output_node.index);
1673 :
1674 559 : tcp_configuration_init ();
1675 :
1676 559 : tm->cc_algo_by_name = hash_create_string (0, sizeof (uword));
1677 :
1678 559 : return 0;
1679 : }
1680 :
1681 58799 : VLIB_INIT_FUNCTION (tcp_init);
1682 :
1683 : /*
1684 : * fd.io coding-style-patch-verification: ON
1685 : *
1686 : * Local Variables:
1687 : * eval: (c-set-style "gnu")
1688 : * End:
1689 : */