Line data Source code
1 : /*
2 : * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 :
16 : #ifndef _vnet_tcp_h_
17 : #define _vnet_tcp_h_
18 :
19 : #include <vnet/vnet.h>
20 : #include <vnet/ip/ip.h>
21 : #include <vnet/session/session.h>
22 : #include <vnet/tcp/tcp_types.h>
23 : #include <vnet/tcp/tcp_timer.h>
24 : #include <vnet/tcp/tcp_debug.h>
25 : #include <vnet/tcp/tcp_sack.h>
26 : #include <vnet/tcp/tcp_bt.h>
27 : #include <vnet/tcp/tcp_cc.h>
28 :
29 : typedef void (timer_expiration_handler) (tcp_connection_t * tc);
30 :
31 : extern timer_expiration_handler tcp_timer_retransmit_handler;
32 : extern timer_expiration_handler tcp_timer_persist_handler;
33 : extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
34 :
35 : typedef enum _tcp_error
36 : {
37 : #define tcp_error(f, n, s, d) TCP_ERROR_##f,
38 : #include <vnet/tcp/tcp_error.def>
39 : #undef tcp_error
40 : TCP_N_ERROR,
41 : } tcp_error_t;
42 :
43 : typedef struct _tcp_lookup_dispatch
44 : {
45 : u8 next, error;
46 : } tcp_lookup_dispatch_t;
47 :
/**
 * X-macro listing the per-worker TCP statistics.
 *
 * Every entry has the form _(name, type, str):
 *  - name: identifier of the counter
 *  - type: C type of the counter
 *  - str:  human readable description (string literal)
 *
 * Users define `_` before expanding the list and undefine it afterwards.
 *
 * The last entry intentionally carries no line continuation, so the macro
 * ends with the list itself and cannot absorb the line that follows it.
 */
#define foreach_tcp_wrk_stat                                                  \
  _ (timer_expirations, u64, "timer expirations")                             \
  _ (rxt_segs, u64, "segments retransmitted")                                 \
  _ (tr_events, u32, "timer retransmit events")                               \
  _ (to_closewait, u32, "timeout close-wait")                                 \
  _ (to_closewait2, u32, "timeout close-wait w/data")                         \
  _ (to_finwait1, u32, "timeout fin-wait-1")                                  \
  _ (to_finwait2, u32, "timeout fin-wait-2")                                  \
  _ (to_lastack, u32, "timeout last-ack")                                     \
  _ (to_closing, u32, "timeout closing")                                      \
  _ (tr_abort, u32, "timer retransmit abort")                                 \
  _ (rst_unread, u32, "reset on close due to unread data")                    \
  _ (no_buffer, u32, "out of buffers")
61 :
62 : typedef struct tcp_wrk_stats_
63 : {
64 : #define _(name, type, str) type name;
65 : foreach_tcp_wrk_stat
66 : #undef _
67 : } tcp_wrk_stats_t;
68 :
/** Identifiers for the per-worker statistics: one TCP_STAT_<name> enumerator
 *  per foreach_tcp_wrk_stat entry, in list order. */
typedef enum
{
#define _(stat, ctype, desc) TCP_STAT_##stat,
  foreach_tcp_wrk_stat
#undef _
} tcp_wrk_stats_e;
75 :
76 : typedef struct tcp_free_req_
77 : {
78 : clib_time_type_t free_time;
79 : u32 connection_index;
80 : } tcp_cleanup_req_t;
81 :
82 : typedef struct tcp_worker_ctx_
83 : {
84 : CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
85 :
86 : /** worker's pool of connections */
87 : tcp_connection_t *connections;
88 :
89 : /** vector of pending ack dequeues */
90 : u32 *pending_deq_acked;
91 :
92 : /** vector of pending disconnect notifications */
93 : u32 *pending_disconnects;
94 :
95 : /** vector of pending reset notifications */
96 : u32 *pending_resets;
97 :
98 : /** convenience pointer to this thread's vlib main */
99 : vlib_main_t *vm;
100 :
101 : /** Time used for high precision (us) measurements in seconds */
102 : f64 time_us;
103 :
104 : /** Time measured in @ref TCP_TSTAMP_TICK used for time stamps */
105 : u32 time_tstamp;
106 :
107 : /* Max timers to be handled per dispatch loop */
108 : u32 max_timers_per_loop;
109 :
110 : /* Fifo of pending timer expirations */
111 : u32 *pending_timers;
112 :
113 : CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
114 :
115 : /** cached 'on the wire' options for bursts */
116 : u8 cached_opts[40];
117 :
118 : /** tx buffer free list */
119 : u32 *tx_buffers;
120 :
121 : /* fifo of pending free requests */
122 : tcp_cleanup_req_t *pending_cleanups;
123 :
124 : /** Session layer edge indices to tcp output */
125 : u32 tco_next_node[2];
126 :
127 : /** worker timer wheel */
128 : tcp_timer_wheel_t timer_wheel;
129 :
130 : CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
131 :
132 : tcp_wrk_stats_t stats;
133 : } tcp_worker_ctx_t;
134 :
/**
 * Add a value to one of the counters in a worker's `stats` member.
 *
 * @param _wrk  pointer expression to an object that has a `stats` member
 * @param _stat name of the counter field inside `stats`
 * @param _val  amount added to the counter
 *
 * The arguments and the whole expansion are parenthesized so that
 * expressions such as `&wrk` or `wrk + 1` can be passed as _wrk and so that
 * the result can be used inside a larger expression. The value of the
 * expansion is the value of the compound assignment.
 */
#define tcp_worker_stats_inc(_wrk, _stat, _val)                               \
  ((_wrk)->stats._stat += (_val))
137 :
138 : typedef struct tcp_iss_seed_
139 : {
140 : u64 first;
141 : u64 second;
142 : } tcp_iss_seed_t;
143 :
144 : typedef struct tcp_configuration_
145 : {
146 : /** Max rx fifo size for a session (in bytes). It is used in to compute the
147 : * rfc 7323 window scaling factor */
148 : u32 max_rx_fifo;
149 :
150 : /** Min rx fifo for a session (in bytes) */
151 : u32 min_rx_fifo;
152 :
153 : /** Default MTU to be used when establishing connections */
154 : u16 default_mtu;
155 :
156 : /** Initial CWND multiplier, which multiplies MSS to determine initial CWND.
157 : * Set 0 to determine the initial CWND by another way */
158 : u16 initial_cwnd_multiplier;
159 :
160 : /** Enable tx pacing for new connections */
161 : u8 enable_tx_pacing;
162 :
163 : /** Allow use of TSO whenever available */
164 : u8 allow_tso;
165 :
166 : /** Set if csum offloading is enabled */
167 : u8 csum_offload;
168 :
169 : /** Default congestion control algorithm type */
170 : tcp_cc_algorithm_type_e cc_algo;
171 :
172 : /** Min rwnd, as number of snd_mss segments, for update ack to be sent after
173 : * a zero rwnd advertisement */
174 : u32 rwnd_min_update_ack;
175 :
176 : /** Timer ticks to wait for close from app */
177 : u32 closewait_time;
178 :
179 : /** Timer ticks to wait in time-wait. Also known as 2MSL */
180 : u32 timewait_time;
181 :
182 : /** Timer ticks to wait in fin-wait1 to send fin and rcv fin-ack */
183 : u32 finwait1_time;
184 :
185 : /** Timer ticks to wait in last ack for ack */
186 : u32 lastack_time;
187 :
188 : /** Timer ticks to wait in fin-wait2 for fin */
189 : u32 finwait2_time;
190 :
191 : /** Timer ticks to wait in closing for fin ack */
192 : u32 closing_time;
193 :
194 : /** Timer ticks to wait for free buffer */
195 : u32 alloc_err_timeout;
196 :
197 : /** Time to wait (sec) before cleaning up the connection */
198 : f32 cleanup_time;
199 :
200 : /** Number of preallocated connections */
201 : u32 preallocated_connections;
202 :
203 : /** Maxium allowed GSO packet size */
204 : u32 max_gso_size;
205 :
206 : /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
207 : ip4_address_t *ip4_src_addrs;
208 : ip6_address_t *ip6_src_addrs;
209 :
210 : /** Fault-injection. Debug only */
211 : f64 buffer_fail_fraction;
212 : } tcp_configuration_t;
213 :
214 : typedef struct _tcp_main
215 : {
216 : /** per-worker context */
217 : tcp_worker_ctx_t *wrk_ctx;
218 :
219 : /* Pool of listeners. */
220 : tcp_connection_t *listener_pool;
221 :
222 : /** vlib buffer size */
223 : u32 bytes_per_buffer;
224 :
225 : /** Dispatch table by state and flags */
226 : tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64];
227 :
228 : /** Seed used to generate random iss */
229 : tcp_iss_seed_t iss_seed;
230 :
231 : /** Congestion control algorithms registered */
232 : tcp_cc_algorithm_t *cc_algos;
233 :
234 : /** Hash table of cc algorithms by name */
235 : uword *cc_algo_by_name;
236 :
237 : /** Last cc algo registered */
238 : tcp_cc_algorithm_type_e cc_last_type;
239 :
240 : /** Flag that indicates if stack is on or off */
241 : u8 is_enabled;
242 :
243 : /** Set if counters on stats segment initialized */
244 : u8 counters_init;
245 :
246 : /** Flag that indicates if v4 punting is enabled */
247 : u8 punt_unknown4;
248 :
249 : /** Flag that indicates if v6 punting is enabled */
250 : u8 punt_unknown6;
251 :
252 : /** Rotor for v4 source addresses */
253 : u32 last_v4_addr_rotor;
254 :
255 : /** Rotor for v6 source addresses */
256 : u32 last_v6_addr_rotor;
257 :
258 : /** Protocol configuration */
259 : tcp_configuration_t cfg;
260 :
261 : /** message ID base for API */
262 : u16 msg_id_base;
263 : } tcp_main_t;
264 :
265 : extern tcp_main_t tcp_main;
266 : extern vlib_node_registration_t tcp4_input_node;
267 : extern vlib_node_registration_t tcp6_input_node;
268 : extern vlib_node_registration_t tcp4_output_node;
269 : extern vlib_node_registration_t tcp6_output_node;
270 : extern vlib_node_registration_t tcp4_established_node;
271 : extern vlib_node_registration_t tcp6_established_node;
272 : extern vlib_node_registration_t tcp4_syn_sent_node;
273 : extern vlib_node_registration_t tcp6_syn_sent_node;
274 : extern vlib_node_registration_t tcp4_rcv_process_node;
275 : extern vlib_node_registration_t tcp6_rcv_process_node;
276 : extern vlib_node_registration_t tcp4_listen_node;
277 : extern vlib_node_registration_t tcp6_listen_node;
278 : extern vlib_node_registration_t tcp4_input_nolookup_node;
279 : extern vlib_node_registration_t tcp6_input_nolookup_node;
280 : extern vlib_node_registration_t tcp4_drop_node;
281 : extern vlib_node_registration_t tcp6_drop_node;
282 :
/** Shorthand for the protocol configuration stored in tcp_main. */
#define tcp_cfg tcp_main.cfg

/**
 * Index of a TCP graph node.
 *
 * @param node_id middle part of the registration name, e.g. `input` for
 *                tcp4_input_node / tcp6_input_node
 * @param is_ip4  non-zero selects the tcp4_ registration, zero the tcp6_ one
 */
#define tcp_node_index(node_id, is_ip4)                                       \
  ((is_ip4) ? tcp4_##node_id##_node.index : tcp6_##node_id##_node.index)
286 :
287 : always_inline tcp_main_t *
288 33215 : vnet_get_tcp_main ()
289 : {
290 33215 : return &tcp_main;
291 : }
292 :
293 : always_inline tcp_worker_ctx_t *
294 82786117 : tcp_get_worker (u32 thread_index)
295 : {
296 82786117 : ASSERT (thread_index < vec_len (tcp_main.wrk_ctx));
297 82780817 : return &tcp_main.wrk_ctx[thread_index];
298 : }
299 :
300 : tcp_connection_t *tcp_connection_alloc (u8 thread_index);
301 : tcp_connection_t *tcp_connection_alloc_w_base (u8 thread_index,
302 : tcp_connection_t **base);
303 : void tcp_connection_free (tcp_connection_t * tc);
304 : void tcp_connection_close (tcp_connection_t * tc);
305 : void tcp_connection_cleanup (tcp_connection_t * tc);
306 : void tcp_connection_del (tcp_connection_t * tc);
307 : int tcp_half_open_connection_cleanup (tcp_connection_t * tc);
308 :
309 : void tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
310 : u32 thread_index, u8 is_ip4);
311 : void tcp_send_reset (tcp_connection_t * tc);
312 : void tcp_send_syn (tcp_connection_t * tc);
313 : void tcp_send_synack (tcp_connection_t * tc);
314 : void tcp_send_fin (tcp_connection_t * tc);
315 : void tcp_send_ack (tcp_connection_t * tc);
316 : void tcp_send_window_update_ack (tcp_connection_t * tc);
317 :
318 : void tcp_program_ack (tcp_connection_t * tc);
319 : void tcp_program_dupack (tcp_connection_t * tc);
320 : void tcp_program_retransmit (tcp_connection_t * tc);
321 :
322 : void tcp_update_burst_snd_vars (tcp_connection_t * tc);
323 : u32 tcp_snd_space (tcp_connection_t * tc);
324 : int tcp_fastrecovery_prr_snd_space (tcp_connection_t * tc);
325 : void tcp_reschedule (tcp_connection_t * tc);
326 : fib_node_index_t tcp_lookup_rmt_in_fib (tcp_connection_t * tc);
327 : u32 tcp_session_push_header (transport_connection_t *tconn, vlib_buffer_t **b,
328 : u32 n_bufs);
329 : int tcp_session_custom_tx (void *conn, transport_send_params_t * sp);
330 :
331 : void tcp_connection_timers_init (tcp_connection_t * tc);
332 : void tcp_connection_timers_reset (tcp_connection_t * tc);
333 : void tcp_init_snd_vars (tcp_connection_t * tc);
334 : void tcp_connection_init_vars (tcp_connection_t * tc);
335 : void tcp_connection_tx_pacer_update (tcp_connection_t * tc);
336 : void tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window,
337 : u32 start_bucket);
338 : void tcp_program_cleanup (tcp_worker_ctx_t * wrk, tcp_connection_t * tc);
339 : void tcp_check_gso (tcp_connection_t *tc);
340 :
341 : int tcp_buffer_make_reset (vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4);
342 : void tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
343 : int tcp_configure_v4_source_address_range (vlib_main_t * vm,
344 : ip4_address_t * start,
345 : ip4_address_t * end, u32 table_id);
346 : int tcp_configure_v6_source_address_range (vlib_main_t * vm,
347 : ip6_address_t * start,
348 : ip6_address_t * end, u32 table_id);
349 :
350 : clib_error_t *vnet_tcp_enable_disable (vlib_main_t * vm, u8 is_en);
351 :
352 : format_function_t format_tcp_state;
353 : format_function_t format_tcp_flags;
354 : format_function_t format_tcp_sacks;
355 : format_function_t format_tcp_rcv_sacks;
356 : format_function_t format_tcp_connection;
357 : format_function_t format_tcp_connection_id;
358 :
/**
 * Assert that an established connection can dequeue at least _a for tx.
 *
 * The check passes when the connection is not in TCP_STATE_ESTABLISHED, or
 * when transport_max_tx_dequeue () on its transport connection returns at
 * least _a.
 *
 * @param _tc pointer expression to the tcp connection; evaluated twice, so
 *            it must not have side effects
 * @param _a  minimum value expected from transport_max_tx_dequeue ()
 *
 * Both arguments are parenthesized in the expansion so that expressions
 * such as `&conn` or `a + b` can be passed safely.
 */
#define tcp_validate_txf_size(_tc, _a)                                        \
  ASSERT ((_tc)->state != TCP_STATE_ESTABLISHED                               \
          || transport_max_tx_dequeue (&(_tc)->connection) >= (_a))
362 :
363 : #endif /* _vnet_tcp_h_ */
364 :
365 : /*
366 : * fd.io coding-style-patch-verification: ON
367 : *
368 : * Local Variables:
369 : * eval: (c-set-style "gnu")
370 : * End:
371 : */
|