Line data Source code
1 : /*
2 : * Copyright (c) 2020 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 :
16 : #ifndef SRC_VNET_TCP_TCP_INLINES_H_
17 : #define SRC_VNET_TCP_TCP_INLINES_H_
18 :
19 : #include <vnet/tcp/tcp.h>
20 :
21 : always_inline void
22 68608 : tcp_node_inc_counter_i (vlib_main_t *vm, u32 tcp4_node, u32 tcp6_node,
23 : u8 is_ip4, u32 evt, u32 val)
24 : {
25 68608 : if (is_ip4)
26 68591 : vlib_node_increment_counter (vm, tcp4_node, evt, val);
27 : else
28 17 : vlib_node_increment_counter (vm, tcp6_node, evt, val);
29 68608 : }
30 :
/*
 * Counter helpers for tcp nodes. They expand in the caller's scope and rely
 * on locals being visible there: vm and is_ip4 for tcp_inc_counter (and
 * therefore tcp_store_err_counters), next0 and is_ip4 for
 * tcp_maybe_inc_err_counter.
 *
 * The statement-like macros are wrapped in do { } while (0) so each one
 * behaves as a single statement, including inside an unbraced if/else.
 * Call sites must terminate them with a semicolon.
 */

/* Add count to counter err of the tcp4/tcp6 flavor of node node_id */
#define tcp_inc_counter(node_id, err, count) \
  tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \
                          tcp6_##node_id##_node.index, is_ip4, (err), \
                          (count))

/* Add one to cnts[err] when next0 differs from tcp_next_drop (is_ip4) */
#define tcp_maybe_inc_err_counter(cnts, err) \
  do \
    { \
      (cnts)[(err)] += (next0 != tcp_next_drop (is_ip4)); \
    } \
  while (0)

/* Add val to cnts[err] */
#define tcp_inc_err_counter(cnts, err, val) \
  do \
    { \
      (cnts)[(err)] += (val); \
    } \
  while (0)

/* Flush every non-zero entry of cnts[0..TCP_N_ERROR) to the node counters */
#define tcp_store_err_counters(node_id, cnts) \
  do \
    { \
      int _cnt_idx; \
      for (_cnt_idx = 0; _cnt_idx < TCP_N_ERROR; _cnt_idx++) \
        if ((cnts)[_cnt_idx]) \
          tcp_inc_counter (node_id, _cnt_idx, (cnts)[_cnt_idx]); \
    } \
  while (0)
49 :
50 : always_inline tcp_header_t *
51 3187596 : tcp_buffer_hdr (vlib_buffer_t * b)
52 : {
53 3187596 : ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
54 6375192 : return (tcp_header_t *) (b->data + b->current_data
55 3187596 : + vnet_buffer (b)->tcp.hdr_offset);
56 : }
57 :
58 : always_inline tcp_connection_t *
59 8013880 : tcp_connection_get (u32 conn_index, u32 thread_index)
60 : {
61 8013880 : tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
62 8013880 : if (PREDICT_FALSE (pool_is_free_index (wrk->connections, conn_index)))
63 0 : return 0;
64 8013880 : return pool_elt_at_index (wrk->connections, conn_index);
65 : }
66 :
67 : always_inline tcp_connection_t *
68 : tcp_connection_get_if_valid (u32 conn_index, u32 thread_index)
69 : {
70 : tcp_worker_ctx_t *wrk;
71 : if (thread_index >= vec_len (tcp_main.wrk_ctx))
72 : return 0;
73 : wrk = tcp_get_worker (thread_index);
74 : if (pool_is_free_index (wrk->connections, conn_index))
75 : return 0;
76 : return pool_elt_at_index (wrk->connections, conn_index);
77 : }
78 :
79 : always_inline void
80 776 : tcp_connection_set_state (tcp_connection_t * tc, tcp_state_t state)
81 : {
82 776 : tc->state = state;
83 : TCP_EVT (TCP_EVT_STATE_CHANGE, tc);
84 776 : }
85 :
86 : always_inline tcp_connection_t *
87 141 : tcp_listener_get (u32 tli)
88 : {
89 141 : tcp_connection_t *tc = 0;
90 141 : if (!pool_is_free_index (tcp_main.listener_pool, tli))
91 141 : tc = pool_elt_at_index (tcp_main.listener_pool, tli);
92 141 : return tc;
93 : }
94 :
95 : always_inline tcp_connection_t *
96 396 : tcp_half_open_connection_get (u32 conn_index)
97 : {
98 396 : return tcp_connection_get (conn_index, transport_cl_thread ());
99 : }
100 :
101 : /**
102 : * Our estimate of the number of bytes that have left the network
103 : */
104 : always_inline u32
105 36 : tcp_bytes_out (const tcp_connection_t * tc)
106 : {
107 36 : if (tcp_opts_sack_permitted (&tc->rcv_opts))
108 24 : return tc->sack_sb.sacked_bytes + tc->sack_sb.lost_bytes;
109 : else
110 12 : return clib_min (tc->rcv_dupacks * tc->snd_mss,
111 : tc->snd_nxt - tc->snd_una);
112 : }
113 :
114 : /**
115 : * Our estimate of the number of bytes in flight (pipe size)
116 : */
117 : always_inline u32
118 36 : tcp_flight_size (const tcp_connection_t * tc)
119 : {
120 : int flight_size;
121 :
122 36 : flight_size = (int) (tc->snd_nxt - tc->snd_una) - tcp_bytes_out (tc)
123 36 : + tc->snd_rxt_bytes - tc->rxt_delivered;
124 :
125 36 : ASSERT (flight_size >= 0);
126 :
127 36 : return flight_size;
128 : }
129 :
130 : /**
131 : * Initial cwnd as per RFC5681
132 : */
133 : always_inline u32
134 267 : tcp_initial_cwnd (const tcp_connection_t * tc)
135 : {
136 267 : if (tcp_cfg.initial_cwnd_multiplier > 0)
137 0 : return tcp_cfg.initial_cwnd_multiplier * tc->snd_mss;
138 :
139 267 : if (tc->snd_mss > 2190)
140 0 : return 2 * tc->snd_mss;
141 267 : else if (tc->snd_mss > 1095)
142 264 : return 3 * tc->snd_mss;
143 : else
144 3 : return 4 * tc->snd_mss;
145 : }
146 :
147 : /*
148 : * Accumulate acked bytes for cwnd increase
149 : *
150 : * Once threshold bytes are accumulated, snd_mss bytes are added
151 : * to the cwnd.
152 : */
153 : always_inline void
154 0 : tcp_cwnd_accumulate (tcp_connection_t * tc, u32 thresh, u32 bytes)
155 : {
156 0 : tc->cwnd_acc_bytes += bytes;
157 0 : if (tc->cwnd_acc_bytes >= thresh)
158 : {
159 0 : u32 inc = tc->cwnd_acc_bytes / thresh;
160 0 : tc->cwnd_acc_bytes -= inc * thresh;
161 0 : tc->cwnd += inc * tc->snd_mss;
162 0 : tc->cwnd = clib_min (tc->cwnd, tc->tx_fifo_size);
163 : }
164 0 : }
165 :
166 : always_inline u32
167 0 : tcp_loss_wnd (const tcp_connection_t * tc)
168 : {
169 : /* Whatever we have in flight + the packet we're about to send */
170 0 : return tcp_flight_size (tc) + tc->snd_mss;
171 : }
172 :
173 : always_inline u32
174 131827 : tcp_available_snd_wnd (const tcp_connection_t * tc)
175 : {
176 131827 : return clib_min (tc->cwnd, tc->snd_wnd);
177 : }
178 :
179 : always_inline u32
180 131815 : tcp_available_output_snd_space (const tcp_connection_t * tc)
181 : {
182 131815 : u32 available_wnd = tcp_available_snd_wnd (tc);
183 131815 : int flight_size = (int) (tc->snd_nxt - tc->snd_una);
184 :
185 131815 : if (available_wnd <= flight_size)
186 1 : return 0;
187 :
188 131814 : return available_wnd - flight_size;
189 : }
190 :
191 : /**
192 : * Estimate of how many bytes we can still push into the network
193 : */
194 : always_inline u32
195 12 : tcp_available_cc_snd_space (const tcp_connection_t * tc)
196 : {
197 12 : u32 available_wnd = tcp_available_snd_wnd (tc);
198 12 : u32 flight_size = tcp_flight_size (tc);
199 :
200 12 : if (available_wnd <= flight_size)
201 0 : return 0;
202 :
203 12 : return available_wnd - flight_size;
204 : }
205 :
206 : always_inline u8
207 1 : tcp_is_lost_fin (tcp_connection_t * tc)
208 : {
209 1 : if ((tc->flags & TCP_CONN_FINSNT) && (tc->snd_nxt - tc->snd_una == 1))
210 1 : return 1;
211 0 : return 0;
212 : }
213 :
214 : /**
215 : * Time used to generate timestamps, not the timestamp
216 : */
217 : always_inline u32
218 74930 : tcp_time_tstamp (u32 thread_index)
219 : {
220 74930 : return tcp_main.wrk_ctx[thread_index].time_tstamp;
221 : }
222 :
223 : /**
224 : * Generate timestamp for tcp connection
225 : */
226 : always_inline u32
227 129634 : tcp_tstamp (tcp_connection_t * tc)
228 : {
229 259268 : return (tcp_main.wrk_ctx[tc->c_thread_index].time_tstamp -
230 129634 : tc->timestamp_delta);
231 : }
232 :
233 : always_inline f64
234 100665 : tcp_time_now_us (u32 thread_index)
235 : {
236 100665 : return tcp_main.wrk_ctx[thread_index].time_us;
237 : }
238 :
239 : always_inline void
240 73768654 : tcp_set_time_now (tcp_worker_ctx_t *wrk, f64 now)
241 : {
242 : /* TCP internal cache of time reference. Could use @ref transport_time_now
243 : * but because @ref tcp_time_now_us is used per packet, caching might
244 : * slightly improve efficiency. */
245 73768654 : wrk->time_us = now;
246 73768654 : wrk->time_tstamp = (u64) (now * TCP_TSTP_HZ);
247 73768654 : }
248 :
249 : always_inline void
250 76609 : tcp_update_time_now (tcp_worker_ctx_t *wrk)
251 : {
252 76609 : f64 now = vlib_time_now (wrk->vm);
253 :
254 : /* Both pacer and tcp us time need to be updated */
255 76609 : transport_update_pacer_time (wrk->vm->thread_index, now);
256 76609 : tcp_set_time_now (wrk, now);
257 76609 : }
258 :
259 : always_inline tcp_connection_t *
260 1062520 : tcp_input_lookup_buffer (vlib_buffer_t * b, u8 thread_index, u32 * error,
261 : u8 is_ip4, u8 is_nolookup)
262 : {
263 1062520 : u32 fib_index = vnet_buffer (b)->ip.fib_index;
264 : int n_advance_bytes, n_data_bytes;
265 : transport_connection_t *tc;
266 : tcp_header_t *tcp;
267 1062520 : u8 result = 0;
268 :
269 1062520 : if (is_ip4)
270 : {
271 1062500 : ip4_header_t *ip4 = vlib_buffer_get_current (b);
272 1062500 : int ip_hdr_bytes = ip4_header_bytes (ip4);
273 1062500 : if (PREDICT_FALSE (b->current_length < ip_hdr_bytes + sizeof (*tcp)))
274 : {
275 0 : *error = TCP_ERROR_LENGTH;
276 0 : return 0;
277 : }
278 1062500 : tcp = ip4_next_header (ip4);
279 1062500 : vnet_buffer (b)->tcp.hdr_offset = (u8 *) tcp - (u8 *) ip4;
280 1062500 : n_advance_bytes = (ip_hdr_bytes + tcp_header_bytes (tcp));
281 1062500 : n_data_bytes = clib_net_to_host_u16 (ip4->length) - n_advance_bytes;
282 :
283 : /* Length check. Checksum computed by ipx_local no need to compute again */
284 1062500 : if (PREDICT_FALSE (n_data_bytes < 0))
285 : {
286 0 : *error = TCP_ERROR_LENGTH;
287 0 : return 0;
288 : }
289 :
290 1062500 : if (!is_nolookup)
291 1062500 : tc = session_lookup_connection_wt4 (fib_index, &ip4->dst_address,
292 1062500 : &ip4->src_address, tcp->dst_port,
293 1062500 : tcp->src_port,
294 : TRANSPORT_PROTO_TCP, thread_index,
295 : &result);
296 : }
297 : else
298 : {
299 17 : ip6_header_t *ip6 = vlib_buffer_get_current (b);
300 17 : if (PREDICT_FALSE (b->current_length < sizeof (*ip6) + sizeof (*tcp)))
301 : {
302 0 : *error = TCP_ERROR_LENGTH;
303 0 : return 0;
304 : }
305 17 : tcp = ip6_next_header (ip6);
306 17 : vnet_buffer (b)->tcp.hdr_offset = (u8 *) tcp - (u8 *) ip6;
307 17 : n_advance_bytes = tcp_header_bytes (tcp);
308 17 : n_data_bytes = clib_net_to_host_u16 (ip6->payload_length)
309 : - n_advance_bytes;
310 17 : n_advance_bytes += sizeof (ip6[0]);
311 :
312 17 : if (PREDICT_FALSE (n_data_bytes < 0))
313 : {
314 0 : *error = TCP_ERROR_LENGTH;
315 0 : return 0;
316 : }
317 :
318 17 : if (!is_nolookup)
319 : {
320 17 : if (PREDICT_FALSE
321 : (ip6_address_is_link_local_unicast (&ip6->dst_address)))
322 : {
323 0 : ip6_main_t *im = &ip6_main;
324 0 : fib_index = vec_elt (im->fib_index_by_sw_if_index,
325 : vnet_buffer (b)->ip.rx_sw_if_index);
326 : }
327 :
328 17 : tc = session_lookup_connection_wt6 (fib_index, &ip6->dst_address,
329 : &ip6->src_address,
330 17 : tcp->dst_port, tcp->src_port,
331 : TRANSPORT_PROTO_TCP,
332 : thread_index, &result);
333 : }
334 : }
335 :
336 : /* Set the sw_if_index[VLIB_RX] to the interface we received
337 : * the connection on (the local interface) */
338 1062520 : vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->ip.rx_sw_if_index;
339 :
340 1062520 : if (is_nolookup)
341 : tc =
342 0 : (transport_connection_t *) tcp_connection_get (vnet_buffer (b)->
343 : tcp.connection_index,
344 : thread_index);
345 :
346 1062520 : vnet_buffer (b)->tcp.seq_number = clib_net_to_host_u32 (tcp->seq_number);
347 1062520 : vnet_buffer (b)->tcp.ack_number = clib_net_to_host_u32 (tcp->ack_number);
348 1062520 : vnet_buffer (b)->tcp.data_offset = n_advance_bytes;
349 1062520 : vnet_buffer (b)->tcp.data_len = n_data_bytes;
350 1062520 : vnet_buffer (b)->tcp.seq_end = vnet_buffer (b)->tcp.seq_number
351 1062520 : + n_data_bytes;
352 :
353 1062520 : *error = result ? TCP_ERROR_NONE + result : *error;
354 :
355 1062520 : return tcp_get_connection_from_transport (tc);
356 : }
357 :
358 : /**
359 : * Initialize connection by gleaning network and rcv params from buffer
360 : *
361 : * @param tc connection to initialize
362 : * @param b buffer whose current data is pointing at ip
363 : * @param is_ip4 flag set to 1 if using ip4
364 : */
365 : always_inline void
366 135 : tcp_init_w_buffer (tcp_connection_t * tc, vlib_buffer_t * b, u8 is_ip4)
367 : {
368 135 : tcp_header_t *th = tcp_buffer_hdr (b);
369 :
370 135 : tc->c_lcl_port = th->dst_port;
371 135 : tc->c_rmt_port = th->src_port;
372 135 : tc->c_is_ip4 = is_ip4;
373 :
374 135 : if (is_ip4)
375 : {
376 134 : ip4_header_t *ip4 = vlib_buffer_get_current (b);
377 134 : tc->c_lcl_ip4.as_u32 = ip4->dst_address.as_u32;
378 134 : tc->c_rmt_ip4.as_u32 = ip4->src_address.as_u32;
379 : }
380 : else
381 : {
382 1 : ip6_header_t *ip6 = vlib_buffer_get_current (b);
383 1 : clib_memcpy_fast (&tc->c_lcl_ip6, &ip6->dst_address,
384 : sizeof (ip6_address_t));
385 1 : clib_memcpy_fast (&tc->c_rmt_ip6, &ip6->src_address,
386 : sizeof (ip6_address_t));
387 : }
388 :
389 135 : tc->irs = vnet_buffer (b)->tcp.seq_number;
390 135 : tc->rcv_nxt = vnet_buffer (b)->tcp.seq_number + 1;
391 135 : tc->rcv_las = tc->rcv_nxt;
392 135 : tc->sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
393 135 : tc->snd_wl1 = vnet_buffer (b)->tcp.seq_number;
394 135 : tc->snd_wl2 = vnet_buffer (b)->tcp.ack_number;
395 :
396 : /* RFC1323: TSval timestamps sent on {SYN} and {SYN,ACK}
397 : * segments are used to initialize PAWS. */
398 135 : if (tcp_opts_tstamp (&tc->rcv_opts))
399 : {
400 132 : tc->tsval_recent = tc->rcv_opts.tsval;
401 132 : tc->tsval_recent_age = tcp_time_tstamp (tc->c_thread_index);
402 : }
403 :
404 135 : if (tcp_opts_wscale (&tc->rcv_opts))
405 132 : tc->snd_wscale = tc->rcv_opts.wscale;
406 :
407 135 : tc->snd_wnd = clib_net_to_host_u16 (th->window) << tc->snd_wscale;
408 135 : }
409 :
410 : always_inline void
411 41962 : tcp_update_rto (tcp_connection_t * tc)
412 : {
413 41962 : tc->rto = clib_min (tc->srtt + (tc->rttvar << 2), TCP_RTO_MAX);
414 41962 : tc->rto = clib_max (tc->rto, TCP_RTO_MIN);
415 41962 : }
416 :
417 : always_inline u8
418 1103250 : tcp_is_descheduled (tcp_connection_t * tc)
419 : {
420 1103250 : return (transport_connection_is_descheduled (&tc->connection) ? 1 : 0);
421 : }
422 :
423 : /**
424 : * Push TCP header to buffer
425 : *
426 : * @param vm - vlib_main
427 : * @param b - buffer to write the header to
428 : * @param sp_net - source port net order
429 : * @param dp_net - destination port net order
430 : * @param seq - sequence number net order
431 : * @param ack - ack number net order
432 : * @param tcp_hdr_opts_len - header and options length in bytes
433 : * @param flags - header flags
434 : * @param wnd - window size
435 : *
436 : * @return - pointer to start of TCP header
437 : */
438 : always_inline void *
439 1062530 : vlib_buffer_push_tcp_net_order (vlib_buffer_t * b, u16 sp, u16 dp, u32 seq,
440 : u32 ack, u8 tcp_hdr_opts_len, u8 flags,
441 : u16 wnd)
442 : {
443 : tcp_header_t *th;
444 :
445 1062530 : th = vlib_buffer_push_uninit (b, tcp_hdr_opts_len);
446 :
447 1062530 : th->src_port = sp;
448 1062530 : th->dst_port = dp;
449 1062530 : th->seq_number = seq;
450 1062530 : th->ack_number = ack;
451 1062530 : th->data_offset_and_reserved = (tcp_hdr_opts_len >> 2) << 4;
452 1062530 : th->flags = flags;
453 1062530 : th->window = wnd;
454 1062530 : th->checksum = 0;
455 1062530 : th->urgent_pointer = 0;
456 1062530 : vnet_buffer (b)->l4_hdr_offset = (u8 *) th - b->data;
457 1062530 : b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
458 1062530 : return th;
459 : }
460 :
461 : /**
462 : * Push TCP header to buffer
463 : *
464 : * @param b - buffer to write the header to
465 : * @param sp_net - source port net order
466 : * @param dp_net - destination port net order
467 : * @param seq - sequence number host order
468 : * @param ack - ack number host order
469 : * @param tcp_hdr_opts_len - header and options length in bytes
470 : * @param flags - header flags
471 : * @param wnd - window size
472 : *
473 : * @return - pointer to start of TCP header
474 : */
475 : always_inline void *
476 1062440 : vlib_buffer_push_tcp (vlib_buffer_t * b, u16 sp_net, u16 dp_net, u32 seq,
477 : u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
478 : {
479 1062440 : return vlib_buffer_push_tcp_net_order (b, sp_net, dp_net,
480 : clib_host_to_net_u32 (seq),
481 : clib_host_to_net_u32 (ack),
482 : tcp_hdr_opts_len, flags,
483 1062440 : clib_host_to_net_u16 (wnd));
484 : }
485 :
486 : #endif /* SRC_VNET_TCP_TCP_INLINES_H_ */
487 :
488 : /*
489 : * fd.io coding-style-patch-verification: ON
490 : *
491 : * Local Variables:
492 : * eval: (c-set-style "gnu")
493 : * End:
494 : */
|