Line data Source code
1 : /*
2 : * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3 : * Licensed under the Apache License, Version 2.0 (the "License");
4 : * you may not use this file except in compliance with the License.
5 : * You may obtain a copy of the License at:
6 : *
7 : * http://www.apache.org/licenses/LICENSE-2.0
8 : *
9 : * Unless required by applicable law or agreed to in writing, software
10 : * distributed under the License is distributed on an "AS IS" BASIS,
11 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : * See the License for the specific language governing permissions and
13 : * limitations under the License.
14 : */
15 :
16 : #ifndef included_tcp_packet_h
17 : #define included_tcp_packet_h
18 :
19 : #include <vnet/ip/ip4_packet.h>
20 : #include <vnet/ip/ip6_packet.h>
21 :
/*
 * TCP header flags, listed from bit 0 (least significant) upwards.
 * The list is expanded twice below: once to generate the bit positions
 * and once to generate the matching bit masks.
 */
#define foreach_tcp_flag                                                      \
  _ (FIN) /**< Sender has no more data to send. */                           \
  _ (SYN) /**< Synchronize sequence numbers. */                              \
  _ (RST) /**< Reset the connection. */                                      \
  _ (PSH) /**< Push function. */                                             \
  _ (ACK) /**< Acknowledgement field is significant. */                      \
  _ (URG) /**< Urgent pointer field is significant. */                       \
  _ (ECE) /**< ECN-echo: receiver got a CE packet. */                        \
  _ (CWR) /**< Sender reduced its congestion window. */

/* Bit position of every flag; the last enumerator counts the flags. */
enum
{
#define _(name) TCP_FLAG_BIT_##name,
  foreach_tcp_flag
#undef _
    TCP_N_FLAG_BITS,
};

/* Bit mask of every flag, derived from its bit position. */
enum
{
#define _(name) TCP_FLAG_##name = (1 << TCP_FLAG_BIT_##name),
  foreach_tcp_flag
#undef _
};
47 :
48 : typedef struct _tcp_header
49 : {
50 : union
51 : {
52 : struct
53 : {
54 : u16 src_port; /**< Source port. */
55 : u16 dst_port; /**< Destination port. */
56 : };
57 : struct
58 : {
59 : u16 src, dst;
60 : };
61 : };
62 :
63 : u32 seq_number; /**< Sequence number of the first data octet in this
64 : * segment, except when SYN is present. If SYN
65 : * is present the seq number is is the ISN and the
66 : * first data octet is ISN+1 */
67 : u32 ack_number; /**< Acknowledgement number if ACK is set. It contains
68 : * the value of the next sequence number the sender
69 : * of the segment is expecting to receive. */
70 : u8 data_offset_and_reserved;
71 : u8 flags; /**< Flags: see the macro above */
72 : u16 window; /**< Number of bytes sender is willing to receive. */
73 :
74 : u16 checksum; /**< Checksum of TCP pseudo header and data. */
75 : u16 urgent_pointer; /**< Seq number of the byte after the urgent data. */
76 : } __attribute__ ((packed)) tcp_header_t;
77 :
/* Header length in 32-bit words: the upper 4 bits of the data offset octet */
#define tcp_doff(_th) ((_th)->data_offset_and_reserved >> 4)

/* Flag tests that return 0 or !0 (the flag's bit mask when it is set) */
#define tcp_fin(_th) ((_th)->flags & TCP_FLAG_FIN)
#define tcp_syn(_th) ((_th)->flags & TCP_FLAG_SYN)
#define tcp_rst(_th) ((_th)->flags & TCP_FLAG_RST)
#define tcp_psh(_th) ((_th)->flags & TCP_FLAG_PSH)
#define tcp_ack(_th) ((_th)->flags & TCP_FLAG_ACK)
#define tcp_urg(_th) ((_th)->flags & TCP_FLAG_URG)
#define tcp_ece(_th) ((_th)->flags & TCP_FLAG_ECE)
#define tcp_cwr(_th) ((_th)->flags & TCP_FLAG_CWR)

/* Flag tests that return 0 or 1. The whole expansion is parenthesized so the
 * result cannot be re-associated by operators around the macro call. */
#define tcp_is_syn(_th) (!!((_th)->flags & TCP_FLAG_SYN))
#define tcp_is_fin(_th) (!!((_th)->flags & TCP_FLAG_FIN))
92 :
93 : always_inline int
94 56732048 : tcp_header_bytes (tcp_header_t * t)
95 : {
96 56732048 : return tcp_doff (t) * sizeof (u32);
97 : }
98 :
99 : /*
100 : * TCP options.
101 : */
102 :
/** TCP option kinds, i.e. the value of the first octet of an option. */
typedef enum tcp_option_type
{
  TCP_OPTION_EOL = 0,            /**< End of option list. */
  TCP_OPTION_NOOP = 1,           /**< No operation. */
  TCP_OPTION_MSS = 2,            /**< Limit MSS. */
  TCP_OPTION_WINDOW_SCALE = 3,   /**< Window scale. */
  TCP_OPTION_SACK_PERMITTED = 4, /**< Selective Ack permitted. */
  TCP_OPTION_SACK_BLOCK = 5,     /**< Selective Ack block. */
  TCP_OPTION_TIMESTAMP = 8,      /**< Timestamps. */
  TCP_OPTION_UTO = 28,           /**< User timeout. */
  TCP_OPTION_AO = 29,            /**< Authentication Option. */
} tcp_option_type_t;
115 :
/*
 * Flags kept in tcp_options_t.flags. The list is expanded twice below:
 * once for the bit positions and once for the matching bit masks.
 */
#define foreach_tcp_options_flag                                              \
  _ (MSS)            /**< MSS advertised in SYN */                            \
  _ (TSTAMP)         /**< Timestamp capability advertised in SYN */           \
  _ (WSCALE)         /**< Window scale capability advertised in SYN */        \
  _ (SACK_PERMITTED) /**< SACK capability advertised in SYN */                \
  _ (SACK)           /**< SACK present */

/* Bit position of every option flag; the last enumerator counts them. */
enum
{
#define _(name) TCP_OPTS_FLAG_BIT_##name,
  foreach_tcp_options_flag
#undef _
    TCP_OPTIONS_N_FLAG_BITS,
};

/* Bit mask of every option flag, derived from its bit position. */
enum
{
#define _(name) TCP_OPTS_FLAG_##name = (1 << TCP_OPTS_FLAG_BIT_##name),
  foreach_tcp_options_flag
#undef _
};
137 :
138 : typedef struct _sack_block
139 : {
140 : u32 start; /**< Start sequence number */
141 : u32 end; /**< End sequence number (first outside) */
142 : } sack_block_t;
143 :
144 : typedef struct
145 : {
146 : sack_block_t *sacks; /**< SACK blocks */
147 : u32 tsval; /**< Timestamp value */
148 : u32 tsecr; /**< Echoed/reflected time stamp */
149 : u16 mss; /**< Maximum segment size advertised */
150 : u8 flags; /**< Option flags, see above */
151 : u8 wscale; /**< Window scale advertised */
152 : u8 n_sack_blocks; /**< Number of SACKs blocks */
153 : } tcp_options_t;
154 :
/* Option flag tests that return 0 or !0 (the flag's bit mask when set).
 * All of them go through one helper that pastes the flag name. */
#define tcp_opts_flag_test_(_to, _f) ((_to)->flags & TCP_OPTS_FLAG_##_f)

#define tcp_opts_mss(_to)            tcp_opts_flag_test_ (_to, MSS)
#define tcp_opts_tstamp(_to)         tcp_opts_flag_test_ (_to, TSTAMP)
#define tcp_opts_wscale(_to)         tcp_opts_flag_test_ (_to, WSCALE)
#define tcp_opts_sack(_to)           tcp_opts_flag_test_ (_to, SACK)
#define tcp_opts_sack_permitted(_to) tcp_opts_flag_test_ (_to, SACK_PERMITTED)
161 :
/* TCP option lengths in bytes. Except for EOL and NOOP, which are a single
 * octet, and SACK_BLOCK, which is the size of one block, the values include
 * the kind and length octets. */
#define TCP_OPTION_LEN_EOL            1
#define TCP_OPTION_LEN_NOOP           1
#define TCP_OPTION_LEN_MSS            4
#define TCP_OPTION_LEN_WINDOW_SCALE   3
#define TCP_OPTION_LEN_SACK_PERMITTED 2
#define TCP_OPTION_LEN_TIMESTAMP      10
#define TCP_OPTION_LEN_SACK_BLOCK     8

/* Protocol limits */
#define TCP_HDR_LEN_MAX          60     /* 4-bit data offset: 15 words */
#define TCP_WND_MAX              65535U /* Largest unscaled window */
#define TCP_MAX_WND_SCALE        14     /* See RFC 1323 */
#define TCP_OPTS_ALIGN           4
#define TCP_OPTS_MAX_SACK_BLOCKS 3
#define TCP_MAX_GSO_SZ           65536
177 :
/*
 * Modulo arithmetic for TCP sequence numbers. The difference of the two
 * operands is reinterpreted as a signed 32-bit value, so the ordering also
 * holds across a wrap of the sequence space. seq_max evaluates its
 * arguments more than once.
 */
#define seq_lt(_s1, _s2)  ((i32) ((_s1) - (_s2)) < 0)
#define seq_leq(_s1, _s2) ((i32) ((_s1) - (_s2)) <= 0)
#define seq_gt(_s1, _s2)  ((i32) ((_s1) - (_s2)) > 0)
#define seq_geq(_s1, _s2) ((i32) ((_s1) - (_s2)) >= 0)
#define seq_max(_s1, _s2) (seq_gt ((_s1), (_s2)) ? (_s1) : (_s2))

/* Modulo arithmetic for timestamps: same comparison as for sequence numbers */
#define timestamp_lt(_t1, _t2)  seq_lt ((_t1), (_t2))
#define timestamp_leq(_t1, _t2) seq_leq ((_t1), (_t2))
188 :
189 : always_inline void
190 : ip4_tcp_reply_x1 (ip4_header_t *ip0, tcp_header_t *tcp0)
191 : {
192 : u32 src0, dst0;
193 :
194 : src0 = ip0->src_address.data_u32;
195 : dst0 = ip0->dst_address.data_u32;
196 : ip0->src_address.data_u32 = dst0;
197 : ip0->dst_address.data_u32 = src0;
198 :
199 : src0 = tcp0->src;
200 : dst0 = tcp0->dst;
201 : tcp0->src = dst0;
202 : tcp0->dst = src0;
203 : }
204 :
205 : always_inline void
206 : ip4_tcp_reply_x2 (ip4_header_t *ip0, ip4_header_t *ip1, tcp_header_t *tcp0,
207 : tcp_header_t *tcp1)
208 : {
209 : u32 src0, dst0, src1, dst1;
210 :
211 : src0 = ip0->src_address.data_u32;
212 : src1 = ip1->src_address.data_u32;
213 : dst0 = ip0->dst_address.data_u32;
214 : dst1 = ip1->dst_address.data_u32;
215 : ip0->src_address.data_u32 = dst0;
216 : ip1->src_address.data_u32 = dst1;
217 : ip0->dst_address.data_u32 = src0;
218 : ip1->dst_address.data_u32 = src1;
219 :
220 : src0 = tcp0->src;
221 : src1 = tcp1->src;
222 : dst0 = tcp0->dst;
223 : dst1 = tcp1->dst;
224 : tcp0->src = dst0;
225 : tcp1->src = dst1;
226 : tcp0->dst = src0;
227 : tcp1->dst = src1;
228 : }
229 :
230 : always_inline void
231 : ip6_tcp_reply_x1 (ip6_header_t *ip0, tcp_header_t *tcp0)
232 : {
233 : {
234 : ip6_address_t src0, dst0;
235 :
236 : src0 = ip0->src_address;
237 : dst0 = ip0->dst_address;
238 : ip0->src_address = dst0;
239 : ip0->dst_address = src0;
240 : }
241 :
242 : {
243 : u16 src0, dst0;
244 :
245 : src0 = tcp0->src;
246 : dst0 = tcp0->dst;
247 : tcp0->src = dst0;
248 : tcp0->dst = src0;
249 : }
250 : }
251 :
252 : always_inline void
253 : ip6_tcp_reply_x2 (ip6_header_t *ip0, ip6_header_t *ip1, tcp_header_t *tcp0,
254 : tcp_header_t *tcp1)
255 : {
256 : {
257 : ip6_address_t src0, dst0, src1, dst1;
258 :
259 : src0 = ip0->src_address;
260 : src1 = ip1->src_address;
261 : dst0 = ip0->dst_address;
262 : dst1 = ip1->dst_address;
263 : ip0->src_address = dst0;
264 : ip1->src_address = dst1;
265 : ip0->dst_address = src0;
266 : ip1->dst_address = src1;
267 : }
268 :
269 : {
270 : u16 src0, dst0, src1, dst1;
271 :
272 : src0 = tcp0->src;
273 : src1 = tcp1->src;
274 : dst0 = tcp0->dst;
275 : dst1 = tcp1->dst;
276 : tcp0->src = dst0;
277 : tcp1->src = dst1;
278 : tcp0->dst = src0;
279 : tcp1->dst = src1;
280 : }
281 : }
282 :
283 : /**
284 : * Parse TCP header options.
285 : *
286 : * @param th TCP header
287 : * @param to TCP options data structure to be populated
288 : * @param is_syn set if packet is syn
289 : * @return -1 if parsing failed
290 : */
291 : always_inline int
292 1082392 : tcp_options_parse (tcp_header_t * th, tcp_options_t * to, u8 is_syn)
293 : {
294 : const u8 *data;
295 : u8 opt_len, opts_len, kind;
296 : int j;
297 : sack_block_t b;
298 :
299 1082392 : opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t);
300 1082392 : data = (const u8 *) (th + 1);
301 :
302 : /* Zero out all flags but those set in SYN */
303 1082392 : to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE
304 : | TCP_OPTS_FLAG_TSTAMP | TCP_OPTS_FLAG_MSS);
305 :
306 4272461 : for (; opts_len > 0; opts_len -= opt_len, data += opt_len)
307 : {
308 3192589 : kind = data[0];
309 :
310 : /* Get options length */
311 3192589 : if (kind == TCP_OPTION_EOL)
312 2520 : break;
313 3190069 : else if (kind == TCP_OPTION_NOOP)
314 : {
315 2124410 : opt_len = 1;
316 2124410 : continue;
317 : }
318 : else
319 : {
320 : /* broken options */
321 1065659 : if (opts_len < 2)
322 0 : return -1;
323 1065659 : opt_len = data[1];
324 :
325 : /* weird option length */
326 1065659 : if (opt_len < 2 || opt_len > opts_len)
327 0 : return -1;
328 : }
329 :
330 : /* Parse options */
331 1065659 : switch (kind)
332 : {
333 2793 : case TCP_OPTION_MSS:
334 2793 : if (!is_syn)
335 0 : break;
336 2793 : if ((opt_len == TCP_OPTION_LEN_MSS) && tcp_syn (th))
337 : {
338 2793 : to->flags |= TCP_OPTS_FLAG_MSS;
339 2793 : to->mss = clib_net_to_host_u16 (*(u16 *) (data + 2));
340 : }
341 2793 : break;
342 264 : case TCP_OPTION_WINDOW_SCALE:
343 264 : if (!is_syn)
344 0 : break;
345 264 : if ((opt_len == TCP_OPTION_LEN_WINDOW_SCALE) && tcp_syn (th))
346 : {
347 264 : to->flags |= TCP_OPTS_FLAG_WSCALE;
348 264 : to->wscale = data[2];
349 264 : if (to->wscale > TCP_MAX_WND_SCALE)
350 0 : to->wscale = TCP_MAX_WND_SCALE;
351 : }
352 264 : break;
353 1062340 : case TCP_OPTION_TIMESTAMP:
354 1062340 : if (is_syn)
355 264 : to->flags |= TCP_OPTS_FLAG_TSTAMP;
356 1062340 : if ((to->flags & TCP_OPTS_FLAG_TSTAMP)
357 1062340 : && opt_len == TCP_OPTION_LEN_TIMESTAMP)
358 : {
359 1062340 : to->tsval = clib_net_to_host_u32 (*(u32 *) (data + 2));
360 1062340 : to->tsecr = clib_net_to_host_u32 (*(u32 *) (data + 6));
361 : }
362 1062340 : break;
363 264 : case TCP_OPTION_SACK_PERMITTED:
364 264 : if (!is_syn)
365 0 : break;
366 264 : if (opt_len == TCP_OPTION_LEN_SACK_PERMITTED && tcp_syn (th))
367 264 : to->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
368 264 : break;
369 0 : case TCP_OPTION_SACK_BLOCK:
370 : /* If SACK permitted was not advertised or a SYN, break */
371 0 : if ((to->flags & TCP_OPTS_FLAG_SACK_PERMITTED) == 0 || tcp_syn (th))
372 : break;
373 :
374 : /* If too short or not correctly formatted, break */
375 0 : if (opt_len < 10 || ((opt_len - 2) % TCP_OPTION_LEN_SACK_BLOCK))
376 : break;
377 :
378 0 : to->flags |= TCP_OPTS_FLAG_SACK;
379 0 : to->n_sack_blocks = (opt_len - 2) / TCP_OPTION_LEN_SACK_BLOCK;
380 0 : vec_reset_length (to->sacks);
381 0 : for (j = 0; j < to->n_sack_blocks; j++)
382 : {
383 0 : b.start = clib_net_to_host_u32 (*(u32 *) (data + 2 + 8 * j));
384 0 : b.end = clib_net_to_host_u32 (*(u32 *) (data + 6 + 8 * j));
385 0 : vec_add1 (to->sacks, b);
386 : }
387 0 : break;
388 0 : default:
389 : /* Nothing to see here */
390 0 : continue;
391 : }
392 0 : }
393 1082392 : return 0;
394 : }
395 :
396 : /**
397 : * Write TCP options to segment.
398 : *
399 : * @param data buffer where to write the options
400 : * @param opts options to write
401 : * @return length of options written
402 : */
403 : always_inline u32
404 116952 : tcp_options_write (u8 * data, tcp_options_t * opts)
405 : {
406 116952 : u32 opts_len = 0;
407 116952 : u32 buf, seq_len = 4;
408 :
409 116952 : if (tcp_opts_mss (opts))
410 : {
411 279 : *data++ = TCP_OPTION_MSS;
412 279 : *data++ = TCP_OPTION_LEN_MSS;
413 279 : buf = clib_host_to_net_u16 (opts->mss);
414 279 : clib_memcpy_fast (data, &buf, sizeof (opts->mss));
415 279 : data += sizeof (opts->mss);
416 279 : opts_len += TCP_OPTION_LEN_MSS;
417 : }
418 :
419 116952 : if (tcp_opts_wscale (opts))
420 : {
421 264 : *data++ = TCP_OPTION_WINDOW_SCALE;
422 264 : *data++ = TCP_OPTION_LEN_WINDOW_SCALE;
423 264 : *data++ = opts->wscale;
424 264 : opts_len += TCP_OPTION_LEN_WINDOW_SCALE;
425 : }
426 :
427 116952 : if (tcp_opts_sack_permitted (opts))
428 : {
429 264 : *data++ = TCP_OPTION_SACK_PERMITTED;
430 264 : *data++ = TCP_OPTION_LEN_SACK_PERMITTED;
431 264 : opts_len += TCP_OPTION_LEN_SACK_PERMITTED;
432 : }
433 :
434 116952 : if (tcp_opts_tstamp (opts))
435 : {
436 116937 : *data++ = TCP_OPTION_TIMESTAMP;
437 116937 : *data++ = TCP_OPTION_LEN_TIMESTAMP;
438 116937 : buf = clib_host_to_net_u32 (opts->tsval);
439 116937 : clib_memcpy_fast (data, &buf, sizeof (opts->tsval));
440 116937 : data += sizeof (opts->tsval);
441 116937 : buf = clib_host_to_net_u32 (opts->tsecr);
442 116937 : clib_memcpy_fast (data, &buf, sizeof (opts->tsecr));
443 116937 : data += sizeof (opts->tsecr);
444 116937 : opts_len += TCP_OPTION_LEN_TIMESTAMP;
445 : }
446 :
447 116952 : if (tcp_opts_sack (opts))
448 : {
449 : int i;
450 :
451 0 : if (opts->n_sack_blocks != 0)
452 : {
453 0 : *data++ = TCP_OPTION_SACK_BLOCK;
454 0 : *data++ = 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
455 0 : for (i = 0; i < opts->n_sack_blocks; i++)
456 : {
457 0 : buf = clib_host_to_net_u32 (opts->sacks[i].start);
458 0 : clib_memcpy_fast (data, &buf, seq_len);
459 0 : data += seq_len;
460 0 : buf = clib_host_to_net_u32 (opts->sacks[i].end);
461 0 : clib_memcpy_fast (data, &buf, seq_len);
462 0 : data += seq_len;
463 : }
464 0 : opts_len += 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
465 : }
466 : }
467 :
468 : /* Terminate TCP options by padding with NOPs to a u32 boundary. Avoid using
469 : * EOL because, it seems, it can break peers with broken option parsers that
470 : * rely on options ending on a u32 boundary.
471 : */
472 350562 : while (opts_len % 4)
473 : {
474 233610 : *data++ = TCP_OPTION_NOOP;
475 233610 : opts_len += TCP_OPTION_LEN_NOOP;
476 : }
477 116952 : return opts_len;
478 : }
479 :
480 : #endif /* included_tcp_packet_h */
481 :
482 : /*
483 : * fd.io coding-style-patch-verification: ON
484 : *
485 : * Local Variables:
486 : * eval: (c-set-style "gnu")
487 : * End:
488 : */
|