LCOV - code coverage report
Current view: top level - vnet/tcp - tcp_packet.h (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 78 109 71.6 %
Date: 2023-07-05 22:20:52 Functions: 3 3 100.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016-2019 Cisco and/or its affiliates.
       3             :  * Licensed under the Apache License, Version 2.0 (the "License");
       4             :  * you may not use this file except in compliance with the License.
       5             :  * You may obtain a copy of the License at:
       6             :  *
       7             :  *     http://www.apache.org/licenses/LICENSE-2.0
       8             :  *
       9             :  * Unless required by applicable law or agreed to in writing, software
      10             :  * distributed under the License is distributed on an "AS IS" BASIS,
      11             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12             :  * See the License for the specific language governing permissions and
      13             :  * limitations under the License.
      14             :  */
      15             : 
      16             : #ifndef included_tcp_packet_h
      17             : #define included_tcp_packet_h
      18             : 
      19             : #include <vnet/ip/ip4_packet.h>
      20             : #include <vnet/ip/ip6_packet.h>
      21             : 
      22             : /* TCP flags, listed from bit 0 upward. */
      23             : #define foreach_tcp_flag                                \
      24             :   _ (FIN) /**< No more data from sender. */             \
      25             :   _ (SYN) /**< Synchronize sequence numbers. */         \
      26             :   _ (RST) /**< Reset the connection. */                 \
      27             :   _ (PSH) /**< Push function. */                        \
      28             :   _ (ACK) /**< Ack field significant. */                \
      29             :   _ (URG) /**< Urgent pointer field significant. */     \
      30             :   _ (ECE) /**< ECN-echo. Receiver got CE packet */      \
      31             :   _ (CWR) /**< Sender reduced congestion window */
      32             : 
      33             : enum
      34             : {
      35             : #define _(f) TCP_FLAG_BIT_##f,
      36             :   foreach_tcp_flag
      37             : #undef _
      38             :     TCP_N_FLAG_BITS,
      39             : };
      40             : 
      41             : enum
      42             : {
      43             : #define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f,
      44             :   foreach_tcp_flag
      45             : #undef _
      46             : };
      47             : 
      48             : typedef struct _tcp_header
      49             : {
      50             :   union
      51             :   {
      52             :     struct
      53             :     {
      54             :       u16 src_port; /**< Source port. */
      55             :       u16 dst_port; /**< Destination port. */
      56             :     };
      57             :     struct
      58             :     {
      59             :       u16 src, dst;
      60             :     };
      61             :   };
      62             : 
      63             :   u32 seq_number;       /**< Sequence number of the first data octet in this
      64             :                          *   segment, except when SYN is present. If SYN
      65             :                          *   is present the seq number is the ISN and the
      66             :                          *   first data octet is ISN+1 */
      67             :   u32 ack_number;       /**< Acknowledgement number if ACK is set. It contains
      68             :                          *   the value of the next sequence number the sender
      69             :                          *   of the segment is expecting to receive. */
      70             :   u8 data_offset_and_reserved;
      71             :   u8 flags;             /**< Flags: see the macro above */
      72             :   u16 window;           /**< Number of bytes sender is willing to receive. */
      73             : 
      74             :   u16 checksum;         /**< Checksum of TCP pseudo header and data. */
      75             :   u16 urgent_pointer;   /**< Seq number of the byte after the urgent data. */
      76             : } __attribute__ ((packed)) tcp_header_t;
      77             : 
      78             : /* Flag tests that return 0 or !0 */
      79             : #define tcp_doff(_th) ((_th)->data_offset_and_reserved >> 4)
      80             : #define tcp_fin(_th) ((_th)->flags & TCP_FLAG_FIN)
      81             : #define tcp_syn(_th) ((_th)->flags & TCP_FLAG_SYN)
      82             : #define tcp_rst(_th) ((_th)->flags & TCP_FLAG_RST)
      83             : #define tcp_psh(_th) ((_th)->flags & TCP_FLAG_PSH)
      84             : #define tcp_ack(_th) ((_th)->flags & TCP_FLAG_ACK)
      85             : #define tcp_urg(_th) ((_th)->flags & TCP_FLAG_URG)
      86             : #define tcp_ece(_th) ((_th)->flags & TCP_FLAG_ECE)
      87             : #define tcp_cwr(_th) ((_th)->flags & TCP_FLAG_CWR)
      88             : 
      89             : /* Flag tests that return 0 or 1 */
      90             : #define tcp_is_syn(_th) !!((_th)->flags & TCP_FLAG_SYN)
      91             : #define tcp_is_fin(_th) !!((_th)->flags & TCP_FLAG_FIN)
      92             : 
      93             : always_inline int
      94    55861481 : tcp_header_bytes (tcp_header_t * t)
      95             : {
      96    55861481 :   return tcp_doff (t) * sizeof (u32);
      97             : }
      98             : 
      99             : /*
     100             :  * TCP options.
     101             :  */
     102             : 
     103             : typedef enum tcp_option_type
     104             : {
     105             :   TCP_OPTION_EOL = 0,                   /**< End of options. */
     106             :   TCP_OPTION_NOOP = 1,                  /**< No operation. */
     107             :   TCP_OPTION_MSS = 2,                   /**< Limit MSS. */
     108             :   TCP_OPTION_WINDOW_SCALE = 3,          /**< Window scale. */
     109             :   TCP_OPTION_SACK_PERMITTED = 4,        /**< Selective Ack permitted. */
     110             :   TCP_OPTION_SACK_BLOCK = 5,            /**< Selective Ack block. */
     111             :   TCP_OPTION_TIMESTAMP = 8,             /**< Timestamps. */
     112             :   TCP_OPTION_UTO = 28,                  /**< User timeout. */
     113             :   TCP_OPTION_AO = 29,                   /**< Authentication Option. */
     114             : } tcp_option_type_t;
     115             : 
     116             : #define foreach_tcp_options_flag                                        \
     117             :   _ (MSS)               /**< MSS advertised in SYN */                   \
     118             :   _ (TSTAMP)            /**< Timestamp capability advertised in SYN */  \
     119             :   _ (WSCALE)            /**< Wnd scale capability advertised in SYN */  \
     120             :   _ (SACK_PERMITTED)    /**< SACK capability advertised in SYN */       \
     121             :   _ (SACK)              /**< SACK present */
     122             : 
     123             : enum
     124             : {
     125             : #define _(f) TCP_OPTS_FLAG_BIT_##f,
     126             :   foreach_tcp_options_flag
     127             : #undef _
     128             :     TCP_OPTIONS_N_FLAG_BITS,
     129             : };
     130             : 
     131             : enum
     132             : {
     133             : #define _(f) TCP_OPTS_FLAG_##f = 1 << TCP_OPTS_FLAG_BIT_##f,
     134             :   foreach_tcp_options_flag
     135             : #undef _
     136             : };
     137             : 
     138             : typedef struct _sack_block
     139             : {
     140             :   u32 start;            /**< Start sequence number */
     141             :   u32 end;              /**< End sequence number (first outside) */
     142             : } sack_block_t;
     143             : 
     144             : typedef struct
     145             : {
     146             :   sack_block_t *sacks;  /**< SACK blocks */
     147             :   u32 tsval;            /**< Timestamp value */
     148             :   u32 tsecr;            /**< Echoed/reflected time stamp */
     149             :   u16 mss;              /**< Maximum segment size advertised */
     150             :   u8 flags;             /**< Option flags, see above */
     151             :   u8 wscale;            /**< Window scale advertised */
     152             :   u8 n_sack_blocks;     /**< Number of SACK blocks */
     153             : } tcp_options_t;
     154             : 
     155             : /* Flag tests that return 0 or !0 */
     156             : #define tcp_opts_mss(_to) ((_to)->flags & TCP_OPTS_FLAG_MSS)
     157             : #define tcp_opts_tstamp(_to) ((_to)->flags & TCP_OPTS_FLAG_TSTAMP)
     158             : #define tcp_opts_wscale(_to) ((_to)->flags & TCP_OPTS_FLAG_WSCALE)
     159             : #define tcp_opts_sack(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK)
     160             : #define tcp_opts_sack_permitted(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK_PERMITTED)
     161             : 
     162             : /* TCP option lengths */
     163             : #define TCP_OPTION_LEN_EOL              1
     164             : #define TCP_OPTION_LEN_NOOP             1
     165             : #define TCP_OPTION_LEN_MSS              4
     166             : #define TCP_OPTION_LEN_WINDOW_SCALE     3
     167             : #define TCP_OPTION_LEN_SACK_PERMITTED   2
     168             : #define TCP_OPTION_LEN_TIMESTAMP        10
     169             : #define TCP_OPTION_LEN_SACK_BLOCK        8
     170             : 
     171             : #define TCP_HDR_LEN_MAX                 60
     172             : #define TCP_WND_MAX                     65535U
     173             : #define TCP_MAX_WND_SCALE               14      /* See RFC 7323 (obsoletes RFC 1323) */
     174             : #define TCP_OPTS_ALIGN                  4
     175             : #define TCP_OPTS_MAX_SACK_BLOCKS        3
     176             : #define TCP_MAX_GSO_SZ                  65536
     177             : 
     178             : /* Modulo arithmetic for TCP sequence numbers */
     179             : #define seq_lt(_s1, _s2) ((i32)((_s1)-(_s2)) < 0)
     180             : #define seq_leq(_s1, _s2) ((i32)((_s1)-(_s2)) <= 0)
     181             : #define seq_gt(_s1, _s2) ((i32)((_s1)-(_s2)) > 0)
     182             : #define seq_geq(_s1, _s2) ((i32)((_s1)-(_s2)) >= 0)
     183             : #define seq_max(_s1, _s2) (seq_gt((_s1), (_s2)) ? (_s1) : (_s2))
     184             : 
     185             : /* Modulo arithmetic for timestamps */
     186             : #define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0)
     187             : #define timestamp_leq(_t1, _t2) ((i32)((_t1)-(_t2)) <= 0)
     188             : 
     189             : always_inline void
     190             : ip4_tcp_reply_x1 (ip4_header_t *ip0, tcp_header_t *tcp0)
     191             : {
     192             :   u32 src0, dst0;
     193             : 
     194             :   src0 = ip0->src_address.data_u32;
     195             :   dst0 = ip0->dst_address.data_u32;
     196             :   ip0->src_address.data_u32 = dst0;
     197             :   ip0->dst_address.data_u32 = src0;
     198             : 
     199             :   src0 = tcp0->src;
     200             :   dst0 = tcp0->dst;
     201             :   tcp0->src = dst0;
     202             :   tcp0->dst = src0;
     203             : }
     204             : 
     205             : always_inline void
     206             : ip4_tcp_reply_x2 (ip4_header_t *ip0, ip4_header_t *ip1, tcp_header_t *tcp0,
     207             :                   tcp_header_t *tcp1)
     208             : {
     209             :   u32 src0, dst0, src1, dst1;
     210             : 
     211             :   src0 = ip0->src_address.data_u32;
     212             :   src1 = ip1->src_address.data_u32;
     213             :   dst0 = ip0->dst_address.data_u32;
     214             :   dst1 = ip1->dst_address.data_u32;
     215             :   ip0->src_address.data_u32 = dst0;
     216             :   ip1->src_address.data_u32 = dst1;
     217             :   ip0->dst_address.data_u32 = src0;
     218             :   ip1->dst_address.data_u32 = src1;
     219             : 
     220             :   src0 = tcp0->src;
     221             :   src1 = tcp1->src;
     222             :   dst0 = tcp0->dst;
     223             :   dst1 = tcp1->dst;
     224             :   tcp0->src = dst0;
     225             :   tcp1->src = dst1;
     226             :   tcp0->dst = src0;
     227             :   tcp1->dst = src1;
     228             : }
     229             : 
     230             : always_inline void
     231             : ip6_tcp_reply_x1 (ip6_header_t *ip0, tcp_header_t *tcp0)
     232             : {
     233             :   {
     234             :     ip6_address_t src0, dst0;
     235             : 
     236             :     src0 = ip0->src_address;
     237             :     dst0 = ip0->dst_address;
     238             :     ip0->src_address = dst0;
     239             :     ip0->dst_address = src0;
     240             :   }
     241             : 
     242             :   {
     243             :     u16 src0, dst0;
     244             : 
     245             :     src0 = tcp0->src;
     246             :     dst0 = tcp0->dst;
     247             :     tcp0->src = dst0;
     248             :     tcp0->dst = src0;
     249             :   }
     250             : }
     251             : 
     252             : always_inline void
     253             : ip6_tcp_reply_x2 (ip6_header_t *ip0, ip6_header_t *ip1, tcp_header_t *tcp0,
     254             :                   tcp_header_t *tcp1)
     255             : {
     256             :   {
     257             :     ip6_address_t src0, dst0, src1, dst1;
     258             : 
     259             :     src0 = ip0->src_address;
     260             :     src1 = ip1->src_address;
     261             :     dst0 = ip0->dst_address;
     262             :     dst1 = ip1->dst_address;
     263             :     ip0->src_address = dst0;
     264             :     ip1->src_address = dst1;
     265             :     ip0->dst_address = src0;
     266             :     ip1->dst_address = src1;
     267             :   }
     268             : 
     269             :   {
     270             :     u16 src0, dst0, src1, dst1;
     271             : 
     272             :     src0 = tcp0->src;
     273             :     src1 = tcp1->src;
     274             :     dst0 = tcp0->dst;
     275             :     dst1 = tcp1->dst;
     276             :     tcp0->src = dst0;
     277             :     tcp1->src = dst1;
     278             :     tcp0->dst = src0;
     279             :     tcp1->dst = src1;
     280             :   }
     281             : }
     282             : 
     283             : /**
     284             :  * Parse TCP header options.
     285             :  *
     286             :  * @param th TCP header
     287             :  * @param to TCP options data structure to be populated
     288             :  * @param is_syn set if the packet is a SYN (enables parsing of SYN-only options)
     289             :  * @return 0 on success, -1 if the options are malformed
     290             :  */
     291             : always_inline int
     292     1038692 : tcp_options_parse (tcp_header_t * th, tcp_options_t * to, u8 is_syn)
     293             : {
     294             :   const u8 *data;
     295             :   u8 opt_len, opts_len, kind;
     296             :   int j;
     297             :   sack_block_t b;
     298             : 
     299     1038692 :   opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t);
     300     1038692 :   data = (const u8 *) (th + 1);
     301             : 
     302             :   /* Zero out all flags but those set in SYN */
     303     1038692 :   to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE
     304             :                 | TCP_OPTS_FLAG_TSTAMP | TCP_OPTS_FLAG_MSS);
     305             : 
     306     4103871 :   for (; opts_len > 0; opts_len -= opt_len, data += opt_len)
     307             :     {
     308     3067699 :       kind = data[0];
     309             : 
     310             :       /* Get options length */
     311     3067699 :       if (kind == TCP_OPTION_EOL)
     312        2520 :         break;
     313     3065179 :       else if (kind == TCP_OPTION_NOOP)
     314             :         {
     315     2041150 :           opt_len = 1;
     316     2041150 :           continue;
     317             :         }
     318             :       else
     319             :         {
     320             :           /* broken options */
     321     1024029 :           if (opts_len < 2)
     322           0 :             return -1;
     323     1024029 :           opt_len = data[1];
     324             : 
     325             :           /* weird option length */
     326     1024029 :           if (opt_len < 2 || opt_len > opts_len)
     327           0 :             return -1;
     328             :         }
     329             : 
     330             :       /* Parse options */
     331     1024029 :       switch (kind)
     332             :         {
     333        2793 :         case TCP_OPTION_MSS:
     334        2793 :           if (!is_syn)
     335           0 :             break;
     336        2793 :           if ((opt_len == TCP_OPTION_LEN_MSS) && tcp_syn (th))
     337             :             {
     338        2793 :               to->flags |= TCP_OPTS_FLAG_MSS;
     339        2793 :               to->mss = clib_net_to_host_u16 (*(u16 *) (data + 2));
     340             :             }
     341        2793 :           break;
     342         264 :         case TCP_OPTION_WINDOW_SCALE:
     343         264 :           if (!is_syn)
     344           0 :             break;
     345         264 :           if ((opt_len == TCP_OPTION_LEN_WINDOW_SCALE) && tcp_syn (th))
     346             :             {
     347         264 :               to->flags |= TCP_OPTS_FLAG_WSCALE;
     348         264 :               to->wscale = data[2];
     349         264 :               if (to->wscale > TCP_MAX_WND_SCALE)
     350           0 :                 to->wscale = TCP_MAX_WND_SCALE;
     351             :             }
     352         264 :           break;
     353     1020710 :         case TCP_OPTION_TIMESTAMP:
     354     1020710 :           if (is_syn)
     355         264 :             to->flags |= TCP_OPTS_FLAG_TSTAMP;
     356     1020710 :           if ((to->flags & TCP_OPTS_FLAG_TSTAMP)
     357     1020710 :               && opt_len == TCP_OPTION_LEN_TIMESTAMP)
     358             :             {
     359     1020710 :               to->tsval = clib_net_to_host_u32 (*(u32 *) (data + 2));
     360     1020710 :               to->tsecr = clib_net_to_host_u32 (*(u32 *) (data + 6));
     361             :             }
     362     1020710 :           break;
     363         264 :         case TCP_OPTION_SACK_PERMITTED:
     364         264 :           if (!is_syn)
     365           0 :             break;
     366         264 :           if (opt_len == TCP_OPTION_LEN_SACK_PERMITTED && tcp_syn (th))
     367         264 :             to->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
     368         264 :           break;
     369           0 :         case TCP_OPTION_SACK_BLOCK:
     370             :           /* If SACK permitted was not advertised, or this is a SYN, break */
     371           0 :           if ((to->flags & TCP_OPTS_FLAG_SACK_PERMITTED) == 0 || tcp_syn (th))
     372             :             break;
     373             : 
     374             :           /* If too short or not correctly formatted, break */
     375           0 :           if (opt_len < 10 || ((opt_len - 2) % TCP_OPTION_LEN_SACK_BLOCK))
     376             :             break;
     377             : 
     378           0 :           to->flags |= TCP_OPTS_FLAG_SACK;
     379           0 :           to->n_sack_blocks = (opt_len - 2) / TCP_OPTION_LEN_SACK_BLOCK;
     380           0 :           vec_reset_length (to->sacks);
     381           0 :           for (j = 0; j < to->n_sack_blocks; j++)
     382             :             {
     383           0 :               b.start = clib_net_to_host_u32 (*(u32 *) (data + 2 + 8 * j));
     384           0 :               b.end = clib_net_to_host_u32 (*(u32 *) (data + 6 + 8 * j));
     385           0 :               vec_add1 (to->sacks, b);
     386             :             }
     387           0 :           break;
     388           0 :         default:
     389             :           /* Nothing to see here */
     390           0 :           continue;
     391             :         }
     392           0 :     }
     393     1038692 :   return 0;
     394             : }
     395             : 
     396             : /**
     397             :  * Write TCP options to segment.
     398             :  *
     399             :  * @param data  buffer where to write the options
     400             :  * @param opts  options to write
     401             :  * @return      length of options written
     402             :  */
     403             : always_inline u32
     404       95612 : tcp_options_write (u8 * data, tcp_options_t * opts)
     405             : {
     406       95612 :   u32 opts_len = 0;
     407       95612 :   u32 buf, seq_len = 4;
     408             : 
     409       95612 :   if (tcp_opts_mss (opts))
     410             :     {
     411         279 :       *data++ = TCP_OPTION_MSS;
     412         279 :       *data++ = TCP_OPTION_LEN_MSS;
     413         279 :       buf = clib_host_to_net_u16 (opts->mss);
     414         279 :       clib_memcpy_fast (data, &buf, sizeof (opts->mss));
     415         279 :       data += sizeof (opts->mss);
     416         279 :       opts_len += TCP_OPTION_LEN_MSS;
     417             :     }
     418             : 
     419       95612 :   if (tcp_opts_wscale (opts))
     420             :     {
     421         264 :       *data++ = TCP_OPTION_WINDOW_SCALE;
     422         264 :       *data++ = TCP_OPTION_LEN_WINDOW_SCALE;
     423         264 :       *data++ = opts->wscale;
     424         264 :       opts_len += TCP_OPTION_LEN_WINDOW_SCALE;
     425             :     }
     426             : 
     427       95612 :   if (tcp_opts_sack_permitted (opts))
     428             :     {
     429         264 :       *data++ = TCP_OPTION_SACK_PERMITTED;
     430         264 :       *data++ = TCP_OPTION_LEN_SACK_PERMITTED;
     431         264 :       opts_len += TCP_OPTION_LEN_SACK_PERMITTED;
     432             :     }
     433             : 
     434       95612 :   if (tcp_opts_tstamp (opts))
     435             :     {
     436       95597 :       *data++ = TCP_OPTION_TIMESTAMP;
     437       95597 :       *data++ = TCP_OPTION_LEN_TIMESTAMP;
     438       95597 :       buf = clib_host_to_net_u32 (opts->tsval);
     439       95597 :       clib_memcpy_fast (data, &buf, sizeof (opts->tsval));
     440       95597 :       data += sizeof (opts->tsval);
     441       95597 :       buf = clib_host_to_net_u32 (opts->tsecr);
     442       95597 :       clib_memcpy_fast (data, &buf, sizeof (opts->tsecr));
     443       95597 :       data += sizeof (opts->tsecr);
     444       95597 :       opts_len += TCP_OPTION_LEN_TIMESTAMP;
     445             :     }
     446             : 
     447       95612 :   if (tcp_opts_sack (opts))
     448             :     {
     449             :       int i;
     450             : 
     451           0 :       if (opts->n_sack_blocks != 0)
     452             :         {
     453           0 :           *data++ = TCP_OPTION_SACK_BLOCK;
     454           0 :           *data++ = 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
     455           0 :           for (i = 0; i < opts->n_sack_blocks; i++)
     456             :             {
     457           0 :               buf = clib_host_to_net_u32 (opts->sacks[i].start);
     458           0 :               clib_memcpy_fast (data, &buf, seq_len);
     459           0 :               data += seq_len;
     460           0 :               buf = clib_host_to_net_u32 (opts->sacks[i].end);
     461           0 :               clib_memcpy_fast (data, &buf, seq_len);
     462           0 :               data += seq_len;
     463             :             }
     464           0 :           opts_len += 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
     465             :         }
     466             :     }
     467             : 
     468             :   /* Terminate TCP options by padding with NOPs to a u32 boundary. Avoid using
     469             :    * EOL because, it seems, it can break peers with broken option parsers that
     470             :    * rely on options ending on a u32 boundary.
     471             :    */
     472      286542 :   while (opts_len % 4)
     473             :     {
     474      190930 :       *data++ = TCP_OPTION_NOOP;
     475      190930 :       opts_len += TCP_OPTION_LEN_NOOP;
     476             :     }
     477       95612 :   return opts_len;
     478             : }
     479             : 
     480             : #endif /* included_tcp_packet_h */
     481             : 
     482             : /*
     483             :  * fd.io coding-style-patch-verification: ON
     484             :  *
     485             :  * Local Variables:
     486             :  * eval: (c-set-style "gnu")
     487             :  * End:
     488             :  */

Generated by: LCOV version 1.14