|           Line data    Source code 
       1             : /*
       2             :  * Copyright (c) 2016-2019 Cisco and/or its affiliates.
       3             :  * Licensed under the Apache License, Version 2.0 (the "License");
       4             :  * you may not use this file except in compliance with the License.
       5             :  * You may obtain a copy of the License at:
       6             :  *
       7             :  *     http://www.apache.org/licenses/LICENSE-2.0
       8             :  *
       9             :  * Unless required by applicable law or agreed to in writing, software
      10             :  * distributed under the License is distributed on an "AS IS" BASIS,
      11             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12             :  * See the License for the specific language governing permissions and
      13             :  * limitations under the License.
      14             :  */
      15             : 
      16             : #include <vnet/tcp/tcp.h>
      17             : #include <vnet/tcp/tcp_inlines.h>
      18             : #include <math.h>
      19             : #include <vnet/ip/ip4_inlines.h>
      20             : #include <vnet/ip/ip6_inlines.h>
      21             : 
/**
 * Next indices named TCP_OUTPUT_NEXT_*. The foreach_tcp4_output_next and
 * foreach_tcp6_output_next lists pair each suffix (DROP, IP_LOOKUP,
 * IP_REWRITE, IP_ARP) with a node name. TCP_OUTPUT_N_NEXT is the number of
 * entries.
 */
typedef enum _tcp_output_next
{
  TCP_OUTPUT_NEXT_DROP,
  TCP_OUTPUT_NEXT_IP_LOOKUP,
  TCP_OUTPUT_NEXT_IP_REWRITE,
  TCP_OUTPUT_NEXT_IP_ARP,
  TCP_OUTPUT_N_NEXT
} tcp_output_next_t;
      30             : 
/* X-macro list: (TCP_OUTPUT_NEXT_* suffix, node name) pairs for IPv4. The
 * expanding site must define _(sym, str) before using it. */
#define foreach_tcp4_output_next                \
  _ (DROP, "error-drop")                        \
  _ (IP_LOOKUP, "ip4-lookup")                 \
  _ (IP_REWRITE, "ip4-rewrite")                       \
  _ (IP_ARP, "ip4-arp")
      36             : 
/* X-macro list: (TCP_OUTPUT_NEXT_* suffix, node name) pairs for IPv6. The
 * expanding site must define _(sym, str) before using it. */
#define foreach_tcp6_output_next                \
  _ (DROP, "error-drop")                        \
  _ (IP_LOOKUP, "ip6-lookup")                 \
  _ (IP_REWRITE, "ip6-rewrite")                       \
  _ (IP_ARP, "ip6-discover-neighbor")
      42             : 
/* Error counter descriptors, one per tcp_error (f, n, s, d) line found in
 * vnet/tcp/tcp_error.def. Each entry is built from the stringified name n,
 * the description d and the severity VL_COUNTER_SEVERITY_<s>; the first
 * macro argument f is not used here. */
static vlib_error_desc_t tcp_output_error_counters[] = {
#define tcp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
#include <vnet/tcp/tcp_error.def>
#undef tcp_error
};
      48             : 
/* Trace record rendered by format_tcp_tx_trace. Both members are stored by
 * value, i.e. the record holds its own copy of the header and connection. */
typedef struct
{
  tcp_header_t tcp_header;		/* TCP header printed by the formatter */
  tcp_connection_t tcp_connection;	/* source of connection id and state */
} tcp_tx_trace_t;
      54             : 
      55             : static u8 *
      56           0 : format_tcp_tx_trace (u8 * s, va_list * args)
      57             : {
      58           0 :   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
      59           0 :   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
      60           0 :   tcp_tx_trace_t *t = va_arg (*args, tcp_tx_trace_t *);
      61           0 :   tcp_connection_t *tc = &t->tcp_connection;
      62           0 :   u32 indent = format_get_indent (s);
      63             : 
      64           0 :   s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
      65           0 :               format_tcp_state, tc->state, format_white_space, indent,
      66             :               format_tcp_header, &t->tcp_header, 128);
      67             : 
      68           0 :   return s;
      69             : }
      70             : 
      71             : #ifndef CLIB_MARCH_VARIANT
      72             : static u8
      73         264 : tcp_window_compute_scale (u32 window)
      74             : {
      75         264 :   u8 wnd_scale = 0;
      76        2904 :   while (wnd_scale < TCP_MAX_WND_SCALE && (window >> wnd_scale) > TCP_WND_MAX)
      77        2640 :     wnd_scale++;
      78         264 :   return wnd_scale;
      79             : }
      80             : 
      81             : /**
      82             :  * TCP's initial window
      83             :  */
      84             : always_inline u32
      85         279 : tcp_initial_wnd_unscaled (tcp_connection_t * tc)
      86             : {
      87             :   /* RFC 6928 recommends the value lower. However at the time our connections
      88             :    * are initialized, fifos may not be allocated. Therefore, advertise the
      89             :    * smallest possible unscaled window size and update once fifos are
      90             :    * assigned to the session.
      91             :    */
      92             :   /*
      93             :      tcp_update_rcv_mss (tc);
      94             :      TCP_IW_N_SEGMENTS * tc->mss;
      95             :    */
      96         279 :   return tcp_cfg.min_rx_fifo;
      97             : }
      98             : 
      99             : /**
     100             :  * Compute initial window and scale factor. As per RFC1323, window field in
     101             :  * SYN and SYN-ACK segments is never scaled.
     102             :  */
     103             : u32
     104         279 : tcp_initial_window_to_advertise (tcp_connection_t * tc)
     105             : {
     106             :   /* Compute rcv wscale only if peer advertised support for it */
     107         279 :   if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts))
     108         264 :     tc->rcv_wscale = tcp_window_compute_scale (tcp_cfg.max_rx_fifo);
     109             : 
     110         279 :   tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
     111             : 
     112         279 :   return clib_min (tc->rcv_wnd, TCP_WND_MAX);
     113             : }
     114             : 
     115             : static inline void
     116      116667 : tcp_update_rcv_wnd (tcp_connection_t * tc)
     117             : {
     118             :   u32 available_space, wnd;
     119             :   i32 observed_wnd;
     120             : 
     121             :   /*
     122             :    * Figure out how much space we have available
     123             :    */
     124      116667 :   available_space = transport_max_rx_enqueue (&tc->connection);
     125             : 
     126             :   /*
     127             :    * Use the above and what we know about what we've previously advertised
     128             :    * to compute the new window
     129             :    */
     130      116667 :   observed_wnd = (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
     131             : 
     132             :   /* Check if we are about to retract the window. Do the comparison before
     133             :    * rounding to avoid errors. Per RFC7323 sec. 2.4 we could remove this */
     134      116667 :   if (PREDICT_FALSE ((i32) available_space < observed_wnd))
     135             :     {
     136           0 :       wnd = round_down_pow2 (clib_max (observed_wnd, 0), 1 << tc->rcv_wscale);
     137             :       TCP_EVT (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);
     138             :     }
     139             :   else
     140             :     {
     141             :       /* Make sure we have a multiple of 1 << rcv_wscale. We round down to
     142             :        * avoid advertising a window larger than what can be buffered */
     143      116667 :       wnd = round_down_pow2 (available_space, 1 << tc->rcv_wscale);
     144             :     }
     145             : 
     146      116667 :   if (PREDICT_FALSE (wnd < tc->rcv_opts.mss))
     147           0 :     wnd = 0;
     148             : 
     149      116667 :   tc->rcv_wnd = clib_min (wnd, TCP_WND_MAX << tc->rcv_wscale);
     150      116667 : }
     151             : 
     152             : /**
     153             :  * Compute and return window to advertise, scaled as per RFC1323
     154             :  */
     155             : static inline u32
     156       33064 : tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state)
     157             : {
     158       33064 :   if (state < TCP_STATE_ESTABLISHED)
     159           0 :     return tcp_initial_window_to_advertise (tc);
     160             : 
     161       33064 :   tcp_update_rcv_wnd (tc);
     162       33064 :   return tc->rcv_wnd >> tc->rcv_wscale;
     163             : }
     164             : 
     165             : static int
     166         132 : tcp_make_syn_options (tcp_connection_t * tc, tcp_options_t * opts)
     167             : {
     168         132 :   u8 len = 0;
     169             : 
     170         132 :   opts->flags |= TCP_OPTS_FLAG_MSS;
     171         132 :   opts->mss = tc->mss;
     172         132 :   len += TCP_OPTION_LEN_MSS;
     173             : 
     174         132 :   opts->flags |= TCP_OPTS_FLAG_WSCALE;
     175         132 :   opts->wscale = tc->rcv_wscale;
     176         132 :   len += TCP_OPTION_LEN_WINDOW_SCALE;
     177             : 
     178         132 :   opts->flags |= TCP_OPTS_FLAG_TSTAMP;
     179         132 :   opts->tsval = tcp_time_tstamp (tc->c_thread_index);
     180         132 :   opts->tsecr = 0;
     181         132 :   len += TCP_OPTION_LEN_TIMESTAMP;
     182             : 
     183             :   if (TCP_USE_SACKS)
     184             :     {
     185         132 :       opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
     186         132 :       len += TCP_OPTION_LEN_SACK_PERMITTED;
     187             :     }
     188             : 
     189             :   /* Align to needed boundary */
     190         132 :   len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
     191         132 :   return len;
     192             : }
     193             : 
     194             : static int
     195         147 : tcp_make_synack_options (tcp_connection_t * tc, tcp_options_t * opts)
     196             : {
     197         147 :   u8 len = 0;
     198             : 
     199         147 :   opts->flags |= TCP_OPTS_FLAG_MSS;
     200         147 :   opts->mss = tc->mss;
     201         147 :   len += TCP_OPTION_LEN_MSS;
     202             : 
     203         147 :   if (tcp_opts_wscale (&tc->rcv_opts))
     204             :     {
     205         132 :       opts->flags |= TCP_OPTS_FLAG_WSCALE;
     206         132 :       opts->wscale = tc->rcv_wscale;
     207         132 :       len += TCP_OPTION_LEN_WINDOW_SCALE;
     208             :     }
     209             : 
     210         147 :   if (tcp_opts_tstamp (&tc->rcv_opts))
     211             :     {
     212         132 :       opts->flags |= TCP_OPTS_FLAG_TSTAMP;
     213         132 :       opts->tsval = tcp_time_tstamp (tc->c_thread_index);
     214         132 :       opts->tsecr = tc->tsval_recent;
     215         132 :       len += TCP_OPTION_LEN_TIMESTAMP;
     216             :     }
     217             : 
     218         147 :   if (tcp_opts_sack_permitted (&tc->rcv_opts))
     219             :     {
     220         132 :       opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
     221         132 :       len += TCP_OPTION_LEN_SACK_PERMITTED;
     222             :     }
     223             : 
     224             :   /* Align to needed boundary */
     225         147 :   len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
     226         147 :   return len;
     227             : }
     228             : 
     229             : static int
     230      116673 : tcp_make_established_options (tcp_connection_t * tc, tcp_options_t * opts)
     231             : {
     232      116673 :   u8 len = 0;
     233             : 
     234      116673 :   opts->flags = 0;
     235             : 
     236      116673 :   if (tcp_opts_tstamp (&tc->rcv_opts))
     237             :     {
     238      116673 :       opts->flags |= TCP_OPTS_FLAG_TSTAMP;
     239      116673 :       opts->tsval = tcp_tstamp (tc);
     240      116673 :       opts->tsecr = tc->tsval_recent;
     241      116673 :       len += TCP_OPTION_LEN_TIMESTAMP;
     242             :     }
     243      116673 :   if (tcp_opts_sack_permitted (&tc->rcv_opts))
     244             :     {
     245      116673 :       if (vec_len (tc->snd_sacks))
     246             :         {
     247           0 :           opts->flags |= TCP_OPTS_FLAG_SACK;
     248           0 :           if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
     249           0 :             tc->snd_sack_pos = 0;
     250           0 :           opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
     251           0 :           opts->n_sack_blocks = vec_len (tc->snd_sacks) - tc->snd_sack_pos;
     252           0 :           opts->n_sack_blocks = clib_min (opts->n_sack_blocks,
     253             :                                           TCP_OPTS_MAX_SACK_BLOCKS);
     254           0 :           tc->snd_sack_pos += opts->n_sack_blocks;
     255           0 :           len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks;
     256             :         }
     257             :     }
     258             : 
     259             :   /* Align to needed boundary */
     260      116673 :   len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
     261      116673 :   return len;
     262             : }
     263             : 
     264             : always_inline int
     265       83609 : tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts,
     266             :                   tcp_state_t state)
     267             : {
     268       83609 :   switch (state)
     269             :     {
     270       83609 :     case TCP_STATE_ESTABLISHED:
     271             :     case TCP_STATE_CLOSE_WAIT:
     272             :     case TCP_STATE_FIN_WAIT_1:
     273             :     case TCP_STATE_LAST_ACK:
     274             :     case TCP_STATE_CLOSING:
     275             :     case TCP_STATE_FIN_WAIT_2:
     276             :     case TCP_STATE_TIME_WAIT:
     277             :     case TCP_STATE_CLOSED:
     278       83609 :       return tcp_make_established_options (tc, opts);
     279           0 :     case TCP_STATE_SYN_RCVD:
     280           0 :       return tcp_make_synack_options (tc, opts);
     281           0 :     case TCP_STATE_SYN_SENT:
     282           0 :       return tcp_make_syn_options (tc, opts);
     283           0 :     default:
     284           0 :       clib_warning ("State not handled! %d", state);
     285           0 :       return 0;
     286             :     }
     287             : }
     288             : 
     289             : /**
     290             :  * Update burst send vars
     291             :  *
     292             :  * - Updates snd_mss to reflect the effective segment size that we can send
     293             :  * by taking into account all TCP options, including SACKs.
     294             :  * - Cache 'on the wire' options for reuse
     295             :  * - Updates receive window which can be reused for a burst.
     296             :  *
     297             :  * This should *only* be called when doing bursts
     298             :  */
     299             : void
     300       83603 : tcp_update_burst_snd_vars (tcp_connection_t * tc)
     301             : {
     302       83603 :   tcp_main_t *tm = &tcp_main;
     303             : 
     304             :   /* Compute options to be used for connection. These may be reused when
     305             :    * sending data or to compute the effective mss (snd_mss) */
     306       83603 :   tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts,
     307             :                                        TCP_STATE_ESTABLISHED);
     308             : 
     309             :   /* XXX check if MTU has been updated */
     310       83603 :   tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
     311       83603 :   ASSERT (tc->snd_mss > 0);
     312             : 
     313       83603 :   tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts,
     314             :                      &tc->snd_opts);
     315             : 
     316       83603 :   tcp_update_rcv_wnd (tc);
     317             : 
     318       83603 :   if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
     319           0 :     tcp_bt_check_app_limited (tc);
     320             : 
     321       83603 :   if (tc->snd_una == tc->snd_nxt)
     322             :     {
     323       41927 :       tcp_cc_event (tc, TCP_CC_EVT_START_TX);
     324             :     }
     325             : 
     326       83603 :   if (tc->flags & TCP_CONN_PSH_PENDING)
     327             :     {
     328       24620 :       u32 max_deq = transport_max_tx_dequeue (&tc->connection);
     329             :       /* Last byte marked for push */
     330       24620 :       tc->psh_seq = tc->snd_una + max_deq - 1;
     331             :     }
     332       83603 : }
     333             : 
     334             : static void *
     335       33349 : tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
     336             : {
     337       33349 :   ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
     338       33349 :   b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
     339       33349 :   b->total_length_not_including_first_buffer = 0;
     340       33349 :   b->current_data = 0;
     341       33349 :   vnet_buffer (b)->tcp.flags = 0;
     342             :   /* Leave enough space for headers */
     343       33349 :   return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
     344             : }
     345             : 
     346             : /* Compute TCP checksum in software when offloading is disabled for a connection */
     347             : u16
     348           0 : ip6_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
     349             :                                  ip46_address_t * src, ip46_address_t * dst)
     350             : {
     351             :   ip_csum_t sum0;
     352             :   u16 payload_length_host_byte_order;
     353             :   u32 i;
     354             : 
     355             :   /* Initialize checksum with ip header. */
     356           0 :   sum0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0)) +
     357           0 :     clib_host_to_net_u16 (IP_PROTOCOL_TCP);
     358           0 :   payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
     359             : 
     360           0 :   for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++)
     361             :     {
     362           0 :       sum0 = ip_csum_with_carry
     363           0 :         (sum0, clib_mem_unaligned (&src->ip6.as_uword[i], uword));
     364           0 :       sum0 = ip_csum_with_carry
     365           0 :         (sum0, clib_mem_unaligned (&dst->ip6.as_uword[i], uword));
     366             :     }
     367             : 
     368           0 :   return ip_calculate_l4_checksum (vm, p0, sum0,
     369             :                                    payload_length_host_byte_order, NULL, 0,
     370             :                                    NULL);
     371             : }
     372             : 
     373             : u16
     374           0 : ip4_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
     375             :                                  ip46_address_t * src, ip46_address_t * dst)
     376             : {
     377             :   ip_csum_t sum0;
     378             :   u32 payload_length_host_byte_order;
     379             : 
     380           0 :   payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
     381           0 :   sum0 =
     382           0 :     clib_host_to_net_u32 (payload_length_host_byte_order +
     383             :                           (IP_PROTOCOL_TCP << 16));
     384             : 
     385           0 :   sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&src->ip4, u32));
     386           0 :   sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&dst->ip4, u32));
     387             : 
     388           0 :   return ip_calculate_l4_checksum (vm, p0, sum0,
     389             :                                    payload_length_host_byte_order, NULL, 0,
     390             :                                    NULL);
     391             : }
     392             : 
     393             : static inline u16
     394     1062440 : tcp_compute_checksum (tcp_connection_t * tc, vlib_buffer_t * b)
     395             : {
     396     1062440 :   u16 checksum = 0;
     397     1062440 :   if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
     398             :     {
     399           0 :       tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
     400           0 :       vlib_main_t *vm = wrk->vm;
     401             : 
     402           0 :       if (tc->c_is_ip4)
     403           0 :         checksum = ip4_tcp_compute_checksum_custom
     404             :           (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
     405             :       else
     406           0 :         checksum = ip6_tcp_compute_checksum_custom
     407             :           (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
     408             :     }
     409             :   else
     410             :     {
     411     1062440 :       vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM);
     412             :     }
     413     1062440 :   return checksum;
     414             : }
     415             : 
     416             : /**
     417             :  * Prepare ACK
     418             :  */
     419             : static inline void
     420       33064 : tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state,
     421             :                 u8 flags)
     422             : {
     423       33064 :   tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
     424             :   u8 tcp_opts_len, tcp_hdr_opts_len;
     425             :   tcp_header_t *th;
     426             :   u16 wnd;
     427             : 
     428       33064 :   wnd = tcp_window_to_advertise (tc, state);
     429             : 
     430             :   /* Make and write options */
     431       33064 :   tcp_opts_len = tcp_make_established_options (tc, snd_opts);
     432       33064 :   tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
     433             : 
     434       33064 :   th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
     435             :                              tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
     436             : 
     437       33064 :   tcp_options_write ((u8 *) (th + 1), snd_opts);
     438             : 
     439       33064 :   th->checksum = tcp_compute_checksum (tc, b);
     440             : 
     441       33064 :   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
     442             : 
     443       33064 :   if (wnd == 0)
     444             :     {
     445           0 :       transport_rx_fifo_req_deq_ntf (&tc->connection);
     446           0 :       tcp_zero_rwnd_sent_on (tc);
     447             :     }
     448             :   else
     449       33064 :     tcp_zero_rwnd_sent_off (tc);
     450       33064 : }
     451             : 
     452             : /**
     453             :  * Convert buffer to ACK
     454             :  */
     455             : static inline void
     456       32808 : tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b)
     457             : {
     458       32808 :   tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
     459             :   TCP_EVT (TCP_EVT_ACK_SENT, tc);
     460       32808 :   tc->rcv_las = tc->rcv_nxt;
     461       32808 : }
     462             : 
     463             : /**
     464             :  * Convert buffer to FIN-ACK
     465             :  */
     466             : static void
     467         256 : tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b)
     468             : {
     469         256 :   tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK);
     470         256 : }
     471             : 
     472             : /**
     473             :  * Convert buffer to SYN
     474             :  */
     475             : void
     476         132 : tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b)
     477             : {
     478             :   u8 tcp_hdr_opts_len, tcp_opts_len;
     479             :   tcp_header_t *th;
     480             :   u16 initial_wnd;
     481             :   tcp_options_t snd_opts;
     482             : 
     483         132 :   initial_wnd = tcp_initial_window_to_advertise (tc);
     484             : 
     485             :   /* Make and write options */
     486         132 :   clib_memset (&snd_opts, 0, sizeof (snd_opts));
     487         132 :   tcp_opts_len = tcp_make_syn_options (tc, &snd_opts);
     488         132 :   tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
     489             : 
     490         132 :   th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
     491             :                              tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN,
     492             :                              initial_wnd);
     493         132 :   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
     494         132 :   tcp_options_write ((u8 *) (th + 1), &snd_opts);
     495         132 :   th->checksum = tcp_compute_checksum (tc, b);
     496         132 : }
     497             : 
     498             : /**
     499             :  * Convert buffer to SYN-ACK
     500             :  */
     501             : static void
     502         147 : tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b)
     503             : {
     504         147 :   tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
     505             :   u8 tcp_opts_len, tcp_hdr_opts_len;
     506             :   tcp_header_t *th;
     507             :   u16 initial_wnd;
     508             : 
     509         147 :   clib_memset (snd_opts, 0, sizeof (*snd_opts));
     510         147 :   initial_wnd = tcp_initial_window_to_advertise (tc);
     511         147 :   tcp_opts_len = tcp_make_synack_options (tc, snd_opts);
     512         147 :   tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
     513             : 
     514         147 :   th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
     515             :                              tc->rcv_nxt, tcp_hdr_opts_len,
     516             :                              TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd);
     517         147 :   tcp_options_write ((u8 *) (th + 1), snd_opts);
     518             : 
     519         147 :   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
     520         147 :   th->checksum = tcp_compute_checksum (tc, b);
     521         147 : }
     522             : 
     523             : static void
     524         132 : tcp_enqueue_half_open (tcp_worker_ctx_t *wrk, tcp_connection_t *tc,
     525             :                        vlib_buffer_t *b, u32 bi)
     526             : {
     527         132 :   vlib_main_t *vm = wrk->vm;
     528             : 
     529         132 :   b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
     530         132 :   b->error = 0;
     531             : 
     532         132 :   session_add_pending_tx_buffer (vm->thread_index, bi,
     533         132 :                                  wrk->tco_next_node[!tc->c_is_ip4]);
     534             : 
     535         132 :   if (vm->thread_index == 0 && vlib_num_workers ())
     536           0 :     session_queue_run_on_main_thread (vm);
     537         132 : }
     538             : 
     539             : static void
     540       33217 : tcp_enqueue_to_output (tcp_worker_ctx_t * wrk, vlib_buffer_t * b, u32 bi,
     541             :                        u8 is_ip4)
     542             : {
     543       33217 :   b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
     544       33217 :   b->error = 0;
     545             : 
     546       33217 :   session_add_pending_tx_buffer (wrk->vm->thread_index, bi,
     547       33217 :                                  wrk->tco_next_node[!is_ip4]);
     548       33217 : }
     549             : 
     550             : int
     551          86 : tcp_buffer_make_reset (vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4)
     552             : {
     553          86 :   ip4_address_t src_ip4 = {}, dst_ip4 = {};
     554             :   ip6_address_t src_ip6, dst_ip6;
     555             :   u16 src_port, dst_port;
     556             :   u32 tmp, len, seq, ack;
     557             :   ip4_header_t *ih4;
     558             :   ip6_header_t *ih6;
     559             :   tcp_header_t *th;
     560             :   u8 flags;
     561             : 
     562             :   /*
     563             :    * Find IP and TCP headers and glean information from them. Assumes
     564             :    * buffer was parsed by something like @ref tcp_input_lookup_buffer
     565             :    */
     566          86 :   th = tcp_buffer_hdr (b);
     567             : 
     568          86 :   if (is_ip4)
     569             :     {
     570          86 :       ih4 = vlib_buffer_get_current (b);
     571          86 :       ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40);
     572          86 :       src_ip4.as_u32 = ih4->src_address.as_u32;
     573          86 :       dst_ip4.as_u32 = ih4->dst_address.as_u32;
     574             :     }
     575             :   else
     576             :     {
     577           0 :       ih6 = vlib_buffer_get_current (b);
     578           0 :       ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60);
     579           0 :       clib_memcpy_fast (&src_ip6, &ih6->src_address, sizeof (ip6_address_t));
     580           0 :       clib_memcpy_fast (&dst_ip6, &ih6->dst_address, sizeof (ip6_address_t));
     581             :     }
     582             : 
     583          86 :   src_port = th->src_port;
     584          86 :   dst_port = th->dst_port;
     585          86 :   flags = TCP_FLAG_RST;
     586             : 
     587             :   /*
     588             :    * RFC 793. If the ACK bit is off, sequence number zero is used,
     589             :    *   <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK>
     590             :    * If the ACK bit is on,
     591             :    *   <SEQ=SEG.ACK><CTL=RST>
     592             :    */
     593          86 :   if (tcp_ack (th))
     594             :     {
     595          86 :       seq = th->ack_number;
     596          86 :       ack = 0;
     597             :     }
     598             :   else
     599             :     {
     600           0 :       flags |= TCP_FLAG_ACK;
     601           0 :       tmp = clib_net_to_host_u32 (th->seq_number);
     602           0 :       len = vnet_buffer (b)->tcp.data_len + tcp_is_syn (th) + tcp_is_fin (th);
     603           0 :       ack = clib_host_to_net_u32 (tmp + len);
     604           0 :       seq = 0;
     605             :     }
     606             : 
     607             :   /*
     608             :    * Clear and reuse current buffer for reset
     609             :    */
     610          86 :   if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
     611           0 :     vlib_buffer_free_one (vm, b->next_buffer);
     612             : 
     613             :   /* Zero all flags but free list index and trace flag */
     614          86 :   b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
     615             :   /* Make sure new tcp header comes after current ip */
     616          86 :   b->current_data = ((u8 *) th - b->data) + sizeof (tcp_header_t);
     617          86 :   b->current_length = 0;
     618          86 :   b->total_length_not_including_first_buffer = 0;
     619          86 :   vnet_buffer (b)->tcp.flags = 0;
     620             : 
     621             :   /*
     622             :    * Add TCP and IP headers
     623             :    */
     624          86 :   th = vlib_buffer_push_tcp_net_order (b, dst_port, src_port, seq, ack,
     625             :                                        sizeof (tcp_header_t), flags, 0);
     626             : 
     627          86 :   if (is_ip4)
     628             :     {
     629          86 :       ih4 = vlib_buffer_push_ip4 (vm, b, &dst_ip4, &src_ip4,
     630             :                                   IP_PROTOCOL_TCP, 1);
     631          86 :       th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
     632             :     }
     633             :   else
     634             :     {
     635           0 :       int bogus = ~0;
     636           0 :       ih6 = vlib_buffer_push_ip6 (vm, b, &dst_ip6, &src_ip6, IP_PROTOCOL_TCP);
     637           0 :       th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
     638           0 :       ASSERT (!bogus);
     639             :     }
     640             : 
     641          86 :   return 0;
     642             : }
     643             : 
     644             : /**
     645             :  *  Send reset without reusing existing buffer
     646             :  *
     647             :  *  It extracts connection info out of original packet
                        *
                        *  A new buffer is allocated for the reset. If the allocation fails the
                        *  worker no_buffer stat is incremented and nothing is sent.
                        *
                        *  The current data of @c pkt is read as the ip header and the tcp header
                        *  is taken to be the next header after it. Addresses from @c pkt are
                        *  swapped when pushing the ip header and the ports are passed in the
                        *  opposite order as well (destination port first, then source port).
                        *
                        *  The resulting buffer is handed to tcp_enqueue_half_open and the
                        *  RST_SENT counter of the output node is incremented.
                        *
                        *  @param tc            connection the reset is sent for
                        *  @param pkt           packet whose headers provide addresses and ports
                        *  @param thread_index  index used to look up the tcp worker and its vm
                        *  @param is_ip4        non-zero if @c pkt is parsed as ip4, zero for ip6
     648             :  */
     649             : void
     650           0 : tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
     651             :                       u32 thread_index, u8 is_ip4)
     652             : {
     653           0 :   tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
     654           0 :   vlib_main_t *vm = wrk->vm;
     655             :   vlib_buffer_t *b;
     656           0 :   u8 tcp_hdr_len, flags = 0;
     657             :   tcp_header_t *th, *pkt_th;
     658             :   u32 seq, ack, bi;
     659             :   ip4_header_t *ih4, *pkt_ih4;
     660             :   ip6_header_t *ih6, *pkt_ih6;
     661             : 
     662           0 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
     663             :     {
     664           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
     665           0 :       return;
     666             :     }
     667             : 
     668           0 :   b = vlib_get_buffer (vm, bi);
     669           0 :   tcp_init_buffer (vm, b);
     670           0 :   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
     671             : 
     672             :   /* No options are written, the header length is the bare tcp header */
     673           0 :   tcp_hdr_len = sizeof (tcp_header_t);
     674             : 
     675           0 :   if (is_ip4)
     676             :     {
     677           0 :       pkt_ih4 = vlib_buffer_get_current (pkt);
     678           0 :       pkt_th = ip4_next_header (pkt_ih4);
     679             :     }
     680             :   else
     681             :     {
     682           0 :       pkt_ih6 = vlib_buffer_get_current (pkt);
     683           0 :       pkt_th = ip6_next_header (pkt_ih6);
     684             :     }
     685             : 
                         /*
                          * If the incoming segment has the ACK bit set, reply with a plain RST
                          * whose sequence number is the incoming ack number, used exactly as
                          * read from the header (no byte swapping). The ack field is rcv_nxt
                          * once the connection state is at least SYN_RCVD, zero otherwise.
                          * If the ACK bit is off, reply with RST|ACK, sequence number zero and
                          * ack number taken from tcp.seq_end of the incoming buffer.
                          * Ack values are converted to network order here.
                          */
     686           0 :   if (tcp_ack (pkt_th))
     687             :     {
     688           0 :       flags = TCP_FLAG_RST;
     689           0 :       seq = pkt_th->ack_number;
     690           0 :       ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
     691           0 :       ack = clib_host_to_net_u32 (ack);
     692             :     }
     693             :   else
     694             :     {
     695           0 :       flags = TCP_FLAG_RST | TCP_FLAG_ACK;
     696           0 :       seq = 0;
     697           0 :       ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);
     698             :     }
     699             : 
     700           0 :   th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
     701             :                                        seq, ack, tcp_hdr_len, flags, 0);
     702             : 
     703             :   /* Swap src and dst ip */
     704           0 :   if (is_ip4)
     705             :     {
     706           0 :       ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
     707           0 :       ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
     708             :                                   &pkt_ih4->src_address, IP_PROTOCOL_TCP,
     709           0 :                                   tcp_csum_offload (tc));
     710           0 :       th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
     711             :     }
     712             :   else
     713             :     {
     714           0 :       int bogus = ~0;
     715           0 :       ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
     716             :               0x60);
     717           0 :       ih6 = vlib_buffer_push_ip6_custom (vm, b, &pkt_ih6->dst_address,
     718             :                                          &pkt_ih6->src_address,
     719             :                                          IP_PROTOCOL_TCP,
     720             :                                          tc->ipv6_flow_label);
     721           0 :       th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
     722           0 :       ASSERT (!bogus);
     723             :     }
     724             : 
     725           0 :   tcp_enqueue_half_open (wrk, tc, b, bi);
     726             :   TCP_EVT (TCP_EVT_RST_SENT, tc);
                         /*
                          * NOTE(review): the counter node is selected with tc->c_is_ip4 and not
                          * with the is_ip4 argument - confirm the two always agree.
                          */
     727           0 :   vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
     728             :                                TCP_ERROR_RST_SENT, 1);
     729             : }
     730             : 
     731             : /**
     732             :  * Build and send reset packet for connection
                        *
                        * The reset is a RST|ACK built in a newly allocated buffer with sequence
                        * number snd_nxt, ack number rcv_nxt, the options produced by
                        * tcp_make_options for the current state and a window of
                        * rcv_wnd >> rcv_wscale. The buffer is passed to tcp_enqueue_to_output
                        * and the RST_SENT counter of the output node is incremented.
                        *
                        * If no buffer can be allocated, only the worker no_buffer stat is
                        * incremented and nothing is sent.
                        *
                        * @param tc    connection for which the reset is built
     733             :  */
     734             : void
     735           6 : tcp_send_reset (tcp_connection_t * tc)
     736             : {
     737           6 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
     738           6 :   vlib_main_t *vm = wrk->vm;
     739             :   vlib_buffer_t *b;
     740             :   u32 bi;
     741             :   tcp_header_t *th;
     742             :   u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;
     743             :   u8 flags;
     744             : 
     745           6 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
     746             :     {
     747           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
     748           0 :       return;
     749             :     }
     750           6 :   b = vlib_get_buffer (vm, bi);
     751           6 :   tcp_init_buffer (vm, b);
     752             : 
                         /* Header length covers the tcp header plus the options made here */
     753           6 :   tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
     754           6 :   tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
     755           6 :   advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
     756           6 :   flags = TCP_FLAG_RST | TCP_FLAG_ACK;
     757           6 :   th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
     758             :                              tc->rcv_nxt, tcp_hdr_opts_len, flags,
     759             :                              advertise_wnd);
                         /* Options are written right after the fixed header, before checksum */
     760           6 :   opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
     761           6 :   th->checksum = tcp_compute_checksum (tc, b);
     762           6 :   ASSERT (opts_write_len == tc->snd_opts_len);
     763           6 :   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
     764           6 :   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
     765             :   TCP_EVT (TCP_EVT_RST_SENT, tc);
     766           6 :   vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
     767             :                                TCP_ERROR_RST_SENT, 1);
     768             : }
     769             : 
     770             : /**
     771             :  *  Send SYN
     772             :  *
     773             :  *  Builds a SYN packet for a half-open connection and sends it to tcp-output.
     774             :  *  The packet is handled by main thread and because half-open and established
     775             :  *  connections use the same pool the connection can be retrieved without
     776             :  *  additional logic.
                        *
                        *  The SYN retransmit timer is armed before the buffer is requested. If
                        *  the allocation fails the timer is reprogrammed with
                        *  tcp_cfg.alloc_err_timeout, the worker no_buffer stat is incremented
                        *  and no packet is sent.
                        *
                        *  @param tc   half-open connection for which the SYN is sent
     777             :  */
     778             : void
     779         132 : tcp_send_syn (tcp_connection_t * tc)
     780             : {
     781         132 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
     782         132 :   vlib_main_t *vm = wrk->vm;
     783             :   vlib_buffer_t *b;
     784             :   u32 bi;
     785             : 
     786             :   /*
     787             :    * Setup the SYN retransmit timer before requesting a buffer
     788             :    * such that we can return if we have run out.
     789             :    */
     790         132 :   tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
     791         132 :                     (u32) tc->rto * TCP_TO_TIMER_TICK);
     792             : 
     793         132 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
     794             :     {
                             /* Out of buffers, retry after the allocation error timeout */
     795           0 :       tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
     796             :                         tcp_cfg.alloc_err_timeout);
     797           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
     798           0 :       return;
     799             :     }
     800             : 
     801         132 :   b = vlib_get_buffer (vm, bi);
     802         132 :   tcp_init_buffer (vm, b);
     803         132 :   tcp_make_syn (tc, b);
     804             : 
     805             :   /*
                          * Measure RTT with this. The time is read with index 1 when workers
                          * are configured and index 0 otherwise - presumably the thread that
                          * handles half-open connections, TODO confirm. The backoff is reset.
                          */
     806         132 :   tc->rtt_ts = tcp_time_now_us (vlib_num_workers ()? 1 : 0);
     807         132 :   tc->rtt_seq = tc->snd_nxt;
     808         132 :   tc->rto_boff = 0;
     809             : 
     810         132 :   tcp_enqueue_half_open (wrk, tc, b, bi);
     811             :   TCP_EVT (TCP_EVT_SYN_SENT, tc);
     812             : }
     813             : 
                       /**
                        *  Send SYN-ACK
                        *
                        *  Builds a SYN-ACK with tcp_make_synack in a newly allocated buffer and
                        *  passes it to tcp_enqueue_to_output. The retransmit timer is updated
                        *  before the buffer is requested. If the allocation fails the retransmit
                        *  timer is reprogrammed with tcp_cfg.alloc_err_timeout, the worker
                        *  no_buffer stat is incremented and no packet is sent.
                        *
                        *  Expects snd_una to differ from snd_nxt (asserted).
                        *
                        *  @param tc   connection for which the SYN-ACK is sent
                        */
     814             : void
     815         135 : tcp_send_synack (tcp_connection_t * tc)
     816             : {
     817         135 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
     818         135 :   vlib_main_t *vm = wrk->vm;
     819             :   vlib_buffer_t *b;
     820             :   u32 bi;
     821             : 
     822         135 :   ASSERT (tc->snd_una != tc->snd_nxt);
     823         135 :   tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
     824             : 
     825         135 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
     826             :     {
                             /* Out of buffers, retry after the allocation error timeout */
     827           0 :       tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
     828             :                         tcp_cfg.alloc_err_timeout);
     829           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
     830           0 :       return;
     831             :     }
     832             : 
                         /* Timestamp taken on the connection thread, stored in rtt_ts */
     833         135 :   tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
     834         135 :   b = vlib_get_buffer (vm, bi);
     835         135 :   tcp_init_buffer (vm, b);
     836         135 :   tcp_make_synack (tc, b);
     837         135 :   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
     838             :   TCP_EVT (TCP_EVT_SYNACK_SENT, tc);
     839             : }
     840             : 
     841             : /**
     842             :  *  Send FIN
                        *
                        *  Builds a FIN with tcp_make_fin in a newly allocated buffer and passes
                        *  it to tcp_enqueue_to_output, then advances snd_nxt by one to account
                        *  for the FIN and updates the retransmit timer.
                        *
                        *  If a FIN was already sent (TCP_CONN_FINSNT set) snd_nxt is first moved
                        *  back by one, so after the increment at the end it has the same value
                        *  as before the call.
                        *
                        *  If no buffer can be allocated the retransmit timer is programmed with
                        *  tcp_cfg.alloc_err_timeout and nothing is sent.
                        *
                        *  @param tc   connection for which the FIN is sent
     843             :  */
     844             : void
     845         256 : tcp_send_fin (tcp_connection_t * tc)
     846             : {
     847         256 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
     848         256 :   vlib_main_t *vm = wrk->vm;
     849             :   vlib_buffer_t *b;
     850             :   u32 bi;
     851         256 :   u8 fin_snt = 0;
     852             : 
                         /* Undo the earlier FIN accounting when this is a FIN retransmit */
     853         256 :   fin_snt = tc->flags & TCP_CONN_FINSNT;
     854         256 :   if (fin_snt)
     855           0 :     tc->snd_nxt -= 1;
     856             : 
     857         256 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
     858             :     {
     859             :       /* Out of buffers so program fin retransmit ASAP */
     860           0 :       tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
     861             :                         tcp_cfg.alloc_err_timeout);
                             /* Restore snd_nxt if it was moved back above */
     862           0 :       if (fin_snt)
     863           0 :         tc->snd_nxt += 1;
     864             :       else
     865             :         /* Make sure retransmit retries a fin not data */
     866           0 :         tc->flags |= TCP_CONN_FINSNT;
     867           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
     868           0 :       return;
     869             :     }
     870             : 
     871             :   /* If an ack is programmed but no dupacks are pending, drop the request */
     872         256 :   if ((tc->flags & TCP_CONN_SNDACK) && !tc->pending_dupacks)
     873           3 :     tc->flags &= ~TCP_CONN_SNDACK;
     874             : 
     875         256 :   b = vlib_get_buffer (vm, bi);
     876         256 :   tcp_init_buffer (vm, b);
     877         256 :   tcp_make_fin (tc, b);
     878         256 :   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
     879             :   TCP_EVT (TCP_EVT_FIN_SENT, tc);
     880             :   /* Account for the FIN */
     881         256 :   tc->snd_nxt += 1;
     882         256 :   tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
                         /* First FIN for this connection: mark it sent, clear the pending flag */
     883         256 :   if (!fin_snt)
     884             :     {
     885         256 :       tc->flags |= TCP_CONN_FINSNT;
     886         256 :       tc->flags &= ~TCP_CONN_FINPNDG;
     887             :     }
     888             : }
     889             : 
     890             : /**
     891             :  * Push TCP header and update connection variables. Should only be called
     892             :  * for segments with data, not for control packets.
                        *
                        * The payload length is the current length of the buffer plus, for
                        * chained buffers, total_length_not_including_first_buffer. The header
                        * always carries the ACK flag and rcv_nxt as ack number. PSH is added if
                        * a push is pending and psh_seq falls within
                        * [snd_nxt, snd_nxt + payload length).
                        *
                        * Besides writing the header, the function sets rcv_las to rcv_nxt,
                        * adds the payload length to bytes_out, increments data_segs_out and
                        * stores the checksum returned by tcp_compute_checksum.
                        *
                        * @param tc              connection the segment belongs to
                        * @param b               buffer holding the payload
                        * @param snd_nxt         sequence number written in the header
                        * @param compute_opts    if set, options are rebuilt with tcp_make_options
                        * @param maybe_burst     if set, the window is rcv_wnd >> rcv_wscale and
                        *                        options are copied from the cached_opts of the
                        *                        connection thread worker context; otherwise the
                        *                        window comes from tcp_window_to_advertise and
                        *                        options are written with tcp_options_write
                        * @param update_snd_nxt  if set, tc->snd_nxt is advanced by the payload
                        *                        length
     893             :  */
     894             : always_inline void
     895     1029090 : tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt,
     896             :                 u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
     897             : {
     898     1029090 :   u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK;
     899             :   u32 advertise_wnd, data_len;
     900     1029090 :   tcp_main_t *tm = &tcp_main;
     901             :   tcp_header_t *th;
     902             : 
     903     1029090 :   data_len = b->current_length;
     904     1029090 :   if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
     905           0 :     data_len += b->total_length_not_including_first_buffer;
     906             : 
     907     1029090 :   vnet_buffer (b)->tcp.flags = 0;
     908     1029090 :   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
     909             : 
     910     1029090 :   if (compute_opts)
     911           0 :     tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
     912             : 
     913     1029090 :   tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
     914             : 
     915     1029090 :   if (maybe_burst)
     916     1029090 :     advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
     917             :   else
     918           0 :     advertise_wnd = tcp_window_to_advertise (tc, TCP_STATE_ESTABLISHED);
     919             : 
                         /* Set PSH only on the segment that contains psh_seq */
     920     1029090 :   if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
     921             :     {
     922      950988 :       if (seq_geq (tc->psh_seq, snd_nxt)
     923      950988 :           && seq_lt (tc->psh_seq, snd_nxt + data_len))
     924         321 :         flags |= TCP_FLAG_PSH;
     925             :     }
     926     1029090 :   th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, snd_nxt,
     927             :                              tc->rcv_nxt, tcp_hdr_opts_len, flags,
     928             :                              advertise_wnd);
     929             : 
                         /* Options go right after the fixed tcp header */
     930     1029090 :   if (maybe_burst)
     931             :     {
     932     1029090 :       clib_memcpy_fast ((u8 *) (th + 1),
     933     1029090 :                         tm->wrk_ctx[tc->c_thread_index].cached_opts,
     934     1029090 :                         tc->snd_opts_len);
     935             :     }
     936             :   else
     937             :     {
     938           0 :       u8 len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
     939           0 :       ASSERT (len == tc->snd_opts_len);
     940             :     }
     941             : 
     942             :   /*
     943             :    * Update connection variables
     944             :    */
     945             : 
     946     1029090 :   if (update_snd_nxt)
     947     1029090 :     tc->snd_nxt += data_len;
     948     1029090 :   tc->rcv_las = tc->rcv_nxt;
     949             : 
     950     1029090 :   tc->bytes_out += data_len;
     951     1029090 :   tc->data_segs_out += 1;
     952             : 
     953     1029090 :   th->checksum = tcp_compute_checksum (tc, b);
     954             : 
     955             :   TCP_EVT (TCP_EVT_PKTIZE, tc);
     956     1029090 : }
     957             : 
                       /**
                        * Length of the data held by a buffer or buffer chain
                        *
                        * @param b     first buffer of the chain
                        *
                        * @return      current_length of @c b plus, if the NEXT_PRESENT flag is
                        *              set, total_length_not_including_first_buffer
                        */
     958             : always_inline u32
     959           0 : tcp_buffer_len (vlib_buffer_t * b)
     960             : {
     961           0 :   u32 data_len = b->current_length;
     962           0 :   if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
     963           0 :     data_len += b->total_length_not_including_first_buffer;
     964           0 :   return data_len;
     965             : }
     966             : 
                       /**
                        * Push the tcp header for one data buffer at the current snd_nxt
                        *
                        * If rate sampling is configured (TCP_CFG_F_RATE_SAMPLE) the buffer
                        * length is first reported to tcp_bt_track_tx. The header is pushed with
                        * cached options (burst mode) and snd_nxt is advanced by the payload.
                        * tcp_validate_txf_size is then called with snd_nxt - snd_una.
                        *
                        * @param tc    connection the buffer belongs to
                        * @param b     buffer holding the payload
                        *
                        * @return      always 0
                        */
     967             : always_inline u32
     968     1029090 : tcp_push_one_header (tcp_connection_t *tc, vlib_buffer_t *b)
     969             : {
     970     1029090 :   if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
     971           0 :     tcp_bt_track_tx (tc, tcp_buffer_len (b));
     972             : 
     973     1029090 :   tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0, /* burst */ 1,
     974             :                   /* update_snd_nxt */ 1);
     975             : 
     976     1029090 :   tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
     977     1029090 :   return 0;
     978             : }
     979             : 
                       /**
                        * Push tcp headers on a vector of data buffers
                        *
                        * Headers are pushed in order with tcp_push_one_header. After all
                        * buffers are processed, rtt tracking is started if no timestamp is
                        * being tracked and the connection is not in congestion recovery, and
                        * the retransmit timer is set (with rto_boff reset) if it is not active.
                        *
                        * @param tconn   transport connection, cast to a tcp connection
                        * @param bs      array of buffers holding the payloads
                        * @param n_bufs  number of buffers in @c bs
                        *
                        * @return        always 0
                        */
     980             : u32
     981       42020 : tcp_session_push_header (transport_connection_t *tconn, vlib_buffer_t **bs,
     982             :                          u32 n_bufs)
     983             : {
     984       42020 :   tcp_connection_t *tc = (tcp_connection_t *) tconn;
     985             : 
                         /*
                          * Two buffers are handled per iteration while at least four remain,
                          * so that the headers of the following pair can be prefetched.
                          */
     986      511100 :   while (n_bufs >= 4)
     987             :     {
     988      469080 :       vlib_prefetch_buffer_header (bs[2], STORE);
     989      469080 :       vlib_prefetch_buffer_header (bs[3], STORE);
     990             : 
     991      469080 :       tcp_push_one_header (tc, bs[0]);
     992      469080 :       tcp_push_one_header (tc, bs[1]);
     993             : 
     994      469080 :       n_bufs -= 2;
     995      469080 :       bs += 2;
     996             :     }
                         /* Remaining buffers, one at a time, prefetching the next if any */
     997      132952 :   while (n_bufs)
     998             :     {
     999       90932 :       if (n_bufs > 1)
    1000       48912 :         vlib_prefetch_buffer_header (bs[1], STORE);
    1001             : 
    1002       90932 :       tcp_push_one_header (tc, bs[0]);
    1003             : 
    1004       90932 :       n_bufs -= 1;
    1005       90932 :       bs += 1;
    1006             :     }
    1007             : 
    1008             :   /* If not tracking an ACK, start tracking */
    1009       42020 :   if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
    1010             :     {
    1011       28751 :       tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
    1012       28751 :       tc->rtt_seq = tc->snd_nxt;
    1013             :     }
                         /* Make sure the retransmit timer runs now that data is outstanding */
    1014       42020 :   if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))
    1015             :     {
    1016       16347 :       tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
    1017       16347 :       tcp_retransmit_timer_set (&wrk->timer_wheel, tc);
    1018       16347 :       tc->rto_boff = 0;
    1019             :     }
    1020       42020 :   return 0;
    1021             : }
    1022             : 
                       /**
                        * Send an ACK
                        *
                        * Builds an ACK with tcp_make_ack in a newly allocated buffer and passes
                        * it to tcp_enqueue_to_output. If no buffer can be allocated,
                        * tcp_update_rcv_wnd is called, the worker no_buffer stat is incremented
                        * and nothing is sent.
                        *
                        * @param tc    connection for which the ACK is sent
                        */
    1023             : void
    1024       32808 : tcp_send_ack (tcp_connection_t * tc)
    1025             : {
    1026       32808 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
    1027       32808 :   vlib_main_t *vm = wrk->vm;
    1028             :   vlib_buffer_t *b;
    1029             :   u32 bi;
    1030             : 
    1031       32808 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
    1032             :     {
    1033           0 :       tcp_update_rcv_wnd (tc);
    1034           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
    1035           0 :       return;
    1036             :     }
    1037       32808 :   b = vlib_get_buffer (vm, bi);
    1038       32808 :   tcp_init_buffer (vm, b);
    1039       32808 :   tcp_make_ack (tc, b);
    1040       32808 :   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1041             : }
    1042             : 
                       /**
                        * Request that an ACK be sent for the connection
                        *
                        * If TCP_CONN_SNDACK is not already set, a self custom tx event is added
                        * for the connection and the flag is set. Calls made while the flag is
                        * set do nothing, so only one event is added per pending request.
                        *
                        * @param tc    connection for which the ACK is programmed
                        */
    1043             : void
    1044     1029260 : tcp_program_ack (tcp_connection_t * tc)
    1045             : {
    1046     1029260 :   if (!(tc->flags & TCP_CONN_SNDACK))
    1047             :     {
    1048       41829 :       session_add_self_custom_tx_evt (&tc->connection, 1);
    1049       41829 :       tc->flags |= TCP_CONN_SNDACK;
    1050             :     }
    1051     1029260 : }
    1052             : 
                       /**
                        * Request that a duplicate ACK be sent for the connection
                        *
                        * Adds a self custom tx event and sets TCP_CONN_SNDACK if the flag is not
                        * already set, then increments pending_dupacks. The count stops growing
                        * once it reaches 255.
                        *
                        * @param tc    connection for which the dupack is programmed
                        */
    1053             : void
    1054           0 : tcp_program_dupack (tcp_connection_t * tc)
    1055             : {
    1056           0 :   if (!(tc->flags & TCP_CONN_SNDACK))
    1057             :     {
    1058           0 :       session_add_self_custom_tx_evt (&tc->connection, 1);
    1059           0 :       tc->flags |= TCP_CONN_SNDACK;
    1060             :     }
    1061           0 :   if (tc->pending_dupacks < 255)
    1062           0 :     tc->pending_dupacks += 1;
    1063           0 : }
    1064             : 
                       /**
                        * Request a retransmit for the connection
                        *
                        * If TCP_CONN_RXT_PENDING is not already set, a self custom tx event is
                        * added for the connection and the flag is set. Calls made while the
                        * flag is set do nothing.
                        *
                        * @param tc    connection for which the retransmit is programmed
                        */
    1065             : void
    1066           0 : tcp_program_retransmit (tcp_connection_t * tc)
    1067             : {
    1068           0 :   if (!(tc->flags & TCP_CONN_RXT_PENDING))
    1069             :     {
    1070           0 :       session_add_self_custom_tx_evt (&tc->connection, 0);
    1071           0 :       tc->flags |= TCP_CONN_RXT_PENDING;
    1072             :     }
    1073           0 : }
    1074             : 
    1075             : /**
    1076             :  * Send window update ack
    1077             :  *
    1078             :  * Ensures that it will be sent only once, after a zero rwnd has been
    1079             :  * advertised in a previous ack, and only if rwnd has grown beyond a
    1080             :  * configurable value.
                        *
                        * The receive window is recomputed with tcp_update_rcv_wnd and compared
                        * against tcp_cfg.rwnd_min_update_ack * snd_mss. When the threshold is
                        * met the zero rwnd sent state is turned off and an ACK is programmed
                        * with tcp_program_ack; the ACK is not built here.
                        *
                        * @param tc    connection to check
    1081             :  */
    1082             : void
    1083           0 : tcp_send_window_update_ack (tcp_connection_t * tc)
    1084             : {
    1085           0 :   if (tcp_zero_rwnd_sent (tc))
    1086             :     {
    1087           0 :       tcp_update_rcv_wnd (tc);
    1088           0 :       if (tc->rcv_wnd >= tcp_cfg.rwnd_min_update_ack * tc->snd_mss)
    1089             :         {
    1090           0 :           tcp_zero_rwnd_sent_off (tc);
    1091           0 :           tcp_program_ack (tc);
    1092             :         }
    1093             :     }
    1094           0 : }
    1095             : 
    1096             : /**
    1097             :  * Allocate a new buffer and build a new tcp segment
    1098             :  *
                        * Data is peeked (not dequeued) from the tx fifo of the connection. The
                        * tcp header is pushed with sequence number snd_una + offset and snd_nxt
                        * is left untouched. Options are recomputed with tcp_make_options.
                        *
                        * If the segment plus TRANSPORT_MAX_HDRS_LEN fits in one buffer a single
                        * buffer is used, otherwise the data is spread over a chain of buffers.
                        *
    1099             :  * @param wrk           tcp worker
    1100             :  * @param tc            connection for which the segment will be allocated
    1101             :  * @param offset        offset of the first byte in the tx fifo
    1102             :  * @param max_deq_bytes segment size
    1103             :  * @param[out] b        pointer to buffer allocated
    1104             :  *
    1105             :  * @return      the number of bytes in the segment or 0 if buffer cannot be
    1106             :  *              allocated or no data available
    1107             :  */
    1108             : static int
    1109           0 : tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
    1110             :                      u32 offset, u32 max_deq_bytes, vlib_buffer_t ** b)
    1111             : {
    1112           0 :   u32 bytes_per_buffer = vnet_get_tcp_main ()->bytes_per_buffer;
    1113           0 :   vlib_main_t *vm = wrk->vm;
    1114             :   u32 bi, seg_size;
    1115           0 :   int n_bytes = 0;
    1116             :   u8 *data;
    1117             : 
                         /* Space needed for the payload and the headers pushed in front of it */
    1118           0 :   seg_size = max_deq_bytes + TRANSPORT_MAX_HDRS_LEN;
    1119             : 
    1120             :   /*
    1121             :    * Prepare options
    1122             :    */
    1123           0 :   tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
    1124             : 
    1125             :   /*
    1126             :    * Allocate and fill in buffer(s)
    1127             :    */
    1128             : 
    1129             :   /* Easy case, buffer size greater than mss */
    1130           0 :   if (PREDICT_TRUE (seg_size <= bytes_per_buffer))
    1131             :     {
    1132           0 :       if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
    1133             :         {
    1134           0 :           tcp_worker_stats_inc (wrk, no_buffer, 1);
    1135           0 :           return 0;
    1136             :         }
    1137           0 :       *b = vlib_get_buffer (vm, bi);
    1138           0 :       data = tcp_init_buffer (vm, *b);
    1139           0 :       n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
    1140             :                                             max_deq_bytes);
                             /* Callers are expected to ask only for bytes that are available */
    1141           0 :       ASSERT (n_bytes == max_deq_bytes);
    1142           0 :       b[0]->current_length = n_bytes;
    1143           0 :       tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
    1144             :                       /* burst */ 0, /* update_snd_nxt */ 0);
    1145             :     }
    1146             :   /* Split mss into multiple buffers */
    1147             :   else
    1148             :     {
    1149           0 :       u32 chain_bi = ~0, n_bufs_per_seg, n_bufs;
                             /*
                              * NOTE(review): 16 bit lengths; presumably bytes_per_buffer always
                              * fits in 16 bits - confirm.
                              */
    1150             :       u16 n_peeked, len_to_deq;
    1151             :       vlib_buffer_t *chain_b, *prev_b;
    1152             :       int i;
    1153             : 
    1154             :       /* Make sure we have enough buffers */
    1155           0 :       n_bufs_per_seg = ceil ((double) seg_size / bytes_per_buffer);
    1156           0 :       vec_validate_aligned (wrk->tx_buffers, n_bufs_per_seg - 1,
    1157             :                             CLIB_CACHE_LINE_BYTES);
    1158           0 :       n_bufs = vlib_buffer_alloc (vm, wrk->tx_buffers, n_bufs_per_seg);
    1159           0 :       if (PREDICT_FALSE (n_bufs != n_bufs_per_seg))
    1160             :         {
                                 /* Partial allocation, give back what was obtained */
    1161           0 :           if (n_bufs)
    1162           0 :             vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
    1163           0 :           tcp_worker_stats_inc (wrk, no_buffer, 1);
    1164           0 :           return 0;
    1165             :         }
    1166             : 
                             /*
                              * Buffers are consumed from the end of tx_buffers. The first one
                              * carries the headers, so it takes less payload than the others.
                              */
    1167           0 :       *b = vlib_get_buffer (vm, wrk->tx_buffers[--n_bufs]);
    1168           0 :       data = tcp_init_buffer (vm, *b);
    1169           0 :       n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
    1170             :                                             bytes_per_buffer -
    1171             :                                             TRANSPORT_MAX_HDRS_LEN);
    1172           0 :       b[0]->current_length = n_bytes;
    1173           0 :       b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
    1174           0 :       b[0]->total_length_not_including_first_buffer = 0;
    1175           0 :       max_deq_bytes -= n_bytes;
    1176             : 
    1177           0 :       chain_b = *b;
    1178           0 :       for (i = 1; i < n_bufs_per_seg; i++)
    1179             :         {
    1180           0 :           prev_b = chain_b;
    1181           0 :           len_to_deq = clib_min (max_deq_bytes, bytes_per_buffer);
    1182           0 :           chain_bi = wrk->tx_buffers[--n_bufs];
    1183           0 :           chain_b = vlib_get_buffer (vm, chain_bi);
    1184           0 :           chain_b->current_data = 0;
    1185           0 :           data = vlib_buffer_get_current (chain_b);
                                 /* Continue peeking where the previous buffer stopped */
    1186           0 :           n_peeked = session_tx_fifo_peek_bytes (&tc->connection, data,
    1187             :                                                  offset + n_bytes,
    1188             :                                                  len_to_deq);
    1189           0 :           ASSERT (n_peeked == len_to_deq);
    1190           0 :           n_bytes += n_peeked;
    1191           0 :           chain_b->current_length = n_peeked;
    1192           0 :           chain_b->next_buffer = 0;
    1193             : 
    1194             :           /* update previous buffer */
    1195           0 :           prev_b->next_buffer = chain_bi;
    1196           0 :           prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
    1197             : 
    1198           0 :           max_deq_bytes -= n_peeked;
    1199           0 :           b[0]->total_length_not_including_first_buffer += n_peeked;
    1200             :         }
    1201             : 
    1202           0 :       tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
    1203             :                       /* burst */ 0, /* update_snd_nxt */ 0);
    1204             : 
                             /* Unused buffers, if any, are the first n_bufs entries */
    1205           0 :       if (PREDICT_FALSE (n_bufs))
    1206             :         {
    1207           0 :           clib_warning ("not all buffers consumed");
    1208           0 :           vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
    1209             :         }
    1210             :     }
    1211             : 
    1212           0 :   ASSERT (n_bytes > 0);
    1213           0 :   ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer);
    1214             : 
    1215           0 :   return n_bytes;
    1216             : }
    1217             : 
    1218             : /**
    1219             :  * Build a retransmit segment
    1220             :  *
                        * The segment starts at snd_una + offset and is at most
                        * min (snd_mss, max_deq_bytes, bytes available past offset) long. On
                        * success snd_rxt_bytes, bytes_retrans, segs_retrans and the worker
                        * rxt_segs stat are updated, and the range is reported to
                        * tcp_bt_track_rxt when rate sampling is configured.
                        *
                        * @param wrk            tcp worker
                        * @param tc             connection to retransmit for, expected to be in
                        *                       state ESTABLISHED or later (asserted)
                        * @param offset         offset from snd_una of the first byte to resend
                        * @param max_deq_bytes  upper bound on segment size, must be non-zero
                        *                       (asserted)
                        * @param[out] b         pointer to the buffer built by tcp_prepare_segment
                        *
    1221             :  * @return the number of bytes in the segment or 0 if there's nothing to
    1222             :  *         retransmit
    1223             :  */
    1224             : static u32
    1225           0 : tcp_prepare_retransmit_segment (tcp_worker_ctx_t * wrk,
    1226             :                                 tcp_connection_t * tc, u32 offset,
    1227             :                                 u32 max_deq_bytes, vlib_buffer_t ** b)
    1228             : {
    1229             :   u32 start, available_bytes;
    1230           0 :   int n_bytes = 0;
    1231             : 
    1232           0 :   ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
    1233           0 :   ASSERT (max_deq_bytes != 0);
    1234             : 
    1235             :   /*
    1236             :    * Make sure we can retransmit something
    1237             :    */
    1238           0 :   available_bytes = transport_max_tx_dequeue (&tc->connection);
    1239           0 :   ASSERT (available_bytes >= offset);
    1240           0 :   available_bytes -= offset;
    1241           0 :   if (!available_bytes)
    1242           0 :     return 0;
    1243             : 
                         /* Never more than one mss and never more than what is available */
    1244           0 :   max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
    1245           0 :   max_deq_bytes = clib_min (available_bytes, max_deq_bytes);
    1246             : 
    1247           0 :   start = tc->snd_una + offset;
    1248           0 :   ASSERT (seq_leq (start + max_deq_bytes, tc->snd_nxt));
    1249             : 
                         /* Zero means the segment could not be built, nothing is accounted */
    1250           0 :   n_bytes = tcp_prepare_segment (wrk, tc, offset, max_deq_bytes, b);
    1251           0 :   if (!n_bytes)
    1252           0 :     return 0;
    1253             : 
    1254           0 :   tc->snd_rxt_bytes += n_bytes;
    1255             : 
    1256           0 :   if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
    1257           0 :     tcp_bt_track_rxt (tc, start, start + n_bytes);
    1258             : 
    1259           0 :   tc->bytes_retrans += n_bytes;
    1260           0 :   tc->segs_retrans += 1;
    1261           0 :   tcp_worker_stats_inc (wrk, rxt_segs, 1);
    1262             :   TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);
    1263             : 
    1264           0 :   return n_bytes;
    1265             : }
    1266             : 
    1267             : static void
    1268           0 : tcp_check_sack_reneging (tcp_connection_t * tc)
    1269             : {
    1270           0 :   sack_scoreboard_t *sb = &tc->sack_sb;
    1271             :   sack_scoreboard_hole_t *hole;
    1272             : 
    1273           0 :   hole = scoreboard_first_hole (sb);
    1274           0 :   if (!sb->is_reneging && (!hole || hole->start == tc->snd_una))
    1275           0 :     return;
    1276             : 
    1277           0 :   scoreboard_clear_reneging (sb, tc->snd_una, tc->snd_nxt);
    1278             : }
    1279             : 
    1280             : /**
    1281             :  * Reset congestion control, switch cwnd to loss window and try again.
    1282             :  */
    1283             : static void
    1284           0 : tcp_cc_init_rxt_timeout (tcp_connection_t * tc)
    1285             : {
    1286             :   TCP_EVT (TCP_EVT_CC_EVT, tc, 6);
    1287             : 
    1288           0 :   tc->prev_ssthresh = tc->ssthresh;
    1289           0 :   tc->prev_cwnd = tc->cwnd;
    1290             : 
    1291             :   /* If we entrered loss without fast recovery, notify cc algo of the
    1292             :    * congestion event such that it can update ssthresh and its state */
    1293           0 :   if (!tcp_in_fastrecovery (tc))
    1294           0 :     tcp_cc_congestion (tc);
    1295             : 
    1296             :   /* Let cc algo decide loss cwnd and ssthresh post unrecovered loss */
    1297           0 :   tcp_cc_loss (tc);
    1298             : 
    1299           0 :   tc->rtt_ts = 0;
    1300           0 :   tc->cwnd_acc_bytes = 0;
    1301           0 :   tc->tr_occurences += 1;
    1302           0 :   tc->sack_sb.reorder = TCP_DUPACK_THRESHOLD;
    1303           0 :   tcp_recovery_on (tc);
    1304           0 : }
    1305             : 
/**
 * Retransmit timer handler
 *
 * Behavior depends on the connection state:
 *  - SYN_SENT and CLOSED: nothing is done.
 *  - ESTABLISHED or later: resend a lost FIN, or abort the connection once
 *    rto_boff reaches TCP_RTO_BOFF_MAX, or retransmit the first unacked
 *    segment, double the RTO (capped at TCP_RTO_MAX) and, on the first
 *    backoff, reinitialize congestion control for loss recovery.
 *  - SYN_RCVD: retransmit the SYN-ACK, or close the connection once the
 *    RTO exceeds half of TCP_ESTABLISH_TIME.
 *
 * @param tc connection to handle
 */
void
tcp_timer_retransmit_handler (tcp_connection_t * tc)
{
  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
  vlib_main_t *vm = wrk->vm;
  vlib_buffer_t *b = 0;
  u32 bi, n_bytes;

  tcp_worker_stats_inc (wrk, tr_events, 1);

  /* Should be handled by a different handler */
  if (PREDICT_FALSE (tc->state == TCP_STATE_SYN_SENT))
    return;

  /* Wait-close and retransmit could pop at the same time */
  if (tc->state == TCP_STATE_CLOSED)
    return;

  if (tc->state >= TCP_STATE_ESTABLISHED)
    {
      TCP_EVT (TCP_EVT_CC_EVT, tc, 2);

      /* Lost FIN, retransmit and return. RTO is backed off but no data
       * is retransmitted and congestion control is left untouched */
      if (tc->flags & TCP_CONN_FINSNT)
        {
          tcp_send_fin (tc);
          tc->rto_boff += 1;
          tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
          return;
        }

      /* Shouldn't be here: nothing is outstanding */
      if (tc->snd_una == tc->snd_nxt)
        {
          ASSERT (!tcp_in_recovery (tc));
          tc->rto_boff = 0;
          return;
        }

      /* We're not in recovery so make sure rto_boff is 0. Can be non 0 due
       * to persist timer timeout */
      if (!tcp_in_recovery (tc) && tc->rto_boff > 0)
        {
          tc->rto_boff = 0;
          tcp_update_rto (tc);
        }

      /* Peer is dead or network connectivity is lost. Close connection.
       * RFC 1122 section 4.2.3.5 recommends a value of at least 100s. For
       * a min rto of 0.2s we need to retry about 8 times. */
      if (tc->rto_boff >= TCP_RTO_BOFF_MAX)
        {
          tcp_send_reset (tc);
          tcp_connection_set_state (tc, TCP_STATE_CLOSED);
          session_transport_closing_notify (&tc->connection);
          session_transport_closed_notify (&tc->connection);
          tcp_connection_timers_reset (tc);
          tcp_program_cleanup (wrk, tc);
          tcp_worker_stats_inc (wrk, tr_abort, 1);
          return;
        }

      /* With SACK, handle possible reneging first and then mark
       * everything in [snd_una, snd_nxt] as lost in the scoreboard */
      if (tcp_opts_sack_permitted (&tc->rcv_opts))
        {
          tcp_check_sack_reneging (tc);
          scoreboard_rxt_mark_lost (&tc->sack_sb, tc->snd_una, tc->snd_nxt);
        }

      /* Update send congestion to make sure that rxt has data to send */
      tc->snd_congestion = tc->snd_nxt;

      /* Send the first unacked segment. If we're short on buffers, return
       * as soon as possible */
      n_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
      n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, n_bytes, &b);
      if (!n_bytes)
        {
          /* No segment could be prepared. Re-arm the timer with the
           * allocation error timeout; RTO and rto_boff are not changed */
          tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
                            tcp_cfg.alloc_err_timeout);
          return;
        }

      bi = vlib_get_buffer_index (vm, b);
      tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);

      /* Exponential backoff, capped at TCP_RTO_MAX */
      tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
      tcp_retransmit_timer_update (&wrk->timer_wheel, tc);

      /* Only the first timeout of a series reinitializes congestion
       * control */
      tc->rto_boff += 1;
      if (tc->rto_boff == 1)
        {
          tcp_cc_init_rxt_timeout (tc);
          /* Record timestamp. Eifel detection algorithm RFC3522 */
          tc->snd_rxt_ts = tcp_tstamp (tc);
        }

      /* Scoreboard retransmit state restarts right after the segment
       * that was just sent */
      if (tcp_opts_sack_permitted (&tc->rcv_opts))
        scoreboard_init_rxt (&tc->sack_sb, tc->snd_una + n_bytes);

      tcp_program_retransmit (tc);
    }
  /* Retransmit SYN-ACK */
  else if (tc->state == TCP_STATE_SYN_RCVD)
    {
      TCP_EVT (TCP_EVT_CC_EVT, tc, 2);

      tc->rtt_ts = 0;

      /* Passive open establish timeout */
      if (tc->rto > TCP_ESTABLISH_TIME >> 1)
        {
          tcp_connection_set_state (tc, TCP_STATE_CLOSED);
          tcp_connection_timers_reset (tc);
          tcp_program_cleanup (wrk, tc);
          tcp_worker_stats_inc (wrk, tr_abort, 1);
          return;
        }

      /* No buffer for the SYN-ACK: retry after the allocation error
       * timeout, without touching rto_boff or RTO */
      if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
        {
          tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
                            tcp_cfg.alloc_err_timeout);
          tcp_worker_stats_inc (wrk, no_buffer, 1);
          return;
        }

      /* RTO only starts growing after TCP_RTO_SYN_RETRIES attempts */
      tc->rto_boff += 1;
      if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
        tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);

      ASSERT (tc->snd_una != tc->snd_nxt);
      tcp_retransmit_timer_update (&wrk->timer_wheel, tc);

      b = vlib_get_buffer (vm, bi);
      tcp_init_buffer (vm, b);
      tcp_make_synack (tc, b);
      TCP_EVT (TCP_EVT_SYN_RXT, tc, 1);

      /* Retransmit timer already updated, just enqueue to output */
      tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    }
  else
    {
      /* SYN_SENT and CLOSED returned early, so no other state below
       * ESTABLISHED is expected here */
      ASSERT (tc->state == TCP_STATE_CLOSED);
      return;
    }
}
    1453             : 
    1454             : /**
    1455             :  * SYN retransmit timer handler. Active open only.
    1456             :  */
    1457             : void
    1458           0 : tcp_timer_retransmit_syn_handler (tcp_connection_t * tc)
    1459             : {
    1460           0 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
    1461           0 :   vlib_main_t *vm = wrk->vm;
    1462           0 :   vlib_buffer_t *b = 0;
    1463             :   u32 bi;
    1464             : 
    1465             :   /* Note: the connection may have transitioned to ESTABLISHED... */
    1466           0 :   if (PREDICT_FALSE (tc->state != TCP_STATE_SYN_SENT))
    1467           0 :     return;
    1468             : 
    1469             :   /* Half-open connection actually moved to established but we were
    1470             :    * waiting for syn retransmit to pop to call cleanup from the right
    1471             :    * thread. */
    1472           0 :   if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
    1473             :     {
    1474           0 :       if (tcp_half_open_connection_cleanup (tc))
    1475             :         TCP_DBG ("could not remove half-open connection");
    1476           0 :       return;
    1477             :     }
    1478             : 
    1479             :   TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
    1480           0 :   tc->rtt_ts = 0;
    1481             : 
    1482             :   /* Active open establish timeout */
    1483           0 :   if (tc->rto >= TCP_ESTABLISH_TIME >> 1)
    1484             :     {
    1485           0 :       session_stream_connect_notify (&tc->connection, SESSION_E_TIMEDOUT);
    1486           0 :       tcp_connection_cleanup (tc);
    1487           0 :       return;
    1488             :     }
    1489             : 
    1490           0 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
    1491             :     {
    1492           0 :       tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
    1493             :                         tcp_cfg.alloc_err_timeout);
    1494           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
    1495           0 :       return;
    1496             :     }
    1497             : 
    1498             :   /* Try without increasing RTO a number of times. If this fails,
    1499             :    * start growing RTO exponentially */
    1500           0 :   tc->rto_boff += 1;
    1501           0 :   if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
    1502           0 :     tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
    1503             : 
    1504           0 :   b = vlib_get_buffer (vm, bi);
    1505           0 :   tcp_init_buffer (vm, b);
    1506           0 :   tcp_make_syn (tc, b);
    1507             : 
    1508             :   TCP_EVT (TCP_EVT_SYN_RXT, tc, 0);
    1509             : 
    1510           0 :   tcp_enqueue_half_open (wrk, tc, b, bi);
    1511             : 
    1512           0 :   tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
    1513           0 :                     (u32) tc->rto * TCP_TO_TIMER_TICK);
    1514             : }
    1515             : 
/**
 * Persist timer handler
 *
 * Got 0 snd_wnd from peer, try to do something about it: force out one
 * segment of unsent data, backing off the RTO each time this is attempted.
 *
 * If the connection is closed, the window has reopened beyond one MSS, or
 * a FIN was already sent, no data is sent and the connection is only
 * rescheduled if it was descheduled. The same happens when all queued
 * data has already been sent. If nothing is queued at all, or no buffer
 * can be allocated, the persist timer is re-armed.
 *
 * @param tc connection to handle
 */
void
tcp_timer_persist_handler (tcp_connection_t * tc)
{
  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
  u32 bi, max_snd_bytes, available_bytes, offset;
  tcp_main_t *tm = vnet_get_tcp_main ();
  vlib_main_t *vm = wrk->vm;
  vlib_buffer_t *b;
  int n_bytes = 0;
  u8 *data;

  /* Problem already solved or worse */
  if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
      || (tc->flags & TCP_CONN_FINSNT))
    goto update_scheduler;

  /* available_bytes counts from snd_una, offset is what is in flight */
  available_bytes = transport_max_tx_dequeue (&tc->connection);
  offset = tc->snd_nxt - tc->snd_una;

  /* Reprogram persist if no new bytes available to send. We may have data
   * next time */
  if (!available_bytes)
    {
      tcp_persist_timer_set (&wrk->timer_wheel, tc);
      return;
    }

  /* Everything queued was already sent, there is nothing new to force */
  if (available_bytes <= offset)
    goto update_scheduler;

  /* Increment RTO backoff. Note that this is done before the buffer
   * allocation below, so it is applied even if the allocation fails */
  tc->rto_boff += 1;
  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);

  /*
   * Try to force the first unsent segment (or buffer)
   */
  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
    {
      tcp_persist_timer_set (&wrk->timer_wheel, tc);
      tcp_worker_stats_inc (wrk, no_buffer, 1);
      return;
    }

  b = vlib_get_buffer (vm, bi);
  data = tcp_init_buffer (vm, b);

  tcp_validate_txf_size (tc, offset);
  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
  /* Payload is limited to one MSS, to what is queued, and to what fits
   * into a single buffer once room for headers is set aside */
  max_snd_bytes = clib_min (clib_min (tc->snd_mss, available_bytes),
                            tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN);
  /* A small but non-zero window further limits the payload */
  if (tc->snd_wnd > 0)
    max_snd_bytes = clib_min (tc->snd_wnd, max_snd_bytes);
  /* Copy payload starting at the first unsent byte */
  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
                                        max_snd_bytes);
  b->current_length = n_bytes;
  ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)
                           || tc->snd_una == tc->snd_nxt
                           || tc->rto_boff > 1));

  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
    {
      tcp_bt_check_app_limited (tc);
      tcp_bt_track_tx (tc, n_bytes);
    }

  /* Options were computed above, so the header is pushed without
   * recomputing them; update_snd_nxt is requested since this is new data */
  tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0,
                  /* burst */ 0, /* update_snd_nxt */ 1);
  tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);

  /* Just sent new data, enable retransmit */
  tcp_retransmit_timer_update (&wrk->timer_wheel, tc);

  return;

update_scheduler:

  /* Nothing was sent. If the connection was descheduled, ask the transport
   * layer to reschedule it */
  if (tcp_is_descheduled (tc))
    transport_connection_reschedule (&tc->connection);
}
    1601             : 
    1602             : /**
    1603             :  * Retransmit first unacked segment
    1604             :  */
    1605             : int
    1606           0 : tcp_retransmit_first_unacked (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
    1607             : {
    1608           0 :   vlib_main_t *vm = wrk->vm;
    1609             :   vlib_buffer_t *b;
    1610             :   u32 bi, n_bytes;
    1611             : 
    1612             :   TCP_EVT (TCP_EVT_CC_EVT, tc, 1);
    1613             : 
    1614           0 :   n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, tc->snd_mss, &b);
    1615           0 :   if (!n_bytes)
    1616           0 :     return -1;
    1617             : 
    1618           0 :   bi = vlib_get_buffer_index (vm, b);
    1619           0 :   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1620             : 
    1621           0 :   return 0;
    1622             : }
    1623             : 
    1624             : static int
    1625           0 : tcp_transmit_unsent (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
    1626             :                      u32 burst_size)
    1627             : {
    1628           0 :   u32 offset, n_segs = 0, n_written, bi, available_wnd;
    1629           0 :   vlib_main_t *vm = wrk->vm;
    1630           0 :   vlib_buffer_t *b = 0;
    1631             : 
    1632           0 :   offset = tc->snd_nxt - tc->snd_una;
    1633           0 :   available_wnd = tc->snd_wnd - offset;
    1634           0 :   burst_size = clib_min (burst_size, available_wnd / tc->snd_mss);
    1635             : 
    1636           0 :   if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
    1637           0 :     tcp_bt_check_app_limited (tc);
    1638             : 
    1639           0 :   while (n_segs < burst_size)
    1640             :     {
    1641           0 :       n_written = tcp_prepare_segment (wrk, tc, offset, tc->snd_mss, &b);
    1642           0 :       if (!n_written)
    1643           0 :         goto done;
    1644             : 
    1645           0 :       bi = vlib_get_buffer_index (vm, b);
    1646           0 :       tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1647           0 :       offset += n_written;
    1648           0 :       n_segs += 1;
    1649             : 
    1650           0 :       if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
    1651           0 :         tcp_bt_track_tx (tc, n_written);
    1652             : 
    1653           0 :       tc->snd_nxt += n_written;
    1654             :     }
    1655             : 
    1656           0 : done:
    1657           0 :   return n_segs;
    1658             : }
    1659             : 
    1660             : /**
    1661             :  * Estimate send space using proportional rate reduction (RFC6937)
    1662             :  */
    1663             : int
    1664           0 : tcp_fastrecovery_prr_snd_space (tcp_connection_t * tc)
    1665             : {
    1666             :   u32 pipe, prr_out;
    1667             :   int space;
    1668             : 
    1669           0 :   pipe = tcp_flight_size (tc);
    1670           0 :   prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);
    1671             : 
    1672           0 :   if (pipe > tc->ssthresh)
    1673             :     {
    1674           0 :       space = ((int) tc->prr_delivered * ((f64) tc->ssthresh / tc->prev_cwnd))
    1675           0 :         - prr_out;
    1676             :     }
    1677             :   else
    1678             :     {
    1679             :       int limit;
    1680           0 :       limit = clib_max ((int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
    1681           0 :       space = clib_min (tc->ssthresh - pipe, limit);
    1682             :     }
    1683           0 :   space = clib_max (space, prr_out ? 0 : tc->snd_mss);
    1684           0 :   return space;
    1685             : }
    1686             : 
    1687             : static inline u8
    1688           0 : tcp_retransmit_should_retry_head (tcp_connection_t * tc,
    1689             :                                   sack_scoreboard_t * sb)
    1690             : {
    1691           0 :   u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
    1692           0 :   f64 rr = (f64) tc->ssthresh / tc->prev_cwnd;
    1693             : 
    1694           0 :   if (tcp_fastrecovery_first (tc))
    1695           0 :     return 1;
    1696             : 
    1697           0 :   return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);
    1698             : }
    1699             : 
    1700             : static inline u8
    1701       41826 : tcp_max_tx_deq (tcp_connection_t * tc)
    1702             : {
    1703       41826 :   return (transport_max_tx_dequeue (&tc->connection)
    1704       41826 :           - (tc->snd_nxt - tc->snd_una));
    1705             : }
    1706             : 
    1707             : #define scoreboard_rescue_rxt_valid(_sb, _tc)                   \
    1708             :     (seq_geq (_sb->rescue_rxt, _tc->snd_una)                      \
    1709             :         && seq_leq (_sb->rescue_rxt, _tc->snd_congestion))
    1710             : 
/**
 * Do retransmit with SACKs
 *
 * Retransmits lost data as indicated by the SACK scoreboard. The number
 * of segments is limited by @c burst_size and the pacer burst, the number
 * of bytes by the congestion control send space (cc send space while in
 * recovery, PRR estimate otherwise). Once no more lost holes are left,
 * new data may be sent or, in fast recovery, one rescue retransmit.
 *
 * @param wrk        worker context
 * @param tc         connection in congestion recovery
 * @param burst_size maximum number of segments to send
 *
 * @return number of segments enqueued to output
 */
static int
tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
                     u32 burst_size)
{
  u32 n_written = 0, offset, max_bytes, n_segs = 0;
  u8 snd_limited = 0, can_rescue = 0;
  u32 bi, max_deq, burst_bytes;
  sack_scoreboard_hole_t *hole;
  vlib_main_t *vm = wrk->vm;
  vlib_buffer_t *b = 0;
  sack_scoreboard_t *sb;
  int snd_space;

  ASSERT (tcp_in_cong_recovery (tc));

  /* Limit the burst to what the pacer allows. If that is less than one
   * segment, try again later. Note that this path returns directly and
   * does not go through the pacer bucket reset at "done" */
  burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
  if (!burst_size)
    {
      tcp_program_retransmit (tc);
      return 0;
    }

  if (tcp_in_recovery (tc))
    snd_space = tcp_available_cc_snd_space (tc);
  else
    snd_space = tcp_fastrecovery_prr_snd_space (tc);

  /* Not enough space for a full segment */
  if (snd_space < tc->snd_mss)
    goto done;

  sb = &tc->sack_sb;

  /* Check if snd_una is a lost retransmit */
  if (pool_elts (sb->holes)
      && seq_gt (sb->high_sacked, tc->snd_congestion)
      && tc->rxt_head != tc->snd_una
      && tcp_retransmit_should_retry_head (tc, sb))
    {
      max_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
      n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b);
      if (!n_written)
        {
          tcp_program_retransmit (tc);
          goto done;
        }
      bi = vlib_get_buffer_index (vm, b);
      tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
      n_segs = 1;

      /* Remember the head that was retried; the rxt_head != snd_una check
       * above then prevents retrying it again until snd_una advances */
      tc->rxt_head = tc->snd_una;
      tc->rxt_delivered += n_written;
      tc->prr_delivered += n_written;
      ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);
    }

  tcp_fastrecovery_first_off (tc);

  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
  hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);

  /* Bytes queued by the app that were never sent */
  max_deq = transport_max_tx_dequeue (&tc->connection);
  max_deq -= tc->snd_nxt - tc->snd_una;

  while (snd_space > 0 && n_segs < burst_size)
    {
      hole = scoreboard_next_rxt_hole (sb, hole, max_deq != 0, &can_rescue,
                                       &snd_limited);
      if (!hole)
        {
          /* We are out of lost holes to retransmit so send some new data. */
          if (max_deq > tc->snd_mss)
            {
              u32 n_segs_new;
              int av_wnd;

              /* Make sure we don't exceed available window and leave space
               * for one more packet, to avoid zero window acks */
              av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
              av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
              snd_space = clib_min (snd_space, av_wnd);
              snd_space = clib_min (max_deq, snd_space);
              /* Remaining burst, in segments, further capped by the send
               * space and TCP_RXT_MAX_BURST */
              burst_size = clib_min (burst_size - n_segs,
                                     snd_space / tc->snd_mss);
              burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST);
              n_segs_new = tcp_transmit_unsent (wrk, tc, burst_size);
              /* More unsent data is left, come back later */
              if (max_deq > n_segs_new * tc->snd_mss)
                tcp_program_retransmit (tc);

              n_segs += n_segs_new;
              goto done;
            }

          /* Rescue retransmits are only done in fast recovery, when the
           * scoreboard allows it and none is already outstanding */
          if (tcp_in_recovery (tc) || !can_rescue
              || scoreboard_rescue_rxt_valid (sb, tc))
            break;

          /* If rescue rxt undefined or less than snd_una then one segment of
           * up to SMSS octets that MUST include the highest outstanding
           * unSACKed sequence number SHOULD be returned, and RescueRxt set to
           * RecoveryPoint. HighRxt MUST NOT be updated.
           */
          hole = scoreboard_last_hole (sb);
          max_bytes = clib_min (tc->snd_mss, hole->end - hole->start);
          max_bytes = clib_min (max_bytes, snd_space);
          /* Send the last max_bytes of the last hole */
          offset = hole->end - tc->snd_una - max_bytes;
          n_written = tcp_prepare_retransmit_segment (wrk, tc, offset,
                                                      max_bytes, &b);
          if (!n_written)
            goto done;

          sb->rescue_rxt = tc->snd_congestion;
          bi = vlib_get_buffer_index (vm, b);
          tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
          n_segs += 1;
          break;
        }

      /* Retransmit from high_rxt up to the end of the hole, within the
       * send space; one MSS at most if the scoreboard flagged the hole
       * as send limited */
      max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
      max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;
      if (max_bytes == 0)
        break;

      offset = sb->high_rxt - tc->snd_una;
      n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
                                                  &b);
      ASSERT (n_written <= snd_space);

      /* Nothing left to retransmit */
      if (n_written == 0)
        break;

      bi = vlib_get_buffer_index (vm, b);
      tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);

      sb->high_rxt += n_written;
      ASSERT (seq_leq (sb->high_rxt, tc->snd_nxt));

      snd_space -= n_written;
      n_segs += 1;
    }

  /* The loop ended with a hole still selected, so there may be more to
   * retransmit */
  if (hole)
    tcp_program_retransmit (tc);

done:

  transport_connection_tx_pacer_reset_bucket (&tc->connection, 0);
  return n_segs;
}
    1864             : 
    1865             : /**
    1866             :  * Fast retransmit without SACK info
                        *
                        * The burst is first capped to the number of whole segments the tx pacer
                        * allows; if that is zero the retransmit is reprogrammed and nothing is
                        * sent. If tcp_fastrecovery_first() is set, segments of at most snd_mss
                        * bytes are retransmitted starting at snd_una and stopping at
                        * snd_congestion. Congestion window space left after that, if at least
                        * one snd_mss, is used for unsent data via tcp_transmit_unsent().
                        *
                        * @param wrk         worker context, source of the vlib main
                        * @param tc          connection, asserted to be in congestion recovery
                        * @param burst_size  max number of segments the caller allows
                        * @return            number of segments enqueued to output
    1867             :  */
    1868             : static int
    1869           0 : tcp_retransmit_no_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
    1870             :                         u32 burst_size)
    1871             : {
    1872           0 :   u32 n_written = 0, offset = 0, bi, max_deq, n_segs_now, max_bytes;
    1873             :   u32 burst_bytes, sent_bytes;
    1874           0 :   vlib_main_t *vm = wrk->vm;
    1875           0 :   int snd_space, n_segs = 0;
    1876           0 :   u8 cc_limited = 0;
    1877             :   vlib_buffer_t *b;
    1878             : 
    1879           0 :   ASSERT (tcp_in_cong_recovery (tc));
    1880             :   TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
    1881             : 
                         /* Limit the burst to the whole segments the pacer currently allows.
                          * NOTE(review): snd_mss is used as a divisor here, ahead of the
                          * snd_mss == 0 test further down; confirm it cannot be zero on entry */
    1882           0 :   burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
    1883           0 :   burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
    1884           0 :   if (!burst_size)
    1885             :     {
    1886           0 :       tcp_program_retransmit (tc);
    1887           0 :       return 0;
    1888             :     }
    1889             : 
                         /* Remember if the congestion window, not the pacer, limits this burst */
    1890           0 :   snd_space = tcp_available_cc_snd_space (tc);
    1891           0 :   cc_limited = snd_space < burst_bytes;
    1892             : 
                         /* Only the first pass retransmits, later passes just send new data */
    1893           0 :   if (!tcp_fastrecovery_first (tc))
    1894           0 :     goto send_unsent;
    1895             : 
    1896             :   /* RFC 6582: [If a partial ack], retransmit the first unacknowledged
    1897             :    * segment. */
    1898           0 :   while (snd_space > 0 && n_segs < burst_size)
    1899             :     {
                             /* offset is relative to snd_una, do not go past snd_congestion */
    1900           0 :       max_bytes = clib_min (tc->snd_mss,
    1901             :                             tc->snd_congestion - tc->snd_una - offset);
    1902           0 :       if (!max_bytes)
    1903           0 :         break;
    1904           0 :       n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
    1905             :                                                   &b);
    1906             : 
    1907             :       /* Nothing left to retransmit */
    1908           0 :       if (n_written == 0)
    1909           0 :         break;
    1910             : 
    1911           0 :       bi = vlib_get_buffer_index (vm, b);
    1912           0 :       tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1913           0 :       snd_space -= n_written;
    1914           0 :       offset += n_written;
    1915           0 :       n_segs += 1;
    1916             :     }
    1917             : 
                         /* Burst used up by retransmissions, no room left for new data */
    1918           0 :   if (n_segs == burst_size)
    1919           0 :     goto done;
    1920             : 
    1921           0 : send_unsent:
    1922             : 
    1923             :   /* RFC 6582: Send a new segment if permitted by the new value of cwnd. */
    1924           0 :   if (snd_space < tc->snd_mss || tc->snd_mss == 0)
    1925           0 :     goto done;
    1926             : 
                         /* Leave out the bytes between snd_una and snd_nxt, they were sent */
    1927           0 :   max_deq = transport_max_tx_dequeue (&tc->connection);
    1928           0 :   max_deq -= tc->snd_nxt - tc->snd_una;
    1929           0 :   if (max_deq)
    1930             :     {
    1931           0 :       snd_space = clib_min (max_deq, snd_space);
    1932           0 :       burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss);
    1933           0 :       n_segs_now = tcp_transmit_unsent (wrk, tc, burst_size);
                             /* More unsent data than what went out now, come back for it */
    1934           0 :       if (n_segs_now && max_deq > n_segs_now * tc->snd_mss)
    1935           0 :         tcp_program_retransmit (tc);
    1936           0 :       n_segs += n_segs_now;
    1937             :     }
    1938             : 
    1939           0 : done:
    1940           0 :   tcp_fastrecovery_first_off (tc);
    1941             : 
                         /* Charge the pacer with the whole burst if cc limited, otherwise with
                          * n_segs full segments capped to the burst */
    1942           0 :   sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes);
    1943           0 :   sent_bytes = cc_limited ? burst_bytes : sent_bytes;
    1944           0 :   transport_connection_tx_pacer_update_bytes (&tc->connection, sent_bytes);
    1945             : 
    1946           0 :   return n_segs;
    1947             : }
    1948             : 
                       /**
                        * Send the acks or dupacks pending on a connection
                        *
                        * With no dupacks pending, one ack is sent only if the connection is in
                        * congestion recovery, tcp_max_tx_deq() reports nothing, or the state is
                        * not established. With dupacks pending, at most max_burst_size acks are
                        * sent and what does not fit is reprogrammed with tcp_program_dupack().
                        *
                        * @param tc              connection
                        * @param max_burst_size  max number of acks that may be sent now
                        * @return                number of acks sent
                        */
    1949             : static int
    1950       41826 : tcp_send_acks (tcp_connection_t * tc, u32 max_burst_size)
    1951             : {
    1952             :   int j, n_acks;
    1953             : 
    1954       41826 :   if (!tc->pending_dupacks)
    1955             :     {
    1956       41826 :       if (tcp_in_cong_recovery (tc) || !tcp_max_tx_deq (tc)
    1957        9152 :           || tc->state != TCP_STATE_ESTABLISHED)
    1958             :         {
    1959       32676 :           tcp_send_ack (tc);
    1960       32676 :           return 1;
    1961             :         }
    1962        9150 :       return 0;
    1963             :     }
    1964             : 
    1965             :   /* If we're supposed to send dupacks but have no ooo data
    1966             :    * send only one ack */
    1967           0 :   if (!vec_len (tc->snd_sacks))
    1968             :     {
    1969           0 :       tcp_send_ack (tc);
    1970           0 :       tc->dupacks_out += 1;
    1971           0 :       tc->pending_dupacks = 0;
    1972           0 :       return 1;
    1973             :     }
    1974             : 
    1975             :   /* Start with first sack block */
    1976           0 :   tc->snd_sack_pos = 0;
    1977             : 
    1978             :   /* Generate enough dupacks to cover all sack blocks. Do not generate
    1979             :    * more sacks than the number of packets received. But do generate at
    1980             :    * least 3, i.e., the number needed to signal congestion, if needed. */
    1981           0 :   n_acks = vec_len (tc->snd_sacks) / TCP_OPTS_MAX_SACK_BLOCKS;
    1982           0 :   n_acks = clib_min (n_acks, tc->pending_dupacks);
    1983           0 :   n_acks = clib_max (n_acks, clib_min (tc->pending_dupacks, 3));
    1984           0 :   for (j = 0; j < clib_min (n_acks, max_burst_size); j++)
    1985           0 :     tcp_send_ack (tc);
    1986             : 
                         /* NOTE(review): when n_acks equals max_burst_size all acks were sent
                          * above, yet the else branch still reprograms a dupack; confirm this
                          * is intended */
    1987           0 :   if (n_acks < max_burst_size)
    1988             :     {
    1989           0 :       tc->pending_dupacks = 0;
    1990           0 :       tc->snd_sack_pos = 0;
    1991           0 :       tc->dupacks_out += n_acks;
    1992           0 :       return n_acks;
    1993             :     }
    1994             :   else
    1995             :     {
    1996             :       TCP_DBG ("constrained by burst size");
    1997           0 :       tc->pending_dupacks = n_acks - max_burst_size;
    1998           0 :       tc->dupacks_out += max_burst_size;
    1999           0 :       tcp_program_dupack (tc);
    2000           0 :       return max_burst_size;
    2001             :     }
    2002             : }
    2003             : 
                       /**
                        * Retransmit for a connection, with or without SACK
                        *
                        * Does nothing for a closed connection. Otherwise picks the SACK based or
                        * the non-SACK retransmit function, depending on whether the peer's
                        * received options permit SACK.
                        *
                        * @param tc              connection
                        * @param max_burst_size  max number of segments that may be sent
                        * @return                number of segments retransmitted
                        */
    2004             : static int
    2005           0 : tcp_do_retransmit (tcp_connection_t * tc, u32 max_burst_size)
    2006             : {
    2007             :   tcp_worker_ctx_t *wrk;
    2008             :   u32 n_segs;
    2009             : 
    2010           0 :   if (PREDICT_FALSE (tc->state == TCP_STATE_CLOSED))
    2011           0 :     return 0;
    2012             : 
    2013           0 :   wrk = tcp_get_worker (tc->c_thread_index);
    2014             : 
    2015           0 :   if (tcp_opts_sack_permitted (&tc->rcv_opts))
    2016           0 :     n_segs = tcp_retransmit_sack (wrk, tc, max_burst_size);
    2017             :   else
    2018           0 :     n_segs = tcp_retransmit_no_sack (wrk, tc, max_burst_size);
    2019             : 
    2020           0 :   return n_segs;
    2021             : }
    2022             : 
                       /**
                        * Custom tx handler: run pending retransmits, then send pending acks
                        *
                        * @param conn  connection, used as a tcp_connection_t
                        * @param sp    send parameters, only max_burst_size is read here
                        * @return      number of segments sent, retransmits plus acks
                        */
    2023             : int
    2024       41829 : tcp_session_custom_tx (void *conn, transport_send_params_t * sp)
    2025             : {
    2026       41829 :   tcp_connection_t *tc = (tcp_connection_t *) conn;
    2027       41829 :   u32 n_segs = 0;
    2028             : 
                         /* Retransmit only if one was programmed while in congestion recovery */
    2029       41829 :   if (tcp_in_cong_recovery (tc) && (tc->flags & TCP_CONN_RXT_PENDING))
    2030             :     {
    2031           0 :       tc->flags &= ~TCP_CONN_RXT_PENDING;
    2032           0 :       n_segs = tcp_do_retransmit (tc, sp->max_burst_size);
    2033             :     }
    2034             : 
    2035       41829 :   if (!(tc->flags & TCP_CONN_SNDACK))
    2036           3 :     return n_segs;
    2037             : 
    2038       41826 :   tc->flags &= ~TCP_CONN_SNDACK;
    2039             : 
    2040             :   /* We have retransmitted packets and no dupack */
    2041       41826 :   if (n_segs && !tc->pending_dupacks)
    2042           0 :     return n_segs;
    2043             : 
                         /* Burst already used up by retransmits, ask for the ack again */
    2044       41826 :   if (sp->max_burst_size <= n_segs)
    2045             :     {
    2046           0 :       tcp_program_ack (tc);
    2047           0 :       return n_segs;
    2048             :     }
    2049             : 
    2050       41826 :   n_segs += tcp_send_acks (tc, sp->max_burst_size - n_segs);
    2051             : 
    2052       41826 :   return n_segs;
    2053             : }
    2054             : #endif /* CLIB_MARCH_VARIANT */
    2055             : 
                       /**
                        * Pick the next node for a packet sent to an IPv6 link-local peer
                        *
                        * Looks up the neighbor adjacency for the connection's remote address on
                        * tc0->sw_if_index. Without one, or if its lookup next index is neither
                        * rewrite nor arp, the packet is sent to drop with
                        * TCP_ERROR_LINK_LOCAL_RW.
                        *
                        * @param tc0     connection
                        * @param b0      buffer to be sent
                        * @param next0   set to the selected next node index
                        * @param error0  set to TCP_ERROR_LINK_LOCAL_RW on failure, else untouched
                        */
    2056             : static void
    2057           0 : tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0,
    2058             :                               u16 * next0, u32 * error0)
    2059             : {
    2060             :   ip_adjacency_t *adj;
    2061             :   adj_index_t ai;
    2062             : 
    2063             :   /* Not thread safe but as long as the connection exists the adj should
    2064             :    * not be removed */
    2065           0 :   ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &tc0->c_rmt_ip,
    2066             :                      tc0->sw_if_index);
    2067           0 :   if (ai == ADJ_INDEX_INVALID)
    2068             :     {
    2069           0 :       vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
    2070           0 :       *next0 = TCP_OUTPUT_NEXT_DROP;
    2071           0 :       *error0 = TCP_ERROR_LINK_LOCAL_RW;
    2072           0 :       return;
    2073             :     }
    2074             : 
    2075           0 :   adj = adj_get (ai);
    2076           0 :   if (PREDICT_TRUE (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE))
    2077           0 :     *next0 = TCP_OUTPUT_NEXT_IP_REWRITE;
    2078           0 :   else if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP)
    2079           0 :     *next0 = TCP_OUTPUT_NEXT_IP_ARP;
    2080             :   else
    2081             :     {
    2082           0 :       *next0 = TCP_OUTPUT_NEXT_DROP;
    2083           0 :       *error0 = TCP_ERROR_LINK_LOCAL_RW;
    2084             :     }
                         /* The adjacency index is stored for all three outcomes above */
    2085           0 :   vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
    2086             : }
    2087             : 
                       /**
                        * Add tx trace records for the traced buffers of an output frame
                        *
                        * For each buffer flagged VLIB_BUFFER_IS_TRACED the tcp header at the
                        * buffer's current position and the owning connection are copied into a
                        * new trace record. If the connection lookup fails the connection part
                        * of the record is zeroed instead.
                        *
                        * @param vm       vlib main of the current thread
                        * @param node     node runtime the trace is added to
                        * @param to_next  buffer indices of the frame
                        * @param n_bufs   number of buffer indices
                        */
    2088             : static void
    2089           0 : tcp46_output_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
    2090             :                           u32 * to_next, u32 n_bufs)
    2091             : {
    2092             :   tcp_connection_t *tc;
    2093             :   tcp_tx_trace_t *t;
    2094             :   vlib_buffer_t *b;
    2095             :   tcp_header_t *th;
    2096             :   u32 i;
    2097             : 
    2098           0 :   for (i = 0; i < n_bufs; i++)
    2099             :     {
    2100           0 :       b = vlib_get_buffer (vm, to_next[i]);
    2101           0 :       if (!(b->flags & VLIB_BUFFER_IS_TRACED))
    2102           0 :         continue;
    2103           0 :       th = vlib_buffer_get_current (b);
    2104           0 :       tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
    2105             :                                vm->thread_index);
    2106           0 :       t = vlib_add_trace (vm, node, b, sizeof (*t));
    2107           0 :       clib_memcpy_fast (&t->tcp_header, th, sizeof (t->tcp_header));
                             /* The lookup can fail, the output loop drops such buffers. Do not
                              * copy from a null connection, record an empty one instead */
                             if (PREDICT_TRUE (tc != 0))
    2108           0 :         clib_memcpy_fast (&t->tcp_connection, tc,
                                                 sizeof (t->tcp_connection));
                             else
                               clib_memset (&t->tcp_connection, 0, sizeof (t->tcp_connection));
    2109             :     }
    2110           0 : }
    2111             : 
                       /**
                        * Push the IP header for a connection onto a buffer
                        *
                        * Uses vlib_buffer_push_ip4() with the connection's checksum offload
                        * setting, or vlib_buffer_push_ip6_custom() with the connection's flow
                        * label, with the connection's local and remote addresses.
                        *
                        * @param vm      vlib main
                        * @param b0      buffer whose current data is the tcp header
                        * @param tc0     connection that owns the buffer
                        * @param is_ip4  non-zero for IPv4, zero for IPv6
                        */
    2112             : always_inline void
    2113     1062440 : tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0,
    2114             :                     tcp_connection_t * tc0, u8 is_ip4)
    2115             : {
    2116             :   TCP_EVT (TCP_EVT_OUTPUT, tc0,
    2117             :            ((tcp_header_t *) vlib_buffer_get_current (b0))->flags,
    2118             :            b0->current_length);
    2119             : 
    2120     1062440 :   if (is_ip4)
    2121     1062420 :     vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
    2122     1062420 :                           IP_PROTOCOL_TCP, tcp_csum_offload (tc0));
    2123             :   else
    2124          17 :     vlib_buffer_push_ip6_custom (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6,
    2125             :                                  IP_PROTOCOL_TCP, tc0->ipv6_flow_label);
    2126     1062440 : }
    2127             : 
                       /**
                        * Mark a buffer for GSO if it carries more than one segment of data
                        *
                        * Only acts on connections with TCP_CFG_F_TSO set. The data length is
                        * the first buffer's length minus tcp header and options, plus the
                        * length of chained buffers when that total is valid. If it exceeds
                        * snd_mss the buffer gets VNET_BUFFER_F_GSO together with the l4 header
                        * size and a gso size of snd_mss.
                        *
                        * NOTE(review): the callers in this file invoke this after
                        * tcp_output_push_ip(); if that grows current_length, data_len also
                        * counts the IP header. Confirm.
                        *
                        * @param tc  connection
                        * @param b   buffer about to be sent
                        */
    2128             : always_inline void
    2129     1062440 : tcp_check_if_gso (tcp_connection_t * tc, vlib_buffer_t * b)
    2130             : {
    2131     1062440 :   if (PREDICT_TRUE (!(tc->cfg_flags & TCP_CFG_F_TSO)))
    2132     1062440 :     return;
    2133             : 
                         /* Use 32 bits: once the chained buffers are added the length may not
                          * fit 16 bits and a wrapped value would skip the gso marking below */
    2134           0 :   u32 data_len = b->current_length - sizeof (tcp_header_t) - tc->snd_opts_len;
    2135             : 
    2136           0 :   if (PREDICT_FALSE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID))
    2137           0 :     data_len += b->total_length_not_including_first_buffer;
    2138             : 
    2139           0 :   if (PREDICT_TRUE (data_len <= tc->snd_mss))
    2140           0 :     return;
    2141             :   else
    2142             :     {
    2143           0 :       ASSERT ((b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
    2144           0 :       ASSERT ((b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
    2145           0 :       b->flags |= VNET_BUFFER_F_GSO;
    2146           0 :       vnet_buffer2 (b)->gso_l4_hdr_sz =
    2147           0 :         sizeof (tcp_header_t) + tc->snd_opts_len;
    2148           0 :       vnet_buffer2 (b)->gso_size = tc->snd_mss;
    2149             :     }
    2150             : }
    2151             : 
                       /**
                        * Select the next node and set tx/rx interface data for a packet
                        *
                        * The connection's next_node_index is used when non-zero, otherwise the
                        * packet goes to ip lookup. For IPv6 link-local peers the next node is
                        * taken from tcp_output_handle_link_local(); if that reports an error it
                        * is stored in the buffer and segs_out is not incremented.
                        *
                        * @param tc0         connection
                        * @param b0          buffer to be sent
                        * @param error_node  node runtime whose error table is used
                        * @param next0       set to the selected next node index
                        * @param is_ip4      non-zero for IPv4, zero for IPv6
                        */
    2152             : always_inline void
    2153     1062440 : tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0,
    2154             :                           vlib_node_runtime_t * error_node, u16 * next0,
    2155             :                           u8 is_ip4)
    2156             : {
    2157             :   /* If next_index is not drop use it */
    2158     1062440 :   if (tc0->next_node_index)
    2159             :     {
    2160           0 :       *next0 = tc0->next_node_index;
    2161           0 :       vnet_buffer (b0)->tcp.next_node_opaque = tc0->next_node_opaque;
    2162             :     }
    2163             :   else
    2164             :     {
    2165     1062440 :       *next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
    2166             :     }
    2167             : 
                         /* The tx slot carries the connection's fib index, not an interface */
    2168     1062440 :   vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
    2169     1062440 :   vnet_buffer (b0)->sw_if_index[VLIB_RX] = tc0->sw_if_index;
    2170             : 
    2171     1062440 :   if (!is_ip4)
    2172             :     {
    2173          17 :       u32 error0 = 0;
    2174             : 
    2175          17 :       if (PREDICT_FALSE (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6)))
    2176           0 :         tcp_output_handle_link_local (tc0, b0, next0, &error0);
    2177             : 
    2178          17 :       if (PREDICT_FALSE (error0))
    2179             :         {
    2180           0 :           b0->error = error_node->errors[error0];
    2181           0 :           return;
    2182             :         }
    2183             :     }
    2184             : 
    2185     1062440 :   tc0->segs_out += 1;
    2186             : }
    2187             : 
                       /**
                        * Output node function shared by the IPv4 and IPv6 tcp output nodes
                        *
                        * For every buffer of the frame the owning connection is looked up, the
                        * IP header is pushed, GSO is checked and the next node is selected.
                        * Buffers whose connection lookup fails are dropped and counted as
                        * TCP_ERROR_INVALID_CONNECTION.
                        *
                        * @param vm      vlib main of the current thread
                        * @param node    node runtime
                        * @param frame   frame of buffer indices to send
                        * @param is_ip4  non-zero for IPv4, zero for IPv6
                        * @return        number of buffers in the frame
                        */
    2188             : always_inline uword
    2189       38176 : tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
    2190             :                      vlib_frame_t * frame, int is_ip4)
    2191             : {
    2192       38176 :   u32 n_left_from, *from, thread_index = vm->thread_index;
    2193             :   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
    2194             :   u16 nexts[VLIB_FRAME_SIZE], *next;
    2195       38176 :   u16 err_counters[TCP_N_ERROR] = { 0 };
    2196             : 
    2197       38176 :   from = vlib_frame_vector_args (frame);
    2198       38176 :   n_left_from = frame->n_vectors;
    2199       38176 :   tcp_update_time_now (tcp_get_worker (thread_index));
    2200             : 
    2201       38176 :   if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
    2202           0 :     tcp46_output_trace_frame (vm, node, from, n_left_from);
    2203             : 
    2204       38176 :   vlib_get_buffers (vm, from, bufs, n_left_from);
    2205       38176 :   b = bufs;
    2206       38176 :   next = nexts;
    2207             : 
                         /* Two buffers per iteration. At least four must be left so that the
                          * two prefetched ones, b[2] and b[3], are valid */
    2208      534839 :   while (n_left_from >= 4)
    2209             :     {
    2210             :       tcp_connection_t *tc0, *tc1;
    2211             : 
    2212             :       {
    2213      496663 :         vlib_prefetch_buffer_header (b[2], STORE);
    2214      496663 :         CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
    2215             : 
    2216      496663 :         vlib_prefetch_buffer_header (b[3], STORE);
    2217      496663 :         CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
    2218             :       }
    2219             : 
    2220      496663 :       tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
    2221             :                                 thread_index);
    2222      496663 :       tc1 = tcp_connection_get (vnet_buffer (b[1])->tcp.connection_index,
    2223             :                                 thread_index);
    2224             : 
                             /* Common case, both connection lookups succeeded */
    2225      496663 :       if (PREDICT_TRUE (!tc0 + !tc1 == 0))
    2226             :         {
    2227      496663 :           tcp_output_push_ip (vm, b[0], tc0, is_ip4);
    2228      496663 :           tcp_output_push_ip (vm, b[1], tc1, is_ip4);
    2229             : 
    2230      496663 :           tcp_check_if_gso (tc0, b[0]);
    2231      496663 :           tcp_check_if_gso (tc1, b[1]);
    2232             : 
    2233      496663 :           tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
    2234      496663 :           tcp_output_handle_packet (tc1, b[1], node, &next[1], is_ip4);
    2235             :         }
    2236             :       else
    2237             :         {
                                 /* At least one lookup failed, handle each buffer on its own */
    2238           0 :           if (tc0 != 0)
    2239             :             {
    2240           0 :               tcp_output_push_ip (vm, b[0], tc0, is_ip4);
    2241           0 :               tcp_check_if_gso (tc0, b[0]);
    2242           0 :               tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
    2243             :             }
    2244             :           else
    2245             :             {
    2246           0 :               tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION,
    2247             :                                    1);
    2248           0 :               next[0] = TCP_OUTPUT_NEXT_DROP;
    2249             :             }
    2250           0 :           if (tc1 != 0)
    2251             :             {
    2252           0 :               tcp_output_push_ip (vm, b[1], tc1, is_ip4);
    2253           0 :               tcp_check_if_gso (tc1, b[1]);
    2254           0 :               tcp_output_handle_packet (tc1, b[1], node, &next[1], is_ip4);
    2255             :             }
    2256             :           else
    2257             :             {
    2258           0 :               tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION,
    2259             :                                    1);
    2260           0 :               next[1] = TCP_OUTPUT_NEXT_DROP;
    2261             :             }
    2262             :         }
    2263             : 
    2264      496663 :       b += 2;
    2265      496663 :       next += 2;
    2266      496663 :       n_left_from -= 2;
    2267             :     }
                         /* Remaining buffers, one per iteration */
    2268      107291 :   while (n_left_from > 0)
    2269             :     {
    2270             :       tcp_connection_t *tc0;
    2271             : 
    2272       69115 :       if (n_left_from > 1)
    2273             :         {
    2274       30939 :           vlib_prefetch_buffer_header (b[1], STORE);
    2275       30939 :           CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
    2276             :         }
    2277             : 
    2278       69115 :       tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
    2279             :                                 thread_index);
    2280             : 
    2281       69115 :       if (PREDICT_TRUE (tc0 != 0))
    2282             :         {
    2283       69115 :           tcp_output_push_ip (vm, b[0], tc0, is_ip4);
    2284       69115 :           tcp_check_if_gso (tc0, b[0]);
    2285       69115 :           tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
    2286             :         }
    2287             :       else
    2288             :         {
    2289           0 :           tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION, 1);
    2290           0 :           next[0] = TCP_OUTPUT_NEXT_DROP;
    2291             :         }
    2292             : 
    2293       69115 :       b += 1;
    2294       69115 :       next += 1;
    2295       69115 :       n_left_from -= 1;
    2296             :     }
    2297             : 
                         /* TCP_ERROR_PKTS_SENT is incremented by the whole frame size, buffers
                          * sent to drop included */
    2298     1450690 :   tcp_store_err_counters (output, err_counters);
    2299       38176 :   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
    2300       38176 :   vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4),
    2301       38176 :                                TCP_ERROR_PKTS_SENT, frame->n_vectors);
    2302       38176 :   return frame->n_vectors;
    2303             : }
    2304             : 
                       /** tcp4-output node function, runs the shared output path with is_ip4 set */
    2305       40459 : VLIB_NODE_FN (tcp4_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
    2306             :                                  vlib_frame_t * from_frame)
    2307             : {
    2308       38159 :   return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
    2309             : }
    2310             : 
                       /** tcp6-output node function, runs the shared output path with is_ip4 clear */
    2311        2317 : VLIB_NODE_FN (tcp6_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
    2312             :                                  vlib_frame_t * from_frame)
    2313             : {
    2314          17 :   return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
    2315             : }
    2316             : 
    2317             : /* *INDENT-OFF* */
                       /* Registration of the "tcp4-output" graph node. Its next nodes are
                        * generated from foreach_tcp4_output_next, indexed by TCP_OUTPUT_NEXT_* */
    2318      183788 : VLIB_REGISTER_NODE (tcp4_output_node) =
    2319             : {
    2320             :   .name = "tcp4-output",
    2321             :   /* Takes a vector of packets. */
    2322             :   .vector_size = sizeof (u32),
    2323             :   .n_errors = TCP_N_ERROR,
    2324             :   .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
    2325             :   .error_counters = tcp_output_error_counters,
    2326             :   .n_next_nodes = TCP_OUTPUT_N_NEXT,
    2327             :   .next_nodes = {
    2328             : #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
    2329             :     foreach_tcp4_output_next
    2330             : #undef _
    2331             :   },
    2332             :   .format_buffer = format_tcp_header,
    2333             :   .format_trace = format_tcp_tx_trace,
    2334             : };
    2335             : /* *INDENT-ON* */
    2336             : 
    2337             : /* *INDENT-OFF* */
                       /* Registration of the "tcp6-output" graph node. Its next nodes are
                        * generated from foreach_tcp6_output_next, indexed by TCP_OUTPUT_NEXT_* */
    2338      183788 : VLIB_REGISTER_NODE (tcp6_output_node) =
    2339             : {
    2340             :   .name = "tcp6-output",
    2341             :     /* Takes a vector of packets. */
    2342             :   .vector_size = sizeof (u32),
    2343             :   .n_errors = TCP_N_ERROR,
    2344             :   .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
    2345             :   .error_counters = tcp_output_error_counters,
    2346             :   .n_next_nodes = TCP_OUTPUT_N_NEXT,
    2347             :   .next_nodes = {
    2348             : #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
    2349             :     foreach_tcp6_output_next
    2350             : #undef _
    2351             :   },
    2352             :   .format_buffer = format_tcp_header,
    2353             :   .format_trace = format_tcp_tx_trace,
    2354             : };
    2355             : /* *INDENT-ON* */
    2356             : 
                       /** Next node indices of the tcp reset nodes */
    2357             : typedef enum _tcp_reset_next
    2358             : {
    2359             :   TCP_RESET_NEXT_DROP,
    2360             :   TCP_RESET_NEXT_IP_LOOKUP,
    2361             :   TCP_RESET_N_NEXT
    2362             : } tcp_reset_next_t;
    2363             : 
                       /* Next index suffix to graph node name, for the IPv4 reset node */
    2364             : #define foreach_tcp4_reset_next         \
    2365             :   _(DROP, "error-drop")                 \
    2366             :   _(IP_LOOKUP, "ip4-lookup")
    2367             : 
                       /* Next index suffix to graph node name, for the IPv6 reset node */
    2368             : #define foreach_tcp6_reset_next         \
    2369             :   _(DROP, "error-drop")                 \
    2370             :   _(IP_LOOKUP, "ip6-lookup")
    2371             : 
                       /**
                        * Add tx trace records for the traced buffers of a reset frame
                        *
                        * The buffers are expected to start with an IP header. Addresses and
                        * ports of the record's connection are filled from the packet headers:
                        * local from the destination fields, remote from the source fields. The
                        * tcp header is copied as is.
                        *
                        * @param vm      vlib main
                        * @param node    node runtime the trace is added to
                        * @param bs      buffers of the frame
                        * @param n_bufs  number of buffers
                        * @param is_ip4  non-zero if the buffers carry IPv4, zero for IPv6
                        */
    2372             : static void
    2373           0 : tcp_reset_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
    2374             :                        vlib_buffer_t **bs, u32 n_bufs, u8 is_ip4)
    2375             : {
    2376             :   tcp_header_t *tcp;
    2377             :   tcp_tx_trace_t *t;
    2378             :   u32 i;
    2379             : 
    2380           0 :   for (i = 0; i < n_bufs; i++)
    2381             :     {
    2382           0 :       if (bs[i]->flags & VLIB_BUFFER_IS_TRACED)
    2383             :         {
    2385           0 :           t = vlib_add_trace (vm, node, bs[i], sizeof (*t));
    2386             : 
    2387           0 :           if (is_ip4)
    2388             :             {
    2389           0 :               ip4_header_t *ih4 = vlib_buffer_get_current (bs[i]);
    2390           0 :               tcp = ip4_next_header (ih4);
    2391           0 :               t->tcp_connection.c_lcl_ip.ip4 = ih4->dst_address;
    2392           0 :               t->tcp_connection.c_rmt_ip.ip4 = ih4->src_address;
    2393           0 :               t->tcp_connection.c_is_ip4 = 1;
    2394             :             }
    2395             :           else
    2396             :             {
    2397           0 :               ip6_header_t *ih6 = vlib_buffer_get_current (bs[i]);
    2398           0 :               tcp = ip6_next_header (ih6);
    2399           0 :               t->tcp_connection.c_lcl_ip.ip6 = ih6->dst_address;
    2400           0 :               t->tcp_connection.c_rmt_ip.ip6 = ih6->src_address;
                                     /* Set the address family explicitly, as the IPv4 branch
                                      * does, so it never depends on the record's prior content */
                                     t->tcp_connection.c_is_ip4 = 0;
    2401             :             }
    2402           0 :           t->tcp_connection.c_lcl_port = tcp->dst_port;
    2403           0 :           t->tcp_connection.c_rmt_port = tcp->src_port;
    2404           0 :           t->tcp_connection.c_proto = TRANSPORT_PROTO_TCP;
    2405           0 :           clib_memcpy_fast (&t->tcp_header, tcp, sizeof (t->tcp_header));
    2406             :         }
    2407             :     }
    2408           0 : }
    2409             : 
                       /**
                        * Reset node function shared by the IPv4 and IPv6 tcp reset nodes
                        *
                        * Every buffer of the frame is turned into a reset with
                        * tcp_buffer_make_reset(), flagged as locally originated and sent to ip
                        * lookup in the fib of the interface it was received on. The fib index
                        * is taken from the table of the buffer's own address family.
                        *
                        * @param vm      vlib main
                        * @param node    node runtime
                        * @param frame   frame of buffer indices
                        * @param is_ip4  non-zero for IPv4, zero for IPv6
                        * @return        number of buffers in the frame
                        */
    2410             : static uword
    2411           2 : tcp46_reset_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
    2412             :                     vlib_frame_t *frame, u8 is_ip4)
    2413             : {
    2414             :   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
    2415             :   u16 nexts[VLIB_FRAME_SIZE], *next;
    2416             :   u32 n_left_from, *from;
    2417             : 
    2418           2 :   from = vlib_frame_vector_args (frame);
    2419           2 :   n_left_from = frame->n_vectors;
    2420           2 :   vlib_get_buffers (vm, from, bufs, n_left_from);
    2421             : 
    2422           2 :   b = bufs;
    2423           2 :   next = nexts;
    2424             : 
    2425          88 :   while (n_left_from > 0)
    2426             :     {
    2427          86 :       tcp_buffer_make_reset (vm, b[0], is_ip4);
    2428             : 
    2429             :       /* IP lookup in fib where it was received. Previous value
    2430             :        * was overwritten by tcp-input */
                             if (is_ip4)
    2431          86 :         vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
    2432          86 :           vec_elt (ip4_main.fib_index_by_sw_if_index,
    2433             :                    vnet_buffer (b[0])->sw_if_index[VLIB_RX]);
                             else
                               vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
                                 vec_elt (ip6_main.fib_index_by_sw_if_index,
                                          vnet_buffer (b[0])->sw_if_index[VLIB_RX]);
    2434             : 
    2435          86 :       b[0]->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
    2436          86 :       next[0] = TCP_RESET_NEXT_IP_LOOKUP;
    2437             : 
    2438          86 :       b += 1;
    2439          86 :       next += 1;
    2440          86 :       n_left_from -= 1;
    2441             :     }
    2442             : 
                         /* Traced after the loop, so the records show the resets being sent */
    2443           2 :   if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
    2444           0 :     tcp_reset_trace_frame (vm, node, bufs, frame->n_vectors, is_ip4);
    2445             : 
    2446           2 :   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
    2447             : 
    2448           2 :   vlib_node_increment_counter (vm, node->node_index, TCP_ERROR_RST_SENT,
    2449           2 :                                frame->n_vectors);
    2450             : 
    2451           2 :   return frame->n_vectors;
    2452             : }
    2453             : 
                       /** tcp4-reset node function, runs the shared reset path with is_ip4 set */
    2454        2302 : VLIB_NODE_FN (tcp4_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
    2455             :                                 vlib_frame_t * from_frame)
    2456             : {
    2457           2 :   return tcp46_reset_inline (vm, node, from_frame, 1);
    2458             : }
    2459             : 
                       /** tcp6-reset node function, runs the shared reset path with is_ip4 clear */
    2460        2300 : VLIB_NODE_FN (tcp6_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
    2461             :                                 vlib_frame_t * from_frame)
    2462             : {
    2463           0 :   return tcp46_reset_inline (vm, node, from_frame, 0);
    2464             : }
    2465             : 
    2466             : /* *INDENT-OFF* */
                       /* Registration of the "tcp4-reset" graph node. Its next nodes are
                        * generated from foreach_tcp4_reset_next, indexed by TCP_RESET_NEXT_* */
    2467      183788 : VLIB_REGISTER_NODE (tcp4_reset_node) = {
    2468             :   .name = "tcp4-reset",
    2469             :   .vector_size = sizeof (u32),
    2470             :   .n_errors = TCP_N_ERROR,
    2471             :   .error_counters = tcp_output_error_counters,
    2472             :   .n_next_nodes = TCP_RESET_N_NEXT,
    2473             :   .next_nodes = {
    2474             : #define _(s,n) [TCP_RESET_NEXT_##s] = n,
    2475             :     foreach_tcp4_reset_next
    2476             : #undef _
    2477             :   },
    2478             :   .format_trace = format_tcp_tx_trace,
    2479             : };
    2480             : /* *INDENT-ON* */
    2481             : 
    2482             : /* *INDENT-OFF* */
                       /* Registration of the "tcp6-reset" graph node. Its next nodes are
                        * generated from foreach_tcp6_reset_next, indexed by TCP_RESET_NEXT_* */
    2483      183788 : VLIB_REGISTER_NODE (tcp6_reset_node) = {
    2484             :   .name = "tcp6-reset",
    2485             :   .vector_size = sizeof (u32),
    2486             :   .n_errors = TCP_N_ERROR,
    2487             :   .error_counters = tcp_output_error_counters,
    2488             :   .n_next_nodes = TCP_RESET_N_NEXT,
    2489             :   .next_nodes = {
    2490             : #define _(s,n) [TCP_RESET_NEXT_##s] = n,
    2491             :     foreach_tcp6_reset_next
    2492             : #undef _
    2493             :   },
    2494             :   .format_trace = format_tcp_tx_trace,
    2495             : };
    2496             : /* *INDENT-ON* */
    2497             : 
    2498             : /*
    2499             :  * fd.io coding-style-patch-verification: ON
    2500             :  *
    2501             :  * Local Variables:
    2502             :  * eval: (c-set-style "gnu")
    2503             :  * End:
    2504             :  */
 |