LCOV - code coverage report
Current view: top level - vnet/tcp - tcp_output.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 443 1104 40.1 %
Date: 2023-07-05 22:20:52 Functions: 65 102 63.7 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016-2019 Cisco and/or its affiliates.
       3             :  * Licensed under the Apache License, Version 2.0 (the "License");
       4             :  * you may not use this file except in compliance with the License.
       5             :  * You may obtain a copy of the License at:
       6             :  *
       7             :  *     http://www.apache.org/licenses/LICENSE-2.0
       8             :  *
       9             :  * Unless required by applicable law or agreed to in writing, software
      10             :  * distributed under the License is distributed on an "AS IS" BASIS,
      11             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12             :  * See the License for the specific language governing permissions and
      13             :  * limitations under the License.
      14             :  */
      15             : 
      16             : #include <vnet/tcp/tcp.h>
      17             : #include <vnet/tcp/tcp_inlines.h>
      18             : #include <math.h>
      19             : #include <vnet/ip/ip4_inlines.h>
      20             : #include <vnet/ip/ip6_inlines.h>
      21             : 
      /* Next-slot identifiers for the TCP output path. The DROP, IP_LOOKUP,
       * IP_REWRITE and IP_ARP suffixes match the first argument of each entry
       * in the foreach_tcp4_output_next and foreach_tcp6_output_next lists in
       * this file. TCP_OUTPUT_N_NEXT comes last, so it equals the number of
       * slots. */
      22             : typedef enum _tcp_output_next
      23             : {
      24             :   TCP_OUTPUT_NEXT_DROP,
      25             :   TCP_OUTPUT_NEXT_IP_LOOKUP,
      26             :   TCP_OUTPUT_NEXT_IP_REWRITE,
      27             :   TCP_OUTPUT_NEXT_IP_ARP,
      28             :   TCP_OUTPUT_N_NEXT
      29             : } tcp_output_next_t;
      30             : 
      /* X-macro list for the IPv4 flavour: each entry pairs a
       * tcp_output_next_t suffix with a string. The expansion site must define
       * _() first. The strings are presumably graph node names; confirm where
       * the list is expanded. */
      31             : #define foreach_tcp4_output_next                \
      32             :   _ (DROP, "error-drop")                        \
      33             :   _ (IP_LOOKUP, "ip4-lookup")                 \
      34             :   _ (IP_REWRITE, "ip4-rewrite")                       \
      35             :   _ (IP_ARP, "ip4-arp")
      36             : 
      /* X-macro list for the IPv6 flavour: same suffixes as the IPv4 list,
       * paired with IPv6 strings. The expansion site must define _() first.
       * The strings are presumably graph node names; confirm where the list
       * is expanded. */
      37             : #define foreach_tcp6_output_next                \
      38             :   _ (DROP, "error-drop")                        \
      39             :   _ (IP_LOOKUP, "ip6-lookup")                 \
      40             :   _ (IP_REWRITE, "ip6-rewrite")                       \
      41             :   _ (IP_ARP, "ip6-discover-neighbor")
      42             : 
      /* Table of error descriptors generated from vnet/tcp/tcp_error.def.
       * Each tcp_error (f, n, s, d) entry in that file expands to one
       * initializer made of the stringified n, then d, then
       * VL_COUNTER_SEVERITY_ with s pasted on. The f argument is not used by
       * this expansion. */
      43             : static vlib_error_desc_t tcp_output_error_counters[] = {
      44             : #define tcp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
      45             : #include <vnet/tcp/tcp_error.def>
      46             : #undef tcp_error
      47             : };
      48             : 
      /* Trace record for the TCP tx path. It stores a TCP header and a
       * connection by value; format_tcp_tx_trace reads both members. */
      49             : typedef struct
      50             : {
      51             :   tcp_header_t tcp_header;
      52             :   tcp_connection_t tcp_connection;
      53             : } tcp_tx_trace_t;
      54             : 
      /* Format callback for a tcp_tx_trace_t.
       *
       * Reads three values from args in order: a vlib_main_t pointer and a
       * vlib_node_t pointer (both unused) and the trace record. Appends the
       * connection id and state to s, then a newline, then white space for
       * the indent that s had on entry, then the TCP header.
       *
       * The trailing 128 is handed to format_tcp_header; its meaning is
       * defined by that formatter.
       *
       * Returns the extended vector s. */
      55             : static u8 *
      56           0 : format_tcp_tx_trace (u8 * s, va_list * args)
      57             : {
      58           0 :   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
      59           0 :   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
      60           0 :   tcp_tx_trace_t *t = va_arg (*args, tcp_tx_trace_t *);
      61           0 :   tcp_connection_t *tc = &t->tcp_connection;
      62           0 :   u32 indent = format_get_indent (s);
      63             : 
      64           0 :   s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
      65           0 :               format_tcp_state, tc->state, format_white_space, indent,
      66             :               format_tcp_header, &t->tcp_header, 128);
      67             : 
      68           0 :   return s;
      69             : }
      70             : 
      71             : #ifndef CLIB_MARCH_VARIANT
      /* Return the smallest shift count, capped at TCP_MAX_WND_SCALE, for
       * which window shifted right by that count is no larger than
       * TCP_WND_MAX. Returns 0 when window already fits. */
      72             : static u8
      73         264 : tcp_window_compute_scale (u32 window)
      74             : {
      75         264 :   u8 wnd_scale = 0;
      76        2904 :   while (wnd_scale < TCP_MAX_WND_SCALE && (window >> wnd_scale) > TCP_WND_MAX)
      77        2640 :     wnd_scale++;
      78         264 :   return wnd_scale;
      79             : }
      80             : 
      81             : /**
      82             :  * TCP's initial window
      83             :  */
      /* Returns tcp_cfg.min_rx_fifo. The tc argument is not read by the
       * current body; it is only referenced by the commented-out alternative
       * kept inside the function. */
      84             : always_inline u32
      85         279 : tcp_initial_wnd_unscaled (tcp_connection_t * tc)
      86             : {
      87             :   /* RFC 6928 recommends the value lower. However at the time our connections
      88             :    * are initialized, fifos may not be allocated. Therefore, advertise the
      89             :    * smallest possible unscaled window size and update once fifos are
      90             :    * assigned to the session.
      91             :    */
      92             :   /*
      93             :      tcp_update_rcv_mss (tc);
      94             :      TCP_IW_N_SEGMENTS * tc->mss;
      95             :    */
      96         279 :   return tcp_cfg.min_rx_fifo;
      97             : }
      98             : 
      99             : /**
     100             :  * Compute initial window and scale factor. As per RFC1323, window field in
     101             :  * SYN and SYN-ACK segments is never scaled.
     102             :  */
     /* Side effects on tc:
      *  - rcv_wscale is recomputed from tcp_cfg.max_rx_fifo, except when the
      *    connection is in SYN_RCVD and the received options carry no window
      *    scale, in which case it is left untouched.
      *  - rcv_wnd is always set to the unscaled initial window.
      * Returns rcv_wnd clamped to TCP_WND_MAX. */
     103             : u32
     104         279 : tcp_initial_window_to_advertise (tcp_connection_t * tc)
     105             : {
     106             :   /* Compute rcv wscale only if peer advertised support for it */
     107         279 :   if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts))
     108         264 :     tc->rcv_wscale = tcp_window_compute_scale (tcp_cfg.max_rx_fifo);
     109             : 
     110         279 :   tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
     111             : 
     112         279 :   return clib_min (tc->rcv_wnd, TCP_WND_MAX);
     113             : }
     114             : 
     /* Recompute tc->rcv_wnd from the space reported by
      * transport_max_rx_enqueue.
      *
      * observed_wnd is the current rcv_wnd minus (rcv_nxt - rcv_las).
      * tcp_make_ack in this file sets rcv_las to rcv_nxt, so that difference
      * is presumably the data received since the last ACK; confirm against
      * other writers of rcv_las.
      *
      * If the available space is smaller than observed_wnd, the window is
      * based on observed_wnd (floored at 0) so it is not retracted. Otherwise
      * it is based on the available space. In both cases the value is rounded
      * down to a multiple of 1 << rcv_wscale, replaced by 0 when it is
      * smaller than rcv_opts.mss, and capped at TCP_WND_MAX << rcv_wscale. */
     115             : static inline void
     116       95327 : tcp_update_rcv_wnd (tcp_connection_t * tc)
     117             : {
     118             :   u32 available_space, wnd;
     119             :   i32 observed_wnd;
     120             : 
     121             :   /*
     122             :    * Figure out how much space we have available
     123             :    */
     124       95327 :   available_space = transport_max_rx_enqueue (&tc->connection);
     125             : 
     126             :   /*
     127             :    * Use the above and what we know about what we've previously advertised
     128             :    * to compute the new window
     129             :    */
     130       95327 :   observed_wnd = (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
     131             : 
     132             :   /* Check if we are about to retract the window. Do the comparison before
     133             :    * rounding to avoid errors. Per RFC7323 sec. 2.4 we could remove this */
     134       95327 :   if (PREDICT_FALSE ((i32) available_space < observed_wnd))
     135             :     {
     136           0 :       wnd = round_down_pow2 (clib_max (observed_wnd, 0), 1 << tc->rcv_wscale);
     137             :       TCP_EVT (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);
     138             :     }
     139             :   else
     140             :     {
     141             :       /* Make sure we have a multiple of 1 << rcv_wscale. We round down to
     142             :        * avoid advertising a window larger than what can be buffered */
     143       95327 :       wnd = round_down_pow2 (available_space, 1 << tc->rcv_wscale);
     144             :     }
     145             : 
     /* A window smaller than the mss from the received options is replaced
      * by a zero window. */
     146       95327 :   if (PREDICT_FALSE (wnd < tc->rcv_opts.mss))
     147           0 :     wnd = 0;
     148             : 
     149       95327 :   tc->rcv_wnd = clib_min (wnd, TCP_WND_MAX << tc->rcv_wscale);
     150       95327 : }
     151             : 
     152             : /**
     153             :  * Compute and return window to advertise, scaled as per RFC1323
     154             :  */
     /* For a state below TCP_STATE_ESTABLISHED this returns the unscaled
      * initial window (which also updates tc, see
      * tcp_initial_window_to_advertise). Otherwise it refreshes tc->rcv_wnd
      * and returns it shifted right by tc->rcv_wscale. */
     155             : static inline u32
     156       24495 : tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state)
     157             : {
     158       24495 :   if (state < TCP_STATE_ESTABLISHED)
     159           0 :     return tcp_initial_window_to_advertise (tc);
     160             : 
     161       24495 :   tcp_update_rcv_wnd (tc);
     162       24495 :   return tc->rcv_wnd >> tc->rcv_wscale;
     163             : }
     164             : 
     /* Fill opts with the options for a SYN and return their total length.
      *
      * Always sets MSS (from tc->mss), window scale (from tc->rcv_wscale) and
      * timestamp (tsval from tcp_time_tstamp for the connection thread,
      * tsecr 0). SACK-permitted is added when TCP_USE_SACKS is non-zero.
      *
      * Flags are OR-ed into opts->flags, so the caller has to start from a
      * cleared structure; tcp_make_syn in this file zeroes it first.
      *
      * The returned length is padded up to a multiple of TCP_OPTS_ALIGN. */
     165             : static int
     166         132 : tcp_make_syn_options (tcp_connection_t * tc, tcp_options_t * opts)
     167             : {
     168         132 :   u8 len = 0;
     169             : 
     170         132 :   opts->flags |= TCP_OPTS_FLAG_MSS;
     171         132 :   opts->mss = tc->mss;
     172         132 :   len += TCP_OPTION_LEN_MSS;
     173             : 
     174         132 :   opts->flags |= TCP_OPTS_FLAG_WSCALE;
     175         132 :   opts->wscale = tc->rcv_wscale;
     176         132 :   len += TCP_OPTION_LEN_WINDOW_SCALE;
     177             : 
     178         132 :   opts->flags |= TCP_OPTS_FLAG_TSTAMP;
     179         132 :   opts->tsval = tcp_time_tstamp (tc->c_thread_index);
     180         132 :   opts->tsecr = 0;
     181         132 :   len += TCP_OPTION_LEN_TIMESTAMP;
     182             : 
     183             :   if (TCP_USE_SACKS)
     184             :     {
     185         132 :       opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
     186         132 :       len += TCP_OPTION_LEN_SACK_PERMITTED;
     187             :     }
     188             : 
     189             :   /* Align to needed boundary */
     190         132 :   len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
     191         132 :   return len;
     192             : }
     193             : 
     /* Fill opts with the options for a SYN-ACK and return their total
      * length.
      *
      * MSS (from tc->mss) is always set. Window scale, timestamp and
      * SACK-permitted are each added only when the corresponding option is
      * present in tc->rcv_opts. The timestamp echoes tc->tsval_recent in
      * tsecr.
      *
      * Flags are OR-ed into opts->flags, so the caller has to start from a
      * cleared structure; tcp_make_synack in this file zeroes it first.
      *
      * The returned length is padded up to a multiple of TCP_OPTS_ALIGN. */
     194             : static int
     195         147 : tcp_make_synack_options (tcp_connection_t * tc, tcp_options_t * opts)
     196             : {
     197         147 :   u8 len = 0;
     198             : 
     199         147 :   opts->flags |= TCP_OPTS_FLAG_MSS;
     200         147 :   opts->mss = tc->mss;
     201         147 :   len += TCP_OPTION_LEN_MSS;
     202             : 
     203         147 :   if (tcp_opts_wscale (&tc->rcv_opts))
     204             :     {
     205         132 :       opts->flags |= TCP_OPTS_FLAG_WSCALE;
     206         132 :       opts->wscale = tc->rcv_wscale;
     207         132 :       len += TCP_OPTION_LEN_WINDOW_SCALE;
     208             :     }
     209             : 
     210         147 :   if (tcp_opts_tstamp (&tc->rcv_opts))
     211             :     {
     212         132 :       opts->flags |= TCP_OPTS_FLAG_TSTAMP;
     213         132 :       opts->tsval = tcp_time_tstamp (tc->c_thread_index);
     214         132 :       opts->tsecr = tc->tsval_recent;
     215         132 :       len += TCP_OPTION_LEN_TIMESTAMP;
     216             :     }
     217             : 
     218         147 :   if (tcp_opts_sack_permitted (&tc->rcv_opts))
     219             :     {
     220         132 :       opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
     221         132 :       len += TCP_OPTION_LEN_SACK_PERMITTED;
     222             :     }
     223             : 
     224             :   /* Align to needed boundary */
     225         147 :   len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
     226         147 :   return len;
     227             : }
     228             : 
     /* Fill opts with the options for a segment on a synchronized connection
      * and return their total length.
      *
      * opts->flags is reset first, so opts does not need to be cleared by the
      * caller. A timestamp is added when the received options carry one
      * (tsval from tcp_tstamp, tsecr from tc->tsval_recent). SACK blocks are
      * added when SACK was permitted and tc->snd_sacks is not empty.
      *
      * Side effect: tc->snd_sack_pos is advanced by the number of blocks
      * emitted, and is wrapped to 0 beforehand when it points at or past the
      * end of tc->snd_sacks.
      *
      * The returned length is padded up to a multiple of TCP_OPTS_ALIGN. */
     229             : static int
     230       95333 : tcp_make_established_options (tcp_connection_t * tc, tcp_options_t * opts)
     231             : {
     232       95333 :   u8 len = 0;
     233             : 
     234       95333 :   opts->flags = 0;
     235             : 
     236       95333 :   if (tcp_opts_tstamp (&tc->rcv_opts))
     237             :     {
     238       95333 :       opts->flags |= TCP_OPTS_FLAG_TSTAMP;
     239       95333 :       opts->tsval = tcp_tstamp (tc);
     240       95333 :       opts->tsecr = tc->tsval_recent;
     241       95333 :       len += TCP_OPTION_LEN_TIMESTAMP;
     242             :     }
     243       95333 :   if (tcp_opts_sack_permitted (&tc->rcv_opts))
     244             :     {
     245       95333 :       if (vec_len (tc->snd_sacks))
     246             :         {
     247           0 :           opts->flags |= TCP_OPTS_FLAG_SACK;
     248           0 :           if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
     249           0 :             tc->snd_sack_pos = 0;
     /* opts->sacks aliases tc->snd_sacks starting at snd_sack_pos; nothing
      * is copied here. */
     250           0 :           opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
     251           0 :           opts->n_sack_blocks = vec_len (tc->snd_sacks) - tc->snd_sack_pos;
     252           0 :           opts->n_sack_blocks = clib_min (opts->n_sack_blocks,
     253             :                                           TCP_OPTS_MAX_SACK_BLOCKS);
     254           0 :           tc->snd_sack_pos += opts->n_sack_blocks;
     /* The leading 2 is presumably the option kind and length bytes;
      * confirm against tcp_options_write. */
     255           0 :           len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks;
     256             :         }
     257             :     }
     258             : 
     259             :   /* Align to needed boundary */
     260       95333 :   len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
     261       95333 :   return len;
     262             : }
     263             : 
     /* Pick the option builder that matches state and return the options
      * length it reports.
      *
      * SYN_SENT uses the SYN builder and SYN_RCVD the SYN-ACK builder. The
      * eight states listed in the first case group, including CLOSED, use
      * the established builder. Any other state logs a warning and returns
      * 0 without touching opts.
      *
      * The SYN and SYN-ACK builders OR into opts->flags, so opts must be
      * cleared by the caller on those paths. */
     264             : always_inline int
     265       70838 : tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts,
     266             :                   tcp_state_t state)
     267             : {
     268       70838 :   switch (state)
     269             :     {
     270       70838 :     case TCP_STATE_ESTABLISHED:
     271             :     case TCP_STATE_CLOSE_WAIT:
     272             :     case TCP_STATE_FIN_WAIT_1:
     273             :     case TCP_STATE_LAST_ACK:
     274             :     case TCP_STATE_CLOSING:
     275             :     case TCP_STATE_FIN_WAIT_2:
     276             :     case TCP_STATE_TIME_WAIT:
     277             :     case TCP_STATE_CLOSED:
     278       70838 :       return tcp_make_established_options (tc, opts);
     279           0 :     case TCP_STATE_SYN_RCVD:
     280           0 :       return tcp_make_synack_options (tc, opts);
     281           0 :     case TCP_STATE_SYN_SENT:
     282           0 :       return tcp_make_syn_options (tc, opts);
     283           0 :     default:
     284           0 :       clib_warning ("State not handled! %d", state);
     285           0 :       return 0;
     286             :     }
     287             : }
     288             : 
     289             : /**
     290             :  * Update burst send vars
     291             :  *
     292             :  * - Updates snd_mss to reflect the effective segment size that we can send
     293             :  * by taking into account all TCP options, including SACKs.
     294             :  * - Cache 'on the wire' options for reuse
     295             :  * - Updates receive window which can be reused for a burst.
     296             :  *
     297             :  * This should *only* be called when doing bursts
     298             :  */
     /* Order of work in the body: build established options into
      * tc->snd_opts, derive tc->snd_mss, write the options into the cached
      * option buffer of the connection thread, refresh the receive window,
      * then handle rate sampling, the start-of-transmission event and a
      * pending push. */
     299             : void
     300       70832 : tcp_update_burst_snd_vars (tcp_connection_t * tc)
     301             : {
     302       70832 :   tcp_main_t *tm = &tcp_main;
     303             : 
     304             :   /* Compute options to be used for connection. These may be reused when
     305             :    * sending data or to compute the effective mss (snd_mss) */
     306       70832 :   tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts,
     307             :                                        TCP_STATE_ESTABLISHED);
     308             : 
     309             :   /* XXX check if MTU has been updated */
     /* NOTE(review): if snd_mss is an unsigned field, an options length
      * larger than the smaller mss would wrap here and the ASSERT below
      * would only catch an exact zero. Confirm the field type. */
     310       70832 :   tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
     311       70832 :   ASSERT (tc->snd_mss > 0);
     312             : 
     313       70832 :   tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts,
     314             :                      &tc->snd_opts);
     315             : 
     316       70832 :   tcp_update_rcv_wnd (tc);
     317             : 
     318       70832 :   if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
     319           0 :     tcp_bt_check_app_limited (tc);
     320             : 
     /* snd_una equal to snd_nxt is treated as the start of a transmission
      * and reported to congestion control. */
     321       70832 :   if (tc->snd_una == tc->snd_nxt)
     322             :     {
     323       25861 :       tcp_cc_event (tc, TCP_CC_EVT_START_TX);
     324             :     }
     325             : 
     326       70832 :   if (tc->flags & TCP_CONN_PSH_PENDING)
     327             :     {
     328       23937 :       u32 max_deq = transport_max_tx_dequeue (&tc->connection);
     329             :       /* Last byte marked for push */
     330       23937 :       tc->psh_seq = tc->snd_una + max_deq - 1;
     331             :     }
     332       70832 : }
     333             : 
     /* Reset buffer b before TCP headers are built in it.
      *
      * Expects a buffer without a chained next buffer (asserted). Marks it as
      * locally originated, zeroes the chained length, current_data and the
      * TCP flags in the buffer metadata, then reserves
      * TRANSPORT_MAX_HDRS_LEN bytes of headroom.
      *
      * Returns the value of vlib_buffer_make_headroom. vm is not used. */
     334             : static void *
     335       24780 : tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
     336             : {
     337       24780 :   ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
     338       24780 :   b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
     339       24780 :   b->total_length_not_including_first_buffer = 0;
     340       24780 :   b->current_data = 0;
     341       24780 :   vnet_buffer (b)->tcp.flags = 0;
     342             :   /* Leave enough space for headers */
     343       24780 :   return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
     344             : }
     345             : 
     346             : /* Compute TCP checksum in software when offloading is disabled for a connection */
     /* Software TCP checksum over an IPv6 pseudo header and buffer chain p0.
      *
      * The running sum is seeded with the chain length and IP_PROTOCOL_TCP,
      * both converted with clib_host_to_net_u16. The src and dst addresses
      * are then folded in one uword at a time, and the rest is delegated to
      * ip_calculate_l4_checksum.
      *
      * NOTE(review): the chain length is kept in a u16 here but in a u32 in
      * the ip4 variant. Confirm that lengths above 65535 cannot reach this
      * function. */
     347             : u16
     348           0 : ip6_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
     349             :                                  ip46_address_t * src, ip46_address_t * dst)
     350             : {
     351             :   ip_csum_t sum0;
     352             :   u16 payload_length_host_byte_order;
     353             :   u32 i;
     354             : 
     355             :   /* Initialize checksum with ip header. */
     356           0 :   sum0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0)) +
     357           0 :     clib_host_to_net_u16 (IP_PROTOCOL_TCP);
     358           0 :   payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
     359             : 
     360           0 :   for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++)
     361             :     {
     362           0 :       sum0 = ip_csum_with_carry
     363           0 :         (sum0, clib_mem_unaligned (&src->ip6.as_uword[i], uword));
     364           0 :       sum0 = ip_csum_with_carry
     365           0 :         (sum0, clib_mem_unaligned (&dst->ip6.as_uword[i], uword));
     366             :     }
     367             : 
     368           0 :   return ip_calculate_l4_checksum (vm, p0, sum0,
     369             :                                    payload_length_host_byte_order, NULL, 0,
     370             :                                    NULL);
     371             : }
     372             : 
     /* Software TCP checksum over an IPv4 pseudo header and buffer chain p0.
      *
      * The running sum is seeded with a single 32-bit word holding the chain
      * length plus IP_PROTOCOL_TCP shifted left by 16, converted with
      * clib_host_to_net_u32. The 32-bit src and dst addresses are folded in
      * and the rest is delegated to ip_calculate_l4_checksum. */
     373             : u16
     374           0 : ip4_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
     375             :                                  ip46_address_t * src, ip46_address_t * dst)
     376             : {
     377             :   ip_csum_t sum0;
     378             :   u32 payload_length_host_byte_order;
     379             : 
     380           0 :   payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
     381           0 :   sum0 =
     382           0 :     clib_host_to_net_u32 (payload_length_host_byte_order +
     383             :                           (IP_PROTOCOL_TCP << 16));
     384             : 
     385           0 :   sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&src->ip4, u32));
     386           0 :   sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&dst->ip4, u32));
     387             : 
     388           0 :   return ip_calculate_l4_checksum (vm, p0, sum0,
     389             :                                    payload_length_host_byte_order, NULL, 0,
     390             :                                    NULL);
     391             : }
     392             : 
     /* Return the value to store in the TCP checksum field for buffer b.
      *
      * When the connection has TCP_CFG_F_NO_CSUM_OFFLOAD set, the checksum
      * is computed in software using the local and remote addresses of the
      * connection. Otherwise the buffer is flagged for TCP checksum offload
      * and 0 is returned. */
     393             : static inline u16
     394     1020810 : tcp_compute_checksum (tcp_connection_t * tc, vlib_buffer_t * b)
     395             : {
     396     1020810 :   u16 checksum = 0;
     397     1020810 :   if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
     398             :     {
     399           0 :       tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
     400           0 :       vlib_main_t *vm = wrk->vm;
     401             : 
     402           0 :       if (tc->c_is_ip4)
     403           0 :         checksum = ip4_tcp_compute_checksum_custom
     404             :           (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
     405             :       else
     406           0 :         checksum = ip6_tcp_compute_checksum_custom
     407             :           (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
     408             :     }
     409             :   else
     410             :     {
     411     1020810 :       vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM);
     412             :     }
     413     1020810 :   return checksum;
     414             : }
     415             : 
     416             : /**
     417             :  * Prepare ACK
     418             :  */
     /* Push a TCP header with the given flags onto b, using snd_nxt as the
      * sequence number and rcv_nxt as the ack number.
      *
      * state only selects how the advertised window is computed. The options
      * are always built with tcp_make_established_options, whatever state
      * is. The connection index is recorded in the buffer metadata.
      *
      * If the advertised window is 0, a dequeue notification is requested on
      * the rx fifo and the zero-window marker is set on the connection;
      * otherwise the marker is cleared. */
     419             : static inline void
     420       24495 : tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state,
     421             :                 u8 flags)
     422             : {
     423       24495 :   tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
     424             :   u8 tcp_opts_len, tcp_hdr_opts_len;
     425             :   tcp_header_t *th;
     426             :   u16 wnd;
     427             : 
     428       24495 :   wnd = tcp_window_to_advertise (tc, state);
     429             : 
     430             :   /* Make and write options */
     431       24495 :   tcp_opts_len = tcp_make_established_options (tc, snd_opts);
     432       24495 :   tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
     433             : 
     434       24495 :   th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
     435             :                              tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
     436             : 
     /* Options are written directly after the fixed header. */
     437       24495 :   tcp_options_write ((u8 *) (th + 1), snd_opts);
     438             : 
     439       24495 :   th->checksum = tcp_compute_checksum (tc, b);
     440             : 
     441       24495 :   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
     442             : 
     443       24495 :   if (wnd == 0)
     444             :     {
     445           0 :       transport_rx_fifo_req_deq_ntf (&tc->connection);
     446           0 :       tcp_zero_rwnd_sent_on (tc);
     447             :     }
     448             :   else
     449       24495 :     tcp_zero_rwnd_sent_off (tc);
     450       24495 : }
     451             : 
     452             : /**
     453             :  * Convert buffer to ACK
     454             :  */
     /* Builds an ACK-only header in b via tcp_make_ack_i, then records
      * rcv_nxt in tc->rcv_las. */
     455             : static inline void
     456       24239 : tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b)
     457             : {
     458       24239 :   tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
     459             :   TCP_EVT (TCP_EVT_ACK_SENT, tc);
     460       24239 :   tc->rcv_las = tc->rcv_nxt;
     461       24239 : }
     462             : 
     463             : /**
     464             :  * Convert buffer to FIN-ACK
     465             :  */
     /* Builds a header with FIN and ACK set in b via tcp_make_ack_i. Unlike
      * tcp_make_ack, it does not update tc->rcv_las. */
     466             : static void
     467         256 : tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b)
     468             : {
     469         256 :   tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK);
     470         256 : }
     471             : 
     472             : /**
     473             :  * Convert buffer to SYN
     474             :  */
     /* Push a SYN header onto b for connection tc.
      *
      * Computes the initial window first, which also updates tc->rcv_wscale
      * and tc->rcv_wnd. The options structure is zeroed before the SYN
      * options are built because that builder only ORs flags in. The header
      * uses tc->iss as sequence number and tc->rcv_nxt as ack number. The
      * connection index is recorded in the buffer metadata. */
     475             : void
     476         132 : tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b)
     477             : {
     478             :   u8 tcp_hdr_opts_len, tcp_opts_len;
     479             :   tcp_header_t *th;
     480             :   u16 initial_wnd;
     481             :   tcp_options_t snd_opts;
     482             : 
     483         132 :   initial_wnd = tcp_initial_window_to_advertise (tc);
     484             : 
     485             :   /* Make and write options */
     486         132 :   clib_memset (&snd_opts, 0, sizeof (snd_opts));
     487         132 :   tcp_opts_len = tcp_make_syn_options (tc, &snd_opts);
     488         132 :   tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
     489             : 
     490         132 :   th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
     491             :                              tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN,
     492             :                              initial_wnd);
     493         132 :   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
     494         132 :   tcp_options_write ((u8 *) (th + 1), &snd_opts);
     495         132 :   th->checksum = tcp_compute_checksum (tc, b);
     496         132 : }
     497             : 
     498             : /**
     499             :  * Convert buffer to SYN-ACK
     500             :  */
     /* Push a SYN-ACK header onto b for connection tc.
      *
      * The options structure is zeroed before the SYN-ACK options are built
      * because that builder only ORs flags in. Computing the initial window
      * also updates tc->rcv_wnd and, depending on state and received
      * options, tc->rcv_wscale. The header uses tc->iss as sequence number
      * and tc->rcv_nxt as ack number. The connection index is recorded in
      * the buffer metadata. */
     501             : static void
     502         147 : tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b)
     503             : {
     504         147 :   tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
     505             :   u8 tcp_opts_len, tcp_hdr_opts_len;
     506             :   tcp_header_t *th;
     507             :   u16 initial_wnd;
     508             : 
     509         147 :   clib_memset (snd_opts, 0, sizeof (*snd_opts));
     510         147 :   initial_wnd = tcp_initial_window_to_advertise (tc);
     511         147 :   tcp_opts_len = tcp_make_synack_options (tc, snd_opts);
     512         147 :   tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
     513             : 
     514         147 :   th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
     515             :                              tc->rcv_nxt, tcp_hdr_opts_len,
     516             :                              TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd);
     517         147 :   tcp_options_write ((u8 *) (th + 1), snd_opts);
     518             : 
     519         147 :   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
     520         147 :   th->checksum = tcp_compute_checksum (tc, b);
     521         147 : }
     522             : 
     /* Queue buffer index bi as a pending tx buffer on the thread of wrk.
      *
      * Marks b as locally originated and clears b->error. The next node is
      * taken from wrk->tco_next_node, index 0 for IPv4 connections and 1
      * otherwise. When running on thread 0 and vlib_num_workers reports a
      * non-zero count, session_queue_run_on_main_thread is also called. */
     523             : static void
     524         132 : tcp_enqueue_half_open (tcp_worker_ctx_t *wrk, tcp_connection_t *tc,
     525             :                        vlib_buffer_t *b, u32 bi)
     526             : {
     527         132 :   vlib_main_t *vm = wrk->vm;
     528             : 
     529         132 :   b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
     530         132 :   b->error = 0;
     531             : 
     532         132 :   session_add_pending_tx_buffer (vm->thread_index, bi,
     533         132 :                                  wrk->tco_next_node[!tc->c_is_ip4]);
     534             : 
     535         132 :   if (vm->thread_index == 0 && vlib_num_workers ())
     536           0 :     session_queue_run_on_main_thread (vm);
     537         132 : }
     538             : 
     /* Queue buffer index bi as a pending tx buffer on the thread of wrk.
      *
      * Marks b as locally originated and clears b->error. The next node is
      * taken from wrk->tco_next_node, index 0 when is_ip4 is non-zero and 1
      * otherwise. */
     539             : static void
     540       24648 : tcp_enqueue_to_output (tcp_worker_ctx_t * wrk, vlib_buffer_t * b, u32 bi,
     541             :                        u8 is_ip4)
     542             : {
     543       24648 :   b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
     544       24648 :   b->error = 0;
     545             : 
     546       24648 :   session_add_pending_tx_buffer (wrk->vm->thread_index, bi,
     547       24648 :                                  wrk->tco_next_node[!is_ip4]);
     548       24648 : }
     549             : 
     /* Turn the received packet held in b into a RST addressed back to its
      * sender, reusing the same buffer.
      *
      * Expects the current data of b to point at the IP header and
      * tcp_buffer_hdr to locate the TCP header. Addresses and ports are
      * taken from the incoming headers and swapped when the new headers are
      * pushed. The sequence and ack numbers follow the RFC 793 rule quoted
      * in the body.
      *
      * Ports, seq and ack are kept in network byte order throughout and
      * handed to vlib_buffer_push_tcp_net_order.
      *
      * Always returns 0. */
     550             : int
     551          86 : tcp_buffer_make_reset (vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4)
     552             : {
     553          86 :   ip4_address_t src_ip4 = {}, dst_ip4 = {};
     /* The IPv6 addresses are only written and only read on the IPv6
      * branches below, so they are left uninitialized here. */
     554             :   ip6_address_t src_ip6, dst_ip6;
     555             :   u16 src_port, dst_port;
     556             :   u32 tmp, len, seq, ack;
     557             :   ip4_header_t *ih4;
     558             :   ip6_header_t *ih6;
     559             :   tcp_header_t *th;
     560             :   u8 flags;
     561             : 
     562             :   /*
     563             :    * Find IP and TCP headers and glean information from them. Assumes
     564             :    * buffer was parsed by something like @ref tcp_input_lookup_buffer
     565             :    */
     566          86 :   th = tcp_buffer_hdr (b);
     567             : 
     568          86 :   if (is_ip4)
     569             :     {
     570          86 :       ih4 = vlib_buffer_get_current (b);
     571          86 :       ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40);
     572          86 :       src_ip4.as_u32 = ih4->src_address.as_u32;
     573          86 :       dst_ip4.as_u32 = ih4->dst_address.as_u32;
     574             :     }
     575             :   else
     576             :     {
     577           0 :       ih6 = vlib_buffer_get_current (b);
     578           0 :       ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60);
     579           0 :       clib_memcpy_fast (&src_ip6, &ih6->src_address, sizeof (ip6_address_t));
     580           0 :       clib_memcpy_fast (&dst_ip6, &ih6->dst_address, sizeof (ip6_address_t));
     581             :     }
     582             : 
     583          86 :   src_port = th->src_port;
     584          86 :   dst_port = th->dst_port;
     585          86 :   flags = TCP_FLAG_RST;
     586             : 
     587             :   /*
     588             :    * RFC 793. If the ACK bit is off, sequence number zero is used,
     589             :    *   <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK>
     590             :    * If the ACK bit is on,
     591             :    *   <SEQ=SEG.ACK><CTL=RST>
     592             :    */
     593          86 :   if (tcp_ack (th))
     594             :     {
     /* ack_number is copied without byte swapping, so seq stays in
      * network order. */
     595          86 :       seq = th->ack_number;
     596          86 :       ack = 0;
     597             :     }
     598             :   else
     599             :     {
     600           0 :       flags |= TCP_FLAG_ACK;
     601           0 :       tmp = clib_net_to_host_u32 (th->seq_number);
     /* SYN and FIN each add one to the data length when computing the
      * segment length. */
     602           0 :       len = vnet_buffer (b)->tcp.data_len + tcp_is_syn (th) + tcp_is_fin (th);
     603           0 :       ack = clib_host_to_net_u32 (tmp + len);
     604           0 :       seq = 0;
     605             :     }
     606             : 
     607             :   /*
     608             :    * Clear and reuse current buffer for reset
     609             :    */
     610          86 :   if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
     611           0 :     vlib_buffer_free_one (vm, b->next_buffer);
     612             : 
     613             :   /* Zero all flags but free list index and trace flag */
     614          86 :   b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
     615             :   /* Make sure new tcp header comes after current ip */
     616          86 :   b->current_data = ((u8 *) th - b->data) + sizeof (tcp_header_t);
     617          86 :   b->current_length = 0;
     618          86 :   b->total_length_not_including_first_buffer = 0;
     619          86 :   vnet_buffer (b)->tcp.flags = 0;
     620             : 
     621             :   /*
     622             :    * Add TCP and IP headers
     623             :    */
     /* Ports are passed swapped: the incoming destination port becomes the
      * source port of the reset. The window argument is 0. */
     624          86 :   th = vlib_buffer_push_tcp_net_order (b, dst_port, src_port, seq, ack,
     625             :                                        sizeof (tcp_header_t), flags, 0);
     626             : 
     627          86 :   if (is_ip4)
     628             :     {
     /* Addresses are swapped as well. The meaning of the trailing 1 is
      * defined by vlib_buffer_push_ip4. */
     629          86 :       ih4 = vlib_buffer_push_ip4 (vm, b, &dst_ip4, &src_ip4,
     630             :                                   IP_PROTOCOL_TCP, 1);
     631          86 :       th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
     632             :     }
     633             :   else
     634             :     {
     635           0 :       int bogus = ~0;
     636           0 :       ih6 = vlib_buffer_push_ip6 (vm, b, &dst_ip6, &src_ip6, IP_PROTOCOL_TCP);
     637           0 :       th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
     638           0 :       ASSERT (!bogus);
     639             :     }
     640             : 
     641          86 :   return 0;
     642             : }
     643             : 
     644             : /**
     645             :  *  Send reset without reusing existing buffer
     646             :  *
     647             :  *  It extracts connection info out of original packet
     648             :  */
     649             : void
     650           0 : tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
     651             :                       u32 thread_index, u8 is_ip4)
     652             : {
     653           0 :   tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
     654           0 :   vlib_main_t *vm = wrk->vm;
     655             :   vlib_buffer_t *b;
     656           0 :   u8 tcp_hdr_len, flags = 0;
     657             :   tcp_header_t *th, *pkt_th;
     658             :   u32 seq, ack, bi;
     659             :   ip4_header_t *ih4, *pkt_ih4;
     660             :   ip6_header_t *ih6, *pkt_ih6;
     661             : 
     662           0 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
     663             :     {
     664           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
     665           0 :       return;
     666             :     }
     667             : 
     668           0 :   b = vlib_get_buffer (vm, bi);
     669           0 :   tcp_init_buffer (vm, b);
     670             : 
     671             :   /* No options are written, the reset carries only the base tcp header */
     672           0 :   tcp_hdr_len = sizeof (tcp_header_t);
     673             : 
     674           0 :   if (is_ip4)
     675             :     {
     676           0 :       pkt_ih4 = vlib_buffer_get_current (pkt);
     677           0 :       pkt_th = ip4_next_header (pkt_ih4);
     678             :     }
     679             :   else
     680             :     {
     681           0 :       pkt_ih6 = vlib_buffer_get_current (pkt);
     682           0 :       pkt_th = ip6_next_header (pkt_ih6);
     683             :     }
     684             : 
     685           0 :   if (tcp_ack (pkt_th))
     686             :     {
     687           0 :       flags = TCP_FLAG_RST;
     688           0 :       seq = pkt_th->ack_number;
     689           0 :       ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
     690           0 :       ack = clib_host_to_net_u32 (ack);
     691             :     }
     692             :   else
     693             :     {
     694           0 :       flags = TCP_FLAG_RST | TCP_FLAG_ACK;
     695           0 :       seq = 0;
     696           0 :       ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);
     697             :     }
     698             : 
     699           0 :   th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
     700             :                                        seq, ack, tcp_hdr_len, flags, 0);
     701             : 
     702             :   /* Swap src and dst ip */
     703           0 :   if (is_ip4)
     704             :     {
     705           0 :       ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
     706           0 :       ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
     707             :                                   &pkt_ih4->src_address, IP_PROTOCOL_TCP,
     708           0 :                                   tcp_csum_offload (tc));
     709           0 :       th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
     710             :     }
     711             :   else
     712             :     {
     713           0 :       int bogus = ~0;
     714           0 :       ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
     715             :               0x60);
     716           0 :       ih6 = vlib_buffer_push_ip6_custom (vm, b, &pkt_ih6->dst_address,
     717             :                                          &pkt_ih6->src_address,
     718             :                                          IP_PROTOCOL_TCP,
     719             :                                          tc->ipv6_flow_label);
     720           0 :       th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
     721           0 :       ASSERT (!bogus);
     722             :     }
     723             : 
     724           0 :   tcp_enqueue_half_open (wrk, tc, b, bi);
     725             :   TCP_EVT (TCP_EVT_RST_SENT, tc);
     726           0 :   vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
     727             :                                TCP_ERROR_RST_SENT, 1);
     728             : }
     729             : 
     730             : /**
     731             :  * Build and send reset packet for connection
     732             :  */
     733             : void
     734           6 : tcp_send_reset (tcp_connection_t * tc)
     735             : {
     736           6 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
     737           6 :   vlib_main_t *vm = wrk->vm;
     738             :   vlib_buffer_t *b;
     739             :   u32 bi;
     740             :   tcp_header_t *th;
     741             :   u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;
     742             :   u8 flags;
     743             : 
     744           6 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
     745             :     {
     746           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
     747           0 :       return;
     748             :     }
     749           6 :   b = vlib_get_buffer (vm, bi);
     750           6 :   tcp_init_buffer (vm, b);
     751             : 
     752           6 :   tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
     753           6 :   tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
     754           6 :   advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
     755           6 :   flags = TCP_FLAG_RST | TCP_FLAG_ACK;
     756           6 :   th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
     757             :                              tc->rcv_nxt, tcp_hdr_opts_len, flags,
     758             :                              advertise_wnd);
     759           6 :   opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
     760           6 :   th->checksum = tcp_compute_checksum (tc, b);
     761           6 :   ASSERT (opts_write_len == tc->snd_opts_len);
     762           6 :   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
     763           6 :   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
     764             :   TCP_EVT (TCP_EVT_RST_SENT, tc);
     765           6 :   vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
     766             :                                TCP_ERROR_RST_SENT, 1);
     767             : }
     768             : 
     769             : /**
     770             :  *  Send SYN
     771             :  *
     772             :  *  Builds a SYN packet for a half-open connection and sends it to tcp-output.
     773             :  *  The packet is handled by main thread and because half-open and established
     774             :  *  connections use the same pool the connection can be retrieved without
     775             :  *  additional logic.
     776             :  */
     777             : void
     778         132 : tcp_send_syn (tcp_connection_t * tc)
     779             : {
     780         132 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
     781         132 :   vlib_main_t *vm = wrk->vm;
     782             :   vlib_buffer_t *b;
     783             :   u32 bi;
     784             : 
     785             :   /*
     786             :    * Set up retransmit and establish timers before requesting buffer
     787             :    * such that we can return if we've run out.
     788             :    */
     789         132 :   tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
     790         132 :                     (u32) tc->rto * TCP_TO_TIMER_TICK);
     791             : 
     792         132 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
     793             :     {
     794           0 :       tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
     795             :                         tcp_cfg.alloc_err_timeout);
     796           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
     797           0 :       return;
     798             :     }
     799             : 
     800         132 :   b = vlib_get_buffer (vm, bi);
     801         132 :   tcp_init_buffer (vm, b);
     802         132 :   tcp_make_syn (tc, b);
     803             : 
     804             :   /* Measure RTT with this */
     805         132 :   tc->rtt_ts = tcp_time_now_us (vlib_num_workers ()? 1 : 0);
     806         132 :   tc->rtt_seq = tc->snd_nxt;
     807         132 :   tc->rto_boff = 0;
     808             : 
     809         132 :   tcp_enqueue_half_open (wrk, tc, b, bi);
     810             :   TCP_EVT (TCP_EVT_SYN_SENT, tc);
     811             : }
     812             : 
     813             : void
     814         135 : tcp_send_synack (tcp_connection_t * tc)
     815             : {
     816         135 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
     817         135 :   vlib_main_t *vm = wrk->vm;
     818             :   vlib_buffer_t *b;
     819             :   u32 bi;
     820             : 
     821         135 :   ASSERT (tc->snd_una != tc->snd_nxt);
     822         135 :   tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
     823             : 
     824         135 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
     825             :     {
     826           0 :       tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
     827             :                         tcp_cfg.alloc_err_timeout);
     828           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
     829           0 :       return;
     830             :     }
     831             : 
     832         135 :   tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
     833         135 :   b = vlib_get_buffer (vm, bi);
     834         135 :   tcp_init_buffer (vm, b);
     835         135 :   tcp_make_synack (tc, b);
     836         135 :   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
     837             :   TCP_EVT (TCP_EVT_SYNACK_SENT, tc);
     838             : }
     839             : 
     840             : /**
     841             :  *  Send FIN
     842             :  */
     843             : void
     844         256 : tcp_send_fin (tcp_connection_t * tc)
     845             : {
     846         256 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
     847         256 :   vlib_main_t *vm = wrk->vm;
     848             :   vlib_buffer_t *b;
     849             :   u32 bi;
     850         256 :   u8 fin_snt = 0;
     851             : 
     852         256 :   fin_snt = tc->flags & TCP_CONN_FINSNT;
     853         256 :   if (fin_snt)
     854           0 :     tc->snd_nxt -= 1;
     855             : 
     856         256 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
     857             :     {
     858             :       /* Out of buffers so program fin retransmit ASAP */
     859           0 :       tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
     860             :                         tcp_cfg.alloc_err_timeout);
     861           0 :       if (fin_snt)
     862           0 :         tc->snd_nxt += 1;
     863             :       else
     864             :         /* Make sure retransmit retries a fin not data */
     865           0 :         tc->flags |= TCP_CONN_FINSNT;
     866           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
     867           0 :       return;
     868             :     }
     869             : 
     870             :   /* If an ack is programmed but no dupacks are pending, no need to send it */
     871         256 :   if ((tc->flags & TCP_CONN_SNDACK) && !tc->pending_dupacks)
     872         205 :     tc->flags &= ~TCP_CONN_SNDACK;
     873             : 
     874         256 :   b = vlib_get_buffer (vm, bi);
     875         256 :   tcp_init_buffer (vm, b);
     876         256 :   tcp_make_fin (tc, b);
     877         256 :   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
     878             :   TCP_EVT (TCP_EVT_FIN_SENT, tc);
     879             :   /* Account for the FIN */
     880         256 :   tc->snd_nxt += 1;
     881         256 :   tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
     882         256 :   if (!fin_snt)
     883             :     {
     884         256 :       tc->flags |= TCP_CONN_FINSNT;
     885         256 :       tc->flags &= ~TCP_CONN_FINPNDG;
     886             :     }
     887             : }
     888             : 
     889             : /**
     890             :  * Push TCP header and update connection variables. Should only be called
     891             :  * for segments with data, not for 'control' packets.
     892             :  */
     893             : always_inline void
     894      996032 : tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt,
     895             :                 u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
     896             : {
     897      996032 :   u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK;
     898             :   u32 advertise_wnd, data_len;
     899      996032 :   tcp_main_t *tm = &tcp_main;
     900             :   tcp_header_t *th;
     901             : 
     902      996032 :   data_len = b->current_length;
     903      996032 :   if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
     904           0 :     data_len += b->total_length_not_including_first_buffer;
     905             : 
     906      996032 :   vnet_buffer (b)->tcp.flags = 0;
     907      996032 :   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
     908             : 
     909      996032 :   if (compute_opts)
     910           0 :     tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
     911             : 
     912      996032 :   tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
     913             : 
     914      996032 :   if (maybe_burst)
     915      996032 :     advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
     916             :   else
     917           0 :     advertise_wnd = tcp_window_to_advertise (tc, TCP_STATE_ESTABLISHED);
     918             : 
     919      996032 :   if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
     920             :     {
     921      917020 :       if (seq_geq (tc->psh_seq, snd_nxt)
     922      917019 :           && seq_lt (tc->psh_seq, snd_nxt + data_len))
     923         319 :         flags |= TCP_FLAG_PSH;
     924             :     }
     925      996032 :   th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, snd_nxt,
     926             :                              tc->rcv_nxt, tcp_hdr_opts_len, flags,
     927             :                              advertise_wnd);
     928             : 
     929      996032 :   if (maybe_burst)
     930             :     {
     931      996032 :       clib_memcpy_fast ((u8 *) (th + 1),
     932      996032 :                         tm->wrk_ctx[tc->c_thread_index].cached_opts,
     933      996032 :                         tc->snd_opts_len);
     934             :     }
     935             :   else
     936             :     {
     937           0 :       u8 len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
     938           0 :       ASSERT (len == tc->snd_opts_len);
     939             :     }
     940             : 
     941             :   /*
     942             :    * Update connection variables
     943             :    */
     944             : 
     945      996032 :   if (update_snd_nxt)
     946      996032 :     tc->snd_nxt += data_len;
     947      996032 :   tc->rcv_las = tc->rcv_nxt;
     948             : 
     949      996032 :   tc->bytes_out += data_len;
     950      996032 :   tc->data_segs_out += 1;
     951             : 
     952      996032 :   th->checksum = tcp_compute_checksum (tc, b);
     953             : 
     954             :   TCP_EVT (TCP_EVT_PKTIZE, tc);
     955      996032 : }
     956             : 
     957             : always_inline u32
     958           0 : tcp_buffer_len (vlib_buffer_t * b)
     959             : {
     960           0 :   u32 data_len = b->current_length;
     961           0 :   if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
     962           0 :     data_len += b->total_length_not_including_first_buffer;
     963           0 :   return data_len;
     964             : }
     965             : 
     966             : always_inline u32
     967      996032 : tcp_push_one_header (tcp_connection_t *tc, vlib_buffer_t *b)
     968             : {
     969      996032 :   if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
     970           0 :     tcp_bt_track_tx (tc, tcp_buffer_len (b));
     971             : 
     972      996032 :   tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0, /* burst */ 1,
     973             :                   /* update_snd_nxt */ 1);
     974             : 
     975      996032 :   tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
     976      996032 :   return 0;
     977             : }
     978             : 
     979             : u32
     980       42153 : tcp_session_push_header (transport_connection_t *tconn, vlib_buffer_t **bs,
     981             :                          u32 n_bufs)
     982             : {
     983       42153 :   tcp_connection_t *tc = (tcp_connection_t *) tconn;
     984             : 
     985      495416 :   while (n_bufs >= 4)
     986             :     {
     987      453263 :       vlib_prefetch_buffer_header (bs[2], STORE);
     988      453263 :       vlib_prefetch_buffer_header (bs[3], STORE);
     989             : 
     990      453263 :       tcp_push_one_header (tc, bs[0]);
     991      453263 :       tcp_push_one_header (tc, bs[1]);
     992             : 
     993      453263 :       n_bufs -= 2;
     994      453263 :       bs += 2;
     995             :     }
     996      131659 :   while (n_bufs)
     997             :     {
     998       89506 :       if (n_bufs > 1)
     999       47353 :         vlib_prefetch_buffer_header (bs[1], STORE);
    1000             : 
    1001       89506 :       tcp_push_one_header (tc, bs[0]);
    1002             : 
    1003       89506 :       n_bufs -= 1;
    1004       89506 :       bs += 1;
    1005             :     }
    1006             : 
    1007             :   /* If not tracking an ACK, start tracking */
    1008       42153 :   if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
    1009             :     {
    1010       21099 :       tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
    1011       21099 :       tc->rtt_seq = tc->snd_nxt;
    1012             :     }
    1013       42153 :   if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))
    1014             :     {
    1015        1937 :       tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
    1016        1937 :       tcp_retransmit_timer_set (&wrk->timer_wheel, tc);
    1017        1937 :       tc->rto_boff = 0;
    1018             :     }
    1019       42153 :   return 0;
    1020             : }
    1021             : 
    1022             : void
    1023       24239 : tcp_send_ack (tcp_connection_t * tc)
    1024             : {
    1025       24239 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
    1026       24239 :   vlib_main_t *vm = wrk->vm;
    1027             :   vlib_buffer_t *b;
    1028             :   u32 bi;
    1029             : 
    1030       24239 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
    1031             :     {
    1032           0 :       tcp_update_rcv_wnd (tc);
    1033           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
    1034           0 :       return;
    1035             :     }
    1036       24239 :   b = vlib_get_buffer (vm, bi);
    1037       24239 :   tcp_init_buffer (vm, b);
    1038       24239 :   tcp_make_ack (tc, b);
    1039       24239 :   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1040             : }
    1041             : 
    1042             : void
    1043      996202 : tcp_program_ack (tcp_connection_t * tc)
    1044             : {
    1045      996202 :   if (!(tc->flags & TCP_CONN_SNDACK))
    1046             :     {
    1047       41252 :       session_add_self_custom_tx_evt (&tc->connection, 1);
    1048       41252 :       tc->flags |= TCP_CONN_SNDACK;
    1049             :     }
    1050      996202 : }
    1051             : 
    1052             : void
    1053           0 : tcp_program_dupack (tcp_connection_t * tc)
    1054             : {
    1055           0 :   if (!(tc->flags & TCP_CONN_SNDACK))
    1056             :     {
    1057           0 :       session_add_self_custom_tx_evt (&tc->connection, 1);
    1058           0 :       tc->flags |= TCP_CONN_SNDACK;
    1059             :     }
    1060           0 :   if (tc->pending_dupacks < 255)
    1061           0 :     tc->pending_dupacks += 1;
    1062           0 : }
    1063             : 
    1064             : void
    1065           0 : tcp_program_retransmit (tcp_connection_t * tc)
    1066             : {
    1067           0 :   if (!(tc->flags & TCP_CONN_RXT_PENDING))
    1068             :     {
    1069           0 :       session_add_self_custom_tx_evt (&tc->connection, 0);
    1070           0 :       tc->flags |= TCP_CONN_RXT_PENDING;
    1071             :     }
    1072           0 : }
    1073             : 
    1074             : /**
    1075             :  * Send window update ack
    1076             :  *
    1077             :  * Ensures that it will be sent only once, after a zero rwnd has been
    1078             :  * advertised in a previous ack, and only if rwnd has grown beyond a
    1079             :  * configurable value.
    1080             :  */
    1081             : void
    1082           0 : tcp_send_window_update_ack (tcp_connection_t * tc)
    1083             : {
    1084           0 :   if (tcp_zero_rwnd_sent (tc))
    1085             :     {
    1086           0 :       tcp_update_rcv_wnd (tc);
    1087           0 :       if (tc->rcv_wnd >= tcp_cfg.rwnd_min_update_ack * tc->snd_mss)
    1088             :         {
    1089           0 :           tcp_zero_rwnd_sent_off (tc);
    1090           0 :           tcp_program_ack (tc);
    1091             :         }
    1092             :     }
    1093           0 : }
    1094             : 
    1095             : /**
    1096             :  * Allocate a new buffer and build a new tcp segment
    1097             :  *
    1098             :  * @param wrk           tcp worker
    1099             :  * @param tc            connection for which the segment will be allocated
    1100             :  * @param offset        offset of the first byte in the tx fifo
    1101             :  * @param max_deq_bytes segment size
    1102             :  * @param[out] b        pointer to buffer allocated
    1103             :  *
    1104             :  * @return      the number of bytes in the segment or 0 if buffer cannot be
    1105             :  *              allocated or no data available
    1106             :  */
    1107             : static int
    1108           0 : tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
    1109             :                      u32 offset, u32 max_deq_bytes, vlib_buffer_t ** b)
    1110             : {
    1111           0 :   u32 bytes_per_buffer = vnet_get_tcp_main ()->bytes_per_buffer;
    1112           0 :   vlib_main_t *vm = wrk->vm;
    1113             :   u32 bi, seg_size;
    1114           0 :   int n_bytes = 0;
    1115             :   u8 *data;
    1116             : 
    1117           0 :   seg_size = max_deq_bytes + TRANSPORT_MAX_HDRS_LEN;
    1118             : 
    1119             :   /*
    1120             :    * Prepare options
    1121             :    */
    1122           0 :   tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
    1123             : 
    1124             :   /*
    1125             :    * Allocate and fill in buffer(s)
    1126             :    */
    1127             : 
    1128             :   /* Easy case, segment and headers fit in a single buffer */
    1129           0 :   if (PREDICT_TRUE (seg_size <= bytes_per_buffer))
    1130             :     {
    1131           0 :       if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
    1132             :         {
    1133           0 :           tcp_worker_stats_inc (wrk, no_buffer, 1);
    1134           0 :           return 0;
    1135             :         }
    1136           0 :       *b = vlib_get_buffer (vm, bi);
    1137           0 :       data = tcp_init_buffer (vm, *b);
    1138           0 :       n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
    1139             :                                             max_deq_bytes);
    1140           0 :       ASSERT (n_bytes == max_deq_bytes);
    1141           0 :       b[0]->current_length = n_bytes;
    1142           0 :       tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
    1143             :                       /* burst */ 0, /* update_snd_nxt */ 0);
    1144             :     }
    1145             :   /* Split mss into multiple buffers */
    1146             :   else
    1147             :     {
    1148           0 :       u32 chain_bi = ~0, n_bufs_per_seg, n_bufs;
    1149             :       u16 n_peeked, len_to_deq;
    1150             :       vlib_buffer_t *chain_b, *prev_b;
    1151             :       int i;
    1152             : 
    1153             :       /* Make sure we have enough buffers */
    1154           0 :       n_bufs_per_seg = ceil ((double) seg_size / bytes_per_buffer);
    1155           0 :       vec_validate_aligned (wrk->tx_buffers, n_bufs_per_seg - 1,
    1156             :                             CLIB_CACHE_LINE_BYTES);
    1157           0 :       n_bufs = vlib_buffer_alloc (vm, wrk->tx_buffers, n_bufs_per_seg);
    1158           0 :       if (PREDICT_FALSE (n_bufs != n_bufs_per_seg))
    1159             :         {
    1160           0 :           if (n_bufs)
    1161           0 :             vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
    1162           0 :           tcp_worker_stats_inc (wrk, no_buffer, 1);
    1163           0 :           return 0;
    1164             :         }
    1165             : 
    1166           0 :       *b = vlib_get_buffer (vm, wrk->tx_buffers[--n_bufs]);
    1167           0 :       data = tcp_init_buffer (vm, *b);
    1168           0 :       n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
    1169             :                                             bytes_per_buffer -
    1170             :                                             TRANSPORT_MAX_HDRS_LEN);
    1171           0 :       b[0]->current_length = n_bytes;
    1172           0 :       b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
    1173           0 :       b[0]->total_length_not_including_first_buffer = 0;
    1174           0 :       max_deq_bytes -= n_bytes;
    1175             : 
    1176           0 :       chain_b = *b;
    1177           0 :       for (i = 1; i < n_bufs_per_seg; i++)
    1178             :         {
    1179           0 :           prev_b = chain_b;
    1180           0 :           len_to_deq = clib_min (max_deq_bytes, bytes_per_buffer);
    1181           0 :           chain_bi = wrk->tx_buffers[--n_bufs];
    1182           0 :           chain_b = vlib_get_buffer (vm, chain_bi);
    1183           0 :           chain_b->current_data = 0;
    1184           0 :           data = vlib_buffer_get_current (chain_b);
    1185           0 :           n_peeked = session_tx_fifo_peek_bytes (&tc->connection, data,
    1186             :                                                  offset + n_bytes,
    1187             :                                                  len_to_deq);
    1188           0 :           ASSERT (n_peeked == len_to_deq);
    1189           0 :           n_bytes += n_peeked;
    1190           0 :           chain_b->current_length = n_peeked;
    1191           0 :           chain_b->next_buffer = 0;
    1192             : 
    1193             :           /* update previous buffer */
    1194           0 :           prev_b->next_buffer = chain_bi;
    1195           0 :           prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
    1196             : 
    1197           0 :           max_deq_bytes -= n_peeked;
    1198           0 :           b[0]->total_length_not_including_first_buffer += n_peeked;
    1199             :         }
    1200             : 
    1201           0 :       tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
    1202             :                       /* burst */ 0, /* update_snd_nxt */ 0);
    1203             : 
    1204           0 :       if (PREDICT_FALSE (n_bufs))
    1205             :         {
    1206           0 :           clib_warning ("not all buffers consumed");
    1207           0 :           vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
    1208             :         }
    1209             :     }
    1210             : 
    1211           0 :   ASSERT (n_bytes > 0);
    1212           0 :   ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer);
    1213             : 
    1214           0 :   return n_bytes;
    1215             : }
    1216             : 
    1217             : /**
    1218             :  * Build a retransmit segment
    1219             :  *
                        * The request is capped to snd_mss and to the bytes that
                        * transport_max_tx_dequeue () reports past @c offset. The segment itself
                        * is built by tcp_prepare_segment (). On success the connection's
                        * retransmit accounting (snd_rxt_bytes, bytes_retrans, segs_retrans) and
                        * the worker's rxt_segs counter are updated.
                        *
                        * @param wrk            worker context, forwarded to tcp_prepare_segment ()
                        * @param tc             connection; asserted to have
                        *                       state >= TCP_STATE_ESTABLISHED
                        * @param offset         offset relative to snd_una where the segment starts
                        * @param max_deq_bytes  upper bound for the segment size; asserted non-zero
                        * @param b              forwarded to tcp_prepare_segment ()
    1220             :  * @return the number of bytes in the segment or 0 if there's nothing to
    1221             :  *         retransmit
    1222             :  */
    1223             : static u32
    1224           0 : tcp_prepare_retransmit_segment (tcp_worker_ctx_t * wrk,
    1225             :                                 tcp_connection_t * tc, u32 offset,
    1226             :                                 u32 max_deq_bytes, vlib_buffer_t ** b)
    1227             : {
    1228             :   u32 start, available_bytes;
    1229           0 :   int n_bytes = 0;
    1230             : 
    1231           0 :   ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
    1232           0 :   ASSERT (max_deq_bytes != 0);
    1233             : 
    1234             :   /*
    1235             :    * Make sure we can retransmit something
    1236             :    */
    1237           0 :   available_bytes = transport_max_tx_dequeue (&tc->connection);
    1238           0 :   ASSERT (available_bytes >= offset);
    1239           0 :   available_bytes -= offset;
    1240           0 :   if (!available_bytes)
    1241           0 :     return 0;
    1242             : 
                         /* Never build more than one MSS, nor more than what is available */
    1243           0 :   max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
    1244           0 :   max_deq_bytes = clib_min (available_bytes, max_deq_bytes);
    1245             : 
                         /* Retransmitted range must not extend beyond snd_nxt */
    1246           0 :   start = tc->snd_una + offset;
    1247           0 :   ASSERT (seq_leq (start + max_deq_bytes, tc->snd_nxt));
    1248             : 
    1249           0 :   n_bytes = tcp_prepare_segment (wrk, tc, offset, max_deq_bytes, b);
    1250           0 :   if (!n_bytes)
    1251           0 :     return 0;
    1252             : 
    1253           0 :   tc->snd_rxt_bytes += n_bytes;
    1254             : 
                         /* With rate sampling enabled, hand the retransmitted sequence range
                          * to tcp_bt_track_rxt () */
    1255           0 :   if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
    1256           0 :     tcp_bt_track_rxt (tc, start, start + n_bytes);
    1257             : 
                         /* Per-connection and per-worker retransmit statistics */
    1258           0 :   tc->bytes_retrans += n_bytes;
    1259           0 :   tc->segs_retrans += 1;
    1260           0 :   tcp_worker_stats_inc (wrk, rxt_segs, 1);
    1261             :   TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);
    1262             : 
    1263           0 :   return n_bytes;
    1264             : }
    1265             : 
                       /**
                        * Clear SACK reneging state on the connection's scoreboard if needed.
                        *
                        * Nothing is done when the scoreboard is not flagged as reneging and it
                        * either has no holes or its first hole starts at snd_una. In every other
                        * case scoreboard_clear_reneging () is called with snd_una and snd_nxt.
                        *
                        * @param tc  connection whose sack_sb scoreboard is checked
                        */
    1266             : static void
    1267           0 : tcp_check_sack_reneging (tcp_connection_t * tc)
    1268             : {
    1269           0 :   sack_scoreboard_t *sb = &tc->sack_sb;
    1270             :   sack_scoreboard_hole_t *hole;
    1271             : 
    1272           0 :   hole = scoreboard_first_hole (sb);
    1273           0 :   if (!sb->is_reneging && (!hole || hole->start == tc->snd_una))
    1274           0 :     return;
    1275             : 
    1276           0 :   scoreboard_clear_reneging (sb, tc->snd_una, tc->snd_nxt);
    1277             : }
    1278             : 
    1279             : /**
    1280             :  * Reset congestion control, switch cwnd to loss window and try again.
                        *
                        * Saves the current ssthresh and cwnd in prev_ssthresh/prev_cwnd, notifies
                        * the congestion control algorithm, resets rtt_ts and cwnd_acc_bytes,
                        * increments tr_occurences, resets the scoreboard reorder value to
                        * TCP_DUPACK_THRESHOLD and turns recovery on for the connection.
                        *
                        * @param tc  connection that experienced the retransmit timeout
    1281             :  */
    1282             : static void
    1283           0 : tcp_cc_init_rxt_timeout (tcp_connection_t * tc)
    1284             : {
    1285             :   TCP_EVT (TCP_EVT_CC_EVT, tc, 6);
    1286             : 
                         /* Remember the pre-timeout values before the cc algo changes them */
    1287           0 :   tc->prev_ssthresh = tc->ssthresh;
    1288           0 :   tc->prev_cwnd = tc->cwnd;
    1289             : 
    1290             :   /* If we entered loss without fast recovery, notify cc algo of the
    1291             :    * congestion event such that it can update ssthresh and its state */
    1292           0 :   if (!tcp_in_fastrecovery (tc))
    1293           0 :     tcp_cc_congestion (tc);
    1294             : 
    1295             :   /* Let cc algo decide loss cwnd and ssthresh post unrecovered loss */
    1296           0 :   tcp_cc_loss (tc);
    1297             : 
    1298           0 :   tc->rtt_ts = 0;
    1299           0 :   tc->cwnd_acc_bytes = 0;
    1300           0 :   tc->tr_occurences += 1;
    1301           0 :   tc->sack_sb.reorder = TCP_DUPACK_THRESHOLD;
    1302           0 :   tcp_recovery_on (tc);
    1303           0 : }
    1304             : 
                       /**
                        * Retransmit timer handler.
                        *
                        * Always bumps the worker's tr_events counter, then acts on the connection
                        * state:
                        *  - SYN_SENT and CLOSED: return without doing anything else.
                        *  - state >= ESTABLISHED: resend a lost FIN, or retransmit the first
                        *    unacked segment, double rto (capped at TCP_RTO_MAX) and re-arm the
                        *    retransmit timer. On the first backoff the congestion control state
                        *    is reset via tcp_cc_init_rxt_timeout (). Once rto_boff reaches
                        *    TCP_RTO_BOFF_MAX the connection is reset and closed instead.
                        *  - SYN_RCVD: retransmit the SYN-ACK, or close the connection once rto
                        *    exceeds half of TCP_ESTABLISH_TIME.
                        *
                        * If no segment/buffer can be obtained, the retransmit timer is re-armed
                        * with tcp_cfg.alloc_err_timeout and the handler returns.
                        *
                        * @param tc  connection whose retransmit timer expired
                        */
    1305             : void
    1306          12 : tcp_timer_retransmit_handler (tcp_connection_t * tc)
    1307             : {
    1308          12 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
    1309          12 :   vlib_main_t *vm = wrk->vm;
    1310          12 :   vlib_buffer_t *b = 0;
    1311             :   u32 bi, n_bytes;
    1312             : 
    1313          12 :   tcp_worker_stats_inc (wrk, tr_events, 1);
    1314             : 
    1315             :   /* Should be handled by a different handler */
    1316          12 :   if (PREDICT_FALSE (tc->state == TCP_STATE_SYN_SENT))
    1317           0 :     return;
    1318             : 
    1319             :   /* Wait-close and retransmit could pop at the same time */
    1320          12 :   if (tc->state == TCP_STATE_CLOSED)
    1321           0 :     return;
    1322             : 
    1323          12 :   if (tc->state >= TCP_STATE_ESTABLISHED)
    1324             :     {
    1325             :       TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
    1326             : 
    1327             :       /* Lost FIN, retransmit and return */
    1328           0 :       if (tc->flags & TCP_CONN_FINSNT)
    1329             :         {
    1330           0 :           tcp_send_fin (tc);
    1331           0 :           tc->rto_boff += 1;
    1332           0 :           tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
    1333           0 :           return;
    1334             :         }
    1335             : 
    1336             :       /* Shouldn't be here */
    1337           0 :       if (tc->snd_una == tc->snd_nxt)
    1338             :         {
    1339           0 :           ASSERT (!tcp_in_recovery (tc));
    1340           0 :           tc->rto_boff = 0;
    1341           0 :           return;
    1342             :         }
    1343             : 
    1344             :       /* We're not in recovery so make sure rto_boff is 0. Can be non 0 due
    1345             :        * to persist timer timeout */
    1346           0 :       if (!tcp_in_recovery (tc) && tc->rto_boff > 0)
    1347             :         {
    1348           0 :           tc->rto_boff = 0;
    1349           0 :           tcp_update_rto (tc);
    1350             :         }
    1351             : 
    1352             :       /* Peer is dead or network connectivity is lost. Close connection.
    1353             :        * RFC 1122 section 4.2.3.5 recommends a value of at least 100s. For
    1354             :        * a min rto of 0.2s we need to retry about 8 times. */
    1355           0 :       if (tc->rto_boff >= TCP_RTO_BOFF_MAX)
    1356             :         {
    1357           0 :           tcp_send_reset (tc);
    1358           0 :           tcp_connection_set_state (tc, TCP_STATE_CLOSED);
    1359           0 :           session_transport_closing_notify (&tc->connection);
    1360           0 :           session_transport_closed_notify (&tc->connection);
    1361           0 :           tcp_connection_timers_reset (tc);
    1362           0 :           tcp_program_cleanup (wrk, tc);
    1363           0 :           tcp_worker_stats_inc (wrk, tr_abort, 1);
    1364           0 :           return;
    1365             :         }
    1366             : 
                             /* With SACK negotiated, clear any reneging state and mark everything
                              * between snd_una and snd_nxt as lost in the scoreboard */
    1367           0 :       if (tcp_opts_sack_permitted (&tc->rcv_opts))
    1368             :         {
    1369           0 :           tcp_check_sack_reneging (tc);
    1370           0 :           scoreboard_rxt_mark_lost (&tc->sack_sb, tc->snd_una, tc->snd_nxt);
    1371             :         }
    1372             : 
    1373             :       /* Update send congestion to make sure that rxt has data to send */
    1374           0 :       tc->snd_congestion = tc->snd_nxt;
    1375             : 
    1376             :       /* Send the first unacked segment. If we're short on buffers, return
    1377             :        * as soon as possible */
    1378           0 :       n_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
    1379           0 :       n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, n_bytes, &b);
    1380           0 :       if (!n_bytes)
    1381             :         {
    1382           0 :           tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
    1383             :                             tcp_cfg.alloc_err_timeout);
    1384           0 :           return;
    1385             :         }
    1386             : 
    1387           0 :       bi = vlib_get_buffer_index (vm, b);
    1388           0 :       tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1389             : 
                             /* Exponential backoff: double rto, capped at TCP_RTO_MAX */
    1390           0 :       tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
    1391           0 :       tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
    1392             : 
                             /* Only the first timeout of a backoff sequence resets the congestion
                              * control state */
    1393           0 :       tc->rto_boff += 1;
    1394           0 :       if (tc->rto_boff == 1)
    1395             :         {
    1396           0 :           tcp_cc_init_rxt_timeout (tc);
    1397             :           /* Record timestamp. Eifel detection algorithm RFC3522 */
    1398           0 :           tc->snd_rxt_ts = tcp_tstamp (tc);
    1399             :         }
    1400             : 
                             /* Restart scoreboard retransmit state right after the bytes that
                              * were just retransmitted */
    1401           0 :       if (tcp_opts_sack_permitted (&tc->rcv_opts))
    1402           0 :         scoreboard_init_rxt (&tc->sack_sb, tc->snd_una + n_bytes);
    1403             : 
    1404           0 :       tcp_program_retransmit (tc);
    1405             :     }
    1406             :   /* Retransmit SYN-ACK */
    1407          12 :   else if (tc->state == TCP_STATE_SYN_RCVD)
    1408             :     {
    1409             :       TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
    1410             : 
    1411          12 :       tc->rtt_ts = 0;
    1412             : 
    1413             :       /* Passive open establish timeout */
                             /* NOTE(review): this uses '>' while the active open check in
                              * tcp_timer_retransmit_syn_handler () uses '>=' - confirm the
                              * difference is intended */
    1414          12 :       if (tc->rto > TCP_ESTABLISH_TIME >> 1)
    1415             :         {
    1416           0 :           tcp_connection_set_state (tc, TCP_STATE_CLOSED);
    1417           0 :           tcp_connection_timers_reset (tc);
    1418           0 :           tcp_program_cleanup (wrk, tc);
    1419           0 :           tcp_worker_stats_inc (wrk, tr_abort, 1);
    1420           0 :           return;
    1421             :         }
    1422             : 
                             /* No buffer available: retry after alloc_err_timeout */
    1423          12 :       if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
    1424             :         {
    1425           0 :           tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
    1426             :                             tcp_cfg.alloc_err_timeout);
    1427           0 :           tcp_worker_stats_inc (wrk, no_buffer, 1);
    1428           0 :           return;
    1429             :         }
    1430             : 
                             /* rto only starts doubling after more than TCP_RTO_SYN_RETRIES
                              * retries */
    1431          12 :       tc->rto_boff += 1;
    1432          12 :       if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
    1433           3 :         tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
    1434             : 
    1435          12 :       ASSERT (tc->snd_una != tc->snd_nxt);
    1436          12 :       tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
    1437             : 
    1438          12 :       b = vlib_get_buffer (vm, bi);
    1439          12 :       tcp_init_buffer (vm, b);
    1440          12 :       tcp_make_synack (tc, b);
    1441             :       TCP_EVT (TCP_EVT_SYN_RXT, tc, 1);
    1442             : 
    1443             :       /* Retransmit timer already updated, just enqueue to output */
    1444          12 :       tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1445             :     }
    1446             :   else
    1447             :     {
    1448           0 :       ASSERT (tc->state == TCP_STATE_CLOSED);
    1449           0 :       return;
    1450             :     }
    1451             : }
    1452             : 
    1453             : /**
    1454             :  * SYN retransmit timer handler. Active open only.
                        *
                        * Does nothing unless the connection is still in SYN_SENT. A half-open
                        * connection flagged TCP_CONN_HALF_OPEN_DONE is only cleaned up. Once rto
                        * reaches half of TCP_ESTABLISH_TIME the session layer is notified with
                        * SESSION_E_TIMEDOUT and the connection is cleaned up. Otherwise a new SYN
                        * is built and enqueued and the SYN retransmit timer is re-armed.
                        *
                        * @param tc  half-open connection whose SYN retransmit timer expired
    1455             :  */
    1456             : void
    1457           0 : tcp_timer_retransmit_syn_handler (tcp_connection_t * tc)
    1458             : {
    1459           0 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
    1460           0 :   vlib_main_t *vm = wrk->vm;
    1461           0 :   vlib_buffer_t *b = 0;
    1462             :   u32 bi;
    1463             : 
    1464             :   /* Note: the connection may have transitioned to ESTABLISHED... */
    1465           0 :   if (PREDICT_FALSE (tc->state != TCP_STATE_SYN_SENT))
    1466           0 :     return;
    1467             : 
    1468             :   /* Half-open connection actually moved to established but we were
    1469             :    * waiting for syn retransmit to pop to call cleanup from the right
    1470             :    * thread. */
    1471           0 :   if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
    1472             :     {
    1473           0 :       if (tcp_half_open_connection_cleanup (tc))
    1474             :         TCP_DBG ("could not remove half-open connection");
    1475           0 :       return;
    1476             :     }
    1477             : 
    1478             :   TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
    1479           0 :   tc->rtt_ts = 0;
    1480             : 
    1481             :   /* Active open establish timeout */
    1482           0 :   if (tc->rto >= TCP_ESTABLISH_TIME >> 1)
    1483             :     {
    1484           0 :       session_stream_connect_notify (&tc->connection, SESSION_E_TIMEDOUT);
    1485           0 :       tcp_connection_cleanup (tc);
    1486           0 :       return;
    1487             :     }
    1488             : 
                         /* No buffer available: retry after alloc_err_timeout */
    1489           0 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
    1490             :     {
    1491           0 :       tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
    1492             :                         tcp_cfg.alloc_err_timeout);
    1493           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
    1494           0 :       return;
    1495             :     }
    1496             : 
    1497             :   /* Try without increasing RTO a number of times. If this fails,
    1498             :    * start growing RTO exponentially */
    1499           0 :   tc->rto_boff += 1;
    1500           0 :   if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
    1501           0 :     tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
    1502             : 
    1503           0 :   b = vlib_get_buffer (vm, bi);
    1504           0 :   tcp_init_buffer (vm, b);
    1505           0 :   tcp_make_syn (tc, b);
    1506             : 
    1507             :   TCP_EVT (TCP_EVT_SYN_RXT, tc, 0);
    1508             : 
    1509           0 :   tcp_enqueue_half_open (wrk, tc, b, bi);
    1510             : 
                         /* Re-arm the SYN retransmit timer using the (possibly doubled) rto
                          * scaled by TCP_TO_TIMER_TICK */
    1511           0 :   tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
    1512           0 :                     (u32) tc->rto * TCP_TO_TIMER_TICK);
    1513             : }
    1514             : 
    1515             : /**
    1516             :  * Got 0 snd_wnd from peer, try to do something about it.
    1517             :  *
                        * If the connection is closed, snd_wnd has grown beyond snd_mss or a FIN
                        * was already sent, nothing is transmitted; the connection is only
                        * rescheduled if it was descheduled. The same happens when all dequeueable
                        * bytes are already in flight. If nothing at all can be dequeued, the
                        * persist timer is simply re-armed.
                        *
                        * Otherwise rto is doubled (capped at TCP_RTO_MAX), rto_boff is incremented
                        * and one segment of previously unsent data is sent at snd_nxt. Its size is
                        * limited by snd_mss, the available bytes, the per-buffer payload space
                        * and, when non-zero, snd_wnd. The retransmit timer is then updated.
                        *
                        * @param tc  connection whose persist timer expired
    1518             :  */
    1519             : void
    1520           0 : tcp_timer_persist_handler (tcp_connection_t * tc)
    1521             : {
    1522           0 :   tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
    1523             :   u32 bi, max_snd_bytes, available_bytes, offset;
    1524           0 :   tcp_main_t *tm = vnet_get_tcp_main ();
    1525           0 :   vlib_main_t *vm = wrk->vm;
    1526             :   vlib_buffer_t *b;
    1527           0 :   int n_bytes = 0;
    1528             :   u8 *data;
    1529             : 
    1530             :   /* Problem already solved or worse */
    1531           0 :   if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
    1532           0 :       || (tc->flags & TCP_CONN_FINSNT))
    1533           0 :     goto update_scheduler;
    1534             : 
                         /* offset is the number of bytes sent but not yet acked */
    1535           0 :   available_bytes = transport_max_tx_dequeue (&tc->connection);
    1536           0 :   offset = tc->snd_nxt - tc->snd_una;
    1537             : 
    1538             :   /* Reprogram persist if no new bytes available to send. We may have data
    1539             :    * next time */
    1540           0 :   if (!available_bytes)
    1541             :     {
    1542           0 :       tcp_persist_timer_set (&wrk->timer_wheel, tc);
    1543           0 :       return;
    1544             :     }
    1545             : 
                         /* Everything that can be dequeued is already in flight */
    1546           0 :   if (available_bytes <= offset)
    1547           0 :     goto update_scheduler;
    1548             : 
    1549             :   /* Increment RTO backoff */
    1550           0 :   tc->rto_boff += 1;
    1551           0 :   tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
    1552             : 
    1553             :   /*
    1554             :    * Try to force the first unsent segment (or buffer)
    1555             :    */
    1556           0 :   if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
    1557             :     {
    1558           0 :       tcp_persist_timer_set (&wrk->timer_wheel, tc);
    1559           0 :       tcp_worker_stats_inc (wrk, no_buffer, 1);
    1560           0 :       return;
    1561             :     }
    1562             : 
    1563           0 :   b = vlib_get_buffer (vm, bi);
    1564           0 :   data = tcp_init_buffer (vm, b);
    1565             : 
    1566           0 :   tcp_validate_txf_size (tc, offset);
    1567           0 :   tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
                         /* Payload must fit in a single buffer next to the headers and must
                          * not exceed one MSS or, if the peer opened it a bit, the send window */
    1568           0 :   max_snd_bytes = clib_min (clib_min (tc->snd_mss, available_bytes),
    1569             :                             tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN);
    1570           0 :   if (tc->snd_wnd > 0)
    1571           0 :     max_snd_bytes = clib_min (tc->snd_wnd, max_snd_bytes);
    1572           0 :   n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
    1573             :                                         max_snd_bytes);
    1574           0 :   b->current_length = n_bytes;
    1575           0 :   ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)
    1576             :                            || tc->snd_una == tc->snd_nxt
    1577             :                            || tc->rto_boff > 1));
    1578             : 
    1579           0 :   if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
    1580             :     {
    1581           0 :       tcp_bt_check_app_limited (tc);
    1582           0 :       tcp_bt_track_tx (tc, n_bytes);
    1583             :     }
    1584             : 
                         /* Options were computed above, so ask only for the header push and
                          * the snd_nxt update */
    1585           0 :   tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0,
    1586             :                   /* burst */ 0, /* update_snd_nxt */ 1);
    1587           0 :   tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
    1588           0 :   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1589             : 
    1590             :   /* Just sent new data, enable retransmit */
    1591           0 :   tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
    1592             : 
    1593           0 :   return;
    1594             : 
    1595           0 : update_scheduler:
    1596             : 
                         /* Nothing was sent; make sure a descheduled connection gets another
                          * chance to transmit */
    1597           0 :   if (tcp_is_descheduled (tc))
    1598           0 :     transport_connection_reschedule (&tc->connection);
    1599             : }
    1600             : 
    1601             : /**
    1602             :  * Retransmit first unacked segment
                        *
                        * Builds a segment of at most snd_mss bytes starting at snd_una (offset 0)
                        * and enqueues it to output.
                        *
                        * @param wrk  worker context used to build and enqueue the segment
                        * @param tc   connection to retransmit for
                        * @return 0 if a segment was enqueued, -1 if no segment could be prepared
    1603             :  */
    1604             : int
    1605           0 : tcp_retransmit_first_unacked (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
    1606             : {
    1607           0 :   vlib_main_t *vm = wrk->vm;
    1608             :   vlib_buffer_t *b;
    1609             :   u32 bi, n_bytes;
    1610             : 
    1611             :   TCP_EVT (TCP_EVT_CC_EVT, tc, 1);
    1612             : 
    1613           0 :   n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, tc->snd_mss, &b);
    1614           0 :   if (!n_bytes)
    1615           0 :     return -1;
    1616             : 
    1617           0 :   bi = vlib_get_buffer_index (vm, b);
    1618           0 :   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1619             : 
    1620           0 :   return 0;
    1621             : }
    1622             : 
                       /**
                        * Send previously unsent data, starting at snd_nxt.
                        *
                        * Builds and enqueues up to @c burst_size segments of at most snd_mss
                        * bytes each, advancing snd_nxt after every segment. The burst is further
                        * limited by the part of snd_wnd not already covered by in-flight bytes.
                        *
                        * @param wrk         worker context used to build and enqueue segments
                        * @param tc          connection to transmit for
                        * @param burst_size  maximum number of segments to send
                        * @return number of segments enqueued to output
                        */
    1623             : static int
    1624           0 : tcp_transmit_unsent (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
    1625             :                      u32 burst_size)
    1626             : {
    1627           0 :   u32 offset, n_segs = 0, n_written, bi, available_wnd;
    1628           0 :   vlib_main_t *vm = wrk->vm;
    1629           0 :   vlib_buffer_t *b = 0;
    1630             : 
    1631           0 :   offset = tc->snd_nxt - tc->snd_una;
                         /* Clamp at 0: if in-flight bytes exceed snd_wnd, the unsigned
                          * subtraction would wrap and effectively remove the window limit */
    1632           0 :   available_wnd = tc->snd_wnd > offset ? tc->snd_wnd - offset : 0;
    1633           0 :   burst_size = clib_min (burst_size, available_wnd / tc->snd_mss);
    1634             : 
    1635           0 :   if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
    1636           0 :     tcp_bt_check_app_limited (tc);
    1637             : 
    1638           0 :   while (n_segs < burst_size)
    1639             :     {
    1640           0 :       n_written = tcp_prepare_segment (wrk, tc, offset, tc->snd_mss, &b);
    1641           0 :       if (!n_written)
    1642           0 :         goto done;
    1643             : 
    1644           0 :       bi = vlib_get_buffer_index (vm, b);
    1645           0 :       tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1646           0 :       offset += n_written;
    1647           0 :       n_segs += 1;
    1648             : 
    1649           0 :       if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
    1650           0 :         tcp_bt_track_tx (tc, n_written);
    1651             : 
                             /* New data was sent, so snd_nxt moves forward */
    1652           0 :       tc->snd_nxt += n_written;
    1653             :     }
    1654             : 
    1655           0 : done:
    1656           0 :   return n_segs;
    1657             : }
    1658             : 
    1659             : /**
    1660             :  * Estimate send space using proportional rate reduction (RFC6937)
                        *
                        * With pipe = tcp_flight_size () and
                        * prr_out = snd_rxt_bytes + (snd_nxt - snd_congestion):
                        *  - if pipe > ssthresh, space is prr_delivered scaled by
                        *    ssthresh / prev_cwnd, minus prr_out;
                        *  - otherwise space is the smaller of (ssthresh - pipe) and
                        *    max (prr_delivered - prr_out, 0) + snd_mss.
                        * The result is never negative, and is at least snd_mss while prr_out
                        * is 0.
                        *
                        * @param tc  connection in fast recovery
                        * @return estimated number of bytes that may be sent
    1661             :  */
    1662             : int
    1663           0 : tcp_fastrecovery_prr_snd_space (tcp_connection_t * tc)
    1664             : {
    1665             :   u32 pipe, prr_out;
    1666             :   int space;
    1667             : 
    1668           0 :   pipe = tcp_flight_size (tc);
    1669           0 :   prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);
    1670             : 
    1671           0 :   if (pipe > tc->ssthresh)
    1672             :     {
                             /* Flight still above ssthresh: send in proportion to what was
                              * delivered */
    1673           0 :       space = ((int) tc->prr_delivered * ((f64) tc->ssthresh / tc->prev_cwnd))
    1674           0 :         - prr_out;
    1675             :     }
    1676             :   else
    1677             :     {
                             /* Flight at or below ssthresh: grow back towards ssthresh, bounded
                              * by what was delivered beyond prr_out plus one MSS */
    1678             :       int limit;
    1679           0 :       limit = clib_max ((int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
    1680           0 :       space = clib_min (tc->ssthresh - pipe, limit);
    1681             :     }
                         /* Allow at least one MSS if nothing was sent yet, never go negative */
    1682           0 :   space = clib_max (space, prr_out ? 0 : tc->snd_mss);
    1683           0 :   return space;
    1684             : }
    1685             : 
                       /**
                        * Decide whether the segment at snd_una should be retransmitted again.
                        *
                        * @param tc  connection being recovered
                        * @param sb  the connection's SACK scoreboard
                        * @return 1 if tcp_fastrecovery_first () is set for the connection;
                        *         otherwise non-zero only if high_sacked has advanced past
                        *         snd_congestion by more than (snd_una - prr_start) scaled by
                        *         ssthresh / prev_cwnd
                        */
    1686             : static inline u8
    1687           0 : tcp_retransmit_should_retry_head (tcp_connection_t * tc,
    1688             :                                   sack_scoreboard_t * sb)
    1689             : {
    1690           0 :   u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
    1691           0 :   f64 rr = (f64) tc->ssthresh / tc->prev_cwnd;
    1692             : 
    1693           0 :   if (tcp_fastrecovery_first (tc))
    1694           0 :     return 1;
    1695             : 
    1696           0 :   return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);
    1697             : }
    1698             : 
                       /**
                        * Number of bytes that can be dequeued for transmission and have not been
                        * sent yet.
                        *
                        * Returned as u32: a narrower type would truncate the count, and a backlog
                        * that is an exact multiple of 256 bytes would read as 0.
                        *
                        * @param tc  connection to query
                        * @return transport_max_tx_dequeue () minus the bytes between snd_una and
                        *         snd_nxt
                        */
    1699             : static inline u32
    1700       41047 : tcp_max_tx_deq (tcp_connection_t * tc)
    1701             : {
    1702       41047 :   return (transport_max_tx_dequeue (&tc->connection)
    1703       41047 :           - (tc->snd_nxt - tc->snd_una));
    1704             : }
    1705             : 
                       /**
                        * Non-zero if the scoreboard's rescue_rxt is at or after snd_una and at or
                        * before snd_congestion, as checked with seq_geq () / seq_leq ().
                        * Arguments are parenthesized so any pointer expression can be passed.
                        */
    1706             : #define scoreboard_rescue_rxt_valid(_sb, _tc)                   \
    1707             :     (seq_geq ((_sb)->rescue_rxt, (_tc)->snd_una)                  \
    1708             :         && seq_leq ((_sb)->rescue_rxt, (_tc)->snd_congestion))
    1709             : 
    1710             : /**
    1711             :  * Do retransmit with SACKs
                        *
                        * The burst is first limited by the pacer (burst bytes / snd_mss). If that
                        * leaves no room, a retransmit is programmed and nothing is sent. Send
                        * space comes from tcp_available_cc_snd_space () when in recovery, and
                        * from tcp_fastrecovery_prr_snd_space () otherwise.
                        *
                        * The segment at snd_una may be retransmitted once more if it looks like a
                        * lost retransmit. After that the scoreboard holes are walked and
                        * retransmitted while send space and burst allow. When no hole is left,
                        * either new data is sent via tcp_transmit_unsent () or, when permitted, a
                        * single rescue retransmission is done.
                        *
                        * @param wrk         worker context used to build and enqueue segments
                        * @param tc          connection, asserted to be in congestion recovery
                        * @param burst_size  maximum number of segments to send
                        * @return number of segments enqueued to output
    1712             :  */
    1713             : static int
    1714           0 : tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
    1715             :                      u32 burst_size)
    1716             : {
    1717           0 :   u32 n_written = 0, offset, max_bytes, n_segs = 0;
    1718           0 :   u8 snd_limited = 0, can_rescue = 0;
    1719             :   u32 bi, max_deq, burst_bytes;
    1720             :   sack_scoreboard_hole_t *hole;
    1721           0 :   vlib_main_t *vm = wrk->vm;
    1722           0 :   vlib_buffer_t *b = 0;
    1723             :   sack_scoreboard_t *sb;
    1724             :   int snd_space;
    1725             : 
    1726           0 :   ASSERT (tcp_in_cong_recovery (tc));
    1727             : 
                         /* Pacer decides how many full segments fit in this burst */
    1728           0 :   burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
    1729           0 :   burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
    1730           0 :   if (!burst_size)
    1731             :     {
    1732           0 :       tcp_program_retransmit (tc);
    1733           0 :       return 0;
    1734             :     }
    1735             : 
    1736           0 :   if (tcp_in_recovery (tc))
    1737           0 :     snd_space = tcp_available_cc_snd_space (tc);
    1738             :   else
    1739           0 :     snd_space = tcp_fastrecovery_prr_snd_space (tc);
    1740             : 
                         /* Not enough space for a full segment */
    1741           0 :   if (snd_space < tc->snd_mss)
    1742           0 :     goto done;
    1743             : 
    1744           0 :   sb = &tc->sack_sb;
    1745             : 
    1746             :   /* Check if snd_una is a lost retransmit */
    1747           0 :   if (pool_elts (sb->holes)
    1748           0 :       && seq_gt (sb->high_sacked, tc->snd_congestion)
    1749           0 :       && tc->rxt_head != tc->snd_una
    1750           0 :       && tcp_retransmit_should_retry_head (tc, sb))
    1751             :     {
    1752           0 :       max_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
    1753           0 :       n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b);
    1754           0 :       if (!n_written)
    1755             :         {
    1756           0 :           tcp_program_retransmit (tc);
    1757           0 :           goto done;
    1758             :         }
    1759           0 :       bi = vlib_get_buffer_index (vm, b);
    1760           0 :       tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1761           0 :       n_segs = 1;
    1762             : 
                             /* Remember which head was retried so it is not retried again until
                              * snd_una moves */
    1763           0 :       tc->rxt_head = tc->snd_una;
    1764           0 :       tc->rxt_delivered += n_written;
    1765           0 :       tc->prr_delivered += n_written;
    1766           0 :       ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);
    1767             :     }
    1768             : 
    1769           0 :   tcp_fastrecovery_first_off (tc);
    1770             : 
    1771             :   TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
    1772           0 :   hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
    1773             : 
                         /* Bytes that can be dequeued and have not been sent yet */
    1774           0 :   max_deq = transport_max_tx_dequeue (&tc->connection);
    1775           0 :   max_deq -= tc->snd_nxt - tc->snd_una;
    1776             : 
    1777           0 :   while (snd_space > 0 && n_segs < burst_size)
    1778             :     {
    1779           0 :       hole = scoreboard_next_rxt_hole (sb, hole, max_deq != 0, &can_rescue,
    1780             :                                        &snd_limited);
    1781           0 :       if (!hole)
    1782             :         {
    1783             :           /* We are out of lost holes to retransmit so send some new data. */
    1784           0 :           if (max_deq > tc->snd_mss)
    1785             :             {
    1786             :               u32 n_segs_new;
    1787             :               int av_wnd;
    1788             : 
    1789             :               /* Make sure we don't exceed available window and leave space
    1790             :                * for one more packet, to avoid zero window acks */
    1791           0 :               av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
    1792           0 :               av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
    1793           0 :               snd_space = clib_min (snd_space, av_wnd);
    1794           0 :               snd_space = clib_min (max_deq, snd_space);
    1795           0 :               burst_size = clib_min (burst_size - n_segs,
    1796             :                                      snd_space / tc->snd_mss);
    1797           0 :               burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST);
    1798           0 :               n_segs_new = tcp_transmit_unsent (wrk, tc, burst_size);
                                     /* More unsent data remains than was just sent: come back
                                      * later */
    1799           0 :               if (max_deq > n_segs_new * tc->snd_mss)
    1800           0 :                 tcp_program_retransmit (tc);
    1801             : 
    1802           0 :               n_segs += n_segs_new;
    1803           0 :               goto done;
    1804             :             }
    1805             : 
    1806           0 :           if (tcp_in_recovery (tc) || !can_rescue
    1807           0 :               || scoreboard_rescue_rxt_valid (sb, tc))
    1808             :             break;
    1809             : 
    1810             :           /* If rescue rxt undefined or less than snd_una then one segment of
    1811             :            * up to SMSS octets that MUST include the highest outstanding
    1812             :            * unSACKed sequence number SHOULD be returned, and RescueRxt set to
    1813             :            * RecoveryPoint. HighRxt MUST NOT be updated.
    1814             :            */
    1815           0 :           hole = scoreboard_last_hole (sb);
    1816           0 :           max_bytes = clib_min (tc->snd_mss, hole->end - hole->start);
    1817           0 :           max_bytes = clib_min (max_bytes, snd_space);
                                 /* Segment covers the last max_bytes of the last hole */
    1818           0 :           offset = hole->end - tc->snd_una - max_bytes;
    1819           0 :           n_written = tcp_prepare_retransmit_segment (wrk, tc, offset,
    1820             :                                                       max_bytes, &b);
    1821           0 :           if (!n_written)
    1822           0 :             goto done;
    1823             : 
    1824           0 :           sb->rescue_rxt = tc->snd_congestion;
    1825           0 :           bi = vlib_get_buffer_index (vm, b);
    1826           0 :           tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1827           0 :           n_segs += 1;
    1828           0 :           break;
    1829             :         }
    1830             : 
                             /* Retransmit from high_rxt up to the end of the hole, within send
                              * space; at most one MSS if the scoreboard flagged snd_limited */
    1831           0 :       max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
    1832           0 :       max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;
    1833           0 :       if (max_bytes == 0)
    1834           0 :         break;
    1835             : 
    1836           0 :       offset = sb->high_rxt - tc->snd_una;
    1837           0 :       n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
    1838             :                                                   &b);
    1839           0 :       ASSERT (n_written <= snd_space);
    1840             : 
    1841             :       /* Nothing left to retransmit */
    1842           0 :       if (n_written == 0)
    1843           0 :         break;
    1844             : 
    1845           0 :       bi = vlib_get_buffer_index (vm, b);
    1846           0 :       tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1847             : 
    1848           0 :       sb->high_rxt += n_written;
    1849           0 :       ASSERT (seq_leq (sb->high_rxt, tc->snd_nxt));
    1850             : 
    1851           0 :       snd_space -= n_written;
    1852           0 :       n_segs += 1;
    1853             :     }
    1854             : 
                         /* Still positioned on a hole: schedule another retransmit pass */
    1855           0 :   if (hole)
    1856           0 :     tcp_program_retransmit (tc);
    1857             : 
    1858           0 : done:
    1859             : 
    1860           0 :   transport_connection_tx_pacer_reset_bucket (&tc->connection, 0);
    1861           0 :   return n_segs;
    1862             : }
    1863             : 
    1864             : /**
    1865             :  * Fast retransmit without SACK info
    1866             :  */
    1867             : static int
    1868           0 : tcp_retransmit_no_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
    1869             :                         u32 burst_size)
    1870             : {
    1871           0 :   u32 n_written = 0, offset = 0, bi, max_deq, n_segs_now, max_bytes;
    1872             :   u32 burst_bytes, sent_bytes;
    1873           0 :   vlib_main_t *vm = wrk->vm;
    1874           0 :   int snd_space, n_segs = 0;
    1875           0 :   u8 cc_limited = 0;
    1876             :   vlib_buffer_t *b;
    1877             : 
    1878           0 :   ASSERT (tcp_in_cong_recovery (tc));
    1879             :   TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
    1880             : 
    1881           0 :   burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
    1882           0 :   burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
    1883           0 :   if (!burst_size)
    1884             :     {
    1885           0 :       tcp_program_retransmit (tc);
    1886           0 :       return 0;
    1887             :     }
    1888             : 
    1889           0 :   snd_space = tcp_available_cc_snd_space (tc);
    1890           0 :   cc_limited = snd_space < burst_bytes;
    1891             : 
    1892           0 :   if (!tcp_fastrecovery_first (tc))
    1893           0 :     goto send_unsent;
    1894             : 
    1895             :   /* RFC 6582: [If a partial ack], retransmit the first unacknowledged
    1896             :    * segment. */
    1897           0 :   while (snd_space > 0 && n_segs < burst_size)
    1898             :     {
    1899           0 :       max_bytes = clib_min (tc->snd_mss,
    1900             :                             tc->snd_congestion - tc->snd_una - offset);
    1901           0 :       if (!max_bytes)
    1902           0 :         break;
    1903           0 :       n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
    1904             :                                                   &b);
    1905             : 
    1906             :       /* Nothing left to retransmit */
    1907           0 :       if (n_written == 0)
    1908           0 :         break;
    1909             : 
    1910           0 :       bi = vlib_get_buffer_index (vm, b);
    1911           0 :       tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
    1912           0 :       snd_space -= n_written;
    1913           0 :       offset += n_written;
    1914           0 :       n_segs += 1;
    1915             :     }
    1916             : 
    1917           0 :   if (n_segs == burst_size)
    1918           0 :     goto done;
    1919             : 
    1920           0 : send_unsent:
    1921             : 
    1922             :   /* RFC 6582: Send a new segment if permitted by the new value of cwnd. */
    1923           0 :   if (snd_space < tc->snd_mss || tc->snd_mss == 0)
    1924           0 :     goto done;
    1925             : 
    1926           0 :   max_deq = transport_max_tx_dequeue (&tc->connection);
    1927           0 :   max_deq -= tc->snd_nxt - tc->snd_una;
    1928           0 :   if (max_deq)
    1929             :     {
    1930           0 :       snd_space = clib_min (max_deq, snd_space);
    1931           0 :       burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss);
    1932           0 :       n_segs_now = tcp_transmit_unsent (wrk, tc, burst_size);
    1933           0 :       if (n_segs_now && max_deq > n_segs_now * tc->snd_mss)
    1934           0 :         tcp_program_retransmit (tc);
    1935           0 :       n_segs += n_segs_now;
    1936             :     }
    1937             : 
    1938           0 : done:
    1939           0 :   tcp_fastrecovery_first_off (tc);
    1940             : 
    1941           0 :   sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes);
    1942           0 :   sent_bytes = cc_limited ? burst_bytes : sent_bytes;
    1943           0 :   transport_connection_tx_pacer_update_bytes (&tc->connection, sent_bytes);
    1944             : 
    1945           0 :   return n_segs;
    1946             : }
    1947             : 
    1948             : static int
    1949       41047 : tcp_send_acks (tcp_connection_t * tc, u32 max_burst_size)
    1950             : {
    1951             :   int j, n_acks;
    1952             : 
    1953       41047 :   if (!tc->pending_dupacks)
    1954             :     {
    1955       41047 :       if (tcp_in_cong_recovery (tc) || !tcp_max_tx_deq (tc)
    1956       16943 :           || tc->state != TCP_STATE_ESTABLISHED)
    1957             :         {
    1958       24107 :           tcp_send_ack (tc);
    1959       24107 :           return 1;
    1960             :         }
    1961       16940 :       return 0;
    1962             :     }
    1963             : 
    1964             :   /* If we're supposed to send dupacks but have no ooo data
    1965             :    * send only one ack */
    1966           0 :   if (!vec_len (tc->snd_sacks))
    1967             :     {
    1968           0 :       tcp_send_ack (tc);
    1969           0 :       tc->dupacks_out += 1;
    1970           0 :       tc->pending_dupacks = 0;
    1971           0 :       return 1;
    1972             :     }
    1973             : 
    1974             :   /* Start with first sack block */
    1975           0 :   tc->snd_sack_pos = 0;
    1976             : 
    1977             :   /* Generate enough dupacks to cover all sack blocks. Do not generate
    1978             :    * more sacks than the number of packets received. But do generate at
    1979             :    * least 3, i.e., the number needed to signal congestion, if needed. */
    1980           0 :   n_acks = vec_len (tc->snd_sacks) / TCP_OPTS_MAX_SACK_BLOCKS;
    1981           0 :   n_acks = clib_min (n_acks, tc->pending_dupacks);
    1982           0 :   n_acks = clib_max (n_acks, clib_min (tc->pending_dupacks, 3));
    1983           0 :   for (j = 0; j < clib_min (n_acks, max_burst_size); j++)
    1984           0 :     tcp_send_ack (tc);
    1985             : 
    1986           0 :   if (n_acks < max_burst_size)
    1987             :     {
    1988           0 :       tc->pending_dupacks = 0;
    1989           0 :       tc->snd_sack_pos = 0;
    1990           0 :       tc->dupacks_out += n_acks;
    1991           0 :       return n_acks;
    1992             :     }
    1993             :   else
    1994             :     {
    1995             :       TCP_DBG ("constrained by burst size");
    1996           0 :       tc->pending_dupacks = n_acks - max_burst_size;
    1997           0 :       tc->dupacks_out += max_burst_size;
    1998           0 :       tcp_program_dupack (tc);
    1999           0 :       return max_burst_size;
    2000             :     }
    2001             : }
    2002             : 
    2003             : static int
    2004           0 : tcp_do_retransmit (tcp_connection_t * tc, u32 max_burst_size)
    2005             : {
    2006             :   tcp_worker_ctx_t *wrk;
    2007             :   u32 n_segs;
    2008             : 
    2009           0 :   if (PREDICT_FALSE (tc->state == TCP_STATE_CLOSED))
    2010           0 :     return 0;
    2011             : 
    2012           0 :   wrk = tcp_get_worker (tc->c_thread_index);
    2013             : 
    2014           0 :   if (tcp_opts_sack_permitted (&tc->rcv_opts))
    2015           0 :     n_segs = tcp_retransmit_sack (wrk, tc, max_burst_size);
    2016             :   else
    2017           0 :     n_segs = tcp_retransmit_no_sack (wrk, tc, max_burst_size);
    2018             : 
    2019           0 :   return n_segs;
    2020             : }
    2021             : 
    2022             : int
    2023       41252 : tcp_session_custom_tx (void *conn, transport_send_params_t * sp)
    2024             : {
    2025       41252 :   tcp_connection_t *tc = (tcp_connection_t *) conn;
    2026       41252 :   u32 n_segs = 0;
    2027             : 
    2028       41252 :   if (tcp_in_cong_recovery (tc) && (tc->flags & TCP_CONN_RXT_PENDING))
    2029             :     {
    2030           0 :       tc->flags &= ~TCP_CONN_RXT_PENDING;
    2031           0 :       n_segs = tcp_do_retransmit (tc, sp->max_burst_size);
    2032             :     }
    2033             : 
    2034       41252 :   if (!(tc->flags & TCP_CONN_SNDACK))
    2035         205 :     return n_segs;
    2036             : 
    2037       41047 :   tc->flags &= ~TCP_CONN_SNDACK;
    2038             : 
    2039             :   /* We have retransmitted packets and no dupack */
    2040       41047 :   if (n_segs && !tc->pending_dupacks)
    2041           0 :     return n_segs;
    2042             : 
    2043       41047 :   if (sp->max_burst_size <= n_segs)
    2044             :     {
    2045           0 :       tcp_program_ack (tc);
    2046           0 :       return n_segs;
    2047             :     }
    2048             : 
    2049       41047 :   n_segs += tcp_send_acks (tc, sp->max_burst_size - n_segs);
    2050             : 
    2051       41047 :   return n_segs;
    2052             : }
    2053             : #endif /* CLIB_MARCH_VARIANT */
    2054             : 
    2055             : static void
    2056           0 : tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0,
    2057             :                               u16 * next0, u32 * error0)
    2058             : {
    2059             :   ip_adjacency_t *adj;
    2060             :   adj_index_t ai;
    2061             : 
    2062             :   /* Not thread safe but as long as the connection exists the adj should
    2063             :    * not be removed */
    2064           0 :   ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &tc0->c_rmt_ip,
    2065             :                      tc0->sw_if_index);
    2066           0 :   if (ai == ADJ_INDEX_INVALID)
    2067             :     {
    2068           0 :       vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
    2069           0 :       *next0 = TCP_OUTPUT_NEXT_DROP;
    2070           0 :       *error0 = TCP_ERROR_LINK_LOCAL_RW;
    2071           0 :       return;
    2072             :     }
    2073             : 
    2074           0 :   adj = adj_get (ai);
    2075           0 :   if (PREDICT_TRUE (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE))
    2076           0 :     *next0 = TCP_OUTPUT_NEXT_IP_REWRITE;
    2077           0 :   else if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP)
    2078           0 :     *next0 = TCP_OUTPUT_NEXT_IP_ARP;
    2079             :   else
    2080             :     {
    2081           0 :       *next0 = TCP_OUTPUT_NEXT_DROP;
    2082           0 :       *error0 = TCP_ERROR_LINK_LOCAL_RW;
    2083             :     }
    2084           0 :   vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
    2085             : }
    2086             : 
    2087             : static void
    2088           0 : tcp46_output_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
    2089             :                           u32 * to_next, u32 n_bufs)
    2090             : {
    2091             :   tcp_connection_t *tc;
    2092             :   tcp_tx_trace_t *t;
    2093             :   vlib_buffer_t *b;
    2094             :   tcp_header_t *th;
    2095             :   int i;
    2096             : 
    2097           0 :   for (i = 0; i < n_bufs; i++)
    2098             :     {
    2099           0 :       b = vlib_get_buffer (vm, to_next[i]);
    2100           0 :       if (!(b->flags & VLIB_BUFFER_IS_TRACED))
    2101           0 :         continue;
    2102           0 :       th = vlib_buffer_get_current (b);
    2103           0 :       tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
    2104             :                                vm->thread_index);
    2105           0 :       t = vlib_add_trace (vm, node, b, sizeof (*t));
    2106           0 :       clib_memcpy_fast (&t->tcp_header, th, sizeof (t->tcp_header));
    2107           0 :       clib_memcpy_fast (&t->tcp_connection, tc, sizeof (t->tcp_connection));
    2108             :     }
    2109           0 : }
    2110             : 
    2111             : always_inline void
    2112     1020810 : tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0,
    2113             :                     tcp_connection_t * tc0, u8 is_ip4)
    2114             : {
    2115             :   TCP_EVT (TCP_EVT_OUTPUT, tc0,
    2116             :            ((tcp_header_t *) vlib_buffer_get_current (b0))->flags,
    2117             :            b0->current_length);
    2118             : 
    2119     1020810 :   if (is_ip4)
    2120     1020800 :     vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
    2121     1020800 :                           IP_PROTOCOL_TCP, tcp_csum_offload (tc0));
    2122             :   else
    2123          17 :     vlib_buffer_push_ip6_custom (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6,
    2124             :                                  IP_PROTOCOL_TCP, tc0->ipv6_flow_label);
    2125     1020810 : }
    2126             : 
    2127             : always_inline void
    2128     1020810 : tcp_check_if_gso (tcp_connection_t * tc, vlib_buffer_t * b)
    2129             : {
    2130     1020810 :   if (PREDICT_TRUE (!(tc->cfg_flags & TCP_CFG_F_TSO)))
    2131     1020810 :     return;
    2132             : 
    2133           0 :   u16 data_len = b->current_length - sizeof (tcp_header_t) - tc->snd_opts_len;
    2134             : 
    2135           0 :   if (PREDICT_FALSE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID))
    2136           0 :     data_len += b->total_length_not_including_first_buffer;
    2137             : 
    2138           0 :   if (PREDICT_TRUE (data_len <= tc->snd_mss))
    2139           0 :     return;
    2140             :   else
    2141             :     {
    2142           0 :       ASSERT ((b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
    2143           0 :       ASSERT ((b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
    2144           0 :       b->flags |= VNET_BUFFER_F_GSO;
    2145           0 :       vnet_buffer2 (b)->gso_l4_hdr_sz =
    2146           0 :         sizeof (tcp_header_t) + tc->snd_opts_len;
    2147           0 :       vnet_buffer2 (b)->gso_size = tc->snd_mss;
    2148             :     }
    2149             : }
    2150             : 
    2151             : always_inline void
    2152     1020810 : tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0,
    2153             :                           vlib_node_runtime_t * error_node, u16 * next0,
    2154             :                           u8 is_ip4)
    2155             : {
    2156             :   /* If next_index is not drop use it */
    2157     1020810 :   if (tc0->next_node_index)
    2158             :     {
    2159           0 :       *next0 = tc0->next_node_index;
    2160           0 :       vnet_buffer (b0)->tcp.next_node_opaque = tc0->next_node_opaque;
    2161             :     }
    2162             :   else
    2163             :     {
    2164     1020810 :       *next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
    2165             :     }
    2166             : 
    2167     1020810 :   vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
    2168     1020810 :   vnet_buffer (b0)->sw_if_index[VLIB_RX] = tc0->sw_if_index;
    2169             : 
    2170     1020810 :   if (!is_ip4)
    2171             :     {
    2172          17 :       u32 error0 = 0;
    2173             : 
    2174          17 :       if (PREDICT_FALSE (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6)))
    2175           0 :         tcp_output_handle_link_local (tc0, b0, next0, &error0);
    2176             : 
    2177          17 :       if (PREDICT_FALSE (error0))
    2178             :         {
    2179           0 :           b0->error = error_node->errors[error0];
    2180           0 :           return;
    2181             :         }
    2182             :     }
    2183             : 
    2184     1020810 :   tc0->segs_out += 1;
    2185             : }
    2186             : 
    2187             : always_inline uword
    2188       30293 : tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
    2189             :                      vlib_frame_t * frame, int is_ip4)
    2190             : {
    2191       30293 :   u32 n_left_from, *from, thread_index = vm->thread_index;
    2192             :   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
    2193             :   u16 nexts[VLIB_FRAME_SIZE], *next;
    2194       30293 :   u16 err_counters[TCP_N_ERROR] = { 0 };
    2195             : 
    2196       30293 :   from = vlib_frame_vector_args (frame);
    2197       30293 :   n_left_from = frame->n_vectors;
    2198       30293 :   tcp_update_time_now (tcp_get_worker (thread_index));
    2199             : 
    2200       30293 :   if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
    2201           0 :     tcp46_output_trace_frame (vm, node, from, n_left_from);
    2202             : 
    2203       30293 :   vlib_get_buffers (vm, from, bufs, n_left_from);
    2204       30293 :   b = bufs;
    2205       30293 :   next = nexts;
    2206             : 
    2207      510000 :   while (n_left_from >= 4)
    2208             :     {
    2209             :       tcp_connection_t *tc0, *tc1;
    2210             : 
    2211             :       {
    2212      479707 :         vlib_prefetch_buffer_header (b[2], STORE);
    2213      479707 :         CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
    2214             : 
    2215      479707 :         vlib_prefetch_buffer_header (b[3], STORE);
    2216      479707 :         CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
    2217             :       }
    2218             : 
    2219      479707 :       tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
    2220             :                                 thread_index);
    2221      479707 :       tc1 = tcp_connection_get (vnet_buffer (b[1])->tcp.connection_index,
    2222             :                                 thread_index);
    2223             : 
    2224      479707 :       if (PREDICT_TRUE (!tc0 + !tc1 == 0))
    2225             :         {
    2226      479707 :           tcp_output_push_ip (vm, b[0], tc0, is_ip4);
    2227      479707 :           tcp_output_push_ip (vm, b[1], tc1, is_ip4);
    2228             : 
    2229      479707 :           tcp_check_if_gso (tc0, b[0]);
    2230      479707 :           tcp_check_if_gso (tc1, b[1]);
    2231             : 
    2232      479707 :           tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
    2233      479707 :           tcp_output_handle_packet (tc1, b[1], node, &next[1], is_ip4);
    2234             :         }
    2235             :       else
    2236             :         {
    2237           0 :           if (tc0 != 0)
    2238             :             {
    2239           0 :               tcp_output_push_ip (vm, b[0], tc0, is_ip4);
    2240           0 :               tcp_check_if_gso (tc0, b[0]);
    2241           0 :               tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
    2242             :             }
    2243             :           else
    2244             :             {
    2245           0 :               tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION,
    2246             :                                    1);
    2247           0 :               next[0] = TCP_OUTPUT_NEXT_DROP;
    2248             :             }
    2249           0 :           if (tc1 != 0)
    2250             :             {
    2251           0 :               tcp_output_push_ip (vm, b[1], tc1, is_ip4);
    2252           0 :               tcp_check_if_gso (tc1, b[1]);
    2253           0 :               tcp_output_handle_packet (tc1, b[1], node, &next[1], is_ip4);
    2254             :             }
    2255             :           else
    2256             :             {
    2257           0 :               tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION,
    2258             :                                    1);
    2259           0 :               next[1] = TCP_OUTPUT_NEXT_DROP;
    2260             :             }
    2261             :         }
    2262             : 
    2263      479707 :       b += 2;
    2264      479707 :       next += 2;
    2265      479707 :       n_left_from -= 2;
    2266             :     }
    2267       91691 :   while (n_left_from > 0)
    2268             :     {
    2269             :       tcp_connection_t *tc0;
    2270             : 
    2271       61398 :       if (n_left_from > 1)
    2272             :         {
    2273       31105 :           vlib_prefetch_buffer_header (b[1], STORE);
    2274       31105 :           CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
    2275             :         }
    2276             : 
    2277       61398 :       tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
    2278             :                                 thread_index);
    2279             : 
    2280       61398 :       if (PREDICT_TRUE (tc0 != 0))
    2281             :         {
    2282       61398 :           tcp_output_push_ip (vm, b[0], tc0, is_ip4);
    2283       61398 :           tcp_check_if_gso (tc0, b[0]);
    2284       61398 :           tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
    2285             :         }
    2286             :       else
    2287             :         {
    2288           0 :           tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION, 1);
    2289           0 :           next[0] = TCP_OUTPUT_NEXT_DROP;
    2290             :         }
    2291             : 
    2292       61398 :       b += 1;
    2293       61398 :       next += 1;
    2294       61398 :       n_left_from -= 1;
    2295             :     }
    2296             : 
    2297     1151130 :   tcp_store_err_counters (output, err_counters);
    2298       30293 :   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
    2299       30293 :   vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4),
    2300       30293 :                                TCP_ERROR_PKTS_SENT, frame->n_vectors);
    2301       30293 :   return frame->n_vectors;
    2302             : }
    2303             : 
    2304       32512 : VLIB_NODE_FN (tcp4_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
    2305             :                                  vlib_frame_t * from_frame)
    2306             : {
    2307       30276 :   return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
    2308             : }
    2309             : 
    2310        2253 : VLIB_NODE_FN (tcp6_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
    2311             :                                  vlib_frame_t * from_frame)
    2312             : {
    2313          17 :   return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
    2314             : }
    2315             : 
    2316             : /* *INDENT-OFF* */
    2317      178120 : VLIB_REGISTER_NODE (tcp4_output_node) =
    2318             : {
    2319             :   .name = "tcp4-output",
    2320             :   /* Takes a vector of packets. */
    2321             :   .vector_size = sizeof (u32),
    2322             :   .n_errors = TCP_N_ERROR,
    2323             :   .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
    2324             :   .error_counters = tcp_output_error_counters,
    2325             :   .n_next_nodes = TCP_OUTPUT_N_NEXT,
    2326             :   .next_nodes = {
    2327             : #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
    2328             :     foreach_tcp4_output_next
    2329             : #undef _
    2330             :   },
    2331             :   .format_buffer = format_tcp_header,
    2332             :   .format_trace = format_tcp_tx_trace,
    2333             : };
    2334             : /* *INDENT-ON* */
    2335             : 
    2336             : /* *INDENT-OFF* */
    2337      178120 : VLIB_REGISTER_NODE (tcp6_output_node) =
    2338             : {
    2339             :   .name = "tcp6-output",
    2340             :     /* Takes a vector of packets. */
    2341             :   .vector_size = sizeof (u32),
    2342             :   .n_errors = TCP_N_ERROR,
    2343             :   .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
    2344             :   .error_counters = tcp_output_error_counters,
    2345             :   .n_next_nodes = TCP_OUTPUT_N_NEXT,
    2346             :   .next_nodes = {
    2347             : #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
    2348             :     foreach_tcp6_output_next
    2349             : #undef _
    2350             :   },
    2351             :   .format_buffer = format_tcp_header,
    2352             :   .format_trace = format_tcp_tx_trace,
    2353             : };
    2354             : /* *INDENT-ON* */
    2355             : 
    2356             : typedef enum _tcp_reset_next
    2357             : {
    2358             :   TCP_RESET_NEXT_DROP,
    2359             :   TCP_RESET_NEXT_IP_LOOKUP,
    2360             :   TCP_RESET_N_NEXT
    2361             : } tcp_reset_next_t;
    2362             : 
    2363             : #define foreach_tcp4_reset_next         \
    2364             :   _(DROP, "error-drop")                 \
    2365             :   _(IP_LOOKUP, "ip4-lookup")
    2366             : 
    2367             : #define foreach_tcp6_reset_next         \
    2368             :   _(DROP, "error-drop")                 \
    2369             :   _(IP_LOOKUP, "ip6-lookup")
    2370             : 
    2371             : static void
    2372           0 : tcp_reset_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
    2373             :                        vlib_buffer_t **bs, u32 n_bufs, u8 is_ip4)
    2374             : {
    2375             :   tcp_header_t *tcp;
    2376             :   tcp_tx_trace_t *t;
    2377             :   int i;
    2378             : 
    2379           0 :   for (i = 0; i < n_bufs; i++)
    2380             :     {
    2381           0 :       if (bs[i]->flags & VLIB_BUFFER_IS_TRACED)
    2382             :         {
    2383           0 :           tcp = vlib_buffer_get_current (bs[i]);
    2384           0 :           t = vlib_add_trace (vm, node, bs[i], sizeof (*t));
    2385             : 
    2386           0 :           if (is_ip4)
    2387             :             {
    2388           0 :               ip4_header_t *ih4 = vlib_buffer_get_current (bs[i]);
    2389           0 :               tcp = ip4_next_header (ih4);
    2390           0 :               t->tcp_connection.c_lcl_ip.ip4 = ih4->dst_address;
    2391           0 :               t->tcp_connection.c_rmt_ip.ip4 = ih4->src_address;
    2392           0 :               t->tcp_connection.c_is_ip4 = 1;
    2393             :             }
    2394             :           else
    2395             :             {
    2396           0 :               ip6_header_t *ih6 = vlib_buffer_get_current (bs[i]);
    2397           0 :               tcp = ip6_next_header (ih6);
    2398           0 :               t->tcp_connection.c_lcl_ip.ip6 = ih6->dst_address;
    2399           0 :               t->tcp_connection.c_rmt_ip.ip6 = ih6->src_address;
    2400             :             }
    2401           0 :           t->tcp_connection.c_lcl_port = tcp->dst_port;
    2402           0 :           t->tcp_connection.c_rmt_port = tcp->src_port;
    2403           0 :           t->tcp_connection.c_proto = TRANSPORT_PROTO_TCP;
    2404           0 :           clib_memcpy_fast (&t->tcp_header, tcp, sizeof (t->tcp_header));
    2405             :         }
    2406             :     }
    2407           0 : }
    2408             : 
    2409             : static uword
    2410           2 : tcp46_reset_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
    2411             :                     vlib_frame_t *frame, u8 is_ip4)
    2412             : {
    2413             :   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
    2414             :   u16 nexts[VLIB_FRAME_SIZE], *next;
    2415             :   u32 n_left_from, *from;
    2416             : 
    2417           2 :   from = vlib_frame_vector_args (frame);
    2418           2 :   n_left_from = frame->n_vectors;
    2419           2 :   vlib_get_buffers (vm, from, bufs, n_left_from);
    2420             : 
    2421           2 :   b = bufs;
    2422           2 :   next = nexts;
    2423             : 
    2424          88 :   while (n_left_from > 0)
    2425             :     {
    2426          86 :       tcp_buffer_make_reset (vm, b[0], is_ip4);
    2427             : 
    2428             :       /* IP lookup in fib where it was received. Previous value
    2429             :        * was overwritten by tcp-input */
    2430          86 :       vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
    2431          86 :         vec_elt (ip4_main.fib_index_by_sw_if_index,
    2432             :                  vnet_buffer (b[0])->sw_if_index[VLIB_RX]);
    2433             : 
    2434          86 :       b[0]->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
    2435          86 :       next[0] = TCP_RESET_NEXT_IP_LOOKUP;
    2436             : 
    2437          86 :       b += 1;
    2438          86 :       next += 1;
    2439          86 :       n_left_from -= 1;
    2440             :     }
    2441             : 
    2442           2 :   if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
    2443           0 :     tcp_reset_trace_frame (vm, node, bufs, frame->n_vectors, is_ip4);
    2444             : 
    2445           2 :   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
    2446             : 
    2447           2 :   vlib_node_increment_counter (vm, node->node_index, TCP_ERROR_RST_SENT,
    2448           2 :                                frame->n_vectors);
    2449             : 
    2450           2 :   return frame->n_vectors;
    2451             : }
    2452             : 
    2453        2238 : VLIB_NODE_FN (tcp4_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
    2454             :                                 vlib_frame_t * from_frame)
    2455             : {
    2456           2 :   return tcp46_reset_inline (vm, node, from_frame, 1);
    2457             : }
    2458             : 
    2459        2236 : VLIB_NODE_FN (tcp6_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
    2460             :                                 vlib_frame_t * from_frame)
    2461             : { /* Node function for tcp6_reset_node: forwards vm, node and from_frame unchanged to tcp46_reset_inline and returns its result. */
    2462           0 :   return tcp46_reset_inline (vm, node, from_frame, 0); /* trailing 0 is presumably the is_ip4 flag cleared (IPv6 variant) - confirm against tcp46_reset_inline's signature */
    2463             : }
    2464             : 
    2465             : /* *INDENT-OFF* */
    2466      178120 : VLIB_REGISTER_NODE (tcp4_reset_node) = { /* Registration record for tcp4_reset_node, exposed under the name "tcp4-reset". */
    2467             :   .name = "tcp4-reset",
    2468             :   .vector_size = sizeof (u32), /* frame vector elements are u32-sized */
    2469             :   .n_errors = TCP_N_ERROR,
    2470             :   .error_counters = tcp_output_error_counters, /* same counter table symbol as the tcp6-reset registration */
    2471             :   .n_next_nodes = TCP_RESET_N_NEXT,
    2472             :   .next_nodes = { /* X-macro below emits one [TCP_RESET_NEXT_<s>] = <n> initializer per entry of foreach_tcp4_reset_next */
    2473             : #define _(s,n) [TCP_RESET_NEXT_##s] = n,
    2474             :     foreach_tcp4_reset_next
    2475             : #undef _
    2476             :   },
    2477             :   .format_trace = format_tcp_tx_trace, /* trace formatter; NOTE(review): name suggests it is shared with the TCP tx path - confirm */
    2478             : };
    2479             : /* *INDENT-ON* */
    2480             : 
    2481             : /* *INDENT-OFF* */
    2482      178120 : VLIB_REGISTER_NODE (tcp6_reset_node) = { /* Registration record for tcp6_reset_node, exposed under the name "tcp6-reset". */
    2483             :   .name = "tcp6-reset",
    2484             :   .vector_size = sizeof (u32), /* frame vector elements are u32-sized */
    2485             :   .n_errors = TCP_N_ERROR,
    2486             :   .error_counters = tcp_output_error_counters, /* same counter table symbol as the tcp4-reset registration */
    2487             :   .n_next_nodes = TCP_RESET_N_NEXT,
    2488             :   .next_nodes = { /* X-macro below emits one [TCP_RESET_NEXT_<s>] = <n> initializer per entry of foreach_tcp6_reset_next */
    2489             : #define _(s,n) [TCP_RESET_NEXT_##s] = n,
    2490             :     foreach_tcp6_reset_next
    2491             : #undef _
    2492             :   },
    2493             :   .format_trace = format_tcp_tx_trace, /* trace formatter; NOTE(review): name suggests it is shared with the TCP tx path - confirm */
    2494             : };
    2495             : /* *INDENT-ON* */
    2496             : 
    2497             : /*
    2498             :  * fd.io coding-style-patch-verification: ON
    2499             :  *
    2500             :  * Local Variables:
    2501             :  * eval: (c-set-style "gnu")
    2502             :  * End:
    2503             :  */

Generated by: LCOV version 1.14