LCOV - code coverage report
Current view: top level - vnet/tcp - tcp_input.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 826 1390 59.4 %
Date: 2023-07-05 22:20:52 Functions: 138 200 69.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016-2019 Cisco and/or its affiliates.
       3             :  * Licensed under the Apache License, Version 2.0 (the "License");
       4             :  * you may not use this file except in compliance with the License.
       5             :  * You may obtain a copy of the License at:
       6             :  *
       7             :  *     http://www.apache.org/licenses/LICENSE-2.0
       8             :  *
       9             :  * Unless required by applicable law or agreed to in writing, software
      10             :  * distributed under the License is distributed on an "AS IS" BASIS,
      11             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12             :  * See the License for the specific language governing permissions and
      13             :  * limitations under the License.
      14             :  */
      15             : 
      16             : #include <vppinfra/sparse_vec.h>
      17             : #include <vnet/fib/ip4_fib.h>
      18             : #include <vnet/fib/ip6_fib.h>
      19             : #include <vnet/tcp/tcp.h>
      20             : #include <vnet/tcp/tcp_inlines.h>
      21             : #include <vnet/session/session.h>
      22             : #include <math.h>
      23             : 
                       /*
                        * Error descriptor table built from tcp_error.def. While that file is
                        * included, each tcp_error (f, n, s, d) line expands to one initializer
                        * { #n, d, VL_COUNTER_SEVERITY_##s }: the stringified n, d unchanged and
                        * the severity constant obtained by token-pasting s. The f argument is
                        * ignored by this particular expansion. The helper macro is undefined
                        * again right after the include.
                        */
      24             : static vlib_error_desc_t tcp_input_error_counters[] = {
      25             : #define tcp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
      26             : #include <vnet/tcp/tcp_error.def>
      27             : #undef tcp_error
      28             : };
      29             : 
                       /*
                        * TCP_INPUT_NEXT_* indices. Judging by the names these select the next
                        * node a segment is handed to by the tcp input node (TODO confirm, the
                        * enumerators are not used in this part of the file). TCP_INPUT_N_NEXT
                        * comes last and therefore equals the number of entries before it.
                        */
      30             : typedef enum _tcp_input_next
      31             : {
      32             :   TCP_INPUT_NEXT_DROP,
      33             :   TCP_INPUT_NEXT_LISTEN,
      34             :   TCP_INPUT_NEXT_RCV_PROCESS,
      35             :   TCP_INPUT_NEXT_SYN_SENT,
      36             :   TCP_INPUT_NEXT_ESTABLISHED,
      37             :   TCP_INPUT_NEXT_RESET,
      38             :   TCP_INPUT_NEXT_PUNT,
      39             :   TCP_INPUT_N_NEXT
      40             : } tcp_input_next_t;
      41             : 
      42             : /**
      43             :  * Validate segment sequence number. As per RFC793:
      44             :  *
      45             :  * Segment Receive Test
      46             :  *      Length  Window
      47             :  *      ------- -------  -------------------------------------------
      48             :  *      0       0       SEG.SEQ = RCV.NXT
      49             :  *      0       >0      RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
      50             :  *      >0      0       not acceptable
      51             :  *      >0      >0      RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
      52             :  *                      or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND
      53             :  *
      54             :  * This ultimately consists in checking if segment falls within the window.
      55             :  * The one important difference compared to RFC793 is that we use rcv_las,
      56             :  * or the rcv_nxt at last ack sent instead of rcv_nxt since that's the
      57             :  * peer's reference when computing our receive window.
      58             :  *
      59             :  * This:
      60             :  *  seq_leq (end_seq, tc->rcv_las + tc->rcv_wnd) && seq_geq (seq, tc->rcv_las)
      61             :  * however, is too strict when we have retransmits. Instead we just check that
      62             :  * the seq is not beyond the right edge and that the end of the segment is not
      63             :  * less than the left edge.
      64             :  *
      65             :  * N.B. rcv_nxt and rcv_wnd are both updated in this node if acks are sent, so
      66             :  * use rcv_nxt in the right edge window test instead of rcv_las.
      67             :  *
                        * @param tc       connection providing rcv_las, rcv_nxt and rcv_wnd
                        * @param seq      sequence number at which the segment starts
                        * @param end_seq  sequence number at which the segment ends
                        * @return non-zero if end_seq is not before rcv_las (left edge) and seq
                        *         is not past rcv_nxt + rcv_wnd (right edge), 0 otherwise
      68             :  */
      69             : always_inline u8
      70     1020440 : tcp_segment_in_rcv_wnd (tcp_connection_t * tc, u32 seq, u32 end_seq)
      71             : {
      72     1020440 :   return (seq_geq (end_seq, tc->rcv_las)
      73     1020440 :           && seq_leq (seq, tc->rcv_nxt + tc->rcv_wnd));
      74             : }
      75             : 
      76             : /**
      77             :  * RFC1323: Check against wrapped sequence numbers (PAWS). If we have
      78             :  * timestamp to echo and it's less than tsval_recent, drop segment
      79             :  * but still send an ACK in order to retain TCP's mechanism for detecting
      80             :  * and recovering from half-open connections
      81             :  *
      82             :  * Or at least that's what the theory says. It seems that this might not work
      83             :  * very well with packet reordering and fast retransmit. XXX
                        *
                        * This function only performs the comparison; dropping the segment and
                        * sending the ACK are left to the caller. It reads tc->rcv_opts, so the
                        * options of the segment under test must have been parsed into it first.
                        *
                        * @param tc  connection providing rcv_opts and tsval_recent
                        * @return non-zero if a timestamp option is present and its tsval is
                        *         less than (timestamp_lt) tsval_recent, 0 otherwise
      84             :  */
      85             : always_inline int
      86     1020440 : tcp_segment_check_paws (tcp_connection_t * tc)
      87             : {
      88     1020440 :   return tcp_opts_tstamp (&tc->rcv_opts)
      89     1020440 :     && timestamp_lt (tc->rcv_opts.tsval, tc->tsval_recent);
      90             : }
      91             : 
      92             : /**
      93             :  * Update tsval recent
                        *
                        * If a timestamp option was received and rcv_las lies within
                        * [seq, seq_end], stores the segment's tsval in tsval_recent and
                        * records the current tcp_time_tstamp () of the connection's thread in
                        * tsval_recent_age. Otherwise nothing is changed.
                        *
                        * @param tc       connection; rcv_opts must describe the current segment
                        * @param seq      sequence number at which the segment starts
                        * @param seq_end  sequence number at which the segment ends
      94             :  */
      95             : always_inline void
      96     1020440 : tcp_update_timestamp (tcp_connection_t * tc, u32 seq, u32 seq_end)
      97             : {
      98             :   /*
      99             :    * RFC1323: If Last.ACK.sent falls within the range of sequence numbers
     100             :    * of an incoming segment:
     101             :    *    SEG.SEQ <= Last.ACK.sent < SEG.SEQ + SEG.LEN
     102             :    * then the TSval from the segment is copied to TS.Recent;
     103             :    * otherwise, the TSval is ignored.
                          *
                          * NOTE(review): the upper bound tested below is inclusive (seq_leq),
                          * whereas the rule quoted above uses a strict bound; presumably so that
                          * zero-length segments (seq == seq_end) qualify - confirm.
     104             :    */
     105     1020440 :   if (tcp_opts_tstamp (&tc->rcv_opts) && seq_leq (seq, tc->rcv_las)
     106       65095 :       && seq_leq (tc->rcv_las, seq_end))
     107             :     {
                             /* The new tsval is expected not to be older than the stored one */
     108       65095 :       ASSERT (timestamp_leq (tc->tsval_recent, tc->rcv_opts.tsval));
     109       65095 :       tc->tsval_recent = tc->rcv_opts.tsval;
     110       65095 :       tc->tsval_recent_age = tcp_time_tstamp (tc->c_thread_index);
     111             :     }
     112     1020440 : }
     113             : 
                       /**
                        * Notify the session layer about a reset, based on tc->rst_state
                        *
                        * rst_state is the connection state saved when the reset was received
                        * (see tcp_program_reset_ntf and the SYN_SENT case of tcp_rcv_rst):
                        *  - SYN_RCVD: delete notify, then clean up the connection
                        *  - SYN_SENT: connect notify with SESSION_E_REFUSED, then clean up
                        *  - ESTABLISHED: reset notify followed by closed notify
                        *  - CLOSE_WAIT, FIN_WAIT_1, FIN_WAIT_2, CLOSING, LAST_ACK: closed
                        *    notify only
                        *  - CLOSED, TIME_WAIT: nothing to do
                        *
                        * NOTE(review): the default branch logs tc->state although the switch
                        * is on tc->rst_state - confirm which of the two is meant.
                        *
                        * @param tc  connection for which the reset is handled
                        */
     114             : static void
     115           4 : tcp_handle_rst (tcp_connection_t * tc)
     116             : {
     117           4 :   switch (tc->rst_state)
     118             :     {
     119           0 :     case TCP_STATE_SYN_RCVD:
     120             :       /* Cleanup everything. App wasn't notified yet */
     121           0 :       session_transport_delete_notify (&tc->connection);
     122           0 :       tcp_connection_cleanup (tc);
     123           0 :       break;
     124           0 :     case TCP_STATE_SYN_SENT:
     125           0 :       session_stream_connect_notify (&tc->connection, SESSION_E_REFUSED);
     126           0 :       tcp_connection_cleanup (tc);
     127           0 :       break;
     128           4 :     case TCP_STATE_ESTABLISHED:
     129           4 :       session_transport_reset_notify (&tc->connection);
     130           4 :       session_transport_closed_notify (&tc->connection);
     131           4 :       break;
     132           0 :     case TCP_STATE_CLOSE_WAIT:
     133             :     case TCP_STATE_FIN_WAIT_1:
     134             :     case TCP_STATE_FIN_WAIT_2:
     135             :     case TCP_STATE_CLOSING:
     136             :     case TCP_STATE_LAST_ACK:
     137           0 :       session_transport_closed_notify (&tc->connection);
     138           0 :       break;
     139           0 :     case TCP_STATE_CLOSED:
     140             :     case TCP_STATE_TIME_WAIT:
     141           0 :       break;
     142           4 :     default:
     143             :       TCP_DBG ("reset state: %u", tc->state);
     144             :     }
     145           4 : }
     146             : 
                       /**
                        * Queue a reset notification for a connection
                        *
                        * Unless a disconnect is already pending for the connection, saves the
                        * current state in rst_state, appends the connection index to the
                        * worker's pending_resets vector and marks the disconnect as pending.
                        * The pending flag makes further calls no-ops, so a connection is
                        * queued at most once while the flag stays set.
                        *
                        * @param wrk  worker whose pending_resets vector is appended to
                        * @param tc   connection that received the reset
                        */
     147             : static void
     148           4 : tcp_program_reset_ntf (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
     149             : {
     150           4 :   if (!tcp_disconnect_pending (tc))
     151             :     {
     152           4 :       tc->rst_state = tc->state;
     153           4 :       vec_add1 (wrk->pending_resets, tc->c_c_index);
     154           4 :       tcp_disconnect_pending_on (tc);
     155             :     }
     156           4 : }
     157             : 
     158             : /**
     159             :  * Handle reset packet
     160             :  *
     161             :  * Programs disconnect/reset notification that should be sent
     162             :  * later by calling @ref tcp_handle_disconnects
                        *
                        * Per current connection state:
                        *  - SYN_RCVD: program the notification and move to CLOSED
                        *  - SYN_SENT: record the state in rst_state and call tcp_handle_rst
                        *    right away instead of programming a notification
                        *  - ESTABLISHED, CLOSE_WAIT, FIN_WAIT_1, FIN_WAIT_2, CLOSING, LAST_ACK:
                        *    reset the connection timers, turn congestion recovery off, program
                        *    the notification, move to CLOSED and program a cleanup
                        *  - CLOSED, TIME_WAIT: ignored
                        *
                        * The notification is programmed before the state is changed because
                        * tcp_program_reset_ntf snapshots tc->state into rst_state.
                        *
                        * @param wrk  worker context used to queue notifications and cleanups
                        * @param tc   connection that received the reset
     163             :  */
     164             : static void
     165           4 : tcp_rcv_rst (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
     166             : {
     167             :   TCP_EVT (TCP_EVT_RST_RCVD, tc);
     168           4 :   switch (tc->state)
     169             :     {
     170           0 :     case TCP_STATE_SYN_RCVD:
     171           0 :       tcp_program_reset_ntf (wrk, tc);
     172           0 :       tcp_connection_set_state (tc, TCP_STATE_CLOSED);
     173           0 :       break;
     174           0 :     case TCP_STATE_SYN_SENT:
     175             :       /* Do not program ntf because the connection is half-open */
     176           0 :       tc->rst_state = tc->state;
     177           0 :       tcp_handle_rst (tc);
     178           0 :       break;
     179           4 :     case TCP_STATE_ESTABLISHED:
     180           4 :       tcp_connection_timers_reset (tc);
     181           4 :       tcp_cong_recovery_off (tc);
     182           4 :       tcp_program_reset_ntf (wrk, tc);
     183           4 :       tcp_connection_set_state (tc, TCP_STATE_CLOSED);
     184           4 :       tcp_program_cleanup (wrk, tc);
     185           4 :       break;
     186           0 :     case TCP_STATE_CLOSE_WAIT:
     187             :     case TCP_STATE_FIN_WAIT_1:
     188             :     case TCP_STATE_FIN_WAIT_2:
     189             :     case TCP_STATE_CLOSING:
     190             :     case TCP_STATE_LAST_ACK:
     191           0 :       tcp_connection_timers_reset (tc);
     192           0 :       tcp_cong_recovery_off (tc);
     193           0 :       tcp_program_reset_ntf (wrk, tc);
     194             :       /* Make sure we mark the session as closed. In some states we may
     195             :        * be still trying to send data */
     196           0 :       tcp_connection_set_state (tc, TCP_STATE_CLOSED);
     197           0 :       tcp_program_cleanup (wrk, tc);
     198           0 :       break;
     199           0 :     case TCP_STATE_CLOSED:
     200             :     case TCP_STATE_TIME_WAIT:
     201           0 :       break;
     202           4 :     default:
     203             :       TCP_DBG ("reset state: %u", tc->state);
     204             :     }
     205           4 : }
     206             : 
     207             : /**
     208             :  * Validate incoming segment as per RFC793 p. 69 and RFC1323 p. 19
     209             :  *
     210             :  * It first verifies if segment has a wrapped sequence number (PAWS) and then
     211             :  * does the processing associated to the first four steps (ignoring security
     212             :  * and precedence): sequence number, rst bit and syn bit checks.
     213             :  *
                        * Order of checks as implemented below:
                        *  1. connection already CLOSED: send a reset and fail
                        *  2. none of ACK, RST, SYN set: fail
                        *  3. option parsing failure: fail
                        *  4. PAWS failure: unless tsval_recent turns out to be stale or the
                        *     segment is a RST, program an ack and fail
                        *  5. segment outside the receive window: handle SYN/SYN-ACK
                        *     retransmits, let a few special cases through, otherwise program a
                        *     dupack (not for RST) and fail
                        *  6. RST set: process the reset and fail
                        *  7. SYN set: program a challenge ack and fail
                        * On success the timestamp state is updated from the segment.
                        *
                        * Notes on *error0: it is written on every failure path, but not on the
                        * plain success path. If step 4 fails and processing nevertheless
                        * continues, it stays TCP_ERROR_PAWS unless a later step overwrites it,
                        * including when 0 is returned.
                        *
                        * @param wrk     worker context, used when a reset is processed
                        * @param tc0     connection the segment belongs to; rcv_opts is
                        *                overwritten with the segment's options
                        * @param b0      buffer whose tcp.seq_number and tcp.seq_end metadata are
                        *                read
                        * @param th0     TCP header of the segment
                        * @param error0  out: error code describing why validation failed
     214             :  * @return 0 if segments passes validation.
                        *         -1 otherwise
     215             :  */
     216             : static int
     217     1020440 : tcp_segment_validate (tcp_worker_ctx_t * wrk, tcp_connection_t * tc0,
     218             :                       vlib_buffer_t * b0, tcp_header_t * th0, u32 * error0)
     219             : {
     220             :   /* We could get a burst of RSTs interleaved with acks */
     221     1020440 :   if (PREDICT_FALSE (tc0->state == TCP_STATE_CLOSED))
     222             :     {
     223           2 :       tcp_send_reset (tc0);
     224           2 :       *error0 = TCP_ERROR_CONNECTION_CLOSED;
     225           2 :       goto error;
     226             :     }
     227             : 
     228     1020440 :   if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0)))
     229             :     {
     230           0 :       *error0 = TCP_ERROR_SEGMENT_INVALID;
     231           0 :       goto error;
     232             :     }
     233             : 
     234     1020440 :   if (PREDICT_FALSE (tcp_options_parse (th0, &tc0->rcv_opts, 0)))
     235             :     {
     236           0 :       *error0 = TCP_ERROR_OPTIONS;
     237           0 :       goto error;
     238             :     }
     239             : 
     240     1020440 :   if (PREDICT_FALSE (tcp_segment_check_paws (tc0)))
     241             :     {
     242           0 :       *error0 = TCP_ERROR_PAWS;
     243             :       TCP_EVT (TCP_EVT_PAWS_FAIL, tc0, vnet_buffer (b0)->tcp.seq_number,
     244             :                vnet_buffer (b0)->tcp.seq_end);
     245             : 
     246             :       /* If it just so happens that a segment updates tsval_recent for a
     247             :        * segment over 24 days old, invalidate tsval_recent. */
     248           0 :       if (timestamp_lt (tc0->tsval_recent_age + TCP_PAWS_IDLE,
     249             :                         tcp_time_tstamp (tc0->c_thread_index)))
     250             :         {
     251           0 :           tc0->tsval_recent = tc0->rcv_opts.tsval;
     252           0 :           clib_warning ("paws failed: 24-day old segment");
     253             :         }
     254             :       /* Drop after ack if not rst. Resets can fail paws check as per
     255             :        * RFC 7323 sec. 5.2: When an <RST> segment is received, it MUST NOT
     256             :        * be subjected to the PAWS check by verifying an acceptable value in
     257             :        * SEG.TSval */
     258           0 :       else if (!tcp_rst (th0))
     259             :         {
     260           0 :           tcp_program_ack (tc0);
     261             :           TCP_EVT (TCP_EVT_DUPACK_SENT, tc0, vnet_buffer (b0)->tcp);
     262           0 :           goto error;
     263             :         }
     264             :     }
     265             : 
     266             :   /* 1st: check sequence number */
     267     1020440 :   if (!tcp_segment_in_rcv_wnd (tc0, vnet_buffer (b0)->tcp.seq_number,
     268     1020440 :                                vnet_buffer (b0)->tcp.seq_end))
     269             :     {
     270             :       /* SYN/SYN-ACK retransmit */
     271           0 :       if (tcp_syn (th0)
     272           0 :           && vnet_buffer (b0)->tcp.seq_number == tc0->rcv_nxt - 1)
     273             :         {
                                 /* Parse the options again, this time with the last argument
                                  * set to 1 instead of 0 */
     274           0 :           tcp_options_parse (th0, &tc0->rcv_opts, 1);
     275           0 :           if (tc0->state == TCP_STATE_SYN_RCVD)
     276             :             {
     277           0 :               tcp_send_synack (tc0);
     278             :               TCP_EVT (TCP_EVT_SYN_RCVD, tc0, 0);
     279           0 :               *error0 = TCP_ERROR_SYNS_RCVD;
     280             :             }
     281             :           else
     282             :             {
     283           0 :               tcp_program_ack (tc0);
     284             :               TCP_EVT (TCP_EVT_SYNACK_RCVD, tc0);
     285           0 :               *error0 = TCP_ERROR_SYN_ACKS_RCVD;
     286             :             }
     287           0 :           goto error;
     288             :         }
     289             : 
     290             :       /* If our window is 0 and the packet is in sequence, let it pass
     291             :        * through for ack processing. It should be dropped later. */
     292           0 :       if (tc0->rcv_wnd < tc0->snd_mss
     293           0 :           && tc0->rcv_nxt == vnet_buffer (b0)->tcp.seq_number)
     294           0 :         goto check_reset;
     295             : 
     296             :       /* If we entered recovery and peer did so as well, there's a chance that
     297             :        * dup acks won't be acceptable on either end because seq_end may be less
     298             :        * than rcv_las. This can happen if acks are lost in both directions. */
     299           0 :       if (tcp_in_recovery (tc0)
     300           0 :           && seq_geq (vnet_buffer (b0)->tcp.seq_number,
     301             :                       tc0->rcv_las - tc0->rcv_wnd)
     302           0 :           && seq_leq (vnet_buffer (b0)->tcp.seq_end,
     303             :                       tc0->rcv_nxt + tc0->rcv_wnd))
     304           0 :         goto check_reset;
     305             : 
     306           0 :       *error0 = TCP_ERROR_RCV_WND;
     307             : 
     308             :       /* If we advertised a zero rcv_wnd and the segment is in the past or the
     309             :        * next one that we expect, it is probably a window probe */
     310           0 :       if ((tc0->flags & TCP_CONN_ZERO_RWND_SENT)
     311           0 :           && seq_lt (vnet_buffer (b0)->tcp.seq_end,
     312             :                      tc0->rcv_las + tc0->rcv_opts.mss))
     313           0 :         *error0 = TCP_ERROR_ZERO_RWND;
     314             : 
                             /* Count segments that end before rcv_las; the comparison result
                              * is added directly to the counter */
     315           0 :       tc0->errors.below_data_wnd += seq_lt (vnet_buffer (b0)->tcp.seq_end,
     316             :                                             tc0->rcv_las);
     317             : 
     318             :       /* If not RST, send dup ack */
     319           0 :       if (!tcp_rst (th0))
     320             :         {
     321           0 :           tcp_program_dupack (tc0);
     322             :           TCP_EVT (TCP_EVT_DUPACK_SENT, tc0, vnet_buffer (b0)->tcp);
     323             :         }
     324           0 :       goto error;
     325             : 
                             /* Out-of-window segments that were let through resume here */
     326     1020440 :     check_reset:
     327             :       ;
     328             :     }
     329             : 
     330             :   /* 2nd: check the RST bit */
     331     1020440 :   if (PREDICT_FALSE (tcp_rst (th0)))
     332             :     {
     333           4 :       tcp_rcv_rst (wrk, tc0);
     334           4 :       *error0 = TCP_ERROR_RST_RCVD;
     335           4 :       goto error;
     336             :     }
     337             : 
     338             :   /* 3rd: check security and precedence (skip) */
     339             : 
     340             :   /* 4th: check the SYN bit (in window) */
     341     1020440 :   if (PREDICT_FALSE (tcp_syn (th0)))
     342             :     {
     343             :       /* As per RFC5961 send challenge ack instead of reset */
     344           0 :       tcp_program_ack (tc0);
     345           0 :       *error0 = TCP_ERROR_SPURIOUS_SYN;
     346           0 :       goto error;
     347             :     }
     348             : 
     349             :   /* If segment in window, save timestamp */
     350     1020440 :   tcp_update_timestamp (tc0, vnet_buffer (b0)->tcp.seq_number,
     351     1020440 :                         vnet_buffer (b0)->tcp.seq_end);
     352     1020440 :   return 0;
     353             : 
     354           6 : error:
     355           6 :   return -1;
     356             : }
     357             : 
                       /**
                        * Process an ack number without touching congestion control state
                        *
                        * Accepts ack numbers with snd_una <= ack <= snd_nxt. On success the
                        * number of newly acked bytes is stored in bytes_acked, snd_una is moved
                        * to the ack number and *error is set to TCP_ERROR_ACK_OK.
                        *
                        * NOTE(review): the inner test in the failure branch requires
                        * snd_una < ack <= snd_nxt, which is a subset of the range the outer
                        * test has just rejected. With consistent seq_* comparisons that branch
                        * looks unreachable (it also has no hits in this listing) - confirm
                        * and consider removing it.
                        *
                        * @param tc     connection whose snd_una, snd_nxt and bytes_acked are
                        *               read and updated
                        * @param b      buffer whose tcp.ack_number metadata is read
                        * @param error  out: TCP_ERROR_ACK_OK or TCP_ERROR_ACK_INVALID
                        * @return 0 if the ack was accepted, -1 otherwise
                        */
     358             : always_inline int
     359         286 : tcp_rcv_ack_no_cc (tcp_connection_t * tc, vlib_buffer_t * b, u32 * error)
     360             : {
     361             :   /* SND.UNA =< SEG.ACK =< SND.NXT */
     362         286 :   if (!(seq_leq (tc->snd_una, vnet_buffer (b)->tcp.ack_number)
     363         286 :         && seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
     364             :     {
     365           0 :       if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)
     366           0 :           && seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
     367             :         {
     368           0 :           tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
     369           0 :           goto acceptable;
     370             :         }
     371           0 :       *error = TCP_ERROR_ACK_INVALID;
     372           0 :       return -1;
     373             :     }
     374             : 
     375         286 : acceptable:
     376         286 :   tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una;
     377         286 :   tc->snd_una = vnet_buffer (b)->tcp.ack_number;
     378         286 :   *error = TCP_ERROR_ACK_OK;
     379         286 :   return 0;
     380             : }
     381             : 
     382             : /**
     383             :  * Compute smoothed RTT as per VJ's '88 SIGCOMM and RFC6298
     384             :  *
     385             :  * Note that although in the original article srtt and rttvar are scaled
     386             :  * to minimize round-off errors, here we don't. Instead, we rely on
     387             :  * better precision time measurements.
     388             :  *
     389             :  * A known limitation of the algorithm is that a drop in rtt results in a
     390             :  * rttvar increase and bigger RTO.
     391             :  *
     392             :  * mrtt must be provided in @ref TCP_TICK multiples, i.e., in us. Note that
     393             :  * timestamps are measured as ms ticks so they must be converted before
     394             :  * calling this function.
                        *
                        * As implemented: srtt moves towards mrtt by 1/8 of the error and rttvar
                        * moves towards |error| by 1/4 of the difference, both via right shifts
                        * on signed ints, and both results are clamped to a minimum of 1.
                        *
                        * @param tc    connection whose srtt and rttvar are updated
                        * @param mrtt  new rtt measurement
     395             :  */
     396             : static void
     397       40932 : tcp_estimate_rtt (tcp_connection_t * tc, u32 mrtt)
     398             : {
     399             :   int err, diff;
     400             : 
                         /* err is negative when the new sample is below the current srtt */
     401       40932 :   err = mrtt - tc->srtt;
     402       40932 :   tc->srtt = clib_max ((int) tc->srtt + (err >> 3), 1);
     403       40932 :   diff = (clib_abs (err) - (int) tc->rttvar) >> 2;
     404       40932 :   tc->rttvar = clib_max ((int) tc->rttvar + diff, 1);
     405       40932 : }
     406             : 
                       /**
                        * Update the floating point rtt estimate mrtt_us
                        *
                        * Moves tc->mrtt_us towards the new sample by 1/8 (0.125) of the
                        * difference between the two.
                        *
                        * @param tc    connection whose mrtt_us is updated
                        * @param mrtt  new rtt sample, in the same unit as mrtt_us (the name
                        *              suggests microsecond precision; callers in this file
                        *              pass time differences in seconds - TODO confirm)
                        */
     407             : static inline void
     408       21097 : tcp_estimate_rtt_us (tcp_connection_t * tc, f64 mrtt)
     409             : {
     410       21097 :   tc->mrtt_us = tc->mrtt_us + (mrtt - tc->mrtt_us) * 0.125;
     411       21097 : }
     412             : 
     413             : /**
     414             :  * Update rtt estimate
     415             :  *
     416             :  * We have potentially three sources of rtt measurements:
     417             :  *
     418             :  * TSOPT        difference between current and echoed timestamp. It has ms
     419             :  *              precision and can be computed per ack
     420             :  * ACK timing   one sequence number is tracked per rtt with us (micro second)
     421             :  *              precision.
     422             :  * rate sample  if enabled, all outstanding bytes are tracked with us
     423             :  *              precision. Every ack and sack are a rtt sample
     424             :  *
     425             :  * Middle boxes are known to fiddle with TCP options so we give higher
     426             :  * priority to ACK timing.
     427             :  *
     428             :  * For now, rate sample rtts are only used under congestion.
                        *
                        * Whether or not a usable sample was found, rto_boff is reset to 0 and
                        * tcp_update_rto is called before returning.
                        *
                        * @param tc   connection whose rtt state is updated
                        * @param rs   rate sample; only dereferenced while in congestion recovery
                        *             with TCP_CFG_F_RATE_SAMPLE set
                        * @param ack  ack number of the segment being processed
                        * @return always 0
     429             :  */
     430             : static int
     431       40932 : tcp_update_rtt (tcp_connection_t * tc, tcp_rate_sample_t * rs, u32 ack)
     432             : {
     433       40932 :   u32 mrtt = 0;
     434             : 
     435             :   /* Karn's rule, part 1. Don't use retransmitted segments to estimate
     436             :    * RTT because they're ambiguous. */
     437       40932 :   if (tcp_in_cong_recovery (tc))
     438             :     {
     439             :       /* Accept rtt estimates for samples that have not been retransmitted */
     440           0 :       if (!(tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
     441           0 :           || (rs->flags & TCP_BTS_IS_RXT))
     442           0 :         goto done;
     443           0 :       if (rs->rtt_time)
     444           0 :         tcp_estimate_rtt_us (tc, rs->rtt_time);
                             /* A zero rtt_time yields mrtt == 0, which is discarded below */
     445           0 :       mrtt = rs->rtt_time * THZ;
     446           0 :       goto estimate_rtt;
     447             :     }
     448             : 
                         /* ACK timing: a measurement is in progress (rtt_ts set) and this ack
                          * covers the tracked sequence number */
     449       40932 :   if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq))
     450       21097 :     {
     451       21097 :       f64 sample = tcp_time_now_us (tc->c_thread_index) - tc->rtt_ts;
     452       21097 :       tcp_estimate_rtt_us (tc, sample);
     453       21097 :       mrtt = clib_max ((u32) (sample * THZ), 1);
     454             :       /* Allow measuring of a new RTT */
     455       21097 :       tc->rtt_ts = 0;
     456             :     }
     457             :   /* As per RFC7323 TSecr can be used for RTTM only if the segment advances
     458             :    * snd_una, i.e., the left side of the send window:
     459             :    * seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */
     460       19835 :   else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr)
     461             :     {
     462       19835 :       mrtt = clib_max (tcp_tstamp (tc) - tc->rcv_opts.tsecr, 1);
     463       19835 :       mrtt *= TCP_TSTP_TO_HZ;
     464             :     }
     465             : 
     466           0 : estimate_rtt:
     467             : 
     468             :   /* Ignore dubious measurements */
     469       40932 :   if (mrtt == 0 || mrtt > TCP_RTT_MAX)
     470           0 :     goto done;
     471             : 
     472       40932 :   tcp_estimate_rtt (tc, mrtt);
     473             : 
     474       40932 : done:
     475             : 
     476             :   /* If we got here something must've been ACKed so make sure boff is 0,
     477             :    * even if mrtt is not valid since we update the rto lower */
     478       40932 :   tc->rto_boff = 0;
     479       40932 :   tcp_update_rto (tc);
     480             : 
     481       40932 :   return 0;
     482             : }
     483             : 
                       /**
                        * Seed the rtt estimators with the first measurement
                        *
                        * If a timed measurement is in progress (rtt_ts set), the sample is the
                        * time elapsed since rtt_ts, floored at 0.0001, and rtt_ts is cleared.
                        * Otherwise the sample is derived from the echoed timestamp (tsecr);
                        * if the rto was backed off (rto_boff non-zero) it is capped at 1 * THZ
                        * because retransmits make the real value unknown.
                        *
                        * If the resulting mrtt is within (0, TCP_RTT_MAX), srtt is set to mrtt
                        * and rttvar to mrtt / 2. tcp_update_rto is called in all cases.
                        *
                        * NOTE(review): the time source is thread 1 when workers exist and
                        * thread 0 otherwise, rather than tc->c_thread_index; the reason is
                        * not visible in this part of the file - confirm.
                        *
                        * @param tc  connection whose mrtt_us, srtt, rttvar and rtt_ts are updated
                        */
     484             : static void
     485         264 : tcp_estimate_initial_rtt (tcp_connection_t * tc)
     486             : {
     487         264 :   u8 thread_index = vlib_num_workers ()? 1 : 0;
     488             :   int mrtt;
     489             : 
     490         264 :   if (tc->rtt_ts)
     491             :     {
     492         264 :       tc->mrtt_us = tcp_time_now_us (thread_index) - tc->rtt_ts;
     493         264 :       tc->mrtt_us = clib_max (tc->mrtt_us, 0.0001);
     494         264 :       mrtt = clib_max ((u32) (tc->mrtt_us * THZ), 1);
     495         264 :       tc->rtt_ts = 0;
     496             :     }
     497             :   else
     498             :     {
     499           0 :       mrtt = tcp_tstamp (tc) - tc->rcv_opts.tsecr;
     500           0 :       mrtt = clib_max (mrtt, 1) * TCP_TSTP_TO_HZ;
     501             :       /* Due to retransmits we don't know the initial mrtt */
     502           0 :       if (tc->rto_boff && mrtt > 1 * THZ)
     503           0 :         mrtt = 1 * THZ;
     504           0 :       tc->mrtt_us = (f64) mrtt *TCP_TICK;
     505             :     }
     506             : 
     507         264 :   if (mrtt > 0 && mrtt < TCP_RTT_MAX)
     508             :     {
     509             :       /* First measurement as per RFC 6298 */
     510         264 :       tc->srtt = mrtt;
     511         264 :       tc->rttvar = mrtt >> 1;
     512             :     }
     513         264 :   tcp_update_rto (tc);
     514         264 : }
     515             : 
     516             : /**
     517             :  * Dequeue bytes for connections that have received acks in last burst
                        *
                        * Walks wrk->pending_deq_acked, the connection indices queued by
                        * tcp_program_dequeue. For each connection the TCP_CONN_DEQ_PENDING flag
                        * is cleared and, if burst_acked is non-zero, that many bytes are
                        * dropped from the tx fifo, the connection is rescheduled if it was
                        * descheduled, the retransmit timer and the pacer are updated and
                        * burst_acked is reset. The vector length is set back to 0 at the end.
                        *
                        * @param wrk  worker whose pending dequeues are processed
     518             :  */
     519             : static void
     520       30221 : tcp_handle_postponed_dequeues (tcp_worker_ctx_t * wrk)
     521             : {
     522       30221 :   u32 thread_index = wrk->vm->thread_index;
     523             :   u32 *pending_deq_acked;
     524             :   tcp_connection_t *tc;
     525             :   int i;
     526             : 
     527       30221 :   if (!vec_len (wrk->pending_deq_acked))
     528         212 :     return;
     529             : 
     530       30009 :   pending_deq_acked = wrk->pending_deq_acked;
     531       70941 :   for (i = 0; i < vec_len (pending_deq_acked); i++)
     532             :     {
     533       40932 :       tc = tcp_connection_get (pending_deq_acked[i], thread_index);
     534       40932 :       tc->flags &= ~TCP_CONN_DEQ_PENDING;
     535             : 
                             /* Queued, but no bytes accumulated: nothing to dequeue */
     536       40932 :       if (PREDICT_FALSE (!tc->burst_acked))
     537         127 :         continue;
     538             : 
     539             :       /* Dequeue the newly ACKed bytes */
     540       40805 :       session_tx_fifo_dequeue_drop (&tc->connection, tc->burst_acked);
     541       40805 :       tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
     542             : 
     543       40805 :       if (tcp_is_descheduled (tc))
     544           1 :         tcp_reschedule (tc);
     545             : 
     546             :       /* If everything has been acked, stop retransmit timer
     547             :        * otherwise update. */
     548       40805 :       tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
     549             : 
     550             :       /* Update pacer based on our new cwnd estimate */
     551       40805 :       tcp_connection_tx_pacer_update (tc);
     552             : 
     553       40805 :       tc->burst_acked = 0;
     554             :     }
     555       30009 :   vec_set_len (wrk->pending_deq_acked, 0);
     556             : }
     557             : 
                       /**
                        * Schedule a postponed tx fifo dequeue for a connection
                        *
                        * Adds the connection index to wrk->pending_deq_acked the first time it
                        * is called for a connection (tracked with TCP_CONN_DEQ_PENDING) and
                        * always accumulates bytes_acked into burst_acked. The queued work is
                        * consumed by tcp_handle_postponed_dequeues.
                        *
                        * @param wrk  worker whose pending_deq_acked vector is appended to
                        * @param tc   connection with newly acked bytes in bytes_acked
                        */
     558             : static void
     559       40932 : tcp_program_dequeue (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
     560             : {
     561       40932 :   if (!(tc->flags & TCP_CONN_DEQ_PENDING))
     562             :     {
     563       40932 :       vec_add1 (wrk->pending_deq_acked, tc->c_c_index);
     564       40932 :       tc->flags |= TCP_CONN_DEQ_PENDING;
     565             :     }
     566       40932 :   tc->burst_acked += tc->bytes_acked;
     567       40932 : }
     568             : 
     569             : /**
     570             :  * Try to update snd_wnd based on feedback received from peer.
     571             :  *
     572             :  * If successful, and new window is 'effectively' 0, activate persist
     573             :  * timer.
                        *
                        * 'Effectively 0' means smaller than snd_mss. In that case the persist
                        * timer is (re)armed, but only while the retransmit timer is not
                        * active. If the window is at least snd_mss, an active persist timer is
                        * reset, a descheduled connection is rescheduled and, when not in
                        * recovery, a non-zero rto_boff is cleared and the rto recomputed.
                        *
                        * @param tc       connection whose snd_wnd, snd_wl1 and snd_wl2 are updated
                        * @param seq      sequence number of the segment carrying the window
                        * @param ack      ack number of the segment carrying the window
                        * @param snd_wnd  window advertised by the peer
     574             :  */
     575             : static void
     576     1020150 : tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd)
     577             : {
     578             :   /* If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and SND.WL2 =< SEG.ACK)), set
     579             :    * SND.WND <- SEG.WND, set SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK */
     580     1020150 :   if (seq_lt (tc->snd_wl1, seq)
     581       24079 :       || (tc->snd_wl1 == seq && seq_leq (tc->snd_wl2, ack)))
     582             :     {
     583     1020150 :       tc->snd_wnd = snd_wnd;
     584     1020150 :       tc->snd_wl1 = seq;
     585     1020150 :       tc->snd_wl2 = ack;
     586             :       TCP_EVT (TCP_EVT_SND_WND, tc);
     587             : 
     588     1020150 :       if (PREDICT_FALSE (tc->snd_wnd < tc->snd_mss))
     589             :         {
     590           0 :           if (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT))
     591             :             {
     592           0 :               tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
     593             : 
     594             :               /* Set persist timer if we just got 0 wnd. If already set,
     595             :                * update it because some data sent with snd_wnd < snd_mss was
     596             :                * acked. */
     597           0 :               if (tcp_timer_is_active (tc, TCP_TIMER_PERSIST))
     598           0 :                 tcp_persist_timer_reset (&wrk->timer_wheel, tc);
     599           0 :               tcp_persist_timer_set (&wrk->timer_wheel, tc);
     600             :             }
     601             :         }
     602             :       else
     603             :         {
                                 /* Window is usable again: the persist timer is not needed */
     604     1020150 :           if (PREDICT_FALSE (tcp_timer_is_active (tc, TCP_TIMER_PERSIST)))
     605             :             {
     606           0 :               tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
     607           0 :               tcp_persist_timer_reset (&wrk->timer_wheel, tc);
     608             :             }
     609             : 
     610     1020150 :           if (PREDICT_FALSE (tcp_is_descheduled (tc)))
     611       28889 :             tcp_reschedule (tc);
     612             : 
     613     1020150 :           if (PREDICT_FALSE (!tcp_in_recovery (tc) && tc->rto_boff > 0))
     614             :             {
     615           0 :               tc->rto_boff = 0;
     616           0 :               tcp_update_rto (tc);
     617             :             }
     618             :         }
     619             :     }
     620     1020150 : }
     621             : 
     622             : /**
     623             :  * Init loss recovery/fast recovery.
     624             :  *
     625             :  * Triggered by dup acks as opposed to timer timeout. Note that cwnd is
     626             :  * updated in @ref tcp_cc_handle_event after fast retransmit
     627             :  */
     628             : static void
     629           0 : tcp_cc_init_congestion (tcp_connection_t * tc)
     630             : {
     631           0 :   tcp_fastrecovery_on (tc);
     632           0 :   tc->snd_congestion = tc->snd_nxt;
     633           0 :   tc->cwnd_acc_bytes = 0;
     634           0 :   tc->snd_rxt_bytes = 0;
     635           0 :   tc->rxt_delivered = 0;
     636           0 :   tc->prr_delivered = 0;
     637           0 :   tc->prr_start = tc->snd_una;
     638           0 :   tc->prev_ssthresh = tc->ssthresh;
     639           0 :   tc->prev_cwnd = tc->cwnd;
     640             : 
     641           0 :   tc->snd_rxt_ts = tcp_tstamp (tc);
     642           0 :   tcp_cc_congestion (tc);
     643             : 
     644             :   /* Post retransmit update cwnd to ssthresh and account for the
     645             :    * three segments that have left the network and should've been
     646             :    * buffered at the receiver XXX */
     647           0 :   if (!tcp_opts_sack_permitted (&tc->rcv_opts))
     648           0 :     tc->cwnd += TCP_DUPACK_THRESHOLD * tc->snd_mss;
     649             : 
     650           0 :   tc->fr_occurences += 1;
     651             :   TCP_EVT (TCP_EVT_CC_EVT, tc, 4);
     652           0 : }
     653             : 
     654             : static void
     655           0 : tcp_cc_congestion_undo (tcp_connection_t * tc)
     656             : {
     657           0 :   tc->cwnd = tc->prev_cwnd;
     658           0 :   tc->ssthresh = tc->prev_ssthresh;
     659           0 :   tcp_cc_undo_recovery (tc);
     660           0 :   ASSERT (tc->rto_boff == 0);
     661             :   TCP_EVT (TCP_EVT_CC_EVT, tc, 5);
     662           0 : }
     663             : 
     664             : static inline u8
     665           0 : tcp_cc_is_spurious_timeout_rxt (tcp_connection_t * tc)
     666             : {
     667           0 :   return (tcp_in_recovery (tc) && tc->rto_boff == 1
     668           0 :           && tc->snd_rxt_ts
     669           0 :           && tcp_opts_tstamp (&tc->rcv_opts)
     670           0 :           && timestamp_lt (tc->rcv_opts.tsecr, tc->snd_rxt_ts));
     671             : }
     672             : 
     673             : static inline u8
     674           0 : tcp_cc_is_spurious_retransmit (tcp_connection_t * tc)
     675             : {
     676           0 :   return (tcp_cc_is_spurious_timeout_rxt (tc));
     677             : }
     678             : 
     679             : static inline u8
     680           0 : tcp_should_fastrecover (tcp_connection_t * tc, u8 has_sack)
     681             : {
     682           0 :   if (!has_sack)
     683             :     {
     684             :       /* If of of the two conditions lower hold, reset dupacks because
     685             :        * we're probably after timeout (RFC6582 heuristics).
     686             :        * If Cumulative ack does not cover more than congestion threshold,
     687             :        * and:
     688             :        * 1) The following doesn't hold: The congestion window is greater
     689             :        *    than SMSS bytes and the difference between highest_ack
     690             :        *    and prev_highest_ack is at most 4*SMSS bytes
     691             :        * 2) Echoed timestamp in the last non-dup ack does not equal the
     692             :        *    stored timestamp
     693             :        */
     694           0 :       if (seq_leq (tc->snd_una, tc->snd_congestion)
     695           0 :           && ((!(tc->cwnd > tc->snd_mss
     696           0 :                  && tc->bytes_acked <= 4 * tc->snd_mss))
     697           0 :               || (tc->rcv_opts.tsecr != tc->tsecr_last_ack)))
     698             :         {
     699           0 :           tc->rcv_dupacks = 0;
     700           0 :           return 0;
     701             :         }
     702             :     }
     703           0 :   return tc->sack_sb.lost_bytes || tc->rcv_dupacks >= tc->sack_sb.reorder;
     704             : }
     705             : 
     706             : static int
     707           0 : tcp_cc_try_recover (tcp_connection_t *tc)
     708             : {
     709             :   sack_scoreboard_hole_t *hole;
     710           0 :   u8 is_spurious = 0;
     711             : 
     712           0 :   ASSERT (tcp_in_cong_recovery (tc));
     713             : 
     714           0 :   if (tcp_cc_is_spurious_retransmit (tc))
     715             :     {
     716           0 :       tcp_cc_congestion_undo (tc);
     717           0 :       is_spurious = 1;
     718             :     }
     719             : 
     720           0 :   tcp_connection_tx_pacer_reset (tc, tc->cwnd, 0 /* start bucket */ );
     721           0 :   tc->rcv_dupacks = 0;
     722           0 :   tcp_recovery_off (tc);
     723             : 
     724             :   /* Previous recovery left us congested. Continue sending as part
     725             :    * of the current recovery event with an updated snd_congestion */
     726           0 :   if (tc->sack_sb.sacked_bytes && tcp_in_fastrecovery (tc))
     727             :     {
     728           0 :       tc->snd_congestion = tc->snd_nxt;
     729           0 :       return -1;
     730             :     }
     731             : 
     732           0 :   tc->rxt_delivered = 0;
     733           0 :   tc->snd_rxt_bytes = 0;
     734           0 :   tc->snd_rxt_ts = 0;
     735           0 :   tc->prr_delivered = 0;
     736           0 :   tc->rtt_ts = 0;
     737           0 :   tc->flags &= ~TCP_CONN_RXT_PENDING;
     738             : 
     739           0 :   hole = scoreboard_first_hole (&tc->sack_sb);
     740           0 :   if (hole && hole->start == tc->snd_una && hole->end == tc->snd_nxt)
     741           0 :     scoreboard_clear (&tc->sack_sb);
     742             : 
     743           0 :   if (tcp_in_fastrecovery (tc) && !is_spurious)
     744           0 :     tcp_cc_recovered (tc);
     745             : 
     746           0 :   tcp_fastrecovery_off (tc);
     747           0 :   tcp_fastrecovery_first_off (tc);
     748             :   TCP_EVT (TCP_EVT_CC_EVT, tc, 3);
     749             : 
     750           0 :   ASSERT (tc->rto_boff == 0);
     751           0 :   ASSERT (!tcp_in_cong_recovery (tc));
     752           0 :   ASSERT (tcp_scoreboard_is_sane_post_recovery (tc));
     753             : 
     754           0 :   return 0;
     755             : }
     756             : 
     757             : static void
     758     1020150 : tcp_cc_update (tcp_connection_t * tc, tcp_rate_sample_t * rs)
     759             : {
     760     1020150 :   ASSERT (!tcp_in_cong_recovery (tc) || tcp_is_lost_fin (tc));
     761             : 
     762             :   /* Congestion avoidance */
     763     1020150 :   tcp_cc_rcv_ack (tc, rs);
     764             : 
     765             :   /* If a cumulative ack, make sure dupacks is 0 */
     766     1020150 :   tc->rcv_dupacks = 0;
     767     1020150 : }
     768             : 
     769             : /**
     770             :  * One function to rule them all ... and in the darkness bind them
     771             :  */
     772             : static void
     773           1 : tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs,
     774             :                      u32 is_dack)
     775             : {
     776           1 :   u8 has_sack = tcp_opts_sack_permitted (&tc->rcv_opts);
     777             : 
     778             :   /* If reneging, wait for timer based retransmits */
     779           1 :   if (PREDICT_FALSE (tcp_is_lost_fin (tc) || tc->sack_sb.is_reneging))
     780           1 :     return;
     781             : 
     782             :   /*
     783             :    * If not in recovery, figure out if we should enter
     784             :    */
     785           0 :   if (!tcp_in_cong_recovery (tc))
     786             :     {
     787           0 :       ASSERT (is_dack);
     788             : 
     789           0 :       tc->rcv_dupacks++;
     790             :       TCP_EVT (TCP_EVT_DUPACK_RCVD, tc, 1);
     791           0 :       tcp_cc_rcv_cong_ack (tc, TCP_CC_DUPACK, rs);
     792             : 
     793           0 :       if (tcp_should_fastrecover (tc, has_sack))
     794             :         {
     795           0 :           tcp_cc_init_congestion (tc);
     796             : 
     797           0 :           if (has_sack)
     798           0 :             scoreboard_init_rxt (&tc->sack_sb, tc->snd_una);
     799             : 
     800           0 :           tcp_connection_tx_pacer_reset (tc, tc->cwnd, 0 /* start bucket */ );
     801           0 :           tcp_program_retransmit (tc);
     802             :         }
     803             : 
     804           0 :       return;
     805             :     }
     806             : 
     807             :   /*
     808             :    * Already in recovery
     809             :    */
     810             : 
     811             :   /*
     812             :    * See if we can exit and stop retransmitting
     813             :    */
     814           0 :   if (seq_geq (tc->snd_una, tc->snd_congestion))
     815             :     {
     816             :       /* If successfully recovered, treat ack as congestion avoidance ack
     817             :        * and return. Otherwise, we're still congested so process feedback */
     818           0 :       if (!tcp_cc_try_recover (tc))
     819             :         {
     820           0 :           tcp_cc_rcv_ack (tc, rs);
     821           0 :           return;
     822             :         }
     823             :     }
     824             : 
     825             :   /*
     826             :    * Process (re)transmit feedback. Output path uses this to decide how much
     827             :    * more data to release into the network
     828             :    */
     829           0 :   if (has_sack)
     830             :     {
     831           0 :       if (!tc->bytes_acked && tc->sack_sb.rxt_sacked)
     832           0 :         tcp_fastrecovery_first_on (tc);
     833             : 
     834           0 :       tc->rxt_delivered += tc->sack_sb.rxt_sacked;
     835           0 :       tc->prr_delivered += rs->delivered;
     836             :     }
     837             :   else
     838             :     {
     839           0 :       if (is_dack)
     840             :         {
     841           0 :           tc->rcv_dupacks += 1;
     842             :           TCP_EVT (TCP_EVT_DUPACK_RCVD, tc, 1);
     843             :         }
     844           0 :       tc->rxt_delivered = clib_min (tc->rxt_delivered + tc->bytes_acked,
     845             :                                     tc->snd_rxt_bytes);
     846           0 :       if (is_dack)
     847           0 :         tc->prr_delivered += clib_min (tc->snd_mss,
     848             :                                        tc->snd_nxt - tc->snd_una);
     849             :       else
     850           0 :         tc->prr_delivered += tc->bytes_acked - clib_min (tc->bytes_acked,
     851             :                                                          tc->snd_mss *
     852             :                                                          tc->rcv_dupacks);
     853             : 
     854             :       /* If partial ack, assume that the first un-acked segment was lost */
     855           0 :       if (tc->bytes_acked || tc->rcv_dupacks == TCP_DUPACK_THRESHOLD)
     856           0 :         tcp_fastrecovery_first_on (tc);
     857             :     }
     858             : 
     859           0 :   tcp_program_retransmit (tc);
     860             : 
     861             :   /*
     862             :    * Notify cc of the event
     863             :    */
     864             : 
     865           0 :   if (!tc->bytes_acked)
     866             :     {
     867           0 :       tcp_cc_rcv_cong_ack (tc, TCP_CC_DUPACK, rs);
     868           0 :       return;
     869             :     }
     870             : 
     871             :   /* RFC6675: If the incoming ACK is a cumulative acknowledgment,
     872             :    * reset dupacks to 0. Also needed if in congestion recovery */
     873           0 :   tc->rcv_dupacks = 0;
     874             : 
     875           0 :   if (tcp_in_recovery (tc))
     876           0 :     tcp_cc_rcv_ack (tc, rs);
     877             :   else
     878           0 :     tcp_cc_rcv_cong_ack (tc, TCP_CC_PARTIALACK, rs);
     879             : }
     880             : 
     881             : static void
     882           0 : tcp_handle_old_ack (tcp_connection_t * tc, tcp_rate_sample_t * rs)
     883             : {
     884           0 :   if (!tcp_in_cong_recovery (tc))
     885           0 :     return;
     886             : 
     887           0 :   if (tcp_opts_sack_permitted (&tc->rcv_opts))
     888           0 :     tcp_rcv_sacks (tc, tc->snd_una);
     889             : 
     890           0 :   tc->bytes_acked = 0;
     891             : 
     892           0 :   if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
     893           0 :     tcp_bt_sample_delivery_rate (tc, rs);
     894             : 
     895           0 :   tcp_cc_handle_event (tc, rs, 1);
     896             : }
     897             : 
     898             : /**
     899             :  * Check if duplicate ack as per RFC5681 Sec. 2
     900             :  */
     901             : always_inline u8
     902     1020150 : tcp_ack_is_dupack (tcp_connection_t * tc, vlib_buffer_t * b, u32 prev_snd_wnd,
     903             :                    u32 prev_snd_una)
     904             : {
     905     1020150 :   return ((vnet_buffer (b)->tcp.ack_number == prev_snd_una)
     906      979221 :           && seq_gt (tc->snd_nxt, tc->snd_una)
     907       33701 :           && (vnet_buffer (b)->tcp.seq_end == vnet_buffer (b)->tcp.seq_number)
     908     1999370 :           && (prev_snd_wnd == tc->snd_wnd));
     909             : }
     910             : 
     911             : /**
     912             :  * Checks if ack is a congestion control event.
     913             :  */
     914             : static u8
     915     1020150 : tcp_ack_is_cc_event (tcp_connection_t * tc, vlib_buffer_t * b,
     916             :                      u32 prev_snd_wnd, u32 prev_snd_una, u8 * is_dack)
     917             : {
     918             :   /* Check if ack is duplicate. Per RFC 6675, ACKs that SACK new data are
     919             :    * defined to be 'duplicate' as well */
     920     2040310 :   *is_dack = tc->sack_sb.last_sacked_bytes
     921     1020150 :     || tcp_ack_is_dupack (tc, b, prev_snd_wnd, prev_snd_una);
     922             : 
     923     1020150 :   return (*is_dack || tcp_in_cong_recovery (tc));
     924             : }
     925             : 
     926             : /**
     927             :  * Process incoming ACK
     928             :  */
     929             : static int
     930     1020150 : tcp_rcv_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
     931             :              tcp_header_t * th, u32 * error)
     932             : {
     933             :   u32 prev_snd_wnd, prev_snd_una;
     934     1020150 :   tcp_rate_sample_t rs = { 0 };
     935             :   u8 is_dack;
     936             : 
     937             :   TCP_EVT (TCP_EVT_CC_STAT, tc);
     938             : 
     939             :   /* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) */
     940     1020150 :   if (PREDICT_FALSE (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
     941             :     {
     942             :       /* We've probably entered recovery and the peer still has some
     943             :        * of the data we've sent. Update snd_nxt and accept the ack */
     944           0 :       if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)
     945           0 :           && seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
     946             :         {
     947           0 :           tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
     948           0 :           goto process_ack;
     949             :         }
     950             : 
     951           0 :       tc->errors.above_ack_wnd += 1;
     952           0 :       *error = TCP_ERROR_ACK_FUTURE;
     953             :       TCP_EVT (TCP_EVT_ACK_RCV_ERR, tc, 0, vnet_buffer (b)->tcp.ack_number);
     954           0 :       return -1;
     955             :     }
     956             : 
     957             :   /* If old ACK, probably it's an old dupack */
     958     1020150 :   if (PREDICT_FALSE (seq_lt (vnet_buffer (b)->tcp.ack_number, tc->snd_una)))
     959             :     {
     960           0 :       tc->errors.below_ack_wnd += 1;
     961           0 :       *error = TCP_ERROR_ACK_OLD;
     962             :       TCP_EVT (TCP_EVT_ACK_RCV_ERR, tc, 1, vnet_buffer (b)->tcp.ack_number);
     963             : 
     964           0 :       if (seq_lt (vnet_buffer (b)->tcp.ack_number, tc->snd_una - tc->rcv_wnd))
     965           0 :         return -1;
     966             : 
     967           0 :       tcp_handle_old_ack (tc, &rs);
     968             : 
     969             :       /* Don't drop yet */
     970           0 :       return 0;
     971             :     }
     972             : 
     973     1020150 : process_ack:
     974             : 
     975             :   /*
     976             :    * Looks okay, process feedback
     977             :    */
     978             : 
     979     1020150 :   if (tcp_opts_sack_permitted (&tc->rcv_opts))
     980     1020150 :     tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number);
     981             : 
     982     1020150 :   prev_snd_wnd = tc->snd_wnd;
     983     1020150 :   prev_snd_una = tc->snd_una;
     984     1020150 :   tcp_update_snd_wnd (tc, vnet_buffer (b)->tcp.seq_number,
     985     1020150 :                       vnet_buffer (b)->tcp.ack_number,
     986     1020150 :                       clib_net_to_host_u16 (th->window) << tc->snd_wscale);
     987     1020150 :   tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una;
     988     1020150 :   tc->snd_una = vnet_buffer (b)->tcp.ack_number;
     989     1020150 :   tcp_validate_txf_size (tc, tc->bytes_acked);
     990             : 
     991     1020150 :   if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
     992           0 :     tcp_bt_sample_delivery_rate (tc, &rs);
     993             :   else
     994     1020150 :     rs.delivered = tc->bytes_acked + tc->sack_sb.last_sacked_bytes -
     995     1020150 :                    tc->sack_sb.last_bytes_delivered;
     996             : 
     997     1020150 :   if (tc->bytes_acked + tc->sack_sb.last_sacked_bytes)
     998             :     {
     999       40932 :       tcp_update_rtt (tc, &rs, vnet_buffer (b)->tcp.ack_number);
    1000       40932 :       if (tc->bytes_acked)
    1001       40932 :         tcp_program_dequeue (wrk, tc);
    1002             :     }
    1003             : 
    1004             :   TCP_EVT (TCP_EVT_ACK_RCVD, tc);
    1005             : 
    1006             :   /*
    1007             :    * Check if we have congestion event
    1008             :    */
    1009             : 
    1010     1020150 :   if (tcp_ack_is_cc_event (tc, b, prev_snd_wnd, prev_snd_una, &is_dack))
    1011             :     {
    1012           1 :       tcp_cc_handle_event (tc, &rs, is_dack);
    1013           1 :       tc->dupacks_in += is_dack;
    1014           1 :       if (!tcp_in_cong_recovery (tc))
    1015             :         {
    1016           1 :           *error = TCP_ERROR_ACK_OK;
    1017           1 :           return 0;
    1018             :         }
    1019           0 :       *error = TCP_ERROR_ACK_DUP;
    1020           0 :       if (vnet_buffer (b)->tcp.data_len || tcp_is_fin (th))
    1021           0 :         return 0;
    1022           0 :       return -1;
    1023             :     }
    1024             : 
    1025             :   /*
    1026             :    * Update congestion control (slow start/congestion avoidance)
    1027             :    */
    1028     1020150 :   tcp_cc_update (tc, &rs);
    1029     1020150 :   *error = TCP_ERROR_ACK_OK;
    1030     1020150 :   return 0;
    1031             : }
    1032             : 
    1033             : static void
    1034         127 : tcp_program_disconnect (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
    1035             : {
    1036         127 :   if (!tcp_disconnect_pending (tc))
    1037             :     {
    1038         127 :       vec_add1 (wrk->pending_disconnects, tc->c_c_index);
    1039         127 :       tcp_disconnect_pending_on (tc);
    1040             :     }
    1041         127 : }
    1042             : 
    1043             : static void
    1044       30254 : tcp_handle_disconnects (tcp_worker_ctx_t * wrk)
    1045             : {
    1046             :   u32 thread_index, *pending_disconnects, *pending_resets;
    1047             :   tcp_connection_t *tc;
    1048             :   int i;
    1049             : 
    1050       30254 :   if (vec_len (wrk->pending_disconnects))
    1051             :     {
    1052          23 :       thread_index = wrk->vm->thread_index;
    1053          23 :       pending_disconnects = wrk->pending_disconnects;
    1054         150 :       for (i = 0; i < vec_len (pending_disconnects); i++)
    1055             :         {
    1056         127 :           tc = tcp_connection_get (pending_disconnects[i], thread_index);
    1057         127 :           tcp_disconnect_pending_off (tc);
    1058         127 :           session_transport_closing_notify (&tc->connection);
    1059             :         }
    1060          23 :       vec_set_len (wrk->pending_disconnects, 0);
    1061             :     }
    1062             : 
    1063       30254 :   if (vec_len (wrk->pending_resets))
    1064             :     {
    1065           4 :       thread_index = wrk->vm->thread_index;
    1066           4 :       pending_resets = wrk->pending_resets;
    1067           8 :       for (i = 0; i < vec_len (pending_resets); i++)
    1068             :         {
    1069           4 :           tc = tcp_connection_get (pending_resets[i], thread_index);
    1070           4 :           tcp_disconnect_pending_off (tc);
    1071           4 :           tcp_handle_rst (tc);
    1072             :         }
    1073           4 :       vec_set_len (wrk->pending_resets, 0);
    1074             :     }
    1075       30254 : }
    1076             : 
    1077             : static void
    1078         127 : tcp_rcv_fin (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
    1079             :              u32 * error)
    1080             : {
    1081             :   /* Reject out-of-order fins */
    1082         127 :   if (vnet_buffer (b)->tcp.seq_end != tc->rcv_nxt)
    1083           0 :     return;
    1084             : 
    1085             :   /* Account for the FIN and send ack */
    1086         127 :   tc->rcv_nxt += 1;
    1087         127 :   tc->flags |= TCP_CONN_FINRCVD;
    1088         127 :   tcp_program_ack (tc);
    1089             :   /* Enter CLOSE-WAIT and notify session. To avoid lingering
    1090             :    * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
    1091         127 :   tcp_connection_set_state (tc, TCP_STATE_CLOSE_WAIT);
    1092         127 :   tcp_program_disconnect (wrk, tc);
    1093         127 :   tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
    1094             :                     tcp_cfg.closewait_time);
    1095             :   TCP_EVT (TCP_EVT_FIN_RCVD, tc);
    1096         127 :   *error = TCP_ERROR_FIN_RCVD;
    1097             : }
    1098             : 
    1099             : /** Enqueue data for delivery to application */
    1100             : static int
    1101      995946 : tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b,
    1102             :                           u16 data_len)
    1103             : {
    1104      995946 :   int written, error = TCP_ERROR_ENQUEUED;
    1105             : 
    1106      995946 :   ASSERT (seq_geq (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt));
    1107      995946 :   ASSERT (data_len);
    1108      995946 :   written = session_enqueue_stream_connection (&tc->connection, b, 0,
    1109             :                                                1 /* queue event */ , 1);
    1110             : 
    1111             :   TCP_EVT (TCP_EVT_INPUT, tc, 0, data_len, written);
    1112             : 
    1113             :   /* Update rcv_nxt */
    1114      995946 :   if (PREDICT_TRUE (written == data_len))
    1115             :     {
    1116      995946 :       tc->rcv_nxt += written;
    1117      995946 :       tc->bytes_in += written;
    1118             :     }
    1119             :   /* If more data written than expected, account for out-of-order bytes. */
    1120           0 :   else if (written > data_len)
    1121             :     {
    1122           0 :       tc->rcv_nxt += written;
    1123           0 :       tc->bytes_in += data_len;
    1124             :       TCP_EVT (TCP_EVT_CC_INPUT, tc, data_len, written);
    1125             :     }
    1126           0 :   else if (written > 0)
    1127             :     {
    1128             :       /* We've written something but FIFO is probably full now */
    1129           0 :       tc->rcv_nxt += written;
    1130           0 :       tc->bytes_in += written;
    1131           0 :       error = TCP_ERROR_PARTIALLY_ENQUEUED;
    1132             :     }
    1133             :   else
    1134             :     {
    1135             :       /* Packet made it through for ack processing */
    1136           0 :       if (tc->rcv_wnd < tc->snd_mss)
    1137           0 :         return TCP_ERROR_ZERO_RWND;
    1138             : 
    1139           0 :       return TCP_ERROR_FIFO_FULL;
    1140             :     }
    1141             : 
    1142             :   /* Update SACK list if need be */
    1143      995946 :   if (tcp_opts_sack_permitted (&tc->rcv_opts) && vec_len (tc->snd_sacks))
    1144             :     {
    1145             :       /* Remove SACK blocks that have been delivered */
    1146           0 :       tcp_update_sack_list (tc, tc->rcv_nxt, tc->rcv_nxt);
    1147             :     }
    1148             : 
    1149      995946 :   return error;
    1150             : }
    1151             : 
    1152             : /** Enqueue out-of-order data */
    1153             : static int
    1154           0 : tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b,
    1155             :                          u16 data_len)
    1156             : {
    1157             :   session_t *s0;
    1158             :   int rv, offset;
    1159             : 
    1160           0 :   ASSERT (seq_gt (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt));
    1161           0 :   ASSERT (data_len);
    1162             : 
    1163             :   /* Enqueue out-of-order data with relative offset */
    1164           0 :   rv = session_enqueue_stream_connection (&tc->connection, b,
    1165           0 :                                           vnet_buffer (b)->tcp.seq_number -
    1166           0 :                                           tc->rcv_nxt, 0 /* queue event */ ,
    1167             :                                           0);
    1168             : 
    1169             :   /* Nothing written */
    1170           0 :   if (rv)
    1171             :     {
    1172             :       TCP_EVT (TCP_EVT_INPUT, tc, 1, data_len, 0);
    1173           0 :       return TCP_ERROR_FIFO_FULL;
    1174             :     }
    1175             : 
    1176             :   TCP_EVT (TCP_EVT_INPUT, tc, 1, data_len, data_len);
    1177           0 :   tc->bytes_in += data_len;
    1178             : 
    1179             :   /* Update SACK list if in use */
    1180           0 :   if (tcp_opts_sack_permitted (&tc->rcv_opts))
    1181             :     {
    1182             :       ooo_segment_t *newest;
    1183             :       u32 start, end;
    1184             : 
    1185           0 :       s0 = session_get (tc->c_s_index, tc->c_thread_index);
    1186             : 
    1187             :       /* Get the newest segment from the fifo */
    1188           0 :       newest = svm_fifo_newest_ooo_segment (s0->rx_fifo);
    1189           0 :       if (newest)
    1190             :         {
    1191           0 :           offset = ooo_segment_offset_prod (s0->rx_fifo, newest);
    1192           0 :           ASSERT (offset <= vnet_buffer (b)->tcp.seq_number - tc->rcv_nxt);
    1193           0 :           start = tc->rcv_nxt + offset;
    1194           0 :           end = start + ooo_segment_length (s0->rx_fifo, newest);
    1195           0 :           tcp_update_sack_list (tc, start, end);
    1196           0 :           svm_fifo_newest_ooo_segment_reset (s0->rx_fifo);
    1197             :           TCP_EVT (TCP_EVT_CC_SACKS, tc);
    1198             :         }
    1199             :     }
    1200             : 
    1201           0 :   return TCP_ERROR_ENQUEUED_OOO;
    1202             : }
    1203             : 
    1204             : static int /* Advance the buffer (chain) past n_bytes_to_drop leading bytes; returns 0, or -1 when the drop exceeds the first buffer and that buffer has no NEXT_PRESENT flag */
    1205           0 : tcp_buffer_discard_bytes (vlib_buffer_t * b, u32 n_bytes_to_drop)
    1206             : {
    1207           0 :   u32 discard, first = b->current_length; /* first: head buffer length before any advance */
    1208           0 :   vlib_main_t *vm = vlib_get_main ();
    1209             : 
    1210             :   /* Handle multi-buffer segments: the drop is larger than the head buffer */
    1211           0 :   if (n_bytes_to_drop > b->current_length)
    1212             :     {
    1213           0 :       if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
    1214           0 :         return -1;
    1215             :       do
    1216             :         {
    1217           0 :           discard = clib_min (n_bytes_to_drop, b->current_length);
    1218           0 :           vlib_buffer_advance (b, discard);
    1219           0 :           b = vlib_get_buffer (vm, b->next_buffer); /* NOTE(review): followed unconditionally, without re-checking NEXT_PRESENT and even when this iteration consumed the last bytes to drop; b stops referring to the head buffer - confirm */
    1220           0 :           n_bytes_to_drop -= discard;
    1221             :         }
    1222           0 :       while (n_bytes_to_drop);
    1223           0 :       if (n_bytes_to_drop > first) /* NOTE(review): the loop above only exits with n_bytes_to_drop == 0, so this condition cannot hold and the chain length below is never adjusted - confirm intent */
    1224           0 :         b->total_length_not_including_first_buffer -= n_bytes_to_drop - first;
    1225             :     }
    1226             :   else
    1227           0 :     vlib_buffer_advance (b, n_bytes_to_drop); /* single-buffer case: everything to drop is in the head buffer */
    1228           0 :   vnet_buffer (b)->tcp.data_len -= n_bytes_to_drop; /* NOTE(review): on the multi-buffer path this subtracts 0 and b is a later buffer of the chain, so data_len is only reduced on the single-buffer path - confirm */
    1229           0 :   return 0;
    1230             : }
    1231             : 
    1232             : /**
    1233             :  * Receive a data buffer for a connection and program the ack (or dupack)
    1234             :  *
    1235             :  * It handles both in-order and out-of-order data. Returns the TCP_ERROR_* code produced by the path taken; wrk is not referenced in this body.
    1236             :  */
    1237             : static int
    1238      995946 : tcp_segment_rcv (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
    1239             :                  vlib_buffer_t * b)
    1240             : {
    1241             :   u32 error, n_bytes_to_drop, n_data_bytes;
    1242             : 
    1243      995946 :   vlib_buffer_advance (b, vnet_buffer (b)->tcp.data_offset); /* advance by the data offset recorded in the buffer's tcp metadata */
    1244      995946 :   n_data_bytes = vnet_buffer (b)->tcp.data_len;
    1245      995946 :   ASSERT (n_data_bytes); /* callers are expected to pass only segments that carry data */
    1246      995946 :   tc->data_segs_in += 1;
    1247             : 
    1248             :   /* Make sure we don't consume trailing bytes */
    1249      995946 :   if (PREDICT_FALSE (b->current_length > n_data_bytes))
    1250           0 :     b->current_length = n_data_bytes;
    1251             : 
    1252             :   /* Handle out-of-order data */
    1253      995946 :   if (PREDICT_FALSE (vnet_buffer (b)->tcp.seq_number != tc->rcv_nxt))
    1254             :     {
    1255             :       /* Old sequence numbers allowed through because they overlapped
    1256             :        * the rx window */
    1257           0 :       if (seq_lt (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt))
    1258             :         {
    1259             :           /* Completely in the past (possible retransmit). Ack
    1260             :            * retransmissions since we may not have any data to send */
    1261           0 :           if (seq_leq (vnet_buffer (b)->tcp.seq_end, tc->rcv_nxt))
    1262             :             {
    1263           0 :               tcp_program_dupack (tc);
    1264           0 :               tc->errors.below_data_wnd++;
    1265           0 :               error = TCP_ERROR_SEGMENT_OLD;
    1266           0 :               goto done;
    1267             :             }
    1268             : 
    1269             :           /* Chop off the bytes in the past and see if what is left
    1270             :            * can be enqueued in order */
    1271           0 :           n_bytes_to_drop = tc->rcv_nxt - vnet_buffer (b)->tcp.seq_number;
    1272           0 :           n_data_bytes -= n_bytes_to_drop;
    1273           0 :           vnet_buffer (b)->tcp.seq_number = tc->rcv_nxt; /* the trimmed segment now starts at rcv_nxt */
    1274           0 :           if (tcp_buffer_discard_bytes (b, n_bytes_to_drop))
    1275             :             {
    1276           0 :               error = TCP_ERROR_SEGMENT_OLD; /* trimming failed: reported as an old segment, no ack is programmed on this path */
    1277           0 :               goto done;
    1278             :             }
    1279           0 :           goto in_order;
    1280             :         }
    1281             : 
    1282             :       /* RFC2581: Enqueue and send DUPACK for fast retransmit */
    1283           0 :       error = tcp_session_enqueue_ooo (tc, b, n_data_bytes);
    1284           0 :       tcp_program_dupack (tc);
    1285             :       TCP_EVT (TCP_EVT_DUPACK_SENT, tc, vnet_buffer (b)->tcp);
    1286           0 :       tc->errors.above_data_wnd += seq_gt (vnet_buffer (b)->tcp.seq_end, /* counts segments ending beyond rcv_las + rcv_wnd */
    1287             :                                            tc->rcv_las + tc->rcv_wnd);
    1288           0 :       goto done;
    1289             :     }
    1290             : 
    1291      995946 : in_order:
    1292             : 
    1293             :   /* In order data, enqueue. Fifo figures out by itself if any out-of-order
    1294             :    * segments can be enqueued after fifo tail offset changes. */
    1295      995946 :   error = tcp_session_enqueue_data (tc, b, n_data_bytes);
    1296      995946 :   tcp_program_ack (tc); /* the ack is programmed whatever the enqueue result was */
    1297             : 
    1298      995946 : done:
    1299      995946 :   return error;
    1300             : }
    1301             : 
    1302             : typedef struct /* Per-packet rx trace record */
    1303             : {
    1304             :   tcp_header_t tcp_header; /* copy of the packet's TCP header */
    1305             :   tcp_connection_t tcp_connection; /* copy of the connection, filled only when one was found */
    1306             : } tcp_rx_trace_t;
    1307             : 
    1308             : static u8 * /* Trace formatter: prints the connection id and state (or "no tcp connection"), then the TCP header on an indented line; returns the extended string s */
    1309           6 : format_tcp_rx_trace (u8 * s, va_list * args)
    1310             : {
    1311           6 :   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); /* consumed only to reach the trace argument */
    1312           6 :   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
    1313           6 :   tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *);
    1314           6 :   tcp_connection_t *tc = &t->tcp_connection;
    1315           6 :   u32 indent = format_get_indent (s);
    1316             : 
    1317           6 :   if (!tc->c_lcl_port) /* a zero local port in the traced copy is reported as "no tcp connection" */
    1318           0 :     s = format (s, "no tcp connection\n%U%U", format_white_space, indent,
    1319             :                 format_tcp_header, &t->tcp_header, 128); /* 128: presumably a max header length for format_tcp_header - confirm */
    1320             :   else
    1321           6 :     s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
    1322           6 :                 format_tcp_state, tc->state, format_white_space, indent,
    1323             :                 format_tcp_header, &t->tcp_header, 128);
    1324             : 
    1325           6 :   return s;
    1326             : }
    1327             : 
    1328             : static u8 * /* Compact trace formatter: "<dst_port> -> <src_port> (<state>)"; returns the extended string s */
    1329           6 : format_tcp_rx_trace_short (u8 * s, va_list * args)
    1330             : {
    1331           6 :   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); /* consumed only to reach the trace argument */
    1332           6 :   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
    1333           6 :   tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *);
    1334             : 
    1335           6 :   s = format (s, "%d -> %d (%U)", /* note the order: destination port is printed first, then source port */
    1336           6 :               clib_net_to_host_u16 (t->tcp_header.dst_port), /* header ports are converted from network byte order */
    1337           6 :               clib_net_to_host_u16 (t->tcp_header.src_port), format_tcp_state,
    1338           6 :               t->tcp_connection.state);
    1339             : 
    1340           6 :   return s;
    1341             : }
    1342             : 
    1343             : static void /* Fill a trace record: copy the connection when tc0 is non-null and always copy a TCP header; is_ip4 is not referenced in this body */
    1344           6 : tcp_set_rx_trace_data (tcp_rx_trace_t * t0, tcp_connection_t * tc0,
    1345             :                        tcp_header_t * th0, vlib_buffer_t * b0, u8 is_ip4)
    1346             : {
    1347           6 :   if (tc0)
    1348             :     {
    1349           6 :       clib_memcpy_fast (&t0->tcp_connection, tc0,
    1350             :                         sizeof (t0->tcp_connection));
    1351             :     }
    1352             :   else
    1353             :     {
    1354           0 :       th0 = tcp_buffer_hdr (b0); /* no connection: the caller's th0 is replaced by the header read from b0; t0->tcp_connection is left unwritten on this path */
    1355             :     }
    1356           6 :   clib_memcpy_fast (&t0->tcp_header, th0, sizeof (t0->tcp_header));
    1357           6 : }
    1358             : 
    1359             : static void /* Add an rx trace record for every traced buffer of the frame */
    1360           0 : tcp_established_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
    1361             :                              vlib_frame_t * frame, u8 is_ip4)
    1362             : {
    1363             :   u32 *from, n_left;
    1364             : 
    1365           0 :   n_left = frame->n_vectors;
    1366           0 :   from = vlib_frame_vector_args (frame); /* from: buffer indices carried by the frame */
    1367             : 
    1368           0 :   while (n_left >= 1)
    1369             :     {
    1370             :       tcp_connection_t *tc0;
    1371             :       tcp_rx_trace_t *t0;
    1372             :       tcp_header_t *th0;
    1373             :       vlib_buffer_t *b0;
    1374             :       u32 bi0;
    1375             : 
    1376           0 :       bi0 = from[0];
    1377           0 :       b0 = vlib_get_buffer (vm, bi0);
    1378             : 
    1379           0 :       if (b0->flags & VLIB_BUFFER_IS_TRACED) /* untraced buffers are skipped */
    1380             :         {
    1381           0 :           t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
    1382           0 :           tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, /* connection looked up on the current thread; may be null, which tcp_set_rx_trace_data handles */
    1383             :                                     vm->thread_index);
    1384           0 :           th0 = tcp_buffer_hdr (b0);
    1385           0 :           tcp_set_rx_trace_data (t0, tc0, th0, b0, is_ip4);
    1386             :         }
    1387             : 
    1388           0 :       from += 1;
    1389           0 :       n_left -= 1;
    1390             :     }
    1391           0 : }
    1392             : 
    1393             : always_inline uword /* Frame handler shared by the tcp4/tcp6 established nodes: per buffer it validates the segment, processes the ACK, receives data and handles FIN; then it records error counters and frees every buffer. Returns frame->n_vectors. */
    1394       30121 : tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
    1395             :                           vlib_frame_t * frame, int is_ip4)
    1396             : {
    1397       30121 :   u32 thread_index = vm->thread_index, errors = 0;
    1398       30121 :   tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
    1399             :   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
    1400       30121 :   u16 err_counters[TCP_N_ERROR] = { 0 }; /* per-frame tallies, handed to tcp_store_err_counters after the loop */
    1401             :   u32 n_left_from, *from;
    1402             : 
    1403       30121 :   if (node->flags & VLIB_NODE_FLAG_TRACE)
    1404           0 :     tcp_established_trace_frame (vm, node, frame, is_ip4);
    1405             : 
    1406       30121 :   from = vlib_frame_vector_args (frame);
    1407       30121 :   n_left_from = frame->n_vectors;
    1408             : 
    1409       30121 :   vlib_get_buffers (vm, from, bufs, n_left_from); /* resolve all buffer indices to pointers up front */
    1410       30121 :   b = bufs;
    1411             : 
    1412     1050120 :   while (n_left_from > 0)
    1413             :     {
    1414     1020000 :       u32 error = TCP_ERROR_ACK_OK; /* default result when no later step overrides it */
    1415             :       tcp_connection_t *tc;
    1416             :       tcp_header_t *th;
    1417             : 
    1418     1020000 :       if (n_left_from > 1)
    1419             :         {
    1420      989874 :           vlib_prefetch_buffer_header (b[1], LOAD); /* prefetch the next buffer's header and its first two cache lines of data */
    1421      989874 :           CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
    1422             :         }
    1423             : 
    1424     1020000 :       tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
    1425             :                                thread_index);
    1426             : 
    1427     1020000 :       if (PREDICT_FALSE (tc == 0))
    1428             :         {
    1429           0 :           error = TCP_ERROR_INVALID_CONNECTION;
    1430           0 :           goto done;
    1431             :         }
    1432             : 
    1433     1020000 :       th = tcp_buffer_hdr (b[0]);
    1434             : 
    1435             :       /* TODO header prediction fast path */
    1436             : 
    1437             :       /* 1-4: check SEQ, RST, SYN */
    1438     1020000 :       if (PREDICT_FALSE (tcp_segment_validate (wrk, tc, b[0], th, &error)))
    1439             :         {
    1440             :           TCP_EVT (TCP_EVT_SEG_INVALID, tc, vnet_buffer (b[0])->tcp);
    1441           6 :           goto done;
    1442             :         }
    1443             : 
    1444             :       /* 5: check the ACK field  */
    1445     1019990 :       if (PREDICT_FALSE (tcp_rcv_ack (wrk, tc, b[0], th, &error)))
    1446           0 :         goto done;
    1447             : 
    1448             :       /* 6: check the URG bit TODO */
    1449             : 
    1450             :       /* 7: process the segment text */
    1451     1019990 :       if (vnet_buffer (b[0])->tcp.data_len)
    1452      995913 :         error = tcp_segment_rcv (wrk, tc, b[0]); /* overwrites whatever error value the ACK step left */
    1453             : 
    1454             :       /* 8: check the FIN bit */
    1455     1019990 :       if (PREDICT_FALSE (tcp_is_fin (th)))
    1456         127 :         tcp_rcv_fin (wrk, tc, b[0], &error);
    1457             : 
    1458     1019860 :     done:
    1459     1020000 :       tcp_inc_err_counter (err_counters, error, 1); /* exactly one counter increment per buffer, on every path */
    1460             : 
    1461     1020000 :       n_left_from -= 1;
    1462     1020000 :       b += 1;
    1463             :     }
    1464             : 
    1465       30121 :   errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP,
    1466             :                                               thread_index);
    1467       30121 :   err_counters[TCP_ERROR_MSG_QUEUE_FULL] = errors; /* assigned, not accumulated; u32 value stored into a u16 slot */
    1468     1144600 :   tcp_store_err_counters (established, err_counters);
    1469       30121 :   tcp_handle_postponed_dequeues (wrk);
    1470       30121 :   tcp_handle_disconnects (wrk);
    1471       30121 :   vlib_buffer_free (vm, from, frame->n_vectors); /* every buffer of the frame is freed here; none is forwarded by this function */
    1472             : 
    1473       30121 :   return frame->n_vectors;
    1474             : }
    1475             : 
    1476       32346 : VLIB_NODE_FN (tcp4_established_node) (vlib_main_t * vm, /* IPv4 entry point: forwards to the shared handler with is_ip4 = 1 */
    1477             :                                       vlib_node_runtime_t * node,
    1478             :                                       vlib_frame_t * from_frame)
    1479             : {
    1480       30110 :   return tcp46_established_inline (vm, node, from_frame, 1 /* is_ip4 */ );
    1481             : }
    1482             : 
    1483        2247 : VLIB_NODE_FN (tcp6_established_node) (vlib_main_t * vm, /* IPv6 entry point: forwards to the shared handler with is_ip4 = 0 */
    1484             :                                       vlib_node_runtime_t * node,
    1485             :                                       vlib_frame_t * from_frame)
    1486             : {
    1487          11 :   return tcp46_established_inline (vm, node, from_frame, 0 /* is_ip4 */ );
    1488             : }
    1489             : 
    1490             : /* *INDENT-OFF* */
    1491      178120 : VLIB_REGISTER_NODE (tcp4_established_node) = { /* registration of the "tcp4-established" node */
    1492             :   .name = "tcp4-established",
    1493             :   /* Takes a vector of packets. */
    1494             :   .vector_size = sizeof (u32),
    1495             :   .n_errors = TCP_N_ERROR,
    1496             :   .error_counters = tcp_input_error_counters, /* error descriptor table built from tcp_error.def */
    1497             :   .format_trace = format_tcp_rx_trace_short, /* traces from this node use the compact formatter */
    1498             : };
    1499             : /* *INDENT-ON* */
    1500             : 
    1501             : /* *INDENT-OFF* */
    1502      178120 : VLIB_REGISTER_NODE (tcp6_established_node) = { /* registration of the "tcp6-established" node */
    1503             :   .name = "tcp6-established",
    1504             :   /* Takes a vector of packets. */
    1505             :   .vector_size = sizeof (u32),
    1506             :   .n_errors = TCP_N_ERROR,
    1507             :   .error_counters = tcp_input_error_counters, /* error descriptor table built from tcp_error.def */
    1508             :   .format_trace = format_tcp_rx_trace_short, /* traces from this node use the compact formatter */
    1509             : };
    1510             : /* *INDENT-ON* */
    1511             : 
    1512             : 
    1513             : static u8 /* Check that a looked-up connection matches the packet's addresses and ports; returns non-zero when it does (or when there is nothing to check). b's current data must point at the IP header. */
    1514     1021470 : tcp_lookup_is_valid (tcp_connection_t * tc, vlib_buffer_t * b,
    1515             :                      tcp_header_t * hdr)
    1516             : {
    1517     1021470 :   transport_connection_t *tmp = 0;
    1518             :   u64 handle;
    1519             : 
    1520     1021470 :   if (!tc) /* no connection found: nothing to validate */
    1521           0 :     return 1;
    1522             : 
    1523             :   /* Proxy case */
    1524     1021470 :   if (tc->c_lcl_port == 0 && tc->state == TCP_STATE_LISTEN)
    1525           0 :     return 1;
    1526             : 
    1527     1021470 :   u8 is_ip_valid = 0, val_l, val_r; /* val_l / val_r: local / remote address checks */
    1528             : 
    1529     1021470 :   if (tc->connection.is_ip4)
    1530             :     {
    1531     1021450 :       ip4_header_t *ip4_hdr = (ip4_header_t *) vlib_buffer_get_current (b);
    1532             : 
    1533     1021450 :       val_l = !ip4_address_compare (&ip4_hdr->dst_address,
    1534             :                                     &tc->connection.lcl_ip.ip4);
    1535     1021450 :       val_l = val_l || ip_is_zero (&tc->connection.lcl_ip, 1); /* a local address reported as zero is accepted too */
    1536     1021450 :       val_r = !ip4_address_compare (&ip4_hdr->src_address,
    1537             :                                     &tc->connection.rmt_ip.ip4);
    1538     1021450 :       val_r = val_r || tc->state == TCP_STATE_LISTEN; /* listeners are not held to the remote address */
    1539     1021450 :       is_ip_valid = val_l && val_r;
    1540             :     }
    1541             :   else
    1542             :     {
    1543          22 :       ip6_header_t *ip6_hdr = (ip6_header_t *) vlib_buffer_get_current (b);
    1544             : 
    1545          22 :       val_l = !ip6_address_compare (&ip6_hdr->dst_address,
    1546             :                                     &tc->connection.lcl_ip.ip6);
    1547          22 :       val_l = val_l || ip_is_zero (&tc->connection.lcl_ip, 0); /* same rule as the IPv4 branch */
    1548          22 :       val_r = !ip6_address_compare (&ip6_hdr->src_address,
    1549             :                                     &tc->connection.rmt_ip.ip6);
    1550          22 :       val_r = val_r || tc->state == TCP_STATE_LISTEN;
    1551          22 :       is_ip_valid = val_l && val_r;
    1552             :     }
    1553             : 
    1554     2042940 :   u8 is_valid = (tc->c_lcl_port == hdr->dst_port /* ports are compared as stored, without byte-order conversion */
    1555     1021470 :                  && (tc->state == TCP_STATE_LISTEN
    1556     2042940 :                      || tc->c_rmt_port == hdr->src_port) && is_ip_valid);
    1557             : 
    1558     1021470 :   if (!is_valid)
    1559             :     {
    1560           0 :       handle = session_lookup_half_open_handle (&tc->connection); /* mismatch: fall back to a half-open lookup */
    1561           0 :       tmp = session_lookup_half_open_connection (handle & 0xFFFFFFFF, /* only the low 32 bits of the handle are passed on */
    1562           0 :                                                  tc->c_proto, tc->c_is_ip4);
    1563             : 
    1564           0 :       if (tmp)
    1565             :         {
    1566           0 :           if (tmp->lcl_port == hdr->dst_port /* the half-open entry is matched on ports only */
    1567           0 :               && tmp->rmt_port == hdr->src_port)
    1568             :             {
    1569             :               TCP_DBG ("half-open is valid!");
    1570           0 :               is_valid = 1;
    1571             :             }
    1572             :         }
    1573             :     }
    1574     1021470 :   return is_valid;
    1575             : }
    1576             : 
    1577             : /**
    1578             :  * Lookup transport connection for the packet's addresses and ports in the given fib; b's current data must point at the IP header. Returns the tcp connection obtained from the transport lookup result.
    1579             :  */
    1580             : static tcp_connection_t *
    1581         585 : tcp_lookup_connection (u32 fib_index, vlib_buffer_t * b, u8 thread_index,
    1582             :                        u8 is_ip4)
    1583             : {
    1584             :   tcp_header_t *tcp;
    1585             :   transport_connection_t *tconn;
    1586             :   tcp_connection_t *tc;
    1587         585 :   u8 is_filtered = 0; /* written by the lookup call; not read again in this function */
    1588         585 :   if (is_ip4)
    1589             :     {
    1590             :       ip4_header_t *ip4;
    1591         580 :       ip4 = vlib_buffer_get_current (b);
    1592         580 :       tcp = ip4_next_header (ip4);
    1593         580 :       tconn = session_lookup_connection_wt4 (fib_index,
    1594             :                                              &ip4->dst_address, /* packet destination is passed first, then packet source */
    1595             :                                              &ip4->src_address,
    1596         580 :                                              tcp->dst_port,
    1597         580 :                                              tcp->src_port,
    1598             :                                              TRANSPORT_PROTO_TCP,
    1599             :                                              thread_index, &is_filtered);
    1600         580 :       tc = tcp_get_connection_from_transport (tconn);
    1601         580 :       ASSERT (tcp_lookup_is_valid (tc, b, tcp)); /* validation sits inside ASSERT only */
    1602             :     }
    1603             :   else
    1604             :     {
    1605             :       ip6_header_t *ip6;
    1606           5 :       ip6 = vlib_buffer_get_current (b);
    1607           5 :       tcp = ip6_next_header (ip6);
    1608           5 :       tconn = session_lookup_connection_wt6 (fib_index,
    1609             :                                              &ip6->dst_address, /* same argument order as the IPv4 branch */
    1610             :                                              &ip6->src_address,
    1611           5 :                                              tcp->dst_port,
    1612           5 :                                              tcp->src_port,
    1613             :                                              TRANSPORT_PROTO_TCP,
    1614             :                                              thread_index, &is_filtered);
    1615           5 :       tc = tcp_get_connection_from_transport (tconn);
    1616           5 :       ASSERT (tcp_lookup_is_valid (tc, b, tcp));
    1617             :     }
    1618         585 :   return tc;
    1619             : }
    1620             : 
    1621             : static tcp_connection_t * /* Find the listener for the packet's destination address and port in the given fib; returns its tcp connection, or 0 when no listening session is found */
    1622           0 : tcp_lookup_listener (vlib_buffer_t * b, u32 fib_index, int is_ip4)
    1623             : {
    1624             :   session_t *s;
    1625             : 
    1626           0 :   if (is_ip4)
    1627             :     {
    1628           0 :       ip4_header_t *ip4 = vlib_buffer_get_current (b);
    1629           0 :       tcp_header_t *tcp = tcp_buffer_hdr (b);
    1630           0 :       s = session_lookup_listener4 (fib_index,
    1631             :                                     &ip4->dst_address,
    1632           0 :                                     tcp->dst_port, TRANSPORT_PROTO_TCP, 1); /* meaning of the trailing 1 is not visible here - see session_lookup_listener4 */
    1633             :     }
    1634             :   else
    1635             :     {
    1636           0 :       ip6_header_t *ip6 = vlib_buffer_get_current (b);
    1637           0 :       tcp_header_t *tcp = tcp_buffer_hdr (b);
    1638           0 :       s = session_lookup_listener6 (fib_index,
    1639             :                                     &ip6->dst_address,
    1640           0 :                                     tcp->dst_port, TRANSPORT_PROTO_TCP, 1); /* same trailing argument as the IPv4 branch */
    1641             : 
    1642             :     }
    1643           0 :   if (PREDICT_TRUE (s != 0))
    1644           0 :     return tcp_get_connection_from_transport (transport_get_listener /* session -> transport listener -> tcp connection */
    1645             :                                               (TRANSPORT_PROTO_TCP,
    1646             :                                                s->connection_index));
    1647             :   else
    1648           0 :     return 0;
    1649             : }
    1650             : 
    1651             : static void /* Add an rx trace record, using the half-open connection, for every traced buffer among the n_bufs indices in from */
    1652           0 : tcp46_syn_sent_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
    1653             :                             u32 *from, u32 n_bufs)
    1654             : {
    1655           0 :   tcp_connection_t *tc = 0;
    1656             :   tcp_rx_trace_t *t;
    1657             :   vlib_buffer_t *b;
    1658             :   int i;
    1659             : 
    1660           0 :   for (i = 0; i < n_bufs; i++) /* signed index compared against the u32 count */
    1661             :     {
    1662           0 :       b = vlib_get_buffer (vm, from[i]);
    1663           0 :       if (!(b->flags & VLIB_BUFFER_IS_TRACED))
    1664           0 :         continue;
    1665             :       tc =
    1666           0 :         tcp_half_open_connection_get (vnet_buffer (b)->tcp.connection_index);
    1667           0 :       t = vlib_add_trace (vm, node, b, sizeof (*t));
    1668           0 :       tcp_set_rx_trace_data (t, tc, tcp_buffer_hdr (b), b, 1); /* is_ip4 is passed as a literal 1 for every packet */
    1669             :     }
    1670           0 : }
    1671             : 
    1672             : always_inline void /* Set TCP_CFG_F_TSO on the connection when the hardware interface resolved for its remote address advertises VNET_HW_IF_CAP_TCP_GSO */
    1673           0 : tcp_check_tx_offload (tcp_connection_t * tc, int is_ipv4)
    1674             : {
    1675           0 :   vnet_main_t *vnm = vnet_get_main ();
    1676             :   const dpo_id_t *dpo;
    1677             :   const load_balance_t *lb;
    1678             :   vnet_hw_interface_t *hw_if;
    1679             :   u32 sw_if_idx, lb_idx;
    1680             : 
    1681           0 :   if (is_ipv4)
    1682             :     {
    1683           0 :       ip4_address_t *dst_addr = &(tc->c_rmt_ip.ip4);
    1684           0 :       lb_idx = ip4_fib_forwarding_lookup (tc->c_fib_index, dst_addr); /* forwarding lookup for the remote address in the connection's fib */
    1685             :     }
    1686             :   else
    1687             :     {
    1688           0 :       ip6_address_t *dst_addr = &(tc->c_rmt_ip.ip6);
    1689           0 :       lb_idx = ip6_fib_table_fwding_lookup (tc->c_fib_index, dst_addr);
    1690             :     }
    1691             : 
    1692           0 :   lb = load_balance_get (lb_idx);
    1693           0 :   if (PREDICT_FALSE (lb->lb_n_buckets > 1)) /* more than one bucket: leave the flag untouched */
    1694           0 :     return;
    1695           0 :   dpo = load_balance_get_bucket_i (lb, 0);
    1696             : 
    1697           0 :   sw_if_idx = dpo_get_urpf (dpo); /* the result is used as a software interface index below */
    1698           0 :   if (PREDICT_FALSE (sw_if_idx == ~0)) /* ~0 is treated as "no interface" */
    1699           0 :     return;
    1700             : 
    1701           0 :   hw_if = vnet_get_sup_hw_interface (vnm, sw_if_idx);
    1702           0 :   if (hw_if->caps & VNET_HW_IF_CAP_TCP_GSO)
    1703           0 :     tc->cfg_flags |= TCP_CFG_F_TSO; /* the flag is only ever set here, never cleared */
    1704             : }
    1705             : 
    1706             : static void /* Add an rx trace record for every traced buffer in bs, choosing the connection table from the value stored in the buffer's tcp.flags */
    1707           3 : tcp_input_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
    1708             :                        vlib_buffer_t **bs, u16 *nexts, u32 n_bufs, u8 is_ip4)
    1709             : {
    1710             :   tcp_connection_t *tc;
    1711             :   tcp_header_t *tcp;
    1712             :   tcp_rx_trace_t *t;
    1713             :   u8 flags;
    1714             :   int i;
    1715             : 
    1716           6 :   for (i = 0; i < n_bufs; i++) /* signed index compared against the u32 count */
    1717             :     {
    1718           3 :       if (!(bs[i]->flags & VLIB_BUFFER_IS_TRACED))
    1719           0 :         continue;
    1720             : 
    1721           3 :       t = vlib_add_trace (vm, node, bs[i], sizeof (*t));
    1722           3 :       if (nexts[i] == TCP_INPUT_NEXT_DROP || nexts[i] == TCP_INPUT_NEXT_PUNT ||
    1723           3 :           nexts[i] == TCP_INPUT_NEXT_RESET)
    1724             :         {
    1725           0 :           tc = 0; /* dropped, punted or reset packets are traced without a connection */
    1726             :         }
    1727             :       else
    1728             :         {
    1729           3 :           flags = vnet_buffer (bs[i])->tcp.flags; /* NOTE(review): compared with TCP_STATE_* below - presumably the caller stores the connection state in this field; confirm */
    1730             : 
    1731           3 :           if (flags == TCP_STATE_LISTEN)
    1732           3 :             tc = tcp_listener_get (vnet_buffer (bs[i])->tcp.connection_index);
    1733           0 :           else if (flags == TCP_STATE_SYN_SENT)
    1734           0 :             tc = tcp_half_open_connection_get (
    1735           0 :               vnet_buffer (bs[i])->tcp.connection_index);
    1736             :           else
    1737           0 :             tc = tcp_connection_get (vnet_buffer (bs[i])->tcp.connection_index,
    1738             :                                      vm->thread_index);
    1739             :         }
    1740           3 :       tcp = tcp_buffer_hdr (bs[i]);
    1741           3 :       tcp_set_rx_trace_data (t, tc, tcp, bs[i], is_ip4);
    1742             :     }
    1743           3 : }
    1744             : 
    1745             : always_inline uword
    1746          33 : tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
    1747             :                        vlib_frame_t *frame, int is_ip4)
    1748             : {
    1749          33 :   u32 n_left_from, *from, thread_index = vm->thread_index, errors = 0;
    1750          33 :   tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
    1751             :   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
    1752             : 
    1753          33 :   from = vlib_frame_vector_args (frame);
    1754          33 :   n_left_from = frame->n_vectors;
    1755             : 
    1756          33 :   if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
    1757           0 :     tcp46_syn_sent_trace_frame (vm, node, from, n_left_from);
    1758             : 
    1759          33 :   vlib_get_buffers (vm, from, bufs, n_left_from);
    1760          33 :   b = bufs;
    1761             : 
    1762         165 :   while (n_left_from > 0)
    1763             :     {
    1764         132 :       u32 ack, seq, error = TCP_ERROR_NONE;
    1765             :       tcp_connection_t *tc, *new_tc;
    1766             :       tcp_header_t *tcp;
    1767             : 
    1768         264 :       tc = tcp_half_open_connection_get (
    1769         132 :         vnet_buffer (b[0])->tcp.connection_index);
    1770         132 :       if (PREDICT_FALSE (tc == 0))
    1771             :         {
    1772           0 :           error = TCP_ERROR_INVALID_CONNECTION;
    1773           0 :           goto drop;
    1774             :         }
    1775             : 
    1776             :       /* Half-open completed or cancelled recently but the connection
    1777             :        * wasn't removed yet by the owning thread */
    1778         132 :       if (PREDICT_FALSE (tc->flags & TCP_CONN_HALF_OPEN_DONE))
    1779             :         {
    1780           0 :           error = TCP_ERROR_SPURIOUS_SYN_ACK;
    1781           0 :           goto drop;
    1782             :         }
    1783             : 
    1784         132 :       ack = vnet_buffer (b[0])->tcp.ack_number;
    1785         132 :       seq = vnet_buffer (b[0])->tcp.seq_number;
    1786         132 :       tcp = tcp_buffer_hdr (b[0]);
    1787             : 
    1788             :       /* Crude check to see if the connection handle does not match
    1789             :        * the packet. Probably connection just switched to established */
    1790         132 :       if (PREDICT_FALSE (tcp->dst_port != tc->c_lcl_port ||
    1791             :                          tcp->src_port != tc->c_rmt_port))
    1792             :         {
    1793           0 :           error = TCP_ERROR_INVALID_CONNECTION;
    1794           0 :           goto drop;
    1795             :         }
    1796             : 
    1797         132 :       if (PREDICT_FALSE (!tcp_ack (tcp) && !tcp_rst (tcp) && !tcp_syn (tcp)))
    1798             :         {
    1799           0 :           error = TCP_ERROR_SEGMENT_INVALID;
    1800           0 :           goto drop;
    1801             :         }
    1802             : 
    1803             :       /* SYNs consume sequence numbers */
    1804         132 :       vnet_buffer (b[0])->tcp.seq_end += tcp_is_syn (tcp);
    1805             : 
    1806             :       /*
    1807             :        *  1. check the ACK bit
    1808             :        */
    1809             : 
    1810             :       /*
    1811             :        *   If the ACK bit is set
    1812             :        *     If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send a reset (unless
    1813             :        *     the RST bit is set, if so drop the segment and return)
    1814             :        *       <SEQ=SEG.ACK><CTL=RST>
    1815             :        *     and discard the segment.  Return.
    1816             :        *     If SND.UNA =< SEG.ACK =< SND.NXT then the ACK is acceptable.
    1817             :        */
    1818         132 :       if (tcp_ack (tcp))
    1819             :         {
    1820         132 :           if (seq_leq (ack, tc->iss) || seq_gt (ack, tc->snd_nxt))
    1821             :             {
    1822           0 :               if (!tcp_rst (tcp))
    1823           0 :                 tcp_send_reset_w_pkt (tc, b[0], thread_index, is_ip4);
    1824           0 :               error = TCP_ERROR_RCV_WND;
    1825           0 :               goto drop;
    1826             :             }
    1827             : 
    1828             :           /* Make sure ACK is valid */
    1829         132 :           if (seq_gt (tc->snd_una, ack))
    1830             :             {
    1831           0 :               error = TCP_ERROR_ACK_INVALID;
    1832           0 :               goto drop;
    1833             :             }
    1834             :         }
    1835             : 
    1836             :       /*
    1837             :        * 2. check the RST bit
    1838             :        */
    1839             : 
    1840         132 :       if (tcp_rst (tcp))
    1841             :         {
    1842             :           /* If ACK is acceptable, signal client that peer is not
    1843             :            * willing to accept connection and drop connection*/
    1844           0 :           if (tcp_ack (tcp))
    1845           0 :             tcp_rcv_rst (wrk, tc);
    1846           0 :           error = TCP_ERROR_RST_RCVD;
    1847           0 :           goto drop;
    1848             :         }
    1849             : 
    1850             :       /*
    1851             :        * 3. check the security and precedence (skipped)
    1852             :        */
    1853             : 
    1854             :       /*
    1855             :        * 4. check the SYN bit
    1856             :        */
    1857             : 
    1858             :       /* No SYN flag. Drop. */
    1859         132 :       if (!tcp_syn (tcp))
    1860             :         {
    1861           0 :           error = TCP_ERROR_SEGMENT_INVALID;
    1862           0 :           goto drop;
    1863             :         }
    1864             : 
    1865             :       /* Parse options */
    1866         132 :       if (tcp_options_parse (tcp, &tc->rcv_opts, 1))
    1867             :         {
    1868           0 :           error = TCP_ERROR_OPTIONS;
    1869           0 :           goto drop;
    1870             :         }
    1871             : 
    1872             :       /* Valid SYN or SYN-ACK. Move connection from half-open pool to
    1873             :        * current thread pool. */
    1874         132 :       new_tc = tcp_connection_alloc_w_base (thread_index, &tc);
    1875         132 :       new_tc->rcv_nxt = vnet_buffer (b[0])->tcp.seq_end;
    1876         132 :       new_tc->irs = seq;
    1877         132 :       new_tc->timers[TCP_TIMER_RETRANSMIT_SYN] = TCP_TIMER_HANDLE_INVALID;
    1878             : 
    1879         132 :       if (tcp_opts_tstamp (&new_tc->rcv_opts))
    1880             :         {
    1881         132 :           new_tc->tsval_recent = new_tc->rcv_opts.tsval;
    1882         132 :           new_tc->tsval_recent_age = tcp_time_tstamp (thread_index);
    1883             :         }
    1884             : 
    1885         132 :       if (tcp_opts_wscale (&new_tc->rcv_opts))
    1886         132 :         new_tc->snd_wscale = new_tc->rcv_opts.wscale;
    1887             :       else
    1888           0 :         new_tc->rcv_wscale = 0;
    1889             : 
    1890         132 :       new_tc->snd_wnd = clib_net_to_host_u16 (tcp->window)
    1891         132 :                         << new_tc->snd_wscale;
    1892         132 :       new_tc->snd_wl1 = seq;
    1893         132 :       new_tc->snd_wl2 = ack;
    1894             : 
    1895         132 :       tcp_connection_init_vars (new_tc);
    1896             : 
    1897             :       /* SYN-ACK: See if we can switch to ESTABLISHED state */
    1898         132 :       if (PREDICT_TRUE (tcp_ack (tcp)))
    1899             :         {
    1900             :           /* Our SYN is ACKed: we have iss < ack = snd_una */
    1901             : 
    1902             :           /* TODO Dequeue acknowledged segments if we support Fast Open */
    1903         132 :           new_tc->snd_una = ack;
    1904         132 :           new_tc->state = TCP_STATE_ESTABLISHED;
    1905             : 
    1906             :           /* Make sure las is initialized for the wnd computation */
    1907         132 :           new_tc->rcv_las = new_tc->rcv_nxt;
    1908             : 
    1909             :           /* Notify app that we have connection. If session layer can't
    1910             :            * allocate session send reset */
    1911         132 :           if (session_stream_connect_notify (&new_tc->connection,
    1912             :                                              SESSION_E_NONE))
    1913             :             {
    1914           0 :               tcp_send_reset_w_pkt (new_tc, b[0], thread_index, is_ip4);
    1915           0 :               tcp_connection_cleanup (new_tc);
    1916           0 :               error = TCP_ERROR_CREATE_SESSION_FAIL;
    1917           0 :               goto cleanup_ho;
    1918             :             }
    1919             : 
    1920         132 :           transport_fifos_init_ooo (&new_tc->connection);
    1921         132 :           new_tc->tx_fifo_size = transport_tx_fifo_size (&new_tc->connection);
    1922             :           /* Update rtt with the syn-ack sample */
    1923         132 :           tcp_estimate_initial_rtt (new_tc);
    1924             :           TCP_EVT (TCP_EVT_SYNACK_RCVD, new_tc);
    1925         132 :           error = TCP_ERROR_SYN_ACKS_RCVD;
    1926             :         }
    1927             :       /* SYN: Simultaneous open. Change state to SYN-RCVD and send SYN-ACK */
    1928             :       else
    1929             :         {
    1930           0 :           new_tc->state = TCP_STATE_SYN_RCVD;
    1931             : 
    1932             :           /* Notify app that we have connection */
    1933           0 :           if (session_stream_connect_notify (&new_tc->connection,
    1934             :                                              SESSION_E_NONE))
    1935             :             {
    1936           0 :               tcp_connection_cleanup (new_tc);
    1937           0 :               tcp_send_reset_w_pkt (tc, b[0], thread_index, is_ip4);
    1938             :               TCP_EVT (TCP_EVT_RST_SENT, tc);
    1939           0 :               error = TCP_ERROR_CREATE_SESSION_FAIL;
    1940           0 :               goto cleanup_ho;
    1941             :             }
    1942             : 
    1943           0 :           transport_fifos_init_ooo (&new_tc->connection);
    1944           0 :           new_tc->tx_fifo_size = transport_tx_fifo_size (&new_tc->connection);
    1945           0 :           new_tc->rtt_ts = 0;
    1946           0 :           tcp_init_snd_vars (new_tc);
    1947           0 :           tcp_send_synack (new_tc);
    1948           0 :           error = TCP_ERROR_SYNS_RCVD;
    1949           0 :           goto cleanup_ho;
    1950             :         }
    1951             : 
    1952         132 :       if (!(new_tc->cfg_flags & TCP_CFG_F_NO_TSO))
    1953           0 :         tcp_check_tx_offload (new_tc, is_ip4);
    1954             : 
    1955             :       /* Read data, if any */
    1956         132 :       if (PREDICT_FALSE (vnet_buffer (b[0])->tcp.data_len))
    1957             :         {
    1958           0 :           clib_warning ("rcvd data in syn-sent");
    1959           0 :           error = tcp_segment_rcv (wrk, new_tc, b[0]);
    1960           0 :           if (error == TCP_ERROR_ACK_OK)
    1961           0 :             error = TCP_ERROR_SYN_ACKS_RCVD;
    1962             :         }
    1963             :       else
    1964             :         {
    1965             :           /* Send ack now instead of programming it because connection was
    1966             :            * just established and it's not optional. */
    1967         132 :           tcp_send_ack (new_tc);
    1968             :         }
    1969             : 
    1970         132 :     cleanup_ho:
    1971             : 
    1972             :       /* If this is not the owning thread, wait for syn retransmit to
    1973             :        * expire and cleanup then */
    1974         132 :       if (tcp_half_open_connection_cleanup (tc))
    1975           0 :         tc->flags |= TCP_CONN_HALF_OPEN_DONE;
    1976             : 
    1977         132 :     drop:
    1978             : 
    1979         132 :       b += 1;
    1980         132 :       n_left_from -= 1;
    1981         132 :       tcp_inc_counter (syn_sent, error, 1);
    1982             :     }
    1983             : 
    1984          33 :   errors =
    1985          33 :     session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
    1986          33 :   tcp_inc_counter (syn_sent, TCP_ERROR_MSG_QUEUE_FULL, errors);
    1987          33 :   vlib_buffer_free (vm, from, frame->n_vectors);
    1988          33 :   tcp_handle_disconnects (wrk);
    1989             : 
    1990          33 :   return frame->n_vectors;
    1991             : }
    1992             : /* IPv4 syn-sent node function: hands the frame to tcp46_syn_sent_inline with is_ip4 = 1 and returns its result. */
    1993        2268 : VLIB_NODE_FN (tcp4_syn_sent_node) (vlib_main_t * vm,
    1994             :                                    vlib_node_runtime_t * node,
    1995             :                                    vlib_frame_t * from_frame)
    1996             : {
    1997          32 :   return tcp46_syn_sent_inline (vm, node, from_frame, 1 /* is_ip4 */ );
    1998             : }
    1999             : /* IPv6 syn-sent node function: hands the frame to tcp46_syn_sent_inline with is_ip4 = 0 and returns its result. */
    2000        2237 : VLIB_NODE_FN (tcp6_syn_sent_node) (vlib_main_t * vm,
    2001             :                                    vlib_node_runtime_t * node,
    2002             :                                    vlib_frame_t * from_frame)
    2003             : {
    2004           1 :   return tcp46_syn_sent_inline (vm, node, from_frame, 0 /* is_ip4 */ );
    2005             : }
    2006             : /* Registration record for the "tcp4-syn-sent" graph node (same symbol as the IPv4 VLIB_NODE_FN above). */
    2007             : /* *INDENT-OFF* */
    2008      178120 : VLIB_REGISTER_NODE (tcp4_syn_sent_node) =
    2009             : {
    2010             :   .name = "tcp4-syn-sent",
    2011             :   /* Takes a vector of packets. */
    2012             :   .vector_size = sizeof (u32), /* frame elements are u32 buffer indices */
    2013             :   .n_errors = TCP_N_ERROR,
    2014             :   .error_counters = tcp_input_error_counters, /* table generated from tcp_error.def at the top of this file */
    2015             :   .format_trace = format_tcp_rx_trace_short,
    2016             : }; /* no next nodes are listed here; tcp46_syn_sent_inline ends by freeing every buffer of the frame */
    2017             : /* *INDENT-ON* */
    2018             : /* Registration record for the "tcp6-syn-sent" graph node (same symbol as the IPv6 VLIB_NODE_FN above). */
    2019             : /* *INDENT-OFF* */
    2020      178120 : VLIB_REGISTER_NODE (tcp6_syn_sent_node) =
    2021             : {
    2022             :   .name = "tcp6-syn-sent",
    2023             :   /* Takes a vector of packets. */
    2024             :   .vector_size = sizeof (u32), /* frame elements are u32 buffer indices */
    2025             :   .n_errors = TCP_N_ERROR,
    2026             :   .error_counters = tcp_input_error_counters, /* table generated from tcp_error.def at the top of this file */
    2027             :   .format_trace = format_tcp_rx_trace_short,
    2028             : }; /* no next nodes are listed here; tcp46_syn_sent_inline ends by freeing every buffer of the frame */
    2029             : /* *INDENT-ON* */
    2030             : /* Adds one rx trace record for every buffer in from[0 .. n_bufs-1] that has VLIB_BUFFER_IS_TRACED set; other buffers are skipped. */
    2031             : static void
    2032           0 : tcp46_rcv_process_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
    2033             :                                u32 *from, u32 n_bufs)
    2034             : {
    2035           0 :   u32 thread_index = vm->thread_index;
    2036           0 :   tcp_connection_t *tc = 0;
    2037             :   tcp_rx_trace_t *t;
    2038             :   vlib_buffer_t *b;
    2039             :   int i;
    2040             : /* NOTE(review): i is a signed int compared against the u32 n_bufs below; harmless for frame-sized counts, but confirm. */
    2041           0 :   for (i = 0; i < n_bufs; i++)
    2042             :     {
    2043           0 :       b = vlib_get_buffer (vm, from[i]);
    2044           0 :       if (!(b->flags & VLIB_BUFFER_IS_TRACED))
    2045           0 :         continue; /* buffer not selected for tracing: nothing to record */
    2046           0 :       tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
    2047             :                                thread_index); /* lookup by the index stored in the buffer metadata, on this thread */
    2048           0 :       t = vlib_add_trace (vm, node, b, sizeof (*t));
    2049           0 :       tcp_set_rx_trace_data (t, tc, tcp_buffer_hdr (b), b, 1); /* NOTE(review): tc is passed on unchecked (tcp46_rcv_process_inline treats a 0 lookup result as possible) and the last argument is fixed to 1 for both IP versions; confirm both are intended */
    2050             :     }
    2051           0 : }
    2052             : 
    2053             : /**
    2054             :  * Handles reception for all states except LISTEN, SYN-SENT and ESTABLISHED
    2055             :  * as per RFC793 p. 64
    2056             :  */
    2057             : always_inline uword
    2058         100 : tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
    2059             :                           vlib_frame_t *frame, int is_ip4)
    2060             : {
    2061         100 :   u32 thread_index = vm->thread_index, errors, n_left_from, *from, max_deq;
    2062         100 :   tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
    2063             :   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
    2064             : 
    2065         100 :   from = vlib_frame_vector_args (frame);
    2066         100 :   n_left_from = frame->n_vectors;
    2067             : 
    2068         100 :   if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
    2069           0 :     tcp46_rcv_process_trace_frame (vm, node, from, n_left_from);
    2070             : 
    2071         100 :   vlib_get_buffers (vm, from, bufs, n_left_from);
    2072         100 :   b = bufs;
    2073             : 
    2074         550 :   while (n_left_from > 0)
    2075             :     {
    2076         450 :       u32 error = TCP_ERROR_NONE;
    2077         450 :       tcp_header_t *tcp = 0;
    2078             :       tcp_connection_t *tc;
    2079             :       u8 is_fin;
    2080             : 
    2081         450 :       tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
    2082             :                                thread_index);
    2083         450 :       if (PREDICT_FALSE (tc == 0))
    2084             :         {
    2085           0 :           error = TCP_ERROR_INVALID_CONNECTION;
    2086           0 :           goto drop;
    2087             :         }
    2088             : 
    2089         450 :       tcp = tcp_buffer_hdr (b[0]);
    2090         450 :       is_fin = tcp_is_fin (tcp);
    2091             : 
    2092             :       if (CLIB_DEBUG)
    2093             :         {
    2094         450 :           if (!(tc->connection.flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
    2095             :             {
    2096             :               tcp_connection_t *tmp;
    2097         450 :               tmp = tcp_lookup_connection (tc->c_fib_index, b[0], thread_index,
    2098             :                                            is_ip4);
    2099         450 :               if (tmp->state != tc->state)
    2100             :                 {
    2101           0 :                   if (tc->state != TCP_STATE_CLOSED)
    2102           0 :                     clib_warning ("state changed");
    2103           0 :                   goto drop;
    2104             :                 }
    2105             :             }
    2106             :         }
    2107             : 
    2108             :       /*
    2109             :        * Special treatment for CLOSED
    2110             :        */
    2111         450 :       if (PREDICT_FALSE (tc->state == TCP_STATE_CLOSED))
    2112             :         {
    2113           0 :           error = TCP_ERROR_CONNECTION_CLOSED;
    2114           0 :           goto drop;
    2115             :         }
    2116             : 
    2117             :       /*
    2118             :        * For all other states (except LISTEN)
    2119             :        */
    2120             : 
    2121             :       /* 1-4: check SEQ, RST, SYN */
    2122         450 :       if (PREDICT_FALSE (tcp_segment_validate (wrk, tc, b[0], tcp, &error)))
    2123           0 :         goto drop;
    2124             : 
    2125             :       /* 5: check the ACK field  */
    2126         450 :       switch (tc->state)
    2127             :         {
    2128         132 :         case TCP_STATE_SYN_RCVD:
    2129             : 
    2130             :           /* Make sure the segment is exactly right */
    2131         132 :           if (tc->rcv_nxt != vnet_buffer (b[0])->tcp.seq_number || is_fin)
    2132             :             {
    2133           0 :               tcp_send_reset_w_pkt (tc, b[0], thread_index, is_ip4);
    2134           0 :               error = TCP_ERROR_SEGMENT_INVALID;
    2135           0 :               goto drop;
    2136             :             }
    2137             : 
    2138             :           /*
    2139             :            * If the segment acknowledgment is not acceptable, form a
    2140             :            * reset segment,
    2141             :            *  <SEQ=SEG.ACK><CTL=RST>
    2142             :            * and send it.
    2143             :            */
    2144         132 :           if (tcp_rcv_ack_no_cc (tc, b[0], &error))
    2145             :             {
    2146           0 :               tcp_send_reset_w_pkt (tc, b[0], thread_index, is_ip4);
    2147           0 :               error = TCP_ERROR_SEGMENT_INVALID;
    2148           0 :               goto drop;
    2149             :             }
    2150             : 
    2151             :           /* Update rtt and rto */
    2152         132 :           tcp_estimate_initial_rtt (tc);
    2153         132 :           tcp_connection_tx_pacer_update (tc);
    2154             : 
    2155             :           /* Switch state to ESTABLISHED */
    2156         132 :           tc->state = TCP_STATE_ESTABLISHED;
    2157             :           TCP_EVT (TCP_EVT_STATE_CHANGE, tc);
    2158             : 
    2159         132 :           if (!(tc->cfg_flags & TCP_CFG_F_NO_TSO))
    2160           0 :             tcp_check_tx_offload (tc, is_ip4);
    2161             : 
    2162             :           /* Initialize session variables */
    2163         132 :           tc->snd_una = vnet_buffer (b[0])->tcp.ack_number;
    2164         132 :           tc->snd_wnd = clib_net_to_host_u16 (tcp->window)
    2165         132 :                         << tc->rcv_opts.wscale;
    2166         132 :           tc->snd_wl1 = vnet_buffer (b[0])->tcp.seq_number;
    2167         132 :           tc->snd_wl2 = vnet_buffer (b[0])->tcp.ack_number;
    2168             : 
    2169             :           /* Reset SYN-ACK retransmit and SYN_RCV establish timers */
    2170         132 :           tcp_retransmit_timer_reset (&wrk->timer_wheel, tc);
    2171         132 :           if (session_stream_accept_notify (&tc->connection))
    2172             :             {
    2173           0 :               error = TCP_ERROR_MSG_QUEUE_FULL;
    2174           0 :               tcp_send_reset (tc);
    2175           0 :               session_transport_delete_notify (&tc->connection);
    2176           0 :               tcp_connection_cleanup (tc);
    2177           0 :               goto drop;
    2178             :             }
    2179         132 :           error = TCP_ERROR_ACK_OK;
    2180         132 :           break;
    2181          31 :         case TCP_STATE_ESTABLISHED:
    2182             :           /* We can get packets in established state here because they
    2183             :            * were enqueued before state change */
    2184          31 :           if (tcp_rcv_ack (wrk, tc, b[0], tcp, &error))
    2185           0 :             goto drop;
    2186             : 
    2187          31 :           break;
    2188         131 :         case TCP_STATE_FIN_WAIT_1:
    2189             :           /* In addition to the processing for the ESTABLISHED state, if
    2190             :            * our FIN is now acknowledged then enter FIN-WAIT-2 and
    2191             :            * continue processing in that state. */
    2192         131 :           if (tcp_rcv_ack (wrk, tc, b[0], tcp, &error))
    2193           0 :             goto drop;
    2194             : 
    2195             :           /* Still have to send the FIN */
    2196         131 :           if (tc->flags & TCP_CONN_FINPNDG)
    2197             :             {
    2198             :               /* TX fifo finally drained */
    2199           2 :               max_deq = transport_max_tx_dequeue (&tc->connection);
    2200           2 :               if (max_deq <= tc->burst_acked)
    2201           2 :                 tcp_send_fin (tc);
    2202             :               /* If a fin was received and data was acked extend wait */
    2203           0 :               else if ((tc->flags & TCP_CONN_FINRCVD) && tc->bytes_acked)
    2204           0 :                 tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
    2205             :                                   tcp_cfg.closewait_time);
    2206             :             }
    2207             :           /* If FIN is ACKed */
    2208         129 :           else if (tc->snd_una == tc->snd_nxt)
    2209             :             {
    2210             :               /* Stop all retransmit timers because we have nothing more
    2211             :                * to send. */
    2212         127 :               tcp_connection_timers_reset (tc);
    2213             : 
    2214             :               /* We already have a FIN but didn't transition to CLOSING
    2215             :                * because of outstanding tx data. Close the connection. */
    2216         127 :               if (tc->flags & TCP_CONN_FINRCVD)
    2217             :                 {
    2218           0 :                   tcp_connection_set_state (tc, TCP_STATE_CLOSED);
    2219           0 :                   session_transport_closed_notify (&tc->connection);
    2220           0 :                   tcp_program_cleanup (wrk, tc);
    2221           0 :                   goto drop;
    2222             :                 }
    2223             : 
    2224         127 :               tcp_connection_set_state (tc, TCP_STATE_FIN_WAIT_2);
    2225             :               /* Enable waitclose because we're willing to wait for peer's
    2226             :                * FIN but not indefinitely. */
    2227         127 :               tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
    2228             :                              tcp_cfg.finwait2_time);
    2229             : 
    2230             :               /* Don't try to deq the FIN acked */
    2231         127 :               if (tc->burst_acked > 1)
    2232           0 :                 session_tx_fifo_dequeue_drop (&tc->connection,
    2233           0 :                                               tc->burst_acked - 1);
    2234         127 :               tc->burst_acked = 0;
    2235             :             }
    2236         131 :           break;
    2237          25 :         case TCP_STATE_FIN_WAIT_2:
    2238             :           /* In addition to the processing for the ESTABLISHED state, if
    2239             :            * the retransmission queue is empty, the user's CLOSE can be
    2240             :            * acknowledged ("ok") but do not delete the TCB. */
    2241          25 :           if (tcp_rcv_ack_no_cc (tc, b[0], &error))
    2242           0 :             goto drop;
    2243          25 :           tc->burst_acked = 0;
    2244          25 :           break;
    2245           2 :         case TCP_STATE_CLOSE_WAIT:
    2246             :           /* Do the same processing as for the ESTABLISHED state. */
    2247           2 :           if (tcp_rcv_ack (wrk, tc, b[0], tcp, &error))
    2248           0 :             goto drop;
    2249             : 
    2250           2 :           if (!(tc->flags & TCP_CONN_FINPNDG))
    2251           0 :             break;
    2252             : 
    2253             :           /* Still have outstanding tx data */
    2254           2 :           max_deq = transport_max_tx_dequeue (&tc->connection);
    2255           2 :           if (max_deq > tc->burst_acked)
    2256           0 :             break;
    2257             : 
    2258           2 :           tcp_send_fin (tc);
    2259           2 :           tcp_connection_timers_reset (tc);
    2260           2 :           tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
    2261           2 :           tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
    2262             :                          tcp_cfg.lastack_time);
    2263           2 :           break;
    2264           2 :         case TCP_STATE_CLOSING:
    2265             :           /* In addition to the processing for the ESTABLISHED state, if
    2266             :            * the ACK acknowledges our FIN then enter the TIME-WAIT state,
    2267             :            * otherwise ignore the segment. */
    2268           2 :           if (tcp_rcv_ack_no_cc (tc, b[0], &error))
    2269           0 :             goto drop;
    2270             : 
    2271           2 :           if (tc->snd_una != tc->snd_nxt)
    2272           0 :             goto drop;
    2273             : 
    2274           2 :           tcp_connection_timers_reset (tc);
    2275           2 :           tcp_connection_set_state (tc, TCP_STATE_TIME_WAIT);
    2276           2 :           tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
    2277             :                          tcp_cfg.timewait_time);
    2278           2 :           session_transport_closed_notify (&tc->connection);
    2279           2 :           goto drop;
    2280             : 
    2281             :           break;
    2282         127 :         case TCP_STATE_LAST_ACK:
    2283             :           /* The only thing that [should] arrive in this state is an
    2284             :            * acknowledgment of our FIN. If our FIN is now acknowledged,
    2285             :            * delete the TCB, enter the CLOSED state, and return. */
    2286             : 
    2287         127 :           if (tcp_rcv_ack_no_cc (tc, b[0], &error))
    2288           0 :             goto drop;
    2289             : 
    2290             :           /* Apparently our ACK for the peer's FIN was lost */
    2291         127 :           if (is_fin && tc->snd_una != tc->snd_nxt)
    2292             :             {
    2293           0 :               tcp_send_fin (tc);
    2294           0 :               goto drop;
    2295             :             }
    2296             : 
    2297         127 :           tcp_connection_set_state (tc, TCP_STATE_CLOSED);
    2298         127 :           session_transport_closed_notify (&tc->connection);
    2299             : 
    2300             :           /* Don't free the connection from the data path since
    2301             :            * we can't ensure that we have no packets already enqueued
    2302             :            * to output. Rely instead on the waitclose timer */
    2303         127 :           tcp_connection_timers_reset (tc);
    2304         127 :           tcp_program_cleanup (tcp_get_worker (tc->c_thread_index), tc);
    2305             : 
    2306         127 :           goto drop;
    2307             : 
    2308             :           break;
    2309           0 :         case TCP_STATE_TIME_WAIT:
    2310             :           /* The only thing that can arrive in this state is a
    2311             :            * retransmission of the remote FIN. Acknowledge it, and restart
    2312             :            * the 2 MSL timeout. */
    2313             : 
    2314           0 :           if (tcp_rcv_ack_no_cc (tc, b[0], &error))
    2315           0 :             goto drop;
    2316             : 
    2317           0 :           if (!is_fin)
    2318           0 :             goto drop;
    2319             : 
    2320           0 :           tcp_program_ack (tc);
    2321           0 :           tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
    2322             :                             tcp_cfg.timewait_time);
    2323           0 :           goto drop;
    2324             : 
    2325             :           break;
    2326           0 :         default:
    2327           0 :           ASSERT (0);
    2328             :         }
    2329             : 
    2330             :       /* 6: check the URG bit TODO */
    2331             : 
    2332             :       /* 7: process the segment text */
    2333         321 :       switch (tc->state)
    2334             :         {
    2335         319 :         case TCP_STATE_ESTABLISHED:
    2336             :         case TCP_STATE_FIN_WAIT_1:
    2337             :         case TCP_STATE_FIN_WAIT_2:
    2338         319 :           if (vnet_buffer (b[0])->tcp.data_len)
    2339          33 :             error = tcp_segment_rcv (wrk, tc, b[0]);
    2340             :           /* Don't process out-of-order FINs in the FIN check below */
    2341         319 :           if (vnet_buffer (b[0])->tcp.seq_end != tc->rcv_nxt)
    2342           0 :             goto drop;
    2343         319 :           break;
    2344           2 :         case TCP_STATE_CLOSE_WAIT:
    2345             :         case TCP_STATE_CLOSING:
    2346             :         case TCP_STATE_LAST_ACK:
    2347             :         case TCP_STATE_TIME_WAIT:
    2348             :           /* This should not occur, since a FIN has been received from the
    2349             :            * remote side.  Ignore the segment text. */
    2350           2 :           break;
    2351             :         }
    2352             : 
    2353             :       /* 8: check the FIN bit */
    2354         321 :       if (!is_fin)
    2355         192 :         goto drop;
    2356             : 
    2357             :       TCP_EVT (TCP_EVT_FIN_RCVD, tc);
    2358             : 
    2359         129 :       switch (tc->state)
    2360             :         {
    2361           0 :         case TCP_STATE_ESTABLISHED:
    2362             :           /* Account for the FIN and send ack */
    2363           0 :           tc->rcv_nxt += 1;
    2364           0 :           tcp_program_ack (tc);
    2365           0 :           tcp_connection_set_state (tc, TCP_STATE_CLOSE_WAIT);
    2366           0 :           tcp_program_disconnect (wrk, tc);
    2367           0 :           tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
    2368             :                             tcp_cfg.closewait_time);
    2369           0 :           break;
    2370           0 :         case TCP_STATE_SYN_RCVD:
    2371             :           /* Send FIN-ACK, enter LAST-ACK and because the app was not
    2372             :            * notified yet, set a cleanup timer instead of relying on
    2373             :            * disconnect notify and the implicit close call. */
    2374           0 :           tcp_connection_timers_reset (tc);
    2375           0 :           tc->rcv_nxt += 1;
    2376           0 :           tcp_send_fin (tc);
    2377           0 :           tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
    2378           0 :           tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
    2379             :                          tcp_cfg.lastack_time);
    2380           0 :           break;
    2381           0 :         case TCP_STATE_CLOSE_WAIT:
    2382             :         case TCP_STATE_CLOSING:
    2383             :         case TCP_STATE_LAST_ACK:
    2384             :           /* move along .. */
    2385           0 :           break;
    2386           2 :         case TCP_STATE_FIN_WAIT_1:
    2387           2 :           tc->rcv_nxt += 1;
    2388             : 
    2389           2 :           if (tc->flags & TCP_CONN_FINPNDG)
    2390             :             {
    2391             :               /* If data is outstanding, stay in FIN_WAIT_1 and try to finish
    2392             :                * sending it. Since we already received a fin, do not wait
    2393             :                * for too long. */
    2394           0 :               tc->flags |= TCP_CONN_FINRCVD;
    2395           0 :               tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
    2396             :                                 tcp_cfg.closewait_time);
    2397             :             }
    2398             :           else
    2399             :             {
    2400           2 :               tcp_connection_set_state (tc, TCP_STATE_CLOSING);
    2401           2 :               tcp_program_ack (tc);
    2402             :               /* Wait for ACK for our FIN but not forever */
    2403           2 :               tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
    2404             :                                 tcp_cfg.closing_time);
    2405             :             }
    2406           2 :           break;
    2407         127 :         case TCP_STATE_FIN_WAIT_2:
    2408             :           /* Got FIN, send ACK! Be more aggressive with resource cleanup */
    2409         127 :           tc->rcv_nxt += 1;
    2410         127 :           tcp_connection_set_state (tc, TCP_STATE_TIME_WAIT);
    2411         127 :           tcp_connection_timers_reset (tc);
    2412         127 :           tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
    2413             :                          tcp_cfg.timewait_time);
    2414         127 :           tcp_program_ack (tc);
    2415         127 :           session_transport_closed_notify (&tc->connection);
    2416         127 :           break;
    2417           0 :         case TCP_STATE_TIME_WAIT:
    2418             :           /* Remain in the TIME-WAIT state. Restart the time-wait
    2419             :            * timeout.
    2420             :            */
    2421           0 :           tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
    2422             :                             tcp_cfg.timewait_time);
    2423           0 :           break;
    2424             :         }
    2425         129 :       error = TCP_ERROR_FIN_RCVD;
    2426             : 
    2427         450 :     drop:
    2428             : 
    2429         450 :       b += 1;
    2430         450 :       n_left_from -= 1;
    2431         450 :       tcp_inc_counter (rcv_process, error, 1);
    2432             :     }
    2433             : 
    2434         100 :   errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP,
    2435             :                                               thread_index);
    2436         100 :   tcp_inc_counter (rcv_process, TCP_ERROR_MSG_QUEUE_FULL, errors);
    2437         100 :   tcp_handle_postponed_dequeues (wrk);
    2438         100 :   tcp_handle_disconnects (wrk);
    2439         100 :   vlib_buffer_free (vm, from, frame->n_vectors);
    2440             : 
    2441         100 :   return frame->n_vectors;
    2442             : }
    2443             : /**
                     :  * Node function of "tcp4-rcv-process" (IPv4 variant): hands the frame to
                     :  * tcp46_rcv_process_inline with is_ip4 = 1 and returns its result.
                     :  */
    2444        2332 : VLIB_NODE_FN (tcp4_rcv_process_node) (vlib_main_t * vm,
    2445             :                                       vlib_node_runtime_t * node,
    2446             :                                       vlib_frame_t * from_frame)
    2447             : {
    2448          96 :   return tcp46_rcv_process_inline (vm, node, from_frame, 1 /* is_ip4 */ );
    2449             : }
    2450             : /**
                     :  * Node function of "tcp6-rcv-process" (IPv6 variant): hands the frame to
                     :  * tcp46_rcv_process_inline with is_ip4 = 0 and returns its result.
                     :  */
    2451        2240 : VLIB_NODE_FN (tcp6_rcv_process_node) (vlib_main_t * vm,
    2452             :                                       vlib_node_runtime_t * node,
    2453             :                                       vlib_frame_t * from_frame)
    2454             : {
    2455           4 :   return tcp46_rcv_process_inline (vm, node, from_frame, 0 /* is_ip4 */ );
    2456             : }
    2457             : /**
                     :  * Registration of the "tcp4-rcv-process" graph node. Frame elements are
                     :  * u32-sized, n_errors is TCP_N_ERROR with descriptions taken from the
                     :  * shared tcp_input_error_counters table, and traces are rendered by
                     :  * format_tcp_rx_trace_short.
                     :  */
    2458             : /* *INDENT-OFF* */
    2459      178120 : VLIB_REGISTER_NODE (tcp4_rcv_process_node) = {
    2460             :   .name = "tcp4-rcv-process",
    2461             :   /* Takes a vector of packets. */
    2462             :   .vector_size = sizeof (u32),
    2463             :   .n_errors = TCP_N_ERROR,
    2464             :   .error_counters = tcp_input_error_counters,
    2465             :   .format_trace = format_tcp_rx_trace_short,
    2466             : };
    2467             : /* *INDENT-ON* */
    2468             : /**
                     :  * Registration of the "tcp6-rcv-process" graph node. Frame elements are
                     :  * u32-sized, n_errors is TCP_N_ERROR with descriptions taken from the
                     :  * shared tcp_input_error_counters table, and traces are rendered by
                     :  * format_tcp_rx_trace_short.
                     :  */
    2469             : /* *INDENT-OFF* */
    2470      178120 : VLIB_REGISTER_NODE (tcp6_rcv_process_node) = {
    2471             :   .name = "tcp6-rcv-process",
    2472             :   /* Takes a vector of packets. */
    2473             :   .vector_size = sizeof (u32),
    2474             :   .n_errors = TCP_N_ERROR,
    2475             :   .error_counters = tcp_input_error_counters,
    2476             :   .format_trace = format_tcp_rx_trace_short,
    2477             : };
    2478             : /* *INDENT-ON* */
    2479             : /**
                     :  * Add rx trace records for the traced buffers of a listen-node frame.
                     :  *
                     :  * Walks the n_bufs buffer indices in to_next and, for every buffer that
                     :  * has VLIB_BUFFER_IS_TRACED set, allocates a tcp_rx_trace_t with
                     :  * vlib_add_trace and fills it via tcp_set_rx_trace_data.
                     :  *
                     :  * @param vm       vlib main, used to resolve buffer indices and add traces
                     :  * @param node     runtime of the node the traces are recorded for
                     :  * @param to_next  array of buffer indices to inspect
                     :  * @param n_bufs   number of entries in to_next
                     :  *
                     :  * NOTE(review): tc starts as 0 and is only assigned when tcp.flags equals
                     :  * TCP_STATE_LISTEN; for any other buffer the value left by a previous
                     :  * iteration (or 0) is what gets passed to tcp_set_rx_trace_data. Confirm
                     :  * this is intended.
                     :  */
    2480             : static void
    2481           3 : tcp46_listen_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
    2482             :                           u32 *to_next, u32 n_bufs)
    2483             : {
    2484           3 :   tcp_connection_t *tc = 0;
    2485             :   tcp_rx_trace_t *t;
    2486             :   vlib_buffer_t *b;
    2487             :   int i;
    2488             : 
    2489           6 :   for (i = 0; i < n_bufs; i++)
    2490             :     {
    2491           3 :       b = vlib_get_buffer (vm, to_next[i]);
    2492           3 :       if (!(b->flags & VLIB_BUFFER_IS_TRACED))
    2493           0 :         continue;
    2494           3 :       if (vnet_buffer (b)->tcp.flags == TCP_STATE_LISTEN)
    2495           3 :         tc = tcp_listener_get (vnet_buffer (b)->tcp.connection_index);
    2496           3 :       t = vlib_add_trace (vm, node, b, sizeof (*t));
    2497           3 :       tcp_set_rx_trace_data (t, tc, tcp_buffer_hdr (b), b, 1);
    2498             :     }
    2499           3 : }
    2500             : 
    2501             : /**
    2502             :  * SYN received in TIME-WAIT state.
    2503             :  *
    2504             :  * RFC 1122:
    2505             :  * "When a connection is [...] on TIME-WAIT state [...]
    2506             :  * [a TCP] MAY accept a new SYN from the remote TCP to
    2507             :  * reopen the connection directly, if it:
    2508             :  *
    2509             :  * (1)  assigns its initial sequence number for the new
    2510             :  * connection to be larger than the largest sequence
    2511             :  * number it used on the previous connection incarnation,
    2512             :  * and
    2513             :  *
    2514             :  * (2)  returns to TIME-WAIT state if the SYN turns out
    2515             :  * to be an old duplicate".
    2516             :  *
    2517             :  * The function returns true if the syn can be accepted during
    2518             :  * connection time-wait (port reuse). In this case the function
    2519             :  * also calculates what the iss should be for the new connection.
                     :  *
                     :  * @param tc   existing connection the SYN was matched to; the caller in
                     :  *             tcp46_listen_inline only gets here when tc->state is
                     :  *             TCP_STATE_TIME_WAIT
                     :  * @param b    buffer holding the SYN; only its tcp.seq_number metadata
                     :  *             is read here
                     :  * @param iss  out: always reset to 0 first, then set to the computed
                     :  *             non-zero iss when the SYN is accepted
                     :  * @return 1 if the SYN is accepted, 0 otherwise
    2520             :  */
    2521             : always_inline int
    2522           0 : syn_during_timewait (tcp_connection_t *tc, vlib_buffer_t *b, u32 *iss)
    2523             : {
    2524           0 :   int paws_reject = tcp_segment_check_paws (tc);
    2525             :   u32 tw_iss;
    2526             : 
    2527           0 :   *iss = 0;
    2528             :   /* Accept the SYN only if PAWS did not reject it and either its sequence
                     :    * number is at or beyond rcv_nxt, or the timestamp option is present
                     :    * and the received tsval is newer than tsval_recent */
    2529           0 :   if (!paws_reject &&
    2530           0 :       (seq_geq (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt) ||
    2531           0 :        (tcp_opts_tstamp (&tc->rcv_opts) &&
    2532           0 :         timestamp_lt (tc->tsval_recent, tc->rcv_opts.tsval))))
    2533             :     {
    2534             :       /* Set the iss of the new connection to be the largest sequence number
    2535             :        * the old peer would have accepted and add some random number
                     :        *
                     :        * The added term is time derived: tcp_time_now_us () scaled by 1e6,
                     :        * cast to uword and reduced modulo 65535 (the modulo applies to the
                     :        * time term only, since % binds tighter than +). A result of 0 is
                     :        * bumped to 1 because tcp46_listen_inline treats a zero tw_iss as
                     :        * the regular, non time-wait case.
    2536             :        */
    2537           0 :       tw_iss = tc->snd_nxt + tcp_available_snd_wnd (tc) +
    2538           0 :                (uword) (tcp_time_now_us (tc->c_thread_index) * 1e6) % 65535;
    2539           0 :       if (tw_iss == 0)
    2540           0 :         tw_iss++;
    2541           0 :       *iss = tw_iss;
    2542             : 
    2543           0 :       return 1;
    2544             :     }
    2545             :   else
    2546             :     {
    2547             :       TCP_DBG (
    2548             :         "ERROR not accepting SYN in timewait,paws_reject=%d, seq_num =%ld, "
    2549             :         "rcv_nxt=%ld, tstamp_present=%d, tsval_recent = %d, tsval = %d\n",
    2550             :         paws_reject, vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt,
    2551             :         tcp_opts_tstamp (&tc->rcv_opts), tc->tsval_recent, tc->rcv_opts.tsval);
    2552           0 :       return 0;
    2553             :     }
    2554             : }
    2555             : 
    2556             : /**
    2557             :  * LISTEN state processing as per RFC 793 p. 65
                     :  *
                     :  * For each buffer of the frame: resolve the listener (either directly, or
                     :  * through an existing TIME-WAIT connection that the SYN is allowed to
                     :  * replace), allocate a child connection in SYN_RCVD state, accept a
                     :  * session for it and send a SYN-ACK. All buffers of the frame are freed
                     :  * before returning, whatever the per-buffer outcome.
                     :  *
                     :  * @param vm      vlib main of the calling thread
                     :  * @param node    node runtime; its flags decide whether traces are added
                     :  * @param frame   frame of buffer indices to process
                     :  * @param is_ip4  1 for the IPv4 node, 0 for the IPv6 node
                     :  * @return number of buffers in the frame (frame->n_vectors)
    2558             :  */
    2559             : always_inline uword
    2560          36 : tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
    2561             :                      vlib_frame_t *frame, int is_ip4)
    2562             : {
    2563          36 :   u32 n_left_from, *from, n_syns = 0;
    2564             :   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
    2565          36 :   u32 thread_index = vm->thread_index;
    2566          36 :   u32 tw_iss = 0;
    2567             : 
    2568          36 :   from = vlib_frame_vector_args (frame);
    2569          36 :   n_left_from = frame->n_vectors;
    2570             : 
    2571          36 :   if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
    2572           3 :     tcp46_listen_trace_frame (vm, node, from, n_left_from);
    2573             : 
    2574          36 :   vlib_get_buffers (vm, from, bufs, n_left_from);
    2575          36 :   b = bufs;
    2576             : 
    2577         171 :   while (n_left_from > 0)
    2578             :     {
    2579             :       tcp_connection_t *lc, *child;
    2580             : 
    2581             :       /* Flags initialized with connection state after lookup
                     :        * (tcp_input_dispatch_buffer stores tc->state in tcp.flags) */
    2582         135 :       if (vnet_buffer (b[0])->tcp.flags == TCP_STATE_LISTEN)
    2583             :         {
    2584         135 :           lc = tcp_listener_get (vnet_buffer (b[0])->tcp.connection_index);
    2585             :         }
    2586             :       /* Probably we are in time-wait or closed state */
    2587             :       else
    2588             :         {
    2589             :           tcp_connection_t *tc;
    2590           0 :           tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
    2591             :                                    thread_index);
    2592           0 :           if (tc->state != TCP_STATE_TIME_WAIT)
    2593             :             {
    2594           0 :               tcp_inc_counter (listen, TCP_ERROR_CREATE_EXISTS, 1);
    2595           0 :               goto done;
    2596             :             }
    2597             : 
    2598           0 :           if (PREDICT_FALSE (!syn_during_timewait (tc, b[0], &tw_iss)))
    2599             :             {
    2600             :               /* This SYN can't be accepted */
    2601           0 :               tcp_inc_counter (listen, TCP_ERROR_CREATE_EXISTS, 1);
    2602           0 :               goto done;
    2603             :             }
    2604             : 
    2605           0 :           lc = tcp_lookup_listener (b[0], tc->c_fib_index, is_ip4);
    2606             :           /* clean up the old session */
    2607           0 :           tcp_connection_del (tc);
    2608             :           /* listener was cleaned up */
    2609           0 :           if (!lc)
    2610             :             {
    2611           0 :               tcp_inc_counter (listen, TCP_ERROR_NO_LISTENER, 1);
    2612           0 :               goto done;
    2613             :             }
    2614             :         }
    2615             : 
    2616             :       /* Make sure connection wasn't just created
                     :        *
                     :        * NOTE(review): the lookup result is dereferenced without a NULL
                     :        * check; confirm tcp_lookup_connection cannot return NULL here. */
    2617             :       child =
    2618         135 :         tcp_lookup_connection (lc->c_fib_index, b[0], thread_index, is_ip4);
    2619         135 :       if (PREDICT_FALSE (child->state != TCP_STATE_LISTEN))
    2620             :         {
    2621           0 :           tcp_inc_counter (listen, TCP_ERROR_CREATE_EXISTS, 1);
    2622           0 :           goto done;
    2623             :         }
    2624             : 
    2625             :       /* Create child session. For syn-flood protection use filter */
    2626             : 
    2627             :       /* 1. first check for an RST: handled by input dispatch */
    2628             : 
    2629             :       /* 2. second check for an ACK: handled by input dispatch */
    2630             : 
    2631             :       /* 3. check for a SYN (did that already) */
    2632             : 
    2633             :       /* Create child session and send SYN-ACK */
    2634         135 :       child = tcp_connection_alloc (thread_index);
    2635             : 
    2636         135 :       if (tcp_options_parse (tcp_buffer_hdr (b[0]), &child->rcv_opts, 1))
    2637             :         {
    2638           0 :           tcp_inc_counter (listen, TCP_ERROR_OPTIONS, 1);
    2639           0 :           tcp_connection_free (child);
    2640           0 :           goto done;
    2641             :         }
    2642             : 
    2643         135 :       tcp_init_w_buffer (child, b[0], is_ip4);
    2644             : 
    2645         135 :       child->state = TCP_STATE_SYN_RCVD;
    2646         135 :       child->c_fib_index = lc->c_fib_index;
    2647         135 :       child->cc_algo = lc->cc_algo;
    2648             : 
    2649             :       /* In the regular case, the tw_iss will be zero, but
    2650             :        * in the special case of syn arriving in time_wait state, the value
    2651             :        * will be set according to rfc 1122
                     :        *
                     :        * NOTE(review): tw_iss is declared at function scope and is only
                     :        * reset inside syn_during_timewait, so once a time-wait SYN has been
                     :        * accepted, later listener-path SYNs in the same frame reuse that
                     :        * value. Confirm this is intended.
    2652             :        */
    2653         135 :       child->iss = tw_iss;
    2654         135 :       tcp_connection_init_vars (child);
    2655         135 :       child->rto = TCP_RTO_MIN;
    2656             : 
    2657             :       /*
    2658             :        * This initializes elog track, must be done before synack.
    2659             :        * We also do it before possible tcp_connection_cleanup() as it
    2660             :        * generates TCP_EVT_DELETE event.
    2661             :        */
    2662             :       TCP_EVT (TCP_EVT_SYN_RCVD, child, 1);
    2663             : 
    2664         135 :       if (session_stream_accept (&child->connection, lc->c_s_index,
    2665             :                                  lc->c_thread_index, 0 /* notify */ ))
    2666             :         {
    2667           0 :           tcp_connection_cleanup (child);
    2668           0 :           tcp_inc_counter (listen, TCP_ERROR_CREATE_SESSION_FAIL, 1);
    2669           0 :           goto done;
    2670             :         }
    2671             : 
    2672         135 :       transport_fifos_init_ooo (&child->connection);
    2673         135 :       child->tx_fifo_size = transport_tx_fifo_size (&child->connection);
    2674             : 
    2675         135 :       tcp_send_synack (child);
    2676         135 :       n_syns += 1;
    2677             : 
    2678         135 :     done:
    2679         135 :       b += 1;
    2680         135 :       n_left_from -= 1;
    2681             :     }
    2682             : 
    2683          36 :   tcp_inc_counter (listen, TCP_ERROR_SYNS_RCVD, n_syns);
    2684          36 :   vlib_buffer_free (vm, from, frame->n_vectors);
    2685             : 
    2686          36 :   return frame->n_vectors;
    2687             : }
    2688             : /**
                     :  * Node function of "tcp4-listen" (IPv4 variant): hands the frame to
                     :  * tcp46_listen_inline with is_ip4 = 1 and returns its result.
                     :  */
    2689        2271 : VLIB_NODE_FN (tcp4_listen_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
    2690             :                                  vlib_frame_t * from_frame)
    2691             : {
    2692          35 :   return tcp46_listen_inline (vm, node, from_frame, 1 /* is_ip4 */ );
    2693             : }
    2694             : /**
                     :  * Node function of "tcp6-listen" (IPv6 variant): hands the frame to
                     :  * tcp46_listen_inline with is_ip4 = 0 and returns its result.
                     :  */
    2695        2237 : VLIB_NODE_FN (tcp6_listen_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
    2696             :                                  vlib_frame_t * from_frame)
    2697             : {
    2698           1 :   return tcp46_listen_inline (vm, node, from_frame, 0 /* is_ip4 */ );
    2699             : }
    2700             : /**
                     :  * Registration of the "tcp4-listen" graph node. Frame elements are
                     :  * u32-sized, n_errors is TCP_N_ERROR with descriptions taken from the
                     :  * shared tcp_input_error_counters table, and traces are rendered by
                     :  * format_tcp_rx_trace_short.
                     :  */
    2701             : /* *INDENT-OFF* */
    2702      178120 : VLIB_REGISTER_NODE (tcp4_listen_node) = {
    2703             :   .name = "tcp4-listen",
    2704             :   /* Takes a vector of packets. */
    2705             :   .vector_size = sizeof (u32),
    2706             :   .n_errors = TCP_N_ERROR,
    2707             :   .error_counters = tcp_input_error_counters,
    2708             :   .format_trace = format_tcp_rx_trace_short,
    2709             : };
    2710             : /* *INDENT-ON* */
    2711             : /**
                     :  * Registration of the "tcp6-listen" graph node. Frame elements are
                     :  * u32-sized, n_errors is TCP_N_ERROR with descriptions taken from the
                     :  * shared tcp_input_error_counters table, and traces are rendered by
                     :  * format_tcp_rx_trace_short.
                     :  */
    2712             : /* *INDENT-OFF* */
    2713      178120 : VLIB_REGISTER_NODE (tcp6_listen_node) = {
    2714             :   .name = "tcp6-listen",
    2715             :   /* Takes a vector of packets. */
    2716             :   .vector_size = sizeof (u32),
    2717             :   .n_errors = TCP_N_ERROR,
    2718             :   .error_counters = tcp_input_error_counters,
    2719             :   .format_trace = format_tcp_rx_trace_short,
    2720             : };
    2721             : /* *INDENT-ON* */
    2722             : /**
                     :  * Common body of the TCP drop nodes: frees every buffer of the frame.
                     :  *
                     :  * @param vm      vlib main used to free the buffers
                     :  * @param node    node runtime (not used in the body)
                     :  * @param frame   frame whose buffer indices are freed
                     :  * @param is_ip4  address family selector (not used in the body)
                     :  * @return number of buffers in the frame (frame->n_vectors)
                     :  */
    2723             : always_inline uword
    2724           4 : tcp46_drop_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
    2725             :                    vlib_frame_t *frame, int is_ip4)
    2726             : {
    2727           4 :   u32 *from = vlib_frame_vector_args (frame);
    2728             : 
    2729             :   /* Error counters must be incremented by previous nodes */
    2730           4 :   vlib_buffer_free (vm, from, frame->n_vectors);
    2731             : 
    2732           4 :   return frame->n_vectors;
    2733             : }
    2734             : /**
                     :  * Node function of "tcp4-drop" (IPv4 variant): hands the frame to
                     :  * tcp46_drop_inline with is_ip4 = 1 and returns its result.
                     :  */
    2735        2240 : VLIB_NODE_FN (tcp4_drop_node)
    2736             : (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
    2737             : {
    2738           4 :   return tcp46_drop_inline (vm, node, from_frame, 1 /* is_ip4 */);
    2739             : }
    2740             : /**
                     :  * Node function of "tcp6-drop" (IPv6 variant): hands the frame to
                     :  * tcp46_drop_inline with is_ip4 = 0 and returns its result.
                     :  */
    2741        2236 : VLIB_NODE_FN (tcp6_drop_node)
    2742             : (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
    2743             : {
    2744           0 :   return tcp46_drop_inline (vm, node, from_frame, 0 /* is_ip4 */);
    2745             : }
    2746             : /**
                     :  * Registration of the "tcp4-drop" graph node. Frame elements are u32-sized
                     :  * and n_errors is TCP_N_ERROR with descriptions taken from the shared
                     :  * tcp_input_error_counters table; no trace formatter is set.
                     :  */
    2747      178120 : VLIB_REGISTER_NODE (tcp4_drop_node) = {
    2748             :   .name = "tcp4-drop",
    2749             :   .vector_size = sizeof (u32),
    2750             :   .n_errors = TCP_N_ERROR,
    2751             :   .error_counters = tcp_input_error_counters,
    2752             : };
    2753             : /**
                     :  * Registration of the "tcp6-drop" graph node. Frame elements are u32-sized
                     :  * and n_errors is TCP_N_ERROR with descriptions taken from the shared
                     :  * tcp_input_error_counters table; no trace formatter is set.
                     :  */
    2754      178120 : VLIB_REGISTER_NODE (tcp6_drop_node) = {
    2755             :   .name = "tcp6-drop",
    2756             :   .vector_size = sizeof (u32),
    2757             :   .n_errors = TCP_N_ERROR,
    2758             :   .error_counters = tcp_input_error_counters,
    2759             : };
    2760             : /**
                     :  * List of (symbol, node name) pairs for the next nodes of the IPv4 input
                     :  * path. Each entry is wrapped in _ (), which is not defined here;
                     :  * presumably the expansion site defines _ () to build the next-node table
                     :  * indexed by TCP_INPUT_NEXT_<symbol> - TODO confirm against the node
                     :  * registration, which is outside this section.
                     :  */
    2761             : #define foreach_tcp4_input_next                                               \
    2762             :   _ (DROP, "tcp4-drop")                                                       \
    2763             :   _ (LISTEN, "tcp4-listen")                                                   \
    2764             :   _ (RCV_PROCESS, "tcp4-rcv-process")                                         \
    2765             :   _ (SYN_SENT, "tcp4-syn-sent")                                               \
    2766             :   _ (ESTABLISHED, "tcp4-established")                                         \
    2767             :   _ (RESET, "tcp4-reset")                                                     \
    2768             :   _ (PUNT, "ip4-punt")
    2769             : /** IPv6 counterpart of foreach_tcp4_input_next: same symbols, in the same
                     :  * order, paired with the tcp6 and ip6 node names. */
    2770             : #define foreach_tcp6_input_next                                               \
    2771             :   _ (DROP, "tcp6-drop")                                                       \
    2772             :   _ (LISTEN, "tcp6-listen")                                                   \
    2773             :   _ (RCV_PROCESS, "tcp6-rcv-process")                                         \
    2774             :   _ (SYN_SENT, "tcp6-syn-sent")                                               \
    2775             :   _ (ESTABLISHED, "tcp6-established")                                         \
    2776             :   _ (RESET, "tcp6-reset")                                                     \
    2777             :   _ (PUNT, "ip6-punt")
    2778             : /** TCP header flag bits kept by tcp_input_dispatch_buffer when it indexes
                     :  * the dispatch table: SYN, ACK, RST and FIN. */
    2779             : #define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN)
    2780             : /**
                     :  * Pick the next node and final error code for a buffer that has no
                     :  * connection (tcp46_input_inline calls this when the lookup returned 0).
                     :  *
                     :  * - TCP_ERROR_FILTERED or TCP_ERROR_WRONG_THREAD: next is DROP and the
                     :  *   error is left unchanged.
                     :  * - otherwise, if punting is enabled for the address family
                     :  *   (tm->punt_unknown4 for IPv4, tm->punt_unknown6 for IPv6): next is PUNT
                     :  *   and the error becomes TCP_ERROR_PUNT.
                     :  * - otherwise: next is RESET and the error becomes TCP_ERROR_NO_LISTENER.
                     :  *
                     :  * @param tm      tcp main holding the punt settings
                     :  * @param next    out: selected TCP_INPUT_NEXT_* index
                     :  * @param error   in/out: error reported by the lookup, possibly rewritten
                     :  * @param is_ip4  non-zero for IPv4, zero for IPv6
                     :  */
    2781             : static void
    2782           0 : tcp_input_set_error_next (tcp_main_t * tm, u16 * next, u32 * error, u8 is_ip4)
    2783             : {
    2784           0 :   if (*error == TCP_ERROR_FILTERED || *error == TCP_ERROR_WRONG_THREAD)
    2785             :     {
    2786           0 :       *next = TCP_INPUT_NEXT_DROP;
    2787             :     }
    2788           0 :   else if ((is_ip4 && tm->punt_unknown4) || (!is_ip4 && tm->punt_unknown6))
    2789             :     {
    2790           0 :       *next = TCP_INPUT_NEXT_PUNT;
    2791           0 :       *error = TCP_ERROR_PUNT;
    2792             :     }
    2793             :   else
    2794             :     {
    2795           0 :       *next = TCP_INPUT_NEXT_RESET;
    2796           0 :       *error = TCP_ERROR_NO_LISTENER;
    2797             :     }
    2798           0 : }
    2799             : /**
                     :  * Select the next node for a buffer whose connection is known.
                     :  *
                     :  * The next index and error code are read from tm->dispatch_table, indexed
                     :  * by the connection state and by the SYN/ACK/RST/FIN bits of the TCP
                     :  * header (filter_flags). Also increments tc->segs_in and records
                     :  * tc->state in the buffer's tcp.flags metadata.
                     :  *
                     :  * @param tm            tcp main, owner of the dispatch table
                     :  * @param tc            connection the buffer was matched to
                     :  * @param b             buffer being dispatched
                     :  * @param next          out: next node index for the buffer
                     :  * @param err_counters  error counter array, incremented when the table
                     :  *                      entry carries an error other than TCP_ERROR_NONE
                     :  */
    2800             : static inline void
    2801     1020890 : tcp_input_dispatch_buffer (tcp_main_t *tm, tcp_connection_t *tc,
    2802             :                            vlib_buffer_t *b, u16 *next, u16 *err_counters)
    2803             : {
    2804             :   tcp_header_t *tcp;
    2805             :   u32 error;
    2806             :   u8 flags;
    2807             : 
    2808     1020890 :   tcp = tcp_buffer_hdr (b);
    2809     1020890 :   flags = tcp->flags & filter_flags;
    2810     1020890 :   *next = tm->dispatch_table[tc->state][flags].next;
    2811     1020890 :   error = tm->dispatch_table[tc->state][flags].error;
    2812     1020890 :   tc->segs_in += 1;
    2813             : 
    2814             :   /* Track connection state when packet was received. It is required
    2815             :    * for @ref tcp46_listen_inline to detect whether we reached
    2816             :    * the node as a result of a SYN packet received while in time-wait
    2817             :    * state. In this case the connection_index in vnet buffer will point
    2818             :    * to the existing tcp connection and not the listener
    2819             :    */
    2820     1020890 :   vnet_buffer (b)->tcp.flags = tc->state;
    2821             : 
    2822     1020890 :   if (PREDICT_FALSE (error != TCP_ERROR_NONE))
    2823             :     {
    2824         174 :       tcp_inc_err_counter (err_counters, error, 1);
    2825         174 :       if (error == TCP_ERROR_DISPATCH)
    2826           0 :         clib_warning ("tcp conn %u disp error state %U flags %U",
    2827             :                       tc->c_c_index, format_tcp_state, tc->state,
    2828             :                       format_tcp_flags, (int) flags);
    2829             :     }
    2830     1020890 : }
    2831             : /**
                     :  * Common body of the TCP input nodes: looks up the connection for each
                     :  * buffer of the frame and picks the next node for it.
                     :  *
                     :  * Buffers are handled two at a time while at least four remain, then one
                     :  * at a time. When the lookup returns a connection, its c_c_index is stored
                     :  * in the buffer's tcp metadata and tcp_input_dispatch_buffer selects the
                     :  * next node; otherwise tcp_input_set_error_next chooses between drop, punt
                     :  * and reset and the resulting error is counted. Every next index defaults
                     :  * to TCP_INPUT_NEXT_DROP before the lookup.
                     :  *
                     :  * @param vm           vlib main of the calling thread
                     :  * @param node         node runtime (trace flag, enqueue context)
                     :  * @param frame        frame of buffer indices to process
                     :  * @param is_ip4       1 for IPv4, 0 for IPv6
                     :  * @param is_nolookup  forwarded unchanged to tcp_input_lookup_buffer
                     :  * @return number of buffers in the frame (frame->n_vectors)
                     :  */
    2832             : always_inline uword
    2833       30283 : tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
    2834             :                     vlib_frame_t * frame, int is_ip4, u8 is_nolookup)
    2835             : {
    2836       30283 :   u32 n_left_from, *from, thread_index = vm->thread_index;
    2837       30283 :   tcp_main_t *tm = vnet_get_tcp_main ();
    2838             :   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
    2839             :   u16 nexts[VLIB_FRAME_SIZE], *next;
    2840       30283 :   u16 err_counters[TCP_N_ERROR] = { 0 };
    2841             : 
    2842       30283 :   tcp_update_time_now (tcp_get_worker (thread_index));
    2843             : 
    2844       30283 :   from = vlib_frame_vector_args (frame);
    2845       30283 :   n_left_from = frame->n_vectors;
    2846       30283 :   vlib_get_buffers (vm, from, bufs, n_left_from);
    2847             : 
    2848       30283 :   b = bufs;
    2849       30283 :   next = nexts;
    2850             :   /* Two buffers per iteration; at least four must remain because b[2]
                     :    * and b[3] are prefetched inside the loop */
    2851      510030 :   while (n_left_from >= 4)
    2852             :     {
    2853      479747 :       u32 error0 = TCP_ERROR_NO_LISTENER, error1 = TCP_ERROR_NO_LISTENER;
    2854             :       tcp_connection_t *tc0, *tc1;
    2855             : 
    2856             :       {
    2857      479747 :         vlib_prefetch_buffer_header (b[2], STORE);
    2858      479747 :         CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
    2859             : 
    2860      479747 :         vlib_prefetch_buffer_header (b[3], STORE);
    2861      479747 :         CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
    2862             :       }
    2863             : 
    2864      479747 :       next[0] = next[1] = TCP_INPUT_NEXT_DROP;
    2865             : 
    2866      479747 :       tc0 = tcp_input_lookup_buffer (b[0], thread_index, &error0, is_ip4,
    2867             :                                      is_nolookup);
    2868      479747 :       tc1 = tcp_input_lookup_buffer (b[1], thread_index, &error1, is_ip4,
    2869             :                                      is_nolookup);
    2870             :       /* Fast path when both lookups returned a connection */
    2871      479747 :       if (PREDICT_TRUE (!tc0 + !tc1 == 0))
    2872             :         {
    2873      479747 :           ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0])));
    2874      479747 :           ASSERT (tcp_lookup_is_valid (tc1, b[1], tcp_buffer_hdr (b[1])));
    2875             : 
    2876      479747 :           vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
    2877      479747 :           vnet_buffer (b[1])->tcp.connection_index = tc1->c_c_index;
    2878             : 
    2879      479747 :           tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], err_counters);
    2880      479747 :           tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], err_counters);
    2881             :         }
    2882             :       else
    2883             :         {
    2884           0 :           if (PREDICT_TRUE (tc0 != 0))
    2885             :             {
    2886           0 :               ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0])));
    2887           0 :               vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
    2888           0 :               tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0],
    2889             :                                          err_counters);
    2890             :             }
    2891             :           else
    2892             :             {
    2893           0 :               tcp_input_set_error_next (tm, &next[0], &error0, is_ip4);
    2894           0 :               tcp_inc_err_counter (err_counters, error0, 1);
    2895             :             }
    2896             : 
    2897           0 :           if (PREDICT_TRUE (tc1 != 0))
    2898             :             {
    2899           0 :               ASSERT (tcp_lookup_is_valid (tc1, b[1], tcp_buffer_hdr (b[1])));
    2900           0 :               vnet_buffer (b[1])->tcp.connection_index = tc1->c_c_index;
    2901           0 :               tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1],
    2902             :                                          err_counters);
    2903             :             }
    2904             :           else
    2905             :             {
    2906           0 :               tcp_input_set_error_next (tm, &next[1], &error1, is_ip4);
    2907           0 :               tcp_inc_err_counter (err_counters, error1, 1);
    2908             :             }
    2909             :         }
    2910             : 
    2911      479747 :       b += 2;
    2912      479747 :       next += 2;
    2913      479747 :       n_left_from -= 2;
    2914             :     }
    2915       91675 :   while (n_left_from > 0)
    2916             :     {
    2917             :       tcp_connection_t *tc0;
    2918       61392 :       u32 error0 = TCP_ERROR_NO_LISTENER;
    2919             : 
    2920       61392 :       if (n_left_from > 1)
    2921             :         {
    2922       31109 :           vlib_prefetch_buffer_header (b[1], STORE);
    2923       31109 :           CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
    2924             :         }
    2925             : 
    2926       61392 :       next[0] = TCP_INPUT_NEXT_DROP;
    2927       61392 :       tc0 = tcp_input_lookup_buffer (b[0], thread_index, &error0, is_ip4,
    2928             :                                      is_nolookup);
    2929       61392 :       if (PREDICT_TRUE (tc0 != 0))
    2930             :         {
    2931       61392 :           ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0])));
    2932       61392 :           vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
    2933       61392 :           tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], err_counters);
    2934             :         }
    2935             :       else
    2936             :         {
    2937           0 :           tcp_input_set_error_next (tm, &next[0], &error0, is_ip4);
    2938           0 :           tcp_inc_err_counter (err_counters, error0, 1);
    2939             :         }
    2940             : 
    2941       61392 :       b += 1;
    2942       61392 :       next += 1;
    2943       61392 :       n_left_from -= 1;
    2944             :     }
    2945             : 
    2946       30283 :   if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
    2947           3 :     tcp_input_trace_frame (vm, node, bufs, nexts, frame->n_vectors, is_ip4);
    2948             : 
    2949     1150750 :   tcp_store_err_counters (input, err_counters);
    2950       30283 :   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
    2951       30283 :   return frame->n_vectors;
    2952             : }
    2953             : 
    2954        2236 : VLIB_NODE_FN (tcp4_input_nolookup_node) (vlib_main_t * vm,
    2955             :                                          vlib_node_runtime_t * node,
    2956             :                                          vlib_frame_t * from_frame)
    2957             : {
    2958           0 :   return tcp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ,
    2959             :                              1 /* is_nolookup */ );
    2960             : }
    2961             : 
    2962        2236 : VLIB_NODE_FN (tcp6_input_nolookup_node) (vlib_main_t * vm,
    2963             :                                          vlib_node_runtime_t * node,
    2964             :                                          vlib_frame_t * from_frame)
    2965             : {
    2966           0 :   return tcp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ,
    2967             :                              1 /* is_nolookup */ );
    2968             : }
    2969             : 
    2970             : /* *INDENT-OFF* */
    2971      178120 : VLIB_REGISTER_NODE (tcp4_input_nolookup_node) =
    2972             : {
    2973             :   .name = "tcp4-input-nolookup",
    2974             :   /* Takes a vector of packets. */
    2975             :   .vector_size = sizeof (u32),
    2976             :   .n_errors = TCP_N_ERROR,
    2977             :   .error_counters = tcp_input_error_counters,
    2978             :   .n_next_nodes = TCP_INPUT_N_NEXT,
    2979             :   .next_nodes =
    2980             :   {
    2981             : #define _(s,n) [TCP_INPUT_NEXT_##s] = n,
    2982             :     foreach_tcp4_input_next
    2983             : #undef _
    2984             :   },
    2985             :   .format_buffer = format_tcp_header,
    2986             :   .format_trace = format_tcp_rx_trace,
    2987             : };
    2988             : /* *INDENT-ON* */
    2989             : 
    2990             : /* *INDENT-OFF* */
    2991      178120 : VLIB_REGISTER_NODE (tcp6_input_nolookup_node) =
    2992             : {
    2993             :   .name = "tcp6-input-nolookup",
    2994             :   /* Takes a vector of packets. */
    2995             :   .vector_size = sizeof (u32),
    2996             :   .n_errors = TCP_N_ERROR,
    2997             :   .error_counters = tcp_input_error_counters,
    2998             :   .n_next_nodes = TCP_INPUT_N_NEXT,
    2999             :   .next_nodes =
    3000             :   {
    3001             : #define _(s,n) [TCP_INPUT_NEXT_##s] = n,
    3002             :     foreach_tcp6_input_next
    3003             : #undef _
    3004             :   },
    3005             :   .format_buffer = format_tcp_header,
    3006             :   .format_trace = format_tcp_rx_trace,
    3007             : };
    3008             : /* *INDENT-ON* */
    3009             : 
    3010       32502 : VLIB_NODE_FN (tcp4_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
    3011             :                                 vlib_frame_t * from_frame)
    3012             : {
    3013       30266 :   return tcp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ,
    3014             :                              0 /* is_nolookup */ );
    3015             : }
    3016             : 
    3017        2253 : VLIB_NODE_FN (tcp6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
    3018             :                                 vlib_frame_t * from_frame)
    3019             : {
    3020          17 :   return tcp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ,
    3021             :                              0 /* is_nolookup */ );
    3022             : }
    3023             : 
    3024             : /* *INDENT-OFF* */
    3025      178120 : VLIB_REGISTER_NODE (tcp4_input_node) =
    3026             : {
    3027             :   .name = "tcp4-input",
    3028             :   /* Takes a vector of packets. */
    3029             :   .vector_size = sizeof (u32),
    3030             :   .n_errors = TCP_N_ERROR,
    3031             :   .error_counters = tcp_input_error_counters,
    3032             :   .n_next_nodes = TCP_INPUT_N_NEXT,
    3033             :   .next_nodes =
    3034             :   {
    3035             : #define _(s,n) [TCP_INPUT_NEXT_##s] = n,
    3036             :     foreach_tcp4_input_next
    3037             : #undef _
    3038             :   },
    3039             :   .format_buffer = format_tcp_header,
    3040             :   .format_trace = format_tcp_rx_trace,
    3041             : };
    3042             : /* *INDENT-ON* */
    3043             : 
    3044             : /* *INDENT-OFF* */
    3045      178120 : VLIB_REGISTER_NODE (tcp6_input_node) =
    3046             : {
    3047             :   .name = "tcp6-input",
    3048             :   /* Takes a vector of packets. */
    3049             :   .vector_size = sizeof (u32),
    3050             :   .n_errors = TCP_N_ERROR,
    3051             :   .error_counters = tcp_input_error_counters,
    3052             :   .n_next_nodes = TCP_INPUT_N_NEXT,
    3053             :   .next_nodes =
    3054             :   {
    3055             : #define _(s,n) [TCP_INPUT_NEXT_##s] = n,
    3056             :     foreach_tcp6_input_next
    3057             : #undef _
    3058             :   },
    3059             :   .format_buffer = format_tcp_header,
    3060             :   .format_trace = format_tcp_rx_trace,
    3061             : };
    3062             : /* *INDENT-ON* */
    3063             : 
    3064             : #ifndef CLIB_MARCH_VARIANT
    3065             : void
    3066           0 : tcp_check_gso (tcp_connection_t *tc)
    3067             : {
    3068           0 :   tcp_check_tx_offload (tc, tc->c_is_ip4);
    3069           0 : }
    3070             : 
    3071             : static void
    3072         559 : tcp_dispatch_table_init (tcp_main_t * tm)
    3073             : {
    3074             :   int i, j;
    3075        6708 :   for (i = 0; i < ARRAY_LEN (tm->dispatch_table); i++)
    3076      399685 :     for (j = 0; j < ARRAY_LEN (tm->dispatch_table[i]); j++)
    3077             :       {
    3078      393536 :         tm->dispatch_table[i][j].next = TCP_INPUT_NEXT_DROP;
    3079      393536 :         tm->dispatch_table[i][j].error = TCP_ERROR_DISPATCH;
    3080             :       }
    3081             : 
    3082             : #define _(t,f,n,e)                                              \
    3083             : do {                                                            \
    3084             :     tm->dispatch_table[TCP_STATE_##t][f].next = (n);                 \
    3085             :     tm->dispatch_table[TCP_STATE_##t][f].error = (e);                \
    3086             : } while (0)
    3087             : 
    3088             :   /* RFC 793: in LISTEN, drop a RST; answer an ACK with a RST */
    3089         559 :   _(LISTEN, 0, TCP_INPUT_NEXT_DROP, TCP_ERROR_SEGMENT_INVALID);
    3090         559 :   _(LISTEN, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_ACK_INVALID);
    3091         559 :   _(LISTEN, TCP_FLAG_RST, TCP_INPUT_NEXT_DROP, TCP_ERROR_INVALID_CONNECTION);
    3092         559 :   _(LISTEN, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE);
    3093         559 :   _(LISTEN, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET,
    3094             :     TCP_ERROR_ACK_INVALID);
    3095         559 :   _(LISTEN, TCP_FLAG_SYN | TCP_FLAG_RST, TCP_INPUT_NEXT_DROP,
    3096             :     TCP_ERROR_SEGMENT_INVALID);
    3097         559 :   _(LISTEN, TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
    3098             :     TCP_ERROR_SEGMENT_INVALID);
    3099         559 :   _(LISTEN, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
    3100             :     TCP_ERROR_INVALID_CONNECTION);
    3101         559 :   _(LISTEN, TCP_FLAG_FIN, TCP_INPUT_NEXT_RESET, TCP_ERROR_SEGMENT_INVALID);
    3102         559 :   _(LISTEN, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET,
    3103             :     TCP_ERROR_SEGMENT_INVALID);
    3104         559 :   _(LISTEN, TCP_FLAG_FIN | TCP_FLAG_RST, TCP_INPUT_NEXT_DROP,
    3105             :     TCP_ERROR_SEGMENT_INVALID);
    3106         559 :   _(LISTEN, TCP_FLAG_FIN | TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
    3107             :     TCP_ERROR_SEGMENT_INVALID);
    3108         559 :   _(LISTEN, TCP_FLAG_FIN | TCP_FLAG_SYN, TCP_INPUT_NEXT_DROP,
    3109             :     TCP_ERROR_SEGMENT_INVALID);
    3110         559 :   _(LISTEN, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
    3111             :     TCP_ERROR_SEGMENT_INVALID);
    3112         559 :   _(LISTEN, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST, TCP_INPUT_NEXT_DROP,
    3113             :     TCP_ERROR_SEGMENT_INVALID);
    3114         559 :   _(LISTEN, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3115             :     TCP_INPUT_NEXT_DROP, TCP_ERROR_SEGMENT_INVALID);
    3116             :   /* ACK for a SYN-ACK -> tcp-rcv-process. */
    3117         559 :   _(SYN_RCVD, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3118         559 :   _(SYN_RCVD, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3119         559 :   _(SYN_RCVD, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3120             :     TCP_ERROR_NONE);
    3121         559 :   _(SYN_RCVD, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3122         559 :   _(SYN_RCVD, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3123             :     TCP_ERROR_NONE);
    3124         559 :   _(SYN_RCVD, TCP_FLAG_SYN | TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS,
    3125             :     TCP_ERROR_NONE);
    3126         559 :   _(SYN_RCVD, TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3127             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3128         559 :   _(SYN_RCVD, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3129         559 :   _(SYN_RCVD, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3130             :     TCP_ERROR_NONE);
    3131         559 :   _(SYN_RCVD, TCP_FLAG_FIN | TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS,
    3132             :     TCP_ERROR_NONE);
    3133         559 :   _(SYN_RCVD, TCP_FLAG_FIN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3134             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3135         559 :   _(SYN_RCVD, TCP_FLAG_FIN | TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS,
    3136             :     TCP_ERROR_NONE);
    3137         559 :   _(SYN_RCVD, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST,
    3138             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3139         559 :   _(SYN_RCVD, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_ACK,
    3140             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3141         559 :   _(SYN_RCVD, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3142             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3143         559 :   _(SYN_RCVD, 0, TCP_INPUT_NEXT_DROP, TCP_ERROR_SEGMENT_INVALID);
    3144             :   /* SYN-ACK for a SYN */
    3145         559 :   _(SYN_SENT, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT,
    3146             :     TCP_ERROR_NONE);
    3147         559 :   _(SYN_SENT, TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE);
    3148         559 :   _(SYN_SENT, TCP_FLAG_RST, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE);
    3149         559 :   _(SYN_SENT, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT,
    3150             :     TCP_ERROR_NONE);
    3151         559 :   _(SYN_SENT, TCP_FLAG_FIN, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE);
    3152         559 :   _(SYN_SENT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT,
    3153             :     TCP_ERROR_NONE);
    3154             :   /* ACK for an established connection -> tcp-established. */
    3155         559 :   _(ESTABLISHED, TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
    3156             :   /* FIN for an established connection -> tcp-established. */
    3157         559 :   _(ESTABLISHED, TCP_FLAG_FIN, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
    3158         559 :   _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED,
    3159             :     TCP_ERROR_NONE);
    3160         559 :   _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_RST, TCP_INPUT_NEXT_ESTABLISHED,
    3161             :     TCP_ERROR_NONE);
    3162         559 :   _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3163             :     TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
    3164         559 :   _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_SYN, TCP_INPUT_NEXT_ESTABLISHED,
    3165             :     TCP_ERROR_NONE);
    3166         559 :   _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_ACK,
    3167             :     TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
    3168         559 :   _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST,
    3169             :     TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
    3170         559 :   _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3171             :     TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
    3172         559 :   _(ESTABLISHED, TCP_FLAG_RST, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
    3173         559 :   _(ESTABLISHED, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED,
    3174             :     TCP_ERROR_NONE);
    3175         559 :   _(ESTABLISHED, TCP_FLAG_SYN, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
    3176         559 :   _(ESTABLISHED, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED,
    3177             :     TCP_ERROR_NONE);
    3178         559 :   _(ESTABLISHED, TCP_FLAG_SYN | TCP_FLAG_RST, TCP_INPUT_NEXT_ESTABLISHED,
    3179             :     TCP_ERROR_NONE);
    3180         559 :   _(ESTABLISHED, TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3181             :     TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
    3182         559 :   _(ESTABLISHED, 0, TCP_INPUT_NEXT_DROP, TCP_ERROR_SEGMENT_INVALID);
    3183             :   /* ACK or FIN-ACK to our FIN */
    3184         559 :   _(FIN_WAIT_1, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3185         559 :   _(FIN_WAIT_1, TCP_FLAG_ACK | TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS,
    3186             :     TCP_ERROR_NONE);
    3187             :   /* FIN in reply to our FIN from the other side */
    3188         559 :   _(FIN_WAIT_1, 0, TCP_INPUT_NEXT_DROP, TCP_ERROR_SEGMENT_INVALID);
    3189         559 :   _(FIN_WAIT_1, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3190         559 :   _(FIN_WAIT_1, TCP_FLAG_FIN | TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS,
    3191             :     TCP_ERROR_NONE);
    3192         559 :   _(FIN_WAIT_1, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_ACK,
    3193             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3194         559 :   _(FIN_WAIT_1, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST,
    3195             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3196         559 :   _(FIN_WAIT_1, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3197             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3198         559 :   _(FIN_WAIT_1, TCP_FLAG_FIN | TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS,
    3199             :     TCP_ERROR_NONE);
    3200         559 :   _(FIN_WAIT_1, TCP_FLAG_FIN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3201             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3202         559 :   _(FIN_WAIT_1, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3203         559 :   _(FIN_WAIT_1, TCP_FLAG_SYN | TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS,
    3204             :     TCP_ERROR_NONE);
    3205         559 :   _(FIN_WAIT_1, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3206             :     TCP_ERROR_NONE);
    3207         559 :   _(FIN_WAIT_1, TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3208             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3209         559 :   _(FIN_WAIT_1, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3210         559 :   _(FIN_WAIT_1, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3211             :     TCP_ERROR_NONE);
    3212         559 :   _(CLOSING, 0, TCP_INPUT_NEXT_DROP, TCP_ERROR_SEGMENT_INVALID);
    3213         559 :   _(CLOSING, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3214         559 :   _(CLOSING, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3215         559 :   _(CLOSING, TCP_FLAG_SYN | TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS,
    3216             :     TCP_ERROR_NONE);
    3217         559 :   _(CLOSING, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3218             :     TCP_ERROR_NONE);
    3219         559 :   _(CLOSING, TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3220             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3221         559 :   _(CLOSING, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3222         559 :   _(CLOSING, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3223             :     TCP_ERROR_NONE);
    3224         559 :   _(CLOSING, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3225         559 :   _(CLOSING, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3226             :     TCP_ERROR_NONE);
    3227         559 :   _(CLOSING, TCP_FLAG_FIN | TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS,
    3228             :     TCP_ERROR_NONE);
    3229         559 :   _(CLOSING, TCP_FLAG_FIN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3230             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3231         559 :   _(CLOSING, TCP_FLAG_FIN | TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS,
    3232             :     TCP_ERROR_NONE);
    3233         559 :   _(CLOSING, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_ACK,
    3234             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3235         559 :   _(CLOSING, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST,
    3236             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3237         559 :   _(CLOSING, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3238             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3239             :   /* FIN confirming that the peer (app) has closed */
    3240         559 :   _(FIN_WAIT_2, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3241         559 :   _(FIN_WAIT_2, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3242         559 :   _(FIN_WAIT_2, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3243             :     TCP_ERROR_NONE);
    3244         559 :   _(FIN_WAIT_2, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3245         559 :   _(FIN_WAIT_2, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3246             :     TCP_ERROR_NONE);
    3247         559 :   _(FIN_WAIT_2, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3248         559 :   _(CLOSE_WAIT, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3249         559 :   _(CLOSE_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3250             :     TCP_ERROR_NONE);
    3251         559 :   _(CLOSE_WAIT, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3252         559 :   _(CLOSE_WAIT, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3253             :     TCP_ERROR_NONE);
    3254         559 :   _(CLOSE_WAIT, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3255         559 :   _(LAST_ACK, 0, TCP_INPUT_NEXT_DROP, TCP_ERROR_SEGMENT_INVALID);
    3256         559 :   _(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3257         559 :   _(LAST_ACK, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3258         559 :   _(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3259             :     TCP_ERROR_NONE);
    3260         559 :   _(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS,
    3261             :     TCP_ERROR_NONE);
    3262         559 :   _(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_ACK,
    3263             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3264         559 :   _(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS,
    3265             :     TCP_ERROR_NONE);
    3266         559 :   _(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3267             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3268         559 :   _(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST,
    3269             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3270         559 :   _(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3271             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3272         559 :   _(LAST_ACK, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3273         559 :   _(LAST_ACK, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3274             :     TCP_ERROR_NONE);
    3275         559 :   _(LAST_ACK, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3276         559 :   _(LAST_ACK, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3277             :     TCP_ERROR_NONE);
    3278         559 :   _(LAST_ACK, TCP_FLAG_SYN | TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS,
    3279             :     TCP_ERROR_NONE);
    3280         559 :   _(LAST_ACK, TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK,
    3281             :     TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3282         559 :   _(TIME_WAIT, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE);
    3283         559 :   _(TIME_WAIT, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3284         559 :   _(TIME_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3285             :     TCP_ERROR_NONE);
    3286         559 :   _(TIME_WAIT, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3287         559 :   _(TIME_WAIT, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
    3288             :     TCP_ERROR_NONE);
    3289         559 :   _(TIME_WAIT, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
    3290             :   /* RFC 793 CLOSED: An incoming segment containing a RST is discarded. An
    3291             :    * incoming segment not containing a RST causes a RST to be sent in
    3292             :    * response. */
    3293         559 :   _(CLOSED, TCP_FLAG_RST, TCP_INPUT_NEXT_DROP, TCP_ERROR_CONNECTION_CLOSED);
    3294         559 :   _(CLOSED, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
    3295             :     TCP_ERROR_CONNECTION_CLOSED);
    3296         559 :   _(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED);
    3297         559 :   _ (CLOSED, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE);
    3298         559 :   _(CLOSED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET,
    3299             :     TCP_ERROR_CONNECTION_CLOSED);
    3300             : #undef _
    3301         559 : }
    3302             : 
    3303             : static clib_error_t *
    3304         559 : tcp_input_init (vlib_main_t * vm)
    3305             : {
    3306         559 :   clib_error_t *error = 0;
    3307         559 :   tcp_main_t *tm = vnet_get_tcp_main ();
    3308             : 
    3309         559 :   if ((error = vlib_call_init_function (vm, tcp_init)))
    3310           0 :     return error;
    3311             : 
    3312             :   /* Initialize dispatch table. */
    3313         559 :   tcp_dispatch_table_init (tm);
    3314             : 
    3315         559 :   return error;
    3316             : }
    3317             : 
    3318       57119 : VLIB_INIT_FUNCTION (tcp_input_init);
    3319             : 
    3320             : #endif /* CLIB_MARCH_VARIANT */
    3321             : 
    3322             : /*
    3323             :  * fd.io coding-style-patch-verification: ON
    3324             :  *
    3325             :  * Local Variables:
    3326             :  * eval: (c-set-style "gnu")
    3327             :  * End:
    3328             :  */

Generated by: LCOV version 1.14