LCOV - code coverage report
Current view: top level - vlib/unix - input.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 122 169 72.2 %
Date: 2023-10-26 01:39:38 Functions: 11 11 100.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2015 Cisco and/or its affiliates.
       3             :  * Licensed under the Apache License, Version 2.0 (the "License");
       4             :  * you may not use this file except in compliance with the License.
       5             :  * You may obtain a copy of the License at:
       6             :  *
       7             :  *     http://www.apache.org/licenses/LICENSE-2.0
       8             :  *
       9             :  * Unless required by applicable law or agreed to in writing, software
      10             :  * distributed under the License is distributed on an "AS IS" BASIS,
      11             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12             :  * See the License for the specific language governing permissions and
      13             :  * limitations under the License.
      14             :  */
      15             : /*
      16             :  * input.c: Unix file input
      17             :  *
      18             :  * Copyright (c) 2008 Eliot Dresselhaus
      19             :  *
      20             :  * Permission is hereby granted, free of charge, to any person obtaining
      21             :  * a copy of this software and associated documentation files (the
      22             :  * "Software"), to deal in the Software without restriction, including
      23             :  * without limitation the rights to use, copy, modify, merge, publish,
      24             :  * distribute, sublicense, and/or sell copies of the Software, and to
      25             :  * permit persons to whom the Software is furnished to do so, subject to
      26             :  * the following conditions:
      27             :  *
      28             :  * The above copyright notice and this permission notice shall be
      29             :  * included in all copies or substantial portions of the Software.
      30             :  *
      31             :  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      32             :  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      33             :  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
      34             :  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
      35             :  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
      36             :  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
      37             :  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
      38             :  */
      39             : 
      40             : #include <vlib/vlib.h>
      41             : #include <vlib/unix/unix.h>
      42             : #include <signal.h>
      43             : #include <unistd.h>
      44             : #include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
      45             : 
      46             : /* FIXME autoconf */
      47             : #define HAVE_LINUX_EPOLL
      48             : 
      49             : #ifdef HAVE_LINUX_EPOLL
      50             : 
      51             : #include <sys/epoll.h>
      52             : 
      53             : typedef struct  /* Per-thread epoll state; linux_epoll_mains holds one of these per thread index. */
      54             : {
      55             :   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
      56             :   int epoll_fd;  /* epoll instance of this thread, or -1 while none is open */
      57             :   struct epoll_event *epoll_events;  /* vector used as the output buffer of the epoll wait calls */
      58             :   int n_epoll_fds;  /* registered fd count: +1 per successful ADD, -1 per successful DELETE */
      59             : 
      60             :   /* Statistics. */
      61             :   u64 epoll_files_ready;  /* running sum of the ready counts returned by successful waits */
      62             :   u64 epoll_waits;  /* number of waits that returned without error */
      63             : } linux_epoll_main_t;
      64             : 
      65             : static linux_epoll_main_t *linux_epoll_mains = 0;  /* vector looked up by thread index */
      66             : /* Apply an add, modify or delete of file f to the epoll set owned by f->polling_thread_index. */
      67             : static void  /* No status is returned; every failure is only reported through a warning. */
      68        7667 : linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type)
      69             : {
      70        7667 :   clib_file_main_t *fm = &file_main;
      71        7667 :   linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains,
      72             :                                              f->polling_thread_index);
      73        7667 :   struct epoll_event e = { 0 };
      74        7667 :   int op, add_del = 0;  /* add_del is the change applied to n_epoll_fds on success */
      75             : 
      76        7667 :   e.events = EPOLLIN;  /* readability is always watched */
      77        7667 :   if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
      78         730 :     e.events |= EPOLLOUT;
      79        7667 :   if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED)
      80        2496 :     e.events |= EPOLLET;
      81        7667 :   e.data.u32 = f - fm->file_pool;  /* pool index of f; the input node reads it back from the event */
      82             : 
      83        7667 :   op = -1;  /* placeholder; every case that does not return overwrites it */
      84             : 
      85        7667 :   switch (update_type)
      86             :     {
      87        3708 :     case UNIX_FILE_UPDATE_ADD:
      88        3708 :       op = EPOLL_CTL_ADD;
      89        3708 :       add_del = 1;
      90        3708 :       break;
      91             : 
      92        1460 :     case UNIX_FILE_UPDATE_MODIFY:
      93        1460 :       op = EPOLL_CTL_MOD;
      94        1460 :       break;
      95             : 
      96        2499 :     case UNIX_FILE_UPDATE_DELETE:
      97        2499 :       op = EPOLL_CTL_DEL;
      98        2499 :       add_del = -1;
      99        2499 :       break;
     100             : 
     101           0 :     default:
     102           0 :       clib_warning ("unknown update_type %d", update_type);
     103           0 :       return;
     104             :     }
     105             : 
     106             :   /* Lazy creation: an epoll instance is opened on the first ADD when none is open (see the init function: only the first element opens one eagerly). */
     107        7667 :   if (update_type == UNIX_FILE_UPDATE_ADD && em->epoll_fd == -1)
     108             :     {
     109           0 :       em->epoll_fd = epoll_create (1);
     110           0 :       if (em->epoll_fd < 0)
     111             :         {
     112           0 :           clib_unix_warning ("epoll_create");
     113           0 :           return;
     114             :         }
     115           0 :       em->n_epoll_fds = 0;
     116             :     }
     117             : 
     118        7667 :   if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0)  /* NOTE(review): MODIFY and DELETE get here even when epoll_fd is -1; the call then fails and only the warning below is emitted */
     119             :     {
     120           0 :       clib_unix_warning ("epoll_ctl");
     121           0 :       return;  /* n_epoll_fds is left untouched on failure */
     122             :     }
     123             : 
     124        7667 :   em->n_epoll_fds += add_del;
     125             : 
     126        7667 :   if (em->n_epoll_fds == 0)  /* last registration gone: release the epoll instance */
     127             :     {
     128           0 :       close (em->epoll_fd);  /* NOTE(review): not restricted to worker threads, while the input path waits on the main thread fd unconditionally -- confirm thread 0 can never reach zero */
     129           0 :       em->epoll_fd = -1;
     130             :     }
     131             : }
     132             : /* One pass of the epoll input node for thread_index: pick a sleep budget, wait for fd events, then run the read, write or error callbacks of each ready file. */
     133             : static_always_inline uword  /* Always returns 0; frame is never referenced in the body. */
     134    85651700 : linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
     135             :                           vlib_frame_t * frame, u32 thread_index)
     136             : {
     137    85651700 :   unix_main_t *um = &unix_main;
     138    85651700 :   clib_file_main_t *fm = &file_main;
     139    85651700 :   linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, thread_index);
     140             :   struct epoll_event *e;
     141             :   int n_fds_ready;
     142    85651700 :   int is_main = (thread_index == 0);
     143             : 
     144             :   {
     145    85651700 :     vlib_node_main_t *nm = &vm->node_main;
     146             :     u32 ticks_until_expiration;
     147             :     f64 timeout;
     148             :     f64 now;  /* assigned only for workers; every later read sits on a worker-only path */
     149    85651700 :     int timeout_ms = 0, max_timeout_ms = 10;  /* a timeout_ms of 0 turns the wait below into a non-blocking poll */
     150    85651700 :     f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
     151             : 
     152    85651800 :     if (is_main == 0)
     153      169203 :       now = vlib_time_now (vm);
     154             : 
     155             :     /*
     156             :      * If we've been asked for a fixed-sleep between main loop polls,
     157             :      * do so right away.
     158             :      */
     159    85652400 :     if (PREDICT_FALSE (is_main && um->poll_sleep_usec))
     160           0 :       {
     161             :         struct timespec ts, tsrem;
     162           0 :         timeout = 0;
     163           0 :         timeout_ms = 0;
     164           0 :         node->input_main_loops_per_call = 0;
     165           0 :         ts.tv_sec = 0;
     166           0 :         ts.tv_nsec = 1000 * um->poll_sleep_usec;  /* NOTE(review): exceeds the valid tv_nsec range once poll_sleep_usec >= 1000000; nanosleep then fails with EINVAL -- confirm the setting is bounded elsewhere */
     167             : 
     168           0 :         while (nanosleep (&ts, &tsrem) < 0)  /* NOTE(review): retries on any failure, not only EINTR; tsrem is only written after an interrupted sleep */
     169             :           {
     170           0 :             ts = tsrem;
     171             :           }
     172             :       }
     173             :     /* If we're not working very hard, decide how long to sleep */
     174    85652400 :     else if (is_main && vector_rate < 2 && vm->api_queue_nonempty == 0
     175    85454900 :              && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
     176             :       {
     177    84915600 :         ticks_until_expiration = TW (tw_timer_first_expires_in_ticks)
     178    84915600 :           ((TWT (tw_timer_wheel) *) nm->timing_wheel);
     179             : 
     180             :         /* Nothing on the fast wheel, sleep 10ms */
     181    84915600 :         if (ticks_until_expiration == TW_SLOTS_PER_RING)
     182             :           {
     183           0 :             timeout = 10e-3;
     184           0 :             timeout_ms = max_timeout_ms;
     185             :           }
     186             :         else
     187             :           {
     188    84915600 :             timeout = (f64) ticks_until_expiration *1e-5;  /* ticks scaled to seconds; presumably one tick is 10 us -- TODO confirm against the timer wheel setup */
     189    84915600 :             if (timeout < 1e-3)
     190    83745000 :               timeout_ms = 0;  /* next timer is under 1 ms away: do not block */
     191             :             else
     192             :               {
     193     1170600 :                 timeout_ms = timeout * 1e3;  /* conversion to int drops the fractional millisecond */
     194             :                 /* Must be between 1 and 10 ms. */
     195     1170600 :                 timeout_ms = clib_max (1, timeout_ms);
     196     1170600 :                 timeout_ms = clib_min (max_timeout_ms, timeout_ms);
     197             :               }
     198             :           }
     199    84915600 :         node->input_main_loops_per_call = 0;  /* presumably asks for this node to run again on the next main loop -- TODO confirm */
     200             :       }
     201      736798 :     else if (is_main == 0 && vector_rate < 2 &&
     202      169486 :              (vlib_get_first_main ()->time_last_barrier_release + 0.5 < now) &&
     203      129412 :              nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
     204             :       {
     205      128667 :         timeout = 10e-3;  /* idle worker: low vector rate, over 0.5 s since the last barrier release, no polling input nodes */
     206      128667 :         timeout_ms = max_timeout_ms;
     207      128667 :         node->input_main_loops_per_call = 0;
     208             :       }
     209             :     else                        /* busy */
     210             :       {
     211             :         /* Don't come back for a respectable number of dispatch cycles */
     212      608071 :         node->input_main_loops_per_call = 1024;  /* timeout_ms keeps its initial 0 on this path */
     213             :       }
     214             : 
     215             :     /* Allow any signal to wakeup our sleep. */
     216    85652300 :     if (is_main || em->epoll_fd != -1)  /* the main thread waits regardless of the value of epoll_fd */
     217    85482900 :       {
     218             :         static sigset_t unblock_all_signals;  /* NOTE(review): never passed through sigemptyset; relies on zero-initialised static storage being an empty signal set */
     219    85482900 :         n_fds_ready = epoll_pwait (em->epoll_fd,
     220             :                                    em->epoll_events,
     221    85482900 :                                    vec_len (em->epoll_events),
     222             :                                    timeout_ms, &unblock_all_signals);
     223             : 
     224             :         /* This kludge is necessary to run over absurdly old kernels */
     225    85482900 :         if (n_fds_ready < 0 && errno == ENOSYS)
     226             :           {
     227           0 :             n_fds_ready = epoll_wait (em->epoll_fd,
     228             :                                       em->epoll_events,
     229           0 :                                       vec_len (em->epoll_events), timeout_ms);
     230             :           }
     231             : 
     232             :       }
     233             :     else
     234             :       {
     235             :         /*
     236             :          * Worker thread, no epoll fd's, sleep for 100us at a time
     237             :          * and check for a barrier sync request
     238             :          */
     239      169431 :         if (timeout_ms)
     240             :           {
     241             :             struct timespec ts, tsrem;
     242      128667 :             f64 limit = now + (f64) timeout_ms * 1e-3;  /* deadline in seconds, measured from the timestamp taken at entry */
     243             : 
     244     8489020 :             while (vlib_time_now (vm) < limit)
     245             :               {
     246             :                 /* Sleep for 100us at a time */
     247     8182460 :                 ts.tv_sec = 0;
     248     8182460 :                 ts.tv_nsec = 1000 * 100;
     249             : 
     250     8182460 :                 while (nanosleep (&ts, &tsrem) < 0)
     251           0 :                   ts = tsrem;
     252     8360990 :                 if (*vlib_worker_threads->wait_at_barrier ||
     253     8361570 :                     nm->pending_interrupts)
     254         638 :                   goto done;  /* stop sleeping early: barrier requested or interrupts pending */
     255             :               }
     256             :           }
     257      182972 :         goto done;  /* no epoll instance on this path, so there are no events to dispatch */
     258             :       }
     259             :   }
     260             : 
     261    85482900 :   if (n_fds_ready < 0)
     262             :     {
     263         501 :       if (unix_error_is_fatal (errno))
     264           0 :         vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
     265             : 
     266             :       /* non fatal error (e.g. EINTR). */
     267         501 :       goto done;  /* the statistics below are not updated for a failed wait */
     268             :     }
     269             : 
     270    85482400 :   em->epoll_waits += 1;
     271    85482400 :   em->epoll_files_ready += n_fds_ready;
     272             : 
     273    86116600 :   for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++)
     274             :     {
     275      634215 :       u32 i = e->data.u32;  /* file pool index stored when the fd was registered */
     276             :       clib_file_t *f;
     277             :       clib_error_t *errors[4];  /* no branch below stores more than three entries */
     278      634215 :       int n_errors = 0;
     279             : 
     280             :       /*
     281             :        * Under rare scenarios, epoll may still post us events for the
     282             :        * deleted file descriptor. We just deal with it and throw away the
     283             :        * events for the corresponding file descriptor.
     284             :        */
     285      634215 :       f = fm->file_pool + i;
     286      634215 :       if (PREDICT_FALSE (pool_is_free (fm->file_pool, f)))
     287             :         {
     288           0 :           if (e->events & EPOLLIN)  /* one error record is created per dropped event type */
     289             :             {
     290           0 :               errors[n_errors] =
     291           0 :                 clib_error_return (0, "epoll event EPOLLIN dropped due "
     292             :                                    "to free index %u", i);
     293           0 :               n_errors++;
     294             :             }
     295           0 :           if (e->events & EPOLLOUT)
     296             :             {
     297           0 :               errors[n_errors] =
     298           0 :                 clib_error_return (0, "epoll event EPOLLOUT dropped due "
     299             :                                    "to free index %u", i);
     300           0 :               n_errors++;
     301             :             }
     302           0 :           if (e->events & EPOLLERR)
     303             :             {
     304           0 :               errors[n_errors] =
     305           0 :                 clib_error_return (0, "epoll event EPOLLERR dropped due "
     306             :                                    "to free index %u", i);
     307           0 :               n_errors++;
     308             :             }
     309             :         }
     310      634215 :       else if (PREDICT_TRUE (!(e->events & EPOLLERR)))
     311             :         {
     312      634215 :           if (e->events & EPOLLIN)
     313             :             {
     314      633370 :               f->read_events++;
     315      633370 :               errors[n_errors] = f->read_function (f);
     316             :               /* Make sure f is valid if the file pool moves */
     317      633370 :               if (pool_is_free_index (fm->file_pool, i))
     318          14 :                 continue;  /* NOTE(review): skips the error tally and the EPOLLOUT handling below, so an error returned by a read handler that freed its file is never saved -- confirm intended */
     319      633356 :               f = pool_elt_at_index (fm->file_pool, i);
     320      633356 :               n_errors += errors[n_errors] != 0;  /* keep the slot only when the handler returned an error */
     321             :             }
     322      634201 :           if (e->events & EPOLLOUT)
     323             :             {
     324         845 :               f->write_events++;
     325         845 :               errors[n_errors] = f->write_function (f);
     326         845 :               n_errors += errors[n_errors] != 0;
     327             :             }
     328             :         }
     329             :       else
     330             :         {
     331           0 :           if (f->error_function)  /* EPOLLERR on a live file: only the error handler runs, read and write handlers are skipped */
     332             :             {
     333           0 :               f->error_events++;
     334           0 :               errors[n_errors] = f->error_function (f);
     335           0 :               n_errors += errors[n_errors] != 0;
     336             :             }
     337             :           else
     338           0 :             close (f->file_descriptor);  /* NOTE(review): only the fd is closed here; the pool entry and n_epoll_fds are not touched in this function -- confirm cleanup happens elsewhere */
     339             :         }
     340             : 
     341      634201 :       ASSERT (n_errors < ARRAY_LEN (errors));
     342      634201 :       for (i = 0; i < n_errors; i++)  /* i is reused as a plain counter; the file index is no longer needed */
     343             :         {
     344           0 :           unix_save_error (um, errors[i]);
     345             :         }
     346             :     }
     347             : 
     348    85482400 : done:
     349    85666500 :   if (PREDICT_FALSE (vm->cpu_id != clib_get_current_cpu_id ()))  /* refresh the cached cpu and numa ids when the current cpu differs from the cached one */
     350             :     {
     351           0 :       vm->cpu_id = clib_get_current_cpu_id ();
     352           0 :       vm->numa_node = clib_get_current_numa_node ();
     353             :     }
     354             : 
     355    85653400 :   return 0;
     356             : }
     357             : /* Node function: looks up the calling thread index and forwards to linux_epoll_input_inline. */
     358             : static uword
     359    85651700 : linux_epoll_input (vlib_main_t * vm,
     360             :                    vlib_node_runtime_t * node, vlib_frame_t * frame)
     361             : {
     362    85651700 :   u32 thread_index = vlib_get_thread_index ();
     363             : 
     364    85651700 :   if (thread_index == 0)
     365    85482900 :     return linux_epoll_input_inline (vm, node, frame, 0);  /* literal 0: presumably lets the always-inline body be specialised for the main thread -- TODO confirm */
     366             :   else
     367      168820 :     return linux_epoll_input_inline (vm, node, frame, thread_index);
     368             : }
     369             : /* Registers linux_epoll_input as the pre-input node named unix-epoll-input. */
     370             : /* *INDENT-OFF* */
     371      183788 : VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
     372             :   .function = linux_epoll_input,  /* dispatch entry point defined above */
     373             :   .type = VLIB_NODE_TYPE_PRE_INPUT,
     374             :   .name = "unix-epoll-input",
     375             : };
     376             : /* *INDENT-ON* */
     377             : /* Init function: sizes linux_epoll_mains, gives every element an event buffer, opens an epoll instance for the first element only, and installs the file update hook. */
     378             : clib_error_t *  /* Returns 0 on success, or a unix error when epoll_create fails. */
     379         575 : linux_epoll_input_init (vlib_main_t * vm)
     380             : {
     381             :   linux_epoll_main_t *em;
     382         575 :   clib_file_main_t *fm = &file_main;
     383         575 :   vlib_thread_main_t *tm = vlib_get_thread_main ();
     384             : 
     385             : 
     386         575 :   vec_validate_aligned (linux_epoll_mains, tm->n_vlib_mains,  /* NOTE(review): validates index n_vlib_mains, which presumably yields n_vlib_mains + 1 elements -- confirm the extra element is intended */
     387             :                         CLIB_CACHE_LINE_BYTES);
     388             : 
     389        1780 :   vec_foreach (em, linux_epoll_mains)
     390             :   {
     391             :     /* Allocate some events. */
     392        1205 :     vec_resize (em->epoll_events, VLIB_FRAME_SIZE);  /* the buffer length caps how many events one wait can return */
     393             : 
     394        1205 :     if (linux_epoll_mains == em)  /* first element, i.e. thread index 0 */
     395             :       {
     396         575 :         em->epoll_fd = epoll_create (1);
     397         575 :         if (em->epoll_fd < 0)
     398           0 :           return clib_error_return_unix (0, "epoll_create");  /* NOTE(review): returns before the remaining elements get epoll_fd = -1 and before the hook is installed */
     399             :       }
     400             :     else
     401         630 :       em->epoll_fd = -1;  /* opened lazily by linux_epoll_file_update on the first ADD */
     402             :   }
     403             : 
     404         575 :   fm->file_update = linux_epoll_file_update;  /* route file add, modify and delete notifications to this backend */
     405             : 
     406         575 :   return 0;
     407             : }
     408             : 
     409        4031 : VLIB_INIT_FUNCTION (linux_epoll_input_init);
     410             : 
     411             : #endif /* HAVE_LINUX_EPOLL */
     412             : /* Init function with an empty body; it exists only to carry the ordering constraint registered below. */
     413             : static clib_error_t *
     414         575 : unix_input_init (vlib_main_t * vm)
     415             : {
     416         575 :   return 0;  /* nothing to set up; always succeeds */
     417             : }
     418             : 
     419             : /* *INDENT-OFF* */
     420        2303 : VLIB_INIT_FUNCTION (unix_input_init) =
     421             : {
     422             :   .runs_before = VLIB_INITS ("linux_epoll_input_init"),  /* orders this init ahead of the epoll init */
     423             : };
     424             : /* *INDENT-ON* */
     425             : 
     426             : /*
     427             :  * fd.io coding-style-patch-verification: ON
     428             :  *
     429             :  * Local Variables:
     430             :  * eval: (c-set-style "gnu")
     431             :  * End:
     432             :  */

Generated by: LCOV version 1.14