LCOV - code coverage report
Current view: top level - plugins/dma_intel - dsa.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 2 219 0.9 %
Date: 2023-07-05 22:20:52 Functions: 3 15 20.0 %

          Line data    Source code
       1             : /* SPDX-License-Identifier: Apache-2.0
       2             :  * Copyright(c) 2022 Cisco Systems, Inc.
       3             :  * Copyright (c) 2022 Intel and/or its affiliates.
       4             :  */
       5             : 
       6             : #include <vlib/vlib.h>
       7             : #include <vlib/pci/pci.h>
       8             : #include <vlib/dma/dma.h>
       9             : #include <vppinfra/heap.h>
      10             : #include <vppinfra/atomics.h>
      11             : #include <vnet/plugin/plugin.h>
      12             : #include <vpp/app/version.h>
      13             : #include <dma_intel/dsa_intel.h>
      14             : 
      15             : extern vlib_node_registration_t intel_dsa_node;
      16             : 
      17         559 : VLIB_REGISTER_LOG_CLASS (intel_dsa_log, static) = {
      18             :   .class_name = "intel_dsa",
      19             :   .subclass_name = "dsa",
      20             : };
      21             : 
      22             : static void
      23           0 : intel_dsa_channel_lock (intel_dsa_channel_t *ch)
      24             : {
      25           0 :   u8 expected = 0;
      26           0 :   if (ch->n_threads < 2)
      27           0 :     return;
      28             : 
      29             :   /* channel is used by multiple threads so we need to lock it */
      30           0 :   while (!__atomic_compare_exchange_n (&ch->lock, &expected,
      31             :                                        /* desired */ 1, /* weak */ 0,
      32             :                                        __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
      33             :     {
      34           0 :       while (__atomic_load_n (&ch->lock, __ATOMIC_RELAXED))
      35           0 :         CLIB_PAUSE ();
      36           0 :       expected = 0;
      37             :     }
      38             : }
      39             : 
      40             : static void
      41           0 : intel_dsa_channel_unlock (intel_dsa_channel_t *ch)
      42             : {
      43           0 :   if (ch->n_threads < 2)
      44           0 :     return;
      45             : 
      46           0 :   __atomic_store_n (&ch->lock, 0, __ATOMIC_RELEASE);
      47             : }
      48             : 
      49             : static vlib_dma_batch_t *
      50           0 : intel_dsa_batch_new (vlib_main_t *vm, struct vlib_dma_config_data *cd)
      51             : {
      52           0 :   intel_dsa_main_t *idm = &intel_dsa_main;
      53             :   intel_dsa_config_t *idc;
      54             :   intel_dsa_batch_t *b;
      55             : 
      56           0 :   idc = vec_elt_at_index (idm->dsa_config_heap,
      57             :                           cd->private_data + vm->thread_index);
      58             : 
      59           0 :   if (vec_len (idc->freelist) > 0)
      60           0 :     b = vec_pop (idc->freelist);
      61             :   else
      62             :     {
      63           0 :       clib_spinlock_lock (&idm->lock);
      64           0 :       b = vlib_physmem_alloc (vm, idc->alloc_size);
      65           0 :       clib_spinlock_unlock (&idm->lock);
      66             :       /* if no free space in physmem, force quit */
      67           0 :       ASSERT (b != NULL);
      68           0 :       *b = idc->batch_template;
      69           0 :       b->max_transfers = idc->max_transfers;
      70             : 
      71           0 :       u32 def_flags = (INTEL_DSA_OP_MEMMOVE << INTEL_DSA_OP_SHIFT) |
      72             :                       INTEL_DSA_FLAG_CACHE_CONTROL;
      73           0 :       if (b->ch->block_on_fault)
      74           0 :         def_flags |= INTEL_DSA_FLAG_BLOCK_ON_FAULT;
      75           0 :       for (int i = 0; i < idc->max_transfers; i++)
      76             :         {
      77           0 :           intel_dsa_desc_t *dsa_desc = b->descs + i;
      78           0 :           dsa_desc->op_flags = def_flags;
      79             :         }
      80             :     }
      81             : 
      82           0 :   return &b->batch;
      83             : }
      84             : 
      85             : #if defined(__x86_64__) || defined(i386)
      86             : static_always_inline void
      87           0 : __movdir64b (volatile void *dst, const void *src)
      88             : {
      89           0 :   asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
      90             :                :
      91             :                : "a"(dst), "d"(src)
      92             :                : "memory");
      93           0 : }
      94             : #endif
      95             : 
      96             : static_always_inline void
      97           0 : intel_dsa_batch_fallback (vlib_main_t *vm, intel_dsa_batch_t *b,
      98             :                           intel_dsa_channel_t *ch)
      99             : {
     100           0 :   for (u16 i = 0; i < b->batch.n_enq; i++)
     101             :     {
     102           0 :       intel_dsa_desc_t *desc = &b->descs[i];
     103           0 :       clib_memcpy_fast (desc->dst, desc->src, desc->size);
     104             :     }
     105           0 :   b->status = INTEL_DSA_STATUS_CPU_SUCCESS;
     106           0 :   ch->submitted++;
     107           0 :   return;
     108             : }
     109             : 
     110             : int
     111           0 : intel_dsa_batch_submit (vlib_main_t *vm, struct vlib_dma_batch *vb)
     112             : {
     113           0 :   intel_dsa_main_t *idm = &intel_dsa_main;
     114           0 :   intel_dsa_batch_t *b = (intel_dsa_batch_t *) vb;
     115           0 :   intel_dsa_channel_t *ch = b->ch;
     116           0 :   if (PREDICT_FALSE (vb->n_enq == 0))
     117             :     {
     118           0 :       vec_add1 (idm->dsa_config_heap[b->config_heap_index].freelist, b);
     119           0 :       return 0;
     120             :     }
     121             : 
     122           0 :   intel_dsa_channel_lock (ch);
     123           0 :   if (ch->n_enq >= ch->size)
     124             :     {
     125           0 :       if (!b->sw_fallback)
     126             :         {
     127           0 :           intel_dsa_channel_unlock (ch);
     128           0 :           return 0;
     129             :         }
     130             :       /* skip channel limitation if first pending finished */
     131           0 :       intel_dsa_batch_t *lb = NULL;
     132           0 :       u32 n_pendings =
     133           0 :         vec_len (idm->dsa_threads[vm->thread_index].pending_batches);
     134           0 :       if (n_pendings)
     135           0 :         lb =
     136           0 :           idm->dsa_threads[vm->thread_index].pending_batches[n_pendings - 1];
     137             : 
     138           0 :       if (!lb || lb->status != INTEL_DSA_STATUS_SUCCESS)
     139             :         {
     140           0 :           intel_dsa_batch_fallback (vm, b, ch);
     141           0 :           goto done;
     142             :         }
     143             :     }
     144             : 
     145           0 :   b->status = INTEL_DSA_STATUS_BUSY;
     146           0 :   if (PREDICT_FALSE (vb->n_enq == 1))
     147             :     {
     148           0 :       intel_dsa_desc_t *desc = &b->descs[0];
     149           0 :       desc->completion = (u64) &b->completion_cl;
     150           0 :       desc->op_flags |= INTEL_DSA_FLAG_COMPLETION_ADDR_VALID |
     151             :                         INTEL_DSA_FLAG_REQUEST_COMPLETION;
     152             : #if defined(__x86_64__) || defined(i386)
     153             :       _mm_sfence (); /* fence before writing desc to device */
     154           0 :       __movdir64b (ch->portal, (void *) desc);
     155             : #endif
     156             :     }
     157             :   else
     158             :     {
     159           0 :       intel_dsa_desc_t *batch_desc = &b->descs[b->max_transfers];
     160           0 :       batch_desc->op_flags = (INTEL_DSA_OP_BATCH << INTEL_DSA_OP_SHIFT) |
     161             :                              INTEL_DSA_FLAG_COMPLETION_ADDR_VALID |
     162             :                              INTEL_DSA_FLAG_REQUEST_COMPLETION;
     163           0 :       batch_desc->desc_addr = (void *) (b->descs);
     164           0 :       batch_desc->size = vb->n_enq;
     165           0 :       batch_desc->completion = (u64) &b->completion_cl;
     166             : #if defined(__x86_64__) || defined(i386)
     167             :       _mm_sfence (); /* fence before writing desc to device */
     168           0 :       __movdir64b (ch->portal, (void *) batch_desc);
     169             : #endif
     170             :     }
     171             : 
     172           0 :   ch->submitted++;
     173           0 :   ch->n_enq++;
     174             : 
     175           0 : done:
     176           0 :   intel_dsa_channel_unlock (ch);
     177           0 :   vec_add1 (idm->dsa_threads[vm->thread_index].pending_batches, b);
     178           0 :   vlib_node_set_interrupt_pending (vm, intel_dsa_node.index);
     179           0 :   return 1;
     180             : }
     181             : 
     182             : static int
     183           0 : intel_dsa_check_channel (intel_dsa_channel_t *ch, vlib_dma_config_data_t *cd)
     184             : {
     185           0 :   if (!ch)
     186             :     {
     187           0 :       dsa_log_error ("no available dsa channel");
     188           0 :       return 1;
     189             :     }
     190           0 :   vlib_dma_config_t supported_cfg = {
     191             :     .barrier_before_last = 1,
     192             :     .sw_fallback = 1,
     193             :   };
     194             : 
     195           0 :   if (cd->cfg.features & ~supported_cfg.features)
     196             :     {
     197           0 :       dsa_log_error ("unsupported feature requested");
     198           0 :       return 1;
     199             :     }
     200             : 
     201           0 :   if (cd->cfg.max_transfers > ch->max_transfers)
     202             :     {
     203           0 :       dsa_log_error ("transfer number (%u) too big", cd->cfg.max_transfers);
     204           0 :       return 1;
     205             :     }
     206             : 
     207           0 :   if (cd->cfg.max_transfer_size > ch->max_transfer_size)
     208             :     {
     209           0 :       dsa_log_error ("transfer size (%u) too big", cd->cfg.max_transfer_size);
     210           0 :       return 1;
     211             :     }
     212           0 :   return 0;
     213             : }
     214             : 
     215             : static_always_inline void
     216           0 : intel_dsa_alloc_dma_batch (vlib_main_t *vm, intel_dsa_config_t *idc)
     217             : {
     218             :   intel_dsa_batch_t *b;
     219           0 :   b = vlib_physmem_alloc (vm, idc->alloc_size);
     220             :   /* if no free space in physmem, force quit */
     221           0 :   ASSERT (b != NULL);
     222           0 :   *b = idc->batch_template;
     223           0 :   b->max_transfers = idc->max_transfers;
     224             : 
     225           0 :   u32 def_flags = (INTEL_DSA_OP_MEMMOVE << INTEL_DSA_OP_SHIFT) |
     226             :                   INTEL_DSA_FLAG_CACHE_CONTROL;
     227           0 :   if (b->ch->block_on_fault)
     228           0 :     def_flags |= INTEL_DSA_FLAG_BLOCK_ON_FAULT;
     229             : 
     230           0 :   for (int i = 0; i < idc->max_transfers; i++)
     231             :     {
     232           0 :       intel_dsa_desc_t *dsa_desc = b->descs + i;
     233           0 :       dsa_desc->op_flags = def_flags;
     234             :     }
     235           0 :   vec_add1 (idc->freelist, b);
     236           0 : }
     237             : 
     238             : static int
     239           0 : intel_dsa_config_add_fn (vlib_main_t *vm, vlib_dma_config_data_t *cd)
     240             : {
     241           0 :   intel_dsa_main_t *idm = &intel_dsa_main;
     242             :   intel_dsa_config_t *idc;
     243           0 :   u32 index, n_threads = vlib_get_n_threads ();
     244             : 
     245           0 :   vec_validate (idm->dsa_config_heap_handle_by_config_index, cd->config_index);
     246           0 :   index = heap_alloc_aligned (
     247             :     idm->dsa_config_heap, n_threads, CLIB_CACHE_LINE_BYTES,
     248             :     idm->dsa_config_heap_handle_by_config_index[cd->config_index]);
     249             : 
     250           0 :   cd->batch_new_fn = intel_dsa_batch_new;
     251           0 :   cd->private_data = index;
     252             : 
     253           0 :   for (u32 thread = 0; thread < n_threads; thread++)
     254             :     {
     255             :       intel_dsa_batch_t *idb;
     256             :       vlib_dma_batch_t *b;
     257           0 :       idc = vec_elt_at_index (idm->dsa_config_heap, index + thread);
     258             : 
     259             :       /* size of physmem allocation for this config */
     260           0 :       idc->max_transfers = cd->cfg.max_transfers;
     261           0 :       idc->alloc_size = sizeof (intel_dsa_batch_t) +
     262           0 :                         sizeof (intel_dsa_desc_t) * (idc->max_transfers + 1);
     263             :       /* fill batch template */
     264           0 :       idb = &idc->batch_template;
     265           0 :       idb->ch = idm->dsa_threads[thread].ch;
     266           0 :       if (intel_dsa_check_channel (idb->ch, cd))
     267           0 :         return 0;
     268             : 
     269           0 :       dsa_log_debug ("config %d in thread %d using channel %u/%u",
     270             :                      cd->config_index, thread, idb->ch->did, idb->ch->qid);
     271           0 :       idb->config_heap_index = index + thread;
     272           0 :       idb->config_index = cd->config_index;
     273           0 :       idb->batch.callback_fn = cd->cfg.callback_fn;
     274           0 :       idb->features = cd->cfg.features;
     275           0 :       b = &idb->batch;
     276           0 :       b->stride = sizeof (intel_dsa_desc_t);
     277           0 :       b->src_ptr_off = STRUCT_OFFSET_OF (intel_dsa_batch_t, descs[0].src);
     278           0 :       b->dst_ptr_off = STRUCT_OFFSET_OF (intel_dsa_batch_t, descs[0].dst);
     279           0 :       b->size_off = STRUCT_OFFSET_OF (intel_dsa_batch_t, descs[0].size);
     280           0 :       b->submit_fn = intel_dsa_batch_submit;
     281           0 :       dsa_log_debug (
     282             :         "config %d in thread %d stride %d src/dst/size offset %d-%d-%d",
     283             :         cd->config_index, thread, b->stride, b->src_ptr_off, b->dst_ptr_off,
     284             :         b->size_off);
     285             : 
     286             :       /* allocate dma batch in advance */
     287           0 :       for (u32 index = 0; index < cd->cfg.max_batches; index++)
     288           0 :         intel_dsa_alloc_dma_batch (vm, idc);
     289             :     }
     290             : 
     291           0 :   dsa_log_info ("config %u added", cd->private_data);
     292             : 
     293           0 :   return 1;
     294             : }
     295             : 
     296             : static void
     297           0 : intel_dsa_config_del_fn (vlib_main_t *vm, vlib_dma_config_data_t *cd)
     298             : {
     299           0 :   intel_dsa_main_t *idm = &intel_dsa_main;
     300           0 :   intel_dsa_thread_t *t =
     301           0 :     vec_elt_at_index (idm->dsa_threads, vm->thread_index);
     302           0 :   u32 n_pending, n_threads, config_heap_index, n = 0;
     303           0 :   n_threads = vlib_get_n_threads ();
     304             : 
     305           0 :   if (!t->pending_batches)
     306           0 :     goto free_heap;
     307             : 
     308           0 :   n_pending = vec_len (t->pending_batches);
     309             :   intel_dsa_batch_t *b;
     310             : 
     311             :   /* clean pending list and free list */
     312           0 :   for (u32 i = 0; i < n_pending; i++)
     313             :     {
     314           0 :       b = t->pending_batches[i];
     315           0 :       if (b->config_index == cd->config_index)
     316             :         {
     317           0 :           vec_add1 (idm->dsa_config_heap[b->config_heap_index].freelist, b);
     318           0 :           if (b->status == INTEL_DSA_STATUS_SUCCESS ||
     319           0 :               b->status == INTEL_DSA_STATUS_BUSY)
     320           0 :             b->ch->n_enq--;
     321             :         }
     322             :       else
     323           0 :         t->pending_batches[n++] = b;
     324             :     }
     325             : 
     326           0 :   vec_set_len (t->pending_batches, n);
     327             : 
     328           0 : free_heap:
     329           0 :   for (u32 thread = 0; thread < n_threads; thread++)
     330             :     {
     331           0 :       config_heap_index = cd->private_data + thread;
     332           0 :       while (vec_len (idm->dsa_config_heap[config_heap_index].freelist) > 0)
     333             :         {
     334           0 :           b = vec_pop (idm->dsa_config_heap[config_heap_index].freelist);
     335           0 :           vlib_physmem_free (vm, b);
     336             :         }
     337             :     }
     338             : 
     339           0 :   heap_dealloc (idm->dsa_config_heap,
     340           0 :                 idm->dsa_config_heap_handle_by_config_index[cd->config_index]);
     341             : 
     342           0 :   dsa_log_debug ("config %u removed", cd->private_data);
     343           0 : }
     344             : 
     345             : static uword
     346           0 : intel_dsa_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
     347             :                    vlib_frame_t *frame)
     348             : {
     349           0 :   intel_dsa_main_t *idm = &intel_dsa_main;
     350           0 :   intel_dsa_thread_t *t =
     351           0 :     vec_elt_at_index (idm->dsa_threads, vm->thread_index);
     352           0 :   u32 n_pending = 0, n = 0;
     353           0 :   u8 glitch = 0, status;
     354             : 
     355           0 :   if (!t->pending_batches)
     356           0 :     return 0;
     357             : 
     358           0 :   n_pending = vec_len (t->pending_batches);
     359             : 
     360           0 :   for (u32 i = 0; i < n_pending; i++)
     361             :     {
     362           0 :       intel_dsa_batch_t *b = t->pending_batches[i];
     363           0 :       intel_dsa_channel_t *ch = b->ch;
     364             : 
     365           0 :       status = b->status;
     366           0 :       if ((status == INTEL_DSA_STATUS_SUCCESS ||
     367           0 :            status == INTEL_DSA_STATUS_CPU_SUCCESS) &&
     368             :           !glitch)
     369             :         {
     370             :           /* callback */
     371           0 :           if (b->batch.callback_fn)
     372           0 :             b->batch.callback_fn (vm, &b->batch);
     373             : 
     374             :           /* restore last descriptor fields */
     375           0 :           if (b->batch.n_enq == 1)
     376             :             {
     377           0 :               b->descs[0].completion = 0;
     378           0 :               b->descs[0].op_flags =
     379             :                 (INTEL_DSA_OP_MEMMOVE << INTEL_DSA_OP_SHIFT) |
     380             :                 INTEL_DSA_FLAG_CACHE_CONTROL;
     381           0 :               if (b->ch->block_on_fault)
     382           0 :                 b->descs[0].op_flags |= INTEL_DSA_FLAG_BLOCK_ON_FAULT;
     383             :             }
     384             :           /* add to freelist */
     385           0 :           vec_add1 (idm->dsa_config_heap[b->config_heap_index].freelist, b);
     386             : 
     387           0 :           intel_dsa_channel_lock (ch);
     388           0 :           if (status == INTEL_DSA_STATUS_SUCCESS)
     389             :             {
     390           0 :               ch->n_enq--;
     391           0 :               ch->completed++;
     392             :             }
     393             :           else
     394           0 :             ch->sw_fallback++;
     395           0 :           intel_dsa_channel_unlock (ch);
     396             : 
     397           0 :           b->batch.n_enq = 0;
     398           0 :           b->status = INTEL_DSA_STATUS_IDLE;
     399             :         }
     400           0 :       else if (status == INTEL_DSA_STATUS_BUSY)
     401             :         {
     402           0 :           glitch = 1 & b->barrier_before_last;
     403           0 :           t->pending_batches[n++] = b;
     404             :         }
     405           0 :       else if (!glitch)
     406             :         {
     407             :           /* fallback to software if exception happened */
     408           0 :           intel_dsa_batch_fallback (vm, b, ch);
     409           0 :           glitch = 1 & b->barrier_before_last;
     410             :         }
     411             :       else
     412             :         {
     413           0 :           t->pending_batches[n++] = b;
     414             :         }
     415             :     }
     416           0 :   vec_set_len (t->pending_batches, n);
     417             : 
     418           0 :   if (n)
     419             :     {
     420           0 :       vlib_node_set_interrupt_pending (vm, intel_dsa_node.index);
     421             :     }
     422             : 
     423           0 :   return n_pending - n;
     424             : }
     425             : 
     426             : u8 *
     427           0 : format_dsa_info (u8 *s, va_list *args)
     428             : {
     429           0 :   intel_dsa_main_t *idm = &intel_dsa_main;
     430           0 :   vlib_main_t *vm = va_arg (*args, vlib_main_t *);
     431             :   intel_dsa_channel_t *ch;
     432           0 :   ch = idm->dsa_threads[vm->thread_index].ch;
     433           0 :   s = format (s, "thread %d dma %u/%u request %-16lld hw %-16lld cpu %-16lld",
     434           0 :               vm->thread_index, ch->did, ch->qid, ch->submitted, ch->completed,
     435             :               ch->sw_fallback);
     436           0 :   return s;
     437             : }
     438             : 
     439      148440 : VLIB_REGISTER_NODE (intel_dsa_node) = {
     440             :   .function = intel_dsa_node_fn,
     441             :   .name = "intel-dsa",
     442             :   .type = VLIB_NODE_TYPE_INPUT,
     443             :   .state = VLIB_NODE_STATE_INTERRUPT,
     444             :   .vector_size = 4,
     445             : };
     446             : 
     447             : vlib_dma_backend_t intel_dsa_backend = {
     448             :   .name = "Intel DSA",
     449             :   .config_add_fn = intel_dsa_config_add_fn,
     450             :   .config_del_fn = intel_dsa_config_del_fn,
     451             :   .info_fn = format_dsa_info,
     452             : };

Generated by: LCOV version 1.14