#include <string.h>
#include <errno.h>
#include <sys/socket.h>

#include <vppinfra/clib.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
#include <af_xdp/af_xdp.h>

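/* maximum number of tx enqueue attempts per frame; once exhausted, the
 * remaining buffers are freed and counted as errors */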
#define AF_XDP_TX_RETRIES 5

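/* drain the completion ring: convert each completed UMEM address back into a
 * vlib buffer index and return the buffers to the free pool */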
static_always_inline void
af_xdp_device_output_free (vlib_main_t * vm, const vlib_node_runtime_t * node,
			   af_xdp_txq_t * txq)
{
  const __u64 *compl;
  const u32 size = txq->cq.size;
  const u32 mask = size - 1;
  u32 bis[VLIB_FRAME_SIZE], *bi = bis;
  u32 n_wrap, idx;
  u32 n = xsk_ring_cons__peek (&txq->cq, ARRAY_LEN (bis), &idx);
  const u32 n_free = n;

  /* we rely on casting addr (u64) -> bi (u32) to discard the XSK offset below */
  STATIC_ASSERT (BITS (bi[0]) + CLIB_LOG2_CACHE_LINE_BYTES <=
		 XSK_UNALIGNED_BUF_OFFSET_SHIFT, "wrong size");
  ASSERT (mask == txq->cq.mask);

  if (!n_free)
    return;

  compl = xsk_ring_cons__comp_addr (&txq->cq, idx);
  n = clib_min (n_free, size - (idx & mask));
  n_wrap = n_free - n;

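  /* the completion ring is circular: consume the contiguous chunk up to the
   * end of the ring first, then wrap back to index 0 for the remainder */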
wrap_around:

  while (n >= 8)
    {
#ifdef CLIB_HAVE_VEC256
      u64x4 b0 = (*(u64x4u *) (compl + 0)) >> CLIB_LOG2_CACHE_LINE_BYTES;
      u64x4 b1 = (*(u64x4u *) (compl + 4)) >> CLIB_LOG2_CACHE_LINE_BYTES;
      /* permute 256-bit register so lower u32s of each buffer index are
       * placed into lower 128-bits */
      const u32x8 perm = { 0, 2, 4, 6, 1, 3, 5, 7 };
      u32x8 b2 = u32x8_permute ((u32x8) b0, perm);
      u32x8 b3 = u32x8_permute ((u32x8) b1, perm);
      /* extract lower 128-bits and save them to the array of buffer indices */
      *(u32x4u *) (bi + 0) = u32x8_extract_lo (b2);
      *(u32x4u *) (bi + 4) = u32x8_extract_lo (b3);
#else
      bi[0] = compl[0] >> CLIB_LOG2_CACHE_LINE_BYTES;
      bi[1] = compl[1] >> CLIB_LOG2_CACHE_LINE_BYTES;
      bi[2] = compl[2] >> CLIB_LOG2_CACHE_LINE_BYTES;
      bi[3] = compl[3] >> CLIB_LOG2_CACHE_LINE_BYTES;
      bi[4] = compl[4] >> CLIB_LOG2_CACHE_LINE_BYTES;
      bi[5] = compl[5] >> CLIB_LOG2_CACHE_LINE_BYTES;
      bi[6] = compl[6] >> CLIB_LOG2_CACHE_LINE_BYTES;
      bi[7] = compl[7] >> CLIB_LOG2_CACHE_LINE_BYTES;
#endif
      compl += 8;
      bi += 8;
      n -= 8;
    }

  while (n >= 1)
    {
      bi[0] = compl[0] >> CLIB_LOG2_CACHE_LINE_BYTES;
      ASSERT (vlib_buffer_is_known (vm, bi[0]) ==
	      VLIB_BUFFER_KNOWN_ALLOCATED);
      compl += 1;
      bi += 1;
      n -= 1;
    }

  if (n_wrap)
    {
      compl = xsk_ring_cons__comp_addr (&txq->cq, 0);
      n = n_wrap;
      n_wrap = 0;
      goto wrap_around;
    }

  xsk_ring_cons__release (&txq->cq, n_free);
  vlib_buffer_free (vm, bis, n_free);
}

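/* submit the filled descriptors to the kernel and, when the ring asks for a
 * wakeup, kick transmission with a sendmsg() on the xsk socket */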
static_always_inline void
af_xdp_device_output_tx_db (vlib_main_t * vm,
			    const vlib_node_runtime_t * node,
			    af_xdp_device_t * ad,
			    af_xdp_txq_t * txq, const u32 n_tx)
{
  xsk_ring_prod__submit (&txq->tx, n_tx);

  if (!xsk_ring_prod__needs_wakeup (&txq->tx))
    return;

  vlib_error_count (vm, node->node_index, AF_XDP_TX_ERROR_SYSCALL_REQUIRED, 1);

  clib_spinlock_lock_if_init (&txq->syscall_lock);

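  /* re-check under the lock: another worker may have issued the wakeup
   * while we were waiting for it */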
  if (xsk_ring_prod__needs_wakeup (&txq->tx))
    {
      const struct msghdr msg = {};
      int ret;
      /* On tx, the xsk socket transmits at most TX_BATCH_SIZE descriptors
       * per syscall, as defined in kernel net/xdp/xsk.c. Unfortunately we
       * do not know that value, so our only option is to retry until
       * everything is sent... */
      do
	{
	  ret = sendmsg (txq->xsk_fd, &msg, MSG_DONTWAIT);
	}
      while (ret < 0 && EAGAIN == errno);
      if (PREDICT_FALSE (ret < 0))
	{
	  /* not EAGAIN: something bad is happening */
	  vlib_error_count (vm, node->node_index,
			    AF_XDP_TX_ERROR_SYSCALL_FAILURES, 1);
	  af_xdp_device_error (ad, "tx sendmsg() failed");
	}
    }

  clib_spinlock_unlock_if_init (&txq->syscall_lock);
}

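/* reserve tx ring slots and fill one descriptor per packet; returns the
 * number of descriptors actually enqueued (0 when the ring is full) */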
static_always_inline u32
af_xdp_device_output_tx_try (vlib_main_t * vm,
			     const vlib_node_runtime_t * node,
			     af_xdp_device_t * ad, af_xdp_txq_t * txq,
			     u32 n_tx, u32 * bi)
{
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
  const uword start = vm->buffer_main->buffer_mem_start;
  const u32 size = txq->tx.size;
  const u32 mask = size - 1;
  struct xdp_desc *desc;
  u64 offset, addr;
  u32 idx, n, n_wrap;

  ASSERT (mask == txq->tx.mask);

  n_tx = xsk_ring_prod__reserve (&txq->tx, n_tx, &idx);

  /* if ring is full, do nothing */
  if (PREDICT_FALSE (0 == n_tx))
    return 0;

  vlib_get_buffers (vm, bi, bufs, n_tx);

  desc = xsk_ring_prod__tx_desc (&txq->tx, idx);
  n = clib_min (n_tx, size - (idx & mask));
  n_wrap = n_tx - n;

wrap_around:

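  /* fill the contiguous chunk up to the end of the ring first, then wrap to
   * index 0. The UMEM spans the whole vlib buffer memory, so a descriptor
   * address is simply the buffer pointer relative to buffer_mem_start, with
   * the data offset carried in the upper bits (XSK unaligned chunk mode).
   * The loop below handles 4 packets per iteration while prefetching 4
   * ahead, hence the n >= 8 bound; chained buffers fall through to the
   * scalar loop */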
  while (n >= 8)
    {
      if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT ||
			 b[1]->flags & VLIB_BUFFER_NEXT_PRESENT ||
			 b[2]->flags & VLIB_BUFFER_NEXT_PRESENT ||
			 b[3]->flags & VLIB_BUFFER_NEXT_PRESENT))
	{
	  break;
	}

      vlib_prefetch_buffer_header (b[4], LOAD);
      offset =
	(sizeof (vlib_buffer_t) +
	 b[0]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
      addr = pointer_to_uword (b[0]) - start;
      desc[0].addr = offset | addr;
      desc[0].len = b[0]->current_length;

      vlib_prefetch_buffer_header (b[5], LOAD);
      offset =
	(sizeof (vlib_buffer_t) +
	 b[1]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
      addr = pointer_to_uword (b[1]) - start;
      desc[1].addr = offset | addr;
      desc[1].len = b[1]->current_length;

      vlib_prefetch_buffer_header (b[6], LOAD);
      offset =
	(sizeof (vlib_buffer_t) +
	 b[2]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
      addr = pointer_to_uword (b[2]) - start;
      desc[2].addr = offset | addr;
      desc[2].len = b[2]->current_length;

      vlib_prefetch_buffer_header (b[7], LOAD);
      offset =
	(sizeof (vlib_buffer_t) +
	 b[3]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
      addr = pointer_to_uword (b[3]) - start;
      desc[3].addr = offset | addr;
      desc[3].len = b[3]->current_length;

      desc += 4;
      b += 4;
      n -= 4;
    }
203 0 : while (n >= 1)
204 : {
205 0 : if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT))
206 : {
207 0 : if (vlib_buffer_chain_linearize (vm, b[0]) != 1)
208 : {
209 0 : af_xdp_log (VLIB_LOG_LEVEL_ERR, ad,
210 : "vlib_buffer_chain_linearize failed");
211 0 : vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b[0]));
212 0 : continue;
213 : }
214 : }
215 :
216 0 : offset =
217 0 : (sizeof (vlib_buffer_t) +
218 0 : b[0]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
219 0 : addr = pointer_to_uword (b[0]) - start;
220 0 : desc[0].addr = offset | addr;
221 0 : desc[0].len = b[0]->current_length;
222 0 : desc += 1;
223 0 : b += 1;
224 0 : n -= 1;
225 : }

  if (n_wrap)
    {
      desc = xsk_ring_prod__tx_desc (&txq->tx, 0);
      n = n_wrap;
      n_wrap = 0;
      goto wrap_around;
    }

  return n_tx;
}

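/* tx node function: enqueue the frame's buffers, retrying a few times with
 * completion processing in between, then ring the tx doorbell */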
VNET_DEVICE_CLASS_TX_FN (af_xdp_device_class) (vlib_main_t * vm,
					       vlib_node_runtime_t * node,
					       vlib_frame_t * frame)
{
  af_xdp_main_t *rm = &af_xdp_main;
  vnet_interface_output_runtime_t *ord = (void *) node->runtime_data;
  af_xdp_device_t *ad = pool_elt_at_index (rm->devices, ord->dev_instance);
  const vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
  const int shared_queue = tf->shared_queue;
  af_xdp_txq_t *txq = vec_elt_at_index (ad->txqs, tf->queue_id);
  u32 *from;
  u32 n, n_tx;
  int i;

  from = vlib_frame_vector_args (frame);
  n_tx = frame->n_vectors;

  if (shared_queue)
    clib_spinlock_lock (&txq->lock);

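  /* each attempt first recycles completed buffers, then tries to enqueue
   * whatever is still pending */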
  for (i = 0, n = 0; i < AF_XDP_TX_RETRIES && n < n_tx; i++)
    {
      u32 n_enq;
      af_xdp_device_output_free (vm, node, txq);
      n_enq =
	af_xdp_device_output_tx_try (vm, node, ad, txq, n_tx - n, from + n);
      n += n_enq;
    }

  af_xdp_device_output_tx_db (vm, node, ad, txq, n);

  if (shared_queue)
    clib_spinlock_unlock (&txq->lock);

  if (PREDICT_FALSE (n != n_tx))
    {
      vlib_buffer_free (vm, from + n, n_tx - n);
      vlib_error_count (vm, node->node_index,
			AF_XDP_TX_ERROR_NO_FREE_SLOTS, n_tx - n);
    }

  return n;
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */