Line data Source code
1 : /*
2 : *------------------------------------------------------------------
3 : * af_packet.c - linux kernel packet interface
4 : *
5 : * Copyright (c) 2016 Cisco and/or its affiliates.
6 : * Licensed under the Apache License, Version 2.0 (the "License");
7 : * you may not use this file except in compliance with the License.
8 : * You may obtain a copy of the License at:
9 : *
10 : * http://www.apache.org/licenses/LICENSE-2.0
11 : *
12 : * Unless required by applicable law or agreed to in writing, software
13 : * distributed under the License is distributed on an "AS IS" BASIS,
14 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 : * See the License for the specific language governing permissions and
16 : * limitations under the License.
17 : *------------------------------------------------------------------
18 : */
19 :
20 : #include <linux/if_packet.h>
21 :
22 : #include <vlib/vlib.h>
23 : #include <vlib/unix/unix.h>
24 : #include <vnet/ip/ip.h>
25 : #include <vnet/ethernet/ethernet.h>
26 : #include <vnet/interface/rx_queue_funcs.h>
27 : #include <vnet/feature/feature.h>
28 : #include <vnet/ethernet/packet.h>
29 :
30 : #include <af_packet/af_packet.h>
31 : #include <vnet/devices/virtio/virtio_std.h>
32 :
/*
 * Node error counters, kept as a single X-macro list so that the enum
 * values and the human readable strings can never get out of step.
 * Each entry is _ (SYMBOL, "description").
 */
#define foreach_af_packet_input_error                                         \
  _ (PARTIAL_PKT, "partial packet")                                           \
  _ (TIMEDOUT_BLK, "timed out block")                                         \
  _ (TOTAL_RECV_BLK, "total received block")

/* One enumerator per list entry, followed by the number of entries. */
typedef enum
{
#define _(sym, str) AF_PACKET_INPUT_ERROR_##sym,
  foreach_af_packet_input_error
#undef _
    AF_PACKET_INPUT_N_ERROR,
} af_packet_input_error_t;

/* Description strings, indexed by af_packet_input_error_t. */
static char *af_packet_input_error_strings[] = {
#define _(sym, str) str,
  foreach_af_packet_input_error
#undef _
};
50 :
51 : typedef struct
52 : {
53 : u32 next_index;
54 : u32 hw_if_index;
55 : u16 queue_id;
56 : int block;
57 : u32 pkt_num;
58 : void *block_start;
59 : block_desc_t bd;
60 : union
61 : {
62 : tpacket3_hdr_t tph3;
63 : tpacket2_hdr_t tph2;
64 : };
65 : vnet_virtio_net_hdr_t vnet_hdr;
66 : u8 is_v3;
67 : } af_packet_input_trace_t;
68 :
69 : static u8 *
70 0 : format_af_packet_input_trace (u8 * s, va_list * args)
71 : {
72 0 : CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
73 0 : CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
74 0 : af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *);
75 0 : u32 indent = format_get_indent (s);
76 :
77 0 : s = format (s, "af_packet: hw_if_index %d rx-queue %u next-index %d",
78 0 : t->hw_if_index, t->queue_id, t->next_index);
79 :
80 0 : if (t->is_v3)
81 : {
82 0 : s = format (
83 : s, "\n%Ublock %u:\n%Uaddress %p version %u seq_num %lu pkt_num %u",
84 : format_white_space, indent + 2, t->block, format_white_space,
85 : indent + 4, t->block_start, t->bd.version, t->bd.hdr.bh1.seq_num,
86 : t->pkt_num);
87 0 : s = format (
88 : s,
89 : "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
90 : "\n%Usec 0x%x nsec 0x%x vlan %U"
91 : #ifdef TP_STATUS_VLAN_TPID_VALID
92 : " vlan_tpid %u"
93 : #endif
94 : ,
95 : format_white_space, indent + 2, format_white_space, indent + 4,
96 0 : t->tph3.tp_status, t->tph3.tp_len, t->tph3.tp_snaplen, t->tph3.tp_mac,
97 0 : t->tph3.tp_net, format_white_space, indent + 4, t->tph3.tp_sec,
98 : t->tph3.tp_nsec, format_ethernet_vlan_tci, t->tph3.hv1.tp_vlan_tci
99 : #ifdef TP_STATUS_VLAN_TPID_VALID
100 : ,
101 0 : t->tph3.hv1.tp_vlan_tpid
102 : #endif
103 : );
104 : }
105 : else
106 : {
107 0 : s = format (
108 : s,
109 : "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
110 : "\n%Usec 0x%x nsec 0x%x vlan %U"
111 : #ifdef TP_STATUS_VLAN_TPID_VALID
112 : " vlan_tpid %u"
113 : #endif
114 : ,
115 : format_white_space, indent + 2, format_white_space, indent + 4,
116 0 : t->tph2.tp_status, t->tph2.tp_len, t->tph2.tp_snaplen, t->tph2.tp_mac,
117 0 : t->tph2.tp_net, format_white_space, indent + 4, t->tph2.tp_sec,
118 0 : t->tph2.tp_nsec, format_ethernet_vlan_tci, t->tph2.tp_vlan_tci
119 : #ifdef TP_STATUS_VLAN_TPID_VALID
120 : ,
121 0 : t->tph2.tp_vlan_tpid
122 : #endif
123 : );
124 : }
125 :
126 0 : s = format (s,
127 : "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u"
128 : "\n%Ugso_size %u csum_start %u csum_offset %u",
129 : format_white_space, indent + 2, format_white_space, indent + 4,
130 0 : t->vnet_hdr.flags, t->vnet_hdr.gso_type, t->vnet_hdr.hdr_len,
131 0 : format_white_space, indent + 4, t->vnet_hdr.gso_size,
132 0 : t->vnet_hdr.csum_start, t->vnet_hdr.csum_offset);
133 0 : return s;
134 : }
135 :
136 : always_inline void
137 32777800 : buffer_add_to_chain (vlib_buffer_t *b, vlib_buffer_t *first_b,
138 : vlib_buffer_t *prev_b, u32 bi)
139 : {
140 : /* update first buffer */
141 32777800 : first_b->total_length_not_including_first_buffer += b->current_length;
142 :
143 : /* update previous buffer */
144 32777800 : prev_b->next_buffer = bi;
145 32777800 : prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
146 :
147 : /* update current buffer */
148 32777800 : b->next_buffer = ~0;
149 32777800 : }
150 :
151 : static_always_inline void
152 885213 : fill_gso_offload (vlib_buffer_t *b, u32 gso_size, u8 l4_hdr_sz)
153 : {
154 885213 : b->flags |= VNET_BUFFER_F_GSO;
155 885213 : vnet_buffer2 (b)->gso_size = gso_size;
156 885213 : vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_sz;
157 885213 : }
158 :
159 : static_always_inline void
160 2266660 : fill_cksum_offload (vlib_buffer_t *b, u8 *l4_hdr_sz, u8 is_ip)
161 : {
162 2266660 : vnet_buffer_oflags_t oflags = 0;
163 2266660 : u16 l2hdr_sz = 0;
164 2266660 : u16 ethertype = 0;
165 2266660 : u8 l4_proto = 0;
166 :
167 2266660 : if (is_ip)
168 : {
169 0 : switch (b->data[0] & 0xf0)
170 : {
171 0 : case 0x40:
172 0 : ethertype = ETHERNET_TYPE_IP4;
173 0 : break;
174 0 : case 0x60:
175 0 : ethertype = ETHERNET_TYPE_IP6;
176 0 : break;
177 : }
178 0 : }
179 : else
180 : {
181 2266660 : ethernet_header_t *eth = (ethernet_header_t *) b->data;
182 2266660 : ethertype = clib_net_to_host_u16 (eth->type);
183 2266660 : l2hdr_sz = sizeof (ethernet_header_t);
184 2266660 : if (ethernet_frame_is_tagged (ethertype))
185 : {
186 0 : ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eth + 1);
187 :
188 0 : ethertype = clib_net_to_host_u16 (vlan->type);
189 0 : l2hdr_sz += sizeof (*vlan);
190 0 : if (ethertype == ETHERNET_TYPE_VLAN)
191 : {
192 0 : vlan++;
193 0 : ethertype = clib_net_to_host_u16 (vlan->type);
194 0 : l2hdr_sz += sizeof (*vlan);
195 : }
196 : }
197 : }
198 :
199 2266660 : vnet_buffer (b)->l2_hdr_offset = 0;
200 2266660 : vnet_buffer (b)->l3_hdr_offset = l2hdr_sz;
201 :
202 2266660 : if (ethertype == ETHERNET_TYPE_IP4)
203 : {
204 1243170 : ip4_header_t *ip4 = (ip4_header_t *) (b->data + l2hdr_sz);
205 1243170 : vnet_buffer (b)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
206 1243170 : b->flags |= (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
207 : VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
208 : VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
209 :
210 1243170 : l4_proto = ip4->protocol;
211 : }
212 1023490 : else if (ethertype == ETHERNET_TYPE_IP6)
213 : {
214 1023490 : ip6_header_t *ip6 = (ip6_header_t *) (b->data + l2hdr_sz);
215 1023490 : b->flags |= (VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
216 : VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
217 : VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
218 1023490 : u16 ip6_hdr_len = sizeof (ip6_header_t);
219 :
220 1023490 : if (ip6_ext_hdr (ip6->protocol))
221 : {
222 0 : ip6_ext_header_t *p = (void *) (ip6 + 1);
223 0 : ip6_hdr_len += ip6_ext_header_len (p);
224 0 : while (ip6_ext_hdr (p->next_hdr))
225 : {
226 0 : ip6_hdr_len += ip6_ext_header_len (p);
227 0 : p = ip6_ext_next_header (p);
228 : }
229 0 : l4_proto = p->next_hdr;
230 : }
231 : else
232 1023490 : l4_proto = ip6->protocol;
233 1023490 : vnet_buffer (b)->l4_hdr_offset = l2hdr_sz + ip6_hdr_len;
234 : }
235 :
236 2266660 : if (l4_proto == IP_PROTOCOL_TCP)
237 : {
238 2266660 : oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
239 2266660 : tcp_header_t *tcp =
240 2266660 : (tcp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
241 2266660 : *l4_hdr_sz = tcp_header_bytes (tcp);
242 : }
243 0 : else if (l4_proto == IP_PROTOCOL_UDP)
244 : {
245 0 : oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
246 0 : *l4_hdr_sz = sizeof (udp_header_t);
247 : }
248 :
249 2266660 : if (oflags)
250 2266660 : vnet_buffer_offload_flags_set (b, oflags);
251 2266660 : }
252 :
253 : always_inline uword
254 661027 : af_packet_v3_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
255 : vlib_frame_t *frame, af_packet_if_t *apif,
256 : u16 queue_id, u8 is_cksum_gso_enabled)
257 : {
258 661027 : af_packet_main_t *apm = &af_packet_main;
259 661027 : af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id);
260 : tpacket3_hdr_t *tph;
261 : u32 next_index;
262 : u32 n_free_bufs;
263 661027 : u32 n_rx_packets = 0;
264 661027 : u32 n_rx_bytes = 0;
265 661027 : u32 timedout_blk = 0;
266 661027 : u32 total = 0;
267 661027 : u32 *to_next = 0;
268 661027 : u32 block = rx_queue->next_rx_block;
269 661027 : u32 block_nr = rx_queue->rx_req->req3.tp_block_nr;
270 661027 : u8 *block_start = 0;
271 661027 : uword n_trace = vlib_get_trace_count (vm, node);
272 661027 : u32 thread_index = vm->thread_index;
273 661027 : u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
274 661027 : u32 min_bufs = rx_queue->rx_req->req3.tp_frame_size / n_buffer_bytes;
275 661027 : u32 num_pkts = 0;
276 661027 : u32 rx_frame_offset = 0;
277 661027 : block_desc_t *bd = 0;
278 661027 : vlib_buffer_t bt = {};
279 661027 : u8 is_ip = (apif->mode == AF_PACKET_IF_MODE_IP);
280 :
281 661027 : if (is_ip)
282 0 : next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
283 : else
284 : {
285 661027 : next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
286 661027 : if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
287 0 : next_index = apif->per_interface_next_index;
288 :
289 : /* redirect if feature path enabled */
290 661027 : vnet_feature_start_device_input_x1 (apif->sw_if_index, &next_index, &bt);
291 : }
292 :
293 661027 : if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block]))
294 661027 : ->hdr.bh1.block_status &
295 : TP_STATUS_USER) != 0)
296 : {
297 539407 : u32 n_required = 0;
298 539407 : bd = (block_desc_t *) block_start;
299 :
300 539407 : if (PREDICT_FALSE (rx_queue->is_rx_pending))
301 : {
302 0 : num_pkts = rx_queue->num_rx_pkts;
303 0 : rx_frame_offset = rx_queue->rx_frame_offset;
304 0 : rx_queue->is_rx_pending = 0;
305 : }
306 : else
307 : {
308 539407 : num_pkts = bd->hdr.bh1.num_pkts;
309 539407 : rx_frame_offset = bd->hdr.bh1.offset_to_first_pkt;
310 539407 : total++;
311 :
312 539407 : if (TP_STATUS_BLK_TMO & bd->hdr.bh1.block_status)
313 25548 : timedout_blk++;
314 : }
315 :
316 539407 : n_required = clib_max (num_pkts, VLIB_FRAME_SIZE);
317 539407 : n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
318 539407 : if (PREDICT_FALSE (n_free_bufs < n_required))
319 : {
320 65923 : vec_validate (apm->rx_buffers[thread_index],
321 : n_required + n_free_bufs - 1);
322 131846 : n_free_bufs += vlib_buffer_alloc (
323 65923 : vm, &apm->rx_buffers[thread_index][n_free_bufs], n_required);
324 65923 : vec_set_len (apm->rx_buffers[thread_index], n_free_bufs);
325 : }
326 :
327 1084310 : while (num_pkts && (n_free_bufs >= min_bufs))
328 : {
329 544901 : u32 next0 = next_index;
330 : u32 n_left_to_next;
331 :
332 544901 : vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
333 :
334 8743470 : while (num_pkts && n_left_to_next && (n_free_bufs >= min_bufs))
335 : {
336 8198570 : tph = (tpacket3_hdr_t *) (block_start + rx_frame_offset);
337 :
338 8198570 : if (num_pkts > 1)
339 7659160 : CLIB_PREFETCH (block_start + rx_frame_offset +
340 : tph->tp_next_offset,
341 : 2 * CLIB_CACHE_LINE_BYTES, LOAD);
342 :
343 8198570 : vlib_buffer_t *b0 = 0, *first_b0 = 0, *prev_b0 = 0;
344 8198570 : vnet_virtio_net_hdr_t *vnet_hdr = 0;
345 8198570 : u32 data_len = tph->tp_snaplen;
346 8198570 : u32 offset = 0;
347 8198570 : u32 bi0 = ~0, first_bi0 = ~0;
348 8198570 : u8 l4_hdr_sz = 0;
349 :
350 8198570 : if (is_cksum_gso_enabled)
351 464572 : vnet_hdr =
352 464572 : (vnet_virtio_net_hdr_t *) ((u8 *) tph + tph->tp_mac -
353 : sizeof (vnet_virtio_net_hdr_t));
354 :
355 : // save current state and return
356 8198570 : if (PREDICT_FALSE (((data_len / n_buffer_bytes) + 1) >
357 : vec_len (apm->rx_buffers[thread_index])))
358 : {
359 0 : rx_queue->rx_frame_offset = rx_frame_offset;
360 0 : rx_queue->num_rx_pkts = num_pkts;
361 0 : rx_queue->is_rx_pending = 1;
362 0 : vlib_put_next_frame (vm, node, next_index, n_left_to_next);
363 0 : goto done;
364 : }
365 :
366 28801600 : while (data_len)
367 : {
368 : /* grab free buffer */
369 20603100 : u32 last_empty_buffer =
370 20603100 : vec_len (apm->rx_buffers[thread_index]) - 1;
371 20603100 : bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
372 20603100 : vec_set_len (apm->rx_buffers[thread_index],
373 : last_empty_buffer);
374 20603100 : n_free_bufs--;
375 :
376 : /* copy data */
377 20603100 : u32 bytes_to_copy =
378 : data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
379 20603100 : u32 vlan_len = 0;
380 20603100 : u32 bytes_copied = 0;
381 :
382 20603100 : b0 = vlib_get_buffer (vm, bi0);
383 20603100 : b0->current_data = 0;
384 :
385 : /* Kernel removes VLAN headers, so reconstruct VLAN */
386 20603100 : if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID))
387 : {
388 0 : if (PREDICT_TRUE (offset == 0))
389 : {
390 0 : clib_memcpy_fast (vlib_buffer_get_current (b0),
391 0 : (u8 *) tph + tph->tp_mac,
392 : sizeof (ethernet_header_t));
393 : ethernet_header_t *eth =
394 0 : vlib_buffer_get_current (b0);
395 0 : ethernet_vlan_header_t *vlan =
396 : (ethernet_vlan_header_t *) (eth + 1);
397 0 : vlan->priority_cfi_and_id =
398 0 : clib_host_to_net_u16 (tph->hv1.tp_vlan_tci);
399 0 : vlan->type = eth->type;
400 0 : eth->type =
401 0 : clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
402 0 : vlan_len = sizeof (ethernet_vlan_header_t);
403 0 : bytes_copied = sizeof (ethernet_header_t);
404 : }
405 : }
406 41206100 : clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) +
407 20603100 : bytes_copied + vlan_len,
408 20603100 : (u8 *) tph + tph->tp_mac + offset +
409 : bytes_copied,
410 20603100 : (bytes_to_copy - bytes_copied));
411 :
412 : /* fill buffer header */
413 20603100 : b0->current_length = bytes_to_copy + vlan_len;
414 :
415 20603100 : if (offset == 0)
416 : {
417 8198570 : b0->total_length_not_including_first_buffer = 0;
418 8198570 : b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
419 8198570 : vnet_buffer (b0)->sw_if_index[VLIB_RX] =
420 8198570 : apif->sw_if_index;
421 8198570 : vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~0;
422 8198570 : first_b0 = b0;
423 8198570 : first_bi0 = bi0;
424 8198570 : if (is_cksum_gso_enabled)
425 : {
426 464572 : if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
427 464123 : fill_cksum_offload (first_b0, &l4_hdr_sz, is_ip);
428 464572 : if (vnet_hdr->gso_type & (VIRTIO_NET_HDR_GSO_TCPV4 |
429 : VIRTIO_NET_HDR_GSO_TCPV6))
430 242386 : fill_gso_offload (first_b0, vnet_hdr->gso_size,
431 : l4_hdr_sz);
432 : }
433 : }
434 : else
435 12404500 : buffer_add_to_chain (b0, first_b0, prev_b0, bi0);
436 :
437 20603100 : prev_b0 = b0;
438 20603100 : offset += bytes_to_copy;
439 20603100 : data_len -= bytes_to_copy;
440 : }
441 8198570 : n_rx_packets++;
442 8198570 : n_rx_bytes += tph->tp_snaplen;
443 8198570 : to_next[0] = first_bi0;
444 8198570 : to_next += 1;
445 8198570 : n_left_to_next--;
446 :
447 : /* drop partial packets */
448 8198570 : if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen))
449 : {
450 0 : next0 = VNET_DEVICE_INPUT_NEXT_DROP;
451 0 : first_b0->error =
452 0 : node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT];
453 : }
454 : else
455 : {
456 8198570 : if (PREDICT_FALSE (apif->mode == AF_PACKET_IF_MODE_IP))
457 : {
458 0 : switch (first_b0->data[0] & 0xf0)
459 : {
460 0 : case 0x40:
461 0 : next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
462 0 : break;
463 0 : case 0x60:
464 0 : next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
465 0 : break;
466 0 : default:
467 0 : next0 = VNET_DEVICE_INPUT_NEXT_DROP;
468 0 : break;
469 : }
470 0 : if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
471 0 : next0 = apif->per_interface_next_index;
472 : }
473 : else
474 : {
475 : /* copy feature arc data from template */
476 8198570 : first_b0->current_config_index = bt.current_config_index;
477 8198570 : vnet_buffer (first_b0)->feature_arc_index =
478 8198570 : vnet_buffer (&bt)->feature_arc_index;
479 : }
480 : }
481 :
482 : /* trace */
483 8198570 : if (PREDICT_FALSE (n_trace > 0 &&
484 : vlib_trace_buffer (vm, node, next0, first_b0,
485 : /* follow_chain */ 0)))
486 : {
487 : af_packet_input_trace_t *tr;
488 0 : vlib_set_trace_count (vm, node, --n_trace);
489 0 : tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
490 0 : tr->is_v3 = 1;
491 0 : tr->next_index = next0;
492 0 : tr->hw_if_index = apif->hw_if_index;
493 0 : tr->queue_id = queue_id;
494 0 : tr->block = block;
495 0 : tr->block_start = bd;
496 0 : tr->pkt_num = bd->hdr.bh1.num_pkts - num_pkts;
497 0 : clib_memcpy_fast (&tr->bd, bd, sizeof (block_desc_t));
498 0 : clib_memcpy_fast (&tr->tph3, tph, sizeof (tpacket3_hdr_t));
499 0 : if (is_cksum_gso_enabled)
500 0 : clib_memcpy_fast (&tr->vnet_hdr, vnet_hdr,
501 : sizeof (vnet_virtio_net_hdr_t));
502 : else
503 0 : clib_memset_u8 (&tr->vnet_hdr, 0,
504 : sizeof (vnet_virtio_net_hdr_t));
505 : }
506 :
507 : /* enque and take next packet */
508 8198570 : vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
509 : n_left_to_next, first_bi0,
510 : next0);
511 :
512 : /* next packet */
513 8198570 : num_pkts--;
514 8198570 : rx_frame_offset += tph->tp_next_offset;
515 : }
516 :
517 544901 : vlib_put_next_frame (vm, node, next_index, n_left_to_next);
518 : }
519 :
520 539407 : if (PREDICT_TRUE (num_pkts == 0))
521 : {
522 539407 : bd->hdr.bh1.block_status = TP_STATUS_KERNEL;
523 539407 : block = (block + 1) % block_nr;
524 : }
525 : else
526 : {
527 0 : rx_queue->rx_frame_offset = rx_frame_offset;
528 0 : rx_queue->num_rx_pkts = num_pkts;
529 0 : rx_queue->is_rx_pending = 1;
530 : }
531 : }
532 :
533 661027 : rx_queue->next_rx_block = block;
534 :
535 661027 : done:
536 :
537 661027 : if (apm->polling_count == 0)
538 : {
539 661027 : if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block]))
540 661027 : ->hdr.bh1.block_status &
541 : TP_STATUS_USER) != 0)
542 512263 : vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_POLLING);
543 : else
544 148764 : vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_INTERRUPT);
545 : }
546 :
547 661027 : vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TOTAL_RECV_BLK,
548 : total);
549 661027 : vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TIMEDOUT_BLK,
550 : timedout_blk);
551 :
552 661027 : vlib_increment_combined_counter
553 661027 : (vnet_get_main ()->interface_main.combined_sw_if_counters
554 : + VNET_INTERFACE_COUNTER_RX,
555 661027 : vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
556 :
557 661027 : vnet_device_increment_rx_packets (thread_index, n_rx_packets);
558 661027 : return n_rx_packets;
559 : }
560 :
561 : always_inline uword
562 253929 : af_packet_v2_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
563 : vlib_frame_t *frame, af_packet_if_t *apif,
564 : u16 queue_id, u8 is_cksum_gso_enabled)
565 : {
566 253929 : af_packet_main_t *apm = &af_packet_main;
567 253929 : af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id);
568 : tpacket2_hdr_t *tph;
569 : u32 next_index;
570 253929 : u32 block = 0;
571 : u32 rx_frame;
572 : u32 n_free_bufs;
573 253929 : u32 n_rx_packets = 0;
574 253929 : u32 n_rx_bytes = 0;
575 253929 : u32 *to_next = 0;
576 253929 : u32 frame_size = rx_queue->rx_req->req.tp_frame_size;
577 253929 : u32 frame_num = rx_queue->rx_req->req.tp_frame_nr;
578 253929 : u8 *block_start = rx_queue->rx_ring[block];
579 253929 : uword n_trace = vlib_get_trace_count (vm, node);
580 253929 : u32 thread_index = vm->thread_index;
581 253929 : u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
582 253929 : u32 min_bufs = rx_queue->rx_req->req.tp_frame_size / n_buffer_bytes;
583 253929 : u8 is_ip = (apif->mode == AF_PACKET_IF_MODE_IP);
584 253929 : vlib_buffer_t bt = {};
585 :
586 253929 : if (is_ip)
587 : {
588 0 : next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
589 : }
590 : else
591 : {
592 253929 : next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
593 253929 : if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
594 0 : next_index = apif->per_interface_next_index;
595 :
596 : /* redirect if feature path enabled */
597 253929 : vnet_feature_start_device_input_x1 (apif->sw_if_index, &next_index, &bt);
598 : }
599 :
600 253929 : n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
601 253929 : if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
602 : {
603 113082 : vec_validate (apm->rx_buffers[thread_index],
604 : VLIB_FRAME_SIZE + n_free_bufs - 1);
605 226164 : n_free_bufs += vlib_buffer_alloc (
606 113082 : vm, &apm->rx_buffers[thread_index][n_free_bufs], VLIB_FRAME_SIZE);
607 113082 : vec_set_len (apm->rx_buffers[thread_index], n_free_bufs);
608 : }
609 :
610 253929 : rx_frame = rx_queue->next_rx_frame;
611 253929 : tph = (tpacket2_hdr_t *) (block_start + rx_frame * frame_size);
612 510583 : while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs))
613 : {
614 256654 : vlib_buffer_t *b0 = 0, *first_b0 = 0, *prev_b0 = 0;
615 256654 : u32 next0 = next_index;
616 :
617 : u32 n_left_to_next;
618 256654 : vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
619 5330850 : while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs) &&
620 : n_left_to_next)
621 : {
622 5074200 : vnet_virtio_net_hdr_t *vnet_hdr = 0;
623 5074200 : u32 data_len = tph->tp_snaplen;
624 5074200 : u32 offset = 0;
625 5074200 : u32 bi0 = 0, first_bi0 = 0;
626 5074200 : u8 l4_hdr_sz = 0;
627 :
628 5074200 : if (is_cksum_gso_enabled)
629 1803740 : vnet_hdr =
630 1803740 : (vnet_virtio_net_hdr_t *) ((u8 *) tph + tph->tp_mac -
631 : sizeof (vnet_virtio_net_hdr_t));
632 30521700 : while (data_len)
633 : {
634 : /* grab free buffer */
635 25447500 : u32 last_empty_buffer =
636 25447500 : vec_len (apm->rx_buffers[thread_index]) - 1;
637 25447500 : bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
638 25447500 : b0 = vlib_get_buffer (vm, bi0);
639 25447500 : vec_set_len (apm->rx_buffers[thread_index], last_empty_buffer);
640 25447500 : n_free_bufs--;
641 :
642 : /* copy data */
643 25447500 : u32 bytes_to_copy =
644 : data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
645 25447500 : u32 vlan_len = 0;
646 25447500 : u32 bytes_copied = 0;
647 25447500 : b0->current_data = 0;
648 : /* Kernel removes VLAN headers, so reconstruct VLAN */
649 25447500 : if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID))
650 : {
651 0 : if (PREDICT_TRUE (offset == 0))
652 : {
653 0 : clib_memcpy_fast (vlib_buffer_get_current (b0),
654 0 : (u8 *) tph + tph->tp_mac,
655 : sizeof (ethernet_header_t));
656 0 : ethernet_header_t *eth = vlib_buffer_get_current (b0);
657 0 : ethernet_vlan_header_t *vlan =
658 : (ethernet_vlan_header_t *) (eth + 1);
659 0 : vlan->priority_cfi_and_id =
660 0 : clib_host_to_net_u16 (tph->tp_vlan_tci);
661 0 : vlan->type = eth->type;
662 0 : eth->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
663 0 : vlan_len = sizeof (ethernet_vlan_header_t);
664 0 : bytes_copied = sizeof (ethernet_header_t);
665 : }
666 : }
667 50895100 : clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) +
668 25447500 : bytes_copied + vlan_len,
669 25447500 : (u8 *) tph + tph->tp_mac + offset +
670 : bytes_copied,
671 25447500 : (bytes_to_copy - bytes_copied));
672 :
673 : /* fill buffer header */
674 25447500 : b0->current_length = bytes_to_copy + vlan_len;
675 :
676 25447500 : if (offset == 0)
677 : {
678 5074200 : b0->total_length_not_including_first_buffer = 0;
679 5074200 : b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
680 5074200 : vnet_buffer (b0)->sw_if_index[VLIB_RX] = apif->sw_if_index;
681 5074200 : vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~0;
682 5074200 : first_bi0 = bi0;
683 5074200 : first_b0 = vlib_get_buffer (vm, first_bi0);
684 :
685 5074200 : if (is_cksum_gso_enabled)
686 : {
687 1803740 : if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
688 1802540 : fill_cksum_offload (first_b0, &l4_hdr_sz, is_ip);
689 1803740 : if (vnet_hdr->gso_type & (VIRTIO_NET_HDR_GSO_TCPV4 |
690 : VIRTIO_NET_HDR_GSO_TCPV6))
691 642827 : fill_gso_offload (first_b0, vnet_hdr->gso_size,
692 : l4_hdr_sz);
693 : }
694 : }
695 : else
696 20373300 : buffer_add_to_chain (b0, first_b0, prev_b0, bi0);
697 :
698 25447500 : prev_b0 = b0;
699 25447500 : offset += bytes_to_copy;
700 25447500 : data_len -= bytes_to_copy;
701 : }
702 5074200 : n_rx_packets++;
703 5074200 : n_rx_bytes += tph->tp_snaplen;
704 5074200 : to_next[0] = first_bi0;
705 5074200 : to_next += 1;
706 5074200 : n_left_to_next--;
707 :
708 : /* drop partial packets */
709 5074200 : if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen))
710 : {
711 0 : next0 = VNET_DEVICE_INPUT_NEXT_DROP;
712 0 : first_b0->error =
713 0 : node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT];
714 : }
715 : else
716 : {
717 5074200 : if (PREDICT_FALSE (is_ip))
718 : {
719 0 : switch (first_b0->data[0] & 0xf0)
720 : {
721 0 : case 0x40:
722 0 : next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
723 0 : break;
724 0 : case 0x60:
725 0 : next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
726 0 : break;
727 0 : default:
728 0 : next0 = VNET_DEVICE_INPUT_NEXT_DROP;
729 0 : break;
730 : }
731 0 : if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
732 0 : next0 = apif->per_interface_next_index;
733 : }
734 : else
735 : {
736 : /* copy feature arc data from template */
737 5074200 : first_b0->current_config_index = bt.current_config_index;
738 5074200 : vnet_buffer (first_b0)->feature_arc_index =
739 5074200 : vnet_buffer (&bt)->feature_arc_index;
740 : }
741 : }
742 :
743 : /* trace */
744 5074200 : if (PREDICT_FALSE (n_trace > 0 &&
745 : vlib_trace_buffer (vm, node, next0, first_b0,
746 : /* follow_chain */ 0)))
747 : {
748 : af_packet_input_trace_t *tr;
749 0 : vlib_set_trace_count (vm, node, --n_trace);
750 0 : tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
751 0 : tr->is_v3 = 0;
752 0 : tr->next_index = next0;
753 0 : tr->hw_if_index = apif->hw_if_index;
754 0 : tr->queue_id = queue_id;
755 0 : clib_memcpy_fast (&tr->tph2, tph, sizeof (struct tpacket2_hdr));
756 0 : if (is_cksum_gso_enabled)
757 0 : clib_memcpy_fast (&tr->vnet_hdr, vnet_hdr,
758 : sizeof (vnet_virtio_net_hdr_t));
759 : else
760 0 : clib_memset_u8 (&tr->vnet_hdr, 0,
761 : sizeof (vnet_virtio_net_hdr_t));
762 : }
763 :
764 : /* enque and take next packet */
765 5074200 : vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
766 : n_left_to_next, first_bi0, next0);
767 :
768 : /* next packet */
769 5074200 : tph->tp_status = TP_STATUS_KERNEL;
770 5074200 : rx_frame = (rx_frame + 1) % frame_num;
771 5074200 : tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
772 : }
773 :
774 256654 : vlib_put_next_frame (vm, node, next_index, n_left_to_next);
775 : }
776 :
777 253929 : rx_queue->next_rx_frame = rx_frame;
778 :
779 253929 : vlib_increment_combined_counter (
780 253929 : vnet_get_main ()->interface_main.combined_sw_if_counters +
781 : VNET_INTERFACE_COUNTER_RX,
782 253929 : vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
783 :
784 253929 : vnet_device_increment_rx_packets (thread_index, n_rx_packets);
785 253929 : return n_rx_packets;
786 : }
787 :
788 : always_inline uword
789 914956 : af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
790 : vlib_frame_t *frame, af_packet_if_t *apif,
791 : u16 queue_id, u8 is_cksum_gso_enabled)
792 :
793 : {
794 914956 : if (apif->version == TPACKET_V3)
795 661027 : return af_packet_v3_device_input_fn (vm, node, frame, apif, queue_id,
796 : is_cksum_gso_enabled);
797 : else
798 253929 : return af_packet_v2_device_input_fn (vm, node, frame, apif, queue_id,
799 : is_cksum_gso_enabled);
800 : }
801 :
802 630943 : VLIB_NODE_FN (af_packet_input_node) (vlib_main_t * vm,
803 : vlib_node_runtime_t * node,
804 : vlib_frame_t * frame)
805 : {
806 628707 : u32 n_rx_packets = 0;
807 628707 : af_packet_main_t *apm = &af_packet_main;
808 : vnet_hw_if_rxq_poll_vector_t *pv;
809 628707 : pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
810 1543710 : for (int i = 0; i < vec_len (pv); i++)
811 : {
812 : af_packet_if_t *apif;
813 915000 : apif = vec_elt_at_index (apm->interfaces, pv[i].dev_instance);
814 915000 : if (apif->is_admin_up)
815 : {
816 914956 : if (apif->is_cksum_gso_enabled)
817 499826 : n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif,
818 499826 : pv[i].queue_id, 1);
819 : else
820 415130 : n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif,
821 415130 : pv[i].queue_id, 0);
822 : }
823 : }
824 628707 : return n_rx_packets;
825 : }
826 :
827 167480 : VLIB_REGISTER_NODE (af_packet_input_node) = {
828 : .name = "af-packet-input",
829 : .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
830 : .sibling_of = "device-input",
831 : .format_trace = format_af_packet_input_trace,
832 : .type = VLIB_NODE_TYPE_INPUT,
833 : .state = VLIB_NODE_STATE_INTERRUPT,
834 : .n_errors = AF_PACKET_INPUT_N_ERROR,
835 : .error_strings = af_packet_input_error_strings,
836 : };
837 :
838 :
839 : /*
840 : * fd.io coding-style-patch-verification: ON
841 : *
842 : * Local Variables:
843 : * eval: (c-set-style "gnu")
844 : * End:
845 : */
|