Line data Source code
1 : /*
2 : *------------------------------------------------------------------
3 : * tuntap.c - kernel stack (reverse) punt/inject path
4 : *
5 : * Copyright (c) 2009 Cisco and/or its affiliates.
6 : * Licensed under the Apache License, Version 2.0 (the "License");
7 : * you may not use this file except in compliance with the License.
8 : * You may obtain a copy of the License at:
9 : *
10 : * http://www.apache.org/licenses/LICENSE-2.0
11 : *
12 : * Unless required by applicable law or agreed to in writing, software
13 : * distributed under the License is distributed on an "AS IS" BASIS,
14 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 : * See the License for the specific language governing permissions and
16 : * limitations under the License.
17 : *------------------------------------------------------------------
18 : */
19 : /**
20 : * @file
21 : * @brief TunTap Kernel stack (reverse) punt/inject path.
22 : *
23 : * This driver runs in one of two distinct modes:
24 : * - "punt/inject" mode, where we send pkts not otherwise processed
25 : * by the forwarding to the Linux kernel stack, and
26 : *
27 : * - "normal interface" mode, where we treat the Linux kernel stack
28 : * as a peer.
29 : *
30 : * By default, we select punt/inject mode.
31 : */
32 :
33 : #include <fcntl.h> /* for open */
34 : #include <sys/ioctl.h>
35 : #include <sys/socket.h>
36 : #include <sys/stat.h>
37 : #include <sys/types.h>
38 : #include <sys/uio.h> /* for iovec */
39 : #include <netinet/in.h>
40 :
41 : #include <linux/if_arp.h>
42 : #include <linux/if_tun.h>
43 :
44 : #include <vlib/vlib.h>
45 : #include <vlib/unix/unix.h>
46 :
47 : #include <vnet/ip/ip.h>
48 : #include <vnet/fib/fib_table.h>
49 :
50 : #include <vnet/ethernet/ethernet.h>
51 : #include <vnet/devices/devices.h>
52 : #include <vnet/feature/feature.h>
53 :
54 : static vnet_device_class_t tuntap_dev_class;
55 : static vnet_hw_interface_class_t tuntap_interface_class;
56 :
57 : static void tuntap_punt_frame (vlib_main_t * vm,
58 : vlib_node_runtime_t * node,
59 : vlib_frame_t * frame);
60 : static void tuntap_nopunt_frame (vlib_main_t * vm,
61 : vlib_node_runtime_t * node,
62 : vlib_frame_t * frame);
63 :
64 : typedef struct
65 : {
66 : u32 sw_if_index;
67 : u8 is_v6;
68 : u8 addr[16];
69 : } subif_address_t;
70 :
71 : /**
72 : * @brief TUNTAP per thread struct
73 : */
74 : typedef struct
75 : {
76 : /** Vector of VLIB rx buffers to use. We allocate them in blocks
77 : of VLIB_FRAME_SIZE (256). */
78 : u32 *rx_buffers;
79 :
80 : /** Vector of iovecs for readv/writev calls. */
81 : struct iovec *iovecs;
82 : } tuntap_per_thread_t;
83 :
84 : /**
85 : * @brief TUNTAP node main state
86 : */
87 : typedef struct
88 : {
89 : /** per thread variables */
90 : tuntap_per_thread_t *threads;
91 :
92 : /** File descriptors for /dev/net/tun and provisioning socket. */
93 : int dev_net_tun_fd, dev_tap_fd;
94 :
95 : /** Create a "tap" [ethernet] encaps device */
96 : int is_ether;
97 :
98 : /** 1 if a "normal" routed intfc, 0 if a punt/inject interface */
99 :
100 : int have_normal_interface;
101 :
102 : /** tap device destination MAC address. Required, or Linux drops pkts */
103 : u8 ether_dst_mac[6];
104 :
105 : /** Interface MTU in bytes and # of default sized buffers. */
106 : u32 mtu_bytes, mtu_buffers;
107 :
108 : /** Linux interface name for tun device. */
109 : char *tun_name;
110 :
111 : /** Pool of subinterface addresses */
112 : subif_address_t *subifs;
113 :
114 : /** Hash for subif addresses */
115 : mhash_t subif_mhash;
116 :
117 : /** Unix file index */
118 : u32 clib_file_index;
119 :
120 : /** For the "normal" interface, if configured */
121 : u32 hw_if_index, sw_if_index;
122 :
123 : } tuntap_main_t;
124 :
125 : static tuntap_main_t tuntap_main = {
126 : .tun_name = "vnet",
127 :
128 : /** Suitable defaults for an Ethernet-like tun/tap device */
129 : .mtu_bytes = 4096 + 256,
130 : };
131 :
132 : /**
133 : * @brief tuntap_tx
134 : * @node tuntap-tx
135 : *
136 : * Output node, writes the buffers comprising the incoming frame
137 : * to the tun/tap device, aka hands them to the Linux kernel stack.
138 : *
139 : * @param *vm - vlib_main_t
140 : * @param *node - vlib_node_runtime_t
141 : * @param *frame - vlib_frame_t
142 : *
143 : * @return rc - uword
144 : *
145 : */
146 : static uword
147 0 : tuntap_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
148 : {
149 0 : u32 *buffers = vlib_frame_vector_args (frame);
150 0 : uword n_packets = frame->n_vectors;
151 0 : tuntap_main_t *tm = &tuntap_main;
152 0 : vnet_main_t *vnm = vnet_get_main ();
153 0 : vnet_interface_main_t *im = &vnm->interface_main;
154 0 : u32 n_bytes = 0;
155 : int i;
156 0 : u16 thread_index = vm->thread_index;
157 :
158 0 : for (i = 0; i < n_packets; i++)
159 : {
160 : struct iovec *iov;
161 : vlib_buffer_t *b;
162 : uword l;
163 :
164 0 : b = vlib_get_buffer (vm, buffers[i]);
165 :
166 0 : if (tm->is_ether && (!tm->have_normal_interface))
167 : {
168 0 : vlib_buffer_reset (b);
169 0 : clib_memcpy_fast (vlib_buffer_get_current (b), tm->ether_dst_mac,
170 : 6);
171 : }
172 :
173 : /* Re-set iovecs if present. */
174 0 : if (tm->threads[thread_index].iovecs)
175 0 : vec_set_len (tm->threads[thread_index].iovecs, 0);
176 :
177 : /** VLIB buffer chain -> Unix iovec(s). */
178 0 : vec_add2 (tm->threads[thread_index].iovecs, iov, 1);
179 0 : iov->iov_base = b->data + b->current_data;
180 0 : iov->iov_len = l = b->current_length;
181 :
182 0 : if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
183 : {
184 : do
185 : {
186 0 : b = vlib_get_buffer (vm, b->next_buffer);
187 :
188 0 : vec_add2 (tm->threads[thread_index].iovecs, iov, 1);
189 :
190 0 : iov->iov_base = b->data + b->current_data;
191 0 : iov->iov_len = b->current_length;
192 0 : l += b->current_length;
193 : }
194 0 : while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
195 : }
196 :
197 0 : if (writev (tm->dev_net_tun_fd, tm->threads[thread_index].iovecs,
198 0 : vec_len (tm->threads[thread_index].iovecs)) < l)
199 0 : clib_unix_warning ("writev");
200 :
201 0 : n_bytes += l;
202 : }
203 :
204 : /* Update tuntap interface output stats. */
205 0 : vlib_increment_combined_counter (im->combined_sw_if_counters
206 : + VNET_INTERFACE_COUNTER_TX,
207 : vm->thread_index,
208 : tm->sw_if_index, n_packets, n_bytes);
209 :
210 :
211 : /** The normal interface path flattens the buffer chain */
212 0 : if (tm->have_normal_interface)
213 0 : vlib_buffer_free_no_next (vm, buffers, n_packets);
214 : else
215 0 : vlib_buffer_free (vm, buffers, n_packets);
216 :
217 0 : return n_packets;
218 : }
219 :
220 : /* *INDENT-OFF* */
221 183788 : VLIB_REGISTER_NODE (tuntap_tx_node,static) = {
222 : .function = tuntap_tx,
223 : .name = "tuntap-tx",
224 : .type = VLIB_NODE_TYPE_INTERNAL,
225 : .vector_size = 4,
226 : };
227 : /* *INDENT-ON* */
228 :
229 : /**
230 : * @brief TUNTAP receive node
231 : * @node tuntap-rx
232 : *
233 : * @param *vm - vlib_main_t
234 : * @param *node - vlib_node_runtime_t
235 : * @param *frame - vlib_frame_t
236 : *
237 : * @return rc - uword
238 : *
239 : */
240 : static uword
241 0 : tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
242 : {
243 0 : tuntap_main_t *tm = &tuntap_main;
244 : vlib_buffer_t *b;
245 : u32 bi;
246 0 : const uword buffer_size = vlib_buffer_get_default_data_size (vm);
247 0 : u16 thread_index = vm->thread_index;
248 :
249 : /** Make sure we have some RX buffers. */
250 : {
251 0 : uword n_left = vec_len (tm->threads[thread_index].rx_buffers);
252 : uword n_alloc;
253 :
254 0 : if (n_left < VLIB_FRAME_SIZE / 2)
255 : {
256 0 : if (!tm->threads[thread_index].rx_buffers)
257 0 : vec_alloc (tm->threads[thread_index].rx_buffers, VLIB_FRAME_SIZE);
258 :
259 0 : n_alloc =
260 0 : vlib_buffer_alloc (vm,
261 0 : tm->threads[thread_index].rx_buffers + n_left,
262 : VLIB_FRAME_SIZE - n_left);
263 0 : vec_set_len (tm->threads[thread_index].rx_buffers, n_left + n_alloc);
264 : }
265 : }
266 :
267 : /** Allocate RX buffers from end of rx_buffers.
268 : Turn them into iovecs to pass to readv. */
269 : {
270 0 : uword i_rx = vec_len (tm->threads[thread_index].rx_buffers) - 1;
271 : vlib_buffer_t *b;
272 : word i, n_bytes_left, n_bytes_in_packet;
273 :
274 : /** We should have enough buffers left for an MTU sized packet. */
275 0 : ASSERT (vec_len (tm->threads[thread_index].rx_buffers) >=
276 : tm->mtu_buffers);
277 :
278 0 : vec_validate (tm->threads[thread_index].iovecs, tm->mtu_buffers - 1);
279 0 : for (i = 0; i < tm->mtu_buffers; i++)
280 : {
281 : b =
282 0 : vlib_get_buffer (vm,
283 0 : tm->threads[thread_index].rx_buffers[i_rx - i]);
284 0 : tm->threads[thread_index].iovecs[i].iov_base = b->data;
285 0 : tm->threads[thread_index].iovecs[i].iov_len = buffer_size;
286 : }
287 :
288 : n_bytes_left =
289 0 : readv (tm->dev_net_tun_fd, tm->threads[thread_index].iovecs,
290 0 : tm->mtu_buffers);
291 0 : n_bytes_in_packet = n_bytes_left;
292 0 : if (n_bytes_left <= 0)
293 : {
294 0 : if (errno != EAGAIN)
295 0 : clib_unix_warning ("readv %d", n_bytes_left);
296 0 : return 0;
297 : }
298 :
299 0 : bi = tm->threads[thread_index].rx_buffers[i_rx];
300 :
301 : while (1)
302 : {
303 0 : b = vlib_get_buffer (vm, tm->threads[thread_index].rx_buffers[i_rx]);
304 0 : b->flags = 0;
305 0 : b->current_data = 0;
306 0 : b->current_length =
307 0 : n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
308 :
309 0 : n_bytes_left -= buffer_size;
310 :
311 0 : if (n_bytes_left <= 0)
312 : {
313 0 : break;
314 : }
315 :
316 0 : i_rx--;
317 0 : b->flags |= VLIB_BUFFER_NEXT_PRESENT;
318 0 : b->next_buffer = tm->threads[thread_index].rx_buffers[i_rx];
319 : }
320 :
321 : /** Interface counters for tuntap interface. */
322 0 : vlib_increment_combined_counter
323 : (vnet_main.interface_main.combined_sw_if_counters
324 : + VNET_INTERFACE_COUNTER_RX,
325 : thread_index, tm->sw_if_index, 1, n_bytes_in_packet);
326 :
327 0 : vec_set_len (tm->threads[thread_index].rx_buffers, i_rx);
328 : }
329 :
330 0 : b = vlib_get_buffer (vm, bi);
331 :
332 : {
333 : u32 next_index;
334 0 : uword n_trace = vlib_get_trace_count (vm, node);
335 :
336 0 : vnet_buffer (b)->sw_if_index[VLIB_RX] = tm->sw_if_index;
337 0 : vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
338 :
339 0 : b->error = node->errors[0];
340 :
341 0 : if (tm->is_ether)
342 : {
343 0 : next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
344 : }
345 : else
346 0 : switch (b->data[0] & 0xf0)
347 : {
348 0 : case 0x40:
349 0 : next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
350 0 : break;
351 0 : case 0x60:
352 0 : next_index = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
353 0 : break;
354 0 : default:
355 0 : next_index = VNET_DEVICE_INPUT_NEXT_DROP;
356 0 : break;
357 : }
358 :
359 : /* The linux kernel couldn't care less if our interface is up */
360 0 : if (tm->have_normal_interface)
361 : {
362 0 : vnet_main_t *vnm = vnet_get_main ();
363 : vnet_sw_interface_t *si;
364 0 : si = vnet_get_sw_interface (vnm, tm->sw_if_index);
365 0 : if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
366 0 : next_index = VNET_DEVICE_INPUT_NEXT_DROP;
367 : }
368 :
369 0 : vnet_feature_start_device_input (tm->sw_if_index, &next_index, b);
370 :
371 0 : vlib_set_next_frame_buffer (vm, node, next_index, bi);
372 :
373 0 : if (PREDICT_FALSE (n_trace > 0 && vlib_trace_buffer (vm, node, next_index, b, /* follow_chain */
374 : 1)))
375 0 : vlib_set_trace_count (vm, node, n_trace - 1);
376 : }
377 :
378 0 : return 1;
379 : }
380 :
/**
 * @brief TUNTAP_RX error strings, referenced by the tuntap-rx node
 *        registration (one entry, index 0)
 */
static char *tuntap_rx_error_strings[] = { "unknown packet type" };
387 :
388 : /* *INDENT-OFF* */
389 183788 : VLIB_REGISTER_NODE (tuntap_rx_node,static) = {
390 : .function = tuntap_rx,
391 : .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
392 : .name = "tuntap-rx",
393 : .sibling_of = "device-input",
394 : .type = VLIB_NODE_TYPE_INPUT,
395 : .state = VLIB_NODE_STATE_INTERRUPT,
396 : .vector_size = 4,
397 : .n_errors = 1,
398 : .error_strings = tuntap_rx_error_strings,
399 : };
400 : /* *INDENT-ON* */
401 :
402 : /**
403 : * @brief Gets called when file descriptor is ready from epoll.
404 : *
405 : * @param *uf - clib_file_t
406 : *
407 : * @return error - clib_error_t
408 : */
409 : static clib_error_t *
410 0 : tuntap_read_ready (clib_file_t * uf)
411 : {
412 0 : vlib_main_t *vm = vlib_get_main ();
413 0 : vlib_node_set_interrupt_pending (vm, tuntap_rx_node.index);
414 0 : return 0;
415 : }
416 :
417 : /**
418 : * @brief Clean up the tun/tap device
419 : *
420 : * @param *vm - vlib_main_t
421 : *
422 : * @return error - clib_error_t
423 : *
424 : */
425 : static clib_error_t *
426 575 : tuntap_exit (vlib_main_t * vm)
427 : {
428 575 : tuntap_main_t *tm = &tuntap_main;
429 : struct ifreq ifr;
430 : int sfd;
431 :
432 : /* Not present. */
433 575 : if (!tm->dev_net_tun_fd || tm->dev_net_tun_fd < 0)
434 575 : return 0;
435 :
436 0 : sfd = socket (AF_INET, SOCK_STREAM, 0);
437 0 : if (sfd < 0)
438 0 : clib_unix_warning ("provisioning socket");
439 :
440 0 : clib_memset (&ifr, 0, sizeof (ifr));
441 0 : strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name) - 1);
442 :
443 : /* get flags, modify to bring down interface... */
444 0 : if (ioctl (sfd, SIOCGIFFLAGS, &ifr) < 0)
445 0 : clib_unix_warning ("SIOCGIFFLAGS");
446 :
447 0 : ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
448 :
449 0 : if (ioctl (sfd, SIOCSIFFLAGS, &ifr) < 0)
450 0 : clib_unix_warning ("SIOCSIFFLAGS");
451 :
452 : /* Turn off persistence */
453 0 : if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 0) < 0)
454 0 : clib_unix_warning ("TUNSETPERSIST");
455 0 : close (tm->dev_tap_fd);
456 0 : if (tm->dev_net_tun_fd >= 0)
457 0 : close (tm->dev_net_tun_fd);
458 0 : if (sfd >= 0)
459 0 : close (sfd);
460 :
461 0 : return 0;
462 : }
463 :
464 2876 : VLIB_MAIN_LOOP_EXIT_FUNCTION (tuntap_exit);
465 :
466 : /**
467 : * @brief CLI function for tun/tap config
468 : *
469 : * @param *vm - vlib_main_t
470 : * @param *input - unformat_input_t
471 : *
472 : * @return error - clib_error_t
473 : *
474 : */
475 : static clib_error_t *
476 575 : tuntap_config (vlib_main_t * vm, unformat_input_t * input)
477 : {
478 575 : tuntap_main_t *tm = &tuntap_main;
479 575 : clib_error_t *error = 0;
480 : struct ifreq ifr;
481 : u8 *name;
482 575 : int flags = IFF_TUN | IFF_NO_PI;
483 575 : int is_enabled = 0, is_ether = 0, have_normal_interface = 0;
484 575 : const uword buffer_size = vlib_buffer_get_default_data_size (vm);
485 :
486 575 : while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
487 : {
488 0 : if (unformat (input, "mtu %d", &tm->mtu_bytes))
489 : ;
490 0 : else if (unformat (input, "enable"))
491 0 : is_enabled = 1;
492 0 : else if (unformat (input, "disable"))
493 0 : is_enabled = 0;
494 0 : else if (unformat (input, "ethernet") || unformat (input, "ether"))
495 0 : is_ether = 1;
496 0 : else if (unformat (input, "have-normal-interface") ||
497 0 : unformat (input, "have-normal"))
498 0 : have_normal_interface = 1;
499 0 : else if (unformat (input, "name %s", &name))
500 0 : tm->tun_name = (char *) name;
501 : else
502 0 : return clib_error_return (0, "unknown input `%U'",
503 : format_unformat_error, input);
504 : }
505 :
506 575 : tm->dev_net_tun_fd = -1;
507 575 : tm->dev_tap_fd = -1;
508 :
509 575 : if (is_enabled == 0)
510 575 : return 0;
511 :
512 0 : if (geteuid ())
513 : {
514 0 : clib_warning ("tuntap disabled: must be superuser");
515 0 : return 0;
516 : }
517 :
518 0 : tm->is_ether = is_ether;
519 0 : tm->have_normal_interface = have_normal_interface;
520 :
521 0 : if (is_ether)
522 0 : flags = IFF_TAP | IFF_NO_PI;
523 :
524 0 : if ((tm->dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0)
525 : {
526 0 : error = clib_error_return_unix (0, "open /dev/net/tun");
527 0 : goto done;
528 : }
529 :
530 0 : clib_memset (&ifr, 0, sizeof (ifr));
531 0 : strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name) - 1);
532 0 : ifr.ifr_flags = flags;
533 0 : if (ioctl (tm->dev_net_tun_fd, TUNSETIFF, (void *) &ifr) < 0)
534 : {
535 0 : error = clib_error_return_unix (0, "ioctl TUNSETIFF");
536 0 : goto done;
537 : }
538 :
539 : /* Make it persistent, at least until we split. */
540 0 : if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 1) < 0)
541 : {
542 0 : error = clib_error_return_unix (0, "TUNSETPERSIST");
543 0 : goto done;
544 : }
545 :
546 : /* Open a provisioning socket */
547 0 : if ((tm->dev_tap_fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0)
548 : {
549 0 : error = clib_error_return_unix (0, "socket");
550 0 : goto done;
551 : }
552 :
553 : /* Find the interface index. */
554 : {
555 : struct ifreq ifr;
556 : struct sockaddr_ll sll;
557 :
558 0 : clib_memset (&ifr, 0, sizeof (ifr));
559 0 : strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name) - 1);
560 0 : if (ioctl (tm->dev_tap_fd, SIOCGIFINDEX, &ifr) < 0)
561 : {
562 0 : error = clib_error_return_unix (0, "ioctl SIOCGIFINDEX");
563 0 : goto done;
564 : }
565 :
566 : /* Bind the provisioning socket to the interface. */
567 0 : clib_memset (&sll, 0, sizeof (sll));
568 0 : sll.sll_family = AF_PACKET;
569 0 : sll.sll_ifindex = ifr.ifr_ifindex;
570 0 : sll.sll_protocol = htons (ETH_P_ALL);
571 :
572 0 : if (bind (tm->dev_tap_fd, (struct sockaddr *) &sll, sizeof (sll)) < 0)
573 : {
574 0 : error = clib_error_return_unix (0, "bind");
575 0 : goto done;
576 : }
577 : }
578 :
579 : /* non-blocking I/O on /dev/tapX */
580 : {
581 0 : int one = 1;
582 0 : if (ioctl (tm->dev_net_tun_fd, FIONBIO, &one) < 0)
583 : {
584 0 : error = clib_error_return_unix (0, "ioctl FIONBIO");
585 0 : goto done;
586 : }
587 : }
588 :
589 0 : tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size;
590 :
591 0 : ifr.ifr_mtu = tm->mtu_bytes;
592 0 : if (ioctl (tm->dev_tap_fd, SIOCSIFMTU, &ifr) < 0)
593 : {
594 0 : error = clib_error_return_unix (0, "ioctl SIOCSIFMTU");
595 0 : goto done;
596 : }
597 :
598 : /* get flags, modify to bring up interface... */
599 0 : if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
600 : {
601 0 : error = clib_error_return_unix (0, "ioctl SIOCGIFFLAGS");
602 0 : goto done;
603 : }
604 :
605 0 : ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
606 :
607 0 : if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
608 : {
609 0 : error = clib_error_return_unix (0, "ioctl SIOCSIFFLAGS");
610 0 : goto done;
611 : }
612 :
613 0 : if (is_ether)
614 : {
615 0 : if (ioctl (tm->dev_tap_fd, SIOCGIFHWADDR, &ifr) < 0)
616 : {
617 0 : error = clib_error_return_unix (0, "ioctl SIOCGIFHWADDR");
618 0 : goto done;
619 : }
620 : else
621 0 : clib_memcpy_fast (tm->ether_dst_mac, ifr.ifr_hwaddr.sa_data, 6);
622 : }
623 :
624 0 : if (have_normal_interface)
625 : {
626 0 : vnet_main_t *vnm = vnet_get_main ();
627 0 : vnet_eth_interface_registration_t eir = {};
628 :
629 0 : eir.dev_class_index = tuntap_dev_class.index;
630 0 : eir.address = tm->ether_dst_mac;
631 0 : tm->hw_if_index = vnet_eth_register_interface (vnm, &eir);
632 :
633 0 : tm->sw_if_index = tm->hw_if_index;
634 0 : vm->os_punt_frame = tuntap_nopunt_frame;
635 : }
636 : else
637 : {
638 0 : vnet_main_t *vnm = vnet_get_main ();
639 : vnet_hw_interface_t *hi;
640 :
641 0 : vm->os_punt_frame = tuntap_punt_frame;
642 :
643 0 : tm->hw_if_index = vnet_register_interface
644 : (vnm, tuntap_dev_class.index, 0 /* device instance */ ,
645 : tuntap_interface_class.index, 0);
646 0 : hi = vnet_get_hw_interface (vnm, tm->hw_if_index);
647 0 : tm->sw_if_index = hi->sw_if_index;
648 :
649 : /* Interface is always up. */
650 0 : vnet_hw_interface_set_flags (vnm, tm->hw_if_index,
651 : VNET_HW_INTERFACE_FLAG_LINK_UP);
652 0 : vnet_sw_interface_set_flags (vnm, tm->sw_if_index,
653 : VNET_SW_INTERFACE_FLAG_ADMIN_UP);
654 : }
655 :
656 : {
657 0 : clib_file_t template = { 0 };
658 0 : template.read_function = tuntap_read_ready;
659 0 : template.file_descriptor = tm->dev_net_tun_fd;
660 0 : template.description = format (0, "vnet tuntap");
661 0 : tm->clib_file_index = clib_file_add (&file_main, &template);
662 : }
663 :
664 0 : done:
665 0 : if (error)
666 : {
667 0 : if (tm->dev_net_tun_fd >= 0)
668 0 : close (tm->dev_net_tun_fd);
669 0 : if (tm->dev_tap_fd >= 0)
670 0 : close (tm->dev_tap_fd);
671 : }
672 :
673 0 : return error;
674 : }
675 :
676 7514 : VLIB_CONFIG_FUNCTION (tuntap_config, "tuntap");
677 :
678 : /**
679 : * @brief Add or Del IP4 address to tun/tap interface
680 : *
681 : * @param *im - ip4_main_t
682 : * @param opaque - uword
683 : * @param sw_if_index - u32
684 : * @param *address - ip4_address_t
685 : * @param is_delete - u32
686 : *
687 : */
688 : void
689 4838 : tuntap_ip4_add_del_interface_address (ip4_main_t * im,
690 : uword opaque,
691 : u32 sw_if_index,
692 : ip4_address_t * address,
693 : u32 address_length,
694 : u32 if_address_index, u32 is_delete)
695 : {
696 4838 : tuntap_main_t *tm = &tuntap_main;
697 : struct ifreq ifr;
698 : subif_address_t subif_addr, *ap;
699 : uword *p;
700 :
701 : /** Tuntap disabled, or using a "normal" interface. */
702 4838 : if (tm->have_normal_interface || tm->dev_tap_fd < 0)
703 4838 : return;
704 :
705 : /* if the address is being applied to an interface that is not in
706 : * the same table/VRF as this tap, then ignore it.
707 : * If we don't do this overlapping address spaces in the different tables
708 : * breaks the linux host's routing tables */
709 0 : if (fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
710 : sw_if_index) !=
711 0 : fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, tm->sw_if_index))
712 0 : return;
713 :
714 : /** See if we already know about this subif */
715 0 : clib_memset (&subif_addr, 0, sizeof (subif_addr));
716 0 : subif_addr.sw_if_index = sw_if_index;
717 0 : clib_memcpy_fast (&subif_addr.addr, address, sizeof (*address));
718 :
719 0 : p = mhash_get (&tm->subif_mhash, &subif_addr);
720 :
721 0 : if (p)
722 0 : ap = pool_elt_at_index (tm->subifs, p[0]);
723 : else
724 : {
725 0 : pool_get (tm->subifs, ap);
726 0 : *ap = subif_addr;
727 0 : mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0);
728 : }
729 :
730 : /* Use subif pool index to select alias device. */
731 0 : clib_memset (&ifr, 0, sizeof (ifr));
732 0 : snprintf (ifr.ifr_name, sizeof (ifr.ifr_name),
733 0 : "%s:%d", tm->tun_name, (int) (ap - tm->subifs));
734 :
735 : /* the tuntap punt/inject is enabled for IPv4 RX so long as
736 : * any vpp interface has an IPv4 address.
737 : * this is also ref counted.
738 : */
739 0 : ip4_sw_interface_enable_disable (tm->sw_if_index, !is_delete);
740 :
741 0 : if (!is_delete)
742 : {
743 : struct sockaddr_in *sin;
744 :
745 0 : sin = (struct sockaddr_in *) &ifr.ifr_addr;
746 :
747 : /* Set ipv4 address, netmask. */
748 0 : sin->sin_family = AF_INET;
749 0 : clib_memcpy_fast (&sin->sin_addr.s_addr, address, 4);
750 0 : if (ioctl (tm->dev_tap_fd, SIOCSIFADDR, &ifr) < 0)
751 0 : clib_unix_warning ("ioctl SIOCSIFADDR");
752 :
753 0 : sin->sin_addr.s_addr = im->fib_masks[address_length];
754 0 : if (ioctl (tm->dev_tap_fd, SIOCSIFNETMASK, &ifr) < 0)
755 0 : clib_unix_warning ("ioctl SIOCSIFNETMASK");
756 : }
757 : else
758 : {
759 0 : mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */ );
760 0 : pool_put (tm->subifs, ap);
761 : }
762 :
763 : /* get flags, modify to bring up interface... */
764 0 : if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
765 0 : clib_unix_warning ("ioctl SIOCGIFFLAGS");
766 :
767 0 : if (is_delete)
768 0 : ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
769 : else
770 0 : ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
771 :
772 0 : if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
773 0 : clib_unix_warning ("ioctl SIOCSIFFLAGS");
774 : }
775 :
776 : /**
777 : * @brief workaround for a known include file bug.
778 : * including @c <linux/ipv6.h> causes multiple definitions if
779 : * @c <netinet/in.h is also included.
780 : */
781 : struct in6_ifreq
782 : {
783 : struct in6_addr ifr6_addr;
784 : u32 ifr6_prefixlen;
785 : int ifr6_ifindex;
786 : };
787 :
788 : /**
789 : * @brief Add or Del tun/tap interface address.
790 : *
791 : * Both the v6 interface address API and the way ifconfig
792 : * displays subinterfaces differ from their v4 counterparts.
793 : * The code given here seems to work but YMMV.
794 : *
795 : * @param *im - ip6_main_t
796 : * @param opaque - uword
797 : * @param sw_if_index - u32
798 : * @param *address - ip6_address_t
799 : * @param address_length - u32
800 : * @param if_address_index - u32
801 : * @param is_delete - u32
802 : */
803 : void
804 4115 : tuntap_ip6_add_del_interface_address (ip6_main_t * im,
805 : uword opaque,
806 : u32 sw_if_index,
807 : ip6_address_t * address,
808 : u32 address_length,
809 : u32 if_address_index, u32 is_delete)
810 : {
811 4115 : tuntap_main_t *tm = &tuntap_main;
812 : struct ifreq ifr;
813 : struct in6_ifreq ifr6;
814 : subif_address_t subif_addr, *ap;
815 : uword *p;
816 :
817 : /* Tuntap disabled, or using a "normal" interface. */
818 4115 : if (tm->have_normal_interface || tm->dev_tap_fd < 0)
819 4115 : return;
820 :
821 : /* if the address is being applied to an interface that is not in
822 : * the same table/VRF as this tap, then ignore it.
823 : * If we don't do this overlapping address spaces in the different tables
824 : * breaks the linux host's routing tables */
825 0 : if (fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
826 : sw_if_index) !=
827 0 : fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, tm->sw_if_index))
828 0 : return;
829 :
830 : /* See if we already know about this subif */
831 0 : clib_memset (&subif_addr, 0, sizeof (subif_addr));
832 0 : subif_addr.sw_if_index = sw_if_index;
833 0 : subif_addr.is_v6 = 1;
834 0 : clib_memcpy_fast (&subif_addr.addr, address, sizeof (*address));
835 :
836 0 : p = mhash_get (&tm->subif_mhash, &subif_addr);
837 :
838 0 : if (p)
839 0 : ap = pool_elt_at_index (tm->subifs, p[0]);
840 : else
841 : {
842 0 : pool_get (tm->subifs, ap);
843 0 : *ap = subif_addr;
844 0 : mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0);
845 : }
846 :
847 : /* Use subif pool index to select alias device. */
848 0 : clib_memset (&ifr, 0, sizeof (ifr));
849 0 : clib_memset (&ifr6, 0, sizeof (ifr6));
850 0 : snprintf (ifr.ifr_name, sizeof (ifr.ifr_name),
851 0 : "%s:%d", tm->tun_name, (int) (ap - tm->subifs));
852 :
853 : /* the tuntap punt/inject is enabled for IPv6 RX so long as
854 : * any vpp interface has an IPv6 address.
855 : * this is also ref counted.
856 : */
857 0 : ip6_sw_interface_enable_disable (tm->sw_if_index, !is_delete);
858 :
859 0 : if (!is_delete)
860 : {
861 0 : int sockfd = socket (AF_INET6, SOCK_STREAM, 0);
862 0 : if (sockfd < 0)
863 0 : clib_unix_warning ("get ifindex socket");
864 :
865 0 : if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0)
866 0 : clib_unix_warning ("get ifindex");
867 :
868 0 : ifr6.ifr6_ifindex = ifr.ifr_ifindex;
869 0 : ifr6.ifr6_prefixlen = address_length;
870 0 : clib_memcpy_fast (&ifr6.ifr6_addr, address, 16);
871 :
872 0 : if (ioctl (sockfd, SIOCSIFADDR, &ifr6) < 0)
873 0 : clib_unix_warning ("set address");
874 :
875 0 : if (sockfd >= 0)
876 0 : close (sockfd);
877 : }
878 : else
879 : {
880 0 : int sockfd = socket (AF_INET6, SOCK_STREAM, 0);
881 0 : if (sockfd < 0)
882 0 : clib_unix_warning ("get ifindex socket");
883 :
884 0 : if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0)
885 0 : clib_unix_warning ("get ifindex");
886 :
887 0 : ifr6.ifr6_ifindex = ifr.ifr_ifindex;
888 0 : ifr6.ifr6_prefixlen = address_length;
889 0 : clib_memcpy_fast (&ifr6.ifr6_addr, address, 16);
890 :
891 0 : if (ioctl (sockfd, SIOCDIFADDR, &ifr6) < 0)
892 0 : clib_unix_warning ("del address");
893 :
894 0 : if (sockfd >= 0)
895 0 : close (sockfd);
896 :
897 0 : mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */ );
898 0 : pool_put (tm->subifs, ap);
899 : }
900 : }
901 :
902 : /**
903 : * @brief TX the tun/tap frame
904 : *
905 : * @param *vm - vlib_main_t
906 : * @param *node - vlib_node_runtime_t
907 : * @param *frame - vlib_frame_t
908 : *
909 : */
910 : static void
911 0 : tuntap_punt_frame (vlib_main_t * vm,
912 : vlib_node_runtime_t * node, vlib_frame_t * frame)
913 : {
914 0 : tuntap_tx (vm, node, frame);
915 0 : vlib_frame_free (vm, frame);
916 0 : }
917 :
918 : /**
919 : * @brief Free the tun/tap frame
920 : *
921 : * @param *vm - vlib_main_t
922 : * @param *node - vlib_node_runtime_t
923 : * @param *frame - vlib_frame_t
924 : *
925 : */
926 : static void
927 0 : tuntap_nopunt_frame (vlib_main_t * vm,
928 : vlib_node_runtime_t * node, vlib_frame_t * frame)
929 : {
930 0 : u32 *buffers = vlib_frame_vector_args (frame);
931 0 : uword n_packets = frame->n_vectors;
932 0 : vlib_buffer_free (vm, buffers, n_packets);
933 0 : vlib_frame_free (vm, frame);
934 0 : }
935 :
936 : /* *INDENT-OFF* */
937 8063 : VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = {
938 : .name = "tuntap",
939 : .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
940 : };
941 : /* *INDENT-ON* */
942 :
943 : /**
944 : * @brief Format tun/tap interface name
945 : *
946 : * @param *s - u8 - formatter string
947 : * @param *args - va_list
948 : *
949 : * @return *s - u8 - formatted string
950 : *
951 : */
952 : static u8 *
953 0 : format_tuntap_interface_name (u8 * s, va_list * args)
954 : {
955 0 : u32 i = va_arg (*args, u32);
956 :
957 0 : s = format (s, "tuntap-%d", i);
958 0 : return s;
959 : }
960 :
961 : /**
962 : * @brief TX packet out tun/tap
963 : *
964 : * @param *vm - vlib_main_t
965 : * @param *node - vlib_node_runtime_t
966 : * @param *frame - vlib_frame_t
967 : *
968 : * @return n_buffers - uword - Packets transmitted
969 : *
970 : */
971 : static uword
972 0 : tuntap_intfc_tx (vlib_main_t * vm,
973 : vlib_node_runtime_t * node, vlib_frame_t * frame)
974 : {
975 0 : tuntap_main_t *tm = &tuntap_main;
976 0 : u32 *buffers = vlib_frame_vector_args (frame);
977 0 : uword n_buffers = frame->n_vectors;
978 :
979 : /* Normal interface transmit happens only on the normal interface... */
980 0 : if (tm->have_normal_interface)
981 0 : return tuntap_tx (vm, node, frame);
982 :
983 0 : vlib_buffer_free (vm, buffers, n_buffers);
984 0 : return n_buffers;
985 : }
986 :
987 : /* *INDENT-OFF* */
988 12095 : VNET_DEVICE_CLASS (tuntap_dev_class,static) = {
989 : .name = "tuntap",
990 : .tx_function = tuntap_intfc_tx,
991 : .format_device_name = format_tuntap_interface_name,
992 : };
993 : /* *INDENT-ON* */
994 :
995 : /**
996 : * @brief tun/tap node init
997 : *
998 : * @param *vm - vlib_main_t
999 : *
1000 : * @return error - clib_error_t
1001 : *
1002 : */
1003 : static clib_error_t *
1004 575 : tuntap_init (vlib_main_t * vm)
1005 : {
1006 575 : ip4_main_t *im4 = &ip4_main;
1007 575 : ip6_main_t *im6 = &ip6_main;
1008 : ip4_add_del_interface_address_callback_t cb4;
1009 : ip6_add_del_interface_address_callback_t cb6;
1010 575 : tuntap_main_t *tm = &tuntap_main;
1011 575 : vlib_thread_main_t *m = vlib_get_thread_main ();
1012 :
1013 575 : mhash_init (&tm->subif_mhash, sizeof (u32), sizeof (subif_address_t));
1014 :
1015 575 : cb4.function = tuntap_ip4_add_del_interface_address;
1016 575 : cb4.function_opaque = 0;
1017 575 : vec_add1 (im4->add_del_interface_address_callbacks, cb4);
1018 :
1019 575 : cb6.function = tuntap_ip6_add_del_interface_address;
1020 575 : cb6.function_opaque = 0;
1021 575 : vec_add1 (im6->add_del_interface_address_callbacks, cb6);
1022 575 : vec_validate_aligned (tm->threads, m->n_vlib_mains - 1,
1023 : CLIB_CACHE_LINE_BYTES);
1024 :
1025 575 : return 0;
1026 : }
1027 :
1028 : /* *INDENT-OFF* */
1029 81215 : VLIB_INIT_FUNCTION (tuntap_init) =
1030 : {
1031 : .runs_after = VLIB_INITS("ip4_init"),
1032 : };
1033 : /* *INDENT-ON* */
1034 :
1035 : /*
1036 : * fd.io coding-style-patch-verification: ON
1037 : *
1038 : * Local Variables:
1039 : * eval: (c-set-style "gnu")
1040 : * End:
1041 : */
|