Line data Source code
1 : /*
2 : *------------------------------------------------------------------
3 : * vhost.c - vhost-user
4 : *
5 : * Copyright (c) 2014-2018 Cisco and/or its affiliates.
6 : * Licensed under the Apache License, Version 2.0 (the "License");
7 : * you may not use this file except in compliance with the License.
8 : * You may obtain a copy of the License at:
9 : *
10 : * http://www.apache.org/licenses/LICENSE-2.0
11 : *
12 : * Unless required by applicable law or agreed to in writing, software
13 : * distributed under the License is distributed on an "AS IS" BASIS,
14 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 : * See the License for the specific language governing permissions and
16 : * limitations under the License.
17 : *------------------------------------------------------------------
18 : */
19 :
#include <fcntl.h> /* for open */
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h> /* for iovec */
#include <netinet/in.h>
#include <sys/vfs.h>
#include <unistd.h> /* for sysconf */
29 :
30 : #include <linux/if_arp.h>
31 : #include <linux/if_tun.h>
32 :
33 : #include <vlib/vlib.h>
34 : #include <vlib/unix/unix.h>
35 :
36 : #include <vnet/ethernet/ethernet.h>
37 : #include <vnet/devices/devices.h>
38 : #include <vnet/feature/feature.h>
39 : #include <vnet/interface/rx_queue_funcs.h>
40 : #include <vnet/interface/tx_queue_funcs.h>
41 :
42 : #include <vhost/vhost_user.h>
43 : #include <vhost/vhost_user_inline.h>
44 :
45 : /**
46 : * @file
47 : * @brief vHost User Device Driver.
48 : *
49 : * This file contains the source code for vHost User interface.
50 : */
51 :
52 :
53 : vlib_node_registration_t vhost_user_send_interrupt_node;
54 :
55 : /* *INDENT-OFF* */
56 : vhost_user_main_t vhost_user_main = {
57 : .mtu_bytes = 1518,
58 : };
59 :
60 1679 : VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = {
61 : .name = "vhost-user",
62 : };
63 : /* *INDENT-ON* */
64 :
/**
 * @brief Return the block size of the filesystem backing a file descriptor.
 *
 * Callers in this file use the result as the page size when rounding the
 * length of a mapping, so the value must be a usable, positive size.
 *
 * @param fd open file descriptor to query with fstatfs().
 * @return the f_bsize reported by fstatfs(); if fstatfs() fails or reports a
 *         non-positive size, the system page size from sysconf() instead.
 */
static long
get_huge_page_size (int fd)
{
  struct statfs s;

  /*
   * On failure the contents of s are unspecified; never hand an
   * indeterminate value back to callers that use it as an alignment mask.
   */
  if (fstatfs (fd, &s) != 0 || s.f_bsize <= 0)
    return sysconf (_SC_PAGESIZE);

  return s.f_bsize;
}
72 :
73 : static void
74 4 : unmap_all_mem_regions (vhost_user_intf_t * vui)
75 : {
76 : int i, r, q;
77 : vhost_user_vring_t *vq;
78 :
79 4 : for (i = 0; i < vui->nregions; i++)
80 : {
81 0 : if (vui->region_mmap_addr[i] != MAP_FAILED)
82 : {
83 :
84 0 : long page_sz = get_huge_page_size (vui->region_mmap_fd[i]);
85 :
86 0 : ssize_t map_sz = (vui->regions[i].memory_size +
87 0 : vui->regions[i].mmap_offset +
88 0 : page_sz - 1) & ~(page_sz - 1);
89 :
90 : r =
91 0 : munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset,
92 : map_sz);
93 :
94 0 : vu_log_debug (vui, "unmap memory region %d addr 0x%lx len 0x%lx "
95 : "page_sz 0x%x", i, vui->region_mmap_addr[i], map_sz,
96 : page_sz);
97 :
98 0 : vui->region_mmap_addr[i] = MAP_FAILED;
99 :
100 0 : if (r == -1)
101 : {
102 0 : vu_log_err (vui, "failed to unmap memory region (errno %d)",
103 : errno);
104 : }
105 0 : close (vui->region_mmap_fd[i]);
106 : }
107 : }
108 4 : vui->nregions = 0;
109 :
110 12 : FOR_ALL_VHOST_RX_TXQ (q, vui)
111 : {
112 8 : vq = &vui->vrings[q];
113 8 : vq->avail = 0;
114 8 : vq->used = 0;
115 8 : vq->desc = 0;
116 : }
117 4 : }
118 :
119 : static_always_inline void
120 0 : vhost_user_tx_thread_placement (vhost_user_intf_t *vui, u32 qid)
121 : {
122 0 : vnet_main_t *vnm = vnet_get_main ();
123 0 : vhost_user_vring_t *rxvq = &vui->vrings[qid];
124 0 : u32 q = qid >> 1, rxvq_count;
125 :
126 0 : ASSERT ((qid & 1) == 0);
127 0 : if (!rxvq->started || !rxvq->enabled)
128 0 : return;
129 :
130 0 : rxvq_count = (qid >> 1) + 1;
131 0 : if (rxvq->queue_index == ~0)
132 : {
133 0 : rxvq->queue_index =
134 0 : vnet_hw_if_register_tx_queue (vnm, vui->hw_if_index, q);
135 0 : rxvq->qid = q;
136 : }
137 :
138 0 : FOR_ALL_VHOST_RXQ (q, vui)
139 : {
140 0 : vhost_user_vring_t *rxvq = &vui->vrings[q];
141 0 : u32 qi = rxvq->queue_index;
142 :
143 0 : if (rxvq->queue_index == ~0)
144 0 : break;
145 0 : for (u32 i = 0; i < vlib_get_n_threads (); i++)
146 0 : vnet_hw_if_tx_queue_unassign_thread (vnm, qi, i);
147 : }
148 :
149 0 : for (u32 i = 0; i < vlib_get_n_threads (); i++)
150 : {
151 0 : vhost_user_vring_t *rxvq =
152 0 : &vui->vrings[VHOST_VRING_IDX_RX (i % rxvq_count)];
153 0 : u32 qi = rxvq->queue_index;
154 :
155 0 : vnet_hw_if_tx_queue_assign_thread (vnm, qi, i);
156 : }
157 :
158 0 : vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index);
159 : }
160 :
161 : /**
162 : * @brief Unassign existing interface/queue to thread mappings and re-assign
163 : * new interface/queue to thread mappings
164 : */
165 : static_always_inline void
166 4 : vhost_user_rx_thread_placement (vhost_user_intf_t * vui, u32 qid)
167 : {
168 4 : vhost_user_vring_t *txvq = &vui->vrings[qid];
169 4 : vnet_main_t *vnm = vnet_get_main ();
170 : int rv;
171 4 : u32 q = qid >> 1;
172 4 : vhost_user_main_t *vum = &vhost_user_main;
173 :
174 4 : ASSERT ((qid & 1) == 1); // should be odd
175 : // Assign new queue mappings for the interface
176 4 : if (txvq->queue_index != ~0)
177 0 : return;
178 4 : vnet_hw_if_set_input_node (vnm, vui->hw_if_index,
179 : vhost_user_input_node.index);
180 4 : txvq->queue_index = vnet_hw_if_register_rx_queue (vnm, vui->hw_if_index, q,
181 : VNET_HW_IF_RXQ_THREAD_ANY);
182 4 : txvq->thread_index =
183 4 : vnet_hw_if_get_rx_queue_thread_index (vnm, txvq->queue_index);
184 :
185 4 : if (txvq->mode == VNET_HW_IF_RX_MODE_UNKNOWN)
186 : /* Set polling as the default */
187 0 : txvq->mode = VNET_HW_IF_RX_MODE_POLLING;
188 4 : if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
189 : {
190 4 : vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
191 : /* Keep a polling queue count for each thread */
192 4 : cpu->polling_q_count++;
193 : }
194 4 : txvq->qid = q;
195 4 : rv = vnet_hw_if_set_rx_queue_mode (vnm, txvq->queue_index, txvq->mode);
196 4 : if (rv)
197 0 : vu_log_warn (vui, "unable to set rx mode for interface %d, "
198 : "queue %d: rc=%d", vui->hw_if_index, q, rv);
199 4 : vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index);
200 : }
201 :
202 : /** @brief Returns whether at least one TX and one RX vring are enabled */
203 : static_always_inline int
204 4 : vhost_user_intf_ready (vhost_user_intf_t * vui)
205 : {
206 4 : int i, found[2] = { }; //RX + TX
207 :
208 12 : for (i = 0; i < vui->num_qid; i++)
209 8 : if (vui->vrings[i].started && vui->vrings[i].enabled)
210 0 : found[i & 1] = 1;
211 :
212 4 : return found[0] && found[1];
213 : }
214 :
215 : static_always_inline void
216 4 : vhost_user_update_iface_state (vhost_user_intf_t * vui)
217 : {
218 : /* if we have pointers to descriptor table, go up */
219 4 : int is_ready = vhost_user_intf_ready (vui);
220 4 : if (is_ready != vui->is_ready)
221 : {
222 0 : vu_log_debug (vui, "interface %d %s", vui->sw_if_index,
223 : is_ready ? "ready" : "down");
224 0 : if (vui->admin_up)
225 0 : vnet_hw_interface_set_flags (vnet_get_main (), vui->hw_if_index,
226 : is_ready ? VNET_HW_INTERFACE_FLAG_LINK_UP
227 : : 0);
228 0 : vui->is_ready = is_ready;
229 : }
230 4 : }
231 :
232 : static clib_error_t *
233 0 : vhost_user_callfd_read_ready (clib_file_t * uf)
234 : {
235 : __attribute__ ((unused)) int n;
236 : u8 buff[8];
237 :
238 0 : n = read (uf->file_descriptor, ((char *) &buff), 8);
239 :
240 0 : return 0;
241 : }
242 :
243 : static_always_inline void
244 0 : vhost_user_thread_placement (vhost_user_intf_t * vui, u32 qid)
245 : {
246 0 : if (qid & 1) // RX is odd, TX is even
247 : {
248 0 : if (vui->vrings[qid].queue_index == ~0)
249 0 : vhost_user_rx_thread_placement (vui, qid);
250 : }
251 : else
252 0 : vhost_user_tx_thread_placement (vui, qid);
253 0 : }
254 :
255 : static clib_error_t *
256 0 : vhost_user_kickfd_read_ready (clib_file_t * uf)
257 : {
258 : __attribute__ ((unused)) ssize_t n;
259 : u8 buff[8];
260 0 : vhost_user_main_t *vum = &vhost_user_main;
261 0 : vhost_user_intf_t *vui =
262 0 : pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data >> 8);
263 0 : u32 qid = uf->private_data & 0xff;
264 0 : u32 is_txq = qid & 1;
265 0 : vhost_user_vring_t *vq = &vui->vrings[qid];
266 0 : vnet_main_t *vnm = vnet_get_main ();
267 :
268 0 : n = read (uf->file_descriptor, buff, 8);
269 0 : if (vq->started == 0)
270 : {
271 0 : vq->started = 1;
272 0 : vhost_user_thread_placement (vui, qid);
273 0 : vhost_user_update_iface_state (vui);
274 0 : if (is_txq)
275 0 : vnet_hw_if_set_rx_queue_file_index (vnm, vq->queue_index,
276 : vq->kickfd_idx);
277 : }
278 :
279 0 : if (is_txq && (vq->mode != VNET_HW_IF_RX_MODE_POLLING) &&
280 0 : vhost_user_intf_ready (vui))
281 : {
282 0 : vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, vq->thread_index);
283 : /*
284 : * If the thread has more than 1 queue and the other queue is in polling
285 : * mode, there is no need to trigger an interrupt
286 : */
287 0 : if (cpu->polling_q_count == 0)
288 0 : vnet_hw_if_rx_queue_set_int_pending (vnm, vq->queue_index);
289 : }
290 :
291 0 : return 0;
292 : }
293 :
294 : static_always_inline void
295 72 : vhost_user_vring_init (vhost_user_intf_t * vui, u32 qid)
296 : {
297 72 : vhost_user_vring_t *vring = &vui->vrings[qid];
298 :
299 72 : clib_memset (vring, 0, sizeof (*vring));
300 72 : vring->kickfd_idx = ~0;
301 72 : vring->callfd_idx = ~0;
302 72 : vring->errfd = -1;
303 72 : vring->qid = -1;
304 72 : vring->queue_index = ~0;
305 72 : vring->thread_index = ~0;
306 72 : vring->mode = VNET_HW_IF_RX_MODE_POLLING;
307 :
308 72 : clib_spinlock_init (&vring->vring_lock);
309 :
310 : /*
311 : * We have a bug with some qemu 2.5, and this may be a fix.
312 : * Feel like interpretation holy text, but this is from vhost-user.txt.
313 : * "
314 : * One queue pair is enabled initially. More queues are enabled
315 : * dynamically, by sending message VHOST_USER_SET_VRING_ENABLE.
316 : * "
317 : * Don't know who's right, but this is what DPDK does.
318 : */
319 72 : if (qid == 0 || qid == 1)
320 16 : vring->enabled = 1;
321 72 : }
322 :
323 : static_always_inline void
324 8 : vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid)
325 : {
326 8 : vhost_user_vring_t *vring = &vui->vrings[qid];
327 :
328 8 : if (vring->kickfd_idx != ~0)
329 : {
330 0 : clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
331 : vring->kickfd_idx);
332 0 : clib_file_del (&file_main, uf);
333 0 : vring->kickfd_idx = ~0;
334 : }
335 8 : if (vring->callfd_idx != ~0)
336 : {
337 0 : clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
338 : vring->callfd_idx);
339 0 : clib_file_del (&file_main, uf);
340 0 : vring->callfd_idx = ~0;
341 : }
342 8 : if (vring->errfd != -1)
343 : {
344 0 : close (vring->errfd);
345 0 : vring->errfd = -1;
346 : }
347 :
348 8 : clib_spinlock_free (&vring->vring_lock);
349 :
350 : // save the needed information in vrings prior to being wiped out
351 8 : u16 q = vui->vrings[qid].qid;
352 8 : u32 queue_index = vui->vrings[qid].queue_index;
353 8 : u32 mode = vui->vrings[qid].mode;
354 8 : u32 thread_index = vui->vrings[qid].thread_index;
355 8 : vhost_user_vring_init (vui, qid);
356 8 : vui->vrings[qid].qid = q;
357 8 : vui->vrings[qid].queue_index = queue_index;
358 8 : vui->vrings[qid].mode = mode;
359 8 : vui->vrings[qid].thread_index = thread_index;
360 8 : }
361 :
362 : static_always_inline void
363 4 : vhost_user_if_disconnect (vhost_user_intf_t * vui)
364 : {
365 4 : vnet_main_t *vnm = vnet_get_main ();
366 : int q;
367 :
368 4 : vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
369 :
370 4 : if (vui->clib_file_index != ~0)
371 : {
372 0 : clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
373 0 : vui->clib_file_index = ~0;
374 : }
375 :
376 4 : vui->is_ready = 0;
377 :
378 12 : FOR_ALL_VHOST_RX_TXQ (q, vui) { vhost_user_vring_close (vui, q); }
379 :
380 4 : unmap_all_mem_regions (vui);
381 4 : vu_log_debug (vui, "interface ifindex %d disconnected", vui->sw_if_index);
382 4 : }
383 :
384 : void
385 0 : vhost_user_set_operation_mode (vhost_user_intf_t *vui,
386 : vhost_user_vring_t *txvq)
387 : {
388 0 : if (vhost_user_is_packed_ring_supported (vui))
389 : {
390 0 : if (txvq->used_event)
391 : {
392 0 : if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
393 0 : txvq->used_event->flags = VRING_EVENT_F_DISABLE;
394 : else
395 0 : txvq->used_event->flags = 0;
396 : }
397 : }
398 : else
399 : {
400 0 : if (txvq->used)
401 : {
402 0 : if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
403 0 : txvq->used->flags = VRING_USED_F_NO_NOTIFY;
404 : else
405 0 : txvq->used->flags = 0;
406 : }
407 : }
408 0 : }
409 :
410 : static clib_error_t *
411 0 : vhost_user_socket_read (clib_file_t * uf)
412 : {
413 : int n, i, j;
414 0 : int fd, number_of_fds = 0;
415 : int fds[VHOST_MEMORY_MAX_NREGIONS];
416 : vhost_user_msg_t msg;
417 : struct msghdr mh;
418 : struct iovec iov[1];
419 0 : vhost_user_main_t *vum = &vhost_user_main;
420 : vhost_user_intf_t *vui;
421 : struct cmsghdr *cmsg;
422 : u8 q;
423 0 : clib_file_t template = { 0 };
424 0 : vnet_main_t *vnm = vnet_get_main ();
425 0 : vlib_main_t *vm = vlib_get_main ();
426 :
427 0 : vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
428 :
429 : char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))];
430 :
431 0 : clib_memset (&mh, 0, sizeof (mh));
432 0 : clib_memset (control, 0, sizeof (control));
433 :
434 0 : for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++)
435 0 : fds[i] = -1;
436 :
437 : /* set the payload */
438 0 : iov[0].iov_base = (void *) &msg;
439 0 : iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;
440 :
441 0 : mh.msg_iov = iov;
442 0 : mh.msg_iovlen = 1;
443 0 : mh.msg_control = control;
444 0 : mh.msg_controllen = sizeof (control);
445 :
446 0 : n = recvmsg (uf->file_descriptor, &mh, 0);
447 :
448 0 : if (n != VHOST_USER_MSG_HDR_SZ)
449 : {
450 0 : if (n == -1)
451 : {
452 0 : vu_log_debug (vui, "recvmsg returned error %d %s", errno,
453 : strerror (errno));
454 : }
455 : else
456 : {
457 0 : vu_log_debug (vui, "n (%d) != VHOST_USER_MSG_HDR_SZ (%d)",
458 : n, VHOST_USER_MSG_HDR_SZ);
459 : }
460 0 : goto close_socket;
461 : }
462 :
463 0 : if (mh.msg_flags & MSG_CTRUNC)
464 : {
465 0 : vu_log_debug (vui, "MSG_CTRUNC is set");
466 0 : goto close_socket;
467 : }
468 :
469 0 : cmsg = CMSG_FIRSTHDR (&mh);
470 :
471 0 : if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
472 0 : (cmsg->cmsg_type == SCM_RIGHTS) &&
473 0 : (cmsg->cmsg_len - CMSG_LEN (0) <=
474 : VHOST_MEMORY_MAX_NREGIONS * sizeof (int)))
475 : {
476 0 : number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int);
477 0 : clib_memcpy_fast (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int));
478 : }
479 :
480 : /* version 1, no reply bit set */
481 0 : if ((msg.flags & 7) != 1)
482 : {
483 0 : vu_log_debug (vui, "malformed message received. closing socket");
484 0 : goto close_socket;
485 : }
486 :
487 : {
488 : int rv;
489 0 : rv =
490 0 : read (uf->file_descriptor, ((char *) &msg) + VHOST_USER_MSG_HDR_SZ,
491 0 : msg.size);
492 0 : if (rv < 0)
493 : {
494 0 : vu_log_debug (vui, "read failed %s", strerror (errno));
495 0 : goto close_socket;
496 : }
497 0 : else if (rv != msg.size)
498 : {
499 0 : vu_log_debug (vui, "message too short (read %dB should be %dB)", rv,
500 : msg.size);
501 0 : goto close_socket;
502 : }
503 : }
504 :
505 0 : switch (msg.request)
506 : {
507 0 : case VHOST_USER_GET_FEATURES:
508 0 : msg.flags |= 4;
509 0 : msg.u64 = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) |
510 : VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ) |
511 : VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT) |
512 : VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC) |
513 : VIRTIO_FEATURE (VHOST_F_LOG_ALL) |
514 : VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_ANNOUNCE) |
515 : VIRTIO_FEATURE (VIRTIO_NET_F_MQ) |
516 : VIRTIO_FEATURE (VHOST_USER_F_PROTOCOL_FEATURES) |
517 : VIRTIO_FEATURE (VIRTIO_F_VERSION_1);
518 0 : msg.u64 &= vui->feature_mask;
519 :
520 0 : if (vui->enable_event_idx)
521 0 : msg.u64 |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
522 0 : if (vui->enable_gso)
523 0 : msg.u64 |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS;
524 0 : if (vui->enable_packed)
525 0 : msg.u64 |= VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
526 :
527 0 : msg.size = sizeof (msg.u64);
528 0 : vu_log_debug (vui, "if %d msg VHOST_USER_GET_FEATURES - reply "
529 : "0x%016llx", vui->hw_if_index, msg.u64);
530 0 : n =
531 0 : send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
532 0 : if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
533 : {
534 0 : vu_log_debug (vui, "could not send message response");
535 0 : goto close_socket;
536 : }
537 0 : break;
538 :
539 0 : case VHOST_USER_SET_FEATURES:
540 0 : vu_log_debug (vui, "if %d msg VHOST_USER_SET_FEATURES features "
541 : "0x%016llx", vui->hw_if_index, msg.u64);
542 :
543 0 : vui->features = msg.u64;
544 :
545 0 : if (vui->features &
546 : (VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) |
547 : VIRTIO_FEATURE (VIRTIO_F_VERSION_1)))
548 0 : vui->virtio_net_hdr_sz = 12;
549 : else
550 0 : vui->virtio_net_hdr_sz = 10;
551 :
552 0 : vui->is_any_layout =
553 0 : (vui->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) ? 1 : 0;
554 :
555 0 : ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE);
556 0 : if (vui->enable_gso &&
557 0 : ((vui->features & FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)
558 : == FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS))
559 : {
560 0 : vnet_hw_if_set_caps (vnm, vui->hw_if_index,
561 : VNET_HW_IF_CAP_TCP_GSO |
562 : VNET_HW_IF_CAP_TX_TCP_CKSUM |
563 : VNET_HW_IF_CAP_TX_UDP_CKSUM);
564 : }
565 : else
566 : {
567 0 : vnet_hw_if_unset_caps (vnm, vui->hw_if_index,
568 : VNET_HW_IF_CAP_TCP_GSO |
569 : VNET_HW_IF_CAP_L4_TX_CKSUM);
570 : }
571 0 : vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
572 0 : vui->is_ready = 0;
573 0 : vhost_user_update_iface_state (vui);
574 0 : break;
575 :
576 0 : case VHOST_USER_SET_MEM_TABLE:
577 0 : vu_log_debug (vui, "if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
578 : vui->hw_if_index, msg.memory.nregions);
579 :
580 0 : if ((msg.memory.nregions < 1) ||
581 0 : (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
582 : {
583 0 : vu_log_debug (vui, "number of mem regions must be between 1 and %i",
584 : VHOST_MEMORY_MAX_NREGIONS);
585 0 : goto close_socket;
586 : }
587 :
588 0 : if (msg.memory.nregions != number_of_fds)
589 : {
590 0 : vu_log_debug (vui, "each memory region must have FD");
591 0 : goto close_socket;
592 : }
593 :
594 : /* Do the mmap without barrier sync */
595 : void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS];
596 0 : for (i = 0; i < msg.memory.nregions; i++)
597 : {
598 0 : long page_sz = get_huge_page_size (fds[i]);
599 :
600 : /* align size to page */
601 0 : ssize_t map_sz = (msg.memory.regions[i].memory_size +
602 0 : msg.memory.regions[i].mmap_offset +
603 0 : page_sz - 1) & ~(page_sz - 1);
604 :
605 0 : region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE,
606 : MAP_SHARED, fds[i], 0);
607 0 : if (region_mmap_addr[i] == MAP_FAILED)
608 : {
609 0 : vu_log_err (vui, "failed to map memory. errno is %d", errno);
610 0 : for (j = 0; j < i; j++)
611 0 : munmap (region_mmap_addr[j], map_sz);
612 0 : goto close_socket;
613 : }
614 0 : vu_log_debug (vui, "map memory region %d addr 0 len 0x%lx fd %d "
615 : "mapped 0x%lx page_sz 0x%x", i, map_sz, fds[i],
616 : region_mmap_addr[i], page_sz);
617 : }
618 :
619 0 : vlib_worker_thread_barrier_sync (vm);
620 0 : unmap_all_mem_regions (vui);
621 0 : for (i = 0; i < msg.memory.nregions; i++)
622 : {
623 0 : clib_memcpy_fast (&(vui->regions[i]), &msg.memory.regions[i],
624 : sizeof (vhost_user_memory_region_t));
625 :
626 0 : vui->region_mmap_addr[i] = region_mmap_addr[i];
627 0 : vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr;
628 0 : vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr +
629 0 : vui->regions[i].memory_size;
630 :
631 0 : vui->region_mmap_addr[i] += vui->regions[i].mmap_offset;
632 0 : vui->region_mmap_fd[i] = fds[i];
633 :
634 0 : vui->nregions++;
635 : }
636 :
637 : /*
638 : * Re-compute desc, used, and avail descriptor table if vring address
639 : * is set.
640 : */
641 0 : FOR_ALL_VHOST_RX_TXQ (q, vui)
642 : {
643 0 : if (vui->vrings[q].desc_user_addr && vui->vrings[q].used_user_addr &&
644 0 : vui->vrings[q].avail_user_addr)
645 : {
646 0 : vui->vrings[q].desc =
647 0 : map_user_mem (vui, vui->vrings[q].desc_user_addr);
648 0 : vui->vrings[q].used =
649 0 : map_user_mem (vui, vui->vrings[q].used_user_addr);
650 0 : vui->vrings[q].avail =
651 0 : map_user_mem (vui, vui->vrings[q].avail_user_addr);
652 : }
653 : }
654 0 : vlib_worker_thread_barrier_release (vm);
655 0 : break;
656 :
657 0 : case VHOST_USER_SET_VRING_NUM:
658 0 : vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
659 : vui->hw_if_index, msg.state.index, msg.state.num);
660 :
661 0 : if ((msg.state.num > 32768) || /* maximum ring size is 32768 */
662 0 : (msg.state.num == 0) || /* it cannot be zero */
663 0 : ((msg.state.num - 1) & msg.state.num) || /* must be power of 2 */
664 0 : (msg.state.index >= vui->num_qid))
665 : {
666 0 : vu_log_debug (vui, "invalid VHOST_USER_SET_VRING_NUM: msg.state.num"
667 : " %d, msg.state.index %d, curruent max q %d",
668 : msg.state.num, msg.state.index, vui->num_qid);
669 0 : goto close_socket;
670 : }
671 0 : vui->vrings[msg.state.index].qsz_mask = msg.state.num - 1;
672 0 : break;
673 :
674 0 : case VHOST_USER_SET_VRING_ADDR:
675 0 : vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
676 : vui->hw_if_index, msg.state.index);
677 :
678 0 : if (msg.state.index >= vui->num_qid)
679 : {
680 0 : vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ADDR:"
681 : " %u >= %u", msg.state.index, vui->num_qid);
682 0 : goto close_socket;
683 : }
684 :
685 0 : if (msg.size < sizeof (msg.addr))
686 : {
687 0 : vu_log_debug (vui, "vhost message is too short (%d < %d)",
688 : msg.size, sizeof (msg.addr));
689 0 : goto close_socket;
690 : }
691 :
692 : vnet_virtio_vring_desc_t *desc =
693 0 : map_user_mem (vui, msg.addr.desc_user_addr);
694 : vnet_virtio_vring_used_t *used =
695 0 : map_user_mem (vui, msg.addr.used_user_addr);
696 : vnet_virtio_vring_avail_t *avail =
697 0 : map_user_mem (vui, msg.addr.avail_user_addr);
698 :
699 0 : if ((desc == NULL) || (used == NULL) || (avail == NULL))
700 : {
701 0 : vu_log_debug (vui, "failed to map user memory for hw_if_index %d",
702 : vui->hw_if_index);
703 0 : goto close_socket;
704 : }
705 :
706 0 : vui->vrings[msg.state.index].desc_user_addr = msg.addr.desc_user_addr;
707 0 : vui->vrings[msg.state.index].used_user_addr = msg.addr.used_user_addr;
708 0 : vui->vrings[msg.state.index].avail_user_addr = msg.addr.avail_user_addr;
709 :
710 0 : vlib_worker_thread_barrier_sync (vm);
711 0 : vui->vrings[msg.state.index].desc = desc;
712 0 : vui->vrings[msg.state.index].used = used;
713 0 : vui->vrings[msg.state.index].avail = avail;
714 :
715 0 : vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr;
716 0 : vui->vrings[msg.state.index].log_used =
717 0 : (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0;
718 :
719 : /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated,
720 : the ring is initialized in an enabled state. */
721 0 : if (!(vui->features & VIRTIO_FEATURE (VHOST_USER_F_PROTOCOL_FEATURES)))
722 0 : vui->vrings[msg.state.index].enabled = 1;
723 :
724 0 : vui->vrings[msg.state.index].last_used_idx =
725 0 : vui->vrings[msg.state.index].last_avail_idx =
726 0 : vui->vrings[msg.state.index].used->idx;
727 0 : vui->vrings[msg.state.index].last_kick =
728 0 : vui->vrings[msg.state.index].last_used_idx;
729 :
730 : /* tell driver that we want interrupts or not */
731 0 : vhost_user_set_operation_mode (vui, &vui->vrings[msg.state.index]);
732 0 : vlib_worker_thread_barrier_release (vm);
733 0 : vhost_user_update_iface_state (vui);
734 0 : break;
735 :
736 0 : case VHOST_USER_SET_OWNER:
737 0 : vu_log_debug (vui, "if %d msg VHOST_USER_SET_OWNER", vui->hw_if_index);
738 0 : break;
739 :
740 0 : case VHOST_USER_RESET_OWNER:
741 0 : vu_log_debug (vui, "if %d msg VHOST_USER_RESET_OWNER",
742 : vui->hw_if_index);
743 0 : break;
744 :
745 0 : case VHOST_USER_SET_VRING_CALL:
746 0 : vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_CALL %d",
747 : vui->hw_if_index, msg.u64);
748 :
749 0 : q = (u8) (msg.u64 & 0xFF);
750 0 : if (vui->num_qid > q)
751 : {
752 : /* if there is old fd, delete and close it */
753 0 : if (vui->vrings[q].callfd_idx != ~0)
754 : {
755 0 : clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
756 : vui->vrings[q].callfd_idx);
757 0 : clib_file_del (&file_main, uf);
758 0 : vui->vrings[q].callfd_idx = ~0;
759 : }
760 : }
761 0 : else if (vec_len (vui->vrings) > q)
762 : {
763 : /* grow vrings by pair (RX + TX) */
764 0 : vui->num_qid = (q & 1) ? (q + 1) : (q + 2);
765 : }
766 : else
767 : {
768 0 : u32 i, new_max_q, old_max_q = vec_len (vui->vrings);
769 :
770 : /*
771 : * Double the array size if it is less than 64 entries.
772 : * Slow down thereafter.
773 : */
774 0 : if (vec_len (vui->vrings) < (VHOST_VRING_INIT_MQ_PAIR_SZ << 3))
775 0 : new_max_q = vec_len (vui->vrings) << 1;
776 : else
777 0 : new_max_q = vec_len (vui->vrings) +
778 : (VHOST_VRING_INIT_MQ_PAIR_SZ << 2);
779 0 : if (new_max_q > (VHOST_VRING_MAX_MQ_PAIR_SZ << 1))
780 0 : new_max_q = (VHOST_VRING_MAX_MQ_PAIR_SZ << 1);
781 :
782 : /* sync with the worker threads, vrings may move due to realloc */
783 0 : vlib_worker_thread_barrier_sync (vm);
784 0 : vec_validate_aligned (vui->vrings, new_max_q - 1,
785 : CLIB_CACHE_LINE_BYTES);
786 0 : vlib_worker_thread_barrier_release (vm);
787 :
788 0 : for (i = old_max_q; i < vec_len (vui->vrings); i++)
789 0 : vhost_user_vring_init (vui, i);
790 :
791 : /* grow vrings by pair (RX + TX) */
792 0 : vui->num_qid = (q & 1) ? (q + 1) : (q + 2);
793 : }
794 :
795 0 : if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
796 : {
797 0 : if (number_of_fds != 1)
798 : {
799 0 : vu_log_debug (vui, "More than one fd received !");
800 0 : goto close_socket;
801 : }
802 :
803 0 : template.read_function = vhost_user_callfd_read_ready;
804 0 : template.file_descriptor = fds[0];
805 0 : template.private_data =
806 0 : ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q;
807 0 : template.description = format (0, "vhost user");
808 0 : vui->vrings[q].callfd_idx = clib_file_add (&file_main, &template);
809 : }
810 : else
811 0 : vui->vrings[q].callfd_idx = ~0;
812 0 : break;
813 :
814 0 : case VHOST_USER_SET_VRING_KICK:
815 0 : vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_KICK %d",
816 : vui->hw_if_index, msg.u64);
817 :
818 0 : q = (u8) (msg.u64 & 0xFF);
819 0 : if (q >= vui->num_qid)
820 : {
821 0 : vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_KICK:"
822 : " %u >= %u", q, vui->num_qid);
823 0 : goto close_socket;
824 : }
825 :
826 0 : if (vui->vrings[q].kickfd_idx != ~0)
827 : {
828 0 : clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
829 : vui->vrings[q].kickfd_idx);
830 0 : clib_file_del (&file_main, uf);
831 0 : vui->vrings[q].kickfd_idx = ~0;
832 : }
833 :
834 0 : if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
835 : {
836 0 : if (number_of_fds != 1)
837 : {
838 0 : vu_log_debug (vui, "More than one fd received !");
839 0 : goto close_socket;
840 : }
841 :
842 0 : template.read_function = vhost_user_kickfd_read_ready;
843 0 : template.file_descriptor = fds[0];
844 0 : template.private_data =
845 0 : (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) +
846 : q;
847 0 : vui->vrings[q].kickfd_idx = clib_file_add (&file_main, &template);
848 : }
849 : else
850 : {
851 : //When no kickfd is set, the queue is initialized as started
852 0 : vui->vrings[q].kickfd_idx = ~0;
853 0 : vui->vrings[q].started = 1;
854 0 : vhost_user_thread_placement (vui, q);
855 : }
856 0 : vhost_user_update_iface_state (vui);
857 0 : break;
858 :
859 0 : case VHOST_USER_SET_VRING_ERR:
860 0 : vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_ERR %d",
861 : vui->hw_if_index, msg.u64);
862 :
863 0 : q = (u8) (msg.u64 & 0xFF);
864 0 : if (q >= vui->num_qid)
865 : {
866 0 : vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ERR:"
867 : " %u >= %u", q, vui->num_qid);
868 0 : goto close_socket;
869 : }
870 :
871 0 : if (vui->vrings[q].errfd != -1)
872 0 : close (vui->vrings[q].errfd);
873 :
874 0 : if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
875 : {
876 0 : if (number_of_fds != 1)
877 0 : goto close_socket;
878 :
879 0 : vui->vrings[q].errfd = fds[0];
880 : }
881 : else
882 0 : vui->vrings[q].errfd = -1;
883 0 : break;
884 :
885 0 : case VHOST_USER_SET_VRING_BASE:
886 0 : vu_log_debug (vui,
887 : "if %d msg VHOST_USER_SET_VRING_BASE idx %d num 0x%x",
888 : vui->hw_if_index, msg.state.index, msg.state.num);
889 0 : if (msg.state.index >= vui->num_qid)
890 : {
891 0 : vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ADDR:"
892 : " %u >= %u", msg.state.index, vui->num_qid);
893 0 : goto close_socket;
894 : }
895 0 : vlib_worker_thread_barrier_sync (vm);
896 0 : vui->vrings[msg.state.index].last_avail_idx = msg.state.num;
897 0 : if (vhost_user_is_packed_ring_supported (vui))
898 : {
899 : /*
900 : * 0 1 2 3
901 : * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
902 : * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
903 : * | last avail idx | | last used idx | |
904 : * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
905 : * ^ ^
906 : * | |
907 : * avail wrap counter used wrap counter
908 : */
909 : /* last avail idx at bit 0-14. */
910 0 : vui->vrings[msg.state.index].last_avail_idx =
911 0 : msg.state.num & 0x7fff;
912 : /* avail wrap counter at bit 15 */
913 0 : vui->vrings[msg.state.index].avail_wrap_counter =
914 0 : ! !(msg.state.num & (1 << 15));
915 :
916 : /*
917 : * Although last_used_idx is passed in the upper 16 bits in qemu
918 : * implementation, in practice, last_avail_idx and last_used_idx are
919 : * usually the same. As a result, DPDK does not bother to pass us
920 : * last_used_idx. The spec is not clear on the encoding. I figured it
921 : * out by reading the qemu code. So let's just read last_avail_idx
922 : * and set last_used_idx equals to last_avail_idx.
923 : */
924 0 : vui->vrings[msg.state.index].last_used_idx =
925 0 : vui->vrings[msg.state.index].last_avail_idx;
926 0 : vui->vrings[msg.state.index].last_kick =
927 0 : vui->vrings[msg.state.index].last_used_idx;
928 0 : vui->vrings[msg.state.index].used_wrap_counter =
929 0 : vui->vrings[msg.state.index].avail_wrap_counter;
930 :
931 0 : if (vui->vrings[msg.state.index].avail_wrap_counter == 1)
932 0 : vui->vrings[msg.state.index].avail_wrap_counter =
933 : VRING_DESC_F_AVAIL;
934 : }
935 0 : vlib_worker_thread_barrier_release (vm);
936 0 : break;
937 :
938 0 : case VHOST_USER_GET_VRING_BASE:
939 0 : if (msg.state.index >= vui->num_qid)
940 : {
941 0 : vu_log_debug (vui, "invalid vring index VHOST_USER_GET_VRING_BASE:"
942 : " %u >= %u", msg.state.index, vui->num_qid);
943 0 : goto close_socket;
944 : }
945 :
946 : /* protection is needed to prevent rx/tx from changing last_avail_idx */
947 0 : vlib_worker_thread_barrier_sync (vm);
948 : /*
949 : * Copy last_avail_idx from the vring before closing it because
950 : * closing the vring also initializes the vring last_avail_idx
951 : */
952 0 : msg.state.num = vui->vrings[msg.state.index].last_avail_idx;
953 0 : if (vhost_user_is_packed_ring_supported (vui))
954 : {
955 0 : msg.state.num =
956 0 : (vui->vrings[msg.state.index].last_avail_idx & 0x7fff) |
957 0 : (! !vui->vrings[msg.state.index].avail_wrap_counter << 15);
958 0 : msg.state.num |=
959 0 : ((vui->vrings[msg.state.index].last_used_idx & 0x7fff) |
960 0 : (! !vui->vrings[msg.state.index].used_wrap_counter << 15)) << 16;
961 : }
962 0 : msg.flags |= 4;
963 0 : msg.size = sizeof (msg.state);
964 :
965 : /*
966 : * Spec says: Client must [...] stop ring upon receiving
967 : * VHOST_USER_GET_VRING_BASE
968 : */
969 0 : vhost_user_vring_close (vui, msg.state.index);
970 0 : vlib_worker_thread_barrier_release (vm);
971 0 : vu_log_debug (vui,
972 : "if %d msg VHOST_USER_GET_VRING_BASE idx %d num 0x%x",
973 : vui->hw_if_index, msg.state.index, msg.state.num);
974 0 : n =
975 0 : send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
976 0 : if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
977 : {
978 0 : vu_log_debug (vui, "could not send message response");
979 0 : goto close_socket;
980 : }
981 0 : vhost_user_update_iface_state (vui);
982 0 : break;
983 :
984 0 : case VHOST_USER_NONE:
985 0 : vu_log_debug (vui, "if %d msg VHOST_USER_NONE", vui->hw_if_index);
986 0 : break;
987 :
988 0 : case VHOST_USER_SET_LOG_BASE:
989 0 : vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_BASE",
990 : vui->hw_if_index);
991 :
992 0 : if (msg.size != sizeof (msg.log))
993 : {
994 0 : vu_log_debug (vui, "invalid msg size for VHOST_USER_SET_LOG_BASE:"
995 : " %d instead of %d", msg.size, sizeof (msg.log));
996 0 : goto close_socket;
997 : }
998 :
999 0 : if (!(vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD)))
1000 : {
1001 0 : vu_log_debug (vui, "VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but "
1002 : "VHOST_USER_SET_LOG_BASE received");
1003 0 : goto close_socket;
1004 : }
1005 :
1006 0 : fd = fds[0];
1007 : /* align size to page */
1008 0 : long page_sz = get_huge_page_size (fd);
1009 0 : ssize_t map_sz =
1010 0 : (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1);
1011 :
1012 0 : void *log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
1013 : MAP_SHARED, fd, 0);
1014 :
1015 0 : vu_log_debug (vui, "map log region addr 0 len 0x%lx off 0x%lx fd %d "
1016 : "mapped 0x%lx", map_sz, msg.log.offset, fd,
1017 : log_base_addr);
1018 :
1019 0 : if (log_base_addr == MAP_FAILED)
1020 : {
1021 0 : vu_log_err (vui, "failed to map memory. errno is %d", errno);
1022 0 : goto close_socket;
1023 : }
1024 :
1025 0 : vlib_worker_thread_barrier_sync (vm);
1026 0 : vui->log_base_addr = log_base_addr;
1027 0 : vui->log_base_addr += msg.log.offset;
1028 0 : vui->log_size = msg.log.size;
1029 0 : vlib_worker_thread_barrier_release (vm);
1030 :
1031 0 : msg.flags |= 4;
1032 0 : msg.size = sizeof (msg.u64);
1033 0 : n =
1034 0 : send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
1035 0 : if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
1036 : {
1037 0 : vu_log_debug (vui, "could not send message response");
1038 0 : goto close_socket;
1039 : }
1040 0 : break;
1041 :
1042 0 : case VHOST_USER_SET_LOG_FD:
1043 0 : vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_FD", vui->hw_if_index);
1044 0 : break;
1045 :
1046 0 : case VHOST_USER_GET_PROTOCOL_FEATURES:
1047 0 : msg.flags |= 4;
1048 0 : msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |
1049 : (1 << VHOST_USER_PROTOCOL_F_MQ);
1050 0 : msg.size = sizeof (msg.u64);
1051 0 : vu_log_debug (vui, "if %d msg VHOST_USER_GET_PROTOCOL_FEATURES - "
1052 : "reply 0x%016llx", vui->hw_if_index, msg.u64);
1053 0 : n =
1054 0 : send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
1055 0 : if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
1056 : {
1057 0 : vu_log_debug (vui, "could not send message response");
1058 0 : goto close_socket;
1059 : }
1060 0 : break;
1061 :
1062 0 : case VHOST_USER_SET_PROTOCOL_FEATURES:
1063 0 : vu_log_debug (vui, "if %d msg VHOST_USER_SET_PROTOCOL_FEATURES "
1064 : "features 0x%016llx", vui->hw_if_index, msg.u64);
1065 0 : vui->protocol_features = msg.u64;
1066 0 : break;
1067 :
1068 0 : case VHOST_USER_GET_QUEUE_NUM:
1069 0 : msg.flags |= 4;
1070 0 : msg.u64 = VHOST_VRING_MAX_MQ_PAIR_SZ;
1071 0 : msg.size = sizeof (msg.u64);
1072 0 : vu_log_debug (vui, "if %d msg VHOST_USER_GET_QUEUE_NUM - reply %d",
1073 : vui->hw_if_index, msg.u64);
1074 0 : n =
1075 0 : send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
1076 0 : if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
1077 : {
1078 0 : vu_log_debug (vui, "could not send message response");
1079 0 : goto close_socket;
1080 : }
1081 0 : break;
1082 :
1083 0 : case VHOST_USER_SET_VRING_ENABLE:
1084 0 : vu_log_debug (vui, "if %d VHOST_USER_SET_VRING_ENABLE: %s queue %d",
1085 : vui->hw_if_index, msg.state.num ? "enable" : "disable",
1086 : msg.state.index);
1087 0 : if (msg.state.index >= vui->num_qid)
1088 : {
1089 0 : vu_log_debug (vui, "invalid vring idx VHOST_USER_SET_VRING_ENABLE:"
1090 : " %u >= %u", msg.state.index, vui->num_qid);
1091 0 : goto close_socket;
1092 : }
1093 :
1094 0 : vui->vrings[msg.state.index].enabled = msg.state.num;
1095 0 : vhost_user_thread_placement (vui, msg.state.index);
1096 0 : vhost_user_update_iface_state (vui);
1097 0 : break;
1098 :
1099 0 : default:
1100 0 : vu_log_debug (vui, "unknown vhost-user message %d received. "
1101 : "closing socket", msg.request);
1102 0 : goto close_socket;
1103 : }
1104 :
1105 0 : return 0;
1106 :
1107 0 : close_socket:
1108 0 : vlib_worker_thread_barrier_sync (vm);
1109 0 : vhost_user_if_disconnect (vui);
1110 0 : vlib_worker_thread_barrier_release (vm);
1111 0 : vhost_user_update_iface_state (vui);
1112 0 : return 0;
1113 : }
1114 :
1115 : static clib_error_t *
1116 0 : vhost_user_socket_error (clib_file_t * uf)
1117 : {
1118 0 : vlib_main_t *vm = vlib_get_main ();
1119 0 : vhost_user_main_t *vum = &vhost_user_main;
1120 0 : vhost_user_intf_t *vui =
1121 0 : pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
1122 :
1123 0 : vu_log_debug (vui, "socket error on if %d", vui->sw_if_index);
1124 0 : vlib_worker_thread_barrier_sync (vm);
1125 0 : vhost_user_if_disconnect (vui);
1126 0 : vlib_worker_thread_barrier_release (vm);
1127 0 : return 0;
1128 : }
1129 :
1130 : static clib_error_t *
1131 0 : vhost_user_socksvr_accept_ready (clib_file_t * uf)
1132 : {
1133 : int client_fd, client_len;
1134 : struct sockaddr_un client;
1135 0 : clib_file_t template = { 0 };
1136 0 : vhost_user_main_t *vum = &vhost_user_main;
1137 : vhost_user_intf_t *vui;
1138 :
1139 0 : vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
1140 :
1141 0 : client_len = sizeof (client);
1142 0 : client_fd = accept (uf->file_descriptor,
1143 : (struct sockaddr *) &client,
1144 : (socklen_t *) & client_len);
1145 :
1146 0 : if (client_fd < 0)
1147 0 : return clib_error_return_unix (0, "accept");
1148 :
1149 0 : if (vui->clib_file_index != ~0)
1150 : {
1151 0 : vu_log_debug (vui, "Close client socket for vhost interface %d, fd %d",
1152 : vui->sw_if_index, UNIX_GET_FD (vui->clib_file_index));
1153 0 : clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
1154 : }
1155 :
1156 0 : vu_log_debug (vui, "New client socket for vhost interface %d, fd %d",
1157 : vui->sw_if_index, client_fd);
1158 0 : template.read_function = vhost_user_socket_read;
1159 0 : template.error_function = vhost_user_socket_error;
1160 0 : template.file_descriptor = client_fd;
1161 0 : template.private_data = vui - vhost_user_main.vhost_user_interfaces;
1162 0 : template.description = format (0, "vhost interface %d", vui->sw_if_index);
1163 0 : vui->clib_file_index = clib_file_add (&file_main, &template);
1164 0 : vui->num_qid = 2;
1165 0 : return 0;
1166 : }
1167 :
1168 : static clib_error_t *
1169 559 : vhost_user_init (vlib_main_t * vm)
1170 : {
1171 559 : vhost_user_main_t *vum = &vhost_user_main;
1172 559 : vlib_thread_main_t *tm = vlib_get_thread_main ();
1173 :
1174 559 : vum->log_default = vlib_log_register_class ("vhost-user", 0);
1175 :
1176 559 : vum->coalesce_frames = 32;
1177 559 : vum->coalesce_time = 1e-3;
1178 :
1179 559 : vec_validate (vum->cpus, tm->n_vlib_mains - 1);
1180 :
1181 : vhost_cpu_t *cpu;
1182 1172 : vec_foreach (cpu, vum->cpus)
1183 : {
1184 : /* This is actually not necessary as validate already zeroes it
1185 : * Just keeping the loop here for later because I am lazy. */
1186 613 : cpu->rx_buffers_len = 0;
1187 : }
1188 :
1189 559 : vum->random = random_default_seed ();
1190 :
1191 559 : mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword));
1192 :
1193 559 : return 0;
1194 : }
1195 :
1196 : /* *INDENT-OFF* */
1197 1119 : VLIB_INIT_FUNCTION (vhost_user_init) =
1198 : {
1199 : .runs_after = VLIB_INITS("ip4_init"),
1200 : };
1201 : /* *INDENT-ON* */
1202 :
1203 : static uword
1204 559 : vhost_user_send_interrupt_process (vlib_main_t * vm,
1205 : vlib_node_runtime_t * rt, vlib_frame_t * f)
1206 : {
1207 : vhost_user_intf_t *vui;
1208 559 : f64 timeout = 3153600000.0 /* 100 years */ ;
1209 559 : uword event_type, *event_data = 0;
1210 559 : vhost_user_main_t *vum = &vhost_user_main;
1211 : u16 qid;
1212 : f64 now, poll_time_remaining;
1213 : f64 next_timeout;
1214 559 : u8 stop_timer = 0;
1215 :
1216 : while (1)
1217 : {
1218 0 : poll_time_remaining =
1219 559 : vlib_process_wait_for_event_or_clock (vm, timeout);
1220 0 : event_type = vlib_process_get_events (vm, &event_data);
1221 0 : vec_reset_length (event_data);
1222 :
1223 : /*
1224 : * Use the remaining timeout if it is less than coalesce time to avoid
1225 : * resetting the existing timer in the middle of expiration
1226 : */
1227 0 : timeout = poll_time_remaining;
1228 0 : if (vlib_process_suspend_time_is_zero (timeout) ||
1229 0 : (timeout > vum->coalesce_time))
1230 0 : timeout = vum->coalesce_time;
1231 :
1232 0 : now = vlib_time_now (vm);
1233 0 : switch (event_type)
1234 : {
1235 0 : case VHOST_USER_EVENT_STOP_TIMER:
1236 0 : stop_timer = 1;
1237 0 : break;
1238 :
1239 0 : case VHOST_USER_EVENT_START_TIMER:
1240 0 : stop_timer = 0;
1241 0 : timeout = 1e-3;
1242 0 : if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
1243 0 : break;
1244 : /* fall through */
1245 :
1246 : case ~0:
1247 : /* *INDENT-OFF* */
1248 0 : pool_foreach (vui, vum->vhost_user_interfaces) {
1249 0 : next_timeout = timeout;
1250 0 : FOR_ALL_VHOST_RX_TXQ (qid, vui)
1251 : {
1252 0 : vhost_user_vring_t *vq = &vui->vrings[qid];
1253 :
1254 0 : if (vq->started == 0)
1255 0 : continue;
1256 0 : if (vq->n_since_last_int)
1257 : {
1258 0 : if (now >= vq->int_deadline)
1259 0 : vhost_user_send_call (vm, vui, vq);
1260 : else
1261 0 : next_timeout = vq->int_deadline - now;
1262 : }
1263 :
1264 0 : if ((next_timeout < timeout) && (next_timeout > 0.0))
1265 0 : timeout = next_timeout;
1266 : }
1267 : }
1268 : /* *INDENT-ON* */
1269 0 : break;
1270 :
1271 0 : default:
1272 0 : clib_warning ("BUG: unhandled event type %d", event_type);
1273 0 : break;
1274 : }
1275 : /* No less than 1 millisecond */
1276 0 : if (timeout < 1e-3)
1277 0 : timeout = 1e-3;
1278 0 : if (stop_timer)
1279 0 : timeout = 3153600000.0;
1280 : }
1281 : return 0;
1282 : }
1283 :
1284 : /* *INDENT-OFF* */
1285 17359 : VLIB_REGISTER_NODE (vhost_user_send_interrupt_node) = {
1286 : .function = vhost_user_send_interrupt_process,
1287 : .type = VLIB_NODE_TYPE_PROCESS,
1288 : .name = "vhost-user-send-interrupt-process",
1289 : };
1290 : /* *INDENT-ON* */
1291 :
1292 : static uword
1293 559 : vhost_user_process (vlib_main_t * vm,
1294 : vlib_node_runtime_t * rt, vlib_frame_t * f)
1295 : {
1296 559 : vhost_user_main_t *vum = &vhost_user_main;
1297 : vhost_user_intf_t *vui;
1298 : struct sockaddr_un sun;
1299 : int sockfd;
1300 559 : clib_file_t template = { 0 };
1301 559 : f64 timeout = 3153600000.0 /* 100 years */ ;
1302 559 : uword *event_data = 0;
1303 :
1304 559 : sockfd = -1;
1305 559 : sun.sun_family = AF_UNIX;
1306 559 : template.read_function = vhost_user_socket_read;
1307 559 : template.error_function = vhost_user_socket_error;
1308 :
1309 : while (1)
1310 : {
1311 563 : vlib_process_wait_for_event_or_clock (vm, timeout);
1312 4 : vlib_process_get_events (vm, &event_data);
1313 4 : vec_reset_length (event_data);
1314 :
1315 4 : timeout = 3.0;
1316 :
1317 : /* *INDENT-OFF* */
1318 9 : pool_foreach (vui, vum->vhost_user_interfaces) {
1319 :
1320 5 : if (vui->unix_server_index == ~0) { //Nothing to do for server sockets
1321 5 : if (vui->clib_file_index == ~0)
1322 : {
1323 6 : if ((sockfd < 0) &&
1324 1 : ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0))
1325 : {
1326 : /*
1327 : * 1st time error or new error for this interface,
1328 : * spit out the message and record the error
1329 : */
1330 0 : if (!vui->sock_errno || (vui->sock_errno != errno))
1331 : {
1332 0 : clib_unix_warning
1333 : ("Error: Could not open unix socket for %s",
1334 : vui->sock_filename);
1335 0 : vui->sock_errno = errno;
1336 : }
1337 0 : continue;
1338 : }
1339 :
1340 : /* try to connect */
1341 5 : strncpy (sun.sun_path, (char *) vui->sock_filename,
1342 : sizeof (sun.sun_path) - 1);
1343 5 : sun.sun_path[sizeof (sun.sun_path) - 1] = 0;
1344 :
1345 : /* Avoid hanging VPP if the other end does not accept */
1346 5 : if (fcntl(sockfd, F_SETFL, O_NONBLOCK) < 0)
1347 0 : clib_unix_warning ("fcntl");
1348 :
1349 5 : if (connect (sockfd, (struct sockaddr *) &sun,
1350 : sizeof (struct sockaddr_un)) == 0)
1351 : {
1352 : /* Set the socket to blocking as it was before */
1353 0 : if (fcntl(sockfd, F_SETFL, 0) < 0)
1354 0 : clib_unix_warning ("fcntl2");
1355 :
1356 0 : vui->sock_errno = 0;
1357 0 : template.file_descriptor = sockfd;
1358 0 : template.private_data =
1359 0 : vui - vhost_user_main.vhost_user_interfaces;
1360 0 : template.description = format (0, "vhost user process");
1361 0 : vui->clib_file_index = clib_file_add (&file_main, &template);
1362 0 : vui->num_qid = 2;
1363 :
1364 : /* This sockfd is considered consumed */
1365 0 : sockfd = -1;
1366 : }
1367 : else
1368 : {
1369 5 : vui->sock_errno = errno;
1370 : }
1371 : }
1372 : else
1373 : {
1374 : /* check if socket is alive */
1375 0 : int error = 0;
1376 0 : socklen_t len = sizeof (error);
1377 0 : int fd = UNIX_GET_FD(vui->clib_file_index);
1378 : int retval =
1379 0 : getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len);
1380 :
1381 0 : if (retval)
1382 : {
1383 0 : vu_log_debug (vui, "getsockopt returned %d", retval);
1384 0 : vhost_user_if_disconnect (vui);
1385 : }
1386 : }
1387 : }
1388 : }
1389 : /* *INDENT-ON* */
1390 : }
1391 : return 0;
1392 : }
1393 :
1394 : /* *INDENT-OFF* */
1395 17359 : VLIB_REGISTER_NODE (vhost_user_process_node,static) = {
1396 : .function = vhost_user_process,
1397 : .type = VLIB_NODE_TYPE_PROCESS,
1398 : .name = "vhost-user-process",
1399 : };
1400 : /* *INDENT-ON* */
1401 :
1402 : /**
1403 : * Disables and reset interface structure.
1404 : * It can then be either init again, or removed from used interfaces.
1405 : */
1406 : static void
1407 4 : vhost_user_term_if (vhost_user_intf_t * vui)
1408 : {
1409 : int q;
1410 4 : vhost_user_main_t *vum = &vhost_user_main;
1411 :
1412 : // disconnect interface sockets
1413 4 : vhost_user_if_disconnect (vui);
1414 4 : vhost_user_update_gso_interface_count (vui, 0 /* delete */ );
1415 4 : vhost_user_update_iface_state (vui);
1416 :
1417 68 : for (q = 0; q < vec_len (vui->vrings); q++)
1418 64 : clib_spinlock_free (&vui->vrings[q].vring_lock);
1419 :
1420 4 : if (vui->unix_server_index != ~0)
1421 : {
1422 : //Close server socket
1423 0 : clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
1424 : vui->unix_server_index);
1425 0 : clib_file_del (&file_main, uf);
1426 0 : vui->unix_server_index = ~0;
1427 0 : unlink (vui->sock_filename);
1428 : }
1429 :
1430 4 : mhash_unset (&vum->if_index_by_sock_name, vui->sock_filename,
1431 : &vui->if_index);
1432 4 : }
1433 :
1434 : int
1435 4 : vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
1436 : {
1437 4 : vhost_user_main_t *vum = &vhost_user_main;
1438 : vhost_user_intf_t *vui;
1439 4 : int rv = 0;
1440 : vnet_hw_interface_t *hwif;
1441 : u16 qid;
1442 :
1443 4 : if (!
1444 : (hwif =
1445 4 : vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index))
1446 4 : || hwif->dev_class_index != vhost_user_device_class.index)
1447 0 : return VNET_API_ERROR_INVALID_SW_IF_INDEX;
1448 :
1449 4 : vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
1450 :
1451 4 : vu_log_debug (vui, "Deleting vhost-user interface %s (instance %d)",
1452 : hwif->name, hwif->dev_instance);
1453 :
1454 8 : FOR_ALL_VHOST_TXQ (qid, vui)
1455 : {
1456 4 : vhost_user_vring_t *txvq = &vui->vrings[qid];
1457 :
1458 4 : if ((txvq->mode == VNET_HW_IF_RX_MODE_POLLING) &&
1459 4 : (txvq->thread_index != ~0))
1460 : {
1461 4 : vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
1462 4 : ASSERT (cpu->polling_q_count != 0);
1463 4 : cpu->polling_q_count--;
1464 : }
1465 :
1466 4 : if ((vum->ifq_count > 0) &&
1467 0 : ((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
1468 0 : (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)))
1469 : {
1470 0 : vum->ifq_count--;
1471 : // Stop the timer if there is no more interrupt interface/queue
1472 0 : if (vum->ifq_count == 0)
1473 : {
1474 0 : vlib_process_signal_event (vm,
1475 0 : vhost_user_send_interrupt_node.index,
1476 : VHOST_USER_EVENT_STOP_TIMER, 0);
1477 0 : break;
1478 : }
1479 : }
1480 : }
1481 :
1482 : // Disable and reset interface
1483 4 : vhost_user_term_if (vui);
1484 :
1485 : // Reset renumbered iface
1486 4 : if (hwif->dev_instance <
1487 4 : vec_len (vum->show_dev_instance_by_real_dev_instance))
1488 0 : vum->show_dev_instance_by_real_dev_instance[hwif->dev_instance] = ~0;
1489 :
1490 : // Delete ethernet interface
1491 4 : ethernet_delete_interface (vnm, vui->hw_if_index);
1492 :
1493 : // free vrings
1494 4 : vec_free (vui->vrings);
1495 :
1496 : // Back to pool
1497 4 : pool_put (vum->vhost_user_interfaces, vui);
1498 :
1499 4 : return rv;
1500 : }
1501 :
1502 : static clib_error_t *
1503 559 : vhost_user_exit (vlib_main_t * vm)
1504 : {
1505 559 : vnet_main_t *vnm = vnet_get_main ();
1506 559 : vhost_user_main_t *vum = &vhost_user_main;
1507 : vhost_user_intf_t *vui;
1508 :
1509 559 : vlib_worker_thread_barrier_sync (vlib_get_main ());
1510 : /* *INDENT-OFF* */
1511 559 : pool_foreach (vui, vum->vhost_user_interfaces) {
1512 0 : vhost_user_delete_if (vnm, vm, vui->sw_if_index);
1513 : }
1514 : /* *INDENT-ON* */
1515 559 : vlib_worker_thread_barrier_release (vlib_get_main ());
1516 559 : return 0;
1517 : }
1518 :
1519 1119 : VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit);
1520 :
1521 : /**
1522 : * Open server unix socket on specified sock_filename.
1523 : */
1524 : static int
1525 0 : vhost_user_init_server_sock (const char *sock_filename, int *sock_fd)
1526 : {
1527 0 : int rv = 0;
1528 0 : struct sockaddr_un un = { };
1529 : int fd;
1530 : /* create listening socket */
1531 0 : if ((fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
1532 0 : return VNET_API_ERROR_SYSCALL_ERROR_1;
1533 :
1534 0 : un.sun_family = AF_UNIX;
1535 0 : strncpy ((char *) un.sun_path, (char *) sock_filename,
1536 : sizeof (un.sun_path) - 1);
1537 :
1538 : /* remove if exists */
1539 0 : unlink ((char *) sock_filename);
1540 :
1541 0 : if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1)
1542 : {
1543 0 : rv = VNET_API_ERROR_SYSCALL_ERROR_2;
1544 0 : goto error;
1545 : }
1546 :
1547 0 : if (listen (fd, 1) == -1)
1548 : {
1549 0 : rv = VNET_API_ERROR_SYSCALL_ERROR_3;
1550 0 : goto error;
1551 : }
1552 :
1553 0 : *sock_fd = fd;
1554 0 : return 0;
1555 :
1556 0 : error:
1557 0 : close (fd);
1558 0 : return rv;
1559 : }
1560 :
1561 : /**
1562 : * Create ethernet interface for vhost user interface.
1563 : */
1564 : static void
1565 4 : vhost_user_create_ethernet (vnet_main_t *vnm, vlib_main_t *vm,
1566 : vhost_user_intf_t *vui,
1567 : vhost_user_create_if_args_t *args)
1568 : {
1569 4 : vhost_user_main_t *vum = &vhost_user_main;
1570 4 : vnet_eth_interface_registration_t eir = {};
1571 : u8 hwaddr[6];
1572 :
1573 : /* create hw and sw interface */
1574 4 : if (args->use_custom_mac)
1575 : {
1576 1 : clib_memcpy (hwaddr, args->hwaddr, 6);
1577 : }
1578 : else
1579 : {
1580 3 : random_u32 (&vum->random);
1581 3 : clib_memcpy (hwaddr + 2, &vum->random, sizeof (vum->random));
1582 3 : hwaddr[0] = 2;
1583 3 : hwaddr[1] = 0xfe;
1584 : }
1585 :
1586 4 : eir.dev_class_index = vhost_user_device_class.index;
1587 4 : eir.dev_instance = vui - vum->vhost_user_interfaces /* device instance */,
1588 4 : eir.address = hwaddr;
1589 4 : vui->hw_if_index = vnet_eth_register_interface (vnm, &eir);
1590 4 : }
1591 :
1592 : /*
1593 : * Initialize vui with specified attributes
1594 : */
1595 : static void
1596 4 : vhost_user_vui_init (vnet_main_t * vnm, vhost_user_intf_t * vui,
1597 : int server_sock_fd, vhost_user_create_if_args_t * args,
1598 : u32 * sw_if_index)
1599 : {
1600 : vnet_sw_interface_t *sw;
1601 : int q;
1602 4 : vhost_user_main_t *vum = &vhost_user_main;
1603 :
1604 4 : sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
1605 4 : if (server_sock_fd != -1)
1606 : {
1607 0 : clib_file_t template = { 0 };
1608 0 : template.read_function = vhost_user_socksvr_accept_ready;
1609 0 : template.file_descriptor = server_sock_fd;
1610 0 : template.private_data = vui - vum->vhost_user_interfaces; //hw index
1611 0 : template.description = format (0, "vhost user %d", sw);
1612 0 : vui->unix_server_index = clib_file_add (&file_main, &template);
1613 : }
1614 : else
1615 : {
1616 4 : vui->unix_server_index = ~0;
1617 : }
1618 :
1619 4 : vui->sw_if_index = sw->sw_if_index;
1620 4 : strncpy (vui->sock_filename, args->sock_filename,
1621 : ARRAY_LEN (vui->sock_filename) - 1);
1622 4 : vui->sock_errno = 0;
1623 4 : vui->is_ready = 0;
1624 4 : vui->feature_mask = args->feature_mask;
1625 4 : vui->clib_file_index = ~0;
1626 4 : vui->log_base_addr = 0;
1627 4 : vui->if_index = vui - vum->vhost_user_interfaces;
1628 4 : vui->enable_gso = args->enable_gso;
1629 4 : vui->enable_event_idx = args->enable_event_idx;
1630 4 : vui->enable_packed = args->enable_packed;
1631 : /*
1632 : * enable_gso takes precedence over configurable feature mask if there
1633 : * is a clash.
1634 : * if feature mask disables gso, but enable_gso is configured,
1635 : * then gso is enable
1636 : * if feature mask enables gso, but enable_gso is not configured,
1637 : * then gso is enable
1638 : *
1639 : * if gso is enable via feature mask, it must enable both host and guest
1640 : * gso feature mask, we don't support one sided GSO or partial GSO.
1641 : */
1642 4 : if ((vui->enable_gso == 0) &&
1643 4 : ((args->feature_mask & FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)
1644 : == (FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)))
1645 0 : vui->enable_gso = 1;
1646 4 : vhost_user_update_gso_interface_count (vui, 1 /* add */ );
1647 4 : mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename,
1648 : &vui->if_index, 0);
1649 :
1650 4 : vec_validate_aligned (vui->vrings, (VHOST_VRING_INIT_MQ_PAIR_SZ << 1) - 1,
1651 : CLIB_CACHE_LINE_BYTES);
1652 4 : vui->num_qid = 2;
1653 68 : for (q = 0; q < vec_len (vui->vrings); q++)
1654 64 : vhost_user_vring_init (vui, q);
1655 :
1656 4 : vnet_hw_if_set_caps (vnm, vui->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
1657 4 : vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
1658 :
1659 4 : if (sw_if_index)
1660 4 : *sw_if_index = vui->sw_if_index;
1661 4 : }
1662 :
1663 : int
1664 4 : vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
1665 : vhost_user_create_if_args_t * args)
1666 : {
1667 4 : vhost_user_intf_t *vui = NULL;
1668 4 : u32 sw_if_idx = ~0;
1669 4 : int rv = 0;
1670 4 : int server_sock_fd = -1;
1671 4 : vhost_user_main_t *vum = &vhost_user_main;
1672 : uword *if_index;
1673 :
1674 4 : if (args->sock_filename == NULL || !(strlen (args->sock_filename) > 0))
1675 : {
1676 0 : return VNET_API_ERROR_INVALID_ARGUMENT;
1677 : }
1678 :
1679 4 : if_index = mhash_get (&vum->if_index_by_sock_name,
1680 4 : (void *) args->sock_filename);
1681 4 : if (if_index)
1682 : {
1683 0 : vui = &vum->vhost_user_interfaces[*if_index];
1684 0 : args->sw_if_index = vui->sw_if_index;
1685 0 : return VNET_API_ERROR_IF_ALREADY_EXISTS;
1686 : }
1687 :
1688 4 : if (args->is_server)
1689 : {
1690 0 : if ((rv =
1691 0 : vhost_user_init_server_sock (args->sock_filename,
1692 : &server_sock_fd)) != 0)
1693 : {
1694 0 : return rv;
1695 : }
1696 : }
1697 :
1698 : /* Protect the uninitialized vui from being dispatched by rx/tx */
1699 4 : vlib_worker_thread_barrier_sync (vm);
1700 4 : pool_get (vhost_user_main.vhost_user_interfaces, vui);
1701 4 : vhost_user_create_ethernet (vnm, vm, vui, args);
1702 4 : vlib_worker_thread_barrier_release (vm);
1703 :
1704 4 : vhost_user_vui_init (vnm, vui, server_sock_fd, args, &sw_if_idx);
1705 4 : vnet_sw_interface_set_mtu (vnm, vui->sw_if_index, 9000);
1706 4 : vhost_user_rx_thread_placement (vui, 1);
1707 :
1708 4 : if (args->renumber)
1709 0 : vnet_interface_name_renumber (sw_if_idx, args->custom_dev_instance);
1710 :
1711 4 : args->sw_if_index = sw_if_idx;
1712 :
1713 : // Process node must connect
1714 4 : vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
1715 :
1716 4 : return rv;
1717 : }
1718 :
1719 : int
1720 0 : vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
1721 : vhost_user_create_if_args_t * args)
1722 : {
1723 0 : vhost_user_main_t *vum = &vhost_user_main;
1724 0 : vhost_user_intf_t *vui = NULL;
1725 0 : u32 sw_if_idx = ~0;
1726 0 : int server_sock_fd = -1;
1727 0 : int rv = 0;
1728 : vnet_hw_interface_t *hwif;
1729 : uword *if_index;
1730 :
1731 0 : if (!(hwif = vnet_get_sup_hw_interface_api_visible_or_null (vnm,
1732 : args->sw_if_index))
1733 0 : || hwif->dev_class_index != vhost_user_device_class.index)
1734 0 : return VNET_API_ERROR_INVALID_SW_IF_INDEX;
1735 :
1736 0 : if (args->sock_filename == NULL || !(strlen (args->sock_filename) > 0))
1737 0 : return VNET_API_ERROR_INVALID_ARGUMENT;
1738 :
1739 0 : vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
1740 :
1741 : /*
1742 : * Disallow changing the interface to have the same path name
1743 : * as other interface
1744 : */
1745 0 : if_index = mhash_get (&vum->if_index_by_sock_name,
1746 0 : (void *) args->sock_filename);
1747 0 : if (if_index && (*if_index != vui->if_index))
1748 0 : return VNET_API_ERROR_IF_ALREADY_EXISTS;
1749 :
1750 : // First try to open server socket
1751 0 : if (args->is_server)
1752 0 : if ((rv = vhost_user_init_server_sock (args->sock_filename,
1753 : &server_sock_fd)) != 0)
1754 0 : return rv;
1755 :
1756 0 : vhost_user_term_if (vui);
1757 0 : vhost_user_vui_init (vnm, vui, server_sock_fd, args, &sw_if_idx);
1758 :
1759 0 : if (args->renumber)
1760 0 : vnet_interface_name_renumber (sw_if_idx, args->custom_dev_instance);
1761 :
1762 : // Process node must connect
1763 0 : vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
1764 :
1765 0 : return rv;
1766 : }
1767 :
1768 : clib_error_t *
1769 0 : vhost_user_connect_command_fn (vlib_main_t * vm,
1770 : unformat_input_t * input,
1771 : vlib_cli_command_t * cmd)
1772 : {
1773 0 : vnet_main_t *vnm = vnet_get_main ();
1774 0 : unformat_input_t _line_input, *line_input = &_line_input;
1775 0 : clib_error_t *error = NULL;
1776 0 : vhost_user_create_if_args_t args = { 0 };
1777 : int rv;
1778 :
1779 : /* Get a line of input. */
1780 0 : if (!unformat_user (input, unformat_line_input, line_input))
1781 0 : return 0;
1782 :
1783 0 : args.feature_mask = (u64) ~ (0ULL);
1784 0 : args.custom_dev_instance = ~0;
1785 : /* GSO feature is disable by default */
1786 0 : args.feature_mask &= ~FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS;
1787 : /* packed-ring feature is disable by default */
1788 0 : args.feature_mask &= ~VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
1789 : /* event_idx feature is disable by default */
1790 0 : args.feature_mask &= ~VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
1791 :
1792 0 : while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1793 : {
1794 0 : if (unformat (line_input, "socket %s", &args.sock_filename))
1795 : ;
1796 0 : else if (unformat (line_input, "server"))
1797 0 : args.is_server = 1;
1798 0 : else if (unformat (line_input, "gso"))
1799 0 : args.enable_gso = 1;
1800 0 : else if (unformat (line_input, "packed"))
1801 0 : args.enable_packed = 1;
1802 0 : else if (unformat (line_input, "event-idx"))
1803 0 : args.enable_event_idx = 1;
1804 0 : else if (unformat (line_input, "feature-mask 0x%llx",
1805 : &args.feature_mask))
1806 : ;
1807 0 : else if (unformat (line_input, "hwaddr %U", unformat_ethernet_address,
1808 : args.hwaddr))
1809 0 : args.use_custom_mac = 1;
1810 0 : else if (unformat (line_input, "renumber %d",
1811 : &args.custom_dev_instance))
1812 0 : args.renumber = 1;
1813 : else
1814 : {
1815 0 : error = clib_error_return (0, "unknown input `%U'",
1816 : format_unformat_error, line_input);
1817 0 : goto done;
1818 : }
1819 : }
1820 :
1821 0 : if ((rv = vhost_user_create_if (vnm, vm, &args)))
1822 : {
1823 0 : error = clib_error_return (0, "vhost_user_create_if returned %d", rv);
1824 0 : goto done;
1825 : }
1826 :
1827 0 : vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnm,
1828 : args.sw_if_index);
1829 :
1830 0 : done:
1831 0 : vec_free (args.sock_filename);
1832 0 : unformat_free (line_input);
1833 :
1834 0 : return error;
1835 : }
1836 :
1837 : clib_error_t *
1838 0 : vhost_user_delete_command_fn (vlib_main_t * vm,
1839 : unformat_input_t * input,
1840 : vlib_cli_command_t * cmd)
1841 : {
1842 0 : unformat_input_t _line_input, *line_input = &_line_input;
1843 0 : u32 sw_if_index = ~0;
1844 0 : vnet_main_t *vnm = vnet_get_main ();
1845 0 : clib_error_t *error = NULL;
1846 :
1847 : /* Get a line of input. */
1848 0 : if (!unformat_user (input, unformat_line_input, line_input))
1849 0 : return 0;
1850 :
1851 0 : while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1852 : {
1853 0 : if (unformat (line_input, "sw_if_index %d", &sw_if_index))
1854 : ;
1855 0 : else if (unformat
1856 : (line_input, "%U", unformat_vnet_sw_interface, vnm,
1857 : &sw_if_index))
1858 : {
1859 : vnet_hw_interface_t *hwif =
1860 0 : vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
1861 0 : if (hwif == NULL ||
1862 0 : vhost_user_device_class.index != hwif->dev_class_index)
1863 : {
1864 0 : error = clib_error_return (0, "Not a vhost interface");
1865 0 : goto done;
1866 : }
1867 : }
1868 : else
1869 : {
1870 0 : error = clib_error_return (0, "unknown input `%U'",
1871 : format_unformat_error, line_input);
1872 0 : goto done;
1873 : }
1874 : }
1875 :
1876 0 : vhost_user_delete_if (vnm, vm, sw_if_index);
1877 :
1878 0 : done:
1879 0 : unformat_free (line_input);
1880 :
1881 0 : return error;
1882 : }
1883 :
1884 : int
1885 6 : vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
1886 : vhost_user_intf_details_t ** out_vuids)
1887 : {
1888 6 : int rv = 0;
1889 6 : vhost_user_main_t *vum = &vhost_user_main;
1890 : vhost_user_intf_t *vui;
1891 6 : vhost_user_intf_details_t *r_vuids = NULL;
1892 6 : vhost_user_intf_details_t *vuid = NULL;
1893 6 : u32 *hw_if_indices = 0;
1894 : vnet_hw_interface_t *hi;
1895 : int i;
1896 :
1897 6 : if (!out_vuids)
1898 0 : return -1;
1899 :
1900 9 : pool_foreach (vui, vum->vhost_user_interfaces)
1901 3 : vec_add1 (hw_if_indices, vui->hw_if_index);
1902 :
1903 9 : for (i = 0; i < vec_len (hw_if_indices); i++)
1904 : {
1905 3 : hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
1906 3 : vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
1907 :
1908 3 : vec_add2 (r_vuids, vuid, 1);
1909 3 : vuid->sw_if_index = vui->sw_if_index;
1910 3 : vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
1911 3 : vuid->features = vui->features;
1912 3 : vuid->num_regions = vui->nregions;
1913 3 : vuid->is_server = vui->unix_server_index != ~0;
1914 3 : vuid->sock_errno = vui->sock_errno;
1915 3 : snprintf ((char *) vuid->sock_filename, sizeof (vuid->sock_filename),
1916 3 : "%s", vui->sock_filename);
1917 3 : memcpy_s (vuid->if_name, sizeof (vuid->if_name), hi->name,
1918 3 : clib_min (vec_len (hi->name), sizeof (vuid->if_name) - 1));
1919 3 : vuid->if_name[sizeof (vuid->if_name) - 1] = 0;
1920 : }
1921 :
1922 6 : vec_free (hw_if_indices);
1923 :
1924 6 : *out_vuids = r_vuids;
1925 :
1926 6 : return rv;
1927 : }
1928 :
1929 : static u8 *
1930 0 : format_vhost_user_desc (u8 * s, va_list * args)
1931 : {
1932 0 : char *fmt = va_arg (*args, char *);
1933 0 : vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
1934 0 : vnet_virtio_vring_desc_t *desc_table =
1935 : va_arg (*args, vnet_virtio_vring_desc_t *);
1936 0 : int idx = va_arg (*args, int);
1937 0 : u32 *mem_hint = va_arg (*args, u32 *);
1938 :
1939 0 : s = format (s, fmt, idx, desc_table[idx].addr, desc_table[idx].len,
1940 0 : desc_table[idx].flags, desc_table[idx].next,
1941 0 : pointer_to_uword (map_guest_mem (vui, desc_table[idx].addr,
1942 : mem_hint)));
1943 0 : return s;
1944 : }
1945 :
1946 : static void
1947 0 : vhost_user_show_fds (vlib_main_t * vm, vhost_user_vring_t * vq)
1948 : {
1949 0 : int kickfd = UNIX_GET_FD (vq->kickfd_idx);
1950 0 : int callfd = UNIX_GET_FD (vq->callfd_idx);
1951 :
1952 0 : vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n", kickfd, callfd,
1953 : vq->errfd);
1954 0 : }
1955 :
1956 : static void
1957 0 : vhost_user_show_desc (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
1958 : int show_descr, int show_verbose)
1959 : {
1960 : int j;
1961 0 : u32 mem_hint = 0;
1962 : u32 idx;
1963 : u32 n_entries;
1964 : vnet_virtio_vring_desc_t *desc_table;
1965 0 : vhost_user_vring_t *vq = &vui->vrings[q];
1966 :
1967 0 : if (vq->avail && vq->used)
1968 0 : vlib_cli_output (vm,
1969 : " avail.flags %x avail event idx %u avail.idx %d "
1970 : "used.flags %x used event idx %u used.idx %d\n",
1971 0 : vq->avail->flags, vhost_user_avail_event_idx (vq),
1972 0 : vq->avail->idx, vq->used->flags,
1973 0 : vhost_user_used_event_idx (vq), vq->used->idx);
1974 :
1975 0 : vhost_user_show_fds (vm, vq);
1976 :
1977 0 : if (show_descr)
1978 : {
1979 0 : vlib_cli_output (vm, "\n descriptor table:\n");
1980 0 : vlib_cli_output (vm,
1981 : " slot addr len flags next "
1982 : "user_addr\n");
1983 0 : vlib_cli_output (vm,
1984 : " ===== ================== ===== ====== ===== "
1985 : "==================\n");
1986 0 : for (j = 0; j < vq->qsz_mask + 1; j++)
1987 : {
1988 0 : desc_table = vq->desc;
1989 0 : vlib_cli_output (vm, "%U", format_vhost_user_desc,
1990 : " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", vui,
1991 : desc_table, j, &mem_hint);
1992 0 : if (show_verbose && (desc_table[j].flags & VRING_DESC_F_INDIRECT))
1993 : {
1994 0 : n_entries =
1995 0 : desc_table[j].len / sizeof (vnet_virtio_vring_desc_t);
1996 0 : desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint);
1997 0 : if (desc_table)
1998 : {
1999 0 : for (idx = 0; idx < clib_min (20, n_entries); idx++)
2000 : {
2001 0 : vlib_cli_output
2002 : (vm, "%U", format_vhost_user_desc,
2003 : "> %-4u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
2004 : desc_table, idx, &mem_hint);
2005 : }
2006 0 : if (n_entries >= 20)
2007 0 : vlib_cli_output (vm, "Skip displaying entries 20...%u\n",
2008 : n_entries);
2009 : }
2010 : }
2011 : }
2012 : }
2013 0 : }
2014 :
2015 : static u8 *
2016 0 : format_vhost_user_packed_desc (u8 * s, va_list * args)
2017 : {
2018 0 : char *fmt = va_arg (*args, char *);
2019 0 : vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
2020 0 : vnet_virtio_vring_packed_desc_t *desc_table =
2021 : va_arg (*args, vnet_virtio_vring_packed_desc_t *);
2022 0 : int idx = va_arg (*args, int);
2023 0 : u32 *mem_hint = va_arg (*args, u32 *);
2024 :
2025 0 : s = format (s, fmt, idx, desc_table[idx].addr, desc_table[idx].len,
2026 0 : desc_table[idx].flags, desc_table[idx].id,
2027 0 : pointer_to_uword (map_guest_mem (vui, desc_table[idx].addr,
2028 : mem_hint)));
2029 0 : return s;
2030 : }
2031 :
2032 : static u8 *
2033 0 : format_vhost_user_event_idx_flags (u8 * s, va_list * args)
2034 : {
2035 0 : u32 flags = va_arg (*args, u32);
2036 : typedef struct
2037 : {
2038 : u8 value;
2039 : char *str;
2040 : } event_idx_flags;
2041 : static event_idx_flags event_idx_array[] = {
2042 : #define _(s,v) { .str = #s, .value = v, },
2043 : foreach_virtio_event_idx_flags
2044 : #undef _
2045 : };
2046 0 : u32 num_entries = sizeof (event_idx_array) / sizeof (event_idx_flags);
2047 :
2048 0 : if (flags < num_entries)
2049 0 : s = format (s, "%s", event_idx_array[flags].str);
2050 : else
2051 0 : s = format (s, "%u", flags);
2052 0 : return s;
2053 : }
2054 :
2055 : static void
2056 0 : vhost_user_show_desc_packed (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
2057 : int show_descr, int show_verbose)
2058 : {
2059 : int j;
2060 0 : u32 mem_hint = 0;
2061 : u32 idx;
2062 : u32 n_entries;
2063 : vnet_virtio_vring_packed_desc_t *desc_table;
2064 0 : vhost_user_vring_t *vq = &vui->vrings[q];
2065 : u16 off_wrap, event_idx;
2066 :
2067 0 : off_wrap = vq->avail_event->off_wrap;
2068 0 : event_idx = off_wrap & 0x7fff;
2069 0 : vlib_cli_output (vm, " avail_event.flags %U avail_event.off_wrap %u "
2070 : "avail event idx %u\n", format_vhost_user_event_idx_flags,
2071 0 : (u32) vq->avail_event->flags, off_wrap, event_idx);
2072 :
2073 0 : off_wrap = vq->used_event->off_wrap;
2074 0 : event_idx = off_wrap & 0x7fff;
2075 0 : vlib_cli_output (vm, " used_event.flags %U used_event.off_wrap %u "
2076 : "used event idx %u\n", format_vhost_user_event_idx_flags,
2077 0 : (u32) vq->used_event->flags, off_wrap, event_idx);
2078 :
2079 0 : vlib_cli_output (vm, " avail wrap counter %u, used wrap counter %u\n",
2080 0 : vq->avail_wrap_counter, vq->used_wrap_counter);
2081 :
2082 0 : vhost_user_show_fds (vm, vq);
2083 :
2084 0 : if (show_descr)
2085 : {
2086 0 : vlib_cli_output (vm, "\n descriptor table:\n");
2087 0 : vlib_cli_output (vm,
2088 : " slot addr len flags id "
2089 : "user_addr\n");
2090 0 : vlib_cli_output (vm,
2091 : " ===== ================== ===== ====== ===== "
2092 : "==================\n");
2093 0 : for (j = 0; j < vq->qsz_mask + 1; j++)
2094 : {
2095 0 : desc_table = vq->packed_desc;
2096 0 : vlib_cli_output (vm, "%U", format_vhost_user_packed_desc,
2097 : " %-5u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
2098 : desc_table, j, &mem_hint);
2099 0 : if (show_verbose && (desc_table[j].flags & VRING_DESC_F_INDIRECT))
2100 : {
2101 0 : n_entries = desc_table[j].len >> 4;
2102 0 : desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint);
2103 0 : if (desc_table)
2104 : {
2105 0 : for (idx = 0; idx < clib_min (20, n_entries); idx++)
2106 : {
2107 0 : vlib_cli_output
2108 : (vm, "%U", format_vhost_user_packed_desc,
2109 : "> %-4u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
2110 : desc_table, idx, &mem_hint);
2111 : }
2112 0 : if (n_entries >= 20)
2113 0 : vlib_cli_output (vm, "Skip displaying entries 20...%u\n",
2114 : n_entries);
2115 : }
2116 : }
2117 : }
2118 : }
2119 0 : }
2120 :
2121 : clib_error_t *
2122 0 : show_vhost_user_command_fn (vlib_main_t * vm,
2123 : unformat_input_t * input,
2124 : vlib_cli_command_t * cmd)
2125 : {
2126 0 : clib_error_t *error = 0;
2127 0 : vnet_main_t *vnm = vnet_get_main ();
2128 0 : vhost_user_main_t *vum = &vhost_user_main;
2129 : vhost_user_intf_t *vui;
2130 0 : u32 hw_if_index, *hw_if_indices = 0;
2131 : vnet_hw_interface_t *hi;
2132 : u16 qid;
2133 : int i, j, q;
2134 0 : int show_descr = 0;
2135 0 : int show_verbose = 0;
2136 : struct feat_struct
2137 : {
2138 : u8 bit;
2139 : char *str;
2140 : };
2141 : struct feat_struct *feat_entry;
2142 :
2143 : static struct feat_struct feat_array[] = {
2144 : #define _(s,b) { .str = #s, .bit = b, },
2145 : foreach_virtio_net_features
2146 : #undef _
2147 : {.str = NULL}
2148 : };
2149 :
2150 : #define foreach_protocol_feature \
2151 : _(VHOST_USER_PROTOCOL_F_MQ) \
2152 : _(VHOST_USER_PROTOCOL_F_LOG_SHMFD)
2153 :
2154 : static struct feat_struct proto_feat_array[] = {
2155 : #define _(s) { .str = #s, .bit = s},
2156 : foreach_protocol_feature
2157 : #undef _
2158 : {.str = NULL}
2159 : };
2160 :
2161 0 : while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2162 : {
2163 0 : if (unformat
2164 : (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
2165 : {
2166 0 : hi = vnet_get_hw_interface (vnm, hw_if_index);
2167 0 : if (vhost_user_device_class.index != hi->dev_class_index)
2168 : {
2169 0 : error = clib_error_return (0, "unknown input `%U'",
2170 : format_unformat_error, input);
2171 0 : goto done;
2172 : }
2173 0 : vec_add1 (hw_if_indices, hw_if_index);
2174 : }
2175 0 : else if (unformat (input, "descriptors") || unformat (input, "desc"))
2176 0 : show_descr = 1;
2177 0 : else if (unformat (input, "verbose"))
2178 0 : show_verbose = 1;
2179 : else
2180 : {
2181 0 : error = clib_error_return (0, "unknown input `%U'",
2182 : format_unformat_error, input);
2183 0 : goto done;
2184 : }
2185 : }
2186 0 : if (vec_len (hw_if_indices) == 0)
2187 : {
2188 0 : pool_foreach (vui, vum->vhost_user_interfaces)
2189 0 : vec_add1 (hw_if_indices, vui->hw_if_index);
2190 : }
2191 0 : vlib_cli_output (vm, "Virtio vhost-user interfaces");
2192 0 : vlib_cli_output (vm, "Global:\n coalesce frames %d time %e",
2193 : vum->coalesce_frames, vum->coalesce_time);
2194 0 : vlib_cli_output (vm, " Number of rx virtqueues in interrupt mode: %d",
2195 : vum->ifq_count);
2196 0 : vlib_cli_output (vm, " Number of GSO interfaces: %d", vum->gso_count);
2197 0 : for (u32 tid = 0; tid <= vlib_num_workers (); tid++)
2198 : {
2199 0 : vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, tid);
2200 0 : vlib_cli_output (vm, " Thread %u: Polling queue count %u", tid,
2201 : cpu->polling_q_count);
2202 : }
2203 :
2204 0 : for (i = 0; i < vec_len (hw_if_indices); i++)
2205 : {
2206 0 : hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
2207 0 : vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
2208 0 : vlib_cli_output (vm, "Interface: %U (ifindex %d)",
2209 0 : format_vnet_hw_if_index_name, vnm, hw_if_indices[i],
2210 0 : hw_if_indices[i]);
2211 0 : vlib_cli_output (vm, " Number of qids %u", vui->num_qid);
2212 0 : if (vui->enable_gso)
2213 0 : vlib_cli_output (vm, " GSO enable");
2214 0 : if (vui->enable_packed)
2215 0 : vlib_cli_output (vm, " Packed ring enable");
2216 0 : if (vui->enable_event_idx)
2217 0 : vlib_cli_output (vm, " Event index enable");
2218 :
2219 0 : vlib_cli_output (vm, "virtio_net_hdr_sz %d\n"
2220 : " features mask (0x%llx): \n"
2221 : " features (0x%llx): \n",
2222 : vui->virtio_net_hdr_sz, vui->feature_mask,
2223 : vui->features);
2224 :
2225 0 : feat_entry = (struct feat_struct *) &feat_array;
2226 0 : while (feat_entry->str)
2227 : {
2228 0 : if (vui->features & (1ULL << feat_entry->bit))
2229 0 : vlib_cli_output (vm, " %s (%d)", feat_entry->str,
2230 0 : feat_entry->bit);
2231 0 : feat_entry++;
2232 : }
2233 :
2234 0 : vlib_cli_output (vm, " protocol features (0x%llx)",
2235 : vui->protocol_features);
2236 0 : feat_entry = (struct feat_struct *) &proto_feat_array;
2237 0 : while (feat_entry->str)
2238 : {
2239 0 : if (vui->protocol_features & (1ULL << feat_entry->bit))
2240 0 : vlib_cli_output (vm, " %s (%d)", feat_entry->str,
2241 0 : feat_entry->bit);
2242 0 : feat_entry++;
2243 : }
2244 :
2245 0 : vlib_cli_output (vm, "\n");
2246 :
2247 0 : vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n",
2248 0 : vui->sock_filename,
2249 0 : (vui->unix_server_index != ~0) ? "server" : "client",
2250 : strerror (vui->sock_errno));
2251 :
2252 0 : vlib_cli_output (vm, " rx placement: ");
2253 :
2254 0 : FOR_ALL_VHOST_TXQ (qid, vui)
2255 : {
2256 0 : vhost_user_vring_t *txvq = &vui->vrings[qid];
2257 :
2258 0 : if (txvq->qid == -1)
2259 0 : continue;
2260 0 : vlib_cli_output (vm, " thread %d on vring %d, %U\n",
2261 : txvq->thread_index, qid, format_vnet_hw_if_rx_mode,
2262 : txvq->mode);
2263 : }
2264 :
2265 0 : vlib_cli_output (vm, " tx placement\n");
2266 :
2267 0 : FOR_ALL_VHOST_RXQ (qid, vui)
2268 : {
2269 0 : vhost_user_vring_t *rxvq = &vui->vrings[qid];
2270 : vnet_hw_if_tx_queue_t *txq;
2271 :
2272 0 : if (rxvq->queue_index == ~0)
2273 0 : continue;
2274 0 : txq = vnet_hw_if_get_tx_queue (vnm, rxvq->queue_index);
2275 0 : if (txq->threads)
2276 0 : vlib_cli_output (vm, " threads %U on vring %u: %s\n",
2277 : format_bitmap_list, txq->threads, qid,
2278 0 : txq->shared_queue ? "spin-lock" : "lock-free");
2279 : }
2280 :
2281 0 : vlib_cli_output (vm, "\n");
2282 :
2283 0 : vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions);
2284 :
2285 0 : if (vui->nregions)
2286 : {
2287 0 : vlib_cli_output (vm,
2288 : " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
2289 0 : vlib_cli_output (vm,
2290 : " ====== ===== ================== ================== ================== ================== ==================\n");
2291 : }
2292 0 : for (j = 0; j < vui->nregions; j++)
2293 : {
2294 0 : vlib_cli_output (vm,
2295 : " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
2296 : j, vui->region_mmap_fd[j],
2297 : vui->regions[j].guest_phys_addr,
2298 : vui->regions[j].memory_size,
2299 : vui->regions[j].userspace_addr,
2300 : vui->regions[j].mmap_offset,
2301 0 : pointer_to_uword (vui->region_mmap_addr[j]));
2302 : }
2303 0 : FOR_ALL_VHOST_RX_TXQ (q, vui)
2304 : {
2305 0 : if (!vui->vrings[q].started)
2306 0 : continue;
2307 :
2308 0 : vlib_cli_output (vm, "\n Virtqueue %d (%s%s)\n", q,
2309 0 : (q & 1) ? "RX" : "TX",
2310 0 : vui->vrings[q].enabled ? "" : " disabled");
2311 0 : vlib_cli_output (vm, " global %s queue index %u\n",
2312 0 : (q & 1) ? "RX" : "TX", vui->vrings[q].queue_index);
2313 :
2314 0 : vlib_cli_output (
2315 : vm,
2316 : " qsz %d last_avail_idx %d last_used_idx %d"
2317 : " last_kick %u\n",
2318 0 : vui->vrings[q].qsz_mask + 1, vui->vrings[q].last_avail_idx,
2319 0 : vui->vrings[q].last_used_idx, vui->vrings[q].last_kick);
2320 :
2321 0 : if (vhost_user_is_packed_ring_supported (vui))
2322 0 : vhost_user_show_desc_packed (vm, vui, q, show_descr, show_verbose);
2323 : else
2324 0 : vhost_user_show_desc (vm, vui, q, show_descr, show_verbose);
2325 : }
2326 0 : vlib_cli_output (vm, "\n");
2327 : }
2328 0 : done:
2329 0 : vec_free (hw_if_indices);
2330 0 : return error;
2331 : }
2332 :
2333 : /*
2334 : * CLI functions
2335 : */
2336 :
2337 : /*?
2338 : * Create a vHost User interface. Once created, a new virtual interface
2339 : * will exist with the name '<em>VirtualEthernet0/0/x</em>', where '<em>x</em>'
2340 : * is the next free index.
2341 : *
2342 : * There are several parameters associated with a vHost interface:
2343 : *
2344 : * - <b>socket <socket-filename></b> - Name of the linux socket used by
2345 : * hypervisor and VPP to manage the vHost interface. If in <em>server</em>
2346 : * mode, VPP will create the socket if it does not already exist. If in
2347 : * <em>client</em> mode, hypervisor will create the socket if it does not
2348 : * already exist. The VPP code is indifferent to the file location. However,
2349 : * if SELinux is enabled, then the socket needs to be created in
2350 : * <em>/var/run/vpp/</em>.
2351 : *
2352 : * - <b>server</b> - Optional flag to indicate that VPP should be the server
2353 : * for the linux socket. If not provided, VPP will be the client. In
2354 : * <em>server</em> mode, the VM can be reset without tearing down the vHost
2355 : * Interface. In <em>client</em> mode, VPP can be reset without bringing down
2356 : * the VM and tearing down the vHost Interface.
2357 : *
2358 : * - <b>feature-mask <hex></b> - Optional virtio/vhost feature set negotiated
 * at startup. <b>This is intended for debugging only.</b> It is recommended
2360 : * that this parameter not be used except by experienced users. By default,
2361 : * all supported features will be advertised. Otherwise, provide the set of
2362 : * features desired.
2363 : * - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF
2364 : * - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ
2365 : * - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE
2366 : * - 0x000400000 (22) - VIRTIO_NET_F_MQ
2367 : * - 0x004000000 (26) - VHOST_F_LOG_ALL
2368 : * - 0x008000000 (27) - VIRTIO_F_ANY_LAYOUT
2369 : * - 0x010000000 (28) - VIRTIO_F_INDIRECT_DESC
2370 : * - 0x040000000 (30) - VHOST_USER_F_PROTOCOL_FEATURES
2371 : * - 0x100000000 (32) - VIRTIO_F_VERSION_1
2372 : *
2373 : * - <b>hwaddr <mac-addr></b> - Optional ethernet address, can be in either
2374 : * X:X:X:X:X:X unix or X.X.X cisco format.
2375 : *
2376 : * - <b>renumber <dev_instance></b> - Optional parameter which allows the
2377 : * instance in the name to be specified. If instance already exists, name
2378 : * will be used anyway and multiple instances will have the same name. Use
2379 : * with caution.
2380 : *
2381 : * @cliexpar
2382 : * Example of how to create a vhost interface with VPP as the client and all
2383 : * features enabled:
2384 : * @cliexstart{create vhost-user socket /var/run/vpp/vhost1.sock}
2385 : * VirtualEthernet0/0/0
2386 : * @cliexend
2387 : * Example of how to create a vhost interface with VPP as the server and with
2388 : * just multiple queues enabled:
2389 : * @cliexstart{create vhost-user socket /var/run/vpp/vhost2.sock server
2390 : * feature-mask 0x40400000}
2391 : * VirtualEthernet0/0/1
2392 : * @cliexend
2393 : * Once the vHost interface is created, enable the interface using:
2394 : * @cliexcmd{set interface state VirtualEthernet0/0/0 up}
2395 : ?*/
2396 : /* *INDENT-OFF* */
2397 13439 : VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
2398 : .path = "create vhost-user",
2399 : .short_help = "create vhost-user socket <socket-filename> [server] "
2400 : "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] [gso] "
2401 : "[packed] [event-idx]",
2402 : .function = vhost_user_connect_command_fn,
2403 : .is_mp_safe = 1,
2404 : };
2405 : /* *INDENT-ON* */
2406 :
2407 : /*?
2408 : * Delete a vHost User interface using the interface name or the
2409 : * software interface index. Use the '<em>show interface</em>'
2410 : * command to determine the software interface index. On deletion,
2411 : * the linux socket will not be deleted.
2412 : *
2413 : * @cliexpar
2414 : * Example of how to delete a vhost interface by name:
2415 : * @cliexcmd{delete vhost-user VirtualEthernet0/0/1}
2416 : * Example of how to delete a vhost interface by software interface index:
2417 : * @cliexcmd{delete vhost-user sw_if_index 1}
2418 : ?*/
2419 : /* *INDENT-OFF* */
2420 13439 : VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
2421 : .path = "delete vhost-user",
2422 : .short_help = "delete vhost-user {<interface> | sw_if_index <sw_idx>}",
2423 : .function = vhost_user_delete_command_fn,
2424 : };
2425 :
2426 : /*?
2427 : * Display the attributes of a single vHost User interface (provide interface
2428 : * name), multiple vHost User interfaces (provide a list of interface names
2429 : * separated by spaces) or all Vhost User interfaces (omit an interface name
2430 : * to display all vHost interfaces).
2431 : *
2432 : * @cliexpar
2433 : * @parblock
2434 : * Example of how to display a vhost interface:
2435 : * @cliexstart{show vhost-user VirtualEthernet0/0/0}
2436 : * Virtio vhost-user interfaces
2437 : * Global:
2438 : * coalesce frames 32 time 1e-3
2439 : * Interface: VirtualEthernet0/0/0 (ifindex 1)
2440 : * virtio_net_hdr_sz 12
2441 : * features mask (0xffffffffffffffff):
2442 : * features (0x50408000):
2443 : * VIRTIO_NET_F_MRG_RXBUF (15)
2444 : * VIRTIO_NET_F_MQ (22)
2445 : * VIRTIO_F_INDIRECT_DESC (28)
2446 : * VHOST_USER_F_PROTOCOL_FEATURES (30)
2447 : * protocol features (0x3)
2448 : * VHOST_USER_PROTOCOL_F_MQ (0)
2449 : * VHOST_USER_PROTOCOL_F_LOG_SHMFD (1)
2450 : *
2451 : * socket filename /var/run/vpp/vhost1.sock type client errno "Success"
2452 : *
2453 : * rx placement:
2454 : * thread 1 on vring 1
2455 : * thread 1 on vring 5
2456 : * thread 2 on vring 3
2457 : * thread 2 on vring 7
2458 : * tx placement: spin-lock
2459 : * thread 0 on vring 0
2460 : * thread 1 on vring 2
2461 : * thread 2 on vring 0
2462 : *
2463 : * Memory regions (total 2)
2464 : * region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr
2465 : * ====== == =============== =========== ============== =========== ==========
2466 : * 0 60 0x00000000 0x000a0000 0xaac00000 0x00000000 0x2b400000
2467 : * 1 61 0x000c0000 0x3ff40000 0xaacc0000 0x000c0000 0xabcc0000
2468 : *
2469 : * Virtqueue 0 (TX)
2470 : * qsz 256 last_avail_idx 0 last_used_idx 0
2471 : * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
2472 : * kickfd 62 callfd 64 errfd -1
2473 : *
2474 : * Virtqueue 1 (RX)
2475 : * qsz 256 last_avail_idx 0 last_used_idx 0
2476 : * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
2477 : * kickfd 65 callfd 66 errfd -1
2478 : *
2479 : * Virtqueue 2 (TX)
2480 : * qsz 256 last_avail_idx 0 last_used_idx 0
2481 : * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
2482 : * kickfd 63 callfd 70 errfd -1
2483 : *
2484 : * Virtqueue 3 (RX)
2485 : * qsz 256 last_avail_idx 0 last_used_idx 0
2486 : * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
2487 : * kickfd 72 callfd 74 errfd -1
2488 : *
2489 : * Virtqueue 4 (TX disabled)
2490 : * qsz 256 last_avail_idx 0 last_used_idx 0
2491 : * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
2492 : * kickfd 76 callfd 78 errfd -1
2493 : *
2494 : * Virtqueue 5 (RX disabled)
2495 : * qsz 256 last_avail_idx 0 last_used_idx 0
2496 : * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
2497 : * kickfd 80 callfd 82 errfd -1
2498 : *
2499 : * Virtqueue 6 (TX disabled)
2500 : * qsz 256 last_avail_idx 0 last_used_idx 0
2501 : * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
2502 : * kickfd 84 callfd 86 errfd -1
2503 : *
2504 : * Virtqueue 7 (RX disabled)
2505 : * qsz 256 last_avail_idx 0 last_used_idx 0
2506 : * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
2507 : * kickfd 88 callfd 90 errfd -1
2508 : *
2509 : * @cliexend
2510 : *
2511 : * The optional '<em>descriptors</em>' parameter will display the same output
2512 : * as the previous example but will include the descriptor table for each
2513 : * queue.
2514 : * The output is truncated below:
2515 : * @cliexstart{show vhost-user VirtualEthernet0/0/0 descriptors}
2516 : * Virtio vhost-user interfaces
2517 : * Global:
2518 : * coalesce frames 32 time 1e-3
2519 : * Interface: VirtualEthernet0/0/0 (ifindex 1)
2520 : * virtio_net_hdr_sz 12
2521 : * features mask (0xffffffffffffffff):
2522 : * features (0x50408000):
2523 : * VIRTIO_NET_F_MRG_RXBUF (15)
2524 : * VIRTIO_NET_F_MQ (22)
2525 : * :
2526 : * Virtqueue 0 (TX)
2527 : * qsz 256 last_avail_idx 0 last_used_idx 0
2528 : * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
2529 : * kickfd 62 callfd 64 errfd -1
2530 : *
2531 : * descriptor table:
2532 : * id addr len flags next user_addr
2533 : * ===== ================== ===== ====== ===== ==================
2534 : * 0 0x0000000010b6e974 2060 0x0002 1 0x00002aabbc76e974
2535 : * 1 0x0000000010b6e034 2060 0x0002 2 0x00002aabbc76e034
2536 : * 2 0x0000000010b6d6f4 2060 0x0002 3 0x00002aabbc76d6f4
2537 : * 3 0x0000000010b6cdb4 2060 0x0002 4 0x00002aabbc76cdb4
2538 : * 4 0x0000000010b6c474 2060 0x0002 5 0x00002aabbc76c474
2539 : * 5 0x0000000010b6bb34 2060 0x0002 6 0x00002aabbc76bb34
2540 : * 6 0x0000000010b6b1f4 2060 0x0002 7 0x00002aabbc76b1f4
2541 : * 7 0x0000000010b6a8b4 2060 0x0002 8 0x00002aabbc76a8b4
2542 : * 8 0x0000000010b69f74 2060 0x0002 9 0x00002aabbc769f74
2543 : * 9 0x0000000010b69634 2060 0x0002 10 0x00002aabbc769634
2544 : * 10 0x0000000010b68cf4 2060 0x0002 11 0x00002aabbc768cf4
2545 : * :
2546 : * 249 0x0000000000000000 0 0x0000 250 0x00002aab2b400000
2547 : * 250 0x0000000000000000 0 0x0000 251 0x00002aab2b400000
2548 : * 251 0x0000000000000000 0 0x0000 252 0x00002aab2b400000
2549 : * 252 0x0000000000000000 0 0x0000 253 0x00002aab2b400000
2550 : * 253 0x0000000000000000 0 0x0000 254 0x00002aab2b400000
2551 : * 254 0x0000000000000000 0 0x0000 255 0x00002aab2b400000
2552 : * 255 0x0000000000000000 0 0x0000 32768 0x00002aab2b400000
2553 : *
2554 : * Virtqueue 1 (RX)
2555 : * qsz 256 last_avail_idx 0 last_used_idx 0
2556 : * :
2557 : * @cliexend
2558 : * @endparblock
2559 : ?*/
2560 : /* *INDENT-OFF* */
2561 13439 : VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
2562 : .path = "show vhost-user",
2563 : .short_help = "show vhost-user [<interface> [<interface> [..]]] "
2564 : "[[descriptors] [verbose]]",
2565 : .function = show_vhost_user_command_fn,
2566 : };
2567 : /* *INDENT-ON* */
2568 :
2569 :
2570 : static clib_error_t *
2571 559 : vhost_user_config (vlib_main_t * vm, unformat_input_t * input)
2572 : {
2573 559 : vhost_user_main_t *vum = &vhost_user_main;
2574 :
2575 559 : while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2576 : {
2577 0 : if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames))
2578 : ;
2579 0 : else if (unformat (input, "coalesce-time %f", &vum->coalesce_time))
2580 : ;
2581 0 : else if (unformat (input, "dont-dump-memory"))
2582 0 : vum->dont_dump_vhost_user_memory = 1;
2583 : else
2584 0 : return clib_error_return (0, "unknown input `%U'",
2585 : format_unformat_error, input);
2586 : }
2587 :
2588 559 : return 0;
2589 : }
2590 :
2591 : /* vhost-user { ... } configuration. */
2592 1119 : VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user");
2593 :
2594 : void
2595 0 : vhost_user_unmap_all (void)
2596 : {
2597 0 : vhost_user_main_t *vum = &vhost_user_main;
2598 : vhost_user_intf_t *vui;
2599 :
2600 0 : if (vum->dont_dump_vhost_user_memory)
2601 : {
2602 0 : pool_foreach (vui, vum->vhost_user_interfaces)
2603 0 : unmap_all_mem_regions (vui);
2604 : }
2605 0 : }
2606 :
2607 : /*
2608 : * fd.io coding-style-patch-verification: ON
2609 : *
2610 : * Local Variables:
2611 : * eval: (c-set-style "gnu")
2612 : * End:
2613 : */
|