From decfd8bd7338574f5d370eece3f5a4355fedfdbf Mon Sep 17 00:00:00 2001 From: Priyansh Rathi Date: Mon, 24 Jul 2023 01:07:18 -0700 Subject: [PATCH] vsock: Add VM groups in sibling communication Restrict the VMs a given VM can communicate with by introducing VM groups. A group is simply a list of names assigned to the device in the configuration. A VM can communicate with another VM only if the list of group names assigned to their devices have atleast one group name in common. Signed-off-by: Priyansh Rathi --- crates/vsock/README.md | 26 ++++++++---- crates/vsock/src/main.rs | 63 ++++++++++++++++++++++++---- crates/vsock/src/thread_backend.rs | 45 ++++++++++++++++++-- crates/vsock/src/vhu_vsock.rs | 27 ++++++++++-- crates/vsock/src/vhu_vsock_thread.rs | 18 +++++++- 5 files changed, 157 insertions(+), 22 deletions(-) diff --git a/crates/vsock/README.md b/crates/vsock/README.md index f4e946d..2413971 100644 --- a/crates/vsock/README.md +++ b/crates/vsock/README.md @@ -43,17 +43,18 @@ Run the vhost-device-vsock device: vhost-device-vsock --guest-cid= \ --socket= \ --uds-path= \ - [--tx-buffer-size=host packets)>] + [--tx-buffer-size=host packets)>] \ + [--groups=] ``` or ``` -vhost-device-vsock --vm guest_cid=,socket=,uds-path=[,tx-buffer-size=host packets)>] +vhost-device-vsock --vm guest_cid=,socket=,uds-path=[,tx-buffer-size=host packets)>][,groups=] ``` Specify the `--vm` argument multiple times to specify multiple devices like this: ``` vhost-device-vsock \ ---vm guest-cid=3,socket=/tmp/vhost3.socket,uds-path=/tmp/vm3.vsock \ +--vm guest-cid=3,socket=/tmp/vhost3.socket,uds-path=/tmp/vm3.vsock,groups=group1+groupA \ --vm guest-cid=4,socket=/tmp/vhost4.socket,uds-path=/tmp/vm4.vsock,tx-buffer-size=32768 ``` @@ -69,10 +70,12 @@ vms: socket: /tmp/vhost3.socket uds_path: /tmp/vm3.sock tx_buffer_size: 65536 + groups: group1+groupA - guest_cid: 4 socket: /tmp/vhost4.socket uds_path: /tmp/vm4.sock tx_buffer_size: 32768 + groups: group2+groupB ``` Run VMM (e.g. QEMU): @@ -144,12 +147,17 @@ guest$ nc --vsock 2 1234 ### Sibling VM communication -If you add multiple VMs, they can communicate with each other. For example, if you have two VMs with -CID 3 and 4, you can run the following commands to make them communicate: +If you add multiple VMs with their devices configured with at least one common group name, they can communicate with +each other. If you don't explicitly specify a group name, a default group will be assigned to the device with name +`default`, and all such devices will be able to communicate with each other. Or you can choose a different list of +group names for each device, and only devices with the at least one group in commmon will be able to communicate with +each other. + +For example, if you have two VMs with CID 3 and 4, you can run the following commands to make them communicate: ```sh -shell1$ vhost-device-vsock --vm guest-cid=3,uds-path=/tmp/vm3.vsock,socket=/tmp/vhost3.socket \ - --vm guest-cid=4,uds-path=/tmp/vm4.vsock,socket=/tmp/vhost4.socket +shell1$ vhost-device-vsock --vm guest-cid=3,uds-path=/tmp/vm3.vsock,socket=/tmp/vhost3.socket,groups=group1+group2 \ + --vm guest-cid=4,uds-path=/tmp/vm4.vsock,socket=/tmp/vhost4.socket,groups=group1 shell2$ qemu-system-x86_64 \ -drive file=vm1.qcow2,format=qcow2,if=virtio -smp 2 -m 512M -mem-prealloc \ -object memory-backend-file,share=on,id=mem0,size=512M,mem-path="/dev/hugepages" \ @@ -164,6 +172,10 @@ shell3$ qemu-system-x86_64 \ -device vhost-user-vsock-pci,chardev=char0 ``` +Please note that here the `groups` parameter is specified just for clarity, but it is not necessary to specify it if you want +to use the default group and make all the devices communicate with one another. It is useful to specify a list of groups +when you want fine-grained control over which devices can communicate with each other. + ```sh # nc-vsock patched to set `.svm_flags = VMADDR_FLAG_TO_HOST` guest_cid3$ nc-vsock -l 1234 diff --git a/crates/vsock/src/main.rs b/crates/vsock/src/main.rs index 69e23ed..fa38bfb 100644 --- a/crates/vsock/src/main.rs +++ b/crates/vsock/src/main.rs @@ -27,6 +27,7 @@ use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap}; const DEFAULT_GUEST_CID: u64 = 3; const DEFAULT_TX_BUFFER_SIZE: u32 = 64 * 1024; +const DEFAULT_GROUP_NAME: &str = "default"; #[derive(Debug, ThisError)] enum CliError { @@ -78,6 +79,17 @@ struct VsockParam { /// The size of the buffer used for the TX virtqueue #[clap(long, default_value_t = DEFAULT_TX_BUFFER_SIZE, conflicts_with = "config", conflicts_with = "vm")] tx_buffer_size: u32, + + /// The list of group names to which the device belongs. + /// A group is a set of devices that allow sibling communication between their guests. + #[arg( + long, + default_value_t = String::from(DEFAULT_GROUP_NAME), + conflicts_with = "config", + conflicts_with = "vm", + verbatim_doc_comment + )] + groups: String, } #[derive(Clone, Debug, Deserialize)] @@ -86,6 +98,7 @@ struct ConfigFileVsockParam { socket: String, uds_path: String, tx_buffer_size: Option, + groups: Option, } #[derive(Parser, Debug)] @@ -95,8 +108,9 @@ struct VsockArgs { param: Option, /// Device parameters corresponding to a VM in the form of comma separated key=value pairs. - /// The allowed keys are: guest_cid, socket, uds_path and tx_buffer_size - /// Example: --vm guest-cid=3,socket=/tmp/vhost3.socket,uds-path=/tmp/vm3.vsock,tx-buffer-size=65536 + /// The allowed keys are: guest_cid, socket, uds_path, tx_buffer_size and group. + /// Example: + /// --vm guest-cid=3,socket=/tmp/vhost3.socket,uds-path=/tmp/vm3.vsock,tx-buffer-size=65536,groups=group1+group2 /// Multiple instances of this argument can be provided to configure devices for multiple guests. #[arg(long, conflicts_with = "config", verbatim_doc_comment, value_parser = parse_vm_params)] vm: Option>, @@ -111,6 +125,7 @@ fn parse_vm_params(s: &str) -> Result { let mut socket = None; let mut uds_path = None; let mut tx_buffer_size = None; + let mut groups = None; for arg in s.trim().split(',') { let mut parts = arg.split('='); @@ -126,6 +141,7 @@ fn parse_vm_params(s: &str) -> Result { "tx_buffer_size" | "tx-buffer-size" => { tx_buffer_size = Some(val.parse().map_err(VmArgsParseError::ParseInteger)?) } + "groups" => groups = Some(val.split('+').map(String::from).collect()), _ => return Err(VmArgsParseError::InvalidKey(key.to_string())), } } @@ -135,6 +151,7 @@ fn parse_vm_params(s: &str) -> Result { socket.ok_or_else(|| VmArgsParseError::RequiredKeyNotFound("socket".to_string()))?, uds_path.ok_or_else(|| VmArgsParseError::RequiredKeyNotFound("uds-path".to_string()))?, tx_buffer_size.unwrap_or(DEFAULT_TX_BUFFER_SIZE), + groups.unwrap_or(vec![DEFAULT_GROUP_NAME.to_string()]), )) } @@ -155,6 +172,9 @@ impl VsockArgs { p.socket.trim().to_string(), p.uds_path.trim().to_string(), p.tx_buffer_size.unwrap_or(DEFAULT_TX_BUFFER_SIZE), + p.groups.map_or(vec![DEFAULT_GROUP_NAME.to_string()], |g| { + g.trim().split('+').map(String::from).collect() + }), ) }) .collect(); @@ -185,6 +205,7 @@ impl TryFrom for Vec { p.socket.trim().to_string(), p.uds_path.trim().to_string(), p.tx_buffer_size, + p.groups.trim().split('+').map(String::from).collect(), )]) }), }, @@ -289,13 +310,20 @@ mod tests { use tempfile::tempdir; impl VsockArgs { - fn from_args(guest_cid: u64, socket: &str, uds_path: &str, tx_buffer_size: u32) -> Self { + fn from_args( + guest_cid: u64, + socket: &str, + uds_path: &str, + tx_buffer_size: u32, + groups: &str, + ) -> Self { VsockArgs { param: Some(VsockParam { guest_cid, socket: socket.to_string(), uds_path: uds_path.to_string(), tx_buffer_size, + groups: groups.to_string(), }), vm: None, config: None, @@ -316,7 +344,7 @@ mod tests { let socket_path = test_dir.path().join("vhost4.socket").display().to_string(); let uds_path = test_dir.path().join("vm4.vsock").display().to_string(); - let args = VsockArgs::from_args(3, &socket_path, &uds_path, 64 * 1024); + let args = VsockArgs::from_args(3, &socket_path, &uds_path, 64 * 1024, "group1"); let configs = Vec::::try_from(args); assert!(configs.is_ok()); @@ -329,6 +357,7 @@ mod tests { assert_eq!(config.get_socket_path(), socket_path); assert_eq!(config.get_uds_path(), uds_path); assert_eq!(config.get_tx_buffer_size(), 64 * 1024); + assert_eq!(config.get_groups(), vec!["group1".to_string()]); test_dir.close().unwrap(); } @@ -349,8 +378,8 @@ mod tests { ]; let params = format!( "--vm socket={vhost3_socket},uds_path={vm3_vsock} \ - --vm socket={vhost4_socket},uds-path={vm4_vsock},guest-cid=4,tx_buffer_size=65536 \ - --vm guest-cid=5,socket={vhost5_socket},uds_path={vm5_vsock},tx-buffer-size=32768", + --vm socket={vhost4_socket},uds-path={vm4_vsock},guest-cid=4,tx_buffer_size=65536,groups=group1 \ + --vm groups=group2+group3,guest-cid=5,socket={vhost5_socket},uds_path={vm5_vsock},tx-buffer-size=32768", vhost3_socket = socket_paths[0].display(), vhost4_socket = socket_paths[1].display(), vhost5_socket = socket_paths[2].display(), @@ -378,6 +407,7 @@ mod tests { ); assert_eq!(config.get_uds_path(), uds_paths[0].display().to_string()); assert_eq!(config.get_tx_buffer_size(), 65536); + assert_eq!(config.get_groups(), vec![DEFAULT_GROUP_NAME.to_string()]); let config = configs.get(1).unwrap(); assert_eq!(config.get_guest_cid(), 4); @@ -387,6 +417,7 @@ mod tests { ); assert_eq!(config.get_uds_path(), uds_paths[1].display().to_string()); assert_eq!(config.get_tx_buffer_size(), 65536); + assert_eq!(config.get_groups(), vec!["group1".to_string()]); let config = configs.get(2).unwrap(); assert_eq!(config.get_guest_cid(), 5); @@ -396,6 +427,10 @@ mod tests { ); assert_eq!(config.get_uds_path(), uds_paths[2].display().to_string()); assert_eq!(config.get_tx_buffer_size(), 32768); + assert_eq!( + config.get_groups(), + vec!["group2".to_string(), "group3".to_string()] + ); test_dir.close().unwrap(); } @@ -415,7 +450,8 @@ mod tests { - guest_cid: 4 socket: {} uds_path: {} - tx_buffer_size: 32768", + tx_buffer_size: 32768 + groups: group1+group2", socket_path.display(), uds_path.display(), ) @@ -432,6 +468,10 @@ mod tests { assert_eq!(config.get_socket_path(), socket_path.display().to_string()); assert_eq!(config.get_uds_path(), uds_path.display().to_string()); assert_eq!(config.get_tx_buffer_size(), 32768); + assert_eq!( + config.get_groups(), + vec!["group1".to_string(), "group2".to_string()] + ); // Now test that optional parameters are correctly set to their default values. let mut yaml = File::create(&config_path).unwrap(); @@ -456,6 +496,7 @@ mod tests { assert_eq!(config.get_socket_path(), socket_path.display().to_string()); assert_eq!(config.get_uds_path(), uds_path.display().to_string()); assert_eq!(config.get_tx_buffer_size(), DEFAULT_TX_BUFFER_SIZE); + assert_eq!(config.get_groups(), vec![DEFAULT_GROUP_NAME.to_string()]); std::fs::remove_file(&config_path).unwrap(); test_dir.close().unwrap(); @@ -479,7 +520,13 @@ mod tests { .display() .to_string(); - let config = VsockConfig::new(CID, vhost_socket_path, vsock_socket_path, CONN_TX_BUF_SIZE); + let config = VsockConfig::new( + CID, + vhost_socket_path, + vsock_socket_path, + CONN_TX_BUF_SIZE, + vec![DEFAULT_GROUP_NAME.to_string()], + ); let cid_map: Arc> = Arc::new(RwLock::new(HashMap::new())); diff --git a/crates/vsock/src/thread_backend.rs b/crates/vsock/src/thread_backend.rs index 1369122..6d5e80e 100644 --- a/crates/vsock/src/thread_backend.rs +++ b/crates/vsock/src/thread_backend.rs @@ -2,6 +2,7 @@ use std::{ collections::{HashMap, HashSet, VecDeque}, + ops::Deref, os::unix::{ net::UnixStream, prelude::{AsRawFd, RawFd}, @@ -70,6 +71,8 @@ pub(crate) struct VsockThreadBackend { pub cid_map: Arc>, /// Queue of raw vsock packets recieved from sibling VMs to be sent to the guest. pub raw_pkts_queue: Arc>, + /// Set of groups assigned to the device which it is allowed to communicate with. + groups_set: Arc>>, } impl VsockThreadBackend { @@ -79,6 +82,7 @@ impl VsockThreadBackend { epoll_fd: i32, guest_cid: u64, tx_buffer_size: u32, + groups_set: Arc>>, cid_map: Arc>, ) -> Self { Self { @@ -95,6 +99,7 @@ impl VsockThreadBackend { tx_buffer_size, cid_map, raw_pkts_queue: Arc::new(RwLock::new(VecDeque::new())), + groups_set, } } @@ -180,7 +185,21 @@ impl VsockThreadBackend { if dst_cid != VSOCK_HOST_CID { let cid_map = self.cid_map.read().unwrap(); if cid_map.contains_key(&dst_cid) { - let (sibling_raw_pkts_queue, sibling_event_fd) = cid_map.get(&dst_cid).unwrap(); + let (sibling_raw_pkts_queue, sibling_groups_set, sibling_event_fd) = + cid_map.get(&dst_cid).unwrap(); + + if self + .groups_set + .read() + .unwrap() + .is_disjoint(sibling_groups_set.read().unwrap().deref()) + { + info!( + "vsock: dropping packet for cid: {:?} due to group mismatch", + dst_cid + ); + return Ok(()); + } sibling_raw_pkts_queue .write() @@ -337,6 +356,7 @@ mod tests { const DATA_LEN: usize = 16; const CONN_TX_BUF_SIZE: u32 = 64 * 1024; + const GROUP_NAME: &str = "default"; #[test] fn test_vsock_thread_backend() { @@ -352,6 +372,8 @@ mod tests { let epoll_fd = epoll::create(false).unwrap(); + let groups_set: HashSet = vec![GROUP_NAME.to_string()].into_iter().collect(); + let cid_map: Arc> = Arc::new(RwLock::new(HashMap::new())); let mut vtp = VsockThreadBackend::new( @@ -359,6 +381,7 @@ mod tests { epoll_fd, CID, CONN_TX_BUF_SIZE, + Arc::new(RwLock::new(groups_set)), cid_map, ); @@ -434,14 +457,30 @@ mod tests { sibling_vhost_socket_path, sibling_vsock_socket_path, CONN_TX_BUF_SIZE, + vec!["group1", "group2", "group3"] + .into_iter() + .map(String::from) + .collect(), ); let sibling_backend = Arc::new(VhostUserVsockBackend::new(sibling_config, cid_map.clone()).unwrap()); let epoll_fd = epoll::create(false).unwrap(); - let mut vtp = - VsockThreadBackend::new(vsock_socket_path, epoll_fd, CID, CONN_TX_BUF_SIZE, cid_map); + + let groups_set: HashSet = vec!["groupA", "groupB", "group3"] + .into_iter() + .map(String::from) + .collect(); + + let mut vtp = VsockThreadBackend::new( + vsock_socket_path, + epoll_fd, + CID, + CONN_TX_BUF_SIZE, + Arc::new(RwLock::new(groups_set)), + cid_map, + ); assert!(!vtp.pending_raw_pkts()); diff --git a/crates/vsock/src/vhu_vsock.rs b/crates/vsock/src/vhu_vsock.rs index c64f11f..0d25b38 100644 --- a/crates/vsock/src/vhu_vsock.rs +++ b/crates/vsock/src/vhu_vsock.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, io::{self, Result as IoResult}, sync::{Arc, Mutex, RwLock}, u16, u32, u64, u8, @@ -24,7 +24,8 @@ use vmm_sys_util::{ use crate::thread_backend::RawPktsQ; use crate::vhu_vsock_thread::*; -pub(crate) type CidMap = HashMap>, EventFd)>; +pub(crate) type CidMap = + HashMap>, Arc>>, EventFd)>; const NUM_QUEUES: usize = 3; const QUEUE_SIZE: usize = 256; @@ -150,17 +151,25 @@ pub(crate) struct VsockConfig { socket: String, uds_path: String, tx_buffer_size: u32, + groups: Vec, } impl VsockConfig { /// Create a new instance of the VsockConfig struct, containing the /// parameters to be fed into the vsock-backend server. - pub fn new(guest_cid: u64, socket: String, uds_path: String, tx_buffer_size: u32) -> Self { + pub fn new( + guest_cid: u64, + socket: String, + uds_path: String, + tx_buffer_size: u32, + groups: Vec, + ) -> Self { Self { guest_cid, socket, uds_path, tx_buffer_size, + groups, } } @@ -184,6 +193,10 @@ impl VsockConfig { pub fn get_tx_buffer_size(&self) -> u32 { self.tx_buffer_size } + + pub fn get_groups(&self) -> Vec { + self.groups.clone() + } } /// A local port and peer port pair used to retrieve @@ -227,6 +240,7 @@ impl VhostUserVsockBackend { config.get_uds_path(), config.get_guest_cid(), config.get_tx_buffer_size(), + config.get_groups(), cid_map, )?); let queues_per_thread = vec![QUEUE_MASK]; @@ -364,6 +378,8 @@ mod tests { fn test_vsock_backend() { const CID: u64 = 3; + let groups_list: Vec = vec![String::from("default")]; + let test_dir = tempdir().expect("Could not create a temp test directory."); let vhost_socket_path = test_dir @@ -382,6 +398,7 @@ mod tests { vhost_socket_path.to_string(), vsock_socket_path.to_string(), CONN_TX_BUF_SIZE, + groups_list, ); let cid_map: Arc> = Arc::new(RwLock::new(HashMap::new())); @@ -451,6 +468,8 @@ mod tests { fn test_vsock_backend_failures() { const CID: u64 = 3; + let groups: Vec = vec![String::from("default")]; + let test_dir = tempdir().expect("Could not create a temp test directory."); let vhost_socket_path = test_dir @@ -469,6 +488,7 @@ mod tests { "/sys/not_allowed.socket".to_string(), "/sys/not_allowed.vsock".to_string(), CONN_TX_BUF_SIZE, + groups.clone(), ); let cid_map: Arc> = Arc::new(RwLock::new(HashMap::new())); @@ -481,6 +501,7 @@ mod tests { vhost_socket_path.to_string(), vsock_socket_path.to_string(), CONN_TX_BUF_SIZE, + groups, ); let backend = VhostUserVsockBackend::new(config, cid_map).unwrap(); diff --git a/crates/vsock/src/vhu_vsock_thread.rs b/crates/vsock/src/vhu_vsock_thread.rs index 726e2b6..0c2ab2f 100644 --- a/crates/vsock/src/vhu_vsock_thread.rs +++ b/crates/vsock/src/vhu_vsock_thread.rs @@ -1,9 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause use std::{ + collections::HashSet, fs::File, io, io::Read, + iter::FromIterator, num::Wrapping, ops::Deref, os::unix::{ @@ -79,6 +81,7 @@ impl VhostUserVsockThread { uds_path: String, guest_cid: u64, tx_buffer_size: u32, + groups: Vec, cid_map: Arc>, ) -> Result { // TODO: better error handling, maybe add a param to force the unlink @@ -93,6 +96,10 @@ impl VhostUserVsockThread { let host_raw_fd = host_sock.as_raw_fd(); + let mut groups = groups; + let groups_set: Arc>> = + Arc::new(RwLock::new(HashSet::from_iter(groups.drain(..)))); + let sibling_event_fd = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?; let thread_backend = VsockThreadBackend::new( @@ -100,6 +107,7 @@ impl VhostUserVsockThread { epoll_fd, guest_cid, tx_buffer_size, + groups_set.clone(), cid_map.clone(), ); @@ -107,6 +115,7 @@ impl VhostUserVsockThread { guest_cid, ( thread_backend.raw_pkts_queue.clone(), + groups_set, sibling_event_fd.try_clone().unwrap(), ), ); @@ -723,6 +732,8 @@ mod tests { #[test] fn test_vsock_thread() { + let groups: Vec = vec![String::from("default")]; + let cid_map: Arc> = Arc::new(RwLock::new(HashMap::new())); let test_dir = tempdir().expect("Could not create a temp test directory."); @@ -735,6 +746,7 @@ mod tests { .to_string(), 3, CONN_TX_BUF_SIZE, + groups, cid_map, ); assert!(t.is_ok()); @@ -792,6 +804,8 @@ mod tests { #[test] fn test_vsock_thread_failures() { + let groups: Vec = vec![String::from("default")]; + let cid_map: Arc> = Arc::new(RwLock::new(HashMap::new())); let test_dir = tempdir().expect("Could not create a temp test directory."); @@ -800,6 +814,7 @@ mod tests { "/sys/not_allowed.vsock".to_string(), 3, CONN_TX_BUF_SIZE, + groups.clone(), cid_map.clone(), ); assert!(t.is_err()); @@ -810,7 +825,8 @@ mod tests { .display() .to_string(); let mut t = - VhostUserVsockThread::new(vsock_socket_path, 3, CONN_TX_BUF_SIZE, cid_map).unwrap(); + VhostUserVsockThread::new(vsock_socket_path, 3, CONN_TX_BUF_SIZE, groups, cid_map) + .unwrap(); assert!(VhostUserVsockThread::epoll_register(-1, -1, epoll::Events::EPOLLIN).is_err()); assert!(VhostUserVsockThread::epoll_modify(-1, -1, epoll::Events::EPOLLIN).is_err()); assert!(VhostUserVsockThread::epoll_unregister(-1, -1).is_err());