vsock: Add VM groups in sibling communication

Restrict the VMs a given VM can communicate with by introducing VM groups.
A group is simply a list of names assigned to the device in the
configuration. A VM can communicate with another VM only if the list of
group names assigned to their devices have atleast one group name in
common.

Signed-off-by: Priyansh Rathi <techiepriyansh@gmail.com>
This commit is contained in:
Priyansh Rathi 2023-07-24 01:07:18 -07:00 committed by Viresh Kumar
parent bd59ff2462
commit decfd8bd73
5 changed files with 157 additions and 22 deletions

View File

@ -43,17 +43,18 @@ Run the vhost-device-vsock device:
vhost-device-vsock --guest-cid=<CID assigned to the guest> \
--socket=<path to the Unix socket to be created to communicate with the VMM via the vhost-user protocol> \
--uds-path=<path to the Unix socket to communicate with the guest via the virtio-vsock device> \
[--tx-buffer-size=<size of the buffer used for the TX virtqueue (guest->host packets)>]
[--tx-buffer-size=<size of the buffer used for the TX virtqueue (guest->host packets)>] \
[--groups=<list of group names to which the device belongs concatenated with '+' delimiter>]
```
or
```
vhost-device-vsock --vm guest_cid=<CID assigned to the guest>,socket=<path to the Unix socket to be created to communicate with the VMM via the vhost-user protocol>,uds-path=<path to the Unix socket to communicate with the guest via the virtio-vsock device>[,tx-buffer-size=<size of the buffer used for the TX virtqueue (guest->host packets)>]
vhost-device-vsock --vm guest_cid=<CID assigned to the guest>,socket=<path to the Unix socket to be created to communicate with the VMM via the vhost-user protocol>,uds-path=<path to the Unix socket to communicate with the guest via the virtio-vsock device>[,tx-buffer-size=<size of the buffer used for the TX virtqueue (guest->host packets)>][,groups=<list of group names to which the device belongs concatenated with '+' delimiter>]
```
Specify the `--vm` argument multiple times to specify multiple devices like this:
```
vhost-device-vsock \
--vm guest-cid=3,socket=/tmp/vhost3.socket,uds-path=/tmp/vm3.vsock \
--vm guest-cid=3,socket=/tmp/vhost3.socket,uds-path=/tmp/vm3.vsock,groups=group1+groupA \
--vm guest-cid=4,socket=/tmp/vhost4.socket,uds-path=/tmp/vm4.vsock,tx-buffer-size=32768
```
@ -69,10 +70,12 @@ vms:
socket: /tmp/vhost3.socket
uds_path: /tmp/vm3.sock
tx_buffer_size: 65536
groups: group1+groupA
- guest_cid: 4
socket: /tmp/vhost4.socket
uds_path: /tmp/vm4.sock
tx_buffer_size: 32768
groups: group2+groupB
```
Run VMM (e.g. QEMU):
@ -144,12 +147,17 @@ guest$ nc --vsock 2 1234
### Sibling VM communication
If you add multiple VMs, they can communicate with each other. For example, if you have two VMs with
CID 3 and 4, you can run the following commands to make them communicate:
If you add multiple VMs with their devices configured with at least one common group name, they can communicate with
each other. If you don't explicitly specify a group name, a default group will be assigned to the device with name
`default`, and all such devices will be able to communicate with each other. Or you can choose a different list of
group names for each device, and only devices with the at least one group in commmon will be able to communicate with
each other.
For example, if you have two VMs with CID 3 and 4, you can run the following commands to make them communicate:
```sh
shell1$ vhost-device-vsock --vm guest-cid=3,uds-path=/tmp/vm3.vsock,socket=/tmp/vhost3.socket \
--vm guest-cid=4,uds-path=/tmp/vm4.vsock,socket=/tmp/vhost4.socket
shell1$ vhost-device-vsock --vm guest-cid=3,uds-path=/tmp/vm3.vsock,socket=/tmp/vhost3.socket,groups=group1+group2 \
--vm guest-cid=4,uds-path=/tmp/vm4.vsock,socket=/tmp/vhost4.socket,groups=group1
shell2$ qemu-system-x86_64 \
-drive file=vm1.qcow2,format=qcow2,if=virtio -smp 2 -m 512M -mem-prealloc \
-object memory-backend-file,share=on,id=mem0,size=512M,mem-path="/dev/hugepages" \
@ -164,6 +172,10 @@ shell3$ qemu-system-x86_64 \
-device vhost-user-vsock-pci,chardev=char0
```
Please note that here the `groups` parameter is specified just for clarity, but it is not necessary to specify it if you want
to use the default group and make all the devices communicate with one another. It is useful to specify a list of groups
when you want fine-grained control over which devices can communicate with each other.
```sh
# nc-vsock patched to set `.svm_flags = VMADDR_FLAG_TO_HOST`
guest_cid3$ nc-vsock -l 1234

View File

@ -27,6 +27,7 @@ use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap};
const DEFAULT_GUEST_CID: u64 = 3;
const DEFAULT_TX_BUFFER_SIZE: u32 = 64 * 1024;
const DEFAULT_GROUP_NAME: &str = "default";
#[derive(Debug, ThisError)]
enum CliError {
@ -78,6 +79,17 @@ struct VsockParam {
/// The size of the buffer used for the TX virtqueue
#[clap(long, default_value_t = DEFAULT_TX_BUFFER_SIZE, conflicts_with = "config", conflicts_with = "vm")]
tx_buffer_size: u32,
/// The list of group names to which the device belongs.
/// A group is a set of devices that allow sibling communication between their guests.
#[arg(
long,
default_value_t = String::from(DEFAULT_GROUP_NAME),
conflicts_with = "config",
conflicts_with = "vm",
verbatim_doc_comment
)]
groups: String,
}
#[derive(Clone, Debug, Deserialize)]
@ -86,6 +98,7 @@ struct ConfigFileVsockParam {
socket: String,
uds_path: String,
tx_buffer_size: Option<u32>,
groups: Option<String>,
}
#[derive(Parser, Debug)]
@ -95,8 +108,9 @@ struct VsockArgs {
param: Option<VsockParam>,
/// Device parameters corresponding to a VM in the form of comma separated key=value pairs.
/// The allowed keys are: guest_cid, socket, uds_path and tx_buffer_size
/// Example: --vm guest-cid=3,socket=/tmp/vhost3.socket,uds-path=/tmp/vm3.vsock,tx-buffer-size=65536
/// The allowed keys are: guest_cid, socket, uds_path, tx_buffer_size and group.
/// Example:
/// --vm guest-cid=3,socket=/tmp/vhost3.socket,uds-path=/tmp/vm3.vsock,tx-buffer-size=65536,groups=group1+group2
/// Multiple instances of this argument can be provided to configure devices for multiple guests.
#[arg(long, conflicts_with = "config", verbatim_doc_comment, value_parser = parse_vm_params)]
vm: Option<Vec<VsockConfig>>,
@ -111,6 +125,7 @@ fn parse_vm_params(s: &str) -> Result<VsockConfig, VmArgsParseError> {
let mut socket = None;
let mut uds_path = None;
let mut tx_buffer_size = None;
let mut groups = None;
for arg in s.trim().split(',') {
let mut parts = arg.split('=');
@ -126,6 +141,7 @@ fn parse_vm_params(s: &str) -> Result<VsockConfig, VmArgsParseError> {
"tx_buffer_size" | "tx-buffer-size" => {
tx_buffer_size = Some(val.parse().map_err(VmArgsParseError::ParseInteger)?)
}
"groups" => groups = Some(val.split('+').map(String::from).collect()),
_ => return Err(VmArgsParseError::InvalidKey(key.to_string())),
}
}
@ -135,6 +151,7 @@ fn parse_vm_params(s: &str) -> Result<VsockConfig, VmArgsParseError> {
socket.ok_or_else(|| VmArgsParseError::RequiredKeyNotFound("socket".to_string()))?,
uds_path.ok_or_else(|| VmArgsParseError::RequiredKeyNotFound("uds-path".to_string()))?,
tx_buffer_size.unwrap_or(DEFAULT_TX_BUFFER_SIZE),
groups.unwrap_or(vec![DEFAULT_GROUP_NAME.to_string()]),
))
}
@ -155,6 +172,9 @@ impl VsockArgs {
p.socket.trim().to_string(),
p.uds_path.trim().to_string(),
p.tx_buffer_size.unwrap_or(DEFAULT_TX_BUFFER_SIZE),
p.groups.map_or(vec![DEFAULT_GROUP_NAME.to_string()], |g| {
g.trim().split('+').map(String::from).collect()
}),
)
})
.collect();
@ -185,6 +205,7 @@ impl TryFrom<VsockArgs> for Vec<VsockConfig> {
p.socket.trim().to_string(),
p.uds_path.trim().to_string(),
p.tx_buffer_size,
p.groups.trim().split('+').map(String::from).collect(),
)])
}),
},
@ -289,13 +310,20 @@ mod tests {
use tempfile::tempdir;
impl VsockArgs {
fn from_args(guest_cid: u64, socket: &str, uds_path: &str, tx_buffer_size: u32) -> Self {
fn from_args(
guest_cid: u64,
socket: &str,
uds_path: &str,
tx_buffer_size: u32,
groups: &str,
) -> Self {
VsockArgs {
param: Some(VsockParam {
guest_cid,
socket: socket.to_string(),
uds_path: uds_path.to_string(),
tx_buffer_size,
groups: groups.to_string(),
}),
vm: None,
config: None,
@ -316,7 +344,7 @@ mod tests {
let socket_path = test_dir.path().join("vhost4.socket").display().to_string();
let uds_path = test_dir.path().join("vm4.vsock").display().to_string();
let args = VsockArgs::from_args(3, &socket_path, &uds_path, 64 * 1024);
let args = VsockArgs::from_args(3, &socket_path, &uds_path, 64 * 1024, "group1");
let configs = Vec::<VsockConfig>::try_from(args);
assert!(configs.is_ok());
@ -329,6 +357,7 @@ mod tests {
assert_eq!(config.get_socket_path(), socket_path);
assert_eq!(config.get_uds_path(), uds_path);
assert_eq!(config.get_tx_buffer_size(), 64 * 1024);
assert_eq!(config.get_groups(), vec!["group1".to_string()]);
test_dir.close().unwrap();
}
@ -349,8 +378,8 @@ mod tests {
];
let params = format!(
"--vm socket={vhost3_socket},uds_path={vm3_vsock} \
--vm socket={vhost4_socket},uds-path={vm4_vsock},guest-cid=4,tx_buffer_size=65536 \
--vm guest-cid=5,socket={vhost5_socket},uds_path={vm5_vsock},tx-buffer-size=32768",
--vm socket={vhost4_socket},uds-path={vm4_vsock},guest-cid=4,tx_buffer_size=65536,groups=group1 \
--vm groups=group2+group3,guest-cid=5,socket={vhost5_socket},uds_path={vm5_vsock},tx-buffer-size=32768",
vhost3_socket = socket_paths[0].display(),
vhost4_socket = socket_paths[1].display(),
vhost5_socket = socket_paths[2].display(),
@ -378,6 +407,7 @@ mod tests {
);
assert_eq!(config.get_uds_path(), uds_paths[0].display().to_string());
assert_eq!(config.get_tx_buffer_size(), 65536);
assert_eq!(config.get_groups(), vec![DEFAULT_GROUP_NAME.to_string()]);
let config = configs.get(1).unwrap();
assert_eq!(config.get_guest_cid(), 4);
@ -387,6 +417,7 @@ mod tests {
);
assert_eq!(config.get_uds_path(), uds_paths[1].display().to_string());
assert_eq!(config.get_tx_buffer_size(), 65536);
assert_eq!(config.get_groups(), vec!["group1".to_string()]);
let config = configs.get(2).unwrap();
assert_eq!(config.get_guest_cid(), 5);
@ -396,6 +427,10 @@ mod tests {
);
assert_eq!(config.get_uds_path(), uds_paths[2].display().to_string());
assert_eq!(config.get_tx_buffer_size(), 32768);
assert_eq!(
config.get_groups(),
vec!["group2".to_string(), "group3".to_string()]
);
test_dir.close().unwrap();
}
@ -415,7 +450,8 @@ mod tests {
- guest_cid: 4
socket: {}
uds_path: {}
tx_buffer_size: 32768",
tx_buffer_size: 32768
groups: group1+group2",
socket_path.display(),
uds_path.display(),
)
@ -432,6 +468,10 @@ mod tests {
assert_eq!(config.get_socket_path(), socket_path.display().to_string());
assert_eq!(config.get_uds_path(), uds_path.display().to_string());
assert_eq!(config.get_tx_buffer_size(), 32768);
assert_eq!(
config.get_groups(),
vec!["group1".to_string(), "group2".to_string()]
);
// Now test that optional parameters are correctly set to their default values.
let mut yaml = File::create(&config_path).unwrap();
@ -456,6 +496,7 @@ mod tests {
assert_eq!(config.get_socket_path(), socket_path.display().to_string());
assert_eq!(config.get_uds_path(), uds_path.display().to_string());
assert_eq!(config.get_tx_buffer_size(), DEFAULT_TX_BUFFER_SIZE);
assert_eq!(config.get_groups(), vec![DEFAULT_GROUP_NAME.to_string()]);
std::fs::remove_file(&config_path).unwrap();
test_dir.close().unwrap();
@ -479,7 +520,13 @@ mod tests {
.display()
.to_string();
let config = VsockConfig::new(CID, vhost_socket_path, vsock_socket_path, CONN_TX_BUF_SIZE);
let config = VsockConfig::new(
CID,
vhost_socket_path,
vsock_socket_path,
CONN_TX_BUF_SIZE,
vec![DEFAULT_GROUP_NAME.to_string()],
);
let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));

View File

@ -2,6 +2,7 @@
use std::{
collections::{HashMap, HashSet, VecDeque},
ops::Deref,
os::unix::{
net::UnixStream,
prelude::{AsRawFd, RawFd},
@ -70,6 +71,8 @@ pub(crate) struct VsockThreadBackend {
pub cid_map: Arc<RwLock<CidMap>>,
/// Queue of raw vsock packets recieved from sibling VMs to be sent to the guest.
pub raw_pkts_queue: Arc<RwLock<RawPktsQ>>,
/// Set of groups assigned to the device which it is allowed to communicate with.
groups_set: Arc<RwLock<HashSet<String>>>,
}
impl VsockThreadBackend {
@ -79,6 +82,7 @@ impl VsockThreadBackend {
epoll_fd: i32,
guest_cid: u64,
tx_buffer_size: u32,
groups_set: Arc<RwLock<HashSet<String>>>,
cid_map: Arc<RwLock<CidMap>>,
) -> Self {
Self {
@ -95,6 +99,7 @@ impl VsockThreadBackend {
tx_buffer_size,
cid_map,
raw_pkts_queue: Arc::new(RwLock::new(VecDeque::new())),
groups_set,
}
}
@ -180,7 +185,21 @@ impl VsockThreadBackend {
if dst_cid != VSOCK_HOST_CID {
let cid_map = self.cid_map.read().unwrap();
if cid_map.contains_key(&dst_cid) {
let (sibling_raw_pkts_queue, sibling_event_fd) = cid_map.get(&dst_cid).unwrap();
let (sibling_raw_pkts_queue, sibling_groups_set, sibling_event_fd) =
cid_map.get(&dst_cid).unwrap();
if self
.groups_set
.read()
.unwrap()
.is_disjoint(sibling_groups_set.read().unwrap().deref())
{
info!(
"vsock: dropping packet for cid: {:?} due to group mismatch",
dst_cid
);
return Ok(());
}
sibling_raw_pkts_queue
.write()
@ -337,6 +356,7 @@ mod tests {
const DATA_LEN: usize = 16;
const CONN_TX_BUF_SIZE: u32 = 64 * 1024;
const GROUP_NAME: &str = "default";
#[test]
fn test_vsock_thread_backend() {
@ -352,6 +372,8 @@ mod tests {
let epoll_fd = epoll::create(false).unwrap();
let groups_set: HashSet<String> = vec![GROUP_NAME.to_string()].into_iter().collect();
let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));
let mut vtp = VsockThreadBackend::new(
@ -359,6 +381,7 @@ mod tests {
epoll_fd,
CID,
CONN_TX_BUF_SIZE,
Arc::new(RwLock::new(groups_set)),
cid_map,
);
@ -434,14 +457,30 @@ mod tests {
sibling_vhost_socket_path,
sibling_vsock_socket_path,
CONN_TX_BUF_SIZE,
vec!["group1", "group2", "group3"]
.into_iter()
.map(String::from)
.collect(),
);
let sibling_backend =
Arc::new(VhostUserVsockBackend::new(sibling_config, cid_map.clone()).unwrap());
let epoll_fd = epoll::create(false).unwrap();
let mut vtp =
VsockThreadBackend::new(vsock_socket_path, epoll_fd, CID, CONN_TX_BUF_SIZE, cid_map);
let groups_set: HashSet<String> = vec!["groupA", "groupB", "group3"]
.into_iter()
.map(String::from)
.collect();
let mut vtp = VsockThreadBackend::new(
vsock_socket_path,
epoll_fd,
CID,
CONN_TX_BUF_SIZE,
Arc::new(RwLock::new(groups_set)),
cid_map,
);
assert!(!vtp.pending_raw_pkts());

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use std::{
collections::HashMap,
collections::{HashMap, HashSet},
io::{self, Result as IoResult},
sync::{Arc, Mutex, RwLock},
u16, u32, u64, u8,
@ -24,7 +24,8 @@ use vmm_sys_util::{
use crate::thread_backend::RawPktsQ;
use crate::vhu_vsock_thread::*;
pub(crate) type CidMap = HashMap<u64, (Arc<RwLock<RawPktsQ>>, EventFd)>;
pub(crate) type CidMap =
HashMap<u64, (Arc<RwLock<RawPktsQ>>, Arc<RwLock<HashSet<String>>>, EventFd)>;
const NUM_QUEUES: usize = 3;
const QUEUE_SIZE: usize = 256;
@ -150,17 +151,25 @@ pub(crate) struct VsockConfig {
socket: String,
uds_path: String,
tx_buffer_size: u32,
groups: Vec<String>,
}
impl VsockConfig {
/// Create a new instance of the VsockConfig struct, containing the
/// parameters to be fed into the vsock-backend server.
pub fn new(guest_cid: u64, socket: String, uds_path: String, tx_buffer_size: u32) -> Self {
pub fn new(
guest_cid: u64,
socket: String,
uds_path: String,
tx_buffer_size: u32,
groups: Vec<String>,
) -> Self {
Self {
guest_cid,
socket,
uds_path,
tx_buffer_size,
groups,
}
}
@ -184,6 +193,10 @@ impl VsockConfig {
pub fn get_tx_buffer_size(&self) -> u32 {
self.tx_buffer_size
}
pub fn get_groups(&self) -> Vec<String> {
self.groups.clone()
}
}
/// A local port and peer port pair used to retrieve
@ -227,6 +240,7 @@ impl VhostUserVsockBackend {
config.get_uds_path(),
config.get_guest_cid(),
config.get_tx_buffer_size(),
config.get_groups(),
cid_map,
)?);
let queues_per_thread = vec![QUEUE_MASK];
@ -364,6 +378,8 @@ mod tests {
fn test_vsock_backend() {
const CID: u64 = 3;
let groups_list: Vec<String> = vec![String::from("default")];
let test_dir = tempdir().expect("Could not create a temp test directory.");
let vhost_socket_path = test_dir
@ -382,6 +398,7 @@ mod tests {
vhost_socket_path.to_string(),
vsock_socket_path.to_string(),
CONN_TX_BUF_SIZE,
groups_list,
);
let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));
@ -451,6 +468,8 @@ mod tests {
fn test_vsock_backend_failures() {
const CID: u64 = 3;
let groups: Vec<String> = vec![String::from("default")];
let test_dir = tempdir().expect("Could not create a temp test directory.");
let vhost_socket_path = test_dir
@ -469,6 +488,7 @@ mod tests {
"/sys/not_allowed.socket".to_string(),
"/sys/not_allowed.vsock".to_string(),
CONN_TX_BUF_SIZE,
groups.clone(),
);
let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));
@ -481,6 +501,7 @@ mod tests {
vhost_socket_path.to_string(),
vsock_socket_path.to_string(),
CONN_TX_BUF_SIZE,
groups,
);
let backend = VhostUserVsockBackend::new(config, cid_map).unwrap();

View File

@ -1,9 +1,11 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use std::{
collections::HashSet,
fs::File,
io,
io::Read,
iter::FromIterator,
num::Wrapping,
ops::Deref,
os::unix::{
@ -79,6 +81,7 @@ impl VhostUserVsockThread {
uds_path: String,
guest_cid: u64,
tx_buffer_size: u32,
groups: Vec<String>,
cid_map: Arc<RwLock<CidMap>>,
) -> Result<Self> {
// TODO: better error handling, maybe add a param to force the unlink
@ -93,6 +96,10 @@ impl VhostUserVsockThread {
let host_raw_fd = host_sock.as_raw_fd();
let mut groups = groups;
let groups_set: Arc<RwLock<HashSet<String>>> =
Arc::new(RwLock::new(HashSet::from_iter(groups.drain(..))));
let sibling_event_fd = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
let thread_backend = VsockThreadBackend::new(
@ -100,6 +107,7 @@ impl VhostUserVsockThread {
epoll_fd,
guest_cid,
tx_buffer_size,
groups_set.clone(),
cid_map.clone(),
);
@ -107,6 +115,7 @@ impl VhostUserVsockThread {
guest_cid,
(
thread_backend.raw_pkts_queue.clone(),
groups_set,
sibling_event_fd.try_clone().unwrap(),
),
);
@ -723,6 +732,8 @@ mod tests {
#[test]
fn test_vsock_thread() {
let groups: Vec<String> = vec![String::from("default")];
let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));
let test_dir = tempdir().expect("Could not create a temp test directory.");
@ -735,6 +746,7 @@ mod tests {
.to_string(),
3,
CONN_TX_BUF_SIZE,
groups,
cid_map,
);
assert!(t.is_ok());
@ -792,6 +804,8 @@ mod tests {
#[test]
fn test_vsock_thread_failures() {
let groups: Vec<String> = vec![String::from("default")];
let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));
let test_dir = tempdir().expect("Could not create a temp test directory.");
@ -800,6 +814,7 @@ mod tests {
"/sys/not_allowed.vsock".to_string(),
3,
CONN_TX_BUF_SIZE,
groups.clone(),
cid_map.clone(),
);
assert!(t.is_err());
@ -810,7 +825,8 @@ mod tests {
.display()
.to_string();
let mut t =
VhostUserVsockThread::new(vsock_socket_path, 3, CONN_TX_BUF_SIZE, cid_map).unwrap();
VhostUserVsockThread::new(vsock_socket_path, 3, CONN_TX_BUF_SIZE, groups, cid_map)
.unwrap();
assert!(VhostUserVsockThread::epoll_register(-1, -1, epoll::Events::EPOLLIN).is_err());
assert!(VhostUserVsockThread::epoll_modify(-1, -1, epoll::Events::EPOLLIN).is_err());
assert!(VhostUserVsockThread::epoll_unregister(-1, -1).is_err());