diff --git a/Cargo.lock b/Cargo.lock index 517877a..3ebabed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,6 +89,36 @@ dependencies = [ "termcolor", ] +[[package]] +name = "epoll" +version = "4.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20df693c700404f7e19d4d6fae6b15215d2913c27955d2b9d6f2c0f537511cd0" +dependencies = [ + "bitflags", + "libc", +] + +[[package]] +name = "fastrand" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "779d043b6a0b90cc4c0ed7ee380a6504394cee7efd7db050e3774eee387324b2" +dependencies = [ + "instant", +] + +[[package]] +name = "getrandom" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "hashbrown" version = "0.11.2" @@ -126,6 +156,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -162,6 +201,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "ppv-lite86" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -204,6 +249,45 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "redox_syscall" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.5.4" @@ -221,6 +305,15 @@ version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + [[package]] name = "strsim" version = "0.10.0" @@ -238,6 +331,20 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", +] + [[package]] name = "termcolor" version = "1.1.2" @@ -314,6 +421,26 @@ dependencies = [ "vmm-sys-util", ] +[[package]] +name = "vhost-device-rng" +version = "0.1.0" +dependencies = [ + "clap", + "env_logger", + "epoll", + "libc", + "log", + "rand", + "tempfile", + "thiserror", + "vhost", + "vhost-user-backend", + "virtio-bindings", + "virtio-queue", + "vm-memory", + "vmm-sys-util", +] + [[package]] name = "vhost-user-backend" version = "0.1.0" @@ -367,6 +494,12 @@ dependencies = [ "libc", ] +[[package]] +name = 
"wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index cb85a75..92b4fd0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,4 +2,5 @@ members = [ "i2c", + "rng", ] diff --git a/README.md b/README.md index c6860dd..c4c6330 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ crates. Here is the list of device backends that we support: - [I2C](https://github.com/rust-vmm/vhost-device/blob/main/i2c/README.md) +- [RNG](https://github.com/rust-vmm/vhost-device/blob/main/rng/README.md) ## Testing and Code Coverage diff --git a/rng/Cargo.toml b/rng/Cargo.toml new file mode 100644 index 0000000..8578ec5 --- /dev/null +++ b/rng/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "vhost-device-rng" +version = "0.1.0" +authors = ["Mathieu Poirier "] +description = "vhost RNG backend device" +repository = "https://github.com/rust-vmm/vhost-device" +readme = "README.md" +keywords = ["rng", "vhost", "virt", "backend"] +license = "Apache-2.0 OR BSD-3-Clause" +edition = "2018" + +[dependencies] +clap = { version = ">=3.0", features = ["derive"] } +env_logger = ">=0.9" +epoll = "4.3" +libc = ">=0.2.95" +log = ">=0.4.6" +rand = ">=0.8.5" +tempfile = "3.2.0" +thiserror = "1.0" +vhost = { version = "0.3", features = ["vhost-user-slave"] } +vhost-user-backend = "0.1" +virtio-bindings = ">=0.1" +virtio-queue = "0.1" +vm-memory = ">=0.7" +vmm-sys-util = ">=0.9.0" + +[dev-dependencies] +virtio-queue = { version = "0.1", features = ["test-utils"] } +vm-memory = { version = "0.7.0", features = ["backend-mmap", "backend-atomic"] } diff --git a/rng/README.md b/rng/README.md new file mode 100644 index 0000000..bb1ee82 --- /dev/null +++ b/rng/README.md @@ -0,0 +1,73 @@ +# vhost-device-rng - RNG emulation backend daemon + +## Description +This program is a vhost-user backend 
that emulates a VirtIO random number +generator (RNG). It uses the host's random number generator pool, +/dev/urandom by default but configurable at will, to satisfy requests from +guests. + +The daemon is designed to respect the limitations of the random generator +hardware using the --max-bytes and --period options. For example, 5 kilobytes per +second translates to "--max-bytes 5000 --period 1000". If an application +requests more bytes than the allowed limit the thread will block until the +start of a new period. The daemon will automatically split the available +bandwidth equally between the guests when several threads are requested. + +Though developed and tested with QEMU, the implementation is based on the +vhost-user protocol and as such should be interoperable with other virtual +machine managers. Please see below for working examples. + +## Synopsis + +**vhost-device-rng** [*OPTIONS*] + +## Options + +.. program:: vhost-device-rng + +.. option:: -h, --help + + Print help. + +.. option:: -s, --socket-path=PATH + + Location of vhost-user Unix domain sockets; this path will be suffixed with + 0,1,2..socket_count-1. + +.. option:: -f, --rng-source + Random number generator source file, defaults to /dev/urandom. + +.. option:: -c, --socket-count=INT + + Number of guests (sockets) to attach to, default set to 1. + +.. option:: -p, --period + + Rate, in milliseconds, at which the RNG hardware can generate random data. + Used in conjunction with the --max-bytes option. + +.. option:: -m, --max-bytes + + In conjunction with the --period parameter, provides the maximum number of bytes + an RNG device can generate per period. + +## Examples + +The daemon should be started first: + +:: + + host# vhost-device-rng --socket-path=/some/path/rng.sock -c 1 -m 512 -p 1000 + +Note that from the above command the socket path "/some/path/rng.sock0" will be +created.
This in turn needs to be communicated as a chardev socket to QEMU in order +for the backend RNG device to communicate with the vhost RNG daemon: + +:: + + host# qemu-system -M virt \ + -object memory-backend-file,id=mem,size=4G,mem-path=/dev/shm,share=on \ + -chardev socket,path=/some/path/rng.sock0,id=rng0 \ + -device vhost-user-rng-pci,chardev=rng0 \ + -numa node,memdev=mem \ + ... diff --git a/rng/src/main.rs b/rng/src/main.rs new file mode 100644 index 0000000..249ca9a --- /dev/null +++ b/rng/src/main.rs @@ -0,0 +1,169 @@ +// +// Copyright 2022 Linaro Ltd. All Rights Reserved. +// Mathieu Poirier +// +// SPDX-License-Identifier: Apache-2.0 +mod vhu_rng; + +use log::{info, warn}; +use std::convert::TryFrom; +use std::fs::File; +use std::sync::{Arc, Mutex, RwLock}; +use std::thread; + +use clap::Parser; +use thiserror::Error as ThisError; +use vhost::{vhost_user, vhost_user::Listener}; +use vhost_user_backend::VhostUserDaemon; +use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap}; + +use vhu_rng::VuRngBackend; + +// Chosen to replicate the max period found in QEMU's vhost-user-rng +// and virtio-rng implementations. +const VHU_RNG_MAX_PERIOD_MS: u128 = 65536; + +type Result = std::result::Result; + +#[derive(Debug, PartialEq, ThisError)] +/// Errors related to vhost-device-rng daemon. +pub enum Error { + #[error("RNG source file doesn't exists or can't be accessed")] + AccessRngSourceFile, + #[error("Period is too big: {0}")] + InvalidPeriodInput(u128), + #[error("Wrong socket count: {0}")] + InvalidSocketCount(u32), + #[error("Threads can't be joined")] + FailedJoiningThreads, +} + +#[derive(Clone, Parser, Debug, PartialEq)] +#[clap(author, version, about, long_about = None)] +struct RngArgs { + // Time needed (in ms) to transfer max-bytes amount of byte. + #[clap(short, long, default_value_t = VHU_RNG_MAX_PERIOD_MS)] + period: u128, + + // Maximum amount of byte that can be transferred in a period. 
+ #[clap(short, long, default_value_t = usize::MAX)] + max_bytes: usize, + + // Number of guests (sockets) to connect to. + #[clap(short = 'c', long, default_value_t = 1)] + socket_count: u32, + + // Location of vhost-user Unix domain socket. This is suffixed by 0,1,2..socket_count-1. + #[clap(short, long)] + socket_path: String, + + // Where to get the RNG data from. Defaults to /dev/urandom. + #[clap(short = 'f', long, default_value = "/dev/urandom")] + rng_source: String, +} + +#[derive(Clone, Debug, PartialEq)] +pub struct VuRngConfig { + pub period_ms: u128, + pub max_bytes: usize, + pub count: u32, + pub socket_path: String, + pub rng_source: String, +} + +impl TryFrom for VuRngConfig { + type Error = Error; + + fn try_from(args: RngArgs) -> Result { + if args.period == 0 || args.period > VHU_RNG_MAX_PERIOD_MS { + return Err(Error::InvalidPeriodInput(args.period)); + } + + if args.socket_count == 0 { + return Err(Error::InvalidSocketCount(args.socket_count)); + } + + // Divide available bandwidth by the number of threads in order + // to avoid overwhelming the HW. 
+ let max_bytes = args.max_bytes / args.socket_count as usize; + let socket_path = args.socket_path.trim().to_string(); + let rng_source = args.rng_source.trim().to_string(); + + Ok(VuRngConfig { + period_ms: args.period, + max_bytes, + count: args.socket_count, + socket_path, + rng_source, + }) + } +} + +pub fn start_backend(config: VuRngConfig) -> Result<()> { + let mut handles = Vec::new(); + let file = File::open(&config.rng_source).map_err(|_| Error::AccessRngSourceFile)?; + let random_file = Arc::new(Mutex::new(file)); + + for i in 0..config.count { + let socket = format!("{}{}", config.socket_path.to_owned(), i); + let period_ms = config.period_ms; + let max_bytes = config.max_bytes; + let random = Arc::clone(&random_file); + + let handle = thread::spawn(move || loop { + // If creating the VuRngBackend isn't successful there isn't much else to do than + // killing the thread, which .unwrap() does. When that happens an error code is + // generated and displayed by the runtime mechanic. Killing a thread doesn't affect + // the other threads spun-off by the daemon. + let vu_rng_backend = Arc::new(RwLock::new( + VuRngBackend::new(random.clone(), period_ms, max_bytes).unwrap(), + )); + + let mut daemon = VhostUserDaemon::new( + String::from("vhost-user-RNG-daemon"), + Arc::clone(&vu_rng_backend), + GuestMemoryAtomic::new(GuestMemoryMmap::new()), + ) + .unwrap(); + + let listener = Listener::new(socket.clone(), true).unwrap(); + daemon.start(listener).unwrap(); + + match daemon.wait() { + Ok(()) => { + info!("Stopping cleanly."); + } + Err(vhost_user_backend::Error::HandleRequest( + vhost_user::Error::PartialMessage, + )) => { + info!("vhost-user connection closed with partial message. If the VM is shutting down, this is expected behavior; otherwise, it might be a bug."); + } + Err(e) => { + warn!("Error running daemon: {:?}", e); + } + } + + // No matter the result, we need to shut down the worker thread.
+ vu_rng_backend + .read() + .unwrap() + .exit_event + .write(1) + .expect("Shutting down worker thread"); + }); + + handles.push(handle); + } + + for handle in handles { + handle.join().map_err(|_| Error::FailedJoiningThreads)?; + } + + Ok(()) +} + +fn main() -> Result<()> { + env_logger::init(); + + start_backend(VuRngConfig::try_from(RngArgs::parse())?) +} diff --git a/rng/src/vhu_rng.rs b/rng/src/vhu_rng.rs new file mode 100644 index 0000000..7043538 --- /dev/null +++ b/rng/src/vhu_rng.rs @@ -0,0 +1,281 @@ +// VIRTIO RNG Emulation via vhost-user +// +// Copyright 2022 Linaro Ltd. All Rights Reserved. +// Mathieu Poirier +// +// SPDX-License-Identifier: Apache-2.0 + +use log::warn; +use std::io::Read; +use std::sync::{Arc, Mutex}; +use std::thread::sleep; +use std::time::{Duration, Instant}; +use std::{convert, io, result}; + +use thiserror::Error as ThisError; +use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}; +use vhost_user_backend::{VhostUserBackendMut, VringRwLock, VringT}; +use virtio_bindings::bindings::virtio_net::{VIRTIO_F_NOTIFY_ON_EMPTY, VIRTIO_F_VERSION_1}; +use virtio_bindings::bindings::virtio_ring::{ + VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC, +}; +use virtio_queue::DescriptorChain; +use vm_memory::{Bytes, GuestMemoryAtomic, GuestMemoryLoadGuard, GuestMemoryMmap}; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK}; + +const QUEUE_SIZE: usize = 1024; +const NUM_QUEUES: usize = 1; + +type Result = std::result::Result; +type RngDescriptorChain = DescriptorChain>>; + +#[derive(Debug, PartialEq, ThisError)] +/// Errors related to vhost-device-rng daemon.
+pub enum VuRngError { + #[error("Descriptor not found")] + DescriptorNotFound, + #[error("Notification send failed")] + SendNotificationFailed, + #[error("Can't create eventFd")] + EventFdError, + #[error("Failed to handle event")] + HandleEventNotEpollIn, + #[error("Unknown device event")] + HandleEventUnknownEvent, + #[error("Too many descriptors: {0}")] + UnexpectedDescriptorCount(usize), + #[error("Unexpected Read Descriptor")] + UnexpectedReadDescriptor, + #[error("Failed to access RNG source")] + UnexpectedRngSourceAccessError, + #[error("Failed to read from the RNG source")] + UnexpectedRngSourceError, + #[error("Previous Time value is later than current time")] + UnexpectedTimerValue, + #[error("Unexpected VirtQueue error")] + UnexpectedVirtQueueError, +} + +impl convert::From for io::Error { + fn from(e: VuRngError) -> Self { + io::Error::new(io::ErrorKind::Other, e) + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct VuRngTimerConfig { + period_ms: u128, + period_start: Instant, + max_bytes: usize, + quota_remaining: usize, +} + +impl VuRngTimerConfig { + pub fn new(period_ms: u128, max_bytes: usize) -> Self { + VuRngTimerConfig { + period_ms, + period_start: Instant::now(), + max_bytes, + quota_remaining: max_bytes, + } + } +} + +pub struct VuRngBackend { + event_idx: bool, + timer: VuRngTimerConfig, + rng_source: Arc>, + pub exit_event: EventFd, +} + +impl VuRngBackend { + /// Create a new virtio rng device that gets random data from the supplied `rng_source`.
+ pub fn new( + rng_source: Arc>, + period_ms: u128, + max_bytes: usize, + ) -> std::result::Result { + Ok(VuRngBackend { + event_idx: false, + rng_source, + timer: VuRngTimerConfig::new(period_ms, max_bytes), + exit_event: EventFd::new(EFD_NONBLOCK).map_err(|_| VuRngError::EventFdError)?, + }) + } + + pub fn process_requests( + &mut self, + requests: Vec, + vring: &VringRwLock, + ) -> Result { + if requests.is_empty() { + return Ok(true); + } + + for desc_chain in requests { + let descriptors: Vec<_> = desc_chain.clone().collect(); + + if descriptors.len() != 1 { + return Err(VuRngError::UnexpectedDescriptorCount(descriptors.len())); + } + + let descriptor = descriptors[0]; + let mut to_read = descriptor.len() as usize; + let mut timer = &mut self.timer; + + if !descriptor.is_write_only() { + return Err(VuRngError::UnexpectedReadDescriptor); + } + + // Get the current time + let now = Instant::now(); + + // Check how much time has passed since we started the last period. + match now.checked_duration_since(timer.period_start) { + Some(duration) => { + let elapsed = duration.as_millis(); + + if elapsed >= timer.period_ms { + // More time has passed than a full period, reset time + // and quota. + timer.period_start = now; + timer.quota_remaining = timer.max_bytes; + } else { + // If we are out of bytes for the current period. Block until + // the start of the next period. 
+ if timer.quota_remaining == 0 { + let to_sleep = timer.period_ms - elapsed; + + sleep(Duration::from_millis(to_sleep as u64)); + timer.period_start = Instant::now(); + timer.quota_remaining = timer.max_bytes; + } + } + } + None => return Err(VuRngError::UnexpectedTimerValue), + }; + + if timer.quota_remaining < to_read { + to_read = timer.quota_remaining; + } + + let mut rng_source = self + .rng_source + .lock() + .map_err(|_| VuRngError::UnexpectedRngSourceAccessError)?; + + let len = desc_chain + .memory() + .read_from(descriptor.addr(), &mut *rng_source, to_read as usize) + .map_err(|_| VuRngError::UnexpectedRngSourceError)?; + + timer.quota_remaining -= len; + + if vring.add_used(desc_chain.head_index(), len as u32).is_err() { + warn!("Couldn't return used descriptors to the ring"); + } + } + Ok(true) + } + + /// Process the requests in the vring and dispatch replies + fn process_queue(&mut self, vring: &VringRwLock) -> Result { + let requests: Vec<_> = vring + .get_mut() + .get_queue_mut() + .iter() + .map_err(|_| VuRngError::DescriptorNotFound)? + .collect(); + + if self.process_requests(requests, vring)? 
{ + // Send notification once all the requests are processed + vring + .signal_used_queue() + .map_err(|_| VuRngError::SendNotificationFailed)?; + } + + Ok(true) + } +} + +/// VhostUserBackend trait methods +impl VhostUserBackendMut for VuRngBackend { + fn num_queues(&self) -> usize { + NUM_QUEUES + } + + fn max_queue_size(&self) -> usize { + QUEUE_SIZE + } + + fn features(&self) -> u64 { + // this matches the current libvhost defaults except VHOST_F_LOG_ALL + 1 << VIRTIO_F_VERSION_1 + | 1 << VIRTIO_F_NOTIFY_ON_EMPTY + | 1 << VIRTIO_RING_F_INDIRECT_DESC + | 1 << VIRTIO_RING_F_EVENT_IDX + | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() + } + + fn protocol_features(&self) -> VhostUserProtocolFeatures { + VhostUserProtocolFeatures::MQ + } + + fn set_event_idx(&mut self, enabled: bool) { + dbg!(self.event_idx = enabled); + } + + fn update_memory( + &mut self, + _mem: GuestMemoryAtomic, + ) -> result::Result<(), io::Error> { + Ok(()) + } + + fn handle_event( + &mut self, + device_event: u16, + evset: EventSet, + vrings: &[VringRwLock], + _thread_id: usize, + ) -> result::Result { + if evset != EventSet::IN { + return Err(VuRngError::HandleEventNotEpollIn.into()); + } + + match device_event { + 0 => { + let vring = &vrings[0]; + + if self.event_idx { + // vm-virtio's Queue implementation only checks avail_index + // once, so to properly support EVENT_IDX we need to keep + // calling process_queue() until it stops finding new + // requests on the queue. + loop { + vring.disable_notification().unwrap(); + self.process_queue(vring)?; + if !vring.enable_notification().unwrap() { + break; + } + } + } else { + // Without EVENT_IDX, a single call is enough. + self.process_queue(vring)?; + } + } + + _ => { + warn!("unhandled device_event: {}", device_event); + return Err(VuRngError::HandleEventUnknownEvent.into()); + } + } + Ok(false) + } + + fn exit_event(&self, _thread_index: usize) -> Option { + self.exit_event.try_clone().ok() + } +} +