Merge remote-tracking branch 'origin/main' into virtio-sound

This commit is contained in:
Dorinda Bassey 2023-10-10 17:18:17 +02:00
commit 7d84caa08a
96 changed files with 12783 additions and 1318 deletions

View File

@ -4,10 +4,13 @@ updates:
directory: "/"
schedule:
interval: weekly
open-pull-requests-limit: 3
allow:
- dependency-type: direct
- dependency-type: indirect
groups:
vhost-device:
patterns:
- "*"
- package-ecosystem: gitsubmodule
directory: "/"
schedule:

781
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,12 @@
[workspace]
resolver = "2"
members = [
"crates/gpio",
"crates/i2c",
"crates/rng",
"crates/vhost-device-gpio",
"crates/vhost-device-i2c",
"crates/vhost-device-rng",
"crates/vhost-device-scsi",
"crates/vhost-device-scmi",
"crates/sound",
"crates/vsock",
"crates/vhost-device-vsock",
]

View File

@ -8,11 +8,13 @@ crates.
Here is the list of device backends that we support:
- [GPIO](https://github.com/rust-vmm/vhost-device/blob/main/crates/gpio/README.md)
- [I2C](https://github.com/rust-vmm/vhost-device/blob/main/crates/i2c/README.md)
- [RNG](https://github.com/rust-vmm/vhost-device/blob/main/crates/rng/README.md)
- [GPIO](https://github.com/rust-vmm/vhost-device/blob/main/crates/vhost-device-gpio/README.md)
- [I2C](https://github.com/rust-vmm/vhost-device/blob/main/crates/vhost-device-i2c/README.md)
- [RNG](https://github.com/rust-vmm/vhost-device/blob/main/crates/vhost-device-rng/README.md)
- [SCMI](https://github.com/rust-vmm/vhost-device/blob/main/crates/vhost-device-scmi/README.md)
- [SCSI](https://github.com/rust-vmm/vhost-device/blob/main/crates/vhost-device-scsi/README.md)
- [Sound](https://github.com/rust-vmm/vhost-device/blob/main/crates/sound/README.md)
- [VSOCK](https://github.com/rust-vmm/vhost-device/blob/main/crates/vsock/README.md)
- [VSOCK](https://github.com/rust-vmm/vhost-device/blob/main/crates/vhost-device-vsock/README.md)
## Testing and Code Coverage
@ -46,14 +48,29 @@ logic to service the virtio requests directly in the application.
## Build dependency
The GPIO crate needs a local installation of libgpiod library to be available,
which can be done like:
The GPIO crate needs a local installation of libgpiod library to be available.
If your distro ships libgpiod >= v2.0, then you should be fine.
$ git clone --depth 1 --branch v2.0-rc1 https://git.kernel.org/pub/scm/libs/libgpiod/libgpiod.git/
$ cd libgpiod
$ ./autogen.sh && make
Otherwise, you will need to build libgpiod yourself:
Either you can do a 'make install' now on your system, or provide the path to the
locally built library like this while building vhost-device crates:
git clone --depth 1 --branch v2.0.x https://git.kernel.org/pub/scm/libs/libgpiod/libgpiod.git/
cd libgpiod
./autogen.sh --prefix="$PWD/install/"
make install
$ RUSTFLAGS='-L /home/<username>/libgpiod/lib/.libs/' cargo build --release
In order to inform tools about the build location, you can now set:
export PKG_CONFIG_PATH="<PATH-TO-LIBGPIOD>/install/lib/pkgconfig/"
To prevent setting this in every terminal session, you can also configure
cargo to
[set it automatically](https://doc.rust-lang.org/cargo/reference/config.html#env).
## Xen support
Supporting Xen requires special handling while mapping the guest memory. The
`vm-memory` crate implements xen memory mapping support via a separate feature
`xen`, and this crate uses the same feature name to enable Xen support.
It was decided by the `rust-vmm` maintainers to keep the interface simple and
build the crate for either standard Unix memory mapping or Xen, and not both.

View File

@ -1,5 +1,5 @@
{
"coverage_score": 67.6,
"coverage_score": 73.42,
"exclude_path": "",
"crate_features": ""
}

View File

@ -1,284 +0,0 @@
// VIRTIO GPIO Emulation via vhost-user
//
// Copyright 2022 Linaro Ltd. All Rights Reserved.
// Viresh Kumar <viresh.kumar@linaro.org>
//
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use log::{info, warn};
use std::num::ParseIntError;
use std::sync::{Arc, RwLock};
use std::thread::spawn;
use clap::Parser;
use thiserror::Error as ThisError;
use vhost::{vhost_user, vhost_user::Listener};
use vhost_user_backend::VhostUserDaemon;
use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap};
use crate::gpio::{GpioController, GpioDevice, PhysDevice};
use crate::vhu_gpio::VhostUserGpioBackend;
pub(crate) type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, Eq, PartialEq, ThisError)]
/// Errors related to low level GPIO helpers
pub(crate) enum Error {
/// The requested number of sockets cannot be used (reported for a count of zero).
#[error("Invalid socket count: {0}")]
SocketCountInvalid(usize),
/// The socket count (first field) differs from the number of listed devices (second field).
#[error("Socket count ({0}) doesn't match device count {1}")]
DeviceCountMismatch(usize, usize),
/// The same device number was listed more than once.
#[error("Duplicate device detected: {0}")]
DeviceDuplicate(u32),
/// An entry of the device list could not be parsed as a `u32`.
#[error("Failed while parsing to integer: {0:?}")]
ParseFailure(ParseIntError),
/// Joining a backend worker thread returned an error.
#[error("Failed to join threads")]
FailedJoiningThreads,
}
// Command-line arguments accepted by the GPIO backend daemon.
//
// NOTE: the `///` comments on the fields are consumed by clap's derive macro as the
// per-option help text, so they are user-facing strings rather than plain documentation.
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct GpioArgs {
/// Location of vhost-user Unix domain socket. This is suffixed by 0,1,2..socket_count-1.
#[clap(short, long)]
socket_path: String,
/// Number of guests (sockets) to connect to.
#[clap(short = 'c', long, default_value_t = 1)]
socket_count: usize,
/// List of GPIO devices, one for each guest, in the format <N1>[:<N2>]. The first entry is for
/// Guest that connects to socket 0, next one for socket 1, and so on. Each device number here
/// will be used to access the corresponding /dev/gpiochipX. Example, "-c 4 -l 3:4:6:1"
#[clap(short = 'l', long)]
device_list: String,
}
#[derive(Debug, PartialEq)]
pub(crate) struct DeviceConfig {
inner: Vec<u32>,
}
impl DeviceConfig {
fn new() -> Self {
Self { inner: Vec::new() }
}
fn contains_device(&self, number: u32) -> bool {
self.inner.iter().any(|elem| *elem == number)
}
fn push(&mut self, device: u32) -> Result<()> {
if self.contains_device(device) {
return Err(Error::DeviceDuplicate(device));
}
self.inner.push(device);
Ok(())
}
}
/// Parses a `:`-separated list of device numbers, e.g. `"3:4:6:1"`.
impl TryFrom<&str> for DeviceConfig {
    type Error = Error;

    /// Entries are handled left to right and the first failure is returned:
    /// `Error::ParseFailure` for an entry that is not a `u32`, or
    /// `Error::DeviceDuplicate` for a repeated number.
    fn try_from(list: &str) -> Result<Self> {
        list.split(':').try_fold(
            DeviceConfig::new(),
            |mut devices, entry| -> Result<DeviceConfig> {
                let number = entry.parse::<u32>().map_err(Error::ParseFailure)?;
                devices.push(number)?;
                Ok(devices)
            },
        )
    }
}
#[derive(PartialEq, Debug)]
struct GpioConfiguration {
socket_path: String,
socket_count: usize,
devices: DeviceConfig,
}
impl TryFrom<GpioArgs> for GpioConfiguration {
type Error = Error;
fn try_from(args: GpioArgs) -> Result<Self> {
if args.socket_count == 0 {
return Err(Error::SocketCountInvalid(0));
}
let devices = DeviceConfig::try_from(args.device_list.as_str())?;
if devices.inner.len() != args.socket_count {
return Err(Error::DeviceCountMismatch(
args.socket_count,
devices.inner.len(),
));
}
Ok(GpioConfiguration {
socket_path: args.socket_path,
socket_count: args.socket_count,
devices,
})
}
}
/// Validates `args` and runs one vhost-user GPIO backend thread per configured socket.
///
/// Thread `i` serves the socket `<socket_path><i>` with the `i`-th entry of the device
/// list. Each thread re-creates its backend in an endless loop, so this function only
/// returns once a worker thread has terminated by panicking.
///
/// # Errors
///
/// * Any configuration error reported by `GpioConfiguration::try_from` (zero socket
///   count, unparsable or duplicate device number, socket/device count mismatch).
///   These are returned to the caller instead of aborting with a panic.
/// * `Error::FailedJoiningThreads` when joining a worker thread fails.
fn start_backend<D: 'static + GpioDevice + Send + Sync>(args: GpioArgs) -> Result<()> {
    // Invalid command-line input is a user error, not a broken invariant: propagate it.
    let config = GpioConfiguration::try_from(args)?;
    let mut handles = Vec::new();

    for i in 0..config.socket_count {
        let socket = config.socket_path.to_owned() + &i.to_string();
        let device_num = config.devices.inner[i];

        let handle = spawn(move || loop {
            // A separate thread is spawned for each socket and can connect to a separate guest.
            // These are run in an infinite loop to not require the daemon to be restarted once a
            // guest exits.
            //
            // There isn't much value in complicating code here to return an error from the
            // threads, and so the code uses unwrap() instead. The panic on a thread won't cause
            // trouble to other threads/guests or the main() function and should be safe for the
            // daemon.
            let device = D::open(device_num).unwrap();
            let controller = GpioController::<D>::new(device).unwrap();
            let backend = Arc::new(RwLock::new(VhostUserGpioBackend::new(controller).unwrap()));
            let listener = Listener::new(socket.clone(), true).unwrap();

            let mut daemon = VhostUserDaemon::new(
                String::from("vhost-device-gpio-backend"),
                backend.clone(),
                GuestMemoryAtomic::new(GuestMemoryMmap::new()),
            )
            .unwrap();

            daemon.start(listener).unwrap();

            match daemon.wait() {
                Ok(()) => {
                    info!("Stopping cleanly.");
                }
                Err(vhost_user_backend::Error::HandleRequest(
                    vhost_user::Error::PartialMessage,
                )) => {
                    info!("vhost-user connection closed with partial message. If the VM is shutting down, this is expected behavior; otherwise, it might be a bug.");
                }
                Err(e) => {
                    warn!("Error running daemon: {:?}", e);
                }
            }

            // No matter the result, we need to shut down the worker thread.
            backend.read().unwrap().exit_event.write(1).unwrap();
        });

        handles.push(handle);
    }

    // A join error means the worker panicked; report that as a single error value.
    for handle in handles {
        handle.join().map_err(|_| Error::FailedJoiningThreads)?;
    }

    Ok(())
}
/// Daemon entry point: sets up logging, parses the command line and runs the
/// backend for physical GPIO devices.
pub(crate) fn gpio_init() -> Result<()> {
    env_logger::init();

    let args = GpioArgs::parse();
    start_backend::<PhysDevice>(args)
}
// Unit tests for command-line validation and backend start-up failure handling.
#[cfg(test)]
mod tests {
use super::*;
use crate::gpio::tests::DummyDevice;
impl DeviceConfig {
// Test-only constructor that wraps `devices` as given, bypassing the duplicate check of `push`.
pub fn new_with(devices: Vec<u32>) -> Self {
DeviceConfig { inner: devices }
}
}
// Builds the argument struct directly instead of going through clap's command-line parser.
fn get_cmd_args(path: &str, devices: &str, count: usize) -> GpioArgs {
GpioArgs {
socket_path: path.to_string(),
socket_count: count,
device_list: devices.to_string(),
}
}
// Pushing a device number that is already present must be rejected.
#[test]
fn test_gpio_device_config() {
let mut config = DeviceConfig::new();
config.push(5).unwrap();
config.push(6).unwrap();
assert_eq!(config.push(5).unwrap_err(), Error::DeviceDuplicate(5));
}
// Each invalid argument combination must map to its dedicated error variant.
#[test]
fn test_gpio_parse_failure() {
let socket_name = "vgpio.sock";
// Invalid device number
let cmd_args = get_cmd_args(socket_name, "1:4d:5", 3);
assert_eq!(
GpioConfiguration::try_from(cmd_args).unwrap_err(),
Error::ParseFailure("4d".parse::<u32>().unwrap_err())
);
// Zero socket count
let cmd_args = get_cmd_args(socket_name, "1:4", 0);
assert_eq!(
GpioConfiguration::try_from(cmd_args).unwrap_err(),
Error::SocketCountInvalid(0)
);
// Duplicate device number: 4
let cmd_args = get_cmd_args(socket_name, "1:4:5:6:4", 5);
assert_eq!(
GpioConfiguration::try_from(cmd_args).unwrap_err(),
Error::DeviceDuplicate(4)
);
// Device count mismatch
let cmd_args = get_cmd_args(socket_name, "1:4:5:6", 5);
assert_eq!(
GpioConfiguration::try_from(cmd_args).unwrap_err(),
Error::DeviceCountMismatch(5, 4)
);
}
// A valid argument set must produce the matching configuration, preserving device order.
#[test]
fn test_gpio_parse_successful() {
let socket_name = "vgpio.sock";
// Match expected and actual configuration
let cmd_args = get_cmd_args(socket_name, "1:4:32:21:5", 5);
let config = GpioConfiguration::try_from(cmd_args).unwrap();
let expected_devices = DeviceConfig::new_with(vec![1, 4, 32, 21, 5]);
let expected_config = GpioConfiguration {
socket_count: 5,
socket_path: String::from(socket_name),
devices: expected_devices,
};
assert_eq!(config, expected_config);
}
// A socket path that cannot be bound makes the worker threads panic, which `start_backend`
// reports as `FailedJoiningThreads`.
#[test]
fn test_gpio_fail_listener() {
// This will fail the listeners and thread will panic.
let socket_name = "~/path/not/present/gpio";
let cmd_args = get_cmd_args(socket_name, "1:4:3:5", 4);
assert_eq!(
start_backend::<DummyDevice>(cmd_args).unwrap_err(),
Error::FailedJoiningThreads
);
}
}

View File

@ -0,0 +1,15 @@
# Changelog
## [Unreleased]
### Added
### Changed
### Fixed
### Deprecated
## [0.1.0]
First release

View File

@ -11,22 +11,27 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
xen = ["vm-memory/xen", "vhost/xen", "vhost-user-backend/xen"]
mock_gpio = []
[dependencies]
clap = { version = "4.1", features = ["derive"] }
clap = { version = "4.4", features = ["derive"] }
env_logger = "0.10"
libc = "0.2"
log = "0.4"
thiserror = "1.0"
vhost = { version = "0.6", features = ["vhost-user-slave"] }
vhost-user-backend = "0.8"
virtio-bindings = "0.2"
virtio-queue = "0.7"
vm-memory = "0.10"
vhost = { version = "0.8", features = ["vhost-user-slave"] }
vhost-user-backend = "0.10"
virtio-bindings = "0.2.1"
virtio-queue = "0.9"
vm-memory = "0.12"
vmm-sys-util = "0.11"
[target.'cfg(target_env = "gnu")'.dependencies]
libgpiod = { git = "https://git.kernel.org/pub/scm/libs/libgpiod/libgpiod.git/", rev = "d8d3a84b2ddf" }
libgpiod = { version = "0.1" }
[dev-dependencies]
virtio-queue = { version = "0.7", features = ["test-utils"] }
vm-memory = { version = "0.10", features = ["backend-mmap", "backend-atomic"] }
assert_matches = "1.5"
virtio-queue = { version = "0.9", features = ["test-utils"] }
vm-memory = { version = "0.12", features = ["backend-mmap", "backend-atomic"] }

View File

@ -0,0 +1 @@
../../LICENSE-APACHE

View File

@ -0,0 +1 @@
../../LICENSE-BSD-3-Clause

View File

@ -43,6 +43,28 @@ Examples section below.
--socket-count. For example, the GPIO device 0 will be allocated to the guest
with "<socket-path>0" path.
## MockGpioDevice support
Because connecting VM guests to random GPIO pins on your host is generally
asking for trouble, you can enable the "mock_gpio" feature in your build:
cargo build --features "mock_gpio"
You can then enable simulated GPIOs using the 's' prefix:
--device-list s4:s8
This will create two GPIO devices, the first with 4 pins and the
second with 8. By default, updates are displayed via env logger:
vhost-device-gpio -s /tmp/vus.sock -c 1 -l s4
[2023-09-14T14:15:14Z INFO vhost_device_gpio::mock_gpio] gpio dummy0 set value to 1
[2023-09-14T14:15:14Z INFO vhost_device_gpio::mock_gpio] gpio dummy0 set direction to 1
[2023-09-14T14:15:14Z INFO vhost_device_gpio::mock_gpio] gpio dummy0 set direction to 0
[2023-09-14T14:15:19Z INFO vhost_device_gpio::mock_gpio] gpio dummy1 set value to 1
[2023-09-14T14:15:19Z INFO vhost_device_gpio::mock_gpio] gpio dummy1 set direction to 1
[2023-09-14T14:15:19Z INFO vhost_device_gpio::mock_gpio] gpio dummy1 set direction to 0
## Examples
The daemon should be started first:

View File

@ -0,0 +1,368 @@
// VIRTIO GPIO Emulation via vhost-user
//
// Copyright 2022 Linaro Ltd. All Rights Reserved.
// Viresh Kumar <viresh.kumar@linaro.org>
//
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use log::{error, info, warn};
use std::num::ParseIntError;
use std::process::exit;
use std::sync::{Arc, RwLock};
use std::thread::{spawn, JoinHandle};
use clap::Parser;
use env_logger::Env;
use thiserror::Error as ThisError;
use vhost::{vhost_user, vhost_user::Listener};
use vhost_user_backend::VhostUserDaemon;
use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap};
use crate::gpio::{GpioController, GpioDevice, PhysDevice};
use crate::vhu_gpio::VhostUserGpioBackend;
#[cfg(any(test, feature = "mock_gpio"))]
use crate::mock_gpio::MockGpioDevice;
pub(crate) type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, ThisError)]
/// Errors related to low level GPIO helpers
pub(crate) enum Error {
/// The requested number of sockets cannot be used (reported for a count of zero).
#[error("Invalid socket count: {0}")]
SocketCountInvalid(usize),
/// The socket count (first field) differs from the number of listed devices (second field).
#[error("Socket count ({0}) doesn't match device count {1}")]
DeviceCountMismatch(usize, usize),
/// The same physical device number was listed more than once.
#[error("Duplicate device detected: {0}")]
DeviceDuplicate(u32),
/// An entry of the device list could not be parsed as a `u32`.
#[error("Failed while parsing to integer: {0:?}")]
ParseFailure(ParseIntError),
/// Joining a backend worker thread returned an error.
#[error("Failed to join threads")]
FailedJoiningThreads,
/// Opening the physical GPIO device failed.
#[error("Could not open gpio device: {0}")]
CouldNotOpenDevice(crate::gpio::Error),
/// Building the `GpioController` for a device failed.
#[error("Could not create gpio controller: {0}")]
CouldNotCreateGpioController(crate::gpio::Error),
/// Building the `VhostUserGpioBackend` failed.
#[error("Could not create gpio backend: {0}")]
CouldNotCreateBackend(crate::vhu_gpio::Error),
/// Building the `VhostUserDaemon` failed.
#[error("Could not create daemon: {0}")]
CouldNotCreateDaemon(vhost_user_backend::Error),
}
// Extra text passed to clap as `after_help` for `GpioArgs`; it explains the entries of the
// device list.
const GPIO_AFTER_HELP: &str = "Each device number here will be used to \
access the corresponding /dev/gpiochipX or simulate a GPIO device \
with N pins (when feature enabled). \
Example, \"-c 4 -l 3:s4:6:s1\"\n";
// Command-line arguments accepted by the GPIO backend daemon.
//
// NOTE: the `///` comments on the fields are consumed by clap's derive macro as the
// per-option help text, so they are user-facing strings rather than plain documentation.
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None, after_help = GPIO_AFTER_HELP)]
struct GpioArgs {
/// Location of vhost-user Unix domain socket. This is suffixed by 0,1,2..socket_count-1.
#[clap(short, long)]
socket_path: String,
/// Number of guests (sockets) to connect to.
#[clap(short = 'c', long, default_value_t = 1)]
socket_count: usize,
/// List of GPIO devices, one for each guest, in the format
/// [s]<N1>[:[s]<N2>].
#[clap(short = 'l', long)]
device_list: String,
}
#[derive(Debug, Clone, Copy, PartialEq)]
enum GpioDeviceType {
PhysicalDevice {
id: u32,
},
#[cfg(any(test, feature = "mock_gpio"))]
SimulatedDevice {
num_gpios: u32,
},
}
impl GpioDeviceType {
fn new(cfg: &str) -> Result<Self> {
match cfg.strip_prefix('s') {
#[cfg(any(test, feature = "mock_gpio"))]
Some(num) => {
let num_gpios = num.parse::<u32>().map_err(Error::ParseFailure)?;
Ok(GpioDeviceType::SimulatedDevice { num_gpios })
}
_ => {
let id = cfg.parse::<u32>().map_err(Error::ParseFailure)?;
Ok(GpioDeviceType::PhysicalDevice { id })
}
}
}
}
/// Ordered list of GPIO devices, one per guest socket.
#[derive(Debug, PartialEq)]
pub(crate) struct DeviceConfig {
    inner: Vec<GpioDeviceType>,
}

impl DeviceConfig {
    /// Creates an empty device list.
    fn new() -> Self {
        DeviceConfig { inner: vec![] }
    }

    /// Returns `true` when an identical `device` entry is already listed.
    fn contains_device(&self, device: GpioDeviceType) -> bool {
        self.inner.iter().any(|known| *known == device)
    }

    /// Appends `device` to the list.
    ///
    /// A physical device may appear only once; a repeated id fails with
    /// `Error::DeviceDuplicate` and leaves the list untouched. Simulated devices are
    /// never checked for duplicates.
    fn push(&mut self, device: GpioDeviceType) -> Result<()> {
        let duplicate_id = match device {
            GpioDeviceType::PhysicalDevice { id } => self.contains_device(device).then_some(id),
            #[cfg(any(test, feature = "mock_gpio"))]
            GpioDeviceType::SimulatedDevice { .. } => None,
        };

        if let Some(id) = duplicate_id {
            return Err(Error::DeviceDuplicate(id));
        }

        self.inner.push(device);
        Ok(())
    }
}
/// Parses a `:`-separated device list such as `"3:s4:6:s1"`.
impl TryFrom<&str> for DeviceConfig {
    type Error = Error;

    /// Entries are handled left to right; the first entry that fails to parse or that
    /// repeats a physical device aborts the conversion with the corresponding error.
    fn try_from(list: &str) -> Result<Self> {
        let mut devices = DeviceConfig::new();

        for entry in list.split(':') {
            let device = GpioDeviceType::new(entry)?;
            devices.push(device)?;
        }

        Ok(devices)
    }
}
#[derive(PartialEq, Debug)]
struct GpioConfiguration {
socket_path: String,
socket_count: usize,
devices: DeviceConfig,
}
impl TryFrom<GpioArgs> for GpioConfiguration {
type Error = Error;
fn try_from(args: GpioArgs) -> Result<Self> {
if args.socket_count == 0 {
return Err(Error::SocketCountInvalid(0));
}
let devices = DeviceConfig::try_from(args.device_list.as_str())?;
if devices.inner.len() != args.socket_count {
return Err(Error::DeviceCountMismatch(
args.socket_count,
devices.inner.len(),
));
}
Ok(GpioConfiguration {
socket_path: args.socket_path,
socket_count: args.socket_count,
devices,
})
}
}
/// Serves a single guest connection on `socket` using `device`, returning once the
/// vhost-user daemon has stopped.
///
/// Builds a `GpioController` around `device`, wraps it in a `VhostUserGpioBackend`,
/// starts a `VhostUserDaemon` on a listener created for `socket` and waits for it.
///
/// # Errors
///
/// Controller, backend and daemon construction failures are mapped to
/// `Error::CouldNotCreateGpioController`, `Error::CouldNotCreateBackend` and
/// `Error::CouldNotCreateDaemon` respectively. Errors from `daemon.wait()` are only logged.
///
/// # Panics
///
/// Listener creation, `daemon.start`, taking the backend read lock and writing the exit
/// event are all unwrapped and therefore panic on failure.
fn start_device_backend<D: GpioDevice>(device: D, socket: String) -> Result<()> {
let controller = GpioController::new(device).map_err(Error::CouldNotCreateGpioController)?;
let backend = Arc::new(RwLock::new(
VhostUserGpioBackend::new(controller).map_err(Error::CouldNotCreateBackend)?,
));
// NOTE(review): unlike the neighbouring calls this failure is not mapped to an `Error`
// variant; `test_gpio_fail_listener_mock` relies on the resulting thread panic.
let listener = Listener::new(socket, true).unwrap();
let mut daemon = VhostUserDaemon::new(
String::from("vhost-device-gpio-backend"),
backend.clone(),
GuestMemoryAtomic::new(GuestMemoryMmap::new()),
)
.map_err(Error::CouldNotCreateDaemon)?;
daemon.start(listener).unwrap();
// Block until the daemon finishes; every outcome is logged and then treated the same way.
match daemon.wait() {
Ok(()) => {
info!("Stopping cleanly.");
}
Err(vhost_user_backend::Error::HandleRequest(vhost_user::Error::PartialMessage)) => {
info!("vhost-user connection closed with partial message. If the VM is shutting down, this is expected behavior; otherwise, it might be a bug.");
}
Err(e) => {
warn!("Error running daemon: {:?}", e);
}
}
// No matter the result, we need to shut down the worker thread.
backend.read().unwrap().exit_event.write(1).unwrap();
Ok(())
}
fn start_backend(args: GpioArgs) -> Result<()> {
let config = GpioConfiguration::try_from(args)?;
let mut handles = Vec::new();
for i in 0..config.socket_count {
let socket = config.socket_path.to_owned() + &i.to_string();
let cfg = config.devices.inner[i];
let handle: JoinHandle<Result<()>> = spawn(move || loop {
// A separate thread is spawned for each socket and can
// connect to a separate guest. These are run in an
// infinite loop to not require the daemon to be restarted
// once a guest exits.
//
// However if we fail to spawn (due to bad config or
// other reason) we will bail out of the spawning and
// propagate the error back to gpio_init().
match cfg {
GpioDeviceType::PhysicalDevice { id } => {
let controller = PhysDevice::open(id).map_err(Error::CouldNotOpenDevice)?;
start_device_backend(controller, socket.clone())?;
}
#[cfg(any(test, feature = "mock_gpio"))]
GpioDeviceType::SimulatedDevice { num_gpios } => {
let controller = MockGpioDevice::open(num_gpios).unwrap(); // cannot fail
start_device_backend(controller, socket.clone())?;
}
};
});
handles.push(handle);
}
for handle in handles {
handle.join().map_err(|_| Error::FailedJoiningThreads)??;
}
Ok(())
}
/// Daemon entry point: sets up logging (level `info` unless `RUST_LOG` overrides it),
/// parses the command line and runs the backend.
///
/// A backend failure is logged and the process exits with status 1.
pub(crate) fn gpio_init() {
    env_logger::init_from_env(Env::default().filter_or("RUST_LOG", "info"));

    match start_backend(GpioArgs::parse()) {
        Ok(()) => {}
        Err(e) => {
            error!("Fatal error starting backend: {e}");
            exit(1);
        }
    }
}
// Unit tests for command-line validation and backend start-up failure handling.
#[cfg(test)]
mod tests {
use assert_matches::assert_matches;
use super::*;
impl DeviceConfig {
// Test-only constructor: wraps each id as a physical device, bypassing the duplicate
// check of `push`.
pub fn new_with(devices: Vec<u32>) -> Self {
DeviceConfig {
inner: devices
.into_iter()
.map(|id| GpioDeviceType::PhysicalDevice { id })
.collect(),
}
}
}
// Builds the argument struct directly instead of going through clap's command-line parser.
fn get_cmd_args(path: &str, devices: &str, count: usize) -> GpioArgs {
GpioArgs {
socket_path: path.to_string(),
socket_count: count,
device_list: devices.to_string(),
}
}
// Pushing a physical device id that is already present must be rejected.
#[test]
fn test_gpio_device_config() {
let mut config = DeviceConfig::new();
config
.push(GpioDeviceType::PhysicalDevice { id: 5 })
.unwrap();
config
.push(GpioDeviceType::PhysicalDevice { id: 6 })
.unwrap();
assert_matches!(
config
.push(GpioDeviceType::PhysicalDevice { id: 5 })
.unwrap_err(),
Error::DeviceDuplicate(5)
);
}
// Each invalid argument combination must map to its dedicated error variant.
// `assert_matches!` is used because `Error` does not derive `PartialEq` in this file.
#[test]
fn test_gpio_parse_failure() {
let socket_name = "vgpio.sock";
// Invalid device number
let cmd_args = get_cmd_args(socket_name, "1:4d:5", 3);
assert_matches!(
GpioConfiguration::try_from(cmd_args).unwrap_err(),
Error::ParseFailure(e) if e == "4d".parse::<u32>().unwrap_err()
);
// Zero socket count
let cmd_args = get_cmd_args(socket_name, "1:4", 0);
assert_matches!(
GpioConfiguration::try_from(cmd_args).unwrap_err(),
Error::SocketCountInvalid(0)
);
// Duplicate device number: 4
let cmd_args = get_cmd_args(socket_name, "1:4:5:6:4", 5);
assert_matches!(
GpioConfiguration::try_from(cmd_args).unwrap_err(),
Error::DeviceDuplicate(4)
);
// Device count mismatch
let cmd_args = get_cmd_args(socket_name, "1:4:5:6", 5);
assert_matches!(
GpioConfiguration::try_from(cmd_args).unwrap_err(),
Error::DeviceCountMismatch(5, 4)
);
// Parse mixed device and simulated
let cmd_args = get_cmd_args(socket_name, "1:s4", 2);
assert_matches!(GpioConfiguration::try_from(cmd_args), Ok(_));
}
// A valid argument set must produce the matching configuration, preserving device order.
#[test]
fn test_gpio_parse_successful() {
let socket_name = "vgpio.sock";
// Match expected and actual configuration
let cmd_args = get_cmd_args(socket_name, "1:4:32:21:5", 5);
let config = GpioConfiguration::try_from(cmd_args).unwrap();
let expected_devices = DeviceConfig::new_with(vec![1, 4, 32, 21, 5]);
let expected_config = GpioConfiguration {
socket_count: 5,
socket_path: String::from(socket_name),
devices: expected_devices,
};
assert_eq!(config, expected_config);
}
// With simulated devices and a socket path that cannot be bound, listener creation panics
// inside the worker threads and `start_backend` reports `FailedJoiningThreads`.
#[test]
fn test_gpio_fail_listener_mock() {
let socket_name = "~/path/not/present/gpio";
let cmd_args = get_cmd_args(socket_name, "s1:s4:s3:s5", 4);
assert_matches!(
start_backend(cmd_args).unwrap_err(),
Error::FailedJoiningThreads
);
}
}

View File

@ -13,7 +13,9 @@ use libgpiod::{chip, line, request, Error as LibGpiodError};
use thiserror::Error as ThisError;
use vm_memory::{ByteValued, Le16, Le32};
type Result<T> = std::result::Result<T, Error>;
use crate::virtio_gpio::*;
pub(crate) type Result<T> = std::result::Result<T, Error>;
#[derive(Copy, Clone, Debug, PartialEq, ThisError)]
/// Errors related to low level gpio helpers
@ -47,32 +49,6 @@ pub(crate) enum Error {
GpioOperationFailed(&'static str),
}
/// Virtio specification definitions
/// Virtio GPIO request types
pub(crate) const VIRTIO_GPIO_MSG_GET_LINE_NAMES: u16 = 0x0001;
pub(crate) const VIRTIO_GPIO_MSG_GET_DIRECTION: u16 = 0x0002;
pub(crate) const VIRTIO_GPIO_MSG_SET_DIRECTION: u16 = 0x0003;
pub(crate) const VIRTIO_GPIO_MSG_GET_VALUE: u16 = 0x0004;
pub(crate) const VIRTIO_GPIO_MSG_SET_VALUE: u16 = 0x0005;
pub(crate) const VIRTIO_GPIO_MSG_IRQ_TYPE: u16 = 0x0006;
/// Direction types
pub(crate) const VIRTIO_GPIO_DIRECTION_NONE: u8 = 0x00;
pub(crate) const VIRTIO_GPIO_DIRECTION_OUT: u8 = 0x01;
pub(crate) const VIRTIO_GPIO_DIRECTION_IN: u8 = 0x02;
/// Virtio GPIO IRQ types
pub(crate) const VIRTIO_GPIO_IRQ_TYPE_NONE: u16 = 0x00;
pub(crate) const VIRTIO_GPIO_IRQ_TYPE_EDGE_RISING: u16 = 0x01;
pub(crate) const VIRTIO_GPIO_IRQ_TYPE_EDGE_FALLING: u16 = 0x02;
pub(crate) const VIRTIO_GPIO_IRQ_TYPE_EDGE_BOTH: u16 =
VIRTIO_GPIO_IRQ_TYPE_EDGE_RISING | VIRTIO_GPIO_IRQ_TYPE_EDGE_FALLING;
pub(crate) const VIRTIO_GPIO_IRQ_TYPE_LEVEL_HIGH: u16 = 0x04;
pub(crate) const VIRTIO_GPIO_IRQ_TYPE_LEVEL_LOW: u16 = 0x08;
const VIRTIO_GPIO_IRQ_TYPE_ALL: u16 = VIRTIO_GPIO_IRQ_TYPE_EDGE_BOTH
| VIRTIO_GPIO_IRQ_TYPE_LEVEL_HIGH
| VIRTIO_GPIO_IRQ_TYPE_LEVEL_LOW;
/// Virtio GPIO Configuration
#[derive(Copy, Clone, Debug, Default, PartialEq)]
#[repr(C)]
@ -219,11 +195,22 @@ impl GpioDevice for PhysDevice {
.set_consumer("vhu-gpio")
.map_err(Error::GpiodFailed)?;
state.request = Some(Arc::new(Mutex::new(
self.chip
.request_lines(Some(&rconfig), &lconfig)
.map_err(Error::GpiodFailed)?,
)));
// This is causing a warning since libgpiod's request_config is
// not `Send`.
// We, however, unsafely claim that it is by marking PhysDevice as
// `Send`. This is wrong, but until we figure out what to do, we
// just silence the clippy warning here.
//
// https://github.com/rust-vmm/vhost-device/issues/442 tracks
// finding a solution to this.
#[allow(clippy::arc_with_non_send_sync)]
{
state.request = Some(Arc::new(Mutex::new(
self.chip
.request_lines(Some(&rconfig), &lconfig)
.map_err(Error::GpiodFailed)?,
)));
}
}
Ok(())
@ -373,10 +360,10 @@ impl GpioDevice for PhysDevice {
}
#[derive(Debug, Copy, Clone)]
struct GpioState {
dir: u8,
val: Option<u16>,
irq_type: u16,
pub(crate) struct GpioState {
pub dir: u8,
pub val: Option<u16>,
pub irq_type: u16,
}
#[derive(Debug)]
@ -542,135 +529,7 @@ pub(crate) mod tests {
use super::Error;
use super::*;
/// In-memory GPIO device used by the unit tests.
///
/// The `*_result` fields hold the canned result for the matching `GpioDevice` method, so
/// a test can store an `Err` there to force that operation to fail.
#[derive(Debug)]
pub(crate) struct DummyDevice {
    ngpio: u16,
    pub(crate) gpio_names: Vec<String>,
    state: RwLock<Vec<GpioState>>,
    num_gpios_result: Result<u16>,
    gpio_name_result: Result<String>,
    direction_result: Result<u8>,
    set_direction_result: Result<()>,
    value_result: Result<u8>,
    set_value_result: Result<()>,
    set_irq_type_result: Result<()>,
    pub(crate) wait_for_irq_result: Result<bool>,
}

impl DummyDevice {
    /// Creates a device with `ngpio` lines, each named `"\0"`, with no direction, no
    /// value and no IRQ type configured. All canned results start out as `Ok`.
    pub(crate) fn new(ngpio: u16) -> Self {
        let line_count = usize::from(ngpio);
        let unconfigured = GpioState {
            dir: VIRTIO_GPIO_DIRECTION_NONE,
            val: None,
            irq_type: VIRTIO_GPIO_IRQ_TYPE_NONE,
        };

        DummyDevice {
            ngpio,
            gpio_names: vec![String::from('\0'); line_count],
            state: RwLock::new(vec![unconfigured; line_count]),
            num_gpios_result: Ok(0),
            gpio_name_result: Ok(String::new()),
            direction_result: Ok(0),
            set_direction_result: Ok(()),
            value_result: Ok(0),
            set_value_result: Ok(()),
            set_irq_type_result: Ok(()),
            wait_for_irq_result: Ok(true),
        }
    }
}
// `GpioDevice` implementation backed purely by in-memory state. Every method first
// consults its canned `*_result` field and returns the stored error when one is set.
impl GpioDevice for DummyDevice {
// Ignores the device number and always creates an 8-line device.
fn open(_device: u32) -> Result<Self>
where
Self: Sized,
{
Ok(DummyDevice::new(8))
}
// Returns the configured line count unless an error was injected.
fn num_gpios(&self) -> Result<u16> {
if self.num_gpios_result.is_err() {
return self.num_gpios_result;
}
Ok(self.ngpio)
}
// Returns the stored name of line `gpio`; asserts that `gpio` is within `gpio_names`.
fn gpio_name(&self, gpio: u16) -> Result<String> {
assert!((gpio as usize) < self.gpio_names.len());
if self.gpio_name_result.is_err() {
return self.gpio_name_result.clone();
}
Ok(self.gpio_names[gpio as usize].clone())
}
// Returns the direction last stored for line `gpio`.
fn direction(&self, gpio: u16) -> Result<u8> {
if self.direction_result.is_err() {
return self.direction_result;
}
Ok(self.state.read().unwrap()[gpio as usize].dir)
}
// Stores `dir` for line `gpio` and updates the line value to match: cleared for NONE,
// kept for IN, set to `value` for OUT. Any other direction is rejected with
// `GpioDirectionInvalid`; note that `dir` has already been stored at that point.
fn set_direction(&self, gpio: u16, dir: u8, value: u32) -> Result<()> {
if self.set_direction_result.is_err() {
return self.set_direction_result;
}
self.state.write().unwrap()[gpio as usize].dir = dir;
self.state.write().unwrap()[gpio as usize].val = match dir {
VIRTIO_GPIO_DIRECTION_NONE => None,
VIRTIO_GPIO_DIRECTION_IN => self.state.read().unwrap()[gpio as usize].val,
VIRTIO_GPIO_DIRECTION_OUT => Some(value as u16),
_ => return Err(Error::GpioDirectionInvalid(dir as u32)),
};
Ok(())
}
// Returns the stored value of line `gpio`, or `GpioCurrentValueInvalid` when none is set.
fn value(&self, gpio: u16) -> Result<u8> {
if self.value_result.is_err() {
return self.value_result;
}
if let Some(val) = self.state.read().unwrap()[gpio as usize].val {
Ok(val as u8)
} else {
Err(Error::GpioCurrentValueInvalid)
}
}
// Stores `value` (narrowed to `u16`) for line `gpio`.
fn set_value(&self, gpio: u16, value: u32) -> Result<()> {
if self.set_value_result.is_err() {
return self.set_value_result;
}
self.state.write().unwrap()[gpio as usize].val = Some(value as u16);
Ok(())
}
// Accepts any IRQ type without recording it.
fn set_irq_type(&self, _gpio: u16, _value: u16) -> Result<()> {
if self.set_irq_type_result.is_err() {
return self.set_irq_type_result;
}
Ok(())
}
// Reports an interrupt immediately unless an error was injected.
fn wait_for_interrupt(&self, _gpio: u16) -> Result<bool> {
if self.wait_for_irq_result.is_err() {
return self.wait_for_irq_result;
}
Ok(true)
}
}
use crate::mock_gpio::MockGpioDevice;
#[test]
fn test_verify_gpio_controller() {
@ -689,7 +548,7 @@ pub(crate) mod tests {
// Controller adds '\0' for each line.
let names_size = size_of_val(&gpio_names) + gpio_names.len();
let mut device = DummyDevice::new(NGPIO);
let mut device = MockGpioDevice::new(NGPIO);
device.gpio_names.clear();
device.gpio_names.append(&mut gpio_names.clone());
let controller = GpioController::new(device).unwrap();
@ -743,7 +602,7 @@ pub(crate) mod tests {
#[test]
fn test_verify_gpio_operation() {
const NGPIO: u16 = 256;
let device = DummyDevice::new(NGPIO);
let device = MockGpioDevice::new(NGPIO);
let controller = GpioController::new(device).unwrap();
for gpio in 0..NGPIO {
@ -869,19 +728,19 @@ pub(crate) mod tests {
const NGPIO: u16 = 256;
// Get num lines failure
let error = Error::GpioOperationFailed("get-num-lines");
let mut device = DummyDevice::new(NGPIO);
let mut device = MockGpioDevice::new(NGPIO);
device.num_gpios_result = Err(error);
assert_eq!(GpioController::new(device).unwrap_err(), error);
// Get line name failure
let error = Error::GpioOperationFailed("get-line-name");
let mut device = DummyDevice::new(NGPIO);
let mut device = MockGpioDevice::new(NGPIO);
device.gpio_name_result = Err(error);
assert_eq!(GpioController::new(device).unwrap_err(), error);
// Get direction failure
let error = Error::GpioOperationFailed("get-direction");
let mut device = DummyDevice::new(NGPIO);
let mut device = MockGpioDevice::new(NGPIO);
device.direction_result = Err(error);
assert_eq!(GpioController::new(device).unwrap_err(), error);
}
@ -889,7 +748,7 @@ pub(crate) mod tests {
#[test]
fn test_gpio_set_direction_failure() {
const NGPIO: u16 = 256;
let device = DummyDevice::new(NGPIO);
let device = MockGpioDevice::new(NGPIO);
let controller = GpioController::new(device).unwrap();
for gpio in 0..NGPIO {
@ -919,7 +778,7 @@ pub(crate) mod tests {
#[test]
fn test_gpio_set_value_failure() {
const NGPIO: u16 = 256;
let device = DummyDevice::new(NGPIO);
let device = MockGpioDevice::new(NGPIO);
let controller = GpioController::new(device).unwrap();
for gpio in 0..NGPIO {
@ -937,7 +796,7 @@ pub(crate) mod tests {
#[test]
fn test_gpio_set_irq_type_failure() {
const NGPIO: u16 = 256;
let device = DummyDevice::new(NGPIO);
let device = MockGpioDevice::new(NGPIO);
let controller = GpioController::new(device).unwrap();
for gpio in 0..NGPIO {
@ -991,7 +850,7 @@ pub(crate) mod tests {
fn test_gpio_wait_for_interrupt_failure() {
const NGPIO: u16 = 256;
let err = Error::GpioIrqTypeInvalid(0);
let mut device = DummyDevice::new(NGPIO);
let mut device = MockGpioDevice::new(NGPIO);
device.wait_for_irq_result = Err(err);
@ -1005,7 +864,7 @@ pub(crate) mod tests {
#[test]
fn test_gpio_operation_failure() {
const NGPIO: u16 = 256;
let device = DummyDevice::new(NGPIO);
let device = MockGpioDevice::new(NGPIO);
let controller = GpioController::new(device).unwrap();
for gpio in 0..NGPIO {

View File

@ -9,11 +9,15 @@
mod backend;
#[cfg(target_env = "gnu")]
mod gpio;
#[cfg(all(target_env = "gnu", any(test, feature = "mock_gpio")))]
mod mock_gpio;
#[cfg(target_env = "gnu")]
mod vhu_gpio;
#[cfg(target_env = "gnu")]
mod virtio_gpio;
#[cfg(target_env = "gnu")]
fn main() -> backend::Result<()> {
fn main() {
backend::gpio_init()
}

View File

@ -0,0 +1,161 @@
// Mock GPIO backend device for testing
//
// Copyright 2023 Linaro Ltd. All Rights Reserved.
// Viresh Kumar <viresh.kumar@linaro.org>
//
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use log::info;
use std::sync::RwLock;
use crate::gpio::{Error, GpioDevice, GpioState, Result};
use crate::virtio_gpio::*;
/// In-memory GPIO device used in place of real hardware.
///
/// Line state is kept in memory only. Every `*_result` field is an injectable
/// outcome for the `GpioDevice` method of the matching name: when it holds an
/// `Err`, that method returns the error instead of doing its work. The payload
/// of an `Ok` value is ignored; only the `Ok`/`Err` distinction is consulted.
#[derive(Debug)]
pub(crate) struct MockGpioDevice {
// Number of lines, as reported by `num_gpios()`.
ngpio: u16,
// Per-line names, as reported by `gpio_name()`; `new()` fills in "dummy<N>".
pub(crate) gpio_names: Vec<String>,
// Per-line direction/value/irq-type state, indexed by line number.
state: RwLock<Vec<GpioState>>,
// Injected outcome for `num_gpios()`.
pub num_gpios_result: Result<u16>,
// Injected outcome for `gpio_name()`.
pub gpio_name_result: Result<String>,
// Injected outcome for `direction()`.
pub direction_result: Result<u8>,
// Injected outcome for `set_direction()`.
set_direction_result: Result<()>,
// Injected outcome for `value()`.
value_result: Result<u8>,
// Injected outcome for `set_value()`.
set_value_result: Result<()>,
// Injected outcome for `set_irq_type()`.
set_irq_type_result: Result<()>,
// Injected outcome for `wait_for_interrupt()`.
pub(crate) wait_for_irq_result: Result<bool>,
}
impl MockGpioDevice {
    /// Builds a mock device exposing `ngpio` lines.
    ///
    /// Lines are named `dummy0`, `dummy1`, ... and start with no direction, no
    /// value and no interrupt type. All injectable results start out as `Ok`,
    /// so every operation succeeds until a caller overrides one of them.
    pub(crate) fn new(ngpio: u16) -> Self {
        let line_count = usize::from(ngpio);

        // State shared by every line right after creation.
        let initial_line = GpioState {
            dir: VIRTIO_GPIO_DIRECTION_NONE,
            val: None,
            irq_type: VIRTIO_GPIO_IRQ_TYPE_NONE,
        };

        Self {
            ngpio,
            gpio_names: (0..ngpio).map(|line| format!("dummy{}", line)).collect(),
            state: RwLock::new(vec![initial_line; line_count]),
            num_gpios_result: Ok(0),
            gpio_name_result: Ok(String::new()),
            direction_result: Ok(0),
            set_direction_result: Ok(()),
            value_result: Ok(0),
            set_value_result: Ok(()),
            set_irq_type_result: Ok(()),
            wait_for_irq_result: Ok(true),
        }
    }
}
impl GpioDevice for MockGpioDevice {
fn open(ngpios: u32) -> Result<Self>
where
Self: Sized,
{
Ok(MockGpioDevice::new(ngpios.try_into().unwrap()))
}
fn num_gpios(&self) -> Result<u16> {
if self.num_gpios_result.is_err() {
return self.num_gpios_result;
}
Ok(self.ngpio)
}
fn gpio_name(&self, gpio: u16) -> Result<String> {
assert!((gpio as usize) < self.gpio_names.len());
if self.gpio_name_result.is_err() {
return self.gpio_name_result.clone();
}
Ok(self.gpio_names[gpio as usize].clone())
}
fn direction(&self, gpio: u16) -> Result<u8> {
if self.direction_result.is_err() {
return self.direction_result;
}
Ok(self.state.read().unwrap()[gpio as usize].dir)
}
fn set_direction(&self, gpio: u16, dir: u8, value: u32) -> Result<()> {
info!(
"gpio {} set direction to {}",
self.gpio_names[gpio as usize], dir
);
if self.set_direction_result.is_err() {
return self.set_direction_result;
}
self.state.write().unwrap()[gpio as usize].dir = dir;
self.state.write().unwrap()[gpio as usize].val = match dir {
VIRTIO_GPIO_DIRECTION_NONE => None,
VIRTIO_GPIO_DIRECTION_IN => self.state.read().unwrap()[gpio as usize].val,
VIRTIO_GPIO_DIRECTION_OUT => Some(value as u16),
_ => return Err(Error::GpioDirectionInvalid(dir as u32)),
};
Ok(())
}
fn value(&self, gpio: u16) -> Result<u8> {
if self.value_result.is_err() {
return self.value_result;
}
if let Some(val) = self.state.read().unwrap()[gpio as usize].val {
Ok(val as u8)
} else {
Err(Error::GpioCurrentValueInvalid)
}
}
fn set_value(&self, gpio: u16, value: u32) -> Result<()> {
info!(
"gpio {} set value to {}",
self.gpio_names[gpio as usize], value
);
if self.set_value_result.is_err() {
return self.set_value_result;
}
self.state.write().unwrap()[gpio as usize].val = Some(value as u16);
Ok(())
}
fn set_irq_type(&self, gpio: u16, value: u16) -> Result<()> {
info!(
"gpio {} set irq type to {}",
self.gpio_name(gpio).unwrap(),
value
);
if self.set_irq_type_result.is_err() {
return self.set_irq_type_result;
}
Ok(())
}
fn wait_for_interrupt(&self, _gpio: u16) -> Result<bool> {
if self.wait_for_irq_result.is_err() {
return self.wait_for_irq_result;
}
Ok(true)
}
}

View File

@ -30,7 +30,8 @@ use vm_memory::{
use vmm_sys_util::epoll::EventSet;
use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK};
use crate::gpio::{GpioController, GpioDevice, VIRTIO_GPIO_IRQ_TYPE_NONE};
use crate::gpio::{GpioController, GpioDevice};
use crate::virtio_gpio::VIRTIO_GPIO_IRQ_TYPE_NONE;
/// Possible values of the status field
const VIRTIO_GPIO_STATUS_OK: u8 = 0x0;
@ -423,7 +424,7 @@ impl<D: 'static + GpioDevice + Sync + Send> VhostUserBackendMut<VringRwLock, ()>
}
fn set_event_idx(&mut self, enabled: bool) {
dbg!(self.event_idx = enabled);
self.event_idx = enabled;
}
fn update_memory(&mut self, mem: GuestMemoryAtomic<GuestMemoryMmap>) -> IoResult<()> {
@ -505,9 +506,10 @@ mod tests {
use super::Error;
use super::*;
use crate::gpio::tests::DummyDevice;
use crate::gpio::Error as GpioError;
use crate::gpio::*;
use crate::mock_gpio::MockGpioDevice;
use crate::virtio_gpio::*;
// Prepares a single chain of descriptors for request queue
fn prepare_desc_chain<R: ByteValued>(
@ -629,7 +631,7 @@ mod tests {
}
// Validate descriptor chains after processing them, checks pass/failure of
// operation and the value of the buffers updated by the `DummyDevice`.
// operation and the value of the buffers updated by the `MockGpioDevice`.
fn validate_desc_chains(
desc_chains: Vec<GpioDescriptorChain>,
status: u8,
@ -656,7 +658,7 @@ mod tests {
fn test_gpio_process_requests_success() {
const NGPIO: u16 = 256;
const GPIO: u16 = 5;
let device = DummyDevice::new(NGPIO);
let device = MockGpioDevice::new(NGPIO);
let controller = GpioController::new(device).unwrap();
let backend = VhostUserGpioBackend::new(controller).unwrap();
let mem = GuestMemoryAtomic::new(
@ -707,7 +709,7 @@ mod tests {
fn test_gpio_process_requests_failure() {
const NGPIO: u16 = 256;
const GPIO: u16 = 5;
let device = DummyDevice::new(NGPIO);
let device = MockGpioDevice::new(NGPIO);
let controller = GpioController::new(device).unwrap();
let backend = VhostUserGpioBackend::new(controller).unwrap();
let mem = GuestMemoryAtomic::new(
@ -808,7 +810,7 @@ mod tests {
fn test_gpio_process_events_success() {
const NGPIO: u16 = 256;
const GPIO: u16 = 5;
let device = DummyDevice::new(NGPIO);
let device = MockGpioDevice::new(NGPIO);
let controller = GpioController::new(device).unwrap();
let mut backend = VhostUserGpioBackend::new(controller).unwrap();
let mem = GuestMemoryAtomic::new(
@ -862,7 +864,7 @@ mod tests {
fn test_gpio_process_events_multi_success() {
const NGPIO: u16 = 256;
const GPIO: u16 = 5;
let device = DummyDevice::new(NGPIO);
let device = MockGpioDevice::new(NGPIO);
let controller = GpioController::new(device).unwrap();
let mut backend = VhostUserGpioBackend::new(controller).unwrap();
let mem = GuestMemoryAtomic::new(
@ -951,7 +953,7 @@ mod tests {
fn test_gpio_process_events_failure() {
const NGPIO: u16 = 256;
let err = GpioError::GpioIrqTypeInvalid(0);
let mut device = DummyDevice::new(NGPIO);
let mut device = MockGpioDevice::new(NGPIO);
// This will make process-request fail later with
// VIRTIO_GPIO_IRQ_STATUS_INVALID error.
@ -1104,7 +1106,7 @@ mod tests {
// Controller adds '\0' for each line.
let names_size = std::mem::size_of_val(&gpio_names) + gpio_names.len();
let mut device = DummyDevice::new(NGPIO);
let mut device = MockGpioDevice::new(NGPIO);
device.gpio_names.clear();
device.gpio_names.append(&mut gpio_names);
let controller = GpioController::new(device).unwrap();

View File

@ -0,0 +1,33 @@
// VirtIO GPIO definitions
//
// Copyright 2023 Linaro Ltd. All Rights Reserved.
// Viresh Kumar <viresh.kumar@linaro.org>
//
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
// Virtio specification definitions

// Virtio GPIO request types

/// Request type: get line names.
pub const VIRTIO_GPIO_MSG_GET_LINE_NAMES: u16 = 0x0001;
/// Request type: get the direction of a line.
pub const VIRTIO_GPIO_MSG_GET_DIRECTION: u16 = 0x0002;
/// Request type: set the direction of a line.
pub const VIRTIO_GPIO_MSG_SET_DIRECTION: u16 = 0x0003;
/// Request type: get the value of a line.
pub const VIRTIO_GPIO_MSG_GET_VALUE: u16 = 0x0004;
/// Request type: set the value of a line.
pub const VIRTIO_GPIO_MSG_SET_VALUE: u16 = 0x0005;
/// Request type: set the interrupt type of a line.
pub const VIRTIO_GPIO_MSG_IRQ_TYPE: u16 = 0x0006;

// Direction types

/// Direction: none.
pub const VIRTIO_GPIO_DIRECTION_NONE: u8 = 0x00;
/// Direction: output.
pub const VIRTIO_GPIO_DIRECTION_OUT: u8 = 0x01;
/// Direction: input.
pub const VIRTIO_GPIO_DIRECTION_IN: u8 = 0x02;

// Virtio GPIO IRQ types (bit flags)

/// IRQ type: none.
pub const VIRTIO_GPIO_IRQ_TYPE_NONE: u16 = 0x00;
/// IRQ type: rising edge.
pub const VIRTIO_GPIO_IRQ_TYPE_EDGE_RISING: u16 = 0x01;
/// IRQ type: falling edge.
pub const VIRTIO_GPIO_IRQ_TYPE_EDGE_FALLING: u16 = 0x02;
/// IRQ type: both edges (rising and falling flags combined).
pub const VIRTIO_GPIO_IRQ_TYPE_EDGE_BOTH: u16 =
VIRTIO_GPIO_IRQ_TYPE_EDGE_RISING | VIRTIO_GPIO_IRQ_TYPE_EDGE_FALLING;
/// IRQ type: level high.
pub const VIRTIO_GPIO_IRQ_TYPE_LEVEL_HIGH: u16 = 0x04;
/// IRQ type: level low.
pub const VIRTIO_GPIO_IRQ_TYPE_LEVEL_LOW: u16 = 0x08;
/// Mask of every edge and level IRQ type flag defined above.
pub const VIRTIO_GPIO_IRQ_TYPE_ALL: u16 = VIRTIO_GPIO_IRQ_TYPE_EDGE_BOTH
| VIRTIO_GPIO_IRQ_TYPE_LEVEL_HIGH
| VIRTIO_GPIO_IRQ_TYPE_LEVEL_LOW;

View File

@ -0,0 +1,15 @@
# Changelog
## [Unreleased]
### Added
### Changed
### Fixed
### Deprecated
## [0.1.0]
First release

View File

@ -11,19 +11,23 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
xen = ["vm-memory/xen", "vhost/xen", "vhost-user-backend/xen"]
[dependencies]
clap = { version = "4.1", features = ["derive"] }
clap = { version = "4.4", features = ["derive"] }
env_logger = "0.10"
libc = "0.2"
log = "0.4"
thiserror = "1.0"
vhost = { version = "0.6", features = ["vhost-user-slave"] }
vhost-user-backend = "0.8"
virtio-bindings = "0.2"
virtio-queue = "0.7"
vm-memory = "0.10"
vhost = { version = "0.8", features = ["vhost-user-slave"] }
vhost-user-backend = "0.10"
virtio-bindings = "0.2.1"
virtio-queue = "0.9"
vm-memory = "0.12"
vmm-sys-util = "0.11"
[dev-dependencies]
virtio-queue = { version = "0.7", features = ["test-utils"] }
vm-memory = { version = "0.10", features = ["backend-mmap", "backend-atomic"] }
assert_matches = "1.5"
virtio-queue = { version = "0.9", features = ["test-utils"] }
vm-memory = { version = "0.12", features = ["backend-mmap", "backend-atomic"] }

View File

@ -0,0 +1 @@
../../LICENSE-APACHE

View File

@ -0,0 +1 @@
../../LICENSE-BSD-3-Clause

View File

@ -183,7 +183,7 @@ impl SmbusMsg {
///
/// These smbus related functions try to reverse what Linux does, only
/// support basic modes (up to word transfer).
fn new(reqs: &mut [I2cReq]) -> Result<SmbusMsg> {
fn new(reqs: &[I2cReq]) -> Result<SmbusMsg> {
let mut data = I2cSmbusData {
block: [0; I2C_SMBUS_BLOCK_MAX + 2],
};

View File

@ -8,10 +8,11 @@
mod i2c;
mod vhu_i2c;
use log::{info, warn};
use log::{error, info, warn};
use std::num::ParseIntError;
use std::process::exit;
use std::sync::{Arc, RwLock};
use std::thread::spawn;
use std::thread::{spawn, JoinHandle};
use clap::Parser;
use thiserror::Error as ThisError;
@ -24,7 +25,7 @@ use vhu_i2c::VhostUserI2cBackend;
type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, PartialEq, ThisError)]
#[derive(Debug, ThisError)]
/// Errors related to low level i2c helpers
pub(crate) enum Error {
#[error("Invalid socket count: {0}")]
@ -41,6 +42,10 @@ pub(crate) enum Error {
ParseFailure(ParseIntError),
#[error("Failed to join threads")]
FailedJoiningThreads,
#[error("Could not create backend: {0}")]
CouldNotCreateBackend(vhu_i2c::Error),
#[error("Could not create daemon: {0}")]
CouldNotCreateDaemon(vhost_user_backend::Error),
}
#[derive(Parser, Debug)]
@ -182,7 +187,7 @@ fn start_backend<D: 'static + I2cDevice + Send + Sync>(args: I2cArgs) -> Result<
let socket = config.socket_path.to_owned() + &i.to_string();
let i2c_map = i2c_map.clone();
let handle = spawn(move || loop {
let handle: JoinHandle<Result<()>> = spawn(move || loop {
// A separate thread is spawned for each socket and can connect to a separate guest.
// These are run in an infinite loop to not require the daemon to be restarted once a
// guest exits.
@ -192,7 +197,7 @@ fn start_backend<D: 'static + I2cDevice + Send + Sync>(args: I2cArgs) -> Result<
// trouble to other threads/guests or the main() function and should be safe for the
// daemon.
let backend = Arc::new(RwLock::new(
VhostUserI2cBackend::new(i2c_map.clone()).unwrap(),
VhostUserI2cBackend::new(i2c_map.clone()).map_err(Error::CouldNotCreateBackend)?,
));
let listener = Listener::new(socket.clone(), true).unwrap();
@ -201,7 +206,7 @@ fn start_backend<D: 'static + I2cDevice + Send + Sync>(args: I2cArgs) -> Result<
backend.clone(),
GuestMemoryAtomic::new(GuestMemoryMmap::new()),
)
.unwrap();
.map_err(Error::CouldNotCreateDaemon)?;
daemon.start(listener).unwrap();
@ -210,7 +215,7 @@ fn start_backend<D: 'static + I2cDevice + Send + Sync>(args: I2cArgs) -> Result<
info!("Stopping cleanly.");
}
Err(vhost_user_backend::Error::HandleRequest(
vhost_user::Error::PartialMessage,
vhost_user::Error::PartialMessage | vhost_user::Error::Disconnected,
)) => {
info!("vhost-user connection closed with partial message. If the VM is shutting down, this is expected behavior; otherwise, it might be a bug.");
}
@ -227,20 +232,25 @@ fn start_backend<D: 'static + I2cDevice + Send + Sync>(args: I2cArgs) -> Result<
}
for handle in handles {
handle.join().map_err(|_| Error::FailedJoiningThreads)?;
handle.join().map_err(|_| Error::FailedJoiningThreads)??;
}
Ok(())
}
fn main() -> Result<()> {
fn main() {
env_logger::init();
start_backend::<PhysDevice>(I2cArgs::parse())
if let Err(e) = start_backend::<PhysDevice>(I2cArgs::parse()) {
error!("{e}");
exit(1);
}
}
#[cfg(test)]
mod tests {
use assert_matches::assert_matches;
use super::*;
use crate::i2c::tests::DummyDevice;
@ -277,12 +287,12 @@ mod tests {
config.push(5).unwrap();
config.push(6).unwrap();
assert_eq!(
assert_matches!(
config.push(invalid_addr).unwrap_err(),
Error::ClientAddressInvalid(invalid_addr)
Error::ClientAddressInvalid(a) if a == invalid_addr
);
assert_eq!(
assert_matches!(
config.push(5).unwrap_err(),
Error::ClientAddressDuplicate(5)
);
@ -294,21 +304,21 @@ mod tests {
// Invalid client address
let cmd_args = I2cArgs::from_args(socket_name, "1:4d", 5);
assert_eq!(
assert_matches!(
I2cConfiguration::try_from(cmd_args).unwrap_err(),
Error::ParseFailure("4d".parse::<u16>().unwrap_err())
Error::ParseFailure(e) if e == "4d".parse::<u16>().unwrap_err()
);
// Zero socket count
let cmd_args = I2cArgs::from_args(socket_name, "1:4", 0);
assert_eq!(
assert_matches!(
I2cConfiguration::try_from(cmd_args).unwrap_err(),
Error::SocketCountInvalid(0)
);
// Duplicate client address: 4
let cmd_args = I2cArgs::from_args(socket_name, "1:4,2:32:21,5:4:23", 5);
assert_eq!(
assert_matches!(
I2cConfiguration::try_from(cmd_args).unwrap_err(),
Error::ClientAddressDuplicate(4)
);
@ -351,7 +361,7 @@ mod tests {
.push(DeviceConfig::new_with(2, vec![32, 21]))
.unwrap();
assert_eq!(
assert_matches!(
config
.push(DeviceConfig::new_with(5, vec![4, 23]))
.unwrap_err(),
@ -368,11 +378,11 @@ mod tests {
.push(DeviceConfig::new_with(5, vec![10, 23]))
.unwrap();
assert_eq!(
assert_matches!(
config
.push(DeviceConfig::new_with(1, vec![32, 21]))
.unwrap_err(),
Error::AdapterDuplicate(1.to_string())
Error::AdapterDuplicate(n) if n == "1"
);
}
@ -382,7 +392,7 @@ mod tests {
let socket_name = "~/path/not/present/i2c";
let cmd_args = I2cArgs::from_args(socket_name, "1:4,3:5", 5);
assert_eq!(
assert_matches!(
start_backend::<DummyDevice>(cmd_args).unwrap_err(),
Error::FailedJoiningThreads
);

View File

@ -0,0 +1,17 @@
# Changelog
## [Unreleased]
### Added
- optional "mock_gpio" feature for testing
### Changed
### Fixed
### Deprecated
## [0.1.0]
First release

View File

@ -9,22 +9,26 @@ keywords = ["rng", "vhost", "virt", "backend"]
license = "Apache-2.0 OR BSD-3-Clause"
edition = "2021"
[features]
xen = ["vm-memory/xen", "vhost/xen", "vhost-user-backend/xen"]
[dependencies]
clap = { version = "4.1", features = ["derive"] }
clap = { version = "4.4", features = ["derive"] }
env_logger = "0.10"
epoll = "4.3"
libc = "0.2"
log = "0.4"
rand = "0.8.5"
tempfile = "3.4"
tempfile = "3.5"
thiserror = "1.0"
vhost = { version = "0.6", features = ["vhost-user-slave"] }
vhost-user-backend = "0.8"
virtio-bindings = "0.2"
virtio-queue = "0.7"
vm-memory = "0.10"
vhost = { version = "0.8", features = ["vhost-user-slave"] }
vhost-user-backend = "0.10"
virtio-bindings = "0.2.1"
virtio-queue = "0.9"
vm-memory = "0.12"
vmm-sys-util = "0.11"
[dev-dependencies]
virtio-queue = { version = "0.7", features = ["test-utils"] }
vm-memory = { version = "0.10", features = ["backend-mmap", "backend-atomic"] }
assert_matches = "1.5"
virtio-queue = { version = "0.9", features = ["test-utils"] }
vm-memory = { version = "0.12", features = ["backend-mmap", "backend-atomic"] }

View File

@ -0,0 +1 @@
../../LICENSE-APACHE

View File

@ -0,0 +1 @@
../../LICENSE-BSD-3-Clause

View File

@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
mod vhu_rng;
use log::{info, warn};
use log::{error, info, warn};
use std::fs::File;
use std::process::exit;
use std::sync::{Arc, Mutex, RwLock};
use std::thread;
use std::thread::{self, JoinHandle};
use clap::Parser;
use thiserror::Error as ThisError;
@ -24,7 +25,7 @@ const VHU_RNG_MAX_PERIOD_MS: u128 = 65536;
type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, Eq, PartialEq, ThisError)]
#[derive(Debug, ThisError)]
/// Errors related to vhost-device-rng daemon.
pub(crate) enum Error {
#[error("RNG source file doesn't exists or can't be accessed")]
@ -35,6 +36,10 @@ pub(crate) enum Error {
InvalidSocketCount(u32),
#[error("Threads can't be joined")]
FailedJoiningThreads,
#[error("Could not create backend: {0}")]
CouldNotCreateBackend(std::io::Error),
#[error("Could not create daemon: {0}")]
CouldNotCreateDaemon(vhost_user_backend::Error),
}
#[derive(Clone, Parser, Debug, PartialEq)]
@ -109,13 +114,14 @@ pub(crate) fn start_backend(config: VuRngConfig) -> Result<()> {
let max_bytes = config.max_bytes;
let random = Arc::clone(&random_file);
let handle = thread::spawn(move || loop {
let handle: JoinHandle<Result<()>> = thread::spawn(move || loop {
// If creating the VuRngBackend isn't successful there isn't much else to do than
// killing the thread, which .unwrap() does. When that happens an error code is
// generated and displayed by the runtime mechanic. Killing a thread doesn't affect
// the other threads spun-off by the daemon.
let vu_rng_backend = Arc::new(RwLock::new(
VuRngBackend::new(random.clone(), period_ms, max_bytes).unwrap(),
VuRngBackend::new(random.clone(), period_ms, max_bytes)
.map_err(Error::CouldNotCreateBackend)?,
));
let mut daemon = VhostUserDaemon::new(
@ -123,7 +129,7 @@ pub(crate) fn start_backend(config: VuRngConfig) -> Result<()> {
Arc::clone(&vu_rng_backend),
GuestMemoryAtomic::new(GuestMemoryMmap::new()),
)
.unwrap();
.map_err(Error::CouldNotCreateDaemon)?;
let listener = Listener::new(socket.clone(), true).unwrap();
daemon.start(listener).unwrap();
@ -133,7 +139,7 @@ pub(crate) fn start_backend(config: VuRngConfig) -> Result<()> {
info!("Stopping cleanly.");
}
Err(vhost_user_backend::Error::HandleRequest(
vhost_user::Error::PartialMessage,
vhost_user::Error::PartialMessage | vhost_user::Error::Disconnected,
)) => {
info!("vhost-user connection closed with partial message. If the VM is shutting down, this is expected behavior; otherwise, it might be a bug.");
}
@ -155,23 +161,28 @@ pub(crate) fn start_backend(config: VuRngConfig) -> Result<()> {
}
for handle in handles {
handle.join().map_err(|_| Error::FailedJoiningThreads)?;
handle.join().map_err(|_| Error::FailedJoiningThreads)??;
}
Ok(())
}
fn main() -> Result<()> {
fn main() {
env_logger::init();
start_backend(VuRngConfig::try_from(RngArgs::parse()).unwrap())
if let Err(e) = VuRngConfig::try_from(RngArgs::parse()).and_then(start_backend) {
error!("{e}");
exit(1);
}
}
#[cfg(test)]
mod tests {
use super::*;
use assert_matches::assert_matches;
use tempfile::tempdir;
use super::*;
#[test]
fn verify_cmd_line_arguments() {
// All parameters have default values, except for the socket path. White spaces are
@ -190,22 +201,22 @@ mod tests {
// All configuration elements should be what we expect them to be. Using
// VuRngConfig::try_from() ensures that strings have been properly trimmed.
assert_eq!(
VuRngConfig::try_from(default_args),
VuRngConfig::try_from(args.clone())
VuRngConfig::try_from(default_args).unwrap(),
VuRngConfig::try_from(args.clone()).unwrap()
);
// Setting a invalid period should trigger an InvalidPeriodInput error.
let mut invalid_period_args = args.clone();
invalid_period_args.period = VHU_RNG_MAX_PERIOD_MS + 1;
assert_eq!(
assert_matches!(
VuRngConfig::try_from(invalid_period_args),
Err(Error::InvalidPeriodInput(VHU_RNG_MAX_PERIOD_MS + 1))
Err(Error::InvalidPeriodInput(p)) if p == VHU_RNG_MAX_PERIOD_MS + 1
);
// Setting the socket count to 0 should trigger an InvalidSocketCount error.
let mut invalid_socket_count_args = args;
invalid_socket_count_args.socket_count = 0;
assert_eq!(
assert_matches!(
VuRngConfig::try_from(invalid_socket_count_args),
Err(Error::InvalidSocketCount(0))
);
@ -226,7 +237,7 @@ mod tests {
};
// An invalid RNG source file should trigger an AccessRngSourceFile error.
assert_eq!(
assert_matches!(
start_backend(config.clone()).unwrap_err(),
Error::AccessRngSourceFile
);
@ -235,7 +246,7 @@ mod tests {
// of the socket file. Since the latter is invalid the vhost_user::Listener will
// throw an error, forcing the thread to exit and the call to handle.join() to fail.
config.rng_source = random_path.to_str().unwrap().to_string();
assert_eq!(
assert_matches!(
start_backend(config).unwrap_err(),
Error::FailedJoiningThreads
);

View File

@ -124,7 +124,7 @@ impl<T: Read> VuRngBackend<T> {
let descriptor = descriptors[0];
let mut to_read = descriptor.len() as usize;
let mut timer = &mut self.timer;
let timer = &mut self.timer;
if !descriptor.is_write_only() {
return Err(VuRngError::UnexpectedReadDescriptor);

View File

@ -0,0 +1,15 @@
# Changelog
## [Unreleased]
### Added
### Changed
### Fixed
### Deprecated
## [0.1.0]
First release

View File

@ -0,0 +1,27 @@
[package]
name = "vhost-device-scmi"
version = "0.1.0"
authors = ["Milan Zamazal <mzamazal@redhat.com>"]
description = "vhost-user SCMI backend device"
repository = "https://github.com/rust-vmm/vhost-device"
readme = "README.md"
keywords = ["scmi", "vhost", "virt", "backend"]
license = "Apache-2.0 OR BSD-3-Clause"
edition = "2021"
[dependencies]
clap = { version = "4.4", features = ["derive"] }
env_logger = "0.10"
itertools = "0.11"
log = "0.4"
thiserror = "1.0"
vhost = { version = "0.8", features = ["vhost-user-slave"] }
vhost-user-backend = "0.10"
virtio-bindings = "0.2"
virtio-queue = "0.9"
vm-memory = "0.12"
vmm-sys-util = "0.11"
[dev-dependencies]
assert_matches = "1.5"
virtio-queue = { version = "0.9", features = ["test-utils"] }

View File

@ -0,0 +1 @@
../../LICENSE-APACHE

View File

@ -0,0 +1 @@
../../LICENSE-BSD-3-Clause

View File

@ -0,0 +1,104 @@
# vhost-device-scmi
This program is a vhost-user backend for a VirtIO SCMI device.
It provides SCMI access to various entities on the host; not
necessarily only those providing an SCMI interface themselves.
It is tested with QEMU's `-device vhost-user-scmi-pci` but should work
with any virtual machine monitor (VMM) that supports vhost-user. See
the Examples section below.
## Synopsis
**vhost-device-scmi** [*OPTIONS*]
## Options
.. program:: vhost-device-scmi
.. option:: -h, --help
Print help.
.. option:: -s, --socket-path=PATH
Location of the vhost-user Unix domain sockets.
.. option:: -d, --device=SPEC
SCMI device specification in the format `ID,PROPERTY=VALUE,...`.
For example: `-d iio,path=/sys/bus/iio/devices/iio:device0,channel=in_accel`.
Can be used multiple times for multiple exposed devices.
If no device is specified then no device will be provided to the
guest OS but VirtIO SCMI will be still available there.
Use `--help-devices` to list help on all the available devices.
You can set `RUST_LOG` environment variable to `debug` to get maximum
messages on the standard error output.
## Examples
The daemon should be started first:
::
host# vhost-device-scmi --socket-path=scmi.sock --device fake,name=foo
The QEMU invocation needs to create a chardev socket the device can
use to communicate as well as share the guest's memory over a memfd:
::
host# qemu-system \
-chardev socket,path=scmi.sock,id=vscmi \
-device vhost-user-scmi-pci,chardev=vscmi,id=scmi \
-machine YOUR-MACHINE-OPTIONS,memory-backend=mem \
-m 4096 \
-object memory-backend-file,id=mem,size=4G,mem-path=/dev/shm,share=on \
...
## Supported SCMI protocols
The currently supported SCMI protocols are:
- base
- sensor management
Basically only the mandatory and necessary parts of the protocols are
implemented.
See source code (`vhost-device-scmi` crate) documentation for details and how to
add more protocols, host device bindings or other functionality.
## Testing
SCMI is supported only on Arm in Linux. This restriction doesn't
apply to the host, which can be any architecture as long as the guest
is Arm.
The easiest way to test it on the guest side is using the Linux SCMI
Industrial I/O driver there. If a 3-axis accelerometer or gyroscope
VirtIO SCMI device is present and the guest kernel is compiled with
`CONFIG_IIO_SCMI` enabled then the device should be available in
`/sys/bus/iio/devices/`. The vhost-device-scmi fake device is
suitable for this.
Of course, other means of accessing SCMI devices can be used too. The
following Linux kernel command line can be useful to obtain SCMI trace
information, in addition to SCMI related messages in dmesg:
`trace_event=scmi:* ftrace=function ftrace_filter=scmi*`.
### Kernel support for testing
`kernel` subdirectory contains
[instructions](kernel/iio-dummy/README.md) how to create emulated
industrial I/O devices for testing.
## License
This project is licensed under either of
- [Apache License](http://www.apache.org/licenses/LICENSE-2.0), Version 2.0
- [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause)
unless specified in particular files otherwise.

View File

@ -0,0 +1,7 @@
*.cmd
*.ko
*.mod
*.mod.[co]
*.o
Module.symvers
modules.order

View File

@ -0,0 +1,19 @@
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the IIO Dummy Driver
#
# Modified by Milan Zamazal <mzamazal@redhat.com> in 2023 for out of
# tree compilation.
#
obj-m += iio_modified_dummy.o

# On NixOS the kernel build tree lives in the Nix store rather than under
# /lib/modules, so prefix the path with the location of linux.dev.
on_nixos = $(wildcard /etc/NIXOS)
ifeq ($(on_nixos), /etc/NIXOS)
nix_prefix = $(shell nix-build -E '(import <nixpkgs> {}).linux.dev' --no-out-link)
endif

# `all` and `clean` are commands, not files to be built.
.PHONY: all clean

# Use $(MAKE) rather than a literal `make` for the recursive invocations so
# that command-line flags and the jobserver (-j) are passed on to kbuild.
all:
	$(MAKE) -C $(nix_prefix)/lib/modules/$(shell uname -r)/build M=$(PWD) modules

clean:
	$(MAKE) -C $(nix_prefix)/lib/modules/$(shell uname -r)/build M=$(PWD) clean

View File

@ -0,0 +1,185 @@
# Using emulated industrial I/O devices
This is a modified version of the Linux [industrial I/O dummy
driver](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/iio/dummy).
Both the original driver and this modification can provide emulated
industrial I/O devices for testing vhost-device-scmi.
## Modifications in this module
If the stock industrial I/O dummy driver is enough for you, use it
(but you may still want to read the instructions below).
Otherwise, this alternative is provided with the following changes:
- Simplified Makefile for out of tree compilation.
- The accelerometer has three axes instead of just one.
- The Y axis of the accelerometer has offset and scale.
Of course, you can modify it further to your liking if needed.
## How to create emulated industrial I/O devices
Make sure your kernel supports software industrial I/O devices and
industrial I/O with configfs. You can check this by running `modprobe
industrialio_sw_device && modprobe industrialio_configfs`. If any of
the modules is not present, follow the [instructions for recompiling
kernel](#recompiling-kernel-with-industrial-io) below.
Make sure you have the right kernel version. Since Linux 5.19, the
dummy industrial I/O driver is broken. This will probably be fixed in
Linux 6.6.
If you have a broken kernel version, apply the
[fix](./iio-dummy-fix.patch) and compile and install the modified
kernel.
If you want to use the modified module from here, compile it. In
order to do this, you must have kernel development environment
installed, for example:
- Fedora or derivatives: `dnf install kernel-devel kernel-modules make`
- Debian or derivatives: `apt install linux-headers-$(uname -r) make`
- NixOS: `nix-shell '<nixpkgs>' -A linux.dev`
Then you can compile the module, simply running `make` should work on
most distributions.
Insert a dummy industrial I/O kernel module. Either the stock one:
```
# modprobe iio-dummy
```
or the modified one from here:
```
# modprobe industrialio
# modprobe industrialio_configfs
# modprobe industrialio_sw_device
# insmod ./iio_modified_dummy.ko
```
Find out where configfs is mounted: `mount | grep configfs`. It's
typically `/sys/kernel/config`. If configfs is not mounted, mount it
somewhere: `mount -t configfs none MOUNTPOINT`.
Now you can create emulated industrial I/O devices with the stock driver:
```
# mkdir /sys/kernel/config/iio/devices/dummy/my-device
```
And/or with the modified driver from here:
```
# mkdir /sys/kernel/config/iio/devices/dummy-modified/my-device
```
If everything is OK then you can find the device in
`/sys/bus/iio/devices/`.
## Recompiling kernel with industrial I/O
Making a custom kernel is different on each GNU/Linux distribution.
The corresponding documentation can be found for example here:
- Fedora: [https://fedoraproject.org/wiki/Building_a_custom_kernel](https://fedoraproject.org/wiki/Building_a_custom_kernel)
- CentOS Stream: [https://wiki.centos.org/HowTos/BuildingKernelModules](https://wiki.centos.org/HowTos/BuildingKernelModules)
(looks more useful for Fedora builds than CentOS)
- Debian: [https://kernel-team.pages.debian.net/kernel-handbook/ch-common-tasks.html#s-common-official](https://kernel-team.pages.debian.net/kernel-handbook/ch-common-tasks.html#s-common-official)
- NixOS: [https://nixos.wiki/wiki/Linux_kernel](https://nixos.wiki/wiki/Linux_kernel)
Here are instructions for Fedora, similar steps can be used for other
distributions, with distribution specifics as described in the links
above. This is not necessarily the most official or the best way to
do it but it's a way that *actually works* for me.
Note on CentOS Stream 9: The kernel there doesn't contain the needed
modules. Recompiling the kernel on CentOS Stream may be challenging
due to missing build dependencies. If it doesn't work for you, you
can try to use Fedora kernel and modules on CentOS Stream, including
the dummy module compiled on Fedora.
### Install kernel sources
```
# dnf install 'dnf-command(download)'
$ dnf download --source kernel
$ rpm -i kernel-*.src.rpm
# dnf builddep ~/rpmbuild/SPECS/kernel.spec
```
### Change kernel configuration
Not needed for current Fedora but may be needed for e.g. CentOS Stream.
```
# dnf install kernel-devel kernel-modules make rpm-build python3-devel ncurses-devel
$ rpmbuild -bp ~/rpmbuild/SPECS/kernel.spec
$ cd ~/rpmbuild/BUILD/kernel-*/linux-*/
$ cp configs/kernel-VERSION-YOURARCH.config .config
$ make nconfig
```
Configuration options that must be enabled:
- Device Drivers -> Industrial I/O Support -> Enable IIO configuration via configfs
- Device Drivers -> Industrial I/O Support -> Enable software IIO device support
Optionally (you can use the alternative driver from here instead):
- Device Drivers -> Industrial I/O Support -> IIO dummy driver -> An example driver with no hardware requirements
Then copy `.config` back to its original file and don't forget to add
the original architecture specification line there.
### Apply the kernel fix
If the kernel fix from here is needed, copy it to the sources:
```
cp .../iio-dummy-fix.patch ~/rpmbuild/SOURCES/
```
Edit `~/rpmbuild/SPECS/kernel.spec`:
- Uncomment: `%define buildid .local`.
- Add the patch file before: `Patch999999: linux-kernel-test.patch`.
- Add the patch file before: `ApplyOptionalPatch linux-kernel-test.patch`.
### Build the kernel
You can use different options, if you don't need anything extra then
the following builds the most important rpm's:
```
$ rpmbuild -bb --with baseonly --without debug --without debuginfo ~/rpmbuild/SPECS/kernel.spec
```
## Adding industrial I/O dummy module to your kernel
If all you need is to add a missing stock I/O dummy module, you can
try to compile just the module. Switch to kernel sources and run:
```
$ make oldconfig
$ make prepare
$ make modules_prepare
$ make M=drivers/iio/dummy
```
And insert the module:
```
# modprobe industrialio
# modprobe industrialio_configfs
# modprobe industrialio_sw_device
# insmod ./drivers/iio/dummy/iio-dummy.ko
```
If this fails, inspect `dmesg` output and try to figure out what's
wrong. If this fails too, rebuild the whole kernel with the given
module enabled.

View File

@ -0,0 +1,55 @@
Commit 813665564b3d ("iio: core: Convert to use firmware node handle
instead of OF node") switched the kind of nodes to use for label
retrieval in device registration. Probably an unwanted change in that
commit was that if the device has no parent then NULL pointer is
accessed. This is what happens in the stock IIO dummy driver when a
new entry is created in configfs:
# mkdir /sys/kernel/config/iio/devices/dummy/foo
BUG: kernel NULL pointer dereference, address: ...
...
Call Trace:
__iio_device_register
iio_dummy_probe
Since there seems to be no reason to make a parent device of an IIO
dummy device mandatory, lets prevent the invalid memory access in
__iio_device_register when the parent device is NULL. With this
change, the IIO dummy driver works fine with configfs.
Fixes: 813665564b3d ("iio: core: Convert to use firmware node handle instead of OF node")
Reviewed-by: Andy Shevchenko <andriy.shevchenko-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Signed-off-by: Milan Zamazal <mzamazal-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
---
drivers/iio/industrialio-core.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index c117f50d0cf3..adcba832e6fa 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1888,7 +1888,7 @@ static const struct iio_buffer_setup_ops noop_ring_setup_ops;
int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod)
{
struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
- struct fwnode_handle *fwnode;
+ struct fwnode_handle *fwnode = NULL;
int ret;
if (!indio_dev->info)
@@ -1899,7 +1899,8 @@ int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod)
/* If the calling driver did not initialize firmware node, do it here */
if (dev_fwnode(&indio_dev->dev))
fwnode = dev_fwnode(&indio_dev->dev);
- else
+ /* The default dummy IIO device has no parent */
+ else if (indio_dev->dev.parent)
fwnode = dev_fwnode(indio_dev->dev.parent);
device_set_node(&indio_dev->dev, fwnode);
--
2.40.1

View File

@ -0,0 +1,706 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2011 Jonathan Cameron
*
* A reference industrial I/O driver to illustrate the functionality available.
*
* There are numerous real drivers to illustrate the finer points.
* The purpose of this driver is to provide a driver with far more comments
* and explanatory notes than any 'real' driver would have.
* Anyone starting out writing an IIO driver should first make sure they
* understand all of this driver except those bits specifically marked
* as being present to allow us to 'fake' the presence of hardware.
*
* Changes by Milan Zamazal <mzamazal@redhat.com> 2023, for testing
* with vhost-device-scmi:
*
* - Dropped conditional parts.
* - Use 3 axes in the accelerometer device.
* - Define offset and scale for some of the accelerometer axes.
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/iio/iio.h>
#include <linux/iio/sysfs.h>
#include <linux/iio/events.h>
#include <linux/iio/buffer.h>
#include <linux/iio/sw_device.h>
#include "iio_modified_dummy.h"
/*
* configfs item type of the dummy device; iio_dummy_probe() hands it to
* iio_swd_group_init_type_name() after the IIO device has been registered.
*/
static const struct config_item_type iio_dummy_type = {
.ct_owner = THIS_MODULE,
};
/**
* struct iio_dummy_accel_calibscale - realworld to register mapping
* @val: first value in read_raw - here integer part.
* @val2: second value in read_raw etc - here micro part.
* @regval: register value - magic device specific numbers.
*/
struct iio_dummy_accel_calibscale {
int val;
int val2;
int regval; /* what would be written to hardware */
};
/*
* The only calibscale settings iio_dummy_write_raw() accepts: a written
* (val, val2) pair must match one of these entries exactly. Entry 0 is the
* setting selected by iio_dummy_init_device().
*/
static const struct iio_dummy_accel_calibscale dummy_scales[] = {
{ 0, 100, 0x8 }, /* 0.000100 */
{ 0, 133, 0x7 }, /* 0.000133 */
{ 733, 13, 0x9 }, /* 733.000013 */
};
/*
 * iio_dummy_channels - Description of available channels
 *
 * This array of structures tells the IIO core about what the device
 * actually provides for a given channel.
 */
static const struct iio_chan_spec iio_dummy_channels[] = {
	/* indexed ADC channel in_voltage0_raw etc */
	{
		.type = IIO_VOLTAGE,
		/* Channel has a numeric index of 0 */
		.indexed = 1,
		.channel = 0,
		/* What other information is available? */
		.info_mask_separate =
		/*
		 * in_voltage0_raw
		 * Raw (unscaled no bias removal etc) measurement
		 * from the device.
		 */
		BIT(IIO_CHAN_INFO_RAW) |
		/*
		 * in_voltage0_offset
		 * Offset for userspace to apply prior to scale
		 * when converting to standard units (microvolts)
		 */
		BIT(IIO_CHAN_INFO_OFFSET) |
		/*
		 * in_voltage0_scale
		 * Multiplier for userspace to apply post offset
		 * when converting to standard units (microvolts)
		 */
		BIT(IIO_CHAN_INFO_SCALE),
		/*
		 * sampling_frequency
		 * The frequency in Hz at which the channels are sampled
		 */
		.info_mask_shared_by_dir = BIT(IIO_CHAN_INFO_SAMP_FREQ),
		/* The ordering of elements in the buffer via an enum */
		.scan_index = DUMMY_INDEX_VOLTAGE_0,
		.scan_type = { /* Description of storage in buffer */
			.sign = 'u', /* unsigned */
			.realbits = 13, /* 13 bits */
			.storagebits = 16, /* 16 bits used for storage */
			.shift = 0, /* zero shift */
		},
	},
	/* Differential ADC channel in_voltage1-voltage2_raw etc */
	{
		.type = IIO_VOLTAGE,
		.differential = 1,
		/*
		 * Indexing for differential channels uses channel
		 * for the positive part, channel2 for the negative.
		 */
		.indexed = 1,
		.channel = 1,
		.channel2 = 2,
		/*
		 * in_voltage1-voltage2_raw
		 * Raw (unscaled no bias removal etc) measurement
		 * from the device.
		 */
		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
		/*
		 * in_voltage-voltage_scale
		 * Shared version of scale - shared by differential
		 * input channels of type IIO_VOLTAGE.
		 */
		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
		.scan_index = DUMMY_INDEX_DIFFVOLTAGE_1M2,
		.scan_type = { /* Description of storage in buffer */
			.sign = 's', /* signed */
			.realbits = 12, /* 12 bits */
			.storagebits = 16, /* 16 bits used for storage */
			.shift = 0, /* zero shift */
		},
	},
	/* Differential ADC channel in_voltage3-voltage4_raw etc */
	{
		.type = IIO_VOLTAGE,
		.differential = 1,
		.indexed = 1,
		.channel = 3,
		.channel2 = 4,
		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
		.info_mask_shared_by_dir = BIT(IIO_CHAN_INFO_SAMP_FREQ),
		.scan_index = DUMMY_INDEX_DIFFVOLTAGE_3M4,
		.scan_type = {
			.sign = 's',
			.realbits = 11,
			.storagebits = 16,
			.shift = 0,
		},
	},
	/*
	 * 'modified' (i.e. axis specified) acceleration channels
	 * in_accel_[xyz]_raw
	 */
	{
		.type = IIO_ACCEL,
		.modified = 1,
		/* Channel 2 is used for modifiers */
		.channel2 = IIO_MOD_X,
		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
		/*
		 * Internal bias and gain correction values. Applied
		 * by the hardware or driver prior to userspace
		 * seeing the readings. Typically part of hardware
		 * calibration.
		 */
		BIT(IIO_CHAN_INFO_CALIBSCALE) |
		BIT(IIO_CHAN_INFO_CALIBBIAS),
		.info_mask_shared_by_dir = BIT(IIO_CHAN_INFO_SAMP_FREQ),
		.scan_index = DUMMY_INDEX_ACCEL_X,
		.scan_type = { /* Description of storage in buffer */
			.sign = 's', /* signed */
			.realbits = 16, /* 16 bits */
			.storagebits = 16, /* 16 bits used for storage */
			.shift = 0, /* zero shift */
		},
	},
	/* The Y axis is the only acceleration axis with offset and scale */
	{
		.type = IIO_ACCEL,
		.modified = 1,
		/* Channel 2 is used for modifiers */
		.channel2 = IIO_MOD_Y,
		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
		BIT(IIO_CHAN_INFO_OFFSET) |
		BIT(IIO_CHAN_INFO_SCALE) |
		BIT(IIO_CHAN_INFO_CALIBSCALE) |
		BIT(IIO_CHAN_INFO_CALIBBIAS),
		.info_mask_shared_by_dir = BIT(IIO_CHAN_INFO_SAMP_FREQ),
		.scan_index = DUMMY_INDEX_ACCEL_Y,
		.scan_type = { /* Description of storage in buffer */
			.sign = 's', /* signed */
			.realbits = 16, /* 16 bits */
			.storagebits = 16, /* 16 bits used for storage */
			.shift = 0, /* zero shift */
		},
	},
	{
		.type = IIO_ACCEL,
		.modified = 1,
		/* Channel 2 is used for modifiers */
		.channel2 = IIO_MOD_Z,
		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
		BIT(IIO_CHAN_INFO_CALIBSCALE) |
		BIT(IIO_CHAN_INFO_CALIBBIAS),
		.info_mask_shared_by_dir = BIT(IIO_CHAN_INFO_SAMP_FREQ),
		.scan_index = DUMMY_INDEX_ACCEL_Z,
		.scan_type = { /* Description of storage in buffer */
			.sign = 's', /* signed */
			.realbits = 16, /* 16 bits */
			.storagebits = 16, /* 16 bits used for storage */
			.shift = 0, /* zero shift */
		},
	},
	/*
	 * Convenience macro for timestamps. The argument is the index in
	 * the buffer; it must not collide with any scan index used above,
	 * so take the first one after the last acceleration axis.
	 */
	IIO_CHAN_SOFT_TIMESTAMP(DUMMY_INDEX_ACCEL_Z + 1),
	/* DAC channel out_voltage0_raw */
	{
		.type = IIO_VOLTAGE,
		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
		.scan_index = -1, /* No buffer support */
		.output = 1,
		.indexed = 1,
		.channel = 0,
	},
	{
		.type = IIO_STEPS,
		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_ENABLE) |
			BIT(IIO_CHAN_INFO_CALIBHEIGHT),
		.info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
		.scan_index = -1, /* No buffer support */
	},
	{
		.type = IIO_ACTIVITY,
		.modified = 1,
		.channel2 = IIO_MOD_RUNNING,
		.info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
		.scan_index = -1, /* No buffer support */
	},
	{
		.type = IIO_ACTIVITY,
		.modified = 1,
		.channel2 = IIO_MOD_WALKING,
		.info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
		.scan_index = -1, /* No buffer support */
	},
};
/**
 * iio_dummy_read_raw() - data read function.
 * @indio_dev:	the struct iio_dev associated with this device instance
 * @chan:	the channel whose data is to be read
 * @val:	first element of returned value (typically INT)
 * @val2:	second element of returned value (typically MICRO)
 * @mask:	what we actually want to read as per the info_mask_*
 *		in iio_chan_spec.
 *
 * All values are served from the cached state in struct iio_dummy_state
 * while holding its lock.
 *
 * Return: the IIO_VAL_* format of the value stored in @val / @val2, or
 * -EINVAL if the channel does not provide the requested information.
 */
static int iio_dummy_read_raw(struct iio_dev *indio_dev,
			      struct iio_chan_spec const *chan,
			      int *val,
			      int *val2,
			      long mask)
{
	struct iio_dummy_state *st = iio_priv(indio_dev);
	int ret = -EINVAL;

	mutex_lock(&st->lock);
	switch (mask) {
	case IIO_CHAN_INFO_RAW: /* magic value - channel value read */
		switch (chan->type) {
		case IIO_VOLTAGE:
			if (chan->output) {
				/* Set integer part to cached value */
				*val = st->dac_val;
				ret = IIO_VAL_INT;
			} else if (chan->differential) {
				if (chan->channel == 1)
					*val = st->differential_adc_val[0];
				else
					*val = st->differential_adc_val[1];
				ret = IIO_VAL_INT;
			} else {
				*val = st->single_ended_adc_val;
				ret = IIO_VAL_INT;
			}
			break;
		case IIO_ACCEL:
			switch (chan->scan_index) {
			case DUMMY_INDEX_ACCEL_X:
				*val = st->accel_val[0];
				break;
			case DUMMY_INDEX_ACCEL_Y:
				*val = st->accel_val[1];
				break;
			case DUMMY_INDEX_ACCEL_Z:
				*val = st->accel_val[2];
				break;
			default:
				*val = 0;
				break;
			}
			ret = IIO_VAL_INT;
			break;
		default:
			break;
		}
		break;
	case IIO_CHAN_INFO_PROCESSED:
		switch (chan->type) {
		case IIO_STEPS:
			*val = st->steps;
			ret = IIO_VAL_INT;
			break;
		case IIO_ACTIVITY:
			switch (chan->channel2) {
			case IIO_MOD_RUNNING:
				*val = st->activity_running;
				ret = IIO_VAL_INT;
				break;
			case IIO_MOD_WALKING:
				*val = st->activity_walking;
				ret = IIO_VAL_INT;
				break;
			default:
				break;
			}
			break;
		default:
			break;
		}
		break;
	case IIO_CHAN_INFO_OFFSET:
		/*
		 * Same fixed offset for every channel advertising one
		 * (the single ended adc and the accelerometer Y axis) -> 7
		 */
		*val = 7;
		ret = IIO_VAL_INT;
		break;
	case IIO_CHAN_INFO_SCALE:
		switch (chan->type) {
		case IIO_VOLTAGE:
			switch (chan->differential) {
			case 0:
				/* only single ended adc -> 0.001333 */
				*val = 0;
				*val2 = 1333;
				ret = IIO_VAL_INT_PLUS_MICRO;
				break;
			case 1:
				/* all differential adc -> 0.000001344 */
				*val = 0;
				*val2 = 1344;
				ret = IIO_VAL_INT_PLUS_NANO;
				break;
			default:
				break;
			}
			break;
		case IIO_ACCEL:
			switch (chan->scan_index) {
			case DUMMY_INDEX_ACCEL_Y:
				/* only the Y axis has a scale -> 0.001344 */
				*val = 0;
				*val2 = 1344;
				ret = IIO_VAL_INT_PLUS_MICRO;
				break;
			default:
				/*
				 * Never report success without having
				 * filled in *val and *val2.
				 */
				break;
			}
			break;
		default:
			break;
		}
		break;
	case IIO_CHAN_INFO_CALIBBIAS:
		/* only the acceleration axis - read from cache */
		*val = st->accel_calibbias;
		ret = IIO_VAL_INT;
		break;
	case IIO_CHAN_INFO_CALIBSCALE:
		*val = st->accel_calibscale->val;
		*val2 = st->accel_calibscale->val2;
		ret = IIO_VAL_INT_PLUS_MICRO;
		break;
	case IIO_CHAN_INFO_SAMP_FREQ:
		*val = 3;
		*val2 = 33;
		ret = IIO_VAL_INT_PLUS_NANO;
		break;
	case IIO_CHAN_INFO_ENABLE:
		switch (chan->type) {
		case IIO_STEPS:
			*val = st->steps_enabled;
			ret = IIO_VAL_INT;
			break;
		default:
			break;
		}
		break;
	case IIO_CHAN_INFO_CALIBHEIGHT:
		switch (chan->type) {
		case IIO_STEPS:
			*val = st->height;
			ret = IIO_VAL_INT;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	mutex_unlock(&st->lock);

	return ret;
}
/**
* iio_dummy_write_raw() - data write function.
* @indio_dev: the struct iio_dev associated with this device instance
* @chan: the channel whose data is to be written
* @val: first element of value to set (typically INT)
* @val2: second element of value to set (typically MICRO)
* @mask: what we actually want to write as per the info_mask_*
* in iio_chan_spec.
*
* Note that all raw writes are assumed IIO_VAL_INT and info mask elements
* are assumed to be IIO_INT_PLUS_MICRO unless the callback write_raw_get_fmt
* in struct iio_info is provided by the driver.
*/
static int iio_dummy_write_raw(struct iio_dev *indio_dev,
struct iio_chan_spec const *chan,
int val,
int val2,
long mask)
{
int i;
int ret = 0;
struct iio_dummy_state *st = iio_priv(indio_dev);
switch (mask) {
case IIO_CHAN_INFO_RAW:
switch (chan->type) {
case IIO_VOLTAGE:
if (chan->output == 0)
return -EINVAL;
/* Locking not required as writing single value */
mutex_lock(&st->lock);
st->dac_val = val;
mutex_unlock(&st->lock);
return 0;
default:
return -EINVAL;
}
case IIO_CHAN_INFO_PROCESSED:
switch (chan->type) {
case IIO_STEPS:
mutex_lock(&st->lock);
st->steps = val;
mutex_unlock(&st->lock);
return 0;
case IIO_ACTIVITY:
if (val < 0)
val = 0;
if (val > 100)
val = 100;
switch (chan->channel2) {
case IIO_MOD_RUNNING:
st->activity_running = val;
return 0;
case IIO_MOD_WALKING:
st->activity_walking = val;
return 0;
default:
return -EINVAL;
}
break;
default:
return -EINVAL;
}
case IIO_CHAN_INFO_CALIBSCALE:
mutex_lock(&st->lock);
/* Compare against table - hard matching here */
for (i = 0; i < ARRAY_SIZE(dummy_scales); i++)
if (val == dummy_scales[i].val &&
val2 == dummy_scales[i].val2)
break;
if (i == ARRAY_SIZE(dummy_scales))
ret = -EINVAL;
else
st->accel_calibscale = &dummy_scales[i];
mutex_unlock(&st->lock);
return ret;
case IIO_CHAN_INFO_CALIBBIAS:
mutex_lock(&st->lock);
st->accel_calibbias = val;
mutex_unlock(&st->lock);
return 0;
case IIO_CHAN_INFO_ENABLE:
switch (chan->type) {
case IIO_STEPS:
mutex_lock(&st->lock);
st->steps_enabled = val;
mutex_unlock(&st->lock);
return 0;
default:
return -EINVAL;
}
case IIO_CHAN_INFO_CALIBHEIGHT:
switch (chan->type) {
case IIO_STEPS:
st->height = val;
return 0;
default:
return -EINVAL;
}
default:
return -EINVAL;
}
}
/*
* Device type specific information.
*
* Only the raw read and write callbacks are provided; iio_dummy_probe()
* installs this table as indio_dev->info.
*/
static const struct iio_info iio_dummy_info = {
.read_raw = &iio_dummy_read_raw,
.write_raw = &iio_dummy_write_raw,
};
/**
 * iio_dummy_init_device() - device instance specific init
 * @indio_dev: the iio device structure
 *
 * Fills the cached state of the fake device with its fixed start-up
 * values. A driver for real hardware would typically reset the device
 * to a known state here instead.
 *
 * Return: always 0.
 */
static int iio_dummy_init_device(struct iio_dev *indio_dev)
{
	struct iio_dummy_state *state = iio_priv(indio_dev);

	/* Voltage channels: DAC output, single ended and differential ADC */
	state->dac_val = 0;
	state->single_ended_adc_val = 73;
	state->differential_adc_val[0] = 33;
	state->differential_adc_val[1] = -34;

	/* Accelerometer: X, Y and Z readings plus calibration settings */
	state->accel_val[0] = 34;
	state->accel_val[1] = 37;
	state->accel_val[2] = 40;
	state->accel_calibbias = -7;
	state->accel_calibscale = &dummy_scales[0];

	/* Step counter and activity channels */
	state->steps = 47;
	state->activity_running = 98;
	state->activity_walking = 4;

	return 0;
}
/**
* iio_dummy_probe() - device instance probe
* @name: name of this instance.
*
* Arguments are bus type specific.
* I2C: iio_dummy_probe(struct i2c_client *client,
* const struct i2c_device_id *id)
* SPI: iio_dummy_probe(struct spi_device *spi)
*/
static struct iio_sw_device *iio_dummy_probe(const char *name)
{
int ret;
struct iio_dev *indio_dev;
struct iio_dummy_state *st;
struct iio_sw_device *swd;
struct device *parent;
/*
* With hardware: Set the parent device.
* parent = &spi->dev;
* parent = &client->dev;
*/
swd = kzalloc(sizeof(*swd), GFP_KERNEL);
if (!swd)
return ERR_PTR(-ENOMEM);
/*
* Allocate an IIO device.
*
* This structure contains all generic state
* information about the device instance.
* It also has a region (accessed by iio_priv()
* for chip specific state information.
*/
indio_dev = iio_device_alloc(parent, sizeof(*st));
if (!indio_dev) {
ret = -ENOMEM;
goto error_free_swd;
}
st = iio_priv(indio_dev);
mutex_init(&st->lock);
iio_dummy_init_device(indio_dev);
/*
* Make the iio_dev struct available to remove function.
* Bus equivalents
* i2c_set_clientdata(client, indio_dev);
* spi_set_drvdata(spi, indio_dev);
*/
swd->device = indio_dev;
/*
* Set the device name.
*
* This is typically a part number and obtained from the module
* id table.
* e.g. for i2c and spi:
* indio_dev->name = id->name;
* indio_dev->name = spi_get_device_id(spi)->name;
*/
indio_dev->name = kstrdup(name, GFP_KERNEL);
if (!indio_dev->name) {
ret = -ENOMEM;
goto error_free_device;
}
/* Provide description of available channels */
indio_dev->channels = iio_dummy_channels;
indio_dev->num_channels = ARRAY_SIZE(iio_dummy_channels);
/*
* Provide device type specific interface functions and
* constant data.
*/
indio_dev->info = &iio_dummy_info;
/* Specify that device provides sysfs type interfaces */
indio_dev->modes = INDIO_DIRECT_MODE;
ret = iio_device_register(indio_dev);
if (ret < 0)
goto error_free_name;
iio_swd_group_init_type_name(swd, name, &iio_dummy_type);
return swd;
error_free_name:
kfree(indio_dev->name);
error_free_device:
iio_device_free(indio_dev);
error_free_swd:
kfree(swd);
return ERR_PTR(ret);
}
/**
* iio_dummy_remove() - device instance removal function
* @swd: pointer to software IIO device abstraction
*
* Parameters follow those of iio_dummy_probe for buses.
*
* Unregisters the IIO device stored in @swd by iio_dummy_probe() and then
* frees its name and the device structure, in that order.
*
* NOTE(review): @swd itself is not freed here although iio_dummy_probe()
* allocates it; confirm that the caller releases it.
*
* Return: always 0.
*/
static int iio_dummy_remove(struct iio_sw_device *swd)
{
/*
* Get a pointer to the device instance iio_dev structure
* from the bus subsystem. E.g.
* struct iio_dev *indio_dev = i2c_get_clientdata(client);
* struct iio_dev *indio_dev = spi_get_drvdata(spi);
*/
struct iio_dev *indio_dev = swd->device;
/* Unregister the device */
iio_device_unregister(indio_dev);
/* Free all structures */
kfree(indio_dev->name);
iio_device_free(indio_dev);
return 0;
}
/*
* module_iio_sw_device_driver() - device driver registration
*
* Varies depending on bus type of the device. As there is no device
* here, call probe directly. For information on device registration
* i2c:
* Documentation/i2c/writing-clients.rst
* spi:
* Documentation/spi/spi-summary.rst
*/
/* Callbacks used to create and destroy instances of this device type. */
static const struct iio_sw_device_ops iio_dummy_device_ops = {
.probe = iio_dummy_probe,
.remove = iio_dummy_remove,
};
/*
* The software device type registered by this module. It is named
* "dummy-modified" rather than "dummy"; presumably so that it does not
* clash with the stock IIO dummy driver - TODO confirm.
*/
static struct iio_sw_device_type iio_dummy_device = {
.name = "dummy-modified",
.owner = THIS_MODULE,
.ops = &iio_dummy_device_ops,
};
module_iio_sw_device_driver(iio_dummy_device);
MODULE_AUTHOR("Jonathan Cameron <jic23@kernel.org>");
MODULE_DESCRIPTION("IIO dummy driver");
MODULE_LICENSE("GPL v2");

View File

@ -0,0 +1,68 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/**
* Copyright (c) 2011 Jonathan Cameron
*
* Join together the various functionality of iio_modified_dummy driver
*
* Changes by Milan Zamazal <mzamazal@redhat.com> 2023, for testing
* with vhost-device-scmi:
*
* - Dropped conditional parts.
* - Use 3 axes in the accelerometer device.
*/
#ifndef _IIO_MODIFIED_DUMMY_H_
#define _IIO_MODIFIED_DUMMY_H_
#include <linux/kernel.h>
struct iio_dummy_accel_calibscale;
struct iio_dummy_regs;
/**
* struct iio_dummy_state - device instance specific state.
* @dac_val: cache for dac value
* @single_ended_adc_val: cache for single ended adc value
* @differential_adc_val: cache for differential adc value
* @accel_val: cache for acceleration values, one entry per axis
* @accel_calibbias: cache for acceleration calibbias
* @activity_running: cache for the "running" activity value
* @activity_walking: cache for the "walking" activity value
* @accel_calibscale: cache for acceleration calibscale
* @lock: lock to ensure state is consistent
* @regs: only declared here; NOTE(review): appears to be unused
* by the modified driver - confirm before removing
* @steps_enabled: cache of whether the step counter is enabled
* @steps: cache for the step counter value
* @height: cache for the step counter calibheight value
*/
struct iio_dummy_state {
int dac_val;
int single_ended_adc_val;
int differential_adc_val[2];
int accel_val[3];
int accel_calibbias;
int activity_running;
int activity_walking;
const struct iio_dummy_accel_calibscale *accel_calibscale;
struct mutex lock;
struct iio_dummy_regs *regs;
int steps_enabled;
int steps;
int height;
};
/**
* enum iio_modified_dummy_scan_elements - scan index enum
* @DUMMY_INDEX_VOLTAGE_0: the single ended voltage channel
* @DUMMY_INDEX_DIFFVOLTAGE_1M2: first differential channel
* @DUMMY_INDEX_DIFFVOLTAGE_3M4: second differential channel
* @DUMMY_INDEX_ACCEL_X: acceleration channel, X axis
* @DUMMY_INDEX_ACCEL_Y: acceleration channel, Y axis
* @DUMMY_INDEX_ACCEL_Z: acceleration channel, Z axis
*
* Enum provides convenient numbering for the scan index.
*/
enum iio_modified_dummy_scan_elements {
DUMMY_INDEX_VOLTAGE_0,
DUMMY_INDEX_DIFFVOLTAGE_1M2,
DUMMY_INDEX_DIFFVOLTAGE_3M4,
DUMMY_INDEX_ACCEL_X,
DUMMY_INDEX_ACCEL_Y,
DUMMY_INDEX_ACCEL_Z,
};
#endif /* _IIO_MODIFIED_DUMMY_H_ */

View File

@ -0,0 +1,560 @@
// SPDX-FileCopyrightText: Red Hat, Inc.
// SPDX-License-Identifier: Apache-2.0
//! Common functionality for SCMI bindings to host devices.
//!
//! A new kind of devices can be added in [available_devices] using
//! [DeviceSpecification::new] calls.
//!
//! The module also defines common infrastructure to provide sensor devices to
//! SCMI, see [SensorT].
use std::collections::{HashMap, HashSet};
use std::ffi::OsString;
use std::fmt::Write;
use itertools::Itertools;
use log::debug;
use thiserror::Error as ThisError;
use crate::scmi::{
self, DeviceResult, MessageId, MessageValue, MessageValues, ProtocolId, ScmiDevice,
ScmiDeviceError, MAX_SIMPLE_STRING_LENGTH, SENSOR_AXIS_DESCRIPTION_GET, SENSOR_CONFIG_GET,
SENSOR_CONFIG_SET, SENSOR_CONTINUOUS_UPDATE_NOTIFY, SENSOR_DESCRIPTION_GET, SENSOR_PROTOCOL_ID,
SENSOR_READING_GET,
};
use super::{fake, iio};
/// Non-SCMI related device errors.
///
/// The user-facing message of each variant is given by its `#[error]`
/// attribute.
#[derive(Debug, ThisError)]
pub enum DeviceError {
/// Any other error; the string is the complete message.
#[error("{0}")]
GenericError(String),
/// A device parameter that is not valid; the string is put into the
/// message after the "Invalid device parameter" prefix.
#[error("Invalid device parameter: {0}")]
InvalidProperty(String),
/// An I/O error; the first field tells what the operation was performed
/// on (presumably a file path -- confirm against the callers).
#[error("I/O error on {0:?}: {1}")]
IOError(OsString, std::io::Error),
/// Required device parameters that were not given; holds their names.
#[error("Missing device parameters: {}", .0.join(", "))]
MissingDeviceProperties(Vec<String>),
/// Device parameters that were given but are not accepted; holds their
/// names.
#[error("Unexpected device parameters: {}", .0.join(", "))]
UnexpectedDeviceProperties(Vec<String>),
}
// [(NAME, [(PROPERTY, VALUE), ...]), ...]
pub type DeviceDescription = Vec<(String, DeviceProperties)>;
type PropertyPairs = Vec<(String, String)>;
#[derive(Debug, Eq, PartialEq, Hash)]
pub struct DeviceProperties(PropertyPairs);
impl DeviceProperties {
pub(crate) fn new(properties: PropertyPairs) -> Self {
Self(properties)
}
pub(crate) fn get(&self, name: &str) -> Option<&str> {
self.0
.iter()
.find(|(n, _)| n == name)
.map(|(_, v)| v.as_str())
}
fn names(&self) -> HashSet<&str> {
self.0.iter().map(|(n, _)| -> &str { n.as_str() }).collect()
}
fn extra<'a>(&'a self, allowed: &[&'a str]) -> HashSet<&str> {
let allowed_set: HashSet<&str> = HashSet::from_iter(allowed.iter().copied());
self.names().difference(&allowed_set).copied().collect()
}
fn missing<'a>(&'a self, required: &[&'a str]) -> HashSet<&str> {
let required_set: HashSet<&str> = HashSet::from_iter(required.iter().copied());
required_set.difference(&self.names()).copied().collect()
}
pub(crate) fn check(&self, required: &[&str], optional: &[&str]) -> Result<(), DeviceError> {
let missing = self.missing(required);
if !missing.is_empty() {
return Err(DeviceError::MissingDeviceProperties(
missing
.iter()
.sorted()
.map(|s| (*s).to_owned())
.collect::<Vec<String>>(),
));
}
let mut all_allowed = Vec::from(required);
all_allowed.extend(optional.iter());
let extra = self.extra(&all_allowed);
if !extra.is_empty() {
return Err(DeviceError::UnexpectedDeviceProperties(
extra
.iter()
.sorted()
.map(|s| (*s).to_owned())
.collect::<Vec<String>>(),
));
}
Ok(())
}
}
pub type MaybeDevice = Result<Box<dyn ScmiDevice>, DeviceError>;
type DeviceConstructor = fn(&DeviceProperties) -> MaybeDevice;

/// Definition of a device kind.
///
/// Instances are made with [DeviceSpecification::new].
pub struct DeviceSpecification {
    /// Function to call to create the device.
    ///
    /// It receives the device properties that the user provided on the
    /// command line.
    pub(crate) constructor: DeviceConstructor,
    /// Short description of the device.
    ///
    /// Single line, not a complete sentence.
    short_help: String,
    /// Long description of the device.
    ///
    /// Complete sentences, can span multiple lines.
    long_help: String,
    /// Description of the device parameters available to the user.
    ///
    /// One item per parameter; each item should start with the parameter
    /// name followed by a colon.
    parameters_help: Vec<String>,
}

impl DeviceSpecification {
    /// Creates a new device specification.
    ///
    /// The arguments correspond to the fields of [DeviceSpecification];
    /// the help texts are copied into owned strings. To actually add the
    /// device, the specification must be used in [available_devices].
    fn new(
        constructor: DeviceConstructor,
        short_help: &str,
        long_help: &str,
        parameters_help: &[&str],
    ) -> Self {
        let mut owned_parameters_help: Vec<String> = Vec::new();
        for parameter in parameters_help {
            owned_parameters_help.push((*parameter).to_owned());
        }
        DeviceSpecification {
            constructor,
            short_help: String::from(short_help),
            long_help: String::from(long_help),
            parameters_help: owned_parameters_help,
        }
    }
}
/// Mapping of device identifiers (names) to device specifications.
///
/// The keys are the device identifiers given on the command line.
type NameDeviceMapping = HashMap<&'static str, DeviceSpecification>;

/// Builds the mapping of all the supported devices.
///
/// To introduce a new kind of host device bindings, add its device
/// identifier together with its [DeviceSpecification] to the
/// [NameDeviceMapping] returned from here.
pub fn available_devices() -> NameDeviceMapping {
    let fake_sensor = DeviceSpecification::new(
        fake::FakeSensor::new_device,
        "fake accelerometer",
        "A simple 3-axes sensor providing fake pre-defined values.",
        &["name: an optional name of the sensor, max. 15 characters"],
    );
    let iio_sensor = DeviceSpecification::new(
        iio::IIOSensor::new_device,
        "industrial I/O sensor",
        "",
        &[
            "path: path to the device directory (e.g. /sys/bus/iio/devices/iio:device0)",
            "channel: prefix of the device type (e.g. in_accel)",
            "name: an optional name of the sensor, max. 15 characters",
        ],
    );
    HashMap::from([("fake", fake_sensor), ("iio", iio_sensor)])
}
/// Returns the help text describing all the available devices.
///
/// For each device from [available_devices] the text contains its name
/// with the short help, the long help (if any) and the list of its
/// parameters (if any). A device specification example is appended at the
/// end.
///
/// The devices are listed sorted by their names; iterating the hash map
/// directly would list them in an arbitrary order that can differ between
/// runs.
fn devices_help() -> String {
    let mut devices: Vec<_> = available_devices().into_iter().collect();
    devices.sort_unstable_by_key(|(name, _)| *name);

    // Writing to a `String` cannot fail, hence the `unwrap` calls below.
    let mut help = String::new();
    writeln!(help, "Available devices:").unwrap();
    for (name, specification) in &devices {
        let short_help = &specification.short_help;
        let long_help = &specification.long_help;
        let parameters_help = &specification.parameters_help;
        writeln!(help, "\n- {name}: {short_help}").unwrap();
        for line in long_help.lines() {
            writeln!(help, " {line}").unwrap();
        }
        if !parameters_help.is_empty() {
            writeln!(help, " Parameters:").unwrap();
            for parameter in parameters_help {
                writeln!(help, " - {parameter}").unwrap();
            }
        }
    }
    writeln!(help, "\nDevice specification example:").unwrap();
    writeln!(
        help,
        "--device iio,path=/sys/bus/iio/devices/iio:device0,channel=in_accel"
    )
    .unwrap();
    help
}
/// Prints the help text on the available devices to standard output.
pub fn print_devices_help() {
    println!("{}", devices_help());
}
// Common sensor infrastructure
/// Basic information about the sensor.
///
/// Structs implementing sensor devices typically hold it as a field.
#[derive(Debug)]
pub struct Sensor {
    /// The sensor name (possibly truncated) as reported to the guest.
    pub name: String,
    /// Whether the sensor is enabled.
    ///
    /// Sensors can be enabled and disabled using SCMI. A [Sensor] made by
    /// [Sensor::new] starts disabled.
    enabled: bool,
}

impl Sensor {
    /// Creates a disabled sensor.
    ///
    /// The sensor is named by the "name" property from `properties` when
    /// that is present, and by `default_name` otherwise.
    pub fn new(properties: &DeviceProperties, default_name: &str) -> Self {
        let name = match properties.get("name") {
            Some(custom_name) => custom_name,
            None => default_name,
        };
        Sensor {
            name: String::from(name),
            enabled: false,
        }
    }
}
/// Common base that sensor devices can use to simplify their implementation.
///
/// To add a new kind of sensor bindings, you must implement
/// [crate::scmi::ScmiDevice], define [DeviceSpecification] and add it to
/// [NameDeviceMapping] created in [available_devices]. You can do it fully
/// yourself or use this trait to simplify the implementation.
///
/// The trait is typically used as follows:
///
/// ```rust
/// struct MySensor {
/// sensor: Sensor,
/// // other fields as needed
/// }
///
/// impl SensorT for MySensor {
/// // provide trait functions implementation as needed
/// }
///
/// impl MySensor {
/// pub fn new_device(properties: &DeviceProperties) -> MaybeDevice {
/// properties.check(&[], &["name"])?;
/// let sensor = Sensor::new(properties, "mydevice");
/// let my_sensor = MySensor { sensor };
/// let sensor_device = SensorDevice(Box::new(my_sensor));
/// Ok(Box::new(sensor_device))
/// }
/// }
/// ```
///
/// See [crate::devices::fake::FakeSensor] implementation for an example.
pub trait SensorT: Send {
/// Returns the inner [Sensor] instance, immutable.
fn sensor(&self) -> &Sensor;
/// Returns the inner [Sensor] instance, mutable.
fn sensor_mut(&mut self) -> &mut Sensor;
/// Performs any non-default initialization on the sensor.
///
/// If the initialization fails, a corresponding error message is
/// returned.
fn initialize(&mut self) -> Result<(), DeviceError> {
Ok(())
}
/// Returns the id of the SCMI protocol used to communicate with the
/// sensor.
///
/// Usually no need to redefine this.
fn protocol(&self) -> ProtocolId {
SENSOR_PROTOCOL_ID
}
/// Returns an error message about invalid property `name`.
///
/// Usually no need to redefine this.
fn invalid_property(&self, name: &str) -> Result<(), DeviceError> {
Result::Err(DeviceError::InvalidProperty(name.to_owned()))
}
/// Processes a device property specified on the command line.
///
/// The function is called on all the device properties from the command line.
fn process_property(&mut self, name: &str, _value: &str) -> Result<(), DeviceError> {
self.invalid_property(name)
}
/// Returns the number of axes of the given sensor.
///
/// If the sensor provides just a scalar value, 0 must be returned (the
/// default return value here). Otherwise a non-zero value must be
/// returned, even for vector sensors with a single axis.
fn number_of_axes(&self) -> u32 {
0
}
/// Formats the unit of the given `axis` for SCMI protocol.
///
/// Usually no need to redefine this.
fn format_unit(&self, axis: u32) -> u32 {
(self.unit_exponent(axis) as u32 & 0x1F) << 11 | u32::from(self.unit())
}
/// Returns SCMI description of the sensor.
///
/// Usually no need to redefine this.
fn description_get(&self) -> DeviceResult {
// Continuous update required by Linux SCMI IIO driver
let low = 1 << 30;
let n_axes = self.number_of_axes();
let high = if n_axes > 0 {
n_axes << 16 | 1 << 8
} else {
self.format_unit(0)
};
let name = self.sensor().name.clone();
let values: MessageValues = vec![
// attributes low
MessageValue::Unsigned(low),
// attributes high
MessageValue::Unsigned(high),
// name, up to 16 bytes with final NULL (non-extended version)
MessageValue::String(name, MAX_SIMPLE_STRING_LENGTH),
];
Ok(values)
}
/// Returns the SCMI unit of the sensor.
fn unit(&self) -> u8 {
scmi::SENSOR_UNIT_UNSPECIFIED
}
/// Returns the decadic exponent to apply to the sensor values.
fn unit_exponent(&self, _axis: u32) -> i8 {
0
}
/// Returns the prefix of axes names.
///
/// Usually no need to redefine this.
fn axis_name_prefix(&self) -> String {
"axis".to_owned()
}
/// Returns the suffix of the given axis.
///
/// Usually no need to redefine this.
fn axis_name_suffix(&self, axis: u32) -> char {
match axis {
0 => 'X',
1 => 'Y',
2 => 'Z',
_ => 'N', // shouldn't be reached currently
}
}
/// Returns the SCMI description of the given axis.
///
/// Usually no need to redefine this.
fn axis_description(&self, axis: u32) -> Vec<MessageValue> {
let mut values = vec![];
values.push(MessageValue::Unsigned(axis)); // axis id
values.push(MessageValue::Unsigned(0)); // attributes low
values.push(MessageValue::Unsigned(self.format_unit(axis))); // attributes high
// Name in the recommended format, 16 bytes:
let prefix = self.axis_name_prefix();
let suffix = self.axis_name_suffix(axis);
values.push(MessageValue::String(
format!("{prefix}_{suffix}"),
MAX_SIMPLE_STRING_LENGTH,
));
values
}
/// Reports the SCMI configuration of the sensor.
///
/// This default implementation returns a single value: 1 when the sensor
/// is enabled and 0 when it is not.
fn config_get(&self) -> DeviceResult {
    let enabled_flag = MessageValue::Unsigned(u32::from(self.sensor().enabled));
    Ok(vec![enabled_flag])
}
/// Applies the given SCMI configuration to the sensor.
///
/// This default implementation supports only enabling (1) and disabling
/// (0) the sensor; a configuration with any other bit set is rejected with
/// [ScmiDeviceError::UnsupportedRequest].
fn config_set(&mut self, config: u32) -> DeviceResult {
    // Anything above 1 has a bit other than the lowest one set.
    if config > 1 {
        return Err(ScmiDeviceError::UnsupportedRequest);
    }
    self.sensor_mut().enabled = config == 1;
    debug!("Sensor enabled: {}", self.sensor().enabled);
    Ok(Vec::new())
}
/// Returns SCMI reading of the sensor values.
///
/// It is a sequence of [MessageValue::Unsigned] values, 4 of them for each
/// sensor axis.  See the SCMI standard for the exact specification of the
/// result.
///
/// This method has no default implementation, every sensor must provide
/// its own.
fn reading_get(&mut self) -> DeviceResult;
/// Handles the given protocol message with the given parameters.
///
/// Supported messages are the sensor description, axis description,
/// configuration get/set, continuous update notification (accepted and
/// ignored) and reading retrieval.  Any other message is rejected with
/// [ScmiDeviceError::UnsupportedRequest].
///
/// Messages that take an argument (axis description, configuration set)
/// read it from the first parameter; when it is missing,
/// [ScmiDeviceError::InvalidParameters] is returned.  Reading a disabled
/// sensor fails with [ScmiDeviceError::NotEnabled].
///
/// Usually no need to redefine this, unless more than the basic
/// functionality is needed, in which case it would be probably better to
/// enhance this trait with additional functions and improved
/// implementation.
fn handle(&mut self, message_id: MessageId, parameters: &[MessageValue]) -> DeviceResult {
    // A request without the expected argument is reported as an error
    // rather than panicking on out-of-bounds indexing.
    let first_parameter = || {
        parameters
            .first()
            .map(|value| value.get_unsigned())
            .ok_or(ScmiDeviceError::InvalidParameters)
    };
    match message_id {
        SENSOR_DESCRIPTION_GET => self.description_get(),
        SENSOR_AXIS_DESCRIPTION_GET => {
            let n_sensor_axes = self.number_of_axes();
            let axis_desc_index = first_parameter()?;
            if axis_desc_index >= n_sensor_axes {
                return Err(ScmiDeviceError::InvalidParameters);
            }
            // The number of returned descriptions goes first, followed by
            // the descriptions of all the axes starting at the given index.
            let mut values = vec![MessageValue::Unsigned(n_sensor_axes - axis_desc_index)];
            for axis in axis_desc_index..n_sensor_axes {
                values.extend(self.axis_description(axis));
            }
            Ok(values)
        }
        SENSOR_CONFIG_GET => self.config_get(),
        SENSOR_CONFIG_SET => {
            let config = first_parameter()?;
            self.config_set(config)
        }
        SENSOR_CONTINUOUS_UPDATE_NOTIFY => {
            // Linux VIRTIO SCMI insists on this.
            // We can accept it and ignore it, the sensor will be still working.
            Ok(vec![])
        }
        SENSOR_READING_GET => {
            if !self.sensor().enabled {
                return Err(ScmiDeviceError::NotEnabled);
            }
            self.reading_get()
        }
        _ => Err(ScmiDeviceError::UnsupportedRequest),
    }
}
}
// It's possible to impl ScmiDevice for SensorT but it is not very useful
// because it doesn't allow to pass SensorT as ScmiDevice directly.
// Hence this wrapper.
/// Wrapper around a boxed [SensorT] implementation that makes the sensor
/// usable as an [ScmiDevice].
pub struct SensorDevice(pub(crate) Box<dyn SensorT>);
// Every ScmiDevice operation is forwarded unchanged to the wrapped sensor.
impl ScmiDevice for SensorDevice {
// Runs the initialization of the wrapped sensor.
fn initialize(&mut self) -> Result<(), DeviceError> {
self.0.initialize()
}
// Reports the protocol of the wrapped sensor.
fn protocol(&self) -> ProtocolId {
self.0.protocol()
}
// Lets the wrapped sensor process the message and produce the response.
fn handle(&mut self, message_id: MessageId, parameters: &[MessageValue]) -> DeviceResult {
self.0.handle(message_id, parameters)
}
}
#[cfg(test)]
mod tests {
    use std::assert_eq;

    use super::*;

    /// The devices help must contain all the expected parts.
    #[test]
    fn test_help() {
        let help = devices_help();
        let expectations = [
            ("Available devices:\n", "global label missing"),
            ("fake:", "sensor name missing"),
            ("fake accelerometer", "short description missing"),
            ("3-axes sensor", "long description missing"),
            ("Parameters:\n", "parameter label missing"),
            ("- name:", "parameter `name' missing"),
        ];
        for (needle, problem) in expectations.iter() {
            assert!(help.contains(*needle), "{}", problem);
        }
    }

    /// Properties `foo`, `def` and `bar` shared by the tests below.
    fn device_properties() -> DeviceProperties {
        let pairs: Vec<(String, String)> = [("foo", "val1"), ("def", "val2"), ("bar", "val3")]
            .iter()
            .map(|(key, value)| (String::from(*key), String::from(*value)))
            .collect();
        DeviceProperties::new(pairs)
    }

    /// Lookup, name listing and missing/extra name computation.
    #[test]
    fn test_device_properties() {
        let properties = device_properties();
        assert_eq!(properties.get("bar"), Some("val3"));
        assert_eq!(properties.get("baz"), None);
        assert_eq!(properties.names(), HashSet::from(["foo", "def", "bar"]));

        let expected = ["abc", "def", "ghi"];
        assert_eq!(
            properties.missing(&expected),
            HashSet::from(["abc", "ghi"])
        );
        assert_eq!(properties.extra(&expected), HashSet::from(["foo", "bar"]));
    }

    /// Missing required properties are reported first, then unexpected
    /// ones; a matching set of properties passes.
    #[test]
    fn test_check_device_properties() {
        let properties = device_properties();

        let missing_result = properties.check(&["abc", "def", "ghi"], &["foo", "baz"]);
        match missing_result {
            Err(DeviceError::MissingDeviceProperties(missing)) => {
                assert_eq!(missing, vec![String::from("abc"), String::from("ghi")])
            }
            other => panic!("Unexpected result: {:?}", other),
        }

        let unexpected_result = properties.check(&["def"], &["foo", "baz"]);
        match unexpected_result {
            Err(DeviceError::UnexpectedDeviceProperties(unexpected)) => {
                assert_eq!(unexpected, vec![String::from("bar")])
            }
            other => panic!("Unexpected result: {:?}", other),
        }

        let valid_result = properties.check(&["def"], &["foo", "bar"]);
        if valid_result.is_err() {
            panic!("Unexpected result: {:?}", valid_result);
        }
    }
}

View File

@ -0,0 +1,67 @@
// SPDX-FileCopyrightText: Red Hat, Inc.
// SPDX-License-Identifier: Apache-2.0
//! Fake sensor implementation.
//!
//! The fake sensor is completely implemented here rather than bound to a host
//! device.  It emulates a dummy accelerometer device that increments its
//! axis reading values on each retrieval.  Useful for initial testing and
//! arranging SCMI virtualization setup without the need to bind real host
//! devices.
use crate::scmi::{self, DeviceResult, MessageValue};
use super::common::{DeviceProperties, MaybeDevice, Sensor, SensorDevice, SensorT};
/// Fake sensor that is not bound to any host device.
pub struct FakeSensor {
/// Common sensor instance.
sensor: Sensor,
/// Base of the values reported by the next reading; it is incremented
/// (wrapping around on overflow) on each reading retrieval.
value: u8,
}
impl SensorT for FakeSensor {
    // TODO: Define a macro for this boilerplate?
    fn sensor(&self) -> &Sensor {
        &self.sensor
    }

    fn sensor_mut(&mut self) -> &mut Sensor {
        &mut self.sensor
    }

    /// The fake sensor always reports three axes.
    fn number_of_axes(&self) -> u32 {
        3
    }

    fn unit(&self) -> u8 {
        // The sensor type is "Meters per second squared", since this is the
        // only, together with "Radians per second", what Google Linux IIO
        // supports (accelerometers and gyroscopes only).
        scmi::SENSOR_UNIT_METERS_PER_SECOND_SQUARED
    }

    fn axis_name_prefix(&self) -> String {
        String::from("acc")
    }

    /// Returns four values for each of the three axes: the current base
    /// value plus 100 times the axis index, followed by three zeros.  The
    /// base value is then incremented, wrapping around on overflow.
    fn reading_get(&mut self) -> DeviceResult {
        let base = u32::from(self.value);
        self.value = self.value.wrapping_add(1);
        Ok((0..3)
            .flat_map(|axis| {
                [
                    MessageValue::Unsigned(base + 100 * axis),
                    MessageValue::Unsigned(0),
                    MessageValue::Unsigned(0),
                    MessageValue::Unsigned(0),
                ]
            })
            .collect())
    }
}
impl FakeSensor {
    /// Creates a fake sensor device from the given properties.
    ///
    /// No property is required, `name` is the only accepted one; the
    /// property check error is returned otherwise.
    pub fn new_device(properties: &DeviceProperties) -> MaybeDevice {
        properties.check(&[], &["name"])?;
        let fake_sensor = Self {
            sensor: Sensor::new(properties, "fake"),
            value: 0,
        };
        Ok(Box::new(SensorDevice(Box::new(fake_sensor))))
    }
}

View File

@ -0,0 +1,827 @@
// SPDX-FileCopyrightText: Red Hat, Inc.
// SPDX-License-Identifier: Apache-2.0
//! Industrial I/O (IIO) sensors bindings.
//!
//! Basic functionality for exposing `/sys/bus/iio/devices/` stuff as guest
//! SCMI devices. Only some typical cases are supported. If you want more
//! functionality, you must enhance the implementation here.
//!
//! For some entry points, see [IIOSensor] and [Axis].
use std::cmp::{max, min};
use std::ffi::{OsStr, OsString};
use std::fs;
use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use log::{debug, error, warn};
use crate::scmi::{self, DeviceResult, MessageValue, ScmiDeviceError, MAX_SIMPLE_STRING_LENGTH};
use super::common::{DeviceError, DeviceProperties, MaybeDevice, Sensor, SensorDevice, SensorT};
/// Information about units used by the given Linux IIO channel.
///
/// Instances are looked up by the channel prefix, see [UNIT_MAPPING].
struct UnitMapping<'a> {
/// IIO sysfs channel prefix, e.g. "in_accel".
channel: &'a str,
/// One of the SCMI unit constants from [crate::scmi] (enum is not used to
/// avoid type conversions everywhere).
unit: u8,
/// Decadic exponent to be used to convert the given unit to the SCMI unit.
/// For example, the exponent is 0 for no conversion, -3 to convert
/// milliamps here to amps in SCMI, or 3 to convert kilopascals here to
/// pascals in SCMI.
unit_exponent: i8, // max. 5 bits actually
}
/// Specification of IIO channel units.
///
/// Based on
/// <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/ABI/testing/sysfs-bus-iio>.
/// Not everything from there is present -- channels here with more complicated
/// unit transformations (beyond using a decadic exponent; e.g. degrees to
/// radians or units not defined in SCMI) are omitted. If an IIO channel
/// doesn't have unit specification here, it can be still used by the unit
/// reported in SCMI will be [crate::scmi::SENSOR_UNIT_UNSPECIFIED].
// TODO: Make some macro(s) for this?
const UNIT_MAPPING: &[UnitMapping] = &[
UnitMapping {
channel: "in_accel",
unit: scmi::SENSOR_UNIT_METERS_PER_SECOND_SQUARED,
unit_exponent: 0,
},
UnitMapping {
channel: "in_angle",
unit: scmi::SENSOR_UNIT_RADIANS,
unit_exponent: 0,
},
UnitMapping {
channel: "in_anglevel",
unit: scmi::SENSOR_UNIT_RADIANS_PER_SECOND,
unit_exponent: 0,
},
UnitMapping {
channel: "in_concentration",
unit: scmi::SENSOR_UNIT_PERCENTAGE,
unit_exponent: 0,
},
UnitMapping {
channel: "in_current",
unit: scmi::SENSOR_UNIT_AMPS,
unit_exponent: -3,
},
UnitMapping {
channel: "in_capacitance",
unit: scmi::SENSOR_UNIT_FARADS,
unit_exponent: -9,
},
UnitMapping {
channel: "in_distance",
unit: scmi::SENSOR_UNIT_METERS,
unit_exponent: 0,
},
UnitMapping {
channel: "in_electricalconductivity",
unit: scmi::SENSOR_UNIT_SIEMENS, // per meter
unit_exponent: 0,
},
UnitMapping {
channel: "in_energy",
unit: scmi::SENSOR_UNIT_JOULS,
unit_exponent: 0,
},
UnitMapping {
channel: "in_gravity",
unit: scmi::SENSOR_UNIT_METERS_PER_SECOND_SQUARED,
unit_exponent: 0,
},
UnitMapping {
channel: "in_humidityrelative",
unit: scmi::SENSOR_UNIT_PERCENTAGE,
unit_exponent: -3,
},
UnitMapping {
channel: "in_illuminance",
unit: scmi::SENSOR_UNIT_LUX,
unit_exponent: 0,
},
UnitMapping {
channel: "in_magn",
unit: scmi::SENSOR_UNIT_GAUSS,
unit_exponent: 0,
},
UnitMapping {
channel: "in_ph",
unit: scmi::SENSOR_UNIT_UNSPECIFIED, // SCMI doesn't define pH
unit_exponent: -3,
},
UnitMapping {
channel: "in_positionrelative",
unit: scmi::SENSOR_UNIT_PERCENTAGE,
unit_exponent: -3,
},
UnitMapping {
channel: "in_power",
unit: scmi::SENSOR_UNIT_WATTS,
unit_exponent: -3,
},
UnitMapping {
channel: "in_pressure",
unit: scmi::SENSOR_UNIT_PASCALS,
unit_exponent: 3,
},
UnitMapping {
channel: "in_proximity",
unit: scmi::SENSOR_UNIT_METERS,
unit_exponent: 0,
},
UnitMapping {
channel: "in_resistance",
unit: scmi::SENSOR_UNIT_OHMS,
unit_exponent: 0,
},
UnitMapping {
channel: "in_temp",
unit: scmi::SENSOR_UNIT_DEGREES_C,
unit_exponent: -3,
},
UnitMapping {
channel: "in_velocity_sqrt(x^2+y^2+z^2)",
unit: scmi::SENSOR_UNIT_METERS_PER_SECOND,
unit_exponent: -3,
},
UnitMapping {
channel: "in_voltage",
unit: scmi::SENSOR_UNIT_VOLTS,
unit_exponent: -3,
},
];
/// Representation of an IIO channel axis.
///
/// Used also for scalar values.
#[derive(PartialEq, Debug)]
struct Axis {
/// Full sysfs path to the axis value file stripped of "_raw".
path: OsString, // without "_raw" suffix
/// Axis unit exponent, see [UnitMapping::unit_exponent] and [UNIT_MAPPING].
unit_exponent: i8,
/// Additional exponent to apply to the axis values.  It is computed from
/// the axis value scaling (see [IIOSensor::custom_exponent]) to provide a
/// sufficiently accurate SCMI value that is represented by an integer (not
/// a float) + decadic exponent.
custom_exponent: i8,
}
/// Particular IIO sensor specification.
///
/// An IIO sensor is specified by an IIO sysfs device directory and a channel
/// prefix within the directory (i.e. more devices can be defined for a single
/// IIO device directory).  All other information about the sensor is retrieved
/// from the device directory and from [UNIT_MAPPING].
#[derive(Debug)]
pub struct IIOSensor {
/// Common sensor instance.
sensor: Sensor,
/// Full sysfs path to the device directory.
///
/// Provided by the user.
path: OsString,
/// Prefix of the device type in the device directory, e.g. "in_accel".
///
/// Provided by the user.
channel: OsString,
/// Whether the sensor is scalar or has one or more axes.
///
/// Determined automatically by looking for presence of `*_[xyz]_raw` files
/// with the given channel prefix.  It is `true` until such a file is found
/// during the sensor initialization.
scalar: bool,
/// Axes descriptions, see [Axis] for more details.
///
/// Empty until the sensor is initialized; sorted by the axis path then.
axes: Vec<Axis>,
}
impl SensorT for IIOSensor {
// TODO: Define a macro for this boilerplate?
fn sensor(&self) -> &Sensor {
&self.sensor
}
fn sensor_mut(&mut self) -> &mut Sensor {
&mut self.sensor
}
fn initialize(&mut self) -> Result<(), DeviceError> {
let mut axes: Vec<Axis> = vec![];
match fs::read_dir(&self.path) {
Ok(iter) => {
for dir_entry in iter {
match dir_entry {
Ok(entry) => self.register_iio_file(entry, &mut axes),
Err(error) => return Err(DeviceError::IOError(self.path.clone(), error)),
}
}
}
Err(error) => return Err(DeviceError::IOError(self.path.clone(), error)),
}
if axes.is_empty() {
return Err(DeviceError::GenericError(format!(
"No {:?} channel found in {:?}",
&self.channel, &self.path
)));
}
axes.sort_by(|a1, a2| a1.path.cmp(&a2.path));
self.axes = axes;
Ok(())
}
fn unit(&self) -> u8 {
UNIT_MAPPING
.iter()
.find(|mapping| mapping.channel == self.channel)
.map_or(scmi::SENSOR_UNIT_UNSPECIFIED, |mapping| mapping.unit)
}
fn unit_exponent(&self, axis_index: u32) -> i8 {
let axis: &Axis = self.axes.get(axis_index as usize).unwrap();
axis.unit_exponent + axis.custom_exponent
}
fn number_of_axes(&self) -> u32 {
if self.scalar {
0
} else {
self.axes.len() as u32
}
}
fn axis_name_prefix(&self) -> String {
let channel = self.channel.to_str().unwrap();
let in_prefix = "in_";
let out_prefix = "out_";
let name: &str = if channel.starts_with(in_prefix) {
channel.strip_prefix(in_prefix).unwrap()
} else if channel.starts_with(out_prefix) {
channel.strip_prefix(out_prefix).unwrap()
} else {
channel
};
let len = min(name.len(), MAX_SIMPLE_STRING_LENGTH - 1);
String::from(&name[..len])
}
fn reading_get(&mut self) -> DeviceResult {
let mut result = vec![];
for axis in &self.axes {
let value = self.read_axis(axis)?;
result.push(MessageValue::Unsigned((value & 0xFFFFFFFF) as u32));
result.push(MessageValue::Unsigned((value >> 32) as u32));
result.push(MessageValue::Unsigned(0));
result.push(MessageValue::Unsigned(0));
}
Ok(result)
}
}
fn read_number_from_file<F: FromStr>(path: &Path) -> Result<Option<F>, ScmiDeviceError> {
match fs::read_to_string(path) {
Ok(string) => match string.trim().parse() {
Ok(value) => Ok(Some(value)),
_ => {
error!(
"Failed to parse IIO numeric value from {}: {string}",
path.display()
);
Err(ScmiDeviceError::GenericError)
}
},
Err(error) => match error.kind() {
ErrorKind::NotFound => {
let raw = path.ends_with("_raw");
let format = || {
format!(
"IIO {} file {} not found",
if raw { "value" } else { "data" },
path.display()
)
};
if raw {
error!("{}", format());
Err(ScmiDeviceError::GenericError)
} else {
debug!("{}", format());
Ok(None)
}
}
other_error => {
error!(
"Failed to read IIO data from {}: {}",
path.display(),
other_error
);
Err(ScmiDeviceError::GenericError)
}
},
}
}
impl IIOSensor {
#[allow(clippy::new_ret_no_self)]
pub fn new(properties: &DeviceProperties) -> Result<Self, DeviceError> {
properties.check(&["path", "channel"], &["name"])?;
let sensor = Sensor::new(properties, "iio");
Ok(Self {
sensor,
path: OsString::from(properties.get("path").unwrap()),
channel: OsString::from(properties.get("channel").unwrap()),
scalar: true,
axes: vec![],
})
}
pub fn new_device(properties: &DeviceProperties) -> MaybeDevice {
let iio_sensor = Self::new(properties)?;
let sensor_device = SensorDevice(Box::new(iio_sensor));
Ok(Box::new(sensor_device))
}
fn set_sensor_name_from_file(&mut self, path: &PathBuf) {
match fs::read_to_string(path) {
Ok(name) => self.sensor_mut().name = name,
Err(error) => warn!(
"Error reading IIO device name from {}: {}",
path.display(),
error
),
}
}
fn custom_exponent(&self, path: &OsStr, unit_exponent: i8) -> i8 {
let mut custom_exponent: i8 = 0;
if let Ok(Some(scale)) = self.read_axis_scale(path) {
// Crash completely OK if *this* doesn't fit:
custom_exponent = scale.log10() as i8;
if scale < 1.0 {
// The logarithm is truncated towards zero, we need floor
custom_exponent -= 1;
}
// The SCMI exponent (unit_exponent + custom_exponent) can have max. 5 bits:
custom_exponent = min(15 - unit_exponent, custom_exponent);
custom_exponent = max(-16 - unit_exponent, custom_exponent);
debug!(
"Setting custom scaling coefficient for {:?}: {}",
&path, custom_exponent
);
}
custom_exponent
}
fn add_axis(&mut self, axes: &mut Vec<Axis>, path: &OsStr) {
let unit_exponent = UNIT_MAPPING
.iter()
.find(|mapping| mapping.channel == self.channel)
.map_or(0, |mapping| mapping.unit_exponent);
// To get meaningful integer values, we must adjust exponent to
// the provided scale if any.
let custom_exponent = self.custom_exponent(path, unit_exponent);
axes.push(Axis {
path: OsString::from(path),
unit_exponent,
custom_exponent,
});
}
fn register_iio_file(&mut self, file: fs::DirEntry, axes: &mut Vec<Axis>) {
let channel = self.channel.to_str().unwrap();
let os_file_name = file.file_name();
let file_name = os_file_name.to_str().unwrap_or_default();
let raw_suffix = "_raw";
if file_name == "name" {
self.set_sensor_name_from_file(&file.path());
} else if file_name.starts_with(channel) && file_name.ends_with(raw_suffix) {
let infix = &file_name[channel.len()..file_name.len() - raw_suffix.len()];
let infix_len = infix.len();
if infix_len == 0 || (infix_len == 2 && infix.starts_with('_')) {
let raw_axis_path = Path::new(&self.path)
.join(Path::new(&file_name))
.to_str()
.unwrap()
.to_string();
let axis_path = raw_axis_path.strip_suffix(raw_suffix).unwrap();
self.add_axis(axes, &OsString::from(axis_path));
if infix_len > 0 {
self.scalar = false;
}
}
}
}
fn read_axis_file<T: FromStr>(
&self,
path: &OsStr,
name: &str,
) -> Result<Option<T>, ScmiDeviceError> {
for value_path in [
Path::new(&(String::from(path.to_str().unwrap()) + "_" + name)),
&Path::new(&path).parent().unwrap().join(name),
]
.iter()
{
let value: Option<T> = read_number_from_file(value_path)?;
if value.is_some() {
return Ok(value);
}
}
Ok(None)
}
fn read_axis_offset(&self, path: &OsStr) -> Result<Option<i64>, ScmiDeviceError> {
self.read_axis_file(path, "offset")
}
fn read_axis_scale(&self, path: &OsStr) -> Result<Option<f64>, ScmiDeviceError> {
self.read_axis_file(path, "scale")
}
fn read_axis(&self, axis: &Axis) -> Result<i64, ScmiDeviceError> {
let path_result = axis.path.clone().into_string();
let mut value: i64 =
read_number_from_file(Path::new(&(path_result.unwrap() + "_raw")))?.unwrap();
let offset: Option<i64> = self.read_axis_offset(&axis.path)?;
if let Some(offset_value) = offset {
match value.checked_add(offset_value) {
Some(new_value) => value = new_value,
None => {
error!(
"IIO offset overflow in {:?}: {} + {}",
&axis.path,
value,
offset.unwrap()
);
return Err(ScmiDeviceError::GenericError);
}
}
}
let scale: Option<f64> = self.read_axis_scale(&axis.path)?;
if let Some(scale_value) = scale {
let exponent_scale = 10.0_f64.powi(i32::from(axis.custom_exponent));
value = (value as f64 * (scale_value / exponent_scale)).round() as i64;
}
Ok(value)
}
}
// Tests of the IIO sensor.  They work with fake IIO device directories
// created in the current directory and removed again once the tests
// finish.
#[cfg(test)]
mod tests {
use crate::scmi::ScmiDevice;
use super::*;
use std::{
assert_eq, fs,
path::{Path, PathBuf},
};
// Creates a new directory `./<prefix><i>`, trying `i` from 1 to 99, and
// returns the path of the first one successfully created.  Panics if none
// of them can be created.
fn make_directory(prefix: &str) -> PathBuf {
for i in 1..100 {
let path = Path::new(".").join(format!("{prefix}{i}"));
if fs::create_dir(&path).is_ok() {
return path;
}
}
panic!("Couldn't create test directory");
}
// A fake IIO device directory; it is removed, including its content, when
// the instance is dropped.
struct IIODirectory {
path: PathBuf,
}
impl IIODirectory {
// Creates a fresh `./_test<i>` directory containing the given files,
// specified as (file name, file content) pairs.
fn new(files: &[(&str, &str)]) -> IIODirectory {
let path = make_directory("_test");
let directory = IIODirectory { path };
for (file, content) in files.iter() {
fs::write(&directory.path.join(file), content).unwrap();
}
directory
}
}
impl Drop for IIODirectory {
fn drop(&mut self) {
// Best effort cleanup, a failure to remove the directory is ignored.
let _ = fs::remove_dir_all(&self.path);
}
}
// Returns the path of the given directory as a string.
fn directory_path(directory: &IIODirectory) -> String {
directory
.path
.clone()
.into_os_string()
.into_string()
.unwrap()
}
// Makes device properties with the given `path`, `channel` and, if
// provided, `name`.
fn device_properties(path: String, channel: String, name: Option<String>) -> DeviceProperties {
let mut pairs = vec![("path".to_owned(), path), ("channel".to_owned(), channel)];
if let Some(name) = name {
pairs.push(("name".to_owned(), name));
}
DeviceProperties::new(pairs)
}
// Makes a not yet initialized IIO sensor for the given device directory
// path.
fn make_iio_sensor_from_path(path: String, channel: String, name: Option<String>) -> IIOSensor {
let properties = device_properties(path, channel, name);
IIOSensor::new(&properties).unwrap()
}
// Makes a not yet initialized IIO sensor for the given fake directory.
fn make_iio_sensor(
directory: &IIODirectory,
channel: String,
name: Option<String>,
) -> IIOSensor {
let path = directory_path(directory);
make_iio_sensor_from_path(path, channel, name)
}
// Like `make_iio_sensor_from_path` but the sensor is created using
// `IIOSensor::new_device` and a creation failure is passed to the caller.
fn make_scmi_sensor_from_path(
path: String,
channel: String,
name: Option<String>,
) -> MaybeDevice {
let properties = device_properties(path, channel, name);
IIOSensor::new_device(&properties)
}
// Like `make_iio_sensor` but the sensor is returned as an SCMI device.
fn make_scmi_sensor(
directory: &IIODirectory,
channel: String,
name: Option<String>,
) -> Box<dyn ScmiDevice> {
let path = directory_path(directory);
make_scmi_sensor_from_path(path, channel, name).unwrap()
}
// The sensor can't be created without the `channel` property.
#[test]
fn test_missing_property() {
let properties = DeviceProperties::new(vec![("path".to_owned(), ".".to_owned())]);
let result = IIOSensor::new(&properties);
match result {
Ok(_) => panic!("Should fail on a missing property"),
Err(DeviceError::MissingDeviceProperties(missing)) => {
assert_eq!(missing, vec!["channel".to_owned()])
}
other => panic!("Unexpected result: {:?}", other),
}
}
// Properties other than `path`, `channel` and `name` are rejected.
#[test]
fn test_extra_property() {
let properties = DeviceProperties::new(vec![
("path".to_owned(), ".".to_owned()),
("name".to_owned(), "test".to_owned()),
("channel".to_owned(), "in_accel".to_owned()),
("foo".to_owned(), "something".to_owned()),
("bar".to_owned(), "baz".to_owned()),
]);
let result = IIOSensor::new(&properties);
match result {
Ok(_) => panic!("Should fail on an extra property"),
Err(DeviceError::UnexpectedDeviceProperties(extra)) => {
assert_eq!(extra, ["bar".to_owned(), "foo".to_owned()])
}
other => panic!("Unexpected result: {:?}", other),
}
}
// Initialization succeeds if a value file of the channel is present;
// unrelated files don't matter.
#[test]
fn test_iio_init() {
let directory = IIODirectory::new(&[("foo", "bar"), ("in_accel_raw", "123")]);
let mut sensor =
make_scmi_sensor(&directory, "in_accel".to_owned(), Some("accel".to_owned()));
sensor.initialize().unwrap();
}
// Initialization reports an I/O error on a non-existent device directory.
#[test]
fn test_iio_init_no_directory() {
let mut sensor =
make_scmi_sensor_from_path("non-existent".to_owned(), "".to_owned(), None).unwrap();
match sensor.initialize() {
Ok(_) => panic!("Should fail on an inaccessible path"),
Err(DeviceError::IOError(path, std::io::Error { .. })) => {
assert_eq!(path, "non-existent")
}
other => panic!("Unexpected result: {:?}", other),
}
}
// Initialization fails if no value file of the channel is present.
#[test]
fn test_iio_init_no_channel() {
let directory = IIODirectory::new(&[("foo", "bar")]);
let mut sensor = make_scmi_sensor(&directory, "in_accel".to_owned(), None);
match sensor.initialize() {
Ok(_) => panic!("Should fail on an inaccessible channel"),
Err(DeviceError::GenericError(message)) => {
assert!(
message.starts_with("No \"in_accel\" channel found in \"./_test"),
"Unexpected error: {}",
message
)
}
other => panic!("Unexpected result: {:?}", other),
}
}
// The name from the `name` file replaces the name given in properties.
#[test]
fn test_sensor_name_from_fs() {
let directory = IIODirectory::new(&[("in_accel_raw", "123"), ("name", "foo")]);
let mut sensor =
make_iio_sensor(&directory, "in_accel".to_owned(), Some("accel".to_owned()));
sensor.initialize().unwrap();
assert_eq!(sensor.sensor.name, "foo");
}
// The name from properties is used if there is no `name` file.
#[test]
fn test_sensor_name_from_params() {
let directory = IIODirectory::new(&[("in_accel_raw", "123")]);
let mut sensor = make_iio_sensor(&directory, "in_accel".to_owned(), Some("foo".to_owned()));
sensor.initialize().unwrap();
assert_eq!(sensor.sensor.name, "foo");
}
// The name is `iio` if it is provided neither in a file nor in properties.
#[test]
fn test_default_sensor_name() {
let directory = IIODirectory::new(&[("in_accel_raw", "123")]);
let mut sensor = make_iio_sensor(&directory, "in_accel".to_owned(), None);
sensor.initialize().unwrap();
assert_eq!(sensor.sensor.name, "iio");
}
// Units are taken from UNIT_MAPPING; a channel not present there has an
// unspecified unit.
#[test]
fn test_units() {
let directory = IIODirectory::new(&[
("in_foo_raw", "123"),
("in_accel_raw", "123"),
("in_voltage_raw", "123"),
]);
for (name, unit) in [
("foo", scmi::SENSOR_UNIT_UNSPECIFIED),
("accel", scmi::SENSOR_UNIT_METERS_PER_SECOND_SQUARED),
("voltage", scmi::SENSOR_UNIT_VOLTS),
]
.iter()
{
let sensor =
make_iio_sensor(&directory, "in_".to_owned() + name, Some(name.to_string()));
assert_eq!(sensor.unit(), *unit);
}
}
// The reported exponent is the channel unit exponent plus the exponent
// derived from the scale file.
#[test]
fn test_unit_exponent() {
for (channel, scale, exponent) in [
("in_accel", 1.23, 0),
("in_accel", 0.000123, -4),
("in_accel", 123.0, 2),
("in_voltage", 123.0, -1),
]
.iter()
{
let raw_file = format!("{channel}_raw");
let scale_file = format!("{channel}_scale");
let directory =
IIODirectory::new(&[(&raw_file, "123"), (&scale_file, &scale.to_string())]);
let mut sensor = make_iio_sensor(&directory, channel.to_string(), None);
sensor.initialize().unwrap();
assert_eq!(sensor.unit_exponent(0), *exponent);
}
}
// Each axis gets the exponent derived from its own scale file.
#[test]
fn test_unit_exponent_multiple_axes() {
let directory = IIODirectory::new(&[
("in_accel_x_raw", "123"),
("in_accel_x_scale", "0.123"),
("in_accel_y_raw", "123"),
("in_accel_y_scale", "12.3"),
]);
let mut sensor = make_iio_sensor(&directory, "in_accel".to_owned(), None);
sensor.initialize().unwrap();
assert_eq!(sensor.unit_exponent(0), -1);
assert_eq!(sensor.unit_exponent(1), 1);
}
// The `scale` file of the directory is used if the channel has no scale
// file of its own.
#[test]
fn test_unit_exponent_single_scale() {
let directory = IIODirectory::new(&[("in_accel_raw", "123"), ("scale", "0.123")]);
let mut sensor = make_iio_sensor(&directory, "in_accel".to_owned(), None);
sensor.initialize().unwrap();
assert_eq!(sensor.unit_exponent(0), -1);
}
// A sensor with just `<channel>_raw` is scalar, i.e. it reports no axes.
#[test]
fn test_number_of_axes_scalar() {
let directory = IIODirectory::new(&[("in_accel_raw", "123"), ("in_accel_scale", "123")]);
let mut sensor = make_iio_sensor(&directory, "in_accel".to_owned(), None);
sensor.initialize().unwrap();
assert_eq!(sensor.number_of_axes(), 0);
}
// A single `<channel>_x_raw` file makes a sensor with one axis.
#[test]
fn test_number_of_axes_1() {
let directory = IIODirectory::new(&[("in_accel_x_raw", "123"), ("in_accel_scale", "123")]);
let mut sensor = make_iio_sensor(&directory, "in_accel".to_owned(), None);
sensor.initialize().unwrap();
assert_eq!(sensor.number_of_axes(), 1);
}
// Only the `*_raw` files count as axes, the scale file does not.
#[test]
fn test_number_of_axes_3() {
let directory = IIODirectory::new(&[
("in_accel_x_raw", "123"),
("in_accel_y_raw", "123"),
("in_accel_z_raw", "123"),
("in_accel_x_scale", "123"),
]);
let mut sensor = make_iio_sensor(&directory, "in_accel".to_owned(), None);
sensor.initialize().unwrap();
assert_eq!(sensor.number_of_axes(), 3);
}
// The axis name prefix is the channel without `in_` or `out_`, limited to
// 15 characters.
#[test]
fn test_axis_name_prefix() {
for (channel, prefix) in [
("in_accel", "accel"),
("out_voltage", "voltage"),
("foo", "foo"),
("name-longer-than-fifteen-characters", "name-longer-tha"),
]
.iter()
{
let sensor = make_iio_sensor_from_path("".to_owned(), channel.to_string(), None);
assert_eq!(&sensor.axis_name_prefix(), prefix);
}
}
// A value not fitting into 32 bits is split into the low and high words.
#[test]
fn test_iio_reading_scalar() {
let directory = IIODirectory::new(&[
("in_voltage_raw", "9876543210"),
("in_voltage_offset", "123"),
("in_voltage_scale", "456"),
]);
let mut sensor = make_iio_sensor(&directory, "in_voltage".to_owned(), None);
sensor.initialize().unwrap();
let result = sensor.reading_get().unwrap();
// (9876543210 + 123) * 456 = 4503703759848
// custom exponent = 2
// applied and rounded: 45037037598 = 0xA7C6AA81E
assert_eq!(result.len(), 4);
assert_eq!(result.get(0).unwrap(), &MessageValue::Unsigned(0x7C6AA81E));
assert_eq!(result.get(1).unwrap(), &MessageValue::Unsigned(0xA));
assert_eq!(result.get(2).unwrap(), &MessageValue::Unsigned(0));
assert_eq!(result.get(3).unwrap(), &MessageValue::Unsigned(0));
}
// Trailing newlines in the files are ignored.
#[test]
fn test_iio_reading_scalar_whitespace() {
let directory = IIODirectory::new(&[
("in_accel_raw", "10\n"),
("in_accel_offset", "20\n"),
("in_accel_scale", "0.3\n"),
]);
let mut sensor = make_iio_sensor(&directory, "in_accel".to_owned(), None);
sensor.initialize().unwrap();
let result = sensor.reading_get().unwrap();
// (10 + 20) * 0.3 = 9, custom exponent = -1, applied: 90 = 0x5A
assert_eq!(result.len(), 4);
assert_eq!(result.get(0).unwrap(), &MessageValue::Unsigned(0x5A));
assert_eq!(result.get(1).unwrap(), &MessageValue::Unsigned(0));
assert_eq!(result.get(2).unwrap(), &MessageValue::Unsigned(0));
assert_eq!(result.get(3).unwrap(), &MessageValue::Unsigned(0));
}
// Axes x and y use the common `scale` file while axis z uses its own
// scale file.
#[test]
fn test_iio_reading_axes() {
let directory = IIODirectory::new(&[
("in_accel_x_raw", "10"),
("in_accel_x_offset", "1"),
("in_accel_y_raw", "20"),
("in_accel_y_offset", "10"),
("in_accel_z_raw", "30"),
("in_accel_z_offset", "20"),
("in_accel_z_scale", "0.3"),
("scale", "0.02"),
]);
let mut sensor = make_iio_sensor(&directory, "in_accel".to_owned(), None);
sensor.initialize().unwrap();
let result = sensor.reading_get().unwrap();
assert_eq!(result.len(), 12);
assert_eq!(result.get(0).unwrap(), &MessageValue::Unsigned(22));
assert_eq!(result.get(4).unwrap(), &MessageValue::Unsigned(60));
assert_eq!(result.get(8).unwrap(), &MessageValue::Unsigned(150));
// All the values other than the first one of each axis must be zero.
for i in 0..12 {
if i % 4 != 0 {
assert_eq!(result.get(i).unwrap(), &MessageValue::Unsigned(0));
}
}
}
}

View File

@ -0,0 +1,13 @@
// SPDX-FileCopyrightText: Red Hat, Inc.
// SPDX-License-Identifier: Apache-2.0
//! Implementation of SCMI bindings to host devices.
//!
//! The general infrastructure is implemented in [crate::devices::common] module.
//! Access to particular kinds of devices is implemented in the other modules:
//! - [crate::devices::fake] provides a fake sensor.
//! - [crate::devices::iio] implements access to industrial I/O (IIO) devices.
pub mod common;
pub mod fake;
pub mod iio;

View File

@ -0,0 +1,228 @@
// SPDX-FileCopyrightText: Red Hat, Inc.
// SPDX-License-Identifier: Apache-2.0
// Based on implementation of other devices here, Copyright by Linaro Ltd.
//! vhost-user daemon implementation for
//! [System Control and Management Interface](https://developer.arm.com/Architectures/System%20Control%20and%20Management%20Interface)
//! (SCMI).
//!
//! Currently, the mandatory parts of the following SCMI protocols are implemented:
//!
//! - base
//! - sensor management
//!
//! As for sensor management, support for industrial I/O (IIO) Linux devices
//! and a fake sensor device is implemented.
//!
//! The daemon listens on a socket that is specified using `--socket-path`
//! command line option. Usually at least one exposed device is specified,
//! which is done using `--device` command line option. It can be used more
//! than once, for different devices. `--help-devices` lists the available
//! devices and their options.
//!
//! The daemon normally logs info and higher messages to the standard error
//! output. To log more messages, you can set `RUST_LOG` environment variable,
//! e.g. to `debug`.
//!
//! Here is an example command line invocation of the daemon:
//!
//! ```sh
//! RUST_LOG=debug vhost-device-scmi \
//! --socket-path ~/tmp/scmi.sock \
//! --device iio,path=/sys/bus/iio/devices/iio:device0,channel=in_accel
//! ```
mod devices;
mod scmi;
mod vhu_scmi;
use devices::common::{print_devices_help, DeviceDescription, DeviceProperties};
use std::{
process::exit,
sync::{Arc, RwLock},
};
use clap::{CommandFactory, Parser};
use itertools::Itertools;
use log::{debug, error, info, warn};
use vhost::vhost_user;
use vhost::vhost_user::Listener;
use vhost_user_backend::VhostUserDaemon;
use vhu_scmi::VuScmiBackend;
use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap};
/// Result with the error represented by a plain string message.
type Result<T> = std::result::Result<T, String>;
// Command line arguments of the daemon.
// (Regular comments are used here on purpose, doc comments would be picked
// up by clap and shown in the command line help.)
#[derive(Parser)]
struct ScmiArgs {
// Location of vhost-user Unix domain socket.
// Required, unless one of the --help options is used.
#[clap(short, long, help = "vhost-user socket to use (required)")]
socket_path: Option<String>,
// Specification of SCMI devices to create.
// Each specification has the form `NAME[,KEY=VALUE]...`; the option accepts
// one or more values and can be used repeatedly.
#[clap(short, long, help = "Devices to expose")]
#[arg(num_args(1..))]
device: Vec<String>,
// If set, help on the available devices is printed instead of starting
// the daemon.
#[clap(long, help = "Print help on available devices")]
help_devices: bool,
}
/// Configuration of the daemon, created from the command line arguments.
pub struct VuScmiConfig {
/// Path of the vhost-user socket, stripped of surrounding whitespace.
socket_path: String,
/// Names and properties of the devices requested on the command line.
devices: DeviceDescription,
}
impl TryFrom<ScmiArgs> for VuScmiConfig {
    type Error = String;

    /// Converts the parsed command line arguments to the daemon
    /// configuration.
    ///
    /// Each device argument has the form `NAME[,KEY=VALUE]...`.  A property
    /// is split on its first `=`, so the value itself may contain `=`.
    ///
    /// An error message is returned if the socket path is missing or if a
    /// device property doesn't contain `=`.
    fn try_from(cmd_args: ScmiArgs) -> Result<Self> {
        let socket_path = cmd_args
            .socket_path
            .ok_or_else(|| "Required argument socket-path was not provided".to_string())?
            .trim()
            .to_string();
        let mut devices: DeviceDescription = vec![];
        for d in &cmd_args.device {
            let mut split = d.split(',');
            // `split` always yields at least one, possibly empty, item.
            let name = split.next().unwrap().to_owned();
            let mut properties = vec![];
            for s in split {
                if let Some((key, value)) = s.splitn(2, '=').collect_tuple() {
                    properties.push((key.to_owned(), value.to_owned()));
                } else {
                    return Err(format!("Invalid device {name} property format: {s}"));
                }
            }
            devices.push((name, DeviceProperties::new(properties)));
        }
        Ok(Self {
            socket_path,
            devices,
        })
    }
}
fn start_backend(config: VuScmiConfig) -> Result<()> {
loop {
debug!("Starting backend");
let backend_instance = VuScmiBackend::new(&config);
if let Err(error) = backend_instance {
return Err(error.to_string());
}
let backend = Arc::new(RwLock::new(backend_instance.unwrap()));
let listener = Listener::new(config.socket_path.clone(), true).unwrap();
let mut daemon = VhostUserDaemon::new(
"vhost-device-scmi".to_owned(),
backend.clone(),
GuestMemoryAtomic::new(GuestMemoryMmap::new()),
)
.unwrap();
daemon.start(listener).unwrap();
match daemon.wait() {
Ok(()) => {
info!("Stopping cleanly");
}
Err(vhost_user_backend::Error::HandleRequest(vhost_user::Error::PartialMessage)) => {
info!(
"vhost-user connection closed with partial message.
If the VM is shutting down, this is expected behavior;
otherwise, it might be a bug."
);
}
Err(e) => {
warn!("Error running daemon: {:?}", e);
}
}
// No matter the result, we need to shut down the worker thread.
backend.read().unwrap().exit_event.write(1).unwrap();
debug!("Finishing backend");
}
}
/// Handles arguments that only print information.
///
/// Returns the arguments unchanged unless `--help-devices` was given; in that
/// case the device help is printed and `None` is returned.
fn process_args(args: ScmiArgs) -> Option<ScmiArgs> {
    if !args.help_devices {
        return Some(args);
    }
    print_devices_help();
    None
}
fn print_help(message: &String) {
println!("{message}\n");
let mut command = ScmiArgs::command();
command.print_help().unwrap();
}
/// Entry point: initializes logging, parses the command line and runs the
/// backend.
///
/// Invalid arguments result in the help being printed; a backend failure is
/// logged, printed to standard output and turned into exit status 1.
fn main() {
    env_logger::init();

    // `None` means that only help on devices was requested and printed.
    let args = match process_args(ScmiArgs::parse()) {
        Some(args) => args,
        None => return,
    };

    let config = match VuScmiConfig::try_from(args) {
        Ok(config) => config,
        Err(message) => {
            print_help(&message);
            return;
        }
    };

    if let Err(error) = start_backend(config) {
        error!("{error}");
        println!("{error}");
        exit(1);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Socket path and several device specifications, using both the long and
    /// the short option forms, end up in the configuration in command line
    /// order.
    #[test]
    fn test_command_line() {
        let socket = String::from("/foo/scmi.sock");
        let command_line = format!(
            "binary --device dummy -s {socket} \
             --device fake,name=foo,prop=value -d fake,name=bar"
        );
        let parsed = ScmiArgs::parse_from(command_line.split_whitespace());
        let config = VuScmiConfig::try_from(process_args(parsed).unwrap()).unwrap();
        assert_eq!(config.socket_path, socket);

        // Builds device properties from borrowed key/value pairs.
        let properties = |pairs: &[(&str, &str)]| {
            DeviceProperties::new(
                pairs
                    .iter()
                    .map(|(key, value)| (key.to_string(), value.to_string()))
                    .collect(),
            )
        };
        let expected = vec![
            ("dummy".to_owned(), properties(&[])),
            (
                "fake".to_owned(),
                properties(&[("name", "foo"), ("prop", "value")]),
            ),
            ("fake".to_owned(), properties(&[("name", "bar")])),
        ];
        assert_eq!(config.devices, expected);
    }

    /// `--help-devices` makes `process_args` consume the arguments.
    #[test]
    fn test_device_help_processing() {
        let parsed = ScmiArgs::parse_from("binary --help-devices".split_whitespace());
        assert!(process_args(parsed).is_none());
    }

    /// Only checks that printing the help does not panic.
    #[test]
    fn test_help() {
        // No way known to me to capture print_help() output from clap.
        print_help(&String::from("test"));
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,963 @@
// SPDX-FileCopyrightText: Red Hat, Inc.
// SPDX-License-Identifier: Apache-2.0
// Based on https://github.com/rust-vmm/vhost-device, Copyright by Linaro Ltd.
//! General part of the vhost-user SCMI backend. Nothing very different from
//! the other rust-vmm backends.
use log::{debug, error, warn};
use std::io;
use std::io::Result as IoResult;
use std::mem::size_of;
use thiserror::Error as ThisError;
use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
use vhost_user_backend::{VhostUserBackendMut, VringRwLock, VringT};
use virtio_bindings::bindings::virtio_config::{VIRTIO_F_NOTIFY_ON_EMPTY, VIRTIO_F_VERSION_1};
use virtio_bindings::bindings::virtio_ring::{
VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC,
};
use virtio_queue::{DescriptorChain, QueueOwnedT};
use vm_memory::{
Bytes, GuestAddressSpace, GuestMemoryAtomic, GuestMemoryLoadGuard, GuestMemoryMmap,
};
use vmm_sys_util::epoll::EventSet;
use vmm_sys_util::eventfd::{EventFd, EFD_NONBLOCK};
use crate::devices::common::{available_devices, DeviceError};
use crate::scmi::{MessageHeader, ScmiHandler, ScmiRequest};
use crate::VuScmiConfig;
// QUEUE_SIZE must apparently be at least 1024 for MMIO.
// There is probably a maximum size per descriptor defined in the kernel.
// Returned by `max_queue_size()`.
const QUEUE_SIZE: usize = 1024;
// Number of virtqueues, returned by `num_queues()`.
const NUM_QUEUES: usize = 2;
// Queue index of the command queue, as dispatched in `handle_event()`.
const COMMAND_QUEUE: u16 = 0;
// Queue index of the event queue, as dispatched in `handle_event()`.
const EVENT_QUEUE: u16 = 1;
// Bit number of the P2A channels feature, advertised in `features()`.
const VIRTIO_SCMI_F_P2A_CHANNELS: u16 = 0;
#[derive(Debug, ThisError)]
pub enum VuScmiError {
#[error("Descriptor not found")]
DescriptorNotFound,
#[error("Descriptor read failed")]
DescriptorReadFailed,
#[error("Descriptor write failed")]
DescriptorWriteFailed,
#[error("Error when configuring device {0}: {1}")]
DeviceConfigurationError(String, DeviceError),
#[error("Failed to create new EventFd")]
EventFdFailed,
#[error("Failed to handle event, didn't match EPOLLIN")]
HandleEventNotEpollIn,
#[error("Failed to handle unknown event")]
HandleEventUnknownEvent,
#[error("Isufficient descriptor size, required: {0}, found: {1}")]
InsufficientDescriptorSize(usize, usize),
#[error("Failed to send notification")]
SendNotificationFailed,
#[error("Invalid descriptor count {0}")]
UnexpectedDescriptorCount(usize),
#[error("Invalid descriptor size, expected: {0}, found: {1}")]
UnexpectedDescriptorSize(usize, usize),
#[error("Invalid descriptor size, expected at least: {0}, found: {1}")]
UnexpectedMinimumDescriptorSize(usize, usize),
#[error("Received unexpected readable descriptor at index {0}")]
UnexpectedReadableDescriptor(usize),
#[error("Received unexpected write only descriptor at index {0}")]
UnexpectedWriteOnlyDescriptor(usize),
#[error("Unknown device requested: {0}")]
UnknownDeviceRequested(String),
}
impl From<VuScmiError> for io::Error {
fn from(e: VuScmiError) -> Self {
Self::new(io::ErrorKind::Other, e)
}
}
/// Result type of this module, with `VuScmiError` as the error type.
type Result<T> = std::result::Result<T, VuScmiError>;
/// Descriptor chain over guest memory, as handled by the request processing
/// functions of the backend.
type ScmiDescriptorChain = DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap<()>>>;
pub struct VuScmiBackend {
event_idx: bool,
pub exit_event: EventFd,
mem: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
/// Event vring and descriptors serve for asynchronous responses and notifications.
/// They are obtained from the driver and we store them here for later use.
/// (We currently don't implement asynchronous responses or notifications but we support
/// the event queue because the Linux VIRTIO SCMI driver seems to be unhappy if it is not
/// present. And it doesn't harm to be ready for possible event queue use in future.)
event_vring: Option<VringRwLock>,
event_descriptors: Vec<DescriptorChain<GuestMemoryLoadGuard<GuestMemoryMmap>>>,
/// The abstraction of request handling, with all the needed information stored inside.
scmi_handler: ScmiHandler,
}
impl VuScmiBackend {
pub fn new(config: &VuScmiConfig) -> Result<Self> {
let mut handler = ScmiHandler::new();
let device_mapping = available_devices();
for (name, properties) in config.devices.iter() {
match device_mapping.get(name.as_str()) {
Some(specification) => match (specification.constructor)(properties) {
Ok(mut device) => {
if let Err(error) = device.initialize() {
return Result::Err(VuScmiError::DeviceConfigurationError(
name.clone(),
error,
));
}
handler.register_device(device);
}
Err(error) => {
return Result::Err(VuScmiError::DeviceConfigurationError(
name.clone(),
error,
));
}
},
None => return Result::Err(VuScmiError::UnknownDeviceRequested(name.clone())),
};
}
Ok(Self {
event_idx: false,
exit_event: EventFd::new(EFD_NONBLOCK).map_err(|_| VuScmiError::EventFdFailed)?,
mem: None,
event_vring: None,
event_descriptors: vec![],
scmi_handler: handler,
})
}
/// Processes SCMI requests from the command queue.
///
/// Every chain must consist of exactly two descriptors: a readable one with
/// the request (message header followed by the parameters) and a write only
/// one for the response. The request is validated and passed to the SCMI
/// handler, the response is written to guest memory and the chain is added
/// to the used ring of `vring`.
///
/// Returns `Ok(true)` when all the chains have been processed. Processing
/// stops with an error at the first chain that fails validation or whose
/// memory cannot be accessed.
pub fn process_requests(
    &mut self,
    requests: Vec<ScmiDescriptorChain>,
    vring: &VringRwLock,
) -> Result<bool> {
    // Size in bytes of a single value in the request, parameters included.
    let value_size = 4;
    let header_size = size_of::<MessageHeader>();

    for chain in requests {
        let descriptors: Vec<_> = chain.clone().collect();
        let (desc_request, desc_response) = match descriptors.as_slice() {
            [request, response] => (*request, *response),
            other => return Err(VuScmiError::UnexpectedDescriptorCount(other.len())),
        };

        // Request descriptor: readable and large enough for the header.
        if desc_request.is_write_only() {
            return Err(VuScmiError::UnexpectedWriteOnlyDescriptor(0));
        }
        let request_len = desc_request.len() as usize;
        if request_len < header_size {
            return Err(VuScmiError::UnexpectedMinimumDescriptorSize(
                header_size,
                request_len,
            ));
        }
        let header = chain
            .memory()
            .read_obj::<MessageHeader>(desc_request.addr())
            .map_err(|_| VuScmiError::DescriptorReadFailed)?;
        let mut scmi_request = ScmiRequest::new(header);

        // The descriptor size is checked only when the handler reports a
        // parameter count for the message.
        let n_parameters = self.scmi_handler.number_of_parameters(&scmi_request);
        debug!("SCMI request with n parameters: {:?}", n_parameters);
        match n_parameters {
            Some(expected_parameters) if expected_parameters > 0 => {
                let param_bytes = (expected_parameters as usize) * value_size;
                let total_size = value_size + param_bytes;
                if request_len != total_size {
                    return Err(VuScmiError::UnexpectedDescriptorSize(
                        total_size,
                        request_len,
                    ));
                }
                let mut buffer: Vec<u8> = vec![0; header_size + param_bytes];
                chain
                    .memory()
                    .read_slice(&mut buffer, desc_request.addr())
                    .map_err(|_| VuScmiError::DescriptorReadFailed)?;
                self.scmi_handler
                    .store_parameters(&mut scmi_request, &buffer[header_size..]);
            }
            Some(_) if request_len != value_size => {
                return Err(VuScmiError::UnexpectedDescriptorSize(
                    value_size,
                    request_len,
                ));
            }
            _ => {}
        }

        debug!("Calling SCMI request handler");
        let mut response = self.scmi_handler.handle(scmi_request);
        debug!("SCMI response: {:?}", response);

        // Response descriptor: write only and large enough for the response,
        // or at least for a communication error response replacing it.
        if !desc_response.is_write_only() {
            return Err(VuScmiError::UnexpectedReadableDescriptor(1));
        }
        let response_capacity = desc_response.len() as usize;
        if response.len() > response_capacity {
            error!(
                "Response of length {} cannot fit into the descriptor size {}",
                response.len(),
                response_capacity
            );
            response = response.communication_error();
            if response.len() > response_capacity {
                return Err(VuScmiError::InsufficientDescriptorSize(
                    response.len(),
                    response_capacity,
                ));
            }
        }
        chain
            .memory()
            .write_slice(response.as_slice(), desc_response.addr())
            .map_err(|_| VuScmiError::DescriptorWriteFailed)?;

        let added = vring.add_used(chain.head_index(), response.len() as u32);
        if added.is_err() {
            error!("Couldn't return used descriptors to the ring");
        }
    }
    Ok(true)
}
/// Takes all available descriptor chains from the command queue, processes
/// them as SCMI requests and signals the used queue afterwards.
///
/// A request processing error is logged and returned; no notification is
/// sent in that case.
///
/// Panics if guest memory has not been set with `update_memory()`.
fn process_command_queue(&mut self, vring: &VringRwLock) -> Result<()> {
    debug!("Processing command queue");
    let requests: Vec<_> = vring
        .get_mut()
        .get_queue_mut()
        .iter(self.mem.as_ref().unwrap().memory())
        .map_err(|_| VuScmiError::DescriptorNotFound)?
        .collect();
    debug!("Requests to process: {}", requests.len());

    if let Err(err) = self.process_requests(requests, vring) {
        warn!("Failed SCMI request: {}", err);
        return Err(err);
    }

    // Send notification once all the requests are processed
    debug!("Sending processed request notification");
    vring
        .signal_used_queue()
        .map_err(|_| VuScmiError::SendNotificationFailed)?;
    debug!("Notification sent");

    debug!("Processing command queue finished");
    Ok(())
}
/// Remembers the event vring on the first call; later calls keep the vring
/// stored previously.
fn start_event_queue(&mut self, vring: &VringRwLock) {
    self.event_vring.get_or_insert_with(|| vring.clone());
}
/// Stores the buffers that the guest makes available on the event queue.
///
/// The requests here are notifications from the guest about adding fresh
/// buffers for the used ring. The Linux driver allocates 256 buffers for the
/// event queue initially (arriving here in several batches) and then adds a
/// free buffer after each message delivered through the event queue.
///
/// Every chain must consist of a single write only descriptor; otherwise
/// processing stops with an error. Returns `Ok(true)` on success.
pub fn process_event_requests(
    &mut self,
    requests: Vec<ScmiDescriptorChain>,
    _vring: &VringRwLock,
) -> Result<bool> {
    for chain in requests {
        let descriptors: Vec<_> = chain.clone().collect();
        debug!(
            "SCMI event request with n descriptors: {}",
            descriptors.len()
        );
        let desc = match descriptors.as_slice() {
            [single] => *single,
            other => return Err(VuScmiError::UnexpectedDescriptorCount(other.len())),
        };
        if !desc.is_write_only() {
            return Err(VuScmiError::UnexpectedReadableDescriptor(0));
        }
        debug!("SCMI event request avail descriptor length: {}", desc.len());
        self.event_descriptors.push(chain);
    }
    Ok(true)
}
/// Takes all available descriptor chains from the event queue, stores them
/// for later use, signals the used queue and remembers the event vring.
///
/// A processing error is logged and returned; no notification is sent and
/// the vring is not stored in that case.
///
/// Panics if guest memory has not been set with `update_memory()`.
fn process_event_queue(&mut self, vring: &VringRwLock) -> Result<()> {
    debug!("Processing event queue");
    let requests: Vec<_> = vring
        .get_mut()
        .get_queue_mut()
        .iter(self.mem.as_ref().unwrap().memory())
        .map_err(|_| VuScmiError::DescriptorNotFound)?
        .collect();
    debug!("Requests to process: {}", requests.len());

    if let Err(err) = self.process_event_requests(requests, vring) {
        warn!("Failed SCMI request: {}", err);
        return Err(err);
    }

    // Send notification once all the requests are processed
    debug!("Sending processed request notification");
    vring
        .signal_used_queue()
        .map_err(|_| VuScmiError::SendNotificationFailed)?;
    debug!("Notification sent");

    self.start_event_queue(vring);
    debug!("Processing event queue finished");
    Ok(())
}
}
/// Implementation of the vhost-user backend trait for the SCMI backend.
impl VhostUserBackendMut<VringRwLock, ()> for VuScmiBackend {
    /// Number of virtqueues of the device.
    fn num_queues(&self) -> usize {
        debug!("Num queues called");
        NUM_QUEUES
    }

    /// Maximum size of a virtqueue.
    fn max_queue_size(&self) -> usize {
        debug!("Max queue size called");
        QUEUE_SIZE
    }

    /// Bit mask of the virtio features offered by the backend, including the
    /// vhost-user protocol features flag.
    fn features(&self) -> u64 {
        debug!("Features called");
        let virtio_features: u64 = (1 << VIRTIO_F_VERSION_1)
            | (1 << VIRTIO_F_NOTIFY_ON_EMPTY)
            | (1 << VIRTIO_RING_F_INDIRECT_DESC)
            | (1 << VIRTIO_RING_F_EVENT_IDX)
            | (1 << VIRTIO_SCMI_F_P2A_CHANNELS);
        virtio_features | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()
    }

    /// vhost-user protocol features offered by the backend.
    fn protocol_features(&self) -> VhostUserProtocolFeatures {
        debug!("Protocol features called");
        VhostUserProtocolFeatures::MQ
    }

    /// Records whether event index handling is enabled.
    fn set_event_idx(&mut self, enabled: bool) {
        self.event_idx = enabled;
        debug!("Event idx set to: {}", enabled);
    }

    /// Stores the guest memory to be used for queue processing.
    fn update_memory(&mut self, mem: GuestMemoryAtomic<GuestMemoryMmap>) -> IoResult<()> {
        debug!("Update memory called");
        self.mem = Some(mem);
        Ok(())
    }

    /// Processes the queue identified by `device_event`.
    ///
    /// Fails if `evset` is not `EventSet::IN`, if `device_event` is neither
    /// the command nor the event queue, or if processing the queue fails.
    /// Returns `Ok(false)` otherwise.
    fn handle_event(
        &mut self,
        device_event: u16,
        evset: EventSet,
        vrings: &[VringRwLock],
        _thread_id: usize,
    ) -> IoResult<bool> {
        debug!("Handle event called");
        if evset != EventSet::IN {
            warn!("Non-input event");
            return Err(VuScmiError::HandleEventNotEpollIn.into());
        }

        // With EVENT_IDX, vm-virtio's Queue implementation only checks
        // avail_index once, so the queue must be processed repeatedly until
        // no new requests are found on it. Without EVENT_IDX, a single call
        // is enough.
        match device_event {
            COMMAND_QUEUE => {
                let vring = &vrings[COMMAND_QUEUE as usize];
                if !self.event_idx {
                    self.process_command_queue(vring)?;
                } else {
                    loop {
                        vring.disable_notification().unwrap();
                        self.process_command_queue(vring)?;
                        if !vring.enable_notification().unwrap() {
                            break;
                        }
                    }
                }
            }
            EVENT_QUEUE => {
                let vring = &vrings[EVENT_QUEUE as usize];
                if !self.event_idx {
                    self.process_event_queue(vring)?;
                } else {
                    loop {
                        vring.disable_notification().unwrap();
                        self.process_event_queue(vring)?;
                        if !vring.enable_notification().unwrap() {
                            break;
                        }
                    }
                }
            }
            _ => {
                warn!("unhandled device_event: {}", device_event);
                return Err(VuScmiError::HandleEventUnknownEvent.into());
            }
        }

        debug!("Handle event finished");
        Ok(false)
    }

    /// Returns a clone of the exit event, or `None` if it cannot be cloned.
    fn exit_event(&self, _thread_index: usize) -> Option<EventFd> {
        debug!("Exit event called");
        self.exit_event.try_clone().ok()
    }
}
#[cfg(test)]
mod tests {
use virtio_bindings::virtio_ring::{VRING_DESC_F_NEXT, VRING_DESC_F_WRITE};
use virtio_queue::{mock::MockSplitQueue, Descriptor, Queue};
use vm_memory::{Address, GuestAddress, GuestMemoryAtomic, GuestMemoryMmap};
use super::*;
/// Builds the 32-bit SCMI message header used by the tests: the message id
/// occupies the lowest bits, the protocol id is placed from bit 10 up.
fn scmi_header(message_id: u8, protocol_id: u8) -> u32 {
    let protocol_bits = u32::from(protocol_id) << 10;
    protocol_bits | u32::from(message_id)
}
/// Builds a command descriptor chain in fresh guest memory: a readable
/// descriptor holding the SCMI header and `parameters` (little endian),
/// followed by a write only descriptor of 0x100 bytes for the response.
fn build_cmd_desc_chain(
    protocol_id: u8,
    message_id: u8,
    parameters: Vec<u32>,
) -> ScmiDescriptorChain {
    let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap();
    let vq = MockSplitQueue::new(mem, 16);
    let request_addr = vq.desc_table().total_size() + 0x100;
    let request_size: u32 = (4 + parameters.len() * 4) as u32;

    // Request payload: the header followed by the parameters.
    let mut payload: Vec<u8> = Vec::with_capacity(request_size as usize);
    payload.extend_from_slice(&scmi_header(message_id, protocol_id).to_le_bytes());
    for parameter in &parameters {
        payload.extend_from_slice(&parameter.to_le_bytes());
    }

    // Descriptor 0: the SCMI request, chained to descriptor 1.
    let desc_request = Descriptor::new(request_addr, request_size, VRING_DESC_F_NEXT as u16, 1);
    mem.write_slice(&payload, desc_request.addr()).unwrap();
    vq.desc_table().store(0, desc_request).unwrap();

    // Descriptor 1: the SCMI response, placed right behind the request.
    let response_addr = request_addr + u64::from(desc_request.len());
    let desc_response = Descriptor::new(response_addr, 0x100, VRING_DESC_F_WRITE as u16, 0);
    vq.desc_table().store(1, desc_response).unwrap();

    // Put the descriptor index 0 in the first available ring position.
    mem.write_obj(0u16, vq.avail_addr().unchecked_add(4))
        .unwrap();
    // Set `avail_idx` to 1.
    mem.write_obj(1u16, vq.avail_addr().unchecked_add(2))
        .unwrap();

    // Create descriptor chain from pre-filled memory.
    vq.create_queue::<Queue>()
        .unwrap()
        .iter(GuestMemoryAtomic::new(mem.clone()).memory())
        .unwrap()
        .next()
        .unwrap()
}
/// Builds an event queue descriptor chain in fresh guest memory: a single
/// write only descriptor of 0x100 bytes.
fn build_event_desc_chain() -> ScmiDescriptorChain {
    let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap();
    let vq = MockSplitQueue::new(mem, 16);

    // Descriptor for the SCMI event
    let buffer_addr = vq.desc_table().total_size() + 0x100;
    let event_desc = Descriptor::new(buffer_addr, 0x100, VRING_DESC_F_WRITE as u16, 0);
    vq.desc_table().store(0, event_desc).unwrap();

    let avail_addr = vq.avail_addr();
    // Put the descriptor index 0 in the first available ring position.
    mem.write_obj(0u16, avail_addr.unchecked_add(4)).unwrap();
    // Set `avail_idx` to 1.
    mem.write_obj(1u16, avail_addr.unchecked_add(2)).unwrap();

    // Create descriptor chain from pre-filled memory.
    vq.create_queue::<Queue>()
        .unwrap()
        .iter(GuestMemoryAtomic::new(mem.clone()).memory())
        .unwrap()
        .next()
        .unwrap()
}
// Parameters of one descriptor built by `build_dummy_desc_chain`, which
// builds just empty descriptors (no data is written to their buffers).
struct DescParameters {
// Guest address of the descriptor buffer; 0x100 is used when `None`.
addr: Option<u64>,
// Descriptor flags, combined with the chaining flag added by the builder.
flags: u16,
// Length of the descriptor buffer.
len: u32,
}
/// Builds a descriptor chain with one descriptor per item of `parameters`.
/// All the descriptors except the last one are marked as having a successor;
/// no data is written to the descriptor buffers.
fn build_dummy_desc_chain(parameters: Vec<&DescParameters>) -> ScmiDescriptorChain {
    let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap();
    let vq = MockSplitQueue::new(mem, 16);

    for (i, p) in parameters.iter().enumerate() {
        let is_last = i + 1 == parameters.len();
        let chain_flag: u16 = if is_last { 0 } else { VRING_DESC_F_NEXT as u16 };
        let addr = p.addr.unwrap_or(0x100);
        let desc = Descriptor::new(addr, p.len, chain_flag | p.flags, (i + 1) as u16);
        vq.desc_table().store(i as u16, desc).unwrap();
    }

    let avail_addr = vq.avail_addr();
    // Put the descriptor index 0 in the first available ring position.
    mem.write_obj(0u16, avail_addr.unchecked_add(4)).unwrap();
    // Set `avail_idx` to 1.
    mem.write_obj(1u16, avail_addr.unchecked_add(2)).unwrap();

    // Create descriptor chain from pre-filled memory
    vq.create_queue::<Queue>()
        .unwrap()
        .iter(GuestMemoryAtomic::new(mem.clone()).memory())
        .unwrap()
        .next()
        .unwrap()
}
/// Checks that the response buffer of the chain at `chain_index` starts with
/// the expected SCMI header, `status` and `data` values (all little endian).
fn validate_desc_chains(
    desc_chains: &[ScmiDescriptorChain],
    chain_index: usize,
    protocol_id: u8,
    message_id: u8,
    status: i32,
    data: Vec<u32>,
) {
    let chain = &desc_chains[chain_index];
    let descriptors: Vec<_> = chain.clone().collect();

    // The response is stored in the second descriptor of the chain.
    let response_desc = descriptors[1];
    let mut response = vec![0; response_desc.len() as usize];
    chain
        .memory()
        .read(&mut response, response_desc.addr())
        .unwrap();

    let mut expected: Vec<u8> = scmi_header(message_id, protocol_id).to_le_bytes().to_vec();
    expected.extend_from_slice(&status.to_le_bytes());
    for value in &data {
        expected.extend_from_slice(&value.to_le_bytes());
    }
    assert_eq!(response[0..expected.len()], expected);
}
/// Creates a backend without any devices configured.
fn make_backend() -> VuScmiBackend {
    let socket_path = String::from("/foo/scmi.sock");
    let config = VuScmiConfig {
        socket_path,
        devices: Vec::new(),
    };
    VuScmiBackend::new(&config).unwrap()
}
/// Valid command requests are processed and answered.
#[test]
fn test_process_requests() {
    let mut backend = make_backend();
    let guest_memory =
        GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap();
    let vring = VringRwLock::new(GuestMemoryAtomic::new(guest_memory), 0x1000).unwrap();

    // Descriptor chain size zero, shouldn't fail
    let no_requests: Vec<ScmiDescriptorChain> = Vec::new();
    backend.process_requests(no_requests, &vring).unwrap();

    // Valid single SCMI request: base protocol version
    let single = vec![build_cmd_desc_chain(0x10, 0x0, vec![])];
    backend.process_requests(single.clone(), &vring).unwrap();
    validate_desc_chains(&single, 0, 0x10, 0x0, 0, vec![0x20000]);

    // Valid multi SCMI request: base protocol version + implementation version
    let multiple = vec![
        build_cmd_desc_chain(0x10, 0x0, vec![]),
        build_cmd_desc_chain(0x10, 0x5, vec![]),
    ];
    backend.process_requests(multiple.clone(), &vring).unwrap();
    validate_desc_chains(&multiple, 0, 0x10, 0x0, 0, vec![0x20000]);
    validate_desc_chains(&multiple, 1, 0x10, 0x5, 0, vec![0]);
}
/// Invalid command requests are rejected with the corresponding errors.
#[test]
fn test_process_requests_failure() {
    // Processes a single descriptor chain and checks that it fails with an
    // error matching the given pattern.
    macro_rules! assert_failure {
        ($backend:expr, $vring:expr, $chain:expr, $expected:pat) => {
            match $backend
                .process_requests(vec![$chain], &$vring)
                .unwrap_err()
            {
                $expected => (),
                other => panic!("Unexpected result: {:?}", other),
            }
        };
    }

    let mut backend = make_backend();
    let guest_memory =
        GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap();
    let vring = VringRwLock::new(GuestMemoryAtomic::new(guest_memory), 0x1000).unwrap();
    let write_only = VRING_DESC_F_WRITE as u16;

    let empty = DescParameters {
        addr: None,
        flags: 0,
        len: 0,
    };

    // Have only one descriptor, expected two.
    assert_failure!(
        backend,
        vring,
        build_dummy_desc_chain(vec![&empty]),
        VuScmiError::UnexpectedDescriptorCount(1)
    );

    // Have three descriptors, expected two.
    assert_failure!(
        backend,
        vring,
        build_dummy_desc_chain(vec![&empty, &empty, &empty]),
        VuScmiError::UnexpectedDescriptorCount(3)
    );

    // Write only descriptors.
    let empty_write_only = DescParameters {
        addr: None,
        flags: write_only,
        len: 0,
    };
    assert_failure!(
        backend,
        vring,
        build_dummy_desc_chain(vec![&empty_write_only, &empty_write_only]),
        VuScmiError::UnexpectedWriteOnlyDescriptor(0)
    );

    // Invalid request address.
    let request = DescParameters {
        addr: Some(0x10000),
        flags: 0,
        len: 4,
    };
    let response = DescParameters {
        addr: None,
        flags: write_only,
        len: 4,
    };
    assert_failure!(
        backend,
        vring,
        build_dummy_desc_chain(vec![&request, &response]),
        VuScmiError::DescriptorReadFailed
    );

    // Invalid request length (very small).
    let request = DescParameters {
        addr: None,
        flags: 0,
        len: 2,
    };
    let response = DescParameters {
        addr: None,
        flags: write_only,
        len: 4,
    };
    assert_failure!(
        backend,
        vring,
        build_dummy_desc_chain(vec![&request, &response]),
        VuScmiError::UnexpectedMinimumDescriptorSize(4, 2)
    );

    // Invalid request length (too small).
    assert_failure!(
        backend,
        vring,
        build_cmd_desc_chain(0x10, 0x2, vec![]),
        VuScmiError::UnexpectedDescriptorSize(8, 4)
    );

    // Invalid request length (too large).
    assert_failure!(
        backend,
        vring,
        build_cmd_desc_chain(0x10, 0x0, vec![0]),
        VuScmiError::UnexpectedDescriptorSize(4, 8)
    );

    // Read only descriptors.
    let readable = DescParameters {
        addr: None,
        flags: 0,
        len: 4,
    };
    assert_failure!(
        backend,
        vring,
        build_dummy_desc_chain(vec![&readable, &readable]),
        VuScmiError::UnexpectedReadableDescriptor(1)
    );

    // Invalid response address.
    let request = DescParameters {
        addr: None,
        flags: 0,
        len: 4,
    };
    let response = DescParameters {
        addr: Some(0x10000),
        flags: write_only,
        len: 8,
    };
    assert_failure!(
        backend,
        vring,
        build_dummy_desc_chain(vec![&request, &response]),
        VuScmiError::DescriptorWriteFailed
    );

    // Invalid response length.
    let request = DescParameters {
        addr: None,
        flags: 0,
        len: 4,
    };
    let response = DescParameters {
        addr: None,
        flags: write_only,
        len: 6,
    };
    assert_failure!(
        backend,
        vring,
        build_dummy_desc_chain(vec![&request, &response]),
        VuScmiError::InsufficientDescriptorSize(8, 6)
    );
}
/// Valid event queue buffers are accumulated in the backend.
#[test]
fn test_event_requests() {
    let mut backend = make_backend();
    let guest_memory =
        GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap();
    let vring = VringRwLock::new(GuestMemoryAtomic::new(guest_memory), 0x1000).unwrap();

    // Descriptor chain size zero, shouldn't fail and should be no-op
    let no_requests: Vec<ScmiDescriptorChain> = Vec::new();
    backend.process_event_requests(no_requests, &vring).unwrap();
    assert_eq!(backend.event_descriptors.len(), 0);

    // Valid event descriptors, should get stored
    let first_batch = vec![build_event_desc_chain(), build_event_desc_chain()];
    backend.process_event_requests(first_batch, &vring).unwrap();
    assert_eq!(backend.event_descriptors.len(), 2);

    // Some more event descriptors
    let second_batch = vec![
        build_event_desc_chain(),
        build_event_desc_chain(),
        build_event_desc_chain(),
    ];
    backend
        .process_event_requests(second_batch, &vring)
        .unwrap();
    assert_eq!(backend.event_descriptors.len(), 5);
}
/// Invalid event queue buffers are rejected with the corresponding errors.
#[test]
fn test_event_requests_failure() {
    // Processes a single event descriptor chain and checks that it fails
    // with an error matching the given pattern.
    macro_rules! assert_failure {
        ($backend:expr, $vring:expr, $chain:expr, $expected:pat) => {
            match $backend
                .process_event_requests(vec![$chain], &$vring)
                .unwrap_err()
            {
                $expected => (),
                other => panic!("Unexpected result: {:?}", other),
            }
        };
    }

    let mut backend = make_backend();
    let guest_memory =
        GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap();
    let vring = VringRwLock::new(GuestMemoryAtomic::new(guest_memory), 0x1000).unwrap();

    let readable = DescParameters {
        addr: None,
        flags: 0,
        len: 0,
    };

    // Invalid number of desc chains
    assert_failure!(
        backend,
        vring,
        build_dummy_desc_chain(vec![&readable, &readable]),
        VuScmiError::UnexpectedDescriptorCount(2)
    );

    // Read only descriptor
    assert_failure!(
        backend,
        vring,
        build_dummy_desc_chain(vec![&readable]),
        VuScmiError::UnexpectedReadableDescriptor(0)
    );
}
/// Exercises the `VhostUserBackendMut` implementation of the backend.
#[test]
fn test_backend() {
    let mut backend = make_backend();

    assert_eq!(backend.num_queues(), NUM_QUEUES);
    assert_eq!(backend.max_queue_size(), QUEUE_SIZE);
    assert_eq!(backend.features(), 0x171000001);
    assert_eq!(backend.protocol_features(), VhostUserProtocolFeatures::MQ);
    assert_eq!(backend.queues_per_thread(), vec![0xffff_ffff]);

    backend.set_event_idx(true);
    assert!(backend.event_idx);
    assert!(backend.exit_event(0).is_some());

    let mem = GuestMemoryAtomic::new(
        GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap(),
    );
    backend.update_memory(mem.clone()).unwrap();

    // One ready vring for the command queue and one for the event queue.
    let vrings: Vec<VringRwLock> = (0..NUM_QUEUES)
        .map(|_| {
            let vring = VringRwLock::new(mem.clone(), 0x1000).unwrap();
            vring.set_queue_info(0x100, 0x200, 0x300).unwrap();
            vring.set_queue_ready(true);
            vring
        })
        .collect();

    // An event set other than IN is rejected.
    let wrong_event_set = backend
        .handle_event(0, EventSet::OUT, &vrings, 0)
        .unwrap_err();
    assert_eq!(wrong_event_set.kind(), io::ErrorKind::Other);

    // An unknown queue index is rejected.
    let unknown_queue = backend
        .handle_event(2, EventSet::IN, &vrings, 0)
        .unwrap_err();
    assert_eq!(unknown_queue.kind(), io::ErrorKind::Other);

    // Hit the loop part first and the non-loop part afterwards, for the
    // command queue and then for the event queue.
    for queue in [COMMAND_QUEUE, EVENT_QUEUE] {
        for event_idx in [true, false] {
            backend.set_event_idx(event_idx);
            backend
                .handle_event(queue, EventSet::IN, &vrings, 0)
                .unwrap();
        }
    }
}
}

View File

@ -0,0 +1,39 @@
# vhost-device-scsi architecture
Rough outline of the different pieces and how they fit together:
## `scsi/mod.rs`
This defines the `Target` trait, which represents a SCSI target. The code in
this file is independent from:
- A particular SCSI implementation: Currently, we have one implementation of
`Target`, which emulates the SCSI commands itself; but future implementations
could provide pass-through to an iSCSI target or SCSI devices attached to the
host.
- A particular SCSI transport: Nothing in `src/scsi/*` knows anything about
virtio; this is helpful for maintainability, and also allows our SCSI
emulation code to be reusable as, for example, an iSCSI target. To this end,
the `Target` trait is generic over a `Read` and `Write` that it uses for SCSI
data transfer. This makes testing easy: we can just provide a `Vec<u8>` to
write into.
## `scsi/emulation/*.rs`
This is the SCSI emulation code, which forms the bulk of the crate. It provides
`EmulatedTarget`, an implementation of `Target`. `EmulatedTarget`, in turn,
looks at the LUN and delegates commands to an implementation of `LogicalUnit`.
In most cases, this will be `BlockDevice`; there's also `MissingLun`, which is
used for responding to commands to invalid LUNs.
Currently, there is no separation between commands defined in the SPC standard
(commands shared by all device types) and the SBC standard (block-device
specific commands). If we ever implemented another device type (CD/DVD seems
most likely), we'd want to separate those out.
As noted above, the emulation code knows nothing about virtio.
## `src/{main,virtio}.rs`
This code handles vhost-user, virtio, and virtio-scsi; it's the only part of
the crate that knows about these protocols.

View File

@ -0,0 +1,15 @@
# Changelog
## [Unreleased]
### Added
### Changed
### Fixed
### Deprecated
## [0.1.0]
First release

View File

@ -0,0 +1,34 @@
[package]
name = "vhost-device-scsi"
version = "0.1.0"
authors = ["Gaelan Steele <gbs@canishe.com>", "Erik Schilling <erik.schilling@linaro.org>"]
description = "vhost scsi backend device"
repository = "https://github.com/rust-vmm/vhost-device"
readme = "README.md"
keywords = ["scsi", "vhost", "virt", "backend"]
license = "Apache-2.0 OR BSD-3-Clause"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
xen = ["vm-memory/xen", "vhost/xen", "vhost-user-backend/xen"]
[dependencies]
clap = { version = "4.4", features = ["derive"] }
env_logger = "0.10"
epoll = "4.3"
log = "0.4"
num_enum = "0.7"
thiserror = "1.0"
vhost = { version = "0.8", features = ["vhost-user-slave"] }
vhost-user-backend = "0.10"
virtio-bindings = "0.2.1"
virtio-queue = "0.9"
vm-memory = "0.12"
vmm-sys-util = "0.11"
[dev-dependencies]
assert_matches = "1.5"
tempfile = "3.2.0"

View File

@ -0,0 +1 @@
../../LICENSE-APACHE

View File

@ -0,0 +1 @@
../../LICENSE-BSD-3-Clause

View File

@ -0,0 +1,48 @@
# vhost-device-scsi
This is a Rust implementation of a vhost-device-scsi daemon.
## Usage
Run the vhost-device-scsi daemon:
```
vhost-device-scsi -r --socket-path /tmp/vhost-user-scsi.sock /path/to/image.raw /path/to/second-image.raw ...
```
Run QEMU:
```
qemu-system-x86_64 ... \
-device vhost-user-scsi-pci,num_queues=1,param_change=off,chardev=vus \
-chardev socket,id=vus,path=/tmp/vhost-user-scsi.sock \
# must match total guest memory
-object memory-backend-memfd,id=mem,size=384M,share=on \
-numa node,memdev=mem
```
## Limitations
We are currently only supporting a single request queue and do not support
dynamic reconfiguration of LUN parameters (VIRTIO_SCSI_F_CHANGE).
## Features
This crate is a work-in-progress. Currently, it's possible to mount and read
up to 256 read-only raw disk images. Some features we might like to add
at some point, roughly ordered from sooner to later:
- Write support. This should just be a matter of implementing the WRITE
command, but there's a bit of complexity around writeback caching we
need to make sure we get right.
- Support more LUNs. virtio-scsi supports up to 16384 LUNs per target.
After 256, the LUN encoding format is different; it's nothing too
complicated, but I haven't gotten around to implementing it.
- Concurrency. Currently, we process SCSI commands one at a time. Eventually,
it'd be a good idea to use threads or some fancy async/io_uring stuff to
concurrently handle multiple commands. virtio-scsi also allows for multiple
request queues, allowing the guest to submit requests from multiple cores
in parallel; we should support that.
- iSCSI passthrough. This shouldn't be too bad, but it might be a good idea
to decide on a concurrency model (threads or async) before we spend too much
time here.

View File

@ -0,0 +1,180 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
mod scsi;
mod vhu_scsi;
mod virtio;
use std::{
fs::File,
path::PathBuf,
process::exit,
sync::{Arc, RwLock},
};
use clap::Parser;
use log::{error, info, warn};
use thiserror::Error as ThisError;
use vhost::vhost_user::{self, Listener};
use vhost_user_backend::VhostUserDaemon;
use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap};
use crate::scsi::emulation::{
block_device::{BlockDevice, FileBackend, MediumRotationRate},
target::EmulatedTarget,
};
use crate::vhu_scsi::VhostUserScsiBackend;
#[derive(Debug, ThisError)]
enum Error {
#[error("More than 256 LUNs aren't currently supported")]
TooManyLUNs,
#[error("Failed creating listener: {0}")]
FailedCreatingListener(vhost_user::Error),
}
type Result<T> = std::result::Result<T, Error>;
#[derive(Parser)]
struct ScsiArgs {
/// Make the images read-only.
///
/// Currently, we don't actually support writes, but sometimes we want to
/// pretend the disk is writable to work around issues with some tools that
/// use the Linux SCSI generic API.
#[arg(long = "read-only", short = 'r')]
read_only: bool,
/// Tell the guest this disk is non-rotational.
///
/// Affects some heuristics in Linux around, for example, scheduling.
#[arg(long = "solid-state")]
solid_state: bool,
/// Location of vhost-user socket.
#[clap(short, long)]
socket_path: PathBuf,
/// Images against which the SCSI actions are emulated.
images: Vec<PathBuf>,
}
fn create_backend(args: &ScsiArgs) -> Result<VhostUserScsiBackend> {
let mut backend = VhostUserScsiBackend::new();
let mut target = EmulatedTarget::new();
if args.images.len() > 256 {
// This is fairly simple to add; it's just a matter of supporting the right LUN
// encoding formats.
error!("Currently only up to 256 targets are supported");
return Err(Error::TooManyLUNs);
}
if !args.read_only {
warn!("Currently, only read-only images are supported. Unless you know what you're doing, you want to pass -r");
}
for image in &args.images {
let mut dev = BlockDevice::new(FileBackend::new(
File::options()
.read(true)
.write(true)
.open(image)
.expect("Opening image"),
));
dev.set_write_protected(args.read_only);
dev.set_solid_state(if args.solid_state {
MediumRotationRate::NonRotating
} else {
MediumRotationRate::Unreported
});
target.add_lun(Box::new(dev));
}
backend.add_target(Box::new(target));
Ok(backend)
}
fn start_backend(backend: VhostUserScsiBackend, args: ScsiArgs) -> Result<()> {
let backend = Arc::new(RwLock::new(backend));
let mut daemon = VhostUserDaemon::new(
"vhost-device-scsi".into(),
Arc::clone(&backend),
GuestMemoryAtomic::new(GuestMemoryMmap::new()),
)
.expect("Creating daemon");
daemon
.start(Listener::new(args.socket_path, true).map_err(Error::FailedCreatingListener)?)
.expect("Starting daemon");
match daemon.wait() {
Ok(()) => {
info!("Stopping cleanly.");
}
Err(vhost_user_backend::Error::HandleRequest(
vhost_user::Error::PartialMessage | vhost_user::Error::Disconnected,
)) => {
info!("vhost-user connection closed with partial message. If the VM is shutting down, this is expected behavior; otherwise, it might be a bug.");
}
Err(e) => {
warn!("Error running daemon: {:?}", e);
}
}
// No matter the result, we need to shut down the worker thread.
// unwrap will only panic if we already panicked somewhere else
backend
.read()
.unwrap()
.exit_event
.write(1)
.expect("Shutting down worker thread");
Ok(())
}
/// Initialises logging, parses the command line, then builds and runs the
/// backend. Any start-up error is handed back to the caller.
fn run() -> Result<()> {
    env_logger::init();
    let args = ScsiArgs::parse();
    let backend = create_backend(&args)?;
    start_backend(backend, args)
}
/// Program entry point: logs a start-up failure and exits with status 1.
fn main() {
    match run() {
        Ok(()) => {}
        Err(e) => {
            error!("{e}");
            exit(1);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Arguments shared by the tests below: a single read-only `/dev/null`
    /// image served on `socket_path`.
    fn args_for_socket(socket_path: PathBuf) -> ScsiArgs {
        ScsiArgs {
            images: vec!["/dev/null".into()],
            read_only: true,
            socket_path,
            solid_state: false,
        }
    }

    #[test]
    fn test_create_backend() {
        let sock = tempfile::NamedTempFile::new().unwrap();
        let args = args_for_socket(sock.path().to_path_buf());
        create_backend(&args).unwrap();
    }

    #[test]
    fn test_fail_listener() {
        // The parent directories do not exist, so the socket cannot be bound.
        let args = args_for_socket(PathBuf::from("~/path/not/present/scsi"));
        let backend = create_backend(&args).unwrap();
        let err = start_backend(backend, args).unwrap_err();
        assert!(
            matches!(err, Error::FailedCreatingListener(_)),
            "expected failure when creating listener"
        );
    }
}

View File

@ -0,0 +1,778 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use std::{
convert::{TryFrom, TryInto},
fs::File,
io::{self, Read, Write},
num::{NonZeroU32, NonZeroU64, TryFromIntError},
ops::{Add, Div, Mul, Sub},
os::unix::prelude::*,
};
use log::{debug, error, warn};
use super::{
command::{
parse_opcode, CommandType, LunSpecificCommand, ModePageSelection, ModeSensePageControl,
ParseOpcodeResult, ReportSupportedOpCodesMode, SenseFormat, VpdPage, OPCODES,
},
mode_page::ModePage,
response_data::{respond_standard_inquiry_data, SilentlyTruncate},
target::{LogicalUnit, LunRequest},
};
use crate::scsi::{sense, CmdError, CmdOutput, TaskAttr};
/// Medium rotation rate reported to the guest in the Block Device
/// Characteristics VPD page.
pub(crate) enum MediumRotationRate {
    /// No rotation rate is reported (encoded as 0 in the VPD page).
    Unreported,
    /// The medium does not rotate (encoded as 1 in the VPD page).
    NonRotating,
}
/// A position or length measured in bytes.
#[derive(Clone, Copy, PartialEq, PartialOrd)]
pub(crate) struct ByteOffset(u64);

impl From<u64> for ByteOffset {
    fn from(value: u64) -> Self {
        ByteOffset(value)
    }
}

impl From<ByteOffset> for u64 {
    fn from(value: ByteOffset) -> Self {
        value.0
    }
}

/// Bytes divided by a block size give a number of whole blocks (the
/// remainder is discarded).
impl Div<BlockSize> for ByteOffset {
    type Output = BlockOffset;

    fn div(self, rhs: BlockSize) -> Self::Output {
        // The divisor is non-zero by construction, so this cannot panic.
        BlockOffset(self.0 / NonZeroU64::from(rhs.0))
    }
}

/// The size of one logical block in bytes; guaranteed to be non-zero.
#[derive(Clone, Copy, PartialEq, PartialOrd)]
pub(crate) struct BlockSize(NonZeroU32);

impl From<BlockSize> for u32 {
    fn from(value: BlockSize) -> Self {
        u32::from(value.0)
    }
}

/// Fails when `value` is zero.
impl TryFrom<u32> for BlockSize {
    type Error = TryFromIntError;

    fn try_from(value: u32) -> Result<Self, Self::Error> {
        Ok(BlockSize(NonZeroU32::try_from(value)?))
    }
}

/// A position or length measured in logical blocks.
///
/// The arithmetic operators saturate at the bounds of `u64` instead of
/// overflowing. Block numbers and counts come straight from guest-supplied
/// CDBs, so an overflow must neither panic the daemon nor wrap around to a
/// small value that would pass a range check.
#[derive(Clone, Copy, PartialEq, PartialOrd)]
pub(crate) struct BlockOffset(u64);

impl From<BlockOffset> for u64 {
    fn from(value: BlockOffset) -> Self {
        value.0
    }
}

impl From<u64> for BlockOffset {
    fn from(value: u64) -> Self {
        BlockOffset(value)
    }
}

/// Saturating addition: the result is clamped to `u64::MAX`.
impl Add<BlockOffset> for BlockOffset {
    type Output = BlockOffset;

    fn add(self, rhs: BlockOffset) -> Self::Output {
        BlockOffset(self.0.saturating_add(rhs.0))
    }
}

/// Saturating subtraction: the result is clamped to zero.
impl Sub<BlockOffset> for BlockOffset {
    type Output = Self;

    fn sub(self, rhs: BlockOffset) -> Self::Output {
        BlockOffset(self.0.saturating_sub(rhs.0))
    }
}

/// Blocks multiplied by a block size give bytes; the result is clamped to
/// `u64::MAX`.
impl Mul<BlockSize> for BlockOffset {
    type Output = ByteOffset;

    fn mul(self, rhs: BlockSize) -> Self::Output {
        ByteOffset(self.0.saturating_mul(u64::from(u32::from(rhs))))
    }
}
/// Storage behind an emulated block device.
///
/// `BlockDevice` performs all of its I/O through this trait; `FileBackend` is
/// the implementation backed by a regular file.
pub(crate) trait BlockDeviceBackend: Send + Sync {
    /// Fills all of `buf` with data starting at byte `offset`.
    fn read_exact_at(&mut self, buf: &mut [u8], offset: ByteOffset) -> io::Result<()>;
    /// Writes all of `buf` starting at byte `offset`.
    fn write_exact_at(&mut self, buf: &[u8], offset: ByteOffset) -> io::Result<()>;
    /// Returns the current size of the device, in blocks.
    fn size_in_blocks(&mut self) -> io::Result<BlockOffset>;
    /// Returns the size of one block.
    fn block_size(&self) -> BlockSize;
    /// Flushes previously written data to the underlying storage.
    fn sync(&mut self) -> io::Result<()>;
}
/// A [`BlockDeviceBackend`] that stores its data in a [`File`].
pub(crate) struct FileBackend {
    file: File,
    block_size: BlockSize,
}

impl FileBackend {
    /// Wraps `file`, exposing it with a block size of 512 bytes.
    pub fn new(file: File) -> Self {
        let block_size = BlockSize::try_from(512).expect("512 is valid BlockSize");
        Self { file, block_size }
    }
}
impl BlockDeviceBackend for FileBackend {
    /// Reads exactly `buf.len()` bytes from the file at byte `offset`.
    fn read_exact_at(&mut self, buf: &mut [u8], offset: ByteOffset) -> io::Result<()> {
        self.file.read_exact_at(buf, u64::from(offset))
    }

    /// Writes all of `buf` to the file at byte `offset`.
    fn write_exact_at(&mut self, buf: &[u8], offset: ByteOffset) -> io::Result<()> {
        self.file.write_all_at(buf, u64::from(offset))
    }

    /// Returns the file length in blocks, queried from the file metadata on
    /// every call.
    ///
    /// # Errors
    ///
    /// Fails if the metadata cannot be read, or with
    /// [`io::ErrorKind::InvalidData`] if the file length is not a whole number
    /// of blocks. The latter depends on the image supplied by the user (and
    /// can change while we run), so it is reported to the caller rather than
    /// treated as a programming error that aborts the daemon.
    fn size_in_blocks(&mut self) -> io::Result<BlockOffset> {
        let len = self.file.metadata()?.len();
        let block_size = u64::from(u32::from(self.block_size));
        if len % block_size != 0 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!(
                    "image size ({len} bytes) is not a multiple of the block size ({block_size} bytes)"
                ),
            ));
        }
        Ok(ByteOffset::from(len) / self.block_size)
    }

    fn block_size(&self) -> BlockSize {
        self.block_size
    }

    /// Flushes file data (not necessarily all metadata) to disk.
    fn sync(&mut self) -> io::Result<()> {
        self.file.sync_data()
    }
}
/// An emulated SCSI block device (logical unit) on top of a
/// [`BlockDeviceBackend`].
pub(crate) struct BlockDevice<T: BlockDeviceBackend> {
    // Where the data actually lives.
    backend: T,
    // Reported to the guest as the WP bit of the MODE SENSE (6) header.
    write_protected: bool,
    // Reported to the guest in the Block Device Characteristics VPD page.
    rotation_rate: MediumRotationRate,
}
impl<T: BlockDeviceBackend> BlockDevice<T> {
    /// Creates a device on top of `backend` that is writable and reports no
    /// rotation rate.
    pub(crate) const fn new(backend: T) -> Self {
        Self {
            backend,
            write_protected: false,
            rotation_rate: MediumRotationRate::Unreported,
        }
    }

    /// Reads `blocks` blocks starting at block `lba` into a fresh buffer.
    fn read_blocks(&mut self, lba: BlockOffset, blocks: BlockOffset) -> io::Result<Vec<u8>> {
        // TODO: Ideally, this would be a read_vectored directly into guest
        // address space. Instead, we have an allocation and several copies.
        let byte_len = usize::try_from(u64::from(blocks * self.backend.block_size()))
            .expect("block length in bytes should fit usize");
        let mut data = vec![0u8; byte_len];

        let start = lba * self.backend.block_size();
        self.backend.read_exact_at(&mut data, start)?;
        Ok(data)
    }

    /// Pulls `blocks` blocks worth of data out of `reader` and writes them to
    /// the backend starting at block `lba`.
    fn write_blocks(
        &mut self,
        lba: BlockOffset,
        blocks: BlockOffset,
        reader: &mut dyn Read,
    ) -> io::Result<()> {
        // TODO: Avoid the copies here.
        let byte_len = usize::try_from(u64::from(blocks * self.backend.block_size()))
            .expect("block length in bytes should fit usize");
        let mut data = vec![0u8; byte_len];
        reader.read_exact(&mut data)?;

        let start = lba * self.backend.block_size();
        self.backend.write_exact_at(&data, start)
    }

    /// Writes the same `buf` to each of the `block_count` blocks starting at
    /// `lba_start`, stopping at the first failed write.
    fn write_same_block(
        &mut self,
        lba_start: BlockOffset,
        block_count: BlockOffset,
        buf: &[u8],
    ) -> io::Result<()> {
        let block_size = self.backend.block_size();
        let first = u64::from(lba_start);
        let end = u64::from(lba_start + block_count);
        (first..end)
            .try_for_each(|lba| self.backend.write_exact_at(buf, BlockOffset(lba) * block_size))
    }

    /// Sets whether the device is reported as write-protected.
    pub fn set_write_protected(&mut self, wp: bool) {
        self.write_protected = wp;
    }

    /// Sets the medium rotation rate reported to the guest.
    pub fn set_solid_state(&mut self, rotation_rate: MediumRotationRate) {
        self.rotation_rate = rotation_rate;
    }
}
impl<T: BlockDeviceBackend> LogicalUnit for BlockDevice<T> {
fn execute_command(
&mut self,
data_in: &mut SilentlyTruncate<&mut dyn Write>,
data_out: &mut dyn Read,
req: LunRequest,
command: LunSpecificCommand,
) -> Result<CmdOutput, CmdError> {
if req.crn != 0 {
// CRN is a weird bit of the protocol we wouldn't ever expect to be used over
// virtio-scsi; but it's allowed to set it non-zero
warn!("Received non-zero CRN: {}", req.crn);
}
if req.task_attr != TaskAttr::Simple {
// virtio-scsi spec allows us to treat all task attrs as SIMPLE.
warn!("Ignoring non-simple task attr of {:?}", req.task_attr);
}
if req.prio != 0 {
// My reading of SAM-6 is that priority is purely advisory, so it's fine to
// ignore it.
warn!("Ignoring non-zero priority of {}.", req.prio);
}
if req.naca {
// We don't support NACA, and say as much in our INQUIRY data, so if
// we get it that's an error.
warn!("Driver set NACA bit, which is unsupported.");
return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB));
}
debug!("Incoming command: {:?}", command);
match command {
LunSpecificCommand::TestUnitReady => Ok(CmdOutput::ok()),
LunSpecificCommand::ReadCapacity10 => {
match self.backend.size_in_blocks() {
Ok(size) => {
// READ CAPACITY (10) returns a 32-bit LBA, which may not be enough. If it
// isn't, we're supposed to return 0xffff_ffff and hope the driver gets the
// memo and uses the newer READ CAPACITY (16).
// n.b. this is the last block, ie (length-1), not length
let final_block: u32 = u64::from(size - BlockOffset(1))
.try_into()
.unwrap_or(0xffff_ffff);
let block_size: u32 = u32::from(self.backend.block_size());
data_in
.write_all(&u32::to_be_bytes(final_block))
.map_err(CmdError::DataIn)?;
data_in
.write_all(&u32::to_be_bytes(block_size))
.map_err(CmdError::DataIn)?;
Ok(CmdOutput::ok())
}
Err(e) => {
error!("Error getting image size: {}", e);
// TODO: Is this a reasonable sense code to send?
Ok(CmdOutput::check_condition(sense::UNRECOVERED_READ_ERROR))
}
}
}
LunSpecificCommand::ReadCapacity16 => {
match self.backend.size_in_blocks() {
Ok(size) => {
// n.b. this is the last block, ie (length-1), not length
let final_block = u64::from(size - BlockOffset(1));
let block_size = u32::from(self.backend.block_size());
data_in
.write_all(&u64::to_be_bytes(final_block))
.map_err(CmdError::DataIn)?;
data_in
.write_all(&u32::to_be_bytes(block_size))
.map_err(CmdError::DataIn)?;
// no protection stuff; 1-to-1 logical/physical blocks
data_in.write_all(&[0, 0]).map_err(CmdError::DataIn)?;
// top 2 bits: thin provisioning stuff; other 14 bits are lowest
// aligned LBA, which is zero
data_in
.write_all(&[0b1100_0000, 0])
.map_err(CmdError::DataIn)?;
// reserved
data_in.write_all(&[0; 16]).map_err(CmdError::DataIn)?;
Ok(CmdOutput::ok())
}
Err(e) => {
error!("Error getting image size: {}", e);
// TODO: Is this a reasonable sense code to send?
Ok(CmdOutput::check_condition(sense::UNRECOVERED_READ_ERROR))
}
}
}
LunSpecificCommand::ModeSense6 { mode_page, pc, dbd } => {
// we use this for the pages array if we only need a single element; lifetime
// rules mean it has to be declared here
let single_page_array: [ModePage; 1];
let pages = match mode_page {
ModePageSelection::Single(x) => {
single_page_array = [x];
&single_page_array
}
ModePageSelection::AllPageZeros => ModePage::ALL_ZERO,
};
let pages_len: u32 = pages.iter().map(|x| u32::from(x.page_length() + 2)).sum();
// SPC-6r05, 7.5.6: "Logical units that support more than 256 bytes of block
// descriptors and mode pages should implement ten-byte mode commands. The MODE
// DATA LENGTH field in the six-byte CDB header limits the transferred data to
// 256 bytes."
// Unclear what exactly we're supposed to do if we have more than 256 bytes of
// mode pages and get sent a MODE SENSE (6). In any case, we don't at the
// moment; if we ever get that much, this unwrap() will start
// crashing us and we can figure out what to do.
let pages_len = u8::try_from(pages_len).unwrap();
// mode parameter header
data_in
.write_all(&[
pages_len + 3, // size in bytes after this one
0, // medium type - 0 for SBC
if self.write_protected {
0b1001_0000 // WP, support DPOFUA
} else {
0b0001_0000 // support DPOFUA
},
0, // block desc length
])
.map_err(CmdError::DataIn)?;
if !dbd {
// TODO: Block descriptors are optional, so we currently
// don't provide them. Does any driver
// actually use them?
}
for page in pages {
match pc {
ModeSensePageControl::Current | ModeSensePageControl::Default => {
page.write(data_in).map_err(CmdError::DataIn)?;
}
ModeSensePageControl::Changeable => {
// SPC-6 6.14.3: "If the logical unit does not
// implement changeable parameters mode pages and
// the device server receives a MODE SENSE command
// with 01b in the PC field, then the device server
// shall terminate the command with CHECK CONDITION
// status, with the sense key set to ILLEGAL
// REQUEST, and the additional sense code set to
// INVALID FIELD IN CDB."
return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB));
}
ModeSensePageControl::Saved => {
return Ok(CmdOutput::check_condition(
sense::SAVING_PARAMETERS_NOT_SUPPORTED,
))
}
}
}
Ok(CmdOutput::ok())
}
LunSpecificCommand::Read10 {
dpo,
fua,
lba,
transfer_length,
} => {
if dpo {
// DPO is just a hint that the guest probably won't access
// this any time soon, so we can ignore it
debug!("Silently ignoring DPO flag");
}
if fua {
// Somewhat weirdly, SCSI supports FUA on reads. Here's the
// key bit: "A force unit access (FUA) bit set to one
// specifies that the device server shall read the logical
// blocks from… the medium. If the FUA bit is set to one
// and a volatile cache contains a more recent version of a
// logical block than… the medium, then, before reading the
// logical block, the device server shall write the logical
// block to… the medium."
// I guess the idea is that you can read something back, and
// be absolutely sure what you just read will persist.
// So for our purposes, we need to make sure whatever we
// return has been saved to disk. fsync()ing the whole image
// is a bit blunt, but does the trick.
if let Err(e) = self.backend.sync() {
error!("Error syncing file: {}", e);
return Ok(CmdOutput::check_condition(sense::TARGET_FAILURE));
}
}
// Ignore group number: AFAICT, it's for separating reads from different
// workloads in performance metrics, and we don't report anything like that
let size = match self.backend.size_in_blocks() {
Ok(size) => size,
Err(e) => {
error!("Error getting image size for read: {}", e);
return Ok(CmdOutput::check_condition(sense::UNRECOVERED_READ_ERROR));
}
};
let lba = BlockOffset(lba.into());
let transfer_length = BlockOffset(transfer_length.into());
if lba + transfer_length > size {
return Ok(CmdOutput::check_condition(
sense::LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE,
));
}
let read_result = self.read_blocks(lba, transfer_length);
match read_result {
Ok(bytes) => {
data_in.write_all(&bytes[..]).map_err(CmdError::DataIn)?;
Ok(CmdOutput::ok())
}
Err(e) => {
error!("Error reading image: {}", e);
Ok(CmdOutput::check_condition(sense::UNRECOVERED_READ_ERROR))
}
}
}
LunSpecificCommand::Write10 {
dpo,
fua,
lba,
transfer_length,
} => {
if dpo {
// DPO is just a hint that the guest probably won't access
// this any time soon, so we can ignore it
debug!("Silently ignoring DPO flag");
}
let size = match self.backend.size_in_blocks() {
Ok(size) => size,
Err(e) => {
error!("Error getting image size for read: {}", e);
return Ok(CmdOutput::check_condition(sense::TARGET_FAILURE));
}
};
let lba = BlockOffset(lba.into());
let transfer_length = BlockOffset(transfer_length.into());
if lba + transfer_length > size {
return Ok(CmdOutput::check_condition(
sense::LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE,
));
}
let write_result = self.write_blocks(lba, transfer_length, data_out);
if fua {
if let Err(e) = self.backend.sync() {
error!("Error syncing file: {}", e);
return Ok(CmdOutput::check_condition(sense::TARGET_FAILURE));
}
}
match write_result {
Ok(()) => Ok(CmdOutput::ok()),
Err(e) => {
error!("Error writing to block device: {}", e);
Ok(CmdOutput::check_condition(sense::TARGET_FAILURE))
}
}
}
LunSpecificCommand::WriteSame16 {
lba,
number_of_logical_blocks,
anchor,
} => {
// We do not support block provisioning
if anchor {
return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB));
}
// This command can be used to unmap/discard a region of blocks...
// TODO: Do something smarter and punch holes into the backend,
// for now we will just write A LOT of zeros in a very inefficient way.
let size = match self.backend.size_in_blocks() {
Ok(size) => size,
Err(e) => {
error!("Error getting image size for read: {}", e);
return Ok(CmdOutput::check_condition(sense::UNRECOVERED_READ_ERROR));
}
};
let lba = BlockOffset(lba);
let number_of_logical_blocks = BlockOffset(number_of_logical_blocks.into());
if lba + number_of_logical_blocks > size {
return Ok(CmdOutput::check_condition(
sense::LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE,
));
}
let mut buf = vec![
0;
usize::try_from(u32::from(self.backend.block_size()))
.expect("block_size should fit usize")
];
let read_result = data_out.read_exact(&mut buf);
if let Err(e) = read_result {
error!("Error reading from data_out: {}", e);
return Ok(CmdOutput::check_condition(sense::TARGET_FAILURE));
}
let write_result = self.write_same_block(lba, number_of_logical_blocks, &buf);
match write_result {
Ok(()) => Ok(CmdOutput::ok()),
Err(e) => {
error!("Error writing to block device: {}", e);
Ok(CmdOutput::check_condition(sense::TARGET_FAILURE))
}
}
}
LunSpecificCommand::Inquiry(page_code) => {
// top 3 bits 0: peripheral device code = exists and ready
// bottom 5 bits 0: device type = block device
data_in.write_all(&[0]).map_err(CmdError::DataIn)?;
if let Some(code) = page_code {
let mut out = vec![];
match code {
VpdPage::SupportedVpdPages => {
out.push(VpdPage::SupportedVpdPages.into());
out.push(VpdPage::BlockDeviceCharacteristics.into());
out.push(VpdPage::LogicalBlockProvisioning.into());
}
VpdPage::BlockDeviceCharacteristics => {
let rotation_rate: u16 = match self.rotation_rate {
MediumRotationRate::Unreported => 0,
MediumRotationRate::NonRotating => 1,
};
out.extend_from_slice(&rotation_rate.to_be_bytes());
// nothing worth setting in the rest
out.extend_from_slice(&[0; 58]);
}
VpdPage::LogicalBlockProvisioning => {
out.push(0); // don't support threshold sets
out.push(0b1110_0100); // support unmapping w/ UNMAP
// and WRITE SAME (10 & 16),
// don't support anchored
// LBAs or group descriptors
out.push(0b0000_0010); // thin provisioned
out.push(0); // no threshold % support
}
_ => return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB)),
}
data_in
.write_all(&[code.into()])
.map_err(CmdError::DataIn)?;
data_in
.write_all(
&u16::try_from(out.len())
.expect("VPD page < 2^16 bits")
.to_be_bytes(),
)
.map_err(CmdError::DataIn)?;
data_in.write_all(&out).map_err(CmdError::DataIn)?;
} else {
respond_standard_inquiry_data(data_in).map_err(CmdError::DataIn)?;
}
Ok(CmdOutput::ok())
}
LunSpecificCommand::ReportSupportedOperationCodes { rctd, mode } => {
// helpers for output data format
fn one_command_supported(
data_in: &mut impl Write,
ty: CommandType,
) -> io::Result<()> {
data_in.write_all(&[0])?; // unused flags
data_in.write_all(&[0b0000_0011])?; // supported, don't set a bunch of flags
let tpl = ty.cdb_template();
data_in.write_all(
&u16::try_from(tpl.len())
.expect("length of TPL to be same as CDB")
.to_be_bytes(),
)?;
data_in.write_all(tpl)?;
Ok(())
}
fn one_command_not_supported(data_in: &mut impl Write) -> io::Result<()> {
data_in.write_all(&[0])?; // unused flags
data_in.write_all(&[0b0000_0001])?; // not supported
data_in.write_all(&[0; 2])?; // cdb len
Ok(())
}
fn timeout_descriptor(data_in: &mut impl Write) -> io::Result<()> {
// timeout descriptor
data_in.write_all(&0xa_u16.to_be_bytes())?; // len
data_in.write_all(&[0, 0])?; // reserved, cmd specific
data_in.write_all(&0_u32.to_be_bytes())?;
data_in.write_all(&0_u32.to_be_bytes())?;
Ok(())
}
match mode {
ReportSupportedOpCodesMode::All => {
let cmd_len = if rctd { 20 } else { 8 };
let len = u32::try_from(OPCODES.len() * cmd_len)
.expect("less than (2^32 / 20) ~= 2^27 opcodes");
data_in
.write_all(&len.to_be_bytes())
.map_err(CmdError::DataIn)?;
for &(ty, (opcode, sa)) in OPCODES {
data_in.write_all(&[opcode]).map_err(CmdError::DataIn)?;
data_in.write_all(&[0]).map_err(CmdError::DataIn)?; // reserved
data_in
.write_all(&sa.unwrap_or(0).to_be_bytes())
.map_err(CmdError::DataIn)?;
data_in.write_all(&[0]).map_err(CmdError::DataIn)?; // reserved
let ctdp: u8 = if rctd { 0b10 } else { 0b00 };
let servactv = u8::from(sa.is_some());
data_in
.write_all(&[ctdp | servactv])
.map_err(CmdError::DataIn)?;
data_in
.write_all(
&u16::try_from(ty.cdb_template().len())
.expect("length of TPL to be same as CDB")
.to_be_bytes(),
)
.map_err(CmdError::DataIn)?;
if rctd {
timeout_descriptor(data_in).map_err(CmdError::DataIn)?;
}
}
}
ReportSupportedOpCodesMode::OneCommand(opcode) => match parse_opcode(opcode) {
ParseOpcodeResult::Command(ty) => {
one_command_supported(data_in, ty).map_err(CmdError::DataIn)?;
if rctd {
timeout_descriptor(data_in).map_err(CmdError::DataIn)?;
}
}
ParseOpcodeResult::ServiceAction(_) => {
return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB));
}
ParseOpcodeResult::Invalid => {
warn!("Reporting that we don't support command {:#2x}. It might be worth adding.", opcode);
one_command_not_supported(data_in).map_err(CmdError::DataIn)?;
}
},
ReportSupportedOpCodesMode::OneServiceAction(opcode, sa) => {
match parse_opcode(opcode) {
ParseOpcodeResult::Command(_) => {
return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB))
}
ParseOpcodeResult::ServiceAction(unparsed_sa) => {
if let Some(ty) = unparsed_sa.parse(sa) {
one_command_supported(data_in, ty).map_err(CmdError::DataIn)?;
if rctd {
timeout_descriptor(data_in).map_err(CmdError::DataIn)?;
}
} else {
warn!("Reporting that we don't support command {:#2x}/{:#2x}. It might be worth adding.", opcode, sa);
one_command_not_supported(data_in).map_err(CmdError::DataIn)?;
}
}
ParseOpcodeResult::Invalid => {
// the spec isn't super clear what we're supposed to do here, but I
// think an invalid opcode is one for which our implementation
// "does not implement service actions", so we say invalid field in
// CDB
warn!("Reporting that we don't support command {:#2x}/{:#2x}. It might be worth adding.", opcode, sa);
return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB));
}
}
}
ReportSupportedOpCodesMode::OneCommandOrServiceAction(opcode, sa) => {
match parse_opcode(opcode) {
ParseOpcodeResult::Command(ty) => {
if sa == 0 {
one_command_supported(data_in, ty).map_err(CmdError::DataIn)?;
if rctd {
timeout_descriptor(data_in).map_err(CmdError::DataIn)?;
}
} else {
one_command_not_supported(data_in).map_err(CmdError::DataIn)?;
}
}
ParseOpcodeResult::ServiceAction(unparsed_sa) => {
if let Some(ty) = unparsed_sa.parse(sa) {
one_command_supported(data_in, ty).map_err(CmdError::DataIn)?;
if rctd {
timeout_descriptor(data_in).map_err(CmdError::DataIn)?;
}
} else {
warn!("Reporting that we don't support command {:#2x}/{:#2x}. It might be worth adding.", opcode, sa);
one_command_not_supported(data_in).map_err(CmdError::DataIn)?;
}
}
ParseOpcodeResult::Invalid => {
warn!("Reporting that we don't support command {:#2x}[/{:#2x}]. It might be worth adding.", opcode, sa);
one_command_not_supported(data_in).map_err(CmdError::DataIn)?;
}
}
}
}
Ok(CmdOutput::ok())
}
LunSpecificCommand::RequestSense(format) => {
match format {
SenseFormat::Fixed => {
data_in
.write_all(&sense::NO_ADDITIONAL_SENSE_INFORMATION.to_fixed_sense())
.map_err(CmdError::DataIn)?;
Ok(CmdOutput::ok())
}
SenseFormat::Descriptor => {
// Don't support desciptor format.
Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB))
}
}
}
LunSpecificCommand::SynchronizeCache10 => {
// While SCSI allows just syncing a range, we just sync the entire file
match self.backend.sync() {
Ok(()) => Ok(CmdOutput::ok()),
Err(e) => {
error!("Error syncing block device: {}", e);
Ok(CmdOutput::check_condition(sense::TARGET_FAILURE))
}
}
}
}
}
}

View File

@ -0,0 +1,681 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
//! Data structures and parsing code for SCSI commands. A rough overview:
//! We need to deal with opcodes in two places: in parsing commands themselves,
//! and in implementing REPORT SUPPORTED OPERATION CODES. Therefore, we parse
//! commands in two steps. First, we parse the opcode (and sometimes service
//! action) into a `CommandType` (a C-style enum containing just the commands,
//! not their parameters), then using that, we parse the rest of the CDB and
//! obtain a `Cdb`, which consists of a `Command`, an enum representing a
//! command and its parameters, along with some fields shared across many or all
//! commands.
use std::convert::{TryFrom, TryInto};
use log::warn;
use num_enum::TryFromPrimitive;
use crate::scsi::emulation::mode_page::ModePage;
/// One of the modes supported by SCSI's REPORT LUNS command.
///
/// The discriminants are the raw byte values; conversion from a `u8` is
/// derived through `TryFromPrimitive`.
#[derive(PartialEq, Eq, TryFromPrimitive, Debug, Copy, Clone)]
#[repr(u8)]
pub(crate) enum ReportLunsSelectReport {
    NoWellKnown = 0x0,
    WellKnownOnly = 0x1,
    All = 0x2,
    Administrative = 0x10,
    TopLevel = 0x11,
    SameConglomerate = 0x12,
}
/// A type of "vital product data" page returned by SCSI's INQUIRY command.
///
/// Variants marked with a star (`// *`) are ones Linux will use if available.
/// The conversions to and from the raw page code live in the `TryFrom<u8>`
/// and `From<VpdPage> for u8` implementations.
#[derive(PartialEq, Eq, Debug, Copy, Clone)]
pub(crate) enum VpdPage {
    /// An ASCII information page; carries its own page code (0x01..=0x7f).
    Ascii(u8),
    Ata,                        // *
    BlockDeviceCharacteristics, // *
    BlockDeviceCharacteristicsExt,
    BlockLimits, // *
    BlockLimitsExt,
    CfaProfile,
    DeviceConstituents,
    DeviceIdentification, // *
    ExtendedInquiry,
    FormatPresets,
    LogicalBlockProvisioning, // *
    ManagementNetworkAddresses,
    ModePagePolicy,
    PowerCondition,
    PowerConsumption,
    PortocolSpecificLogicalUnit,
    ProtocolSpecificPort,
    Referrals,
    ScsiFeatureSets,
    ScsiPorts,
    SoftwareInterfaceIdentification,
    SupportedVpdPages, // *
    ThirdPartyCopy,
    UnitSerialNumber,                // *
    ZonedBlockDeviceCharacteristics, // *
}
/// The two-bit page control (PC) field of a MODE SENSE command, selecting
/// which set of mode parameter values is requested.
#[derive(PartialEq, Eq, TryFromPrimitive, Debug, Copy, Clone)]
#[repr(u8)]
pub(crate) enum ModeSensePageControl {
    Current = 0b00,
    Changeable = 0b01,
    Default = 0b10,
    Saved = 0b11,
}
/// Decodes a raw VPD page code; codes that are not listed yield `Err(())`.
impl TryFrom<u8> for VpdPage {
    type Error = ();

    fn try_from(val: u8) -> Result<Self, ()> {
        let page = match val {
            0x00 => Self::SupportedVpdPages,
            0x1..=0x7f => Self::Ascii(val),
            0x80 => Self::UnitSerialNumber,
            0x83 => Self::DeviceIdentification,
            0x84 => Self::SoftwareInterfaceIdentification,
            0x85 => Self::ManagementNetworkAddresses,
            0x86 => Self::ExtendedInquiry,
            0x87 => Self::ModePagePolicy,
            0x88 => Self::ScsiPorts,
            0x89 => Self::Ata,
            0x8a => Self::PowerCondition,
            0x8b => Self::DeviceConstituents,
            0x8c => Self::CfaProfile,
            0x8d => Self::PowerConsumption,
            0x8f => Self::ThirdPartyCopy,
            0x90 => Self::PortocolSpecificLogicalUnit,
            0x91 => Self::ProtocolSpecificPort,
            0x92 => Self::ScsiFeatureSets,
            0xb0 => Self::BlockLimits,
            0xb1 => Self::BlockDeviceCharacteristics,
            0xb2 => Self::LogicalBlockProvisioning,
            0xb3 => Self::Referrals,
            0xb5 => Self::BlockDeviceCharacteristicsExt,
            0xb6 => Self::ZonedBlockDeviceCharacteristics,
            0xb7 => Self::BlockLimitsExt,
            0xb8 => Self::FormatPresets,
            _ => return Err(()),
        };
        Ok(page)
    }
}
/// Encodes a VPD page as its raw page code (arms listed in page-code order).
impl From<VpdPage> for u8 {
    fn from(page: VpdPage) -> Self {
        match page {
            VpdPage::SupportedVpdPages => 0x00,
            VpdPage::Ascii(code) => code,
            VpdPage::UnitSerialNumber => 0x80,
            VpdPage::DeviceIdentification => 0x83,
            VpdPage::SoftwareInterfaceIdentification => 0x84,
            VpdPage::ManagementNetworkAddresses => 0x85,
            VpdPage::ExtendedInquiry => 0x86,
            VpdPage::ModePagePolicy => 0x87,
            VpdPage::ScsiPorts => 0x88,
            VpdPage::Ata => 0x89,
            VpdPage::PowerCondition => 0x8a,
            VpdPage::DeviceConstituents => 0x8b,
            VpdPage::CfaProfile => 0x8c,
            VpdPage::PowerConsumption => 0x8d,
            VpdPage::ThirdPartyCopy => 0x8f,
            VpdPage::PortocolSpecificLogicalUnit => 0x90,
            VpdPage::ProtocolSpecificPort => 0x91,
            VpdPage::ScsiFeatureSets => 0x92,
            VpdPage::BlockLimits => 0xb0,
            VpdPage::BlockDeviceCharacteristics => 0xb1,
            VpdPage::LogicalBlockProvisioning => 0xb2,
            VpdPage::Referrals => 0xb3,
            VpdPage::BlockDeviceCharacteristicsExt => 0xb5,
            VpdPage::ZonedBlockDeviceCharacteristics => 0xb6,
            VpdPage::BlockLimitsExt => 0xb7,
            VpdPage::FormatPresets => 0xb8,
        }
    }
}
/// The sense data format requested by a REQUEST SENSE command.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub(crate) enum SenseFormat {
    Fixed,
    Descriptor,
}
/// Which mode page(s) a MODE SENSE command asks for.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub(crate) enum ModePageSelection {
    /// Every page in the `ModePage::ALL_ZERO` list.
    AllPageZeros,
    /// Exactly one page.
    Single(ModePage),
}
/// A parsed command of the LUN-independent kind (see [`Command`]).
#[derive(Debug)]
pub(crate) enum LunIndependentCommand {
    ReportLuns(ReportLunsSelectReport),
}
/// A parsed command, with its parameters, that is executed by a specific
/// logical unit.
#[derive(Debug)]
pub(crate) enum LunSpecificCommand {
    /// INQUIRY; `Some` selects a VPD page, `None` the standard inquiry data.
    Inquiry(Option<VpdPage>),
    ModeSense6 {
        pc: ModeSensePageControl,
        mode_page: ModePageSelection,
        /// Disable block descriptors
        dbd: bool,
    },
    Read10 {
        /// Disable page out (i.e. hint that this page won't be accessed again
        /// soon, so we shouldn't bother caching it)
        dpo: bool,
        /// Force unit access (i.e. bypass cache)
        fua: bool,
        lba: u32,
        transfer_length: u16,
    },
    Write10 {
        /// Disable page out (i.e. hint that this page won't be accessed again
        /// soon, so we shouldn't bother caching it)
        dpo: bool,
        /// Force unit access (i.e. bypass cache)
        fua: bool,
        lba: u32,
        transfer_length: u16,
    },
    WriteSame16 {
        lba: u64,
        number_of_logical_blocks: u32,
        anchor: bool,
    },
    ReadCapacity10,
    ReadCapacity16,
    ReportSupportedOperationCodes {
        /// SCSI RCTD bit: whether we should include timeout descriptors.
        rctd: bool,
        mode: ReportSupportedOpCodesMode,
    },
    RequestSense(SenseFormat),
    TestUnitReady,
    SynchronizeCache10,
}
/// A fully parsed SCSI command, split by whether the target handles it
/// itself or hands it to a logical unit.
#[derive(Debug)]
pub(crate) enum Command {
/// Handled by the target without consulting a logical unit.
LunIndependentCommand(LunIndependentCommand),
/// Handed to the addressed logical unit.
LunSpecificCommand(LunSpecificCommand),
}
/// The kind of a supported command, without any of its parsed fields.
///
/// Every variant has an entry in `OPCODES` and a CDB template in
/// `CommandType::cdb_template`.
#[derive(Clone, Copy, Debug)]
pub(crate) enum CommandType {
Inquiry,
ModeSense6,
Read10,
ReadCapacity10,
ReadCapacity16,
ReportLuns,
ReportSupportedOperationCodes,
RequestSense,
TestUnitReady,
Write10,
WriteSame16,
SynchronizeCache10,
}
/// Table of supported commands.
///
/// Each entry is `(command, (opcode, service action))`. A service action of
/// `None` means the opcode alone identifies the command; `Some(sa)` means the
/// opcode must be combined with service action `sa`. Lookups scan the table
/// front to back and take the first match.
pub(crate) const OPCODES: &[(CommandType, (u8, Option<u16>))] = &[
(CommandType::TestUnitReady, (0x0, None)),
(CommandType::RequestSense, (0x3, None)),
(CommandType::Inquiry, (0x12, None)),
(CommandType::ModeSense6, (0x1a, None)),
(CommandType::ReadCapacity10, (0x25, None)),
(CommandType::Read10, (0x28, None)),
(CommandType::Write10, (0x2a, None)),
(CommandType::SynchronizeCache10, (0x35, None)),
(CommandType::WriteSame16, (0x93, None)),
(CommandType::ReadCapacity16, (0x9e, Some(0x10))),
(CommandType::ReportLuns, (0xa0, None)),
(
CommandType::ReportSupportedOperationCodes,
(0xa3, Some(0xc)),
),
];
/// An opcode that is known to need a service action before the command can
/// be identified. Produced by `parse_opcode`.
#[derive(Debug, Clone, Copy)]
pub(crate) struct UnparsedServiceAction(u8);

impl UnparsedServiceAction {
    /// Look up the command for this opcode combined with `service_action`.
    ///
    /// Returns `None` when `OPCODES` has no entry for that combination.
    pub fn parse(self, service_action: u16) -> Option<CommandType> {
        let wanted = (self.0, Some(service_action));
        OPCODES
            .iter()
            .find_map(|&(ty, code)| if code == wanted { Some(ty) } else { None })
    }
}
/// Outcome of looking at only the first byte of a CDB. See `parse_opcode`.
#[derive(Debug, Clone, Copy)]
pub(crate) enum ParseOpcodeResult {
/// The opcode represents a single command.
Command(CommandType),
/// The opcode requires a service action; call `.parse()` on the payload
/// with the service action to finish identifying the command.
ServiceAction(UnparsedServiceAction),
/// The opcode is invalid.
Invalid,
}
/// Determine the command that corresponds to a SCSI opcode.
///
/// Most SCSI commands are identified by the opcode alone (the first byte of
/// the CDB), but some opcodes need a second value, the service action, and
/// each distinct service action is treated as a first-class command. Invalid
/// input also has to be told apart: a valid opcode with an invalid service
/// action is sometimes handled differently from an invalid opcode.
///
/// Parsing is therefore split into two steps. The caller first passes the
/// first CDB byte to `parse_opcode`, which returns one of:
/// - `Command`: the opcode is a complete command on its own; we're done.
/// - `Invalid`: the opcode isn't recognized at all; we're done.
/// - `ServiceAction`: the opcode needs a service action; the caller must call
///   `.parse()` on the returned `UnparsedServiceAction` with the service
///   action value.
pub(crate) fn parse_opcode(opcode: u8) -> ParseOpcodeResult {
    // Only the first table entry with this opcode decides the outcome.
    match OPCODES
        .iter()
        .copied()
        .find(|&(_, (code, _))| code == opcode)
    {
        None => ParseOpcodeResult::Invalid,
        // Some service action uses this opcode, so the opcode alone is not
        // enough and the caller has to supply the service action.
        Some((_, (_, Some(_)))) => ParseOpcodeResult::ServiceAction(UnparsedServiceAction(opcode)),
        Some((ty, (_, None))) => ParseOpcodeResult::Command(ty),
    }
}
impl CommandType {
    /// Identify the command type from the leading byte(s) of a CDB.
    ///
    /// # Errors
    /// - `ParseError::TooSmall` if the CDB is empty, or if the opcode needs a
    ///   service action and the CDB has no second byte.
    /// - `ParseError::InvalidCommand` if the opcode is not in `OPCODES`.
    /// - `ParseError::InvalidField` if the opcode is known but the service
    ///   action is not.
    fn from_cdb(cdb: &[u8]) -> Result<Self, ParseError> {
        // The CDB comes from the initiator, so never index it unchecked.
        let opcode = cdb.first().copied().ok_or(ParseError::TooSmall)?;
        // TODO: Variable-length CDBs put the service action in a different
        // place. This'll need to change if we ever support those. IIRC, Linux
        // doesn't ever use them, so it may never be relevant.
        match parse_opcode(opcode) {
            ParseOpcodeResult::Command(ty) => Ok(ty),
            ParseOpcodeResult::ServiceAction(sa) => {
                let raw = cdb.get(1).copied().ok_or(ParseError::TooSmall)?;
                // The service action is the low five bits of the second byte.
                sa.parse(u16::from(raw & 0b0001_1111))
                    .ok_or(ParseError::InvalidField)
            }
            ParseOpcodeResult::Invalid => Err(ParseError::InvalidCommand),
        }
    }

    /// Return the SCSI "CDB usage data" (see SPC-6 6.34.3) for this command
    /// type.
    ///
    /// Basically, this consists of a structure the size of the CDB for the
    /// command, starting with the opcode and service action (if any), then
    /// proceeding to a bitmap of fields we recognize.
    ///
    /// The length of the returned slice is also what `Cdb::parse` uses as the
    /// minimum CDB length for the command.
    pub const fn cdb_template(self) -> &'static [u8] {
        match self {
            Self::TestUnitReady => &[
                0x0,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0100,
            ],
            Self::RequestSense => &[
                0x3,
                0b0000_0001,
                0b0000_0000,
                0b0000_0000,
                0b1111_1111,
                0b0000_0100,
            ],
            Self::ReportLuns => &[
                0xa0,
                0b0000_0000,
                0b1111_1111,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b0000_0000,
                0b0000_0100,
            ],
            Self::ReadCapacity10 => &[
                0x25,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0100,
            ],
            Self::ReadCapacity16 => &[
                0x9e,
                0x10,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b0000_0000,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b0000_0000,
                0b0000_0100,
            ],
            Self::ModeSense6 => &[
                0x1a,
                0b0000_1000,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b0000_0100,
            ],
            Self::Read10 => &[
                0x28,
                0b1111_1100,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b0011_1111,
                0b1111_1111,
                0b1111_1111,
                0b0000_0100,
            ],
            Self::Write10 => &[
                0x2A,
                0b1111_1100,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b0011_1111,
                0b1111_1111,
                0b1111_1111,
                0b0000_0100,
            ],
            Self::WriteSame16 => &[
                0x93,
                0b1111_1001,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b0011_1111,
                0b0000_0100,
            ],
            Self::Inquiry => &[
                0x12,
                0b0000_0001,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b0000_0100,
            ],
            Self::ReportSupportedOperationCodes => &[
                0xa3,
                0xc,
                0b1000_0111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b0000_0000,
                0b0000_0100,
            ],
            Self::SynchronizeCache10 => &[
                // The first byte must be the opcode, i.e. the same value the
                // `OPCODES` table lists for SYNCHRONIZE CACHE (10).
                0x35,
                0b0000_0010,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b1111_1111,
                0b0011_1111,
                0b1111_1111,
                0b1111_1111,
                0b0000_0100,
            ],
        }
    }
}
/// A parsed command descriptor block, as produced by `Cdb::parse`.
#[derive(Debug)]
pub(crate) struct Cdb {
/// The command and its command-specific fields.
pub command: Command,
/// The allocation length from the CDB, for commands where `Cdb::parse`
/// reads one; `None` for the others.
pub allocation_length: Option<u32>,
/// Whether bit 2 (mask `0b0000_0100`) of the CDB's control byte was set.
pub naca: bool,
}
/// Reasons why `Cdb::parse` can reject a CDB.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub(crate) enum ParseError {
/// The opcode (specifically the first byte of the CDB) is unknown, i.e. we
/// should respond with INVALID COMMAND OPERATION CODE
InvalidCommand,
/// Another field of the CDB (including the service action, if any) is
/// invalid, i.e. we should respond with INVALID FIELD IN CDB.
InvalidField,
/// The CDB has fewer bytes than necessary for its opcode.
TooSmall,
}
/// What a REPORT SUPPORTED OPERATION CODES command asks to be reported.
///
/// `Cdb::parse` selects the variant from the low three bits of CDB byte 2.
/// The `u8` payload is the requested opcode (CDB byte 3) and the `u16`
/// payload is the requested service action (CDB bytes 4..6, big-endian).
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub(crate) enum ReportSupportedOpCodesMode {
/// Selector `0b000`.
All,
/// Selector `0b001`.
OneCommand(u8),
/// Selector `0b010`.
OneServiceAction(u8, u16),
/// Selector `0b011`.
OneCommandOrServiceAction(u8, u16),
}
impl Cdb {
    /// Parse a raw command descriptor block into a `Cdb`.
    ///
    /// `cdb` may be longer than the command needs; only the leading bytes
    /// that belong to the command (the length of its CDB template) are read.
    ///
    /// # Errors
    /// - `ParseError::InvalidCommand` if the opcode is unknown.
    /// - `ParseError::InvalidField` if the service action or any other field
    ///   has a value we don't support.
    /// - `ParseError::TooSmall` if `cdb` is shorter than the command's CDB.
    // TODO: do we want to ensure reserved fields are 0? SCSI allows, but
    // doesn't require, us to do so.
    pub(crate) fn parse(cdb: &[u8]) -> Result<Self, ParseError> {
        let ct = CommandType::from_cdb(cdb)?;
        if cdb.len() < ct.cdb_template().len() {
            return Err(ParseError::TooSmall);
        }
        // Shrink the cdb down to its size, so accidentally accessing fields past the
        // length panics
        let cdb = &cdb[..ct.cdb_template().len()];

        // unwraps below are safe: they're just calling TryFrom to convert from slices
        // to fixed-size arrays; in each case, we're using constant indexes and we
        // verified above that they're in bounds, so none of them can panic at runtime
        match ct {
            CommandType::Inquiry => {
                // INQUIRY
                let evpd = match cdb[1] {
                    0 => false,
                    1 => true,
                    // obsolete or reserved bits set
                    _ => return Err(ParseError::InvalidField),
                };
                let page_code_raw = cdb[2];
                let page_code = match (evpd, page_code_raw) {
                    (false, 0) => None,
                    (true, pc) => Some(pc.try_into().map_err(|_| ParseError::InvalidField)?),
                    // a page code without EVPD is not allowed
                    (false, _) => return Err(ParseError::InvalidField),
                };
                Ok(Self {
                    command: Command::LunSpecificCommand(LunSpecificCommand::Inquiry(page_code)),
                    allocation_length: Some(u32::from(u16::from_be_bytes(
                        cdb[3..5].try_into().unwrap(),
                    ))),
                    naca: (cdb[5] & 0b0000_0100) != 0,
                })
            }
            CommandType::ModeSense6 => {
                let dbd = match cdb[1] {
                    0b0000_1000 => true,
                    0b0000_0000 => false,
                    _ => return Err(ParseError::InvalidField),
                };
                // byte 2: page control in the top two bits, page code below
                let pc = (cdb[2] & 0b1100_0000) >> 6;
                let page_code = cdb[2] & 0b0011_1111;
                let subpage_code = cdb[3];
                let mode: ModePageSelection = match (page_code, subpage_code) {
                    (0x8, 0x0) => ModePageSelection::Single(ModePage::Caching),
                    (0x3f, 0x0) => ModePageSelection::AllPageZeros,
                    _ => {
                        warn!(
                            "Rejecting request for unknown mode page {:#2x}/{:#2x}.",
                            page_code, subpage_code
                        );
                        return Err(ParseError::InvalidField);
                    }
                };
                Ok(Self {
                    command: Command::LunSpecificCommand(LunSpecificCommand::ModeSense6 {
                        pc: pc.try_into().map_err(|_| ParseError::InvalidField)?,
                        mode_page: mode,
                        dbd,
                    }),
                    allocation_length: Some(u32::from(cdb[4])),
                    naca: (cdb[5] & 0b0000_0100) != 0,
                })
            }
            CommandType::Read10 => {
                if cdb[1] & 0b1110_0100 != 0 {
                    // Features (protection and rebuild assist) we don't
                    // support; the standard says to respond with INVALID
                    // FIELD IN CDB for these if unsupported
                    return Err(ParseError::InvalidField);
                }
                Ok(Self {
                    command: Command::LunSpecificCommand(LunSpecificCommand::Read10 {
                        dpo: cdb[1] & 0b0001_0000 != 0,
                        fua: cdb[1] & 0b0000_1000 != 0,
                        lba: u32::from_be_bytes(cdb[2..6].try_into().unwrap()),
                        transfer_length: u16::from_be_bytes(cdb[7..9].try_into().unwrap()),
                    }),
                    allocation_length: None,
                    naca: (cdb[9] & 0b0000_0100) != 0,
                })
            }
            CommandType::Write10 => {
                if cdb[1] & 0b1110_0000 != 0 {
                    // Feature (protection) that we don't
                    // support; the standard says to respond with INVALID
                    // FIELD IN CDB for these if unsupported
                    return Err(ParseError::InvalidField);
                }
                Ok(Self {
                    command: Command::LunSpecificCommand(LunSpecificCommand::Write10 {
                        dpo: cdb[1] & 0b0001_0000 != 0,
                        fua: cdb[1] & 0b0000_1000 != 0,
                        lba: u32::from_be_bytes(cdb[2..6].try_into().unwrap()),
                        transfer_length: u16::from_be_bytes(cdb[7..9].try_into().unwrap()),
                    }),
                    allocation_length: None,
                    naca: (cdb[9] & 0b0000_0100) != 0,
                })
            }
            CommandType::WriteSame16 => {
                if cdb[1] & 0b1110_0001 != 0 {
                    warn!("Unsupported field in WriteSame16");
                    // We neither support protections nor logical block provisioning
                    return Err(ParseError::InvalidField);
                }
                Ok(Self {
                    command: Command::LunSpecificCommand(LunSpecificCommand::WriteSame16 {
                        lba: u64::from_be_bytes(cdb[2..10].try_into().expect("lba should fit u64")),
                        number_of_logical_blocks: u32::from_be_bytes(
                            cdb[10..14].try_into().expect("block count should fit u32"),
                        ),
                        anchor: (cdb[1] & 0b0001_0000) != 0,
                    }),
                    allocation_length: None,
                    naca: (cdb[15] & 0b0000_0100) != 0,
                })
            }
            CommandType::SynchronizeCache10 => Ok(Self {
                command: Command::LunSpecificCommand(LunSpecificCommand::SynchronizeCache10),
                allocation_length: None,
                naca: (cdb[9] & 0b0000_0100) != 0,
            }),
            CommandType::ReadCapacity10 => Ok(Self {
                command: Command::LunSpecificCommand(LunSpecificCommand::ReadCapacity10),
                allocation_length: None,
                naca: (cdb[9] & 0b0000_0100) != 0,
            }),
            CommandType::ReadCapacity16 => Ok(Self {
                command: Command::LunSpecificCommand(LunSpecificCommand::ReadCapacity16),
                allocation_length: Some(u32::from_be_bytes(cdb[10..14].try_into().unwrap())),
                naca: (cdb[15] & 0b0000_0100) != 0,
            }),
            CommandType::ReportLuns => Ok(Self {
                command: Command::LunIndependentCommand(LunIndependentCommand::ReportLuns(
                    cdb[2].try_into().map_err(|_| ParseError::InvalidField)?,
                )),
                allocation_length: Some(u32::from_be_bytes(cdb[6..10].try_into().unwrap())),
                // REPORT LUNS is a 12-byte CDB: bytes 6..10 are the allocation
                // length and the control byte is the last one, byte 11.
                naca: (cdb[11] & 0b0000_0100) != 0,
            }),
            CommandType::ReportSupportedOperationCodes => {
                let rctd = cdb[2] & 0b1000_0000 != 0;
                // low three bits of byte 2 select what should be reported
                let mode = match cdb[2] & 0b0000_0111 {
                    0b000 => ReportSupportedOpCodesMode::All,
                    0b001 => ReportSupportedOpCodesMode::OneCommand(cdb[3]),
                    0b010 => ReportSupportedOpCodesMode::OneServiceAction(
                        cdb[3],
                        u16::from_be_bytes(cdb[4..6].try_into().unwrap()),
                    ),
                    0b011 => ReportSupportedOpCodesMode::OneCommandOrServiceAction(
                        cdb[3],
                        u16::from_be_bytes(cdb[4..6].try_into().unwrap()),
                    ),
                    _ => return Err(ParseError::InvalidField),
                };
                Ok(Self {
                    command: Command::LunSpecificCommand(
                        LunSpecificCommand::ReportSupportedOperationCodes { rctd, mode },
                    ),
                    allocation_length: Some(u32::from_be_bytes(cdb[6..10].try_into().unwrap())),
                    naca: (cdb[11] & 0b0000_0100) != 0,
                })
            }
            CommandType::RequestSense => {
                let format = if cdb[1] & 0b0000_0001 == 1 {
                    SenseFormat::Descriptor
                } else {
                    SenseFormat::Fixed
                };
                Ok(Self {
                    command: Command::LunSpecificCommand(LunSpecificCommand::RequestSense(format)),
                    allocation_length: Some(u32::from(cdb[4])),
                    naca: (cdb[5] & 0b0000_0100) != 0,
                })
            }
            CommandType::TestUnitReady => Ok(Self {
                command: Command::LunSpecificCommand(LunSpecificCommand::TestUnitReady),
                allocation_length: None,
                naca: (cdb[5] & 0b0000_0100) != 0,
            }),
        }
    }
}

View File

@ -0,0 +1,62 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use std::io::{Read, Write};
use super::{
command::{LunSpecificCommand, SenseFormat},
response_data::{respond_standard_inquiry_data, SilentlyTruncate},
target::{LogicalUnit, LunRequest},
};
use crate::scsi::{sense, CmdError, CmdError::DataIn, CmdOutput};
/// Stand-in logical unit used for commands addressed to a LUN that the
/// target does not have.
pub(crate) struct MissingLun;

impl LogicalUnit for MissingLun {
    /// Answer INQUIRY and fixed-format REQUEST SENSE; fail everything else
    /// with LOGICAL UNIT NOT SUPPORTED.
    fn execute_command(
        &mut self,
        data_in: &mut SilentlyTruncate<&mut dyn Write>,
        _data_out: &mut dyn Read,
        _req: LunRequest,
        cmd: LunSpecificCommand,
    ) -> Result<CmdOutput, CmdError> {
        match cmd {
            LunSpecificCommand::Inquiry(page_code) => {
                // peripheral qualifier 0b011: logical unit not accessible
                // device type 0x1f: unknown/no device type
                data_in.write_all(&[0b0110_0000 | 0x1f]).map_err(DataIn)?;
                if page_code.is_some() {
                    // SPC-6 7.7.2: "If the PERIPHERAL QUALIFIER field is
                    // not set to 000b, the contents of the PAGE LENGTH
                    // field and the VPD parameters are outside the
                    // scope of this standard."
                    //
                    // Returning a 0 length and no data seems sensible enough.
                    data_in.write_all(&[0]).map_err(DataIn)?;
                } else {
                    respond_standard_inquiry_data(data_in).map_err(DataIn)?;
                }
                Ok(CmdOutput::ok())
            }
            LunSpecificCommand::RequestSense(SenseFormat::Fixed) => {
                let sense_data = sense::LOGICAL_UNIT_NOT_SUPPORTED.to_fixed_sense();
                data_in.write_all(&sense_data).map_err(DataIn)?;
                Ok(CmdOutput::ok())
            }
            LunSpecificCommand::RequestSense(SenseFormat::Descriptor) => {
                // Don't support descriptor format.
                Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB))
            }
            _ => Ok(CmdOutput::check_condition(
                sense::LOGICAL_UNIT_NOT_SUPPORTED,
            )),
        }
    }
}

View File

@ -0,0 +1,11 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
pub(crate) mod block_device;
mod command;
pub(crate) mod missing_lun;
pub(crate) mod mode_page;
mod response_data;
pub(crate) mod target;
#[cfg(test)]
mod tests;

View File

@ -0,0 +1,48 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use std::io::{self, Write};
/// A mode page that can be returned in MODE SENSE response data.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) enum ModePage {
    Caching,
}

impl ModePage {
    /// Every mode page whose subpage code is zero.
    pub(crate) const ALL_ZERO: &'static [Self] = &[Self::Caching];

    /// The `(page code, subpage code)` pair identifying this page.
    pub(crate) const fn page_code(self) -> (u8, u8) {
        match self {
            Self::Caching => (0x8, 0),
        }
    }

    /// Number of bytes that follow the two-byte page header.
    pub(crate) const fn page_length(self) -> u8 {
        match self {
            Self::Caching => 0x12,
        }
    }

    /// Write the page (two-byte header, then `page_length()` bytes) to
    /// `data_in`.
    ///
    /// # Panics
    /// Panics if the page has a non-zero subpage code.
    pub(crate) fn write(self, data_in: &mut impl Write) -> io::Result<()> {
        let (page, subpage) = self.page_code();
        assert_eq!(subpage, 0, "Subpages aren't supported yet.");

        // Byte 0: top 2 bits clear (no subpage, saving not supported), then
        // the page code. Byte 1: page length.
        let header = [page, self.page_length()];
        data_in.write_all(&header)?;

        match self {
            Self::Caching => {
                // Writeback Cache Enable, lots of bits zero
                // n.b. kernel logs will show WCE off; it always says
                // that for read-only devices, which we are rn
                let flags = [0b0000_0100];
                data_in.write_all(&flags)?;
                // various cache fine-tuning stuff we can't really control
                let tuning = [0_u8; 0x11];
                data_in.write_all(&tuning)?;
            }
        }
        Ok(())
    }
}

View File

@ -0,0 +1,107 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
//! Some helpers for writing response data, shared between `BlockDevice` and
//! `MissingLun`
use std::{cmp::min, convert::TryFrom, io, io::Write};
/// A `Write` adapter that forwards at most a fixed number of bytes to the
/// wrapped writer and silently discards the rest. This matches the semantics
/// of SCSI's ALLOCATION LENGTH field; anything beyond the allocation length
/// is silently omitted.
///
/// Field 0 is the wrapped writer, field 1 the number of bytes still allowed.
pub struct SilentlyTruncate<W: Write>(W, usize);

impl<W: Write> SilentlyTruncate<W> {
    /// Wrap `writer`, letting at most `len` bytes through.
    pub const fn new(writer: W, len: usize) -> Self {
        Self(writer, len)
    }
}

impl<W: Write> Write for SilentlyTruncate<W> {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let Self(inner, remaining) = self;
        if *remaining == 0 {
            // our goal is to silently fail, so once we've stopped actually
            // writing, just pretend all writes work
            return Ok(buf.len());
        }
        let allowed = min(buf.len(), *remaining);
        let written = inner.write(&buf[..allowed])?;
        *remaining -= written;
        Ok(written)
    }

    fn flush(&mut self) -> std::io::Result<()> {
        self.0.flush()
    }
}
/// Encode a LUN as the 8-byte entry used in REPORT LUNS response data: the
/// LUN goes in byte 1 and every other byte is zero.
///
/// # Panics
/// Panics if `lun` does not fit in a `u8`.
fn encode_lun(lun: u16) -> [u8; 8] {
    let mut encoded = [0_u8; 8];
    encoded[1] = u8::try_from(lun).expect("more than 255 LUNs are currently unsupported");
    encoded
}
/// Write the response data for a REPORT LUNS command.
pub fn respond_report_luns<T>(data_in: &mut impl Write, luns: T) -> io::Result<()>
where
T: IntoIterator<Item = u16>,
T::IntoIter: ExactSizeIterator,
{
let iter = luns.into_iter();
data_in.write_all(
&(u32::try_from(iter.len() * 8))
.expect("less than 256 LUNS")
.to_be_bytes(),
)?;
data_in.write_all(&[0; 4])?; // reserved
for lun in iter {
data_in.write_all(&encode_lun(lun))?;
}
Ok(())
}
/// Write the response data for a standard (i.e. not VPD) inquiry, excluding the
/// first byte (the peripheral qualifier and device type).
///
/// The caller writes that first byte itself; this function then writes the
/// remaining 95 bytes, matching the advertised additional length of 91
/// (91 bytes follow the first 5).
///
/// # Errors
/// Returns any error reported by `data_in`.
pub fn respond_standard_inquiry_data(data_in: &mut impl Write) -> io::Result<()> {
    // TODO: Feature bits here we might want to support:
    // - NormACA
    // - command queueing
    data_in.write_all(&[
        // various bits: not removable, not part of a
        // conglomerate, no info on hotpluggability
        0,
        0x7, // version: SPC-6
        // bits: don't support NormACA, support modern LUN format
        // INQUIRY data version 2
        0b0001_0000 | 0x2,
        91, // additional INQUIRY data length
        // bunch of feature bits we don't support:
        0,
        0,
        0,
    ])?;

    // TODO: register this or another name with T10
    // vendor identification: 8 bytes
    data_in.write_all(b"rust-vmm")?;
    // product identification: 16 bytes, space padded
    data_in.write_all(b"vhost-user-scsi ")?;
    // product revision level: exactly 4 bytes, space padded. Spelled out
    // byte by byte so the padding cannot be lost; a shorter field would
    // shift everything after it and contradict the length given above.
    data_in.write_all(&[b'v', b'0', b' ', b' '])?;

    // The Linux kernel doesn't request any more than this, so any data we return
    // after this point is mostly academic.

    data_in.write_all(&[0; 22])?;

    // version descriptors, each written big-endian
    let product_descs: &[u16; 8] = &[
        0x00c0, // SAM-6 (no version claimed)
        0x05c0, // SPC-5 (no version claimed)
        0x0600, // SBC-4 (no version claimed)
        0x0, 0x0, 0x0, 0x0, 0x0,
    ];

    for desc in product_descs {
        data_in.write_all(&desc.to_be_bytes())?;
    }

    data_in.write_all(&[0; 22])?;

    Ok(())
}

View File

@ -0,0 +1,143 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use std::convert::TryFrom;
use std::io::{Read, Write};
use log::error;
use super::{
command::{
Cdb, Command, LunIndependentCommand, LunSpecificCommand, ParseError, ReportLunsSelectReport,
},
missing_lun::MissingLun,
response_data::{respond_report_luns, SilentlyTruncate},
};
use crate::scsi::{sense, CmdError, CmdOutput, Request, Target, TaskAttr};
/// Per-request parameters handed to a logical unit together with the parsed
/// command.
///
/// `EmulatedTarget::execute_command` fills `_id`, `task_attr`, `crn` and
/// `prio` from the incoming `Request`, and `_allocation_length` and `naca`
/// from the parsed `Cdb`.
pub(crate) struct LunRequest {
pub _id: u64,
pub task_attr: TaskAttr,
pub crn: u8,
pub prio: u8,
pub _allocation_length: Option<u32>,
pub naca: bool,
}
/// A single logical unit of an emulated SCSI device.
pub(crate) trait LogicalUnit: Send + Sync {
/// Process a SCSI command sent to this logical unit.
///
/// `data_in` receives the response data and is already limited to the
/// command's allocation length; `data_out` supplies the data sent with the
/// command; `parameters` carries the per-request metadata and `command` the
/// parsed command.
///
/// # Return value
/// This function returns a Result, but it should return Err only in limited
/// circumstances: when something goes wrong at the transport level, such
/// as writes to `data_in` failing.
/// Any other errors, such as invalid SCSI commands or I/O errors
/// accessing an underlying file, should result in an Ok return value
/// with a `CmdOutput` representing a SCSI-level error (i.e. CHECK
/// CONDITION status, and appropriate sense data).
fn execute_command(
&mut self,
data_in: &mut SilentlyTruncate<&mut dyn Write>,
data_out: &mut dyn Read,
parameters: LunRequest,
command: LunSpecificCommand,
) -> Result<CmdOutput, CmdError>;
}
/// A SCSI target implemented by emulating a device within vhost-device-scsi.
pub(crate) struct EmulatedTarget {
// Logical units of this target; a unit's index in the vector is its LUN.
luns: Vec<Box<dyn LogicalUnit>>,
}
impl EmulatedTarget {
    /// Create a target with no logical units.
    pub(crate) fn new() -> Self {
        Self { luns: Vec::new() }
    }

    /// Append a logical unit; it gets the next free LUN number.
    pub(crate) fn add_lun(&mut self, logical_unit: Box<dyn LogicalUnit>) {
        self.luns.push(logical_unit);
    }

    /// Iterate over the LUN numbers of all logical units, in ascending order.
    pub(crate) fn luns(&self) -> impl Iterator<Item = u16> + ExactSizeIterator + '_ {
        let count = self.luns.len();
        // unwrap is safe: we limit LUNs at 256
        (0..count).map(|idx| u16::try_from(idx).unwrap())
    }
}
/// The default target is the same as `EmulatedTarget::new()`: no logical
/// units.
impl Default for EmulatedTarget {
    fn default() -> Self {
        Self { luns: Vec::new() }
    }
}
impl Target for EmulatedTarget {
    /// Parse the request's CDB and execute the command.
    ///
    /// REPORT LUNS is answered here; every other command goes to the logical
    /// unit `lun`, or to `MissingLun` if there is no such unit. CDBs that
    /// fail to parse produce a CHECK CONDITION, except for CDBs that are too
    /// short, which produce `CmdError::CdbTooShort`.
    fn execute_command(
        &mut self,
        lun: u16,
        data_out: &mut dyn Read,
        data_in: &mut dyn Write,
        req: Request,
    ) -> Result<CmdOutput, CmdError> {
        let cdb = match Cdb::parse(req.cdb) {
            Ok(cdb) => cdb,
            Err(ParseError::InvalidCommand) => {
                error!("Rejecting CDB for unknown command: {:?}", req.cdb);
                return Ok(CmdOutput::check_condition(
                    sense::INVALID_COMMAND_OPERATION_CODE,
                ));
            }
            // TODO: SCSI has a provision for INVALID FIELD IN CDB to include the
            // index of the invalid field, but it's not clear if that's mandatory.
            // In any case, QEMU omits it.
            Err(ParseError::InvalidField) => {
                error!("Rejecting CDB with invalid field: {:?}", req.cdb);
                return Ok(CmdOutput::check_condition(sense::INVALID_FIELD_IN_CDB));
            }
            Err(ParseError::TooSmall) => return Err(CmdError::CdbTooShort),
        };

        // Commands without an allocation length may write as much as they like.
        let limit = cdb.allocation_length.map_or(usize::MAX, |x| x as usize);
        let mut data_in = SilentlyTruncate::new(data_in, limit);

        match cdb.command {
            Command::LunIndependentCommand(LunIndependentCommand::ReportLuns(select_report)) => {
                let written = match select_report {
                    ReportLunsSelectReport::NoWellKnown | ReportLunsSelectReport::All => {
                        respond_report_luns(&mut data_in, self.luns())
                    }
                    // For these select reports we have nothing to list.
                    ReportLunsSelectReport::WellKnownOnly
                    | ReportLunsSelectReport::Administrative
                    | ReportLunsSelectReport::TopLevel
                    | ReportLunsSelectReport::SameConglomerate => {
                        respond_report_luns(&mut data_in, vec![].into_iter())
                    }
                };
                written.map_err(CmdError::DataIn)?;
                Ok(CmdOutput::ok())
            }
            Command::LunSpecificCommand(cmd) => {
                let lun_req = LunRequest {
                    _id: req.id,
                    task_attr: req.task_attr,
                    crn: req.crn,
                    prio: req.prio,
                    _allocation_length: cdb.allocation_length,
                    naca: cdb.naca,
                };
                if let Some(unit) = self.luns.get_mut(usize::from(lun)) {
                    unit.execute_command(&mut data_in, data_out, lun_req, cmd)
                } else {
                    MissingLun.execute_command(&mut data_in, data_out, lun_req, cmd)
                }
            }
        }
    }
}

View File

@ -0,0 +1,198 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use super::{do_command_fail_lun, do_command_in_lun, null_image};
use crate::scsi::{
emulation::{block_device::BlockDevice, target::EmulatedTarget},
sense,
};
/// REPORT LUNS sent to a LUN that does not exist still lists the target's
/// five LUNs for the "all but well known" and "all" select reports.
#[test]
fn test_report_luns() {
    let mut target = EmulatedTarget::new();
    for _ in 0..5 {
        target.add_lun(Box::new(BlockDevice::new(null_image())));
    }

    let expected: &[u8] = &[
        0, 0, 0, 40, // length: 5*8 = 40
        0, 0, 0, 0, // reserved
        0, 0, 0, 0, 0, 0, 0, 0, // LUN 0
        0, 1, 0, 0, 0, 0, 0, 0, // LUN 1
        0, 2, 0, 0, 0, 0, 0, 0, // LUN 2
        0, 3, 0, 0, 0, 0, 0, 0, // LUN 3
        0, 4, 0, 0, 0, 0, 0, 0, // LUN 4
    ];

    // select reports: all but well known, all
    for &sr in &[0x0, 0x2] {
        let cdb: &[u8] = &[
            0xa0, // REPORT LUNS
            0,    // reserved
            sr,   // select report
            0, 0, 0, // reserved
            0, 0, 1, 0, // alloc length: 256
            0, 0,
        ];
        do_command_in_lun(&mut target, 6, cdb, &[], expected);
    }
}
/// REPORT LUNS sent to a LUN that does not exist returns an empty list for
/// the select reports that we answer with no LUNs.
#[test]
fn test_report_luns_empty() {
    let mut target = EmulatedTarget::new();
    for _ in 0..5 {
        target.add_lun(Box::new(BlockDevice::new(null_image())));
    }

    let expected: &[u8] = &[
        0, 0, 0, 0, // length: 0
        0, 0, 0, 0, // reserved
    ];

    // well-known only and several modes explicitly defined to return an empty
    // list for all but certain types of receiving LUNs
    for &sr in &[0x1, 0x10, 0x11, 0x12] {
        let cdb: &[u8] = &[
            0xa0, // REPORT LUNS
            0,    // reserved
            sr,   // select report
            0, 0, 0, // reserved
            0, 0, 1, 0, // alloc length: 256
            0, 0,
        ];
        do_command_in_lun(&mut target, 6, cdb, &[], expected);
    }
}
/// Fixed-format REQUEST SENSE to a LUN that does not exist returns
/// LOGICAL UNIT NOT SUPPORTED sense data.
#[test]
fn test_request_sense() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));

    let cdb: &[u8] = &[
        0x3, // REQUEST SENSE
        0,   // fixed format sense data
        0, 0,   // reserved
        255, // alloc length
        0,   // control
    ];
    let expected = sense::LOGICAL_UNIT_NOT_SUPPORTED.to_fixed_sense();

    do_command_in_lun(&mut target, 1, cdb, &[], &expected);
}
/// Descriptor-format REQUEST SENSE to a LUN that does not exist is rejected
/// with INVALID FIELD IN CDB.
#[test]
fn test_request_sense_descriptor_format() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));

    let cdb: &[u8] = &[
        0x3, // REQUEST SENSE
        1,   // descriptor format sense data
        0, 0,   // reserved
        255, // alloc length
        0,   // control
    ];

    do_command_fail_lun(&mut target, 1, cdb, sense::INVALID_FIELD_IN_CDB);
}
/// Standard INQUIRY to a LUN that does not exist returns the usual inquiry
/// data with a "not accessible, unknown type" first byte.
#[test]
fn test_inquiry() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));

    let cdb: &[u8] = &[
        0x12, // INQUIRY
        0,    // EVPD bit: 0
        0,    // page code
        1, 0, // alloc length: 256
        0, // control
    ];

    let mut expected: Vec<u8> = vec![
        0x7f, // device not accessible, unknown type
        0,    // features
        0x7,  // version
        0x12, // response data format v2, HiSup = 1
        91,   // addl length
        0, 0, 0, // unsupported features
    ];
    // vendor
    expected.extend_from_slice(b"rust-vmm");
    // product
    expected.extend_from_slice(b"vhost-user-scsi");
    expected.push(b' ');
    // revision
    expected.extend_from_slice(&[b'v', b'0', b' ', b' ']);
    // reserved/obsolete/vendor specific
    expected.extend_from_slice(&[0; 22]);
    // version descriptors
    expected.extend_from_slice(&[
        0x0, 0xc0, // SAM-6
        0x05, 0xc0, // SPC-5 (no code assigned for 6 yet)
        0x06, 0x0, // SBC-4
    ]);
    expected.extend_from_slice(&[0; 10]);
    // reserved
    expected.extend_from_slice(&[0; 22]);

    do_command_in_lun(&mut target, 1, cdb, &[], &expected);
}
/// A command other than INQUIRY or REQUEST SENSE sent to a LUN that does not
/// exist fails with LOGICAL UNIT NOT SUPPORTED.
#[test]
fn test_other_command() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));

    let cdb: &[u8] = &[
        0, // TEST UNIT READY
        0, 0, 0, 0, // reserved
        0, // control
    ];

    do_command_fail_lun(&mut target, 1, cdb, sense::LOGICAL_UNIT_NOT_SUPPORTED);
}
/// An unknown opcode sent to a LUN that does not exist is still reported as
/// INVALID COMMAND OPERATION CODE.
#[test]
fn test_invalid_command() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));

    let cdb: &[u8] = &[
        0xff, // vendor specific
        0, 0, 0, 0, // reserved
        0, // control
    ];

    do_command_fail_lun(&mut target, 1, cdb, sense::INVALID_COMMAND_OPERATION_CODE);
}

View File

@ -0,0 +1,108 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
//! Tests for stuff shared between commands.
use assert_matches::assert_matches;
use std::io::ErrorKind;
use super::{do_command_fail, test_image};
use crate::scsi::{
emulation::{block_device::BlockDevice, target::EmulatedTarget},
sense, CmdError, Request, Target, TaskAttr,
};
/// An opcode we don't know fails with INVALID COMMAND OPERATION CODE.
#[test]
fn test_invalid_opcode() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(test_image())));

    let cdb: &[u8] = &[
        0xff, // vendor specific, unused by us
        0, 0, 0, 0, 0,
    ];

    do_command_fail(&mut target, cdb, sense::INVALID_COMMAND_OPERATION_CODE);
}
/// A known opcode with an unknown service action fails with INVALID FIELD IN
/// CDB rather than INVALID COMMAND OPERATION CODE.
#[test]
fn test_invalid_service_action() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(test_image())));

    let cdb: &[u8] = &[
        0xa3, // MAINTENANCE IN
        0x1f, // vendor specific, unused by us
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ];

    do_command_fail(&mut target, cdb, sense::INVALID_FIELD_IN_CDB);
}
/// A READ (10) whose response cannot be written because the `data_in` buffer
/// is empty fails with a `DataIn` error of kind `WriteZero`.
#[test]
fn test_short_data_out_buffer() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(test_image())));

    let mut data_in: &mut [u8] = &mut [];
    let mut data_out: &[u8] = &[0_u8; 511];

    let request = Request {
        id: 0,
        cdb: &[
            0x28, // READ (10)
            0,    // flags
            0, 0, 0, 15, // LBA: 15
            0,  // reserved, group #
            0, 1, // transfer length: 1
            0, // control
        ],
        task_attr: TaskAttr::Simple,
        crn: 0,
        prio: 0,
    };
    let res = target.execute_command(0, &mut data_out, &mut data_in, request);

    match res.unwrap_err() {
        CmdError::DataIn(e) => assert_eq!(e.kind(), ErrorKind::WriteZero),
        _ => panic!(),
    }
}
/// A CDB that is shorter than its opcode requires fails with
/// `CmdError::CdbTooShort`.
#[test]
fn test_short_cdb() {
    let mut target: EmulatedTarget = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(test_image())));

    let mut data_in: &mut [u8] = &mut [];
    let mut data_out: &[u8] = &[];

    let request = Request {
        id: 0,
        cdb: &[
            0x28, // READ (10)
        ],
        task_attr: TaskAttr::Simple,
        crn: 0,
        prio: 0,
    };
    let res = target.execute_command(0, &mut data_out, &mut data_in, request);

    assert_matches!(res.unwrap_err(), CmdError::CdbTooShort);
}

View File

@ -0,0 +1,520 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
#![cfg(test)]
mod bad_lun;
mod generic;
mod report_supported_operation_codes;
use std::{
fs::File,
io::Write,
sync::{Arc, Mutex},
};
use tempfile::tempfile;
use super::{
block_device::{
BlockDevice, BlockDeviceBackend, BlockOffset, BlockSize, ByteOffset, FileBackend,
},
target::EmulatedTarget,
};
use crate::scsi::{
sense::{self, SenseTriple},
CmdOutput, Request, Target, TaskAttr,
};
/// In-memory block device backend for tests: 16 blocks of 512 bytes.
///
/// Clones share the same underlying storage.
#[derive(Clone)]
struct TestBackend {
    data: Arc<Mutex<[u8; 512 * 16]>>,
}

impl TestBackend {
    /// Create a backend whose storage is all zeros.
    fn new() -> Self {
        let zeroed = [0; 512 * 16];
        Self {
            data: Arc::new(Mutex::new(zeroed)),
        }
    }
}
impl BlockDeviceBackend for TestBackend {
    /// Fill `buf` from the in-memory storage starting at `offset`.
    fn read_exact_at(&mut self, buf: &mut [u8], offset: ByteOffset) -> std::io::Result<()> {
        let storage = self.data.lock().unwrap();
        let start = usize::try_from(u64::from(offset)).expect("offset should fit usize");
        let end = start + buf.len();
        buf.copy_from_slice(&storage[start..end]);
        Ok(())
    }

    /// Copy `buf` into the in-memory storage starting at `offset`.
    fn write_exact_at(&mut self, buf: &[u8], offset: ByteOffset) -> std::io::Result<()> {
        let mut storage = self.data.lock().unwrap();
        let start = usize::try_from(u64::from(offset)).expect("offset should fit usize");
        let end = start + buf.len();
        storage[start..end].copy_from_slice(buf);
        Ok(())
    }

    /// Storage size divided by the block size.
    fn size_in_blocks(&mut self) -> std::io::Result<BlockOffset> {
        let byte_len = self.data.lock().unwrap().len();
        let total =
            ByteOffset::from(u64::try_from(byte_len).expect("size_in_blocks should fit u64"));
        Ok(total / self.block_size())
    }

    /// Always 512 bytes.
    fn block_size(&self) -> BlockSize {
        BlockSize::try_from(512).expect("512 should be a valid BlockSize")
    }

    /// Nothing to flush for in-memory storage.
    fn sync(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
/// A file backend on top of `/dev/null`, for tests that don't care about
/// the device contents.
fn null_image() -> FileBackend {
    let dev_null = File::open("/dev/null").unwrap();
    FileBackend::new(dev_null)
}
/// A file backend on top of a temporary file with known contents.
fn test_image() -> FileBackend {
    let mut image = tempfile().unwrap();
    // generate 16 512-byte sectors, each of which consist of a single
    // repeated hex character, i.e.
    // sector 00: 0000000....0000
    // sector 15: fffffff....ffff
    for digit in (b'0'..=b'9').chain(b'a'..=b'f') {
        image.write_all(&[digit; 512]).unwrap();
    }
    FileBackend::new(image)
}
/// Run `cdb` against `lun` of `target` and assert that it succeeds and that
/// the response data equals `expected_data_in`.
fn do_command_in_lun(
    target: &mut EmulatedTarget,
    lun: u16,
    cdb: &[u8],
    data_out: &[u8],
    expected_data_in: &[u8],
) {
    let mut data_in = Vec::new();
    let mut data_out = data_out;

    let request = Request {
        id: 0,
        cdb,
        task_attr: TaskAttr::Simple,
        crn: 0,
        prio: 0,
    };
    let res = target.execute_command(lun, &mut data_out, &mut data_in, request);

    assert_eq!(res.unwrap(), CmdOutput::ok());
    assert_eq!(&data_in, expected_data_in);
}
/// Run `cdb` against `lun` of `target` and assert that it ends in a CHECK
/// CONDITION with sense `expected_error` and produces no response data.
fn do_command_fail_lun(
    target: &mut EmulatedTarget,
    lun: u16,
    cdb: &[u8],
    expected_error: SenseTriple,
) {
    let mut data_in = Vec::new();
    let mut data_out: &[u8] = &[];

    let request = Request {
        id: 0,
        cdb,
        task_attr: TaskAttr::Simple,
        crn: 0,
        prio: 0,
    };
    let res = target.execute_command(lun, &mut data_out, &mut data_in, request);

    assert_eq!(res.unwrap(), CmdOutput::check_condition(expected_error));
    assert_eq!(&data_in, &[]);
}
/// Like `do_command_in_lun`, but always addresses LUN 0.
fn do_command_in(
    target: &mut EmulatedTarget,
    cdb: &[u8],
    data_out: &[u8],
    expected_data_in: &[u8],
) {
    let lun = 0;
    do_command_in_lun(target, lun, cdb, data_out, expected_data_in);
}
/// Like `do_command_fail_lun`, but always addresses LUN 0.
fn do_command_fail(target: &mut EmulatedTarget, cdb: &[u8], expected_error: SenseTriple) {
    let lun = 0;
    do_command_fail_lun(target, lun, cdb, expected_error);
}
/// The 512-byte block size used by the tests.
fn block_size_512() -> BlockSize {
    let converted = BlockSize::try_from(512);
    converted.expect("512 should be a valid block_size")
}
/// TEST UNIT READY on an existing LUN succeeds without response data.
#[test]
fn test_test_unit_ready() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));

    // opcode 0 (TEST UNIT READY), everything else zero
    let cdb: &[u8] = &[0, 0, 0, 0, 0, 0];
    do_command_in(&mut target, cdb, &[], &[]);
}
/// REPORT LUNS on a target with five LUNs lists LUN 0 through LUN 4.
#[test]
fn test_report_luns() {
    let mut target = EmulatedTarget::new();
    for _lun in 0..5 {
        target.add_lun(Box::new(BlockDevice::new(null_image())));
    }
    let cdb: &[u8] = &[
        0xa0, // REPORT LUNS
        0, // reserved
        0, // select report
        0, 0, 0, // reserved
        0, 0, 1, 0, // alloc length: 256
        0, 0,
    ];
    let expected: &[u8] = &[
        0, 0, 0, 40, // length: 5*8 = 40
        0, 0, 0, 0, // reserved
        0, 0, 0, 0, 0, 0, 0, 0, // LUN 0
        0, 1, 0, 0, 0, 0, 0, 0, // LUN 1
        0, 2, 0, 0, 0, 0, 0, 0, // LUN 2
        0, 3, 0, 0, 0, 0, 0, 0, // LUN 3
        0, 4, 0, 0, 0, 0, 0, 0, // LUN 4
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// READ (10) of one block at LBA 5 returns the sector filled with `'5'`.
#[test]
fn test_read_10() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(test_image())));
    // TODO: this test relies on the default logical block size of 512. We should
    // make that explicit.
    let cdb: &[u8] = &[
        0x28, // READ (10)
        0, // flags
        0, 0, 0, 5, // LBA: 5
        0, // reserved, group #
        0, 1, // transfer length: 1
        0, // control
    ];
    let expected: &[u8] = &[b'5'; 512];
    do_command_in(&mut target, cdb, &[], expected);
}
/// READ (10) of one block at LBA 15 (the last sector of the test image)
/// returns the sector filled with `'f'`.
#[test]
fn test_read_10_last_block() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(test_image())));
    // TODO: this test relies on the default logical block size of 512. We should
    // make that explicit.
    let cdb: &[u8] = &[
        0x28, // READ (10)
        0, // flags
        0, 0, 0, 15, // LBA: 15
        0, // reserved, group #
        0, 1, // transfer length: 1
        0, // control
    ];
    let expected: &[u8] = &[b'f'; 512];
    do_command_in(&mut target, cdb, &[], expected);
}
/// READ (10) starting at LBA 16, one past the end of the 16-sector test
/// image, fails with LOGICAL BLOCK ADDRESS OUT OF RANGE.
#[test]
fn test_read_10_out_of_range() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(test_image())));
    // TODO: this test relies on the default logical block size of 512. We should
    // make that explicit.
    let cdb: &[u8] = &[
        0x28, // READ (10)
        0, // flags
        0, 0, 0, 16, // LBA: 16
        0, // reserved, group #
        0, 1, // transfer length: 1
        0, // control
    ];
    do_command_fail(&mut target, cdb, sense::LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE);
}
/// READ (10) that starts on the last valid block (LBA 15) but asks for two
/// blocks must fail with LOGICAL BLOCK ADDRESS OUT OF RANGE.
///
/// The 16-sector test image is required here: with an empty image the start
/// LBA itself would already be out of range, and the "read crosses the end
/// of the medium" path would never be exercised.
#[test]
fn test_read_10_cross_out() {
    let mut target = EmulatedTarget::new();
    let dev = BlockDevice::new(test_image());
    target.add_lun(Box::new(dev));
    // TODO: this test relies on the default logical block size of 512. We should
    // make that explicit.
    do_command_fail(
        &mut target,
        &[
            0x28, // READ (10)
            0, // flags
            0, 0, 0, 15, // LBA: 15
            0, // reserved, group #
            0, 2, // transfer length: 2
            0, // control
        ],
        sense::LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE,
    );
}
/// WRITE (10) of one block at LBA 5 stores the payload in the backend at
/// the corresponding byte offset.
#[test]
fn test_write_10() {
    let mut target = EmulatedTarget::new();
    let mut backend = TestBackend::new();
    target.add_lun(Box::new(BlockDevice::new(backend.clone())));
    // TODO: this test relies on the default logical block size of 512. We should
    // make that explicit.
    let payload = [b'w'; 512];
    let cdb: &[u8] = &[
        0x2a, // WRITE (10)
        0, // flags
        0, 0, 0, 5, // LBA: 5
        0, // reserved, group #
        0, 1, // transfer length: 1
        0, // control
    ];
    do_command_in(&mut target, cdb, &payload, &[]);

    // Read the block back through the backend handle kept by the test.
    let mut read_back = [0_u8; 512];
    let offset = BlockOffset::from(5) * block_size_512();
    backend
        .read_exact_at(&mut read_back, offset)
        .expect("Reading should work");
    assert_eq!(payload, read_back);
}
/// WRITE SAME (16) of five zeroed blocks at LBA 5 overwrites exactly those
/// five sectors and leaves the following sector alone.
#[test]
fn test_write_same_16() {
    let mut target = EmulatedTarget::new();
    let mut backend = TestBackend::new();
    target.add_lun(Box::new(BlockDevice::new(backend.clone())));
    // TODO: this test relies on the default logical block size of 512. We should
    // make that explicit.

    // Pre-fill sectors 5..=10 with 0xff so that untouched data is recognizable.
    backend
        .write_exact_at(&[0xff; 512 * 6], BlockOffset::from(5) * block_size_512())
        .expect("Write should succeed");

    let pattern = [0_u8; 512];
    let cdb: &[u8] = &[
        0x93, // WRITE SAME (16)
        0, // flags
        0, 0, 0, 0, 0, 0, 0, 5, // LBA: 5
        0, 0, 0, 5, // number of blocks: 5
        0, // reserved, group #
        0, // control
    ];
    do_command_in(&mut target, cdb, &pattern, &[]);

    let mut written = [0_u8; 512 * 5];
    backend
        .read_exact_at(&mut written, BlockOffset::from(5) * block_size_512())
        .expect("Reading should work");
    assert_eq!([0_u8; 512 * 5], written, "5 sectors should have been zero'd");

    let mut following = [0_u8; 512];
    backend
        .read_exact_at(&mut following, BlockOffset::from(10) * block_size_512())
        .expect("Reading should work");
    assert_eq!(
        [0xff_u8; 512], following,
        "sector after write should be left untouched"
    );
}
/// READ CAPACITY (10) on the 16-sector test image reports last LBA 15 and a
/// block size of 512.
#[test]
fn test_read_capacity_10() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(test_image())));
    // TODO: this test relies on the default logical block size of 512. We should
    // make that explicit.
    // TODO: we should test behavior with ≥ 2 TiB images. But not sure how we
    // can do that reliably without risking using 2 TiB of disk
    let cdb: &[u8] = &[
        0x25, // READ CAPACITY (10)
        0, 0, 0, 0, 0, 0, 0, 0, // flags
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0, 0, 15, // returned LBA (last valid LBA),
        0, 0, 2, 0, // block size (512)
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// READ CAPACITY (16) on the 16-sector test image reports last LBA 15, a
/// block size of 512 and the provisioning flags byte 0xc0.
#[test]
fn test_read_capacity_16() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(test_image())));
    // TODO: this test relies on the default logical block size of 512. We should
    // make that explicit.
    let cdb: &[u8] = &[
        0x9e, 0x10, // READ CAPACITY (16)
        0, 0, 0, 0, 0, 0, 0, 0, // obsolete
        0, 0, 0, 32, // allocation length: 32
        0, // obsolete/reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0, 0, 0, 0, 0, 0, 15, // returned LBA (last valid LBA),
        0, 0, 2, 0, // block size (512)
        0, // reserved, zoned stuff, protection stuff
        0, // one PB per LB
        0xc0, // thin provisioning, unmapped blocks read 0
        0, // LBA 0 is aligned (top bits above)
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // reserved
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// Standard INQUIRY (EVPD = 0) returns the 96-byte identification data of
/// the emulated block device.
#[test]
fn test_inquiry() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0x12, // INQUIRY
        0, // EVPD bit: 0
        0, // page code
        1, 0, // alloc length: 256
        0, // control
    ];

    // The expected response is assembled field by field.
    let mut expected: Vec<u8> = vec![
        0, // accessible; direct access block device
        0, // features
        0x7, // version
        0x12, // response data format v2, HiSup = 1
        91, // addl length
        0, 0, 0, // unsupported features
    ];
    expected.extend_from_slice(b"rust-vmm"); // vendor
    expected.extend_from_slice(b"vhost-user-scsi "); // product
    expected.extend_from_slice(b"v0  "); // revision
    expected.extend_from_slice(&[0; 22]); // reserved/obsolete/vendor specific
    // version descriptors
    expected.extend_from_slice(&[
        0x0, 0xc0, // SAM-6
        0x05, 0xc0, // SPC-5 (no code assigned for 6 yet)
        0x06, 0, // SBC-4
    ]);
    expected.extend_from_slice(&[0; 10]); // unused version descriptors
    expected.extend_from_slice(&[0; 22]); // reserved

    do_command_in(&mut target, cdb, &[], &expected);
}
/// REQUEST SENSE with the descriptor bit clear returns fixed-format
/// "no additional sense information".
#[test]
fn test_request_sense() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0x3, // REQUEST SENSE
        0, // desc bit: 0
        0, 0, // reserved
        255, // alloc length
        0, // control
    ];
    // We'll always return this - modern SCSI has autosense, so any errors are sent with the
    // response to the command that caused them (and therefore immediately cleared), and
    // REQUEST SENSE returns an actual error only under some exceptional circumstances
    // we don't implement.
    let expected = sense::NO_ADDITIONAL_SENSE_INFORMATION.to_fixed_sense();
    do_command_in(&mut target, cdb, &[], &expected);
}
/// REQUEST SENSE with the descriptor bit set is rejected with INVALID FIELD
/// IN CDB.
#[test]
fn test_request_sense_descriptor_format() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0x3, // REQUEST SENSE
        1, // desc bit: 1
        0, 0, // reserved
        255, // alloc length
        0, // control
    ];
    // We don't support descriptor format sense data.
    do_command_fail(&mut target, cdb, sense::INVALID_FIELD_IN_CDB);
}

View File

@ -0,0 +1,420 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use super::{do_command_fail, do_command_in, null_image};
use crate::scsi::{
emulation::{block_device::BlockDevice, target::EmulatedTarget},
sense,
};
/// Reporting option "one command" for TEST UNIT READY reports the opcode as
/// supported, with a 6-byte CDB usage map.
#[test]
fn test_one_command() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b1, // reporting options: one command
        0, 1, 2, // opcode: TEST UNIT READY, SA ignored
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b11, // flags, supported
        0, 6, // cdb len
        0, 0, 0, 0, 0, 0b0100, // usage data
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// Same as the plain "one command" query, but with the timeout-descriptor
/// bit set: the response gains a 12-byte descriptor with no timeouts.
#[test]
fn test_one_command_with_timeout_descriptor() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0x81, // request timeout descs, reporting options: one command
        0, 1, 2, // opcode: TEST UNIT READY, SA ignored
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b11, // flags, supported
        0, 6, // cdb len
        0, 0, 0, 0, 0, 0b0100, // usage data
        0, 0xa, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // no timeouts
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// Reporting option "one command" for an unimplemented opcode (0xff)
/// succeeds but reports the opcode as not supported.
#[test]
fn test_one_command_unsupported() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b1, // reporting options: one command
        0xff, 1, 2, // opcode: vendor specific, SA ignored
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b01, // flags, not supported
        0, 0, // cdb len
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// Reporting option "one command" is rejected with INVALID FIELD IN CDB
/// when the opcode is one that uses service actions, even for a valid one.
#[test]
fn test_one_command_valid_service_action() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b1, // reporting options: one command
        0x9e, 0, 0x10, // SERVICE ACTION IN (16), READ CAPACITY (16)
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    do_command_fail(&mut target, cdb, sense::INVALID_FIELD_IN_CDB);
}
/// Reporting option "one command" is rejected with INVALID FIELD IN CDB
/// for a service-action opcode paired with an unknown service action.
#[test]
fn test_one_command_invalid_service_action() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b1, // reporting options: one command
        0x9e, 0, 0xff, // SERVICE ACTION IN (16), invalid
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    do_command_fail(&mut target, cdb, sense::INVALID_FIELD_IN_CDB);
}
/// Reporting option "one service action" for READ CAPACITY (16) reports it
/// as supported, with a 16-byte CDB usage map.
#[test]
fn test_one_service_action() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b10, // reporting options: one service action
        0x9e, 0, 0x10, // SERVICE ACTION IN (16), READ CAPACITY (16)
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b11, // flags, supported
        0, 16, // cdb len
        0x9e, 0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0,
        0b0100, // usage data
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// "One service action" query for READ CAPACITY (16) with the
/// timeout-descriptor bit set: the response gains a 12-byte descriptor with
/// no timeouts.
#[test]
fn test_one_service_action_with_timeout_descriptor() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0x82, // request timeout descs, reporting options: one service action
        0x9e, 0, 0x10, // SERVICE ACTION IN (16), READ CAPACITY (16)
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b11, // flags, supported
        0, 16, // cdb len
        0x9e, 0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0,
        0b0100, // usage data
        0, 0xa, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // no timeouts
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// "One service action" query for an unimplemented opcode is rejected with
/// INVALID FIELD IN CDB.
#[test]
fn test_one_service_action_unknown_opcode() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b10, // reporting options: one service action
        0xff, 1, 2, // opcode: vendor specific, unimplemented
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    // not entirely sure this behavior is correct; see comment in implementation
    do_command_fail(&mut target, cdb, sense::INVALID_FIELD_IN_CDB);
}
/// "One service action" query for SERVICE ACTION IN (16) with an unknown
/// service action succeeds but reports it as not supported.
#[test]
fn test_one_service_action_unknown_service_action() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b10, // reporting options: one service action
        0x9e, 0, 0xff, // SERVICE ACTION IN (16), invalid SA
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b01, // flags, not supported
        0, 0, // cdb len
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// "One service action" query for an opcode that has no service actions
/// (TEST UNIT READY) is rejected with INVALID FIELD IN CDB.
#[test]
fn test_one_service_action_not_service_action() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b10, // reporting options: one service action
        0, 1, 2, // TEST UNIT READY
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    do_command_fail(&mut target, cdb, sense::INVALID_FIELD_IN_CDB);
}
// rest of these tests are for "mode 3", which the spec calls 011b and our
// implementation calls OneCommandOrServiceAction, but that's a mouthful so just
// use "mode 3" for test names
/// Mode 3 query for TEST UNIT READY with service action 0 reports the
/// opcode as supported.
#[test]
fn test_mode_3_opcode_without_service_action() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b11, // reporting options: mode 3
        0, 0, 0, // opcode: TEST UNIT READY, SA: 0
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b11, // flags, supported
        0, 6, // cdb len
        0, 0, 0, 0, 0, 0b0100, // usage data
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// Mode 3 query for TEST UNIT READY with the timeout-descriptor bit set:
/// the response gains a 12-byte descriptor with no timeouts.
#[test]
fn test_mode_3_with_timeout_descriptor() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0x83, // request timeout descs, reporting options: mode 3
        0, 0, 0, // opcode: TEST UNIT READY, SA: 0
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b11, // flags, supported
        0, 6, // cdb len
        0, 0, 0, 0, 0, 0b0100, // usage data
        0, 0xa, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // no timeouts
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// Mode 3 query for TEST UNIT READY with a non-zero service action reports
/// the combination as not supported.
#[test]
fn test_mode_3_opcode_with_unnecessary_service_action() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b11, // reporting options: mode 3
        0, 0, 1, // opcode: TEST UNIT READY, SA: 1
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b01, // flags, not supported
        0, 0, // cdb len
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// Mode 3 query for an unimplemented opcode (0xff) reports it as not
/// supported.
#[test]
fn test_mode_3_invalid_opcode() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b11, // reporting options: mode 3
        0xff, 0, 0, // opcode: vendor specific
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b01, // flags, not supported
        0, 0, // cdb len
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// Mode 3 query for READ CAPACITY (16) reports it as supported, with a
/// 16-byte CDB usage map.
#[test]
fn test_mode_3_service_action() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b11, // reporting options: mode 3
        0x9e, 0, 0x10, // opcode: SERVICE ACTION IN (16), READ CAPACITY (16)
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b11, // flags, supported
        0, 16, // cdb len
        0x9e, 0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0,
        0b0100, // usage data
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// Mode 3 query for READ CAPACITY (16) with the timeout-descriptor bit set:
/// the response gains a 12-byte descriptor with no timeouts.
#[test]
fn test_mode_3_service_action_with_timeout_descriptor() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0x83, // request timeout descs, reporting options: mode 3
        0x9e, 0, 0x10, // opcode: SERVICE ACTION IN (16), READ CAPACITY (16)
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b11, // flags, supported
        0, 16, // cdb len
        0x9e, 0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0,
        0b0100, // usage data
        0, 0xa, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // no timeouts
    ];
    do_command_in(&mut target, cdb, &[], expected);
}
/// Mode 3 query for SERVICE ACTION IN (16) with an unknown service action
/// reports the combination as not supported.
#[test]
fn test_mode_3_invalid_service_action() {
    let mut target = EmulatedTarget::new();
    target.add_lun(Box::new(BlockDevice::new(null_image())));
    let cdb: &[u8] = &[
        0xa3, 0x0c, // REPORT SUPPORTED OPERATION CODES
        0b11, // reporting options: mode 3
        0x9e, 0, 0xff, // opcode: SERVICE ACTION IN (16), invalid SA
        0, 0, 1, 0, // allocation length: 256
        0, // reserved
        0, // control
    ];
    let expected: &[u8] = &[
        0, 0b01, // flags, not supported
        0, 0, // cdb len
    ];
    do_command_in(&mut target, cdb, &[], expected);
}

View File

@ -0,0 +1,76 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
pub mod emulation;
pub mod sense;
use std::io::{self, Read, Write};
use self::sense::SenseTriple;
/// Task attribute carried by a SCSI [`Request`].
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum TaskAttr {
/// Simple task attribute.
Simple,
/// Ordered task attribute.
Ordered,
/// Head-of-queue task attribute.
HeadOfQueue,
/// ACA task attribute.
Aca,
}
/// SCSI-level outcome of a command that a target managed to process.
#[derive(Debug, PartialEq, Eq)]
pub struct CmdOutput {
/// SCSI status byte (the constructors in this module use 0 for success and 2 for CHECK CONDITION).
pub status: u8,
/// Status qualifier; the constructors in this module always set it to 0.
pub status_qualifier: u16,
/// Sense data bytes accompanying the status; empty when there is nothing to report.
pub sense: Vec<u8>,
}
impl CmdOutput {
    /// Output of a command that completed successfully: status 0, status
    /// qualifier 0 and no sense data.
    pub const fn ok() -> Self {
        let sense = Vec::new();
        Self {
            status: 0,
            status_qualifier: 0,
            sense,
        }
    }

    /// Output of a command that failed with CHECK CONDITION (status 2),
    /// carrying `sense` encoded as fixed-format sense data.
    pub fn check_condition(sense: SenseTriple) -> Self {
        let sense = sense.to_fixed_sense();
        Self {
            status: 2,
            status_qualifier: 0,
            sense,
        }
    }
}
/// A single SCSI command as handed to [`Target::execute_command`].
pub struct Request<'a> {
/// Identifier of the request, as supplied by the transport.
pub id: u64,
/// Raw bytes of the command descriptor block.
pub cdb: &'a [u8],
/// Task attribute requested for this command.
pub task_attr: TaskAttr,
/// Presumably the command reference number supplied by the transport - TODO confirm.
pub crn: u8,
/// Presumably the command priority supplied by the transport - TODO confirm.
pub prio: u8,
}
/// A transport-level error encountered while processing a SCSI command.
///
/// This is only for transport-level errors; anything else should be handled by
/// returning a CHECK CONDITION status at the SCSI level.
#[derive(Debug)]
pub enum CmdError {
/// The provided CDB is too short for its operation code.
CdbTooShort,
/// An error occurred while writing to the provided data-in writer.
DataIn(io::Error),
}
/// A transport-independent implementation of a SCSI target.
///
/// Currently, we only support emulated targets (see the `emulation` module),
/// but other implementations of this trait could implement pass-through to
/// iSCSI targets or SCSI devices on the host.
pub trait Target: Send + Sync {
/// Executes the command described by `req` on logical unit `lun`.
///
/// `data_out` is the reader the command's data-out bytes are taken from,
/// and `data_in` is the writer the command's data-in bytes are written to.
///
/// Returns the SCSI-level result as a [`CmdOutput`], or a [`CmdError`]
/// for transport-level failures.
fn execute_command(
&mut self,
lun: u16,
data_out: &mut dyn Read,
data_in: &mut dyn Write,
req: Request,
) -> Result<CmdOutput, CmdError>;
}

View File

@ -0,0 +1,37 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
/// A SCSI sense triple: sense key, additional sense code (ASC) and
/// additional sense code qualifier (ASCQ), in that order.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub struct SenseTriple(u8, u8, u8);

impl SenseTriple {
    /// Encodes the triple as 18 bytes of fixed-format sense data.
    ///
    /// The sense key goes to byte 2, the ASC to byte 12 and the ASCQ to
    /// byte 13; every other field is zero apart from the response code and
    /// the additional sense length.
    pub fn to_fixed_sense(self) -> Vec<u8> {
        vec![
            0x70, // response code (fixed, current); valid bit (0)
            0x0, // reserved
            self.0, // sk; various upper bits 0
            0x0, 0x0, 0x0, 0x0, // information
            0xa, // add'l sense length
            0x0, 0x0, 0x0, 0x0, // cmd-specific information
            self.1, // asc
            self.2, // ascq
            0x0, // field-replaceable unit code
            0x0, 0x0, 0x0, // sense-key-specific information
        ]
    }
}

// Sense keys used by the triples below.
const NO_SENSE: u8 = 0;
const MEDIUM_ERROR: u8 = 0x3;
const HARDWARE_ERROR: u8 = 0x4;
const ILLEGAL_REQUEST: u8 = 0x5;

pub const NO_ADDITIONAL_SENSE_INFORMATION: SenseTriple = SenseTriple(NO_SENSE, 0, 0);
pub const INVALID_COMMAND_OPERATION_CODE: SenseTriple = SenseTriple(ILLEGAL_REQUEST, 0x20, 0x0);
pub const LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE: SenseTriple = SenseTriple(ILLEGAL_REQUEST, 0x21, 0x0);
pub const INVALID_FIELD_IN_CDB: SenseTriple = SenseTriple(ILLEGAL_REQUEST, 0x24, 0x0);
// ASC 0x25 is LOGICAL UNIT NOT SUPPORTED; 0x21 would alias
// LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE and make the two errors indistinguishable.
pub const LOGICAL_UNIT_NOT_SUPPORTED: SenseTriple = SenseTriple(ILLEGAL_REQUEST, 0x25, 0x0);
pub const SAVING_PARAMETERS_NOT_SUPPORTED: SenseTriple = SenseTriple(ILLEGAL_REQUEST, 0x39, 0x0);
pub const UNRECOVERED_READ_ERROR: SenseTriple = SenseTriple(MEDIUM_ERROR, 0x11, 0x0);
pub const TARGET_FAILURE: SenseTriple = SenseTriple(HARDWARE_ERROR, 0x44, 0x0);

View File

@ -0,0 +1,621 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use core::slice;
use std::convert::{TryFrom, TryInto};
use std::io::{self, ErrorKind};
use std::mem;
use log::{debug, error, info, warn};
use vhost::vhost_user::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
use vhost_user_backend::{VhostUserBackendMut, VringRwLock, VringT};
use virtio_bindings::virtio_scsi::{virtio_scsi_config, virtio_scsi_event};
use virtio_bindings::{
virtio_config::VIRTIO_F_VERSION_1,
virtio_ring::{VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC},
virtio_scsi::VIRTIO_SCSI_F_HOTPLUG,
};
use virtio_queue::QueueOwnedT;
use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryLoadGuard, GuestMemoryMmap};
use vmm_sys_util::{
epoll::EventSet,
eventfd::{EventFd, EFD_NONBLOCK},
};
use crate::scsi::Target;
use crate::virtio::CDB_SIZE;
use crate::{
scsi::{self, CmdError, TaskAttr},
virtio::{self, Request, RequestParseError, Response, ResponseCode, VirtioScsiLun, SENSE_SIZE},
};
/// Index of the queue whose events `handle_event` treats as SCSI requests.
const REQUEST_QUEUE: u16 = 2;
/// Descriptor-chain writer specialised for the guest memory type used by this backend.
type DescriptorChainWriter = virtio::DescriptorChainWriter<GuestMemoryLoadGuard<GuestMemoryMmap>>;
/// Descriptor-chain reader specialised for the guest memory type used by this backend.
type DescriptorChainReader = virtio::DescriptorChainReader<GuestMemoryLoadGuard<GuestMemoryMmap>>;
/// vhost-user backend state for the SCSI device.
pub(crate) struct VhostUserScsiBackend {
/// Whether EVENT_IDX handling is enabled; set through `set_event_idx`.
event_idx: bool,
/// Guest memory, `None` until `update_memory` has been called.
mem: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
/// Registered targets; a target's position in this vector is its target number.
targets: Vec<Box<dyn Target>>,
/// Eventfd handed out (cloned) by `exit_event`.
pub(crate) exit_event: EventFd,
}
impl VhostUserScsiBackend {
/// Creates a backend with no targets, no guest memory and EVENT_IDX disabled.
///
/// Panics if the non-blocking exit eventfd cannot be created.
pub(crate) fn new() -> Self {
Self {
event_idx: false,
mem: None,
targets: Vec::new(),
exit_event: EventFd::new(EFD_NONBLOCK).expect("Creating exit eventfd"),
}
}
/// Resolves a parsed virtio-scsi LUN to the registered target and the LUN within it.
///
/// Returns `None` when the target index is not registered, or when the guest
/// addressed the REPORT LUNS well-known LUN, which is not supported.
fn parse_target(&mut self, lun: VirtioScsiLun) -> Option<(&mut Box<dyn Target>, u16)> {
match lun {
VirtioScsiLun::TargetLun(target, lun) => self
.targets
.get_mut(usize::from(target))
.map(|tgt| (tgt, lun)),
VirtioScsiLun::ReportLuns => {
// TODO: do we need to handle the REPORT LUNS well-known LUN?
// In practice, everyone seems to just use LUN 0
warn!("Guest is trying to use the REPORT LUNS well-known LUN, which we don't support.");
None
}
}
}
/// Handles one request: parses it from `reader`, executes it on the addressed
/// target and writes the response through `writer`.
///
/// Failures are reported to the guest through the response code where possible;
/// an error while writing the response itself is only logged.
fn process_requests(
&mut self,
reader: &mut DescriptorChainReader,
writer: &mut DescriptorChainWriter,
) {
// The data-in payload is written through a second writer positioned after
// the response header and the sense area, so `writer` itself stays at the
// start for the response written at the end.
let mut body_writer = writer.clone();
const RESPONSE_HEADER_SIZE: u32 = 12;
body_writer.skip(
RESPONSE_HEADER_SIZE + u32::try_from(SENSE_SIZE).expect("SENSE_SIZE should fit 32bit"),
);
let response = match Request::parse(reader) {
Ok(r) => {
if let Some((target, lun)) = self.parse_target(r.lun) {
let output = target.execute_command(
lun,
reader,
&mut body_writer,
scsi::Request {
id: r.id,
cdb: &r.cdb,
task_attr: match r.task_attr {
0 => TaskAttr::Simple,
1 => TaskAttr::Ordered,
2 => TaskAttr::HeadOfQueue,
3 => TaskAttr::Aca,
_ => {
// virtio-scsi spec allows us to map any task attr to simple, presumably
// including future ones
warn!("Unknown task attr: {}", r.task_attr);
TaskAttr::Simple
}
},
crn: r.crn,
prio: r.prio,
},
);
match output {
Ok(output) => {
// Sense data must be strictly smaller than the sense area; anything else is treated as a bug.
assert!(output.sense.len() < SENSE_SIZE);
Response {
response: ResponseCode::Ok,
status: output.status,
status_qualifier: output.status_qualifier,
sense: output.sense,
// TODO: handle residual for data in
residual: body_writer.residual(),
}
}
Err(CmdError::CdbTooShort) => {
// the CDB buffer is, by default, sized larger than any CDB we support; we don't
// handle writes to config space (because QEMU doesn't let us), so there's no
// way the guest can set it too small
unreachable!();
}
Err(CmdError::DataIn(e)) => {
// A `WriteZero` error from the data-in writer is reported as an overrun;
// any other write error is reported as a generic failure.
if e.kind() == ErrorKind::WriteZero {
Response::error(ResponseCode::Overrun, 0)
} else {
error!("Error writing response to guest memory: {}", e);
// There's some chance the header and data in are on different descriptors,
// and only the data in descriptor is bad, so let's at least try to write an
// error to the header
Response::error(ResponseCode::Failure, body_writer.residual())
}
}
}
} else {
debug!("Rejecting command to LUN with bad target {:?}", r.lun);
Response::error(ResponseCode::BadTarget, body_writer.residual())
}
}
Err(RequestParseError::CouldNotReadGuestMemory(e)) => {
// See comment later about errors while writing to guest mem; maybe we at least
// got functional write descriptors, so we can report an error
error!("Error reading request from guest memory: {:?}", e);
Response::error(ResponseCode::Failure, body_writer.residual())
}
Err(RequestParseError::FailedParsingLun(lun)) => {
error!("Unable to parse LUN: {:?}", lun);
Response::error(ResponseCode::Failure, body_writer.residual())
}
};
if let Err(e) = response.write(writer) {
// Alright, so something went wrong writing our response header to guest memory.
// The only reason this should ever happen, I think, is if the guest gave us a
// virtio descriptor with an invalid address.
// There's not a great way to recover from this - we just discovered that
// our only way of communicating with the guest doesn't work - so we either
// silently fail or crash. There isn't too much sense in crashing, IMO, as
// the guest could still recover by, say, installing a fixed kernel and
// rebooting. So let's just log an error and do nothing.
error!("Error writing response to guest memory: {:?}", e);
}
}
/// Processes every descriptor chain currently available on `vring`, marks each
/// one as used and then signals the used queue.
///
/// Panics if guest memory has not been set through `update_memory` yet.
/// Queue errors are returned as `io::Error` of kind `Other`.
fn process_request_queue(&mut self, vring: &VringRwLock) -> Result<(), io::Error> {
// The chains are collected up front, before any of them is processed.
let chains: Vec<_> = vring
.get_mut()
.get_queue_mut()
.iter(self.mem.as_ref().unwrap().memory())
.map_err(|e| io::Error::new(ErrorKind::Other, e))?
.collect();
for dc in chains {
let mut writer = DescriptorChainWriter::new(dc.clone());
let mut reader = DescriptorChainReader::new(dc.clone());
self.process_requests(&mut reader, &mut writer);
vring
.add_used(dc.head_index(), writer.max_written())
.map_err(|e| io::Error::new(ErrorKind::Other, e))?;
}
vring
.signal_used_queue()
.map_err(|e| io::Error::new(ErrorKind::Other, e))?;
Ok(())
}
/// Registers `target`; it becomes addressable under the next free target index.
pub(crate) fn add_target(&mut self, target: Box<dyn Target>) {
self.targets.push(target);
}
}
impl VhostUserBackendMut<VringRwLock> for VhostUserScsiBackend {
/// Number of virtqueues exposed by the device: three in total.
fn num_queues(&self) -> usize {
// control + event + request queues
let num_request_queues = 1;
2 + num_request_queues
}
/// Maximum number of entries per virtqueue.
fn max_queue_size(&self) -> usize {
128 // qemu assumes this by default
}
/// Feature bits offered by the backend.
fn features(&self) -> u64 {
1 << VIRTIO_F_VERSION_1
| 1 << VIRTIO_SCSI_F_HOTPLUG
| 1 << VIRTIO_RING_F_INDIRECT_DESC
| 1 << VIRTIO_RING_F_EVENT_IDX
| VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()
}
/// vhost-user protocol features offered by the backend: `MQ` and `CONFIG`.
fn protocol_features(&self) -> VhostUserProtocolFeatures {
VhostUserProtocolFeatures::MQ | VhostUserProtocolFeatures::CONFIG
}
/// Records whether EVENT_IDX handling is enabled; consulted by `handle_event`.
fn set_event_idx(&mut self, enabled: bool) {
self.event_idx = enabled;
}
/// Stores the guest memory handle used for all later queue processing.
fn update_memory(
&mut self,
atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>,
) -> std::result::Result<(), std::io::Error> {
info!("Memory updated - guest probably booting");
self.mem = Some(atomic_mem);
Ok(())
}
/// Handles a queue event.
///
/// Events on `REQUEST_QUEUE` trigger request processing; events on any other
/// queue are logged and ignored. Always returns `Ok(false)` on success.
///
/// Panics if `evset` is not `EventSet::IN`, if there are not exactly three
/// vrings, if `device_event` is not a valid vring index, if `thread_id` is
/// not 0, or if toggling queue notifications fails.
fn handle_event(
&mut self,
device_event: u16,
evset: EventSet,
vrings: &[VringRwLock],
thread_id: usize,
) -> io::Result<bool> {
assert!(evset == EventSet::IN);
assert!(vrings.len() == 3);
assert!((device_event as usize) < vrings.len());
assert!(thread_id == 0);
let vring = &vrings[device_event as usize];
match device_event {
REQUEST_QUEUE => {
if self.event_idx {
// vm-virtio's Queue implementation only checks avail_index
// once, so to properly support EVENT_IDX we need to keep
// calling process_request_queue() until it stops finding
// new requests on the queue.
loop {
vring.disable_notification().unwrap();
self.process_request_queue(vring)?;
if !vring.enable_notification().unwrap() {
break;
}
}
} else {
// Without EVENT_IDX, a single call is enough.
self.process_request_queue(vring)?;
}
}
_ => {
error!("Ignoring descriptor on queue {}", device_event);
}
}
Ok(false)
}
/// Returns up to `size` bytes of the device configuration space, starting at `offset`.
///
/// Fewer bytes (possibly none) are returned when the requested range extends
/// past the end of the configuration structure.
fn get_config(&self, offset: u32, size: u32) -> Vec<u8> {
let config = virtio_scsi_config {
num_queues: 1,
seg_max: 128 - 2,
max_sectors: 0xFFFF,
cmd_per_lun: 128,
event_info_size: mem::size_of::<virtio_scsi_event>()
.try_into()
.expect("event info size should fit 32bit"),
sense_size: SENSE_SIZE.try_into().expect("SENSE_SIZE should fit 32bit"),
cdb_size: CDB_SIZE.try_into().expect("CDB_SIZE should fit 32bit"),
max_channel: 0,
max_target: 255,
// All LUN bits except the addressing-method bits of the upper byte.
max_lun: u32::from(!u16::from(VirtioScsiLun::ADDRESS_METHOD_PATTERN) << 8 | 0xff),
};
// SAFETY:
// Pointer is aligned (points to start of struct), valid and we only
// access up to the size of the struct.
let config_slice = unsafe {
slice::from_raw_parts(
&config as *const virtio_scsi_config as *const u8,
mem::size_of::<virtio_scsi_config>(),
)
};
config_slice
.iter()
.skip(offset as usize)
.take(size as usize)
.cloned()
.collect()
}
/// Writes to the configuration space are not supported; this always panics.
fn set_config(&mut self, _offset: u32, _buf: &[u8]) -> std::result::Result<(), std::io::Error> {
// QEMU handles config space itself
panic!("Access to configuration space is not supported.");
}
/// Returns a clone of the backend's exit eventfd.
///
/// Panics if the eventfd cannot be cloned.
fn exit_event(&self, _thread_index: usize) -> Option<EventFd> {
Some(self.exit_event.try_clone().expect("Cloning exit eventfd"))
}
}
#[cfg(test)]
mod tests {
use std::{
convert::TryInto,
io::{self, Read, Write},
sync::{Arc, Mutex},
};
use vhost_user_backend::{VhostUserBackendMut, VringRwLock, VringT};
use virtio_bindings::{
virtio_ring::VRING_DESC_F_WRITE,
virtio_scsi::{
virtio_scsi_cmd_req, virtio_scsi_config, VIRTIO_SCSI_S_BAD_TARGET,
VIRTIO_SCSI_S_FAILURE, VIRTIO_SCSI_S_OK,
},
};
use virtio_queue::{mock::MockSplitQueue, Descriptor};
use vm_memory::{
Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic,
GuestMemoryMmap,
};
use super::VhostUserScsiBackend;
use crate::{
scsi::{CmdOutput, Target, TaskAttr},
virtio::{
tests::{VirtioScsiCmdReq, VirtioScsiCmdResp},
VirtioScsiLun, CDB_SIZE,
},
};
/// Snapshot of the arguments of one `execute_command` call received by a `FakeTarget`.
// The fields are recorded for inspection but not all of them are read, hence `dead_code`.
#[allow(dead_code)]
struct RecordedCommand {
lun: u16,
id: u64,
cdb: [u8; CDB_SIZE],
task_attr: TaskAttr,
crn: u8,
prio: u8,
}
/// Log of every command a `FakeTarget` has been asked to execute.
struct FakeTargetCommandCollector {
    /// Commands in the order they were received.
    received_commands: Vec<RecordedCommand>,
}

impl FakeTargetCommandCollector {
    /// Creates an empty collector, wrapped for sharing between a test and
    /// the fake target it drives.
    fn new() -> Arc<Mutex<Self>> {
        let empty = Self {
            received_commands: Vec::new(),
        };
        Arc::new(Mutex::new(empty))
    }
}
/// Result type produced by a `FakeTarget` callback; identical to what `Target::execute_command` returns.
type FakeResponse = Result<crate::scsi::CmdOutput, crate::scsi::CmdError>;
/// Test double for a SCSI target: records every command and answers with a user-supplied callback.
struct FakeTarget<Cb> {
/// Shared log the received commands are appended to.
collector: Arc<Mutex<FakeTargetCommandCollector>>,
/// Callback that produces the response for each command.
callback: Cb,
}
impl<Cb> FakeTarget<Cb> {
fn new(collector: Arc<Mutex<FakeTargetCommandCollector>>, callback: Cb) -> Self
where
Cb: FnMut(u16, crate::scsi::Request) -> FakeResponse + Sync + Send,
{
Self {
collector,
callback,
}
}
}
impl<Cb> Target for FakeTarget<Cb>
where
    Cb: FnMut(u16, crate::scsi::Request) -> FakeResponse + Sync + Send,
{
    /// Records the command in the shared collector, then returns whatever
    /// the callback answers. The data streams are ignored.
    ///
    /// Panics if the collector mutex is poisoned or if the CDB is not
    /// exactly `CDB_SIZE` bytes long.
    fn execute_command(
        &mut self,
        lun: u16,
        _data_out: &mut dyn Read,
        _data_in: &mut dyn Write,
        req: crate::scsi::Request,
    ) -> Result<crate::scsi::CmdOutput, crate::scsi::CmdError> {
        // The lock is taken first and stays held while the callback runs.
        let mut log = self.collector.lock().unwrap();
        let recorded = RecordedCommand {
            lun,
            id: req.id,
            cdb: req.cdb.try_into().unwrap(),
            task_attr: req.task_attr,
            crn: req.crn,
            prio: req.prio,
        };
        log.received_commands.push(recorded);
        (self.callback)(lun, req)
    }
}
/// Builds a backend, a ready vring and guest memory holding a single queued request.
///
/// `req` is written to guest address 0x10_0000 and queued as a two-descriptor
/// chain: a readable request descriptor, and a writable response descriptor at
/// guest address 0x20_0000. The returned backend already has its memory set but
/// has no targets registered.
fn setup(
req: impl ByteValued,
) -> (
VhostUserScsiBackend,
VringRwLock,
GuestMemoryAtomic<GuestMemoryMmap>,
) {
let mem = GuestMemoryAtomic::new(
GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x1000_0000)]).unwrap(),
);
// The `build_desc_chain` function will populate the `NEXT` related flags and field.
let v = vec![
Descriptor::new(0x10_0000, 0x100, 0, 0), // request
Descriptor::new(0x20_0000, 0x100, VRING_DESC_F_WRITE as u16, 0), // response
];
mem.memory()
.write_obj(req, GuestAddress(0x10_0000))
.expect("writing to succeed");
let mem_handle = mem.memory();
let queue = MockSplitQueue::new(&*mem_handle, 16);
// queue.set_avail_idx(1);
queue.build_desc_chain(&v).unwrap();
// Put the descriptor index 0 in the first available ring position.
mem.memory()
.write_obj(0u16, queue.avail_addr().unchecked_add(4))
.unwrap();
// Set `avail_idx` to 1.
mem.memory()
.write_obj(1u16, queue.avail_addr().unchecked_add(2))
.unwrap();
let vring = VringRwLock::new(mem.clone(), 16).unwrap();
// vring.set_queue_info(0x10_0000, 0x10_0000, 0x300).unwrap();
vring.set_queue_size(16);
// Point the vring at the tables laid out by the mock queue.
vring
.set_queue_info(
queue.desc_table_addr().0,
queue.avail_addr().0,
queue.used_addr().0,
)
.unwrap();
vring.set_queue_ready(true);
let mut backend = VhostUserScsiBackend::new();
backend.update_memory(mem.clone()).unwrap();
(backend, vring, mem)
}
/// Reads the command response from guest address 0x20_0000.
///
/// Panics if the response cannot be read from guest memory.
fn get_response(mem: &GuestMemoryAtomic<GuestMemoryMmap>) -> VirtioScsiCmdResp {
    let response_addr = GuestAddress(0x20_0000);
    let guest = mem.memory();
    guest
        .read_obj::<VirtioScsiCmdResp>(response_addr)
        .expect("Unable to read response from memory")
}
/// Builds the 8-byte virtio-scsi LUN field addressing `lun` on `target` using
/// flat space addressing.
///
/// `VirtioScsiLun::parse` decodes bytes 2..=3 as a big-endian value with the
/// addressing method in the two top bits of byte 2, so the LUN is emitted
/// big-endian here. Only the low 14 bits of `lun` can be represented.
fn create_lun_specifier(target: u8, lun: u16) -> [u8; 8] {
    let [lun_high, lun_low] = lun.to_be_bytes();
    [
        0x1,
        target,
        lun_high | VirtioScsiLun::FLAT_SPACE_ADDRESSING_METHOD,
        lun_low,
        0x0,
        0x0,
        0x0,
        0x0,
    ]
}
/// A well-formed command for an existing target is forwarded to it and
/// answered with `VIRTIO_SCSI_S_OK`.
#[test]
fn backend_test() {
    let recorder = FakeTargetCommandCollector::new();
    let target = Box::new(FakeTarget::new(recorder.clone(), |_, _| Ok(CmdOutput::ok())));

    let request = VirtioScsiCmdReq(virtio_scsi_cmd_req {
        lun: create_lun_specifier(0, 0),
        tag: 0,
        task_attr: 0,
        prio: 0,
        crn: 0,
        cdb: [0; CDB_SIZE],
    });
    let (mut backend, vring, mem) = setup(request);
    backend.add_target(target);
    backend.process_request_queue(&vring).unwrap();

    let response = get_response(&mem);
    assert_eq!(response.0.response, VIRTIO_SCSI_S_OK as u8);

    let recorded = recorder.lock().unwrap();
    assert_eq!(
        recorded.received_commands.len(),
        1,
        "expect one command to be passed to Target"
    );
}
/// A target that fails with a `CmdError` still receives the command, and the
/// guest sees `VIRTIO_SCSI_S_FAILURE`.
#[test]
fn backend_error_reporting_test() {
    let recorder = FakeTargetCommandCollector::new();
    let failing_target = Box::new(FakeTarget::new(recorder.clone(), |_, _| {
        let cause = io::Error::new(io::ErrorKind::Other, "internal error");
        Err(crate::scsi::CmdError::DataIn(cause))
    }));

    let request = VirtioScsiCmdReq(virtio_scsi_cmd_req {
        lun: create_lun_specifier(0, 0),
        tag: 0,
        task_attr: 0,
        prio: 0,
        crn: 0,
        cdb: [0; CDB_SIZE],
    });
    let (mut backend, vring, mem) = setup(request);
    backend.add_target(failing_target);
    backend.process_request_queue(&vring).unwrap();

    let response = get_response(&mem);
    assert_eq!(response.0.response, VIRTIO_SCSI_S_FAILURE as u8);

    let recorded = recorder.lock().unwrap();
    assert_eq!(
        recorded.received_commands.len(),
        1,
        "expect one command to be passed to Target"
    );
}
/// With no target registered, a command is rejected with
/// `VIRTIO_SCSI_S_BAD_TARGET`.
#[test]
fn test_command_to_unknown_lun() {
    // NOTE(review): this collector is never handed to a target, so the final
    // length check below can only ever observe an empty list.
    let recorder = FakeTargetCommandCollector::new();

    let request = VirtioScsiCmdReq(virtio_scsi_cmd_req {
        lun: create_lun_specifier(0, 0),
        tag: 0,
        task_attr: 0,
        prio: 0,
        crn: 0,
        cdb: [0; CDB_SIZE],
    });
    let (mut backend, vring, mem) = setup(request);
    backend.process_request_queue(&vring).unwrap();

    let response = get_response(&mem);
    assert_eq!(response.0.response, VIRTIO_SCSI_S_BAD_TARGET as u8);

    let recorded = recorder.lock().unwrap();
    assert_eq!(
        recorded.received_commands.len(),
        0,
        "expect no command to make it to the target"
    );
}
/// The single-byte request written by this test is answered with
/// `VIRTIO_SCSI_S_FAILURE`.
#[test]
fn test_broken_read_descriptor() {
    // NOTE(review): this collector is never handed to a target, so the final
    // length check below can only ever observe an empty list.
    let recorder = FakeTargetCommandCollector::new();

    // single byte request
    let truncated_request = [0u8; 1];
    let (mut backend, vring, mem) = setup(truncated_request);
    backend.process_request_queue(&vring).unwrap();

    let response = get_response(&mem);
    assert_eq!(response.0.response, VIRTIO_SCSI_S_FAILURE as u8);

    let recorded = recorder.lock().unwrap();
    assert_eq!(
        recorded.received_commands.len(),
        0,
        "expect no command to make it to the target"
    );
}
/// `get_config` clamps the requested window to the config space instead of
/// failing on out-of-range offsets or lengths.
#[test]
fn test_reading_config() {
    let backend = VhostUserScsiBackend::new();
    let config_size = std::mem::size_of::<virtio_scsi_config>();

    // 0 len slice
    let empty = backend.get_config(0, 0);
    assert_eq!(vec![0_u8; 0], empty);

    // overly long slice
    let whole = backend.get_config(0, 2000);
    assert_eq!(config_size, whole.len());

    // subslice
    let single_byte = backend.get_config(4, 1);
    assert_eq!(1, single_byte.len());

    // overly long subslice
    let tail = backend.get_config(8, 10000);
    assert_eq!(28, tail.len());

    // offset after end
    let past_end = backend.get_config(100000, 10);
    assert_eq!(0, past_end.len());
}
}

View File

@ -0,0 +1,370 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
//! Helpers for virtio and virtio-scsi.
use std::{
cell::Cell,
cmp::{max, min},
convert::TryInto,
io,
io::{ErrorKind, Read, Write},
mem,
ops::Deref,
rc::Rc,
};
use log::error;
use virtio_bindings::virtio_scsi::virtio_scsi_cmd_req;
use virtio_queue::{Descriptor, DescriptorChain, DescriptorChainRwIter};
use vm_memory::{Bytes, GuestAddress, GuestMemory};
/// virtio-scsi has its own format for LUNs, documented in 5.6.6.1 of virtio
/// v1.1. This represents a LUN parsed from that format.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub(crate) enum VirtioScsiLun {
/// The LUN field matched the fixed `REPORT_LUNS` byte pattern.
ReportLuns,
/// A `(target, lun)` pair decoded from a flat-space-addressed LUN field.
TargetLun(u8, u16),
}
/// LUN field bytes that `VirtioScsiLun::parse` maps to
/// `VirtioScsiLun::ReportLuns`.
pub(crate) const REPORT_LUNS: [u8; 8] = [0xc1, 0x01, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0];
impl VirtioScsiLun {
    /// Addressing-method bits (top two bits of byte 2) selecting flat space
    /// addressing.
    pub(crate) const FLAT_SPACE_ADDRESSING_METHOD: u8 = 0b0100_0000;
    /// Mask covering the addressing-method bits of byte 2.
    pub(crate) const ADDRESS_METHOD_PATTERN: u8 = 0b1100_0000;

    /// Parses the 8-byte LUN field of a virtio-scsi request.
    ///
    /// Returns `Some(ReportLuns)` for the `REPORT_LUNS` pattern,
    /// `Some(TargetLun(target, lun))` when byte 0 is `1` and the LUN uses flat
    /// space addressing, and `None` for everything else. Unsupported
    /// addressing methods are additionally logged as an error.
    pub(crate) fn parse(bytes: [u8; 8]) -> Option<Self> {
        if bytes == REPORT_LUNS {
            return Some(Self::ReportLuns);
        }
        if bytes[0] != 0x1 {
            return None;
        }

        let target = bytes[1];
        // bytes[2..=3] is a normal SCSI single-level lun
        if (bytes[2] & Self::ADDRESS_METHOD_PATTERN) != Self::FLAT_SPACE_ADDRESSING_METHOD {
            // `#04x` pads to two hex digits; the `0x` prefix counts toward the
            // width, so a width of 2 would never pad anything.
            error!(
                "Got LUN in unsupported format: {:#04x} {:#04x}. \
                Only flat space addressing is supported!",
                bytes[2], bytes[3]
            );
            return None;
        }

        // Strip the addressing-method bits; the remaining 14 bits are the LUN
        // in big-endian order.
        let lun = u16::from_be_bytes([bytes[2] & !Self::ADDRESS_METHOD_PATTERN, bytes[3]]);
        Some(Self::TargetLun(target, lun))
    }
}
/// Value written into the `response` byte of a command response (see
/// `Response::write`).
// NOTE(review): the discriminants look like the VIRTIO_SCSI_S_* constants from
// the virtio spec — confirm against virtio_bindings.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ResponseCode {
Ok = 0,
Overrun = 1,
BadTarget = 3,
Failure = 9,
}
// These are the defaults given in the virtio spec; QEMU doesn't let the driver
// write to config space, so these will always be the correct values.
/// NOTE(review): presumably the size in bytes reserved for sense data in a
/// response — confirm against the callers.
pub(crate) const SENSE_SIZE: usize = 96;
/// Length in bytes of the `cdb` array of a `Request`.
pub(crate) const CDB_SIZE: usize = 32;
/// A command request as decoded by `Request::parse`.
pub(crate) struct Request {
/// Little-endian `u64` taken from bytes 8..16 of the raw request.
pub id: u64,
/// LUN decoded from the first 8 bytes of the raw request.
pub lun: VirtioScsiLun,
/// Byte 17 of the raw request.
pub prio: u8,
/// Byte 18 of the raw request.
pub crn: u8,
/// Command descriptor block: bytes 19.. of the raw request.
pub cdb: [u8; CDB_SIZE],
/// Byte 16 of the raw request.
pub task_attr: u8,
}
/// Reasons why `Request::parse` can fail.
#[derive(Debug)]
pub(crate) enum RequestParseError {
/// Reading the fixed-size request header from the reader failed.
CouldNotReadGuestMemory(io::Error),
/// The LUN field (carried verbatim) was rejected by `VirtioScsiLun::parse`.
FailedParsingLun([u8; 8]),
}
impl Request {
    /// Reads one `virtio_scsi_cmd_req`-sized header from `reader` and decodes
    /// it.
    ///
    /// # Errors
    ///
    /// `CouldNotReadGuestMemory` if the header cannot be read in full, and
    /// `FailedParsingLun` if the LUN field is not understood.
    pub fn parse(reader: &mut impl Read) -> Result<Self, RequestParseError> {
        let mut raw = [0; mem::size_of::<virtio_scsi_cmd_req>()];
        if let Err(e) = reader.read_exact(&mut raw) {
            return Err(RequestParseError::CouldNotReadGuestMemory(e));
        }

        let lun_field = raw[0..8].try_into().expect("slice is of length 8");
        let lun = match VirtioScsiLun::parse(lun_field) {
            Some(lun) => lun,
            None => {
                return Err(RequestParseError::FailedParsingLun(
                    raw[0..8].try_into().expect("slice to be of length 8"),
                ))
            }
        };

        let id_field = raw[8..16].try_into().expect("slice is of length 8");
        let cdb = raw[19..].try_into().expect("should fit into cdb");

        Ok(Self {
            id: u64::from_le_bytes(id_field),
            lun,
            task_attr: raw[16],
            prio: raw[17],
            crn: raw[18],
            cdb,
        })
    }
}
/// A command response, serialised to the guest by `Response::write`.
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct Response {
/// Written as a single byte after `status`.
pub response: ResponseCode,
/// Written as a single byte after `status_qualifier`.
pub status: u8,
/// Written little-endian after `residual`.
pub status_qualifier: u16,
/// Sense bytes; their count is written first, the bytes themselves last.
pub sense: Vec<u8>,
/// Written little-endian after the sense length.
pub residual: u32,
}
impl Response {
    /// Serialises the response into `writer`.
    ///
    /// The fields are emitted one `write_all` call each, in wire order:
    /// sense length, residual, status qualifier, status, response code and
    /// finally the sense bytes. Multi-byte integers are little-endian.
    pub fn write(&self, writer: &mut impl Write) -> Result<(), io::Error> {
        let sense_len = (self.sense.len() as u32).to_le_bytes();
        let residual = self.residual.to_le_bytes();
        let status_qualifier = self.status_qualifier.to_le_bytes();
        let status = [self.status];
        let response = [self.response as u8];

        let fields: [&[u8]; 6] = [
            &sense_len[..],
            &residual[..],
            &status_qualifier[..],
            &status[..],
            &response[..],
            &self.sense[..],
        ];
        for &field in fields.iter() {
            writer.write_all(field)?;
        }
        Ok(())
    }

    /// Convenience constructor for failure responses: everything except the
    /// response code and the residual is zeroed/empty.
    ///
    /// # Panics
    ///
    /// Panics if `code` is `ResponseCode::Ok`.
    pub fn error(code: ResponseCode, residual: u32) -> Self {
        assert!(code != ResponseCode::Ok);
        Self {
            residual,
            response: code,
            sense: Vec::new(),
            status: 0,
            status_qualifier: 0,
        }
    }
}
// TODO: Drop this if https://github.com/rust-vmm/vm-virtio/pull/33 found an agreement
/// A `Write` implementation that writes to the memory indicated by a virtio
/// descriptor chain.
///
/// Cloning yields a writer at the same position; `max_written` sits behind an
/// `Rc`, so the high-water mark is shared between the clones.
#[derive(Clone)]
pub struct DescriptorChainWriter<M: Deref>
where
M::Target: GuestMemory,
{
// Kept to reach the guest memory the descriptors refer to.
chain: DescriptorChain<M>,
// Iterator over the writable descriptors that follow `current`.
iter: DescriptorChainRwIter<M>,
// Descriptor currently being filled; `None` once the chain is exhausted.
current: Option<Descriptor>,
// Byte offset into `current`.
offset: u32,
// Bytes written or skipped through this particular writer.
written: u32,
// Largest `written` value seen across this writer and its clones.
max_written: Rc<Cell<u32>>,
}
impl<M: Deref + Clone> DescriptorChainWriter<M>
where
    M::Target: GuestMemory,
{
    /// Creates a writer positioned at the start of the first writable
    /// descriptor of `chain`.
    pub fn new(chain: DescriptorChain<M>) -> Self {
        let mut writable = chain.clone().writable();
        let first = writable.next();
        Self {
            chain,
            iter: writable,
            current: first,
            offset: 0,
            written: 0,
            max_written: Rc::new(Cell::new(0)),
        }
    }

    /// Advances the write position by `bytes` without touching guest memory.
    /// The skipped bytes count as written.
    pub fn skip(&mut self, bytes: u32) {
        self.offset += bytes;
        self.add_written(bytes);
        // Step over every descriptor the new offset has moved past.
        loop {
            let past_current = match self.current {
                Some(desc) => self.offset >= desc.len(),
                None => false,
            };
            if !past_current {
                break;
            }
            let desc = self.current.expect("loop condition ensures existance");
            self.offset -= desc.len();
            self.current = self.iter.next();
        }
    }

    /// Returns the number of bytes left between the current position and the
    /// end of the writable descriptors, consuming the rest of the chain.
    pub fn residual(&mut self) -> u32 {
        let mut remaining = 0;
        loop {
            match self.current {
                None => return remaining,
                Some(desc) => {
                    remaining += desc.len() - self.offset;
                    self.offset = 0;
                    self.current = self.iter.next();
                }
            }
        }
    }

    /// Accounts for `written` more bytes and raises the shared high-water mark
    /// if this writer is now ahead of it.
    fn add_written(&mut self, written: u32) {
        self.written += written;
        let high_water = max(self.max_written.get(), self.written);
        self.max_written.set(high_water);
    }

    /// Returns the shared high-water mark of written bytes.
    pub fn max_written(&self) -> u32 {
        self.max_written.get()
    }
}
impl<M: Deref + Clone> Write for DescriptorChainWriter<M>
where
M::Target: GuestMemory,
{
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
if let Some(current) = self.current {
let left_in_descriptor = current.len() - self.offset;
let to_write: u32 = min(left_in_descriptor as usize, buf.len()) as u32;
let written = self
.chain
.memory()
.write(
&buf[..(to_write as usize)],
GuestAddress(current.addr().0.checked_add(u64::from(self.offset)).ok_or(
io::Error::new(ErrorKind::Other, vm_memory::Error::InvalidGuestRegion),
)?),
)
.map_err(|e| io::Error::new(ErrorKind::Other, e))?;
self.offset += written as u32;
if self.offset == current.len() {
self.current = self.iter.next();
self.offset = 0;
}
self.add_written(written as u32);
Ok(written)
} else {
Ok(0)
}
}
fn flush(&mut self) -> std::io::Result<()> {
// no-op: we're writing directly to guest memory
Ok(())
}
}
/// A `Read` implementation that reads from the memory indicated by a virtio
/// descriptor chain.
pub struct DescriptorChainReader<M: Deref>
where
M::Target: GuestMemory,
{
// Kept to reach the guest memory the descriptors refer to.
chain: DescriptorChain<M>,
// Iterator over the readable descriptors that follow `current`.
iter: DescriptorChainRwIter<M>,
// Descriptor currently being read; `None` once the chain is exhausted.
current: Option<Descriptor>,
// Byte offset into `current`.
offset: u32,
}
impl<M: Deref + Clone> DescriptorChainReader<M>
where
    M::Target: GuestMemory,
{
    /// Creates a reader positioned at the start of the first readable
    /// descriptor of `chain`.
    pub fn new(chain: DescriptorChain<M>) -> Self {
        let mut readable = chain.clone().readable();
        let first = readable.next();
        Self {
            chain,
            iter: readable,
            current: first,
            offset: 0,
        }
    }
}
impl<M: Deref> Read for DescriptorChainReader<M>
where
    M::Target: GuestMemory,
{
    /// Reads as many bytes as are left in the current descriptor, up to
    /// `buf.len()`.
    ///
    /// At most one descriptor is touched per call, so short reads are normal;
    /// use `read_exact` to span descriptors. Returns `Ok(0)` once the chain is
    /// exhausted.
    ///
    /// # Errors
    ///
    /// Fails if the source address overflows or the guest memory read fails.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let current = match self.current {
            Some(descriptor) => descriptor,
            None => return Ok(0),
        };

        let left_in_descriptor = current.len() - self.offset;
        // Compare in `usize`: truncating `buf.len()` to `u32` would turn a
        // buffer whose length is a multiple of 4 GiB into a zero-length read,
        // which callers interpret as end of data.
        let to_read = min(left_in_descriptor as usize, buf.len());

        // The descriptor address is guest-controlled, so guard the addition
        // instead of letting it overflow.
        let source = current
            .addr()
            .0
            .checked_add(u64::from(self.offset))
            .ok_or_else(|| {
                io::Error::new(
                    ErrorKind::Other,
                    "descriptor address overflows the guest address space",
                )
            })?;

        let read = self
            .chain
            .memory()
            .read(&mut buf[..to_read], GuestAddress(source))
            .map_err(|e| io::Error::new(ErrorKind::Other, e))?;

        // `read` is bounded by `left_in_descriptor`, so it fits in a `u32`.
        self.offset += read as u32;
        if self.offset == current.len() {
            self.current = self.iter.next();
            self.offset = 0;
        }
        Ok(read)
    }
}
#[cfg(test)]
pub(crate) mod tests {
use virtio_bindings::virtio_scsi::{virtio_scsi_cmd_req, virtio_scsi_cmd_resp};
use virtio_queue::{mock::MockSplitQueue, Descriptor};
use vm_memory::{ByteValued, GuestAddress, GuestMemoryMmap};
use super::*;
/// Wrapper that lets tests write a raw command request into guest memory.
#[derive(Debug, Default, Clone, Copy)]
#[repr(transparent)]
pub(crate) struct VirtioScsiCmdReq(pub virtio_scsi_cmd_req);
/// SAFETY: struct is a transparent wrapper around the request
/// which can be read from a byte array
unsafe impl ByteValued for VirtioScsiCmdReq {}
/// Wrapper that lets tests read a raw command response from guest memory.
#[derive(Debug, Default, Clone, Copy)]
#[repr(transparent)]
pub(crate) struct VirtioScsiCmdResp(pub virtio_scsi_cmd_resp);
/// SAFETY: struct is a transparent wrapper around the response
/// which can be read from a byte array
unsafe impl ByteValued for VirtioScsiCmdResp {}
/// Builds an otherwise zeroed request whose LUN field is `REPORT_LUNS`.
pub(crate) fn report_luns_command() -> VirtioScsiCmdReq {
VirtioScsiCmdReq(virtio_scsi_cmd_req {
lun: REPORT_LUNS,
tag: 0,
task_attr: 0,
prio: 0,
crn: 0,
cdb: [0; CDB_SIZE],
})
}
/// A request read back through `DescriptorChainReader` parses to the LUN
/// that was written.
#[test]
fn test_parse_request() {
let mem: GuestMemoryMmap =
GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000_0000)]).unwrap();
// The `build_desc_chain` function will populate the `NEXT` related flags and field.
let v = vec![
// A device-readable request header descriptor (flags are 0, so it is
// not marked writable).
Descriptor::new(0x10_0000, 0x100, 0, 0),
];
let req = report_luns_command();
mem.write_obj(req, GuestAddress(0x10_0000))
.expect("writing to succeed");
let queue = MockSplitQueue::new(&mem, 16);
let chain = queue.build_desc_chain(&v).unwrap();
let mut chain = DescriptorChainReader::new(chain.clone());
let req = Request::parse(&mut chain).expect("request failed to parse");
assert_eq!(req.lun, VirtioScsiLun::ReportLuns);
}
}

View File

@ -0,0 +1,2 @@
results/
test-data/

View File

@ -0,0 +1,2 @@
results/
test-data/

View File

@ -0,0 +1,11 @@
# Test environment for vhost-device-scsi: QEMU, libvirt/libguestfs tooling and
# the SSH utilities used to drive the guest.
FROM fedora:39
RUN dnf install --quiet --assumeyes \
    /usr/bin/qemu-system-x86_64 \
    /usr/bin/qemu-img \
    /usr/bin/virt-sysprep \
    /usr/bin/ssh-keygen \
    /usr/bin/ssh \
    /usr/sbin/libvirtd \
    wget \
    && dnf clean all
# invoke-test.sh bind-mounts the test directory at /test and runs
# /test/start-test.sh, so the declared volume must use the same path.
VOLUME /test/

View File

@ -0,0 +1,33 @@
# Testing tools
This folder contains some tooling for tests
## Prerequisites
For running these tests, you need a KVM enabled x86_64 machine and `podman`.
vhost-device-scsi must have been built already.
## Performed tests
Right now, the test harness will only run
[blktests](https://github.com/osandov/blktests) against the target device
(these tests are probably testing the guest kernel more than the actual
device).
## Test execution
Triggering the build of the necessary container images and invoking the tests
is done by calling `./invoke-test.sh`.
That will build the `Containerfile`, launch a container and invoke
`./start-test.sh` inside of the container. That will download a Fedora cloud
image, launch the daemon, launch QEMU, wait until the guest is up and trigger the
test execution.
Results will be downloaded into a timestamped folder under `results/`.
# Other test tools
Some quick and dirty fuzzing code is available at
https://github.com/Ablu/vhost-device/tree/scsi-fuzzing.

View File

@ -0,0 +1,20 @@
#!/bin/bash -xe
# Builds the test container image and runs start-test.sh inside it, with the
# locally built daemon and this directory mounted into the container.

# Quote the expansions so checkouts in paths containing spaces keep working.
cd "$(dirname "$0")"

DAEMON_BINARY="$PWD/../../../target/debug/vhost-device-scsi"
if [[ ! -e "$DAEMON_BINARY" ]]
then
    echo "Unable to find \"$DAEMON_BINARY\". Did you run cargo build?"
    exit 1
fi

TAG_NAME=vhost-device-scsi-test-env
podman build -t "$TAG_NAME" .
podman run \
    -v /dev/kvm:/dev/kvm \
    --security-opt label=disable \
    -v "$DAEMON_BINARY":/usr/local/bin/vhost-device-scsi:ro \
    -v "$PWD":/test "$TAG_NAME" \
    /test/start-test.sh

View File

@ -0,0 +1,60 @@
#!/bin/bash -xe
# Runs inside the test container: prepares a Fedora guest image, starts the
# vhost-device-scsi daemon and QEMU, runs test-script.sh in the guest over SSH
# and copies the blktests results back.

# Quoted so the script also works from a path containing spaces.
cd "$(dirname "$0")"

libvirtd --daemon
virtlogd --daemon
export LIBGUESTFS_BACKEND=direct

mkdir -p test-data/
pushd test-data
IMAGE=Fedora-Cloud-Base-38-1.6.x86_64.qcow2
# Download the base image and generate the SSH key only once; both are reused
# by later runs.
test -e "$IMAGE" || wget --quiet "https://download.fedoraproject.org/pub/fedora/linux/releases/38/Cloud/x86_64/images/$IMAGE" -O "$IMAGE"
qemu-img create -f qcow2 -F qcow2 -b "$PWD/$IMAGE" fedora-overlay.qcow2
test -e test-key-id_rsa || ssh-keygen -N "" -f test-key-id_rsa
virt-sysprep -a fedora-overlay.qcow2 \
    --ssh-inject root:file:test-key-id_rsa.pub
# Backing file for the disk exported by the daemon.
fallocate -l 5GiB big-image.img
popd

# Deliberately a plain string: it is expanded unquoted below so that it splits
# into separate ssh/scp options.
SSH_OPTS="-i test-data/test-key-id_rsa -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o User=root -o Port=2222"

vhost-device-scsi --socket-path /tmp/vhost-user-scsi.sock test-data/big-image.img &
# Give the daemon a moment to create its socket before QEMU connects.
sleep 1

qemu-system-x86_64 \
    -enable-kvm -cpu host \
    -device virtio-net-pci,netdev=net0,mac=52:54:00:12:35:02 \
    -netdev user,id=net0,hostfwd=tcp::2222-:22,hostfwd=tcp::2323-:23 \
    -object rng-random,filename=/dev/urandom,id=rng0 -device virtio-rng-pci,rng=rng0 \
    -hda test-data/fedora-overlay.qcow2 \
    -object memory-backend-memfd,id=mem,size=8192M,share=on \
    -numa node,memdev=mem \
    -device vhost-user-scsi-pci,num_queues=1,param_change=off,chardev=vus \
    -chardev socket,id=vus,path=/tmp/vhost-user-scsi.sock \
    -smp 4 -m 8192 \
    -serial mon:stdio \
    -display none &

while ! ssh $SSH_OPTS localhost echo waiting for guest to come online
do
    sleep 1
done

scp $SSH_OPTS test-script.sh localhost:~/
# A failing test run must not abort the script: the results are still
# collected and the guest is still shut down below.
ssh $SSH_OPTS localhost /root/test-script.sh || echo "tests failed"

export RESULT_DIR="$PWD/results/$(date --rfc-3339=s)"
mkdir -p "$RESULT_DIR"
scp $SSH_OPTS -r localhost:/root/blktests/results/ "$RESULT_DIR/"
ssh $SSH_OPTS localhost poweroff
wait # wait for qemu to terminate

View File

@ -0,0 +1,10 @@
#!/bin/bash -xe
# Runs inside the guest: installs the build dependencies, builds blktests and
# runs its `scsi` and `block` groups.
dnf install -y git make g++ fio liburing-devel blktrace
git clone https://github.com/osandov/blktests.git
pushd blktests
# NOTE(review): assumes the vhost-user-scsi disk shows up as /dev/sdb in the
# guest — confirm against the QEMU command line used to boot it.
echo "TEST_DEVS=(/dev/sdb)" > config
make -j $(nproc)
./check scsi block
popd

View File

@ -0,0 +1,15 @@
# Changelog
## [Unreleased]
### Added
### Changed
### Fixed
### Deprecated
## [0.1.0]
First release

View File

@ -0,0 +1,35 @@
[package]
name = "vhost-device-vsock"
version = "0.1.0"
authors = ["Harshavardhan Unnibhavi <harshanavkis@gmail.com>", "Stefano Garzarella <sgarzare@redhat.com>"]
description = "A virtio-vsock device using the vhost-user protocol."
repository = "https://github.com/rust-vmm/vhost-device"
readme = "README.md"
keywords = ["vhost", "vsock"]
license = "Apache-2.0 OR BSD-3-Clause"
edition = "2021"
[features]
xen = ["vm-memory/xen", "vhost/xen", "vhost-user-backend/xen"]
[dependencies]
byteorder = "1"
clap = { version = "4.4", features = ["derive"] }
env_logger = "0.10"
epoll = "4.3.2"
log = "0.4"
thiserror = "1.0"
vhost = { version = "0.8", features = ["vhost-user-slave"] }
vhost-user-backend = "0.10"
virtio-bindings = "0.2.1"
virtio-queue = "0.9"
virtio-vsock = "0.3.1"
vm-memory = "0.12"
vmm-sys-util = "0.11"
config = { version = "0.13", default-features = false, features = ["yaml"] }
serde = { version = "1", features = ["derive"] }
serde_yaml = "0.9"
[dev-dependencies]
virtio-queue = { version = "0.9", features = ["test-utils"] }
tempfile = "3.6.0"

View File

@ -0,0 +1 @@
../../LICENSE-APACHE

View File

@ -0,0 +1 @@
../../LICENSE-BSD-3-Clause

View File

@ -0,0 +1,190 @@
# vhost-device-vsock
## Design
The crate introduces a vhost-device-vsock device that enables communication between an
application running in the guest i.e inside a VM and an application running on the
host i.e outside the VM. The application running in the guest communicates over VM
sockets i.e over AF_VSOCK sockets. The application running on the host connects to a
unix socket on the host i.e communicates over AF_UNIX sockets. The main components of
the crate are split into various files as described below:
- [packet.rs](src/packet.rs)
- Introduces the **VsockPacket** structure that represents a single vsock packet, together
with its processing methods.
- [rxops.rs](src/rxops.rs)
- Introduces various vsock operations that are enqueued into the rxqueue to be sent to the
guest. Exposes a **RxOps** structure.
- [rxqueue.rs](src/rxqueue.rs)
- rxqueue contains the pending rx operations corresponding to that connection. The queue is
represented as a bitmap as we handle connection-oriented connections. The module contains
various queue manipulation methods. Exposes a **RxQueue** structure.
- [thread_backend.rs](src/thread_backend.rs)
- Multiplexes connections between host and guest and calls into per connection methods that
are responsible for processing data and packets corresponding to the connection. Exposes a
**VsockThreadBackend** structure.
- [txbuf.rs](src/txbuf.rs)
- Module to buffer data that is sent from the guest to the host. The module exposes a **LocalTxBuf**
structure.
- [vhu_vsock_thread.rs](src/vhu_vsock_thread.rs)
- Module exposes a **VhostUserVsockThread** structure. It also handles new host initiated
connections and provides interfaces for registering host connections with the epoll fd. Also
provides interfaces for iterating through the rx and tx queues.
- [vsock_conn.rs](src/vsock_conn.rs)
- Module introduces a **VsockConnection** structure that represents a single vsock connection
between the guest and the host. It also processes packets according to their type.
- [vhu_vsock.rs](src/vhu_vsock.rs)
- exposes the main vhost user vsock backend interface.
## Usage
Run the vhost-device-vsock device:
```
vhost-device-vsock --guest-cid=<CID assigned to the guest> \
--socket=<path to the Unix socket to be created to communicate with the VMM via the vhost-user protocol> \
--uds-path=<path to the Unix socket to communicate with the guest via the virtio-vsock device> \
[--tx-buffer-size=<size of the buffer used for the TX virtqueue (guest->host packets)>] \
[--groups=<list of group names to which the device belongs concatenated with '+' delimiter>]
```
or
```
vhost-device-vsock --vm guest_cid=<CID assigned to the guest>,socket=<path to the Unix socket to be created to communicate with the VMM via the vhost-user protocol>,uds-path=<path to the Unix socket to communicate with the guest via the virtio-vsock device>[,tx-buffer-size=<size of the buffer used for the TX virtqueue (guest->host packets)>][,groups=<list of group names to which the device belongs concatenated with '+' delimiter>]
```
Specify the `--vm` argument multiple times to specify multiple devices like this:
```
vhost-device-vsock \
--vm guest-cid=3,socket=/tmp/vhost3.socket,uds-path=/tmp/vm3.vsock,groups=group1+groupA \
--vm guest-cid=4,socket=/tmp/vhost4.socket,uds-path=/tmp/vm4.vsock,tx-buffer-size=32768
```
Or use a configuration file:
```
vhost-device-vsock --config=<path to the local yaml configuration file>
```
Configuration file example:
```yaml
vms:
- guest_cid: 3
socket: /tmp/vhost3.socket
uds_path: /tmp/vm3.sock
tx_buffer_size: 65536
groups: group1+groupA
- guest_cid: 4
socket: /tmp/vhost4.socket
uds_path: /tmp/vm4.sock
tx_buffer_size: 32768
groups: group2+groupB
```
Run VMM (e.g. QEMU):
```
qemu-system-x86_64 \
<normal QEMU options> \
-object memory-backend-file,share=on,id=mem0,size=<Guest RAM size>,mem-path=<Guest RAM file path> \ # size == -m size
-machine <machine options>,memory-backend=mem0 \
-chardev socket,id=char0,reconnect=0,path=<vhost-user socket path> \
-device vhost-user-vsock-pci,chardev=char0
```
## Working example
```sh
shell1$ vhost-device-vsock --vm guest-cid=4,uds-path=/tmp/vm4.vsock,socket=/tmp/vhost4.socket
```
or if you want to configure the TX buffer size
```sh
shell1$ vhost-device-vsock --vm guest-cid=4,uds-path=/tmp/vm4.vsock,socket=/tmp/vhost4.socket,tx-buffer-size=65536
```
```sh
shell2$ qemu-system-x86_64 \
-drive file=vm.qcow2,format=qcow2,if=virtio -smp 2 -m 512M -mem-prealloc \
-object memory-backend-file,share=on,id=mem0,size=512M,mem-path="/dev/hugepages" \
-machine q35,accel=kvm,memory-backend=mem0 \
-chardev socket,id=char0,reconnect=0,path=/tmp/vhost4.socket \
-device vhost-user-vsock-pci,chardev=char0
```
### Guest listening
#### iperf
```sh
# https://github.com/stefano-garzarella/iperf-vsock
guest$ iperf3 --vsock -s
host$ iperf3 --vsock -c /tmp/vm4.vsock
```
#### netcat
```sh
guest$ nc --vsock -l 1234
host$ nc -U /tmp/vm4.vsock
CONNECT 1234
```
### Host listening
#### iperf
```sh
# https://github.com/stefano-garzarella/iperf-vsock
host$ iperf3 --vsock -s -B /tmp/vm4.vsock
guest$ iperf3 --vsock -c 2
```
#### netcat
```sh
host$ nc -l -U /tmp/vm4.vsock_1234
guest$ nc --vsock 2 1234
```
### Sibling VM communication
If you add multiple VMs with their devices configured with at least one common group name, they can communicate with
each other. If you don't explicitly specify a group name, a default group will be assigned to the device with name
`default`, and all such devices will be able to communicate with each other. Or you can choose a different list of
group names for each device, and only devices with at least one group in common will be able to communicate with
each other.
For example, if you have two VMs with CID 3 and 4, you can run the following commands to make them communicate:
```sh
shell1$ vhost-device-vsock --vm guest-cid=3,uds-path=/tmp/vm3.vsock,socket=/tmp/vhost3.socket,groups=group1+group2 \
--vm guest-cid=4,uds-path=/tmp/vm4.vsock,socket=/tmp/vhost4.socket,groups=group1
shell2$ qemu-system-x86_64 \
-drive file=vm1.qcow2,format=qcow2,if=virtio -smp 2 -m 512M -mem-prealloc \
-object memory-backend-file,share=on,id=mem0,size=512M,mem-path="/dev/hugepages" \
-machine q35,accel=kvm,memory-backend=mem0 \
-chardev socket,id=char0,reconnect=0,path=/tmp/vhost3.socket \
-device vhost-user-vsock-pci,chardev=char0
shell3$ qemu-system-x86_64 \
-drive file=vm2.qcow2,format=qcow2,if=virtio -smp 2 -m 512M -mem-prealloc \
-object memory-backend-file,share=on,id=mem0,size=512M,mem-path="/dev/hugepages2" \
-machine q35,accel=kvm,memory-backend=mem0 \
-chardev socket,id=char0,reconnect=0,path=/tmp/vhost4.socket \
-device vhost-user-vsock-pci,chardev=char0
```
Please note that here the `groups` parameter is specified just for clarity, but it is not necessary to specify it if you want
to use the default group and make all the devices communicate with one another. It is useful to specify a list of groups
when you want fine-grained control over which devices can communicate with each other.
```sh
# nc-vsock patched to set `.svm_flags = VMADDR_FLAG_TO_HOST`
guest_cid3$ nc-vsock -l 1234
guest_cid4$ nc-vsock 3 1234
```
## License
This project is licensed under either of
- [Apache License](http://www.apache.org/licenses/LICENSE-2.0), Version 2.0
- [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause)

View File

@ -0,0 +1,551 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
mod rxops;
mod rxqueue;
mod thread_backend;
mod txbuf;
mod vhu_vsock;
mod vhu_vsock_thread;
mod vsock_conn;
use std::{
collections::HashMap,
convert::TryFrom,
process::exit,
sync::{Arc, RwLock},
thread,
};
use crate::vhu_vsock::{CidMap, VhostUserVsockBackend, VsockConfig};
use clap::{Args, Parser};
use log::{error, info, warn};
use serde::Deserialize;
use thiserror::Error as ThisError;
use vhost::{vhost_user, vhost_user::Listener};
use vhost_user_backend::VhostUserDaemon;
use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap};
/// Guest CID used when none is given on the command line or in the config file.
const DEFAULT_GUEST_CID: u64 = 3;
/// TX buffer size (64 KiB) used when none is configured.
const DEFAULT_TX_BUFFER_SIZE: u32 = 64 * 1024;
/// Group assigned to a device when no group list is configured.
const DEFAULT_GROUP_NAME: &str = "default";
/// Errors produced while turning the command line into device configurations.
#[derive(Debug, ThisError)]
enum CliError {
/// Neither a config file, `--vm` entries nor single-device options were given.
#[error("No arguments provided")]
NoArgsProvided,
/// The configuration file could not be loaded or contained no devices.
#[error("Failed to parse configuration file")]
ConfigParse,
}
/// Errors produced by `parse_vm_params` while parsing one `--vm` value.
#[derive(Debug, ThisError)]
enum VmArgsParseError {
/// An entry was not of the form `key=value`.
#[error("Bad argument")]
BadArgument,
/// The key is not one of the supported device parameters.
#[error("Invalid key `{0}`")]
InvalidKey(String),
/// A numeric parameter could not be parsed.
#[error("Unable to convert string to integer: {0}")]
ParseInteger(std::num::ParseIntError),
/// A mandatory parameter was missing.
#[error("Required key `{0}` not found")]
RequiredKeyNotFound(String),
}
/// Errors returned by `start_backend_server`.
#[derive(Debug, ThisError)]
enum BackendError {
/// `VhostUserVsockBackend::new` failed.
#[error("Could not create backend: {0}")]
CouldNotCreateBackend(vhu_vsock::Error),
/// `VhostUserDaemon::new` failed.
#[error("Could not create daemon: {0}")]
CouldNotCreateDaemon(vhost_user_backend::Error),
}
// Single-device options given directly on the command line. Every option
// conflicts with `--config` and `--vm`, which are the alternative ways of
// describing devices.
//
// NOTE: the `///` comments below are rendered by clap as `--help` text, so
// maintainer notes in this struct use plain `//` comments.
#[derive(Args, Clone, Debug)]
struct VsockParam {
    /// Context identifier of the guest which uniquely identifies the device for its lifetime.
    #[arg(
        long,
        default_value_t = DEFAULT_GUEST_CID,
        conflicts_with = "config",
        conflicts_with = "vm"
    )]
    guest_cid: u64,

    /// Unix socket to which a hypervisor connects to and sets up the control path with the device.
    #[arg(long, conflicts_with = "config", conflicts_with = "vm")]
    socket: String,

    /// Unix socket to which a host-side application connects to.
    #[arg(long, conflicts_with = "config", conflicts_with = "vm")]
    uds_path: String,

    /// The size of the buffer used for the TX virtqueue
    // Uses `#[arg]` like every other field; `#[clap]` is the legacy spelling
    // of the same attribute.
    #[arg(
        long,
        default_value_t = DEFAULT_TX_BUFFER_SIZE,
        conflicts_with = "config",
        conflicts_with = "vm"
    )]
    tx_buffer_size: u32,

    /// The list of group names to which the device belongs.
    /// A group is a set of devices that allow sibling communication between their guests.
    #[arg(
        long,
        default_value_t = String::from(DEFAULT_GROUP_NAME),
        conflicts_with = "config",
        conflicts_with = "vm",
        verbatim_doc_comment
    )]
    groups: String,
}
/// One entry of the `vms` list in the YAML configuration file. Optional
/// fields fall back to the `DEFAULT_*` constants.
#[derive(Clone, Debug, Deserialize)]
struct ConfigFileVsockParam {
guest_cid: Option<u64>,
socket: String,
uds_path: String,
tx_buffer_size: Option<u32>,
// `+`-separated list of group names.
groups: Option<String>,
}
// Top-level command line. Devices can be described in three mutually
// exclusive ways: single-device options (`param`), repeated `--vm` values, or
// a YAML file (`--config`).
//
// NOTE: the `///` comments below are rendered by clap as `--help` text, so
// maintainer notes in this struct use plain `//` comments.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct VsockArgs {
    #[command(flatten)]
    param: Option<VsockParam>,

    /// Device parameters corresponding to a VM in the form of comma separated key=value pairs.
    /// The allowed keys are: guest_cid, socket, uds_path, tx_buffer_size and groups.
    /// Example:
    /// --vm guest-cid=3,socket=/tmp/vhost3.socket,uds-path=/tmp/vm3.vsock,tx-buffer-size=65536,groups=group1+group2
    /// Multiple instances of this argument can be provided to configure devices for multiple guests.
    #[arg(long, conflicts_with = "config", verbatim_doc_comment, value_parser = parse_vm_params)]
    vm: Option<Vec<VsockConfig>>,

    /// Load from a given configuration file
    #[arg(long)]
    config: Option<String>,
}
/// Parses one `--vm` value: a comma-separated list of `key=value` pairs.
///
/// Accepted keys are `guest_cid`/`guest-cid`, `socket`, `uds_path`/`uds-path`,
/// `tx_buffer_size`/`tx-buffer-size` and `groups` (a `+`-separated list).
/// `socket` and `uds-path` are mandatory; the other keys fall back to the
/// `DEFAULT_*` constants. Each pair is split at its first `=` only, so values
/// (for example paths) may themselves contain `=`.
///
/// # Errors
///
/// Returns `BadArgument` for an entry without `=`, `InvalidKey` for an unknown
/// key, `ParseInteger` for a malformed number and `RequiredKeyNotFound` when a
/// mandatory key is missing.
fn parse_vm_params(s: &str) -> Result<VsockConfig, VmArgsParseError> {
    let mut guest_cid = None;
    let mut socket = None;
    let mut uds_path = None;
    let mut tx_buffer_size = None;
    let mut groups = None;

    for arg in s.trim().split(',') {
        let (key, val) = arg.split_once('=').ok_or(VmArgsParseError::BadArgument)?;

        match key {
            "guest_cid" | "guest-cid" => {
                guest_cid = Some(val.parse().map_err(VmArgsParseError::ParseInteger)?)
            }
            "socket" => socket = Some(val.to_string()),
            "uds_path" | "uds-path" => uds_path = Some(val.to_string()),
            "tx_buffer_size" | "tx-buffer-size" => {
                tx_buffer_size = Some(val.parse().map_err(VmArgsParseError::ParseInteger)?)
            }
            "groups" => groups = Some(val.split('+').map(String::from).collect()),
            _ => return Err(VmArgsParseError::InvalidKey(key.to_string())),
        }
    }

    Ok(VsockConfig::new(
        guest_cid.unwrap_or(DEFAULT_GUEST_CID),
        socket.ok_or_else(|| VmArgsParseError::RequiredKeyNotFound("socket".to_string()))?,
        uds_path.ok_or_else(|| VmArgsParseError::RequiredKeyNotFound("uds-path".to_string()))?,
        tx_buffer_size.unwrap_or(DEFAULT_TX_BUFFER_SIZE),
        // Only build the default group list when no groups were given.
        groups.unwrap_or_else(|| vec![DEFAULT_GROUP_NAME.to_string()]),
    ))
}
impl VsockArgs {
    /// Loads the device configurations from the YAML file given via
    /// `--config`.
    ///
    /// Returns `None` when no configuration file was requested. Otherwise
    /// returns `Some(Ok(..))` with one `VsockConfig` per entry of the `vms`
    /// list, or `Some(Err(CliError::ConfigParse))` when the file cannot be
    /// loaded, has no valid `vms` list, or the list is empty.
    pub fn parse_config(&self) -> Option<Result<Vec<VsockConfig>, CliError>> {
        let path = self.config.as_ref()?;

        // The file is user input: a missing or malformed `vms` key must be
        // reported as an error rather than panic.
        let vms = config::Config::builder()
            .add_source(config::File::new(path.as_str(), config::FileFormat::Yaml))
            .build()
            .and_then(|settings| settings.get::<Vec<ConfigFileVsockParam>>("vms"));
        let vms = match vms {
            Ok(vms) if !vms.is_empty() => vms,
            _ => return Some(Err(CliError::ConfigParse)),
        };

        let parsed = vms
            .into_iter()
            .map(|p| {
                VsockConfig::new(
                    p.guest_cid.unwrap_or(DEFAULT_GUEST_CID),
                    p.socket.trim().to_string(),
                    p.uds_path.trim().to_string(),
                    p.tx_buffer_size.unwrap_or(DEFAULT_TX_BUFFER_SIZE),
                    p.groups.map_or_else(
                        || vec![DEFAULT_GROUP_NAME.to_string()],
                        |g| g.trim().split('+').map(String::from).collect(),
                    ),
                )
            })
            .collect();
        Some(Ok(parsed))
    }
}
impl TryFrom<VsockArgs> for Vec<VsockConfig> {
    type Error = CliError;

    /// Resolves the parsed command line into device configurations.
    ///
    /// Precedence: configuration file, then `--vm` entries, then the
    /// single-device options; `NoArgsProvided` if none of them is present.
    fn try_from(cmd_args: VsockArgs) -> Result<Self, CliError> {
        // we try to use the configuration first, if failed, then fall back to the manual settings.
        if let Some(from_file) = cmd_args.parse_config() {
            return from_file;
        }
        if let Some(vms) = cmd_args.vm {
            return Ok(vms);
        }

        let single = cmd_args.param.ok_or(CliError::NoArgsProvided)?;
        let groups = single.groups.trim().split('+').map(String::from).collect();
        Ok(vec![VsockConfig::new(
            single.guest_cid,
            single.socket.trim().to_string(),
            single.uds_path.trim().to_string(),
            single.tx_buffer_size,
            groups,
        )])
    }
}
/// Runs the vhost-user vsock backend for a single device configuration.
///
/// The body is an endless loop: each iteration creates a fresh backend,
/// listener and daemon, serves until `daemon.wait()` returns, signals the
/// worker thread to exit and then starts over. The function therefore only
/// returns on error.
///
/// # Errors
///
/// `CouldNotCreateBackend` or `CouldNotCreateDaemon` when the respective
/// constructor fails.
///
/// # Panics
///
/// Panics if the listener cannot be created, the daemon cannot be started, a
/// thread lock is poisoned, or the exit event cannot be written.
pub(crate) fn start_backend_server(
config: VsockConfig,
cid_map: Arc<RwLock<CidMap>>,
) -> Result<(), BackendError> {
loop {
let backend = Arc::new(
VhostUserVsockBackend::new(config.clone(), cid_map.clone())
.map_err(BackendError::CouldNotCreateBackend)?,
);
let listener = Listener::new(config.get_socket_path(), true).unwrap();
let mut daemon = VhostUserDaemon::new(
String::from("vhost-device-vsock"),
backend.clone(),
GuestMemoryAtomic::new(GuestMemoryMmap::new()),
)
.map_err(BackendError::CouldNotCreateDaemon)?;
// Hand one epoll handler to each backend thread, in order.
let mut vring_workers = daemon.get_epoll_handlers();
for thread in backend.threads.iter() {
thread
.lock()
.unwrap()
.set_vring_worker(Some(vring_workers.remove(0)));
}
daemon.start(listener).unwrap();
match daemon.wait() {
Ok(()) => {
info!("Stopping cleanly");
}
// Both a partial message and a plain disconnect are treated as the
// peer going away and are logged at info level only.
Err(vhost_user_backend::Error::HandleRequest(
vhost_user::Error::PartialMessage | vhost_user::Error::Disconnected,
)) => {
info!("vhost-user connection closed with partial message. If the VM is shutting down, this is expected behavior; otherwise, it might be a bug.");
}
Err(e) => {
warn!("Error running daemon: {:?}", e);
}
}
// No matter the result, we need to shut down the worker thread.
backend.exit_event.write(1).unwrap();
}
}
/// Spawns one backend thread per configuration, all sharing a single CID map,
/// then joins them in order and returns the first error encountered.
///
/// # Panics
///
/// Panics if a thread cannot be spawned or if a backend thread panicked.
pub(crate) fn start_backend_servers(configs: &[VsockConfig]) -> Result<(), BackendError> {
    let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));

    // Collect first so that every thread is running before the first join.
    let workers: Vec<_> = configs
        .iter()
        .map(|c| {
            let thread_name = format!("vhu-vsock-cid-{}", c.get_guest_cid());
            let config = c.clone();
            let cid_map = cid_map.clone();
            thread::Builder::new()
                .name(thread_name)
                .spawn(move || start_backend_server(config, cid_map))
                .unwrap()
        })
        .collect();

    for worker in workers {
        worker.join().unwrap()?;
    }
    Ok(())
}
/// Program entry point.
///
/// Initialises logging, converts the parsed command line into the list of
/// `VsockConfig`s and runs one backend server per configuration.
///
/// The process exits with status 1 both when the arguments cannot be turned
/// into a configuration and when the backend servers report an error, so that
/// callers (shells, service managers) can detect the failure.
fn main() {
    env_logger::init();

    let configs = match Vec::<VsockConfig>::try_from(VsockArgs::parse()) {
        Ok(c) => c,
        Err(e) => {
            // Written to stderr directly (not through the logger) so the
            // message is shown regardless of the configured log level.
            eprintln!("Error parsing arguments: {}", e);
            exit(1);
        }
    };

    if let Err(e) = start_backend_servers(&configs) {
        error!("{e}");
        exit(1);
    }
}
// Tests for turning `VsockArgs` (direct parameters, repeated `--vm` options or
// a YAML file) into `VsockConfig`s, plus a smoke test that builds the backend
// and its daemon.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::tempdir;

    // Test-only constructors that build `VsockArgs` without going through the
    // command-line parser.
    impl VsockArgs {
        /// Builds arguments as if a single VM had been described through the
        /// individual parameters (`param` set, `vm` and `config` unset).
        fn from_args(
            guest_cid: u64,
            socket: &str,
            uds_path: &str,
            tx_buffer_size: u32,
            groups: &str,
        ) -> Self {
            VsockArgs {
                param: Some(VsockParam {
                    guest_cid,
                    socket: socket.to_string(),
                    uds_path: uds_path.to_string(),
                    tx_buffer_size,
                    groups: groups.to_string(),
                }),
                vm: None,
                config: None,
            }
        }

        /// Builds arguments that only reference a configuration file path
        /// (`config` set, `param` and `vm` unset).
        fn from_file(config: &str) -> Self {
            VsockArgs {
                param: None,
                vm: None,
                config: Some(config.to_string()),
            }
        }
    }

    /// Directly supplied parameters must produce exactly one configuration
    /// carrying those same values.
    #[test]
    fn test_vsock_config_setup() {
        let test_dir = tempdir().expect("Could not create a temp test directory.");

        let socket_path = test_dir.path().join("vhost4.socket").display().to_string();
        let uds_path = test_dir.path().join("vm4.vsock").display().to_string();
        let args = VsockArgs::from_args(3, &socket_path, &uds_path, 64 * 1024, "group1");

        let configs = Vec::<VsockConfig>::try_from(args);
        assert!(configs.is_ok());

        let configs = configs.unwrap();
        assert_eq!(configs.len(), 1);

        let config = &configs[0];
        assert_eq!(config.get_guest_cid(), 3);
        assert_eq!(config.get_socket_path(), socket_path);
        assert_eq!(config.get_uds_path(), uds_path);
        assert_eq!(config.get_tx_buffer_size(), 64 * 1024);
        assert_eq!(config.get_groups(), vec!["group1".to_string()]);

        test_dir.close().unwrap();
    }

    /// Three `--vm` options must produce three configurations, in order.
    ///
    /// The options list their keys in different orders and use both `_` and
    /// `-` spellings. The first VM omits the CID, TX buffer size and groups;
    /// the assertions expect 3, 65536 and `DEFAULT_GROUP_NAME` for those.
    #[test]
    fn test_vsock_config_setup_from_vm_args() {
        let test_dir = tempdir().expect("Could not create a temp test directory.");

        let socket_paths = [
            test_dir.path().join("vhost3.socket"),
            test_dir.path().join("vhost4.socket"),
            test_dir.path().join("vhost5.socket"),
        ];
        let uds_paths = [
            test_dir.path().join("vm3.vsock"),
            test_dir.path().join("vm4.vsock"),
            test_dir.path().join("vm5.vsock"),
        ];
        // The trailing `\` joins the three lines into one whitespace-separated
        // command line, which is split into individual arguments below.
        let params = format!(
            "--vm socket={vhost3_socket},uds_path={vm3_vsock} \
--vm socket={vhost4_socket},uds-path={vm4_vsock},guest-cid=4,tx_buffer_size=65536,groups=group1 \
--vm groups=group2+group3,guest-cid=5,socket={vhost5_socket},uds_path={vm5_vsock},tx-buffer-size=32768",
            vhost3_socket = socket_paths[0].display(),
            vhost4_socket = socket_paths[1].display(),
            vhost5_socket = socket_paths[2].display(),
            vm3_vsock = uds_paths[0].display(),
            vm4_vsock = uds_paths[1].display(),
            vm5_vsock = uds_paths[2].display(),
        );

        let mut params = params.split_whitespace().collect::<Vec<&str>>();
        params.insert(0, ""); // to make the test binary name agnostic

        let args = VsockArgs::parse_from(params);

        let configs = Vec::<VsockConfig>::try_from(args);
        assert!(configs.is_ok());

        let configs = configs.unwrap();
        assert_eq!(configs.len(), 3);

        // First VM: only the socket paths were given, the rest are defaults.
        let config = configs.get(0).unwrap();
        assert_eq!(config.get_guest_cid(), 3);
        assert_eq!(
            config.get_socket_path(),
            socket_paths[0].display().to_string()
        );
        assert_eq!(config.get_uds_path(), uds_paths[0].display().to_string());
        assert_eq!(config.get_tx_buffer_size(), 65536);
        assert_eq!(config.get_groups(), vec![DEFAULT_GROUP_NAME.to_string()]);

        // Second VM: every key given explicitly, single group.
        let config = configs.get(1).unwrap();
        assert_eq!(config.get_guest_cid(), 4);
        assert_eq!(
            config.get_socket_path(),
            socket_paths[1].display().to_string()
        );
        assert_eq!(config.get_uds_path(), uds_paths[1].display().to_string());
        assert_eq!(config.get_tx_buffer_size(), 65536);
        assert_eq!(config.get_groups(), vec!["group1".to_string()]);

        // Third VM: keys in a different order, `+`-separated group list.
        let config = configs.get(2).unwrap();
        assert_eq!(config.get_guest_cid(), 5);
        assert_eq!(
            config.get_socket_path(),
            socket_paths[2].display().to_string()
        );
        assert_eq!(config.get_uds_path(), uds_paths[2].display().to_string());
        assert_eq!(config.get_tx_buffer_size(), 32768);
        assert_eq!(
            config.get_groups(),
            vec!["group2".to_string(), "group3".to_string()]
        );

        test_dir.close().unwrap();
    }

    /// A YAML configuration file must be honoured, first with every field set
    /// and then with only the two socket paths (all other fields defaulted).
    #[test]
    fn test_vsock_config_setup_from_file() {
        let test_dir = tempdir().expect("Could not create a temp test directory.");

        let config_path = test_dir.path().join("config.yaml");
        let socket_path = test_dir.path().join("vhost4.socket");
        let uds_path = test_dir.path().join("vm4.vsock");

        // NOTE(review): YAML nesting depends on indentation and every line of
        // the literal below starts at column 0 in this view - confirm that the
        // keys after `guest_cid` are indented under the list entry.
        let mut yaml = File::create(&config_path).unwrap();
        yaml.write_all(
            format!(
                "vms:
- guest_cid: 4
socket: {}
uds_path: {}
tx_buffer_size: 32768
groups: group1+group2",
                socket_path.display(),
                uds_path.display(),
            )
            .as_bytes(),
        )
        .unwrap();
        let args = VsockArgs::from_file(&config_path.display().to_string());

        let configs = Vec::<VsockConfig>::try_from(args).unwrap();
        assert_eq!(configs.len(), 1);

        let config = &configs[0];
        assert_eq!(config.get_guest_cid(), 4);
        assert_eq!(config.get_socket_path(), socket_path.display().to_string());
        assert_eq!(config.get_uds_path(), uds_path.display().to_string());
        assert_eq!(config.get_tx_buffer_size(), 32768);
        assert_eq!(
            config.get_groups(),
            vec!["group1".to_string(), "group2".to_string()]
        );

        // Now test that optional parameters are correctly set to their default values.
        // `File::create` truncates the file written above before it is rewritten.
        let mut yaml = File::create(&config_path).unwrap();
        yaml.write_all(
            format!(
                "vms:
- socket: {}
uds_path: {}",
                socket_path.display(),
                uds_path.display(),
            )
            .as_bytes(),
        )
        .unwrap();
        let args = VsockArgs::from_file(&config_path.display().to_string());

        let configs = Vec::<VsockConfig>::try_from(args).unwrap();
        assert_eq!(configs.len(), 1);

        let config = &configs[0];
        assert_eq!(config.get_guest_cid(), DEFAULT_GUEST_CID);
        assert_eq!(config.get_socket_path(), socket_path.display().to_string());
        assert_eq!(config.get_uds_path(), uds_path.display().to_string());
        assert_eq!(config.get_tx_buffer_size(), DEFAULT_TX_BUFFER_SIZE);
        assert_eq!(config.get_groups(), vec![DEFAULT_GROUP_NAME.to_string()]);

        std::fs::remove_file(&config_path).unwrap();
        test_dir.close().unwrap();
    }

    /// Builds a backend and its daemon (without starting the daemon) and
    /// checks that the daemon exposes one epoll handler per backend thread.
    #[test]
    fn test_vsock_server() {
        const CID: u64 = 3;
        const CONN_TX_BUF_SIZE: u32 = 64 * 1024;

        let test_dir = tempdir().expect("Could not create a temp test directory.");

        let vhost_socket_path = test_dir
            .path()
            .join("test_vsock_server.socket")
            .display()
            .to_string();
        let vsock_socket_path = test_dir
            .path()
            .join("test_vsock_server.vsock")
            .display()
            .to_string();

        let config = VsockConfig::new(
            CID,
            vhost_socket_path,
            vsock_socket_path,
            CONN_TX_BUF_SIZE,
            vec![DEFAULT_GROUP_NAME.to_string()],
        );

        let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));

        let backend = Arc::new(VhostUserVsockBackend::new(config, cid_map).unwrap());

        let daemon = VhostUserDaemon::new(
            String::from("vhost-device-vsock"),
            backend.clone(),
            GuestMemoryAtomic::new(GuestMemoryMmap::new()),
        )
        .unwrap();

        let vring_workers = daemon.get_epoll_handlers();

        // VhostUserVsockBackend supports a single thread that handles the TX and RX queues
        assert_eq!(backend.threads.len(), 1);

        assert_eq!(vring_workers.len(), backend.threads.len());

        test_dir.close().unwrap();
    }
}

View File

@ -2,26 +2,53 @@
use std::{
collections::{HashMap, HashSet, VecDeque},
ops::Deref,
os::unix::{
net::UnixStream,
prelude::{AsRawFd, FromRawFd, RawFd},
prelude::{AsRawFd, RawFd},
},
sync::{Arc, RwLock},
};
use log::{info, warn};
use virtio_vsock::packet::VsockPacket;
use virtio_vsock::packet::{VsockPacket, PKT_HEADER_SIZE};
use vm_memory::bitmap::BitmapSlice;
use crate::{
rxops::*,
vhu_vsock::{
ConnMapKey, Error, Result, VSOCK_HOST_CID, VSOCK_OP_REQUEST, VSOCK_OP_RST,
CidMap, ConnMapKey, Error, Result, VSOCK_HOST_CID, VSOCK_OP_REQUEST, VSOCK_OP_RST,
VSOCK_TYPE_STREAM,
},
vhu_vsock_thread::VhostUserVsockThread,
vsock_conn::*,
};
pub(crate) type RawPktsQ = VecDeque<RawVsockPacket>;
pub(crate) struct RawVsockPacket {
pub header: [u8; PKT_HEADER_SIZE],
pub data: Vec<u8>,
}
impl RawVsockPacket {
fn from_vsock_packet<B: BitmapSlice>(pkt: &VsockPacket<B>) -> Result<Self> {
let mut raw_pkt = Self {
header: [0; PKT_HEADER_SIZE],
data: vec![0; pkt.len() as usize],
};
pkt.header_slice().copy_to(&mut raw_pkt.header);
if !pkt.is_empty() {
pkt.data_slice()
.ok_or(Error::PktBufMissing)?
.copy_to(raw_pkt.data.as_mut());
}
Ok(raw_pkt)
}
}
pub(crate) struct VsockThreadBackend {
/// Map of ConnMapKey objects indexed by raw file descriptors.
pub listener_map: HashMap<RawFd, ConnMapKey>,
@ -35,13 +62,29 @@ pub(crate) struct VsockThreadBackend {
host_socket_path: String,
/// epoll for registering new host-side connections.
epoll_fd: i32,
/// CID of the guest.
guest_cid: u64,
/// Set of allocated local ports.
pub local_port_set: HashSet<u32>,
tx_buffer_size: u32,
/// Maps the guest CID to the corresponding backend. Used for sibling VM communication.
pub cid_map: Arc<RwLock<CidMap>>,
/// Queue of raw vsock packets received from sibling VMs to be sent to the guest.
pub raw_pkts_queue: Arc<RwLock<RawPktsQ>>,
/// Set of groups assigned to the device which it is allowed to communicate with.
groups_set: Arc<RwLock<HashSet<String>>>,
}
impl VsockThreadBackend {
/// New instance of VsockThreadBackend.
pub fn new(host_socket_path: String, epoll_fd: i32) -> Self {
pub fn new(
host_socket_path: String,
epoll_fd: i32,
guest_cid: u64,
tx_buffer_size: u32,
groups_set: Arc<RwLock<HashSet<String>>>,
cid_map: Arc<RwLock<CidMap>>,
) -> Self {
Self {
listener_map: HashMap::new(),
conn_map: HashMap::new(),
@ -51,7 +94,12 @@ impl VsockThreadBackend {
stream_map: HashMap::new(),
host_socket_path,
epoll_fd,
guest_cid,
local_port_set: HashSet::new(),
tx_buffer_size,
cid_map,
raw_pkts_queue: Arc::new(RwLock::new(VecDeque::new())),
groups_set,
}
}
@ -60,6 +108,11 @@ impl VsockThreadBackend {
!self.backend_rxq.is_empty()
}
/// Reports whether the raw packets queue holds at least one packet that
/// still has to be sent to the guest.
///
/// Takes the queue's read lock for the duration of the check and panics if
/// that lock is poisoned.
pub fn pending_raw_pkts(&self) -> bool {
    let queue = self.raw_pkts_queue.read().unwrap();
    !queue.is_empty()
}
/// Deliver a vsock packet to the guest vsock driver.
///
/// Returns:
@ -120,7 +173,45 @@ impl VsockThreadBackend {
/// Returns:
/// - always `Ok(())` if packet has been consumed correctly
pub fn send_pkt<B: BitmapSlice>(&mut self, pkt: &VsockPacket<B>) -> Result<()> {
let key = ConnMapKey::new(pkt.dst_port(), pkt.src_port());
if pkt.src_cid() != self.guest_cid {
warn!(
"vsock: dropping packet with inconsistent src_cid: {:?} from guest configured with CID: {:?}",
pkt.src_cid(), self.guest_cid
);
return Ok(());
}
let dst_cid = pkt.dst_cid();
if dst_cid != VSOCK_HOST_CID {
let cid_map = self.cid_map.read().unwrap();
if cid_map.contains_key(&dst_cid) {
let (sibling_raw_pkts_queue, sibling_groups_set, sibling_event_fd) =
cid_map.get(&dst_cid).unwrap();
if self
.groups_set
.read()
.unwrap()
.is_disjoint(sibling_groups_set.read().unwrap().deref())
{
info!(
"vsock: dropping packet for cid: {:?} due to group mismatch",
dst_cid
);
return Ok(());
}
sibling_raw_pkts_queue
.write()
.unwrap()
.push_back(RawVsockPacket::from_vsock_packet(pkt)?);
let _ = sibling_event_fd.write(1);
} else {
warn!("vsock: dropping packet for unknown cid: {:?}", dst_cid);
}
return Ok(());
}
// TODO: Rst if packet has unsupported type
if pkt.type_() != VSOCK_TYPE_STREAM {
@ -128,15 +219,7 @@ impl VsockThreadBackend {
return Ok(());
}
// TODO: Handle packets to other CIDs as well
if pkt.dst_cid() != VSOCK_HOST_CID {
info!(
"vsock: dropping packet for cid other than host: {:?}",
pkt.dst_cid()
);
return Ok(());
}
let key = ConnMapKey::new(pkt.dst_port(), pkt.src_port());
// TODO: Handle cases where connection does not exist and packet op
// is not VSOCK_OP_REQUEST
@ -183,6 +266,28 @@ impl VsockThreadBackend {
Ok(())
}
/// Deliver a raw vsock packet sent from a sibling VM to the guest vsock driver.
///
/// Returns:
/// - `Ok(())` if packet was successfully filled in
/// - `Err(Error::EmptyRawPktsQueue)` if there was no available data
pub fn recv_raw_pkt<B: BitmapSlice>(&mut self, pkt: &mut VsockPacket<B>) -> Result<()> {
let raw_vsock_pkt = self
.raw_pkts_queue
.write()
.unwrap()
.pop_front()
.ok_or(Error::EmptyRawPktsQueue)?;
pkt.set_header_from_raw(&raw_vsock_pkt.header).unwrap();
if !raw_vsock_pkt.data.is_empty() {
let buf = pkt.data_slice().ok_or(Error::PktBufMissing)?;
buf.copy_from(&raw_vsock_pkt.data);
}
Ok(())
}
/// Handle a new guest initiated connection, i.e from the peer, the guest driver.
///
/// Attempts to connect to a host side unix socket listening on a path
@ -204,30 +309,26 @@ impl VsockThreadBackend {
stream: UnixStream,
pkt: &VsockPacket<B>,
) -> Result<()> {
let stream_fd = stream.as_raw_fd();
self.listener_map
.insert(stream_fd, ConnMapKey::new(pkt.dst_port(), pkt.src_port()));
let conn = VsockConnection::new_peer_init(
stream,
stream.try_clone().map_err(Error::UnixConnect)?,
pkt.dst_cid(),
pkt.dst_port(),
pkt.src_cid(),
pkt.src_port(),
self.epoll_fd,
pkt.buf_alloc(),
self.tx_buffer_size,
);
let stream_fd = conn.stream.as_raw_fd();
self.listener_map
.insert(stream_fd, ConnMapKey::new(pkt.dst_port(), pkt.src_port()));
self.conn_map
.insert(ConnMapKey::new(pkt.dst_port(), pkt.src_port()), conn);
self.backend_rxq
.push_back(ConnMapKey::new(pkt.dst_port(), pkt.src_port()));
self.stream_map.insert(
stream_fd,
// SAFETY: Safe as the file descriptor is guaranteed to be valid.
unsafe { UnixStream::from_raw_fd(stream_fd) },
);
self.stream_map.insert(stream_fd, stream);
self.local_port_set.insert(pkt.dst_port());
VhostUserVsockThread::epoll_register(
@ -248,25 +349,41 @@ impl VsockThreadBackend {
#[cfg(test)]
mod tests {
use super::*;
use crate::vhu_vsock::VSOCK_OP_RW;
use serial_test::serial;
use crate::vhu_vsock::{VhostUserVsockBackend, VsockConfig, VSOCK_OP_RW};
use std::os::unix::net::UnixListener;
use tempfile::tempdir;
use virtio_vsock::packet::{VsockPacket, PKT_HEADER_SIZE};
const DATA_LEN: usize = 16;
const CONN_TX_BUF_SIZE: u32 = 64 * 1024;
const GROUP_NAME: &str = "default";
#[test]
#[serial]
fn test_vsock_thread_backend() {
const VSOCK_SOCKET_PATH: &str = "test_vsock_thread_backend.vsock";
const CID: u64 = 3;
const VSOCK_PEER_PORT: u32 = 1234;
const VSOCK_PEER_PATH: &str = "test_vsock_thread_backend.vsock_1234";
let _ = std::fs::remove_file(VSOCK_PEER_PATH);
let _listener = UnixListener::bind(VSOCK_PEER_PATH).unwrap();
let test_dir = tempdir().expect("Could not create a temp test directory.");
let vsock_socket_path = test_dir.path().join("test_vsock_thread_backend.vsock");
let vsock_peer_path = test_dir.path().join("test_vsock_thread_backend.vsock_1234");
let _listener = UnixListener::bind(&vsock_peer_path).unwrap();
let epoll_fd = epoll::create(false).unwrap();
let mut vtp = VsockThreadBackend::new(VSOCK_SOCKET_PATH.to_string(), epoll_fd);
let groups_set: HashSet<String> = vec![GROUP_NAME.to_string()].into_iter().collect();
let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));
let mut vtp = VsockThreadBackend::new(
vsock_socket_path.display().to_string(),
epoll_fd,
CID,
CONN_TX_BUF_SIZE,
Arc::new(RwLock::new(groups_set)),
cid_map,
);
assert!(!vtp.pending_rx());
@ -286,6 +403,7 @@ mod tests {
packet.set_type(VSOCK_TYPE_STREAM);
assert!(vtp.send_pkt(&packet).is_ok());
packet.set_src_cid(CID);
packet.set_dst_cid(VSOCK_HOST_CID);
packet.set_dst_port(VSOCK_PEER_PORT);
assert!(vtp.send_pkt(&packet).is_ok());
@ -302,6 +420,125 @@ mod tests {
assert!(vtp.recv_pkt(&mut packet).is_ok());
// cleanup
let _ = std::fs::remove_file(VSOCK_PEER_PATH);
let _ = std::fs::remove_file(&vsock_peer_path);
let _ = std::fs::remove_file(&vsock_socket_path);
test_dir.close().unwrap();
}
/// A packet sent by one guest (CID 3) to a sibling guest (CID 4) must land in
/// the sibling's raw packets queue and be readable from there unchanged.
///
/// Both backends share one CID map; their group sets overlap on "group3", so
/// the group check in `send_pkt` lets the packet through.
#[test]
fn test_vsock_thread_backend_sibling_vms() {
    const CID: u64 = 3;
    const SIBLING_CID: u64 = 4;
    const SIBLING_LISTENING_PORT: u32 = 1234;

    let test_dir = tempdir().expect("Could not create a temp test directory.");

    let vsock_socket_path = test_dir
        .path()
        .join("test_vsock_thread_backend.vsock")
        .display()
        .to_string();
    let sibling_vhost_socket_path = test_dir
        .path()
        .join("test_vsock_thread_backend_sibling.socket")
        .display()
        .to_string();
    let sibling_vsock_socket_path = test_dir
        .path()
        .join("test_vsock_thread_backend_sibling.vsock")
        .display()
        .to_string();

    let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));

    // The sibling is a full backend built with the shared CID map.
    let sibling_config = VsockConfig::new(
        SIBLING_CID,
        sibling_vhost_socket_path,
        sibling_vsock_socket_path,
        CONN_TX_BUF_SIZE,
        vec!["group1", "group2", "group3"]
            .into_iter()
            .map(String::from)
            .collect(),
    );

    let sibling_backend =
        Arc::new(VhostUserVsockBackend::new(sibling_config, cid_map.clone()).unwrap());

    let epoll_fd = epoll::create(false).unwrap();

    // The sender is a bare thread backend; only "group3" is shared with the sibling.
    let groups_set: HashSet<String> = vec!["groupA", "groupB", "group3"]
        .into_iter()
        .map(String::from)
        .collect();

    let mut vtp = VsockThreadBackend::new(
        vsock_socket_path,
        epoll_fd,
        CID,
        CONN_TX_BUF_SIZE,
        Arc::new(RwLock::new(groups_set)),
        cid_map,
    );

    assert!(!vtp.pending_raw_pkts());

    let mut pkt_raw = [0u8; PKT_HEADER_SIZE + DATA_LEN];
    let (hdr_raw, data_raw) = pkt_raw.split_at_mut(PKT_HEADER_SIZE);

    // SAFETY: Safe as hdr_raw and data_raw are guaranteed to be valid.
    let mut packet = unsafe { VsockPacket::new(hdr_raw, Some(data_raw)).unwrap() };

    // Nothing has been queued for the sender itself yet.
    assert_eq!(
        vtp.recv_raw_pkt(&mut packet).unwrap_err().to_string(),
        Error::EmptyRawPktsQueue.to_string()
    );

    // Address the packet to the sibling; only the first four payload bytes are
    // set, the rest of the DATA_LEN-byte buffer stays zero.
    packet.set_type(VSOCK_TYPE_STREAM);
    packet.set_src_cid(CID);
    packet.set_dst_cid(SIBLING_CID);
    packet.set_dst_port(SIBLING_LISTENING_PORT);
    packet.set_op(VSOCK_OP_RW);
    packet.set_len(DATA_LEN as u32);
    packet
        .data_slice()
        .unwrap()
        .copy_from(&[0xCAu8, 0xFEu8, 0xBAu8, 0xBEu8]);

    assert!(vtp.send_pkt(&packet).is_ok());
    assert!(sibling_backend.threads[0]
        .lock()
        .unwrap()
        .thread_backend
        .pending_raw_pkts());

    // Read the packet back on the sibling side into a separate buffer.
    let mut recvd_pkt_raw = [0u8; PKT_HEADER_SIZE + DATA_LEN];
    let (recvd_hdr_raw, recvd_data_raw) = recvd_pkt_raw.split_at_mut(PKT_HEADER_SIZE);

    let mut recvd_packet =
        // SAFETY: Safe as recvd_hdr_raw and recvd_data_raw are guaranteed to be valid.
        unsafe { VsockPacket::new(recvd_hdr_raw, Some(recvd_data_raw)).unwrap() };

    assert!(sibling_backend.threads[0]
        .lock()
        .unwrap()
        .thread_backend
        .recv_raw_pkt(&mut recvd_packet)
        .is_ok());

    // Header fields and payload must match what was sent.
    assert_eq!(recvd_packet.type_(), VSOCK_TYPE_STREAM);
    assert_eq!(recvd_packet.src_cid(), CID);
    assert_eq!(recvd_packet.dst_cid(), SIBLING_CID);
    assert_eq!(recvd_packet.dst_port(), SIBLING_LISTENING_PORT);
    assert_eq!(recvd_packet.op(), VSOCK_OP_RW);
    assert_eq!(recvd_packet.len(), DATA_LEN as u32);

    assert_eq!(recvd_data_raw[0], 0xCAu8);
    assert_eq!(recvd_data_raw[1], 0xFEu8);
    assert_eq!(recvd_data_raw[2], 0xBAu8);
    assert_eq!(recvd_data_raw[3], 0xBEu8);

    test_dir.close().unwrap();
}
}

View File

@ -4,7 +4,7 @@ use std::{io::Write, num::Wrapping};
use vm_memory::{bitmap::BitmapSlice, VolatileSlice};
use crate::vhu_vsock::{Error, Result, CONN_TX_BUF_SIZE};
use crate::vhu_vsock::{Error, Result};
#[derive(Debug)]
pub(crate) struct LocalTxBuf {
@ -18,14 +18,19 @@ pub(crate) struct LocalTxBuf {
impl LocalTxBuf {
/// Create a new instance of LocalTxBuf.
pub fn new() -> Self {
pub fn new(buf_size: u32) -> Self {
Self {
buf: vec![0; CONN_TX_BUF_SIZE as usize],
buf: vec![0; buf_size as usize],
head: Wrapping(0),
tail: Wrapping(0),
}
}
/// Returns the capacity of the tx buffer, i.e. the length of the backing
/// `buf` vector that `new` allocated from its `buf_size` argument.
pub fn get_buf_size(&self) -> u32 {
    // `buf` is allocated from a `u32` size in `new`, so the cast back is lossless.
    self.buf.len() as u32
}
/// Check if the buf is empty.
pub fn is_empty(&self) -> bool {
self.len() == 0
@ -34,16 +39,16 @@ impl LocalTxBuf {
/// Add new data to the tx buffer, push all or none.
/// Returns LocalTxBufFull error if space not sufficient.
pub fn push<B: BitmapSlice>(&mut self, data_buf: &VolatileSlice<B>) -> Result<()> {
if CONN_TX_BUF_SIZE as usize - self.len() < data_buf.len() {
if self.get_buf_size() as usize - self.len() < data_buf.len() {
// Tx buffer is full
return Err(Error::LocalTxBufFull);
}
// Get index into buffer at which data can be inserted
let tail_idx = self.tail.0 as usize % CONN_TX_BUF_SIZE as usize;
let tail_idx = self.tail.0 as usize % self.get_buf_size() as usize;
// Check if we can fit the data buffer between head and end of buffer
let len = std::cmp::min(CONN_TX_BUF_SIZE as usize - tail_idx, data_buf.len());
let len = std::cmp::min(self.get_buf_size() as usize - tail_idx, data_buf.len());
let txbuf = &mut self.buf[tail_idx..tail_idx + len];
data_buf.copy_to(txbuf);
@ -67,10 +72,10 @@ impl LocalTxBuf {
}
// Get index into buffer from which data can be read
let head_idx = self.head.0 as usize % CONN_TX_BUF_SIZE as usize;
let head_idx = self.head.0 as usize % self.get_buf_size() as usize;
// First write from head to end of buffer
let len = std::cmp::min(CONN_TX_BUF_SIZE as usize - head_idx, self.len());
let len = std::cmp::min(self.get_buf_size() as usize - head_idx, self.len());
let written = stream
.write(&self.buf[head_idx..(head_idx + len)])
.map_err(Error::LocalTxBufFlush)?;
@ -97,9 +102,11 @@ impl LocalTxBuf {
mod tests {
use super::*;
const CONN_TX_BUF_SIZE: u32 = 64 * 1024;
#[test]
fn test_txbuf_len() {
let mut loc_tx_buf = LocalTxBuf::new();
let mut loc_tx_buf = LocalTxBuf::new(CONN_TX_BUF_SIZE);
// Zero length tx buf
assert_eq!(loc_tx_buf.len(), 0);
@ -118,7 +125,7 @@ mod tests {
#[test]
fn test_txbuf_is_empty() {
let mut loc_tx_buf = LocalTxBuf::new();
let mut loc_tx_buf = LocalTxBuf::new(CONN_TX_BUF_SIZE);
// empty tx buffer
assert!(loc_tx_buf.is_empty());
@ -130,7 +137,7 @@ mod tests {
#[test]
fn test_txbuf_push() {
let mut loc_tx_buf = LocalTxBuf::new();
let mut loc_tx_buf = LocalTxBuf::new(CONN_TX_BUF_SIZE);
let mut buf = [0; CONN_TX_BUF_SIZE as usize];
// SAFETY: Safe as the buffer is guaranteed to be valid here.
let data = unsafe { VolatileSlice::new(buf.as_mut_ptr(), buf.len()) };
@ -168,7 +175,7 @@ mod tests {
#[test]
fn test_txbuf_flush_to() {
let mut loc_tx_buf = LocalTxBuf::new();
let mut loc_tx_buf = LocalTxBuf::new(CONN_TX_BUF_SIZE);
// data to be flushed
let mut buf = vec![1; CONN_TX_BUF_SIZE as usize];

View File

@ -1,14 +1,16 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use std::{
collections::{HashMap, HashSet},
io::{self, Result as IoResult},
sync::Mutex,
sync::{Arc, Mutex, RwLock},
u16, u32, u64, u8,
};
use log::warn;
use thiserror::Error as ThisError;
use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures};
use vhost_user_backend::{VhostUserBackendMut, VringRwLock};
use vhost_user_backend::{VhostUserBackend, VringRwLock};
use virtio_bindings::bindings::{
virtio_config::VIRTIO_F_NOTIFY_ON_EMPTY, virtio_config::VIRTIO_F_VERSION_1,
virtio_ring::VIRTIO_RING_F_EVENT_IDX,
@ -19,9 +21,13 @@ use vmm_sys_util::{
eventfd::{EventFd, EFD_NONBLOCK},
};
use crate::thread_backend::RawPktsQ;
use crate::vhu_vsock_thread::*;
const NUM_QUEUES: usize = 2;
pub(crate) type CidMap =
HashMap<u64, (Arc<RwLock<RawPktsQ>>, Arc<RwLock<HashSet<String>>>, EventFd)>;
const NUM_QUEUES: usize = 3;
const QUEUE_SIZE: usize = 256;
// New descriptors pending on the rx queue
@ -32,11 +38,12 @@ const TX_QUEUE_EVENT: u16 = 1;
const EVT_QUEUE_EVENT: u16 = 2;
/// Notification coming from the backend.
pub(crate) const BACKEND_EVENT: u16 = 3;
/// Event range [0...num_queues] is reserved for queues and exit event.
/// So NUM_QUEUES + 1 is used.
pub(crate) const BACKEND_EVENT: u16 = (NUM_QUEUES + 1) as u16;
/// Vsock connection TX buffer capacity
/// TODO: Make this value configurable
pub(crate) const CONN_TX_BUF_SIZE: u32 = 64 * 1024;
/// Notification coming from the sibling VM.
pub(crate) const SIBLING_VM_EVENT: u16 = BACKEND_EVENT + 1;
/// CID of the host
pub(crate) const VSOCK_HOST_CID: u64 = 2;
@ -108,8 +115,6 @@ pub(crate) enum Error {
IterateQueue,
#[error("No rx request available")]
NoRequestRx,
#[error("Unable to create thread pool")]
CreateThreadPool(std::io::Error),
#[error("Packet missing data buffer")]
PktBufMissing,
#[error("Failed to connect to unix socket")]
@ -126,6 +131,10 @@ pub(crate) enum Error {
EmptyBackendRxQ,
#[error("Failed to create an EventFd")]
EventFdCreate(std::io::Error),
#[error("Raw vsock packets queue is empty")]
EmptyRawPktsQueue,
#[error("CID already in use by another vsock device")]
CidAlreadyInUse,
}
impl std::convert::From<Error> for std::io::Error {
@ -141,16 +150,26 @@ pub(crate) struct VsockConfig {
guest_cid: u64,
socket: String,
uds_path: String,
tx_buffer_size: u32,
groups: Vec<String>,
}
impl VsockConfig {
/// Create a new instance of the VsockConfig struct, containing the
/// parameters to be fed into the vsock-backend server.
pub fn new(guest_cid: u64, socket: String, uds_path: String) -> Self {
pub fn new(
guest_cid: u64,
socket: String,
uds_path: String,
tx_buffer_size: u32,
groups: Vec<String>,
) -> Self {
Self {
guest_cid,
socket,
uds_path,
tx_buffer_size,
groups,
}
}
@ -170,6 +189,14 @@ impl VsockConfig {
pub fn get_socket_path(&self) -> String {
String::from(&self.socket)
}
/// Returns the TX buffer size this configuration was created with.
pub fn get_tx_buffer_size(&self) -> u32 {
    self.tx_buffer_size
}
/// Returns an owned copy of the group names this configuration was created
/// with, in their original order.
pub fn get_groups(&self) -> Vec<String> {
    self.groups.to_vec()
}
}
/// A local port and peer port pair used to retrieve
@ -208,10 +235,13 @@ pub(crate) struct VhostUserVsockBackend {
}
impl VhostUserVsockBackend {
pub fn new(config: VsockConfig) -> Result<Self> {
pub fn new(config: VsockConfig, cid_map: Arc<RwLock<CidMap>>) -> Result<Self> {
let thread = Mutex::new(VhostUserVsockThread::new(
config.get_uds_path(),
config.get_guest_cid(),
config.get_tx_buffer_size(),
config.get_groups(),
cid_map,
)?);
let queues_per_thread = vec![QUEUE_MASK];
@ -226,7 +256,7 @@ impl VhostUserVsockBackend {
}
}
impl VhostUserBackendMut<VringRwLock, ()> for VhostUserVsockBackend {
impl VhostUserBackend<VringRwLock, ()> for VhostUserVsockBackend {
fn num_queues(&self) -> usize {
NUM_QUEUES
}
@ -246,13 +276,13 @@ impl VhostUserBackendMut<VringRwLock, ()> for VhostUserVsockBackend {
VhostUserProtocolFeatures::CONFIG
}
fn set_event_idx(&mut self, enabled: bool) {
fn set_event_idx(&self, enabled: bool) {
for thread in self.threads.iter() {
thread.lock().unwrap().event_idx = enabled;
}
}
fn update_memory(&mut self, atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>) -> IoResult<()> {
fn update_memory(&self, atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>) -> IoResult<()> {
for thread in self.threads.iter() {
thread.lock().unwrap().mem = Some(atomic_mem.clone());
}
@ -260,7 +290,7 @@ impl VhostUserBackendMut<VringRwLock, ()> for VhostUserVsockBackend {
}
fn handle_event(
&mut self,
&self,
device_event: u16,
evset: EventSet,
vrings: &[VringRwLock],
@ -281,17 +311,31 @@ impl VhostUserBackendMut<VringRwLock, ()> for VhostUserVsockBackend {
TX_QUEUE_EVENT => {
thread.process_tx(vring_tx, evt_idx)?;
}
EVT_QUEUE_EVENT => {}
EVT_QUEUE_EVENT => {
warn!("Received an unexpected EVT_QUEUE_EVENT");
}
BACKEND_EVENT => {
thread.process_backend_evt(evset);
thread.process_tx(vring_tx, evt_idx)?;
if let Err(e) = thread.process_tx(vring_tx, evt_idx) {
match e {
Error::NoMemoryConfigured => {
warn!("Received a backend event before vring initialization")
}
_ => return Err(e.into()),
}
}
}
SIBLING_VM_EVENT => {
let _ = thread.sibling_event_fd.read();
thread.process_raw_pkts(vring_rx, evt_idx)?;
return Ok(false);
}
_ => {
return Err(Error::HandleUnknownEvent.into());
}
}
if device_event != EVT_QUEUE_EVENT && thread.thread_backend.pending_rx() {
if device_event != EVT_QUEUE_EVENT {
thread.process_rx(vring_rx, evt_idx)?;
}
@ -323,28 +367,46 @@ impl VhostUserBackendMut<VringRwLock, ()> for VhostUserVsockBackend {
#[cfg(test)]
mod tests {
use super::*;
use serial_test::serial;
use std::convert::TryInto;
use tempfile::tempdir;
use vhost_user_backend::VringT;
use vm_memory::GuestAddress;
const CONN_TX_BUF_SIZE: u32 = 64 * 1024;
#[test]
#[serial]
fn test_vsock_backend() {
const CID: u64 = 3;
const VHOST_SOCKET_PATH: &str = "test_vsock_backend.socket";
const VSOCK_SOCKET_PATH: &str = "test_vsock_backend.vsock";
let groups_list: Vec<String> = vec![String::from("default")];
let test_dir = tempdir().expect("Could not create a temp test directory.");
let vhost_socket_path = test_dir
.path()
.join("test_vsock_backend.socket")
.display()
.to_string();
let vsock_socket_path = test_dir
.path()
.join("test_vsock_backend.vsock")
.display()
.to_string();
let config = VsockConfig::new(
CID,
VHOST_SOCKET_PATH.to_string(),
VSOCK_SOCKET_PATH.to_string(),
vhost_socket_path.to_string(),
vsock_socket_path.to_string(),
CONN_TX_BUF_SIZE,
groups_list,
);
let backend = VhostUserVsockBackend::new(config);
let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));
let backend = VhostUserVsockBackend::new(config, cid_map);
assert!(backend.is_ok());
let mut backend = backend.unwrap();
let backend = backend.unwrap();
assert_eq!(backend.num_queues(), NUM_QUEUES);
assert_eq!(backend.max_queue_size(), QUEUE_SIZE);
@ -396,33 +458,53 @@ mod tests {
assert!(!ret.unwrap());
// cleanup
let _ = std::fs::remove_file(VHOST_SOCKET_PATH);
let _ = std::fs::remove_file(VSOCK_SOCKET_PATH);
let _ = std::fs::remove_file(vhost_socket_path);
let _ = std::fs::remove_file(vsock_socket_path);
test_dir.close().unwrap();
}
#[test]
#[serial]
fn test_vsock_backend_failures() {
const CID: u64 = 3;
const VHOST_SOCKET_PATH: &str = "test_vsock_backend_failures.socket";
const VSOCK_SOCKET_PATH: &str = "test_vsock_backend_failures.vsock";
let groups: Vec<String> = vec![String::from("default")];
let test_dir = tempdir().expect("Could not create a temp test directory.");
let vhost_socket_path = test_dir
.path()
.join("test_vsock_backend_failures.socket")
.display()
.to_string();
let vsock_socket_path = test_dir
.path()
.join("test_vsock_backend_failures.vsock")
.display()
.to_string();
let config = VsockConfig::new(
CID,
"/sys/not_allowed.socket".to_string(),
"/sys/not_allowed.vsock".to_string(),
CONN_TX_BUF_SIZE,
groups.clone(),
);
let backend = VhostUserVsockBackend::new(config);
let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));
let backend = VhostUserVsockBackend::new(config, cid_map.clone());
assert!(backend.is_err());
let config = VsockConfig::new(
CID,
VHOST_SOCKET_PATH.to_string(),
VSOCK_SOCKET_PATH.to_string(),
vhost_socket_path.to_string(),
vsock_socket_path.to_string(),
CONN_TX_BUF_SIZE,
groups,
);
let mut backend = VhostUserVsockBackend::new(config).unwrap();
let backend = VhostUserVsockBackend::new(config, cid_map).unwrap();
let mem = GuestMemoryAtomic::new(
GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(),
);
@ -446,14 +528,16 @@ mod tests {
);
assert_eq!(
backend
.handle_event(BACKEND_EVENT + 1, EventSet::IN, &vrings, 0)
.handle_event(SIBLING_VM_EVENT + 1, EventSet::IN, &vrings, 0)
.unwrap_err()
.to_string(),
Error::HandleUnknownEvent.to_string()
);
// cleanup
let _ = std::fs::remove_file(VHOST_SOCKET_PATH);
let _ = std::fs::remove_file(VSOCK_SOCKET_PATH);
let _ = std::fs::remove_file(vhost_socket_path);
let _ = std::fs::remove_file(vsock_socket_path);
test_dir.close().unwrap();
}
}

View File

@ -1,37 +1,56 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
use std::{
collections::HashSet,
fs::File,
io,
io::Read,
iter::FromIterator,
num::Wrapping,
ops::Deref,
os::unix::{
net::{UnixListener, UnixStream},
prelude::{AsRawFd, FromRawFd, RawFd},
},
sync::{Arc, RwLock},
sync::mpsc::Sender,
sync::{mpsc, Arc, RwLock},
thread,
};
use futures::executor::{ThreadPool, ThreadPoolBuilder};
use log::warn;
use vhost_user_backend::{VringEpollHandler, VringRwLock, VringT};
use virtio_queue::QueueOwnedT;
use virtio_vsock::packet::{VsockPacket, PKT_HEADER_SIZE};
use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryMmap};
use vmm_sys_util::epoll::EventSet;
use vmm_sys_util::{
epoll::EventSet,
eventfd::{EventFd, EFD_NONBLOCK},
};
use crate::{
rxops::*,
thread_backend::*,
vhu_vsock::{
ConnMapKey, Error, Result, VhostUserVsockBackend, BACKEND_EVENT, CONN_TX_BUF_SIZE,
CidMap, ConnMapKey, Error, Result, VhostUserVsockBackend, BACKEND_EVENT, SIBLING_VM_EVENT,
VSOCK_HOST_CID,
},
vsock_conn::*,
};
type ArcVhostBknd = Arc<RwLock<VhostUserVsockBackend>>;
type ArcVhostBknd = Arc<VhostUserVsockBackend>;
/// Names the two RX queues a thread alternates between; stored in
/// `VhostUserVsockThread::last_processed` to record which one went first.
enum RxQueueType {
    // presumably the regular backend RX queue - TODO confirm against its users
    Standard,
    // presumably the queue of raw packets sent by sibling VMs - TODO confirm
    RawPkts,
}
// Data which is required by a worker handling event idx.
// Values of this type are sent over the `mpsc` channel whose sending half is
// stored in `VhostUserVsockThread::sender`.
struct EventData {
    // Vring the event refers to.
    vring: VringRwLock,
    // presumably whether the EVENT_IDX feature is enabled - TODO confirm
    event_idx: bool,
    // presumably the head index of the processed descriptor chain - TODO confirm
    head_idx: u16,
    // presumably the number of bytes used in that chain - TODO confirm
    used_len: usize,
}
pub(crate) struct VhostUserVsockThread {
/// Guest memory map.
@ -52,15 +71,29 @@ pub(crate) struct VhostUserVsockThread {
pub thread_backend: VsockThreadBackend,
/// CID of the guest.
guest_cid: u64,
/// Thread pool to handle event idx.
pool: ThreadPool,
/// Channel to a worker which handles event idx.
sender: Sender<EventData>,
/// host side port on which application listens.
local_port: Wrapping<u32>,
/// The tx buffer size
tx_buffer_size: u32,
/// EventFd to notify this thread for custom events. Currently used to notify
/// this thread to process raw vsock packets sent from a sibling VM.
pub sibling_event_fd: EventFd,
/// Keeps track of which RX queue was processed first in the last iteration.
/// Used to alternate between the RX queues to prevent the starvation of one by the other.
last_processed: RxQueueType,
}
impl VhostUserVsockThread {
/// Create a new instance of VhostUserVsockThread.
pub fn new(uds_path: String, guest_cid: u64) -> Result<Self> {
pub fn new(
uds_path: String,
guest_cid: u64,
tx_buffer_size: u32,
groups: Vec<String>,
cid_map: Arc<RwLock<CidMap>>,
) -> Result<Self> {
// TODO: better error handling, maybe add a param to force the unlink
let _ = std::fs::remove_file(uds_path.clone());
let host_sock = UnixListener::bind(&uds_path)
@ -73,21 +106,60 @@ impl VhostUserVsockThread {
let host_raw_fd = host_sock.as_raw_fd();
let mut groups = groups;
let groups_set: Arc<RwLock<HashSet<String>>> =
Arc::new(RwLock::new(HashSet::from_iter(groups.drain(..))));
let sibling_event_fd = EventFd::new(EFD_NONBLOCK).map_err(Error::EventFdCreate)?;
let thread_backend = VsockThreadBackend::new(
uds_path.clone(),
epoll_fd,
guest_cid,
tx_buffer_size,
groups_set.clone(),
cid_map.clone(),
);
{
let mut cid_map = cid_map.write().unwrap();
if cid_map.contains_key(&guest_cid) {
return Err(Error::CidAlreadyInUse);
}
cid_map.insert(
guest_cid,
(
thread_backend.raw_pkts_queue.clone(),
groups_set,
sibling_event_fd.try_clone().unwrap(),
),
);
}
let (sender, receiver) = mpsc::channel::<EventData>();
thread::spawn(move || loop {
// TODO: Understand why doing the following in the background thread works.
// maybe we'd better have thread pool for the entire application if necessary.
let Ok(event_data) = receiver.recv() else {
break;
};
Self::vring_handle_event(event_data);
});
let thread = VhostUserVsockThread {
mem: None,
event_idx: false,
host_sock: host_sock.as_raw_fd(),
host_sock_path: uds_path.clone(),
host_sock_path: uds_path,
host_listener: host_sock,
vring_worker: None,
epoll_file,
thread_backend: VsockThreadBackend::new(uds_path, epoll_fd),
thread_backend,
guest_cid,
pool: ThreadPoolBuilder::new()
.pool_size(1)
.create()
.map_err(Error::CreateThreadPool)?,
sender,
local_port: Wrapping(0),
tx_buffer_size,
sibling_event_fd,
last_processed: RxQueueType::Standard,
};
VhostUserVsockThread::epoll_register(epoll_fd, host_raw_fd, epoll::Events::EPOLLIN)?;
@ -95,6 +167,37 @@ impl VhostUserVsockThread {
Ok(thread)
}
fn vring_handle_event(event_data: EventData) {
if event_data.event_idx {
if event_data
.vring
.add_used(event_data.head_idx, event_data.used_len as u32)
.is_err()
{
warn!("Could not return used descriptors to ring");
}
match event_data.vring.needs_notification() {
Err(_) => {
warn!("Could not check if queue needs to be notified");
event_data.vring.signal_used_queue().unwrap();
}
Ok(needs_notification) => {
if needs_notification {
event_data.vring.signal_used_queue().unwrap();
}
}
}
} else {
if event_data
.vring
.add_used(event_data.head_idx, event_data.used_len as u32)
.is_err()
{
warn!("Could not return used descriptors to ring");
}
event_data.vring.signal_used_queue().unwrap();
}
}
/// Register a file with an epoll to listen for events in evset.
pub fn epoll_register(epoll_fd: RawFd, fd: RawFd, evset: epoll::Events) -> Result<()> {
epoll::ctl(
@ -150,6 +253,15 @@ impl VhostUserVsockThread {
.unwrap()
.register_listener(self.get_epoll_fd(), EventSet::IN, u64::from(BACKEND_EVENT))
.unwrap();
self.vring_worker
.as_ref()
.unwrap()
.register_listener(
self.sibling_event_fd.as_raw_fd(),
EventSet::IN,
u64::from(SIBLING_VM_EVENT),
)
.unwrap();
}
/// Process a BACKEND_EVENT received by VhostUserVsockBackend.
@ -181,10 +293,9 @@ impl VhostUserVsockThread {
fn handle_event(&mut self, fd: RawFd, evset: epoll::Events) {
if fd == self.host_sock {
// This is a new connection initiated by an application running on the host
self.host_listener
.accept()
.map_err(Error::UnixAccept)
.and_then(|(stream, _)| {
let conn = self.host_listener.accept().map_err(Error::UnixAccept);
if self.mem.is_some() {
conn.and_then(|(stream, _)| {
stream
.set_nonblocking(true)
.map(|_| stream)
@ -194,6 +305,13 @@ impl VhostUserVsockThread {
.unwrap_or_else(|err| {
warn!("Unable to accept new local connection: {:?}", err);
});
} else {
// If we aren't ready to process requests, accept and immediately close
// the connection.
conn.map(drop).unwrap_or_else(|err| {
warn!("Error closing an incoming connection: {:?}", err);
});
}
} else {
// Check if the stream represented by fd has already established a
// connection with the application running in the guest
@ -201,7 +319,7 @@ impl VhostUserVsockThread {
self.thread_backend.listener_map.entry(fd)
{
// New connection from the host
if evset != epoll::Events::EPOLLIN {
if evset.bits() != epoll::Events::EPOLLIN.bits() {
// Has to be EPOLLIN as it was not connected previously
return;
}
@ -246,6 +364,7 @@ impl VhostUserVsockThread {
self.guest_cid,
peer_port,
self.get_epoll_fd(),
self.tx_buffer_size,
);
new_conn.rx_queue.enqueue(RxOps::Request);
new_conn.set_peer_port(peer_port);
@ -269,7 +388,7 @@ impl VhostUserVsockThread {
let key = self.thread_backend.listener_map.get(&fd).unwrap();
let conn = self.thread_backend.conn_map.get_mut(key).unwrap();
if evset == epoll::Events::EPOLLOUT {
if evset.bits() == epoll::Events::EPOLLOUT.bits() {
// Flush any remaining data from the tx buffer
match conn.tx_buf.flush_to(&mut conn.stream) {
Ok(cnt) => {
@ -381,7 +500,11 @@ impl VhostUserVsockThread {
}
/// Iterate over the rx queue and process rx requests.
fn process_rx_queue(&mut self, vring: &VringRwLock) -> Result<bool> {
fn process_rx_queue(
&mut self,
vring: &VringRwLock,
rx_queue_type: RxQueueType,
) -> Result<bool> {
let mut used_any = false;
let atomic_mem = match &self.mem {
Some(m) => m,
@ -404,10 +527,15 @@ impl VhostUserVsockThread {
let used_len = match VsockPacket::from_rx_virtq_chain(
mem.deref(),
&mut avail_desc,
CONN_TX_BUF_SIZE,
self.tx_buffer_size,
) {
Ok(mut pkt) => {
if self.thread_backend.recv_pkt(&mut pkt).is_ok() {
let recv_result = match rx_queue_type {
RxQueueType::Standard => self.thread_backend.recv_pkt(&mut pkt),
RxQueueType::RawPkts => self.thread_backend.recv_raw_pkt(&mut pkt),
};
if recv_result.is_ok() {
PKT_HEADER_SIZE + pkt.len() as usize
} else {
queue.iter(mem).unwrap().go_to_previous_position();
@ -422,41 +550,33 @@ impl VhostUserVsockThread {
let vring = vring.clone();
let event_idx = self.event_idx;
self.sender
.send(EventData {
vring,
event_idx,
head_idx,
used_len,
})
.unwrap();
self.pool.spawn_ok(async move {
// TODO: Understand why doing the following in the pool works
if event_idx {
if vring.add_used(head_idx, used_len as u32).is_err() {
warn!("Could not return used descriptors to ring");
match rx_queue_type {
RxQueueType::Standard => {
if !self.thread_backend.pending_rx() {
break;
}
}
RxQueueType::RawPkts => {
if !self.thread_backend.pending_raw_pkts() {
break;
}
match vring.needs_notification() {
Err(_) => {
warn!("Could not check if queue needs to be notified");
vring.signal_used_queue().unwrap();
}
Ok(needs_notification) => {
if needs_notification {
vring.signal_used_queue().unwrap();
}
}
}
} else {
if vring.add_used(head_idx, used_len as u32).is_err() {
warn!("Could not return used descriptors to ring");
}
vring.signal_used_queue().unwrap();
}
});
if !self.thread_backend.pending_rx() {
break;
}
}
Ok(used_any)
}
/// Wrapper to process rx queue based on whether event idx is enabled or not.
pub fn process_rx(&mut self, vring: &VringRwLock, event_idx: bool) -> Result<bool> {
fn process_unix_sockets(&mut self, vring: &VringRwLock, event_idx: bool) -> Result<bool> {
if event_idx {
// To properly handle EVENT_IDX we need to keep calling
// process_rx_queue until it stops finding new requests
@ -468,13 +588,57 @@ impl VhostUserVsockThread {
}
vring.disable_notification().unwrap();
self.process_rx_queue(vring)?;
self.process_rx_queue(vring, RxQueueType::Standard)?;
if !vring.enable_notification().unwrap() {
break;
}
}
} else {
self.process_rx_queue(vring)?;
self.process_rx_queue(vring, RxQueueType::Standard)?;
}
Ok(false)
}
/// Processes the raw vsock packet queue, honouring EVENT_IDX when it is enabled.
///
/// Returns `Ok(false)` on success; errors from `process_rx_queue` are propagated.
pub fn process_raw_pkts(&mut self, vring: &VringRwLock, event_idx: bool) -> Result<bool> {
    if !event_idx {
        // Plain mode: a single pass over the queue is enough.
        self.process_rx_queue(vring, RxQueueType::RawPkts)?;
        return Ok(false);
    }

    // EVENT_IDX mode: repeat while raw packets are pending, stopping as soon as
    // `enable_notification` returns `false`.
    while self.thread_backend.pending_raw_pkts() {
        vring.disable_notification().unwrap();
        self.process_rx_queue(vring, RxQueueType::RawPkts)?;

        let keep_going = vring.enable_notification().unwrap();
        if !keep_going {
            break;
        }
    }

    Ok(false)
}
/// Services both RX sources, starting with the one that did not go first last time.
///
/// `last_processed` is updated only when the source tried first actually had pending
/// work. Returns `Ok(false)` on success; processing errors are propagated.
pub fn process_rx(&mut self, vring: &VringRwLock, event_idx: bool) -> Result<bool> {
    // The queue that led the previous round yields priority to the other one.
    let raw_pkts_first = match self.last_processed {
        RxQueueType::Standard => true,
        RxQueueType::RawPkts => false,
    };

    if raw_pkts_first {
        if self.thread_backend.pending_raw_pkts() {
            self.process_raw_pkts(vring, event_idx)?;
            self.last_processed = RxQueueType::RawPkts;
        }
        if self.thread_backend.pending_rx() {
            self.process_unix_sockets(vring, event_idx)?;
        }
    } else {
        if self.thread_backend.pending_rx() {
            self.process_unix_sockets(vring, event_idx)?;
            self.last_processed = RxQueueType::Standard;
        }
        if self.thread_backend.pending_raw_pkts() {
            self.process_raw_pkts(vring, event_idx)?;
        }
    }

    Ok(false)
}
@ -502,7 +666,7 @@ impl VhostUserVsockThread {
let pkt = match VsockPacket::from_tx_virtq_chain(
mem.deref(),
&mut avail_desc,
CONN_TX_BUF_SIZE,
self.tx_buffer_size,
) {
Ok(pkt) => pkt,
Err(e) => {
@ -526,30 +690,14 @@ impl VhostUserVsockThread {
let vring = vring.clone();
let event_idx = self.event_idx;
self.pool.spawn_ok(async move {
if event_idx {
if vring.add_used(head_idx, used_len as u32).is_err() {
warn!("Could not return used descriptors to ring");
}
match vring.needs_notification() {
Err(_) => {
warn!("Could not check if queue needs to be notified");
vring.signal_used_queue().unwrap();
}
Ok(needs_notification) => {
if needs_notification {
vring.signal_used_queue().unwrap();
}
}
}
} else {
if vring.add_used(head_idx, used_len as u32).is_err() {
warn!("Could not return used descriptors to ring");
}
vring.signal_used_queue().unwrap();
}
});
self.sender
.send(EventData {
vring,
event_idx,
head_idx,
used_len,
})
.unwrap();
}
Ok(used_any)
@ -579,15 +727,23 @@ impl VhostUserVsockThread {
impl Drop for VhostUserVsockThread {
    /// Removes the host-side socket file and releases this thread's CID registration.
    fn drop(&mut self) {
        // Best effort: failure to remove the socket file is deliberately ignored.
        let _ = std::fs::remove_file(&self.host_sock_path);

        // `drop` can run while the thread is already unwinding from a panic. Panicking
        // again on a poisoned lock would abort the process and leave the CID registered,
        // so recover the guard from the poison error and remove the entry regardless.
        self.thread_backend
            .cid_map
            .write()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
            .remove(&self.guest_cid);
    }
}
#[cfg(test)]
mod tests {
use super::*;
use serial_test::serial;
use std::collections::HashMap;
use tempfile::tempdir;
use vm_memory::GuestAddress;
use vmm_sys_util::eventfd::EventFd;
const CONN_TX_BUF_SIZE: u32 = 64 * 1024;
impl VhostUserVsockThread {
fn get_epoll_file(&self) -> &File {
&self.epoll_file
@ -595,9 +751,24 @@ mod tests {
}
#[test]
#[serial]
fn test_vsock_thread() {
let t = VhostUserVsockThread::new("test_vsock_thread.vsock".to_string(), 3);
let groups: Vec<String> = vec![String::from("default")];
let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));
let test_dir = tempdir().expect("Could not create a temp test directory.");
let t = VhostUserVsockThread::new(
test_dir
.path()
.join("test_vsock_thread.vsock")
.display()
.to_string(),
3,
CONN_TX_BUF_SIZE,
groups,
cid_map,
);
assert!(t.is_ok());
let mut t = t.unwrap();
@ -647,16 +818,40 @@ mod tests {
dummy_fd.write(1).unwrap();
t.process_backend_evt(EventSet::empty());
test_dir.close().unwrap();
}
#[test]
#[serial]
fn test_vsock_thread_failures() {
let t = VhostUserVsockThread::new("/sys/not_allowed.vsock".to_string(), 3);
let groups: Vec<String> = vec![String::from("default")];
let cid_map: Arc<RwLock<CidMap>> = Arc::new(RwLock::new(HashMap::new()));
let test_dir = tempdir().expect("Could not create a temp test directory.");
let t = VhostUserVsockThread::new(
"/sys/not_allowed.vsock".to_string(),
3,
CONN_TX_BUF_SIZE,
groups.clone(),
cid_map.clone(),
);
assert!(t.is_err());
let mut t =
VhostUserVsockThread::new("test_vsock_thread_failures.vsock".to_string(), 3).unwrap();
let vsock_socket_path = test_dir
.path()
.join("test_vsock_thread_failures.vsock")
.display()
.to_string();
let mut t = VhostUserVsockThread::new(
vsock_socket_path,
3,
CONN_TX_BUF_SIZE,
groups.clone(),
cid_map.clone(),
)
.unwrap();
assert!(VhostUserVsockThread::epoll_register(-1, -1, epoll::Events::EPOLLIN).is_err());
assert!(VhostUserVsockThread::epoll_modify(-1, -1, epoll::Events::EPOLLIN).is_err());
assert!(VhostUserVsockThread::epoll_unregister(-1, -1).is_err());
@ -677,5 +872,17 @@ mod tests {
.push_back(ConnMapKey::new(0, 0));
assert!(t.process_rx(&vring, false).is_err());
assert!(t.process_rx(&vring, true).is_err());
// trying to use a CID that is already in use should fail
let vsock_socket_path2 = test_dir
.path()
.join("test_vsock_thread_failures2.vsock")
.display()
.to_string();
let t2 =
VhostUserVsockThread::new(vsock_socket_path2, 3, CONN_TX_BUF_SIZE, groups, cid_map);
assert!(t2.is_err());
test_dir.close().unwrap();
}
}

View File

@ -15,7 +15,7 @@ use crate::{
rxqueue::*,
txbuf::*,
vhu_vsock::{
Error, Result, CONN_TX_BUF_SIZE, VSOCK_FLAGS_SHUTDOWN_RCV, VSOCK_FLAGS_SHUTDOWN_SEND,
Error, Result, VSOCK_FLAGS_SHUTDOWN_RCV, VSOCK_FLAGS_SHUTDOWN_SEND,
VSOCK_OP_CREDIT_REQUEST, VSOCK_OP_CREDIT_UPDATE, VSOCK_OP_REQUEST, VSOCK_OP_RESPONSE,
VSOCK_OP_RST, VSOCK_OP_RW, VSOCK_OP_SHUTDOWN, VSOCK_TYPE_STREAM,
},
@ -52,6 +52,8 @@ pub(crate) struct VsockConnection<S> {
pub epoll_fd: RawFd,
/// Local tx buffer.
pub tx_buf: LocalTxBuf,
/// Local tx buffer size
tx_buffer_size: u32,
}
impl<S: AsRawFd + Read + Write> VsockConnection<S> {
@ -64,6 +66,7 @@ impl<S: AsRawFd + Read + Write> VsockConnection<S> {
guest_cid: u64,
guest_port: u32,
epoll_fd: RawFd,
tx_buffer_size: u32,
) -> Self {
Self {
stream,
@ -79,12 +82,14 @@ impl<S: AsRawFd + Read + Write> VsockConnection<S> {
peer_fwd_cnt: Wrapping(0),
rx_cnt: Wrapping(0),
epoll_fd,
tx_buf: LocalTxBuf::new(),
tx_buf: LocalTxBuf::new(tx_buffer_size),
tx_buffer_size,
}
}
/// Create a new vsock connection object for connections initiated by
/// an application running in the guest.
#[allow(clippy::too_many_arguments)]
pub fn new_peer_init(
stream: S,
local_cid: u64,
@ -93,6 +98,7 @@ impl<S: AsRawFd + Read + Write> VsockConnection<S> {
guest_port: u32,
epoll_fd: RawFd,
peer_buf_alloc: u32,
tx_buffer_size: u32,
) -> Self {
let mut rx_queue = RxQueue::new();
rx_queue.enqueue(RxOps::Response);
@ -110,7 +116,8 @@ impl<S: AsRawFd + Read + Write> VsockConnection<S> {
peer_fwd_cnt: Wrapping(0),
rx_cnt: Wrapping(0),
epoll_fd,
tx_buf: LocalTxBuf::new(),
tx_buf: LocalTxBuf::new(tx_buffer_size),
tx_buffer_size,
}
}
@ -305,8 +312,18 @@ impl<S: AsRawFd + Read + Write> VsockConnection<S> {
if written_count > 0 {
// Increment forwarded count by number of bytes written to the stream
self.fwd_cnt += Wrapping(written_count as u32);
// TODO: https://github.com/torvalds/linux/commit/c69e6eafff5f725bc29dcb8b52b6782dca8ea8a2
self.rx_queue.enqueue(RxOps::CreditUpdate);
// At what point in available credits should we send a credit update.
// This is set to 1/4th of the tx buffer size. If we keep it too low,
// we will end up sending too many credit updates. If we keep it too
// high, we will end up sending too few credit updates and cause stalls.
// Stalls are worse than too many credit updates.
let free_space = self
.tx_buffer_size
.wrapping_sub((self.fwd_cnt - self.last_fwd_cnt).0);
if free_space < self.tx_buffer_size / 4 {
self.rx_queue.enqueue(RxOps::CreditUpdate);
}
}
if written_count != buf.len() {
@ -329,7 +346,7 @@ impl<S: AsRawFd + Read + Write> VsockConnection<S> {
.set_src_port(self.local_port)
.set_dst_port(self.peer_port)
.set_type(VSOCK_TYPE_STREAM)
.set_buf_alloc(CONN_TX_BUF_SIZE)
.set_buf_alloc(self.tx_buffer_size)
.set_fwd_cnt(self.fwd_cnt.0)
}
@ -352,7 +369,6 @@ mod tests {
use super::*;
use crate::vhu_vsock::{VSOCK_HOST_CID, VSOCK_OP_RW, VSOCK_TYPE_STREAM};
use serial_test::serial;
use std::io::Result as IoResult;
use std::ops::Deref;
use virtio_bindings::bindings::virtio_ring::{VRING_DESC_F_NEXT, VRING_DESC_F_WRITE};
@ -362,6 +378,8 @@ mod tests {
GuestMemoryMmap,
};
const CONN_TX_BUF_SIZE: u32 = 64 * 1024;
struct HeadParams {
head_len: usize,
data_len: u32,
@ -486,12 +504,18 @@ mod tests {
}
#[test]
#[serial]
fn test_vsock_conn_init() {
// new locally initiated connection
let dummy_file = VsockDummySocket::new();
let mut conn_local =
VsockConnection::new_local_init(dummy_file, VSOCK_HOST_CID, 5000, 3, 5001, -1);
let mut conn_local = VsockConnection::new_local_init(
dummy_file,
VSOCK_HOST_CID,
5000,
3,
5001,
-1,
CONN_TX_BUF_SIZE,
);
assert!(!conn_local.connect);
assert_eq!(conn_local.peer_port, 5001);
@ -506,8 +530,16 @@ mod tests {
// New connection initiated by the peer/guest
let dummy_file = VsockDummySocket::new();
let mut conn_peer =
VsockConnection::new_peer_init(dummy_file, VSOCK_HOST_CID, 5000, 3, 5001, -1, 65536);
let mut conn_peer = VsockConnection::new_peer_init(
dummy_file,
VSOCK_HOST_CID,
5000,
3,
5001,
-1,
65536,
CONN_TX_BUF_SIZE,
);
assert!(!conn_peer.connect);
assert_eq!(conn_peer.peer_port, 5001);
@ -520,12 +552,18 @@ mod tests {
}
#[test]
#[serial]
fn test_vsock_conn_credit() {
// new locally initiated connection
let dummy_file = VsockDummySocket::new();
let mut conn_local =
VsockConnection::new_local_init(dummy_file, VSOCK_HOST_CID, 5000, 3, 5001, -1);
let mut conn_local = VsockConnection::new_local_init(
dummy_file,
VSOCK_HOST_CID,
5000,
3,
5001,
-1,
CONN_TX_BUF_SIZE,
);
assert_eq!(conn_local.peer_avail_credit(), 0);
assert!(conn_local.need_credit_update_from_peer());
@ -544,15 +582,21 @@ mod tests {
}
#[test]
#[serial]
fn test_vsock_conn_init_pkt() {
// parameters for packet head construction
let head_params = HeadParams::new(PKT_HEADER_SIZE, 10);
// new locally initiated connection
let dummy_file = VsockDummySocket::new();
let conn_local =
VsockConnection::new_local_init(dummy_file, VSOCK_HOST_CID, 5000, 3, 5001, -1);
let conn_local = VsockConnection::new_local_init(
dummy_file,
VSOCK_HOST_CID,
5000,
3,
5001,
-1,
CONN_TX_BUF_SIZE,
);
// write only descriptor chain
let (mem, mut descr_chain) = prepare_desc_chain_vsock(true, &head_params, 2, 10);
@ -574,15 +618,21 @@ mod tests {
}
#[test]
#[serial]
fn test_vsock_conn_recv_pkt() {
// parameters for packet head construction
let head_params = HeadParams::new(PKT_HEADER_SIZE, 5);
// new locally initiated connection
let dummy_file = VsockDummySocket::new();
let mut conn_local =
VsockConnection::new_local_init(dummy_file, VSOCK_HOST_CID, 5000, 3, 5001, -1);
let mut conn_local = VsockConnection::new_local_init(
dummy_file,
VSOCK_HOST_CID,
5000,
3,
5001,
-1,
CONN_TX_BUF_SIZE,
);
// write only descriptor chain
let (mem, mut descr_chain) = prepare_desc_chain_vsock(true, &head_params, 1, 5);
@ -664,15 +714,21 @@ mod tests {
}
#[test]
#[serial]
fn test_vsock_conn_send_pkt() {
// parameters for packet head construction
let head_params = HeadParams::new(PKT_HEADER_SIZE, 5);
// new locally initiated connection
let dummy_file = VsockDummySocket::new();
let mut conn_local =
VsockConnection::new_local_init(dummy_file, VSOCK_HOST_CID, 5000, 3, 5001, -1);
let mut conn_local = VsockConnection::new_local_init(
dummy_file,
VSOCK_HOST_CID,
5000,
3,
5001,
-1,
CONN_TX_BUF_SIZE,
);
// write only descriptor chain
let (mem, mut descr_chain) = prepare_desc_chain_vsock(false, &head_params, 1, 5);

View File

@ -1,30 +0,0 @@
[package]
name = "vhost-user-vsock"
version = "0.1.0"
authors = ["Harshavardhan Unnibhavi <harshanavkis@gmail.com>"]
description = "A virtio-vsock device using the vhost-user protocol."
repository = "https://github.com/rust-vmm/vhost-device"
readme = "README.md"
keywords = ["vhost", "vsock"]
license = "Apache-2.0 OR BSD-3-Clause"
edition = "2018"
[dependencies]
byteorder = "1"
clap = { version = "4.1", features = ["derive"] }
env_logger = "0.10"
epoll = "4.3.1"
futures = { version = "0.3", features = ["thread-pool"] }
log = "0.4"
thiserror = "1.0"
vhost = { version = "0.6", features = ["vhost-user-slave"] }
vhost-user-backend = "0.8"
virtio-bindings = "0.2"
virtio-queue = "0.7"
virtio-vsock = "0.2.1"
vm-memory = "0.10"
vmm-sys-util = "0.11"
[dev-dependencies]
virtio-queue = { version = "0.7", features = ["test-utils"] }
serial_test = "1.0"

View File

@ -1,116 +0,0 @@
# vhost-user-vsock
## Design
The crate introduces a vhost-user-vsock device that enables communication between an
application running in the guest, i.e. inside a VM, and an application running on the
host, i.e. outside the VM. The application running in the guest communicates over VM
sockets, i.e. over AF_VSOCK sockets. The application running on the host connects to a
Unix socket on the host, i.e. communicates over AF_UNIX sockets. The main components of
the crate are split into various files as described below:
- [packet.rs](src/packet.rs)
- Introduces the **VsockPacket** structure that represents a single vsock packet, together with its
processing methods.
- [rxops.rs](src/rxops.rs)
- Introduces various vsock operations that are enqueued into the rxqueue to be sent to the
guest. Exposes a **RxOps** structure.
- [rxqueue.rs](src/rxqueue.rs)
- rxqueue contains the pending rx operations corresponding to that connection. The queue is
represented as a bitmap as we handle connection-oriented connections. The module contains
various queue manipulation methods. Exposes a **RxQueue** structure.
- [thread_backend.rs](src/thread_backend.rs)
- Multiplexes connections between host and guest and calls into per connection methods that
are responsible for processing data and packets corresponding to the connection. Exposes a
**VsockThreadBackend** structure.
- [txbuf.rs](src/txbuf.rs)
- Module to buffer data that is sent from the guest to the host. The module exposes a **LocalTxBuf**
structure.
- [vhu_vsock_thread.rs](src/vhu_vsock_thread.rs)
- Module exposes a **VhostUserVsockThread** structure. It also handles new host initiated
connections and provides interfaces for registering host connections with the epoll fd. Also
provides interfaces for iterating through the rx and tx queues.
- [vsock_conn.rs](src/vsock_conn.rs)
- Module introduces a **VsockConnection** structure that represents a single vsock connection
between the guest and the host. It also processes packets according to their type.
- [vhu_vsock.rs](src/vhu_vsock.rs)
- exposes the main vhost user vsock backend interface.
## Usage
Run the vhost-user-vsock device:
```
vhost-user-vsock --guest-cid=<CID assigned to the guest> \
--socket=<path to the Unix socket to be created to communicate with the VMM via the vhost-user protocol> \
--uds-path=<path to the Unix socket to communicate with the guest via the virtio-vsock device>
```
Run VMM (e.g. QEMU):
```
qemu-system-x86_64 \
<normal QEMU options> \
-object memory-backend-file,share=on,id=mem0,size=<Guest RAM size>,mem-path=<Guest RAM file path> \ # size == -m size
-machine <machine options>,memory-backend=mem0 \
-chardev socket,id=char0,reconnect=0,path=<vhost-user socket path> \
-device vhost-user-vsock-pci,chardev=char0
```
## Working example
```sh
shell1$ vhost-user-vsock --guest-cid=4 --uds-path=/tmp/vm4.vsock --socket=/tmp/vhost4.socket
```
```sh
shell2$ qemu-system-x86_64 \
-drive file=vm.qcow2,format=qcow2,if=virtio -smp 2 -m 512M -mem-prealloc \
-object memory-backend-file,share=on,id=mem0,size=512M,mem-path="/dev/hugepages" \
-machine q35,accel=kvm,memory-backend=mem0 \
-chardev socket,id=char0,reconnect=0,path=/tmp/vhost4.socket \
-device vhost-user-vsock-pci,chardev=char0
```
### Guest listening
#### iperf
```sh
# https://github.com/stefano-garzarella/iperf-vsock
guest$ iperf3 --vsock -s
host$ iperf3 --vsock -c /tmp/vm4.vsock
```
#### netcat
```sh
guest$ nc --vsock -l 1234
host$ nc -U /tmp/vm4.vsock
CONNECT 1234
```
### Host listening
#### iperf
```sh
# https://github.com/stefano-garzarella/iperf-vsock
host$ iperf3 --vsock -s -B /tmp/vm4.vsock
guest$ iperf3 --vsock -c 2
```
#### netcat
```sh
host$ nc -l -U /tmp/vm4.vsock_1234
guest$ nc --vsock 2 1234
```
## License
This project is licensed under either of
- [Apache License](http://www.apache.org/licenses/LICENSE-2.0), Version 2.0
- [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause)

View File

@ -1,161 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
mod rxops;
mod rxqueue;
mod thread_backend;
mod txbuf;
mod vhu_vsock;
mod vhu_vsock_thread;
mod vsock_conn;
use std::{
convert::TryFrom,
sync::{Arc, RwLock},
};
use clap::Parser;
use log::{info, warn};
use vhost::{vhost_user, vhost_user::Listener};
use vhost_user_backend::VhostUserDaemon;
use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap};
use crate::vhu_vsock::{Error, Result, VhostUserVsockBackend, VsockConfig};
// Command-line arguments of the vhost-user-vsock binary, parsed through clap's derive
// `Parser`. `guest_cid` defaults to 3; `socket` and `uds_path` are declared without a
// default value.
// NOTE(review): with clap's derive API the `///` comments on the fields are normally
// used as the option help text, so treat them as user-facing strings — confirm before
// rewording them.
#[derive(Parser, Debug)]
#[clap(version, about, long_about = None)]
struct VsockArgs {
/// Context identifier of the guest which uniquely identifies the device for its lifetime.
#[clap(long, default_value_t = 3)]
guest_cid: u64,
/// Unix socket to which a hypervisor connects to and sets up the control path with the device.
#[clap(long)]
socket: String,
/// Unix socket to which a host-side application connects to.
#[clap(long)]
uds_path: String,
}
impl TryFrom<VsockArgs> for VsockConfig {
type Error = Error;
fn try_from(cmd_args: VsockArgs) -> Result<Self> {
let socket = cmd_args.socket.trim().to_string();
let uds_path = cmd_args.uds_path.trim().to_string();
Ok(VsockConfig::new(cmd_args.guest_cid, socket, uds_path))
}
}
/// This is the public API through which an external program starts the
/// vhost-user-vsock backend server.
pub(crate) fn start_backend_server(config: VsockConfig) {
loop {
let backend = Arc::new(RwLock::new(
VhostUserVsockBackend::new(config.clone()).unwrap(),
));
let listener = Listener::new(config.get_socket_path(), true).unwrap();
let mut daemon = VhostUserDaemon::new(
String::from("vhost-user-vsock"),
backend.clone(),
GuestMemoryAtomic::new(GuestMemoryMmap::new()),
)
.unwrap();
let mut vring_workers = daemon.get_epoll_handlers();
for thread in backend.read().unwrap().threads.iter() {
thread
.lock()
.unwrap()
.set_vring_worker(Some(vring_workers.remove(0)));
}
daemon.start(listener).unwrap();
match daemon.wait() {
Ok(()) => {
info!("Stopping cleanly");
}
Err(vhost_user_backend::Error::HandleRequest(vhost_user::Error::PartialMessage)) => {
info!("vhost-user connection closed with partial message. If the VM is shutting down, this is expected behavior; otherwise, it might be a bug.");
}
Err(e) => {
warn!("Error running daemon: {:?}", e);
}
}
// No matter the result, we need to shut down the worker thread.
backend.read().unwrap().exit_event.write(1).unwrap();
}
}
/// Binary entry point: sets up logging, turns the command line into a `VsockConfig`
/// and runs the backend server.
fn main() {
    env_logger::init();

    let args = VsockArgs::parse();
    start_backend_server(VsockConfig::try_from(args).unwrap());
}
#[cfg(test)]
mod tests {
    use super::*;
    use serial_test::serial;

    impl VsockArgs {
        /// Builds arguments directly, without going through command-line parsing.
        fn from_args(guest_cid: u64, socket: &str, uds_path: &str) -> Self {
            Self {
                guest_cid,
                socket: String::from(socket),
                uds_path: String::from(uds_path),
            }
        }
    }

    /// The values supplied as arguments must come back unchanged from the config getters.
    #[test]
    #[serial]
    fn test_vsock_config_setup() {
        let cmd_args = VsockArgs::from_args(3, "/tmp/vhost4.socket", "/tmp/vm4.vsock");

        let converted = VsockConfig::try_from(cmd_args);
        assert!(converted.is_ok());

        let config = converted.unwrap();
        assert_eq!(config.get_guest_cid(), 3);
        assert_eq!(config.get_socket_path(), "/tmp/vhost4.socket");
        assert_eq!(config.get_uds_path(), "/tmp/vm4.vsock");
    }

    /// A freshly built daemon must expose exactly one epoll handler per backend thread.
    #[test]
    #[serial]
    fn test_vsock_server() {
        const CID: u64 = 3;
        const VHOST_SOCKET_PATH: &str = "test_vsock_server.socket";
        const VSOCK_SOCKET_PATH: &str = "test_vsock_server.vsock";

        let config = VsockConfig::new(
            CID,
            String::from(VHOST_SOCKET_PATH),
            String::from(VSOCK_SOCKET_PATH),
        );

        let backend = Arc::new(RwLock::new(VhostUserVsockBackend::new(config).unwrap()));

        let daemon = VhostUserDaemon::new(
            String::from("vhost-user-vsock"),
            backend.clone(),
            GuestMemoryAtomic::new(GuestMemoryMmap::new()),
        )
        .unwrap();

        let vring_workers = daemon.get_epoll_handlers();

        // VhostUserVsockBackend support a single thread that handles the TX and RX queues
        let thread_count = backend.read().unwrap().threads.len();
        assert_eq!(thread_count, 1);
        assert_eq!(vring_workers.len(), thread_count);
    }
}

@ -1 +1 @@
Subproject commit c2f8c93e3796d8b3ea7dc339fad211457be9c238
Subproject commit 9751aaa0d0706964b1d4a228509a86bc25ffc0e7