mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
synced 2025-08-28 00:19:36 +00:00

Implement a new virtual PCI driver based on the VFIO framework. This driver allows users to pass PCI devices through to UML via VFIO. Currently, only MSI-X capable devices are supported, and drivers are assumed to use MSI-X.
Signed-off-by: Tiwei Bie <tiwei.btw@antgroup.com>
Link: https://patch.msgid.link/20250413154421.517878-1-tiwei.btw@antgroup.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
328 lines
7.0 KiB
C
328 lines
7.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 2025 Ant Group
|
|
* Author: Tiwei Bie <tiwei.btw@antgroup.com>
|
|
*/
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <stdio.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/ioctl.h>
|
|
#include <sys/eventfd.h>
|
|
#include <linux/limits.h>
|
|
#include <linux/vfio.h>
|
|
#include <linux/pci_regs.h>
|
|
#include <as-layout.h>
|
|
#include <um_malloc.h>
|
|
|
|
#include "vfio_user.h"
|
|
|
|
/*
 * Open the VFIO container node and verify that it is usable.
 *
 * Two checks are made on the freshly opened descriptor: the reported API
 * version must equal VFIO_API_VERSION, and the Type1 IOMMU extension must
 * be reported as available.
 *
 * Returns the container file descriptor on success. On failure the
 * descriptor (if any) is closed and a negative errno value is returned;
 * -EINVAL is used when a check fails without the ioctl itself failing.
 */
int uml_vfio_user_open_container(void)
{
	int container, ret;

	container = open("/dev/vfio/vfio", O_RDWR);
	if (container < 0)
		return -errno;

	ret = ioctl(container, VFIO_GET_API_VERSION);
	if (ret < 0) {
		ret = -errno;
		goto fail;
	}
	if (ret != VFIO_API_VERSION) {
		/* The ioctl worked but reported an unexpected version. */
		ret = -EINVAL;
		goto fail;
	}

	ret = ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
	if (ret < 0) {
		ret = -errno;
		goto fail;
	}
	if (ret == 0) {
		/* The ioctl worked but the extension is not available. */
		ret = -EINVAL;
		goto fail;
	}

	return container;

fail:
	/* errno has already been captured in ret, so close() may clobber it. */
	close(container);
	return ret;
}
|
|
|
|
int uml_vfio_user_setup_iommu(int container)
|
|
{
|
|
/*
|
|
* This is a bit tricky. See the big comment in
|
|
* vhost_user_set_mem_table() in virtio_uml.c.
|
|
*/
|
|
unsigned long reserved = uml_reserved - uml_physmem;
|
|
struct vfio_iommu_type1_dma_map dma_map = {
|
|
.argsz = sizeof(dma_map),
|
|
.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
|
|
.vaddr = uml_reserved,
|
|
.iova = reserved,
|
|
.size = physmem_size - reserved,
|
|
};
|
|
|
|
if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0)
|
|
return -errno;
|
|
|
|
if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0)
|
|
return -errno;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int uml_vfio_user_get_group_id(const char *device)
|
|
{
|
|
char *path, *buf, *end;
|
|
const char *name;
|
|
int r;
|
|
|
|
path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
|
|
if (!path)
|
|
return -ENOMEM;
|
|
|
|
sprintf(path, "/sys/bus/pci/devices/%s/iommu_group", device);
|
|
|
|
buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL);
|
|
if (!buf) {
|
|
r = -ENOMEM;
|
|
goto free_path;
|
|
}
|
|
|
|
r = readlink(path, buf, PATH_MAX);
|
|
if (r < 0) {
|
|
r = -errno;
|
|
goto free_buf;
|
|
}
|
|
buf[r] = '\0';
|
|
|
|
name = basename(buf);
|
|
|
|
r = strtoul(name, &end, 10);
|
|
if (*end != '\0' || end == name) {
|
|
r = -EINVAL;
|
|
goto free_buf;
|
|
}
|
|
|
|
free_buf:
|
|
kfree(buf);
|
|
free_path:
|
|
kfree(path);
|
|
return r;
|
|
}
|
|
|
|
int uml_vfio_user_open_group(int group_id)
|
|
{
|
|
char *path;
|
|
int fd;
|
|
|
|
path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
|
|
if (!path)
|
|
return -ENOMEM;
|
|
|
|
sprintf(path, "/dev/vfio/%d", group_id);
|
|
|
|
fd = open(path, O_RDWR);
|
|
if (fd < 0) {
|
|
fd = -errno;
|
|
goto out;
|
|
}
|
|
|
|
out:
|
|
kfree(path);
|
|
return fd;
|
|
}
|
|
|
|
/*
 * Attach the VFIO group @group to the container @container.
 *
 * Returns 0 on success or the negated errno from the ioctl on failure.
 */
int uml_vfio_user_set_container(int container, int group)
{
	int ret = ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);

	return ret < 0 ? -errno : 0;
}
|
|
|
|
/*
 * Detach the VFIO group @group from its container.
 *
 * The address of @container is handed to the ioctl as its argument.
 *
 * Returns 0 on success or the negated errno from the ioctl on failure.
 */
int uml_vfio_user_unset_container(int container, int group)
{
	int ret = ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container);

	return ret < 0 ? -errno : 0;
}
|
|
|
|
static int vfio_set_irqs(int device, int start, int count, int *irqfd)
|
|
{
|
|
struct vfio_irq_set *irq_set;
|
|
int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count;
|
|
int err = 0;
|
|
|
|
irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL);
|
|
if (!irq_set)
|
|
return -ENOMEM;
|
|
|
|
irq_set->argsz = argsz;
|
|
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
|
|
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
|
|
irq_set->start = start;
|
|
irq_set->count = count;
|
|
memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count);
|
|
|
|
if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) {
|
|
err = -errno;
|
|
goto out;
|
|
}
|
|
|
|
out:
|
|
kfree(irq_set);
|
|
return err;
|
|
}
|
|
|
|
int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev,
|
|
int group, const char *device)
|
|
{
|
|
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
|
|
struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
|
|
int err, i;
|
|
|
|
dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device);
|
|
if (dev->device < 0)
|
|
return -errno;
|
|
|
|
if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) {
|
|
err = -errno;
|
|
goto close_device;
|
|
}
|
|
|
|
dev->num_regions = device_info.num_regions;
|
|
if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1)
|
|
dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1;
|
|
|
|
dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions,
|
|
UM_GFP_KERNEL);
|
|
if (!dev->region) {
|
|
err = -ENOMEM;
|
|
goto close_device;
|
|
}
|
|
|
|
for (i = 0; i < dev->num_regions; i++) {
|
|
struct vfio_region_info region = {
|
|
.argsz = sizeof(region),
|
|
.index = i,
|
|
};
|
|
if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, ®ion) < 0) {
|
|
err = -errno;
|
|
goto free_region;
|
|
}
|
|
dev->region[i].size = region.size;
|
|
dev->region[i].offset = region.offset;
|
|
}
|
|
|
|
/* Only MSI-X is supported currently. */
|
|
irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX;
|
|
if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) {
|
|
err = -errno;
|
|
goto free_region;
|
|
}
|
|
|
|
dev->irq_count = irq_info.count;
|
|
|
|
dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL);
|
|
if (!dev->irqfd) {
|
|
err = -ENOMEM;
|
|
goto free_region;
|
|
}
|
|
|
|
memset(dev->irqfd, -1, sizeof(int) * dev->irq_count);
|
|
|
|
err = vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
|
|
if (err)
|
|
goto free_irqfd;
|
|
|
|
return 0;
|
|
|
|
free_irqfd:
|
|
kfree(dev->irqfd);
|
|
free_region:
|
|
kfree(dev->region);
|
|
close_device:
|
|
close(dev->device);
|
|
return err;
|
|
}
|
|
|
|
void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev)
|
|
{
|
|
kfree(dev->irqfd);
|
|
kfree(dev->region);
|
|
close(dev->device);
|
|
}
|
|
|
|
int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index)
|
|
{
|
|
int irqfd;
|
|
|
|
irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
|
|
if (irqfd < 0)
|
|
return -errno;
|
|
|
|
dev->irqfd[index] = irqfd;
|
|
return irqfd;
|
|
}
|
|
|
|
void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index)
|
|
{
|
|
close(dev->irqfd[index]);
|
|
dev->irqfd[index] = -1;
|
|
}
|
|
|
|
int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev)
|
|
{
|
|
return vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
|
|
}
|
|
|
|
static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index,
|
|
uint64_t offset, void *buf, uint64_t size)
|
|
{
|
|
if (index >= dev->num_regions || offset + size > dev->region[index].size)
|
|
return -EINVAL;
|
|
|
|
if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0)
|
|
return -errno;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index,
|
|
uint64_t offset, const void *buf, uint64_t size)
|
|
{
|
|
if (index >= dev->num_regions || offset + size > dev->region[index].size)
|
|
return -EINVAL;
|
|
|
|
if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0)
|
|
return -errno;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Read @size bytes at @offset from the device's PCI config space region.
 *
 * Returns whatever vfio_region_read() returns for the
 * VFIO_PCI_CONFIG_REGION_INDEX region.
 */
int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
				unsigned int offset, void *buf, int size)
{
	const unsigned int cfg_region = VFIO_PCI_CONFIG_REGION_INDEX;

	return vfio_region_read(dev, cfg_region, offset, buf, size);
}
|
|
|
|
/*
 * Write @size bytes at @offset to the device's PCI config space region.
 *
 * Returns whatever vfio_region_write() returns for the
 * VFIO_PCI_CONFIG_REGION_INDEX region.
 */
int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
				 unsigned int offset, const void *buf, int size)
{
	const unsigned int cfg_region = VFIO_PCI_CONFIG_REGION_INDEX;

	return vfio_region_write(dev, cfg_region, offset, buf, size);
}
|
|
|
|
/*
 * Read @size bytes at @offset from the region whose index is @bar.
 *
 * Returns whatever vfio_region_read() returns for that region.
 */
int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
			   unsigned int offset, void *buf, int size)
{
	/* The BAR number is used directly as the region index. */
	unsigned int region_index = bar;

	return vfio_region_read(dev, region_index, offset, buf, size);
}
|
|
|
|
/*
 * Write @size bytes at @offset to the region whose index is @bar.
 *
 * Returns whatever vfio_region_write() returns for that region.
 */
int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
			    unsigned int offset, const void *buf, int size)
{
	/* The BAR number is used directly as the region index. */
	unsigned int region_index = bar;

	return vfio_region_write(dev, region_index, offset, buf, size);
}
|