diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6a1632d0fba1..44844707d327 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -981,6 +981,15 @@ struct io_uring_zcrx_offsets { __u64 __resv[2]; }; +struct io_uring_zcrx_area_reg { + __u64 addr; + __u64 len; + __u64 rq_area_token; + __u32 flags; + __u32 __resv1; + __u64 __resv2[2]; +}; + /* * Argument for IORING_REGISTER_ZCRX_IFQ */ diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index af39b69eb4fd..20b884c84e55 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -77,7 +77,7 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) return 0; } -static int io_buffer_validate(struct iovec *iov) +int io_buffer_validate(struct iovec *iov) { unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1); diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index a6d883c62b22..abf86b5b8614 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -68,6 +68,7 @@ int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, unsigned size, unsigned type); int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, unsigned int size, unsigned int type); +int io_buffer_validate(struct iovec *iov); bool io_check_coalesce_buffer(struct page **page_array, int nr_pages, struct io_imu_folio_data *data); diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index f3ace7e8264d..04883a3ae80c 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -10,6 +10,7 @@ #include "kbuf.h" #include "memmap.h" #include "zcrx.h" +#include "rsrc.h" #define IO_RQ_MAX_ENTRIES 32768 @@ -44,6 +45,79 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq) ifq->rqes = NULL; } +static void io_zcrx_free_area(struct io_zcrx_area *area) +{ + kvfree(area->freelist); + kvfree(area->nia.niovs); + if (area->pages) { + unpin_user_pages(area->pages, area->nia.num_niovs); + kvfree(area->pages); + } + kfree(area); +} + +static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, + struct 
io_zcrx_area **res, + struct io_uring_zcrx_area_reg *area_reg) +{ + struct io_zcrx_area *area; + int i, ret, nr_pages; + struct iovec iov; + + if (area_reg->flags || area_reg->rq_area_token) + return -EINVAL; + if (area_reg->__resv1 || area_reg->__resv2[0] || area_reg->__resv2[1]) + return -EINVAL; + if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK) + return -EINVAL; + + iov.iov_base = u64_to_user_ptr(area_reg->addr); + iov.iov_len = area_reg->len; + ret = io_buffer_validate(&iov); + if (ret) + return ret; + + ret = -ENOMEM; + area = kzalloc(sizeof(*area), GFP_KERNEL); + if (!area) + goto err; + + area->pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len, + &nr_pages); + if (IS_ERR(area->pages)) { + ret = PTR_ERR(area->pages); + area->pages = NULL; + goto err; + } + area->nia.num_niovs = nr_pages; + + area->nia.niovs = kvmalloc_array(nr_pages, sizeof(area->nia.niovs[0]), + GFP_KERNEL | __GFP_ZERO); + if (!area->nia.niovs) + goto err; + + area->freelist = kvmalloc_array(nr_pages, sizeof(area->freelist[0]), + GFP_KERNEL | __GFP_ZERO); + if (!area->freelist) + goto err; + + for (i = 0; i < nr_pages; i++) + area->freelist[i] = i; + + area->free_count = nr_pages; + area->ifq = ifq; + /* we're only supporting one area per ifq for now */ + area->area_id = 0; + area_reg->rq_area_token = (u64)area->area_id << IORING_ZCRX_AREA_SHIFT; + spin_lock_init(&area->freelist_lock); + *res = area; + return 0; +err: + if (area) + io_zcrx_free_area(area); + return ret; +} + static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) { struct io_zcrx_ifq *ifq; @@ -59,6 +133,9 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq) { + if (ifq->area) + io_zcrx_free_area(ifq->area); + io_free_rbuf_ring(ifq); kfree(ifq); } @@ -66,6 +143,7 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq) int io_register_zcrx_ifq(struct io_ring_ctx *ctx, struct io_uring_zcrx_ifq_reg __user *arg) 
{ + struct io_uring_zcrx_area_reg area; struct io_uring_zcrx_ifq_reg reg; struct io_uring_region_desc rd; struct io_zcrx_ifq *ifq; @@ -99,7 +177,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, } reg.rq_entries = roundup_pow_of_two(reg.rq_entries); - if (!reg.area_ptr) + if (copy_from_user(&area, u64_to_user_ptr(reg.area_ptr), sizeof(area))) return -EFAULT; ifq = io_zcrx_ifq_alloc(ctx); @@ -110,6 +188,10 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, if (ret) goto err; + ret = io_zcrx_create_area(ifq, &ifq->area, &area); + if (ret) + goto err; + ifq->rq_entries = reg.rq_entries; ifq->if_rxq = reg.if_rxq; @@ -122,7 +204,10 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, ret = -EFAULT; goto err; } - + if (copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) { + ret = -EFAULT; + goto err; + } ctx->ifq = ifq; return 0; err: diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 58e4ab6c6083..53fd94b65b38 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -3,9 +3,25 @@ #define IOU_ZC_RX_H #include <linux/io_uring_types.h> +#include <net/page_pool/types.h> + +struct io_zcrx_area { + struct net_iov_area nia; + struct io_zcrx_ifq *ifq; + + u16 area_id; + struct page **pages; + + /* freelist */ + spinlock_t freelist_lock ____cacheline_aligned_in_smp; + u32 free_count; + u32 *freelist; +}; struct io_zcrx_ifq { struct io_ring_ctx *ctx; + struct io_zcrx_area *area; + struct io_uring *rq_ring; struct io_uring_zcrx_rqe *rqes; u32 rq_entries;