diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..e2721a1ff13d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2169,6 +2169,7 @@ struct file_operations { int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, unsigned int poll_flags); + int (*mmap_prepare)(struct vm_area_desc *); } __randomize_layout; /* Supports async buffered reads */ @@ -2238,11 +2239,35 @@ struct inode_operations { struct offset_ctx *(*get_offset_ctx)(struct inode *inode); } ____cacheline_aligned; +/* Did the driver provide valid mmap hook configuration? */ +static inline bool file_has_valid_mmap_hooks(struct file *file) +{ + bool has_mmap = file->f_op->mmap; + bool has_mmap_prepare = file->f_op->mmap_prepare; + + /* Hooks are mutually exclusive. */ + if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) + return false; + if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare)) + return false; + + return true; +} + static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { + if (WARN_ON_ONCE(file->f_op->mmap_prepare)) + return -EINVAL; + return file->f_op->mmap(file, vma); } +static inline int __call_mmap_prepare(struct file *file, + struct vm_area_desc *desc) +{ + return file->f_op->mmap_prepare(desc); +} + extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e76bade9ebb1..15808cad2bc1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -763,6 +763,30 @@ struct vma_numab_state { int prev_scan_seq; }; +/* + * Describes a VMA that is about to be mmap()'ed. Drivers may choose to + * manipulate mutable fields which will cause those fields to be updated in the + * resultant VMA. + * + * Helper functions are not required for manipulating any field. + */ +struct vm_area_desc { + /* Immutable state. */ + struct mm_struct *mm; + unsigned long start; + unsigned long end; + + /* Mutable fields. Populated with initial state. */ + pgoff_t pgoff; + struct file *file; + vm_flags_t vm_flags; + pgprot_t page_prot; + + /* Write-only fields. */ + const struct vm_operations_struct *vm_ops; + void *private_data; +}; + /* * This struct describes a virtual memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory diff --git a/mm/memory.c b/mm/memory.c index 68c1d962d0ad..99af83434e7c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -527,10 +527,11 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, dump_page(page, "bad pte"); pr_alert("addr:%px vm_flags:%08lx anon_vma:%px mapping:%px index:%lx\n", (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); - pr_alert("file:%pD fault:%ps mmap:%ps read_folio:%ps\n", + pr_alert("file:%pD fault:%ps mmap:%ps mmap_prepare: %ps read_folio:%ps\n", vma->vm_file, vma->vm_ops ? vma->vm_ops->fault : NULL, vma->vm_file ? vma->vm_file->f_op->mmap : NULL, + vma->vm_file ? vma->vm_file->f_op->mmap_prepare : NULL, mapping ? mapping->a_ops->read_folio : NULL); dump_stack(); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); diff --git a/mm/mmap.c b/mm/mmap.c index 81dd962a1cfc..50f902c08341 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -475,7 +475,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, vm_flags &= ~VM_MAYEXEC; } - if (!file->f_op->mmap) + if (!file_has_valid_mmap_hooks(file)) return -ENODEV; if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) return -EINVAL; diff --git a/mm/vma.c b/mm/vma.c index 1f2634b29568..3f32e04bb6cc 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -17,6 +17,11 @@ struct mmap_state { unsigned long pglen; unsigned long flags; struct file *file; + pgprot_t page_prot; + + /* User-defined fields, perhaps updated by .mmap_prepare(). */ + const struct vm_operations_struct *vm_ops; + void *vm_private_data; unsigned long charged; bool retry_merge; @@ -40,6 +45,7 @@ struct mmap_state { .pglen = PHYS_PFN(len_), \ .flags = flags_, \ .file = file_, \ + .page_prot = vm_get_page_prot(flags_), \ } #define VMG_MMAP_STATE(name, map_, vma_) \ @@ -2385,6 +2391,10 @@ static int __mmap_new_file_vma(struct mmap_state *map, int error; vma->vm_file = get_file(map->file); + + if (!map->file->f_op->mmap) + return 0; + error = mmap_file(vma->vm_file, vma); if (error) { fput(vma->vm_file); @@ -2441,7 +2451,7 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap) vma_iter_config(vmi, map->addr, map->end); vma_set_range(vma, map->addr, map->end, map->pgoff); vm_flags_init(vma, map->flags); - vma->vm_page_prot = vm_get_page_prot(map->flags); + vma->vm_page_prot = map->page_prot; if (vma_iter_prealloc(vmi, vma)) { error = -ENOMEM; @@ -2528,6 +2538,56 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma) vma_set_page_prot(vma); } +/* + * Invoke the f_op->mmap_prepare() callback for a file-backed mapping that + * specifies it. + * + * This is called prior to any merge attempt, and updates whitelisted fields + * that are permitted to be updated by the caller. + * + * All but user-defined fields will be pre-populated with original values. + * + * Returns 0 on success, or an error code otherwise. + */ +static int call_mmap_prepare(struct mmap_state *map) +{ + int err; + struct vm_area_desc desc = { + .mm = map->mm, + .start = map->addr, + .end = map->end, + + .pgoff = map->pgoff, + .file = map->file, + .vm_flags = map->flags, + .page_prot = map->page_prot, + }; + + /* Invoke the hook. */ + err = __call_mmap_prepare(map->file, &desc); + if (err) + return err; + + /* Update fields permitted to be changed. */ + map->pgoff = desc.pgoff; + map->file = desc.file; + map->flags = desc.vm_flags; + map->page_prot = desc.page_prot; + /* User-defined fields. */ + map->vm_ops = desc.vm_ops; + map->vm_private_data = desc.private_data; + + return 0; +} + +static void set_vma_user_defined_fields(struct vm_area_struct *vma, + struct mmap_state *map) +{ + if (map->vm_ops) + vma->vm_ops = map->vm_ops; + vma->vm_private_data = map->vm_private_data; +} + static unsigned long __mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, struct list_head *uf) @@ -2535,10 +2595,13 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL; int error; + bool have_mmap_prepare = file && file->f_op->mmap_prepare; VMA_ITERATOR(vmi, mm, addr); MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vm_flags, file); error = __mmap_prepare(&map, uf); + if (!error && have_mmap_prepare) + error = call_mmap_prepare(&map); if (error) goto abort_munmap; @@ -2556,6 +2619,9 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr, goto unacct_error; } + if (have_mmap_prepare) + set_vma_user_defined_fields(vma, &map); + /* If flags changed, we might be able to merge, so try again. */ if (map.retry_merge) { struct vm_area_struct *merged; diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 198abe66de5a..f6e45e62da3a 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -253,8 +253,40 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access */ }; +struct vm_area_struct; + +/* + * Describes a VMA that is about to be mmap()'ed. Drivers may choose to + * manipulate mutable fields which will cause those fields to be updated in the + * resultant VMA. + * + * Helper functions are not required for manipulating any field. + */ +struct vm_area_desc { + /* Immutable state. */ + struct mm_struct *mm; + unsigned long start; + unsigned long end; + + /* Mutable fields. Populated with initial state. */ + pgoff_t pgoff; + struct file *file; + vm_flags_t vm_flags; + pgprot_t page_prot; + + /* Write-only fields. */ + const struct vm_operations_struct *vm_ops; + void *private_data; +}; + +struct file_operations { + int (*mmap)(struct file *, struct vm_area_struct *); + int (*mmap_prepare)(struct vm_area_desc *); +}; + struct file { struct address_space *f_mapping; + const struct file_operations *f_op; }; #define VMA_LOCK_OFFSET 0x40000000 @@ -1125,11 +1157,6 @@ static inline void vm_flags_clear(struct vm_area_struct *vma, vma->__vm_flags &= ~flags; } -static inline int call_mmap(struct file *, struct vm_area_struct *) -{ - return 0; -} - static inline int shmem_zero_setup(struct vm_area_struct *) { return 0; @@ -1405,4 +1432,33 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma) (void)vma; } +/* Did the driver provide valid mmap hook configuration? */ +static inline bool file_has_valid_mmap_hooks(struct file *file) +{ + bool has_mmap = file->f_op->mmap; + bool has_mmap_prepare = file->f_op->mmap_prepare; + + /* Hooks are mutually exclusive. */ + if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) + return false; + if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare)) + return false; + + return true; +} + +static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (WARN_ON_ONCE(file->f_op->mmap_prepare)) + return -EINVAL; + + return file->f_op->mmap(file, vma); +} + +static inline int __call_mmap_prepare(struct file *file, + struct vm_area_desc *desc) +{ + return file->f_op->mmap_prepare(desc); +} + #endif /* __MM_VMA_INTERNAL_H */