diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index eec16db2d536..a9ef9d751c5e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -617,6 +617,13 @@ enum mlx5_mkey_type { MLX5_MKEY_INDIRECT_DEVX, }; +struct mlx5r_cache_rb_key { + u8 ats:1; + unsigned int access_mode; + unsigned int access_flags; + unsigned int ndescs; +}; + struct mlx5_ib_mkey { u32 key; enum mlx5_mkey_type type; @@ -737,11 +744,9 @@ struct mlx5_cache_ent { unsigned long reserved; char name[4]; - u32 order; - u32 access_mode; - unsigned int ndescs; struct rb_node node; + struct mlx5r_cache_rb_key rb_key; u8 disabled:1; u8 fill_to_high_water:1; @@ -1320,14 +1325,13 @@ int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev); int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev); struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, - int order); + struct mlx5r_cache_rb_key rb_key, + bool persistent_entry); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - struct mlx5_cache_ent *ent, - int access_flags); + int access_flags, int access_mode, + int ndescs); -struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev, u32 order, - int access_flags); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, @@ -1350,7 +1354,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq); void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); -void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent); +int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev); void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, struct mlx5_ib_mr *mr, int flags); @@ -1369,7 +1373,10 @@ static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} -static inline void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) {} +static inline int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev) +{ + return 0; +} static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, struct mlx5_ib_mr *mr, int flags) {} diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 5cc618db277f..f534d3b76f40 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -292,11 +292,13 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, + (ent->rb_key.access_mode >> 2) & 0x7); MLX5_SET(mkc, mkc, translations_octword_size, - get_mkc_octo_size(ent->access_mode, ent->ndescs)); + get_mkc_octo_size(ent->rb_key.access_mode, + ent->rb_key.ndescs)); MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); } @@ -594,8 +596,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) if (err != -EAGAIN) { mlx5_ib_warn( dev, - "command failed order %d, err %d\n", - ent->order, err); + "add keys command failed, err %d\n", + err); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(1000)); } @@ -641,22 +643,49 @@ static void delayed_cache_work_func(struct work_struct *work) __cache_work_func(ent); } +static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1, + struct mlx5r_cache_rb_key key2) +{ + int res; + + res = key1.ats - key2.ats; + if (res) + return res; + + res = key1.access_mode - key2.access_mode; + if (res) + return res; + + res = key1.access_flags - key2.access_flags; + if (res) + return res; + + /* + * keep ndescs the last in the compare table since the find function + * searches for an exact match on all properties and only closest + * match in size. + */ + return key1.ndescs - key2.ndescs; +} + static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, struct mlx5_cache_ent *ent) { struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL; struct mlx5_cache_ent *cur; + int cmp; mutex_lock(&cache->rb_lock); /* Figure out where to put new node */ while (*new) { cur = rb_entry(*new, struct mlx5_cache_ent, node); parent = *new; - if (ent->order < cur->order) + cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key); + if (cmp > 0) new = &((*new)->rb_left); - if (ent->order > cur->order) + if (cmp < 0) new = &((*new)->rb_right); - if (ent->order == cur->order) { + if (cmp == 0) { mutex_unlock(&cache->rb_lock); return -EEXIST; } @@ -670,40 +699,45 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, return 0; } -static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev, - unsigned int order) +static struct mlx5_cache_ent * +mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev, + struct mlx5r_cache_rb_key rb_key) { struct rb_node *node = dev->cache.rb_root.rb_node; struct mlx5_cache_ent *cur, *smallest = NULL; + int cmp; /* * Find the smallest ent with order >= requested_order. */ while (node) { cur = rb_entry(node, struct mlx5_cache_ent, node); - if (cur->order > order) { + cmp = cache_ent_key_cmp(cur->rb_key, rb_key); + if (cmp > 0) { smallest = cur; node = node->rb_left; } - if (cur->order < order) + if (cmp < 0) node = node->rb_right; - if (cur->order == order) + if (cmp == 0) return cur; } - return smallest; + return (smallest && + smallest->rb_key.access_mode == rb_key.access_mode && + smallest->rb_key.access_flags == rb_key.access_flags && + smallest->rb_key.ats == rb_key.ats) ? + smallest : + NULL; } -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - struct mlx5_cache_ent *ent, - int access_flags) +static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, + struct mlx5_cache_ent *ent, + int access_flags) { struct mlx5_ib_mr *mr; int err; - if (!mlx5r_umr_can_reconfig(dev, 0, access_flags)) - return ERR_PTR(-EOPNOTSUPP); - mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -734,12 +768,44 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, return mr; } -struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev, - u32 order, int access_flags) +static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, + int access_flags) { - struct mlx5_cache_ent *ent = mkey_cache_ent_from_order(dev, order); + int ret = 0; - return mlx5_mr_cache_alloc(dev, ent, access_flags); + if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) && + MLX5_CAP_GEN(dev->mdev, atomic) && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + ret |= IB_ACCESS_REMOTE_ATOMIC; + + if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + ret |= IB_ACCESS_RELAXED_ORDERING; + + if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + ret |= IB_ACCESS_RELAXED_ORDERING; + + return ret; +} + +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, + int access_flags, int access_mode, + int ndescs) +{ + struct mlx5r_cache_rb_key rb_key = { + .ndescs = ndescs, + .access_mode = access_mode, + .access_flags = get_unchangeable_access_flags(dev, access_flags) + }; + struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key); + + if (!ent) + return ERR_PTR(-EOPNOTSUPP); + + return _mlx5_mr_cache_alloc(dev, ent, access_flags); } static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) @@ -766,28 +832,32 @@ static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) dev->cache.fs_root = NULL; } +static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev, + struct mlx5_cache_ent *ent) +{ + int order = order_base_2(ent->rb_key.ndescs); + struct dentry *dir; + + if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) + order = MLX5_IMR_KSM_CACHE_ENTRY + 2; + + sprintf(ent->name, "%d", order); + dir = debugfs_create_dir(ent->name, dev->cache.fs_root); + debugfs_create_file("size", 0600, dir, ent, &size_fops); + debugfs_create_file("limit", 0600, dir, ent, &limit_fops); + debugfs_create_ulong("cur", 0400, dir, &ent->stored); + debugfs_create_u32("miss", 0600, dir, &ent->miss); +} + static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev) { + struct dentry *dbg_root = mlx5_debugfs_get_dev_root(dev->mdev); struct mlx5_mkey_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; - struct dentry *dir; - int i; if (!mlx5_debugfs_root || dev->is_rep) return; - dir = mlx5_debugfs_get_dev_root(dev->mdev); - cache->fs_root = debugfs_create_dir("mr_cache", dir); - - for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { - ent = mkey_cache_ent_from_order(dev, i); - sprintf(ent->name, "%d", ent->order); - dir = debugfs_create_dir(ent->name, cache->fs_root); - debugfs_create_file("size", 0600, dir, ent, &size_fops); - debugfs_create_file("limit", 0600, dir, ent, &limit_fops); - debugfs_create_ulong("cur", 0400, dir, &ent->stored); - debugfs_create_u32("miss", 0600, dir, &ent->miss); - } + cache->fs_root = debugfs_create_dir("mr_cache", dbg_root); } static void delay_time_func(struct timer_list *t) @@ -798,9 +868,11 @@ static void delay_time_func(struct timer_list *t) } struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, - int order) + struct mlx5r_cache_rb_key rb_key, + bool persistent_entry) { struct mlx5_cache_ent *ent; + int order; int ret; ent = kzalloc(sizeof(*ent), GFP_KERNEL); @@ -808,7 +880,7 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, return ERR_PTR(-ENOMEM); xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); - ent->order = order; + ent->rb_key = rb_key; ent->dev = dev; INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); @@ -818,13 +890,36 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev, kfree(ent); return ERR_PTR(ret); } + + if (persistent_entry) { + if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) + order = MLX5_IMR_KSM_CACHE_ENTRY; + else + order = order_base_2(rb_key.ndescs) - 2; + + if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && + !dev->is_rep && mlx5_core_is_pf(dev->mdev) && + mlx5r_umr_can_load_pas(dev, 0)) + ent->limit = dev->mdev->profile.mr_cache[order].limit; + else + ent->limit = 0; + + mlx5_mkey_cache_debugfs_add_ent(dev, ent); + } + return ent; } int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mkey_cache *cache = &dev->cache; + struct rb_root *root = &dev->cache.rb_root; + struct mlx5r_cache_rb_key rb_key = { + .access_mode = MLX5_MKC_ACCESS_MODE_MTT, + }; struct mlx5_cache_ent *ent; + struct rb_node *node; + int ret; int i; mutex_init(&dev->slow_path_mutex); @@ -838,33 +933,32 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); timer_setup(&dev->delay_timer, delay_time_func, 0); - for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { - ent = mlx5r_cache_create_ent(dev, i); - - if (i > MKEY_CACHE_LAST_STD_ENTRY) { - mlx5_odp_init_mkey_cache_entry(ent); - continue; + mlx5_mkey_cache_debugfs_init(dev); + for (i = 0; i <= mkey_cache_max_order(dev); i++) { + rb_key.ndescs = 1 << (i + 2); + ent = mlx5r_cache_create_ent(dev, rb_key, true); + if (IS_ERR(ent)) { + ret = PTR_ERR(ent); + goto err; } + } - if (ent->order > mkey_cache_max_order(dev)) - continue; + ret = mlx5_odp_init_mkey_cache(dev); + if (ret) + goto err; - ent->ndescs = 1 << ent->order; - ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && - !dev->is_rep && mlx5_core_is_pf(dev->mdev) && - mlx5r_umr_can_load_pas(dev, 0)) - ent->limit = dev->mdev->profile.mr_cache[i].limit; - else - ent->limit = 0; + for (node = rb_first(root); node; node = rb_next(node)) { + ent = rb_entry(node, struct mlx5_cache_ent, node); xa_lock_irq(&ent->mkeys); queue_adjust_cache_locked(ent); xa_unlock_irq(&ent->mkeys); } - mlx5_mkey_cache_debugfs_init(dev); - return 0; + +err: + mlx5_ib_warn(dev, "failed to create mkey cache entry\n"); + return ret; } int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) @@ -965,7 +1059,7 @@ static int get_octo_len(u64 addr, u64 len, int page_shift) static int mkey_cache_max_order(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return MKEY_CACHE_LAST_STD_ENTRY + 2; + return MKEY_CACHE_LAST_STD_ENTRY; return MLX5_MAX_UMR_SHIFT; } @@ -995,6 +1089,9 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags) { + struct mlx5r_cache_rb_key rb_key = { + .access_mode = MLX5_MKC_ACCESS_MODE_MTT, + }; struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; @@ -1007,8 +1104,11 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, 0, iova); if (WARN_ON(!page_size)) return ERR_PTR(-EINVAL); - ent = mkey_cache_ent_from_order( - dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); + + rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size); + rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags); + rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags); + ent = mkey_cache_ent_from_rb_key(dev, rb_key); /* * Matches access in alloc_cache_mr(). If the MR can't come from the * cache then synchronously create an uncached one. @@ -1022,7 +1122,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, return mr; } - mr = mlx5_mr_cache_alloc(dev, ent, access_flags); + mr = _mlx5_mr_cache_alloc(dev, ent, access_flags); if (IS_ERR(mr)) return mr; @@ -1451,7 +1551,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); if (WARN_ON(!*page_size)) return false; - return (1ULL << mr->mmkey.cache_ent->order) >= + return (mr->mmkey.cache_ent->rb_key.ndescs) >= ib_umem_num_dma_blocks(new_umem, *page_size); } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 71c3c611e10a..c51d6c9a4c87 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -405,7 +405,6 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, unsigned long idx) { - int order = order_base_2(MLX5_IMR_MTT_ENTRIES); struct mlx5_ib_dev *dev = mr_to_mdev(imr); struct ib_umem_odp *odp; struct mlx5_ib_mr *mr; @@ -418,8 +417,9 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - BUILD_BUG_ON(order > MKEY_CACHE_LAST_STD_ENTRY); - mr = mlx5_mr_cache_alloc_order(dev, order, imr->access_flags); + mr = mlx5_mr_cache_alloc(dev, imr->access_flags, + MLX5_MKC_ACCESS_MODE_MTT, + MLX5_IMR_MTT_ENTRIES); if (IS_ERR(mr)) { ib_umem_odp_release(odp); return mr; @@ -493,8 +493,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = mlx5_mr_cache_alloc_order(dev, MLX5_IMR_KSM_CACHE_ENTRY, - access_flags); + imr = mlx5_mr_cache_alloc(dev, access_flags, MLX5_MKC_ACCESS_MODE_KSM, + mlx5_imr_ksm_entries); if (IS_ERR(imr)) { ib_umem_odp_release(umem_odp); return imr; @@ -1587,12 +1587,22 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) return err; } -void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) +int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev) { - if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) - return; - ent->ndescs = mlx5_imr_ksm_entries; - ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM; + struct mlx5r_cache_rb_key rb_key = { + .access_mode = MLX5_MKC_ACCESS_MODE_KSM, + .ndescs = mlx5_imr_ksm_entries, + }; + struct mlx5_cache_ent *ent; + + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) + return 0; + + ent = mlx5r_cache_create_ent(dev, rb_key, true); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + return 0; } static const struct ib_device_ops mlx5_ib_dev_odp_ops = {