0020-libhns-Introduce-DCA-for-RC-QP.patch
From f0d70762b8c69e735a1d15f8379b649bcad3929c Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Mon, 10 May 2021 17:13:09 +0800
Subject: [PATCH 20/25] libhns: Introduce DCA for RC QP
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ
------------------------------------------------------------------
The HIP09 introduces the DCA (Dynamic Context Attachment) feature, which
allows many RC QPs to share WQE buffers from a memory pool. This reduces
memory consumption when many QPs are inactive.

Two functions are defined for adding buffers to and removing buffers from
the memory pool; both work by calling the ib cmds implemented in the hns
kernel-space driver.

If a QP enables the DCA feature, its WQE buffer is attached to the memory
pool when the user starts to post WRs, and is detached once all CQEs have
been polled.
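
For illustration, a provider-internal caller could grow the pool on demand
until the kernel can satisfy an attach request. A minimal sketch, assuming a
hypothetical try_attach_dca_buf() helper (the per-QP attach path is added by
later patches in this series; only hns_roce_add_dca_mem() is introduced here):

/* Hypothetical usage sketch: grow the DCA pool until an attach succeeds.
 * try_attach_dca_buf() is assumed and is not part of this patch.
 */
static int attach_dca_wqe_buf(struct hns_roce_context *ctx,
                              struct hns_roce_qp *qp, uint32_t size)
{
        int ret;

        do {
                ret = try_attach_dca_buf(ctx, qp, size);
                if (ret != -ENOMEM)
                        break; /* success, or a hard failure */
                /* Pool exhausted: add one unit-aligned chunk, then retry. */
                ret = hns_roce_add_dca_mem(ctx, size);
        } while (ret == 0);

        return ret;
}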
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 61 +++++++++++++-
providers/hns/hns_roce_u.h | 21 ++++-
providers/hns/hns_roce_u_buf.c | 147 +++++++++++++++++++++++++++++++++
3 files changed, 226 insertions(+), 3 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 810b650..2272431 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -100,6 +100,53 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
 	return count_shift > size_shift ? count_shift - size_shift : 0;
 }
 
+static int hns_roce_mmap(struct hns_roce_device *hr_dev,
+			 struct hns_roce_context *context, int cmd_fd)
+{
+	int page_size = hr_dev->page_size;
+
+	context->uar = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+			    MAP_SHARED, cmd_fd, 0);
+	if (context->uar == MAP_FAILED)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int init_dca_context(struct hns_roce_context *ctx, int page_size)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+	int ret;
+
+	if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
+		return 0;
+
+	list_head_init(&dca_ctx->mem_list);
+	ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE);
+	if (ret)
+		return ret;
+
+	dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
+	dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
+	dca_ctx->mem_cnt = 0;
+
+	return 0;
+}
+
+static void uninit_dca_context(struct hns_roce_context *ctx)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+
+	if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS))
+		return;
+
+	pthread_spin_lock(&dca_ctx->lock);
+	hns_roce_cleanup_dca_mem(ctx);
+	pthread_spin_unlock(&dca_ctx->lock);
+
+	pthread_spin_destroy(&dca_ctx->lock);
+}
+
 static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd,
 			      struct hns_roce_alloc_ucontext_resp *resp,
 			      int page_size)
@@ -185,7 +232,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
 		return NULL;
 
 	cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS |
-		      HNS_ROCE_CQE_INLINE_FLAGS;
+		      HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA;
 	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
 				&resp.ibv_resp, sizeof(resp)))
 		goto err_free;
@@ -198,9 +245,15 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
 	if (context->uar == MAP_FAILED)
 		goto err_free;
 
+	if (init_dca_context(context, hr_dev->page_size))
+		goto err_free;
+
 	if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size))
 		goto reset_free;
 
+	if (hns_roce_mmap(hr_dev, context, cmd_fd))
+		goto uar_free;
+
 	pthread_mutex_init(&context->qp_table_mutex, NULL);
 	pthread_mutex_init(&context->srq_table_mutex, NULL);
 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
@@ -210,8 +263,11 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
 
 	return &context->ibv_ctx;
 
+uar_free:
+	if (context->reset_state)
+		munmap(context->reset_state, hr_dev->page_size);
 reset_free:
-	munmap(context->uar, hr_dev->page_size);
+	uninit_dca_context(context);
 err_free:
 	verbs_uninit_context(&context->ibv_ctx);
 	free(context);
@@ -226,6 +282,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx)
 	munmap(context->uar, hr_dev->page_size);
 	if (context->reset_state)
 		munmap(context->reset_state, hr_dev->page_size);
+	uninit_dca_context(context);
 	verbs_uninit_context(&context->ibv_ctx);
 	free(context);
 }
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 024932a..90b2205 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -147,6 +147,10 @@
 
 #define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
 
+enum {
+	HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15),
+};
+
 #define HNS_ROCE_QP_TABLE_BITS 8
 #define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS)
@@ -201,6 +205,18 @@ struct hns_roce_spinlock {
 	int need_lock;
 };
 
+#define HNS_DCA_MAX_MEM_SIZE ~0UL
+#define HNS_DCA_DEFAULT_UNIT_PAGES 16
+
+struct hns_roce_dca_ctx {
+	struct list_head mem_list;
+	pthread_spinlock_t lock;
+	int mem_cnt;
+	unsigned int unit_size;
+	uint64_t max_size;
+	uint64_t curr_size;
+};
+
 struct hns_roce_v2_reset_state {
 	uint32_t is_reset;
 	uint32_t hw_ready;
@@ -239,7 +255,7 @@ struct hns_roce_context {
 	unsigned int cqe_size;
 	uint32_t config;
 	unsigned int max_inline_data;
-
+	struct hns_roce_dca_ctx dca_ctx;
 	bool use_new_reset_flag;
 	bool reseted;
 };
@@ -586,6 +602,9 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
 
 void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
 
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
+int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
+
 void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
 
 bool is_hns_dev(struct ibv_device *device);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 471dd9c..02c43ae 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -60,3 +60,150 @@ void hns_roce_free_buf(struct hns_roce_buf *buf)
 
 	munmap(buf->buf, buf->length);
 }
+
+struct hns_roce_dca_mem {
+	uint32_t handle;
+	struct list_node entry;
+	struct hns_roce_buf buf;
+	struct hns_roce_context *ctx;
+};
+
+static void free_dca_mem(struct hns_roce_context *ctx,
+			 struct hns_roce_dca_mem *mem)
+{
+	hns_roce_free_buf(&mem->buf);
+	free(mem);
+}
+
+static struct hns_roce_dca_mem *alloc_dca_mem(uint32_t size)
+{
+	struct hns_roce_dca_mem *mem = NULL;
+	int ret;
+
+	mem = malloc(sizeof(struct hns_roce_dca_mem));
+	if (!mem) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	ret = hns_roce_alloc_buf(&mem->buf, size, HNS_HW_PAGE_SIZE);
+	if (ret) {
+		errno = ENOMEM;
+		free(mem);
+		return NULL;
+	}
+
+	return mem;
+}
+
+static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem)
+{
+	return (uintptr_t)dca_mem;
+}
+
+static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset)
+{
+	return dca_mem->buf.buf + offset;
+}
+
+static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key,
+			    void *addr, uint32_t size, uint32_t *handle)
+{
+	struct ib_uverbs_attr *attr;
+	int ret;
+
+	DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+			       HNS_IB_METHOD_DCA_MEM_REG, 4);
+	fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size);
+	fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR,
+			    ioctl_ptr_to_u64(addr));
+	fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_KEY, key);
+	attr = fill_attr_out_obj(cmd, HNS_IB_ATTR_DCA_MEM_REG_HANDLE);
+
+	ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+	if (ret) {
+		verbs_err(&ctx->ibv_ctx, "failed to reg DCA mem, ret = %d.\n",
+			  ret);
+		return ret;
+	}
+
+	*handle = read_attr_obj(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, attr);
+
+	return 0;
+}
+
+static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle)
+{
+	int ret;
+
+	DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+			       HNS_IB_METHOD_DCA_MEM_DEREG, 1);
+	fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle);
+	ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+	if (ret)
+		verbs_warn(&ctx->ibv_ctx,
+			   "failed to dereg DCA mem-%u, ret = %d.\n",
+			   handle, ret);
+}
+
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+	struct hns_roce_dca_mem *mem;
+	struct hns_roce_dca_mem *tmp;
+
+	list_for_each_safe(&dca_ctx->mem_list, mem, tmp, entry)
+		deregister_dca_mem(ctx, mem->handle);
+}
+
+static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
+				uint32_t alloc_size)
+{
+	bool enable;
+
+	pthread_spin_lock(&ctx->lock);
+
+	if (ctx->unit_size == 0) /* Pool size can't be increased */
+		enable = false;
+	else if (ctx->max_size == HNS_DCA_MAX_MEM_SIZE) /* Pool size no limit */
+		enable = true;
+	else /* Pool size doesn't exceed max size */
+		enable = (ctx->curr_size + alloc_size) < ctx->max_size;
+
+	pthread_spin_unlock(&ctx->lock);
+
+	return enable;
+}
+
+int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+	struct hns_roce_dca_mem *mem;
+	int ret;
+
+	if (!add_dca_mem_enabled(&ctx->dca_ctx, size))
+		return -ENOMEM;
+
+	/* Step 1: Alloc DCA mem address */
+	mem = alloc_dca_mem(
+		DIV_ROUND_UP(size, dca_ctx->unit_size) * dca_ctx->unit_size);
+	if (!mem)
+		return -ENOMEM;
+
+	/* Step 2: Register DCA mem uobject to pin user address */
+	ret = register_dca_mem(ctx, dca_mem_to_key(mem), dca_mem_addr(mem, 0),
+			       mem->buf.length, &mem->handle);
+	if (ret) {
+		free_dca_mem(ctx, mem);
+		return ret;
+	}
+
+	/* Step 3: Add DCA mem node to pool */
+	pthread_spin_lock(&dca_ctx->lock);
+	list_add_tail(&dca_ctx->mem_list, &mem->entry);
+	dca_ctx->mem_cnt++;
+	dca_ctx->curr_size += mem->buf.length;
+	pthread_spin_unlock(&dca_ctx->lock);
+
+	return 0;
+}
--
2.33.0
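
A note on the sizing policy above: hns_roce_add_dca_mem() rounds every request
up to whole units of HNS_DCA_DEFAULT_UNIT_PAGES pages. A standalone sketch of
that arithmetic, assuming a 4 KB page size (the main() driver is illustrative
only):

/* Illustration of the DCA pool sizing in hns_roce_add_dca_mem():
 * requests are rounded up to whole units of HNS_DCA_DEFAULT_UNIT_PAGES
 * pages, i.e. 64 KB units with 4 KB pages.
 */
#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define HNS_DCA_DEFAULT_UNIT_PAGES 16

int main(void)
{
        uint32_t page_size = 4096; /* assumed page size */
        uint32_t unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
        uint32_t requests[] = { 1024, 9 * 1024, 65 * 1024 };

        for (unsigned int i = 0; i < 3; i++) {
                uint32_t alloc = DIV_ROUND_UP(requests[i], unit_size) * unit_size;
                /* 1 KB -> 64 KB, 9 KB -> 64 KB, 65 KB -> 128 KB */
                printf("request %u -> allocate %u bytes\n", requests[i], alloc);
        }
        return 0;
}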