1 Star 0 Fork 45

Lostway/rdma-core

forked from src-openEuler/rdma-core 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0004-libhns-Fix-the-problem-of-sge-nums.patch 8.55 KB
一键复制 编辑 原始数据 按行查看 历史
From 448d82b2c62f09f1dd9c8045d34623dedef1c111 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Fri, 19 Nov 2021 20:21:21 +0800
Subject: [PATCH v4 04/10] libhns: Fix the problem of sge nums
Currently, the driver only uses max_send_sge to initialize the sge num
when creating a QP. So, in the sq inline scenario, the driver may not
have enough sge to send data. For example, if max_send_sge is 16 and
max_inline_data is 1024, the driver needs 1024/16=64 sge to send data
(one sge for every 16 bytes of inline data).
Therefore, the calculation method of sge num is modified to take the
maximum value of max_send_sge and max_inline_data/16 to solve this
problem.
Fixes: 11c81d0e3a98 ("libhns: Refactor process of setting extended sge")
Fixes: b7814b7b9715 ("libhns: Support inline data in extented sge space for RC")
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
providers/hns/hns_roce_u.c | 9 +++-
providers/hns/hns_roce_u.h | 3 ++
providers/hns/hns_roce_u_abi.h | 2 +-
providers/hns/hns_roce_u_hw_v2.c | 13 +----
providers/hns/hns_roce_u_verbs.c | 84 ++++++++++++++++++++++++--------
5 files changed, 77 insertions(+), 34 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index a46ceb9..1bd5bb1 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -103,9 +103,9 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
{
struct hns_roce_device *hr_dev = to_hr_dev(ibdev);
struct hns_roce_alloc_ucontext_resp resp = {};
+ struct hns_roce_alloc_ucontext cmd = {};
struct ibv_device_attr dev_attrs;
struct hns_roce_context *context;
- struct ibv_get_context cmd;
int i;
context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx,
@@ -113,7 +113,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
if (!context)
return NULL;
- if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof(cmd),
+ cmd.config |= HNS_ROCE_EXSGE_FLAGS;
+ if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp)))
goto err_free;
@@ -124,6 +125,10 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
else
context->cqe_size = HNS_ROCE_V3_CQE_SIZE;
+ context->config = resp.config;
+ if (resp.config & HNS_ROCE_RSP_EXSGE_FLAGS)
+ context->max_inline_data = resp.max_inline_data;
+
context->qp_table_shift = calc_table_shift(resp.qp_tab_size,
HNS_ROCE_QP_TABLE_BITS);
context->qp_table_mask = (1 << context->qp_table_shift) - 1;
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 5d90634..5388f9c 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -213,6 +213,8 @@ struct hns_roce_context {
unsigned int max_srq_sge;
int max_cqe;
unsigned int cqe_size;
+ uint32_t config;
+ unsigned int max_inline_data;
};
struct hns_roce_pd {
@@ -267,6 +269,7 @@ struct hns_roce_wq {
unsigned int head;
unsigned int tail;
unsigned int max_gs;
+ unsigned int ext_sge_cnt;
unsigned int rsv_sge;
unsigned int wqe_shift;
unsigned int shift; /* wq size is 2^shift */
diff --git a/providers/hns/hns_roce_u_abi.h b/providers/hns/hns_roce_u_abi.h
index 333f977..2753d30 100644
--- a/providers/hns/hns_roce_u_abi.h
+++ b/providers/hns/hns_roce_u_abi.h
@@ -47,7 +47,7 @@ DECLARE_DRV_CMD(hns_roce_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
hns_roce_ib_create_cq, hns_roce_ib_create_cq_resp);
DECLARE_DRV_CMD(hns_roce_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT,
- empty, hns_roce_ib_alloc_ucontext_resp);
+ hns_roce_ib_alloc_ucontext, hns_roce_ib_alloc_ucontext_resp);
DECLARE_DRV_CMD(hns_roce_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
hns_roce_ib_create_qp, hns_roce_ib_create_qp_resp);
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index bb4298f..ebe68bc 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -841,14 +841,6 @@ static void get_src_buf_info(void **src_addr, uint32_t *src_len,
}
}
-static unsigned int get_std_sge_num(struct hns_roce_qp *qp)
-{
- if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
- return 0;
-
- return HNS_ROCE_SGE_IN_WQE;
-}
-
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
struct hns_roce_sge_info *sge_info,
const void *buf_list,
@@ -858,12 +850,9 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
unsigned int sge_mask = qp->ex_sge.sge_cnt - 1;
void *dst_addr, *src_addr, *tail_bound_addr;
uint32_t src_len, tail_len;
- unsigned int std_sge_num;
int i;
- std_sge_num = get_std_sge_num(qp);
- if (sge_info->total_len >
- (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE)
+ if (sge_info->total_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE)
return EINVAL;
dst_addr = get_send_sge_ex(qp, sge_info->start_idx & sge_mask);
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index ba7f2ae..851b145 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -978,41 +978,88 @@ err_alloc:
return -ENOMEM;
}
-static unsigned int get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
+/**
+ * Calculate the sge num according to attr's max_send_sge
+ */
+static unsigned int get_sge_num_from_max_send_sge(bool is_ud,
+ uint32_t max_send_sge)
{
- if (qp->verbs_qp.qp.qp_type == IBV_QPT_UD)
- return qp->sq.max_gs;
+ unsigned int std_sge_num;
+ unsigned int min_sge;
- if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
- return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
+ std_sge_num = is_ud ? 0 : HNS_ROCE_SGE_IN_WQE;
+ min_sge = is_ud ? 1 : 0;
+ return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
+ min_sge;
+}
- return 0;
+/**
+ * Calculate the sge num according to attr's max_inline_data
+ */
+static unsigned int get_sge_num_from_max_inl_data(bool is_ud,
+ uint32_t max_inline_data)
+{
+ unsigned int inline_sge = 0;
+
+ inline_sge = max_inline_data / HNS_ROCE_SGE_SIZE;
+ /*
+ * Except in UD mode, there is no need to extend sge if
+ * max_inline_data / HNS_ROCE_SGE_SIZE does not exceed
+ * HNS_ROCE_SGE_IN_WQE.
+ */
+ if (!is_ud && (inline_sge <= HNS_ROCE_SGE_IN_WQE))
+ inline_sge = 0;
+
+ return inline_sge;
}
-static void set_ext_sge_param(struct hns_roce_device *hr_dev,
+static void set_ext_sge_param(struct hns_roce_context *ctx,
struct ibv_qp_init_attr_ex *attr,
struct hns_roce_qp *qp, unsigned int wr_cnt)
{
+ bool is_ud = (qp->verbs_qp.qp.qp_type == IBV_QPT_UD);
+ unsigned int ext_wqe_sge_cnt;
+ unsigned int inline_ext_sge;
unsigned int total_sge_cnt;
- unsigned int wqe_sge_cnt;
+ unsigned int std_sge_num;
qp->ex_sge.sge_shift = HNS_ROCE_SGE_SHIFT;
-
- qp->sq.max_gs = attr->cap.max_send_sge;
-
- wqe_sge_cnt = get_wqe_ext_sge_cnt(qp);
+ std_sge_num = is_ud ? 0 : HNS_ROCE_SGE_IN_WQE;
+ ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud,
+ attr->cap.max_send_sge);
+
+ if (ctx->config & HNS_ROCE_RSP_EXSGE_FLAGS) {
+ attr->cap.max_inline_data = min_t(uint32_t, roundup_pow_of_two(
+ attr->cap.max_inline_data),
+ ctx->max_inline_data);
+
+ inline_ext_sge = max(ext_wqe_sge_cnt,
+ get_sge_num_from_max_inl_data(is_ud,
+ attr->cap.max_inline_data));
+ qp->sq.ext_sge_cnt = inline_ext_sge ?
+ roundup_pow_of_two(inline_ext_sge) : 0;
+ qp->sq.max_gs = min((qp->sq.ext_sge_cnt + std_sge_num),
+ ctx->max_sge);
+
+ ext_wqe_sge_cnt = qp->sq.ext_sge_cnt;
+ } else {
+ qp->sq.max_gs = max(1U, attr->cap.max_send_sge);
+ qp->sq.max_gs = min(qp->sq.max_gs, ctx->max_sge);
+ qp->sq.ext_sge_cnt = qp->sq.max_gs;
+ }
/* If the number of extended sge is not zero, they MUST use the
* space of HNS_HW_PAGE_SIZE at least.
*/
- if (wqe_sge_cnt) {
- total_sge_cnt = roundup_pow_of_two(wr_cnt * wqe_sge_cnt);
- qp->ex_sge.sge_cnt =
- max(total_sge_cnt,
- (unsigned int)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
+ if (ext_wqe_sge_cnt) {
+ total_sge_cnt = roundup_pow_of_two(wr_cnt * ext_wqe_sge_cnt);
+ qp->ex_sge.sge_cnt = max(total_sge_cnt,
+ (unsigned int)HNS_HW_PAGE_SIZE /
+ HNS_ROCE_SGE_SIZE);
}
}
+
static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
struct hns_roce_qp *qp,
struct hns_roce_context *ctx)
@@ -1044,10 +1091,9 @@ static void hns_roce_set_qp_params(struct ibv_qp_init_attr_ex *attr,
qp->sq.wqe_cnt = cnt;
qp->sq.shift = hr_ilog32(cnt);
- set_ext_sge_param(hr_dev, attr, qp, cnt);
+ set_ext_sge_param(ctx, attr, qp, cnt);
qp->sq.max_post = min(ctx->max_qp_wr, cnt);
- qp->sq.max_gs = min(ctx->max_sge, qp->sq.max_gs);
qp->sq_signal_bits = attr->sq_sig_all ? 0 : 1;
--
2.30.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/Lostwayzxc/rdma-core.git
git@gitee.com:Lostwayzxc/rdma-core.git
Lostwayzxc
rdma-core
rdma-core
master

搜索帮助