1 Star 0 Fork 46

Funda Wang/rdma-core

forked from src-openEuler/rdma-core 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0009-libhns-Add-support-for-lock-free-CQ.patch 8.24 KB
一键复制 编辑 原始数据 按行查看 历史
zzry 提交于 2024-03-26 20:13 . Support reporting wc as software mode.
From cac8fdd87cd6e222ab5184f3d91dfc99bb922627 Mon Sep 17 00:00:00 2001
From: zzry <1245464216@qq.com>
Date: Fri, 8 Mar 2024 16:29:34 +0800
Subject: [PATCH 09/10] libhns: Add support for lock-free CQ
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I97WST
------------------------------------------------------------------
Drop the CQ spinlock when the CQ is associated with a parent domain (PAD) that holds a thread domain (TD).
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
providers/hns/hns_roce_u.h | 3 +-
providers/hns/hns_roce_u_hw_v2.c | 46 +++++++++++++-------------
providers/hns/hns_roce_u_verbs.c | 56 ++++++++++++++++++++++++++++----
3 files changed, 74 insertions(+), 31 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 5732e39..0035e36 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -255,7 +255,7 @@ struct hns_roce_pad {
struct hns_roce_cq {
struct verbs_cq verbs_cq;
struct hns_roce_buf buf;
- pthread_spinlock_t lock;
+ struct hns_roce_spinlock hr_lock;
unsigned int cqn;
unsigned int cq_depth;
unsigned int cons_index;
@@ -265,6 +265,7 @@ struct hns_roce_cq {
unsigned long flags;
unsigned int cqe_size;
struct hns_roce_v2_cqe *cqe;
+ struct ibv_pd *parent_domain;
};
struct hns_roce_idx_que {
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 90a76e2..2fb4d72 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -268,9 +268,9 @@ static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, unsigned int nreq,
if (cur + nreq < wq->max_post)
return 0;
- pthread_spin_lock(&cq->lock);
+ hns_roce_spin_lock(&cq->hr_lock);
cur = wq->head - wq->tail;
- pthread_spin_unlock(&cq->lock);
+ hns_roce_spin_unlock(&cq->hr_lock);
return cur + nreq >= wq->max_post;
}
@@ -724,7 +724,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
int err = V2_CQ_OK;
int npolled;
- pthread_spin_lock(&cq->lock);
+ hns_roce_spin_lock(&cq->hr_lock);
for (npolled = 0; npolled < ne; ++npolled) {
err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
@@ -739,7 +739,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
update_cq_db(ctx, cq);
}
- pthread_spin_unlock(&cq->lock);
+ hns_roce_spin_unlock(&cq->hr_lock);
return err == V2_CQ_POLL_ERR ? err : npolled;
}
@@ -1510,9 +1510,9 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
static void hns_roce_v2_cq_clean(struct hns_roce_cq *cq, unsigned int qpn,
struct hns_roce_srq *srq)
{
- pthread_spin_lock(&cq->lock);
+ hns_roce_spin_lock(&cq->hr_lock);
__hns_roce_v2_cq_clean(cq, qpn, srq);
- pthread_spin_unlock(&cq->lock);
+ hns_roce_spin_unlock(&cq->hr_lock);
}
static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
@@ -1600,18 +1600,18 @@ static void hns_roce_lock_cqs(struct ibv_qp *qp)
if (send_cq && recv_cq) {
if (send_cq == recv_cq) {
- pthread_spin_lock(&send_cq->lock);
+ hns_roce_spin_lock(&send_cq->hr_lock);
} else if (send_cq->cqn < recv_cq->cqn) {
- pthread_spin_lock(&send_cq->lock);
- pthread_spin_lock(&recv_cq->lock);
+ hns_roce_spin_lock(&send_cq->hr_lock);
+ hns_roce_spin_lock(&recv_cq->hr_lock);
} else {
- pthread_spin_lock(&recv_cq->lock);
- pthread_spin_lock(&send_cq->lock);
+ hns_roce_spin_lock(&recv_cq->hr_lock);
+ hns_roce_spin_lock(&send_cq->hr_lock);
}
} else if (send_cq) {
- pthread_spin_lock(&send_cq->lock);
+ hns_roce_spin_lock(&send_cq->hr_lock);
} else if (recv_cq) {
- pthread_spin_lock(&recv_cq->lock);
+ hns_roce_spin_lock(&recv_cq->hr_lock);
}
}
@@ -1622,18 +1622,18 @@ static void hns_roce_unlock_cqs(struct ibv_qp *qp)
if (send_cq && recv_cq) {
if (send_cq == recv_cq) {
- pthread_spin_unlock(&send_cq->lock);
+ hns_roce_spin_unlock(&send_cq->hr_lock);
} else if (send_cq->cqn < recv_cq->cqn) {
- pthread_spin_unlock(&recv_cq->lock);
- pthread_spin_unlock(&send_cq->lock);
+ hns_roce_spin_unlock(&recv_cq->hr_lock);
+ hns_roce_spin_unlock(&send_cq->hr_lock);
} else {
- pthread_spin_unlock(&send_cq->lock);
- pthread_spin_unlock(&recv_cq->lock);
+ hns_roce_spin_unlock(&send_cq->hr_lock);
+ hns_roce_spin_unlock(&recv_cq->hr_lock);
}
} else if (send_cq) {
- pthread_spin_unlock(&send_cq->lock);
+ hns_roce_spin_unlock(&send_cq->hr_lock);
} else if (recv_cq) {
- pthread_spin_unlock(&recv_cq->lock);
+ hns_roce_spin_unlock(&recv_cq->hr_lock);
}
}
@@ -1811,11 +1811,11 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
if (attr->comp_mask)
return EINVAL;
- pthread_spin_lock(&cq->lock);
+ hns_roce_spin_lock(&cq->hr_lock);
err = hns_roce_poll_one(ctx, &qp, cq, NULL);
if (err != V2_CQ_OK)
- pthread_spin_unlock(&cq->lock);
+ hns_roce_spin_unlock(&cq->hr_lock);
return err;
}
@@ -1849,7 +1849,7 @@ static void wc_end_poll_cq(struct ibv_cq_ex *current)
else
update_cq_db(ctx, cq);
- pthread_spin_unlock(&cq->lock);
+ hns_roce_spin_unlock(&cq->hr_lock);
}
static enum ibv_wc_opcode wc_read_opcode(struct ibv_cq_ex *current)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index d503031..afde313 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -407,6 +407,11 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw)
return 0;
}
+enum {
+ CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS |
+ IBV_CQ_INIT_ATTR_MASK_PD,
+};
+
enum {
CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
IBV_WC_EX_WITH_CVLAN,
@@ -415,21 +420,47 @@ enum {
static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
struct hns_roce_context *context)
{
+ struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain);
+
if (!attr->cqe || attr->cqe > context->max_cqe)
return EINVAL;
- if (attr->comp_mask)
+ if (!check_comp_mask(attr->comp_mask, CREATE_CQ_SUPPORTED_COMP_MASK)) {
+ verbs_err(&context->ibv_ctx, "unsupported cq comps 0x%x\n",
+ attr->comp_mask);
return EOPNOTSUPP;
+ }
if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS))
return EOPNOTSUPP;
+ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) {
+ if (!pad) {
+ verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n");
+ return EINVAL;
+ }
+ atomic_fetch_add(&pad->pd.refcount, 1);
+ }
+
attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM,
roundup_pow_of_two(attr->cqe));
return 0;
}
+static int hns_roce_cq_spinlock_init(struct ibv_context *context,
+ struct hns_roce_cq *cq,
+ struct ibv_cq_init_attr_ex *attr)
+{
+ bool need_lock;
+
+ need_lock = hns_roce_whether_need_lock(attr->parent_domain);
+ if (!need_lock)
+ verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n");
+
+ return hns_roce_spinlock_init(&cq->hr_lock, need_lock);
+}
+
static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq)
{
int buf_size = hr_hw_page_align(cq->cq_depth * cq->cqe_size);
@@ -486,7 +517,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
goto err;
}
- ret = pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
+ if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD)
+ cq->parent_domain = attr->parent_domain;
+
+ ret = hns_roce_cq_spinlock_init(context, cq, attr);
if (ret)
goto err_lock;
@@ -517,8 +551,9 @@ err_cmd:
hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB);
err_db:
hns_roce_free_buf(&cq->buf);
-err_lock:
err_buf:
+ hns_roce_spinlock_destroy(&cq->hr_lock);
+err_lock:
free(cq);
err:
if (ret < 0)
@@ -569,16 +604,23 @@ int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr)
int hns_roce_u_destroy_cq(struct ibv_cq *cq)
{
+ struct hns_roce_cq *hr_cq = to_hr_cq(cq);
+ struct hns_roce_pad *pad = to_hr_pad(hr_cq->parent_domain);
int ret;
ret = ibv_cmd_destroy_cq(cq);
if (ret)
return ret;
- hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db,
- HNS_ROCE_CQ_TYPE_DB);
- hns_roce_free_buf(&to_hr_cq(cq)->buf);
- free(to_hr_cq(cq));
+ hns_roce_free_db(to_hr_ctx(cq->context), hr_cq->db, HNS_ROCE_CQ_TYPE_DB);
+ hns_roce_free_buf(&hr_cq->buf);
+
+ hns_roce_spinlock_destroy(&hr_cq->hr_lock);
+
+ if (pad)
+ atomic_fetch_sub(&pad->pd.refcount, 1);
+
+ free(hr_cq);
return ret;
}
--
2.33.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/fundawang/rdma-core.git
git@gitee.com:fundawang/rdma-core.git
fundawang
rdma-core
rdma-core
master

搜索帮助