2 Star 0 Fork 0

zongy17/Baseline-recmat

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
test_case4.cpp 20.99 KB
一键复制 编辑 原始数据 按行查看 历史
zongy17 提交于 2024-06-10 20:37 . merge tests of SStructMG
#include "wrapper_sstructmg.h"
const idx_t num_diag = 7;
int main(int argc, char* argv[])
{
int my_pid, num_procs;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_pid);
MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
idx_t arg_cnt = 1;
const idx_t local_m = atoi(argv[arg_cnt++]),// 每个part内有一个box,该box有m*m*m个cell
np_per_dim = atoi(argv[arg_cnt++]),// 每个part内有一个box,该box有p*p*p个进程组成
glb_nparts = 2;
const std::string its_name = std::string(argv[arg_cnt++]),
prc_name = std::string(argv[arg_cnt++]);
std::string config_mg_file;
if (prc_name == "GMG") config_mg_file = std::string(argv[arg_cnt++]);
const idx_t ncells_per_dim = local_m * np_per_dim,
ncells_per_part = ncells_per_dim * ncells_per_dim * ncells_per_dim,
nprocs_per_part = np_per_dim * np_per_dim * np_per_dim;
assert(ncells_per_dim % 4 == 0);
assert(num_procs == nprocs_per_part * glb_nparts);
const idx_t my_part = my_pid / nprocs_per_part,
pid_in_part = my_pid - my_part * nprocs_per_part;
idx_t cart_ids[3];
{// 计算本进程在part内处于什么位置
cart_ids[0] = pid_in_part / (np_per_dim * np_per_dim);
cart_ids[1] = (pid_in_part - cart_ids[0] * np_per_dim * np_per_dim) / np_per_dim;
cart_ids[2] = (pid_in_part - cart_ids[0] * np_per_dim * np_per_dim - cart_ids[1] * np_per_dim);
}
idx_t my_ilower[3], my_iupper[3], box_ends[3];
idx_t my_nelems = 1;
for (idx_t d = 0; d < 3; d++) {
my_ilower[d] = cart_ids[d] * local_m;
my_iupper[d] = (cart_ids[d] + 1)* local_m - 1;
box_ends[d] = my_iupper[d] + 1;
my_nelems *= (my_iupper[d] - my_ilower[d] + 1);
}
#if LOAD_REBAL
#endif
#ifdef DEBUG_PRINT
for (int p = 0; p < num_procs; p++) {
if (my_pid == p) {
printf("Proc %d part %d: (%d,%d,%d)~(%d,%d,%d)\n", my_pid, my_part,
my_ilower[0], my_ilower[1], my_ilower[2],
my_iupper[0], my_iupper[1], my_iupper[2] );
fflush(stdout);
}
MPI_Barrier(MPI_COMM_WORLD);
}
MPI_Barrier(MPI_COMM_WORLD);
#endif
if (my_pid == 0) {
printf("\033[1;35m Local M = %d P: %d Total #procs: %d Total #dof: %d \033[0m \n",
local_m, np_per_dim, num_procs, ncells_per_part * glb_nparts);
}
SStructMG_Init();
{
SStructGrid<idx_t> ssgrid(MPI_COMM_WORLD, 3, glb_nparts);
const idx_t halos[3] = {1, 1, 1};
{// 添加本进程所拥有的块和盒子
const idx_t blk_exts[3] = { ncells_per_dim, ncells_per_dim, ncells_per_dim };
const bool periods[3] = {false, false, false};
ssgrid.addBlock(my_part, blk_exts, periods);
ssgrid.addBox(my_part, my_ilower, box_ends);
}
ssgrid.assemble();
const idx_t my_nblks = ssgrid.my_blk_list.size(); assert(my_nblks == 1);
idx_t shalos[my_nblks * 3];
for (idx_t k = 0; k < my_nblks; k++) {
shalos[k * 3 + 0] = halos[0];
shalos[k * 3 + 1] = halos[1];
shalos[k * 3 + 2] = halos[2];
}
// 根据半结构网格建立向量
par_SstructVector<idx_t, data_t> b(ssgrid, shalos, true);
par_SstructVector<idx_t, data_t> x(b), y(b), x0(b);
{// 填入向量数据
// 右端项设为1.0
data_t * vec_buf = new data_t [my_nelems];
for (idx_t i = 0; i < my_nelems; i++)
vec_buf[i] = 1.0;
b.set_box_values(my_part, my_ilower, box_ends, vec_buf);
// 初始解向量
for (idx_t i0 = my_ilower[0]; i0 <= my_iupper[0]; i0++)
for (idx_t i1 = my_ilower[1]; i1 <= my_iupper[1]; i1++)
for (idx_t i2 = my_ilower[2]; i2 <= my_iupper[2]; i2++) {
const idx_t elem_id = ((i0 - my_ilower[0]) * (my_iupper[1] - my_ilower[1] + 1)
+ i1 - my_ilower[1] )* (my_iupper[2] - my_ilower[2] + 1)
+ i2 - my_ilower[2] ;
vec_buf[elem_id] = ((data_t)my_part + 1.0) * (0.0 + 1.0) * cos((data_t)(i0 + i1 + i2) / 10.0 );
}
x0.set_box_values(my_part, my_ilower, box_ends, vec_buf);
// 辅助向量
y.set_val(0.0, true);
delete [] vec_buf; vec_buf = nullptr;
}
const idx_t amr_stride = 2;
assert(amr_stride == 2);
const idx_t c_lbdr = ncells_per_dim / 4 - 1,// 原网格中边界处较下者
c_ubdr = ncells_per_dim / 4 * 3 ;// 原网格中边界处较上者
if (my_pid == 0) printf("lbdr/ubdr @ C: %d %d\n", c_lbdr, c_ubdr);
// 根据半结构网格建立矩阵
__int128_t masks[my_nblks]; for (idx_t k = 0; k < my_nblks; k++) masks[k] = stencil_mask_3d7;
par_SstructMatrix<idx_t, data_t, data_t> A(ssgrid, shalos, masks);
int proc_diag_nrows = my_nelems;
int proc_offd_nnz = 0;
{// 填入矩阵数据
// 首先处理对角块,也即结构化部分
data_t* mat_buf = new data_t [my_nelems * num_diag];
#pragma omp parallel for collapse(2) schedule(static)
for (idx_t i0 = my_ilower[0]; i0 <= my_iupper[0]; i0++)
for (idx_t i1 = my_ilower[1]; i1 <= my_iupper[1]; i1++)
for (idx_t i2 = my_ilower[2]; i2 <= my_iupper[2]; i2++) {
data_t* dst_ptr = mat_buf + (((i0 - my_ilower[0]) * (my_iupper[1] - my_ilower[1] + 1)
+ i1 - my_ilower[1] )* (my_iupper[2] - my_ilower[2] + 1)
+ i2 - my_ilower[2] ) * num_diag;
dst_ptr[0] = dst_ptr[1] = dst_ptr[2] = -1.0;
dst_ptr[3] = 6.0;
dst_ptr[4] = dst_ptr[5] = dst_ptr[6] = -1.0;
// Set boundary conditions
if (i0 == 0 ) dst_ptr[0] = 0.0;
if (i0 == ncells_per_dim - 1) dst_ptr[6] = 0.0;
if (i1 == 0 ) dst_ptr[1] = 0.0;
if (i1 == ncells_per_dim - 1) dst_ptr[5] = 0.0;
if (i2 == 0 ) dst_ptr[2] = 0.0;
if (i2 == ncells_per_dim - 1) dst_ptr[4] = 0.0;
if (my_part == 0) {// 如果本进程处于原网格
// Zero out dummy unknowns (原网格被加密的区域)
if (i0 > c_lbdr && i0 < c_ubdr && i1 > c_lbdr && i1 < c_ubdr && i2 > c_lbdr && i2 < c_ubdr) {
dst_ptr[3] = 1.0;
dst_ptr[0] = dst_ptr[1] = dst_ptr[2] = dst_ptr[4] = dst_ptr[5] = dst_ptr[6] = 0.0;
}
// Zero out off-diagonal stencil couplings at fine/coarse interface (原网格与被加密部分接壤的区域)
// And Fix diagonal entries at part boundaries
if (i1 > c_lbdr && i1 < c_ubdr && i2 > c_lbdr && i2 < c_ubdr) {
if (i0 == c_lbdr) { dst_ptr[6] = 0.0; dst_ptr[3] = 7.666666666666666; }
if (i0 == c_ubdr) { dst_ptr[0] = 0.0; dst_ptr[3] = 7.666666666666666; }
}
if (i0 > c_lbdr && i0 < c_ubdr && i2 > c_lbdr && i2 < c_ubdr) {
if (i1 == c_lbdr) { dst_ptr[5] = 0.0; dst_ptr[3] = 7.666666666666666; }
if (i1 == c_ubdr) { dst_ptr[1] = 0.0; dst_ptr[3] = 7.666666666666666; }
}
if (i0 > c_lbdr && i0 < c_ubdr && i1 > c_lbdr && i1 < c_ubdr) {
if (i2 == c_lbdr) { dst_ptr[4] = 0.0; dst_ptr[3] = 7.666666666666666; }
if (i2 == c_ubdr) { dst_ptr[2] = 0.0; dst_ptr[3] = 7.666666666666666; }
}
}
else {// 如果本进程处于细网格
idx_t cnt_at_fbdr = 0;
if (i0 == 0 ) cnt_at_fbdr ++;
if (i0 == ncells_per_dim - 1) cnt_at_fbdr ++;
if (i1 == 0 ) cnt_at_fbdr ++;
if (i1 == ncells_per_dim - 1) cnt_at_fbdr ++;
if (i2 == 0 ) cnt_at_fbdr ++;
if (i2 == ncells_per_dim - 1) cnt_at_fbdr ++;
assert(cnt_at_fbdr <= 3);
// if (cnt_at_fbdr > 0) printf("Proc %d (%d,%d,%d) cnt %d\n", my_pid, i0, i1, i2, cnt_at_fbdr);
if (cnt_at_fbdr == 1) dst_ptr[3] = 5.666666666666666;// Fine part 2D borders (即加密网格的6个面)
else if (cnt_at_fbdr == 2) dst_ptr[3] = 5.333333333333333;// Fine part 1D borders (即加密网格的12条棱)
else if (cnt_at_fbdr == 3) dst_ptr[3] = 5.0 ;// Fine part 0D borders (即加密网格的8个顶点)
}
}
A.set_box_values(my_part, my_ilower, box_ends, mat_buf);
delete [] mat_buf; mat_buf = nullptr;
// 然后处理非对角快,也即非结构部分
data_t offd_val = -0.666666666666666;// 添加上非stencil的非零元数值
A.init_offd();
std::vector< Entry<idx_t, data_t> > all_nnz;
if (my_part == 0) {// 原来的网格
idx_t ngb_part = 1;
for (idx_t d = 2; d >= 0; d--) {// 在三个方向逐次添加连接关系
if (my_ilower[d] <= c_lbdr && c_lbdr <= my_iupper[d]) {
idx_t bdr_ilower[3], bdr_iupper[3];// 确定原网格上的边界位置
for (idx_t _k = 0; _k < 3; _k++) {
if (_k == d) {
bdr_ilower[_k] = c_lbdr;
bdr_iupper[_k] = c_lbdr;
} else {
bdr_ilower[_k] = std::max(c_lbdr + 1, my_ilower[_k]);
bdr_iupper[_k] = std::min(c_ubdr - 1, my_iupper[_k]);
}
}
// 从粗往细看,自己的一个粗点对应4个细点
for (idx_t fngb = 0; fngb < 4; fngb ++) {
idx_t offset[3];
if (d == 0) {
offset[0] = __builtin_popcount(0 ); offset[1] = __builtin_popcount(fngb & 0b01); offset[2] = __builtin_popcount(fngb & 0b10);
} else if (d == 1) {
offset[0] = __builtin_popcount(fngb & 0b01); offset[1] = __builtin_popcount(0 ); offset[2] = __builtin_popcount(fngb & 0b10);
} else if (d == 2) {
offset[0] = __builtin_popcount(fngb & 0b01); offset[1] = __builtin_popcount(fngb & 0b10); offset[2] = __builtin_popcount(0 );
}
for (idx_t i2 = bdr_ilower[2]; i2 <= bdr_iupper[2]; i2++)
for (idx_t i1 = bdr_ilower[1]; i1 <= bdr_iupper[1]; i1++)
for (idx_t i0 = bdr_ilower[0]; i0 <= bdr_iupper[0]; i0++) {
StructDescriptor<idx_t> row, col;
row.id = my_part;
row.index[0] = i0; row.index[1] = i1; row.index[2] = i2;
col.id = ngb_part;
for (idx_t _k = 0; _k < 3; _k++) {
if (_k == d) col.index[_k] = 0;// 注意此时对应加密网格的下边界
else col.index[_k] = (row.index[_k] - (c_lbdr + 1)) * amr_stride + offset[_k];
}
// printf("index: [%d](%d, %d, %d) - to_index: [%d](%d, %d, %d)\n",
// row.id, row.index[0], row.index[1], row.index[2],
// col.id, col.index[0], col.index[1], col.index[2]);
all_nnz.push_back(Entry<idx_t, data_t>(row, col, offd_val));
}
}
}// if lower bdr included
if (my_ilower[d] <= c_ubdr && c_ubdr <= my_iupper[d]) {
idx_t bdr_ilower[3], bdr_iupper[3];
for (idx_t _k = 0; _k < 3; _k++) {
if (_k == d) {
bdr_ilower[_k] = c_ubdr;
bdr_iupper[_k] = c_ubdr;
} else {
bdr_ilower[_k] = std::max(c_lbdr + 1, my_ilower[_k]);
bdr_iupper[_k] = std::min(c_ubdr - 1, my_iupper[_k]);
}
}
// 从粗往细看,自己的一个粗点对应4个细点
for (idx_t fngb = 0; fngb < 4; fngb ++) {
idx_t offset[3];
if (d == 0) {
offset[0] = __builtin_popcount(0 ); offset[1] = __builtin_popcount(fngb & 0b01); offset[2] = __builtin_popcount(fngb & 0b10);
} else if (d == 1) {
offset[0] = __builtin_popcount(fngb & 0b01); offset[1] = __builtin_popcount(0 ); offset[2] = __builtin_popcount(fngb & 0b10);
} else if (d == 2) {
offset[0] = __builtin_popcount(fngb & 0b01); offset[1] = __builtin_popcount(fngb & 0b10); offset[2] = __builtin_popcount(0 );
}
for (idx_t i2 = bdr_ilower[2]; i2 <= bdr_iupper[2]; i2++)
for (idx_t i1 = bdr_ilower[1]; i1 <= bdr_iupper[1]; i1++)
for (idx_t i0 = bdr_ilower[0]; i0 <= bdr_iupper[0]; i0++) {
StructDescriptor<idx_t> row, col;
row.id = my_part;
row.index[0] = i0; row.index[1] = i1; row.index[2] = i2;
col.id = ngb_part;
for (idx_t _k = 0; _k < 3; _k++) {
if (_k == d) col.index[_k] = ncells_per_dim - 1;// 注意此时对应加密网格的下边界
else col.index[_k] = (row.index[_k] - (c_lbdr + 1)) * amr_stride + offset[_k];
}
// printf("index: [%d](%d, %d, %d) - to_index: [%d](%d, %d, %d)\n",
// row.id, row.index[0], row.index[1], row.index[2],
// col.id, col.index[0], col.index[1], col.index[2]);
all_nnz.push_back(Entry<idx_t, data_t>(row, col, offd_val));
}
}
}// if upper bdr included
}
}
MPI_Barrier(MPI_COMM_WORLD);
if (my_part == 1) {// AMR加密后的网格
idx_t ngb_part = 0;
for (idx_t d = 2; d >= 0; d--) {// 在三个方向逐次添加连接关系
if (my_ilower[d] == 0) {
assert(cart_ids[d] == 0);
idx_t bdr_ilower[3], bdr_iupper[3];// 确定加密网格上的边界位置
for (idx_t _k = 0; _k < 3; _k++) {
bdr_ilower[_k] = my_ilower[_k];
if (_k == d) bdr_iupper[_k] = my_ilower[_k];
else bdr_iupper[_k] = my_iupper[_k];
}
// 从细往粗看,要知道自己这个细点对应哪个粗点
for (idx_t i2 = bdr_ilower[2]; i2 <= bdr_iupper[2]; i2++)
for (idx_t i1 = bdr_ilower[1]; i1 <= bdr_iupper[1]; i1++)
for (idx_t i0 = bdr_ilower[0]; i0 <= bdr_iupper[0]; i0++) {
StructDescriptor<idx_t> row, col;
row.id = my_part;
row.index[0] = i0; row.index[1] = i1; row.index[2] = i2;
col.id = ngb_part;
for (idx_t _k = 0; _k < 3; _k++) {
if (_k == d) col.index[_k] = c_lbdr;// 注意此时对应原网格上与加密网格的下边界
else col.index[_k] = row.index[_k] / amr_stride + c_lbdr + 1;
}
// printf("index: [%d](%d, %d, %d) - to_index: [%d](%d, %d, %d)\n",
// row.id, row.index[0], row.index[1], row.index[2],
// col.id, col.index[0], col.index[1], col.index[2]);
all_nnz.push_back(Entry<idx_t, data_t>(row, col, offd_val));
}
}
if (my_iupper[d] == ncells_per_dim - 1) {
assert(cart_ids[d] == np_per_dim - 1);
idx_t bdr_ilower[3], bdr_iupper[3];
for (idx_t _k = 0; _k < 3; _k++) {
bdr_iupper[_k] = my_iupper[_k];
if (_k == d) bdr_ilower[_k] = my_iupper[_k];
else bdr_ilower[_k] = my_ilower[_k];
}
for (idx_t i2 = bdr_ilower[2]; i2 <= bdr_iupper[2]; i2++)
for (idx_t i1 = bdr_ilower[1]; i1 <= bdr_iupper[1]; i1++)
for (idx_t i0 = bdr_ilower[0]; i0 <= bdr_iupper[0]; i0++) {
StructDescriptor<idx_t> row, col;
row.id = my_part;
row.index[0] = i0; row.index[1] = i1; row.index[2] = i2;
col.id = ngb_part;
for (idx_t _k = 0; _k < 3; _k++) {
if (_k == d) col.index[_k] = c_ubdr;// 注意此时对应原网格上与加密网格的上边界
else col.index[_k] = row.index[_k] / amr_stride + c_lbdr + 1;
}
// printf("index: [%d](%d, %d, %d) - to_index: [%d](%d, %d, %d)\n",
// row.id, row.index[0], row.index[1], row.index[2],
// col.id, col.index[0], col.index[1], col.index[2]);
all_nnz.push_back(Entry<idx_t, data_t>(row, col, offd_val));
}
}
}
}
A.add_entries(all_nnz.size(), all_nnz.data());
proc_offd_nnz = all_nnz.size();
}
A.assemble();
// for (int p = 0; p < num_procs; p++) {
// if (p == my_pid) {
// printf("Proc %4d Diag nrows %8d Offd nnz %8d\n", p, proc_diag_nrows, proc_offd_nnz);
// fflush(stdout);
// }
// MPI_Barrier(MPI_COMM_WORLD);
// }
const int test_cnt = 10;
std::vector<TEST_RECORD> records;
for (int test = 0; test < test_cnt; test++) {
vec_copy(x0, x);
double ck = vec_dot<idx_t, data_t, double>(b, b);
if (my_pid == 0) printf("( b, b) = %.15e\n", ck);
ck = vec_dot<idx_t, data_t, double>(x, x);
if (my_pid == 0) printf("( x, x) = %.15e\n", ck);
A.Mult(b, y, false);
ck = vec_dot<idx_t, data_t, double>(y, y);
if (my_pid == 0) printf("(A*b, A*b) = %.15e\n", ck);
A.Mult(x, y, false);
ck = vec_dot<idx_t, data_t, double>(y, y);
if (my_pid == 0) printf("(A*x, A*x) = %.15e\n", ck);
// 用A*x作为右端项b
vec_copy(y, b);
// 初始解设为0
x.set_val(0.0, true);
TEST_CONFIG config;
config.my_nblks = my_nblks;
config.config_mg_file = config_mg_file;
config.ssgrid_ptr = & ssgrid;
config.rtol = 1.0e-7;
TEST_RECORD rec;
buildup_solver(its_name, prc_name, config);
setup_and_solve(A, b, x, rec);
data_t true_r_norm, b_norm;
check_residual(A, x, b, y, true_r_norm, b_norm);
if (my_pid == 0) {
printf("\033[1;35mtrue ||r|| = %20.16e ||r||/||b||= %20.16e\033[0m\n",
true_r_norm, true_r_norm / b_norm);
printf("Proc %d Setup, Solve costs %.6f %.6f s\n",
my_pid, rec.setup, rec.solve);
}
stat_part_times(rec);
records.push_back(rec);
destroy_solver();
}// test loop
if (my_pid == 0) {// 输出最佳时间
stat_multi_runs(records);
}
}// all Semi-** object must be destroyed before MPI_Finalize()
SStructMG_Finalize();
MPI_Finalize();
return 0;
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/zongy17/baseline-recmat.git
git@gitee.com:zongy17/baseline-recmat.git
zongy17
baseline-recmat
Baseline-recmat
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385