From 410a4fcb7228b0778ecb73d54de9beab98ca5b94 Mon Sep 17 00:00:00 2001 From: wang-shihao21 Date: Mon, 24 Jun 2024 15:56:08 +0800 Subject: [PATCH] =?UTF-8?q?ads=E4=BB=93=E6=94=B9=E5=90=8D=E7=9B=B8?= =?UTF-8?q?=E5=85=B3=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 28 +- MANIFEST.in | 2 +- README.md | 64 +-- Third_Party_Open_Source__Software_Notice | 2 +- ci/build.sh | 10 +- ci/cov.sh | 2 +- cmake/config.cmake | 2 +- cmake/func.cmake | 8 +- docs/api/README.md | 82 ++-- {ads => mx_driving}/__init__.py | 0 {ads => mx_driving}/common/CMakeLists.txt | 0 {ads => mx_driving}/common/__init__.py | 0 .../common/components/README.md | 0 {ads => mx_driving}/common/ops/__init__.py | 0 .../common/ops/csrc/DynamicScatter.cpp | 0 .../common/ops/csrc/DynamicVoxelization.cpp | 0 .../common/ops/csrc/FurthestPointSampling.cpp | 0 .../csrc/FurthestPointSamplingWithDist.cpp | 0 {ads => mx_driving}/common/ops/csrc/Knn.cpp | 0 .../csrc/MultiScaleDeformableAttnFunction.cpp | 0 {ads => mx_driving}/common/ops/csrc/Nms3d.cpp | 0 .../common/ops/csrc/Nms3dNormal.cpp | 64 +-- .../common/ops/csrc/PointsInBox.cpp | 0 {ads => mx_driving}/common/ops/csrc/README.md | 0 .../common/ops/csrc/RoipointPool3dForward.cpp | 0 .../common/ops/csrc/RotatedIou.cpp | 0 .../common/ops/csrc/RotatedOverlaps.cpp | 0 .../common/ops/csrc/ScatterMax.cpp | 0 .../common/ops/csrc/ScatterMeanGrad.cpp | 0 .../common/ops/csrc/ThreeInterpolate.cpp | 0 .../common/ops/csrc/VoxelPoolingTrain.cpp | 0 .../common/ops/csrc/functions.h | 0 .../common/ops/csrc/pybind.cpp | 0 .../common/ops/dynamic_voxelization.py | 0 .../common/ops/furthest_point_sampling.py | 0 .../ops/furthest_point_sampling_with_dist.py | 0 .../common/ops/kernels/CMakeLists.txt | 0 .../common/ops/kernels/README.md | 0 .../common/ops/kernels/inc/base.h | 0 .../common/ops/kernels/op_host/CMakeLists.txt | 0 .../common/ops/kernels/op_host/common.h | 0 .../ops/kernels/op_host/dynamic_scatter.cpp | 0 .../kernels/op_host/dynamic_scatter_grad.cpp | 396 +++++++++--------- .../op_host/dynamic_scatter_grad_tiling.h | 146 +++---- .../kernels/op_host/dynamic_scatter_tiling.h | 0 .../kernels/op_host/dynamic_voxelization.cpp | 0 .../op_host/dynamic_voxelization_tiling.h | 0 .../op_host/furthest_point_sampling.cpp | 0 .../op_host/furthest_point_sampling_tiling.h | 0 .../furthest_point_sampling_with_dist.cpp | 0 ...furthest_point_sampling_with_dist_tiling.h | 0 .../op_host/gather_nms3d_mask_tiling.cpp | 138 +++--- .../op_host/gather_nms3d_mask_tiling.h | 36 +- .../common/ops/kernels/op_host/knn.cpp | 0 .../common/ops/kernels/op_host/knn_tiling.h | 0 .../op_host/multi_scale_deformable_attn.cpp | 0 .../multi_scale_deformable_attn_grad.cpp | 0 .../multi_scale_deformable_attn_grad_tiling.h | 0 ...lti_scale_deformable_attn_grad_tiling_v2.h | 0 .../multi_scale_deformable_attn_grad_v2.cpp | 0 .../multi_scale_deformable_attn_tiling.h | 0 .../common/ops/kernels/op_host/nms3d.cpp | 0 .../kernels/op_host/nms3d_normal_tiling.cpp | 226 +++++----- .../ops/kernels/op_host/nms3d_normal_tiling.h | 48 +-- .../common/ops/kernels/op_host/nms3d_tiling.h | 0 .../ops/kernels/op_host/points_in_box.cpp | 0 .../kernels/op_host/points_in_box_tiling.h | 0 .../op_host/roipoint_pool3d_forward.cpp | 0 .../op_host/roipoint_pool3d_forward_tiling.h | 0 .../op_host/scatter_max_with_argmax_v2.cpp | 0 .../op_host/scatter_max_with_argmax_v2.h | 0 .../ops/kernels/op_host/scatter_mean_grad.cpp | 0 .../op_host/scatter_mean_grad_tiling.h | 0 .../kernels/op_host/voxel_pooling_train.cpp | 0 .../op_host/voxel_pooling_train_grad.cpp | 0 .../op_host/voxel_pooling_train_grad_tiling.h | 0 .../op_host/voxel_pooling_train_tiling.h | 0 .../ops/kernels/op_kernel/CMakeLists.txt | 0 .../ops/kernels/op_kernel/dynamic_scatter.cpp | 0 .../kernels/op_kernel/dynamic_scatter_base.h | 366 ++++++++-------- .../op_kernel/dynamic_scatter_grad.cpp | 56 +-- .../op_kernel/dynamic_scatter_grad_base.h | 332 +++++++-------- .../op_kernel/dynamic_scatter_grad_max.h | 180 ++++---- .../op_kernel/dynamic_scatter_grad_mean.h | 138 +++--- .../op_kernel/dynamic_scatter_grad_sum.h | 114 ++--- .../kernels/op_kernel/dynamic_scatter_max.h | 268 ++++++------ .../kernels/op_kernel/dynamic_scatter_mean.h | 144 +++---- .../kernels/op_kernel/dynamic_scatter_sum.h | 120 +++--- .../op_kernel/dynamic_voxelization.cpp | 0 .../op_kernel/furthest_point_sampling.cpp | 0 .../op_kernel/furthest_point_sampling.h | 0 .../furthest_point_sampling_with_dist.cpp | 0 .../kernels/op_kernel/gather_nms3d_mask.cpp | 228 +++++----- .../common/ops/kernels/op_kernel/knn.cpp | 0 .../common/ops/kernels/op_kernel/knn.h | 0 .../op_kernel/ms_deform_attn_grad_generic.h | 0 .../ms_deform_attn_grad_generic_v2.h | 0 .../op_kernel/ms_deform_attn_grad_high_perf.h | 0 .../ms_deform_attn_grad_high_perf_v2.h | 0 .../op_kernel/multi_scale_deformable_attn.cpp | 0 .../multi_scale_deformable_attn_grad.cpp | 0 .../multi_scale_deformable_attn_grad_v2.cpp | 0 .../common/ops/kernels/op_kernel/nms3d.cpp | 0 .../ops/kernels/op_kernel/nms3d_normal.cpp | 316 +++++++------- .../ops/kernels/op_kernel/points_in_box.cpp | 0 .../op_kernel/roipoint_pool3d_forward.cpp | 0 .../op_kernel/scatter_max_with_argmax_v2.cpp | 0 .../kernels/op_kernel/scatter_mean_grad.cpp | 0 .../ops/kernels/op_kernel/scatter_mean_grad.h | 0 .../op_kernel/scatter_mean_grad_base.h | 0 .../op_kernel/scatter_mean_grad_line.h | 0 .../kernels/op_kernel/voxel_pooling_train.cpp | 0 .../kernels/op_kernel/voxel_pooling_train.h | 0 .../op_kernel/voxel_pooling_train_grad.cpp | 0 {ads => mx_driving}/common/ops/knn.py | 0 .../common/ops/nms3d_normal.py | 54 +-- .../common/ops/npu_dynamic_scatter.py | 0 ...pu_multi_scale_deformable_attn_function.py | 0 {ads => mx_driving}/common/ops/npu_nms3d.py | 0 .../common/ops/npu_points_in_box.py | 0 .../common/ops/npu_roipoint_pool3d.py | 0 .../common/ops/npu_scatter_mean_grad.py | 0 .../common/ops/onnx/__init__.py | 0 .../common/ops/onnx/plugin/CMakeLists.txt | 0 .../onnx_multi_scale_deformable_attn.cpp | 0 .../common/ops/onnx/wrapper_onnx_ops.py | 4 +- {ads => mx_driving}/common/ops/rotated_iou.py | 0 .../common/ops/rotated_overlaps.py | 0 {ads => mx_driving}/common/ops/scatter_max.py | 0 {ads => mx_driving}/common/ops/threeNN.py | 0 .../common/ops/three_interpolate.py | 0 .../common/ops/voxel_pooling_train.py | 0 {ads => mx_driving}/motion/CMakeLists.txt | 0 {ads => mx_driving}/motion/__init__.py | 0 .../motion/components/README.md | 0 {ads => mx_driving}/motion/ops/csrc/README.md | 0 .../motion/ops/csrc/pybind.cpp | 0 .../motion/ops/kernels/CMakeLists.txt | 0 .../motion/ops/kernels/README.md | 0 .../ops/kernels/framework/CMakeLists.txt | 0 .../motion/ops/kernels/op_host/CMakeLists.txt | 0 .../ops/kernels/op_kernel/CMakeLists.txt | 0 {ads => mx_driving}/perception/CMakeLists.txt | 0 {ads => mx_driving}/perception/__init__.py | 0 .../perception/fused/__init__.py | 0 .../perception/fused/components/README.md | 0 .../perception/fused/ops/__init__.py | 0 .../perception/fused/ops/bev_pool.py | 2 +- .../perception/fused/ops/bev_pool_v2.py | 2 +- .../perception/fused/ops/csrc/BEVPool.cpp | 0 .../fused/ops/csrc/BEVPoolBackward.cpp | 0 .../perception/fused/ops/csrc/BEVPoolV2.cpp | 0 .../fused/ops/csrc/BEVPoolV2Backward.cpp | 0 .../perception/fused/ops/csrc/README.md | 0 .../perception/fused/ops/csrc/functions.h | 0 .../perception/fused/ops/csrc/pybind.cpp | 0 .../fused/ops/kernels/CMakeLists.txt | 0 .../perception/fused/ops/kernels/README.md | 0 .../fused/ops/kernels/op_host/CMakeLists.txt | 0 .../fused/ops/kernels/op_host/bev_pool.cpp | 0 .../ops/kernels/op_host/bev_pool_tiling.h | 0 .../ops/kernels/op_kernel/CMakeLists.txt | 0 .../fused/ops/kernels/op_kernel/bev_pool.cpp | 0 .../fused/ops/kernels/op_kernel/bev_pool.h | 0 .../ops/kernels/op_kernel/bev_pool_grad.cpp | 0 .../ops/kernels/op_kernel/bev_pool_v2.cpp | 0 .../fused/ops/kernels/op_kernel/bev_pool_v2.h | 0 .../kernels/op_kernel/bev_pool_v2_grad.cpp | 0 .../fused/ops/kernels/op_kernel/common.h | 0 .../perception/point/__init__.py | 0 .../perception/point/components/README.md | 0 .../perception/point/ops/__init__.py | 0 .../perception/point/ops/csrc/GroupPoints.cpp | 0 .../point/ops/csrc/PointToVoxel.cpp | 0 .../perception/point/ops/csrc/README.md | 0 .../perception/point/ops/csrc/UniqueVoxel.cpp | 0 .../point/ops/csrc/VecPoolBackward.cpp | 0 .../point/ops/csrc/VoxelToPoint.cpp | 0 .../perception/point/ops/csrc/functions.h | 0 .../perception/point/ops/csrc/pybind.cpp | 0 .../perception/point/ops/group_points.py | 0 .../point/ops/kernels/CMakeLists.txt | 0 .../perception/point/ops/kernels/README.md | 0 .../point/ops/kernels/op_host/CMakeLists.txt | 0 .../ops/kernels/op_host/group_points_grad.cpp | 0 .../op_host/group_points_grad_tiling.h | 0 .../ops/kernels/op_host/point_to_voxel.cpp | 0 .../kernels/op_host/point_to_voxel_tiling.h | 0 .../ops/kernels/op_host/unique_voxel.cpp | 0 .../ops/kernels/op_host/unique_voxel_tiling.h | 0 .../ops/kernels/op_host/vec_pool_grad.cpp | 0 .../kernels/op_host/vec_pool_grad_tiling.h | 0 .../ops/kernels/op_kernel/CMakeLists.txt | 0 .../kernels/op_kernel/group_points_grad.cpp | 0 .../ops/kernels/op_kernel/point_to_voxel.cpp | 0 .../ops/kernels/op_kernel/unique_voxel.cpp | 0 .../ops/kernels/op_kernel/vec_pool_grad.cpp | 0 .../ops/kernels/op_kernel/voxel_to_point.cpp | 0 .../perception/vision/__init__.py | 0 .../perception/vision/components/README.md | 0 .../perception/vision/ops/__init__.py | 0 .../vision/ops/boxes_overlap_bev.py | 0 .../vision/ops/csrc/BoxesOverlapBev.cpp | 0 .../perception/vision/ops/csrc/README.md | 0 .../perception/vision/ops/csrc/functions.h | 0 .../perception/vision/ops/csrc/pybind.cpp | 0 .../vision/ops/kernels/CMakeLists.txt | 0 .../perception/vision/ops/kernels/README.md | 0 .../vision/ops/kernels/op_host/CMakeLists.txt | 0 .../ops/kernels/op_host/boxes_overlap_bev.cpp | 0 .../op_host/boxes_overlap_bev_tiling.h | 0 .../ops/kernels/op_kernel/CMakeLists.txt | 0 .../kernels/op_kernel/boxes_overlap_bev.cpp | 0 {ads => mx_driving}/spconv/CMakeLists.txt | 0 {ads => mx_driving}/spconv/__init__.py | 0 {ads => mx_driving}/spconv/ops/__init__.py | 0 .../spconv/ops/csrc/MultiToSparse.cpp | 0 {ads => mx_driving}/spconv/ops/csrc/README.md | 0 .../spconv/ops/csrc/SparseConv3d.cpp | 0 .../spconv/ops/csrc/SparseConv3dGrad.cpp | 0 .../spconv/ops/csrc/SubmSparseCov3d.cpp | 0 .../spconv/ops/csrc/functions.h | 0 .../spconv/ops/csrc/pybind.cpp | 0 .../spconv/ops/kernels/CMakeLists.txt | 0 .../spconv/ops/kernels/README.md | 0 .../spconv/ops/kernels/op_host/CMakeLists.txt | 0 .../ops/kernels/op_host/sparse_conv3d.cpp | 0 .../kernels/op_host/sparse_conv3d_grad.cpp | 0 .../op_host/sparse_conv3d_grad_tiling.h | 0 .../kernels/op_host/sparse_conv3d_tiling.h | 0 .../op_host/subm_sparse_conv3d_tiling.cpp | 0 .../op_host/subm_sparse_conv3d_tiling.h | 0 .../spconv/ops/kernels/op_host/to_sparse.cpp | 0 .../ops/kernels/op_host/to_sparse_tiling.h | 0 .../ops/kernels/op_kernel/CMakeLists.txt | 0 .../ops/kernels/op_kernel/sparse_conv3d.cpp | 0 .../kernels/op_kernel/sparse_conv3d_grad.cpp | 0 .../kernels/op_kernel/subm_sparse_conv3d.cpp | 0 .../ops/kernels/op_kernel/to_sparse.cpp | 0 {ads => mx_driving}/spconv/ops/sparse_conv.py | 0 .../spconv/ops/sparse_functional.py | 0 .../spconv/ops/sparse_modules.py | 2 +- {ads => mx_driving}/spconv/ops/sparse_ops.py | 0 .../spconv/ops/sparse_structure.py | 0 setup.py | 6 +- tests/onnx/test_wrapper_onnx_ops.py | 2 +- tests/torch/test_bev_pool.py | 2 +- tests/torch/test_bev_pool_v2.py | 2 +- tests/torch/test_boxes_overlap_bev.py | 4 +- .../test_furthest_point_sample_with_dist.py | 4 +- tests/torch/test_furthest_point_sampling.py | 4 +- tests/torch/test_knn.py | 6 +- ...st_multi_scale_deformable_attn_function.py | 4 +- tests/torch/test_npu_dyn_voxelization.py | 4 +- tests/torch/test_npu_dynamic_scatter.py | 6 +- tests/torch/test_npu_nms3d.py | 4 +- tests/torch/test_npu_nms3d_normal.py | 6 +- tests/torch/test_npu_scatter_mean_grad.py | 4 +- tests/torch/test_points_in_box.py | 12 +- tests/torch/test_roipoint_pool3d.py | 2 +- tests/torch/test_rotated_iou.py | 4 +- tests/torch/test_scatter_max.py | 4 +- tests/torch/test_sparse_conv3d.py | 2 +- tests/torch/test_subm_sparse_conv3d.py | 2 +- tests/torch/test_three_interpolate.py | 4 +- tests/torch/test_three_nn.py | 10 +- tests/torch/test_voxel_pooling_train.py | 4 +- 267 files changed, 1841 insertions(+), 1841 deletions(-) rename {ads => mx_driving}/__init__.py (100%) rename {ads => mx_driving}/common/CMakeLists.txt (100%) rename {ads => mx_driving}/common/__init__.py (100%) rename {ads => mx_driving}/common/components/README.md (100%) rename {ads => mx_driving}/common/ops/__init__.py (100%) rename {ads => mx_driving}/common/ops/csrc/DynamicScatter.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/DynamicVoxelization.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/FurthestPointSampling.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/FurthestPointSamplingWithDist.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/Knn.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/MultiScaleDeformableAttnFunction.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/Nms3d.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/Nms3dNormal.cpp (97%) rename {ads => mx_driving}/common/ops/csrc/PointsInBox.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/README.md (100%) rename {ads => mx_driving}/common/ops/csrc/RoipointPool3dForward.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/RotatedIou.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/RotatedOverlaps.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/ScatterMax.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/ScatterMeanGrad.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/ThreeInterpolate.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/VoxelPoolingTrain.cpp (100%) rename {ads => mx_driving}/common/ops/csrc/functions.h (100%) rename {ads => mx_driving}/common/ops/csrc/pybind.cpp (100%) rename {ads => mx_driving}/common/ops/dynamic_voxelization.py (100%) rename {ads => mx_driving}/common/ops/furthest_point_sampling.py (100%) rename {ads => mx_driving}/common/ops/furthest_point_sampling_with_dist.py (100%) rename {ads => mx_driving}/common/ops/kernels/CMakeLists.txt (100%) rename {ads => mx_driving}/common/ops/kernels/README.md (100%) rename {ads => mx_driving}/common/ops/kernels/inc/base.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/CMakeLists.txt (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/common.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/dynamic_scatter.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/dynamic_scatter_grad.cpp (97%) rename {ads => mx_driving}/common/ops/kernels/op_host/dynamic_scatter_grad_tiling.h (95%) rename {ads => mx_driving}/common/ops/kernels/op_host/dynamic_scatter_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/dynamic_voxelization.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/dynamic_voxelization_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/furthest_point_sampling.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/furthest_point_sampling_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp (96%) rename {ads => mx_driving}/common/ops/kernels/op_host/gather_nms3d_mask_tiling.h (96%) rename {ads => mx_driving}/common/ops/kernels/op_host/knn.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/knn_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/multi_scale_deformable_attn.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/multi_scale_deformable_attn_grad.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling_v2.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_v2.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/multi_scale_deformable_attn_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/nms3d.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/nms3d_normal_tiling.cpp (97%) rename {ads => mx_driving}/common/ops/kernels/op_host/nms3d_normal_tiling.h (97%) rename {ads => mx_driving}/common/ops/kernels/op_host/nms3d_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/points_in_box.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/points_in_box_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/roipoint_pool3d_forward.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/roipoint_pool3d_forward_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/scatter_max_with_argmax_v2.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/scatter_max_with_argmax_v2.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/scatter_mean_grad.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/scatter_mean_grad_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/voxel_pooling_train.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/voxel_pooling_train_grad.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/voxel_pooling_train_grad_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_host/voxel_pooling_train_tiling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/CMakeLists.txt (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/dynamic_scatter.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/dynamic_scatter_base.h (97%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/dynamic_scatter_grad.cpp (97%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/dynamic_scatter_grad_base.h (97%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/dynamic_scatter_grad_max.h (96%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/dynamic_scatter_grad_mean.h (96%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/dynamic_scatter_grad_sum.h (96%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/dynamic_scatter_max.h (97%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/dynamic_scatter_mean.h (96%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/dynamic_scatter_sum.h (96%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/dynamic_voxelization.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/furthest_point_sampling.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/furthest_point_sampling.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/gather_nms3d_mask.cpp (97%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/knn.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/knn.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic_v2.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf_v2.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/multi_scale_deformable_attn.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad_v2.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/nms3d.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/nms3d_normal.cpp (97%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/points_in_box.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/roipoint_pool3d_forward.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/scatter_max_with_argmax_v2.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/scatter_mean_grad.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/scatter_mean_grad.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/scatter_mean_grad_base.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/scatter_mean_grad_line.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/voxel_pooling_train.cpp (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/voxel_pooling_train.h (100%) rename {ads => mx_driving}/common/ops/kernels/op_kernel/voxel_pooling_train_grad.cpp (100%) rename {ads => mx_driving}/common/ops/knn.py (100%) rename {ads => mx_driving}/common/ops/nms3d_normal.py (96%) rename {ads => mx_driving}/common/ops/npu_dynamic_scatter.py (100%) rename {ads => mx_driving}/common/ops/npu_multi_scale_deformable_attn_function.py (100%) rename {ads => mx_driving}/common/ops/npu_nms3d.py (100%) rename {ads => mx_driving}/common/ops/npu_points_in_box.py (100%) rename {ads => mx_driving}/common/ops/npu_roipoint_pool3d.py (100%) rename {ads => mx_driving}/common/ops/npu_scatter_mean_grad.py (100%) rename {ads => mx_driving}/common/ops/onnx/__init__.py (100%) rename {ads => mx_driving}/common/ops/onnx/plugin/CMakeLists.txt (100%) rename {ads => mx_driving}/common/ops/onnx/plugin/onnx_multi_scale_deformable_attn.cpp (100%) rename {ads => mx_driving}/common/ops/onnx/wrapper_onnx_ops.py (86%) rename {ads => mx_driving}/common/ops/rotated_iou.py (100%) rename {ads => mx_driving}/common/ops/rotated_overlaps.py (100%) rename {ads => mx_driving}/common/ops/scatter_max.py (100%) rename {ads => mx_driving}/common/ops/threeNN.py (100%) rename {ads => mx_driving}/common/ops/three_interpolate.py (100%) rename {ads => mx_driving}/common/ops/voxel_pooling_train.py (100%) rename {ads => mx_driving}/motion/CMakeLists.txt (100%) rename {ads => mx_driving}/motion/__init__.py (100%) rename {ads => mx_driving}/motion/components/README.md (100%) rename {ads => mx_driving}/motion/ops/csrc/README.md (100%) rename {ads => mx_driving}/motion/ops/csrc/pybind.cpp (100%) rename {ads => mx_driving}/motion/ops/kernels/CMakeLists.txt (100%) rename {ads => mx_driving}/motion/ops/kernels/README.md (100%) rename {ads => mx_driving}/motion/ops/kernels/framework/CMakeLists.txt (100%) rename {ads => mx_driving}/motion/ops/kernels/op_host/CMakeLists.txt (100%) rename {ads => mx_driving}/motion/ops/kernels/op_kernel/CMakeLists.txt (100%) rename {ads => mx_driving}/perception/CMakeLists.txt (100%) rename {ads => mx_driving}/perception/__init__.py (100%) rename {ads => mx_driving}/perception/fused/__init__.py (100%) rename {ads => mx_driving}/perception/fused/components/README.md (100%) rename {ads => mx_driving}/perception/fused/ops/__init__.py (100%) rename {ads => mx_driving}/perception/fused/ops/bev_pool.py (98%) rename {ads => mx_driving}/perception/fused/ops/bev_pool_v2.py (98%) rename {ads => mx_driving}/perception/fused/ops/csrc/BEVPool.cpp (100%) rename {ads => mx_driving}/perception/fused/ops/csrc/BEVPoolBackward.cpp (100%) rename {ads => mx_driving}/perception/fused/ops/csrc/BEVPoolV2.cpp (100%) rename {ads => mx_driving}/perception/fused/ops/csrc/BEVPoolV2Backward.cpp (100%) rename {ads => mx_driving}/perception/fused/ops/csrc/README.md (100%) rename {ads => mx_driving}/perception/fused/ops/csrc/functions.h (100%) rename {ads => mx_driving}/perception/fused/ops/csrc/pybind.cpp (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/CMakeLists.txt (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/README.md (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/op_host/CMakeLists.txt (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/op_host/bev_pool.cpp (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/op_host/bev_pool_tiling.h (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/op_kernel/CMakeLists.txt (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/op_kernel/bev_pool.cpp (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/op_kernel/bev_pool.h (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/op_kernel/bev_pool_grad.cpp (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/op_kernel/bev_pool_v2.cpp (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/op_kernel/bev_pool_v2.h (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/op_kernel/bev_pool_v2_grad.cpp (100%) rename {ads => mx_driving}/perception/fused/ops/kernels/op_kernel/common.h (100%) rename {ads => mx_driving}/perception/point/__init__.py (100%) rename {ads => mx_driving}/perception/point/components/README.md (100%) rename {ads => mx_driving}/perception/point/ops/__init__.py (100%) rename {ads => mx_driving}/perception/point/ops/csrc/GroupPoints.cpp (100%) rename {ads => mx_driving}/perception/point/ops/csrc/PointToVoxel.cpp (100%) rename {ads => mx_driving}/perception/point/ops/csrc/README.md (100%) rename {ads => mx_driving}/perception/point/ops/csrc/UniqueVoxel.cpp (100%) rename {ads => mx_driving}/perception/point/ops/csrc/VecPoolBackward.cpp (100%) rename {ads => mx_driving}/perception/point/ops/csrc/VoxelToPoint.cpp (100%) rename {ads => mx_driving}/perception/point/ops/csrc/functions.h (100%) rename {ads => mx_driving}/perception/point/ops/csrc/pybind.cpp (100%) rename {ads => mx_driving}/perception/point/ops/group_points.py (100%) rename {ads => mx_driving}/perception/point/ops/kernels/CMakeLists.txt (100%) rename {ads => mx_driving}/perception/point/ops/kernels/README.md (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_host/CMakeLists.txt (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_host/group_points_grad.cpp (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_host/group_points_grad_tiling.h (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_host/point_to_voxel.cpp (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_host/point_to_voxel_tiling.h (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_host/unique_voxel.cpp (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_host/unique_voxel_tiling.h (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_host/vec_pool_grad.cpp (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_host/vec_pool_grad_tiling.h (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_kernel/CMakeLists.txt (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_kernel/group_points_grad.cpp (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_kernel/point_to_voxel.cpp (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_kernel/unique_voxel.cpp (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_kernel/vec_pool_grad.cpp (100%) rename {ads => mx_driving}/perception/point/ops/kernels/op_kernel/voxel_to_point.cpp (100%) rename {ads => mx_driving}/perception/vision/__init__.py (100%) rename {ads => mx_driving}/perception/vision/components/README.md (100%) rename {ads => mx_driving}/perception/vision/ops/__init__.py (100%) rename {ads => mx_driving}/perception/vision/ops/boxes_overlap_bev.py (100%) rename {ads => mx_driving}/perception/vision/ops/csrc/BoxesOverlapBev.cpp (100%) rename {ads => mx_driving}/perception/vision/ops/csrc/README.md (100%) rename {ads => mx_driving}/perception/vision/ops/csrc/functions.h (100%) rename {ads => mx_driving}/perception/vision/ops/csrc/pybind.cpp (100%) rename {ads => mx_driving}/perception/vision/ops/kernels/CMakeLists.txt (100%) rename {ads => mx_driving}/perception/vision/ops/kernels/README.md (100%) rename {ads => mx_driving}/perception/vision/ops/kernels/op_host/CMakeLists.txt (100%) rename {ads => mx_driving}/perception/vision/ops/kernels/op_host/boxes_overlap_bev.cpp (100%) rename {ads => mx_driving}/perception/vision/ops/kernels/op_host/boxes_overlap_bev_tiling.h (100%) rename {ads => mx_driving}/perception/vision/ops/kernels/op_kernel/CMakeLists.txt (100%) rename {ads => mx_driving}/perception/vision/ops/kernels/op_kernel/boxes_overlap_bev.cpp (100%) rename {ads => mx_driving}/spconv/CMakeLists.txt (100%) rename {ads => mx_driving}/spconv/__init__.py (100%) rename {ads => mx_driving}/spconv/ops/__init__.py (100%) rename {ads => mx_driving}/spconv/ops/csrc/MultiToSparse.cpp (100%) rename {ads => mx_driving}/spconv/ops/csrc/README.md (100%) rename {ads => mx_driving}/spconv/ops/csrc/SparseConv3d.cpp (100%) rename {ads => mx_driving}/spconv/ops/csrc/SparseConv3dGrad.cpp (100%) rename {ads => mx_driving}/spconv/ops/csrc/SubmSparseCov3d.cpp (100%) rename {ads => mx_driving}/spconv/ops/csrc/functions.h (100%) rename {ads => mx_driving}/spconv/ops/csrc/pybind.cpp (100%) rename {ads => mx_driving}/spconv/ops/kernels/CMakeLists.txt (100%) rename {ads => mx_driving}/spconv/ops/kernels/README.md (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_host/CMakeLists.txt (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_host/sparse_conv3d.cpp (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_host/sparse_conv3d_grad.cpp (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_host/sparse_conv3d_grad_tiling.h (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_host/sparse_conv3d_tiling.h (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_host/subm_sparse_conv3d_tiling.cpp (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_host/subm_sparse_conv3d_tiling.h (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_host/to_sparse.cpp (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_host/to_sparse_tiling.h (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_kernel/CMakeLists.txt (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_kernel/sparse_conv3d.cpp (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_kernel/sparse_conv3d_grad.cpp (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_kernel/subm_sparse_conv3d.cpp (100%) rename {ads => mx_driving}/spconv/ops/kernels/op_kernel/to_sparse.cpp (100%) rename {ads => mx_driving}/spconv/ops/sparse_conv.py (100%) rename {ads => mx_driving}/spconv/ops/sparse_functional.py (100%) rename {ads => mx_driving}/spconv/ops/sparse_modules.py (99%) rename {ads => mx_driving}/spconv/ops/sparse_ops.py (100%) rename {ads => mx_driving}/spconv/ops/sparse_structure.py (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index b79f1f1..fb306ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,11 +6,11 @@ include(cmake/config.cmake) include(cmake/func.cmake) include(cmake/intf.cmake) -set(ADS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/ads) -add_subdirectory(${ADS_DIR}/common) -add_subdirectory(${ADS_DIR}/motion) -add_subdirectory(${ADS_DIR}/perception) -add_subdirectory(${ADS_DIR}/spconv) +set(MX_DRIVING_DIR ${CMAKE_CURRENT_SOURCE_DIR}/mx_driving) +add_subdirectory(${MX_DRIVING_DIR}/common) +add_subdirectory(${MX_DRIVING_DIR}/motion) +add_subdirectory(${MX_DRIVING_DIR}/perception) +add_subdirectory(${MX_DRIVING_DIR}/spconv) opbuild(OPS_SRC ${ASCEND_HOST_SRC} OUT_DIR ${ASCEND_AUTOGEN_PATH}) @@ -59,13 +59,13 @@ add_custom_command( POST_BUILD COMMAND ${CMAKE_COMMAND} -E chdir - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling + ${MX_DRIVING_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling ${CMAKE_COMMAND} -E create_symlink lib/linux/${CMAKE_SYSTEM_PROCESSOR}/$ liboptiling.so) install( FILES - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling/liboptiling.so + ${MX_DRIVING_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling/liboptiling.so DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling) if(${ENABLE_ONNX}) @@ -125,7 +125,7 @@ foreach(compute_unit ${ASCEND_COMPUTE_UNIT}) TARGET ops_info_gen_${compute_unit} OUTPUT - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/config/${compute_unit}/aic-${compute_unit}-ops-info.json + ${MX_DRIVING_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/config/${compute_unit}/aic-${compute_unit}-ops-info.json OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini INSTALL_DIR @@ -142,7 +142,7 @@ foreach(compute_unit ${ASCEND_COMPUTE_UNIT}) IMPL_DIR ${ASCEND_KERNEL_PATH} OUT_DIR - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl + ${MX_DRIVING_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl ) install_file( TRG @@ -164,11 +164,11 @@ foreach(compute_unit ${ASCEND_COMPUTE_UNIT}) IMPL_DIR ${ASCEND_KERNEL_PATH} ADP_DIR - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl/dynamic + ${MX_DRIVING_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl/dynamic OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/binary/${compute_unit} KERNEL_DIR - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel + ${MX_DRIVING_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel COMPUTE_UNIT @@ -185,7 +185,7 @@ add_npu_support_target( OPS_INFO_DIR ${ASCEND_AUTOGEN_PATH} OUT_DIR - ${ADS_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_info_cfg/ai_core + ${MX_DRIVING_PATH}/packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_info_cfg/ai_core INSTALL_DIR packages/vendors/${vendor_name}/framework/${ASCEND_FRAMEWORK_TYPE}) @@ -202,9 +202,9 @@ add_custom_target( gen_version_info ALL COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/cmake/util/gen_version_info.sh - ${ASCEND_CANN_PACKAGE_PATH} ${ADS_PATH}/packages/vendors/${vendor_name}) + ${ASCEND_CANN_PACKAGE_PATH} ${MX_DRIVING_PATH}/packages/vendors/${vendor_name}) -install(FILES ${ADS_PATH}/packages/vendors/${vendor_name}/version.info +install(FILES ${MX_DRIVING_PATH}/packages/vendors/${vendor_name}/version.info DESTINATION packages/vendors/${vendor_name}) if(COMPILE_OPP_PACKAGE) diff --git a/MANIFEST.in b/MANIFEST.in index cd191c5..3450ea3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -recursive-include ads/packages/ * +recursive-include mx_driving/packages/ * diff --git a/README.md b/README.md index d00b1d1..c896c74 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# ADS-Accelerator +# mxDriving # 简介 -ADS-Accelerator是基于昇腾NPU平台开发的适用于自动驾驶场景的算子和模型加速库,提供了一系列高性能的算子和模型加速接口,支持PyTorch框架。 +mxDriving是基于昇腾NPU平台开发的适用于自动驾驶场景的算子和模型加速库,提供了一系列高性能的算子和模型加速接口,支持PyTorch框架。 # 安装 @@ -20,14 +20,14 @@ ADS-Accelerator是基于昇腾NPU平台开发的适用于自动驾驶场景的 ## 从源码安装 1. 克隆原始仓 ```shell -git clone https://gitee.com/ascend/ads.git +git clone https://gitee.com/ascend/mxDriving.git ``` -2. 编译ADS +2. 编译mxDriving > 注意:请在仓库根目录下执行编译命令 ```shell bash ci/build.sh --python=3.7 ``` -生成的whl包在`ads/dist`目录下, 命名规则为`ads_accelerator-1.0.0+git{commit_id}-cp{python_version}-linux_{arch}.whl`。 +生成的whl包在`mx_driving/dist`目录下, 命名规则为`mx_driving-1.0.0+git{commit_id}-cp{python_version}-linux_{arch}.whl`。 参数`--python`指定编译过程中使用的python版本,支持3.7及以上: | 参数 | 取值范围 | 说明 | 缺省值 | 备注 | @@ -44,26 +44,26 @@ bash ci/build.sh --python=3.7 | aarch64 | pytorch1.11 | Python3.7(\>=3.7.5), Python3.8, Python3.9, Python3.10 | | aarch64 | pytorch2.0.1 | Python3.8, Python3.9, Python3.10 | | aarch64 | pytorch2.1.0 | Python3.8, Python3.9, Python3.10 | -3. 安装ADS +3. 安装mxDriving ```shell -cd ads/dist -pip3 install ads_accelerator-1.0.0+git{commit_id}-cp{python_version}-linux_{arch}.whl +cd mx_driving/dist +pip3 install mx_driving-1.0.0+git{commit_id}-cp{python_version}-linux_{arch}.whl ``` 如需要保存安装日志,可在`pip3 install`命令后添加`--log `参数,并对您指定的目录做好权限控制。 # 卸载 Pytorch 框架训练环境的卸载请参考昇腾官方文档[Pytorch框架训练环境卸载](https://hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes/ptes_00032.html)。 -ADS-Accelerator的卸载只需执行以下命令: +mxDriving的卸载只需执行以下命令: ```shell -pip3 uninstall ads-accelerator +pip3 uninstall mx_driving ``` # 快速上手 1. source 环境变量 ```shell -# 查看ads安装路径 -pip3 show ads-accelerator -export ASCEND_CUSTOM_OPP_PATH=xxx/site-packages/ads/packages/vendors/customize/ -export LD_LIBRARY_PATH=xxx/site-packages/ads/packages/vendors/customize/op_api/lib/:$LD_LIBRARY_PATH +# 查看mx_driving安装路径 +pip3 show mx_driving +export ASCEND_CUSTOM_OPP_PATH=xxx/site-packages/mx_driving/packages/vendors/customize/ +export LD_LIBRARY_PATH=xxx/site-packages/mx_driving/packages/vendors/customize/op_api/lib/:$LD_LIBRARY_PATH ``` 2. 算子调用 请参见下文算子清单。 @@ -72,7 +72,7 @@ export LD_LIBRARY_PATH=xxx/site-packages/ads/packages/vendors/customize/op_api/l ## 目录结构及说明 ``` . -├── ads +├── mx_driving │ ├── __init__.py │ ├── common # 通用模块 │ │ ├── __init__.py @@ -122,20 +122,20 @@ export LD_LIBRARY_PATH=xxx/site-packages/ads/packages/vendors/customize/op_api/l ```shell echo 2 > /proc/sys/kernel/randomize_va_space ``` -2. 由于ADS-Accelerator需要用户自行编译,建议您对编译后生成的so文件开启`strip`, 又称**移除调试符号信息**, 开启方式如下: +2. 由于mxDriving需要用户自行编译,建议您对编译后生成的so文件开启`strip`, 又称**移除调试符号信息**, 开启方式如下: ```shell strip -s ``` 具体so文件如下: - - ads/packages/vendors/customize/op_api/lib/libcust_opapi.so - - ads/packages/vendors/customize/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so - - ads/packages/vendors/customize/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opsproto_rt2.0.so + - mx_driving/packages/vendors/customize/op_api/lib/libcust_opapi.so + - mx_driving/packages/vendors/customize/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so + - mx_driving/packages/vendors/customize/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opsproto_rt2.0.so ## 运行用户建议 -出于安全性及权限最小化角度考虑,不建议使用`root`等管理员类型账户使用ads。 +出于安全性及权限最小化角度考虑,不建议使用`root`等管理员类型账户使用mx_driving。 ## 文件权限控制 -在使用ADS-Accelerator时,您可能会进行profiling、调试等操作,建议您对相关目录及文件做好权限控制,以保证文件安全。 -1. 建议您在使用ADS-Accelerator时,将umask调整为`0027`及以上,保障新增文件夹默认最高权限为`750`,文件默认最高权限为`640`。 +在使用mxDriving时,您可能会进行profiling、调试等操作,建议您对相关目录及文件做好权限控制,以保证文件安全。 +1. 建议您在使用mxDriving时,将umask调整为`0027`及以上,保障新增文件夹默认最高权限为`750`,文件默认最高权限为`640`。 2. 建议您对个人数据、商业资产、源文件、训练过程中保存的各类文件等敏感内容做好权限管控,可参考下表设置安全权限。 ### 文件权限参考 @@ -160,13 +160,13 @@ export LD_LIBRARY_PATH=xxx/site-packages/ads/packages/vendors/customize/op_api/l | 加解密接口、加解密脚本 | 500(r-x------) | ## 构建安全声明 -在源码编译安装ADS-Accelerator时,需要您自行编译,编译过程中会生成一些中间文件,建议您在编译完成后,对中间文件做好权限控制,以保证文件安全。 +在源码编译安装mxDriving时,需要您自行编译,编译过程中会生成一些中间文件,建议您在编译完成后,对中间文件做好权限控制,以保证文件安全。 ## 运行安全声明 1. 建议您结合运行环境资源状况编写对应训练脚本。若训练脚本与资源状况不匹配,如数据集加载内存大小超出内存容量限制、训练脚本在本地生成数据超过磁盘空间大小等情况,可能引发错误并导致进程意外退出。 -2. ADS-Accelerator在运行异常时(如输入校验异常(请参考api文档说明),环境变量配置错误,算子执行报错等)会退出进程并打印报错信息,属于正常现象。建议用户根据报错提示定位具体错误原因,包括通过设定算子同步执行、查看CANN日志、解析生成的Core Dump文件等方式。 +2. mxDriving在运行异常时(如输入校验异常(请参考api文档说明),环境变量配置错误,算子执行报错等)会退出进程并打印报错信息,属于正常现象。建议用户根据报错提示定位具体错误原因,包括通过设定算子同步执行、查看CANN日志、解析生成的Core Dump文件等方式。 ## 公网地址声明 -在ads的配置文件和脚本中存在[公网地址](#公网地址) +在mx_driving的配置文件和脚本中存在[公网地址](#公网地址) ### 公网地址 @@ -183,11 +183,11 @@ export LD_LIBRARY_PATH=xxx/site-packages/ads/packages/vendors/customize/op_api/l | 开源引入 | https://gitee.com/it-monkey/protocolbuffers.git | ci/docker/X86/build_protobuf.sh | https://gitee.com/it-monkey/protocolbuffers.git | 用于构建protobuf | ## 公开接口声明 -参考[API清单](./docs/api/README.md),Ads提供了对外的自定义接口。如果一个函数在文档中有展示,则该接口是公开接口。否则,使用该功能前可以在社区询问该功能是否确实是公开的或意外暴露的接口,因为这些未暴露接口将来可能会被修改或者删除。 +参考[API清单](./docs/api/README.md),mxDriving提供了对外的自定义接口。如果一个函数在文档中有展示,则该接口是公开接口。否则,使用该功能前可以在社区询问该功能是否确实是公开的或意外暴露的接口,因为这些未暴露接口将来可能会被修改或者删除。 ## 通信安全加固 -ADS-Accelerator在运行时依赖于`PyTorch`及`torch_npu`,您需关注通信安全加固,具体方式请参考[torch_npu通信安全加固](https://gitee.com/ascend/pytorch/blob/master/SECURITYNOTE.md#%E9%80%9A%E4%BF%A1%E5%AE%89%E5%85%A8%E5%8A%A0%E5%9B%BA)。 +mxDriving在运行时依赖于`PyTorch`及`torch_npu`,您需关注通信安全加固,具体方式请参考[torch_npu通信安全加固](https://gitee.com/ascend/pytorch/blob/master/SECURITYNOTE.md#%E9%80%9A%E4%BF%A1%E5%AE%89%E5%85%A8%E5%8A%A0%E5%9B%BA)。 ## 通信矩阵 -ADS-Accelerator在运行时依赖于`PyTorch`及`torch_npu`,涉及通信矩阵,具体信息请参考[torch_npu通信矩阵](https://gitee.com/ascend/pytorch/blob/master/SECURITYNOTE.md#%E9%80%9A%E4%BF%A1%E7%9F%A9%E9%98%B5)。 +mxDriving在运行时依赖于`PyTorch`及`torch_npu`,涉及通信矩阵,具体信息请参考[torch_npu通信矩阵](https://gitee.com/ascend/pytorch/blob/master/SECURITYNOTE.md#%E9%80%9A%E4%BF%A1%E7%9F%A9%E9%98%B5)。 # 支持Python,PyTorch和torch_npu版本说明 @@ -203,17 +203,17 @@ ADS-Accelerator在运行时依赖于`PyTorch`及`torch_npu`,涉及通信矩阵 # 软件生命周期说明 ## 分支维护策略 -ADS-Accelerator版本分支的维护阶段如下: +mxDriving版本分支的维护阶段如下: | **状态** | **时间** | **说明** | | ------------------- | -------- | ------------------------------------------------ | | 计划 | 1—3 个月 | 计划特性 | | 开发 | 3 个月 | 开发特性 | -| 维护 | 6-12 个月| 合入所有已解决的问题并发布版本,针对不同的ADS-Accelerator版本采取不同的维护策略,常规版本和长期支持版本维护周期分别为6个月和12个月 | +| 维护 | 6-12 个月| 合入所有已解决的问题并发布版本,针对不同的mxDriving版本采取不同的维护策略,常规版本和长期支持版本维护周期分别为6个月和12个月 | | 无维护 | 0—3 个月 | 合入所有已解决的问题,无专职维护人员,无版本发布 | | 生命周期终止(EOL) | N/A | 分支不再接受任何修改 | -## ADS-Accelerator版本维护策略 +## mxDriving版本维护策略 -| **ADS-Accelerator版本** | **维护策略** | **当前状态** | **发布时间** | **后续状态** | **EOL日期** | +| **mxDriving版本** | **维护策略** | **当前状态** | **发布时间** | **后续状态** | **EOL日期** | |-----------|-----------|--------|------------|-----------------------|-----------| diff --git a/Third_Party_Open_Source__Software_Notice b/Third_Party_Open_Source__Software_Notice index 93966ca..fe44bbd 100644 --- a/Third_Party_Open_Source__Software_Notice +++ b/Third_Party_Open_Source__Software_Notice @@ -6,7 +6,7 @@ Warranty Disclaimer THE OPEN SOURCE SOFTWARE IN THIS PRODUCT IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE APPLICABLE LICENSES FOR MORE DETAILS. Copyright Notice and License Texts -Software: ads v1.0.0 +Software: mx_driving v1.0.0 Copyright notice: GNU GENERAL PUBLIC LICENSE Version 2, June 1991 diff --git a/ci/build.sh b/ci/build.sh index 2b47bed..919b7c7 100644 --- a/ci/build.sh +++ b/ci/build.sh @@ -1,7 +1,7 @@ # Copyright 2023 Huawei Technologies Co., Ltd CUR_DIR=$(dirname $(readlink -f $0)) SCRIPTS_DIR=${CUR_DIR}/../scripts -BUILD_PACKAGES_DIR=${CUR_DIR}/../ads/packages +BUILD_PACKAGES_DIR=${CUR_DIR}/../mx_driving/packages SUPPORTED_PY_VERSION=(3.7 3.8 3.9 3.10) PY_VERSION='3.7' SINGLE_OP='' @@ -93,11 +93,11 @@ function main() fi cd ${CUR_DIR}/.. rm -rf build - if [ -d "ads_accelerator.egg-info" ]; then - echo "ads_accelerator.egg-info exist" - rm -rf ads_accelerator.egg-info + if [ -d "mx_driving.egg-info" ]; then + echo "mx_driving.egg-info exist" + rm -rf mx_driving.egg-info else - echo "ads_accelerator.egg-info not exist" + echo "mx_driving.egg-info not exist" fi python"${PY_VERSION}" setup.py build bdist_wheel diff --git a/ci/cov.sh b/ci/cov.sh index 1016a01..34a63fc 100644 --- a/ci/cov.sh +++ b/ci/cov.sh @@ -1,7 +1,7 @@ #!/bin/bash CUR_DIR=$(dirname $(readlink -f $0)) NEW_BRANCH_PATH=$(readlink -f ${CUR_DIR}/..) -MASTER_BRANCH_PATH=$(readlink -f ${NEW_BRANCH_PATH}/../ads) +MASTER_BRANCH_PATH=$(readlink -f ${NEW_BRANCH_PATH}/../mx_driving) #生成覆盖率 diff -r -N -x ".git" -x "*.doc" -x "*.json" -x "*.h" -x "*.py" -x "*.so" -x "*.info" -x "*.o" -u ${MASTER_BRANCH_PATH} ${NEW_BRANCH_PATH} >> diff.txt diff --git a/cmake/config.cmake b/cmake/config.cmake index 40f8ccc..d4fe96a 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -59,7 +59,7 @@ set(ASCEND_TENSOR_COMPILER_PATH ${ASCEND_CANN_PACKAGE_PATH}/compiler) set(ASCEND_CCEC_COMPILER_PATH ${ASCEND_TENSOR_COMPILER_PATH}/ccec_compiler/bin) set(ASCEND_AUTOGEN_PATH ${CMAKE_BINARY_DIR}/autogen) set(ASCEND_KERNEL_PATH ${CMAKE_BINARY_DIR}/kernels) -set(ADS_PATH ${PROJECT_SOURCE_DIR}/ads) +set(MX_DRIVING_PATH ${PROJECT_SOURCE_DIR}/mx_driving) set(ASCEND_HOST_SRC "" CACHE STRING "host source files") diff --git a/cmake/func.cmake b/cmake/func.cmake index 2384e9a..3f532ae 100644 --- a/cmake/func.cmake +++ b/cmake/func.cmake @@ -3,20 +3,20 @@ function(install_target) set_target_properties( ${INSTALL_TARGET_TRG} PROPERTIES LIBRARY_OUTPUT_DIRECTORY - ${ADS_PATH}/${INSTALL_TARGET_DST}) + ${MX_DRIVING_PATH}/${INSTALL_TARGET_DST}) install(TARGETS ${INSTALL_TARGET_TRG} LIBRARY DESTINATION ${INSTALL_TARGET_DST}) endfunction() function(install_file) cmake_parse_arguments(INSTALL_TARGET "" "DST;TRG" "SRC" ${ARGN}) - file(MAKE_DIRECTORY ${ADS_PATH}/${INSTALL_TARGET_DST}) + file(MAKE_DIRECTORY ${MX_DRIVING_PATH}/${INSTALL_TARGET_DST}) foreach(SOURCE_FILE ${INSTALL_TARGET_SRC}) add_custom_command( TARGET ${INSTALL_TARGET_TRG} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${SOURCE_FILE} - ${ADS_PATH}/${INSTALL_TARGET_DST}) + ${MX_DRIVING_PATH}/${INSTALL_TARGET_DST}) endforeach() install(FILES ${INSTALL_TARGET_SRC} DESTINATION ${INSTALL_TARGET_DST}) endfunction() @@ -216,7 +216,7 @@ function(add_bin_compile_target) ${BINCMP_TARGET}_${op_file}_${op_index} COMMAND export HI_PYTHON=${ASCEND_PYTHON_EXECUTABLE} && export - ASCEND_CUSTOM_OPP_PATH=${ADS_PATH}/packages/vendors/${vendor_name} + ASCEND_CUSTOM_OPP_PATH=${MX_DRIVING_PATH}/packages/vendors/${vendor_name} && bash ${CMAKE_SOURCE_DIR}/scripts/retry.sh \"bash ${bin_script} ${BINCMP_OUT_DIR}/src/${op_type}.py ${BINCMP_KERNEL_DIR}/${BINCMP_COMPUTE_UNIT}/${op_file}\" WORKING_DIRECTORY ${BINCMP_OUT_DIR}) diff --git a/docs/api/README.md b/docs/api/README.md index b75c144..132d8de 100644 --- a/docs/api/README.md +++ b/docs/api/README.md @@ -3,7 +3,7 @@ ## scatter_max ### 接口原型 ```python -ads.common.scatter_max(Tensor updates, Tensor indices, Tensor out=None) -> (Tensor out, Tensor argmax) +mx_driving.common.scatter_max(Tensor updates, Tensor indices, Tensor out=None) -> (Tensor out, Tensor argmax) ``` ### 功能描述 在第0维上,将输入张量`updates`中的元素按照`indices`中的索引进行分散,然后在第0维上取最大值,返回最大值和对应的索引。对于1维张量,公式如下: @@ -29,7 +29,7 @@ $$argmax_i = argmax_j(updates_j)$$ ### 调用示例 ```python import torch, torch_npu -from ads.common import scatter_max +from mx_driving.common import scatter_max updates = torch.tensor([[2, 0, 1, 3, 1, 0, 0, 4], [0, 2, 1, 3, 0, 3, 4, 2], [1, 2, 3, 4, 4, 3, 2, 1]], dtype=torch.float32).npu() indices = torch.tensor([0, 2, 0], dtype=torch.int32).npu() out = updates.new_zeros((3, 8)) @@ -45,10 +45,10 @@ tensor([[0, 2, 2, 2, 2, 2, 2, 0], [3, 3, 3, 3, 3, 3, 3, 3], [1, 1, 1, 1, 1, 1, 1, 1]]) ``` -## \[prototype\] npu_rotated_overlaps +## npu_rotated_overlaps ### 接口原型 ```python -ads.common.npu_rotated_overlaps(Tensor self, Tensor query_boxes, bool trans=False) -> Tensor +mx_driving.common.npu_rotated_overlaps(Tensor self, Tensor query_boxes, bool trans=False) -> Tensor ``` ### 功能描述 计算旋转框的重叠面积。 @@ -64,7 +64,7 @@ ads.common.npu_rotated_overlaps(Tensor self, Tensor query_boxes, bool trans=Fals ```python import torch, torch_npu import numpy as np -from ads.common import npu_rotated_overlaps +from mx_driving.common import npu_rotated_overlaps a = np.random.uniform(0, 1, (1, 3, 5)).astype(np.float16) b = np.random.uniform(0, 1, (1, 2, 5)).astype(np.float16) box1 = torch.from_numpy(a).npu() @@ -77,10 +77,10 @@ tensor([[[0.0000, 0.1562, 0.0000], [0.1562, 0.3713, 0.0611], [0.0000, 0.0611, 0.0000]]], dtype=torch.float16) ``` -## \[prototype\] npu_rotated_iou +## npu_rotated_iou ### 接口原型 ```python -ads.common.npu_rotated_iou(Tensor self, Tensor query_boxes, bool trans=False, int mode=0, bool is_cross=True, float v_threshold=0.0, float e_threshold=0.0) -> Tensor +mx_driving.common.npu_rotated_iou(Tensor self, Tensor query_boxes, bool trans=False, int mode=0, bool is_cross=True, float v_threshold=0.0, float e_threshold=0.0) -> Tensor ``` ### 功能描述 计算旋转框的IoU。 @@ -100,7 +100,7 @@ ads.common.npu_rotated_iou(Tensor self, Tensor query_boxes, bool trans=False, in ```python import torch, torch_npu import numpy as np -from ads.common import npu_rotated_iou +from mx_driving.common import npu_rotated_iou a = np.random.uniform(0, 1, (2, 2, 5)).astype(np.float16) b = np.random.uniform(0, 1, (2, 3, 5)).astype(np.float16) box1 = torch.from_numpy(a).npu() @@ -118,7 +118,7 @@ tensor([[[3.3325e-01, 1.0162e-01], ## npu_dynamic_scatter ### 接口原型 ```python -ads.common.npu_dynamic_scatter(Tensor feats, Tensor coors, str reduce_type = 'max') -> Tuple[torch.Tensor, torch.Tensor] +mx_driving.common.npu_dynamic_scatter(Tensor feats, Tensor coors, str reduce_type = 'max') -> Tuple[torch.Tensor, torch.Tensor] ``` ### 功能描述 将点云特征点在对应体素中进行特征压缩。 @@ -134,7 +134,7 @@ ads.common.npu_dynamic_scatter(Tensor feats, Tensor coors, str reduce_type = 'ma ### 调用示例 ```python import torch, torch_npu -from ads.common import npu_dynamic_scatter +from mx_driving.common import npu_dynamic_scatter feats = torch.tensor([[1, 2, 3], [3, 2, 1], [7, 8, 9], [9, 8, 7]], dtype=torch.float32).npu() coors = torch.tensor([[1, 1, 1], [1, 1, 1], [2, 2, 2], [2, 2, 2]], dtype=torch.int32).npu() @@ -146,7 +146,7 @@ print(voxel_coors) ## npu_points_in_box ### 接口原型 ```python -ads.common.npu_points_in_box(Tensor boxes, Tensor points) -> Tensor +mx_driving.common.npu_points_in_box(Tensor boxes, Tensor points) -> Tensor ``` ### 功能描述 判断点是否在框内。 @@ -162,7 +162,7 @@ ads.common.npu_points_in_box(Tensor boxes, Tensor points) -> Tensor ### 调用示例 ```python import torch, torch_npu -from ads.common import npu_points_in_box +from mx_driving.common import npu_points_in_box boxes = torch.tensor([[[1, 2, 3, 4, 5, 6, 7], [3, 4, 5, 6, 7, 8, 9]]], dtype=torch.float32).npu() points = torch.tensor([[[1, 2, 3], [3, 4, 5]]], dtype=torch.float32).npu() out = npu_points_in_box(boxes, points) @@ -174,7 +174,7 @@ tensor([[0, 1]], dtype=torch.int32) ## npu_multi_scale_deformable_attn_function ### 接口原型 ```python -ads.common.npu_multi_scale_deformable_attn_function(Tensor value, Tensor shape, Tensor offset, Tensor locations, Tensor weight) -> Tensor +mx_driving.common.npu_multi_scale_deformable_attn_function(Tensor value, Tensor shape, Tensor offset, Tensor locations, Tensor weight) -> Tensor ``` ### 功能描述 多尺度可变形注意力机制, 将多个视角的特征图进行融合。 @@ -193,7 +193,7 @@ ads.common.npu_multi_scale_deformable_attn_function(Tensor value, Tensor shape, ### 调用示例 ```python import torch, torch_npu -from ads.common import npu_multi_scale_deformable_attn_function +from mx_driving.common import npu_multi_scale_deformable_attn_function bs, num_levels, num_heads, num_points, num_queries, embed_dims = 1, 1, 4, 8, 16, 32 shapes = torch.as_tensor([(100, 100)], dtype=torch.long) @@ -213,7 +213,7 @@ tensor([[[9.3002, 11.1603, 0.0000, 0.0000]]], dtype=torch.float32) ## voxelization ### 接口原型 ```python -ads.common.voxelization(Tensor points, List[float] voxel_size, List[float] coors_range, int max_points=-1, int max_voxels=-1, bool deterministic=True) -> Tensor +mx_driving.common.voxelization(Tensor points, List[float] voxel_size, List[float] coors_range, int max_points=-1, int max_voxels=-1, bool deterministic=True) -> Tensor ``` ### 功能描述 将点云数据进行体素化。 @@ -231,7 +231,7 @@ ads.common.voxelization(Tensor points, List[float] voxel_size, List[float] coors ### 调用示例 ```python import torch, torch_npu -from ads.common import Voxelization +from mx_driving.common import Voxelization points = torch.randint(-20, 100, [16, 3], dtype=torch.float32).npu() coors_range = [0, -40, -3, 70.4, 40, 1] max_points = -1 @@ -243,7 +243,7 @@ print(out) ## npu_nms3d_normal ### 接口原型 ```python -ads.common.npu_nms3d_normal(Tensor boxes, Tensor scores, float: iou_threshold) -> Tensor +mx_driving.common.npu_nms3d_normal(Tensor boxes, Tensor scores, float: iou_threshold) -> Tensor ``` ### 功能描述 3D非极大值抑制。 @@ -258,7 +258,7 @@ ads.common.npu_nms3d_normal(Tensor boxes, Tensor scores, float: iou_threshold) - ### 调用示例 ```python import torch, torch_npu -from ads.common import npu_nms3d_normal +from mx_driving.common import npu_nms3d_normal boxes = torch.tensor([[1, 2, 3, 4, 5, 6, 7], [3, 4, 5, 6, 7, 8, 9]], dtype=torch.float32).npu() scores = torch.tensor([1, 2], dtype=torch.float32).npu() out = npu_nms3d_normal(boxes, scores, 0.5) @@ -270,7 +270,7 @@ tensor([[1, 0]], dtype=torch.int32) ## npu_nms3d ### 接口原型 ```python -ads.common.npu_nms3d(Tensor boxes, Tensor scores, float: iou_threshold) -> Tensor +mx_driving.common.npu_nms3d(Tensor boxes, Tensor scores, float: iou_threshold) -> Tensor ``` ### 功能描述 3D非极大值抑制,在bev视角下剔除多个3d box交并比大于阈值的box。 @@ -285,7 +285,7 @@ ads.common.npu_nms3d(Tensor boxes, Tensor scores, float: iou_threshold) -> Tenso ### 调用示例 ```python import torch, torch_npu -from ads.common import npu_nms3d +from mx_driving.common import npu_nms3d boxes = torch.tensor([[1, 2, 3, 4, 5, 6, 7], [3, 4, 5, 6, 7, 8, 9]], dtype=torch.float32).npu() scores = torch.tensor([1, 2], dtype=torch.float32).npu() out = npu_nms3d(boxes, scores, 0.5) @@ -297,7 +297,7 @@ tensor([[1]], dtype=torch.int32) ## npu_furthest_point_sampling ### 接口原型 ```python -ads.common.npu_furthest_point_sampling(Tensor points, int num_points) -> Tensor +mx_driving.common.npu_furthest_point_sampling(Tensor points, int num_points) -> Tensor ``` ### 功能描述 点云数据的最远点采样。 @@ -311,7 +311,7 @@ ads.common.npu_furthest_point_sampling(Tensor points, int num_points) -> Tensor ### 调用示例 ```python import torch, torch_npu -from ads.common import npu_furthest_point_sampling +from mx_driving.common import npu_furthest_point_sampling points = torch.tensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]], dtype=torch.float32).npu() out = npu_furthest_point_sampling(points, 2) print(out) @@ -324,7 +324,7 @@ tensor([[0, 2]], dtype=torch.int32) ## furthest_point_sample_with_dist ### 接口原型 ```python -ads.common.furthest_point_sample_with_dist(Tensor points, int num_points) -> (Tensor, Tensor) +mx_driving.common.furthest_point_sample_with_dist(Tensor points, int num_points) -> (Tensor, Tensor) ``` ### 功能描述 与`npu_furthest_point_sampling`功能相同,但输入略有不同。 @@ -338,7 +338,7 @@ ads.common.furthest_point_sample_with_dist(Tensor points, int num_points) -> (Te ### 调用示例 ```python import torch, torch_npu -from ads.common import furthest_point_sample_with_dist +from mx_driving.common import furthest_point_sample_with_dist points = torch.tensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]], dtype=torch.float32).npu() out = furthest_point_sample_with_dist(points, 2) print(out) @@ -350,7 +350,7 @@ tensor([[0, 2]], dtype=torch.int32) ## three_interpolate ### 接口原型 ```python -ads.common.three_interpolate(features: torch.Tensor, indices: torch.Tensor, +mx_driving.common.three_interpolate(features: torch.Tensor, indices: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: ``` ### 功能描述 @@ -370,7 +370,7 @@ ads.common.three_interpolate(features: torch.Tensor, indices: torch.Tensor, ### 调用示例 ```python import torch -from ads.common import three_interpolate +from mx_driving.common import three_interpolate features = torch.tensor( [[[2.4350, 4.7516, 4.4995, 2.4350, 2.4350, 2.4350], [3.1236, 2.6278, 3.0447, 3.1236, 3.1236, 3.1236], @@ -423,7 +423,7 @@ torch.tensor( ## bev_pool ### 接口原型 ```python -ads.perception.fused.bev_pool(Tensor feat, Tensor geom_feat, int B, int D, int H, int W) -> Tensor +mx_driving.perception.fused.bev_pool(Tensor feat, Tensor geom_feat, int B, int D, int H, int W) -> Tensor ``` ### 功能描述 BEV池化。可参考论文`BEVFusion: Multi-Task Multi-Sensor Fusion with Unified Bird's-Eye View Representation` @@ -447,7 +447,7 @@ BEV池化。可参考论文`BEVFusion: Multi-Task Multi-Sensor Fusion with Unifi ### 调用示例 ```python import torch, torch_npu -from ads.perception.fused import bev_pool +from mx_driving.perception.fused import bev_pool feat = torch.rand(4, 256).npu() feat.requires_grad_() geom_feat = torch.tensor([[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 0, 2], [0, 0, 0, 3]], dtype=torch.int32).npu() @@ -460,7 +460,7 @@ print(feat.grad) ## bev_pool_v2 ### 接口原型 ```python -ads.perception.fused.bev_pool_v2(Tensor depth, feat, Tensor ranks_depth, Tensor ranks_feat, Tensor ranks_bev, +mx_driving.perception.fused.bev_pool_v2(Tensor depth, feat, Tensor ranks_depth, Tensor ranks_feat, Tensor ranks_bev, List[int] bev_feat_shape, Tensor interval_starts, Tensor interval_lengths) -> Tensor ``` ### 功能描述 @@ -489,7 +489,7 @@ BEV池化优化版。可参考论文`BEVDet: High-performance Multi-camera 3D Ob ### 调用示例 ```python import torch, torch_npu -from ads.perception.fused import bev_pool_v2 +from mx_driving.perception.fused import bev_pool_v2 depth = torch.rand(2, 1, 8, 256, 256).npu() feat = torch.rand(2, 1, 256, 256, 64).npu() feat.requires_grad_() @@ -509,7 +509,7 @@ print(feat.grad) ## group_points ### 接口原型 ```python -ads.perception.point.npu_group_points(Tensor features, Tensor indices) -> Tensor +mx_driving.perception.point.npu_group_points(Tensor features, Tensor indices) -> Tensor ``` ### 功能描述 点云数据按照索引重新分组。 @@ -528,7 +528,7 @@ ads.perception.point.npu_group_points(Tensor features, Tensor indices) -> Tensor ```python import torch import torch_npu -import ads.perception.point +import mx_driving.perception.point indices = torch.tensor([[[0, 2, 5, 5], [1, 0, 5, 0], [2, 1, 4, 4]]]).int().npu() features = torch.tensor([[[0.9178, -0.7250, -1.6587, 0.0715, -0.2252, 0.4994], [0.6190, 0.1755, -1.7902, -0.5852, -0.3311, 1.9764], @@ -537,7 +537,7 @@ features = torch.tensor([[[0.9178, -0.7250, -1.6587, 0.0715, -0.2252, 0.4994], [0.7239, 0.2321, -0.6578, -1.1395, -2.3874, 1.1281]]], dtype=torch.float32).npu() features.requires_grad = True -output = ads.perception.point.npu_group_points(features, indices) +output = mx_driving.perception.point.npu_group_points(features, indices) output.backward(output) grad_features = features.grad ``` @@ -573,7 +573,7 @@ expected_grad_features = tensor( ## knn ### 接口原型 ```python -ads.common.knn(int k, Tensor xyz, Tensor center_xyz, bool Transposed) -> Tensor +mx_driving.common.knn(int k, Tensor xyz, Tensor center_xyz, bool Transposed) -> Tensor ``` ### 功能描述 對center_xyz中的每個點找到xyz中對應batch中的距離最近的k個點,并且返回此k個點的索引值。 @@ -589,7 +589,7 @@ ads.common.knn(int k, Tensor xyz, Tensor center_xyz, bool Transposed) -> Tensor ### 调用示例 ```python import torch, torch_npu -from ads.common import knn +from mx_driving.common import knn xyz = torch.tensor([[[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]], dtype=torch.float32).npu() center_xyz = torch.tensor([[1, 2, 3]], [[1, 2, 3]], dtype=torch.float32).npu() idx = knn(2, xyz, center_xyz, False) @@ -607,7 +607,7 @@ tensor([[0, 0], [1, 1]], dtype=torch.int32) ## three_nn ### 接口原型 ```python -ads.common.three_nn(Tensor target, Tensor source) -> (Tensor dist, Tensor idx) +mx_driving.common.three_nn(Tensor target, Tensor source) -> (Tensor dist, Tensor idx) ``` ### 功能描述 對target中的每個點找到source中對應batch中的距離最近的3個點,并且返回此3個點的距離和索引值。 @@ -622,7 +622,7 @@ ads.common.three_nn(Tensor target, Tensor source) -> (Tensor dist, Tensor idx) ### 调用示例 ```python import torch, torch_npu -from ads.common import three_nn +from mx_driving.common import three_nn source = torch.tensor([[[1, 1, 1], [1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]], dtype=torch.float32).npu() target = torch.tensor([[1, 2, 3]], [[1, 2, 3]], dtype=torch.float32).npu() dist, idx = three_nn(target, source) @@ -639,7 +639,7 @@ tensor([[0, 1, 2], [0, 1, 2]], dtype=torch.int32) ## RoipointPool3d ### 接口原型 ```python -ads.common.RoipointPool3d(int num_sampled_points, Tensor points, Tensor point_features, Tensor boxes3d) -> (Tensor pooled_features, Tensor pooled_empty_flag) +mx_driving.common.RoipointPool3d(int num_sampled_points, Tensor points, Tensor point_features, Tensor boxes3d) -> (Tensor pooled_features, Tensor pooled_empty_flag) ``` ### 功能描述 对每个3D方案的几何特定特征进行编码。 @@ -661,7 +661,7 @@ ads.common.RoipointPool3d(int num_sampled_points, Tensor points, Tensor point_fe ### 调用示例 ```python import torch, torch_npu -from ads.common import RoIPointPool3d +from mx_driving.common import RoIPointPool3d num_sampled_points = 1 points = torch.tensor([[[1, 2, 3]]], dtype=torch.float).npu() point_features = points.clone() @@ -678,7 +678,7 @@ tensor([[0]], dtype=torch.int32) ## boxes_overlap_bev ### 接口原型 ```python -ads.perception.vision.boxes_overlap_bev(Tensor boxes_a, Tensor boxes_b) -> Tensor +mx_driving.perception.vision.boxes_overlap_bev(Tensor boxes_a, Tensor boxes_b) -> Tensor ``` ### 功能描述 Calculates the intersection of bounding boxes in Bird's Eye View. @@ -694,7 +694,7 @@ Calculates the intersection of bounding boxes in Bird's Eye View. ### 调用示例 ```python import torch, torch_npu -from ads.perception.vision import boxes_overlap_bev +from mx_driving.perception.vision import boxes_overlap_bev boxes_a = torch.tensor([[0, 0, 2, 2, 0]], dtype=torch.float32).npu() boxes_b = torch.tensor([[1, 1, 3, 3, 0]], dtype=torch.float32).npu() area_overlap = boxes_overlap_bev(boxes_a, boxes_b) diff --git a/ads/__init__.py b/mx_driving/__init__.py similarity index 100% rename from ads/__init__.py rename to mx_driving/__init__.py diff --git a/ads/common/CMakeLists.txt b/mx_driving/common/CMakeLists.txt similarity index 100% rename from ads/common/CMakeLists.txt rename to mx_driving/common/CMakeLists.txt diff --git a/ads/common/__init__.py b/mx_driving/common/__init__.py similarity index 100% rename from ads/common/__init__.py rename to mx_driving/common/__init__.py diff --git a/ads/common/components/README.md b/mx_driving/common/components/README.md similarity index 100% rename from ads/common/components/README.md rename to mx_driving/common/components/README.md diff --git a/ads/common/ops/__init__.py b/mx_driving/common/ops/__init__.py similarity index 100% rename from ads/common/ops/__init__.py rename to mx_driving/common/ops/__init__.py diff --git a/ads/common/ops/csrc/DynamicScatter.cpp b/mx_driving/common/ops/csrc/DynamicScatter.cpp similarity index 100% rename from ads/common/ops/csrc/DynamicScatter.cpp rename to mx_driving/common/ops/csrc/DynamicScatter.cpp diff --git a/ads/common/ops/csrc/DynamicVoxelization.cpp b/mx_driving/common/ops/csrc/DynamicVoxelization.cpp similarity index 100% rename from ads/common/ops/csrc/DynamicVoxelization.cpp rename to mx_driving/common/ops/csrc/DynamicVoxelization.cpp diff --git a/ads/common/ops/csrc/FurthestPointSampling.cpp b/mx_driving/common/ops/csrc/FurthestPointSampling.cpp similarity index 100% rename from ads/common/ops/csrc/FurthestPointSampling.cpp rename to mx_driving/common/ops/csrc/FurthestPointSampling.cpp diff --git a/ads/common/ops/csrc/FurthestPointSamplingWithDist.cpp b/mx_driving/common/ops/csrc/FurthestPointSamplingWithDist.cpp similarity index 100% rename from ads/common/ops/csrc/FurthestPointSamplingWithDist.cpp rename to mx_driving/common/ops/csrc/FurthestPointSamplingWithDist.cpp diff --git a/ads/common/ops/csrc/Knn.cpp b/mx_driving/common/ops/csrc/Knn.cpp similarity index 100% rename from ads/common/ops/csrc/Knn.cpp rename to mx_driving/common/ops/csrc/Knn.cpp diff --git a/ads/common/ops/csrc/MultiScaleDeformableAttnFunction.cpp b/mx_driving/common/ops/csrc/MultiScaleDeformableAttnFunction.cpp similarity index 100% rename from ads/common/ops/csrc/MultiScaleDeformableAttnFunction.cpp rename to mx_driving/common/ops/csrc/MultiScaleDeformableAttnFunction.cpp diff --git a/ads/common/ops/csrc/Nms3d.cpp b/mx_driving/common/ops/csrc/Nms3d.cpp similarity index 100% rename from ads/common/ops/csrc/Nms3d.cpp rename to mx_driving/common/ops/csrc/Nms3d.cpp diff --git a/ads/common/ops/csrc/Nms3dNormal.cpp b/mx_driving/common/ops/csrc/Nms3dNormal.cpp similarity index 97% rename from ads/common/ops/csrc/Nms3dNormal.cpp rename to mx_driving/common/ops/csrc/Nms3dNormal.cpp index b3320f7..933dfd0 100644 --- a/ads/common/ops/csrc/Nms3dNormal.cpp +++ b/mx_driving/common/ops/csrc/Nms3dNormal.cpp @@ -1,32 +1,32 @@ -// Copyright (c) 2024 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "csrc/OpApiCommon.h" -#include "functions.h" - -std::tuple nms3d_normal(const at::Tensor& boxes, double nms_overlap_thresh) -{ - int32_t box_num = boxes.size(0); - int32_t data_align = 16; - int32_t mask_num = ((box_num - 1) / data_align + 1) * data_align; - at::Tensor mask = at::empty({box_num, mask_num}, boxes.options().dtype(at::kShort)); - EXEC_NPU_CMD(aclnnNms3dNormal, boxes, nms_overlap_thresh, mask); - - at::Tensor keep = at::zeros({box_num}, mask.options()); - at::Tensor num_out = at::zeros(1, mask.options()); - EXEC_NPU_CMD(aclnnGatherNms3dMask, mask, keep, num_out); - return std::tie(keep, num_out); -} +// Copyright (c) 2024 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "csrc/OpApiCommon.h" +#include "functions.h" + +std::tuple nms3d_normal(const at::Tensor& boxes, double nms_overlap_thresh) +{ + int32_t box_num = boxes.size(0); + int32_t data_align = 16; + int32_t mask_num = ((box_num - 1) / data_align + 1) * data_align; + at::Tensor mask = at::empty({box_num, mask_num}, boxes.options().dtype(at::kShort)); + EXEC_NPU_CMD(aclnnNms3dNormal, boxes, nms_overlap_thresh, mask); + + at::Tensor keep = at::zeros({box_num}, mask.options()); + at::Tensor num_out = at::zeros(1, mask.options()); + EXEC_NPU_CMD(aclnnGatherNms3dMask, mask, keep, num_out); + return std::tie(keep, num_out); +} diff --git a/ads/common/ops/csrc/PointsInBox.cpp b/mx_driving/common/ops/csrc/PointsInBox.cpp similarity index 100% rename from ads/common/ops/csrc/PointsInBox.cpp rename to mx_driving/common/ops/csrc/PointsInBox.cpp diff --git a/ads/common/ops/csrc/README.md b/mx_driving/common/ops/csrc/README.md similarity index 100% rename from ads/common/ops/csrc/README.md rename to mx_driving/common/ops/csrc/README.md diff --git a/ads/common/ops/csrc/RoipointPool3dForward.cpp b/mx_driving/common/ops/csrc/RoipointPool3dForward.cpp similarity index 100% rename from ads/common/ops/csrc/RoipointPool3dForward.cpp rename to mx_driving/common/ops/csrc/RoipointPool3dForward.cpp diff --git a/ads/common/ops/csrc/RotatedIou.cpp b/mx_driving/common/ops/csrc/RotatedIou.cpp similarity index 100% rename from ads/common/ops/csrc/RotatedIou.cpp rename to mx_driving/common/ops/csrc/RotatedIou.cpp diff --git a/ads/common/ops/csrc/RotatedOverlaps.cpp b/mx_driving/common/ops/csrc/RotatedOverlaps.cpp similarity index 100% rename from ads/common/ops/csrc/RotatedOverlaps.cpp rename to mx_driving/common/ops/csrc/RotatedOverlaps.cpp diff --git a/ads/common/ops/csrc/ScatterMax.cpp b/mx_driving/common/ops/csrc/ScatterMax.cpp similarity index 100% rename from ads/common/ops/csrc/ScatterMax.cpp rename to mx_driving/common/ops/csrc/ScatterMax.cpp diff --git a/ads/common/ops/csrc/ScatterMeanGrad.cpp b/mx_driving/common/ops/csrc/ScatterMeanGrad.cpp similarity index 100% rename from ads/common/ops/csrc/ScatterMeanGrad.cpp rename to mx_driving/common/ops/csrc/ScatterMeanGrad.cpp diff --git a/ads/common/ops/csrc/ThreeInterpolate.cpp b/mx_driving/common/ops/csrc/ThreeInterpolate.cpp similarity index 100% rename from ads/common/ops/csrc/ThreeInterpolate.cpp rename to mx_driving/common/ops/csrc/ThreeInterpolate.cpp diff --git a/ads/common/ops/csrc/VoxelPoolingTrain.cpp b/mx_driving/common/ops/csrc/VoxelPoolingTrain.cpp similarity index 100% rename from ads/common/ops/csrc/VoxelPoolingTrain.cpp rename to mx_driving/common/ops/csrc/VoxelPoolingTrain.cpp diff --git a/ads/common/ops/csrc/functions.h b/mx_driving/common/ops/csrc/functions.h similarity index 100% rename from ads/common/ops/csrc/functions.h rename to mx_driving/common/ops/csrc/functions.h diff --git a/ads/common/ops/csrc/pybind.cpp b/mx_driving/common/ops/csrc/pybind.cpp similarity index 100% rename from ads/common/ops/csrc/pybind.cpp rename to mx_driving/common/ops/csrc/pybind.cpp diff --git a/ads/common/ops/dynamic_voxelization.py b/mx_driving/common/ops/dynamic_voxelization.py similarity index 100% rename from ads/common/ops/dynamic_voxelization.py rename to mx_driving/common/ops/dynamic_voxelization.py diff --git a/ads/common/ops/furthest_point_sampling.py b/mx_driving/common/ops/furthest_point_sampling.py similarity index 100% rename from ads/common/ops/furthest_point_sampling.py rename to mx_driving/common/ops/furthest_point_sampling.py diff --git a/ads/common/ops/furthest_point_sampling_with_dist.py b/mx_driving/common/ops/furthest_point_sampling_with_dist.py similarity index 100% rename from ads/common/ops/furthest_point_sampling_with_dist.py rename to mx_driving/common/ops/furthest_point_sampling_with_dist.py diff --git a/ads/common/ops/kernels/CMakeLists.txt b/mx_driving/common/ops/kernels/CMakeLists.txt similarity index 100% rename from ads/common/ops/kernels/CMakeLists.txt rename to mx_driving/common/ops/kernels/CMakeLists.txt diff --git a/ads/common/ops/kernels/README.md b/mx_driving/common/ops/kernels/README.md similarity index 100% rename from ads/common/ops/kernels/README.md rename to mx_driving/common/ops/kernels/README.md diff --git a/ads/common/ops/kernels/inc/base.h b/mx_driving/common/ops/kernels/inc/base.h similarity index 100% rename from ads/common/ops/kernels/inc/base.h rename to mx_driving/common/ops/kernels/inc/base.h diff --git a/ads/common/ops/kernels/op_host/CMakeLists.txt b/mx_driving/common/ops/kernels/op_host/CMakeLists.txt similarity index 100% rename from ads/common/ops/kernels/op_host/CMakeLists.txt rename to mx_driving/common/ops/kernels/op_host/CMakeLists.txt diff --git a/ads/common/ops/kernels/op_host/common.h b/mx_driving/common/ops/kernels/op_host/common.h similarity index 100% rename from ads/common/ops/kernels/op_host/common.h rename to mx_driving/common/ops/kernels/op_host/common.h diff --git a/ads/common/ops/kernels/op_host/dynamic_scatter.cpp b/mx_driving/common/ops/kernels/op_host/dynamic_scatter.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/dynamic_scatter.cpp rename to mx_driving/common/ops/kernels/op_host/dynamic_scatter.cpp diff --git a/ads/common/ops/kernels/op_host/dynamic_scatter_grad.cpp b/mx_driving/common/ops/kernels/op_host/dynamic_scatter_grad.cpp similarity index 97% rename from ads/common/ops/kernels/op_host/dynamic_scatter_grad.cpp rename to mx_driving/common/ops/kernels/op_host/dynamic_scatter_grad.cpp index 5b20891..0e02845 100644 --- a/ads/common/ops/kernels/op_host/dynamic_scatter_grad.cpp +++ b/mx_driving/common/ops/kernels/op_host/dynamic_scatter_grad.cpp @@ -1,199 +1,199 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. - */ -#include "dynamic_scatter_grad_tiling.h" -#include "register/op_def_registry.h" -#include "tiling/platform/platform_ascendc.h" -#include "tiling/tiling_api.h" - - -using namespace ge; -using namespace std; -using namespace AscendC; - -namespace optiling { -constexpr uint32_t BYTE_BLOCK = 32; -constexpr uint32_t SIZE_OF_B8 = 1; -constexpr uint32_t SIZE_OF_B16 = 2; -constexpr uint32_t SIZE_OF_B32 = 4; -constexpr uint32_t BIT_OF_B8 = 8; -constexpr uint32_t DIM_INDEX0 = 0; -constexpr uint32_t DIM_INDEX1 = 1; -constexpr uint32_t BYTES_PER_DATA = 20; -constexpr uint32_t TILING_KEY_COE = 100; -constexpr uint32_t RESERVED_UB_SIZE = 2 * 1024; -std::string DEFAULT_REDUCE_TYPE = "max"; -static std::map REDUCE_TYPE_MAP = {{"sum", 0}, {"mean", 1}, {"max", 2}}; - - -void DynamicScatterGradTiling::CalUsedCoreNum(const uint32_t coreNumPlatform) -{ - voxelNumPerCore = (totalVoxelNum + coreNumPlatform - 1) / coreNumPlatform; - usedCoreNum = (totalVoxelNum + voxelNumPerCore - 1) / voxelNumPerCore; - voxelNumLastCore = totalVoxelNum - (voxelNumPerCore * (usedCoreNum - 1)); - eleNumPerCore = voxelNumPerCore * featDim; - eleNumLastCore = voxelNumLastCore * featDim; -} - -void DynamicScatterGradTiling::CalTilingAligned() -{ - alignedNum = BYTE_BLOCK / SIZE_OF_B32; - featDimAligned = (featDim + alignedNum - 1) / alignedNum * alignedNum; - blockLen = featDimAligned / alignedNum; - if (featDim == featDimAligned) { - isFeatsAligned = true; - } else { - blockLenPad = featDim * SIZE_OF_B32; - } - uint32_t sizePerPoint = featDimAligned * SIZE_OF_B32; - uint32_t availableUbSize = ubSizePlatForm - sizePerPoint - RESERVED_UB_SIZE; - maxPointNum = availableUbSize / 2 / sizePerPoint; -} - -void DynamicScatterGradTiling::CalMaskTiling() -{ - uint32_t alignedMaskNum = BYTE_BLOCK / SIZE_OF_B8; - maskDim = (featDim + BIT_OF_B8 - 1) / BIT_OF_B8; - maskDimAligned = (maskDim + alignedMaskNum - 1) / alignedMaskNum * alignedMaskNum; - maskNum = maskDim * totalPointNum; - blockLenMask = maskDimAligned / alignedMaskNum; -} - -ge::graphStatus DynamicScatterGradTiling::Init() -{ - auto voxelShape = tilingContext->GetInputShape(0)->GetStorageShape(); - totalVoxelNum = voxelShape.GetDim(DIM_INDEX0); - featDim = voxelShape.GetDim(DIM_INDEX1); - auto pointShape = tilingContext->GetOutputShape(0)->GetStorageShape(); - totalPointNum = pointShape.GetDim(DIM_INDEX0); - pointGradNum = totalPointNum * featDim; - - auto platformInfo = tilingContext->GetPlatformInfo(); - if (platformInfo == nullptr) { - return ge::GRAPH_FAILED; - } - auto ascendcPlatform = platform_ascendc::PlatformAscendC(platformInfo); - coreNum = ascendcPlatform.GetCoreNumAiv(); - if (coreNum == 0) { - return ge::GRAPH_FAILED; - } - CalUsedCoreNum(coreNum); - ascendcPlatform.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ubSizePlatForm); - - auto attrs = tilingContext->GetAttrs(); - if (attrs == nullptr) { - return ge::GRAPH_FAILED; - } - const char* reduceTypePtr = attrs->GetAttrPointer(DIM_INDEX0); - std::string reduceType(reduceTypePtr); - if (reduceType != "sum" && reduceType != "mean" && reduceType != "max") { - return ge::GRAPH_PARAM_INVALID; - } - tilingContext->SetTilingKey(TILING_KEY_COE + REDUCE_TYPE_MAP[reduceType]); - - CalTilingAligned(); - if (reduceType == "max") { - CalMaskTiling(); - } - - return ge::GRAPH_SUCCESS; -} - -ge::graphStatus DynamicScatterGradTiling::RunKernelTiling() -{ - tilingContext->SetBlockDim(usedCoreNum); - tilingData.set_totalPointNum(totalPointNum); - tilingData.set_totalVoxelNum(totalVoxelNum); - tilingData.set_featDim(featDim); - tilingData.set_pointGradNum(pointGradNum); - tilingData.set_alignedNum(alignedNum); - tilingData.set_featDimAligned(featDimAligned); - tilingData.set_voxelNumPerCore(voxelNumPerCore); - tilingData.set_voxelNumLastCore(voxelNumLastCore); - tilingData.set_eleNumPerCore(eleNumPerCore); - tilingData.set_eleNumLastCore(eleNumLastCore); - tilingData.set_maskNum(maskNum); - tilingData.set_maskDim(maskDim); - tilingData.set_maskDimAligned(maskDimAligned); - tilingData.set_maxPointNum(maxPointNum); - tilingData.set_blockLen(blockLen); - tilingData.set_blockLenPad(blockLenPad); - tilingData.set_blockLenMask(blockLenMask); - tilingData.set_usedCoreNum(usedCoreNum); - tilingData.set_isFeatsAligned(isFeatsAligned); - tilingData.SaveToBuffer( - tilingContext->GetRawTilingData()->GetData(), tilingContext->GetRawTilingData()->GetCapacity()); - tilingContext->GetRawTilingData()->SetDataSize(tilingData.GetDataSize()); - return ge::GRAPH_SUCCESS; -} - -ge::graphStatus TilingForDynamicScatterGrad(gert::TilingContext* context) -{ - DynamicScatterGradTiling tilingObject(context); - tilingObject.Init(); - return tilingObject.RunKernelTiling(); -} -} // namespace optiling - -namespace ge { -static ge::graphStatus InferShapeForDynamicScatterGrad(gert::InferShapeContext* context) -{ - const gert::Shape* featShape = context->GetInputShape(3); - if (featShape == nullptr) { - return ge::GRAPH_FAILED; - } - gert::Shape* outShape = context->GetOutputShape(0); - if (outShape == nullptr) { - return ge::GRAPH_FAILED; - } - outShape->SetDim(0, featShape->GetDim(0)); - outShape->SetDim(1, featShape->GetDim(1)); - return GRAPH_SUCCESS; -} -} // namespace ge - -namespace ops { -class DynamicScatterGrad : public OpDef { -public: - explicit DynamicScatterGrad(const char* name) : OpDef(name) - { - this->Input("grad_voxel_feats") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Input("num_point_per_voxel") - .ParamType(REQUIRED) - .DataType({ge::DT_INT32}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Input("argsort_coor") - .ParamType(REQUIRED) - .DataType({ge::DT_INT32}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Input("compare_mask") - .ParamType(REQUIRED) - .DataType({ge::DT_UINT8}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Output("grad_feats") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Attr("reduce_type").AttrType(REQUIRED).String("max"); - this->SetInferShape(ge::InferShapeForDynamicScatterGrad); - this->AICore().SetTiling(optiling::TilingForDynamicScatterGrad); - OpAICoreConfig aicore_config; - aicore_config.DynamicCompileStaticFlag(true) - .DynamicFormatFlag(true) - .DynamicRankSupportFlag(true) - .DynamicShapeSupportFlag(true); - this->AICore().AddConfig("ascend910b", aicore_config); - this->AICore().AddConfig("ascend910c", aicore_config); - } -}; - -OP_ADD(DynamicScatterGrad); +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + */ +#include "dynamic_scatter_grad_tiling.h" +#include "register/op_def_registry.h" +#include "tiling/platform/platform_ascendc.h" +#include "tiling/tiling_api.h" + + +using namespace ge; +using namespace std; +using namespace AscendC; + +namespace optiling { +constexpr uint32_t BYTE_BLOCK = 32; +constexpr uint32_t SIZE_OF_B8 = 1; +constexpr uint32_t SIZE_OF_B16 = 2; +constexpr uint32_t SIZE_OF_B32 = 4; +constexpr uint32_t BIT_OF_B8 = 8; +constexpr uint32_t DIM_INDEX0 = 0; +constexpr uint32_t DIM_INDEX1 = 1; +constexpr uint32_t BYTES_PER_DATA = 20; +constexpr uint32_t TILING_KEY_COE = 100; +constexpr uint32_t RESERVED_UB_SIZE = 2 * 1024; +std::string DEFAULT_REDUCE_TYPE = "max"; +static std::map REDUCE_TYPE_MAP = {{"sum", 0}, {"mean", 1}, {"max", 2}}; + + +void DynamicScatterGradTiling::CalUsedCoreNum(const uint32_t coreNumPlatform) +{ + voxelNumPerCore = (totalVoxelNum + coreNumPlatform - 1) / coreNumPlatform; + usedCoreNum = (totalVoxelNum + voxelNumPerCore - 1) / voxelNumPerCore; + voxelNumLastCore = totalVoxelNum - (voxelNumPerCore * (usedCoreNum - 1)); + eleNumPerCore = voxelNumPerCore * featDim; + eleNumLastCore = voxelNumLastCore * featDim; +} + +void DynamicScatterGradTiling::CalTilingAligned() +{ + alignedNum = BYTE_BLOCK / SIZE_OF_B32; + featDimAligned = (featDim + alignedNum - 1) / alignedNum * alignedNum; + blockLen = featDimAligned / alignedNum; + if (featDim == featDimAligned) { + isFeatsAligned = true; + } else { + blockLenPad = featDim * SIZE_OF_B32; + } + uint32_t sizePerPoint = featDimAligned * SIZE_OF_B32; + uint32_t availableUbSize = ubSizePlatForm - sizePerPoint - RESERVED_UB_SIZE; + maxPointNum = availableUbSize / 2 / sizePerPoint; +} + +void DynamicScatterGradTiling::CalMaskTiling() +{ + uint32_t alignedMaskNum = BYTE_BLOCK / SIZE_OF_B8; + maskDim = (featDim + BIT_OF_B8 - 1) / BIT_OF_B8; + maskDimAligned = (maskDim + alignedMaskNum - 1) / alignedMaskNum * alignedMaskNum; + maskNum = maskDim * totalPointNum; + blockLenMask = maskDimAligned / alignedMaskNum; +} + +ge::graphStatus DynamicScatterGradTiling::Init() +{ + auto voxelShape = tilingContext->GetInputShape(0)->GetStorageShape(); + totalVoxelNum = voxelShape.GetDim(DIM_INDEX0); + featDim = voxelShape.GetDim(DIM_INDEX1); + auto pointShape = tilingContext->GetOutputShape(0)->GetStorageShape(); + totalPointNum = pointShape.GetDim(DIM_INDEX0); + pointGradNum = totalPointNum * featDim; + + auto platformInfo = tilingContext->GetPlatformInfo(); + if (platformInfo == nullptr) { + return ge::GRAPH_FAILED; + } + auto ascendcPlatform = platform_ascendc::PlatformAscendC(platformInfo); + coreNum = ascendcPlatform.GetCoreNumAiv(); + if (coreNum == 0) { + return ge::GRAPH_FAILED; + } + CalUsedCoreNum(coreNum); + ascendcPlatform.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ubSizePlatForm); + + auto attrs = tilingContext->GetAttrs(); + if (attrs == nullptr) { + return ge::GRAPH_FAILED; + } + const char* reduceTypePtr = attrs->GetAttrPointer(DIM_INDEX0); + std::string reduceType(reduceTypePtr); + if (reduceType != "sum" && reduceType != "mean" && reduceType != "max") { + return ge::GRAPH_PARAM_INVALID; + } + tilingContext->SetTilingKey(TILING_KEY_COE + REDUCE_TYPE_MAP[reduceType]); + + CalTilingAligned(); + if (reduceType == "max") { + CalMaskTiling(); + } + + return ge::GRAPH_SUCCESS; +} + +ge::graphStatus DynamicScatterGradTiling::RunKernelTiling() +{ + tilingContext->SetBlockDim(usedCoreNum); + tilingData.set_totalPointNum(totalPointNum); + tilingData.set_totalVoxelNum(totalVoxelNum); + tilingData.set_featDim(featDim); + tilingData.set_pointGradNum(pointGradNum); + tilingData.set_alignedNum(alignedNum); + tilingData.set_featDimAligned(featDimAligned); + tilingData.set_voxelNumPerCore(voxelNumPerCore); + tilingData.set_voxelNumLastCore(voxelNumLastCore); + tilingData.set_eleNumPerCore(eleNumPerCore); + tilingData.set_eleNumLastCore(eleNumLastCore); + tilingData.set_maskNum(maskNum); + tilingData.set_maskDim(maskDim); + tilingData.set_maskDimAligned(maskDimAligned); + tilingData.set_maxPointNum(maxPointNum); + tilingData.set_blockLen(blockLen); + tilingData.set_blockLenPad(blockLenPad); + tilingData.set_blockLenMask(blockLenMask); + tilingData.set_usedCoreNum(usedCoreNum); + tilingData.set_isFeatsAligned(isFeatsAligned); + tilingData.SaveToBuffer( + tilingContext->GetRawTilingData()->GetData(), tilingContext->GetRawTilingData()->GetCapacity()); + tilingContext->GetRawTilingData()->SetDataSize(tilingData.GetDataSize()); + return ge::GRAPH_SUCCESS; +} + +ge::graphStatus TilingForDynamicScatterGrad(gert::TilingContext* context) +{ + DynamicScatterGradTiling tilingObject(context); + tilingObject.Init(); + return tilingObject.RunKernelTiling(); +} +} // namespace optiling + +namespace ge { +static ge::graphStatus InferShapeForDynamicScatterGrad(gert::InferShapeContext* context) +{ + const gert::Shape* featShape = context->GetInputShape(3); + if (featShape == nullptr) { + return ge::GRAPH_FAILED; + } + gert::Shape* outShape = context->GetOutputShape(0); + if (outShape == nullptr) { + return ge::GRAPH_FAILED; + } + outShape->SetDim(0, featShape->GetDim(0)); + outShape->SetDim(1, featShape->GetDim(1)); + return GRAPH_SUCCESS; +} +} // namespace ge + +namespace ops { +class DynamicScatterGrad : public OpDef { +public: + explicit DynamicScatterGrad(const char* name) : OpDef(name) + { + this->Input("grad_voxel_feats") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("num_point_per_voxel") + .ParamType(REQUIRED) + .DataType({ge::DT_INT32}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("argsort_coor") + .ParamType(REQUIRED) + .DataType({ge::DT_INT32}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("compare_mask") + .ParamType(REQUIRED) + .DataType({ge::DT_UINT8}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("grad_feats") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Attr("reduce_type").AttrType(REQUIRED).String("max"); + this->SetInferShape(ge::InferShapeForDynamicScatterGrad); + this->AICore().SetTiling(optiling::TilingForDynamicScatterGrad); + OpAICoreConfig aicore_config; + aicore_config.DynamicCompileStaticFlag(true) + .DynamicFormatFlag(true) + .DynamicRankSupportFlag(true) + .DynamicShapeSupportFlag(true); + this->AICore().AddConfig("ascend910b", aicore_config); + this->AICore().AddConfig("ascend910c", aicore_config); + } +}; + +OP_ADD(DynamicScatterGrad); } // namespace ops \ No newline at end of file diff --git a/ads/common/ops/kernels/op_host/dynamic_scatter_grad_tiling.h b/mx_driving/common/ops/kernels/op_host/dynamic_scatter_grad_tiling.h similarity index 95% rename from ads/common/ops/kernels/op_host/dynamic_scatter_grad_tiling.h rename to mx_driving/common/ops/kernels/op_host/dynamic_scatter_grad_tiling.h index 61727a3..c7f9e9c 100644 --- a/ads/common/ops/kernels/op_host/dynamic_scatter_grad_tiling.h +++ b/mx_driving/common/ops/kernels/op_host/dynamic_scatter_grad_tiling.h @@ -1,74 +1,74 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. - */ -#ifndef DYNAMIC_SCATTER_GRAD_TILING_H -#define DYNAMIC_SCATTER_GRAD_TILING_H - -#include "register/op_def_registry.h" -#include "register/tilingdata_base.h" -#include "tiling/tiling_api.h" - -namespace optiling { -BEGIN_TILING_DATA_DEF(DynamicScatterGradTilingData) -TILING_DATA_FIELD_DEF(uint32_t, totalPointNum); -TILING_DATA_FIELD_DEF(uint32_t, totalVoxelNum); -TILING_DATA_FIELD_DEF(uint32_t, pointGradNum); -TILING_DATA_FIELD_DEF(uint32_t, voxelNumPerCore); -TILING_DATA_FIELD_DEF(uint32_t, eleNumPerCore); -TILING_DATA_FIELD_DEF(uint32_t, voxelNumLastCore); -TILING_DATA_FIELD_DEF(uint32_t, eleNumLastCore); -TILING_DATA_FIELD_DEF(uint32_t, alignedNum); -TILING_DATA_FIELD_DEF(uint32_t, featDim); -TILING_DATA_FIELD_DEF(uint32_t, featDimAligned); -TILING_DATA_FIELD_DEF(uint32_t, maskNum); -TILING_DATA_FIELD_DEF(uint32_t, maskDim); -TILING_DATA_FIELD_DEF(uint32_t, maskDimAligned); -TILING_DATA_FIELD_DEF(uint32_t, maxPointNum); -TILING_DATA_FIELD_DEF(uint32_t, blockLen); -TILING_DATA_FIELD_DEF(uint32_t, blockLenPad); -TILING_DATA_FIELD_DEF(uint32_t, blockLenMask); -TILING_DATA_FIELD_DEF(uint32_t, usedCoreNum); -TILING_DATA_FIELD_DEF(bool, isFeatsAligned); -END_TILING_DATA_DEF; - -REGISTER_TILING_DATA_CLASS(DynamicScatterGrad, DynamicScatterGradTilingData) - -class DynamicScatterGradTiling { -public: - explicit DynamicScatterGradTiling(gert::TilingContext* context) : tilingContext(context) {}; - ge::graphStatus Init(); - ge::graphStatus RunKernelTiling(); - -private: - void SetTilingKeyMode(uint32_t reduceTypeNum) const; - void CalUsedCoreNum(const uint32_t coreNumPlatform); - void CalTilingAligned(); - void CalMaskTiling(); - -private: - DynamicScatterGradTilingData tilingData; - gert::TilingContext* tilingContext = nullptr; - uint32_t coreNum; - uint32_t usedCoreNum = 1; - uint32_t pointGradNum; - uint32_t totalVoxelNum; - uint32_t totalPointNum; - uint32_t voxelNumPerCore; - uint32_t voxelNumLastCore; - uint32_t eleNumPerCore; - uint32_t eleNumLastCore; - uint32_t alignedNum; - uint32_t featDim; - uint32_t featDimAligned; - uint32_t maskDim; - uint32_t maskDimAligned; - uint32_t maskNum = 0; - uint32_t maxPointNum = 0; - uint32_t blockLen; - uint32_t blockLenPad = 0; - uint32_t blockLenMask = 0; - uint64_t ubSizePlatForm; - bool isFeatsAligned = false; -}; -} // namespace optiling +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + */ +#ifndef DYNAMIC_SCATTER_GRAD_TILING_H +#define DYNAMIC_SCATTER_GRAD_TILING_H + +#include "register/op_def_registry.h" +#include "register/tilingdata_base.h" +#include "tiling/tiling_api.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(DynamicScatterGradTilingData) +TILING_DATA_FIELD_DEF(uint32_t, totalPointNum); +TILING_DATA_FIELD_DEF(uint32_t, totalVoxelNum); +TILING_DATA_FIELD_DEF(uint32_t, pointGradNum); +TILING_DATA_FIELD_DEF(uint32_t, voxelNumPerCore); +TILING_DATA_FIELD_DEF(uint32_t, eleNumPerCore); +TILING_DATA_FIELD_DEF(uint32_t, voxelNumLastCore); +TILING_DATA_FIELD_DEF(uint32_t, eleNumLastCore); +TILING_DATA_FIELD_DEF(uint32_t, alignedNum); +TILING_DATA_FIELD_DEF(uint32_t, featDim); +TILING_DATA_FIELD_DEF(uint32_t, featDimAligned); +TILING_DATA_FIELD_DEF(uint32_t, maskNum); +TILING_DATA_FIELD_DEF(uint32_t, maskDim); +TILING_DATA_FIELD_DEF(uint32_t, maskDimAligned); +TILING_DATA_FIELD_DEF(uint32_t, maxPointNum); +TILING_DATA_FIELD_DEF(uint32_t, blockLen); +TILING_DATA_FIELD_DEF(uint32_t, blockLenPad); +TILING_DATA_FIELD_DEF(uint32_t, blockLenMask); +TILING_DATA_FIELD_DEF(uint32_t, usedCoreNum); +TILING_DATA_FIELD_DEF(bool, isFeatsAligned); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(DynamicScatterGrad, DynamicScatterGradTilingData) + +class DynamicScatterGradTiling { +public: + explicit DynamicScatterGradTiling(gert::TilingContext* context) : tilingContext(context) {}; + ge::graphStatus Init(); + ge::graphStatus RunKernelTiling(); + +private: + void SetTilingKeyMode(uint32_t reduceTypeNum) const; + void CalUsedCoreNum(const uint32_t coreNumPlatform); + void CalTilingAligned(); + void CalMaskTiling(); + +private: + DynamicScatterGradTilingData tilingData; + gert::TilingContext* tilingContext = nullptr; + uint32_t coreNum; + uint32_t usedCoreNum = 1; + uint32_t pointGradNum; + uint32_t totalVoxelNum; + uint32_t totalPointNum; + uint32_t voxelNumPerCore; + uint32_t voxelNumLastCore; + uint32_t eleNumPerCore; + uint32_t eleNumLastCore; + uint32_t alignedNum; + uint32_t featDim; + uint32_t featDimAligned; + uint32_t maskDim; + uint32_t maskDimAligned; + uint32_t maskNum = 0; + uint32_t maxPointNum = 0; + uint32_t blockLen; + uint32_t blockLenPad = 0; + uint32_t blockLenMask = 0; + uint64_t ubSizePlatForm; + bool isFeatsAligned = false; +}; +} // namespace optiling #endif // DYNAMIC_SCATTER_GRAD_TILING_H \ No newline at end of file diff --git a/ads/common/ops/kernels/op_host/dynamic_scatter_tiling.h b/mx_driving/common/ops/kernels/op_host/dynamic_scatter_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/dynamic_scatter_tiling.h rename to mx_driving/common/ops/kernels/op_host/dynamic_scatter_tiling.h diff --git a/ads/common/ops/kernels/op_host/dynamic_voxelization.cpp b/mx_driving/common/ops/kernels/op_host/dynamic_voxelization.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/dynamic_voxelization.cpp rename to mx_driving/common/ops/kernels/op_host/dynamic_voxelization.cpp diff --git a/ads/common/ops/kernels/op_host/dynamic_voxelization_tiling.h b/mx_driving/common/ops/kernels/op_host/dynamic_voxelization_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/dynamic_voxelization_tiling.h rename to mx_driving/common/ops/kernels/op_host/dynamic_voxelization_tiling.h diff --git a/ads/common/ops/kernels/op_host/furthest_point_sampling.cpp b/mx_driving/common/ops/kernels/op_host/furthest_point_sampling.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/furthest_point_sampling.cpp rename to mx_driving/common/ops/kernels/op_host/furthest_point_sampling.cpp diff --git a/ads/common/ops/kernels/op_host/furthest_point_sampling_tiling.h b/mx_driving/common/ops/kernels/op_host/furthest_point_sampling_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/furthest_point_sampling_tiling.h rename to mx_driving/common/ops/kernels/op_host/furthest_point_sampling_tiling.h diff --git a/ads/common/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp b/mx_driving/common/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp rename to mx_driving/common/ops/kernels/op_host/furthest_point_sampling_with_dist.cpp diff --git a/ads/common/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h b/mx_driving/common/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h rename to mx_driving/common/ops/kernels/op_host/furthest_point_sampling_with_dist_tiling.h diff --git a/ads/common/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp b/mx_driving/common/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp similarity index 96% rename from ads/common/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp rename to mx_driving/common/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp index c3be6eb..efa392f 100644 --- a/ads/common/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp +++ b/mx_driving/common/ops/kernels/op_host/gather_nms3d_mask_tiling.cpp @@ -1,69 +1,69 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. - */ -#include "gather_nms3d_mask_tiling.h" -#include "register/op_def_registry.h" -#include "tiling/platform/platform_ascendc.h" - -namespace optiling { -const uint32_t BLOCK_DIM = 1; -static ge::graphStatus GatherNms3dMaskTiling(gert::TilingContext *context) -{ - GatherNms3dMaskTilingData tiling; - auto const maskShape = context->GetInputShape(0); - if (maskShape == nullptr) { - return ge::GRAPH_FAILED; - } - - auto const maskShapeVal = maskShape->GetStorageShape(); - context->SetBlockDim(BLOCK_DIM); - tiling.set_box_num(maskShapeVal.GetDim(0)); - tiling.set_mask_num(maskShapeVal.GetDim(1)); - - tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); - context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); - - return ge::GRAPH_SUCCESS; -} -} - -namespace ge { -static ge::graphStatus GatherNms3dMaskInferShape(gert::InferShapeContext* context) -{ - return GRAPH_SUCCESS; -} -} - -namespace ops { -class GatherNms3dMask : public OpDef { -public: - explicit GatherNms3dMask(const char *name) : OpDef(name) - { - this->Input("mask") - .ParamType(REQUIRED) - .DataType({ge::DT_INT16}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Output("keep") - .ParamType(REQUIRED) - .DataType({ge::DT_INT16}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Output("num_out") - .ParamType(REQUIRED) - .DataType({ge::DT_INT16}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - - this->SetInferShape(ge::GatherNms3dMaskInferShape); - - this->AICore() - .SetTiling(optiling::GatherNms3dMaskTiling); - - this->AICore().AddConfig("ascend910b"); - this->AICore().AddConfig("ascend910c"); - } -}; - -OP_ADD(GatherNms3dMask); -} +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + */ +#include "gather_nms3d_mask_tiling.h" +#include "register/op_def_registry.h" +#include "tiling/platform/platform_ascendc.h" + +namespace optiling { +const uint32_t BLOCK_DIM = 1; +static ge::graphStatus GatherNms3dMaskTiling(gert::TilingContext *context) +{ + GatherNms3dMaskTilingData tiling; + auto const maskShape = context->GetInputShape(0); + if (maskShape == nullptr) { + return ge::GRAPH_FAILED; + } + + auto const maskShapeVal = maskShape->GetStorageShape(); + context->SetBlockDim(BLOCK_DIM); + tiling.set_box_num(maskShapeVal.GetDim(0)); + tiling.set_mask_num(maskShapeVal.GetDim(1)); + + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + + return ge::GRAPH_SUCCESS; +} +} + +namespace ge { +static ge::graphStatus GatherNms3dMaskInferShape(gert::InferShapeContext* context) +{ + return GRAPH_SUCCESS; +} +} + +namespace ops { +class GatherNms3dMask : public OpDef { +public: + explicit GatherNms3dMask(const char *name) : OpDef(name) + { + this->Input("mask") + .ParamType(REQUIRED) + .DataType({ge::DT_INT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("keep") + .ParamType(REQUIRED) + .DataType({ge::DT_INT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("num_out") + .ParamType(REQUIRED) + .DataType({ge::DT_INT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + + this->SetInferShape(ge::GatherNms3dMaskInferShape); + + this->AICore() + .SetTiling(optiling::GatherNms3dMaskTiling); + + this->AICore().AddConfig("ascend910b"); + this->AICore().AddConfig("ascend910c"); + } +}; + +OP_ADD(GatherNms3dMask); +} diff --git a/ads/common/ops/kernels/op_host/gather_nms3d_mask_tiling.h b/mx_driving/common/ops/kernels/op_host/gather_nms3d_mask_tiling.h similarity index 96% rename from ads/common/ops/kernels/op_host/gather_nms3d_mask_tiling.h rename to mx_driving/common/ops/kernels/op_host/gather_nms3d_mask_tiling.h index bf7a47f..b0e0034 100644 --- a/ads/common/ops/kernels/op_host/gather_nms3d_mask_tiling.h +++ b/mx_driving/common/ops/kernels/op_host/gather_nms3d_mask_tiling.h @@ -1,18 +1,18 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - */ -#ifndef GATHER_NMS3D_MASK_TILING_H -#define GATHER_NMS3D_MASK_TILING_H - -#include "register/tilingdata_base.h" - -namespace optiling { -BEGIN_TILING_DATA_DEF(GatherNms3dMaskTilingData) - TILING_DATA_FIELD_DEF(uint32_t, box_num); - TILING_DATA_FIELD_DEF(uint32_t, mask_num); -END_TILING_DATA_DEF; - -REGISTER_TILING_DATA_CLASS(GatherNms3dMask, GatherNms3dMaskTilingData) -} - -#endif // GATHER_NMS3D_MASK_TILING_H +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. + */ +#ifndef GATHER_NMS3D_MASK_TILING_H +#define GATHER_NMS3D_MASK_TILING_H + +#include "register/tilingdata_base.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(GatherNms3dMaskTilingData) + TILING_DATA_FIELD_DEF(uint32_t, box_num); + TILING_DATA_FIELD_DEF(uint32_t, mask_num); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(GatherNms3dMask, GatherNms3dMaskTilingData) +} + +#endif // GATHER_NMS3D_MASK_TILING_H diff --git a/ads/common/ops/kernels/op_host/knn.cpp b/mx_driving/common/ops/kernels/op_host/knn.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/knn.cpp rename to mx_driving/common/ops/kernels/op_host/knn.cpp diff --git a/ads/common/ops/kernels/op_host/knn_tiling.h b/mx_driving/common/ops/kernels/op_host/knn_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/knn_tiling.h rename to mx_driving/common/ops/kernels/op_host/knn_tiling.h diff --git a/ads/common/ops/kernels/op_host/multi_scale_deformable_attn.cpp b/mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/multi_scale_deformable_attn.cpp rename to mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn.cpp diff --git a/ads/common/ops/kernels/op_host/multi_scale_deformable_attn_grad.cpp b/mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/multi_scale_deformable_attn_grad.cpp rename to mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad.cpp diff --git a/ads/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling.h b/mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling.h rename to mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling.h diff --git a/ads/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling_v2.h b/mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling_v2.h similarity index 100% rename from ads/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling_v2.h rename to mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_tiling_v2.h diff --git a/ads/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_v2.cpp b/mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_v2.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_v2.cpp rename to mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_grad_v2.cpp diff --git a/ads/common/ops/kernels/op_host/multi_scale_deformable_attn_tiling.h b/mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/multi_scale_deformable_attn_tiling.h rename to mx_driving/common/ops/kernels/op_host/multi_scale_deformable_attn_tiling.h diff --git a/ads/common/ops/kernels/op_host/nms3d.cpp b/mx_driving/common/ops/kernels/op_host/nms3d.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/nms3d.cpp rename to mx_driving/common/ops/kernels/op_host/nms3d.cpp diff --git a/ads/common/ops/kernels/op_host/nms3d_normal_tiling.cpp b/mx_driving/common/ops/kernels/op_host/nms3d_normal_tiling.cpp similarity index 97% rename from ads/common/ops/kernels/op_host/nms3d_normal_tiling.cpp rename to mx_driving/common/ops/kernels/op_host/nms3d_normal_tiling.cpp index 14598fb..bdfb047 100644 --- a/ads/common/ops/kernels/op_host/nms3d_normal_tiling.cpp +++ b/mx_driving/common/ops/kernels/op_host/nms3d_normal_tiling.cpp @@ -1,113 +1,113 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. - */ -#include "nms3d_normal_tiling.h" -#include "register/op_def_registry.h" -#include "tiling/platform/platform_ascendc.h" - -using namespace std; - -namespace optiling { -constexpr uint32_t DATA_ALIGN = 16; -constexpr uint32_t BOX_DIM = 2; -constexpr uint32_t MASK_DIM = 2; -static ge::graphStatus Nms3dNormalTilingFunc(gert::TilingContext* context) -{ - Nms3dNormalTilingData tiling; - if (context == nullptr) { - return ge::GRAPH_FAILED; - } - auto platformInfo = context->GetPlatformInfo(); - if (platformInfo == nullptr) { - return ge::GRAPH_FAILED; - } - auto ascendcPlatform = platform_ascendc::PlatformAscendC(platformInfo); - static uint32_t coreNum = ascendcPlatform.GetCoreNumAiv(); - - auto attrs = context->GetAttrs(); - if (attrs == nullptr || context->GetInputShape(0) == nullptr || context->GetOutputShape(0) == nullptr - || context->GetInputDesc(0) == nullptr || context->GetRawTilingData() == nullptr) { - return ge::GRAPH_FAILED; - } - - auto boxShape = context->GetInputShape(0)->GetStorageShape(); - auto maskShape = context->GetOutputShape(0)->GetStorageShape(); - auto dtype = context->GetInputDesc(0)->GetDataType(); - - if (boxShape.GetDimNum() != BOX_DIM || maskShape.GetDimNum() != MASK_DIM) { - return ge::GRAPH_FAILED; - } - uint32_t boxNum = boxShape.GetDim(0); - uint32_t maskNum = maskShape.GetDim(1); - if (ge::DT_FLOAT == dtype) { - context->SetTilingKey(1); - } else if (ge::DT_FLOAT16 == dtype) { - context->SetTilingKey(2); - } else { - return ge::GRAPH_FAILED; - } - - uint32_t usedCoreNum = std::min((boxNum - 1) / DATA_ALIGN + 1, coreNum); - uint32_t loopTime = (boxNum - 1) / (usedCoreNum * DATA_ALIGN) + 1; - uint32_t tailSum = boxNum - usedCoreNum * (loopTime - 1) * DATA_ALIGN; - uint32_t tailNum = (tailSum - 1) % DATA_ALIGN + 1; - float nms_overlap_thresh = *(attrs->GetAttrPointer(0)); - - context->SetBlockDim(usedCoreNum); - tiling.set_usedCoreNum(usedCoreNum); - tiling.set_boxNum(boxNum); - tiling.set_loopTime(loopTime); - tiling.set_eachSum(loopTime * DATA_ALIGN); - tiling.set_tailSum(tailSum); - tiling.set_tailNum(tailNum); - tiling.set_maskNum(maskNum); - tiling.set_overlapThresh(nms_overlap_thresh); - tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); - context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); - - size_t *currentWorkspace = context->GetWorkspaceSizes(1); - if (currentWorkspace == nullptr) { - return ge::GRAPH_FAILED; - } - currentWorkspace[0] = 0; - return ge::GRAPH_SUCCESS; -} -} - -namespace ge { -static ge::graphStatus Nms3dNormalInferShape(gert::InferShapeContext* context) -{ - return GRAPH_SUCCESS; -} -} - -namespace ops { -class Nms3dNormal : public OpDef { -public: - explicit Nms3dNormal(const char* name) : OpDef(name) - { - this->Input("boxes") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT, ge::DT_FLOAT16}) - .Format({ge::FORMAT_ND, ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND}); - this->Output("mask") - .ParamType(REQUIRED) - .DataType({ge::DT_INT16, ge::DT_INT16}) - .Format({ge::FORMAT_ND, ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND}); - this->Attr("nms_overlap_thresh") - .AttrType(REQUIRED) - .Float(); - - this->SetInferShape(ge::Nms3dNormalInferShape); - - this->AICore() - .SetTiling(optiling::Nms3dNormalTilingFunc); - this->AICore().AddConfig("ascend910b"); - this->AICore().AddConfig("ascend910c"); - } -}; - -OP_ADD(Nms3dNormal); -} +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + */ +#include "nms3d_normal_tiling.h" +#include "register/op_def_registry.h" +#include "tiling/platform/platform_ascendc.h" + +using namespace std; + +namespace optiling { +constexpr uint32_t DATA_ALIGN = 16; +constexpr uint32_t BOX_DIM = 2; +constexpr uint32_t MASK_DIM = 2; +static ge::graphStatus Nms3dNormalTilingFunc(gert::TilingContext* context) +{ + Nms3dNormalTilingData tiling; + if (context == nullptr) { + return ge::GRAPH_FAILED; + } + auto platformInfo = context->GetPlatformInfo(); + if (platformInfo == nullptr) { + return ge::GRAPH_FAILED; + } + auto ascendcPlatform = platform_ascendc::PlatformAscendC(platformInfo); + static uint32_t coreNum = ascendcPlatform.GetCoreNumAiv(); + + auto attrs = context->GetAttrs(); + if (attrs == nullptr || context->GetInputShape(0) == nullptr || context->GetOutputShape(0) == nullptr + || context->GetInputDesc(0) == nullptr || context->GetRawTilingData() == nullptr) { + return ge::GRAPH_FAILED; + } + + auto boxShape = context->GetInputShape(0)->GetStorageShape(); + auto maskShape = context->GetOutputShape(0)->GetStorageShape(); + auto dtype = context->GetInputDesc(0)->GetDataType(); + + if (boxShape.GetDimNum() != BOX_DIM || maskShape.GetDimNum() != MASK_DIM) { + return ge::GRAPH_FAILED; + } + uint32_t boxNum = boxShape.GetDim(0); + uint32_t maskNum = maskShape.GetDim(1); + if (ge::DT_FLOAT == dtype) { + context->SetTilingKey(1); + } else if (ge::DT_FLOAT16 == dtype) { + context->SetTilingKey(2); + } else { + return ge::GRAPH_FAILED; + } + + uint32_t usedCoreNum = std::min((boxNum - 1) / DATA_ALIGN + 1, coreNum); + uint32_t loopTime = (boxNum - 1) / (usedCoreNum * DATA_ALIGN) + 1; + uint32_t tailSum = boxNum - usedCoreNum * (loopTime - 1) * DATA_ALIGN; + uint32_t tailNum = (tailSum - 1) % DATA_ALIGN + 1; + float nms_overlap_thresh = *(attrs->GetAttrPointer(0)); + + context->SetBlockDim(usedCoreNum); + tiling.set_usedCoreNum(usedCoreNum); + tiling.set_boxNum(boxNum); + tiling.set_loopTime(loopTime); + tiling.set_eachSum(loopTime * DATA_ALIGN); + tiling.set_tailSum(tailSum); + tiling.set_tailNum(tailNum); + tiling.set_maskNum(maskNum); + tiling.set_overlapThresh(nms_overlap_thresh); + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + + size_t *currentWorkspace = context->GetWorkspaceSizes(1); + if (currentWorkspace == nullptr) { + return ge::GRAPH_FAILED; + } + currentWorkspace[0] = 0; + return ge::GRAPH_SUCCESS; +} +} + +namespace ge { +static ge::graphStatus Nms3dNormalInferShape(gert::InferShapeContext* context) +{ + return GRAPH_SUCCESS; +} +} + +namespace ops { +class Nms3dNormal : public OpDef { +public: + explicit Nms3dNormal(const char* name) : OpDef(name) + { + this->Input("boxes") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT, ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND, ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND}); + this->Output("mask") + .ParamType(REQUIRED) + .DataType({ge::DT_INT16, ge::DT_INT16}) + .Format({ge::FORMAT_ND, ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND}); + this->Attr("nms_overlap_thresh") + .AttrType(REQUIRED) + .Float(); + + this->SetInferShape(ge::Nms3dNormalInferShape); + + this->AICore() + .SetTiling(optiling::Nms3dNormalTilingFunc); + this->AICore().AddConfig("ascend910b"); + this->AICore().AddConfig("ascend910c"); + } +}; + +OP_ADD(Nms3dNormal); +} diff --git a/ads/common/ops/kernels/op_host/nms3d_normal_tiling.h b/mx_driving/common/ops/kernels/op_host/nms3d_normal_tiling.h similarity index 97% rename from ads/common/ops/kernels/op_host/nms3d_normal_tiling.h rename to mx_driving/common/ops/kernels/op_host/nms3d_normal_tiling.h index 9b0bf9c..9976c24 100644 --- a/ads/common/ops/kernels/op_host/nms3d_normal_tiling.h +++ b/mx_driving/common/ops/kernels/op_host/nms3d_normal_tiling.h @@ -1,24 +1,24 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - */ -#ifndef NMS3D_NORMAL_TILING_H -#define NMS3D_NORMAL_TILING_H - -#include "register/tilingdata_base.h" - -namespace optiling { -BEGIN_TILING_DATA_DEF(Nms3dNormalTilingData) - TILING_DATA_FIELD_DEF(uint32_t, usedCoreNum) // used cores - TILING_DATA_FIELD_DEF(uint32_t, boxNum) // count of boxes - TILING_DATA_FIELD_DEF(uint32_t, loopTime) // loop times - TILING_DATA_FIELD_DEF(uint32_t, eachSum) // count of each core, = loop_time * 8 - TILING_DATA_FIELD_DEF(uint32_t, tailSum) // count of tail core - TILING_DATA_FIELD_DEF(uint32_t, tailNum) // last time count of tail core - TILING_DATA_FIELD_DEF(uint32_t, maskNum) // mask align 32bit - TILING_DATA_FIELD_DEF(float, overlapThresh) -END_TILING_DATA_DEF; - -REGISTER_TILING_DATA_CLASS(Nms3dNormal, Nms3dNormalTilingData) -} // namespace optiling - -#endif // NMS3D_NORMAL_TILING_H +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. + */ +#ifndef NMS3D_NORMAL_TILING_H +#define NMS3D_NORMAL_TILING_H + +#include "register/tilingdata_base.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(Nms3dNormalTilingData) + TILING_DATA_FIELD_DEF(uint32_t, usedCoreNum) // used cores + TILING_DATA_FIELD_DEF(uint32_t, boxNum) // count of boxes + TILING_DATA_FIELD_DEF(uint32_t, loopTime) // loop times + TILING_DATA_FIELD_DEF(uint32_t, eachSum) // count of each core, = loop_time * 8 + TILING_DATA_FIELD_DEF(uint32_t, tailSum) // count of tail core + TILING_DATA_FIELD_DEF(uint32_t, tailNum) // last time count of tail core + TILING_DATA_FIELD_DEF(uint32_t, maskNum) // mask align 32bit + TILING_DATA_FIELD_DEF(float, overlapThresh) +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(Nms3dNormal, Nms3dNormalTilingData) +} // namespace optiling + +#endif // NMS3D_NORMAL_TILING_H diff --git a/ads/common/ops/kernels/op_host/nms3d_tiling.h b/mx_driving/common/ops/kernels/op_host/nms3d_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/nms3d_tiling.h rename to mx_driving/common/ops/kernels/op_host/nms3d_tiling.h diff --git a/ads/common/ops/kernels/op_host/points_in_box.cpp b/mx_driving/common/ops/kernels/op_host/points_in_box.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/points_in_box.cpp rename to mx_driving/common/ops/kernels/op_host/points_in_box.cpp diff --git a/ads/common/ops/kernels/op_host/points_in_box_tiling.h b/mx_driving/common/ops/kernels/op_host/points_in_box_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/points_in_box_tiling.h rename to mx_driving/common/ops/kernels/op_host/points_in_box_tiling.h diff --git a/ads/common/ops/kernels/op_host/roipoint_pool3d_forward.cpp b/mx_driving/common/ops/kernels/op_host/roipoint_pool3d_forward.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/roipoint_pool3d_forward.cpp rename to mx_driving/common/ops/kernels/op_host/roipoint_pool3d_forward.cpp diff --git a/ads/common/ops/kernels/op_host/roipoint_pool3d_forward_tiling.h b/mx_driving/common/ops/kernels/op_host/roipoint_pool3d_forward_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/roipoint_pool3d_forward_tiling.h rename to mx_driving/common/ops/kernels/op_host/roipoint_pool3d_forward_tiling.h diff --git a/ads/common/ops/kernels/op_host/scatter_max_with_argmax_v2.cpp b/mx_driving/common/ops/kernels/op_host/scatter_max_with_argmax_v2.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/scatter_max_with_argmax_v2.cpp rename to mx_driving/common/ops/kernels/op_host/scatter_max_with_argmax_v2.cpp diff --git a/ads/common/ops/kernels/op_host/scatter_max_with_argmax_v2.h b/mx_driving/common/ops/kernels/op_host/scatter_max_with_argmax_v2.h similarity index 100% rename from ads/common/ops/kernels/op_host/scatter_max_with_argmax_v2.h rename to mx_driving/common/ops/kernels/op_host/scatter_max_with_argmax_v2.h diff --git a/ads/common/ops/kernels/op_host/scatter_mean_grad.cpp b/mx_driving/common/ops/kernels/op_host/scatter_mean_grad.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/scatter_mean_grad.cpp rename to mx_driving/common/ops/kernels/op_host/scatter_mean_grad.cpp diff --git a/ads/common/ops/kernels/op_host/scatter_mean_grad_tiling.h b/mx_driving/common/ops/kernels/op_host/scatter_mean_grad_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/scatter_mean_grad_tiling.h rename to mx_driving/common/ops/kernels/op_host/scatter_mean_grad_tiling.h diff --git a/ads/common/ops/kernels/op_host/voxel_pooling_train.cpp b/mx_driving/common/ops/kernels/op_host/voxel_pooling_train.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/voxel_pooling_train.cpp rename to mx_driving/common/ops/kernels/op_host/voxel_pooling_train.cpp diff --git a/ads/common/ops/kernels/op_host/voxel_pooling_train_grad.cpp b/mx_driving/common/ops/kernels/op_host/voxel_pooling_train_grad.cpp similarity index 100% rename from ads/common/ops/kernels/op_host/voxel_pooling_train_grad.cpp rename to mx_driving/common/ops/kernels/op_host/voxel_pooling_train_grad.cpp diff --git a/ads/common/ops/kernels/op_host/voxel_pooling_train_grad_tiling.h b/mx_driving/common/ops/kernels/op_host/voxel_pooling_train_grad_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/voxel_pooling_train_grad_tiling.h rename to mx_driving/common/ops/kernels/op_host/voxel_pooling_train_grad_tiling.h diff --git a/ads/common/ops/kernels/op_host/voxel_pooling_train_tiling.h b/mx_driving/common/ops/kernels/op_host/voxel_pooling_train_tiling.h similarity index 100% rename from ads/common/ops/kernels/op_host/voxel_pooling_train_tiling.h rename to mx_driving/common/ops/kernels/op_host/voxel_pooling_train_tiling.h diff --git a/ads/common/ops/kernels/op_kernel/CMakeLists.txt b/mx_driving/common/ops/kernels/op_kernel/CMakeLists.txt similarity index 100% rename from ads/common/ops/kernels/op_kernel/CMakeLists.txt rename to mx_driving/common/ops/kernels/op_kernel/CMakeLists.txt diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter.cpp b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter.cpp rename to mx_driving/common/ops/kernels/op_kernel/dynamic_scatter.cpp diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter_base.h b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_base.h similarity index 97% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter_base.h rename to mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_base.h index 7e53f92..a0ad30a 100644 --- a/ads/common/ops/kernels/op_kernel/dynamic_scatter_base.h +++ b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_base.h @@ -1,184 +1,184 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. - */ -#ifndef _DYNAMIC_SCATTER_BASE_H_ -#define _DYNAMIC_SCATTER_BASE_H_ - -#include "kernel_operator.h" -#include "kernel_tiling/kernel_tiling.h" - -namespace DynamicScatter { -using namespace AscendC; - -constexpr int32_t BUFFER_NUM = 1; -constexpr uint32_t RESERVED_NUM = 1000; - -template -class DynamicScatterBase { -public: - __aicore__ inline DynamicScatterBase() {} - __aicore__ inline void BaseInit(GM_ADDR point_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, - GM_ADDR voxel_feats, DynamicScatterTilingData* tilingData, TPipe* in_pipe) - { - pipe = in_pipe; - - TilingDataInit(tilingData); - MemberDataInit(); - CopyParamasInit(); - GlobalBufInit(point_feats, prefix_sum_point_per_voxel, argsort_coor, voxel_feats); - BufInit(); - - eventIdSToV = static_cast(pipe->AllocEventID()); - eventIdSToMTE2 = static_cast(pipe->AllocEventID()); - eventIdSToMTE3 = static_cast(pipe->AllocEventID()); - eventIdVToMTE3 = static_cast(pipe->AllocEventID()); - eventIdMTE2ToV = static_cast(pipe->AllocEventID()); - eventIdMTE2ToS = static_cast(pipe->AllocEventID()); - eventIdMTE3ToV = static_cast(pipe->AllocEventID()); - eventIdMTE3ToS = static_cast(pipe->AllocEventID()); - eventIdMTE2ToMTE3 = static_cast(pipe->AllocEventID()); - eventIdMTE3ToMTE2 = static_cast(pipe->AllocEventID()); - } - - __aicore__ inline void TilingDataInit(DynamicScatterTilingData* tilingData) - { - usedCoreNum = tilingData->usedCoreNum; - totalPointNum = tilingData->totalPointNum; - totalVoxelNum = tilingData->totalVoxelNum; - featsDim = tilingData->featsDim; - pointFeatsNum = tilingData->pointFeatsNum; - voxelNumPerCore = tilingData->voxelNumPerCore; - voxelNumLastCore = tilingData->voxelNumLastCore; - voxelFeatsNumPerCore = tilingData->voxelFeatsNumPerCore; - voxelFeatsNumLastCore = tilingData->voxelFeatsNumLastCore; - alignedNum = tilingData->alignedNum; - featsDimAligned = tilingData->featsDimAligned; - availablePointNum = tilingData->availablePointNum; - blockLen = tilingData->blockLen; - blockLenPad = tilingData->blockLenPad; - isFeatsAligned = tilingData->isFeatsAligned; - } - - __aicore__ inline void MemberDataInit() - { - if (GetBlockIdx() < usedCoreNum - 1) { - voxelFeatNum = voxelFeatsNumPerCore; - voxelNum = voxelNumPerCore; - voxelOffset = voxelNum * GetBlockIdx(); - } else { - voxelFeatNum = voxelFeatsNumLastCore; - voxelNum = voxelNumLastCore; - voxelOffset = voxelNumPerCore * (usedCoreNum - 1); - } - voxelfeatsOffset = voxelOffset * featsDim; - } - - __aicore__ inline void CopyParamasInit() - { - copyFeatParams.blockCount = 1; - copyFeatParams.blockLen = blockLen; - copyFeatParams.srcStride = 0; - copyFeatParams.dstStride = 0; - if (!isFeatsAligned) { - copyOutPadParams.blockCount = 1; - copyOutPadParams.blockLen = blockLenPad; - copyOutPadParams.srcStride = 0; - copyOutPadParams.dstStride = 0; - copyOutPadParams.rsv = 0; - } - copyprefixSumParams.blockCount = 1; - copyprefixSumParams.blockLen = 1; - copyprefixSumParams.srcStride = 0; - copyprefixSumParams.dstStride = 0; - copyArgsortCoorParams.blockCount = 1; - copyArgsortCoorParams.srcStride = 0; - copyArgsortCoorParams.dstStride = 0; - } - - __aicore__ inline void GlobalBufInit( - GM_ADDR point_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, GM_ADDR voxel_feats) - { - pointFeatsGm.SetGlobalBuffer((__gm__ T*)point_feats, pointFeatsNum); - prefixSumGm.SetGlobalBuffer((__gm__ int32_t*)prefix_sum_point_per_voxel + voxelOffset, voxelNum); - argsortCoorGm.SetGlobalBuffer((__gm__ int32_t*)argsort_coor, totalPointNum); - voxelFeatsGm.SetGlobalBuffer((__gm__ T*)voxel_feats + voxelfeatsOffset, voxelFeatNum); - } - - __aicore__ inline void BufInit() - { - pipe->InitBuffer(prefixSumBuf, alignedNum * sizeof(int32_t)); - this->pipe->InitBuffer(this->argsortCoorBuf, RESERVED_NUM * sizeof(int32_t)); - this->pipe->InitBuffer(this->pointFeatsBuf, availablePointNum * this->featsDimAligned * sizeof(T)); - } - - __aicore__ inline void GetPointNum(uint32_t voxelIdx, const LocalTensor& prefixSumLocal) - { - if (GetBlockIdx() == usedCoreNum - 1 && voxelIdx == voxelNum - 1) { - DataCopy(prefixSumLocal, prefixSumGm[voxelIdx], copyprefixSumParams); - SetFlag(eventIdMTE2ToS); - WaitFlag(eventIdMTE2ToS); - startPoint = prefixSumLocal.GetValue(0); - pointNum = totalPointNum - startPoint; - } else { - DataCopy(prefixSumLocal, prefixSumGm[voxelIdx], copyprefixSumParams); - SetFlag(eventIdMTE2ToS); - WaitFlag(eventIdMTE2ToS); - startPoint = prefixSumLocal.GetValue(0); - pointNum = prefixSumLocal.GetValue(1) - startPoint; - } - } - - __aicore__ inline void CopyFeatsOut( - uint32_t voxelIdx, const LocalTensor& pointFeatsLocal, bool atomicMax, uint32_t offset = 0) - { - if (atomicMax) { - SetAtomicMax(); - } else { - SetAtomicAdd(); - } - if (isFeatsAligned) { - DataCopy(voxelFeatsGm[voxelIdx * featsDim], pointFeatsLocal[offset], copyFeatParams); - } else { - DataCopyPad(voxelFeatsGm[voxelIdx * featsDim], pointFeatsLocal[offset], copyOutPadParams); - } - SetAtomicNone(); - } - - __aicore__ inline void ReleaseEvent() - { - GetTPipePtr()->ReleaseEventID(eventIdSToV); - GetTPipePtr()->ReleaseEventID(eventIdSToMTE2); - GetTPipePtr()->ReleaseEventID(eventIdSToMTE3); - GetTPipePtr()->ReleaseEventID(eventIdVToMTE3); - GetTPipePtr()->ReleaseEventID(eventIdMTE2ToV); - GetTPipePtr()->ReleaseEventID(eventIdMTE2ToS); - GetTPipePtr()->ReleaseEventID(eventIdMTE3ToV); - GetTPipePtr()->ReleaseEventID(eventIdMTE3ToS); - GetTPipePtr()->ReleaseEventID(eventIdMTE2ToMTE3); - GetTPipePtr()->ReleaseEventID(eventIdMTE3ToMTE2); - } - -protected: - TPipe* pipe; - - GlobalTensor pointFeatsGm, voxelFeatsGm; - GlobalTensor prefixSumGm, argsortCoorGm; - - TBuf pointFeatsBuf, prefixSumBuf, argsortCoorBuf; - - uint32_t totalPointNum, totalVoxelNum; - uint32_t featsDim, pointFeatsNum, alignedNum, featsDimAligned, availablePointNum; - uint32_t usedCoreNum, voxelNumPerCore, voxelNumLastCore, voxelFeatsNumPerCore, voxelFeatsNumLastCore; - uint32_t blockLen, blockLenPad; - uint32_t voxelFeatNum, voxelNum, voxelOffset, voxelfeatsOffset; - uint32_t startPoint, pointNum, pointIdx, alignedPointNum; - bool isFeatsAligned; - - DataCopyParams copyFeatParams, copyprefixSumParams, copyArgsortCoorParams; - DataCopyExtParams copyOutPadParams; - - event_t eventIdSToV, eventIdSToMTE2, eventIdSToMTE3, eventIdVToMTE3, eventIdMTE2ToV, eventIdMTE2ToS, eventIdMTE3ToV; - event_t eventIdMTE3ToS, eventIdMTE2ToMTE3, eventIdMTE3ToMTE2; -}; -} // namespace DynamicScatter +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + */ +#ifndef _DYNAMIC_SCATTER_BASE_H_ +#define _DYNAMIC_SCATTER_BASE_H_ + +#include "kernel_operator.h" +#include "kernel_tiling/kernel_tiling.h" + +namespace DynamicScatter { +using namespace AscendC; + +constexpr int32_t BUFFER_NUM = 1; +constexpr uint32_t RESERVED_NUM = 1000; + +template +class DynamicScatterBase { +public: + __aicore__ inline DynamicScatterBase() {} + __aicore__ inline void BaseInit(GM_ADDR point_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, + GM_ADDR voxel_feats, DynamicScatterTilingData* tilingData, TPipe* in_pipe) + { + pipe = in_pipe; + + TilingDataInit(tilingData); + MemberDataInit(); + CopyParamasInit(); + GlobalBufInit(point_feats, prefix_sum_point_per_voxel, argsort_coor, voxel_feats); + BufInit(); + + eventIdSToV = static_cast(pipe->AllocEventID()); + eventIdSToMTE2 = static_cast(pipe->AllocEventID()); + eventIdSToMTE3 = static_cast(pipe->AllocEventID()); + eventIdVToMTE3 = static_cast(pipe->AllocEventID()); + eventIdMTE2ToV = static_cast(pipe->AllocEventID()); + eventIdMTE2ToS = static_cast(pipe->AllocEventID()); + eventIdMTE3ToV = static_cast(pipe->AllocEventID()); + eventIdMTE3ToS = static_cast(pipe->AllocEventID()); + eventIdMTE2ToMTE3 = static_cast(pipe->AllocEventID()); + eventIdMTE3ToMTE2 = static_cast(pipe->AllocEventID()); + } + + __aicore__ inline void TilingDataInit(DynamicScatterTilingData* tilingData) + { + usedCoreNum = tilingData->usedCoreNum; + totalPointNum = tilingData->totalPointNum; + totalVoxelNum = tilingData->totalVoxelNum; + featsDim = tilingData->featsDim; + pointFeatsNum = tilingData->pointFeatsNum; + voxelNumPerCore = tilingData->voxelNumPerCore; + voxelNumLastCore = tilingData->voxelNumLastCore; + voxelFeatsNumPerCore = tilingData->voxelFeatsNumPerCore; + voxelFeatsNumLastCore = tilingData->voxelFeatsNumLastCore; + alignedNum = tilingData->alignedNum; + featsDimAligned = tilingData->featsDimAligned; + availablePointNum = tilingData->availablePointNum; + blockLen = tilingData->blockLen; + blockLenPad = tilingData->blockLenPad; + isFeatsAligned = tilingData->isFeatsAligned; + } + + __aicore__ inline void MemberDataInit() + { + if (GetBlockIdx() < usedCoreNum - 1) { + voxelFeatNum = voxelFeatsNumPerCore; + voxelNum = voxelNumPerCore; + voxelOffset = voxelNum * GetBlockIdx(); + } else { + voxelFeatNum = voxelFeatsNumLastCore; + voxelNum = voxelNumLastCore; + voxelOffset = voxelNumPerCore * (usedCoreNum - 1); + } + voxelfeatsOffset = voxelOffset * featsDim; + } + + __aicore__ inline void CopyParamasInit() + { + copyFeatParams.blockCount = 1; + copyFeatParams.blockLen = blockLen; + copyFeatParams.srcStride = 0; + copyFeatParams.dstStride = 0; + if (!isFeatsAligned) { + copyOutPadParams.blockCount = 1; + copyOutPadParams.blockLen = blockLenPad; + copyOutPadParams.srcStride = 0; + copyOutPadParams.dstStride = 0; + copyOutPadParams.rsv = 0; + } + copyprefixSumParams.blockCount = 1; + copyprefixSumParams.blockLen = 1; + copyprefixSumParams.srcStride = 0; + copyprefixSumParams.dstStride = 0; + copyArgsortCoorParams.blockCount = 1; + copyArgsortCoorParams.srcStride = 0; + copyArgsortCoorParams.dstStride = 0; + } + + __aicore__ inline void GlobalBufInit( + GM_ADDR point_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, GM_ADDR voxel_feats) + { + pointFeatsGm.SetGlobalBuffer((__gm__ T*)point_feats, pointFeatsNum); + prefixSumGm.SetGlobalBuffer((__gm__ int32_t*)prefix_sum_point_per_voxel + voxelOffset, voxelNum); + argsortCoorGm.SetGlobalBuffer((__gm__ int32_t*)argsort_coor, totalPointNum); + voxelFeatsGm.SetGlobalBuffer((__gm__ T*)voxel_feats + voxelfeatsOffset, voxelFeatNum); + } + + __aicore__ inline void BufInit() + { + pipe->InitBuffer(prefixSumBuf, alignedNum * sizeof(int32_t)); + this->pipe->InitBuffer(this->argsortCoorBuf, RESERVED_NUM * sizeof(int32_t)); + this->pipe->InitBuffer(this->pointFeatsBuf, availablePointNum * this->featsDimAligned * sizeof(T)); + } + + __aicore__ inline void GetPointNum(uint32_t voxelIdx, const LocalTensor& prefixSumLocal) + { + if (GetBlockIdx() == usedCoreNum - 1 && voxelIdx == voxelNum - 1) { + DataCopy(prefixSumLocal, prefixSumGm[voxelIdx], copyprefixSumParams); + SetFlag(eventIdMTE2ToS); + WaitFlag(eventIdMTE2ToS); + startPoint = prefixSumLocal.GetValue(0); + pointNum = totalPointNum - startPoint; + } else { + DataCopy(prefixSumLocal, prefixSumGm[voxelIdx], copyprefixSumParams); + SetFlag(eventIdMTE2ToS); + WaitFlag(eventIdMTE2ToS); + startPoint = prefixSumLocal.GetValue(0); + pointNum = prefixSumLocal.GetValue(1) - startPoint; + } + } + + __aicore__ inline void CopyFeatsOut( + uint32_t voxelIdx, const LocalTensor& pointFeatsLocal, bool atomicMax, uint32_t offset = 0) + { + if (atomicMax) { + SetAtomicMax(); + } else { + SetAtomicAdd(); + } + if (isFeatsAligned) { + DataCopy(voxelFeatsGm[voxelIdx * featsDim], pointFeatsLocal[offset], copyFeatParams); + } else { + DataCopyPad(voxelFeatsGm[voxelIdx * featsDim], pointFeatsLocal[offset], copyOutPadParams); + } + SetAtomicNone(); + } + + __aicore__ inline void ReleaseEvent() + { + GetTPipePtr()->ReleaseEventID(eventIdSToV); + GetTPipePtr()->ReleaseEventID(eventIdSToMTE2); + GetTPipePtr()->ReleaseEventID(eventIdSToMTE3); + GetTPipePtr()->ReleaseEventID(eventIdVToMTE3); + GetTPipePtr()->ReleaseEventID(eventIdMTE2ToV); + GetTPipePtr()->ReleaseEventID(eventIdMTE2ToS); + GetTPipePtr()->ReleaseEventID(eventIdMTE3ToV); + GetTPipePtr()->ReleaseEventID(eventIdMTE3ToS); + GetTPipePtr()->ReleaseEventID(eventIdMTE2ToMTE3); + GetTPipePtr()->ReleaseEventID(eventIdMTE3ToMTE2); + } + +protected: + TPipe* pipe; + + GlobalTensor pointFeatsGm, voxelFeatsGm; + GlobalTensor prefixSumGm, argsortCoorGm; + + TBuf pointFeatsBuf, prefixSumBuf, argsortCoorBuf; + + uint32_t totalPointNum, totalVoxelNum; + uint32_t featsDim, pointFeatsNum, alignedNum, featsDimAligned, availablePointNum; + uint32_t usedCoreNum, voxelNumPerCore, voxelNumLastCore, voxelFeatsNumPerCore, voxelFeatsNumLastCore; + uint32_t blockLen, blockLenPad; + uint32_t voxelFeatNum, voxelNum, voxelOffset, voxelfeatsOffset; + uint32_t startPoint, pointNum, pointIdx, alignedPointNum; + bool isFeatsAligned; + + DataCopyParams copyFeatParams, copyprefixSumParams, copyArgsortCoorParams; + DataCopyExtParams copyOutPadParams; + + event_t eventIdSToV, eventIdSToMTE2, eventIdSToMTE3, eventIdVToMTE3, eventIdMTE2ToV, eventIdMTE2ToS, eventIdMTE3ToV; + event_t eventIdMTE3ToS, eventIdMTE2ToMTE3, eventIdMTE3ToMTE2; +}; +} // namespace DynamicScatter #endif // _DYNAMIC_SCATTER_BASE_H_ \ No newline at end of file diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter_grad.cpp b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad.cpp similarity index 97% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter_grad.cpp rename to mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad.cpp index 5d854a2..ee576a0 100644 --- a/ads/common/ops/kernels/op_kernel/dynamic_scatter_grad.cpp +++ b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad.cpp @@ -1,29 +1,29 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. - */ -#include "dynamic_scatter_grad_max.h" -#include "dynamic_scatter_grad_mean.h" -#include "dynamic_scatter_grad_sum.h" - -using namespace DynamicScatterGrad; - -extern "C" __global__ __aicore__ void dynamic_scatter_grad(GM_ADDR grad_voxel_feats, GM_ADDR prefix_sum_point_per_voxel, - GM_ADDR argsort_coor, GM_ADDR compare_mask, GM_ADDR grad_point_feats, GM_ADDR workspace, GM_ADDR tiling) -{ - TPipe pipe; - GET_TILING_DATA(tilingData, tiling); - if (TILING_KEY_IS(102)) { - DynamicScatterGrad::DynamicScatterGradMax op; - op.Init(grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, compare_mask, grad_point_feats, &tilingData, - &pipe); - op.Process(); - } else if (TILING_KEY_IS(101)) { - DynamicScatterGrad::DynamicScatterGradMean op; - op.Init(grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, grad_point_feats, &tilingData, &pipe); - op.Process(); - } else if (TILING_KEY_IS(100)) { - DynamicScatterGrad::DynamicScatterGradSum op; - op.Init(grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, grad_point_feats, &tilingData, &pipe); - op.Process(); - } +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + */ +#include "dynamic_scatter_grad_max.h" +#include "dynamic_scatter_grad_mean.h" +#include "dynamic_scatter_grad_sum.h" + +using namespace DynamicScatterGrad; + +extern "C" __global__ __aicore__ void dynamic_scatter_grad(GM_ADDR grad_voxel_feats, GM_ADDR prefix_sum_point_per_voxel, + GM_ADDR argsort_coor, GM_ADDR compare_mask, GM_ADDR grad_point_feats, GM_ADDR workspace, GM_ADDR tiling) +{ + TPipe pipe; + GET_TILING_DATA(tilingData, tiling); + if (TILING_KEY_IS(102)) { + DynamicScatterGrad::DynamicScatterGradMax op; + op.Init(grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, compare_mask, grad_point_feats, &tilingData, + &pipe); + op.Process(); + } else if (TILING_KEY_IS(101)) { + DynamicScatterGrad::DynamicScatterGradMean op; + op.Init(grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, grad_point_feats, &tilingData, &pipe); + op.Process(); + } else if (TILING_KEY_IS(100)) { + DynamicScatterGrad::DynamicScatterGradSum op; + op.Init(grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, grad_point_feats, &tilingData, &pipe); + op.Process(); + } } \ No newline at end of file diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter_grad_base.h b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_base.h similarity index 97% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter_grad_base.h rename to mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_base.h index e8cdd4b..8b2a1e0 100644 --- a/ads/common/ops/kernels/op_kernel/dynamic_scatter_grad_base.h +++ b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_base.h @@ -1,167 +1,167 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. - */ -#ifndef _DYNAMIC_SCATTER_GRAD_BASE_H_ -#define _DYNAMIC_SCATTER_GRAD_BASE_H_ - -#include - -#include "kernel_operator.h" -#include "kernel_tiling/kernel_tiling.h" - -namespace DynamicScatterGrad { -using namespace AscendC; - -constexpr int32_t BUFFER_NUM = 1; -constexpr uint32_t RESERVED_NUM = 1000; - -template -class DynamicScatterGradBase { -public: - __aicore__ inline DynamicScatterGradBase() {} - __aicore__ inline void BaseInit(GM_ADDR grad_voxel_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, - GM_ADDR grad_point_feats, DynamicScatterGradTilingData* tilingData, TPipe* in_pipe) - { - pipe = in_pipe; - - TilingDataInit(tilingData); - MemberDataInit(); - CopyParamasInit(); - GlobalBufInit(grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, grad_point_feats); - BufInit(); - - eventIdSToV = static_cast(pipe->AllocEventID()); - eventIdMte2ToS = static_cast(pipe->AllocEventID()); - eventIdSToMTE2 = static_cast(pipe->AllocEventID()); - eventIdSToMTE3 = static_cast(pipe->AllocEventID()); - eventIdMTE3ToMTE2 = static_cast(pipe->AllocEventID()); - } - - __aicore__ inline void TilingDataInit(DynamicScatterGradTilingData* tilingData) - { - alignedNum = tilingData->alignedNum; - totalPointNum = tilingData->totalPointNum; - totalVoxelNum = tilingData->totalVoxelNum; - pointGradNum = tilingData->pointGradNum; - voxelNumPerCore = tilingData->voxelNumPerCore; - voxelNumLastCore = tilingData->voxelNumLastCore; - eleNumPerCore = tilingData->eleNumPerCore; - eleNumLastCore = tilingData->eleNumLastCore; - featDim = tilingData->featDim; - featDimAligned = tilingData->featDimAligned; - blockLen = tilingData->blockLen; - blockLenPad = tilingData->blockLenPad; - isFeatsAligned = tilingData->isFeatsAligned; - usedCoreNum = tilingData->usedCoreNum; - } - - __aicore__ inline void MemberDataInit() - { - if (GetBlockIdx() < usedCoreNum - 1) { - voxelFeatNum = eleNumPerCore; - voxelNum = voxelNumPerCore; - voxelOffset = voxelNum * GetBlockIdx(); - } else { - voxelFeatNum = eleNumLastCore; - voxelNum = voxelNumLastCore; - voxelOffset = voxelNumPerCore * (usedCoreNum - 1); - } - voxelfeatsOffset = voxelOffset * featDim; - } - - __aicore__ inline void CopyParamasInit() - { - copyFeatParams.blockCount = 1; - copyFeatParams.blockLen = blockLen; - copyFeatParams.srcStride = 0; - copyFeatParams.dstStride = 0; - if (!isFeatsAligned) { - copyOutPadParams.blockCount = 1; - copyOutPadParams.blockLen = blockLenPad; - copyOutPadParams.srcStride = 0; - copyOutPadParams.dstStride = 0; - copyOutPadParams.rsv = 0; - } - copyprefixSumParams.blockCount = 1; - copyprefixSumParams.blockLen = 1; - copyprefixSumParams.srcStride = 0; - copyprefixSumParams.dstStride = 0; - copyArgsortCoorParams.blockCount = 1; - copyArgsortCoorParams.srcStride = 0; - copyArgsortCoorParams.dstStride = 0; - } - - __aicore__ inline void GlobalBufInit( - GM_ADDR grad_voxel_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, GM_ADDR grad_point_feats) - { - voxelGradGm.SetGlobalBuffer((__gm__ T*)grad_voxel_feats + voxelfeatsOffset, voxelFeatNum); - prefixSumGm.SetGlobalBuffer((__gm__ int32_t*)prefix_sum_point_per_voxel + voxelOffset, totalVoxelNum); - argsortCoorGm.SetGlobalBuffer((__gm__ int32_t*)argsort_coor, totalVoxelNum - 1); - pointGradGm.SetGlobalBuffer((__gm__ T*)grad_point_feats, pointGradNum); - } - - __aicore__ inline void BufInit() - { - pipe->InitBuffer(voxelGradBuf, featDimAligned * sizeof(T)); - pipe->InitBuffer(prefixSumBuf, alignedNum * sizeof(int32_t)); - pipe->InitBuffer(argsortCoorBuf, RESERVED_NUM * sizeof(int32_t)); - } - - __aicore__ inline void GetPointNum(uint32_t voxel_idx, LocalTensor prefixSumLocal) - { - if (GetBlockIdx() == usedCoreNum - 1 && voxel_idx == voxelNum - 1) { - DataCopy(prefixSumLocal, prefixSumGm[voxel_idx], copyprefixSumParams); - SetFlag(eventIdMte2ToS); - WaitFlag(eventIdMte2ToS); - startPoint = prefixSumLocal.GetValue(0); - pointNum = totalPointNum - startPoint; - } else { - DataCopy(prefixSumLocal, prefixSumGm[voxel_idx], copyprefixSumParams); - SetFlag(eventIdMte2ToS); - WaitFlag(eventIdMte2ToS); - startPoint = prefixSumLocal.GetValue(0); - pointNum = prefixSumLocal.GetValue(1) - startPoint; - } - } - - __aicore__ inline void CopyPointGradOut(LocalTensor voxelGradLocal) - { - SetFlag(eventIdSToMTE3); - WaitFlag(eventIdSToMTE3); - if (isFeatsAligned) { - DataCopy(pointGradGm[point_idx * featDim], voxelGradLocal, copyFeatParams); - } else { - DataCopyPad(pointGradGm[point_idx * featDim], voxelGradLocal, copyOutPadParams); - } - } - - __aicore__ inline void ReleaseEvent() - { - GetTPipePtr()->ReleaseEventID(eventIdMte2ToS); - GetTPipePtr()->ReleaseEventID(eventIdSToMTE2); - GetTPipePtr()->ReleaseEventID(eventIdSToMTE3); - GetTPipePtr()->ReleaseEventID(eventIdSToV); - GetTPipePtr()->ReleaseEventID(eventIdMTE3ToMTE2); - } - -protected: - TPipe* pipe; - - GlobalTensor voxelGradGm, pointGradGm; - GlobalTensor prefixSumGm, argsortCoorGm; - - TBuf voxelGradBuf, prefixSumBuf, argsortCoorBuf; - - uint32_t voxelNumPerCore, voxelNumLastCore, eleNumPerCore, eleNumLastCore, usedCoreNum; - uint32_t totalPointNum, totalVoxelNum, alignedNum, blockLen, blockLenPad; - uint32_t pointGradNum, featDim, featDimAligned, voxelFeatNum, voxelNum, voxelOffset, voxelfeatsOffset; - uint32_t point_idx, pointNum, startPoint; - bool isFeatsAligned; - - DataCopyParams copyFeatParams, copyprefixSumParams, copyArgsortCoorParams; - DataCopyExtParams copyOutPadParams; - - event_t eventIdMte2ToS, eventIdSToMTE2, eventIdSToV, eventIdMTE3ToMTE2, eventIdSToMTE3; -}; -} // namespace DynamicScatterGrad +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + */ +#ifndef _DYNAMIC_SCATTER_GRAD_BASE_H_ +#define _DYNAMIC_SCATTER_GRAD_BASE_H_ + +#include + +#include "kernel_operator.h" +#include "kernel_tiling/kernel_tiling.h" + +namespace DynamicScatterGrad { +using namespace AscendC; + +constexpr int32_t BUFFER_NUM = 1; +constexpr uint32_t RESERVED_NUM = 1000; + +template +class DynamicScatterGradBase { +public: + __aicore__ inline DynamicScatterGradBase() {} + __aicore__ inline void BaseInit(GM_ADDR grad_voxel_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, + GM_ADDR grad_point_feats, DynamicScatterGradTilingData* tilingData, TPipe* in_pipe) + { + pipe = in_pipe; + + TilingDataInit(tilingData); + MemberDataInit(); + CopyParamasInit(); + GlobalBufInit(grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, grad_point_feats); + BufInit(); + + eventIdSToV = static_cast(pipe->AllocEventID()); + eventIdMte2ToS = static_cast(pipe->AllocEventID()); + eventIdSToMTE2 = static_cast(pipe->AllocEventID()); + eventIdSToMTE3 = static_cast(pipe->AllocEventID()); + eventIdMTE3ToMTE2 = static_cast(pipe->AllocEventID()); + } + + __aicore__ inline void TilingDataInit(DynamicScatterGradTilingData* tilingData) + { + alignedNum = tilingData->alignedNum; + totalPointNum = tilingData->totalPointNum; + totalVoxelNum = tilingData->totalVoxelNum; + pointGradNum = tilingData->pointGradNum; + voxelNumPerCore = tilingData->voxelNumPerCore; + voxelNumLastCore = tilingData->voxelNumLastCore; + eleNumPerCore = tilingData->eleNumPerCore; + eleNumLastCore = tilingData->eleNumLastCore; + featDim = tilingData->featDim; + featDimAligned = tilingData->featDimAligned; + blockLen = tilingData->blockLen; + blockLenPad = tilingData->blockLenPad; + isFeatsAligned = tilingData->isFeatsAligned; + usedCoreNum = tilingData->usedCoreNum; + } + + __aicore__ inline void MemberDataInit() + { + if (GetBlockIdx() < usedCoreNum - 1) { + voxelFeatNum = eleNumPerCore; + voxelNum = voxelNumPerCore; + voxelOffset = voxelNum * GetBlockIdx(); + } else { + voxelFeatNum = eleNumLastCore; + voxelNum = voxelNumLastCore; + voxelOffset = voxelNumPerCore * (usedCoreNum - 1); + } + voxelfeatsOffset = voxelOffset * featDim; + } + + __aicore__ inline void CopyParamasInit() + { + copyFeatParams.blockCount = 1; + copyFeatParams.blockLen = blockLen; + copyFeatParams.srcStride = 0; + copyFeatParams.dstStride = 0; + if (!isFeatsAligned) { + copyOutPadParams.blockCount = 1; + copyOutPadParams.blockLen = blockLenPad; + copyOutPadParams.srcStride = 0; + copyOutPadParams.dstStride = 0; + copyOutPadParams.rsv = 0; + } + copyprefixSumParams.blockCount = 1; + copyprefixSumParams.blockLen = 1; + copyprefixSumParams.srcStride = 0; + copyprefixSumParams.dstStride = 0; + copyArgsortCoorParams.blockCount = 1; + copyArgsortCoorParams.srcStride = 0; + copyArgsortCoorParams.dstStride = 0; + } + + __aicore__ inline void GlobalBufInit( + GM_ADDR grad_voxel_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, GM_ADDR grad_point_feats) + { + voxelGradGm.SetGlobalBuffer((__gm__ T*)grad_voxel_feats + voxelfeatsOffset, voxelFeatNum); + prefixSumGm.SetGlobalBuffer((__gm__ int32_t*)prefix_sum_point_per_voxel + voxelOffset, totalVoxelNum); + argsortCoorGm.SetGlobalBuffer((__gm__ int32_t*)argsort_coor, totalVoxelNum - 1); + pointGradGm.SetGlobalBuffer((__gm__ T*)grad_point_feats, pointGradNum); + } + + __aicore__ inline void BufInit() + { + pipe->InitBuffer(voxelGradBuf, featDimAligned * sizeof(T)); + pipe->InitBuffer(prefixSumBuf, alignedNum * sizeof(int32_t)); + pipe->InitBuffer(argsortCoorBuf, RESERVED_NUM * sizeof(int32_t)); + } + + __aicore__ inline void GetPointNum(uint32_t voxel_idx, LocalTensor prefixSumLocal) + { + if (GetBlockIdx() == usedCoreNum - 1 && voxel_idx == voxelNum - 1) { + DataCopy(prefixSumLocal, prefixSumGm[voxel_idx], copyprefixSumParams); + SetFlag(eventIdMte2ToS); + WaitFlag(eventIdMte2ToS); + startPoint = prefixSumLocal.GetValue(0); + pointNum = totalPointNum - startPoint; + } else { + DataCopy(prefixSumLocal, prefixSumGm[voxel_idx], copyprefixSumParams); + SetFlag(eventIdMte2ToS); + WaitFlag(eventIdMte2ToS); + startPoint = prefixSumLocal.GetValue(0); + pointNum = prefixSumLocal.GetValue(1) - startPoint; + } + } + + __aicore__ inline void CopyPointGradOut(LocalTensor voxelGradLocal) + { + SetFlag(eventIdSToMTE3); + WaitFlag(eventIdSToMTE3); + if (isFeatsAligned) { + DataCopy(pointGradGm[point_idx * featDim], voxelGradLocal, copyFeatParams); + } else { + DataCopyPad(pointGradGm[point_idx * featDim], voxelGradLocal, copyOutPadParams); + } + } + + __aicore__ inline void ReleaseEvent() + { + GetTPipePtr()->ReleaseEventID(eventIdMte2ToS); + GetTPipePtr()->ReleaseEventID(eventIdSToMTE2); + GetTPipePtr()->ReleaseEventID(eventIdSToMTE3); + GetTPipePtr()->ReleaseEventID(eventIdSToV); + GetTPipePtr()->ReleaseEventID(eventIdMTE3ToMTE2); + } + +protected: + TPipe* pipe; + + GlobalTensor voxelGradGm, pointGradGm; + GlobalTensor prefixSumGm, argsortCoorGm; + + TBuf voxelGradBuf, prefixSumBuf, argsortCoorBuf; + + uint32_t voxelNumPerCore, voxelNumLastCore, eleNumPerCore, eleNumLastCore, usedCoreNum; + uint32_t totalPointNum, totalVoxelNum, alignedNum, blockLen, blockLenPad; + uint32_t pointGradNum, featDim, featDimAligned, voxelFeatNum, voxelNum, voxelOffset, voxelfeatsOffset; + uint32_t point_idx, pointNum, startPoint; + bool isFeatsAligned; + + DataCopyParams copyFeatParams, copyprefixSumParams, copyArgsortCoorParams; + DataCopyExtParams copyOutPadParams; + + event_t eventIdMte2ToS, eventIdSToMTE2, eventIdSToV, eventIdMTE3ToMTE2, eventIdSToMTE3; +}; +} // namespace DynamicScatterGrad #endif // _DYNAMIC_SCATTER_GRAD_BASE_H_ \ No newline at end of file diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter_grad_max.h b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_max.h similarity index 96% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter_grad_max.h rename to mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_max.h index 615ccd5..b6f212e 100644 --- a/ads/common/ops/kernels/op_kernel/dynamic_scatter_grad_max.h +++ b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_max.h @@ -1,91 +1,91 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. - */ -#ifndef _DYNAMIC_SCATTER_GRAD_MAX_H_ -#define _DYNAMIC_SCATTER_GRAD_MAX_H_ - -#include "dynamic_scatter_grad_base.h" - -namespace DynamicScatterGrad { -using namespace AscendC; - -template -class DynamicScatterGradMax : public DynamicScatterGradBase { -public: - __aicore__ inline DynamicScatterGradMax() {} - __aicore__ inline void Init(GM_ADDR grad_voxel_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, - GM_ADDR compare_mask, GM_ADDR grad_point_feats, DynamicScatterGradTilingData* tilingData, TPipe* in_pipe) - { - this->BaseInit( - grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, grad_point_feats, tilingData, in_pipe); - - maskNum = tilingData->maskNum; - maskDim = tilingData->maskDim; - maskDimAligned = tilingData->maskDimAligned; - blockLenMask = tilingData->blockLenMask; - - copyMaskParams.blockCount = 1; - copyMaskParams.blockLen = blockLenMask; - copyMaskParams.srcStride = 0; - copyMaskParams.dstStride = 0; - - compareMaskGm.SetGlobalBuffer((__gm__ uint8_t*)compare_mask, maskNum); - - this->pipe->InitBuffer(compareMaskBuf, maskDimAligned * sizeof(uint8_t)); - this->pipe->InitBuffer(zeroBuf, this->featDimAligned * sizeof(T)); - this->pipe->InitBuffer(pointGradBuf, this->featDimAligned * sizeof(T)); - } - - __aicore__ inline void Process() - { - Compute(); - this->ReleaseEvent(); - } - -private: - __aicore__ inline void Compute() - { - LocalTensor zeroLocal = zeroBuf.template Get(); - LocalTensor pointGradLocal = pointGradBuf.template Get(); - LocalTensor voxelGradLocal = this->voxelGradBuf.template Get(); - LocalTensor compareMaskLocal = compareMaskBuf.template Get(); - LocalTensor prefixSumLocal = this->prefixSumBuf.template Get(); - LocalTensor argsortCoorLocal = this->argsortCoorBuf.template Get(); - Duplicate(zeroLocal, static_cast(0), this->featDimAligned); - - for (uint32_t voxel_idx = 0; voxel_idx < this->voxelNum; voxel_idx++) { - DataCopy(voxelGradLocal, this->voxelGradGm[voxel_idx * this->featDim], this->copyFeatParams); - this->GetPointNum(voxel_idx, prefixSumLocal); - uint32_t aligned_point_num = AlignUp(this->pointNum, this->alignedNum); - this->copyArgsortCoorParams.blockLen = aligned_point_num / this->alignedNum; - - SetFlag(this->eventIdSToMTE2); - WaitFlag(this->eventIdSToMTE2); - DataCopy(argsortCoorLocal, this->argsortCoorGm[this->startPoint], this->copyArgsortCoorParams); - - SetFlag(this->eventIdMte2ToS); - WaitFlag(this->eventIdMte2ToS); - for (uint32_t idx = 0; idx < this->pointNum; idx++) { - this->point_idx = argsortCoorLocal.GetValue(idx); - SetFlag(this->eventIdSToMTE2); - WaitFlag(this->eventIdSToMTE2); - DataCopy(compareMaskLocal, compareMaskGm[this->point_idx * maskDim], copyMaskParams); - PipeBarrier(); - Select(pointGradLocal, compareMaskLocal, voxelGradLocal, zeroLocal, - SELMODE::VSEL_TENSOR_TENSOR_MODE, this->featDim); - PipeBarrier(); - this->CopyPointGradOut(pointGradLocal); - } - SetFlag(this->eventIdMTE3ToMTE2); - WaitFlag(this->eventIdMTE3ToMTE2); - } - } - -private: - DataCopyParams copyMaskParams; - GlobalTensor compareMaskGm; - uint32_t maskNum, maskDim, maskDimAligned, blockLenMask; - TBuf compareMaskBuf, zeroBuf, pointGradBuf; -}; -} // namespace DynamicScatterGrad +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + */ +#ifndef _DYNAMIC_SCATTER_GRAD_MAX_H_ +#define _DYNAMIC_SCATTER_GRAD_MAX_H_ + +#include "dynamic_scatter_grad_base.h" + +namespace DynamicScatterGrad { +using namespace AscendC; + +template +class DynamicScatterGradMax : public DynamicScatterGradBase { +public: + __aicore__ inline DynamicScatterGradMax() {} + __aicore__ inline void Init(GM_ADDR grad_voxel_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, + GM_ADDR compare_mask, GM_ADDR grad_point_feats, DynamicScatterGradTilingData* tilingData, TPipe* in_pipe) + { + this->BaseInit( + grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, grad_point_feats, tilingData, in_pipe); + + maskNum = tilingData->maskNum; + maskDim = tilingData->maskDim; + maskDimAligned = tilingData->maskDimAligned; + blockLenMask = tilingData->blockLenMask; + + copyMaskParams.blockCount = 1; + copyMaskParams.blockLen = blockLenMask; + copyMaskParams.srcStride = 0; + copyMaskParams.dstStride = 0; + + compareMaskGm.SetGlobalBuffer((__gm__ uint8_t*)compare_mask, maskNum); + + this->pipe->InitBuffer(compareMaskBuf, maskDimAligned * sizeof(uint8_t)); + this->pipe->InitBuffer(zeroBuf, this->featDimAligned * sizeof(T)); + this->pipe->InitBuffer(pointGradBuf, this->featDimAligned * sizeof(T)); + } + + __aicore__ inline void Process() + { + Compute(); + this->ReleaseEvent(); + } + +private: + __aicore__ inline void Compute() + { + LocalTensor zeroLocal = zeroBuf.template Get(); + LocalTensor pointGradLocal = pointGradBuf.template Get(); + LocalTensor voxelGradLocal = this->voxelGradBuf.template Get(); + LocalTensor compareMaskLocal = compareMaskBuf.template Get(); + LocalTensor prefixSumLocal = this->prefixSumBuf.template Get(); + LocalTensor argsortCoorLocal = this->argsortCoorBuf.template Get(); + Duplicate(zeroLocal, static_cast(0), this->featDimAligned); + + for (uint32_t voxel_idx = 0; voxel_idx < this->voxelNum; voxel_idx++) { + DataCopy(voxelGradLocal, this->voxelGradGm[voxel_idx * this->featDim], this->copyFeatParams); + this->GetPointNum(voxel_idx, prefixSumLocal); + uint32_t aligned_point_num = AlignUp(this->pointNum, this->alignedNum); + this->copyArgsortCoorParams.blockLen = aligned_point_num / this->alignedNum; + + SetFlag(this->eventIdSToMTE2); + WaitFlag(this->eventIdSToMTE2); + DataCopy(argsortCoorLocal, this->argsortCoorGm[this->startPoint], this->copyArgsortCoorParams); + + SetFlag(this->eventIdMte2ToS); + WaitFlag(this->eventIdMte2ToS); + for (uint32_t idx = 0; idx < this->pointNum; idx++) { + this->point_idx = argsortCoorLocal.GetValue(idx); + SetFlag(this->eventIdSToMTE2); + WaitFlag(this->eventIdSToMTE2); + DataCopy(compareMaskLocal, compareMaskGm[this->point_idx * maskDim], copyMaskParams); + PipeBarrier(); + Select(pointGradLocal, compareMaskLocal, voxelGradLocal, zeroLocal, + SELMODE::VSEL_TENSOR_TENSOR_MODE, this->featDim); + PipeBarrier(); + this->CopyPointGradOut(pointGradLocal); + } + SetFlag(this->eventIdMTE3ToMTE2); + WaitFlag(this->eventIdMTE3ToMTE2); + } + } + +private: + DataCopyParams copyMaskParams; + GlobalTensor compareMaskGm; + uint32_t maskNum, maskDim, maskDimAligned, blockLenMask; + TBuf compareMaskBuf, zeroBuf, pointGradBuf; +}; +} // namespace DynamicScatterGrad #endif // _DYNAMIC_SCATTER_GRAD_MAX_H_ \ No newline at end of file diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter_grad_mean.h b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_mean.h similarity index 96% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter_grad_mean.h rename to mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_mean.h index 7763dd2..4a5cc74 100644 --- a/ads/common/ops/kernels/op_kernel/dynamic_scatter_grad_mean.h +++ b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_mean.h @@ -1,70 +1,70 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. - */ -#ifndef _DYNAMIC_SCATTER_GRAD_MEAN_H_ -#define _DYNAMIC_SCATTER_GRAD_MEAN_H_ - -#include "dynamic_scatter_grad_base.h" - -namespace DynamicScatterGrad { -using namespace AscendC; - -template -class DynamicScatterGradMean : public DynamicScatterGradBase { -public: - __aicore__ inline DynamicScatterGradMean() {} - __aicore__ inline void Init(GM_ADDR grad_voxel_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, - GM_ADDR grad_point_feats, DynamicScatterGradTilingData* tilingData, TPipe* in_pipe) - { - this->BaseInit( - grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, grad_point_feats, tilingData, in_pipe); - this->pipe->InitBuffer(pointNumDupBuf, this->featDimAligned * sizeof(T)); - this->pipe->InitBuffer(pointGradBuf, this->featDimAligned * sizeof(T)); - } - - __aicore__ inline void Process() - { - Compute(); - this->ReleaseEvent(); - } - -private: - __aicore__ inline void Compute() - { - LocalTensor pointGradLocal = pointGradBuf.template Get(); - LocalTensor pointNumDupLocal = pointNumDupBuf.template Get(); - LocalTensor voxelGradLocal = this->voxelGradBuf.template Get(); - LocalTensor prefixSumLocal = this->prefixSumBuf.template Get(); - LocalTensor argsortCoorLocal = this->argsortCoorBuf.template Get(); - - for (uint32_t voxel_idx = 0; voxel_idx < this->voxelNum; voxel_idx++) { - DataCopy(voxelGradLocal, this->voxelGradGm[voxel_idx * this->featDim], this->copyFeatParams); - this->GetPointNum(voxel_idx, prefixSumLocal); - uint32_t aligned_point_num = AlignUp(this->pointNum, this->alignedNum); - this->copyArgsortCoorParams.blockLen = aligned_point_num / this->alignedNum; - - SetFlag(this->eventIdSToV); - WaitFlag(this->eventIdSToV); - Duplicate(pointNumDupLocal, static_cast(static_cast(this->pointNum)), this->featDimAligned); - Div(pointGradLocal, voxelGradLocal, pointNumDupLocal, this->featDimAligned); - - SetFlag(this->eventIdSToMTE2); - WaitFlag(this->eventIdSToMTE2); - DataCopy(argsortCoorLocal, this->argsortCoorGm[this->startPoint], this->copyArgsortCoorParams); - - SetFlag(this->eventIdMte2ToS); - WaitFlag(this->eventIdMte2ToS); - for (uint32_t idx = 0; idx < this->pointNum; idx++) { - this->point_idx = argsortCoorLocal.GetValue(idx); - this->CopyPointGradOut(pointGradLocal); - } - SetFlag(this->eventIdMTE3ToMTE2); - WaitFlag(this->eventIdMTE3ToMTE2); - } - } - -private: - TBuf pointNumDupBuf, pointGradBuf; -}; -} // namespace DynamicScatterGrad +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + */ +#ifndef _DYNAMIC_SCATTER_GRAD_MEAN_H_ +#define _DYNAMIC_SCATTER_GRAD_MEAN_H_ + +#include "dynamic_scatter_grad_base.h" + +namespace DynamicScatterGrad { +using namespace AscendC; + +template +class DynamicScatterGradMean : public DynamicScatterGradBase { +public: + __aicore__ inline DynamicScatterGradMean() {} + __aicore__ inline void Init(GM_ADDR grad_voxel_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, + GM_ADDR grad_point_feats, DynamicScatterGradTilingData* tilingData, TPipe* in_pipe) + { + this->BaseInit( + grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, grad_point_feats, tilingData, in_pipe); + this->pipe->InitBuffer(pointNumDupBuf, this->featDimAligned * sizeof(T)); + this->pipe->InitBuffer(pointGradBuf, this->featDimAligned * sizeof(T)); + } + + __aicore__ inline void Process() + { + Compute(); + this->ReleaseEvent(); + } + +private: + __aicore__ inline void Compute() + { + LocalTensor pointGradLocal = pointGradBuf.template Get(); + LocalTensor pointNumDupLocal = pointNumDupBuf.template Get(); + LocalTensor voxelGradLocal = this->voxelGradBuf.template Get(); + LocalTensor prefixSumLocal = this->prefixSumBuf.template Get(); + LocalTensor argsortCoorLocal = this->argsortCoorBuf.template Get(); + + for (uint32_t voxel_idx = 0; voxel_idx < this->voxelNum; voxel_idx++) { + DataCopy(voxelGradLocal, this->voxelGradGm[voxel_idx * this->featDim], this->copyFeatParams); + this->GetPointNum(voxel_idx, prefixSumLocal); + uint32_t aligned_point_num = AlignUp(this->pointNum, this->alignedNum); + this->copyArgsortCoorParams.blockLen = aligned_point_num / this->alignedNum; + + SetFlag(this->eventIdSToV); + WaitFlag(this->eventIdSToV); + Duplicate(pointNumDupLocal, static_cast(static_cast(this->pointNum)), this->featDimAligned); + Div(pointGradLocal, voxelGradLocal, pointNumDupLocal, this->featDimAligned); + + SetFlag(this->eventIdSToMTE2); + WaitFlag(this->eventIdSToMTE2); + DataCopy(argsortCoorLocal, this->argsortCoorGm[this->startPoint], this->copyArgsortCoorParams); + + SetFlag(this->eventIdMte2ToS); + WaitFlag(this->eventIdMte2ToS); + for (uint32_t idx = 0; idx < this->pointNum; idx++) { + this->point_idx = argsortCoorLocal.GetValue(idx); + this->CopyPointGradOut(pointGradLocal); + } + SetFlag(this->eventIdMTE3ToMTE2); + WaitFlag(this->eventIdMTE3ToMTE2); + } + } + +private: + TBuf pointNumDupBuf, pointGradBuf; +}; +} // namespace DynamicScatterGrad #endif // _DYNAMIC_SCATTER_GRAD_MEAN_H_ \ No newline at end of file diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter_grad_sum.h b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_sum.h similarity index 96% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter_grad_sum.h rename to mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_sum.h index 8b9ac06..414d909 100644 --- a/ads/common/ops/kernels/op_kernel/dynamic_scatter_grad_sum.h +++ b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_grad_sum.h @@ -1,58 +1,58 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. - */ -#ifndef _DYNAMIC_SCATTER_GRAD_SUM_H_ -#define _DYNAMIC_SCATTER_GRAD_SUM_H_ - -#include "dynamic_scatter_grad_base.h" - -namespace DynamicScatterGrad { -using namespace AscendC; - -template -class DynamicScatterGradSum : public DynamicScatterGradBase { -public: - __aicore__ inline DynamicScatterGradSum() {} - __aicore__ inline void Init(GM_ADDR grad_voxel_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, - GM_ADDR grad_point_feats, DynamicScatterGradTilingData* tilingData, TPipe* in_pipe) - { - this->BaseInit( - grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, grad_point_feats, tilingData, in_pipe); - } - - __aicore__ inline void Process() - { - Compute(); - this->ReleaseEvent(); - } - -private: - __aicore__ inline void Compute() - { - LocalTensor voxelGradLocal = this->voxelGradBuf.template Get(); - LocalTensor prefixSumLocal = this->prefixSumBuf.template Get(); - LocalTensor argsortCoorLocal = this->argsortCoorBuf.template Get(); - - for (uint32_t voxel_idx = 0; voxel_idx < this->voxelNum; voxel_idx++) { - DataCopy(voxelGradLocal, this->voxelGradGm[voxel_idx * this->featDim], this->copyFeatParams); - this->GetPointNum(voxel_idx, prefixSumLocal); - uint32_t aligned_point_num = AlignUp(this->pointNum, this->alignedNum); - this->copyArgsortCoorParams.blockLen = aligned_point_num / this->alignedNum; - - SetFlag(this->eventIdSToMTE2); - WaitFlag(this->eventIdSToMTE2); - DataCopy(argsortCoorLocal, this->argsortCoorGm[this->startPoint], this->copyArgsortCoorParams); - - SetFlag(this->eventIdMte2ToS); - WaitFlag(this->eventIdMte2ToS); - for (uint32_t idx = 0; idx < this->pointNum; idx++) { - this->point_idx = argsortCoorLocal.GetValue(idx); - this->CopyPointGradOut(voxelGradLocal); - } - SetFlag(this->eventIdMTE3ToMTE2); - WaitFlag(this->eventIdMTE3ToMTE2); - } - } -}; -} // namespace DynamicScatterGrad +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + */ +#ifndef _DYNAMIC_SCATTER_GRAD_SUM_H_ +#define _DYNAMIC_SCATTER_GRAD_SUM_H_ + +#include "dynamic_scatter_grad_base.h" + +namespace DynamicScatterGrad { +using namespace AscendC; + +template +class DynamicScatterGradSum : public DynamicScatterGradBase { +public: + __aicore__ inline DynamicScatterGradSum() {} + __aicore__ inline void Init(GM_ADDR grad_voxel_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, + GM_ADDR grad_point_feats, DynamicScatterGradTilingData* tilingData, TPipe* in_pipe) + { + this->BaseInit( + grad_voxel_feats, prefix_sum_point_per_voxel, argsort_coor, grad_point_feats, tilingData, in_pipe); + } + + __aicore__ inline void Process() + { + Compute(); + this->ReleaseEvent(); + } + +private: + __aicore__ inline void Compute() + { + LocalTensor voxelGradLocal = this->voxelGradBuf.template Get(); + LocalTensor prefixSumLocal = this->prefixSumBuf.template Get(); + LocalTensor argsortCoorLocal = this->argsortCoorBuf.template Get(); + + for (uint32_t voxel_idx = 0; voxel_idx < this->voxelNum; voxel_idx++) { + DataCopy(voxelGradLocal, this->voxelGradGm[voxel_idx * this->featDim], this->copyFeatParams); + this->GetPointNum(voxel_idx, prefixSumLocal); + uint32_t aligned_point_num = AlignUp(this->pointNum, this->alignedNum); + this->copyArgsortCoorParams.blockLen = aligned_point_num / this->alignedNum; + + SetFlag(this->eventIdSToMTE2); + WaitFlag(this->eventIdSToMTE2); + DataCopy(argsortCoorLocal, this->argsortCoorGm[this->startPoint], this->copyArgsortCoorParams); + + SetFlag(this->eventIdMte2ToS); + WaitFlag(this->eventIdMte2ToS); + for (uint32_t idx = 0; idx < this->pointNum; idx++) { + this->point_idx = argsortCoorLocal.GetValue(idx); + this->CopyPointGradOut(voxelGradLocal); + } + SetFlag(this->eventIdMTE3ToMTE2); + WaitFlag(this->eventIdMTE3ToMTE2); + } + } +}; +} // namespace DynamicScatterGrad #endif // _DYNAMIC_SCATTER_GRAD_SUM_H_ \ No newline at end of file diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter_max.h b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_max.h similarity index 97% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter_max.h rename to mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_max.h index 52449de..b2b7585 100644 --- a/ads/common/ops/kernels/op_kernel/dynamic_scatter_max.h +++ b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_max.h @@ -1,134 +1,134 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. - */ -#ifndef _DYNAMIC_SCATTER_MAX_H_ -#define _DYNAMIC_SCATTER_MAX_H_ - -#include - -#include "dynamic_scatter_base.h" - -namespace DynamicScatter { -using namespace AscendC; - -template -class DynamicScatterMax : public DynamicScatterBase { -public: - __aicore__ inline DynamicScatterMax() {} - __aicore__ inline void Init(GM_ADDR point_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, - GM_ADDR voxel_feats, GM_ADDR compare_mask, DynamicScatterTilingData* tilingData, TPipe* in_pipe) - { - this->BaseInit(point_feats, prefix_sum_point_per_voxel, argsort_coor, voxel_feats, tilingData, in_pipe); - - maskNum = tilingData->maskNum; - maskDim = tilingData->maskDim; - maskDimAligned = tilingData->maskDimAligned; - maskDimAlignedB16 = tilingData->maskDimAlignedB16; - blockLenMask = tilingData->blockLenMask; - repeatTimes = tilingData->repeatTimes; - curBlockIdx = GetBlockIdx(); - - compareMaskGm.SetGlobalBuffer((__gm__ uint8_t*)compare_mask, maskNum); - - this->pipe->InitBuffer(voxelFeatsBuf, this->featsDimAligned * sizeof(T)); - this->pipe->InitBuffer(recordMaskBuf, maskDimAlignedB16 * sizeof(uint16_t)); - this->pipe->InitBuffer(bitMaskBuf, maskDimAligned * sizeof(uint8_t)); - this->pipe->InitBuffer(bitMaskTmpBuf, maskDimAlignedB16 * sizeof(uint16_t)); - - compareParams.dstBlkStride = 1; - compareParams.src0BlkStride = 1; - compareParams.src1BlkStride = 1; - compareParams.dstRepStride = 8; - compareParams.src0RepStride = 8; - compareParams.src1RepStride = 8; - - copyMaskOutParams.blockCount = 1; - copyMaskOutParams.blockLen = blockLenMask; - copyMaskOutParams.srcStride = 1; - copyMaskOutParams.dstStride = 1; - - if (curBlockIdx == 0) { - InitOutput(this->voxelFeatsGm, this->totalVoxelNum * this->featsDim, static_cast(-INFINITY)); - } - SyncAll(); - } - - __aicore__ inline void Process() - { - Compute(); - this->ReleaseEvent(); - } - -private: - __aicore__ inline void Compute() - { - LocalTensor prefixSumLocal = this->prefixSumBuf.template Get(); - LocalTensor voxelFeatsLocal = this->voxelFeatsBuf.template Get(); - LocalTensor recordMaskLocal = recordMaskBuf.template Get(); - LocalTensor bitMaskLocal = bitMaskBuf.template Get(); - LocalTensor bitMaskLocalB16 = bitMaskLocal.ReinterpretCast(); - LocalTensor bitMaskTmpLocal = bitMaskTmpBuf.template Get(); - LocalTensor argsortCoorLocal = this->argsortCoorBuf.template Get(); - LocalTensor pointFeatsLocal = this->pointFeatsBuf.template Get(); - - for (uint32_t voxelIdx = 0; voxelIdx < this->voxelNum; voxelIdx++) { - this->GetPointNum(voxelIdx, prefixSumLocal); - this->alignedPointNum = AlignUp(this->pointNum, this->alignedNum); - this->copyArgsortCoorParams.blockLen = this->alignedPointNum / this->alignedNum; - - SetFlag(this->eventIdSToMTE2); - WaitFlag(this->eventIdSToMTE2); - DataCopy(argsortCoorLocal, this->argsortCoorGm[this->startPoint], this->copyArgsortCoorParams); - - SetFlag(this->eventIdMTE2ToS); - WaitFlag(this->eventIdMTE2ToS); - for (uint32_t idx = 0; idx < this->pointNum; idx++) { - this->pointIdx = argsortCoorLocal.GetValue(idx); - SetFlag(this->eventIdSToMTE2); - WaitFlag(this->eventIdSToMTE2); - DataCopy(pointFeatsLocal[idx * this->featsDimAligned], - this->pointFeatsGm[this->pointIdx * this->featsDim], this->copyFeatParams); - SetFlag(this->eventIdMTE2ToMTE3); - WaitFlag(this->eventIdMTE2ToMTE3); - this->CopyFeatsOut(voxelIdx, pointFeatsLocal, true, idx * this->featsDimAligned); - SetFlag(this->eventIdMTE3ToMTE2); - WaitFlag(this->eventIdMTE3ToMTE2); - } - Duplicate(recordMaskLocal, static_cast(0), this->maskDimAlignedB16); - DataCopy(voxelFeatsLocal, this->voxelFeatsGm[voxelIdx * this->featsDim], this->copyFeatParams); - - SetFlag(this->eventIdMTE2ToV); - WaitFlag(this->eventIdMTE2ToV); - for (uint32_t idx = 0; idx < this->pointNum; idx++) { - this->pointIdx = argsortCoorLocal.GetValue(idx); - Compare(bitMaskLocal, voxelFeatsLocal, pointFeatsLocal[idx * this->featsDimAligned], CMPMODE::EQ, mask, - repeatTimes, compareParams); - pipe_barrier(PIPE_ALL); - - Not(bitMaskTmpLocal, recordMaskLocal, maskDimAlignedB16); - And(bitMaskLocalB16, bitMaskLocalB16, bitMaskTmpLocal, maskDimAlignedB16); - Or(recordMaskLocal, bitMaskLocalB16, recordMaskLocal, maskDimAlignedB16); - SetFlag(this->eventIdVToMTE3); - WaitFlag(this->eventIdVToMTE3); - - SetFlag(this->eventIdSToMTE3); - WaitFlag(this->eventIdSToMTE3); - DataCopyPad(compareMaskGm[this->pointIdx * maskDim], bitMaskLocal, copyMaskOutParams); - SetFlag(this->eventIdMTE3ToS); - WaitFlag(this->eventIdMTE3ToS); - } - SetFlag(this->eventIdMTE3ToMTE2); - WaitFlag(this->eventIdMTE3ToMTE2); - } - } - -private: - uint32_t maskNum, maskDim, maskDimAligned, maskDimAlignedB16, blockLenMask, repeatTimes, curBlockIdx, maskOffset; - uint64_t mask = 64; - GlobalTensor compareMaskGm; - TBuf voxelFeatsBuf, bitMaskBuf, bitMaskTmpBuf, recordMaskBuf; - BinaryRepeatParams compareParams; - DataCopyExtParams copyMaskOutParams; -}; -} // namespace DynamicScatter -#endif // _DYNAMIC_SCATTER_MAX_H_ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + */ +#ifndef _DYNAMIC_SCATTER_MAX_H_ +#define _DYNAMIC_SCATTER_MAX_H_ + +#include + +#include "dynamic_scatter_base.h" + +namespace DynamicScatter { +using namespace AscendC; + +template +class DynamicScatterMax : public DynamicScatterBase { +public: + __aicore__ inline DynamicScatterMax() {} + __aicore__ inline void Init(GM_ADDR point_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, + GM_ADDR voxel_feats, GM_ADDR compare_mask, DynamicScatterTilingData* tilingData, TPipe* in_pipe) + { + this->BaseInit(point_feats, prefix_sum_point_per_voxel, argsort_coor, voxel_feats, tilingData, in_pipe); + + maskNum = tilingData->maskNum; + maskDim = tilingData->maskDim; + maskDimAligned = tilingData->maskDimAligned; + maskDimAlignedB16 = tilingData->maskDimAlignedB16; + blockLenMask = tilingData->blockLenMask; + repeatTimes = tilingData->repeatTimes; + curBlockIdx = GetBlockIdx(); + + compareMaskGm.SetGlobalBuffer((__gm__ uint8_t*)compare_mask, maskNum); + + this->pipe->InitBuffer(voxelFeatsBuf, this->featsDimAligned * sizeof(T)); + this->pipe->InitBuffer(recordMaskBuf, maskDimAlignedB16 * sizeof(uint16_t)); + this->pipe->InitBuffer(bitMaskBuf, maskDimAligned * sizeof(uint8_t)); + this->pipe->InitBuffer(bitMaskTmpBuf, maskDimAlignedB16 * sizeof(uint16_t)); + + compareParams.dstBlkStride = 1; + compareParams.src0BlkStride = 1; + compareParams.src1BlkStride = 1; + compareParams.dstRepStride = 8; + compareParams.src0RepStride = 8; + compareParams.src1RepStride = 8; + + copyMaskOutParams.blockCount = 1; + copyMaskOutParams.blockLen = blockLenMask; + copyMaskOutParams.srcStride = 1; + copyMaskOutParams.dstStride = 1; + + if (curBlockIdx == 0) { + InitOutput(this->voxelFeatsGm, this->totalVoxelNum * this->featsDim, static_cast(-INFINITY)); + } + SyncAll(); + } + + __aicore__ inline void Process() + { + Compute(); + this->ReleaseEvent(); + } + +private: + __aicore__ inline void Compute() + { + LocalTensor prefixSumLocal = this->prefixSumBuf.template Get(); + LocalTensor voxelFeatsLocal = this->voxelFeatsBuf.template Get(); + LocalTensor recordMaskLocal = recordMaskBuf.template Get(); + LocalTensor bitMaskLocal = bitMaskBuf.template Get(); + LocalTensor bitMaskLocalB16 = bitMaskLocal.ReinterpretCast(); + LocalTensor bitMaskTmpLocal = bitMaskTmpBuf.template Get(); + LocalTensor argsortCoorLocal = this->argsortCoorBuf.template Get(); + LocalTensor pointFeatsLocal = this->pointFeatsBuf.template Get(); + + for (uint32_t voxelIdx = 0; voxelIdx < this->voxelNum; voxelIdx++) { + this->GetPointNum(voxelIdx, prefixSumLocal); + this->alignedPointNum = AlignUp(this->pointNum, this->alignedNum); + this->copyArgsortCoorParams.blockLen = this->alignedPointNum / this->alignedNum; + + SetFlag(this->eventIdSToMTE2); + WaitFlag(this->eventIdSToMTE2); + DataCopy(argsortCoorLocal, this->argsortCoorGm[this->startPoint], this->copyArgsortCoorParams); + + SetFlag(this->eventIdMTE2ToS); + WaitFlag(this->eventIdMTE2ToS); + for (uint32_t idx = 0; idx < this->pointNum; idx++) { + this->pointIdx = argsortCoorLocal.GetValue(idx); + SetFlag(this->eventIdSToMTE2); + WaitFlag(this->eventIdSToMTE2); + DataCopy(pointFeatsLocal[idx * this->featsDimAligned], + this->pointFeatsGm[this->pointIdx * this->featsDim], this->copyFeatParams); + SetFlag(this->eventIdMTE2ToMTE3); + WaitFlag(this->eventIdMTE2ToMTE3); + this->CopyFeatsOut(voxelIdx, pointFeatsLocal, true, idx * this->featsDimAligned); + SetFlag(this->eventIdMTE3ToMTE2); + WaitFlag(this->eventIdMTE3ToMTE2); + } + Duplicate(recordMaskLocal, static_cast(0), this->maskDimAlignedB16); + DataCopy(voxelFeatsLocal, this->voxelFeatsGm[voxelIdx * this->featsDim], this->copyFeatParams); + + SetFlag(this->eventIdMTE2ToV); + WaitFlag(this->eventIdMTE2ToV); + for (uint32_t idx = 0; idx < this->pointNum; idx++) { + this->pointIdx = argsortCoorLocal.GetValue(idx); + Compare(bitMaskLocal, voxelFeatsLocal, pointFeatsLocal[idx * this->featsDimAligned], CMPMODE::EQ, mask, + repeatTimes, compareParams); + pipe_barrier(PIPE_ALL); + + Not(bitMaskTmpLocal, recordMaskLocal, maskDimAlignedB16); + And(bitMaskLocalB16, bitMaskLocalB16, bitMaskTmpLocal, maskDimAlignedB16); + Or(recordMaskLocal, bitMaskLocalB16, recordMaskLocal, maskDimAlignedB16); + SetFlag(this->eventIdVToMTE3); + WaitFlag(this->eventIdVToMTE3); + + SetFlag(this->eventIdSToMTE3); + WaitFlag(this->eventIdSToMTE3); + DataCopyPad(compareMaskGm[this->pointIdx * maskDim], bitMaskLocal, copyMaskOutParams); + SetFlag(this->eventIdMTE3ToS); + WaitFlag(this->eventIdMTE3ToS); + } + SetFlag(this->eventIdMTE3ToMTE2); + WaitFlag(this->eventIdMTE3ToMTE2); + } + } + +private: + uint32_t maskNum, maskDim, maskDimAligned, maskDimAlignedB16, blockLenMask, repeatTimes, curBlockIdx, maskOffset; + uint64_t mask = 64; + GlobalTensor compareMaskGm; + TBuf voxelFeatsBuf, bitMaskBuf, bitMaskTmpBuf, recordMaskBuf; + BinaryRepeatParams compareParams; + DataCopyExtParams copyMaskOutParams; +}; +} // namespace DynamicScatter +#endif // _DYNAMIC_SCATTER_MAX_H_ diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter_mean.h b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_mean.h similarity index 96% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter_mean.h rename to mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_mean.h index c0a4539..d43ee62 100644 --- a/ads/common/ops/kernels/op_kernel/dynamic_scatter_mean.h +++ b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_mean.h @@ -1,73 +1,73 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. - */ -#ifndef _DYNAMIC_SCATTER_MEAN_H_ -#define _DYNAMIC_SCATTER_MEAN_H_ - -#include "dynamic_scatter_base.h" - -namespace DynamicScatter { -using namespace AscendC; - -template -class DynamicScatterMean : public DynamicScatterBase { -public: - __aicore__ inline DynamicScatterMean() {} - __aicore__ inline void Init(GM_ADDR point_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, - GM_ADDR voxel_feats, DynamicScatterTilingData* tilingData, TPipe* in_pipe) - { - this->BaseInit(point_feats, prefix_sum_point_per_voxel, argsort_coor, voxel_feats, tilingData, in_pipe); - this->pipe->InitBuffer(pointNumDupBuf, this->featsDimAligned * sizeof(T)); - } - - __aicore__ inline void Process() - { - Compute(); - this->ReleaseEvent(); - } - -private: - __aicore__ inline void Compute() - { - LocalTensor pointFeatsLocal = this->pointFeatsBuf.template Get(); - LocalTensor prefixSumLocal = this->prefixSumBuf.template Get(); - LocalTensor pointNumDupLocal = pointNumDupBuf.template Get(); - LocalTensor argsortCoorLocal = this->argsortCoorBuf.template Get(); - - for (uint32_t voxelIdx = 0; voxelIdx < this->voxelNum; voxelIdx++) { - this->GetPointNum(voxelIdx, prefixSumLocal); - this->alignedPointNum = AlignUp(this->pointNum, this->alignedNum); - this->copyArgsortCoorParams.blockLen = this->alignedPointNum / this->alignedNum; - - SetFlag(this->eventIdSToV); - WaitFlag(this->eventIdSToV); - Duplicate(pointNumDupLocal, static_cast(static_cast(this->pointNum)), this->featsDimAligned); - - SetFlag(this->eventIdSToMTE2); - WaitFlag(this->eventIdSToMTE2); - DataCopy(argsortCoorLocal, this->argsortCoorGm[this->startPoint], this->copyArgsortCoorParams); - - SetFlag(this->eventIdMTE2ToS); - WaitFlag(this->eventIdMTE2ToS); - for (uint32_t idx = 0; idx < this->pointNum; idx++) { - this->pointIdx = argsortCoorLocal.GetValue(idx); - SetFlag(this->eventIdSToMTE2); - WaitFlag(this->eventIdSToMTE2); - DataCopy(pointFeatsLocal, this->pointFeatsGm[this->pointIdx * this->featsDim], this->copyFeatParams); - SetFlag(this->eventIdMTE2ToV); - WaitFlag(this->eventIdMTE2ToV); - Div(pointFeatsLocal, pointFeatsLocal, pointNumDupLocal, this->featsDimAligned); - SetFlag(this->eventIdVToMTE3); - WaitFlag(this->eventIdVToMTE3); - this->CopyFeatsOut(voxelIdx, pointFeatsLocal, false); - SetFlag(this->eventIdMTE3ToMTE2); - WaitFlag(this->eventIdMTE3ToMTE2); - } - } - } - -private: - TBuf pointNumDupBuf; -}; -} // namespace DynamicScatter +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + */ +#ifndef _DYNAMIC_SCATTER_MEAN_H_ +#define _DYNAMIC_SCATTER_MEAN_H_ + +#include "dynamic_scatter_base.h" + +namespace DynamicScatter { +using namespace AscendC; + +template +class DynamicScatterMean : public DynamicScatterBase { +public: + __aicore__ inline DynamicScatterMean() {} + __aicore__ inline void Init(GM_ADDR point_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, + GM_ADDR voxel_feats, DynamicScatterTilingData* tilingData, TPipe* in_pipe) + { + this->BaseInit(point_feats, prefix_sum_point_per_voxel, argsort_coor, voxel_feats, tilingData, in_pipe); + this->pipe->InitBuffer(pointNumDupBuf, this->featsDimAligned * sizeof(T)); + } + + __aicore__ inline void Process() + { + Compute(); + this->ReleaseEvent(); + } + +private: + __aicore__ inline void Compute() + { + LocalTensor pointFeatsLocal = this->pointFeatsBuf.template Get(); + LocalTensor prefixSumLocal = this->prefixSumBuf.template Get(); + LocalTensor pointNumDupLocal = pointNumDupBuf.template Get(); + LocalTensor argsortCoorLocal = this->argsortCoorBuf.template Get(); + + for (uint32_t voxelIdx = 0; voxelIdx < this->voxelNum; voxelIdx++) { + this->GetPointNum(voxelIdx, prefixSumLocal); + this->alignedPointNum = AlignUp(this->pointNum, this->alignedNum); + this->copyArgsortCoorParams.blockLen = this->alignedPointNum / this->alignedNum; + + SetFlag(this->eventIdSToV); + WaitFlag(this->eventIdSToV); + Duplicate(pointNumDupLocal, static_cast(static_cast(this->pointNum)), this->featsDimAligned); + + SetFlag(this->eventIdSToMTE2); + WaitFlag(this->eventIdSToMTE2); + DataCopy(argsortCoorLocal, this->argsortCoorGm[this->startPoint], this->copyArgsortCoorParams); + + SetFlag(this->eventIdMTE2ToS); + WaitFlag(this->eventIdMTE2ToS); + for (uint32_t idx = 0; idx < this->pointNum; idx++) { + this->pointIdx = argsortCoorLocal.GetValue(idx); + SetFlag(this->eventIdSToMTE2); + WaitFlag(this->eventIdSToMTE2); + DataCopy(pointFeatsLocal, this->pointFeatsGm[this->pointIdx * this->featsDim], this->copyFeatParams); + SetFlag(this->eventIdMTE2ToV); + WaitFlag(this->eventIdMTE2ToV); + Div(pointFeatsLocal, pointFeatsLocal, pointNumDupLocal, this->featsDimAligned); + SetFlag(this->eventIdVToMTE3); + WaitFlag(this->eventIdVToMTE3); + this->CopyFeatsOut(voxelIdx, pointFeatsLocal, false); + SetFlag(this->eventIdMTE3ToMTE2); + WaitFlag(this->eventIdMTE3ToMTE2); + } + } + } + +private: + TBuf pointNumDupBuf; +}; +} // namespace DynamicScatter #endif // _DYNAMIC_SCATTER_MEAN_H_ \ No newline at end of file diff --git a/ads/common/ops/kernels/op_kernel/dynamic_scatter_sum.h b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_sum.h similarity index 96% rename from ads/common/ops/kernels/op_kernel/dynamic_scatter_sum.h rename to mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_sum.h index 1fc79e3..8d8f368 100644 --- a/ads/common/ops/kernels/op_kernel/dynamic_scatter_sum.h +++ b/mx_driving/common/ops/kernels/op_kernel/dynamic_scatter_sum.h @@ -1,61 +1,61 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. - */ -#ifndef _DYNAMIC_SCATTER_SUM_H_ -#define _DYNAMIC_SCATTER_SUM_H_ - -#include "dynamic_scatter_base.h" - -namespace DynamicScatter { -using namespace AscendC; - -template -class DynamicScatterSum : public DynamicScatterBase { -public: - __aicore__ inline DynamicScatterSum() {} - __aicore__ inline void Init(GM_ADDR point_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, - GM_ADDR voxel_feats, DynamicScatterTilingData* tilingData, TPipe* in_pipe) - { - this->BaseInit(point_feats, prefix_sum_point_per_voxel, argsort_coor, voxel_feats, tilingData, in_pipe); - } - - __aicore__ inline void Process() - { - Compute(); - this->ReleaseEvent(); - } - -private: - __aicore__ inline void Compute() - { - LocalTensor pointFeatsLocal = this->pointFeatsBuf.template Get(); - LocalTensor prefixSumLocal = this->prefixSumBuf.template Get(); - LocalTensor argsortCoorLocal = this->argsortCoorBuf.template Get(); - - for (uint32_t voxelIdx = 0; voxelIdx < this->voxelNum; voxelIdx++) { - this->GetPointNum(voxelIdx, prefixSumLocal); - this->alignedPointNum = AlignUp(this->pointNum, this->alignedNum); - this->copyArgsortCoorParams.blockLen = this->alignedPointNum / this->alignedNum; - - SetFlag(this->eventIdSToMTE2); - WaitFlag(this->eventIdSToMTE2); - DataCopy(argsortCoorLocal, this->argsortCoorGm[this->startPoint], this->copyArgsortCoorParams); - - SetFlag(this->eventIdMTE2ToS); - WaitFlag(this->eventIdMTE2ToS); - for (uint32_t idx = 0; idx < this->pointNum; idx++) { - this->pointIdx = argsortCoorLocal.GetValue(idx); - SetFlag(this->eventIdSToMTE2); - WaitFlag(this->eventIdSToMTE2); - DataCopy(pointFeatsLocal, this->pointFeatsGm[this->pointIdx * this->featsDim], this->copyFeatParams); - SetFlag(this->eventIdMTE2ToMTE3); - WaitFlag(this->eventIdMTE2ToMTE3); - this->CopyFeatsOut(voxelIdx, pointFeatsLocal, false); - SetFlag(this->eventIdMTE3ToMTE2); - WaitFlag(this->eventIdMTE3ToMTE2); - } - } - } -}; -} // namespace DynamicScatter +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + */ +#ifndef _DYNAMIC_SCATTER_SUM_H_ +#define _DYNAMIC_SCATTER_SUM_H_ + +#include "dynamic_scatter_base.h" + +namespace DynamicScatter { +using namespace AscendC; + +template +class DynamicScatterSum : public DynamicScatterBase { +public: + __aicore__ inline DynamicScatterSum() {} + __aicore__ inline void Init(GM_ADDR point_feats, GM_ADDR prefix_sum_point_per_voxel, GM_ADDR argsort_coor, + GM_ADDR voxel_feats, DynamicScatterTilingData* tilingData, TPipe* in_pipe) + { + this->BaseInit(point_feats, prefix_sum_point_per_voxel, argsort_coor, voxel_feats, tilingData, in_pipe); + } + + __aicore__ inline void Process() + { + Compute(); + this->ReleaseEvent(); + } + +private: + __aicore__ inline void Compute() + { + LocalTensor pointFeatsLocal = this->pointFeatsBuf.template Get(); + LocalTensor prefixSumLocal = this->prefixSumBuf.template Get(); + LocalTensor argsortCoorLocal = this->argsortCoorBuf.template Get(); + + for (uint32_t voxelIdx = 0; voxelIdx < this->voxelNum; voxelIdx++) { + this->GetPointNum(voxelIdx, prefixSumLocal); + this->alignedPointNum = AlignUp(this->pointNum, this->alignedNum); + this->copyArgsortCoorParams.blockLen = this->alignedPointNum / this->alignedNum; + + SetFlag(this->eventIdSToMTE2); + WaitFlag(this->eventIdSToMTE2); + DataCopy(argsortCoorLocal, this->argsortCoorGm[this->startPoint], this->copyArgsortCoorParams); + + SetFlag(this->eventIdMTE2ToS); + WaitFlag(this->eventIdMTE2ToS); + for (uint32_t idx = 0; idx < this->pointNum; idx++) { + this->pointIdx = argsortCoorLocal.GetValue(idx); + SetFlag(this->eventIdSToMTE2); + WaitFlag(this->eventIdSToMTE2); + DataCopy(pointFeatsLocal, this->pointFeatsGm[this->pointIdx * this->featsDim], this->copyFeatParams); + SetFlag(this->eventIdMTE2ToMTE3); + WaitFlag(this->eventIdMTE2ToMTE3); + this->CopyFeatsOut(voxelIdx, pointFeatsLocal, false); + SetFlag(this->eventIdMTE3ToMTE2); + WaitFlag(this->eventIdMTE3ToMTE2); + } + } + } +}; +} // namespace DynamicScatter #endif // _DYNAMIC_SCATTER_SUM_H_ \ No newline at end of file diff --git a/ads/common/ops/kernels/op_kernel/dynamic_voxelization.cpp b/mx_driving/common/ops/kernels/op_kernel/dynamic_voxelization.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/dynamic_voxelization.cpp rename to mx_driving/common/ops/kernels/op_kernel/dynamic_voxelization.cpp diff --git a/ads/common/ops/kernels/op_kernel/furthest_point_sampling.cpp b/mx_driving/common/ops/kernels/op_kernel/furthest_point_sampling.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/furthest_point_sampling.cpp rename to mx_driving/common/ops/kernels/op_kernel/furthest_point_sampling.cpp diff --git a/ads/common/ops/kernels/op_kernel/furthest_point_sampling.h b/mx_driving/common/ops/kernels/op_kernel/furthest_point_sampling.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/furthest_point_sampling.h rename to mx_driving/common/ops/kernels/op_kernel/furthest_point_sampling.h diff --git a/ads/common/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp b/mx_driving/common/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp rename to mx_driving/common/ops/kernels/op_kernel/furthest_point_sampling_with_dist.cpp diff --git a/ads/common/ops/kernels/op_kernel/gather_nms3d_mask.cpp b/mx_driving/common/ops/kernels/op_kernel/gather_nms3d_mask.cpp similarity index 97% rename from ads/common/ops/kernels/op_kernel/gather_nms3d_mask.cpp rename to mx_driving/common/ops/kernels/op_kernel/gather_nms3d_mask.cpp index 70da3e7..e973dcc 100644 --- a/ads/common/ops/kernels/op_kernel/gather_nms3d_mask.cpp +++ b/mx_driving/common/ops/kernels/op_kernel/gather_nms3d_mask.cpp @@ -1,114 +1,114 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - * - * This sample is a very basic sample that implements vector add on Ascend plaform. - */ -#include "kernel_operator.h" -using namespace AscendC; -constexpr int32_t BUFFER_NUM = 2; -constexpr int32_t BUF_SIZE_UNIT = 32; -constexpr int32_t NUM_SIZE = 1; - -class KernelGatherNms3dMask { -public: - __aicore__ inline KernelGatherNms3dMask() {} - __aicore__ inline void Init(GM_ADDR mask, GM_ADDR keep, GM_ADDR num_out, GatherNms3dMaskTilingData *tiling_data) - { - ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); - box_num = tiling_data->box_num; - mask_num = tiling_data->mask_num; - - int32_t assign_num = (box_num * sizeof(int16_t) + BUF_SIZE_UNIT - 1) / BUF_SIZE_UNIT; - mask_size = assign_num * BUF_SIZE_UNIT / sizeof(int16_t); - - maskGm.SetGlobalBuffer(reinterpret_cast<__gm__ int16_t * > (mask), box_num * mask_num); - keepGm.SetGlobalBuffer(reinterpret_cast<__gm__ int16_t * > (keep), box_num); - numOutGm.SetGlobalBuffer(reinterpret_cast<__gm__ int16_t * > (num_out), NUM_SIZE); - - pipe.InitBuffer(inQueueMask, BUFFER_NUM, mask_size * sizeof(int16_t)); - pipe.InitBuffer(maskBuf, mask_size * sizeof(int16_t)); - pipe.InitBuffer(keepBuf, mask_size * sizeof(int16_t)); - pipe.InitBuffer(numOutBuf, BUF_SIZE_UNIT); - } - __aicore__ inline void Process() - { - InitCmp(); - for (int32_t i = 0; i < box_num; ++i) { - if (maskTemp.GetValue(i) == 1) { - SaveKeep(i); - CopyIn(i); - Compute(i); - } - } - EndCmp(); - } - -private: - __aicore__ inline void InitCmp() - { - maskTemp = maskBuf.Get(); - keepTemp = keepBuf.Get(); - Duplicate(maskTemp, static_cast(1), mask_size); - Duplicate(keepTemp, static_cast(0), mask_size); - DataCopyParams copyParams{1, static_cast(box_num * sizeof(int16_t)), 0, 0}; - DataCopyPadParams padParams{false, 0, 2, 0}; - DataCopyPad(maskTemp, maskGm, copyParams, padParams); - } - __aicore__ inline void CopyIn(int32_t idx) - { - LocalTensor maskLocal = inQueueMask.AllocTensor(); - Duplicate(maskLocal, static_cast(1), mask_size); - DataCopyParams copyParams{1, static_cast(box_num * sizeof(int16_t)), 0, 0}; - DataCopyPadParams padParams{false, 0, 0, 2}; - DataCopyPad(maskLocal, maskGm[idx * mask_num], copyParams, padParams); - inQueueMask.EnQue(maskLocal); - } - __aicore__ inline void Compute(int32_t idx) - { - LocalTensor maskLocal = inQueueMask.DeQue(); - maskTemp = maskLocal & maskTemp; - pipe_barrier(PIPE_ALL); - inQueueMask.FreeTensor(maskLocal); - } - __aicore__ inline void SaveKeep(int32_t idx) - { - keepTemp.SetValue(keep_num, idx); - keep_num = keep_num + 1; - } - __aicore__ inline void EndCmp() - { - DataCopyParams copyMaskParams{1, static_cast(box_num * sizeof(int16_t)), 0, 0}; - DataCopyPad(keepGm, keepTemp, copyMaskParams); - LocalTensor numOutLocal = numOutBuf.Get(); - numOutLocal.SetValue(0, keep_num); - DataCopyParams copyNumParams{1, static_cast(NUM_SIZE * sizeof(int16_t)), 0, 0}; - DataCopyPad(numOutGm, numOutLocal, copyNumParams); - } - -private: - TPipe pipe; - TQue inQueueMask; - - GlobalTensor maskGm; - GlobalTensor keepGm; - GlobalTensor numOutGm; - - LocalTensor maskTemp; - LocalTensor keepTemp; - - TBuf maskBuf, keepBuf, numOutBuf; - - uint32_t box_num; - uint32_t mask_num; - uint32_t mask_size; - uint32_t keep_num = 0; -}; - -extern "C" __global__ __aicore__ -void gather_nms3d_mask(GM_ADDR mask, GM_ADDR keep, GM_ADDR num_out, GM_ADDR workspace, GM_ADDR tiling) -{ - GET_TILING_DATA(tiling_data, tiling); - KernelGatherNms3dMask op; - op.Init(mask, keep, num_out, &tiling_data); - op.Process(); -} +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. + * + * This sample is a very basic sample that implements vector add on Ascend plaform. + */ +#include "kernel_operator.h" +using namespace AscendC; +constexpr int32_t BUFFER_NUM = 2; +constexpr int32_t BUF_SIZE_UNIT = 32; +constexpr int32_t NUM_SIZE = 1; + +class KernelGatherNms3dMask { +public: + __aicore__ inline KernelGatherNms3dMask() {} + __aicore__ inline void Init(GM_ADDR mask, GM_ADDR keep, GM_ADDR num_out, GatherNms3dMaskTilingData *tiling_data) + { + ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); + box_num = tiling_data->box_num; + mask_num = tiling_data->mask_num; + + int32_t assign_num = (box_num * sizeof(int16_t) + BUF_SIZE_UNIT - 1) / BUF_SIZE_UNIT; + mask_size = assign_num * BUF_SIZE_UNIT / sizeof(int16_t); + + maskGm.SetGlobalBuffer(reinterpret_cast<__gm__ int16_t * > (mask), box_num * mask_num); + keepGm.SetGlobalBuffer(reinterpret_cast<__gm__ int16_t * > (keep), box_num); + numOutGm.SetGlobalBuffer(reinterpret_cast<__gm__ int16_t * > (num_out), NUM_SIZE); + + pipe.InitBuffer(inQueueMask, BUFFER_NUM, mask_size * sizeof(int16_t)); + pipe.InitBuffer(maskBuf, mask_size * sizeof(int16_t)); + pipe.InitBuffer(keepBuf, mask_size * sizeof(int16_t)); + pipe.InitBuffer(numOutBuf, BUF_SIZE_UNIT); + } + __aicore__ inline void Process() + { + InitCmp(); + for (int32_t i = 0; i < box_num; ++i) { + if (maskTemp.GetValue(i) == 1) { + SaveKeep(i); + CopyIn(i); + Compute(i); + } + } + EndCmp(); + } + +private: + __aicore__ inline void InitCmp() + { + maskTemp = maskBuf.Get(); + keepTemp = keepBuf.Get(); + Duplicate(maskTemp, static_cast(1), mask_size); + Duplicate(keepTemp, static_cast(0), mask_size); + DataCopyParams copyParams{1, static_cast(box_num * sizeof(int16_t)), 0, 0}; + DataCopyPadParams padParams{false, 0, 2, 0}; + DataCopyPad(maskTemp, maskGm, copyParams, padParams); + } + __aicore__ inline void CopyIn(int32_t idx) + { + LocalTensor maskLocal = inQueueMask.AllocTensor(); + Duplicate(maskLocal, static_cast(1), mask_size); + DataCopyParams copyParams{1, static_cast(box_num * sizeof(int16_t)), 0, 0}; + DataCopyPadParams padParams{false, 0, 0, 2}; + DataCopyPad(maskLocal, maskGm[idx * mask_num], copyParams, padParams); + inQueueMask.EnQue(maskLocal); + } + __aicore__ inline void Compute(int32_t idx) + { + LocalTensor maskLocal = inQueueMask.DeQue(); + maskTemp = maskLocal & maskTemp; + pipe_barrier(PIPE_ALL); + inQueueMask.FreeTensor(maskLocal); + } + __aicore__ inline void SaveKeep(int32_t idx) + { + keepTemp.SetValue(keep_num, idx); + keep_num = keep_num + 1; + } + __aicore__ inline void EndCmp() + { + DataCopyParams copyMaskParams{1, static_cast(box_num * sizeof(int16_t)), 0, 0}; + DataCopyPad(keepGm, keepTemp, copyMaskParams); + LocalTensor numOutLocal = numOutBuf.Get(); + numOutLocal.SetValue(0, keep_num); + DataCopyParams copyNumParams{1, static_cast(NUM_SIZE * sizeof(int16_t)), 0, 0}; + DataCopyPad(numOutGm, numOutLocal, copyNumParams); + } + +private: + TPipe pipe; + TQue inQueueMask; + + GlobalTensor maskGm; + GlobalTensor keepGm; + GlobalTensor numOutGm; + + LocalTensor maskTemp; + LocalTensor keepTemp; + + TBuf maskBuf, keepBuf, numOutBuf; + + uint32_t box_num; + uint32_t mask_num; + uint32_t mask_size; + uint32_t keep_num = 0; +}; + +extern "C" __global__ __aicore__ +void gather_nms3d_mask(GM_ADDR mask, GM_ADDR keep, GM_ADDR num_out, GM_ADDR workspace, GM_ADDR tiling) +{ + GET_TILING_DATA(tiling_data, tiling); + KernelGatherNms3dMask op; + op.Init(mask, keep, num_out, &tiling_data); + op.Process(); +} diff --git a/ads/common/ops/kernels/op_kernel/knn.cpp b/mx_driving/common/ops/kernels/op_kernel/knn.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/knn.cpp rename to mx_driving/common/ops/kernels/op_kernel/knn.cpp diff --git a/ads/common/ops/kernels/op_kernel/knn.h b/mx_driving/common/ops/kernels/op_kernel/knn.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/knn.h rename to mx_driving/common/ops/kernels/op_kernel/knn.h diff --git a/ads/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic.h b/mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic.h rename to mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic.h diff --git a/ads/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic_v2.h b/mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic_v2.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic_v2.h rename to mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_generic_v2.h diff --git a/ads/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf.h b/mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf.h rename to mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf.h diff --git a/ads/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf_v2.h b/mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf_v2.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf_v2.h rename to mx_driving/common/ops/kernels/op_kernel/ms_deform_attn_grad_high_perf_v2.h diff --git a/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn.cpp b/mx_driving/common/ops/kernels/op_kernel/multi_scale_deformable_attn.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn.cpp rename to mx_driving/common/ops/kernels/op_kernel/multi_scale_deformable_attn.cpp diff --git a/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad.cpp b/mx_driving/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad.cpp rename to mx_driving/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad.cpp diff --git a/ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad_v2.cpp b/mx_driving/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad_v2.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad_v2.cpp rename to mx_driving/common/ops/kernels/op_kernel/multi_scale_deformable_attn_grad_v2.cpp diff --git a/ads/common/ops/kernels/op_kernel/nms3d.cpp b/mx_driving/common/ops/kernels/op_kernel/nms3d.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/nms3d.cpp rename to mx_driving/common/ops/kernels/op_kernel/nms3d.cpp diff --git a/ads/common/ops/kernels/op_kernel/nms3d_normal.cpp b/mx_driving/common/ops/kernels/op_kernel/nms3d_normal.cpp similarity index 97% rename from ads/common/ops/kernels/op_kernel/nms3d_normal.cpp rename to mx_driving/common/ops/kernels/op_kernel/nms3d_normal.cpp index 65092e1..341eef5 100644 --- a/ads/common/ops/kernels/op_kernel/nms3d_normal.cpp +++ b/mx_driving/common/ops/kernels/op_kernel/nms3d_normal.cpp @@ -1,158 +1,158 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - * - */ -#include "kernel_operator.h" -#include "kernel_tiling/kernel_tiling.h" -#include "kernel_utils.h" - -using namespace AscendC; -constexpr int32_t BUFFER_NUM = 2; -constexpr float EPS = 1e-8; - -template -class KernelNms3dNormal { -public: - __aicore__ inline KernelNms3dNormal() {} - __aicore__ inline void Init(GM_ADDR boxes, GM_ADDR mask, const Nms3dNormalTilingData* __restrict tiling_data) - { - ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); - usedCoreNum = tiling_data->usedCoreNum; - eachSum = tiling_data->eachSum; - boxNum = tiling_data->boxNum; - tailSum = tiling_data->tailSum; - tailNum = tiling_data->tailNum; - maskNum = tiling_data->maskNum; - loopTime = tiling_data->loopTime; - overlapThresh = tiling_data->overlapThresh; - - uint32_t core_id = GetBlockIdx(); - isLastCore = (core_id == (tiling_data->usedCoreNum - 1)); - - boxGm.SetGlobalBuffer(reinterpret_cast<__gm__ T*>(boxes), boxNum * 7); - maskGm.SetGlobalBuffer(reinterpret_cast<__gm__ int16_t*>(mask), maskNum * boxNum); - - pipe.InitBuffer(inQueueCur, BUFFER_NUM, dataAlign * sizeof(T)); - pipe.InitBuffer(inQueueBox, BUFFER_NUM, dataAlign * 7 * sizeof(T)); - pipe.InitBuffer(outQueueMask, BUFFER_NUM, dataAlign * sizeof(int16_t)); - pipe.InitBuffer(oneMask, BUFFER_NUM, dataAlign * sizeof(int16_t)); - if constexpr (sizeof(T) == sizeof(half)) { - pipe.InitBuffer(calcBuf, dataAlign * 2 * 7 * sizeof(float)); - curTemp = calcBuf.Get(dataAlign * 2 * 7); - boxTemp = curTemp[8]; - } - } - __aicore__ inline void Process() - { - uint32_t core_id = GetBlockIdx(); - LocalTensor oneLocal = oneMask.AllocTensor(); - Duplicate(oneLocal, static_cast(1), dataAlign); - for (size_t i = 0; i < boxNum; ++i) { - for (size_t j = 0; j < loopTime; ++j) { - uint32_t start = core_id * eachSum + dataAlign * j; - if (i >= start + dataAlign) { - DataCopy(maskGm[i * maskNum + start], oneLocal, dataAlign); - continue; - } - bool is_last = (isLastCore) && (j == loopTime - 1); - CopyIn(i, start, is_last); - Compute(i, start, is_last); - CopyOut(i, start); - } - } - oneMask.FreeTensor(oneLocal); - } - -private: - __aicore__ inline void CopyIn(int32_t cur_box, int32_t com_box, bool is_last) - { - LocalTensor curLocal = inQueueCur.AllocTensor(); - LocalTensor boxLocal = inQueueBox.AllocTensor(); - DataCopy(curLocal, boxGm[cur_box * 7], dataAlign); - DataCopy(boxLocal, boxGm[com_box * 7], dataAlign * 7); - inQueueCur.EnQue(curLocal); - inQueueBox.EnQue(boxLocal); - } - __aicore__ inline void Compute(int32_t cur_box, int32_t com_box, bool is_last) - { - uint32_t cmpNum = is_last ? tailNum : dataAlign; - if constexpr (sizeof(T) == sizeof(half)) { - LocalTensor curLocal = inQueueCur.DeQue(); - LocalTensor boxLocal = inQueueBox.DeQue(); - Cast(curTemp, curLocal, RoundMode::CAST_NONE, dataAlign); - Cast(boxTemp, boxLocal, RoundMode::CAST_NONE, 7 * dataAlign); - inQueueCur.FreeTensor(curLocal); - inQueueBox.FreeTensor(boxLocal); - } else { - curTemp = inQueueCur.DeQue(); - boxTemp = inQueueBox.DeQue(); - } - PipeBarrier(); - LocalTensor outLocal = outQueueMask.AllocTensor(); - float Sa = curTemp.GetValue(3) * curTemp.GetValue(4); - for (size_t i = 0; i < cmpNum; i++) { - if (cur_box >= com_box + i) { - outLocal.SetValue(i, 1); - continue; - } - float left = max(curTemp.GetValue(0) - curTemp.GetValue(3) / 2.0f, boxTemp.GetValue(i * 7) - boxTemp.GetValue(i * 7 + 3) / 2.0f); - float right = min(curTemp.GetValue(0) + curTemp.GetValue(3) / 2.0f, boxTemp.GetValue(i * 7) + boxTemp.GetValue(i * 7 + 3) / 2.0f); - float top = max(curTemp.GetValue(1) - curTemp.GetValue(4) / 2.0f, boxTemp.GetValue(i * 7 + 1) - boxTemp.GetValue(i * 7 + 4) / 2.0f); - float bottom = min(curTemp.GetValue(1) + curTemp.GetValue(4) / 2.0f, boxTemp.GetValue(i * 7 + 1) + boxTemp.GetValue(i * 7 + 4) / 2.0f); - float width = max(right - left, 0.f); - float height = max(bottom - top, 0.f); - float interS = width * height; - float Sb = boxTemp.GetValue(i * 7 + 3) * boxTemp.GetValue(i * 7 + 4); - if (interS / max(Sa + Sb - interS, EPS) >= overlapThresh) { - outLocal.SetValue(i, 0); - } else { - outLocal.SetValue(i, 1); - } - } - PipeBarrier(); - outQueueMask.EnQue(outLocal); - if constexpr (sizeof(T) != sizeof(half)) { - inQueueCur.FreeTensor(curTemp); - inQueueBox.FreeTensor(boxTemp); - } - } - __aicore__ inline void CopyOut(int32_t cur_box, int32_t com_box) - { - LocalTensor outLocal = outQueueMask.DeQue(); - DataCopy(maskGm[cur_box * maskNum + com_box], outLocal, dataAlign); - outQueueMask.FreeTensor(outLocal); - } - -private: - TPipe pipe; - TQue inQueueCur, inQueueBox; - TQue outQueueMask, oneMask; - TBuf calcBuf; - GlobalTensor boxGm; - GlobalTensor maskGm; - LocalTensor curTemp, boxTemp; - uint32_t usedCoreNum; - uint32_t loopTime; - uint32_t eachSum; - uint32_t boxNum; - uint32_t tailSum; - uint32_t tailNum; - uint32_t maskNum; - uint32_t dataAlign = 16; - float overlapThresh; - bool isLastCore; -}; - -extern "C" __global__ __aicore__ void nms3d_normal(GM_ADDR boxes, GM_ADDR mask, GM_ADDR workspace, GM_ADDR tiling) { - GET_TILING_DATA(tilingData, tiling); - const Nms3dNormalTilingData* __restrict tilingDevice = &tilingData; - if (TILING_KEY_IS(1)) { - KernelNms3dNormal op; - op.Init(boxes, mask, tilingDevice); - op.Process(); - } else if (TILING_KEY_IS(2)) { - KernelNms3dNormal op; - op.Init(boxes, mask, tilingDevice); - op.Process(); - } -} +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. + * + */ +#include "kernel_operator.h" +#include "kernel_tiling/kernel_tiling.h" +#include "kernel_utils.h" + +using namespace AscendC; +constexpr int32_t BUFFER_NUM = 2; +constexpr float EPS = 1e-8; + +template +class KernelNms3dNormal { +public: + __aicore__ inline KernelNms3dNormal() {} + __aicore__ inline void Init(GM_ADDR boxes, GM_ADDR mask, const Nms3dNormalTilingData* __restrict tiling_data) + { + ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); + usedCoreNum = tiling_data->usedCoreNum; + eachSum = tiling_data->eachSum; + boxNum = tiling_data->boxNum; + tailSum = tiling_data->tailSum; + tailNum = tiling_data->tailNum; + maskNum = tiling_data->maskNum; + loopTime = tiling_data->loopTime; + overlapThresh = tiling_data->overlapThresh; + + uint32_t core_id = GetBlockIdx(); + isLastCore = (core_id == (tiling_data->usedCoreNum - 1)); + + boxGm.SetGlobalBuffer(reinterpret_cast<__gm__ T*>(boxes), boxNum * 7); + maskGm.SetGlobalBuffer(reinterpret_cast<__gm__ int16_t*>(mask), maskNum * boxNum); + + pipe.InitBuffer(inQueueCur, BUFFER_NUM, dataAlign * sizeof(T)); + pipe.InitBuffer(inQueueBox, BUFFER_NUM, dataAlign * 7 * sizeof(T)); + pipe.InitBuffer(outQueueMask, BUFFER_NUM, dataAlign * sizeof(int16_t)); + pipe.InitBuffer(oneMask, BUFFER_NUM, dataAlign * sizeof(int16_t)); + if constexpr (sizeof(T) == sizeof(half)) { + pipe.InitBuffer(calcBuf, dataAlign * 2 * 7 * sizeof(float)); + curTemp = calcBuf.Get(dataAlign * 2 * 7); + boxTemp = curTemp[8]; + } + } + __aicore__ inline void Process() + { + uint32_t core_id = GetBlockIdx(); + LocalTensor oneLocal = oneMask.AllocTensor(); + Duplicate(oneLocal, static_cast(1), dataAlign); + for (size_t i = 0; i < boxNum; ++i) { + for (size_t j = 0; j < loopTime; ++j) { + uint32_t start = core_id * eachSum + dataAlign * j; + if (i >= start + dataAlign) { + DataCopy(maskGm[i * maskNum + start], oneLocal, dataAlign); + continue; + } + bool is_last = (isLastCore) && (j == loopTime - 1); + CopyIn(i, start, is_last); + Compute(i, start, is_last); + CopyOut(i, start); + } + } + oneMask.FreeTensor(oneLocal); + } + +private: + __aicore__ inline void CopyIn(int32_t cur_box, int32_t com_box, bool is_last) + { + LocalTensor curLocal = inQueueCur.AllocTensor(); + LocalTensor boxLocal = inQueueBox.AllocTensor(); + DataCopy(curLocal, boxGm[cur_box * 7], dataAlign); + DataCopy(boxLocal, boxGm[com_box * 7], dataAlign * 7); + inQueueCur.EnQue(curLocal); + inQueueBox.EnQue(boxLocal); + } + __aicore__ inline void Compute(int32_t cur_box, int32_t com_box, bool is_last) + { + uint32_t cmpNum = is_last ? tailNum : dataAlign; + if constexpr (sizeof(T) == sizeof(half)) { + LocalTensor curLocal = inQueueCur.DeQue(); + LocalTensor boxLocal = inQueueBox.DeQue(); + Cast(curTemp, curLocal, RoundMode::CAST_NONE, dataAlign); + Cast(boxTemp, boxLocal, RoundMode::CAST_NONE, 7 * dataAlign); + inQueueCur.FreeTensor(curLocal); + inQueueBox.FreeTensor(boxLocal); + } else { + curTemp = inQueueCur.DeQue(); + boxTemp = inQueueBox.DeQue(); + } + PipeBarrier(); + LocalTensor outLocal = outQueueMask.AllocTensor(); + float Sa = curTemp.GetValue(3) * curTemp.GetValue(4); + for (size_t i = 0; i < cmpNum; i++) { + if (cur_box >= com_box + i) { + outLocal.SetValue(i, 1); + continue; + } + float left = max(curTemp.GetValue(0) - curTemp.GetValue(3) / 2.0f, boxTemp.GetValue(i * 7) - boxTemp.GetValue(i * 7 + 3) / 2.0f); + float right = min(curTemp.GetValue(0) + curTemp.GetValue(3) / 2.0f, boxTemp.GetValue(i * 7) + boxTemp.GetValue(i * 7 + 3) / 2.0f); + float top = max(curTemp.GetValue(1) - curTemp.GetValue(4) / 2.0f, boxTemp.GetValue(i * 7 + 1) - boxTemp.GetValue(i * 7 + 4) / 2.0f); + float bottom = min(curTemp.GetValue(1) + curTemp.GetValue(4) / 2.0f, boxTemp.GetValue(i * 7 + 1) + boxTemp.GetValue(i * 7 + 4) / 2.0f); + float width = max(right - left, 0.f); + float height = max(bottom - top, 0.f); + float interS = width * height; + float Sb = boxTemp.GetValue(i * 7 + 3) * boxTemp.GetValue(i * 7 + 4); + if (interS / max(Sa + Sb - interS, EPS) >= overlapThresh) { + outLocal.SetValue(i, 0); + } else { + outLocal.SetValue(i, 1); + } + } + PipeBarrier(); + outQueueMask.EnQue(outLocal); + if constexpr (sizeof(T) != sizeof(half)) { + inQueueCur.FreeTensor(curTemp); + inQueueBox.FreeTensor(boxTemp); + } + } + __aicore__ inline void CopyOut(int32_t cur_box, int32_t com_box) + { + LocalTensor outLocal = outQueueMask.DeQue(); + DataCopy(maskGm[cur_box * maskNum + com_box], outLocal, dataAlign); + outQueueMask.FreeTensor(outLocal); + } + +private: + TPipe pipe; + TQue inQueueCur, inQueueBox; + TQue outQueueMask, oneMask; + TBuf calcBuf; + GlobalTensor boxGm; + GlobalTensor maskGm; + LocalTensor curTemp, boxTemp; + uint32_t usedCoreNum; + uint32_t loopTime; + uint32_t eachSum; + uint32_t boxNum; + uint32_t tailSum; + uint32_t tailNum; + uint32_t maskNum; + uint32_t dataAlign = 16; + float overlapThresh; + bool isLastCore; +}; + +extern "C" __global__ __aicore__ void nms3d_normal(GM_ADDR boxes, GM_ADDR mask, GM_ADDR workspace, GM_ADDR tiling) { + GET_TILING_DATA(tilingData, tiling); + const Nms3dNormalTilingData* __restrict tilingDevice = &tilingData; + if (TILING_KEY_IS(1)) { + KernelNms3dNormal op; + op.Init(boxes, mask, tilingDevice); + op.Process(); + } else if (TILING_KEY_IS(2)) { + KernelNms3dNormal op; + op.Init(boxes, mask, tilingDevice); + op.Process(); + } +} diff --git a/ads/common/ops/kernels/op_kernel/points_in_box.cpp b/mx_driving/common/ops/kernels/op_kernel/points_in_box.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/points_in_box.cpp rename to mx_driving/common/ops/kernels/op_kernel/points_in_box.cpp diff --git a/ads/common/ops/kernels/op_kernel/roipoint_pool3d_forward.cpp b/mx_driving/common/ops/kernels/op_kernel/roipoint_pool3d_forward.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/roipoint_pool3d_forward.cpp rename to mx_driving/common/ops/kernels/op_kernel/roipoint_pool3d_forward.cpp diff --git a/ads/common/ops/kernels/op_kernel/scatter_max_with_argmax_v2.cpp b/mx_driving/common/ops/kernels/op_kernel/scatter_max_with_argmax_v2.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/scatter_max_with_argmax_v2.cpp rename to mx_driving/common/ops/kernels/op_kernel/scatter_max_with_argmax_v2.cpp diff --git a/ads/common/ops/kernels/op_kernel/scatter_mean_grad.cpp b/mx_driving/common/ops/kernels/op_kernel/scatter_mean_grad.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/scatter_mean_grad.cpp rename to mx_driving/common/ops/kernels/op_kernel/scatter_mean_grad.cpp diff --git a/ads/common/ops/kernels/op_kernel/scatter_mean_grad.h b/mx_driving/common/ops/kernels/op_kernel/scatter_mean_grad.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/scatter_mean_grad.h rename to mx_driving/common/ops/kernels/op_kernel/scatter_mean_grad.h diff --git a/ads/common/ops/kernels/op_kernel/scatter_mean_grad_base.h b/mx_driving/common/ops/kernels/op_kernel/scatter_mean_grad_base.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/scatter_mean_grad_base.h rename to mx_driving/common/ops/kernels/op_kernel/scatter_mean_grad_base.h diff --git a/ads/common/ops/kernels/op_kernel/scatter_mean_grad_line.h b/mx_driving/common/ops/kernels/op_kernel/scatter_mean_grad_line.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/scatter_mean_grad_line.h rename to mx_driving/common/ops/kernels/op_kernel/scatter_mean_grad_line.h diff --git a/ads/common/ops/kernels/op_kernel/voxel_pooling_train.cpp b/mx_driving/common/ops/kernels/op_kernel/voxel_pooling_train.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/voxel_pooling_train.cpp rename to mx_driving/common/ops/kernels/op_kernel/voxel_pooling_train.cpp diff --git a/ads/common/ops/kernels/op_kernel/voxel_pooling_train.h b/mx_driving/common/ops/kernels/op_kernel/voxel_pooling_train.h similarity index 100% rename from ads/common/ops/kernels/op_kernel/voxel_pooling_train.h rename to mx_driving/common/ops/kernels/op_kernel/voxel_pooling_train.h diff --git a/ads/common/ops/kernels/op_kernel/voxel_pooling_train_grad.cpp b/mx_driving/common/ops/kernels/op_kernel/voxel_pooling_train_grad.cpp similarity index 100% rename from ads/common/ops/kernels/op_kernel/voxel_pooling_train_grad.cpp rename to mx_driving/common/ops/kernels/op_kernel/voxel_pooling_train_grad.cpp diff --git a/ads/common/ops/knn.py b/mx_driving/common/ops/knn.py similarity index 100% rename from ads/common/ops/knn.py rename to mx_driving/common/ops/knn.py diff --git a/ads/common/ops/nms3d_normal.py b/mx_driving/common/ops/nms3d_normal.py similarity index 96% rename from ads/common/ops/nms3d_normal.py rename to mx_driving/common/ops/nms3d_normal.py index 01d5feb..e9fd446 100644 --- a/ads/common/ops/nms3d_normal.py +++ b/mx_driving/common/ops/nms3d_normal.py @@ -1,27 +1,27 @@ -""" -Copyright (c) OpenMMLab. All rights reserved. -Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. -Modification by: Huawei Developers -Modification date: 2024-06-04 -Modification Description: -Modification 1. Add support fro Ascend NPU -""" -import torch -import torch_npu -from torch.autograd import Function -from torch.nn import Module -import ads_c - - -class AdsNms3dNormalFunction(Function): - @staticmethod - def forward(ctx, boxes, scores, iou_threshold: float): - if boxes.shape[1] != 7: - raise 'Input boxes shape should be (N, 7)' - order = scores.sort(0, descending=True)[1] - boxes = boxes[order].contiguous() - - keep, num_out = ads_c.nms3d_normal(boxes, iou_threshold) - return order[keep[:num_out].long()].contiguous() - -npu_nms3d_normal = AdsNms3dNormalFunction.apply +""" +Copyright (c) OpenMMLab. All rights reserved. +Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. +Modification by: Huawei Developers +Modification date: 2024-06-04 +Modification Description: +Modification 1. Add support fro Ascend NPU +""" +import torch +import torch_npu +from torch.autograd import Function +from torch.nn import Module +import ads_c + + +class AdsNms3dNormalFunction(Function): + @staticmethod + def forward(ctx, boxes, scores, iou_threshold: float): + if boxes.shape[1] != 7: + raise 'Input boxes shape should be (N, 7)' + order = scores.sort(0, descending=True)[1] + boxes = boxes[order].contiguous() + + keep, num_out = ads_c.nms3d_normal(boxes, iou_threshold) + return order[keep[:num_out].long()].contiguous() + +npu_nms3d_normal = AdsNms3dNormalFunction.apply diff --git a/ads/common/ops/npu_dynamic_scatter.py b/mx_driving/common/ops/npu_dynamic_scatter.py similarity index 100% rename from ads/common/ops/npu_dynamic_scatter.py rename to mx_driving/common/ops/npu_dynamic_scatter.py diff --git a/ads/common/ops/npu_multi_scale_deformable_attn_function.py b/mx_driving/common/ops/npu_multi_scale_deformable_attn_function.py similarity index 100% rename from ads/common/ops/npu_multi_scale_deformable_attn_function.py rename to mx_driving/common/ops/npu_multi_scale_deformable_attn_function.py diff --git a/ads/common/ops/npu_nms3d.py b/mx_driving/common/ops/npu_nms3d.py similarity index 100% rename from ads/common/ops/npu_nms3d.py rename to mx_driving/common/ops/npu_nms3d.py diff --git a/ads/common/ops/npu_points_in_box.py b/mx_driving/common/ops/npu_points_in_box.py similarity index 100% rename from ads/common/ops/npu_points_in_box.py rename to mx_driving/common/ops/npu_points_in_box.py diff --git a/ads/common/ops/npu_roipoint_pool3d.py b/mx_driving/common/ops/npu_roipoint_pool3d.py similarity index 100% rename from ads/common/ops/npu_roipoint_pool3d.py rename to mx_driving/common/ops/npu_roipoint_pool3d.py diff --git a/ads/common/ops/npu_scatter_mean_grad.py b/mx_driving/common/ops/npu_scatter_mean_grad.py similarity index 100% rename from ads/common/ops/npu_scatter_mean_grad.py rename to mx_driving/common/ops/npu_scatter_mean_grad.py diff --git a/ads/common/ops/onnx/__init__.py b/mx_driving/common/ops/onnx/__init__.py similarity index 100% rename from ads/common/ops/onnx/__init__.py rename to mx_driving/common/ops/onnx/__init__.py diff --git a/ads/common/ops/onnx/plugin/CMakeLists.txt b/mx_driving/common/ops/onnx/plugin/CMakeLists.txt similarity index 100% rename from ads/common/ops/onnx/plugin/CMakeLists.txt rename to mx_driving/common/ops/onnx/plugin/CMakeLists.txt diff --git a/ads/common/ops/onnx/plugin/onnx_multi_scale_deformable_attn.cpp b/mx_driving/common/ops/onnx/plugin/onnx_multi_scale_deformable_attn.cpp similarity index 100% rename from ads/common/ops/onnx/plugin/onnx_multi_scale_deformable_attn.cpp rename to mx_driving/common/ops/onnx/plugin/onnx_multi_scale_deformable_attn.cpp diff --git a/ads/common/ops/onnx/wrapper_onnx_ops.py b/mx_driving/common/ops/onnx/wrapper_onnx_ops.py similarity index 86% rename from ads/common/ops/onnx/wrapper_onnx_ops.py rename to mx_driving/common/ops/onnx/wrapper_onnx_ops.py index 6457e1d..e943d8e 100644 --- a/ads/common/ops/onnx/wrapper_onnx_ops.py +++ b/mx_driving/common/ops/onnx/wrapper_onnx_ops.py @@ -2,13 +2,13 @@ from typing import Optional, List import torch from torch import Tensor import torch.onnx.symbolic_helper as sym_help -import ads.common +import mx_driving.common class NPUMultiScaleDeformableAttnOP(torch.autograd.Function): @staticmethod def forward(ctx, *args, **kwargs): - return ads.common.npu_multi_scale_deformable_attn_function(*args, **kwargs) + return mx_driving.common.npu_multi_scale_deformable_attn_function(*args, **kwargs) @staticmethod # 'pylint: disable=too-many-arguments,huawei-too-many-arguments diff --git a/ads/common/ops/rotated_iou.py b/mx_driving/common/ops/rotated_iou.py similarity index 100% rename from ads/common/ops/rotated_iou.py rename to mx_driving/common/ops/rotated_iou.py diff --git a/ads/common/ops/rotated_overlaps.py b/mx_driving/common/ops/rotated_overlaps.py similarity index 100% rename from ads/common/ops/rotated_overlaps.py rename to mx_driving/common/ops/rotated_overlaps.py diff --git a/ads/common/ops/scatter_max.py b/mx_driving/common/ops/scatter_max.py similarity index 100% rename from ads/common/ops/scatter_max.py rename to mx_driving/common/ops/scatter_max.py diff --git a/ads/common/ops/threeNN.py b/mx_driving/common/ops/threeNN.py similarity index 100% rename from ads/common/ops/threeNN.py rename to mx_driving/common/ops/threeNN.py diff --git a/ads/common/ops/three_interpolate.py b/mx_driving/common/ops/three_interpolate.py similarity index 100% rename from ads/common/ops/three_interpolate.py rename to mx_driving/common/ops/three_interpolate.py diff --git a/ads/common/ops/voxel_pooling_train.py b/mx_driving/common/ops/voxel_pooling_train.py similarity index 100% rename from ads/common/ops/voxel_pooling_train.py rename to mx_driving/common/ops/voxel_pooling_train.py diff --git a/ads/motion/CMakeLists.txt b/mx_driving/motion/CMakeLists.txt similarity index 100% rename from ads/motion/CMakeLists.txt rename to mx_driving/motion/CMakeLists.txt diff --git a/ads/motion/__init__.py b/mx_driving/motion/__init__.py similarity index 100% rename from ads/motion/__init__.py rename to mx_driving/motion/__init__.py diff --git a/ads/motion/components/README.md b/mx_driving/motion/components/README.md similarity index 100% rename from ads/motion/components/README.md rename to mx_driving/motion/components/README.md diff --git a/ads/motion/ops/csrc/README.md b/mx_driving/motion/ops/csrc/README.md similarity index 100% rename from ads/motion/ops/csrc/README.md rename to mx_driving/motion/ops/csrc/README.md diff --git a/ads/motion/ops/csrc/pybind.cpp b/mx_driving/motion/ops/csrc/pybind.cpp similarity index 100% rename from ads/motion/ops/csrc/pybind.cpp rename to mx_driving/motion/ops/csrc/pybind.cpp diff --git a/ads/motion/ops/kernels/CMakeLists.txt b/mx_driving/motion/ops/kernels/CMakeLists.txt similarity index 100% rename from ads/motion/ops/kernels/CMakeLists.txt rename to mx_driving/motion/ops/kernels/CMakeLists.txt diff --git a/ads/motion/ops/kernels/README.md b/mx_driving/motion/ops/kernels/README.md similarity index 100% rename from ads/motion/ops/kernels/README.md rename to mx_driving/motion/ops/kernels/README.md diff --git a/ads/motion/ops/kernels/framework/CMakeLists.txt b/mx_driving/motion/ops/kernels/framework/CMakeLists.txt similarity index 100% rename from ads/motion/ops/kernels/framework/CMakeLists.txt rename to mx_driving/motion/ops/kernels/framework/CMakeLists.txt diff --git a/ads/motion/ops/kernels/op_host/CMakeLists.txt b/mx_driving/motion/ops/kernels/op_host/CMakeLists.txt similarity index 100% rename from ads/motion/ops/kernels/op_host/CMakeLists.txt rename to mx_driving/motion/ops/kernels/op_host/CMakeLists.txt diff --git a/ads/motion/ops/kernels/op_kernel/CMakeLists.txt b/mx_driving/motion/ops/kernels/op_kernel/CMakeLists.txt similarity index 100% rename from ads/motion/ops/kernels/op_kernel/CMakeLists.txt rename to mx_driving/motion/ops/kernels/op_kernel/CMakeLists.txt diff --git a/ads/perception/CMakeLists.txt b/mx_driving/perception/CMakeLists.txt similarity index 100% rename from ads/perception/CMakeLists.txt rename to mx_driving/perception/CMakeLists.txt diff --git a/ads/perception/__init__.py b/mx_driving/perception/__init__.py similarity index 100% rename from ads/perception/__init__.py rename to mx_driving/perception/__init__.py diff --git a/ads/perception/fused/__init__.py b/mx_driving/perception/fused/__init__.py similarity index 100% rename from ads/perception/fused/__init__.py rename to mx_driving/perception/fused/__init__.py diff --git a/ads/perception/fused/components/README.md b/mx_driving/perception/fused/components/README.md similarity index 100% rename from ads/perception/fused/components/README.md rename to mx_driving/perception/fused/components/README.md diff --git a/ads/perception/fused/ops/__init__.py b/mx_driving/perception/fused/ops/__init__.py similarity index 100% rename from ads/perception/fused/ops/__init__.py rename to mx_driving/perception/fused/ops/__init__.py diff --git a/ads/perception/fused/ops/bev_pool.py b/mx_driving/perception/fused/ops/bev_pool.py similarity index 98% rename from ads/perception/fused/ops/bev_pool.py rename to mx_driving/perception/fused/ops/bev_pool.py index 56cc636..245533d 100644 --- a/ads/perception/fused/ops/bev_pool.py +++ b/mx_driving/perception/fused/ops/bev_pool.py @@ -71,7 +71,7 @@ def bev_pool(feat, geom_feat, B, D, H, W): - C <= 1024 Usage: >>> import torch, torch_npu - >>> from ads.perception.fused import bev_pool + >>> from mx_driving.perception.fused import bev_pool >>> feat = torch.rand(4, 256).npu() >>> feat.requires_grad_() >>> geom_feat = torch.tensor([[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 0, 2], [0, 0, 0, 3]], dtype=torch.int32).npu() diff --git a/ads/perception/fused/ops/bev_pool_v2.py b/mx_driving/perception/fused/ops/bev_pool_v2.py similarity index 98% rename from ads/perception/fused/ops/bev_pool_v2.py rename to mx_driving/perception/fused/ops/bev_pool_v2.py index 0912ae9..e889548 100644 --- a/ads/perception/fused/ops/bev_pool_v2.py +++ b/mx_driving/perception/fused/ops/bev_pool_v2.py @@ -117,7 +117,7 @@ def bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev, - N_RANKS <= 2^21 Usage: >>> import torch, torch_npu - >>> from ads.perception.fused import bev_pool_v2 + >>> from mx_driving.perception.fused import bev_pool_v2 >>> depth = torch.rand(2, 1, 8, 256, 256).npu() >>> feat = torch.rand(2, 1, 256, 256, 64).npu() >>> feat.requires_grad_() diff --git a/ads/perception/fused/ops/csrc/BEVPool.cpp b/mx_driving/perception/fused/ops/csrc/BEVPool.cpp similarity index 100% rename from ads/perception/fused/ops/csrc/BEVPool.cpp rename to mx_driving/perception/fused/ops/csrc/BEVPool.cpp diff --git a/ads/perception/fused/ops/csrc/BEVPoolBackward.cpp b/mx_driving/perception/fused/ops/csrc/BEVPoolBackward.cpp similarity index 100% rename from ads/perception/fused/ops/csrc/BEVPoolBackward.cpp rename to mx_driving/perception/fused/ops/csrc/BEVPoolBackward.cpp diff --git a/ads/perception/fused/ops/csrc/BEVPoolV2.cpp b/mx_driving/perception/fused/ops/csrc/BEVPoolV2.cpp similarity index 100% rename from ads/perception/fused/ops/csrc/BEVPoolV2.cpp rename to mx_driving/perception/fused/ops/csrc/BEVPoolV2.cpp diff --git a/ads/perception/fused/ops/csrc/BEVPoolV2Backward.cpp b/mx_driving/perception/fused/ops/csrc/BEVPoolV2Backward.cpp similarity index 100% rename from ads/perception/fused/ops/csrc/BEVPoolV2Backward.cpp rename to mx_driving/perception/fused/ops/csrc/BEVPoolV2Backward.cpp diff --git a/ads/perception/fused/ops/csrc/README.md b/mx_driving/perception/fused/ops/csrc/README.md similarity index 100% rename from ads/perception/fused/ops/csrc/README.md rename to mx_driving/perception/fused/ops/csrc/README.md diff --git a/ads/perception/fused/ops/csrc/functions.h b/mx_driving/perception/fused/ops/csrc/functions.h similarity index 100% rename from ads/perception/fused/ops/csrc/functions.h rename to mx_driving/perception/fused/ops/csrc/functions.h diff --git a/ads/perception/fused/ops/csrc/pybind.cpp b/mx_driving/perception/fused/ops/csrc/pybind.cpp similarity index 100% rename from ads/perception/fused/ops/csrc/pybind.cpp rename to mx_driving/perception/fused/ops/csrc/pybind.cpp diff --git a/ads/perception/fused/ops/kernels/CMakeLists.txt b/mx_driving/perception/fused/ops/kernels/CMakeLists.txt similarity index 100% rename from ads/perception/fused/ops/kernels/CMakeLists.txt rename to mx_driving/perception/fused/ops/kernels/CMakeLists.txt diff --git a/ads/perception/fused/ops/kernels/README.md b/mx_driving/perception/fused/ops/kernels/README.md similarity index 100% rename from ads/perception/fused/ops/kernels/README.md rename to mx_driving/perception/fused/ops/kernels/README.md diff --git a/ads/perception/fused/ops/kernels/op_host/CMakeLists.txt b/mx_driving/perception/fused/ops/kernels/op_host/CMakeLists.txt similarity index 100% rename from ads/perception/fused/ops/kernels/op_host/CMakeLists.txt rename to mx_driving/perception/fused/ops/kernels/op_host/CMakeLists.txt diff --git a/ads/perception/fused/ops/kernels/op_host/bev_pool.cpp b/mx_driving/perception/fused/ops/kernels/op_host/bev_pool.cpp similarity index 100% rename from ads/perception/fused/ops/kernels/op_host/bev_pool.cpp rename to mx_driving/perception/fused/ops/kernels/op_host/bev_pool.cpp diff --git a/ads/perception/fused/ops/kernels/op_host/bev_pool_tiling.h b/mx_driving/perception/fused/ops/kernels/op_host/bev_pool_tiling.h similarity index 100% rename from ads/perception/fused/ops/kernels/op_host/bev_pool_tiling.h rename to mx_driving/perception/fused/ops/kernels/op_host/bev_pool_tiling.h diff --git a/ads/perception/fused/ops/kernels/op_kernel/CMakeLists.txt b/mx_driving/perception/fused/ops/kernels/op_kernel/CMakeLists.txt similarity index 100% rename from ads/perception/fused/ops/kernels/op_kernel/CMakeLists.txt rename to mx_driving/perception/fused/ops/kernels/op_kernel/CMakeLists.txt diff --git a/ads/perception/fused/ops/kernels/op_kernel/bev_pool.cpp b/mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool.cpp similarity index 100% rename from ads/perception/fused/ops/kernels/op_kernel/bev_pool.cpp rename to mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool.cpp diff --git a/ads/perception/fused/ops/kernels/op_kernel/bev_pool.h b/mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool.h similarity index 100% rename from ads/perception/fused/ops/kernels/op_kernel/bev_pool.h rename to mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool.h diff --git a/ads/perception/fused/ops/kernels/op_kernel/bev_pool_grad.cpp b/mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_grad.cpp similarity index 100% rename from ads/perception/fused/ops/kernels/op_kernel/bev_pool_grad.cpp rename to mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_grad.cpp diff --git a/ads/perception/fused/ops/kernels/op_kernel/bev_pool_v2.cpp b/mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_v2.cpp similarity index 100% rename from ads/perception/fused/ops/kernels/op_kernel/bev_pool_v2.cpp rename to mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_v2.cpp diff --git a/ads/perception/fused/ops/kernels/op_kernel/bev_pool_v2.h b/mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_v2.h similarity index 100% rename from ads/perception/fused/ops/kernels/op_kernel/bev_pool_v2.h rename to mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_v2.h diff --git a/ads/perception/fused/ops/kernels/op_kernel/bev_pool_v2_grad.cpp b/mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_v2_grad.cpp similarity index 100% rename from ads/perception/fused/ops/kernels/op_kernel/bev_pool_v2_grad.cpp rename to mx_driving/perception/fused/ops/kernels/op_kernel/bev_pool_v2_grad.cpp diff --git a/ads/perception/fused/ops/kernels/op_kernel/common.h b/mx_driving/perception/fused/ops/kernels/op_kernel/common.h similarity index 100% rename from ads/perception/fused/ops/kernels/op_kernel/common.h rename to mx_driving/perception/fused/ops/kernels/op_kernel/common.h diff --git a/ads/perception/point/__init__.py b/mx_driving/perception/point/__init__.py similarity index 100% rename from ads/perception/point/__init__.py rename to mx_driving/perception/point/__init__.py diff --git a/ads/perception/point/components/README.md b/mx_driving/perception/point/components/README.md similarity index 100% rename from ads/perception/point/components/README.md rename to mx_driving/perception/point/components/README.md diff --git a/ads/perception/point/ops/__init__.py b/mx_driving/perception/point/ops/__init__.py similarity index 100% rename from ads/perception/point/ops/__init__.py rename to mx_driving/perception/point/ops/__init__.py diff --git a/ads/perception/point/ops/csrc/GroupPoints.cpp b/mx_driving/perception/point/ops/csrc/GroupPoints.cpp similarity index 100% rename from ads/perception/point/ops/csrc/GroupPoints.cpp rename to mx_driving/perception/point/ops/csrc/GroupPoints.cpp diff --git a/ads/perception/point/ops/csrc/PointToVoxel.cpp b/mx_driving/perception/point/ops/csrc/PointToVoxel.cpp similarity index 100% rename from ads/perception/point/ops/csrc/PointToVoxel.cpp rename to mx_driving/perception/point/ops/csrc/PointToVoxel.cpp diff --git a/ads/perception/point/ops/csrc/README.md b/mx_driving/perception/point/ops/csrc/README.md similarity index 100% rename from ads/perception/point/ops/csrc/README.md rename to mx_driving/perception/point/ops/csrc/README.md diff --git a/ads/perception/point/ops/csrc/UniqueVoxel.cpp b/mx_driving/perception/point/ops/csrc/UniqueVoxel.cpp similarity index 100% rename from ads/perception/point/ops/csrc/UniqueVoxel.cpp rename to mx_driving/perception/point/ops/csrc/UniqueVoxel.cpp diff --git a/ads/perception/point/ops/csrc/VecPoolBackward.cpp b/mx_driving/perception/point/ops/csrc/VecPoolBackward.cpp similarity index 100% rename from ads/perception/point/ops/csrc/VecPoolBackward.cpp rename to mx_driving/perception/point/ops/csrc/VecPoolBackward.cpp diff --git a/ads/perception/point/ops/csrc/VoxelToPoint.cpp b/mx_driving/perception/point/ops/csrc/VoxelToPoint.cpp similarity index 100% rename from ads/perception/point/ops/csrc/VoxelToPoint.cpp rename to mx_driving/perception/point/ops/csrc/VoxelToPoint.cpp diff --git a/ads/perception/point/ops/csrc/functions.h b/mx_driving/perception/point/ops/csrc/functions.h similarity index 100% rename from ads/perception/point/ops/csrc/functions.h rename to mx_driving/perception/point/ops/csrc/functions.h diff --git a/ads/perception/point/ops/csrc/pybind.cpp b/mx_driving/perception/point/ops/csrc/pybind.cpp similarity index 100% rename from ads/perception/point/ops/csrc/pybind.cpp rename to mx_driving/perception/point/ops/csrc/pybind.cpp diff --git a/ads/perception/point/ops/group_points.py b/mx_driving/perception/point/ops/group_points.py similarity index 100% rename from ads/perception/point/ops/group_points.py rename to mx_driving/perception/point/ops/group_points.py diff --git a/ads/perception/point/ops/kernels/CMakeLists.txt b/mx_driving/perception/point/ops/kernels/CMakeLists.txt similarity index 100% rename from ads/perception/point/ops/kernels/CMakeLists.txt rename to mx_driving/perception/point/ops/kernels/CMakeLists.txt diff --git a/ads/perception/point/ops/kernels/README.md b/mx_driving/perception/point/ops/kernels/README.md similarity index 100% rename from ads/perception/point/ops/kernels/README.md rename to mx_driving/perception/point/ops/kernels/README.md diff --git a/ads/perception/point/ops/kernels/op_host/CMakeLists.txt b/mx_driving/perception/point/ops/kernels/op_host/CMakeLists.txt similarity index 100% rename from ads/perception/point/ops/kernels/op_host/CMakeLists.txt rename to mx_driving/perception/point/ops/kernels/op_host/CMakeLists.txt diff --git a/ads/perception/point/ops/kernels/op_host/group_points_grad.cpp b/mx_driving/perception/point/ops/kernels/op_host/group_points_grad.cpp similarity index 100% rename from ads/perception/point/ops/kernels/op_host/group_points_grad.cpp rename to mx_driving/perception/point/ops/kernels/op_host/group_points_grad.cpp diff --git a/ads/perception/point/ops/kernels/op_host/group_points_grad_tiling.h b/mx_driving/perception/point/ops/kernels/op_host/group_points_grad_tiling.h similarity index 100% rename from ads/perception/point/ops/kernels/op_host/group_points_grad_tiling.h rename to mx_driving/perception/point/ops/kernels/op_host/group_points_grad_tiling.h diff --git a/ads/perception/point/ops/kernels/op_host/point_to_voxel.cpp b/mx_driving/perception/point/ops/kernels/op_host/point_to_voxel.cpp similarity index 100% rename from ads/perception/point/ops/kernels/op_host/point_to_voxel.cpp rename to mx_driving/perception/point/ops/kernels/op_host/point_to_voxel.cpp diff --git a/ads/perception/point/ops/kernels/op_host/point_to_voxel_tiling.h b/mx_driving/perception/point/ops/kernels/op_host/point_to_voxel_tiling.h similarity index 100% rename from ads/perception/point/ops/kernels/op_host/point_to_voxel_tiling.h rename to mx_driving/perception/point/ops/kernels/op_host/point_to_voxel_tiling.h diff --git a/ads/perception/point/ops/kernels/op_host/unique_voxel.cpp b/mx_driving/perception/point/ops/kernels/op_host/unique_voxel.cpp similarity index 100% rename from ads/perception/point/ops/kernels/op_host/unique_voxel.cpp rename to mx_driving/perception/point/ops/kernels/op_host/unique_voxel.cpp diff --git a/ads/perception/point/ops/kernels/op_host/unique_voxel_tiling.h b/mx_driving/perception/point/ops/kernels/op_host/unique_voxel_tiling.h similarity index 100% rename from ads/perception/point/ops/kernels/op_host/unique_voxel_tiling.h rename to mx_driving/perception/point/ops/kernels/op_host/unique_voxel_tiling.h diff --git a/ads/perception/point/ops/kernels/op_host/vec_pool_grad.cpp b/mx_driving/perception/point/ops/kernels/op_host/vec_pool_grad.cpp similarity index 100% rename from ads/perception/point/ops/kernels/op_host/vec_pool_grad.cpp rename to mx_driving/perception/point/ops/kernels/op_host/vec_pool_grad.cpp diff --git a/ads/perception/point/ops/kernels/op_host/vec_pool_grad_tiling.h b/mx_driving/perception/point/ops/kernels/op_host/vec_pool_grad_tiling.h similarity index 100% rename from ads/perception/point/ops/kernels/op_host/vec_pool_grad_tiling.h rename to mx_driving/perception/point/ops/kernels/op_host/vec_pool_grad_tiling.h diff --git a/ads/perception/point/ops/kernels/op_kernel/CMakeLists.txt b/mx_driving/perception/point/ops/kernels/op_kernel/CMakeLists.txt similarity index 100% rename from ads/perception/point/ops/kernels/op_kernel/CMakeLists.txt rename to mx_driving/perception/point/ops/kernels/op_kernel/CMakeLists.txt diff --git a/ads/perception/point/ops/kernels/op_kernel/group_points_grad.cpp b/mx_driving/perception/point/ops/kernels/op_kernel/group_points_grad.cpp similarity index 100% rename from ads/perception/point/ops/kernels/op_kernel/group_points_grad.cpp rename to mx_driving/perception/point/ops/kernels/op_kernel/group_points_grad.cpp diff --git a/ads/perception/point/ops/kernels/op_kernel/point_to_voxel.cpp b/mx_driving/perception/point/ops/kernels/op_kernel/point_to_voxel.cpp similarity index 100% rename from ads/perception/point/ops/kernels/op_kernel/point_to_voxel.cpp rename to mx_driving/perception/point/ops/kernels/op_kernel/point_to_voxel.cpp diff --git a/ads/perception/point/ops/kernels/op_kernel/unique_voxel.cpp b/mx_driving/perception/point/ops/kernels/op_kernel/unique_voxel.cpp similarity index 100% rename from ads/perception/point/ops/kernels/op_kernel/unique_voxel.cpp rename to mx_driving/perception/point/ops/kernels/op_kernel/unique_voxel.cpp diff --git a/ads/perception/point/ops/kernels/op_kernel/vec_pool_grad.cpp b/mx_driving/perception/point/ops/kernels/op_kernel/vec_pool_grad.cpp similarity index 100% rename from ads/perception/point/ops/kernels/op_kernel/vec_pool_grad.cpp rename to mx_driving/perception/point/ops/kernels/op_kernel/vec_pool_grad.cpp diff --git a/ads/perception/point/ops/kernels/op_kernel/voxel_to_point.cpp b/mx_driving/perception/point/ops/kernels/op_kernel/voxel_to_point.cpp similarity index 100% rename from ads/perception/point/ops/kernels/op_kernel/voxel_to_point.cpp rename to mx_driving/perception/point/ops/kernels/op_kernel/voxel_to_point.cpp diff --git a/ads/perception/vision/__init__.py b/mx_driving/perception/vision/__init__.py similarity index 100% rename from ads/perception/vision/__init__.py rename to mx_driving/perception/vision/__init__.py diff --git a/ads/perception/vision/components/README.md b/mx_driving/perception/vision/components/README.md similarity index 100% rename from ads/perception/vision/components/README.md rename to mx_driving/perception/vision/components/README.md diff --git a/ads/perception/vision/ops/__init__.py b/mx_driving/perception/vision/ops/__init__.py similarity index 100% rename from ads/perception/vision/ops/__init__.py rename to mx_driving/perception/vision/ops/__init__.py diff --git a/ads/perception/vision/ops/boxes_overlap_bev.py b/mx_driving/perception/vision/ops/boxes_overlap_bev.py similarity index 100% rename from ads/perception/vision/ops/boxes_overlap_bev.py rename to mx_driving/perception/vision/ops/boxes_overlap_bev.py diff --git a/ads/perception/vision/ops/csrc/BoxesOverlapBev.cpp b/mx_driving/perception/vision/ops/csrc/BoxesOverlapBev.cpp similarity index 100% rename from ads/perception/vision/ops/csrc/BoxesOverlapBev.cpp rename to mx_driving/perception/vision/ops/csrc/BoxesOverlapBev.cpp diff --git a/ads/perception/vision/ops/csrc/README.md b/mx_driving/perception/vision/ops/csrc/README.md similarity index 100% rename from ads/perception/vision/ops/csrc/README.md rename to mx_driving/perception/vision/ops/csrc/README.md diff --git a/ads/perception/vision/ops/csrc/functions.h b/mx_driving/perception/vision/ops/csrc/functions.h similarity index 100% rename from ads/perception/vision/ops/csrc/functions.h rename to mx_driving/perception/vision/ops/csrc/functions.h diff --git a/ads/perception/vision/ops/csrc/pybind.cpp b/mx_driving/perception/vision/ops/csrc/pybind.cpp similarity index 100% rename from ads/perception/vision/ops/csrc/pybind.cpp rename to mx_driving/perception/vision/ops/csrc/pybind.cpp diff --git a/ads/perception/vision/ops/kernels/CMakeLists.txt b/mx_driving/perception/vision/ops/kernels/CMakeLists.txt similarity index 100% rename from ads/perception/vision/ops/kernels/CMakeLists.txt rename to mx_driving/perception/vision/ops/kernels/CMakeLists.txt diff --git a/ads/perception/vision/ops/kernels/README.md b/mx_driving/perception/vision/ops/kernels/README.md similarity index 100% rename from ads/perception/vision/ops/kernels/README.md rename to mx_driving/perception/vision/ops/kernels/README.md diff --git a/ads/perception/vision/ops/kernels/op_host/CMakeLists.txt b/mx_driving/perception/vision/ops/kernels/op_host/CMakeLists.txt similarity index 100% rename from ads/perception/vision/ops/kernels/op_host/CMakeLists.txt rename to mx_driving/perception/vision/ops/kernels/op_host/CMakeLists.txt diff --git a/ads/perception/vision/ops/kernels/op_host/boxes_overlap_bev.cpp b/mx_driving/perception/vision/ops/kernels/op_host/boxes_overlap_bev.cpp similarity index 100% rename from ads/perception/vision/ops/kernels/op_host/boxes_overlap_bev.cpp rename to mx_driving/perception/vision/ops/kernels/op_host/boxes_overlap_bev.cpp diff --git a/ads/perception/vision/ops/kernels/op_host/boxes_overlap_bev_tiling.h b/mx_driving/perception/vision/ops/kernels/op_host/boxes_overlap_bev_tiling.h similarity index 100% rename from ads/perception/vision/ops/kernels/op_host/boxes_overlap_bev_tiling.h rename to mx_driving/perception/vision/ops/kernels/op_host/boxes_overlap_bev_tiling.h diff --git a/ads/perception/vision/ops/kernels/op_kernel/CMakeLists.txt b/mx_driving/perception/vision/ops/kernels/op_kernel/CMakeLists.txt similarity index 100% rename from ads/perception/vision/ops/kernels/op_kernel/CMakeLists.txt rename to mx_driving/perception/vision/ops/kernels/op_kernel/CMakeLists.txt diff --git a/ads/perception/vision/ops/kernels/op_kernel/boxes_overlap_bev.cpp b/mx_driving/perception/vision/ops/kernels/op_kernel/boxes_overlap_bev.cpp similarity index 100% rename from ads/perception/vision/ops/kernels/op_kernel/boxes_overlap_bev.cpp rename to mx_driving/perception/vision/ops/kernels/op_kernel/boxes_overlap_bev.cpp diff --git a/ads/spconv/CMakeLists.txt b/mx_driving/spconv/CMakeLists.txt similarity index 100% rename from ads/spconv/CMakeLists.txt rename to mx_driving/spconv/CMakeLists.txt diff --git a/ads/spconv/__init__.py b/mx_driving/spconv/__init__.py similarity index 100% rename from ads/spconv/__init__.py rename to mx_driving/spconv/__init__.py diff --git a/ads/spconv/ops/__init__.py b/mx_driving/spconv/ops/__init__.py similarity index 100% rename from ads/spconv/ops/__init__.py rename to mx_driving/spconv/ops/__init__.py diff --git a/ads/spconv/ops/csrc/MultiToSparse.cpp b/mx_driving/spconv/ops/csrc/MultiToSparse.cpp similarity index 100% rename from ads/spconv/ops/csrc/MultiToSparse.cpp rename to mx_driving/spconv/ops/csrc/MultiToSparse.cpp diff --git a/ads/spconv/ops/csrc/README.md b/mx_driving/spconv/ops/csrc/README.md similarity index 100% rename from ads/spconv/ops/csrc/README.md rename to mx_driving/spconv/ops/csrc/README.md diff --git a/ads/spconv/ops/csrc/SparseConv3d.cpp b/mx_driving/spconv/ops/csrc/SparseConv3d.cpp similarity index 100% rename from ads/spconv/ops/csrc/SparseConv3d.cpp rename to mx_driving/spconv/ops/csrc/SparseConv3d.cpp diff --git a/ads/spconv/ops/csrc/SparseConv3dGrad.cpp b/mx_driving/spconv/ops/csrc/SparseConv3dGrad.cpp similarity index 100% rename from ads/spconv/ops/csrc/SparseConv3dGrad.cpp rename to mx_driving/spconv/ops/csrc/SparseConv3dGrad.cpp diff --git a/ads/spconv/ops/csrc/SubmSparseCov3d.cpp b/mx_driving/spconv/ops/csrc/SubmSparseCov3d.cpp similarity index 100% rename from ads/spconv/ops/csrc/SubmSparseCov3d.cpp rename to mx_driving/spconv/ops/csrc/SubmSparseCov3d.cpp diff --git a/ads/spconv/ops/csrc/functions.h b/mx_driving/spconv/ops/csrc/functions.h similarity index 100% rename from ads/spconv/ops/csrc/functions.h rename to mx_driving/spconv/ops/csrc/functions.h diff --git a/ads/spconv/ops/csrc/pybind.cpp b/mx_driving/spconv/ops/csrc/pybind.cpp similarity index 100% rename from ads/spconv/ops/csrc/pybind.cpp rename to mx_driving/spconv/ops/csrc/pybind.cpp diff --git a/ads/spconv/ops/kernels/CMakeLists.txt b/mx_driving/spconv/ops/kernels/CMakeLists.txt similarity index 100% rename from ads/spconv/ops/kernels/CMakeLists.txt rename to mx_driving/spconv/ops/kernels/CMakeLists.txt diff --git a/ads/spconv/ops/kernels/README.md b/mx_driving/spconv/ops/kernels/README.md similarity index 100% rename from ads/spconv/ops/kernels/README.md rename to mx_driving/spconv/ops/kernels/README.md diff --git a/ads/spconv/ops/kernels/op_host/CMakeLists.txt b/mx_driving/spconv/ops/kernels/op_host/CMakeLists.txt similarity index 100% rename from ads/spconv/ops/kernels/op_host/CMakeLists.txt rename to mx_driving/spconv/ops/kernels/op_host/CMakeLists.txt diff --git a/ads/spconv/ops/kernels/op_host/sparse_conv3d.cpp b/mx_driving/spconv/ops/kernels/op_host/sparse_conv3d.cpp similarity index 100% rename from ads/spconv/ops/kernels/op_host/sparse_conv3d.cpp rename to mx_driving/spconv/ops/kernels/op_host/sparse_conv3d.cpp diff --git a/ads/spconv/ops/kernels/op_host/sparse_conv3d_grad.cpp b/mx_driving/spconv/ops/kernels/op_host/sparse_conv3d_grad.cpp similarity index 100% rename from ads/spconv/ops/kernels/op_host/sparse_conv3d_grad.cpp rename to mx_driving/spconv/ops/kernels/op_host/sparse_conv3d_grad.cpp diff --git a/ads/spconv/ops/kernels/op_host/sparse_conv3d_grad_tiling.h b/mx_driving/spconv/ops/kernels/op_host/sparse_conv3d_grad_tiling.h similarity index 100% rename from ads/spconv/ops/kernels/op_host/sparse_conv3d_grad_tiling.h rename to mx_driving/spconv/ops/kernels/op_host/sparse_conv3d_grad_tiling.h diff --git a/ads/spconv/ops/kernels/op_host/sparse_conv3d_tiling.h b/mx_driving/spconv/ops/kernels/op_host/sparse_conv3d_tiling.h similarity index 100% rename from ads/spconv/ops/kernels/op_host/sparse_conv3d_tiling.h rename to mx_driving/spconv/ops/kernels/op_host/sparse_conv3d_tiling.h diff --git a/ads/spconv/ops/kernels/op_host/subm_sparse_conv3d_tiling.cpp b/mx_driving/spconv/ops/kernels/op_host/subm_sparse_conv3d_tiling.cpp similarity index 100% rename from ads/spconv/ops/kernels/op_host/subm_sparse_conv3d_tiling.cpp rename to mx_driving/spconv/ops/kernels/op_host/subm_sparse_conv3d_tiling.cpp diff --git a/ads/spconv/ops/kernels/op_host/subm_sparse_conv3d_tiling.h b/mx_driving/spconv/ops/kernels/op_host/subm_sparse_conv3d_tiling.h similarity index 100% rename from ads/spconv/ops/kernels/op_host/subm_sparse_conv3d_tiling.h rename to mx_driving/spconv/ops/kernels/op_host/subm_sparse_conv3d_tiling.h diff --git a/ads/spconv/ops/kernels/op_host/to_sparse.cpp b/mx_driving/spconv/ops/kernels/op_host/to_sparse.cpp similarity index 100% rename from ads/spconv/ops/kernels/op_host/to_sparse.cpp rename to mx_driving/spconv/ops/kernels/op_host/to_sparse.cpp diff --git a/ads/spconv/ops/kernels/op_host/to_sparse_tiling.h b/mx_driving/spconv/ops/kernels/op_host/to_sparse_tiling.h similarity index 100% rename from ads/spconv/ops/kernels/op_host/to_sparse_tiling.h rename to mx_driving/spconv/ops/kernels/op_host/to_sparse_tiling.h diff --git a/ads/spconv/ops/kernels/op_kernel/CMakeLists.txt b/mx_driving/spconv/ops/kernels/op_kernel/CMakeLists.txt similarity index 100% rename from ads/spconv/ops/kernels/op_kernel/CMakeLists.txt rename to mx_driving/spconv/ops/kernels/op_kernel/CMakeLists.txt diff --git a/ads/spconv/ops/kernels/op_kernel/sparse_conv3d.cpp b/mx_driving/spconv/ops/kernels/op_kernel/sparse_conv3d.cpp similarity index 100% rename from ads/spconv/ops/kernels/op_kernel/sparse_conv3d.cpp rename to mx_driving/spconv/ops/kernels/op_kernel/sparse_conv3d.cpp diff --git a/ads/spconv/ops/kernels/op_kernel/sparse_conv3d_grad.cpp b/mx_driving/spconv/ops/kernels/op_kernel/sparse_conv3d_grad.cpp similarity index 100% rename from ads/spconv/ops/kernels/op_kernel/sparse_conv3d_grad.cpp rename to mx_driving/spconv/ops/kernels/op_kernel/sparse_conv3d_grad.cpp diff --git a/ads/spconv/ops/kernels/op_kernel/subm_sparse_conv3d.cpp b/mx_driving/spconv/ops/kernels/op_kernel/subm_sparse_conv3d.cpp similarity index 100% rename from ads/spconv/ops/kernels/op_kernel/subm_sparse_conv3d.cpp rename to mx_driving/spconv/ops/kernels/op_kernel/subm_sparse_conv3d.cpp diff --git a/ads/spconv/ops/kernels/op_kernel/to_sparse.cpp b/mx_driving/spconv/ops/kernels/op_kernel/to_sparse.cpp similarity index 100% rename from ads/spconv/ops/kernels/op_kernel/to_sparse.cpp rename to mx_driving/spconv/ops/kernels/op_kernel/to_sparse.cpp diff --git a/ads/spconv/ops/sparse_conv.py b/mx_driving/spconv/ops/sparse_conv.py similarity index 100% rename from ads/spconv/ops/sparse_conv.py rename to mx_driving/spconv/ops/sparse_conv.py diff --git a/ads/spconv/ops/sparse_functional.py b/mx_driving/spconv/ops/sparse_functional.py similarity index 100% rename from ads/spconv/ops/sparse_functional.py rename to mx_driving/spconv/ops/sparse_functional.py diff --git a/ads/spconv/ops/sparse_modules.py b/mx_driving/spconv/ops/sparse_modules.py similarity index 99% rename from ads/spconv/ops/sparse_modules.py rename to mx_driving/spconv/ops/sparse_modules.py index e173e2d..e80be98 100644 --- a/ads/spconv/ops/sparse_modules.py +++ b/mx_driving/spconv/ops/sparse_modules.py @@ -65,7 +65,7 @@ class SparseSequential(SparseModule): Example: >>> # using Sequential: - >>> from ads.spconv import SparseSequential + >>> from mx_driving.spconv import SparseSequential >>> model = SparseSequential( SparseConv2d(1,20,5), nn.ReLU(), diff --git a/ads/spconv/ops/sparse_ops.py b/mx_driving/spconv/ops/sparse_ops.py similarity index 100% rename from ads/spconv/ops/sparse_ops.py rename to mx_driving/spconv/ops/sparse_ops.py diff --git a/ads/spconv/ops/sparse_structure.py b/mx_driving/spconv/ops/sparse_structure.py similarity index 100% rename from ads/spconv/ops/sparse_structure.py rename to mx_driving/spconv/ops/sparse_structure.py diff --git a/setup.py b/setup.py index c2c2187..a317752 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ full_components = ["common", "motion", "perception/fused", "perception/point", " source_file = glob.glob(os.path.join("./bind/", "*.cpp")) include_dirs = [os.path.join(BASE_DIR, "include")] for part in full_components: - source_file += glob.glob(os.path.join(f"./ads/{part}/ops/csrc/", "*.cpp")) + source_file += glob.glob(os.path.join(f"./mx_driving/{part}/ops/csrc/", "*.cpp")) exts = [] ext1 = extension.NpuExtension( @@ -56,10 +56,10 @@ if not os.getenv("BUILD_WITHOUT_SHA"): VERSION += "+git" + sha[:7] setup( - name="ads_accelerator", + name="mx_driving", version=VERSION, description="A Library of acceleration for autonomous driving systems on Ascend-NPU.", - keywords="ads", + keywords="mx_driving", ext_modules=exts, author="Ascend Contributors", cmdclass={"build_ext": BuildExtension}, diff --git a/tests/onnx/test_wrapper_onnx_ops.py b/tests/onnx/test_wrapper_onnx_ops.py index adf5282..d5132f4 100644 --- a/tests/onnx/test_wrapper_onnx_ops.py +++ b/tests/onnx/test_wrapper_onnx_ops.py @@ -7,7 +7,7 @@ import torch_npu.onnx from torch_npu.utils.path_manager import PathManager from torch_npu.testing.testcase import run_tests -import ads.common.ops.onnx as onnx_ads +import mx_driving.common.ops.onnx as onnx_ads DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] diff --git a/tests/torch/test_bev_pool.py b/tests/torch/test_bev_pool.py index fdad3c5..f5e0676 100644 --- a/tests/torch/test_bev_pool.py +++ b/tests/torch/test_bev_pool.py @@ -3,7 +3,7 @@ import torch import numpy as np import torch_npu from torch_npu.testing.testcase import TestCase, run_tests -from ads.perception.fused import bev_pool +from mx_driving.perception.fused import bev_pool DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] diff --git a/tests/torch/test_bev_pool_v2.py b/tests/torch/test_bev_pool_v2.py index af38938..f007dee 100644 --- a/tests/torch/test_bev_pool_v2.py +++ b/tests/torch/test_bev_pool_v2.py @@ -6,7 +6,7 @@ import torch_npu from ads_c import npu_bev_pool_v2_backward from torch_npu.testing.testcase import TestCase, run_tests -from ads.perception.fused import bev_pool_v2 +from mx_driving.perception.fused import bev_pool_v2 DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] diff --git a/tests/torch/test_boxes_overlap_bev.py b/tests/torch/test_boxes_overlap_bev.py index 62fa15c..6ec0229 100644 --- a/tests/torch/test_boxes_overlap_bev.py +++ b/tests/torch/test_boxes_overlap_bev.py @@ -6,7 +6,7 @@ import numpy as np import torch import torch_npu from torch_npu.testing.testcase import TestCase, run_tests -import ads.perception.vision +import mx_driving.perception.vision DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] @@ -282,7 +282,7 @@ class TestBoxesOverlapBev(TestCase): def npu_to_exec(self, npu_inputs): npu_boxes_a = npu_inputs.boxes_a npu_boxes_b = npu_inputs.boxes_b - npu_ans_overlap = ads.perception.vision.boxes_overlap_bev(npu_boxes_a, npu_boxes_b) + npu_ans_overlap = mx_driving.perception.vision.boxes_overlap_bev(npu_boxes_a, npu_boxes_b) return npu_ans_overlap.cpu().float().numpy() def check_precision(self, actual, expected, rtol=1e-4, atol=1e-4, msg=None): diff --git a/tests/torch/test_furthest_point_sample_with_dist.py b/tests/torch/test_furthest_point_sample_with_dist.py index be2fb2d..33f7c91 100644 --- a/tests/torch/test_furthest_point_sample_with_dist.py +++ b/tests/torch/test_furthest_point_sample_with_dist.py @@ -18,7 +18,7 @@ import numpy as np import torch_npu from torch_npu.testing.testcase import TestCase, run_tests -import ads.common +import mx_driving.common DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] @@ -65,7 +65,7 @@ class TestFurthestPointSampleWithDist(TestCase): def custom_op_exec(self, point_dist, point_num, input_dtype): point_dist_npu = torch.tensor(point_dist, dtype=input_dtype).npu() - output = ads.common.furthest_point_sample_with_dist(point_dist_npu, point_num) + output = mx_driving.common.furthest_point_sample_with_dist(point_dist_npu, point_num) return output.cpu().numpy() @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `FurthestPointSampleWithDist` is only supported on 910B, skip this ut!") diff --git a/tests/torch/test_furthest_point_sampling.py b/tests/torch/test_furthest_point_sampling.py index 2ea02ec..51632f4 100644 --- a/tests/torch/test_furthest_point_sampling.py +++ b/tests/torch/test_furthest_point_sampling.py @@ -20,7 +20,7 @@ import torch import torch_npu from torch_npu.testing.testcase import TestCase, run_tests from torch_npu.testing.common_utils import create_common_tensor -import ads.common +import mx_driving.common DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] @@ -108,7 +108,7 @@ class TestFurthestPointSample(TestCase): return myTest.getCpuRes() def npu_op_exec(self, myTest): - return ads.common.npu_furthest_point_sampling(myTest.point.clone().permute(0, 2, 1).npu(), myTest.numPoints) + return mx_driving.common.npu_furthest_point_sampling(myTest.point.clone().permute(0, 2, 1).npu(), myTest.numPoints) def compare_res(self, myTest): myTest.createData() diff --git a/tests/torch/test_knn.py b/tests/torch/test_knn.py index 673a1c5..113ffd6 100644 --- a/tests/torch/test_knn.py +++ b/tests/torch/test_knn.py @@ -1,7 +1,7 @@ import torch import numpy as np from torch_npu.testing.testcase import TestCase, run_tests -import ads.common +import mx_driving.common class TestKnn(TestCase): @@ -45,7 +45,7 @@ class TestKnn(TestCase): center_xyz = np.zeros((b, m, 3)).astype(np.float32) expected_idx, _ = self.cpu_op_exec([b, m, n, k, False], xyz, center_xyz) - idx = ads.common.knn(k, torch.from_numpy(xyz).npu(), torch.from_numpy(center_xyz).npu(), False) + idx = mx_driving.common.knn(k, torch.from_numpy(xyz).npu(), torch.from_numpy(center_xyz).npu(), False) self.assertRtolEqual(expected_idx, idx.cpu().numpy()) def test_knn_1(self): @@ -57,7 +57,7 @@ class TestKnn(TestCase): center_xyz = np.zeros((b, m, 3)).astype(np.float32) expected_idx, _ = self.cpu_op_exec([b, m, n, k, False], xyz, center_xyz) - idx = ads.common.knn(k, torch.from_numpy(xyz).npu(), torch.from_numpy(center_xyz).npu(), False) + idx = mx_driving.common.knn(k, torch.from_numpy(xyz).npu(), torch.from_numpy(center_xyz).npu(), False) self.assertRtolEqual(expected_idx, idx.cpu().numpy()) if __name__ == "__main__": diff --git a/tests/torch/test_multi_scale_deformable_attn_function.py b/tests/torch/test_multi_scale_deformable_attn_function.py index d10cd82..09e4e30 100644 --- a/tests/torch/test_multi_scale_deformable_attn_function.py +++ b/tests/torch/test_multi_scale_deformable_attn_function.py @@ -3,7 +3,7 @@ from collections import namedtuple import torch import torch_npu from torch_npu.testing.testcase import TestCase, run_tests -import ads.common +import mx_driving.common DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] @@ -130,7 +130,7 @@ class TestMultiScaleDeformableAttnFunction(TestCase): npu_sampling_locations = npu_inputs.sampling_locations npu_attention_weights = npu_inputs.attention_weights npu_grad_output = npu_inputs.grad_output - npu_output = ads.common.npu_multi_scale_deformable_attn_function(npu_value, npu_shapes, npu_offset, npu_sampling_locations, npu_attention_weights) + npu_output = mx_driving.common.npu_multi_scale_deformable_attn_function(npu_value, npu_shapes, npu_offset, npu_sampling_locations, npu_attention_weights) npu_output.backward(npu_grad_output) return ExecResults( output=npu_output.detach().cpu().numpy(), diff --git a/tests/torch/test_npu_dyn_voxelization.py b/tests/torch/test_npu_dyn_voxelization.py index bc49879..62af92e 100644 --- a/tests/torch/test_npu_dyn_voxelization.py +++ b/tests/torch/test_npu_dyn_voxelization.py @@ -5,7 +5,7 @@ import torch import torch_npu from torch_npu.testing.testcase import TestCase, run_tests -import ads.common +import mx_driving.common DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] @@ -52,7 +52,7 @@ class TestDynVoxelization(TestCase): def npu_to_exec(self, points, coors_range, voxel_size): max_num_points = -1 - dynamic_voxelization_npu = ads.common.Voxelization(voxel_size, coors_range, max_num_points) + dynamic_voxelization_npu = mx_driving.common.Voxelization(voxel_size, coors_range, max_num_points) coors = dynamic_voxelization_npu.forward(points) return coors diff --git a/tests/torch/test_npu_dynamic_scatter.py b/tests/torch/test_npu_dynamic_scatter.py index f72062d..0d07166 100644 --- a/tests/torch/test_npu_dynamic_scatter.py +++ b/tests/torch/test_npu_dynamic_scatter.py @@ -7,7 +7,7 @@ from torch_npu.testing.testcase import TestCase, run_tests from torch_npu.testing.common_utils import create_common_tensor import ads_c -import ads.common +import mx_driving.common DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] @@ -70,11 +70,11 @@ class TestDynamicScatter(TestCase): grad_point_feats[point_idx, :] = torch.where(mask_bit, grad_voxel_feats[voxel_idx, :], zero_tensor) def npu_op_exec(self, feats, coors, reduce_type): - output_feats, output_coors = ads.common.npu_dynamic_scatter(feats, coors, reduce_type) + output_feats, output_coors = mx_driving.common.npu_dynamic_scatter(feats, coors, reduce_type) return output_feats.cpu().numpy(), output_coors.cpu().numpy() def grad_npu_op_exec(self, feats, coors, reduce_type): - output_feats, output_coors = ads.common.npu_dynamic_scatter(feats, coors, reduce_type) + output_feats, output_coors = mx_driving.common.npu_dynamic_scatter(feats, coors, reduce_type) return output_feats.cpu().numpy(), output_coors.cpu().numpy() @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `DynamicScatter` is only supported on 910B, skip this ut!") diff --git a/tests/torch/test_npu_nms3d.py b/tests/torch/test_npu_nms3d.py index bdc41fc..9f2dcfe 100644 --- a/tests/torch/test_npu_nms3d.py +++ b/tests/torch/test_npu_nms3d.py @@ -8,7 +8,7 @@ import torch_npu from torch_npu.testing.common_utils import create_common_tensor from torch_npu.testing.testcase import TestCase, run_tests -import ads.common +import mx_driving.common torch.npu.config.allow_internal_format = False torch_npu.npu.set_compile_mode(jit_compile=False) @@ -246,7 +246,7 @@ class TestNms3d(TestCase): return keep, num_out def npu_to_exec(self, boxes, scores, threshold=0.0): - keep = ads.common.npu_nms3d(boxes, scores, threshold) + keep = mx_driving.common.npu_nms3d(boxes, scores, threshold) return keep.cpu() @unittest.skipIf(DEVICE_NAME != True, "OP `Nms3d` is only supported on 910B, skip this ut!") diff --git a/tests/torch/test_npu_nms3d_normal.py b/tests/torch/test_npu_nms3d_normal.py index f80c155..8525ba0 100644 --- a/tests/torch/test_npu_nms3d_normal.py +++ b/tests/torch/test_npu_nms3d_normal.py @@ -5,7 +5,7 @@ import numpy as np import torch_npu from torch_npu.testing.testcase import TestCase, run_tests -import ads.common +import mx_driving.common DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] @@ -24,7 +24,7 @@ class TestNms3dNormal(TestCase): np_inds = np.array([1, 0, 3]) boxes = torch.from_numpy(np_boxes) scores = torch.from_numpy(np_scores) - inds = ads.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3) + inds = mx_driving.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3) self.assertRtolEqual(inds.cpu().numpy(), np_inds) @@ -34,7 +34,7 @@ class TestNms3dNormal(TestCase): np_scores = np.random.rand(555).astype(np.float32) boxes = torch.from_numpy(np_boxes) scores = torch.from_numpy(np_scores) - inds = ads.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3) + inds = mx_driving.common.npu_nms3d_normal(boxes.npu(), scores.npu(), 0.3) self.assertRtolEqual(len(inds.cpu().numpy()), 148) diff --git a/tests/torch/test_npu_scatter_mean_grad.py b/tests/torch/test_npu_scatter_mean_grad.py index f521abe..baec6d2 100644 --- a/tests/torch/test_npu_scatter_mean_grad.py +++ b/tests/torch/test_npu_scatter_mean_grad.py @@ -5,7 +5,7 @@ from torch_scatter import scatter_mean import torch_npu from torch_npu.testing.testcase import TestCase, run_tests -import ads.common +import mx_driving.common DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] @@ -29,7 +29,7 @@ class TestScatterMeanGradFunction(TestCase): return result_cpu.numpy(), grad_out_tensor def npu_to_exec(self, index_tensor, grad_out_tensor, dim): - result_npu = ads.common.npu_scatter_mean_grad(grad_out_tensor.npu(), + result_npu = mx_driving.common.npu_scatter_mean_grad(grad_out_tensor.npu(), index_tensor.to(torch.int32).npu(), dim) return result_npu.cpu().numpy() diff --git a/tests/torch/test_points_in_box.py b/tests/torch/test_points_in_box.py index 4d6083c..195116f 100644 --- a/tests/torch/test_points_in_box.py +++ b/tests/torch/test_points_in_box.py @@ -17,7 +17,7 @@ import numpy as np import torch_npu from torch_npu.testing.testcase import TestCase, run_tests -import ads.common +import mx_driving.common DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] @@ -75,7 +75,7 @@ class TestPointsInBox(TestCase): [[[4, 6.928, 0], [6.928, 4, 0], [4, -6.928, 0], [6.928, -4, 0], [-4, 6.928, 0], [-6.928, 4, 0], [-4, -6.928, 0], [-6.928, -4, 0]]], dtype=torch.float32).npu() - point_indices = ads.common.npu_points_in_box(boxes, pts).cpu().numpy() + point_indices = mx_driving.common.npu_points_in_box(boxes, pts).cpu().numpy() expected_point_indices = torch.tensor([[-1, -1, 0, -1, 0, -1, -1, -1]], dtype=torch.int32).cpu().numpy() self.assertRtolEqual(point_indices, expected_point_indices) @@ -96,7 +96,7 @@ class TestPointsInBox(TestCase): points[b].float(), point_indices[b]) - point_indices_npu = ads.common.npu_points_in_box(boxes.npu(), points.npu()) + point_indices_npu = mx_driving.common.npu_points_in_box(boxes.npu(), points.npu()) self.assertRtolEqual(point_indices.numpy(), point_indices_npu.cpu().numpy()) @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `PointsInBox` is only supported on 910B, skip this ut!") @@ -116,7 +116,7 @@ class TestPointsInBox(TestCase): point_indices[b]) with self.assertRaisesRegex(RuntimeError, "boxes is larger than 200"): - point_indices_npu = ads.common.npu_points_in_box(boxes.npu(), points.npu()) + point_indices_npu = mx_driving.common.npu_points_in_box(boxes.npu(), points.npu()) @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `PointsInBox` is only supported on 910B, skip this ut!") def test_points_in_box_shape_large_points(self, device="npu"): @@ -134,7 +134,7 @@ class TestPointsInBox(TestCase): points[b].float(), point_indices[b]) - point_indices_npu = ads.common.npu_points_in_box(boxes.npu(), points.npu()) + point_indices_npu = mx_driving.common.npu_points_in_box(boxes.npu(), points.npu()) self.assertRtolEqual(point_indices.numpy(), point_indices_npu.cpu().numpy()) @unittest.skipIf(DEVICE_NAME != 'Ascend910B', "OP `PointsInBox` is only supported on 910B, skip this ut!") @@ -154,7 +154,7 @@ class TestPointsInBox(TestCase): point_indices[b]) with self.assertRaisesRegex(RuntimeError, "points_in_box npu only support batch size = 1"): - point_indices_npu = ads.common.npu_points_in_box(boxes.npu(), points.npu()) + point_indices_npu = mx_driving.common.npu_points_in_box(boxes.npu(), points.npu()) if __name__ == "__main__": diff --git a/tests/torch/test_roipoint_pool3d.py b/tests/torch/test_roipoint_pool3d.py index 4d56c05..fbb3897 100644 --- a/tests/torch/test_roipoint_pool3d.py +++ b/tests/torch/test_roipoint_pool3d.py @@ -17,7 +17,7 @@ import torch import torch_npu import numpy as np from torch_npu.testing.testcase import TestCase, run_tests -from ads.common import RoIPointPool3d +from mx_driving.common import RoIPointPool3d DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] diff --git a/tests/torch/test_rotated_iou.py b/tests/torch/test_rotated_iou.py index 94b50b6..e6eb08c 100644 --- a/tests/torch/test_rotated_iou.py +++ b/tests/torch/test_rotated_iou.py @@ -3,7 +3,7 @@ import numpy as np import torch_npu from torch_npu.testing.testcase import TestCase, run_tests from torch_npu.testing.common_utils import create_common_tensor -import ads.common +import mx_driving.common class TestRotatedIou(TestCase): @@ -47,7 +47,7 @@ class TestRotatedIou(TestCase): return output def npu_op_exec(self, box1, box2, trans=False): - output = ads.common.npu_rotated_iou(box1, box2, trans, 0, True, 0.0, 0.0) + output = mx_driving.common.npu_rotated_iou(box1, box2, trans, 0, True, 0.0, 0.0) output = output.detach().cpu().numpy() return output diff --git a/tests/torch/test_scatter_max.py b/tests/torch/test_scatter_max.py index cbe351a..b1967c7 100644 --- a/tests/torch/test_scatter_max.py +++ b/tests/torch/test_scatter_max.py @@ -5,7 +5,7 @@ import torch_scatter import torch_npu from torch_npu.testing.testcase import TestCase, run_tests from torch_npu.testing.common_utils import create_common_tensor -import ads.common +import mx_driving.common class TestScatterMaxWithArgmax(TestCase): @@ -26,7 +26,7 @@ class TestScatterMaxWithArgmax(TestCase): def npu_op_exec(self, updates, indices): updates.requires_grad = True - output, output_argmax = ads.common.scatter_max(updates, indices) + output, output_argmax = mx_driving.common.scatter_max(updates, indices) output.backward(torch.ones_like(output)) output_grad = updates.grad.cpu() diff --git a/tests/torch/test_sparse_conv3d.py b/tests/torch/test_sparse_conv3d.py index ff55e6f..77181dd 100644 --- a/tests/torch/test_sparse_conv3d.py +++ b/tests/torch/test_sparse_conv3d.py @@ -13,7 +13,7 @@ import numpy as np import torch import torch_npu from torch import nn -from ads.spconv import SparseSequential, SparseConvTensor, SparseConv3d +from mx_driving.spconv import SparseSequential, SparseConvTensor, SparseConv3d def generate_sparse_data(shape, diff --git a/tests/torch/test_subm_sparse_conv3d.py b/tests/torch/test_subm_sparse_conv3d.py index 0fdd3fe..6c2e041 100644 --- a/tests/torch/test_subm_sparse_conv3d.py +++ b/tests/torch/test_subm_sparse_conv3d.py @@ -13,7 +13,7 @@ import numpy as np import torch import torch_npu from torch import nn -from ads.spconv import SparseSequential, SparseConvTensor, SubMConv3d +from mx_driving.spconv import SparseSequential, SparseConvTensor, SubMConv3d def generate_sparse_data(shape, diff --git a/tests/torch/test_three_interpolate.py b/tests/torch/test_three_interpolate.py index b1f38de..65a4c4a 100644 --- a/tests/torch/test_three_interpolate.py +++ b/tests/torch/test_three_interpolate.py @@ -1,6 +1,6 @@ import torch from torch_npu.testing.testcase import TestCase, run_tests -import ads.common +import mx_driving.common class TestThreeinterpolate(TestCase): @@ -51,7 +51,7 @@ class TestThreeinterpolate(TestCase): [3.8760e-01, 1.0300e-02, 8.3569e-09, 3.8760e-01, 3.8760e-01, 1.9723e-01]]], ).npu() - output = ads.common.three_interpolate(features, idx, weight) + output = mx_driving.common.three_interpolate(features, idx, weight) self.assertRtolEqual(expected_output, output) diff --git a/tests/torch/test_three_nn.py b/tests/torch/test_three_nn.py index 2b849b5..f9a5561 100644 --- a/tests/torch/test_three_nn.py +++ b/tests/torch/test_three_nn.py @@ -1,7 +1,7 @@ import torch import numpy as np from torch_npu.testing.testcase import TestCase, run_tests -import ads.common +import mx_driving.common class TestThreeNN(TestCase): @@ -38,7 +38,7 @@ class TestThreeNN(TestCase): target = np.zeros((batch, npoint, 3)).astype(np.float32) expected_dist, expected_idx = self.cpu_op_exec(batch, npoint, source, target) - dist, idx = ads.common.three_nn(torch.from_numpy(target).npu(), torch.from_numpy(source).npu()) + dist, idx = mx_driving.common.three_nn(torch.from_numpy(target).npu(), torch.from_numpy(source).npu()) self.assertRtolEqual(expected_dist, dist.cpu().numpy()) self.assertRtolEqual(expected_idx, idx.cpu().numpy()) @@ -244,7 +244,7 @@ class TestThreeNN(TestCase): N = 19 npoint = 7 expected_dist, expected_idx = self.cpu_op_exec(batch, npoint, source, target) - dist, idx = ads.common.three_nn(torch.from_numpy(target).npu(), torch.from_numpy(source).npu()) + dist, idx = mx_driving.common.three_nn(torch.from_numpy(target).npu(), torch.from_numpy(source).npu()) self.assertRtolEqual(expected_dist, dist.cpu().numpy()) self.assertRtolEqual(expected_idx, idx.cpu().numpy()) @@ -258,7 +258,7 @@ class TestThreeNN(TestCase): target = np.zeros((batch, npoint, 3)).astype(np.float32) expected_dist, expected_idx = self.cpu_op_exec(batch, npoint, source, target) - dist, idx = ads.common.three_nn(torch.from_numpy(target).npu(), torch.from_numpy(source).npu()) + dist, idx = mx_driving.common.three_nn(torch.from_numpy(target).npu(), torch.from_numpy(source).npu()) self.assertRtolEqual(expected_dist, dist.cpu().numpy()) self.assertRtolEqual(expected_idx, idx.cpu().numpy()) @@ -272,7 +272,7 @@ class TestThreeNN(TestCase): target = np.zeros((batch, npoint, 3)).astype(np.float32) expected_dist, expected_idx = self.cpu_op_exec(batch, npoint, source, target) - dist, idx = ads.common.three_nn(torch.from_numpy(target).npu(), torch.from_numpy(source).npu()) + dist, idx = mx_driving.common.three_nn(torch.from_numpy(target).npu(), torch.from_numpy(source).npu()) self.assertRtolEqual(expected_dist, dist.cpu().numpy()) self.assertRtolEqual(expected_idx, idx.cpu().numpy()) diff --git a/tests/torch/test_voxel_pooling_train.py b/tests/torch/test_voxel_pooling_train.py index b2e1112..02e27c0 100644 --- a/tests/torch/test_voxel_pooling_train.py +++ b/tests/torch/test_voxel_pooling_train.py @@ -6,7 +6,7 @@ import torch_npu from torch_npu.testing.testcase import TestCase, run_tests import ads_c import numpy as np -import ads.common +import mx_driving.common DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10] @@ -71,7 +71,7 @@ class TestVoxelPoolingTrain(TestCase): return pos, result, grad_features_cpu def npu_to_exec(self, geom_xyz, input_features, voxel_num): - result = ads.common.npu_voxel_pooling_train( + result = mx_driving.common.npu_voxel_pooling_train( geom_xyz, input_features, voxel_num) result.backward(result) -- Gitee