diff --git a/mmcv/ops/csrc/pytorch/npu/border_align_npu.cpp b/mmcv/ops/csrc/pytorch/npu/border_align_npu.cpp
index a797502f69..e5c2a4195e 100644
--- a/mmcv/ops/csrc/pytorch/npu/border_align_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/border_align_npu.cpp
@@ -10,32 +10,32 @@ void border_align_forward_impl(const Tensor &input, const Tensor &boxes,
 void border_align_forward_npu(const Tensor &input, const Tensor &boxes,
                               Tensor output, Tensor argmax_idx,
                               const int pool_size) {
-  TORCH_CHECK(input.size(0) == boxes.size(0),
-              "The batch sizes of feature map and rois must be the same.");
-  TORCH_CHECK(input.size(1) % 4 == 0,
-              "The number of channels must be divisible by 4.");
-  TORCH_CHECK(pool_size >= 2, "The pool size should be larger than 2.");
-  int32_t batch_size = input.size(0);
-  int32_t channels = input.size(1);
-  int32_t height = input.size(2);
-  int32_t width = input.size(3);
-  at::Tensor feature_map = input.permute({0, 2, 3, 1}).contiguous();
-  at::Tensor rois_map = boxes.contiguous();
-  at::Tensor temp_tensor = at::zeros(
-      {batch_size, height * width, pool_size + 1, channels}, input.options());
-  EXEC_NPU_CMD(aclnnBorderAlign, feature_map, rois_map, pool_size, temp_tensor);
-  auto max_result = temp_tensor.max(-2);
-  at::Tensor output_ = std::get<0>(max_result).to(at::kFloat);
-  output_ = output_.reshape({batch_size, height * width, 4, channels / 4})
-                .permute({0, 3, 1, 2})
-                .contiguous();
-  output.copy_(output_);
-  at::Tensor argmax_idx_ = std::get<1>(max_result).to(at::kInt);
-  argmax_idx_ =
-      argmax_idx_.reshape({batch_size, height * width, 4, channels / 4})
-          .permute({0, 3, 1, 2})
-          .contiguous();
-  argmax_idx.copy_(argmax_idx_);
+  TORCH_CHECK(input.size(0) == boxes.size(0),
+              "The batch sizes of feature map and rois must be the same.");
+  TORCH_CHECK(input.size(1) % 4 == 0,
+              "The number of channels must be divisible by 4.");
+  TORCH_CHECK(pool_size >= 2, "The pool size should be larger than 2.");
+  int32_t batch_size = input.size(0);
+  int32_t channels = input.size(1);
+  int32_t height = input.size(2);
+  int32_t width = input.size(3);
+  at::Tensor feature_map = input.permute({0, 2, 3, 1}).contiguous();
+  at::Tensor rois_map = boxes.contiguous();
+  at::Tensor temp_tensor = at::zeros(
+      {batch_size, height * width, pool_size + 1, channels}, input.options());
+  EXEC_NPU_CMD(aclnnBorderAlign, feature_map, rois_map, pool_size, temp_tensor);
+  auto max_result = temp_tensor.max(-2);
+  at::Tensor output_ = std::get<0>(max_result).to(at::kFloat);
+  output_ = output_.reshape({batch_size, height * width, 4, channels / 4})
+                .permute({0, 3, 1, 2})
+                .contiguous();
+  output.copy_(output_);
+  at::Tensor argmax_idx_ = std::get<1>(max_result).to(at::kInt);
+  argmax_idx_ =
+      argmax_idx_.reshape({batch_size, height * width, 4, channels / 4})
+          .permute({0, 3, 1, 2})
+          .contiguous();
+  argmax_idx.copy_(argmax_idx_);
 }
 
 REGISTER_NPU_IMPL(border_align_forward_impl, border_align_forward_npu);
@@ -47,18 +47,18 @@ void border_align_backward_impl(const Tensor &grad_output, const Tensor &boxes,
 void border_align_backward_npu(const Tensor &grad_output, const Tensor &boxes,
                                const Tensor &argmax_idx, Tensor grad_input,
                                const int pool_size) {
-  TORCH_CHECK(grad_output.dim() == 4,
-              "grad_out.dim() must be 4, but got: ", grad_output.dim());
-  TORCH_CHECK(boxes.dim() == 3, "idx.dim() must be 3, but got: ", boxes.dim());
-  TORCH_CHECK(argmax_idx.dim() == 4,
-              "argmax_idx.dim() must be 4, but got: ", argmax_idx.dim());
+  TORCH_CHECK(grad_output.dim() == 4,
+              "grad_out.dim() must be 4, but got: ", grad_output.dim());
+  TORCH_CHECK(boxes.dim() == 3, "idx.dim() must be 3, but got: ", boxes.dim());
+  TORCH_CHECK(argmax_idx.dim() == 4,
+              "argmax_idx.dim() must be 4, but got: ", argmax_idx.dim());
 
-  int32_t batch_size = grad_output.size(0);
-  int32_t feat_channels = grad_output.size(1) * 4;
-  int32_t channels = grad_output.size(1);
-  int32_t box_size = boxes.size(1);
-  int32_t height = grad_input.size(2);
-  int32_t width = grad_input.size(3);
+  int32_t batch_size = grad_output.size(0);
+  int32_t feat_channels = grad_output.size(1) * 4;
+  int32_t channels = grad_output.size(1);
+  int32_t box_size = boxes.size(1);
+  int32_t height = grad_input.size(2);
+  int32_t width = grad_input.size(3);
   EXEC_NPU_CMD(aclnnBorderAlignGrad, grad_output, boxes, argmax_idx, channels,
                box_size, height, width, pool_size, batch_size, grad_input);
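
For reference, below is a minimal CPU-side sketch of the shape bookkeeping that border_align_forward_npu performs around the EXEC_NPU_CMD call. The torch::randn tensor is a stand-in for the aclnnBorderAlign result (an assumption for illustration only; the real values come from the NPU kernel), and the sizes B, C, H, W, pool_size are arbitrary example values.

#include <torch/torch.h>
#include <iostream>

int main() {
  const int64_t B = 2, C = 8, H = 5, W = 6, pool_size = 10;

  // NCHW feature map, permuted to NHWC as the forward kernel does.
  at::Tensor input = torch::randn({B, C, H, W});
  at::Tensor feature_map = input.permute({0, 2, 3, 1}).contiguous();

  // Stand-in for the aclnnBorderAlign output buffer: pool_size + 1
  // samples per box border, laid out as [B, H*W, pool_size + 1, C].
  at::Tensor temp_tensor = torch::randn({B, H * W, pool_size + 1, C});

  // Max over the pooled-samples axis; the channel axis is then split
  // into 4 borders of C / 4 channels each, matching the kernel's
  // reshape({B, H*W, 4, C/4}).permute({0, 3, 1, 2}) sequence.
  auto max_result = temp_tensor.max(-2);
  at::Tensor output = std::get<0>(max_result)
                          .reshape({B, H * W, 4, C / 4})
                          .permute({0, 3, 1, 2})
                          .contiguous();
  // max() returns int64 indices; the kernel downcasts to int32.
  at::Tensor argmax = std::get<1>(max_result)
                          .reshape({B, H * W, 4, C / 4})
                          .permute({0, 3, 1, 2})
                          .contiguous()
                          .to(at::kInt);

  // Both print [2, 2, 30, 4], i.e. [B, C/4, H*W, 4].
  std::cout << output.sizes() << " " << argmax.sizes() << std::endl;
  return 0;
}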