diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 0b05c42a64..c378fb1da1 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -25,6 +25,8 @@ jobs:
           style: file
       - name: Check index.rst
        run: |
+          which clang-format
+          clang-format --version
           python .github/scripts/check_index_rst.py docs/en/index.rst
           python .github/scripts/check_index_rst.py docs/zh_cn/index.rst
      - name: Check markdown link
diff --git a/configs/mmpose/pose-detection_yolox-pose_sdk_dynamic.py b/configs/mmpose/pose-detection_yolox-pose_sdk_dynamic.py
new file mode 100644
index 0000000000..b4e4d6a203
--- /dev/null
+++ b/configs/mmpose/pose-detection_yolox-pose_sdk_dynamic.py
@@ -0,0 +1,9 @@
+_base_ = ['./pose-detection_static.py', '../_base_/backends/sdk.py']
+
+codebase_config = dict(model_type='sdk_yoloxpose')
+
+backend_config = dict(pipeline=[
+    dict(type='LoadImageFromFile'),
+    dict(type='PoseToDetConverter'),
+    dict(type='PackDetPoseInputs')
+])
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp
index 46f9921e62..743379117b 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp
@@ -69,6 +69,8 @@ void mmdeploy_pose_detector_release_result(mmdeploy_pose_detection_t* results, i
   for (int i = 0; i < count; ++i) {
     delete[] results[i].point;
     delete[] results[i].score;
+    delete[] results[i].bboxes;
+    delete[] results[i].bbox_score;
   }
   delete[] results;
 }
@@ -156,16 +158,27 @@ int mmdeploy_pose_detector_get_result(mmdeploy_value_t output,
   for (const auto& bbox_result : detections) {
     auto& res = _results[result_idx++];
     auto size = bbox_result.key_points.size();
+    auto num_bbox = bbox_result.detections.size();
     res.point = new mmdeploy_point_t[size];
     res.score = new float[size];
     res.length = static_cast<int>(size);
+    res.bboxes = new mmdeploy_rect_t[num_bbox];
+    res.bbox_score = new float[num_bbox];
+    res.num_bbox = static_cast<int>(num_bbox);
     for (int k = 0; k < size; k++) {
       res.point[k].x = bbox_result.key_points[k].bbox[0];
       res.point[k].y = bbox_result.key_points[k].bbox[1];
       res.score[k] = bbox_result.key_points[k].score;
     }
+    for (int k = 0; k < num_bbox; k++) {
+      res.bboxes[k].left = bbox_result.detections[k].boundingbox[0];
+      res.bboxes[k].top = bbox_result.detections[k].boundingbox[1];
+      res.bboxes[k].right = bbox_result.detections[k].boundingbox[2];
+      res.bboxes[k].bottom = bbox_result.detections[k].boundingbox[3];
+      res.bbox_score[k] = bbox_result.detections[k].score;
+    }
   }
   *results = _results.release();
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h
index ff0987cee4..42e7f3fb5c 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h
@@ -19,7 +19,10 @@ extern "C" {
 typedef struct mmdeploy_pose_detection_t {
   mmdeploy_point_t* point;  ///< keypoint
   float* score;             ///< keypoint score
+  mmdeploy_rect_t* bboxes;  ///< bboxes
+  float* bbox_score;        ///< bboxes score
   int length;               ///< number of keypoint
+  int num_bbox;             ///< number of bboxes
 } mmdeploy_pose_detection_t;

 typedef struct mmdeploy_pose_detector* mmdeploy_pose_detector_t;
@@ -76,6 +79,7 @@ MMDEPLOY_API int mmdeploy_pose_detector_apply(mmdeploy_pose_detector_t detector,
 * bboxes, must be release by \ref mmdeploy_pose_detector_release_result
 * @return status code of the operation
 */
+
MMDEPLOY_API int mmdeploy_pose_detector_apply_bbox(mmdeploy_pose_detector_t detector,
                                                   const mmdeploy_mat_t* mats, int mat_count,
                                                   const mmdeploy_rect_t* bboxes,
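Note on the result layout: the C struct keeps every keypoint of an image in one flat array, so `length` is the total keypoint count over all `num_bbox` boxes and each box owns `length / num_bbox` consecutive entries (the C#, Java and Python bindings below all rely on this). A minimal NumPy sketch of slicing that layout; the values here are hypothetical, not part of the diff:

    import numpy as np

    # Hypothetical flat result mirroring mmdeploy_pose_detection_t:
    num_bbox = 2                         # res->num_bbox
    length = 34                          # res->length, keypoints of all boxes
    points = np.random.rand(length, 2)   # res->point, (x, y) per keypoint
    scores = np.random.rand(length)      # res->score

    kpts_per_box = length // num_bbox    # 17 keypoints per box in this sketch
    for b in range(num_bbox):
        sl = slice(b * kpts_per_box, (b + 1) * kpts_per_box)
        box_points, box_scores = points[sl], scores[sl]
        # box_points / box_scores now belong to box b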
diff --git a/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/PoseDetector.cs b/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/PoseDetector.cs
index 4e750c1f44..e79814774b 100644
--- a/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/PoseDetector.cs
+++ b/csrc/mmdeploy/apis/csharp/MMDeploy/APIs/PoseDetector.cs
@@ -8,7 +8,10 @@ internal unsafe struct CPoseDetect
 {
     public Pointf* Point;
     public float* Score;
+    public Rect* BBox;
+    public float* BBoxScore;
     public int Length;
+    public int BBoxNum;
 }
 #pragma warning restore 0649
@@ -28,6 +31,16 @@ public struct PoseDetect
        /// </summary>
        public List<float> Scores;

+       /// <summary>
+       /// BBox.
+       /// </summary>
+       public Rect BBox;
+
+       /// <summary>
+       /// BBoxScore.
+       /// </summary>
+       public float BBoxScore;
+
        /// <summary>
        /// Init points and scores if empty.
        /// </summary>
@@ -40,6 +53,17 @@ private void Init()
            }
        }

+       /// <summary>
+       /// Set bounding box and score.
+       /// </summary>
+       /// <param name="bbox">BBox.</param>
+       /// <param name="score">BBox score.</param>
+       public void SetBBox(Rect bbox, float score)
+       {
+           BBox = bbox;
+           BBoxScore = score;
+       }
+
        /// <summary>
        /// Add single keypoint to list.
        /// </summary>
@@ -170,13 +194,26 @@ private unsafe void FormatResult(int matCount, int* bboxCount, CPoseDetect* resu
                PoseDetectorOutput outi = default;
                for (int j = 0; j < bboxCount[i]; j++)
                {
-                   PoseDetect boxRes = default;
-                   for (int k = 0; k < results->Length; k++)
+                   int bbox_num = results->BBoxNum;
+                   int num_point_each_bbox = results->Length / results->BBoxNum;
+                   for (int box_id = 0; box_id < bbox_num; box_id++)
                    {
-                       boxRes.Add(results->Point[k], results->Score[k]);
+                       PoseDetect boxRes = default;
+                       Rect bbox = default;
+                       float score = results->BBoxScore[box_id];
+                       bbox.Left = results->BBox[box_id].Left;
+                       bbox.Top = results->BBox[box_id].Top;
+                       bbox.Right = results->BBox[box_id].Right;
+                       bbox.Bottom = results->BBox[box_id].Bottom;
+                       boxRes.SetBBox(bbox, score);
+                       for (int kp_id = 0; kp_id < num_point_each_bbox; kp_id++)
+                       {
+                           boxRes.Add(results->Point[(box_id * num_point_each_bbox) + kp_id], results->Score[(box_id * num_point_each_bbox) + kp_id]);
+                       }
+
+                       outi.Add(boxRes);
                    }
-                   outi.Add(boxRes);
                    results++;
                    total++;
                }
diff --git a/csrc/mmdeploy/apis/java/mmdeploy/PoseDetector.java b/csrc/mmdeploy/apis/java/mmdeploy/PoseDetector.java
index 03cdd66a06..9a8073899b 100644
--- a/csrc/mmdeploy/apis/java/mmdeploy/PoseDetector.java
+++ b/csrc/mmdeploy/apis/java/mmdeploy/PoseDetector.java
@@ -17,13 +17,21 @@ public static class Result {
        /** Scores of points */
        public float[] score;

+       /** BBox */
+       public Rect[] bbox;
+
+       /** BBox score */
+       public float[] bboxScore;
+
        /** Initializes a new instance of the Result class.
         * @param point: points.
         * @param score: scores of points.
        */
-       public Result(PointF[] point, float[] score) {
+       public Result(PointF[] point, float[] score, Rect[] bbox, float[] bboxScore) {
            this.point = point;
            this.score = score;
+           this.bbox = bbox;
+           this.bboxScore = bboxScore;
        }
    }
diff --git a/csrc/mmdeploy/apis/java/native/mmdeploy_PoseDetector.cpp b/csrc/mmdeploy/apis/java/native/mmdeploy_PoseDetector.cpp
index 4956555a6e..a6e1067817 100644
--- a/csrc/mmdeploy/apis/java/native/mmdeploy_PoseDetector.cpp
+++ b/csrc/mmdeploy/apis/java/native/mmdeploy_PoseDetector.cpp
@@ -37,10 +37,13 @@ jobjectArray Java_mmdeploy_PoseDetector_apply(JNIEnv *env, jobject thiz, jlong h
     return NULL;
   }
   auto result_cls = env->FindClass("mmdeploy/PoseDetector$Result");
-  auto result_ctor = env->GetMethodID(result_cls, "<init>", "([Lmmdeploy/PointF;[F)V");
+  auto result_ctor =
+      env->GetMethodID(result_cls, "<init>", "([Lmmdeploy/PointF;[F[Lmmdeploy/Rect;[F)V");
   auto array = env->NewObjectArray(size, result_cls, nullptr);
   auto pointf_cls = env->FindClass("mmdeploy/PointF");
   auto pointf_ctor = env->GetMethodID(pointf_cls, "<init>", "(FF)V");
+  auto rect_cls = env->FindClass("mmdeploy/Rect");
+  auto rect_ctor = env->GetMethodID(rect_cls, "<init>", "(FFFF)V");

   for (int i = 0; i < size; ++i) {
     auto keypoint_array = env->NewObjectArray(results[i].length, pointf_cls, nullptr);
@@ -51,7 +56,19 @@ jobjectArray Java_mmdeploy_PoseDetector_apply(JNIEnv *env, jobject thiz, jlong h
     }
     auto score_array = env->NewFloatArray(results[i].length);
     env->SetFloatArrayRegion(score_array, 0, results[i].length, (jfloat *)results[i].score);
-    auto res = env->NewObject(result_cls, result_ctor, keypoint_array, score_array);
+    auto bbox_array = env->NewObjectArray(results[i].num_bbox, rect_cls, nullptr);
+    for (int j = 0; j < results[i].num_bbox; j++) {
+      auto bboxj =
+          env->NewObject(rect_cls, rect_ctor, (jfloat)results[i].bboxes[j].left,
+                         (jfloat)results[i].bboxes[j].top, (jfloat)results[i].bboxes[j].right,
+                         (jfloat)results[i].bboxes[j].bottom);
+      env->SetObjectArrayElement(bbox_array, j, bboxj);
+    }
+    auto bbox_score_array = env->NewFloatArray(results[i].num_bbox);
+    env->SetFloatArrayRegion(bbox_score_array, 0, results[i].num_bbox,
+                             (jfloat *)results[i].bbox_score);
+    auto res = env->NewObject(result_cls, result_ctor, keypoint_array, score_array, bbox_array,
+                              bbox_score_array);
     env->SetObjectArrayElement(array, i, res);
   }
   mmdeploy_pose_detector_release_result(results, size);
diff --git a/csrc/mmdeploy/apis/python/pose_detector.cpp b/csrc/mmdeploy/apis/python/pose_detector.cpp
index f9d99eaf14..88133b5a2f 100644
--- a/csrc/mmdeploy/apis/python/pose_detector.cpp
+++ b/csrc/mmdeploy/apis/python/pose_detector.cpp
@@ -65,20 +65,34 @@ class PyPoseDetector {
     auto output = py::list{};
     auto result = detection;
+
     for (int i = 0; i < mats.size(); i++) {
-      int n_point = result->length;
-      auto pred = py::array_t<float>({bbox_count[i], n_point, 3});
-      auto dst = pred.mutable_data();
-      for (int j = 0; j < bbox_count[i]; j++) {
+      int n_point_total = result->length;
+      int n_bbox = result->num_bbox;
+      int n_point = n_bbox > 0 ? n_point_total / n_bbox : 0;
+      int pts_ind = 0;
+      auto pred_pts = py::array_t<float>({n_bbox * n_point, 3});
+      auto pred_bbox = py::array_t<float>({n_bbox, 5});
+      auto dst_pts = pred_pts.mutable_data();
+      auto dst_bbox = pred_bbox.mutable_data();
+
+      for (int j = 0; j < n_bbox; j++) {
         for (int k = 0; k < n_point; k++) {
-          dst[0] = result->point[k].x;
-          dst[1] = result->point[k].y;
-          dst[2] = result->score[k];
-          dst += 3;
+          pts_ind = j * n_point + k;
+          dst_pts[0] = result->point[pts_ind].x;
+          dst_pts[1] = result->point[pts_ind].y;
+          dst_pts[2] = result->score[pts_ind];
+          dst_pts += 3;
         }
-        result++;
+        dst_bbox[0] = result->bboxes[j].left;
+        dst_bbox[1] = result->bboxes[j].top;
+        dst_bbox[2] = result->bboxes[j].right;
+        dst_bbox[3] = result->bboxes[j].bottom;
+        dst_bbox[4] = result->bbox_score[j];
+        dst_bbox += 5;
       }
-      output.append(std::move(pred));
+      result++;
+      output.append(py::make_tuple(std::move(pred_bbox), std::move(pred_pts)));
     }
     int total = std::accumulate(bbox_count.begin(), bbox_count.end(), 0);
@@ -101,12 +119,12 @@ static PythonBindingRegisterer register_pose_detector{[](py::module& m) {
           }),
           py::arg("model_path"), py::arg("device_name"), py::arg("device_id") = 0)
       .def("__call__",
-           [](PyPoseDetector* self, const PyImage& img) -> py::array {
+           [](PyPoseDetector* self, const PyImage& img) -> py::tuple {
             return self->Apply({img}, {})[0];
           })
       .def(
           "__call__",
-          [](PyPoseDetector* self, const PyImage& img, const Rect& box) -> py::array {
+          [](PyPoseDetector* self, const PyImage& img, const Rect& box) -> py::tuple {
            std::vector<std::vector<Rect>> bboxes;
            bboxes.push_back({box});
            return self->Apply({img}, bboxes)[0];
@@ -115,7 +133,7 @@ static PythonBindingRegisterer register_pose_detector{[](py::module& m) {
       .def(
           "__call__",
           [](PyPoseDetector* self, const PyImage& img,
-             const std::vector<Rect>& bboxes) -> py::array {
+             const std::vector<Rect>& bboxes) -> py::tuple {
            std::vector<std::vector<Rect>> _bboxes;
            _bboxes.push_back(bboxes);
            return self->Apply({img}, _bboxes)[0];
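With this binding change, each image now yields a (bboxes, keypoints) tuple instead of a single array: bboxes has shape (N, 5) holding x1, y1, x2, y2, score, and keypoints has shape (N*K, 3) holding x, y, score. A minimal usage sketch, assuming the mmdeploy_runtime package name used by the SDK demos and a placeholder model directory:

    import cv2
    from mmdeploy_runtime import PoseDetector

    img = cv2.imread('demo.jpg')
    detector = PoseDetector(
        model_path='mmdeploy_models/yolox_pose',  # placeholder SDK model dir
        device_name='cpu')

    dets, pts = detector(img)                 # dets: (N, 5); pts: (N*K, 3)
    pts = pts.reshape(dets.shape[0], -1, 3)   # one (K, 3) block per box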
diff --git a/csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp b/csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp
index 4d7c0e7a92..df2cfc56a7 100644
--- a/csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp
+++ b/csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp
@@ -55,6 +55,9 @@ class TopdownHeatmapBaseHeadDecode : public MMPose {
     }

     auto& img_metas = _data["img_metas"];
+    if (img_metas.contains("bbox")) {
+      from_value(img_metas["bbox"], bbox_);
+    }

     vector<float> center;
     vector<float> scale;
@@ -78,6 +81,7 @@ class TopdownHeatmapBaseHeadDecode : public MMPose {
       output.key_points.push_back({{x, y}, s});
       data += 3;
     }
+    output.detections.push_back({{bbox_[0], bbox_[1], bbox_[2], bbox_[3]}, bbox_[4]});
     return to_value(std::move(output));
   }
@@ -354,6 +358,7 @@ class TopdownHeatmapBaseHeadDecode : public MMPose {
   float valid_radius_factor_{0.0546875f};
   bool use_udp_{false};
   string target_type_{"GaussianHeatmap"};
+  vector<float> bbox_{0, 0, 1, 1, 1};
 };

 MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMPose, TopdownHeatmapBaseHeadDecode);
diff --git a/csrc/mmdeploy/codebase/mmpose/keypoints_from_regression.cpp b/csrc/mmdeploy/codebase/mmpose/keypoints_from_regression.cpp
index 06f372c788..b1f58047bd 100644
--- a/csrc/mmdeploy/codebase/mmpose/keypoints_from_regression.cpp
+++ b/csrc/mmdeploy/codebase/mmpose/keypoints_from_regression.cpp
@@ -36,7 +36,9 @@ class DeepposeRegressionHeadDecode : public MMPose {
     }

     auto& img_metas = _data["img_metas"];
-
+    if (img_metas.contains("bbox")) {
+      from_value(img_metas["bbox"], bbox_);
+    }
     vector<float> center;
     vector<float> scale;
     from_value(img_metas["center"], center);
@@ -60,6 +62,7 @@ class DeepposeRegressionHeadDecode : public MMPose {
       output.key_points.push_back({{x, y}, s});
       data += 3;
     }
+    output.detections.push_back({{bbox_[0], bbox_[1], bbox_[2], bbox_[3]}, bbox_[4]});
     return to_value(std::move(output));
   }
@@ -106,6 +109,9 @@ class DeepposeRegressionHeadDecode : public MMPose {
     *(data + 0) = *(data + 0) * scale_x + center[0] - scale[0] * 0.5;
     *(data + 1) = *(data + 1) * scale_y + center[1] - scale[1] * 0.5;
   }
+
+ private:
+  vector<float> bbox_{0, 0, 1, 1, 1};
 };

 MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMPose, DeepposeRegressionHeadDecode);
diff --git a/csrc/mmdeploy/codebase/mmpose/mmpose.h b/csrc/mmdeploy/codebase/mmpose/mmpose.h
index 3da9a9332a..da61644513 100644
--- a/csrc/mmdeploy/codebase/mmpose/mmpose.h
+++ b/csrc/mmdeploy/codebase/mmpose/mmpose.h
@@ -17,8 +17,14 @@ struct PoseDetectorOutput {
     float score;
     MMDEPLOY_ARCHIVE_MEMBERS(bbox, score);
   };
+  struct BBox {
+    std::array<float, 4> boundingbox;  // x1, y1, x2, y2
+    float score;
+    MMDEPLOY_ARCHIVE_MEMBERS(boundingbox, score);
+  };
   std::vector<KeyPoint> key_points;
-  MMDEPLOY_ARCHIVE_MEMBERS(key_points);
+  std::vector<BBox> detections;
+  MMDEPLOY_ARCHIVE_MEMBERS(key_points, detections);
 };

 MMDEPLOY_DECLARE_CODEBASE(MMPose, mmpose);
diff --git a/csrc/mmdeploy/codebase/mmpose/simcc_label.cpp b/csrc/mmdeploy/codebase/mmpose/simcc_label.cpp
index ffa0eebf25..71cdb2c6eb 100644
--- a/csrc/mmdeploy/codebase/mmpose/simcc_label.cpp
+++ b/csrc/mmdeploy/codebase/mmpose/simcc_label.cpp
@@ -49,7 +49,9 @@ class SimCCLabelDecode : public MMPose {
     }

     auto& img_metas = _data["img_metas"];
-
+    if (img_metas.contains("bbox")) {
+      from_value(img_metas["bbox"], bbox_);
+    }
     Tensor keypoints({Device{"cpu"}, DataType::kFLOAT, {simcc_x.shape(0), simcc_x.shape(1), 2}});
     Tensor scores({Device{"cpu"}, DataType::kFLOAT, {simcc_x.shape(0), simcc_x.shape(1), 1}});
     get_simcc_maximum(simcc_x, simcc_y, keypoints, scores);
@@ -73,6 +75,7 @@ class SimCCLabelDecode : public MMPose {
       keypoints_data += 2;
       scores_data += 1;
     }
+    output.detections.push_back({{bbox_[0], bbox_[1], bbox_[2], bbox_[3]}, bbox_[4]});
     return to_value(output);
   }
@@ -103,6 +106,7 @@ class SimCCLabelDecode : public MMPose {
   }

 private:
+  vector<float> bbox_{0, 0, 1, 1, 1};
   bool flip_test_{false};
   bool shift_heatmap_{false};
   float simcc_split_ratio_{2.0};
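All three top-down decoders now emit detections next to key_points; when img_metas carries no "bbox", the {0, 0, 1, 1, 1} placeholder box is reported. A rough Python view of the serialized structure (field names taken from mmpose.h, values invented):

    result = {
        'key_points': [   # all boxes' keypoints, concatenated in box order
            {'bbox': [123.0, 45.6], 'score': 0.91},  # 'bbox' holds the (x, y) point
        ],
        'detections': [   # one entry per box
            {'boundingbox': [80.0, 20.0, 260.0, 400.0], 'score': 0.99},
        ],
    }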
diff --git a/csrc/mmdeploy/codebase/mmpose/yolox_pose.cpp b/csrc/mmdeploy/codebase/mmpose/yolox_pose.cpp
new file mode 100644
index 0000000000..0b8c9659e1
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/yolox_pose.cpp
@@ -0,0 +1,96 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+
+#include "mmdeploy/core/device.h"
+#include "mmdeploy/core/registry.h"
+#include "mmdeploy/core/serialization.h"
+#include "mmdeploy/core/tensor.h"
+#include "mmdeploy/core/utils/device_utils.h"
+#include "mmdeploy/core/utils/formatter.h"
+#include "mmdeploy/core/value.h"
+#include "mmdeploy/experimental/module_adapter.h"
+#include "mmpose.h"
+#include "opencv_utils.h"
+
+namespace mmdeploy::mmpose {
+
+using std::string;
+using std::vector;
+
+class YOLOXPose : public MMPose {
+ public:
+  explicit YOLOXPose(const Value& config) : MMPose(config) {
+    if (config.contains("params")) {
+      auto& params = config["params"];
+      if (params.contains("score_thr")) {
+        from_value(params["score_thr"], score_thr_);
+      }
+    }
+  }
+
+  Result<Value> operator()(const Value& _data, const Value& _prob) {
+    MMDEPLOY_DEBUG("preprocess_result: {}", _data);
+    MMDEPLOY_DEBUG("inference_result: {}", _prob);
+
+    Device cpu_device{"cpu"};
+    OUTCOME_TRY(auto dets,
+                MakeAvailableOnDevice(_prob["dets"].get<Tensor>(), cpu_device, stream()));
+    OUTCOME_TRY(auto keypoints,
+                MakeAvailableOnDevice(_prob["keypoints"].get<Tensor>(), cpu_device, stream()));
+    OUTCOME_TRY(stream().Wait());
+    if (!(dets.shape().size() == 3 && dets.data_type() == DataType::kFLOAT)) {
+      MMDEPLOY_ERROR("unsupported `dets` tensor, shape: {}, dtype: {}", dets.shape(),
+                     (int)dets.data_type());
+      return Status(eNotSupported);
+    }
+    if (!(keypoints.shape().size() == 4 && keypoints.data_type() == DataType::kFLOAT)) {
+      MMDEPLOY_ERROR("unsupported `keypoints` tensor, shape: {}, dtype: {}", keypoints.shape(),
+                     (int)keypoints.data_type());
+      return Status(eNotSupported);
+    }
+    auto& img_metas = _data["img_metas"];
+    vector<float> scale_factor;
+    if (img_metas.contains("scale_factor")) {
+      from_value(img_metas["scale_factor"], scale_factor);
+    } else {
+      scale_factor = {1.f, 1.f, 1.f, 1.f};
+    }
+    PoseDetectorOutput output;
+
+    float* keypoints_data = keypoints.data<float>();
+    float* dets_data = dets.data<float>();
+    int num_dets = dets.shape(1), num_pts = keypoints.shape(2);
+    float s = 0, x1 = 0, y1 = 0, x2 = 0, y2 = 0;
+
+    for (int i = 0; i < dets.shape(0) * num_dets; i++) {
+      x1 = (*(dets_data++)) / scale_factor[0];
+      y1 = (*(dets_data++)) / scale_factor[1];
+      x2 = (*(dets_data++)) / scale_factor[2];
+      y2 = (*(dets_data++)) / scale_factor[3];
+      s = *(dets_data++);
+      if (s <= score_thr_) {
+        keypoints_data += num_pts * 3;
+        continue;
+      }
+      output.detections.push_back({{x1, y1, x2, y2}, s});
+      for (int k = 0; k < num_pts; k++) {
+        x1 = (*(keypoints_data++)) / scale_factor[0];
+        y1 = (*(keypoints_data++)) / scale_factor[1];
+        s = *(keypoints_data++);
+        output.key_points.push_back({{x1, y1}, s});
+      }
+    }
+    return to_value(output);
+  }
+
+ protected:
+  float score_thr_ = 0.001f;
+};
+
+MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMPose, YOLOXPose);
+
+}  // namespace mmdeploy::mmpose
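The decode step above is easy to mirror in NumPy for testing: dets is validated as a rank-3 float tensor of shape (1, N, 5) and keypoints as rank-4 (1, N, K, 3); coordinates are divided by scale_factor, and boxes at or below score_thr are skipped together with their keypoints. A hedged sketch under those shape assumptions:

    import numpy as np

    def decode_yolox_pose(dets, keypoints, scale_factor, score_thr=0.001):
        """dets: (1, N, 5); keypoints: (1, N, K, 3); scale_factor: (4,)."""
        dets = dets[0].copy()
        kpts = keypoints[0].copy()
        keep = dets[:, 4] > score_thr          # mirrors `s <= score_thr_` skip
        dets, kpts = dets[keep], kpts[keep]
        dets[:, :4] /= scale_factor            # x1, y1, x2, y2 back to input scale
        kpts[..., 0] /= scale_factor[0]        # keypoint x uses scale_factor[0]
        kpts[..., 1] /= scale_factor[1]        # keypoint y uses scale_factor[1]
        return dets, kpts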
diff --git a/csrc/mmdeploy/device/cpu/cpu_device.cpp b/csrc/mmdeploy/device/cpu/cpu_device.cpp
index eb4fa4c365..c000c74ca3 100644
--- a/csrc/mmdeploy/device/cpu/cpu_device.cpp
+++ b/csrc/mmdeploy/device/cpu/cpu_device.cpp
@@ -161,6 +161,10 @@ Result<void> CpuPlatformImpl::Copy(Buffer src, Buffer dst, size_t size, size_t s
                                    size_t dst_offset, Stream stream) {
   auto src_ptr = src.GetNative();
   auto dst_ptr = dst.GetNative();
+  if (size == 0) {
+    return success();
+  }
+
   if (!src_ptr || !dst_ptr) {
     return Status(eInvalidArgument);
   }
diff --git a/demo/csharp/pose_detection/Program.cs b/demo/csharp/pose_detection/Program.cs
index 5cfcc59f79..43d9184864 100644
--- a/demo/csharp/pose_detection/Program.cs
+++ b/demo/csharp/pose_detection/Program.cs
@@ -71,10 +71,13 @@ static void Main(string[] args)
            int index = 0;
            foreach (var box in output[0].Results)
            {
-               for (int i = 0; i < box.Points.Count; i++)
+               var pt1 = new OpenCvSharp.Point((int)box.BBox.Left, (int)box.BBox.Top);
+               var pt2 = new OpenCvSharp.Point((int)box.BBox.Right, (int)box.BBox.Bottom);
+               var color = new Scalar(palette[index][0], palette[index][1], palette[index][2]);
+               Cv2.Rectangle(imgs[0], pt1, pt2, color, 2);
+               foreach (var point in box.Points)
                {
-                   Cv2.Circle(imgs[0], (int)box.Points[i].X, (int)box.Points[i].Y, 1,
-                       new Scalar(palette[index][0], palette[index][1], palette[index][2]), 2);
+                   Cv2.Circle(imgs[0], (int)point.X, (int)point.Y, 1, color, 2);
                }
                index++;
            }
diff --git a/demo/csrc/CMakeLists.txt b/demo/csrc/CMakeLists.txt
index 8de1c05cbf..c6d18d4e5b 100644
--- a/demo/csrc/CMakeLists.txt
+++ b/demo/csrc/CMakeLists.txt
@@ -8,24 +8,25 @@ endif ()

 function(add_example task folder name)
+  set(exec_name "${folder}.${name}")
   if ((NOT task) OR (task IN_LIST MMDEPLOY_TASKS))
     # Search for c/cpp sources
     file(GLOB _SRCS ${folder}/${name}.c*)
-    add_executable(${name} ${_SRCS})
+    add_executable(${exec_name} ${_SRCS})
     if (NOT (MSVC OR APPLE))
       # Disable new dtags so that executables can run even without LD_LIBRARY_PATH set
-      target_link_libraries(${name} PRIVATE -Wl,--disable-new-dtags)
+      target_link_libraries(${exec_name} PRIVATE -Wl,--disable-new-dtags)
     endif ()
     if (MMDEPLOY_BUILD_SDK_MONOLITHIC)
-      target_link_libraries(${name} PRIVATE mmdeploy ${OpenCV_LIBS})
+      target_link_libraries(${exec_name} PRIVATE mmdeploy ${OpenCV_LIBS})
     else ()
       # Load MMDeploy modules
-      mmdeploy_load_static(${name} MMDeployStaticModules)
-      mmdeploy_load_dynamic(${name} MMDeployDynamicModules)
+      mmdeploy_load_static(${exec_name} MMDeployStaticModules)
+      mmdeploy_load_dynamic(${exec_name} MMDeployDynamicModules)
       # Link to MMDeploy libraries
-      target_link_libraries(${name} PRIVATE MMDeployLibs ${OpenCV_LIBS})
+      target_link_libraries(${exec_name} PRIVATE MMDeployLibs ${OpenCV_LIBS})
     endif ()
-    install(TARGETS ${name} RUNTIME DESTINATION bin)
+    install(TARGETS ${exec_name} RUNTIME DESTINATION bin)
   endif ()
 endfunction()

@@ -36,6 +37,7 @@ add_example(detector c batch_object_detection)
 add_example(segmentor c image_segmentation)
 add_example(restorer c image_restorer)
 add_example(text_detector c ocr)
+add_example(pose_detector c det_pose)
 add_example(pose_detector c pose_detection)
 add_example(rotated_detector c rotated_object_detection)
 add_example(video_recognizer c video_recognition)
diff --git a/demo/csrc/c/pose_detection.cpp b/demo/csrc/c/pose_detection.cpp
index 11d0ca6483..a8ce37e4f7 100644
--- a/demo/csrc/c/pose_detection.cpp
+++ b/demo/csrc/c/pose_detection.cpp
@@ -39,8 +39,13 @@ int main(int argc, char *argv[]) {
   }

   for (int i = 0; i < res->length; i++) {
     cv::circle(img, {(int)res->point[i].x, (int)res->point[i].y}, 1, {0, 255, 0}, 2);
   }
+  for (int i = 0; i < res->num_bbox; i++) {
+    const auto& box = res->bboxes[i];
+    cv::rectangle(img, cv::Point{(int)box.left, (int)box.top},
+                  cv::Point{(int)box.right, (int)box.bottom}, cv::Scalar{0, 255, 0});
+  }
   cv::imwrite("output_pose.png", img);

   mmdeploy_pose_detector_release_result(res, 1);
diff --git a/demo/csrc/cpp/pose_detector.cxx b/demo/csrc/cpp/pose_detector.cxx
index 025afdd794..d04d16c59c 100644
--- a/demo/csrc/cpp/pose_detector.cxx
+++ b/demo/csrc/cpp/pose_detector.cxx
@@ -10,6 +10,7 @@ int main(int argc, char *argv[]) {
     fprintf(stderr, "usage:\n  pose_detection device_name model_path image_path\n");
     return 1;
   }
+  fprintf(stdout, "Start running pose_detector cpp demo\n");
   auto device_name = argv[1];
   auto model_path = argv[2];
   auto image_path = argv[3];
@@ -34,10 +35,18 @@ int main(int argc, char *argv[]) {
   }

   auto res = detector.Apply(img);
-
+  // visualize results
   for (int i = 0; i < res[0].length; i++) {
     cv::circle(img, {(int)res[0].point[i].x, (int)res[0].point[i].y}, 1, {0, 255, 0}, 2);
   }
+  for (int i = 0; i < res[0].num_bbox; i++) {
+    const auto& box = res[0].bboxes[i];
+    const float score = res[0].bbox_score[i];
+    fprintf(stdout, "box %d, left=%.2f, top=%.2f, right=%.2f, bottom=%.2f, score=%.4f\n", i,
+            box.left, box.top, box.right, box.bottom, score);
+    cv::rectangle(img, cv::Point{(int)box.left, (int)box.top},
+                  cv::Point{(int)box.right, (int)box.bottom}, cv::Scalar{0, 255, 0});
+  }
   cv::imwrite("output_pose.png", img);

   return 0;
diff --git a/demo/java/src/PoseDetection.java b/demo/java/src/PoseDetection.java
index 6c6bba4f7b..da4b500e10 100644
--- a/demo/java/src/PoseDetection.java
+++ b/demo/java/src/PoseDetection.java
@@ -9,8 +9,17 @@
 import java.io.File;
 import java.io.IOException;

+import org.opencv.core.Core;
+import org.opencv.imgcodecs.Imgcodecs;
+import org.opencv.imgproc.Imgproc;
+import org.opencv.core.Point;
+import org.opencv.core.Scalar;
+
 /** @description: this is a class for PoseDetection java demo. */
 public class PoseDetection {
+    static {
+        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
+    }
     /** The main function for PoseDetection Java demo.
      * @param deviceName: the device name of the demo.
@@ -34,17 +43,47 @@ public static void main(String[] args) {
            poseEstimator = new PoseDetector(modelPath, deviceName, 0);

            // load image
-           Mat img = Utils.loadImage(imagePath);
+           org.opencv.core.Mat cvimg = Imgcodecs.imread(imagePath);
+           Mat img = Utils.cvMatToMat(cvimg);

            // apply pose estimator
            PoseDetector.Result[] result = poseEstimator.apply(img);

            // print results
            for (PoseDetector.Result value : result) {
+               int num_bbox = value.bbox.length;
+               int kpt_each_bbox = value.point.length / num_bbox;
+               for (int i = 0; i < num_bbox; i++) {
+                   System.out.printf("bbox %d\n", i);
+                   System.out.printf("left: %f, top: %f, right: %f, bottom: %f\n", value.bbox[i].left,
+                       value.bbox[i].top, value.bbox[i].right, value.bbox[i].bottom);
+
+                   int base_idx = kpt_each_bbox * i;
+                   for (int j = base_idx; j < base_idx + kpt_each_bbox; j++) {
+                       System.out.printf("point %d, x: %d, y: %d\n", j - base_idx, (int)value.point[j].x, (int)value.point[j].y);
+                   }
+                   System.out.printf("\n");
+               }
+           }
+
+           // save image
+           for (PoseDetector.Result value : result) {
+               for (int i = 0; i < value.bbox.length; i++) {
+                   Point pt1 = new Point((int)value.bbox[i].left, (int)value.bbox[i].top);
+                   Point pt2 = new Point((int)value.bbox[i].right, (int)value.bbox[i].bottom);
+                   Scalar color = new Scalar(0, 0, 255);
+                   int thickness = 2;
+                   Imgproc.rectangle(cvimg, pt1, pt2, color, thickness);
+               }
                for (int i = 0; i < value.point.length; i++) {
-                   System.out.printf("point %d, x: %d, y: %d\n", i, (int)value.point[i].x, (int)value.point[i].y);
+                   Point center = new Point((int)value.point[i].x, (int)value.point[i].y);
+                   int radius = 2;
+                   Scalar color = new Scalar(0, 255, 0);
+                   int thickness = 2;
+                   Imgproc.circle(cvimg, center, radius, color, thickness);
                }
            }
+           Imgcodecs.imwrite("output_pose.jpg", cvimg);
        } catch (Exception e) {
            System.out.println("exception: " + e.getMessage());
        } finally {
diff --git a/demo/python/det_pose.py b/demo/python/det_pose.py
index 4f6f8494ce..4ca9259b7a 100644
--- a/demo/python/det_pose.py
+++ b/demo/python/det_pose.py
@@ -76,8 +76,8 @@ def main():
     bboxes = bboxes[keep, :4]

     # apply pose detector
-    poses = pose_detector(img, bboxes)
-
+    dets, poses = pose_detector(img, bboxes)
+    poses = poses.reshape(dets.shape[0], -1, poses.shape[1])
     visualize(img, poses, 'det_pose_output.jpg', 0.5, 1280)
diff --git a/demo/python/pose_detection.py b/demo/python/pose_detection.py
index 2e8afecf8b..7339bc22eb 100644
--- a/demo/python/pose_detection.py
+++ b/demo/python/pose_detection.py
@@ -40,11 +40,17 @@ def main():
     bbox = np.array(args.bbox, dtype=int)
     bbox[2:] += bbox[:2]
     result = detector(img, bbox)
-    print(result)
-
-    _, point_num, _ = result.shape
-    points = result[:, :, :2].reshape(point_num, 2)
-    for [x, y] in points.astype(int):
+    dets, points = result
+    dets = dets.reshape(-1, 5)
+    for box in dets:
+        score = box[4]
+        x1, y1, x2, y2 = [int(_) for _ in box[:4]]
+        if score < 0.3:
+            continue
+        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0))
+
+    points = points[..., :2].reshape(-1, 2).astype(np.int32)
+    for [x, y] in points:
         cv2.circle(img, (x, y), 1, (0, 255, 0), 2)

     cv2.imwrite('output_pose.png', img)
diff --git a/mmdeploy/codebase/mmpose/deploy/pose_detection.py b/mmdeploy/codebase/mmpose/deploy/pose_detection.py
index 6584d995fb..f8cddc7d2f 100644
--- a/mmdeploy/codebase/mmpose/deploy/pose_detection.py
+++ b/mmdeploy/codebase/mmpose/deploy/pose_detection.py
@@ -36,14 +36,24 @@ def process_model_config(
     cfg = copy.deepcopy(model_cfg)
     test_pipeline = cfg.test_dataloader.dataset.pipeline
     data_preprocessor = cfg.model.data_preprocessor
-    codec = cfg.codec
-    if isinstance(codec, list):
-        codec = codec[-1]
-    input_size = codec['input_size'] if input_shape is None else input_shape
+    codec = getattr(cfg, 'codec', None)
+    if codec is not None:
+        if isinstance(codec, list):
+            codec = codec[-1]
+        input_size = codec['input_size'] if input_shape is None \
+            else input_shape
+    else:
+        input_size = cfg.img_scale
+
     test_pipeline[0] = dict(type='LoadImageFromFile')
     for i in reversed(range(len(test_pipeline))):
         trans = test_pipeline[i]
-        if trans['type'] == 'PackPoseInputs':
+        if trans['type'].startswith('mmdet.'):
+            trans['type'] = trans['type'][6:]
+
+        if trans['type'] in [
+                'PackPoseInputs', 'PackDetPoseInputs', 'PoseToDetConverter'
+        ]:
             test_pipeline.pop(i)
         elif trans['type'] == 'GetBBoxCenterScale':
             trans['type'] = 'TopDownGetBboxCenterScale'
@@ -53,22 +63,39 @@ def process_model_config(
             trans['type'] = 'TopDownAffine'
             trans['image_size'] = input_size
             trans.pop('input_size')
-
-    test_pipeline.append(
-        dict(
-            type='Normalize',
-            mean=data_preprocessor.mean,
-            std=data_preprocessor.std,
-            to_rgb=data_preprocessor.bgr_to_rgb))
-    test_pipeline.append(dict(type='ImageToTensor', keys=['img']))
-    test_pipeline.append(
-        dict(
-            type='Collect',
-            keys=['img'],
-            meta_keys=[
-                'img_shape', 'pad_shape', 'ori_shape', 'img_norm_cfg',
-                'scale_factor', 'bbox_score', 'center', 'scale'
-            ]))
+        elif trans['type'] == 'Resize':
+            trans['size'] = trans.pop('scale')
+
+    if data_preprocessor.type == 'mmdet.DetDataPreprocessor':
+        # DetDataPreprocessor does not have mean, std, bgr_to_rgb
+        test_pipeline.append(
+            dict(
+                type='Normalize', mean=[0, 0, 0], std=[1, 1, 1], to_rgb=False))
+        test_pipeline.append(dict(type='DefaultFormatBundle'))
+        test_pipeline.append(
+            dict(
+                type='Collect',
+                keys=['img'],
+                meta_keys=[
+                    'ori_shape', 'img_shape', 'pad_shape', 'scale_factor',
+                    'flip_indices', 'bbox'
+                ]))
+    else:
+        test_pipeline.append(
+            dict(
+                type='Normalize',
+                mean=data_preprocessor.mean,
+                std=data_preprocessor.std,
+                to_rgb=data_preprocessor.bgr_to_rgb))
+        test_pipeline.append(dict(type='ImageToTensor', keys=['img']))
+        test_pipeline.append(
+            dict(
+                type='Collect',
+                keys=['img'],
+                meta_keys=[
+                    'img_shape', 'pad_shape', 'ori_shape', 'img_norm_cfg',
+                    'scale_factor', 'bbox_score', 'center', 'scale', 'bbox'
+                ]))

     cfg.test_dataloader.dataset.pipeline = test_pipeline
     return cfg
@@ -345,13 +372,16 @@ def get_preprocess(self, *args, **kwargs) -> Dict:

     def get_postprocess(self, *args, **kwargs) -> Dict:
         """Get the postprocess information for SDK."""
-        codec = self.model_cfg.codec
-        if isinstance(codec, (list, tuple)):
-            codec = codec[-1]
-        component = 'UNKNOWN'
         params = copy.deepcopy(self.model_cfg.model.test_cfg)
-        params.update(codec)
-        if self.model_cfg.model.type == 'TopdownPoseEstimator':
+        component = 'UNKNOWN'
+        model_type = self.model_cfg.model.type
+        if model_type == 'YOLODetector':
+            component = 'YOLOXPose'
+        elif model_type == 'TopdownPoseEstimator':
+            codec = self.model_cfg.codec
+            if isinstance(codec, (list, tuple)):
+                codec = codec[-1]
+            params.update(codec)
             component = 'TopdownHeatmapSimpleHeadDecode'
             if codec.type == 'MSRAHeatmap':
                 params['post_process'] = 'default'
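The rewrite loop above converts an mmdet-style test pipeline into the SDK form: 'mmdet.'-prefixed transforms lose the prefix, pack/convert steps are dropped, and Resize's 'scale' key becomes 'size'. A toy before/after under those rules (the input pipeline here is invented for illustration):

    pipeline = [
        dict(type='LoadImageFromFile'),
        dict(type='mmdet.Resize', scale=(640, 640), keep_ratio=True),
        dict(type='PoseToDetConverter'),
        dict(type='PackDetPoseInputs'),
    ]
    # after process_model_config-style rewriting:
    rewritten = [
        dict(type='LoadImageFromFile'),
        dict(type='Resize', size=(640, 640), keep_ratio=True),
        # pack/convert transforms removed;
        # Normalize / DefaultFormatBundle / Collect appended afterwards
    ]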
diff --git a/mmdeploy/codebase/mmpose/deploy/pose_detection_model.py b/mmdeploy/codebase/mmpose/deploy/pose_detection_model.py
index 1686a089fe..54434d9dfb 100644
--- a/mmdeploy/codebase/mmpose/deploy/pose_detection_model.py
+++ b/mmdeploy/codebase/mmpose/deploy/pose_detection_model.py
@@ -199,7 +199,7 @@ def pack_yolox_pose_result(self, preds: List[torch.Tensor],
             # the precision test requires keypoints to be np.ndarray
             pred_instances.keypoints = keypoints.cpu().numpy()
             pred_instances.keypoint_scores = keypoint_scores
-            pred_instances.lebels = torch.zeros(bboxes.shape[0])
+            pred_instances.labels = torch.zeros(bboxes.shape[0])
             data_sample.pred_instances = pred_instances
         return data_samples
@@ -239,7 +239,9 @@ def forward(self,
             # inputs are c,h,w, sdk requires h,w,c
             input_img = input_img.permute(1, 2, 0)
             input_img = input_img.contiguous().detach().cpu().numpy()
-            keypoints = self.wrapper.handle(input_img, bboxes.tolist())
+            dets, keypoints = self.wrapper.handle(input_img, bboxes.tolist())
+            num_bbox = dets.shape[0]
+            keypoints = keypoints.reshape(num_bbox, -1, 3)
             pred = InstanceData(
                 keypoints=keypoints[..., :2],
                 keypoint_scores=keypoints[..., 2])
@@ -250,6 +252,51 @@ def forward(self,
         return results

+@__BACKEND_MODEL.register_module('sdk_yoloxpose')
+class SDKYoloxPoseModel(End2EndModel):
+    """SDK inference class, converts SDK output to mmpose format."""
+
+    def __init__(self, *args, **kwargs):
+        kwargs['data_preprocessor'] = None
+        super(SDKYoloxPoseModel, self).__init__(*args, **kwargs)
+
+    def forward(self,
+                inputs: List[torch.Tensor],
+                data_samples: List[BaseDataElement],
+                mode: str = 'predict',
+                **kwargs) -> list:
+        """Run forward inference.
+
+        Args:
+            inputs (List[torch.Tensor]): A list of input images
+                in [H x W x C] format.
+            data_samples (List[BaseDataElement]):
+                Data samples of image metas.
+            mode (str): Test mode; only 'predict' is supported.
+            **kwargs: Other key-pair arguments.
+
+        Returns:
+            list: A list of predictions.
+        """
+        num_pts = self.model_cfg.model.bbox_head.head_module.num_keypoints
+        for input_img, sample in zip(inputs, data_samples):
+            # inputs are c,h,w, sdk requires h,w,c
+            input_img = input_img.permute(1, 2, 0)
+            input_img = input_img.contiguous().detach().cpu().numpy()
+            dets, keypoints = self.wrapper.handle(input_img)
+            num_bbox = dets.shape[0]
+            keypoints = keypoints.reshape(num_bbox, num_pts, 3)
+            pred_instances = InstanceData()
+            pred_instances.bboxes = dets[:, :4]
+            pred_instances.bbox_scores = dets[:, 4]
+            pred_instances.keypoints = keypoints[..., :2]
+            pred_instances.keypoint_scores = keypoints[..., 2]
+            pred_instances.labels = torch.zeros(dets.shape[0])
+            sample.pred_instances = pred_instances
+
+        return data_samples
+
+
 def build_pose_detection_model(
         model_files: Sequence[str],
         model_cfg: Union[str, mmengine.Config],
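For orientation, a rough sketch of driving the new 'sdk_yoloxpose' backend model. The tail of build_pose_detection_model's signature is truncated above, so the deploy_cfg/device arguments and both file paths below are assumptions for illustration, not taken from this diff:

    from mmdeploy.codebase.mmpose.deploy.pose_detection_model import (
        build_pose_detection_model)

    # Hypothetical call: an SDK model dir exported with the
    # pose-detection_yolox-pose_sdk_dynamic.py config selects the
    # 'sdk_yoloxpose' backend model registered above.
    model = build_pose_detection_model(
        model_files=['mmdeploy_models/yolox_pose_sdk'],
        model_cfg='yolox-pose_s_8xb32-300e_coco.py',
        deploy_cfg='configs/mmpose/pose-detection_yolox-pose_sdk_dynamic.py',
        device='cpu')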