From 725d95c6ce9cde2455315fb12a6f9717805a564e Mon Sep 17 00:00:00 2001 From: Piotr Skalski Date: Wed, 27 Mar 2024 00:12:12 +0100 Subject: [PATCH] time in zone / dwell time demo (#1026) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * initial commit * initial stream processing script * fix(pre_commit): 🎨 auto format pre-commit hooks * work in progress * fix(pre_commit): 🎨 auto format pre-commit hooks * work in progress * fix(pre_commit): 🎨 auto format pre-commit hooks * ultralytics file and stream versions ready * fix(pre_commit): 🎨 auto format pre-commit hooks * ultralytics file and stream versions ready * class based filtering * fix(pre_commit): 🎨 auto format pre-commit hooks * timer improvements * inference static file processing script * fix(pre_commit): 🎨 auto format pre-commit hooks * inference stream processing script * fix(pre_commit): 🎨 auto format pre-commit hooks * all scripts are working * fix(pre_commit): 🎨 auto format pre-commit hooks * all video or stream processing scripts refactored * fix(pre_commit): 🎨 auto format pre-commit hooks * README.md update * fix(pre_commit): 🎨 auto format pre-commit hooks * initial version of draw ones script * fix(pre_commit): 🎨 auto format pre-commit hooks * loading image or video, drawing multiple polygons, quiting app, and aborting currently drawn polygon works. * fix(pre_commit): 🎨 auto format pre-commit hooks * loading image or video, drawing multiple polygons, quiting app, and aborting currently drawn polygon works. * fix(pre_commit): 🎨 auto format pre-commit hooks * Dynamic drawing of currently edited polygon. * fix(pre_commit): 🎨 auto format pre-commit hooks * Drawing tool is ready. * fix(pre_commit): 🎨 auto format pre-commit hooks * Final touches. * fix(pre_commit): 🎨 auto format pre-commit hooks * Update README.md * fix(pre_commit): 🎨 auto format pre-commit hooks * Update README.md --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- examples/time_in_zone/.gitignore | 9 + examples/time_in_zone/README.md | 262 ++++++++++++++++++ .../time_in_zone/inference_file_example.py | 132 +++++++++ .../inference_naive_stream_example.py | 142 ++++++++++ .../time_in_zone/inference_stream_example.py | 158 +++++++++++ examples/time_in_zone/requirements.txt | 5 + .../scripts/download_from_youtube.py | 46 +++ examples/time_in_zone/scripts/draw_zones.py | 176 ++++++++++++ .../time_in_zone/scripts/stream_from_file.py | 104 +++++++ .../time_in_zone/ultralytics_file_example.py | 144 ++++++++++ .../ultralytics_naive_stream_example.py | 154 ++++++++++ .../ultralytics_stream_example.py | 173 ++++++++++++ examples/time_in_zone/utils/__init__.py | 0 examples/time_in_zone/utils/general.py | 66 +++++ examples/time_in_zone/utils/timers.py | 88 ++++++ 15 files changed, 1659 insertions(+) create mode 100644 examples/time_in_zone/.gitignore create mode 100644 examples/time_in_zone/README.md create mode 100644 examples/time_in_zone/inference_file_example.py create mode 100644 examples/time_in_zone/inference_naive_stream_example.py create mode 100644 examples/time_in_zone/inference_stream_example.py create mode 100644 examples/time_in_zone/requirements.txt create mode 100644 examples/time_in_zone/scripts/download_from_youtube.py create mode 100644 examples/time_in_zone/scripts/draw_zones.py create mode 100644 examples/time_in_zone/scripts/stream_from_file.py create mode 100644 examples/time_in_zone/ultralytics_file_example.py create mode 100644 
examples/time_in_zone/ultralytics_naive_stream_example.py
 create mode 100644 examples/time_in_zone/ultralytics_stream_example.py
 create mode 100644 examples/time_in_zone/utils/__init__.py
 create mode 100644 examples/time_in_zone/utils/general.py
 create mode 100644 examples/time_in_zone/utils/timers.py

diff --git a/examples/time_in_zone/.gitignore b/examples/time_in_zone/.gitignore
new file mode 100644
index 000000000..34efd9e06
--- /dev/null
+++ b/examples/time_in_zone/.gitignore
@@ -0,0 +1,9 @@
+data/
+venv*/
+*.pt
+*.pth
+*.mp4
+*.mov
+*.png
+*.jpg
+*.jpeg
diff --git a/examples/time_in_zone/README.md b/examples/time_in_zone/README.md
new file mode 100644
index 000000000..3898c4525
--- /dev/null
+++ b/examples/time_in_zone/README.md
@@ -0,0 +1,262 @@
+# time in zone
+
+## 👋 hello
+
+A practical demonstration of leveraging computer vision to analyze wait times and
+monitor the duration that objects or individuals spend in predefined areas of video
+frames. This example project is perfect for retail analytics or traffic management
+applications.
+
+https://github.com/roboflow/supervision/assets/26109316/d051cc8a-dd15-41d4-aa36-d38b86334c39
+
+## 💻 install
+
+- clone repository and navigate to example directory
+
+  ```bash
+  git clone https://github.com/roboflow/supervision.git
+  cd supervision/examples/time_in_zone
+  ```
+
+- set up a Python environment and activate it [optional]
+
+  ```bash
+  python3 -m venv venv
+  source venv/bin/activate
+  ```
+
+- install required dependencies
+
+  ```bash
+  pip install -r requirements.txt
+  ```
+
+## 🛠 scripts
+
+### `download_from_youtube`
+
+This script allows you to download a video from YouTube.
+
+- `--url`: The full URL of the YouTube video you wish to download.
+- `--output_path` (optional): Specifies the directory where the video will be saved.
+- `--file_name` (optional): Sets the name of the saved video file.
+
+```bash
+python scripts/download_from_youtube.py \
+--url "https://youtu.be/8zyEwAa50Q" \
+--output_path "data/checkout" \
+--file_name "video.mp4"
+```
+
+```bash
+python scripts/download_from_youtube.py \
+--url "https://youtu.be/MNn9qKG2UFI" \
+--output_path "data/traffic" \
+--file_name "video.mp4"
+```
+
+### `stream_from_file`
+
+This script allows you to stream video files from a directory. It's an awesome way to
+mock a live video stream for local testing. The video will be streamed in a loop under
+the `rtsp://localhost:8554/live0.stream` URL. This script requires Docker to be installed.
+
+- `--video_directory`: Directory containing video files to stream.
+- `--number_of_streams`: Number of video files to stream.
+
+```bash
+python scripts/stream_from_file.py \
+--video_directory "data/checkout" \
+--number_of_streams 1
+```
+
+```bash
+python scripts/stream_from_file.py \
+--video_directory "data/traffic" \
+--number_of_streams 1
+```
+
+### `draw_zones`
+
+If you want to test time in zone analysis on your own video, you can use this
+script to design custom zones and save results as a JSON file. The script will open a
+window where you can draw polygons on the source image or video file. The polygons will
+be saved as a JSON file.
+
+- `--source_path`: Path to the source image or video file for drawing polygons.
+- `--zone_configuration_path`: Path where the polygon annotations will be saved as a JSON file.
+
+
+- `enter` - finish drawing the current polygon.
+- `escape` - cancel drawing the current polygon.
+- `q` - quit the drawing window.
+- `s` - save zone configuration to a JSON file.
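For reference, the saved zone configuration is a plain JSON list of polygons, where each polygon is a list of `[x, y]` pixel coordinates. This is the format `save_polygons_to_json` in `scripts/draw_zones.py` writes and `load_zones_config` in `utils/general.py` reads; the coordinates below are purely illustrative:

```json
[
  [[100, 100], [620, 100], [620, 420], [100, 420]],
  [[700, 120], [1180, 120], [1180, 460], [700, 460]]
]
```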
+
+```bash
+python scripts/draw_zones.py \
+--source_path "data/checkout/video.mp4" \
+--zone_configuration_path "data/checkout/custom_config.json"
+```
+
+```bash
+python scripts/draw_zones.py \
+--source_path "data/traffic/video.mp4" \
+--zone_configuration_path "data/traffic/custom_config.json"
+```
+
+https://github.com/roboflow/supervision/assets/26109316/9d514c9e-2a61-418b-ae49-6ac1ad6ae5ac
+
+## 🎬 video & stream processing
+
+### `inference_file_example`
+
+Script to run object detection on a video file using the Roboflow Inference model.
+
+ - `--zone_configuration_path`: Path to the zone configuration JSON file.
+ - `--source_video_path`: Path to the source video file.
+ - `--model_id`: Roboflow model ID.
+ - `--classes`: List of class IDs to track. If empty, all classes are tracked.
+ - `--confidence_threshold`: Confidence level for detections (`0` to `1`). Default is `0.3`.
+ - `--iou_threshold`: IOU threshold for non-max suppression. Default is `0.7`.
+
+```bash
+python inference_file_example.py \
+--zone_configuration_path "data/checkout/config.json" \
+--source_video_path "data/checkout/video.mp4" \
+--model_id "yolov8x-640" \
+--classes 0 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
+https://github.com/roboflow/supervision/assets/26109316/d051cc8a-dd15-41d4-aa36-d38b86334c39
+
+```bash
+python inference_file_example.py \
+--zone_configuration_path "data/traffic/config.json" \
+--source_video_path "data/traffic/video.mp4" \
+--model_id "yolov8x-640" \
+--classes 2 5 6 7 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
+https://github.com/roboflow/supervision/assets/26109316/5ec896d7-4b39-4426-8979-11e71666878b
+
+### `inference_stream_example`
+
+Script to run object detection on a video stream using the Roboflow Inference model.
+
+ - `--zone_configuration_path`: Path to the zone configuration JSON file.
+ - `--rtsp_url`: Complete RTSP URL for the video stream.
+ - `--model_id`: Roboflow model ID.
+ - `--classes`: List of class IDs to track. If empty, all classes are tracked.
+ - `--confidence_threshold`: Confidence level for detections (`0` to `1`). Default is `0.3`.
+ - `--iou_threshold`: IOU threshold for non-max suppression. Default is `0.7`.
+
+```bash
+python inference_stream_example.py \
+--zone_configuration_path "data/checkout/config.json" \
+--rtsp_url "rtsp://localhost:8554/live0.stream" \
+--model_id "yolov8x-640" \
+--classes 0 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
+```bash
+python inference_stream_example.py \
+--zone_configuration_path "data/traffic/config.json" \
+--rtsp_url "rtsp://localhost:8554/live0.stream" \
+--model_id "yolov8x-640" \
+--classes 2 5 6 7 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
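For orientation, the core of `inference_file_example.py` (added later in this patch) boils down to the loop sketched below. This is a condensed, untested sketch: the video path, config path, and model ID are placeholders, and class filtering and all drawing/annotation code are omitted.

```python
import supervision as sv
from inference import get_model

from utils.general import load_zones_config
from utils.timers import FPSBasedTimer

model = get_model(model_id="yolov8x-640")                 # placeholder Roboflow model ID
tracker = sv.ByteTrack(minimum_matching_threshold=0.5)

video_path = "data/checkout/video.mp4"                    # placeholder path
video_info = sv.VideoInfo.from_video_path(video_path=video_path)
frames = sv.get_video_frames_generator(video_path)
first_frame = next(frames)

# one PolygonZone and one timer per polygon loaded from the JSON config
polygons = load_zones_config(file_path="data/checkout/config.json")
zones = [
    sv.PolygonZone(
        polygon=polygon,
        frame_resolution_wh=(first_frame.shape[1], first_frame.shape[0]),
        triggering_anchors=(sv.Position.CENTER,),
    )
    for polygon in polygons
]
timers = [FPSBasedTimer(video_info.fps) for _ in zones]

for frame in frames:
    results = model.infer(frame, confidence=0.3, iou_threshold=0.7)[0]
    detections = tracker.update_with_detections(sv.Detections.from_inference(results))
    for zone, timer in zip(zones, timers):
        detections_in_zone = detections[zone.trigger(detections)]
        seconds_in_zone = timer.tick(detections_in_zone)  # dwell time per tracker, in seconds
```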
+👉 show ultralytics examples
+
+### `ultralytics_file_example`
+
+Script to run object detection on a video file using the Ultralytics YOLOv8 model.
+
+ - `--zone_configuration_path`: Path to the zone configuration JSON file.
+ - `--source_video_path`: Path to the source video file.
+ - `--weights`: Path to the model weights file. Default is `'yolov8s.pt'`.
+ - `--device`: Computation device (`'cpu'`, `'mps'` or `'cuda'`). Default is `'cpu'`.
+ - `--classes`: List of class IDs to track. If empty, all classes are tracked.
+ - `--confidence_threshold`: Confidence level for detections (`0` to `1`). Default is `0.3`.
+ - `--iou_threshold`: IOU threshold for non-max suppression. Default is `0.7`.
+
+```bash
+python ultralytics_file_example.py \
+--zone_configuration_path "data/checkout/config.json" \
+--source_video_path "data/checkout/video.mp4" \
+--weights "yolov8x.pt" \
+--device "cpu" \
+--classes 0 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
+```bash
+python ultralytics_file_example.py \
+--zone_configuration_path "data/traffic/config.json" \
+--source_video_path "data/traffic/video.mp4" \
+--weights "yolov8x.pt" \
+--device "cpu" \
+--classes 2 5 6 7 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
+### `ultralytics_stream_example`
+
+Script to run object detection on a video stream using the Ultralytics YOLOv8 model.
+
+ - `--zone_configuration_path`: Path to the zone configuration JSON file.
+ - `--rtsp_url`: Complete RTSP URL for the video stream.
+ - `--weights`: Path to the model weights file. Default is `'yolov8s.pt'`.
+ - `--device`: Computation device (`'cpu'`, `'mps'` or `'cuda'`). Default is `'cpu'`.
+ - `--classes`: List of class IDs to track. If empty, all classes are tracked.
+ - `--confidence_threshold`: Confidence level for detections (`0` to `1`). Default is `0.3`.
+ - `--iou_threshold`: IOU threshold for non-max suppression. Default is `0.7`.
+
+```bash
+python ultralytics_stream_example.py \
+--zone_configuration_path "data/checkout/config.json" \
+--rtsp_url "rtsp://localhost:8554/live0.stream" \
+--weights "yolov8x.pt" \
+--device "cpu" \
+--classes 0 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
+```bash
+python ultralytics_stream_example.py \
+--zone_configuration_path "data/traffic/config.json" \
+--rtsp_url "rtsp://localhost:8554/live0.stream" \
+--weights "yolov8x.pt" \
+--device "cpu" \
+--classes 2 5 6 7 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
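The Ultralytics scripts differ from the Inference ones mainly in how detections are produced: the model is loaded with `YOLO(weights)`, results are converted with `sv.Detections.from_ultralytics`, and non-max suppression is applied on the supervision side via `with_nms`. A minimal sketch of that detection step, with the weights path and thresholds as placeholders:

```python
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8s.pt")  # placeholder weights path


def detect(frame, confidence: float = 0.3, iou: float = 0.7, device: str = "cpu") -> sv.Detections:
    # run YOLOv8, convert to supervision Detections, then apply NMS (as the example scripts do)
    result = model(frame, verbose=False, conf=confidence, device=device)[0]
    return sv.Detections.from_ultralytics(result).with_nms(threshold=iou)
```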
+ +## © license + +This demo integrates two main components, each with its own licensing: + +- ultralytics: The object detection model used in this demo, YOLOv8, is distributed + under the [AGPL-3.0 license](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). + You can find more details about this license here. + +- supervision: The analytics code that powers the zone-based analysis in this demo is + based on the Supervision library, which is licensed under the + [MIT license](https://github.com/roboflow/supervision/blob/develop/LICENSE.md). This + makes the Supervision part of the code fully open source and freely usable in your + projects. diff --git a/examples/time_in_zone/inference_file_example.py b/examples/time_in_zone/inference_file_example.py new file mode 100644 index 000000000..5feb1d836 --- /dev/null +++ b/examples/time_in_zone/inference_file_example.py @@ -0,0 +1,132 @@ +import argparse +from typing import List + +import cv2 +import numpy as np +from inference import get_model +from utils.general import find_in_list, load_zones_config +from utils.timers import FPSBasedTimer + +import supervision as sv + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) + + +def main( + source_video_path: str, + zone_configuration_path: str, + model_id: str, + confidence: float, + iou: float, + classes: List[int], +) -> None: + model = get_model(model_id=model_id) + tracker = sv.ByteTrack(minimum_matching_threshold=0.5) + video_info = sv.VideoInfo.from_video_path(video_path=source_video_path) + frames_generator = sv.get_video_frames_generator(source_video_path) + + frame = next(frames_generator) + resolution_wh = frame.shape[1], frame.shape[0] + + polygons = load_zones_config(file_path=zone_configuration_path) + zones = [ + sv.PolygonZone( + polygon=polygon, + frame_resolution_wh=resolution_wh, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in polygons + ] + timers = [FPSBasedTimer(video_info.fps) for _ in zones] + + for frame in frames_generator: + results = model.infer(frame, confidence=confidence, iou_threshold=iou)[0] + detections = sv.Detections.from_inference(results) + detections = detections[find_in_list(detections.class_id, classes)] + detections = tracker.update_with_detections(detections) + + annotated_frame = frame.copy() + + for idx, zone in enumerate(zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + + cv2.imshow("Processed Video", annotated_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + cv2.destroyAllWindows() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones, using video 
file." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--source_video_path", + type=str, + required=True, + help="Path to the source video file.", + ) + parser.add_argument( + "--model_id", type=str, default="yolov8s-640", help="Roboflow model ID." + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. If empty, all classes are tracked.", + ) + args = parser.parse_args() + + main( + source_video_path=args.source_video_path, + zone_configuration_path=args.zone_configuration_path, + model_id=args.model_id, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + ) diff --git a/examples/time_in_zone/inference_naive_stream_example.py b/examples/time_in_zone/inference_naive_stream_example.py new file mode 100644 index 000000000..dd2d68a5d --- /dev/null +++ b/examples/time_in_zone/inference_naive_stream_example.py @@ -0,0 +1,142 @@ +import argparse +from typing import List + +import cv2 +import numpy as np +from inference import get_model +from utils.general import find_in_list, get_stream_frames_generator, load_zones_config +from utils.timers import ClockBasedTimer + +import supervision as sv + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) + + +def main( + rtsp_url: str, + zone_configuration_path: str, + model_id: str, + confidence: float, + iou: float, + classes: List[int], +) -> None: + model = get_model(model_id=model_id) + tracker = sv.ByteTrack(minimum_matching_threshold=0.5) + frames_generator = get_stream_frames_generator(rtsp_url=rtsp_url) + fps_monitor = sv.FPSMonitor() + + frame = next(frames_generator) + resolution_wh = frame.shape[1], frame.shape[0] + + polygons = load_zones_config(file_path=zone_configuration_path) + zones = [ + sv.PolygonZone( + polygon=polygon, + frame_resolution_wh=resolution_wh, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in polygons + ] + timers = [ClockBasedTimer() for _ in zones] + + for frame in frames_generator: + fps_monitor.tick() + fps = fps_monitor.fps + + results = model.infer(frame, confidence=confidence, iou_threshold=iou)[0] + detections = sv.Detections.from_inference(results) + detections = detections[find_in_list(detections.class_id, classes)] + detections = tracker.update_with_detections(detections) + + annotated_frame = frame.copy() + annotated_frame = sv.draw_text( + scene=annotated_frame, + text=f"{fps:.1f}", + text_anchor=sv.Point(40, 30), + background_color=sv.Color.from_hex("#A351FB"), + text_color=sv.Color.from_hex("#000000"), + ) + + for idx, zone in enumerate(zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + 
detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + + cv2.imshow("Processed Video", annotated_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + cv2.destroyAllWindows() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones, using RTSP stream." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--rtsp_url", + type=str, + required=True, + help="Complete RTSP URL for the video stream.", + ) + parser.add_argument( + "--model_id", type=str, default="yolov8s-640", help="Roboflow model ID." + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. If empty, all classes are tracked.", + ) + args = parser.parse_args() + + main( + rtsp_url=args.rtsp_url, + zone_configuration_path=args.zone_configuration_path, + model_id=args.model_id, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + ) diff --git a/examples/time_in_zone/inference_stream_example.py b/examples/time_in_zone/inference_stream_example.py new file mode 100644 index 000000000..e1fae57f9 --- /dev/null +++ b/examples/time_in_zone/inference_stream_example.py @@ -0,0 +1,158 @@ +import argparse +from typing import List + +import cv2 +import numpy as np +from inference import InferencePipeline +from inference.core.interfaces.camera.entities import VideoFrame +from utils.general import find_in_list, load_zones_config +from utils.timers import ClockBasedTimer + +import supervision as sv + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) + + +class CustomSink: + def __init__(self, zone_configuration_path: str, classes: List[int]): + self.classes = classes + self.tracker = sv.ByteTrack(minimum_matching_threshold=0.5) + self.fps_monitor = sv.FPSMonitor() + self.polygons = load_zones_config(file_path=zone_configuration_path) + self.timers = [ClockBasedTimer() for _ in self.polygons] + self.zones = None + + def on_prediction(self, result: dict, frame: VideoFrame) -> None: + if self.zones is None: + resolution_wh = frame.image.shape[1], frame.image.shape[0] + self.zones = [ + sv.PolygonZone( + polygon=polygon, + frame_resolution_wh=resolution_wh, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in self.polygons + ] + + self.fps_monitor.tick() + fps = self.fps_monitor.fps + + detections = sv.Detections.from_inference(result) + detections = detections[find_in_list(detections.class_id, self.classes)] + detections = self.tracker.update_with_detections(detections) + + annotated_frame = frame.image.copy() + annotated_frame = sv.draw_text( + 
scene=annotated_frame, + text=f"{fps:.1f}", + text_anchor=sv.Point(40, 30), + background_color=sv.Color.from_hex("#A351FB"), + text_color=sv.Color.from_hex("#000000"), + ) + + for idx, zone in enumerate(self.zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = self.timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + cv2.imshow("Processed Video", annotated_frame) + cv2.waitKey(1) + + +def main( + rtsp_url: str, + zone_configuration_path: str, + model_id: str, + confidence: float, + iou: float, + classes: List[int], +) -> None: + sink = CustomSink(zone_configuration_path=zone_configuration_path, classes=classes) + + pipeline = InferencePipeline.init( + model_id=model_id, + video_reference=rtsp_url, + on_prediction=sink.on_prediction, + confidence=confidence, + iou_threshold=iou, + ) + + pipeline.start() + + try: + pipeline.join() + except KeyboardInterrupt: + pipeline.terminate() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones, using RTSP stream." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--rtsp_url", + type=str, + required=True, + help="Complete RTSP URL for the video stream.", + ) + parser.add_argument( + "--model_id", type=str, default="yolov8s-640", help="Roboflow model ID." + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. 
If empty, all classes are tracked.", + ) + args = parser.parse_args() + + main( + rtsp_url=args.rtsp_url, + zone_configuration_path=args.zone_configuration_path, + model_id=args.model_id, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + ) diff --git a/examples/time_in_zone/requirements.txt b/examples/time_in_zone/requirements.txt new file mode 100644 index 000000000..fa17b9864 --- /dev/null +++ b/examples/time_in_zone/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +supervision +ultralytics +inference +pytube diff --git a/examples/time_in_zone/scripts/download_from_youtube.py b/examples/time_in_zone/scripts/download_from_youtube.py new file mode 100644 index 000000000..ff7d94c3f --- /dev/null +++ b/examples/time_in_zone/scripts/download_from_youtube.py @@ -0,0 +1,46 @@ +import argparse +import os +from typing import Optional + +from pytube import YouTube + + +def main(url: str, output_path: Optional[str], file_name: Optional[str]) -> None: + yt = YouTube(url) + stream = yt.streams.get_highest_resolution() + + if not os.path.exists(output_path): + os.makedirs(output_path) + + stream.download(output_path=output_path, filename=file_name) + final_name = file_name if file_name else yt.title + final_path = output_path if output_path else "current directory" + print(f"Download completed! Video saved as '{final_name}' in '{final_path}'.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Download a specific YouTube video by providing its URL." + ) + parser.add_argument( + "--url", + type=str, + required=True, + help="The full URL of the YouTube video you wish to download.", + ) + parser.add_argument( + "--output_path", + type=str, + default="data/source", + required=False, + help="Optional. Specifies the directory where the video will be saved.", + ) + parser.add_argument( + "--file_name", + type=str, + default="video.mp4", + required=False, + help="Optional. 
Sets the name of the saved video file.", + ) + args = parser.parse_args() + main(url=args.url, output_path=args.output_path, file_name=args.file_name) diff --git a/examples/time_in_zone/scripts/draw_zones.py b/examples/time_in_zone/scripts/draw_zones.py new file mode 100644 index 000000000..3afae9e70 --- /dev/null +++ b/examples/time_in_zone/scripts/draw_zones.py @@ -0,0 +1,176 @@ +import argparse +import json +import os +from typing import Any, Optional, Tuple + +import cv2 +import numpy as np + +import supervision as sv + +KEY_ENTER = 13 +KEY_NEWLINE = 10 +KEY_ESCAPE = 27 +KEY_QUIT = ord("q") +KEY_SAVE = ord("s") + +THICKNESS = 2 +COLORS = sv.ColorPalette.DEFAULT +WINDOW_NAME = "Draw Zones" +POLYGONS = [[]] + +current_mouse_position: Optional[Tuple[int, int]] = None + + +def resolve_source(source_path: str) -> Optional[np.ndarray]: + if not os.path.exists(source_path): + return None + + image = cv2.imread(source_path) + if image is not None: + return image + + frame_generator = sv.get_video_frames_generator(source_path=source_path) + frame = next(frame_generator) + return frame + + +def mouse_event(event: int, x: int, y: int, flags: int, param: Any) -> None: + global current_mouse_position + if event == cv2.EVENT_MOUSEMOVE: + current_mouse_position = (x, y) + elif event == cv2.EVENT_LBUTTONDOWN: + POLYGONS[-1].append((x, y)) + + +def redraw(image: np.ndarray, original_image: np.ndarray) -> None: + global POLYGONS, current_mouse_position + image[:] = original_image.copy() + for idx, polygon in enumerate(POLYGONS): + color = ( + COLORS.by_idx(idx).as_bgr() + if idx < len(POLYGONS) - 1 + else sv.Color.WHITE.as_bgr() + ) + + if len(polygon) > 1: + for i in range(1, len(polygon)): + cv2.line( + img=image, + pt1=polygon[i - 1], + pt2=polygon[i], + color=color, + thickness=THICKNESS, + ) + if idx < len(POLYGONS) - 1: + cv2.line( + img=image, + pt1=polygon[-1], + pt2=polygon[0], + color=color, + thickness=THICKNESS, + ) + if idx == len(POLYGONS) - 1 and current_mouse_position is not None and polygon: + cv2.line( + img=image, + pt1=polygon[-1], + pt2=current_mouse_position, + color=color, + thickness=THICKNESS, + ) + cv2.imshow(WINDOW_NAME, image) + + +def close_and_finalize_polygon(image: np.ndarray, original_image: np.ndarray) -> None: + if len(POLYGONS[-1]) > 2: + cv2.line( + img=image, + pt1=POLYGONS[-1][-1], + pt2=POLYGONS[-1][0], + color=COLORS.by_idx(0).as_bgr(), + thickness=THICKNESS, + ) + POLYGONS.append([]) + image[:] = original_image.copy() + redraw_polygons(image) + cv2.imshow(WINDOW_NAME, image) + + +def redraw_polygons(image: np.ndarray) -> None: + for idx, polygon in enumerate(POLYGONS[:-1]): + if len(polygon) > 1: + color = COLORS.by_idx(idx).as_bgr() + for i in range(len(polygon) - 1): + cv2.line( + img=image, + pt1=polygon[i], + pt2=polygon[i + 1], + color=color, + thickness=THICKNESS, + ) + cv2.line( + img=image, + pt1=polygon[-1], + pt2=polygon[0], + color=color, + thickness=THICKNESS, + ) + + +def save_polygons_to_json(polygons, target_path): + data_to_save = polygons if polygons[-1] else polygons[:-1] + with open(target_path, "w") as f: + json.dump(data_to_save, f) + + +def main(source_path: str, zone_configuration_path: str) -> None: + global current_mouse_position + original_image = resolve_source(source_path=source_path) + if original_image is None: + print("Failed to load source image.") + return + + image = original_image.copy() + cv2.imshow(WINDOW_NAME, image) + cv2.setMouseCallback(WINDOW_NAME, mouse_event, image) + + while True: + key = cv2.waitKey(1) & 0xFF + if 
key == KEY_ENTER or key == KEY_NEWLINE: + close_and_finalize_polygon(image, original_image) + elif key == KEY_ESCAPE: + POLYGONS[-1] = [] + current_mouse_position = None + elif key == KEY_SAVE: + save_polygons_to_json(POLYGONS, zone_configuration_path) + print(f"Polygons saved to {zone_configuration_path}") + break + redraw(image, original_image) + if key == KEY_QUIT: + break + + cv2.destroyAllWindows() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Interactively draw polygons on images or video frames and save " + "the annotations." + ) + parser.add_argument( + "--source_path", + type=str, + required=True, + help="Path to the source image or video file for drawing polygons.", + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path where the polygon annotations will be saved as a JSON file.", + ) + arguments = parser.parse_args() + main( + source_path=arguments.source_path, + zone_configuration_path=arguments.zone_configuration_path, + ) diff --git a/examples/time_in_zone/scripts/stream_from_file.py b/examples/time_in_zone/scripts/stream_from_file.py new file mode 100644 index 000000000..23588f680 --- /dev/null +++ b/examples/time_in_zone/scripts/stream_from_file.py @@ -0,0 +1,104 @@ +import argparse +import os +import subprocess +import tempfile +from glob import glob +from threading import Thread + +import yaml + +SERVER_CONFIG = {"protocols": ["tcp"], "paths": {"all": {"source": "publisher"}}} +BASE_STREAM_URL = "rtsp://localhost:8554/live" + + +def main(video_directory: str, number_of_streams: int) -> None: + video_files = find_video_files_in_directory(video_directory, number_of_streams) + try: + with tempfile.TemporaryDirectory() as temporary_directory: + config_file_path = create_server_config_file(temporary_directory) + run_rtsp_server(config_path=config_file_path) + stream_videos(video_files) + finally: + stop_rtsp_server() + + +def find_video_files_in_directory(directory: str, limit: int) -> list: + video_formats = ["*.mp4", "*.webm"] + video_paths = [] + for video_format in video_formats: + video_paths.extend(glob(os.path.join(directory, video_format))) + return video_paths[:limit] + + +def create_server_config_file(directory: str) -> str: + config_path = os.path.join(directory, "rtsp-simple-server.yml") + with open(config_path, "w") as config_file: + yaml.dump(SERVER_CONFIG, config_file) + return config_path + + +def run_rtsp_server(config_path: str) -> None: + command = ( + "docker run --rm --name rtsp_server -d -v " + f"{config_path}:/rtsp-simple-server.yml -p 8554:8554 " + "aler9/rtsp-simple-server:v1.3.0" + ) + if run_command(command.split()) != 0: + raise RuntimeError("Could not start the RTSP server!") + + +def stop_rtsp_server() -> None: + run_command("docker kill rtsp_server".split()) + + +def stream_videos(video_files: list) -> None: + threads = [] + for index, video_file in enumerate(video_files): + stream_url = f"{BASE_STREAM_URL}{index}.stream" + print(f"Streaming {video_file} under {stream_url}") + thread = stream_video_to_url(video_file, stream_url) + threads.append(thread) + for thread in threads: + thread.join() + + +def stream_video_to_url(video_path: str, stream_url: str) -> Thread: + command = ( + f"ffmpeg -re -stream_loop -1 -i {video_path} " + f"-f rtsp -rtsp_transport tcp {stream_url}" + ) + return run_command_in_thread(command.split()) + + +def run_command_in_thread(command: list) -> Thread: + thread = Thread(target=run_command, args=(command,)) + thread.start() + return thread 
+ + +def run_command(command: list) -> int: + process = subprocess.run(command) + return process.returncode + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Script to stream videos using RTSP protocol." + ) + parser.add_argument( + "--video_directory", + type=str, + required=True, + help="Directory containing video files to stream.", + ) + parser.add_argument( + "--number_of_streams", + type=int, + default=6, + help="Number of video files to stream.", + ) + arguments = parser.parse_args() + main( + video_directory=arguments.video_directory, + number_of_streams=arguments.number_of_streams, + ) diff --git a/examples/time_in_zone/ultralytics_file_example.py b/examples/time_in_zone/ultralytics_file_example.py new file mode 100644 index 000000000..fe8ce58df --- /dev/null +++ b/examples/time_in_zone/ultralytics_file_example.py @@ -0,0 +1,144 @@ +import argparse +from typing import List + +import cv2 +import numpy as np +from ultralytics import YOLO +from utils.general import find_in_list, load_zones_config +from utils.timers import FPSBasedTimer + +import supervision as sv + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) + + +def main( + source_video_path: str, + zone_configuration_path: str, + weights: str, + device: str, + confidence: float, + iou: float, + classes: List[int], +) -> None: + model = YOLO(weights) + tracker = sv.ByteTrack(minimum_matching_threshold=0.5) + video_info = sv.VideoInfo.from_video_path(video_path=source_video_path) + frames_generator = sv.get_video_frames_generator(source_video_path) + + frame = next(frames_generator) + resolution_wh = frame.shape[1], frame.shape[0] + + polygons = load_zones_config(file_path=zone_configuration_path) + zones = [ + sv.PolygonZone( + polygon=polygon, + frame_resolution_wh=resolution_wh, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in polygons + ] + timers = [FPSBasedTimer(video_info.fps) for _ in zones] + + for frame in frames_generator: + results = model(frame, verbose=False, device=device, conf=confidence)[0] + detections = sv.Detections.from_ultralytics(results) + detections = detections[find_in_list(detections.class_id, classes)] + detections = detections.with_nms(threshold=iou) + detections = tracker.update_with_detections(detections) + + annotated_frame = frame.copy() + + for idx, zone in enumerate(zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + + cv2.imshow("Processed Video", annotated_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + cv2.destroyAllWindows() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell 
time in zones, using video file." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--source_video_path", + type=str, + required=True, + help="Path to the source video file.", + ) + parser.add_argument( + "--weights", + type=str, + default="yolov8s.pt", + help="Path to the model weights file. Default is 'yolov8s.pt'.", + ) + parser.add_argument( + "--device", + type=str, + default="cpu", + help="Computation device ('cpu', 'mps' or 'cuda'). Default is 'cpu'.", + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. If empty, all classes are tracked.", + ) + args = parser.parse_args() + + main( + source_video_path=args.source_video_path, + zone_configuration_path=args.zone_configuration_path, + weights=args.weights, + device=args.device, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + ) diff --git a/examples/time_in_zone/ultralytics_naive_stream_example.py b/examples/time_in_zone/ultralytics_naive_stream_example.py new file mode 100644 index 000000000..1cc82b446 --- /dev/null +++ b/examples/time_in_zone/ultralytics_naive_stream_example.py @@ -0,0 +1,154 @@ +import argparse +from typing import List + +import cv2 +import numpy as np +from ultralytics import YOLO +from utils.general import find_in_list, get_stream_frames_generator, load_zones_config +from utils.timers import ClockBasedTimer + +import supervision as sv + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) + + +def main( + rtsp_url: str, + zone_configuration_path: str, + weights: str, + device: str, + confidence: float, + iou: float, + classes: List[int], +) -> None: + model = YOLO(weights) + tracker = sv.ByteTrack(minimum_matching_threshold=0.5) + frames_generator = get_stream_frames_generator(rtsp_url=rtsp_url) + fps_monitor = sv.FPSMonitor() + + frame = next(frames_generator) + resolution_wh = frame.shape[1], frame.shape[0] + + polygons = load_zones_config(file_path=zone_configuration_path) + zones = [ + sv.PolygonZone( + polygon=polygon, + frame_resolution_wh=resolution_wh, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in polygons + ] + timers = [ClockBasedTimer() for _ in zones] + + for frame in frames_generator: + fps_monitor.tick() + fps = fps_monitor.fps + + results = model(frame, verbose=False, device=device, conf=confidence)[0] + detections = sv.Detections.from_ultralytics(results) + detections = detections[find_in_list(detections.class_id, classes)] + detections = detections.with_nms(threshold=iou) + detections = tracker.update_with_detections(detections) + + annotated_frame = frame.copy() + annotated_frame = sv.draw_text( + scene=annotated_frame, + text=f"{fps:.1f}", + text_anchor=sv.Point(40, 30), + background_color=sv.Color.from_hex("#A351FB"), + text_color=sv.Color.from_hex("#000000"), + ) + + for idx, zone in enumerate(zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, 
color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + + cv2.imshow("Processed Video", annotated_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + cv2.destroyAllWindows() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones, using RTSP stream." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--rtsp_url", + type=str, + required=True, + help="Complete RTSP URL for the video stream.", + ) + parser.add_argument( + "--weights", + type=str, + default="yolov8s.pt", + help="Path to the model weights file. Default is 'yolov8s.pt'.", + ) + parser.add_argument( + "--device", + type=str, + default="cpu", + help="Computation device ('cpu', 'mps' or 'cuda'). Default is 'cpu'.", + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. 
If empty, all classes are tracked.", + ) + args = parser.parse_args() + + main( + rtsp_url=args.rtsp_url, + zone_configuration_path=args.zone_configuration_path, + weights=args.weights, + device=args.device, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + ) diff --git a/examples/time_in_zone/ultralytics_stream_example.py b/examples/time_in_zone/ultralytics_stream_example.py new file mode 100644 index 000000000..25dc874f8 --- /dev/null +++ b/examples/time_in_zone/ultralytics_stream_example.py @@ -0,0 +1,173 @@ +import argparse +from typing import List + +import cv2 +import numpy as np +from inference import InferencePipeline +from inference.core.interfaces.camera.entities import VideoFrame +from ultralytics import YOLO +from utils.general import find_in_list, load_zones_config +from utils.timers import ClockBasedTimer + +import supervision as sv + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) + + +class CustomSink: + def __init__(self, zone_configuration_path: str, classes: List[int]): + self.classes = classes + self.tracker = sv.ByteTrack(minimum_matching_threshold=0.8) + self.fps_monitor = sv.FPSMonitor() + self.polygons = load_zones_config(file_path=zone_configuration_path) + self.timers = [ClockBasedTimer() for _ in self.polygons] + self.zones = None + + def on_prediction(self, detections: sv.Detections, frame: VideoFrame) -> None: + if self.zones is None: + resolution_wh = frame.image.shape[1], frame.image.shape[0] + self.zones = [ + sv.PolygonZone( + polygon=polygon, + frame_resolution_wh=resolution_wh, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in self.polygons + ] + + self.fps_monitor.tick() + fps = self.fps_monitor.fps + + detections = detections[find_in_list(detections.class_id, self.classes)] + detections = self.tracker.update_with_detections(detections) + + annotated_frame = frame.image.copy() + annotated_frame = sv.draw_text( + scene=annotated_frame, + text=f"{fps:.1f}", + text_anchor=sv.Point(40, 30), + background_color=sv.Color.from_hex("#A351FB"), + text_color=sv.Color.from_hex("#000000"), + ) + + for idx, zone in enumerate(self.zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = self.timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + cv2.imshow("Processed Video", annotated_frame) + cv2.waitKey(1) + + +def main( + rtsp_url: str, + zone_configuration_path: str, + weights: str, + device: str, + confidence: float, + iou: float, + classes: List[int], +) -> None: + model = YOLO(weights) + + def inference_callback(frame: VideoFrame) -> sv.Detections: + results = model(frame.image, verbose=False, conf=confidence, device=device)[0] + return 
sv.Detections.from_ultralytics(results).with_nms(threshold=iou) + + sink = CustomSink(zone_configuration_path=zone_configuration_path, classes=classes) + + pipeline = InferencePipeline.init_with_custom_logic( + video_reference=rtsp_url, + on_video_frame=inference_callback, + on_prediction=sink.on_prediction, + ) + + pipeline.start() + + try: + pipeline.join() + except KeyboardInterrupt: + pipeline.terminate() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones, using RTSP stream." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--rtsp_url", + type=str, + required=True, + help="Complete RTSP URL for the video stream.", + ) + parser.add_argument( + "--weights", + type=str, + default="yolov8s.pt", + help="Path to the model weights file. Default is 'yolov8s.pt'.", + ) + parser.add_argument( + "--device", + type=str, + default="cpu", + help="Computation device ('cpu', 'mps' or 'cuda'). Default is 'cpu'.", + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. If empty, all classes are tracked.", + ) + args = parser.parse_args() + + main( + rtsp_url=args.rtsp_url, + zone_configuration_path=args.zone_configuration_path, + weights=args.weights, + device=args.device, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + ) diff --git a/examples/time_in_zone/utils/__init__.py b/examples/time_in_zone/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/time_in_zone/utils/general.py b/examples/time_in_zone/utils/general.py new file mode 100644 index 000000000..803d116aa --- /dev/null +++ b/examples/time_in_zone/utils/general.py @@ -0,0 +1,66 @@ +import json +from typing import Generator, List + +import cv2 +import numpy as np + + +def load_zones_config(file_path: str) -> List[np.ndarray]: + """ + Load polygon zone configurations from a JSON file. + + This function reads a JSON file which contains polygon coordinates, and + converts them into a list of NumPy arrays. Each polygon is represented as + a NumPy array of coordinates. + + Args: + file_path (str): The path to the JSON configuration file. + + Returns: + List[np.ndarray]: A list of polygons, each represented as a NumPy array. + """ + with open(file_path, "r") as file: + data = json.load(file) + return [np.array(polygon, np.int32) for polygon in data] + + +def find_in_list(array: np.ndarray, search_list: List[int]) -> np.ndarray: + """Determines if elements of a numpy array are present in a list. + + Args: + array (np.ndarray): The numpy array of integers to check. + search_list (List[int]): The list of integers to search within. + + Returns: + np.ndarray: A numpy array of booleans, where each boolean indicates whether + the corresponding element in `array` is found in `search_list`. 
+ """ + if not search_list: + return np.ones(array.shape, dtype=bool) + else: + return np.isin(array, search_list) + + +def get_stream_frames_generator(rtsp_url: str) -> Generator[np.ndarray, None, None]: + """ + Generator function to yield frames from an RTSP stream. + + Args: + rtsp_url (str): URL of the RTSP video stream. + + Yields: + np.ndarray: The next frame from the video stream. + """ + cap = cv2.VideoCapture(rtsp_url) + if not cap.isOpened(): + raise Exception("Error: Could not open video stream.") + + try: + while True: + ret, frame = cap.read() + if not ret: + print("End of stream or error reading frame.") + break + yield frame + finally: + cap.release() diff --git a/examples/time_in_zone/utils/timers.py b/examples/time_in_zone/utils/timers.py new file mode 100644 index 000000000..cb5b471f6 --- /dev/null +++ b/examples/time_in_zone/utils/timers.py @@ -0,0 +1,88 @@ +from datetime import datetime +from typing import Dict + +import numpy as np + +import supervision as sv + + +class FPSBasedTimer: + """ + A timer that calculates the duration each object has been detected based on frames + per second (FPS). + + Attributes: + fps (int): The frame rate of the video stream, used to calculate time durations. + frame_id (int): The current frame number in the sequence. + tracker_id2frame_id (Dict[int, int]): Maps each tracker's ID to the frame number + at which it was first detected. + """ + + def __init__(self, fps: int = 30) -> None: + """Initializes the FPSBasedTimer with the specified frames per second rate. + + Args: + fps (int, optional): The frame rate of the video stream. Defaults to 30. + """ + self.fps = fps + self.frame_id = 0 + self.tracker_id2frame_id: Dict[int, int] = {} + + def tick(self, detections: sv.Detections) -> np.ndarray: + """Processes the current frame, updating time durations for each tracker. + + Args: + detections: The detections for the current frame, including tracker IDs. + + Returns: + np.ndarray: Time durations (in seconds) for each detected tracker, since + their first detection. + """ + self.frame_id += 1 + times = [] + + for tracker_id in detections.tracker_id: + self.tracker_id2frame_id.setdefault(tracker_id, self.frame_id) + + start_frame_id = self.tracker_id2frame_id[tracker_id] + time_duration = (self.frame_id - start_frame_id) / self.fps + times.append(time_duration) + + return np.array(times) + + +class ClockBasedTimer: + """ + A timer that calculates the duration each object has been detected based on the + system clock. + + Attributes: + tracker_id2start_time (Dict[int, datetime]): Maps each tracker's ID to the + datetime when it was first detected. + """ + + def __init__(self) -> None: + """Initializes the ClockBasedTimer.""" + self.tracker_id2start_time: Dict[int, datetime] = {} + + def tick(self, detections: sv.Detections) -> np.ndarray: + """Processes the current frame, updating time durations for each tracker. + + Args: + detections: The detections for the current frame, including tracker IDs. + + Returns: + np.ndarray: Time durations (in seconds) for each detected tracker, since + their first detection. + """ + current_time = datetime.now() + times = [] + + for tracker_id in detections.tracker_id: + self.tracker_id2start_time.setdefault(tracker_id, current_time) + + start_time = self.tracker_id2start_time[tracker_id] + time_duration = (current_time - start_time).total_seconds() + times.append(time_duration) + + return np.array(times)
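To illustrate how the timers above are meant to be used: `tick` is called once per processed frame with the detections currently inside a zone, and it returns the accumulated dwell time (in seconds) for every tracker ID it sees. The snippet below is a hypothetical, self-contained sketch with hand-made detections; in the example scripts the tracker IDs come from `sv.ByteTrack` instead.

```python
import numpy as np
import supervision as sv

from utils.timers import FPSBasedTimer

# two hypothetical tracked detections; normally produced by the tracker
detections = sv.Detections(
    xyxy=np.array([[10.0, 10.0, 50.0, 50.0], [60.0, 60.0, 120.0, 120.0]]),
    tracker_id=np.array([1, 2]),
)

timer = FPSBasedTimer(fps=30)
for _ in range(90):  # simulate 90 consecutive frames with both objects inside the zone
    times = timer.tick(detections)

print(times)  # ~2.97 s per tracker: (90 - 1) frames elapsed since first detection / 30 fps
```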