From 725d95c6ce9cde2455315fb12a6f9717805a564e Mon Sep 17 00:00:00 2001 From: Piotr Skalski Date: Wed, 27 Mar 2024 00:12:12 +0100 Subject: [PATCH] time in zone / dwell time demo (#1026) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * initial commit * initial stream processing script * fix(pre_commit): 🎨 auto format pre-commit hooks * work in progress * fix(pre_commit): 🎨 auto format pre-commit hooks * work in progress * fix(pre_commit): 🎨 auto format pre-commit hooks * ultralytics file and stream versions ready * fix(pre_commit): 🎨 auto format pre-commit hooks * ultralytics file and stream versions ready * class based filtering * fix(pre_commit): 🎨 auto format pre-commit hooks * timer improvements * inference static file processing script * fix(pre_commit): 🎨 auto format pre-commit hooks * inference stream processing script * fix(pre_commit): 🎨 auto format pre-commit hooks * all scripts are working * fix(pre_commit): 🎨 auto format pre-commit hooks * all video or stream processing scripts refactored * fix(pre_commit): 🎨 auto format pre-commit hooks * README.md update * fix(pre_commit): 🎨 auto format pre-commit hooks * initial version of draw ones script * fix(pre_commit): 🎨 auto format pre-commit hooks * loading image or video, drawing multiple polygons, quiting app, and aborting currently drawn polygon works. * fix(pre_commit): 🎨 auto format pre-commit hooks * loading image or video, drawing multiple polygons, quiting app, and aborting currently drawn polygon works. * fix(pre_commit): 🎨 auto format pre-commit hooks * Dynamic drawing of currently edited polygon. * fix(pre_commit): 🎨 auto format pre-commit hooks * Drawing tool is ready. * fix(pre_commit): 🎨 auto format pre-commit hooks * Final touches. * fix(pre_commit): 🎨 auto format pre-commit hooks * Update README.md * fix(pre_commit): 🎨 auto format pre-commit hooks * Update README.md --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- examples/time_in_zone/.gitignore | 9 + examples/time_in_zone/README.md | 262 ++++++++++++++++++ .../time_in_zone/inference_file_example.py | 132 +++++++++ .../inference_naive_stream_example.py | 142 ++++++++++ .../time_in_zone/inference_stream_example.py | 158 +++++++++++ examples/time_in_zone/requirements.txt | 5 + .../scripts/download_from_youtube.py | 46 +++ examples/time_in_zone/scripts/draw_zones.py | 176 ++++++++++++ .../time_in_zone/scripts/stream_from_file.py | 104 +++++++ .../time_in_zone/ultralytics_file_example.py | 144 ++++++++++ .../ultralytics_naive_stream_example.py | 154 ++++++++++ .../ultralytics_stream_example.py | 173 ++++++++++++ examples/time_in_zone/utils/__init__.py | 0 examples/time_in_zone/utils/general.py | 66 +++++ examples/time_in_zone/utils/timers.py | 88 ++++++ 15 files changed, 1659 insertions(+) create mode 100644 examples/time_in_zone/.gitignore create mode 100644 examples/time_in_zone/README.md create mode 100644 examples/time_in_zone/inference_file_example.py create mode 100644 examples/time_in_zone/inference_naive_stream_example.py create mode 100644 examples/time_in_zone/inference_stream_example.py create mode 100644 examples/time_in_zone/requirements.txt create mode 100644 examples/time_in_zone/scripts/download_from_youtube.py create mode 100644 examples/time_in_zone/scripts/draw_zones.py create mode 100644 examples/time_in_zone/scripts/stream_from_file.py create mode 100644 examples/time_in_zone/ultralytics_file_example.py create mode 100644 
examples/time_in_zone/ultralytics_naive_stream_example.py
 create mode 100644 examples/time_in_zone/ultralytics_stream_example.py
 create mode 100644 examples/time_in_zone/utils/__init__.py
 create mode 100644 examples/time_in_zone/utils/general.py
 create mode 100644 examples/time_in_zone/utils/timers.py

diff --git a/examples/time_in_zone/.gitignore b/examples/time_in_zone/.gitignore
new file mode 100644
index 000000000..34efd9e06
--- /dev/null
+++ b/examples/time_in_zone/.gitignore
@@ -0,0 +1,9 @@
+data/
+venv*/
+*.pt
+*.pth
+*.mp4
+*.mov
+*.png
+*.jpg
+*.jpeg
diff --git a/examples/time_in_zone/README.md b/examples/time_in_zone/README.md
new file mode 100644
index 000000000..3898c4525
--- /dev/null
+++ b/examples/time_in_zone/README.md
@@ -0,0 +1,262 @@
+# time in zone
+
+## 👋 hello
+
+A practical demonstration of leveraging computer vision to analyze wait times and
+monitor the duration that objects or individuals spend in predefined areas of video
+frames. This example project is perfect for retail analytics or traffic management
+applications.
+
+https://github.com/roboflow/supervision/assets/26109316/d051cc8a-dd15-41d4-aa36-d38b86334c39
+
+## 💻 install
+
+- clone repository and navigate to example directory
+
+  ```bash
+  git clone https://github.com/roboflow/supervision.git
+  cd supervision/examples/time_in_zone
+  ```
+
+- set up a Python environment and activate it [optional]
+
+  ```bash
+  python3 -m venv venv
+  source venv/bin/activate
+  ```
+
+- install required dependencies
+
+  ```bash
+  pip install -r requirements.txt
+  ```
+
+## 🛠 scripts
+
+### `download_from_youtube`
+
+This script allows you to download a video from YouTube.
+
+- `--url`: The full URL of the YouTube video you wish to download.
+- `--output_path` (optional): Specifies the directory where the video will be saved.
+- `--file_name` (optional): Sets the name of the saved video file.
+
+```bash
+python scripts/download_from_youtube.py \
+--url "https://youtu.be/8zyEwAa50Q" \
+--output_path "data/checkout" \
+--file_name "video.mp4"
+```
+
+```bash
+python scripts/download_from_youtube.py \
+--url "https://youtu.be/MNn9qKG2UFI" \
+--output_path "data/traffic" \
+--file_name "video.mp4"
+```
+
+### `stream_from_file`
+
+This script allows you to stream video files from a directory. It's an awesome way to
+mock a live video stream for local testing. The video will be streamed in a loop under
+the `rtsp://localhost:8554/live0.stream` URL. This script requires Docker to be installed.
+
+- `--video_directory`: Directory containing video files to stream.
+- `--number_of_streams`: Number of video files to stream.
+
+```bash
+python scripts/stream_from_file.py \
+--video_directory "data/checkout" \
+--number_of_streams 1
+```
+
+```bash
+python scripts/stream_from_file.py \
+--video_directory "data/traffic" \
+--number_of_streams 1
+```
+
+### `draw_zones`
+
+If you want to test time in zone analysis on your own video, you can use this
+script to design custom zones and save results as a JSON file. The script will open a
+window where you can draw polygons on the source image or video file. The polygons will
+be saved as a JSON file.
+
+- `--source_path`: Path to the source image or video file for drawing polygons.
+- `--zone_configuration_path`: Path where the polygon annotations will be saved as a JSON file.
+
+
+- `enter` - finish drawing the current polygon.
+- `escape` - cancel drawing the current polygon.
+- `q` - quit the drawing window.
+- `s` - save zone configuration to a JSON file.
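For reference, the saved zone configuration is a plain JSON list of polygons, where each polygon is a list of `[x, y]` pixel coordinates. This is the format `save_polygons_to_json` in `scripts/draw_zones.py` writes and `load_zones_config` in `utils/general.py` reads; the coordinates below are purely illustrative:

```json
[
  [[100, 100], [620, 100], [620, 420], [100, 420]],
  [[700, 120], [1180, 120], [1180, 460], [700, 460]]
]
```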
+
+```bash
+python scripts/draw_zones.py \
+--source_path "data/checkout/video.mp4" \
+--zone_configuration_path "data/checkout/custom_config.json"
+```
+
+```bash
+python scripts/draw_zones.py \
+--source_path "data/traffic/video.mp4" \
+--zone_configuration_path "data/traffic/custom_config.json"
+```
+
+https://github.com/roboflow/supervision/assets/26109316/9d514c9e-2a61-418b-ae49-6ac1ad6ae5ac
+
+## 🎬 video & stream processing
+
+### `inference_file_example`
+
+Script to run object detection on a video file using the Roboflow Inference model.
+
+ - `--zone_configuration_path`: Path to the zone configuration JSON file.
+ - `--source_video_path`: Path to the source video file.
+ - `--model_id`: Roboflow model ID.
+ - `--classes`: List of class IDs to track. If empty, all classes are tracked.
+ - `--confidence_threshold`: Confidence level for detections (`0` to `1`). Default is `0.3`.
+ - `--iou_threshold`: IOU threshold for non-max suppression. Default is `0.7`.
+
+```bash
+python inference_file_example.py \
+--zone_configuration_path "data/checkout/config.json" \
+--source_video_path "data/checkout/video.mp4" \
+--model_id "yolov8x-640" \
+--classes 0 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
+https://github.com/roboflow/supervision/assets/26109316/d051cc8a-dd15-41d4-aa36-d38b86334c39
+
+```bash
+python inference_file_example.py \
+--zone_configuration_path "data/traffic/config.json" \
+--source_video_path "data/traffic/video.mp4" \
+--model_id "yolov8x-640" \
+--classes 2 5 6 7 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
+https://github.com/roboflow/supervision/assets/26109316/5ec896d7-4b39-4426-8979-11e71666878b
+
+### `inference_stream_example`
+
+Script to run object detection on a video stream using the Roboflow Inference model.
+
+ - `--zone_configuration_path`: Path to the zone configuration JSON file.
+ - `--rtsp_url`: Complete RTSP URL for the video stream.
+ - `--model_id`: Roboflow model ID.
+ - `--classes`: List of class IDs to track. If empty, all classes are tracked.
+ - `--confidence_threshold`: Confidence level for detections (`0` to `1`). Default is `0.3`.
+ - `--iou_threshold`: IOU threshold for non-max suppression. Default is `0.7`.
+
+```bash
+python inference_stream_example.py \
+--zone_configuration_path "data/checkout/config.json" \
+--rtsp_url "rtsp://localhost:8554/live0.stream" \
+--model_id "yolov8x-640" \
+--classes 0 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
+```bash
+python inference_stream_example.py \
+--zone_configuration_path "data/traffic/config.json" \
+--rtsp_url "rtsp://localhost:8554/live0.stream" \
+--model_id "yolov8x-640" \
+--classes 2 5 6 7 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
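For orientation, the core of `inference_file_example.py` (added later in this patch) boils down to the loop sketched below. This is a condensed, untested sketch: the video path, config path, and model ID are placeholders, and class filtering and all drawing/annotation code are omitted.

```python
import supervision as sv
from inference import get_model

from utils.general import load_zones_config
from utils.timers import FPSBasedTimer

model = get_model(model_id="yolov8x-640")                 # placeholder Roboflow model ID
tracker = sv.ByteTrack(minimum_matching_threshold=0.5)

video_path = "data/checkout/video.mp4"                    # placeholder path
video_info = sv.VideoInfo.from_video_path(video_path=video_path)
frames = sv.get_video_frames_generator(video_path)
first_frame = next(frames)

# one PolygonZone and one timer per polygon loaded from the JSON config
polygons = load_zones_config(file_path="data/checkout/config.json")
zones = [
    sv.PolygonZone(
        polygon=polygon,
        frame_resolution_wh=(first_frame.shape[1], first_frame.shape[0]),
        triggering_anchors=(sv.Position.CENTER,),
    )
    for polygon in polygons
]
timers = [FPSBasedTimer(video_info.fps) for _ in zones]

for frame in frames:
    results = model.infer(frame, confidence=0.3, iou_threshold=0.7)[0]
    detections = tracker.update_with_detections(sv.Detections.from_inference(results))
    for zone, timer in zip(zones, timers):
        detections_in_zone = detections[zone.trigger(detections)]
        seconds_in_zone = timer.tick(detections_in_zone)  # dwell time per tracker, in seconds
```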
+👉 show ultralytics examples
+
+### `ultralytics_file_example`
+
+Script to run object detection on a video file using the Ultralytics YOLOv8 model.
+
+ - `--zone_configuration_path`: Path to the zone configuration JSON file.
+ - `--source_video_path`: Path to the source video file.
+ - `--weights`: Path to the model weights file. Default is `'yolov8s.pt'`.
+ - `--device`: Computation device (`'cpu'`, `'mps'` or `'cuda'`). Default is `'cpu'`.
+ - `--classes`: List of class IDs to track. If empty, all classes are tracked.
+ - `--confidence_threshold`: Confidence level for detections (`0` to `1`). Default is `0.3`.
+ - `--iou_threshold`: IOU threshold for non-max suppression. Default is `0.7`.
+
+```bash
+python ultralytics_file_example.py \
+--zone_configuration_path "data/checkout/config.json" \
+--source_video_path "data/checkout/video.mp4" \
+--weights "yolov8x.pt" \
+--device "cpu" \
+--classes 0 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
+```bash
+python ultralytics_file_example.py \
+--zone_configuration_path "data/traffic/config.json" \
+--source_video_path "data/traffic/video.mp4" \
+--weights "yolov8x.pt" \
+--device "cpu" \
+--classes 2 5 6 7 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
+### `ultralytics_stream_example`
+
+Script to run object detection on a video stream using the Ultralytics YOLOv8 model.
+
+ - `--zone_configuration_path`: Path to the zone configuration JSON file.
+ - `--rtsp_url`: Complete RTSP URL for the video stream.
+ - `--weights`: Path to the model weights file. Default is `'yolov8s.pt'`.
+ - `--device`: Computation device (`'cpu'`, `'mps'` or `'cuda'`). Default is `'cpu'`.
+ - `--classes`: List of class IDs to track. If empty, all classes are tracked.
+ - `--confidence_threshold`: Confidence level for detections (`0` to `1`). Default is `0.3`.
+ - `--iou_threshold`: IOU threshold for non-max suppression. Default is `0.7`.
+
+```bash
+python ultralytics_stream_example.py \
+--zone_configuration_path "data/checkout/config.json" \
+--rtsp_url "rtsp://localhost:8554/live0.stream" \
+--weights "yolov8x.pt" \
+--device "cpu" \
+--classes 0 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
+```bash
+python ultralytics_stream_example.py \
+--zone_configuration_path "data/traffic/config.json" \
+--rtsp_url "rtsp://localhost:8554/live0.stream" \
+--weights "yolov8x.pt" \
+--device "cpu" \
+--classes 2 5 6 7 \
+--confidence_threshold 0.3 \
+--iou_threshold 0.7
+```
+
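The Ultralytics scripts differ from the Inference ones mainly in how detections are produced: the model is loaded with `YOLO(weights)`, results are converted with `sv.Detections.from_ultralytics`, and non-max suppression is applied on the supervision side via `with_nms`. A minimal sketch of that detection step, with the weights path and thresholds as placeholders:

```python
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8s.pt")  # placeholder weights path


def detect(frame, confidence: float = 0.3, iou: float = 0.7, device: str = "cpu") -> sv.Detections:
    # run YOLOv8, convert to supervision Detections, then apply NMS (as the example scripts do)
    result = model(frame, verbose=False, conf=confidence, device=device)[0]
    return sv.Detections.from_ultralytics(result).with_nms(threshold=iou)
```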
+ +## © license + +This demo integrates two main components, each with its own licensing: + +- ultralytics: The object detection model used in this demo, YOLOv8, is distributed + under the [AGPL-3.0 license](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). + You can find more details about this license here. + +- supervision: The analytics code that powers the zone-based analysis in this demo is + based on the Supervision library, which is licensed under the + [MIT license](https://github.com/roboflow/supervision/blob/develop/LICENSE.md). This + makes the Supervision part of the code fully open source and freely usable in your + projects. diff --git a/examples/time_in_zone/inference_file_example.py b/examples/time_in_zone/inference_file_example.py new file mode 100644 index 000000000..5feb1d836 --- /dev/null +++ b/examples/time_in_zone/inference_file_example.py @@ -0,0 +1,132 @@ +import argparse +from typing import List + +import cv2 +import numpy as np +from inference import get_model +from utils.general import find_in_list, load_zones_config +from utils.timers import FPSBasedTimer + +import supervision as sv + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) + + +def main( + source_video_path: str, + zone_configuration_path: str, + model_id: str, + confidence: float, + iou: float, + classes: List[int], +) -> None: + model = get_model(model_id=model_id) + tracker = sv.ByteTrack(minimum_matching_threshold=0.5) + video_info = sv.VideoInfo.from_video_path(video_path=source_video_path) + frames_generator = sv.get_video_frames_generator(source_video_path) + + frame = next(frames_generator) + resolution_wh = frame.shape[1], frame.shape[0] + + polygons = load_zones_config(file_path=zone_configuration_path) + zones = [ + sv.PolygonZone( + polygon=polygon, + frame_resolution_wh=resolution_wh, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in polygons + ] + timers = [FPSBasedTimer(video_info.fps) for _ in zones] + + for frame in frames_generator: + results = model.infer(frame, confidence=confidence, iou_threshold=iou)[0] + detections = sv.Detections.from_inference(results) + detections = detections[find_in_list(detections.class_id, classes)] + detections = tracker.update_with_detections(detections) + + annotated_frame = frame.copy() + + for idx, zone in enumerate(zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + + cv2.imshow("Processed Video", annotated_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + cv2.destroyAllWindows() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones, using video 
file." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--source_video_path", + type=str, + required=True, + help="Path to the source video file.", + ) + parser.add_argument( + "--model_id", type=str, default="yolov8s-640", help="Roboflow model ID." + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. If empty, all classes are tracked.", + ) + args = parser.parse_args() + + main( + source_video_path=args.source_video_path, + zone_configuration_path=args.zone_configuration_path, + model_id=args.model_id, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + ) diff --git a/examples/time_in_zone/inference_naive_stream_example.py b/examples/time_in_zone/inference_naive_stream_example.py new file mode 100644 index 000000000..dd2d68a5d --- /dev/null +++ b/examples/time_in_zone/inference_naive_stream_example.py @@ -0,0 +1,142 @@ +import argparse +from typing import List + +import cv2 +import numpy as np +from inference import get_model +from utils.general import find_in_list, get_stream_frames_generator, load_zones_config +from utils.timers import ClockBasedTimer + +import supervision as sv + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) + + +def main( + rtsp_url: str, + zone_configuration_path: str, + model_id: str, + confidence: float, + iou: float, + classes: List[int], +) -> None: + model = get_model(model_id=model_id) + tracker = sv.ByteTrack(minimum_matching_threshold=0.5) + frames_generator = get_stream_frames_generator(rtsp_url=rtsp_url) + fps_monitor = sv.FPSMonitor() + + frame = next(frames_generator) + resolution_wh = frame.shape[1], frame.shape[0] + + polygons = load_zones_config(file_path=zone_configuration_path) + zones = [ + sv.PolygonZone( + polygon=polygon, + frame_resolution_wh=resolution_wh, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in polygons + ] + timers = [ClockBasedTimer() for _ in zones] + + for frame in frames_generator: + fps_monitor.tick() + fps = fps_monitor.fps + + results = model.infer(frame, confidence=confidence, iou_threshold=iou)[0] + detections = sv.Detections.from_inference(results) + detections = detections[find_in_list(detections.class_id, classes)] + detections = tracker.update_with_detections(detections) + + annotated_frame = frame.copy() + annotated_frame = sv.draw_text( + scene=annotated_frame, + text=f"{fps:.1f}", + text_anchor=sv.Point(40, 30), + background_color=sv.Color.from_hex("#A351FB"), + text_color=sv.Color.from_hex("#000000"), + ) + + for idx, zone in enumerate(zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + 
detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + + cv2.imshow("Processed Video", annotated_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + cv2.destroyAllWindows() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones, using RTSP stream." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--rtsp_url", + type=str, + required=True, + help="Complete RTSP URL for the video stream.", + ) + parser.add_argument( + "--model_id", type=str, default="yolov8s-640", help="Roboflow model ID." + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. If empty, all classes are tracked.", + ) + args = parser.parse_args() + + main( + rtsp_url=args.rtsp_url, + zone_configuration_path=args.zone_configuration_path, + model_id=args.model_id, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + ) diff --git a/examples/time_in_zone/inference_stream_example.py b/examples/time_in_zone/inference_stream_example.py new file mode 100644 index 000000000..e1fae57f9 --- /dev/null +++ b/examples/time_in_zone/inference_stream_example.py @@ -0,0 +1,158 @@ +import argparse +from typing import List + +import cv2 +import numpy as np +from inference import InferencePipeline +from inference.core.interfaces.camera.entities import VideoFrame +from utils.general import find_in_list, load_zones_config +from utils.timers import ClockBasedTimer + +import supervision as sv + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) + + +class CustomSink: + def __init__(self, zone_configuration_path: str, classes: List[int]): + self.classes = classes + self.tracker = sv.ByteTrack(minimum_matching_threshold=0.5) + self.fps_monitor = sv.FPSMonitor() + self.polygons = load_zones_config(file_path=zone_configuration_path) + self.timers = [ClockBasedTimer() for _ in self.polygons] + self.zones = None + + def on_prediction(self, result: dict, frame: VideoFrame) -> None: + if self.zones is None: + resolution_wh = frame.image.shape[1], frame.image.shape[0] + self.zones = [ + sv.PolygonZone( + polygon=polygon, + frame_resolution_wh=resolution_wh, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in self.polygons + ] + + self.fps_monitor.tick() + fps = self.fps_monitor.fps + + detections = sv.Detections.from_inference(result) + detections = detections[find_in_list(detections.class_id, self.classes)] + detections = self.tracker.update_with_detections(detections) + + annotated_frame = frame.image.copy() + annotated_frame = sv.draw_text( + 
scene=annotated_frame, + text=f"{fps:.1f}", + text_anchor=sv.Point(40, 30), + background_color=sv.Color.from_hex("#A351FB"), + text_color=sv.Color.from_hex("#000000"), + ) + + for idx, zone in enumerate(self.zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = self.timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + cv2.imshow("Processed Video", annotated_frame) + cv2.waitKey(1) + + +def main( + rtsp_url: str, + zone_configuration_path: str, + model_id: str, + confidence: float, + iou: float, + classes: List[int], +) -> None: + sink = CustomSink(zone_configuration_path=zone_configuration_path, classes=classes) + + pipeline = InferencePipeline.init( + model_id=model_id, + video_reference=rtsp_url, + on_prediction=sink.on_prediction, + confidence=confidence, + iou_threshold=iou, + ) + + pipeline.start() + + try: + pipeline.join() + except KeyboardInterrupt: + pipeline.terminate() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones, using RTSP stream." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--rtsp_url", + type=str, + required=True, + help="Complete RTSP URL for the video stream.", + ) + parser.add_argument( + "--model_id", type=str, default="yolov8s-640", help="Roboflow model ID." + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. 
If empty, all classes are tracked.", + ) + args = parser.parse_args() + + main( + rtsp_url=args.rtsp_url, + zone_configuration_path=args.zone_configuration_path, + model_id=args.model_id, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + ) diff --git a/examples/time_in_zone/requirements.txt b/examples/time_in_zone/requirements.txt new file mode 100644 index 000000000..fa17b9864 --- /dev/null +++ b/examples/time_in_zone/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +supervision +ultralytics +inference +pytube diff --git a/examples/time_in_zone/scripts/download_from_youtube.py b/examples/time_in_zone/scripts/download_from_youtube.py new file mode 100644 index 000000000..ff7d94c3f --- /dev/null +++ b/examples/time_in_zone/scripts/download_from_youtube.py @@ -0,0 +1,46 @@ +import argparse +import os +from typing import Optional + +from pytube import YouTube + + +def main(url: str, output_path: Optional[str], file_name: Optional[str]) -> None: + yt = YouTube(url) + stream = yt.streams.get_highest_resolution() + + if not os.path.exists(output_path): + os.makedirs(output_path) + + stream.download(output_path=output_path, filename=file_name) + final_name = file_name if file_name else yt.title + final_path = output_path if output_path else "current directory" + print(f"Download completed! Video saved as '{final_name}' in '{final_path}'.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Download a specific YouTube video by providing its URL." + ) + parser.add_argument( + "--url", + type=str, + required=True, + help="The full URL of the YouTube video you wish to download.", + ) + parser.add_argument( + "--output_path", + type=str, + default="data/source", + required=False, + help="Optional. Specifies the directory where the video will be saved.", + ) + parser.add_argument( + "--file_name", + type=str, + default="video.mp4", + required=False, + help="Optional. 
Sets the name of the saved video file.", + ) + args = parser.parse_args() + main(url=args.url, output_path=args.output_path, file_name=args.file_name) diff --git a/examples/time_in_zone/scripts/draw_zones.py b/examples/time_in_zone/scripts/draw_zones.py new file mode 100644 index 000000000..3afae9e70 --- /dev/null +++ b/examples/time_in_zone/scripts/draw_zones.py @@ -0,0 +1,176 @@ +import argparse +import json +import os +from typing import Any, Optional, Tuple + +import cv2 +import numpy as np + +import supervision as sv + +KEY_ENTER = 13 +KEY_NEWLINE = 10 +KEY_ESCAPE = 27 +KEY_QUIT = ord("q") +KEY_SAVE = ord("s") + +THICKNESS = 2 +COLORS = sv.ColorPalette.DEFAULT +WINDOW_NAME = "Draw Zones" +POLYGONS = [[]] + +current_mouse_position: Optional[Tuple[int, int]] = None + + +def resolve_source(source_path: str) -> Optional[np.ndarray]: + if not os.path.exists(source_path): + return None + + image = cv2.imread(source_path) + if image is not None: + return image + + frame_generator = sv.get_video_frames_generator(source_path=source_path) + frame = next(frame_generator) + return frame + + +def mouse_event(event: int, x: int, y: int, flags: int, param: Any) -> None: + global current_mouse_position + if event == cv2.EVENT_MOUSEMOVE: + current_mouse_position = (x, y) + elif event == cv2.EVENT_LBUTTONDOWN: + POLYGONS[-1].append((x, y)) + + +def redraw(image: np.ndarray, original_image: np.ndarray) -> None: + global POLYGONS, current_mouse_position + image[:] = original_image.copy() + for idx, polygon in enumerate(POLYGONS): + color = ( + COLORS.by_idx(idx).as_bgr() + if idx < len(POLYGONS) - 1 + else sv.Color.WHITE.as_bgr() + ) + + if len(polygon) > 1: + for i in range(1, len(polygon)): + cv2.line( + img=image, + pt1=polygon[i - 1], + pt2=polygon[i], + color=color, + thickness=THICKNESS, + ) + if idx < len(POLYGONS) - 1: + cv2.line( + img=image, + pt1=polygon[-1], + pt2=polygon[0], + color=color, + thickness=THICKNESS, + ) + if idx == len(POLYGONS) - 1 and current_mouse_position is not None and polygon: + cv2.line( + img=image, + pt1=polygon[-1], + pt2=current_mouse_position, + color=color, + thickness=THICKNESS, + ) + cv2.imshow(WINDOW_NAME, image) + + +def close_and_finalize_polygon(image: np.ndarray, original_image: np.ndarray) -> None: + if len(POLYGONS[-1]) > 2: + cv2.line( + img=image, + pt1=POLYGONS[-1][-1], + pt2=POLYGONS[-1][0], + color=COLORS.by_idx(0).as_bgr(), + thickness=THICKNESS, + ) + POLYGONS.append([]) + image[:] = original_image.copy() + redraw_polygons(image) + cv2.imshow(WINDOW_NAME, image) + + +def redraw_polygons(image: np.ndarray) -> None: + for idx, polygon in enumerate(POLYGONS[:-1]): + if len(polygon) > 1: + color = COLORS.by_idx(idx).as_bgr() + for i in range(len(polygon) - 1): + cv2.line( + img=image, + pt1=polygon[i], + pt2=polygon[i + 1], + color=color, + thickness=THICKNESS, + ) + cv2.line( + img=image, + pt1=polygon[-1], + pt2=polygon[0], + color=color, + thickness=THICKNESS, + ) + + +def save_polygons_to_json(polygons, target_path): + data_to_save = polygons if polygons[-1] else polygons[:-1] + with open(target_path, "w") as f: + json.dump(data_to_save, f) + + +def main(source_path: str, zone_configuration_path: str) -> None: + global current_mouse_position + original_image = resolve_source(source_path=source_path) + if original_image is None: + print("Failed to load source image.") + return + + image = original_image.copy() + cv2.imshow(WINDOW_NAME, image) + cv2.setMouseCallback(WINDOW_NAME, mouse_event, image) + + while True: + key = cv2.waitKey(1) & 0xFF + if 
key == KEY_ENTER or key == KEY_NEWLINE: + close_and_finalize_polygon(image, original_image) + elif key == KEY_ESCAPE: + POLYGONS[-1] = [] + current_mouse_position = None + elif key == KEY_SAVE: + save_polygons_to_json(POLYGONS, zone_configuration_path) + print(f"Polygons saved to {zone_configuration_path}") + break + redraw(image, original_image) + if key == KEY_QUIT: + break + + cv2.destroyAllWindows() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Interactively draw polygons on images or video frames and save " + "the annotations." + ) + parser.add_argument( + "--source_path", + type=str, + required=True, + help="Path to the source image or video file for drawing polygons.", + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path where the polygon annotations will be saved as a JSON file.", + ) + arguments = parser.parse_args() + main( + source_path=arguments.source_path, + zone_configuration_path=arguments.zone_configuration_path, + ) diff --git a/examples/time_in_zone/scripts/stream_from_file.py b/examples/time_in_zone/scripts/stream_from_file.py new file mode 100644 index 000000000..23588f680 --- /dev/null +++ b/examples/time_in_zone/scripts/stream_from_file.py @@ -0,0 +1,104 @@ +import argparse +import os +import subprocess +import tempfile +from glob import glob +from threading import Thread + +import yaml + +SERVER_CONFIG = {"protocols": ["tcp"], "paths": {"all": {"source": "publisher"}}} +BASE_STREAM_URL = "rtsp://localhost:8554/live" + + +def main(video_directory: str, number_of_streams: int) -> None: + video_files = find_video_files_in_directory(video_directory, number_of_streams) + try: + with tempfile.TemporaryDirectory() as temporary_directory: + config_file_path = create_server_config_file(temporary_directory) + run_rtsp_server(config_path=config_file_path) + stream_videos(video_files) + finally: + stop_rtsp_server() + + +def find_video_files_in_directory(directory: str, limit: int) -> list: + video_formats = ["*.mp4", "*.webm"] + video_paths = [] + for video_format in video_formats: + video_paths.extend(glob(os.path.join(directory, video_format))) + return video_paths[:limit] + + +def create_server_config_file(directory: str) -> str: + config_path = os.path.join(directory, "rtsp-simple-server.yml") + with open(config_path, "w") as config_file: + yaml.dump(SERVER_CONFIG, config_file) + return config_path + + +def run_rtsp_server(config_path: str) -> None: + command = ( + "docker run --rm --name rtsp_server -d -v " + f"{config_path}:/rtsp-simple-server.yml -p 8554:8554 " + "aler9/rtsp-simple-server:v1.3.0" + ) + if run_command(command.split()) != 0: + raise RuntimeError("Could not start the RTSP server!") + + +def stop_rtsp_server() -> None: + run_command("docker kill rtsp_server".split()) + + +def stream_videos(video_files: list) -> None: + threads = [] + for index, video_file in enumerate(video_files): + stream_url = f"{BASE_STREAM_URL}{index}.stream" + print(f"Streaming {video_file} under {stream_url}") + thread = stream_video_to_url(video_file, stream_url) + threads.append(thread) + for thread in threads: + thread.join() + + +def stream_video_to_url(video_path: str, stream_url: str) -> Thread: + command = ( + f"ffmpeg -re -stream_loop -1 -i {video_path} " + f"-f rtsp -rtsp_transport tcp {stream_url}" + ) + return run_command_in_thread(command.split()) + + +def run_command_in_thread(command: list) -> Thread: + thread = Thread(target=run_command, args=(command,)) + thread.start() + return thread 
+ + +def run_command(command: list) -> int: + process = subprocess.run(command) + return process.returncode + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Script to stream videos using RTSP protocol." + ) + parser.add_argument( + "--video_directory", + type=str, + required=True, + help="Directory containing video files to stream.", + ) + parser.add_argument( + "--number_of_streams", + type=int, + default=6, + help="Number of video files to stream.", + ) + arguments = parser.parse_args() + main( + video_directory=arguments.video_directory, + number_of_streams=arguments.number_of_streams, + ) diff --git a/examples/time_in_zone/ultralytics_file_example.py b/examples/time_in_zone/ultralytics_file_example.py new file mode 100644 index 000000000..fe8ce58df --- /dev/null +++ b/examples/time_in_zone/ultralytics_file_example.py @@ -0,0 +1,144 @@ +import argparse +from typing import List + +import cv2 +import numpy as np +from ultralytics import YOLO +from utils.general import find_in_list, load_zones_config +from utils.timers import FPSBasedTimer + +import supervision as sv + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) + + +def main( + source_video_path: str, + zone_configuration_path: str, + weights: str, + device: str, + confidence: float, + iou: float, + classes: List[int], +) -> None: + model = YOLO(weights) + tracker = sv.ByteTrack(minimum_matching_threshold=0.5) + video_info = sv.VideoInfo.from_video_path(video_path=source_video_path) + frames_generator = sv.get_video_frames_generator(source_video_path) + + frame = next(frames_generator) + resolution_wh = frame.shape[1], frame.shape[0] + + polygons = load_zones_config(file_path=zone_configuration_path) + zones = [ + sv.PolygonZone( + polygon=polygon, + frame_resolution_wh=resolution_wh, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in polygons + ] + timers = [FPSBasedTimer(video_info.fps) for _ in zones] + + for frame in frames_generator: + results = model(frame, verbose=False, device=device, conf=confidence)[0] + detections = sv.Detections.from_ultralytics(results) + detections = detections[find_in_list(detections.class_id, classes)] + detections = detections.with_nms(threshold=iou) + detections = tracker.update_with_detections(detections) + + annotated_frame = frame.copy() + + for idx, zone in enumerate(zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + + cv2.imshow("Processed Video", annotated_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + cv2.destroyAllWindows() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell 
time in zones, using video file." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--source_video_path", + type=str, + required=True, + help="Path to the source video file.", + ) + parser.add_argument( + "--weights", + type=str, + default="yolov8s.pt", + help="Path to the model weights file. Default is 'yolov8s.pt'.", + ) + parser.add_argument( + "--device", + type=str, + default="cpu", + help="Computation device ('cpu', 'mps' or 'cuda'). Default is 'cpu'.", + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. If empty, all classes are tracked.", + ) + args = parser.parse_args() + + main( + source_video_path=args.source_video_path, + zone_configuration_path=args.zone_configuration_path, + weights=args.weights, + device=args.device, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + ) diff --git a/examples/time_in_zone/ultralytics_naive_stream_example.py b/examples/time_in_zone/ultralytics_naive_stream_example.py new file mode 100644 index 000000000..1cc82b446 --- /dev/null +++ b/examples/time_in_zone/ultralytics_naive_stream_example.py @@ -0,0 +1,154 @@ +import argparse +from typing import List + +import cv2 +import numpy as np +from ultralytics import YOLO +from utils.general import find_in_list, get_stream_frames_generator, load_zones_config +from utils.timers import ClockBasedTimer + +import supervision as sv + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) + + +def main( + rtsp_url: str, + zone_configuration_path: str, + weights: str, + device: str, + confidence: float, + iou: float, + classes: List[int], +) -> None: + model = YOLO(weights) + tracker = sv.ByteTrack(minimum_matching_threshold=0.5) + frames_generator = get_stream_frames_generator(rtsp_url=rtsp_url) + fps_monitor = sv.FPSMonitor() + + frame = next(frames_generator) + resolution_wh = frame.shape[1], frame.shape[0] + + polygons = load_zones_config(file_path=zone_configuration_path) + zones = [ + sv.PolygonZone( + polygon=polygon, + frame_resolution_wh=resolution_wh, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in polygons + ] + timers = [ClockBasedTimer() for _ in zones] + + for frame in frames_generator: + fps_monitor.tick() + fps = fps_monitor.fps + + results = model(frame, verbose=False, device=device, conf=confidence)[0] + detections = sv.Detections.from_ultralytics(results) + detections = detections[find_in_list(detections.class_id, classes)] + detections = detections.with_nms(threshold=iou) + detections = tracker.update_with_detections(detections) + + annotated_frame = frame.copy() + annotated_frame = sv.draw_text( + scene=annotated_frame, + text=f"{fps:.1f}", + text_anchor=sv.Point(40, 30), + background_color=sv.Color.from_hex("#A351FB"), + text_color=sv.Color.from_hex("#000000"), + ) + + for idx, zone in enumerate(zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, 
color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + + cv2.imshow("Processed Video", annotated_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + cv2.destroyAllWindows() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones, using RTSP stream." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--rtsp_url", + type=str, + required=True, + help="Complete RTSP URL for the video stream.", + ) + parser.add_argument( + "--weights", + type=str, + default="yolov8s.pt", + help="Path to the model weights file. Default is 'yolov8s.pt'.", + ) + parser.add_argument( + "--device", + type=str, + default="cpu", + help="Computation device ('cpu', 'mps' or 'cuda'). Default is 'cpu'.", + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. 
If empty, all classes are tracked.", + ) + args = parser.parse_args() + + main( + rtsp_url=args.rtsp_url, + zone_configuration_path=args.zone_configuration_path, + weights=args.weights, + device=args.device, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + ) diff --git a/examples/time_in_zone/ultralytics_stream_example.py b/examples/time_in_zone/ultralytics_stream_example.py new file mode 100644 index 000000000..25dc874f8 --- /dev/null +++ b/examples/time_in_zone/ultralytics_stream_example.py @@ -0,0 +1,173 @@ +import argparse +from typing import List + +import cv2 +import numpy as np +from inference import InferencePipeline +from inference.core.interfaces.camera.entities import VideoFrame +from ultralytics import YOLO +from utils.general import find_in_list, load_zones_config +from utils.timers import ClockBasedTimer + +import supervision as sv + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) + + +class CustomSink: + def __init__(self, zone_configuration_path: str, classes: List[int]): + self.classes = classes + self.tracker = sv.ByteTrack(minimum_matching_threshold=0.8) + self.fps_monitor = sv.FPSMonitor() + self.polygons = load_zones_config(file_path=zone_configuration_path) + self.timers = [ClockBasedTimer() for _ in self.polygons] + self.zones = None + + def on_prediction(self, detections: sv.Detections, frame: VideoFrame) -> None: + if self.zones is None: + resolution_wh = frame.image.shape[1], frame.image.shape[0] + self.zones = [ + sv.PolygonZone( + polygon=polygon, + frame_resolution_wh=resolution_wh, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in self.polygons + ] + + self.fps_monitor.tick() + fps = self.fps_monitor.fps + + detections = detections[find_in_list(detections.class_id, self.classes)] + detections = self.tracker.update_with_detections(detections) + + annotated_frame = frame.image.copy() + annotated_frame = sv.draw_text( + scene=annotated_frame, + text=f"{fps:.1f}", + text_anchor=sv.Point(40, 30), + background_color=sv.Color.from_hex("#A351FB"), + text_color=sv.Color.from_hex("#000000"), + ) + + for idx, zone in enumerate(self.zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = self.timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + cv2.imshow("Processed Video", annotated_frame) + cv2.waitKey(1) + + +def main( + rtsp_url: str, + zone_configuration_path: str, + weights: str, + device: str, + confidence: float, + iou: float, + classes: List[int], +) -> None: + model = YOLO(weights) + + def inference_callback(frame: VideoFrame) -> sv.Detections: + results = model(frame.image, verbose=False, conf=confidence, device=device)[0] + return 
sv.Detections.from_ultralytics(results).with_nms(threshold=iou) + + sink = CustomSink(zone_configuration_path=zone_configuration_path, classes=classes) + + pipeline = InferencePipeline.init_with_custom_logic( + video_reference=rtsp_url, + on_video_frame=inference_callback, + on_prediction=sink.on_prediction, + ) + + pipeline.start() + + try: + pipeline.join() + except KeyboardInterrupt: + pipeline.terminate() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones, using RTSP stream." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--rtsp_url", + type=str, + required=True, + help="Complete RTSP URL for the video stream.", + ) + parser.add_argument( + "--weights", + type=str, + default="yolov8s.pt", + help="Path to the model weights file. Default is 'yolov8s.pt'.", + ) + parser.add_argument( + "--device", + type=str, + default="cpu", + help="Computation device ('cpu', 'mps' or 'cuda'). Default is 'cpu'.", + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. If empty, all classes are tracked.", + ) + args = parser.parse_args() + + main( + rtsp_url=args.rtsp_url, + zone_configuration_path=args.zone_configuration_path, + weights=args.weights, + device=args.device, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + ) diff --git a/examples/time_in_zone/utils/__init__.py b/examples/time_in_zone/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/time_in_zone/utils/general.py b/examples/time_in_zone/utils/general.py new file mode 100644 index 000000000..803d116aa --- /dev/null +++ b/examples/time_in_zone/utils/general.py @@ -0,0 +1,66 @@ +import json +from typing import Generator, List + +import cv2 +import numpy as np + + +def load_zones_config(file_path: str) -> List[np.ndarray]: + """ + Load polygon zone configurations from a JSON file. + + This function reads a JSON file which contains polygon coordinates, and + converts them into a list of NumPy arrays. Each polygon is represented as + a NumPy array of coordinates. + + Args: + file_path (str): The path to the JSON configuration file. + + Returns: + List[np.ndarray]: A list of polygons, each represented as a NumPy array. + """ + with open(file_path, "r") as file: + data = json.load(file) + return [np.array(polygon, np.int32) for polygon in data] + + +def find_in_list(array: np.ndarray, search_list: List[int]) -> np.ndarray: + """Determines if elements of a numpy array are present in a list. + + Args: + array (np.ndarray): The numpy array of integers to check. + search_list (List[int]): The list of integers to search within. + + Returns: + np.ndarray: A numpy array of booleans, where each boolean indicates whether + the corresponding element in `array` is found in `search_list`. 
+ """ + if not search_list: + return np.ones(array.shape, dtype=bool) + else: + return np.isin(array, search_list) + + +def get_stream_frames_generator(rtsp_url: str) -> Generator[np.ndarray, None, None]: + """ + Generator function to yield frames from an RTSP stream. + + Args: + rtsp_url (str): URL of the RTSP video stream. + + Yields: + np.ndarray: The next frame from the video stream. + """ + cap = cv2.VideoCapture(rtsp_url) + if not cap.isOpened(): + raise Exception("Error: Could not open video stream.") + + try: + while True: + ret, frame = cap.read() + if not ret: + print("End of stream or error reading frame.") + break + yield frame + finally: + cap.release() diff --git a/examples/time_in_zone/utils/timers.py b/examples/time_in_zone/utils/timers.py new file mode 100644 index 000000000..cb5b471f6 --- /dev/null +++ b/examples/time_in_zone/utils/timers.py @@ -0,0 +1,88 @@ +from datetime import datetime +from typing import Dict + +import numpy as np + +import supervision as sv + + +class FPSBasedTimer: + """ + A timer that calculates the duration each object has been detected based on frames + per second (FPS). + + Attributes: + fps (int): The frame rate of the video stream, used to calculate time durations. + frame_id (int): The current frame number in the sequence. + tracker_id2frame_id (Dict[int, int]): Maps each tracker's ID to the frame number + at which it was first detected. + """ + + def __init__(self, fps: int = 30) -> None: + """Initializes the FPSBasedTimer with the specified frames per second rate. + + Args: + fps (int, optional): The frame rate of the video stream. Defaults to 30. + """ + self.fps = fps + self.frame_id = 0 + self.tracker_id2frame_id: Dict[int, int] = {} + + def tick(self, detections: sv.Detections) -> np.ndarray: + """Processes the current frame, updating time durations for each tracker. + + Args: + detections: The detections for the current frame, including tracker IDs. + + Returns: + np.ndarray: Time durations (in seconds) for each detected tracker, since + their first detection. + """ + self.frame_id += 1 + times = [] + + for tracker_id in detections.tracker_id: + self.tracker_id2frame_id.setdefault(tracker_id, self.frame_id) + + start_frame_id = self.tracker_id2frame_id[tracker_id] + time_duration = (self.frame_id - start_frame_id) / self.fps + times.append(time_duration) + + return np.array(times) + + +class ClockBasedTimer: + """ + A timer that calculates the duration each object has been detected based on the + system clock. + + Attributes: + tracker_id2start_time (Dict[int, datetime]): Maps each tracker's ID to the + datetime when it was first detected. + """ + + def __init__(self) -> None: + """Initializes the ClockBasedTimer.""" + self.tracker_id2start_time: Dict[int, datetime] = {} + + def tick(self, detections: sv.Detections) -> np.ndarray: + """Processes the current frame, updating time durations for each tracker. + + Args: + detections: The detections for the current frame, including tracker IDs. + + Returns: + np.ndarray: Time durations (in seconds) for each detected tracker, since + their first detection. + """ + current_time = datetime.now() + times = [] + + for tracker_id in detections.tracker_id: + self.tracker_id2start_time.setdefault(tracker_id, current_time) + + start_time = self.tracker_id2start_time[tracker_id] + time_duration = (current_time - start_time).total_seconds() + times.append(time_duration) + + return np.array(times)
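To illustrate how the timers above are meant to be used: `tick` is called once per processed frame with the detections currently inside a zone, and it returns the accumulated dwell time (in seconds) for every tracker ID it sees. The snippet below is a hypothetical, self-contained sketch with hand-made detections; in the example scripts the tracker IDs come from `sv.ByteTrack` instead.

```python
import numpy as np
import supervision as sv

from utils.timers import FPSBasedTimer

# two hypothetical tracked detections; normally produced by the tracker
detections = sv.Detections(
    xyxy=np.array([[10.0, 10.0, 50.0, 50.0], [60.0, 60.0, 120.0, 120.0]]),
    tracker_id=np.array([1, 2]),
)

timer = FPSBasedTimer(fps=30)
for _ in range(90):  # simulate 90 consecutive frames with both objects inside the zone
    times = timer.tick(detections)

print(times)  # ~2.97 s per tracker: (90 - 1) frames elapsed since first detection / 30 fps
```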