From 574c61b6b00bb80f90b6826b404b843b3c1f6bf4 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 14:28:40 +0200 Subject: [PATCH 01/18] initial commit --- docs/how_to/detect_and_annotate.md | 2 +- docs/how_to/save_detections.md | 124 +++++++++++++++++++++++++++++ mkdocs.yml | 4 +- supervision/draw/color.py | 18 ++++- supervision/utils/image.py | 51 ++++++------ test/utils/test_image.py | 4 +- 6 files changed, 170 insertions(+), 33 deletions(-) create mode 100644 docs/how_to/save_detections.md diff --git a/docs/how_to/detect_and_annotate.md b/docs/how_to/detect_and_annotate.md index 221d9ca1b..adea95cf7 100644 --- a/docs/how_to/detect_and_annotate.md +++ b/docs/how_to/detect_and_annotate.md @@ -15,7 +15,7 @@ source image. ![basic-annotation](https://media.roboflow.com/supervision_detect_and_annotate_example_1.png) -## Run Inference +## Run Detection First, you'll need to obtain predictions from your object detection or segmentation model. diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md new file mode 100644 index 000000000..622c504f5 --- /dev/null +++ b/docs/how_to/save_detections.md @@ -0,0 +1,124 @@ +--- +comments: true +status: new +--- + +# Save Detections + +TODO + +## Run Detection + +=== "Inference" + + ```python + import cv2 + from inference import get_model + + model = get_model(model_id="yolov8n-640") + image = cv2.imread() + results = model.infer(image)[0] + ``` + +=== "Ultralytics" + + ```python + import cv2 + from ultralytics import YOLO + + model = YOLO("yolov8n.pt") + image = cv2.imread() + results = model(image)[0] + ``` + +=== "Transformers" + + ```python + import torch + from PIL import Image + from transformers import DetrImageProcessor, DetrForObjectDetection + + processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50") + model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50") + + image = Image.open() + inputs = processor(images=image, return_tensors="pt") + + with torch.no_grad(): + outputs = model(**inputs) + + width, height = image.size + target_size = torch.tensor([[height, width]]) + results = processor.post_process_object_detection( + outputs=outputs, target_sizes=target_size)[0] + ``` + +## Save Detections as CSV + +TODO + +=== "Inference" + + ```python + import supervision as sv + from inference import get_model + + model = get_model(model_id="yolov8n-640") + + with sv.CSVSink() as sink: + for frame in sv.get_video_frames_generator(): + + results = model.infer(image)[0] + detections = sv.Detections.from_inference(results) + sink.append(detections, {}) + ``` + +=== "Ultralytics" + + ```python + import supervision as sv + from ultralytics import YOLO + + model = YOLO("yolov8n.pt") + + with sv.CSVSink() as sink: + for frame in sv.get_video_frames_generator(): + + results = model(frame)[0] + detections = sv.Detections.from_ultralytics(results) + sink.append(detections, {}) + ``` + +=== "Transformers" + + ```python + import torch + from PIL import Image + from transformers import DetrImageProcessor, DetrForObjectDetection + + processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50") + model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50") + + image = Image.open() + inputs = processor(images=image, return_tensors="pt") + + with torch.no_grad(): + outputs = model(**inputs) + + width, height = image.size + target_size = torch.tensor([[height, width]]) + results = processor.post_process_object_detection( + outputs=outputs, target_sizes=target_size)[0] + ``` + +## 
Custom Fields + +TODO + +## Save Detections as JSON + +TODO + +## Process Video and Save Detections + +TODO \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 8bae5d765..dd6feb8db 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -38,9 +38,11 @@ nav: - Home: index.md - How to: - Detect and Annotate: how_to/detect_and_annotate.md + - Save Detections: how_to/save_detections.md + - Filter Detections: how_to/filter_detections.md - Detect Small Objects: how_to/detect_small_objects.md - Track Objects: how_to/track_objects.md - - Filter Detections: how_to/filter_detections.md + - API: - Annotators: annotators.md - Classifications: diff --git a/supervision/draw/color.py b/supervision/draw/color.py index debb46f3b..b195cffe7 100644 --- a/supervision/draw/color.py +++ b/supervision/draw/color.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import List, Tuple +from typing import List, Tuple, Union import matplotlib.pyplot as plt @@ -448,3 +448,19 @@ def by_idx(self, idx: int) -> Color: raise ValueError("idx argument should not be negative") idx = idx % len(self.colors) return self.colors[idx] + + +def unify_to_bgr(color: Union[Tuple[int, int, int], Color]) -> Tuple[int, int, int]: + """ + Converts a color input in multiple formats to a standardized BGR format. + + Args: + color (Union[Tuple[int, int, int], Color]): The color input to be converted, + which can be either a tuple of RGB values or an instance of a Color class. + + Returns: + Tuple[int, int, int]: The color in BGR format as a tuple of three integers. + """ + if issubclass(type(color), Color): + return color.as_bgr() + return color diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 4e0ba5d76..64fb6de10 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -9,7 +9,7 @@ import numpy as np from supervision.annotators.base import ImageType -from supervision.draw.color import Color +from supervision.draw.color import Color, unify_to_bgr from supervision.draw.utils import calculate_optimal_text_scale, draw_text from supervision.geometry.core import Point from supervision.utils.conversion import ( @@ -25,17 +25,18 @@ @convert_for_image_processing -def crop_image(image: np.ndarray, xyxy: np.ndarray) -> np.ndarray: +def crop_image(image: ImageType, xyxy: np.ndarray) -> np.ndarray: """ Crops the given image based on the given bounding box. Args: - image (np.ndarray): The image to be cropped, represented as a numpy array. + image (ImageType): The image to be cropped. `ImageType` is a flexible type, + accepting either `numpy.ndarray` or `PIL.Image.Image`. xyxy (np.ndarray): A numpy array containing the bounding box coordinates in the format (x1, y1, x2, y2). Returns: - (np.ndarray): The cropped image as a numpy array. + (ImageType): The cropped image. 
Examples: ```python @@ -74,10 +75,10 @@ def resize_image(image: np.ndarray, scale_factor: float) -> np.ndarray: raise ValueError("Scale factor must be positive.") old_width, old_height = image.shape[1], image.shape[0] - nwe_width = int(old_width * scale_factor) + new_width = int(old_width * scale_factor) new_height = int(old_height * scale_factor) - return cv2.resize(image, (nwe_width, new_height), interpolation=cv2.INTER_LINEAR) + return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR) def place_image( @@ -285,14 +286,14 @@ def create_tiles( raise ValueError("Could not create image tiles from empty list of images.") if return_type == "auto": return_type = _negotiate_tiles_format(images=images) - tile_padding_color = _color_to_bgr(color=tile_padding_color) - tile_margin_color = _color_to_bgr(color=tile_margin_color) + tile_padding_color = unify_to_bgr(color=tile_padding_color) + tile_margin_color = unify_to_bgr(color=tile_margin_color) images = images_to_cv2(images=images) if single_tile_size is None: single_tile_size = _aggregate_images_shape(images=images, mode=tile_scaling) resized_images = [ letterbox_image( - image=i, desired_size=single_tile_size, color=tile_padding_color + image=i, target_resolution_wh=single_tile_size, color=tile_padding_color ) for i in images ] @@ -311,8 +312,8 @@ def create_tiles( titles_anchors = fill( sequence=titles_anchors, desired_size=len(images), content=None ) - titles_color = _color_to_bgr(color=titles_color) - titles_background_color = _color_to_bgr(color=titles_background_color) + titles_color = unify_to_bgr(color=titles_color) + titles_background_color = unify_to_bgr(color=titles_background_color) tiles = _generate_tiles( images=resized_images, grid_size=grid_size, @@ -546,8 +547,8 @@ def _generate_color_image( @convert_for_image_processing def letterbox_image( - image: np.ndarray, - desired_size: Tuple[int, int], + image: ImageType, + target_resolution_wh: Tuple[int, int], color: Union[Tuple[int, int, int], Color] = (0, 0, 0), ) -> np.ndarray: """ @@ -555,27 +556,27 @@ def letterbox_image( ratio, adding padding of given color if needed to maintain aspect ratio. Args: - image (np.ndarray): Input image (type will be adjusted by decorator, + image (ImageType): Input image (type will be adjusted by decorator, you can provide PIL.Image) - desired_size (Tuple[int, int]): image size (width, height) representing + target_resolution_wh (Tuple[int, int]): image size (width, height) representing the target dimensions. color (Union[Tuple[int, int, int], Color]): the color to pad with - If tuple provided - should be BGR. 
Returns: - np.ndarray: letterboxed image (type may be adjusted to PIL.Image by + ImageType: letterboxed image (type may be adjusted to PIL.Image by decorator if function was called with PIL.Image) """ - color = _color_to_bgr(color=color) + color = unify_to_bgr(color=color) resized_img = resize_image_keeping_aspect_ratio( image=image, - desired_size=desired_size, + desired_size=target_resolution_wh, ) new_height, new_width = resized_img.shape[:2] - top_padding = (desired_size[1] - new_height) // 2 - bottom_padding = desired_size[1] - new_height - top_padding - left_padding = (desired_size[0] - new_width) // 2 - right_padding = desired_size[0] - new_width - left_padding + top_padding = (target_resolution_wh[1] - new_height) // 2 + bottom_padding = target_resolution_wh[1] - new_height - top_padding + left_padding = (target_resolution_wh[0] - new_width) // 2 + right_padding = target_resolution_wh[0] - new_width - left_padding return cv2.copyMakeBorder( resized_img, top_padding, @@ -625,9 +626,3 @@ def resize_image_keeping_aspect_ratio( new_height = desired_size[1] new_width = int(desired_size[1] * img_ratio) return cv2.resize(image, (new_width, new_height)) - - -def _color_to_bgr(color: Union[Tuple[int, int, int], Color]) -> Tuple[int, int, int]: - if issubclass(type(color), Color): - return color.as_bgr() - return color diff --git a/test/utils/test_image.py b/test/utils/test_image.py index e50f2e574..50b6b5c1b 100644 --- a/test/utils/test_image.py +++ b/test/utils/test_image.py @@ -62,7 +62,7 @@ def test_letterbox_image_for_opencv_image() -> None: # when result = letterbox_image( - image=image, desired_size=(1024, 1024), color=(255, 255, 255) + image=image, target_resolution_wh=(1024, 1024), color=(255, 255, 255) ) # then @@ -88,7 +88,7 @@ def test_letterbox_image_for_pillow_image() -> None: # when result = letterbox_image( - image=image, desired_size=(1024, 1024), color=(255, 255, 255) + image=image, target_resolution_wh=(1024, 1024), color=(255, 255, 255) ) # then From 5b1ee8f870c1c9e091fb84167fbf060ebc9c0cac Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 12:31:30 +0000 Subject: [PATCH 02/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/how_to/save_detections.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md index 622c504f5..81de930ec 100644 --- a/docs/how_to/save_detections.md +++ b/docs/how_to/save_detections.md @@ -62,12 +62,12 @@ TODO ```python import supervision as sv from inference import get_model - + model = get_model(model_id="yolov8n-640") - + with sv.CSVSink() as sink: for frame in sv.get_video_frames_generator(): - + results = model.infer(image)[0] detections = sv.Detections.from_inference(results) sink.append(detections, {}) @@ -78,12 +78,12 @@ TODO ```python import supervision as sv from ultralytics import YOLO - + model = YOLO("yolov8n.pt") - + with sv.CSVSink() as sink: for frame in sv.get_video_frames_generator(): - + results = model(frame)[0] detections = sv.Detections.from_ultralytics(results) sink.append(detections, {}) @@ -121,4 +121,4 @@ TODO ## Process Video and Save Detections -TODO \ No newline at end of file +TODO From 2115c9a1526dfc8d2d5de5f74651ac48f3f2c585 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 14:50:32 +0200 
Subject: [PATCH 03/18] crop_image improvements --- supervision/utils/image.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 64fb6de10..a6466eaa7 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -25,15 +25,19 @@ @convert_for_image_processing -def crop_image(image: ImageType, xyxy: np.ndarray) -> np.ndarray: +def crop_image( + image: ImageType, + xyxy: Union[np.ndarray, List[int], Tuple[int, int, int, int]] +) -> ImageType: """ Crops the given image based on the given bounding box. Args: image (ImageType): The image to be cropped. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. - xyxy (np.ndarray): A numpy array containing the bounding box coordinates - in the format (x1, y1, x2, y2). + xyxy (Union[np.ndarray, List[int], Tuple[int, int, int, int]]): A bounding box + coordinates in the format (x_min, y_min, x_max, y_max), accepted as either + a numpy array, a list, or a tuple. Returns: (ImageType): The cropped image. @@ -50,9 +54,11 @@ def crop_image(image: ImageType, xyxy: np.ndarray) -> np.ndarray: ``` """ + if isinstance(xyxy, (list, tuple)): + xyxy = np.array(xyxy) xyxy = np.round(xyxy).astype(int) - x1, y1, x2, y2 = xyxy - return image[y1:y2, x1:x2] + x_min, y_min, x_max, y_max = xyxy.flatten() + return image[y_min:y_max, x_min:x_max] @convert_for_image_processing From f2e6d882fbb64f326e179cdd24f5837715982ac8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 12:50:48 +0000 Subject: [PATCH 04/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/image.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index a6466eaa7..954533a2c 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -26,8 +26,7 @@ @convert_for_image_processing def crop_image( - image: ImageType, - xyxy: Union[np.ndarray, List[int], Tuple[int, int, int, int]] + image: ImageType, xyxy: Union[np.ndarray, List[int], Tuple[int, int, int, int]] ) -> ImageType: """ Crops the given image based on the given bounding box. From 94e06ab0d6b71d87a3839207efe6b23d6bf74186 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 16:46:05 +0200 Subject: [PATCH 05/18] scale_image, resize_image changes --- docs/utils/image.md | 10 +- supervision/__init__.py | 4 +- supervision/annotators/core.py | 4 +- supervision/utils/image.py | 167 ++++++++++++++++++++++----------- 4 files changed, 125 insertions(+), 60 deletions(-) diff --git a/docs/utils/image.md b/docs/utils/image.md index 087aebd73..b61cddfce 100644 --- a/docs/utils/image.md +++ b/docs/utils/image.md @@ -18,10 +18,10 @@ status: new :::supervision.utils.image.crop_image
-

<div class="md-typeset">
-  <h2><a href="#supervision.utils.image.letterbox_image">letterbox_image</a></h2>
+  <h2><a href="#supervision.utils.image.scale_image">scale_image</a></h2>
</div>

-:::supervision.utils.image.letterbox_image +:::supervision.utils.image.scale_image
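For a quick sanity check, a minimal sketch of the renamed helper on a synthetic NumPy frame (the array shape is illustrative only):

```python
import numpy as np
import supervision as sv

# stand-in for a real BGR frame; any HxWx3 uint8 array works
image = np.zeros((480, 640, 3), dtype=np.uint8)

# scale_factor > 1.0 zooms in, < 1.0 zooms out
scaled = sv.scale_image(image=image, scale_factor=0.5)
print(scaled.shape)  # (240, 320, 3)
```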

<div class="md-typeset">
  <h2><a href="#supervision.utils.image.resize_image">resize_image</a></h2>
</div>

@@ -29,6 +29,12 @@ status: new :::supervision.utils.image.resize_image +
+

+<div class="md-typeset">
+  <h2><a href="#supervision.utils.image.letterbox_image">letterbox_image</a></h2>
+</div>

+
+ +:::supervision.utils.image.letterbox_image +
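A minimal sketch of the newly listed helper, using a synthetic frame; note that at this point in the series the size argument is still named `target_resolution_wh` (a later patch renames it to `resolution_wh`):

```python
import numpy as np
import supervision as sv

# synthetic 4:3 frame, letterboxed onto a square canvas
image = np.zeros((480, 640, 3), dtype=np.uint8)

# color accepts a BGR tuple or an sv.Color instance
letterboxed = sv.letterbox_image(
    image=image, target_resolution_wh=(640, 640), color=(255, 255, 255)
)
print(letterboxed.shape)  # (640, 640, 3) - aspect ratio kept, padding added
```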

<div class="md-typeset">
  <h2><a href="#supervision.utils.image.place_image">place_image</a></h2>
</div>

diff --git a/supervision/__init__.py b/supervision/__init__.py index 8c3acc265..d6aeeab59 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -78,8 +78,8 @@ crop_image, letterbox_image, place_image, - resize_image, - resize_image_keeping_aspect_ratio, + scale_image, + resize_image ) from supervision.utils.notebook import plot_image, plot_images_grid from supervision.utils.video import ( diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 9f6cdb367..38ddc4421 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -13,7 +13,7 @@ from supervision.draw.utils import draw_polygon from supervision.geometry.core import Position from supervision.utils.conversion import convert_for_annotation_method -from supervision.utils.image import crop_image, place_image, resize_image +from supervision.utils.image import crop_image, place_image, scale_image class BoundingBoxAnnotator(BaseAnnotator): @@ -1965,7 +1965,7 @@ def annotate( crop_image(image=scene, xyxy=xyxy) for xyxy in detections.xyxy.astype(int) ] resized_crops = [ - resize_image(image=crop, scale_factor=self.scale_factor) for crop in crops + scale_image(image=crop, scale_factor=self.scale_factor) for crop in crops ] anchors = detections.get_anchors_coordinates(anchor=self.position).astype(int) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 954533a2c..99fcb3f24 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -7,6 +7,7 @@ import cv2 import numpy as np +import numpy.typing as npt from supervision.annotators.base import ImageType from supervision.draw.color import Color, unify_to_bgr @@ -26,7 +27,8 @@ @convert_for_image_processing def crop_image( - image: ImageType, xyxy: Union[np.ndarray, List[int], Tuple[int, int, int, int]] + image: ImageType, + xyxy: Union[npt.NDArray[int], List[int], Tuple[int, int, int, int]] ) -> ImageType: """ Crops the given image based on the given bounding box. @@ -35,23 +37,49 @@ def crop_image( image (ImageType): The image to be cropped. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. xyxy (Union[np.ndarray, List[int], Tuple[int, int, int, int]]): A bounding box - coordinates in the format (x_min, y_min, x_max, y_max), accepted as either - a numpy array, a list, or a tuple. + coordinates in the format `(x_min, y_min, x_max, y_max)`, accepted as either + a `numpy.ndarray`, a `list`, or a `tuple`. Returns: - (ImageType): The cropped image. + (ImageType): The cropped image. The type is determined by the input type and + may be either a `numpy.ndarray` or `PIL.Image.Image`. Examples: + + === "OpenCV" + ```python + import cv2 import supervision as sv - detection = sv.Detections(...) 
- with sv.ImageSink(target_dir_path='target/directory/path') as sink: - for xyxy in detection.xyxy: - cropped_image = sv.crop_image(image=image, xyxy=xyxy) - sink.save_image(image=cropped_image) + image = cv2.imread() + image.shape + # (1080, 1920, 3) + + xyxy = [200, 400, 600, 800] + cropped_image = sv.crop_image(image=image, xyxy=xyxy) + cropped_image.shape + # (400, 400, 3) ``` - """ + + === "Pillow" + + ```python + from PIL import Image + import supervision as sv + + image = Image.open() + image.size + # (1920, 1080) + + xyxy = [200, 400, 600, 800] + cropped_image = sv.crop_image(image=image, xyxy=xyxy) + cropped_image.size + # (400, 400) + ``` + + ![crop_image](https://media.roboflow.com/supervision-docs/crop-image.png){ align=center width="800" } + """ # noqa E501 // docs if isinstance(xyxy, (list, tuple)): xyxy = np.array(xyxy) @@ -61,29 +89,62 @@ def crop_image( @convert_for_image_processing -def resize_image(image: np.ndarray, scale_factor: float) -> np.ndarray: +def scale_image(image: ImageType, scale_factor: float) -> ImageType: """ - Resizes an image by a given scale factor using cv2.INTER_LINEAR interpolation. + Scales the given image based on the given scale factor. Args: - image (np.ndarray): The input image to be resized. + image (ImageType): The image to be scaled. `ImageType` is a flexible type, + accepting either `numpy.ndarray` or `PIL.Image.Image`. scale_factor (float): The factor by which the image will be scaled. Scale - factor > 1.0 zooms in, < 1.0 zooms out. + factor > `1.0` zooms in, < `1.0` zooms out. Returns: - np.ndarray: The resized image. + (ImageType): The scaled image. The type is determined by the input type and + may be either a `numpy.ndarray` or `PIL.Image.Image`. Raises: ValueError: If the scale factor is non-positive. 
+ + Examples: + + === "OpenCV" + + ```python + import cv2 + import supervision as sv + + image = cv2.imread() + image.shape + # (1080, 1920, 3) + + scaled_image = sv.scale_image(image=image, scale_factor=0.5) + scaled_image.shape + # (540, 960, 3) + ``` + + === "Pillow" + + ```python + from PIL import Image + import supervision as sv + + image = Image.open() + image.size + # (1920, 1080) + + scaled_image = sv.scale_image(image=image, scale_factor=0.5) + scaled_image.size + # (540, 960) + ``` """ if scale_factor <= 0: raise ValueError("Scale factor must be positive.") - old_width, old_height = image.shape[1], image.shape[0] - new_width = int(old_width * scale_factor) - new_height = int(old_height * scale_factor) - - return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR) + width_old, height_old = image.shape[1], image.shape[0] + width_new = int(width_old * scale_factor) + height_new = int(height_old * scale_factor) + return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) def place_image( @@ -573,9 +634,10 @@ def letterbox_image( decorator if function was called with PIL.Image) """ color = unify_to_bgr(color=color) - resized_img = resize_image_keeping_aspect_ratio( + resized_img = resize_image( image=image, - desired_size=target_resolution_wh, + target_resolution_wh=target_resolution_wh, + keep_aspect_ratio=True ) new_height, new_width = resized_img.shape[:2] top_padding = (target_resolution_wh[1] - new_height) // 2 @@ -594,40 +656,37 @@ def letterbox_image( @convert_for_image_processing -def resize_image_keeping_aspect_ratio( - image: np.ndarray, - desired_size: Tuple[int, int], -) -> np.ndarray: +def resize_image( + image: ImageType, + target_resolution_wh: Tuple[int, int], + keep_aspect_ratio: bool = False +) -> ImageType: """ - Resize and pad image preserving its aspect ratio. - - For example: input image is (640, 480) and we want to resize into - (1024, 1024). If this rectangular image is just resized naively - to square-shape output - aspect ratio would be altered. If we do not - want this to happen - we may resize bigger dimension (640) to 1024. - Ratio of change is 1.6. This ratio is later on used to calculate scaling - in the other dimension. As a result we have (1024, 768) image. - - Parameters: - - image (np.ndarray): Input image (type will be adjusted by decorator, - you can provide PIL.Image) - - desired_size (Tuple[int, int]): image size (width, height) representing the - target dimensions. Parameter will be used to dictate maximum size of - output image. Output size may be smaller - to preserve aspect ratio of original - image. + Resizes the given image to a specified resolution. Can maintain the original aspect + ratio or resize directly to the desired dimensions. + + Args: + image (ImageType): The image to be resized. `ImageType` is a flexible type, + accepting either `numpy.ndarray` or `PIL.Image.Image`. + target_resolution_wh (Tuple[int, int]): The target resolution as + `(width, height)`. + keep_aspect_ratio (bool, optional): Flag to maintain the image's original + aspect ratio. Defaults to `False`. Returns: - np.ndarray: resized image (type may be adjusted to PIL.Image by decorator - if function was called with PIL.Image) + ImageType: The resized image. The type is determined by the input type and + may be either a `numpy.ndarray` or `PIL.Image.Image`. 
""" - if image.shape[:2] == desired_size[::-1]: - return image - img_ratio = image.shape[1] / image.shape[0] - desired_ratio = desired_size[0] / desired_size[1] - if img_ratio >= desired_ratio: - new_width = desired_size[0] - new_height = int(desired_size[0] / img_ratio) + if keep_aspect_ratio: + image_ratio = image.shape[1] / image.shape[0] + target_ratio = target_resolution_wh[0] / target_resolution_wh[1] + if image_ratio >= target_ratio: + width_new = target_resolution_wh[0] + height_new = int(target_resolution_wh[0] / image_ratio) + else: + height_new = target_resolution_wh[1] + width_new = int(target_resolution_wh[1] * image_ratio) else: - new_height = desired_size[1] - new_width = int(desired_size[1] * img_ratio) - return cv2.resize(image, (new_width, new_height)) + width_new, height_new = target_resolution_wh + + return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) From ad946893624e8609282035af3443d361f06b5564 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 14:46:45 +0000 Subject: [PATCH 06/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 2 +- supervision/utils/image.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index d6aeeab59..b14a37ab8 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -78,8 +78,8 @@ crop_image, letterbox_image, place_image, + resize_image, scale_image, - resize_image ) from supervision.utils.notebook import plot_image, plot_images_grid from supervision.utils.video import ( diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 99fcb3f24..8fe25eea3 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -28,7 +28,7 @@ @convert_for_image_processing def crop_image( image: ImageType, - xyxy: Union[npt.NDArray[int], List[int], Tuple[int, int, int, int]] + xyxy: Union[npt.NDArray[int], List[int], Tuple[int, int, int, int]], ) -> ImageType: """ Crops the given image based on the given bounding box. @@ -635,9 +635,7 @@ def letterbox_image( """ color = unify_to_bgr(color=color) resized_img = resize_image( - image=image, - target_resolution_wh=target_resolution_wh, - keep_aspect_ratio=True + image=image, target_resolution_wh=target_resolution_wh, keep_aspect_ratio=True ) new_height, new_width = resized_img.shape[:2] top_padding = (target_resolution_wh[1] - new_height) // 2 @@ -659,7 +657,7 @@ def letterbox_image( def resize_image( image: ImageType, target_resolution_wh: Tuple[int, int], - keep_aspect_ratio: bool = False + keep_aspect_ratio: bool = False, ) -> ImageType: """ Resizes the given image to a specified resolution. 
Can maintain the original aspect From 233d25d4c539eb180e2091856f3ebdddb6695c95 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 17:48:13 +0200 Subject: [PATCH 07/18] fix `resize_image` tests after rename --- supervision/utils/image.py | 116 ++++++++++++++++++++++++------------- test/utils/test_image.py | 16 ++--- 2 files changed, 86 insertions(+), 46 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 8fe25eea3..4c5cc9c67 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -135,7 +135,7 @@ def scale_image(image: ImageType, scale_factor: float) -> ImageType: scaled_image = sv.scale_image(image=image, scale_factor=0.5) scaled_image.size - # (540, 960) + # (960, 540) ``` """ if scale_factor <= 0: @@ -147,6 +147,81 @@ def scale_image(image: ImageType, scale_factor: float) -> ImageType: return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) +@convert_for_image_processing +def resize_image( + image: ImageType, + resolution_wh: Tuple[int, int], + keep_aspect_ratio: bool = False, +) -> ImageType: + """ + Resizes the given image to a specified resolution. Can maintain the original aspect + ratio or resize directly to the desired dimensions. + + Args: + image (ImageType): The image to be resized. `ImageType` is a flexible type, + accepting either `numpy.ndarray` or `PIL.Image.Image`. + resolution_wh (Tuple[int, int]): The target resolution as + `(width, height)`. + keep_aspect_ratio (bool, optional): Flag to maintain the image's original + aspect ratio. Defaults to `False`. + + Returns: + ImageType: The resized image. The type is determined by the input type and + may be either a `numpy.ndarray` or `PIL.Image.Image`. + + Examples: + + === "OpenCV" + + ```python + import cv2 + import supervision as sv + + image = cv2.imread() + image.shape + # (1080, 1920, 3) + + resized_image = sv.resize_image( + image=image, resolution_wh=(1000, 1000), keep_aspect_ratio=True + ) + resized_image.shape + # (562, 1000, 3) + ``` + + === "Pillow" + + ```python + from PIL import Image + import supervision as sv + + image = Image.open() + image.size + # (1920, 1080) + + resized_image = sv.resize_image( + image=image, resolution_wh=(1000, 1000), keep_aspect_ratio=True + ) + resized_image.size + # (1000, 562) + ``` + + ![resize_image](https://media.roboflow.com/supervision-docs/resize-image.png){ align=center width="800" } + """ # noqa E501 // docs + if keep_aspect_ratio: + image_ratio = image.shape[1] / image.shape[0] + target_ratio = resolution_wh[0] / resolution_wh[1] + if image_ratio >= target_ratio: + width_new = resolution_wh[0] + height_new = int(resolution_wh[0] / image_ratio) + else: + height_new = resolution_wh[1] + width_new = int(resolution_wh[1] * image_ratio) + else: + width_new, height_new = resolution_wh + + return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) + + def place_image( scene: np.ndarray, image: np.ndarray, anchor: Tuple[int, int] ) -> np.ndarray: @@ -635,7 +710,7 @@ def letterbox_image( """ color = unify_to_bgr(color=color) resized_img = resize_image( - image=image, target_resolution_wh=target_resolution_wh, keep_aspect_ratio=True + image=image, resolution_wh=target_resolution_wh, keep_aspect_ratio=True ) new_height, new_width = resized_img.shape[:2] top_padding = (target_resolution_wh[1] - new_height) // 2 @@ -651,40 +726,3 @@ def letterbox_image( cv2.BORDER_CONSTANT, value=color, ) - - -@convert_for_image_processing -def resize_image( - image: ImageType, - 
target_resolution_wh: Tuple[int, int], - keep_aspect_ratio: bool = False, -) -> ImageType: - """ - Resizes the given image to a specified resolution. Can maintain the original aspect - ratio or resize directly to the desired dimensions. - - Args: - image (ImageType): The image to be resized. `ImageType` is a flexible type, - accepting either `numpy.ndarray` or `PIL.Image.Image`. - target_resolution_wh (Tuple[int, int]): The target resolution as - `(width, height)`. - keep_aspect_ratio (bool, optional): Flag to maintain the image's original - aspect ratio. Defaults to `False`. - - Returns: - ImageType: The resized image. The type is determined by the input type and - may be either a `numpy.ndarray` or `PIL.Image.Image`. - """ - if keep_aspect_ratio: - image_ratio = image.shape[1] / image.shape[0] - target_ratio = target_resolution_wh[0] / target_resolution_wh[1] - if image_ratio >= target_ratio: - width_new = target_resolution_wh[0] - height_new = int(target_resolution_wh[0] / image_ratio) - else: - height_new = target_resolution_wh[1] - width_new = int(target_resolution_wh[1] * image_ratio) - else: - width_new, height_new = target_resolution_wh - - return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) diff --git a/test/utils/test_image.py b/test/utils/test_image.py index 50b6b5c1b..b3e28a277 100644 --- a/test/utils/test_image.py +++ b/test/utils/test_image.py @@ -8,19 +8,20 @@ from supervision.utils.image import ( create_tiles, letterbox_image, - resize_image_keeping_aspect_ratio, + resize_image, ) -def test_resize_image_keeping_aspect_ratio_for_opencv_image() -> None: +def test_resize_image_for_opencv_image() -> None: # given image = np.zeros((480, 640, 3), dtype=np.uint8) expected_result = np.zeros((768, 1024, 3), dtype=np.uint8) # when - result = resize_image_keeping_aspect_ratio( + result = resize_image( image=image, - desired_size=(1024, 1024), + resolution_wh=(1024, 1024), + keep_aspect_ratio=True, ) # then @@ -29,15 +30,16 @@ def test_resize_image_keeping_aspect_ratio_for_opencv_image() -> None: ), "Expected output shape to be (w, h): (1024, 768)" -def test_resize_image_keeping_aspect_ratio_for_pillow_image() -> None: +def test_resize_image_for_pillow_image() -> None: # given image = Image.new(mode="RGB", size=(640, 480), color=(0, 0, 0)) expected_result = Image.new(mode="RGB", size=(1024, 768), color=(0, 0, 0)) # when - result = resize_image_keeping_aspect_ratio( + result = resize_image( image=image, - desired_size=(1024, 1024), + resolution_wh=(1024, 1024), + keep_aspect_ratio=True, ) # then From a7dccfcc0d3f168e36b9f4be06686206be1d3260 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 15:48:30 +0000 Subject: [PATCH 08/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/image.py | 2 +- test/utils/test_image.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 4c5cc9c67..e04ea45ed 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -204,7 +204,7 @@ def resize_image( resized_image.size # (1000, 562) ``` - + ![resize_image](https://media.roboflow.com/supervision-docs/resize-image.png){ align=center width="800" } """ # noqa E501 // docs if keep_aspect_ratio: diff --git a/test/utils/test_image.py 
b/test/utils/test_image.py index b3e28a277..a114d9aae 100644 --- a/test/utils/test_image.py +++ b/test/utils/test_image.py @@ -5,11 +5,7 @@ from PIL import Image, ImageChops from supervision import Color, Point -from supervision.utils.image import ( - create_tiles, - letterbox_image, - resize_image, -) +from supervision.utils.image import create_tiles, letterbox_image, resize_image def test_resize_image_for_opencv_image() -> None: From 774ac2ed4f82706c0a889741b2619cc8575b5ee0 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 17:59:19 +0200 Subject: [PATCH 09/18] update `letterbox_image` docs --- supervision/utils/image.py | 86 +++++++++++++++++++------------------- test/utils/test_image.py | 4 +- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index e04ea45ed..274cef2f6 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -222,6 +222,48 @@ def resize_image( return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) +@convert_for_image_processing +def letterbox_image( + image: ImageType, + resolution_wh: Tuple[int, int], + color: Union[Tuple[int, int, int], Color] = (0, 0, 0), +) -> ImageType: + """ + Resizes and pads an image to a specified resolution with a given color, maintaining + the original aspect ratio. + + Args: + image (ImageType): The image to be resized. `ImageType` is a flexible type, + accepting either `numpy.ndarray` or `PIL.Image.Image`. + resolution_wh (Tuple[int, int]): The target resolution as + `(width, height)`. + color (Union[Tuple[int, int, int], Color]): The color to pad with. If tuple + provided it should be in BGR format. + + Returns: + ImageType: The resized image. The type is determined by the input type and + may be either a `numpy.ndarray` or `PIL.Image.Image`. + """ + color = unify_to_bgr(color=color) + resized_image = resize_image( + image=image, resolution_wh=resolution_wh, keep_aspect_ratio=True + ) + height_new, width_new = resized_image.shape[:2] + padding_top = (resolution_wh[1] - height_new) // 2 + padding_bottom = resolution_wh[1] - height_new - padding_top + padding_left = (resolution_wh[0] - width_new) // 2 + padding_right = resolution_wh[0] - width_new - padding_left + return cv2.copyMakeBorder( + resized_image, + padding_top, + padding_bottom, + padding_left, + padding_right, + cv2.BORDER_CONSTANT, + value=color, + ) + + def place_image( scene: np.ndarray, image: np.ndarray, anchor: Tuple[int, int] ) -> np.ndarray: @@ -434,7 +476,7 @@ def create_tiles( single_tile_size = _aggregate_images_shape(images=images, mode=tile_scaling) resized_images = [ letterbox_image( - image=i, target_resolution_wh=single_tile_size, color=tile_padding_color + image=i, resolution_wh=single_tile_size, color=tile_padding_color ) for i in images ] @@ -684,45 +726,3 @@ def _generate_color_image( shape: Tuple[int, int], color: Tuple[int, int, int] ) -> np.ndarray: return np.ones(shape[::-1] + (3,), dtype=np.uint8) * color - - -@convert_for_image_processing -def letterbox_image( - image: ImageType, - target_resolution_wh: Tuple[int, int], - color: Union[Tuple[int, int, int], Color] = (0, 0, 0), -) -> np.ndarray: - """ - Resize and pad image to fit the desired size, preserving its aspect - ratio, adding padding of given color if needed to maintain aspect ratio. 
- - Args: - image (ImageType): Input image (type will be adjusted by decorator, - you can provide PIL.Image) - target_resolution_wh (Tuple[int, int]): image size (width, height) representing - the target dimensions. - color (Union[Tuple[int, int, int], Color]): the color to pad with - If - tuple provided - should be BGR. - - Returns: - ImageType: letterboxed image (type may be adjusted to PIL.Image by - decorator if function was called with PIL.Image) - """ - color = unify_to_bgr(color=color) - resized_img = resize_image( - image=image, resolution_wh=target_resolution_wh, keep_aspect_ratio=True - ) - new_height, new_width = resized_img.shape[:2] - top_padding = (target_resolution_wh[1] - new_height) // 2 - bottom_padding = target_resolution_wh[1] - new_height - top_padding - left_padding = (target_resolution_wh[0] - new_width) // 2 - right_padding = target_resolution_wh[0] - new_width - left_padding - return cv2.copyMakeBorder( - resized_img, - top_padding, - bottom_padding, - left_padding, - right_padding, - cv2.BORDER_CONSTANT, - value=color, - ) diff --git a/test/utils/test_image.py b/test/utils/test_image.py index a114d9aae..487434aed 100644 --- a/test/utils/test_image.py +++ b/test/utils/test_image.py @@ -60,7 +60,7 @@ def test_letterbox_image_for_opencv_image() -> None: # when result = letterbox_image( - image=image, target_resolution_wh=(1024, 1024), color=(255, 255, 255) + image=image, resolution_wh=(1024, 1024), color=(255, 255, 255) ) # then @@ -86,7 +86,7 @@ def test_letterbox_image_for_pillow_image() -> None: # when result = letterbox_image( - image=image, target_resolution_wh=(1024, 1024), color=(255, 255, 255) + image=image, resolution_wh=(1024, 1024), color=(255, 255, 255) ) # then From 079e62fb0c544248cc6511d79d6be664928aa81e Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 21:23:23 +0200 Subject: [PATCH 10/18] rename `place_image` to overlay `image` --- docs/utils/image.md | 14 ++--- supervision/__init__.py | 2 +- supervision/annotators/core.py | 4 +- supervision/utils/image.py | 96 +++++++++++++++++++++++++--------- 4 files changed, 81 insertions(+), 35 deletions(-) diff --git a/docs/utils/image.md b/docs/utils/image.md index b61cddfce..16a18dc40 100644 --- a/docs/utils/image.md +++ b/docs/utils/image.md @@ -5,12 +5,6 @@ status: new # Image Utils -
-

-<div class="md-typeset">
-  <h2><a href="#supervision.utils.image.ImageSink">ImageSink</a></h2>
-</div>

-
- -:::supervision.utils.image.ImageSink -

<div class="md-typeset">
  <h2><a href="#supervision.utils.image.crop_image">crop_image</a></h2>
</div>

@@ -39,4 +33,10 @@ status: new
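A short sketch of `crop_image` with the loosened `xyxy` typing introduced earlier in the series, again on a synthetic frame:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)

# xyxy may be a numpy array, list, or tuple of (x_min, y_min, x_max, y_max)
cropped = sv.crop_image(image=image, xyxy=(100, 50, 300, 250))
print(cropped.shape)  # (200, 200, 3)
```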

<div class="md-typeset">
  <h2><a href="#supervision.utils.image.place_image">place_image</a></h2>
</div>

-:::supervision.utils.image.place_image +:::supervision.utils.image.overlay_image + +
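A minimal sketch of the renamed `overlay_image`, using synthetic arrays; placements that run past the scene border are clipped rather than raising:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
overlay = np.full((100, 100, 3), 255, dtype=np.uint8)

# anchor is the (x, y) of the overlay's top-left corner within the scene
result = sv.overlay_image(image=image, overlay=overlay, anchor=(600, 440))
print(result.shape)  # (480, 640, 3) - overlay clipped at the border
```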
+

+<div class="md-typeset">
+  <h2><a href="#supervision.utils.image.ImageSink">ImageSink</a></h2>
+</div>

+
+ +:::supervision.utils.image.ImageSink diff --git a/supervision/__init__.py b/supervision/__init__.py index b14a37ab8..9ff095573 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -77,7 +77,7 @@ create_tiles, crop_image, letterbox_image, - place_image, + overlay_image, resize_image, scale_image, ) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 38ddc4421..854a9ece6 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -13,7 +13,7 @@ from supervision.draw.utils import draw_polygon from supervision.geometry.core import Position from supervision.utils.conversion import convert_for_annotation_method -from supervision.utils.image import crop_image, place_image, scale_image +from supervision.utils.image import crop_image, overlay_image, scale_image class BoundingBoxAnnotator(BaseAnnotator): @@ -1974,7 +1974,7 @@ def annotate( (x1, y1), (x2, y2) = self.calculate_crop_coordinates( anchor=anchor, crop_wh=crop_wh, position=self.position ) - scene = place_image(scene=scene, image=resized_crop, anchor=(x1, y1)) + scene = overlay_image(scene=scene, inserted_image=resized_crop, anchor=(x1, y1)) color = resolve_color( color=self.border_color, detections=detections, diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 274cef2f6..b579ba75f 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -243,7 +243,41 @@ def letterbox_image( Returns: ImageType: The resized image. The type is determined by the input type and may be either a `numpy.ndarray` or `PIL.Image.Image`. - """ + + ## Examples: + + === "OpenCV" + + ```python + import cv2 + import supervision as sv + + image = cv2.imread() + image.shape + # (1080, 1920, 3) + + letterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000)) + letterboxed_image.shape + # (1000, 1000, 3) + ``` + + === "Pillow" + + ```python + from PIL import Image + import supervision as sv + + image = Image.open() + image.size + # (1920, 1080) + + letterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000)) + letterboxed_image.size + # (1000, 1000) + ``` + + ![letterbox_image](https://media.roboflow.com/supervision-docs/letterbox-image.png){ align=center width="800" } + """ # noqa E501 // docs color = unify_to_bgr(color=color) resized_image = resize_image( image=image, resolution_wh=resolution_wh, keep_aspect_ratio=True @@ -264,48 +298,60 @@ def letterbox_image( ) -def place_image( - scene: np.ndarray, image: np.ndarray, anchor: Tuple[int, int] -) -> np.ndarray: +def overlay_image( + image: npt.NDArray[np.uint8], + overlay: npt.NDArray[np.uint8], + anchor: Tuple[int, int] +) -> npt.NDArray[np.uint8]: """ Places an image onto a scene at a given anchor point, handling cases where the image's position is partially or completely outside the scene's bounds. Args: - scene (np.ndarray): The background scene onto which the image is placed. - image (np.ndarray): The image to be placed onto the scene. - anchor (Tuple[int, int]): The (x, y) coordinates in the scene where the + image (np.ndarray): The background scene onto which the image is placed. + overlay (np.ndarray): The image to be placed onto the scene. + anchor (Tuple[int, int]): The `(x, y)` coordinates in the scene where the top-left corner of the image will be placed. Returns: - np.ndarray: The modified scene with the image placed at the anchor point, - or unchanged if the image placement is completely outside the scene. + np.ndarray: The result image with overlay. 
+ + Examples: + ```python + import cv2 + import numpy as np + import supervision as sv + + image = cv2.imread() + overlay = np.zeros((200, 200, 3), dtype=np.uint8) + result_image = sv.letterbox_image( + image=image, overlay=overlay, anchor=(200, 400)) + ``` """ - scene_height, scene_width = scene.shape[:2] - image_height, image_width = image.shape[:2] + scene_height, scene_width = image.shape[:2] + image_height, image_width = overlay.shape[:2] anchor_x, anchor_y = anchor is_out_horizontally = anchor_x + image_width <= 0 or anchor_x >= scene_width is_out_vertically = anchor_y + image_height <= 0 or anchor_y >= scene_height if is_out_horizontally or is_out_vertically: - return scene + return image - start_y = max(anchor_y, 0) - start_x = max(anchor_x, 0) - end_y = min(scene_height, anchor_y + image_height) - end_x = min(scene_width, anchor_x + image_width) + x_min = max(anchor_x, 0) + y_min = max(anchor_y, 0) + x_max = min(scene_width, anchor_x + image_width) + y_max = min(scene_height, anchor_y + image_height) - crop_start_y = max(-anchor_y, 0) - crop_start_x = max(-anchor_x, 0) - crop_end_y = image_height - max((anchor_y + image_height) - scene_height, 0) - crop_end_x = image_width - max((anchor_x + image_width) - scene_width, 0) + crop_x_min = max(-anchor_x, 0) + crop_y_min = max(-anchor_y, 0) + crop_x_max = image_width - max((anchor_x + image_width) - scene_width, 0) + crop_y_max = image_height - max((anchor_y + image_height) - scene_height, 0) - scene[start_y:end_y, start_x:end_x] = image[ - crop_start_y:crop_end_y, crop_start_x:crop_end_x - ] + image[y_min:y_max, x_min:x_max] = \ + overlay[crop_y_min:crop_y_max, crop_x_min:crop_x_max] - return scene + return image class ImageSink: @@ -335,7 +381,7 @@ def __init__( source_path='source_video.mp4', stride=2): sink.save_image(image=image) ``` - """ + """ # noqa E501 // docs self.target_dir_path = target_dir_path self.overwrite = overwrite From f872fddaffe2d34c11f2fcc62b6d1b53a28aec85 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 19:24:02 +0000 Subject: [PATCH 11/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/annotators/core.py | 4 +++- supervision/utils/image.py | 9 +++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 854a9ece6..ac9018625 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -1974,7 +1974,9 @@ def annotate( (x1, y1), (x2, y2) = self.calculate_crop_coordinates( anchor=anchor, crop_wh=crop_wh, position=self.position ) - scene = overlay_image(scene=scene, inserted_image=resized_crop, anchor=(x1, y1)) + scene = overlay_image( + scene=scene, inserted_image=resized_crop, anchor=(x1, y1) + ) color = resolve_color( color=self.border_color, detections=detections, diff --git a/supervision/utils/image.py b/supervision/utils/image.py index b579ba75f..f6eef9ca8 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -275,7 +275,7 @@ def letterbox_image( letterboxed_image.size # (1000, 1000) ``` - + ![letterbox_image](https://media.roboflow.com/supervision-docs/letterbox-image.png){ align=center width="800" } """ # noqa E501 // docs color = unify_to_bgr(color=color) @@ -301,7 +301,7 @@ def letterbox_image( def overlay_image( image: npt.NDArray[np.uint8], overlay: 
npt.NDArray[np.uint8], - anchor: Tuple[int, int] + anchor: Tuple[int, int], ) -> npt.NDArray[np.uint8]: """ Places an image onto a scene at a given anchor point, handling cases where @@ -348,8 +348,9 @@ def overlay_image( crop_x_max = image_width - max((anchor_x + image_width) - scene_width, 0) crop_y_max = image_height - max((anchor_y + image_height) - scene_height, 0) - image[y_min:y_max, x_min:x_max] = \ - overlay[crop_y_min:crop_y_max, crop_x_min:crop_x_max] + image[y_min:y_max, x_min:x_max] = overlay[ + crop_y_min:crop_y_max, crop_x_min:crop_x_max + ] return image From c30e377eb5393aed81b182d6a66217e73c6d439a Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 22:07:12 +0200 Subject: [PATCH 12/18] more docs improvements --- docs/utils/image.md | 2 +- supervision/__init__.py | 1 + supervision/utils/conversion.py | 4 ++-- supervision/utils/image.py | 31 ++++++++++++------------------- supervision/utils/iterables.py | 4 ++-- 5 files changed, 18 insertions(+), 24 deletions(-) diff --git a/docs/utils/image.md b/docs/utils/image.md index 16a18dc40..87fd1943e 100644 --- a/docs/utils/image.md +++ b/docs/utils/image.md @@ -30,7 +30,7 @@ status: new :::supervision.utils.image.letterbox_image
-

<div class="md-typeset">
-  <h2><a href="#supervision.utils.image.place_image">place_image</a></h2>
+  <h2><a href="#supervision.utils.image.overlay_image">overlay_image</a></h2>
</div>

:::supervision.utils.image.overlay_image diff --git a/supervision/__init__.py b/supervision/__init__.py index 9ff095573..6cdb23ca9 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -34,6 +34,7 @@ ClassificationDataset, DetectionDataset, ) +from supervision.utils.conversion import pillow_to_cv2, cv2_to_pillow from supervision.detection.annotate import BoxAnnotator from supervision.detection.core import Detections from supervision.detection.line_zone import LineZone, LineZoneAnnotator diff --git a/supervision/utils/conversion.py b/supervision/utils/conversion.py index 608104bcd..8ddce9695 100644 --- a/supervision/utils/conversion.py +++ b/supervision/utils/conversion.py @@ -81,7 +81,7 @@ def pillow_to_cv2(image: Image.Image) -> np.ndarray: image (Image.Image): Pillow image (in RGB format). Returns: - np.ndarray: Input image converted to OpenCV format. + (np.ndarray): Input image converted to OpenCV format. """ scene = np.array(image) scene = cv2.cvtColor(scene, cv2.COLOR_RGB2BGR) @@ -97,7 +97,7 @@ def cv2_to_pillow(image: np.ndarray) -> Image.Image: image (np.ndarray): OpenCV image (in BGR format). Returns: - Image.Image: Input image converted to Pillow format. + (Image.Image): Input image converted to Pillow format. """ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) return Image.fromarray(image) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index b579ba75f..488a9d360 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -44,8 +44,6 @@ def crop_image( (ImageType): The cropped image. The type is determined by the input type and may be either a `numpy.ndarray` or `PIL.Image.Image`. - Examples: - === "OpenCV" ```python @@ -106,8 +104,6 @@ def scale_image(image: ImageType, scale_factor: float) -> ImageType: Raises: ValueError: If the scale factor is non-positive. - Examples: - === "OpenCV" ```python @@ -166,11 +162,9 @@ def resize_image( aspect ratio. Defaults to `False`. Returns: - ImageType: The resized image. The type is determined by the input type and + (ImageType): The resized image. The type is determined by the input type and may be either a `numpy.ndarray` or `PIL.Image.Image`. - Examples: - === "OpenCV" ```python @@ -226,7 +220,7 @@ def resize_image( def letterbox_image( image: ImageType, resolution_wh: Tuple[int, int], - color: Union[Tuple[int, int, int], Color] = (0, 0, 0), + color: Union[Tuple[int, int, int], Color] = Color.BLACK, ) -> ImageType: """ Resizes and pads an image to a specified resolution with a given color, maintaining @@ -241,11 +235,9 @@ def letterbox_image( provided it should be in BGR format. Returns: - ImageType: The resized image. The type is determined by the input type and + (ImageType): The resized image. The type is determined by the input type and may be either a `numpy.ndarray` or `PIL.Image.Image`. - ## Examples: - === "OpenCV" ```python @@ -314,7 +306,7 @@ def overlay_image( top-left corner of the image will be placed. Returns: - np.ndarray: The result image with overlay. + (np.ndarray): The result image with overlay. 
Examples: ```python @@ -323,10 +315,11 @@ def overlay_image( import supervision as sv image = cv2.imread() - overlay = np.zeros((200, 200, 3), dtype=np.uint8) - result_image = sv.letterbox_image( - image=image, overlay=overlay, anchor=(200, 400)) + overlay = np.zeros((400, 400, 3), dtype=np.uint8) + result_image = sv.overlay_image(image=image, overlay=overlay, anchor=(200, 400)) ``` + + ![overlay_image](https://media.roboflow.com/supervision-docs/overlay-image.png){ align=center width="800" } """ scene_height, scene_width = image.shape[:2] image_height, image_width = overlay.shape[:2] @@ -374,11 +367,11 @@ def __init__( Examples: ```python import supervision as sv + + frames_generator = sv.get_video_frames_generator(, stride=2) - with sv.ImageSink(target_dir_path='target/directory/path', - overwrite=True) as sink: - for image in sv.get_video_frames_generator( - source_path='source_video.mp4', stride=2): + with sv.ImageSink(target_dir_path=) as sink: + for image in frames_generator: sink.save_image(image=image) ``` """ # noqa E501 // docs diff --git a/supervision/utils/iterables.py b/supervision/utils/iterables.py index ad570379c..52bfbeb6c 100644 --- a/supervision/utils/iterables.py +++ b/supervision/utils/iterables.py @@ -16,7 +16,7 @@ def create_batches( batch_size (int): The expected size of a batch. Returns: - Generator[List[V], None, None]: A generator that yields chunks + (Generator[List[V], None, None]): A generator that yields chunks of `sequence` of size `batch_size`, up to the length of the input `sequence`. @@ -54,7 +54,7 @@ def fill(sequence: List[V], desired_size: int, content: V) -> List[V]: `sequence` as padding. Returns: - List[V]: A padded version of the input `sequence` (if needed). + (List[V]): A padded version of the input `sequence` (if needed). 
Examples: ```python From 6248fa7d170ce765e5d59d2bce839836171841f3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 20:08:10 +0000 Subject: [PATCH 13/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 2 +- supervision/utils/image.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 6cdb23ca9..f8e7a8324 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -34,7 +34,6 @@ ClassificationDataset, DetectionDataset, ) -from supervision.utils.conversion import pillow_to_cv2, cv2_to_pillow from supervision.detection.annotate import BoxAnnotator from supervision.detection.core import Detections from supervision.detection.line_zone import LineZone, LineZoneAnnotator @@ -72,6 +71,7 @@ from supervision.geometry.utils import get_polygon_center from supervision.metrics.detection import ConfusionMatrix, MeanAveragePrecision from supervision.tracker.byte_tracker.core import ByteTrack +from supervision.utils.conversion import cv2_to_pillow, pillow_to_cv2 from supervision.utils.file import list_files_with_extensions from supervision.utils.image import ( ImageSink, diff --git a/supervision/utils/image.py b/supervision/utils/image.py index dd3316ec4..44ccbeac1 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -368,7 +368,7 @@ def __init__( Examples: ```python import supervision as sv - + frames_generator = sv.get_video_frames_generator(, stride=2) with sv.ImageSink(target_dir_path=) as sink: From d929d6f58b989a0fd38a359ca4b12b40921cccf2 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 22:44:10 +0200 Subject: [PATCH 14/18] `How to Save Detections` guide update --- docs/how_to/save_detections.md | 222 ++++++++++++++++++++++++++++----- 1 file changed, 191 insertions(+), 31 deletions(-) diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md index 81de930ec..e0233f9b5 100644 --- a/docs/how_to/save_detections.md +++ b/docs/how_to/save_detections.md @@ -9,48 +9,64 @@ TODO ## Run Detection +First, you'll need to obtain predictions from your object detection or segmentation +model. You can learn more on this topic in our +[How to Detect and Annotate](/latest/how_to/detect_and_annotate.md) guide. 

=== "Inference"

    ```python
    import supervision as sv
    from inference import get_model

    model = get_model(model_id="yolov8n-640")
    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

    for frame in frames_generator:

        results = model.infer(frame)[0]
        detections = sv.Detections.from_inference(results)
    ```

=== "Ultralytics"

    ```python
    import supervision as sv
    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")
    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

    for frame in frames_generator:

        results = model(frame)[0]
        detections = sv.Detections.from_ultralytics(results)
    ```

=== "Transformers"

    ```python
    import torch
    import supervision as sv
    from transformers import DetrImageProcessor, DetrForObjectDetection

    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

    for frame in frames_generator:

        frame = sv.cv2_to_pillow(frame)
        inputs = processor(images=frame, return_tensors="pt")

        with torch.no_grad():
            outputs = model(**inputs)

        width, height = frame.size
        target_size = torch.tensor([[height, width]])
        results = processor.post_process_object_detection(
            outputs=outputs, target_sizes=target_size)[0]
        detections = sv.Detections.from_transformers(results)
    ```

## Save Detections as CSV

TODO

=== "Inference"

    ```{ .py hl_lines="7 12" }
    import supervision as sv
    from inference import get_model

    model = get_model(model_id="yolov8n-640")
    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

    with sv.CSVSink(<RESULT_CSV_FILE_PATH>) as sink:
        for frame in frames_generator:

            results = model.infer(frame)[0]
            detections = sv.Detections.from_inference(results)
            sink.append(detections, {})
    ```

=== "Ultralytics"

    ```{ .py hl_lines="7 12" }
    import supervision as sv
    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")
    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

    with sv.CSVSink(<RESULT_CSV_FILE_PATH>) as sink:
        for frame in frames_generator:

            results = model(frame)[0]
            detections = sv.Detections.from_ultralytics(results)
            sink.append(detections, {})
    ```

=== "Transformers"

    ```{ .py hl_lines="9 23" }
    import torch
    import supervision as sv
    from transformers import DetrImageProcessor, DetrForObjectDetection

    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

    with sv.CSVSink(<RESULT_CSV_FILE_PATH>) as sink:
        for frame in frames_generator:

            frame = sv.cv2_to_pillow(frame)
            inputs = processor(images=frame, return_tensors="pt")

            with torch.no_grad():
                outputs = model(**inputs)

            width, height = frame.size
            target_size = torch.tensor([[height, width]])
 ## Custom Fields

 TODO

+=== "Inference"
+
+    ```{ .py hl_lines="8 12" }
+    import supervision as sv
+    from inference import get_model
+
+    model = get_model(model_id="yolov8n-640")
+    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
+
+    with sv.CSVSink(<RESULT_CSV_FILE_PATH>) as sink:
+        for frame_index, frame in enumerate(frames_generator):
+
+            results = model.infer(frame)[0]
+            detections = sv.Detections.from_inference(results)
+            sink.append(detections, {"frame_index": frame_index})
+    ```
+
+=== "Ultralytics"
+
+    ```{ .py hl_lines="8 12" }
+    import supervision as sv
+    from ultralytics import YOLO
+
+    model = YOLO("yolov8n.pt")
+    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
+
+    with sv.CSVSink(<RESULT_CSV_FILE_PATH>) as sink:
+        for frame_index, frame in enumerate(frames_generator):
+
+            results = model(frame)[0]
+            detections = sv.Detections.from_ultralytics(results)
+            sink.append(detections, {"frame_index": frame_index})
+    ```
+
+=== "Transformers"
+
+    ```{ .py hl_lines="10 23" }
+    import torch
+    import supervision as sv
+    from transformers import DetrImageProcessor, DetrForObjectDetection
+
+    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
+    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
+    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
+
+    with sv.CSVSink(<RESULT_CSV_FILE_PATH>) as sink:
+        for frame_index, frame in enumerate(frames_generator):
+
+            frame = sv.cv2_to_pillow(frame)
+            inputs = processor(images=frame, return_tensors="pt")
+
+            with torch.no_grad():
+                outputs = model(**inputs)
+
+            width, height = frame.size
+            target_size = torch.tensor([[height, width]])
+            results = processor.post_process_object_detection(
+                outputs=outputs, target_sizes=target_size)[0]
+            detections = sv.Detections.from_transformers(results)
+            sink.append(detections, {"frame_index": frame_index})
+    ```
+
+TODO
+
+| x_min   | y_min   | x_max   | y_max   | class_id | confidence | tracker_id | class_name | frame_index |
+|---------|---------|---------|---------|----------|------------|------------|------------|-------------|
+| 2941.14 | 1269.31 | 3220.77 | 1500.67 | 2        | 0.8517     |            | car        | 0           |
+| 944.889 | 899.641 | 1235.42 | 1308.80 | 7        | 0.6752     |            | truck      | 0           |
+| 1439.78 | 1077.79 | 1621.27 | 1231.40 | 2        | 0.6450     |            | car        | 0           |
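The `custom_data` dictionary is not limited to a frame index: any value that serializes to a CSV cell can ride along. Below is a minimal sketch; the `timestamp_s` field, the 30 fps figure, and both file paths are illustrative assumptions rather than part of the guide:

```python
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
FPS = 30  # assumed frame rate of the source video

with sv.CSVSink("detections.csv") as sink:  # hypothetical output path
    frames = sv.get_video_frames_generator("video.mp4")  # hypothetical source
    for frame_index, frame in enumerate(frames):
        results = model(frame)[0]
        detections = sv.Detections.from_ultralytics(results)
        # Every key in custom_data becomes an extra column, repeated on
        # each row written for this frame's detections.
        sink.append(
            detections,
            {"frame_index": frame_index, "timestamp_s": frame_index / FPS},
        )
```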
 ## Save Detections as JSON

 TODO

+=== "Inference"
+
+    ```{ .py hl_lines="7" }
+    import supervision as sv
+    from inference import get_model
+
+    model = get_model(model_id="yolov8n-640")
+    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
+
+    with sv.JSONSink(<RESULT_JSON_FILE_PATH>) as sink:
+        for frame_index, frame in enumerate(frames_generator):
+
+            results = model.infer(frame)[0]
+            detections = sv.Detections.from_inference(results)
+            sink.append(detections, {"frame_index": frame_index})
+    ```
+
+=== "Ultralytics"
+
+    ```{ .py hl_lines="7" }
+    import supervision as sv
+    from ultralytics import YOLO
+
+    model = YOLO("yolov8n.pt")
+    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
+
+    with sv.JSONSink(<RESULT_JSON_FILE_PATH>) as sink:
+        for frame_index, frame in enumerate(frames_generator):
+
+            results = model(frame)[0]
+            detections = sv.Detections.from_ultralytics(results)
+            sink.append(detections, {"frame_index": frame_index})
+    ```
+
+=== "Transformers"
+
+    ```{ .py hl_lines="9" }
+    import torch
+    import supervision as sv
+    from transformers import DetrImageProcessor, DetrForObjectDetection
+
+    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
+    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
+    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
+
+    with sv.JSONSink(<RESULT_JSON_FILE_PATH>) as sink:
+        for frame_index, frame in enumerate(frames_generator):
+
+            frame = sv.cv2_to_pillow(frame)
+            inputs = processor(images=frame, return_tensors="pt")
+
+            with torch.no_grad():
+                outputs = model(**inputs)
+
+            width, height = frame.size
+            target_size = torch.tensor([[height, width]])
+            results = processor.post_process_object_detection(
+                outputs=outputs, target_sizes=target_size)[0]
+            detections = sv.Detections.from_transformers(results)
+            sink.append(detections, {"frame_index": frame_index})
+    ```
\ No newline at end of file

From 99fbd4b601657e7a25ed68a2ae805413788cd1ad Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 9 Apr 2024 20:45:06 +0000
Subject: [PATCH 15/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?=
 =?UTF-8?q?=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/how_to/save_detections.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md
index e0233f9b5..5d0dc61a9 100644
--- a/docs/how_to/save_detections.md
+++ b/docs/how_to/save_detections.md
@@ -9,8 +9,8 @@ TODO

 ## Run Detection

-First, you'll need to obtain predictions from your object detection or segmentation
-model. You can learn more on this topic in our
+First, you'll need to obtain predictions from your object detection or segmentation
+model. You can learn more on this topic in our
 [How to Detect and Annotate](/latest/how_to/detect_and_annotate.md) guide.
@@ -281,4 +281,4 @@ TODO
             outputs=outputs, target_sizes=target_size)[0]
         detections = sv.Detections.from_transformers(results)
         sink.append(detections, {"frame_index": frame_index})
-    ```
\ No newline at end of file
+    ```
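The JSON variant can be inspected just as easily. A minimal sketch, assuming a hypothetical output path `detections.json` and that each saved record carries the same fields as the CSV columns plus any `custom_data` keys:

```python
import json

# Load the detection records written by sv.JSONSink.
with open("detections.json") as f:
    records = json.load(f)

# Example: count detections per frame using the custom frame_index field.
counts = {}
for record in records:
    frame = record["frame_index"]
    counts[frame] = counts.get(frame, 0) + 1
print(counts)
```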
From e0d5f00ae3439bdece59cb1bd8d07ada53d91d78 Mon Sep 17 00:00:00 2001
From: SkalskiP
Date: Tue, 9 Apr 2024 23:15:49 +0200
Subject: [PATCH 16/18] initial version of `How to Save Detections` guide done

---
 docs/how_to/save_detections.md | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md
index e0233f9b5..f432c9398 100644
--- a/docs/how_to/save_detections.md
+++ b/docs/how_to/save_detections.md
@@ -5,7 +5,13 @@ status: new

 # Save Detections

-TODO
+Supervision offers an easy way to save detections in .CSV and .JSON files for offline
+processing. This guide demonstrates how to perform video inference using the
+[Inference](https://github.com/roboflow/inference),
+[Ultralytics](https://github.com/ultralytics/ultralytics) or
+[Transformers](https://github.com/huggingface/transformers) packages and save their results with
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink) and
+[`sv.JSONSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink).

 ## Run Detection

 ## Save Detections as CSV

-TODO
+To save detections to a `.CSV` file, open our
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
+and then pass the
+[`sv.Detections`](/latest/detection/core/#supervision.detection.core.Detections)
+object resulting from the inference to it. Its fields are parsed and saved on disk.

 === "Inference"

             sink.append(detections, {})
     ```

-TODO
-
 | x_min   | y_min   | x_max   | y_max   | class_id | confidence | tracker_id | class_name |
 |---------|---------|---------|---------|----------|------------|------------|------------|
 | 2941.14 | 1269.31 | 3220.77 | 1500.67 | 2        | 0.8517     |            | car        |
 | 944.889 | 899.641 | 1235.42 | 1308.80 | 7        | 0.6752     |            | truck      |
 | 1439.78 | 1077.79 | 1621.27 | 1231.40 | 2        | 0.6450     |            | car        |

 ## Custom Fields

-TODO
+Besides regular fields in
+[`sv.Detections`](/latest/detection/core/#supervision.detection.core.Detections),
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
+also allows you to add custom information to each row, which can be passed via the
+`custom_data` dictionary. Let's utilize this feature to save information about the
+frame index from which the detections originate.

 === "Inference"

             sink.append(detections, {"frame_index": frame_index})
     ```

-TODO
-
 | x_min   | y_min   | x_max   | y_max   | class_id | confidence | tracker_id | class_name | frame_index |
 |---------|---------|---------|---------|----------|------------|------------|------------|-------------|
 | 2941.14 | 1269.31 | 3220.77 | 1500.67 | 2        | 0.8517     |            | car        | 0           |
 | 944.889 | 899.641 | 1235.42 | 1308.80 | 7        | 0.6752     |            | truck      | 0           |
 | 1439.78 | 1077.79 | 1621.27 | 1231.40 | 2        | 0.6450     |            | car        | 0           |

 ## Save Detections as JSON

-TODO
+If you prefer to save the result in a `.JSON` file instead of a `.CSV` file, all you
+need to do is replace
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
+with
+[`sv.JSONSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink).
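To make the sink mechanics concrete: the pattern the guide leans on is a context manager that flattens each detection into one row and appends the `custom_data` columns. The toy stand-in below is a sketch of that idea, not supervision's actual implementation:

```python
import csv
from typing import Dict, List


class MiniCSVSink:
    """Toy stand-in for sv.CSVSink: writes one CSV row per detection dict."""

    def __init__(self, file_name: str) -> None:
        self.file_name = file_name
        self.header: List[str] = []

    def __enter__(self) -> "MiniCSVSink":
        self.file = open(self.file_name, "w", newline="")
        self.writer = csv.writer(self.file)
        return self

    def __exit__(self, *exc) -> None:
        self.file.close()

    def append(self, rows: List[Dict], custom_data: Dict) -> None:
        for row in rows:
            merged = {**row, **custom_data}
            if not self.header:
                # Freeze the column order on the first appended row.
                self.header = list(merged)
                self.writer.writerow(self.header)
            self.writer.writerow([merged.get(key, "") for key in self.header])
```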
From c27d9eaf22cb90efd8f88c91d555e9c32935ece2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 9 Apr 2024 21:16:18 +0000
Subject: [PATCH 17/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?=
 =?UTF-8?q?=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/how_to/save_detections.md | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md
index 2c9250ea6..94de6c618 100644
--- a/docs/how_to/save_detections.md
+++ b/docs/how_to/save_detections.md
@@ -5,12 +5,12 @@ status: new

 # Save Detections

-Supervision offers an easy way to save detections in .CSV and .JSON files for offline
-processing. This guide demonstrates how to perform video inference using the
+Supervision offers an easy way to save detections in .CSV and .JSON files for offline
+processing. This guide demonstrates how to perform video inference using the
 [Inference](https://github.com/roboflow/inference),
 [Ultralytics](https://github.com/ultralytics/ultralytics) or
-[Transformers](https://github.com/huggingface/transformers) packages and save their results with
-[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink) and
+[Transformers](https://github.com/huggingface/transformers) packages and save their results with
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink) and
 [`sv.JSONSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink).

 ## Save Detections as CSV

-To save detections to a `.CSV` file, open our
-[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
-and then pass the
-[`sv.Detections`](/latest/detection/core/#supervision.detection.core.Detections)
+To save detections to a `.CSV` file, open our
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
+and then pass the
+[`sv.Detections`](/latest/detection/core/#supervision.detection.core.Detections)
 object resulting from the inference to it. Its fields are parsed and saved on disk.

 ## Custom Fields

-Besides regular fields in
-[`sv.Detections`](/latest/detection/core/#supervision.detection.core.Detections),
-[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
-also allows you to add custom information to each row, which can be passed via the
-`custom_data` dictionary. Let's utilize this feature to save information about the
+Besides regular fields in
+[`sv.Detections`](/latest/detection/core/#supervision.detection.core.Detections),
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
+also allows you to add custom information to each row, which can be passed via the
+`custom_data` dictionary. Let's utilize this feature to save information about the
 frame index from which the detections originate.

 ## Save Detections as JSON

-If you prefer to save the result in a `.JSON` file instead of a `.CSV` file, all you
-need to do is replace
-[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
-with
+If you prefer to save the result in a `.JSON` file instead of a `.CSV` file, all you
+need to do is replace
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
+with
 [`sv.JSONSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink).
 === "Inference"

From eb1c7716ef0e9835f873133dd41e3ae40c9442df Mon Sep 17 00:00:00 2001
From: SkalskiP
Date: Tue, 9 Apr 2024 23:18:02 +0200
Subject: [PATCH 18/18] make ruff happy

---
 supervision/utils/image.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/supervision/utils/image.py b/supervision/utils/image.py
index 44ccbeac1..9f2e17839 100644
--- a/supervision/utils/image.py
+++ b/supervision/utils/image.py
@@ -320,7 +320,7 @@ def overlay_image(
         ```

     ![overlay_image](https://media.roboflow.com/supervision-docs/overlay-image.png){ align=center width="800" }
-    """
+    """  # noqa E501 // docs
     scene_height, scene_width = image.shape[:2]
     image_height, image_width = overlay.shape[:2]
     anchor_x, anchor_y = anchor