From 574c61b6b00bb80f90b6826b404b843b3c1f6bf4 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 14:28:40 +0200 Subject: [PATCH 01/18] initial commit --- docs/how_to/detect_and_annotate.md | 2 +- docs/how_to/save_detections.md | 124 +++++++++++++++++++++++++++++ mkdocs.yml | 4 +- supervision/draw/color.py | 18 ++++- supervision/utils/image.py | 51 ++++++------ test/utils/test_image.py | 4 +- 6 files changed, 170 insertions(+), 33 deletions(-) create mode 100644 docs/how_to/save_detections.md diff --git a/docs/how_to/detect_and_annotate.md b/docs/how_to/detect_and_annotate.md index 221d9ca1b..adea95cf7 100644 --- a/docs/how_to/detect_and_annotate.md +++ b/docs/how_to/detect_and_annotate.md @@ -15,7 +15,7 @@ source image. ![basic-annotation](https://media.roboflow.com/supervision_detect_and_annotate_example_1.png) -## Run Inference +## Run Detection First, you'll need to obtain predictions from your object detection or segmentation model. diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md new file mode 100644 index 000000000..622c504f5 --- /dev/null +++ b/docs/how_to/save_detections.md @@ -0,0 +1,124 @@ +--- +comments: true +status: new +--- + +# Save Detections + +TODO + +## Run Detection + +=== "Inference" + + ```python + import cv2 + from inference import get_model + + model = get_model(model_id="yolov8n-640") + image = cv2.imread() + results = model.infer(image)[0] + ``` + +=== "Ultralytics" + + ```python + import cv2 + from ultralytics import YOLO + + model = YOLO("yolov8n.pt") + image = cv2.imread() + results = model(image)[0] + ``` + +=== "Transformers" + + ```python + import torch + from PIL import Image + from transformers import DetrImageProcessor, DetrForObjectDetection + + processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50") + model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50") + + image = Image.open() + inputs = processor(images=image, return_tensors="pt") + + with torch.no_grad(): + outputs = model(**inputs) + + width, height = image.size + target_size = torch.tensor([[height, width]]) + results = processor.post_process_object_detection( + outputs=outputs, target_sizes=target_size)[0] + ``` + +## Save Detections as CSV + +TODO + +=== "Inference" + + ```python + import supervision as sv + from inference import get_model + + model = get_model(model_id="yolov8n-640") + + with sv.CSVSink() as sink: + for frame in sv.get_video_frames_generator(): + + results = model.infer(image)[0] + detections = sv.Detections.from_inference(results) + sink.append(detections, {}) + ``` + +=== "Ultralytics" + + ```python + import supervision as sv + from ultralytics import YOLO + + model = YOLO("yolov8n.pt") + + with sv.CSVSink() as sink: + for frame in sv.get_video_frames_generator(): + + results = model(frame)[0] + detections = sv.Detections.from_ultralytics(results) + sink.append(detections, {}) + ``` + +=== "Transformers" + + ```python + import torch + from PIL import Image + from transformers import DetrImageProcessor, DetrForObjectDetection + + processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50") + model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50") + + image = Image.open() + inputs = processor(images=image, return_tensors="pt") + + with torch.no_grad(): + outputs = model(**inputs) + + width, height = image.size + target_size = torch.tensor([[height, width]]) + results = processor.post_process_object_detection( + outputs=outputs, target_sizes=target_size)[0] + ``` + +## 
Custom Fields + +TODO + +## Save Detections as JSON + +TODO + +## Process Video and Save Detections + +TODO \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 8bae5d765..dd6feb8db 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -38,9 +38,11 @@ nav: - Home: index.md - How to: - Detect and Annotate: how_to/detect_and_annotate.md + - Save Detections: how_to/save_detections.md + - Filter Detections: how_to/filter_detections.md - Detect Small Objects: how_to/detect_small_objects.md - Track Objects: how_to/track_objects.md - - Filter Detections: how_to/filter_detections.md + - API: - Annotators: annotators.md - Classifications: diff --git a/supervision/draw/color.py b/supervision/draw/color.py index debb46f3b..b195cffe7 100644 --- a/supervision/draw/color.py +++ b/supervision/draw/color.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import List, Tuple +from typing import List, Tuple, Union import matplotlib.pyplot as plt @@ -448,3 +448,19 @@ def by_idx(self, idx: int) -> Color: raise ValueError("idx argument should not be negative") idx = idx % len(self.colors) return self.colors[idx] + + +def unify_to_bgr(color: Union[Tuple[int, int, int], Color]) -> Tuple[int, int, int]: + """ + Converts a color input in multiple formats to a standardized BGR format. + + Args: + color (Union[Tuple[int, int, int], Color]): The color input to be converted, + which can be either a tuple of RGB values or an instance of a Color class. + + Returns: + Tuple[int, int, int]: The color in BGR format as a tuple of three integers. + """ + if issubclass(type(color), Color): + return color.as_bgr() + return color diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 4e0ba5d76..64fb6de10 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -9,7 +9,7 @@ import numpy as np from supervision.annotators.base import ImageType -from supervision.draw.color import Color +from supervision.draw.color import Color, unify_to_bgr from supervision.draw.utils import calculate_optimal_text_scale, draw_text from supervision.geometry.core import Point from supervision.utils.conversion import ( @@ -25,17 +25,18 @@ @convert_for_image_processing -def crop_image(image: np.ndarray, xyxy: np.ndarray) -> np.ndarray: +def crop_image(image: ImageType, xyxy: np.ndarray) -> np.ndarray: """ Crops the given image based on the given bounding box. Args: - image (np.ndarray): The image to be cropped, represented as a numpy array. + image (ImageType): The image to be cropped. `ImageType` is a flexible type, + accepting either `numpy.ndarray` or `PIL.Image.Image`. xyxy (np.ndarray): A numpy array containing the bounding box coordinates in the format (x1, y1, x2, y2). Returns: - (np.ndarray): The cropped image as a numpy array. + (ImageType): The cropped image. 
Examples: ```python @@ -74,10 +75,10 @@ def resize_image(image: np.ndarray, scale_factor: float) -> np.ndarray: raise ValueError("Scale factor must be positive.") old_width, old_height = image.shape[1], image.shape[0] - nwe_width = int(old_width * scale_factor) + new_width = int(old_width * scale_factor) new_height = int(old_height * scale_factor) - return cv2.resize(image, (nwe_width, new_height), interpolation=cv2.INTER_LINEAR) + return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR) def place_image( @@ -285,14 +286,14 @@ def create_tiles( raise ValueError("Could not create image tiles from empty list of images.") if return_type == "auto": return_type = _negotiate_tiles_format(images=images) - tile_padding_color = _color_to_bgr(color=tile_padding_color) - tile_margin_color = _color_to_bgr(color=tile_margin_color) + tile_padding_color = unify_to_bgr(color=tile_padding_color) + tile_margin_color = unify_to_bgr(color=tile_margin_color) images = images_to_cv2(images=images) if single_tile_size is None: single_tile_size = _aggregate_images_shape(images=images, mode=tile_scaling) resized_images = [ letterbox_image( - image=i, desired_size=single_tile_size, color=tile_padding_color + image=i, target_resolution_wh=single_tile_size, color=tile_padding_color ) for i in images ] @@ -311,8 +312,8 @@ def create_tiles( titles_anchors = fill( sequence=titles_anchors, desired_size=len(images), content=None ) - titles_color = _color_to_bgr(color=titles_color) - titles_background_color = _color_to_bgr(color=titles_background_color) + titles_color = unify_to_bgr(color=titles_color) + titles_background_color = unify_to_bgr(color=titles_background_color) tiles = _generate_tiles( images=resized_images, grid_size=grid_size, @@ -546,8 +547,8 @@ def _generate_color_image( @convert_for_image_processing def letterbox_image( - image: np.ndarray, - desired_size: Tuple[int, int], + image: ImageType, + target_resolution_wh: Tuple[int, int], color: Union[Tuple[int, int, int], Color] = (0, 0, 0), ) -> np.ndarray: """ @@ -555,27 +556,27 @@ def letterbox_image( ratio, adding padding of given color if needed to maintain aspect ratio. Args: - image (np.ndarray): Input image (type will be adjusted by decorator, + image (ImageType): Input image (type will be adjusted by decorator, you can provide PIL.Image) - desired_size (Tuple[int, int]): image size (width, height) representing + target_resolution_wh (Tuple[int, int]): image size (width, height) representing the target dimensions. color (Union[Tuple[int, int, int], Color]): the color to pad with - If tuple provided - should be BGR. 
Returns: - np.ndarray: letterboxed image (type may be adjusted to PIL.Image by + ImageType: letterboxed image (type may be adjusted to PIL.Image by decorator if function was called with PIL.Image) """ - color = _color_to_bgr(color=color) + color = unify_to_bgr(color=color) resized_img = resize_image_keeping_aspect_ratio( image=image, - desired_size=desired_size, + desired_size=target_resolution_wh, ) new_height, new_width = resized_img.shape[:2] - top_padding = (desired_size[1] - new_height) // 2 - bottom_padding = desired_size[1] - new_height - top_padding - left_padding = (desired_size[0] - new_width) // 2 - right_padding = desired_size[0] - new_width - left_padding + top_padding = (target_resolution_wh[1] - new_height) // 2 + bottom_padding = target_resolution_wh[1] - new_height - top_padding + left_padding = (target_resolution_wh[0] - new_width) // 2 + right_padding = target_resolution_wh[0] - new_width - left_padding return cv2.copyMakeBorder( resized_img, top_padding, @@ -625,9 +626,3 @@ def resize_image_keeping_aspect_ratio( new_height = desired_size[1] new_width = int(desired_size[1] * img_ratio) return cv2.resize(image, (new_width, new_height)) - - -def _color_to_bgr(color: Union[Tuple[int, int, int], Color]) -> Tuple[int, int, int]: - if issubclass(type(color), Color): - return color.as_bgr() - return color diff --git a/test/utils/test_image.py b/test/utils/test_image.py index e50f2e574..50b6b5c1b 100644 --- a/test/utils/test_image.py +++ b/test/utils/test_image.py @@ -62,7 +62,7 @@ def test_letterbox_image_for_opencv_image() -> None: # when result = letterbox_image( - image=image, desired_size=(1024, 1024), color=(255, 255, 255) + image=image, target_resolution_wh=(1024, 1024), color=(255, 255, 255) ) # then @@ -88,7 +88,7 @@ def test_letterbox_image_for_pillow_image() -> None: # when result = letterbox_image( - image=image, desired_size=(1024, 1024), color=(255, 255, 255) + image=image, target_resolution_wh=(1024, 1024), color=(255, 255, 255) ) # then From 5b1ee8f870c1c9e091fb84167fbf060ebc9c0cac Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 12:31:30 +0000 Subject: [PATCH 02/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/how_to/save_detections.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md index 622c504f5..81de930ec 100644 --- a/docs/how_to/save_detections.md +++ b/docs/how_to/save_detections.md @@ -62,12 +62,12 @@ TODO ```python import supervision as sv from inference import get_model - + model = get_model(model_id="yolov8n-640") - + with sv.CSVSink() as sink: for frame in sv.get_video_frames_generator(): - + results = model.infer(image)[0] detections = sv.Detections.from_inference(results) sink.append(detections, {}) @@ -78,12 +78,12 @@ TODO ```python import supervision as sv from ultralytics import YOLO - + model = YOLO("yolov8n.pt") - + with sv.CSVSink() as sink: for frame in sv.get_video_frames_generator(): - + results = model(frame)[0] detections = sv.Detections.from_ultralytics(results) sink.append(detections, {}) @@ -121,4 +121,4 @@ TODO ## Process Video and Save Detections -TODO \ No newline at end of file +TODO From 2115c9a1526dfc8d2d5de5f74651ac48f3f2c585 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 14:50:32 +0200 
Subject: [PATCH 03/18] crop_image improvements --- supervision/utils/image.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 64fb6de10..a6466eaa7 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -25,15 +25,19 @@ @convert_for_image_processing -def crop_image(image: ImageType, xyxy: np.ndarray) -> np.ndarray: +def crop_image( + image: ImageType, + xyxy: Union[np.ndarray, List[int], Tuple[int, int, int, int]] +) -> ImageType: """ Crops the given image based on the given bounding box. Args: image (ImageType): The image to be cropped. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. - xyxy (np.ndarray): A numpy array containing the bounding box coordinates - in the format (x1, y1, x2, y2). + xyxy (Union[np.ndarray, List[int], Tuple[int, int, int, int]]): A bounding box + coordinates in the format (x_min, y_min, x_max, y_max), accepted as either + a numpy array, a list, or a tuple. Returns: (ImageType): The cropped image. @@ -50,9 +54,11 @@ def crop_image(image: ImageType, xyxy: np.ndarray) -> np.ndarray: ``` """ + if isinstance(xyxy, (list, tuple)): + xyxy = np.array(xyxy) xyxy = np.round(xyxy).astype(int) - x1, y1, x2, y2 = xyxy - return image[y1:y2, x1:x2] + x_min, y_min, x_max, y_max = xyxy.flatten() + return image[y_min:y_max, x_min:x_max] @convert_for_image_processing From f2e6d882fbb64f326e179cdd24f5837715982ac8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 12:50:48 +0000 Subject: [PATCH 04/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/image.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index a6466eaa7..954533a2c 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -26,8 +26,7 @@ @convert_for_image_processing def crop_image( - image: ImageType, - xyxy: Union[np.ndarray, List[int], Tuple[int, int, int, int]] + image: ImageType, xyxy: Union[np.ndarray, List[int], Tuple[int, int, int, int]] ) -> ImageType: """ Crops the given image based on the given bounding box. From 94e06ab0d6b71d87a3839207efe6b23d6bf74186 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 16:46:05 +0200 Subject: [PATCH 05/18] scale_image, resize_image changes --- docs/utils/image.md | 10 +- supervision/__init__.py | 4 +- supervision/annotators/core.py | 4 +- supervision/utils/image.py | 167 ++++++++++++++++++++++----------- 4 files changed, 125 insertions(+), 60 deletions(-) diff --git a/docs/utils/image.md b/docs/utils/image.md index 087aebd73..b61cddfce 100644 --- a/docs/utils/image.md +++ b/docs/utils/image.md @@ -18,10 +18,10 @@ status: new :::supervision.utils.image.crop_image
-

<div class="md-typeset">
-  <h2><a href="#supervision.utils.image.letterbox_image">letterbox_image</a></h2>
+  <h2><a href="#supervision.utils.image.scale_image">scale_image</a></h2>
</div>

-:::supervision.utils.image.letterbox_image +:::supervision.utils.image.scale_image
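For a quick sanity check, a minimal sketch of the renamed helper on a synthetic NumPy frame (the array shape is illustrative only):

```python
import numpy as np
import supervision as sv

# stand-in for a real BGR frame; any HxWx3 uint8 array works
image = np.zeros((480, 640, 3), dtype=np.uint8)

# scale_factor > 1.0 zooms in, < 1.0 zooms out
scaled = sv.scale_image(image=image, scale_factor=0.5)
print(scaled.shape)  # (240, 320, 3)
```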

<div class="md-typeset">
  <h2><a href="#supervision.utils.image.resize_image">resize_image</a></h2>
</div>

@@ -29,6 +29,12 @@ status: new :::supervision.utils.image.resize_image +
+

+<div class="md-typeset">
+  <h2><a href="#supervision.utils.image.letterbox_image">letterbox_image</a></h2>
+</div>

+
+ +:::supervision.utils.image.letterbox_image +
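A minimal sketch of the newly listed helper, using a synthetic frame; note that at this point in the series the size argument is still named `target_resolution_wh` (a later patch renames it to `resolution_wh`):

```python
import numpy as np
import supervision as sv

# synthetic 4:3 frame, letterboxed onto a square canvas
image = np.zeros((480, 640, 3), dtype=np.uint8)

# color accepts a BGR tuple or an sv.Color instance
letterboxed = sv.letterbox_image(
    image=image, target_resolution_wh=(640, 640), color=(255, 255, 255)
)
print(letterboxed.shape)  # (640, 640, 3) - aspect ratio kept, padding added
```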

<div class="md-typeset">
  <h2><a href="#supervision.utils.image.place_image">place_image</a></h2>
</div>

diff --git a/supervision/__init__.py b/supervision/__init__.py index 8c3acc265..d6aeeab59 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -78,8 +78,8 @@ crop_image, letterbox_image, place_image, - resize_image, - resize_image_keeping_aspect_ratio, + scale_image, + resize_image ) from supervision.utils.notebook import plot_image, plot_images_grid from supervision.utils.video import ( diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 9f6cdb367..38ddc4421 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -13,7 +13,7 @@ from supervision.draw.utils import draw_polygon from supervision.geometry.core import Position from supervision.utils.conversion import convert_for_annotation_method -from supervision.utils.image import crop_image, place_image, resize_image +from supervision.utils.image import crop_image, place_image, scale_image class BoundingBoxAnnotator(BaseAnnotator): @@ -1965,7 +1965,7 @@ def annotate( crop_image(image=scene, xyxy=xyxy) for xyxy in detections.xyxy.astype(int) ] resized_crops = [ - resize_image(image=crop, scale_factor=self.scale_factor) for crop in crops + scale_image(image=crop, scale_factor=self.scale_factor) for crop in crops ] anchors = detections.get_anchors_coordinates(anchor=self.position).astype(int) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 954533a2c..99fcb3f24 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -7,6 +7,7 @@ import cv2 import numpy as np +import numpy.typing as npt from supervision.annotators.base import ImageType from supervision.draw.color import Color, unify_to_bgr @@ -26,7 +27,8 @@ @convert_for_image_processing def crop_image( - image: ImageType, xyxy: Union[np.ndarray, List[int], Tuple[int, int, int, int]] + image: ImageType, + xyxy: Union[npt.NDArray[int], List[int], Tuple[int, int, int, int]] ) -> ImageType: """ Crops the given image based on the given bounding box. @@ -35,23 +37,49 @@ def crop_image( image (ImageType): The image to be cropped. `ImageType` is a flexible type, accepting either `numpy.ndarray` or `PIL.Image.Image`. xyxy (Union[np.ndarray, List[int], Tuple[int, int, int, int]]): A bounding box - coordinates in the format (x_min, y_min, x_max, y_max), accepted as either - a numpy array, a list, or a tuple. + coordinates in the format `(x_min, y_min, x_max, y_max)`, accepted as either + a `numpy.ndarray`, a `list`, or a `tuple`. Returns: - (ImageType): The cropped image. + (ImageType): The cropped image. The type is determined by the input type and + may be either a `numpy.ndarray` or `PIL.Image.Image`. Examples: + + === "OpenCV" + ```python + import cv2 import supervision as sv - detection = sv.Detections(...) 
- with sv.ImageSink(target_dir_path='target/directory/path') as sink: - for xyxy in detection.xyxy: - cropped_image = sv.crop_image(image=image, xyxy=xyxy) - sink.save_image(image=cropped_image) + image = cv2.imread() + image.shape + # (1080, 1920, 3) + + xyxy = [200, 400, 600, 800] + cropped_image = sv.crop_image(image=image, xyxy=xyxy) + cropped_image.shape + # (400, 400, 3) ``` - """ + + === "Pillow" + + ```python + from PIL import Image + import supervision as sv + + image = Image.open() + image.size + # (1920, 1080) + + xyxy = [200, 400, 600, 800] + cropped_image = sv.crop_image(image=image, xyxy=xyxy) + cropped_image.size + # (400, 400) + ``` + + ![crop_image](https://media.roboflow.com/supervision-docs/crop-image.png){ align=center width="800" } + """ # noqa E501 // docs if isinstance(xyxy, (list, tuple)): xyxy = np.array(xyxy) @@ -61,29 +89,62 @@ def crop_image( @convert_for_image_processing -def resize_image(image: np.ndarray, scale_factor: float) -> np.ndarray: +def scale_image(image: ImageType, scale_factor: float) -> ImageType: """ - Resizes an image by a given scale factor using cv2.INTER_LINEAR interpolation. + Scales the given image based on the given scale factor. Args: - image (np.ndarray): The input image to be resized. + image (ImageType): The image to be scaled. `ImageType` is a flexible type, + accepting either `numpy.ndarray` or `PIL.Image.Image`. scale_factor (float): The factor by which the image will be scaled. Scale - factor > 1.0 zooms in, < 1.0 zooms out. + factor > `1.0` zooms in, < `1.0` zooms out. Returns: - np.ndarray: The resized image. + (ImageType): The scaled image. The type is determined by the input type and + may be either a `numpy.ndarray` or `PIL.Image.Image`. Raises: ValueError: If the scale factor is non-positive. 
+ + Examples: + + === "OpenCV" + + ```python + import cv2 + import supervision as sv + + image = cv2.imread() + image.shape + # (1080, 1920, 3) + + scaled_image = sv.scale_image(image=image, scale_factor=0.5) + scaled_image.shape + # (540, 960, 3) + ``` + + === "Pillow" + + ```python + from PIL import Image + import supervision as sv + + image = Image.open() + image.size + # (1920, 1080) + + scaled_image = sv.scale_image(image=image, scale_factor=0.5) + scaled_image.size + # (540, 960) + ``` """ if scale_factor <= 0: raise ValueError("Scale factor must be positive.") - old_width, old_height = image.shape[1], image.shape[0] - new_width = int(old_width * scale_factor) - new_height = int(old_height * scale_factor) - - return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR) + width_old, height_old = image.shape[1], image.shape[0] + width_new = int(width_old * scale_factor) + height_new = int(height_old * scale_factor) + return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) def place_image( @@ -573,9 +634,10 @@ def letterbox_image( decorator if function was called with PIL.Image) """ color = unify_to_bgr(color=color) - resized_img = resize_image_keeping_aspect_ratio( + resized_img = resize_image( image=image, - desired_size=target_resolution_wh, + target_resolution_wh=target_resolution_wh, + keep_aspect_ratio=True ) new_height, new_width = resized_img.shape[:2] top_padding = (target_resolution_wh[1] - new_height) // 2 @@ -594,40 +656,37 @@ def letterbox_image( @convert_for_image_processing -def resize_image_keeping_aspect_ratio( - image: np.ndarray, - desired_size: Tuple[int, int], -) -> np.ndarray: +def resize_image( + image: ImageType, + target_resolution_wh: Tuple[int, int], + keep_aspect_ratio: bool = False +) -> ImageType: """ - Resize and pad image preserving its aspect ratio. - - For example: input image is (640, 480) and we want to resize into - (1024, 1024). If this rectangular image is just resized naively - to square-shape output - aspect ratio would be altered. If we do not - want this to happen - we may resize bigger dimension (640) to 1024. - Ratio of change is 1.6. This ratio is later on used to calculate scaling - in the other dimension. As a result we have (1024, 768) image. - - Parameters: - - image (np.ndarray): Input image (type will be adjusted by decorator, - you can provide PIL.Image) - - desired_size (Tuple[int, int]): image size (width, height) representing the - target dimensions. Parameter will be used to dictate maximum size of - output image. Output size may be smaller - to preserve aspect ratio of original - image. + Resizes the given image to a specified resolution. Can maintain the original aspect + ratio or resize directly to the desired dimensions. + + Args: + image (ImageType): The image to be resized. `ImageType` is a flexible type, + accepting either `numpy.ndarray` or `PIL.Image.Image`. + target_resolution_wh (Tuple[int, int]): The target resolution as + `(width, height)`. + keep_aspect_ratio (bool, optional): Flag to maintain the image's original + aspect ratio. Defaults to `False`. Returns: - np.ndarray: resized image (type may be adjusted to PIL.Image by decorator - if function was called with PIL.Image) + ImageType: The resized image. The type is determined by the input type and + may be either a `numpy.ndarray` or `PIL.Image.Image`. 
""" - if image.shape[:2] == desired_size[::-1]: - return image - img_ratio = image.shape[1] / image.shape[0] - desired_ratio = desired_size[0] / desired_size[1] - if img_ratio >= desired_ratio: - new_width = desired_size[0] - new_height = int(desired_size[0] / img_ratio) + if keep_aspect_ratio: + image_ratio = image.shape[1] / image.shape[0] + target_ratio = target_resolution_wh[0] / target_resolution_wh[1] + if image_ratio >= target_ratio: + width_new = target_resolution_wh[0] + height_new = int(target_resolution_wh[0] / image_ratio) + else: + height_new = target_resolution_wh[1] + width_new = int(target_resolution_wh[1] * image_ratio) else: - new_height = desired_size[1] - new_width = int(desired_size[1] * img_ratio) - return cv2.resize(image, (new_width, new_height)) + width_new, height_new = target_resolution_wh + + return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) From ad946893624e8609282035af3443d361f06b5564 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 14:46:45 +0000 Subject: [PATCH 06/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 2 +- supervision/utils/image.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index d6aeeab59..b14a37ab8 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -78,8 +78,8 @@ crop_image, letterbox_image, place_image, + resize_image, scale_image, - resize_image ) from supervision.utils.notebook import plot_image, plot_images_grid from supervision.utils.video import ( diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 99fcb3f24..8fe25eea3 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -28,7 +28,7 @@ @convert_for_image_processing def crop_image( image: ImageType, - xyxy: Union[npt.NDArray[int], List[int], Tuple[int, int, int, int]] + xyxy: Union[npt.NDArray[int], List[int], Tuple[int, int, int, int]], ) -> ImageType: """ Crops the given image based on the given bounding box. @@ -635,9 +635,7 @@ def letterbox_image( """ color = unify_to_bgr(color=color) resized_img = resize_image( - image=image, - target_resolution_wh=target_resolution_wh, - keep_aspect_ratio=True + image=image, target_resolution_wh=target_resolution_wh, keep_aspect_ratio=True ) new_height, new_width = resized_img.shape[:2] top_padding = (target_resolution_wh[1] - new_height) // 2 @@ -659,7 +657,7 @@ def letterbox_image( def resize_image( image: ImageType, target_resolution_wh: Tuple[int, int], - keep_aspect_ratio: bool = False + keep_aspect_ratio: bool = False, ) -> ImageType: """ Resizes the given image to a specified resolution. 
Can maintain the original aspect From 233d25d4c539eb180e2091856f3ebdddb6695c95 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 17:48:13 +0200 Subject: [PATCH 07/18] fix `resize_image` tests after rename --- supervision/utils/image.py | 116 ++++++++++++++++++++++++------------- test/utils/test_image.py | 16 ++--- 2 files changed, 86 insertions(+), 46 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 8fe25eea3..4c5cc9c67 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -135,7 +135,7 @@ def scale_image(image: ImageType, scale_factor: float) -> ImageType: scaled_image = sv.scale_image(image=image, scale_factor=0.5) scaled_image.size - # (540, 960) + # (960, 540) ``` """ if scale_factor <= 0: @@ -147,6 +147,81 @@ def scale_image(image: ImageType, scale_factor: float) -> ImageType: return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) +@convert_for_image_processing +def resize_image( + image: ImageType, + resolution_wh: Tuple[int, int], + keep_aspect_ratio: bool = False, +) -> ImageType: + """ + Resizes the given image to a specified resolution. Can maintain the original aspect + ratio or resize directly to the desired dimensions. + + Args: + image (ImageType): The image to be resized. `ImageType` is a flexible type, + accepting either `numpy.ndarray` or `PIL.Image.Image`. + resolution_wh (Tuple[int, int]): The target resolution as + `(width, height)`. + keep_aspect_ratio (bool, optional): Flag to maintain the image's original + aspect ratio. Defaults to `False`. + + Returns: + ImageType: The resized image. The type is determined by the input type and + may be either a `numpy.ndarray` or `PIL.Image.Image`. + + Examples: + + === "OpenCV" + + ```python + import cv2 + import supervision as sv + + image = cv2.imread() + image.shape + # (1080, 1920, 3) + + resized_image = sv.resize_image( + image=image, resolution_wh=(1000, 1000), keep_aspect_ratio=True + ) + resized_image.shape + # (562, 1000, 3) + ``` + + === "Pillow" + + ```python + from PIL import Image + import supervision as sv + + image = Image.open() + image.size + # (1920, 1080) + + resized_image = sv.resize_image( + image=image, resolution_wh=(1000, 1000), keep_aspect_ratio=True + ) + resized_image.size + # (1000, 562) + ``` + + ![resize_image](https://media.roboflow.com/supervision-docs/resize-image.png){ align=center width="800" } + """ # noqa E501 // docs + if keep_aspect_ratio: + image_ratio = image.shape[1] / image.shape[0] + target_ratio = resolution_wh[0] / resolution_wh[1] + if image_ratio >= target_ratio: + width_new = resolution_wh[0] + height_new = int(resolution_wh[0] / image_ratio) + else: + height_new = resolution_wh[1] + width_new = int(resolution_wh[1] * image_ratio) + else: + width_new, height_new = resolution_wh + + return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) + + def place_image( scene: np.ndarray, image: np.ndarray, anchor: Tuple[int, int] ) -> np.ndarray: @@ -635,7 +710,7 @@ def letterbox_image( """ color = unify_to_bgr(color=color) resized_img = resize_image( - image=image, target_resolution_wh=target_resolution_wh, keep_aspect_ratio=True + image=image, resolution_wh=target_resolution_wh, keep_aspect_ratio=True ) new_height, new_width = resized_img.shape[:2] top_padding = (target_resolution_wh[1] - new_height) // 2 @@ -651,40 +726,3 @@ def letterbox_image( cv2.BORDER_CONSTANT, value=color, ) - - -@convert_for_image_processing -def resize_image( - image: ImageType, - 
target_resolution_wh: Tuple[int, int], - keep_aspect_ratio: bool = False, -) -> ImageType: - """ - Resizes the given image to a specified resolution. Can maintain the original aspect - ratio or resize directly to the desired dimensions. - - Args: - image (ImageType): The image to be resized. `ImageType` is a flexible type, - accepting either `numpy.ndarray` or `PIL.Image.Image`. - target_resolution_wh (Tuple[int, int]): The target resolution as - `(width, height)`. - keep_aspect_ratio (bool, optional): Flag to maintain the image's original - aspect ratio. Defaults to `False`. - - Returns: - ImageType: The resized image. The type is determined by the input type and - may be either a `numpy.ndarray` or `PIL.Image.Image`. - """ - if keep_aspect_ratio: - image_ratio = image.shape[1] / image.shape[0] - target_ratio = target_resolution_wh[0] / target_resolution_wh[1] - if image_ratio >= target_ratio: - width_new = target_resolution_wh[0] - height_new = int(target_resolution_wh[0] / image_ratio) - else: - height_new = target_resolution_wh[1] - width_new = int(target_resolution_wh[1] * image_ratio) - else: - width_new, height_new = target_resolution_wh - - return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) diff --git a/test/utils/test_image.py b/test/utils/test_image.py index 50b6b5c1b..b3e28a277 100644 --- a/test/utils/test_image.py +++ b/test/utils/test_image.py @@ -8,19 +8,20 @@ from supervision.utils.image import ( create_tiles, letterbox_image, - resize_image_keeping_aspect_ratio, + resize_image, ) -def test_resize_image_keeping_aspect_ratio_for_opencv_image() -> None: +def test_resize_image_for_opencv_image() -> None: # given image = np.zeros((480, 640, 3), dtype=np.uint8) expected_result = np.zeros((768, 1024, 3), dtype=np.uint8) # when - result = resize_image_keeping_aspect_ratio( + result = resize_image( image=image, - desired_size=(1024, 1024), + resolution_wh=(1024, 1024), + keep_aspect_ratio=True, ) # then @@ -29,15 +30,16 @@ def test_resize_image_keeping_aspect_ratio_for_opencv_image() -> None: ), "Expected output shape to be (w, h): (1024, 768)" -def test_resize_image_keeping_aspect_ratio_for_pillow_image() -> None: +def test_resize_image_for_pillow_image() -> None: # given image = Image.new(mode="RGB", size=(640, 480), color=(0, 0, 0)) expected_result = Image.new(mode="RGB", size=(1024, 768), color=(0, 0, 0)) # when - result = resize_image_keeping_aspect_ratio( + result = resize_image( image=image, - desired_size=(1024, 1024), + resolution_wh=(1024, 1024), + keep_aspect_ratio=True, ) # then From a7dccfcc0d3f168e36b9f4be06686206be1d3260 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 15:48:30 +0000 Subject: [PATCH 08/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/image.py | 2 +- test/utils/test_image.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 4c5cc9c67..e04ea45ed 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -204,7 +204,7 @@ def resize_image( resized_image.size # (1000, 562) ``` - + ![resize_image](https://media.roboflow.com/supervision-docs/resize-image.png){ align=center width="800" } """ # noqa E501 // docs if keep_aspect_ratio: diff --git a/test/utils/test_image.py 
b/test/utils/test_image.py index b3e28a277..a114d9aae 100644 --- a/test/utils/test_image.py +++ b/test/utils/test_image.py @@ -5,11 +5,7 @@ from PIL import Image, ImageChops from supervision import Color, Point -from supervision.utils.image import ( - create_tiles, - letterbox_image, - resize_image, -) +from supervision.utils.image import create_tiles, letterbox_image, resize_image def test_resize_image_for_opencv_image() -> None: From 774ac2ed4f82706c0a889741b2619cc8575b5ee0 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 17:59:19 +0200 Subject: [PATCH 09/18] update `letterbox_image` docs --- supervision/utils/image.py | 86 +++++++++++++++++++------------------- test/utils/test_image.py | 4 +- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index e04ea45ed..274cef2f6 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -222,6 +222,48 @@ def resize_image( return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) +@convert_for_image_processing +def letterbox_image( + image: ImageType, + resolution_wh: Tuple[int, int], + color: Union[Tuple[int, int, int], Color] = (0, 0, 0), +) -> ImageType: + """ + Resizes and pads an image to a specified resolution with a given color, maintaining + the original aspect ratio. + + Args: + image (ImageType): The image to be resized. `ImageType` is a flexible type, + accepting either `numpy.ndarray` or `PIL.Image.Image`. + resolution_wh (Tuple[int, int]): The target resolution as + `(width, height)`. + color (Union[Tuple[int, int, int], Color]): The color to pad with. If tuple + provided it should be in BGR format. + + Returns: + ImageType: The resized image. The type is determined by the input type and + may be either a `numpy.ndarray` or `PIL.Image.Image`. + """ + color = unify_to_bgr(color=color) + resized_image = resize_image( + image=image, resolution_wh=resolution_wh, keep_aspect_ratio=True + ) + height_new, width_new = resized_image.shape[:2] + padding_top = (resolution_wh[1] - height_new) // 2 + padding_bottom = resolution_wh[1] - height_new - padding_top + padding_left = (resolution_wh[0] - width_new) // 2 + padding_right = resolution_wh[0] - width_new - padding_left + return cv2.copyMakeBorder( + resized_image, + padding_top, + padding_bottom, + padding_left, + padding_right, + cv2.BORDER_CONSTANT, + value=color, + ) + + def place_image( scene: np.ndarray, image: np.ndarray, anchor: Tuple[int, int] ) -> np.ndarray: @@ -434,7 +476,7 @@ def create_tiles( single_tile_size = _aggregate_images_shape(images=images, mode=tile_scaling) resized_images = [ letterbox_image( - image=i, target_resolution_wh=single_tile_size, color=tile_padding_color + image=i, resolution_wh=single_tile_size, color=tile_padding_color ) for i in images ] @@ -684,45 +726,3 @@ def _generate_color_image( shape: Tuple[int, int], color: Tuple[int, int, int] ) -> np.ndarray: return np.ones(shape[::-1] + (3,), dtype=np.uint8) * color - - -@convert_for_image_processing -def letterbox_image( - image: ImageType, - target_resolution_wh: Tuple[int, int], - color: Union[Tuple[int, int, int], Color] = (0, 0, 0), -) -> np.ndarray: - """ - Resize and pad image to fit the desired size, preserving its aspect - ratio, adding padding of given color if needed to maintain aspect ratio. 
- - Args: - image (ImageType): Input image (type will be adjusted by decorator, - you can provide PIL.Image) - target_resolution_wh (Tuple[int, int]): image size (width, height) representing - the target dimensions. - color (Union[Tuple[int, int, int], Color]): the color to pad with - If - tuple provided - should be BGR. - - Returns: - ImageType: letterboxed image (type may be adjusted to PIL.Image by - decorator if function was called with PIL.Image) - """ - color = unify_to_bgr(color=color) - resized_img = resize_image( - image=image, resolution_wh=target_resolution_wh, keep_aspect_ratio=True - ) - new_height, new_width = resized_img.shape[:2] - top_padding = (target_resolution_wh[1] - new_height) // 2 - bottom_padding = target_resolution_wh[1] - new_height - top_padding - left_padding = (target_resolution_wh[0] - new_width) // 2 - right_padding = target_resolution_wh[0] - new_width - left_padding - return cv2.copyMakeBorder( - resized_img, - top_padding, - bottom_padding, - left_padding, - right_padding, - cv2.BORDER_CONSTANT, - value=color, - ) diff --git a/test/utils/test_image.py b/test/utils/test_image.py index a114d9aae..487434aed 100644 --- a/test/utils/test_image.py +++ b/test/utils/test_image.py @@ -60,7 +60,7 @@ def test_letterbox_image_for_opencv_image() -> None: # when result = letterbox_image( - image=image, target_resolution_wh=(1024, 1024), color=(255, 255, 255) + image=image, resolution_wh=(1024, 1024), color=(255, 255, 255) ) # then @@ -86,7 +86,7 @@ def test_letterbox_image_for_pillow_image() -> None: # when result = letterbox_image( - image=image, target_resolution_wh=(1024, 1024), color=(255, 255, 255) + image=image, resolution_wh=(1024, 1024), color=(255, 255, 255) ) # then From 079e62fb0c544248cc6511d79d6be664928aa81e Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 21:23:23 +0200 Subject: [PATCH 10/18] rename `place_image` to overlay `image` --- docs/utils/image.md | 14 ++--- supervision/__init__.py | 2 +- supervision/annotators/core.py | 4 +- supervision/utils/image.py | 96 +++++++++++++++++++++++++--------- 4 files changed, 81 insertions(+), 35 deletions(-) diff --git a/docs/utils/image.md b/docs/utils/image.md index b61cddfce..16a18dc40 100644 --- a/docs/utils/image.md +++ b/docs/utils/image.md @@ -5,12 +5,6 @@ status: new # Image Utils -
-

-<div class="md-typeset">
-  <h2><a href="#supervision.utils.image.ImageSink">ImageSink</a></h2>
-</div>

-
- -:::supervision.utils.image.ImageSink -

<div class="md-typeset">
  <h2><a href="#supervision.utils.image.crop_image">crop_image</a></h2>
</div>

@@ -39,4 +33,10 @@ status: new
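A short sketch of `crop_image` with the loosened `xyxy` typing introduced earlier in the series, again on a synthetic frame:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)

# xyxy may be a numpy array, list, or tuple of (x_min, y_min, x_max, y_max)
cropped = sv.crop_image(image=image, xyxy=(100, 50, 300, 250))
print(cropped.shape)  # (200, 200, 3)
```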

<div class="md-typeset">
  <h2><a href="#supervision.utils.image.place_image">place_image</a></h2>
</div>

-:::supervision.utils.image.place_image +:::supervision.utils.image.overlay_image + +
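A minimal sketch of the renamed `overlay_image`, using synthetic arrays; placements that run past the scene border are clipped rather than raising:

```python
import numpy as np
import supervision as sv

image = np.zeros((480, 640, 3), dtype=np.uint8)
overlay = np.full((100, 100, 3), 255, dtype=np.uint8)

# anchor is the (x, y) of the overlay's top-left corner within the scene
result = sv.overlay_image(image=image, overlay=overlay, anchor=(600, 440))
print(result.shape)  # (480, 640, 3) - overlay clipped at the border
```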
+

+<div class="md-typeset">
+  <h2><a href="#supervision.utils.image.ImageSink">ImageSink</a></h2>
+</div>

+
+ +:::supervision.utils.image.ImageSink diff --git a/supervision/__init__.py b/supervision/__init__.py index b14a37ab8..9ff095573 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -77,7 +77,7 @@ create_tiles, crop_image, letterbox_image, - place_image, + overlay_image, resize_image, scale_image, ) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 38ddc4421..854a9ece6 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -13,7 +13,7 @@ from supervision.draw.utils import draw_polygon from supervision.geometry.core import Position from supervision.utils.conversion import convert_for_annotation_method -from supervision.utils.image import crop_image, place_image, scale_image +from supervision.utils.image import crop_image, overlay_image, scale_image class BoundingBoxAnnotator(BaseAnnotator): @@ -1974,7 +1974,7 @@ def annotate( (x1, y1), (x2, y2) = self.calculate_crop_coordinates( anchor=anchor, crop_wh=crop_wh, position=self.position ) - scene = place_image(scene=scene, image=resized_crop, anchor=(x1, y1)) + scene = overlay_image(scene=scene, inserted_image=resized_crop, anchor=(x1, y1)) color = resolve_color( color=self.border_color, detections=detections, diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 274cef2f6..b579ba75f 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -243,7 +243,41 @@ def letterbox_image( Returns: ImageType: The resized image. The type is determined by the input type and may be either a `numpy.ndarray` or `PIL.Image.Image`. - """ + + ## Examples: + + === "OpenCV" + + ```python + import cv2 + import supervision as sv + + image = cv2.imread() + image.shape + # (1080, 1920, 3) + + letterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000)) + letterboxed_image.shape + # (1000, 1000, 3) + ``` + + === "Pillow" + + ```python + from PIL import Image + import supervision as sv + + image = Image.open() + image.size + # (1920, 1080) + + letterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000)) + letterboxed_image.size + # (1000, 1000) + ``` + + ![letterbox_image](https://media.roboflow.com/supervision-docs/letterbox-image.png){ align=center width="800" } + """ # noqa E501 // docs color = unify_to_bgr(color=color) resized_image = resize_image( image=image, resolution_wh=resolution_wh, keep_aspect_ratio=True @@ -264,48 +298,60 @@ def letterbox_image( ) -def place_image( - scene: np.ndarray, image: np.ndarray, anchor: Tuple[int, int] -) -> np.ndarray: +def overlay_image( + image: npt.NDArray[np.uint8], + overlay: npt.NDArray[np.uint8], + anchor: Tuple[int, int] +) -> npt.NDArray[np.uint8]: """ Places an image onto a scene at a given anchor point, handling cases where the image's position is partially or completely outside the scene's bounds. Args: - scene (np.ndarray): The background scene onto which the image is placed. - image (np.ndarray): The image to be placed onto the scene. - anchor (Tuple[int, int]): The (x, y) coordinates in the scene where the + image (np.ndarray): The background scene onto which the image is placed. + overlay (np.ndarray): The image to be placed onto the scene. + anchor (Tuple[int, int]): The `(x, y)` coordinates in the scene where the top-left corner of the image will be placed. Returns: - np.ndarray: The modified scene with the image placed at the anchor point, - or unchanged if the image placement is completely outside the scene. + np.ndarray: The result image with overlay. 
+ + Examples: + ```python + import cv2 + import numpy as np + import supervision as sv + + image = cv2.imread() + overlay = np.zeros((200, 200, 3), dtype=np.uint8) + result_image = sv.letterbox_image( + image=image, overlay=overlay, anchor=(200, 400)) + ``` """ - scene_height, scene_width = scene.shape[:2] - image_height, image_width = image.shape[:2] + scene_height, scene_width = image.shape[:2] + image_height, image_width = overlay.shape[:2] anchor_x, anchor_y = anchor is_out_horizontally = anchor_x + image_width <= 0 or anchor_x >= scene_width is_out_vertically = anchor_y + image_height <= 0 or anchor_y >= scene_height if is_out_horizontally or is_out_vertically: - return scene + return image - start_y = max(anchor_y, 0) - start_x = max(anchor_x, 0) - end_y = min(scene_height, anchor_y + image_height) - end_x = min(scene_width, anchor_x + image_width) + x_min = max(anchor_x, 0) + y_min = max(anchor_y, 0) + x_max = min(scene_width, anchor_x + image_width) + y_max = min(scene_height, anchor_y + image_height) - crop_start_y = max(-anchor_y, 0) - crop_start_x = max(-anchor_x, 0) - crop_end_y = image_height - max((anchor_y + image_height) - scene_height, 0) - crop_end_x = image_width - max((anchor_x + image_width) - scene_width, 0) + crop_x_min = max(-anchor_x, 0) + crop_y_min = max(-anchor_y, 0) + crop_x_max = image_width - max((anchor_x + image_width) - scene_width, 0) + crop_y_max = image_height - max((anchor_y + image_height) - scene_height, 0) - scene[start_y:end_y, start_x:end_x] = image[ - crop_start_y:crop_end_y, crop_start_x:crop_end_x - ] + image[y_min:y_max, x_min:x_max] = \ + overlay[crop_y_min:crop_y_max, crop_x_min:crop_x_max] - return scene + return image class ImageSink: @@ -335,7 +381,7 @@ def __init__( source_path='source_video.mp4', stride=2): sink.save_image(image=image) ``` - """ + """ # noqa E501 // docs self.target_dir_path = target_dir_path self.overwrite = overwrite From f872fddaffe2d34c11f2fcc62b6d1b53a28aec85 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 19:24:02 +0000 Subject: [PATCH 11/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/annotators/core.py | 4 +++- supervision/utils/image.py | 9 +++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 854a9ece6..ac9018625 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -1974,7 +1974,9 @@ def annotate( (x1, y1), (x2, y2) = self.calculate_crop_coordinates( anchor=anchor, crop_wh=crop_wh, position=self.position ) - scene = overlay_image(scene=scene, inserted_image=resized_crop, anchor=(x1, y1)) + scene = overlay_image( + scene=scene, inserted_image=resized_crop, anchor=(x1, y1) + ) color = resolve_color( color=self.border_color, detections=detections, diff --git a/supervision/utils/image.py b/supervision/utils/image.py index b579ba75f..f6eef9ca8 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -275,7 +275,7 @@ def letterbox_image( letterboxed_image.size # (1000, 1000) ``` - + ![letterbox_image](https://media.roboflow.com/supervision-docs/letterbox-image.png){ align=center width="800" } """ # noqa E501 // docs color = unify_to_bgr(color=color) @@ -301,7 +301,7 @@ def letterbox_image( def overlay_image( image: npt.NDArray[np.uint8], overlay: 
npt.NDArray[np.uint8], - anchor: Tuple[int, int] + anchor: Tuple[int, int], ) -> npt.NDArray[np.uint8]: """ Places an image onto a scene at a given anchor point, handling cases where @@ -348,8 +348,9 @@ def overlay_image( crop_x_max = image_width - max((anchor_x + image_width) - scene_width, 0) crop_y_max = image_height - max((anchor_y + image_height) - scene_height, 0) - image[y_min:y_max, x_min:x_max] = \ - overlay[crop_y_min:crop_y_max, crop_x_min:crop_x_max] + image[y_min:y_max, x_min:x_max] = overlay[ + crop_y_min:crop_y_max, crop_x_min:crop_x_max + ] return image From c30e377eb5393aed81b182d6a66217e73c6d439a Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 22:07:12 +0200 Subject: [PATCH 12/18] more docs improvements --- docs/utils/image.md | 2 +- supervision/__init__.py | 1 + supervision/utils/conversion.py | 4 ++-- supervision/utils/image.py | 31 ++++++++++++------------------- supervision/utils/iterables.py | 4 ++-- 5 files changed, 18 insertions(+), 24 deletions(-) diff --git a/docs/utils/image.md b/docs/utils/image.md index 16a18dc40..87fd1943e 100644 --- a/docs/utils/image.md +++ b/docs/utils/image.md @@ -30,7 +30,7 @@ status: new :::supervision.utils.image.letterbox_image
-

<div class="md-typeset">
-  <h2><a href="#supervision.utils.image.place_image">place_image</a></h2>
+  <h2><a href="#supervision.utils.image.overlay_image">overlay_image</a></h2>
</div>

:::supervision.utils.image.overlay_image diff --git a/supervision/__init__.py b/supervision/__init__.py index 9ff095573..6cdb23ca9 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -34,6 +34,7 @@ ClassificationDataset, DetectionDataset, ) +from supervision.utils.conversion import pillow_to_cv2, cv2_to_pillow from supervision.detection.annotate import BoxAnnotator from supervision.detection.core import Detections from supervision.detection.line_zone import LineZone, LineZoneAnnotator diff --git a/supervision/utils/conversion.py b/supervision/utils/conversion.py index 608104bcd..8ddce9695 100644 --- a/supervision/utils/conversion.py +++ b/supervision/utils/conversion.py @@ -81,7 +81,7 @@ def pillow_to_cv2(image: Image.Image) -> np.ndarray: image (Image.Image): Pillow image (in RGB format). Returns: - np.ndarray: Input image converted to OpenCV format. + (np.ndarray): Input image converted to OpenCV format. """ scene = np.array(image) scene = cv2.cvtColor(scene, cv2.COLOR_RGB2BGR) @@ -97,7 +97,7 @@ def cv2_to_pillow(image: np.ndarray) -> Image.Image: image (np.ndarray): OpenCV image (in BGR format). Returns: - Image.Image: Input image converted to Pillow format. + (Image.Image): Input image converted to Pillow format. """ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) return Image.fromarray(image) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index b579ba75f..488a9d360 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -44,8 +44,6 @@ def crop_image( (ImageType): The cropped image. The type is determined by the input type and may be either a `numpy.ndarray` or `PIL.Image.Image`. - Examples: - === "OpenCV" ```python @@ -106,8 +104,6 @@ def scale_image(image: ImageType, scale_factor: float) -> ImageType: Raises: ValueError: If the scale factor is non-positive. - Examples: - === "OpenCV" ```python @@ -166,11 +162,9 @@ def resize_image( aspect ratio. Defaults to `False`. Returns: - ImageType: The resized image. The type is determined by the input type and + (ImageType): The resized image. The type is determined by the input type and may be either a `numpy.ndarray` or `PIL.Image.Image`. - Examples: - === "OpenCV" ```python @@ -226,7 +220,7 @@ def resize_image( def letterbox_image( image: ImageType, resolution_wh: Tuple[int, int], - color: Union[Tuple[int, int, int], Color] = (0, 0, 0), + color: Union[Tuple[int, int, int], Color] = Color.BLACK, ) -> ImageType: """ Resizes and pads an image to a specified resolution with a given color, maintaining @@ -241,11 +235,9 @@ def letterbox_image( provided it should be in BGR format. Returns: - ImageType: The resized image. The type is determined by the input type and + (ImageType): The resized image. The type is determined by the input type and may be either a `numpy.ndarray` or `PIL.Image.Image`. - ## Examples: - === "OpenCV" ```python @@ -314,7 +306,7 @@ def overlay_image( top-left corner of the image will be placed. Returns: - np.ndarray: The result image with overlay. + (np.ndarray): The result image with overlay. 
Examples: ```python @@ -323,10 +315,11 @@ def overlay_image( import supervision as sv image = cv2.imread() - overlay = np.zeros((200, 200, 3), dtype=np.uint8) - result_image = sv.letterbox_image( - image=image, overlay=overlay, anchor=(200, 400)) + overlay = np.zeros((400, 400, 3), dtype=np.uint8) + result_image = sv.overlay_image(image=image, overlay=overlay, anchor=(200, 400)) ``` + + ![overlay_image](https://media.roboflow.com/supervision-docs/overlay-image.png){ align=center width="800" } """ scene_height, scene_width = image.shape[:2] image_height, image_width = overlay.shape[:2] @@ -374,11 +367,11 @@ def __init__( Examples: ```python import supervision as sv + + frames_generator = sv.get_video_frames_generator(, stride=2) - with sv.ImageSink(target_dir_path='target/directory/path', - overwrite=True) as sink: - for image in sv.get_video_frames_generator( - source_path='source_video.mp4', stride=2): + with sv.ImageSink(target_dir_path=) as sink: + for image in frames_generator: sink.save_image(image=image) ``` """ # noqa E501 // docs diff --git a/supervision/utils/iterables.py b/supervision/utils/iterables.py index ad570379c..52bfbeb6c 100644 --- a/supervision/utils/iterables.py +++ b/supervision/utils/iterables.py @@ -16,7 +16,7 @@ def create_batches( batch_size (int): The expected size of a batch. Returns: - Generator[List[V], None, None]: A generator that yields chunks + (Generator[List[V], None, None]): A generator that yields chunks of `sequence` of size `batch_size`, up to the length of the input `sequence`. @@ -54,7 +54,7 @@ def fill(sequence: List[V], desired_size: int, content: V) -> List[V]: `sequence` as padding. Returns: - List[V]: A padded version of the input `sequence` (if needed). + (List[V]): A padded version of the input `sequence` (if needed). 
Examples: ```python From 6248fa7d170ce765e5d59d2bce839836171841f3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 20:08:10 +0000 Subject: [PATCH 13/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 2 +- supervision/utils/image.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 6cdb23ca9..f8e7a8324 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -34,7 +34,6 @@ ClassificationDataset, DetectionDataset, ) -from supervision.utils.conversion import pillow_to_cv2, cv2_to_pillow from supervision.detection.annotate import BoxAnnotator from supervision.detection.core import Detections from supervision.detection.line_zone import LineZone, LineZoneAnnotator @@ -72,6 +71,7 @@ from supervision.geometry.utils import get_polygon_center from supervision.metrics.detection import ConfusionMatrix, MeanAveragePrecision from supervision.tracker.byte_tracker.core import ByteTrack +from supervision.utils.conversion import cv2_to_pillow, pillow_to_cv2 from supervision.utils.file import list_files_with_extensions from supervision.utils.image import ( ImageSink, diff --git a/supervision/utils/image.py b/supervision/utils/image.py index dd3316ec4..44ccbeac1 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -368,7 +368,7 @@ def __init__( Examples: ```python import supervision as sv - + frames_generator = sv.get_video_frames_generator(, stride=2) with sv.ImageSink(target_dir_path=) as sink: From d929d6f58b989a0fd38a359ca4b12b40921cccf2 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 9 Apr 2024 22:44:10 +0200 Subject: [PATCH 14/18] `How to Save Detections` guide update --- docs/how_to/save_detections.md | 222 ++++++++++++++++++++++++++++----- 1 file changed, 191 insertions(+), 31 deletions(-) diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md index 81de930ec..e0233f9b5 100644 --- a/docs/how_to/save_detections.md +++ b/docs/how_to/save_detections.md @@ -9,48 +9,64 @@ TODO ## Run Detection +First, you'll need to obtain predictions from your object detection or segmentation +model. You can learn more on this topic in our +[How to Detect and Annotate](/latest/how_to/detect_and_annotate.md) guide. 

=== "Inference"

    ```python
    import supervision as sv
    from inference import get_model

    model = get_model(model_id="yolov8n-640")
    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

    for frame in frames_generator:

        results = model.infer(frame)[0]
        detections = sv.Detections.from_inference(results)
    ```

=== "Ultralytics"

    ```python
    import supervision as sv
    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")
    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

    for frame in frames_generator:

        results = model(frame)[0]
        detections = sv.Detections.from_ultralytics(results)
    ```

=== "Transformers"

    ```python
    import torch
    import supervision as sv
    from transformers import DetrImageProcessor, DetrForObjectDetection

    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

    for frame in frames_generator:

        frame = sv.cv2_to_pillow(frame)
        inputs = processor(images=frame, return_tensors="pt")

        with torch.no_grad():
            outputs = model(**inputs)

        width, height = frame.size
        target_size = torch.tensor([[height, width]])
        results = processor.post_process_object_detection(
            outputs=outputs, target_sizes=target_size)[0]
        detections = sv.Detections.from_transformers(results)
    ```

## Save Detections as CSV

TODO

=== "Inference"

    ```{ .py hl_lines="7 12" }
    import supervision as sv
    from inference import get_model

    model = get_model(model_id="yolov8n-640")
    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

    with sv.CSVSink(<RESULT_CSV_FILE_PATH>) as sink:
        for frame in frames_generator:

            results = model.infer(frame)[0]
            detections = sv.Detections.from_inference(results)
            sink.append(detections, {})
    ```

=== "Ultralytics"

    ```{ .py hl_lines="7 12" }
    import supervision as sv
    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")
    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

    with sv.CSVSink(<RESULT_CSV_FILE_PATH>) as sink:
        for frame in frames_generator:

            results = model(frame)[0]
            detections = sv.Detections.from_ultralytics(results)
            sink.append(detections, {})
    ```

=== "Transformers"

    ```{ .py hl_lines="9 23" }
    import torch
    import supervision as sv
    from transformers import DetrImageProcessor, DetrForObjectDetection

    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)

    with sv.CSVSink(<RESULT_CSV_FILE_PATH>) as sink:
        for frame in frames_generator:

            frame = sv.cv2_to_pillow(frame)
            inputs = processor(images=frame, return_tensors="pt")

            with torch.no_grad():
                outputs = model(**inputs)

            width, height = frame.size
            target_size = torch.tensor([[height, width]])
 ## Custom Fields

 TODO

+=== "Inference"
+
+    ```{ .py hl_lines="8 12" }
+    import supervision as sv
+    from inference import get_model
+
+    model = get_model(model_id="yolov8n-640")
+    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
+
+    with sv.CSVSink(<RESULT_CSV_FILE_PATH>) as sink:
+        for frame_index, frame in enumerate(frames_generator):
+
+            results = model.infer(frame)[0]
+            detections = sv.Detections.from_inference(results)
+            sink.append(detections, {"frame_index": frame_index})
+    ```
+
+=== "Ultralytics"
+
+    ```{ .py hl_lines="8 12" }
+    import supervision as sv
+    from ultralytics import YOLO
+
+    model = YOLO("yolov8n.pt")
+    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
+
+    with sv.CSVSink(<RESULT_CSV_FILE_PATH>) as sink:
+        for frame_index, frame in enumerate(frames_generator):
+
+            results = model(frame)[0]
+            detections = sv.Detections.from_ultralytics(results)
+            sink.append(detections, {"frame_index": frame_index})
+    ```
+
+=== "Transformers"
+
+    ```{ .py hl_lines="10 23" }
+    import torch
+    import supervision as sv
+    from transformers import DetrImageProcessor, DetrForObjectDetection
+
+    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
+    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
+    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
+
+    with sv.CSVSink(<RESULT_CSV_FILE_PATH>) as sink:
+        for frame_index, frame in enumerate(frames_generator):
+
+            frame = sv.cv2_to_pillow(frame)
+            inputs = processor(images=frame, return_tensors="pt")
+
+            with torch.no_grad():
+                outputs = model(**inputs)
+
+            width, height = frame.size
+            target_size = torch.tensor([[height, width]])
+            results = processor.post_process_object_detection(
+                outputs=outputs, target_sizes=target_size)[0]
+            detections = sv.Detections.from_transformers(results)
+            sink.append(detections, {"frame_index": frame_index})
+    ```
+
+TODO
+
+| x_min   | y_min   | x_max   | y_max   | class_id | confidence | tracker_id | class_name | frame_index |
+|---------|---------|---------|---------|----------|------------|------------|------------|-------------|
+| 2941.14 | 1269.31 | 3220.77 | 1500.67 | 2        | 0.8517     |            | car        | 0           |
+| 944.889 | 899.641 | 1235.42 | 1308.80 | 7        | 0.6752     |            | truck      | 0           |
+| 1439.78 | 1077.79 | 1621.27 | 1231.40 | 2        | 0.6450     |            | car        | 0           |
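The `custom_data` dictionary is not limited to a frame index: any value that serializes to a CSV cell can ride along. Below is a minimal sketch; the `timestamp_s` field, the 30 fps figure, and both file paths are illustrative assumptions rather than part of the guide:

```python
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
FPS = 30  # assumed frame rate of the source video

with sv.CSVSink("detections.csv") as sink:  # hypothetical output path
    frames = sv.get_video_frames_generator("video.mp4")  # hypothetical source
    for frame_index, frame in enumerate(frames):
        results = model(frame)[0]
        detections = sv.Detections.from_ultralytics(results)
        # Every key in custom_data becomes an extra column, repeated on
        # each row written for this frame's detections.
        sink.append(
            detections,
            {"frame_index": frame_index, "timestamp_s": frame_index / FPS},
        )
```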
 ## Save Detections as JSON

 TODO

+=== "Inference"
+
+    ```{ .py hl_lines="7" }
+    import supervision as sv
+    from inference import get_model
+
+    model = get_model(model_id="yolov8n-640")
+    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
+
+    with sv.JSONSink(<RESULT_JSON_FILE_PATH>) as sink:
+        for frame_index, frame in enumerate(frames_generator):
+
+            results = model.infer(frame)[0]
+            detections = sv.Detections.from_inference(results)
+            sink.append(detections, {"frame_index": frame_index})
+    ```
+
+=== "Ultralytics"
+
+    ```{ .py hl_lines="7" }
+    import supervision as sv
+    from ultralytics import YOLO
+
+    model = YOLO("yolov8n.pt")
+    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
+
+    with sv.JSONSink(<RESULT_JSON_FILE_PATH>) as sink:
+        for frame_index, frame in enumerate(frames_generator):
+
+            results = model(frame)[0]
+            detections = sv.Detections.from_ultralytics(results)
+            sink.append(detections, {"frame_index": frame_index})
+    ```
+
+=== "Transformers"
+
+    ```{ .py hl_lines="9" }
+    import torch
+    import supervision as sv
+    from transformers import DetrImageProcessor, DetrForObjectDetection
+
+    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
+    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
+    frames_generator = sv.get_video_frames_generator(<SOURCE_VIDEO_PATH>)
+
+    with sv.JSONSink(<RESULT_JSON_FILE_PATH>) as sink:
+        for frame_index, frame in enumerate(frames_generator):
+
+            frame = sv.cv2_to_pillow(frame)
+            inputs = processor(images=frame, return_tensors="pt")
+
+            with torch.no_grad():
+                outputs = model(**inputs)
+
+            width, height = frame.size
+            target_size = torch.tensor([[height, width]])
+            results = processor.post_process_object_detection(
+                outputs=outputs, target_sizes=target_size)[0]
+            detections = sv.Detections.from_transformers(results)
+            sink.append(detections, {"frame_index": frame_index})
+    ```
\ No newline at end of file

From 99fbd4b601657e7a25ed68a2ae805413788cd1ad Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 9 Apr 2024 20:45:06 +0000
Subject: [PATCH 15/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?=
 =?UTF-8?q?=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/how_to/save_detections.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md
index e0233f9b5..5d0dc61a9 100644
--- a/docs/how_to/save_detections.md
+++ b/docs/how_to/save_detections.md
@@ -9,8 +9,8 @@ TODO

 ## Run Detection

-First, you'll need to obtain predictions from your object detection or segmentation
-model. You can learn more on this topic in our
+First, you'll need to obtain predictions from your object detection or segmentation
+model. You can learn more on this topic in our
 [How to Detect and Annotate](/latest/how_to/detect_and_annotate.md) guide.
@@ -281,4 +281,4 @@ TODO
             outputs=outputs, target_sizes=target_size)[0]
         detections = sv.Detections.from_transformers(results)
         sink.append(detections, {"frame_index": frame_index})
-    ```
\ No newline at end of file
+    ```
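The JSON variant can be inspected just as easily. A minimal sketch, assuming a hypothetical output path `detections.json` and that each saved record carries the same fields as the CSV columns plus any `custom_data` keys:

```python
import json

# Load the detection records written by sv.JSONSink.
with open("detections.json") as f:
    records = json.load(f)

# Example: count detections per frame using the custom frame_index field.
counts = {}
for record in records:
    frame = record["frame_index"]
    counts[frame] = counts.get(frame, 0) + 1
print(counts)
```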
From e0d5f00ae3439bdece59cb1bd8d07ada53d91d78 Mon Sep 17 00:00:00 2001
From: SkalskiP
Date: Tue, 9 Apr 2024 23:15:49 +0200
Subject: [PATCH 16/18] initial version of `How to Save Detections` guide done

---
 docs/how_to/save_detections.md | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md
index e0233f9b5..f432c9398 100644
--- a/docs/how_to/save_detections.md
+++ b/docs/how_to/save_detections.md
@@ -5,7 +5,13 @@ status: new

 # Save Detections

-TODO
+Supervision offers an easy way to save detections in .CSV and .JSON files for offline
+processing. This guide demonstrates how to perform video inference using the
+[Inference](https://github.com/roboflow/inference),
+[Ultralytics](https://github.com/ultralytics/ultralytics) or
+[Transformers](https://github.com/huggingface/transformers) packages and save their results with
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink) and
+[`sv.JSONSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink).

 ## Run Detection

 ## Save Detections as CSV

-TODO
+To save detections to a `.CSV` file, open our
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
+and then pass the
+[`sv.Detections`](/latest/detection/core/#supervision.detection.core.Detections)
+object resulting from the inference to it. Its fields are parsed and saved on disk.

 === "Inference"

             sink.append(detections, {})
     ```

-TODO
-
 | x_min   | y_min   | x_max   | y_max   | class_id | confidence | tracker_id | class_name |
 |---------|---------|---------|---------|----------|------------|------------|------------|
 | 2941.14 | 1269.31 | 3220.77 | 1500.67 | 2        | 0.8517     |            | car        |
 | 944.889 | 899.641 | 1235.42 | 1308.80 | 7        | 0.6752     |            | truck      |
 | 1439.78 | 1077.79 | 1621.27 | 1231.40 | 2        | 0.6450     |            | car        |

 ## Custom Fields

-TODO
+Besides regular fields in
+[`sv.Detections`](/latest/detection/core/#supervision.detection.core.Detections),
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
+also allows you to add custom information to each row, which can be passed via the
+`custom_data` dictionary. Let's utilize this feature to save information about the
+frame index from which the detections originate.

 === "Inference"

             sink.append(detections, {"frame_index": frame_index})
     ```

-TODO
-
 | x_min   | y_min   | x_max   | y_max   | class_id | confidence | tracker_id | class_name | frame_index |
 |---------|---------|---------|---------|----------|------------|------------|------------|-------------|
 | 2941.14 | 1269.31 | 3220.77 | 1500.67 | 2        | 0.8517     |            | car        | 0           |
 | 944.889 | 899.641 | 1235.42 | 1308.80 | 7        | 0.6752     |            | truck      | 0           |
 | 1439.78 | 1077.79 | 1621.27 | 1231.40 | 2        | 0.6450     |            | car        | 0           |

 ## Save Detections as JSON

-TODO
+If you prefer to save the result in a `.JSON` file instead of a `.CSV` file, all you
+need to do is replace
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
+with
+[`sv.JSONSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink).
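To make the sink mechanics concrete: the pattern the guide leans on is a context manager that flattens each detection into one row and appends the `custom_data` columns. The toy stand-in below is a sketch of that idea, not supervision's actual implementation:

```python
import csv
from typing import Dict, List


class MiniCSVSink:
    """Toy stand-in for sv.CSVSink: writes one CSV row per detection dict."""

    def __init__(self, file_name: str) -> None:
        self.file_name = file_name
        self.header: List[str] = []

    def __enter__(self) -> "MiniCSVSink":
        self.file = open(self.file_name, "w", newline="")
        self.writer = csv.writer(self.file)
        return self

    def __exit__(self, *exc) -> None:
        self.file.close()

    def append(self, rows: List[Dict], custom_data: Dict) -> None:
        for row in rows:
            merged = {**row, **custom_data}
            if not self.header:
                # Freeze the column order on the first appended row.
                self.header = list(merged)
                self.writer.writerow(self.header)
            self.writer.writerow([merged.get(key, "") for key in self.header])
```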
From c27d9eaf22cb90efd8f88c91d555e9c32935ece2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 9 Apr 2024 21:16:18 +0000
Subject: [PATCH 17/18] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?=
 =?UTF-8?q?=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/how_to/save_detections.md | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md
index 2c9250ea6..94de6c618 100644
--- a/docs/how_to/save_detections.md
+++ b/docs/how_to/save_detections.md
@@ -5,12 +5,12 @@ status: new

 # Save Detections

-Supervision offers an easy way to save detections in .CSV and .JSON files for offline
-processing. This guide demonstrates how to perform video inference using the
+Supervision offers an easy way to save detections in .CSV and .JSON files for offline
+processing. This guide demonstrates how to perform video inference using the
 [Inference](https://github.com/roboflow/inference),
 [Ultralytics](https://github.com/ultralytics/ultralytics) or
-[Transformers](https://github.com/huggingface/transformers) packages and save their results with
-[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink) and
+[Transformers](https://github.com/huggingface/transformers) packages and save their results with
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink) and
 [`sv.JSONSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink).

 ## Save Detections as CSV

-To save detections to a `.CSV` file, open our
-[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
-and then pass the
-[`sv.Detections`](/latest/detection/core/#supervision.detection.core.Detections)
+To save detections to a `.CSV` file, open our
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
+and then pass the
+[`sv.Detections`](/latest/detection/core/#supervision.detection.core.Detections)
 object resulting from the inference to it. Its fields are parsed and saved on disk.

 ## Custom Fields

-Besides regular fields in
-[`sv.Detections`](/latest/detection/core/#supervision.detection.core.Detections),
-[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
-also allows you to add custom information to each row, which can be passed via the
-`custom_data` dictionary. Let's utilize this feature to save information about the
+Besides regular fields in
+[`sv.Detections`](/latest/detection/core/#supervision.detection.core.Detections),
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
+also allows you to add custom information to each row, which can be passed via the
+`custom_data` dictionary. Let's utilize this feature to save information about the
 frame index from which the detections originate.

 ## Save Detections as JSON

-If you prefer to save the result in a `.JSON` file instead of a `.CSV` file, all you
-need to do is replace
-[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
-with
+If you prefer to save the result in a `.JSON` file instead of a `.CSV` file, all you
+need to do is replace
+[`sv.CSVSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.csv_sink.CSVSink)
+with
 [`sv.JSONSink`](/latest/detection/tools/save_detections/#supervision.detection.tools.json_sink.JSONSink).
 === "Inference"

From eb1c7716ef0e9835f873133dd41e3ae40c9442df Mon Sep 17 00:00:00 2001
From: SkalskiP
Date: Tue, 9 Apr 2024 23:18:02 +0200
Subject: [PATCH 18/18] make ruff happy

---
 supervision/utils/image.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/supervision/utils/image.py b/supervision/utils/image.py
index 44ccbeac1..9f2e17839 100644
--- a/supervision/utils/image.py
+++ b/supervision/utils/image.py
@@ -320,7 +320,7 @@ def overlay_image(
         ```

     ![overlay_image](https://media.roboflow.com/supervision-docs/overlay-image.png){ align=center width="800" }
-    """
+    """  # noqa E501 // docs
     scene_height, scene_width = image.shape[:2]
     image_height, image_width = overlay.shape[:2]
     anchor_x, anchor_y = anchor