diff --git a/.github/workflows/merge-main-into-prs.yml b/.github/workflows/merge-main-into-prs.yml
index db6a180295..372d888241 100644
--- a/.github/workflows/merge-main-into-prs.yml
+++ b/.github/workflows/merge-main-into-prs.yml
@@ -34,11 +34,11 @@ jobs:
g = Github(os.getenv('GITHUB_TOKEN'))
repo = g.get_repo(os.getenv('GITHUB_REPOSITORY'))
-
+
# Fetch the default branch name
default_branch_name = repo.default_branch
default_branch = repo.get_branch(default_branch_name)
-
+
for pr in repo.get_pulls(state='open', sort='created'):
try:
# Get full names for repositories and branches
diff --git a/README.md b/README.md
index e4314803be..14bc041938 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
-
+
diff --git a/export.py b/export.py
index fad82dbac6..9c9a0d9a77 100644
--- a/export.py
+++ b/export.py
@@ -91,6 +91,8 @@
class iOSModel(torch.nn.Module):
+ """Wraps a PyTorch model for iOS export with normalized input dimensions and class configurations."""
+
def __init__(self, model, im):
"""
Initializes an iOSModel with normalized input dimensions and number of classes from a PyTorch model.
diff --git a/models/common.py b/models/common.py
index 295a04d933..9d0818d62f 100644
--- a/models/common.py
+++ b/models/common.py
@@ -55,7 +55,8 @@ def autopad(k, p=None, d=1): # kernel, padding, dilation
class Conv(nn.Module):
- # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
+ """A standard Conv2D layer with batch normalization and optional activation for neural networks."""
+
default_act = nn.SiLU() # default activation
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
@@ -81,7 +82,8 @@ def forward_fuse(self, x):
class DWConv(Conv):
- # Depth-wise convolution
+ """Implements depth-wise convolution for efficient spatial feature extraction in neural networks."""
+
def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation
"""Initializes depth-wise convolution with optional activation; parameters are channel in/out, kernel, stride,
dilation.
@@ -90,7 +92,8 @@ def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, s
class DWConvTranspose2d(nn.ConvTranspose2d):
- # Depth-wise transpose convolution
+ """Implements a depth-wise transpose convolution layer with specified channels, kernel size, stride, and padding."""
+
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out
"""Initializes a depth-wise or transpose convolution layer with specified in/out channels, kernel size, stride,
and padding.
@@ -99,7 +102,8 @@ def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stri
class TransformerLayer(nn.Module):
- # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
+ """Transformer layer with multi-head attention and feed-forward network, optimized by removing LayerNorm."""
+
def __init__(self, c, num_heads):
"""Initializes a Transformer layer as per https://arxiv.org/abs/2010.11929, sans LayerNorm, with specified
embedding dimension and number of heads.
@@ -122,7 +126,8 @@ def forward(self, x):
class TransformerBlock(nn.Module):
- # Vision Transformer https://arxiv.org/abs/2010.11929
+ """Implements a Vision Transformer block with transformer layers; https://arxiv.org/abs/2010.11929."""
+
def __init__(self, c1, c2, num_heads, num_layers):
"""Initializes a Transformer block with optional convolution, linear, and transformer layers."""
super().__init__()
@@ -143,7 +148,8 @@ def forward(self, x):
class Bottleneck(nn.Module):
- # Standard bottleneck
+ """Implements a bottleneck layer with optional shortcut for efficient feature extraction in neural networks."""
+
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
"""Initializes a standard bottleneck layer with optional shortcut; args: input channels (c1), output channels
(c2), shortcut (bool), groups (g), expansion factor (e).
@@ -162,7 +168,8 @@ def forward(self, x):
class BottleneckCSP(nn.Module):
- # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+ """Implements a CSP Bottleneck layer for feature extraction."""
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
"""Initializes CSP Bottleneck with channel in/out, optional shortcut, groups, expansion; see
https://github.com/WongKinYiu/CrossStagePartialNetworks.
@@ -187,7 +194,8 @@ def forward(self, x):
class CrossConv(nn.Module):
- # Cross Convolution Downsample
+ """Implements Cross Convolution Downsample with 1D and 2D convolutions and optional shortcut."""
+
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
"""Initializes CrossConv with downsample options, combining 1D and 2D convolutions, optional shortcut if
input/output channels match.
@@ -204,7 +212,8 @@ def forward(self, x):
class C3(nn.Module):
- # CSP Bottleneck with 3 convolutions
+ """Implements a CSP Bottleneck with 3 convolutions, optional shortcuts, group convolutions, and expansion factor."""
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
"""Initializes CSP Bottleneck with 3 convolutions, optional shortcuts, group convolutions, and expansion
factor.
@@ -222,7 +231,8 @@ def forward(self, x):
class C3x(C3):
- # C3 module with cross-convolutions
+ """Extends the C3 module with cross-convolutions for enhanced feature extraction and flexibility."""
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes a C3x module with cross-convolutions, extending the C3 module with customizable parameters."""
super().__init__(c1, c2, n, shortcut, g, e)
@@ -231,7 +241,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
class C3TR(C3):
- # C3 module with TransformerBlock()
+ """C3 module with TransformerBlock for integrating attention mechanisms in CNNs."""
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes a C3 module with TransformerBlock, extending C3 for attention mechanisms."""
super().__init__(c1, c2, n, shortcut, g, e)
@@ -240,7 +251,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
class C3SPP(C3):
- # C3 module with SPP()
+ """Extends C3 with Spatial Pyramid Pooling (SPP) for enhanced feature extraction in CNNs."""
+
def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
"""Initializes C3SPP module, extending C3 with Spatial Pyramid Pooling for enhanced feature extraction."""
super().__init__(c1, c2, n, shortcut, g, e)
@@ -249,7 +261,8 @@ def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
class C3Ghost(C3):
- # C3 module with GhostBottleneck()
+ """Implements a C3 module with Ghost Bottlenecks for efficient feature extraction in neural networks."""
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes C3Ghost module with Ghost Bottlenecks for efficient feature extraction."""
super().__init__(c1, c2, n, shortcut, g, e)
@@ -258,7 +271,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
class SPP(nn.Module):
- # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
+ """Implements Spatial Pyramid Pooling (SPP) for enhanced feature extraction; see https://arxiv.org/abs/1406.4729."""
+
def __init__(self, c1, c2, k=(5, 9, 13)):
"""
Initializes SPP layer with specified channels and kernels.
@@ -284,7 +298,8 @@ def forward(self, x):
class SPPF(nn.Module):
- # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv3 by Glenn Jocher
+ """Implements a fast Spatial Pyramid Pooling (SPPF) layer for efficient feature extraction in YOLOv3 models."""
+
def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
"""Initializes the SPPF layer with specified input/output channels and kernel size for YOLOv3."""
super().__init__()
@@ -306,7 +321,8 @@ def forward(self, x):
class Focus(nn.Module):
- # Focus wh information into c-space
+ """Focuses spatial information into channel space using configurable convolution."""
+
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
"""Initializes Focus module to focus width and height information into channel space with configurable
convolution parameters.
@@ -322,7 +338,8 @@ def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
class GhostConv(nn.Module):
- # Ghost Convolution https://github.com/huawei-noah/ghostnet
+ """Implements Ghost Convolution for efficient feature extraction; see https://github.com/huawei-noah/ghostnet."""
+
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
"""Initializes GhostConv with in/out channels, kernel size, stride, groups; see
https://github.com/huawei-noah/ghostnet.
@@ -339,7 +356,8 @@ def forward(self, x):
class GhostBottleneck(nn.Module):
- # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
+ """Implements a Ghost Bottleneck layer for efficient feature extraction from GhostNet."""
+
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
"""Initializes GhostBottleneck module with in/out channels, kernel size, and stride; see
https://github.com/huawei-noah/ghostnet.
@@ -361,7 +379,8 @@ def forward(self, x):
class Contract(nn.Module):
- # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
+ """Contracts spatial dimensions into channels, e.g., (1,64,80,80) to (1,256,40,40) with a specified gain."""
+
def __init__(self, gain=2):
"""Initializes Contract module to refine input dimensions, e.g., from (1,64,80,80) to (1,256,40,40) with a
default gain of 2.
@@ -381,7 +400,8 @@ def forward(self, x):
class Expand(nn.Module):
- # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
+ """Expands channels into width-height, e.g., (1,64,80,80) to (1,16,160,160) with a specified gain."""
+
def __init__(self, gain=2):
"""Initializes Expand module to increase spatial dimensions by factor `gain` while reducing channels
correspondingly.
@@ -401,7 +421,8 @@ def forward(self, x):
class Concat(nn.Module):
- # Concatenate a list of tensors along dimension
+ """Concatenates a list of tensors along a specified dimension for efficient feature aggregation."""
+
def __init__(self, dimension=1):
"""Initializes a module to concatenate tensors along a specified dimension."""
super().__init__()
@@ -415,7 +436,8 @@ def forward(self, x):
class DetectMultiBackend(nn.Module):
- # YOLOv3 MultiBackend class for python inference on various backends
+ """YOLOv3 multi-backend class for inference on frameworks like PyTorch, ONNX, TensorRT, and more."""
+
def __init__(self, weights="yolov5s.pt", device=torch.device("cpu"), dnn=False, data=None, fp16=False, fuse=True):
"""Initializes multi-backend detection with options for various frameworks and devices, also handles model
download.
@@ -749,7 +771,8 @@ def _load_metadata(f=Path("path/to/meta.yaml")):
class AutoShape(nn.Module):
- # YOLOv3 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
+ """A wrapper for YOLOv3 models to handle diverse input types with preprocessing, inference, and NMS."""
+
conf = 0.25 # NMS confidence threshold
iou = 0.45 # NMS IoU threshold
agnostic = False # NMS class-agnostic
@@ -857,7 +880,8 @@ def forward(self, ims, size=640, augment=False, profile=False):
class Detections:
- # YOLOv3 detections class for inference results
+ """Handles YOLOv3 detection results with methods for visualization, saving, cropping, and format conversion."""
+
def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
"""Initializes YOLOv3 detections with image data, predictions, filenames, profiling times, class names, and
shapes.
@@ -1011,7 +1035,8 @@ def __repr__(self):
class Proto(nn.Module):
- # YOLOv3 mask Proto module for segmentation models
+ """Implements the YOLOv3 mask Proto module for segmentation, including convolutional layers and upsampling."""
+
def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks
"""Initializes the Proto module for YOLOv3 segmentation, setting up convolutional layers and upsampling."""
super().__init__()
@@ -1026,7 +1051,8 @@ def forward(self, x):
class Classify(nn.Module):
- # YOLOv3 classification head, i.e. x(b,c1,20,20) to x(b,c2)
+ """Performs image classification using YOLOv3-based architecture with convolutional, pooling, and dropout layers."""
+
def __init__(
self, c1, c2, k=1, s=1, p=None, g=1, dropout_p=0.0
): # ch_in, ch_out, kernel, stride, padding, groups, dropout probability
diff --git a/models/experimental.py b/models/experimental.py
index 305434ec87..7c1ed58608 100644
--- a/models/experimental.py
+++ b/models/experimental.py
@@ -11,7 +11,8 @@
class Sum(nn.Module):
- # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
+ """Computes the weighted or unweighted sum of multiple input layers per https://arxiv.org/abs/1911.09070."""
+
def __init__(self, n, weight=False): # n: number of inputs
"""
Initializes a module to compute weighted/unweighted sum of n inputs, with optional learning weights.
@@ -42,7 +43,8 @@ def forward(self, x):
class MixConv2d(nn.Module):
- # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
+ """Implements mixed depth-wise convolutions for efficient neural networks; see https://arxiv.org/abs/1907.09595."""
+
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy
"""Initializes MixConv2d with mixed depth-wise convolution layers; details at
https://arxiv.org/abs/1907.09595.
@@ -72,7 +74,8 @@ def forward(self, x):
class Ensemble(nn.ModuleList):
- # Ensemble of models
+ """Combines outputs from multiple models to improve inference results."""
+
def __init__(self):
"""Initializes an ensemble of models to combine their outputs."""
super().__init__()
diff --git a/models/tf.py b/models/tf.py
index 13edcdcfca..b4c47e8e73 100644
--- a/models/tf.py
+++ b/models/tf.py
@@ -49,7 +49,8 @@
class TFBN(keras.layers.Layer):
- # TensorFlow BatchNormalization wrapper
+ """A TensorFlow BatchNormalization wrapper layer initialized with specific weights for YOLOv3 models."""
+
def __init__(self, w=None):
"""Initializes TFBN with weights, wrapping TensorFlow's BatchNormalization layer with specific initializers."""
super().__init__()
@@ -67,7 +68,8 @@ def call(self, inputs):
class TFPad(keras.layers.Layer):
- # Pad inputs in spatial dimensions 1 and 2
+ """Pads inputs in spatial dimensions 1 and 2 using specified padding width as an int or (int, int) tuple/list."""
+
def __init__(self, pad):
"""Initializes a padding layer for spatial dimensions 1 and 2, with `pad` as int or (int, int) tuple/list."""
super().__init__()
@@ -84,7 +86,8 @@ def call(self, inputs):
class TFConv(keras.layers.Layer):
- # Standard convolution
+ """Implements a standard convolutional layer with optional batch normalization and activation for TensorFlow."""
+
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
"""Initializes a convolutional layer with customizable filters, kernel size, stride, padding, groups, and
activation.
@@ -112,7 +115,8 @@ def call(self, inputs):
class TFDWConv(keras.layers.Layer):
- # Depthwise convolution
+ """Implements a depthwise convolutional layer with optional batch normalization and activation for TensorFlow."""
+
def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
"""Initializes a depthwise convolutional layer with optional batch normalization and activation."""
super().__init__()
@@ -136,7 +140,8 @@ def call(self, inputs):
class TFDWConvTranspose2d(keras.layers.Layer):
- # Depthwise ConvTranspose2d
+ """Implements a depthwise transposed convolutional layer for TensorFlow with equal input and output channels."""
+
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
"""Initializes TFDWConvTranspose2d with ch_in=c1=ch_out, k=4, p1=1; sets up depthwise Conv2DTranspose layers."""
super().__init__()
@@ -166,7 +171,8 @@ def call(self, inputs):
class TFFocus(keras.layers.Layer):
- # Focus wh information into c-space
+ """Focuses spatial information into channel space using a convolutional layer for efficient feature extraction."""
+
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
"""Initializes TFFocus layer for efficient information focusing into channel-space with customizable convolution
parameters.
@@ -183,7 +189,8 @@ def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c)
class TFBottleneck(keras.layers.Layer):
- # Standard bottleneck
+ """A TensorFlow bottleneck layer with optional shortcut connections, channel expansion, and group convolutions."""
+
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion
"""Initializes a standard bottleneck layer with optional shortcut, channel expansion, and group convolutions."""
super().__init__()
@@ -200,7 +207,8 @@ def call(self, inputs):
class TFCrossConv(keras.layers.Layer):
- # Cross Convolution
+ """Implements a cross convolutional layer with customizable channels, kernel size, stride, groups, and shortcut."""
+
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
"""Initializes cross convolutional layer with parameters for channel sizes, kernel size, stride, groups,
expansion factor, shortcut option, and weights.
@@ -217,7 +225,8 @@ def call(self, inputs):
class TFConv2d(keras.layers.Layer):
- # Substitution for PyTorch nn.Conv2D
+ """Implements a TensorFlow 2.2+ Conv2D layer as a substitute for PyTorch's Conv2D with customizable parameters."""
+
def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
"""Initializes TFConv2d layer for TensorFlow 2.2+, substituting PyTorch Conv2D; c1, c2: channels, k: kernel
size, s: stride.
@@ -240,7 +249,8 @@ def call(self, inputs):
class TFBottleneckCSP(keras.layers.Layer):
- # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+ """Implements a Cross Stage Partial (CSP) Bottleneck layer for efficient feature extraction in neural networks."""
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
"""Initializes CSP Bottleneck layer with channel configurations and optional shortcut, groups, expansion, and
weights.
@@ -263,7 +273,8 @@ def call(self, inputs):
class TFC3(keras.layers.Layer):
- # CSP Bottleneck with 3 convolutions
+ """CSP Bottleneck layer with 3 convolutions for enhanced feature extraction and integration in TensorFlow models."""
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
"""Initializes a CSP Bottleneck layer with 3 convolutions for channel manipulation and feature integration."""
super().__init__()
@@ -281,7 +292,8 @@ def call(self, inputs):
class TFC3x(keras.layers.Layer):
- # 3 module with cross-convolutions
+ """Implements a CSP Bottleneck layer with cross-convolutions for enhanced feature extraction in YOLOv3 models."""
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
"""Initializes a TFC3x layer with cross-convolutions, expanding and concatenating features for given channel
inputs and outputs.
@@ -301,7 +313,8 @@ def call(self, inputs):
class TFSPP(keras.layers.Layer):
- # Spatial pyramid pooling layer used in YOLOv3-SPP
+ """Implements Spatial Pyramid Pooling (SPP) for YOLOv3-SPP with configurable channels and kernel sizes."""
+
def __init__(self, c1, c2, k=(5, 9, 13), w=None):
"""Initializes a Spatial Pyramid Pooling layer for YOLOv3-SPP with configurable in/out channels and kernel
sizes.
@@ -319,7 +332,8 @@ def call(self, inputs):
class TFSPPF(keras.layers.Layer):
- # Spatial pyramid pooling-Fast layer
+ """Implements a fast spatial pyramid pooling layer for efficient multi-scale feature extraction in YOLOv3 models."""
+
def __init__(self, c1, c2, k=5, w=None):
"""Initializes a Spatial Pyramid Pooling-Fast layer with specified channels, kernel size, and optional
weights.
@@ -339,7 +353,8 @@ def call(self, inputs):
class TFDetect(keras.layers.Layer):
- # TF YOLOv3 Detect layer
+ """Implements YOLOv3 detection layer in TensorFlow for object detection with configurable classes and anchors."""
+
def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer
"""Initializes a YOLOv3 detection layer with specified classes, anchors, channels, image size, and weights."""
super().__init__()
@@ -393,7 +408,8 @@ def _make_grid(nx=20, ny=20):
class TFSegment(TFDetect):
- # YOLOv3 Segment head for segmentation models
+ """Implements YOLOv3 segmentation head for object detection and segmentation tasks using TensorFlow."""
+
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
"""Initializes a YOLOv3 Segment head with customizable parameters for segmentation models."""
super().__init__(nc, anchors, ch, imgsz, w)
@@ -414,6 +430,8 @@ def call(self, x):
class TFProto(keras.layers.Layer):
+ """Implements a TensorFlow layer for feature processing with convolution and upsample operations."""
+
def __init__(self, c1, c_=256, c2=32, w=None):
"""Initializes a TFProto layer with convolution and upsample operations for feature processing."""
super().__init__()
@@ -428,7 +446,8 @@ def call(self, inputs):
class TFUpsample(keras.layers.Layer):
- # TF version of torch.nn.Upsample()
+ """Implements an upsample layer using TensorFlow with specified size, scale factor, and interpolation mode."""
+
def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
"""Initializes an upsample layer with specific size, doubling scale factor (>0, even), interpolation mode, and
optional weights.
@@ -447,7 +466,8 @@ def call(self, inputs):
class TFConcat(keras.layers.Layer):
- # TF version of torch.concat()
+ """Concatenates input tensors along the specified dimension (NHWC format) using TensorFlow."""
+
def __init__(self, dimension=1, w=None):
"""Initializes a TensorFlow layer to concatenate tensors along the NHWC dimension, requiring dimension=1."""
super().__init__()
@@ -534,7 +554,8 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
class TFModel:
- # TF YOLOv3 model
+ """TensorFlow implementation of YOLOv3 for object detection, supporting Keras and TFLite models."""
+
def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes
"""Initializes TF YOLOv3 model with config, channels, classes, optional pre-loaded model, and input image
size.
@@ -606,7 +627,8 @@ def _xywh2xyxy(xywh):
class AgnosticNMS(keras.layers.Layer):
- # TF Agnostic NMS
+ """Applies class-agnostic non-maximum suppression (NMS) to filter detections by IoU and confidence thresholds."""
+
def call(self, input, topk_all, iou_thres, conf_thres):
"""Applies non-maximum suppression (NMS) to filter detections based on IoU, confidence thresholds, and top-K."""
return tf.map_fn(
diff --git a/models/yolo.py b/models/yolo.py
index 8c77f04a66..49cd9c74fc 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -42,7 +42,8 @@
class Detect(nn.Module):
- # YOLOv3 Detect head for detection models
+ """YOLOv3 Detect head for processing detection model outputs, including grid and anchor grid generation."""
+
stride = None # strides computed during build
dynamic = False # force grid reconstruction
export = False # export mode
@@ -105,7 +106,8 @@ def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version
class Segment(Detect):
- # YOLOv3 Segment head for segmentation models
+ """YOLOv3 Segment head for segmentation models, adding mask prediction and prototyping to detection."""
+
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
"""Initializes the YOLOv3 segment head with customizable class count, anchors, masks, protos, channels, and
inplace option.
@@ -128,7 +130,8 @@ def forward(self, x):
class BaseModel(nn.Module):
- # YOLOv3 base model
+ """Implements the base YOLOv3 model architecture for object detection tasks."""
+
def forward(self, x, profile=False, visualize=False):
"""Performs a single-scale inference or training step on input `x`, with options for profiling and
visualization.
@@ -191,7 +194,8 @@ def _apply(self, fn):
class DetectionModel(BaseModel):
- # YOLOv3 detection model
+ """YOLOv3 detection model class for initializing and processing detection models with configurable parameters."""
+
def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None): # model, input channels, number of classes
"""Initializes YOLOv3 detection model with configurable YAML, input channels, classes, and anchors."""
super().__init__()
@@ -303,14 +307,16 @@ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is
class SegmentationModel(DetectionModel):
- # YOLOv3 segmentation model
+ """Implements a YOLOv3-based segmentation model with customizable configuration, channels, classes, and anchors."""
+
def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None):
"""Initializes a SegmentationModel with optional configuration, channel, class count, and anchors parameters."""
super().__init__(cfg, ch, nc, anchors)
class ClassificationModel(BaseModel):
- # YOLOv3 classification model
+ """Implements a YOLOv3-based image classification model with configurable architecture and class count."""
+
def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index
"""Initializes a ClassificationModel from a detection model or YAML, with configurable classes and cutoff."""
super().__init__()
diff --git a/utils/__init__.py b/utils/__init__.py
index a34a38abef..918856178c 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -12,7 +12,8 @@ def emojis(str=""):
class TryExcept(contextlib.ContextDecorator):
- # YOLOv3 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager
+ """A context manager and decorator for handling exceptions with optional custom messages."""
+
def __init__(self, msg=""):
"""Initializes TryExcept with optional custom message, used as decorator or context manager for exception
handling.
diff --git a/utils/activations.py b/utils/activations.py
index 95661bf1dd..a4ff9e48f7 100644
--- a/utils/activations.py
+++ b/utils/activations.py
@@ -7,7 +7,8 @@
class SiLU(nn.Module):
- # SiLU activation https://arxiv.org/pdf/1606.08415.pdf
+ """Applies the SiLU activation function to the input tensor as described in https://arxiv.org/pdf/1606.08415.pdf."""
+
@staticmethod
def forward(x):
"""Applies the SiLU activation function, as detailed in https://arxiv.org/pdf/1606.08415.pdf, on input tensor
@@ -17,7 +18,8 @@ def forward(x):
class Hardswish(nn.Module):
- # Hard-SiLU activation
+ """Applies the Hardswish activation function to the input tensor `x`."""
+
@staticmethod
def forward(x):
"""Applies Hardswish activation, suitable for TorchScript, CoreML, ONNX, modifying input `x` as per Hard-SiLU
@@ -27,7 +29,8 @@ def forward(x):
class Mish(nn.Module):
- # Mish activation https://github.com/digantamisra98/Mish
+ """Applies the Mish activation function to improve model performance; see https://github.com/digantamisra98/Mish."""
+
@staticmethod
def forward(x):
"""
@@ -39,8 +42,11 @@ def forward(x):
class MemoryEfficientMish(nn.Module):
- # Mish activation memory-efficient
+ """Applies the memory-efficient Mish activation function for improved model performance and reduced memory usage."""
+
class F(torch.autograd.Function):
+ """Memory-efficient implementation of the Mish activation function for enhanced model performance."""
+
@staticmethod
def forward(ctx, x):
"""Applies the Mish activation function in a memory-efficient manner, useful for enhancing model
@@ -65,7 +71,8 @@ def forward(self, x):
class FReLU(nn.Module):
- # FReLU activation https://arxiv.org/abs/2007.11824
+ """Implements the FReLU activation, combining ReLU and convolution from https://arxiv.org/abs/2007.11824."""
+
def __init__(self, c1, k=3): # ch_in, kernel
"""Initializes FReLU with specified channel size and kernel, implementing activation from
https://arxiv.org/abs/2007.11824.
diff --git a/utils/augmentations.py b/utils/augmentations.py
index 539b55304a..cf11b0c525 100644
--- a/utils/augmentations.py
+++ b/utils/augmentations.py
@@ -18,7 +18,8 @@
class Albumentations:
- # YOLOv3 Albumentations class (optional, only used if package is installed)
+ """Provides optional image augmentation for YOLOv3 using the Albumentations library if installed."""
+
def __init__(self, size=640):
"""Initializes Albumentations class for optional YOLOv3 data augmentation with default size 640."""
self.transform = None
@@ -363,7 +364,8 @@ def classify_transforms(size=224):
class LetterBox:
- # YOLOv3 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
+ """Resizes and pads images to a specified size while maintaining aspect ratio."""
+
def __init__(self, size=(640, 640), auto=False, stride=32):
"""Initializes LetterBox for YOLOv3 image preprocessing with optional auto-sizing and stride; `size` can be int
or tuple.
@@ -388,7 +390,8 @@ def __call__(self, im): # im = np.array HWC
class CenterCrop:
- # YOLOv3 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
+ """Crops the center of an image to a specified size, maintaining aspect ratio."""
+
def __init__(self, size=640):
"""Initializes a CenterCrop object for YOLOv3, to crop images to a specified size, with default 640x640."""
super().__init__()
@@ -403,7 +406,8 @@ def __call__(self, im): # im = np.array HWC
class ToTensor:
- # YOLOv3 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
+ """Converts a BGR image in numpy format to a PyTorch tensor in RGB format, with optional half precision."""
+
def __init__(self, half=False):
"""Initializes ToTensor class for YOLOv3 image preprocessing to convert images to PyTorch tensors, optionally in
half precision.
diff --git a/utils/dataloaders.py b/utils/dataloaders.py
index dc9ff22f3c..4fef716250 100644
--- a/utils/dataloaders.py
+++ b/utils/dataloaders.py
@@ -226,7 +226,8 @@ def __iter__(self):
class LoadScreenshots:
- # YOLOv3 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"`
+ """Loads screenshots as input data for YOLOv3, capturing screen regions specified by coordinates and dimensions."""
+
def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None):
"""Initializes a screenshot dataloader for YOLOv3; source format: [screen_number left top width height], default
img_size=640, stride=32.
@@ -278,7 +279,8 @@ def __next__(self):
class LoadImages:
- # YOLOv3 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
+ """Loads images and videos for YOLOv3 from various sources, including directories and '*.txt' path lists."""
+
def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
"""Initializes the data loader for YOLOv3, supporting image, video, directory, and '*.txt' path lists with
customizable image sizing.
@@ -389,7 +391,8 @@ def __len__(self):
class LoadStreams:
- # YOLOv3 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams`
+ """Loads video streams for YOLOv3 inference, supporting multiple sources and customizable frame sizes."""
+
def __init__(self, sources="file.streams", img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
"""Initializes a stream loader for YOLOv3, handling video sources or files with customizable frame sizes and
intervals.
@@ -492,7 +495,8 @@ def img2label_paths(img_paths):
class LoadImagesAndLabels(Dataset):
- # YOLOv3 train_loader/val_loader, loads images and labels for training and validation
+ """Loads images and labels for YOLOv3 training and validation with support for augmentations and caching."""
+
cache_version = 0.6 # dataset labels *.cache version
rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]
diff --git a/utils/general.py b/utils/general.py
index afee396906..812720b7bc 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -176,7 +176,8 @@ def user_config_dir(dir="Ultralytics", env_var="YOLOV5_CONFIG_DIR"):
class Profile(contextlib.ContextDecorator):
- # YOLOv3 Profile class. Usage: @Profile() decorator or 'with Profile():' context manager
+ """Profiles code execution time, usable as a context manager or decorator for performance monitoring."""
+
def __init__(self, t=0.0):
"""Initializes a profiling context for YOLOv3 with optional timing threshold `t` and checks CUDA
availability.
@@ -204,7 +205,8 @@ def time(self):
class Timeout(contextlib.ContextDecorator):
- # YOLOv3 Timeout class. Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager
+ """Enforces a timeout as a context manager or decorator, raising TimeoutError on expiry (optionally suppressed)."""
+
def __init__(self, seconds, *, timeout_msg="", suppress_timeout_errors=True):
"""Initializes a timeout context/decorator with specified duration, custom message, and error handling
option.
@@ -232,7 +234,8 @@ def __exit__(self, exc_type, exc_val, exc_tb):
class WorkingDirectory(contextlib.ContextDecorator):
- # Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager
+ """Context manager or decorator that temporarily changes the working directory, reverting to the original on exit."""
+
def __init__(self, new_dir):
"""Initializes context manager to temporarily change working directory, reverting on exit."""
self.dir = new_dir # new dir
diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py
index 0a15554cfa..35fa66ad89 100644
--- a/utils/loggers/__init__.py
+++ b/utils/loggers/__init__.py
@@ -63,7 +63,8 @@ def SummaryWriter(*args):
class Loggers:
- # YOLOv3 Loggers class
+ """Manages logging for training and validation using TensorBoard, Weights & Biases, ClearML, and Comet ML."""
+
def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS):
"""Initializes YOLOv3 logging with directory, weights, options, hyperparameters, and includes specified
loggers.
diff --git a/utils/loss.py b/utils/loss.py
index 148f5118e5..3d3e1208b3 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -14,7 +14,8 @@ def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#iss
class BCEBlurWithLogitsLoss(nn.Module):
- # BCEwithLogitLoss() with reduced missing label effects.
+ """Implements BCEWithLogitsLoss with adjustments to mitigate missing label effects using an alpha parameter."""
+
def __init__(self, alpha=0.05):
"""Initializes BCEBlurWithLogitsLoss with alpha to reduce missing label effects; default alpha is 0.05."""
super().__init__()
@@ -35,7 +36,8 @@ def forward(self, pred, true):
class FocalLoss(nn.Module):
- # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
+ """Implements Focal Loss to address class imbalance by modulating the loss based on prediction confidence."""
+
def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
"""Initializes FocalLoss with specified loss function, gamma, and alpha for enhanced training on imbalanced
datasets.
@@ -71,7 +73,8 @@ def forward(self, pred, true):
class QFocalLoss(nn.Module):
- # Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
+ """Implements Quality Focal Loss to handle class imbalance with a modulating factor and alpha."""
+
def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
"""Initializes QFocalLoss with specified loss function, gamma, and alpha for element-wise focal loss
application.
@@ -103,6 +106,8 @@ def forward(self, pred, true):
class ComputeLoss:
+ """Computes the total loss for YOLO models by aggregating classification, box regression, and objectness losses."""
+
sort_obj_iou = False
# Compute losses
diff --git a/utils/metrics.py b/utils/metrics.py
index 20d9928359..f79bdfbdc4 100644
--- a/utils/metrics.py
+++ b/utils/metrics.py
@@ -122,7 +122,8 @@ def compute_ap(recall, precision):
class ConfusionMatrix:
- # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
+ """Computes and visualizes a confusion matrix for object detection tasks with configurable thresholds."""
+
def __init__(self, nc, conf=0.25, iou_thres=0.45):
"""Initializes confusion matrix for object detection with adjustable confidence and IoU thresholds."""
self.matrix = np.zeros((nc + 1, nc + 1))
diff --git a/utils/plots.py b/utils/plots.py
index 9bb865dcd3..ad5f4ea6d3 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -29,7 +29,8 @@
class Colors:
- # Ultralytics color palette https://ultralytics.com/
+ """Provides a color palette and methods to convert indices to RGB or BGR color tuples."""
+
def __init__(self):
"""Initializes the Colors class with a palette from the Ultralytics color palette."""
hexs = (
diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py
index 0343291d68..4b1fba08bf 100644
--- a/utils/segment/dataloaders.py
+++ b/utils/segment/dataloaders.py
@@ -82,6 +82,8 @@ def create_dataloader(
class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing
+ """Loads images, labels, and masks for training/testing with optional augmentations including mosaic and mixup."""
+
def __init__(
self,
path,
diff --git a/utils/segment/loss.py b/utils/segment/loss.py
index 1871d5662e..4e204a42c8 100644
--- a/utils/segment/loss.py
+++ b/utils/segment/loss.py
@@ -12,7 +12,8 @@
class ComputeLoss:
- # Compute losses
+ """Computes classification, box regression, objectness, and segmentation losses for YOLOv3 model predictions."""
+
def __init__(self, model, autobalance=False, overlap=False):
"""Initializes ComputeLoss with model settings, optional autobalancing, and overlap handling."""
self.sort_obj_iou = False
diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py
index d4689ca015..9451cc9324 100644
--- a/utils/segment/metrics.py
+++ b/utils/segment/metrics.py
@@ -54,6 +54,8 @@ def ap_per_class_box_and_mask(
class Metric:
+ """Represents model evaluation metrics including precision, recall, F1 score, and average precision (AP) values."""
+
def __init__(self) -> None:
"""Initializes Metric class attributes for precision, recall, F1 score, AP values, and AP class indices."""
self.p = [] # (nc, )
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index b3eb9871e9..0b1d69a9ab 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -422,7 +422,8 @@ def smart_resume(ckpt, optimizer, ema=None, weights="yolov5s.pt", epochs=300, re
class EarlyStopping:
- # YOLOv3 simple early stopper
+ """Monitors training to halt if no improvement in fitness metric is observed for a specified number of epochs."""
+
def __init__(self, patience=30):
"""Initializes EarlyStopping to monitor training, halting if no improvement in 'patience' epochs, defaulting to
30.