diff --git a/.github/workflows/merge-main-into-prs.yml b/.github/workflows/merge-main-into-prs.yml index db6a180295..372d888241 100644 --- a/.github/workflows/merge-main-into-prs.yml +++ b/.github/workflows/merge-main-into-prs.yml @@ -34,11 +34,11 @@ jobs: g = Github(os.getenv('GITHUB_TOKEN')) repo = g.get_repo(os.getenv('GITHUB_REPOSITORY')) - + # Fetch the default branch name default_branch_name = repo.default_branch default_branch = repo.get_branch(default_branch_name) - + for pr in repo.get_pulls(state='open', sort='created'): try: # Get full names for repositories and branches diff --git a/README.md b/README.md index e4314803be..14bc041938 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ YOLOv3 CI YOLOv3 Citation Docker Pulls - Discord Ultralytics Forums + Discord Ultralytics Forums Ultralytics Reddit
Run on Gradient Open In Colab diff --git a/export.py b/export.py index fad82dbac6..9c9a0d9a77 100644 --- a/export.py +++ b/export.py @@ -91,6 +91,8 @@ class iOSModel(torch.nn.Module): + """Exports a PyTorch model to an iOS-compatible format with normalized input dimensions and class configurations.""" + def __init__(self, model, im): """ Initializes an iOSModel with normalized input dimensions and number of classes from a PyTorch model. diff --git a/models/common.py b/models/common.py index 295a04d933..9d0818d62f 100644 --- a/models/common.py +++ b/models/common.py @@ -55,7 +55,8 @@ def autopad(k, p=None, d=1): # kernel, padding, dilation class Conv(nn.Module): - # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation) + """A standard Conv2D layer with batch normalization and optional activation for neural networks.""" + default_act = nn.SiLU() # default activation def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True): @@ -81,7 +82,8 @@ def forward_fuse(self, x): class DWConv(Conv): - # Depth-wise convolution + """Implements depth-wise convolution for efficient spatial feature extraction in neural networks.""" + def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation """Initializes depth-wise convolution with optional activation; parameters are channel in/out, kernel, stride, dilation. @@ -90,7 +92,8 @@ def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, s class DWConvTranspose2d(nn.ConvTranspose2d): - # Depth-wise transpose convolution + """Implements a depth-wise transpose convolution layer with specified channels, kernel size, stride, and padding.""" + def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out """Initializes a depth-wise or transpose convolution layer with specified in/out channels, kernel size, stride, and padding. @@ -99,7 +102,8 @@ def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stri class TransformerLayer(nn.Module): - # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) + """Transformer layer with multi-head attention and feed-forward network, optimized by removing LayerNorm.""" + def __init__(self, c, num_heads): """Initializes a Transformer layer as per https://arxiv.org/abs/2010.11929, sans LayerNorm, with specified embedding dimension and number of heads. @@ -122,7 +126,8 @@ def forward(self, x): class TransformerBlock(nn.Module): - # Vision Transformer https://arxiv.org/abs/2010.11929 + """Implements a Vision Transformer block with transformer layers; https://arxiv.org/abs/2010.11929.""" + def __init__(self, c1, c2, num_heads, num_layers): """Initializes a Transformer block with optional convolution, linear, and transformer layers.""" super().__init__() @@ -143,7 +148,8 @@ def forward(self, x): class Bottleneck(nn.Module): - # Standard bottleneck + """Implements a bottleneck layer with optional shortcut for efficient feature extraction in neural networks.""" + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion """Initializes a standard bottleneck layer with optional shortcut; args: input channels (c1), output channels (c2), shortcut (bool), groups (g), expansion factor (e). 
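As a quick sanity check on the Conv and Bottleneck blocks documented in the hunks above, a minimal shape-tracing sketch follows; the channel counts and input size are illustrative assumptions, not values taken from this diff.

import torch
from models.common import Bottleneck, Conv

x = torch.zeros(1, 64, 80, 80)  # dummy NCHW input
m = Conv(64, 128, k=3, s=2)  # Conv2d + BatchNorm2d + default SiLU activation
y = m(x)  # autopad gives "same" padding, so stride 2 halves width/height
print(y.shape)  # torch.Size([1, 128, 40, 40])
b = Bottleneck(128, 128)  # residual add is active because c1 == c2 and shortcut=True
print(b(y).shape)  # torch.Size([1, 128, 40, 40]), shape preserved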
@@ -162,7 +168,8 @@ def forward(self, x): class BottleneckCSP(nn.Module): - # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + """Implements a CSP Bottleneck layer for feature extraction.""" + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion """Initializes CSP Bottleneck with channel in/out, optional shortcut, groups, expansion; see https://github.com/WongKinYiu/CrossStagePartialNetworks. @@ -187,7 +194,8 @@ def forward(self, x): class CrossConv(nn.Module): - # Cross Convolution Downsample + """Implements Cross Convolution Downsample with 1D and 2D convolutions and optional shortcut.""" + def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): """Initializes CrossConv with downsample options, combining 1D and 2D convolutions, optional shortcut if input/output channels match. @@ -204,7 +212,8 @@ def forward(self, x): class C3(nn.Module): - # CSP Bottleneck with 3 convolutions + """Implements a CSP Bottleneck with 3 convolutions, optional shortcuts, group convolutions, and expansion factor.""" + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion """Initializes CSP Bottleneck with 3 convolutions, optional shortcuts, group convolutions, and expansion factor. @@ -222,7 +231,8 @@ def forward(self, x): class C3x(C3): - # C3 module with cross-convolutions + """Extends the C3 module with cross-convolutions for enhanced feature extraction and flexibility.""" + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): """Initializes a C3x module with cross-convolutions, extending the C3 module with customizable parameters.""" super().__init__(c1, c2, n, shortcut, g, e) @@ -231,7 +241,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): class C3TR(C3): - # C3 module with TransformerBlock() + """C3 module with TransformerBlock for integrating attention mechanisms in CNNs.""" + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): """Initializes a C3 module with TransformerBlock, extending C3 for attention mechanisms.""" super().__init__(c1, c2, n, shortcut, g, e) @@ -240,7 +251,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): class C3SPP(C3): - # C3 module with SPP() + """Extends C3 with Spatial Pyramid Pooling (SPP) for enhanced feature extraction in CNNs.""" + def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5): """Initializes C3SPP module, extending C3 with Spatial Pyramid Pooling for enhanced feature extraction.""" super().__init__(c1, c2, n, shortcut, g, e) @@ -249,7 +261,8 @@ def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5): class C3Ghost(C3): - # C3 module with GhostBottleneck() + """Implements a C3 module with Ghost Bottlenecks for efficient feature extraction in neural networks.""" + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): """Initializes C3Ghost module with Ghost Bottlenecks for efficient feature extraction.""" super().__init__(c1, c2, n, shortcut, g, e) @@ -258,7 +271,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): class SPP(nn.Module): - # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729 + """Implements Spatial Pyramid Pooling (SPP) for enhanced feature extraction; see https://arxiv.org/abs/1406.4729.""" + def __init__(self, c1, c2, k=(5, 9, 13)): """ Initializes SPP layer with specified channels and kernels. 
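The C3 and SPP hunks above describe channel-only transformations; the following sketch, with assumed channel counts and feature-map size, traces the shapes to make that concrete.

import torch
from models.common import C3, SPP

x = torch.zeros(1, 128, 40, 40)
c3 = C3(128, 256, n=3)  # three stacked bottlenecks; changes channels, keeps spatial size
spp = SPP(256, 256)  # parallel 5/9/13 max-pooling branches, concatenated and re-projected
print(c3(x).shape)  # torch.Size([1, 256, 40, 40])
print(spp(c3(x)).shape)  # torch.Size([1, 256, 40, 40])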
@@ -284,7 +298,8 @@ def forward(self, x): class SPPF(nn.Module): - # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv3 by Glenn Jocher + """Implements a fast Spatial Pyramid Pooling (SPPF) layer for efficient feature extraction in YOLOv3 models.""" + def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) """Initializes the SPPF layer with specified input/output channels and kernel size for YOLOv3.""" super().__init__() @@ -306,7 +321,8 @@ def forward(self, x): class Focus(nn.Module): - # Focus wh information into c-space + """Focuses spatial information into channel space using configurable convolution.""" + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups """Initializes Focus module to focus width and height information into channel space with configurable convolution parameters. @@ -322,7 +338,8 @@ def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) class GhostConv(nn.Module): - # Ghost Convolution https://github.com/huawei-noah/ghostnet + """Implements Ghost Convolution for efficient feature extraction; see github.com/huawei-noah/ghostnet.""" + def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups """Initializes GhostConv with in/out channels, kernel size, stride, groups; see https://github.com/huawei-noah/ghostnet. @@ -339,7 +356,8 @@ def forward(self, x): class GhostBottleneck(nn.Module): - # Ghost Bottleneck https://github.com/huawei-noah/ghostnet + """Implements a Ghost Bottleneck layer for efficient feature extraction from GhostNet.""" + def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride """Initializes GhostBottleneck module with in/out channels, kernel size, and stride; see https://github.com/huawei-noah/ghostnet. @@ -361,7 +379,8 @@ def forward(self, x): class Contract(nn.Module): - # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) + """Contracts spatial dimensions into channels, e.g., (1,64,80,80) to (1,256,40,40) with a specified gain.""" + def __init__(self, gain=2): """Initializes Contract module to refine input dimensions, e.g., from (1,64,80,80) to (1,256,40,40) with a default gain of 2. @@ -381,7 +400,8 @@ def forward(self, x): class Expand(nn.Module): - # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160) + """Expands spatial dimensions of input tensor by a factor while reducing channels correspondingly.""" + def __init__(self, gain=2): """Initializes Expand module to increase spatial dimensions by factor `gain` while reducing channels correspondingly. @@ -401,7 +421,8 @@ def forward(self, x): class Concat(nn.Module): - # Concatenate a list of tensors along dimension + """Concatenates a list of tensors along a specified dimension for efficient feature aggregation.""" + def __init__(self, dimension=1): """Initializes a module to concatenate tensors along a specified dimension.""" super().__init__() @@ -415,7 +436,8 @@ def forward(self, x): class DetectMultiBackend(nn.Module): - # YOLOv3 MultiBackend class for python inference on various backends + """YOLOv3 multi-backend class for inference on frameworks like PyTorch, ONNX, TensorRT, and more.""" + def __init__(self, weights="yolov5s.pt", device=torch.device("cpu"), dnn=False, data=None, fp16=False, fuse=True): """Initializes multi-backend detection with options for various frameworks and devices, also handles model download. 
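The Contract and Expand hunks above already state the intended shape change, (1,64,80,80) to (1,256,40,40) and back; a short round-trip sketch of that exact example:

import torch
from models.common import Contract, Expand

x = torch.zeros(1, 64, 80, 80)
y = Contract(gain=2)(x)  # folds each 2x2 spatial block into channels
z = Expand(gain=2)(y)  # inverse operation
print(y.shape, z.shape)  # torch.Size([1, 256, 40, 40]) torch.Size([1, 64, 80, 80])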
@@ -749,7 +771,8 @@ def _load_metadata(f=Path("path/to/meta.yaml")): class AutoShape(nn.Module): - # YOLOv3 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS + """A wrapper for YOLOv3 models to handle diverse input types with preprocessing, inference, and NMS.""" + conf = 0.25 # NMS confidence threshold iou = 0.45 # NMS IoU threshold agnostic = False # NMS class-agnostic @@ -857,7 +880,8 @@ def forward(self, ims, size=640, augment=False, profile=False): class Detections: - # YOLOv3 detections class for inference results + """Handles YOLOv3 detection results with methods for visualization, saving, cropping, and format conversion.""" + def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None): """Initializes YOLOv3 detections with image data, predictions, filenames, profiling times, class names, and shapes. @@ -1011,7 +1035,8 @@ def __repr__(self): class Proto(nn.Module): - # YOLOv3 mask Proto module for segmentation models + """Implements the YOLOv3 mask Proto module for segmentation, including convolutional layers and upsampling.""" + def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks """Initializes the Proto module for YOLOv3 segmentation, setting up convolutional layers and upsampling.""" super().__init__() @@ -1026,7 +1051,8 @@ def forward(self, x): class Classify(nn.Module): - # YOLOv3 classification head, i.e. x(b,c1,20,20) to x(b,c2) + """Performs image classification using YOLOv3-based architecture with convolutional, pooling, and dropout layers.""" + def __init__( self, c1, c2, k=1, s=1, p=None, g=1, dropout_p=0.0 ): # ch_in, ch_out, kernel, stride, padding, groups, dropout probability diff --git a/models/experimental.py b/models/experimental.py index 305434ec87..7c1ed58608 100644 --- a/models/experimental.py +++ b/models/experimental.py @@ -11,7 +11,8 @@ class Sum(nn.Module): - # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + """Computes the weighted or unweighted sum of multiple input layers per https://arxiv.org/abs/1911.09070.""" + def __init__(self, n, weight=False): # n: number of inputs """ Initializes a module to compute weighted/unweighted sum of n inputs, with optional learning weights. @@ -42,7 +43,8 @@ def forward(self, x): class MixConv2d(nn.Module): - # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 + """Implements mixed depth-wise convolutions for efficient neural networks; see https://arxiv.org/abs/1907.09595.""" + def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy """Initializes MixConv2d with mixed depth-wise convolution layers; details at https://arxiv.org/abs/1907.09595. 
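For the AutoShape wrapper and Detections results documented above, a minimal inference sketch follows; it assumes the repository's standard torch.hub entry point and internet access, neither of which is part of this diff.

import torch

model = torch.hub.load("ultralytics/yolov3", "yolov3", pretrained=True)  # returns an AutoShape-wrapped model
model.conf = 0.25  # NMS confidence threshold (class attribute shown in the AutoShape hunk)
model.iou = 0.45  # NMS IoU threshold
results = model("https://ultralytics.com/images/zidane.jpg")  # accepts URL/path/PIL/OpenCV/numpy/torch inputs
results.print()  # Detections also provides visualization/saving/cropping helpers such as .show(), .save() and .pandas()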
@@ -72,7 +74,8 @@ def forward(self, x): class Ensemble(nn.ModuleList): - # Ensemble of models + """Combines outputs from multiple models to improve inference results.""" + def __init__(self): """Initializes an ensemble of models to combine their outputs.""" super().__init__() diff --git a/models/tf.py b/models/tf.py index 13edcdcfca..b4c47e8e73 100644 --- a/models/tf.py +++ b/models/tf.py @@ -49,7 +49,8 @@ class TFBN(keras.layers.Layer): - # TensorFlow BatchNormalization wrapper + """A TensorFlow BatchNormalization wrapper layer initialized with specific weights for YOLOv3 models.""" + def __init__(self, w=None): """Initializes TFBN with weights, wrapping TensorFlow's BatchNormalization layer with specific initializers.""" super().__init__() @@ -67,7 +68,8 @@ def call(self, inputs): class TFPad(keras.layers.Layer): - # Pad inputs in spatial dimensions 1 and 2 + """Pads inputs in spatial dimensions 1 and 2 using specified padding width as an int or (int, int) tuple/list.""" + def __init__(self, pad): """Initializes a padding layer for spatial dimensions 1 and 2, with `pad` as int or (int, int) tuple/list.""" super().__init__() @@ -84,7 +86,8 @@ def call(self, inputs): class TFConv(keras.layers.Layer): - # Standard convolution + """Implements a standard convolutional layer with optional batch normalization and activation for TensorFlow.""" + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): """Initializes a convolutional layer with customizable filters, kernel size, stride, padding, groups, and activation. @@ -112,7 +115,8 @@ def call(self, inputs): class TFDWConv(keras.layers.Layer): - # Depthwise convolution + """Implements a depthwise convolutional layer with optional batch normalization and activation for TensorFlow.""" + def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None): """Initializes a depthwise convolutional layer with optional batch normalization and activation.""" super().__init__() @@ -136,7 +140,8 @@ def call(self, inputs): class TFDWConvTranspose2d(keras.layers.Layer): - # Depthwise ConvTranspose2d + """Implements a depthwise transposed convolutional layer for TensorFlow with equal input and output channels.""" + def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None): """Initializes TFDWConvTranspose2d with ch_in=c1=ch_out, k=4, p1=1; sets up depthwise Conv2DTranspose layers.""" super().__init__() @@ -166,7 +171,8 @@ def call(self, inputs): class TFFocus(keras.layers.Layer): - # Focus wh information into c-space + """Focuses spatial information into channel space using a convolutional layer for efficient feature extraction.""" + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): """Initializes TFFocus layer for efficient information focusing into channel-space with customizable convolution parameters. 
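Most of the models/tf.py wrappers above require weights from a trained PyTorch model, but TFPad is self-contained; a small shape check, assuming the usual symmetric zero padding, is sketched below.

import tensorflow as tf
from models.tf import TFPad

x = tf.zeros([1, 80, 80, 3])  # NHWC layout used throughout models/tf.py
print(TFPad(1)(x).shape)  # assumed: 1 pixel of zero padding on each spatial edge -> (1, 82, 82, 3)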
@@ -183,7 +189,8 @@ def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c) class TFBottleneck(keras.layers.Layer): - # Standard bottleneck + """A TensorFlow bottleneck layer with optional shortcut connections, channel expansion, and group convolutions.""" + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion """Initializes a standard bottleneck layer with optional shortcut, channel expansion, and group convolutions.""" super().__init__() @@ -200,7 +207,8 @@ def call(self, inputs): class TFCrossConv(keras.layers.Layer): - # Cross Convolution + """Implements a cross convolutional layer with customizable channels, kernel size, stride, groups, and shortcut.""" + def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None): """Initializes cross convolutional layer with parameters for channel sizes, kernel size, stride, groups, expansion factor, shortcut option, and weights. @@ -217,7 +225,8 @@ def call(self, inputs): class TFConv2d(keras.layers.Layer): - # Substitution for PyTorch nn.Conv2D + """Implements a TensorFlow 2.2+ Conv2D layer as a substitute for PyTorch's Conv2D with customizable parameters.""" + def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None): """Initializes TFConv2d layer for TensorFlow 2.2+, substituting PyTorch Conv2D; c1, c2: channels, k: kernel size, s: stride. @@ -240,7 +249,8 @@ def call(self, inputs): class TFBottleneckCSP(keras.layers.Layer): - # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + """Implements a Cross Stage Partial (CSP) Bottleneck layer for efficient feature extraction in neural networks.""" + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): """Initializes CSP Bottleneck layer with channel configurations and optional shortcut, groups, expansion, and weights. @@ -263,7 +273,8 @@ def call(self, inputs): class TFC3(keras.layers.Layer): - # CSP Bottleneck with 3 convolutions + """CSP Bottleneck layer with 3 convolutions for enhanced feature extraction and integration in TensorFlow models.""" + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): """Initializes a CSP Bottleneck layer with 3 convolutions for channel manipulation and feature integration.""" super().__init__() @@ -281,7 +292,8 @@ def call(self, inputs): class TFC3x(keras.layers.Layer): - # 3 module with cross-convolutions + """Implements a CSP Bottleneck layer with cross-convolutions for enhanced feature extraction in YOLOv3 models.""" + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): """Initializes a TFC3x layer with cross-convolutions, expanding and concatenating features for given channel inputs and outputs. @@ -301,7 +313,8 @@ def call(self, inputs): class TFSPP(keras.layers.Layer): - # Spatial pyramid pooling layer used in YOLOv3-SPP + """Implements Spatial Pyramid Pooling (SPP) for YOLOv3-SPP with configurable channels and kernel sizes.""" + def __init__(self, c1, c2, k=(5, 9, 13), w=None): """Initializes a Spatial Pyramid Pooling layer for YOLOv3-SPP with configurable in/out channels and kernel sizes. @@ -319,7 +332,8 @@ def call(self, inputs): class TFSPPF(keras.layers.Layer): - # Spatial pyramid pooling-Fast layer + """Implements a fast spatial pyramid pooling layer for efficient multi-scale feature extraction in YOLOv3 models.""" + def __init__(self, c1, c2, k=5, w=None): """Initializes a Spatial Pyramid Pooling-Fast layer with specified channels, kernel size, and optional weights. 
@@ -339,7 +353,8 @@ def call(self, inputs): class TFDetect(keras.layers.Layer): - # TF YOLOv3 Detect layer + """Implements YOLOv3 detection layer in TensorFlow for object detection with configurable classes and anchors.""" + def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer """Initializes a YOLOv3 detection layer with specified classes, anchors, channels, image size, and weights.""" super().__init__() @@ -393,7 +408,8 @@ def _make_grid(nx=20, ny=20): class TFSegment(TFDetect): - # YOLOv3 Segment head for segmentation models + """Implements YOLOv3 segmentation head for object detection and segmentation tasks using TensorFlow.""" + def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None): """Initializes a YOLOv3 Segment head with customizable parameters for segmentation models.""" super().__init__(nc, anchors, ch, imgsz, w) @@ -414,6 +430,8 @@ def call(self, x): class TFProto(keras.layers.Layer): + """Implements a TensorFlow layer for feature processing with convolution and upsample operations.""" + def __init__(self, c1, c_=256, c2=32, w=None): """Initializes a TFProto layer with convolution and upsample operations for feature processing.""" super().__init__() @@ -428,7 +446,8 @@ def call(self, inputs): class TFUpsample(keras.layers.Layer): - # TF version of torch.nn.Upsample() + """Implements an upsample layer using TensorFlow with specified size, scale factor, and interpolation mode.""" + def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w' """Initializes an upsample layer with specific size, doubling scale factor (>0, even), interpolation mode, and optional weights. @@ -447,7 +466,8 @@ def call(self, inputs): class TFConcat(keras.layers.Layer): - # TF version of torch.concat() + """Concatenates input tensors along the specified dimension (NHWC format) using TensorFlow.""" + def __init__(self, dimension=1, w=None): """Initializes a TensorFlow layer to concatenate tensors along the NHWC dimension, requiring dimension=1.""" super().__init__() @@ -534,7 +554,8 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) class TFModel: - # TF YOLOv3 model + """TensorFlow implementation of YOLOv3 for object detection, supporting Keras and TFLite models.""" + def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes """Initializes TF YOLOv3 model with config, channels, classes, optional pre-loaded model, and input image size. 
@@ -606,7 +627,8 @@ def _xywh2xyxy(xywh): class AgnosticNMS(keras.layers.Layer): - # TF Agnostic NMS + """Applies class-agnostic non-maximum suppression (NMS) to filter detections by IoU and confidence thresholds.""" + def call(self, input, topk_all, iou_thres, conf_thres): """Applies non-maximum suppression (NMS) to filter detections based on IoU, confidence thresholds, and top-K.""" return tf.map_fn( diff --git a/models/yolo.py b/models/yolo.py index 8c77f04a66..49cd9c74fc 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -42,7 +42,8 @@ class Detect(nn.Module): - # YOLOv3 Detect head for detection models + """YOLOv3 Detect head for processing detection model outputs, including grid and anchor grid generation.""" + stride = None # strides computed during build dynamic = False # force grid reconstruction export = False # export mode @@ -105,7 +106,8 @@ def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version class Segment(Detect): - # YOLOv3 Segment head for segmentation models + """YOLOv3 Segment head for segmentation models, adding mask prediction and prototyping to detection.""" + def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True): """Initializes the YOLOv3 segment head with customizable class count, anchors, masks, protos, channels, and inplace option. @@ -128,7 +130,8 @@ def forward(self, x): class BaseModel(nn.Module): - # YOLOv3 base model + """Implements the base YOLOv3 model architecture for object detection tasks.""" + def forward(self, x, profile=False, visualize=False): """Performs a single-scale inference or training step on input `x`, with options for profiling and visualization. @@ -191,7 +194,8 @@ def _apply(self, fn): class DetectionModel(BaseModel): - # YOLOv3 detection model + """YOLOv3 detection model class for initializing and processing detection models with configurable parameters.""" + def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None): # model, input channels, number of classes """Initializes YOLOv3 detection model with configurable YAML, input channels, classes, and anchors.""" super().__init__() @@ -303,14 +307,16 @@ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class SegmentationModel(DetectionModel): - # YOLOv3 segmentation model + """Implements a YOLOv3-based segmentation model with customizable configuration, channels, classes, and anchors.""" + def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None): """Initializes a SegmentationModel with optional configuration, channel, class count, and anchors parameters.""" super().__init__(cfg, ch, nc, anchors) class ClassificationModel(BaseModel): - # YOLOv3 classification model + """Implements a YOLOv3-based image classification model with configurable architecture and class count.""" + def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index """Initializes a ClassificationModel from a detection model or YAML, with configurable classes and cutoff.""" super().__init__() diff --git a/utils/__init__.py b/utils/__init__.py index a34a38abef..918856178c 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -12,7 +12,8 @@ def emojis(str=""): class TryExcept(contextlib.ContextDecorator): - # YOLOv3 TryExcept class. 
Usage: @TryExcept() decorator or 'with TryExcept():' context manager + """A context manager and decorator for handling exceptions with optional custom messages.""" + def __init__(self, msg=""): """Initializes TryExcept with optional custom message, used as decorator or context manager for exception handling. diff --git a/utils/activations.py b/utils/activations.py index 95661bf1dd..a4ff9e48f7 100644 --- a/utils/activations.py +++ b/utils/activations.py @@ -7,7 +7,8 @@ class SiLU(nn.Module): - # SiLU activation https://arxiv.org/pdf/1606.08415.pdf + """Applies the SiLU activation function to the input tensor as described in https://arxiv.org/pdf/1606.08415.pdf.""" + @staticmethod def forward(x): """Applies the SiLU activation function, as detailed in https://arxiv.org/pdf/1606.08415.pdf, on input tensor @@ -17,7 +18,8 @@ def forward(x): class Hardswish(nn.Module): - # Hard-SiLU activation + """Applies the Hardswish activation function to the input tensor `x`.""" + @staticmethod def forward(x): """Applies Hardswish activation, suitable for TorchScript, CoreML, ONNX, modifying input `x` as per Hard-SiLU @@ -27,7 +29,8 @@ def forward(x): class Mish(nn.Module): - # Mish activation https://github.com/digantamisra98/Mish + """Applies the Mish activation function to improve model performance; see https://github.com/digantamisra98/Mish.""" + @staticmethod def forward(x): """ @@ -39,8 +42,11 @@ def forward(x): class MemoryEfficientMish(nn.Module): - # Mish activation memory-efficient + """Applies the memory-efficient Mish activation function for improved model performance and reduced memory usage.""" + class F(torch.autograd.Function): + """Memory-efficient implementation of the Mish activation function for enhanced model performance.""" + @staticmethod def forward(ctx, x): """Applies the Mish activation function in a memory-efficient manner, useful for enhancing model @@ -65,7 +71,8 @@ def forward(self, x): class FReLU(nn.Module): - # FReLU activation https://arxiv.org/abs/2007.11824 + """Implements the FReLU activation, combining ReLU and convolution from https://arxiv.org/abs/2007.11824.""" + def __init__(self, c1, k=3): # ch_in, kernel """Initializes FReLU with specified channel size and kernel, implementing activation from https://arxiv.org/abs/2007.11824. diff --git a/utils/augmentations.py b/utils/augmentations.py index 539b55304a..cf11b0c525 100644 --- a/utils/augmentations.py +++ b/utils/augmentations.py @@ -18,7 +18,8 @@ class Albumentations: - # YOLOv3 Albumentations class (optional, only used if package is installed) + """Provides optional image augmentation for YOLOv3 using the Albumentations library if installed.""" + def __init__(self, size=640): """Initializes Albumentations class for optional YOLOv3 data augmentation with default size 640.""" self.transform = None @@ -363,7 +364,8 @@ def classify_transforms(size=224): class LetterBox: - # YOLOv3 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()]) + """Resizes and pads images to a specified size while maintaining aspect ratio.""" + def __init__(self, size=(640, 640), auto=False, stride=32): """Initializes LetterBox for YOLOv3 image preprocessing with optional auto-sizing and stride; `size` can be int or tuple. @@ -388,7 +390,8 @@ def __call__(self, im): # im = np.array HWC class CenterCrop: - # YOLOv3 CenterCrop class for image preprocessing, i.e. 
T.Compose([CenterCrop(size), ToTensor()]) + """Crops the center of an image to a specified size, maintaining aspect ratio.""" + def __init__(self, size=640): """Initializes a CenterCrop object for YOLOv3, to crop images to a specified size, with default 640x640.""" super().__init__() @@ -403,7 +406,8 @@ def __call__(self, im): # im = np.array HWC class ToTensor: - # YOLOv3 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()]) + """Converts a BGR image in numpy format to a PyTorch tensor in RGB format, with optional half precision.""" + def __init__(self, half=False): """Initializes ToTensor class for YOLOv3 image preprocessing to convert images to PyTorch tensors, optionally in half precision. diff --git a/utils/dataloaders.py b/utils/dataloaders.py index dc9ff22f3c..4fef716250 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -226,7 +226,8 @@ def __iter__(self): class LoadScreenshots: - # YOLOv3 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"` + """Loads screenshots as input data for YOLOv3, capturing screen regions specified by coordinates and dimensions.""" + def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None): """Initializes a screenshot dataloader for YOLOv3; source format: [screen_number left top width height], default img_size=640, stride=32. @@ -278,7 +279,8 @@ def __next__(self): class LoadImages: - # YOLOv3 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` + """Loads images and videos for YOLOv3 from various sources, including directories and '*.txt' path lists.""" + def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): """Initializes the data loader for YOLOv3, supporting image, video, directory, and '*.txt' path lists with customizable image sizing. @@ -389,7 +391,8 @@ def __len__(self): class LoadStreams: - # YOLOv3 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams` + """Loads video streams for YOLOv3 inference, supporting multiple sources and customizable frame sizes.""" + def __init__(self, sources="file.streams", img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): """Initializes a stream loader for YOLOv3, handling video sources or files with customizable frame sizes and intervals. @@ -492,7 +495,8 @@ def img2label_paths(img_paths): class LoadImagesAndLabels(Dataset): - # YOLOv3 train_loader/val_loader, loads images and labels for training and validation + """Loads images and labels for YOLOv3 training and validation with support for augmentations and caching.""" + cache_version = 0.6 # dataset labels *.cache version rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4] diff --git a/utils/general.py b/utils/general.py index afee396906..812720b7bc 100644 --- a/utils/general.py +++ b/utils/general.py @@ -176,7 +176,8 @@ def user_config_dir(dir="Ultralytics", env_var="YOLOV5_CONFIG_DIR"): class Profile(contextlib.ContextDecorator): - # YOLOv3 Profile class. Usage: @Profile() decorator or 'with Profile():' context manager + """Profiles code execution time, usable as a context manager or decorator for performance monitoring.""" + def __init__(self, t=0.0): """Initializes a profiling context for YOLOv3 with optional timing threshold `t` and checks CUDA availability. @@ -204,7 +205,8 @@ def time(self): class Timeout(contextlib.ContextDecorator): - # YOLOv3 Timeout class. 
Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager + """Enforces a timeout on code execution, raising TimeoutError on expiry.""" + def __init__(self, seconds, *, timeout_msg="", suppress_timeout_errors=True): """Initializes a timeout context/decorator with specified duration, custom message, and error handling option. @@ -232,7 +234,8 @@ def __exit__(self, exc_type, exc_val, exc_tb): class WorkingDirectory(contextlib.ContextDecorator): - # Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager + """Context manager to temporarily change the working directory, reverting to the original on exit.""" + def __init__(self, new_dir): """Initializes context manager to temporarily change working directory, reverting on exit.""" self.dir = new_dir # new dir diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 0a15554cfa..35fa66ad89 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -63,7 +63,8 @@ def SummaryWriter(*args): class Loggers: - # YOLOv3 Loggers class + """Manages logging for training and validation using TensorBoard, Weights & Biases, ClearML, and Comet ML.""" + def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS): """Initializes YOLOv3 logging with directory, weights, options, hyperparameters, and includes specified loggers. diff --git a/utils/loss.py b/utils/loss.py index 148f5118e5..3d3e1208b3 100644 --- a/utils/loss.py +++ b/utils/loss.py @@ -14,7 +14,8 @@ def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#iss class BCEBlurWithLogitsLoss(nn.Module): - # BCEwithLogitLoss() with reduced missing label effects. + """Implements BCEWithLogitsLoss with adjustments to mitigate missing label effects using an alpha parameter.""" + def __init__(self, alpha=0.05): """Initializes BCEBlurWithLogitsLoss with alpha to reduce missing label effects; default alpha is 0.05.""" super().__init__() @@ -35,7 +36,8 @@ def forward(self, pred, true): class FocalLoss(nn.Module): - # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) + """Implements Focal Loss to address class imbalance by modulating the loss based on prediction confidence.""" + def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): """Initializes FocalLoss with specified loss function, gamma, and alpha for enhanced training on imbalanced datasets. @@ -71,7 +73,8 @@ def forward(self, pred, true): class QFocalLoss(nn.Module): - # Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) + """Implements Quality Focal Loss to handle class imbalance with a modulating factor and alpha.""" + def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): """Initializes QFocalLoss with specified loss function, gamma, and alpha for element-wise focal loss application. 
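The FocalLoss hunk above keeps the original usage note of wrapping an existing criterion; a tiny sketch with made-up logits and targets:

import torch
import torch.nn as nn
from utils.loss import FocalLoss

criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)  # wraps an existing BCE-with-logits criterion
pred = torch.randn(8, 80)  # raw logits for 8 samples x 80 classes (illustrative shapes)
true = torch.randint(0, 2, (8, 80)).float()  # binary targets
print(criteria(pred, true))  # scalar loss; easy, high-confidence examples are down-weighted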
@@ -103,6 +106,8 @@ def forward(self, pred, true): class ComputeLoss: + """Computes the total loss for YOLO models by aggregating classification, box regression, and objectness losses.""" + sort_obj_iou = False # Compute losses diff --git a/utils/metrics.py b/utils/metrics.py index 20d9928359..f79bdfbdc4 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -122,7 +122,8 @@ def compute_ap(recall, precision): class ConfusionMatrix: - # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix + """Computes and visualizes a confusion matrix for object detection tasks with configurable thresholds.""" + def __init__(self, nc, conf=0.25, iou_thres=0.45): """Initializes confusion matrix for object detection with adjustable confidence and IoU thresholds.""" self.matrix = np.zeros((nc + 1, nc + 1)) diff --git a/utils/plots.py b/utils/plots.py index 9bb865dcd3..ad5f4ea6d3 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -29,7 +29,8 @@ class Colors: - # Ultralytics color palette https://ultralytics.com/ + """Provides a color palette and methods to convert indices to RGB or BGR color tuples.""" + def __init__(self): """Initializes the Colors class with a palette from the Ultralytics color palette.""" hexs = ( diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py index 0343291d68..4b1fba08bf 100644 --- a/utils/segment/dataloaders.py +++ b/utils/segment/dataloaders.py @@ -82,6 +82,8 @@ def create_dataloader( class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing + """Loads images, labels, and masks for training/testing with optional augmentations including mosaic and mixup.""" + def __init__( self, path, diff --git a/utils/segment/loss.py b/utils/segment/loss.py index 1871d5662e..4e204a42c8 100644 --- a/utils/segment/loss.py +++ b/utils/segment/loss.py @@ -12,7 +12,8 @@ class ComputeLoss: - # Compute losses + """Computes classification, box regression, objectness, and segmentation losses for YOLOv3 model predictions.""" + def __init__(self, model, autobalance=False, overlap=False): """Initializes ComputeLoss with model settings, optional autobalancing, and overlap handling.""" self.sort_obj_iou = False diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py index d4689ca015..9451cc9324 100644 --- a/utils/segment/metrics.py +++ b/utils/segment/metrics.py @@ -54,6 +54,8 @@ def ap_per_class_box_and_mask( class Metric: + """Represents model evaluation metrics including precision, recall, F1 score, and average precision (AP) values.""" + def __init__(self) -> None: """Initializes Metric class attributes for precision, recall, F1 score, AP values, and AP class indices.""" self.p = [] # (nc, ) diff --git a/utils/torch_utils.py b/utils/torch_utils.py index b3eb9871e9..0b1d69a9ab 100644 --- a/utils/torch_utils.py +++ b/utils/torch_utils.py @@ -422,7 +422,8 @@ def smart_resume(ckpt, optimizer, ema=None, weights="yolov5s.pt", epochs=300, re class EarlyStopping: - # YOLOv3 simple early stopper + """Monitors training to halt if no improvement in fitness metric is observed for a specified number of epochs.""" + def __init__(self, patience=30): """Initializes EarlyStopping to monitor training, halting if no improvement in 'patience' epochs, defaulting to 30.
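Finally, for the EarlyStopping helper documented in the last hunk, a toy training-loop sketch; the loop, the synthetic fitness value, and the __call__(epoch, fitness) usage are assumptions based on the surrounding repository rather than lines shown in this diff.

from utils.torch_utils import EarlyStopping

stopper = EarlyStopping(patience=30)  # halt after 30 epochs without a new best fitness
for epoch in range(300):
    fitness = 1.0 - 1.0 / (epoch + 1)  # stand-in for a real validation metric (higher is better)
    if stopper(epoch=epoch, fitness=fitness):  # assumed to return True once patience is exhausted
        break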