diff --git a/.github/workflows/merge-main-into-prs.yml b/.github/workflows/merge-main-into-prs.yml
index db6a180295..372d888241 100644
--- a/.github/workflows/merge-main-into-prs.yml
+++ b/.github/workflows/merge-main-into-prs.yml
@@ -34,11 +34,11 @@ jobs:
 
           g = Github(os.getenv('GITHUB_TOKEN'))
           repo = g.get_repo(os.getenv('GITHUB_REPOSITORY'))
-          
+
           # Fetch the default branch name
           default_branch_name = repo.default_branch
           default_branch = repo.get_branch(default_branch_name)
-          
+
           for pr in repo.get_pulls(state='open', sort='created'):
               try:
                   # Get full names for repositories and branches
diff --git a/README.md b/README.md
index e4314803be..14bc041938 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
     <a href="https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml"><img src="https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml/badge.svg" alt="YOLOv3 CI"></a>
     <a href="https://zenodo.org/badge/latestdoi/264818686"><img src="https://zenodo.org/badge/264818686.svg" alt="YOLOv3 Citation"></a>
     <a href="https://hub.docker.com/r/ultralytics/yolov3"><img src="https://img.shields.io/docker/pulls/ultralytics/yolov3?logo=docker" alt="Docker Pulls"></a>
-    <a href="https://ultralytics.com/discord"><img alt="Discord" src="https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue"></a> <a href="https://community.ultralytics.com"><img alt="Ultralytics Forums" src="https://img.shields.io/discourse/users?server=https%3A%2F%2Fcommunity.ultralytics.com&logo=discourse&label=Forums&color=blue"></a>
+    <a href="https://ultralytics.com/discord"><img alt="Discord" src="https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue"></a> <a href="https://community.ultralytics.com"><img alt="Ultralytics Forums" src="https://img.shields.io/discourse/users?server=https%3A%2F%2Fcommunity.ultralytics.com&logo=discourse&label=Forums&color=blue"></a> <a href="https://reddit.com/r/ultralytics"><img alt="Ultralytics Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/ultralytics?style=flat&logo=reddit&logoColor=white&label=Reddit&color=blue"></a>
     <br>
     <a href="https://bit.ly/yolov5-paperspace-notebook"><img src="https://assets.paperspace.io/img/gradient-badge.svg" alt="Run on Gradient"></a>
     <a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
diff --git a/export.py b/export.py
index fad82dbac6..9c9a0d9a77 100644
--- a/export.py
+++ b/export.py
@@ -91,6 +91,8 @@
 
 
 class iOSModel(torch.nn.Module):
+    """Exports a PyTorch model to an iOS-compatible format with normalized input dimensions and class configurations."""
+
     def __init__(self, model, im):
         """
         Initializes an iOSModel with normalized input dimensions and number of classes from a PyTorch model.
diff --git a/models/common.py b/models/common.py
index 295a04d933..9d0818d62f 100644
--- a/models/common.py
+++ b/models/common.py
@@ -55,7 +55,8 @@ def autopad(k, p=None, d=1):  # kernel, padding, dilation
 
 
 class Conv(nn.Module):
-    # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
+    """A standard Conv2D layer with batch normalization and optional activation for neural networks."""
+
     default_act = nn.SiLU()  # default activation
 
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
@@ -81,7 +82,8 @@ def forward_fuse(self, x):
 
 
 class DWConv(Conv):
-    # Depth-wise convolution
+    """Implements depth-wise convolution for efficient spatial feature extraction in neural networks."""
+
     def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
         """Initializes depth-wise convolution with optional activation; parameters are channel in/out, kernel, stride,
         dilation.
@@ -90,7 +92,8 @@ def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, s
 
 
 class DWConvTranspose2d(nn.ConvTranspose2d):
-    # Depth-wise transpose convolution
+    """Implements a depth-wise transpose convolution layer with specified channels, kernel size, stride, and padding."""
+
     def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
         """Initializes a depth-wise or transpose convolution layer with specified in/out channels, kernel size, stride,
         and padding.
@@ -99,7 +102,8 @@ def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stri
 
 
 class TransformerLayer(nn.Module):
-    # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
+    """Transformer layer with multi-head attention and feed-forward network, optimized by removing LayerNorm."""
+
     def __init__(self, c, num_heads):
         """Initializes a Transformer layer as per https://arxiv.org/abs/2010.11929, sans LayerNorm, with specified
         embedding dimension and number of heads.
@@ -122,7 +126,8 @@ def forward(self, x):
 
 
 class TransformerBlock(nn.Module):
-    # Vision Transformer https://arxiv.org/abs/2010.11929
+    """Implements a Vision Transformer block with transformer layers; see https://arxiv.org/abs/2010.11929."""
+
     def __init__(self, c1, c2, num_heads, num_layers):
         """Initializes a Transformer block with optional convolution, linear, and transformer layers."""
         super().__init__()
@@ -143,7 +148,8 @@ def forward(self, x):
 
 
 class Bottleneck(nn.Module):
-    # Standard bottleneck
+    """Implements a bottleneck layer with optional shortcut for efficient feature extraction in neural networks."""
+
     def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
         """Initializes a standard bottleneck layer with optional shortcut; args: input channels (c1), output channels
         (c2), shortcut (bool), groups (g), expansion factor (e).
@@ -162,7 +168,8 @@ def forward(self, x):
 
 
 class BottleneckCSP(nn.Module):
-    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    """Implements a CSP Bottleneck layer for feature extraction."""
+
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
         """Initializes CSP Bottleneck with channel in/out, optional shortcut, groups, expansion; see
         https://github.com/WongKinYiu/CrossStagePartialNetworks.
@@ -187,7 +194,8 @@ def forward(self, x):
 
 
 class CrossConv(nn.Module):
-    # Cross Convolution Downsample
+    """Implements Cross Convolution Downsample with 1D and 2D convolutions and optional shortcut."""
+
     def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
         """Initializes CrossConv with downsample options, combining 1D and 2D convolutions, optional shortcut if
         input/output channels match.
@@ -204,7 +212,8 @@ def forward(self, x):
 
 
 class C3(nn.Module):
-    # CSP Bottleneck with 3 convolutions
+    """Implements a CSP Bottleneck with 3 convolutions, optional shortcuts, group convolutions, and expansion factor."""
+
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
         """Initializes CSP Bottleneck with 3 convolutions, optional shortcuts, group convolutions, and expansion
         factor.
@@ -222,7 +231,8 @@ def forward(self, x):
 
 
 class C3x(C3):
-    # C3 module with cross-convolutions
+    """Extends the C3 module with cross-convolutions for enhanced feature extraction and flexibility."""
+
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         """Initializes a C3x module with cross-convolutions, extending the C3 module with customizable parameters."""
         super().__init__(c1, c2, n, shortcut, g, e)
@@ -231,7 +241,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
 
 
 class C3TR(C3):
-    # C3 module with TransformerBlock()
+    """C3 module with TransformerBlock for integrating attention mechanisms in CNNs."""
+
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         """Initializes a C3 module with TransformerBlock, extending C3 for attention mechanisms."""
         super().__init__(c1, c2, n, shortcut, g, e)
@@ -240,7 +251,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
 
 
 class C3SPP(C3):
-    # C3 module with SPP()
+    """Extends C3 with Spatial Pyramid Pooling (SPP) for enhanced feature extraction in CNNs."""
+
     def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
         """Initializes C3SPP module, extending C3 with Spatial Pyramid Pooling for enhanced feature extraction."""
         super().__init__(c1, c2, n, shortcut, g, e)
@@ -249,7 +261,8 @@ def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
 
 
 class C3Ghost(C3):
-    # C3 module with GhostBottleneck()
+    """Implements a C3 module with Ghost Bottlenecks for efficient feature extraction in neural networks."""
+
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         """Initializes C3Ghost module with Ghost Bottlenecks for efficient feature extraction."""
         super().__init__(c1, c2, n, shortcut, g, e)
@@ -258,7 +271,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
 
 
 class SPP(nn.Module):
-    # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
+    """Implements Spatial Pyramid Pooling (SPP) for enhanced feature extraction; see https://arxiv.org/abs/1406.4729."""
+
     def __init__(self, c1, c2, k=(5, 9, 13)):
         """
         Initializes SPP layer with specified channels and kernels.
@@ -284,7 +298,8 @@ def forward(self, x):
 
 
 class SPPF(nn.Module):
-    # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv3 by Glenn Jocher
+    """Implements a fast Spatial Pyramid Pooling (SPPF) layer for efficient feature extraction in YOLOv3 models."""
+
     def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
         """Initializes the SPPF layer with specified input/output channels and kernel size for YOLOv3."""
         super().__init__()
@@ -306,7 +321,8 @@ def forward(self, x):
 
 
 class Focus(nn.Module):
-    # Focus wh information into c-space
+    """Focuses spatial information into channel space using configurable convolution."""
+
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
         """Initializes Focus module to focus width and height information into channel space with configurable
         convolution parameters.
@@ -322,7 +338,8 @@ def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
 
 
 class GhostConv(nn.Module):
-    # Ghost Convolution https://github.com/huawei-noah/ghostnet
+    """Implements Ghost Convolution for efficient feature extraction; see https://github.com/huawei-noah/ghostnet."""
+
     def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
         """Initializes GhostConv with in/out channels, kernel size, stride, groups; see
         https://github.com/huawei-noah/ghostnet.
@@ -339,7 +356,8 @@ def forward(self, x):
 
 
 class GhostBottleneck(nn.Module):
-    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
+    """Implements a Ghost Bottleneck layer for efficient feature extraction from GhostNet."""
+
     def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
         """Initializes GhostBottleneck module with in/out channels, kernel size, and stride; see
         https://github.com/huawei-noah/ghostnet.
@@ -361,7 +379,8 @@ def forward(self, x):
 
 
 class Contract(nn.Module):
-    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
+    """Contracts spatial dimensions into channels, e.g., (1,64,80,80) to (1,256,40,40) with a specified gain."""
+
     def __init__(self, gain=2):
         """Initializes Contract module to refine input dimensions, e.g., from (1,64,80,80) to (1,256,40,40) with a
         default gain of 2.
@@ -381,7 +400,8 @@ def forward(self, x):
 
 
 class Expand(nn.Module):
-    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
+    """Expands spatial dimensions of input tensor by a factor while reducing channels correspondingly."""
+
     def __init__(self, gain=2):
         """Initializes Expand module to increase spatial dimensions by factor `gain` while reducing channels
         correspondingly.
@@ -401,7 +421,8 @@ def forward(self, x):
 
 
 class Concat(nn.Module):
-    # Concatenate a list of tensors along dimension
+    """Concatenates a list of tensors along a specified dimension for efficient feature aggregation."""
+
     def __init__(self, dimension=1):
         """Initializes a module to concatenate tensors along a specified dimension."""
         super().__init__()
@@ -415,7 +436,8 @@ def forward(self, x):
 
 
 class DetectMultiBackend(nn.Module):
-    # YOLOv3 MultiBackend class for python inference on various backends
+    """YOLOv3 multi-backend class for inference on frameworks like PyTorch, ONNX, TensorRT, and more."""
+
     def __init__(self, weights="yolov5s.pt", device=torch.device("cpu"), dnn=False, data=None, fp16=False, fuse=True):
         """Initializes multi-backend detection with options for various frameworks and devices, also handles model
         download.
@@ -749,7 +771,8 @@ def _load_metadata(f=Path("path/to/meta.yaml")):
 
 
 class AutoShape(nn.Module):
-    # YOLOv3 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
+    """A wrapper for YOLOv3 models to handle diverse input types with preprocessing, inference, and NMS."""
+
     conf = 0.25  # NMS confidence threshold
     iou = 0.45  # NMS IoU threshold
     agnostic = False  # NMS class-agnostic
@@ -857,7 +880,8 @@ def forward(self, ims, size=640, augment=False, profile=False):
 
 
 class Detections:
-    # YOLOv3 detections class for inference results
+    """Handles YOLOv3 detection results with methods for visualization, saving, cropping, and format conversion."""
+
     def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
         """Initializes YOLOv3 detections with image data, predictions, filenames, profiling times, class names, and
         shapes.
@@ -1011,7 +1035,8 @@ def __repr__(self):
 
 
 class Proto(nn.Module):
-    # YOLOv3 mask Proto module for segmentation models
+    """Implements the YOLOv3 mask Proto module for segmentation, including convolutional layers and upsampling."""
+
     def __init__(self, c1, c_=256, c2=32):  # ch_in, number of protos, number of masks
         """Initializes the Proto module for YOLOv3 segmentation, setting up convolutional layers and upsampling."""
         super().__init__()
@@ -1026,7 +1051,8 @@ def forward(self, x):
 
 
 class Classify(nn.Module):
-    # YOLOv3 classification head, i.e. x(b,c1,20,20) to x(b,c2)
+    """Performs image classification using YOLOv3-based architecture with convolutional, pooling, and dropout layers."""
+
     def __init__(
         self, c1, c2, k=1, s=1, p=None, g=1, dropout_p=0.0
     ):  # ch_in, ch_out, kernel, stride, padding, groups, dropout probability
diff --git a/models/experimental.py b/models/experimental.py
index 305434ec87..7c1ed58608 100644
--- a/models/experimental.py
+++ b/models/experimental.py
@@ -11,7 +11,8 @@
 
 
 class Sum(nn.Module):
-    # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
+    """Computes the weighted or unweighted sum of multiple input layers per https://arxiv.org/abs/1911.09070."""
+
     def __init__(self, n, weight=False):  # n: number of inputs
         """
         Initializes a module to compute weighted/unweighted sum of n inputs, with optional learning weights.
@@ -42,7 +43,8 @@ def forward(self, x):
 
 
 class MixConv2d(nn.Module):
-    # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
+    """Implements mixed depth-wise convolutions for efficient neural networks; see https://arxiv.org/abs/1907.09595."""
+
     def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):  # ch_in, ch_out, kernel, stride, ch_strategy
         """Initializes MixConv2d with mixed depth-wise convolution layers; details at
         https://arxiv.org/abs/1907.09595.
@@ -72,7 +74,8 @@ def forward(self, x):
 
 
 class Ensemble(nn.ModuleList):
-    # Ensemble of models
+    """Combines outputs from multiple models to improve inference results."""
+
     def __init__(self):
         """Initializes an ensemble of models to combine their outputs."""
         super().__init__()
diff --git a/models/tf.py b/models/tf.py
index 13edcdcfca..b4c47e8e73 100644
--- a/models/tf.py
+++ b/models/tf.py
@@ -49,7 +49,8 @@
 
 
 class TFBN(keras.layers.Layer):
-    # TensorFlow BatchNormalization wrapper
+    """A TensorFlow BatchNormalization wrapper layer initialized with specific weights for YOLOv3 models."""
+
     def __init__(self, w=None):
         """Initializes TFBN with weights, wrapping TensorFlow's BatchNormalization layer with specific initializers."""
         super().__init__()
@@ -67,7 +68,8 @@ def call(self, inputs):
 
 
 class TFPad(keras.layers.Layer):
-    # Pad inputs in spatial dimensions 1 and 2
+    """Pads inputs in spatial dimensions 1 and 2 using specified padding width as an int or (int, int) tuple/list."""
+
     def __init__(self, pad):
         """Initializes a padding layer for spatial dimensions 1 and 2, with `pad` as int or (int, int) tuple/list."""
         super().__init__()
@@ -84,7 +86,8 @@ def call(self, inputs):
 
 
 class TFConv(keras.layers.Layer):
-    # Standard convolution
+    """Implements a standard convolutional layer with optional batch normalization and activation for TensorFlow."""
+
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
         """Initializes a convolutional layer with customizable filters, kernel size, stride, padding, groups, and
         activation.
@@ -112,7 +115,8 @@ def call(self, inputs):
 
 
 class TFDWConv(keras.layers.Layer):
-    # Depthwise convolution
+    """Implements a depthwise convolutional layer with optional batch normalization and activation for TensorFlow."""
+
     def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
         """Initializes a depthwise convolutional layer with optional batch normalization and activation."""
         super().__init__()
@@ -136,7 +140,8 @@ def call(self, inputs):
 
 
 class TFDWConvTranspose2d(keras.layers.Layer):
-    # Depthwise ConvTranspose2d
+    """Implements a depthwise transposed convolutional layer for TensorFlow with equal input and output channels."""
+
     def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
         """Initializes TFDWConvTranspose2d with ch_in=c1=ch_out, k=4, p1=1; sets up depthwise Conv2DTranspose layers."""
         super().__init__()
@@ -166,7 +171,8 @@ def call(self, inputs):
 
 
 class TFFocus(keras.layers.Layer):
-    # Focus wh information into c-space
+    """Focuses spatial information into channel space using a convolutional layer for efficient feature extraction."""
+
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
         """Initializes TFFocus layer for efficient information focusing into channel-space with customizable convolution
         parameters.
@@ -183,7 +189,8 @@ def call(self, inputs):  # x(b,w,h,c) -> y(b,w/2,h/2,4c)
 
 
 class TFBottleneck(keras.layers.Layer):
-    # Standard bottleneck
+    """A TensorFlow bottleneck layer with optional shortcut connections, channel expansion, and group convolutions."""
+
     def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):  # ch_in, ch_out, shortcut, groups, expansion
         """Initializes a standard bottleneck layer with optional shortcut, channel expansion, and group convolutions."""
         super().__init__()
@@ -200,7 +207,8 @@ def call(self, inputs):
 
 
 class TFCrossConv(keras.layers.Layer):
-    # Cross Convolution
+    """Implements a cross convolutional layer with customizable channels, kernel size, stride, groups, and shortcut."""
+
     def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
         """Initializes cross convolutional layer with parameters for channel sizes, kernel size, stride, groups,
         expansion factor, shortcut option, and weights.
@@ -217,7 +225,8 @@ def call(self, inputs):
 
 
 class TFConv2d(keras.layers.Layer):
-    # Substitution for PyTorch nn.Conv2D
+    """Implements a TensorFlow 2.2+ Conv2D layer as a substitute for PyTorch nn.Conv2d with customizable parameters."""
+
     def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
         """Initializes TFConv2d layer for TensorFlow 2.2+, substituting PyTorch Conv2D; c1, c2: channels, k: kernel
         size, s: stride.
@@ -240,7 +249,8 @@ def call(self, inputs):
 
 
 class TFBottleneckCSP(keras.layers.Layer):
-    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    """Implements a Cross Stage Partial (CSP) Bottleneck layer for efficient feature extraction in neural networks."""
+
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
         """Initializes CSP Bottleneck layer with channel configurations and optional shortcut, groups, expansion, and
         weights.
@@ -263,7 +273,8 @@ def call(self, inputs):
 
 
 class TFC3(keras.layers.Layer):
-    # CSP Bottleneck with 3 convolutions
+    """CSP Bottleneck layer with 3 convolutions for enhanced feature extraction and integration in TensorFlow models."""
+
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
         """Initializes a CSP Bottleneck layer with 3 convolutions for channel manipulation and feature integration."""
         super().__init__()
@@ -281,7 +292,8 @@ def call(self, inputs):
 
 
 class TFC3x(keras.layers.Layer):
-    # 3 module with cross-convolutions
+    """Implements a CSP Bottleneck layer with cross-convolutions for enhanced feature extraction in YOLOv3 models."""
+
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
         """Initializes a TFC3x layer with cross-convolutions, expanding and concatenating features for given channel
         inputs and outputs.
@@ -301,7 +313,8 @@ def call(self, inputs):
 
 
 class TFSPP(keras.layers.Layer):
-    # Spatial pyramid pooling layer used in YOLOv3-SPP
+    """Implements Spatial Pyramid Pooling (SPP) for YOLOv3-SPP with configurable channels and kernel sizes."""
+
     def __init__(self, c1, c2, k=(5, 9, 13), w=None):
         """Initializes a Spatial Pyramid Pooling layer for YOLOv3-SPP with configurable in/out channels and kernel
         sizes.
@@ -319,7 +332,8 @@ def call(self, inputs):
 
 
 class TFSPPF(keras.layers.Layer):
-    # Spatial pyramid pooling-Fast layer
+    """Implements a fast spatial pyramid pooling layer for efficient multi-scale feature extraction in YOLOv3 models."""
+
     def __init__(self, c1, c2, k=5, w=None):
         """Initializes a Spatial Pyramid Pooling-Fast layer with specified channels, kernel size, and optional
         weights.
@@ -339,7 +353,8 @@ def call(self, inputs):
 
 
 class TFDetect(keras.layers.Layer):
-    # TF YOLOv3 Detect layer
+    """Implements YOLOv3 detection layer in TensorFlow for object detection with configurable classes and anchors."""
+
     def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):  # detection layer
         """Initializes a YOLOv3 detection layer with specified classes, anchors, channels, image size, and weights."""
         super().__init__()
@@ -393,7 +408,8 @@ def _make_grid(nx=20, ny=20):
 
 
 class TFSegment(TFDetect):
-    # YOLOv3 Segment head for segmentation models
+    """Implements YOLOv3 segmentation head for object detection and segmentation tasks using TensorFlow."""
+
     def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
         """Initializes a YOLOv3 Segment head with customizable parameters for segmentation models."""
         super().__init__(nc, anchors, ch, imgsz, w)
@@ -414,6 +430,8 @@ def call(self, x):
 
 
 class TFProto(keras.layers.Layer):
+    """Implements a TensorFlow layer for feature processing with convolution and upsample operations."""
+
     def __init__(self, c1, c_=256, c2=32, w=None):
         """Initializes a TFProto layer with convolution and upsample operations for feature processing."""
         super().__init__()
@@ -428,7 +446,8 @@ def call(self, inputs):
 
 
 class TFUpsample(keras.layers.Layer):
-    # TF version of torch.nn.Upsample()
+    """Implements an upsample layer using TensorFlow with specified size, scale factor, and interpolation mode."""
+
     def __init__(self, size, scale_factor, mode, w=None):  # warning: all arguments needed including 'w'
         """Initializes an upsample layer with specific size, doubling scale factor (>0, even), interpolation mode, and
         optional weights.
@@ -447,7 +466,8 @@ def call(self, inputs):
 
 
 class TFConcat(keras.layers.Layer):
-    # TF version of torch.concat()
+    """Concatenates input tensors along the specified dimension (NHWC format) using TensorFlow."""
+
     def __init__(self, dimension=1, w=None):
         """Initializes a TensorFlow layer to concatenate tensors along the NHWC dimension, requiring dimension=1."""
         super().__init__()
@@ -534,7 +554,8 @@ def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
 
 
 class TFModel:
-    # TF YOLOv3 model
+    """TensorFlow implementation of YOLOv3 for object detection, supporting Keras and TFLite models."""
+
     def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)):  # model, channels, classes
         """Initializes TF YOLOv3 model with config, channels, classes, optional pre-loaded model, and input image
         size.
@@ -606,7 +627,8 @@ def _xywh2xyxy(xywh):
 
 
 class AgnosticNMS(keras.layers.Layer):
-    # TF Agnostic NMS
+    """Applies class-agnostic non-maximum suppression (NMS) to filter detections by IoU and confidence thresholds."""
+
     def call(self, input, topk_all, iou_thres, conf_thres):
         """Applies non-maximum suppression (NMS) to filter detections based on IoU, confidence thresholds, and top-K."""
         return tf.map_fn(
diff --git a/models/yolo.py b/models/yolo.py
index 8c77f04a66..49cd9c74fc 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -42,7 +42,8 @@
 
 
 class Detect(nn.Module):
-    # YOLOv3 Detect head for detection models
+    """YOLOv3 Detect head for processing detection model outputs, including grid and anchor grid generation."""
+
     stride = None  # strides computed during build
     dynamic = False  # force grid reconstruction
     export = False  # export mode
@@ -105,7 +106,8 @@ def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version
 
 
 class Segment(Detect):
-    # YOLOv3 Segment head for segmentation models
+    """YOLOv3 Segment head for segmentation models, adding mask prediction and prototyping to detection."""
+
     def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
         """Initializes the YOLOv3 segment head with customizable class count, anchors, masks, protos, channels, and
         inplace option.
@@ -128,7 +130,8 @@ def forward(self, x):
 
 
 class BaseModel(nn.Module):
-    # YOLOv3 base model
+    """Implements the base YOLOv3 model architecture for object detection tasks."""
+
     def forward(self, x, profile=False, visualize=False):
         """Performs a single-scale inference or training step on input `x`, with options for profiling and
         visualization.
@@ -191,7 +194,8 @@ def _apply(self, fn):
 
 
 class DetectionModel(BaseModel):
-    # YOLOv3 detection model
+    """YOLOv3 detection model class for initializing and processing detection models with configurable parameters."""
+
     def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None):  # model, input channels, number of classes
         """Initializes YOLOv3 detection model with configurable YAML, input channels, classes, and anchors."""
         super().__init__()
@@ -303,14 +307,16 @@ def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is
 
 
 class SegmentationModel(DetectionModel):
-    # YOLOv3 segmentation model
+    """Implements a YOLOv3-based segmentation model with customizable configuration, channels, classes, and anchors."""
+
     def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None):
         """Initializes a SegmentationModel with optional configuration, channel, class count, and anchors parameters."""
         super().__init__(cfg, ch, nc, anchors)
 
 
 class ClassificationModel(BaseModel):
-    # YOLOv3 classification model
+    """Implements a YOLOv3-based image classification model with configurable architecture and class count."""
+
     def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):  # yaml, model, number of classes, cutoff index
         """Initializes a ClassificationModel from a detection model or YAML, with configurable classes and cutoff."""
         super().__init__()
diff --git a/utils/__init__.py b/utils/__init__.py
index a34a38abef..918856178c 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -12,7 +12,8 @@ def emojis(str=""):
 
 
 class TryExcept(contextlib.ContextDecorator):
-    # YOLOv3 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager
+    """A context manager and decorator for handling exceptions with optional custom messages."""
+
     def __init__(self, msg=""):
         """Initializes TryExcept with optional custom message, used as decorator or context manager for exception
         handling.
diff --git a/utils/activations.py b/utils/activations.py
index 95661bf1dd..a4ff9e48f7 100644
--- a/utils/activations.py
+++ b/utils/activations.py
@@ -7,7 +7,8 @@
 
 
 class SiLU(nn.Module):
-    # SiLU activation https://arxiv.org/pdf/1606.08415.pdf
+    """Applies the SiLU activation function to the input tensor as described in https://arxiv.org/pdf/1606.08415.pdf."""
+
     @staticmethod
     def forward(x):
         """Applies the SiLU activation function, as detailed in https://arxiv.org/pdf/1606.08415.pdf, on input tensor
@@ -17,7 +18,8 @@ def forward(x):
 
 
 class Hardswish(nn.Module):
-    # Hard-SiLU activation
+    """Applies the Hardswish activation function to the input tensor `x`."""
+
     @staticmethod
     def forward(x):
         """Applies Hardswish activation, suitable for TorchScript, CoreML, ONNX, modifying input `x` as per Hard-SiLU
@@ -27,7 +29,8 @@ def forward(x):
 
 
 class Mish(nn.Module):
-    # Mish activation https://github.com/digantamisra98/Mish
+    """Applies the Mish activation function to improve model performance; see https://github.com/digantamisra98/Mish."""
+
     @staticmethod
     def forward(x):
         """
@@ -39,8 +42,11 @@ def forward(x):
 
 
 class MemoryEfficientMish(nn.Module):
-    # Mish activation memory-efficient
+    """Applies the memory-efficient Mish activation function for improved model performance and reduced memory usage."""
+
     class F(torch.autograd.Function):
+        """Memory-efficient implementation of the Mish activation function for enhanced model performance."""
+
         @staticmethod
         def forward(ctx, x):
             """Applies the Mish activation function in a memory-efficient manner, useful for enhancing model
@@ -65,7 +71,8 @@ def forward(self, x):
 
 
 class FReLU(nn.Module):
-    # FReLU activation https://arxiv.org/abs/2007.11824
+    """Implements the FReLU activation, combining ReLU and convolution from https://arxiv.org/abs/2007.11824."""
+
     def __init__(self, c1, k=3):  # ch_in, kernel
         """Initializes FReLU with specified channel size and kernel, implementing activation from
         https://arxiv.org/abs/2007.11824.
diff --git a/utils/augmentations.py b/utils/augmentations.py
index 539b55304a..cf11b0c525 100644
--- a/utils/augmentations.py
+++ b/utils/augmentations.py
@@ -18,7 +18,8 @@
 
 
 class Albumentations:
-    # YOLOv3 Albumentations class (optional, only used if package is installed)
+    """Provides optional image augmentation for YOLOv3 using the Albumentations library if installed."""
+
     def __init__(self, size=640):
         """Initializes Albumentations class for optional YOLOv3 data augmentation with default size 640."""
         self.transform = None
@@ -363,7 +364,8 @@ def classify_transforms(size=224):
 
 
 class LetterBox:
-    # YOLOv3 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
+    """Resizes and pads images to a specified size while maintaining aspect ratio."""
+
     def __init__(self, size=(640, 640), auto=False, stride=32):
         """Initializes LetterBox for YOLOv3 image preprocessing with optional auto-sizing and stride; `size` can be int
         or tuple.
@@ -388,7 +390,8 @@ def __call__(self, im):  # im = np.array HWC
 
 
 class CenterCrop:
-    # YOLOv3 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
+    """Crops the center of an image to a specified size, maintaining aspect ratio."""
+
     def __init__(self, size=640):
         """Initializes a CenterCrop object for YOLOv3, to crop images to a specified size, with default 640x640."""
         super().__init__()
@@ -403,7 +406,8 @@ def __call__(self, im):  # im = np.array HWC
 
 
 class ToTensor:
-    # YOLOv3 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
+    """Converts a BGR image in numpy format to a PyTorch tensor in RGB format, with optional half precision."""
+
     def __init__(self, half=False):
         """Initializes ToTensor class for YOLOv3 image preprocessing to convert images to PyTorch tensors, optionally in
         half precision.
diff --git a/utils/dataloaders.py b/utils/dataloaders.py
index dc9ff22f3c..4fef716250 100644
--- a/utils/dataloaders.py
+++ b/utils/dataloaders.py
@@ -226,7 +226,8 @@ def __iter__(self):
 
 
 class LoadScreenshots:
-    # YOLOv3 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"`
+    """Loads screenshots as input data for YOLOv3, capturing screen regions specified by coordinates and dimensions."""
+
     def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None):
         """Initializes a screenshot dataloader for YOLOv3; source format: [screen_number left top width height], default
         img_size=640, stride=32.
@@ -278,7 +279,8 @@ def __next__(self):
 
 
 class LoadImages:
-    # YOLOv3 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
+    """Loads images and videos for YOLOv3 from various sources, including directories and '*.txt' path lists."""
+
     def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
         """Initializes the data loader for YOLOv3, supporting image, video, directory, and '*.txt' path lists with
         customizable image sizing.
@@ -389,7 +391,8 @@ def __len__(self):
 
 
 class LoadStreams:
-    # YOLOv3 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP streams`
+    """Loads video streams for YOLOv3 inference, supporting multiple sources and customizable frame sizes."""
+
     def __init__(self, sources="file.streams", img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
         """Initializes a stream loader for YOLOv3, handling video sources or files with customizable frame sizes and
         intervals.
@@ -492,7 +495,8 @@ def img2label_paths(img_paths):
 
 
 class LoadImagesAndLabels(Dataset):
-    # YOLOv3 train_loader/val_loader, loads images and labels for training and validation
+    """Loads images and labels for YOLOv3 training and validation with support for augmentations and caching."""
+
     cache_version = 0.6  # dataset labels *.cache version
     rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]
 
diff --git a/utils/general.py b/utils/general.py
index afee396906..812720b7bc 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -176,7 +176,8 @@ def user_config_dir(dir="Ultralytics", env_var="YOLOV5_CONFIG_DIR"):
 
 
 class Profile(contextlib.ContextDecorator):
-    # YOLOv3 Profile class. Usage: @Profile() decorator or 'with Profile():' context manager
+    """Profiles code execution time, usable as a context manager or decorator for performance monitoring."""
+
     def __init__(self, t=0.0):
         """Initializes a profiling context for YOLOv3 with optional timing threshold `t` and checks CUDA
         availability.
@@ -204,7 +205,8 @@ def time(self):
 
 
 class Timeout(contextlib.ContextDecorator):
-    # YOLOv3 Timeout class. Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager
+    """Enforces a timeout on code execution as a decorator or context manager, optionally suppressing TimeoutError."""
+
     def __init__(self, seconds, *, timeout_msg="", suppress_timeout_errors=True):
         """Initializes a timeout context/decorator with specified duration, custom message, and error handling
         option.
@@ -232,7 +234,8 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 
 
 class WorkingDirectory(contextlib.ContextDecorator):
-    # Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager
+    """Context manager and decorator to temporarily change the working directory, reverting to the original on exit."""
+
     def __init__(self, new_dir):
         """Initializes context manager to temporarily change working directory, reverting on exit."""
         self.dir = new_dir  # new dir
diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py
index 0a15554cfa..35fa66ad89 100644
--- a/utils/loggers/__init__.py
+++ b/utils/loggers/__init__.py
@@ -63,7 +63,8 @@ def SummaryWriter(*args):
 
 
 class Loggers:
-    # YOLOv3 Loggers class
+    """Manages logging for training and validation using TensorBoard, Weights & Biases, ClearML, and Comet ML."""
+
     def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS):
         """Initializes YOLOv3 logging with directory, weights, options, hyperparameters, and includes specified
         loggers.
diff --git a/utils/loss.py b/utils/loss.py
index 148f5118e5..3d3e1208b3 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -14,7 +14,8 @@ def smooth_BCE(eps=0.1):  # https://github.com/ultralytics/yolov3/issues/238#iss
 
 
 class BCEBlurWithLogitsLoss(nn.Module):
-    # BCEwithLogitLoss() with reduced missing label effects.
+    """Implements BCEWithLogitsLoss with adjustments to mitigate missing label effects using an alpha parameter."""
+
     def __init__(self, alpha=0.05):
         """Initializes BCEBlurWithLogitsLoss with alpha to reduce missing label effects; default alpha is 0.05."""
         super().__init__()
@@ -35,7 +36,8 @@ def forward(self, pred, true):
 
 
 class FocalLoss(nn.Module):
-    # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
+    """Implements Focal Loss to address class imbalance by modulating the loss based on prediction confidence."""
+
     def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
         """Initializes FocalLoss with specified loss function, gamma, and alpha for enhanced training on imbalanced
         datasets.
@@ -71,7 +73,8 @@ def forward(self, pred, true):
 
 
 class QFocalLoss(nn.Module):
-    # Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
+    """Implements Quality Focal Loss to handle class imbalance with a modulating factor and alpha."""
+
     def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
         """Initializes QFocalLoss with specified loss function, gamma, and alpha for element-wise focal loss
         application.
@@ -103,6 +106,8 @@ def forward(self, pred, true):
 
 
 class ComputeLoss:
+    """Computes the total loss for YOLO models by aggregating classification, box regression, and objectness losses."""
+
     sort_obj_iou = False
 
     # Compute losses
diff --git a/utils/metrics.py b/utils/metrics.py
index 20d9928359..f79bdfbdc4 100644
--- a/utils/metrics.py
+++ b/utils/metrics.py
@@ -122,7 +122,8 @@ def compute_ap(recall, precision):
 
 
 class ConfusionMatrix:
-    # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
+    """Computes and visualizes a confusion matrix for object detection tasks with configurable thresholds."""
+
     def __init__(self, nc, conf=0.25, iou_thres=0.45):
         """Initializes confusion matrix for object detection with adjustable confidence and IoU thresholds."""
         self.matrix = np.zeros((nc + 1, nc + 1))
diff --git a/utils/plots.py b/utils/plots.py
index 9bb865dcd3..ad5f4ea6d3 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -29,7 +29,8 @@
 
 
 class Colors:
-    # Ultralytics color palette https://ultralytics.com/
+    """Provides a color palette and methods to convert indices to RGB or BGR color tuples."""
+
     def __init__(self):
         """Initializes the Colors class with a palette from the Ultralytics color palette."""
         hexs = (
diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py
index 0343291d68..4b1fba08bf 100644
--- a/utils/segment/dataloaders.py
+++ b/utils/segment/dataloaders.py
@@ -82,6 +82,8 @@ def create_dataloader(
 
 
 class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels):  # for training/testing
+    """Loads images, labels, and masks for training/testing with optional augmentations including mosaic and mixup."""
+
     def __init__(
         self,
         path,
diff --git a/utils/segment/loss.py b/utils/segment/loss.py
index 1871d5662e..4e204a42c8 100644
--- a/utils/segment/loss.py
+++ b/utils/segment/loss.py
@@ -12,7 +12,8 @@
 
 
 class ComputeLoss:
-    # Compute losses
+    """Computes classification, box regression, objectness, and segmentation losses for YOLOv3 model predictions."""
+
     def __init__(self, model, autobalance=False, overlap=False):
         """Initializes ComputeLoss with model settings, optional autobalancing, and overlap handling."""
         self.sort_obj_iou = False
diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py
index d4689ca015..9451cc9324 100644
--- a/utils/segment/metrics.py
+++ b/utils/segment/metrics.py
@@ -54,6 +54,8 @@ def ap_per_class_box_and_mask(
 
 
 class Metric:
+    """Represents model evaluation metrics including precision, recall, F1 score, and average precision (AP) values."""
+
     def __init__(self) -> None:
         """Initializes Metric class attributes for precision, recall, F1 score, AP values, and AP class indices."""
         self.p = []  # (nc, )
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index b3eb9871e9..0b1d69a9ab 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -422,7 +422,8 @@ def smart_resume(ckpt, optimizer, ema=None, weights="yolov5s.pt", epochs=300, re
 
 
 class EarlyStopping:
-    # YOLOv3 simple early stopper
+    """Monitors training to halt if no improvement in fitness metric is observed for a specified number of epochs."""
+
     def __init__(self, patience=30):
         """Initializes EarlyStopping to monitor training, halting if no improvement in 'patience' epochs, defaulting to
         30.