Greenroom-Robotics · darrenjkt · Dec 12, 2024 · Dec 12, 2024 · Dec 13, 2024 · Dec 13, 2024
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -0,0 +1,20 @@
+{
+	"name": "rt_detr",
+	"dockerComposeFile": [
+		"../docker-compose.yaml"
+	],
+	"service": "rt_detr", // service name defined in ../docker-compose.yaml
+	"workspaceFolder": "/home/ros/RT-DETR", // same path as the compose service's working_dir
+	"shutdownAction": "stopCompose",
+	"customizations": {
+		"vscode": {
+			"settings": {
+				"remote.autoForwardPorts": false
+			},
+			"extensions": [
+				"ms-python.python",
+				"ms-python.debugpy"
+			]
+		}
+	}
+}
diff --git a/.gitignore b/.gitignore
@@ -170,3 +170,12 @@ rtdetr_pytorch/output/
 rtdetr_pytorch/dataset/
 rtdetrv2_pytorch/output/
 rtdetrv2_pytorch/dataset/
+checkpoints
+# Anchored: a bare `data` would also ignore any nested directory named `data` (e.g. a source package)
+/data
+/rtdetrv2_pytorch/data
+vis_results
+*.png
+*.jpg
+*.pkl
+*2017.json
diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -0,0 +1,27 @@
+services:
+  rt_detr:
+    container_name: rt_detr
+    image: "rt_detr:latest"
+    build:
+      context: ./
+      dockerfile: docker/Dockerfile
+      args:
+        - TARGET_PATH=.  # NOTE(review): docker/Dockerfile declares no matching ARG - looks unused, confirm
+    volumes:
+      - "./rtdetrv2_pytorch:/home/ros/RT-DETR"  # source tree doubles as the workspace
+      - "./benchmark:/home/ros/RT-DETR/benchmark"
+      - "/mnt/gr-nas/visionai-data:/home/ros/RT-DETR/data"  # host NAS path, seen as ./data in the workspace
+      - "/tmp/.X11-unix:/tmp/.X11-unix:rw"  # host X11 socket
+    env_file: docker/.env
+    privileged: true
+    working_dir: /home/ros/RT-DETR
+    user: ros
+    ipc: host
+    network_mode: host
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ["0"]  # pinned to the first GPU
+              capabilities: [gpu]
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -0,0 +1,11 @@
+FROM ghcr.io/greenroom-robotics/ros_builder:jazzy-latest-cuda
+# Python requirements of rtdetrv2_pytorch, installed into the user site-packages
+COPY rtdetrv2_pytorch/requirements.txt /tmp/requirements.txt
+RUN pip install -r /tmp/requirements.txt --user
+# -y is required: image builds are non-interactive, so apt-get cannot wait for a confirmation prompt
+RUN sudo apt-get update && sudo apt-get install -y python3-opencv && sudo rm -rf /var/lib/apt/lists/*
+
+# Misc dependencies
+RUN pip install debugpy
+# Keep the container running indefinitely so it can be attached to
+CMD ["tail", "-f", "/dev/null"]
diff --git a/rtdetrv2_pytorch/.vscode/launch.json b/rtdetrv2_pytorch/.vscode/launch.json
@@ -0,0 +1,25 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        // Attaches to a debugpy server that is already listening on localhost:5678.
+        {
+            "name": "Python Debugger: Remote Attach",
+            "type": "debugpy",
+            "request": "attach",
+            "connect": {
+                "host": "localhost",
+                "port": 5678
+            },
+            "pathMappings": [
+                {
+                    "localRoot": "/home/ros/RT-DETR",
+                    "remoteRoot": "/home/ros/RT-DETR"
+                }
+            ],
+            "justMyCode": false
+        }
+    ]
+}
diff --git a/rtdetrv2_pytorch/configs/dataset/gr_detection.yml b/rtdetrv2_pytorch/configs/dataset/gr_detection.yml
@@ -0,0 +1,42 @@
+task: detection
+
+evaluator:
+  type: CocoEvaluator
+  iou_types: ['bbox']
+
+num_classes: 5  # Has to be actual number of classes + 1
+remap_mscoco_category: false
+
+# Training split. `ops` is null here and is expected to be supplied by an including config.
+train_dataloader:
+  type: DataLoader
+  dataset:
+    type: CocoDetection
+    img_folder: ./data  # relative to the working directory
+    ann_file: ./data/datasets/image/experiments/e24-007-d24-002-traintestsplit/coco_labels/split_train.mapped.json
+    return_masks: false
+    transforms:
+      type: Compose
+      ops: null
+  shuffle: true
+  num_workers: 4
+  drop_last: true
+  collate_fn:
+    type: BatchImageCollateFuncion  # NOTE(review): spelling presumably matches the registered class name - verify before renaming
+
+# Validation split: same layout as above, pointing at the test annotations; no shuffling, keeps the last partial batch.
+val_dataloader:
+  type: DataLoader
+  dataset:
+    type: CocoDetection
+    img_folder: ./data
+    ann_file: ./data/datasets/image/experiments/e24-007-d24-002-traintestsplit/coco_labels/split_test.mapped.json
+    return_masks: false
+    transforms:
+      type: Compose
+      ops: null
+  shuffle: false
+  num_workers: 4
+  drop_last: false
+  collate_fn:
+    type: BatchImageCollateFuncion
diff --git a/rtdetrv2_pytorch/configs/gr/include/dataloader.yml b/rtdetrv2_pytorch/configs/gr/include/dataloader.yml
@@ -0,0 +1,38 @@
+
+train_dataloader:
+  dataset:
+    transforms:
+      ops:
+        - {type: RandomPhotometricDistort, p: 0.5}
+        - {type: RandomZoomOut, fill: 0}
+        - {type: RandomIoUCrop, p: 0.8}
+        - {type: SanitizeBoundingBoxes, min_size: 1}
+        - {type: RandomHorizontalFlip}
+        - {type: Resize, size: [640, 640]}
+        - {type: SanitizeBoundingBoxes, min_size: 1}
+        - {type: ConvertPILImage, dtype: float32, scale: true}
+        - {type: ConvertBoxes, fmt: cxcywh, normalize: true}
+      policy:
+        name: stop_epoch
+        epoch: 71  # epoch in [71, ~) stop `ops`
+        ops: [RandomPhotometricDistort, RandomZoomOut, RandomIoUCrop]
+
+  collate_fn:
+    type: BatchImageCollateFuncion
+    scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
+    stop_epoch: 71  # epoch in [71, ~) stop `multiscales`
+
+  shuffle: true
+  total_batch_size: 16  # total batch size is 16 (4 * 4)
+  num_workers: 4
+
+# Validation: resize and tensor conversion only.
+val_dataloader:
+  dataset:
+    transforms:
+      ops:
+        - {type: Resize, size: [640, 640]}
+        - {type: ConvertPILImage, dtype: float32, scale: true}
+  shuffle: false
+  total_batch_size: 32
+  num_workers: 4
diff --git a/rtdetrv2_pytorch/configs/gr/include/optimizer.yml b/rtdetrv2_pytorch/configs/gr/include/optimizer.yml
@@ -0,0 +1,37 @@
+
+use_amp: true
+use_ema: true
+ema:
+  type: ModelEMA
+  decay: 0.9999
+  warmups: 2000
+
+# Schedule length and gradient clipping. The key is spelled `epoches` throughout these configs.
+epoches: 72
+clip_max_norm: 0.1
+
+# Top-level lr / betas / weight_decay are the defaults; each `params` entry overrides them for matching names.
+optimizer:
+  type: AdamW
+  params:
+    # names containing "backbone" but not "norm": 10x lower learning rate than the default below
+    - params: '^(?=.*backbone)(?!.*norm).*$'
+      lr: 0.00001
+    # names containing "encoder" or "decoder" together with "norm" or "bn": no weight decay
+    - params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+      weight_decay: 0.0
+
+  lr: 0.0001
+  betas: [0.9, 0.999]
+  weight_decay: 0.0001
+
+# NOTE(review): milestone 1000 is far beyond `epoches: 72`; if milestones count epochs, the LR never decays - confirm.
+lr_scheduler:
+  type: MultiStepLR
+  milestones: [1000]
+  gamma: 0.1
+
+
+lr_warmup_scheduler:
+  type: LinearWarmup
+  warmup_duration: 2000
diff --git a/rtdetrv2_pytorch/configs/gr/include/rtdetrv2_r50vd.yml b/rtdetrv2_pytorch/configs/gr/include/rtdetrv2_r50vd.yml
@@ -0,0 +1,83 @@
+task: detection
+
+model: RTDETR
+criterion: RTDETRCriterionv2
+postprocessor: RTDETRPostProcessor
+
+
+use_focal_loss: true
+eval_spatial_size: [640, 640]  # h w
+
+# Sub-modules of the model; each name has its own section further down.
+RTDETR:
+  backbone: PResNet
+  encoder: HybridEncoder
+  decoder: RTDETRTransformerv2
+
+# Backbone section.
+PResNet:
+  depth: 50
+  variant: d
+  freeze_at: 0
+  return_idx: [1, 2, 3]
+  num_stages: 4
+  freeze_norm: true
+  pretrained: true
+
+# Encoder section: three input levels, listed with their strides.
+HybridEncoder:
+  in_channels: [512, 1024, 2048]
+  feat_strides: [8, 16, 32]
+
+  # intra
+  hidden_dim: 256
+  use_encoder_idx: [2]
+  num_encoder_layers: 1
+  nhead: 8
+  dim_feedforward: 1024
+  dropout: 0.0
+  enc_act: gelu
+
+  # cross
+  expansion: 1.0
+  depth_mult: 1
+  act: silu
+
+# Decoder section.
+RTDETRTransformerv2:
+  feat_channels: [256, 256, 256]
+  feat_strides: [8, 16, 32]
+  hidden_dim: 256
+  num_levels: 3
+
+  num_layers: 6
+  num_queries: 300
+
+  num_denoising: 100
+  label_noise_ratio: 0.5
+  box_noise_scale: 1.0  # 1.0 0.4
+
+  eval_idx: -1
+
+  # NEW
+  num_points: [4, 4, 4]  # [3,3,3] [2,2,2]
+  cross_attn_method: default  # default, discrete
+  query_select_method: default  # default, agnostic
+
+
+RTDETRPostProcessor:
+  num_top_queries: 300
+
+# Loss weights, plus the matcher and its own cost weights.
+RTDETRCriterionv2:
+  weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2}
+  losses: [vfl, boxes]
+  alpha: 0.75
+  gamma: 2.0
+
+  matcher:
+    type: HungarianMatcher
+    weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
+    alpha: 0.25
+    gamma: 2.0
+
diff --git a/rtdetrv2_pytorch/configs/gr/rtdetrv2_r18vd_10e_gr.yml b/rtdetrv2_pytorch/configs/gr/rtdetrv2_r18vd_10e_gr.yml
@@ -0,0 +1,67 @@
+# Base configs; order kept as-is. The keys below override values they define.
+__include__:
+  - '../dataset/gr_detection.yml'
+  - './include/dataloader.yml'
+  - './include/optimizer.yml'
+  - './include/rtdetrv2_r50vd.yml'
+  - '../runtime.yml'
+
+
+output_dir: ./output/rtdetrv2_r18vd_10e
+
+# Backbone: depth 18 here, versus depth 50 in include/rtdetrv2_r50vd.yml.
+PResNet:
+  depth: 18
+  freeze_at: -1
+  freeze_norm: false
+  pretrained: true
+
+# Encoder input channels for this backbone.
+HybridEncoder:
+  in_channels: [128, 256, 512]
+  hidden_dim: 256
+  expansion: 0.5
+
+
+RTDETRTransformerv2:
+  num_layers: 3
+
+
+epoches: 10
+
+optimizer:
+  type: AdamW
+  params:
+    # names containing "norm" or "bn": no weight decay
+    - params: '^(?=.*(?:norm|bn)).*$'
+      weight_decay: 0.0
+
+eval_spatial_size: [1280, 1280]  # same size as the Resize ops below
+
+train_dataloader:
+  dataset:
+    transforms:
+      ops:
+        - {type: RandomPhotometricDistort, p: 0.5}
+        - {type: RandomZoomOut, fill: 0}
+        - {type: RandomIoUCrop, p: 0.8}
+        - {type: SanitizeBoundingBoxes, min_size: 1}
+        - {type: RandomHorizontalFlip}
+        - {type: Resize, size: [1280, 1280]}
+        - {type: SanitizeBoundingBoxes, min_size: 1}
+        - {type: ConvertPILImage, dtype: float32, scale: true}
+        - {type: ConvertBoxes, fmt: cxcywh, normalize: true}
+      policy:
+        epoch: 45  # NOTE(review): `epoches` is 10, so this stop epoch looks unreachable - confirm intent
+  collate_fn:
+    scales: null  # null instead of the scale list in include/dataloader.yml
+
+  total_batch_size: 6
+
+val_dataloader:
+  dataset:
+    transforms:
+      ops:
+        - {type: Resize, size: [1280, 1280]}
+        - {type: ConvertPILImage, dtype: float32, scale: true}
+  total_batch_size: 12