From b6f519b28763e19903b2eba1bdf4dde7c58e3b42 Mon Sep 17 00:00:00 2001 From: "Eng.Ahmed ElBamby" Date: Thu, 2 Apr 2026 23:05:15 +0000 Subject: [PATCH 1/3] Add multitask configs, task preflight checks, and flash backend controls --- ultralytics/cfg/models/v13/yolov13-obb.yaml | 52 +++ ultralytics/cfg/models/v13/yolov13-pose.yaml | 53 +++ ultralytics/cfg/models/v13/yolov13-seg.yaml | 52 +++ ultralytics/cfg/models/v13/yolov13l-obb.yaml | 49 +++ ultralytics/cfg/models/v13/yolov13l-pose.yaml | 50 +++ ultralytics/cfg/models/v13/yolov13l-seg.yaml | 49 +++ ultralytics/cfg/models/v13/yolov13l.yaml | 50 +++ ultralytics/cfg/models/v13/yolov13n-obb.yaml | 49 +++ ultralytics/cfg/models/v13/yolov13n-pose.yaml | 50 +++ ultralytics/cfg/models/v13/yolov13n-seg.yaml | 49 +++ ultralytics/cfg/models/v13/yolov13s-obb.yaml | 49 +++ ultralytics/cfg/models/v13/yolov13s-pose.yaml | 50 +++ ultralytics/cfg/models/v13/yolov13s-seg.yaml | 49 +++ ultralytics/cfg/models/v13/yolov13s.yaml | 50 +++ ultralytics/cfg/models/v13/yolov13x-obb.yaml | 49 +++ ultralytics/cfg/models/v13/yolov13x-pose.yaml | 50 +++ ultralytics/cfg/models/v13/yolov13x-seg.yaml | 49 +++ ultralytics/cfg/models/v13/yolov13x.yaml | 50 +++ ultralytics/data/utils.py | 144 ++++++- ultralytics/engine/trainer.py | 25 +- ultralytics/engine/validator.py | 2 +- ultralytics/models/yolo/world/train_world.py | 4 +- ultralytics/nn/modules/block.py | 352 ++++++++++-------- ultralytics/utils/dist.py | 71 +++- ultralytics/utils/flash_turing_interface.py | 68 ++++ ultralytics/utils/metrics.py | 22 +- 26 files changed, 1395 insertions(+), 192 deletions(-) create mode 100644 ultralytics/cfg/models/v13/yolov13-obb.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13-pose.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13-seg.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13l-obb.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13l-pose.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13l-seg.yaml create mode 
100644 ultralytics/cfg/models/v13/yolov13l.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13n-obb.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13n-pose.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13n-seg.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13s-obb.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13s-pose.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13s-seg.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13s.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13x-obb.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13x-pose.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13x-seg.yaml create mode 100644 ultralytics/cfg/models/v13/yolov13x.yaml create mode 100644 ultralytics/utils/flash_turing_interface.py diff --git a/ultralytics/cfg/models/v13/yolov13-obb.yaml b/ultralytics/cfg/models/v13/yolov13-obb.yaml new file mode 100644 index 000000000..24b83154d --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13-obb.yaml @@ -0,0 +1,52 @@ +nc: 80 # number of classes +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + m: [0.75, 0.75, 768] # Medium + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large + xl: [1.25, 1.75, 512] # Extra Large Plus + +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 
1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, OBB, [nc, 1]] diff --git a/ultralytics/cfg/models/v13/yolov13-pose.yaml b/ultralytics/cfg/models/v13/yolov13-pose.yaml new file mode 100644 index 000000000..b37076573 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13-pose.yaml @@ -0,0 +1,53 @@ +nc: 80 # number of classes +kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + m: [0.75, 0.75, 768] # Medium + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large + xl: [1.25, 1.75, 512] # Extra Large Plus + +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, 
[512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Pose, [nc, kpt_shape]] diff --git a/ultralytics/cfg/models/v13/yolov13-seg.yaml b/ultralytics/cfg/models/v13/yolov13-seg.yaml new file mode 100644 index 000000000..33ed2f075 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13-seg.yaml @@ -0,0 +1,52 @@ +nc: 80 # number of classes +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + m: [0.75, 0.75, 768] # Medium + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large + xl: [1.25, 1.75, 512] # Extra Large Plus + +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 
1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Segment, [nc, 32, 256]] diff --git a/ultralytics/cfg/models/v13/yolov13l-obb.yaml b/ultralytics/cfg/models/v13/yolov13l-obb.yaml new file mode 100644 index 000000000..b7a71e767 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13l-obb.yaml @@ -0,0 +1,49 @@ +nc: 80 # number of classes +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, 
Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, OBB, [nc, 1]] diff --git a/ultralytics/cfg/models/v13/yolov13l-pose.yaml b/ultralytics/cfg/models/v13/yolov13l-pose.yaml new file mode 100644 index 000000000..1dbb90ba5 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13l-pose.yaml @@ -0,0 +1,50 @@ +nc: 80 # number of classes +kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Pose, [nc, kpt_shape]] diff --git 
a/ultralytics/cfg/models/v13/yolov13l-seg.yaml b/ultralytics/cfg/models/v13/yolov13l-seg.yaml new file mode 100644 index 000000000..d675cc2b9 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13l-seg.yaml @@ -0,0 +1,49 @@ +nc: 80 # number of classes +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Segment, [nc, 32, 256]] diff --git a/ultralytics/cfg/models/v13/yolov13l.yaml b/ultralytics/cfg/models/v13/yolov13l.yaml new file mode 100644 index 000000000..68babb93e --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13l.yaml @@ -0,0 +1,50 @@ +nc: 80 # number of classes 
+scales: # model compound scaling constants, i.e. 'model=yolov13n.yaml' will call yolov13.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large + +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2, 1, 2]] # 1-P2/4 + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] # 3-P3/8 + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] # 5-P4/16 + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] # 7-P5/32 + - [-1, 4, A2C2f, [1024, True, 1]] # 8 + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] #12 + - [[4, 10], 1, FullPAD_Tunnel, []] #13 + - [[8, 11], 1, FullPAD_Tunnel, []] #14 + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] # cat backbone P4 + - [-1, 2, DSC3k2, [512, True]] # 17 + - [[-1, 9], 1, FullPAD_Tunnel, []] #18 + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] # cat backbone P3 + - [-1, 2, DSC3k2, [256, True]] # 21 + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] #23 + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] # cat head P4 + - [-1, 2, DSC3k2, [512, True]] # 26 + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] # cat head P5 + - [-1, 2, DSC3k2, [1024,True]] # 30 (P5/32-large) + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Detect, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/cfg/models/v13/yolov13n-obb.yaml b/ultralytics/cfg/models/v13/yolov13n-obb.yaml new file mode 100644 index 000000000..b7a71e767 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13n-obb.yaml @@ -0,0 +1,49 @@ +nc: 80 # number of classes 
+scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, OBB, [nc, 1]] diff --git a/ultralytics/cfg/models/v13/yolov13n-pose.yaml b/ultralytics/cfg/models/v13/yolov13n-pose.yaml new file mode 100644 index 000000000..1dbb90ba5 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13n-pose.yaml @@ -0,0 +1,50 @@ +nc: 80 # number of classes +kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] 
# Large + x: [1.00, 1.50, 512] # Extra Large +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Pose, [nc, kpt_shape]] diff --git a/ultralytics/cfg/models/v13/yolov13n-seg.yaml b/ultralytics/cfg/models/v13/yolov13n-seg.yaml new file mode 100644 index 000000000..d675cc2b9 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13n-seg.yaml @@ -0,0 +1,49 @@ +nc: 80 # number of classes +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, 
DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Segment, [nc, 32, 256]] diff --git a/ultralytics/cfg/models/v13/yolov13s-obb.yaml b/ultralytics/cfg/models/v13/yolov13s-obb.yaml new file mode 100644 index 000000000..b7a71e767 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13s-obb.yaml @@ -0,0 +1,49 @@ +nc: 80 # number of classes +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - 
[-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, OBB, [nc, 1]] diff --git a/ultralytics/cfg/models/v13/yolov13s-pose.yaml b/ultralytics/cfg/models/v13/yolov13s-pose.yaml new file mode 100644 index 000000000..1dbb90ba5 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13s-pose.yaml @@ -0,0 +1,50 @@ +nc: 80 # number of classes +kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 
1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Pose, [nc, kpt_shape]] diff --git a/ultralytics/cfg/models/v13/yolov13s-seg.yaml b/ultralytics/cfg/models/v13/yolov13s-seg.yaml new file mode 100644 index 000000000..d675cc2b9 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13s-seg.yaml @@ -0,0 +1,49 @@ +nc: 80 # number of classes +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, 
[1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Segment, [nc, 32, 256]] diff --git a/ultralytics/cfg/models/v13/yolov13s.yaml b/ultralytics/cfg/models/v13/yolov13s.yaml new file mode 100644 index 000000000..68babb93e --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13s.yaml @@ -0,0 +1,50 @@ +nc: 80 # number of classes +scales: # model compound scaling constants, i.e. 'model=yolov13n.yaml' will call yolov13.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large + +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2, 1, 2]] # 1-P2/4 + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] # 3-P3/8 + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] # 5-P4/16 + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] # 7-P5/32 + - [-1, 4, A2C2f, [1024, True, 1]] # 8 + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] #12 + - [[4, 10], 1, FullPAD_Tunnel, []] #13 + - [[8, 11], 1, FullPAD_Tunnel, []] #14 + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] # cat backbone P4 + - [-1, 2, DSC3k2, [512, True]] # 17 + - [[-1, 9], 1, FullPAD_Tunnel, []] #18 + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] # cat backbone P3 + - [-1, 2, DSC3k2, [256, True]] # 21 + - [10, 1, Conv, [256, 1, 1]] + - 
[[21, 22], 1, FullPAD_Tunnel, []] #23 + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] # cat head P4 + - [-1, 2, DSC3k2, [512, True]] # 26 + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] # cat head P5 + - [-1, 2, DSC3k2, [1024,True]] # 30 (P5/32-large) + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Detect, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/cfg/models/v13/yolov13x-obb.yaml b/ultralytics/cfg/models/v13/yolov13x-obb.yaml new file mode 100644 index 000000000..b7a71e767 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13x-obb.yaml @@ -0,0 +1,49 @@ +nc: 80 # number of classes +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 
1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, OBB, [nc, 1]] diff --git a/ultralytics/cfg/models/v13/yolov13x-pose.yaml b/ultralytics/cfg/models/v13/yolov13x-pose.yaml new file mode 100644 index 000000000..1dbb90ba5 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13x-pose.yaml @@ -0,0 +1,50 @@ +nc: 80 # number of classes +kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Pose, [nc, 
kpt_shape]] diff --git a/ultralytics/cfg/models/v13/yolov13x-seg.yaml b/ultralytics/cfg/models/v13/yolov13x-seg.yaml new file mode 100644 index 000000000..d675cc2b9 --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13x-seg.yaml @@ -0,0 +1,49 @@ +nc: 80 # number of classes +scales: # model compound scaling constants + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] + - [-1, 1, Conv, [128, 3, 2, 1, 2]] + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] + - [-1, 4, A2C2f, [1024, True, 1]] + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] + - [[4, 10], 1, FullPAD_Tunnel, []] + - [[8, 11], 1, FullPAD_Tunnel, []] + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] + - [-1, 2, DSC3k2, [256, True]] + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] + - [-1, 2, DSC3k2, [512, True]] + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] + - [-1, 2, DSC3k2, [1024, True]] + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Segment, [nc, 32, 256]] diff --git a/ultralytics/cfg/models/v13/yolov13x.yaml b/ultralytics/cfg/models/v13/yolov13x.yaml new file mode 100644 index 000000000..68babb93e --- /dev/null +++ b/ultralytics/cfg/models/v13/yolov13x.yaml @@ -0,0 +1,50 @@ +nc: 80 # 
number of classes +scales: # model compound scaling constants, i.e. 'model=yolov13n.yaml' will call yolov13.yaml with scale 'n' + # [depth, width, max_channels] + n: [0.50, 0.25, 1024] # Nano + s: [0.50, 0.50, 1024] # Small + l: [1.00, 1.00, 512] # Large + x: [1.00, 1.50, 512] # Extra Large + +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2, 1, 2]] # 1-P2/4 + - [-1, 2, DSC3k2, [256, False, 0.25]] + - [-1, 1, Conv, [256, 3, 2, 1, 4]] # 3-P3/8 + - [-1, 2, DSC3k2, [512, False, 0.25]] + - [-1, 1, DSConv, [512, 3, 2]] # 5-P4/16 + - [-1, 4, A2C2f, [512, True, 4]] + - [-1, 1, DSConv, [1024, 3, 2]] # 7-P5/32 + - [-1, 4, A2C2f, [1024, True, 1]] # 8 + +head: + - [[4, 6, 8], 2, HyperACE, [512, 8, True, True, 0.5, 1, "both"]] + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [ 9, 1, DownsampleConv, []] + - [[6, 9], 1, FullPAD_Tunnel, []] #12 + - [[4, 10], 1, FullPAD_Tunnel, []] #13 + - [[8, 11], 1, FullPAD_Tunnel, []] #14 + + - [-1, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 12], 1, Concat, [1]] # cat backbone P4 + - [-1, 2, DSC3k2, [512, True]] # 17 + - [[-1, 9], 1, FullPAD_Tunnel, []] #18 + + - [17, 1, nn.Upsample, [None, 2, "nearest"]] + - [[-1, 13], 1, Concat, [1]] # cat backbone P3 + - [-1, 2, DSC3k2, [256, True]] # 21 + - [10, 1, Conv, [256, 1, 1]] + - [[21, 22], 1, FullPAD_Tunnel, []] #23 + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 18], 1, Concat, [1]] # cat head P4 + - [-1, 2, DSC3k2, [512, True]] # 26 + - [[-1, 9], 1, FullPAD_Tunnel, []] + + - [26, 1, Conv, [512, 3, 2]] + - [[-1, 14], 1, Concat, [1]] # cat head P5 + - [-1, 2, DSC3k2, [1024,True]] # 30 (P5/32-large) + - [[-1, 11], 1, FullPAD_Tunnel, []] + + - [[23, 27, 31], 1, Detect, [nc]] # Detect(P3, P4, P5) diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py index 50b597d86..59ad043dc 100644 --- a/ultralytics/data/utils.py +++ b/ultralytics/data/utils.py @@ -298,7 +298,146 @@ def find_dataset_yaml(path: Path) -> Path: return 
files[0] -def check_det_dataset(dataset, autodownload=True): +def _sample_label_files(train_paths, max_files=200): + """Collect a sample of label files from train image paths.""" + label_files = [] + seen = set() + + def _add_from_dir(d: Path): + if not d.exists() or not d.is_dir(): + return False + added = False + for lb in d.rglob("*.txt"): + s = str(lb) + if s not in seen: + seen.add(s) + label_files.append(lb) + added = True + if len(label_files) >= max_files: + return True + return added + + for p in train_paths: + p = Path(p) + if not p.exists(): + continue + + # train path can be a text file with image paths + if p.is_file() and p.suffix.lower() == ".txt": + try: + im_files = [ + Path(x.strip()) for x in p.read_text(encoding="utf-8", errors="ignore").splitlines() if x.strip() + ] + except Exception: + im_files = [] + for im in im_files: + lb = Path(str(im).replace(f"{os.sep}images{os.sep}", f"{os.sep}labels{os.sep}")).with_suffix(".txt") + if lb.exists(): + s = str(lb) + if s not in seen: + seen.add(s) + label_files.append(lb) + if len(label_files) >= max_files: + return label_files + continue + + if p.is_file(): + continue + + candidates = [ + Path(str(p).replace(f"{os.sep}images{os.sep}", f"{os.sep}labels{os.sep}")), + p.parent / "labels", + p.parent.parent / "labels" / p.name, + p / "labels", + ] + for c in candidates: + if _add_from_dir(c) and len(label_files) >= max_files: + return label_files + + # Fallback: scan nearby roots for any labels subtree + for root in {p, p.parent, p.parent.parent}: + if not root.exists() or not root.is_dir(): + continue + for labels_dir in root.rglob("labels"): + if _add_from_dir(labels_dir) and len(label_files) >= max_files: + return label_files + + return label_files + + +def _validate_task_label_schema(data, task): + """Validate task-specific label line shape for a sample of train labels.""" + if task not in {"segment", "pose", "obb"}: + return + + train = data.get("train") + train_paths = train if isinstance(train, 
(list, tuple)) else [train] + label_files = _sample_label_files(train_paths) + if not label_files: + LOGGER.warning(f"WARNING ⚠️ task={task} preflight skipped: no label files sampled from train paths.") + return + + if task == "pose": + kpt_shape = data.get("kpt_shape") + if not isinstance(kpt_shape, (list, tuple)) or len(kpt_shape) != 2: + raise SyntaxError(emojis(f"Pose dataset requires 'kpt_shape: [num_kpts, dims]' in data YAML. {HELP_URL}")) + nkpt, ndim = int(kpt_shape[0]), int(kpt_shape[1]) + if nkpt <= 0 or ndim not in {2, 3}: + raise SyntaxError( + emojis(f"Invalid kpt_shape={kpt_shape}. Expected [num_kpts>0, dims in {{2,3}}]. {HELP_URL}") + ) + expected = 5 + nkpt * ndim + else: + expected = None + + errors = [] + checked = 0 + for lb in label_files: + try: + lines = lb.read_text(encoding="utf-8", errors="ignore").splitlines() + except Exception: + continue + + for i, line in enumerate(lines, 1): + line = line.strip() + if not line: + continue + parts = line.split() + checked += 1 + n = len(parts) + + try: + _ = [float(x) for x in parts] + except ValueError: + errors.append(f"{lb}:{i} contains non-numeric values") + if len(errors) >= 5: + break + continue + + if task == "pose": + if n != expected: + errors.append(f"{lb}:{i} has {n} columns, expected {expected} for pose") + elif task == "segment": + if n < 7 or n % 2 == 0: + errors.append(f"{lb}:{i} has {n} columns, expected cls + polygon coords (odd >= 7)") + elif task == "obb": + if n < 9 or n % 2 == 0: + errors.append(f"{lb}:{i} has {n} columns, expected cls + 8+ corner coords (odd >= 9)") + + if len(errors) >= 5: + break + if len(errors) >= 5: + break + + if errors: + hint = "\n".join(errors) + raise SyntaxError(emojis(f"Task preflight failed for task={task} ❌\n{hint}\n{HELP_URL}")) + + if checked == 0: + LOGGER.warning(f"WARNING ⚠️ task={task} preflight sampled labels but found no non-empty label rows.") + + +def check_det_dataset(dataset, autodownload=True, task=None): """ Download, verify, and/or 
unzip a dataset if not found locally. @@ -388,6 +527,9 @@ def check_det_dataset(dataset, autodownload=True): LOGGER.info(f"Dataset download {s}\n") check_font("Arial.ttf" if is_ascii(data["names"]) else "Arial.Unicode.ttf") # download fonts + if task in {"segment", "pose", "obb"}: + _validate_task_label_schema(data, task) + return data # dictionary diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py index a373cd825..aa8287788 100644 --- a/ultralytics/engine/trainer.py +++ b/ultralytics/engine/trainer.py @@ -264,13 +264,19 @@ def _setup_train(self, world_size): self.amp = torch.tensor(check_amp(self.model), device=self.device) callbacks.default_callbacks = callbacks_backup # restore callbacks if RANK > -1 and world_size > 1: # DDP - dist.broadcast(self.amp, src=0) # broadcast the tensor from rank 0 to all other ranks (returns None) + self.amp = self.amp.int() # gloo backend may fail on bool tensor broadcast in some torch builds + dist.broadcast(self.amp, src=0) self.amp = bool(self.amp) # as boolean self.scaler = ( torch.amp.GradScaler("cuda", enabled=self.amp) if TORCH_2_4 else torch.cuda.amp.GradScaler(enabled=self.amp) ) if world_size > 1: - self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK], find_unused_parameters=True) + self.model = nn.parallel.DistributedDataParallel( + self.model, + device_ids=[RANK], + find_unused_parameters=True, + gradient_as_bucket_view=False, + ) self.set_model_attributes() # set again after DDP wrapper # Check imgsz @@ -385,6 +391,19 @@ def _do_train(self, world_size=1): (self.tloss * i + self.loss_items) / (i + 1) if self.tloss is not None else self.loss_items ) + # Non-finite loss guard for DDP stability + loss_is_finite = torch.isfinite(self.loss.detach()).int() + if RANK != -1: + dist.all_reduce(loss_is_finite, op=dist.ReduceOp.MIN) + if not bool(loss_is_finite.item()): + if RANK in {-1, 0}: + loss_value = float(self.loss.detach().float().cpu()) + LOGGER.warning( + "WARNING ⚠️ 
Non-finite loss detected (%.4g). Skipping optimizer step." % loss_value + ) + self.optimizer.zero_grad(set_to_none=True) + continue + # Backward self.scaler.scale(self.loss).backward() @@ -560,7 +579,7 @@ def get_dataset(self): "pose", "obb", }: - data = check_det_dataset(self.args.data) + data = check_det_dataset(self.args.data, task=self.args.task) if "yaml_file" in data: self.args.data = data["yaml_file"] # for validating 'yolo train data=url.zip' usage except Exception as e: diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py index 6dc8026f2..8757a0224 100644 --- a/ultralytics/engine/validator.py +++ b/ultralytics/engine/validator.py @@ -142,7 +142,7 @@ def __call__(self, trainer=None, model=None): LOGGER.info(f"Setting batch={self.args.batch} input of shape ({self.args.batch}, 3, {imgsz}, {imgsz})") if str(self.args.data).split(".")[-1] in {"yaml", "yml"}: - self.data = check_det_dataset(self.args.data) + self.data = check_det_dataset(self.args.data, task=self.args.task) elif self.args.task == "classify": self.data = check_cls_dataset(self.args.data, split=self.args.split) else: diff --git a/ultralytics/models/yolo/world/train_world.py b/ultralytics/models/yolo/world/train_world.py index 3cbdb2a4e..d344a5947 100644 --- a/ultralytics/models/yolo/world/train_world.py +++ b/ultralytics/models/yolo/world/train_world.py @@ -74,7 +74,9 @@ def get_dataset(self): data_yaml = self.args.data assert data_yaml.get("train", False), "train dataset not found" # object365.yaml assert data_yaml.get("val", False), "validation dataset not found" # lvis.yaml - data = {k: [check_det_dataset(d) for d in v.get("yolo_data", [])] for k, v in data_yaml.items()} + data = { + k: [check_det_dataset(d, task=self.args.task) for d in v.get("yolo_data", [])] for k, v in data_yaml.items() + } assert len(data["val"]) == 1, f"Only support validating on 1 dataset for now, but got {len(data['val'])}." 
import logging
import os

logger = logging.getLogger(__name__)

# Backend state mutated by configure_flash_backend(); "fallback" means manual SDPA attention.
USE_FLASH_ATTN = False
FLASH_BACKEND = "fallback"
FLASH_ERROR = ""


def configure_flash_backend(disable_flash=None, use_turing_flash=None):
    """Select the attention backend and record the choice in module globals.

    Preference order: official flash-attn on compute capability >= 8, the Turing (sm_75) port
    when explicitly enabled, otherwise the fallback attention path.

    Args:
        disable_flash (bool | None): Force the fallback backend. When None, reads the
            Y13_DISABLE_FLASH environment variable.
        use_turing_flash (bool | None): Permit the Turing flash port on sm_75 GPUs. When None,
            reads the Y13_USE_TURING_FLASH environment variable.

    Returns:
        (str): The selected backend: 'flash_attn', 'flash_attn_turing' or 'fallback'.
    """
    global USE_FLASH_ATTN, FLASH_BACKEND, FLASH_ERROR

    USE_FLASH_ATTN, FLASH_BACKEND, FLASH_ERROR = False, "fallback", ""

    try:
        import torch

        # Explicit arguments win over environment variables.
        if disable_flash is None:
            disable_flash = os.getenv("Y13_DISABLE_FLASH", "0") == "1"
        else:
            disable_flash = bool(disable_flash)
        if use_turing_flash is None:
            use_turing_flash = os.getenv("Y13_USE_TURING_FLASH", "0") == "1"
        else:
            use_turing_flash = bool(use_turing_flash)

        if torch.cuda.is_available() and not disable_flash:
            capability = torch.cuda.get_device_capability()

            if capability[0] >= 8:  # Ampere or newer: official flash-attn package
                try:
                    from flash_attn.flash_attn_interface import flash_attn_func as _impl

                    globals()["flash_attn_func"] = _impl
                    USE_FLASH_ATTN, FLASH_BACKEND = True, "flash_attn"
                except Exception as e:
                    FLASH_ERROR = str(e)
            elif capability == (7, 5) and use_turing_flash:  # Turing: opt-in local port
                try:
                    from ultralytics.utils.flash_turing_interface import flash_attn_func as _impl

                    globals()["flash_attn_func"] = _impl
                    USE_FLASH_ATTN, FLASH_BACKEND = True, "flash_attn_turing"
                except Exception as e:
                    FLASH_ERROR = str(e)

        if USE_FLASH_ATTN:
            logger.info(f"Flash backend selected: {FLASH_BACKEND}")
        elif disable_flash:
            logger.info("Flash attention disabled by Y13_DISABLE_FLASH=1, using fallback attention backend.")
        elif FLASH_ERROR:
            logger.warning(
                f"Flash attention backend unavailable ({FLASH_ERROR}). Using fallback attention backend."
            )
        else:
            logger.warning(
                "Flash attention backend unavailable on this device/config. Using fallback attention backend."
            )
    except Exception as e:
        # Any unexpected failure (including a missing torch) degrades to the fallback path.
        FLASH_ERROR = str(e)
        USE_FLASH_ATTN, FLASH_BACKEND = False, "fallback"
        logger.warning(f"Flash attention initialization failed ({FLASH_ERROR}). Using fallback attention backend.")

    return FLASH_BACKEND


configure_flash_backend()
(self.head_dim**-0.5) max_attn = attn.max(dim=-1, keepdim=True).values exp_attn = torch.exp(attn - max_attn) attn = exp_attn / exp_attn.sum(dim=-1, keepdim=True) - x = (v @ attn.transpose(-2, -1)) + x = v @ attn.transpose(-2, -1) - x = x.permute(0, 3, 1, 2) + x = x.permute(0, 3, 1, 2).contiguous() if self.area > 1: x = x.reshape(B // self.area, N * self.area, C) B, N, _ = x.shape - x = x.reshape(B, H, W, C).permute(0, 3, 1, 2) + x = x.reshape(B, H, W, C).permute(0, 3, 1, 2).contiguous() return self.proj(x + pp) - + class ABlock(nn.Module): """ @@ -1287,8 +1341,8 @@ class ABlock(nn.Module): >>> x = torch.randn(2, 64, 128, 128) >>> output = model(x) >>> print(output.shape) - - Notes: + + Notes: recommend that dim//num_heads be a multiple of 32 or 64. """ @@ -1316,7 +1370,7 @@ def forward(self, x): return x -class A2C2f(nn.Module): +class A2C2f(nn.Module): """ A2C2f module with residual enhanced feature extraction using ABlock blocks with area-attention. Also known as R-ELAN @@ -1361,7 +1415,10 @@ def __init__(self, c1, c2, n=1, a2=True, area=1, residual=False, mlp_ratio=2.0, self.gamma = nn.Parameter(init_values * torch.ones((c2)), requires_grad=True) if a2 and residual else None self.m = nn.ModuleList( - nn.Sequential(*(ABlock(c_, num_heads, mlp_ratio, area) for _ in range(2))) if a2 else C3k(c_, c_, 2, shortcut, g) for _ in range(n) + nn.Sequential(*(ABlock(c_, num_heads, mlp_ratio, area) for _ in range(2))) + if a2 + else C3k(c_, c_, 2, shortcut, g) + for _ in range(n) ) def forward(self, x): @@ -1372,12 +1429,13 @@ def forward(self, x): return x + self.gamma.view(1, -1, 1, 1) * self.cv2(torch.cat(y, 1)) return self.cv2(torch.cat(y, 1)) + class DSBottleneck(nn.Module): """ An improved bottleneck block using depthwise separable convolutions (DSConv). This class implements a lightweight bottleneck module that replaces standard convolutions with depthwise - separable convolutions to reduce parameters and computational cost. 
+ separable convolutions to reduce parameters and computational cost. Attributes: c1 (int): Number of input channels. @@ -1399,11 +1457,12 @@ class DSBottleneck(nn.Module): >>> print(output.shape) torch.Size([2, 64, 32, 32]) """ + def __init__(self, c1, c2, shortcut=True, e=0.5, k1=3, k2=5, d2=1): super().__init__() c_ = int(c2 * e) - self.cv1 = DSConv(c1, c_, k1, s=1, p=None, d=1) - self.cv2 = DSConv(c_, c2, k2, s=1, p=None, d=d2) + self.cv1 = DSConv(c1, c_, k1, s=1, p=None, d=1) + self.cv2 = DSConv(c_, c2, k2, s=1, p=None, d=d2) self.add = shortcut and c1 == c2 def forward(self, x): @@ -1440,34 +1499,13 @@ class DSC3k(C3): >>> print(output.shape) torch.Size([2, 128, 64, 64]) """ - def __init__( - self, - c1, - c2, - n=1, - shortcut=True, - g=1, - e=0.5, - k1=3, - k2=5, - d2=1 - ): + + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, k1=3, k2=5, d2=1): super().__init__(c1, c2, n, shortcut, g, e) - c_ = int(c2 * e) - - self.m = nn.Sequential( - *( - DSBottleneck( - c_, c_, - shortcut=shortcut, - e=1.0, - k1=k1, - k2=k2, - d2=d2 - ) - for _ in range(n) - ) - ) + c_ = int(c2 * e) + + self.m = nn.Sequential(*(DSBottleneck(c_, c_, shortcut=shortcut, e=1.0, k1=k1, k2=k2, d2=d2) for _ in range(n))) + class DSC3k2(C2f): """ @@ -1505,47 +1543,19 @@ class DSC3k2(C2f): >>> print(f"With DSC3k: {output2.shape}") With DSC3k: torch.Size([2, 64, 128, 128]) """ - def __init__( - self, - c1, - c2, - n=1, - dsc3k=False, - e=0.5, - g=1, - shortcut=True, - k1=3, - k2=7, - d2=1 - ): + + def __init__(self, c1, c2, n=1, dsc3k=False, e=0.5, g=1, shortcut=True, k1=3, k2=7, d2=1): super().__init__(c1, c2, n, shortcut, g, e) if dsc3k: self.m = nn.ModuleList( - DSC3k( - self.c, self.c, - n=2, - shortcut=shortcut, - g=g, - e=1.0, - k1=k1, - k2=k2, - d2=d2 - ) - for _ in range(n) + DSC3k(self.c, self.c, n=2, shortcut=shortcut, g=g, e=1.0, k1=k1, k2=k2, d2=d2) for _ in range(n) ) else: self.m = nn.ModuleList( - DSBottleneck( - self.c, self.c, - shortcut=shortcut, - e=1.0, - k1=k1, - 
k2=k2, - d2=d2 - ) - for _ in range(n) + DSBottleneck(self.c, self.c, shortcut=shortcut, e=1.0, k1=k1, k2=k2, d2=d2) for _ in range(n) ) + class AdaHyperedgeGen(nn.Module): """ Generates an adaptive hyperedge participation matrix from a set of vertex features. @@ -1572,6 +1582,7 @@ class AdaHyperedgeGen(nn.Module): >>> print(A.shape) torch.Size([2, 100, 16]) """ + def __init__(self, node_dim, num_hyperedges, num_heads=4, dropout=0.1, context="both"): super().__init__() self.num_heads = num_heads @@ -1582,47 +1593,45 @@ def __init__(self, node_dim, num_hyperedges, num_heads=4, dropout=0.1, context=" self.prototype_base = nn.Parameter(torch.Tensor(num_hyperedges, node_dim)) nn.init.xavier_uniform_(self.prototype_base) if context in ("mean", "max"): - self.context_net = nn.Linear(node_dim, num_hyperedges * node_dim) + self.context_net = nn.Linear(node_dim, num_hyperedges * node_dim) elif context == "both": - self.context_net = nn.Linear(2*node_dim, num_hyperedges * node_dim) + self.context_net = nn.Linear(2 * node_dim, num_hyperedges * node_dim) else: - raise ValueError( - f"Unsupported context '{context}'. " - "Expected one of: 'mean', 'max', 'both'." - ) + raise ValueError(f"Unsupported context '{context}'. 
class AdaHGConv(nn.Module):
    """Adaptive hypergraph convolution over a set of vertex tokens.

    Generates a soft vertex-to-hyperedge incidence matrix, aggregates vertices into hyperedge
    features, projects them, scatters them back to the vertices, and adds a residual connection.

    Examples:
        >>> import torch
        >>> model = AdaHGConv(embed_dim=128, num_hyperedges=16, num_heads=8)
        >>> x = torch.randn(2, 256, 128)  # (Batch, Num_Nodes, Dim)
        >>> output = model(x)
        >>> print(output.shape)
        torch.Size([2, 256, 128])
    """

    def __init__(self, embed_dim, num_hyperedges=16, num_heads=4, dropout=0.1, context="both"):
        """Initialize the incidence generator and the edge/node projection layers.

        Args:
            embed_dim (int): Token feature dimension.
            num_hyperedges (int): Number of adaptive hyperedges.
            num_heads (int): Attention heads used by the incidence generator.
            dropout (float): Dropout rate inside the incidence generator.
            context (str): Context mode for AdaHyperedgeGen ('mean', 'max' or 'both').
        """
        super().__init__()
        self.edge_generator = AdaHyperedgeGen(embed_dim, num_hyperedges, num_heads, dropout, context)
        self.edge_proj = nn.Sequential(nn.Linear(embed_dim, embed_dim), nn.GELU())
        self.node_proj = nn.Sequential(nn.Linear(embed_dim, embed_dim), nn.GELU())

    def forward(self, X):
        """Apply hypergraph message passing to X of shape (B, N, D); output keeps the same shape."""
        incidence = self.edge_generator(X)  # (B, N, num_hyperedges)
        edge_feats = self.edge_proj(torch.bmm(incidence.transpose(1, 2), X))  # vertices -> hyperedges
        node_feats = self.node_proj(torch.bmm(incidence, edge_feats))  # hyperedges -> vertices
        return node_feats + X  # residual connection
class C3AH(nn.Module):
    """CSP-style block running adaptive hypergraph computation on one of two parallel branches.

    One 1x1-projected branch is refined by AdaHGComputation while the other passes through a
    plain 1x1 projection; the two are concatenated and fused by a final 1x1 convolution.

    Examples:
        >>> import torch
        >>> model = C3AH(c1=64, c2=128, num_hyperedges=8)
        >>> x = torch.randn(2, 64, 32, 32)
        >>> output = model(x)
        >>> print(output.shape)
        torch.Size([2, 128, 32, 32])
    """

    def __init__(self, c1, c2, e=1.0, num_hyperedges=8, context="both"):
        """Initialize C3AH.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            e (float): Hidden-channel expansion ratio.
            num_hyperedges (int): Hyperedge count for the hypergraph branch.
            context (str): Context mode forwarded to AdaHGComputation ('mean', 'max' or 'both').
        """
        super().__init__()
        hidden = int(c2 * e)
        # AdaHGComputation uses one attention head per 16 channels.
        assert hidden % 16 == 0, "Dimension of AdaHGComputation should be a multiple of 16."
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.m = AdaHGComputation(
            embed_dim=hidden, num_hyperedges=num_hyperedges, num_heads=hidden // 16, dropout=0.1, context=context
        )
        self.cv3 = Conv(2 * hidden, c2, 1)

    def forward(self, x):
        """Run both branches and fuse their concatenation with a 1x1 convolution."""
        hyper = self.m(self.cv1(x))
        return self.cv3(torch.cat((hyper, self.cv2(x)), 1))
@@ -1785,10 +1788,11 @@ class FuseModule(nn.Module): >>> print(output.shape) torch.Size([2, 64, 32, 32]) """ + def __init__(self, c_in, channel_adjust): super(FuseModule, self).__init__() self.downsample = nn.AvgPool2d(kernel_size=2) - self.upsample = nn.Upsample(scale_factor=2, mode='nearest') + self.upsample = nn.Upsample(scale_factor=2, mode="nearest") if channel_adjust: self.conv_out = Conv(4 * c_in, c_in, 1) else: @@ -1801,6 +1805,7 @@ def forward(self, x): out = self.conv_out(x_cat) return out + class HyperACE(nn.Module): """ Hypergraph-based Adaptive Correlation Enhancement (HyperACE). @@ -1833,18 +1838,32 @@ class HyperACE(nn.Module): >>> print(output.shape) torch.Size([2, 256, 32, 32]) """ - def __init__(self, c1, c2, n=1, num_hyperedges=8, dsc3k=True, shortcut=False, e1=0.5, e2=1, context="both", channel_adjust=True): + + def __init__( + self, + c1, + c2, + n=1, + num_hyperedges=8, + dsc3k=True, + shortcut=False, + e1=0.5, + e2=1, + context="both", + channel_adjust=True, + ): super().__init__() - self.c = int(c2 * e1) + self.c = int(c2 * e1) self.cv1 = Conv(c1, 3 * self.c, 1, 1) - self.cv2 = Conv((4 + n) * self.c, c2, 1) + self.cv2 = Conv((4 + n) * self.c, c2, 1) self.m = nn.ModuleList( - DSC3k(self.c, self.c, 2, shortcut, k1=3, k2=7) if dsc3k else DSBottleneck(self.c, self.c, shortcut=shortcut) for _ in range(n) + DSC3k(self.c, self.c, 2, shortcut, k1=3, k2=7) if dsc3k else DSBottleneck(self.c, self.c, shortcut=shortcut) + for _ in range(n) ) self.fuse = FuseModule(c1, channel_adjust) self.branch1 = C3AH(self.c, self.c, e2, num_hyperedges, context) self.branch2 = C3AH(self.c, self.c, e2, num_hyperedges, context) - + def forward(self, X): x = self.fuse(X) y = list(self.cv1(x).chunk(3, 1)) @@ -1855,6 +1874,7 @@ def forward(self, X): y.append(out2) return self.cv2(torch.cat(y, 1)) + class DownsampleConv(nn.Module): """ A simple downsampling block with optional channel adjustment. 
@@ -1877,17 +1897,19 @@ class DownsampleConv(nn.Module): >>> print(output.shape) torch.Size([2, 128, 16, 16]) """ + def __init__(self, in_channels, channel_adjust=True): super().__init__() self.downsample = nn.AvgPool2d(kernel_size=2) if channel_adjust: self.channel_adjust = Conv(in_channels, in_channels * 2, 1) else: - self.channel_adjust = nn.Identity() + self.channel_adjust = nn.Identity() def forward(self, x): return self.channel_adjust(self.downsample(x)) + class FullPAD_Tunnel(nn.Module): """ A gated fusion module for the Full-Pipeline Aggregation-and-Distribution (FullPAD) paradigm. @@ -1908,9 +1930,11 @@ class FullPAD_Tunnel(nn.Module): >>> print(output.shape) torch.Size([2, 64, 32, 32]) """ + def __init__(self): super().__init__() self.gate = nn.Parameter(torch.tensor(0.0)) + def forward(self, x): out = x[0] + self.gate * x[1] return out diff --git a/ultralytics/utils/dist.py b/ultralytics/utils/dist.py index 8b7e5bbe4..ffbd37eaf 100644 --- a/ultralytics/utils/dist.py +++ b/ultralytics/utils/dist.py @@ -1,45 +1,82 @@ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license +from __future__ import annotations + import os import shutil import socket import sys import tempfile +from pathlib import Path from . import USER_CONFIG_DIR from .torch_utils import TORCH_1_9 +PROJECT_ROOT = Path(__file__).resolve().parents[2] -def find_free_network_port() -> int: - """ - Finds a free port on localhost. - It is useful in single-node training when we don't want to connect to a real main node but have to set the - `MASTER_PORT` environment variable. 
- """ +def find_free_network_port() -> int: + """Find a free port on localhost.""" with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind(("127.0.0.1", 0)) - return s.getsockname()[1] # port + return s.getsockname()[1] + + +def _serialize_overrides(overrides: dict) -> dict: + """Serialize trainer overrides for DDP subprocess compatibility.""" + serialized = overrides.copy() + augmentations = serialized.get("augmentations") + if augmentations is not None: + try: + import albumentations as A + + serialized["augmentations"] = [A.to_dict(t) for t in augmentations] + serialized["_augmentations_serialized"] = True + except Exception: + serialized["augmentations"] = None + serialized["_augmentations_serialized"] = False + return serialized def generate_ddp_file(trainer): - """Generates a DDP file and returns its file name.""" + """Generate temporary python entrypoint for DDP subprocess.""" module, name = f"{trainer.__class__.__module__}.{trainer.__class__.__name__}".rsplit(".", 1) + overrides = _serialize_overrides(vars(trainer.args)) content = f""" # Ultralytics Multi-GPU training temp file (should be automatically deleted after use) -overrides = {vars(trainer.args)} +import sys +from pathlib import Path, PosixPath, WindowsPath + +repo_root = Path(r"{PROJECT_ROOT}") +if str(repo_root) not in sys.path: + sys.path.insert(0, str(repo_root)) + +import os + +overrides = {overrides} + +if "use_turing_flash" in overrides: + os.environ["Y13_USE_TURING_FLASH"] = "1" if overrides["use_turing_flash"] else "0" +if "force_disable_flash" in overrides: + os.environ["Y13_DISABLE_FLASH"] = "1" if overrides["force_disable_flash"] else "0" if __name__ == "__main__": from {module} import {name} from ultralytics.utils import DEFAULT_CFG_DICT + if overrides.pop("_augmentations_serialized", False) and overrides.get("augmentations") is not None: + import albumentations as A + + overrides["augmentations"] = [A.from_dict(t) for t in overrides["augmentations"]] + cfg = 
DEFAULT_CFG_DICT.copy() - cfg.update(save_dir='') # handle the extra key 'save_dir' + cfg.update(save_dir='') trainer = {name}(cfg=cfg, overrides=overrides) trainer.args.model = "{getattr(trainer.hub_session, "model_url", trainer.args.model)}" - results = trainer.train() + trainer.train() """ + (USER_CONFIG_DIR / "DDP").mkdir(exist_ok=True) with tempfile.NamedTemporaryFile( prefix="_temp_", @@ -54,11 +91,11 @@ def generate_ddp_file(trainer): def generate_ddp_command(world_size, trainer): - """Generates and returns command for distributed training.""" - import __main__ # noqa local import to avoid https://github.com/Lightning-AI/lightning/issues/15218 + """Generate command tuple for distributed training.""" + import __main__ # noqa: F401 - if not trainer.resume: - shutil.rmtree(trainer.save_dir) # remove the save_dir + if not trainer.resume and trainer.save_dir.exists(): + shutil.rmtree(trainer.save_dir, ignore_errors=True) file = generate_ddp_file(trainer) dist_cmd = "torch.distributed.run" if TORCH_1_9 else "torch.distributed.launch" port = find_free_network_port() @@ -67,6 +104,6 @@ def generate_ddp_command(world_size, trainer): def ddp_cleanup(trainer, file): - """Delete temp file if created.""" - if f"{id(trainer)}.py" in file: # if temp_file suffix in file + """Delete temp DDP file if created.""" + if f"{id(trainer)}.py" in file and os.path.exists(file): os.remove(file) diff --git a/ultralytics/utils/flash_turing_interface.py b/ultralytics/utils/flash_turing_interface.py new file mode 100644 index 000000000..5ada6ce59 --- /dev/null +++ b/ultralytics/utils/flash_turing_interface.py @@ -0,0 +1,68 @@ +# Ultralytics AGPL-3.0 License + +from __future__ import annotations + +from typing import Optional, Tuple + +import torch + +import flash_attn_turing as flash_attn_gpu + + +def _maybe_contiguous(x: Optional[torch.Tensor]) -> Optional[torch.Tensor]: + return x.contiguous() if x is not None and x.stride(-1) != 1 else x + + +def _flash_attn_forward( + q: torch.Tensor, 
+ k: torch.Tensor, + v: torch.Tensor, + softmax_scale: float, + causal: bool, +) -> Tuple[torch.Tensor, torch.Tensor]: + q, k, v = [_maybe_contiguous(x) for x in (q, k, v)] + out, lse = flash_attn_gpu.fwd(q, k, v, softmax_scale, causal) + return out, lse + + +def _flash_attn_backward( + dout: torch.Tensor, + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + out: torch.Tensor, + lse: torch.Tensor, + softmax_scale: float, + causal: bool, +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + dout, q, k, v, out, lse = [_maybe_contiguous(x) for x in (dout, q, k, v, out, lse)] + dq, dk, dv = flash_attn_gpu.bwd(q, k, v, out, lse, dout, softmax_scale, causal) + return dq, dk, dv + + +class _FlashAttnFunc(torch.autograd.Function): + @staticmethod + def forward(ctx, q, k, v, softmax_scale: Optional[float], causal: bool): + softmax_scale = q.shape[-1] ** (-0.5) if softmax_scale is None else softmax_scale + out, lse = _flash_attn_forward(q, k, v, softmax_scale, causal) + if any(x.requires_grad for x in (q, k, v)): + ctx.save_for_backward(q, k, v, out, lse) + ctx.softmax_scale = softmax_scale + ctx.causal = causal + return out + + @staticmethod + def backward(ctx, dout): + q, k, v, out, lse = ctx.saved_tensors + dq, dk, dv = _flash_attn_backward(dout, q, k, v, out, lse, ctx.softmax_scale, ctx.causal) + return dq, dk, dv, None, None + + +def flash_attn_func( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + softmax_scale: Optional[float] = None, + causal: bool = False, +) -> torch.Tensor: + return _FlashAttnFunc.apply(q, k, v, softmax_scale, causal) diff --git a/ultralytics/utils/metrics.py b/ultralytics/utils/metrics.py index a5fbff886..a742891c8 100644 --- a/ultralytics/utils/metrics.py +++ b/ultralytics/utils/metrics.py @@ -741,7 +741,7 @@ def map(self): def mean_results(self): """Mean of results, return mp, mr, map50, map.""" - return [self.mp, self.mr, self.map50, self.map75,self.map] + return [self.mp, self.mr, self.map50, self.map75, self.map] def 
class_result(self, i): """Class-aware result, return p[i], r[i], ap50[i], ap[i].""" @@ -865,7 +865,13 @@ def process(self, tp, conf, pred_cls, target_cls): @property def keys(self): """Returns a list of keys for accessing specific metrics.""" - return ["metrics/precision(B)", "metrics/recall(B)", "metrics/mAP50(B)", "metrics/mAP75(B)", "metrics/mAP50-95(B)"] + return [ + "metrics/precision(B)", + "metrics/recall(B)", + "metrics/mAP50(B)", + "metrics/mAP75(B)", + "metrics/mAP50-95(B)", + ] def mean_results(self): """Calculate mean of detected objects & return precision, recall, mAP50, and mAP50-95.""" @@ -1267,14 +1273,20 @@ def process(self, tp, conf, pred_cls, target_cls): @property def keys(self): """Returns a list of keys for accessing specific metrics.""" - return ["metrics/precision(B)", "metrics/recall(B)", "metrics/mAP50(B)", "metrics/mAP50-95(B)"] + return [ + "metrics/precision(B)", + "metrics/recall(B)", + "metrics/mAP50(B)", + "metrics/mAP75(B)", + "metrics/mAP50-95(B)", + ] def mean_results(self): - """Calculate mean of detected objects & return precision, recall, mAP50, and mAP50-95.""" + """Calculate mean results and return precision, recall, mAP50, mAP75, and mAP50-95.""" return self.box.mean_results() def class_result(self, i): - """Return the result of evaluating the performance of an object detection model on a specific class.""" + """Return per-class precision, recall, AP50, AP75, and AP50-95.""" return self.box.class_result(i) @property From 4e20b0c44459434bc88d385da0cfde6edd40619e Mon Sep 17 00:00:00 2001 From: ahmedelbamby-aast Date: Fri, 3 Apr 2026 01:19:15 +0200 Subject: [PATCH 2/3] Update ultralytics/utils/metrics.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- ultralytics/utils/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ultralytics/utils/metrics.py b/ultralytics/utils/metrics.py index a742891c8..5579f6a30 100644 --- a/ultralytics/utils/metrics.py +++ 
b/ultralytics/utils/metrics.py @@ -740,7 +740,7 @@ def map(self): return self.all_ap.mean() if len(self.all_ap) else 0.0 def mean_results(self): - """Mean of results, return mp, mr, map50, map.""" + """Mean of results, return mp, mr, map50, map75, map.""" return [self.mp, self.mr, self.map50, self.map75, self.map] def class_result(self, i): From c0bf19091a194bfb6f52c65e4b9211adc4650695 Mon Sep 17 00:00:00 2001 From: ahmedelbamby-aast Date: Fri, 3 Apr 2026 01:20:10 +0200 Subject: [PATCH 3/3] Update ultralytics/data/utils.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- ultralytics/data/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py index 59ad043dc..ad06f89f6 100644 --- a/ultralytics/data/utils.py +++ b/ultralytics/data/utils.py @@ -331,7 +331,7 @@ def _add_from_dir(d: Path): except Exception: im_files = [] for im in im_files: - lb = Path(str(im).replace(f"{os.sep}images{os.sep}", f"{os.sep}labels{os.sep}")).with_suffix(".txt") + lb = Path(img2label_paths([str(im)])[0]) if lb.exists(): s = str(lb) if s not in seen: