@@ -119,7 +119,12 @@ def __init__(self, session, class_names: dict):
119119 self ._input_w = shape [3 ] if isinstance (shape [3 ], int ) else 640
120120
121121 def __call__ (self , source , conf : float = 0.25 , verbose : bool = True , ** kwargs ):
122- """Run inference on an image path or PIL Image."""
122+ """Run inference on an image path or PIL Image.
123+
124+ All models use onnx-community HuggingFace format:
125+ outputs[0] = logits [1, 300, 80] (raw, pre-sigmoid)
126+ outputs[1] = pred_boxes [1, 300, 4] (cx, cy, w, h normalized 0..1)
127+ """
123128 import numpy as np
124129 from PIL import Image
125130
@@ -150,31 +155,36 @@ def __call__(self, source, conf: float = 0.25, verbose: bool = True, **kwargs):
150155
151156 # Run inference
152157 outputs = self .session .run (None , {self ._input_name : blob })
153- preds = outputs [0 ] # shape: [1, num_detections, 6]
158+ logits = outputs [0 ][0 ] # [300, 80] raw class logits
159+ pred_boxes = outputs [1 ][0 ] # [300, 4] cx, cy, w, h (normalized 0..1)
160+
161+ # Sigmoid → class probabilities
162+ probs = 1.0 / (1.0 + np .exp (- logits ))
154163
155- # Parse detections: [x1, y1, x2, y2, confidence, class_id]
164+ # Parse detections
156165 boxes = []
157- for det in preds [0 ]:
158- det_conf = float (det [4 ])
166+ for i in range (len (pred_boxes )):
167+ cls_id = int (np .argmax (probs [i ]))
168+ det_conf = float (probs [i ][cls_id ])
159169 if det_conf < conf :
160170 continue
161171
162- # Scale coordinates back to original image space
163- x1 = (float (det [0 ]) - pad_x ) / scale
164- y1 = (float (det [1 ]) - pad_y ) / scale
165- x2 = (float (det [2 ]) - pad_x ) / scale
166- y2 = (float (det [3 ]) - pad_y ) / scale
172+ # cx,cy,w,h (normalized) → x1,y1,x2,y2 (original image pixels)
173+ cx , cy , bw , bh = pred_boxes [i ]
174+ px_cx = cx * self ._input_w
175+ px_cy = cy * self ._input_h
176+ px_w = bw * self ._input_w
177+ px_h = bh * self ._input_h
167178
168- # Clip to image bounds
169- x1 = max (0 , min (x1 , orig_w ))
170- y1 = max (0 , min (y1 , orig_h ))
171- x2 = max (0 , min (x2 , orig_w ))
172- y2 = max (0 , min (y2 , orig_h ))
179+ x1 = max (0 , min ((px_cx - px_w / 2 - pad_x ) / scale , orig_w ))
180+ y1 = max (0 , min ((px_cy - px_h / 2 - pad_y ) / scale , orig_h ))
181+ x2 = max (0 , min ((px_cx + px_w / 2 - pad_x ) / scale , orig_w ))
182+ y2 = max (0 , min ((px_cy + px_h / 2 - pad_y ) / scale , orig_h ))
173183
174184 boxes .append (_BoxResult (
175185 xyxy = np .array ([[x1 , y1 , x2 , y2 ]]),
176186 conf = np .array ([det_conf ]),
177- cls = np .array ([int ( det [ 5 ]) ]),
187+ cls = np .array ([cls_id ]),
178188 ))
179189
180190 return [_DetResult (boxes )]
0 commit comments