|
#: Left-side crop width of the legacy ship-selection list (px@1280)
LEGACY_LIST_WIDTH: int = 1048
33 | 33 |
|
34 | | -_LEVEL_PATTERN = re.compile(r'[Ll][Vv]\.?\s*(\d+)') |
| 34 | +_LEVEL_PATTERN = re.compile(r'[Ll][Vv]\.?\s*([0-9ILilOo]{1,6})') |
| 35 | +_LEVEL_NOISY_PATTERN = re.compile(r'(?:[LlIi1O0][VvYy])[\.:]?\s*([0-9ILilOo]{1,6})') |
| 36 | +_MAX_LEVEL_VALUE = 200 |
| 37 | +_MAX_LEVEL_NOISE_CHARS = 1 |
| 38 | +_MAX_NOISY_LEVEL_HITS_BEFORE_RETRY = 1 |
| 39 | + |
| 40 | + |
class LevelOCRRetryNeededError(RuntimeError):
    """Level OCR output is too noisy; a fresh screenshot must be recognized."""
35 | 43 |
|
36 | 44 |
|
37 | 45 | def to_legacy_format(screen: np.ndarray) -> tuple[np.ndarray, float, float]: |
@@ -169,15 +177,151 @@ def recognize_ships_in_list( |
169 | 177 |
|
def _parse_level(text: str) -> int | None:
    """Extract the level number written as ``Lv.XX`` from OCR text.

    Thin wrapper around ``_parse_level_with_status`` that keeps only the
    parsed level and discards the retry flag.

    Returns:
        The parsed level, or ``None`` when no level could be extracted.
    """
    return _parse_level_with_status(text)[0]
| 182 | + |
| 183 | + |
def _parse_level_with_status(text: str) -> tuple[int | None, bool]:
    """Parse a level from OCR text and report whether re-recognition is advised.

    The strict pattern is tried first, then the noise-tolerant one.  For the
    first pattern that matches:

    * if the captured run holds more look-alike letters than
      ``_MAX_LEVEL_NOISE_CHARS``, parsing stops with ``(None, True)``;
    * otherwise the run is coerced to a level and, when that succeeds,
      ``(level, False)`` is returned.

    A match whose coercion fails falls through to the next pattern.

    Returns:
        ``(level, need_retry)``; ``(None, False)`` when nothing usable matched.
    """
    # Surrounding whitespace and inner spaces are removed before matching.
    compact = text.strip().replace(' ', '')

    for pattern in (_LEVEL_PATTERN, _LEVEL_NOISY_PATTERN):
        match = pattern.search(compact)
        if match is None:
            continue

        raw_digits = match.group(1)
        if _noise_char_count(raw_digits) > _MAX_LEVEL_NOISE_CHARS:
            return None, True

        level = _coerce_level_digits(raw_digits)
        if level is not None:
            return level, False

    return None, False
| 207 | + |
| 208 | + |
| 209 | +def _noise_char_count(raw_digits: str) -> int: |
| 210 | + return sum(1 for ch in raw_digits if ch in 'ILilOo') |
| 211 | + |
| 212 | + |
def _coerce_level_digits(raw_digits: str) -> int | None:
    """Map a digit run extracted by OCR to a valid level value.

    Look-alike letters are translated to digits first (``I``/``i``/``L``/``l``
    become ``1``; ``O``/``o`` become ``0``) and every character that is not a
    decimal digit is dropped.  Prefixes of the cleaned string are then tried
    in priority order, and the first one inside ``1 .. _MAX_LEVEL_VALUE`` wins.

    Args:
        raw_digits: The raw "digit" run captured from the OCR text.

    Returns:
        The first candidate within the valid range, or ``None`` when the run
        contains no digits or no candidate is in range.
    """
    lookalikes = str.maketrans('IiLlOo', '111100')
    # Filter with ``isdecimal`` rather than ``isdigit``: characters such as
    # '²' satisfy ``isdigit`` but make ``int()`` raise ValueError.
    digits = ''.join(
        ch for ch in raw_digits.translate(lookalikes) if ch.isdecimal()
    )
    if not digits:
        return None

    prefixes: list[str] = []

    # A leading 0 is most likely spurious (e.g. 051 -> 51), so the two digits
    # after it are tried before anything else.
    if len(digits) >= 3 and digits.startswith('0'):
        prefixes.append(digits[1:3])

    # Then the first 3 digits (common misreads: 1046 -> 104, 110544 -> 110),
    # the first 2, and finally the first digit alone.
    prefixes.extend(digits[:n] for n in (3, 2, 1) if len(digits) >= n)

    for prefix in prefixes:
        value = int(prefix)
        if 1 <= value <= _MAX_LEVEL_VALUE:
            return value

    return None
179 | 252 |
|
180 | 253 |
|
| 254 | +def _center_x(bbox: tuple[int, int, int, int] | None, width: int) -> float: |
| 255 | + if bbox is None: |
| 256 | + return width / 2 |
| 257 | + x1, _, x2, _ = bbox |
| 258 | + return (x1 + x2) / 2 |
| 259 | + |
| 260 | + |
def _probe_level_near_name(
    ocr: OCREngine,
    screen: np.ndarray,
    *,
    y_start: int,
    y_end: int,
    name_x: float,
    max_x: int,
) -> int | None:
    """Crop a region around the ship name's x position and re-read the level.

    The region is centred horizontally on *name_x* (half-width is the larger
    of 70 px and 4.5% of the screen width, clamped to ``[0, max_x]``) and
    spans from 1.6 row heights above *y_start* to 0.4 row heights below
    *y_end*, clamped to the screen.  OCR runs twice with a restricted
    allowlist: once on the raw crop and once on a 3x upscaled, normalised,
    Otsu-binarised copy.

    Returns:
        The largest level parsed from either pass, or ``None`` when the crop
        is empty or no level was parsed.

    Raises:
        LevelOCRRetryNeededError: No level was parsed and the number of texts
            flagged as too noisy exceeds
            ``_MAX_NOISY_LEVEL_HITS_BEFORE_RETRY``.
    """
    screen_h, screen_w = screen.shape[:2]
    row_height = max(1, y_end - y_start)

    half_width = max(70, int(screen_w * 0.045))
    left = max(0, int(name_x - half_width))
    right = min(max_x, int(name_x + half_width))

    top = max(0, y_start - int(row_height * 1.6))
    bottom = min(screen_h, y_end + int(row_height * 0.4))

    if right <= left or bottom <= top:
        return None

    roi = screen[top:bottom, left:right]
    if roi.size == 0:
        return None

    parsed_levels: list[int] = []

    def scan(image: np.ndarray) -> int:
        """OCR *image*, collect parsed levels, return the count of noisy texts."""
        noisy = 0
        for result in ocr.recognize(image, allowlist='LlVvIiYy0Oo1.:-/0123456789'):
            text = result.text.strip()
            if not text:
                continue
            level, need_retry = _parse_level_with_status(text)
            if need_retry:
                noisy += 1
            elif level is not None:
                parsed_levels.append(level)
        return noisy

    # First pass: the crop as-is.
    noisy_level_hits = scan(roi)

    # Second pass: grayscale -> 3x upscale -> min/max normalise -> Otsu
    # threshold, converted back to 3 channels before OCR.
    # NOTE(review): COLOR_RGB2GRAY implies `screen` is RGB — confirm.
    gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)
    enlarged = cv2.resize(gray, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
    stretched = cv2.normalize(enlarged, None, 0, 255, cv2.NORM_MINMAX)
    binary = cv2.threshold(stretched, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    noisy_level_hits += scan(cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB))

    if parsed_levels:
        return max(parsed_levels)

    if noisy_level_hits > _MAX_NOISY_LEVEL_HITS_BEFORE_RETRY:
        raise LevelOCRRetryNeededError(
            f'等级 OCR 噪声过高: {noisy_level_hits} 条异常等级文本 (阈值 {_MAX_NOISY_LEVEL_HITS_BEFORE_RETRY})',
        )

    return None
| 323 | + |
| 324 | + |
181 | 325 | def read_ship_levels( |
182 | 326 | ocr: OCREngine, |
183 | 327 | screen: np.ndarray, |
@@ -230,43 +374,80 @@ def read_ship_levels( |
230 | 374 | for y_start_720, y_end_720 in rows: |
231 | 375 | y_start = max(0, int((y_start_720 - 1) * scale_y)) |
232 | 376 | y_end = min(h, int((y_end_720 + 1) * scale_y)) |
| 377 | + row_key = round((y_start + y_end) / 2 / h, 4) |
233 | 378 |
|
234 | 379 | row_img = list_area_native[y_start:y_end] |
235 | 380 | results = ocr.recognize(row_img) |
236 | 381 |
|
237 | | - row_name: str | None = None |
238 | | - row_level: int | None = None |
| 382 | + name_hits: list[tuple[str, float]] = [] |
| 383 | + local_level_hits: list[tuple[int, float]] = [] |
239 | 384 |
|
240 | 385 | for r in results: |
241 | 386 | text = r.text.strip() |
242 | 387 | if not text: |
243 | 388 | continue |
244 | 389 |
|
245 | | - # 尝试匹配等级 |
246 | | - if row_level is None: |
247 | | - level = _parse_level(text) |
248 | | - if level is not None: |
249 | | - row_level = level |
| 390 | + x_center = _center_x(r.bbox, row_img.shape[1]) |
250 | 391 |
|
251 | | - # 尝试匹配舰船名 |
252 | | - if row_name is None: |
253 | | - name = _fuzzy_match(text, SHIPNAMES) |
254 | | - if name is not None and name not in seen: |
255 | | - row_name = name |
| 392 | + level = _parse_level(text) |
| 393 | + if level is not None: |
| 394 | + local_level_hits.append((level, x_center)) |
256 | 395 |
|
257 | | - if row_name is not None: |
| 396 | + name = _fuzzy_match(text, SHIPNAMES) |
| 397 | + if name is not None: |
| 398 | + name_hits.append((name, x_center)) |
| 399 | + |
| 400 | + if not name_hits: |
| 401 | + continue |
| 402 | + |
| 403 | + name_hits.sort(key=lambda item: item[1]) |
| 404 | + local_level_hits.sort(key=lambda item: item[1]) |
| 405 | + max_pair_dist = max(80.0, row_img.shape[1] * 0.12) |
| 406 | + |
| 407 | + for row_name, name_x in name_hits: |
258 | 408 | if deduplicate_by_name and row_name in seen: |
259 | 409 | continue |
| 410 | + |
| 411 | + row_level: int | None = None |
| 412 | + |
| 413 | + best_level: int | None = None |
| 414 | + best_dist = float('inf') |
| 415 | + for candidate_level, candidate_x in local_level_hits: |
| 416 | + dist = abs(candidate_x - name_x) |
| 417 | + if dist < best_dist: |
| 418 | + best_dist = dist |
| 419 | + best_level = candidate_level |
| 420 | + |
| 421 | + if best_level is not None and best_dist <= max_pair_dist: |
| 422 | + row_level = best_level |
| 423 | + |
| 424 | + if row_level is None: |
| 425 | + probe_level = _probe_level_near_name( |
| 426 | + ocr, |
| 427 | + screen, |
| 428 | + y_start=y_start, |
| 429 | + y_end=y_end, |
| 430 | + name_x=name_x, |
| 431 | + max_x=list_w_native, |
| 432 | + ) |
| 433 | + if probe_level is not None: |
| 434 | + row_level = probe_level |
| 435 | + |
260 | 436 | if deduplicate_by_name: |
261 | 437 | seen.add(row_name) |
262 | | - row_key = round((y_start + y_end) / 2 / h, 4) |
| 438 | + _log.debug( |
| 439 | + '[选船列表] 等级识别命中: name={} level={} row_key={}', |
| 440 | + row_name, |
| 441 | + row_level if row_level is not None else 'None', |
| 442 | + row_key, |
| 443 | + ) |
263 | 444 | if include_row_key: |
264 | 445 | found.append((row_name, row_level, row_key)) |
265 | 446 | else: |
266 | 447 | found.append((row_name, row_level)) |
267 | 448 |
|
268 | 449 | _log.debug( |
269 | 450 | '[选船列表] 等级识别: {}', |
270 | | - [(n, lv) for n, lv in found], |
| 451 | + [(entry[0], entry[1]) for entry in found], |
271 | 452 | ) |
272 | 453 | return found |
0 commit comments