|
94 | 94 | report_dir_path, |
95 | 95 | should_skip_by_timestamp, |
96 | 96 | status_json_path, |
| 97 | + update_api_latest_dates, |
97 | 98 | upsert_product_status, |
98 | 99 | write_local_timestamp, |
99 | 100 | ) |
@@ -889,6 +890,108 @@ def _maybe_run_coin_preprocess( |
889 | 890 | # 无论哪个分支退出,统一在此累加后处理阶段耗时 |
890 | 891 | report.phase_postprocess_seconds += max(0.0, time.time() - phase_start) |
891 | 892 |
|
def _prefetch_api_dates(
    products: List[str],
    api_base: str,
    hid: str,
    headers: Dict[str, str],
    log_dir: Path,
    max_workers: int = 8,
) -> Dict[str, Tuple[str, str]]:
    """Concurrently prefetch each product's latest API date, persist it, and return the cache.

    Products whose cached entry is still fresh (younger than
    ``API_DATE_CACHE_TTL_SECONDS``) are skipped. Products that fail are
    silently dropped; the Plan phase falls back to per-product HTTP queries
    for them.

    Args:
        products: Product names to query.
        api_base: API base URL (caller is expected to strip the trailing slash).
        hid: Host identifier forwarded to ``get_latest_time``.
        headers: HTTP headers (auth etc.) used for every request.
        log_dir: Directory holding the api-latest-dates cache file.
        max_workers: Upper bound on concurrent HTTP requests.

    Returns:
        Mapping of product -> (latest_date, checked_at_iso): the still-fresh
        existing cache merged with the freshly fetched results.
    """
    import threading
    # BUG FIX: as_completed raises concurrent.futures.TimeoutError, which on
    # Python < 3.11 is NOT a subclass of the builtin TimeoutError, so the old
    # `except TimeoutError:` never caught it — import it so we can.
    from concurrent.futures import TimeoutError as _FuturesTimeout
    from concurrent.futures import as_completed as _as_completed

    # 1. Load the existing cache and collect products whose entry is missing or stale.
    existing_cache = load_api_latest_dates(log_dir)
    now = datetime.now()
    uncached = []
    for product in products:
        cached = existing_cache.get(product)
        if cached:
            _, checked_at_str = cached
            if "T" in checked_at_str:
                try:
                    checked_at = datetime.strptime(checked_at_str, "%Y-%m-%dT%H:%M:%S")
                    if (now - checked_at).total_seconds() < API_DATE_CACHE_TTL_SECONDS:
                        continue  # cache entry still fresh — skip the HTTP query
                except ValueError:
                    pass  # unparseable timestamp: treat as stale and re-fetch
        uncached.append(product)

    if not uncached:
        log_info(
            f"[预取] 全部 {len(products)} 个产品缓存命中,跳过 HTTP",
            event="PREFETCH", decision="all_cached",
        )
        return existing_cache

    # 2. Concurrently fetch the cache misses.
    log_info(
        f"[预取] 并发查询 {len(uncached)}/{len(products)} 个产品",
        event="PREFETCH", decision="fetching",
    )
    fetched: Dict[str, str] = {}
    abort_event = threading.Event()
    t_start = time.time()

    def _fetch_one(product: str) -> Tuple[str, Optional[str]]:
        """Query one product over HTTP; an auth failure (401/403) aborts the whole prefetch."""
        if abort_event.is_set():
            return product, None
        try:
            date_str = get_latest_time(api_base, product, hid, headers)
            return product, date_str
        except FatalRequestError as exc:
            # Authentication failure will hit every product — stop early.
            if exc.status_code in (401, 403):
                abort_event.set()
            return product, None
        except Exception:
            # Best-effort: individual failures fall back to the Plan phase.
            return product, None

    effective_workers = min(max_workers, len(uncached))
    executor = ThreadPoolExecutor(max_workers=effective_workers)
    try:
        futures = {executor.submit(_fetch_one, p): p for p in uncached}
        for future in _as_completed(futures, timeout=30):
            try:
                product, date_str = future.result()
                if date_str:
                    fetched[product] = date_str
            except Exception:
                pass
            if abort_event.is_set():
                break
    # Catch both spellings: pre-3.11 they are distinct classes, 3.11+ aliases.
    except (_FuturesTimeout, TimeoutError):
        log_info("[预取] 超时,放弃剩余查询", event="PREFETCH", decision="timeout")
    finally:
        # Don't wait for stragglers; cancel anything not yet started.
        executor.shutdown(wait=False, cancel_futures=True)

    elapsed = time.time() - t_start
    log_info(
        f"[预取] 完成,成功 {len(fetched)}/{len(uncached)},耗时 {elapsed:.1f}s",
        event="PREFETCH", decision="done",
    )

    # 3. Persist, then return the in-memory merge (no file re-read: avoids
    # races with concurrent writers and keeps expired entries from leaking back).
    if fetched:
        try:
            update_api_latest_dates(log_dir, fetched)
        except Exception:
            pass  # best-effort persistence; the in-memory result is still valid
    # Merge: keep the fresh existing cache entries + the just-fetched results.
    checked_at_now = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    merged: Dict[str, Tuple[str, str]] = dict(existing_cache)
    for product, date_str in fetched.items():
        merged[product] = (date_str, checked_at_now)
    return merged
| 993 | + |
| 994 | + |
892 | 995 | def _execute_plans( |
893 | 996 | plans: Sequence[ProductPlan], |
894 | 997 | command_ctx: CommandContext, |
@@ -922,8 +1025,15 @@ def _execute_plans( |
922 | 1025 | has_error = False |
923 | 1026 | t_run_start = time.time() |
924 | 1027 |
|
925 | | - # 加载 API 日期缓存(check_updates 写入的),新鲜时跳过门控 HTTP |
926 | | - _api_date_cache = load_api_latest_dates(report_dir_path(command_ctx.data_root)) |
| 1028 | + # 并发预取所有产品的 API 最新日期,写入缓存供 Plan 阶段命中(替代单次 load_api_latest_dates) |
| 1029 | + product_names = [normalize_product_name(p.name) for p in plans] |
| 1030 | + _api_date_cache = _prefetch_api_dates( |
| 1031 | + products=product_names, |
| 1032 | + api_base=command_ctx.api_base.rstrip("/"), |
| 1033 | + hid=hid, |
| 1034 | + headers=headers, |
| 1035 | + log_dir=report_dir_path(command_ctx.data_root), |
| 1036 | + ) |
927 | 1037 |
|
928 | 1038 | # stop-on-error 要求严格顺序控制,强制串行 |
929 | 1039 | effective_workers = max(1, max_workers) if not command_ctx.stop_on_error else 1 |
|
0 commit comments