Skip to content

Commit c8ed98b

Browse files
authored
Merge pull request #167 from helpfulengineering/fix/direct-matching-distance
In the direct matching layer, false positives could cause a failure mode in which the NLP layer is never invoked. The NLP layer should instead be used as a second opinion on every call, unless either a strong direct match is found or the NLP layer is explicitly disabled.
2 parents 0998609 + a3bb939 commit c8ed98b

5 files changed

Lines changed: 776 additions & 196 deletions

File tree

.repo-map.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,10 @@ Total Python files: 310
525525
│ │ │ def _build_optional_human_summary()
526526
│ │ │ def _build_match_suggestions()
527527
│ │ │ def _detect_domain_from_manifest()
528+
│ │ │ def _resolve_matching_local_okw_json_dir()
529+
│ │ │ def _load_facilities_from_local_okw_json_dir()
530+
│ │ │ def _extract_facility_process_names_for_prefilter()
531+
│ │ │ def _prefilter_facilities_by_required_processes()
528532
│ │ │ def _matches_filters()
529533
│ │ ├── okh.py
530534
│ │ ├── okw.py
@@ -1578,7 +1582,7 @@ The converte...
15781582

15791583
Orchestrates direc...
15801584

1581-
**Internal Dependencies:** 1 imports
1585+
**Internal Dependencies:** 4 imports
15821586

15831587
### `src/core/services/okh_service.py`
15841588

@@ -3972,7 +3976,7 @@ This module combines the functionality fr...
39723976
- `render_match_summary(match_summary)`
39733977
- Render a compact human-readable line from a structured match summary....
39743978

3975-
**Internal Dependencies:** 5 imports
3979+
**Internal Dependencies:** 7 imports
39763980

39773981
### `src/core/api/routes/okh.py`
39783982

src/config/settings.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,11 @@ def _get_secret_or_env(key: str, default: str = None) -> str:
196196
else:
197197
raise
198198

199+
# When true, skip StorageOrganizer .gitkeep seeding on app startup (no PUTs to remote).
200+
STORAGE_SKIP_DIRECTORY_BOOTSTRAP = _get_secret_or_env(
201+
"STORAGE_SKIP_DIRECTORY_BOOTSTRAP", "false"
202+
).lower() in ("true", "1", "t")
203+
199204
# Cache Configuration
200205
CACHE_ENABLED = _get_secret_or_env("CACHE_ENABLED", "true").lower() in (
201206
"true",
@@ -241,3 +246,34 @@ def _get_secret_or_env(key: str, default: str = None) -> str:
241246
f"MAX_DEPTH ({MAX_DEPTH}) is outside recommended range (0-10). "
242247
"Consider using a value between 1-5 for optimal performance."
243248
)
249+
250+
# NLP veto (second opinion on fuzzy direct + heuristic hits)
251+
MATCHING_NLP_VETO_ENABLED = _get_secret_or_env(
252+
"MATCHING_NLP_VETO_ENABLED", "false"
253+
).lower() in ("true", "1", "t")
254+
MATCHING_NLP_VETO_THRESHOLD = float(
255+
_get_secret_or_env("MATCHING_NLP_VETO_THRESHOLD", "0.2")
256+
)
257+
if not 0.0 <= MATCHING_NLP_VETO_THRESHOLD <= 1.0:
258+
logger.warning(
259+
f"MATCHING_NLP_VETO_THRESHOLD ({MATCHING_NLP_VETO_THRESHOLD}) should be in [0, 1]; "
260+
"clamping may occur at runtime."
261+
)
262+
263+
# When set, match requests with no inline `okw_facilities` load `*.json` from this path
264+
# (recursively) instead of listing remote storage — avoids hanging curls when cloud
265+
# storage is slow or misconfigured. Production should normally leave this unset.
266+
_match_local = _get_secret_or_env("MATCHING_LOCAL_OKW_JSON_DIR", "") or ""
267+
MATCHING_LOCAL_OKW_JSON_DIR = _match_local.strip() if _match_local.strip() else None
268+
269+
# When true (default), MatchingService.initialize() eagerly loads spaCy models for
270+
# each domain. When false, models load on first NLP use — avoids long stalls inside
271+
# FastAPI Depends(get_matching_service) on constrained or cold-start environments.
272+
MATCHING_PREINIT_NLP = _get_secret_or_env("MATCHING_PREINIT_NLP", "true").lower() in (
273+
"true",
274+
"1",
275+
"t",
276+
)
277+
278+
# Do not log MATCHING_* here: this module is imported before main.setup_logging(), so
279+
# INFO lines would be dropped by the default root logger (WARNING). Log from lifespan.

src/core/api/models/match/request.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ class MatchRequest(BaseAPIRequest, LLMRequestMixin):
4444
location: Optional[str] = None
4545
capabilities: Optional[List[str]] = None
4646
materials: Optional[List[str]] = None
47+
max_candidate_facilities: Optional[int] = Field(
48+
200,
49+
ge=1,
50+
le=5000,
51+
description=(
52+
"Upper bound on facilities considered during matching after requirement-aware "
53+
"prefiltering. Lower values improve latency on large OKW pools."
54+
),
55+
)
4756

4857
# Advanced filtering parameters
4958
max_distance_km: Optional[float] = None # Distance filter

0 commit comments

Comments
 (0)