Skip to content

Commit 0bd0b0f

Browse files
authored
refactor(compiler): extract assembleQuery helper and modularize services (#27)
- Extract MAX_RESULTS_LIMIT constant in compiler (used in 2 sites)
- Extract assembleQuery() helper to centralize query-tail assembly
- Extract count-assets service from stats route
- Extract validate-slots service from search-factory
- Reduces function size and improves testability

Signed-off-by: Carlo van Driesten <carlo.van-driesten@bmw.de>
1 parent 266779c commit 0bd0b0f

5 files changed

Lines changed: 135 additions & 76 deletions

File tree

apps/api/src/routes/stats.ts

Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
import { extractErrorMessage, internalError } from '@ontology-search/core/errors'
1+
import { internalError } from '@ontology-search/core/errors'
22
import { REQUEST_ID_HEADER, RequestLogger } from '@ontology-search/core/logging'
3-
import { buildDomainRegistry } from '@ontology-search/ontology/domain-registry'
4-
import { compileCountQuery, getAssetDomains, getInitializedStore } from '@ontology-search/search'
53
import { Hono } from 'hono'
64

5+
import { countAssets } from '../services/count-assets.js'
76
import type { AppEnv } from '../types.js'
87

98
export const statsRoutes = new Hono<AppEnv>()
@@ -14,38 +13,11 @@ statsRoutes.get('/', async (c) => {
1413

1514
try {
1615
logger.info('Stats request started')
17-
const store = await getInitializedStore()
18-
const registry = await buildDomainRegistry()
19-
const assetDomains = await getAssetDomains()
20-
21-
const counts: Record<string, number> = {}
22-
let totalAssets = 0
23-
24-
for (const domainName of registry.domainNames) {
25-
if (!assetDomains.has(domainName)) continue
26-
try {
27-
const query = await compileCountQuery(domainName)
28-
const result = await store.query(query)
29-
const count = parseInt(result.results.bindings[0]?.['count']?.value ?? '0', 10)
30-
if (count > 0) {
31-
counts[domainName] = count
32-
totalAssets += count
33-
}
34-
} catch (error) {
35-
// intentional: degraded response — one failing domain count should
36-
// not block the entire /stats response; the domain is simply omitted.
37-
// Surface WHY in the log so operators can investigate (was the SPARQL
38-
// malformed, the store unreachable, a timeout, …).
39-
logger.warn('Skipped domain count', {
40-
domain: domainName,
41-
error: extractErrorMessage(error),
42-
})
43-
}
44-
}
16+
const { counts, totalAssets, availableDomains } = await countAssets(logger)
4517

4618
logger.info('Stats request completed', { totalAssets, domainCount: Object.keys(counts).length })
4719

48-
return c.json({ totalAssets, domains: counts, availableDomains: registry.domainNames }, 200, {
20+
return c.json({ totalAssets, domains: counts, availableDomains }, 200, {
4921
[REQUEST_ID_HEADER]: requestId,
5022
})
5123
} catch (error) {

apps/api/src/search-factory.ts

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,23 @@
88
* Tests construct SearchService directly with mock dependencies.
99
*/
1010
import { generateStructuredSearch } from '@ontology-search/llm'
11-
import { validateRangesAgainstShacl, validateSlotsAgainstShacl } from '@ontology-search/llm'
12-
import { ShaclValidator } from '@ontology-search/ontology/shacl-validator'
1311
import type {
1412
NlSearchOptions,
1513
RefineOptions,
1614
RefineResult,
1715
SearchResult,
18-
SearchSlots,
1916
} from '@ontology-search/search'
2017
import {
2118
compileAllCountQueries,
2219
compileSlots,
23-
extractVocabulary,
2420
getInitializedStore,
2521
type SearchDependencies,
2622
SearchService,
2723
} from '@ontology-search/search'
2824
import { enforceSparqlPolicy } from '@ontology-search/sparql/policy'
2925

26+
import { validateSlots } from './services/validate-slots.js'
27+
3028
let instance: SearchService | null = null
3129

3230
/**
@@ -41,30 +39,7 @@ export async function getSearchService(): Promise<SearchService> {
4139
compileSlots,
4240
compileCountQueries: compileAllCountQueries,
4341
enforcePolicy: enforceSparqlPolicy,
44-
validateSlots: async (slots: SearchSlots): Promise<SearchSlots> => {
45-
// Defense-in-depth gate for /refine: run the same SHACL validator the
46-
// LLM agent uses, then re-emit the slots with any violating values
47-
// dropped. The compiler will see only ontology-valid filters/location.
48-
const shacl = await ShaclValidator.fromWorkspace()
49-
const store = await getInitializedStore()
50-
const vocabulary = await extractVocabulary(store)
51-
const result = await validateSlotsAgainstShacl(
52-
slots.filters ?? {},
53-
slots.location,
54-
slots.license,
55-
shacl,
56-
vocabulary
57-
)
58-
// Drop ranges with property names not in the schema (e.g. `numberLanes`).
59-
const rangeResult = validateRangesAgainstShacl(slots.ranges ?? {}, shacl)
60-
return {
61-
...slots,
62-
filters: result.filters,
63-
ranges: rangeResult.ranges,
64-
location: result.location,
65-
license: result.license,
66-
}
67-
},
42+
validateSlots,
6843
}
6944

7045
instance = new SearchService(deps)
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/**
2+
* Count-assets service — tallies assets per domain via SPARQL COUNT queries.
3+
*
4+
* Extracted from the /stats route to keep route handlers as pure
5+
* request/response plumbing (SRP) and enable independent unit testing.
6+
*/
7+
import { extractErrorMessage } from '@ontology-search/core/errors'
8+
import type { RequestLogger } from '@ontology-search/core/logging'
9+
import { buildDomainRegistry } from '@ontology-search/ontology/domain-registry'
10+
import { compileCountQuery, getAssetDomains, getInitializedStore } from '@ontology-search/search'
11+
12+
export interface AssetCounts {
13+
counts: Record<string, number>
14+
totalAssets: number
15+
availableDomains: string[]
16+
}
17+
18+
/**
19+
* Count assets per domain by compiling and executing a COUNT query for each.
20+
*
21+
* Tolerates per-domain failures (degraded response) so a single broken
22+
* domain cannot block the whole /stats response. The logger receives a
23+
* structured `warn` for each skipped domain that names the failing
24+
* domain AND surfaces the underlying error message — operators need
25+
* both to diagnose whether the SPARQL was malformed, the store was
26+
* unreachable, a timeout fired, etc.
27+
*/
28+
export async function countAssets(logger: RequestLogger): Promise<AssetCounts> {
29+
const store = await getInitializedStore()
30+
const registry = await buildDomainRegistry()
31+
const assetDomains = await getAssetDomains()
32+
33+
const counts: Record<string, number> = {}
34+
let totalAssets = 0
35+
36+
for (const domainName of registry.domainNames) {
37+
if (!assetDomains.has(domainName)) continue
38+
try {
39+
const query = await compileCountQuery(domainName)
40+
const result = await store.query(query)
41+
const count = parseInt(result.results.bindings[0]?.['count']?.value ?? '0', 10)
42+
if (count > 0) {
43+
counts[domainName] = count
44+
totalAssets += count
45+
}
46+
} catch (error) {
47+
logger.warn('Skipped domain count', {
48+
domain: domainName,
49+
error: extractErrorMessage(error),
50+
})
51+
}
52+
}
53+
54+
return { counts, totalAssets, availableDomains: registry.domainNames }
55+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/**
2+
* Slot validation service — SHACL-based defense-in-depth for /refine.
3+
*
4+
* Runs the same SHACL validator the LLM agent uses, then re-emits slots
5+
* with any violating values dropped. The compiler only sees ontology-valid
6+
* filters, ranges, location, and license.
7+
*
8+
* Extracted from search-factory for readability — the validation logic
9+
* is wiring-level (not a reusable service), but substantial enough to
10+
* warrant its own module.
11+
*/
12+
import { validateRangesAgainstShacl, validateSlotsAgainstShacl } from '@ontology-search/llm'
13+
import { ShaclValidator } from '@ontology-search/ontology/shacl-validator'
14+
import type { SearchSlots } from '@ontology-search/search'
15+
import { extractVocabulary, getInitializedStore } from '@ontology-search/search'
16+
17+
/**
18+
* Validate search slots against SHACL constraints.
19+
* Returns cleaned slots with invalid values removed.
20+
*/
21+
export async function validateSlots(slots: SearchSlots): Promise<SearchSlots> {
22+
const shacl = await ShaclValidator.fromWorkspace()
23+
const store = await getInitializedStore()
24+
const vocabulary = await extractVocabulary(store)
25+
26+
const result = await validateSlotsAgainstShacl(
27+
slots.filters ?? {},
28+
slots.location,
29+
slots.license,
30+
shacl,
31+
vocabulary
32+
)
33+
34+
// Drop ranges with property names not in the schema
35+
const rangeResult = validateRangesAgainstShacl(slots.ranges ?? {}, shacl)
36+
37+
return {
38+
...slots,
39+
filters: result.filters,
40+
ranges: rangeResult.ranges,
41+
location: result.location,
42+
license: result.license,
43+
}
44+
}

packages/search/src/compiler.ts

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,33 @@ export function escapeSparqlLiteral(value: string): string {
6969
.replace(/\t/g, '\\t')
7070
}
7171

72+
/**
73+
* Assemble a complete SPARQL SELECT query from its constituent parts.
74+
* Centralizes the query-tail pattern used by both single-domain and
75+
* cross-domain compilation. The LIMIT defaults to the operator-tunable
76+
* `SPARQL_DEFAULT_LIMIT` config field; the policy gate enforces the
77+
* separate `SPARQL_MAX_LIMIT` ceiling (the Zod schema rejects configs
78+
* where the default would exceed the ceiling).
79+
*/
80+
function assembleQuery(
81+
prefixes: string,
82+
selectVars: string[] | Set<string>,
83+
patterns: string[],
84+
optionals: string[],
85+
filters: string[],
86+
limit: number = getConfig().SPARQL_DEFAULT_LIMIT
87+
): string {
88+
const vars = selectVars instanceof Set ? [...selectVars] : selectVars
89+
const selectClause = `SELECT ${vars.join(' ')}`
90+
const whereBody = [...patterns, ...optionals, ...filters].join('\n ')
91+
92+
return `${prefixes}
93+
${selectClause} WHERE {
94+
${whereBody}
95+
}
96+
LIMIT ${limit}`
97+
}
98+
7299
/** Cached compiler vocabulary (ontology doesn't change at runtime) */
73100
let cachedCompilerVocab: CompilerVocab | null = null
74101

@@ -280,14 +307,7 @@ export async function compileSlots(slots: SearchSlots): Promise<string> {
280307
}
281308

282309
// Build the query
283-
const selectClause = `SELECT ${[...selectVars].join(' ')}`
284-
const whereBody = [...patterns, ...optionals, ...filters].join('\n ')
285-
286-
return `${prefixes}
287-
${selectClause} WHERE {
288-
${whereBody}
289-
}
290-
LIMIT ${getConfig().SPARQL_DEFAULT_LIMIT}`
310+
return assembleQuery(prefixes, selectVars, patterns, optionals, filters)
291311
}
292312

293313
/**
@@ -404,14 +424,7 @@ function compileCrossDomainQuery(
404424
}
405425

406426
// Build the query
407-
const selectClause = `SELECT ${selectVars.join(' ')}`
408-
const whereBody = [...patterns, ...optionals, ...filters].join('\n ')
409-
410-
return `${prefixes}
411-
${selectClause} WHERE {
412-
${whereBody}
413-
}
414-
LIMIT ${getConfig().SPARQL_DEFAULT_LIMIT}`
427+
return assembleQuery(prefixes, selectVars, patterns, optionals, filters)
415428
}
416429

417430
/**

0 commit comments

Comments
 (0)