@@ -45,19 +45,6 @@ function getIndexVersion (): string {
4545 return getMetadata ( serverCore . metadata . ElasticIndexVersion ) ?? 'v2'
4646}
4747
48- /** Fields that are defined in the schema and are facet-capable (keyword-like). */
49- const FACET_FIELDS = new Set ( [
50- 'id' ,
51- 'workspaceId' ,
52- '_class' ,
53- 'space' ,
54- 'attachedTo' ,
55- 'attachedToClass' ,
56- 'modifiedBy' ,
57- 'core:class:Doc%createdBy' ,
58- 'core:class:Doc%modifiedBy'
59- ] )
60-
6148function buildCollectionSchema ( collectionName : string ) : {
6249 name : string
6350 fields : CollectionFieldSchema [ ]
@@ -101,11 +88,34 @@ function isConnectionError (err: any): boolean {
10188 return false
10289}
10390
/**
 * Names of schema fields whose values are string arrays.
 * Consulted when sanitizing documents so these keys are always sent as arrays.
 */
const ARRAY_FIELDS = new Set<string>(['_class'])
93+
/**
 * Prepare a value for embedding between backticks in a Typesense filter_by expression:
 * every backtick in the input is prefixed with a backslash, all other characters are kept.
 */
function escapeFilterValue (val: string): string {
  // Splitting on the backtick and re-joining with the escaped form rewrites every occurrence.
  return val.split('`').join('\\`')
}
10898
/**
 * Sanitize a document for Typesense upsert.
 * - Removes the binary `data` field
 * - Coerces `ARRAY_FIELDS` members (e.g. `_class`) and keys ending in `_fields`
 *   (dynamic ES fields) to string arrays:
 *   - a scalar becomes a one-element array
 *   - `null`/`undefined` becomes `[]`
 *   - an existing array has its `null`/`undefined` entries dropped and every
 *     remaining non-string entry converted with `String()`
 * - Copies every other field unchanged
 *
 * The input document is not mutated; a new object is returned.
 *
 * @param doc - Document about to be sent to Typesense.
 * @returns A shallow copy of `doc` with the rules above applied.
 */
function sanitizeDoc (doc: Record<string, any>): Record<string, any> {
  const result: Record<string, any> = {}
  for (const [key, value] of Object.entries(doc)) {
    if (key === 'data') continue
    if (ARRAY_FIELDS.has(key) || key.endsWith('_fields')) {
      // Normalize to a list first so scalars and arrays share one coercion path.
      const items: unknown[] = Array.isArray(value) ? value : value != null ? [value] : []
      // Arrays used to be passed through as-is, which let null and non-string
      // entries reach a field that is documented as a string array.
      result[key] = items
        .filter((item) => item != null)
        .map((item) => (typeof item === 'string' ? item : String(item)))
    } else {
      result[key] = value
    }
  }
  return result
}
118+
109119/**
110120 * Build a Typesense filter_by string from a workspace ID and a Huly DocumentQuery.
111121 * Skips `$`-prefixed keys (like `$search`).
@@ -216,32 +226,25 @@ class TypesenseAdapter implements FullTextAdapter {
216226 // Require searchTitle to exist (non-empty)
217227 filterParts . push ( 'searchTitle:!=""' )
218228
219- // Scoring: boost documents where a facet field matches a specific value.
220- // Typesense doesn't support per-term boosting like ES function_score,
221- // so we use optional filter clauses that prefer matching documents.
222- if ( options . scoring !== undefined && options . scoring . length > 0 ) {
223- const optionalParts : string [ ] = [ ]
224- for ( const scoring of options . scoring ) {
225- if ( FACET_FIELDS . has ( scoring . attr ) ) {
226- optionalParts . push ( `${ scoring . attr } :=\`${ escapeFilterValue ( String ( scoring . value ) ) } \`` )
227- }
228- }
229- if ( optionalParts . length > 0 ) {
230- // Use optional filter_by syntax: main filters && (optional1 || optional2)
231- // Documents matching optional filters rank higher via _text_match + filter proximity
232- filterParts . push ( `(${ optionalParts . join ( ' || ' ) } )` )
233- }
234- }
229+ // Scoring: ES uses function_score with should clauses (soft boosts).
230+ // Typesense has no equivalent — filter_by is always mandatory (AND).
231+ // We skip scoring filters entirely and rely on text_match ranking,
232+ // which already prioritizes title matches via query_by_weights.
233+ // Adding scoring fields to filter_by would incorrectly EXCLUDE
234+ // documents that don't match, instead of just ranking them lower.
235235
236236 const filterBy = filterParts . join ( ' && ' )
237237
238238 const searchParams : any = {
239239 q : query . query ,
240240 query_by : 'searchTitle,searchShortTitle,fulltextSummary' ,
241- query_by_weights : '50,50 ,1' ,
241+ query_by_weights : '100,100 ,1' ,
242242 filter_by : filterBy ,
243243 limit : options . limit ?? DEFAULT_LIMIT ,
244244 prefix : 'true,true,false' ,
245+ num_typos : '2,2,1' ,
246+ typo_tokens_threshold : 1 ,
247+ drop_tokens_threshold : 1 ,
245248 sort_by : '_text_match:desc'
246249 }
247250
@@ -257,7 +260,7 @@ class TypesenseAdapter implements FullTextAdapter {
257260 return {
258261 ...doc ,
259262 id : this . getDocId ( workspaceId , doc . id ) ,
260- _score : hit . text_match ?? 0
263+ _score : hit . text_match_info ?. best_field_score ?? hit . text_match ?? 0
261264 }
262265 } )
263266 }
@@ -291,34 +294,24 @@ class TypesenseAdapter implements FullTextAdapter {
291294 }
292295
293296 // In Elastic, additional query fields are soft boosts (should clauses).
294- // Typesense has no direct equivalent, so we add them as optional filter
295- // clauses — documents matching them rank higher but aren't excluded.
296- const optionalParts : string [ ] = [ ]
297- for ( const [ q , v ] of Object . entries ( query ) ) {
298- if ( q . startsWith ( '$' ) ) continue
299- if ( typeof v === 'object' && v !== null ) {
300- if ( v . $in !== undefined && Array . isArray ( v . $in ) ) {
301- optionalParts . push ( `${ q } :=[${ v . $in . map ( ( val : string ) => `\`${ escapeFilterValue ( val ) } \`` ) . join ( ',' ) } ]` )
302- }
303- } else {
304- optionalParts . push ( `${ q } :=\`${ escapeFilterValue ( String ( v ) ) } \`` )
305- }
306- }
307- if ( optionalParts . length > 0 ) {
308- filterParts . push ( `(${ optionalParts . join ( ' || ' ) } )` )
309- }
297+ // Typesense filter_by is always mandatory (AND), so adding these as
298+ // filters would incorrectly exclude non-matching documents instead of
299+ // just ranking them lower. We skip them and rely on text_match ranking.
310300
311301 const filterBy = filterParts . join ( ' && ' )
312302
313303 const searchParams : any = {
314304 q : query . $search ,
315305 query_by : 'searchTitle,searchShortTitle,fulltextSummary' ,
316- query_by_weights : '50,50 ,1' ,
306+ query_by_weights : '100,100 ,1' ,
317307 filter_by : filterBy ,
318308 limit : size ?? DEFAULT_LIMIT ,
319309 offset : from ?? 0 ,
320310 sort_by : '_text_match:desc' ,
321- prefix : 'true,true,false'
311+ prefix : 'true,true,false' ,
312+ num_typos : '2,2,1' ,
313+ typo_tokens_threshold : 1 ,
314+ drop_tokens_threshold : 1
322315 }
323316
324317 const result = await ctx . with (
@@ -334,7 +327,7 @@ class TypesenseAdapter implements FullTextAdapter {
334327 return {
335328 ...doc ,
336329 id : this . getDocId ( workspaceId , doc . id ) ,
337- _score : hit . text_match ?? 0
330+ _score : hit . text_match_info ?. best_field_score ?? hit . text_match ?? 0
338331 }
339332 } )
340333 } catch ( err : any ) {
@@ -356,13 +349,7 @@ class TypesenseAdapter implements FullTextAdapter {
356349 }
357350
358351 const fulltextId = this . getFulltextDocId ( workspaceId , doc . id )
359- const tsDoc : Record < string , any > = {
360- ...doc ,
361- id : fulltextId ,
362- workspaceId
363- }
364- // Remove binary data — Typesense cannot process it
365- delete tsDoc . data
352+ const tsDoc = sanitizeDoc ( { ...doc , id : fulltextId , workspaceId } )
366353
367354 try {
368355 await this . client . collections ( this . collectionName ) . documents ( ) . upsert ( tsDoc )
@@ -406,12 +393,7 @@ class TypesenseAdapter implements FullTextAdapter {
406393 const batch = parts . splice ( 0 , BATCH_SIZE )
407394 const jsonlLines = batch
408395 . map ( ( doc ) => {
409- const tsDoc : Record < string , any > = {
410- ...doc ,
411- id : this . getFulltextDocId ( workspaceId , doc . id ) ,
412- workspaceId
413- }
414- delete tsDoc . data
396+ const tsDoc = sanitizeDoc ( { ...doc , id : this . getFulltextDocId ( workspaceId , doc . id ) , workspaceId } )
415397 return JSON . stringify ( tsDoc )
416398 } )
417399 . join ( '\n' )
@@ -479,7 +461,16 @@ class TypesenseAdapter implements FullTextAdapter {
479461 while ( remaining . length > 0 ) {
480462 const batch = remaining . splice ( 0 , BATCH_SIZE )
481463 const jsonlLines = batch . map ( ( doc : any ) => JSON . stringify ( doc ) ) . join ( '\n' )
482- await this . client . collections ( this . collectionName ) . documents ( ) . import ( jsonlLines , { action : 'upsert' } )
464+ const results = await this . client
465+ . collections ( this . collectionName )
466+ . documents ( )
467+ . import ( jsonlLines , { action : 'upsert' } )
468+ const errors = (
469+ typeof results === 'string' ? results . split ( '\n' ) . map ( ( l : string ) => JSON . parse ( l ) ) : results
470+ ) . filter ( ( r : any ) => r . success === false )
471+ if ( errors . length > 0 ) {
472+ console . error ( `updateByQuery upsert errors: ${ errors . map ( ( e : any ) => e . error ) . join ( '; ' ) } ` )
473+ }
483474 }
484475 } catch ( err : any ) {
485476 if ( isConnectionError ( err ) ) {
0 commit comments