@@ -343,9 +343,15 @@ def _(sources: list[UploadFile]):
343343 if len (sources ) == 0 :
344344 return JSONResponse ('No sources provided' , 400 )
345345
346+ filtered_sources = []
347+
346348 for source in sources :
347349 if not value_of (source .filename ):
348- return JSONResponse (f'Invalid source filename for: { source .headers .get ("title" )} ' , 400 )
350+ logger .warning ('Skipping source with invalid source_id' , extra = {
351+ 'source_id' : source .filename ,
352+ 'title' : source .headers .get ('title' ),
353+ })
354+ continue
349355
350356 with index_lock :
351357 if source .filename in _indexing :
@@ -364,12 +370,14 @@ def _(sources: list[UploadFile]):
364370 and source .headers ['modified' ].isdigit ()
365371 and value_of (source .headers .get ('provider' ))
366372 ):
367- logger .error ( 'Invalid /missing headers received ' , extra = {
373+ logger .warning ( 'Skipping source with invalid /missing headers' , extra = {
368374 'source_id' : source .filename ,
369375 'title' : source .headers .get ('title' ),
370376 'headers' : source .headers ,
371377 })
372- return JSONResponse (f'Invaild/missing headers for: { source .filename } ' , 400 )
378+ continue
379+
380+ filtered_sources .append (source )
373381
374382 # wait for 10 minutes before failing the request
375383 semres = doc_parse_semaphore .acquire (block = True , timeout = 10 * 60 )
@@ -381,27 +389,27 @@ def _(sources: list[UploadFile]):
381389 )
382390
383391 with index_lock :
384- for source in sources :
392+ for source in filtered_sources :
385393 _indexing [source .filename ] = source .size
386394
387395 try :
388396 loaded_sources , not_added_sources = exec_in_proc (
389397 target = embed_sources ,
390- args = (vectordb_loader , app .extra ['CONFIG' ], sources )
398+ args = (vectordb_loader , app .extra ['CONFIG' ], filtered_sources )
391399 )
392400 except (DbException , EmbeddingException ):
393401 raise
394402 except Exception as e :
395403 raise DbException ('Error: failed to load sources' ) from e
396404 finally :
397405 with index_lock :
398- for source in sources :
406+ for source in filtered_sources :
399407 _indexing .pop (source .filename , None )
400408 doc_parse_semaphore .release ()
401409
402- if len (loaded_sources ) != len (sources ):
410+ if len (loaded_sources ) != len (filtered_sources ):
403411 logger .debug ('Some sources were not loaded' , extra = {
404- 'Count of loaded sources' : f'{ len (loaded_sources )} /{ len (sources )} ' ,
412+ 'Count of loaded sources' : f'{ len (loaded_sources )} /{ len (filtered_sources )} ' ,
405413 'source_ids' : loaded_sources ,
406414 })
407415
0 commit comments