@@ -117,6 +117,7 @@ def _process_sources(
117117 'len(filtered_sources)' : len (filtered_sources ),
118118 'filtered_sources' : filtered_sources ,
119119 })
120+ loaded_source_ids = [source .filename for source in existing_sources ]
120121
121122 # update userIds for existing sources
122123 # allow the userIds as additional users, not as the only users
@@ -138,23 +139,26 @@ def _process_sources(
138139 if len (filtered_sources ) == 0 :
139140 # no new sources to embed
140141 logger .debug ('Filtered all sources, nothing to embed' )
141- return [], [ ]
142+ return loaded_source_ids , [] # pyright: ignore[reportReturnType ]
142143
143144 logger .debug ('Filtered sources:' , extra = {
144145 'source_ids' : [source .filename for source in filtered_sources ]
145146 })
147+ # invalid/empty sources are filtered out here and not counted in loaded/retryable
146148 indocuments = _sources_to_indocuments (config , filtered_sources )
147149
148150 logger .debug ('Converted all sources to documents' )
149151
150152 if len (indocuments ) == 0 :
151- # document(s) were empty, not an error
153+ # filtered document(s) were invalid/empty, not an error
152154 logger .debug ('All documents were found empty after being processed' )
153- return [], [ ]
155+ return loaded_source_ids , [] # pyright: ignore[reportReturnType ]
154156
155- added_sources , not_added_sources = vectordb .add_indocuments (indocuments )
157+ added_source_ids , retry_source_ids = vectordb .add_indocuments (indocuments )
158+ loaded_source_ids .extend (added_source_ids )
156159 logger .debug ('Added documents to vectordb' )
157- return added_sources , not_added_sources
160+
161+ return loaded_source_ids , retry_source_ids # pyright: ignore[reportReturnType]
158162
159163
160164def _decode_latin_1 (s : str ) -> str :
0 commit comments