Skip to content

Commit 37672a1

Browse files
committed
fixing skip_documents
1 parent 4cc5dbd commit 37672a1

1 file changed

Lines changed: 6 additions & 5 deletions

File tree

  • integrations/oracle/src/haystack_integrations/document_stores/oracle

integrations/oracle/src/haystack_integrations/document_stores/oracle/document_store.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -284,8 +284,10 @@ def _insert_documents(self, documents: list[Document]) -> int:
284284
return len(rows)
285285

286286
def _skip_duplicate_documents(self, documents: list[Document]) -> int:
287-
# MERGE rowcount in Oracle reflects rows touched, not just inserted.
288-
# Count before/after to return an accurate number of newly written docs.
287+
# For a MERGE with WHEN NOT MATCHED only, Oracle reports 0 rows affected
288+
# for existing documents and 1 for each new insert. oracledb sums these
289+
# across executemany iterations, so cur.rowcount equals the number of
290+
# newly written documents.
289291
sql = f"""
290292
MERGE INTO {self.table_name} t
291293
USING (SELECT :doc_id AS id FROM dual) s ON (t.id = s.id)
@@ -295,11 +297,10 @@ def _skip_duplicate_documents(self, documents: list[Document]) -> int:
295297
"""
296298
rows = [OracleDocumentStore._to_named_row(d) for d in documents]
297299
with self._get_connection() as conn, conn.cursor() as cur:
298-
count_before = conn.cursor().execute(f"SELECT COUNT(*) FROM {self.table_name}").fetchone()[0]
299300
cur.executemany(sql, rows)
300-
count_after = conn.cursor().execute(f"SELECT COUNT(*) FROM {self.table_name}").fetchone()[0]
301+
written = cur.rowcount
301302
conn.commit()
302-
return count_after - count_before
303+
return written
303304

304305
def _upsert_documents(self, documents: list[Document]) -> int:
305306
sql = f"""

0 commit comments

Comments
 (0)