From d6c478a8ed21ec6f151101bcfd16974207358db3 Mon Sep 17 00:00:00 2001 From: Mingtian Yin Date: Wed, 16 Jul 2025 14:58:58 -0700 Subject: [PATCH 1/3] fix(main): fix many mainline test issues 1) test_spanner_vector_store's dependency on HNLoader fails - use a different loader 2) test_spanner_vector_store's add_document failed with PK size exceeded (this is due to 1, the loader now returns slightly longer content) - use a different url 3) test_spanner_loader doesn't properly clean up the test so that now testing database has too many tables - add cleanup for these tests --- tests/integration/test_spanner_loader.py | 40 +++++++++++++++++-- .../integration/test_spanner_vector_store.py | 19 +++++---- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_spanner_loader.py b/tests/integration/test_spanner_loader.py index fd8f0284..d5cc942d 100644 --- a/tests/integration/test_spanner_loader.py +++ b/tests/integration/test_spanner_loader.py @@ -35,9 +35,41 @@ def client() -> Client: return Client(project=project_id) +@pytest.fixture() +def cleanupGSQL(client): + yield + + print("\nPerforming GSQL cleanup after each test...") + + database = client.instance(instance_id).database(google_database) + operation = database.update_ddl( + [ + f"DROP TABLE IF EXISTS {table_name}", + ] + ) + operation.result(OPERATION_TIMEOUT_SECONDS) + + # Code to perform teardown after each test goes here + print("\nGSQL Cleanup complete.") + + +@pytest.fixture() +def cleanupPGSQL(client): + yield + + print("\nPerforming PGSQL cleanup after each test...") + + database = client.instance(instance_id).database(pg_database) + operation = database.update_ddl([f"DROP TABLE IF EXISTS {table_name}"]) + operation.result(OPERATION_TIMEOUT_SECONDS) + + # Code to perform teardown after each test goes here + print("\n PGSQL Cleanup complete.") + + class TestSpannerDocumentLoaderGoogleSQL: @pytest.fixture(autouse=True, scope="class") - def setup_database(self, client): + def 
setup_database(self, client, cleanupGSQL): database = client.instance(instance_id).database(google_database) operation = database.update_ddl([f"DROP TABLE IF EXISTS {table_name}"]) operation.result(OPERATION_TIMEOUT_SECONDS) @@ -455,7 +487,7 @@ def test_loader_custom_json_metadata(self, client): class TestSpannerDocumentLoaderPostgreSQL: @pytest.fixture(autouse=True, scope="class") - def setup_database(self, client): + def setup_database(self, client, cleanupPGSQL): database = client.instance(instance_id).database(pg_database) operation = database.update_ddl([f"DROP TABLE IF EXISTS {table_name}"]) operation.result(OPERATION_TIMEOUT_SECONDS) @@ -872,7 +904,7 @@ def test_loader_custom_json_metadata(self, client): class TestSpannerDocumentSaver: @pytest.fixture(name="google_client") - def setup_google_client(self, client) -> Client: + def setup_google_client(self, client, cleanupGSQL) -> Client: database = client.instance(instance_id).database(google_database) operation = database.update_ddl([f"DROP TABLE IF EXISTS {table_name}"]) print("table dropped") @@ -880,7 +912,7 @@ def setup_google_client(self, client) -> Client: yield client @pytest.fixture(name="pg_client") - def setup_pg_client(self, client) -> Client: + def setup_pg_client(self, client, cleanupPGSQL) -> Client: database = client.instance(instance_id).database(pg_database) operation = database.update_ddl([f"DROP TABLE IF EXISTS {table_name}"]) operation.result(OPERATION_TIMEOUT_SECONDS) diff --git a/tests/integration/test_spanner_vector_store.py b/tests/integration/test_spanner_vector_store.py index 8cc25a0b..24b23d13 100644 --- a/tests/integration/test_spanner_vector_store.py +++ b/tests/integration/test_spanner_vector_store.py @@ -20,7 +20,7 @@ import pytest from google.cloud.spanner import Client # type: ignore -from langchain_community.document_loaders import HNLoader +from langchain_community.document_loaders import RecursiveUrlLoader from langchain_community.embeddings import FakeEmbeddings from 
langchain_google_spanner.vector_store import ( # type: ignore @@ -245,11 +245,13 @@ def setup_database(self, client): id_column="row_id", metadata_columns=[ TableColumn(name="metadata", type="JSON", is_null=True), - TableColumn(name="title", type="STRING(MAX)", is_null=False), + TableColumn(name="title", type="STRING(MAX)"), ], ) - loader = HNLoader("https://news.ycombinator.com/item?id=34817881") + loader = RecursiveUrlLoader( + "https://news.ycombinator.com/item?id=1", max_depth=1 + ) embeddings = FakeEmbeddings(size=3) @@ -327,7 +329,7 @@ def test_spanner_vector_delete_data(self, setup_database): docs = loader.load() - deleted = db.delete(documents=[docs[0], docs[1]]) + deleted = db.delete(documents=docs) assert deleted @@ -459,7 +461,9 @@ def setup_database(self, client): ], ) - loader = HNLoader("https://news.ycombinator.com/item?id=34817881") + loader = RecursiveUrlLoader( + "https://news.ycombinator.com/item?id=1", max_depth=1 + ) embeddings = FakeEmbeddings(size=title_vector_size) def cleanup_db(): @@ -677,8 +681,9 @@ def setup_database(self, client): ], ) - loader = HNLoader("https://news.ycombinator.com/item?id=34817881") - + loader = RecursiveUrlLoader( + "https://news.ycombinator.com/item?id=1", max_depth=1 + ) embeddings = FakeEmbeddings(size=3) yield loader, embeddings From 280332d0a1ef6d8de2c6d9d2c22f8b8b86f26aa9 Mon Sep 17 00:00:00 2001 From: Mingtian Yin Date: Wed, 16 Jul 2025 15:27:35 -0700 Subject: [PATCH 2/3] Fix cleanup code --- tests/integration/test_spanner_loader.py | 4 ++-- tests/integration/test_spanner_vector_store.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_spanner_loader.py b/tests/integration/test_spanner_loader.py index d5cc942d..99bb1b0e 100644 --- a/tests/integration/test_spanner_loader.py +++ b/tests/integration/test_spanner_loader.py @@ -35,7 +35,7 @@ def client() -> Client: return Client(project=project_id) -@pytest.fixture() +@pytest.fixture(scope="class") def 
cleanupGSQL(client): yield @@ -53,7 +53,7 @@ def cleanupGSQL(client): print("\nGSQL Cleanup complete.") -@pytest.fixture() +@pytest.fixture(scope="class") def cleanupPGSQL(client): yield diff --git a/tests/integration/test_spanner_vector_store.py b/tests/integration/test_spanner_vector_store.py index 24b23d13..41bd4901 100644 --- a/tests/integration/test_spanner_vector_store.py +++ b/tests/integration/test_spanner_vector_store.py @@ -556,7 +556,7 @@ def test_delete(self, setup_database): ) docs = loader.load() - deleted = db.delete(documents=[docs[0], docs[1]]) + deleted = db.delete(documents=docs) assert deleted @@ -760,7 +760,7 @@ def test_spanner_vector_delete_data(self, setup_database): docs = loader.load() - deleted = db.delete(documents=[docs[0], docs[1]]) + deleted = db.delete(documents=docs) assert deleted From b0676fd69773709fcafdd16192b172c47d84de6e Mon Sep 17 00:00:00 2001 From: Mingtian Yin Date: Fri, 18 Jul 2025 09:25:55 -0700 Subject: [PATCH 3/3] Switch to new test databases The old databases are extremely slow due to too many schema objects. Also verified that running pytest on tests/integration/ properly cleans up upon test completion. --- integration.cloudbuild.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration.cloudbuild.yaml b/integration.cloudbuild.yaml index 538325c4..e27c8a02 100644 --- a/integration.cloudbuild.yaml +++ b/integration.cloudbuild.yaml @@ -36,8 +36,8 @@ steps: timeout: "7200s" substitutions: _INSTANCE_ID: test-instance - _GOOGLE_DATABASE: test-google-db - _PG_DATABASE: test-pg-db + _GOOGLE_DATABASE: test-gsql-db + _PG_DATABASE: test-pgsql-db _VERSION: "3.9" options: