diff --git a/backend/adapter_processor_v2/tests/test_adapter_api.py b/backend/adapter_processor_v2/tests/test_adapter_api.py new file mode 100644 index 0000000000..795907ee5b --- /dev/null +++ b/backend/adapter_processor_v2/tests/test_adapter_api.py @@ -0,0 +1,59 @@ +"""Critical path ``adapter-register-llm``: POST /api/v1/adapter/ registers an +LLM adapter. Exercises the real endpoint wiring — auth, serializer, metadata +encryption, org-scoped persistence — with only the SDK context-window lookup +(a provider-shaped call) mocked. Needs a live DB (integration tier). +""" + +from __future__ import annotations + +import secrets +from unittest.mock import patch + +from account_v2.models import Organization, User +from django.test import TestCase +from rest_framework import status +from rest_framework.test import APIRequestFactory, force_authenticate +from tenant_account_v2.models import OrganizationMember +from utils.user_context import UserContext + +from adapter_processor_v2.models import AdapterInstance +from adapter_processor_v2.views import AdapterInstanceViewSet + + +class AdapterRegisterLLMAPITest(TestCase): + def setUp(self) -> None: + self.org = Organization.objects.create( + name="org-a", display_name="Org A", organization_id="org-a" + ) + UserContext.set_organization_identifier(self.org.organization_id) + self.user = User.objects.create_user( + username="owner@example.com", + email="owner@example.com", + password=secrets.token_urlsafe(), + ) + OrganizationMember.objects.create( + organization=self.org, user=self.user, role="user" + ) + self.create_view = AdapterInstanceViewSet.as_view({"post": "create"}) + + @patch.object(AdapterInstance, "get_context_window_size", return_value=4096) + def test_register_llm_adapter_persists_encrypted(self, _ctx_window) -> None: + payload = { + "adapter_id": "openai|test-llm", + "adapter_name": "my-openai", + "adapter_type": "LLM", + "adapter_metadata": {"api_key": "sk-test", "model": "gpt-4o-mini"}, + } + request = APIRequestFactory().post("/api/v1/adapter/", payload, format="json") + force_authenticate(request, user=self.user) + + response = self.create_view(request) + + assert response.status_code == status.HTTP_201_CREATED, response.data + instance = AdapterInstance.objects.get(adapter_name="my-openai") + # persisted under the request user's org, created_by the request user + assert instance.organization_id == self.org.id + assert instance.created_by == self.user + # metadata stored encrypted (binary), decrypts back via .metadata + assert instance.adapter_metadata_b is not None + assert instance.metadata["model"] == "gpt-4o-mini" diff --git a/backend/conftest.py b/backend/conftest.py index 16452bb9ef..bbf1910425 100644 --- a/backend/conftest.py +++ b/backend/conftest.py @@ -22,3 +22,23 @@ # to make a mis-located file debuggable instead of silently empty. if not load_dotenv(Path(__file__).parent / "test.env", override=False): print("[conftest] backend/test.env not found; using ambient environment", flush=True) + + +def pytest_collection_modifyitems(items): + """Auto-mark every DB-bound test as ``integration`` so the rig's unit tier + (``-m 'not integration'``) skips it while the integration tier (live + Postgres) runs it. Detects Django ``TestCase``/``TransactionTestCase`` + subclasses and any item using the ``django_db`` marker — the two ways a + backend test needs a database. Kept central so tests declare their DB need + by how they're written, not by a hand-maintained marker on each file. + """ + import pytest + from django.test import TestCase, TransactionTestCase + + for item in items: + cls = getattr(item, "cls", None) + needs_db = item.get_closest_marker("django_db") is not None or ( + cls is not None and issubclass(cls, (TestCase, TransactionTestCase)) + ) + if needs_db: + item.add_marker(pytest.mark.integration) diff --git a/backend/dashboard_metrics/tests/test_tasks.py b/backend/dashboard_metrics/tests/test_tasks.py index 1e38e2ca89..ac45887d4c 100644 --- a/backend/dashboard_metrics/tests/test_tasks.py +++ b/backend/dashboard_metrics/tests/test_tasks.py @@ -1,11 +1,11 @@ """Unit tests for Dashboard Metrics Celery tasks.""" -import uuid from datetime import datetime, timedelta from django.test import TestCase, TransactionTestCase from django.utils import timezone +from account_v2.models import Organization from dashboard_metrics.models import ( EventMetricsDaily, EventMetricsHourly, @@ -86,7 +86,10 @@ class TestCleanupTasks(TransactionTestCase): def setUp(self): """Set up test fixtures.""" - self.org_id = str(uuid.uuid4()) + # organization FK targets Organization's int PK, not a UUID. + self.org = Organization.objects.create( + organization_id="test-org", name="test-org", display_name="Test Org" + ) def test_cleanup_hourly_metrics_deletes_old_records(self): """Test that cleanup deletes hourly records older than retention.""" @@ -96,7 +99,7 @@ def test_cleanup_hourly_metrics_deletes_old_records(self): # Create old record EventMetricsHourly.objects.create( - organization_id=self.org_id, + organization=self.org, timestamp=old_timestamp, metric_name="old_metric", metric_type=MetricType.COUNTER, @@ -107,7 +110,7 @@ def test_cleanup_hourly_metrics_deletes_old_records(self): # Create recent record EventMetricsHourly.objects.create( - organization_id=self.org_id, + organization=self.org, timestamp=recent_timestamp, metric_name="recent_metric", metric_type=MetricType.COUNTER, @@ -122,9 +125,10 @@ def test_cleanup_hourly_metrics_deletes_old_records(self): assert result["deleted"] == 1 assert result["retention_days"] == 30 - # Verify old is deleted, recent remains - assert not EventMetricsHourly.objects.filter(metric_name="old_metric").exists() - assert EventMetricsHourly.objects.filter(metric_name="recent_metric").exists() + # _base_manager bypasses the org-scoped default manager, which filters + # by UserContext.get_organization() — None here, so .objects sees nothing. + assert not EventMetricsHourly._base_manager.filter(metric_name="old_metric").exists() + assert EventMetricsHourly._base_manager.filter(metric_name="recent_metric").exists() def test_cleanup_daily_metrics_deletes_old_records(self): """Test that cleanup deletes daily records older than retention.""" @@ -134,7 +138,7 @@ def test_cleanup_daily_metrics_deletes_old_records(self): # Create old record EventMetricsDaily.objects.create( - organization_id=self.org_id, + organization=self.org, date=old_date, metric_name="old_daily_metric", metric_type=MetricType.COUNTER, @@ -145,7 +149,7 @@ def test_cleanup_daily_metrics_deletes_old_records(self): # Create recent record EventMetricsDaily.objects.create( - organization_id=self.org_id, + organization=self.org, date=recent_date, metric_name="recent_daily_metric", metric_type=MetricType.COUNTER, @@ -160,10 +164,10 @@ def test_cleanup_daily_metrics_deletes_old_records(self): assert result["deleted"] == 1 # Verify old is deleted, recent remains - assert not EventMetricsDaily.objects.filter( + assert not EventMetricsDaily._base_manager.filter( metric_name="old_daily_metric" ).exists() - assert EventMetricsDaily.objects.filter( + assert EventMetricsDaily._base_manager.filter( metric_name="recent_daily_metric" ).exists() @@ -173,7 +177,7 @@ def test_cleanup_hourly_with_custom_retention(self): old_timestamp = now - timedelta(days=10) EventMetricsHourly.objects.create( - organization_id=self.org_id, + organization=self.org, timestamp=old_timestamp, metric_name="custom_retention_metric", metric_type=MetricType.COUNTER, diff --git a/backend/prompt_studio/prompt_studio_core_v2/tests/test_build_index_payload.py b/backend/prompt_studio/prompt_studio_core_v2/tests/test_build_index_payload.py index adb5713243..ee4d23f6df 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/tests/test_build_index_payload.py +++ b/backend/prompt_studio/prompt_studio_core_v2/tests/test_build_index_payload.py @@ -42,6 +42,12 @@ # --------------------------------------------------------------------------- +# Originals displaced by the stubs below, restored once the helper is imported +# so the stubs never leak into sibling test modules' collection (a stubbed +# ``account_v2.models`` would otherwise break their real imports). +_SAVED_MODULES: dict[str, types.ModuleType | None] = {} + + def _install(name: str, attrs: dict[str, Any] | None = None) -> types.ModuleType: """Install (or replace) a fake module into ``sys.modules``. @@ -50,6 +56,7 @@ def _install(name: str, attrs: dict[str, Any] | None = None) -> types.ModuleType (via pytest collection, conftest, etc.), and we need our fake to actually take effect. """ + _SAVED_MODULES.setdefault(name, sys.modules.get(name)) mod = types.ModuleType(name) if attrs: for key, value in attrs.items(): @@ -69,12 +76,32 @@ def _install_package(name: str) -> types.ModuleType: """ if name in sys.modules: return sys.modules[name] + _SAVED_MODULES.setdefault(name, None) mod = types.ModuleType(name) mod.__path__ = [] # type: ignore[attr-defined] sys.modules[name] = mod return mod +def _restore_modules() -> None: + """Undo every stub installed above, restoring the real modules (or + removing the stub when nothing was there before). The helper has already + bound its imports by the time this runs, so its tests are unaffected. + """ + for name, original in _SAVED_MODULES.items(): + if original is None: + sys.modules.pop(name, None) + else: + sys.modules[name] = original + _SAVED_MODULES.clear() + # The helper imported above is now cached bound to the stubbed globals. + # Evict it so any later importer in this process gets a real copy; our + # own `_psh_mod`/`PromptStudioHelper` refs are already bound, unaffected. + sys.modules.pop( + "prompt_studio.prompt_studio_core_v2.prompt_studio_helper", None + ) + + try: # Account / adapter stubs _install_package("account_v2") @@ -91,7 +118,10 @@ def _install_package(name: str) -> types.ModuleType: ) _install( "adapter_processor_v2.models", - {"AdapterInstance": MagicMock(name="AdapterInstance")}, + { + "AdapterInstance": MagicMock(name="AdapterInstance"), + "UserDefaultAdapter": MagicMock(name="UserDefaultAdapter"), + }, ) # Plugins stub @@ -290,6 +320,8 @@ def __init__(self, **kwargs: Any) -> None: ) PromptStudioHelper = None # type: ignore[assignment] IKeys = None # type: ignore[assignment] +finally: + _restore_modules() pytestmark = pytest.mark.skipif( diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 3ef2c187a4..5862987e50 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -71,7 +71,10 @@ dev = [ "responses>=0.25.7", "psutil>=7.0.0", ] -test = ["pytest>=8.0.1"] +test = [ + "pytest>=8.0.1", + "pytest-django>=4.12.0", +] deploy = [ "gunicorn~=23.0", # For serving the application # Keep versions empty and let uv decide version @@ -101,6 +104,9 @@ constraint-dependencies = [ # Note: test.env is loaded by backend/conftest.py via python-dotenv directly # (replaces the unmaintained pytest-dotenv plugin). addopts = "-s" +markers = [ + "integration: needs live infra (Postgres/Redis); runs in the rig's integration tier, not unit (select with -m integration / exclude with -m 'not integration')", +] [tool.poe] envfile = ".env" diff --git a/backend/usage_v2/tests/test_helper.py b/backend/usage_v2/tests/test_helper.py index 4f6ffc9c40..f0311c22aa 100644 --- a/backend/usage_v2/tests/test_helper.py +++ b/backend/usage_v2/tests/test_helper.py @@ -5,69 +5,34 @@ bare ``"llm"`` bucket from leaking into API deployment responses when a producer-side LLM call site forgets to set ``llm_usage_reason``. -The tests deliberately do not require a live Django database — the -backend test environment has no ``pytest-django``, no SQLite fallback, -and uses ``django-tenants`` against Postgres in production. Instead -the tests stub ``account_usage.models`` and ``usage_v2.models`` in -``sys.modules`` *before* importing the helper, so the helper module -loads cleanly without triggering Django's app registry checks. The -fake ``Usage.objects.filter`` chain returns a deterministic list of -row dicts shaped exactly like the real ``.values(...).annotate(...)`` -queryset rows the helper iterates over. +The tests exercise only the helper's in-memory aggregation logic, not +the ORM. We rebind the ``Usage`` symbol the helper resolved at import +to a fake whose ``objects.filter`` chain returns a deterministic list +of row dicts shaped exactly like the real +``.values(...).annotate(...)`` queryset rows the helper iterates over. """ from __future__ import annotations -import sys -import types from typing import Any from unittest.mock import MagicMock +import pytest +import usage_v2.helper as helper_mod +from usage_v2.helper import UsageHelper -# --------------------------------------------------------------------------- -# Module-level stubs. Must run BEFORE ``usage_v2.helper`` is imported, so we -# do it at import time and capture the helper reference for the tests below. -# --------------------------------------------------------------------------- - - -def _install_stubs() -> tuple[Any, Any]: - """Install fake ``account_usage.models`` and ``usage_v2.models`` modules - so that ``usage_v2.helper`` can be imported without Django being set up. - - Returns ``(UsageHelper, FakeUsage)`` — the helper class to test and the - fake Usage class whose ``objects.filter`` we will swap per-test. - """ - # Fake account_usage package + models module - if "account_usage" not in sys.modules: - account_usage_pkg = types.ModuleType("account_usage") - account_usage_pkg.__path__ = [] # mark as package - sys.modules["account_usage"] = account_usage_pkg - if "account_usage.models" not in sys.modules: - account_usage_models = types.ModuleType("account_usage.models") - account_usage_models.PageUsage = MagicMock(name="PageUsage") - sys.modules["account_usage.models"] = account_usage_models - - # Fake usage_v2.models with a Usage class whose ``objects`` is a - # MagicMock (so each test can rebind ``filter.return_value``). - if "usage_v2.models" not in sys.modules or not hasattr( - sys.modules["usage_v2.models"], "_is_test_stub" - ): - usage_v2_models = types.ModuleType("usage_v2.models") - usage_v2_models._is_test_stub = True - - class _FakeUsage: - objects = MagicMock(name="Usage.objects") - - usage_v2_models.Usage = _FakeUsage - sys.modules["usage_v2.models"] = usage_v2_models - - # Now import the helper — this picks up our stubs. - from usage_v2.helper import UsageHelper - return UsageHelper, sys.modules["usage_v2.models"].Usage +class FakeUsage: + # objects is a MagicMock so each test can rebind filter.return_value. + objects = MagicMock(name="Usage.objects") -UsageHelper, FakeUsage = _install_stubs() +@pytest.fixture(autouse=True) +def _swap_usage(monkeypatch: pytest.MonkeyPatch) -> None: + # Swap the symbol get_usage_by_model resolves, per-test, so monkeypatch + # restores the real model afterwards — a module-level rebind would leak + # FakeUsage into every later test in the same process. + monkeypatch.setattr(helper_mod, "Usage", FakeUsage) # --------------------------------------------------------------------------- diff --git a/backend/utils/file_storage/__init__.py b/backend/utils/file_storage/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/utils/file_storage/helpers/__init__.py b/backend/utils/file_storage/helpers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/utils/file_storage/helpers/prompt_studio_file_helper.py b/backend/utils/file_storage/helpers/prompt_studio_file_helper.py index df09776651..c8b75612fe 100644 --- a/backend/utils/file_storage/helpers/prompt_studio_file_helper.py +++ b/backend/utils/file_storage/helpers/prompt_studio_file_helper.py @@ -6,13 +6,13 @@ from file_management.exceptions import InvalidFileType from file_management.file_management_helper import FileManagerHelper -from utils.file_storage.constants import FileStorageConstants, FileStorageKeys -from utils.file_storage.helpers.streaming_writer import write_streaming from unstract.core.utilities import UnstractUtils from unstract.sdk1.file_storage import FileStorage from unstract.sdk1.file_storage.constants import StorageType from unstract.sdk1.file_storage.env_helper import EnvHelper +from utils.file_storage.constants import FileStorageConstants, FileStorageKeys +from utils.file_storage.helpers.streaming_writer import write_streaming logger = logging.getLogger(__name__) diff --git a/backend/uv.lock b/backend/uv.lock index bad2b30614..20e5fc22da 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -2970,6 +2970,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] +[[package]] +name = "pytest-django" +version = "4.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/13/2b/db9a193df89e5660137f5428063bcc2ced7ad790003b26974adf5c5ceb3b/pytest_django-4.12.0.tar.gz", hash = "sha256:df94ec819a83c8979c8f6de13d9cdfbe76e8c21d39473cfe2b40c9fc9be3c758", size = 91156, upload-time = "2026-02-14T18:40:49.235Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/83/a5/41d091f697c09609e7ef1d5d61925494e0454ebf51de7de05f0f0a728f1d/pytest_django-4.12.0-py3-none-any.whl", hash = "sha256:3ff300c49f8350ba2953b90297d23bf5f589db69545f56f1ec5f8cff5da83e85", size = 26123, upload-time = "2026-02-14T18:40:47.381Z" }, +] + [[package]] name = "python-crontab" version = "3.3.0" @@ -3721,6 +3733,7 @@ dev = [ ] test = [ { name = "pytest" }, + { name = "pytest-django" }, ] [package.metadata] @@ -3784,7 +3797,10 @@ dev = [ { name = "unstract-tool-sandbox", editable = "../unstract/tool-sandbox" }, { name = "unstract-workflow-execution", editable = "../unstract/workflow-execution" }, ] -test = [{ name = "pytest", specifier = ">=8.0.1" }] +test = [ + { name = "pytest", specifier = ">=8.0.1" }, + { name = "pytest-django", specifier = ">=4.12.0" }, +] [[package]] name = "unstract-connectors" diff --git a/backend/workflow_manager/endpoint_v2/tests/destination-connectors/test_destination_connector_postgres.py b/backend/workflow_manager/endpoint_v2/tests/destination-connectors/test_destination_connector_postgres.py index 33f31f055a..48e354bcc5 100644 --- a/backend/workflow_manager/endpoint_v2/tests/destination-connectors/test_destination_connector_postgres.py +++ b/backend/workflow_manager/endpoint_v2/tests/destination-connectors/test_destination_connector_postgres.py @@ -21,7 +21,7 @@ def setUp(self) -> None: "database": os.getenv("DB_NAME", "test_unstract"), "user": os.getenv("DB_USER", "postgres"), "password": os.getenv("DB_PASSWORD", "password"), - "schema": "test", # Add schema to fix PostgreSQL issue + "schema": os.getenv("DB_SCHEMA", "public"), } # Test data that will be inserted into the database @@ -32,7 +32,9 @@ def setUp(self) -> None: "processing_time": 1.5, } self.input_file_path = "/path/to/test/file.pdf" - self.test_table_name = "OUTPUT_3" + # Lowercase: the connector quotes the name on CREATE (case-preserved) but + # lowercases it when reading information_schema back. + self.test_table_name = "output_3" # Create real PostgreSQL connector instance self.postgres_connector = PostgreSQL(settings=self.postgres_config) @@ -192,44 +194,6 @@ def test_insert_into_db_happy_path_postgresql(self) -> None: f"✅ Successfully inserted test data into PostgreSQL table: {self.test_table_name}" ) - def test_insert_into_db_with_error_postgresql(self) -> None: - """Test insertion with error parameter into real PostgreSQL database.""" - # Create mock objects - mock_workflow = self.create_mock_workflow() - mock_workflow_log = self.create_mock_workflow_log() - mock_connector_instance = self.create_real_connector_instance() - mock_endpoint = self.create_mock_endpoint(mock_connector_instance) - - # Create destination connector - destination_connector = self.create_destination_connector( - mock_workflow, mock_workflow_log, mock_endpoint - ) - - error_message = "Test processing error occurred" - - # Mock the methods that get data - with patch.object( - destination_connector, - "get_tool_execution_result", - return_value=self.test_data, - ): - with patch.object( - destination_connector, - "get_combined_metadata", - return_value=self.test_metadata, - ): - # Execute with error parameter - destination_connector.insert_into_db( - input_file_path=self.input_file_path, error=error_message - ) - - # Verify that all expected columns were created - self.verify_table_columns(self.test_table_name) - - print( - f"✅ Successfully inserted error data into PostgreSQL table: {self.test_table_name}" - ) - def test_postgresql_connector_connection(self) -> None: """Test that the PostgreSQL connector can establish a connection.""" # Test the real PostgreSQL connector directly diff --git a/platform-service/tests/test_auth_middleware.py b/platform-service/tests/test_auth_middleware.py deleted file mode 100644 index decf9aaa3b..0000000000 --- a/platform-service/tests/test_auth_middleware.py +++ /dev/null @@ -1,19 +0,0 @@ -import unittest - -from unstract.platform_service.main import ( - get_account_from_bearer_token, - validate_bearer_token, -) - - -class TestAuthMiddleware(unittest.TestCase): - def test_auth_middleware(self) -> None: - try: - self.assertTrue(validate_bearer_token("test")) - self.assertEqual(get_account_from_bearer_token("test"), "mock_org") - except Exception as e: - self.fail(f"Authentication Test failed: {e}") - - -if __name__ == "__main__": - unittest.main() diff --git a/pyproject.toml b/pyproject.toml index 17bc80034f..b9d3738924 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -178,7 +178,6 @@ keep-dict-typing = true [tool.pytest.ini_options] python_files = ["tests.py", "test_*.py", "*_tests.py"] -DJANGO_SETTINGS_MODULE = "backend.settings.test_cases" testpaths = ["tests"] markers = [ "slow: marks tests as slow (deselect with '-m \"not slow\"')", diff --git a/tests/compose/docker-compose.test.yaml b/tests/compose/docker-compose.test.yaml index a513633aaa..ee10d9416d 100644 --- a/tests/compose/docker-compose.test.yaml +++ b/tests/compose/docker-compose.test.yaml @@ -17,11 +17,6 @@ services: environment: - ENVIRONMENT=test - prompt-service: - image: unstract/prompt-service:${UNSTRACT_TEST_VERSION:-latest} - environment: - - ENVIRONMENT=test - platform-service: image: unstract/platform-service:${UNSTRACT_TEST_VERSION:-latest} environment: diff --git a/tests/critical_paths.yaml b/tests/critical_paths.yaml index 3a062c94d2..f0459275e8 100644 --- a/tests/critical_paths.yaml +++ b/tests/critical_paths.yaml @@ -3,9 +3,17 @@ # A "critical path" is an end-to-end user or system flow whose failure would # constitute a production incident. The rig reports: # ✅ covered — at least one group in `covered_by` ran green this build -# ⚠️ gap — `covered_by` is empty OR no group covering it ran +# ⚠️ gap — no covering group ran green this build # ❌ regression — a path that was ✅ on the cached main baseline is now not ✅ # +# Only one kind of gap gates --fail-on-critical-gap: +# • in-scope gap — a covering group ran in this tier but not green; fails. +# • out-of-scope gap — covered only by an unrun tier, or no group declared; +# warn-only (a tier can't fail for coverage it can't run). +# +# Only wire `covered_by` to a group that really exercises the path — a bogus +# mapping fails the build when that group breaks, for the wrong reason. +# # We intentionally do NOT chase 100% coverage. Focus on filling these gaps first. version: 1 @@ -21,10 +29,7 @@ paths: - id: adapter-register-llm description: "Register and validate an LLM adapter." entry: "POST /api/v1/adapter/" - # Honest declaration: unit-backend is currently optional/gated and - # e2e-smoke only hits /health/. Track as a gap until a real adapter test - # exists (likely under tests/e2e/smoke/ or a new tests/e2e/adapters/ group). - covered_by: [] + covered_by: [integration-backend] - id: workflow-create-execute description: "Create a workflow, configure source+destination, execute, poll, fetch result." @@ -46,11 +51,6 @@ paths: entry: "POST /api/v1/pipeline/{id}/execute/" covered_by: [] # gap - - id: tool-sandbox-exec - description: "Tool image runs in sandbox container and emits structured output." - entry: "internal: tool-registry → runner → docker run" - covered_by: [unit-runner] - - id: usage-token-tracking description: "Per-execution token usage is recorded and retrievable." entry: "GET /api/v1/usage/get_token_usage/" diff --git a/tests/groups.yaml b/tests/groups.yaml index d85ada8cb3..09abca28e1 100644 --- a/tests/groups.yaml +++ b/tests/groups.yaml @@ -32,21 +32,6 @@ groups: uv_sync_group: test coverage_source: src/unstract/sdk1 - unit-runner: - tier: unit - workdir: runner - # Runner co-locates its tests under src/. Pytest recurses from here. - # The project has no `test` uv group, so deps are pip-installed inline. - paths: [src] - pip_install: - - "flask~=3.1.0" - - "docker==6.1.3" - - "redis~=5.2.1" - - "python-dotenv>=1.0.0" - - "kubernetes" - install_editable: true - coverage_source: src/unstract/runner - unit-platform-service: tier: unit workdir: platform-service @@ -66,38 +51,49 @@ groups: unit-backend: tier: unit workdir: backend - # List paths explicitly. `[.]` recurses into every test_*.py in backend/, - # including vendored fixtures and pluggable-app tests that don't belong - # in the OSS rig — keep this list scoped to the apps actually under test. - paths: - - account_v2/tests - - adapter_processor_v2/tests - - api_deployment_v2/tests - - connector_v2/tests - - dashboard_metrics/tests - - file_management/tests - - project/tests - - prompt_studio/prompt_studio_registry_v2/tests - - tenant_account_v2/tests - - usage_v2/tests - - utils/tests - - workflow_manager/endpoint_v2/tests + # Pure backend tests — no DB. Collect the whole tree and let markers, not a + # hand-kept file list, decide membership: tests needing live infra carry + # `@pytest.mark.integration` (see integration-backend) and are excluded here. + paths: ["."] + markers: "not integration" uv_sync_group: test - env: - DJANGO_SETTINGS_MODULE: backend.settings.test_cases - # Backend ORM imports require a real Postgres; rig provisions it via - # testcontainers or compose when this group is selected. - requires_services: [postgres, redis] + # Anchored: integration-backend reuses the identical Django settings env so + # the two halves of the backend suite can't drift apart. + env: &backend_test_env + DJANGO_SETTINGS_MODULE: backend.settings.test + # Tenancy is row-level, not schema-per-tenant; tests run in public. + DB_SCHEMA: public + # base.py resolves these at import time with no default; supply test-safe + # values here. + DJANGO_SECRET_KEY: test-secret-key-not-for-production + # All-zero Fernet key: valid format, zero entropy so it reads as the + # obvious test placeholder it is (not a real secret). + ENCRYPTION_KEY: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= + CELERY_BROKER_BASE_URL: redis://localhost:6379 + CELERY_BROKER_USER: guest + CELERY_BROKER_PASS: guest + INDEXING_FLAG_TTL: "3600" + ENABLE_LOG_HISTORY: "False" + STRUCTURE_TOOL_IMAGE_URL: docker:test + STRUCTURE_TOOL_IMAGE_NAME: test-structure-tool + STRUCTURE_TOOL_IMAGE_TAG: test + SYSTEM_ADMIN_USERNAME: admin + SYSTEM_ADMIN_PASSWORD: admin + SYSTEM_ADMIN_EMAIL: admin@example.com + # ExecutionFileHandler builds execution-dir paths from this at init; only + # constructed, never written to in these tests. + WORKFLOW_EXECUTION_DIR_PREFIX: /tmp/unstract-workflow-exec coverage_source: . - optional: true # gated until backend test_cases settings are complete unit-connectors: tier: unit workdir: unstract/connectors paths: [tests] + # Pure connector tests only. Credential / live-infra tests are marked + # `@pytest.mark.integration` and run in `integration-connectors`. + markers: "not integration" uv_sync_group: test coverage_source: src - optional: true unit-core: tier: unit @@ -106,17 +102,41 @@ groups: # No `test` uv group in unstract/core today; rig still injects pytest plugins. install_editable: true coverage_source: src - optional: true - unit-tool-registry: - tier: unit - workdir: unstract/tool-registry + # ── Integration tier: needs infra but not full platform ──────────────────── + integration-backend: + tier: integration + workdir: backend + # Backend ORM tests — need live infra. The rig provisions the declared + # requires_services via testcontainers and injects their connection env into + # pytest (tests/rig/cli.py:_inject_infra_env). Not optional: these gate the + # integration tier. + paths: + - adapter_processor_v2/tests/test_adapter_api.py + - dashboard_metrics/tests + - prompt_studio/prompt_studio_registry_v2/tests + # Destination DB-writer tests (BE orchestration over the connector lib — + # superset of the connector-lib DB tests). Postgres runs against the + # provisioned testcontainer; the other engines skipTest without creds. + - workflow_manager/endpoint_v2/tests/destination-connectors + uv_sync_group: test + env: *backend_test_env + requires_services: [postgres, redis] + coverage_source: . + + integration-connectors: + tier: integration + workdir: unstract/connectors paths: [tests] + markers: "integration" + # Most connector integration tests need real third-party credentials + # (Snowflake, GDrive, Box, Dropbox, …) and skip when those are absent. The + # MinIO test actually runs: the rig provisions MinIO via testcontainers and + # injects MINIO_* creds (tests/rig/cli.py). + requires_services: [minio] uv_sync_group: test coverage_source: src - optional: true - # ── Integration tier: needs infra but not full platform ──────────────────── integration-workflow-execution: tier: integration paths: [tests/integration/workflow_execution] diff --git a/tests/rig/cli.py b/tests/rig/cli.py index 2876c42a97..e81cca117f 100644 --- a/tests/rig/cli.py +++ b/tests/rig/cli.py @@ -21,6 +21,7 @@ import uuid from functools import lru_cache from pathlib import Path +from urllib.parse import urlsplit from xml.sax import saxutils from tests.rig import critical_paths as cp @@ -32,7 +33,12 @@ load_groups, ) from tests.rig.reporting import GroupResult, parse_junit, write_summary -from tests.rig.runtime import PlatformEndpoints, PlatformRuntime, pick_runtime +from tests.rig.runtime import ( + PlatformEndpoints, + PlatformRuntime, + TestcontainersRuntime, + pick_runtime, +) from tests.rig.selection import resolve # Pytest exit codes that the rig treats as non-failure for aggregation: @@ -349,6 +355,11 @@ def cmd_run(args: argparse.Namespace) -> int: reports_dir.mkdir(parents=True, exist_ok=True) needs_platform = any(manifest.get(n).requires_platform for n in runnable) + # A group can declare `requires_services` (stateful infra like Postgres/ + # Redis) without needing the whole platform — provision just that infra via + # testcontainers instead of standing up every compose service. Platform wins + # when both are set. + needs_services = any(manifest.get(n).requires_services for n in runnable) runtime: PlatformRuntime | None = None endpoints: PlatformEndpoints | None = None group_results: list[GroupResult] = [] @@ -361,6 +372,14 @@ def cmd_run(args: argparse.Namespace) -> int: # `up()` is inside the try so a failure here still triggers `down()` # in the finally, cleaning up any partial stack. endpoints = runtime.up() + elif needs_services and not args.dry_run: + # Infra-only: testcontainers Postgres/Redis/etc., no platform + # services. ponytail: up() starts the full infra set even if a run + # only needs Postgres; trim to the requested services if startup + # cost ever matters. + runtime = TestcontainersRuntime() + print(f"[rig] bringing infra up via runtime={runtime.name} (requires_services)") + endpoints = runtime.up() # TODO(runtime-gate-skip): groups run unconditionally in topo order; # there is no skip-if-a-dependency-failed logic yet. The dep edge to @@ -486,11 +505,25 @@ def cmd_run(args: argparse.Namespace) -> int: if overall_exit == 0: overall_exit = 1 + # Only in-scope gaps gate: a declared covering group ran in this tier but + # not green. Out-of-scope gaps (covered only by other tiers, or undeclared) + # are reported but must not fail a tier for coverage it can't produce. gaps = [s for s in statuses if s.state == "gap"] - if gaps and args.fail_on_critical_gap: + in_scope_gaps = [s for s in gaps if s.in_scope] + out_of_scope_gaps = [s for s in gaps if not s.in_scope] + if out_of_scope_gaps: + ids = ", ".join(s.path.id for s in out_of_scope_gaps) + print( + f"[rig] ℹ️ {len(out_of_scope_gaps)} critical-path gap(s) out of scope " + f"for this run (warn-only, not covered by any group in this tier): " + f"{ids}", + file=sys.stderr, + ) + if in_scope_gaps and args.fail_on_critical_gap: + ids = ", ".join(s.path.id for s in in_scope_gaps) print( - f"\n[rig] ⚠️ {len(gaps)} critical-path gap(s) detected " - f"(fail-on-critical-gap)", + f"\n[rig] ⚠️ {len(in_scope_gaps)} critical-path gap(s) detected " + f"(fail-on-critical-gap): {ids}", file=sys.stderr, ) if overall_exit == 0: @@ -510,6 +543,55 @@ def cmd_run(args: argparse.Namespace) -> int: # ── execution helpers ───────────────────────────────────────────────────────── +def _db_env_from_postgres_url(url: str) -> dict[str, str]: + """Translate a provisioned Postgres URL into the discrete ``DB_*`` vars + Django reads (``backend/settings/base.py``). + + The rig provisions a throwaway Postgres via testcontainers for groups + declaring ``requires_services: [postgres]``. Without this translation the + backend falls back to the compose hostname ``backend-db-1``, unreachable + from the host-side pytest, and every ``django_db`` test errors on connect. + """ + # e.g. postgresql+psycopg2://user:pass@host:49153/dbname + parts = urlsplit(url) + return { + "DB_HOST": parts.hostname or "localhost", + "DB_PORT": str(parts.port or 5432), + "DB_USER": parts.username or "test", + "DB_PASSWORD": parts.password or "test", + "DB_NAME": parts.path.lstrip("/") or "test", + } + + +def _inject_infra_env( + env: dict[str, str], + group: GroupDefinition, + endpoints: PlatformEndpoints | None, +) -> None: + # Postgres/Redis override so a stale shell value can't shadow the + # provisioned testcontainer; MinIO uses setdefault so a developer's own + # endpoint wins. + if endpoints is None: + return + infra = endpoints.infra + if "postgres" in group.requires_services and infra.postgres_url: + env.update(_db_env_from_postgres_url(infra.postgres_url)) + if "minio" in group.requires_services and infra.minio_endpoint: + # http: this is a local, throwaway testcontainers MinIO with no TLS. + env.setdefault( + "MINIO_ENDPOINT_URL", f"http://{infra.minio_endpoint}" # NOSONAR + ) + if infra.minio_access_key: + env.setdefault("MINIO_ACCESS_KEY_ID", infra.minio_access_key) + if infra.minio_secret_key: + env.setdefault("MINIO_SECRET_ACCESS_KEY", infra.minio_secret_key) + if "redis" in group.requires_services and infra.redis_host: + redis_port = str(infra.redis_port or 6379) + env["REDIS_HOST"] = infra.redis_host + env["REDIS_PORT"] = redis_port + env["CELERY_BROKER_BASE_URL"] = f"redis://{infra.redis_host}:{redis_port}" + + def _green_group_names(results: list[GroupResult]) -> set[str]: return {r.name for r in results if r.status in ("pass", "empty")} @@ -553,6 +635,7 @@ def _execute_group( # leaked in". `setdefault` would let a leaked sentinel win, which # defeats the purpose — set unconditionally. env["UNSTRACT_RIG_SESSION_ID"] = _rig_session_id() + _inject_infra_env(env, group, endpoints) if coverage and group.coverage_source: env.update(coverage_env(group.name, reports_dir)) @@ -628,13 +711,8 @@ def _prepare_group_env(group: GroupDefinition, *, env: dict[str, str]) -> None: env=env, check=False, ) - if group.install_editable: - subprocess.run( - ["uv", "pip", "install", "-e", "."], - cwd=workdir, - env=env, - check=False, - ) + # install_editable is handled in _pytest_command via `--with-editable`; + # installing it here would be wiped by `uv run`'s venv re-sync. if group.pip_install: subprocess.run( ["uv", "pip", "install", *group.pip_install], @@ -647,6 +725,20 @@ def _prepare_group_env(group: GroupDefinition, *, env: dict[str, str]) -> None: # That avoids losing them on the next `uv run` (which re-syncs the venv). +def _pytest_base_cmd(group: GroupDefinition, workdir: Path) -> list[str]: + if not shutil.which("uv"): + return [sys.executable, "-m", "pytest"] + # `uv run` re-syncs the venv each call, wiping anything from `uv pip + # install`. `--with`/`--with-editable` inject plugins + the project into the + # ephemeral run env instead, surviving the sync. + with_args: list[str] = [] + for spec in RIG_PYTEST_PLUGINS: + with_args += ["--with", spec] + if group.install_editable: + with_args += ["--with-editable", str(workdir)] + return ["uv", "run", *with_args, "pytest"] + + def _pytest_command( group: GroupDefinition, *, @@ -660,17 +752,7 @@ def _pytest_command( workers: str, timeout: int, ) -> list[str]: - use_uv = shutil.which("uv") is not None - if use_uv: - # `uv run` re-syncs the project's venv each call, which would wipe any - # plugins added via `uv pip install`. `--with` injects them into the - # ephemeral run environment, surviving the sync. - with_args: list[str] = [] - for spec in RIG_PYTEST_PLUGINS: - with_args += ["--with", spec] - base: list[str] = ["uv", "run", *with_args, "pytest"] - else: - base = [sys.executable, "-m", "pytest"] + base = _pytest_base_cmd(group, workdir) cmd = [ *base, diff --git a/tests/rig/critical_paths.py b/tests/rig/critical_paths.py index b0cbbd0596..741c8b74c5 100644 --- a/tests/rig/critical_paths.py +++ b/tests/rig/critical_paths.py @@ -65,6 +65,12 @@ class CriticalPathStatus: state: CriticalPathState covering_groups_run: tuple[str, ...] notes: str = "" + # True when a declared covering group is in this run's scope (or scoping is + # off). An out-of-scope gap (coverage only in an unrun tier, or none + # declared) must not gate under --fail-on-critical-gap. Defaults False so a + # regression that forgets to pass it can only under-gate (spurious warning), + # never over-gate (spurious build block). + in_scope: bool = False def __post_init__(self) -> None: # Make the contradictory states unrepresentable rather than relying on @@ -123,13 +129,12 @@ def evaluate( groups_run_green: names of groups that ran AND passed in this build. baseline: parsed previous-summary.json from the main-branch cache, or None. Expected shape: ``{"covered_paths": ["auth-login", ...]}``. - scope_groups: collection of every group the caller considered running - this invocation (including dep-expanded deps and skipped - optional placeholders). When a critical path's ``covered_by`` - is fully outside ``scope_groups``, the path is classified as - ``gap`` rather than ``regression`` — running only the unit - tier shouldn't flag e2e-tier paths as regressed. If ``None``, - no scoping is applied (back-compat). + scope_groups: the groups this invocation actually runs (dep-expanded). + When a critical path's ``covered_by`` is fully outside + ``scope_groups``, the path is classified as ``gap`` rather than + ``regression`` — running only the unit tier shouldn't flag + e2e-tier paths as regressed. If ``None``, no scoping is applied + (back-compat). Returns: Statuses in the original registry order. @@ -164,6 +169,7 @@ def evaluate( state=state, covering_groups_run=covering, notes=note, + in_scope=in_scope, ) ) return statuses diff --git a/tests/rig/runtime.py b/tests/rig/runtime.py index 97be22d748..70de5db40c 100644 --- a/tests/rig/runtime.py +++ b/tests/rig/runtime.py @@ -51,6 +51,8 @@ class InfraEndpoints: rabbitmq_host: str | None = None rabbitmq_port: int | None = None minio_endpoint: str | None = None + minio_access_key: str | None = None + minio_secret_key: str | None = None def __post_init__(self) -> None: for host, port, label in ( @@ -205,6 +207,10 @@ def up(self) -> PlatformEndpoints: minio_endpoint=( f"{minio.get_container_host_ip()}:{minio.get_exposed_port(9000)}" ), + # Default testcontainers MinIO root creds; surfaced so the + # rig can inject them into connector integration tests. + minio_access_key=getattr(minio, "access_key", "minioadmin"), + minio_secret_key=getattr(minio, "secret_key", "minioadmin"), ), ) except Exception: diff --git a/tests/rig/tests/test_cli.py b/tests/rig/tests/test_cli.py index a975f43d62..3e3ac4dfd7 100644 --- a/tests/rig/tests/test_cli.py +++ b/tests/rig/tests/test_cli.py @@ -216,6 +216,109 @@ def fake_execute_group(group, **kwargs): ) +def _run_gap_scenario( + tmp_path: Path, monkeypatch, *, covered_by: str, fail_on_gap: bool +) -> int: + """Drive cmd_run with a single optional group ``unit-cov`` that runs RED and + one critical path covered by ``covered_by`` (a YAML list literal like + ``[unit-cov]`` or ``[]``). The group is optional so its own red exit never + gates — isolating the critical-gap logic. Returns the overall exit code. + """ + from tests.rig.reporting import GroupResult + + test_dir = Path(__file__).parent + manifest_yaml = ( + "version: 1\n" + "groups:\n" + " unit-cov:\n" + " tier: unit\n" + f" workdir: {test_dir}\n" + " paths: [.]\n" + " optional: true\n" + ) + cp_yaml = ( + "version: 1\n" + "paths:\n" + " - id: p1\n" + " description: ''\n" + " entry: ''\n" + f" covered_by: {covered_by}\n" + ) + (tmp_path / "groups.yaml").write_text(manifest_yaml) + (tmp_path / "critical_paths.yaml").write_text(cp_yaml) + + import tests.rig.cli as cli_mod + import tests.rig.critical_paths as cp_mod + import tests.rig.groups as groups_mod + + monkeypatch.setattr(groups_mod, "DEFAULT_MANIFEST", tmp_path / "groups.yaml") + monkeypatch.setattr(cp_mod, "DEFAULT_REGISTRY", tmp_path / "critical_paths.yaml") + + def fake_execute_group(group, **kwargs): + # The covering group runs red, so it never counts as green coverage. + result = GroupResult( + name=group.name, + tier=group.tier, + exit_code=1, + passed=0, + failed=1, + errors=0, + skipped=0, + duration_seconds=0.01, + ) + return result, 1 + + monkeypatch.setattr(cli_mod, "_execute_group", fake_execute_group) + + argv = [ + "run", + "unit-cov", + "--no-coverage", + "--no-parallel", + "--reports-dir", + str(tmp_path / "reports"), + "--baseline", + str(tmp_path / "reports" / "previous-summary.json"), + ] + if fail_on_gap: + argv.append("--fail-on-critical-gap") + args = cli_mod._build_parser().parse_args(argv) + return cli_mod.cmd_run(args) + + +def test_fail_on_critical_gap_gates_on_in_scope_gap(tmp_path: Path, monkeypatch) -> None: + """A critical path covered by an in-tier group that ran red is an IN-SCOPE + gap: --fail-on-critical-gap must fail the build on it (real coverage is + gone). Without the flag, it's reported but doesn't gate. + """ + assert ( + _run_gap_scenario( + tmp_path, monkeypatch, covered_by="[unit-cov]", fail_on_gap=True + ) + == 1 + ) + assert ( + _run_gap_scenario( + tmp_path, monkeypatch, covered_by="[unit-cov]", fail_on_gap=False + ) + == 0 + ) + + +def test_fail_on_critical_gap_ignores_out_of_scope_gap( + tmp_path: Path, monkeypatch +) -> None: + """A path with no declared coverage (or coverage only in another tier) is an + OUT-OF-SCOPE gap: --fail-on-critical-gap must NOT fail this tier on it. + This is the fix for the perma-red `main`: e2e-only and not-yet-covered paths + can't fail the unit/integration tiers. + """ + assert ( + _run_gap_scenario(tmp_path, monkeypatch, covered_by="[]", fail_on_gap=True) + == 0 + ) + + def test_cmd_run_teardown_failure_does_not_mask_up_failure( tmp_path: Path, monkeypatch ) -> None: @@ -465,3 +568,46 @@ def test_cmd_report_re_aggregates_existing_junit(tmp_path: Path, monkeypatch) -> for artifact in ("summary.md", "summary.json", "combined-test-report.md"): assert (reports_dir / artifact).exists(), f"missing {artifact}" assert "unit-x" in (reports_dir / "summary.md").read_text() + + +def test_db_env_from_postgres_url_maps_discrete_vars() -> None: + """The provisioned-Postgres URL (testcontainers, with a `+driver` scheme + and a random host port) must translate into the discrete DB_* vars Django + reads — otherwise integration-backend falls back to `backend-db-1` and + every django_db test errors on connect. + """ + import tests.rig.cli as cli_mod + + env = cli_mod._db_env_from_postgres_url( + "postgresql+psycopg2://tcuser:tcpass@127.0.0.1:49231/testdb" + ) + assert env == { + "DB_HOST": "127.0.0.1", + "DB_PORT": "49231", + "DB_USER": "tcuser", + "DB_PASSWORD": "tcpass", # NOSONAR - test placeholder, not a real credential + "DB_NAME": "testdb", + } + + +def test_inject_infra_env_wires_provisioned_redis() -> None: + """A group declaring `requires_services: [redis]` must get REDIS_HOST/PORT + + the Celery broker URL rewritten to the provisioned endpoint — otherwise + Redis-backed tests silently hit the localhost default and bypass the + testcontainer. + """ + import tests.rig.cli as cli_mod + from tests.rig.groups import GroupDefinition + from tests.rig.runtime import InfraEndpoints, PlatformEndpoints + + endpoints = PlatformEndpoints.from_env( + infra=InfraEndpoints(redis_host="redis.internal", redis_port=49999) + ) + group = GroupDefinition( + name="g", tier="integration", paths=("tests",), requires_services=("redis",) + ) + env: dict[str, str] = {} + cli_mod._inject_infra_env(env, group, endpoints) + assert env["REDIS_HOST"] == "redis.internal" + assert env["REDIS_PORT"] == "49999" + assert env["CELERY_BROKER_BASE_URL"] == "redis://redis.internal:49999" diff --git a/tests/rig/tests/test_critical_paths.py b/tests/rig/tests/test_critical_paths.py index b1798d3c1b..31d7e175f0 100644 --- a/tests/rig/tests/test_critical_paths.py +++ b/tests/rig/tests/test_critical_paths.py @@ -158,6 +158,43 @@ def test_scope_demotes_out_of_scope_regressions_to_gaps() -> None: assert by_id["straddle-path"].state == "regression" # partially in scope +def test_in_scope_flag_distinguishes_gap_flavours() -> None: + """The ``in_scope`` flag on a status is what lets --fail-on-critical-gap + gate only on coverage that this tier was actually responsible for. An + out-of-scope gap (e2e path during the unit tier, or a path with no declared + coverage) must report ``in_scope=False``; an in-scope gap (a declared + in-tier group that didn't run green) must report ``in_scope=True``. + """ + registry = _registry( + ("in-scope", ("unit-g",)), # declared group is in scope, but not green + ("e2e-only", ("e2e-g",)), # declared group is out of scope this run + ("undeclared", ()), # no declared coverage anywhere + ) + statuses = evaluate( + registry, + groups_run_green=set(), # nothing passed → all three are gaps + baseline=None, + scope_groups={"unit-g"}, + ) + by_id = {s.path.id: s for s in statuses} + assert all(s.state == "gap" for s in statuses) + assert by_id["in-scope"].in_scope is True + assert by_id["e2e-only"].in_scope is False + assert by_id["undeclared"].in_scope is False + + +def test_covered_path_is_in_scope() -> None: + registry = _registry(("p1", ("g1",))) + statuses = evaluate( + registry, + groups_run_green={"g1"}, + baseline=None, + scope_groups={"g1"}, + ) + assert statuses[0].state == "covered" + assert statuses[0].in_scope is True + + def test_scope_none_preserves_legacy_behavior() -> None: """scope_groups=None disables scope-filtering so callers that don't pass it keep the old "everything in baseline counts" semantics. diff --git a/tox.ini b/tox.ini index 2043476335..10b455e326 100644 --- a/tox.ini +++ b/tox.ini @@ -69,8 +69,5 @@ commands = python -m tests.rig {posargs:list-groups} # These mirror the pre-rig envs so existing scripts / CI snippets keep working # during the migration. They delegate to the corresponding rig group. -[testenv:runner] -commands = python -m tests.rig run unit-runner {posargs} - [testenv:sdk1] commands = python -m tests.rig run unit-sdk1 {posargs} diff --git a/unstract/connectors/pyproject.toml b/unstract/connectors/pyproject.toml index fa3afe95dd..d641413154 100644 --- a/unstract/connectors/pyproject.toml +++ b/unstract/connectors/pyproject.toml @@ -64,3 +64,6 @@ unstract-filesystem = { path = "../filesystem", editable = true } [tool.pytest.ini_options] pythonpath = ["src"] +markers = [ + "integration: needs live infra or external credentials; runs in the rig's integration tier, not unit (select with -m integration / exclude with -m 'not integration')", +] diff --git a/unstract/connectors/tests/databases/test_bigquery_db.py b/unstract/connectors/tests/databases/test_bigquery_db.py deleted file mode 100644 index 4518fa9a7e..0000000000 --- a/unstract/connectors/tests/databases/test_bigquery_db.py +++ /dev/null @@ -1,136 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch - -import google.api_core.exceptions - -from unstract.connectors.databases.bigquery.bigquery import BigQuery -from unstract.connectors.databases.exceptions import ( - BigQueryForbiddenException, - BigQueryNotFoundException, -) - - -class TestBigQuery(unittest.TestCase): - - def setUp(self): - """Set up test fixtures that are common across all tests.""" - self.bigquery = BigQuery( - { - "json_credentials": ( - '{"type":"service_account","project_id":"test_project"}' - ) - } - ) - - def _execute_query_with_mock_error(self, mock_error, expected_exception): - """Helper method to execute query with a mocked error. - - Args: - mock_error: The Google API exception to raise - expected_exception: The exception class expected to be raised - - Returns: - The exception context manager from assertRaises - """ - # Mock the engine and query job - mock_engine = MagicMock() - mock_query_job = MagicMock() - mock_engine.query.return_value = mock_query_job - mock_query_job.result.side_effect = mock_error - - # Mock get_information_schema to return empty dict - with patch.object(self.bigquery, "get_information_schema", return_value={}): - with self.assertRaises(expected_exception) as context: - self.bigquery.execute_query( - engine=mock_engine, - sql_query="INSERT INTO test.dataset.table VALUES (@col)", - table_name="test.dataset.table", - sql_values={"col": "value"}, - sql_keys=["col"], - ) - - return context - - def test_execute_query_forbidden_billing(self): - """Test that BigQueryForbiddenException includes actual billing error details.""" - # Create a mock Forbidden exception with billing error message - billing_error_msg = ( - "403 Billing has not been enabled for this project. " - "Enable billing at https://console.cloud.google.com/billing" - ) - mock_error = google.api_core.exceptions.Forbidden(billing_error_msg) - mock_error.message = billing_error_msg - - # Execute query with mock error - context = self._execute_query_with_mock_error( - mock_error, BigQueryForbiddenException - ) - - # Verify the exception message includes both default text and actual error details - error_msg = str(context.exception.detail) - self.assertIn("Access forbidden in bigquery", error_msg) - self.assertIn("Please check your permissions", error_msg) - self.assertIn("Details:", error_msg) - self.assertIn("403 Billing has not been enabled", error_msg) - self.assertIn("test.dataset.table", error_msg) - - def test_execute_query_forbidden_permission(self): - """Test that BigQueryForbiddenException includes actual permission error details.""" - # Create a mock Forbidden exception with permission error message - permission_error_msg = ( - "403 User does not have permission to access table test.dataset.table" - ) - mock_error = google.api_core.exceptions.Forbidden(permission_error_msg) - mock_error.message = permission_error_msg - - # Execute query with mock error - context = self._execute_query_with_mock_error( - mock_error, BigQueryForbiddenException - ) - - # Verify the exception message includes both default text and actual error details - error_msg = str(context.exception.detail) - self.assertIn("Access forbidden in bigquery", error_msg) - self.assertIn("Details:", error_msg) - self.assertIn("User does not have permission", error_msg) - - def test_execute_query_not_found(self): - """Test that BigQueryNotFoundException includes actual resource not found details.""" - # Create a mock NotFound exception - not_found_error_msg = "404 Dataset 'test:dataset' not found" - mock_error = google.api_core.exceptions.NotFound(not_found_error_msg) - mock_error.message = not_found_error_msg - - # Execute query with mock error - context = self._execute_query_with_mock_error( - mock_error, BigQueryNotFoundException - ) - - # Verify the exception message includes both default text and actual error details - error_msg = str(context.exception.detail) - self.assertIn("The requested resource was not found", error_msg) - self.assertIn("Details:", error_msg) - self.assertIn("404 Dataset", error_msg) - self.assertIn("test.dataset.table", error_msg) - - def test_exception_empty_detail(self): - """Test that exceptions handle empty detail gracefully.""" - # Create a mock Forbidden exception with empty message - mock_error = google.api_core.exceptions.Forbidden("") - mock_error.message = "" - - # Execute query with mock error - context = self._execute_query_with_mock_error( - mock_error, BigQueryForbiddenException - ) - - # Verify the exception message includes default text but not empty "Details:" - error_msg = str(context.exception.detail) - self.assertIn("Access forbidden in bigquery", error_msg) - self.assertIn("Please check your permissions", error_msg) - # When detail is empty, should not have "Details:" section - self.assertNotIn("Details:", error_msg) - - -if __name__ == "__main__": - unittest.main() diff --git a/unstract/connectors/tests/databases/test_mariadb.py b/unstract/connectors/tests/databases/test_mariadb.py deleted file mode 100644 index e6f008af2a..0000000000 --- a/unstract/connectors/tests/databases/test_mariadb.py +++ /dev/null @@ -1,129 +0,0 @@ -import os -import unittest -from typing import Any -from unittest.mock import Mock, patch - -import pymysql.err as MysqlError - -from unstract.connectors.databases.mariadb.mariadb import MariaDB -from unstract.connectors.exceptions import ConnectorError - - -class TestMariaDB(unittest.TestCase): - def setUp(self) -> None: - """Set up test configuration from environment variables""" - - # SSL enabled config for testing SSL scenarios - self.mariadb_config_ssl_enabled = { - "host": os.getenv("MARIADB_HOST", "localhost"), - "port": os.getenv("MARIADB_PORT", "3306"), - "database": os.getenv("MARIADB_DATABASE", "testdb"), - "user": os.getenv("MARIADB_USER", "root"), - "password": os.getenv("MARIADB_PASSWORD", ""), - "sslEnabled": True, - } - - # SSL disabled config for testing non-SSL scenarios - self.mariadb_config_ssl_disabled = { - "host": os.getenv("MARIADB_HOST", "localhost"), - "port": os.getenv("MARIADB_PORT", "3306"), - "database": os.getenv("MARIADB_DATABASE", "testdb"), - "user": os.getenv("MARIADB_USER", "root"), - "password": os.getenv("MARIADB_PASSWORD", ""), - "sslEnabled": False, - } - - def test_ssl_config_from_environment(self) -> None: - """Test SSL configuration is loaded from environment variables""" - # Use existing config but override SSL to read from environment - config = {**self.mariadb_config_ssl_enabled, "sslEnabled": os.getenv("MARIADB_SSL_ENABLED", "false").lower() == "true"} - - mariadb = MariaDB(config) - expected_ssl = os.getenv("MARIADB_SSL_ENABLED", "false").lower() == "true" - self.assertEqual(mariadb.ssl_enabled, expected_ssl) - - @patch("unstract.connectors.databases.mariadb.mariadb.pymysql.connect") - def test_connection_params_ssl_enabled(self, mock_connect: Any) -> None: - """Test that SSL parameters are passed when SSL is enabled""" - mock_connection = Mock() - mock_connect.return_value = mock_connection - mariadb = MariaDB(self.mariadb_config_ssl_enabled) - - result = mariadb.get_engine() - - # Verify pymysql.connect was called with SSL parameters - mock_connect.assert_called_once() - call_args = mock_connect.call_args[1] - self.assertIn("ssl", call_args) - self.assertEqual(call_args["ssl"], {"ssl_disabled": False}) - self.assertEqual(result, mock_connection) - - @patch("unstract.connectors.databases.mariadb.mariadb.pymysql.connect") - def test_connection_params_ssl_disabled(self, mock_connect: Any) -> None: - """Test that no SSL parameters are passed when SSL is disabled""" - mock_connection = Mock() - mock_connect.return_value = mock_connection - - mariadb = MariaDB(self.mariadb_config_ssl_disabled) - result = mariadb.get_engine() - - mock_connect.assert_called_once() - call_args = mock_connect.call_args[1] - self.assertNotIn("ssl", call_args) - self.assertEqual(result, mock_connection) - - @patch("unstract.connectors.databases.mariadb.mariadb.pymysql.connect") - def test_authentication_error_handling(self, mock_connect: Any) -> None: - """Test authentication error (1045) produces proper error message""" - mock_connect.side_effect = MysqlError.OperationalError( - 1045, "Access denied for user 'test'@'localhost'" - ) - - mariadb = MariaDB(self.mariadb_config_ssl_enabled) - - with self.assertRaises(ConnectorError) as context: - mariadb.get_engine() - - error_message = str(context.exception) - self.assertIn("Authentication failed", error_message) - self.assertIn("username, password and ssl-settings", error_message) - self.assertIn("localhost:3306", error_message) - self.assertIn("SSL enabled", error_message) - - @patch("unstract.connectors.databases.mariadb.mariadb.pymysql.connect") - def test_network_error_handling_ssl_enabled(self, mock_connect: Any) -> None: - """Test network error (2003) with SSL enabled includes SSL context""" - mock_connect.side_effect = MysqlError.OperationalError( - 2003, "Can't connect to MySQL server" - ) - - mariadb = MariaDB(self.mariadb_config_ssl_enabled) - - with self.assertRaises(ConnectorError) as context: - mariadb.get_engine() - - error_message = str(context.exception) - self.assertIn("Cannot connect to server", error_message) - self.assertIn("localhost:3306", error_message) - self.assertIn("SSL enabled", error_message) - - @patch("unstract.connectors.databases.mariadb.mariadb.pymysql.connect") - def test_network_error_handling_ssl_disabled(self, mock_connect: Any) -> None: - """Test network error (2003) with SSL disabled includes SSL context""" - mock_connect.side_effect = MysqlError.OperationalError( - 2003, "Can't connect to MySQL server" - ) - - mariadb = MariaDB(self.mariadb_config_ssl_disabled) - - with self.assertRaises(ConnectorError) as context: - mariadb.get_engine() - - error_message = str(context.exception) - self.assertIn("Cannot connect to server", error_message) - self.assertIn("localhost:3306", error_message) - self.assertIn("SSL disabled", error_message) - - -if __name__ == "__main__": - unittest.main() diff --git a/unstract/connectors/tests/databases/test_mssql_db.py b/unstract/connectors/tests/databases/test_mssql_db.py deleted file mode 100644 index c78340af91..0000000000 --- a/unstract/connectors/tests/databases/test_mssql_db.py +++ /dev/null @@ -1,29 +0,0 @@ -import unittest - -from unstract.connectors.databases.mssql.mssql import MSSQL - - -class TestMSSQL(unittest.TestCase): - def test_user_name_and_password(self): - mssql = MSSQL( - { - "user": "sa", - "password": "Ascon@123", - "server": "localhost", - "port": "1433", - "database": "testdb", - } - ) - query = "SELECT * FROM Employees" - cursor = mssql.get_engine().cursor() - cursor.execute(query) - results = cursor.fetchall() - - for c in results: - print(c) - - self.assertTrue(len(results) > 0) - - -if __name__ == "__main__": - unittest.main() diff --git a/unstract/connectors/tests/databases/test_mysql_db.py b/unstract/connectors/tests/databases/test_mysql_db.py deleted file mode 100644 index aa47cd90cc..0000000000 --- a/unstract/connectors/tests/databases/test_mysql_db.py +++ /dev/null @@ -1,29 +0,0 @@ -import unittest - -from unstract.connectors.databases.mysql.mysql import MySQL - - -class TestMySQLDB(unittest.TestCase): - def test_user_name_and_password(self): - mysql = MySQL( - { - "user": "visitran", - "password": "mysqlpass", - "host": "localhost", - "port": "3307", - "database": "sakila", - } - ) - query = "SELECT * FROM category" - cursor = mysql.get_engine().cursor() - cursor.execute(query) - results = cursor.fetchall() - - for c in results: - print(c) - - self.assertTrue(len(results) > 0) - - -if __name__ == "__main__": - unittest.main() diff --git a/unstract/connectors/tests/databases/test_postgresql_db.py b/unstract/connectors/tests/databases/test_postgresql_db.py deleted file mode 100644 index 96fceddd59..0000000000 --- a/unstract/connectors/tests/databases/test_postgresql_db.py +++ /dev/null @@ -1,50 +0,0 @@ -import unittest - -from unstract.connectors.databases.postgresql.postgresql import PostgreSQL - - -class TestPostgreSqlDB(unittest.TestCase): - def test_user_name_and_password(self): - psql = PostgreSQL( - { - "user": "test", - "password": "ascon", - "host": "localhost", - "port": "5432", - "database": "test7", - "schema": "public", - } - ) - query = "SELECT * FROM account_user LIMIT 3" - cursor = psql.get_engine().cursor() - cursor.execute(query) - results = cursor.fetchall() - - for c in results: - print(c) - - self.assertTrue(len(results) > 0) - - def test_connection_url(self): - connection_url = ( - "postgres://iamali003:FeQhupi41INg@ep-crimson-wind-434055" - ".us-east-2.aws.neon.tech/neondb" - ) - psql = PostgreSQL( - { - "connection_url": connection_url, - } - ) - query = "SELECT * FROM users LIMIT 3" - cursor = psql.get_engine().cursor() - cursor.execute(query) - results = cursor.fetchall() - - for c in results: - print(c) - - self.assertTrue(len(results) > 0) - - -if __name__ == "__main__": - unittest.main() diff --git a/unstract/connectors/tests/databases/test_redshift_db.py b/unstract/connectors/tests/databases/test_redshift_db.py deleted file mode 100644 index 0b1300bfab..0000000000 --- a/unstract/connectors/tests/databases/test_redshift_db.py +++ /dev/null @@ -1,34 +0,0 @@ -import unittest - -from unstract.connectors.databases.redshift.redshift import Redshift - - -class TestRedshift(unittest.TestCase): - def test_user_name_and_password(self): - redshift = Redshift( - { - "user": "awsuser", - "password": "PASSWORD", - "host": "redshift-cluster-1.redshift.amazonaws.com", - "port": "5439", - "database": "dev", - } - ) - query = ( - "SELECT userid, username, firstname, lastname, city, state, email," - "phone, likesports, liketheatre, likeconcerts, likejazz," - "likeclassical, likeopera, likerock, likevegas, likebroadway," - "likemusicals FROM users limit 10" - ) - cursor = redshift.get_engine().cursor() - cursor.execute(query) - results = cursor.fetchall() - - for c in results: - print(c) - - self.assertTrue(len(results) > 0) - - -if __name__ == "__main__": - unittest.main() diff --git a/unstract/connectors/tests/databases/test_snowflake_db.py b/unstract/connectors/tests/databases/test_snowflake_db.py deleted file mode 100644 index a87bb32733..0000000000 --- a/unstract/connectors/tests/databases/test_snowflake_db.py +++ /dev/null @@ -1,43 +0,0 @@ -import unittest - -from unstract.connectors.databases.snowflake.snowflake import SnowflakeDB - - -class TestSnowflakeDB(unittest.TestCase): - def test_something(self): - sf = SnowflakeDB( - { - "user": "arun", - "password": "PASSWORD", - "account": "JX91721.ap-south-1", - "database": "RESUME_COLLECTION", - "schema": "PUBLIC", - "warehouse": "COMPUTE_WH", - "role": "", - } - ) - # engine = sf.get_engine() - # try: - # with engine.connect() as connection: - # md = sqlalchemy.MetaData() - # table = sqlalchemy.Table( - # 'RESUME', md, autoload=True, autoload_with=engine) - # columns = table.c - # for c in columns: - # print(c.name, c.type) - # # connection.execute("select current_version()") - # except Exception as e: - # print(e) - # - # engine.dispose() - - cursor = sf.get_engine().cursor() - results = cursor.execute("describe table RESUME") - for c in results: - print(c) - - self.assertIsNotNone(results) # add assertion here - - -if __name__ == "__main__": - unittest.main() diff --git a/unstract/connectors/tests/filesystems/test_box_fs.py b/unstract/connectors/tests/filesystems/test_box_fs.py index cc062824bc..1aabdd8a81 100644 --- a/unstract/connectors/tests/filesystems/test_box_fs.py +++ b/unstract/connectors/tests/filesystems/test_box_fs.py @@ -1,17 +1,24 @@ import os import unittest +import pytest from unstract.connectors.filesystems.box import BoxFS +# Whole module needs live infra/credentials — integration tier only. +pytestmark = pytest.mark.integration + class TestBoxFS(unittest.TestCase): + @unittest.skipUnless( + os.environ.get("TEST_BOX_APP_SETTINGS"), + "Integration test requires TEST_BOX_APP_SETTINGS", + ) def test_basic(self): box_app_settings = os.environ.get("TEST_BOX_APP_SETTINGS") box_fs = BoxFS(settings={"box_app_settings": box_app_settings}) file_path = "/" try: files = box_fs.get_fsspec_fs().ls(file_path) - print(files) self.assertIsNotNone(files) except Exception as e: self.fail(f"TestBoxFS.test_basic failed: {e}") diff --git a/unstract/connectors/tests/filesystems/test_google_drive_fs.py b/unstract/connectors/tests/filesystems/test_google_drive_fs.py index 26b1ac59a8..b3e1b5a3fa 100644 --- a/unstract/connectors/tests/filesystems/test_google_drive_fs.py +++ b/unstract/connectors/tests/filesystems/test_google_drive_fs.py @@ -1,9 +1,19 @@ +import os import unittest +import pytest from unstract.connectors.filesystems.google_drive.google_drive import GoogleDriveFS +# Whole module needs live infra/credentials — integration tier only. +pytestmark = pytest.mark.integration + class TestGoogleDriveFS(unittest.TestCase): + @unittest.skipUnless( + os.environ.get("GDRIVE_GOOGLE_SERVICE_ACCOUNT") + and os.environ.get("GDRIVE_GOOGLE_PROJECT_ID"), + "Integration test requires GDRIVE_GOOGLE_SERVICE_ACCOUNT and GDRIVE_GOOGLE_PROJECT_ID", + ) def test_basic(self): self.assertEqual(GoogleDriveFS.requires_oauth(), True) drive = GoogleDriveFS( @@ -14,7 +24,7 @@ def test_basic(self): } ) - print(drive.get_fsspec_fs().ls("")) + self.assertIsNotNone(drive.get_fsspec_fs().ls("")) if __name__ == "__main__": diff --git a/unstract/connectors/tests/filesystems/test_http_fs.py b/unstract/connectors/tests/filesystems/test_http_fs.py index 4548d99543..0fb0f27940 100644 --- a/unstract/connectors/tests/filesystems/test_http_fs.py +++ b/unstract/connectors/tests/filesystems/test_http_fs.py @@ -1,24 +1,28 @@ +import os import unittest +import pytest from unstract.connectors.filesystems.http.http import HttpFS +# Live-HTTP test — integration tier only, so `unit-connectors` (-m "not +# integration") never selects it even when HTTP_FS_TEST_URL is set. +pytestmark = pytest.mark.integration + class TestHttpFS(unittest.TestCase): - # Run a local HTTP server with - # `python -m http.server -b localhost 8080` + # Needs a reachable HTTP server. Start one locally, e.g. + # python -m http.server -b localhost 8080 + # then run with HTTP_FS_TEST_URL=http://localhost:8080/. Skip-guarded so it + # never hits a hard-coded live URL during a plain unit run. + @unittest.skipUnless( + os.environ.get("HTTP_FS_TEST_URL"), + "Integration test requires a reachable HTTP server via HTTP_FS_TEST_URL", + ) def test_basic(self): self.assertEqual(HttpFS.can_write(), False) - # Assuming that the server is run locally - # url = "http://localhost:8080/" - url = "https://filesystem-spec.readthedocs.io/" - http_fs = HttpFS(settings={"base_url": url}) - file_path = "/" - try: - # print(http_fs.get_fsspec_fs().ls(file_path)) - files = http_fs.get_fsspec_fs().ls(file_path) - self.assertIsNotNone(files) - except Exception as e: - self.fail(f"TestHttpFS.test_basic failed: {e}") + http_fs = HttpFS(settings={"base_url": os.environ["HTTP_FS_TEST_URL"]}) + files = http_fs.get_fsspec_fs().ls("/") + self.assertIsNotNone(files) if __name__ == "__main__": diff --git a/unstract/connectors/tests/filesystems/test_miniofs.py b/unstract/connectors/tests/filesystems/test_miniofs.py index 9da9a1dcf0..bd624f4b00 100644 --- a/unstract/connectors/tests/filesystems/test_miniofs.py +++ b/unstract/connectors/tests/filesystems/test_miniofs.py @@ -3,10 +3,10 @@ import unittest from unittest.mock import AsyncMock, patch +import pytest from botocore.exceptions import ClientError from s3fs.core import S3FileSystem from s3fs.errors import translate_boto_error - from unstract.connectors.filesystems.minio.exceptions import s3_error_code from unstract.connectors.filesystems.minio.minio import ( MinioFS, @@ -16,38 +16,33 @@ class TestMinoFS(unittest.TestCase): - @unittest.skip("") - def test_s3(self) -> None: - self.assertEqual(MinioFS.requires_oauth(), False) - access_key = os.environ.get("AWS_ACCESS_KEY_ID") - secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY") - s3 = MinioFS( - { - "key": access_key, - "secret": secret_key, - "path": "/", - "endpoint_url": "https://s3.amazonaws.com", - } - ) - - print(s3.get_fsspec_fs().ls("unstract-user-storage")) - - # @unittest.skip("Minio is not running") + @pytest.mark.integration + @unittest.skipUnless( + os.environ.get("MINIO_ACCESS_KEY_ID") + and os.environ.get("MINIO_SECRET_ACCESS_KEY"), + "Integration test requires a live MinIO and MINIO_ACCESS_KEY_ID + MINIO_SECRET_ACCESS_KEY", + ) def test_minio(self) -> None: + # Endpoint comes from the rig (testcontainers MinIO) via + # MINIO_ENDPOINT_URL; falls back to the local run-platform MinIO so a + # developer can run this by hand. Real round-trip: create a bucket and + # prove it shows up through the access-filtered listing. self.assertEqual(MinioFS.requires_oauth(), False) - access_key = os.environ.get("MINIO_ACCESS_KEY_ID") - secret_key = os.environ.get("MINIO_SECRET_ACCESS_KEY") - print(access_key, secret_key) - s3 = MinioFS( + fs = MinioFS( { - "key": access_key, - "secret": secret_key, - "endpoint_url": "http://localhost:9000", - "path": "/minio-test", + "key": os.environ["MINIO_ACCESS_KEY_ID"], + "secret": os.environ["MINIO_SECRET_ACCESS_KEY"], + "endpoint_url": os.environ.get( + "MINIO_ENDPOINT_URL", "http://localhost:9000" + ), + "path": "/", } - ) - - print(s3.get_fsspec_fs().ls("/minio-test")) + ).get_fsspec_fs() + bucket = "rig-minio-test" + if not fs.exists(bucket): + fs.mkdir(bucket) + listed = [b.rstrip("/").split("/")[-1] for b in fs.ls("")] + self.assertIn(bucket, listed) def _translated_error(code: str) -> BaseException: diff --git a/unstract/connectors/tests/filesystems/test_pcs.py b/unstract/connectors/tests/filesystems/test_pcs.py index fa3f49c432..580f08a55d 100644 --- a/unstract/connectors/tests/filesystems/test_pcs.py +++ b/unstract/connectors/tests/filesystems/test_pcs.py @@ -1,10 +1,19 @@ import os import unittest +import pytest from unstract.connectors.filesystems.ucs import UnstractCloudStorage +# Whole module needs live infra/credentials — integration tier only. +pytestmark = pytest.mark.integration + class TestPCS_FS(unittest.TestCase): + @unittest.skipUnless( + os.environ.get("GOOGLE_STORAGE_ACCESS_KEY_ID") + and os.environ.get("GOOGLE_STORAGE_SECRET_ACCESS_KEY"), + "Integration test requires GOOGLE_STORAGE_ACCESS_KEY_ID and GOOGLE_STORAGE_SECRET_ACCESS_KEY", + ) def test_pcs(self) -> None: self.assertEqual(UnstractCloudStorage.requires_oauth(), False) access_key = os.environ.get("GOOGLE_STORAGE_ACCESS_KEY_ID") @@ -18,7 +27,7 @@ def test_pcs(self) -> None: } ) - print(gcs.get_fsspec_fs().ls("unstract-user-storage")) # type:ignore + self.assertIsNotNone(gcs.get_fsspec_fs().ls("unstract-user-storage")) # type:ignore if __name__ == "__main__": diff --git a/unstract/connectors/tests/filesystems/test_sharepoint_fs.py b/unstract/connectors/tests/filesystems/test_sharepoint_fs.py index d976e73a99..6827ce959a 100644 --- a/unstract/connectors/tests/filesystems/test_sharepoint_fs.py +++ b/unstract/connectors/tests/filesystems/test_sharepoint_fs.py @@ -5,6 +5,8 @@ import unittest from datetime import datetime, timezone +import pytest + logger = logging.getLogger(__name__) @@ -115,14 +117,6 @@ def test_connector_initialization_missing_auth(self): SharePointFS(settings=invalid_settings) self.assertIn("requires authentication", str(context.exception)) - def test_json_schema_has_is_personal(self): - """Test that JSON schema includes is_personal field.""" - from unstract.connectors.filesystems.sharepoint import SharePointFS - - schema = SharePointFS.get_json_schema() - self.assertIn("is_personal", schema) - self.assertIn("Personal Account", schema) - def test_json_schema_has_oneof_pattern(self): """Test that JSON schema uses dependencies/oneOf pattern for dual auth methods.""" import json @@ -243,6 +237,7 @@ def test_get_connector_root_dir(self): self.assertEqual(result, "") +@pytest.mark.integration class TestSharePointFSIntegration(unittest.TestCase): """Integration tests for SharePointFS (require real credentials).""" diff --git a/unstract/connectors/tests/filesystems/test_zs_dropbox_fs.py b/unstract/connectors/tests/filesystems/test_zs_dropbox_fs.py index df35208d7e..a72bc915e3 100644 --- a/unstract/connectors/tests/filesystems/test_zs_dropbox_fs.py +++ b/unstract/connectors/tests/filesystems/test_zs_dropbox_fs.py @@ -1,10 +1,18 @@ import os import unittest +import pytest from unstract.connectors.filesystems.zs_dropbox import DropboxFS +# Whole module needs live infra/credentials — integration tier only. +pytestmark = pytest.mark.integration + class TestDropboxFS(unittest.TestCase): + @unittest.skipUnless( + os.environ.get("TEST_DROPBOX_ACCESS_TOKEN"), + "Integration test requires TEST_DROPBOX_ACCESS_TOKEN", + ) def test_access_token(self): access_token = os.environ.get("TEST_DROPBOX_ACCESS_TOKEN") settings = {"token": access_token} @@ -12,9 +20,7 @@ def test_access_token(self): # Leave empty for root file_path = "" try: - # print(dropbox_fs.get_fsspec_fs().ls(file_path)) files = dropbox_fs.get_fsspec_fs().ls(file_path) - print(files) self.assertIsNotNone(files) except Exception as e: self.fail(f"TestDropboxFS.test_access_token failed: {e}") diff --git a/unstract/core/tests/account_services/test_pandora_account.py b/unstract/core/tests/account_services/test_pandora_account.py deleted file mode 100644 index 8e74fe3bc4..0000000000 --- a/unstract/core/tests/account_services/test_pandora_account.py +++ /dev/null @@ -1,15 +0,0 @@ -import unittest - -from unstract.core.account_services.unstract_account import UnstractAccount - - -class TestUnstractAccount(unittest.TestCase): - def test_provision_blob(self): - account = UnstractAccount("acme", "johndoe") - account.provision_s3_storage() - account.upload_sample_files() - self.assertEqual(True, True) # add assertion here - - -if __name__ == "__main__": - unittest.main() diff --git a/unstract/core/tests/test_pubsub_helper.py b/unstract/core/tests/test_pubsub_helper.py deleted file mode 100644 index ae3e102690..0000000000 --- a/unstract/core/tests/test_pubsub_helper.py +++ /dev/null @@ -1,21 +0,0 @@ -import unittest - -from unstract.core.pubsub_helper import LogHelper as Log - - -class PubSubHelperTestCase(unittest.TestCase): - def test_pubsub(self): - ps1 = Log.publish( - project_guid="test", - message=Log.log(stage="COMPILE", message="Compile process started"), - ) - ps2 = Log.publish( - project_guid="test", - message=Log.log(level="ERROR", stage="COMPILE", message="Compile failed"), - ) - self.assertEqual(ps1, True) - self.assertEqual(ps2, True) - - -if __name__ == "__main__": - unittest.main()