Zipstack · chandrasekharan-zipstack · Jun 2, 2026 · Jun 29, 2026 · Jun 29, 2026 · Jun 29, 2026
diff --git a/backend/dashboard_metrics/tests/test_tasks.py b/backend/dashboard_metrics/tests/test_tasks.py
@@ -1,11 +1,11 @@
 """Unit tests for Dashboard Metrics Celery tasks."""
 
-import uuid
 from datetime import datetime, timedelta
 
 from django.test import TestCase, TransactionTestCase
 from django.utils import timezone
 
+from account_v2.models import Organization
 from dashboard_metrics.models import (
     EventMetricsDaily,
     EventMetricsHourly,
@@ -86,7 +86,10 @@ class TestCleanupTasks(TransactionTestCase):
 
     def setUp(self):
         """Set up test fixtures."""
-        self.org_id = str(uuid.uuid4())
+        # organization FK targets Organization's int PK, not a UUID.
+        self.org = Organization.objects.create(
+            organization_id="test-org", name="test-org", display_name="Test Org"
+        )
 
     def test_cleanup_hourly_metrics_deletes_old_records(self):
         """Test that cleanup deletes hourly records older than retention."""
@@ -96,7 +99,7 @@ def test_cleanup_hourly_metrics_deletes_old_records(self):
 
         # Create old record
         EventMetricsHourly.objects.create(
-            organization_id=self.org_id,
+            organization=self.org,
             timestamp=old_timestamp,
             metric_name="old_metric",
             metric_type=MetricType.COUNTER,
@@ -107,7 +110,7 @@ def test_cleanup_hourly_metrics_deletes_old_records(self):
 
         # Create recent record
         EventMetricsHourly.objects.create(
-            organization_id=self.org_id,
+            organization=self.org,
             timestamp=recent_timestamp,
             metric_name="recent_metric",
             metric_type=MetricType.COUNTER,
@@ -122,9 +125,10 @@ def test_cleanup_hourly_metrics_deletes_old_records(self):
         assert result["deleted"] == 1
         assert result["retention_days"] == 30
 
-        # Verify old is deleted, recent remains
-        assert not EventMetricsHourly.objects.filter(metric_name="old_metric").exists()
-        assert EventMetricsHourly.objects.filter(metric_name="recent_metric").exists()
+        # _base_manager bypasses the org-scoped default manager, which filters
+        # by UserContext.get_organization() — None here, so .objects sees nothing.
+        assert not EventMetricsHourly._base_manager.filter(metric_name="old_metric").exists()
+        assert EventMetricsHourly._base_manager.filter(metric_name="recent_metric").exists()
 
     def test_cleanup_daily_metrics_deletes_old_records(self):
         """Test that cleanup deletes daily records older than retention."""
@@ -134,7 +138,7 @@ def test_cleanup_daily_metrics_deletes_old_records(self):
 
         # Create old record
         EventMetricsDaily.objects.create(
-            organization_id=self.org_id,
+            organization=self.org,
             date=old_date,
             metric_name="old_daily_metric",
             metric_type=MetricType.COUNTER,
@@ -145,7 +149,7 @@ def test_cleanup_daily_metrics_deletes_old_records(self):
 
         # Create recent record
         EventMetricsDaily.objects.create(
-            organization_id=self.org_id,
+            organization=self.org,
             date=recent_date,
             metric_name="recent_daily_metric",
             metric_type=MetricType.COUNTER,
@@ -160,10 +164,10 @@ def test_cleanup_daily_metrics_deletes_old_records(self):
         assert result["deleted"] == 1
 
         # Verify old is deleted, recent remains
-        assert not EventMetricsDaily.objects.filter(
+        assert not EventMetricsDaily._base_manager.filter(
             metric_name="old_daily_metric"
         ).exists()
-        assert EventMetricsDaily.objects.filter(
+        assert EventMetricsDaily._base_manager.filter(
             metric_name="recent_daily_metric"
         ).exists()
 
@@ -173,7 +177,7 @@ def test_cleanup_hourly_with_custom_retention(self):
         old_timestamp = now - timedelta(days=10)
 
         EventMetricsHourly.objects.create(
-            organization_id=self.org_id,
+            organization=self.org,
             timestamp=old_timestamp,
             metric_name="custom_retention_metric",
             metric_type=MetricType.COUNTER,

diff --git a/backend/prompt_studio/prompt_studio_core_v2/tests/test_build_index_payload.py b/backend/prompt_studio/prompt_studio_core_v2/tests/test_build_index_payload.py
@@ -42,6 +42,12 @@
 # ---------------------------------------------------------------------------
 
 
+# Originals displaced by the stubs below, restored once the helper is imported
+# so the stubs never leak into sibling test modules' collection (a stubbed
+# ``account_v2.models`` would otherwise break their real imports).
+_SAVED_MODULES: dict[str, types.ModuleType | None] = {}
+
+
 def _install(name: str, attrs: dict[str, Any] | None = None) -> types.ModuleType:
     """Install (or replace) a fake module into ``sys.modules``.
 
@@ -50,6 +56,7 @@ def _install(name: str, attrs: dict[str, Any] | None = None) -> types.ModuleType
     (via pytest collection, conftest, etc.), and we need our fake to
     actually take effect.
     """
+    _SAVED_MODULES.setdefault(name, sys.modules.get(name))
     mod = types.ModuleType(name)
     if attrs:
         for key, value in attrs.items():
@@ -69,12 +76,32 @@ def _install_package(name: str) -> types.ModuleType:
     """
     if name in sys.modules:
         return sys.modules[name]
+    _SAVED_MODULES.setdefault(name, None)
     mod = types.ModuleType(name)
     mod.__path__ = []  # type: ignore[attr-defined]
     sys.modules[name] = mod
     return mod
 
 
+def _restore_modules() -> None:
+    """Undo every stub recorded in ``_SAVED_MODULES``: put back the module it
+    displaced, or drop the stub when nothing was there before. The helper has
+    already bound its imports by the time this runs, so its tests are unaffected.
+    """
+    for name, original in _SAVED_MODULES.items():
+        if original is None:
+            sys.modules.pop(name, None)
+        else:
+            sys.modules[name] = original
+    _SAVED_MODULES.clear()
+    # The helper was imported while the stubs were live, so the cached module
+    # is bound to them. Evict it so any later importer gets a real copy; our
+    # own `_psh_mod`/`PromptStudioHelper` refs are already bound, unaffected.
+    sys.modules.pop(
+        "prompt_studio.prompt_studio_core_v2.prompt_studio_helper", None
+    )
+
+
 try:
     # Account / adapter stubs
     _install_package("account_v2")
@@ -290,6 +317,8 @@ def __init__(self, **kwargs: Any) -> None:
     )
     PromptStudioHelper = None  # type: ignore[assignment]
     IKeys = None  # type: ignore[assignment]
+finally:
+    _restore_modules()
 
 
 pytestmark = pytest.mark.skipif(

diff --git a/backend/pyproject.toml b/backend/pyproject.toml
@@ -71,7 +71,10 @@ dev = [
     "responses>=0.25.7",
     "psutil>=7.0.0",
 ]
-test = ["pytest>=8.0.1"]
+test = [
+    "pytest>=8.0.1",
+    "pytest-django>=4.12.0",
+]
 deploy = [
     "gunicorn~=23.0", # For serving the application
     # Keep versions empty and let uv decide version

diff --git a/backend/usage_v2/tests/test_helper.py b/backend/usage_v2/tests/test_helper.py
@@ -5,69 +5,34 @@
 bare ``"llm"`` bucket from leaking into API deployment responses when
 a producer-side LLM call site forgets to set ``llm_usage_reason``.
 
-The tests deliberately do not require a live Django database — the
-backend test environment has no ``pytest-django``, no SQLite fallback,
-and uses ``django-tenants`` against Postgres in production.  Instead
-the tests stub ``account_usage.models`` and ``usage_v2.models`` in
-``sys.modules`` *before* importing the helper, so the helper module
-loads cleanly without triggering Django's app registry checks.  The
-fake ``Usage.objects.filter`` chain returns a deterministic list of
-row dicts shaped exactly like the real ``.values(...).annotate(...)``
-queryset rows the helper iterates over.
+The tests exercise only the helper's in-memory aggregation logic, not
+the ORM.  An autouse fixture rebinds the helper module's ``Usage`` name
+to a fake whose ``objects.filter`` chain returns a deterministic list
+of row dicts shaped exactly like the real
+``.values(...).annotate(...)`` queryset rows the helper iterates over.
 """
 
 from __future__ import annotations
 
-import sys
-import types
 from typing import Any
 from unittest.mock import MagicMock
 
+import pytest
+import usage_v2.helper as helper_mod
+from usage_v2.helper import UsageHelper
 
-# ---------------------------------------------------------------------------
-# Module-level stubs.  Must run BEFORE ``usage_v2.helper`` is imported, so we
-# do it at import time and capture the helper reference for the tests below.
-# ---------------------------------------------------------------------------
-
-
-def _install_stubs() -> tuple[Any, Any]:
-    """Install fake ``account_usage.models`` and ``usage_v2.models`` modules
-    so that ``usage_v2.helper`` can be imported without Django being set up.
-
-    Returns ``(UsageHelper, FakeUsage)`` — the helper class to test and the
-    fake Usage class whose ``objects.filter`` we will swap per-test.
-    """
-    # Fake account_usage package + models module
-    if "account_usage" not in sys.modules:
-        account_usage_pkg = types.ModuleType("account_usage")
-        account_usage_pkg.__path__ = []  # mark as package
-        sys.modules["account_usage"] = account_usage_pkg
-    if "account_usage.models" not in sys.modules:
-        account_usage_models = types.ModuleType("account_usage.models")
-        account_usage_models.PageUsage = MagicMock(name="PageUsage")
-        sys.modules["account_usage.models"] = account_usage_models
-
-    # Fake usage_v2.models with a Usage class whose ``objects`` is a
-    # MagicMock (so each test can rebind ``filter.return_value``).
-    if "usage_v2.models" not in sys.modules or not hasattr(
-        sys.modules["usage_v2.models"], "_is_test_stub"
-    ):
-        usage_v2_models = types.ModuleType("usage_v2.models")
-        usage_v2_models._is_test_stub = True
-
-        class _FakeUsage:
-            objects = MagicMock(name="Usage.objects")
-
-        usage_v2_models.Usage = _FakeUsage
-        sys.modules["usage_v2.models"] = usage_v2_models
-
-    # Now import the helper — this picks up our stubs.
-    from usage_v2.helper import UsageHelper
 
-    return UsageHelper, sys.modules["usage_v2.models"].Usage
+class FakeUsage:
+    # Shared class-level MagicMock so each test can rebind filter.return_value.
+    objects = MagicMock(name="Usage.objects")
 
 
-UsageHelper, FakeUsage = _install_stubs()
+@pytest.fixture(autouse=True)
+def _swap_usage(monkeypatch: pytest.MonkeyPatch) -> None:
+    # Patch the ``Usage`` name that get_usage_by_model resolves, once per test:
+    # monkeypatch restores the real model afterwards, whereas a module-level
+    # rebind would leak FakeUsage into every later test in the same process.
+    monkeypatch.setattr(helper_mod, "Usage", FakeUsage)
 
 
 # ---------------------------------------------------------------------------

diff --git a/backend/utils/file_storage/__init__.py b/backend/utils/file_storage/__init__.py
diff --git a/backend/utils/file_storage/helpers/__init__.py b/backend/utils/file_storage/helpers/__init__.py
diff --git a/backend/utils/file_storage/helpers/prompt_studio_file_helper.py b/backend/utils/file_storage/helpers/prompt_studio_file_helper.py
@@ -6,13 +6,13 @@
 
 from file_management.exceptions import InvalidFileType
 from file_management.file_management_helper import FileManagerHelper
-from utils.file_storage.constants import FileStorageConstants, FileStorageKeys
-from utils.file_storage.helpers.streaming_writer import write_streaming
 
 from unstract.core.utilities import UnstractUtils
 from unstract.sdk1.file_storage import FileStorage
 from unstract.sdk1.file_storage.constants import StorageType
 from unstract.sdk1.file_storage.env_helper import EnvHelper
+from utils.file_storage.constants import FileStorageConstants, FileStorageKeys
+from utils.file_storage.helpers.streaming_writer import write_streaming
 
 logger = logging.getLogger(__name__)
 

diff --git a/backend/uv.lock b/backend/uv.lock
diff --git a/platform-service/tests/test_auth_middleware.py b/platform-service/tests/test_auth_middleware.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -178,7 +178,6 @@ keep-dict-typing = true
 
 [tool.pytest.ini_options]
 python_files = ["tests.py", "test_*.py", "*_tests.py"]
-DJANGO_SETTINGS_MODULE = "backend.settings.test_cases"
 testpaths = ["tests"]
 markers = [
     "slow: marks tests as slow (deselect with '-m \"not slow\"')",

diff --git a/tests/critical_paths.yaml b/tests/critical_paths.yaml
@@ -3,9 +3,17 @@
 # A "critical path" is an end-to-end user or system flow whose failure would
 # constitute a production incident. The rig reports:
 #   ✅ covered    — at least one group in `covered_by` ran green this build
-#   ⚠️ gap        — `covered_by` is empty OR no group covering it ran
+#   ⚠️ gap        — no covering group ran green this build
 #   ❌ regression — a path that was ✅ on the cached main baseline is now not ✅
 #
+# Gaps come in two kinds; only the first gates --fail-on-critical-gap:
+#   • in-scope gap     — a covering group ran in this tier but not green; fails.
+#   • out-of-scope gap — covered only by an unrun tier, or no group declared;
+#                        warn-only (a tier can't fail for coverage it can't run).
+#
+# Only wire `covered_by` to a group that really exercises the path — a bogus
+# mapping fails the build when that group breaks, for the wrong reason.
+#
 # We intentionally do NOT chase 100% coverage. Focus on filling these gaps first.
 version: 1
 
@@ -21,10 +29,10 @@ paths:
   - id: adapter-register-llm
     description: "Register and validate an LLM adapter."
     entry: "POST /api/v1/adapter/"
-    # Honest declaration: unit-backend is currently optional/gated and
-    # e2e-smoke only hits /health/. Track as a gap until a real adapter test
-    # exists (likely under tests/e2e/smoke/ or a new tests/e2e/adapters/ group).
-    covered_by: []
+    # unit-sdk1 covers adapter registration + parameter validation at the SDK
+    # layer (the logic the endpoint delegates to). The HTTP round-trip stays an
+    # e2e concern; promote when an e2e adapter group exists.
+    covered_by: [unit-sdk1]
 
   - id: workflow-create-execute
     description: "Create a workflow, configure source+destination, execute, poll, fetch result."
@@ -46,11 +54,6 @@ paths:
     entry: "POST /api/v1/pipeline/{id}/execute/"
     covered_by: []   # gap
 
-  - id: tool-sandbox-exec
-    description: "Tool image runs in sandbox container and emits structured output."
-    entry: "internal: tool-registry → runner → docker run"
-    covered_by: [unit-runner]
-
   - id: usage-token-tracking
     description: "Per-execution token usage is recorded and retrievable."
     entry: "GET /api/v1/usage/get_token_usage/"