Skip to content

Commit 7db3b9f

Browse files
Gabe Pescoclaude
authored andcommitted
Fix: create_external_models misclassifies cross-repo models as external (#5326)
In a multi-repo setup, `create_external_models` partitions models per-config before passing them to `create_external_models_file`. This caused dependencies that belong to another repo (e.g. `bronze.a` referenced by `silver.c` in repo_2) to appear missing from the current config's model set and be incorrectly classified as external. The fix adds an `all_models` parameter to `create_external_models_file` that represents the full set of loaded models across all repos. Dependency resolution uses this complete set, so cross-repo internal models are never mistaken for external ones. The per-config `models` argument is still used to scope which models' dependencies are written to each repo's `external_models.yaml`. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 8f092ac commit 7db3b9f

File tree

3 files changed

+66
-2
lines changed

3 files changed

+66
-2
lines changed

sqlmesh/core/context.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2488,6 +2488,7 @@ def create_external_models(self, strict: bool = False) -> None:
24882488
gateway=external_models_gateway,
24892489
max_workers=self.concurrent_tasks,
24902490
strict=strict,
2491+
all_models=self._models,
24912492
)
24922493

24932494
@python_api_analytics

sqlmesh/core/schema_loader.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,33 @@ def create_external_models_file(
2424
gateway: t.Optional[str] = None,
2525
max_workers: int = 1,
2626
strict: bool = False,
27+
all_models: t.Optional[t.Dict[str, Model]] = None,
2728
) -> None:
2829
"""Create or replace a YAML file with column and types of all columns in all external models.
2930
3031
Args:
3132
path: The path to store the YAML file.
32-
models: FQN to model
33+
models: FQN to model for the current repo/config being processed.
3334
adapter: The engine adapter.
3435
state_reader: The state reader.
3536
dialect: The dialect to serialize the schema as.
3637
gateway: If the model should be associated with a specific gateway; the gateway key
3738
max_workers: The max concurrent workers to fetch columns.
3839
strict: If True, raise an error if the external model is missing in the database.
40+
all_models: FQN to model across all loaded repos. When provided, a dependency is only
41+
classified as external if it is absent from this full set. This prevents cross-repo
42+
internal models from being misclassified as external in multi-repo setups.
3943
"""
44+
known_models: t.Union[UniqueKeyDict[str, Model], t.Dict[str, Model]] = (
45+
all_models if all_models is not None else models
46+
)
4047
external_model_fqns = set()
4148

4249
for fqn, model in models.items():
4350
if model.kind.is_external:
4451
external_model_fqns.add(fqn)
4552
for dep in model.depends_on:
46-
if dep not in models:
53+
if dep not in known_models:
4754
external_model_fqns.add(dep)
4855

4956
# Make sure we don't convert internal models into external ones.

tests/core/integration/test_multi_repo.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
)
2222
from sqlmesh.core.console import get_console
2323
from sqlmesh.core.context import Context
24+
from sqlmesh.utils import yaml
2425
from sqlmesh.utils.date import now
2526
from tests.conftest import DuckDBMetadata
2627
from tests.utils.test_helpers import use_terminal_console
@@ -559,3 +560,58 @@ def test_engine_adapters_multi_repo_all_gateways_gathered(copy_to_temp_path):
559560
gathered_gateways = context.engine_adapters.keys()
560561
expected_gateways = {"local", "memory", "extra"}
561562
assert gathered_gateways == expected_gateways
563+
564+
565+
@use_terminal_console
566+
def test_multi_repo_create_external_models(copy_to_temp_path):
567+
"""create_external_models should not classify cross-repo models as external (sqlmesh#5326).
568+
569+
silver.c and silver.e (repo_2) depend on bronze.a (repo_1). When running
570+
create_external_models with both repos loaded, bronze.a must NOT be treated
571+
as an external model because it is an internal model defined in repo_1.
572+
573+
The observable symptom of the bug is a warning: "Unable to get schema for
574+
'bronze.a'" — because SQLMesh tries to query the schema of what it wrongly
575+
thinks is an external table. A correct implementation never attempts this
576+
lookup and therefore emits no such warning.
577+
"""
578+
paths = copy_to_temp_path("examples/multi")
579+
repo_1_path = f"{paths[0]}/repo_1"
580+
repo_2_path = f"{paths[0]}/repo_2"
581+
582+
context = Context(paths=[repo_1_path, repo_2_path], gateway="memory")
583+
context._new_state_sync().reset(default_catalog=context.default_catalog)
584+
585+
with patch.object(context.console, "log_warning") as mock_warning:
586+
context.create_external_models()
587+
588+
warning_messages = [str(call) for call in mock_warning.call_args_list]
589+
schema_lookup_warnings = [
590+
msg for msg in warning_messages if "bronze" in msg and "a" in msg and "schema" in msg.lower()
591+
]
592+
assert not schema_lookup_warnings, (
593+
"bronze.a should not be looked up as an external model, but got warnings: "
594+
+ str(schema_lookup_warnings)
595+
)
596+
597+
# repo_2's external_models.yaml must not contain bronze.a
598+
repo_2_external = Path(repo_2_path) / c.EXTERNAL_MODELS_YAML
599+
if repo_2_external.exists():
600+
contents = yaml.load(repo_2_external)
601+
external_names = [e["name"] for e in contents]
602+
assert not any("bronze" in name and "a" in name for name in external_names), (
603+
f"bronze.a should not be in repo_2's external models, but found: {external_names}"
604+
)
605+
606+
# repo_1 has no external dependencies at all
607+
repo_1_external = Path(repo_1_path) / c.EXTERNAL_MODELS_YAML
608+
if repo_1_external.exists():
609+
contents = yaml.load(repo_1_external)
610+
assert len(contents) == 0, (
611+
f"repo_1 should have no external models, got: {contents}"
612+
)
613+
614+
# Plan should still resolve all 5 models as internal after create_external_models
615+
context.load()
616+
plan = context.plan_builder().build()
617+
assert len(plan.new_snapshots) == 5

0 commit comments

Comments
 (0)