diff --git a/olive/cache.py b/olive/cache.py index ceb64e152..7f27a50de 100644 --- a/olive/cache.py +++ b/olive/cache.py @@ -396,14 +396,57 @@ def save_model( model_attributes = model_json_config.get("model_attributes") or {} if model_attributes.get("no_flatten"): - # Preserve directory structure (e.g., for diffusers models exported by optimum) + # Preserve directory structure (e.g., for diffusers models + # exported by optimum, or multimodal ORT GenAI packages from + # MobiusBuilder where components live in <package_root>/<component>/). source_path = Path(model_json_config["model_path"]) + source_path_resolved = source_path.resolve() if source_path.exists(): shutil.copytree(source_path, actual_output_dir, dirs_exist_ok=overwrite) - # Update component paths to point to new location + def _rebase_additional_files(config: dict, fallback_dir: Path): + model_attributes = config.get("model_attributes") or {} + additional_files = model_attributes.get("additional_files") or [] + if not additional_files: + return + + rebased_additional_files = [] + for additional_file in additional_files: + source_additional_file = Path(additional_file) + try: + relative = source_additional_file.resolve().relative_to(source_path_resolved) + output_additional_file = actual_output_dir / relative + except ValueError: + output_additional_file = fallback_dir / source_additional_file.name + if source_additional_file.exists() and not output_additional_file.exists(): + output_additional_file.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source_additional_file, output_additional_file) + rebased_additional_files.append(str(output_additional_file)) + + model_attributes["additional_files"] = rebased_additional_files + config["model_attributes"] = model_attributes + + # Rewrite each component's model_path so it points into the + # new output location while preserving the component's + # relative position underneath the package root. 
Without + # rebasing component paths the saved model_config.json + # cannot be loaded (and onnx_file_name is left untouched, + # so we must not collapse component subdirs into the root). + _rebase_additional_files(model_json_config, actual_output_dir) for component in model_json_config["model_components"]: - component["config"]["model_path"] = str(actual_output_dir) + component_config = component["config"] + component_model_path = component_config.get("model_path") + if component_model_path: + try: + relative = Path(component_model_path).resolve().relative_to(source_path_resolved) + except ValueError: + # Component path is not under the composite root; + # fall back to placing it at the package root. + relative = Path() + component_config["model_path"] = str(actual_output_dir / relative) + else: + component_config["model_path"] = str(actual_output_dir) + _rebase_additional_files(component_config, Path(component_config["model_path"])) model_json_config["model_path"] = str(actual_output_dir) else: copied_components = [] diff --git a/olive/passes/onnx/mobius_model_builder.py b/olive/passes/onnx/mobius_model_builder.py index 7b7e52bb3..05e41841d 100644 --- a/olive/passes/onnx/mobius_model_builder.py +++ b/olive/passes/onnx/mobius_model_builder.py @@ -201,7 +201,9 @@ def _run_for_config( ) # Multi-component model (VLMs, encoder-decoders, diffusion pipelines): - # mobius saves each component to <output_dir>/<key>/model.onnx. + # mobius saves each component to <output_dir>/<key>/model.onnx with shared + # sidecar files (genai_config.json, tokenizer.json, image_processor.json, + # audio_feature_extraction.json) at output_dir root. components = [] for key in package_keys: component_dir = output_dir / key @@ -211,18 +213,20 @@ def _run_for_config( f"MobiusBuilder: expected output file not found: {onnx_path}. " f"mobius.build() may have failed silently for component '{key}'." ) - additional_files = sorted( + # Per-component additional files: only files that live inside the + # component's own directory. 
Shared sidecars (genai_config, tokenizer, + # image_processor) are attached to the composite handler below so + # they land in the output root, not duplicated in every component. + component_additional_files = sorted( {str(fp) for fp in component_dir.iterdir()} - {str(onnx_path), str(onnx_path) + ".data"} ) - # Include ORT GenAI artifacts from root output_dir (shared across components) - additional_files = sorted(set(additional_files) | set(genai_artifacts.values())) components.append( ONNXModelHandler( model_path=str(component_dir), onnx_file_name="model.onnx", model_attributes={ "mobius_component": key, - "additional_files": additional_files, + "additional_files": component_additional_files, **(model.model_attributes or {}), }, ) @@ -234,6 +238,15 @@ def _run_for_config( model_path=str(output_dir), model_attributes={ "mobius_package_keys": package_keys, + # Preserve the <key>/model.onnx subdirectory layout so + # ORT GenAI can resolve each component by its "filename" key. + # Without this, Olive's cache flattens components to top-level + # .onnx files and breaks GenAI loading. + "no_flatten": True, + # Shared package-level sidecars carried via the composite handler + # so they end up at the package root (alongside genai_config.json), + # not duplicated into each <key>/ subdirectory. 
+ "additional_files": sorted(set(genai_artifacts.values())), **(model.model_attributes or {}), }, ) diff --git a/test/passes/onnx/test_mobius_model_builder.py b/test/passes/onnx/test_mobius_model_builder.py index cd3d7338d..7b1903047 100644 --- a/test/passes/onnx/test_mobius_model_builder.py +++ b/test/passes/onnx/test_mobius_model_builder.py @@ -220,12 +220,22 @@ def test_genai_artifacts_in_single_component(tmp_path): def test_genai_artifacts_in_multi_component(tmp_path): - """ORT GenAI artifacts must be included in all components of multi-component models.""" + """ORT GenAI artifacts must be attached at composite-level, not duplicated per component.""" out = tmp_path / "out" out.mkdir(parents=True, exist_ok=True) keys = ["model", "vision", "embedding"] pkg = _fake_pkg(keys, out) + def _save_with_component_sidecar(directory: str, **_kwargs): + out_dir = Path(directory) + for key in keys: + component_dir = out_dir / key + component_dir.mkdir(parents=True, exist_ok=True) + (component_dir / "model.onnx").write_text("dummy") + (out_dir / "vision" / "vision_local.txt").write_text("vision") + + pkg.save.side_effect = _save_with_component_sidecar + # Mock genai artifact files genai_config = str(out / "genai_config.json") image_processor = str(out / "image_processor.json") @@ -240,11 +250,19 @@ def test_genai_artifacts_in_multi_component(tmp_path): result = p.run(_make_hf_model("microsoft/phi-4-vision"), out) assert isinstance(result, CompositeModelHandler) - # Verify all components include genai artifacts - for component in result.model_components: + composite_additional_files = result.model_attributes.get("additional_files", []) + assert genai_config in composite_additional_files + assert image_processor in composite_additional_files + + # Shared GenAI sidecars should not be duplicated into each component. 
+ components = list(result.model_components) + for component in components: additional_files = component.model_attributes.get("additional_files", []) - assert genai_config in additional_files - assert image_processor in additional_files + assert genai_config not in additional_files + assert image_processor not in additional_files + + vision = components[result.model_component_names.index("vision")] + assert str(out / "vision" / "vision_local.txt") in vision.model_attributes.get("additional_files", []) # --------------------------------------------------------------------------- diff --git a/test/test_cache.py b/test/test_cache.py index 5b6405e5f..4063ff155 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -350,6 +350,73 @@ def test_save_model_with_custom_onnx_filename(self, tmp_path): with open(output_json_path) as f: assert expected_output_path == json.load(f)["config"]["model_path"] + def test_save_model_no_flatten_rebases_component_and_additional_file_paths(self, tmp_path): + # setup + model_id = "composite_model" + cache = CacheConfig(cache_dir=tmp_path / "cache").create_cache() + + source_dir = tmp_path / "source_model" + decoder_dir = source_dir / "decoder" + embedding_dir = source_dir / "embedding" + decoder_dir.mkdir(parents=True, exist_ok=True) + embedding_dir.mkdir(parents=True, exist_ok=True) + (decoder_dir / "model.onnx").write_text("decoder") + (embedding_dir / "model.onnx").write_text("embedding") + (decoder_dir / "decoder_local.txt").write_text("decoder local") + (source_dir / "genai_config.json").write_text("{}") + (source_dir / "tokenizer.json").write_text("{}") + + model_json = { + "type": "compositemodel", + "config": { + "model_path": str(source_dir), + "model_component_names": ["decoder", "embedding"], + "model_components": [ + { + "type": "onnxmodel", + "config": { + "model_path": str(decoder_dir), + "onnx_file_name": "model.onnx", + "model_attributes": {"additional_files": [str(decoder_dir / "decoder_local.txt")]}, + }, + }, + { + 
"type": "onnxmodel", + "config": {"model_path": str(embedding_dir), "onnx_file_name": "model.onnx"}, + }, + ], + "model_attributes": { + "no_flatten": True, + "additional_files": [str(source_dir / "genai_config.json"), str(source_dir / "tokenizer.json")], + }, + }, + } + with cache.get_model_json_path(model_id).open("w") as f: + json.dump(model_json, f) + + output_dir = tmp_path / "output" + output_json = cache.save_model(model_id, output_dir, True) + + # assert copied layout + assert (output_dir / "decoder" / "model.onnx").exists() + assert (output_dir / "embedding" / "model.onnx").exists() + assert (output_dir / "genai_config.json").exists() + assert (output_dir / "tokenizer.json").exists() + + # assert rewritten config paths + assert output_json["config"]["model_path"] == str(output_dir) + decoder_component = output_json["config"]["model_components"][0]["config"] + assert decoder_component["model_path"] == str(output_dir / "decoder") + assert decoder_component["onnx_file_name"] == "model.onnx" + assert decoder_component["model_attributes"]["additional_files"] == [ + str(output_dir / "decoder" / "decoder_local.txt") + ] + + assert output_json["config"]["model_attributes"]["additional_files"] == [ + str(output_dir / "genai_config.json"), + str(output_dir / "tokenizer.json"), + ] + class TestSharedCache: @pytest.fixture(autouse=True)