Skip to content

Commit 1fe688a

Browse files
yiyixuxu, yiyi@huggingface.co, and DN6
authored
[modular] not pass trust_remote_code to external repos (#13204)
* add * update warn * add a test * update * update_component with custom model * add more tests * Apply suggestion from @DN6 Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com> * up --------- Co-authored-by: yiyi@huggingface.co <yiyi@ip-26-0-161-123.ec2.internal> Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>
1 parent bbbcdd8 commit 1fe688a

File tree

3 files changed

+239
-8
lines changed

3 files changed

+239
-8
lines changed

src/diffusers/modular_pipelines/modular_pipeline.py

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1707,6 +1707,8 @@ def __init__(
17071707
_blocks_class_name=self._blocks.__class__.__name__ if self._blocks is not None else None
17081708
)
17091709

1710+
self._pretrained_model_name_or_path = pretrained_model_name_or_path
1711+
17101712
@property
17111713
def default_call_parameters(self) -> dict[str, Any]:
17121714
"""
@@ -2254,6 +2256,11 @@ def update_components(self, **kwargs):
22542256
new_component_spec = current_component_spec
22552257
if hasattr(self, name) and getattr(self, name) is not None:
22562258
logger.warning(f"ModularPipeline.update_components: setting {name} to None (spec unchanged)")
2259+
elif (
2260+
current_component_spec.default_creation_method == "from_pretrained"
2261+
and getattr(component, "_diffusers_load_id", None) is None
2262+
):
2263+
new_component_spec = ComponentSpec(name=name, type_hint=type(component))
22572264
else:
22582265
new_component_spec = ComponentSpec.from_component(name, component)
22592266

@@ -2325,17 +2332,49 @@ def load_components(self, names: list[str] | str | None = None, **kwargs):
23252332
elif "default" in value:
23262333
# check if the default is specified
23272334
component_load_kwargs[key] = value["default"]
2335+
# Only pass trust_remote_code to components from the same repo as the pipeline.
2336+
# When a user passes trust_remote_code=True, they intend to trust code from the
2337+
# pipeline's repo, not from external repos referenced in modular_model_index.json.
2338+
trust_remote_code_stripped = False
2339+
if (
2340+
"trust_remote_code" in component_load_kwargs
2341+
and self._pretrained_model_name_or_path is not None
2342+
and spec.pretrained_model_name_or_path != self._pretrained_model_name_or_path
2343+
):
2344+
component_load_kwargs.pop("trust_remote_code")
2345+
trust_remote_code_stripped = True
2346+
2347+
if not spec.pretrained_model_name_or_path:
2348+
logger.info(f"Skipping component `{name}`: no pretrained model path specified.")
2349+
continue
2350+
23282351
try:
23292352
components_to_register[name] = spec.load(**component_load_kwargs)
23302353
except Exception:
2331-
logger.warning(
2332-
f"\nFailed to create component {name}:\n"
2333-
f"- Component spec: {spec}\n"
2334-
f"- load() called with kwargs: {component_load_kwargs}\n"
2335-
"If this component is not required for your workflow you can safely ignore this message.\n\n"
2336-
"Traceback:\n"
2337-
f"{traceback.format_exc()}"
2338-
)
2354+
tb = traceback.format_exc()
2355+
if trust_remote_code_stripped and "trust_remote_code" in tb:
2356+
warning_msg = (
2357+
f"Failed to load component `{name}` from external repository "
2358+
f"`{spec.pretrained_model_name_or_path}`.\n\n"
2359+
f"`trust_remote_code=True` was not forwarded to `{name}` because it comes from "
2360+
f"a different repository than the pipeline (`{self._pretrained_model_name_or_path}`). "
2361+
f"For safety, `trust_remote_code` is only forwarded to components from the same "
2362+
f"repository as the pipeline.\n\n"
2363+
f"You need to load this component manually with `trust_remote_code=True` and pass it "
2364+
f"to the pipeline via `pipe.update_components()`. For example, if it is a custom model:\n\n"
2365+
f' {name} = AutoModel.from_pretrained("{spec.pretrained_model_name_or_path}", trust_remote_code=True)\n'
2366+
f" pipe.update_components({name}={name})\n"
2367+
)
2368+
else:
2369+
warning_msg = (
2370+
f"Failed to create component {name}:\n"
2371+
f"- Component spec: {spec}\n"
2372+
f"- load() called with kwargs: {component_load_kwargs}\n"
2373+
"If this component is not required for your workflow you can safely ignore this message.\n\n"
2374+
"Traceback:\n"
2375+
f"{tb}"
2376+
)
2377+
logger.warning(warning_msg)
23392378

23402379
# Register all components at once
23412380
self.register_components(**components_to_register)

tests/modular_pipelines/test_modular_pipelines_common.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,18 @@ def test_load_components_selective_loading(self):
687687
assert pipe.unet is not None
688688
assert getattr(pipe, "vae", None) is None
689689

690+
def test_load_components_selective_loading_incremental(self):
691+
"""Loading a subset of components should not affect already-loaded components."""
692+
pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-xl-pipe")
693+
694+
pipe.load_components(names="unet", torch_dtype=torch.float32)
695+
pipe.load_components(names="text_encoder", torch_dtype=torch.float32)
696+
697+
assert hasattr(pipe, "unet")
698+
assert pipe.unet is not None
699+
assert hasattr(pipe, "text_encoder")
700+
assert pipe.text_encoder is not None
701+
690702
def test_load_components_skips_invalid_pretrained_path(self):
691703
pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-xl-pipe")
692704

@@ -749,6 +761,36 @@ def test_save_pretrained_roundtrip_with_local_model(self, tmp_path):
749761
for key in original_state_dict:
750762
assert torch.equal(original_state_dict[key], loaded_state_dict[key]), f"Mismatch in {key}"
751763

764+
def test_save_pretrained_updates_index_for_model_with_no_load_id(self, tmp_path):
765+
"""testing the workflow of update the pipeline with a custom model and save the pipeline,
766+
the modular_model_index.json should point to the save directory."""
767+
import json
768+
769+
from diffusers import UNet2DConditionModel
770+
771+
pipe = ModularPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-xl-pipe")
772+
pipe.load_components(torch_dtype=torch.float32)
773+
774+
unet = UNet2DConditionModel.from_pretrained(
775+
"hf-internal-testing/tiny-stable-diffusion-xl-pipe", subfolder="unet"
776+
)
777+
assert not hasattr(unet, "_diffusers_load_id")
778+
779+
pipe.update_components(unet=unet)
780+
781+
save_dir = str(tmp_path / "my-pipeline")
782+
pipe.save_pretrained(save_dir)
783+
784+
with open(os.path.join(save_dir, "modular_model_index.json")) as f:
785+
index = json.load(f)
786+
787+
_library, _cls, unet_spec = index["unet"]
788+
assert unet_spec["pretrained_model_name_or_path"] == save_dir
789+
assert unet_spec["subfolder"] == "unet"
790+
791+
_library, _cls, vae_spec = index["vae"]
792+
assert vae_spec["pretrained_model_name_or_path"] == "hf-internal-testing/tiny-stable-diffusion-xl-pipe"
793+
752794
def test_save_pretrained_overwrite_modular_index(self, tmp_path):
753795
"""With overwrite_modular_index=True, all component references should point to the save directory."""
754796
import json

tests/modular_pipelines/test_modular_pipelines_custom_blocks.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,156 @@ def test_custom_block_supported_components(self):
192192
assert len(pipe.components) == 1
193193
assert pipe.component_names[0] == "transformer"
194194

195+
def test_trust_remote_code_not_propagated_to_external_repo(self):
196+
"""When a modular pipeline repo references a component from an external repo that has custom
197+
code (auto_map in config), calling load_components(trust_remote_code=True) should NOT
198+
propagate trust_remote_code to that external component. The external component should fail
199+
to load."""
200+
201+
from diffusers import ModularPipeline
202+
203+
CUSTOM_MODEL_CODE = (
204+
"import torch\n"
205+
"from diffusers import ModelMixin, ConfigMixin\n"
206+
"from diffusers.configuration_utils import register_to_config\n"
207+
"\n"
208+
"class CustomModel(ModelMixin, ConfigMixin):\n"
209+
" @register_to_config\n"
210+
" def __init__(self, hidden_size=8):\n"
211+
" super().__init__()\n"
212+
" self.linear = torch.nn.Linear(hidden_size, hidden_size)\n"
213+
"\n"
214+
" def forward(self, x):\n"
215+
" return self.linear(x)\n"
216+
)
217+
218+
with tempfile.TemporaryDirectory() as external_repo_dir, tempfile.TemporaryDirectory() as pipeline_repo_dir:
219+
# Step 1: Create an external model repo with custom code (requires trust_remote_code)
220+
with open(os.path.join(external_repo_dir, "modeling.py"), "w") as f:
221+
f.write(CUSTOM_MODEL_CODE)
222+
223+
config = {
224+
"_class_name": "CustomModel",
225+
"_diffusers_version": "0.0.0",
226+
"auto_map": {"AutoModel": "modeling.CustomModel"},
227+
"hidden_size": 8,
228+
}
229+
with open(os.path.join(external_repo_dir, "config.json"), "w") as f:
230+
json.dump(config, f)
231+
232+
torch.save({}, os.path.join(external_repo_dir, "diffusion_pytorch_model.bin"))
233+
234+
# Step 2: Create a custom block that references the external repo.
235+
# Define both the class (for direct use) and its code string (for block.py).
236+
class ExternalRefBlock(ModularPipelineBlocks):
237+
@property
238+
def expected_components(self):
239+
return [
240+
ComponentSpec(
241+
"custom_model",
242+
AutoModel,
243+
pretrained_model_name_or_path=external_repo_dir,
244+
)
245+
]
246+
247+
@property
248+
def inputs(self) -> List[InputParam]:
249+
return [InputParam("prompt", type_hint=str, required=True)]
250+
251+
@property
252+
def intermediate_inputs(self) -> List[InputParam]:
253+
return []
254+
255+
@property
256+
def intermediate_outputs(self) -> List[OutputParam]:
257+
return [OutputParam("output", type_hint=str)]
258+
259+
def __call__(self, components, state: PipelineState) -> PipelineState:
260+
block_state = self.get_block_state(state)
261+
block_state.output = "test"
262+
self.set_block_state(state, block_state)
263+
return components, state
264+
265+
EXTERNAL_REF_BLOCK_CODE_STR = (
266+
"from typing import List\n"
267+
"from diffusers import AutoModel\n"
268+
"from diffusers.modular_pipelines import (\n"
269+
" ComponentSpec,\n"
270+
" InputParam,\n"
271+
" ModularPipelineBlocks,\n"
272+
" OutputParam,\n"
273+
" PipelineState,\n"
274+
")\n"
275+
"\n"
276+
"class ExternalRefBlock(ModularPipelineBlocks):\n"
277+
" @property\n"
278+
" def expected_components(self):\n"
279+
" return [\n"
280+
" ComponentSpec(\n"
281+
' "custom_model",\n'
282+
" AutoModel,\n"
283+
f' pretrained_model_name_or_path="{external_repo_dir}",\n'
284+
" )\n"
285+
" ]\n"
286+
"\n"
287+
" @property\n"
288+
" def inputs(self) -> List[InputParam]:\n"
289+
' return [InputParam("prompt", type_hint=str, required=True)]\n'
290+
"\n"
291+
" @property\n"
292+
" def intermediate_inputs(self) -> List[InputParam]:\n"
293+
" return []\n"
294+
"\n"
295+
" @property\n"
296+
" def intermediate_outputs(self) -> List[OutputParam]:\n"
297+
' return [OutputParam("output", type_hint=str)]\n'
298+
"\n"
299+
" def __call__(self, components, state: PipelineState) -> PipelineState:\n"
300+
" block_state = self.get_block_state(state)\n"
301+
' block_state.output = "test"\n'
302+
" self.set_block_state(state, block_state)\n"
303+
" return components, state\n"
304+
)
305+
306+
# Save the block config, write block.py, then load back via from_pretrained
307+
block = ExternalRefBlock()
308+
block.save_pretrained(pipeline_repo_dir)
309+
310+
# auto_map will reference the module name derived from ExternalRefBlock.__module__,
311+
# which is "test_modular_pipelines_custom_blocks". Write the code file with that name.
312+
code_path = os.path.join(pipeline_repo_dir, "test_modular_pipelines_custom_blocks.py")
313+
with open(code_path, "w") as f:
314+
f.write(EXTERNAL_REF_BLOCK_CODE_STR)
315+
316+
block = ModularPipelineBlocks.from_pretrained(pipeline_repo_dir, trust_remote_code=True)
317+
pipe = block.init_pipeline()
318+
pipe.save_pretrained(pipeline_repo_dir)
319+
320+
# Step 3: Load the pipeline from the saved directory.
321+
loaded_pipe = ModularPipeline.from_pretrained(pipeline_repo_dir, trust_remote_code=True)
322+
323+
assert loaded_pipe._pretrained_model_name_or_path == pipeline_repo_dir
324+
assert loaded_pipe._component_specs["custom_model"].pretrained_model_name_or_path == external_repo_dir
325+
assert getattr(loaded_pipe, "custom_model", None) is None
326+
327+
# Step 4a: load_components WITHOUT trust_remote_code.
328+
# It should still fail
329+
loaded_pipe.load_components()
330+
assert getattr(loaded_pipe, "custom_model", None) is None
331+
332+
# Step 4b: load_components with trust_remote_code=True.
333+
# trust_remote_code should be stripped for the external component, so it fails.
334+
# The warning should contain guidance about manually loading with trust_remote_code.
335+
loaded_pipe.load_components(trust_remote_code=True)
336+
assert getattr(loaded_pipe, "custom_model", None) is None
337+
338+
# Step 4c: Manually load with AutoModel and update_components — this should work.
339+
from diffusers import AutoModel
340+
341+
custom_model = AutoModel.from_pretrained(external_repo_dir, trust_remote_code=True)
342+
loaded_pipe.update_components(custom_model=custom_model)
343+
assert getattr(loaded_pipe, "custom_model", None) is not None
344+
195345
def test_custom_block_loads_from_hub(self):
196346
repo_id = "hf-internal-testing/tiny-modular-diffusers-block"
197347
block = ModularPipelineBlocks.from_pretrained(repo_id, trust_remote_code=True)

0 commit comments

Comments
 (0)