Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions data_juicer/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1681,8 +1681,6 @@ def get_init_configs(cfg: Union[Namespace, Dict], load_configs_only: bool = True
"""
set init configs of data-juicer for cfg
"""
temp_dir = tempfile.gettempdir()
temp_file = os.path.join(temp_dir, "job_dj_config.json")
if isinstance(cfg, Namespace):
cfg = namespace_to_dict(cfg)

Expand All @@ -1703,10 +1701,13 @@ def get_init_configs(cfg: Union[Namespace, Dict], load_configs_only: bool = True
for attr in internal_attrs:
cfg.pop(attr, None)

# create a temp config file
with open(temp_file, "w") as f:
json.dump(prepare_cfgs_for_export(cfg), f)
inited_dj_cfg = init_configs(["--config", temp_file], load_configs_only=load_configs_only)
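# Use a unique temporary file per call to avoid race conditions when
# multiple requests are processed concurrently (e.g. in API service mode).
# The file is automatically deleted after the with-block exits.
# NOTE(review): the path is handed to init_configs while this handle is still
# open; per the tempfile docs, reopening a NamedTemporaryFile by name works on
# Unix but not on Windows -- confirm the supported platforms.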
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", prefix="job_dj_config_", delete=True) as temp_f:
json.dump(prepare_cfgs_for_export(cfg), temp_f)
temp_f.flush()
inited_dj_cfg = init_configs(["--config", temp_f.name], load_configs_only=load_configs_only)
return inited_dj_cfg


Expand Down
16 changes: 7 additions & 9 deletions service.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
from typing import Dict
from urllib.parse import parse_qs

from jsonargparse import Namespace
from pydantic import validate_call

from data_juicer.config.config import get_default_cfg
from data_juicer.config.config import get_default_cfg, get_init_configs
from data_juicer.core.data.dataset_builder import DatasetBuilder
from data_juicer.core.exporter import Exporter
from data_juicer.utils.lazy_loader import LazyLoader
Expand Down Expand Up @@ -143,13 +142,12 @@ def _parse_json_dumps(params: Dict, prefix="<json_dumps>"):


def _setup_cfg(params: Dict):
"""convert string `cfg` to Namespace"""
# TODO: Traverse method's signature and convert any arguments \
# that should be Namespace but are passed as str
if cfg_str := params.get("cfg"):
if isinstance(cfg_str, str):
cfg = Namespace(**json.loads(cfg_str))
params["cfg"] = cfg
"""convert string or dict `cfg` to a fully initialized Namespace"""
cfg_val = params.get("cfg")
if cfg_val is not None and isinstance(cfg_val, (str, dict)):
if isinstance(cfg_val, str):
cfg_val = json.loads(cfg_val)
params["cfg"] = get_init_configs(cfg_val, load_configs_only=True)
return params


Expand Down
Loading