Skip to content

Commit 7c17de0

Browse files
rizzip and Copilot authored
Enable base image and platform specification at the DSL level (#1044)
* (feat): add base_image and platform to step DSL, add user/app overrides on upload and compile
* feat: use uv for requirements install in default dockerfile
* fix: uv venv python version
* fix: user/app id CLI overrides on clarifai pipeline [compile|upload]
* chore: fix tests
* fix: uv venv path
* Revert "fix: uv venv path"
  This reverts commit 6a28d0b.
* fix: uv venv initialization
* fix: pipeline step default dockerfile
* fix: tests
* fix: tests
* fix: avoid shadowing builtin in pipeline list command
  Agent-Logs-Url: https://github.com/Clarifai/clarifai-python/sessions/ae3f7db3-87f9-4c10-aa93-a2971021298f
  Co-authored-by: rizzip <32918283+rizzip@users.noreply.github.com>
* fix: use underscore pipeline CLI identity flags
  Agent-Logs-Url: https://github.com/Clarifai/clarifai-python/sessions/d61bcecb-1a12-4eed-ab51-3c5e2afcb850
  Co-authored-by: rizzip <32918283+rizzip@users.noreply.github.com>
---------
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
1 parent 9be1670 commit 7c17de0

9 files changed

Lines changed: 368 additions & 40 deletions

File tree

clarifai/cli/pipeline.py

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,17 @@ def pipeline():
100100
is_flag=True,
101101
help='Skip creating config-lock.yaml file.',
102102
)
103-
def upload(path, no_lockfile):
103+
@click.option(
104+
'--user_id',
105+
default=None,
106+
help='Override the user_id from the Clarifai context.',
107+
)
108+
@click.option(
109+
'--app_id',
110+
default=None,
111+
help='Override the app_id from the Clarifai context.',
112+
)
113+
def upload(path, no_lockfile, user_id, app_id):
104114
"""Upload a pipeline with associated pipeline steps to Clarifai.
105115
106116
PATH: Path to the pipeline configuration file or directory containing config.yaml. If not specified, the current directory is used by default.
@@ -110,6 +120,10 @@ def upload(path, no_lockfile):
110120

111121
if os.path.isfile(path) and path.endswith('.py'):
112122
pipeline_obj = load_pipeline_from_file(path)
123+
if user_id:
124+
pipeline_obj.user_id = user_id
125+
if app_id:
126+
pipeline_obj.app_id = app_id
113127
output_dir = os.path.join(
114128
os.path.dirname(os.path.abspath(path)), f'generated-{pipeline_obj.id}'
115129
)
@@ -128,16 +142,42 @@ def upload(path, no_lockfile):
128142
required=True,
129143
help='Directory to write the compiled pipeline config and step folders.',
130144
)
131-
def compile(path, output_dir):
132-
"""Compile YAML/config-based pipeline assets from a Python pipeline definition."""
145+
@click.option('--user_id', default=None, help='Override the user_id from the Clarifai context.')
146+
@click.option('--app_id', default=None, help='Override the app_id from the Clarifai context.')
147+
def compile(path, output_dir, user_id, app_id):
148+
"""Compile YAML/config-based pipeline assets from a Python pipeline definition.
149+
150+
Generates config.yaml, step directories (with requirements.txt and
151+
pipeline_step.py), and a Dockerfile for each locally managed step.
152+
"""
153+
from clarifai.runners.pipeline_steps.pipeline_step_builder import PipelineStepBuilder
133154
from clarifai.runners.pipelines import load_pipeline_from_file
134155

135156
if not os.path.isfile(path) or not path.endswith('.py'):
136157
raise click.UsageError('clarifai pipeline compile expects a Python file path.')
137158

138159
pipeline_obj = load_pipeline_from_file(path)
160+
if user_id:
161+
pipeline_obj.user_id = user_id
162+
if app_id:
163+
pipeline_obj.app_id = app_id
139164
config_path = pipeline_obj.generate(output_dir)
140-
logger.info(f"Generated pipeline assets at {config_path}")
165+
166+
# Generate Dockerfiles for all locally managed step directories.
167+
seen: set = set()
168+
step_ids = []
169+
for node in pipeline_obj.nodes:
170+
sid = node.step_definition.id
171+
if node.step_definition.is_managed and sid not in seen:
172+
seen.add(sid)
173+
step_ids.append(sid)
174+
for step_id in step_ids:
175+
step_dir = os.path.join(output_dir, step_id)
176+
if os.path.isdir(step_dir):
177+
PipelineStepBuilder(step_dir).create_dockerfile()
178+
logger.info(f"Generated Dockerfile for step '{step_id}'")
179+
180+
logger.info(f'Generated pipeline assets at {config_path}')
141181

142182

143183
@pipeline.command()
@@ -1049,7 +1089,7 @@ def validate_lock(lockfile_path):
10491089
raise click.Abort()
10501090

10511091

1052-
@pipeline.command(['ls'])
1092+
@pipeline.command(name='list', aliases=['ls'])
10531093
@click.option('--page_no', required=False, help='Page number to list.', default=1)
10541094
@click.option('--per_page', required=False, help='Number of items per page.', default=16)
10551095
@click.option(
@@ -1063,7 +1103,7 @@ def validate_lock(lockfile_path):
10631103
help='User ID to list pipelines from. If not provided, uses current user.',
10641104
)
10651105
@click.pass_context
1066-
def list(ctx, page_no, per_page, app_id, user_id):
1106+
def list_pipelines(ctx, page_no, per_page, app_id, user_id):
10671107
"""List all pipelines for the user."""
10681108
validate_context(ctx)
10691109

clarifai/runners/pipeline_steps/pipeline_step_builder.py

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import sys
44
import tarfile
55
import time
6-
from string import Template
76
from typing import List, Optional
87

98
import yaml
@@ -240,41 +239,46 @@ def create_pipeline_step(self):
240239

241240
def create_dockerfile(self):
242241
"""Create a Dockerfile for the pipeline step."""
243-
# Use similar logic to model builder for dockerfile creation
244-
dockerfile_template = """FROM --platform=$TARGETPLATFORM public.ecr.aws/clarifai-models/python-base:$PYTHON_VERSION-df565436eea93efb3e8d1eb558a0a46df29523ec as final
245-
246-
COPY --link requirements.txt /home/nonroot/requirements.txt
247-
248-
# Update clarifai package so we always have latest protocol to the API. Everything should land in /venv
249-
RUN ["pip", "install", "--no-cache-dir", "-r", "/home/nonroot/requirements.txt"]
250-
251-
# Copy in the actual files like config.yaml, requirements.txt, and most importantly 1/pipeline_step.py for the actual pipeline step.
252-
COPY --link=true 1 /home/nonroot/main/1
253-
# At this point we only need these for validation in the SDK.
254-
COPY --link=true requirements.txt config.yaml /home/nonroot/main/
255-
"""
256-
257-
# Get Python version from config or use default
258242
build_info = self.config.get('build_info', {})
259243
python_version = build_info.get('python_version', '3.12')
244+
base_image = build_info.get('base_image')
245+
platform = build_info.get('platform')
260246

261247
# Ensure requirements.txt has clarifai
262248
self._ensure_clarifai_requirement()
263249

264-
# Replace placeholders
265-
dockerfile_content = Template(dockerfile_template).safe_substitute(
266-
PYTHON_VERSION=python_version
250+
platform_str = f'--platform={platform}' if platform else ''
251+
image = (
252+
base_image
253+
or f'public.ecr.aws/clarifai-models/python-base:{python_version}-df565436eea93efb3e8d1eb558a0a46df29523ec'
254+
)
255+
256+
dockerfile_content = (
257+
f'FROM {platform_str} {image} as final\n'
258+
'\n'
259+
'COPY --link requirements.txt /home/nonroot/requirements.txt\n'
260+
'\n'
261+
'# Install uv, create a venv, and install requirements\n'
262+
f'RUN pip install uv && uv venv /tmp/venv --python {python_version} --clear\n'
263+
'ENV VIRTUAL_ENV=/tmp/venv\n'
264+
'ENV PATH="/tmp/venv/bin:$PATH"\n'
265+
'RUN uv pip install --no-cache-dir -r /home/nonroot/requirements.txt\n'
266+
'\n'
267+
'# Copy in the actual files like config.yaml, requirements.txt, and most importantly 1/pipeline_step.py for the actual pipeline step.\n'
268+
'COPY --link=true 1 /home/nonroot/main/1\n'
269+
'# At this point we only need these for validation in the SDK.\n'
270+
'COPY --link=true requirements.txt config.yaml /home/nonroot/main/\n'
267271
)
268272

269273
# Write Dockerfile if it doesn't exist
270274
dockerfile_path = os.path.join(self.folder, 'Dockerfile')
271275
if os.path.exists(dockerfile_path):
272-
logger.info(f"Dockerfile already exists at {dockerfile_path}, skipping creation.")
276+
logger.info(f'Dockerfile already exists at {dockerfile_path}, skipping creation.')
273277
return
274278
with open(dockerfile_path, 'w') as dockerfile:
275279
dockerfile.write(dockerfile_content)
276280

277-
logger.info(f"Created Dockerfile at {dockerfile_path}")
281+
logger.info(f'Created Dockerfile at {dockerfile_path}')
278282

279283
def _ensure_clarifai_requirement(self):
280284
"""Ensure clarifai is in requirements.txt with proper version."""

clarifai/runners/pipelines/codegen.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,15 @@ def generate_step_directory(step_definition, output_dir: str, user_id: str, app_
219219
'app_id': app_id,
220220
},
221221
'pipeline_step_input_params': step_definition.get_input_params(),
222-
'build_info': {'python_version': step_definition.python_version},
222+
'build_info': {
223+
k: v
224+
for k, v in [
225+
('python_version', step_definition.python_version),
226+
('base_image', step_definition.base_image),
227+
('platform', step_definition.platform),
228+
]
229+
if v is not None
230+
},
223231
'pipeline_step_compute_info': MessageToDict(
224232
step_definition.compute, preserving_proto_field_name=True
225233
),

clarifai/runners/pipelines/pipeline.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,6 @@ def __init__(
2727
visibility: str = 'PRIVATE',
2828
):
2929
user_id, app_id = self._resolve_from_context(user_id, app_id)
30-
if not user_id or not app_id:
31-
raise ValueError(
32-
"Pipeline(...) needs user_id and app_id. Pass them explicitly, "
33-
"or run `clarifai login` to set them in your CLI context."
34-
)
3530
self.id = id
3631
self.user_id = user_id
3732
self.app_id = app_id
@@ -112,6 +107,14 @@ def _generate_task_name(self, step_id: str) -> str:
112107
suffix += 1
113108
return candidate
114109

110+
def _validate_identity(self):
111+
"""Raise if user_id/app_id are still unresolved at the time of use."""
112+
if not self.user_id or not self.app_id:
113+
raise ValueError(
114+
"Pipeline(...) needs user_id and app_id. Pass them explicitly, "
115+
"set --user_id/--app_id on the CLI, or run `clarifai login`."
116+
)
117+
115118
def validate(self):
116119
nodes_by_name = {node.name: node for node in self.nodes}
117120
for node in self.nodes:
@@ -247,6 +250,7 @@ def to_config(self) -> Dict[str, Any]:
247250
return config
248251

249252
def generate(self, output_dir: str) -> str:
253+
self._validate_identity()
250254
os.makedirs(output_dir, exist_ok=True)
251255
step_definitions = OrderedDict()
252256
for node in self.nodes:
@@ -262,6 +266,7 @@ def generate(self, output_dir: str) -> str:
262266
return config_path
263267

264268
def upload(self, no_lockfile: bool = False) -> Optional[str]:
269+
self._validate_identity()
265270
from clarifai.runners.pipelines.pipeline_builder import PipelineBuilder
266271

267272
with tempfile.TemporaryDirectory(prefix='clarifai-pipeline-') as temp_dir:

clarifai/runners/pipelines/step.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ def __init__(
9898
assets=None,
9999
compute: Optional[ComputeInfo] = None,
100100
python_version: str = '3.12',
101+
base_image: Optional[str] = None,
102+
platform: Optional[str] = None,
101103
secrets: Optional[Dict[str, str]] = None,
102104
):
103105
self.func = func
@@ -106,6 +108,8 @@ def __init__(
106108
self.assets = assets or []
107109
self.compute = compute or ComputeInfo()
108110
self.python_version = python_version
111+
self.base_image = base_image
112+
self.platform = platform
109113
self.secrets = secrets or {}
110114
self.signature = inspect.signature(func)
111115

@@ -215,6 +219,8 @@ def step(
215219
assets=None,
216220
compute: Optional[ComputeInfo] = None,
217221
python_version: str = '3.12',
222+
base_image: Optional[str] = None,
223+
platform: Optional[str] = None,
218224
secrets: Optional[Dict[str, str]] = None,
219225
):
220226
def decorator(func: Callable[..., Any]) -> StepDefinition:
@@ -225,6 +231,8 @@ def decorator(func: Callable[..., Any]) -> StepDefinition:
225231
assets=assets,
226232
compute=compute,
227233
python_version=python_version,
234+
base_image=base_image,
235+
platform=platform,
228236
secrets=secrets,
229237
)
230238

tests/cli/test_pipeline.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import yaml
77
from click.testing import CliRunner
88

9+
from clarifai.cli.pipeline import compile as compile_command
910
from clarifai.cli.pipeline import init, run, upload
1011
from clarifai.cli.pipeline_template import info, list_templates
1112
from clarifai.runners.pipelines.pipeline_builder import (
@@ -384,6 +385,21 @@ def test_cli_upload_help(self):
384385
assert result.exit_code == 0
385386
assert "Upload a pipeline with associated pipeline steps" in result.output
386387
assert "PATH" in result.output
388+
assert '--user_id' in result.output
389+
assert '--app_id' in result.output
390+
assert '--user-id' not in result.output
391+
assert '--app-id' not in result.output
392+
393+
def test_cli_compile_help_uses_underscore_identity_flags(self):
394+
"""Test compile help uses the existing underscore flag convention."""
395+
runner = CliRunner()
396+
result = runner.invoke(compile_command, ['--help'])
397+
398+
assert result.exit_code == 0
399+
assert '--user_id' in result.output
400+
assert '--app_id' in result.output
401+
assert '--user-id' not in result.output
402+
assert '--app-id' not in result.output
387403

388404
def test_cli_upload_missing_config(self):
389405
"""Test CLI upload with missing config file."""
@@ -2115,7 +2131,7 @@ def test_list_command_requires_app_id(self):
21152131
ctx_obj.current.api_base = 'https://api.clarifai.com'
21162132

21172133
# Import here to avoid circular imports in testing
2118-
from clarifai.cli.pipeline import list as list_command
2134+
from clarifai.cli.pipeline import list_pipelines as list_command
21192135

21202136
result = runner.invoke(
21212137
list_command,
@@ -2152,7 +2168,7 @@ def test_list_command_success_with_app_id(self, mock_display, mock_app_class, mo
21522168
ctx_obj.current.api_base = 'https://api.clarifai.com'
21532169

21542170
# Import here to avoid circular imports in testing
2155-
from clarifai.cli.pipeline import list as list_command
2171+
from clarifai.cli.pipeline import list_pipelines as list_command
21562172

21572173
result = runner.invoke(
21582174
list_command,
@@ -2185,7 +2201,7 @@ def test_list_command_default_parameters(self, mock_validate):
21852201
ctx_obj.current.api_base = 'https://api.clarifai.com'
21862202

21872203
# Import here to avoid circular imports in testing
2188-
from clarifai.cli.pipeline import list as list_command
2204+
from clarifai.cli.pipeline import list_pipelines as list_command
21892205

21902206
with patch('clarifai.client.app.App') as mock_app_class:
21912207
mock_app_instance = Mock()

tests/cli/test_pipeline_dsl_cli.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def test_generate_python_pipeline_file_writes_output(tmp_path: Path):
3838
with patch('clarifai.runners.pipelines.load_pipeline_from_file') as mock_loader:
3939
mock_pipeline = Mock()
4040
mock_pipeline.generate.return_value = str(output_dir / 'config.yaml')
41+
mock_pipeline.nodes = [] # no managed steps → no Dockerfiles expected
4142
mock_loader.return_value = mock_pipeline
4243

4344
result = runner.invoke(compile, [str(pipeline_file), '--output-dir', str(output_dir)])
@@ -68,3 +69,27 @@ def test_generate_real_example_pipeline_writes_mixed_step_config(tmp_path: Path)
6869
assert (output_dir / 'prepare-text' / '1' / 'text_utils.py').exists()
6970
assert not (output_dir / 'summarize').exists()
7071
assert not (output_dir / 'classify-sentiment').exists()
72+
# compile must also generate Dockerfiles for locally managed steps
73+
assert (output_dir / 'prepare-text' / 'Dockerfile').exists()
74+
assert (output_dir / 'assemble-report' / 'Dockerfile').exists()
75+
76+
77+
def test_compile_generates_dockerfiles_for_managed_steps(tmp_path: Path):
78+
"""compile writes a Dockerfile next to each locally managed step directory."""
79+
repo_root = Path(__file__).resolve().parents[2]
80+
pipeline_file = repo_root / 'examples' / 'pipeline_dsl_text_pipeline.py'
81+
output_dir = tmp_path / 'compiled'
82+
runner = CliRunner()
83+
84+
result = runner.invoke(compile, [str(pipeline_file), '--output-dir', str(output_dir)])
85+
86+
assert result.exit_code == 0, result.output
87+
for step_id in ('prepare-text', 'assemble-report'):
88+
dockerfile = output_dir / step_id / 'Dockerfile'
89+
assert dockerfile.exists(), f'Dockerfile missing for step {step_id!r}'
90+
content = dockerfile.read_text(encoding='utf-8')
91+
assert 'FROM ' in content
92+
assert 'COPY --link=true 1 /home/nonroot/main/1' in content
93+
# Pre-existing (non-managed) steps must NOT get a Dockerfile.
94+
assert not (output_dir / 'summarize').exists()
95+
assert not (output_dir / 'classify-sentiment').exists()

0 commit comments

Comments
 (0)