From a4ce7a2e4296116e5fb462e715ddee339a4f715a Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Wed, 8 Oct 2025 16:23:15 +0200 Subject: [PATCH 01/24] chore: created CWL examples --- .../bad_cores/clt_bad_cores.cwl | 13 +++++++++++ .../bad_cores/step_bad_cores.cwl | 21 +++++++++++++++++ .../bad_cores/step_run_bad_cores.cwl | 21 +++++++++++++++++ .../bad_cores/wf_bad_cores.cwl | 21 +++++++++++++++++ .../bad_ram/clt_bad_ram.cwl | 13 +++++++++++ .../bad_ram/step_bad_ram.cwl | 21 +++++++++++++++++ .../bad_ram/step_run_bad_ram.cwl | 21 +++++++++++++++++ .../bad_ram/wf_bad_ram.cwl | 21 +++++++++++++++++ .../cores_conflict_wf_step.cwl | 23 +++++++++++++++++++ .../cores_conflict_wf_step_run.cwl | 23 +++++++++++++++++++ .../ram_conflict_wf_step.cwl | 23 +++++++++++++++++++ .../ram_conflict_wf_step_run.cwl | 23 +++++++++++++++++++ 12 files changed, 244 insertions(+) create mode 100644 test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl create mode 100644 test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl create mode 100644 test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl create mode 100644 test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl create mode 100644 test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl create mode 100644 test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl create mode 100644 test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl create mode 100644 test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl create mode 100644 test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl create mode 100644 test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl create mode 100644 test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl create mode 100644 test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl diff --git a/test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl b/test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl new file mode 100644 index 00000000..39922a09 --- /dev/null +++ b/test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl @@ -0,0 +1,13 @@ +cwlVersion: v1.2 +class: CommandLineTool +label: "Higher coresMin CLT" +doc: The CommandLineTool ResourceRequirement has a coresMin higher than its coresMax = failure + +requirements: + ResourceRequirement: + coresMin: 4 # > 2 + coresMax: 2 +inputs: [] +outputs: [] + +baseCommand: ["echo", "Hello World"] \ No newline at end of file diff --git a/test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl b/test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl new file mode 100644 index 00000000..a46043bf --- /dev/null +++ b/test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl @@ -0,0 +1,21 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher coresMin WorkflowStep" +doc: The WorkflowStep ResourceRequirement has a coresMin higher than its coresMax = failure + +inputs: [] +outputs: [] + +steps: + bad_step: + requirements: + ResourceRequirement: + coresMin: 4 # > 2 + coresMax: 2 + run: + class: CommandLineTool + baseCommand: ["echo", "Hello World"] + inputs: [] + outputs: [] + out: [] + in: [] \ No newline at end of file diff --git a/test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl b/test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl new file mode 100644 index 00000000..1d569f76 --- /dev/null +++ b/test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl @@ -0,0 +1,21 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher coresMin WorkflowStep.run" +doc: The WorkflowStep.run ResourceRequirement has a coresMin higher than its coresMax = failure + +inputs: [] +outputs: [] + +steps: + bad_step: + run: + class: CommandLineTool + baseCommand: ["echo", "Hello World"] + inputs: [] + outputs: [] + requirements: + ResourceRequirement: + coresMin: 4 # > 2 + coresMax: 2 + out: [] + in: [] \ No newline at end of file diff --git a/test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl b/test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl new file mode 100644 index 00000000..c334828b --- /dev/null +++ b/test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl @@ -0,0 +1,21 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher coresMin Workflow" +doc: The Workflow ResourceRequirement has a coresMin higher than its coresMax = failure + +requirements: + ResourceRequirement: + coresMin: 4 # > 2 + coresMax: 2 +inputs: [] +outputs: [] + +steps: + good_step: + run: + class: CommandLineTool + baseCommand: ["echo", "Hello World"] + inputs: [] + outputs: [] + out: [] + in: [] \ No newline at end of file diff --git a/test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl b/test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl new file mode 100644 index 00000000..e4e2de72 --- /dev/null +++ b/test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl @@ -0,0 +1,13 @@ +cwlVersion: v1.2 +class: CommandLineTool +label: "Higher ramMin CLT" +doc: The CommandLineTool ResourceRequirement has a ramMin higher than its ramMax = failure + +requirements: + ResourceRequirement: + ramMin: 2048 # > 1024 + ramMax: 1024 +inputs: [] +outputs: [] + +baseCommand: ["echo", "Hello World"] \ No newline at end of file diff --git a/test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl b/test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl new file mode 100644 index 00000000..9c38eed6 --- /dev/null +++ b/test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl @@ -0,0 +1,21 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher ramMin WorkflowStep" +doc: The WorkflowStep ResourceRequirement has a ramMin higher than its ramMax = failure + +inputs: [] +outputs: [] + +steps: + bad_step: + requirements: + ResourceRequirement: + ramMin: 2048 # > 1024 + ramMax: 1024 + run: + class: CommandLineTool + baseCommand: ["echo", "Hello World"] + inputs: [] + outputs: [] + out: [] + in: [] \ No newline at end of file diff --git a/test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl b/test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl new file mode 100644 index 00000000..bd9f29b4 --- /dev/null +++ b/test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl @@ -0,0 +1,21 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher ramMin WorkflowStep.run" +doc: The WorkflowStep.run ResourceRequirement has a ramMin higher than its ramMax = failure + +inputs: [] +outputs: [] + +steps: + bad_step: + run: + class: CommandLineTool + baseCommand: ["echo", "Hello World"] + inputs: [] + outputs: [] + requirements: + ResourceRequirement: + ramMin: 2048 # > 1024 + ramMax: 1024 + out: [] + in: [] \ No newline at end of file diff --git a/test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl b/test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl new file mode 100644 index 00000000..26002152 --- /dev/null +++ b/test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl @@ -0,0 +1,21 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher ramMin Workflow" +doc: The Workflow ResourceRequirement has a ramMin higher than its ramMax = failure + +requirements: + ResourceRequirement: + ramMin: 2048 # > 1024 + ramMax: 1024 +inputs: [] +outputs: [] + +steps: + good_step: + run: + class: CommandLineTool + baseCommand: ["echo", "Hello World"] + inputs: [] + outputs: [] + out: [] + in: [] \ No newline at end of file diff --git a/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl b/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl new file mode 100644 index 00000000..dcaafc8d --- /dev/null +++ b/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl @@ -0,0 +1,23 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher WorkflowStep coresMin than Workflow coresMax" +doc: The WorkflowStep ResourceRequirement has a coresMin higher than the Workflow ResourceRequirement coresMax = failure + +inputs: [] +outputs: [] +requirements: + ResourceRequirement: + coresMax: 2 # also equals coresMin (when coresMin not specified) + +steps: + too_high_cores: + requirements: + ResourceRequirement: + coresMin: 4 # > 2 + run: + class: CommandLineTool + baseCommand: ["echo", "Hello World"] + inputs: [] + outputs: [] + in: [] + out: [] \ No newline at end of file diff --git a/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl b/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl new file mode 100644 index 00000000..4207091b --- /dev/null +++ b/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl @@ -0,0 +1,23 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher WorkflowStep.run coresMin than Workflow coresMax" +doc: The WorkflowStep.run ResourceRequirement has a coresMin higher than the Workflow ResourceRequirement coresMax = failure + +inputs: [] +outputs: [] +requirements: + ResourceRequirement: + coresMax: 2 # also equals coresMin (when coresMin not specified) + +steps: + too_high_cores: + run: + class: CommandLineTool + baseCommand: ["echo", "Hello World"] + inputs: [] + outputs: [] + requirements: + ResourceRequirement: + coresMin: 4 # > 2 + in: [] + out: [] \ No newline at end of file diff --git a/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl b/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl new file mode 100644 index 00000000..1285fd8e --- /dev/null +++ b/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl @@ -0,0 +1,23 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher WorkflowStep ramMin than Workflow ramMax" +doc: The WorkflowStep ResourceRequirement has a ramMin higher than the Workflow ResourceRequirement ramMax = failure + +inputs: [] +outputs: [] +requirements: + ResourceRequirement: + ramMax: 1024 # also equals ramMin (when ramMin not specified) + +steps: + too_high_ram: + run: + class: CommandLineTool + baseCommand: ["echo", "Hello World"] + inputs: [] + outputs: [] + requirements: + ResourceRequirement: + ramMin: 2048 # > 1024 + in: [] + out: [] \ No newline at end of file diff --git a/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl b/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl new file mode 100644 index 00000000..70e31b13 --- /dev/null +++ b/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl @@ -0,0 +1,23 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher WorkflowStep.run ramMin than Workflow ramMax" +doc: The WorkflowStep.run ResourceRequirement has a ramMin higher than the Workflow ResourceRequirement ramMax = failure + +inputs: [] +outputs: [] +requirements: + ResourceRequirement: + ramMax: 1024 # also equals ramMin (when ramMin not specified) + +steps: + too_high_ram: + run: + class: CommandLineTool + baseCommand: ["echo", "Hello World"] + inputs: [] + outputs: [] + requirements: + ResourceRequirement: + ramMin: 2048 # > 1024 + in: [] + out: [] \ No newline at end of file From 9791f80c64d43442ad49844e9f9efe974be7cf5a Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Wed, 8 Oct 2025 17:03:22 +0200 Subject: [PATCH 02/24] chore: added tests --- test/test_workflows.py | 66 +++++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/test/test_workflows.py b/test/test_workflows.py index ec451874..b7511628 100644 --- a/test/test_workflows.py +++ b/test/test_workflows.py @@ -180,32 +180,27 @@ def test_run_job_success(cli_runner, cleanup, cwl_file, inputs): [ # The description file is malformed: class attribute is unknown ( - "test/workflows/malformed_description/description_malformed_class.cwl", - [], + "test/workflows/malformed_description/description_malformed_class.cwl", [], "`class`containsundefinedreferenceto", ), # The description file is malformed: baseCommand is unknown ( - "test/workflows/malformed_description/description_malformed_command.cwl", - [], + "test/workflows/malformed_description/description_malformed_command.cwl", [], "invalidfield`baseComand`", ), # The description file points to a non-existent file (subworkflow) ( - "test/workflows/bad_references/reference_doesnotexists.cwl", - [], + "test/workflows/bad_references/reference_doesnotexists.cwl", [], "Nosuchfileordirectory", ), # The description file points to another file point to it (circular dependency) ( - "test/workflows/bad_references/reference_circular1.cwl", - [], + "test/workflows/bad_references/reference_circular1.cwl", [], "Recursingintostep", ), # The description file points to itself (another circular dependency) ( - "test/workflows/bad_references/reference_circular1.cwl", - [], + "test/workflows/bad_references/reference_circular2.cwl", [], "Recursingintostep", ), # The configuration file is malformed: the hints are overridden more than once @@ -216,6 +211,57 @@ def test_run_job_success(cli_runner, cleanup, cwl_file, inputs): ], "Failedtovalidatetheparameter", ), + # The core resource requirements are wrong: coresMin is higher than coresMax value + ( + "test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl", [], + "RequirementValidationError" + ), + ( + "test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl", [], + "RequirementValidationError" + ), + ( + "test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl", [], + "RequirementValidationError" + ), + ( + "test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl", [], + "RequirementValidationError" + ), + # The ram resource requirements are wrong: ramMin is higher than ramMax value + ( + "test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl", [], + "RequirementValidationError" + ), + ( + "test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl", [], + "RequirementValidationError" + ), + ( + "test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl", [], + "RequirementValidationError" + ), + ( + "test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl", [], + "RequirementValidationError" + ), + # The resource requirements contains conflicts + ( + "test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl", [], + "RequirementValidationError" + ), + ( + "test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl", [], + "RequirementValidationError" + ), + ( + "test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl", [], + "RequirementValidationError" + ), + ( + "test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl", [], + "RequirementValidationError" + ) ], ) def test_run_job_validation_failure( From 899b4652a6335a5e058ea8b7aea290bda3b82601 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Wed, 8 Oct 2025 17:41:05 +0200 Subject: [PATCH 03/24] feat: added ResourceRequirement validator --- .../execution_hooks/validator.py | 113 ++++++++++++++++++ src/dirac_cwl_proto/job/__init__.py | 23 +++- 2 files changed, 134 insertions(+), 2 deletions(-) create mode 100644 src/dirac_cwl_proto/execution_hooks/validator.py diff --git a/src/dirac_cwl_proto/execution_hooks/validator.py b/src/dirac_cwl_proto/execution_hooks/validator.py new file mode 100644 index 00000000..4321e8fb --- /dev/null +++ b/src/dirac_cwl_proto/execution_hooks/validator.py @@ -0,0 +1,113 @@ +from abc import ABC, abstractmethod +from pydantic import BaseModel, ConfigDict +from cwl_utils.parser.cwl_v1_2 import Workflow, CommandLineTool, WorkflowStep +from typing import ClassVar + +class RequirementValidator(BaseModel, ABC): + model_config = ConfigDict(validate_assignment=True, arbitrary_types_allowed=True) + + requirement_class: ClassVar[str] + cwl_object: Workflow | CommandLineTool | WorkflowStep # TODO: might need to add ExpressionTool later? and NestedWorkflow? + + def get_requirement(self, cwl_object: Workflow | CommandLineTool | WorkflowStep): + """ + Extract the requirement from the current cwl_object, + based on the requirement class we want. + + :param cwl_object: The cwl_object to extract the requirement from. + :return: The requirement object, or None if not found. + """ + requirements = getattr(cwl_object, "requirements", []) or [] + for requirement in requirements: + if requirement.class_ == self.requirement_class: + return requirement + return None + + @abstractmethod + def validate_requirement(self, requirement, context: str = "", global_requirement = None): + """ + Validate a requirement, specific for each requirement class. + + :param requirement: The current requirement to validate. + :param context: A context string describing the validation context. Ex: "Step requirement", "Global requirement", etc. + :param global_requirement: The global Workflow requirement, if any. + """ + pass + + def validate(self): + """ + Validate requirements in Workflow, WorkflowStep and WorkflowStep.run. + # TODO: might need to add ExpressionTool later and also, find a way to validate NestedWorkflow? + """ + global_requirement = self.get_requirement(self.cwl_object) + + # Validate Workflow/CommandLineTool global requirements. + if global_requirement: + self.validate_requirement(global_requirement, context="global requirements") + + # Validate WorkflowStep requirements, if any. + if self.cwl_object.class_ != 'CommandLineTool' and self.cwl_object.steps: + for step in self.cwl_object.steps: + step_requirement = self.get_requirement(step) + if step_requirement: + self.validate_requirement(step_requirement, + context="step requirements", + global_requirement=global_requirement) + # Validate WorkflowStep.run, if any. + if step.run: + step_run_requirement = self.get_requirement(step.run) + if step_run_requirement: + self.validate_requirement(step_run_requirement, + context="step run requirements", + global_requirement=global_requirement) + + +class ResourceRequirementValidator(RequirementValidator): + requirement_class = "ResourceRequirement" + + @staticmethod + def validate_minmax(min_value, max_value, resource, context): + """ + Check if the resource min_value is higher than the resource max_value. + If so, raise a ValueError. + + :param min_value: The current resource min_value. + :param max_value: The current resource max_value. + :param resource: The resource name. + :param context: A context string describing the validation context. Ex: "Step requirement", "Global requirement", etc. + """ + if min_value and max_value and min_value > max_value: + raise ValueError(f"{resource}Min is higher than {resource}Max in {context}") + + @staticmethod + def validate_conflict(min_value, global_max_value, resource, context): + """ + Check if the resource min_value is higher than the global resource max_value. + If so, raise a ValueError. + + :param min_value: The current resource min_value. + :param global_max_value: The global resource max_value. + :param resource: The resource name. + :param context: A context string describing the validation context. Ex: "Step requirement", "Global requirement", etc. + """ + if min_value and global_max_value and min_value > global_max_value: + raise ValueError(f"{resource}Min is higher than global {resource}Max in {context}") + + def validate_requirement(self, requirement, context: str = "", global_requirement = None): + """ + Validate a ResourceRequirement. + Verify: + - that resourceMin is not higher than resourceMax (CommandLineTool, Workflow, WorkflowStep, WorkflowStep.run) + - that resourceMin (WorkflowStep, WorkflowStep.run) is not higher than global (Workflow) resourceMax. + + :param requirement: The current ResourceRequirement to validate. + :param context: A context string describing the validation context. Ex: "Step requirement", "Global requirement", etc. + :param global_requirement: The global Workflow requirement, if any. + """ + self.validate_minmax(requirement.ramMin, requirement.ramMax, "ram", context) + self.validate_minmax(requirement.coresMin, requirement.coresMax, "cores", context) + + if global_requirement: + # Only WorkflowStep and WorkflowStep.run cases + self.validate_conflict(requirement.ramMin, global_requirement.ramMax, "ram", context) + self.validate_conflict(requirement.coresMin, global_requirement.coresMax, "cores", context) \ No newline at end of file diff --git a/src/dirac_cwl_proto/job/__init__.py b/src/dirac_cwl_proto/job/__init__.py index e4941b7a..fe48b094 100644 --- a/src/dirac_cwl_proto/job/__init__.py +++ b/src/dirac_cwl_proto/job/__init__.py @@ -27,7 +27,12 @@ from ruamel.yaml import YAML from schema_salad.exceptions import ValidationException -from dirac_cwl_proto.execution_hooks.core import ExecutionHooksBasePlugin +from dirac_cwl_proto.execution_hooks import ( + ExecutionHooksBasePlugin, +) +from dirac_cwl_proto.execution_hooks.validator import ( + ResourceRequirementValidator, +) from dirac_cwl_proto.submission_models import ( JobInputModel, JobSubmissionModel, @@ -220,12 +225,27 @@ def submit_job_router(job: JobSubmissionModel) -> bool: # Validate the jobs logger.info("Validating the job(s)...") + + try: + #TODO: I don't know if it's the best way to do that ? + # If we keep this class-idea, maybe later we could do a list of RequirementClass for each one we want to validate for the workflow + # and loop on it to call validate() on each one? Without calling them manyally like this: + ResourceRequirementValidator(cwl_object=job.task).validate() + except ValueError as ex: + #TODO: I don't really know how to handle this to match with the current test? + # because it seems that I need to raise a ValidationException for 'test_run_job_validation_failure' (maybe I'm wrong on that) + # the way I did it is kinda bad here because it raises a ValueError and a ValidationException for the same exception... + # we have like 3 traceback when trying to submit a bad job using the CLI + logger.exception(f"RequirementValidationError: {ex}") + raise ValidationException(f"RequirementValidationError: {ex}") + # Initiate 1 job per parameter jobs = [] if not job.parameters: jobs.append(job) else: for parameter in job.parameters: + print(parameter) jobs.append( JobSubmissionModel( task=job.task, @@ -247,7 +267,6 @@ def submit_job_router(job: JobSubmissionModel) -> bool: return all(results) - # ----------------------------------------------------------------------------- # JobWrapper # ----------------------------------------------------------------------------- From a03fbcab89d9b29b486c41eb0ff2a61c7d843ff1 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Thu, 9 Oct 2025 09:49:54 +0200 Subject: [PATCH 04/24] fix: remove test print and pre-commit check --- src/dirac_cwl_proto/job/__init__.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/dirac_cwl_proto/job/__init__.py b/src/dirac_cwl_proto/job/__init__.py index fe48b094..fc2d762c 100644 --- a/src/dirac_cwl_proto/job/__init__.py +++ b/src/dirac_cwl_proto/job/__init__.py @@ -227,17 +227,21 @@ def submit_job_router(job: JobSubmissionModel) -> bool: logger.info("Validating the job(s)...") try: - #TODO: I don't know if it's the best way to do that ? - # If we keep this class-idea, maybe later we could do a list of RequirementClass for each one we want to validate for the workflow - # and loop on it to call validate() on each one? Without calling them manyally like this: + # TODO: I don't know if it's the best way to do that ? + # If we keep this class-idea, maybe later we could do a list of RequirementClass + # for each one we want to validate for the workflow + # and loop on it to call validate() on each one? + # Without calling them manyally like this: ResourceRequirementValidator(cwl_object=job.task).validate() except ValueError as ex: - #TODO: I don't really know how to handle this to match with the current test? - # because it seems that I need to raise a ValidationException for 'test_run_job_validation_failure' (maybe I'm wrong on that) - # the way I did it is kinda bad here because it raises a ValueError and a ValidationException for the same exception... + # TODO: I don't really know how to handle this to match with the current test? + # because it seems that I need to raise a ValidationException + # for 'test_run_job_validation_failure' (maybe I'm wrong on that) + # the way I did it is kinda bad here because it raises a ValueError + # and a ValidationException for the same exception... # we have like 3 traceback when trying to submit a bad job using the CLI logger.exception(f"RequirementValidationError: {ex}") - raise ValidationException(f"RequirementValidationError: {ex}") + raise ValidationException(f"RequirementValidationError: {ex}") from ex # Initiate 1 job per parameter jobs = [] @@ -245,7 +249,6 @@ def submit_job_router(job: JobSubmissionModel) -> bool: jobs.append(job) else: for parameter in job.parameters: - print(parameter) jobs.append( JobSubmissionModel( task=job.task, @@ -267,6 +270,7 @@ def submit_job_router(job: JobSubmissionModel) -> bool: return all(results) + # ----------------------------------------------------------------------------- # JobWrapper # ----------------------------------------------------------------------------- From 9d93635741c8f117486867c873232b4d5eb5fb04 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Fri, 17 Oct 2025 14:01:13 +0200 Subject: [PATCH 05/24] fix: modified exception handling --- ...{validator.py => requirement_validator.py} | 81 +++++++++++++------ src/dirac_cwl_proto/job/__init__.py | 25 +++--- test/test_workflows.py | 77 +++++++++++------- 3 files changed, 113 insertions(+), 70 deletions(-) rename src/dirac_cwl_proto/execution_hooks/{validator.py => requirement_validator.py} (62%) diff --git a/src/dirac_cwl_proto/execution_hooks/validator.py b/src/dirac_cwl_proto/execution_hooks/requirement_validator.py similarity index 62% rename from src/dirac_cwl_proto/execution_hooks/validator.py rename to src/dirac_cwl_proto/execution_hooks/requirement_validator.py index 4321e8fb..b48b1b01 100644 --- a/src/dirac_cwl_proto/execution_hooks/validator.py +++ b/src/dirac_cwl_proto/execution_hooks/requirement_validator.py @@ -1,13 +1,15 @@ from abc import ABC, abstractmethod -from pydantic import BaseModel, ConfigDict -from cwl_utils.parser.cwl_v1_2 import Workflow, CommandLineTool, WorkflowStep from typing import ClassVar +from cwl_utils.parser.cwl_v1_2 import CommandLineTool, Workflow, WorkflowStep +from pydantic import BaseModel, ConfigDict + + class RequirementValidator(BaseModel, ABC): model_config = ConfigDict(validate_assignment=True, arbitrary_types_allowed=True) requirement_class: ClassVar[str] - cwl_object: Workflow | CommandLineTool | WorkflowStep # TODO: might need to add ExpressionTool later? and NestedWorkflow? + cwl_object: Workflow | CommandLineTool | WorkflowStep def get_requirement(self, cwl_object: Workflow | CommandLineTool | WorkflowStep): """ @@ -24,20 +26,22 @@ def get_requirement(self, cwl_object: Workflow | CommandLineTool | WorkflowStep) return None @abstractmethod - def validate_requirement(self, requirement, context: str = "", global_requirement = None): + def validate_requirement( + self, requirement, context: str = "", global_requirement=None + ): """ Validate a requirement, specific for each requirement class. :param requirement: The current requirement to validate. - :param context: A context string describing the validation context. Ex: "Step requirement", "Global requirement", etc. + :param context: A context string describing the validation context. + Ex: "Step requirement", "Global requirement", etc. :param global_requirement: The global Workflow requirement, if any. """ pass - def validate(self): + def validate_requirements(self): """ Validate requirements in Workflow, WorkflowStep and WorkflowStep.run. - # TODO: might need to add ExpressionTool later and also, find a way to validate NestedWorkflow? """ global_requirement = self.get_requirement(self.cwl_object) @@ -46,20 +50,29 @@ def validate(self): self.validate_requirement(global_requirement, context="global requirements") # Validate WorkflowStep requirements, if any. - if self.cwl_object.class_ != 'CommandLineTool' and self.cwl_object.steps: + if self.cwl_object.class_ != "CommandLineTool" and self.cwl_object.steps: for step in self.cwl_object.steps: step_requirement = self.get_requirement(step) if step_requirement: - self.validate_requirement(step_requirement, - context="step requirements", - global_requirement=global_requirement) + self.validate_requirement( + step_requirement, + context="step requirements", + global_requirement=global_requirement, + ) # Validate WorkflowStep.run, if any. if step.run: step_run_requirement = self.get_requirement(step.run) if step_run_requirement: - self.validate_requirement(step_run_requirement, - context="step run requirements", - global_requirement=global_requirement) + self.validate_requirement( + step_run_requirement, + context="step run requirements", + global_requirement=global_requirement, + ) + + +class RequirementError(Exception): + def __init__(self, message): + super().__init__(message) class ResourceRequirementValidator(RequirementValidator): @@ -74,7 +87,8 @@ def validate_minmax(min_value, max_value, resource, context): :param min_value: The current resource min_value. :param max_value: The current resource max_value. :param resource: The resource name. - :param context: A context string describing the validation context. Ex: "Step requirement", "Global requirement", etc. + :param context: A context string describing the validation context. + Ex: "Step requirement", "Global requirement", etc. """ if min_value and max_value and min_value > max_value: raise ValueError(f"{resource}Min is higher than {resource}Max in {context}") @@ -88,12 +102,17 @@ def validate_conflict(min_value, global_max_value, resource, context): :param min_value: The current resource min_value. :param global_max_value: The global resource max_value. :param resource: The resource name. - :param context: A context string describing the validation context. Ex: "Step requirement", "Global requirement", etc. + :param context: A context string describing the validation context. + Ex: "Step requirement", "Global requirement", etc. """ if min_value and global_max_value and min_value > global_max_value: - raise ValueError(f"{resource}Min is higher than global {resource}Max in {context}") + raise ValueError( + f"{resource}Min is higher than global {resource}Max in {context}" + ) - def validate_requirement(self, requirement, context: str = "", global_requirement = None): + def validate_requirement( + self, requirement, context: str = "", global_requirement=None + ): """ Validate a ResourceRequirement. Verify: @@ -101,13 +120,23 @@ def validate_requirement(self, requirement, context: str = "", global_requiremen - that resourceMin (WorkflowStep, WorkflowStep.run) is not higher than global (Workflow) resourceMax. :param requirement: The current ResourceRequirement to validate. - :param context: A context string describing the validation context. Ex: "Step requirement", "Global requirement", etc. + :param context: A context string describing the validation context. + Ex: "Step requirement", "Global requirement", etc. :param global_requirement: The global Workflow requirement, if any. """ - self.validate_minmax(requirement.ramMin, requirement.ramMax, "ram", context) - self.validate_minmax(requirement.coresMin, requirement.coresMax, "cores", context) - - if global_requirement: - # Only WorkflowStep and WorkflowStep.run cases - self.validate_conflict(requirement.ramMin, global_requirement.ramMax, "ram", context) - self.validate_conflict(requirement.coresMin, global_requirement.coresMax, "cores", context) \ No newline at end of file + try: + self.validate_minmax(requirement.ramMin, requirement.ramMax, "ram", context) + self.validate_minmax( + requirement.coresMin, requirement.coresMax, "cores", context + ) + + if global_requirement: + # Only WorkflowStep and WorkflowStep.run cases + self.validate_conflict( + requirement.ramMin, global_requirement.ramMax, "ram", context + ) + self.validate_conflict( + requirement.coresMin, global_requirement.coresMax, "cores", context + ) + except ValueError as ex: + raise RequirementError(f"ResourceRequirement is invalid: {ex}") from ex diff --git a/src/dirac_cwl_proto/job/__init__.py b/src/dirac_cwl_proto/job/__init__.py index fc2d762c..d675e2a9 100644 --- a/src/dirac_cwl_proto/job/__init__.py +++ b/src/dirac_cwl_proto/job/__init__.py @@ -30,7 +30,8 @@ from dirac_cwl_proto.execution_hooks import ( ExecutionHooksBasePlugin, ) -from dirac_cwl_proto.execution_hooks.validator import ( +from dirac_cwl_proto.execution_hooks.requirement_validator import ( + RequirementError, ResourceRequirementValidator, ) from dirac_cwl_proto.submission_models import ( @@ -146,9 +147,11 @@ def submit_job_client( "[blue]:information_source:[/blue] [bold]CLI:[/bold] Submitting the job(s) to service..." ) print_json(job.model_dump_json(indent=4)) - if not submit_job_router(job): + try: + submit_job_router(job) + except Exception as ex: console.print( - "[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to run job(s)." + f"[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to run job(s) : {ex}" ) return typer.Exit(code=1) console.print("[green]:heavy_check_mark:[/green] [bold]CLI:[/bold] Job(s) done.") @@ -232,16 +235,10 @@ def submit_job_router(job: JobSubmissionModel) -> bool: # for each one we want to validate for the workflow # and loop on it to call validate() on each one? # Without calling them manyally like this: - ResourceRequirementValidator(cwl_object=job.task).validate() - except ValueError as ex: - # TODO: I don't really know how to handle this to match with the current test? - # because it seems that I need to raise a ValidationException - # for 'test_run_job_validation_failure' (maybe I'm wrong on that) - # the way I did it is kinda bad here because it raises a ValueError - # and a ValidationException for the same exception... - # we have like 3 traceback when trying to submit a bad job using the CLI - logger.exception(f"RequirementValidationError: {ex}") - raise ValidationException(f"RequirementValidationError: {ex}") from ex + ResourceRequirementValidator(cwl_object=job.task).validate_requirements() + except RequirementError as ex: + logger.exception(f"RequirementValidationError - {ex}") + raise # Initiate 1 job per parameter jobs = [] @@ -431,7 +428,7 @@ def run_job(job: JobSubmissionModel) -> bool: except Exception: logger.exception("JobWrapper: Failed to execute workflow") - return False + raise finally: # Clean up if job_path.exists(): diff --git a/test/test_workflows.py b/test/test_workflows.py index b7511628..5fbc15fe 100644 --- a/test/test_workflows.py +++ b/test/test_workflows.py @@ -180,27 +180,32 @@ def test_run_job_success(cli_runner, cleanup, cwl_file, inputs): [ # The description file is malformed: class attribute is unknown ( - "test/workflows/malformed_description/description_malformed_class.cwl", [], + "test/workflows/malformed_description/description_malformed_class.cwl", + [], "`class`containsundefinedreferenceto", ), # The description file is malformed: baseCommand is unknown ( - "test/workflows/malformed_description/description_malformed_command.cwl", [], + "test/workflows/malformed_description/description_malformed_command.cwl", + [], "invalidfield`baseComand`", ), # The description file points to a non-existent file (subworkflow) ( - "test/workflows/bad_references/reference_doesnotexists.cwl", [], + "test/workflows/bad_references/reference_doesnotexists.cwl", + [], "Nosuchfileordirectory", ), # The description file points to another file point to it (circular dependency) ( - "test/workflows/bad_references/reference_circular1.cwl", [], + "test/workflows/bad_references/reference_circular1.cwl", + [], "Recursingintostep", ), # The description file points to itself (another circular dependency) ( - "test/workflows/bad_references/reference_circular2.cwl", [], + "test/workflows/bad_references/reference_circular2.cwl", + [], "Recursingintostep", ), # The configuration file is malformed: the hints are overridden more than once @@ -213,55 +218,67 @@ def test_run_job_success(cli_runner, cleanup, cwl_file, inputs): ), # The core resource requirements are wrong: coresMin is higher than coresMax value ( - "test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl", [], - "RequirementValidationError" + "test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl", + [], + "ResourceRequirementisinvalid", ), ( - "test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl", [], - "RequirementValidationError" + "test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl", + [], + "ResourceRequirementisinvalid", ), ( - "test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl", [], - "RequirementValidationError" + "test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl", + [], + "ResourceRequirementisinvalid", ), ( - "test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl", [], - "RequirementValidationError" + "test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl", + [], + "ResourceRequirementisinvalid", ), # The ram resource requirements are wrong: ramMin is higher than ramMax value ( - "test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl", [], - "RequirementValidationError" + "test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl", + [], + "ResourceRequirementisinvalid", ), ( - "test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl", [], - "RequirementValidationError" + "test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl", + [], + "ResourceRequirementisinvalid", ), ( - "test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl", [], - "RequirementValidationError" + "test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl", + [], + "ResourceRequirementisinvalid", ), ( - "test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl", [], - "RequirementValidationError" + "test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl", + [], + "ResourceRequirementisinvalid", ), # The resource requirements contains conflicts ( - "test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl", [], - "RequirementValidationError" + "test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl", + [], + "ResourceRequirementisinvalid", ), ( - "test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl", [], - "RequirementValidationError" + "test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl", + [], + "ResourceRequirementisinvalid", ), ( - "test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl", [], - "RequirementValidationError" + "test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl", + [], + "ResourceRequirementisinvalid", ), ( - "test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl", [], - "RequirementValidationError" - ) + "test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl", + [], + "ResourceRequirementisinvalid", + ), ], ) def test_run_job_validation_failure( From 341750b30282eb1d1ad322940557d79295400e7f Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Mon, 20 Oct 2025 11:43:57 +0200 Subject: [PATCH 06/24] feat: added Production case --- .../execution_hooks/requirement_validator.py | 81 +++++++++++++++---- src/dirac_cwl_proto/job/__init__.py | 5 -- src/dirac_cwl_proto/production/__init__.py | 14 ++++ .../bad_merge_production.cwl | 73 +++++++++++++++++ 4 files changed, 152 insertions(+), 21 deletions(-) create mode 100644 test/workflows/resource_requirements/bad_merge_production.cwl diff --git a/src/dirac_cwl_proto/execution_hooks/requirement_validator.py b/src/dirac_cwl_proto/execution_hooks/requirement_validator.py index b48b1b01..283a97e5 100644 --- a/src/dirac_cwl_proto/execution_hooks/requirement_validator.py +++ b/src/dirac_cwl_proto/execution_hooks/requirement_validator.py @@ -25,6 +25,11 @@ def get_requirement(self, cwl_object: Workflow | CommandLineTool | WorkflowStep) return requirement return None + # TODO : get all nested workflows of a workflow, + # for each nested workflow, validate its requirements + def get_nested_workflows(self): + pass + @abstractmethod def validate_requirement( self, requirement, context: str = "", global_requirement=None @@ -39,35 +44,65 @@ def validate_requirement( """ pass - def validate_requirements(self): + @abstractmethod + def validate_production_requirement(self, requirement, is_global: bool = False): + """ + Validate a production workflow requirement, specific for each requirement class. + This method also depends on the production workflow rules. + + :param requirement: The current requirement to validate. + :param is_global: True if it's a global requirement, False otherwise. + """ + pass + + def validate_requirements(self, production: bool = False) -> None: """ - Validate requirements in Workflow, WorkflowStep and WorkflowStep.run. + Validate all requirements in Workflow, WorkflowStep and WorkflowStep.run. + + :param production: True if the requirements are for a production workflow, False otherwise. + Maybe we could add a transformation parameter later. """ global_requirement = self.get_requirement(self.cwl_object) # Validate Workflow/CommandLineTool global requirements. if global_requirement: + if production: + self.validate_production_requirement(global_requirement, is_global=True) self.validate_requirement(global_requirement, context="global requirements") # Validate WorkflowStep requirements, if any. if self.cwl_object.class_ != "CommandLineTool" and self.cwl_object.steps: - for step in self.cwl_object.steps: - step_requirement = self.get_requirement(step) - if step_requirement: + self.validate_steps_requirements(global_requirement, production) + + def validate_steps_requirements(self, global_requirement, production: bool = False): + """ + Validate steps requirements in WorkflowStep and WorkflowStep.run. + + :param global_requirement: The global Workflow requirement, if any. + :param production: True if the requirements are for a production workflow, False otherwise. + """ + # Validate WorkflowStep requirements, if any. + for step in self.cwl_object.steps: + step_requirement = self.get_requirement(step) + if step_requirement: + if production: + self.validate_production_requirement(step_requirement) + self.validate_requirement( + step_requirement, + context="step requirements", + global_requirement=global_requirement, + ) + # Validate WorkflowStep.run, if any. + if step.run: + step_run_requirement = self.get_requirement(step.run) + if step_run_requirement: + if production: + self.validate_production_requirement(step_run_requirement) self.validate_requirement( - step_requirement, - context="step requirements", + step_run_requirement, + context="step run requirements", global_requirement=global_requirement, ) - # Validate WorkflowStep.run, if any. - if step.run: - step_run_requirement = self.get_requirement(step.run) - if step_run_requirement: - self.validate_requirement( - step_run_requirement, - context="step run requirements", - global_requirement=global_requirement, - ) class RequirementError(Exception): @@ -110,6 +145,20 @@ def validate_conflict(min_value, global_max_value, resource, context): f"{resource}Min is higher than global {resource}Max in {context}" ) + def validate_production_requirement(self, requirement, is_global: bool = False): + """ + Raise an error if there's a global ResourceRequirement in a production workflow. + Otherwise, add some logic for WorkflowSteps, CommandLineTools and WorkflowStep.run in production workflows. + + :param requirement: The current requirement to validate. + :param is_global: True if there's a global ResourceRequirement, False otherwise. + """ + if is_global: + raise RequirementError( + "ResourceRequirement is invalid: global ResourceRequirement is not allowed in production workflows" + ) + # else ... + def validate_requirement( self, requirement, context: str = "", global_requirement=None ): diff --git a/src/dirac_cwl_proto/job/__init__.py b/src/dirac_cwl_proto/job/__init__.py index d675e2a9..d039a621 100644 --- a/src/dirac_cwl_proto/job/__init__.py +++ b/src/dirac_cwl_proto/job/__init__.py @@ -230,11 +230,6 @@ def submit_job_router(job: JobSubmissionModel) -> bool: logger.info("Validating the job(s)...") try: - # TODO: I don't know if it's the best way to do that ? - # If we keep this class-idea, maybe later we could do a list of RequirementClass - # for each one we want to validate for the workflow - # and loop on it to call validate() on each one? - # Without calling them manyally like this: ResourceRequirementValidator(cwl_object=job.task).validate_requirements() except RequirementError as ex: logger.exception(f"RequirementValidationError - {ex}") diff --git a/src/dirac_cwl_proto/production/__init__.py b/src/dirac_cwl_proto/production/__init__.py index a4008fae..23df6b75 100644 --- a/src/dirac_cwl_proto/production/__init__.py +++ b/src/dirac_cwl_proto/production/__init__.py @@ -25,6 +25,10 @@ SchedulingHint, TransformationExecutionHooksHint, ) +from dirac_cwl_proto.execution_hooks.requirement_validator import ( + RequirementError, + ResourceRequirementValidator, +) from dirac_cwl_proto.submission_models import ( ProductionSubmissionModel, TransformationSubmissionModel, @@ -140,6 +144,16 @@ def submit_production_router(production: ProductionSubmissionModel) -> bool: # Validate the transformation logger.info("Validating the production...") + + try: + # Production jobs can't have a global ResourceRequirement + ResourceRequirementValidator(cwl_object=production.task).validate_requirements( + production=True + ) + except RequirementError as ex: + logger.exception(f"RequirementValidationError - {ex}") + raise + # Already validated by the pydantic model logger.info("Production validated!") diff --git a/test/workflows/resource_requirements/bad_merge_production.cwl b/test/workflows/resource_requirements/bad_merge_production.cwl new file mode 100644 index 00000000..c3a57685 --- /dev/null +++ b/test/workflows/resource_requirements/bad_merge_production.cwl @@ -0,0 +1,73 @@ +cwlVersion: v1.2 +label: "Bad Merge Workflow" +class: Workflow +doc: The global ResourceRequirement should not be allowed in a production workflow, file depends on "test/workflows/merge" folder + +requirements: + MultipleInputFeatureRequirement: {} + ResourceRequirement: # not allowed here + coresMax: 4 + +# Define the inputs of the workflow +inputs: + num-points: + type: int + doc: "Number of random points to generate for the simulation" + default: 1000 + output-path-step1: + type: string + default: result_1.sim + output-path-step2: + type: string + default: result_2.sim + +# Define the outputs of the workflow +outputs: + pi_approximation: + type: File + outputSource: gathering/pi_result + +# Define the steps of the workflow +steps: + # Simulation step 1 + simulate_step1: + in: + num-points: num-points + output-path: output-path-step1 + out: [sim] + run: ../merge/pisimulate_v2.cwl + + # Simulation step 2 + simulate_step2: + in: + num-points: num-points + output-path: output-path-step2 + out: [sim] + run: ../merge/pisimulate_v2.cwl + + # Gathering step + gathering: + in: + input-data: + source: + - simulate_step1/sim + - simulate_step2/sim + linkMerge: merge_flattened + out: [pi_result] + run: + class: CommandLineTool + requirements: + ResourceRequirement: + coresMin: 1 + ramMin: 1024 + inputs: + input-data: + type: File[] + inputBinding: + separate: true + outputs: + pi_result: + type: File + outputBinding: + glob: "result*.sim" + baseCommand: [pi-gather] From 8561a30882584db2896a58535391806bd3718a3c Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Mon, 20 Oct 2025 14:49:03 +0200 Subject: [PATCH 07/24] feat: added nested_wf case --- .../execution_hooks/requirement_validator.py | 76 +++++++++++++------ test/test_workflows.py | 15 ++++ .../nested_wf/external_conflict_nested_wf.cwl | 23 ++++++ .../nested_wf/internal_conflict_nested_wf.cwl | 18 +++++ .../nested_wf/nested_wf_high_cores.cwl | 21 +++++ .../nested_wf/nested_wf_high_ram.cwl | 22 ++++++ 6 files changed, 150 insertions(+), 25 deletions(-) create mode 100644 test/workflows/resource_requirements/resource_conflicts/nested_wf/external_conflict_nested_wf.cwl create mode 100644 test/workflows/resource_requirements/resource_conflicts/nested_wf/internal_conflict_nested_wf.cwl create mode 100644 test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_cores.cwl create mode 100644 test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_ram.cwl diff --git a/src/dirac_cwl_proto/execution_hooks/requirement_validator.py b/src/dirac_cwl_proto/execution_hooks/requirement_validator.py index 283a97e5..3caab349 100644 --- a/src/dirac_cwl_proto/execution_hooks/requirement_validator.py +++ b/src/dirac_cwl_proto/execution_hooks/requirement_validator.py @@ -25,11 +25,6 @@ def get_requirement(self, cwl_object: Workflow | CommandLineTool | WorkflowStep) return requirement return None - # TODO : get all nested workflows of a workflow, - # for each nested workflow, validate its requirements - def get_nested_workflows(self): - pass - @abstractmethod def validate_requirement( self, requirement, context: str = "", global_requirement=None @@ -55,36 +50,47 @@ def validate_production_requirement(self, requirement, is_global: bool = False): """ pass - def validate_requirements(self, production: bool = False) -> None: + def validate_requirements(self, cwl_object=None, production: bool = False) -> None: """ - Validate all requirements in Workflow, WorkflowStep and WorkflowStep.run. + Validate all requirements in a Workflow. + :param cwl_object: The cwl_object to validate the requirements for. + If None, use the cwl_object from the class. This is used for nested workflows validation. :param production: True if the requirements are for a production workflow, False otherwise. - Maybe we could add a transformation parameter later. + -- Maybe we could add a transformation parameter later. """ - global_requirement = self.get_requirement(self.cwl_object) + cwl_object = cwl_object if cwl_object else self.cwl_object + global_requirement = self.get_requirement(cwl_object) # Validate Workflow/CommandLineTool global requirements. if global_requirement: + # Production-workflow-specific validation. if production: self.validate_production_requirement(global_requirement, is_global=True) self.validate_requirement(global_requirement, context="global requirements") # Validate WorkflowStep requirements, if any. - if self.cwl_object.class_ != "CommandLineTool" and self.cwl_object.steps: - self.validate_steps_requirements(global_requirement, production) + if cwl_object.class_ != "CommandLineTool" and cwl_object.steps: + self.validate_steps_requirements( + cwl_object.steps, global_requirement, production + ) - def validate_steps_requirements(self, global_requirement, production: bool = False): + def validate_steps_requirements( + self, steps, global_requirement, production: bool = False + ): """ - Validate steps requirements in WorkflowStep and WorkflowStep.run. + Validate steps requirements in WorkflowStep. + :param steps: The WorkflowStep to validate the requirements for. :param global_requirement: The global Workflow requirement, if any. :param production: True if the requirements are for a production workflow, False otherwise. + -- Maybe we could add a transformation parameter later. """ - # Validate WorkflowStep requirements, if any. - for step in self.cwl_object.steps: + for step in steps: step_requirement = self.get_requirement(step) + # Validate WorkflowStep requirements, if any. if step_requirement: + # Production-workflow-specific validation. if production: self.validate_production_requirement(step_requirement) self.validate_requirement( @@ -92,17 +98,36 @@ def validate_steps_requirements(self, global_requirement, production: bool = Fal context="step requirements", global_requirement=global_requirement, ) + # Validate WorkflowStep.run, if any. if step.run: - step_run_requirement = self.get_requirement(step.run) - if step_run_requirement: - if production: - self.validate_production_requirement(step_run_requirement) - self.validate_requirement( - step_run_requirement, - context="step run requirements", - global_requirement=global_requirement, - ) + self.validate_run_requirements(step.run, global_requirement, production) + + def validate_run_requirements( + self, run, global_requirement, production: bool = False + ): + """ + Validate WorkflowStep.run requirements. + + :param run: The WorkflowStep.run to validate the requirements for. + :param global_requirement: The global Workflow requirement, if any. + :param production: True if the requirements are for a production workflow, False otherwise. + -- Maybe we could add a transformation parameter later. + """ + if run.class_ == "Workflow": + # Validate nested Workflow requirements, if any. + self.validate_requirements(cwl_object=run) + + step_run_requirement = self.get_requirement(run) + if step_run_requirement: + # Production-workflow-specific validation. + if production: + self.validate_production_requirement(step_run_requirement) + self.validate_requirement( + step_run_requirement, + context="step run requirements", + global_requirement=global_requirement, + ) class RequirementError(Exception): @@ -148,7 +173,8 @@ def validate_conflict(min_value, global_max_value, resource, context): def validate_production_requirement(self, requirement, is_global: bool = False): """ Raise an error if there's a global ResourceRequirement in a production workflow. - Otherwise, add some logic for WorkflowSteps, CommandLineTools and WorkflowStep.run in production workflows. + Otherwise, add some logic for WorkflowSteps, CommandLineTools + and WorkflowStep.run, etc. in production workflows. :param requirement: The current requirement to validate. :param is_global: True if there's a global ResourceRequirement, False otherwise. diff --git a/test/test_workflows.py b/test/test_workflows.py index 5fbc15fe..a010773e 100644 --- a/test/test_workflows.py +++ b/test/test_workflows.py @@ -279,6 +279,16 @@ def test_run_job_success(cli_runner, cleanup, cwl_file, inputs): [], "ResourceRequirementisinvalid", ), + ( + "test/workflows/resource_requirements/resource_conflicts/nested_wf/external_conflict_nested_wf.cwl", + [], + "ResourceRequirementisinvalid", + ), + ( + "test/workflows/resource_requirements/resource_conflicts/nested_wf/internal_conflict_nested_wf.cwl", + [], + "ResourceRequirementisinvalid", + ), ], ) def test_run_job_validation_failure( @@ -699,6 +709,11 @@ def test_run_simple_production_success(cli_runner, cleanup, cwl_file, metadata): "test/workflows/mandelbrot/type_dependencies/production/malformed-nonexisting-type_metadata-mandelbrot_complete.yaml", "Unknownexecutionhooksplugin:'MandelBrotDoesNotExist'", ), + ( + "test/workflows/resource_requirements/bad_merge_production.cwl", + "test/workflows/merge/type_dependencies/production/metadata-merge_complete.yaml", + "ResourceRequirementisinvalid", + ), ], ) def test_run_production_validation_failure( diff --git a/test/workflows/resource_requirements/resource_conflicts/nested_wf/external_conflict_nested_wf.cwl b/test/workflows/resource_requirements/resource_conflicts/nested_wf/external_conflict_nested_wf.cwl new file mode 100644 index 00000000..eaff97fa --- /dev/null +++ b/test/workflows/resource_requirements/resource_conflicts/nested_wf/external_conflict_nested_wf.cwl @@ -0,0 +1,23 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher NestedWorkflow resourceMin than resourceMax" +doc: The NestedWorkflow ResourceRequirement has higher resourceMin than global resourceMax = failure, both NestedWorflows + here are wrong but only the first one will trigger the exception. + +inputs: [] +outputs: [] + +requirements: + ResourceRequirement: + ramMax: 512 + coresMax: 4 + +steps: + too_high_ram: + run: ./nested_wf_high_ram.cwl + in: [] + out: [] + too_high_cores: + run: ./nested_wf_high_cores.cwl + in: [] + out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/nested_wf/internal_conflict_nested_wf.cwl b/test/workflows/resource_requirements/resource_conflicts/nested_wf/internal_conflict_nested_wf.cwl new file mode 100644 index 00000000..109041b3 --- /dev/null +++ b/test/workflows/resource_requirements/resource_conflicts/nested_wf/internal_conflict_nested_wf.cwl @@ -0,0 +1,18 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher NestedWorkflow resourceMin than resourceMax" +doc: The NestedWorkflow ResourceRequirement has higher resourceMin than resourceMax = failure, both NestedWorflows + here are wrong but only the first one will trigger the exception. + +inputs: [] +outputs: [] + +steps: + too_high_ram: + run: ../ram_conflict_wf_step.cwl + in: [ ] + out: [ ] + too_high_cores: + run: ../cores_conflict_wf_step.cwl + in: [] + out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_cores.cwl b/test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_cores.cwl new file mode 100644 index 00000000..ae930d7d --- /dev/null +++ b/test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_cores.cwl @@ -0,0 +1,21 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher NestedWorkflow resourceMin than resourceMax" +doc: This NestedWorkflow ResourceRequirement has higher coresMin than global coresMax = failure + +inputs: [] +outputs: [] + +requirements: + ResourceRequirement: + coresMin: 10 + +steps: + step: + run: + class: CommandLineTool + baseCommand: ["echo", "Hello World"] + inputs: [] + outputs: [] + in: [] + out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_ram.cwl b/test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_ram.cwl new file mode 100644 index 00000000..a909b0e6 --- /dev/null +++ b/test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_ram.cwl @@ -0,0 +1,22 @@ +cwlVersion: v1.2 +class: Workflow +label: "Higher NestedWorkflow resourceMin than resourceMax" +doc: This NestedWorkflow ResourceRequirement has higher ramMin than global ramMax = failure + + +inputs: [] +outputs: [] + +requirements: + ResourceRequirement: + ramMin: 1028 + +steps: + step: + run: + class: CommandLineTool + baseCommand: ["echo", "Hello World"] + inputs: [] + outputs: [] + in: [] + out: [] From 42c51d95d2c80d474f3269093b288b503748b8ec Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Mon, 20 Oct 2025 14:52:39 +0200 Subject: [PATCH 08/24] fix: pre-commit --- .../workflows/resource_requirements/bad_cores/clt_bad_cores.cwl | 2 +- .../resource_requirements/bad_cores/step_bad_cores.cwl | 2 +- .../resource_requirements/bad_cores/step_run_bad_cores.cwl | 2 +- test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl | 2 +- test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl | 2 +- test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl | 2 +- .../resource_requirements/bad_ram/step_run_bad_ram.cwl | 2 +- test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl | 2 +- .../resource_conflicts/cores_conflict_wf_step.cwl | 2 +- .../resource_conflicts/cores_conflict_wf_step_run.cwl | 2 +- .../resource_conflicts/ram_conflict_wf_step.cwl | 2 +- .../resource_conflicts/ram_conflict_wf_step_run.cwl | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl b/test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl index 39922a09..fd659699 100644 --- a/test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl +++ b/test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl @@ -10,4 +10,4 @@ requirements: inputs: [] outputs: [] -baseCommand: ["echo", "Hello World"] \ No newline at end of file +baseCommand: ["echo", "Hello World"] diff --git a/test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl b/test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl index a46043bf..8e53a247 100644 --- a/test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl +++ b/test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl @@ -18,4 +18,4 @@ steps: inputs: [] outputs: [] out: [] - in: [] \ No newline at end of file + in: [] diff --git a/test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl b/test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl index 1d569f76..3ec63546 100644 --- a/test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl +++ b/test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl @@ -18,4 +18,4 @@ steps: coresMin: 4 # > 2 coresMax: 2 out: [] - in: [] \ No newline at end of file + in: [] diff --git a/test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl b/test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl index c334828b..b0a42cc7 100644 --- a/test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl +++ b/test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl @@ -18,4 +18,4 @@ steps: inputs: [] outputs: [] out: [] - in: [] \ No newline at end of file + in: [] diff --git a/test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl b/test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl index e4e2de72..5829853c 100644 --- a/test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl +++ b/test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl @@ -10,4 +10,4 @@ requirements: inputs: [] outputs: [] -baseCommand: ["echo", "Hello World"] \ No newline at end of file +baseCommand: ["echo", "Hello World"] diff --git a/test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl b/test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl index 9c38eed6..aeb414f7 100644 --- a/test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl +++ b/test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl @@ -18,4 +18,4 @@ steps: inputs: [] outputs: [] out: [] - in: [] \ No newline at end of file + in: [] diff --git a/test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl b/test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl index bd9f29b4..301ca664 100644 --- a/test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl +++ b/test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl @@ -18,4 +18,4 @@ steps: ramMin: 2048 # > 1024 ramMax: 1024 out: [] - in: [] \ No newline at end of file + in: [] diff --git a/test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl b/test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl index 26002152..83d7a960 100644 --- a/test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl +++ b/test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl @@ -18,4 +18,4 @@ steps: inputs: [] outputs: [] out: [] - in: [] \ No newline at end of file + in: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl b/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl index dcaafc8d..e292a9ae 100644 --- a/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl +++ b/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl @@ -20,4 +20,4 @@ steps: inputs: [] outputs: [] in: [] - out: [] \ No newline at end of file + out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl b/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl index 4207091b..70ed6045 100644 --- a/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl +++ b/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl @@ -20,4 +20,4 @@ steps: ResourceRequirement: coresMin: 4 # > 2 in: [] - out: [] \ No newline at end of file + out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl b/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl index 1285fd8e..a77b68d0 100644 --- a/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl +++ b/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl @@ -20,4 +20,4 @@ steps: ResourceRequirement: ramMin: 2048 # > 1024 in: [] - out: [] \ No newline at end of file + out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl b/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl index 70e31b13..4b303097 100644 --- a/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl +++ b/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl @@ -20,4 +20,4 @@ steps: ResourceRequirement: ramMin: 2048 # > 1024 in: [] - out: [] \ No newline at end of file + out: [] From efda0062a20db137f6d4cb36116433fadd389941 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Mon, 20 Oct 2025 15:02:45 +0200 Subject: [PATCH 09/24] fix: refactored code --- .../execution_hooks/requirement_validator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dirac_cwl_proto/execution_hooks/requirement_validator.py b/src/dirac_cwl_proto/execution_hooks/requirement_validator.py index 3caab349..f29d4417 100644 --- a/src/dirac_cwl_proto/execution_hooks/requirement_validator.py +++ b/src/dirac_cwl_proto/execution_hooks/requirement_validator.py @@ -9,7 +9,7 @@ class RequirementValidator(BaseModel, ABC): model_config = ConfigDict(validate_assignment=True, arbitrary_types_allowed=True) requirement_class: ClassVar[str] - cwl_object: Workflow | CommandLineTool | WorkflowStep + cwl_object: Workflow | CommandLineTool def get_requirement(self, cwl_object: Workflow | CommandLineTool | WorkflowStep): """ @@ -70,7 +70,7 @@ def validate_requirements(self, cwl_object=None, production: bool = False) -> No self.validate_requirement(global_requirement, context="global requirements") # Validate WorkflowStep requirements, if any. - if cwl_object.class_ != "CommandLineTool" and cwl_object.steps: + if not isinstance(cwl_object, CommandLineTool) and cwl_object.steps: self.validate_steps_requirements( cwl_object.steps, global_requirement, production ) @@ -114,7 +114,7 @@ def validate_run_requirements( :param production: True if the requirements are for a production workflow, False otherwise. -- Maybe we could add a transformation parameter later. """ - if run.class_ == "Workflow": + if isinstance(run, Workflow): # Validate nested Workflow requirements, if any. self.validate_requirements(cwl_object=run) From 87762ab4d96992e845031b74a8437073b8d8a789 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Fri, 24 Oct 2025 14:50:32 +0200 Subject: [PATCH 10/24] fix: refactored tests --- test/test_workflows.py | 244 ++++++++++++------ .../bad_cores/clt_bad_cores.cwl | 13 - .../bad_cores/step_bad_cores.cwl | 21 -- .../bad_cores/step_run_bad_cores.cwl | 21 -- .../bad_cores/wf_bad_cores.cwl | 21 -- .../bad_merge_production.cwl | 73 ------ .../bad_ram/clt_bad_ram.cwl | 13 - .../bad_ram/step_bad_ram.cwl | 21 -- .../bad_ram/step_run_bad_ram.cwl | 21 -- .../bad_ram/wf_bad_ram.cwl | 21 -- .../cores_conflict_wf_step.cwl | 23 -- .../cores_conflict_wf_step_run.cwl | 23 -- .../nested_wf/external_conflict_nested_wf.cwl | 23 -- .../nested_wf/internal_conflict_nested_wf.cwl | 18 -- .../nested_wf/nested_wf_high_cores.cwl | 21 -- .../nested_wf/nested_wf_high_ram.cwl | 22 -- .../ram_conflict_wf_step.cwl | 23 -- .../ram_conflict_wf_step_run.cwl | 23 -- 18 files changed, 166 insertions(+), 479 deletions(-) delete mode 100644 test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl delete mode 100644 test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl delete mode 100644 test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl delete mode 100644 test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl delete mode 100644 test/workflows/resource_requirements/bad_merge_production.cwl delete mode 100644 test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl delete mode 100644 test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl delete mode 100644 test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl delete mode 100644 test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl delete mode 100644 test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl delete mode 100644 test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl delete mode 100644 test/workflows/resource_requirements/resource_conflicts/nested_wf/external_conflict_nested_wf.cwl delete mode 100644 test/workflows/resource_requirements/resource_conflicts/nested_wf/internal_conflict_nested_wf.cwl delete mode 100644 test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_cores.cwl delete mode 100644 test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_ram.cwl delete mode 100644 test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl delete mode 100644 test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl diff --git a/test/test_workflows.py b/test/test_workflows.py index a010773e..5fe8b7d3 100644 --- a/test/test_workflows.py +++ b/test/test_workflows.py @@ -5,9 +5,19 @@ from pathlib import Path import pytest +from cwl_utils.parser.cwl_v1_2 import ( + CommandLineTool, + ResourceRequirement, + Workflow, + WorkflowStep, +) from typer.testing import CliRunner from dirac_cwl_proto import app +from dirac_cwl_proto.execution_hooks.requirement_validator import ( + RequirementError, + ResourceRequirementValidator, +) def strip_ansi_codes(text: str) -> str: @@ -216,79 +226,6 @@ def test_run_job_success(cli_runner, cleanup, cwl_file, inputs): ], "Failedtovalidatetheparameter", ), - # The core resource requirements are wrong: coresMin is higher than coresMax value - ( - "test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl", - [], - "ResourceRequirementisinvalid", - ), - ( - "test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl", - [], - "ResourceRequirementisinvalid", - ), - ( - "test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl", - [], - "ResourceRequirementisinvalid", - ), - ( - "test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl", - [], - "ResourceRequirementisinvalid", - ), - # The ram resource requirements are wrong: ramMin is higher than ramMax value - ( - "test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl", - [], - "ResourceRequirementisinvalid", - ), - ( - "test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl", - [], - "ResourceRequirementisinvalid", - ), - ( - "test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl", - [], - "ResourceRequirementisinvalid", - ), - ( - "test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl", - [], - "ResourceRequirementisinvalid", - ), - # The resource requirements contains conflicts - ( - "test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl", - [], - "ResourceRequirementisinvalid", - ), - ( - "test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl", - [], - "ResourceRequirementisinvalid", - ), - ( - "test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl", - [], - "ResourceRequirementisinvalid", - ), - ( - "test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl", - [], - "ResourceRequirementisinvalid", - ), - ( - "test/workflows/resource_requirements/resource_conflicts/nested_wf/external_conflict_nested_wf.cwl", - [], - "ResourceRequirementisinvalid", - ), - ( - "test/workflows/resource_requirements/resource_conflicts/nested_wf/internal_conflict_nested_wf.cwl", - [], - "ResourceRequirementisinvalid", - ), ], ) def test_run_job_validation_failure( @@ -709,11 +646,6 @@ def test_run_simple_production_success(cli_runner, cleanup, cwl_file, metadata): "test/workflows/mandelbrot/type_dependencies/production/malformed-nonexisting-type_metadata-mandelbrot_complete.yaml", "Unknownexecutionhooksplugin:'MandelBrotDoesNotExist'", ), - ( - "test/workflows/resource_requirements/bad_merge_production.cwl", - "test/workflows/merge/type_dependencies/production/metadata-merge_complete.yaml", - "ResourceRequirementisinvalid", - ), ], ) def test_run_production_validation_failure( @@ -765,3 +697,159 @@ def test_run_production_validation_failure( f"Expected error '{expected_error}' not found in " f"stdout: {clean_output}, stderr: {clean_stderr}, exception: {clean_exception}" ) + + +# ----------------------------------------------------------------------------- +# Requirements tests +# ----------------------------------------------------------------------------- + + +# Helper functions +def create_commandlinetool(requirements=None, inputs=None, outputs=None): + return CommandLineTool( + requirements=requirements or [], + inputs=inputs or [], + outputs=outputs or [], + ) + + +def create_workflow(requirements=None, steps=None, inputs=None, outputs=None): + return Workflow( + requirements=requirements or [], + steps=steps or [], + inputs=inputs or [], + outputs=outputs or [], + ) + + +def create_step(requirements=None, run=None, in_=None, out=None): + return WorkflowStep( + requirements=requirements or [], + run=run, + in_=in_ or [], + out=out or [], + ) + + +@pytest.mark.parametrize( + "bad_min_max_reqs", + [ + # cores + ResourceRequirement(coresMin=4, coresMax=2), + # ram + ResourceRequirement(ramMin=2048, ramMax=1024), + ], +) +def test_bad_min_max_resource_reqs(bad_min_max_reqs): + """ + Test invalid min/max resource requirements in CWL objects. + """ + + # CommandlineTool with bad minmax reqs + clt = create_commandlinetool(requirements=[bad_min_max_reqs]) + with pytest.raises(RequirementError): + ResourceRequirementValidator(cwl_object=clt).validate_requirements() + + # WorkflowStep.run with bad minmax reqs + step_bad_run = create_step(run=clt) + workflow = create_workflow(steps=[step_bad_run]) + with pytest.raises(RequirementError): + ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + + # WorkflowStep with bad minmax reqs + clt = create_commandlinetool() + step = create_step(run=clt, requirements=[bad_min_max_reqs]) + workflow = create_workflow(steps=[step]) + with pytest.raises(RequirementError): + ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + + # Workflow with bad minmax reqs + workflow = create_workflow(requirements=[bad_min_max_reqs]) + with pytest.raises(RequirementError): + ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + + # NestedWorkflow with bad minmax reqs + nest_workflow = create_workflow(requirements=[bad_min_max_reqs]) + step = create_step(run=nest_workflow) + workflow = create_workflow(steps=[step]) + with pytest.raises(RequirementError): + ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + + # DeepNestedWorkflow with bad minmax reqs + deep_workflow = create_workflow(requirements=[bad_min_max_reqs]) + deep_step = create_step(run=deep_workflow) + nest_workflow = create_workflow(steps=[deep_step]) + step = create_step(run=nest_workflow) + workflow = create_workflow(steps=[step]) + with pytest.raises(RequirementError): + ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + + +@pytest.mark.parametrize( + ("global_requirements", "higher_requirements"), + [ + # cores + ( + ResourceRequirement(coresMax=2), + ResourceRequirement(coresMin=4), + ), + # ram + ( + ResourceRequirement(ramMax=512), + ResourceRequirement(ramMin=1024), + ), + ], +) +def test_bad_global_requirements(global_requirements, higher_requirements): + """ + Test global requirements conflicts. + """ + + # Workflow - WorkflowStep conflict + step = create_step(requirements=[higher_requirements]) + workflow = create_workflow(requirements=[global_requirements], steps=[step]) + with pytest.raises(RequirementError): + ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + + # Workflow - WorkflowStep.run conflict + run = create_commandlinetool(requirements=[higher_requirements]) + step = create_step(run=run) + workflow = create_workflow(requirements=[global_requirements], steps=[step]) + with pytest.raises(RequirementError): + ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + + # Workflow - NestedWorkflow conflict + nest_workflow = create_workflow(requirements=[higher_requirements]) + step = create_step(run=nest_workflow) + workflow = create_workflow(requirements=[global_requirements], steps=[step]) + with pytest.raises(RequirementError): + ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + + +@pytest.mark.parametrize( + "requirements", + [ + # cores + ResourceRequirement(coresMin=2, coresMax=4), + # ram + ResourceRequirement(ramMin=1024, ramMax=2048), + ], +) +def test_production_requirements(requirements): + """ + Test production case requirements. + """ + + # Production workflows can't have global requirements + workflow = create_workflow(requirements=[requirements]) + with pytest.raises(RequirementError): + ResourceRequirementValidator(cwl_object=workflow).validate_requirements( + production=True + ) + + # Production workflows can have step requirements + step = create_step(requirements=[requirements]) + workflow = create_workflow(steps=[step]) + ResourceRequirementValidator(cwl_object=workflow).validate_requirements( + production=True + ) diff --git a/test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl b/test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl deleted file mode 100644 index fd659699..00000000 --- a/test/workflows/resource_requirements/bad_cores/clt_bad_cores.cwl +++ /dev/null @@ -1,13 +0,0 @@ -cwlVersion: v1.2 -class: CommandLineTool -label: "Higher coresMin CLT" -doc: The CommandLineTool ResourceRequirement has a coresMin higher than its coresMax = failure - -requirements: - ResourceRequirement: - coresMin: 4 # > 2 - coresMax: 2 -inputs: [] -outputs: [] - -baseCommand: ["echo", "Hello World"] diff --git a/test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl b/test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl deleted file mode 100644 index 8e53a247..00000000 --- a/test/workflows/resource_requirements/bad_cores/step_bad_cores.cwl +++ /dev/null @@ -1,21 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher coresMin WorkflowStep" -doc: The WorkflowStep ResourceRequirement has a coresMin higher than its coresMax = failure - -inputs: [] -outputs: [] - -steps: - bad_step: - requirements: - ResourceRequirement: - coresMin: 4 # > 2 - coresMax: 2 - run: - class: CommandLineTool - baseCommand: ["echo", "Hello World"] - inputs: [] - outputs: [] - out: [] - in: [] diff --git a/test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl b/test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl deleted file mode 100644 index 3ec63546..00000000 --- a/test/workflows/resource_requirements/bad_cores/step_run_bad_cores.cwl +++ /dev/null @@ -1,21 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher coresMin WorkflowStep.run" -doc: The WorkflowStep.run ResourceRequirement has a coresMin higher than its coresMax = failure - -inputs: [] -outputs: [] - -steps: - bad_step: - run: - class: CommandLineTool - baseCommand: ["echo", "Hello World"] - inputs: [] - outputs: [] - requirements: - ResourceRequirement: - coresMin: 4 # > 2 - coresMax: 2 - out: [] - in: [] diff --git a/test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl b/test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl deleted file mode 100644 index b0a42cc7..00000000 --- a/test/workflows/resource_requirements/bad_cores/wf_bad_cores.cwl +++ /dev/null @@ -1,21 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher coresMin Workflow" -doc: The Workflow ResourceRequirement has a coresMin higher than its coresMax = failure - -requirements: - ResourceRequirement: - coresMin: 4 # > 2 - coresMax: 2 -inputs: [] -outputs: [] - -steps: - good_step: - run: - class: CommandLineTool - baseCommand: ["echo", "Hello World"] - inputs: [] - outputs: [] - out: [] - in: [] diff --git a/test/workflows/resource_requirements/bad_merge_production.cwl b/test/workflows/resource_requirements/bad_merge_production.cwl deleted file mode 100644 index c3a57685..00000000 --- a/test/workflows/resource_requirements/bad_merge_production.cwl +++ /dev/null @@ -1,73 +0,0 @@ -cwlVersion: v1.2 -label: "Bad Merge Workflow" -class: Workflow -doc: The global ResourceRequirement should not be allowed in a production workflow, file depends on "test/workflows/merge" folder - -requirements: - MultipleInputFeatureRequirement: {} - ResourceRequirement: # not allowed here - coresMax: 4 - -# Define the inputs of the workflow -inputs: - num-points: - type: int - doc: "Number of random points to generate for the simulation" - default: 1000 - output-path-step1: - type: string - default: result_1.sim - output-path-step2: - type: string - default: result_2.sim - -# Define the outputs of the workflow -outputs: - pi_approximation: - type: File - outputSource: gathering/pi_result - -# Define the steps of the workflow -steps: - # Simulation step 1 - simulate_step1: - in: - num-points: num-points - output-path: output-path-step1 - out: [sim] - run: ../merge/pisimulate_v2.cwl - - # Simulation step 2 - simulate_step2: - in: - num-points: num-points - output-path: output-path-step2 - out: [sim] - run: ../merge/pisimulate_v2.cwl - - # Gathering step - gathering: - in: - input-data: - source: - - simulate_step1/sim - - simulate_step2/sim - linkMerge: merge_flattened - out: [pi_result] - run: - class: CommandLineTool - requirements: - ResourceRequirement: - coresMin: 1 - ramMin: 1024 - inputs: - input-data: - type: File[] - inputBinding: - separate: true - outputs: - pi_result: - type: File - outputBinding: - glob: "result*.sim" - baseCommand: [pi-gather] diff --git a/test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl b/test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl deleted file mode 100644 index 5829853c..00000000 --- a/test/workflows/resource_requirements/bad_ram/clt_bad_ram.cwl +++ /dev/null @@ -1,13 +0,0 @@ -cwlVersion: v1.2 -class: CommandLineTool -label: "Higher ramMin CLT" -doc: The CommandLineTool ResourceRequirement has a ramMin higher than its ramMax = failure - -requirements: - ResourceRequirement: - ramMin: 2048 # > 1024 - ramMax: 1024 -inputs: [] -outputs: [] - -baseCommand: ["echo", "Hello World"] diff --git a/test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl b/test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl deleted file mode 100644 index aeb414f7..00000000 --- a/test/workflows/resource_requirements/bad_ram/step_bad_ram.cwl +++ /dev/null @@ -1,21 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher ramMin WorkflowStep" -doc: The WorkflowStep ResourceRequirement has a ramMin higher than its ramMax = failure - -inputs: [] -outputs: [] - -steps: - bad_step: - requirements: - ResourceRequirement: - ramMin: 2048 # > 1024 - ramMax: 1024 - run: - class: CommandLineTool - baseCommand: ["echo", "Hello World"] - inputs: [] - outputs: [] - out: [] - in: [] diff --git a/test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl b/test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl deleted file mode 100644 index 301ca664..00000000 --- a/test/workflows/resource_requirements/bad_ram/step_run_bad_ram.cwl +++ /dev/null @@ -1,21 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher ramMin WorkflowStep.run" -doc: The WorkflowStep.run ResourceRequirement has a ramMin higher than its ramMax = failure - -inputs: [] -outputs: [] - -steps: - bad_step: - run: - class: CommandLineTool - baseCommand: ["echo", "Hello World"] - inputs: [] - outputs: [] - requirements: - ResourceRequirement: - ramMin: 2048 # > 1024 - ramMax: 1024 - out: [] - in: [] diff --git a/test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl b/test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl deleted file mode 100644 index 83d7a960..00000000 --- a/test/workflows/resource_requirements/bad_ram/wf_bad_ram.cwl +++ /dev/null @@ -1,21 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher ramMin Workflow" -doc: The Workflow ResourceRequirement has a ramMin higher than its ramMax = failure - -requirements: - ResourceRequirement: - ramMin: 2048 # > 1024 - ramMax: 1024 -inputs: [] -outputs: [] - -steps: - good_step: - run: - class: CommandLineTool - baseCommand: ["echo", "Hello World"] - inputs: [] - outputs: [] - out: [] - in: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl b/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl deleted file mode 100644 index e292a9ae..00000000 --- a/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step.cwl +++ /dev/null @@ -1,23 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher WorkflowStep coresMin than Workflow coresMax" -doc: The WorkflowStep ResourceRequirement has a coresMin higher than the Workflow ResourceRequirement coresMax = failure - -inputs: [] -outputs: [] -requirements: - ResourceRequirement: - coresMax: 2 # also equals coresMin (when coresMin not specified) - -steps: - too_high_cores: - requirements: - ResourceRequirement: - coresMin: 4 # > 2 - run: - class: CommandLineTool - baseCommand: ["echo", "Hello World"] - inputs: [] - outputs: [] - in: [] - out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl b/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl deleted file mode 100644 index 70ed6045..00000000 --- a/test/workflows/resource_requirements/resource_conflicts/cores_conflict_wf_step_run.cwl +++ /dev/null @@ -1,23 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher WorkflowStep.run coresMin than Workflow coresMax" -doc: The WorkflowStep.run ResourceRequirement has a coresMin higher than the Workflow ResourceRequirement coresMax = failure - -inputs: [] -outputs: [] -requirements: - ResourceRequirement: - coresMax: 2 # also equals coresMin (when coresMin not specified) - -steps: - too_high_cores: - run: - class: CommandLineTool - baseCommand: ["echo", "Hello World"] - inputs: [] - outputs: [] - requirements: - ResourceRequirement: - coresMin: 4 # > 2 - in: [] - out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/nested_wf/external_conflict_nested_wf.cwl b/test/workflows/resource_requirements/resource_conflicts/nested_wf/external_conflict_nested_wf.cwl deleted file mode 100644 index eaff97fa..00000000 --- a/test/workflows/resource_requirements/resource_conflicts/nested_wf/external_conflict_nested_wf.cwl +++ /dev/null @@ -1,23 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher NestedWorkflow resourceMin than resourceMax" -doc: The NestedWorkflow ResourceRequirement has higher resourceMin than global resourceMax = failure, both NestedWorflows - here are wrong but only the first one will trigger the exception. - -inputs: [] -outputs: [] - -requirements: - ResourceRequirement: - ramMax: 512 - coresMax: 4 - -steps: - too_high_ram: - run: ./nested_wf_high_ram.cwl - in: [] - out: [] - too_high_cores: - run: ./nested_wf_high_cores.cwl - in: [] - out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/nested_wf/internal_conflict_nested_wf.cwl b/test/workflows/resource_requirements/resource_conflicts/nested_wf/internal_conflict_nested_wf.cwl deleted file mode 100644 index 109041b3..00000000 --- a/test/workflows/resource_requirements/resource_conflicts/nested_wf/internal_conflict_nested_wf.cwl +++ /dev/null @@ -1,18 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher NestedWorkflow resourceMin than resourceMax" -doc: The NestedWorkflow ResourceRequirement has higher resourceMin than resourceMax = failure, both NestedWorflows - here are wrong but only the first one will trigger the exception. - -inputs: [] -outputs: [] - -steps: - too_high_ram: - run: ../ram_conflict_wf_step.cwl - in: [ ] - out: [ ] - too_high_cores: - run: ../cores_conflict_wf_step.cwl - in: [] - out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_cores.cwl b/test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_cores.cwl deleted file mode 100644 index ae930d7d..00000000 --- a/test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_cores.cwl +++ /dev/null @@ -1,21 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher NestedWorkflow resourceMin than resourceMax" -doc: This NestedWorkflow ResourceRequirement has higher coresMin than global coresMax = failure - -inputs: [] -outputs: [] - -requirements: - ResourceRequirement: - coresMin: 10 - -steps: - step: - run: - class: CommandLineTool - baseCommand: ["echo", "Hello World"] - inputs: [] - outputs: [] - in: [] - out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_ram.cwl b/test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_ram.cwl deleted file mode 100644 index a909b0e6..00000000 --- a/test/workflows/resource_requirements/resource_conflicts/nested_wf/nested_wf_high_ram.cwl +++ /dev/null @@ -1,22 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher NestedWorkflow resourceMin than resourceMax" -doc: This NestedWorkflow ResourceRequirement has higher ramMin than global ramMax = failure - - -inputs: [] -outputs: [] - -requirements: - ResourceRequirement: - ramMin: 1028 - -steps: - step: - run: - class: CommandLineTool - baseCommand: ["echo", "Hello World"] - inputs: [] - outputs: [] - in: [] - out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl b/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl deleted file mode 100644 index a77b68d0..00000000 --- a/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step.cwl +++ /dev/null @@ -1,23 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher WorkflowStep ramMin than Workflow ramMax" -doc: The WorkflowStep ResourceRequirement has a ramMin higher than the Workflow ResourceRequirement ramMax = failure - -inputs: [] -outputs: [] -requirements: - ResourceRequirement: - ramMax: 1024 # also equals ramMin (when ramMin not specified) - -steps: - too_high_ram: - run: - class: CommandLineTool - baseCommand: ["echo", "Hello World"] - inputs: [] - outputs: [] - requirements: - ResourceRequirement: - ramMin: 2048 # > 1024 - in: [] - out: [] diff --git a/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl b/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl deleted file mode 100644 index 4b303097..00000000 --- a/test/workflows/resource_requirements/resource_conflicts/ram_conflict_wf_step_run.cwl +++ /dev/null @@ -1,23 +0,0 @@ -cwlVersion: v1.2 -class: Workflow -label: "Higher WorkflowStep.run ramMin than Workflow ramMax" -doc: The WorkflowStep.run ResourceRequirement has a ramMin higher than the Workflow ResourceRequirement ramMax = failure - -inputs: [] -outputs: [] -requirements: - ResourceRequirement: - ramMax: 1024 # also equals ramMin (when ramMin not specified) - -steps: - too_high_ram: - run: - class: CommandLineTool - baseCommand: ["echo", "Hello World"] - inputs: [] - outputs: [] - requirements: - ResourceRequirement: - ramMin: 2048 # > 1024 - in: [] - out: [] From 139ac8243ae0568288f8e1b7cb54f2488f0eb5a4 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Mon, 3 Nov 2025 11:37:52 +0100 Subject: [PATCH 11/24] fix: fixed duplicate test --- test/test_workflows.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_workflows.py b/test/test_workflows.py index 5fe8b7d3..b5105463 100644 --- a/test/test_workflows.py +++ b/test/test_workflows.py @@ -214,7 +214,7 @@ def test_run_job_success(cli_runner, cleanup, cwl_file, inputs): ), # The description file points to itself (another circular dependency) ( - "test/workflows/bad_references/reference_circular2.cwl", + "test/workflows/bad_references/reference_itself.cwl", [], "Recursingintostep", ), @@ -480,7 +480,7 @@ def run_and_capture(): ), # The description file points to itself (another circular dependency) ( - "test/workflows/bad_references/reference_circular1.cwl", + "test/workflows/bad_references/reference_itself.cwl", [], "Recursingintostep", ), From 04b6f8d9cd37dfe96ccdc345031c8e4162c8290b Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Wed, 5 Nov 2025 14:56:29 +0100 Subject: [PATCH 12/24] feat: added Production ResReq check in SubmissionModel --- .../execution_hooks/requirement_validator.py | 57 ++----------------- src/dirac_cwl_proto/production/__init__.py | 13 ----- src/dirac_cwl_proto/submission_models.py | 11 +++- test/test_workflows.py | 11 ++-- 4 files changed, 19 insertions(+), 73 deletions(-) diff --git a/src/dirac_cwl_proto/execution_hooks/requirement_validator.py b/src/dirac_cwl_proto/execution_hooks/requirement_validator.py index f29d4417..ef133354 100644 --- a/src/dirac_cwl_proto/execution_hooks/requirement_validator.py +++ b/src/dirac_cwl_proto/execution_hooks/requirement_validator.py @@ -39,60 +39,35 @@ def validate_requirement( """ pass - @abstractmethod - def validate_production_requirement(self, requirement, is_global: bool = False): - """ - Validate a production workflow requirement, specific for each requirement class. - This method also depends on the production workflow rules. - - :param requirement: The current requirement to validate. - :param is_global: True if it's a global requirement, False otherwise. - """ - pass - - def validate_requirements(self, cwl_object=None, production: bool = False) -> None: + def validate_requirements(self, cwl_object=None) -> None: """ Validate all requirements in a Workflow. :param cwl_object: The cwl_object to validate the requirements for. If None, use the cwl_object from the class. This is used for nested workflows validation. - :param production: True if the requirements are for a production workflow, False otherwise. - -- Maybe we could add a transformation parameter later. """ cwl_object = cwl_object if cwl_object else self.cwl_object global_requirement = self.get_requirement(cwl_object) # Validate Workflow/CommandLineTool global requirements. if global_requirement: - # Production-workflow-specific validation. - if production: - self.validate_production_requirement(global_requirement, is_global=True) self.validate_requirement(global_requirement, context="global requirements") # Validate WorkflowStep requirements, if any. if not isinstance(cwl_object, CommandLineTool) and cwl_object.steps: - self.validate_steps_requirements( - cwl_object.steps, global_requirement, production - ) + self.validate_steps_requirements(cwl_object.steps, global_requirement) - def validate_steps_requirements( - self, steps, global_requirement, production: bool = False - ): + def validate_steps_requirements(self, steps, global_requirement): """ Validate steps requirements in WorkflowStep. :param steps: The WorkflowStep to validate the requirements for. :param global_requirement: The global Workflow requirement, if any. - :param production: True if the requirements are for a production workflow, False otherwise. - -- Maybe we could add a transformation parameter later. """ for step in steps: step_requirement = self.get_requirement(step) # Validate WorkflowStep requirements, if any. if step_requirement: - # Production-workflow-specific validation. - if production: - self.validate_production_requirement(step_requirement) self.validate_requirement( step_requirement, context="step requirements", @@ -101,18 +76,14 @@ def validate_steps_requirements( # Validate WorkflowStep.run, if any. if step.run: - self.validate_run_requirements(step.run, global_requirement, production) + self.validate_run_requirements(step.run, global_requirement) - def validate_run_requirements( - self, run, global_requirement, production: bool = False - ): + def validate_run_requirements(self, run, global_requirement): """ Validate WorkflowStep.run requirements. :param run: The WorkflowStep.run to validate the requirements for. :param global_requirement: The global Workflow requirement, if any. - :param production: True if the requirements are for a production workflow, False otherwise. - -- Maybe we could add a transformation parameter later. """ if isinstance(run, Workflow): # Validate nested Workflow requirements, if any. @@ -120,9 +91,6 @@ def validate_run_requirements( step_run_requirement = self.get_requirement(run) if step_run_requirement: - # Production-workflow-specific validation. - if production: - self.validate_production_requirement(step_run_requirement) self.validate_requirement( step_run_requirement, context="step run requirements", @@ -170,21 +138,6 @@ def validate_conflict(min_value, global_max_value, resource, context): f"{resource}Min is higher than global {resource}Max in {context}" ) - def validate_production_requirement(self, requirement, is_global: bool = False): - """ - Raise an error if there's a global ResourceRequirement in a production workflow. - Otherwise, add some logic for WorkflowSteps, CommandLineTools - and WorkflowStep.run, etc. in production workflows. - - :param requirement: The current requirement to validate. - :param is_global: True if there's a global ResourceRequirement, False otherwise. - """ - if is_global: - raise RequirementError( - "ResourceRequirement is invalid: global ResourceRequirement is not allowed in production workflows" - ) - # else ... - def validate_requirement( self, requirement, context: str = "", global_requirement=None ): diff --git a/src/dirac_cwl_proto/production/__init__.py b/src/dirac_cwl_proto/production/__init__.py index 23df6b75..2a27b1b4 100644 --- a/src/dirac_cwl_proto/production/__init__.py +++ b/src/dirac_cwl_proto/production/__init__.py @@ -25,10 +25,6 @@ SchedulingHint, TransformationExecutionHooksHint, ) -from dirac_cwl_proto.execution_hooks.requirement_validator import ( - RequirementError, - ResourceRequirementValidator, -) from dirac_cwl_proto.submission_models import ( ProductionSubmissionModel, TransformationSubmissionModel, @@ -145,15 +141,6 @@ def submit_production_router(production: ProductionSubmissionModel) -> bool: # Validate the transformation logger.info("Validating the production...") - try: - # Production jobs can't have a global ResourceRequirement - ResourceRequirementValidator(cwl_object=production.task).validate_requirements( - production=True - ) - except RequirementError as ex: - logger.exception(f"RequirementValidationError - {ex}") - raise - # Already validated by the pydantic model logger.info("Production validated!") diff --git a/src/dirac_cwl_proto/submission_models.py b/src/dirac_cwl_proto/submission_models.py index 7933cd1e..77072cd3 100644 --- a/src/dirac_cwl_proto/submission_models.py +++ b/src/dirac_cwl_proto/submission_models.py @@ -102,8 +102,10 @@ class ProductionSubmissionModel(BaseModel): steps_scheduling: dict[str, SchedulingHint] = {} @model_validator(mode="before") - def validate_steps_metadata(cls, values): + def validate_production(cls, values): task = values.get("task") + + # Metadata steps_execution_hooks = values.get("steps_execution_hooks") if task and steps_execution_hooks: @@ -118,6 +120,13 @@ def validate_steps_metadata(cls, values): f"The following steps are missing from the task workflow: {missing_steps}" ) + # ResourceRequirement + for req in task.requirements: + if req.class_ == "ResourceRequirement": + raise ValueError( + "Global ResourceRequirement is not allowed in productions." + ) + return values @field_serializer("task") diff --git a/test/test_workflows.py b/test/test_workflows.py index b5105463..efa26db7 100644 --- a/test/test_workflows.py +++ b/test/test_workflows.py @@ -18,6 +18,7 @@ RequirementError, ResourceRequirementValidator, ) +from dirac_cwl_proto.submission_models import ProductionSubmissionModel def strip_ansi_codes(text: str) -> str: @@ -842,14 +843,10 @@ def test_production_requirements(requirements): # Production workflows can't have global requirements workflow = create_workflow(requirements=[requirements]) - with pytest.raises(RequirementError): - ResourceRequirementValidator(cwl_object=workflow).validate_requirements( - production=True - ) + with pytest.raises(ValueError): + ProductionSubmissionModel(task=workflow, steps_execution_hooks={}) # Production workflows can have step requirements step = create_step(requirements=[requirements]) workflow = create_workflow(steps=[step]) - ResourceRequirementValidator(cwl_object=workflow).validate_requirements( - production=True - ) + ProductionSubmissionModel(task=workflow, steps_execution_hooks={}) From 4ae201aedc5b974529a0b23392ea1b12386467e0 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Wed, 5 Nov 2025 17:11:33 +0100 Subject: [PATCH 13/24] feat: added ResourceRequirement validation in SubmissionModel --- .../execution_hooks/requirement_validator.py | 170 ------------------ src/dirac_cwl_proto/job/__init__.py | 20 +-- src/dirac_cwl_proto/submission_models.py | 108 ++++++++++- test/test_workflows.py | 69 ++++--- 4 files changed, 149 insertions(+), 218 deletions(-) delete mode 100644 src/dirac_cwl_proto/execution_hooks/requirement_validator.py diff --git a/src/dirac_cwl_proto/execution_hooks/requirement_validator.py b/src/dirac_cwl_proto/execution_hooks/requirement_validator.py deleted file mode 100644 index ef133354..00000000 --- a/src/dirac_cwl_proto/execution_hooks/requirement_validator.py +++ /dev/null @@ -1,170 +0,0 @@ -from abc import ABC, abstractmethod -from typing import ClassVar - -from cwl_utils.parser.cwl_v1_2 import CommandLineTool, Workflow, WorkflowStep -from pydantic import BaseModel, ConfigDict - - -class RequirementValidator(BaseModel, ABC): - model_config = ConfigDict(validate_assignment=True, arbitrary_types_allowed=True) - - requirement_class: ClassVar[str] - cwl_object: Workflow | CommandLineTool - - def get_requirement(self, cwl_object: Workflow | CommandLineTool | WorkflowStep): - """ - Extract the requirement from the current cwl_object, - based on the requirement class we want. - - :param cwl_object: The cwl_object to extract the requirement from. - :return: The requirement object, or None if not found. - """ - requirements = getattr(cwl_object, "requirements", []) or [] - for requirement in requirements: - if requirement.class_ == self.requirement_class: - return requirement - return None - - @abstractmethod - def validate_requirement( - self, requirement, context: str = "", global_requirement=None - ): - """ - Validate a requirement, specific for each requirement class. - - :param requirement: The current requirement to validate. - :param context: A context string describing the validation context. - Ex: "Step requirement", "Global requirement", etc. - :param global_requirement: The global Workflow requirement, if any. - """ - pass - - def validate_requirements(self, cwl_object=None) -> None: - """ - Validate all requirements in a Workflow. - - :param cwl_object: The cwl_object to validate the requirements for. - If None, use the cwl_object from the class. This is used for nested workflows validation. - """ - cwl_object = cwl_object if cwl_object else self.cwl_object - global_requirement = self.get_requirement(cwl_object) - - # Validate Workflow/CommandLineTool global requirements. - if global_requirement: - self.validate_requirement(global_requirement, context="global requirements") - - # Validate WorkflowStep requirements, if any. - if not isinstance(cwl_object, CommandLineTool) and cwl_object.steps: - self.validate_steps_requirements(cwl_object.steps, global_requirement) - - def validate_steps_requirements(self, steps, global_requirement): - """ - Validate steps requirements in WorkflowStep. - - :param steps: The WorkflowStep to validate the requirements for. - :param global_requirement: The global Workflow requirement, if any. - """ - for step in steps: - step_requirement = self.get_requirement(step) - # Validate WorkflowStep requirements, if any. - if step_requirement: - self.validate_requirement( - step_requirement, - context="step requirements", - global_requirement=global_requirement, - ) - - # Validate WorkflowStep.run, if any. - if step.run: - self.validate_run_requirements(step.run, global_requirement) - - def validate_run_requirements(self, run, global_requirement): - """ - Validate WorkflowStep.run requirements. - - :param run: The WorkflowStep.run to validate the requirements for. - :param global_requirement: The global Workflow requirement, if any. - """ - if isinstance(run, Workflow): - # Validate nested Workflow requirements, if any. - self.validate_requirements(cwl_object=run) - - step_run_requirement = self.get_requirement(run) - if step_run_requirement: - self.validate_requirement( - step_run_requirement, - context="step run requirements", - global_requirement=global_requirement, - ) - - -class RequirementError(Exception): - def __init__(self, message): - super().__init__(message) - - -class ResourceRequirementValidator(RequirementValidator): - requirement_class = "ResourceRequirement" - - @staticmethod - def validate_minmax(min_value, max_value, resource, context): - """ - Check if the resource min_value is higher than the resource max_value. - If so, raise a ValueError. - - :param min_value: The current resource min_value. - :param max_value: The current resource max_value. - :param resource: The resource name. - :param context: A context string describing the validation context. - Ex: "Step requirement", "Global requirement", etc. - """ - if min_value and max_value and min_value > max_value: - raise ValueError(f"{resource}Min is higher than {resource}Max in {context}") - - @staticmethod - def validate_conflict(min_value, global_max_value, resource, context): - """ - Check if the resource min_value is higher than the global resource max_value. - If so, raise a ValueError. - - :param min_value: The current resource min_value. - :param global_max_value: The global resource max_value. - :param resource: The resource name. - :param context: A context string describing the validation context. - Ex: "Step requirement", "Global requirement", etc. - """ - if min_value and global_max_value and min_value > global_max_value: - raise ValueError( - f"{resource}Min is higher than global {resource}Max in {context}" - ) - - def validate_requirement( - self, requirement, context: str = "", global_requirement=None - ): - """ - Validate a ResourceRequirement. - Verify: - - that resourceMin is not higher than resourceMax (CommandLineTool, Workflow, WorkflowStep, WorkflowStep.run) - - that resourceMin (WorkflowStep, WorkflowStep.run) is not higher than global (Workflow) resourceMax. - - :param requirement: The current ResourceRequirement to validate. - :param context: A context string describing the validation context. - Ex: "Step requirement", "Global requirement", etc. - :param global_requirement: The global Workflow requirement, if any. - """ - try: - self.validate_minmax(requirement.ramMin, requirement.ramMax, "ram", context) - self.validate_minmax( - requirement.coresMin, requirement.coresMax, "cores", context - ) - - if global_requirement: - # Only WorkflowStep and WorkflowStep.run cases - self.validate_conflict( - requirement.ramMin, global_requirement.ramMax, "ram", context - ) - self.validate_conflict( - requirement.coresMin, global_requirement.coresMax, "cores", context - ) - except ValueError as ex: - raise RequirementError(f"ResourceRequirement is invalid: {ex}") from ex diff --git a/src/dirac_cwl_proto/job/__init__.py b/src/dirac_cwl_proto/job/__init__.py index d039a621..5cfabcff 100644 --- a/src/dirac_cwl_proto/job/__init__.py +++ b/src/dirac_cwl_proto/job/__init__.py @@ -30,10 +30,6 @@ from dirac_cwl_proto.execution_hooks import ( ExecutionHooksBasePlugin, ) -from dirac_cwl_proto.execution_hooks.requirement_validator import ( - RequirementError, - ResourceRequirementValidator, -) from dirac_cwl_proto.submission_models import ( JobInputModel, JobSubmissionModel, @@ -147,11 +143,9 @@ def submit_job_client( "[blue]:information_source:[/blue] [bold]CLI:[/bold] Submitting the job(s) to service..." ) print_json(job.model_dump_json(indent=4)) - try: - submit_job_router(job) - except Exception as ex: + if not submit_job_router(job): console.print( - f"[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to run job(s) : {ex}" + "[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to run job(s)." ) return typer.Exit(code=1) console.print("[green]:heavy_check_mark:[/green] [bold]CLI:[/bold] Job(s) done.") @@ -229,12 +223,6 @@ def submit_job_router(job: JobSubmissionModel) -> bool: # Validate the jobs logger.info("Validating the job(s)...") - try: - ResourceRequirementValidator(cwl_object=job.task).validate_requirements() - except RequirementError as ex: - logger.exception(f"RequirementValidationError - {ex}") - raise - # Initiate 1 job per parameter jobs = [] if not job.parameters: @@ -266,8 +254,6 @@ def submit_job_router(job: JobSubmissionModel) -> bool: # ----------------------------------------------------------------------------- # JobWrapper # ----------------------------------------------------------------------------- - - def _pre_process( executable: CommandLineTool | Workflow | ExpressionTool, arguments: JobInputModel | None, @@ -423,7 +409,7 @@ def run_job(job: JobSubmissionModel) -> bool: except Exception: logger.exception("JobWrapper: Failed to execute workflow") - raise + return False finally: # Clean up if job_path.exists(): diff --git a/src/dirac_cwl_proto/submission_models.py b/src/dirac_cwl_proto/submission_models.py index 77072cd3..611a990a 100644 --- a/src/dirac_cwl_proto/submission_models.py +++ b/src/dirac_cwl_proto/submission_models.py @@ -9,10 +9,11 @@ from typing import Any -from cwl_utils.parser import save +from cwl_utils.parser import WorkflowStep, save from cwl_utils.parser.cwl_v1_2 import ( CommandLineTool, ExpressionTool, + ResourceRequirement, Workflow, ) from pydantic import BaseModel, ConfigDict, field_serializer, model_validator @@ -53,6 +54,15 @@ class JobSubmissionModel(BaseModel): scheduling: SchedulingHint execution_hooks: ExecutionHooksHint + @model_validator(mode="before") + def validate_job(cls, values): + task = values.get("task") + + # Validate Resource Requirement values of CWLObject, will raise ValueError if needed. + validate_resource_requirements(task) + + return values + @field_serializer("task") def serialize_task(self, value): if isinstance(value, (CommandLineTool, Workflow, ExpressionTool)): @@ -121,11 +131,10 @@ def validate_production(cls, values): ) # ResourceRequirement - for req in task.requirements: - if req.class_ == "ResourceRequirement": - raise ValueError( - "Global ResourceRequirement is not allowed in productions." - ) + if any(req.class_ == "ResourceRequirement" for req in task.requirements): + raise ValueError( + "Global ResourceRequirement is not allowed in productions." + ) return values @@ -140,8 +149,6 @@ def serialize_task(self, value): # ----------------------------------------------------------------------------- # Module helpers # ----------------------------------------------------------------------------- - - def extract_dirac_hints(cwl: Any) -> tuple[ExecutionHooksHint, SchedulingHint]: """Thin wrapper that returns (ExecutionHooksHint, SchedulingHint). @@ -150,3 +157,88 @@ def extract_dirac_hints(cwl: Any) -> tuple[ExecutionHooksHint, SchedulingHint]: convenience. """ return ExecutionHooksHint.from_cwl(cwl), SchedulingHint.from_cwl(cwl) + + +def validate_resource_requirements(task): + """ + Validate ResourceRequirements of a task (CommandLineTool, Workflow, WorkflowStep, WorkflowStep.run). + + :param task: The task to validate + """ + cwl_req = get_resource_requirement(task) + + # Validate Workflow/CLT requirements. + if cwl_req: + validate_resource_requirement(cwl_req) + + # Validate WorkflowStep requirements. + if not isinstance(task, CommandLineTool) and task.steps: + for step in task.steps: + step_req = get_resource_requirement(step) + if step_req: + validate_resource_requirement(step_req, cwl_req=cwl_req) + + # Validate run requirements for each step if they exist. + if step.run: + if isinstance(step.run, Workflow): + # Validate nested Workflow requirements, if any. + validate_resource_requirements(task=step.run) + + step_run_req = get_resource_requirement(step.run) + if step_run_req: + validate_resource_requirement(step_run_req, cwl_req=cwl_req) + + +def validate_resource_requirement(requirement, cwl_req=None): + """ + Validate a ResourceRequirement. + Verify: + - that resourceMin is not higher than resourceMax (CommandLineTool, Workflow, WorkflowStep, WorkflowStep.run) + --> #TODO this should be done by cwl-utils/cwltool later + - that resourceMin (WorkflowStep, WorkflowStep.run) is not higher than global (Workflow) resourceMax. + + :param requirement: The current ResourceRequirement to validate. + :param cwl_req: The global Workflow/CLT requirement, if any. + :raises ValueError: If the requirement is invalid. + """ + + def check_resource( + current_resource, req_min_value, req_max_value, global_max_value=None + ): + if req_min_value and req_max_value and req_min_value > req_max_value: + raise ValueError( + f"{current_resource}Min is higher than {current_resource}Max" + ) + if global_max_value and req_min_value and req_min_value > global_max_value: + raise ValueError( + f"{current_resource}Min is higher than global {current_resource}Max" + ) + + for resource, min_value, max_value in [ + ("ram", requirement.ramMin, requirement.ramMax), + ("cores", requirement.coresMin, requirement.coresMax), + ("tmpdir", requirement.tmpdirMin, requirement.tmpdirMax), + ("outdir", requirement.outdirMin, requirement.outdirMax), + ]: + check_resource( + resource, + min_value, + max_value, + cwl_req and getattr(cwl_req, f"{resource}Max"), + ) + + +def get_resource_requirement( + cwl_object: Workflow | CommandLineTool | WorkflowStep, +) -> ResourceRequirement | None: + """ + Extract the resource requirement from the current cwl_object + + :param cwl_object: The cwl_object to extract the requirement from. + :return: The resource requirement object, or None if not found. + """ + requirements = getattr(cwl_object, "requirements", []) or [] + for requirement in requirements: + if requirement.class_ == "ResourceRequirement": + return requirement + return None diff --git a/test/test_workflows.py b/test/test_workflows.py index efa26db7..5fa8fea3 100644 --- a/test/test_workflows.py +++ b/test/test_workflows.py @@ -14,11 +14,16 @@ from typer.testing import CliRunner from dirac_cwl_proto import app -from dirac_cwl_proto.execution_hooks.requirement_validator import ( - RequirementError, - ResourceRequirementValidator, +from dirac_cwl_proto.execution_hooks import ( + ExecutionHooksHint, + SchedulingHint, + TransformationExecutionHooksHint, +) +from dirac_cwl_proto.submission_models import ( + JobSubmissionModel, + ProductionSubmissionModel, + TransformationSubmissionModel, ) -from dirac_cwl_proto.submission_models import ProductionSubmissionModel def strip_ansi_codes(text: str) -> str: @@ -701,7 +706,7 @@ def test_run_production_validation_failure( # ----------------------------------------------------------------------------- -# Requirements tests +# Resource Requirements tests # ----------------------------------------------------------------------------- @@ -732,6 +737,18 @@ def create_step(requirements=None, run=None, in_=None, out=None): ) +def submit_task(task): + with pytest.raises(ValueError): + JobSubmissionModel( + task=task, execution_hooks=ExecutionHooksHint(), scheduling=SchedulingHint() + ) + TransformationSubmissionModel( + task=task, + execution_hooks=TransformationExecutionHooksHint(), + scheduling=SchedulingHint(), + ) + + @pytest.mark.parametrize( "bad_min_max_reqs", [ @@ -739,6 +756,10 @@ def create_step(requirements=None, run=None, in_=None, out=None): ResourceRequirement(coresMin=4, coresMax=2), # ram ResourceRequirement(ramMin=2048, ramMax=1024), + # tmpdir + ResourceRequirement(tmpdirMin=1024, tmpdirMax=512), + # outdir + ResourceRequirement(outdirMin=512, outdirMax=256), ], ) def test_bad_min_max_resource_reqs(bad_min_max_reqs): @@ -748,33 +769,28 @@ def test_bad_min_max_resource_reqs(bad_min_max_reqs): # CommandlineTool with bad minmax reqs clt = create_commandlinetool(requirements=[bad_min_max_reqs]) - with pytest.raises(RequirementError): - ResourceRequirementValidator(cwl_object=clt).validate_requirements() + submit_task(clt) # WorkflowStep.run with bad minmax reqs step_bad_run = create_step(run=clt) workflow = create_workflow(steps=[step_bad_run]) - with pytest.raises(RequirementError): - ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + submit_task(workflow) # WorkflowStep with bad minmax reqs clt = create_commandlinetool() step = create_step(run=clt, requirements=[bad_min_max_reqs]) workflow = create_workflow(steps=[step]) - with pytest.raises(RequirementError): - ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + submit_task(workflow) # Workflow with bad minmax reqs workflow = create_workflow(requirements=[bad_min_max_reqs]) - with pytest.raises(RequirementError): - ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + submit_task(workflow) # NestedWorkflow with bad minmax reqs nest_workflow = create_workflow(requirements=[bad_min_max_reqs]) step = create_step(run=nest_workflow) workflow = create_workflow(steps=[step]) - with pytest.raises(RequirementError): - ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + submit_task(workflow) # DeepNestedWorkflow with bad minmax reqs deep_workflow = create_workflow(requirements=[bad_min_max_reqs]) @@ -782,8 +798,7 @@ def test_bad_min_max_resource_reqs(bad_min_max_reqs): nest_workflow = create_workflow(steps=[deep_step]) step = create_step(run=nest_workflow) workflow = create_workflow(steps=[step]) - with pytest.raises(RequirementError): - ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + submit_task(workflow) @pytest.mark.parametrize( @@ -799,6 +814,13 @@ def test_bad_min_max_resource_reqs(bad_min_max_reqs): ResourceRequirement(ramMax=512), ResourceRequirement(ramMin=1024), ), + # tmpdir + ( + ResourceRequirement(tmpdirMax=512), + ResourceRequirement(tmpdirMin=1024), + ), + # outdir + (ResourceRequirement(outdirMax=256), ResourceRequirement(outdirMin=512)), ], ) def test_bad_global_requirements(global_requirements, higher_requirements): @@ -809,22 +831,19 @@ def test_bad_global_requirements(global_requirements, higher_requirements): # Workflow - WorkflowStep conflict step = create_step(requirements=[higher_requirements]) workflow = create_workflow(requirements=[global_requirements], steps=[step]) - with pytest.raises(RequirementError): - ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + submit_task(workflow) # Workflow - WorkflowStep.run conflict run = create_commandlinetool(requirements=[higher_requirements]) step = create_step(run=run) workflow = create_workflow(requirements=[global_requirements], steps=[step]) - with pytest.raises(RequirementError): - ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + submit_task(workflow) # Workflow - NestedWorkflow conflict nest_workflow = create_workflow(requirements=[higher_requirements]) step = create_step(run=nest_workflow) workflow = create_workflow(requirements=[global_requirements], steps=[step]) - with pytest.raises(RequirementError): - ResourceRequirementValidator(cwl_object=workflow).validate_requirements() + submit_task(workflow) @pytest.mark.parametrize( @@ -834,6 +853,10 @@ def test_bad_global_requirements(global_requirements, higher_requirements): ResourceRequirement(coresMin=2, coresMax=4), # ram ResourceRequirement(ramMin=1024, ramMax=2048), + # tmpdir + ResourceRequirement(tmpdirMin=512, tmpdirMax=1024), + # outdir + ResourceRequirement(outdirMin=256, outdirMax=512), ], ) def test_production_requirements(requirements): From eeb44968dd8b3014aad72ac0ce9064f017536d57 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Mon, 16 Feb 2026 16:45:38 +0100 Subject: [PATCH 14/24] fix: rebase and fixed tests --- README.md | 2 +- src/dirac_cwl/submission_models.py | 138 ++++----- src/dirac_cwl_proto/job/__init__.py | 416 ---------------------------- test/test_resource_requirements.py | 165 +++++++++++ test/test_workflows.py | 176 ------------ 5 files changed, 240 insertions(+), 657 deletions(-) delete mode 100644 src/dirac_cwl_proto/job/__init__.py create mode 100644 test/test_resource_requirements.py diff --git a/README.md b/README.md index 14c29b24..d0e46087 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ # Dirac CWL Prototype ![Workflow tests](https://github.com/DIRACGrid/dirac-cwl/actions/workflows/main.yml/badge.svg?branch=main) -![Schema Generation](https://github.com/DIRACGrid/dirac-cwl/actions/workflows/generate-schemas.yml/badge.svg?branch=main) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/dirac-cwl.svg)](https://anaconda.org/conda-forge/dirac-cwl) [![Conda Recipe](https://img.shields.io/badge/recipe-dirac--cwl-green.svg)](https://anaconda.org/conda-forge/dirac-cwl) +![Schema Generation](https://github.com/DIRACGrid/dirac-cwl/actions/workflows/generate-schemas.yml/badge.svg?branch=main) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/dirac-cwl.svg)](https://anaconda.org/conda-forge/dirac-cwl) [![Conda Recipe](https://img.shields.io/badge/recipe-dirac--cwl-green.svg)](https://anaconda.org/conda-forge/dirac-cwl) This Python prototype introduces a command-line interface (CLI) designed for the end-to-end execution of Common Workflow Language (CWL) workflows at different scales. It enables users to locally test CWL workflows, and then run them as jobs, transformations and/or productions. diff --git a/src/dirac_cwl/submission_models.py b/src/dirac_cwl/submission_models.py index 611a990a..f380430e 100644 --- a/src/dirac_cwl/submission_models.py +++ b/src/dirac_cwl/submission_models.py @@ -7,7 +7,7 @@ from __future__ import annotations -from typing import Any +from typing import Any, Optional from cwl_utils.parser import WorkflowStep, save from cwl_utils.parser.cwl_v1_2 import ( @@ -18,7 +18,7 @@ ) from pydantic import BaseModel, ConfigDict, field_serializer, model_validator -from dirac_cwl_proto.execution_hooks import ( +from dirac_cwl.execution_hooks import ( ExecutionHooksHint, SchedulingHint, TransformationExecutionHooksHint, @@ -40,37 +40,65 @@ class JobInputModel(BaseModel): @field_serializer("cwl") def serialize_cwl(self, value): + """Serialize CWL object to dictionary. + + :param value: CWL object to serialize. + :return: Serialized CWL dictionary. + """ return save(value) -class JobSubmissionModel(BaseModel): - """Job definition sent to the router.""" +class BaseJobModel(BaseModel): + """Base class for Job definition.""" # Allow arbitrary types to be passed to the model model_config = ConfigDict(arbitrary_types_allowed=True) task: CommandLineTool | Workflow | ExpressionTool - parameters: list[JobInputModel] | None = None - scheduling: SchedulingHint - execution_hooks: ExecutionHooksHint @model_validator(mode="before") def validate_job(cls, values): + """Validate job workflow. + + :param values: Model values dictionary. + :return: Validated values dictionary. + """ task = values.get("task") - # Validate Resource Requirement values of CWLObject, will raise ValueError if needed. + # ResourceRequirement validation validate_resource_requirements(task) + # Hints validation + ExecutionHooksHint.from_cwl(task), SchedulingHint.from_cwl(task) + return values @field_serializer("task") def serialize_task(self, value): + """Serialize CWL task object to dictionary. + + :param value: CWL task object to serialize. + :return: Serialized task dictionary. + :raises TypeError: If value is not a valid CWL task type. + """ if isinstance(value, (CommandLineTool, Workflow, ExpressionTool)): return save(value) else: raise TypeError(f"Cannot serialize type {type(value)}") +class JobSubmissionModel(BaseJobModel): + """Job definition sent to the router.""" + + inputs: list[JobInputModel] | None = None + + +class JobModel(BaseJobModel): + """Job definition sent to the job wrapper.""" + + input: Optional[JobInputModel] = None + + # ----------------------------------------------------------------------------- # Transformation models # ----------------------------------------------------------------------------- @@ -83,16 +111,31 @@ class TransformationSubmissionModel(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) task: CommandLineTool | Workflow | ExpressionTool - execution_hooks: TransformationExecutionHooksHint - scheduling: SchedulingHint @field_serializer("task") def serialize_task(self, value): + """Serialize CWL task object to dictionary. + + :param value: CWL task object to serialize. + :return: Serialized task dictionary. + :raises TypeError: If value is not a valid CWL task type. + """ if isinstance(value, (CommandLineTool, Workflow, ExpressionTool)): return save(value) else: raise TypeError(f"Cannot serialize type {type(value)}") + @model_validator(mode="before") + def validate_hints(cls, values): + """Validate transformation execution hooks and scheduling hints in the task. + + :param values: Model values dictionary. + :return: Validated values dictionary. + """ + task = values.get("task") + TransformationExecutionHooksHint.from_cwl(task), SchedulingHint.from_cwl(task) + return values + # ----------------------------------------------------------------------------- # Production models @@ -106,59 +149,33 @@ class ProductionSubmissionModel(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) task: Workflow - # Key: step name, Value: description & execution_hooks of a transformation - steps_execution_hooks: dict[str, TransformationExecutionHooksHint] - # Key: step name, Value: scheduling configuration for a transformation - steps_scheduling: dict[str, SchedulingHint] = {} - - @model_validator(mode="before") - def validate_production(cls, values): - task = values.get("task") - - # Metadata - steps_execution_hooks = values.get("steps_execution_hooks") - - if task and steps_execution_hooks: - # Extract the available steps in the task - task_steps = set([step.id.split("#")[-1] for step in task.steps]) - metadata_keys = set(steps_execution_hooks.keys()) - - # Check if all metadata keys exist in the task's workflow steps - missing_steps = metadata_keys - task_steps - if missing_steps: - raise ValueError( - f"The following steps are missing from the task workflow: {missing_steps}" - ) - - # ResourceRequirement - if any(req.class_ == "ResourceRequirement" for req in task.requirements): - raise ValueError( - "Global ResourceRequirement is not allowed in productions." - ) - - return values @field_serializer("task") def serialize_task(self, value): + """Serialize CWL workflow object to dictionary. + + :param value: CWL workflow object to serialize. + :return: Serialized workflow dictionary. + :raises TypeError: If value is not a valid CWL workflow type. + """ if isinstance(value, (ExpressionTool, CommandLineTool, Workflow)): return save(value) else: raise TypeError(f"Cannot serialize type {type(value)}") + @model_validator(mode="before") + def validate_production(cls, values): + """Validate production workflow.""" + task = values.get("task") -# ----------------------------------------------------------------------------- -# Module helpers -# ----------------------------------------------------------------------------- -def extract_dirac_hints(cwl: Any) -> tuple[ExecutionHooksHint, SchedulingHint]: - """Thin wrapper that returns (ExecutionHooksHint, SchedulingHint). + # ResourceRequirement validation + if any(req.class_ == "ResourceRequirement" for req in task.requirements): + raise ValueError("ResourceRequirement is not allowed at Production-level.") - Prefer the class-factory APIs `ExecutionHooksHint.from_cwl` and - `SchedulingHint.from_cwl` for new code. This helper remains for - convenience. - """ - return ExecutionHooksHint.from_cwl(cwl), SchedulingHint.from_cwl(cwl) + return values +# ResourceRequirement validations, temporary code, waiting on cwltool PR: https://github.com/common-workflow-language/cwltool/pull/2179. def validate_resource_requirements(task): """ Validate ResourceRequirements of a task (CommandLineTool, Workflow, WorkflowStep, WorkflowStep.run). @@ -190,11 +207,10 @@ def validate_resource_requirements(task): def validate_resource_requirement(requirement, cwl_req=None): - """ - Validate a ResourceRequirement. + """Validate a ResourceRequirement. + Verify: - that resourceMin is not higher than resourceMax (CommandLineTool, Workflow, WorkflowStep, WorkflowStep.run) - --> #TODO this should be done by cwl-utils/cwltool later - that resourceMin (WorkflowStep, WorkflowStep.run) is not higher than global (Workflow) resourceMax. :param requirement: The current ResourceRequirement to validate. @@ -202,17 +218,11 @@ def validate_resource_requirement(requirement, cwl_req=None): :raises ValueError: If the requirement is invalid. """ - def check_resource( - current_resource, req_min_value, req_max_value, global_max_value=None - ): + def check_resource(current_resource, req_min_value, req_max_value, global_max_value=None): if req_min_value and req_max_value and req_min_value > req_max_value: - raise ValueError( - f"{current_resource}Min is higher than {current_resource}Max" - ) + raise ValueError(f"{current_resource}Min is higher than {current_resource}Max") if global_max_value and req_min_value and req_min_value > global_max_value: - raise ValueError( - f"{current_resource}Min is higher than global {current_resource}Max" - ) + raise ValueError(f"{current_resource}Min is higher than global {current_resource}Max") for resource, min_value, max_value in [ ("ram", requirement.ramMin, requirement.ramMax), @@ -232,7 +242,7 @@ def get_resource_requirement( cwl_object: Workflow | CommandLineTool | WorkflowStep, ) -> ResourceRequirement | None: """ - Extract the resource requirement from the current cwl_object + Extract the resource requirement from the current cwl_object. :param cwl_object: The cwl_object to extract the requirement from. :return: The resource requirement object, or None if not found. diff --git a/src/dirac_cwl_proto/job/__init__.py b/src/dirac_cwl_proto/job/__init__.py deleted file mode 100644 index 5cfabcff..00000000 --- a/src/dirac_cwl_proto/job/__init__.py +++ /dev/null @@ -1,416 +0,0 @@ -""" -CLI interface to run a workflow as a job. -""" - -import logging -import random -import shutil -import subprocess -import tarfile -from pathlib import Path -from typing import Any, cast - -import typer -from cwl_utils.pack import pack -from cwl_utils.parser import load_document, save -from cwl_utils.parser.cwl_v1_2 import ( - CommandLineTool, - ExpressionTool, - File, - Saveable, - Workflow, -) -from cwl_utils.parser.cwl_v1_2_utils import load_inputfile -from rich import print_json -from rich.console import Console -from rich.text import Text -from ruamel.yaml import YAML -from schema_salad.exceptions import ValidationException - -from dirac_cwl_proto.execution_hooks import ( - ExecutionHooksBasePlugin, -) -from dirac_cwl_proto.submission_models import ( - JobInputModel, - JobSubmissionModel, - extract_dirac_hints, -) - -app = typer.Typer() -console = Console() - -# ----------------------------------------------------------------------------- -# dirac-cli commands -# ----------------------------------------------------------------------------- - - -@app.command("submit") -def submit_job_client( - task_path: str = typer.Argument(..., help="Path to the CWL file"), - parameter_path: list[str] - | None = typer.Option(None, help="Path to the files containing the metadata"), - # Specific parameter for the purpose of the prototype - local: bool - | None = typer.Option( - True, help="Run the job locally instead of submitting it to the router" - ), -): - """ - Correspond to the dirac-cli command to submit jobs - - This command will: - - Validate the workflow - - Start the jobs - """ - # Validate the workflow - console.print( - "[blue]:information_source:[/blue] [bold]CLI:[/bold] Validating the job(s)..." - ) - try: - task = load_document(pack(task_path)) - except FileNotFoundError as ex: - console.print( - f"[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to load the task:\n{ex}" - ) - return typer.Exit(code=1) - except ValidationException as ex: - console.print( - f"[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to validate the task:\n{ex}" - ) - return typer.Exit(code=1) - - console.print(f"\t[green]:heavy_check_mark:[/green] Task {task_path}") - - # Extract and validate dirac hints; unknown hints are logged as warnings. - try: - job_metadata, job_scheduling = extract_dirac_hints(task) - except Exception as exc: - console.print( - f"[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Invalid DIRAC hints:\n{exc}" - ) - return typer.Exit(code=1) - - console.print("\t[green]:heavy_check_mark:[/green] Metadata") - console.print("\t[green]:heavy_check_mark:[/green] Description") - - parameters = [] - if parameter_path: - for parameter_p in parameter_path: - try: - parameter = load_inputfile(parameter_p) - except Exception as ex: - console.print( - f"[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to validate the parameter:\n{ex}" - ) - return typer.Exit(code=1) - - overrides = parameter.pop("cwltool:overrides", {}) - if overrides: - override_hints = overrides[next(iter(overrides))].get("hints", {}) - if override_hints: - job_scheduling = job_scheduling.model_copy( - update=override_hints.pop("dirac:scheduling", {}) - ) - job_metadata = job_metadata.model_copy( - update=override_hints.pop("dirac:execution-hooks", {}) - ) - - # Upload the local files to the sandbox store - sandbox_id = upload_local_input_files(parameter) - - parameters.append( - JobInputModel( - sandbox=[sandbox_id] if sandbox_id else None, - cwl=parameter, - ) - ) - console.print( - f"\t[green]:heavy_check_mark:[/green] Parameter {parameter_p}" - ) - - job = JobSubmissionModel( - task=task, - parameters=parameters, - scheduling=job_scheduling, - execution_hooks=job_metadata, - ) - console.print( - "[green]:heavy_check_mark:[/green] [bold]CLI:[/bold] Job(s) validated." - ) - - # Submit the job - console.print( - "[blue]:information_source:[/blue] [bold]CLI:[/bold] Submitting the job(s) to service..." - ) - print_json(job.model_dump_json(indent=4)) - if not submit_job_router(job): - console.print( - "[red]:heavy_multiplication_x:[/red] [bold]CLI:[/bold] Failed to run job(s)." - ) - return typer.Exit(code=1) - console.print("[green]:heavy_check_mark:[/green] [bold]CLI:[/bold] Job(s) done.") - - -def upload_local_input_files(input_data: dict[str, Any]) -> str | None: - """ - Extract the files from the parameters. - - :param parameters: The parameters of the job - - :return: The list of files - """ - Path("sandboxstore").mkdir(exist_ok=True) - - # Get the files from the input data - files = [] - for _, input_value in input_data.items(): - if isinstance(input_value, list): - for item in input_value: - if isinstance(item, File): - files.append(item) - elif isinstance(input_value, File): - files.append(input_value) - - if not files: - return None - - # Tar the files and upload them to the file catalog - sandbox_path = ( - Path("sandboxstore") / f"input_sandbox_{random.randint(1000, 9999)}.tar.gz" - ) - with tarfile.open(sandbox_path, "w:gz") as tar: - for file in files: - # TODO: path is not the only attribute to consider, but so far it is the only one used - if not file.path: - raise NotImplementedError("File path is not defined.") - - file_path = Path(file.path.replace("file://", "")) - console.print( - f"\t\t[blue]:information_source:[/blue] Found {file_path} locally, uploading it to the sandbox store..." - ) - tar.add(file_path, arcname=file_path.name) - console.print( - f"\t\t[blue]:information_source:[/blue] File(s) will be available through {sandbox_path}" - ) - - # Modify the location of the files to point to the future location on the worker node - for file in files: - # TODO: path is not the only attribute to consider, but so far it is the only one used - if not file.path: - raise NotImplementedError("File path is not defined.") - - file.path = str(Path(".") / file.path.split("/")[-1]) - - sandbox_id = sandbox_path.name.replace(".tar.gz", "") - return sandbox_id - - -# ----------------------------------------------------------------------------- -# dirac-router commands -# ----------------------------------------------------------------------------- - - -def submit_job_router(job: JobSubmissionModel) -> bool: - """ - Execute a job using the router. - - :param job: The task to execute - - :return: True if the job executed successfully, False otherwise - """ - logger = logging.getLogger("JobRouter") - - # Validate the jobs - logger.info("Validating the job(s)...") - - # Initiate 1 job per parameter - jobs = [] - if not job.parameters: - jobs.append(job) - else: - for parameter in job.parameters: - jobs.append( - JobSubmissionModel( - task=job.task, - parameters=[parameter], - scheduling=job.scheduling, - execution_hooks=job.execution_hooks, - ) - ) - logger.info("Job(s) validated!") - - # Simulate the submission of the job (just execute the job locally) - logger.info("Running jobs...") - results = [] - for job in jobs: - logger.info("Running job:\n") - print_json(job.model_dump_json(indent=4)) - results.append(run_job(job)) - logger.info("Jobs done.") - - return all(results) - - -# ----------------------------------------------------------------------------- -# JobWrapper -# ----------------------------------------------------------------------------- -def _pre_process( - executable: CommandLineTool | Workflow | ExpressionTool, - arguments: JobInputModel | None, - runtime_metadata: ExecutionHooksBasePlugin | None, - job_path: Path, -) -> list[str]: - """ - Pre-process the job before execution. - - :return: True if the job is pre-processed successfully, False otherwise - """ - logger = logging.getLogger("JobWrapper - Pre-process") - - # Prepare the task for cwltool - logger.info("Preparing the task for cwltool...") - command = ["cwltool"] - - task_dict = save(executable) - task_path = job_path / "task.cwl" - with open(task_path, "w") as task_file: - YAML().dump(task_dict, task_file) - command.append(str(task_path.name)) - - if arguments: - if arguments.sandbox: - # Download the files from the sandbox store - logger.info("Downloading the files from the sandbox store...") - for sandbox in arguments.sandbox: - sandbox_path = Path("sandboxstore") / f"{sandbox}.tar.gz" - with tarfile.open(sandbox_path, "r:gz") as tar: - tar.extractall(job_path, filter="data") - logger.info("Files downloaded successfully!") - - # Download input data from the file catalog - logger.info("Downloading input data from the file catalog...") - input_data = [] - for _, input_value in arguments.cwl.items(): - input = input_value - if not isinstance(input_value, list): - input = [input_value] - - for item in input: - if not isinstance(item, File): - continue - - # TODO: path is not the only attribute to consider, but so far it is the only one used - if not item.path: - raise NotImplementedError("File path is not defined.") - - input_path = Path(item.path) - if "filecatalog" in input_path.parts: - input_data.append(item) - - for file in input_data: - # TODO: path is not the only attribute to consider, but so far it is the only one used - if not file.path: - raise NotImplementedError("File path is not defined.") - - input_path = Path(file.path) - shutil.copy(input_path, job_path / input_path.name) - file.path = file.path.split("/")[-1] - logger.info("Input data downloaded successfully!") - - # Prepare the parameters for cwltool - logger.info("Preparing the parameters for cwltool...") - parameter_dict = save(cast(Saveable, arguments.cwl)) - parameter_path = job_path / "parameter.cwl" - with open(parameter_path, "w") as parameter_file: - YAML().dump(parameter_dict, parameter_file) - command.append(str(parameter_path.name)) - if runtime_metadata: - return runtime_metadata.pre_process(job_path, command) - - return command - - -def _post_process( - status: int, - stdout: str, - stderr: str, - job_path: Path, - runtime_metadata: ExecutionHooksBasePlugin | None, -): - """ - Post-process the job after execution. - - :return: True if the job is post-processed successfully, False otherwise - """ - logger = logging.getLogger("JobWrapper - Post-process") - if status != 0: - raise RuntimeError(f"Error {status} during the task execution.") - - logger.info(stdout) - logger.info(stderr) - - if runtime_metadata: - return runtime_metadata.post_process(job_path) - - return True - - -def run_job(job: JobSubmissionModel) -> bool: - """ - Executes a given CWL workflow using cwltool. - This is the equivalent of the DIRAC JobWrapper. - - :return: True if the job is executed successfully, False otherwise - """ - logger = logging.getLogger("JobWrapper") - # Instantiate runtime metadata from the serializable descriptor and - # the job context so implementations can access task inputs/overrides. - runtime_metadata = ( - job.execution_hooks.to_runtime(job) if job.execution_hooks else None - ) - - # Isolate the job in a specific directory - job_path = Path(".") / "workernode" / f"{random.randint(1000, 9999)}" - job_path.mkdir(parents=True, exist_ok=True) - - try: - # Pre-process the job - logger.info("Pre-processing Task...") - command = _pre_process( - job.task, - job.parameters[0] if job.parameters else None, - runtime_metadata, - job_path, - ) - logger.info("Task pre-processed successfully!") - - # Execute the task - logger.info(f"Executing Task: {command}") - result = subprocess.run(command, capture_output=True, text=True, cwd=job_path) - - if result.returncode != 0: - logger.error( - f"Error in executing workflow:\n{Text.from_ansi(result.stderr)}" - ) - return False - logger.info("Task executed successfully!") - - # Post-process the job - logger.info("Post-processing Task...") - _post_process( - result.returncode, - result.stdout, - result.stderr, - job_path, - runtime_metadata, - ) - logger.info("Task post-processed successfully!") - return True - - except Exception: - logger.exception("JobWrapper: Failed to execute workflow") - return False - finally: - # Clean up - if job_path.exists(): - shutil.rmtree(job_path) diff --git a/test/test_resource_requirements.py b/test/test_resource_requirements.py new file mode 100644 index 00000000..796e8062 --- /dev/null +++ b/test/test_resource_requirements.py @@ -0,0 +1,165 @@ +"""Integration tests for CWL Resource Requirements validation.""" + +import pytest +from cwl_utils.parser.cwl_v1_2 import ( + CommandLineTool, + ResourceRequirement, + Workflow, + WorkflowStep, +) + +from dirac_cwl.submission_models import JobSubmissionModel, ProductionSubmissionModel, TransformationSubmissionModel + + +# Helper functions +def create_commandlinetool(requirements=None, inputs=None, outputs=None): + """Create a CommandLineTool with the given requirements, inputs, and outputs.""" + return CommandLineTool( + requirements=requirements or [], + inputs=inputs or [], + outputs=outputs or [], + ) + + +def create_workflow(requirements=None, steps=None, inputs=None, outputs=None): + """Create a Workflow with the given requirements, steps, inputs, and outputs.""" + return Workflow( + requirements=requirements or [], + steps=steps or [], + inputs=inputs or [], + outputs=outputs or [], + ) + + +def create_step(requirements=None, run=None, in_=None, out=None): + """Create a WorkflowStep with the given requirements, run, inputs, and outputs.""" + return WorkflowStep( + requirements=requirements or [], + run=run, + in_=in_ or [], + out=out or [], + ) + + +def submit_task(task): + """Submit failing tasks.""" + with pytest.raises(ValueError): + JobSubmissionModel(task=task) + TransformationSubmissionModel(task=task) + + +@pytest.mark.parametrize( + "bad_min_max_reqs", + [ + # cores + ResourceRequirement(coresMin=4, coresMax=2), + # ram + ResourceRequirement(ramMin=2048, ramMax=1024), + # tmpdir + ResourceRequirement(tmpdirMin=1024, tmpdirMax=512), + # outdir + ResourceRequirement(outdirMin=512, outdirMax=256), + ], +) +def test_bad_min_max_resource_reqs(bad_min_max_reqs): + """Test invalid min/max resource requirements in CWL objects.""" + # CommandlineTool with bad minmax reqs + clt = create_commandlinetool(requirements=[bad_min_max_reqs]) + submit_task(clt) + + # WorkflowStep.run with bad minmax reqs + step_bad_run = create_step(run=clt) + workflow = create_workflow(steps=[step_bad_run]) + submit_task(workflow) + + # WorkflowStep with bad minmax reqs + clt = create_commandlinetool() + step = create_step(run=clt, requirements=[bad_min_max_reqs]) + workflow = create_workflow(steps=[step]) + submit_task(workflow) + + # Workflow with bad minmax reqs + workflow = create_workflow(requirements=[bad_min_max_reqs]) + submit_task(workflow) + + # NestedWorkflow with bad minmax reqs + nest_workflow = create_workflow(requirements=[bad_min_max_reqs]) + step = create_step(run=nest_workflow) + workflow = create_workflow(steps=[step]) + submit_task(workflow) + + # DeepNestedWorkflow with bad minmax reqs + deep_workflow = create_workflow(requirements=[bad_min_max_reqs]) + deep_step = create_step(run=deep_workflow) + nest_workflow = create_workflow(steps=[deep_step]) + step = create_step(run=nest_workflow) + workflow = create_workflow(steps=[step]) + submit_task(workflow) + + +@pytest.mark.parametrize( + ("global_requirements", "higher_requirements"), + [ + # cores + ( + ResourceRequirement(coresMax=2), + ResourceRequirement(coresMin=4), + ), + # ram + ( + ResourceRequirement(ramMax=512), + ResourceRequirement(ramMin=1024), + ), + # tmpdir + ( + ResourceRequirement(tmpdirMax=512), + ResourceRequirement(tmpdirMin=1024), + ), + # outdir + (ResourceRequirement(outdirMax=256), ResourceRequirement(outdirMin=512)), + ], +) +def test_bad_global_requirements(global_requirements, higher_requirements): + """Test global requirements conflicts.""" + # Workflow - WorkflowStep conflict + step = create_step(requirements=[higher_requirements]) + workflow = create_workflow(requirements=[global_requirements], steps=[step]) + submit_task(workflow) + + # Workflow - WorkflowStep.run conflict + run = create_commandlinetool(requirements=[higher_requirements]) + step = create_step(run=run) + workflow = create_workflow(requirements=[global_requirements], steps=[step]) + submit_task(workflow) + + # Workflow - NestedWorkflow conflict + nest_workflow = create_workflow(requirements=[higher_requirements]) + step = create_step(run=nest_workflow) + workflow = create_workflow(requirements=[global_requirements], steps=[step]) + submit_task(workflow) + + +@pytest.mark.parametrize( + "requirements", + [ + # cores + ResourceRequirement(coresMin=2, coresMax=4), + # ram + ResourceRequirement(ramMin=1024, ramMax=2048), + # tmpdir + ResourceRequirement(tmpdirMin=512, tmpdirMax=1024), + # outdir + ResourceRequirement(outdirMin=256, outdirMax=512), + ], +) +def test_production_requirements(requirements): + """Test production case requirements.""" + # Production workflows can't have global requirements + workflow = create_workflow(requirements=[requirements]) + with pytest.raises(ValueError): + ProductionSubmissionModel(task=workflow) + + # Production workflows can have step requirements + step = create_step(requirements=[requirements]) + workflow = create_workflow(steps=[step]) + ProductionSubmissionModel(task=workflow) diff --git a/test/test_workflows.py b/test/test_workflows.py index 5fa8fea3..a9ec8faf 100644 --- a/test/test_workflows.py +++ b/test/test_workflows.py @@ -5,12 +5,6 @@ from pathlib import Path import pytest -from cwl_utils.parser.cwl_v1_2 import ( - CommandLineTool, - ResourceRequirement, - Workflow, - WorkflowStep, -) from typer.testing import CliRunner from dirac_cwl_proto import app @@ -703,173 +697,3 @@ def test_run_production_validation_failure( f"Expected error '{expected_error}' not found in " f"stdout: {clean_output}, stderr: {clean_stderr}, exception: {clean_exception}" ) - - -# ----------------------------------------------------------------------------- -# Resource Requirements tests -# ----------------------------------------------------------------------------- - - -# Helper functions -def create_commandlinetool(requirements=None, inputs=None, outputs=None): - return CommandLineTool( - requirements=requirements or [], - inputs=inputs or [], - outputs=outputs or [], - ) - - -def create_workflow(requirements=None, steps=None, inputs=None, outputs=None): - return Workflow( - requirements=requirements or [], - steps=steps or [], - inputs=inputs or [], - outputs=outputs or [], - ) - - -def create_step(requirements=None, run=None, in_=None, out=None): - return WorkflowStep( - requirements=requirements or [], - run=run, - in_=in_ or [], - out=out or [], - ) - - -def submit_task(task): - with pytest.raises(ValueError): - JobSubmissionModel( - task=task, execution_hooks=ExecutionHooksHint(), scheduling=SchedulingHint() - ) - TransformationSubmissionModel( - task=task, - execution_hooks=TransformationExecutionHooksHint(), - scheduling=SchedulingHint(), - ) - - -@pytest.mark.parametrize( - "bad_min_max_reqs", - [ - # cores - ResourceRequirement(coresMin=4, coresMax=2), - # ram - ResourceRequirement(ramMin=2048, ramMax=1024), - # tmpdir - ResourceRequirement(tmpdirMin=1024, tmpdirMax=512), - # outdir - ResourceRequirement(outdirMin=512, outdirMax=256), - ], -) -def test_bad_min_max_resource_reqs(bad_min_max_reqs): - """ - Test invalid min/max resource requirements in CWL objects. - """ - - # CommandlineTool with bad minmax reqs - clt = create_commandlinetool(requirements=[bad_min_max_reqs]) - submit_task(clt) - - # WorkflowStep.run with bad minmax reqs - step_bad_run = create_step(run=clt) - workflow = create_workflow(steps=[step_bad_run]) - submit_task(workflow) - - # WorkflowStep with bad minmax reqs - clt = create_commandlinetool() - step = create_step(run=clt, requirements=[bad_min_max_reqs]) - workflow = create_workflow(steps=[step]) - submit_task(workflow) - - # Workflow with bad minmax reqs - workflow = create_workflow(requirements=[bad_min_max_reqs]) - submit_task(workflow) - - # NestedWorkflow with bad minmax reqs - nest_workflow = create_workflow(requirements=[bad_min_max_reqs]) - step = create_step(run=nest_workflow) - workflow = create_workflow(steps=[step]) - submit_task(workflow) - - # DeepNestedWorkflow with bad minmax reqs - deep_workflow = create_workflow(requirements=[bad_min_max_reqs]) - deep_step = create_step(run=deep_workflow) - nest_workflow = create_workflow(steps=[deep_step]) - step = create_step(run=nest_workflow) - workflow = create_workflow(steps=[step]) - submit_task(workflow) - - -@pytest.mark.parametrize( - ("global_requirements", "higher_requirements"), - [ - # cores - ( - ResourceRequirement(coresMax=2), - ResourceRequirement(coresMin=4), - ), - # ram - ( - ResourceRequirement(ramMax=512), - ResourceRequirement(ramMin=1024), - ), - # tmpdir - ( - ResourceRequirement(tmpdirMax=512), - ResourceRequirement(tmpdirMin=1024), - ), - # outdir - (ResourceRequirement(outdirMax=256), ResourceRequirement(outdirMin=512)), - ], -) -def test_bad_global_requirements(global_requirements, higher_requirements): - """ - Test global requirements conflicts. - """ - - # Workflow - WorkflowStep conflict - step = create_step(requirements=[higher_requirements]) - workflow = create_workflow(requirements=[global_requirements], steps=[step]) - submit_task(workflow) - - # Workflow - WorkflowStep.run conflict - run = create_commandlinetool(requirements=[higher_requirements]) - step = create_step(run=run) - workflow = create_workflow(requirements=[global_requirements], steps=[step]) - submit_task(workflow) - - # Workflow - NestedWorkflow conflict - nest_workflow = create_workflow(requirements=[higher_requirements]) - step = create_step(run=nest_workflow) - workflow = create_workflow(requirements=[global_requirements], steps=[step]) - submit_task(workflow) - - -@pytest.mark.parametrize( - "requirements", - [ - # cores - ResourceRequirement(coresMin=2, coresMax=4), - # ram - ResourceRequirement(ramMin=1024, ramMax=2048), - # tmpdir - ResourceRequirement(tmpdirMin=512, tmpdirMax=1024), - # outdir - ResourceRequirement(outdirMin=256, outdirMax=512), - ], -) -def test_production_requirements(requirements): - """ - Test production case requirements. - """ - - # Production workflows can't have global requirements - workflow = create_workflow(requirements=[requirements]) - with pytest.raises(ValueError): - ProductionSubmissionModel(task=workflow, steps_execution_hooks={}) - - # Production workflows can have step requirements - step = create_step(requirements=[requirements]) - workflow = create_workflow(steps=[step]) - ProductionSubmissionModel(task=workflow, steps_execution_hooks={}) From f7b90acc8468acf553d00be783cae556c1c986d8 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Tue, 17 Feb 2026 11:52:32 +0100 Subject: [PATCH 15/24] docs: updated docstring --- src/dirac_cwl/submission_models.py | 40 +++++++++++++++++++----------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/src/dirac_cwl/submission_models.py b/src/dirac_cwl/submission_models.py index f380430e..546d4ae2 100644 --- a/src/dirac_cwl/submission_models.py +++ b/src/dirac_cwl/submission_models.py @@ -175,25 +175,30 @@ def validate_production(cls, values): return values -# ResourceRequirement validations, temporary code, waiting on cwltool PR: https://github.com/common-workflow-language/cwltool/pull/2179. +# ----------------------------------------------------------------------------- +# ResourceRequirement validations +# ----------------------------------------------------------------------------- +# Temporary code, waiting on cwltool PR: https://github.com/common-workflow-language/cwltool/pull/2179. + + def validate_resource_requirements(task): """ Validate ResourceRequirements of a task (CommandLineTool, Workflow, WorkflowStep, WorkflowStep.run). :param task: The task to validate """ - cwl_req = get_resource_requirement(task) + cwl_req = _get_resource_requirement(task) # Validate Workflow/CLT requirements. if cwl_req: - validate_resource_requirement(cwl_req) + _validate_resource_requirement(cwl_req) # Validate WorkflowStep requirements. if not isinstance(task, CommandLineTool) and task.steps: for step in task.steps: - step_req = get_resource_requirement(step) + step_req = _get_resource_requirement(step) if step_req: - validate_resource_requirement(step_req, cwl_req=cwl_req) + _validate_resource_requirement(step_req, cwl_req=cwl_req) # Validate run requirements for each step if they exist. if step.run: @@ -201,27 +206,34 @@ def validate_resource_requirements(task): # Validate nested Workflow requirements, if any. validate_resource_requirements(task=step.run) - step_run_req = get_resource_requirement(step.run) + step_run_req = _get_resource_requirement(step.run) if step_run_req: - validate_resource_requirement(step_run_req, cwl_req=cwl_req) + _validate_resource_requirement(step_run_req, cwl_req=cwl_req) -def validate_resource_requirement(requirement, cwl_req=None): +def _validate_resource_requirement(requirement, cwl_req=None): """Validate a ResourceRequirement. Verify: - that resourceMin is not higher than resourceMax (CommandLineTool, Workflow, WorkflowStep, WorkflowStep.run) - - that resourceMin (WorkflowStep, WorkflowStep.run) is not higher than global (Workflow) resourceMax. + - that resourceMin (WorkflowStep, WorkflowStep.run) is not higher than the Workflow-level resourceMax. :param requirement: The current ResourceRequirement to validate. - :param cwl_req: The global Workflow/CLT requirement, if any. + :param cwl_req: The Workflow-level/CLT requirement, if any. :raises ValueError: If the requirement is invalid. """ - def check_resource(current_resource, req_min_value, req_max_value, global_max_value=None): + def _check_resource(current_resource, req_min_value, req_max_value, wf_req_max_value=None): + """Check single resource requirement values. + + :param current_resource: The current checked resource (ram, cores, tmpdir, outdir). + :param req_min_value: The current resourceMin value. + :param req_max_value: The current resourceMax value. + :param wf_req_max_value: The Workflow-level resourceMax value, if any. + """ if req_min_value and req_max_value and req_min_value > req_max_value: raise ValueError(f"{current_resource}Min is higher than {current_resource}Max") - if global_max_value and req_min_value and req_min_value > global_max_value: + if wf_req_max_value and req_min_value and req_min_value > wf_req_max_value: raise ValueError(f"{current_resource}Min is higher than global {current_resource}Max") for resource, min_value, max_value in [ @@ -230,7 +242,7 @@ def check_resource(current_resource, req_min_value, req_max_value, global_max_va ("tmpdir", requirement.tmpdirMin, requirement.tmpdirMax), ("outdir", requirement.outdirMin, requirement.outdirMax), ]: - check_resource( + _check_resource( resource, min_value, max_value, @@ -238,7 +250,7 @@ def check_resource(current_resource, req_min_value, req_max_value, global_max_va ) -def get_resource_requirement( +def _get_resource_requirement( cwl_object: Workflow | CommandLineTool | WorkflowStep, ) -> ResourceRequirement | None: """ From a9bad2564acdf4015b41942c02d3e4404f2c131c Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Tue, 17 Feb 2026 14:02:48 +0100 Subject: [PATCH 16/24] chore: added validation to transformations --- src/dirac_cwl/submission_models.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/dirac_cwl/submission_models.py b/src/dirac_cwl/submission_models.py index 546d4ae2..ae8305d5 100644 --- a/src/dirac_cwl/submission_models.py +++ b/src/dirac_cwl/submission_models.py @@ -126,14 +126,20 @@ def serialize_task(self, value): raise TypeError(f"Cannot serialize type {type(value)}") @model_validator(mode="before") - def validate_hints(cls, values): - """Validate transformation execution hooks and scheduling hints in the task. + def validate_transformation(cls, values): + """Validate transformation workflow. :param values: Model values dictionary. :return: Validated values dictionary. """ task = values.get("task") + + # ResourceRequirement validation + validate_resource_requirements(task) + + # Hints validation TransformationExecutionHooksHint.from_cwl(task), SchedulingHint.from_cwl(task) + return values From 62bccef35ac2937740ac48a9147b251bd2a1f59b Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Tue, 17 Feb 2026 14:02:58 +0100 Subject: [PATCH 17/24] fix: fixed tests and added docstring --- test/test_resource_requirements.py | 102 ++++++++++++++--------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/test/test_resource_requirements.py b/test/test_resource_requirements.py index 796e8062..6955a421 100644 --- a/test/test_resource_requirements.py +++ b/test/test_resource_requirements.py @@ -1,18 +1,22 @@ """Integration tests for CWL Resource Requirements validation.""" +from typing import List, Optional + import pytest -from cwl_utils.parser.cwl_v1_2 import ( - CommandLineTool, - ResourceRequirement, - Workflow, - WorkflowStep, -) +from cwl_utils.parser.cwl_v1_2 import CommandLineTool, ResourceRequirement, Workflow, WorkflowStep from dirac_cwl.submission_models import JobSubmissionModel, ProductionSubmissionModel, TransformationSubmissionModel - +# ----------------------------------------------------------------------------- # Helper functions -def create_commandlinetool(requirements=None, inputs=None, outputs=None): +# ----------------------------------------------------------------------------- + + +def create_commandlinetool( + requirements: Optional[list] = None, + inputs: Optional[list] = None, + outputs: Optional[list] = None, +) -> CommandLineTool: """Create a CommandLineTool with the given requirements, inputs, and outputs.""" return CommandLineTool( requirements=requirements or [], @@ -21,7 +25,12 @@ def create_commandlinetool(requirements=None, inputs=None, outputs=None): ) -def create_workflow(requirements=None, steps=None, inputs=None, outputs=None): +def create_workflow( + requirements: Optional[list] = None, + steps: Optional[List[WorkflowStep]] = None, + inputs: Optional[list] = None, + outputs: Optional[list] = None, +) -> Workflow: """Create a Workflow with the given requirements, steps, inputs, and outputs.""" return Workflow( requirements=requirements or [], @@ -31,7 +40,12 @@ def create_workflow(requirements=None, steps=None, inputs=None, outputs=None): ) -def create_step(requirements=None, run=None, in_=None, out=None): +def create_step( + requirements: Optional[list] = None, + run: Optional[CommandLineTool | Workflow] = None, + in_: Optional[list] = None, + out: Optional[list] = None, +) -> WorkflowStep: """Create a WorkflowStep with the given requirements, run, inputs, and outputs.""" return WorkflowStep( requirements=requirements or [], @@ -41,23 +55,26 @@ def create_step(requirements=None, run=None, in_=None, out=None): ) -def submit_task(task): - """Submit failing tasks.""" +def assert_submission_fails(task): + """Assert that submission fails with ValueError for Job and Transformation models with bad resource requirements. + + :param: CWL task to submit (Workflow, WorkflowStep, CommandLineTool, etc.) + """ with pytest.raises(ValueError): JobSubmissionModel(task=task) + with pytest.raises(ValueError): TransformationSubmissionModel(task=task) +# ----------------------------------------------------------------------------- +# Resource requirements tests +# ----------------------------------------------------------------------------- @pytest.mark.parametrize( "bad_min_max_reqs", [ - # cores ResourceRequirement(coresMin=4, coresMax=2), - # ram ResourceRequirement(ramMin=2048, ramMax=1024), - # tmpdir ResourceRequirement(tmpdirMin=1024, tmpdirMax=512), - # outdir ResourceRequirement(outdirMin=512, outdirMax=256), ], ) @@ -65,28 +82,28 @@ def test_bad_min_max_resource_reqs(bad_min_max_reqs): """Test invalid min/max resource requirements in CWL objects.""" # CommandlineTool with bad minmax reqs clt = create_commandlinetool(requirements=[bad_min_max_reqs]) - submit_task(clt) + assert_submission_fails(clt) # WorkflowStep.run with bad minmax reqs step_bad_run = create_step(run=clt) workflow = create_workflow(steps=[step_bad_run]) - submit_task(workflow) + assert_submission_fails(workflow) # WorkflowStep with bad minmax reqs clt = create_commandlinetool() step = create_step(run=clt, requirements=[bad_min_max_reqs]) workflow = create_workflow(steps=[step]) - submit_task(workflow) + assert_submission_fails(workflow) # Workflow with bad minmax reqs workflow = create_workflow(requirements=[bad_min_max_reqs]) - submit_task(workflow) + assert_submission_fails(workflow) # NestedWorkflow with bad minmax reqs nest_workflow = create_workflow(requirements=[bad_min_max_reqs]) step = create_step(run=nest_workflow) workflow = create_workflow(steps=[step]) - submit_task(workflow) + assert_submission_fails(workflow) # DeepNestedWorkflow with bad minmax reqs deep_workflow = create_workflow(requirements=[bad_min_max_reqs]) @@ -94,67 +111,50 @@ def test_bad_min_max_resource_reqs(bad_min_max_reqs): nest_workflow = create_workflow(steps=[deep_step]) step = create_step(run=nest_workflow) workflow = create_workflow(steps=[step]) - submit_task(workflow) + assert_submission_fails(workflow) @pytest.mark.parametrize( - ("global_requirements", "higher_requirements"), + ("wf_level_requirements", "higher_requirements"), [ - # cores - ( - ResourceRequirement(coresMax=2), - ResourceRequirement(coresMin=4), - ), - # ram - ( - ResourceRequirement(ramMax=512), - ResourceRequirement(ramMin=1024), - ), - # tmpdir - ( - ResourceRequirement(tmpdirMax=512), - ResourceRequirement(tmpdirMin=1024), - ), - # outdir + (ResourceRequirement(coresMax=2), ResourceRequirement(coresMin=4)), + (ResourceRequirement(ramMax=512), ResourceRequirement(ramMin=1024)), + (ResourceRequirement(tmpdirMax=512), ResourceRequirement(tmpdirMin=1024)), (ResourceRequirement(outdirMax=256), ResourceRequirement(outdirMin=512)), ], ) -def test_bad_global_requirements(global_requirements, higher_requirements): +def test_bad_wf_level_requirements(wf_level_requirements, higher_requirements): """Test global requirements conflicts.""" # Workflow - WorkflowStep conflict step = create_step(requirements=[higher_requirements]) - workflow = create_workflow(requirements=[global_requirements], steps=[step]) - submit_task(workflow) + workflow = create_workflow(requirements=[wf_level_requirements], steps=[step]) + assert_submission_fails(workflow) # Workflow - WorkflowStep.run conflict run = create_commandlinetool(requirements=[higher_requirements]) step = create_step(run=run) - workflow = create_workflow(requirements=[global_requirements], steps=[step]) - submit_task(workflow) + workflow = create_workflow(requirements=[wf_level_requirements], steps=[step]) + assert_submission_fails(workflow) # Workflow - NestedWorkflow conflict nest_workflow = create_workflow(requirements=[higher_requirements]) step = create_step(run=nest_workflow) - workflow = create_workflow(requirements=[global_requirements], steps=[step]) - submit_task(workflow) + workflow = create_workflow(requirements=[wf_level_requirements], steps=[step]) + assert_submission_fails(workflow) @pytest.mark.parametrize( "requirements", [ - # cores ResourceRequirement(coresMin=2, coresMax=4), - # ram ResourceRequirement(ramMin=1024, ramMax=2048), - # tmpdir ResourceRequirement(tmpdirMin=512, tmpdirMax=1024), - # outdir ResourceRequirement(outdirMin=256, outdirMax=512), ], ) def test_production_requirements(requirements): """Test production case requirements.""" - # Production workflows can't have global requirements + # Production workflows can't have Workflow-level requirements workflow = create_workflow(requirements=[requirements]) with pytest.raises(ValueError): ProductionSubmissionModel(task=workflow) From b28993aff8d6f2e6f020334e11017cf2cef4652a Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Tue, 3 Mar 2026 10:37:04 +0100 Subject: [PATCH 18/24] fix: added ResReq validation for Production --- src/dirac_cwl/submission_models.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/dirac_cwl/submission_models.py b/src/dirac_cwl/submission_models.py index ae8305d5..3d0a0034 100644 --- a/src/dirac_cwl/submission_models.py +++ b/src/dirac_cwl/submission_models.py @@ -175,8 +175,7 @@ def validate_production(cls, values): task = values.get("task") # ResourceRequirement validation - if any(req.class_ == "ResourceRequirement" for req in task.requirements): - raise ValueError("ResourceRequirement is not allowed at Production-level.") + validate_resource_requirements(task) return values @@ -267,6 +266,6 @@ def _get_resource_requirement( """ requirements = getattr(cwl_object, "requirements", []) or [] for requirement in requirements: - if requirement.class_ == "ResourceRequirement": + if isinstance(requirement, ResourceRequirement): return requirement return None From 4e20817c9bcfee78626a84ffe463def7869398e5 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Tue, 3 Mar 2026 10:43:32 +0100 Subject: [PATCH 19/24] fix: removed validation case --- src/dirac_cwl/submission_models.py | 33 ++++++------------------------ 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/src/dirac_cwl/submission_models.py b/src/dirac_cwl/submission_models.py index 3d0a0034..36c75513 100644 --- a/src/dirac_cwl/submission_models.py +++ b/src/dirac_cwl/submission_models.py @@ -203,7 +203,7 @@ def validate_resource_requirements(task): for step in task.steps: step_req = _get_resource_requirement(step) if step_req: - _validate_resource_requirement(step_req, cwl_req=cwl_req) + _validate_resource_requirement(step_req) # Validate run requirements for each step if they exist. if step.run: @@ -213,46 +213,25 @@ def validate_resource_requirements(task): step_run_req = _get_resource_requirement(step.run) if step_run_req: - _validate_resource_requirement(step_run_req, cwl_req=cwl_req) + _validate_resource_requirement(step_run_req) -def _validate_resource_requirement(requirement, cwl_req=None): +def _validate_resource_requirement(requirement): """Validate a ResourceRequirement. - Verify: - - that resourceMin is not higher than resourceMax (CommandLineTool, Workflow, WorkflowStep, WorkflowStep.run) - - that resourceMin (WorkflowStep, WorkflowStep.run) is not higher than the Workflow-level resourceMax. + Verify that resourceMin is not higher than resourceMax (CommandLineTool, Workflow, WorkflowStep, WorkflowStep.run) :param requirement: The current ResourceRequirement to validate. - :param cwl_req: The Workflow-level/CLT requirement, if any. :raises ValueError: If the requirement is invalid. """ - - def _check_resource(current_resource, req_min_value, req_max_value, wf_req_max_value=None): - """Check single resource requirement values. - - :param current_resource: The current checked resource (ram, cores, tmpdir, outdir). - :param req_min_value: The current resourceMin value. - :param req_max_value: The current resourceMax value. - :param wf_req_max_value: The Workflow-level resourceMax value, if any. - """ - if req_min_value and req_max_value and req_min_value > req_max_value: - raise ValueError(f"{current_resource}Min is higher than {current_resource}Max") - if wf_req_max_value and req_min_value and req_min_value > wf_req_max_value: - raise ValueError(f"{current_resource}Min is higher than global {current_resource}Max") - for resource, min_value, max_value in [ ("ram", requirement.ramMin, requirement.ramMax), ("cores", requirement.coresMin, requirement.coresMax), ("tmpdir", requirement.tmpdirMin, requirement.tmpdirMax), ("outdir", requirement.outdirMin, requirement.outdirMax), ]: - _check_resource( - resource, - min_value, - max_value, - cwl_req and getattr(cwl_req, f"{resource}Max"), - ) + if min_value and max_value and min_value > max_value: + raise ValueError(f"{resource}Min is higher than {resource}Max") def _get_resource_requirement( From be8ac30552fd875470a5ea77b39b10fe3075d65a Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Tue, 3 Mar 2026 10:43:38 +0100 Subject: [PATCH 20/24] fix: fixed tests --- test/test_resource_requirements.py | 53 ++---------------------------- 1 file changed, 2 insertions(+), 51 deletions(-) diff --git a/test/test_resource_requirements.py b/test/test_resource_requirements.py index 6955a421..6c76e9d9 100644 --- a/test/test_resource_requirements.py +++ b/test/test_resource_requirements.py @@ -64,6 +64,8 @@ def assert_submission_fails(task): JobSubmissionModel(task=task) with pytest.raises(ValueError): TransformationSubmissionModel(task=task) + with pytest.raises(ValueError): + ProductionSubmissionModel(task=task) # ----------------------------------------------------------------------------- @@ -112,54 +114,3 @@ def test_bad_min_max_resource_reqs(bad_min_max_reqs): step = create_step(run=nest_workflow) workflow = create_workflow(steps=[step]) assert_submission_fails(workflow) - - -@pytest.mark.parametrize( - ("wf_level_requirements", "higher_requirements"), - [ - (ResourceRequirement(coresMax=2), ResourceRequirement(coresMin=4)), - (ResourceRequirement(ramMax=512), ResourceRequirement(ramMin=1024)), - (ResourceRequirement(tmpdirMax=512), ResourceRequirement(tmpdirMin=1024)), - (ResourceRequirement(outdirMax=256), ResourceRequirement(outdirMin=512)), - ], -) -def test_bad_wf_level_requirements(wf_level_requirements, higher_requirements): - """Test global requirements conflicts.""" - # Workflow - WorkflowStep conflict - step = create_step(requirements=[higher_requirements]) - workflow = create_workflow(requirements=[wf_level_requirements], steps=[step]) - assert_submission_fails(workflow) - - # Workflow - WorkflowStep.run conflict - run = create_commandlinetool(requirements=[higher_requirements]) - step = create_step(run=run) - workflow = create_workflow(requirements=[wf_level_requirements], steps=[step]) - assert_submission_fails(workflow) - - # Workflow - NestedWorkflow conflict - nest_workflow = create_workflow(requirements=[higher_requirements]) - step = create_step(run=nest_workflow) - workflow = create_workflow(requirements=[wf_level_requirements], steps=[step]) - assert_submission_fails(workflow) - - -@pytest.mark.parametrize( - "requirements", - [ - ResourceRequirement(coresMin=2, coresMax=4), - ResourceRequirement(ramMin=1024, ramMax=2048), - ResourceRequirement(tmpdirMin=512, tmpdirMax=1024), - ResourceRequirement(outdirMin=256, outdirMax=512), - ], -) -def test_production_requirements(requirements): - """Test production case requirements.""" - # Production workflows can't have Workflow-level requirements - workflow = create_workflow(requirements=[requirements]) - with pytest.raises(ValueError): - ProductionSubmissionModel(task=workflow) - - # Production workflows can have step requirements - step = create_step(requirements=[requirements]) - workflow = create_workflow(steps=[step]) - ProductionSubmissionModel(task=workflow) From 78f8653a1f9a238768e82d39adfdbc631cf55fef Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Tue, 3 Mar 2026 10:45:23 +0100 Subject: [PATCH 21/24] fix: pixi.lock --- pixi.lock | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pixi.lock b/pixi.lock index 76a3099b..4947ac9d 100644 --- a/pixi.lock +++ b/pixi.lock @@ -5,8 +5,6 @@ environments: - url: https://conda.anaconda.org/conda-forge/ indexes: - https://pypi.org/simple - options: - pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -1458,7 +1456,7 @@ packages: requires_python: '>=3.11' - pypi: ./ name: dirac-cwl - version: 1.1.2.dev3+g773310367 + version: 1.2.1.dev22+gf60c9bf3b sha256: 0b624b7b1adb33bc3b4cb29dbf85bb2db495122acda95aaa775f0aedef77767f requires_dist: - cwl-utils @@ -1480,6 +1478,7 @@ packages: - pytest-mock ; extra == 'testing' - mypy ; extra == 'testing' requires_python: '>=3.11' + editable: true - pypi: https://files.pythonhosted.org/packages/9f/90/279f55fff9481f9e0424c3c97b24dc10004ec8d8f98ddf5afd07a7b79194/diraccfg-1.0.1-py2.py3-none-any.whl name: diraccfg version: 1.0.1 From b3d16d703a4891e6933a5150a372220e6b4a14c2 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Tue, 3 Mar 2026 11:41:12 +0100 Subject: [PATCH 22/24] fix: pixi.lock --- pixi.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pixi.lock b/pixi.lock index 4947ac9d..23324782 100644 --- a/pixi.lock +++ b/pixi.lock @@ -1456,7 +1456,7 @@ packages: requires_python: '>=3.11' - pypi: ./ name: dirac-cwl - version: 1.2.1.dev22+gf60c9bf3b + version: 1.2.1.dev22+gf60c9bf3b.d20260303 sha256: 0b624b7b1adb33bc3b4cb29dbf85bb2db495122acda95aaa775f0aedef77767f requires_dist: - cwl-utils From cd29c0f67efa7f7b0e88e027d11c605e3ec79f54 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Fri, 6 Mar 2026 14:16:43 +0100 Subject: [PATCH 23/24] fix: fixed mypy --- src/dirac_cwl/production/__init__.py | 1 - test/test_workflows.py | 505 +++++++++++---------------- 2 files changed, 207 insertions(+), 299 deletions(-) diff --git a/src/dirac_cwl/production/__init__.py b/src/dirac_cwl/production/__init__.py index a74533ca..83281031 100644 --- a/src/dirac_cwl/production/__init__.py +++ b/src/dirac_cwl/production/__init__.py @@ -128,7 +128,6 @@ def submit_production_router(production: ProductionSubmissionModel) -> bool: # Validate the transformation logger.info("Validating the production...") - # Already validated by the pydantic model logger.info("Production validated!") diff --git a/test/test_workflows.py b/test/test_workflows.py index a9ec8faf..c177fbaf 100644 --- a/test/test_workflows.py +++ b/test/test_workflows.py @@ -1,5 +1,10 @@ +"""Integration tests for CWL workflow execution.""" + +import os +import platform import re import shutil +import subprocess import threading import time from pathlib import Path @@ -7,17 +12,8 @@ import pytest from typer.testing import CliRunner -from dirac_cwl_proto import app -from dirac_cwl_proto.execution_hooks import ( - ExecutionHooksHint, - SchedulingHint, - TransformationExecutionHooksHint, -) -from dirac_cwl_proto.submission_models import ( - JobSubmissionModel, - ProductionSubmissionModel, - TransformationSubmissionModel, -) +from dirac_cwl import app +from dirac_cwl.data_management_mocks.sandbox import SANDBOX_STORE_DIR, download_sandbox def strip_ansi_codes(text: str) -> str: @@ -27,11 +23,14 @@ def strip_ansi_codes(text: str) -> str: @pytest.fixture() def cli_runner(): + """Provide a Typer CLI runner for testing.""" return CliRunner() @pytest.fixture() def cleanup(): + """Provide a cleanup function to remove test directories.""" + def _cleanup(): shutil.rmtree("filecatalog", ignore_errors=True) shutil.rmtree("sandboxstore", ignore_errors=True) @@ -42,6 +41,30 @@ def _cleanup(): _cleanup() +@pytest.fixture() +def pi_test_files(): + """Create test files needed for pi workflow tests.""" + # Create job input files + job_dir = Path("test/workflows/pi/type_dependencies/job") + job_dir.mkdir(parents=True, exist_ok=True) + + # Create result files for pi-gather job test (result_1.sim through result_5.sim) + result_files = [] + for i in range(1, 6): + result_file = job_dir / f"result_{i}.sim" + with open(result_file, "w") as f: + # Create different sample data for each file + f.write(f"0.{i} 0.{i + 1}\n-0.{i + 2} 0.{i + 3}\n0.{i + 4} -0.{i + 5}\n") + result_files.append(result_file) + + yield + + # Cleanup - remove created files + for result_file in result_files: + if result_file.exists(): + result_file.unlink() + + # ----------------------------------------------------------------------------- # Job tests # ----------------------------------------------------------------------------- @@ -58,9 +81,7 @@ def _cleanup(): # A string input is passed ( "test/workflows/helloworld/description_with_inputs.cwl", - [ - "test/workflows/helloworld/type_dependencies/job/inputs-helloworld_with_inputs1.yaml" - ], + ["test/workflows/helloworld/type_dependencies/job/inputs-helloworld_with_inputs1.yaml"], ), # Multiple string inputs are passed ( @@ -73,12 +94,12 @@ def _cleanup(): ), # --- Test metadata example --- # A string input is passed - ( - "test/workflows/test_meta/test_meta.cwl", - [ - "test/workflows/test_meta/override_dirac_hints.yaml", - ], - ), + ("test/workflows/test_meta/test_meta.cwl", []), + # --- Test outputs hints example --- + # Output to filecatalog + ("test/workflows/test_outputs/test_outputs.cwl", []), + # Output to sandbox + ("test/workflows/test_outputs/test_outputs_sandbox.cwl", []), # --- Crypto example --- # Complete ( @@ -106,7 +127,7 @@ def _cleanup(): ["test/workflows/crypto/type_dependencies/job/inputs-crypto_complete.yaml"], ), # --- Pi example --- - # Complete + # Complete workflow ( "test/workflows/pi/description.cwl", ["test/workflows/pi/type_dependencies/job/inputs-pi_complete.yaml"], @@ -118,60 +139,10 @@ def _cleanup(): "test/workflows/pi/pigather.cwl", ["test/workflows/pi/type_dependencies/job/inputs-pi_gather.yaml"], ), - # --- Merge example --- - # Complete - ("test/workflows/merge/description.cwl", []), - # --- LHCb example --- - # Complete - ( - "test/workflows/lhcb/description.cwl", - ["test/workflows/lhcb/type_dependencies/job/inputs-lhcb_complete.yaml"], - ), - # Simulate only - ( - "test/workflows/lhcb/lhcbsimulate.cwl", - ["test/workflows/lhcb/type_dependencies/job/inputs-lhcb_simulate.yaml"], - ), - # Reconstruct only - ( - "test/workflows/lhcb/lhcbreconstruct.cwl", - ["test/workflows/lhcb/type_dependencies/job/inputs-lhcb_reconstruct.yaml"], - ), - # --- Mandelbrot example --- - # Complete - ( - "test/workflows/mandelbrot/description.cwl", - [ - "test/workflows/mandelbrot/type_dependencies/job/inputs-mandelbrot_complete.yaml" - ], - ), - # Image production only - ("test/workflows/mandelbrot/image-prod.cwl", []), - # Image merge only - ( - "test/workflows/mandelbrot/image-merge.cwl", - [ - "test/workflows/mandelbrot/type_dependencies/job/inputs-mandelbrot_imagemerge.yaml" - ], - ), - # --- Gaussian fit example --- - # Data generation only - ( - "test/workflows/gaussian_fit/data_generation/data-generation.cwl", - [ - "test/workflows/gaussian_fit/type_dependencies/job/inputs-data-generation.yaml" - ], - ), - # Gaussian fit only - ( - "test/workflows/gaussian_fit/gaussian_fit/gaussian-fit.cwl", - [ - "test/workflows/gaussian_fit/type_dependencies/job/inputs-gaussian-fit.yaml" - ], - ), ], ) -def test_run_job_success(cli_runner, cleanup, cwl_file, inputs): +def test_run_job_success(cli_runner, cleanup, pi_test_files, cwl_file, inputs): + """Test successful job submission and execution.""" # CWL file is the first argument command = ["job", "submit", cwl_file] @@ -218,19 +189,10 @@ def test_run_job_success(cli_runner, cleanup, cwl_file, inputs): [], "Recursingintostep", ), - # The configuration file is malformed: the hints are overridden more than once - ( - "test/workflows/test_meta/test_meta.cwl", - [ - "test/workflows/test_meta/override_dirac_hints_twice.yaml", - ], - "Failedtovalidatetheparameter", - ), ], ) -def test_run_job_validation_failure( - cli_runner, cleanup, cwl_file, inputs, expected_error -): +def test_run_job_validation_failure(cli_runner, cleanup, cwl_file, inputs, expected_error): + """Test job submission fails with invalid workflow or inputs.""" command = ["job", "submit", cwl_file] for input in inputs: command.extend(["--parameter-path", input]) @@ -244,9 +206,7 @@ def test_run_job_validation_failure( clean_stderr = re.sub(r"\s+", "", result.stderr or "") except (ValueError, AttributeError): clean_stderr = "" - clean_exception = re.sub( - r"\s+", "", str(result.exception) if result.exception else "" - ) + clean_exception = re.sub(r"\s+", "", str(result.exception) if result.exception else "") # Handle different possible error messages for circular references if expected_error == "Recursingintostep": @@ -258,9 +218,7 @@ def test_run_job_validation_failure( "circularreference", ] error_found = any( - pattern in clean_output - or pattern in clean_stderr - or pattern in clean_exception + pattern in clean_output or pattern in clean_stderr or pattern in clean_exception for pattern in circular_ref_patterns ) assert error_found, ( @@ -269,9 +227,7 @@ def test_run_job_validation_failure( ) else: error_found = ( - expected_error in clean_output - or expected_error in clean_stderr - or expected_error in clean_exception + expected_error in clean_output or expected_error in clean_stderr or expected_error in clean_exception ) assert error_found, ( f"Expected error '{expected_error}' not found in " @@ -279,138 +235,169 @@ def test_run_job_validation_failure( ) +@pytest.mark.skipif( + platform.system() != "Linux" or not shutil.which("taskset"), reason="taskset command only available on Linux" +) +def test_run_job_parallely(tmp_path, cleanup): + """Test parallel job execution performance.""" + # Ensure the workflow exists + workflow = Path("test/workflows/parallel/description.cwl").resolve() + assert workflow.exists() + + # Ensure we actually have >=2 CPUs available on this runner for the parallel half + allowed = os.sched_getaffinity(0) + if len(allowed) < 2: + pytest.skip("Runner exposes only 1 CPU") + + # Pick two CPUs to use + allowed = sorted(allowed) + cpu0, cpu1 = allowed[0], allowed[1] + + def read_interval(p: Path): + """Read interval.""" + start, end = p.read_text().splitlines() + return int(start), int(end) + + def overlaps(a, b): + """Check if two intervals overlap.""" + (a0, a1), (b0, b1) = a, b + return a0 < b1 and b0 < a1 + + def run(cpu_spec: str): + subprocess.run( + ["taskset", "-c", cpu_spec, "dirac-cwl", "job", "submit", str(workflow)], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + check=True, + ) + + # TODO: need to find a way to get a job id and associate the output sandbox to the job id + sandbox_files = list(Path(SANDBOX_STORE_DIR).glob("*.tar.*")) + assert len(sandbox_files) == 1, f"Expected exactly one sandbox file, found {len(sandbox_files)}" + sandbox_file = sandbox_files[0].name + download_sandbox(str(sandbox_file), tmp_path) + + # Get the output sandbox paths + a = read_interval(tmp_path / "a.txt") + b = read_interval(tmp_path / "b.txt") + + # TODO: no need that line once we assign sandbox to job id + (Path(SANDBOX_STORE_DIR) / sandbox_file).unlink() # Clean up sandboxstore for next run + return a, b + + # 1 CPU => expect no overlap + a1, b1 = run(str(cpu0)) + assert not overlaps(a1, b1), f"Expected sequential execution on 1 CPU, got overlap: a={a1}, b={b1}" + + # 2 CPUs => expect overlap + a2, b2 = run(f"{cpu0},{cpu1}") + assert overlaps(a2, b2), f"Expected parallel execution on 2 CPUs, got no overlap: a={a2}, b={b2}" + + +@pytest.mark.parametrize( + "cwl_file, inputs, destination_source_input_data", + [ + # --- Pi example --- + ( + "test/workflows/pi/pigather.cwl", + ["test/workflows/pi/type_dependencies/job/inputs-pi_gather_catalog.yaml"], + { + "filecatalog/pi/100": [ + "test/workflows/pi/type_dependencies/job/result_1.sim", + "test/workflows/pi/type_dependencies/job/result_2.sim", + "test/workflows/pi/type_dependencies/job/result_3.sim", + "test/workflows/pi/type_dependencies/job/result_4.sim", + "test/workflows/pi/type_dependencies/job/result_5.sim", + ] + }, + ), + ], +) +def test_run_job_with_input_data(cli_runner, cleanup, pi_test_files, cwl_file, inputs, destination_source_input_data): + """Test job execution with input data from file catalog.""" + for destination, inputs_data in destination_source_input_data.items(): + # Copy the input data to the destination + destination = Path(destination) + destination.mkdir(parents=True, exist_ok=True) + for input in inputs_data: + shutil.copy(input, destination) + + # CWL file is the first argument + command = ["job", "submit", cwl_file] + + # Add the input file(s) + for input in inputs: + command.extend(["--parameter-path", input]) + + result = cli_runner.invoke(app, command) + # Remove ANSI color codes for assertion + clean_output = strip_ansi_codes(result.stdout) + assert "CLI: Job(s) done" in clean_output, f"Failed to run the job: {result.stdout}" + + # ----------------------------------------------------------------------------- # Transformation tests # ----------------------------------------------------------------------------- @pytest.mark.parametrize( - "cwl_file, metadata", + "cwl_file", [ # --- Hello World example --- # There is no input expected - ("test/workflows/helloworld/description_basic.cwl", None), + "test/workflows/helloworld/description_basic.cwl", # --- Crypto example --- # Complete - ("test/workflows/crypto/description.cwl", None), + "test/workflows/crypto/description.cwl", # Caesar only - ("test/workflows/crypto/caesar.cwl", None), + "test/workflows/crypto/caesar.cwl", # ROT13 only - ("test/workflows/crypto/rot13.cwl", None), + "test/workflows/crypto/rot13.cwl", # Base64 only - ("test/workflows/crypto/base64.cwl", None), + "test/workflows/crypto/base64.cwl", # MD5 only - ("test/workflows/crypto/md5.cwl", None), + "test/workflows/crypto/md5.cwl", # --- Pi example --- - # There is no input expected - ( - "test/workflows/pi/pisimulate.cwl", - "test/workflows/pi/type_dependencies/transformation/metadata-pi_simulate.yaml", - ), - # --- Pi v2 example --- - # There is no input expected - ( - "test/workflows/merge/pisimulate_v2.cwl", - "test/workflows/merge/type_dependencies/transformation/metadata-pi_simulate_v2.yaml", - ), - # --- LHCb example --- - ( - "test/workflows/lhcb/lhcbsimulate.cwl", - "test/workflows/lhcb/type_dependencies/transformation/metadata-lhcb_simulate.yaml", - ), - # --- Mandelbrot example --- - ( - "test/workflows/mandelbrot/image-prod.cwl", - "test/workflows/mandelbrot/type_dependencies/transformation/metadata-mandelbrot_imageprod.yaml", - ), - # --- Gaussian fit example --- - # Data generation workflow - ( - "test/workflows/gaussian_fit/data_generation/data-generation.cwl", - "test/workflows/gaussian_fit/type_dependencies/transformation/inputs-data-generation.yaml", - ), + # Pi simulate transformation + "test/workflows/pi/pisimulate.cwl", ], ) -def test_run_nonblocking_transformation_success( - cli_runner, cleanup, cwl_file, metadata -): +def test_run_nonblocking_transformation_success(cli_runner, cleanup, cwl_file): + """Test successful non-blocking transformation submission and execution.""" # CWL file is the first argument command = ["transformation", "submit", cwl_file] - # Add the metadata file - if metadata: - command.extend(["--metadata-path", metadata]) result = cli_runner.invoke(app, command) clean_output = strip_ansi_codes(result.stdout) - assert ( - "Transformation done" in clean_output - ), f"Failed to run the transformation: {result.stdout}" + assert "Transformation done" in clean_output, f"Failed to run the transformation: {result.stdout}" @pytest.mark.parametrize( - "cwl_file, metadata, destination_source_input_data", + "cwl_file, destination_source_input_data", [ # --- Pi example --- + # Pi gather transformation (waits for simulation result files) ( "test/workflows/pi/pigather.cwl", - "test/workflows/pi/type_dependencies/transformation/metadata-pi_gather.yaml", - { - "filecatalog/pi/100": [ - "test/workflows/pi/type_dependencies/job/result_1.sim", - "test/workflows/pi/type_dependencies/job/result_2.sim", - "test/workflows/pi/type_dependencies/job/result_3.sim", - "test/workflows/pi/type_dependencies/job/result_4.sim", - "test/workflows/pi/type_dependencies/job/result_5.sim", - ] - }, - ), - # --- LHCb example --- - ( - "test/workflows/lhcb/lhcbreconstruct.cwl", - "test/workflows/lhcb/type_dependencies/transformation/metadata-lhcb_reconstruct.yaml", - { - "filecatalog/lhcb/456/123/simulation": [ - "test/workflows/lhcb/type_dependencies/job/Gauss_123_456_1.sim", - "test/workflows/lhcb/type_dependencies/job/Gauss_456_456_1.sim", - "test/workflows/lhcb/type_dependencies/job/Gauss_789_456_1.sim", - ] - }, - ), - # --- Mandelbrot example --- - ( - "test/workflows/mandelbrot/image-merge.cwl", - "test/workflows/mandelbrot/type_dependencies/transformation/metadata-mandelbrot_imagemerge.yaml", { - "filecatalog/mandelbrot/images/raw/1920x1080/": [ - "test/workflows/mandelbrot/type_dependencies/transformation/data_1.txt", - "test/workflows/mandelbrot/type_dependencies/transformation/data_2.txt", - "test/workflows/mandelbrot/type_dependencies/transformation/data_3.txt", + "filecatalog/pi/100/input-data": [ + ("result_1.sim", "0.1 0.2\n-0.3 0.4\n0.5 -0.6\n"), + ("result_2.sim", "-0.1 0.8\n0.9 -0.2\n-0.7 0.3\n"), + ("result_3.sim", "0.4 0.5\n-0.8 -0.1\n0.6 0.7\n"), + ("result_4.sim", "-0.9 0.0\n0.2 -0.4\n-0.5 0.8\n"), + ("result_5.sim", "0.3 -0.7\n-0.6 0.1\n0.9 -0.2\n"), ] }, ), - # Gaussian fit workflow - ( - "test/workflows/gaussian_fit/gaussian_fit/gaussian-fit-workflow.cwl", - "test/workflows/gaussian_fit/type_dependencies/transformation/inputs-gaussian-fit.yaml", - { - "filecatalog/gaussian_fit/data-generation-1/": [ - "test/workflows/gaussian_fit/type_dependencies/transformation/data-generation-1/data_gen1.txt", - ], - "filecatalog/gaussian_fit/data-generation-2/": [ - "test/workflows/gaussian_fit/type_dependencies/transformation/data-generation-2/data_gen2.txt", - ], - }, - ), ], ) -def test_run_blocking_transformation_success( - cli_runner, cleanup, cwl_file, metadata, destination_source_input_data -): +def test_run_blocking_transformation_success(cli_runner, cleanup, cwl_file, destination_source_input_data): + """Test successful blocking transformation with input data dependencies.""" + # Define a function to run the transformation command and return the result def run_transformation(): command = ["transformation", "submit", cwl_file] - if metadata: - command.extend(["--metadata-path", metadata]) return cli_runner.invoke(app, command) # Start running the transformation in a separate thread and capture the result @@ -420,83 +407,71 @@ def run_and_capture(): nonlocal transformation_result transformation_result = run_transformation() + # Start the transformation in a separate thread (it will wait for files) transformation_thread = threading.Thread(target=run_and_capture) transformation_thread.start() - # Give it some time to ensure the command is waiting for input files - time.sleep(5) + # Give it some time to start and begin waiting for input files + time.sleep(2) - # Ensure the command is waiting (e.g., it hasn't finished yet) - assert ( - transformation_thread.is_alive() - ), "The transformation should be waiting for files." + # Verify the transformation is still running (waiting for files) + assert transformation_thread.is_alive(), "The transformation should be waiting for files." + # Now create the input files (simulating files becoming available) for destination, inputs in destination_source_input_data.items(): - # Copy the input data to the destination + # Create the destination directory and files with content destination = Path(destination) destination.mkdir(parents=True, exist_ok=True) - for input in inputs: - shutil.copy(input, destination) + for filename, content in inputs: + file_path = destination / filename + with open(file_path, "w") as f: + f.write(content) - # Wait for the thread to finish - transformation_thread.join(timeout=60) + # Wait for the transformation to detect the files and complete + transformation_thread.join(timeout=30) # Check if the transformation completed successfully - assert ( - transformation_result is not None - ), "The transformation result was not captured." + assert transformation_result is not None, "The transformation result was not captured." clean_transformation_output = strip_ansi_codes(transformation_result.stdout) - assert ( - "Transformation done" in clean_transformation_output - ), "The transformation did not complete successfully." + assert "Transformation done" in clean_transformation_output, "The transformation did not complete successfully." @pytest.mark.parametrize( - "cwl_file, metadata, expected_error", + "cwl_file, expected_error", [ # The description file is malformed: class attribute is unknown ( "test/workflows/malformed_description/description_malformed_class.cwl", - None, "`class`containsundefinedreferenceto", ), # The description file is malformed: baseCommand is unknown ( "test/workflows/malformed_description/description_malformed_command.cwl", - None, "invalidfield`baseComand`", ), # The description file points to a non-existent file (subworkflow) ( "test/workflows/bad_references/reference_doesnotexists.cwl", - [], "Nosuchfileordirectory", ), # The description file points to another file point to it (circular dependency) ( "test/workflows/bad_references/reference_circular1.cwl", - [], "Recursingintostep", ), # The description file points to itself (another circular dependency) ( "test/workflows/bad_references/reference_itself.cwl", - [], "Recursingintostep", ), ], ) -def test_run_transformation_validation_failure( - cli_runner, cwl_file, cleanup, metadata, expected_error -): +def test_run_transformation_validation_failure(cli_runner, cwl_file, cleanup, expected_error): + """Test transformation submission fails with invalid workflow.""" command = ["transformation", "submit", cwl_file] - if metadata: - command.extend(["--metadata-path", metadata]) result = cli_runner.invoke(app, command) clean_stdout = strip_ansi_codes(result.stdout) - assert ( - "Transformation done" not in clean_stdout - ), "The transformation did complete successfully." + assert "Transformation done" not in clean_stdout, "The transformation did complete successfully." # Check all possible output sources clean_output = re.sub(r"\s+", "", result.stdout) @@ -504,9 +479,7 @@ def test_run_transformation_validation_failure( clean_stderr = re.sub(r"\s+", "", result.stderr or "") except (ValueError, AttributeError): clean_stderr = "" - clean_exception = re.sub( - r"\s+", "", str(result.exception) if result.exception else "" - ) + clean_exception = re.sub(r"\s+", "", str(result.exception) if result.exception else "") # Handle multiple possible error patterns for circular references if expected_error == "Recursingintostep": @@ -518,9 +491,7 @@ def test_run_transformation_validation_failure( "circularreference", ] error_found = any( - pattern in clean_output - or pattern in clean_stderr - or pattern in clean_exception + pattern in clean_output or pattern in clean_stderr or pattern in clean_exception for pattern in circular_ref_patterns ) assert error_found, ( @@ -529,9 +500,7 @@ def test_run_transformation_validation_failure( ) else: error_found = ( - expected_error in clean_output - or expected_error in clean_stderr - or expected_error in clean_exception + expected_error in clean_output or expected_error in clean_stderr or expected_error in clean_exception ) assert error_found, ( f"Expected error '{expected_error}' not found in " @@ -545,121 +514,65 @@ def test_run_transformation_validation_failure( @pytest.mark.parametrize( - "cwl_file, metadata", + "cwl_file", [ # --- Crypto example --- - # Complete - ("test/workflows/crypto/description.cwl", None), - # --- Pi example --- - # There is no input expected - ( - "test/workflows/pi/description.cwl", - "test/workflows/pi/type_dependencies/production/metadata-pi_complete.yaml", - ), - # --- Merge example --- - # There is no input expected - ( - "test/workflows/merge/description.cwl", - "test/workflows/merge/type_dependencies/production/metadata-merge_complete.yaml", - ), - # --- LHCb example --- - # Complete - ( - "test/workflows/lhcb/description.cwl", - "test/workflows/lhcb/type_dependencies/production/metadata-lhcb_complete.yaml", - ), - # --- Mandelbrot example --- - ( - "test/workflows/mandelbrot/description.cwl", - "test/workflows/mandelbrot/type_dependencies/production/metadata-mandelbrot_complete.yaml", - ), - # --- Gaussian fit example --- - # Complete - ( - "test/workflows/gaussian_fit/main-workflow.cwl", - "test/workflows/gaussian_fit/type_dependencies/production/metadata-gaussian-fit-complete.yaml", - ), + # Complete workflow with independent steps (ideal for production mode) + "test/workflows/crypto/description.cwl" ], ) -def test_run_simple_production_success(cli_runner, cleanup, cwl_file, metadata): +def test_run_simple_production_success(cli_runner, cleanup, pi_test_files, cwl_file): + """Test successful production workflow submission and execution.""" # CWL file is the first argument command = ["production", "submit", cwl_file] - # Add the metadata file - if metadata: - command.extend(["--steps-metadata-path", metadata]) result = cli_runner.invoke(app, command) clean_output = strip_ansi_codes(result.stdout) - assert ( - "Production done" in clean_output - ), f"Failed to run the production: {result.stdout}" + assert "Production done" in clean_output, f"Failed to run the production: {result.stdout}" @pytest.mark.parametrize( - "cwl_file, metadata, expected_error", + "cwl_file, expected_error", [ # The description file is malformed: class attribute is unknown ( "test/workflows/malformed_description/description_malformed_class.cwl", - None, "`class`containsundefinedreferenceto", ), # The description file is malformed: baseCommand is unknown ( "test/workflows/malformed_description/description_malformed_command.cwl", - None, "invalidfield`baseComand`", ), # The description file points to a non-existent file (subworkflow) ( "test/workflows/bad_references/reference_doesnotexists.cwl", - [], "Nosuchfileordirectory", ), # The description file points to another file point to it (circular dependency) ( "test/workflows/bad_references/reference_circular1.cwl", - [], "Recursingintostep", ), # The description file points to itself (another circular dependency) ( "test/workflows/bad_references/reference_circular1.cwl", - [], "Recursingintostep", ), # The workflow is a CommandLineTool instead of a Workflow ( "test/workflows/helloworld/description_basic.cwl", - None, "InputshouldbeaninstanceofWorkflow", ), - # The metadata has an unexistent step name - ( - "test/workflows/mandelbrot/description.cwl", - "test/workflows/mandelbrot/type_dependencies/production/malformed-wrong-stepname_metadata-mandelbrot_complete.yaml", - "Thefollowingstepsaremissingfromthetaskworkflow:{'this-step-doesnot-exist'}", - ), - # The metadata has an unexistent type - ( - "test/workflows/mandelbrot/description.cwl", - "test/workflows/mandelbrot/type_dependencies/production/malformed-nonexisting-type_metadata-mandelbrot_complete.yaml", - "Unknownexecutionhooksplugin:'MandelBrotDoesNotExist'", - ), ], ) -def test_run_production_validation_failure( - cli_runner, cleanup, cwl_file, metadata, expected_error -): +def test_run_production_validation_failure(cli_runner, cleanup, cwl_file, expected_error): + """Test production submission fails with invalid workflow.""" command = ["production", "submit", cwl_file] - if metadata: - command.extend(["--steps-metadata-path", metadata]) result = cli_runner.invoke(app, command) clean_stdout = strip_ansi_codes(result.stdout) - assert ( - "Transformation done" not in clean_stdout - ), "The transformation did complete successfully." + assert "Transformation done" not in clean_stdout, "The transformation did complete successfully." # Check all possible output sources clean_output = re.sub(r"\s+", "", f"{result.stdout}") @@ -678,9 +591,7 @@ def test_run_production_validation_failure( "circularreference", ] error_found = any( - pattern in clean_output - or pattern in clean_stderr - or pattern in clean_exception + pattern in clean_output or pattern in clean_stderr or pattern in clean_exception for pattern in circular_ref_patterns ) assert error_found, ( @@ -689,9 +600,7 @@ def test_run_production_validation_failure( ) else: error_found = ( - expected_error in clean_output - or expected_error in clean_stderr - or expected_error in clean_exception + expected_error in clean_output or expected_error in clean_stderr or expected_error in clean_exception ) assert error_found, ( f"Expected error '{expected_error}' not found in " From 6d1a4e657a0142ffdc4946207b3ac9f63664dbf3 Mon Sep 17 00:00:00 2001 From: Stella-Maria Renucci Date: Wed, 1 Apr 2026 12:52:13 +0200 Subject: [PATCH 24/24] fix: added ExpressionTool and updated code --- pixi.lock | 5 +- src/dirac_cwl/submission_models.py | 83 ++++++++++-------------------- test/test_resource_requirements.py | 33 ++++++++++-- 3 files changed, 61 insertions(+), 60 deletions(-) diff --git a/pixi.lock b/pixi.lock index 23324782..76a3099b 100644 --- a/pixi.lock +++ b/pixi.lock @@ -5,6 +5,8 @@ environments: - url: https://conda.anaconda.org/conda-forge/ indexes: - https://pypi.org/simple + options: + pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -1456,7 +1458,7 @@ packages: requires_python: '>=3.11' - pypi: ./ name: dirac-cwl - version: 1.2.1.dev22+gf60c9bf3b.d20260303 + version: 1.1.2.dev3+g773310367 sha256: 0b624b7b1adb33bc3b4cb29dbf85bb2db495122acda95aaa775f0aedef77767f requires_dist: - cwl-utils @@ -1478,7 +1480,6 @@ packages: - pytest-mock ; extra == 'testing' - mypy ; extra == 'testing' requires_python: '>=3.11' - editable: true - pypi: https://files.pythonhosted.org/packages/9f/90/279f55fff9481f9e0424c3c97b24dc10004ec8d8f98ddf5afd07a7b79194/diraccfg-1.0.1-py2.py3-none-any.whl name: diraccfg version: 1.0.1 diff --git a/src/dirac_cwl/submission_models.py b/src/dirac_cwl/submission_models.py index 36c75513..53fc620f 100644 --- a/src/dirac_cwl/submission_models.py +++ b/src/dirac_cwl/submission_models.py @@ -9,7 +9,7 @@ from typing import Any, Optional -from cwl_utils.parser import WorkflowStep, save +from cwl_utils.parser import save from cwl_utils.parser.cwl_v1_2 import ( CommandLineTool, ExpressionTool, @@ -186,65 +186,38 @@ def validate_production(cls, values): # Temporary code, waiting on cwltool PR: https://github.com/common-workflow-language/cwltool/pull/2179. -def validate_resource_requirements(task): - """ - Validate ResourceRequirements of a task (CommandLineTool, Workflow, WorkflowStep, WorkflowStep.run). +def validate_resource_requirements(task: CommandLineTool | Workflow | ExpressionTool): + """Validate ResourceRequirements of a task recursively. - :param task: The task to validate + :param task: The task to validate. + :raises ValueError: If any ResourceRequirement has min > max. """ - cwl_req = _get_resource_requirement(task) - - # Validate Workflow/CLT requirements. - if cwl_req: - _validate_resource_requirement(cwl_req) - - # Validate WorkflowStep requirements. - if not isinstance(task, CommandLineTool) and task.steps: - for step in task.steps: - step_req = _get_resource_requirement(step) - if step_req: - _validate_resource_requirement(step_req) - - # Validate run requirements for each step if they exist. + # Validate task-level requirements + for req in getattr(task, "requirements", None) or []: + if isinstance(req, ResourceRequirement): + _validate_min_max(req) + + # Recurse into workflow steps + if isinstance(task, Workflow): + for step in task.steps or []: + for req in getattr(step, "requirements", None) or []: + if isinstance(req, ResourceRequirement): + _validate_min_max(req) if step.run: - if isinstance(step.run, Workflow): - # Validate nested Workflow requirements, if any. - validate_resource_requirements(task=step.run) - - step_run_req = _get_resource_requirement(step.run) - if step_run_req: - _validate_resource_requirement(step_run_req) - + validate_resource_requirements(step.run) -def _validate_resource_requirement(requirement): - """Validate a ResourceRequirement. - Verify that resourceMin is not higher than resourceMax (CommandLineTool, Workflow, WorkflowStep, WorkflowStep.run) +def _validate_min_max(req: ResourceRequirement): + """Check that min does not exceed max for any resource. - :param requirement: The current ResourceRequirement to validate. - :raises ValueError: If the requirement is invalid. + :param req: The ResourceRequirement to validate. + :raises ValueError: If min > max for any resource. """ - for resource, min_value, max_value in [ - ("ram", requirement.ramMin, requirement.ramMax), - ("cores", requirement.coresMin, requirement.coresMax), - ("tmpdir", requirement.tmpdirMin, requirement.tmpdirMax), - ("outdir", requirement.outdirMin, requirement.outdirMax), + for name, lo, hi in [ + ("cores", req.coresMin, req.coresMax), + ("ram", req.ramMin, req.ramMax), + ("tmpdir", req.tmpdirMin, req.tmpdirMax), + ("outdir", req.outdirMin, req.outdirMax), ]: - if min_value and max_value and min_value > max_value: - raise ValueError(f"{resource}Min is higher than {resource}Max") - - -def _get_resource_requirement( - cwl_object: Workflow | CommandLineTool | WorkflowStep, -) -> ResourceRequirement | None: - """ - Extract the resource requirement from the current cwl_object. - - :param cwl_object: The cwl_object to extract the requirement from. - :return: The resource requirement object, or None if not found. - """ - requirements = getattr(cwl_object, "requirements", []) or [] - for requirement in requirements: - if isinstance(requirement, ResourceRequirement): - return requirement - return None + if lo and hi and lo > hi: + raise ValueError(f"{name}Min ({lo}) exceeds {name}Max ({hi})") diff --git a/test/test_resource_requirements.py b/test/test_resource_requirements.py index 6c76e9d9..2465e9b9 100644 --- a/test/test_resource_requirements.py +++ b/test/test_resource_requirements.py @@ -1,9 +1,9 @@ """Integration tests for CWL Resource Requirements validation.""" -from typing import List, Optional +from typing import Optional import pytest -from cwl_utils.parser.cwl_v1_2 import CommandLineTool, ResourceRequirement, Workflow, WorkflowStep +from cwl_utils.parser.cwl_v1_2 import CommandLineTool, ExpressionTool, ResourceRequirement, Workflow, WorkflowStep from dirac_cwl.submission_models import JobSubmissionModel, ProductionSubmissionModel, TransformationSubmissionModel @@ -27,7 +27,7 @@ def create_commandlinetool( def create_workflow( requirements: Optional[list] = None, - steps: Optional[List[WorkflowStep]] = None, + steps: Optional[list[WorkflowStep]] = None, inputs: Optional[list] = None, outputs: Optional[list] = None, ) -> Workflow: @@ -55,6 +55,20 @@ def create_step( ) +def create_expressiontool( + requirements: Optional[list] = None, + inputs: Optional[list] = None, + outputs: Optional[list] = None, +) -> ExpressionTool: + """Create an ExpressionTool with the given requirements, inputs, and outputs.""" + return ExpressionTool( + expression="", + requirements=requirements or [], + inputs=inputs or [], + outputs=outputs or [], + ) + + def assert_submission_fails(task): """Assert that submission fails with ValueError for Job and Transformation models with bad resource requirements. @@ -86,17 +100,30 @@ def test_bad_min_max_resource_reqs(bad_min_max_reqs): clt = create_commandlinetool(requirements=[bad_min_max_reqs]) assert_submission_fails(clt) + # ExpressionTool with bad minmax reqs + expression_tool = create_expressiontool(requirements=[bad_min_max_reqs]) + assert_submission_fails(expression_tool) + # WorkflowStep.run with bad minmax reqs step_bad_run = create_step(run=clt) workflow = create_workflow(steps=[step_bad_run]) assert_submission_fails(workflow) + step_bad_run = create_step(run=expression_tool) + workflow = create_workflow(steps=[step_bad_run]) + assert_submission_fails(workflow) + # WorkflowStep with bad minmax reqs clt = create_commandlinetool() step = create_step(run=clt, requirements=[bad_min_max_reqs]) workflow = create_workflow(steps=[step]) assert_submission_fails(workflow) + expression_tool = create_commandlinetool() + step = create_step(run=expression_tool, requirements=[bad_min_max_reqs]) + workflow = create_workflow(steps=[step]) + assert_submission_fails(workflow) + # Workflow with bad minmax reqs workflow = create_workflow(requirements=[bad_min_max_reqs]) assert_submission_fails(workflow)