From 3b91ca5f059861ecfc2125379c28df98d6363083 Mon Sep 17 00:00:00 2001 From: Kelly Date: Thu, 17 Apr 2025 14:21:51 -0400 Subject: [PATCH] Remove source files for deprecated components --- .../batch_benchmark_inference/asset.yaml | 8 - .../batch_benchmark_inference/spec.yaml | 282 --------- .../asset.yaml | 8 - .../spec.yaml | 222 ------- .../asset.yaml | 8 - .../spec.yaml | 287 --------- .../batch_benchmark_score/asset.yaml | 8 - .../batch_benchmark_score/spec.yaml | 94 --- .../batch_inference_preparer/asset.yaml | 8 - .../batch_inference_preparer/spec.yaml | 80 --- .../batch_output_formatter/asset.yaml | 8 - .../batch_output_formatter/spec.yaml | 97 ---- .../batch_resource_manager/asset.yaml | 8 - .../batch_resource_manager/spec.yaml | 175 ------ .../benchmark_result_aggregator/asset.yaml | 8 - .../benchmark_result_aggregator/spec.yaml | 31 - .../compute_performance_metrics/asset.yaml | 8 - .../compute_performance_metrics/spec.yaml | 73 --- .../components/dataset_downloader/asset.yaml | 8 - .../components/dataset_downloader/spec.yaml | 44 -- .../dataset_preprocessor/asset.yaml | 8 - .../components/dataset_preprocessor/spec.yaml | 64 -- .../components/dataset_sampler/asset.yaml | 8 - .../components/dataset_sampler/spec.yaml | 58 -- .../inference_postprocessor/asset.yaml | 8 - .../inference_postprocessor/spec.yaml | 151 ----- .../components/prompt_crafter/asset.yaml | 8 - .../components/prompt_crafter/spec.yaml | 153 ----- .../hello_command_component/asset.yaml | 3 - .../hello_command_component/spec.yaml | 18 - .../hello_pipeline_component/asset.yaml | 3 - .../hello_pipeline_component/spec.yaml | 20 - assets/basic/components/src/hello.py | 11 - .../driver/batch_score_llm/asset.yaml | 3 - .../driver/batch_score_llm/spec.yaml | 65 --- .../components/batch_deploy_model/asset.yaml | 3 - .../components/batch_deploy_model/spec.yaml | 197 ------- .../components/delete_endpoint/asset.yaml | 3 - .../components/delete_endpoint/spec.yaml | 40 -- .../common/components/deploy_model/asset.yaml | 3 - .../common/components/deploy_model/spec.yaml | 208 ------- .../mlflow_model_local_validation/asset.yaml | 3 - .../mlflow_model_local_validation/spec.yaml | 76 --- .../components/register_model/asset.yaml | 3 - .../components/register_model/spec.yaml | 78 --- .../export_data_database/asset.yaml | 2 - .../export_data_database/spec.yaml | 16 - .../import_data_database/asset.yaml | 2 - .../import_data_database/spec.yaml | 15 - .../import_data_file_system/asset.yaml | 2 - .../import_data_file_system/spec.yaml | 15 - .../data_ingestion_db_to_acs/asset.yaml | 4 - .../data_ingestion_db_to_acs/spec.yaml | 340 ----------- .../data_ingestion_db_to_faiss/asset.yaml | 4 - .../data_ingestion_db_to_faiss/spec.yaml | 324 ----------- .../asset.yaml | 4 - .../spec.yaml | 319 ---------- .../asset.yaml | 4 - .../spec.yaml | 303 ---------- .../asset.yaml | 3 - .../spec.yaml | 83 --- .../oai_completions_finetune/asset.yaml | 3 - .../oai_completions_finetune/spec.yaml | 112 ---- .../automl_image_classification/asset.yaml | 3 - .../automl_image_classification/spec.yaml | 10 - .../asset.yaml | 3 - .../spec.yaml | 10 - .../asset.yaml | 3 - .../spec.yaml | 10 - .../automl_image_object_detection/asset.yaml | 3 - .../automl_image_object_detection/spec.yaml | 10 - .../automl_tabular_classification/asset.yaml | 3 - .../automl_tabular_classification/spec.yaml | 10 - .../automl_tabular_forecasting/asset.yaml | 3 - .../automl_tabular_forecasting/spec.yaml | 10 - .../automl_tabular_regression/asset.yaml | 3 - .../automl_tabular_regression/spec.yaml | 10 - .../automl_text_classification/asset.yaml | 3 - .../automl_text_classification/spec.yaml | 10 - .../asset.yaml | 3 - .../spec.yaml | 10 - .../components/automl_text_ner/asset.yaml | 3 - .../components/automl_text_ner/spec.yaml | 10 - .../components/pipeline/asset.yaml | 3 - .../components/pipeline/spec.yaml | 547 ------------------ .../components/validation/asset.yaml | 3 - .../components/validation/spec.yaml | 170 ------ .../src/validation/validation.py | 22 - .../pipeline_components/nlp_ner/asset.yaml | 3 - .../pipeline_components/nlp_ner/spec.yaml | 264 --------- .../components/preprocess/nlp_ner/asset.yaml | 3 - .../components/preprocess/nlp_ner/spec.yaml | 64 -- .../diffusers_text_to_image/asset.yaml | 3 - .../diffusers_text_to_image/spec.yaml | 425 -------------- .../finetune/hf_classification/asset.yaml | 3 - .../finetune/hf_classification/spec.yaml | 356 ------------ .../components/finetune/mmd_od_is/asset.yaml | 3 - .../components/finetune/mmd_od_is/spec.yaml | 347 ----------- .../components/finetune/mmt/asset.yaml | 3 - .../components/finetune/mmt/spec.yaml | 328 ----------- .../components/framework_selector/asset.yaml | 3 - .../components/framework_selector/spec.yaml | 39 -- .../diffusers_text_to_image/asset.yaml | 3 - .../diffusers_text_to_image/spec.yaml | 69 --- .../model_import/hf_classification/asset.yaml | 3 - .../model_import/hf_classification/spec.yaml | 67 --- .../model_import/mmd_od_is/asset.yaml | 3 - .../model_import/mmd_od_is/spec.yaml | 67 --- .../components/model_import/mmt/asset.yaml | 3 - .../components/model_import/mmt/spec.yaml | 67 --- .../model_output_selector/asset.yaml | 3 - .../model_output_selector/spec.yaml | 52 -- .../classification/asset.yaml | 3 - .../classification/spec.yaml | 385 ------------ .../diffusers_text_to_image/asset.yaml | 3 - .../diffusers_text_to_image/spec.yaml | 489 ---------------- .../instance_segmentation/asset.yaml | 3 - .../instance_segmentation/spec.yaml | 437 -------------- .../pipeline_components/mmt/asset.yaml | 3 - .../pipeline_components/mmt/spec.yaml | 382 ------------ .../object_detection/asset.yaml | 3 - .../object_detection/spec.yaml | 464 --------------- .../multimodal_classification/asset.yaml | 3 - .../multimodal_classification/spec.yaml | 361 ------------ .../multimodal_classification/asset.yaml | 3 - .../multimodal_classification/spec.yaml | 47 -- .../multimodal_classification/asset.yaml | 3 - .../multimodal_classification/spec.yaml | 434 -------------- .../multimodal_classification/asset.yaml | 3 - .../multimodal_classification/spec.yaml | 93 --- .../components/compute_metrics/README.md | 37 -- .../components/compute_metrics/asset.yaml | 3 - .../components/compute_metrics/spec.yaml | 94 --- .../components/evaluate_model/README.md | 40 -- .../components/evaluate_model/asset.yaml | 3 - .../components/evaluate_model/spec.yaml | 94 --- .../components/pipeline_component/README.md | 38 -- .../components/pipeline_component/asset.yaml | 3 - .../components/pipeline_component/spec.yaml | 146 ----- .../README.md | 38 -- .../asset.yaml | 3 - .../spec.yaml | 98 ---- .../components/download_model/asset.yaml | 3 - .../components/download_model/spec.yaml | 67 --- .../image_classification/asset.yaml | 3 - .../components/image_classification/spec.yaml | 231 -------- .../instance_segmentation/asset.yaml | 3 - .../instance_segmentation/spec.yaml | 270 --------- .../components/object_detection/asset.yaml | 3 - .../components/object_detection/spec.yaml | 282 --------- 150 files changed, 11849 deletions(-) delete mode 100644 assets/aml-benchmark/components/batch_benchmark_inference/asset.yaml delete mode 100644 assets/aml-benchmark/components/batch_benchmark_inference/spec.yaml delete mode 100644 assets/aml-benchmark/components/batch_benchmark_inference_claude/asset.yaml delete mode 100644 assets/aml-benchmark/components/batch_benchmark_inference_claude/spec.yaml delete mode 100644 assets/aml-benchmark/components/batch_benchmark_inference_with_inference_compute/asset.yaml delete mode 100644 assets/aml-benchmark/components/batch_benchmark_inference_with_inference_compute/spec.yaml delete mode 100644 assets/aml-benchmark/components/batch_benchmark_score/asset.yaml delete mode 100644 assets/aml-benchmark/components/batch_benchmark_score/spec.yaml delete mode 100644 assets/aml-benchmark/components/batch_inference_preparer/asset.yaml delete mode 100644 assets/aml-benchmark/components/batch_inference_preparer/spec.yaml delete mode 100644 assets/aml-benchmark/components/batch_output_formatter/asset.yaml delete mode 100644 assets/aml-benchmark/components/batch_output_formatter/spec.yaml delete mode 100644 assets/aml-benchmark/components/batch_resource_manager/asset.yaml delete mode 100644 assets/aml-benchmark/components/batch_resource_manager/spec.yaml delete mode 100644 assets/aml-benchmark/components/benchmark_result_aggregator/asset.yaml delete mode 100644 assets/aml-benchmark/components/benchmark_result_aggregator/spec.yaml delete mode 100644 assets/aml-benchmark/components/compute_performance_metrics/asset.yaml delete mode 100644 assets/aml-benchmark/components/compute_performance_metrics/spec.yaml delete mode 100644 assets/aml-benchmark/components/dataset_downloader/asset.yaml delete mode 100644 assets/aml-benchmark/components/dataset_downloader/spec.yaml delete mode 100644 assets/aml-benchmark/components/dataset_preprocessor/asset.yaml delete mode 100644 assets/aml-benchmark/components/dataset_preprocessor/spec.yaml delete mode 100644 assets/aml-benchmark/components/dataset_sampler/asset.yaml delete mode 100644 assets/aml-benchmark/components/dataset_sampler/spec.yaml delete mode 100644 assets/aml-benchmark/components/inference_postprocessor/asset.yaml delete mode 100644 assets/aml-benchmark/components/inference_postprocessor/spec.yaml delete mode 100644 assets/aml-benchmark/components/prompt_crafter/asset.yaml delete mode 100644 assets/aml-benchmark/components/prompt_crafter/spec.yaml delete mode 100644 assets/basic/components/hello_command_component/asset.yaml delete mode 100644 assets/basic/components/hello_command_component/spec.yaml delete mode 100644 assets/basic/components/hello_pipeline_component/asset.yaml delete mode 100644 assets/basic/components/hello_pipeline_component/spec.yaml delete mode 100644 assets/basic/components/src/hello.py delete mode 100644 assets/batch_score/components/driver/batch_score_llm/asset.yaml delete mode 100644 assets/batch_score/components/driver/batch_score_llm/spec.yaml delete mode 100644 assets/common/components/batch_deploy_model/asset.yaml delete mode 100644 assets/common/components/batch_deploy_model/spec.yaml delete mode 100644 assets/common/components/delete_endpoint/asset.yaml delete mode 100644 assets/common/components/delete_endpoint/spec.yaml delete mode 100644 assets/common/components/deploy_model/asset.yaml delete mode 100644 assets/common/components/deploy_model/spec.yaml delete mode 100644 assets/common/components/mlflow_model_local_validation/asset.yaml delete mode 100644 assets/common/components/mlflow_model_local_validation/spec.yaml delete mode 100644 assets/common/components/register_model/asset.yaml delete mode 100644 assets/common/components/register_model/spec.yaml delete mode 100644 assets/data/data-transfer/export_data_database/asset.yaml delete mode 100644 assets/data/data-transfer/export_data_database/spec.yaml delete mode 100644 assets/data/data-transfer/import_data_database/asset.yaml delete mode 100644 assets/data/data-transfer/import_data_database/spec.yaml delete mode 100644 assets/data/data-transfer/import_data_file_system/asset.yaml delete mode 100644 assets/data/data-transfer/import_data_file_system/spec.yaml delete mode 100644 assets/large_language_models/components_pipelines/data_ingestion_db_to_acs/asset.yaml delete mode 100644 assets/large_language_models/components_pipelines/data_ingestion_db_to_acs/spec.yaml delete mode 100644 assets/large_language_models/components_pipelines/data_ingestion_db_to_faiss/asset.yaml delete mode 100644 assets/large_language_models/components_pipelines/data_ingestion_db_to_faiss/spec.yaml delete mode 100644 assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_acs_e2e/asset.yaml delete mode 100644 assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_acs_e2e/spec.yaml delete mode 100644 assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_faiss_e2e/asset.yaml delete mode 100644 assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_faiss_e2e/spec.yaml delete mode 100644 assets/large_language_models/components_pipelines/oai_v2_1p/openai_completions_finetune_pipeline/asset.yaml delete mode 100644 assets/large_language_models/components_pipelines/oai_v2_1p/openai_completions_finetune_pipeline/spec.yaml delete mode 100644 assets/oai/components_3p/oai_completions_finetune/asset.yaml delete mode 100644 assets/oai/components_3p/oai_completions_finetune/spec.yaml delete mode 100644 assets/training/automl/components/automl_image_classification/asset.yaml delete mode 100644 assets/training/automl/components/automl_image_classification/spec.yaml delete mode 100644 assets/training/automl/components/automl_image_classification_multilabel/asset.yaml delete mode 100644 assets/training/automl/components/automl_image_classification_multilabel/spec.yaml delete mode 100644 assets/training/automl/components/automl_image_instance_segmentation/asset.yaml delete mode 100644 assets/training/automl/components/automl_image_instance_segmentation/spec.yaml delete mode 100644 assets/training/automl/components/automl_image_object_detection/asset.yaml delete mode 100644 assets/training/automl/components/automl_image_object_detection/spec.yaml delete mode 100644 assets/training/automl/components/automl_tabular_classification/asset.yaml delete mode 100644 assets/training/automl/components/automl_tabular_classification/spec.yaml delete mode 100644 assets/training/automl/components/automl_tabular_forecasting/asset.yaml delete mode 100644 assets/training/automl/components/automl_tabular_forecasting/spec.yaml delete mode 100644 assets/training/automl/components/automl_tabular_regression/asset.yaml delete mode 100644 assets/training/automl/components/automl_tabular_regression/spec.yaml delete mode 100644 assets/training/automl/components/automl_text_classification/asset.yaml delete mode 100644 assets/training/automl/components/automl_text_classification/spec.yaml delete mode 100644 assets/training/automl/components/automl_text_classification_multilabel/asset.yaml delete mode 100644 assets/training/automl/components/automl_text_classification_multilabel/spec.yaml delete mode 100644 assets/training/automl/components/automl_text_ner/asset.yaml delete mode 100644 assets/training/automl/components/automl_text_ner/spec.yaml delete mode 100644 assets/training/distillation/components/pipeline/asset.yaml delete mode 100644 assets/training/distillation/components/pipeline/spec.yaml delete mode 100644 assets/training/finetune_acft_common/components/validation/asset.yaml delete mode 100644 assets/training/finetune_acft_common/components/validation/spec.yaml delete mode 100644 assets/training/finetune_acft_common/src/validation/validation.py delete mode 100644 assets/training/finetune_acft_hf_nlp/components/pipeline_components/nlp_ner/asset.yaml delete mode 100644 assets/training/finetune_acft_hf_nlp/components/pipeline_components/nlp_ner/spec.yaml delete mode 100644 assets/training/finetune_acft_hf_nlp/components/preprocess/nlp_ner/asset.yaml delete mode 100644 assets/training/finetune_acft_hf_nlp/components/preprocess/nlp_ner/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/finetune/diffusers_text_to_image/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/finetune/diffusers_text_to_image/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/finetune/hf_classification/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/finetune/hf_classification/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/finetune/mmd_od_is/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/finetune/mmd_od_is/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/finetune/mmt/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/finetune/mmt/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/framework_selector/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/framework_selector/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/model_import/diffusers_text_to_image/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/model_import/diffusers_text_to_image/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/model_import/hf_classification/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/model_import/hf_classification/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/model_import/mmd_od_is/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/model_import/mmd_od_is/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/model_import/mmt/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/model_import/mmt/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/model_output_selector/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/model_output_selector/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/pipeline_components/classification/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/pipeline_components/classification/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/pipeline_components/diffusers_text_to_image/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/pipeline_components/diffusers_text_to_image/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/pipeline_components/instance_segmentation/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/pipeline_components/instance_segmentation/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/pipeline_components/mmt/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/pipeline_components/mmt/spec.yaml delete mode 100644 assets/training/finetune_acft_image/components/pipeline_components/object_detection/asset.yaml delete mode 100644 assets/training/finetune_acft_image/components/pipeline_components/object_detection/spec.yaml delete mode 100644 assets/training/finetune_acft_multimodal/components/finetune/multimodal_classification/asset.yaml delete mode 100644 assets/training/finetune_acft_multimodal/components/finetune/multimodal_classification/spec.yaml delete mode 100644 assets/training/finetune_acft_multimodal/components/model_import/multimodal_classification/asset.yaml delete mode 100644 assets/training/finetune_acft_multimodal/components/model_import/multimodal_classification/spec.yaml delete mode 100644 assets/training/finetune_acft_multimodal/components/pipeline_components/multimodal_classification/asset.yaml delete mode 100644 assets/training/finetune_acft_multimodal/components/pipeline_components/multimodal_classification/spec.yaml delete mode 100644 assets/training/finetune_acft_multimodal/components/preprocess/multimodal_classification/asset.yaml delete mode 100644 assets/training/finetune_acft_multimodal/components/preprocess/multimodal_classification/spec.yaml delete mode 100644 assets/training/model_evaluation/components/compute_metrics/README.md delete mode 100644 assets/training/model_evaluation/components/compute_metrics/asset.yaml delete mode 100644 assets/training/model_evaluation/components/compute_metrics/spec.yaml delete mode 100644 assets/training/model_evaluation/components/evaluate_model/README.md delete mode 100644 assets/training/model_evaluation/components/evaluate_model/asset.yaml delete mode 100644 assets/training/model_evaluation/components/evaluate_model/spec.yaml delete mode 100644 assets/training/model_evaluation/components/pipeline_component/README.md delete mode 100644 assets/training/model_evaluation/components/pipeline_component/asset.yaml delete mode 100644 assets/training/model_evaluation/components/pipeline_component/spec.yaml delete mode 100644 assets/training/model_evaluation/components/validation_trigger_model_evaluation/README.md delete mode 100644 assets/training/model_evaluation/components/validation_trigger_model_evaluation/asset.yaml delete mode 100644 assets/training/model_evaluation/components/validation_trigger_model_evaluation/spec.yaml delete mode 100644 assets/training/model_management/components/download_model/asset.yaml delete mode 100644 assets/training/model_management/components/download_model/spec.yaml delete mode 100644 assets/training/vision/components/image_classification/asset.yaml delete mode 100644 assets/training/vision/components/image_classification/spec.yaml delete mode 100644 assets/training/vision/components/instance_segmentation/asset.yaml delete mode 100644 assets/training/vision/components/instance_segmentation/spec.yaml delete mode 100644 assets/training/vision/components/object_detection/asset.yaml delete mode 100644 assets/training/vision/components/object_detection/spec.yaml diff --git a/assets/aml-benchmark/components/batch_benchmark_inference/asset.yaml b/assets/aml-benchmark/components/batch_benchmark_inference/asset.yaml deleted file mode 100644 index dc855c4422..0000000000 --- a/assets/aml-benchmark/components/batch_benchmark_inference/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: false - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/batch_benchmark_inference diff --git a/assets/aml-benchmark/components/batch_benchmark_inference/spec.yaml b/assets/aml-benchmark/components/batch_benchmark_inference/spec.yaml deleted file mode 100644 index 07b7d0cdf2..0000000000 --- a/assets/aml-benchmark/components/batch_benchmark_inference/spec.yaml +++ /dev/null @@ -1,282 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline - -name: batch_benchmark_inference -display_name: Batch Benchmark Inference -description: Components for batch endpoint inference -version: 0.0.12 - -inputs: - input_dataset: - type: uri_folder - description: Input jsonl dataset that contains prompt. For the performance test, this one will be neglected. - optional: True - model_type: - type: string - description: Type of model's input and output contract. Can be one of ('oai', 'oss', 'vision_oss') - optional: False - enum: - - oai - - oss - - vision_oss - batch_input_pattern: - type: string - description: >- - The string for the batch input pattern. The input should be the payload format with substitution - for the key for the value put in the `###`. For example, one can use the following format for - a llama text-gen model with a input dataset has `prompt` for the payload - and `_batch_request_metadata` storing the corresponding ground truth. - { - "input_data": - { - "input_string": ["###"], - "parameters": - { - "temperature": 0.6, - "max_new_tokens": 100, - "do_sample": true - } - }, - "_batch_request_metadata": ###<_batch_request_metadata> - } - - For AOAI chat completion model, the following pattern can be used, - { - "messages": ###, - "temperature": 0.7, - "top_p": 0.95, - "frequency_penalty": 0, - "presence_penalty": 0, - "max_tokens": 800, - "stop": null - } - optional: False - endpoint_url: - type: string - optional: False - description: The URL of the endpoint. - is_performance_test: - type: boolean - default: False - description: If true, the performance test will be run and the input dataset will be neglected. - use_tiktoken: - type: boolean - default: False - description: If true, `cl100k_base` encoder is used from tiktoken to calculate token count; overrides any other token count calculation. - optional: True - authentication_type: - type: string - optional: False - description: Authentication type for endpoint- azureml_workspace_connection or managed_identity. - default: azureml_workspace_connection - enum: - - azureml_workspace_connection - - managed_identity - deployment_name: - type: string - optional: True - description: The deployment name. Only needed for managed OSS deployment. - connections_name: - type: string - optional: True - description: Connections name for the endpoint. Only required if authentication_type is "azureml_workspace_connection". - label_column_name: - type: string - optional: True - description: The label column name. - additional_columns: - type: string - optional: True - description: The name(s) for additional columns that could be helpful to calculate some metrics, separated by comma (","). - n_samples: - type: integer - description: The number of top samples send to endpoint. When performance test is enabled, this will be the number of repeated samples send to the endpoint. - optional: True - handle_response_failure: - type: string - optional: False - description: The way that the formatter handles the failed response. 'use_fallback' will replace them with fallback_value and 'neglect' will drop those rows. - enum: - - use_fallback - - neglect - default: use_fallback - fallback_value: - description: The fallback value that can be used when request payload failed. If not provided, the fallback value will be an empty string. - type: string - optional: True - min_endpoint_success_ratio: - type: number - description: The minimum value of (successful_requests / total_requests) required for classifying inference as successful. If (successful_requests / total_requests) < min_endpoint_success_ratio, the experiment will be marked as failed. By default it is 0. (0 means all requests are allowed to fail while 1 means no request should fail.) - min: 0 - max: 1 - default: 0 - optional: False - additional_headers: - type: string - optional: True - description: A stringified json expressing additional headers to be added to each request. - ensure_ascii: - type: boolean - optional: False - default: False - description: If ensure_ascii is true, the output is guaranteed to have all incoming non-ASCII characters escaped. If ensure_ascii is false, these characters will be output as-is. More detailed information can be found at https://docs.python.org/3/library/json.html - max_retry_time_interval: - type: integer - optional: True - description: The maximum time (in seconds) spent retrying a payload. If unspecified, payloads are retried unlimited times. - mini_batch_size: - type: string - optional: true - default: 100KB - description: The mini batch size for parallel run. - endpoint_config_file: - type: uri_file - optional: True - description: The endpoint config file. - initial_worker_count: - type: integer - optional: False - default: 5 - description: The initial number of workers to use for scoring. - max_worker_count: - type: integer - optional: False - default: 200 - description: Overrides initial_worker_count if necessary - instance_count: - type: integer - default: 1 - description: 'Number of nodes in a compute cluster we will run the train step on.' - max_concurrency_per_instance: - type: integer - default: 1 - description: Number of processes that will be run concurrently on any given node. This number should not be larger than 1/2 of the number of cores in an individual node in the specified cluster. - debug_mode: - type: boolean - optional: False - default: False - description: Enable debug mode will print all the debug logs in the score step. - app_insights_connection_string: - type: string - optional: True - description: Application insights connection string where the batch score component will log metrics and logs. -outputs: - predictions: - type: uri_file - description: The prediction data. - performance_metadata: - type: uri_file - description: The performance data. - ground_truth: - type: uri_file - description: The ground truth data that has a one-to-one mapping with the prediction data. - successful_requests: - type: uri_file - description: The successful requests. - failed_requests: - type: uri_file - description: The failed requests. - unsafe_content_blocked_requests: - type: uri_file - description: The unsafe requests that were blocked due to Responsible AI concerns. -jobs: - # Preparer - batch_inference_preparer: - type: command - component: azureml:batch_inference_preparer:0.0.13 - inputs: - input_dataset: ${{parent.inputs.input_dataset}} - model_type: ${{parent.inputs.model_type}} - batch_input_pattern: ${{parent.inputs.batch_input_pattern}} - is_performance_test: ${{parent.inputs.is_performance_test}} - n_samples: ${{parent.inputs.n_samples}} - endpoint_url: ${{parent.inputs.endpoint_url}} - label_column_name: ${{parent.inputs.label_column_name}} - additional_columns: ${{parent.inputs.additional_columns}} - outputs: - formatted_data: - type: mltable - ground_truth_metadata: - type: uri_folder - # Config generation - config_generation: - type: command - component: azureml:batch_benchmark_config_generator:0.0.7 - inputs: - scoring_url: ${{parent.inputs.endpoint_url}} - connection_name: ${{parent.inputs.connections_name}} - deployment_name: ${{parent.inputs.deployment_name}} - authentication_type: ${{parent.inputs.authentication_type}} - debug_mode: ${{parent.inputs.debug_mode}} - additional_headers: ${{parent.inputs.additional_headers}} - ensure_ascii: ${{parent.inputs.ensure_ascii}} - max_retry_time_interval: ${{parent.inputs.max_retry_time_interval}} - initial_worker_count: ${{parent.inputs.initial_worker_count}} - max_worker_count: ${{parent.inputs.max_worker_count}} - configuration_file: ${{parent.inputs.endpoint_config_file}} - model_type: ${{parent.inputs.model_type}} - app_insights_connection_string: ${{parent.inputs.app_insights_connection_string}} - outputs: - batch_score_config: - type: uri_file - path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.json - # Inference - endpoint_batch_score: - type: parallel - component: azureml://registries/azureml/components/batch_score_llm/versions/1.1.9 - inputs: - async_mode: False - data_input_table: ${{parent.jobs.batch_inference_preparer.outputs.formatted_data}} - configuration_file: ${{parent.jobs.config_generation.outputs.batch_score_config}} - outputs: - job_output_path: - type: uri_file - mini_batch_results_output_directory: - type: uri_folder - resources: - instance_count: ${{parent.inputs.instance_count}} - max_concurrency_per_instance: ${{parent.inputs.max_concurrency_per_instance}} - mini_batch_size: ${{parent.inputs.mini_batch_size}} - retry_settings: - timeout: 6000 - max_retries: 10 - environment_variables: - BATCH_SCORE_INITIAL_REQUEST_TIMEOUT: '180' - BATCH_SCORE_DELAY_AFTER_SUCCESSFUL_REQUEST: 'False' - BATCH_SCORE_MAX_REQUEST_TIMEOUT: '300' - - # Reformat - batch_output_formatter: - type: command - component: azureml:batch_output_formatter:0.0.13 - inputs: - model_type: ${{parent.inputs.model_type}} - batch_inference_output: ${{parent.jobs.endpoint_batch_score.outputs.mini_batch_results_output_directory}} - label_column_name: ${{parent.inputs.label_column_name}} - additional_columns: ${{parent.inputs.additional_columns}} - ground_truth_input: ${{parent.jobs.batch_inference_preparer.outputs.ground_truth_metadata}} - fallback_value: ${{parent.inputs.fallback_value}} - handle_response_failure: ${{parent.inputs.handle_response_failure}} - is_performance_test: ${{parent.inputs.is_performance_test}} - use_tiktoken: ${{parent.inputs.use_tiktoken}} - endpoint_url: ${{parent.inputs.endpoint_url}} - min_endpoint_success_ratio: ${{parent.inputs.min_endpoint_success_ratio}} - outputs: - predictions: - type: uri_file - path: ${{parent.outputs.predictions}} - performance_metadata: - type: uri_file - path: ${{parent.outputs.performance_metadata}} - ground_truth: - type: uri_file - path: ${{parent.outputs.ground_truth}} - successful_requests: - type: uri_file - path: ${{parent.outputs.successful_requests}} - failed_requests: - type: uri_file - path: ${{parent.outputs.failed_requests}} - unsafe_content_blocked_requests: - type: uri_file - path: ${{parent.outputs.unsafe_content_blocked_requests}} diff --git a/assets/aml-benchmark/components/batch_benchmark_inference_claude/asset.yaml b/assets/aml-benchmark/components/batch_benchmark_inference_claude/asset.yaml deleted file mode 100644 index fce95a94f7..0000000000 --- a/assets/aml-benchmark/components/batch_benchmark_inference_claude/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: true - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/batch_benchmark_inference_claude diff --git a/assets/aml-benchmark/components/batch_benchmark_inference_claude/spec.yaml b/assets/aml-benchmark/components/batch_benchmark_inference_claude/spec.yaml deleted file mode 100644 index 6312dc64b4..0000000000 --- a/assets/aml-benchmark/components/batch_benchmark_inference_claude/spec.yaml +++ /dev/null @@ -1,222 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline - -name: batch_benchmark_inference_claude -display_name: Batch Benchmark Inference with claude support -description: Components for batch endpoint inference -version: 0.0.2 - -inputs: - input_dataset: - type: uri_folder - description: Input jsonl dataset that contains prompt. For the performance test, this one will be neglected. - optional: True - model_type: - type: string - description: Type of model. Can be one of ('aoai', 'oss', 'vision_oss', 'claude') - optional: True - batch_input_pattern: - type: string - description: >- - The string for the batch input pattern. The input should be the payload format with substitution - for the key for the value put in the `###`. For example, one can use the following format for - a llama text-gen model with a input dataset has `prompt` for the payload - and `_batch_request_metadata` storing the corresponding ground truth. - { - "input_data": - { - "input_string": ["###"], - "parameters": - { - "temperature": 0.6, - "max_new_tokens": 100, - "do_sample": true - } - }, - "_batch_request_metadata": ###<_batch_request_metadata> - } - For AOAI model, the following pattern can be used, - { - "messages": - [ - {"role": "user", "content": "###" } ], - "temperature": 0.7, - "top_p": 0.95, - "frequency_penalty": 0, - "presence_penalty": 0, - "max_tokens": 800, - "stop": null - } - For Vision OSS, the input should be as follows - { - "image": "image1", - "text": "label1, label2, label3" - } - For Claude model, another pattern should be used - { - "prompt": "Prompt text \n\nHuman:\n### Question: Question text\n###Answer:\n\nAssistant:", - "prompt_length": 775, - "completion": "The correct answer" - } - optional: False - endpoint_url: - type: string - optional: False - description: The endpoint url. - is_performance_test: - type: boolean - default: False - description: If true, the performance test will be run and the input dataset will be neglected. - deployment_name: - type: string - optional: True - description: The deployment name. Only needed for managed OSS deployment. - connections_name: - type: string - optional: False - description: Connections name for the endpoint. - label_column_name: - type: string - optional: True - description: The label column name. - n_samples: - type: integer - description: The number of top samples send to endpoint. When performance test is enabled, this will be the number of repeated samples send to the endpoint. - optional: True - handle_response_failure: - type: string - optional: False - description: The way that the formatter handles the failed response. - enum: - - use_fallback - - neglect - default: use_fallback - fallback_value: - description: The fallback value that can be used when request payload failed. If not provided, the fallback value will be an empty string. - type: string - optional: True - additional_headers: - type: string - optional: True - description: A stringified json expressing additional headers to be added to each request. - ensure_ascii: - type: boolean - optional: False - default: False - description: If ensure_ascii is true, the output is guaranteed to have all incoming non-ASCII characters escaped. If ensure_ascii is false, these characters will be output as-is. More detailed information can be found at https://docs.python.org/3/library/json.html - max_retry_time_interval: - type: integer - optional: True - description: The maximum time (in seconds) spent retrying a payload. If unspecified, payloads are retried unlimited times. - mini_batch_size: - type: string - optional: true - default: 100KB - description: The mini batch size for parallel run. - initial_worker_count: - type: integer - optional: False - default: 5 - description: The initial number of workers to use for scoring. - max_worker_count: - type: integer - optional: False - default: 200 - description: Overrides initial_worker_count if necessary - instance_count: - type: integer - default: 1 - description: 'Number of nodes in a compute cluster we will run the train step on.' - max_concurrency_per_instance: - type: integer - default: 1 - description: Number of processes that will be run concurrently on any given node. This number should not be larger than 1/2 of the number of cores in an individual node in the specified cluster. - debug_mode: - type: boolean - optional: False - default: False - description: Enable debug mode will print all the debug logs in the score step. -outputs: - predictions: - type: uri_file - description: The prediction data. - performance_metadata: - type: uri_file - description: The performance data. - ground_truth: - type: uri_file - description: The ground truth data that has a one-to-one mapping with the prediction data. - -jobs: - # Preparer - batch_inference_preparer: - type: command - component: azureml:batch_inference_preparer:0.0.6 - inputs: - input_dataset: ${{parent.inputs.input_dataset}} - model_type: ${{parent.inputs.model_type}} - batch_input_pattern: ${{parent.inputs.batch_input_pattern}} - is_performance_test: ${{parent.inputs.is_performance_test}} - n_samples: ${{parent.inputs.n_samples}} - endpoint_url: ${{parent.inputs.endpoint_url}} - label_column_name: ${{parent.inputs.label_column_name}} - outputs: - formatted_data: - type: mltable - ground_truth_metadata: - type: uri_folder - # Inference - endpoint_batch_score: - type: parallel - component: azureml:batch_benchmark_score:0.0.5 - inputs: - model_type: ${{parent.inputs.model_type}} - online_endpoint_url: ${{parent.inputs.endpoint_url}} - deployment_name: ${{parent.inputs.deployment_name}} - connections_name: ${{parent.inputs.connections_name}} - debug_mode: ${{parent.inputs.debug_mode}} - additional_headers: ${{parent.inputs.additional_headers}} - ensure_ascii: ${{parent.inputs.ensure_ascii}} - max_retry_time_interval: ${{parent.inputs.max_retry_time_interval}} - initial_worker_count: ${{parent.inputs.initial_worker_count}} - max_worker_count: ${{parent.inputs.max_worker_count}} - data_input_table: ${{parent.jobs.batch_inference_preparer.outputs.formatted_data}} - mini_batch_size: ${{parent.inputs.mini_batch_size}} - outputs: - job_out_path: - type: uri_file - mini_batch_results_out_directory: - type: uri_folder - metrics_out_directory: - type: uri_folder - resources: - instance_count: ${{parent.inputs.instance_count}} - max_concurrency_per_instance: ${{parent.inputs.max_concurrency_per_instance}} - mini_batch_size: "100KB" - retry_settings: - timeout: 6000 - max_retries: 10 - # Reformat - batch_output_formatter: - type: command - component: azureml:batch_output_formatter:0.0.6 - inputs: - model_type: ${{parent.inputs.model_type}} - batch_inference_output: ${{parent.jobs.endpoint_batch_score.outputs.mini_batch_results_out_directory}} - label_column_name: ${{parent.inputs.label_column_name}} - ground_truth_input: ${{parent.jobs.batch_inference_preparer.outputs.ground_truth_metadata}} - fallback_value: ${{parent.inputs.fallback_value}} - handle_response_failure: ${{parent.inputs.handle_response_failure}} - is_performance_test: ${{parent.inputs.is_performance_test}} - endpoint_url: ${{parent.inputs.endpoint_url}} - outputs: - predictions: - type: uri_file - path: ${{parent.outputs.predictions}} - performance_metadata: - type: uri_file - path: ${{parent.outputs.performance_metadata}} - ground_truth: - type: uri_file - path: ${{parent.outputs.ground_truth}} - diff --git a/assets/aml-benchmark/components/batch_benchmark_inference_with_inference_compute/asset.yaml b/assets/aml-benchmark/components/batch_benchmark_inference_with_inference_compute/asset.yaml deleted file mode 100644 index dc855c4422..0000000000 --- a/assets/aml-benchmark/components/batch_benchmark_inference_with_inference_compute/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: false - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/batch_benchmark_inference diff --git a/assets/aml-benchmark/components/batch_benchmark_inference_with_inference_compute/spec.yaml b/assets/aml-benchmark/components/batch_benchmark_inference_with_inference_compute/spec.yaml deleted file mode 100644 index 564237e411..0000000000 --- a/assets/aml-benchmark/components/batch_benchmark_inference_with_inference_compute/spec.yaml +++ /dev/null @@ -1,287 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline - -name: batch_benchmark_inference_with_inference_compute -display_name: Batch Benchmark Inference (With Inference compute Support) -description: Components for batch endpoint inference with inference compute support. -version: 0.0.7 - -inputs: - input_dataset: - type: uri_folder - description: Input jsonl dataset that contains prompt. For the performance test, this one will be neglected. - optional: True - model_type: - type: string - description: Type of model's input and output contract. Can be one of ('oai', 'oss', 'vision_oss') - optional: False - enum: - - oai - - oss - - vision_oss - inference_compute: - type: string - description: Compute to be used for inferencing. - optional: False - batch_input_pattern: - type: string - description: >- - The string for the batch input pattern. The input should be the payload format with substitution - for the key for the value put in the `###`. For example, one can use the following format for - a llama text-gen model with a input dataset has `prompt` for the payload - and `_batch_request_metadata` storing the corresponding ground truth. - { - "input_data": - { - "input_string": ["###"], - "parameters": - { - "temperature": 0.6, - "max_new_tokens": 100, - "do_sample": true - } - }, - "_batch_request_metadata": ###<_batch_request_metadata> - } - - For AOAI chat completion model, the following pattern can be used, - { - "messages": ###, - "temperature": 0.7, - "top_p": 0.95, - "frequency_penalty": 0, - "presence_penalty": 0, - "max_tokens": 800, - "stop": null - } - optional: False - endpoint_url: - type: string - optional: False - description: The URL of the endpoint. - is_performance_test: - type: boolean - default: False - description: If true, the performance test will be run and the input dataset will be neglected. - use_tiktoken: - type: boolean - default: False - description: If true, `cl100k_base` encoder is used from tiktoken to calculate token count; overrides any other token count calculation. - optional: True - authentication_type: - type: string - optional: False - description: Authentication type for endpoint- azureml_workspace_connection or managed_identity. - default: azureml_workspace_connection - enum: - - azureml_workspace_connection - - managed_identity - deployment_name: - type: string - optional: True - description: The deployment name. Only needed for managed OSS deployment. - connections_name: - type: string - optional: True - description: Connections name for the endpoint. Only required if authentication_type is "azureml_workspace_connection". - label_column_name: - type: string - optional: True - description: The label column name. - additional_columns: - type: string - optional: True - description: The name(s) for additional columns that could be helpful to calculate some metrics, separated by comma (","). - n_samples: - type: integer - description: The number of top samples send to endpoint. When performance test is enabled, this will be the number of repeated samples send to the endpoint. - optional: True - handle_response_failure: - type: string - optional: False - description: The way that the formatter handles the failed response. 'use_fallback' will replace them with fallback_value and 'neglect' will drop those rows. - enum: - - use_fallback - - neglect - default: use_fallback - fallback_value: - description: The fallback value that can be used when request payload failed. If not provided, the fallback value will be an empty string. - type: string - optional: True - min_endpoint_success_ratio: - type: number - description: The minimum value of (successful_requests / total_requests) required for classifying inference as successful. If (successful_requests / total_requests) < min_endpoint_success_ratio, the experiment will be marked as failed. By default it is 0. (0 means all requests are allowed to fail while 1 means no request should fail.) - min: 0 - max: 1 - default: 0 - optional: False - additional_headers: - type: string - optional: True - description: A stringified json expressing additional headers to be added to each request. - ensure_ascii: - type: boolean - optional: False - default: False - description: If ensure_ascii is true, the output is guaranteed to have all incoming non-ASCII characters escaped. If ensure_ascii is false, these characters will be output as-is. More detailed information can be found at https://docs.python.org/3/library/json.html - max_retry_time_interval: - type: integer - optional: True - description: The maximum time (in seconds) spent retrying a payload. If unspecified, payloads are retried unlimited times. - mini_batch_size: - type: string - optional: true - default: 100KB - description: The mini batch size for parallel run. - endpoint_config_file: - type: uri_file - optional: True - description: The endpoint config file. - initial_worker_count: - type: integer - optional: False - default: 5 - description: The initial number of workers to use for scoring. - max_worker_count: - type: integer - optional: False - default: 200 - description: Overrides initial_worker_count if necessary - instance_count: - type: integer - default: 1 - description: 'Number of nodes in a compute cluster we will run the train step on.' - max_concurrency_per_instance: - type: integer - default: 1 - description: Number of processes that will be run concurrently on any given node. This number should not be larger than 1/2 of the number of cores in an individual node in the specified cluster. - debug_mode: - type: boolean - optional: False - default: False - description: Enable debug mode will print all the debug logs in the score step. - app_insights_connection_string: - type: string - optional: True - description: Application insights connection string where the batch score component will log metrics and logs. -outputs: - predictions: - type: uri_file - description: The prediction data. - performance_metadata: - type: uri_file - description: The performance data. - ground_truth: - type: uri_file - description: The ground truth data that has a one-to-one mapping with the prediction data. - successful_requests: - type: uri_file - description: The successful requests. - failed_requests: - type: uri_file - description: The failed requests. - unsafe_content_blocked_requests: - type: uri_file - description: The unsafe requests that were blocked due to Responsible AI concerns. -jobs: - # Preparer - batch_inference_preparer: - type: command - component: azureml:batch_inference_preparer:0.0.13 - inputs: - input_dataset: ${{parent.inputs.input_dataset}} - model_type: ${{parent.inputs.model_type}} - batch_input_pattern: ${{parent.inputs.batch_input_pattern}} - is_performance_test: ${{parent.inputs.is_performance_test}} - n_samples: ${{parent.inputs.n_samples}} - endpoint_url: ${{parent.inputs.endpoint_url}} - label_column_name: ${{parent.inputs.label_column_name}} - additional_columns: ${{parent.inputs.additional_columns}} - outputs: - formatted_data: - type: mltable - ground_truth_metadata: - type: uri_folder - # Config generation - config_generation: - type: command - component: azureml:batch_benchmark_config_generator:0.0.7 - inputs: - scoring_url: ${{parent.inputs.endpoint_url}} - connection_name: ${{parent.inputs.connections_name}} - deployment_name: ${{parent.inputs.deployment_name}} - authentication_type: ${{parent.inputs.authentication_type}} - debug_mode: ${{parent.inputs.debug_mode}} - additional_headers: ${{parent.inputs.additional_headers}} - ensure_ascii: ${{parent.inputs.ensure_ascii}} - max_retry_time_interval: ${{parent.inputs.max_retry_time_interval}} - initial_worker_count: ${{parent.inputs.initial_worker_count}} - max_worker_count: ${{parent.inputs.max_worker_count}} - configuration_file: ${{parent.inputs.endpoint_config_file}} - model_type: ${{parent.inputs.model_type}} - app_insights_connection_string: ${{parent.inputs.app_insights_connection_string}} - outputs: - batch_score_config: - type: uri_file - path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.json - # Inference - endpoint_batch_score: - type: parallel - component: azureml://registries/azureml/components/batch_score_llm/versions/1.1.9 - inputs: - async_mode: False - data_input_table: ${{parent.jobs.batch_inference_preparer.outputs.formatted_data}} - configuration_file: ${{parent.jobs.config_generation.outputs.batch_score_config}} - outputs: - job_output_path: - type: uri_file - mini_batch_results_output_directory: - type: uri_folder - resources: - instance_count: ${{parent.inputs.instance_count}} - max_concurrency_per_instance: ${{parent.inputs.max_concurrency_per_instance}} - mini_batch_size: ${{parent.inputs.mini_batch_size}} - retry_settings: - timeout: 6000 - max_retries: 10 - compute: ${{parent.inputs.inference_compute}} - environment_variables: - BATCH_SCORE_INITIAL_REQUEST_TIMEOUT: '180' - BATCH_SCORE_DELAY_AFTER_SUCCESSFUL_REQUEST: 'False' - BATCH_SCORE_MAX_REQUEST_TIMEOUT: '300' - - # Reformat - batch_output_formatter: - type: command - component: azureml:batch_output_formatter:0.0.13 - inputs: - model_type: ${{parent.inputs.model_type}} - batch_inference_output: ${{parent.jobs.endpoint_batch_score.outputs.mini_batch_results_output_directory}} - label_column_name: ${{parent.inputs.label_column_name}} - additional_columns: ${{parent.inputs.additional_columns}} - ground_truth_input: ${{parent.jobs.batch_inference_preparer.outputs.ground_truth_metadata}} - fallback_value: ${{parent.inputs.fallback_value}} - handle_response_failure: ${{parent.inputs.handle_response_failure}} - is_performance_test: ${{parent.inputs.is_performance_test}} - use_tiktoken: ${{parent.inputs.use_tiktoken}} - endpoint_url: ${{parent.inputs.endpoint_url}} - min_endpoint_success_ratio: ${{parent.inputs.min_endpoint_success_ratio}} - outputs: - predictions: - type: uri_file - path: ${{parent.outputs.predictions}} - performance_metadata: - type: uri_file - path: ${{parent.outputs.performance_metadata}} - ground_truth: - type: uri_file - path: ${{parent.outputs.ground_truth}} - successful_requests: - type: uri_file - path: ${{parent.outputs.successful_requests}} - failed_requests: - type: uri_file - path: ${{parent.outputs.failed_requests}} - unsafe_content_blocked_requests: - type: uri_file - path: ${{parent.outputs.unsafe_content_blocked_requests}} diff --git a/assets/aml-benchmark/components/batch_benchmark_score/asset.yaml b/assets/aml-benchmark/components/batch_benchmark_score/asset.yaml deleted file mode 100644 index dc855c4422..0000000000 --- a/assets/aml-benchmark/components/batch_benchmark_score/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: false - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/batch_benchmark_inference diff --git a/assets/aml-benchmark/components/batch_benchmark_score/spec.yaml b/assets/aml-benchmark/components/batch_benchmark_score/spec.yaml deleted file mode 100644 index 8985d3c3e2..0000000000 --- a/assets/aml-benchmark/components/batch_benchmark_score/spec.yaml +++ /dev/null @@ -1,94 +0,0 @@ -$schema: http://azureml/sdk-2-0/ParallelComponent.json -name: batch_benchmark_score -version: 0.0.6 -display_name: Batch Benchmark Score -is_deterministic: False -type: parallel -inputs: - data_input_table: - type: mltable - optional: False - description: The data to be split and scored in parallel. - model_type: - type: string - description: Type of model. Can be one of ('aoai', 'oss', 'vision_oss', 'claude') - optional: True - online_endpoint_url: - type: string - optional: False - description: Online endpoint url. - deployment_name: - type: string - optional: True - description: Deployment name for the model. Can be optional for OAI models. - connections_name: - type: string - optional: False - description: Connections name for the endpoint. - additional_properties: - type: string - optional: True - description: A stringified json expressing additional properties to be added to each request body at the top level. - additional_headers: - type: string - optional: True - description: A stringified json expressing additional headers to be added to each request. - mini_batch_size: - type: string - optional: True - description: The mini batch size for parallel run. - user_agent_segment: - type: string - optional: True - description: A user-provided segment to be included in the User-Agent header for all requests sent by the component. The overall user agent format is "BatchScoreComponent:/Workload:::/Run::" - ensure_ascii: - type: boolean - optional: False - default: False - description: If ensure_ascii is true, the output is guaranteed to have all incoming non-ASCII characters escaped. If ensure_ascii is false, these characters will be output as-is. More defailted information can be found at https://docs.python.org/3/library/json.html - max_retry_time_interval: - type: integer - optional: True - description: The maximum time (in seconds) spent retrying a payload. If unspecified, payloads are retried unlimited times. - initial_worker_count: - type: integer - optional: False - default: 5 - max_worker_count: - type: integer - optional: False - default: 200 - description: Overrides initial_worker_count if necessary - debug_mode: - type: boolean - optional: False - default: False - description: Enable debug mode will print all the debug logs in the score step. - deployment_metadata: - type: uri_file - optional: True - description: The deployment metadata directory that contains deployment details. -outputs: - job_out_path: - type: uri_file - mini_batch_results_out_directory: - type: uri_folder - metrics_out_directory: - type: uri_folder -task: - code: ../src - environment: azureml://registries/azureml/environments/model-evaluation/versions/20 - program_arguments: --append_row_safe_output True --debug_mode ${{inputs.debug_mode}} $[[--model_type ${{inputs.model_type}}]] --online_endpoint_url ${{inputs.online_endpoint_url}} $[[--additional_properties ${{inputs.additional_properties}}]] $[[--additional_headers ${{inputs.additional_headers}}]] $[[--user_agent_segment ${{inputs.user_agent_segment}}]] --metrics_out_directory ${{outputs.metrics_out_directory}} --tally_failed_requests False --tally_exclusions none --run_type parallel --segment_large_requests disabled --segment_max_token_size 600 --ensure_ascii ${{inputs.ensure_ascii}} --output_behavior append_row --initial_worker_count ${{inputs.initial_worker_count}} --max_worker_count ${{inputs.max_worker_count}} $[[--max_retry_time_interval ${{inputs.max_retry_time_interval}}]] --save_mini_batch_results enabled --mini_batch_results_out_directory ${{outputs.mini_batch_results_out_directory}} --connections_name ${{inputs.connections_name}} $[[--deployment_name ${{inputs.deployment_name}}]] $[[--input_metadata ${{inputs.deployment_metadata}}]] $[[--mini_batch_size ${{inputs.mini_batch_size}}]] - entry_script: aml_benchmark.batch_benchmark_score.batch_score.main - type: run_function - append_row_to: ${{outputs.job_out_path}} -input_data: ${{inputs.data_input_table}} -error_threshold: -1 -logging_level: DEBUG -max_concurrency_per_instance: 1 -mini_batch_error_threshold: 5 -mini_batch_size: '3072' -retry_settings: - max_retries: 10 - timeout: 6000 -... \ No newline at end of file diff --git a/assets/aml-benchmark/components/batch_inference_preparer/asset.yaml b/assets/aml-benchmark/components/batch_inference_preparer/asset.yaml deleted file mode 100644 index 2238bb87fc..0000000000 --- a/assets/aml-benchmark/components/batch_inference_preparer/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: true - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/batch_inference_preparer diff --git a/assets/aml-benchmark/components/batch_inference_preparer/spec.yaml b/assets/aml-benchmark/components/batch_inference_preparer/spec.yaml deleted file mode 100644 index 1e7e7e8d49..0000000000 --- a/assets/aml-benchmark/components/batch_inference_preparer/spec.yaml +++ /dev/null @@ -1,80 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -name: batch_inference_preparer -display_name: Batch Inference Preparer -description: Prepare the jsonl file and endpoint for batch inference component. -version: 0.0.14 -is_deterministic: true - -inputs: - input_dataset: - type: uri_folder - description: Input jsonl dataset that contains prompt. For the performance test, this one will be neglected. - optional: True - model_type: - type: string - description: Type of model. Can be one of ('aoai', 'oss', 'vision_oss', 'claude') - optional: True - batch_input_pattern: - type: string - description: >- - The string for the batch input pattern. The input should be the payload format with substitution - for the key for the value put in the `###`. For example, one can use the following format for - a llama text-gen model with a input dataset has `prompt` for the payload - and `_batch_request_metadata` storing the corresponding ground truth. - {"input_data": - { - "input_string": ["###"], - "parameters": - { - "temperature": 0.6, - "max_new_tokens": 100, - "do_sample": true - } - }, - "_batch_request_metadata": ###<_batch_request_metadata> - } - optional: False - label_column_name: - type: string - optional: True - description: The label column name. - additional_columns: - type: string - optional: True - description: Name(s) of additional column(s) that could be useful to compute metrics, separated by comma (","). - is_performance_test: - type: boolean - default: False - description: If true, the performance test will be run. - endpoint_url: - type: string - optional: True - description: The endpoint name or url. - n_samples: - type: integer - description: The number of top samples send to endpoint. - optional: True -outputs: - formatted_data: - type: mltable - description: Path to the folder where the payload will be stored. - ground_truth_metadata: - type: uri_folder - description: Path to the folder where the ground truth metadata will be stored. - -code: ../src -environment: azureml://registries/azureml/environments/evaluation/labels/latest -command: >- - python -m aml_benchmark.batch_inference_preparer.main - --batch_input_pattern '${{inputs.batch_input_pattern}}' - --formatted_data ${{outputs.formatted_data}} - --output_metadata ${{outputs.ground_truth_metadata}} - --is_performance_test ${{inputs.is_performance_test}} - $[[--model_type ${{inputs.model_type}}]] - $[[--input_dataset ${{inputs.input_dataset}}]] - $[[--n_samples ${{inputs.n_samples}}]] - $[[--endpoint_url ${{inputs.endpoint_url}}]] - $[[--label_key ${{inputs.label_column_name}}]] - $[[--additional_columns ${{inputs.additional_columns}}]] diff --git a/assets/aml-benchmark/components/batch_output_formatter/asset.yaml b/assets/aml-benchmark/components/batch_output_formatter/asset.yaml deleted file mode 100644 index dd15110bee..0000000000 --- a/assets/aml-benchmark/components/batch_output_formatter/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: true - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/batch_output_formatter diff --git a/assets/aml-benchmark/components/batch_output_formatter/spec.yaml b/assets/aml-benchmark/components/batch_output_formatter/spec.yaml deleted file mode 100644 index 881d99112b..0000000000 --- a/assets/aml-benchmark/components/batch_output_formatter/spec.yaml +++ /dev/null @@ -1,97 +0,0 @@ -name: batch_output_formatter -version: 0.0.15 -display_name: Batch Output Formatter -is_deterministic: True -type: command -description: Output Formatter for batch inference output -inputs: - model_type: - type: string - description: Type of model. Can be one of ('oai', 'oss', 'vision_oss', 'claude') - optional: True - batch_inference_output: - type: uri_folder - description: The raw batch inference output. - optional: False - label_column_name: - type: string - optional: True - description: The label column name. - additional_columns: - type: string - optional: True - description: Name(s) of additional column(s) that could be useful to compute metrics, separated by comma (","). - endpoint_url: - type: string - optional: True - ground_truth_input: - type: uri_folder - description: The raw batch inference output. - optional: True - handle_response_failure: - type: string - optional: False - description: The way that the formatter handles the failed response. - enum: - - use_fallback - - neglect - default: use_fallback - fallback_value: - description: The fallback value that can be used when request payload failed. - type: string - optional: True - min_endpoint_success_ratio: - description: The minimum value of (successful_requests / total_requests) required for classifying inference as successful. If (successful_requests / total_requests) < min_endpoint_success_ratio, the experiment will be marked as failed. By default it is 0. (0 means all requests are allowed to fail while 1 means no request should fail.) - type: number - min: 0 - max: 1 - default: 0 - optional: False - is_performance_test: - type: boolean - default: False - description: If true, the performance test will be run. - optional: False - use_tiktoken: - type: boolean - default: False - description: If true, `cl100k_base` encoder is used from tiktoken to calculate token count; overrides any other token count calculation. - optional: True -outputs: - predictions: - type: uri_file - performance_metadata: - type: uri_file - ground_truth: - type: uri_file - successful_requests: - type: uri_file - failed_requests: - type: uri_file - unsafe_content_blocked_requests: - type: uri_file -code: ../src -environment: azureml://registries/azureml/environments/evaluation/labels/latest - -resources: - instance_count: 1 - -command: >- - python -m aml_benchmark.batch_output_formatter.main - $[[--model_type ${{inputs.model_type}}]] - --batch_inference_output ${{inputs.batch_inference_output}} - --prediction_data ${{outputs.predictions}} - --min_endpoint_success_ratio ${{inputs.min_endpoint_success_ratio}} - --perf_data ${{outputs.performance_metadata}} - --successful_requests_data ${{outputs.successful_requests}} - --failed_requests_data ${{outputs.failed_requests}} - --blocked_requests_data ${{outputs.unsafe_content_blocked_requests}} - --predict_ground_truth_data ${{outputs.ground_truth}} - $[[--endpoint_url ${{inputs.endpoint_url}}]] - $[[--label_key ${{inputs.label_column_name}}]] - $[[--additional_columns ${{inputs.additional_columns}}]] - --handle_response_failure ${{inputs.handle_response_failure}} - --is_performance_test ${{inputs.is_performance_test}} - $[[--use_tiktoken ${{inputs.use_tiktoken}}]] - $[[--fallback_value ${{inputs.fallback_value}}]] - $[[--ground_truth_input ${{inputs.ground_truth_input}}]] diff --git a/assets/aml-benchmark/components/batch_resource_manager/asset.yaml b/assets/aml-benchmark/components/batch_resource_manager/asset.yaml deleted file mode 100644 index 32297283d8..0000000000 --- a/assets/aml-benchmark/components/batch_resource_manager/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: false - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/batch_resource_manager diff --git a/assets/aml-benchmark/components/batch_resource_manager/spec.yaml b/assets/aml-benchmark/components/batch_resource_manager/spec.yaml deleted file mode 100644 index 6a80526527..0000000000 --- a/assets/aml-benchmark/components/batch_resource_manager/spec.yaml +++ /dev/null @@ -1,175 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command -is_deterministic: False -name: batch_resource_manager -display_name: Batch Inference Resource Manager -description: Resource Manager for batch inference. -version: 0.0.8 - -inputs: - wait_input: - type: uri_file - mode: direct - optional: True - endpoint_name: - type: string - optional: True - description: The endpoint name or url. If not provided, the endpoint will be created. - deployment_name: - type: string - optional: True - description: The deployment name. If not provided, the deployment will be created. - model: - type: string - optional: True - description: Model name or model asset path. Only needed for managed deployment. - model_type: - type: string - optional: True - enum: - - oss - - oai - description: Model name or model asset path. Only needed for managed deployment. - model_version: - type: string - optional: True - description: Model version. If model asset path provided, this one can be optional. - is_finetuned_model: - type: boolean - optional: True - default: False - description: If the model is a finetuned model. - finetuned_subscription_id: - type: string - optional: True - description: The subscription id for the finetuned model. - finetuned_resource_group: - type: string - optional: True - description: The resource group for the finetuned model. - finetuned_workspace: - type: string - optional: True - description: The workspace name for the finetuned model. - deployment_sku: - type: string - optional: True - description: The sku for the deployment. Only needed for managed deployment. - deployment_env: - type: string - optional: True - description: The env for the deployment. - endpoint_workspace: - type: string - optional: True - description: The workspace name for the endpoint. If not provided, the same workspace of the run will be used. - endpoint_resource_group: - type: string - optional: True - description: The resource group name for the endpoint. If not provided, the same resource group of the run will be used. - endpoint_subscription_id: - type: string - optional: True - description: The subscription id for the endpoint. If not provided, the same subscription id of the run will be used. - endpoint_location: - type: string - optional: True - description: The location for the endpoint. Only required for the AOAI endpoint. If not provided, the same location of the run will be used. - connections_name: - type: string - description: The connections name. - optional: True - deletion_model: - type: boolean - optional: True - default: True - description: If true, the managed resources created during the run will be deleted. - do_quota_validation: - type: boolean - optional: True - default: True - description: If doing quota valiation or not for AOAI model. - redeploy_model: - type: boolean - optional: True - default: False - description: If deploying the same model for OSS endpoint. - use_max_quota: - type: boolean - optional: True - default: True - description: If using max quota or not for AOAI model. - deployment_metadata: - type: uri_file - optional: True - description: The deployment metadata directory that contains deployment details. - finetuned_model_metadata: - type: uri_file - optional: True - description: The finetuned_model metadata contains finetuned model id. - additional_deployment_env_vars: - type: string - optional: True - description: additional deployment env vars - delete_managed_deployment: - type: boolean - optional: True - default: True - description: If deleting the managed deployment. - deployment_retries: - type: integer - optional: True - default: 1 - description: The number of retries for deployment. - deployment_retry_interval_seconds: - type: integer - optional: True - default: 60 - description: The interval seconds for deployment retry. - wait_finetuned_step: - type: boolean - optional: True - default: False - description: If waiting the finetuned step to finish. - finetuned_step_name: - type: string - optional: True - description: The finetuned step name. -outputs: - output_metadata: - type: uri_file - description: Path to the folder where the deployment metadata will be stored. - -code: ../src -environment: azureml://registries/azureml/environments/model-evaluation/labels/latest -command: >- - python -m aml_benchmark.batch_resource_manager.main - --output_metadata ${{outputs.output_metadata}} - $[[--endpoint_name ${{inputs.endpoint_name}}]] - $[[--model_type ${{inputs.model_type}}]] - $[[--deployment_name ${{inputs.deployment_name}}]] - $[[--model ${{inputs.model}}]] - $[[--model_version ${{inputs.model_version}}]] - $[[--deployment_sku ${{inputs.deployment_sku}}]] - $[[--endpoint_workspace ${{inputs.endpoint_workspace}}]] - $[[--endpoint_resource_group ${{inputs.endpoint_resource_group}}]] - $[[--endpoint_subscription_id '${{inputs.endpoint_subscription_id}}']] - $[[--endpoint_location '${{inputs.endpoint_location}}']] - $[[--connections_name ${{inputs.connections_name}}]] - $[[--deletion_model ${{inputs.deletion_model}}]] - $[[--delete_managed_deployment ${{inputs.delete_managed_deployment}}]] - $[[--use_max_quota ${{inputs.use_max_quota}}]] - $[[--do_quota_validation ${{inputs.do_quota_validation}}]] - $[[--redeploy_model ${{inputs.redeploy_model}}]] - $[[--deployment_metadata ${{inputs.deployment_metadata}}]] - $[[--deployment_env ${{inputs.deployment_env}}]] - $[[--additional_deployment_env_vars '${{inputs.additional_deployment_env_vars}}']] - $[[--is_finetuned_model ${{inputs.is_finetuned_model}}]] - $[[--finetuned_subscription_id ${{inputs.finetuned_subscription_id}}]] - $[[--finetuned_resource_group ${{inputs.finetuned_resource_group}}]] - $[[--finetuned_workspace ${{inputs.finetuned_workspace}}]] - $[[--deployment_retries ${{inputs.deployment_retries}}]] - $[[--deployment_retry_interval_seconds ${{inputs.deployment_retry_interval_seconds}}]] - $[[--wait_finetuned_step ${{inputs.wait_finetuned_step}}]] - $[[--finetuned_step_name ${{inputs.finetuned_step_name}}]] - $[[--finetuned_model_metadata ${{inputs.finetuned_model_metadata}}]] diff --git a/assets/aml-benchmark/components/benchmark_result_aggregator/asset.yaml b/assets/aml-benchmark/components/benchmark_result_aggregator/asset.yaml deleted file mode 100644 index d429452ba9..0000000000 --- a/assets/aml-benchmark/components/benchmark_result_aggregator/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: true - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/benchmark_result_aggregator \ No newline at end of file diff --git a/assets/aml-benchmark/components/benchmark_result_aggregator/spec.yaml b/assets/aml-benchmark/components/benchmark_result_aggregator/spec.yaml deleted file mode 100644 index ec372ff4c2..0000000000 --- a/assets/aml-benchmark/components/benchmark_result_aggregator/spec.yaml +++ /dev/null @@ -1,31 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -name: benchmark_result_aggregator -display_name: Benchmark result aggregator -description: Aggregate quality metrics, performance metrics and all of the metadata from the pipeline. Also add them to the root run. -version: 0.0.14 -is_deterministic: false - -inputs: - quality_metrics: - type: uri_folder - description: The quality metrics in json format. - optional: True - performance_metrics: - type: uri_folder - description: The performance metrics in json format. - optional: True - -outputs: - benchmark_result: - type: uri_file - description: The json file with all of the aggregated results. - -code: ../src -environment: azureml://registries/azureml/environments/model-evaluation/labels/latest -command: >- - python -m aml_benchmark.benchmark_result_aggregator.main - $[[--quality_metrics_path ${{inputs.quality_metrics}}]] - $[[--performance_metrics_path ${{inputs.performance_metrics}}]] - --output_dataset_path ${{outputs.benchmark_result}} diff --git a/assets/aml-benchmark/components/compute_performance_metrics/asset.yaml b/assets/aml-benchmark/components/compute_performance_metrics/asset.yaml deleted file mode 100644 index ded20fb763..0000000000 --- a/assets/aml-benchmark/components/compute_performance_metrics/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: true - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/compute_performance_metrics \ No newline at end of file diff --git a/assets/aml-benchmark/components/compute_performance_metrics/spec.yaml b/assets/aml-benchmark/components/compute_performance_metrics/spec.yaml deleted file mode 100644 index 8016d3dc2b..0000000000 --- a/assets/aml-benchmark/components/compute_performance_metrics/spec.yaml +++ /dev/null @@ -1,73 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -name: compute_performance_metrics -display_name: Compute Performance Metrics -description: Performs performance metric post processing using data from a model inference run. -version: 0.0.11 -is_deterministic: true - -inputs: - performance_data: - type: uri_folder - description: Data outputted by model inferencing that contains performance data. - optional: False - percentiles: - type: string - description: Comma-separated list of percentiles of latency to be calculated. - optional: True - default: "50,90,99" - batch_size_column_name: - type: string - description: The name of the column that contains the batch size information. Ex. "batch_size" - optional: False - start_time_column_name: - type: string - description: The name of the column that contains the start timestamp in ISO 8601 format. Ex. "start_time_iso" - optional: False - end_time_column_name: - type: string - description: The name of the column that contains the end timestamp in ISO 8601 format. Ex. "end_time_iso" - optional: False - input_token_count_column_name: - type: string - description: The name of the column that contains the input token count information. Ex. "input_token_count" - optional: True - output_token_count_column_name: - type: string - description: The name of the column that contains the output token count information. Ex. "output_token_count" - optional: True - input_char_count_column_name: - type: string - description: The name of the column that contains the input character count information. Ex. "input_char_count" - optional: True - output_char_count_column_name: - type: string - description: The name of the column that contains the output character count information. Ex. "output_char_count" - optional: True - is_batch_inference_result: - type: boolean - description: If True, we will use the time between the first and last request to calculate the tokens per second and request per second. If False, we will use individual request time to calculate the tokens per second and request per second. - optional: False - default: True - -outputs: - performance_result: - type: uri_file - description: Path to the file where the calculated performance metric results will be stored. - -code: ../src -environment: azureml://registries/azureml/environments/model-evaluation/labels/latest -command: >- - python -m aml_benchmark.compute_performance_metrics.main - --performance_data ${{inputs.performance_data}} - --batch_size_column_name ${{inputs.batch_size_column_name}} - --start_time_column_name ${{inputs.start_time_column_name}} - --end_time_column_name ${{inputs.end_time_column_name}} - --performance_result ${{outputs.performance_result}} - --is_batch_inference_result ${{inputs.is_batch_inference_result}} - $[[--percentiles ${{inputs.percentiles}}]] - $[[--input_token_count_column_name ${{inputs.input_token_count_column_name}}]] - $[[--output_token_count_column_name ${{inputs.output_token_count_column_name}}]] - $[[--input_char_count_column_name ${{inputs.input_char_count_column_name}}]] - $[[--output_char_count_column_name ${{inputs.output_char_count_column_name}}]] \ No newline at end of file diff --git a/assets/aml-benchmark/components/dataset_downloader/asset.yaml b/assets/aml-benchmark/components/dataset_downloader/asset.yaml deleted file mode 100644 index f1335188b2..0000000000 --- a/assets/aml-benchmark/components/dataset_downloader/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: true - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/dataset_downloader \ No newline at end of file diff --git a/assets/aml-benchmark/components/dataset_downloader/spec.yaml b/assets/aml-benchmark/components/dataset_downloader/spec.yaml deleted file mode 100644 index 1fb4e11435..0000000000 --- a/assets/aml-benchmark/components/dataset_downloader/spec.yaml +++ /dev/null @@ -1,44 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -name: dataset_downloader -display_name: Dataset Downloader -description: Downloads the dataset onto blob store. -version: 0.0.11 - -inputs: - dataset_name: - type: string - description: Name of the dataset to download from HuggingFace; must be null if script is specified. - optional: True - configuration: - type: string - description: >- - If a specific sub-dataset of the dataset to download, specify the configuration name; specify 'all' to download all - configurations; specify comma-separated values to download multiple configurations (Ex: config1,config2). Else, leave it null. - optional: True - split: - type: string - description: If a specific split of the dataset to download, specify the split name; specify 'all' to download all splits. - optional: False - script_path: - type: uri_file - description: >- - Path to the dataset loading script. Must follow the HuggingFace dataset loading script template. - For example, please refer https://github.com/Azure/azureml-assets/tree/main/assets/aml-benchmark/scripts/data_loaders. - optional: True - -outputs: - output_dataset: - type: uri_folder - description: Path to the directory where the dataset will be downloaded. - -code: ../src -environment: azureml://registries/azureml/environments/model-evaluation/labels/latest -command: >- - python -m aml_benchmark.dataset_downloader.main - $[[--dataset_name ${{inputs.dataset_name}}]] - $[[--configuration ${{inputs.configuration}}]] - --split ${{inputs.split}} - $[[--script ${{inputs.script_path}}]] - --output_dataset ${{outputs.output_dataset}} \ No newline at end of file diff --git a/assets/aml-benchmark/components/dataset_preprocessor/asset.yaml b/assets/aml-benchmark/components/dataset_preprocessor/asset.yaml deleted file mode 100644 index 4ce6dad748..0000000000 --- a/assets/aml-benchmark/components/dataset_preprocessor/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: true - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/dataset_preprocessor \ No newline at end of file diff --git a/assets/aml-benchmark/components/dataset_preprocessor/spec.yaml b/assets/aml-benchmark/components/dataset_preprocessor/spec.yaml deleted file mode 100644 index 99e8c947e9..0000000000 --- a/assets/aml-benchmark/components/dataset_preprocessor/spec.yaml +++ /dev/null @@ -1,64 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -name: dataset_preprocessor -display_name: Dataset Preprocessor -description: Dataset Preprocessor -version: 0.0.11 -is_deterministic: true - -inputs: - dataset: - type: uri_file - description: | - Path to load the dataset. - optional: False - template_input: - type: string - description: | - JSON serialized dictionary to perform preprocessing on the dataset. - Must contain key-value pair where key is the name of the column enclosed in " " and associated dict value is - presented using jinja template logic - which will be used to extract respective value from the dataset. - Example format: - {"": {{key in the json file for this column}}, ....}. The processed output will be dumped to a - jsonl file in this format: {"": "", ....}. - optional: True - script_path: - type: uri_file - description: | - Path to the custom preprocessor python script provided by user. If both this input and template_input` are provided, - then, `template_input` is ignored. - This [base template] - (https://github.com/Azure/azureml-assets/tree/main/assets/aml-benchmark/scripts/custom_dataset_preprocessors/base_preprocessor_template.py) - should be used to create a custom preprocessor script. - optional: True - encoder_config: - type: string - description: | - JSON serialized dictionary to perform mapping. Must contain key-value pair "column_name": "" whose - value needs mapping, followed by key-value pairs containing idtolabel or labeltoid mappers. - Example format: - {"column_name":"label", "0":"NEUTRAL", "1":"ENTAILMENT", "2":"CONTRADICTION"} - optional: True - -outputs: - output_dataset: - type: uri_file - description: | - Path to the output the processed .jsonl file. - -code: ../src - -environment: azureml://registries/azureml/environments/model-evaluation/labels/latest - -command: >- - python -m aml_benchmark.dataset_preprocessor.main - --dataset ${{inputs.dataset}} - $[[--template_input '${{inputs.template_input}}']] - $[[--script_path ${{inputs.script_path}}]] - $[[--encoder_config '${{inputs.encoder_config}}']] - --output_dataset ${{outputs.output_dataset}} - -resources: - instance_count: 1 \ No newline at end of file diff --git a/assets/aml-benchmark/components/dataset_sampler/asset.yaml b/assets/aml-benchmark/components/dataset_sampler/asset.yaml deleted file mode 100644 index 36afecc973..0000000000 --- a/assets/aml-benchmark/components/dataset_sampler/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: true - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/dataset_sampler \ No newline at end of file diff --git a/assets/aml-benchmark/components/dataset_sampler/spec.yaml b/assets/aml-benchmark/components/dataset_sampler/spec.yaml deleted file mode 100644 index aa9ddf75f7..0000000000 --- a/assets/aml-benchmark/components/dataset_sampler/spec.yaml +++ /dev/null @@ -1,58 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -name: dataset_sampler -display_name: Dataset Sampler -description: Samples a dataset containing JSONL file(s). -version: 0.0.11 - -inputs: - dataset: - type: uri_folder - description: Path to the input directory or .jsonl file from which the data will be sampled. - optional: False - sampling_style: - type: string - optional: False - default: head - description: >- - The sampling method to use. Use `head` to sample from beginning of the file, `tail` to sample from the end - of the file, `random` to sample randomly and `duplicate` to append the input file to itself until the correct - output size is reached. - enum: - - random - - head - - tail - - duplicate - sampling_ratio: - type: number - min: 0 - optional: True - description: >- - Portion of the dataset to be sampled. If `sampling style` is not `duplicate`, must be a float in (0,1]; must be null if - `n_samples` is specified. NOTE: If the `sampling_style` is `duplicate`, the component will duplicate the data in a "round robin" - fashion, going over the input several times. This operation is very slow! So be cautious when using for large datasets. - n_samples: - type: integer - optional: True - description: Absolute number of samples to be taken (alternative to `sampling_ratio`); must be null if `sampling_ratio` is specified. - random_seed: - type: integer - optional: True - description: Random seed for sampling mode; if not specified, 0 is used. Used only when `sampling_style` is `random`. - -outputs: - output_dataset: - type: uri_file - description: Path to the jsonl file where the sampled dataset will be saved. - -code: ../src -environment: azureml://registries/azureml/environments/model-evaluation/labels/latest -command: >- - python -m aml_benchmark.dataset_sampler.main - --dataset ${{inputs.dataset}} - --sampling_style ${{inputs.sampling_style}} - $[[--sampling_ratio ${{inputs.sampling_ratio}}]] - $[[--n_samples ${{inputs.n_samples}}]] - $[[--random_seed ${{inputs.random_seed}}]] - --output_dataset ${{outputs.output_dataset}} \ No newline at end of file diff --git a/assets/aml-benchmark/components/inference_postprocessor/asset.yaml b/assets/aml-benchmark/components/inference_postprocessor/asset.yaml deleted file mode 100644 index 6a45f0ed4d..0000000000 --- a/assets/aml-benchmark/components/inference_postprocessor/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: true - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/inference_postprocessor \ No newline at end of file diff --git a/assets/aml-benchmark/components/inference_postprocessor/spec.yaml b/assets/aml-benchmark/components/inference_postprocessor/spec.yaml deleted file mode 100644 index 631cd7f65f..0000000000 --- a/assets/aml-benchmark/components/inference_postprocessor/spec.yaml +++ /dev/null @@ -1,151 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -name: inference_postprocessor -display_name: Inference Postprocessor -description: Inference Postprocessor -version: 0.0.12 -is_deterministic: true - -inputs: - prediction_dataset: - type: uri_file - description: >- - A file that contains predicted values - optional: False - prediction_column_name: - type: string - description: >- - Key in prediction dataset that contains predictions. - optional: False - ground_truth_dataset: - type: uri_file - description: >- - A file that contains the ground truth - optional: True - ground_truth_column_name: - type: string - description: >- - Key in ground truth dataset that contains ground truth. If ground_truth_dataset is given, then, - this is required input. - optional: True - additional_columns: - type: string - description: >- - Name(s) of additional columns that could be helpful for computing some metrics, separated by comma (","). - optional: True - remove_prefixes: - type: string - description: >- - A set of string prefixes separated by comma list of string prefixes to be removed from the inference - results in sequence. The prefixes should be separated by a comma. - Example: for the inference string - - "###>>>Hello world." - and prefixes - - "###,>>>" - will output "Hello world". - optional: True - separator: - type: string - description: >- - The separator used in few_shot patterns. One common example is "###". - If provided, response will be split on this separator, and only the first part will be used. - Example: "This is the first part ### This is the second part" will result in - "This is the first part". - optional: True - find_first: - type: string - description: >- - A list of strings to search for in the inference results. The first occurrence of each string will \ - be extracted and the occurrence with minimum index will be returned. - Must provide a comma-separated list of strings. - Example: - >>> find_first = "positive,negative" - >>> completion = "This is a positive example, not negative" - # Output: "positive" - optional: True - extract_number: - type: string - description: > - If the inference results contain a number, this can be used to extract the first or last number in the inference results. - The number will be extracted as a string. - Example: - >>> extract_number = "first" - >>> prediction = "Adding 0.3 to 1,000 gives 1,000.3" - # Output: "0.3" - Example: - >>> extract_number = "last" - >>> prediction = "Adding 0.3 to 1,000 gives 1,000.3" - # Output: "1000.3" - optional: True - enum: - - first - - last - regex_expr: - type: string - description: >- - A regular expression to extract the answer from the inference results. The pattern - must contain a group to be extracted. The first group and the first match will be used. - Example: "\n\nThe answer is: (\d)." - optional: True - strip_characters: - type: string - description: >- - A set of characters to remove from the beginning or end of the extracted answer.It is applied in the very end - of the extraction process. - optional: True - label_map: - type: string - description: | - JSON serialized dictionary to perform mapping. Must contain key-value pair "column_name": "" whose - value needs mapping, followed by key-value pairs containing idtolabel or labeltoid mappers. - Example format: - {"column_name":"label", "0":"NEUTRAL", "1":"ENTAILMENT", "2":"CONTRADICTION"} - optional: True - template: - type: string - description: >- - Jinja template containing logic to extract prediction. In case of multiple predictions, logic must be written in a written in - format so that it outputs a list of formatted predictions. - Example: - >>> prediction = ["The answer is phone.", "The answer is cellular."] - The provided jinja template logic should be able extract and output in this format: - # Output : ["phone", "cellular"] - optional: True - script_path: - type: uri_file - description: >- - Path to the custom postprocessor python script to extract prediction. - This [base template] - (https://github.com/Azure/azureml-assets/tree/main/assets/aml-benchmark/scripts/custom_inference_postprocessors/base_postprocessor_template.py) - tshould be used to create a custom postprocessor script. - optional: True - - -outputs: - output_dataset_result: - type: uri_file - description: >- - Path to the output the post processed result in .jsonl file. - -code: ../src - -environment: azureml://registries/azureml/environments/model-evaluation/labels/latest - -command: >- - python -m aml_benchmark.inference_postprocessor.main - --prediction_dataset ${{inputs.prediction_dataset}} - --prediction_column_name ${{inputs.prediction_column_name}} - $[[--ground_truth_dataset ${{inputs.ground_truth_dataset}}]] - $[[--ground_truth_column_name ${{inputs.ground_truth_column_name}}]] - $[[--additional_columns ${{inputs.additional_columns}}]] - $[[--separator '${{inputs.separator}}']] - $[[--find_first '${{inputs.find_first}}']] - $[[--regex_expr '${{inputs.regex_expr}}']] - $[[--remove_prefixes '${{inputs.remove_prefixes}}']] - $[[--strip_characters '${{inputs.strip_characters}}']] - $[[--extract_number '${{inputs.extract_number}}']] - $[[--label_map '${{inputs.label_map}}']] - $[[--template '${{inputs.template}}']] - $[[--script_path ${{inputs.script_path}}]] - --output_dataset_result ${{outputs.output_dataset_result}} diff --git a/assets/aml-benchmark/components/prompt_crafter/asset.yaml b/assets/aml-benchmark/components/prompt_crafter/asset.yaml deleted file mode 100644 index 8b55a72f27..0000000000 --- a/assets/aml-benchmark/components/prompt_crafter/asset.yaml +++ /dev/null @@ -1,8 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Benchmarking"] -test: - pytest: - enabled: true - conda_environment: ../../dev_conda_env.yaml - tests_dir: ../../tests/prompt_crafter \ No newline at end of file diff --git a/assets/aml-benchmark/components/prompt_crafter/spec.yaml b/assets/aml-benchmark/components/prompt_crafter/spec.yaml deleted file mode 100644 index 76c570f38c..0000000000 --- a/assets/aml-benchmark/components/prompt_crafter/spec.yaml +++ /dev/null @@ -1,153 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -name: prompt_crafter -display_name: Prompt Crafter -description: This component is used to create prompts from a given dataset. From a - given jinja prompt template, it will generate prompts. It can also create - few-shot prompts given a few-shot dataset and the number of shots. -version: 0.0.14 -is_deterministic: true - -inputs: - prompt_type: - type: string - optional: false - enum: - - chat - - completions - description: | - Determine the prompt format. This component supports chat and completion models. - Completions: - {"prompt": "Few shot prompts will go here"} - Chat: - Chat models have 3 roles: System, User and Assistant. - Example: - {"prompt": [{"role": "system", "content": "You are a helpful assistant"}, - {"role": "user", "content": "Example chat input"}, - {"role": "assistant", "content": "Example chat output"}]} - default: completions - test_data: - type: uri_file - optional: false - description: | - The uri file (jsonl) used to generate prompts. - prompt_pattern: - type: string - optional: false - description: | - The pattern to be used to generate the prompts. It should be a valid jinja template. - - Example: - Input Data : - {"question":"Example Question?", - "choices":{"option":["Answer1","Answer2","Answer3","Answer4"]}, - "answerKey":"D"} - - The prompt pattern for the above input data can be: - "Question: {{question}}\n - Choices are: - (1) {{choices.option[0]}}\n - (2) {{choices.option[1]}}\n - (3) {{choices.option[2]}}\n - (4) {{choices.option[3]}}\n" - few_shot_pattern: - type: string - optional: true - description: | - The pattern used to generate the few shot portion of a prompt. It should be a valid jinja template. - If this pattern is not provided, few shot prompts are generated from a concatenation of prompt_pattern and output_pattern. - - Example: - Input Data : - {"question":"Example Question?", - "choices":{"option":["Answer1","Answer2","Answer3","Answer4"]}, - "answerKey":"D"} - - The few shot pattern for the above input data can be: - "Question: {{question}}\n - Choices are: - (1) {{choices.option[0]}}\n - (2) {{choices.option[1]}}\n - (3) {{choices.option[2]}}\n - (4) {{choices.option[3]}}\n - Answer: {{answerKey}}" - n_shots: - type: integer - optional: false - default: 0 - description: | - The number of shots to use in the few-shot prompts. Default is 0, which means no few-shot - examples will be generated. n_shots must be smaller than the size of few_shot dataset. - few_shot_data: - type: uri_file - optional: true - description: | - The uri file(jsonl) to be used to generate the n-shot prompts. - output_pattern: - type: string - optional: false - description: | - The jinja template representing the expected output that would be used for few shot - prompts when n_shot > 0. e.g: {{answerKey}} - few_shot_separator: - type: string - optional: true - default: '' - description: | - The separator to be added between few-shot prompts. - system_message: - type: string - optional: true - description: | - This is the description of the task that the Assisstant should perform. - Applicable for chat models only. - e.g: "You are a helpful assistant." will be added to system role for chat models. - {"role": "system", "content": "You are a helpful assistant"} - prefix: - type: string - optional: true - description: | - The prefix to be added to the prompts. e.g: "Question: " - random_seed: - type: integer - optional: true - default: 0 - description: Random seed for sampling few-shots; if not specified, 0 is used. - ground_truth_column_name: - type: string - optional: true - default: '' - description: | - This will be used as the ground truth column if present in the input. - If not present, the output_pattern will be used as the ground truth. - additional_columns: - type: string - optional: true - default: '' - description: | - Any additional columns that would be helpful for computing metrics, if present in the input. - If there're multiple such columns, they should be separated by comma (","). -outputs: - output_file: - type: uri_file - description: Output file path where few_shot_prompt data will be written. - -code: ../src -environment: azureml://registries/azureml/environments/model-evaluation/labels/latest -command: >- - python -m aml_benchmark.prompt_crafter.main - --test_data ${{inputs.test_data}} - $[[--few_shot_data ${{inputs.few_shot_data}}]] - --n_shots ${{inputs.n_shots}} - --prompt_type ${{inputs.prompt_type}} - $[[--random_seed ${{inputs.random_seed}}]] - --prompt_pattern '${{inputs.prompt_pattern}}' - --output_pattern '${{inputs.output_pattern}}' - $[[--few_shot_pattern '${{inputs.few_shot_pattern}}']] - $[[--system_message '${{inputs.system_message}}']] - $[[--ground_truth_column_name '${{inputs.ground_truth_column_name}}']] - $[[--additional_columns '${{inputs.additional_columns}}']] - $[[--prefix '${{inputs.prefix}}']] - $[[--few_shot_separator '${{inputs.few_shot_separator}}']] - --output_file ${{outputs.output_file}} \ No newline at end of file diff --git a/assets/basic/components/hello_command_component/asset.yaml b/assets/basic/components/hello_command_component/asset.yaml deleted file mode 100644 index f6b6fea0f0..0000000000 --- a/assets/basic/components/hello_command_component/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Sample"] diff --git a/assets/basic/components/hello_command_component/spec.yaml b/assets/basic/components/hello_command_component/spec.yaml deleted file mode 100644 index 2391accf01..0000000000 --- a/assets/basic/components/hello_command_component/spec.yaml +++ /dev/null @@ -1,18 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json - -name: hello_command -version: 0.1.0 -type: command - -display_name: Hello World from Command -description: Command Component that takes in a string input message and prints it out. -is_deterministic: true - -inputs: - message: - type: string - default: "Hello, Command Component!" -code: ../src -environment: azureml://registries/azureml/environments/python-sdk-v2/versions/2 -command: >- - python hello.py --message "${{inputs.message}}" diff --git a/assets/basic/components/hello_pipeline_component/asset.yaml b/assets/basic/components/hello_pipeline_component/asset.yaml deleted file mode 100644 index d5ec8e5e67..0000000000 --- a/assets/basic/components/hello_pipeline_component/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Sample"] \ No newline at end of file diff --git a/assets/basic/components/hello_pipeline_component/spec.yaml b/assets/basic/components/hello_pipeline_component/spec.yaml deleted file mode 100644 index fc006aa457..0000000000 --- a/assets/basic/components/hello_pipeline_component/spec.yaml +++ /dev/null @@ -1,20 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json - -name: hello_pipeline -version: 0.1.0 -type: pipeline - -display_name: Hello World from Pipeline -description: Pipeline Component that takes in a string input message and passes it to the Hello World Command Component to be printed out. -is_deterministic: true - -inputs: - message: - type: string - default: "Hello, Pipeline Component!" -jobs: - hello_world: - type: command - component: azureml://registries/azureml/components/hello_command/versions/0.1.0 - inputs: - message: ${{parent.inputs.message}} diff --git a/assets/basic/components/src/hello.py b/assets/basic/components/src/hello.py deleted file mode 100644 index 710af756a5..0000000000 --- a/assets/basic/components/src/hello.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -"""Hello World script.""" -from argparse import ArgumentParser - - -parser = ArgumentParser() -parser.add_argument("--message") -args = parser.parse_args() - -print(args.message) diff --git a/assets/batch_score/components/driver/batch_score_llm/asset.yaml b/assets/batch_score/components/driver/batch_score_llm/asset.yaml deleted file mode 100644 index b4a731a068..0000000000 --- a/assets/batch_score/components/driver/batch_score_llm/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Batch Score"] diff --git a/assets/batch_score/components/driver/batch_score_llm/spec.yaml b/assets/batch_score/components/driver/batch_score_llm/spec.yaml deleted file mode 100644 index 85c3c7f4a7..0000000000 --- a/assets/batch_score/components/driver/batch_score_llm/spec.yaml +++ /dev/null @@ -1,65 +0,0 @@ -$schema: http://azureml/sdk-2-0/ParallelComponent.json -type: parallel - -name: batch_score_llm -version: 1.1.9 -display_name: Batch Score Large Language Models -is_deterministic: False - -inputs: - # Predefined arguments for parallel job: - # https://learn.microsoft.com/en-us/azure/machine-learning/reference-yaml-job-parallel?source=recommendations#predefined-arguments-for-parallel-job - resume_from: - type: string - optional: True - description: The pipeline run id to resume from - - # PRS preview feature - async_mode: - type: boolean - optional: True - default: False - description: Whether to use PRS mini-batch streaming feature, which allows each PRS processor to process multiple mini-batches at a time. - - # Custom arguments - configuration_file: - type: uri_file - optional: False - description: Configures the behavior of batch scoring. - data_input_table: - type: mltable - optional: False - description: The data to be split and scored in parallel. - -outputs: - job_output_path: - type: uri_file - mini_batch_results_output_directory: - type: uri_folder - -max_concurrency_per_instance: 1 -resources: - instance_count: 1 -mini_batch_size: 3kb -mini_batch_error_threshold: 5 -logging_level: "DEBUG" -retry_settings: - max_retries: 2 - timeout: 60 - -input_data: ${{inputs.data_input_table}} - -task: - code: ../src - type: run_function - entry_script: batch_score.main - # Enable PRS safe append row configuration that is needed when dealing with large outputs with Unicode characters. - # Using --append_row_safe_output true - program_arguments: >- - $[[--amlbi_async_mode ${{inputs.async_mode}}]] - --append_row_safe_output true - --configuration_file ${{inputs.configuration_file}} - --partitioned_scoring_results ${{outputs.mini_batch_results_output_directory}} - $[[--resume_from ${{inputs.resume_from}}]] - environment: azureml://registries/azureml/environments/model-evaluation/versions/30 - append_row_to: ${{outputs.job_output_path}} diff --git a/assets/common/components/batch_deploy_model/asset.yaml b/assets/common/components/batch_deploy_model/asset.yaml deleted file mode 100644 index 5d0befc5f8..0000000000 --- a/assets/common/components/batch_deploy_model/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Models"] \ No newline at end of file diff --git a/assets/common/components/batch_deploy_model/spec.yaml b/assets/common/components/batch_deploy_model/spec.yaml deleted file mode 100644 index e812d9405a..0000000000 --- a/assets/common/components/batch_deploy_model/spec.yaml +++ /dev/null @@ -1,197 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: batch_deploy_model -version: 0.0.5 -type: command - -is_deterministic: True - -display_name: Batch deploy model -description: - Batch deploy a model to a workspace. The component works on compute with [MSI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-manage-compute-instance?tabs=python) attached. - -environment: azureml://registries/azureml/environments/python-sdk-v2/versions/19 - -code: ../../src -command: >- - python batch_deploy.py - $[[--registration_details_folder ${{inputs.registration_details_folder}}]] - $[[--model_id ${{inputs.model_id}}]] - $[[--inference_payload_file ${{inputs.inference_payload_file}}]] - $[[--inference_payload_folder ${{inputs.inference_payload_folder}}]] - $[[--endpoint_name ${{inputs.endpoint_name}}]] - $[[--deployment_name ${{inputs.deployment_name}}]] - $[[--compute_name ${{inputs.compute_name}}]] - $[[--size ${{inputs.size}}]] - $[[--min_instances ${{inputs.min_instances}}]] - $[[--max_instances ${{inputs.max_instances}}]] - $[[--idle_time_before_scale_down ${{inputs.idle_time_before_scale_down}}]] - $[[--output_file_name ${{inputs.output_file_name}}]] - $[[--max_concurrency_per_instance ${{inputs.max_concurrency_per_instance}}]] - $[[--error_threshold ${{inputs.error_threshold}}]] - $[[--max_retries ${{inputs.max_retries}}]] - $[[--timeout ${{inputs.timeout}}]] - $[[--logging_level ${{inputs.logging_level}}]] - $[[--mini_batch_size ${{inputs.mini_batch_size}}]] - $[[--instance_count ${{inputs.instance_count}}]] - --batch_job_output_folder ${{outputs.batch_job_output_folder}} - -inputs: - # Output of registering component - registration_details_folder: - type: uri_folder - optional: true - description: Folder containing model registration details in a JSON file named model_registration_details.json - - model_id: - type: string - optional: true - description: | - Asset ID of the model registered in workspace/registry. - Registry - azureml://registries//models//versions/ - Workspace - azureml:: - - inference_payload_file: - type: uri_file - optional: true - description: File containing data used to validate deployment - - inference_payload_folder: - type: uri_folder - optional: true - description: Folder containing files used to validate deployment - - endpoint_name: - type: string - optional: true - description: Name of the endpoint - - deployment_name: - type: string - optional: true - default: default - description: Name of the deployment - - compute_name: - type: string - optional: true - default: cpu-cluster - description: Name of the compute cluster to execute the batch scoring jobs on. New compute will be created if the compute cluster is not present. - - size: - type: string - optional: true - enum: - - Standard_DS1_v2 - - Standard_DS2_v2 - - Standard_DS3_v2 - - Standard_DS4_v2 - - Standard_DS5_v2 - - Standard_F2s_v2 - - Standard_F4s_v2 - - Standard_F8s_v2 - - Standard_F16s_v2 - - Standard_F32s_v2 - - Standard_F48s_v2 - - Standard_F64s_v2 - - Standard_F72s_v2 - - Standard_FX24mds - - Standard_FX36mds - - Standard_FX48mds - - Standard_E2s_v3 - - Standard_E4s_v3 - - Standard_E8s_v3 - - Standard_E16s_v3 - - Standard_E32s_v3 - - Standard_E48s_v3 - - Standard_E64s_v3 - - Standard_NC4as_T4_v3 - - Standard_NC6s_v2 - - Standard_NC6s_v3 - - Standard_NC8as_T4_v3 - - Standard_NC12s_v2 - - Standard_NC12s_v3 - - Standard_NC16as_T4_v3 - - Standard_NC24s_v2 - - Standard_NC24s_v3 - - Standard_NC24rs_v3 - - Standard_NC64as_T4_v3 - - Standard_ND40rs_v2 - - Standard_ND96asr_v4 - - Standard_ND96amsr_A100_v4 - default: Standard_NC24s_v3 - description: Compute instance size to deploy model. Make sure that instance type is available and have enough quota available. - - min_instances: - type: integer - optional: true - default: 0 - description: Minimum number of instances of the compute cluster to be created. - - max_instances: - type: integer - optional: true - default: 1 - description: Maximum number of instances of the compute cluster to be created. - - idle_time_before_scale_down: - type: integer - optional: true - default: 120 - description: Node Idle Time before scaling down the compute cluster to be created. - - output_file_name: - type: string - optional: true - default: predictions.csv - description: Name of the batch scoring output file. - - max_concurrency_per_instance: - type: integer - optional: true - default: 1 - description: The maximum number of parallel scoring_script runs per instance. - - error_threshold: - type: integer - optional: true - default: -1 - description: The number of file failures that should be ignored. - - max_retries: - type: integer - optional: true - default: 3 - description: The maximum number of retries for a failed or timed-out mini batch. - - timeout: - type: integer - optional: true - default: 500 - description: The timeout in seconds for scoring a single mini batch. - - logging_level: - type: string - optional: true - default: info - description: The log verbosity level. - - mini_batch_size: - type: integer - optional: true - default: 10 - description: The number of files the code_configuration.scoring_script can process in one run() call. - - instance_count: - type: integer - optional: true - default: 1 - description: The number of nodes to use for each batch scoring job. - -outputs: - batch_job_output_folder: - type: uri_folder - description: Folder to which batch job outputs will be saved. - -tags: - Preview: "" - Internal: "" diff --git a/assets/common/components/delete_endpoint/asset.yaml b/assets/common/components/delete_endpoint/asset.yaml deleted file mode 100644 index 1b8aeefc66..0000000000 --- a/assets/common/components/delete_endpoint/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Deployment"] \ No newline at end of file diff --git a/assets/common/components/delete_endpoint/spec.yaml b/assets/common/components/delete_endpoint/spec.yaml deleted file mode 100644 index 20d0f24c34..0000000000 --- a/assets/common/components/delete_endpoint/spec.yaml +++ /dev/null @@ -1,40 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: delete_endpoint -version: 0.0.7 -type: command - -is_deterministic: True - -display_name: Delete Endpoint -description: - Deletes an endpoint resource. - -environment: azureml://registries/azureml/environments/python-sdk-v2/versions/19 - -code: ../../src -command: >- - python delete_endpoint.py - $[[--model_deployment_details ${{inputs.model_deployment_details}}]] - $[[--endpoint_name ${{inputs.endpoint_name}}]] - $[[--deployment_name ${{inputs.deployment_name}}]] - -inputs: - # Output of registering component - model_deployment_details: - type: uri_file - optional: true - description: JSON file that contains the deployment details. - - endpoint_name: - type: string - optional: true - description: Name of the endpoint to delete. - - deployment_name: - type: string - optional: true - description: Name of the deployment to delete. - -tags: - Preview: "" - Internal: "" \ No newline at end of file diff --git a/assets/common/components/deploy_model/asset.yaml b/assets/common/components/deploy_model/asset.yaml deleted file mode 100644 index 5d0befc5f8..0000000000 --- a/assets/common/components/deploy_model/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Models"] \ No newline at end of file diff --git a/assets/common/components/deploy_model/spec.yaml b/assets/common/components/deploy_model/spec.yaml deleted file mode 100644 index db1670adeb..0000000000 --- a/assets/common/components/deploy_model/spec.yaml +++ /dev/null @@ -1,208 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: deploy_model -version: 0.0.11 -type: command - -is_deterministic: True - -display_name: Deploy model -description: - Deploy a model to a workspace. The component works on compute with [MSI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-manage-compute-instance?tabs=python) attached. - -environment: azureml://registries/azureml/environments/python-sdk-v2/versions/19 - -code: ../../src -command: >- - python deploy.py - $[[--registration_details_folder ${{inputs.registration_details_folder}}]] - $[[--model_id ${{inputs.model_id}}]] - $[[--inference_payload ${{inputs.inference_payload}}]] - $[[--endpoint_name ${{inputs.endpoint_name}}]] - $[[--deployment_name ${{inputs.deployment_name}}]] - $[[--instance_type ${{inputs.instance_type}}]] - $[[--instance_count ${{inputs.instance_count}}]] - $[[--max_concurrent_requests_per_instance ${{inputs.max_concurrent_requests_per_instance}}]] - $[[--request_timeout_ms ${{inputs.request_timeout_ms}}]] - $[[--max_queue_wait_ms ${{inputs.max_queue_wait_ms}}]] - $[[--failure_threshold_readiness_probe ${{inputs.failure_threshold_readiness_probe}}]] - $[[--success_threshold_readiness_probe ${{inputs.success_threshold_readiness_probe}}]] - $[[--timeout_readiness_probe ${{inputs.timeout_readiness_probe}}]] - $[[--period_readiness_probe ${{inputs.period_readiness_probe}}]] - $[[--initial_delay_readiness_probe ${{inputs.initial_delay_readiness_probe}}]] - $[[--failure_threshold_liveness_probe ${{inputs.failure_threshold_liveness_probe}}]] - $[[--timeout_liveness_probe ${{inputs.timeout_liveness_probe}}]] - $[[--period_liveness_probe ${{inputs.period_liveness_probe}}]] - $[[--initial_delay_liveness_probe ${{inputs.initial_delay_liveness_probe}}]] - $[[--egress_public_network_access ${{inputs.egress_public_network_access}}]] - --model_deployment_details ${{outputs.model_deployment_details}} - -inputs: - # Output of registering component - registration_details_folder: - type: uri_folder - optional: true - description: Folder containing model registration details in a JSON file named model_registration_details.json - - model_id: - type: string - optional: true - description: | - Asset ID of the model registered in workspace/registry. - Registry - azureml://registries//models//versions/ - Workspace - azureml:: - - inference_payload: - type: uri_file - optional: true - description: JSON payload which would be used to validate deployment - - endpoint_name: - type: string - optional: true - description: Name of the endpoint - - deployment_name: - type: string - optional: true - default: default - description: Name of the deployment - - instance_type: - type: string - optional: true - enum: - - Standard_DS1_v2 - - Standard_DS2_v2 - - Standard_DS3_v2 - - Standard_DS4_v2 - - Standard_DS5_v2 - - Standard_F2s_v2 - - Standard_F4s_v2 - - Standard_F8s_v2 - - Standard_F16s_v2 - - Standard_F32s_v2 - - Standard_F48s_v2 - - Standard_F64s_v2 - - Standard_F72s_v2 - - Standard_FX24mds - - Standard_FX36mds - - Standard_FX48mds - - Standard_E2s_v3 - - Standard_E4s_v3 - - Standard_E8s_v3 - - Standard_E16s_v3 - - Standard_E32s_v3 - - Standard_E48s_v3 - - Standard_E64s_v3 - - Standard_NC4as_T4_v3 - - Standard_NC6s_v2 - - Standard_NC6s_v3 - - Standard_NC8as_T4_v3 - - Standard_NC12s_v2 - - Standard_NC12s_v3 - - Standard_NC16as_T4_v3 - - Standard_NC24s_v2 - - Standard_NC24s_v3 - - Standard_NC24rs_v3 - - Standard_NC64as_T4_v3 - - Standard_ND40rs_v2 - - Standard_ND96asr_v4 - - Standard_ND96amsr_A100_v4 - default: Standard_NC24s_v3 - description: Compute instance type to deploy model. Make sure that instance type is available and have enough quota available. - - instance_count: - type: integer - optional: true - default: 1 - description: Number of instances you want to use for deployment. Make sure instance type have enough quota available. - - max_concurrent_requests_per_instance: - type: integer - default: 1 - optional: true - description: Maximum concurrent requests to be handled per instance - - request_timeout_ms: - type: integer - default: 60000 - optional: true - description: Request timeout in ms. Max limit is 90000. - - max_queue_wait_ms: - type: integer - default: 60000 - optional: true - description: Maximum queue wait time of a request in ms - - failure_threshold_readiness_probe: - type: integer - default: 10 - optional: true - description: The number of times system will try after failing the readiness probe - - success_threshold_readiness_probe: - type: integer - default: 1 - optional: true - description: The minimum consecutive successes for the readiness probe to be considered successful after having failed - - timeout_readiness_probe: - type: integer - default: 10 - optional: true - description: The number of seconds after which the readiness probe times out - - period_readiness_probe: - type: integer - default: 10 - optional: true - description: How often (in seconds) to perform the readiness probe - - initial_delay_readiness_probe: - type: integer - default: 10 - optional: true - description: The number of seconds after the container has started before the readiness probe is initiated - - failure_threshold_liveness_probe: - type: integer - default: 30 - optional: true - description: The number of times system will try after failing the liveness probe - - timeout_liveness_probe: - type: integer - default: 10 - optional: true - description: The number of seconds after which the liveness probe times out - - period_liveness_probe: - type: integer - default: 10 - optional: true - description: How often (in seconds) to perform the liveness probe - - initial_delay_liveness_probe: - type: integer - default: 10 - optional: true - description: The number of seconds after the container has started before the liveness probe is initiated - - egress_public_network_access: - type: string - default: enabled - optional: true - enum: - - enabled - - disabled - description: Setting it to disabled secures the deployment by restricting communication between the deployment and the Azure resources used by it - -outputs: - model_deployment_details: - type: uri_file - description: Json file to which deployment details will be written - -tags: - Preview: "" - Internal: "" diff --git a/assets/common/components/mlflow_model_local_validation/asset.yaml b/assets/common/components/mlflow_model_local_validation/asset.yaml deleted file mode 100644 index 5d0befc5f8..0000000000 --- a/assets/common/components/mlflow_model_local_validation/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Models"] \ No newline at end of file diff --git a/assets/common/components/mlflow_model_local_validation/spec.yaml b/assets/common/components/mlflow_model_local_validation/spec.yaml deleted file mode 100644 index cca624fd97..0000000000 --- a/assets/common/components/mlflow_model_local_validation/spec.yaml +++ /dev/null @@ -1,76 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json - -name: mlflow_model_local_validation -version: 0.0.17 -type: command - -is_deterministic: True - -display_name: MLFlow model local validation -description: Validates if a MLFLow model can be loaded on a compute and is usable for inferencing. - -environment: azureml://registries/azureml/environments/python-sdk-v2/versions/25 - -code: ../../src/ -command: >- - python -u run_mlflow_model_local_validation.py - --model-path ${{inputs.model_path}} - $[[--test-data-path ${{inputs.test_data_path}}]] - $[[--column-rename-map "${{inputs.column_rename_map}}"]] - $[[--task-name ${{inputs.task_name}}]] - --output-model-path ${{outputs.mlflow_model_folder}} - -inputs: - model_path: - type: mlflow_model - mode: ro_mount - description: MLFlow model to be validated - - test_data_path: - type: uri_file - optional: true - description: Test dataset for model inferencing - - column_rename_map: - type: string - optional: true - description: | - Provide mapping of dataset column names that should be renamed before inferencing. - eg: col1:ren1; col2:ren2; col3:ren3 - - task_name: - description: A Hugging face task on which model was trained on - enum: - - chat-completion - - fill-mask - - token-classification - - question-answering - - summarization - - text-generation - - text2text-generation - - text-classification - - translation - - image-classification - - image-classification-multilabel - - image-object-detection - - image-instance-segmentation - - image-to-text - - text-to-image - - text-to-image-inpainting - - image-text-to-text - - image-to-image - - zero-shot-image-classification - - mask-generation - - video-multi-object-tracking - - visual-question-answering - optional: true - type: string - -outputs: - mlflow_model_folder: - type: uri_folder - mode: rw_mount - description: Validated input model. Here input model is used to block further steps in pipeline job if local validation fails - -tags: - Preview: "" diff --git a/assets/common/components/register_model/asset.yaml b/assets/common/components/register_model/asset.yaml deleted file mode 100644 index c01772d398..0000000000 --- a/assets/common/components/register_model/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Model"] diff --git a/assets/common/components/register_model/spec.yaml b/assets/common/components/register_model/spec.yaml deleted file mode 100644 index f7b32ce406..0000000000 --- a/assets/common/components/register_model/spec.yaml +++ /dev/null @@ -1,78 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: register_model -version: 0.0.20 -type: command - -is_deterministic: True - -display_name: Register model -description: - Register a model to a workspace or a registry. The component works on compute with [MSI](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-manage-compute-instance?tabs=python) attached. - -environment: azureml://registries/azureml/environments/python-sdk-v2/versions/25 - -code: ../../src -command: >- - python -u register.py - --model_path ${{inputs.model_path}} - $[[--model_type ${{inputs.model_type}}]] - $[[--model_name ${{inputs.model_name}}]] - $[[--model_version ${{inputs.model_version}}]] - $[[--registry_name ${{inputs.registry_name}}]] - $[[--model_download_metadata ${{inputs.model_download_metadata}}]] - $[[--model_description "${{inputs.model_description}}"]] - $[[--model_metadata ${{inputs.model_metadata}}]] - --registration_details_folder ${{outputs.registration_details_folder}} - -inputs: - model_name: - type: string - optional: true - description: Model name to use in the registration. If name already exists, the version will be auto incremented. Model name must be alphanumeric and can contain dashes/hyphens - - model_version: - type: string - optional: true - description: Model version in workspace/registry. If the same model name and version exists, the version will be auto incremented. Model version must be a numeric string - - model_type: - type: string - optional: true - enum: - - custom_model - - mlflow_model - default: mlflow_model - description: Model type - - model_description: - type: string - optional: true - description: Description of the model that will be shown in AzureML registry or workspace - - registry_name: - type: string - optional: true - description: Name of the AzureML asset registry where the model will be registered. Model will be registered in a workspace if this is unspecified - - model_path: - type: uri_folder - optional: false - description: Path to the model directory - - model_download_metadata: - type: uri_file - optional: true - description: A JSON file which contains information related to model download. - - model_metadata: - type: uri_file - optional: true - description: JSON/YAML file that contains model metadata confirming to Model V2 [contract](https://azuremlschemas.azureedge.net/latest/model.schema.json) - -outputs: - registration_details_folder: - type: uri_folder - description: Folder containing model registration details in a JSON file named model_registration_details.json - -tags: - Preview: "" diff --git a/assets/data/data-transfer/export_data_database/asset.yaml b/assets/data/data-transfer/export_data_database/asset.yaml deleted file mode 100644 index d1698f05cc..0000000000 --- a/assets/data/data-transfer/export_data_database/asset.yaml +++ /dev/null @@ -1,2 +0,0 @@ -type: component -spec: spec.yaml diff --git a/assets/data/data-transfer/export_data_database/spec.yaml b/assets/data/data-transfer/export_data_database/spec.yaml deleted file mode 100644 index 75a7495dfe..0000000000 --- a/assets/data/data-transfer/export_data_database/spec.yaml +++ /dev/null @@ -1,16 +0,0 @@ -$schema: http://azureml/sdk-2-0/DataTransferComponent.json -name: export_data_database -display_name: export_data_database (Preview) -version: 0.0.1 -type: data_transfer -description: - Component that export data from uri_file data asset to database within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/ai-components) (Preview). -task: export_data - -inputs: - source: - type: uri_file - -sink: - type: database \ No newline at end of file diff --git a/assets/data/data-transfer/import_data_database/asset.yaml b/assets/data/data-transfer/import_data_database/asset.yaml deleted file mode 100644 index d1698f05cc..0000000000 --- a/assets/data/data-transfer/import_data_database/asset.yaml +++ /dev/null @@ -1,2 +0,0 @@ -type: component -spec: spec.yaml diff --git a/assets/data/data-transfer/import_data_database/spec.yaml b/assets/data/data-transfer/import_data_database/spec.yaml deleted file mode 100644 index 5df873cc1b..0000000000 --- a/assets/data/data-transfer/import_data_database/spec.yaml +++ /dev/null @@ -1,15 +0,0 @@ -$schema: http://azureml/sdk-2-0/DataTransferComponent.json -name: import_data_database -display_name: import_data_database (Preview) -description: - Component that import data from database as mltable data asset within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/ai-components) (Preview). -version: 0.0.1 -type: data_transfer -task: import_data - -source: - type: database -outputs: - sink: - type: mltable \ No newline at end of file diff --git a/assets/data/data-transfer/import_data_file_system/asset.yaml b/assets/data/data-transfer/import_data_file_system/asset.yaml deleted file mode 100644 index d1698f05cc..0000000000 --- a/assets/data/data-transfer/import_data_file_system/asset.yaml +++ /dev/null @@ -1,2 +0,0 @@ -type: component -spec: spec.yaml diff --git a/assets/data/data-transfer/import_data_file_system/spec.yaml b/assets/data/data-transfer/import_data_file_system/spec.yaml deleted file mode 100644 index 6014f46f60..0000000000 --- a/assets/data/data-transfer/import_data_file_system/spec.yaml +++ /dev/null @@ -1,15 +0,0 @@ -$schema: http://azureml/sdk-2-0/DataTransferComponent.json -name: import_data_file_system -display_name: import_data_file_system (Preview) -version: 0.0.1 -description: - Component that import data from external file_system as uri_folder data asset within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/ai-components) (Preview). -type: data_transfer -task: import_data - -source: - type: file_system -outputs: - sink: - type: uri_folder \ No newline at end of file diff --git a/assets/large_language_models/components_pipelines/data_ingestion_db_to_acs/asset.yaml b/assets/large_language_models/components_pipelines/data_ingestion_db_to_acs/asset.yaml deleted file mode 100644 index dbebe8dc08..0000000000 --- a/assets/large_language_models/components_pipelines/data_ingestion_db_to_acs/asset.yaml +++ /dev/null @@ -1,4 +0,0 @@ -spec: spec.yaml -type: component -categories: -- Retrieval Augmented Generation \ No newline at end of file diff --git a/assets/large_language_models/components_pipelines/data_ingestion_db_to_acs/spec.yaml b/assets/large_language_models/components_pipelines/data_ingestion_db_to_acs/spec.yaml deleted file mode 100644 index 67a15e43aa..0000000000 --- a/assets/large_language_models/components_pipelines/data_ingestion_db_to_acs/spec.yaml +++ /dev/null @@ -1,340 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline -tags: - Preview: "" -name: llm_ingest_db_to_acs -display_name: LLM - SQL Datastore to ACS Pipeline -version: 0.0.97 -description: Single job pipeline to chunk data from AzureML sql data store, and create ACS embeddings index -settings: - default_compute: serverless -inputs: - db_datastore: - type: string - description: database datastore uri in the format of 'azureml://datastores/{datastore_name}' - embeddings_model: - type: string - description: The model used to generate embeddings. 'azure_open_ai://endpoint/{endpoint_name}/deployment/{deployment_name}/model/{model_name}' - chat_aoai_deployment_name: - type: string - optional: true - description: The name of the chat AOAI deployment - embedding_aoai_deployment_name: - type: string - description: The name of the embedding AOAI deployment - embeddings_dataset_name: - type: string - description: The name of the acs index - max_tables: - type: integer - optional: true - max_columns: - type: integer - optional: true - max_rows: - type: integer - optional: true - max_sampling_rows: - type: integer - optional: true - max_text_length: - type: integer - optional: true - max_knowledge_pieces: - type: integer - optional: true - selected_tables: - type: string - optional: true - column_settings: - type: string - optional: true - llm_config: - type: string - optional: true - description: The name of the llm config - runtime: - type: string - optional: false - description: The name of the runtime - serverless_instance_count: - type: integer - optional: true - default: "1" - serverless_instance_type: - type: string - optional: true - default: Standard_DS3_v2 - embedding_connection: - type: string - optional: true - description: Azure OpenAI workspace connection ARM ID for embeddings - llm_connection: - type: string - optional: true - description: Azure OpenAI workspace connection ARM ID for LLM - acs_connection: - type: string - optional: true - description: Azure Cognitive Search workspace connection ARM ID - acs_config: - type: string - description: JSON describing the acs index to create or update for embeddings - sample_data: - type: uri_folder - description: "Sample data to be used for data ingestion. format: 'azureml:samples-test:1'" - optional: true - # path: "azureml:samples-test:1" - # data ingest setting - sample_acs_config: - type: string - description: "JSON describing the acs index to create or update for samples" - include_builtin_examples: - type: boolean - default: true - optional: true - tools: - type: string - optional: true - description: 'The name of the tools for dbcopilot. Supported tools: "tsql", "python". Format: ["tsql", "python"]' - knowledge_pieces: - type: string - optional: true - description: "The list of knowledge pieces to be used for grounding." - include_views: - type: boolean - optional: true - description: "Whether to turn on views." - instruct_template: - type: string - optional: true - description: "The instruct template for the LLM." - managed_identity_enabled: - type: boolean - default: false - optional: true - description: "Whether to connect using managed identity." -outputs: - grounding_index: - type: uri_folder - db_context: - type: uri_folder -jobs: - db_meta_loading_generator: - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - asset_uri: - path: ${{parent.inputs.db_datastore}} - max_tables: - path: ${{parent.inputs.max_tables}} - max_columns: - path: ${{parent.inputs.max_columns}} - max_rows: - path: ${{parent.inputs.max_rows}} - max_sampling_rows: - path: ${{parent.inputs.max_sampling_rows}} - max_text_length: - path: ${{parent.inputs.max_text_length}} - max_knowledge_pieces: - path: ${{parent.inputs.max_knowledge_pieces}} - selected_tables: - path: ${{parent.inputs.selected_tables}} - column_settings: - path: ${{parent.inputs.column_settings}} - include_views: - path :${{parent.inputs.include_views}} - outputs: - output_chunk_file: - type: uri_folder - output_grounding_context_file: ${{parent.outputs.db_context}} - environment_variables: - MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}} - component: "azureml:llm_dbcopilot_grounding:0.0.70" - type: command - generate_meta_embeddings: - type: command - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - component: "azureml:llm_rag_generate_embeddings:0.0.66" - inputs: - chunks_source: - type: uri_folder - path: ${{parent.jobs.db_meta_loading_generator.outputs.output_chunk_file}} - embeddings_model: ${{parent.inputs.embeddings_model}} - outputs: - embeddings: - mode: upload - type: uri_folder - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - # generate_meta_embeddings: - # resources: - # instance_count: ${{parent.inputs.serverless_instance_count}} - # instance_type: ${{parent.inputs.serverless_instance_type}} - # properties: - # compute_specification: - # automatic: true - # retry_settings: - # timeout: 3600 - # max_retries: 3 - # environment_variables: - # AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - # inputs: - # chunks_source: - # path: ${{parent.jobs.db_meta_loading_generator.outputs.output_chunk_file}} - # embeddings_model: - # path: ${{parent.inputs.embeddings_model}} - # outputs: - # embeddings: - # mode: upload - # type: uri_folder - # component: "azureml:llm_rag_generate_embeddings_parallel:0.0.9" - # type: parallel - create_meta_acs_index_job: - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - AZUREML_WORKSPACE_CONNECTION_ID_ACS: ${{parent.inputs.acs_connection}} - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - embeddings: - path: ${{parent.jobs.generate_meta_embeddings.outputs.embeddings}} - acs_config: - path: ${{parent.inputs.acs_config}} - outputs: - index: ${{parent.outputs.grounding_index}} - component: "azureml:llm_rag_update_acs_index:0.0.70" - type: command - ######################################### - db_sample_loading_generator: - type: command - component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.45" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - sample_folder: - type: uri_folder - path: ${{parent.inputs.sample_data}} - include_builtin: ${{parent.inputs.include_builtin_examples}} - tools: ${{parent.inputs.tools}} - grounding_context: ${{parent.jobs.db_meta_loading_generator.outputs.output_grounding_context_file}} - outputs: - output_chunk_file: - type: uri_folder - ######################################### - generate_sample_embeddings: - type: command - component: "azureml:llm_rag_generate_embeddings:0.0.66" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - chunks_source: - type: uri_folder - path: ${{parent.jobs.db_sample_loading_generator.outputs.output_chunk_file}} - embeddings_model: ${{parent.inputs.embeddings_model}} - outputs: - embeddings: - type: uri_folder - mode: upload - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - ######################################### - create_sample_acs_index_job: - type: command - component: "azureml:llm_rag_update_acs_index:0.0.70" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - embeddings: - type: uri_folder - path: ${{parent.jobs.generate_sample_embeddings.outputs.embeddings}} - acs_config: ${{parent.inputs.sample_acs_config}} - outputs: - index: - type: uri_folder - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - AZUREML_WORKSPACE_CONNECTION_ID_ACS: ${{parent.inputs.acs_connection}} - ######################################### - register_mlindex_asset_job: - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - storage_uri: - path: ${{parent.jobs.create_meta_acs_index_job.outputs.index}} - asset_name: - path: ${{parent.inputs.embeddings_dataset_name}} - outputs: - asset_id: - type: uri_file - component: "azureml:llm_rag_register_mlindex_asset:0.0.70" - type: command - create_prompt_flow: - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI_EMBEDDING: ${{parent.inputs.embedding_connection}} - AZUREML_WORKSPACE_CONNECTION_ID_AOAI_CHAT: ${{parent.inputs.llm_connection}} - MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}} - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - identity: - type: user_identity - inputs: - index_name: - path: ${{parent.inputs.embeddings_dataset_name}} - grounding_embedding_uri: - path: ${{parent.jobs.create_meta_acs_index_job.outputs.index}} - example_embedding_uri: - path: ${{parent.jobs.create_sample_acs_index_job.outputs.index}} - db_context_uri: - path: ${{parent.jobs.db_meta_loading_generator.outputs.output_grounding_context_file}} - asset_uri: - path: ${{parent.inputs.db_datastore}} - embedding_aoai_deployment_name: - path: ${{parent.inputs.embedding_aoai_deployment_name}} - chat_aoai_deployment_name: - path: ${{parent.inputs.chat_aoai_deployment_name}} - llm_config: - path: ${{parent.inputs.llm_config}} - runtime: - path: ${{parent.inputs.runtime}} - knowledge_pieces: - path: ${{parent.inputs.knowledge_pieces}} - include_views: - path: ${{parent.inputs.include_views}} - instruct_template: - path: ${{parent.inputs.instruct_template}} - component: "azureml:llm_dbcopilot_create_promptflow:0.0.70" - type: command diff --git a/assets/large_language_models/components_pipelines/data_ingestion_db_to_faiss/asset.yaml b/assets/large_language_models/components_pipelines/data_ingestion_db_to_faiss/asset.yaml deleted file mode 100644 index dbebe8dc08..0000000000 --- a/assets/large_language_models/components_pipelines/data_ingestion_db_to_faiss/asset.yaml +++ /dev/null @@ -1,4 +0,0 @@ -spec: spec.yaml -type: component -categories: -- Retrieval Augmented Generation \ No newline at end of file diff --git a/assets/large_language_models/components_pipelines/data_ingestion_db_to_faiss/spec.yaml b/assets/large_language_models/components_pipelines/data_ingestion_db_to_faiss/spec.yaml deleted file mode 100644 index eb3c723bf3..0000000000 --- a/assets/large_language_models/components_pipelines/data_ingestion_db_to_faiss/spec.yaml +++ /dev/null @@ -1,324 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline -tags: - Preview: "" -name: llm_ingest_db_to_faiss -display_name: LLM - SQL Datastore to FAISS Pipeline -version: 0.0.97 -description: Single job pipeline to chunk data from AzureML sql data store, and create FAISS embeddings index -settings: - default_compute: serverless -inputs: - db_datastore: - type: string - description: database datastore uri in the format of 'azureml://datastores/{datastore_name}' - embeddings_model: - type: string - description: The model used to generate embeddings. 'azure_open_ai://endpoint/{endpoint_name}/deployment/{deployment_name}/model/{model_name}' - chat_aoai_deployment_name: - type: string - optional: true - description: The name of the chat AOAI deployment - embedding_aoai_deployment_name: - type: string - description: The name of the embedding AOAI deployment - embeddings_dataset_name: - type: string - description: The name of the faiss index - max_tables: - type: integer - optional: true - max_columns: - type: integer - optional: true - max_rows: - type: integer - optional: true - max_sampling_rows: - type: integer - optional: true - max_text_length: - type: integer - optional: true - max_knowledge_pieces: - type: integer - optional: true - selected_tables: - type: string - optional: true - column_settings: - type: string - optional: true - llm_config: - type: string - optional: true - description: The name of the llm config - serverless_instance_count: - type: integer - optional: true - default: "1" - serverless_instance_type: - type: string - optional: true - default: Standard_DS3_v2 - embedding_connection: - type: string - optional: true - description: Azure OpenAI workspace connection ARM ID for embeddings - llm_connection: - type: string - optional: true - description: Azure OpenAI workspace connection ARM ID for LLM - runtime: - type: string - optional: false - description: The name of the runtime - sample_data: - type: uri_folder - description: "Sample data to be used for data ingestion. format: 'azureml:samples-test:1'" - optional: true - # path: "azureml:samples-test:1" - # data ingest setting - include_builtin_examples: - type: boolean - default: true - optional: true - tools: - type: string - optional: true - description: 'The name of the tools for dbcopilot. Supported tools: "tsql", "python". Format: ["tsql", "python"]' - knowledge_pieces: - type: string - optional: true - description: "The list of knowledge pieces to be used for grounding." - include_views: - type: boolean - optional: true - description: "Whether to turn on views." - instruct_template: - type: string - optional: true - description: "The instruct template for the LLM." - managed_identity_enabled: - type: boolean - default: false - optional: true - description: "Whether to connect using managed identity." -outputs: - grounding_index: - type: uri_folder - db_context: - type: uri_folder -jobs: - db_meta_loading_generator: - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - asset_uri: - path: ${{parent.inputs.db_datastore}} - max_tables: - path: ${{parent.inputs.max_tables}} - max_columns: - path: ${{parent.inputs.max_columns}} - max_rows: - path: ${{parent.inputs.max_rows}} - max_sampling_rows: - path: ${{parent.inputs.max_sampling_rows}} - max_text_length: - path: ${{parent.inputs.max_text_length}} - max_knowledge_pieces: - path: ${{parent.inputs.max_knowledge_pieces}} - selected_tables: - path: ${{parent.inputs.selected_tables}} - column_settings: - path: ${{parent.inputs.column_settings}} - include_views: - path :${{parent.inputs.include_views}} - outputs: - output_chunk_file: - type: uri_folder - output_grounding_context_file: ${{parent.outputs.db_context}} - environment_variables: - MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}} - component: "azureml:llm_dbcopilot_grounding:0.0.70" - type: command - generate_meta_embeddings: - type: command - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - component: "azureml:llm_rag_generate_embeddings:0.0.66" - inputs: - chunks_source: - type: uri_folder - path: ${{parent.jobs.db_meta_loading_generator.outputs.output_chunk_file}} - embeddings_model: ${{parent.inputs.embeddings_model}} - outputs: - embeddings: - mode: upload - type: uri_folder - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - # generate_meta_embeddings: - # resources: - # instance_count: ${{parent.inputs.serverless_instance_count}} - # instance_type: ${{parent.inputs.serverless_instance_type}} - # properties: - # compute_specification: - # automatic: true - # retry_settings: - # timeout: 3600 - # max_retries: 3 - # environment_variables: - # AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - # inputs: - # chunks_source: - # path: ${{parent.jobs.db_meta_loading_generator.outputs.output_chunk_file}} - # embeddings_model: - # path: ${{parent.inputs.embeddings_model}} - # outputs: - # embeddings: - # mode: upload - # type: uri_folder - # component: "azureml:llm_rag_generate_embeddings_parallel:0.0.9" - # type: parallel - create_meta_faiss_index_job: - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - embeddings: - path: ${{parent.jobs.generate_meta_embeddings.outputs.embeddings}} - outputs: - index: ${{parent.outputs.grounding_index}} - component: "azureml:llm_rag_create_faiss_index:0.0.71" - type: command - - ######################################### - db_sample_loading_generator: - type: command - component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.45" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - sample_folder: - type: uri_folder - path: ${{parent.inputs.sample_data}} - include_builtin: ${{parent.inputs.include_builtin_examples}} - tools: ${{parent.inputs.tools}} - grounding_context: ${{parent.jobs.db_meta_loading_generator.outputs.output_grounding_context_file}} - outputs: - output_chunk_file: - type: uri_folder - ######################################### - generate_sample_embeddings: - type: command - component: "azureml:llm_rag_generate_embeddings:0.0.66" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - chunks_source: - type: uri_file - path: ${{parent.jobs.db_sample_loading_generator.outputs.output_chunk_file}} - embeddings_model: ${{parent.inputs.embeddings_model}} - outputs: - embeddings: - type: uri_folder - mode: upload - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - ######################################### - create_sample_faiss_index_job: - type: command - component: "azureml:llm_rag_create_faiss_index:0.0.71" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - embeddings: - type: uri_folder - path: ${{parent.jobs.generate_sample_embeddings.outputs.embeddings}} - outputs: - index: - type: uri_folder - ######################################### - register_mlindex_asset_job: - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - storage_uri: - path: ${{parent.jobs.create_meta_faiss_index_job.outputs.index}} - asset_name: - path: ${{parent.inputs.embeddings_dataset_name}} - outputs: - asset_id: - type: uri_file - component: "azureml:llm_rag_register_mlindex_asset:0.0.70" - type: command - create_prompt_flow: - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI_EMBEDDING: ${{parent.inputs.embedding_connection}} - AZUREML_WORKSPACE_CONNECTION_ID_AOAI_CHAT: ${{parent.inputs.llm_connection}} - MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}} - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - identity: - type: user_identity - inputs: - index_name: - path: ${{parent.inputs.embeddings_dataset_name}} - grounding_embedding_uri: - path: ${{parent.jobs.create_meta_faiss_index_job.outputs.index}} - example_embedding_uri: - path: ${{parent.jobs.create_sample_faiss_index_job.outputs.index}} - db_context_uri: - path: ${{parent.jobs.db_meta_loading_generator.outputs.output_grounding_context_file}} - asset_uri: - path: ${{parent.inputs.db_datastore}} - embedding_aoai_deployment_name: - path: ${{parent.inputs.embedding_aoai_deployment_name}} - chat_aoai_deployment_name: - path: ${{parent.inputs.chat_aoai_deployment_name}} - llm_config: - path: ${{parent.inputs.llm_config}} - runtime: - path: ${{parent.inputs.runtime}} - knowledge_pieces: - path: ${{parent.inputs.knowledge_pieces}} - include_views: - path: ${{parent.inputs.include_views}} - instruct_template: - path: ${{parent.inputs.instruct_template}} - component: "azureml:llm_dbcopilot_create_promptflow:0.0.70" - type: command diff --git a/assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_acs_e2e/asset.yaml b/assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_acs_e2e/asset.yaml deleted file mode 100644 index dbebe8dc08..0000000000 --- a/assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_acs_e2e/asset.yaml +++ /dev/null @@ -1,4 +0,0 @@ -spec: spec.yaml -type: component -categories: -- Retrieval Augmented Generation \ No newline at end of file diff --git a/assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_acs_e2e/spec.yaml b/assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_acs_e2e/spec.yaml deleted file mode 100644 index 16bea458ab..0000000000 --- a/assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_acs_e2e/spec.yaml +++ /dev/null @@ -1,319 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline - -name: llm_ingest_dbcopilot_acs_e2e -version: 0.0.66 -display_name: Data Ingestion for DB Data Output to ACS E2E Deployment -description: Single job pipeline to chunk data from AzureML DB Datastore and create acs embeddings index - -settings: - default_compute: serverless - -inputs: - db_datastore: - type: string - description: "database datastore uri in the format of 'azureml://datastores/{datastore_name}'" - sample_data: - type: uri_folder - description: "Sample data to be used for data ingestion. format: 'azureml:samples-test:1'" - optional: true - # path: "azureml:samples-test:1" - # data ingest setting - embeddings_model: - type: string - description: "The model used to generate embeddings. 'azure_open_ai://endpoint/{endpoint_name}/deployment/{deployment_name}/model/{model_name}'" - chat_aoai_deployment_name: - type: string - description: "The name of the chat AOAI deployment" - optional: true - embedding_aoai_deployment_name: - type: string - description: "The name of the embedding AOAI deployment" - # grounding settings - max_tables: - type: integer - optional: true - max_columns: - type: integer - optional: true - max_rows: - type: integer - optional: true - max_sampling_rows: - type: integer - optional: true - max_text_length: - type: integer - optional: true - max_knowledge_pieces: - type: integer - optional: true - selected_tables: - type: string - optional: true - description: 'The list of tables to be ingested. If not specified, all tables will be ingested. Format: ["table1","table2","table3"]' - column_settings: - type: string - optional: true - # copilot settings - tools: - type: string - optional: true - description: 'The name of the tools for dbcopilot. Supported tools: "tsql", "python". Format: ["tsql", "python"]' - # deploy settings - endpoint_name: - type: string - description: "The name of the endpoint" - deployment_name: - type: string - description: "The name of the deployment" - default: "blue" - mir_environment: - type: string - description: "The name of the mir environment. Format: azureml://registries/{registry_name}/environments/llm-dbcopilot-mir" - # compute settings - serverless_instance_count: - type: integer - default: 1 - optional: true - serverless_instance_type: - type: string - default: "Standard_DS3_v2" - optional: true - embedding_connection: - type: string - optional: true - description: "Azure OpenAI workspace connection ARM ID for embeddings" - llm_connection: - type: string - optional: true - description: "Azure OpenAI workspace connection ARM ID for llm" - acs_connection: - type: string - optional: true - description: "Azure Cognitive Search workspace connection ARM ID" - acs_config_meta: - type: string - description: "JSON describing the acs index to create or update for embeddings" - sample_acs_config: - type: string - description: "JSON describing the acs index to create or update for samples" - temperature: - type: number - default: 0.0 - optional: true - top_p: - type: number - default: 0.0 - optional: true - include_builtin_examples: - type: boolean - default: true - optional: true - knowledge_pieces: - type: string - optional: true - description: "The list of knowledge pieces to be used for grounding." - include_views: - type: boolean - optional: true - description: "Whether to turn on views." - instruct_template: - type: string - optional: true - description: "The instruct template for the LLM." - managed_identity_enabled: - type: boolean - default: false - optional: true - description: "Whether to connect using managed identity." - egress_public_network_access: - type: string - optional: true - default: "enabled" - description: "This option allows the resource to send outbound traffic to the public Internet or not, there are two choices disabled and enabled, the default is enabled" -outputs: - grounding_index: - type: uri_folder - db_context: - type: uri_folder -jobs: - ######################################### - db_meta_loading_generator: - type: command - component: "azureml:llm_dbcopilot_grounding:0.0.70" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - asset_uri: ${{parent.inputs.db_datastore}} - max_tables: ${{parent.inputs.max_tables}} - max_columns: ${{parent.inputs.max_columns}} - max_rows: ${{parent.inputs.max_rows}} - max_sampling_rows: ${{parent.inputs.max_sampling_rows}} - max_text_length: ${{parent.inputs.max_text_length}} - max_knowledge_pieces : ${{parent.inputs.max_knowledge_pieces}} - selected_tables: ${{parent.inputs.selected_tables}} - column_settings: ${{parent.inputs.column_settings}} - include_views: ${{parent.inputs.include_views}} - outputs: - output_chunk_file: - type: uri_folder - output_grounding_context_file: - type: uri_folder - path: ${{parent.outputs.db_context}} - environment_variables: - MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}} - ######################################### - generate_meta_embeddings: - type: command - component: "azureml:llm_rag_generate_embeddings:0.0.66" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - chunks_source: - type: uri_folder - path: ${{parent.jobs.db_meta_loading_generator.outputs.output_chunk_file}} - embeddings_model: ${{parent.inputs.embeddings_model}} - outputs: - embeddings: - type: uri_folder - mode: upload - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - ######################################### - create_meta_acs_index_job: - type: command - component: "azureml:llm_rag_update_acs_index:0.0.70" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - embeddings: - type: uri_folder - path: ${{parent.jobs.generate_meta_embeddings.outputs.embeddings}} - acs_config: ${{parent.inputs.acs_config_meta}} - outputs: - index: - type: uri_folder - path: ${{parent.outputs.grounding_index}} - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - AZUREML_WORKSPACE_CONNECTION_ID_ACS: ${{parent.inputs.acs_connection}} - ######################################### - db_sample_loading_generator: - type: command - component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.45" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - sample_folder: - type: uri_folder - path: ${{parent.inputs.sample_data}} - include_builtin: ${{parent.inputs.include_builtin_examples}} - tools: ${{parent.inputs.tools}} - grounding_context: ${{parent.jobs.db_meta_loading_generator.outputs.output_grounding_context_file}} - outputs: - output_chunk_file: - type: uri_folder - ######################################### - generate_sample_embeddings: - type: command - component: "azureml:llm_rag_generate_embeddings:0.0.66" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - chunks_source: - type: uri_folder - path: ${{parent.jobs.db_sample_loading_generator.outputs.output_chunk_file}} - embeddings_model: ${{parent.inputs.embeddings_model}} - outputs: - embeddings: - type: uri_folder - mode: upload - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - ######################################### - create_sample_acs_index_job: - type: command - component: "azureml:llm_rag_update_acs_index:0.0.70" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - embeddings: - type: uri_folder - path: ${{parent.jobs.generate_sample_embeddings.outputs.embeddings}} - acs_config: ${{parent.inputs.sample_acs_config}} - outputs: - index: - type: uri_folder - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - AZUREML_WORKSPACE_CONNECTION_ID_ACS: ${{parent.inputs.acs_connection}} - ######################################### - endpoint_deployment_job: - type: command - component: "azureml:llm_dbcopilot_deploy_endpoint:0.0.46" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - identity: - type: user_identity - inputs: - grounding_embedding_uri: - type: uri_folder - path: ${{parent.jobs.create_meta_acs_index_job.outputs.index}} - example_embedding_uri: - type: uri_folder - path: ${{parent.jobs.create_sample_acs_index_job.outputs.index}} - db_context_uri: - type: uri_file - path: ${{parent.jobs.db_meta_loading_generator.outputs.output_grounding_context_file}} - endpoint_name: ${{parent.inputs.endpoint_name}} - deployment_name: ${{parent.inputs.deployment_name}} - asset_uri: ${{parent.inputs.db_datastore}} - embedding_aoai_deployment_name: ${{parent.inputs.embedding_aoai_deployment_name}} - chat_aoai_deployment_name: ${{parent.inputs.chat_aoai_deployment_name}} - mir_environment: ${{parent.inputs.mir_environment}} - selected_tables: ${{parent.inputs.selected_tables}} - max_tables: ${{parent.inputs.max_tables}} - max_rows: ${{parent.inputs.max_rows}} - max_columns: ${{parent.inputs.max_columns}} - max_text_length: ${{parent.inputs.max_text_length}} - max_knowledge_pieces: ${{parent.inputs.max_knowledge_pieces}} - temperature: ${{parent.inputs.temperature}} - top_p: ${{parent.inputs.top_p}} - tools: ${{parent.inputs.tools}} - knowledge_pieces: ${{parent.inputs.knowledge_pieces}} - include_views: ${{parent.inputs.include_views}} - instruct_template: ${{parent.inputs.instruct_template}} - egress_public_network_access: ${{parent.inputs.egress_public_network_access}} - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI_EMBEDDING: ${{parent.inputs.embedding_connection}} - AZUREML_WORKSPACE_CONNECTION_ID_AOAI_CHAT: ${{parent.inputs.llm_connection}} - MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}} diff --git a/assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_faiss_e2e/asset.yaml b/assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_faiss_e2e/asset.yaml deleted file mode 100644 index dbebe8dc08..0000000000 --- a/assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_faiss_e2e/asset.yaml +++ /dev/null @@ -1,4 +0,0 @@ -spec: spec.yaml -type: component -categories: -- Retrieval Augmented Generation \ No newline at end of file diff --git a/assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_faiss_e2e/spec.yaml b/assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_faiss_e2e/spec.yaml deleted file mode 100644 index b68803265d..0000000000 --- a/assets/large_language_models/components_pipelines/data_ingestion_dbcopilot_faiss_e2e/spec.yaml +++ /dev/null @@ -1,303 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline - -name: llm_ingest_dbcopilot_faiss_e2e -version: 0.0.66 -display_name: Data Ingestion for DB Data Output to FAISS E2E Deployment -description: Single job pipeline to chunk data from AzureML DB Datastore and create faiss embeddings index - -settings: - default_compute: serverless - -inputs: - db_datastore: - type: string - description: "database datastore uri in the format of 'azureml://datastores/{datastore_name}'" - sample_data: - type: uri_folder - description: "Sample data to be used for data ingestion. format: 'azureml:samples-test:1'" - optional: true - # path: "azureml:samples-test:1" - # data ingest setting - embeddings_model: - type: string - description: "The model used to generate embeddings. 'azure_open_ai://endpoint/{endpoint_name}/deployment/{deployment_name}/model/{model_name}'" - chat_aoai_deployment_name: - type: string - description: "The name of the chat AOAI deployment" - optional: true - embedding_aoai_deployment_name: - type: string - description: "The name of the embedding AOAI deployment" - # grounding settings - max_tables: - type: integer - optional: true - max_columns: - type: integer - optional: true - max_rows: - type: integer - optional: true - max_sampling_rows: - type: integer - optional: true - max_text_length: - type: integer - optional: true - max_knowledge_pieces: - type: integer - optional: true - selected_tables: - type: string - optional: true - description: 'The list of tables to be ingested. If not specified, all tables will be ingested. Format: ["table1","table2","table3"]' - column_settings: - type: string - optional: true - # copilot settings - tools: - type: string - optional: true - description: 'The name of the tools for dbcopilot. Supported tools: "tsql", "python". Format: ["tsql", "python"]' - # deploy settings - endpoint_name: - type: string - description: "The name of the endpoint" - deployment_name: - type: string - description: "The name of the deployment" - default: "blue" - mir_environment: - type: string - description: "The name of the mir environment. Format: azureml://registries/{registry_name}/environments/llm-dbcopilot-mir" - # compute settings - serverless_instance_count: - type: integer - default: 1 - optional: true - serverless_instance_type: - type: string - default: "Standard_DS3_v2" - optional: true - embedding_connection: - type: string - optional: true - description: "Azure OpenAI workspace connection ARM ID for embeddings" - llm_connection: - type: string - optional: true - description: "Azure OpenAI workspace connection ARM ID for llm" - temperature: - type: number - default: 0.0 - optional: true - top_p: - type: number - default: 0.0 - optional: true - include_builtin_examples: - type: boolean - default: true - optional: true - knowledge_pieces: - type: string - optional: true - description: "The list of knowledge pieces to be used for grounding." - include_views: - type: boolean - optional: true - description: "Whether to turn on views." - instruct_template: - type: string - optional: true - description: "The instruct template for the LLM." - managed_identity_enabled: - type: boolean - default: false - optional: true - description: "Whether to connect using managed identity." - egress_public_network_access: - type: string - optional: true - default: "enabled" - description: "This option allows the resource to send outbound traffic to the public Internet or not, there are two choices disabled and enabled, the default is enabled" -outputs: - grounding_index: - type: uri_folder - db_context: - type: uri_folder -jobs: - ######################################### - db_meta_loading_generator: - type: command - component: "azureml:llm_dbcopilot_grounding:0.0.70" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - asset_uri: ${{parent.inputs.db_datastore}} - max_tables: ${{parent.inputs.max_tables}} - max_columns: ${{parent.inputs.max_columns}} - max_rows: ${{parent.inputs.max_rows}} - max_sampling_rows: ${{parent.inputs.max_sampling_rows}} - max_text_length: ${{parent.inputs.max_text_length}} - max_knowledge_pieces: ${{parent.inputs.max_knowledge_pieces}} - selected_tables: ${{parent.inputs.selected_tables}} - column_settings: ${{parent.inputs.column_settings}} - include_views: ${{parent.inputs.include_views}} - outputs: - output_chunk_file: - type: uri_folder - output_grounding_context_file: - type: uri_folder - path: ${{parent.outputs.db_context}} - environment_variables: - MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}} - ######################################### - generate_meta_embeddings: - type: command - component: "azureml:llm_rag_generate_embeddings:0.0.66" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - chunks_source: - type: uri_folder - path: ${{parent.jobs.db_meta_loading_generator.outputs.output_chunk_file}} - embeddings_model: ${{parent.inputs.embeddings_model}} - outputs: - embeddings: - type: uri_folder - mode: upload - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - ######################################### - create_meta_faiss_index_job: - type: command - component: "azureml:llm_rag_create_faiss_index:0.0.71" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - embeddings: - type: uri_folder - path: ${{parent.jobs.generate_meta_embeddings.outputs.embeddings}} - outputs: - index: - type: uri_folder - path: ${{parent.outputs.grounding_index}} - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - ######################################### - db_sample_loading_generator: - type: command - component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.45" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - sample_folder: - type: uri_folder - path: ${{parent.inputs.sample_data}} - include_builtin: ${{parent.inputs.include_builtin_examples}} - tools: ${{parent.inputs.tools}} - grounding_context: ${{parent.jobs.db_meta_loading_generator.outputs.output_grounding_context_file}} - outputs: - output_chunk_file: - type: uri_folder - ######################################### - generate_sample_embeddings: - type: command - component: "azureml:llm_rag_generate_embeddings:0.0.66" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - chunks_source: - type: uri_file - path: ${{parent.jobs.db_sample_loading_generator.outputs.output_chunk_file}} - embeddings_model: ${{parent.inputs.embeddings_model}} - outputs: - embeddings: - type: uri_folder - mode: upload - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI: ${{parent.inputs.embedding_connection}} - ######################################### - create_sample_faiss_index_job: - type: command - component: "azureml:llm_rag_create_faiss_index:0.0.71" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - inputs: - embeddings: - type: uri_folder - path: ${{parent.jobs.generate_sample_embeddings.outputs.embeddings}} - outputs: - index: - type: uri_folder - ######################################### - endpoint_deployment_job: - type: command - component: "azureml:llm_dbcopilot_deploy_endpoint:0.0.46" - resources: - instance_count: ${{parent.inputs.serverless_instance_count}} - instance_type: ${{parent.inputs.serverless_instance_type}} - properties: - compute_specification: - automatic: true - identity: - type: user_identity - inputs: - grounding_embedding_uri: - type: uri_folder - path: ${{parent.jobs.create_meta_faiss_index_job.outputs.index}} - example_embedding_uri: - type: uri_folder - path: ${{parent.jobs.create_sample_faiss_index_job.outputs.index}} - db_context_uri: - type: uri_file - path: ${{parent.jobs.db_meta_loading_generator.outputs.output_grounding_context_file}} - endpoint_name: ${{parent.inputs.endpoint_name}} - deployment_name: ${{parent.inputs.deployment_name}} - asset_uri: ${{parent.inputs.db_datastore}} - embedding_aoai_deployment_name: ${{parent.inputs.embedding_aoai_deployment_name}} - chat_aoai_deployment_name: ${{parent.inputs.chat_aoai_deployment_name}} - mir_environment: ${{parent.inputs.mir_environment}} - selected_tables: ${{parent.inputs.selected_tables}} - max_tables: ${{parent.inputs.max_tables}} - max_rows: ${{parent.inputs.max_rows}} - max_columns: ${{parent.inputs.max_columns}} - max_text_length: ${{parent.inputs.max_text_length}} - max_knowledge_pieces: ${{parent.inputs.max_knowledge_pieces}} - tools: ${{parent.inputs.tools}} - temperature: ${{parent.inputs.temperature}} - top_p: ${{parent.inputs.top_p}} - knowledge_pieces: ${{parent.inputs.knowledge_pieces}} - include_views: ${{parent.inputs.include_views}} - instruct_template: ${{parent.inputs.instruct_template}} - egress_public_network_access: ${{parent.inputs.egress_public_network_access}} - environment_variables: - AZUREML_WORKSPACE_CONNECTION_ID_AOAI_EMBEDDING: ${{parent.inputs.embedding_connection}} - AZUREML_WORKSPACE_CONNECTION_ID_AOAI_CHAT: ${{parent.inputs.llm_connection}} - MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}} diff --git a/assets/large_language_models/components_pipelines/oai_v2_1p/openai_completions_finetune_pipeline/asset.yaml b/assets/large_language_models/components_pipelines/oai_v2_1p/openai_completions_finetune_pipeline/asset.yaml deleted file mode 100644 index 24bdd28b97..0000000000 --- a/assets/large_language_models/components_pipelines/oai_v2_1p/openai_completions_finetune_pipeline/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["OAI"] \ No newline at end of file diff --git a/assets/large_language_models/components_pipelines/oai_v2_1p/openai_completions_finetune_pipeline/spec.yaml b/assets/large_language_models/components_pipelines/oai_v2_1p/openai_completions_finetune_pipeline/spec.yaml deleted file mode 100644 index 3d6382f848..0000000000 --- a/assets/large_language_models/components_pipelines/oai_v2_1p/openai_completions_finetune_pipeline/spec.yaml +++ /dev/null @@ -1,83 +0,0 @@ -$schema: http://azureml/sdk-2-0/PipelineComponent.json -type: pipeline -version: 0.1.3 -name: openai_completions_finetune_pipeline -display_name: OpenAI Completions Finetune Pipeline -description: Finetune your own OAI model. Visit https://learn.microsoft.com/en-us/azure/cognitive-services/openai/ for more info. -tags: - contact: gpt3finetuning@microsoft.com -is_deterministic: true - -inputs: - model: - type: string - optional: False - default: gpt-35-turbo - description: GPT model engine - enum: - - babbage-002 - - davinci-002 - - gpt-35-turbo - - gpt-4 - train_dataset: - type: uri_folder - optional: False - description: Input dataset (file or folder). If a folder dataset is passed, includes all nested files. - validation_dataset: - type: uri_folder - optional: True - description: Input dataset (file or folder). If a folder dataset is passed, includes all nested files. - task_type: - type: string - optional: False - description: Dataset type - chat or completion - enum: - - chat - - completion - registered_model_name: - type: string - optional: False - description: User-defined registered model name - n_epochs: - type: integer - optional: False - default: -1 - description: Number of training epochs. If set to -1, number of epochs will be determined dynamically based on the input data. - learning_rate_multiplier: - type: number - optional: False - default: 1.0 - description: The learning rate multiplier to use for training. - batch_size: - type: integer - optional: False - default: -1 - description: Global batch size. If set to -1, batch size will be determined dynamically based on the input data. - -outputs: - output_model: - type: uri_folder - description: Dataset with the output model weights (LoRA weights) - mode: mount - -jobs: - openai_data_import: - type: command - component: azureml://registries/azure-openai-v2/components/openai_data_import/versions/0.3.5 - inputs: - train_dataset: ${{parent.inputs.train_dataset}} - validation_dataset: ${{parent.inputs.validation_dataset}} - model: ${{parent.inputs.model}} - openai_completions_finetune: - type: command - component: azureml://registries/azure-openai-v2/components/openai_completions_finetune/versions/0.4.5 - inputs: - input_dataset: ${{parent.jobs.openai_data_import.outputs.out_dataset}} - model: ${{parent.inputs.model}} - task_type: ${{parent.inputs.task_type}} - registered_model_name: ${{parent.inputs.registered_model_name}} - n_epochs: ${{parent.inputs.n_epochs}} - learning_rate_multiplier: ${{parent.inputs.learning_rate_multiplier}} - batch_size: ${{parent.inputs.batch_size}} - outputs: - output_model: ${{parent.outputs.output_model}} diff --git a/assets/oai/components_3p/oai_completions_finetune/asset.yaml b/assets/oai/components_3p/oai_completions_finetune/asset.yaml deleted file mode 100644 index 24bdd28b97..0000000000 --- a/assets/oai/components_3p/oai_completions_finetune/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["OAI"] \ No newline at end of file diff --git a/assets/oai/components_3p/oai_completions_finetune/spec.yaml b/assets/oai/components_3p/oai_completions_finetune/spec.yaml deleted file mode 100644 index a87164c79b..0000000000 --- a/assets/oai/components_3p/oai_completions_finetune/spec.yaml +++ /dev/null @@ -1,112 +0,0 @@ -$schema: http://azureml/sdk-2-0/PipelineComponent.json -display_name: OpenAI Completions Finetune Pipeline -name: openai_completions_finetune -version: 0.2.2 -type: pipeline -description: Finetune your own OAI model. Visit https://learn.microsoft.com/en-us/azure/cognitive-services/openai/ for more info. -settings: - default_compute: azureml:serverless - -inputs: - model: - type: string - optional: False - default: davinci - description: OAI model engine - enum: - - ada - - babbage - - curie - - davinci - - text-davinci-fine-tune-002 - registered_model_name: - type: string - optional: False - description: User-defined registered model name - train_dataset: - type: uri_folder - optional: False - description: Input dataset (file or folder). If a folder dataset is passed, includes all nested files. - validation_dataset: - type: uri_folder - optional: True - description: Input dataset (file or folder). If a folder dataset is passed, includes all nested files. - lora_weights: - type: uri_folder - description: LoRA weights for continual finetuning. This is optional. - optional: True - n_epochs: - type: integer - optional: True - default: 4 - description: Number of epochs for the training - batch_size: - type: integer - optional: True - default: -1 - description: The batch size to use for training. When set to -1, batch_size is calculated as 0.2% of examples in training set and the max is 256. - learning_rate_multiplier: - type: number - optional: True - default: 0.1 - description: The learning rate multiplier to use for training. Must be between 0.0 and 5.0. - prompt_loss_weight: - type: number - optional: True - default: 0.1 - min: 0 - max: 1 - description: The prompt loss weight to use for training - compute_classification_metrics: - type: boolean - optional: True - description: If set, we calculate classification-specific metrics such as accuracy and F-1 score using the validation set at the end of every epoch. In order to compute classification metrics, you must provide a validation_file. Additionally, you must specify classification_n_classes for multiclass classification or classification_positive_class for binary classification. - classification_n_classes: - type: integer - optional: True - description: The number of classes in a classification task. This parameter is required for multiclass classification. - classification_positive_class: - type: string - optional: True - description: The positive class in binary classification. This parameter is needed to generate precision, recall, and F1 metrics when doing binary classification. - classification_betas: - type: string - optional: True - description: If this is provided, we calculate F-beta scores at the specified beta values. The F-beta score is a generalization of F-1 score. This is only used for binary classification. With a beta of 1 (i.e. the F-1 score), precision and recall are given the same weight. A larger beta score puts more weight on recall and less on precision. A smaller beta score puts more weight on precision and less on recall. The value specified should be a comma separated list of doubles. - quota_enforcement_resource_id: - type: string - optional: True - description: Owner subscription id. - -outputs: - output_model: - type: uri_folder - mode: mount - description: Dataset with the output model weights (LoRA weights) - -jobs: - fine_tune: - type: command - resources: - properties: - quota_enforcement_resource_id: ${{parent.inputs.quota_enforcement_resource_id}} - component: azureml://registries/azure-openai-preview/components/openai_completions_finetune/versions/0.3.4 - inputs: - model: ${{parent.inputs.model}} - registered_model_name: ${{parent.inputs.registered_model_name}} - train_dataset: ${{parent.inputs.train_dataset}} - validation_dataset: ${{parent.inputs.validation_dataset}} - lora_weights: ${{parent.inputs.lora_weights}} - n_epochs: ${{parent.inputs.n_epochs}} - batch_size: ${{parent.inputs.batch_size}} - learning_rate_multiplier: ${{parent.inputs.learning_rate_multiplier}} - prompt_loss_weight: ${{parent.inputs.prompt_loss_weight}} - compute_classification_metrics: ${{parent.inputs.compute_classification_metrics}} - classification_n_classes: ${{parent.inputs.classification_n_classes}} - classification_positive_class: ${{parent.inputs.classification_positive_class}} - classification_betas: ${{parent.inputs.classification_betas}} - outputs: - output_model: - path: ${{parent.outputs.output_model}} - mode: mount - type: uri_folder diff --git a/assets/training/automl/components/automl_image_classification/asset.yaml b/assets/training/automl/components/automl_image_classification/asset.yaml deleted file mode 100644 index c587c80c9f..0000000000 --- a/assets/training/automl/components/automl_image_classification/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML"] diff --git a/assets/training/automl/components/automl_image_classification/spec.yaml b/assets/training/automl/components/automl_image_classification/spec.yaml deleted file mode 100644 index 3ded8a2eff..0000000000 --- a/assets/training/automl/components/automl_image_classification/spec.yaml +++ /dev/null @@ -1,10 +0,0 @@ -$schema: http://azureml/sdk-2-0/AutoMLComponent.json -name: automl_image_classification -display_name: AutoML Image Classification (Preview) -description: - Component that kicks off an AutoML job to train an image classification model within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/automl_components) (Preview). -version: 0.0.1 -type: automl -task: image_classification -is_deterministic: false diff --git a/assets/training/automl/components/automl_image_classification_multilabel/asset.yaml b/assets/training/automl/components/automl_image_classification_multilabel/asset.yaml deleted file mode 100644 index c587c80c9f..0000000000 --- a/assets/training/automl/components/automl_image_classification_multilabel/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML"] diff --git a/assets/training/automl/components/automl_image_classification_multilabel/spec.yaml b/assets/training/automl/components/automl_image_classification_multilabel/spec.yaml deleted file mode 100644 index 3ec26054a7..0000000000 --- a/assets/training/automl/components/automl_image_classification_multilabel/spec.yaml +++ /dev/null @@ -1,10 +0,0 @@ -$schema: http://azureml/sdk-2-0/AutoMLComponent.json -name: automl_image_classification_multilabel -display_name: AutoML Image Classification Multilabel (Preview) -description: - Component that kicks off an AutoML job to train an multilabel image classification model within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/automl_components) (Preview). -version: 0.0.1 -type: automl -task: image_classification_multilabel -is_deterministic: false diff --git a/assets/training/automl/components/automl_image_instance_segmentation/asset.yaml b/assets/training/automl/components/automl_image_instance_segmentation/asset.yaml deleted file mode 100644 index c587c80c9f..0000000000 --- a/assets/training/automl/components/automl_image_instance_segmentation/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML"] diff --git a/assets/training/automl/components/automl_image_instance_segmentation/spec.yaml b/assets/training/automl/components/automl_image_instance_segmentation/spec.yaml deleted file mode 100644 index 021528a4da..0000000000 --- a/assets/training/automl/components/automl_image_instance_segmentation/spec.yaml +++ /dev/null @@ -1,10 +0,0 @@ -$schema: http://azureml/sdk-2-0/AutoMLComponent.json -name: automl_image_instance_segmentation -display_name: AutoML Image Instance Segmentation (Preview) -description: - Component that kicks off an AutoML job to train an image instance segmentation model within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/automl_components) (Preview). -version: 0.0.1 -type: automl -task: image_instance_segmentation -is_deterministic: false diff --git a/assets/training/automl/components/automl_image_object_detection/asset.yaml b/assets/training/automl/components/automl_image_object_detection/asset.yaml deleted file mode 100644 index c587c80c9f..0000000000 --- a/assets/training/automl/components/automl_image_object_detection/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML"] diff --git a/assets/training/automl/components/automl_image_object_detection/spec.yaml b/assets/training/automl/components/automl_image_object_detection/spec.yaml deleted file mode 100644 index 299b0bef75..0000000000 --- a/assets/training/automl/components/automl_image_object_detection/spec.yaml +++ /dev/null @@ -1,10 +0,0 @@ -$schema: http://azureml/sdk-2-0/AutoMLComponent.json -name: automl_image_object_detection -display_name: AutoML Image Object Detection (Preview) -description: - Component that kicks off an AutoML job to train an image object detection model within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/automl_components) (Preview). -version: 0.0.1 -type: automl -task: image_object_detection -is_deterministic: false diff --git a/assets/training/automl/components/automl_tabular_classification/asset.yaml b/assets/training/automl/components/automl_tabular_classification/asset.yaml deleted file mode 100644 index c587c80c9f..0000000000 --- a/assets/training/automl/components/automl_tabular_classification/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML"] diff --git a/assets/training/automl/components/automl_tabular_classification/spec.yaml b/assets/training/automl/components/automl_tabular_classification/spec.yaml deleted file mode 100644 index 9ce70fc0c8..0000000000 --- a/assets/training/automl/components/automl_tabular_classification/spec.yaml +++ /dev/null @@ -1,10 +0,0 @@ -$schema: http://azureml/sdk-2-0/AutoMLComponent.json -name: automl_classification -display_name: AutoML Classification (Preview) -description: - Component that kicks off an AutoML job to train a classification model within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/automl_components) (Preview). -version: 0.0.1 -type: automl -task: classification -is_deterministic: false diff --git a/assets/training/automl/components/automl_tabular_forecasting/asset.yaml b/assets/training/automl/components/automl_tabular_forecasting/asset.yaml deleted file mode 100644 index c587c80c9f..0000000000 --- a/assets/training/automl/components/automl_tabular_forecasting/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML"] diff --git a/assets/training/automl/components/automl_tabular_forecasting/spec.yaml b/assets/training/automl/components/automl_tabular_forecasting/spec.yaml deleted file mode 100644 index 5c6dc56e45..0000000000 --- a/assets/training/automl/components/automl_tabular_forecasting/spec.yaml +++ /dev/null @@ -1,10 +0,0 @@ -$schema: http://azureml/sdk-2-0/AutoMLComponent.json -name: automl_forecasting -display_name: AutoML Forecasting (Preview) -description: - Component that kicks off an AutoML job to train a forecasting model within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/automl_components) (Preview). -version: 0.0.1 -type: automl -task: forecasting -is_deterministic: false diff --git a/assets/training/automl/components/automl_tabular_regression/asset.yaml b/assets/training/automl/components/automl_tabular_regression/asset.yaml deleted file mode 100644 index c587c80c9f..0000000000 --- a/assets/training/automl/components/automl_tabular_regression/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML"] diff --git a/assets/training/automl/components/automl_tabular_regression/spec.yaml b/assets/training/automl/components/automl_tabular_regression/spec.yaml deleted file mode 100644 index b86a82394f..0000000000 --- a/assets/training/automl/components/automl_tabular_regression/spec.yaml +++ /dev/null @@ -1,10 +0,0 @@ -$schema: http://azureml/sdk-2-0/AutoMLComponent.json -name: automl_regression -display_name: AutoML Regression (Preview) -description: - Component that kicks off an AutoML job to train a regression model within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/automl_components) (Preview). -version: 0.0.1 -type: automl -task: regression -is_deterministic: false diff --git a/assets/training/automl/components/automl_text_classification/asset.yaml b/assets/training/automl/components/automl_text_classification/asset.yaml deleted file mode 100644 index c587c80c9f..0000000000 --- a/assets/training/automl/components/automl_text_classification/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML"] diff --git a/assets/training/automl/components/automl_text_classification/spec.yaml b/assets/training/automl/components/automl_text_classification/spec.yaml deleted file mode 100644 index 206e162f27..0000000000 --- a/assets/training/automl/components/automl_text_classification/spec.yaml +++ /dev/null @@ -1,10 +0,0 @@ -$schema: http://azureml/sdk-2-0/AutoMLComponent.json -name: automl_text_classification -display_name: AutoML Text Classification (Preview) -description: - Component that kicks off an AutoML job to train a NLP text classification model within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/automl_components) (Preview). -version: 0.0.1 -type: automl -task: text_classification -is_deterministic: false diff --git a/assets/training/automl/components/automl_text_classification_multilabel/asset.yaml b/assets/training/automl/components/automl_text_classification_multilabel/asset.yaml deleted file mode 100644 index c587c80c9f..0000000000 --- a/assets/training/automl/components/automl_text_classification_multilabel/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML"] diff --git a/assets/training/automl/components/automl_text_classification_multilabel/spec.yaml b/assets/training/automl/components/automl_text_classification_multilabel/spec.yaml deleted file mode 100644 index 0c77290025..0000000000 --- a/assets/training/automl/components/automl_text_classification_multilabel/spec.yaml +++ /dev/null @@ -1,10 +0,0 @@ -$schema: http://azureml/sdk-2-0/AutoMLComponent.json -name: automl_text_classification_multilabel -display_name: AutoML Text Classification Multilabel (Preview) -description: - Component that kicks off an AutoML job to train a NLP text classification multilabel model within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/automl_components) (Preview). -version: 0.0.1 -type: automl -task: text_classification_multilabel -is_deterministic: false diff --git a/assets/training/automl/components/automl_text_ner/asset.yaml b/assets/training/automl/components/automl_text_ner/asset.yaml deleted file mode 100644 index c587c80c9f..0000000000 --- a/assets/training/automl/components/automl_text_ner/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML"] diff --git a/assets/training/automl/components/automl_text_ner/spec.yaml b/assets/training/automl/components/automl_text_ner/spec.yaml deleted file mode 100644 index 6b6b8d7f27..0000000000 --- a/assets/training/automl/components/automl_text_ner/spec.yaml +++ /dev/null @@ -1,10 +0,0 @@ -$schema: http://azureml/sdk-2-0/AutoMLComponent.json -name: automl_text_ner -display_name: AutoML Text NER (Preview) -description: - Component that kicks off an AutoML job to train a NLP NE (Named Entity Recognition) model within an Azure Machine Learning pipeline. - For more details, you can look at the component documentation [here](https://aka.ms/automl_components) (Preview). -version: 0.0.1 -type: automl -task: text_ner -is_deterministic: false diff --git a/assets/training/distillation/components/pipeline/asset.yaml b/assets/training/distillation/components/pipeline/asset.yaml deleted file mode 100644 index d0767a9360..0000000000 --- a/assets/training/distillation/components/pipeline/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune", "Distillation"] diff --git a/assets/training/distillation/components/pipeline/spec.yaml b/assets/training/distillation/components/pipeline/spec.yaml deleted file mode 100644 index 2984f6a8e4..0000000000 --- a/assets/training/distillation/components/pipeline/spec.yaml +++ /dev/null @@ -1,547 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -name: oss_distillation_pipeline -version: 0.0.10 -type: pipeline - - -display_name: OSS Distillation Pipeline -description: Component to generate data from teacher model enpoint and finetune student model on generated dataset - -inputs: - # Compute parameters - instance_type_pipeline_validation: - type: string - optional: True - description: Instance type to be used for validation component. The parameter compute_pipeline_validation must be set to 'serverless' for instance_type to be used. - instance_type_data_generation: - type: string - optional: true - default: Standard_D4as_v4 - description: Instance type to be used for finetune component in case of virtual cluster compute, eg. Singularity.ND40_v2. The parameter compute_finetune must be set to 'serverless' for instance_type to be used - instance_type_data_import: - type: string - optional: true - default: Singularity.ND96amrs_A100_v4 - description: Instance type to be used for data_import component in case of virtual cluster compute, eg. Singularity.D8_v3. The parameter compute_data_import must be set to 'serverless' for instance_type to be used - instance_type_finetune: - type: string - optional: true - default: Singularity.ND96amrs_A100_v4 - description: Instance type to be used for finetune component in case of virtual cluster compute, eg. Singularity.ND40_v2. The parameter compute_finetune must be set to 'serverless' for instance_type to be used - - compute_pipeline_validation: - type: string - optional: True - default: 'serverless' - description: compute to be used for validation component - - compute_data_generation: - type: string - optional: true - default: 'serverless' - description: >- - compute to be used for model_import eg. provide 'FT-Cluster' if - your compute is named 'FT-Cluster'. Special characters like \ and ' are invalid in the parameter value. - If compute cluster name is provided, instance_type field will be ignored and the respective cluster will be used - compute_data_import: - type: string - optional: true - default: 'serverless' - description: >- - compute to be used for model_import eg. provide 'FT-Cluster' if - your compute is named 'FT-Cluster'. Special characters like \ and ' are invalid in the parameter value. - If compute cluster name is provided, instance_type field will be ignored and the respective cluster will be used - compute_finetune: - type: string - optional: true - default: 'serverless' - description: >- - compute to be used for finetune eg. provide 'FT-Cluster' if your - compute is named 'FT-Cluster'. Special characters like \ and ' are invalid in the parameter value. - If compute cluster name is provided, instance_type field will be ignored and the respective cluster will be used - - # ########################### Data Generator Component ########################### # - - train_file_path: - type: uri_file - description: Path to the registered training data asset. The supported data formats are `jsonl`, `json`, `csv`, `tsv` and `parquet`. - mode: rw_mount - - validation_file_path: - type: uri_file - optional: true - description: Path to the registered validation data asset. The supported data formats are `jsonl`, `json`, `csv`, `tsv` and `parquet`. - mode: rw_mount - - teacher_model_endpoint_name: - type: string - optional: true - description: Teacher model endpoint name - - teacher_model_endpoint_url: - type: string - optional: true - description: Teacher model endpoint URL - - teacher_model_endpoint_key: - type: string - optional: true - description: Teacher model endpoint key - - teacher_model_max_new_tokens: - type: integer - default: 128 - description: Teacher model max_new_tokens inference parameter - - teacher_model_temperature: - type: number - default: 0.2 - description: Teacher model temperature inference parameter - - teacher_model_top_p: - type: number - default: 0.1 - description: Teacher model top_p inference parameter - - teacher_model_frequency_penalty: - type: number - default: 0.0 - description: Teacher model frequency penalty inference parameter - - teacher_model_presence_penalty: - type: number - default: 0.0 - description: Teacher model presence penalty inference parameter - - teacher_model_stop: - type: string - optional: true - description: Teacher model stop inference parameter - - request_batch_size: - type: integer - default: 10 - description: No of data records to hit teacher model endpoint in one go - - min_endpoint_success_ratio: - type: number - default: 0.7 - description: > - The minimum value of (successful_requests / total_requests) required for classifying inference as successful. - If (successful_requests / total_requests) < min_endpoint_success_ratio, the experiment will be marked as failed. - By default it is 0.7 (0 means all requests are allowed to fail while 1 means no request should fail.) - - enable_chain_of_thought: - type: string - optional: true - default: "false" - description: Enable Chain of thought for data generation - enum: - - "true" - - "false" - - enable_chain_of_density: - type: string - optional: true - default: "false" - description: Enable Chain of density for text summarization - enum: - - "true" - - "false" - - max_len_summary: - type: integer - optional: true - default: 80 - description: Maximum Length Summary for text summarization - - data_generation_task_type: - type: string - enum: - - NLI - - CONVERSATION - - NLU_QA - - MATH - - SUMMARIZATION - description: > - Data generation task type. Supported values are: - 1. NLI: Generate Natural Language Inference data - 2. CONVERSATION: Generate conversational data (multi/single turn) - 3. NLU_QA: Generate Natural Language Understanding data for Question Answering data - 4. MATH: Generate Math data for numerical responses - 5. SUMMARIZATION: Generate Key Summary for an Article - - - # ########################### Batch Score Component ########################### # - authentication_type: - type: string - optional: False - description: Authentication type for endpoint. Either `azureml_workspace_connection` or `managed_identity`. - default: azureml_workspace_connection - enum: - - azureml_workspace_connection - - managed_identity - additional_headers: - type: string - optional: True - description: JSON serialized string expressing additional headers to be added to each request. - debug_mode: - type: boolean - optional: False - default: False - description: Enable debug mode to print all the debug logs in the score step. - ensure_ascii: - type: boolean - optional: False - default: False - description: If set to true, the output is guaranteed to have all incoming non-ASCII characters escaped. If set to false, these characters will be output as-is. More detailed information can be found at https://docs.python.org/3/library/json.html - max_retry_time_interval: - type: integer - optional: True - description: The maximum time (in seconds) spent retrying a payload. If unspecified, payloads are retried for unlimited time. - initial_worker_count: - type: integer - optional: False - default: 5 - description: The initial number of workers to use for scoring. - max_worker_count: - type: integer - optional: False - default: 200 - description: Overrides `initial_worker_count` if necessary. - instance_count: - type: integer - default: 1 - description: Number of nodes in a compute cluster we will run the batch score step on. - max_concurrency_per_instance: - type: integer - default: 1 - description: Number of processes that will be run concurrently on any given node. This number should not be larger than 1/2 of the number of cores in an individual node in the specified cluster. - mini_batch_size: - type: string - optional: true - default: 100KB - description: The mini batch size for parallel run. - - # ########################### Finetuning Component ########################### # - - number_of_gpu_to_use_finetuning: - type: integer - default: 1 - optional: true - description: >- - number of gpus to be used per node for finetuning, should be equal - to number of gpu per node in the compute SKU used for finetune - - # Continual-Finetuning model path - mlflow_model_path: - type: mlflow_model - optional: true - description: MLflow model asset path. Special characters like \ and ' are invalid in the parameter value. - mode: download - pytorch_model_path: - type: custom_model - optional: true - description: Pytorch model asset path. Special characters like \ and ' are invalid in the parameter value. - mode: download - - # Training parameters - num_train_epochs: - type: integer - default: 1 - optional: true - description: training epochs - - per_device_train_batch_size: - type: integer - default: 1 - optional: true - description: Train batch size - - learning_rate: - type: number - default: 3e-04 - optional: true - description: Start learning rate. - - # Validation parameters - system_properties: - type: string - optional: true - description: Validation parameters propagated from pipeline. - - # Student Model parameters - model_asset_id: - type: string - optional: false - description: Asset id of the student model - - # Model registration - registered_model_name: - type: string - optional: true - description: Name of the registered model - - validation_info: - type: uri_file - optional: true - description: Validation status. - mode: rw_mount - -outputs: - output_model: - type: uri_folder - description: Output dir to save the finetuned lora weights - mode: rw_mount - -jobs: - oss_distillation_validate_pipeline: - type: command - component: azureml:oss_distillation_validate_pipeline:0.0.5 - compute: '${{parent.inputs.compute_pipeline_validation}}' - resources: - instance_type: '${{parent.inputs.instance_type_pipeline_validation}}' - identity: - type: user_identity - inputs: - train_file_path: '${{parent.inputs.train_file_path}}' - validation_file_path: '${{parent.inputs.validation_file_path}}' - teacher_model_endpoint_name: '${{parent.inputs.teacher_model_endpoint_name}}' - teacher_model_endpoint_url: '${{parent.inputs.teacher_model_endpoint_url}}' - teacher_model_endpoint_key: '${{parent.inputs.teacher_model_endpoint_key}}' - enable_chain_of_thought: '${{parent.inputs.enable_chain_of_thought}}' - enable_chain_of_density: '${{parent.inputs.enable_chain_of_density}}' - max_len_summary: '${{parent.inputs.max_len_summary}}' - data_generation_task_type: '${{parent.inputs.data_generation_task_type}}' - teacher_model_max_new_tokens: '${{parent.inputs.teacher_model_max_new_tokens}}' - teacher_model_temperature: '${{parent.inputs.teacher_model_temperature}}' - teacher_model_top_p: '${{parent.inputs.teacher_model_top_p}}' - teacher_model_frequency_penalty: '${{parent.inputs.teacher_model_frequency_penalty}}' - teacher_model_presence_penalty: '${{parent.inputs.teacher_model_presence_penalty}}' - request_batch_size: '${{parent.inputs.request_batch_size}}' - min_endpoint_success_ratio: '${{parent.inputs.min_endpoint_success_ratio}}' - num_train_epochs: '${{parent.inputs.num_train_epochs}}' - per_device_train_batch_size: '${{parent.inputs.per_device_train_batch_size}}' - learning_rate: '${{parent.inputs.learning_rate}}' - model_asset_id: '${{parent.inputs.model_asset_id}}' - outputs: - validation_info: - type: uri_file - path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.json - - data_generation_batch_scoring_selector: - type: command - component: azureml:oss_distillation_data_generation_batch_scoring_selector:0.0.1 - compute: '${{parent.inputs.compute_pipeline_validation}}' - resources: - instance_type: '${{parent.inputs.instance_type_pipeline_validation}}' - identity: - type: user_identity - inputs: - data_generation_task_type: '${{parent.inputs.data_generation_task_type}}' - - validation_succeeded: - type: if_else - condition: ${{parent.jobs.data_generation_batch_scoring_selector.outputs.output}} - true_block: ${{parent.jobs.oss_distillation_batchscoring_datagen_pipeline}} - false_block: ${{parent.jobs.oss_distillation_seq_scoring_pipeline}} - - oss_distillation_batchscoring_datagen_pipeline: - type: pipeline - component: azureml:oss_distillation_batchscoring_datagen_pipeline:0.0.1 - inputs: - instance_type_pipeline_validation: '${{parent.inputs.instance_type_pipeline_validation}}' - instance_type_data_generation: '${{parent.inputs.instance_type_data_generation}}' - instance_type_data_import: '${{parent.inputs.instance_type_data_import}}' - instance_type_finetune: '${{parent.inputs.instance_type_finetune}}' - compute_pipeline_validation: '${{parent.inputs.compute_pipeline_validation}}' - compute_data_generation: '${{parent.inputs.compute_data_generation}}' - compute_data_import: '${{parent.inputs.compute_data_import}}' - compute_finetune: '${{parent.inputs.compute_finetune}}' - train_file_path: '${{parent.inputs.train_file_path}}' - validation_file_path: '${{parent.inputs.validation_file_path}}' - teacher_model_endpoint_url: '${{parent.inputs.teacher_model_endpoint_url}}' - teacher_model_endpoint_name: '${{parent.inputs.teacher_model_endpoint_name}}' - teacher_model_endpoint_key: '${{parent.inputs.teacher_model_endpoint_key}}' - teacher_model_max_new_tokens: '${{parent.inputs.teacher_model_max_new_tokens}}' - teacher_model_temperature: '${{parent.inputs.teacher_model_temperature}}' - teacher_model_top_p: '${{parent.inputs.teacher_model_top_p}}' - teacher_model_frequency_penalty: '${{parent.inputs.teacher_model_frequency_penalty}}' - teacher_model_presence_penalty: '${{parent.inputs.teacher_model_presence_penalty}}' - teacher_model_stop: '${{parent.inputs.teacher_model_stop}}' - min_endpoint_success_ratio: '${{parent.inputs.min_endpoint_success_ratio}}' - enable_chain_of_thought: '${{parent.inputs.enable_chain_of_thought}}' - enable_chain_of_density: '${{parent.inputs.enable_chain_of_density}}' - max_len_summary: '${{parent.inputs.max_len_summary}}' - data_generation_task_type: '${{parent.inputs.data_generation_task_type}}' - num_train_epochs: '${{parent.inputs.num_train_epochs}}' - per_device_train_batch_size: '${{parent.inputs.per_device_train_batch_size}}' - learning_rate: '${{parent.inputs.learning_rate}}' - authentication_type: '${{parent.inputs.authentication_type}}' - additional_headers: '${{parent.inputs.additional_headers}}' - debug_mode: '${{parent.inputs.debug_mode}}' - ensure_ascii: '${{parent.inputs.ensure_ascii}}' - max_retry_time_interval: '${{parent.inputs.max_retry_time_interval}}' - initial_worker_count: '${{parent.inputs.initial_worker_count}}' - max_worker_count: '${{parent.inputs.max_worker_count}}' - instance_count: '${{parent.inputs.instance_count}}' - max_concurrency_per_instance: '${{parent.inputs.max_concurrency_per_instance}}' - mini_batch_size: '${{parent.inputs.mini_batch_size}}' - validation_info: '${{parent.jobs.oss_distillation_validate_pipeline.outputs.validation_info}}' - - outputs: - generated_batch_train_file_path: - type: uri_file - path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl - generated_batch_validation_file_path: - type: uri_file - path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl - - oss_distillation_seq_scoring_pipeline: - type: pipeline - component: azureml:oss_distillation_seq_scoring_pipeline:0.0.1 - inputs: - instance_type_pipeline_validation: '${{parent.inputs.instance_type_pipeline_validation}}' - instance_type_data_generation: '${{parent.inputs.instance_type_data_generation}}' - instance_type_data_import: '${{parent.inputs.instance_type_data_import}}' - instance_type_finetune: '${{parent.inputs.instance_type_finetune}}' - compute_pipeline_validation: '${{parent.inputs.compute_pipeline_validation}}' - compute_data_generation: '${{parent.inputs.compute_data_generation}}' - compute_data_import: '${{parent.inputs.compute_data_import}}' - compute_finetune: '${{parent.inputs.compute_finetune}}' - train_file_path: '${{parent.inputs.train_file_path}}' - validation_file_path: '${{parent.inputs.validation_file_path}}' - teacher_model_endpoint_name: '${{parent.inputs.teacher_model_endpoint_name}}' - teacher_model_endpoint_url: '${{parent.inputs.teacher_model_endpoint_url}}' - teacher_model_endpoint_key: '${{parent.inputs.teacher_model_endpoint_key}}' - teacher_model_max_new_tokens: '${{parent.inputs.teacher_model_max_new_tokens}}' - teacher_model_temperature: '${{parent.inputs.teacher_model_temperature}}' - teacher_model_top_p: '${{parent.inputs.teacher_model_top_p}}' - teacher_model_frequency_penalty: '${{parent.inputs.teacher_model_frequency_penalty}}' - teacher_model_presence_penalty: '${{parent.inputs.teacher_model_presence_penalty}}' - teacher_model_stop: '${{parent.inputs.teacher_model_stop}}' - request_batch_size: '${{parent.inputs.request_batch_size}}' - min_endpoint_success_ratio: '${{parent.inputs.min_endpoint_success_ratio}}' - enable_chain_of_thought: '${{parent.inputs.enable_chain_of_thought}}' - enable_chain_of_density: '${{parent.inputs.enable_chain_of_density}}' - max_len_summary: '${{parent.inputs.max_len_summary}}' - data_generation_task_type: '${{parent.inputs.data_generation_task_type}}' - validation_output: '${{parent.jobs.oss_distillation_validate_pipeline.outputs.validation_info}}' - model_asset_id: '${{parent.inputs.model_asset_id}}' - outputs: - generated_train_file_path: - type: uri_file - path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl - generated_validation_file_path: - type: uri_file - path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl - - - oss_distillation_train_data_generation_file_selector: - type: command - component: azureml:oss_distillation_data_generation_file_selector:0.0.1 - compute: '${{parent.inputs.compute_pipeline_validation}}' - resources: - instance_type: '${{parent.inputs.instance_type_pipeline_validation}}' - identity: - type: user_identity - inputs: - generated_batch_train_file_path: '${{parent.jobs.oss_distillation_batchscoring_datagen_pipeline.outputs.generated_batch_train_file_path}}' - generated_batch_validation_file_path: '${{parent.jobs.oss_distillation_batchscoring_datagen_pipeline.outputs.generated_batch_validation_file_path}}' - generated_train_file_path: '${{parent.jobs.oss_distillation_seq_scoring_pipeline.outputs.generated_train_file_path}}' - generated_validation_file_path: '${{parent.jobs.oss_distillation_seq_scoring_pipeline.outputs.generated_validation_file_path}}' - condition: '${{parent.jobs.data_generation_batch_scoring_selector.outputs.output}}' - outputs: - ft_input_train_file_path: - type: uri_file - path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl - ft_input_validation_file_path: - type: uri_file - path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl - - - oss_text_generation_data_import: - type: command - component: azureml:oss_text_generation_data_import:0.0.26 - compute: '${{parent.inputs.compute_data_import}}' - resources: - instance_type: '${{parent.inputs.instance_type_data_import}}' - properties: - singularity: - imageVersion: '' - SLATier: 'Premium' - priority: 'Medium' - environment_variables: - _AZUREML_CR_ENABLE_ITP_CAP: "false" - inputs: - train_file_path: '${{parent.jobs.oss_distillation_train_data_generation_file_selector.outputs.ft_input_train_file_path}}' - validation_file_path: '${{parent.jobs.oss_distillation_train_data_generation_file_selector.outputs.ft_input_validation_file_path}}' - system_properties: '${{parent.inputs.system_properties}}' - - oss_chat_completion_finetune: - type: command - component: azureml:oss_chat_completion_finetune:0.0.26 - compute: '${{parent.inputs.compute_finetune}}' - resources: - instance_type: '${{parent.inputs.instance_type_finetune}}' - properties: - singularity: - imageVersion: '' - SLATier: 'Premium' - priority: 'Medium' - environment_variables: - _AZUREML_CR_ENABLE_ITP_CAP: "false" - inputs: - task_name: "ChatCompletion" - mlflow_model_path: '${{parent.inputs.mlflow_model_path}}' - model_asset_id: '${{parent.inputs.model_asset_id}}' - pytorch_model_path: '${{parent.inputs.pytorch_model_path}}' - dataset_input: '${{parent.jobs.oss_text_generation_data_import.outputs.output_dataset}}' - batch_size: 1000 - pad_to_max_length: "false" - max_seq_length: 8192 - number_of_gpu_to_use_finetuning: '${{parent.inputs.number_of_gpu_to_use_finetuning}}' - apply_lora: "true" - lora_alpha: 128 - lora_r: 8 - lora_dropout: 0 - num_train_epochs: '${{parent.inputs.num_train_epochs}}' - max_steps: -1 - per_device_train_batch_size: '${{parent.inputs.per_device_train_batch_size}}' - per_device_eval_batch_size: '${{parent.inputs.per_device_train_batch_size}}' - auto_find_batch_size: "false" - optim: adamw_hf - learning_rate: '${{parent.inputs.learning_rate}}' - warmup_steps: 0 - weight_decay: 0.1 - adam_beta1: 0.9 - adam_beta2: 0.95 - adam_epsilon: 1e-05 - gradient_accumulation_steps: 1 - eval_accumulation_steps: 1 - lr_scheduler_type: cosine - precision: 16 - seed: 42 - enable_full_determinism: "false" - dataloader_num_workers: 0 - ignore_mismatched_sizes: "false" - max_grad_norm: 1.0 - evaluation_strategy: epoch - evaluation_steps_interval: 0.0 - eval_steps: 500 - logging_strategy: steps - logging_steps: 10 - metric_for_best_model: loss - resume_from_checkpoint: "false" - save_total_limit: 1 - apply_early_stopping: "false" - early_stopping_patience: 0 - apply_deepspeed: "true" - deepspeed_stage: 3 - apply_ort: "false" - system_properties: '${{parent.inputs.system_properties}}' - registered_model_name: '${{parent.inputs.registered_model_name}}' - model_registration_tag: "isDistill:True" - outputs: - output_model: '${{parent.outputs.output_model}}' - \ No newline at end of file diff --git a/assets/training/finetune_acft_common/components/validation/asset.yaml b/assets/training/finetune_acft_common/components/validation/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_common/components/validation/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_common/components/validation/spec.yaml b/assets/training/finetune_acft_common/components/validation/spec.yaml deleted file mode 100644 index 9b8b77ff2c..0000000000 --- a/assets/training/finetune_acft_common/components/validation/spec.yaml +++ /dev/null @@ -1,170 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.8 -name: finetune_common_validation -display_name: Common Validation Component -description: Component to validate the finetune job against Validation Service - -is_deterministic: True - -environment: azureml://registries/azureml/environments/acpt-pytorch-2.2-cuda12.1/labels/latest - -code: ../../src/validation - -inputs: - - # component input: mlflow model path - mlflow_model_path: - type: mlflow_model - optional: true - description: MLflow model asset path. Special characters like \ and ' are invalid in the parameter value. - - # ###################################### Data validation ###################################### # - # component input: training mltable - train_mltable_path: - type: mltable - optional: false - description: Path to the mltable of the training dataset. - - # optional component input: validation mltable - validation_mltable_path: - type: mltable - optional: true - description: Path to the mltable of the validation dataset. - - # component input: test mltable - test_mltable_path: - type: mltable - optional: true - description: Path to the mltable of the test dataset. - - user_column_names: - type: string - optional: true - description: Comma separated list of column names to be used for training. - - # ###################################### Compute validation ###################################### # - compute_preprocess: - type: string - optional: true - description: Compute to be used for preprocess eg. provide 'FT-Cluster' if your compute is named 'FT-Cluster'. Special characters like \ and ' are invalid in the parameter value. If compute cluster name is provided, instance_type field will be ignored and the respective cluster will be used. - - instance_type_preprocess: - type: string - optional: true - description: Instance type to be used for preprocess component in case of serverless compute, eg. standard_d12_v2. The parameter compute_preprocess must be set to 'serverless' for instance_type to be used - - compute_model_import: - type: string - optional: true - description: Compute to be used for model_import eg. provide 'FT-Cluster' if - your compute is named 'FT-Cluster' - - instance_type_model_import: - type: string - optional: true - description: Instance type to be used for model_import component in case of serverless compute, eg. standard_d12_v2. The parameter compute_model_import must be set to 'serverless' for instance_type to be used - - compute_finetune: - type: string - optional: true - description: Compute to be used for finetuning eg. provide 'FT-Cluster' if your compute is named 'FT-Cluster'. Special characters like \ and ' are invalid in the parameter value. If compute cluster name is provided, instance_type field will be ignored and the respective cluster will be used - - instance_type_finetune: - type: string - optional: true - description: Instance type to be used for finetune component in case of serverless compute, eg. standard_nc24rs_v3. The parameter compute_finetune must be set to 'serverless' for instance_type to be used - - instance_count: - type: integer - default: 1 - optional: true - description: Number of nodes to be used for finetuning (used for distributed training) - - process_count_per_instance: - type: integer - default: 1 - optional: true - description: Number of gpus to be used per node for finetuning, should be equal - to number of gpu per node in the compute SKU used for finetune - - compute_model_evaluation: - type: string - optional: true - description: Compute to be used for model evaluation eg. provide 'FT-Cluster' if your - compute is named 'FT-Cluster' - - instance_type_model_evaluation: - type: string - optional: true - description: Instance type to be used for model_evaluation components in case of serverless compute, eg. standard_nc24rs_v3. The parameter compute_model_evaluation must be set to 'serverless' for instance_type to be used - - - task_name: - type: string - enum: - - tabular-classification - - tabular-classification-multilabel - - tabular-regression - - text-classification - - text-classification-multilabel - - text-named-entity-recognition - - text-summarization - - question-answering - - text-translation - - text-generation - - fill-mask - - image-classification - - image-classification-multilabel - - image-object-detection - - image-instance-segmentation - - video-multi-object-tracking - description: Which task the model is solving. - - # ###################################### ME validation ###################################### # - test_batch_size: - type: integer - default: 1 - optional: true - description: Test batch size. - - label_column_name: - type: string - default: label - optional: true - description: Label column name in provided test dataset, for example "label". - - device: - type: string - optional: False - default: auto - enum: - - auto - - cpu - - gpu - - evaluation_config: - type: uri_file - optional: true - description: Additional parameters for Computing Metrics. - - evaluation_config_params: - type: string - optional: true - description: Additional parameters as JSON serialized string. - -# ############################### Task Speciffic params validation ################################### # - task_specific_extra_params: - type: string - optional: true - description: All extra params. The values should be key values pairs separated by semi-colon. For example "param1=value1;param2=value2" - -outputs: - validation_info: - type: uri_file - description: Validation status. - -command: >- - python validation.py - --validation-info '${{outputs.validation_info}}' diff --git a/assets/training/finetune_acft_common/src/validation/validation.py b/assets/training/finetune_acft_common/src/validation/validation.py deleted file mode 100644 index c3579ed66c..0000000000 --- a/assets/training/finetune_acft_common/src/validation/validation.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -"""script to update validation info.""" - - -def main(): - """Script which runs as part of validation component to update output.""" - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument("--validation-info", required=True, help="Model source ") - - args = parser.parse_args() - - print("Validation info: ", args.validation_info) - with open(args.validation_info, "w") as f: - f.write("Validation Completed") - - -if __name__ == "__main__": - main() diff --git a/assets/training/finetune_acft_hf_nlp/components/pipeline_components/nlp_ner/asset.yaml b/assets/training/finetune_acft_hf_nlp/components/pipeline_components/nlp_ner/asset.yaml deleted file mode 100644 index 8ac35d6091..0000000000 --- a/assets/training/finetune_acft_hf_nlp/components/pipeline_components/nlp_ner/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["NLP NER", "Finetune"] diff --git a/assets/training/finetune_acft_hf_nlp/components/pipeline_components/nlp_ner/spec.yaml b/assets/training/finetune_acft_hf_nlp/components/pipeline_components/nlp_ner/spec.yaml deleted file mode 100644 index 941c6f569c..0000000000 --- a/assets/training/finetune_acft_hf_nlp/components/pipeline_components/nlp_ner/spec.yaml +++ /dev/null @@ -1,264 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -name: nlp_textclassification_ner -version: 0.0.3 -type: pipeline -display_name: PipelineComponent for AutoML NLP NER -description: Pipeline component for AutoML NLP NER -inputs: - compute_model_import: - type: string - optional: false - description: compute to be used for model_selector eg. provide 'FT-Cluster' if - your compute is named 'FT-Cluster' - compute_preprocess: - type: string - optional: false - description: compute to be used for preprocess eg. provide 'FT-Cluster' if your - compute is named 'FT-Cluster' - compute_finetune: - type: string - optional: false - description: compute to be used for finetune eg. provide 'FT-Cluster' if your - compute is named 'FT-Cluster' - compute_test_model: - type: string - optional: false - description: compute to be used for test_model eg. provide 'FT-Cluster' if your - compute is named 'FT-Cluster' - num_nodes_finetune: - type: integer - default: 1 - optional: true - description: number of nodes to be used for finetuning (used for distributed training) - process_count_per_instance_finetune: - type: integer - default: 1 - optional: true - description: number of gpus to be used per node for finetuning, should be equal - to number of gpu per node in the compute SKU used for finetune - model_name: - type: string - default: bert-base-uncased - description: model id used to load model checkpoint. - - # Dataset parameters - training_data: - type: uri_file - optional: false - description: Enter the train file path - - validation_data: - type: uri_file - optional: false - description: Enter the validation file path - - # Training parameters - training_batch_size: - type: integer - default: 32 - optional: true - description: Train batch size - - validation_batch_size: - type: integer - default: 32 - optional: true - description: Validation batch size - - number_of_epochs: - type: integer - default: 3 - optional: true - description: Number of epochs to train - - gradient_accumulation_steps: - type: integer - default: 1 - optional: true - description: Gradient acc - - learning_rate: - type: number - default: 0.00005 - optional: true - description: Start learning rate. Defaults to linear scheduler. - - warmup_steps: - type: integer - default: 0 - optional: true - description: Number of steps used for a linear warmup from 0 to learning_rate - - weight_decay: - type: number - default: 0.0 - optional: true - description: The weight decay to apply (if not zero) to all layers except all - bias and LayerNorm weights in AdamW optimizer - - learning_rate_scheduler: - type: string - default: linear - optional: true - enum: - - linear - - cosine - - cosine_with_restarts - - polynomial - - constant - - constant_with_warmup - description: The scheduler type to use - - precision: - type: string - enum: - - '32' - - '16' - default: '16' - optional: true - description: Apply mixed precision training. This can reduce memory footprint - by performing operations in half-precision. - - # MLFlow Parameters - enable_full_determinism: - type: string - enum: - - 'true' - - 'false' - default: 'false' - optional: true - description: Ensure reproducible behavior during distributed training - - evaluation_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The evaluation strategy to adopt during training - - evaluation_steps_interval: - type: number - default: 0.0 - optional: true - description: The evaluation steps in fraction of an epoch steps to adopt during - training. Overwrites evaluation_steps if not 0. - - evaluation_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two evals if evaluation_strategy='steps' - - logging_strategy: - type: string - default: steps - optional: true - enum: - - epoch - - steps - description: The logging strategy to adopt during training. - - logging_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two logs if logging_strategy='steps' - - primary_metric: - type: string - default: accuracy - optional: true - enum: - - loss - - f1_macro - - mcc - - accuracy - - precision_macro - - recall_macro - description: Specify the metric to use to compare two different models - - # Deepspeed Parameters - apply_deepspeed: - type: string - enum: - - 'true' - - 'false' - default: 'true' - optional: true - description: If set to true, will enable deepspeed for training - - # ORT Parameters - apply_ort: - type: string - enum: - - 'true' - - 'false' - default: 'true' - optional: true - description: If set to true, will use the ONNXRunTime training - - deepspeed_config: - type: uri_file - optional: true - description: Deepspeed config to be used for finetuning - -outputs: - pytorch_model_folder_finetune: - type: uri_folder - description: Output dir to save the finetune model and other metadata - - mlflow_model_folder_finetune: - type: mlflow_model - description: Output dir to save the finetune model as mlflow model - -jobs: - model_import: - type: command - component: azureml:token_classification_model_import:0.0.66 - compute: ${{parent.inputs.compute_model_import}} - inputs: - huggingface_id: ${{parent.inputs.model_name}} - preprocess: - type: command - component: azureml:nlp_ner_datapreprocessing:0.0.2 - compute: ${{parent.inputs.compute_preprocess}} - inputs: - train_file_path: ${{parent.inputs.training_data}} - valid_file_path: ${{parent.inputs.validation_data}} - model_selector_output: ${{parent.jobs.model_import.outputs.output_dir}} - finetune: - type: command - component: azureml:token_classification_finetune:0.0.66 - compute: ${{parent.inputs.compute_finetune}} - distribution: - type: pytorch - process_count_per_instance: ${{parent.inputs.process_count_per_instance_finetune}} - resources: - instance_count: ${{parent.inputs.num_nodes_finetune}} - inputs: - per_device_train_batch_size: ${{parent.inputs.training_batch_size}} - per_device_eval_batch_size: ${{parent.inputs.validation_batch_size}} - num_train_epochs: ${{parent.inputs.number_of_epochs}} - gradient_accumulation_steps: ${{parent.inputs.gradient_accumulation_steps}} - learning_rate: ${{parent.inputs.learning_rate}} - warmup_steps: ${{parent.inputs.warmup_steps}} - weight_decay: ${{parent.inputs.weight_decay}} - lr_scheduler_type: ${{parent.inputs.learning_rate_scheduler}} - precision: ${{parent.inputs.precision}} - enable_full_determinism: ${{parent.inputs.enable_full_determinism}} - evaluation_strategy: ${{parent.inputs.evaluation_strategy}} - evaluation_steps_interval: ${{parent.inputs.evaluation_steps_interval}} - eval_steps: ${{parent.inputs.evaluation_steps}} - logging_strategy: ${{parent.inputs.logging_strategy}} - logging_steps: ${{parent.inputs.logging_steps}} - metric_for_best_model: ${{parent.inputs.primary_metric}} - apply_deepspeed: ${{parent.inputs.apply_deepspeed}} - deepspeed: ${{parent.inputs.deepspeed_config}} - apply_ort: ${{parent.inputs.apply_ort}} - model_selector_output: ${{parent.jobs.model_import.outputs.output_dir}} - preprocess_output: ${{parent.jobs.preprocess.outputs.output_dir}} - outputs: - pytorch_model_folder: ${{parent.outputs.pytorch_model_folder_finetune}} - mlflow_model_folder: ${{parent.outputs.mlflow_model_folder_finetune}} \ No newline at end of file diff --git a/assets/training/finetune_acft_hf_nlp/components/preprocess/nlp_ner/asset.yaml b/assets/training/finetune_acft_hf_nlp/components/preprocess/nlp_ner/asset.yaml deleted file mode 100644 index 8ac35d6091..0000000000 --- a/assets/training/finetune_acft_hf_nlp/components/preprocess/nlp_ner/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["NLP NER", "Finetune"] diff --git a/assets/training/finetune_acft_hf_nlp/components/preprocess/nlp_ner/spec.yaml b/assets/training/finetune_acft_hf_nlp/components/preprocess/nlp_ner/spec.yaml deleted file mode 100644 index e8dcb3a988..0000000000 --- a/assets/training/finetune_acft_hf_nlp/components/preprocess/nlp_ner/spec.yaml +++ /dev/null @@ -1,64 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: nlp_ner_datapreprocessing -version: 0.0.2 -type: command - -is_deterministic: True - -display_name: DataPreProcessing for AutoMLNLPNER -description: Component to preprocess data for automl nlp ner task - -environment: azureml://registries/azureml/environments/acft-hf-nlp-gpu/labels/latest - -code: ../../../src/preprocess - -inputs: - # Token Classification task arguments - token_key: - type: string - optional: true - description: token key name - - tag_key: - type: string - optional: true - description: tag key name - - batch_size: - type: integer - optional: true - default: 32 - description: Number of examples to batch before calling the tokenization function - - # Inputs - train_file_path: - type: uri_file - optional: false - description: Enter the train file path - - valid_file_path: - type: uri_file - optional: false - description: Enter the validation file path - - # Dataset parameters - model_selector_output: - type: uri_folder - optional: false - description: output folder of model selector containing model metadata like config, checkpoints, tokenizer config - -outputs: - output_dir: - type: uri_folder - description: folder to store preprocessed outputs of input data - -command: >- - python preprocess.py - --task_name NLPNER - $[[--token_key ${{inputs.token_key}}]] - $[[--tag_key ${{inputs.tag_key}}]] - $[[--batch_size ${{inputs.batch_size}}]] - --train_file_path ${{inputs.train_file_path}} - --validation_file_path ${{inputs.valid_file_path}} - --model_selector_output ${{inputs.model_selector_output}} - --output_dir ${{outputs.output_dir}} \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/finetune/diffusers_text_to_image/asset.yaml b/assets/training/finetune_acft_image/components/finetune/diffusers_text_to_image/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_image/components/finetune/diffusers_text_to_image/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/finetune/diffusers_text_to_image/spec.yaml b/assets/training/finetune_acft_image/components/finetune/diffusers_text_to_image/spec.yaml deleted file mode 100644 index 66b9486ea1..0000000000 --- a/assets/training/finetune_acft_image/components/finetune/diffusers_text_to_image/spec.yaml +++ /dev/null @@ -1,425 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.9 - -name: diffusers_text_to_image_finetune -display_name: Text to Image Diffusers Model Finetune -description: Component to finetune stable diffusion models using diffusers for text to image. - -is_deterministic: false - -environment: azureml://registries/azureml/environments/acft-transformers-image-gpu/versions/46 - -code: ../../../src/finetune - -distribution: - type: pytorch - -inputs: - # # component input: model path - model_path: - type: uri_folder - optional: false - description: Output folder of model selector containing model metadata like config, checkpoints, tokenizer config. - - # component input: Instance data dir - instance_data_dir: - type: uri_folder - optional: false - description: A folder containing the training data of instance images. - - # optional component input: Class data dir - class_data_dir: - type: uri_folder - optional: true - mode: download - description: A folder containing the training data of class images. - - task_name: - type: string - enum: - - stable-diffusion-text-to-image - description: Which task the model is solving. - - # Instance prompt - instance_prompt: - type: string - optional: true - description: The prompt with identifier specifying the instance. - - resolution: - type: integer - optional: true - default: 512 - description: The image resolution for training. - - # Lora parameters - # LoRA reduces the number of trainable parameters by learning pairs of rank-decompostion matrices while freezing the original weights. This vastly reduces the storage requirement for large language models adapted to specific tasks and enables efficient task-switching during deployment all without introducing inference latency. LoRA also outperforms several other adaptation methods including adapter, prefix-tuning, and fine-tuning. Currently, LoRA is supported for gpt2, bert, roberta, deberta, distilbert, t5, bart, mbart and camembert model families - apply_lora: - type: boolean - default: true - optional: false - description: If "true" enables lora. - - lora_alpha: - type: integer - default: 128 - optional: true - description: alpha attention parameter for lora. - - lora_r: - type: integer - default: 8 - optional: true - description: lora dimension - - lora_dropout: - type: number - default: 0.0 - optional: true - description: lora dropout value - - tokenizer_max_length: - type: integer - optional: true - description: The maximum length of the tokenizer. If not set, will default to the tokenizer's max length. - - # Text Encoder - text_encoder_type: - type: string - enum: - - CLIPTextModel - - T5EncoderModel - optional: true - description: Text encoder to be used. - - text_encoder_name: - type: string - optional: true - description: Huggingface id of text encoder. This model should of type specified in `text_encoder_type`. If not specified the default from the model will be used. - - train_text_encoder: - type: boolean - default: false - optional: true - description: Whether to train the text encoder. If set, the text encoder should be float32 precision. - - pre_compute_text_embeddings: - type: boolean - default: true - optional: true - description: Whether or not to pre-compute text embeddings. If text embeddings are pre-computed, the text encoder will not be kept in memory during training and will leave more GPU memory available for training the rest of the model. This is not compatible with `--train_text_encoder`. - - text_encoder_use_attention_mask: - type: boolean - default: false - optional: true - description: Whether to use attention mask for the text encoder - - # UNET related - class_labels_conditioning: - type: string - optional: true - description: The optional `class_label` conditioning to pass to the unet, available values are `timesteps`. - - # Noise Scheduler - noise_scheduler_name: - type: string - enum: - - DPMSolverMultistepScheduler - - DDPMScheduler - - PNDMScheduler - optional: true - description: Noise scheduler to be used. - - noise_scheduler_num_train_timesteps: - type: integer - optional: true - description: The number of diffusion steps to train the model. - - noise_scheduler_variance_type: - type: string - enum: - - fixed_small - - fixed_small_log - - fixed_large - - fixed_large_log - - learned - - learned_range - optional: true - description: Clip the variance when adding noise to the denoised sample. - - noise_scheduler_prediction_type: - type: string - enum: - - epsilon - - sample - - v_prediction - optional: true - description: Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen Video](https://imagen.research.google/video/paper.pdf) paper). - - noise_scheduler_timestep_spacing: - type: string - optional: true - description: The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. - - noise_scheduler_steps_offset: - type: integer - optional: true - description: An offset added to the inference steps. You can use a combination of `offset=1` and `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable Diffusion. - - extra_noise_scheduler_args: - type: string - optional: true - description: Optional additional arguments that are supplied to noise scheduler. The arguments should be semi-colon separated key value pairs and should be enclosed in double quotes. For example, "clip_sample_range=1.0; clip_sample=True" for DDPMScheduler. - - # Offset Noise - offset_noise: - type: boolean - optional: true - description: Fine-tuning against a modified noise. See https://www.crosslabs.org//blog/diffusion-with-offset-noise for more information. - - # Prior preservation loss - with_prior_preservation: - type: boolean - default: true - description: Flag to add prior preservation loss. - class_prompt: - type: string - optional: true - description: The prompt to specify images in the same class as provided instance images. - num_class_images: - type: integer - default: 100 - optional: true - description: Minimal class images for prior preservation loss. If there are not enough images already present in class_data_dir, additional images will be sampled with class_prompt. - prior_generation_precision: - type: string - optional: true - default: "fp32" - enum: - - "fp32" - - "fp16" - - "bf16" - description: Choose prior generation precision between fp32, fp16 and bf16 (bfloat16). Bf16 requires PyTorch >= 1.10.and an Nvidia Ampere GPU. Default to fp16 if a GPU is available else fp32. - prior_loss_weight: - type: number - default: 1.0 - optional: true - description: The weight of prior preservation loss. - - sample_batch_size: - type: integer - default: 4 - optional: true - description: "Batch size (per device) for sampling class images when training with_prior_preservation set to True." - - num_validation_images: - type: integer - default: 0 - description: "Specify number of images to generate using instance_prompt. Images are stored in the output/checkpoint-* directories. Please note that this will increase the training time. If you select num_validation_images = 0, then run will generate 5 images in last checkpoint." - - number_of_workers: - type: integer - default: 6 - optional: true - description: Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process. - - # Training parameters - number_of_epochs: - type: integer - optional: true - description: Number of training epochs. If left empty, will be chosen automatically based on the task type and model selected. - - max_steps: - type: integer - optional: true - description: If set to a positive number, the total number of training steps to perform. Overrides 'number_of_epochs'. In case of using a finite iterable dataset the training may stop before reaching the set number of steps when all data is exhausted. If left empty, will be chosen automatically based on the task type and model selected. - - training_batch_size: - type: integer - default: 1 - optional: true - description: Train batch size. If left empty, will be chosen automatically based on the task type and model selected. - - auto_find_batch_size: - type: boolean - default: false - optional: true - description: Flag to enable auto finding of batch size. If the provided 'per_device_train_batch_size' goes into Out Of Memory (OOM) enabling auto_find_batch_size will find the correct batch size by iteratively reducing 'per_device_train_batch_size' by a factor of 2 till the OOM is fixed. - - # learning rate and learning rate scheduler - learning_rate: - type: number - optional: true - description: Start learning rate. Defaults to linear scheduler. If left empty, will be chosen automatically based on the task type and model selected. - - learning_rate_scheduler: - type: string - optional: true - enum: - - warmup_linear - - warmup_cosine - - warmup_cosine_with_restarts - - warmup_polynomial - - constant - - warmup_constant - description: The scheduler type to use. If left empty, will be chosen automatically based on the task type and model selected. - - warmup_steps: - type: integer - default: 0 - optional: true - description: Number of steps used for a linear warmup from 0 to learning_rate. If left empty, will be chosen automatically based on the task type and model selected. - - # optimizer - optimizer: - type: string - optional: true - enum: - - adamw_hf - - adamw - # - adamw_torch_xla - # - adamw_apex_fused - # - adamw_bnb_8bit - # - adamw_anyprecision - - sgd - - adafactor - - adagrad - - adamw_ort_fused - description: optimizer to be used while training. 'adamw_ort_fused' optimizer is only supported for ORT training. If left empty, will be chosen automatically based on the task type and model selected. - - weight_decay: - type: number - default: 0 - optional: true - description: The weight decay to apply (if not zero) to all layers except all bias and LayerNorm weights in AdamW and sgd optimizer. If left empty, will be chosen automatically based on the task type and model selected. - - extra_optim_args: - type: string - default: "" - optional: true - description: Optional additional arguments that are supplied to SGD Optimizer. The arguments should be semi-colon separated key value pairs and should be enclosed in double quotes. For example, "momentum=0.5; nesterov=True" for sgd. Please make sure to use a valid parameter names for the chosen optimizer. For exact parameter names, please refer https://pytorch.org/docs/1.13/generated/torch.optim.SGD.html#torch.optim.SGD for SGD. Parameters supplied in extra_optim_args will take precedence over the parameter supplied via other arguments such as weight_decay. If weight_decay is provided via "weight_decay" parameter and via extra_optim_args both, values specified in extra_optim_args will be used. - - - # gradient accumulation - gradient_accumulation_step: - type: integer - optional: true - description: Number of update steps to accumulate the gradients for, before performing a backward/update pass. If left empty, will be chosen automatically based on the task type and model selected. - - # mixed precision training - precision: - type: string - enum: - - "32" - - "16" - default: "32" - optional: true - description: Apply mixed precision training. This can reduce memory footprint by performing operations in half-precision. - - # random seed - random_seed: - type: integer - default: 42 - optional: true - description: Random seed that will be set at the beginning of training. - - # logging strategy parameters - logging_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The logging strategy to adopt during training. - - logging_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two logs if logging_strategy='steps'. - - # model checkpointing limit - save_total_limit: - type: integer - default: 5 - optional: true - description: If a value is passed, will limit the total amount of checkpoints. Deletes the older checkpoints in output_dir. If the value is -1 saves all checkpoints". - - # Grad Norm - max_grad_norm: - type: number - optional: true - description: Maximum gradient norm (for gradient clipping). If left empty, will be chosen automatically based on the task type and model selected. - - # save mlflow model - save_as_mlflow_model: - type: boolean - default: true - optional: true - description: Save as mlflow model with pyfunc as flavour. - -outputs: - mlflow_model_folder: - type: mlflow_model - description: Output dir to save the finetune model as mlflow model. - pytorch_model_folder: - type: custom_model - description: Output dir to save the finetune model as torch model. - -command: >- - - python finetune.py - --model_path ${{inputs.model_path}} - --train_mltable_path ${{inputs.instance_data_dir}} - $[[--class_data_dir ${{inputs.class_data_dir}}]] - --task_name ${{inputs.task_name}} - --apply_lora ${{inputs.apply_lora}} - --num_validation_images ${{inputs.num_validation_images}} - $[[--instance_prompt ${{inputs.instance_prompt}}]] - $[[--tokenizer_max_length ${{inputs.tokenizer_max_length}}]] - $[[--text_encoder_name ${{inputs.text_encoder_name}}]] - $[[--text_encoder_type ${{inputs.text_encoder_type}}]] - $[[--train_text_encoder ${{inputs.train_text_encoder}}]] - $[[--pre_compute_text_embeddings ${{inputs.pre_compute_text_embeddings}}]] - $[[--text_encoder_use_attention_mask ${{inputs.text_encoder_use_attention_mask}}]] - $[[--class_labels_conditioning ${{inputs.class_labels_conditioning}}]] - $[[--noise_scheduler_name ${{inputs.noise_scheduler_name}}]] - $[[--noise_scheduler_num_train_timesteps ${{inputs.noise_scheduler_num_train_timesteps}}]] - $[[--noise_scheduler_variance_type ${{inputs.noise_scheduler_variance_type}}]] - $[[--noise_scheduler_prediction_type ${{inputs.noise_scheduler_prediction_type}}]] - $[[--noise_scheduler_timestep_spacing ${{inputs.noise_scheduler_timestep_spacing}}]] - $[[--noise_scheduler_steps_offset ${{inputs.noise_scheduler_steps_offset}}]] - $[[--extra_noise_scheduler_args ${{inputs.extra_noise_scheduler_args}}]] - $[[--offset_noise ${{inputs.offset_noise}}]] - --with_prior_preservation ${{inputs.with_prior_preservation}} - $[[--class_prompt ${{inputs.class_prompt}}]] - $[[--num_class_images ${{inputs.num_class_images}}]] - $[[--prior_generation_precision ${{inputs.prior_generation_precision}}]] - $[[--prior_loss_weight ${{inputs.prior_loss_weight}}]] - --apply_augmentations "true" - $[[--dataloader_num_workers ${{inputs.number_of_workers}}]] - $[[--sample_batch_size ${{inputs.sample_batch_size}}]] - $[[--num_train_epochs ${{inputs.number_of_epochs}}]] - $[[--max_steps ${{inputs.max_steps}}]] - $[[--per_device_train_batch_size ${{inputs.training_batch_size}}]] - $[[--auto_find_batch_size ${{inputs.auto_find_batch_size}}]] - $[[--learning_rate ${{inputs.learning_rate}}]] - $[[--lr_scheduler_type ${{inputs.learning_rate_scheduler}}]] - $[[--warmup_steps ${{inputs.warmup_steps}}]] - $[[--optim ${{inputs.optimizer}}]] - $[[--weight_decay ${{inputs.weight_decay}}]] - $[[--extra_optim_args ${{inputs.extra_optim_args}}]] - $[[--gradient_accumulation_steps ${{inputs.gradient_accumulation_step}}]] - $[[--precision ${{inputs.precision}}]] - $[[--seed ${{inputs.random_seed}}]] - $[[--logging_strategy ${{inputs.logging_strategy}}]] - $[[--logging_steps ${{inputs.logging_steps}}]] - $[[--save_total_limit ${{inputs.save_total_limit}}]] - $$[[--max_grad_norm ${{inputs.max_grad_norm}}]] - $[[--save_as_mlflow_model ${{inputs.save_as_mlflow_model}}]] - --mlflow_model_folder ${{outputs.mlflow_model_folder}} - --pytorch_model_folder ${{outputs.pytorch_model_folder}} diff --git a/assets/training/finetune_acft_image/components/finetune/hf_classification/asset.yaml b/assets/training/finetune_acft_image/components/finetune/hf_classification/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_image/components/finetune/hf_classification/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/finetune/hf_classification/spec.yaml b/assets/training/finetune_acft_image/components/finetune/hf_classification/spec.yaml deleted file mode 100644 index 5dd97a7b25..0000000000 --- a/assets/training/finetune_acft_image/components/finetune/hf_classification/spec.yaml +++ /dev/null @@ -1,356 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.22 -name: transformers_image_classification_finetune -display_name: Image Classification HuggingFace Transformers Model Finetune -description: Component to finetune HuggingFace transformers models for image classification. - -is_deterministic: false - -environment: azureml://registries/azureml/environments/acft-transformers-image-gpu/labels/latest - -code: ../../../src/finetune - -distribution: - type: pytorch - -inputs: - - # component input: model path - model_path: - type: uri_folder - optional: false - description: Output folder of model selector containing model metadata like config, checkpoints, tokenizer config. - - # component input: training mltable - training_data: - type: mltable - optional: false - description: Path to the mltable of the training dataset. - - # optional component input: validation mltable - validation_data: - type: mltable - optional: true - description: Path to the mltable of the validation dataset. - - image_width: - type: integer - default: -1 - optional: true - description: Final Image width after augmentation that is input to the network. - Default value is -1 which means it would be overwritten by default image - width in Hugging Face feature extractor. If either image_width or image_height - is set to -1, default value would be used for both width and height. - - image_height: - type: integer - default: -1 - optional: true - description: Final Image height after augmentation that is input to the network. - Default value is -1 which means it would be overwritten by default image - height in Hugging Face feature extractor. If either image_width or image_height - is set to -1, default value would be used for both width and height. - - task_name: - type: string - enum: - - image-classification - - image-classification-multilabel - description: Which task the model is solving. - - # primary metric - metric_for_best_model: - type: string - optional: true - enum: - - loss - - f1_score_macro - - accuracy - - precision_score_macro - - recall_score_macro - - iou - - iou_macro - - iou_micro - - iou_weighted - description: Specify the metric to use to compare two different models. If left empty, will be chosen automatically based on the task type and model selected. - - # Augmentation parameters - apply_augmentations: - type: boolean - default: true - optional: true - description: If set to true, will enable data augmentations for training. - - number_of_workers: - type: integer - default: 8 - optional: true - description: Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process. - - # Deepspeed Parameters - apply_deepspeed: - type: boolean - optional: true - description: If set to true, will enable deepspeed for training. If left empty, will be chosen automatically based on the task type and model selected. - - # optional component input: deepspeed config - deepspeed_config: - type: uri_file - optional: true - description: Deepspeed config to be used for finetuning. - - apply_ort: - type: boolean - optional: true - description: If set to true, will use the ONNXRunTime training. If left empty, will be chosen automatically based on the task type and model selected. - - # Training parameters - number_of_epochs: - type: integer - optional: true - description: Number of training epochs. If left empty, will be chosen automatically based on the task type and model selected. - - max_steps: - type: integer - optional: true - description: If set to a positive number, the total number of training steps to perform. Overrides 'number_of_epochs'. In case of using a finite iterable dataset the training may stop before reaching the set number of steps when all data is exhausted. If left empty, will be chosen automatically based on the task type and model selected. - - training_batch_size: - type: integer - optional: true - description: Train batch size. If left empty, will be chosen automatically based on the task type and model selected. - - validation_batch_size: - type: integer - optional: true - description: Validation batch size. If left empty, will be chosen automatically based on the task type and model selected. - - auto_find_batch_size: - type: boolean - default: false - optional: true - description: Flag to enable auto finding of batch size. If the provided 'per_device_train_batch_size' goes into Out Of Memory (OOM) enabling auto_find_batch_size will find the correct batch size by iteratively reducing 'per_device_train_batch_size' by a factor of 2 till the OOM is fixed. - - # learning rate and learning rate scheduler - learning_rate: - type: number - optional: true - description: Start learning rate. Defaults to linear scheduler. If left empty, will be chosen automatically based on the task type and model selected. - - learning_rate_scheduler: - type: string - optional: true - enum: - - warmup_linear - - warmup_cosine - - warmup_cosine_with_restarts - - warmup_polynomial - - constant - - warmup_constant - description: The scheduler type to use. If left empty, will be chosen automatically based on the task type and model selected. - - warmup_steps: - type: integer - optional: true - description: Number of steps used for a linear warmup from 0 to learning_rate. If left empty, will be chosen automatically based on the task type and model selected. - - # optimizer - optimizer: - type: string - optional: true - enum: - - adamw_hf - - adamw - # - adamw_torch_xla - # - adamw_apex_fused - # - adamw_bnb_8bit - # - adamw_anyprecision - - sgd - - adafactor - - adagrad - - adamw_ort_fused - description: optimizer to be used while training. 'adamw_ort_fused' optimizer is only supported for ORT training. If left empty, will be chosen automatically based on the task type and model selected. - - weight_decay: - type: number - optional: true - description: The weight decay to apply (if not zero) to all layers except all bias and LayerNorm weights in AdamW and sgd optimizer. If left empty, will be chosen automatically based on the task type and model selected. - - extra_optim_args: - type: string - default: "" - optional: true - description: Optional additional arguments that are supplied to SGD Optimizer. The arguments should be semi-colon separated key value pairs and should be enclosed in double quotes. For example, "momentum=0.5; nesterov=True" for sgd. Please make sure to use a valid parameter names for the chosen optimizer. For exact parameter names, please refer https://pytorch.org/docs/1.13/generated/torch.optim.SGD.html#torch.optim.SGD for SGD. Parameters supplied in extra_optim_args will take precedence over the parameter supplied via other arguments such as weight_decay. If weight_decay is provided via "weight_decay" parameter and via extra_optim_args both, values specified in extra_optim_args will be used. - - - # gradient accumulation - gradient_accumulation_step: - type: integer - optional: true - description: Number of update steps to accumulate the gradients for, before performing a backward/update pass. If left empty, will be chosen automatically based on the task type and model selected. - - # mixed precision training - precision: - type: string - enum: - - "32" - - "16" - default: "32" - optional: true - description: Apply mixed precision training. This can reduce memory footprint by performing operations in half-precision. - - # label smoothing factor - label_smoothing_factor: - type: number - optional: true - description: The label smoothing factor to use in range [0.0, 1,0). Zero means no label smoothing, otherwise the underlying onehot-encoded labels are changed from 0s and 1s to label_smoothing_factor/num_labels and 1 - label_smoothing_factor + label_smoothing_factor/num_labels respectively. Not applicable to multi-label classification. If left empty, will be chosen automatically based on the task type and model selected. - - # random seed - random_seed: - type: integer - default: 42 - optional: true - description: Random seed that will be set at the beginning of training. - - # evaluation strategy parameters - evaluation_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The evaluation strategy to adopt during training. Please note that the save_strategy and evaluation_strategy should match. - - evaluation_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two evals if evaluation_strategy='steps'. Please note that the saving steps should be a multiple of the evaluation steps. - - # logging strategy parameters - logging_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The logging strategy to adopt during training. - - logging_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two logs if logging_strategy='steps'. - - # Save strategy - save_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The checkpoint save strategy to adopt during training. Please note that the save_strategy and evaluation_strategy should match. - - save_steps: - type: integer - default: 500 - optional: true - description: Number of updates steps before two checkpoint saves if save_strategy="steps". Please note that the saving steps should be a multiple of the evaluation steps. - - # model checkpointing limit - save_total_limit: - type: integer - default: 5 - optional: true - description: If a value is passed, will limit the total amount of checkpoints. Deletes the older checkpoints in output_dir. If the value is -1 saves all checkpoints". - - # Early Stopping Parameters - early_stopping: - type: boolean - default: false - optional: true - description: Enable early stopping. - - early_stopping_patience: - type: integer - default: 1 - optional: true - description: Stop training when the specified metric worsens for early_stopping_patience evaluation calls. - - # Grad Norm - max_grad_norm: - type: number - optional: true - description: Maximum gradient norm (for gradient clipping). If left empty, will be chosen automatically based on the task type and model selected. - - # resume from the input model - resume_from_checkpoint: - type: boolean - default: false - optional: true - description: Loads optimizer, Scheduler and Trainer state for finetuning if true. - - # save mlflow model - save_as_mlflow_model: - type: boolean - default: true - optional: true - description: Save as mlflow model with pyfunc as flavour. - -outputs: - mlflow_model_folder: - type: mlflow_model - description: Output dir to save the finetune model as mlflow model. - pytorch_model_folder: - type: custom_model - description: Output dir to save the finetune model as torch model. - -command: >- - - python finetune.py - --model_path ${{inputs.model_path}} - --train_mltable_path ${{inputs.training_data}} - $[[--valid_mltable_path ${{inputs.validation_data}}]] - $[[--image_width ${{inputs.image_width}}]] - $[[--image_height ${{inputs.image_height}}]] - --task_name ${{inputs.task_name}} - $[[--metric_for_best_model ${{inputs.metric_for_best_model}}]] - $[[--apply_augmentations ${{inputs.apply_augmentations}}]] - $[[--dataloader_num_workers ${{inputs.number_of_workers}}]] - $[[--apply_deepspeed ${{inputs.apply_deepspeed}}]] - $[[--deepspeed_config ${{inputs.deepspeed_config}}]] - $[[--apply_ort ${{inputs.apply_ort}}]] - $[[--num_train_epochs ${{inputs.number_of_epochs}}]] - $[[--max_steps ${{inputs.max_steps}}]] - $[[--per_device_train_batch_size ${{inputs.training_batch_size}}]] - $[[--per_device_eval_batch_size ${{inputs.validation_batch_size}}]] - $[[--auto_find_batch_size ${{inputs.auto_find_batch_size}}]] - $[[--learning_rate ${{inputs.learning_rate}}]] - $[[--lr_scheduler_type ${{inputs.learning_rate_scheduler}}]] - $[[--warmup_steps ${{inputs.warmup_steps}}]] - $[[--optim ${{inputs.optimizer}}]] - $[[--weight_decay ${{inputs.weight_decay}}]] - $[[--extra_optim_args ${{inputs.extra_optim_args}}]] - $[[--gradient_accumulation_steps ${{inputs.gradient_accumulation_step}}]] - $[[--precision ${{inputs.precision}}]] - $[[--label_smoothing_factor ${{inputs.label_smoothing_factor}}]] - $[[--seed ${{inputs.random_seed}}]] - $[[--eval_strategy ${{inputs.evaluation_strategy}}]] - $[[--eval_steps ${{inputs.evaluation_steps}}]] - $[[--logging_strategy ${{inputs.logging_strategy}}]] - $[[--logging_steps ${{inputs.logging_steps}}]] - $[[--save_strategy ${{inputs.save_strategy}}]] - $[[--save_steps ${{inputs.save_steps}}]] - $[[--save_total_limit ${{inputs.save_total_limit}}]] - $[[--apply_early_stopping ${{inputs.early_stopping}}]] - $[[--early_stopping_patience ${{inputs.early_stopping_patience}}]] - $$[[--max_grad_norm ${{inputs.max_grad_norm}}]] - $[[--resume_from_checkpoint ${{inputs.resume_from_checkpoint}}]] - $[[--save_as_mlflow_model ${{inputs.save_as_mlflow_model}}]] - --mlflow_model_folder ${{outputs.mlflow_model_folder}} - --pytorch_model_folder ${{outputs.pytorch_model_folder}} diff --git a/assets/training/finetune_acft_image/components/finetune/mmd_od_is/asset.yaml b/assets/training/finetune_acft_image/components/finetune/mmd_od_is/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_image/components/finetune/mmd_od_is/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/finetune/mmd_od_is/spec.yaml b/assets/training/finetune_acft_image/components/finetune/mmd_od_is/spec.yaml deleted file mode 100644 index 6ed7bf2902..0000000000 --- a/assets/training/finetune_acft_image/components/finetune/mmd_od_is/spec.yaml +++ /dev/null @@ -1,347 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.22 -name: mmdetection_image_objectdetection_instancesegmentation_finetune -display_name: Image Object Detection and Instance Segmentation MMDetection Model Finetune -description: Component to finetune MMDetection models for image object detection and instance segmentation. - -is_deterministic: false - -environment: azureml://registries/azureml/environments/acft-mmdetection-image-gpu/labels/latest - -code: ../../../src/finetune - -distribution: - type: pytorch - -inputs: - - # component input: model path - model_path: - type: uri_folder - optional: false - description: Output folder of model selector containing model metadata like config, checkpoints, tokenizer config. - - # component input: training mltable - training_data: - type: mltable - optional: false - description: Path to the mltable of the training dataset. - - # optional component input: validation mltable - validation_data: - type: mltable - optional: true - description: Path to the mltable of the validation dataset. - - image_min_size: - type: integer - optional: true - description: Minimum image size after augmentation that is input to the network. If left empty, it would either be overwritten by image_scale in model config or would be chosen based on the task type and model selected. The image will be rescaled as large as possible within the range [image_min_size, image_max_size]. The image size will be constraint so that the max edge is no longer than image_max_size and short edge is no longer than image_min_size. - - image_max_size: - type: integer - optional: true - description: Maximum image size after augmentation that is input to the network. If left empty, it would either be overwritten by image_scale in model config or would be chosen based on the task type and model selected. The image will be rescaled as large as possible within the range [image_min_size, image_max_size]. The image size will be constraint so that the max edge is no longer than image_max_size and short edge is no longer than image_min_size. - - task_name: - type: string - enum: - - image-object-detection - - image-instance-segmentation - description: Which task the model is solving. - - # primary metric - metric_for_best_model: - type: string - optional: true - enum: - - mean_average_precision - - precision - - recall - description: Specify the metric to use to compare two different models. If left empty, will be chosen automatically based on the task type and model selected. - - # Augmentation parameters - apply_augmentations: - type: boolean - default: true - optional: true - description: If set to true, will enable data augmentations for training. - - number_of_workers: - type: integer - default: 8 - optional: true - description: Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process. - - # Deepspeed Parameters - apply_deepspeed: - type: boolean - default: false - optional: true - description: If set to true, will enable deepspeed for training. Please note deepspeed is not yet supported for MMDetection, will be enabled in future. - - # optional component input: deepspeed config - deepspeed_config: - type: uri_file - optional: true - description: Deepspeed config to be used for finetuning. Please note deepspeed is not yet supported for MMDetection, will be enabled in future. - - apply_ort: - type: boolean - default: false - optional: true - description: If set to true, will use the ONNXRunTime training. Please note ONNXRunTime is not yet supported for MMDetection, will be enabled in future. - - # Training parameters - number_of_epochs: - type: integer - optional: true - description: Number of training epochs. If left empty, will be chosen automatically based on the task type and model selected. - - max_steps: - type: integer - optional: true - description: If set to a positive number, the total number of training steps to perform. Overrides 'number_of_epochs'. In case of using a finite iterable dataset the training may stop before reaching the set number of steps when all data is exhausted. If left empty, will be chosen automatically based on the task type and model selected. - - training_batch_size: - type: integer - optional: true - description: Train batch size. If left empty, will be chosen automatically based on the task type and model selected. - - validation_batch_size: - type: integer - optional: true - description: Validation batch size. If left empty, will be chosen automatically based on the task type and model selected. - - auto_find_batch_size: - type: boolean - default: false - optional: true - description: Flag to enable auto finding of batch size. If the provided 'per_device_train_batch_size' goes into Out Of Memory (OOM) enabling auto_find_batch_size will find the correct batch size by iteratively reducing 'per_device_train_batch_size' by a factor of 2 till the OOM is fixed. - - # learning rate and learning rate scheduler - learning_rate: - type: number - optional: true - description: Start learning rate. Defaults to linear scheduler. If left empty, will be chosen automatically based on the task type and model selected. - - learning_rate_scheduler: - type: string - optional: true - enum: - - warmup_linear - - warmup_cosine - - warmup_cosine_with_restarts - - warmup_polynomial - - constant - - warmup_constant - description: The scheduler type to use. If left empty, will be chosen automatically based on the task type and model selected. - - warmup_steps: - type: integer - optional: true - description: Number of steps used for a linear warmup from 0 to learning_rate. If left empty, will be chosen automatically based on the task type and model selected. - - # optimizer - optimizer: - type: string - optional: true - enum: - - adamw_hf - - adamw - # - adamw_torch_xla - # - adamw_apex_fused - # - adamw_bnb_8bit - # - adamw_anyprecision - - sgd - - adafactor - - adagrad - description: optimizer to be used while training. If left empty, will be chosen automatically based on the task type and model selected. - - weight_decay: - type: number - optional: true - description: The weight decay to apply (if not zero) to all layers except all bias and LayerNorm weights in Adam, AdamW & SGD optimizer. If left empty, will be chosen automatically based on the task type and model selected. - - extra_optim_args: - type: string - default: "" - optional: true - description: Optional additional arguments that are supplied to SGD Optimizer. The arguments should be semi-colon separated key value pairs and should be enclosed in double quotes. For example, "momentum=0.5; nesterov=True" for sgd. Please make sure to use a valid parameter names for the chosen optimizer. For exact parameter names, please refer https://pytorch.org/docs/1.13/generated/torch.optim.SGD.html#torch.optim.SGD for SGD. Parameters supplied in extra_optim_args will take precedence over the parameter supplied via other arguments such as weight_decay. If weight_decay is provided via "weight_decay" parameter and via extra_optim_args both, values specified in extra_optim_args will be used. - - # gradient accumulation - gradient_accumulation_step: - type: integer - optional: true - description: Number of update steps to accumulate the gradients for, before performing a backward/update pass. If left empty, will be chosen automatically based on the task type and model selected. - - # mixed precision training - precision: - type: string - enum: - - "32" - - "16" - default: "32" - optional: true - description: Apply mixed precision training. This can reduce memory footprint by performing operations in half-precision. - - # metric thresholds - iou_threshold: - type: number - optional: true - description: IOU threshold used during inference in non-maximum suppression post processing. - - box_score_threshold: - type: number - optional: true - description: During inference, only return proposals with a score greater than `box_score_threshold`. The score is the multiplication of the objectness score and classification probability. - - # random seed - random_seed: - type: integer - default: 42 - optional: true - description: Random seed that will be set at the beginning of training. - - # evaluation strategy parameters - evaluation_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The evaluation strategy to adopt during training. Please note that the save_strategy and evaluation_strategy should match. - - evaluation_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two evals if evaluation_strategy='steps'. Please note that the saving steps should be a multiple of the evaluation steps. - - # logging strategy parameters - logging_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The logging strategy to adopt during training. - - logging_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two logs if logging_strategy='steps'. - - # Save strategy - save_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The checkpoint save strategy to adopt during training. Please note that the save_strategy and evaluation_strategy should match. - - save_steps: - type: integer - default: 500 - optional: true - description: Number of updates steps before two checkpoint saves if save_strategy="steps". Please note that the saving steps should be a multiple of the evaluation steps. - - # model checkpointing limit - save_total_limit: - type: integer - default: 5 - optional: true - description: If a value is passed, will limit the total amount of checkpoints. Deletes the older checkpoints in output_dir. If the value is -1 saves all checkpoints". - - # Early Stopping Parameters - early_stopping: - type: boolean - default: false - optional: true - description: Enable early stopping. - - early_stopping_patience: - type: integer - default: 1 - optional: true - description: Stop training when the specified metric worsens for early_stopping_patience evaluation calls. - - # Grad Norm - max_grad_norm: - type: number - optional: true - description: Maximum gradient norm (for gradient clipping). If left empty, will be chosen automatically based on the task type and model selected. - - # resume from the input model - resume_from_checkpoint: - type: boolean - default: false - optional: true - description: Loads optimizer, Scheduler and Trainer state for finetuning if true. - - save_as_mlflow_model: - type: boolean - default: true - optional: true - description: Save as mlflow model with pyfunc as flavour. - -outputs: - mlflow_model_folder: - type: mlflow_model - description: Output dir to save the finetune model as mlflow model. - pytorch_model_folder: - type: custom_model - description: Output dir to save the finetune model as torch model. - -command: >- - - python finetune.py - --model_path ${{inputs.model_path}} - --train_mltable_path ${{inputs.training_data}} - $[[--valid_mltable_path ${{inputs.validation_data}}]] - $[[--image_min_size ${{inputs.image_min_size}}]] - $[[--image_max_size ${{inputs.image_max_size}}]] - --task_name ${{inputs.task_name}} - $[[--metric_for_best_model ${{inputs.metric_for_best_model}}]] - $[[--apply_augmentations ${{inputs.apply_augmentations}}]] - $[[--dataloader_num_workers ${{inputs.number_of_workers}}]] - $[[--apply_deepspeed ${{inputs.apply_deepspeed}}]] - $[[--deepspeed_config ${{inputs.deepspeed_config}}]] - $[[--apply_ort ${{inputs.apply_ort}}]] - $[[--num_train_epochs ${{inputs.number_of_epochs}}]] - $[[--max_steps ${{inputs.max_steps}}]] - $[[--per_device_train_batch_size ${{inputs.training_batch_size}}]] - $[[--per_device_eval_batch_size ${{inputs.validation_batch_size}}]] - $[[--auto_find_batch_size ${{inputs.auto_find_batch_size}}]] - $[[--learning_rate ${{inputs.learning_rate}}]] - $[[--lr_scheduler_type ${{inputs.learning_rate_scheduler}}]] - $[[--warmup_steps ${{inputs.warmup_steps}}]] - $[[--optim ${{inputs.optimizer}}]] - $[[--weight_decay ${{inputs.weight_decay}}]] - $[[--extra_optim_args ${{inputs.extra_optim_args}}]] - $[[--gradient_accumulation_steps ${{inputs.gradient_accumulation_step}}]] - $[[--precision ${{inputs.precision}}]] - $[[--iou_threshold ${{inputs.iou_threshold}}]] - $[[--box_score_threshold ${{inputs.box_score_threshold}}]] - $[[--seed ${{inputs.random_seed}}]] - $[[--evaluation_strategy ${{inputs.evaluation_strategy}}]] - $[[--eval_steps ${{inputs.evaluation_steps}}]] - $[[--logging_strategy ${{inputs.logging_strategy}}]] - $[[--logging_steps ${{inputs.logging_steps}}]] - $[[--save_strategy ${{inputs.save_strategy}}]] - $[[--save_steps ${{inputs.save_steps}}]] - $[[--save_total_limit ${{inputs.save_total_limit}}]] - $[[--apply_early_stopping ${{inputs.early_stopping}}]] - $[[--early_stopping_patience ${{inputs.early_stopping_patience}}]] - $$[[--max_grad_norm ${{inputs.max_grad_norm}}]] - $[[--resume_from_checkpoint ${{inputs.resume_from_checkpoint}}]] - $[[--save_as_mlflow_model ${{inputs.save_as_mlflow_model}}]] - --mlflow_model_folder ${{outputs.mlflow_model_folder}} - --pytorch_model_folder ${{outputs.pytorch_model_folder}} diff --git a/assets/training/finetune_acft_image/components/finetune/mmt/asset.yaml b/assets/training/finetune_acft_image/components/finetune/mmt/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_image/components/finetune/mmt/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/finetune/mmt/spec.yaml b/assets/training/finetune_acft_image/components/finetune/mmt/spec.yaml deleted file mode 100644 index d57f86c241..0000000000 --- a/assets/training/finetune_acft_image/components/finetune/mmt/spec.yaml +++ /dev/null @@ -1,328 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.10 -name: mmtracking_video_multi_object_tracking_finetune -display_name: Video Multi Object Tracking MMTracking Model Finetune -description: Component to finetune MMTracking models for video multi-object tracking task. - -is_deterministic: false - -environment: azureml://registries/azureml/environments/acft-mmtracking-video-gpu/versions/33 - -code: ../../../src/finetune - -distribution: - type: pytorch - -inputs: - - # component input: model path - model_path: - type: uri_folder - optional: false - description: Output folder of model selector containing model metadata like config, checkpoints, tokenizer config. - - # component input: training mltable - training_data: - type: mltable - optional: false - description: Path to the mltable of the training dataset. - - # optional component input: validation mltable - validation_data: - type: mltable - optional: true - description: Path to the mltable of the validation dataset. - - image_width: - type: integer - default: -1 - optional: true - description: Image width that is input to the network. Default is -1 which means it would be overwritten by image_scale in model config. - - image_height: - type: integer - default: -1 - optional: true - description: Image height that is input to the network. Default is -1 which means it would be overwritten by image_scale in model config. - - task_name: - type: string - enum: - - video-multi-object-tracking - description: Which task the model is solving. - - number_of_workers: - type: integer - default: 8 - optional: true - description: Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process. - - # Training parameters - number_of_epochs: - type: integer - default: 15 - optional: true - description: Number of training epochs. - - max_steps: - type: integer - default: -1 - optional: true - description: If set to a positive number, the total number of training steps to perform. Overrides 'number_of_epochs'. In case of using a finite iterable dataset the training may stop before reaching the set number of steps when all data is exhausted. - - training_batch_size: - type: integer - default: 1 - optional: true - description: Train batch size. - - auto_find_batch_size: - type: boolean - default: false - optional: true - description: Flag to enable auto finding of batch size. If the provided 'per_device_train_batch_size' goes into Out Of Memory (OOM) enabling auto_find_batch_size will find the correct batch size by iteratively reducing 'per_device_train_batch_size' by a factor of 2 till the OOM is fixed. - - # learning rate and learning rate scheduler - learning_rate: - type: number - default: 0.0001 - optional: true - description: Start learning rate. Defaults to linear scheduler. - - learning_rate_scheduler: - type: string - default: warmup_cosine_with_restarts - optional: true - enum: - - warmup_linear - - warmup_cosine - - warmup_cosine_with_restarts - - warmup_polynomial - - constant - - warmup_constant - description: The scheduler type to use. - - warmup_steps: - type: integer - default: 5 - optional: true - description: Number of steps used for a linear warmup from 0 to learning_rate. - - # optimizer - optimizer: - type: string - default: sgd - optional: true - enum: - - adamw_hf - - adamw - # - adamw_torch_xla - # - adamw_apex_fused - # - adamw_bnb_8bit - # - adamw_anyprecision - - sgd - - adafactor - - adagrad - description: optimizer to be used while training. - - weight_decay: - type: number - default: 0.0 - optional: true - description: The weight decay to apply (if not zero) to all layers except all bias and LayerNorm weights in AdamW optimizer. - - extra_optim_args: - type: string - default: "" - optional: true - description: Optional additional arguments that are supplied to SGD Optimizer. The arguments should be semi-colon separated key value pairs and should be enclosed in double quotes. For example, "momentum=0.5; nesterov=True" for sgd. Please make sure to use a valid parameter names for the chosen optimizer. For exact parameter names, please refer https://pytorch.org/docs/1.13/generated/torch.optim.SGD.html#torch.optim.SGD for SGD. Parameters supplied in extra_optim_args will take precedence over the parameter supplied via other arguments such as weight_decay. If weight_decay is provided via "weight_decay" parameter and via extra_optim_args both, values specified in extra_optim_args will be used. - - # gradient accumulation - gradient_accumulation_step: - type: integer - default: 1 - optional: true - description: Number of update steps to accumulate the gradients for, before performing a backward/update pass. - - # mixed precision training - precision: - type: string - enum: - - "32" - - "16" - default: "32" - optional: true - description: Apply mixed precision training. This can reduce memory footprint by performing operations in half-precision. - - # primary metric - metric_for_best_model: - type: string - default: mean_average_precision - optional: true - enum: - - mean_average_precision - - precision - - recall - - MOTA - - MOTP - - IDF1 - description: Specify the metric to use to compare two different models. - - # metric thresholds - iou_threshold: - type: number - optional: true - description: IOU threshold used during inference in non-maximum suppression post processing. - - box_score_threshold: - type: number - optional: true - description: During inference, only return proposals with a score greater than `box_score_threshold`. The score is the multiplication of the objectness score and classification probability. - - # random seed - random_seed: - type: integer - default: 42 - optional: true - description: Random seed that will be set at the beginning of training. - - # evaluation strategy parameters - evaluation_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The evaluation strategy to adopt during training. Please note that the save_strategy and evaluation_strategy should match. - - evaluation_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two evals if evaluation_strategy='steps'. Please note that the saving steps should be a multiple of the evaluation steps. - - # logging strategy parameters - logging_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The logging strategy to adopt during training. - - logging_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two logs if logging_strategy='steps'. - - # Save strategy - save_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The checkpoint save strategy to adopt during training. Please note that the save_strategy and - evaluation_strategy should match. - - save_steps: - type: integer - default: 500 - optional: true - description: Number of updates steps before two checkpoint saves if save_strategy="steps". Please note that the saving steps should be a multiple of the evaluation steps. - - # model checkpointing limit - save_total_limit: - type: integer - default: 5 - optional: true - description: If a value is passed, will limit the total amount of checkpoints. Deletes the older checkpoints in output_dir. If the value is -1 saves all checkpoints". - - # Early Stopping Parameters - early_stopping: - type: boolean - default: false - optional: true - description: Enable early stopping. - - early_stopping_patience: - type: integer - default: 1 - optional: true - description: Stop training when the specified metric worsens for early_stopping_patience evaluation calls. - - # Grad Norm - max_grad_norm: - type: number - default: 1.0 - optional: true - description: "Maximum gradient norm (for gradient clipping)" - - # resume from the input model - resume_from_checkpoint: - type: boolean - default: false - optional: true - description: Loads optimizer, Scheduler and Trainer state for finetuning if true. - - save_as_mlflow_model: - type: boolean - default: true - optional: true - description: Save as mlflow model with pyfunc as flavour. - -outputs: - mlflow_model_folder: - type: mlflow_model - description: Output dir to save the finetune model as mlflow model. - pytorch_model_folder: - type: custom_model - description: Output dir to save the finetune model as torch model. - -command: >- - - python finetune.py - --model_path ${{inputs.model_path}} - --train_mltable_path ${{inputs.training_data}} - $[[--valid_mltable_path ${{inputs.validation_data}}]] - $[[--image_width ${{inputs.image_width}}]] - $[[--image_height ${{inputs.image_height}}]] - --task_name ${{inputs.task_name}} - $[[--dataloader_num_workers ${{inputs.number_of_workers}}]] - $[[--num_train_epochs ${{inputs.number_of_epochs}}]] - $[[--max_steps ${{inputs.max_steps}}]] - $[[--per_device_train_batch_size ${{inputs.training_batch_size}}]] - $[[--auto_find_batch_size ${{inputs.auto_find_batch_size}}]] - $[[--learning_rate ${{inputs.learning_rate}}]] - $[[--lr_scheduler_type ${{inputs.learning_rate_scheduler}}]] - $[[--warmup_steps ${{inputs.warmup_steps}}]] - $[[--optim ${{inputs.optimizer}}]] - $[[--weight_decay ${{inputs.weight_decay}}]] - $[[--extra_optim_args ${{inputs.extra_optim_args}}]] - $[[--gradient_accumulation_steps ${{inputs.gradient_accumulation_step}}]] - $[[--precision ${{inputs.precision}}]] - $[[--metric_for_best_model ${{inputs.metric_for_best_model}}]] - $[[--iou_threshold ${{inputs.iou_threshold}}]] - $[[--box_score_threshold ${{inputs.box_score_threshold}}]] - $[[--seed ${{inputs.random_seed}}]] - $[[--evaluation_strategy ${{inputs.evaluation_strategy}}]] - $[[--eval_steps ${{inputs.evaluation_steps}}]] - $[[--logging_strategy ${{inputs.logging_strategy}}]] - $[[--logging_steps ${{inputs.logging_steps}}]] - $[[--save_strategy ${{inputs.save_strategy}}]] - $[[--save_steps ${{inputs.save_steps}}]] - $[[--save_total_limit ${{inputs.save_total_limit}}]] - $[[--apply_early_stopping ${{inputs.early_stopping}}]] - $[[--early_stopping_patience ${{inputs.early_stopping_patience}}]] - $$[[--max_grad_norm ${{inputs.max_grad_norm}}]] - $[[--resume_from_checkpoint ${{inputs.resume_from_checkpoint}}]] - $[[--save_as_mlflow_model ${{inputs.save_as_mlflow_model}}]] - --mlflow_model_folder ${{outputs.mlflow_model_folder}} - --pytorch_model_folder ${{outputs.pytorch_model_folder}} - --per_device_eval_batch_size 1 diff --git a/assets/training/finetune_acft_image/components/framework_selector/asset.yaml b/assets/training/finetune_acft_image/components/framework_selector/asset.yaml deleted file mode 100644 index 82d6f77bef..0000000000 --- a/assets/training/finetune_acft_image/components/framework_selector/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune", "AutoML Image training"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/framework_selector/spec.yaml b/assets/training/finetune_acft_image/components/framework_selector/spec.yaml deleted file mode 100644 index 23fddb8527..0000000000 --- a/assets/training/finetune_acft_image/components/framework_selector/spec.yaml +++ /dev/null @@ -1,39 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/development/commandComponent.schema.json -type: command - -version: 0.0.20 -name: image_framework_selector -display_name: Framework Selector for Image Tasks -description: Framework selector control flow component for image tasks - -is_deterministic: true - -environment: azureml://registries/azureml/environments/acpt-automl-image-framework-selector-gpu/labels/latest - -code: ../../src/framework_selector - -inputs: - task_type: - type: string - description: Image task type. - optional: false - enum: ['image-classification', 'image-object-detection', 'image-instance-segmentation'] - - model_name: - type: string - description: Name of the model. Framework will be selected based on the model_name provided. - optional: true - - validation_output: - # Output of validation component - type: uri_file - optional: true - description: Validation status. - -outputs: - output: - type: boolean - mode: rw_mount - is_control: true - -command: mldesigner execute --source framework_selector.py --name framework_selector --inputs task_type="${{inputs.task_type}}" $[[model_name="${{inputs.model_name}}"]] --outputs output="${{outputs.output}}" diff --git a/assets/training/finetune_acft_image/components/model_import/diffusers_text_to_image/asset.yaml b/assets/training/finetune_acft_image/components/model_import/diffusers_text_to_image/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_image/components/model_import/diffusers_text_to_image/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/model_import/diffusers_text_to_image/spec.yaml b/assets/training/finetune_acft_image/components/model_import/diffusers_text_to_image/spec.yaml deleted file mode 100644 index fc23ccec9d..0000000000 --- a/assets/training/finetune_acft_image/components/model_import/diffusers_text_to_image/spec.yaml +++ /dev/null @@ -1,69 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.9 -name: diffusers_text_to_image_model_import -display_name: Text to Image Diffusers Model Import -description: Import PyTorch / MLflow model - -is_deterministic: True - -environment: azureml://registries/azureml/environments/acft-transformers-image-gpu/versions/46 - -code: ../../../src/model_selector - -inputs: - - # Model family - model_family: - type: string - optional: true - default: HuggingFaceImage - enum: - - HuggingFaceImage - description: Which framework the model belongs to. - - # Model name - model_name: - type: string - optional: true - description: Please select models from AzureML Model Assets for all supported models. For HuggingFace models, which are not supported in AuzreML model registry, input HuggingFace model_name here. The Model will be downloaded from HuggingFace hub using this model_name and are subject to third party license terms available on the HuggingFace model details page. It is the user responsibility to comply with the model's license terms. - - # Continual-Finetuning model path - pytorch_model: - type: custom_model - optional: true - description: Pytorch Model registered in AzureML Asset. - - mlflow_model: - type: mlflow_model - optional: true - description: Mlflow Model registered in AzureML Asset. - - validation_output: - # Output of validation component - type: uri_file - optional: true - description: Validation status. - - download_from_source: - type: boolean - optional: true - default: false - description: Download model directly from HuggingFace instead of system registry - -outputs: - output_dir: - type: uri_folder - description: Folder to store model metadata. - -command: >- - python model_selector.py - --component_type "diffusers" - $[[--model_family ${{inputs.model_family}}]] - $[[--model_name ${{inputs.model_name}}]] - $[[--pytorch_model ${{inputs.pytorch_model}}]] - $[[--mlflow_model ${{inputs.mlflow_model}}]] - $[[--download_from_source ${{inputs.download_from_source}}]] - --output_dir ${{outputs.output_dir}} - diff --git a/assets/training/finetune_acft_image/components/model_import/hf_classification/asset.yaml b/assets/training/finetune_acft_image/components/model_import/hf_classification/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_image/components/model_import/hf_classification/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/model_import/hf_classification/spec.yaml b/assets/training/finetune_acft_image/components/model_import/hf_classification/spec.yaml deleted file mode 100644 index f5679d03ff..0000000000 --- a/assets/training/finetune_acft_image/components/model_import/hf_classification/spec.yaml +++ /dev/null @@ -1,67 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.20 -name: transformers_image_classification_model_import -display_name: Image Classification HuggingFace Transformers Model Import -description: Import PyTorch / MLflow model - -is_deterministic: True - -environment: azureml://registries/azureml/environments/acft-transformers-image-gpu/labels/latest - -code: ../../../src/model_selector - -inputs: - - # Model family - model_family: - type: string - optional: true - default: HuggingFaceImage - enum: - - HuggingFaceImage - description: Which framework the model belongs to. - - # Model name - model_name: - type: string - optional: true - description: Please select models from AzureML Model Assets for all supported models. For HuggingFace models, which are not supported in AuzreML model registry, input HuggingFace model_name here. The Model will be downloaded from HuggingFace hub using this model_name and are subject to third party license terms available on the HuggingFace model details page. It is the user responsibility to comply with the model's license terms. - - # Continual-Finetuning model path - pytorch_model: - type: custom_model - optional: true - description: Pytorch Model registered in AzureML Asset. - - mlflow_model: - type: mlflow_model - optional: true - description: Mlflow Model registered in AzureML Asset. - - validation_output: - # Output of validation component - type: uri_file - optional: true - description: Validation status. - - download_from_source: - type: boolean - optional: true - default: false - description: Download model directly from HuggingFace instead of system registry - -outputs: - output_dir: - type: uri_folder - description: Folder to store model metadata. - -command: >- - python model_selector.py - $[[--model_family ${{inputs.model_family}}]] - $[[--model_name ${{inputs.model_name}}]] - $[[--pytorch_model ${{inputs.pytorch_model}}]] - $[[--mlflow_model ${{inputs.mlflow_model}}]] - $[[--download_from_source ${{inputs.download_from_source}}]] - --output_dir ${{outputs.output_dir}} diff --git a/assets/training/finetune_acft_image/components/model_import/mmd_od_is/asset.yaml b/assets/training/finetune_acft_image/components/model_import/mmd_od_is/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_image/components/model_import/mmd_od_is/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/model_import/mmd_od_is/spec.yaml b/assets/training/finetune_acft_image/components/model_import/mmd_od_is/spec.yaml deleted file mode 100644 index dfd1a88b76..0000000000 --- a/assets/training/finetune_acft_image/components/model_import/mmd_od_is/spec.yaml +++ /dev/null @@ -1,67 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.20 -name: mmdetection_image_objectdetection_instancesegmentation_model_import -display_name: Image Object Detection and Instance Segmentation MMDetection Model Import -description: Import PyTorch / MLflow model - -is_deterministic: True - -environment: azureml://registries/azureml/environments/acft-mmdetection-image-gpu/labels/latest - -code: ../../../src/model_selector - -inputs: - - # Model family - model_family: - type: string - optional: true - default: MmDetectionImage - enum: - - MmDetectionImage - description: Which framework the model belongs to. - - # Model name - model_name: - type: string - optional: true - description: Please select models from AzureML Model Assets for all supported models. For MMDetection, provide the model's config name here, same as its specified in MMDetection Model Zoo, To find the correct model name, go to https://github.com/open-mmlab/mmdetection/tree/v3.1.0/configs click on the model type and you will find the model name in the metafile.yml file which is present at configs//metafile.yml location. It is the user responsibility to comply with the model's license terms. - - # Continual-Finetuning model path - pytorch_model: - type: custom_model - optional: true - description: Pytorch Model registered in AzureML Asset. - - mlflow_model: - type: mlflow_model - optional: true - description: Mlflow Model registered in AzureML Asset. - - validation_output: - # Output of validation component - type: uri_file - optional: true - description: Validation status. - - download_from_source: - type: boolean - optional: true - default: false - description: Download model directly from MMDetection instead of system registry - -outputs: - output_dir: - type: uri_folder - description: Folder to store model metadata. - -command: >- - python model_selector.py - $[[--model_family ${{inputs.model_family}}]] - $[[--model_name ${{inputs.model_name}}]] - $[[--pytorch_model ${{inputs.pytorch_model}}]] - $[[--mlflow_model ${{inputs.mlflow_model}}]] - $[[--download_from_source ${{inputs.download_from_source}}]] - --output_dir ${{outputs.output_dir}} diff --git a/assets/training/finetune_acft_image/components/model_import/mmt/asset.yaml b/assets/training/finetune_acft_image/components/model_import/mmt/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_image/components/model_import/mmt/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/model_import/mmt/spec.yaml b/assets/training/finetune_acft_image/components/model_import/mmt/spec.yaml deleted file mode 100644 index 06904fbe01..0000000000 --- a/assets/training/finetune_acft_image/components/model_import/mmt/spec.yaml +++ /dev/null @@ -1,67 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.10 -name: mmtracking_video_multi_object_tracking_model_import -display_name: Video Multi Object Tracking MMTracking Model Import -description: Import PyTorch / MLflow model - -is_deterministic: True - -environment: azureml://registries/azureml/environments/acft-mmtracking-video-gpu/versions/33 - -code: ../../../src/model_selector - -inputs: - - # Model family - model_family: - type: string - optional: true - default: MmTrackingVideo - enum: - - MmTrackingVideo - description: Which framework the model belongs to. - - # Model name - model_name: - type: string - optional: true - description: Please select models from AzureML Model Assets for all supported models. For MMTracking, provide the model's config name here, same as its specified in MMTracking Model Zoo. To find the correct model name, go to https://github.com/open-mmlab/mmtracking/tree/v0.14.0/configs/mot click on the model type and you will find the model name in the metafile.yml file which is present at configs//metafile.yml location. It is the user responsibility to comply with the model's license terms. - - # Continual-Finetuning model path - pytorch_model: - type: custom_model - optional: true - description: Pytorch Model registered in AzureML Asset. - - mlflow_model: - type: mlflow_model - optional: true - description: Mlflow Model registered in AzureML Asset. - - validation_output: - # Output of validation component - type: uri_file - optional: true - description: Validation status. - - download_from_source: - type: boolean - optional: true - default: false - description: Download model directly from MMTracking instead of system registry - -outputs: - output_dir: - type: uri_folder - description: Folder to store model metadata. - -command: >- - python model_selector.py - $[[--model_family ${{inputs.model_family}}]] - $[[--model_name ${{inputs.model_name}}]] - $[[--pytorch_model ${{inputs.pytorch_model}}]] - $[[--mlflow_model ${{inputs.mlflow_model}}]] - $[[--download_from_source ${{inputs.download_from_source}}]] - --output_dir ${{outputs.output_dir}} diff --git a/assets/training/finetune_acft_image/components/model_output_selector/asset.yaml b/assets/training/finetune_acft_image/components/model_output_selector/asset.yaml deleted file mode 100644 index 82d6f77bef..0000000000 --- a/assets/training/finetune_acft_image/components/model_output_selector/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune", "AutoML Image training"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/model_output_selector/spec.yaml b/assets/training/finetune_acft_image/components/model_output_selector/spec.yaml deleted file mode 100644 index 771a0f43d9..0000000000 --- a/assets/training/finetune_acft_image/components/model_output_selector/spec.yaml +++ /dev/null @@ -1,52 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.19 -name: image_model_output_selector -display_name: Model output selector for image components -description: Model output selector control flow component for image tasks - -is_deterministic: true - -environment: azureml://registries/azureml/environments/acpt-automl-image-framework-selector-gpu/labels/latest - -code: ../../src/model_output_selector - -inputs: - mlflow_model_t: - type: mlflow_model - optional: True - description: Input MLFlow model for true block. - mlflow_model_f: - type: mlflow_model - optional: True - description: Input MLFLow model for false block. - pytorch_model_t: - type: custom_model - optional: True - description: Input pytorch model for true block. - pytorch_model_f: - type: custom_model - optional: True - description: Input pytorch model for false block. - condition: - type: uri_file - description: Condition based on which output models will be selected. - -outputs: - mlflow_model_folder: - type: mlflow_model - description: Output MLFLow model selected based on given condition. - pytorch_model_folder: - type: custom_model - description: Output pytorch model selected based on given condition. - -command: >- - python model_output_selector.py - $[[--mlflow_model_t ${{inputs.mlflow_model_t}}]] - $[[--mlflow_model_f ${{inputs.mlflow_model_f}}]] - $[[--pytorch_model_t ${{inputs.pytorch_model_t}}]] - $[[--pytorch_model_f ${{inputs.pytorch_model_f}}]] - --condition ${{inputs.condition}} - --output_mlflow ${{outputs.mlflow_model_folder}} - --output_pytorch ${{outputs.pytorch_model_folder}} diff --git a/assets/training/finetune_acft_image/components/pipeline_components/classification/asset.yaml b/assets/training/finetune_acft_image/components/pipeline_components/classification/asset.yaml deleted file mode 100644 index 82f46ae376..0000000000 --- a/assets/training/finetune_acft_image/components/pipeline_components/classification/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML Image training"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/pipeline_components/classification/spec.yaml b/assets/training/finetune_acft_image/components/pipeline_components/classification/spec.yaml deleted file mode 100644 index c97eaac65e..0000000000 --- a/assets/training/finetune_acft_image/components/pipeline_components/classification/spec.yaml +++ /dev/null @@ -1,385 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline - -version: 0.0.23 -name: image_classification_pipeline -display_name: Image Classification Pipeline -description: Pipeline component for image classification. - -is_deterministic: false - -inputs: - # ------------------- Computes ------------------- - compute_model_import: - type: string - optional: false - description: Compute to be used for framework_selector eg. provide 'cpu-cluster' if your compute is named 'cpu-cluster'. - - compute_finetune: - type: string - optional: false - description: Compute to be used for running the selected framework eg. provide 'gpu-cluster' if your compute is named 'gpu-cluster'. - - instance_count: - type: integer - default: 1 - optional: true - description: Number of nodes to be used for finetuning (used for distributed training) - - process_count_per_instance: - type: integer - default: 1 - optional: true - description: Number of gpus to be used per node for finetuning, should be equal to number of gpu per node in the compute SKU used for finetune. - - # ------------------- Model Framework Selector ------------------- - model_name: - description: Name of the model. Based on this model name, a framework will be selected (Hugging Face, MM Detection). - type: string - optional: true - - download_from_source: - type: boolean - optional: true - default: false - description: Download model directly from HuggingFace instead of system registry - - # ------------------- Data Inputs ------------------ - training_data: - type: mltable - optional: false - description: Path to MLTable for training data. - - validation_data: - type: mltable - optional: true - description: Path to MLTable for validation data. - - # ------------------- Classification Type ------------------ - task_type: - description: Whether a single image can have multiple labels. - type: string - enum: ['image-classification', 'image-classification-multilabel'] - - # ------------------- Primary Metric ---------------- - primary_metric: - description: Primary metric for the task - type: string - optional: true - enum: ['accuracy', 'iou'] - - # ------------------- Hyperparamters ------------------ - ams_gradient: - description: Enable ams_gradient when optimizer is adam or adamw. - type: boolean - optional: true - - beta1: - description: Value of beta1 when optimizer is adam or adamw. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - beta2: - description: Value of beta2 when optimizer is adam or adamw. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - checkpoint_frequency: - description: Frequency to store model checkpoints. Must be a positive integer. - type: integer - optional: true - min: 0 - - checkpoint_run_id: - description: The run ID of the experiment that has a pretrained checkpoint for incremental training. - type: string - optional: true - - early_stopping: - description: Enable early stopping logic during training. - type: boolean - optional: true - - early_stopping_patience: - description: Minimum number of epochs or validation evaluations with no primary metric improvement before the run is stopped. Must be a positive integer. - type: integer - optional: true - min: 1 - - early_stopping_delay: - description: Minimum number of epochs or validation evaluations to wait before primary metric improvement is tracked for early stopping. Must be a positive integer. - type: integer - optional: true - min: 1 - - evaluation_frequency: - description: Frequency to evaluate validation dataset to get metric scores. Must be a positive integer. - type: integer - optional: true - min: 1 - - gradient_accumulation_step: - description: Number of forward passes without updating the model weights while accumulating the gradients of those steps, and then using the accumulated gradients to compute the weight updates. Must be a positive integer. - type: integer - optional: true - min: 1 - - layers_to_freeze: - description: How many layers to freeze for your model. For instance, passing 2 as value for seresnext means freezing layer0 and layer1 referring to the below supported model layer info. Must be a positive integer. - type: integer - optional: true - min: 1 - - learning_rate: - description: Initial learning rate. - type: number - optional: true - min: 0 - max: 1 - - learning_rate_scheduler: - description: Type of learning rate scheduler. Must be warmup_cosine or step. - type: string - optional: true - enum: ['warmup_cosine', 'step'] - - momentum: - description: Value of momentum when optimizer is sgd. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - nesterov: - description: Enable nesterov when optimizer is sgd. - type: boolean - optional: true - - number_of_epochs: - description: Number of training epochs - type: integer - optional: true - min: 1 - - number_of_workers: - description: Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process. - type: integer - optional: true - - optimizer: - description: Type of optimizer - type: string - optional: true - enum: ['sgd', 'adam', 'adamw'] - - random_seed: - description: Random seed that will be set at the beginning of training. - type: integer - optional: true - - step_lr_gamma: - description: Value of gamma when learning rate scheduler is step. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: number - optional: true - - step_lr_step_size: - description: Value of step size when learning rate scheduler is step. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: integer - optional: true - - training_batch_size: - description: Training batch size. - type: integer - optional: true - min: 1 - - training_crop_size: - description: Image crop size that's input to your neural network for training dataset. Notes - seresnext doesn't take an arbitrary size. ViT-variants should have the same validation_crop_size and training_crop_size. - type: integer - optional: true - min: 1 - - validation_batch_size: - description: Validation batch size. - type: integer - optional: true - min: 1 - - validation_crop_size: - description: Image crop size that's input to your neural network for validation dataset. Note - seresnext doesn't take an arbitrary size. ViT-variants should have the same validation_crop_size and training_crop_size. - type: integer - optional: true - min: 1 - - validation_resize_size: - description: Image size to which to resize before cropping for validation dataset. Note - seresnext doesn't take an arbitrary size. - type: integer - optional: true - min: 1 - - warmup_cosine_lr_cycles: - description: Value of cosine cycle when learning rate scheduler is warmup_cosine. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: number - optional: true - - warmup_cosine_lr_warmup_epochs: - description: Value of warmup epochs when learning rate scheduler is warmup_cosine. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: integer - optional: true - - weight_decay: - description: Value of weight decay used by the optimizer. - type: number - optional: true - min: 0 - max: 1 - - weighted_loss: - description: Value of weighted loss. - type: integer - optional: true - -outputs: - pytorch_model_folder: - type: custom_model - description: The trained pytorch model. - mlflow_model_folder: - type: mlflow_model - description: The trained MLFlow model. - -jobs: - - finetune_common_validation: - type: command - component: azureml:finetune_common_validation:0.0.8 - compute: ${{parent.inputs.compute_model_import}} - inputs: - train_mltable_path: ${{parent.inputs.training_data}} - validation_mltable_path: ${{parent.inputs.validation_data}} - compute_model_import: ${{parent.inputs.compute_model_import}} - compute_finetune: ${{parent.inputs.compute_finetune}} - task_name: ${{parent.inputs.task_type}} - label_column_name: label - user_column_names: image_url,label - task_specific_extra_params: '"model_family=HuggingFaceImage;model_name=${{parent.inputs.model_name}};metric_for_best_model=${{parent.inputs.primary_metric}};number_of_epochs=${{parent.inputs.number_of_epochs}}"' - - framework_selector: - type: command - component: azureml:image_framework_selector:0.0.20 - compute: ${{parent.inputs.compute_model_import}} - inputs: - task_type: 'image-classification' - model_name: ${{parent.inputs.model_name}} - validation_output: ${{parent.jobs.finetune_common_validation.outputs.validation_info}} - - image_classification_runtime_component: - type: command - component: azureml:train_image_classification_model:0.0.13 - compute: ${{parent.inputs.compute_finetune}} - resources: - shm_size: '16g' - inputs: - training_data: ${{parent.inputs.training_data}} - validation_data: ${{parent.inputs.validation_data}} - ams_gradient: ${{parent.inputs.ams_gradient}} - beta1: ${{parent.inputs.beta1}} - beta2: ${{parent.inputs.beta2}} - checkpoint_frequency: ${{parent.inputs.checkpoint_frequency}} - checkpoint_run_id: ${{parent.inputs.checkpoint_run_id}} - early_stopping: ${{parent.inputs.early_stopping}} - early_stopping_patience: ${{parent.inputs.early_stopping_patience}} - early_stopping_delay: ${{parent.inputs.early_stopping_delay}} - evaluation_frequency: ${{parent.inputs.evaluation_frequency}} - gradient_accumulation_step: ${{parent.inputs.gradient_accumulation_step}} - layers_to_freeze: ${{parent.inputs.layers_to_freeze}} - learning_rate: ${{parent.inputs.learning_rate}} - learning_rate_scheduler: ${{parent.inputs.learning_rate_scheduler}} - model_name: ${{parent.inputs.model_name}} - momentum: ${{parent.inputs.momentum}} - nesterov: ${{parent.inputs.nesterov}} - number_of_epochs: ${{parent.inputs.number_of_epochs}} - number_of_workers: ${{parent.inputs.number_of_workers}} - optimizer: ${{parent.inputs.optimizer}} - random_seed: ${{parent.inputs.random_seed}} - step_lr_gamma: ${{parent.inputs.step_lr_gamma}} - step_lr_step_size: ${{parent.inputs.step_lr_step_size}} - task_type: ${{parent.inputs.task_type}} - training_batch_size: ${{parent.inputs.training_batch_size}} - training_crop_size: ${{parent.inputs.training_crop_size}} - validation_batch_size: ${{parent.inputs.validation_batch_size}} - validation_crop_size: ${{parent.inputs.validation_crop_size}} - validation_resize_size: ${{parent.inputs.validation_resize_size}} - warmup_cosine_lr_cycles: ${{parent.inputs.warmup_cosine_lr_cycles}} - warmup_cosine_lr_warmup_epochs: ${{parent.inputs.warmup_cosine_lr_warmup_epochs}} - weight_decay: ${{parent.inputs.weight_decay}} - weighted_loss: ${{parent.inputs.weighted_loss}} - - hugging_face_model_import: - type: command - component: azureml:transformers_image_classification_model_import:0.0.20 - compute: ${{parent.inputs.compute_model_import}} - inputs: - model_family: 'HuggingFaceImage' - model_name: ${{parent.inputs.model_name}} - download_from_source: ${{parent.inputs.download_from_source}} - validation_output: ${{parent.jobs.finetune_common_validation.outputs.validation_info}} - - hugging_face_finetune: - type: command - component: azureml:transformers_image_classification_finetune:0.0.20 - compute: ${{parent.inputs.compute_finetune}} - distribution: - type: pytorch - process_count_per_instance: ${{parent.inputs.process_count_per_instance}} - resources: - instance_count: ${{parent.inputs.instance_count}} - shm_size: '16g' - inputs: - # Model path is same as what is output of model selector - model_path: ${{parent.jobs.hugging_face_model_import.outputs.output_dir}} - training_data: ${{parent.inputs.training_data}} - validation_data: ${{parent.inputs.validation_data}} - early_stopping: ${{parent.inputs.early_stopping}} - early_stopping_patience: ${{parent.inputs.early_stopping_patience}} - evaluation_steps: ${{parent.inputs.evaluation_frequency}} - gradient_accumulation_step: ${{parent.inputs.gradient_accumulation_step}} - image_height: ${{parent.inputs.training_crop_size}} - image_width: ${{parent.inputs.training_crop_size}} - learning_rate: ${{parent.inputs.learning_rate}} - learning_rate_scheduler: ${{parent.inputs.learning_rate_scheduler}} - number_of_epochs: ${{parent.inputs.number_of_epochs}} - number_of_workers: ${{parent.inputs.number_of_workers}} - optimizer: ${{parent.inputs.optimizer}} - random_seed: ${{parent.inputs.random_seed}} - save_as_mlflow_model: true - save_steps: ${{parent.inputs.checkpoint_frequency}} - task_name: ${{parent.inputs.task_type}} - metric_for_best_model: ${{parent.inputs.primary_metric}} - training_batch_size: ${{parent.inputs.training_batch_size}} - validation_batch_size: ${{parent.inputs.validation_batch_size}} - weight_decay: ${{parent.inputs.weight_decay}} - extra_optim_args: '"momentum=${{parent.inputs.momentum}};nesterov=${{parent.inputs.nesterov}}"' - - condition_node: - type: if_else - true_block: ${{parent.jobs.image_classification_runtime_component}} - condition: ${{parent.jobs.framework_selector.outputs.output}} - false_block: ${{parent.jobs.hugging_face_model_import}} - - output_selector: - type: command - component: azureml:image_model_output_selector:0.0.19 - compute: ${{parent.inputs.compute_model_import}} - inputs: - mlflow_model_t: ${{parent.jobs.image_classification_runtime_component.outputs.mlflow_model_folder}} - pytorch_model_t: ${{parent.jobs.image_classification_runtime_component.outputs.pytorch_model_folder}} - condition: ${{parent.jobs.framework_selector.outputs.output}} - mlflow_model_f: ${{parent.jobs.hugging_face_finetune.outputs.mlflow_model_folder}} - pytorch_model_f: ${{parent.jobs.hugging_face_finetune.outputs.pytorch_model_folder}} - outputs: - mlflow_model_folder: ${{parent.outputs.mlflow_model_folder}} - pytorch_model_folder: ${{parent.outputs.pytorch_model_folder}} diff --git a/assets/training/finetune_acft_image/components/pipeline_components/diffusers_text_to_image/asset.yaml b/assets/training/finetune_acft_image/components/pipeline_components/diffusers_text_to_image/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_image/components/pipeline_components/diffusers_text_to_image/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/pipeline_components/diffusers_text_to_image/spec.yaml b/assets/training/finetune_acft_image/components/pipeline_components/diffusers_text_to_image/spec.yaml deleted file mode 100644 index 0146f4e3c1..0000000000 --- a/assets/training/finetune_acft_image/components/pipeline_components/diffusers_text_to_image/spec.yaml +++ /dev/null @@ -1,489 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline - -version: 0.0.10 -name: diffusers_text_to_image_dreambooth_pipeline -display_name: Text to Image Dreambooth Finetuning Diffusers Pipeline -description: Pipeline component for text to image dreambooth training using diffusers library and transformers models. - -is_deterministic: false - -inputs: - compute_model_import: - type: string - optional: false - description: Compute to be used for model_import eg. provide 'FT-Cluster' if your compute is named 'FT-Cluster' - - compute_finetune: - type: string - optional: false - description: Compute to be used for finetune eg. provide 'FT-Cluster' if your compute is named 'FT-Cluster' - - instance_count: - type: integer - default: 1 - optional: true - description: Number of nodes to be used for finetuning (used for distributed training) - - process_count_per_instance: - type: integer - default: 1 - optional: true - description: Number of gpus to be used per node for finetuning, should be equal to number of gpu per node in the compute SKU used for finetune - - # ########################### Model Selector Component ########################### # - # Model family - model_family: - type: string - optional: true - default: HuggingFaceImage - enum: - - HuggingFaceImage - description: Which framework the model belongs to. - - model_name: - type: string - optional: true - description: Please select models from AzureML Model Assets for all supported models. For HuggingFace models, which are not supported in AuzreML model registry, input HuggingFace model_name here. The Model will be downloaded from HuggingFace hub using this model_name and are subject to third party license terms available on the HuggingFace model details page. It is the user responsibility to comply with the model's license terms. - - pytorch_model: - type: custom_model - optional: true - description: Pytorch Model registered in AzureML Asset. - - mlflow_model: - type: mlflow_model - optional: true - description: Mlflow Model registered in AzureML Asset. - - download_from_source: - type: boolean - optional: true - default: false - description: Download model directly from HuggingFace instead of system registry - - # ########################### Finetuning Component ########################### # - - # component input: Instance data dir - instance_data_dir: - type: uri_folder - optional: false - description: A folder containing the training data of instance images. - - class_data_dir: - type: uri_folder - optional: true - mode: download - description: (Optional) A folder containing the training data of class images. You can place existing images in class_data_dir, and the training job will generate any additional images so that num_class_images are present in class_data_dir during training time. - - task_name: - type: string - enum: - - stable-diffusion-text-to-image - description: Which task the model is solving. - - # Instance prompt - instance_prompt: - type: string - optional: true # Failure to be caught on ES side - description: The prompt with identifier specifying the instance. - - resolution: - type: integer - optional: true - default: 512 - description: The image resolution for training. - - # Lora parameters - # LoRA reduces the number of trainable parameters by learning pairs of rank-decompostion matrices while freezing the original weights. This vastly reduces the storage requirement for large models adapted to specific tasks and enables efficient task-switching during deployment all without introducing inference latency. LoRA also outperforms several other adaptation methods including adapter, prefix-tuning, and fine-tuning. - apply_lora: - type: boolean - default: true - optional: false - description: If "true" enables lora. - - lora_alpha: - type: integer - default: 128 - optional: true - description: alpha attention parameter for lora. - - lora_r: - type: integer - default: 8 - optional: true - description: lora dimension - - lora_dropout: - type: number - default: 0.0 - optional: true - description: lora dropout value - - # Tokenizer - tokenizer_max_length: - type: integer - optional: true - description: The maximum length of the tokenizer. If not set, will default to the tokenizer's max length. - - # Text Encoder - text_encoder_type: - type: string - enum: - - CLIPTextModel - - T5EncoderModel - optional: true - description: Text encoder to be used. - - text_encoder_name: - type: string - optional: true - description: Huggingface id of text encoder. This model should of type specified in `text_encoder_type`. If not specified the default from the model will be used. - - train_text_encoder: - type: boolean - default: false - optional: true - description: Whether to train the text encoder. If set, the text encoder should be float32 precision. - - pre_compute_text_embeddings: - type: boolean - default: true - optional: true - description: Whether or not to pre-compute text embeddings. If text embeddings are pre-computed, the text encoder will not be kept in memory during training and will leave more GPU memory available for training the rest of the model. This is not compatible with `--train_text_encoder`. - - text_encoder_use_attention_mask: - type: boolean - default: false - optional: true - description: Whether to use attention mask for the text encoder - - # UNET related - class_labels_conditioning: - type: string - optional: true - description: The optional `class_label` conditioning to pass to the unet, available values are `timesteps`. - - # Noise Scheduler - noise_scheduler_name: - type: string - enum: - - DPMSolverMultistepScheduler - - DDPMScheduler - - PNDMScheduler - optional: true - description: Noise scheduler to be used. - - noise_scheduler_num_train_timesteps: - type: integer - optional: true - description: The number of diffusion steps to train the model. - - noise_scheduler_variance_type: - type: string - enum: - - fixed_small - - fixed_small_log - - fixed_large - - fixed_large_log - - learned - - learned_range - optional: true - description: Clip the variance when adding noise to the denoised sample. - - noise_scheduler_prediction_type: - type: string - enum: - - epsilon - - sample - - v_prediction - optional: true - description: Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen Video](https://imagen.research.google/video/paper.pdf) paper). - - noise_scheduler_timestep_spacing: - type: string - optional: true - description: The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. - - noise_scheduler_steps_offset: - type: integer - optional: true - description: An offset added to the inference steps. You can use a combination of `offset=1` and `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable Diffusion. - - extra_noise_scheduler_args: - type: string - optional: true - description: Optional additional arguments that are supplied to noise scheduler. The arguments should be semi-colon separated key value pairs and should be enclosed in double quotes. For example, "clip_sample_range=1.0; clip_sample=True" for DDPMScheduler. - - offset_noise: - type: boolean - optional: true - description: Fine-tuning against a modified noise. See https://www.crosslabs.org//blog/diffusion-with-offset-noise for more information. - - # Prior preservation loss - with_prior_preservation: - type: boolean - default: true - description: Flag to add prior preservation loss. - class_prompt: - type: string - optional: true - description: The prompt to specify images in the same class as provided instance images. - num_class_images: - type: integer - default: 100 - optional: true - description: Minimal class images for prior preservation loss. If there are not enough images already present in class_data_dir, additional images will be sampled with class_prompt. - prior_generation_precision: - type: string - optional: true - default: "fp32" - enum: - - "fp32" - - "fp16" - - "bf16" - description: Choose prior generation precision between fp32, fp16 and bf16 (bfloat16). Bf16 requires PyTorch >= 1.10.and an Nvidia Ampere GPU. Default to fp16 if a GPU is available else fp32. - prior_loss_weight: - type: number - default: 1.0 - optional: true - description: The weight of prior preservation loss. - - sample_batch_size: - type: integer - default: 4 - optional: true - description: "Batch size (per device) for sampling class images when training with_prior_preservation set to True." - - # Validation parameters - num_validation_images: - type: integer - default: 0 - description: "Specify number of images to generate using instance_prompt. Images are stored in the output/checkpoint-* directories. Please note that this will increase the training time. If you select num_validation_images = 0, then run will generate 5 images in last checkpoint." - - # Training related - number_of_workers: - type: integer - default: 6 - optional: true - description: Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process. - - number_of_epochs: - type: integer - optional: true - description: Number of training epochs. If left empty, will be chosen automatically based on the task type and model selected. - - max_steps: - type: integer - optional: true - description: If set to a positive number, the total number of training steps to perform. Overrides 'number_of_epochs'. In case of using a finite iterable dataset the training may stop before reaching the set number of steps when all data is exhausted. If left empty, will be chosen automatically based on the task type and model selected. - - training_batch_size: - type: integer - default: 1 - optional: true - description: Train batch size. If left empty, will be chosen automatically based on the task type and model selected. - - auto_find_batch_size: - type: boolean - default: false - optional: true - description: Flag to enable auto finding of batch size. If the provided 'per_device_train_batch_size' goes into Out Of Memory (OOM) enabling auto_find_batch_size will find the correct batch size by iteratively reducing 'per_device_train_batch_size' by a factor of 2 till the OOM is fixed. - - # learning rate and learning rate scheduler - learning_rate: - type: number - optional: true - description: Start learning rate. Defaults to linear scheduler. If left empty, will be chosen automatically based on the task type and model selected. - - learning_rate_scheduler: - type: string - optional: true - enum: - - warmup_linear - - warmup_cosine - - warmup_cosine_with_restarts - - warmup_polynomial - - constant - - warmup_constant - description: The scheduler type to use. If left empty, will be chosen automatically based on the task type and model selected. - - warmup_steps: - type: integer - default: 0 - optional: true - description: Number of steps used for a linear warmup from 0 to learning_rate. If left empty, will be chosen automatically based on the task type and model selected. - - # optimizer - optimizer: - type: string - optional: true - enum: - - adamw_hf - - adamw - # - adamw_torch_xla - # - adamw_apex_fused - # - adamw_bnb_8bit - # - adamw_anyprecision - - sgd - - adafactor - - adagrad - - adamw_ort_fused - description: optimizer to be used while training. 'adamw_ort_fused' optimizer is only supported for ORT training. If left empty, will be chosen automatically based on the task type and model selected. - - weight_decay: - type: number - default: 0 - optional: true - description: The weight decay to apply (if not zero) to all layers except all bias and LayerNorm weights in AdamW and sgd optimizer. If left empty, will be chosen automatically based on the task type and model selected. - - extra_optim_args: - type: string - default: "" - optional: true - description: Optional additional arguments that are supplied to SGD Optimizer. The arguments should be semi-colon separated key value pairs and should be enclosed in double quotes. For example, "momentum=0.5; nesterov=True" for sgd. Please make sure to use a valid parameter names for the chosen optimizer. For exact parameter names, please refer https://pytorch.org/docs/1.13/generated/torch.optim.SGD.html#torch.optim.SGD for SGD. Parameters supplied in extra_optim_args will take precedence over the parameter supplied via other arguments such as weight_decay. If weight_decay is provided via "weight_decay" parameter and via extra_optim_args both, values specified in extra_optim_args will be used. - - # gradient accumulation - gradient_accumulation_step: - type: integer - optional: true - description: Number of update steps to accumulate the gradients for, before performing a backward/update pass. If left empty, will be chosen automatically based on the task type and model selected. - - max_grad_norm: - type: number - optional: true - description: Maximum gradient norm (for gradient clipping). If left empty, will be chosen automatically based on the task type and model selected. - - # mixed precision training - precision: - type: string - enum: - - "32" - - "16" - default: "32" - optional: true - description: Apply mixed precision training. This can reduce memory footprint by performing operations in half-precision. - - # random seed - random_seed: - type: integer - default: 42 - optional: true - description: Random seed that will be set at the beginning of training. - - # logging strategy parameters - logging_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The logging strategy to adopt during training. - - logging_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two logs if logging_strategy='steps'. - - save_total_limit: - type: integer - default: 5 - optional: true - description: If a value is passed, will limit the total amount of checkpoints. Deletes the older checkpoints in output_dir. If the value is -1 saves all checkpoints". - - # save mlflow model - save_as_mlflow_model: - type: boolean - default: true - optional: true - description: Save as mlflow model with pyfunc as flavour. - -outputs: - # ########################### Finetuning Component ########################### # - mlflow_model_folder: - type: mlflow_model - description: Output dir to save the finetune model as mlflow model. - pytorch_model_folder: - type: custom_model - description: Output dir to save the finetune model as torch model. - -jobs: - text_to_image_model_import: - type: command - component: azureml:diffusers_text_to_image_model_import:0.0.9 - compute: ${{parent.inputs.compute_model_import}} - inputs: - model_family: ${{parent.inputs.model_family}} - model_name: ${{parent.inputs.model_name}} - pytorch_model: ${{parent.inputs.pytorch_model}} - mlflow_model: ${{parent.inputs.mlflow_model}} - download_from_source: ${{parent.inputs.download_from_source}} - - text_to_image_dreambooth_finetune: - type: command - component: azureml:diffusers_text_to_image_finetune:0.0.9 - compute: ${{parent.inputs.compute_finetune}} - distribution: - type: pytorch - process_count_per_instance: ${{parent.inputs.process_count_per_instance}} - resources: - instance_count: ${{parent.inputs.instance_count}} - shm_size: "16g" - inputs: - # Model path is same as what is output of model selector - model_path: ${{parent.jobs.text_to_image_model_import.outputs.output_dir}} - instance_data_dir: ${{parent.inputs.instance_data_dir}} - class_data_dir: ${{parent.inputs.class_data_dir}} - task_name: ${{parent.inputs.task_name}} - instance_prompt: ${{parent.inputs.instance_prompt}} - resolution: ${{parent.inputs.resolution}} - apply_lora: ${{parent.inputs.apply_lora}} - lora_alpha: ${{parent.inputs.lora_alpha}} - lora_r: ${{parent.inputs.lora_r}} - lora_dropout: ${{parent.inputs.lora_dropout}} - tokenizer_max_length: ${{parent.inputs.tokenizer_max_length}} - text_encoder_type: ${{parent.inputs.text_encoder_type}} - text_encoder_name: ${{parent.inputs.text_encoder_name}} - train_text_encoder: ${{parent.inputs.train_text_encoder}} - pre_compute_text_embeddings: ${{parent.inputs.pre_compute_text_embeddings}} - text_encoder_use_attention_mask: ${{parent.inputs.text_encoder_use_attention_mask}} - class_labels_conditioning: ${{parent.inputs.class_labels_conditioning}} - noise_scheduler_name: ${{parent.inputs.noise_scheduler_name}} - noise_scheduler_num_train_timesteps: ${{parent.inputs.noise_scheduler_num_train_timesteps}} - noise_scheduler_variance_type: ${{parent.inputs.noise_scheduler_variance_type}} - noise_scheduler_prediction_type: ${{parent.inputs.noise_scheduler_prediction_type}} - noise_scheduler_timestep_spacing: ${{parent.inputs.noise_scheduler_timestep_spacing}} - noise_scheduler_steps_offset: ${{parent.inputs.noise_scheduler_steps_offset}} - extra_noise_scheduler_args: ${{parent.inputs.extra_noise_scheduler_args}} - offset_noise: ${{parent.inputs.offset_noise}} - with_prior_preservation: ${{parent.inputs.with_prior_preservation}} - class_prompt: ${{parent.inputs.class_prompt}} - num_class_images: ${{parent.inputs.num_class_images}} - prior_generation_precision: ${{parent.inputs.prior_generation_precision}} - prior_loss_weight: ${{parent.inputs.prior_loss_weight}} - sample_batch_size: ${{parent.inputs.sample_batch_size}} - num_validation_images: ${{parent.inputs.num_validation_images}} - number_of_workers: ${{parent.inputs.number_of_workers}} - number_of_epochs: ${{parent.inputs.number_of_epochs}} - max_steps: ${{parent.inputs.max_steps}} - training_batch_size: ${{parent.inputs.training_batch_size}} - auto_find_batch_size: ${{parent.inputs.auto_find_batch_size}} - learning_rate: ${{parent.inputs.learning_rate}} - learning_rate_scheduler: ${{parent.inputs.learning_rate_scheduler}} - warmup_steps: ${{parent.inputs.warmup_steps}} - optimizer: ${{parent.inputs.optimizer}} - weight_decay: ${{parent.inputs.weight_decay}} - extra_optim_args: ${{parent.inputs.extra_optim_args}} - gradient_accumulation_step: ${{parent.inputs.gradient_accumulation_step}} - max_grad_norm: ${{parent.inputs.max_grad_norm}} - precision: ${{parent.inputs.precision}} - random_seed: ${{parent.inputs.random_seed}} - logging_strategy: ${{parent.inputs.logging_strategy}} - logging_steps: ${{parent.inputs.logging_steps}} - save_total_limit: ${{parent.inputs.save_total_limit}} - save_as_mlflow_model: ${{parent.inputs.save_as_mlflow_model}} - outputs: - mlflow_model_folder: ${{parent.outputs.mlflow_model_folder}} - pytorch_model_folder: ${{parent.outputs.pytorch_model_folder}} diff --git a/assets/training/finetune_acft_image/components/pipeline_components/instance_segmentation/asset.yaml b/assets/training/finetune_acft_image/components/pipeline_components/instance_segmentation/asset.yaml deleted file mode 100644 index 82f46ae376..0000000000 --- a/assets/training/finetune_acft_image/components/pipeline_components/instance_segmentation/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML Image training"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/pipeline_components/instance_segmentation/spec.yaml b/assets/training/finetune_acft_image/components/pipeline_components/instance_segmentation/spec.yaml deleted file mode 100644 index 4e9a3e2fa3..0000000000 --- a/assets/training/finetune_acft_image/components/pipeline_components/instance_segmentation/spec.yaml +++ /dev/null @@ -1,437 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline - -version: 0.0.25 -name: image_instance_segmentation_pipeline -display_name: Image Instance Segmentation Pipeline -description: Pipeline component for image instance segmentation. - -is_deterministic: false - -inputs: - # ------------------- Computes ------------------- - compute_model_import: - type: string - optional: false - description: Compute to be used for framework_selector eg. provide 'gpu-cluster' if your compute is named 'gpu-cluster'. - - compute_finetune: - type: string - optional: false - description: Compute to be used for running the selected framework eg. provide 'gpu-cluster' if your compute is named 'gpu-cluster'. - - instance_count: - type: integer - default: 1 - optional: true - description: Number of nodes to be used for finetuning (used for distributed training). - - process_count_per_instance: - type: integer - default: 1 - optional: true - description: Number of gpus to be used per node for finetuning, should be equal to number of gpu per node in the compute SKU used for finetune. - - # ------------------- Model Framework Selector ------------------- - model_name: - description: Name of the model. Based on this model name, a framework will be selected (Hugging Face, MM Detection). - type: string - optional: true - - download_from_source: - type: boolean - optional: true - default: false - description: Download model directly from MMDetection instead of system registry - - # ------------------- Data Inputs ------------------ - training_data: - type: mltable - optional: false - description: Path to MLTable for training data. - - validation_data: - type: mltable - optional: true - description: Path to MLTable for validation data. - - # ------------------- Task Type ------------------ - task_type: - description: Type of the task - type: string - optional: false - default: 'image-instance-segmentation' - enum: ['image-instance-segmentation'] - - # ------------------- Primary Metric ---------------- - primary_metric: - description: Primary metric for the task - type: string - optional: true - default: 'mean_average_precision' - enum: ['mean_average_precision'] - - # ------------------- Hyperparamters ------------------ - ams_gradient: - description: Enable ams_gradient when optimizer is adam or adamw. - type: boolean - optional: true - - beta1: - description: Value of beta1 when optimizer is adam or adamw. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - beta2: - description: Value of beta2 when optimizer is adam or adamw. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - box_detections_per_image: - description: Maximum number of detections per image, for all classes. Must be a positive integer. - type: integer - optional: true - min: 1 - - box_score_threshold: - description: During inference, only return proposals with a classification score greater than box_score_threshold. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - checkpoint_frequency: - description: Frequency to store model checkpoints. Must be a positive integer. - type: integer - optional: true - min: 0 - - checkpoint_run_id: - description: The run ID of the experiment that has a pretrained checkpoint for incremental training. - type: string - optional: true - - early_stopping: - description: Enable early stopping logic during training. - type: boolean - optional: true - - early_stopping_patience: - description: Minimum number of epochs or validation evaluations with no primary metric improvement before the run is stopped. Must be a positive integer. - type: integer - optional: true - min: 1 - - early_stopping_delay: - description: Minimum number of epochs or validation evaluations to wait before primary metric improvement is tracked for early stopping. Must be a positive integer. - type: integer - optional: true - min: 1 - - evaluation_frequency: - description: Frequency to evaluate validation dataset to get metric scores. Must be a positive integer. - type: integer - optional: true - min: 1 - - gradient_accumulation_step: - description: Number of forward passes without updating the model weights while accumulating the gradients of those steps, and then using the accumulated gradients to compute the weight updates. Must be a positive integer. - type: integer - optional: true - min: 1 - - layers_to_freeze: - description: How many layers to freeze for your model. For instance, passing 2 as value for seresnext means freezing layer0 and layer1 referring to the below supported model layer info. Must be a positive integer. - type: integer - optional: true - min: 1 - - learning_rate: - description: Initial learning rate - type: number - optional: true - min: 0 - max: 1 - - learning_rate_scheduler: - description: Type of learning rate scheduler. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: string - optional: true - enum: ['warmup_cosine', 'step'] - - max_size: - description: Maximum size of the image to be rescaled before feeding it to the backbone. - type: integer - optional: true - min: 1 - - min_size: - description: Minimum size of the image to be rescaled before feeding it to the backbone. Must be a positive integer. - type: integer - optional: true - min: 1 - - momentum: - description: Value of momentum when optimizer is sgd. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - nesterov: - description: Enable nesterov when optimizer is sgd. - type: boolean - optional: true - - nms_iou_threshold: - description: IOU threshold used during inference in non-maximum suppression post processing. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - number_of_epochs: - description: Number of training epochs - type: integer - optional: true - min: 1 - - number_of_workers: - description: Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process. - type: integer - optional: true - - optimizer: - description: Type of optimizer - type: string - optional: true - enum: ['sgd', 'adam', 'adamw'] - - random_seed: - description: Random seed that will be set at the beginning of training. - type: integer - optional: true - - step_lr_gamma: - description: Value of gamma when learning rate scheduler is step. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: number - optional: true - - step_lr_step_size: - description: Value of step size when learning rate scheduler is step. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: integer - optional: true - - tile_grid_size: - description: The grid size to use for tiling each image. Should be passed as a string in '3x2' format. Example --tile_grid_size '3x2' - type: string - optional: true - - tile_overlap_ratio: - description: Overlap ratio between adjacent tiles in each dimension. Must be float in the range of [0, 1). - type: number - optional: true - min: 0 - max: 1 - - tile_predictions_nms_threshold: - description: The IOU threshold to use to perform NMS while merging predictions from tiles and image. Used in validation/ inference. Must be float in the range of [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - training_batch_size: - description: Training batch size. - type: integer - optional: true - min: 1 - - validation_batch_size: - description: Validation batch size. - type: integer - optional: true - min: 1 - - validation_iou_threshold: - description: IOU threshold for box matching when computing validation metrics. Must be a float in the range [0.1, 1]. - type: number - optional: true - min: 0.1 - max: 1 - - validation_metric_type: - description: Metric computation method to use for validation metrics. Must be none, coco, voc, or coco_voc. - type: string - optional: true - default: 'voc' - enum: ['none', 'coco', 'voc', 'coco_voc'] - - warmup_cosine_lr_cycles: - description: Value of cosine cycle when learning rate scheduler is warmup_cosine. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: number - optional: true - - warmup_cosine_lr_warmup_epochs: - description: Value of warmup epochs when learning rate scheduler is warmup_cosine. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: integer - optional: true - - weight_decay: - description: Value of weight decay used by the optimizer. - type: number - optional: true - min: 0 - max: 1 - -outputs: - pytorch_model_folder: - type: custom_model - description: Trained pytorch model. - mlflow_model_folder: - type: mlflow_model - description: The trained MLFlow model. - -jobs: - - finetune_common_validation: - type: command - component: azureml:finetune_common_validation:0.0.8 - compute: ${{parent.inputs.compute_model_import}} - inputs: - train_mltable_path: ${{parent.inputs.training_data}} - validation_mltable_path: ${{parent.inputs.validation_data}} - compute_model_import: ${{parent.inputs.compute_model_import}} - compute_finetune: ${{parent.inputs.compute_finetune}} - task_name: ${{parent.inputs.task_type}} - label_column_name: label - user_column_names: image_url,label - task_specific_extra_params: '"model_family=MmDetectionImage;model_name=${{parent.inputs.model_name}};metric_for_best_model=${{parent.inputs.primary_metric}};number_of_epochs=${{parent.inputs.number_of_epochs}}"' - - framework_selector: - type: command - component: azureml:image_framework_selector:0.0.20 - compute: ${{parent.inputs.compute_model_import}} - inputs: - task_type: ${{parent.inputs.task_type}} - model_name: ${{parent.inputs.model_name}} - validation_output: ${{parent.jobs.finetune_common_validation.outputs.validation_info}} - - image_instance_segmentation_runtime_component: - type: command - component: azureml:train_instance_segmentation_model:0.0.13 - compute: ${{parent.inputs.compute_finetune}} - resources: - shm_size: '16g' - inputs: - training_data: ${{parent.inputs.training_data}} - validation_data: ${{parent.inputs.validation_data}} - ams_gradient: ${{parent.inputs.ams_gradient}} - beta1: ${{parent.inputs.beta1}} - beta2: ${{parent.inputs.beta2}} - box_detections_per_image: ${{parent.inputs.box_detections_per_image}} - box_score_threshold: ${{parent.inputs.box_score_threshold}} - checkpoint_frequency: ${{parent.inputs.checkpoint_frequency}} - checkpoint_run_id: ${{parent.inputs.checkpoint_run_id}} - early_stopping: ${{parent.inputs.early_stopping}} - early_stopping_patience: ${{parent.inputs.early_stopping_patience}} - early_stopping_delay: ${{parent.inputs.early_stopping_delay}} - evaluation_frequency: ${{parent.inputs.evaluation_frequency}} - gradient_accumulation_step: ${{parent.inputs.gradient_accumulation_step}} - layers_to_freeze: ${{parent.inputs.layers_to_freeze}} - learning_rate: ${{parent.inputs.learning_rate}} - learning_rate_scheduler: ${{parent.inputs.learning_rate_scheduler}} - max_size: ${{parent.inputs.max_size}} - min_size: ${{parent.inputs.min_size}} - model_name: ${{parent.inputs.model_name}} - momentum: ${{parent.inputs.momentum}} - nesterov: ${{parent.inputs.nesterov}} - nms_iou_threshold: ${{parent.inputs.nms_iou_threshold}} - number_of_epochs: ${{parent.inputs.number_of_epochs}} - number_of_workers: ${{parent.inputs.number_of_workers}} - optimizer: ${{parent.inputs.optimizer}} - random_seed: ${{parent.inputs.random_seed}} - step_lr_gamma: ${{parent.inputs.step_lr_gamma}} - step_lr_step_size: ${{parent.inputs.step_lr_step_size}} - tile_grid_size: ${{parent.inputs.tile_grid_size}} - tile_overlap_ratio: ${{parent.inputs.tile_overlap_ratio}} - tile_predictions_nms_threshold: ${{parent.inputs.tile_predictions_nms_threshold}} - training_batch_size: ${{parent.inputs.training_batch_size}} - validation_batch_size: ${{parent.inputs.validation_batch_size}} - validation_iou_threshold: ${{parent.inputs.validation_iou_threshold}} - validation_metric_type: ${{parent.inputs.validation_metric_type}} - warmup_cosine_lr_cycles: ${{parent.inputs.warmup_cosine_lr_cycles}} - warmup_cosine_lr_warmup_epochs: ${{parent.inputs.warmup_cosine_lr_warmup_epochs}} - weight_decay: ${{parent.inputs.weight_decay}} - - mm_detection_model_import: - type: command - component: azureml:mmdetection_image_objectdetection_instancesegmentation_model_import:0.0.20 - compute: ${{parent.inputs.compute_model_import}} - inputs: - model_family: 'MmDetectionImage' - model_name: ${{parent.inputs.model_name}} - download_from_source: ${{parent.inputs.download_from_source}} - validation_output: ${{parent.jobs.finetune_common_validation.outputs.validation_info}} - - mm_detection_finetune: - type: command - component: azureml:mmdetection_image_objectdetection_instancesegmentation_finetune:0.0.21 - compute: ${{parent.inputs.compute_finetune}} - distribution: - type: pytorch - process_count_per_instance: ${{parent.inputs.process_count_per_instance}} - resources: - instance_count: ${{parent.inputs.instance_count}} - shm_size: '16g' - inputs: - # Model path is same as what is output of model selector - model_path: ${{parent.jobs.mm_detection_model_import.outputs.output_dir}} - training_data: ${{parent.inputs.training_data}} - validation_data: ${{parent.inputs.validation_data}} - image_min_size: ${{parent.inputs.min_size}} - image_max_size: ${{parent.inputs.max_size}} - iou_threshold: ${{parent.inputs.nms_iou_threshold}} - box_score_threshold: ${{parent.inputs.box_score_threshold}} - early_stopping: ${{parent.inputs.early_stopping}} - early_stopping_patience: ${{parent.inputs.early_stopping_patience}} - evaluation_steps: ${{parent.inputs.evaluation_frequency}} - gradient_accumulation_step: ${{parent.inputs.gradient_accumulation_step}} - learning_rate: ${{parent.inputs.learning_rate}} - learning_rate_scheduler: ${{parent.inputs.learning_rate_scheduler}} - number_of_epochs: ${{parent.inputs.number_of_epochs}} - number_of_workers: ${{parent.inputs.number_of_workers}} - optimizer: ${{parent.inputs.optimizer}} - random_seed: ${{parent.inputs.random_seed}} - save_as_mlflow_model: true - save_steps: ${{parent.inputs.checkpoint_frequency}} - task_name: ${{parent.inputs.task_type}} - metric_for_best_model: ${{parent.inputs.primary_metric}} - training_batch_size: ${{parent.inputs.training_batch_size}} - validation_batch_size: ${{parent.inputs.validation_batch_size}} - weight_decay: ${{parent.inputs.weight_decay}} - extra_optim_args: '"momentum=${{parent.inputs.momentum}};nesterov=${{parent.inputs.nesterov}}"' - - condition_node: - type: if_else - true_block: ${{parent.jobs.image_instance_segmentation_runtime_component}} - condition: ${{parent.jobs.framework_selector.outputs.output}} - false_block: ${{parent.jobs.mm_detection_model_import}} - - output_selector: - type: command - component: azureml:image_model_output_selector:0.0.19 - compute: ${{parent.inputs.compute_model_import}} - inputs: - mlflow_model_t: ${{parent.jobs.image_instance_segmentation_runtime_component.outputs.mlflow_model_folder}} - pytorch_model_t: ${{parent.jobs.image_instance_segmentation_runtime_component.outputs.pytorch_model_folder}} - condition: ${{parent.jobs.framework_selector.outputs.output}} - mlflow_model_f: ${{parent.jobs.mm_detection_finetune.outputs.mlflow_model_folder}} - pytorch_model_f: ${{parent.jobs.mm_detection_finetune.outputs.pytorch_model_folder}} - outputs: - mlflow_model_folder: ${{parent.outputs.mlflow_model_folder}} - pytorch_model_folder: ${{parent.outputs.pytorch_model_folder}} diff --git a/assets/training/finetune_acft_image/components/pipeline_components/mmt/asset.yaml b/assets/training/finetune_acft_image/components/pipeline_components/mmt/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_image/components/pipeline_components/mmt/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/pipeline_components/mmt/spec.yaml b/assets/training/finetune_acft_image/components/pipeline_components/mmt/spec.yaml deleted file mode 100644 index f17a5b1d74..0000000000 --- a/assets/training/finetune_acft_image/components/pipeline_components/mmt/spec.yaml +++ /dev/null @@ -1,382 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline - -version: 0.0.14 -name: mmtracking_video_multi_object_tracking_pipeline -display_name: Video Multi-Object Tracking MMTracking Pipeline -description: Pipeline component for multi-object tracking using MMTracking models. - -is_deterministic: false - -inputs: - compute_model_import: - type: string - optional: false - description: Compute to be used for model_import eg. provide 'FT-Cluster' if your compute is named 'FT-Cluster'. - - compute_finetune: - type: string - optional: false - description: Compute to be used for finetune eg. provide 'FT-Cluster' if your compute is named 'FT-Cluster'. - - # ########################### Model Selector Component ########################### # - # Model family - model_family: - type: string - optional: true - default: MmTrackingVideo - enum: - - MmTrackingVideo - description: Which framework the model belongs to. - - model_name: - type: string - optional: true - description: Please select models from AzureML Model Assets for all supported models. For MMTracking, provide the model's config name here, same as its specified in MMTracking Model Zoo. To find the correct model name, go to https://github.com/open-mmlab/mmtracking/tree/v0.14.0/configs/mot click on the model type and you will find the model name in the metafile.yml file which is present at configs//metafile.yml location. It is the user responsibility to comply with the model's license terms. - - pytorch_model: - type: custom_model - optional: true - description: Pytorch Model registered in AzureML Asset. - - mlflow_model: - type: mlflow_model - optional: true - description: Mlflow Model registered in AzureML Asset. - - download_from_source: - type: boolean - optional: true - default: false - description: Download model directly from MmTracking instead of system registry - - # ########################### Finetuning Component ########################### # - - # component input: training mltable - training_data: - type: mltable - optional: false - description: Path to the mltable of the training dataset. - - # optional component input: validation mltable - validation_data: - type: mltable - optional: true - description: Path to the mltable of the validation dataset. - - image_width: - type: integer - default: -1 - optional: true - description: Image width that is input to the network. - Default is -1 which means it would be overwritten by image_scale in model config. - - image_height: - type: integer - default: -1 - optional: true - description: Image height that is input to the network. - Default is -1 which means it would be overwritten by image_scale in model config. - - task_name: - type: string - enum: - - video-multi-object-tracking - description: Which task the model is solving. - - # primary metric #todo: add MOTA/ MOTP when the metrics are avaialble - metric_for_best_model: - type: string - optional: true - enum: - - mean_average_precision - - precision - - recall - - MOTA - - MOTP - - IDF1 - description: Specify the metric to use to compare two different models. If left empty, will be chosen automatically based on the task type and model selected. - - # Training parameters - number_of_workers: - type: integer - default: 8 - optional: true - description: Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process. - - number_of_epochs: - type: integer - optional: true - description: Number of training epochs. If left empty, will be chosen automatically based on the task type and model selected. - - max_steps: - type: integer - optional: true - description: If set to a positive number, the total number of training steps to perform. Overrides 'number_of_epochs'. In case of using a finite iterable dataset the training may stop before reaching the set number of steps when all data is exhausted. If left empty, will be chosen automatically based on the task type and model selected. - - training_batch_size: - type: integer - default: 1 - optional: true - description: Train batch size. If left empty, will be chosen automatically based on the task type and model selected. - - auto_find_batch_size: - type: boolean - default: false - optional: true - description: Flag to enable auto finding of batch size. If the provided 'per_device_train_batch_size' goes into Out Of Memory (OOM) enabling auto_find_batch_size will find the correct batch size by iteratively reducing 'per_device_train_batch_size' by a factor of 2 till the OOM is fixed. - - # learning rate and learning rate scheduler - learning_rate: - type: number - default: 0.0001 - optional: true - description: Start learning rate. Defaults to linear scheduler. If left empty, will be chosen automatically based on the task type and model selected. - - learning_rate_scheduler: - type: string - optional: true - enum: - - warmup_linear - - warmup_cosine - - warmup_cosine_with_restarts - - warmup_polynomial - - constant - - warmup_constant - description: The scheduler type to use. If left empty, will be chosen automatically based on the task type and model selected. - - warmup_steps: - type: integer - default: 5 - optional: true - description: Number of steps used for a linear warmup from 0 to learning_rate. If left empty, will be chosen automatically based on the task type and model selected. - - # optimizer - optimizer: - type: string - default: sgd - optional: true - enum: - - adamw_hf - - adamw - - sgd - - adafactor - - adagrad - description: optimizer to be used while training. If left empty, will be chosen automatically based on the task type and model selected. - - weight_decay: - type: number - default: 0.0 - optional: true - description: The weight decay to apply (if not zero) to all layers except all bias and LayerNorm weights in Adam, AdamW & SGD optimizer. If left empty, will be chosen automatically based on the task type and model selected. - - extra_optim_args: - type: string - default: "" - optional: true - description: Optional additional arguments that are supplied to SGD Optimizer. The arguments should be semi-colon separated key value pairs and should be enclosed in double quotes. For example, "momentum=0.5; nesterov=True" for sgd. Please make sure to use a valid parameter names for the chosen optimizer. For exact parameter names, please refer https://pytorch.org/docs/1.13/generated/torch.optim.SGD.html#torch.optim.SGD for SGD. Parameters supplied in extra_optim_args will take precedence over the parameter supplied via other arguments such as weight_decay. If weight_decay is provided via "weight_decay" parameter and via extra_optim_args both, values specified in extra_optim_args will be used. - - # gradient accumulation - gradient_accumulation_step: - type: integer - default: 1 - optional: true - description: Number of update steps to accumulate the gradients for, before performing a backward/update pass. If left empty, will be chosen automatically based on the task type and model selected. - - # mixed precision training - precision: - type: string - enum: - - "32" - - "16" - default: "32" - optional: true - description: Apply mixed precision training. This can reduce memory footprint by performing operations in half-precision. - - # metric thresholds - iou_threshold: - type: number - optional: true - description: IOU threshold used during inference in non-maximum suppression post processing. - - box_score_threshold: - type: number - optional: true - description: During inference, only return proposals with a score greater than `box_score_threshold`. The score is the multiplication of the objectness score and classification probability. - - # random seed - random_seed: - type: integer - default: 42 - optional: true - description: Random seed that will be set at the beginning of training. - - # evaluation strategy parameters - evaluation_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The evaluation strategy to adopt during training. Please note that the save_strategy and evaluation_strategy should match. - - evaluation_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two evals if evaluation_strategy='steps'. Please note that the saving steps should be a multiple of the evaluation steps. - - # logging strategy parameters - logging_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The logging strategy to adopt during training. - - logging_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two logs if logging_strategy='steps'. - - # Save strategy - save_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The checkpoint save strategy to adopt during training. Please note that the save_strategy and evaluation_strategy should match. - - save_steps: - type: integer - default: 500 - optional: true - description: Number of updates steps before two checkpoint saves if save_strategy="steps". Please note that the saving steps should be a multiple of the evaluation steps. - - # model checkpointing limit - save_total_limit: - type: integer - default: 5 - optional: true - description: If a value is passed, will limit the total amount of checkpoints. Deletes the older checkpoints in output_dir. If the value is -1 saves all checkpoints". - - # Early Stopping Parameters - early_stopping: - type: boolean - default: false - optional: true - description: Enable early stopping. - - early_stopping_patience: - type: integer - default: 1 - optional: true - description: Stop training when the specified metric worsens for early_stopping_patience evaluation calls. - - # Grad Norm - max_grad_norm: - type: number - optional: true - description: Maximum gradient norm (for gradient clipping). If left empty, will be chosen automatically based on the task type and model selected. - - # resume from the input model - resume_from_checkpoint: - type: boolean - default: false - optional: true - description: Loads optimizer, Scheduler and Trainer state for finetuning if true. - - save_as_mlflow_model: - type: boolean - default: true - optional: true - description: Save as mlflow model with pyfunc as flavour. - -outputs: - # ########################### Finetuning Component ########################### # - mlflow_model_folder: - type: mlflow_model - description: Output dir to save the finetune model as mlflow model. - pytorch_model_folder: - type: custom_model - description: Output dir to save the finetune model as torch model. - -jobs: - finetune_common_validation: - type: command - component: azureml:finetune_common_validation:0.0.6 - compute: ${{parent.inputs.compute_model_import}} - inputs: - mlflow_model_path: ${{parent.inputs.mlflow_model}} - train_mltable_path: ${{parent.inputs.training_data}} - validation_mltable_path: ${{parent.inputs.validation_data}} - compute_model_import: ${{parent.inputs.compute_model_import}} - compute_finetune: ${{parent.inputs.compute_finetune}} - task_name: ${{parent.inputs.task_name}} - user_column_names: image,label - task_specific_extra_params: '"model_family=${{parent.inputs.model_family}};model_name=${{parent.inputs.model_name}};metric_for_best_model=${{parent.inputs.metric_for_best_model}};number_of_epochs=${{parent.inputs.number_of_epochs}}"' - - model_import: - type: command - component: azureml:mmtracking_video_multi_object_tracking_model_import:0.0.10 - compute: ${{parent.inputs.compute_model_import}} - inputs: - model_family: ${{parent.inputs.model_family}} - model_name: ${{parent.inputs.model_name}} - pytorch_model: ${{parent.inputs.pytorch_model}} - mlflow_model: ${{parent.inputs.mlflow_model}} - download_from_source: ${{parent.inputs.download_from_source}} - validation_output: ${{parent.jobs.finetune_common_validation.outputs.validation_info}} - - finetune: - type: command - component: azureml:mmtracking_video_multi_object_tracking_finetune:0.0.10 - compute: ${{parent.inputs.compute_finetune}} - resources: - shm_size: '16g' - inputs: - # Model path is same as what is output of model selector - model_path: ${{parent.jobs.model_import.outputs.output_dir}} - training_data: ${{parent.inputs.training_data}} - validation_data: ${{parent.inputs.validation_data}} - image_width: ${{parent.inputs.image_width}} - image_height: ${{parent.inputs.image_height}} - task_name: ${{parent.inputs.task_name}} - metric_for_best_model: ${{parent.inputs.metric_for_best_model}} - number_of_workers: ${{parent.inputs.number_of_workers}} - number_of_epochs: ${{parent.inputs.number_of_epochs}} - max_steps: ${{parent.inputs.max_steps}} - training_batch_size: ${{parent.inputs.training_batch_size}} - auto_find_batch_size: ${{parent.inputs.auto_find_batch_size}} - learning_rate: ${{parent.inputs.learning_rate}} - learning_rate_scheduler: ${{parent.inputs.learning_rate_scheduler}} - warmup_steps: ${{parent.inputs.warmup_steps}} - optimizer: ${{parent.inputs.optimizer}} - weight_decay: ${{parent.inputs.weight_decay}} - extra_optim_args: ${{parent.inputs.extra_optim_args}} - gradient_accumulation_step: ${{parent.inputs.gradient_accumulation_step}} - precision: ${{parent.inputs.precision}} - iou_threshold: ${{parent.inputs.iou_threshold}} - box_score_threshold: ${{parent.inputs.box_score_threshold}} - random_seed: ${{parent.inputs.random_seed}} - evaluation_strategy: ${{parent.inputs.evaluation_strategy}} - evaluation_steps: ${{parent.inputs.evaluation_steps}} - logging_strategy: ${{parent.inputs.logging_strategy}} - logging_steps: ${{parent.inputs.logging_steps}} - save_strategy: ${{parent.inputs.save_strategy}} - save_steps: ${{parent.inputs.save_steps}} - save_total_limit: ${{parent.inputs.save_total_limit}} - early_stopping: ${{parent.inputs.early_stopping}} - early_stopping_patience: ${{parent.inputs.early_stopping_patience}} - max_grad_norm: ${{parent.inputs.max_grad_norm}} - resume_from_checkpoint: ${{parent.inputs.resume_from_checkpoint}} - save_as_mlflow_model: ${{parent.inputs.save_as_mlflow_model}} - outputs: - mlflow_model_folder: ${{parent.outputs.mlflow_model_folder}} - pytorch_model_folder: ${{parent.outputs.pytorch_model_folder}} diff --git a/assets/training/finetune_acft_image/components/pipeline_components/object_detection/asset.yaml b/assets/training/finetune_acft_image/components/pipeline_components/object_detection/asset.yaml deleted file mode 100644 index 82f46ae376..0000000000 --- a/assets/training/finetune_acft_image/components/pipeline_components/object_detection/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML Image training"] \ No newline at end of file diff --git a/assets/training/finetune_acft_image/components/pipeline_components/object_detection/spec.yaml b/assets/training/finetune_acft_image/components/pipeline_components/object_detection/spec.yaml deleted file mode 100644 index 01ef5a60d5..0000000000 --- a/assets/training/finetune_acft_image/components/pipeline_components/object_detection/spec.yaml +++ /dev/null @@ -1,464 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline - -version: 0.0.25 -name: image_object_detection_pipeline -display_name: Image Object Detection Pipeline -description: Pipeline component for image object detection. - -is_deterministic: false - -inputs: - # ------------------- Computes ------------------- - compute_model_import: - type: string - optional: false - description: Compute to be used for framework_selector eg. provide 'gpu-cluster' if your compute is named 'gpu-cluster'. - - compute_finetune: - type: string - optional: false - description: Compute to be used for running the selected framework eg. provide 'gpu-cluster' if your compute is named 'gpu-cluster'. - - instance_count: - type: integer - default: 1 - optional: true - description: Number of nodes to be used for finetuning (used for distributed training). - - process_count_per_instance: - type: integer - default: 1 - optional: true - description: Number of gpus to be used per node for finetuning, should be equal to number of gpu per node in the compute SKU used for finetune. - - # ------------------- Model Framework Selector ------------------- - model_name: - type: string - optional: true - description: Name of the model. Based on this model name, a framework will be selected (Hugging Face, MM Detection). - - download_from_source: - type: boolean - optional: true - default: false - description: Download model directly from MMDetection instead of system registry - - # ------------------- Data Inputs ------------------ - training_data: - type: mltable - optional: false - description: Path to MLTable for training data. - - validation_data: - type: mltable - optional: true - description: Path to MLTable for validation data. - - # ------------------- Task Type ------------------ - task_type: - description: Type of the task - type: string - optional: false - default: 'image-object-detection' - enum: ['image-object-detection'] - - # ------------------- Primary Metric ---------------- - primary_metric: - description: Primary metric for the task - type: string - optional: true - default: 'mean_average_precision' - enum: ['mean_average_precision'] - - # ------------------- Hyperparamters ------------------ - ams_gradient: - description: Enable ams_gradient when optimizer is adam or adamw. - type: boolean - optional: true - - beta1: - description: Value of beta1 when optimizer is adam or adamw. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - beta2: - description: Value of beta2 when optimizer is adam or adamw. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - box_detections_per_image: - description: Maximum number of detections per image, for all classes. Must be a positive integer. - type: integer - optional: true - min: 1 - - box_score_threshold: - description: During inference, only return proposals with a classification score greater than box_score_threshold. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - checkpoint_frequency: - description: Frequency to store model checkpoints. Must be a positive integer. - type: integer - optional: true - min: 0 - - checkpoint_run_id: - description: The run ID of the experiment that has a pretrained checkpoint for incremental training. - type: string - optional: true - - early_stopping: - description: Enable early stopping logic during training. - type: boolean - optional: true - - early_stopping_patience: - description: Minimum number of epochs or validation evaluations with no primary metric improvement before the run is stopped. Must be a positive integer. - type: integer - optional: true - min: 1 - - early_stopping_delay: - description: Minimum number of epochs or validation evaluations to wait before primary metric improvement is tracked for early stopping. Must be a positive integer. - type: integer - optional: true - min: 1 - - evaluation_frequency: - description: Frequency to evaluate validation dataset to get metric scores. Must be a positive integer. - type: integer - optional: true - min: 1 - - gradient_accumulation_step: - description: Number of forward passes without updating the model weights while accumulating the gradients of those steps, and then using the accumulated gradients to compute the weight updates. Must be a positive integer. - type: integer - optional: true - min: 1 - - layers_to_freeze: - description: How many layers to freeze for your model. For instance, passing 2 as value for seresnext means freezing layer0 and layer1 referring to the below supported model layer info. Must be a positive integer. - type: integer - optional: true - min: 1 - - learning_rate: - description: Initial learning rate - type: number - optional: true - min: 0 - max: 1 - - learning_rate_scheduler: - description: Type of learning rate scheduler. Must be warmup_cosine or step. - type: string - optional: true - enum: ['warmup_cosine', 'step'] - - max_size: - description: Maximum size of the image to be rescaled before feeding it to the backbone. - type: integer - optional: true - min: 1 - - min_size: - description: Minimum size of the image to be rescaled before feeding it to the backbone. Must be a positive integer. - type: integer - optional: true - min: 1 - - momentum: - description: Value of momentum when optimizer is sgd. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - nesterov: - description: Enable nesterov when optimizer is sgd. - type: boolean - optional: true - - nms_iou_threshold: - description: IOU threshold used during inference in non-maximum suppression post processing. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - number_of_epochs: - description: Number of training epochs - type: integer - optional: true - min: 1 - - number_of_workers: - description: Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process. - type: integer - optional: true - - optimizer: - description: Type of optimizer - type: string - optional: true - enum: ['sgd', 'adam', 'adamw'] - - random_seed: - description: Random seed that will be set at the beginning of training. - type: integer - optional: true - - step_lr_gamma: - description: Value of gamma when learning rate scheduler is step. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - step_lr_step_size: - description: Value of step size when learning rate scheduler is step. Must be a positive integer. - type: integer - optional: true - min: 0 - - tile_grid_size: - description: The grid size to use for tiling each image. Should be passed as a string in '3x2' format. Example --tile_grid_size '3x2'. For more information please visit https://learn.microsoft.com/en-us/azure/machine-learning/how-to-use-automl-small-object-detect?tabs=CLI-v2. - type: string - optional: true - - tile_overlap_ratio: - description: Overlap ratio between adjacent tiles in each dimension. Must be float in the range of [0, 1). - type: number - optional: true - min: 0 - max: 1 - - tile_predictions_nms_threshold: - description: The IOU threshold to use to perform NMS while merging predictions from tiles and image. Used in validation/ inference. Must be float in the range of [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - training_batch_size: - description: Training batch size. - type: integer - optional: true - min: 1 - - validation_batch_size: - description: Validation batch size. - type: integer - optional: true - min: 1 - - validation_iou_threshold: - description: IOU threshold for box matching when computing validation metrics. Must be a float in the range [0.1, 1]. - type: number - optional: true - min: 0.1 - max: 1 - - validation_metric_type: - description: Metric computation method to use for validation metrics. Must be none, coco, voc, or coco_voc. - type: string - optional: true - default: 'voc' - enum: ['none', 'coco', 'voc', 'coco_voc'] - - warmup_cosine_lr_cycles: - description: Value of cosine cycle when learning rate scheduler is warmup_cosine. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - - warmup_cosine_lr_warmup_epochs: - description: Value of warmup epochs when learning rate scheduler is warmup_cosine. Must be a positive integer. - type: integer - optional: true - min: 0 - - weight_decay: - description: Value of weight decay used by the optimizer. - type: number - optional: true - min: 0 - max: 1 - - # ------------------- Yolov5 Model Specific Hyperparamters ------------------ - model_size: - description: Model size for yolov5. - type: string - optional: true - default: medium - enum: ['small', 'medium', 'large', 'xlarge'] - - multi_scale: - description: Enable multi-scale image by varying image size by +/- 50%. - type: boolean - optional: true - - image_size: - description: Image size for train and validation for yolov5 model. - type: integer - optional: true - min: 1 - -outputs: - pytorch_model_folder: - type: custom_model - description: Trained pytorch model. - mlflow_model_folder: - type: mlflow_model - description: The trained MLFlow model. - -jobs: - - finetune_common_validation: - type: command - component: azureml:finetune_common_validation:0.0.8 - compute: ${{parent.inputs.compute_model_import}} - inputs: - train_mltable_path: ${{parent.inputs.training_data}} - validation_mltable_path: ${{parent.inputs.validation_data}} - compute_model_import: ${{parent.inputs.compute_model_import}} - compute_finetune: ${{parent.inputs.compute_finetune}} - task_name: ${{parent.inputs.task_type}} - label_column_name: label - user_column_names: image_url,label - task_specific_extra_params: '"model_family=MmDetectionImage;model_name=${{parent.inputs.model_name}};metric_for_best_model=${{parent.inputs.primary_metric}};number_of_epochs=${{parent.inputs.number_of_epochs}}"' - - framework_selector: - type: command - component: azureml:image_framework_selector:0.0.20 - compute: ${{parent.inputs.compute_model_import}} - inputs: - task_type: ${{parent.inputs.task_type}} - model_name: ${{parent.inputs.model_name}} - validation_output: ${{parent.jobs.finetune_common_validation.outputs.validation_info}} - - image_object_detection_runtime_component: - type: command - component: azureml:train_object_detection_model:0.0.13 - compute: ${{parent.inputs.compute_finetune}} - resources: - shm_size: '16g' - inputs: - training_data: ${{parent.inputs.training_data}} - validation_data: ${{parent.inputs.validation_data}} - ams_gradient: ${{parent.inputs.ams_gradient}} - beta1: ${{parent.inputs.beta1}} - beta2: ${{parent.inputs.beta2}} - box_detections_per_image: ${{parent.inputs.box_detections_per_image}} - box_score_threshold: ${{parent.inputs.box_score_threshold}} - checkpoint_frequency: ${{parent.inputs.checkpoint_frequency}} - checkpoint_run_id: ${{parent.inputs.checkpoint_run_id}} - early_stopping: ${{parent.inputs.early_stopping}} - early_stopping_patience: ${{parent.inputs.early_stopping_patience}} - early_stopping_delay: ${{parent.inputs.early_stopping_delay}} - evaluation_frequency: ${{parent.inputs.evaluation_frequency}} - gradient_accumulation_step: ${{parent.inputs.gradient_accumulation_step}} - layers_to_freeze: ${{parent.inputs.layers_to_freeze}} - learning_rate: ${{parent.inputs.learning_rate}} - learning_rate_scheduler: ${{parent.inputs.learning_rate_scheduler}} - max_size: ${{parent.inputs.max_size}} - min_size: ${{parent.inputs.min_size}} - model_name: ${{parent.inputs.model_name}} - model_size: ${{parent.inputs.model_size}} - momentum: ${{parent.inputs.momentum}} - multi_scale: ${{parent.inputs.multi_scale}} - nesterov: ${{parent.inputs.nesterov}} - nms_iou_threshold: ${{parent.inputs.nms_iou_threshold}} - number_of_epochs: ${{parent.inputs.number_of_epochs}} - number_of_workers: ${{parent.inputs.number_of_workers}} - optimizer: ${{parent.inputs.optimizer}} - random_seed: ${{parent.inputs.random_seed}} - step_lr_gamma: ${{parent.inputs.step_lr_gamma}} - step_lr_step_size: ${{parent.inputs.step_lr_step_size}} - tile_grid_size: ${{parent.inputs.tile_grid_size}} - tile_overlap_ratio: ${{parent.inputs.tile_overlap_ratio}} - tile_predictions_nms_threshold: ${{parent.inputs.tile_predictions_nms_threshold}} - training_batch_size: ${{parent.inputs.training_batch_size}} - validation_batch_size: ${{parent.inputs.validation_batch_size}} - validation_iou_threshold: ${{parent.inputs.validation_iou_threshold}} - validation_metric_type: ${{parent.inputs.validation_metric_type}} - warmup_cosine_lr_cycles: ${{parent.inputs.warmup_cosine_lr_cycles}} - warmup_cosine_lr_warmup_epochs: ${{parent.inputs.warmup_cosine_lr_warmup_epochs}} - weight_decay: ${{parent.inputs.weight_decay}} - - mm_detection_model_import: - type: command - component: azureml:mmdetection_image_objectdetection_instancesegmentation_model_import:0.0.20 - compute: ${{parent.inputs.compute_model_import}} - inputs: - model_family: 'MmDetectionImage' - model_name: ${{parent.inputs.model_name}} - download_from_source: ${{parent.inputs.download_from_source}} - validation_output: ${{parent.jobs.finetune_common_validation.outputs.validation_info}} - - mm_detection_finetune: - type: command - component: azureml:mmdetection_image_objectdetection_instancesegmentation_finetune:0.0.21 - compute: ${{parent.inputs.compute_finetune}} - distribution: - type: pytorch - process_count_per_instance: ${{parent.inputs.process_count_per_instance}} - resources: - instance_count: ${{parent.inputs.instance_count}} - shm_size: '16g' - inputs: - # Model path is same as what is output of model selector - model_path: ${{parent.jobs.mm_detection_model_import.outputs.output_dir}} - training_data: ${{parent.inputs.training_data}} - validation_data: ${{parent.inputs.validation_data}} - image_min_size: ${{parent.inputs.min_size}} - image_max_size: ${{parent.inputs.max_size}} - iou_threshold: ${{parent.inputs.nms_iou_threshold}} - box_score_threshold: ${{parent.inputs.box_score_threshold}} - early_stopping: ${{parent.inputs.early_stopping}} - early_stopping_patience: ${{parent.inputs.early_stopping_patience}} - evaluation_steps: ${{parent.inputs.evaluation_frequency}} - gradient_accumulation_step: ${{parent.inputs.gradient_accumulation_step}} - learning_rate: ${{parent.inputs.learning_rate}} - learning_rate_scheduler: ${{parent.inputs.learning_rate_scheduler}} - number_of_epochs: ${{parent.inputs.number_of_epochs}} - number_of_workers: ${{parent.inputs.number_of_workers}} - optimizer: ${{parent.inputs.optimizer}} - random_seed: ${{parent.inputs.random_seed}} - save_as_mlflow_model: true - save_steps: ${{parent.inputs.checkpoint_frequency}} - task_name: ${{parent.inputs.task_type}} - metric_for_best_model: ${{parent.inputs.primary_metric}} - training_batch_size: ${{parent.inputs.training_batch_size}} - validation_batch_size: ${{parent.inputs.validation_batch_size}} - weight_decay: ${{parent.inputs.weight_decay}} - extra_optim_args: '"momentum=${{parent.inputs.momentum}};nesterov=${{parent.inputs.nesterov}}"' - - condition_node: - type: if_else - true_block: ${{parent.jobs.image_object_detection_runtime_component}} - condition: ${{parent.jobs.framework_selector.outputs.output}} - false_block: ${{parent.jobs.mm_detection_model_import}} - - output_selector: - type: command - component: azureml:image_model_output_selector:0.0.19 - compute: ${{parent.inputs.compute_model_import}} - inputs: - mlflow_model_t: ${{parent.jobs.image_object_detection_runtime_component.outputs.mlflow_model_folder}} - pytorch_model_t: ${{parent.jobs.image_object_detection_runtime_component.outputs.pytorch_model_folder}} - condition: ${{parent.jobs.framework_selector.outputs.output}} - mlflow_model_f: ${{parent.jobs.mm_detection_finetune.outputs.mlflow_model_folder}} - pytorch_model_f: ${{parent.jobs.mm_detection_finetune.outputs.pytorch_model_folder}} - outputs: - mlflow_model_folder: ${{parent.outputs.mlflow_model_folder}} - pytorch_model_folder: ${{parent.outputs.pytorch_model_folder}} diff --git a/assets/training/finetune_acft_multimodal/components/finetune/multimodal_classification/asset.yaml b/assets/training/finetune_acft_multimodal/components/finetune/multimodal_classification/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_multimodal/components/finetune/multimodal_classification/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_multimodal/components/finetune/multimodal_classification/spec.yaml b/assets/training/finetune_acft_multimodal/components/finetune/multimodal_classification/spec.yaml deleted file mode 100644 index 1a244aada3..0000000000 --- a/assets/training/finetune_acft_multimodal/components/finetune/multimodal_classification/spec.yaml +++ /dev/null @@ -1,361 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.3 -name: multimodal_classification_finetune -display_name: Multimodal Classification using MMEFT -description: Component to finetune multimodal models for classification using MMEFT - -is_deterministic: True - -environment: azureml://registries/azureml/environments/acft-multimodal-gpu/versions/20 - -code: ../../../src/finetune - -distribution: - type: pytorch - -inputs: - problem_type: - type: string - default: multimodal-classification-singlelabel - optional: false - enum: - - multimodal-classification-singlelabel - - multimodal-classification-multilabel - description: Specify whether its single-label or multi-label multimodal classification task. - - # Training parameters - number_of_epochs: - type: integer - default: 1 - optional: true - description: training epochs - - max_steps: - type: integer - default: -1 - optional: true - description: If set to a positive number, the total number of training steps to perform. Overrides 'epochs'. In case of using a finite iterable dataset the training may stop before reaching the set number of steps when all data is exhausted. - - training_batch_size: - type: integer - default: 8 - optional: true - description: Train batch size - - validation_batch_size: - type: integer - default: 64 - optional: true - description: Validation batch size - - auto_find_batch_size: - type: string - enum: - - "true" - - "false" - default: "false" - optional: true - description: Flag to enable auto finding of batch size. If the provided 'training_batch_size' goes into Out Of Memory (OOM) enabling auto_find_batch_size will find the correct batch size by iteratively reducing 'training_batch_size' by a factor of 2 till the OOM is fixed - - optimizer: - type: string - default: adamw_hf - optional: true - enum: - - adamw_hf - - adamw_torch - # - adamw_apex_fused - - adafactor - description: Optimizer to be used while training - - learning_rate: - type: number - default: 0.001 - optional: true - description: Start learning rate. Defaults to linear scheduler. - - warmup_steps: - type: integer - default: 0 - optional: true - description: Number of steps used for a linear warmup from 0 to learning_rate - - weight_decay: - type: number - default: 0.0 - optional: true - description: The weight decay to apply (if not zero) to all layers except all bias and LayerNorm weights in AdamW optimizer - - adam_beta1: - type: number - default: 0.9 - optional: true - description: The beta1 hyperparameter for the AdamW optimizer - - adam_beta2: - type: number - default: 0.999 - optional: true - description: The beta2 hyperparameter for the AdamW optimizer - - adam_epsilon: - type: number - default: 1e-8 - optional: true - description: The epsilon hyperparameter for the AdamW optimizer - - gradient_accumulation_steps: - type: integer - default: 64 - optional: true - description: Number of updates steps to accumulate the gradients for, before performing a backward/update pass - - learning_rate_scheduler: - type: string - default: linear - optional: true - enum: - - linear - - cosine - - cosine_with_restarts - - polynomial - - constant - - constant_with_warmup - description: The scheduler type to use - - precision: - type: string - enum: - - "32" - - "16" - default: "32" - optional: true - description: Apply mixed precision training. This can reduce memory footprint by performing operations in half-precision. - - random_seed: - type: integer - default: 42 - optional: true - description: Random seed that will be set at the beginning of training - - enable_full_determinism: - type: string - enum: - - "true" - - "false" - default: "false" - optional: true - description: Ensure reproducible behavior during distributed training - - dataloader_num_workers: - type: integer - default: 0 - optional: true - description: Number of subprocesses to use for data loading. 0 means that the data will be loaded in the main process. - - ignore_mismatched_sizes: - type: string - enum: - - "true" - - "false" - default: "true" - optional: true - description: Whether or not to raise an error if some of the weights from the checkpoint do not have the same size as the weights of the model - - max_grad_norm: - type: number - default: 1.0 - optional: true - description: "Maximum gradient norm (for gradient clipping)" - - evaluation_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The evaluation strategy to adopt during training - - evaluation_steps_interval: - type: number - default: 0.0 - optional: true - description: The evaluation steps in fraction of an epoch steps to adopt during training. Overwrites evaluation_steps if not 0. - - evaluation_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two evals if evaluation_strategy='steps' - - logging_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The logging strategy to adopt during training. - - logging_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two logs if logging_strategy='steps' - - primary_metric: - type: string - default: loss - optional: true - enum: - - loss - - f1_macro - - mcc - - accuracy - - precision_macro - - recall_macro - description: Specify the metric to use to compare two different models - - resume_from_checkpoint: - type: string - default: "false" - optional: true - enum: - - "true" - - "false" - description: Loads Optimizer, Scheduler and Trainer state for finetuning if true - - save_total_limit: - type: integer - default: -1 - optional: true - description: If a value is passed, will limit the total amount of checkpoints. Deletes the older checkpoints in output_dir. If the value is -1 saves all checkpoints" - - # Early Stopping Parameters - apply_early_stopping: - type: string - default: "false" - optional: true - enum: - - "true" - - "false" - description: Enable early stopping - - early_stopping_patience: - type: integer - default: 1 - optional: true - description: Stop training when the specified metric worsens for early_stopping_patience evaluation calls - - early_stopping_threshold: - type: number - default: 0.0 - optional: true - description: Denotes how much the specified metric must improve to satisfy early stopping conditions - - # Deepspeed Parameters - apply_deepspeed: - type: string - enum: - - "true" - - "false" - default: "false" - optional: true - description: If set to true, will enable deepspeed for training - - deepspeed_config: - type: uri_file - optional: true - description: Deepspeed config to be used for finetuning - - # ORT Parameters - apply_ort: - type: string - enum: - - "true" - - "false" - default: "false" - optional: true - description: If set to true, will use the ONNXRunTime training - - # MLFlow Parameters - save_as_mlflow_model: - type: string - enum: - - "true" - - "false" - default: "true" - optional: true - description: If set to true, will save as mlflow model with pyfunc as flavour - - # Dataset parameterss - preprocess_output: - type: uri_folder - optional: false - description: output folder of preprocessor containing preprocessed metadata information - - model_selector_output: - type: uri_folder - optional: false - description: output folder of model selector containing model metadata like config, checkpoints, tokenizer config - -outputs: - pytorch_model_folder: - type: uri_folder - description: Output dir to save the finetune model and other metadata - - mlflow_model_folder: - type: mlflow_model - description: Output dir to save the finetune model as mlflow model - -command: >- - python finetune.py - --problem_type ${{inputs.problem_type}} - --apply_lora "false" - --merge_lora_weights "true" - --lora_alpha 128 - --lora_r 8 - --lora_dropout 0.0 - $[[--num_train_epochs ${{inputs.number_of_epochs}}]] - $[[--max_steps ${{inputs.max_steps}}]] - $[[--per_device_train_batch_size ${{inputs.training_batch_size}}]] - $[[--per_device_eval_batch_size ${{inputs.validation_batch_size}}]] - $[[--auto_find_batch_size ${{inputs.auto_find_batch_size}}]] - $[[--optim ${{inputs.optimizer}}]] - $[[--learning_rate ${{inputs.learning_rate}}]] - $[[--warmup_steps ${{inputs.warmup_steps}}]] - $[[--weight_decay ${{inputs.weight_decay}}]] - $[[--adam_beta1 ${{inputs.adam_beta1}}]] - $[[--adam_beta2 ${{inputs.adam_beta2}}]] - $[[--adam_epsilon ${{inputs.adam_epsilon}}]] - $[[--gradient_accumulation_steps ${{inputs.gradient_accumulation_steps}}]] - $[[--lr_scheduler_type ${{inputs.learning_rate_scheduler}}]] - $[[--precision ${{inputs.precision}}]] - $[[--seed ${{inputs.random_seed}}]] - $[[--enable_full_determinism - ${{inputs.enable_full_determinism}}]] - $[[--dataloader_num_workers ${{inputs.dataloader_num_workers}}]] - $[[--ignore_mismatched_sizes ${{inputs.ignore_mismatched_sizes}}]] - $[[--max_grad_norm ${{inputs.max_grad_norm}}]] - $[[--evaluation_strategy ${{inputs.evaluation_strategy}}]] - $[[--evaluation_steps_interval ${{inputs.evaluation_steps_interval}}]] - $[[--eval_steps ${{inputs.evaluation_steps}}]] - $[[--logging_strategy ${{inputs.logging_strategy}}]] - $[[--logging_steps ${{inputs.logging_steps}}]] - $[[--metric_for_best_model ${{inputs.primary_metric}}]] - $[[--resume_from_checkpoint ${{inputs.resume_from_checkpoint}}]] - $[[--save_total_limit ${{inputs.save_total_limit}}]] - $[[--apply_early_stopping ${{inputs.apply_early_stopping}}]] - $[[--early_stopping_patience ${{inputs.early_stopping_patience}}]] - $[[--early_stopping_threshold ${{inputs.early_stopping_threshold}}]] - $[[--apply_ort ${{inputs.apply_ort}}]] - $[[--apply_deepspeed ${{inputs.apply_deepspeed}}]] - $[[--deepspeed ${{inputs.deepspeed_config}}]] - $[[--save_as_mlflow_model ${{inputs.save_as_mlflow_model}}]] - --model_selector_output ${{inputs.model_selector_output}} - --preprocess_output ${{inputs.preprocess_output}} - --pytorch_model_folder ${{outputs.pytorch_model_folder}} - --mlflow_model_folder ${{outputs.mlflow_model_folder}} diff --git a/assets/training/finetune_acft_multimodal/components/model_import/multimodal_classification/asset.yaml b/assets/training/finetune_acft_multimodal/components/model_import/multimodal_classification/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_multimodal/components/model_import/multimodal_classification/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_multimodal/components/model_import/multimodal_classification/spec.yaml b/assets/training/finetune_acft_multimodal/components/model_import/multimodal_classification/spec.yaml deleted file mode 100644 index fa5d97fc51..0000000000 --- a/assets/training/finetune_acft_multimodal/components/model_import/multimodal_classification/spec.yaml +++ /dev/null @@ -1,47 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.3 -name: multimodal_classification_model_import -display_name: Multimodal Classification Model Import -description: Import PyTorch / MLflow model - -is_deterministic: True - -environment: azureml://registries/azureml/environments/acft-multimodal-gpu/versions/20 - -code: ../../../src/model_import/ - -inputs: - - data_modalities: - type: string - enum: - - "text-image" - - "text-image-tabular" - default: "text-image-tabular" - description: Modalities to be supported - - # Continual-Finetuning model path - pytorch_model_path: - type: custom_model - optional: true - description: Input folder path containing pytorch model for further finetuning. Proper model/huggingface id must be passed. - - mlflow_model_path: - type: mlflow_model - optional: true - description: Input folder path containing mlflow model for further finetuning. Proper model/huggingface id must be passed. - -outputs: - output_dir: - type: uri_folder - description: folder to store model metadata - -command: >- - python model_import.py - --model_name_or_path "openai/clip-vit-base-patch32" - --data_modalities ${{inputs.data_modalities}} - $[[--pytorch_model_path ${{inputs.pytorch_model_path}}]] - $[[--mlflow_model_path ${{inputs.mlflow_model_path}}]] - --output_dir ${{outputs.output_dir}} diff --git a/assets/training/finetune_acft_multimodal/components/pipeline_components/multimodal_classification/asset.yaml b/assets/training/finetune_acft_multimodal/components/pipeline_components/multimodal_classification/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_multimodal/components/pipeline_components/multimodal_classification/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_multimodal/components/pipeline_components/multimodal_classification/spec.yaml b/assets/training/finetune_acft_multimodal/components/pipeline_components/multimodal_classification/spec.yaml deleted file mode 100644 index 4efb4b9946..0000000000 --- a/assets/training/finetune_acft_multimodal/components/pipeline_components/multimodal_classification/spec.yaml +++ /dev/null @@ -1,434 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -type: pipeline - -version: 0.0.3 -name: multimodal_classification_pipeline -display_name: Multimodal Classification Pipeline -description: Pipeline component for multimodal classification models. - -is_deterministic: false - -inputs: - # Compute parameters - compute_model_import: - type: string - optional: false - description: compute to be used for model_selector eg. provide 'FT-Cluster' if your compute is named 'FT-Cluster'. - - compute_preprocess: - type: string - optional: false - description: compute to be used for preprocess eg. provide 'FT-Cluster' if your compute is named 'FT-Cluster'. - - compute_finetune: - type: string - optional: false - description: compute to be used for finetune eg. provide 'FT-Cluster' if your compute is named 'FT-Cluster'. - - instance_count: - type: integer - default: 1 - optional: true - description: Number of nodes to be used for finetuning (used for distributed training). - - process_count_per_instance: - type: integer - default: 1 - optional: true - description: Number of gpus to be used per node for finetuning, should be equal to number of gpu per node in the compute SKU used for finetune. - - # ########################### Model Selector Component ########################### # - data_modalities: - type: string - enum: - - "text-image" - - "text-image-tabular" - default: "text-image-tabular" - description: Modalities to be supported. - - # pytorch_model_path: - # type: custom_model - # optional: true - # description: Input folder path containing pytorch model in azureml registry. - - mlflow_model_path: - type: mlflow_model - optional: false - description: Path to multimodal model in azureml registry. - - # ########################### Data Preprocessing Component ########################### # - problem_type: - type: string - default: multimodal-classification-singlelabel - optional: false - enum: - - multimodal-classification-singlelabel - - multimodal-classification-multilabel - description: Specify whether its single-label or multi-label classification task. - - label_column: - type: string - optional: false - description: label column name. - - image_column: - type: string - optional: false - description: Image column name. - - drop_columns: - type: string - default: "" - optional: true - description: Columns to ignore. - - numerical_columns_overrides: - type: string - default: "" - optional: true - description: Columns to treat as numerical. Overrides automatic column purpose detection. - - categorical_columns_overrides: - type: string - default: "" - optional: true - description: Columns to treat as categorical. Overrides automatic column purpose detection. - - text_columns_overrides: - type: string - default: "" - optional: true - description: Columns to treat as text. Overrides automatic column purpose detection. - - # Inputs - training_data: - type: mltable - optional: false - description: Enter the train mltable path. - - validation_data: - type: mltable - optional: false - description: Enter the validation mltable path. - - # ########################### Finetuning Component ########################### # - - # Training parameters - number_of_epochs: - type: integer - default: 15 - optional: true - description: training epochs - - max_steps: - type: integer - default: -1 - optional: true - description: If set to a positive number, the total number of training steps to perform. Overrides 'number_of_epochs'. In case of using a finite iterable dataset the training may stop before reaching the set number of steps when all data is exhausted. - - training_batch_size: - type: integer - default: 1 - optional: true - description: Train batch size. - - validation_batch_size: - type: integer - default: 1 - optional: true - description: Validation batch size. - - auto_find_batch_size: - type: string - enum: - - "true" - - "false" - default: "false" - optional: true - description: Flag to enable auto finding of batch size. If the provided 'training_batch_size' goes into Out Of Memory (OOM) enabling auto_find_batch_size will find the correct batch size by iteratively reducing 'training_batch_size' by a factor of 2 till the OOM is fixed. - - optimizer: - type: string - default: adamw_hf - optional: true - enum: - - adamw_hf - - adamw_torch - # - adamw_apex_fused - - adafactor - description: Optimizer to be used while training. - - learning_rate: - type: number - default: 0.00002 - optional: true - description: Start learning rate. Defaults to linear scheduler. - - warmup_steps: - type: integer - default: 0 - optional: true - description: Number of steps used for a linear warmup from 0 to learning_rate. - - weight_decay: - type: number - default: 0.0 - optional: true - description: The weight decay to apply (if not zero) to all layers except all bias and LayerNorm weights in AdamW optimizer. - - adam_beta1: - type: number - default: 0.9 - optional: true - description: The beta1 hyperparameter for the AdamW optimizer. - - adam_beta2: - type: number - default: 0.999 - optional: true - description: The beta2 hyperparameter for the AdamW optimizer. - - adam_epsilon: - type: number - default: 1e-8 - optional: true - description: The epsilon hyperparameter for the AdamW optimizer. - - gradient_accumulation_steps: - type: integer - default: 64 - optional: true - description: Number of updates steps to accumulate the gradients for, before performing a backward/update pass. - - learning_rate_scheduler: - type: string - default: linear - optional: true - enum: - - linear - - cosine - - cosine_with_restarts - - polynomial - - constant - - constant_with_warmup - description: The scheduler type to use. - - precision: - type: string - enum: - - "32" - - "16" - default: "32" - optional: true - description: Apply mixed precision training. This can reduce memory footprint by performing operations in half-precision. - - random_seed: - type: integer - default: 42 - optional: true - description: Random seed that will be set at the beginning of training. - - evaluation_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The evaluation strategy to adopt during training. - - evaluation_steps_interval: - type: number - default: 0.0 - optional: true - description: The evaluation steps in fraction of an epoch steps to adopt during training. Overwrites evaluation_steps if not 0. - - evaluation_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two evals if evaluation_strategy='steps'. - - logging_strategy: - type: string - default: epoch - optional: true - enum: - - epoch - - steps - description: The logging strategy to adopt during training. - - logging_steps: - type: integer - default: 500 - optional: true - description: Number of update steps between two logs if logging_strategy='steps'. - - primary_metric: - type: string - default: loss - optional: true - enum: - - loss - - f1_macro - - mcc - - accuracy - - precision_macro - - recall_macro - description: Specify the metric to use to compare two different models. - - resume_from_checkpoint: - type: string - default: "false" - optional: true - enum: - - "true" - - "false" - description: Loads Optimizer, Scheduler and Trainer state for finetuning if true. - - save_total_limit: - type: integer - default: -1 - optional: true - description: If a value is passed, will limit the total amount of checkpoints. Deletes the older checkpoints in output_dir. If the value is -1 saves all checkpoints". - - # Early Stopping Parameters - apply_early_stopping: - type: string - default: "false" - optional: true - enum: - - "true" - - "false" - description: Enable early stopping. - - early_stopping_patience: - type: integer - default: 1 - optional: true - description: Stop training when the specified metric worsens for early_stopping_patience evaluation calls. - - early_stopping_threshold: - type: number - default: 0.0 - optional: true - description: Denotes how much the specified metric must improve to satisfy early stopping conditions. - - # Deepspeed Parameters - apply_deepspeed: - type: string - enum: - - "true" - - "false" - default: "false" - optional: true - description: If set to true, will enable deepspeed for training. - - deepspeed_config: - type: uri_file - optional: true - description: Deepspeed config to be used for finetuning. - - # ORT Parameters - apply_ort: - type: string - enum: - - "true" - - "false" - default: "false" - optional: true - description: If set to true, will use the ONNXRunTime training. - - # MLFlow Parameters - save_as_mlflow_model: - type: string - enum: - - "true" - - "false" - default: "true" - optional: true - description: If set to true, will save as mlflow model with pyfunc as flavour. - - -outputs: - # ########################### Finetuning Component ########################### # - mlflow_model_folder: - type: mlflow_model - description: Output dir to save the finetune model as mlflow model. - pytorch_model_folder: - type: custom_model - description: Output dir to save the finetune model as torch model. - -jobs: - multimodal_classification_model_import: - type: command - component: azureml:multimodal_classification_model_import:0.0.2 - compute: ${{parent.inputs.compute_model_import}} - inputs: - data_modalities: ${{parent.inputs.data_modalities}} - # pytorch_model_path: ${{parent.inputs.pytorch_model_path}} - mlflow_model_path: ${{parent.inputs.mlflow_model_path}} - - multimodal_classification_data_preprocess: - type: command - component: azureml:multimodal_classification_datapreprocessing:0.0.2 - compute: ${{parent.inputs.compute_preprocess}} - inputs: - problem_type: ${{parent.inputs.problem_type}} - label_column: ${{parent.inputs.label_column}} - image_column: ${{parent.inputs.image_column}} - drop_columns: ${{parent.inputs.drop_columns}} - numerical_columns_overrides: ${{parent.inputs.numerical_columns_overrides}} - categorical_columns_overrides: ${{parent.inputs.categorical_columns_overrides}} - text_columns_overrides: ${{parent.inputs.text_columns_overrides}} - training_data: ${{parent.inputs.training_data}} - validation_data: ${{parent.inputs.validation_data}} - model_selector_output: ${{parent.jobs.multimodal_classification_model_import.outputs.output_dir}} - - multimodal_classification_finetune: - type: command - component: azureml:multimodal_classification_finetune:0.0.2 - compute: ${{parent.inputs.compute_finetune}} - distribution: - type: pytorch - process_count_per_instance: ${{parent.inputs.process_count_per_instance}} - resources: - instance_count: ${{parent.inputs.instance_count}} - inputs: - problem_type: ${{parent.inputs.problem_type}} - number_of_epochs: ${{parent.inputs.number_of_epochs}} - max_steps: ${{parent.inputs.max_steps}} - training_batch_size: ${{parent.inputs.training_batch_size}} - validation_batch_size: ${{parent.inputs.validation_batch_size}} - auto_find_batch_size: ${{parent.inputs.auto_find_batch_size}} - optimizer: ${{parent.inputs.optimizer}} - learning_rate: ${{parent.inputs.learning_rate}} - warmup_steps: ${{parent.inputs.warmup_steps}} - weight_decay: ${{parent.inputs.weight_decay}} - adam_beta1: ${{parent.inputs.adam_beta1}} - adam_beta2: ${{parent.inputs.adam_beta2}} - adam_epsilon: ${{parent.inputs.adam_epsilon}} - gradient_accumulation_steps: ${{parent.inputs.gradient_accumulation_steps}} - learning_rate_scheduler: ${{parent.inputs.learning_rate_scheduler}} - precision: ${{parent.inputs.precision}} - random_seed: ${{parent.inputs.random_seed}} - evaluation_strategy: ${{parent.inputs.evaluation_strategy}} - evaluation_steps_interval: ${{parent.inputs.evaluation_steps_interval}} - evaluation_steps: ${{parent.inputs.evaluation_steps}} - logging_strategy: ${{parent.inputs.logging_strategy}} - logging_steps: ${{parent.inputs.logging_steps}} - primary_metric: ${{parent.inputs.primary_metric}} - resume_from_checkpoint: ${{parent.inputs.resume_from_checkpoint}} - save_total_limit: ${{parent.inputs.save_total_limit}} - apply_early_stopping: ${{parent.inputs.apply_early_stopping}} - early_stopping_patience: ${{parent.inputs.early_stopping_patience}} - early_stopping_threshold: ${{parent.inputs.early_stopping_threshold}} - apply_deepspeed: ${{parent.inputs.apply_deepspeed}} - deepspeed_config: ${{parent.inputs.deepspeed_config}} - apply_ort: ${{parent.inputs.apply_ort}} - save_as_mlflow_model: ${{parent.inputs.save_as_mlflow_model}} - preprocess_output: ${{parent.jobs.multimodal_classification_data_preprocess.outputs.output_dir}} - model_selector_output: ${{parent.jobs.multimodal_classification_model_import.outputs.output_dir}} - outputs: - pytorch_model_folder: ${{parent.outputs.pytorch_model_folder}} - mlflow_model_folder: ${{parent.outputs.mlflow_model_folder}} diff --git a/assets/training/finetune_acft_multimodal/components/preprocess/multimodal_classification/asset.yaml b/assets/training/finetune_acft_multimodal/components/preprocess/multimodal_classification/asset.yaml deleted file mode 100644 index 5b6586b9b2..0000000000 --- a/assets/training/finetune_acft_multimodal/components/preprocess/multimodal_classification/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Foundational Models", "Finetune"] \ No newline at end of file diff --git a/assets/training/finetune_acft_multimodal/components/preprocess/multimodal_classification/spec.yaml b/assets/training/finetune_acft_multimodal/components/preprocess/multimodal_classification/spec.yaml deleted file mode 100644 index b9bb893f4e..0000000000 --- a/assets/training/finetune_acft_multimodal/components/preprocess/multimodal_classification/spec.yaml +++ /dev/null @@ -1,93 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -version: 0.0.3 -name: multimodal_classification_datapreprocessing -display_name: Multimodal Classification Data Preprocessing -description: Component to preprocess data for multimodal classification task - -is_deterministic: True - -environment: azureml://registries/azureml/environments/acft-multimodal-gpu/versions/20 - -code: ../../../src/preprocess - -inputs: - # Multimodal Classification task arguments - problem_type: - type: string - default: multimodal-classification - optional: false - enum: - - multimodal-classification-singlelabel - - multimodal-classification-multilabel - description: Specify whether its single-label or multi-label multimodal classification task. - - label_column: - type: string - optional: false - description: label column name - - image_column: - type: string - optional: false - description: Image column name - - drop_columns: - type: string - default: "" - optional: true - description: Set of columns to ignore. Provide string that has comma separated column names. - - numerical_columns_overrides: - type: string - default: "" - optional: true - description: columns to treat as numerical. Overrides automatic column purpose detection. - - categorical_columns_overrides: - type: string - default: "" - optional: true - description: columns to treat as categorical. Overrides automatic column purpose detection. - - text_columns_overrides: - type: string - default: "" - optional: true - description: columns to treat as text. Overrides automatic column purpose detection. - - # Inputs - training_data: - type: mltable - optional: false - description: Enter the train mltable path - - validation_data: - type: mltable - optional: false - description: Enter the validation mltable path - - model_selector_output: - type: uri_folder - optional: false - description: output folder of model selector containing model metadata like config, checkpoints, tokenizer config - -outputs: - output_dir: - type: uri_folder - description: folder to store preprocessed outputs of input data - -command: >- - python preprocess.py - --problem_type ${{inputs.problem_type}} - --label_column ${{inputs.label_column}} - --image_column ${{inputs.image_column}} - $[[--drop_columns "${{inputs.drop_columns}}"]] - $[[--numerical_columns_overrides "${{inputs.numerical_columns_overrides}}"]] - $[[--categorical_columns_overrides "${{inputs.categorical_columns_overrides}}"]] - $[[--text_columns_overrides "${{inputs.text_columns_overrides}}"]] - --train_mltable_path ${{inputs.training_data}} - --validation_mltable_path ${{inputs.validation_data}} - --model_selector_output ${{inputs.model_selector_output}} - --output_dir ${{outputs.output_dir}} diff --git a/assets/training/model_evaluation/components/compute_metrics/README.md b/assets/training/model_evaluation/components/compute_metrics/README.md deleted file mode 100644 index eaedd3f004..0000000000 --- a/assets/training/model_evaluation/components/compute_metrics/README.md +++ /dev/null @@ -1,37 +0,0 @@ -## Compute Metrics Component - -### Name - -compute_metrics - -### Version - -0.0.28 - -### Type - -command - -### Description - -This component enables user to evaluate a model by providing generated predictions and true values to return generated metrics. (Scores the predictions provided by user. No model is required in this case). - -## Inputs - - -| Name | Description | Type | Optional | -| ------------------ | ----------------------------------------------------------------------------------- | ------- | ------- | -| task | Task type for which model is trained | string | True | -| ground_truth | Actual ground truth to evaluate predictions against. The file should be of JSON lines format containing only one key. | uri_file | True | -| ground_truth_column_name | Column name which contains ground truths in provided uri file for ground_truths. | string | True | | -| prediction | Actual predictions which are to be evaluated. They should be in json lines too with only one key. | uri_file | True | | -| prediction_column_name | Column name which contains predictions in provided uri file for predictions. | string | True | | -| prediction_probabilites | Prediction probabilities in order to calculate better set of metrics for classification tasks. This file should be in JSON lines format as well with number of keys equals to number of unique labels. | uri_file | True | | -| evaluation_config | Additional config file required by metrics package. This data asset should contain a JSON Config file. | uri_file | True | | -| evaluation_config_params | JSON Serielized string of evaluation_config | string | True | - -## Outputs - -| Name | Description | Type | -| -------------------- | -------------------------------------------------------- | ------------ | -| evaluationResult | Output dir to save the finetune model and other metadata | uri_folder | diff --git a/assets/training/model_evaluation/components/compute_metrics/asset.yaml b/assets/training/model_evaluation/components/compute_metrics/asset.yaml deleted file mode 100644 index be37aa12b3..0000000000 --- a/assets/training/model_evaluation/components/compute_metrics/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Model Evaluation"] \ No newline at end of file diff --git a/assets/training/model_evaluation/components/compute_metrics/spec.yaml b/assets/training/model_evaluation/components/compute_metrics/spec.yaml deleted file mode 100644 index caeff6f5fd..0000000000 --- a/assets/training/model_evaluation/components/compute_metrics/spec.yaml +++ /dev/null @@ -1,94 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: compute_metrics -display_name: Compute Metrics -description: Calculate model performance metrics, given ground truth and prediction data. - -version: 0.0.36 -type: command -tags: - type: evaluation - sub_type: compute_metrics - -inputs: - task: - type: string - optional: false - default: tabular-classification - enum: [ - tabular-classification, - tabular-classification-multilabel, - tabular-regression, - tabular-forecasting, - text-classification, - text-classification-multilabel, - text-named-entity-recognition, - text-summarization, - question-answering, - text-translation, - text-generation, - fill-mask, - image-classification, - image-classification-multilabel, - chat-completion, - image-object-detection, - image-instance-segmentation, - ] - description: "Task type" - ground_truth: - type: uri_folder - optional: true - mode: ro_mount - description: "Ground Truths of Test Data as a 1-column JSON Lines file" - ground_truth_column_name: - type: string - optional: true - description: "Column name which contains ground truths in provided uri file for ground_truth. (Optional if we have one column name.)" - prediction: - type: uri_folder - optional: false - mode: ro_mount - description: "Model Predictions as a 1-column JSON Lines file" - prediction_column_name: - type: string - optional: true - description: "Column name which contains ground truths in provided uri file for prediction. (Optional if we have one column name.)" - prediction_probabilities: - type: uri_folder - optional: true - mode: ro_mount - description: "Predictions Probabilities as 1-column JSON Lines file" - evaluation_config: - type: uri_file - optional: true - mode: ro_mount - description: "Additional parameters required for evaluation." - evaluation_config_params: - type: string - optional: true - description: "JSON Serialized string of evaluation_config" - openai_config_params: - type: string - optional: true - description: "Required OpenAI Params for calculating GPT Based metrics for QnA task" - -outputs: - evaluation_result: - type: uri_folder - -is_deterministic: True -code: ../../src -environment: azureml://registries/azureml/environments/model-evaluation/labels/latest - -command: >- - python download_metrics_dependencies.py && - python compute_metrics.py - --task '${{inputs.task}}' - $[[--ground_truths '${{inputs.ground_truth}}']] - --predictions '${{inputs.prediction}}' - --output '${{outputs.evaluation_result}}' - $[[--prediction_probabilities '${{inputs.prediction_probabilities}}']] - $[[--config-file-name '${{inputs.evaluation_config}}']] - $[[--ground_truths_column_name '${{inputs.ground_truth_column_name}}']] - $[[--predictions_column_name '${{inputs.prediction_column_name}}']] - $[[--config_str '${{inputs.evaluation_config_params}}']] - $[[--openai-config-params '${{inputs.openai_config_params}}']] diff --git a/assets/training/model_evaluation/components/evaluate_model/README.md b/assets/training/model_evaluation/components/evaluate_model/README.md deleted file mode 100644 index e8052f2d42..0000000000 --- a/assets/training/model_evaluation/components/evaluate_model/README.md +++ /dev/null @@ -1,40 +0,0 @@ -## Evaluate Model Component - -### Name - -evaluate_model - -### Version - -0.0.27 - -### Type - -command - -### Description - -This component enables user to evaluate a model by providing the supported model, run inference to generate predictions first followed by computing metrics against a dataset. You can find the component in your workspace components page. - -## Inputs - - -| Name | Description | Type | Optional | -| ------------------ | ----------------------------------------------------------------------------------- | ------- | ------- | -| task | Task type for which model is trained | string | True | -| test_data | Path to file containing test data in `jsonl` format | uri_file | True -| test_data_mltable | Test data in the form of mltables | ml_table | True -| test_data_input_column_names | Name of the columns in the test dataset that should be used for prediction. More than one columns should be separated by the comma(,) delimiter without any whitespaces in between | string | True -| test_data_label_column_name | Name of the key containing target values in test data. | string | True -| mlflow_model |MLFlow model (either registered or output of another job) | mlflow_model | True -| model_uri | MLFlow model uri of the form -
fetched from azureml run as `runs://run-relative/path/to/model`
fetched from azureml model registry as `models://` | string | True -| evaluation_config | Additional config file required by metrics package. This data asset should contain a JSON Config file. | uri_file | True | | -| evaluation_config_params | JSON Serielized string of evaluation_config | string | True | -| device | Option to run the experiment on CPU or GPU provided that the compute that they are choosing has Cuda support. | string | True -| batch_size | Option to run the experiment on batch support. | integer | True - -## Outputs - -| Name | Description | Type | -| -------------------- | -------------------------------------------------------- | ------------ | -| evaluationResult | Output dir to save the finetune model and other metadata | uri_folder | diff --git a/assets/training/model_evaluation/components/evaluate_model/asset.yaml b/assets/training/model_evaluation/components/evaluate_model/asset.yaml deleted file mode 100644 index be37aa12b3..0000000000 --- a/assets/training/model_evaluation/components/evaluate_model/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Model Evaluation"] \ No newline at end of file diff --git a/assets/training/model_evaluation/components/evaluate_model/spec.yaml b/assets/training/model_evaluation/components/evaluate_model/spec.yaml deleted file mode 100644 index dc31d4fd22..0000000000 --- a/assets/training/model_evaluation/components/evaluate_model/spec.yaml +++ /dev/null @@ -1,94 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: evaluate_model -display_name: Evaluate Model -description: Evaluate MLFlow models for supported task types. - -version: 0.0.19 -type: command -tags: - type: evaluation - sub_type: evaluate_model - -inputs: - task: - type: string - optional: false - default: tabular-classification - enum: [ - tabular-classification, - tabular-classification-multilabel, - tabular-regression, - tabular-forecasting, - text-classification, - text-classification-multilabel, - text-named-entity-recognition, - text-summarization, - question-answering, - text-translation, - fill-mask, - text-generation, - image-classification, - image-classification-multilabel, - chat-completion, - image-object-detection, - image-instance-segmentation, - ] - description: "Task type" - test_data: - type: uri_folder - optional: false - mode: ro_mount - description: "Test Data as JSON Lines URI_FILE" - evaluation_config: - type: uri_file - optional: true - mode: ro_mount - description: "Additional parameters required for evaluation." - test_data_label_column_name: - type: string - optional: false - description: Column name of target values - test_data_input_column_names: - type: string - optional: true - description: Comma separated values of feature column names - mlflow_model: - type: mlflow_model - optional: false - mode: ro_mount - description: "Mlflow Model - registered model or output of a job with type mlflow_model in a pipeline" - device: - type: string - optional: false - default: auto - enum: [auto, cpu, gpu] - batch_size: - type: integer - optional: true - evaluation_config_params: - type: string - optional: true - description: "JSON Serialized string of evaluation_config" - -outputs: - evaluation_result: - type: uri_folder - -is_deterministic: True -code: ../../src -environment: azureml://registries/azureml/environments/model-evaluation/versions/16 - -command: >- - python download_dependencies.py - --mlflow-model '${{inputs.mlflow_model}}' && - python evaluate_model.py - --task '${{inputs.task}}' - --output '${{outputs.evaluation_result}}' - --label-column-name '${{inputs.test_data_label_column_name}}' - --mlflow-model '${{inputs.mlflow_model}}' - --device '${{inputs.device}}' - --data '${{inputs.test_data}}' - $[[--input-column-names '${{inputs.test_data_input_column_names}}']] - $[[--config-file-name '${{inputs.evaluation_config}}']] - $[[--batch-size '${{inputs.batch_size}}']] - $[[--config_str '${{inputs.evaluation_config_params}}']] \ No newline at end of file diff --git a/assets/training/model_evaluation/components/pipeline_component/README.md b/assets/training/model_evaluation/components/pipeline_component/README.md deleted file mode 100644 index 579d1d2562..0000000000 --- a/assets/training/model_evaluation/components/pipeline_component/README.md +++ /dev/null @@ -1,38 +0,0 @@ -## Model Evaluation Pipeline Component - -### Name - -model_evaluation_pipeline_component - -### Version - -0.0.28 - -### Type - -pipeline - -### Description - -This pipeline component for model evaluation for supported tasks. Generates predictions on a given model, followed by computing model performance metrics to score the model quality for supported tasks. - -## Inputs - - -| Name | Description | Type | Optional | -| ------------------ | ----------------------------------------------------------------------------------- | ------- | ------- | -| task | Task type for which model is trained | string | True | -| test_data | Path to file containing test data in `jsonl` format | uri_file | True -| input_column_names | Name of the columns in the test dataset that should be used for prediction. More than one columns should be separated by the comma(,) delimiter without any whitespaces in between | string | True -| label_column_name | Name of the key containing target values in test data. | string | True -| mlflow_model |MLFlow model (either registered or output of another job) | mlflow_model | True -| evaluation_config | Additional config file required by metrics package. This data asset should contain a JSON Config file. | uri_file | True | | -| evaluation_config_params | JSON Serielized string of evaluation_config | string | True | -| device | Option to run the experiment on CPU or GPU provided that the compute that they are choosing has Cuda support. | string | True -| batch_size | Option to run the experiment on batch support. | integer | True - -## Outputs - -| Name | Description | Type | -| -------------------- | -------------------------------------------------------- | ------------ | -| evaluationResult | Output dir to save the finetune model and other metadata | uri_folder | diff --git a/assets/training/model_evaluation/components/pipeline_component/asset.yaml b/assets/training/model_evaluation/components/pipeline_component/asset.yaml deleted file mode 100644 index be37aa12b3..0000000000 --- a/assets/training/model_evaluation/components/pipeline_component/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Model Evaluation"] \ No newline at end of file diff --git a/assets/training/model_evaluation/components/pipeline_component/spec.yaml b/assets/training/model_evaluation/components/pipeline_component/spec.yaml deleted file mode 100644 index c450e7e74a..0000000000 --- a/assets/training/model_evaluation/components/pipeline_component/spec.yaml +++ /dev/null @@ -1,146 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json -name: model_evaluation_pipeline -version: 0.0.36 -type: pipeline -display_name: Model Evaluation Pipeline -description: Pipeline component for model evaluation for supported tasks. \ - Generates predictions on a given model, followed by computing model performance metrics to score the model quality for supported tasks. - -tags: - type: evaluation - sub_type: subgraph - -inputs: - compute_name: - type: string - default: serverless - instance_type: - type: string - default: STANDARD_NC24S_V3 - # model prediction - task: - type: string - default: tabular-classification - enum: [ - tabular-classification, - tabular-classification-multilabel, - tabular-regression, - text-classification, - text-classification-multilabel, - text-named-entity-recognition, - text-summarization, - question-answering, - text-translation, - text-generation, - fill-mask, - image-classification, - image-classification-multilabel, - chat-completion, - image-object-detection, - image-instance-segmentation, - ] - description: "Task type" - test_data: - type: uri_folder - optional: false - description: "Test Data" - mlflow_model: - type: mlflow_model - optional: false - description: "Mlflow Model (could be a registered model or part of another pipeline" - label_column_name: - type: string - optional: true - description: "Label column name in provided test dataset (Ex: label)" - input_column_names: - type: string - optional: true - description: "Input column names in provided test dataset (Ex : column1). Add comma delimited values in case of multiple input columns (Ex : column1,column2)" - device: - type: string - optional: false - default: auto - enum: [auto, cpu, gpu] - batch_size: - type: integer - optional: true - - # compute metrics - evaluation_config: - type: uri_file - optional: true - description: "Additional parameters required for evaluation. See How to create a config [here](https://microsoft.sharepoint.com/:f:/t/SDAutoML/EhDl9iADAR5MlCnlG4sy1NkBi5SfbdaZwKSFnUQD6ckeRg?e=cI7kaB)" - evaluation_config_params: - type: string - optional: true - description: "JSON Serialized string of evaluation_config" - openai_config_params: - type: string - optional: true - description: "Required OpenAI Params for calculating GPT Based metrics for QnA task" - -outputs: - evaluation_result: - type: uri_folder - description: Output dir to save the evaluation result - -jobs: - validation_trigger_model_evaluation: - type: command - component: azureml:validation_trigger_model_evaluation:0.0.36 - compute: '${{parent.inputs.compute_name}}' - resources: - instance_type: '${{parent.inputs.instance_type}}' - inputs: - compute_name: '${{parent.inputs.compute_name}}' - compute_instance_type: '${{parent.inputs.instance_type}}' - task: '${{parent.inputs.task}}' - test_data: '${{parent.inputs.test_data}}' - mlflow_model_path: '${{parent.inputs.mlflow_model}}' - label_column_name: '${{parent.inputs.label_column_name}}' - input_column_names: '${{parent.inputs.input_column_names}}' - device: '${{parent.inputs.device}}' - batch_size: '${{parent.inputs.batch_size}}' - evaluation_config: '${{parent.inputs.evaluation_config}}' - evaluation_config_params: '${{parent.inputs.evaluation_config_params}}' - - validation_succeeded: - type: if_else - condition: ${{parent.jobs.validation_trigger_model_evaluation.outputs.output}} - true_block: ${{parent.jobs.model_prediction}} - - model_prediction: - type: command - component: azureml:model_prediction:0.0.36 - compute: '${{parent.inputs.compute_name}}' - resources: - instance_type: '${{parent.inputs.instance_type}}' - inputs: - task: '${{parent.inputs.task}}' - test_data: '${{parent.inputs.test_data}}' - mlflow_model: '${{parent.inputs.mlflow_model}}' - label_column_name: '${{parent.inputs.label_column_name}}' - input_column_names: '${{parent.inputs.input_column_names}}' - device: '${{parent.inputs.device}}' - batch_size: '${{parent.inputs.batch_size}}' - evaluation_config: '${{parent.inputs.evaluation_config}}' - evaluation_config_params: '${{parent.inputs.evaluation_config_params}}' - - compute_metrics: - type: command - component: azureml:compute_metrics:0.0.36 - compute: '${{parent.inputs.compute_name}}' - resources: - instance_type: '${{parent.inputs.instance_type}}' - inputs: - task: '${{parent.inputs.task}}' - ground_truth: '${{parent.jobs.model_prediction.outputs.ground_truth}}' - ground_truth_column_name: '${{parent.inputs.label_column_name}}' - prediction: '${{parent.jobs.model_prediction.outputs.predictions}}' - prediction_column_name: predictions - prediction_probabilities: '${{parent.jobs.model_prediction.outputs.prediction_probabilities}}' - evaluation_config: '${{parent.inputs.evaluation_config}}' - evaluation_config_params: '${{parent.inputs.evaluation_config_params}}' - openai_config_params: '${{parent.inputs.openai_config_params}}' - outputs: - evaluation_result: '${{parent.outputs.evaluation_result}}' diff --git a/assets/training/model_evaluation/components/validation_trigger_model_evaluation/README.md b/assets/training/model_evaluation/components/validation_trigger_model_evaluation/README.md deleted file mode 100644 index 703213d3f4..0000000000 --- a/assets/training/model_evaluation/components/validation_trigger_model_evaluation/README.md +++ /dev/null @@ -1,38 +0,0 @@ -## Model Prediction Component - -### Name - -validation_trigger_model_evaluation - -### Version - -0.0.28 - -### Type - -command - -### Description - -This component which validates inputs given to model evaluation pipeline by user. - -## Inputs - - -| Name | Description | Type | Optional | -|--------------------| ----------------------------------------------------------------------------------- | ------- | ------- | -| task | Task type for which model is trained | string | True | -| test_data | Path to file containing test data in `jsonl` format | uri_file | True -| input_column_names | Name of the columns in the test dataset that should be used for prediction. More than one columns should be separated by the comma(,) delimiter without any whitespaces in between | string | True -| label_column_name | Name of the key containing target values in test data. | string | True -| mlflow_model_path |MLFlow model (either registered or output of another job) | mlflow_model | True -| device | Option to run the experiment on CPU or GPU provided that the compute that they are choosing has Cuda support. | string | True -| batch_size | Option to run the experiment on batch support. | integer | True -| evaluation_config | Additional config file required by metrics package. This data asset should contain a JSON Config file. | uri_file | True | | -| evaluation_config_params | JSON Serielized string of evaluation_config | string | True | - -## Outputs - -| Name | Description | Type | -| -------------------- | -------------------------------------------------------- | ------------ | -| validation_info | Validation status of the model evaluation pipeline inputs. | uri_file | \ No newline at end of file diff --git a/assets/training/model_evaluation/components/validation_trigger_model_evaluation/asset.yaml b/assets/training/model_evaluation/components/validation_trigger_model_evaluation/asset.yaml deleted file mode 100644 index f64312dfdc..0000000000 --- a/assets/training/model_evaluation/components/validation_trigger_model_evaluation/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Model Evaluation"] diff --git a/assets/training/model_evaluation/components/validation_trigger_model_evaluation/spec.yaml b/assets/training/model_evaluation/components/validation_trigger_model_evaluation/spec.yaml deleted file mode 100644 index dd3de1cc5d..0000000000 --- a/assets/training/model_evaluation/components/validation_trigger_model_evaluation/spec.yaml +++ /dev/null @@ -1,98 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -name: validation_trigger_model_evaluation -display_name: Validation Trigger Model Evaluation -description: Component for enabling validation of model evaluation pipeline. - -version: 0.0.36 -type: command -tags: - type: evaluation - sub_type: validation - Internal: "" - -# Pipeline inputs -inputs: - compute_name: - type: string - default: serverless - compute_instance_type: - type: string - default: STANDARD_NC24S_V3 - # model prediction - task: - type: string - default: tabular-classification - enum: [ - tabular-classification, - tabular-classification-multilabel, - tabular-regression, - text-classification, - text-classification-multilabel, - text-named-entity-recognition, - text-summarization, - question-answering, - text-translation, - text-generation, - fill-mask, - image-classification, - image-classification-multilabel, - image-object-detection, - image-instance-segmentation, - ] - description: "Task type" - test_data: - type: uri_folder - optional: false - description: "Test Data" - mlflow_model_path: - type: mlflow_model - optional: false - description: "Mlflow Model (could be a registered model or part of another pipeline" - label_column_name: - type: string - optional: true - description: "Label column name in provided test dataset (Ex: label)" - input_column_names: - type: string - optional: true - description: "Input column names in provided test dataset (Ex : column1). Add comma delimited values in case of multiple input columns (Ex : column1,column2)" - device: - type: string - optional: false - default: auto - enum: [auto, cpu, gpu] - batch_size: - type: integer - optional: true - - # compute metrics - evaluation_config: - type: uri_file - optional: true - description: "Additional parameters required for evaluation. See How to create a config [here](https://microsoft.sharepoint.com/:f:/t/SDAutoML/EhDl9iADAR5MlCnlG4sy1NkBi5SfbdaZwKSFnUQD6ckeRg?e=cI7kaB)" - evaluation_config_params: - type: string - optional: true - description: "JSON Serialized string of evaluation_config" - -outputs: - output: - type: boolean - is_control: true - -is_deterministic: True -environment: azureml://registries/azureml/environments/model-evaluation/labels/latest -code: ../../src - -command: > - mldesigner execute --source run_model_validate.py --name validate - --inputs task='${{inputs.task}}' - data='${{inputs.test_data}}' - mlflow_model='${{inputs.mlflow_model_path}}' - $[[label_column_name='${{inputs.label_column_name}}']] - $[[input_column_names='${{inputs.input_column_names}}']] - device='${{inputs.device}}' - $[[batch_size='${{inputs.batch_size}}']] - $[[config_file_name='${{inputs.evaluation_config}}']] - $[[config_str='${{inputs.evaluation_config_params}}']] - --outputs output='${{outputs.output}}' diff --git a/assets/training/model_management/components/download_model/asset.yaml b/assets/training/model_management/components/download_model/asset.yaml deleted file mode 100644 index 5d0befc5f8..0000000000 --- a/assets/training/model_management/components/download_model/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["Models"] \ No newline at end of file diff --git a/assets/training/model_management/components/download_model/spec.yaml b/assets/training/model_management/components/download_model/spec.yaml deleted file mode 100644 index 16dce86729..0000000000 --- a/assets/training/model_management/components/download_model/spec.yaml +++ /dev/null @@ -1,67 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json - -name: download_model -version: 0.0.31 -type: command - -is_deterministic: True - -display_name: Download model -description: Downloads a publicly available model - -environment: azureml://registries/azureml/environments/model-management/versions/41 - -code: ../../src/ -command: > - python run_model_download.py - --model-source ${{inputs.model_source}} - --model-id '${{inputs.model_id}}' - $[[--update-existing-model ${{inputs.update_existing_model}}]] - $[[--validation-info ${{inputs.validation_info}}]] - $[[--token ${{inputs.token}}]] - --model-download-metadata ${{outputs.model_download_metadata}} - --model-output-dir ${{outputs.model_output}} - -inputs: - model_source: - type: string - description: Storage containers from where model will be sourced from. - default: Huggingface - enum: - - AzureBlob - - GIT - - Huggingface - - model_id: - type: string - description: A valid model id for the model source selected. For example you can specify `bert-base-uncased` for importing HuggingFace bert base uncased model. Please specify the complete URL if **GIT** or **AzureBlob** is selected in `model_source` - - validation_info: - type: uri_file - description: Path to the validation info file - optional: true - - update_existing_model: - type: boolean - default: false - description: If set to true, will update the existing model. If set to false, will create a new model. - optional: true - - token: - type: string - description: If set use it to access the private models or authenticate the user. For example, user can get the token for HF private model by creating account in Huggingface, accept the condition for models that needs to be downloaded and create access token from browser. For more details please visit - https://huggingface.co/docs/hub/security-tokens - optional: true - -outputs: - model_download_metadata: - type: uri_file - description: File name to which model download details will be written. File would contain details that could be useful for model registration in forms of model tags and properties - - model_output: - type: uri_folder - description: Path to the dowloaded model - mode: rw_mount - -tags: - Preview: "" - diff --git a/assets/training/vision/components/image_classification/asset.yaml b/assets/training/vision/components/image_classification/asset.yaml deleted file mode 100644 index 3741c29da5..0000000000 --- a/assets/training/vision/components/image_classification/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML Image training"] diff --git a/assets/training/vision/components/image_classification/spec.yaml b/assets/training/vision/components/image_classification/spec.yaml deleted file mode 100644 index 7c140f2365..0000000000 --- a/assets/training/vision/components/image_classification/spec.yaml +++ /dev/null @@ -1,231 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -description: Component to finetune AutoML legacy models for image classification. - -name: train_image_classification_model -display_name: Image Classification AutoML Legacy Model Finetune -version: 0.0.13 - -is_deterministic: false - -inputs: - training_data: - type: mltable - description: Path to MLTable for training data. - validation_data: - type: mltable - optional: true - description: Path to MLTable for validation data. - task_type: - description: Whether a single image can have multiple labels. - type: string - enum: ['image-classification', 'image-classification-multilabel'] - ams_gradient: - description: Enable ams_gradient when optimizer is adam or adamw. - type: boolean - optional: true - beta1: - description: Value of beta1 when optimizer is adam or adamw. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - beta2: - description: Value of beta2 when optimizer is adam or adamw. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - checkpoint_frequency: - description: Frequency to store model checkpoints. Must be a positive integer. - type: integer - optional: true - min: 0 - checkpoint_run_id: - description: The run ID of the experiment that has a pretrained checkpoint for incremental training. - type: string - optional: true - early_stopping: - description: Enable early stopping logic during training. - type: boolean - optional: true - early_stopping_patience: - description: Minimum number of epochs or validation evaluations with no primary metric improvement before the run is stopped. Must be a positive integer. - type: integer - optional: true - min: 1 - early_stopping_delay: - description: Minimum number of epochs or validation evaluations to wait before primary metric improvement is tracked for early stopping. Must be a positive integer. - type: integer - optional: true - min: 1 - evaluation_frequency: - description: Frequency to evaluate validation dataset to get metric scores. Must be a positive integer. - type: integer - optional: true - min: 1 - gradient_accumulation_step: - description: Number of forward passes without updating the model weights while accumulating the gradients of those steps, and then using the accumulated gradients to compute the weight updates. Must be a positive integer. - type: integer - optional: true - min: 1 - layers_to_freeze: - description: How many layers to freeze for your model. For instance, passing 2 as value for seresnext means freezing layer0 and layer1 referring to the below supported model layer info. Must be a positive integer. - type: integer - optional: true - min: 1 - learning_rate: - description: Initial learning rate. - type: number - optional: true - min: 0 - max: 1 - learning_rate_scheduler: - description: Type of learning rate scheduler. Must be warmup_cosine or step. - type: string - optional: true - default: warmup_cosine - enum: ['warmup_cosine', 'step'] - model_name: - type: string - description: Model name - optional: true - momentum: - description: Value of momentum when optimizer is sgd. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - nesterov: - description: Enable nesterov when optimizer is sgd. - type: boolean - optional: true - number_of_epochs: - description: Number of training epochs - type: integer - optional: true - min: 1 - number_of_workers: - description: Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process. - type: integer - optional: true - optimizer: - description: Type of optimizer - type: string - optional: true - default: sgd - enum: ['sgd', 'adam', 'adamw'] - random_seed: - description: Random seed that will be set at the beginning of training. - type: integer - optional: true - step_lr_gamma: - description: Value of gamma when learning rate scheduler is step. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: number - optional: true - min: 0 - max: 1 - step_lr_step_size: - description: Value of step size when learning rate scheduler is step. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: integer - optional: true - min: 0 - training_batch_size: - description: Training batch size. - type: integer - optional: true - min: 1 - training_crop_size: - description: Image crop size that's input to your neural network for training dataset. Notes - seresnext doesn't take an arbitrary size. ViT-variants should have the same validation_crop_size and training_crop_size. - type: integer - optional: true - min: 1 - validation_batch_size: - description: Validation batch size. - type: integer - optional: true - min: 1 - validation_crop_size: - description: Image crop size that's input to your neural network for validation dataset. Note - seresnext doesn't take an arbitrary size. ViT-variants should have the same validation_crop_size and training_crop_size. - type: integer - optional: true - min: 1 - validation_resize_size: - description: Image size to which to resize before cropping for validation dataset. Note - seresnext doesn't take an arbitrary size. - type: integer - optional: true - min: 1 - warmup_cosine_lr_cycles: - description: Value of cosine cycle when learning rate scheduler is warmup_cosine. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: number - optional: true - min: 0 - max: 1 - warmup_cosine_lr_warmup_epochs: - description: Value of warmup epochs when learning rate scheduler is warmup_cosine. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: integer - optional: true - min: 0 - weight_decay: - description: Value of weight decay used by the optimizer. - type: number - optional: true - min: 0 - max: 1 - weighted_loss: - description: 0 for no weighted loss, 1 for weighted loss with sqrt (class_weights), 2 for weighted loss with class_weights. - type: integer - optional: true - -outputs: - mlflow_model_folder: - description: Trained MLFlow model. - type: mlflow_model - pytorch_model_folder: - description: Trained Pytorch model. - type: custom_model - -code: ../src - -environment: azureml://registries/azureml/environments/automl-dnn-vision-gpu/labels/latest - -command: >- - python -m image_classification.run - --training_data ${{inputs.training_data}} - $[[--validation_data ${{inputs.validation_data}}]] - --task_type ${{inputs.task_type}} - $[[--amsgrad ${{inputs.ams_gradient}}]] - $[[--beta1 ${{inputs.beta1}}]] - $[[--beta2 ${{inputs.beta2}}]] - $[[--checkpoint_frequency ${{inputs.checkpoint_frequency}}]] - $[[--checkpoint_run_id ${{inputs.checkpoint_run_id}}]] - $[[--early_stopping ${{inputs.early_stopping}}]] - $[[--early_stopping_patience ${{inputs.early_stopping_patience}}]] - $[[--early_stopping_delay ${{inputs.early_stopping_delay}}]] - $[[--evaluation_frequency ${{inputs.evaluation_frequency}}]] - $[[--grad_accumulation_step ${{inputs.gradient_accumulation_step}}]] - $[[--layers_to_freeze ${{inputs.layers_to_freeze}}]] - $[[--learning_rate ${{inputs.learning_rate}}]] - $[[--lr_scheduler ${{inputs.learning_rate_scheduler}}]] - $[[--model_name ${{inputs.model_name}}]] - $[[--momentum ${{inputs.momentum}}]] - $[[--nesterov ${{inputs.nesterov}}]] - $[[--number_of_epochs ${{inputs.number_of_epochs}}]] - $[[--number_of_workers ${{inputs.number_of_workers}}]] - $[[--optimizer ${{inputs.optimizer}}]] - $[[--random_seed ${{inputs.random_seed}}]] - $[[--step_lr_gamma ${{inputs.step_lr_gamma}}]] - $[[--step_lr_step_size ${{inputs.step_lr_step_size}}]] - $[[--training_batch_size ${{inputs.training_batch_size}}]] - $[[--train_crop_size ${{inputs.training_crop_size}}]] - $[[--validation_batch_size ${{inputs.validation_batch_size}}]] - $[[--valid_crop_size ${{inputs.validation_crop_size}}]] - $[[--valid_resize_size ${{inputs.validation_resize_size}}]] - $[[--weight_decay ${{inputs.weight_decay}}]] - $[[--weighted_loss ${{inputs.weighted_loss}}]] - --mlflow_model_output ${{outputs.mlflow_model_folder}} - --pytorch_model_output ${{outputs.pytorch_model_folder}} - -distribution: - type: pytorch diff --git a/assets/training/vision/components/instance_segmentation/asset.yaml b/assets/training/vision/components/instance_segmentation/asset.yaml deleted file mode 100644 index 3741c29da5..0000000000 --- a/assets/training/vision/components/instance_segmentation/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML Image training"] diff --git a/assets/training/vision/components/instance_segmentation/spec.yaml b/assets/training/vision/components/instance_segmentation/spec.yaml deleted file mode 100644 index 9c4f9a4369..0000000000 --- a/assets/training/vision/components/instance_segmentation/spec.yaml +++ /dev/null @@ -1,270 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -description: Component to finetune AutoML legacy models for instance segmentation. - -name: train_instance_segmentation_model -display_name: Image Instance Segmentation AutoML Legacy Model Finetune -version: 0.0.13 - -is_deterministic: false - -inputs: - training_data: - description: Path to MLTable for training data. - type: mltable - validation_data: - description: Path to MLTable for validation data. - type: mltable - optional: true - ams_gradient: - description: Enable ams_gradient when optimizer is adam or adamw. - type: boolean - optional: true - beta1: - description: Value of beta1 when optimizer is adam or adamw. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - beta2: - description: Value of beta2 when optimizer is adam or adamw. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - box_detections_per_image: - description: Maximum number of detections per image, for all classes. Must be a positive integer. - type: integer - optional: true - min: 1 - box_score_threshold: - description: During inference, only return proposals with a classification score greater than box_score_threshold. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - checkpoint_frequency: - description: Frequency to store model checkpoints. Must be a positive integer. - type: integer - optional: true - min: 0 - checkpoint_run_id: - description: The run ID of the experiment that has a pretrained checkpoint for incremental training. - type: string - optional: true - early_stopping: - description: Enable early stopping logic during training. - type: boolean - optional: true - early_stopping_patience: - description: Minimum number of epochs or validation evaluations with no primary metric improvement before the run is stopped. Must be a positive integer. - type: integer - optional: true - min: 1 - early_stopping_delay: - description: Minimum number of epochs or validation evaluations to wait before primary metric improvement is tracked for early stopping. Must be a positive integer. - type: integer - optional: true - min: 1 - evaluation_frequency: - description: Frequency to evaluate validation dataset to get metric scores. Must be a positive integer. - type: integer - optional: true - min: 1 - gradient_accumulation_step: - description: Number of forward passes without updating the model weights while accumulating the gradients of those steps, and then using the accumulated gradients to compute the weight updates. Must be a positive integer. - type: integer - optional: true - min: 1 - layers_to_freeze: - description: How many layers to freeze for your model. For instance, passing 2 as value for seresnext means freezing layer0 and layer1 referring to the below supported model layer info. Must be a positive integer. - type: integer - optional: true - min: 1 - learning_rate: - description: Initial learning rate - type: number - optional: true - min: 0 - max: 1 - learning_rate_scheduler: - description: Type of learning rate scheduler. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: string - optional: true - default: warmup_cosine - enum: ['warmup_cosine', 'step'] - max_size: - description: Maximum size of the image to be rescaled before feeding it to the backbone. - type: integer - optional: true - min: 1 - min_size: - description: Minimum size of the image to be rescaled before feeding it to the backbone. Must be a positive integer. - type: integer - optional: true - min: 1 - model_name: - description: Model name - type: string - optional: true - momentum: - description: Value of momentum when optimizer is sgd. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - nesterov: - description: Enable nesterov when optimizer is sgd. - type: boolean - optional: true - nms_iou_threshold: - description: IOU threshold used during inference in non-maximum suppression post processing. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - number_of_epochs: - description: Number of training epochs - type: integer - optional: true - min: 1 - number_of_workers: - description: Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process. - type: integer - optional: true - optimizer: - description: Type of optimizer - type: string - optional: true - default: sgd - enum: ['sgd', 'adam', 'adamw'] - random_seed: - description: Random seed that will be set at the beginning of training. - type: integer - optional: true - step_lr_gamma: - description: Value of gamma when learning rate scheduler is step. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: number - optional: true - min: 0 - max: 1 - step_lr_step_size: - description: Value of step size when learning rate scheduler is step. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: integer - optional: true - min: 0 - tile_grid_size: - description: The grid size to use for tiling each image. Should be passed as a string in '3x2' format. Example --tile_grid_size '3x2' - type: string - optional: true - tile_overlap_ratio: - description: Overlap ratio between adjacent tiles in each dimension. Must be float in the range of [0, 1). - type: number - optional: true - min: 0 - max: 1 - tile_predictions_nms_threshold: - description: The IOU threshold to use to perform NMS while merging predictions from tiles and image. Used in validation/ inference. Must be float in the range of [0, 1]. - type: number - optional: true - min: 0 - max: 1 - training_batch_size: - description: Training batch size. - type: integer - optional: true - min: 1 - validation_batch_size: - description: Validation batch size. - type: integer - optional: true - min: 1 - validation_iou_threshold: - description: IOU threshold for box matching when computing validation metrics. Must be a float in the range [0.1, 1]. - type: number - optional: true - min: 0.1 - max: 1 - validation_metric_type: - description: Metric computation method to use for validation metrics. Must be none, coco, voc, or coco_voc. - type: string - optional: true - default: 'voc' - enum: ['none', 'coco', 'voc', 'coco_voc'] - warmup_cosine_lr_cycles: - description: Value of cosine cycle when learning rate scheduler is warmup_cosine. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: number - optional: true - min: 0 - max: 1 - warmup_cosine_lr_warmup_epochs: - description: Value of warmup epochs when learning rate scheduler is warmup_cosine. Please check for https://learn.microsoft.com/azure/machine-learning/reference-automl-images-hyperparameters more information. - type: integer - optional: true - min: 0 - weight_decay: - description: Value of weight decay used by the optimizer. - type: number - optional: true - min: 0 - max: 1 - -outputs: - mlflow_model_folder: - description: Trained MLFlow model. - type: mlflow_model - pytorch_model_folder: - description: Trained Pytorch model. - type: custom_model - -code: ../src - -environment: azureml://registries/azureml/environments/automl-dnn-vision-gpu/labels/latest - -command: >- - python -m instance_segmentation.run - --training_data ${{inputs.training_data}} - $[[--validation_data ${{inputs.validation_data}}]] - $[[--amsgrad ${{inputs.ams_gradient}}]] - $[[--beta1 ${{inputs.beta1}}]] - $[[--beta2 ${{inputs.beta2}}]] - $[[--box_detections_per_img ${{inputs.box_detections_per_image}}]] - $[[--box_score_thresh ${{inputs.box_score_threshold}}]] - $[[--checkpoint_frequency ${{inputs.checkpoint_frequency}}]] - $[[--checkpoint_run_id ${{inputs.checkpoint_run_id}}]] - $[[--early_stopping ${{inputs.early_stopping}}]] - $[[--early_stopping_patience ${{inputs.early_stopping_patience}}]] - $[[--early_stopping_delay ${{inputs.early_stopping_delay}}]] - $[[--evaluation_frequency ${{inputs.evaluation_frequency}}]] - $[[--grad_accumulation_step ${{inputs.gradient_accumulation_step}}]] - $[[--layers_to_freeze ${{inputs.layers_to_freeze}}]] - $[[--learning_rate ${{inputs.learning_rate}}]] - $[[--lr_scheduler ${{inputs.learning_rate_scheduler}}]] - $[[--max_size ${{inputs.max_size}}]] - $[[--min_size ${{inputs.min_size}}]] - $[[--model_name ${{inputs.model_name}}]] - $[[--momentum ${{inputs.momentum}}]] - $[[--nesterov ${{inputs.nesterov}}]] - $[[--nms_iou_thresh ${{inputs.nms_iou_threshold}}]] - $[[--number_of_epochs ${{inputs.number_of_epochs}}]] - $[[--number_of_workers ${{inputs.number_of_workers}}]] - $[[--optimizer ${{inputs.optimizer}}]] - $[[--random_seed ${{inputs.random_seed}}]] - $[[--step_lr_gamma ${{inputs.step_lr_gamma}}]] - $[[--step_lr_step_size ${{inputs.step_lr_step_size}}]] - $[[--tile_grid_size ${{inputs.tile_grid_size}}]] - $[[--tile_overlap_ratio ${{inputs.tile_overlap_ratio}}]] - $[[--tile_predictions_nms_thresh ${{inputs.tile_predictions_nms_threshold}}]] - $[[--training_batch_size ${{inputs.training_batch_size}}]] - $[[--validation_batch_size ${{inputs.validation_batch_size}}]] - $[[--validation_iou_threshold ${{inputs.validation_iou_threshold}}]] - $[[--validation_metric_type ${{inputs.validation_metric_type}}]] - $[[--warmup_cosine_lr_cycles ${{inputs.warmup_cosine_lr_cycles}}]] - $[[--warmup_cosine_lr_warmup_epochs ${{inputs.warmup_cosine_lr_warmup_epochs}}]] - $[[--weight_decay ${{inputs.weight_decay}}]] - --mlflow_model_output ${{outputs.mlflow_model_folder}} - --pytorch_model_output ${{outputs.pytorch_model_folder}} - -distribution: - type: pytorch diff --git a/assets/training/vision/components/object_detection/asset.yaml b/assets/training/vision/components/object_detection/asset.yaml deleted file mode 100644 index 3741c29da5..0000000000 --- a/assets/training/vision/components/object_detection/asset.yaml +++ /dev/null @@ -1,3 +0,0 @@ -type: component -spec: spec.yaml -categories: ["AutoML Image training"] diff --git a/assets/training/vision/components/object_detection/spec.yaml b/assets/training/vision/components/object_detection/spec.yaml deleted file mode 100644 index dcb181a978..0000000000 --- a/assets/training/vision/components/object_detection/spec.yaml +++ /dev/null @@ -1,282 +0,0 @@ -$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json -type: command - -description: Component to finetune AutoML legacy models for object detection. - -name: train_object_detection_model -display_name: Image Object Detection AutoML Legacy Model Finetune -version: 0.0.13 - -is_deterministic: false - -inputs: - training_data: - description: Path to MLTable for training data. - type: mltable - validation_data: - description: Path to MLTable for validation data. - type: mltable - optional: true - ams_gradient: - description: Enable ams_gradient when optimizer is adam or adamw. - type: boolean - optional: true - beta1: - description: Value of beta1 when optimizer is adam or adamw. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - beta2: - description: Value of beta2 when optimizer is adam or adamw. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - box_detections_per_image: - description: Maximum number of detections per image, for all classes. Must be a positive integer. - type: integer - optional: true - min: 1 - box_score_threshold: - description: During inference, only return proposals with a classification score greater than box_score_threshold. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - checkpoint_frequency: - description: Frequency to store model checkpoints. Must be a positive integer. - type: integer - optional: true - min: 0 - checkpoint_run_id: - description: The run ID of the experiment that has a pretrained checkpoint for incremental training. - type: string - optional: true - early_stopping: - description: Enable early stopping logic during training. - type: boolean - optional: true - early_stopping_patience: - description: Minimum number of epochs or validation evaluations with no primary metric improvement before the run is stopped. Must be a positive integer. - type: integer - optional: true - min: 1 - early_stopping_delay: - description: Minimum number of epochs or validation evaluations to wait before primary metric improvement is tracked for early stopping. Must be a positive integer. - type: integer - optional: true - min: 1 - evaluation_frequency: - description: Frequency to evaluate validation dataset to get metric scores. Must be a positive integer. - type: integer - optional: true - min: 1 - gradient_accumulation_step: - description: Number of forward passes without updating the model weights while accumulating the gradients of those steps, and then using the accumulated gradients to compute the weight updates. Must be a positive integer. - type: integer - optional: true - min: 1 - layers_to_freeze: - description: How many layers to freeze for your model. For instance, passing 2 as value for seresnext means freezing layer0 and layer1 referring to the below supported model layer info. Must be a positive integer. - type: integer - optional: true - min: 1 - learning_rate: - description: Initial learning rate - type: number - optional: true - min: 0 - max: 1 - learning_rate_scheduler: - description: Type of learning rate scheduler. Must be warmup_cosine or step. - type: string - optional: true - default: warmup_cosine - enum: ['warmup_cosine', 'step'] - max_size: - description: Maximum size of the image to be rescaled before feeding it to the backbone. - type: integer - optional: true - min: 1 - min_size: - description: Minimum size of the image to be rescaled before feeding it to the backbone. Must be a positive integer. - type: integer - optional: true - min: 1 - model_name: - description: Model name - type: string - optional: true - momentum: - description: Value of momentum when optimizer is sgd. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - model_size: - description: Model size for yolov5. - type: string - optional: true - default: medium - enum: ['small', 'medium', 'large', 'xlarge'] - multi_scale: - description: Enable multi-scale image by varying image size by +/- 50%. - type: boolean - optional: true - nesterov: - description: Enable nesterov when optimizer is sgd. - type: boolean - optional: true - nms_iou_threshold: - description: IOU threshold used during inference in non-maximum suppression post processing. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - number_of_epochs: - description: Number of training epochs - type: integer - optional: true - min: 1 - number_of_workers: - description: Number of subprocesses to use for data loading (PyTorch only). 0 means that the data will be loaded in the main process. - type: integer - optional: true - optimizer: - description: Type of optimizer - type: string - optional: true - default: sgd - enum: ['sgd', 'adam', 'adamw'] - random_seed: - description: Random seed that will be set at the beginning of training. - type: integer - optional: true - step_lr_gamma: - description: Value of gamma when learning rate scheduler is step. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - step_lr_step_size: - description: Value of step size when learning rate scheduler is step. Must be a positive integer. - type: integer - optional: true - min: 0 - tile_grid_size: - description: The grid size to use for tiling each image. Should be passed as a string in '3x2' format. Example --tile_grid_size '3x2'. For more information please visit https://learn.microsoft.com/en-us/azure/machine-learning/how-to-use-automl-small-object-detect?tabs=CLI-v2. - type: string - optional: true - tile_overlap_ratio: - description: Overlap ratio between adjacent tiles in each dimension. Must be float in the range of [0, 1). - type: number - optional: true - min: 0 - max: 1 - tile_predictions_nms_threshold: - description: The IOU threshold to use to perform NMS while merging predictions from tiles and image. Used in validation/ inference. Must be float in the range of [0, 1]. - type: number - optional: true - min: 0 - max: 1 - training_batch_size: - description: Training batch size. - type: integer - optional: true - min: 1 - validation_batch_size: - description: Validation batch size. - type: integer - optional: true - min: 1 - validation_iou_threshold: - description: IOU threshold for box matching when computing validation metrics. Must be a float in the range [0.1, 1]. - type: number - optional: true - min: 0.1 - max: 1 - validation_metric_type: - description: Metric computation method to use for validation metrics. Must be none, coco, voc, or coco_voc. - type: string - optional: true - default: 'voc' - enum: ['none', 'coco', 'voc', 'coco_voc'] - warmup_cosine_lr_cycles: - description: Value of cosine cycle when learning rate scheduler is warmup_cosine. Must be a float in the range [0, 1]. - type: number - optional: true - min: 0 - max: 1 - warmup_cosine_lr_warmup_epochs: - description: Value of warmup epochs when learning rate scheduler is warmup_cosine. Must be a positive integer. - type: integer - optional: true - min: 0 - weight_decay: - description: Value of weight decay used by the optimizer. - type: number - optional: true - min: 0 - max: 1 - -outputs: - mlflow_model_folder: - description: Trained MLFlow model. - type: mlflow_model - pytorch_model_folder: - description: Trained Pytorch model. - type: custom_model - -code: ../src - -environment: azureml://registries/azureml/environments/automl-dnn-vision-gpu/labels/latest - -command: >- - python -m object_detection.run - --training_data ${{inputs.training_data}} - $[[--validation_data ${{inputs.validation_data}}]] - $[[--amsgrad ${{inputs.ams_gradient}}]] - $[[--beta1 ${{inputs.beta1}}]] - $[[--beta2 ${{inputs.beta2}}]] - $[[--box_detections_per_img ${{inputs.box_detections_per_image}}]] - $[[--box_score_thresh ${{inputs.box_score_threshold}}]] - $[[--checkpoint_frequency ${{inputs.checkpoint_frequency}}]] - $[[--checkpoint_run_id ${{inputs.checkpoint_run_id}}]] - $[[--early_stopping ${{inputs.early_stopping}}]] - $[[--early_stopping_patience ${{inputs.early_stopping_patience}}]] - $[[--early_stopping_delay ${{inputs.early_stopping_delay}}]] - $[[--evaluation_frequency ${{inputs.evaluation_frequency}}]] - $[[--grad_accumulation_step ${{inputs.gradient_accumulation_step}}]] - $[[--layers_to_freeze ${{inputs.layers_to_freeze}}]] - $[[--learning_rate ${{inputs.learning_rate}}]] - $[[--lr_scheduler ${{inputs.learning_rate_scheduler}}]] - $[[--max_size ${{inputs.max_size}}]] - $[[--min_size ${{inputs.min_size}}]] - $[[--model_name ${{inputs.model_name}}]] - $[[--model_size ${{inputs.model_size}}]] - $[[--momentum ${{inputs.momentum}}]] - $[[--multi_scale ${{inputs.multi_scale}}]] - $[[--nesterov ${{inputs.nesterov}}]] - $[[--nms_iou_thresh ${{inputs.nms_iou_threshold}}]] - $[[--number_of_epochs ${{inputs.number_of_epochs}}]] - $[[--number_of_workers ${{inputs.number_of_workers}}]] - $[[--optimizer ${{inputs.optimizer}}]] - $[[--random_seed ${{inputs.random_seed}}]] - $[[--step_lr_gamma ${{inputs.step_lr_gamma}}]] - $[[--step_lr_step_size ${{inputs.step_lr_step_size}}]] - $[[--tile_grid_size ${{inputs.tile_grid_size}}]] - $[[--tile_overlap_ratio ${{inputs.tile_overlap_ratio}}]] - $[[--tile_predictions_nms_thresh ${{inputs.tile_predictions_nms_threshold}}]] - $[[--training_batch_size ${{inputs.training_batch_size}}]] - $[[--validation_batch_size ${{inputs.validation_batch_size}}]] - $[[--validation_iou_threshold ${{inputs.validation_iou_threshold}}]] - $[[--validation_metric_type ${{inputs.validation_metric_type}}]] - $[[--warmup_cosine_lr_cycles ${{inputs.warmup_cosine_lr_cycles}}]] - $[[--warmup_cosine_lr_warmup_epochs ${{inputs.warmup_cosine_lr_warmup_epochs}}]] - $[[--weight_decay ${{inputs.weight_decay}}]] - --mlflow_model_output ${{outputs.mlflow_model_folder}} - --pytorch_model_output ${{outputs.pytorch_model_folder}} - -distribution: - type: pytorch