Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# basic setup
FROM mcr.microsoft.com/devcontainers/python:3.8
FROM mcr.microsoft.com/devcontainers/python:3.10
RUN apt-get update && apt-get -y update
RUN apt-get install -y sudo git npm

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ FLAML has a .NET implementation in [ML.NET](http://dot.net/ml), an open-source,

## Installation

FLAML requires **Python version >= 3.8**. It can be installed from pip:
FLAML requires **Python version >= 3.9**. It can be installed via pip:

```bash
pip install flaml
Expand Down
58 changes: 47 additions & 11 deletions flaml/automl/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import random
import sys
import time
from concurrent.futures import as_completed
from functools import partial
from typing import Callable, List, Optional, Union

Expand Down Expand Up @@ -187,7 +188,8 @@ def custom_metric(
mem_thres: A float of the memory size constraint in bytes.
pred_time_limit: A float of the prediction latency constraint in seconds.
It refers to the average prediction time per row in validation data.
train_time_limit: A float of the training time constraint in seconds.
train_time_limit: None or a float of the training time constraint in seconds for each trial.
Only valid for sequential search.
verbose: int, default=3 | Controls the verbosity, higher means more
messages.
retrain_full: bool or str, default=True | whether to retrain the
Expand Down Expand Up @@ -1334,7 +1336,8 @@ def custom_metric(
mem_thres: A float of the memory size constraint in bytes.
pred_time_limit: A float of the prediction latency constraint in seconds.
It refers to the average prediction time per row in validation data.
train_time_limit: None or a float of the training time constraint in seconds.
train_time_limit: None or a float of the training time constraint in seconds for each trial.
Only valid for sequential search.
X_val: None or a numpy array or a pandas dataframe of validation data.
y_val: None or a numpy array or a pandas series of validation labels.
sample_weight_val: None or a numpy array of the sample weight of
Expand Down Expand Up @@ -1625,6 +1628,13 @@ def cv_score_agg_func(val_loss_folds, log_metrics_folds):
_ch.setFormatter(logger_formatter)
logger.addHandler(_ch)

if model_history:
logger.warning(
"With `model_history` set to `True` by default, all intermediate models are retained in memory, "
"which may significantly increase memory usage and slow down training. "
"Consider setting `model_history=False` to optimize memory and accelerate the training process."
)

if not use_ray and not use_spark and n_concurrent_trials > 1:
if ray_available:
logger.warning(
Expand Down Expand Up @@ -2717,16 +2727,42 @@ def _search(self):
):
if mlflow.active_run() is None:
mlflow.start_run(run_id=self.mlflow_integration.parent_run_id)
self.mlflow_integration.log_model(
self._trained_estimator.model,
self.best_estimator,
signature=self.estimator_signature,
)
self.mlflow_integration.pickle_and_log_automl_artifacts(
self, self.model, self.best_estimator, signature=self.pipeline_signature
)
if self.best_estimator.endswith("_spark"):
self.mlflow_integration.log_model(
self._trained_estimator.model,
self.best_estimator,
signature=self.estimator_signature,
run_id=self.mlflow_integration.parent_run_id,
)
else:
self.mlflow_integration.pickle_and_log_automl_artifacts(
self,
self.model,
self.best_estimator,
signature=self.pipeline_signature,
run_id=self.mlflow_integration.parent_run_id,
)
else:
logger.info("not retraining because the time budget is too small.")
logger.warning("not retraining because the time budget is too small.")
if self.mlflow_integration is not None:
logger.debug("Collecting results from submitted record_state tasks")
t1 = time.perf_counter()
for future in as_completed(self.mlflow_integration.futures):
_task = self.mlflow_integration.futures[future]
try:
result = future.result()
logger.debug(f"Result for record_state task {_task}: {result}")
except Exception as e:
logger.warning(f"Exception for record_state task {_task}: {e}")
for future in as_completed(self.mlflow_integration.futures_log_model):
_task = self.mlflow_integration.futures_log_model[future]
try:
result = future.result()
logger.debug(f"Result for log_model task {_task}: {result}")
except Exception as e:
logger.warning(f"Exception for log_model task {_task}: {e}")
t2 = time.perf_counter()
logger.debug(f"Collecting results from tasks submitted to executors costs {t2-t1} seconds.")

def __del__(self):
if (
Expand Down
Loading
Loading