Merged
63 commits
6711934
Update gitignore
thinkall Dec 23, 2025
a49492f
Bump version to 2.4.0
thinkall Dec 23, 2025
8152679
Update readme
thinkall Dec 23, 2025
4397fc1
Pre-download california housing data
thinkall Dec 25, 2025
3b82f52
Use pre-downloaded california housing data
thinkall Dec 25, 2025
519cc5e
Pin lightning<=2.5.6
thinkall Dec 25, 2025
229826b
Fix typo in find and replace
thinkall Dec 25, 2025
d4d1e2e
Fix estimators has no attribute __sklearn_tags__
thinkall Dec 25, 2025
6c731ec
Pin torch to 2.2.2 in tests
thinkall Dec 25, 2025
5f1fe8e
Fix conflict
thinkall Dec 25, 2025
f59b667
Update pytorch-forecasting
thinkall Dec 25, 2025
4173a9b
Update pytorch-forecasting
thinkall Dec 26, 2025
12106d0
Update pytorch-forecasting
thinkall Dec 26, 2025
022d64a
Use numpy<2 for testing
thinkall Dec 26, 2025
28afa46
Update scikit-learn
thinkall Dec 26, 2025
5915ae0
Run Build and UT every other day
thinkall Dec 26, 2025
21d346a
Pin pip<24.1
thinkall Dec 26, 2025
768a8d8
Pin pip<24.1 in pipeline
thinkall Dec 26, 2025
843e860
Loosen pip, install pytorch_forecasting only in py311
thinkall Dec 26, 2025
dd5b992
Add support to new versions of nlp dependencies
thinkall Jan 7, 2026
54e3101
Fix formats
thinkall Jan 7, 2026
ffb87f4
Remove redefinition
thinkall Jan 7, 2026
bc30b13
Update mlflow versions
thinkall Jan 7, 2026
a688c5d
Fix mlflow version syntax
thinkall Jan 7, 2026
71ccd5a
Update gitignore
thinkall Jan 7, 2026
1902c91
Clean up cache to free space
thinkall Jan 7, 2026
61fe7ef
Remove clean up action cache
thinkall Jan 7, 2026
e551629
Fix blendsearch
thinkall Jan 7, 2026
7ea4bc6
Update test workflow
thinkall Jan 7, 2026
86afb2a
Update setup.py
thinkall Jan 7, 2026
e90836f
Fix catboost version
thinkall Jan 7, 2026
a051966
Update workflow
thinkall Jan 7, 2026
b5732ad
Prepare for python 3.14
thinkall Jan 7, 2026
3362875
Support no catboost
thinkall Jan 7, 2026
d7c77f1
Fix tests
thinkall Jan 7, 2026
fe1b144
Fix python_requires
thinkall Jan 7, 2026
c28d9db
Update test workflow
thinkall Jan 8, 2026
ffcfacc
Fix vw tests
thinkall Jan 8, 2026
ca1e0d1
Remove python 3.9
thinkall Jan 8, 2026
e962e83
Fix nlp tests
thinkall Jan 8, 2026
0157b4d
Fix prophet
thinkall Jan 8, 2026
12e3502
Print pip freeze for better debugging
thinkall Jan 8, 2026
560b7c7
Fix Optuna search does not support parameters of type Float with samp…
thinkall Jan 8, 2026
557c250
Save dependencies for later inspection
thinkall Jan 8, 2026
7b2aec9
Fix coverage.xml not exists
thinkall Jan 8, 2026
3196551
Fix github action permission
thinkall Jan 8, 2026
2eb598a
Handle python 3.13
thinkall Jan 8, 2026
fbe4192
Address openml is not installed
thinkall Jan 8, 2026
ae0e687
Check dependencies before run tests
thinkall Jan 8, 2026
3c49b6c
Update dependencies
thinkall Jan 8, 2026
6a7cddf
Fix syntax error
thinkall Jan 8, 2026
c5d6937
Use bash
thinkall Jan 8, 2026
2176cfc
Update dependencies
thinkall Jan 8, 2026
8ac1583
Fix git error
thinkall Jan 8, 2026
caffde4
Loosen mlflow constraints
thinkall Jan 8, 2026
e609bf2
Add rerun, use mlflow-skinny
thinkall Jan 8, 2026
545f9ea
Fix git error
thinkall Jan 9, 2026
803acb3
Remove ray tests
thinkall Jan 9, 2026
308965c
Update xgboost versions
thinkall Jan 9, 2026
0287011
Fix automl pickle error
thinkall Jan 9, 2026
ed4b1ac
Don't test python 3.10 on macos as it's stuck
thinkall Jan 9, 2026
36c0745
Rebase before push
thinkall Jan 9, 2026
cc0089e
Reduce number of branches
thinkall Jan 9, 2026
95 changes: 54 additions & 41 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,12 @@ on:
- 'setup.py'
merge_group:
types: [checks_requested]
schedule:
# Every other day at 02:00 UTC
- cron: '0 2 */2 * *'

permissions: {}
permissions:
contents: write
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
Expand All @@ -36,15 +40,18 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.10", "3.11"]
exclude:
- os: macos-latest
python-version: "3.10"
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: On mac, install libomp to facilitate lgbm and xgboost install
if: matrix.os == 'macOS-latest'
if: matrix.os == 'macos-latest'
run: |
brew update
brew install libomp
Expand All @@ -70,62 +77,68 @@ jobs:
run: |
pip install pyspark==3.5.1
pip list | grep "pyspark"
- name: If linux and python<3.11, install ray 2
if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.11'
- name: On Ubuntu python 3.12, install pyspark 4.0.1
if: matrix.python-version == '3.12' && matrix.os == 'ubuntu-latest'
run: |
pip install "ray[tune]<2.5.0"
- name: If mac and python 3.10, install ray and xgboost 1
if: matrix.os == 'macOS-latest' && matrix.python-version == '3.10'
run: |
pip install -e .[ray]
# use macOS to test xgboost 1, but macOS also supports xgboost 2
pip install "xgboost<2"
- name: If linux, install prophet on python < 3.9
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.8'
pip install pyspark==4.0.1
pip list | grep "pyspark"
# # TODO: support ray
# - name: If linux and python<3.11, install ray 2
# if: matrix.os == 'ubuntu-latest' && matrix.python-version < '3.11'
# run: |
# pip install "ray[tune]<2.5.0"
- name: Install prophet when on linux
if: matrix.os == 'ubuntu-latest'
run: |
pip install -e .[forecast]
- name: Install vw on python < 3.10
if: matrix.python-version == '3.8' || matrix.python-version == '3.9'
# TODO: support vw for python 3.10+
- name: If linux and python<3.10, install vw
if: matrix.os == 'ubuntu-latest' && matrix.python-version < '3.10'
run: |
pip install -e .[vw]
- name: Pip freeze
run: |
pip freeze
- name: Check dependencies
run: |
python test/check_dependency.py
- name: Clear pip cache
run: |
pip cache purge
- name: Test with pytest
if: matrix.python-version != '3.10'
run: |
pytest test/ --ignore=test/autogen
pytest test/ --ignore=test/autogen --reruns 2 --reruns-delay 10
- name: Coverage
if: matrix.python-version == '3.10'
run: |
pip install coverage
coverage run -a -m pytest test --ignore=test/autogen
coverage run -a -m pytest test --ignore=test/autogen --reruns 2 --reruns-delay 10
coverage xml
- name: Upload coverage to Codecov
if: matrix.python-version == '3.10'
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: unittests
- name: Save dependencies
shell: bash
run: |
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
git config advice.addIgnoredFile false

# docs:

# runs-on: ubuntu-latest
BRANCH=unit-tests-installed-dependencies
git fetch origin
git checkout -B "$BRANCH"
if git show-ref --verify --quiet "refs/remotes/origin/$BRANCH"; then
git rebase "origin/$BRANCH"
fi

# steps:
# - uses: actions/checkout@v3
# - name: Setup Python
# uses: actions/setup-python@v4
# with:
# python-version: '3.8'
# - name: Compile documentation
# run: |
# pip install -e .
# python -m pip install sphinx sphinx_rtd_theme
# cd docs
# make html
# - name: Deploy to GitHub pages
# if: ${{ github.ref == 'refs/heads/main' }}
# uses: JamesIves/github-pages-deploy-action@3.6.2
# with:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# BRANCH: gh-pages
# FOLDER: docs/_build/html
# CLEAN: true
pip freeze > installed_all_dependencies_${{ matrix.python-version }}_${{ matrix.os }}.txt
python test/check_dependency.py > installed_first_tier_dependencies_${{ matrix.python-version }}_${{ matrix.os }}.txt
git add installed_*dependencies*.txt
mv coverage.xml ./coverage_${{ matrix.python-version }}_${{ matrix.os }}.xml || true
git add -f ./coverage_${{ matrix.python-version }}_${{ matrix.os }}.xml || true
git commit -m "Update installed dependencies for Python ${{ matrix.python-version }} on ${{ matrix.os }}" || exit 0
git push origin "$BRANCH"
6 changes: 5 additions & 1 deletion .gitignore
Expand Up @@ -172,7 +172,7 @@ test/default
test/housing.json
test/nlp/default/transformer_ms/seq-classification.json

flaml/fabric/fanova/_fanova.c
flaml/fabric/fanova/*fanova.c
# local config files
*.config.local

Expand All @@ -184,3 +184,7 @@ notebook/lightning_logs/
lightning_logs/
flaml/autogen/extensions/tmp/
test/autogen/my_tmp/
catboost_*

# Internal configs
.pypirc
49 changes: 5 additions & 44 deletions README.md
Expand Up @@ -14,23 +14,17 @@
<br>
</p>

:fire: FLAML supports AutoML and Hyperparameter Tuning in [Microsoft Fabric Data Science](https://learn.microsoft.com/en-us/fabric/data-science/automated-machine-learning-fabric). In addition, we've introduced Python 3.11 support, along with a range of new estimators, and comprehensive integration with MLflow—thanks to contributions from the Microsoft Fabric product team.
:fire: FLAML supports AutoML and Hyperparameter Tuning in [Microsoft Fabric Data Science](https://learn.microsoft.com/en-us/fabric/data-science/automated-machine-learning-fabric). In addition, we've introduced Python 3.11 and 3.12 support, along with a range of new estimators, and comprehensive integration with MLflow—thanks to contributions from the Microsoft Fabric product team.

:fire: Heads-up: We have migrated [AutoGen](https://microsoft.github.io/autogen/) into a dedicated [github repository](https://github.com/microsoft/autogen). Alongside this move, we have also launched a dedicated [Discord](https://discord.gg/pAbnFJrkgZ) server and a [website](https://microsoft.github.io/autogen/) for comprehensive documentation.

:fire: The automated multi-agent chat framework in [AutoGen](https://microsoft.github.io/autogen/) is in preview from v2.0.0.

:fire: FLAML is highlighted in OpenAI's [cookbook](https://github.com/openai/openai-cookbook#related-resources-from-around-the-web).

:fire: [autogen](https://microsoft.github.io/autogen/) is released with support for ChatGPT and GPT-4, based on [Cost-Effective Hyperparameter Optimization for Large Language Model Generation Inference](https://arxiv.org/abs/2303.04673).
:fire: Heads-up: [AutoGen](https://microsoft.github.io/autogen/) has moved to a dedicated [GitHub repository](https://github.com/microsoft/autogen). FLAML no longer includes the `autogen` module—please use AutoGen directly.

## What is FLAML

FLAML is a lightweight Python library for efficient automation of machine
learning and AI operations. It automates workflow based on large language models, machine learning models, etc.
and optimizes their performance.

- FLAML enables building next-gen GPT-X applications based on multi-agent conversations with minimal effort. It simplifies the orchestration, automation and optimization of a complex GPT-X workflow. It maximizes the performance of GPT-X models and augments their weakness.
- FLAML enables economical automation and tuning for ML/AI workflows, including model selection and hyperparameter optimization under resource constraints.
- For common machine learning tasks like classification and regression, it quickly finds quality models for user-provided data with low computational resources. It is easy to customize or extend. Users can find their desired customizability from a smooth range.
- It supports fast and economical automatic tuning (e.g., inference hyperparameters for foundation models, configurations in MLOps/LMOps workflows, pipelines, mathematical/statistical models, algorithms, computing experiments, software configurations), capable of handling large search space with heterogeneous evaluation cost and complex constraints/guidance/early stopping.

Expand All @@ -46,50 +40,17 @@ FLAML requires **Python version >= 3.9**. It can be installed from pip:
pip install flaml
```

Minimal dependencies are installed without extra options. You can install extra options based on the feature you need. For example, use the following to install the dependencies needed by the [`autogen`](https://microsoft.github.io/autogen/) package.
Minimal dependencies are installed without extra options. You can install extra options based on the feature you need. For example, use the following to install the dependencies needed by the [`automl`](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML) module.

```bash
pip install "flaml[autogen]"
pip install "flaml[automl]"
```

Find more options in [Installation](https://microsoft.github.io/FLAML/docs/Installation).
Each of the [`notebook examples`](https://github.com/microsoft/FLAML/tree/main/notebook) may require a specific option to be installed.

## Quickstart

- (New) The [autogen](https://microsoft.github.io/autogen/) package enables the next-gen GPT-X applications with a generic multi-agent conversation framework.
It offers customizable and conversable agents which integrate LLMs, tools and human.
By automating chat among multiple capable agents, one can easily make them collectively perform tasks autonomously or with human feedback, including tasks that require using tools via code. For example,

```python
from flaml import autogen

assistant = autogen.AssistantAgent("assistant")
user_proxy = autogen.UserProxyAgent("user_proxy")
user_proxy.initiate_chat(
assistant,
message="Show me the YTD gain of 10 largest technology companies as of today.",
)
# This initiates an automated chat between the two agents to solve the task
```

Autogen also helps maximize the utility out of the expensive LLMs such as ChatGPT and GPT-4. It offers a drop-in replacement of `openai.Completion` or `openai.ChatCompletion` with powerful functionalites like tuning, caching, templating, filtering. For example, you can optimize generations by LLM with your own tuning data, success metrics and budgets.

```python
# perform tuning
config, analysis = autogen.Completion.tune(
data=tune_data,
metric="success",
mode="max",
eval_func=eval_func,
inference_budget=0.05,
optimization_budget=3,
num_samples=-1,
)
# perform inference for a test instance
response = autogen.Completion.create(context=test_instance, **config)
```

- With three lines of code, you can start using this economical and fast
AutoML engine as a [scikit-learn style estimator](https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML).

Expand Down
18 changes: 18 additions & 0 deletions flaml/automl/automl.py
Expand Up @@ -401,6 +401,24 @@ def custom_metric(
self._estimator_type = "classifier" if settings["task"] in CLASSIFICATION else "regressor"
self.best_run_id = None

def __getstate__(self):
"""Customize pickling to avoid serializing runtime-only objects.

MLflow's sklearn flavor serializes estimators via (cloud)pickle. During
AutoML fitting we may attach an internal mlflow integration instance
which holds `concurrent.futures.Future` objects and executors containing
thread locks, which are not picklable.
"""

state = self.__dict__.copy()
state.pop("mlflow_integration", None)
return state

def __setstate__(self, state):
self.__dict__.update(state)
# Ensure attribute exists post-unpickle.
self.mlflow_integration = None

def get_params(self, deep: bool = False) -> dict:
return self._settings.copy()

Expand Down
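The `__getstate__`/`__setstate__` hunk above can be exercised in isolation. Below is a minimal, self-contained sketch of the same pattern — `TrainedModel`, its attributes, and the `threading.Lock` standing in for the mlflow integration object are illustrative, not FLAML's real types:

```python
import pickle
import threading


class TrainedModel:
    """Toy model holding one unpicklable runtime-only attribute."""

    def __init__(self):
        self.best_loss = 0.25
        # Stand-in for the mlflow integration instance: a thread lock
        # cannot be pickled, so a plain pickle.dumps() would fail.
        self.mlflow_integration = threading.Lock()

    def __getstate__(self):
        state = self.__dict__.copy()
        state.pop("mlflow_integration", None)  # drop the unpicklable attribute
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        self.mlflow_integration = None  # ensure the attribute exists post-unpickle


model = TrainedModel()
clone = pickle.loads(pickle.dumps(model))  # succeeds despite the lock
```

After the round trip, `clone.best_loss` survives and `clone.mlflow_integration` is reset to `None` rather than a deserialized lock — the same contract the docstring in the hunk describes.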
11 changes: 7 additions & 4 deletions flaml/automl/data.py
Expand Up @@ -50,7 +50,10 @@ def load_openml_dataset(dataset_id, data_dir=None, random_state=0, dataset_forma
"""
import pickle

import openml
try:
import openml
except ImportError:
openml = None
from sklearn.model_selection import train_test_split

filename = "openml_ds" + str(dataset_id) + ".pkl"
Expand All @@ -61,15 +64,15 @@ def load_openml_dataset(dataset_id, data_dir=None, random_state=0, dataset_forma
dataset = pickle.load(f)
else:
print("download dataset from openml")
dataset = openml.datasets.get_dataset(dataset_id)
dataset = openml.datasets.get_dataset(dataset_id) if openml else None
if not os.path.exists(data_dir):
os.makedirs(data_dir)
with open(filepath, "wb") as f:
pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
print("Dataset name:", dataset.name)
print("Dataset name:", dataset.name) if dataset else None
try:
X, y, *__ = dataset.get_data(target=dataset.default_target_attribute, dataset_format=dataset_format)
except ValueError:
except (ValueError, AttributeError, TypeError):
from sklearn.datasets import fetch_openml

X, y = fetch_openml(data_id=dataset_id, return_X_y=True)
Expand Down
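The optional-import guard added to `load_openml_dataset` is a reusable pattern: import if available, fall back to `None`, and branch on it later. A minimal sketch — the helper name `optional_import` and the module names in the usage lines are illustrative, not part of FLAML's API:

```python
def optional_import(module_name):
    """Return the named module if importable, else None."""
    try:
        return __import__(module_name)
    except ImportError:  # ModuleNotFoundError subclasses ImportError
        return None


json_mod = optional_import("json")            # stdlib module: importable
missing = optional_import("no_such_module")   # hypothetical name: not installed
```

Callers then guard each use (`dataset = mod.get(...) if mod else None`), exactly as the patched `data.py` does with `openml`.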
14 changes: 13 additions & 1 deletion flaml/automl/ml.py
Expand Up @@ -127,9 +127,21 @@ def metric_loss_score(
import datasets

datasets_metric_name = huggingface_submetric_to_metric.get(metric_name, metric_name.split(":")[0])
metric = datasets.load_metric(datasets_metric_name, trust_remote_code=True)
metric_mode = huggingface_metric_to_mode[datasets_metric_name]

# datasets>=3 removed load_metric; prefer evaluate if available
try:
import evaluate

metric = evaluate.load(datasets_metric_name, trust_remote_code=True)
except Exception:
if hasattr(datasets, "load_metric"):
metric = datasets.load_metric(datasets_metric_name, trust_remote_code=True)
else:
from datasets import load_metric as _load_metric # older datasets

metric = _load_metric(datasets_metric_name, trust_remote_code=True)

if metric_name.startswith("seqeval"):
y_processed_true = [[labels[tr] for tr in each_list] for each_list in y_processed_true]
elif metric in ("pearsonr", "spearmanr"):
Expand Down
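The `ml.py` hunk works around `datasets>=3` removing `load_metric` by preferring `evaluate.load` and falling back. A standalone sketch of the same dispatch, assuming the calls shown in the hunk (`evaluate.load(..., trust_remote_code=True)`, `datasets.load_metric(...)`); the smoke test at the bottom installs a stub `evaluate` module so the sketch runs even where neither library is present:

```python
import importlib
import sys
import types


def load_metric_compat(name):
    """Prefer `evaluate.load`; fall back to `datasets.load_metric` on older stacks."""
    try:
        evaluate = importlib.import_module("evaluate")
        return evaluate.load(name, trust_remote_code=True)
    except Exception:
        datasets = importlib.import_module("datasets")
        if hasattr(datasets, "load_metric"):
            return datasets.load_metric(name, trust_remote_code=True)
        raise


# Smoke test with a stub module, so the sketch is runnable without either library.
stub = types.ModuleType("evaluate")
stub.load = lambda name, trust_remote_code=False: ("loaded", name)
sys.modules["evaluate"] = stub
metric = load_metric_compat("accuracy")
```

With real libraries installed, the stub lines are unnecessary and the first branch returns an actual `evaluate` metric object.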
2 changes: 1 addition & 1 deletion flaml/automl/model.py
Expand Up @@ -111,7 +111,7 @@ def limit_resource(memory_limit, time_limit):
pass


class BaseEstimator:
class BaseEstimator(sklearn.base.ClassifierMixin, sklearn.base.BaseEstimator):
"""The abstract class for all learners.

Typical examples:
Expand Down
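The one-line `model.py` change exists because newer scikit-learn releases call `__sklearn_tags__` on estimators, which plain classes lack (matching the "Fix estimators has no attribute __sklearn_tags__" commit). A dependency-free sketch of the mechanism — `SklearnBaseLike` is a toy stand-in for `sklearn.base.BaseEstimator`, and the tags dict is illustrative (real scikit-learn returns a `Tags` object, not a dict):

```python
class SklearnBaseLike:
    """Stand-in for a base class that supplies the tags protocol."""

    def __sklearn_tags__(self):
        return {"estimator_type": "classifier"}


class OldEstimator:
    """Pre-fix shape: no tags protocol, so newer sklearn would raise AttributeError."""


class NewEstimator(SklearnBaseLike):
    """Post-fix shape: inherits the protocol from the base class."""


has_old = hasattr(OldEstimator(), "__sklearn_tags__")   # False
tags = NewEstimator().__sklearn_tags__()                # inherited, works
```

Inheriting from the real `sklearn.base.BaseEstimator` (as the diff does) supplies this and related protocol methods without FLAML reimplementing them.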
8 changes: 8 additions & 0 deletions flaml/automl/nlp/huggingface/training_args.py
Expand Up @@ -77,6 +77,14 @@ class TrainingArgumentsForAuto(TrainingArguments):

logging_steps: int = field(default=500, metadata={"help": "Log every X updates steps."})

# Newer versions of HuggingFace Transformers may access `TrainingArguments.generation_config`
# (e.g., in generation-aware trainers/callbacks). Keep this attribute to remain compatible
# while defaulting to None for non-generation tasks.
generation_config: Optional[object] = field(
default=None,
metadata={"help": "Optional generation config (or path) used by generation-aware trainers."},
)

@staticmethod
def load_args_from_console():
from dataclasses import fields
Expand Down
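The new `generation_config` field can be reproduced in a toy dataclass to confirm the default behavior; `ArgsSketch` below is illustrative and independent of transformers:

```python
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class ArgsSketch:
    """Toy version of TrainingArgumentsForAuto showing the compatibility field."""

    logging_steps: int = field(default=500, metadata={"help": "Log every X updates steps."})
    # Defaults to None so non-generation tasks are unaffected, while code that
    # probes `args.generation_config` finds the attribute instead of raising.
    generation_config: Optional[object] = field(
        default=None,
        metadata={"help": "Optional generation config used by generation-aware trainers."},
    )


args = ArgsSketch()
```

Because dataclass fields with defaults are plain attributes, `getattr(args, "generation_config")` succeeds everywhere the newer Transformers code paths expect it.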