microsoft
diff --git a/.coveragerc b/.coveragerc
Lines changed: 4 additions & 2 deletions
diff --git a/.github/workflows/CD.yml b/.github/workflows/CD.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/deploy-website.yml b/.github/workflows/deploy-website.yml
Lines changed: 4 additions & 4 deletions
diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml
Lines changed: 9 additions & 8 deletions
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
Lines changed: 9 additions & 15 deletions
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/flaml/autogen/__init__.py b/flaml/autogen/__init__.py
Lines changed: 9 additions & 0 deletions
diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py
Lines changed: 13 additions & 7 deletions
diff --git a/flaml/automl/data.py b/flaml/automl/data.py
Lines changed: 14 additions & 1 deletion
diff --git a/flaml/automl/model.py b/flaml/automl/model.py
Lines changed: 5 additions & 2 deletions
@@ -1,5 +1,7 @@
 [run]
 branch = True
-source = flaml
+source =
+  flaml
 omit =
-  *test*
+  */test/*
+  */flaml/autogen/*
@@ -13,7 +13,7 @@ jobs:
     strategy:
       matrix:
         os: ["ubuntu-latest"]
-        python-version: ["3.10"]
+        python-version: ["3.12"]
     runs-on: ${{ matrix.os }}
     environment: package
     steps:
 
@@ -37,11 +37,11 @@ jobs:
       - name: setup python
         uses: actions/setup-python@v4
         with:
-          python-version: "3.10"
+          python-version: "3.12"
       - name: pydoc-markdown install
         run: |
           python -m pip install --upgrade pip
-          pip install pydoc-markdown==4.7.0
+          pip install pydoc-markdown==4.7.0 setuptools
       - name: pydoc-markdown run
         run: |
           pydoc-markdown
@@ -73,11 +73,11 @@ jobs:
       - name: setup python
         uses: actions/setup-python@v4
         with:
-          python-version: "3.10"
+          python-version: "3.12"
       - name: pydoc-markdown install
         run: |
           python -m pip install --upgrade pip
-          pip install pydoc-markdown==4.7.0
+          pip install pydoc-markdown==4.7.0 setuptools
       - name: pydoc-markdown run
         run: |
           pydoc-markdown
 
@@ -4,14 +4,15 @@
 name: OpenAI
 
 on:
-  pull_request:
-    branches: ['main']
-    paths:
-      - 'flaml/autogen/**'
-      - 'test/autogen/**'
-      - 'notebook/autogen_openai_completion.ipynb'
-      - 'notebook/autogen_chatgpt_gpt4.ipynb'
-      - '.github/workflows/openai.yml'
+  workflow_dispatch:
+#   pull_request:
+#     branches: ['main']
+#     paths:
+#       - 'flaml/autogen/**'
+#       - 'test/autogen/**'
+#       - 'notebook/autogen_openai_completion.ipynb'
+#       - 'notebook/autogen_chatgpt_gpt4.ipynb'
+#       - '.github/workflows/openai.yml'
 
 permissions: {}
 
 
@@ -40,10 +40,12 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.10", "3.11"]
+        python-version: ["3.10", "3.11", "3.12"]
         exclude:
           - os: macos-latest
-            python-version: "3.10"
+            python-version: "3.10"  # macOS runners will hang on python 3.10 for unknown reasons
+          - os: macos-latest
+            python-version: "3.12"  # macOS runners will hang on python 3.12 for unknown reasons
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
@@ -67,11 +69,6 @@ jobs:
           pip install -e .
           python -c "import flaml"
           pip install -e .[test]
-      - name: On Ubuntu python 3.10, install pyspark 3.4.1
-        if: matrix.python-version == '3.10' && matrix.os == 'ubuntu-latest'
-        run: |
-          pip install pyspark==3.4.1
-          pip list | grep "pyspark"
       - name: On Ubuntu python 3.11, install pyspark 3.5.1
         if: matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest'
         run: |
@@ -106,17 +103,17 @@ jobs:
         run: |
           pip cache purge
       - name: Test with pytest
-        if: matrix.python-version != '3.10'
+        if: matrix.python-version != '3.11'
         run: |
           pytest test/ --ignore=test/autogen --reruns 2 --reruns-delay 10
       - name: Coverage
-        if: matrix.python-version == '3.10'
+        if: matrix.python-version == '3.11'
         run: |
           pip install coverage
           coverage run -a -m pytest test --ignore=test/autogen --reruns 2 --reruns-delay 10
           coverage xml
       - name: Upload coverage to Codecov
-        if: matrix.python-version == '3.10'
+        if: matrix.python-version == '3.11'
         uses: codecov/codecov-action@v3
         with:
           file: ./coverage.xml
@@ -130,15 +127,12 @@ jobs:
 
           BRANCH=unit-tests-installed-dependencies
           git fetch origin
-          git checkout -B "$BRANCH"
-          if git show-ref --verify --quiet "refs/remotes/origin/$BRANCH"; then
-            git rebase "origin/$BRANCH"
-          fi
+          git checkout -B "$BRANCH" "origin/$BRANCH"
 
           pip freeze > installed_all_dependencies_${{ matrix.python-version }}_${{ matrix.os }}.txt
           python test/check_dependency.py > installed_first_tier_dependencies_${{ matrix.python-version }}_${{ matrix.os }}.txt
           git add installed_*dependencies*.txt
           mv coverage.xml ./coverage_${{ matrix.python-version }}_${{ matrix.os }}.xml || true
           git add -f ./coverage_${{ matrix.python-version }}_${{ matrix.os }}.xml || true
           git commit -m "Update installed dependencies for Python ${{ matrix.python-version }} on ${{ matrix.os }}" || exit 0
-          git push origin "$BRANCH"
+          git push origin "$BRANCH" --force
@@ -60,6 +60,7 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 cover/
+junit
 
 # Translations
 *.mo
 
@@ -1,3 +1,12 @@
+import warnings
+
 from .agentchat import *
 from .code_utils import DEFAULT_MODEL, FAST_MODEL
 from .oai import *
+
+warnings.warn(
+    "The `flaml.autogen` module is deprecated and will be removed in a future release. "
+    "Please refer to `https://github.com/microsoft/autogen` for latest usage.",
+    DeprecationWarning,
+    stacklevel=2,
+)
@@ -4,6 +4,7 @@
 #  * project root for license information.
 from __future__ import annotations
 
+import inspect
 import json
 import logging
 import os
@@ -177,10 +178,11 @@ def custom_metric(
                 ['auto', 'cv', 'holdout'].
             split_ratio: A float of the validation data percentage for holdout.
             n_splits: An integer of the number of folds for cross-validation.
-            log_type: A string of the log type, one of
-                ['better', 'all'].
-                'better' only logs configs with better loss than previos iters
-                'all' logs all the tried configs.
+            log_type: Specifies which logs to save. One of ['better', 'all']. Default is 'better'.
+                - 'better': Logs configs and models (if `model_history` is True) only when the loss improves,
+                  to `log_file_name` and MLflow, respectively.
+                - 'all': Logs all configs and models (if `model_history` is True), regardless of performance.
+                Note: Configs are always logged to MLflow if MLflow logging is enabled.
             model_history: A boolean of whether to keep the best
                 model per estimator. Make sure memory is large enough if setting to True. Default False.
             log_training_metric: A boolean of whether to log the training
@@ -2174,7 +2176,7 @@ def _search_parallel(self):
                 use_spark=True,
                 force_cancel=self._force_cancel,
                 mlflow_exp_name=self._mlflow_exp_name,
-                automl_info=(mlflow_log_latency,),  # pass automl info to tune.run
+                automl_info=(mlflow_log_latency, self._log_type),  # pass automl info to tune.run
                 extra_tag=self.autolog_extra_tag,
                 # raise_on_failed_trial=False,
                 # keep_checkpoints_num=1,
@@ -2237,7 +2239,9 @@ def _search_parallel(self):
                 if better or self._log_type == "all":
                     self._log_trial(search_state, estimator)
                 if self.mlflow_integration:
-                    self.mlflow_integration.record_state(self, search_state, estimator)
+                    self.mlflow_integration.record_state(
+                        self, search_state, estimator, better or self._log_type == "all"
+                    )
 
     def _log_trial(self, search_state, estimator):
         if self._training_log:
@@ -2479,7 +2483,9 @@ def _search_sequential(self):
                 if better or self._log_type == "all":
                     self._log_trial(search_state, estimator)
                 if self.mlflow_integration:
-                    self.mlflow_integration.record_state(self, search_state, estimator)
+                    self.mlflow_integration.record_state(
+                        self, search_state, estimator, better or self._log_type == "all"
+                    )
 
                 logger.info(
                     " at {:.1f}s,\testimator {}'s best error={:.4f},\tbest estimator {}'s best error={:.4f}".format(
 
@@ -5,6 +5,7 @@
 import json
 import os
 import random
+import re
 import uuid
 from datetime import datetime, timedelta
 from decimal import ROUND_HALF_UP, Decimal
@@ -708,6 +709,14 @@ def auto_convert_dtypes_pandas(
     """
     if na_values is None:
         na_values = {"NA", "na", "NULL", "null", ""}
+    # Remove the empty string separately (handled by the regex `^\s*$`)
+    vals = [re.escape(v) for v in na_values if v != ""]
+    # Build inner alternation group
+    inner = "|".join(vals) if vals else ""
+    if inner:
+        pattern = re.compile(rf"^\s*(?:{inner})?\s*$")
+    else:
+        pattern = re.compile(r"^\s*$")
 
     df_converted = df.convert_dtypes()
     schema = {}
@@ -721,7 +730,11 @@ def auto_convert_dtypes_pandas(
     for col in df.columns:
         series = df[col]
         # Replace NA-like values if string
-        series_cleaned = series.map(lambda x: np.nan if isinstance(x, str) and x.strip() in na_values else x)
+        if series.dtype == object:
+            mask = series.astype(str).str.match(pattern)
+            series_cleaned = series.where(~mask, np.nan)
+        else:
+            series_cleaned = series
 
         # Skip conversion if already non-object data type, except bool which can potentially be categorical
         if (
 
@@ -2347,8 +2347,11 @@ def config2params(self, config: dict) -> dict:
         params = super().config2params(config)
         params["tol"] = params.get("tol", 0.0001)
         params["loss"] = params.get("loss", None)
-        if params["loss"] is None and self._task.is_classification():
-            params["loss"] = "log_loss" if SKLEARN_VERSION >= "1.1" else "log"
+        if params["loss"] is None:
+            if self._task.is_classification():
+                params["loss"] = "log_loss" if SKLEARN_VERSION >= "1.1" else "log"
+            else:
+                params["loss"] = "squared_error"
         if not self._task.is_classification() and "n_jobs" in params:
             params.pop("n_jobs")