diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml
index 3eba9233b..d35293189 100644
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -21,9 +21,9 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v5
     - name: Set up Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v6
       with:
         python-version: '3.x'
     - name: Install dependencies
diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index efb84d712..9f5e18475 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -5,7 +5,7 @@ on:
   pull_request:
     branches:
       - master
-      - 1.0.x
+      - 1.1.x
 
 jobs:
   install_and_run_tests:
@@ -13,7 +13,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12', '3.13' ]
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14t']
 
     services:
       # mongo:
@@ -41,9 +41,9 @@ jobs:
 
     steps:
       - name: Checkout source
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index 0d3fc1b10..9c9d14cae 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -11,12 +11,12 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12', '3.13' ]
+        python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13', '3.14t' ]
     steps:
       - name: Checkout source
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
diff --git a/CHANGES.txt b/CHANGES.txt
index 970cb4e6f..dc8ee9420 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,35 @@
 
+v1.1.0 (2026/05/27)
+    Highlights:
+        - Added Python 3.14 support and dropped Python 3.8 support.
+        - Reworked biothings CLI configuration, pathing, and dataplugin command organization.
+        - Added MongoDB build cleanup management APIs and commands.
+
+    biothings.hub improvements:
+        - Added prefix support to DataTransformMDB. ([#411](https://github.com/biothings/biothings.api/pull/411))
+        - Added TAR and Zstandard file handling. ([#424](https://github.com/biothings/biothings.api/pull/424), [#428](https://github.com/biothings/biothings.api/pull/428))
+        - Added support for custom local uploader and dumper classes in dataplugin manifests. ([#431](https://github.com/biothings/biothings.api/pull/431))
+        - Added MongoDB build cleanup validation and deletion tooling. ([#437](https://github.com/biothings/biothings.api/pull/437))
+
+    biothings.utils improvements:
+        - Fixed dict_sweep handling for NaN-like values, including pandas.NA and pandas.NaT. ([#442](https://github.com/biothings/biothings.api/pull/442))
+        - Centralized orjson usage across the library. ([#435](https://github.com/biothings/biothings.api/pull/435))
+
+    biothings.web improvements:
+        - Removed deprecated Google Analytics event support. ([#439](https://github.com/biothings/biothings.api/pull/439))
+        - Removed old doc_type and deprecated Elasticsearch compatibility code.
+        - Improved Elasticsearch exception handling and query response behavior.
+        - Optimized Elasticsearch memory usage when using scroll API.
+
+    biothings.cli improvements:
+        - Added CLI config and pathing commands. ([#418](https://github.com/biothings/biothings.api/pull/418), [#422](https://github.com/biothings/biothings.api/pull/422))
+        - Added CLI dump mark-success support. ([#423](https://github.com/biothings/biothings.api/pull/423))
+        - Fixed Typer/rich_utils loading behavior. ([#426](https://github.com/biothings/biothings.api/pull/426))
+
+    Misc improvements:
+        - Updated dependencies and package metadata for Python 3.9+, including tornado, typer, pymongo, orjson, and zstandard.
+        - Updated GitHub Actions test, build, and publish workflows.
+
 v1.0.2 (2025/10/15)
     Bugfix:
         - Fixed an import issue in inspector.py that was causing the mapping inspect to fail.
diff --git a/biothings/__init__.py b/biothings/__init__.py
index 67d5f1993..93255fdd2 100644
--- a/biothings/__init__.py
+++ b/biothings/__init__.py
@@ -8,7 +8,7 @@ class _version_info(NamedTuple):
     micro: int
 
 
-version_info = _version_info(1, 0, 2)
+version_info = _version_info(1, 1, 0)
 __version__ = ".".join(map(str, version_info))
 
 
diff --git a/biothings/cli/__init__.py b/biothings/cli/__init__.py
index cfb946c6d..2dbe6f2c1 100644
--- a/biothings/cli/__init__.py
+++ b/biothings/cli/__init__.py
@@ -2,34 +2,41 @@
 Entrypoint for the biothings-cli tool
 """
 
+from typing import Literal
 import importlib.util
 import logging
 import os
 import sys
 
+import typer
+from rich.logging import RichHandler
 
-from biothings.cli.settings import (
-    setup_biothings_configuration,
-    setup_commandline_configuration,
-    setup_logging_configuration,
-)
+from biothings.cli.commands.admin import build_admin_application
+from biothings.cli.commands.config import config_application, load_configuration
+from biothings.cli.commands.dataplugin import dataplugin_application
+from biothings.cli.commands.pathing import path_application
 
 
-def check_module_import_status(module: str) -> bool:
+def setup_logging_configuration(logging_level: Literal[10, 20, 30, 40, 50]) -> None:
     """
-    Verify that we can import a module prior to proceeding with creating our commandline
-    tooling that depends on those modules
+    Configures the logging based off our environment configuration
     """
-    module_specification = importlib.util.find_spec(module)
-    status = module_specification is not None
-    return status
+    rich_handler = RichHandler(
+        level=logging_level,
+        markup=True,
+        rich_tracebacks=False,  # typer creates it already
+        show_path=False,
+        tracebacks_suppress=[typer],
+    )
+    logging.basicConfig(level=logging_level, format="%(message)s", datefmt="[%X]", handlers=[rich_handler])
 
 
 def main():
     """
-    The entrypoint for running the BioThings CLI to test your local data plugin
+    The entrypoint for running the BioThings CLI
     """
-    typer_status = check_module_import_status("typer")
+    module_specification = importlib.util.find_spec("typer")
+    typer_status = module_specification is not None
     if not typer_status:
         logging.error(
             (
@@ -48,14 +55,14 @@ def main():
     cli_debug_flag = os.environ.get("BTCLI_DEBUG", False)
     cli_rich_traceback_flag = os.environ.get("BTCLI_RICH_TRACEBACK", False)
 
-    cli = setup_commandline_configuration(debug=cli_debug_flag, rich_traceback=cli_rich_traceback_flag)
+    admin_application = build_admin_application(debug=cli_debug_flag, rich_traceback=cli_rich_traceback_flag)
     logging_level = logging.WARNING
     if cli_debug_flag:
         logging_level = logging.DEBUG
     setup_logging_configuration(logging_level)
-    setup_biothings_configuration()
+    load_configuration()
 
-    from biothings.cli.dataplugin import dataplugin_application
-
-    cli.add_typer(dataplugin_application, name="dataplugin")
-    return cli()
+    admin_application.add_typer(dataplugin_application, name="dataplugin")
+    admin_application.add_typer(config_application, name="config")
+    admin_application.add_typer(path_application, name="path")
+    return admin_application()
diff --git a/biothings/cli/assistant.py b/biothings/cli/assistant.py
index 0e0ecf7d0..7aad1f5e8 100644
--- a/biothings/cli/assistant.py
+++ b/biothings/cli/assistant.py
@@ -38,7 +38,7 @@ class CLIAssistant(BaseAssistant):
 
     plugin_type = "CLI"
 
-    def __init__(self, plugin_name: Optional[str] = None, job_manager: "JobManager" = None):
+    def __init__(self, plugin_name: Optional[str] = None, job_manager: CLIJobManager = None):
         from biothings import config
         from biothings.hub.databuild.builder import BuilderManager
         from biothings.hub.dataindex.indexer import IndexManager
diff --git a/biothings/cli/commands/__init__.py b/biothings/cli/commands/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/biothings/cli/commands/admin.py b/biothings/cli/commands/admin.py
new file mode 100644
index 000000000..e735c57fe
--- /dev/null
+++ b/biothings/cli/commands/admin.py
@@ -0,0 +1,48 @@
+"""
+Builder for the root biothings-cli typer application
+
+> Exception rendering (typer pretty exceptions)
+> Traceback depth (sys.tracebacklimit)
+    > Both are driven by the debug / rich-traceback flags
+"""
+
+import sys
+
+import typer
+import typer.rich_utils
+
+
+def build_admin_application(debug: bool, rich_traceback: bool) -> typer.Typer:
+    """
+    Constructs the root typer application of the biothings-cli
+
+    Args:
+        debug: when truthy, enables pretty exceptions with local variables
+            shown and raises the traceback limit to 1000
+        rich_traceback: when truthy, enables pretty exceptions and raises
+            the traceback limit to 1000
+
+    Returns:
+        The configured typer.Typer instance
+
+    Side effects: overwrites sys.tracebacklimit (1 unless a flag is set)
+    and typer.rich_utils.STYLE_HELPTEXT
+    """
+    show_full_traceback = bool(rich_traceback or debug)
+    sys.tracebacklimit = 1000 if show_full_traceback else 1
+
+    # an empty style prevents dimming the help text from the 2nd line
+    # see: https://github.com/tiangolo/typer/issues/437#issuecomment-1224149402
+    typer.rich_utils.STYLE_HELPTEXT = ""
+
+    application_options = {
+        "help": "[green]BioThings Admin CLI to test your local data plugins. See helps for each command for specific usage.[/green]",
+        "rich_help_panel": "Help and Others",
+        "rich_markup_mode": "rich",
+        "context_settings": {"help_option_names": ["-h", "--help"]},
+        "no_args_is_help": True,
+        "pretty_exceptions_show_locals": bool(debug),
+        "pretty_exceptions_enable": show_full_traceback,
+    }
+
+    return typer.Typer(**application_options)
diff --git a/biothings/cli/commands/config.py b/biothings/cli/commands/config.py
new file mode 100644
index 000000000..1f2df74ba
--- /dev/null
+++ b/biothings/cli/commands/config.py
@@ -0,0 +1,350 @@
+"""
+Module for creating the CLI application for the configuration interface
+
+Provides the following capabilities:
+
+- View the default configuration file
+- Generate a local configuration file
+- Delete a local configuration file
+- Modify the default configuration file
+
+By default, a config.py module isn't required to run the biothings-cli locally.
+A default config module is set up at launch; however, an additional config module
+can be provided to override the default config settings.
+
+The available config settings can be found at biothings.hub.default_config module (note that
+not all settings are relevant to the CLI)
+
+*** HUB MODE ***
+config.py
+.biothings_hub
+  .data_src_database
+  archive
+  biothings_hubdb
+data_plugin0
+  ...
+data_plugin1
+  ...
+data_plugin2
+  ...
+
+*** SINGULAR MODE ***
+config.py
+.biothings_hub
+  .data_src_database
+  archive
+  biothings_hubdb
+manifest.json
+parser.py
+
+*** Example Configuration ***
+########################################
+# DATA PLUGIN CONFIGURATION VARIABLES #
+########################################
+DATA_SRC_DATABASE = '.data_src_database'
+DATA_HUB_DB_DATABASE = 'data_hub_db_database'
+HUB_DB_BACKEND = {
+    "module": "biothings.utils.sqlite3",
+    "sqlite_db_folder": ".biothings_hub"
+}
+DATA_ARCHIVE_ROOT = ".biothings_hub/archive"
+
+# Add new entry in DOCKER_CONFIG if you want to use a different docker host for your
+# docker-based data plugin, other than the default docker host running on your localhost.
+DOCKER_CONFIG = {
+    "docker1": {"tls_cert_path": None, "tls_key_path": None, "client_url": ""},
+    "localhost": {"client_url": "unix://var/run/docker.sock"},
+}
+"""
+import enum
+import importlib
+import importlib.util
+import json
+import logging
+import os
+import pathlib
+import sys
+import types
+from typing import Union
+
+import typer
+from rich import box
+from rich.console import Console
+from rich.panel import Panel
+from typing_extensions import Annotated
+
+from biothings.utils.common import DummyConfig
+from biothings.utils.configuration import ConfigurationError
+
+
+SHORT_HELP = "[green]CLI tool for handling the biothings configuration.[/green]"
+FULL_HELP = (
+    f"{SHORT_HELP}"
+    "\n"
+    "\n[green] * View the default configuration file [/green]"
+    "\n[green] * Generate a local configuration file [/green]"
+    "\n[green] * Modify the backend storage configuration [/green]"
+    "\n"
+    "\nBy default, a config.py module isn't required to the biothings-cli locally."
+    "\nA default config module is setup at launch, however an additional config module "
+    "\ncan be provided to override the default config settings."
+    "\n"
+    "\nThe available config settings can be found at biothings.hub.default_config module "
+    "\n(note that not all settings are relevant to the CLI)"
+)
+
+config_application = typer.Typer(
+    help=FULL_HELP,
+    short_help=SHORT_HELP,
+    no_args_is_help=True,
+    rich_markup_mode="rich",
+)
+
+logger = logging.getLogger(name="biothings-cli")
+
+
+@config_application.command(name="display")
+def display_default_configuration():
+    """
+    Displays the default configuration stored for the biothings-cli
+    """
+    # NOTE: the docstring above doubles as the typer help text for this command
+    rendered_defaults = build_configuration_repr(default_biothings_configuration())
+    default_panel = Panel(
+        rendered_defaults + "\n",
+        title="[white]Default Biothings Configuration[/white]",
+        title_align="left",
+        box=box.ROUNDED,
+    )
+    Console().print(default_panel)
+
+
+@config_application.command(name="create")
+def create_local_configuration(
+    db_backend: Annotated[
+        bool,
+        typer.Option("--override-backend", help="If provided, will prompt for overriding the HUB_DB_BACKEND value"),
+    ] = False,
+    index_backend: Annotated[
+        bool,
+        typer.Option("--override-index", help="If provided, will prompt for overriding the INDEX_CONFIG value"),
+    ] = False,
+):
+    """
+    Creates a local configuration file (named config.py) in the current working directory
+    """
+    # The docstring above is used as the command help text, so details live here:
+    # starts from the defaults, optionally prompts for the HUB_DB_BACKEND / INDEX_CONFIG
+    # overrides, then writes config.py (replacing any existing one) and displays the result
+    configuration = default_biothings_configuration()
+
+    class BackendType(str, enum.Enum):
+        SQLITE3 = "sqlite3"
+        MONGODB = "mongodb"
+
+    if db_backend:
+        db_type = typer.prompt(
+            "What backend would you like to use? (supported options \"sqlite3\"|\"mongodb\")",
+            type=BackendType,
+        )
+        if db_type == BackendType.SQLITE3:
+            backend = {
+                "module": "biothings.utils.sqlite3",
+                "sqlite_db_folder": ".biothings_hub",
+            }
+        elif db_type == BackendType.MONGODB:
+            # typer.prompt falls back to the default uri when the answer is left empty
+            backend = {
+                "module": "biothings.utils.mongo",
+                "uri": typer.prompt(
+                    "Please specify the server uri for mongodb", default="mongodb://localhost:27017"
+                ),
+            }
+        logger.info("Setting HUB_DB_BACKEND:\n%s", json.dumps(backend, indent=2))
+        configuration["HUB_DB_BACKEND"] = backend
+
+    if index_backend:
+        host_address = typer.prompt(
+            "Please specify the host address for elasticsearch you would like to use", default="http://localhost:9200"
+        )
+        backend = {
+            "indexer_select": {},
+            "env": {
+                "commandhub": {
+                    "host": host_address,
+                    "indexer": {
+                        "args": {
+                            "request_timeout": 300,
+                            "retry_on_timeout": True,
+                            "max_retries": 10
+                        }
+                    }
+                }
+            }
+        }
+        logger.info("Setting INDEX_CONFIG:\n%s", json.dumps(backend, indent=2))
+        configuration["INDEX_CONFIG"] = backend
+
+    # render first so a formatting failure cannot leave a truncated config.py behind
+    configuration_repr = build_configuration_repr(configuration)
+    with open("config.py", "w", encoding="utf-8") as handle:
+        handle.write(configuration_repr)
+
+    console = Console()
+    panel = Panel(
+        f"{configuration_repr}\n",
+        title="[white]Local Biothings Configuration[/white]",
+        title_align="left",
+        box=box.ROUNDED,
+    )
+    console.print(panel)
+
+
+
+def build_configuration_repr(configuration_values: dict) -> str:
+    """
+    Generates a Python-source string representation of the configuration
+    Mappings are dumped as JSON with bare true/false/null rewritten to True/False/None
+    (string contents are left alone); str and Path values become escaped literals
+    """
+    import re  # function-scope import, only this function needs it
+    header_string = (
+        "########################################\n"
+        "# DATA PLUGIN CONFIGURATION VARIABLES  #\n"
+        "########################################"
+    )
+    python_names = {"true": "True", "false": "False", "null": "None"}
+    json_token = re.compile(r'"(?:[^"\\]|\\.)*"|\b(?:true|false|null)\b')
+    configuration_repr = [header_string]
+    for configuration_key, configuration_value in configuration_values.items():
+        if isinstance(configuration_value, dict):
+            rendered = json.dumps(configuration_value, indent=2, default=str)
+            rendered = json_token.sub(lambda m: python_names.get(m.group(), m.group()), rendered)
+        elif isinstance(configuration_value, (pathlib.Path, str)):
+            rendered = json.dumps(str(configuration_value), ensure_ascii=False)
+        else:
+            rendered = f"{configuration_value}"
+        configuration_repr.append(f"{configuration_key} = {rendered}")
+    return "\n".join(configuration_repr).rstrip("\n")
+
+
+def default_biothings_configuration() -> dict:
+    """
+    Assembles the default biothings configuration as a new dictionary
+
+    Holds every default value so it can be displayed, written out to a
+    local config.py or overridden key by key
+    """
+    indexer_arguments = {
+        "request_timeout": 300,
+        "retry_on_timeout": True,
+        "max_retries": 10,
+    }
+    index_configuration = {
+        "indexer_select": {},
+        "env": {
+            "commandhub": {
+                "host": "http://localhost:9200",
+                "indexer": {"args": indexer_arguments},
+            }
+        },
+    }
+    hub_database_backend = {
+        "module": "biothings.utils.sqlite3",
+        "sqlite_db_folder": ".biothings_hub",
+    }
+
+    # general settings; the working directory and cpu count are read on every call
+    hub_configuration = {
+        "HUB_DB_BACKEND": hub_database_backend,
+        "DATA_SRC_SERVER": "localhost",
+        "DATA_SRC_DATABASE": "data_src_database",
+        "DATA_ARCHIVE_ROOT": ".biothings_hub/archive",
+        "LOG_FOLDER": ".biothings_hub/logs",
+        "DATA_PLUGIN_FOLDER": pathlib.Path.cwd(),
+        "DATA_TARGET_SERVER": "localhost",
+        "DATA_TARGET_PORT": 27017,
+        "DATA_TARGET_DATABASE": "plugin-hub",
+        "INDEX_CONFIG": index_configuration,
+        "RUN_DIR": pathlib.Path.cwd(),
+        "HUB_MAX_WORKERS": os.cpu_count(),
+        "MAX_QUEUED_JOBS": 1000,
+    }
+
+    # attributes specific to the biothings-cli application, stored last
+    cli_configuration = {
+        "BIOTHINGS_CLI_PATH": "biothings_hub/path",
+    }
+    return {**hub_configuration, **cli_configuration}
+
+
+
+def load_local_configuration() -> types.ModuleType:
+    """
+    Loads config.py from the current working directory when it exists and registers it as
+    both `config` and `biothings.config` in sys.modules; returns None when no file is found
+    """
+    current_directory = pathlib.Path.cwd()
+    config_module_file = current_directory.joinpath("config.py")
+    if config_module_file.exists():
+        spec = importlib.util.spec_from_file_location("config", location=str(config_module_file))
+        config_module = importlib.util.module_from_spec(spec)
+        sys.modules["config"] = config_module
+        sys.modules["biothings.config"] = config_module
+        # the module is registered first; exec_module then runs the body of config.py
+        spec.loader.exec_module(config_module)
+        # attach the module named by HUB_DB_BACKEND["module"] as config.hub_db
+        try:
+            backend = getattr(config_module, "HUB_DB_BACKEND")
+            setattr(config_module, "hub_db", importlib.import_module(backend["module"]))
+        except ImportError as import_err:
+            logging.exception(import_err)
+            raise import_err
+        # re-raise any ConfigurationError instance that was stored as a module attribute
+        for attr in dir(config_module):
+            value = getattr(config_module, attr)
+            if isinstance(value, ConfigurationError):
+                raise ConfigurationError(f"{attr}: {value}")
+
+        return config_module
+    return None
+
+
+def load_default_configuration():
+    """
+    Populates a DummyConfig named "config" with every default value, attaches the
+    HUB_DB_BACKEND module as `hub_db`, then registers it as `config` and `biothings.config`
+    """
+    default_config = DummyConfig("config")
+    for setting_name, setting_value in default_biothings_configuration().items():
+        setattr(default_config, setting_name, setting_value)
+
+    try:
+        backend_module = importlib.import_module(default_config.HUB_DB_BACKEND["module"])
+    except ImportError as import_err:
+        logging.exception(import_err)
+        raise import_err
+    default_config.hub_db = backend_module
+
+    for module_name in ("config", "biothings.config"):
+        sys.modules[module_name] = default_config
+
+    return default_config
+
+def load_configuration() -> Union[types.ModuleType, DummyConfig]:
+    """
+    Provides the config module the biothings-cli needs in order to launch
+
+    A config.py found in the current working directory takes precedence;
+    without one, the defaults are loaded into a DummyConfig instance
+
+    This has to run before importing any module that reads the configuration
+    at import time, because both loaders register the result in sys.modules
+    under `config` and `biothings.config`
+    """
+    local_configuration = load_local_configuration()
+    if local_configuration is not None:
+        return local_configuration
+
+    logging.debug("Unable to find `config` module. Using the default configuration")
+    return load_default_configuration()
diff --git a/biothings/cli/dataplugin.py b/biothings/cli/commands/dataplugin.py
similarity index 96%
rename from biothings/cli/dataplugin.py
rename to biothings/cli/commands/dataplugin.py
index 8b805db97..c202fa0e6 100644
--- a/biothings/cli/dataplugin.py
+++ b/biothings/cli/commands/dataplugin.py
@@ -8,7 +8,7 @@
 import typer
 from typing_extensions import Annotated
 
-from biothings.cli import operations
+from biothings.cli.commands import operations
 
 SHORT_HELP = "[green]CLI tool for locally evaluating a biothings dataplugin. Allows for simple querying and data inspection.[/green]"
 FULL_HELP = (
@@ -54,6 +54,10 @@ def create_data_plugin(
 @dataplugin_application.command(name="dump")
 def dump_source(
     plugin_name: Annotated[Optional[str], typer.Option("--name", "-n", help=PLUGIN_NAME_HELP)] = None,
+    mark_success: Annotated[
+        Optional[bool],
+        typer.Option("--mark-success", "-m", help="Mark dump as success without attempting to actually dump the files"),
+    ] = False,
     show_dump: Annotated[
         Optional[bool],
         typer.Option("--show-dump", help="Displays the dump source result output after dump operation"),
@@ -62,7 +66,7 @@ def dump_source(
     """
     Download the source data files to the local file system
     """
-    asyncio.run(operations.do_dump(plugin_name=plugin_name, show_dumped=show_dump))
+    asyncio.run(operations.do_dump(plugin_name=plugin_name, show_dumped=show_dump, mark_success=mark_success))
 
 
 @dataplugin_application.command(name="upload")
@@ -133,7 +137,7 @@ def listing(
 
 
 @dataplugin_application.command(name="inspect")
-def inspect_source(
+def inspect_source(  # pylint: disable=too-many-arguments,too-many-positional-arguments
     plugin_name: Annotated[Optional[str], typer.Option("--name", "-n", help=PLUGIN_NAME_HELP)] = None,
     sub_source_name: Annotated[
         Optional[str], typer.Option("--sub-source-name", "-s", help="Your sub source name")
diff --git a/biothings/cli/commands/decorators.py b/biothings/cli/commands/decorators.py
new file mode 100644
index 000000000..5289ab01b
--- /dev/null
+++ b/biothings/cli/commands/decorators.py
@@ -0,0 +1,139 @@
+"""
+Collection of decorators for usage within the biothings-cli
+
+These are often behaviors we want associated with many of the plugin operations we
+use, but which don't directly impact the logic of the actual operation. Typically things
+related to paths and configurations that apply to large swaths of the cli
+make sense as a decorator
+"""
+
+import functools
+import inspect
+import logging
+import pathlib
+import sys
+from typing import Callable
+
+from biothings.cli.exceptions import MissingPluginName
+
+logger = logging.getLogger(name="biothings-cli")
+
+
+def get_biothings_config():
+    try:  # the CLI configuration module is looked up under its sys.modules key
+        return sys.modules["biothings.config"]
+    except KeyError as exc:
+        raise RuntimeError("BioThings CLI configuration has not been loaded") from exc
+
+
+def operation_mode(operation: Callable):
+    """
+    Based off the directory structure for where the biothings-cli
+    was invoked we set the "mode" to one of two states:
+
+    0) singular
+    The current working directory contains a singular data-plugin
+
+    In this case we don't require a plugin_name argument to be passed
+    at the command-line; any plugin_name keyword argument is reset to None
+
+    1) hub
+    The current working directory contains N directories operating as a
+    "hub" or collection of data-plugins under one umbrella
+
+    In this case we do require a plugin_name argument to be passed
+    at the command-line. Otherwise we have no idea which data-plugin to
+    refer to, so MissingPluginName is raised
+
+    The mode is inferred from the file names in the working directory: a
+    manifest.json / manifest.yaml (manifest plugin) or an __init__.py (advanced
+    plugin) means this is a singular dataplugin
+
+    If none of those files is present then we default assume that
+    the mode is hub
+    """
+
+    @functools.wraps(operation)
+    def determine_operation_mode(*args, **kwargs):
+        # only keyword arguments are inspected; determine_hub_mode mutates kwargs in place
+        def determine_hub_mode():
+            working_directory = pathlib.Path.cwd()
+            working_directory_files = {file.name for file in working_directory.iterdir()}
+
+            mode = None
+            if "manifest.json" in working_directory_files or "manifest.yaml" in working_directory_files:
+                logger.debug("Inferring singular manifest plugin from directory structure")
+                mode = "SINGULAR"
+            elif "__init__.py" in working_directory_files:
+                logger.debug("Inferring singular advanced plugin from directory structure")
+                mode = "SINGULAR"
+            else:
+                logger.debug("Inferring multiple plugins from directory structure")
+                mode = "HUB"
+
+            if mode == "SINGULAR":
+                if kwargs.get("plugin_name", None) is not None:
+                    kwargs["plugin_name"] = None
+            elif mode == "HUB":
+                if kwargs.get("plugin_name", None) is None:
+                    raise MissingPluginName(working_directory)
+
+        @functools.wraps(operation)
+        def handle_function(*args, **kwargs):
+            operation_result = operation(*args, **kwargs)
+            return operation_result
+
+        @functools.wraps(operation)
+        async def handle_corountine(*args, **kwargs):
+            operation_result = await operation(*args, **kwargs)
+            return operation_result
+        # must run before dispatch: it may rewrite or reject kwargs["plugin_name"]
+        determine_hub_mode()
+
+        if inspect.iscoroutinefunction(operation):
+            return handle_corountine(*args, **kwargs)
+        return handle_function(*args, **kwargs)
+
+    return determine_operation_mode
+
+
+def cli_system_path(operation: Callable):  # pylint: disable=unused-argument
+    """
+    Ensures that entries stored in the biothings-cli path file (biothings_cli.pth
+    under config.BIOTHINGS_CLI_PATH) are on sys.path before the operation runs, so the
+    modules can be discovered at runtime; blank and already-present entries are skipped
+    """
+
+    @functools.wraps(operation)
+    def update_system_path(*args, **kwargs):
+        def update_system_path_from_file():
+            config = get_biothings_config()
+            discovery_path = pathlib.Path(config.BIOTHINGS_CLI_PATH).resolve().absolute()
+            path_file = discovery_path.joinpath("biothings_cli.pth")
+
+            if path_file.exists():
+                with open(path_file, "r", encoding="utf-8") as handle:
+                    path_entries = [entry.strip() for entry in handle]
+                for path in path_entries:
+                    # "" would add the working directory; duplicates would pile up per call
+                    if path and path not in sys.path:
+                        logger.debug("Adding %s to system path", path)
+                        sys.path.append(path)
+
+        @functools.wraps(operation)
+        def handle_function(*args, **kwargs):
+            operation_result = operation(*args, **kwargs)
+            return operation_result
+
+        @functools.wraps(operation)
+        async def handle_corountine(*args, **kwargs):
+            operation_result = await operation(*args, **kwargs)
+            return operation_result
+
+        update_system_path_from_file()
+
+        if inspect.iscoroutinefunction(operation):
+            return handle_corountine(*args, **kwargs)
+        return handle_function(*args, **kwargs)
+
+    return update_system_path
diff --git a/biothings/cli/operations.py b/biothings/cli/commands/operations.py
similarity index 89%
rename from biothings/cli/operations.py
rename to biothings/cli/commands/operations.py
index 0be38234d..d55119ec8 100644
--- a/biothings/cli/operations.py
+++ b/biothings/cli/commands/operations.py
@@ -47,7 +47,6 @@
 """
 
 import asyncio
-import functools
 import logging
 import multiprocessing
 import os
@@ -57,7 +56,8 @@
 import shutil
 import sys
 import uuid
-from typing import Callable, Optional, Union
+from importlib import import_module
+from typing import Optional, Union
 
 import jsonschema
 import rich
@@ -67,7 +67,8 @@
 from rich.console import Console
 from rich.panel import Panel
 
-from biothings.cli.exceptions import MissingPluginName, UnknownUploaderSource
+from biothings.cli.commands.decorators import cli_system_path, get_biothings_config, operation_mode
+from biothings.cli.exceptions import UnknownUploaderSource
 from biothings.cli.structure import TEMPLATE_DIRECTORY
 from biothings.cli.utils import (
     clean_dumped_files,
@@ -87,65 +88,12 @@
 logger = logging.getLogger(name="biothings-cli")
 
 
-def operation_mode(operation_method: Callable):
-    """
-    Based off the directory structure for where the biothings-cli
-    was invoked we set the "mode" to one of two states:
-
-    0) singular
-    The current working directory contains a singular data-plugin
-
-    In this case we don't require a plugin_name argument to be passed
-    at the command-line
-
-    1) hub
-    The current working directory contains N directories operating as a
-    "hub" or collection of data-plugins under one umbrella
-
-    In this case we do require a plugin_name argument to be passed
-    at the command-line. Otherwise we have no idea which data-plugin to
-    refer to
-
-    We attempt to load the plugin from this working directory. If we sucessfully load
-    either a manifest or advanced plugin, then we can safely say this is a singular
-    dataplugin
-
-    If we cannot load either a manifest or advanced plugin then we default assume that
-    the mode is hub
-    """
-
-    @functools.wraps(operation_method)
-    def determine_operation_mode(*args, **kwargs):
-        working_directory = pathlib.Path.cwd()
-        working_directory_files = {file.name for file in working_directory.iterdir()}
-
-        mode = None
-        if "manifest.json" in working_directory_files or "manifest.yaml" in working_directory_files:
-            logger.debug("Inferring singular manifest plugin from directory structure")
-            mode = "SINGULAR"
-        elif "__init__.py" in working_directory_files:
-            logger.debug("Inferring singular advanced plugin from directory structure")
-            mode = "SINGULAR"
-        else:
-            logger.debug("Inferring multiple plugins from directory structure")
-            mode = "HUB"
-
-        if mode == "SINGULAR":
-            if kwargs.get("plugin_name", None) is not None:
-                kwargs["plugin_name"] = None
-        elif mode == "HUB":
-            if kwargs.get("plugin_name", None) is None:
-                raise MissingPluginName(working_directory)
-
-        operation_result = operation_method(*args, **kwargs)
-        return operation_result
-
-    return determine_operation_mode
+def _load_attr(module_path: str, attr_name: str):  # import module_path on demand, return its attr_name
+    return getattr(import_module(module_path), attr_name)
 
 
 # do not apply operation_mode decorator since this operation means to create a new plugin
 # regardless what the current working directory has
-# @operation_mode
 def do_create(plugin_name: str, multi_uploaders: bool = False, parallelizer: bool = False):
     """
     Create a new data plugin from the template
@@ -178,14 +126,15 @@ def do_create(plugin_name: str, multi_uploaders: bool = False, parallelizer: boo
     logger.info("Successfully created data plugin template at: %s\n", new_plugin_directory)
 
 
+@cli_system_path
 @operation_mode
-async def do_dump(plugin_name: Optional[str] = None, show_dumped: bool = True) -> None:
+async def do_dump(plugin_name: Optional[str] = None, show_dumped: bool = True, mark_success: bool = False) -> None:
     """
     Perform dump for the given plugin
     """
-    from biothings import config
-    from biothings.cli.assistant import CLIAssistant
-    from biothings.utils import hub_db
+    config = get_biothings_config()
+    CLIAssistant = _load_attr("biothings.cli.assistant", "CLIAssistant")
+    hub_db = import_module("biothings.utils.hub_db")
 
     hub_db.setup(config)
     assistant_instance = CLIAssistant(plugin_name)
@@ -206,11 +155,15 @@ async def do_dump(plugin_name: Optional[str] = None, show_dumped: bool = True) -
         )
         logger.warning(attribute_warning)
 
-    dump_job = dumper_instance.dump(
-        job_manager=assistant_instance.job_manager,
-        force=False,
-    )
-    await asyncio.gather(dump_job)
+    if mark_success:
+        logger.warning("Marking dump as successful without running the dumper")
+        dumper_instance.mark_success(dry_run=True)
+    else:
+        dump_job = dumper_instance.dump(
+            job_manager=assistant_instance.job_manager,
+            force=False,
+        )
+        await asyncio.gather(dump_job)
 
     dp = hub_db.get_data_plugin()
     dp.remove({"_id": assistant_instance.plugin_name})
@@ -223,6 +176,7 @@ async def do_dump(plugin_name: Optional[str] = None, show_dumped: bool = True) -
         show_dumped_files(data_folder, assistant_instance.plugin_name)
 
 
+@cli_system_path
 @operation_mode
 async def do_upload(plugin_name: Optional[str] = None, batch_limit: int = 10000, show_uploaded: bool = True) -> None:
     """
@@ -235,8 +189,7 @@ async def do_upload(plugin_name: Optional[str] = None, batch_limit: int = 10000,
     >>>     self.commands["upload_all"] = self.managers["upload_manager"].upload_all
     >>>     self.commands["update_source_meta"] = self.managers["upload_manager"].update_source_meta
     """
-    from biothings.cli.assistant import CLIAssistant
-
+    CLIAssistant = _load_attr("biothings.cli.assistant", "CLIAssistant")
     assistant_instance = CLIAssistant(plugin_name)
     uploader_classes = assistant_instance.get_uploader_class()
     for uploader_class in uploader_classes:
@@ -277,6 +230,7 @@ async def do_upload(plugin_name: Optional[str] = None, batch_limit: int = 10000,
         show_uploaded_sources(pathlib.Path(assistant_instance.plugin_directory), assistant_instance.plugin_name)
 
 
+@cli_system_path
 @operation_mode
 async def do_parallel_upload(
     plugin_name: Optional[str] = None, batch_limit: int = 10000, show_uploaded: bool = True
@@ -293,8 +247,7 @@ async def do_parallel_upload(
 
     This is a modified version of the ParallelUploader `update_data` source call
     """
-    from biothings.cli.assistant import CLIAssistant
-
+    CLIAssistant = _load_attr("biothings.cli.assistant", "CLIAssistant")
     assistant_instance = CLIAssistant(plugin_name)
     uploader_classes = assistant_instance.get_uploader_class()
     for uploader_class in uploader_classes:
@@ -344,6 +297,7 @@ async def do_parallel_upload(
         show_uploaded_sources(pathlib.Path(assistant_instance.plugin_directory), assistant_instance.plugin_name)
 
 
+@cli_system_path
 @operation_mode
 async def do_dump_and_upload(plugin_name: str) -> None:
     """
@@ -354,6 +308,7 @@ async def do_dump_and_upload(plugin_name: str) -> None:
     logger.info("[green]Success![/green] :rocket:", extra={"markup": True})
 
 
+@cli_system_path
 @operation_mode
 async def do_index(plugin_name: Optional[str] = None, sub_source_name: Optional[str] = None) -> None:
     """
@@ -416,10 +371,10 @@ async def do_index(plugin_name: Optional[str] = None, sub_source_name: Optional[
     The default location is localhost:9200. If successful a couple frames detailing the build and
     index information will be displayed to the enduser
     """
-    from biothings import config
-    from biothings.cli.assistant import CLIAssistant
-    from biothings.hub.databuild.builder import BuilderException
-    from biothings.utils.manager import JobManager
+    config = get_biothings_config()
+    CLIAssistant = _load_attr("biothings.cli.assistant", "CLIAssistant")
+    BuilderException = _load_attr("biothings.hub.databuild.builder", "BuilderException")
+    JobManager = _load_attr("biothings.utils.manager", "JobManager")
 
     if platform.system() == "Windows":
         logger.warning("The `biothings-cli dataplugin index` command isn't supported on windows")
@@ -540,6 +495,7 @@ async def do_index(plugin_name: Optional[str] = None, sub_source_name: Optional[
     await show_source_index(index_name, assistant_instance.index_manager, elasticsearch_mapping)
 
 
+@cli_system_path
 @operation_mode
 async def do_list(
     plugin_name: Optional[str] = None, dump: bool = True, upload: bool = True, hubdb: bool = False
@@ -547,8 +503,7 @@ async def do_list(
     """
     List the dumped files, uploaded sources, or hubdb content.
     """
-    from biothings.cli.assistant import CLIAssistant
-
+    CLIAssistant = _load_attr("biothings.cli.assistant", "CLIAssistant")
     assistant_instance = CLIAssistant(plugin_name)
     if dump:
         dumper_instance = assistant_instance.get_dumper_class()
@@ -569,8 +524,9 @@ async def do_list(
         show_hubdb_content()
 
 
+@cli_system_path
 @operation_mode
-async def do_inspect(
+async def do_inspect(  # pylint: disable=too-many-arguments,too-many-positional-arguments
     plugin_name: Optional[str] = None,
     sub_source_name: Optional[str] = None,
     mode: str = "type,stats",
@@ -581,8 +537,7 @@ async def do_inspect(
     """
     Perform inspection on a data plugin.
     """
-    from biothings.cli.assistant import CLIAssistant
-
+    CLIAssistant = _load_attr("biothings.cli.assistant", "CLIAssistant")
     assistant_instance = CLIAssistant(plugin_name)
     uploader_classes = assistant_instance.get_uploader_class()
 
@@ -633,14 +588,15 @@ async def do_inspect(
                 write_mapping_to_file(sub_output, inspection_mapping)
 
 
+@cli_system_path
 @operation_mode
 async def do_serve(plugin_name: Optional[str] = None, host: str = "localhost", port: int = 9999):
     """
     Handles creation of a basic web server for hosting files using for a dataplugin
     """
-    from biothings.cli.assistant import CLIAssistant
-    from biothings.cli.web_app import main
-    from biothings.utils import hub_db
+    CLIAssistant = _load_attr("biothings.cli.assistant", "CLIAssistant")
+    main = _load_attr("biothings.cli.web_app", "main")
+    hub_db = import_module("biothings.utils.hub_db")
 
     assistant_instance = CLIAssistant(plugin_name)
     uploader_classes = assistant_instance.get_uploader_class()
@@ -651,6 +607,7 @@ async def do_serve(plugin_name: Optional[str] = None, host: str = "localhost", p
     await main(host=host, port=port, db=src_db, table_space=table_space)
 
 
+@cli_system_path
 @operation_mode
 async def do_clean(
     plugin_name: Optional[str] = None, dump: bool = False, upload: bool = False, clean_all: bool = False
@@ -658,8 +615,7 @@ async def do_clean(
     """
     Clean the dumped files, uploaded sources, or both.
     """
-    from biothings.cli.assistant import CLIAssistant
-
+    CLIAssistant = _load_attr("biothings.cli.assistant", "CLIAssistant")
     if clean_all:
         dump = True
         upload = True
@@ -688,8 +644,7 @@ async def display_schema():
     Loads the jsonschema definition file and displays it to the
     console
     """
-    from biothings.hub.dataplugin.loaders.schema import load_manifest_schema
-
+    load_manifest_schema = _load_attr("biothings.hub.dataplugin.loaders.schema", "load_manifest_schema")
     manifest_schema = load_manifest_schema()
     schema_validator = jsonschema.validators.validator_for(manifest_schema)
     valid_schema = False
@@ -714,14 +669,14 @@ async def display_schema():
     console.print(panel)
 
 
+@cli_system_path
 @operation_mode
 async def validate_manifest(plugin_name: Optional[str] = None):
     """
     Loads the manifest file and validates it against the schema file
     If an error exists it will display the error to the enduser
     """
-    from biothings.hub.dataplugin.loaders.loader import ManifestBasedPluginLoader
-
+    ManifestBasedPluginLoader = _load_attr("biothings.hub.dataplugin.loaders.loader", "ManifestBasedPluginLoader")
     if plugin_name is None:
         plugin_directory = pathlib.Path.cwd().resolve().absolute()
         plugin_name = plugin_directory.name
diff --git a/biothings/cli/commands/pathing.py b/biothings/cli/commands/pathing.py
new file mode 100644
index 000000000..3874752bb
--- /dev/null
+++ b/biothings/cli/commands/pathing.py
@@ -0,0 +1,168 @@
+"""
+Typer commands for viewing, adding and removing hub directories on the python system path
+"""
+
+import logging
+import pathlib
+import sys
+
+import typer
+from rich.console import Console
+from rich.table import Table
+
+from biothings.cli.commands.decorators import cli_system_path, get_biothings_config, operation_mode
+
+SHORT_HELP = (
+    "[green]CLI tool for viewing the python system path and adding external directories to the system path[/green]"
+)
+FULL_HELP = (
+    SHORT_HELP
+    + "\n\n[magenta] :sparkles: Run from an existing data plugin folder to evaluate a singular data plugin.[/magenta]"
+)
+path_application = typer.Typer(  # application holding the view / add / remove commands defined below
+    help=FULL_HELP,
+    short_help=SHORT_HELP,
+    no_args_is_help=True,
+    rich_markup_mode="rich",
+)
+
+logger = logging.getLogger(name="biothings-cli")  # logger name "biothings-cli", also used by operations.py
+
+
+@path_application.command(name="view")
+def view_system_path() -> None:
+    """
+    View the system paths currently discovered by python, along with potential hub directories of interest
+    that the user may wish to add to the system path for usage in data plugin testing
+    """
+    display_system_paths()
+
+
+@path_application.command(name="add")
+def add_parser_to_system_path() -> None:
+    """
+    Add discovered hub directory paths to the python system path for aiding in testing various data plugins
+    Creates the file "biothings_cli.pth" (uses .pth extension to mimic the `site` module internal to
+    python). It creates this file in the .biothings_hub/path directory. If found while running a
+    command, then the paths in the file will be added to the system path prior to executing the command
+    """
+    update_system_paths()
+    display_system_paths()
+
+
+@path_application.command(name="remove")
+def remove_parser_from_system_path() -> None:
+    """
+    Remove the hub directories discovered from the python system path
+    Simply removes the biothings_cli.pth file from the biothings-cli directory
+    """
+    remove_system_paths()
+
+
+@cli_system_path
+@operation_mode
+def display_system_paths() -> None:
+    """
+    Print two tables for the biothings-cli application: the current sys.path entries, and the
+    hub directories from find_hub_parsers with whether each hub's parent directory is on sys.path
+
+    External method so we can call it from multiple typer commands
+    """
+    path_table = Table(title="Python System Path(s)")
+
+    path_table.add_column("Index", style="cyan")
+    path_table.add_column("Paths", style="green")
+
+    system_paths = sys.path
+    for index, system_path in enumerate(system_paths):
+        path_table.add_row(str(index), str(system_path))
+
+    parser_table = Table(title="External Parser Path(s)")
+
+    parser_table.add_column("Index", style="cyan")
+    parser_table.add_column("Paths", style="magenta")
+    parser_table.add_column("On System Path?", style="steel_blue1")
+
+    hub_parser_paths = find_hub_parsers()
+    for index, parser_path in enumerate(hub_parser_paths):
+        parser_table.add_row(str(index), str(parser_path), str(str(parser_path.parent) in system_paths))
+
+    console = Console()
+    console.print(path_table)
+    console.print(parser_table)
+
+
+@cli_system_path
+@operation_mode
+def update_system_paths() -> None:
+    """Overwrite biothings_cli.pth with the parent directory of every hub from find_hub_parsers."""
+    config = get_biothings_config()
+    discovery_path = pathlib.Path(config.BIOTHINGS_CLI_PATH).resolve().absolute()
+    discovery_path.mkdir(parents=True, exist_ok=True)
+
+    hub_parser_paths = find_hub_parsers()
+    # The actual path that needs to be added is the parent of the hub directory (kept unique, in order)
+    hub_parser_paths = list(dict.fromkeys(path.parent for path in hub_parser_paths))
+
+    path_file = discovery_path.joinpath("biothings_cli.pth")
+    with open(path_file, "w", encoding="utf-8") as path_handle:
+        for parser_path in hub_parser_paths:
+            logger.info("Adding %s -> %s", parser_path, path_file)
+            path_handle.write(f"{parser_path}\n")
+
+
+@cli_system_path
+@operation_mode
+def remove_system_paths() -> None:
+    """
+    Delete biothings_cli.pth and drop the discovered hub parent directories from sys.path
+    """
+    config = get_biothings_config()
+    discovery_path = pathlib.Path(config.BIOTHINGS_CLI_PATH).resolve().absolute()
+    path_file = discovery_path.joinpath("biothings_cli.pth")
+    path_file.unlink(missing_ok=True)
+
+    # The entries put on sys.path are the hub *parent* directories, so those are what to remove
+    stale_entries = {str(parser_path.parent) for parser_path in find_hub_parsers()}
+    sys.path[:] = [entry for entry in sys.path if entry not in stale_entries]
+
+
+def find_hub_parsers(upward_depth: int = 2) -> list[pathlib.Path]:
+    """
+    Attempts to locate any potential hub-based parsers that are used across different plugins
+    within a shared hub instance. Only paths named hub that contain a dataload entry are returned
+
+    Searches recursively below each of the <upward_depth> directories (defaults to 2 levels) above the present working directory
+    The typical hub structure has the plugins directory at the same level as the hub directory
+
+    pending.api structure:
+    root
+        ├── hub
+        ├── plugins
+
+    (mygene, mychem, myvariant, ...) structure
+    root
+    ├── src
+    │   ├── hub
+    │   ├── plugins
+
+    In either structure, the user is expected to be operating within the directory of a specific
+    plugin (root/plugin/plugin_directory/) or acting as a HUB within the (root/plugin) directory
+    In either case we should be able to find the shared parsers within 2 upper levels, each listed once
+    """
+    directory_pointer = pathlib.Path.cwd()
+
+    external_parser_paths = []
+
+    # Match any path ending explicitly in hub. The bracket "[a]" matches the character literal
+    # enclosed in the bracket, so [h][u][b] matches the literal hub
+    match_expr = "**/[h][u][b]"
+    for _ in range(upward_depth):
+        directory_pointer = directory_pointer.parent
+        for hub_path in directory_pointer.glob(match_expr):
+            resolved_hub = hub_path.resolve().absolute()
+            # Every level re-scans the subtree searched by the level below it, so a hub found
+            # earlier would otherwise be appended (and later written to the path file) twice
+            if resolved_hub not in external_parser_paths and hub_path.joinpath("dataload").exists():
+                external_parser_paths.append(resolved_hub)
+    return external_parser_paths
diff --git a/biothings/cli/configuration/config.py.sample b/biothings/cli/configuration/config.py.sample
deleted file mode 100644
index 5e89c5d3a..000000000
--- a/biothings/cli/configuration/config.py.sample
+++ /dev/null
@@ -1,47 +0,0 @@
-########################################
-# DATA PLUGIN CONFIGURATION VARIABLES #
-########################################
-# Typicaly, you don't need to include a config.py module to run the BioThings CLI tool to
-# test your data plugin locally. A default config module is setup at the launch of the CLI.
-# However, you can always include an additional config.py module to override the default
-# config settings, e.g. alternative DATA_ARCHIVE_ROOT, HUB_DB_BACKEND for different db path.
-# The available config settings can be found at biothings.hub.default_config module (note that
-# not all settings are relevant to the CLI)
-
-# This file should be place at the same directory with developed data plugin:
-
-# When using dataplugin-hub sub commands
-# $ ls -al
-#  config.py
-#  .biothings_hub
-#     .data_src_database
-#     archive
-#     biothings_hubdb
-#  your_data_plugin_folder
-#     manifest.json
-#     parser.py
-
-# When using dataplugin sub commands inside a data plugin folder
-# $ ls -al
-#  config.py
-#  .biothings_hub
-#     .data_src_database
-#     archive
-#     biothings_hubdb
-#  manifest.json
-#  parser.py
-
-DATA_SRC_DATABASE = '.data_src_database'
-DATA_HUB_DB_DATABASE = 'data_hub_db_database'
-HUB_DB_BACKEND = {
-    "module": "biothings.utils.sqlite3",
-    "sqlite_db_folder": ".biothings_hub""
-}
-DATA_ARCHIVE_ROOT = ".biothings_hub/archive"
-
-# Add new entry in DOCKER_CONFIG if you want to use a different docker host for your
-# docker-based data plugin, other than the default docker host running on your localhost.
-# DOCKER_CONFIG = {
-#     "docker1": {"tls_cert_path": None, "tls_key_path": None, "client_url": ""},
-#     "localhost": {"client_url": "unix://var/run/docker.sock"},
-# }
diff --git a/biothings/cli/manager.py b/biothings/cli/manager.py
index 511903b9a..eb0cd565a 100644
--- a/biothings/cli/manager.py
+++ b/biothings/cli/manager.py
@@ -29,13 +29,13 @@ async def defer_to_process(self, pinfo=None, func=None, *args, **kwargs):
     async def defer_to_thread(self, pinfo=None, func=None, *args):
         """keep the same signature as JobManager.defer_to_thread. The passed pinfo is ignored"""
 
-        async def run(fut, func):
+        async def run(fut, func, *args):
             try:
-                res = func()
+                res = func(*args)
                 fut.set_result(res)
             except Exception as gen_exc:
                 fut.set_exception(gen_exc)
 
         fut = self.loop.create_future()
-        self.loop.create_task(run(fut, func))
+        self.loop.create_task(run(fut, func, *args))
         return fut
diff --git a/biothings/cli/settings.py b/biothings/cli/settings.py
deleted file mode 100644
index 4cb9416ce..000000000
--- a/biothings/cli/settings.py
+++ /dev/null
@@ -1,134 +0,0 @@
-"""
-Configuration settings for the biothings-cli tool
-
-> Logging
-> Tool Configuration
-    > Creates a mock config used in the biothings.api backend
-"""
-
-from typing import Literal
-import importlib
-import importlib.util
-import logging
-import os
-import pathlib
-import sys
-
-from rich.logging import RichHandler
-import typer
-
-
-from biothings.utils.common import DummyConfig
-from biothings.utils.configuration import ConfigurationError
-
-
-def setup_commandline_configuration(debug: bool, rich_traceback: bool) -> typer.Typer:
-    """
-    Sets up the typer command line tooling
-    """
-    pretty_exceptions_show_locals = False
-    pretty_exceptions_enable = False
-    sys.tracebacklimit = 1
-
-    if rich_traceback:
-        pretty_exceptions_enable = True
-        sys.tracebacklimit = 1000
-
-    if debug:
-        pretty_exceptions_enable = True
-        pretty_exceptions_show_locals = True
-        sys.tracebacklimit = 1000
-
-    # prevent dimming the help text from the 2nd line
-    # see: https://github.com/tiangolo/typer/issues/437#issuecomment-1224149402
-    typer.rich_utils.STYLE_HELPTEXT = ""
-
-    context_settings = {"help_option_names": ["-h", "--help"]}
-    typer_instance = typer.Typer(
-        help="[green]BioThings Admin CLI to test your local data plugins. See helps for each command for specific usage.[/green]",
-        rich_help_panel="Help and Others",
-        rich_markup_mode="rich",
-        context_settings=context_settings,
-        no_args_is_help=True,
-        pretty_exceptions_show_locals=pretty_exceptions_show_locals,
-        pretty_exceptions_enable=pretty_exceptions_enable,
-    )
-
-    return typer_instance
-
-
-def setup_logging_configuration(logging_level: Literal[10, 20, 30, 40, 50]) -> None:
-    """
-    Configures the logging based off our environment configuration
-    """
-    rich_handler = RichHandler(
-        level=logging_level,
-        markup=True,
-        rich_tracebacks=False,  # typer creates it already
-        show_path=False,
-        tracebacks_suppress=[typer],
-    )
-    logging.basicConfig(level=logging_level, format="%(message)s", datefmt="[%X]", handlers=[rich_handler])
-
-
-def setup_biothings_configuration():
-    """
-    Setup a config module necessary to launch the CLI
-
-    Depending on the backend hub database, the order of configuration
-    matters. If we attempt to load a module that checks for the configuration
-    we'll have to ensure that the configuration is properly configured prior
-    to loading the module
-    """
-    working_dir = pathlib.Path().resolve()
-    configuration_instance = DummyConfig("config")
-
-    try:
-        config_mod = importlib.import_module("config")
-        for attr in dir(config_mod):
-            value = getattr(config_mod, attr)
-            if isinstance(value, ConfigurationError):
-                raise ConfigurationError(f"{attr}: {value}")
-            setattr(configuration_instance, attr, value)
-    except ModuleNotFoundError:
-        logging.debug(ModuleNotFoundError)
-        logging.debug("Unable to find `config` module. Using the default configuration")
-    finally:
-        sys.modules["config"] = configuration_instance
-        sys.modules["biothings.config"] = configuration_instance
-
-    configuration_instance.HUB_DB_BACKEND = {
-        "module": "biothings.utils.sqlite3",
-        "sqlite_db_folder": ".biothings_hub",
-    }
-    configuration_instance.DATA_SRC_SERVER = "localhost"
-    configuration_instance.DATA_SRC_DATABASE = "data_src_database"
-    configuration_instance.DATA_ARCHIVE_ROOT = ".biothings_hub/archive"
-    configuration_instance.LOG_FOLDER = ".biothings_hub/logs"
-    configuration_instance.DATA_PLUGIN_FOLDER = f"{working_dir}"
-
-    configuration_instance.DATA_TARGET_SERVER = "localhost"
-    configuration_instance.DATA_TARGET_PORT = 27017
-    configuration_instance.DATA_TARGET_DATABASE = "plugin-hub"
-    configuration_instance.INDEX_CONFIG = {
-        "indexer_select": {},
-        "env": {
-            "commandhub": {
-                "host": "http://localhost:9200",
-                "indexer": {"args": {"request_timeout": 300, "retry_on_timeout": True, "max_retries": 10}},
-            }
-        },
-    }
-
-    # job manager configuration properties
-    configuration_instance.RUN_DIR = pathlib.Path().cwd()
-    configuration_instance.HUB_MAX_WORKERS = os.cpu_count()
-    configuration_instance.MAX_QUEUED_JOBS = 1000
-
-    try:
-        configuration_instance.hub_db = importlib.import_module(configuration_instance.HUB_DB_BACKEND["module"])
-    except ImportError as import_err:
-        logging.exception(import_err)
-        raise import_err
-
-    return configuration_instance
diff --git a/biothings/cli/templates/manifest.yaml.tpl b/biothings/cli/templates/manifest.yaml.tpl
index ce53daa28..d398efbb2 100644
--- a/biothings/cli/templates/manifest.yaml.tpl
+++ b/biothings/cli/templates/manifest.yaml.tpl
@@ -4,7 +4,7 @@ display_name: # Optional. This will be displayed as friendly name on the Biothin
 biothing_type: # Optional. Can be used to provide the default value to some hub functions (e.g. in quick_index as the default doc_type value.
 __metadata__: # Optional.
   license_url: https://example.com/  # Optional. Put your license url here
-  licence: ABCXYZ # Optional. Your license name
+  license: ABCXYZ # Optional. Your license name
   url: https://example.com/  # Your site url
   description: # Optional. More description for this data plugin
 requires:  # Optional. Listing all extra packages if need
diff --git a/biothings/hub/__init__.py b/biothings/hub/__init__.py
index a792c45db..faf716055 100644
--- a/biothings/hub/__init__.py
+++ b/biothings/hub/__init__.py
@@ -101,7 +101,7 @@ def _config_for_app(config_mod=None):
 #     _config.HUB_DB_BACKEND = {
 #         "module": "biothings.utils.sqlite3",
 #         "sqlite_db_folder": "."}
-#     _config.DATA_HUB_DB_DATABASE = ".hubdb"
+#     _config.DATA_HUB_DB_DATABASE = "biothings_hubdb"
 #     _config_for_app(_config)
 
 
@@ -534,6 +534,14 @@ def configure(self):
     def configure_ioloop(self):
         import tornado.platform.asyncio
 
+        # In Python 3.14, get_event_loop raises a RuntimeError if there is no current event loop.
+        # Eventually this probably should not be needed when tornado handles this internally.
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+
         tornado.platform.asyncio.AsyncIOMainLoop().install()
 
     def before_start(self):
@@ -693,6 +701,7 @@ def configure_index_manager(self):
 
     def configure_snapshot_manager(self):
         assert "index" in self.features, "'snapshot' feature requires 'index'"
+        from biothings.hub.dataindex.mongo_build_cleanup import MongoBuildCleanupManager
         from biothings.hub.dataindex.snapshooter import SnapshotManager
 
         args = self.mixargs("snapshot")
@@ -705,6 +714,7 @@ def configure_snapshot_manager(self):
         snapshot_manager.configure(config.SNAPSHOT_CONFIG)
         snapshot_manager.poll("snapshot", snapshot_manager.snapshot_a_build)
         self.managers["snapshot_manager"] = snapshot_manager
+        self.managers["mongo_build_cleanup_manager"] = MongoBuildCleanupManager(job_manager=self.managers["job_manager"])
 
     def configure_auto_snapshot_cleaner_manager(self):
         assert "snapshot" in self.features, "'auto_snapshot_cleaner' feature requires 'snapshot'"
@@ -1140,6 +1150,10 @@ def configure_commands(self):
             self.commands["list_snapshots"] = self.managers["snapshot_manager"].list_snapshots
             self.commands["delete_snapshots"] = self.managers["snapshot_manager"].delete_snapshots
             self.commands["validate_snapshots"] = self.managers["snapshot_manager"].validate_snapshots
+        if self.managers.get("mongo_build_cleanup_manager"):
+            self.commands["list_mongo_builds"] = self.managers["mongo_build_cleanup_manager"].list_mongo_builds
+            self.commands["delete_mongo_builds"] = self.managers["mongo_build_cleanup_manager"].delete_mongo_builds
+            self.commands["validate_mongo_builds"] = self.managers["mongo_build_cleanup_manager"].validate_mongo_builds
         # data release commands
         if self.managers.get("release_manager"):
             self.commands["create_release_note"] = self.managers["release_manager"].create_release_note
@@ -1506,6 +1520,16 @@ def configure_api_endpoints(self):
             )
         if "validate_snapshots" in cmdnames:
             self.api_endpoints["validate_snapshots"] = EndpointDefinition(name="validate_snapshots", method="post")
+        if "list_mongo_builds" in cmdnames:
+            self.api_endpoints["mongo_builds"] = EndpointDefinition(name="list_mongo_builds", method="get")
+        if "delete_mongo_builds" in cmdnames:
+            self.api_endpoints["mongo_builds/delete"] = EndpointDefinition(
+                name="delete_mongo_builds", method="put", force_bodyargs=True
+            )
+        if "validate_mongo_builds" in cmdnames:
+            self.api_endpoints["mongo_builds/validate"] = EndpointDefinition(
+                name="validate_mongo_builds", method="post"
+            )
         if "sync" in cmdnames:
             self.api_endpoints["sync"] = EndpointDefinition(name="sync", method="post", force_bodyargs=True)
         if "whatsnew" in cmdnames:
diff --git a/biothings/hub/api/handlers/base.py b/biothings/hub/api/handlers/base.py
index 3746ef9c1..1da37fb39 100644
--- a/biothings/hub/api/handlers/base.py
+++ b/biothings/hub/api/handlers/base.py
@@ -6,10 +6,10 @@
 # see https://github.com/biothings/biothings.api/commit/59c0d78f758018b0d87836657a2b5d1a700503a1
 # import pandas.io.json as pdjson
 # replace pandas json encoder with orjson:
-import orjson
 from tornado.web import RequestHandler
 
 from biothings import config
+from biothings.utils import serializer
 
 
 class DefaultHandler(RequestHandler):
@@ -26,10 +26,9 @@ def write(self, result):
             #     "result": result,
             #     "status": "ok"
             # }, iso_dates=True)
-            orjson.dumps(
-                {"result": result, "status": "ok"},
-                option=orjson.OPT_NON_STR_KEYS | orjson.OPT_NAIVE_UTC,
-            ).decode()
+            serializer.to_json({
+                "result": result, "status": "ok"
+            })
         )
 
     def write_error(self, status_code, **kwargs):
diff --git a/biothings/hub/autoupdate/uploader.py b/biothings/hub/autoupdate/uploader.py
index 439f03716..1a358c347 100644
--- a/biothings/hub/autoupdate/uploader.py
+++ b/biothings/hub/autoupdate/uploader.py
@@ -348,13 +348,13 @@ async def apply_diff(self, build_meta, job_manager, **kwargs):
             # ----------------------------------------
             #  self.target_backend.target_name,
             # ----------------------------------------
-            self.target_backend.target_esidxer._doc_type,
+            # self.target_backend.target_esidxer._doc_type,  # remove the use of doc_type here, remove this line if confirmed
         )
         # new: index's data we will reach once updated (just informative)
         new = (
             self.target_backend.target_esidxer.es_host,
             meta["new"]["backend"],
-            self.target_backend.target_esidxer._doc_type,
+            # self.target_backend.target_esidxer._doc_type,   # remove the use of doc_type here, remove this line if confirmed
         )
         await self.syncer_func(old_db_col_names=old, new_db_col_names=new, diff_folder=self.data_folder)
         # return current number of docs in index (even if diff update)
diff --git a/biothings/hub/databuild/backend.py b/biothings/hub/databuild/backend.py
index a86de24c9..0b41ae32c 100644
--- a/biothings/hub/databuild/backend.py
+++ b/biothings/hub/databuild/backend.py
@@ -315,7 +315,6 @@ def create_backend(db_col_names, name_only=False, follow_ref=False, **kwargs):
         is_mongo = False
         idxr = ESIndexer(
             index=db_col_names[1],
-            doc_type=db_col_names[2],
             es_host=db_col_names[0],
             **kwargs,
         )
diff --git a/biothings/hub/databuild/builder.py b/biothings/hub/databuild/builder.py
index f9275b147..46b5bbac6 100644
--- a/biothings/hub/databuild/builder.py
+++ b/biothings/hub/databuild/builder.py
@@ -20,6 +20,15 @@
 
 from biothings import config as btconfig
 from biothings.hub import BUILDER_CATEGORY, UPLOADER_CATEGORY
+from biothings.hub.databuild.backend import (
+    LinkTargetDocMongoBackend,
+    SourceDocMongoBackend,
+    TargetDocMongoBackend,
+    create_backend,
+)
+from biothings.hub.databuild.buildconfig import AutoBuildConfig
+from biothings.hub.databuild.mapper import TransparentMapper
+from biothings.hub.dataload.uploader import ResourceNotReady
 from biothings.hub.manager import BaseManager
 from biothings.utils import mongo
 from biothings.utils.backend import DocMongoBackend
@@ -36,24 +45,14 @@
     get_source_fullname,
     get_src_build,
     get_src_build_config,
+    get_src_db,
     get_src_dump,
     get_src_master,
-    get_src_db,
 )
 from biothings.utils.loggers import get_logger
 from biothings.utils.manager import JobManager
 from biothings.utils.mongo import doc_feeder, id_feeder
 
-from biothings.hub.databuild.backend import (
-    LinkTargetDocMongoBackend,
-    SourceDocMongoBackend,
-    TargetDocMongoBackend,
-    create_backend,
-)
-from biothings.hub.databuild.buildconfig import AutoBuildConfig
-from biothings.hub.databuild.mapper import TransparentMapper
-from biothings.hub.dataload.uploader import ResourceNotReady
-
 logging = btconfig.logger
 
 
@@ -1618,7 +1617,6 @@ def build_info(
         only_archived=True will return archived merges only
         status: will return only successful/failed builds. Can be "success" or "failed"
         """
-        res = {}
         q = self.get_query_for_list_merge(only_archived=only_archived, status=status)
         if id is not None:
             q = {"_id": id}
@@ -1640,6 +1638,12 @@ def build_info(
             b["status"] = "unknown"
             if jobs:
                 b["status"] = jobs[-1]["status"]
+            stored_total = b.get("_meta", {}).get("stats", {}).get("total")
+            if stored_total is not None:
+                b["count"] = stored_total
+                continue
+
+            # Fallback for older build docs missing _meta.stats.total.
             try:
                 backend = create_backend(b["backend_url"])
                 b["count"] = backend.count()
diff --git a/biothings/hub/databuild/syncer.py b/biothings/hub/databuild/syncer.py
index 2935e411b..916e4ad17 100644
--- a/biothings/hub/databuild/syncer.py
+++ b/biothings/hub/databuild/syncer.py
@@ -202,7 +202,7 @@ async def sync_cols(
             if diff_mapping_file:
                 # old_db_col_names is actually the index name in that case
                 index_name = old_db_col_names[1]
-                doc_type = self._meta["build_config"]["doc_type"]
+                # doc_type = self._meta["build_config"]["doc_type"]     # remove doc_type, delete this line after confirmed
                 indexer = create_backend(old_db_col_names).target_esidxer
                 pinfo["step"] = "mapping"
                 pinfo["description"] = diff_mapping_file
@@ -212,7 +212,8 @@ def update_mapping():
                     ops = loadobj(diffm)
                     mapping = indexer.get_mapping()
                     # we should have the same doc type declared in the mapping
-                    mapping[doc_type]["properties"] = jsonpatch.apply_patch(mapping[doc_type]["properties"], ops)
+                    # mapping[doc_type]["properties"] = jsonpatch.apply_patch(mapping[doc_type]["properties"], ops)
+                    mapping["properties"] = jsonpatch.apply_patch(mapping["properties"], ops)   # remove doc_type, delete the line above after confirmed
                     res = indexer.update_mapping(mapping)
                     return res
 
@@ -311,7 +312,7 @@ def synced(f):
         if "meta" in steps and self.target_backend_type == "es":
             # old_db_col_names is actually the index name in that case
             index_name = old_db_col_names[1]
-            doc_type = self._meta["build_config"]["doc_type"]
+            # doc_type = self._meta["build_config"]["doc_type"]     # remove doc_type, delete this line after confirmed
             indexer = create_backend(old_db_col_names).target_esidxer
             new_meta = self._meta["_meta"]
             pinfo["step"] = "metadata"
@@ -619,13 +620,13 @@ def sync_es_jsondiff_worker(
         # (not allowed within an ES document (_source))
         [d.pop("_timestamp", None) for d in docs]
         try:
-            res["added"] += indexer.index_bulk(docs, batch_size, action="create")[0]
+            res["added"] += indexer.index_bulk(docs, batch_size, op_type="create")[0]
         except BulkIndexError:
             for doc in docs:
                 _id = doc.pop("_id")
                 try:
                     # force action=create to spot docs already added
-                    indexer.index(doc, _id, action="create")
+                    indexer.index(doc, _id, op_type="create")
                     res["added"] += 1
                 except ConflictError:
                     # already added
diff --git a/biothings/hub/dataindex/indexer_task.py b/biothings/hub/dataindex/indexer_task.py
index d9f4a25a4..cfcfb52a4 100644
--- a/biothings/hub/dataindex/indexer_task.py
+++ b/biothings/hub/dataindex/indexer_task.py
@@ -9,6 +9,7 @@
 
 from biothings.utils.es import ESIndex as BaseESIndex
 from biothings.utils.loggers import get_logger
+from biothings.utils.serializer import to_json
 
 try:
     from biothings.utils.mongo import doc_feeder
@@ -92,8 +93,12 @@ def _action(doc):
                 self.logger.error(error)
                 self.logger.error("Document ID %s failed: %s", document_id, reason)
 
-            self.logger.warning("Discovered errors during the bulk index task. Defaulting to 0 indexed documents")
-            return 0
+            serialized_errors = to_json(errors, indent=True)
+            message = (
+                f"Bulk indexing failed for index '{self.index_name}'. "
+                f"Elasticsearch responded with errors:\n{serialized_errors}"
+            )
+            raise helpers.BulkIndexError(message, errors) from e
 
     # NOTE
     # Why doesn't "mget", "mexists", "mindex" belong to the base class?
diff --git a/biothings/hub/dataindex/mongo_build_cleanup.py b/biothings/hub/dataindex/mongo_build_cleanup.py
new file mode 100644
index 000000000..71eb086f5
--- /dev/null
+++ b/biothings/hub/dataindex/mongo_build_cleanup.py
@@ -0,0 +1,182 @@
+from datetime import datetime
+from functools import partial
+
+from config import logger as logging
+
+from biothings import config as btconfig
+from biothings.hub.manager import BaseManager
+from biothings.utils import mongo
+from biothings.utils.hub_db import get_src_build
+
+
+class MongoBuildCleaner:
+    """List, delete and validate the build records kept in the hub's src_build collection."""
+
+    def __init__(self, job_manager):
+        self.job_manager = job_manager
+
+    def list_builds(self, build_config=None, build_name=None, year=None):
+        """Return build records grouped by build configuration, newest first.
+
+        :param build_config: keep only builds whose ``build_config._id`` equals this value
+        :param build_name: keep only the build whose ``_id`` equals this value
+        :param year: keep only builds started during this calendar year (int or numeric str)
+        :return: list of ``{"_id": <config id or "N/A">, "items": [build, ...]}`` dicts
+        :raises ValueError: if ``year`` is a non-numeric string
+        """
+        collection = get_src_build()
+
+        filters = {}
+        if build_config:
+            filters["build_config._id"] = build_config
+        if build_name:
+            filters["_id"] = build_name
+        if year:
+            year = int(year)
+            # half-open range: from Jan 1st of ``year`` up to (excluding) Jan 1st of the next
+            filters["started_at"] = {"$gte": datetime(year, 1, 1), "$lt": datetime(year + 1, 1, 1)}
+
+        projection = {"_id": 1, "build_config": 1, "started_at": 1, "archived": 1, "target_name": 1}
+        builds = list(collection.find(filters, projection).sort("started_at", -1))
+
+        grouped = {}
+        for build in builds:
+            # build_config can be absent or stored as null/non-dict: group such records
+            # under "N/A" instead of failing with AttributeError on ``.get``
+            build_cfg = build.get("build_config")
+            group_name = (build_cfg.get("_id") if isinstance(build_cfg, dict) else None) or "N/A"
+            grouped.setdefault(group_name, []).append(build)
+
+        return [{"_id": key, "items": items} for key, items in grouped.items()]
+
+    async def delete_builds(self, build_ids):
+        """Delete build records and drop the target collections that belong to them.
+
+        For every matching record, collections named after its ``_id`` or its
+        ``target_name`` are dropped from the target database when they exist; the
+        records are then removed from src_build.
+
+        :param build_ids: a single build ``_id`` (str) or an iterable of them
+        :return: dict with ``deleted_count``, ``target_collections_deleted_count`` and
+                 ``target_collections_deleted`` (sorted collection names)
+        """
+        if not build_ids:
+            return {"deleted_count": 0, "target_collections_deleted_count": 0, "target_collections_deleted": []}
+        # "$in" needs an array: wrap a lone id and materialize tuples/sets/generators
+        build_ids = [build_ids] if isinstance(build_ids, str) else list(build_ids)
+
+        conn = mongo.get_hub_db_async_conn()
+        try:
+            src_build = mongo.get_src_build_async(conn)
+
+            # a build's _id doubles as a candidate collection name, next to target_name
+            candidates = set()
+            async for doc in src_build.find({"_id": {"$in": build_ids}}, {"_id": 1, "target_name": 1}):
+                candidates.add(doc["_id"])
+                if doc.get("target_name"):
+                    candidates.add(doc["target_name"])
+
+            dropped = []
+            if candidates:
+                target_db = conn[btconfig.DATA_TARGET_DATABASE]
+                existing = await target_db.list_collection_names(filter={"name": {"$in": list(candidates)}})
+                for collection_name in existing:
+                    await target_db[collection_name].drop()
+                    dropped.append(collection_name)
+
+            result = await src_build.delete_many({"_id": {"$in": build_ids}})
+            return {
+                "deleted_count": result.deleted_count,
+                "target_collections_deleted_count": len(dropped),
+                "target_collections_deleted": sorted(dropped),
+            }
+        finally:
+            await conn.close()
+
+    async def validate_builds(self):
+        """Validate that target collections exist for each build record.
+
+        Checks every build in src_build to see if its target collection still
+        exists in the target database.  Build records whose target collections
+        have been removed are deleted, keeping the database in sync with the
+        actual data.
+
+        Returns a dict with ``builds_removed`` (count) and ``builds_removed_names``.
+        """
+        logging.info("Starting validation of MongoDB builds...")
+        conn = mongo.get_hub_db_async_conn()
+        try:
+            src_build = mongo.get_src_build_async(conn)
+            target_db = conn[btconfig.DATA_TARGET_DATABASE]
+
+            existing_collections = set(await target_db.list_collection_names())
+
+            orphaned_ids = []
+            async for doc in src_build.find({}, {"_id": 1, "target_name": 1}):
+                # the _id is used as collection name when target_name is missing or empty
+                if (doc.get("target_name") or doc["_id"]) not in existing_collections:
+                    orphaned_ids.append(doc["_id"])
+
+            deleted_count = 0
+            if orphaned_ids:
+                result = await src_build.delete_many({"_id": {"$in": orphaned_ids}})
+                deleted_count = result.deleted_count
+
+            # No "notify" flag here: validate_done() sends the user-facing notification,
+            # flagging this record as well made every run notify twice.
+            logging.info("Build validation complete: removed %d orphaned build record(s)", deleted_count)
+            return {"builds_removed": deleted_count, "builds_removed_names": sorted(orphaned_ids)}
+        finally:
+            await conn.close()
+
+    def done(self, future):
+        """Done-callback for delete_builds: log the outcome or the failure (notify-flagged)."""
+        try:
+            result = future.result()
+            logging.info(
+                "Deleted %d MongoDB builds and dropped %d target collections",
+                result.get("deleted_count", 0),
+                result.get("target_collections_deleted_count", 0),
+                extra={"notify": True},
+            )
+        except Exception as exc:
+            logging.exception("Failed to delete MongoDB builds: %s", exc, extra={"notify": True})
+
+    def validate_done(self, future):
+        """Done-callback for validate_builds: log the outcome or the failure (notify-flagged)."""
+        try:
+            result = future.result()
+            logging.info(
+                "Build validation complete: removed %d orphaned build record(s)",
+                result.get("builds_removed", 0),
+                extra={"notify": True},
+            )
+        except Exception as exc:
+            logging.exception("Failed to validate MongoDB builds: %s", exc, extra={"notify": True})
+
+
+class MongoBuildCleanupManager(BaseManager):  # hub manager delegating to a MongoBuildCleaner
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.cleaner = MongoBuildCleaner(self.job_manager)
+
+    def list_mongo_builds(self, build_config=None, build_name=None, year=None):  # runs synchronously
+        return self.cleaner.list_builds(build_config=build_config, build_name=build_name, year=year)
+
+    def delete_mongo_builds(self, build_ids):  # submits the deletion, returns the job
+        try:
+            pending = self.job_manager.submit(partial(self.cleaner.delete_builds, build_ids))
+            pending.add_done_callback(self.cleaner.done)
+            return pending
+        except Exception as err:  # log with the notify flag, then propagate to the caller
+            logging.exception("Error while submitting MongoDB build deletion job: %s", err, extra={"notify": True})
+            raise
+
+    def validate_mongo_builds(self):  # submits the validation, returns the job
+        try:
+            pending = self.job_manager.submit(partial(self.cleaner.validate_builds))
+            pending.add_done_callback(self.cleaner.validate_done)
+            return pending
+        except Exception as err:  # log with the notify flag, then propagate to the caller
+            logging.exception("Error while submitting MongoDB build validation job: %s", err, extra={"notify": True})
+            raise
diff --git a/biothings/hub/dataindex/snapshooter.py b/biothings/hub/dataindex/snapshooter.py
index 153c87659..7f62cfeb2 100644
--- a/biothings/hub/dataindex/snapshooter.py
+++ b/biothings/hub/dataindex/snapshooter.py
@@ -8,9 +8,8 @@
 from functools import partial
 
 import boto3
-from config import logger as logging
 from elasticsearch import Elasticsearch
-from elasticsearch.exceptions import TransportError, NotFoundError
+from elasticsearch.exceptions import NotFoundError, TransportError
 
 from biothings import config as btconfig
 from biothings.hub import SNAPSHOOTER_CATEGORY
@@ -22,6 +21,7 @@
 from biothings.utils.hub import template_out
 from biothings.utils.hub_db import get_src_build
 from biothings.utils.loggers import get_logger
+from config import logger as logging
 
 from . import snapshot_cleanup as cleaner, snapshot_registrar as registrar
 from .snapshot_repo import Repository
diff --git a/biothings/hub/dataload/dumper.py b/biothings/hub/dataload/dumper.py
index 44476fedf..b4e44c785 100644
--- a/biothings/hub/dataload/dumper.py
+++ b/biothings/hub/dataload/dumper.py
@@ -20,6 +20,8 @@
 from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Tuple, Union
 from urllib import parse as urlparse
 
+from biothings.utils import serializer
+
 try:
     import docker
     from docker.errors import ImageNotFound, NotFound, NullResource
@@ -28,7 +30,6 @@
 except ImportError:
     docker_avail = False
 
-import orjson
 import requests
 
 from biothings import config as btconfig
@@ -873,9 +874,9 @@ class HTTPDumper(BaseDumper):
     Dumper using HTTP protocol and "requests" library
     """
 
-    VERIFY_CERT = True
-    IGNORE_HTTP_CODE = []  # list of HTTP code to ignore in case on non-200 response
-    RESOLVE_FILENAME = False  # global trigger to get filenames from headers
+    VERIFY_CERT: bool = True
+    IGNORE_HTTP_CODE: List[int] = []  # list of HTTP code to ignore in case on non-200 response
+    RESOLVE_FILENAME: bool = False  # global trigger to get filenames from headers
 
     def prepare_client(self) -> None:
         self.client = requests.Session()
@@ -1883,7 +1884,7 @@ def _run_api_and_store_to_disk(
     try:
         for filename, obj in fn():
             fn_byte_arr = buffer.setdefault(filename, bytearray())
-            fn_byte_arr.extend(orjson.dumps(obj) + b"\n")
+            fn_byte_arr.extend(serializer.to_json(obj, return_bytes=True) + b"\n")
             if len(fn_byte_arr) >= buffer_size:
                 with open(f"{filename}.{pid}", "ab") as f:
                     f.write(fn_byte_arr)
diff --git a/biothings/hub/dataplugin/loaders/dumper.py.tpl b/biothings/hub/dataplugin/loaders/dumper.py.tpl
index 3e714e81f..1e7a02521 100644
--- a/biothings/hub/dataplugin/loaders/dumper.py.tpl
+++ b/biothings/hub/dataplugin/loaders/dumper.py.tpl
@@ -6,6 +6,7 @@ from config import DATA_ARCHIVE_ROOT
 
 from biothings.utils.common import uncompressall
 
+import $PLUGIN_MODULE
 
 import biothings.hub.dataload.dumper
 
diff --git a/biothings/hub/dataplugin/loaders/loader.py b/biothings/hub/dataplugin/loaders/loader.py
index 55dd7d89d..ef36355c6 100644
--- a/biothings/hub/dataplugin/loaders/loader.py
+++ b/biothings/hub/dataplugin/loaders/loader.py
@@ -201,10 +201,10 @@ def get_code_for_mod_name(self, plugin_directory: Union[str, pathlib.Path], mod_
         """
         try:
             module, funcname = map(str.strip, mod_name.split(":"))
-        except ValueError:
+        except ValueError as exc:
             raise LoaderException(
                 "Invalid format for module '%s', it must be use the following format 'module:func'", mod_name
-            )
+            ) from exc
 
         plugin_directory = pathlib.Path(plugin_directory).resolve().absolute()
         module_file = plugin_directory.joinpath(module).with_suffix(".py")
@@ -271,6 +271,8 @@ def get_dumper_dynamic_class(
                 dumper_class = self.dumper_registry.get(scheme)
                 dumper_configuration["BASE_CLASSES"] = "biothings.hub.dataload.dumper.%s" % dumper_class.__name__
 
+            dumper_configuration["PLUGIN_MODULE"] = dumper_configuration["BASE_CLASSES"].split(".")[0]
+
             if not dumper_class:
                 raise LoaderException("No dumper class registered to handle scheme '%s'", scheme)
 
@@ -282,18 +284,12 @@ def get_dumper_dynamic_class(
             if dumper_section.get("release"):
                 indentfunc, func = self.get_code_for_mod_name(plugin_directory, dumper_section["release"])
                 assert func != "set_release", "'set_release' is a reserved method name, pick another name"
-                dumper_configuration["SET_RELEASE_FUNC"] = (
-                    """
-%s
+                dumper_configuration["SET_RELEASE_FUNC"] = f"""
+{indentfunc}
 
     def set_release(self):
-        self.release = self.%s()
+        self.release = self.{func}()
 """
-                    % (
-                        indentfunc,
-                        func,
-                    )
-                )
 
             else:
                 dumper_configuration["SET_RELEASE_FUNC"] = ""
@@ -352,20 +348,17 @@ def get_uploader_dynamic_class(
                 if uploader_section.get("parser_kwargs"):
                     parser_kwargs_serialized = repr(uploader_section["parser_kwargs"])
 
-                    confdict["PARSER_FACTORY_CODE"] = textwrap.dedent(
-                        f"""
+                    confdict["PARSER_FACTORY_CODE"] = textwrap.dedent(f"""
                         # Setup parser to parser factory
                         from {mod} import {func} as parser_func
 
                         parser_kwargs = {parser_kwargs_serialized}
-                    """
-                    )
+                    """)
                 else:
                     # create empty parser_kwargs to pass to parser_func
                     parser_kwargs_serialized = repr({})
 
-                    confdict["PARSER_FACTORY_CODE"] = textwrap.dedent(
-                        f"""
+                    confdict["PARSER_FACTORY_CODE"] = textwrap.dedent(f"""
                     # when code is exported, import becomes relative
                     try:
                         from {self.plugin_path_name}.{mod} import {func} as parser_func
@@ -381,8 +374,7 @@ def get_uploader_dynamic_class(
                             importlib.reload({mod})
                             from {mod} import {func} as parser_func
                     parser_kwargs = {parser_kwargs_serialized}
-                    """
-                    )
+                    """)
             except ValueError as value_error:
                 loader_error_message = (
                     f"`parser` must be defined as `module:parser_func` but got: `{uploader_section['parser']}`"
@@ -426,38 +418,35 @@ def get_uploader_dynamic_class(
                     assert func != "jobs", "'jobs' is a reserved method name, pick another name"
                     confdict["BASE_CLASSES"] = "biothings.hub.dataload.uploader.ParallelizedSourceUploader"
                     confdict["IMPORT_FROM_PARALLELIZER"] = ""
-                    confdict["JOBS_FUNC"] = (
-                        """
-%s
+                    confdict["JOBS_FUNC"] = f"""
+{indentfunc}
     def jobs(self):
-        return self.%s()
+        return self.{func}()
 """
-                        % (
-                            indentfunc,
-                            func,
-                        )
-                    )
                 else:
-                    confdict["BASE_CLASSES"] = "biothings.hub.dataload.uploader.BaseSourceUploader"
+                    # use specified custom class
+                    klass = uploader_section.get("class")
+                    if klass:
+                        get_class_from_classpath(klass)
+                        confdict["BASE_CLASSES"] = klass
+                    else:
+                        confdict["BASE_CLASSES"] = "biothings.hub.dataload.uploader.BaseSourceUploader"
+
+                    confdict["PLUGIN_MODULE"] = confdict["BASE_CLASSES"].split(".")[0]
+
                     confdict["JOBS_FUNC"] = ""
 
                 if uploader_section.get("mapping"):
                     indentfunc, func = self.get_code_for_mod_name(plugin_directory, uploader_section["mapping"])
                     assert func != "get_mapping", "'get_mapping' is a reserved class method name, pick another name"
-                    confdict["MAPPING_FUNC"] = (
-                        """
+                    confdict["MAPPING_FUNC"] = f"""
     @classmethod
-%s
+{indentfunc}
 
     @classmethod
     def get_mapping(cls):
-        return cls.%s()
+        return cls.{func}()
 """
-                        % (
-                            indentfunc,
-                            func,
-                        )
-                    )
                 else:
                     confdict["MAPPING_FUNC"] = ""
 
@@ -587,8 +576,7 @@ def can_load_plugin(self) -> bool:
         if df.exists():
             data_folder_files = {file.name for file in df.iterdir()}
             return "__init__.py" in data_folder_files
-        else:
-            return False
+        return False
 
     def load_plugin(self):
         plugin = self.get_plugin_obj()
diff --git a/biothings/hub/dataplugin/loaders/schema/manifest.json b/biothings/hub/dataplugin/loaders/schema/manifest.json
index 950dc2d09..4fe9b3fe9 100644
--- a/biothings/hub/dataplugin/loaders/schema/manifest.json
+++ b/biothings/hub/dataplugin/loaders/schema/manifest.json
@@ -143,6 +143,10 @@
                 "disabled": {
                     "type": "boolean",
                     "description": "If true, then the dumper will not be run. This is useful for testing purposes or if you want to disable the dumper without removing it from the manifest"
+                },
+                "class": {
+                  "type": "string",
+                  "description": "Reference to a locally defined dumper class. Format: 'module:class_name'"
                 }
             },
             "required": [
@@ -264,7 +268,11 @@
                         "examples": [
                             "parallelizer:parallel_jobs"
                         ]
-                    }
+                    },
+                    "class": {
+                      "type": "string",
+                      "description": "Reference to a locally defined uploader class. Format: dotted class path 'module.class_name'"
+                  }
                 },
                 "required": [
                     "name",
diff --git a/biothings/hub/dataplugin/loaders/subuploader.py.tpl b/biothings/hub/dataplugin/loaders/subuploader.py.tpl
index 1d7ef8a5b..11e090292 100644
--- a/biothings/hub/dataplugin/loaders/subuploader.py.tpl
+++ b/biothings/hub/dataplugin/loaders/subuploader.py.tpl
@@ -4,7 +4,7 @@ import biothings, config
 biothings.config_for_app(config)
 
 import biothings.hub.dataload.uploader
-
+import $PLUGIN_MODULE
 
 $PARSER_FACTORY_CODE
 
diff --git a/biothings/hub/dataplugin/loaders/uploader.py.tpl b/biothings/hub/dataplugin/loaders/uploader.py.tpl
index 8f621eff3..9e23ab5bc 100644
--- a/biothings/hub/dataplugin/loaders/uploader.py.tpl
+++ b/biothings/hub/dataplugin/loaders/uploader.py.tpl
@@ -5,6 +5,7 @@ biothings.config_for_app(config)
 
 import biothings.hub.dataload.uploader
 
+import $PLUGIN_MODULE
 
 $PARSER_FACTORY_CODE
 
diff --git a/biothings/hub/datatransform/datatransform_mdb.py b/biothings/hub/datatransform/datatransform_mdb.py
index 687ff715e..abd5b9d17 100644
--- a/biothings/hub/datatransform/datatransform_mdb.py
+++ b/biothings/hub/datatransform/datatransform_mdb.py
@@ -188,6 +188,11 @@ def __init__(self, graph, *args, **kwargs):
                               source document regardless as to weather it matches an
                               edge or not. (advanced usage)
         :type copy_from_doc: bool
+
+        Note: Prefixes can be defined at the node level using:
+              graph.add_node("chebi", prefix="CHEBI")
+              When an identifier is converted to a node with a prefix attribute,
+              the prefix will be automatically added to the _id.
         """
         if not isinstance(graph, nx.DiGraph):
             raise ValueError("key_lookup configuration error:  graph must be of type nx.DiGraph")
@@ -198,6 +203,29 @@ def __init__(self, graph, *args, **kwargs):
         super(DataTransformMDB, self).__init__(*args, **kwargs)
         self._precompute_paths()
 
+    def _apply_prefix(self, identifier, output_type):
+        """
+        Return ``identifier`` as a str, prefixed according to ``output_type``.
+
+        Prefixes are declared as node attributes on the graph, e.g.
+        ``graph.add_node("chebi", prefix="CHEBI")`` turns ``123`` into ``"CHEBI:123"``.
+        A node without a prefix, or with an empty/None one, leaves the value unprefixed.
+
+        :param identifier: The identifier value to potentially prefix
+        :param output_type: The graph node name whose prefix attribute is looked up
+        :return: str, with "<prefix>:" prepended unless it is already present
+        """
+        identifier_str = str(identifier)
+        if output_type not in self.graph.nodes():
+            return identifier_str
+        prefix = self.graph.nodes[output_type].get("prefix")
+        # An empty or None prefix means "no prefix"; without this guard the
+        # result would be a malformed id such as ":123" or "None:123".
+        if not prefix:
+            return identifier_str
+        marker = f"{prefix}:"
+        return identifier_str if identifier_str.startswith(marker) else marker + identifier_str
+
     def _valid_input_type(self, input_type):
         return input_type.lower() in self.graph.nodes()
 
@@ -292,7 +320,7 @@ def key_lookup_batch(self, batchiter):
                         (hit_lst, miss_lst) = self.travel(input_type, output_type, miss_lst)
                     # or if copy is allowed, we get the value from the doc
                     elif self.copy_from_doc:
-                        (hit_lst, miss_lst) = self._copy(input_type, miss_lst)
+                        (hit_lst, miss_lst) = self._copy(input_type, output_type, miss_lst)
                 else:
                     (hit_lst, miss_lst) = self.travel(input_type, output_type, miss_lst)
 
@@ -305,15 +333,15 @@ def key_lookup_batch(self, batchiter):
             for doc in miss_lst:
                 yield doc
 
-    def _copy(self, input_type, doc_lst):
+    def _copy(self, input_type, output_type, doc_lst):
         """Copy ids in the case where input_type == output_type"""
         hit_lst = []
         miss_lst = []
         for doc in doc_lst:
             val = nested_lookup(doc, input_type[1])
             if val:
-                # ensure _id is always a str
-                doc["_id"] = str(val)
+                # ensure _id is always a str and apply prefix if configured
+                doc["_id"] = self._apply_prefix(val, output_type)
                 hit_lst.append(doc)
                 # retain debug information if available (assumed dt_debug already in place)
                 if self.debug:
@@ -371,8 +399,8 @@ def _build_hit_miss_lsts(doc_lst, id_strct, debug):
                 value = nested_lookup(doc, input_type[1])
                 for lookup_id in id_strct.find_left(value):
                     new_doc = copy.deepcopy(doc)
-                    # ensure _id is always a str
-                    new_doc["_id"] = str(lookup_id)
+                    # ensure _id is always a str and apply prefix if configured
+                    new_doc["_id"] = self._apply_prefix(lookup_id, target)
                     # capture debug information
                     if debug:
                         new_doc["dt_debug"]["start_field"] = input_type[1]
diff --git a/biothings/hub/default_config.py b/biothings/hub/default_config.py
index 4a5084dd2..e7c7f3f5c 100644
--- a/biothings/hub/default_config.py
+++ b/biothings/hub/default_config.py
@@ -83,8 +83,15 @@
 import biothings.utils.jsondiff
 
 # set_default_folder is needed for evaluating some default values below
-from biothings.utils.configuration import set_default_folder  # pylint: disable=unused-import      # noqa
-from biothings.utils.configuration import ConfigurationDefault, ConfigurationError, ConfigurationValue
+from biothings.utils.configuration import (
+    ConfigurationDefault,
+    ConfigurationError,
+    ConfigurationValue,
+    set_default_folder,
+)
+
+# ConfigurationValue evaluates these symbols dynamically from this module's namespace.
+_CONFIGURATION_EVAL_SYMBOLS = (logging, set_default_folder)
 
 # * 1. General *#
 # Hub name/icon url/version, for display purpose
@@ -508,11 +515,21 @@
 ####################################################
 
 # for running tests locally in our biothings hub with testing api
-APITEST_PATH = ConfigurationError("Define path to folder which will contain pytests")
-
+APITEST_ROOT = ConfigurationDefault(
+    default="./tests",
+    desc="Define the root path to a folder that contains API tests",
+)
+APITEST_PATH = ConfigurationDefault(
+    default="",
+    desc="Define the path to a sub-folder of `APITEST_ROOT` that contains API tests",
+)
+APITEST_CONFIG_ROOT = ConfigurationDefault(
+    default=".",
+    desc="Define the root path containing the config_web to run a dev API for testing",
+)
 APITEST_CONFIG = ConfigurationDefault(
-    default=ConfigurationValue("""'config_web_local'"""),
-    desc="Provide a default hub logger instance (use setup_default_log(name,log_folder)",
+    default="config_web_local",
+    desc="Define the name of the config_web file to run a dev API for testing",
 )
 
 
diff --git a/biothings/utils/backend.py b/biothings/utils/backend.py
index 100874311..11a4d0e67 100644
--- a/biothings/utils/backend.py
+++ b/biothings/utils/backend.py
@@ -216,7 +216,7 @@ def count_from_ids(self, ids, step=100000):
         """
         total_cnt = 0
         for i in range(0, len(ids), step):
-            _ids = ids[i : i + step]
+            _ids = ids[i:i + step]
             _cnt = self.target_collection.count_documents({"_id": {"$in": _ids}})
             total_cnt += _cnt
         return total_cnt
@@ -230,7 +230,7 @@ def finalize(self):
     def remove_from_ids(self, ids, step=10000):
         deleted = 0
         for i in range(0, len(ids), step):
-            res = self.target_collection.delete_many({"_id": {"$in": ids[i : i + step]}})
+            res = self.target_collection.delete_many({"_id": {"$in": ids[i:i + step]}})
             deleted += res.deleted_count
         return deleted
 
@@ -244,7 +244,7 @@ class DocESBackend(DocBackendBase):
 
     def __init__(self, esidxer=None):
         """esidxer is an instance of utils.es.ESIndexer class."""
-        if type(esidxer) == partial:
+        if isinstance(esidxer, partial):
             self._target_esidxer_provider = esidxer
             self._target_esidxer = None
         else:
@@ -253,7 +253,7 @@ def __init__(self, esidxer=None):
 
     @property
     def target_esidxer(self):
-        if not self._target_esidxer:
+        if not self._target_esidxer and self._target_esidxer_provider:
             self._target_esidxer = self._target_esidxer_provider()
         return self._target_esidxer
 
@@ -339,14 +339,15 @@ def query(self, query=None, verbose=False, step=10000, scroll="10m", only_source
     def create_from_options(cls, options):
         """Function that recreates itself from a DocBackendOptions class.  Probably a needless
         rewrite of __init__..."""
-        if not options.es_index or not options.es_host or not options.es_doc_type:
+        if not options.es_index or not options.es_host:
             raise Exception(
-                "Cannot create backend class from options, ensure that es_index, es_host, and es_doc_type are set"
+                "Cannot create backend class from options, ensure that es_index, es_host are set"
             )
-        return cls(ESIndexer(index=options.es_index, doc_type=options.es_doc_type, es_host=options.es_host))
+        return cls(ESIndexer(index=options.es_index, es_host=options.es_host))
 
 
 class DocBackendOptions(object):
+    # Deprecated, not used anywhere
     def __init__(
         self, cls, es_index=None, es_host=None, es_doc_type=None, mongo_target_db=None, mongo_target_collection=None
     ):
diff --git a/biothings/utils/common.py b/biothings/utils/common.py
index ec5577908..fe31a49ea 100644
--- a/biothings/utils/common.py
+++ b/biothings/utils/common.py
@@ -19,14 +19,17 @@
 import os.path
 import pickle
 import random
+import shutil
 import string
 import sys
+import tarfile
+import tempfile
 import time
 import types
 import urllib.parse
 import warnings
 from collections import UserDict, UserList
-from contextlib import contextmanager
+from contextlib import closing, contextmanager
 from datetime import date, datetime, timezone
 from functools import partial
 from itertools import islice
@@ -35,6 +38,11 @@
 import requests
 import yaml
 
+try:
+    import zstandard as zstd
+except ImportError:
+    zstd = None
+
 # from json serial, catching special type
 # import _sre     # TODO: unused import;remove it once confirmed
 
@@ -160,7 +168,7 @@ def safewfile(filename, prompt=True, default="C", mode="w"):
 
 def anyfile(infile, mode="r"):
     """
-    return a file handler with the support for gzip/zip comppressed files.
+    return a file handler with the support for gzip/zip compressed files.
     if infile is a two value tuple, then first one is the compressed file;
     the second one is the actual filename in the compressed file.
     e.g., ('a.zip', 'aa.txt')
@@ -171,6 +179,58 @@ def anyfile(infile, mode="r"):
     else:
         rawfile = os.path.splitext(infile)[0]
     filetype = os.path.splitext(infile)[1].lower()
+
+    # check if lower version zst handling is needed
+    lower_version_zst = False
+    if sys.version_info < (3, 14) and filetype == ".zst":
+        if zstd is None:
+            raise ImportError("zstandard is required to open .zst files on Python versions below 3.14")
+        lower_version_zst = True
+
+    # tarfile handling. works for zst in Python >= 3.14
+    if lower_version_zst or tarfile.is_tarfile(infile):
+        if lower_version_zst:
+            with open(infile, "rb") as compressed_file:
+                dctx = zstd.ZstdDecompressor()
+                with closing(dctx.stream_reader(compressed_file)) as reader:
+                    with tarfile.open(fileobj=reader, mode="r|") as tar_file:
+                        for member in tar_file:
+                            if member.name == rawfile:
+                                extracted = tar_file.extractfile(member)
+                                break
+                        else:
+                            extracted = None
+
+                        # Keep the returned file readable after closing the tar and zst streams.
+                        if extracted is not None:
+                            with extracted:
+                                spooled_file = tempfile.SpooledTemporaryFile(  # pylint: disable=consider-using-with
+                                    max_size=1024 * 1024
+                                )
+                                shutil.copyfileobj(extracted, spooled_file)
+                            spooled_file.seek(0)
+
+            # extracted member is not a regular file or link
+            if extracted is None:
+                raise ValueError("invalid target file: must be a regular file or a link")
+
+            return spooled_file
+
+        tar_file = tarfile.open(infile, mode)  # pylint: disable=consider-using-with
+        try:
+            extracted = tar_file.extractfile(rawfile)
+        except KeyError as exc:
+            # provided rawfile does not appear in the tarball
+            tar_file.close()
+            raise FileNotFoundError("target member does not contain the provided tar file.") from exc
+
+        # extracted member is not a regular file or link
+        if extracted is None:
+            tar_file.close()
+            raise ValueError("invalid target file: must be a regular file or a link")
+
+        return io.TextIOWrapper(extracted)
+
     if filetype == ".gz":
         # import gzip
         in_f = io.TextIOWrapper(gzip.GzipFile(infile, mode))
@@ -747,7 +807,7 @@ def sanitize_tarfile(tar_object, directory):
         abs_target = os.path.abspath(target)
         prefix = os.path.commonprefix([abs_directory, abs_target])
         if not prefix == abs_directory:
-            raise Exception("Attempted Path Traversal in Tar File")
+            raise ValueError("Attempted Path Traversal in Tar File")
 
 
 def sizeof_fmt(num, suffix="B"):
diff --git a/biothings/utils/dataload.py b/biothings/utils/dataload.py
index 8a733b440..41c416db3 100644
--- a/biothings/utils/dataload.py
+++ b/biothings/utils/dataload.py
@@ -9,6 +9,7 @@
 # from __future__ import unicode_literals
 import itertools
 import json
+import math
 import os
 import os.path
 from collections import Counter, OrderedDict
@@ -21,9 +22,57 @@
 csv.field_size_limit(10000000)  # default is 131072, too small for some big files
 
 
+def _missing_value_kind(val):
+    """Return a stable kind for NaN-like values without importing optional deps.
+
+    Gives the class name ("NAType"/"NaTType") for pandas.NA/pandas.NaT-typed values, detected by
+    class module and name only; "NaN" when math.isnan(val) is true; None for anything else.
+    """
+    val_cls = val.__class__
+    cls_module = getattr(val_cls, "__module__", "")
+    cls_name = getattr(val_cls, "__name__", "")
+    if (cls_module == "pandas" or cls_module.startswith("pandas.")) and cls_name in ("NAType", "NaTType"):
+        return cls_name
+    try:
+        # OverflowError: math.isnan() rejects ints too large to convert to float (e.g. 10**400)
+        return "NaN" if math.isnan(val) else None
+    except (TypeError, ValueError, OverflowError):
+        return None
+
+
+def _val_to_delete(val, vals):
+    """Tell whether ``val`` matches one of the sweep values in ``vals``.
+
+    A bare string passed as ``vals`` is treated as a one-element list.
+    NaN-like values (float NaN, pandas.NA, pandas.NaT) never compare by
+    equality: they match only a candidate of the same missing-value kind,
+    so they are removed only when explicitly included in ``vals``.
+    """
+    candidates = [vals] if is_str(vals) else vals
+    own_kind = _missing_value_kind(val)
+
+    for other in candidates:
+        other_kind = _missing_value_kind(other)
+        if own_kind or other_kind:
+            # at least one side is NaN-like: compare kinds, never values
+            if own_kind == other_kind:
+                return True
+        else:
+            try:
+                if val == other:
+                    return True
+            except (TypeError, ValueError):
+                # the comparison itself failed: treat this candidate as no match
+                pass
+    return False
+
+
 def dict_sweep(d, vals=None, remove_invalid_list=False):
     """
-    Remove keys whose values are ".", "-", "", "NA", "none", " "; and remove empty dictionaries
+    Remove keys whose values are ".", "-", "", "NA", "none", " "; and remove empty dictionaries.
+
+    NaN-like values (float NaN, pandas.NA, pandas.NaT) are only removed when
+    explicitly included in the ``vals`` list.
 
     Args:
         d (dict): a dictionary
@@ -45,11 +94,11 @@ def dict_sweep(d, vals=None, remove_invalid_list=False):
     # set default supported vals for empty values
     vals = vals or [".", "-", "", "NA", "none", " ", "Not Available", "unknown"]
     for key, val in list(d.items()):
-        if val in vals:
+        if _val_to_delete(val, vals):
             del d[key]
         elif isinstance(val, list):
             if remove_invalid_list:
-                val = [v for v in val if v not in vals]
+                val = [v for v in val if not _val_to_delete(v, vals)]
                 for item in val:
                     if isinstance(item, dict):
                         dict_sweep(item, vals, remove_invalid_list=remove_invalid_list)
@@ -59,12 +108,14 @@ def dict_sweep(d, vals=None, remove_invalid_list=False):
                 else:
                     d[key] = val
             else:
-                for item in val:
-                    if item in vals:
-                        val.remove(item)
+                i = 0
+                while i < len(val):
+                    item = val[i]
+                    if _val_to_delete(item, vals):
+                        del val[i]
                     elif isinstance(item, dict):
                         dict_sweep(item, vals, remove_invalid_list=remove_invalid_list)
-                # if len(val) == 0:
+                    i += 1
                 if not val:
                     del d[key]
         elif isinstance(val, dict):
diff --git a/biothings/utils/dotfield.py b/biothings/utils/dotfield.py
index 27c0c541e..bcffcd61b 100644
--- a/biothings/utils/dotfield.py
+++ b/biothings/utils/dotfield.py
@@ -1,4 +1,4 @@
-import orjson
+from biothings.utils import serializer
 
 
 def make_object(attr, value):
@@ -21,10 +21,9 @@ def make_object(attr, value):
     # s += "}" * (len(attr_list))
     # return json.loads(s)
 
-    # New implementation using orjson module
-    s += orjson.dumps(value).decode("utf-8")  # decoding is necessary because orjson dumps into bytes
+    s += serializer.to_json(value)
     s += "}" * (len(attr_list))
-    return orjson.loads(s)
+    return serializer.load_json(s)
 
 
 def merge_object(obj1, obj2):
diff --git a/biothings/utils/es.py b/biothings/utils/es.py
index 3cea7352d..cb7c78271 100644
--- a/biothings/utils/es.py
+++ b/biothings/utils/es.py
@@ -40,13 +40,11 @@ def verify_ids(
     doc_iter,
     es_host,
     index,
-    doc_type=None,
     step=100000,
 ):
     """verify how many docs from input interator/list overlapping with existing docs."""
 
     index = index
-    doc_type = doc_type
     es = get_es(es_host)
     q = {"query": {"ids": {"values": []}}}
     total_cnt = 0
@@ -56,7 +54,7 @@ def verify_ids(
         id_li = [doc["_id"] for doc in doc_batch]
         # id_li = [doc['_id'].replace('chr', '') for doc in doc_batch]
         q["query"]["ids"]["values"] = id_li
-        xres = es.search(index=index, doc_type=doc_type, body=q, _source=False)
+        xres = es.search(index=index, body=q, source=False)
         found_cnt += xres["hits"]["total"]
         total_cnt += len(id_li)
         out.extend([x["_id"] for x in xres["hits"]["hits"]])
@@ -64,7 +62,7 @@ def verify_ids(
 
 
 def get_es(es_host, timeout=120, max_retries=3, retry_on_timeout=False):
-    es = Elasticsearch(es_host, timeout=timeout, max_retries=max_retries, retry_on_timeout=retry_on_timeout)
+    es = Elasticsearch(es_host, request_timeout=timeout, max_retries=max_retries, retry_on_timeout=retry_on_timeout)
     return es
 
 
@@ -76,9 +74,7 @@ def wrapper(func):
     def outter_fn(*args, **kwargs):
         self = args[0]
         index = kwargs.pop("index", self._index)  # pylint: disable=protected-access
-        doc_type = kwargs.pop("doc_type", self._doc_type)  # pylint: disable=protected-access
         self._index = index  # pylint: disable=protected-access
-        self._doc_type = doc_type  # pylint: disable=protected-access
         return func(*args, **kwargs)
 
     outter_fn.__doc__ = func.__doc__
@@ -141,7 +137,6 @@ class ESIndexer:
     def __init__(
         self,
         index,
-        doc_type="_doc",
         es_host="http://localhost:9200",
         step=500,
         step_size=10,  # elasticsearch.helpers.bulk
@@ -150,6 +145,8 @@ def __init__(
         check_index=True,
         **kwargs,
     ):
+        # some old caller may still pass doc_type, we will ignore it here since it's no longer used.
+        kwargs.pop("doc_type", None)
         self.es_host = es_host
         self._es = get_es(es_host, **kwargs)
         self._host_major_ver = int(self._es.info()["version"]["number"].split(".")[0])
@@ -161,18 +158,6 @@ def __init__(
             # the real underlying index
             self.check_index()
 
-        self._doc_type = None
-        if doc_type:
-            self._doc_type = doc_type
-        else:
-            # assuming index exists, get mapping to discover doc_type
-            try:
-                m = self.get_mapping()
-                assert len(m) == 1, "Expected only one doc type, got: %s" % m.keys()
-                self._doc_type = list(m).pop()
-            except Exception as e:  # pylint: disable=broad-except
-                if check_index:
-                    logging.info("Failed to guess doc_type: %s", e)
         # set number_of_shards when create_index
         self.number_of_shards = number_of_shards
         # set number_of_replicas when create_index
@@ -204,7 +189,7 @@ def check_index(self):
 
     @wrapper
     def get_biothing(self, bid, only_source=False, **kwargs):
-        rawdoc = self._es.get(index=self._index, id=bid, doc_type=self._doc_type, **kwargs)
+        rawdoc = self._es.get(index=self._index, id=bid, **kwargs)
         if not only_source:
             return rawdoc
         else:
@@ -226,7 +211,6 @@ def mexists(self, bid_list):
         q = {"query": {"ids": {"values": bid_list}}}
         res = self._es.search(
             index=self._index,
-            doc_type=self._doc_type,
             body=q,
             stored_fields=None,
             size=len(bid_list),
@@ -240,7 +224,7 @@ def count(self, q=None, raw=False):
         try:
             count_kwargs = {"index": self._index}
             if q is not None:
-                count_kwargs.update({"doc_type": self._doc_type, "q": q})
+                count_kwargs.update({"q": q})
             _res = self._es.count(**count_kwargs)
             return _res if raw else _res["count"]
         except NotFoundError:
@@ -294,16 +278,15 @@ def exists_index(self, index: Optional[str] = None):
             index = self._index
         return self._es.indices.exists(index=index)
 
-    def index(self, doc, id=None, action="index"):  # pylint: disable=redefined-builtin
+    def index(self, doc, id=None, op_type="index"):  # pylint: disable=redefined-builtin
         """add a doc to the index. If id is not None, the existing doc will be
         updated.
         """
-        self._es.index(index=self._index, doc_type=self._doc_type, body=doc, id=id, params={"op_type": action})
+        self._es.index(index=self._index, body=doc, id=id, op_type=op_type)
 
-    def index_bulk(self, docs, step=None, action="index"):
+    def index_bulk(self, docs, step=None, op_type="index"):
         self._populate_es_version()
         index_name = self._index
-        doc_type = self._doc_type
         step = step or self.step
 
         def _get_bulk(doc):
@@ -312,12 +295,9 @@ def _get_bulk(doc):
             ndoc.update(
                 {
                     "_index": index_name,
-                    "_type": doc_type,
-                    "_op_type": action,
+                    "_op_type": op_type,
                 }
             )
-            if self._host_major_ver > 6:
-                ndoc.pop("_type")
             return ndoc
 
         actions = (_get_bulk(doc) for doc in docs)
@@ -329,19 +309,15 @@ def _get_bulk(doc):
 
     def delete_doc(self, id):  # pylint: disable=redefined-builtin
         """delete a doc from the index based on passed id."""
-        return self._es.delete(index=self._index, doc_type=self._doc_type, id=id)
+        return self._es.delete(index=self._index, id=id)
 
     def delete_docs(self, ids, step=None):
         """delete a list of docs in bulk."""
         index_name = self._index
-        doc_type = self._doc_type
         step = step or self.step
 
         def _get_bulk(_id):
-            if self._host_major_ver >= 7:
-                doc = {"_op_type": "delete", "_index": index_name, "_id": _id}
-            else:
-                doc = {"_op_type": "delete", "_index": index_name, "_type": doc_type, "_id": _id}
+            doc = {"_op_type": "delete", "_index": index_name, "_id": _id}
             return doc
 
         actions = (_get_bulk(_id) for _id in ids)
@@ -359,27 +335,17 @@ def update(self, id, extra_doc, upsert=True):  # pylint: disable=redefined-built
         body = {"doc": extra_doc}
         if upsert:
             body["doc_as_upsert"] = True
-        return self._es.update(index=self._index, doc_type=self._doc_type, id=id, body=body)
+        return self._es.update(index=self._index, id=id, body=body)
 
     def update_docs(self, partial_docs, upsert=True, step=None, **kwargs):
         """update a list of partial_docs in bulk.
         allow to set upsert=True, to insert new docs.
         """
         index_name = self._index
-        doc_type = self._doc_type
         step = step or self.step
 
         def _get_bulk(doc):
-            if self._host_major_ver >= 7:
-                doc = {"_op_type": "update", "_index": index_name, "_id": doc["_id"], "doc": doc}
-            else:
-                doc = {
-                    "_op_type": "update",
-                    "_index": index_name,
-                    "_type": doc_type,
-                    "_id": doc["_id"],
-                    "doc": doc,
-                }
+            doc = {"_op_type": "update", "_index": index_name, "_id": doc["_id"], "doc": doc}
             if upsert:
                 doc["doc_as_upsert"] = True
             return doc
@@ -387,31 +353,22 @@ def _get_bulk(doc):
         actions = (_get_bulk(doc) for doc in partial_docs)
         return helpers.bulk(self._es, actions, chunk_size=step, **kwargs)
 
-    def get_mapping(self):
+    def get_mapping(self, with_doc_type=False):
         """return the current index mapping"""
-        if self._host_major_ver <= 6:
-            m = self._es.indices.get_mapping(
-                index=self._index,
-                doc_type=self._doc_type,
-            )
-            return m[self._index]["mappings"]
-        elif self._host_major_ver <= 8:
+        if self._host_major_ver >= 7:
             m = self._es.indices.get_mapping(index=self._index)
-            # fake the mapping doc_type
-            m = {self._doc_type: m[self._index]["mappings"]}
-            return m
-        else:
-            raise RuntimeError(
-                f"Server Elasticsearch version is {self._host_major_ver} "
-                "which is unsupported when using old ESIndexer class"
-            )
+            if with_doc_type:
+                # use "_doc" as a fake doc_type to make it compatible with old behavior
+                # in case some caller expects a doc_type level key
+                return {"_doc": m[self._index]["mappings"]}
+            return m[self._index]["mappings"]
+        raise RuntimeError(
+            f"Server Elasticsearch version is {self._host_major_ver} "
+            "which is unsupported (must >=7) when using old ESIndexer class"
+        )
 
     def update_mapping(self, m):
-        if self._host_major_ver <= 6:
-            assert list(m) == [self._doc_type], "Bad mapping format, should have one doc_type, got: %s" % list(m)
-            assert "properties" in m[self._doc_type], "Bad mapping format, no 'properties' key"
-            return self._es.indices.put_mapping(index=self._index, doc_type=self._doc_type, body=m)
-        elif self._host_major_ver <= 8:
+        if self._host_major_ver >= 7:
             # this is basically guessing based on heuristics
             if len(m) == 1:
                 if "properties" not in m:  # basically {'_doc': mapping}
@@ -425,21 +382,13 @@ def update_mapping(self, m):
         else:
             raise RuntimeError(
                 f"Server Elasticsearch version is {self._host_major_ver} "
-                "which is unsupported when using old ESIndexer class"
+                "which is unsupported (must >=7) when using old ESIndexer class"
             )
 
     def get_mapping_meta(self):
         """return the current _meta field."""
         m = self.get_mapping()
-        doc_type = self._doc_type
-        if doc_type is None:
-            # fetch doc_type from mapping
-
-            assert len(m) == 1, (
-                "More than one doc_type found, not supported when self._doc_type " + "is not initialized"
-            )
-            doc_type = list(m.keys())[0]
-        return {"_meta": m[doc_type]["_meta"]}
+        return {"_meta": m["_meta"]}
 
     def update_mapping_meta(self, meta):
         allowed_keys = {"_meta", "_timestamp"}
@@ -450,9 +399,11 @@ def update_mapping_meta(self, meta):
                     index=self._index,
                     body=meta,
                 )
-            else:  # not sure if _type needs to be specified
-                body = {self._doc_type: meta}
-                return self._es.indices.put_mapping(doc_type=self._doc_type, body=body, index=self._index)
+            else:
+                raise RuntimeError(
+                    f"Server Elasticsearch version is {self._host_major_ver} "
+                    "which is unsupported (must >=7) when using old ESIndexer class"
+                )
         else:
             raise ValueError('Input "meta" should have and only have "_meta" field.')
 
@@ -531,10 +482,10 @@ def rate_control(cnt, t):
     def optimize(self, max_num_segments=1):
         """optimize the default index."""
         params = {
-            "wait_for_merge": False,
+            "wait_for_completion": False,
             "max_num_segments": max_num_segments,
         }
-        return self._es.indices.forcemerge(index=self._index, params=params)
+        return self._es.indices.forcemerge(index=self._index, **params)
 
     def clean_field(self, field, dryrun=True, step=5000):
         """remove a top-level field from ES index, if the field is the only field of the doc,
@@ -543,7 +494,7 @@ def clean_field(self, field, dryrun=True, step=5000):
         try first with dryrun turned on, and then perform the actual updates with dryrun off.
         """
         if self._host_major_ver >= 7:
-            raise RuntimeError("clean_field is no longer supported")
+            raise RuntimeError("clean_field is no longer supported")  # It may still work, but untested yet
         q = {"query": {"constant_score": {"filter": {"exists": {"field": field}}}}}
         cnt_orphan_doc = 0
         cnt = 0
@@ -552,10 +503,10 @@ def clean_field(self, field, dryrun=True, step=5000):
             if set(doc) == {"_id", field}:
                 cnt_orphan_doc += 1
                 # delete orphan doc
-                _li.append({"delete": {"_index": self._index, "_type": self._doc_type, "_id": doc["_id"]}})
+                _li.append({"delete": {"_index": self._index, "_id": doc["_id"]}})
             else:
                 # otherwise, just remove the field from the doc
-                _li.append({"update": {"_index": self._index, "_type": self._doc_type, "_id": doc["_id"]}})
+                _li.append({"update": {"_index": self._index, "_id": doc["_id"]}})
                 # this script update requires "script.disable_dynamic: false" setting
                 # in elasticsearch.yml
                 _li.append({"script": 'ctx._source.remove("{}")'.format(field)})
@@ -581,7 +532,6 @@ def doc_feeder_using_helper(self, step=None, verbose=True, query=None, scroll="1
             query=q,
             scroll=scroll,
             index=self._index,
-            doc_type=self._doc_type,
             **kwargs,
         ):
             if rawdoc.get("_source", False):
@@ -596,6 +546,8 @@ def doc_feeder(self, step=None, verbose=True, query=None, scroll="10m", only_sou
         step = step or self.step
         q = query if query else {"query": {"match_all": {}}}
         _q_cnt = self.count(q=q, raw=True)
+        if not _q_cnt:
+            return
         n = _q_cnt["count"]
         n_shards = _q_cnt["_shards"]["total"]
         assert n_shards == _q_cnt["_shards"]["successful"]
@@ -609,7 +561,6 @@ def doc_feeder(self, step=None, verbose=True, query=None, scroll="10m", only_sou
 
         res = self._es.search(
             index=self._index,
-            doc_type=self._doc_type,
             body=q,
             size=_size,
             search_type="scan",
@@ -656,10 +607,7 @@ def get_docs(self, ids, step=None, only_source=True, **mget_args):
         # chunkify
         step = step or self.step
         for chunk in iter_n(ids, step):
-            if self._host_major_ver > 6:
-                chunk_res = self._es.mget(body={"ids": chunk}, index=self._index, **mget_args)
-            else:
-                chunk_res = self._es.mget(body={"ids": chunk}, index=self._index, doc_type=self._doc_type, **mget_args)
+            chunk_res = self._es.mget(body={"ids": chunk}, index=self._index, **mget_args)
             for rawdoc in chunk_res["docs"]:
                 if ("found" not in rawdoc) or (("found" in rawdoc) and not rawdoc["found"]):
                     continue
@@ -677,9 +625,9 @@ def find_biggest_doc(self, fields_li, min=5, return_doc=False):  # pylint: disab
                 q = " AND ".join(["_exists_:" + field for field in field_set])
                 q = {"query": {"query_string": {"query": q}}}
                 cnt = self.count(q)
-                if cnt > 0:
+                if cnt and cnt > 0:
                     if return_doc:
-                        res = self._es.search(index=self._index, doc_type=self._doc_type, body=q, size=cnt)
+                        res = self._es.search(index=self._index, body=q, size=cnt)
                         return res
                     else:
                         return (cnt, q)
@@ -704,7 +652,7 @@ def snapshot(self, repo, snapshot, mode=None, **params):
                 # ok, nothing to delete/purge
                 pass
         try:
-            return self._es.snapshot.create(repository=repo, snapshot=snapshot, body=body, params=params)
+            return self._es.snapshot.create(repository=repo, snapshot=snapshot, body=body, **params)
         except RequestError as e:
             try:
                 err_msg = e.info["error"]["reason"]
@@ -950,7 +898,7 @@ def update_settings(self, settings, close=False, **params):
         self._es.indices.put_settings(
             body=settings,
             index=self._index,
-            params=params,
+            **params,
         )
 
         if close:
diff --git a/biothings/utils/manager.py b/biothings/utils/manager.py
index 929672e54..2c611a183 100644
--- a/biothings/utils/manager.py
+++ b/biothings/utils/manager.py
@@ -3,8 +3,10 @@
 import copy
 import datetime
 import glob
+import multiprocessing
 import os
 import re
+import sys
 import threading
 import time
 import types
@@ -166,6 +168,23 @@ class JobManager:
     HEADERLINE = "{pid:^10}|{source:^35}|{category:^10}|{step:^20}|{description:^30}|{mem:^10}|{cpu:^6}|{started_at:^20}|{duration:^10}"
     DATALINE = HEADERLINE.replace("^", "<")
 
+    def _get_process_executor(self):
+        """Build a ProcessPoolExecutor with ``self.num_workers`` workers, using 'fork' when available.
+
+        Since Python 3.14, multiprocessing defaults to 'forkserver' instead of 'fork' on POSIX systems. This
+        breaks JobManager when creating dynamic classes in worker processes (e.g. AssistedDumper_<src_name>),
+        as 'forkserver' does not inherit resources from the parent process; forcing 'fork' matches older Pythons.
+        REF: https://docs.python.org/3.14/library/multiprocessing.html#contexts-and-start-methods
+        TODO: we should consider refactoring the code to be compatible with 'forkserver' context in the future.
+        """
+        executor_options = {"max_workers": self.num_workers}
+        if sys.version_info >= (3, 7):
+            try:
+                executor_options["mp_context"] = multiprocessing.get_context("fork")
+            except ValueError:
+                pass  # 'fork' start method is unavailable here: keep the default context
+        return concurrent.futures.ProcessPoolExecutor(**executor_options)
+
     def __init__(
         self,
         loop,
@@ -185,7 +204,7 @@ def __init__(
             logger.debug("Adjusting number of worker to 1")
             self.num_workers = 1
         self.num_threads = num_threads or self.num_workers
-        self.process_queue = process_queue or concurrent.futures.ProcessPoolExecutor(max_workers=self.num_workers)
+        self.process_queue = process_queue or self._get_process_executor()
         # notes on fixing BPE (BrokenProcessPool Exception):
         # whenever a process exits unexpectedly, BPE is raised, and while that
         # all the processes in the pool gets a SIGTERM from the management
@@ -255,7 +274,7 @@ async def do():
                 if recycling:
                     # now replace
                     logger.info("Replacing process queue with new one")
-                    self.process_queue = concurrent.futures.ProcessPoolExecutor(max_workers=self.num_workers)
+                    self.process_queue = self._get_process_executor()
                 else:
                     self.process_queue = None
             except Exception as e:
@@ -469,7 +488,7 @@ async def run(future, job_id):
                 # we don't need to care about the remaining tasks because
                 # they'd all be SIGTERM'd anyways. But ...
                 logger.warning("Broken Process Pool: %s, restarting.", e)
-                self.process_queue = concurrent.futures.ProcessPoolExecutor(max_workers=self.num_workers)
+                self.process_queue = self._get_process_executor()
                 for stale_id in self._process_job_ids:
                     self.jobs.pop(stale_id, None)  # in the rare case that
                     # somehow they de-sync
diff --git a/biothings/utils/mongo.py b/biothings/utils/mongo.py
index 1a6c1f069..bc7d06fe5 100644
--- a/biothings/utils/mongo.py
+++ b/biothings/utils/mongo.py
@@ -10,7 +10,7 @@
 
 import bson
 import dateutil.parser as date_parser
-from pymongo import DESCENDING, MongoClient
+from pymongo import DESCENDING, AsyncMongoClient, MongoClient
 from pymongo.client_session import ClientSession
 from pymongo.collection import Collection as PymongoCollection
 from pymongo.database import Database as PymongoDatabase
@@ -155,6 +155,10 @@ def __getitem__(self, name):
         return Database(self, name)
 
 
+class AsyncDatabaseClient(AsyncMongoClient):
+    pass
+
+
 def requires_config(func):
     @wraps(func)
     def func_wrapper(*args, **kwargs):
@@ -192,6 +196,12 @@ def get_hub_db_conn():
     return conn
 
 
+@requires_config
+def get_hub_db_async_conn():
+    conn = AsyncDatabaseClient(config.HUB_DB_BACKEND["uri"])
+    return conn
+
+
 @requires_config
 def get_src_conn():
     return get_conn(config.DATA_SRC_SERVER, getattr(config, "DATA_SRC_PORT", 27017))
@@ -221,6 +231,12 @@ def get_src_build(conn=None):
     return conn[config.DATA_HUB_DB_DATABASE][config.DATA_SRC_BUILD_COLLECTION]
 
 
+@requires_config
+def get_src_build_async(conn=None):
+    conn = conn or get_hub_db_async_conn()
+    return conn[config.DATA_HUB_DB_DATABASE][config.DATA_SRC_BUILD_COLLECTION]
+
+
 @requires_config
 def get_src_build_config(conn=None):
     conn = conn or get_hub_db_conn()
diff --git a/biothings/utils/parsers.py b/biothings/utils/parsers.py
index 0f8a5a12d..273135da6 100644
--- a/biothings/utils/parsers.py
+++ b/biothings/utils/parsers.py
@@ -3,7 +3,7 @@
 from typing import Callable, Generator, Iterable, Optional
 from urllib.parse import parse_qsl, urlparse
 
-import orjson
+from biothings.utils import serializer
 
 
 def ndjson_parser(
@@ -31,7 +31,7 @@ def ndjson_parser_func(data_folder):
             for filename in work_dir.glob(pattern):
                 with open(filename, "rb") as f:
                     for line in f:
-                        doc = orjson.loads(line)
+                        doc = serializer.load_json(line)
                         yield doc
 
     return ndjson_parser_func
@@ -60,7 +60,7 @@ def json_array_parser(data_folder):
         for pattern in patterns:
             for filename in work_dir.glob(pattern):
                 with open(filename, "r") as f:
-                    data = orjson.loads(f.read())
+                    data = serializer.load_json(f.read())
                     try:
                         iterator = iter(data)
                     except TypeError:
diff --git a/biothings/utils/serializer.py b/biothings/utils/serializer.py
index a17733390..a15932ead 100644
--- a/biothings/utils/serializer.py
+++ b/biothings/utils/serializer.py
@@ -28,7 +28,7 @@ def orjson_default(o):
     raise TypeError(f"Type {type(o)} not serializable")
 
 
-def to_json(data, indent=False, sort_keys=False):
+def to_json(data, indent=False, sort_keys=False, return_bytes=False):
     # default option:
     #    OPT_NON_STR_KEYS: non string dictionary key, e.g. integer
     #    OPT_NAIVE_UTC: use UTC as the timezone when it's missing
@@ -37,7 +37,12 @@ def to_json(data, indent=False, sort_keys=False):
         option |= orjson.OPT_INDENT_2
     if sort_keys:
         option |= orjson.OPT_SORT_KEYS
-    return orjson.dumps(data, default=orjson_default, option=option).decode()
+
+    byte_dump = orjson.dumps(data, default=orjson_default, option=option)
+    if return_bytes:
+        return byte_dump
+
+    return byte_dump.decode()
 
 
 def to_json_file(data, fobj, indent=False, sort_keys=False):
diff --git a/biothings/web/analytics/channels.py b/biothings/web/analytics/channels.py
index ad0c97ab7..1c52e31bb 100644
--- a/biothings/web/analytics/channels.py
+++ b/biothings/web/analytics/channels.py
@@ -1,10 +1,11 @@
-import aiohttp
 import asyncio
-import certifi
 import logging
-import orjson
 import ssl
 
+import aiohttp
+import certifi
+
+from biothings.utils import serializer
 from biothings.web.analytics.events import Event, Message
 
 
@@ -34,30 +35,6 @@ async def send_request(self, session, url, event):
             pass
 
 
-class GAChannel(Channel):
-    def __init__(self, tracking_id, uid_version=1):
-        self.tracking_id = tracking_id
-        self.uid_version = uid_version
-        self.url = "http://www.google-analytics.com/batch"
-
-    async def handles(self, event):
-        return isinstance(event, Event)
-
-    async def send(self, event):
-        events = event.to_GA_payload(self.tracking_id, self.uid_version)
-        async with aiohttp.ClientSession() as session:
-            # The pagination of 20 is defined according to the context of the current application
-            # Usually, each client request is going to make just 1 request to the GA API.
-            # However, it's possible to collect data to GA in other parts of the application.
-            for i in range(0, len(events), 20):
-                data = "\n".join(events[i : i + 20])
-                await self.send_request(session, self.url, data)
-
-    async def send_request(self, session, url, data):
-        async with session.post(url, data=data) as _:
-            pass
-
-
 class GA4Channel(Channel):
     def __init__(self, measurement_id, api_secret, uid_version=1):
         self.measurement_id = measurement_id
@@ -81,7 +58,7 @@ async def send(self, event):
                     "user_id": str(event._cid(1)),
                     "events": events[i : i + 25],
                 }
-                await self.send_request(session, self.url, orjson.dumps(data))
+                await self.send_request(session, self.url, serializer.to_json(data, return_bytes=True))
 
     async def send_request(self, session, url, data):
         retries = 0
diff --git a/biothings/web/analytics/events.py b/biothings/web/analytics/events.py
index fb148bab9..79b58533b 100644
--- a/biothings/web/analytics/events.py
+++ b/biothings/web/analytics/events.py
@@ -1,15 +1,10 @@
 import hashlib
-
-# import smtplib
 import uuid
 from collections import UserDict
-from email.mime.multipart import MIMEMultipart
-from email.mime.text import MIMEText
 from ipaddress import IPv4Address, IPv6Address, ip_address
 from pprint import pformat
 from random import randint
 from typing import Union
-from urllib.parse import urlencode
 
 
 class Event(UserDict):
@@ -74,41 +69,11 @@ def _cid_v2(self):
     def _cid(self, version):
         if version == 1:
             return self._cid_v1()
-        elif version == 2:
+        if version == 2:
             return self._cid_v2()
 
-        # this is a required GA field
         raise ValueError("CID Version.")
 
-    def to_GA_payload(self, tracking_id, cid_version=1):
-        # by default implements
-        # a GA PageView hit-type
-
-        # In the future, consider adding additional
-        # keys as cutomized dimensions or metrics.
-
-        payload = {
-            "v": 1,  # protocol version
-            "t": "pageview",
-            "tid": tracking_id,
-            "cid": self._cid(cid_version),
-            "uip": self.user_ip,
-            "dh": self.host,
-            "dp": self.path,
-        }
-
-        # add document referer
-        if isinstance(self.referer, str):
-            if len(self.referer) <= 2048:  # GA Limit
-                payload["dr"] = self.referer
-
-        # add user_agent
-        if self.user_agent:
-            payload["ua"] = self.user_agent
-
-        # this also escapes payload vals
-        return [urlencode(payload)]
-
     def to_GA4_payload(self, measurement_id, cid_version=1):
         # Document about page_view event: https://support.google.com/analytics/answer/9964640#pageviews&zippy=%2Cin-this-article
         # GA4 does not support [Document path as UA](https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters#dp)
@@ -154,32 +119,6 @@ class GAEvent(Event):
     #   "value": "60"
     # }
 
-    def to_GA_payload(self, tracking_id, cid_version=1):
-        payloads = super().to_GA_payload(tracking_id, cid_version)
-        if self.get("category") and self.get("action"):
-            payloads.append(
-                urlencode(
-                    _clean(
-                        {
-                            "v": 1,  # protocol version
-                            "t": "event",
-                            "tid": tracking_id,
-                            "cid": self._cid(cid_version),
-                            "ec": self["category"],
-                            "ea": self["action"],
-                            "el": self.get("label", ""),
-                            "ev": self.get("value", ""),
-                        }
-                    )
-                )
-            )
-        for event in self.get("__secondary__", []):
-            event["__request__"] = self["__request__"]
-            payloads.extend(event.to_GA_payload(tracking_id, cid_version)[1:])
-            # ignore the first event (pageview)
-            # which is already generated once
-        return payloads
-
     def to_GA4_payload(self, measurement_id, cid_version=1):
         payloads = super().to_GA4_payload(measurement_id, cid_version)
         if self.get("category") and self.get("action"):
diff --git a/biothings/web/analytics/notifiers.py b/biothings/web/analytics/notifiers.py
index 16fa410e7..ec103477a 100644
--- a/biothings/web/analytics/notifiers.py
+++ b/biothings/web/analytics/notifiers.py
@@ -1,8 +1,9 @@
 import asyncio
-
 from collections import defaultdict
+
 from tornado.web import RequestHandler
-from biothings.web.analytics.channels import GA4Channel, GAChannel, SlackChannel
+
+from biothings.web.analytics.channels import GA4Channel, SlackChannel
 
 
 class Notifier:
@@ -11,13 +12,6 @@ def __init__(self, settings):
 
         if hasattr(settings, "SLACK_WEBHOOKS"):
             self.channels.append(SlackChannel(getattr(settings, "SLACK_WEBHOOKS")))
-        if getattr(settings, "GA_ACCOUNT", None):
-            self.channels.append(
-                GAChannel(
-                    getattr(settings, "GA_ACCOUNT"),
-                    getattr(settings, "GA_UID_GENERATOR_VERSION", 1),
-                )
-            )
         if getattr(settings, "GA4_MEASUREMENT_ID", None):
             self.channels.append(
                 GA4Channel(
diff --git a/biothings/web/connections.py b/biothings/web/connections.py
index 5738a83fd..f8103bd58 100644
--- a/biothings/web/connections.py
+++ b/biothings/web/connections.py
@@ -5,7 +5,6 @@
 from functools import partial
 
 import elasticsearch
-import elasticsearch_dsl
 import requests
 from tornado.ioloop import IOLoop
 
@@ -27,7 +26,10 @@
 
 def _log_pkg():
     es_ver = elasticsearch.__version__
-    es_dsl_ver = elasticsearch_dsl.__versionstr__
+
+    # since v8.18.0, the DSL is released as part of the main elasticsearch package.
+    # dsl version is therefore aligned with base es version
+    es_dsl_ver = es_ver
 
     logger.info("Elasticsearch Package Version: %s", ".".join(map(str, es_ver)))
     logger.info("Elasticsearch DSL Package Version: %s", ".".join(map(str, es_dsl_ver)))
diff --git a/biothings/web/handlers/base.py b/biothings/web/handlers/base.py
index 0d7617c28..965174734 100644
--- a/biothings/web/handlers/base.py
+++ b/biothings/web/handlers/base.py
@@ -20,7 +20,6 @@
 """
 import logging
 
-import orjson
 import yaml
 from tornado.web import HTTPError, RequestHandler
 
@@ -105,8 +104,8 @@ def _parse_json(self):
         if not self.request.body:
             return {}
         try:
-            return orjson.loads(self.request.body)
-        except orjson.JSONDecodeError:
+            return serializer.load_json(self.request.body)
+        except serializer.JSONDecodeError:
             raise HTTPError(400, reason="Invalid JSON body.")
 
     def _parse_yaml(self):
diff --git a/biothings/web/options/manager.py b/biothings/web/options/manager.py
index c886599b5..d71c03227 100644
--- a/biothings/web/options/manager.py
+++ b/biothings/web/options/manager.py
@@ -10,7 +10,7 @@
 from types import MappingProxyType
 
 import jmespath
-import orjson
+from biothings.utils import serializer
 
 try:
     from re import Pattern  # py>=3.7
@@ -250,8 +250,8 @@ def __init__(self, **kwargs):
     def convert_to(self, value, to_type):
         if self.jsoninput:
             try:  # attempt to load as json first
-                _value = orjson.loads(value)
-            except orjson.JSONDecodeError as exc:
+                _value = serializer.load_json(value)
+            except serializer.JSONDecodeError as exc:
                 logging.debug(repr(exc))
             else:  # no more conversions
                 if isinstance(_value, to_type):
diff --git a/biothings/web/query/builder.py b/biothings/web/query/builder.py
index 8fdcccb97..005efba3f 100644
--- a/biothings/web/query/builder.py
+++ b/biothings/web/query/builder.py
@@ -38,10 +38,10 @@ class implementations or not defined.
 import re
 from typing import Iterable, List, Set, Tuple, Union
 
-from elasticsearch_dsl import MultiSearch, Q, Search
-from elasticsearch_dsl.exceptions import IllegalOperation
-import orjson
+from elasticsearch.dsl import MultiSearch, Q, Search
+from elasticsearch.dsl.exceptions import IllegalOperation
 
+from biothings.utils import serializer
 from biothings.utils.common import dotdict
 from biothings.web.query.formatter import ESResultFormatter
 from biothings.web.services.metadata import BiothingsMetadata
@@ -124,7 +124,7 @@ def _build_endpoint_metadata_fields(self, metadata: BiothingsMetadata) -> Set[st
                                 "url": "https://github.com/ericz1803/doid/tree/37c9bda7ba0e0569dad3181842ebc14d3af6c6a9/"
                             },
                             "download_date": "2023-06-02T01:24:14.106000",
-                            "licence": "Creative Commons \nPublic Domain Dedication CC0 \n1.0 Universal license",
+                            "license": "Creative Commons \nPublic Domain Dedication CC0 \n1.0 Universal license",
                             "license_url": "https://creativecommons.org/publicdomain/zero/1.0/",
                             "stats": {
                                 "doid": 11314
@@ -159,7 +159,7 @@ def _build_endpoint_metadata_fields(self, metadata: BiothingsMetadata) -> Set[st
                                 "url": "https://github.com/ericz1803/doid/tree/37c9bda7ba0e0569dad3181842ebc14d3af6c6a9/"
                             },
                             "download_date": "2023-06-02T01:24:14.106000",
-                            "licence": "Creative Commons \nPublic Domain Dedication CC0 \n1.0 Universal license",
+                            "license": "Creative Commons \nPublic Domain Dedication CC0 \n1.0 Universal license",
                             "license_url": "https://creativecommons.org/publicdomain/zero/1.0/",
                             "stats": {
                                 "doid": 11314
@@ -430,9 +430,9 @@ def __init__(self, path):
                             ## alternative implementation  # noqa: E266
                             # self._queries[os.path.basename(dirpath)] = text_file.read()
                             ##
-                            self._queries[os.path.basename(dirpath)] = orjson.loads(text_file.read())
+                            self._queries[os.path.basename(dirpath)] = serializer.load_json(text_file.read())
                         elif "filter" in filename:
-                            self._filters[os.path.basename(dirpath)] = orjson.loads(text_file.read())
+                            self._filters[os.path.basename(dirpath)] = serializer.load_json(text_file.read())
         except Exception:
             self.logger.exception("Error loading user queries.")
 
diff --git a/biothings/web/query/engine.py b/biothings/web/query/engine.py
index e1fe2023e..c3d5fe35e 100644
--- a/biothings/web/query/engine.py
+++ b/biothings/web/query/engine.py
@@ -11,7 +11,7 @@
 
 >>> from biothings.web.query import ESQueryBackend
 >>> from elasticsearch import Elasticsearch
->>> from elasticsearch_dsl import Search
+>>> from elasticsearch.dsl import Search
 
 >>> backend = ESQueryBackend(Elasticsearch())
 >>> backend.execute(Search().query("match", _id="1017"))
@@ -22,12 +22,15 @@
 """
 
 import asyncio
+import logging
 
 from elasticsearch import NotFoundError, RequestError
-from elasticsearch_dsl import MultiSearch, Search
+from elasticsearch.dsl import MultiSearch, Search
 
 from biothings.web.query.builder import ESScrollID
 
+logger = logging.getLogger(__name__)
+
 
 class ResultInterrupt(Exception):
     def __init__(self, data):
@@ -139,6 +142,13 @@ async def execute(self, query, **options):
                     raise RawResultInterrupt(res)
 
                 if not res["hits"]["hits"]:
+                    scroll_id = query.data
+                    try:
+                        await self.client.clear_scroll(scroll_id=scroll_id)
+                        logger.info("Scroll context cleared: %s", scroll_id)
+                    except NotFoundError as e:
+                        logger.warning("Scroll context not found (ID: %s): %s", scroll_id, e)
+                    # The scroll is exhausted: end it whether the context was cleared or was already gone
                     raise EndScrollInterrupt()
 
                 return res
diff --git a/biothings/web/query/formatter.py b/biothings/web/query/formatter.py
index 5d5810425..aa6f46a9c 100644
--- a/biothings/web/query/formatter.py
+++ b/biothings/web/query/formatter.py
@@ -84,13 +84,26 @@ class ESResultFormatter(ResultFormatter):
     class _Hits(Hits):
         def __init__(self, *args, **kwargs):
             super().__init__(*args, **kwargs)
+            # Check if this is an error response from Elasticsearch
+            if "error" in self.data:
+                logger.error("ES returned error response: %s", self.data)
+                raise ValueError("Invalid response format")
+
             # make sure the document is coming from
             # elasticsearch at initialization time
-            assert "hits" in self.data
-            assert "total" in self.data["hits"]
-            assert "hits" in self.data["hits"]
+            if "hits" not in self.data:
+                logger.error("ES response missing 'hits' field. Response data: %s", self.data)
+                raise ValueError("Response missing 'hits' field")
+            if "total" not in self.data["hits"]:
+                logger.error("ES response missing 'hits.total' field. Response data: %s", self.data)
+                raise ValueError("Response missing 'hits.total' field")
+            if "hits" not in self.data["hits"]:
+                logger.error("ES response missing 'hits.hits' field. Response data: %s", self.data)
+                raise ValueError("Response missing 'hits.hits' field")
             for hit in self.data["hits"]["hits"]:
-                assert "_source" in hit
+                if "_source" not in hit:
+                    logger.error("ES hit missing '_source' field. Hit data: %s", hit)
+                    raise ValueError("Hit missing '_source' field")
 
     class _Doc(Doc):
         pass
diff --git a/biothings/web/query/pipeline.py b/biothings/web/query/pipeline.py
index 39bc3872c..73ceb04ba 100644
--- a/biothings/web/query/pipeline.py
+++ b/biothings/web/query/pipeline.py
@@ -79,7 +79,7 @@ def _simplify_ES_exception(exc, debug=False):
         root_cause = root_cause.replace('"', "'").split("\n")
         for index, cause in enumerate(root_cause):
             result["root_cause_line_" + f"{index:02}"] = cause
-    except IndexError:
+    except (IndexError, KeyError):
         pass  # no root cause
     except Exception:
         logger.exception(
@@ -147,6 +147,12 @@ async def _(*args, **kwargs):
                 elif error_type == "index_not_found_exception":
                     raise QueryPipelineException(500, error_type)
 
+                elif error_type == "es_rejected_execution_exception":
+                    # ES cluster is overloaded, all thread pools at capacity
+                    raise QueryPipelineException(
+                        503, "Service Unavailable", "Elasticsearch cluster overloaded"
+                    ) from exc
+
                 else:  # unexpected
                     raise
 
diff --git a/biothings/web/settings/default.py b/biothings/web/settings/default.py
index 95636e0c7..211983986 100644
--- a/biothings/web/settings/default.py
+++ b/biothings/web/settings/default.py
@@ -209,9 +209,6 @@
 # Sentry project address
 SENTRY_CLIENT_KEY = ""
 
-# Google Analytics Account ID
-GA_ACCOUNT = ""
-
 # *****************************************************************************
 # Endpoints Specifics & Others
 # *****************************************************************************
diff --git a/config.py.example b/config.py.example
index 7c32bc221..e860cafce 100644
--- a/config.py.example
+++ b/config.py.example
@@ -110,10 +110,6 @@ GA4_UID_GENERATOR_VERSION = 1
 # Analytics Settings
 # *****************************************************************************
 
-# Google Analytics Account ID
-
-GA_ACCOUNT = 'UA-123123-1'
-
 # Google Measurement ID
 GA4_MEASUREMENT_ID = 'G-KXzzzzLBN'
 
diff --git a/docs/tutorial/studio_guide.rst b/docs/tutorial/studio_guide.rst
index f09694f78..5d2e2ecaa 100644
--- a/docs/tutorial/studio_guide.rst
+++ b/docs/tutorial/studio_guide.rst
@@ -298,7 +298,7 @@ A manifest file is defined like this:
 	    "__metadata__" : { # optional
 	        "url" : "<datasource website/url>",
 	        "license_url" : "<url>",
-	        "licence" : "<license name>",
+	        "license" : "<license name>",
             "author" : {
                 "name" : "<author name>",
                 "url" : "<link to github's author for instance>"
@@ -327,7 +327,7 @@ or with multiple uploader
 		"__metadata__" : { # optional
 	        "url" : "<datasource website/url>",
 	        "license_url" : "<url>",
-	        "licence" : "<license name>",
+	        "license" : "<license name>",
             "author" : {
                 "name" : "<author name>",
                 "url" : "<link to github's author for instance>"
diff --git a/docs/tutorial/studio_tutorial.rst b/docs/tutorial/studio_tutorial.rst
index 3168e1553..4668711ca 100644
--- a/docs/tutorial/studio_tutorial.rst
+++ b/docs/tutorial/studio_tutorial.rst
@@ -820,7 +820,7 @@ A ``tutorials`` folder can be found and contains the exported code:
   -rw-rw-r-- 1 biothings biothings  1190 Jan 22 19:32 parser.py
   -rw-rw-r-- 1 biothings biothings  2334 Jan 22 19:32 upload.py
 
-Some files were copied from data plugin repository (``LICENCE``, ``README`` and ``parser.py``), the others are the exported ones:  ``dump.py`` for the dumper, ``upload.py``
+Some files were copied from data plugin repository (``LICENSE``, ``README`` and ``parser.py``), the others are the exported ones:  ``dump.py`` for the dumper, ``upload.py``
 for the uploader and the mappings, and ``__init__.py`` so the **Hub** can find these components upon start. We'll go in further details later, specially when we'll add more
 uploaders.
 
diff --git a/docs/tutorial/web.rst b/docs/tutorial/web.rst
index e78a8d101..e4c63b789 100644
--- a/docs/tutorial/web.rst
+++ b/docs/tutorial/web.rst
@@ -295,7 +295,7 @@ with a few rules to increase result relevancy. Additionally add to the ``pipelin
 .. code-block:: python
 
     from biothings.web.query import ESQueryBuilder 
-    from elasticsearch_dsl import Search
+    from elasticsearch.dsl import Search
 
     class MyQueryBuilder(ESQueryBuilder):
 
diff --git a/pyproject.toml b/pyproject.toml
index 9570a401a..dc5698b68 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,11 +20,11 @@ dynamic = ["version"]    # version is dynamically generated from setup.py
 authors = [
     {name = "The BioThings Team", email="dev@biothings.io"},
 ]
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 description = "a toolkit for building high-performance data & knowledge APIs in biology"
 readme = "README.md"
-# license = "Apache-2.0"               # when we drop Python 3.8 support, we can use this new format
-license = {text = "Apache-2.0"}        # this is an old format, but works for Python 3.8+
+license = "Apache-2.0"                   # this new format works for Python 3.9+
+# license = {text = "Apache-2.0"}        # this is an old format, but works for Python 3.8+
 keywords = [
     "biology",
     "medicine",
@@ -38,12 +38,12 @@ keywords = [
 classifiers = [
     "Programming Language :: Python",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
     "Development Status :: 5 - Production/Stable",
     "Operating System :: OS Independent",
     "Operating System :: POSIX",
@@ -60,17 +60,18 @@ dependencies = [
     "requests>=2.21.0",
     'tornado==6.1.0; python_version < "3.7.0"',
     'tornado==6.2.0; python_version == "3.7.0"',
-    'tornado==6.4.2; python_version >= "3.8.0"',
+    'tornado==6.4.2; python_version == "3.8.0"',
+    'tornado==6.5.3; python_version >= "3.9.0"',
     "gitpython>=3.1.0",
     "elasticsearch[async]>=7, <8; python_version < '3.7.0'",
-    "elasticsearch-dsl>=7, <8; python_version < '3.7.0'",
-    "elasticsearch[async]>=8, <9; python_version >= '3.7.0'",
-    "elasticsearch-dsl>=8, <9; python_version >= '3.7.0'",
+    "elasticsearch[async]>=8.18.0, <9; python_version >= '3.7.0'",  # elasticsearch.dsl is bundled in the main package since 8.18.0
     'singledispatchmethod; python_version < "3.8.0"',
     'dataclasses; python_version < "3.7.0"',
     "jmespath>=0.7.1,<2.0.0",  # support jmespath query parameter
     "PyYAML>=5.1",
-    "orjson>=3.6.1",  # a faster json lib supports inf/nan and datetime, v3.6.1 is the last version supports Python 3.6
+    'orjson>=3.10.16; python_version < "3.14.0"',  # a faster JSON library that supports inf/nan and datetime; v3.10.16 is the first version that requires Python 3.9+
+    'orjson==3.11.4; python_version >= "3.14.0"',  # orjson 3.11.5 cannot be built on Python 3.14t for now
+    'zstandard>=0.21.0; python_version<"3.14"',  # the zstandard library is only needed on Python versions before 3.14
 ]
 
 [project.optional-dependencies]
@@ -86,11 +87,11 @@ opensearch = [
 ]
 # minimal requirements for running biothings.hub, e.g. in CLI mode
 hubcore = [
-    "pymongo>=4.1.0,<5.0",  # support MongoDB 5.0 since v3.12.0
+    "pymongo>=4.13.0,<5.0",  # AsyncMongoClient stable since 4.13.0
 ]
 # extra requirements to run a full biothings.hub
 hub = [
-    "pymongo>=4.1.0,<5.0",
+    "pymongo>=4.13.0,<5.0",
     "beautifulsoup4",  # used in dumper.GoogleDriveDumper
     "aiocron==1.8",  # setup scheduled jobs
     # "aiohttp==3.8.4",  # elasticsearch requires aiohttp>=3,<4
@@ -120,10 +121,10 @@ hub = [
 ]
 # minimal requirements for to run biothings CLI
 cli = [
-    "pymongo>=4.1.0,<5.0",  # support MongoDB 5.0 since v3.12.0
+    "pymongo>=4.13.0,<5.0",  # AsyncMongoClient stable since 4.13.0
     "psutil",
     "jsonschema>=2.6.0",
-    "typer>=0.12.1",  # required for CLI, also installs rich package
+    "typer>=0.17.0",  # required for CLI, also installs rich package
 ]
 # if DockerContainerDumper is used, requires this Docker SDK for Python
 docker = [
diff --git a/tests/conftest.py b/tests/conftest.py
index 04118a1cc..b8ad7f7b0 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -13,8 +13,8 @@
 import types
 from pathlib import Path
 
-import pytest
 import _pytest
+import pytest
 
 from biothings.utils.loggers import setup_default_log
 
@@ -90,7 +90,7 @@ def pytest_sessionstart(session: _pytest.main.Session):
         },
         "HUB_ENV": "",
         "ACTIVE_DATASOURCES": [],
-        "DATA_HUB_DB_DATABASE": ".hubdb",
+        "DATA_HUB_DB_DATABASE": "biothings_hubdb",
         "DATA_PLUGIN_FOLDER": "/tmp/testhub/plugins",
         "DATA_ARCHIVE_ROOT": "/tmp/testhub/datasources",
         "DIFF_PATH": "/tmp/testhub/datasources/diff",
diff --git a/tests/hub/config/data/base_configuration.py b/tests/hub/config/data/base_configuration.py
index c1a144de6..2ef6b9ebc 100644
--- a/tests/hub/config/data/base_configuration.py
+++ b/tests/hub/config/data/base_configuration.py
@@ -90,7 +90,7 @@
 
 S3_SNAPSHOT_BUCKET = ""
 S3_REGION = ""
-DATA_HUB_DB_DATABASE = ".hubdb"
+DATA_HUB_DB_DATABASE = "biothings_hubdb"
 APITEST_PATH = str(Path(__file__).parent.absolute().resolve())
 
 # descONE
diff --git a/tests/hub/config/data/deep_configuration.py b/tests/hub/config/data/deep_configuration.py
index f0392df9e..04ff7774a 100644
--- a/tests/hub/config/data/deep_configuration.py
+++ b/tests/hub/config/data/deep_configuration.py
@@ -90,7 +90,7 @@
 
 S3_SNAPSHOT_BUCKET = ""
 S3_REGION = ""
-DATA_HUB_DB_DATABASE = ".hubdb"
+DATA_HUB_DB_DATABASE = "biothings_hubdb"
 APITEST_PATH = str(Path(__file__).parent.absolute().resolve())
 
 # redefine some params
diff --git a/tests/hub/dataload/test_dump.py b/tests/hub/dataload/test_dump.py
index 8d91aeb92..85c24c57e 100644
--- a/tests/hub/dataload/test_dump.py
+++ b/tests/hub/dataload/test_dump.py
@@ -2,6 +2,7 @@
 Tests for the various dumper classes
 """
 
+import pathlib
 import tempfile
 
 import pytest
@@ -61,7 +62,11 @@ def test_http_dumper_properties():
 @pytest.mark.parametrize(
     "remoteurl,resolve_filepath",
     [
-        ("https://github.com/biothings/biothings.api/archive/refs/tags/v0.12.5.zip", True),
+        # this URL contains a "content-disposition" header with a filename as "biothings.api-0.12.5.zip"
+        # if resolve_filepath is True, the dumper should resolve the filename from this header
+        ("https://codeload.github.com/biothings/biothings.api/zip/refs/tags/v0.12.5", True),
+        # this URL does not contain a "content-disposition" header, and resolve_filepath is False
+        ("https://biothings.io/static/img/transition.svg", False),
     ],
 )
 def test_http_dumper_download(remoteurl: str, resolve_filepath: bool):
@@ -71,9 +76,19 @@ def test_http_dumper_download(remoteurl: str, resolve_filepath: bool):
     with tempfile.NamedTemporaryFile() as temp_local_file:
         dumper_instance = HTTPDumper()
         HTTPDumper.RESOLVE_FILENAME = resolve_filepath
-        assert dumper_instance.remote_is_better(remoteurl, temp_local_file)
+        assert dumper_instance.remote_is_better(remoteurl, temp_local_file.name)
         download_headers = {}
         response = dumper_instance.download(
             remoteurl=remoteurl, localfile=temp_local_file.name, headers=download_headers
         )
         assert isinstance(response, requests.models.Response)
+        if resolve_filepath:
+            assert response.headers.get("content-disposition")
+            assert response.headers["content-disposition"].endswith("biothings.api-0.12.5.zip")
+            local_file = pathlib.Path(pathlib.Path(temp_local_file.name).parent, "biothings.api-0.12.5.zip")
+            assert local_file.exists()
+            assert local_file.stat().st_size > 0
+        else:  # filename not resolved from headers: the download is expected at the temp file path itself
+            local_file = pathlib.Path(temp_local_file.name)
+            assert local_file.exists()
+            assert local_file.stat().st_size > 0
diff --git a/tests/hub/dataplugin/data/manifests/malformed_manifest0.json b/tests/hub/dataplugin/data/manifests/malformed_manifest0.json
index 5552267ce..3299004fa 100644
--- a/tests/hub/dataplugin/data/manifests/malformed_manifest0.json
+++ b/tests/hub/dataplugin/data/manifests/malformed_manifest0.json
@@ -2,7 +2,7 @@
     "version": "1.0",
     "__metadata__": {
         "license_url": "http://www.mock-license-url.gov",
-        "licence": "",
+        "license": "",
         "url": "http://www.mock-reference.org/"
     },
     "requires": [
diff --git a/tests/hub/dataplugin/data/manifests/malformed_manifest1.json b/tests/hub/dataplugin/data/manifests/malformed_manifest1.json
index e607e1ad2..5c273a469 100644
--- a/tests/hub/dataplugin/data/manifests/malformed_manifest1.json
+++ b/tests/hub/dataplugin/data/manifests/malformed_manifest1.json
@@ -2,7 +2,7 @@
     "version": "1.0",
     "__metadata__": {
         "license_url": "http://www.mock-license-url.gov",
-        "licence": "",
+        "license": "",
         "url": "http://www.mock-reference.org/"
     },
     "requires": [
diff --git a/tests/hub/dataplugin/data/manifests/malformed_manifest2.json b/tests/hub/dataplugin/data/manifests/malformed_manifest2.json
index 3c3f130b5..2ffed70cf 100644
--- a/tests/hub/dataplugin/data/manifests/malformed_manifest2.json
+++ b/tests/hub/dataplugin/data/manifests/malformed_manifest2.json
@@ -2,7 +2,7 @@
     "version": "1.0",
     "__metadata__": {
         "license_url": "http://www.mock-license-url.gov",
-        "licence": "",
+        "license": "",
         "url": "http://www.mock-reference.org/"
     },
     "requires": [
diff --git a/tests/hub/dataplugin/data/manifests/malformed_manifest3.json b/tests/hub/dataplugin/data/manifests/malformed_manifest3.json
index de8a36080..6fd890b1a 100644
--- a/tests/hub/dataplugin/data/manifests/malformed_manifest3.json
+++ b/tests/hub/dataplugin/data/manifests/malformed_manifest3.json
@@ -2,7 +2,7 @@
     "version": "1.0",
     "__metadata__": {
         "license_url": "http://www.mock-license-url.gov",
-        "licence": "",
+        "license": "",
         "url": "http://www.mock-reference.org/"
     },
     "requires": [
diff --git a/tests/hub/dataplugin/data/manifests/malformed_manifest4.json b/tests/hub/dataplugin/data/manifests/malformed_manifest4.json
index fd7022e3d..598357b5c 100644
--- a/tests/hub/dataplugin/data/manifests/malformed_manifest4.json
+++ b/tests/hub/dataplugin/data/manifests/malformed_manifest4.json
@@ -2,7 +2,7 @@
     "version": "1.0",
     "__metadata__": {
         "license_url": "http://www.mock-license-url.gov",
-        "licence": "",
+        "license": "",
         "url": "http://www.mock-reference.org/"
     },
     "requires": [
diff --git a/tests/hub/dataplugin/data/manifests/malformed_manifest5.json b/tests/hub/dataplugin/data/manifests/malformed_manifest5.json
index 3dd44a904..06a12f615 100644
--- a/tests/hub/dataplugin/data/manifests/malformed_manifest5.json
+++ b/tests/hub/dataplugin/data/manifests/malformed_manifest5.json
@@ -2,7 +2,7 @@
     "version": "1.0",
     "__metadata__": {
         "license_url": "http://www.mock-license-url.gov",
-        "licence": "",
+        "license": "",
         "url": "http://www.mock-reference.org/"
     },
     "requires": [
diff --git a/tests/hub/dataplugin/data/manifests/malformed_manifest6.json b/tests/hub/dataplugin/data/manifests/malformed_manifest6.json
index 8d13edc17..f75da7ced 100644
--- a/tests/hub/dataplugin/data/manifests/malformed_manifest6.json
+++ b/tests/hub/dataplugin/data/manifests/malformed_manifest6.json
@@ -2,7 +2,7 @@
     "version": "1.0",
     "__metadata__": {
         "license_url": "http://www.mock-license-url.gov",
-        "licence": "",
+        "license": "",
         "url": "http://www.mock-reference.org/"
     },
     "requires": [
diff --git a/tests/hub/dataplugin/data/manifests/mock_manifest.json b/tests/hub/dataplugin/data/manifests/mock_manifest.json
index 8f977d807..791d4595c 100644
--- a/tests/hub/dataplugin/data/manifests/mock_manifest.json
+++ b/tests/hub/dataplugin/data/manifests/mock_manifest.json
@@ -2,7 +2,7 @@
     "version": "1.0",
     "__metadata__": {
         "license_url": "http://www.mock-license-url.gov",
-        "licence": "",
+        "license": "",
         "url": "http://www.mock-reference.org/"
     },
     "requires": [
diff --git a/tests/hub/datarelease/conftest.py b/tests/hub/datarelease/conftest.py
index 2810c4ef4..4a4d256c5 100644
--- a/tests/hub/datarelease/conftest.py
+++ b/tests/hub/datarelease/conftest.py
@@ -1,12 +1,11 @@
-from pathlib import Path
 import copy
 import logging
 import sys
+from pathlib import Path
 
 import pytest
 from pytest_mock import MockerFixture
 
-
 logger = logging.getLogger(__name__)
 
 
@@ -14,7 +13,7 @@
 def releasenote_configuration(root_configuration: "TestConfig"):
     releasenote_configuration = {
         "HUB_DB_BACKEND": {"module": "biothings.utils.sqlite3", "sqlite_db_folder": "./dummy_db"},
-        "DATA_HUB_DB_DATABASE": "mock_releasenote.hubdb",
+        "DATA_HUB_DB_DATABASE": "mock_releasenote_hubdb",
     }
     root_configuration.override(releasenote_configuration)
 
@@ -278,10 +277,7 @@ def mock_fn(col_name):
 def release_note_source(
     mock_get_source_fullname, cold_src_build_doc, hot_src_build_doc, old_cold_src_build_docs, old_hot_src_build_docs
 ):
-    from biothings.hub.datarelease.releasenote import (
-        ReleaseNoteSource,
-        ReleaseNoteSrcBuildReader,
-    )
+    from biothings.hub.datarelease.releasenote import ReleaseNoteSource, ReleaseNoteSrcBuildReader
 
     old_src_build_reader = ReleaseNoteSrcBuildReader(old_hot_src_build_docs)
     old_src_build_reader.attach_cold_src_build_reader(ReleaseNoteSrcBuildReader(old_cold_src_build_docs))
diff --git a/tests/utils/test_dataload.py b/tests/utils/test_dataload.py
new file mode 100644
index 000000000..66a919b71
--- /dev/null
+++ b/tests/utils/test_dataload.py
@@ -0,0 +1,384 @@
+"""
+Tests for dict_sweep and _val_to_delete in biothings.utils.dataload,
+specifically the handling of NaN-like values.
+
+NaN-like values (float NaN, pandas.NA, pandas.NaT) are only removed
+when explicitly included in the vals list (opt-in).
+"""
+
+import math
+
+import pytest
+
+from biothings.utils.dataload import _val_to_delete, dict_sweep
+
+# ---------------------------------------------------------------------------
+# Fake pandas-like NA / NaT types for testing without importing pandas.
+# Each class sets __module__ to mirror real pandas internals closely enough
+# to exercise fallback detection.
+# ---------------------------------------------------------------------------
+
+
+class NAType:
+    """Mimics pandas.NA (pandas._libs.missing.NAType)."""
+
+    __module__ = "pandas._libs.missing"
+
+    def __bool__(self):
+        raise TypeError("boolean value of NA is ambiguous")
+
+    def __eq__(self, other):
+        raise TypeError("boolean value of NA is ambiguous")
+
+    def __hash__(self):
+        return 0
+
+    def __repr__(self):
+        return "<NA>"
+
+
+class NaTType:
+    """Mimics pandas.NaT (pandas._libs.tslibs.nattype.NaTType)."""
+
+    __module__ = "pandas._libs.tslibs.nattype"
+
+    def __bool__(self):
+        raise TypeError("boolean value of NaT is ambiguous")
+
+    def __eq__(self, other):
+        raise TypeError("boolean value of NaT is ambiguous")
+
+    def __hash__(self):
+        return 0
+
+    def __repr__(self):
+        return "NaT"
+
+
+_NA = NAType()
+_NaT = NaTType()
+
+_DEFAULT_VALS = [".", "-", "", "NA", "none", " ", "Not Available", "unknown"]
+_VALS_WITH_NAN = _DEFAULT_VALS + [float("nan"), _NA, _NaT]
+
+
+@pytest.fixture(scope="module")
+def pandas():
+    return pytest.importorskip("pandas")
+
+
+@pytest.fixture(scope="module")
+def numpy():
+    return pytest.importorskip("numpy")
+
+
+# ---------------------------------------------------------------------------
+# _val_to_delete helper tests
+# ---------------------------------------------------------------------------
+
+
+class TestValToDelete:
+    # -- default vals (no NaN entries) -----------------------------------
+
+    def test_default_vals_matched(self):
+        for val in _DEFAULT_VALS:
+            assert _val_to_delete(val, _DEFAULT_VALS) is True, f"should delete {val!r}"
+
+    def test_regular_values_not_deleted(self):
+        for val in [0, 1, -1, "hello", None, [], {}, 0.0, 1.5, True, False]:
+            assert _val_to_delete(val, _DEFAULT_VALS) is False, f"should keep {val!r}"
+
+    def test_string_vals_matched(self):
+        assert _val_to_delete("NA", "NA") is True
+
+    def test_float_nan_not_deleted_by_default(self):
+        """float NaN is kept when vals does not contain a NaN float."""
+        assert _val_to_delete(float("nan"), _DEFAULT_VALS) is False
+
+    def test_na_not_deleted_by_default(self):
+        """Mock pandas NA is kept when vals does not contain a pandas NA."""
+        assert _val_to_delete(_NA, _DEFAULT_VALS) is False
+
+    def test_nat_not_deleted_by_default(self):
+        """Mock pandas NaT is kept when vals does not contain a pandas NaT."""
+        assert _val_to_delete(_NaT, _DEFAULT_VALS) is False
+
+    # -- vals with NaN entries (opt-in) ----------------------------------
+
+    def test_float_nan_deleted_when_in_vals(self):
+        assert _val_to_delete(float("nan"), _VALS_WITH_NAN) is True
+
+    def test_na_deleted_when_in_vals(self):
+        assert _val_to_delete(_NA, _VALS_WITH_NAN) is True
+
+    def test_nat_deleted_when_in_vals(self):
+        assert _val_to_delete(_NaT, _VALS_WITH_NAN) is True
+
+    def test_na_and_nat_are_not_interchangeable(self):
+        assert _val_to_delete(_NA, _DEFAULT_VALS + [_NaT]) is False
+        assert _val_to_delete(_NaT, _DEFAULT_VALS + [_NA]) is False
+
+    def test_regular_match_after_na_like_value(self):
+        assert _val_to_delete("", [_NA, ""]) is True
+
+
+# ---------------------------------------------------------------------------
+# dict_sweep — default vals (NaN kept)
+# ---------------------------------------------------------------------------
+
+
+class TestDictSweepDefaultKeepsNan:
+    def test_float_nan_kept(self):
+        d = {"a": 1, "b": float("nan")}
+        result = dict_sweep(d)
+        assert "b" in result
+        assert math.isnan(result["b"])
+
+    def test_na_kept(self):
+        d = {"a": 1, "b": _NA}
+        result = dict_sweep(d)
+        assert "b" in result
+        assert result["b"] is _NA
+
+    def test_nat_kept(self):
+        d = {"a": 1, "b": _NaT}
+        result = dict_sweep(d)
+        assert "b" in result
+        assert result["b"] is _NaT
+
+    def test_nan_kept_in_list(self):
+        d = {"a": [1, float("nan"), 2]}
+        result = dict_sweep(d)
+        assert len(result["a"]) == 3
+
+    def test_default_vals_still_removed(self):
+        d = {"a": ".", "b": "-", "c": "", "d": "keep"}
+        result = dict_sweep(d)
+        assert result == {"d": "keep"}
+
+
+# ---------------------------------------------------------------------------
+# dict_sweep — opt-in NaN removal (NaN in vals)
+# ---------------------------------------------------------------------------
+
+
+class TestDictSweepOptInNanRemoval:
+    def test_float_nan_removed(self):
+        d = {"a": 1, "b": float("nan")}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert result == {"a": 1}
+
+    def test_na_removed(self):
+        d = {"a": 1, "b": _NA}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert result == {"a": 1}
+
+    def test_nat_removed(self):
+        d = {"a": 1, "b": _NaT}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert result == {"a": 1}
+
+    def test_multiple_nan_types_removed(self):
+        d = {"a": float("nan"), "b": _NA, "c": _NaT, "d": "keep"}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert result == {"d": "keep"}
+
+
+# ---------------------------------------------------------------------------
+# dict_sweep — NaN inside lists (opt-in)
+# ---------------------------------------------------------------------------
+
+
+class TestDictSweepNanInList:
+    def test_nan_removed_from_list(self):
+        d = {"a": [1, float("nan"), 2]}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert result == {"a": [1, 2]}
+
+    def test_na_removed_from_list(self):
+        d = {"a": [1, _NA, 2]}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert result == {"a": [1, 2]}
+
+    def test_nat_removed_from_list(self):
+        d = {"a": [1, _NaT, 2]}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert result == {"a": [1, 2]}
+
+    def test_list_becomes_empty_after_nan_removal(self):
+        d = {"a": [float("nan")]}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert "a" not in result
+
+    def test_nan_in_list_with_remove_invalid_list(self):
+        d = {"a": [float("nan"), _NA, "valid"]}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN, remove_invalid_list=True)
+        assert result == {"a": ["valid"]}
+
+    def test_all_nan_list_removed_with_remove_invalid_list(self):
+        d = {"a": [float("nan"), _NA, _NaT]}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN, remove_invalid_list=True)
+        assert "a" not in result
+
+    def test_all_invalid_list_preserves_false_mode_behavior(self):
+        d = {"gene": [None, None], "site": ["Intron", None], "snp_build": 136}
+        result = dict_sweep(d, vals=[None], remove_invalid_list=False)
+        assert result == {"gene": [None], "site": ["Intron"], "snp_build": 136}
+
+    def test_all_invalid_list_removed_with_remove_invalid_list(self):
+        d = {"gene": [None, None], "site": ["Intron", None], "snp_build": 136}
+        result = dict_sweep(d, vals=[None], remove_invalid_list=True)
+        assert result == {"site": ["Intron"], "snp_build": 136}
+
+
+# ---------------------------------------------------------------------------
+# dict_sweep — NaN in nested dicts (opt-in)
+# ---------------------------------------------------------------------------
+
+
+class TestDictSweepNanNested:
+    def test_nan_in_nested_dict(self):
+        d = {"a": {"b": float("nan"), "c": 1}}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert result == {"a": {"c": 1}}
+
+    def test_na_in_nested_dict(self):
+        d = {"a": {"b": _NA, "c": 1}}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert result == {"a": {"c": 1}}
+
+    def test_nested_dict_removed_when_empty_after_sweep(self):
+        d = {"a": {"b": float("nan")}}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert "a" not in result
+
+    def test_nan_in_list_of_dicts(self):
+        d = {"a": [{"x": float("nan"), "y": 1}, {"x": _NA, "y": 2}]}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert result == {"a": [{"y": 1}, {"y": 2}]}
+
+
+# ---------------------------------------------------------------------------
+# dict_sweep — default vals behaviour unchanged
+# ---------------------------------------------------------------------------
+
+
+class TestDictSweepDefaultBehavior:
+    def test_normal_values_kept(self):
+        d = {"a": 1, "b": "hello", "c": [1, 2], "d": {"nested": True}}
+        result = dict_sweep(d)
+        assert result == {"a": 1, "b": "hello", "c": [1, 2], "d": {"nested": True}}
+
+    def test_mixed_nan_and_default_vals_with_optin(self):
+        d = {"a": float("nan"), "b": ".", "c": _NA, "d": "keep", "e": ""}
+        result = dict_sweep(d, vals=_VALS_WITH_NAN)
+        assert result == {"d": "keep"}
+
+    def test_string_vals_supported(self):
+        d = {"a": "NA", "b": "keep", "c": ["NA", "keep"]}
+        result = dict_sweep(d, vals="NA")
+        assert result == {"b": "keep", "c": ["keep"]}
+
+
+# ---------------------------------------------------------------------------
+# Tests using real pandas and numpy types, when the optional deps are installed.
+# ---------------------------------------------------------------------------
+
+
+class TestValToDeleteRealTypes:
+    def test_numpy_nan_not_deleted_by_default(self, numpy):
+        assert _val_to_delete(numpy.nan, _DEFAULT_VALS) is False
+
+    def test_pandas_na_not_deleted_by_default(self, pandas):
+        assert _val_to_delete(pandas.NA, _DEFAULT_VALS) is False
+
+    def test_pandas_nat_not_deleted_by_default(self, pandas):
+        assert _val_to_delete(pandas.NaT, _DEFAULT_VALS) is False
+
+    def test_numpy_nan_deleted_when_in_vals(self, numpy):
+        vals_with_nan = _DEFAULT_VALS + [float("nan")]
+        assert _val_to_delete(numpy.nan, vals_with_nan) is True
+
+    def test_pandas_na_deleted_when_in_vals(self, pandas):
+        vals_with_na = _DEFAULT_VALS + [pandas.NA]
+        assert _val_to_delete(pandas.NA, vals_with_na) is True
+
+    def test_pandas_nat_deleted_when_in_vals(self, pandas):
+        vals_with_nat = _DEFAULT_VALS + [pandas.NaT]
+        assert _val_to_delete(pandas.NaT, vals_with_nat) is True
+
+    def test_pandas_na_and_nat_are_not_interchangeable(self, pandas):
+        assert _val_to_delete(pandas.NA, _DEFAULT_VALS + [pandas.NaT]) is False
+        assert _val_to_delete(pandas.NaT, _DEFAULT_VALS + [pandas.NA]) is False
+
+    def test_regular_match_after_pandas_na(self, pandas):
+        assert _val_to_delete("", [pandas.NA, ""]) is True
+
+    def test_numpy_float32_nan_deleted_when_in_vals(self, numpy):
+        vals_with_nan = _DEFAULT_VALS + [float("nan")]
+        assert _val_to_delete(numpy.float32("nan"), vals_with_nan) is True
+
+    def test_numpy_float64_nan_deleted_when_in_vals(self, numpy):
+        vals_with_nan = _DEFAULT_VALS + [float("nan")]
+        assert _val_to_delete(numpy.float64("nan"), vals_with_nan) is True
+
+
+class TestDictSweepRealPandas:
+    """dict_sweep with real pandas/numpy NaN types and opt-in removal."""
+
+    def _vals_with(self, *extras):
+        return _DEFAULT_VALS + list(extras)
+
+    def test_pandas_na_top_level(self, pandas):
+        d = {"a": 1, "b": pandas.NA}
+        result = dict_sweep(d, vals=self._vals_with(pandas.NA))
+        assert result == {"a": 1}
+
+    def test_pandas_nat_top_level(self, pandas):
+        d = {"a": 1, "b": pandas.NaT}
+        result = dict_sweep(d, vals=self._vals_with(pandas.NaT))
+        assert result == {"a": 1}
+
+    def test_numpy_nan_top_level(self, numpy):
+        d = {"a": 1, "b": numpy.nan}
+        result = dict_sweep(d, vals=self._vals_with(float("nan")))
+        assert result == {"a": 1}
+
+    def test_pandas_na_in_list(self, pandas):
+        d = {"a": [1, pandas.NA, 2]}
+        result = dict_sweep(d, vals=self._vals_with(pandas.NA))
+        assert result == {"a": [1, 2]}
+
+    def test_pandas_nat_in_list(self, pandas):
+        d = {"a": [1, pandas.NaT, 2]}
+        result = dict_sweep(d, vals=self._vals_with(pandas.NaT))
+        assert result == {"a": [1, 2]}
+
+    def test_numpy_nan_in_list(self, numpy):
+        d = {"a": [1, numpy.nan, 2]}
+        result = dict_sweep(d, vals=self._vals_with(float("nan")))
+        assert result == {"a": [1, 2]}
+
+    def test_pandas_na_in_nested_dict(self, pandas):
+        d = {"a": {"b": pandas.NA, "c": 1}}
+        result = dict_sweep(d, vals=self._vals_with(pandas.NA))
+        assert result == {"a": {"c": 1}}
+
+    def test_pandas_na_in_list_with_remove_invalid_list(self, pandas):
+        d = {"a": [pandas.NA, pandas.NaT, "valid"]}
+        result = dict_sweep(d, vals=self._vals_with(pandas.NA, pandas.NaT), remove_invalid_list=True)
+        assert result == {"a": ["valid"]}
+
+    def test_all_real_nan_types_removed(self, pandas, numpy):
+        d = {"a": numpy.nan, "b": pandas.NA, "c": pandas.NaT, "d": "keep"}
+        result = dict_sweep(d, vals=self._vals_with(float("nan"), pandas.NA, pandas.NaT))
+        assert result == {"d": "keep"}
+
+    def test_real_nan_kept_by_default(self, pandas, numpy):
+        """Without opting in, real NaN types are preserved."""
+        d = {"a": numpy.nan, "b": pandas.NA, "c": pandas.NaT, "d": "keep"}
+        result = dict_sweep(d)
+        assert "a" in result
+        assert "b" in result
+        assert "c" in result
+        assert result["d"] == "keep"
diff --git a/tests/web/analytics/test_channels.py b/tests/web/analytics/test_channels.py
index acc219947..a530df461 100644
--- a/tests/web/analytics/test_channels.py
+++ b/tests/web/analytics/test_channels.py
@@ -1,12 +1,13 @@
-import aiohttp
 import asyncio
-import orjson
-import pytest
+from unittest.mock import patch
 
+import aiohttp
+import pytest
 from aioresponses import aioresponses
-from biothings.web.analytics.channels import SlackChannel, GA4Channel, GAChannel
+
+from biothings.utils import serializer
+from biothings.web.analytics.channels import GA4Channel, SlackChannel
 from biothings.web.analytics.events import GAEvent, Message
-from unittest.mock import patch
 
 
 @pytest.mark.asyncio
@@ -31,34 +32,6 @@ async def test_send_Slack():
             await channel.send(message)
 
 
-@pytest.mark.asyncio
-async def test_send_GA():
-    event = GAEvent(
-        {
-            "__request__": {
-                "user_agent": "Opera/9.60 (Windows NT 6.0; U; en) Presto/2.1.1",
-                "referer": None,
-                "user_ip": "127.0.0.1",
-                "host": "example.org",
-                "path": "/",
-            },
-            "category": "test",
-            "action": "play",
-            "label": "sample.mp4",
-            "value": 60,
-        }
-    )
-    channel = GAChannel("G-XXXXXX", 2)
-    assert await channel.handles(event)
-
-    with aioresponses() as responses:
-        # Mock the URL to return a 200 OK response
-        responses.post(channel.url, status=200)
-
-        # If the function completes without raising an exception, the test will pass
-        await channel.send(event)
-
-
 @pytest.mark.asyncio
 async def test_send_GA4():
     event = GAEvent(
@@ -91,7 +64,8 @@ async def test_send_GA4():
 async def test_send_GA4_request_retries():
     channel = GA4Channel("G-XXXXXX", "SECRET")
     url = channel.url
-    data = orjson.dumps({"test": "data"})
+    # encode the payload to bytes with the biothings serializer rather than calling orjson directly
+    data = serializer.to_json({"test": "data"}, return_bytes=True)
 
     async with aiohttp.ClientSession() as session:
         with aioresponses() as responses:
@@ -109,7 +83,7 @@ async def test_send_GA4_request_retries():
 async def test_send_GA4_request_max_retries():
     channel = GA4Channel("G-XXXXXX", "SECRET")
     url = channel.url
-    data = orjson.dumps({"test": "data"})
+    data = serializer.to_json({"test": "data"}, return_bytes=True)
 
     async with aiohttp.ClientSession() as session:
         with aioresponses() as responses:
diff --git a/tests/web/analytics/test_events.py b/tests/web/analytics/test_events.py
index 823584a8e..a1dc46c2c 100644
--- a/tests/web/analytics/test_events.py
+++ b/tests/web/analytics/test_events.py
@@ -2,77 +2,6 @@
 
 from biothings.web.analytics.events import Event, GAEvent
 
-# validator
-# https://ga-dev-tools.web.app/hit-builder/
-
-
-def test_pageview_1():
-    event = Event(
-        dict(
-            __request__={
-                "user_agent": "Opera/9.60 (Windows NT 6.0; U; en) Presto/2.1.1",
-                "referer": "https://example.com/",
-                "user_ip": "127.0.0.1",
-                "host": "example.org",
-                "path": "/",
-            }
-        )
-    )
-    print(event.to_GA_payload("UA-000000-2"))
-    print(event.to_GA_payload("UA-000000-2", 2))
-
-
-def test_pageview_2():
-    event = Event(
-        dict(
-            __request__={
-                "user_agent": None,
-                "referer": None,
-                "user_ip": "127.0.0.1",
-                "host": "example.org",
-                "path": "/404.html",
-            }
-        )
-    )
-    print(event.to_GA_payload("UA-000000-2"))
-    print(event.to_GA_payload("UA-000000-2", 2))
-
-
-def test_event_1():
-    event = GAEvent(
-        {
-            "__request__": {
-                "user_agent": "Opera/9.60 (Windows NT 6.0; U; en) Presto/2.1.1",
-                "referer": "https://example.com/",
-                "user_ip": "127.0.0.1",
-                "host": "example.org",
-                "path": "/",
-            },
-            "category": "video",
-            "action": "play",
-            "label": "sample.mp4",
-            "value": 60,
-        }
-    )
-    print(event.to_GA_payload("UA-000000-2"))
-    print(event.to_GA_payload("UA-000000-2", 2))
-
-
-def test_event_2():
-    event = GAEvent(
-        {
-            "__request__": {
-                "user_agent": "Opera/9.60 (Windows NT 6.0; U; en) Presto/2.1.1",
-                "referer": "https://example.com/",
-                "user_ip": "127.0.0.1",
-                "host": "example.org",
-                "path": "/",
-            }
-        }
-    )
-    print(event.to_GA_payload("UA-000000-2"))
-    print(event.to_GA_payload("UA-000000-2", 2))
-
 
 def test_pageview_ga4_1():
     event = Event(
diff --git a/tests/web/handlers/data/test_mapping.json b/tests/web/handlers/data/test_mapping.json
index 9f8f7066a..ba41dcf5f 100644
--- a/tests/web/handlers/data/test_mapping.json
+++ b/tests/web/handlers/data/test_mapping.json
@@ -111,7 +111,7 @@
             "version": "68"
           },
           "clingen": {
-            "licence": "CC0 1.0 Universal",
+            "license": "CC0 1.0 Universal",
             "code": {
               "file": "src/hub/dataload/sources/clingen/upload.py",
               "repo": "https://github.com/biothings/mygene.info.git",
@@ -446,7 +446,7 @@
             "version": "20191218"
           },
           "pantherdb": {
-            "licence": "GNU General Public License Version 2",
+            "license": "GNU General Public License Version 2",
             "code": {
               "folder": "src/plugins/pantherdb",
               "repo": "https://github.com/biothings/mygene.info.git",
diff --git a/tests/web/test_connections.py b/tests/web/test_connections.py
index c250c6736..35bf29e67 100644
--- a/tests/web/test_connections.py
+++ b/tests/web/test_connections.py
@@ -13,8 +13,8 @@ def test_es_1():
 
 def test_es_2():  # see if the client is reused
     client1 = connections.es.get_client("http://localhost:9200")
-    client2 = connections.es.get_client("http://localhost:9200", timeout=20)
-    client3 = connections.es.get_client("http://localhost:9200", timeout=20)
+    client2 = connections.es.get_client("http://localhost:9200", request_timeout=20)
+    client3 = connections.es.get_client("http://localhost:9200", request_timeout=20)
     print(id(client1))
     print(id(client2))
     print(id(client3))
diff --git a/tests/web/test_es_exceptions.py b/tests/web/test_es_exceptions.py
index 36b61f5c5..14e51f88d 100644
--- a/tests/web/test_es_exceptions.py
+++ b/tests/web/test_es_exceptions.py
@@ -1,18 +1,20 @@
+from unittest.mock import Mock
+
 import pytest
 
 from biothings.web.query.pipeline import (
-    capturesESExceptions,
-    RawQueryInterrupt,
-    QueryPipelineInterrupt,
+    AuthenticationException,
+    AuthorizationException,
+    ConflictError,
     EndScrollInterrupt,
-    RawResultInterrupt,
+    NotFoundError,
     QueryPipelineException,
+    QueryPipelineInterrupt,
+    RawQueryInterrupt,
+    RawResultInterrupt,
     RequestError,
-    NotFoundError,
-    ConflictError,
-    AuthenticationException,
-    AuthorizationException,
     TransportError,
+    capturesESExceptions,
 )
 
 
@@ -25,7 +27,7 @@ async def func():
     with pytest.raises(QueryPipelineInterrupt) as exc_info:
         await func()
     assert exc_info.value.code == 200
-    assert exc_info.value.summary == None
+    assert exc_info.value.summary is None
     assert exc_info.value.details == {"error": "test_error"}
 
 
@@ -38,7 +40,7 @@ async def func():
     with pytest.raises(QueryPipelineInterrupt) as exc_info:
         await func()
     assert exc_info.value.code == 200
-    assert exc_info.value.summary == None
+    assert exc_info.value.summary is None
     assert exc_info.value.details == {"success": False, "error": "No more results to return."}
 
 
@@ -51,7 +53,7 @@ async def func():
     with pytest.raises(QueryPipelineInterrupt) as exc_info:
         await func()
     assert exc_info.value.code == 200
-    assert exc_info.value.summary == None
+    assert exc_info.value.summary is None
     assert exc_info.value.details == "test_body"
 
 
@@ -65,7 +67,7 @@ async def func():
         await func()
     assert exc_info.value.code == 500
     assert exc_info.value.summary == "test_assertion_error"
-    assert exc_info.value.details == None
+    assert exc_info.value.details is None
 
 
 @pytest.mark.asyncio
@@ -96,9 +98,12 @@ async def func():
 
 @pytest.mark.asyncio
 async def test_request_error():
+    _meta = Mock()
+    _meta.status = 400
+
     @capturesESExceptions
     async def func():
-        raise RequestError(message="test_request_error", meta={}, body={})
+        raise RequestError(message="test_request_error", meta=_meta, body={})
 
     with pytest.raises(QueryPipelineException) as exc_info:
         await func()
@@ -108,9 +113,12 @@ async def func():
 
 @pytest.mark.asyncio
 async def test_not_found_error():
+    _meta = Mock()
+    _meta.status = 404
+
     @capturesESExceptions
     async def func():
-        raise NotFoundError(message="test_not_found_error", meta={}, body={})
+        raise NotFoundError(message="test_not_found_error", meta=_meta, body={})
 
     with pytest.raises(QueryPipelineException) as exc_info:
         await func()
@@ -122,9 +130,12 @@ async def func():
 
 @pytest.mark.asyncio
 async def test_conflict_error():
+    _meta = Mock()
+    _meta.status = 409
+
     @capturesESExceptions
     async def func():
-        raise ConflictError(message="test_conflict_error", meta={}, body={})
+        raise ConflictError(message="test_conflict_error", meta=_meta, body={})
 
     with pytest.raises(QueryPipelineException) as exc_info:
         await func()
@@ -135,9 +146,12 @@ async def func():
 
 @pytest.mark.asyncio
 async def test_authentication_exception():
+    _meta = Mock()
+    _meta.status = 403
+
     @capturesESExceptions
     async def func():
-        raise AuthenticationException(message="test_authentication_exception", meta={}, body={})
+        raise AuthenticationException(message="test_authentication_exception", meta=_meta, body={})
 
     with pytest.raises(QueryPipelineException) as exc_info:
         await func()
@@ -148,9 +162,12 @@ async def func():
 
 @pytest.mark.asyncio
 async def test_authorization_exception():
+    _meta = Mock()
+    _meta.status = 403
+
     @capturesESExceptions
     async def func():
-        raise AuthorizationException(message="test_authorization_exception", meta={}, body={})
+        raise AuthorizationException(message="test_authorization_exception", meta=_meta, body={})
 
     with pytest.raises(QueryPipelineException) as exc_info:
         await func()
@@ -160,10 +177,10 @@ async def func():
 
 
 @pytest.mark.asyncio
-async def test_generic_exception():
+async def test_index_not_found_exception():
     @capturesESExceptions
     async def func():
-        exc = Exception(message="test_generic_exception", meta={}, body={})
+        exc = Exception(message="test_index_not_found_exception", meta={}, body={})
         exc.status_code = 500
         exc.info = {"error": {"type": "index_not_found_exception", "reason": "test_reason"}}
         raise exc
@@ -175,6 +192,22 @@ async def func():
     assert exc_info.value.details == "Exception() takes no keyword arguments"
 
 
+@pytest.mark.asyncio
+async def test_es_rejected_execution_exception():
+    @capturesESExceptions
+    async def func():
+        exc = TransportError("test_es_rejected_execution_exception")
+        exc.status_code = 503
+        exc.info = {"error": {"type": "es_rejected_execution_exception", "reason": "rejected execution of TimedRunnable..."}}
+        raise exc
+
+    with pytest.raises(QueryPipelineException) as exc_info:
+        await func()
+    assert exc_info.value.code == 503
+    assert exc_info.value.summary == "Service Unavailable"
+    assert exc_info.value.details == "Elasticsearch cluster overloaded"
+
+
 @pytest.mark.asyncio
 async def test_search_phase_execution_exception_rejected_execution():
     @capturesESExceptions
@@ -188,7 +221,7 @@ async def func():
         await func()
     assert exc_info.value.code == 503
     assert exc_info.value.summary == ""
-    assert exc_info.value.details == None
+    assert exc_info.value.details is None
 
 
 @pytest.mark.asyncio
@@ -235,4 +268,4 @@ async def func():
 
     assert exc_info.value.code == 503
     assert exc_info.value.summary == ""
-    assert exc_info.value.details == None
+    assert exc_info.value.details is None