Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
281 changes: 281 additions & 0 deletions compute-feasibility-advisor-proposal.md

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ dependencies = [
"aiofiles (>=24.1.0,<25.0.0)",
"threadpoolctl (>=3.0.0,<4.0.0)",
"packaging (>=23.2)",
"psutil (>=5.9.0,<8.0.0)",
]

[project.optional-dependencies]
Expand Down Expand Up @@ -121,6 +122,7 @@ typing = [
"joblib-stubs (>=1.4.2.5.20240918,<2.0.0)",
"pandas-stubs (>= 2.2.3.250527, <3.0.0)",
"types-aiofiles (>=24.1.0.20250606)",
"types-psutil>=7.2.2.20260518",
]
docs = [
"sphinx (>=8.1.3,<9.0.0)",
Expand All @@ -145,6 +147,7 @@ Documentation = "https://deeppavlov.github.io/AutoIntent/"
[project.scripts]
"basic-aug" = "autointent.generation.utterances._basic.cli:main"
"evolution-aug" = "autointent.generation.utterances._evolution.cli:main"
"advisor" = "autointent._advisor._cli:main"

[build-system]
requires = ["uv_build>=0.8.7,<0.9.0"]
Expand Down
29 changes: 29 additions & 0 deletions src/autointent/_advisor/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Pre-flight compute feasibility advisor.

Exposes a small surface used by both ``Pipeline.fit()`` (future integration) and
the ``autointent-advisor`` CLI script. See ``compute-feasibility-advisor-proposal.md``
at the repo root for the design document.
"""

from __future__ import annotations

from ._hardware import HardwareProfile, detect_hardware
from ._report import DatasetStats, Finding, PreflightReport, RecommendationResult, ResourceEstimate, Severity
from .runner import run_preflight
from .workflows import inspect, load_config, recommend, stats_from_dataset

# Public surface of the package: every name listed here is imported from one
# of the submodules above and is what ``from autointent._advisor import *``
# exposes.
__all__ = [
    "DatasetStats",
    "Finding",
    "HardwareProfile",
    "PreflightReport",
    "RecommendationResult",
    "ResourceEstimate",
    "Severity",
    "detect_hardware",
    "inspect",
    "load_config",
    "recommend",
    "run_preflight",
    "stats_from_dataset",
]
134 changes: 134 additions & 0 deletions src/autointent/_advisor/_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""Console-script entry point for the pre-flight advisor.

Two subcommands:

* ``inspect`` — show what a given preset / config will cost on this machine.
* ``recommend`` — pick the best-fitting bundled preset for this machine.

Both subcommands accept either a real ``--dataset`` (Hub id or local
csv/json/jsonl/parquet path loaded via ``datasets.load_dataset``) or
``--n-samples / --n-classes / --avg-tokens`` placeholders so the script is
useful before the user has built a dataset.

The CLI is a thin wrapper around :func:`autointent._advisor.inspect` and
:func:`autointent._advisor.recommend`; callers that don't need argparse can
import those helpers directly.
"""

from __future__ import annotations

import argparse
import json
import logging
import sys

from autointent._advisor import inspect, recommend, stats_from_dataset

from ._render import render_json, render_recommendation, render_text
from ._report import DatasetStats

logger = logging.getLogger("autointent.advisor")


def _stats_from_args(args: argparse.Namespace) -> DatasetStats:
    """Build the dataset statistics described by the parsed CLI arguments.

    A truthy ``args.dataset`` is forwarded to ``stats_from_dataset``; otherwise
    the ``--n-samples`` / ``--n-classes`` / ``--avg-tokens`` values are passed
    to ``DatasetStats.placeholder``. In both cases the ``multilabel`` flag is
    derived from ``args.task``.
    """
    is_multilabel = args.task == "multilabel"
    if not args.dataset:
        # No dataset supplied: fall back to the placeholder numbers.
        return DatasetStats.placeholder(
            n_samples=args.n_samples,
            n_classes=args.n_classes,
            avg_tokens=args.avg_tokens,
            multilabel=is_multilabel,
        )
    return stats_from_dataset(args.dataset, multilabel=is_multilabel)


def _add_common_dataset_args(p: argparse.ArgumentParser) -> None:
p.add_argument("--dataset", help="Path or hub id of a dataset; overrides placeholders.")
p.add_argument("--n-samples", type=int, default=1_000, help="Placeholder training set size.")
p.add_argument("--n-classes", type=int, default=10, help="Placeholder class count.")
p.add_argument("--avg-tokens", type=int, default=32, help="Placeholder average token length.")
p.add_argument(
"--task",
choices=("multiclass", "multilabel"),
default="multiclass",
help="Placeholder task type when --dataset isn't given.",
)


def cmd_inspect(args: argparse.Namespace) -> int:
    """Handle the ``inspect`` subcommand.

    Calls ``inspect`` for ``args.target`` and writes the rendered report
    (JSON when ``args.json`` is set, text otherwise) followed by a newline to
    stdout.

    Returns:
        ``0`` when ``report.is_feasible`` is truthy, ``1`` otherwise.
    """
    target = args.target
    dataset_stats = _stats_from_args(args)
    report = inspect(target, stats=dataset_stats, budget_vram_gb=args.budget_vram_gb)

    renderer = render_json if args.json else render_text
    sys.stdout.write(renderer(report))
    sys.stdout.write("\n")

    if report.is_feasible:
        return 0
    return 1


def cmd_recommend(args: argparse.Namespace) -> int:
    """Handle the ``recommend`` subcommand.

    Calls ``recommend`` with the dataset statistics and the optional VRAM /
    time budgets, then writes the outcome to stdout: a JSON dump of
    ``result.to_dict()`` when ``args.json`` is set, otherwise the rendered
    recommendation followed, if a preset was chosen, by that preset's text
    report.

    Returns:
        ``0`` when ``result.chosen`` is truthy, ``1`` otherwise.
    """
    emit = sys.stdout.write
    result = recommend(
        stats=_stats_from_args(args),
        budget_vram_gb=args.budget_vram_gb,
        budget_time_h=args.budget_time_h,
    )

    if args.json:
        emit(json.dumps(result.to_dict(), indent=2, default=str))
        emit("\n")
        return 0 if result.chosen else 1

    emit(render_recommendation(result.results, result.chosen))
    emit("\n")
    if result.chosen:
        # Blank separator line, then the chosen preset's detailed report.
        emit("\n")
        reports_by_name = dict(result.results)
        emit(render_text(reports_by_name[result.chosen]))
        emit("\n")
    return 0 if result.chosen else 1


def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser with the ``inspect`` and ``recommend`` subcommands.

    Each subparser stores its handler under ``func`` via ``set_defaults``, so
    the caller can dispatch with ``args.func(args)``.
    """
    # NOTE(review): pyproject.toml registers this console script as "advisor",
    # while ``prog`` below is "autointent-advisor" — confirm which name is intended.
    root = argparse.ArgumentParser(
        prog="autointent-advisor",
        description="Pre-flight feasibility advisor for AutoIntent search-space optimization.",
    )
    root.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.")
    subcommands = root.add_subparsers(dest="cmd", required=True)

    # inspect: a positional target plus output / budget options.
    inspect_cmd = subcommands.add_parser(
        "inspect",
        help="Inspect a preset or OptimizationConfig and print a feasibility report.",
    )
    inspect_cmd.add_argument(
        "target",
        help="Preset name (e.g. transformers-light) or path to a YAML config.",
    )
    inspect_cmd.add_argument("--json", action="store_true", help="Emit a structured JSON report.")
    inspect_cmd.add_argument(
        "--budget-vram-gb",
        type=float,
        default=None,
        help="Override detected VRAM budget.",
    )
    _add_common_dataset_args(inspect_cmd)
    inspect_cmd.set_defaults(func=cmd_inspect)

    # recommend: no positional; adds an optional wall-time budget.
    recommend_cmd = subcommands.add_parser(
        "recommend",
        help="Detect hardware and recommend the best-fitting bundled preset.",
    )
    recommend_cmd.add_argument("--json", action="store_true", help="Emit a structured JSON report.")
    recommend_cmd.add_argument(
        "--budget-vram-gb",
        type=float,
        default=None,
        help="Override detected VRAM budget.",
    )
    recommend_cmd.add_argument(
        "--budget-time-h",
        type=float,
        default=None,
        help="Optional wall-time ceiling in hours.",
    )
    _add_common_dataset_args(recommend_cmd)
    recommend_cmd.set_defaults(func=cmd_recommend)

    return root


def main(argv: list[str] | None = None) -> int:
    """Parse *argv*, configure logging and dispatch to the selected subcommand.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` is passed
            through unchanged.

    Returns:
        The subcommand handler's result converted to ``int``.
    """
    args = build_parser().parse_args(argv)

    # ``-v`` switches the log level from WARNING to DEBUG.
    log_level = logging.DEBUG if args.verbose else logging.WARNING
    logging.basicConfig(level=log_level, format="%(levelname)s %(name)s: %(message)s")

    exit_code = args.func(args)
    return int(exit_code)


if __name__ == "__main__":
    # main() returns the subcommand's exit status (0 or 1); hand it to
    # sys.exit so that running this module directly does not always exit 0.
    sys.exit(main())
Empty file.
Loading