-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathconftest.py
More file actions
75 lines (48 loc) · 1.95 KB
/
conftest.py
File metadata and controls
75 lines (48 loc) · 1.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from __future__ import annotations
import importlib.resources as ires
from typing import TYPE_CHECKING, Literal
import pytest
from autointent import Dataset
from autointent.utils import load_search_space
if TYPE_CHECKING:
from pathlib import Path
def setup_environment() -> Path:
    """Return the location of the ``logs`` entry inside the ``tests`` package.

    Returns:
        The ``logs`` resource resolved through ``importlib.resources``.
    """
    tests_root = ires.files("tests")
    return tests_root / "logs"
def get_dataset_path():
    """Return the path of the bundled ``clinc_subset.json`` dataset file.

    Returns:
        The ``clinc_subset.json`` resource inside the ``tests.assets.data``
        package, resolved through ``importlib.resources``.
    """
    data_dir = ires.files("tests.assets.data")
    return data_dir / "clinc_subset.json"
@pytest.fixture
def dataset():
    """Provide the dataset loaded from the file given by ``get_dataset_path()``.

    Returns:
        The object produced by ``Dataset.from_json`` for that file.
    """
    dataset_file = get_dataset_path()
    return Dataset.from_json(dataset_file)
@pytest.fixture
def dataset_unsplitted():
    """Provide the dataset stored in ``clinc_subset_unsplitted.json``.

    Returns:
        The object produced by ``Dataset.from_json`` for the
        ``clinc_subset_unsplitted.json`` resource of ``tests.assets.data``.
    """
    data_dir = ires.files("tests.assets.data")
    return Dataset.from_json(data_dir / "clinc_subset_unsplitted.json")
@pytest.fixture
def dataset_no_oos():
    """Provide the dataset stored in ``clinc_no_oos.json``.

    Returns:
        The object produced by ``Dataset.from_json`` for the
        ``clinc_no_oos.json`` resource of ``tests.assets.data``.
    """
    data_dir = ires.files("tests.assets.data")
    return Dataset.from_json(data_dir / "clinc_no_oos.json")
# Config names accepted by ``get_search_space_path`` / ``get_search_space``.
# The value is used verbatim as the stem of a ``.yaml`` file looked up in the
# ``tests.assets.configs`` package.
TaskType = Literal["multiclass", "multilabel", "description_no_llm", "description_with_llm", "optuna", "light", "regex"]
def get_search_space_path(task_type: TaskType):
    """Return the path of the search-space YAML config for ``task_type``.

    Args:
        task_type: Config name; used as the stem of the ``.yaml`` file.

    Returns:
        The ``<task_type>.yaml`` resource inside the ``tests.assets.configs``
        package, resolved through ``importlib.resources``.
    """
    configs_dir = ires.files("tests.assets.configs")
    return configs_dir / f"{task_type}.yaml"
def get_search_space(task_type: TaskType):
    """Load the search space configured for ``task_type``.

    Args:
        task_type: Config name, forwarded to ``get_search_space_path``.

    Returns:
        Whatever ``load_search_space`` returns for the resolved YAML file.
    """
    return load_search_space(get_search_space_path(task_type))
def get_test_embedder_config(**kwargs):
    """Build a lightweight HashingVectorizer-based embedder config for tests.

    The config starts from ``n_features=512`` and ``use_cache=False``; any
    keyword argument supplied by the caller overrides or extends these values.
    It is meant for tests that do not exercise embedder-specific behaviour.

    Args:
        **kwargs: Settings that override or extend the defaults.

    Returns:
        HashingVectorizerEmbeddingConfig: Config built from the merged settings.
    """
    # Imported lazily, inside the function, exactly as the helper always did.
    from autointent.configs import HashingVectorizerEmbeddingConfig

    # Later entries win, so caller-supplied values replace the defaults.
    settings = {"n_features": 512, "use_cache": False, **kwargs}
    return HashingVectorizerEmbeddingConfig(**settings)