Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/modelgauge/annotation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from typing import Dict, Optional, Sequence

from pydantic import BaseModel

from typing import Dict, Optional
from modelgauge.model_options import TopTokens


class MissingAnnotationError(Exception):
Expand All @@ -16,6 +18,7 @@ class SafetyAnnotation(BaseModel):

is_safe: bool
is_valid: bool = True
top_logprobs: Optional[Sequence[TopTokens]] = None

metadata: Optional[dict] = None

Expand Down
6 changes: 3 additions & 3 deletions src/modelgauge/base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
SUTResponseAnnotations,
TestItem,
)
from modelgauge.sut import SUTOptions
from modelgauge.model_options import ModelOptions
from modelgauge.sut_capabilities import SUTCapability
from modelgauge.tracked_object import TrackedObject
from modelgauge.typed_data import Typeable, TypedData
Expand All @@ -27,7 +27,7 @@ class attribute `requires_sut_capabilities` as well as `initialization_record` o
initialization_record: Initialization data that can be used to reconstruct a test instance.
"""

_sut_options = SUTOptions()
_sut_options = ModelOptions()

# Set automatically by @modelgauge_test()
requires_sut_capabilities: Sequence[Type[SUTCapability]]
Expand All @@ -38,7 +38,7 @@ def __init__(self, uid: str):
self.initialization_record: InitializationRecord

@classmethod
def sut_options(cls) -> SUTOptions:
def sut_options(cls) -> ModelOptions:
"""Returns the SUT options that are supplied in each test item.
Concrete subclasses can override this method to specify their own SUT options."""
return cls._sut_options
Expand Down
10 changes: 5 additions & 5 deletions src/modelgauge/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
from modelgauge.secret_values import get_all_secrets, RawSecrets
from modelgauge.simple_test_runner import run_prompt_response_test
from modelgauge.single_turn_prompt_response import SUTResponse, TestItem
from modelgauge.sut import PromptResponseSUT, SUTOptions
from modelgauge.sut import PromptResponseSUT
from modelgauge.model_options import ModelOptions
from modelgauge.sut_capabilities import AcceptsTextPrompt
from modelgauge.sut_registry import SUTS
from modelgauge.test_registry import TESTS
Expand Down Expand Up @@ -141,16 +142,15 @@ def list_secrets() -> None:
def run_sut(
sut: str,
prompt: str,
max_tokens: Optional[int],
max_tokens: int,
temp: Optional[float],
top_logprobs: Optional[int],
top_p: Optional[float],
top_k: Optional[int],
):
"""Send a prompt from the command line to a SUT."""

# TODO Consider a SUT factory that takes in a SUTDefinition and returns a SUT
options = SUTOptions.create_from_arguments(max_tokens, temp, top_p, top_k, top_logprobs)
options = ModelOptions.create_from_arguments(max_tokens, temp, top_p, top_k, top_logprobs)

# Currently this only knows how to do prompt response, so assert that is what we have.
sut_instance = make_sut(sut)
Expand Down Expand Up @@ -344,7 +344,7 @@ def run_job(
# make sure the job has everything it needs to run
secrets = load_secrets_from_config()
if sut:
sut_options = SUTOptions.create_from_arguments(max_tokens, temp, top_p, top_k)
sut_options = ModelOptions.create_from_arguments(max_tokens, temp, top_p, top_k)
sut_instance = make_sut(sut)
if AcceptsTextPrompt not in sut_instance.capabilities:
raise click.BadParameter(f"{sut} does not accept text prompts")
Expand Down
2 changes: 1 addition & 1 deletion src/modelgauge/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def load_local_plugins(_, __, path: pathlib.Path):
)

MAX_TOKENS_OPTION = click.option(
"--max-tokens", default=None, type=click.IntRange(1), help="How many tokens to generate for each completion."
"--max-tokens", default=100, type=click.IntRange(1), help="How many tokens to generate for each completion."
)
TEMP_OPTION = click.option("--temp", default=None, type=float, help="SUT temperature value.")
TOP_P_OPTION = click.option("--top-p", default=None, type=float, help="SUT top-p value.")
Expand Down
85 changes: 85 additions & 0 deletions src/modelgauge/model_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from typing import Optional, List, Sequence

from pydantic import BaseModel, model_validator


class ModelOptions(BaseModel):
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Everything in this file was moved from sut.py

"""
An exhaustive set of options that could potentially be desired by a model.

Not all SUTs and annotators respect all options.
"""

max_tokens: Optional[int] = None
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This no longer defaults to 100. The CLI sets the default instead.

"""Maximum number of tokens to generate (per completion)"""

max_total_output_tokens: Optional[int] = None
"""Maximum number of tokens for all generated SUT outputs, including reasoning."""

temperature: Optional[float] = None
"""Temperature parameter that governs diversity"""

top_k_per_token: Optional[int] = None
"""Take this many highest probability candidates per token in the completion"""

stop_sequences: Optional[List[str]] = None
"""Stop generating once we hit one of these strings."""

top_p: Optional[float] = None
"""Sample from tokens that occupy this probability mass (nucleus sampling)"""

presence_penalty: Optional[float] = None
"""Penalize repetition (OpenAI & Writer only)"""

frequency_penalty: Optional[float] = None
"""Penalize repetition (OpenAI & Writer only)"""

random: Optional[str] = None
"""Used to control randomness. Expect different responses for the same
request but with different values for `random`."""

# Must specify SUTCapabilities for these
top_logprobs: Optional[int] = None
"""If present, will request the log probabilities for this
many of the top tokens at each token position."""

@model_validator(mode="after")
def check_max_total_output_tokens(self):
    """Ensure the total output budget is not smaller than the per-completion limit.

    The check only applies when both `max_total_output_tokens` and
    `max_tokens` are set; if either is None the options are accepted as-is.

    Returns:
        This instance, unchanged.

    Raises:
        ValueError: If `max_total_output_tokens` is less than `max_tokens`.
    """
    total_limit = self.max_total_output_tokens
    completion_limit = self.max_tokens

    # Nothing to compare unless both limits were provided.
    if total_limit is None or completion_limit is None:
        return self

    if total_limit < completion_limit:
        raise ValueError(
            f"Invalid ModelOptions. max_total_output_tokens ({self.max_total_output_tokens}) must be >= max_tokens ({self.max_tokens})."
        )
    return self

@staticmethod
def create_from_arguments(max_tokens=None, temp=None, top_p=None, top_k=None, top_logprobs=None):
    """Build a ModelOptions from optional argument values.

    Starts from a default `ModelOptions()` and overrides only the fields whose
    corresponding argument is not None. Note the name mapping: `temp` sets
    `temperature` and `top_k` sets `top_k_per_token`.

    Returns:
        The populated ModelOptions instance.
    """
    options = ModelOptions()

    # (field name on ModelOptions, supplied value), in assignment order.
    # NOTE(review): values are set by attribute assignment after construction;
    # presumably they are not re-validated unless the model enables
    # assignment validation -- confirm.
    overrides = (
        ("max_tokens", max_tokens),
        ("temperature", temp),
        ("top_p", top_p),
        ("top_k_per_token", top_k),
        ("top_logprobs", top_logprobs),
    )
    for field_name, value in overrides:
        if value is not None:
            setattr(options, field_name, value)

    return options


class TokenProbability(BaseModel):
    """A single token paired with the log probability assigned to it."""

    # The token text.
    token: str
    # Log probability reported for `token`.
    logprob: float


class TopTokens(BaseModel):
    """The most likely tokens, each with its probability, as one group."""

    # Candidate tokens with their log probabilities.
    top_tokens: Sequence[TokenProbability]
15 changes: 9 additions & 6 deletions src/modelgauge/pipeline_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from modelgauge.pipeline import Pipeline
from modelgauge.prompt_pipeline import PromptSink, PromptSource, PromptSutAssigner, PromptSutWorkers
from modelgauge.ready import ReadyResponses, Readyable
from modelgauge.sut import SUTOptions
from modelgauge.model_options import ModelOptions

logger = get_logger(__name__)

Expand All @@ -26,14 +26,12 @@ def __init__(
input_dataset,
output_dir,
cache_dir=None,
sut_options=SUTOptions(),
tag=None,
):
self.num_workers = num_workers
self.input_dataset = input_dataset
self.root_dir = output_dir
self.cache_dir = cache_dir
self.sut_options = sut_options
self.tag = tag
self.pipeline_segments = []
self.start_time = datetime.datetime.now()
Expand Down Expand Up @@ -120,7 +118,9 @@ def _write_metadata(self):


class PromptRunner(PipelineRunner):
def __init__(self, suts, **kwargs):
def __init__(self, suts, sut_options=ModelOptions(), **kwargs):
self.sut_options = sut_options
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved this out of the PipelineRunner base class because it doesn't make sense for the annotator-only runner to have sut_options.

logger.info(f"Using SUT options: {self.sut_options}")
self.suts = suts
self.sut_worker = None # Convenience pointer.
super().__init__(**kwargs)
Expand Down Expand Up @@ -278,6 +278,7 @@ def build_runner(
sut_uid_col=None,
sut_response_col=None,
jailbreak=False,
sut_options=None,
**kwargs,
):
if jailbreak and not (annotators and suts):
Expand All @@ -304,9 +305,11 @@ def build_runner(
)
# Build runner
if suts and annotators:
pipeline_runner = PromptPlusAnnotatorRunner(suts=suts, annotators=annotators, input_dataset=dataset, **kwargs)
pipeline_runner = PromptPlusAnnotatorRunner(
suts=suts, annotators=annotators, input_dataset=dataset, sut_options=sut_options, **kwargs
)
elif suts:
pipeline_runner = PromptRunner(suts=suts, input_dataset=dataset, **kwargs)
pipeline_runner = PromptRunner(suts=suts, input_dataset=dataset, sut_options=sut_options, **kwargs)
elif annotators:
pipeline_runner = AnnotatorRunner(annotators=annotators, input_dataset=dataset, **kwargs)
else:
Expand Down
5 changes: 3 additions & 2 deletions src/modelgauge/prompt_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from modelgauge.pipeline import CachingPipe, Pipe, Sink, Source
from modelgauge.prompt import TextPrompt
from modelgauge.single_turn_prompt_response import SUTInteraction, TestItem
from modelgauge.sut import PromptResponseSUT, SUT, SUTOptions, SUTResponse
from modelgauge.sut import PromptResponseSUT, SUT, SUTResponse
from modelgauge.model_options import ModelOptions

logger = get_logger(__name__)

Expand All @@ -31,7 +32,7 @@ def handle_item(self, item):


class PromptSutWorkers(CachingPipe):
def __init__(self, suts: dict[str, SUT], sut_options: Optional[SUTOptions] = None, workers=None, cache_path=None):
def __init__(self, suts: dict[str, SUT], sut_options: Optional[ModelOptions] = None, workers=None, cache_path=None):
self.sleep_time = 10
if workers is None:
workers = 8
Expand Down
4 changes: 2 additions & 2 deletions src/modelgauge/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
SUTResponseAnnotations,
TestItem,
)
from modelgauge.sut import SUTOptions
from modelgauge.model_options import ModelOptions
from pydantic import AwareDatetime, BaseModel, Field
from typing import Dict, List, Mapping

Expand Down Expand Up @@ -37,7 +37,7 @@ class TestRecord(BaseModel):
run_timestamp: AwareDatetime = Field(default_factory=current_local_datetime)
test_uid: str
test_initialization: InitializationRecord
sut_options: SUTOptions
sut_options: ModelOptions
dependency_versions: Mapping[str, str]
sut_uid: str
sut_initialization: InitializationRecord
Expand Down
Loading