Skip to content

Commit ebfae71

Browse files
authored
Adds several QoL features for user convenience (#33)
* Add timestamp to outputs.jsonl, add flag to optionally dump eventsdb to CSV file, pad sample count to DS size multiple * Make dataset alignment optional, update tests * Use approximate datetime strings in outputs.jsonl for human-readability * Minor fixes suggested by gemini, condition guards
1 parent 4b7e8f6 commit ebfae71

6 files changed

Lines changed: 124 additions & 5 deletions

File tree

examples/01_LocalBenchmark/run_tinyllm.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,11 @@ def issue(self, sample):
184184
action="store_true",
185185
help="Enable streaming mode for TTFT metrics",
186186
)
187+
parser.add_argument(
188+
"--dump-events-csv",
189+
action="store_true",
190+
help="Dump the events to a CSV file",
191+
)
187192
args = parser.parse_args()
188193

189194
# Set up progress bar hook to monitor sample completion
@@ -238,6 +243,7 @@ def issue(self, sample):
238243
stop_sample_issuer_on_test_end=False,
239244
report_path="tinyllm_benchmark_report",
240245
tokenizer_override=model_runner.tokenizer,
246+
dump_events_csv=args.dump_events_csv,
241247
)
242248
sess.wait_for_test_end()
243249

src/inference_endpoint/config/runtime_settings.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,9 @@ def _from_config_default(
166166

167167
return cls(**kwargs)
168168

169-
def total_samples_to_issue(self, padding_factor: float = 1.1) -> int:
169+
def total_samples_to_issue(
170+
self, padding_factor: float = 1.1, align_to_dataset_size: bool = True
171+
) -> int:
170172
"""Calculate the total number of samples to issue to the SUT throughout the course of the test run.
171173
172174
Priority:
@@ -177,6 +179,8 @@ def total_samples_to_issue(self, padding_factor: float = 1.1) -> int:
177179
Args:
178180
padding_factor (float): Factor to multiply the expected number of samples by to account for variance.
179181
Use 1.0 for no padding. (Default: 1.1)
182+
align_to_dataset_size (bool): Whether to pad the total number of samples up to the nearest multiple of
183+
dataset size. (Default: True)
180184
181185
Returns:
182186
int: The total number of samples to issue to the SUT throughout the course of the test run.
@@ -213,4 +217,13 @@ def total_samples_to_issue(self, padding_factor: float = 1.1) -> int:
213217
logger.debug(
214218
f"Sample count: {result} (calculated from duration={self.min_duration_ms}ms × target_qps={self.metric_target.target} × padding={padding_factor})"
215219
)
220+
221+
# Pad to multiples of dataset size
222+
if (
223+
align_to_dataset_size
224+
and self.n_samples_from_dataset > 0
225+
and (rem := result % self.n_samples_from_dataset) != 0
226+
):
227+
result += self.n_samples_from_dataset - rem
228+
logger.debug(f"Padded sample count: {result}")
216229
return result

src/inference_endpoint/load_generator/session.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def _run_test(
6565
max_shutdown_timeout_s: float = 300.0,
6666
report_dir: os.PathLike | None = None,
6767
tokenizer_override: AutoTokenizer | None = None,
68+
dump_events_csv: bool = False,
6869
):
6970
with self.event_recorder:
7071
try:
@@ -157,6 +158,9 @@ def _run_test(
157158
with (Path(report_dir) / "runtime_settings.json").open("w") as f:
158159
f.write(orjson.dumps(rt_settings_data).decode("utf-8"))
159160

161+
if dump_events_csv:
162+
reporter.dump_to_csv(Path(report_path) / "events.csv")
163+
160164
# Print summary
161165
report.display()
162166

@@ -186,6 +190,7 @@ def start(
186190
max_shutdown_timeout_s: float = 300.0,
187191
report_dir: os.PathLike | None = None,
188192
tokenizer_override: AutoTokenizer | None = None,
193+
dump_events_csv: bool = False,
189194
) -> BenchmarkSession:
190195
"""Start a new BenchmarkSession in a thread.
191196
@@ -201,6 +206,8 @@ def start(
201206
report_dir: The path to save the report to. If None, no report will be saved.
202207
tokenizer_override: The tokenizer to use for the session. If None, a tokenizer will be automatically selected
203208
based on the model name in the runtime settings.
209+
dump_events_csv: Whether to dump the events to a CSV file. Only use for debugging
210+
purposes, as the events database can get quite large.
204211
205212
Returns:
206213
The new BenchmarkSession.
@@ -215,6 +222,7 @@ def start(
215222
max_shutdown_timeout_s,
216223
report_dir,
217224
tokenizer_override,
225+
dump_events_csv,
218226
),
219227
)
220228
session.thread.start()

src/inference_endpoint/metrics/recorder.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import threading
2727
import time
2828
import uuid
29+
from datetime import datetime
2930
from functools import partial
3031
from pathlib import Path
3132
from typing import ClassVar
@@ -39,6 +40,29 @@
3940
logger = logging.getLogger(__name__)
4041

4142

43+
_G_MONOTIME_DELTA = time.time_ns() - time.monotonic_ns()
44+
"""Approximate delta between monotonic and wall-clock time in nanoseconds. See
45+
monotime_to_datetime() for more details.
46+
"""
47+
48+
49+
def monotime_to_datetime(monotime_ns: int) -> datetime:
50+
"""Monotonic clock has an undefined starting point. To convert to human readable timestamp,
51+
we can add a constant delta to any monotonic timestamp to get an approximate equivalent wall-clock
52+
timestamp. Note that the result will not be completely accurate, but it will be a consistent
53+
offset from the real time, as long as this function is called in the same process. Any durations
54+
and deltas calculated from resulting datetimes will be accurate, but absolute times will not be.
55+
56+
Args:
57+
monotime_ns: The monotonic timestamp in nanoseconds.
58+
59+
Returns:
60+
The datetime object corresponding to the approximate wall-clock timestamp.
61+
"""
62+
wall_time = (monotime_ns + _G_MONOTIME_DELTA) / 1e9
63+
return datetime.fromtimestamp(wall_time)
64+
65+
4266
@contextlib.contextmanager
4367
def sqlite3_cursor(path: Path):
4468
"""Context manager for SQLite cursor that properly handles connection lifecycle.
@@ -272,7 +296,11 @@ def commit_buffer():
272296
if item[1] == SampleEvent.FIRST_CHUNK.value:
273297
# In post-processing, we use this to validate that the first chunk is the response output is the same as the data in the FIRST_CHUNK_RECEIVED event
274298
output_buffer.append(
275-
{"s_uuid": item[0], "first_chunk": item[-1]}
299+
{
300+
"timestamp": str(monotime_to_datetime(item[2])),
301+
"s_uuid": item[0],
302+
"first_chunk": item[-1],
303+
}
276304
)
277305
elif item[1] == SampleEvent.COMPLETE.value:
278306
output_data = item[-1]
@@ -282,13 +310,15 @@ def commit_buffer():
282310
)
283311
output_buffer.append(
284312
{
313+
"timestamp": str(monotime_to_datetime(item[2])),
285314
"s_uuid": item[0],
286315
"output": output_data,
287316
}
288317
)
289318
elif item[1] == SessionEvent.ERROR.value:
290319
output_buffer.append(
291320
{
321+
"timestamp": str(monotime_to_datetime(item[2])),
292322
"s_uuid": item[0],
293323
"error_type": item[1],
294324
"error_message": item[-1],

src/inference_endpoint/metrics/reporter.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from __future__ import annotations
1717

18+
import csv
1819
import dataclasses
1920
import functools
2021
import importlib
@@ -832,6 +833,44 @@ def close(self):
832833
self.cur_.close()
833834
self.conn.close()
834835

836+
def dump_to_csv(self, csv_path: Path):
837+
output_values = defaultdict(dict)
838+
839+
if self.outputs_path.exists():
840+
with self.outputs_path.open("r") as outputs:
841+
for line in outputs:
842+
if line.strip() == "":
843+
continue
844+
845+
data = orjson.loads(line)
846+
if "s_uuid" not in data:
847+
continue
848+
849+
if "first_chunk" in data:
850+
output_values[data["s_uuid"]]["first_chunk"] = data[
851+
"first_chunk"
852+
]
853+
elif "output" in data:
854+
output_values[data["s_uuid"]]["output"] = data["output"]
855+
elif "error_message" in data:
856+
output_values[data["s_uuid"]]["error_message"] = data[
857+
"error_message"
858+
]
859+
860+
with csv_path.open("w") as f:
861+
writer = csv.writer(f)
862+
writer.writerow(["sample_uuid", "event_type", "timestamp_ns", "value"])
863+
864+
for row in self.cur_.execute("SELECT * FROM events"):
865+
value = ""
866+
if row[1] == SampleEvent.FIRST_CHUNK.value:
867+
value = output_values[row[0]].get("first_chunk", "<NOT_FOUND>")
868+
elif row[1] == SampleEvent.COMPLETE.value:
869+
value = output_values[row[0]].get("output", "<NOT_FOUND>")
870+
elif row[1] == SessionEvent.ERROR.value:
871+
value = output_values[row[0]].get("error_message", "<NOT_FOUND>")
872+
writer.writerow([row[0], row[1], row[2], value])
873+
835874
def __enter__(self):
836875
if self.is_closed:
837876
self.init_connection()

tests/unit/config/rulesets/mlcommons/test_rules.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,16 @@ def test_apply_user_config():
6060
)
6161

6262
# Metric type should be throughput
63-
expected_sample_count = 1234.5 * (rt_settings.min_duration_ms / 1000)
63+
expected_sample_count = int(1234.5 * 10 * 60)
64+
assert (
65+
rt_settings.total_samples_to_issue(
66+
padding_factor=1.0, align_to_dataset_size=False
67+
)
68+
== expected_sample_count
69+
)
70+
71+
if (rem := expected_sample_count % rt_settings.n_samples_from_dataset) != 0:
72+
expected_sample_count += rt_settings.n_samples_from_dataset - rem
6473
assert (
6574
rt_settings.total_samples_to_issue(padding_factor=1.0) == expected_sample_count
6675
)
@@ -73,7 +82,15 @@ def test_apply_user_config_insufficient_qps():
7382
user_config=user_config,
7483
opt_prio=OptimizationPriority.LOW_LATENCY_INTERACTIVE,
7584
)
76-
assert rt_settings.total_samples_to_issue(padding_factor=1.0) == 270336
85+
86+
# Expected is 270336 padded up to multiple of dataset size, which is 13368
87+
assert rt_settings.total_samples_to_issue(padding_factor=1.0) == 280728
88+
assert (
89+
rt_settings.total_samples_to_issue(
90+
padding_factor=1.0, align_to_dataset_size=False
91+
)
92+
== 270336
93+
)
7794

7895

7996
def test_apply_user_config_min_sample_count_override():
@@ -83,4 +100,10 @@ def test_apply_user_config_min_sample_count_override():
83100
user_config=user_config,
84101
opt_prio=OptimizationPriority.LOW_LATENCY_INTERACTIVE,
85102
)
86-
assert rt_settings.total_samples_to_issue(padding_factor=1.0) == 2 * 10 * 60
103+
assert rt_settings.total_samples_to_issue(padding_factor=1.0) == 13368
104+
assert (
105+
rt_settings.total_samples_to_issue(
106+
padding_factor=1.0, align_to_dataset_size=False
107+
)
108+
== 2 * 10 * 60
109+
)

0 commit comments

Comments
 (0)