Skip to content

Commit 5f7cbbd

Browse files
committed
refactor(tests): speed up unused metrics check with single grep
Replace per-field grep in test_unused_metrics (one subprocess per metric field, ~208 invocations) with a single grep that matches all field names at once and filters results in Python. Changes in fcmetrics.py: - Remove is_metric_used() which ran grep -RPzo per field - Add find_unused_metrics() which builds a combined alternation of all field names and runs a single grep -rPn across src/ - Filter grep output in Python to skip test files and metrics definition files (via renamed is_file_test_or_definition), comment lines, and string literals - Rename is_file_production to is_file_test_or_definition to reflect what it actually checks Changes in test_rust.py: - Simplify test_unused_metrics to call find_unused_metrics() directly - Remove unused imports (defaultdict, extract_fields, find_metrics_files, is_metric_used) Signed-off-by: Riccardo Mancini <mancio@amazon.com>
1 parent 0f72a01 commit 5f7cbbd

2 files changed

Lines changed: 58 additions & 34 deletions

File tree

tests/host_tools/fcmetrics.py

Lines changed: 56 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import logging
1313
import math
1414
import platform
15+
import re
1516
import time
1617
from threading import Thread
1718

@@ -630,8 +631,9 @@ def extract_fields(file_path):
630631
return [field.split(": ", maxsplit=1) for field in fields.splitlines()]
631632

632633

633-
def is_file_production(filepath):
634-
"""Returns True iff accesses to metric fields in the given file should cause the metric be considered 'used in production code'. Excludes, for example, files in which the metrics are defined, where accesses happen as part of copy constructors, etc."""
634+
def is_file_test_or_definition(filepath):
635+
"""Returns True for test files and metrics definition files, where field
636+
references should not count as production usage."""
635637
path = filepath.lower()
636638
return (
637639
"/test/" in path
@@ -652,23 +654,60 @@ def is_file_production(filepath):
652654
]
653655

654656

655-
def is_metric_used(field, field_type):
656-
"""Returns True iff the given metric has a production use in the firecracker codebase"""
657-
if field in KNOWN_FALSE_POSITIVES:
658-
return True
657+
def find_unused_metrics():
658+
"""Find all unused metrics in a single grep pass instead of one per field.
659659
660-
if field_type in ("SharedIncMetric", "SharedStoreMetric"):
661-
pattern = rf"{field}\s*\.\s*store|{field}\s*\.\s*inc|{field}\s*\.\s*add|{field}\s*\.\s*fetch|METRICS.*{field}"
662-
elif field_type == "LatencyAggregateMetrics":
663-
pattern = rf"{field}\s*\.\s*record_latency_metrics"
664-
else:
665-
raise RuntimeError(f"Unknown metric type: {field_type}")
660+
Returns a dict mapping metrics file paths to lists of (field, type) tuples
661+
for metrics that have no production usage.
662+
"""
663+
metrics_files = find_metrics_files()
664+
if not metrics_files:
665+
return {}
666+
667+
all_fields = []
668+
for file_path in metrics_files:
669+
for field, ty in extract_fields(file_path):
670+
all_fields.append((file_path, field, ty))
671+
672+
if not all_fields:
673+
return {}
674+
675+
non_fp_fields = [
676+
(fp, f, t) for fp, f, t in all_fields if f not in KNOWN_FALSE_POSITIVES
677+
]
678+
if not non_fp_fields:
679+
return {}
666680

667-
result = utils.run_cmd(f'grep -RPzo "{pattern}" ../src')
681+
# One grep for all field names
682+
field_names = sorted({f for _, f, _ in non_fp_fields})
683+
combined = "|".join(field_names)
684+
result = utils.run_cmd(f'grep -rPn "{combined}" ../src')
668685

669-
for line in result.stdout.strip().split("\0"):
686+
used_fields = set()
687+
for line in result.stdout.strip().splitlines():
670688
if not line:
671689
continue
672-
if not is_file_production(line.split(":", maxsplit=1)[0]):
673-
return True
674-
return False
690+
parts = line.split(":", maxsplit=2)
691+
if len(parts) < 3:
692+
continue
693+
filepath, _, code = parts
694+
if is_file_test_or_definition(filepath):
695+
continue
696+
# Strip comments and string literals so we only match real code.
697+
code = code.lstrip()
698+
if code.startswith("//") or code.startswith("/*") or code.startswith("*"):
699+
continue
700+
code = re.sub(r"//.*", "", code)
701+
code = re.sub(r'"[^"]*"', "", code)
702+
for field in field_names:
703+
if field in code:
704+
used_fields.add(field)
705+
706+
unused = {}
707+
for file_path, field, ty in all_fields:
708+
if field in KNOWN_FALSE_POSITIVES:
709+
continue
710+
if field not in used_fields:
711+
unused.setdefault(file_path, []).append((field, ty))
712+
713+
return unused

tests/integration_tests/style/test_rust.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@
22
# SPDX-License-Identifier: Apache-2.0
33
"""Tests ensuring codebase style compliance for Rust."""
44

5-
from collections import defaultdict
6-
75
from framework import utils
8-
from host_tools.fcmetrics import extract_fields, find_metrics_files, is_metric_used
6+
from host_tools.fcmetrics import find_unused_metrics
97

108

119
def test_rust_order():
@@ -29,21 +27,8 @@ def test_rust_style():
2927
def test_unused_metrics():
3028
"""Tests that all metrics defined in Firecracker's metrics.rs files actually have code
3129
paths that increment them."""
32-
metrics_files = find_metrics_files()
33-
unused = defaultdict(list)
34-
35-
assert metrics_files
36-
37-
for file_path in metrics_files:
38-
fields = extract_fields(file_path)
39-
if not fields:
40-
continue
41-
42-
for field, ty in fields:
43-
if not is_metric_used(field, ty):
44-
unused[file_path].append((field, ty))
30+
unused = find_unused_metrics()
4531

46-
# Grouped output
4732
for file_path, fields in unused.items():
4833
print(f"📄 Defined in: {file_path}")
4934
print("Possibly Unused: \n")

0 commit comments

Comments
 (0)