Skip to content

Commit e240314

Browse files
committed
analysis
1 parent 187078c commit e240314

24 files changed

Lines changed: 4749 additions & 19 deletions
616 KB
Loading
2.49 MB
Loading
3.37 MB
Loading
2.49 MB
Loading
591 KB
Loading
2.54 MB
Loading
599 KB
Loading
Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import csv
5+
from collections import Counter, defaultdict
6+
from pathlib import Path
7+
8+
import matplotlib.pyplot as plt
9+
import numpy as np
10+
11+
12+
def iter_records(path):
    """Yield one integer triple per record line of the file at *path*.

    A record line is any line containing the text ``vtime_id=``; whatever
    precedes that marker is dropped, and lines without it are skipped.  The
    rest of the line is stripped, split on ``", "``, and for each of the
    first three fields the piece at index 1 of ``field.split("=")`` is
    converted with ``int``.  Any further fields are ignored.

    The first value is the ``vtime_id``; the other two are taken purely by
    position (``analyze`` uses them as an object id and a hit value).

    Raises:
        IndexError: a record line has fewer than three fields, or one of
            those fields contains no ``=``.
        ValueError: one of the extracted values is not an integer literal.
    """
    marker = "vtime_id="
    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            start = raw_line.find(marker)
            if start < 0:
                # Not a record line.
                continue
            fields = raw_line[start:].strip().split(", ")
            yield tuple(int(fields[pos].split("=")[1]) for pos in range(3))
25+
26+
27+
def analyze(files, bin_size):
    """Aggregate hit statistics from three result files read in lockstep.

    Record ``k`` of every file must describe the same request, i.e. carry
    the same ``vtime_id`` and the same object id.  The third value of each
    record is summed as that file's hit count for the request; the outcome
    patterns below are matched against 0/1 tuples, so the value is expected
    to be a 0/1 flag.

    Args:
        files: Paths of the result files, in result1/result2/result3 order.
            The accumulators are sized for exactly three inputs.
        bin_size: Number of requests per bin for the binned hit rate.  A
            trailing partial bin is kept.

    Returns:
        A dict with the keys
        ``names`` (file stems), ``total`` / ``hits`` (per-file counts),
        ``cumulative_hits`` (per-file running hit rate after each request),
        ``bin_hits`` (per-file hit rate of each bin), ``bin_total``
        (requests in each bin), ``outcome_counter`` (count of each
        ``(h1, h2, h3)`` tuple), ``object_rows`` (per-object summary dicts)
        and ``vtime_rows`` (per-``vtime_id`` summary dicts).

    Raises:
        ValueError: the rows of one request disagree on vtime or object id,
            or the inputs do not contain the same number of records.
    """
    # Function-scope import so the block does not depend on a file-level import.
    from itertools import zip_longest

    names = [Path(f).stem for f in files]
    streams = [iter_records(f) for f in files]
    exhausted = object()  # fill value marking an input that ran out of records

    total = [0, 0, 0]
    hits = [0, 0, 0]
    cumulative_hits = [[], [], []]
    outcome_counter = Counter()

    bin_total = []
    bin_hits = [[], [], []]
    # Per vtime: [requests, result1 hits, result2 hits, result3 hits].
    by_vtime = defaultdict(lambda: [0, 0, 0, 0])
    obj_patterns = defaultdict(Counter)

    current_bin_total = 0
    current_bin_hits = [0, 0, 0]

    try:
        # zip_longest (instead of zip) so that an input that is shorter than
        # the others is reported rather than silently truncating the analysis.
        lockstep = zip_longest(*streams, fillvalue=exhausted)
        for req_idx, rows in enumerate(lockstep, start=1):
            if any(row is exhausted for row in rows):
                short = [name for name, row in zip(names, rows) if row is exhausted]
                raise ValueError(
                    f"Record count mismatch at request {req_idx}: "
                    f"{', '.join(short)} ended before the other inputs"
                )

            vtimes = [r[0] for r in rows]
            obj_ids = [r[1] for r in rows]
            hs = [r[2] for r in rows]
            if len(set(vtimes)) != 1 or len(set(obj_ids)) != 1:
                raise ValueError(f"Misaligned rows at request {req_idx}: {rows}")

            vtime = vtimes[0]
            obj_id = obj_ids[0]
            key = tuple(hs)

            current_bin_total += 1
            for i, h in enumerate(hs):
                total[i] += 1
                hits[i] += h
                current_bin_hits[i] += h
                cumulative_hits[i].append(hits[i] / total[i])

            outcome_counter[key] += 1
            per_vtime = by_vtime[vtime]
            per_vtime[0] += 1
            per_vtime[1] += hs[0]
            per_vtime[2] += hs[1]
            per_vtime[3] += hs[2]
            obj_patterns[obj_id][key] += 1

            if current_bin_total == bin_size:
                bin_total.append(current_bin_total)
                for i in range(3):
                    bin_hits[i].append(current_bin_hits[i] / current_bin_total)
                current_bin_total = 0
                current_bin_hits = [0, 0, 0]
    finally:
        # Closing the generators closes the files they hold open, including
        # when the loop is left early through an exception.
        for stream in streams:
            stream.close()

    # Flush the trailing partial bin, if any.
    if current_bin_total:
        bin_total.append(current_bin_total)
        for i in range(3):
            bin_hits[i].append(current_bin_hits[i] / current_bin_total)

    object_rows = []
    for obj_id, counter in obj_patterns.items():
        # "lose": result3 missed (third flag 0) where the other result hit.
        # "gain": result3 hit (third flag 1) where the other result missed.
        # Counter lookups of absent patterns return 0 without inserting them.
        object_rows.append(
            {
                "obj_id": obj_id,
                "requests": sum(counter.values()),
                "lose_vs_result2": counter[(1, 1, 0)] + counter[(0, 1, 0)],
                "lose_vs_result1": counter[(1, 0, 0)] + counter[(1, 1, 0)],
                "gain_vs_result2": counter[(1, 0, 1)] + counter[(0, 0, 1)],
                "gain_vs_result1": counter[(0, 0, 1)] + counter[(0, 1, 1)],
                "patterns": dict(counter),
            }
        )

    vtime_rows = []
    for vtime, (reqs, h1, h2, h3) in by_vtime.items():
        vtime_rows.append(
            {
                "vtime_id": vtime,
                "requests": reqs,
                "result1_hits": h1,
                "result2_hits": h2,
                "result3_hits": h3,
                "result3_minus_result1": h3 - h1,
                "result3_minus_result2": h3 - h2,
            }
        )

    return {
        "names": names,
        "total": total,
        "hits": hits,
        "cumulative_hits": cumulative_hits,
        "bin_hits": bin_hits,
        "bin_total": bin_total,
        "outcome_counter": outcome_counter,
        "object_rows": object_rows,
        "vtime_rows": vtime_rows,
    }
126+
127+
128+
def write_csvs(output_dir, data):
    """Write the CSV reports for an analysis result into *output_dir*.

    Args:
        output_dir: ``pathlib.Path`` of the target directory; it is created
            (with parents) when missing.
        data: Mapping providing ``names``, ``total``, ``hits``,
            ``object_rows`` and ``vtime_rows``.

    Files written:
        ``summary.csv``: one line per result with requests, hits and hit
            rate (0 when a result has no requests).
        ``top_object_diffs.csv``: at most 200 object rows, ordered by
            ``lose_vs_result2``, ``lose_vs_result1``, ``requests``, largest
            first.
        ``worst_vtimes.csv``: at most 200 vtime rows, ordered by
            ``result3_minus_result2``, ``result3_minus_result1``, smallest
            first.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    def dump(filename, header, rows):
        # newline="" leaves line-ending handling to the csv module.
        with open(output_dir / filename, "w", newline="", encoding="utf-8") as handle:
            out = csv.writer(handle)
            out.writerow(header)
            out.writerows(rows)

    dump(
        "summary.csv",
        ["result", "requests", "hits", "hit_rate"],
        (
            [label, n_req, n_hit, n_hit / n_req if n_req else 0]
            for label, n_req, n_hit in zip(data["names"], data["total"], data["hits"])
        ),
    )

    # In both detail reports the CSV header names double as the row-dict keys.
    object_columns = [
        "obj_id",
        "requests",
        "lose_vs_result2",
        "lose_vs_result1",
        "gain_vs_result2",
        "gain_vs_result1",
        "patterns",
    ]
    ranked_objects = sorted(
        data["object_rows"],
        key=lambda entry: (
            entry["lose_vs_result2"],
            entry["lose_vs_result1"],
            entry["requests"],
        ),
        reverse=True,
    )
    dump(
        "top_object_diffs.csv",
        object_columns,
        ([entry[col] for col in object_columns] for entry in ranked_objects[:200]),
    )

    vtime_columns = [
        "vtime_id",
        "requests",
        "result1_hits",
        "result2_hits",
        "result3_hits",
        "result3_minus_result1",
        "result3_minus_result2",
    ]
    ranked_vtimes = sorted(
        data["vtime_rows"],
        key=lambda entry: (entry["result3_minus_result2"], entry["result3_minus_result1"]),
    )
    dump(
        "worst_vtimes.csv",
        vtime_columns,
        ([entry[col] for col in vtime_columns] for entry in ranked_vtimes[:200]),
    )
200+
201+
202+
def plot(output_dir, data, bin_size):
    """Render the 2x2 comparison figure and save it as ``compare_analysis.png``.

    Panels:
        1. cumulative hit rate of every result over the request index;
        2. hit rate per bin, each bin drawn at the index of its last request;
        3. number of requests per ``(result1, result2, result3)`` outcome
           pattern;
        4. the 15 objects with the highest ``lose_vs_result2`` (ties broken
           by request count), labelled by the last 8 characters of their id.

    Args:
        output_dir: ``pathlib.Path`` of the target directory; it is created
            (with parents) when missing.
        data: Mapping providing ``names``, ``total``, ``cumulative_hits``,
            ``bin_hits``, ``bin_total``, ``outcome_counter`` and
            ``object_rows``.
        bin_size: Requests per bin; only used in the title of panel 2.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    fig, axes = plt.subplots(2, 2, figsize=(16, 10), constrained_layout=True)
    ax1, ax2, ax3, ax4 = axes.flat
    colors = ["#4C78A8", "#54A24B", "#E45756"]  # one colour per result

    x = np.arange(1, data["total"][0] + 1)
    for i, name in enumerate(data["names"]):
        ax1.plot(x, data["cumulative_hits"][i], label=name, linewidth=1.2, color=colors[i])
    ax1.set_title("Cumulative Hit Rate After Adjustment")
    ax1.set_xlabel("Post-adjustment Request Index")
    ax1.set_ylabel("Hit Rate")
    ax1.grid(alpha=0.25)
    ax1.legend()

    # The running sum of the bin sizes is the index of the last request in
    # each bin.  It equals k * bin_size for full bins and stays correct for
    # a trailing partial bin, which a fixed bin_size step would misplace.
    bx = np.cumsum(data["bin_total"])
    for i, name in enumerate(data["names"]):
        ax2.plot(bx, data["bin_hits"][i], label=name, linewidth=1.5, color=colors[i])
    ax2.set_title(f"Binned Hit Rate After Adjustment ({bin_size} requests/bin)")
    ax2.set_xlabel("Post-adjustment Request Index")
    ax2.set_ylabel("Hit Rate")
    ax2.grid(alpha=0.25)
    # The lines carry labels, so show them like the other panels do.
    ax2.legend()

    # Each label is the three per-result flags written as digits.
    labels = ["111", "110", "000", "010", "101", "011", "001", "100"]
    counts = [data["outcome_counter"][tuple(int(c) for c in label)] for label in labels]
    ax3.bar(labels, counts, color="#72B7B2")
    ax3.set_title("Per-request Outcome Pattern Counts")
    ax3.set_xlabel("(result1, result2, result3)")
    ax3.set_ylabel("Requests")
    ax3.grid(axis="y", alpha=0.25)

    top_objects = sorted(
        [row for row in data["object_rows"] if row["lose_vs_result2"] > 0],
        key=lambda row: (row["lose_vs_result2"], row["requests"]),
        reverse=True,
    )[:15]
    # Reversed so the largest loss ends up at the top of the horizontal bars.
    obj_labels = [str(row["obj_id"])[-8:] for row in top_objects][::-1]
    lose2 = [row["lose_vs_result2"] for row in top_objects][::-1]
    lose1 = [row["lose_vs_result1"] for row in top_objects][::-1]
    ax4.barh(obj_labels, lose2, color="#E45756", label="lose vs result2")
    ax4.barh(obj_labels, lose1, color="#4C78A8", alpha=0.5, label="lose vs result1")
    ax4.set_title("Top Objects Lost By result3")
    ax4.set_xlabel("Hit Loss Count")
    ax4.set_ylabel("Object ID suffix")
    ax4.grid(axis="x", alpha=0.25)
    ax4.legend()

    fig.suptitle("S4FIFO Compare Analysis (Post-adjustment Only)", fontsize=16)
    fig.savefig(output_dir / "compare_analysis.png", dpi=180)
    # Release the figure's resources once it has been saved.
    plt.close(fig)
253+
254+
255+
def main():
    """Command-line entry point.

    Parses the options, analyses the three result files, writes the CSV
    reports and the figure into the output directory, and prints a short
    text summary: per-result hit rate, the most common outcome patterns,
    the 10 vtime rows where result3 trails result2 the most, and the 15
    objects result3 loses most often against result2.
    """
    parser = argparse.ArgumentParser(description="Analyze compare.sh result*.txt files.")
    parser.add_argument("--result1", default="result1.txt")
    parser.add_argument("--result2", default="result2.txt")
    parser.add_argument("--result3", default="result3.txt")
    parser.add_argument("--bin-size", type=int, default=10000)
    parser.add_argument("--output-dir", default="grid_search/analysis_output")
    args = parser.parse_args()

    files = [args.result1, args.result2, args.result3]
    data = analyze(files, args.bin_size)
    output_dir = Path(args.output_dir)
    write_csvs(output_dir, data)
    plot(output_dir, data, args.bin_size)

    print("Summary")
    for name, total, hits in zip(data["names"], data["total"], data["hits"]):
        # Guard against inputs without any record, as write_csvs does, so an
        # empty run prints a 0 hit rate instead of raising ZeroDivisionError.
        hit_rate = hits / total if total else 0
        print(f"{name}: requests={total}, hits={hits}, hit_rate={hit_rate:.6f}")

    print("\nMost common patterns")
    for pattern, count in data["outcome_counter"].most_common(8):
        print(f"{pattern}: {count}")

    print("\nWorst vtime windows for result3 vs result2")
    worst_vtimes = sorted(
        data["vtime_rows"],
        key=lambda r: (r["result3_minus_result2"], r["result3_minus_result1"]),
    )
    for row in worst_vtimes[:10]:
        print(row)

    print("\nTop objects lost by result3 vs result2")
    top_objects = sorted(
        data["object_rows"],
        key=lambda r: (r["lose_vs_result2"], r["lose_vs_result1"], r["requests"]),
        reverse=True,
    )
    for row in top_objects[:15]:
        print(
            {
                "obj_id": row["obj_id"],
                "requests": row["requests"],
                "lose_vs_result2": row["lose_vs_result2"],
                "lose_vs_result1": row["lose_vs_result1"],
                "patterns": row["patterns"],
            }
        )

    print(f"\nArtifacts written to: {output_dir}")
302+
303+
304+
# Run the analysis only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

grid_search/compare.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
_build/bin/cachesim data/tencentBlock_4712.oracleGeneral.zst oracleGeneral S4FIFO 0.2 -e "small-size-ratio=0.10,ghost-size-ratio=0.90,move-to-main-threshold=2,small-skip-ratio=0,ghost-to-main-threshold=0,after-n-reqs=619392,ns=0.10,ng=0.90,nst=2,ngt=0,nk=0.10" --ignore-obj-size 1
2+
_build/bin/cachesim data/tencentBlock_4712.oracleGeneral.zst oracleGeneral S4FIFO 0.2 -e "small-size-ratio=0.2,ghost-size-ratio=3,move-to-main-threshold=1,small-skip-ratio=0.25,ghost-to-main-threshold=1,after-n-reqs=619392,ns=0.2,ng=3,nst=1,ngt=1,nk=0.25" --ignore-obj-size 1
3+
_build/bin/cachesim data/tencentBlock_4712.oracleGeneral.zst oracleGeneral S4FIFO 0.2 -e "small-size-ratio=0.1,ghost-size-ratio=0.9,move-to-main-threshold=2,small-skip-ratio=0,ghost-to-main-threshold=0,after-n-reqs=619392,ns=0.2,ng=3,nst=1,ngt=1,nk=0.25" --ignore-obj-size 1

grid_search/compare_v2.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
_build/bin/cachesim data/tencentBlock_4712.oracleGeneral.zst oracleGeneral S4FIFO 0.1 -e "small-size-ratio=0.10,ghost-size-ratio=0.90,move-to-main-threshold=2,small-skip-ratio=0,ghost-to-main-threshold=0,after-n-reqs=619392,ns=0.10,ng=0.90,nst=2,ngt=0,nk=0.10" --ignore-obj-size 1 > result1.txt
2+
_build/bin/cachesim data/tencentBlock_4712.oracleGeneral.zst oracleGeneral S4FIFO 0.1 -e "small-size-ratio=0.2,ghost-size-ratio=3,move-to-main-threshold=1,small-skip-ratio=0.25,ghost-to-main-threshold=1,after-n-reqs=619392,ns=0.2,ng=3,nst=1,ngt=1,nk=0.25" --ignore-obj-size 1 > result2.txt
3+
_build/bin/cachesim data/tencentBlock_4712.oracleGeneral.zst oracleGeneral S4FIFO 0.1 -e "small-size-ratio=0.1,ghost-size-ratio=0.9,move-to-main-threshold=2,small-skip-ratio=0,ghost-to-main-threshold=0,after-n-reqs=619392,ns=0.2,ng=3,nst=1,ngt=1,nk=0.25" --ignore-obj-size 1 > result3.txt
4+
_build/bin/cachesim data/tencentBlock_4712.oracleGeneral.zst oracleGeneral s4fifov2 0.1 -e "small-size-ratio=0.1,ghost-size-ratio=0.9,move-to-main-threshold=2,small-skip-ratio=0,ghost-to-main-threshold=0,after-n-reqs=619392,ns=0.2,ng=3,nst=1,ngt=1,nk=0.25" --ignore-obj-size 1 > result4_v2.txt

0 commit comments

Comments
 (0)