|
| 1 | + |
| 2 | +"""Matplotlib experiments for Range–CoMine + baselines with profiling overlays. |
| 3 | +
|
| 4 | +- Supports synthetic or CSV datasets |
| 5 | +- Side-by-side series for Range–CoMine, Naïve, RangeInc-Mining |
| 6 | +- Optional SVG export |
| 7 | +- Records average runtime (ms) and peak memory (KB) per algorithm and overlays in legend |
| 8 | +
|
| 9 | +Examples: |
| 10 | + python experiments.py --mode min_prev --mins 0.2,0.4,0.6 --d1 10 --d2 35 --features 4 --instances 8 --seed 13 --algos range,naive,range_inc --export_svg |
| 11 | + python experiments.py --mode range --min_prev 0.5 --d1s 5,10 --d2s 20,30 --csv examples/toy.csv --export_svg |
| 12 | +""" |
| 13 | +import os, csv, argparse, time, tracemalloc, statistics as stats |
| 14 | +from pathlib import Path |
| 15 | +import matplotlib.pyplot as plt |
| 16 | + |
| 17 | +from range_comine.synthetic import generate_synthetic |
| 18 | +from range_comine.data import load_objects_csv |
| 19 | +from range_comine.mining import range_comine |
| 20 | +from range_comine.baselines import naive_range, range_inc_mining |
| 21 | + |
# Output directory for every CSV and figure written by the sweeps; it is
# created at import time so later writes can assume it exists.
PLOTS = Path("plots")
PLOTS.mkdir(parents=True, exist_ok=True)

# Algorithm registry: CLI key -> (display name for titles/legends, callable).
ALGOS = dict(
    range=("Range–CoMine", range_comine),
    naive=("Naïve", naive_range),
    range_inc=("RangeInc-Mining", range_inc_mining),
)
| 29 | + |
def _count_patterns(col):
    """Return the number of distinct patterns across every entry of ``col``.

    Args:
        col: Mapping whose values are iterables of patterns. Each pattern is
            an iterable of hashable items; it is frozen with ``tuple`` so
            that list-valued patterns can be deduplicated.

    Returns:
        The count of unique patterns. A pattern that appears under several
        keys is counted once. Item order is significant: ``("A", "B")`` and
        ``("B", "A")`` count as two patterns.
    """
    # The keys play no role in the count, so only the values are iterated.
    return len({tuple(p) for pats in col.values() for p in pats})
| 36 | + |
def _ensure_list_str(csvish):
    """Normalise a comma-separated string (or a ready-made sequence) to a list.

    Args:
        csvish: A list/tuple, a comma-separated string, or any other value.

    Returns:
        * list/tuple input: a new list with the same items, untouched.
        * string input: the comma-separated tokens with surrounding
          whitespace stripped and empty tokens dropped.
        * anything else (including ``None``) or a blank string: ``[]``.
    """
    if isinstance(csvish, (list, tuple)):
        return list(csvish)
    if isinstance(csvish, str):
        # Drop empty tokens so inputs such as "0.2,0.4," or "a,,b" cannot
        # feed "" into float() or a registry lookup further down the line.
        stripped = (part.strip() for part in csvish.split(","))
        return [tok for tok in stripped if tok]
    return []
| 43 | + |
def _get_objects(args):
    """Build the object set for an experiment run.

    A non-empty ``args.csv`` takes precedence and is loaded with
    ``load_objects_csv``. Otherwise a synthetic dataset is generated from
    ``args.features``, ``args.instances`` and ``args.seed``.
    """
    if not args.csv:
        return generate_synthetic(
            n_features=args.features,
            instances_per_feat=args.instances,
            seed=args.seed,
        )
    return load_objects_csv(args.csv)
| 48 | + |
def _savefig(basepath, export_svg=False):
    """Save the current pyplot figure as PNG (optionally SVG too), then close it.

    Args:
        basepath: Output location (``str`` or ``Path``). Its suffix is
            replaced with ``.png`` and, when requested, ``.svg``.
        export_svg: When true, an SVG is written next to the PNG.
    """
    basepath = Path(basepath)
    try:
        plt.savefig(basepath.with_suffix(".png"), dpi=160)
        if export_svg:
            plt.savefig(basepath.with_suffix(".svg"))
    finally:
        # Close even when saving fails, so a failed export does not leave
        # the figure registered with pyplot for the rest of the run.
        plt.close()
| 56 | + |
def _run_profiled(fn, objs, d1, d2, min_prev):
    """Run one mining call and measure its wall time and peak traced memory.

    Args:
        fn: Callable invoked as ``fn(objs, d1=..., d2=..., min_prev=...)``.
        objs: Dataset handed to ``fn`` unchanged.
        d1: Lower distance bound; coerced to ``float``.
        d2: Upper distance bound; coerced to ``float``.
        min_prev: Prevalence threshold; coerced to ``float``.

    Returns:
        ``(result, elapsed_ms, peak_kb)`` where ``result`` is whatever ``fn``
        returned, ``elapsed_ms`` is the wall-clock duration of the call in
        milliseconds and ``peak_kb`` is tracemalloc's peak in KiB.

    Notes:
        The timing is taken while tracemalloc is active, so it includes the
        tracing overhead. If tracing was already enabled by the caller it is
        left running, and the reported peak then covers the whole existing
        trace rather than this call alone.
    """
    # Only start/stop tracing we own; never switch off a caller's session.
    started_here = not tracemalloc.is_tracing()
    if started_here:
        tracemalloc.start()
    try:
        t0 = time.perf_counter()
        col = fn(objs, d1=float(d1), d2=float(d2), min_prev=float(min_prev))
        elapsed_ms = (time.perf_counter() - t0) * 1000.0
        _, peak = tracemalloc.get_traced_memory()
    finally:
        # Guarantees tracing is switched off again even when fn raises.
        if started_here:
            tracemalloc.stop()
    return col, elapsed_ms, peak / 1024.0
| 67 | + |
def sweep_min_prev(args, mins, d1=10.0, d2=35.0, algos=("range","naive","range_inc")):
    """Sweep the prevalence threshold at a fixed distance range and plot counts.

    For each algorithm key in ``algos`` the algorithm is run once per value
    in ``mins``. Per algorithm this writes ``sweep_min_prev_<key>.csv`` and a
    figure ``sweep_min_prev_<key>`` under ``PLOTS``; afterwards a combined
    figure ``sweep_min_prev_all`` is written. Legends carry the average
    runtime (ms) and average peak memory (KB) of the runs.

    Args:
        args: Parsed CLI namespace; used for dataset selection (through
            ``_get_objects``) and for ``args.export_svg``.
        mins: Iterable of min_prev values (numbers or numeric strings).
        d1: Lower distance bound passed to every run.
        d2: Upper distance bound passed to every run.
        algos: Iterable of keys into ``ALGOS``.

    Raises:
        ValueError: If ``mins`` is empty or contains a non-numeric value.
        KeyError: If ``algos`` contains a key that is not in ``ALGOS``.
    """
    # Validate inputs before loading data or running anything expensive, so
    # bad arguments fail fast instead of midway with partial output on disk.
    xs = [float(x) for x in mins]
    if not xs:
        raise ValueError("sweep_min_prev requires at least one min_prev value")
    algos = list(algos)
    unknown = [a for a in algos if a not in ALGOS]
    if unknown:
        raise KeyError(f"unknown algorithm key(s): {', '.join(map(str, unknown))}")

    objs = _get_objects(args)
    series = {}  # algo key -> (pattern counts, legend label)
    for a in algos:
        name, fn = ALGOS[a]
        rows, ys, times, mems = [], [], [], []
        for m in xs:
            col, t_ms, pk_kb = _run_profiled(fn, objs, d1, d2, m)
            cnt = _count_patterns(col)
            rows.append({
                "min_prev": m,
                "num_patterns": cnt,
                "time_ms": round(t_ms, 3),
                "peak_kb": round(pk_kb, 1),
            })
            ys.append(cnt)
            times.append(t_ms)
            mems.append(pk_kb)
        # Computed once and reused by the single and the combined figure.
        label = f"{name} (avg {stats.mean(times):.0f} ms, {stats.mean(mems):.0f} KB)"

        # Per-algorithm CSV.
        csv_path = PLOTS / f"sweep_min_prev_{a}.csv"
        with open(csv_path, "w", newline="", encoding="utf-8") as f:
            w = csv.DictWriter(f, fieldnames=["min_prev","num_patterns","time_ms","peak_kb"])
            w.writeheader()
            w.writerows(rows)

        # Per-algorithm figure.
        plt.figure()
        plt.plot(xs, ys, marker="o", label=label)
        plt.xlabel("min_prev")
        plt.ylabel("Number of prevalent patterns")
        plt.title(f"Effect of min_prev — {name}")
        plt.legend()
        plt.grid(True, linestyle="--", alpha=0.6)
        plt.tight_layout()
        _savefig(PLOTS / f"sweep_min_prev_{a}", export_svg=args.export_svg)
        series[a] = (ys, label)

    # Combined comparison figure.
    plt.figure()
    for ys, label in series.values():
        plt.plot(xs, ys, marker="o", label=label)
    plt.xlabel("min_prev")
    plt.ylabel("Number of prevalent patterns")
    plt.title("Effect of min_prev — comparison")
    plt.legend()
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.tight_layout()
    _savefig(PLOTS / "sweep_min_prev_all", export_svg=args.export_svg)
| 106 | + |
def sweep_range(args, min_prev=0.5, d1s=(5,10,15), d2s=(20,25,30,35), algos=("range","naive","range_inc")):
    """Sweep the distance range at a fixed prevalence threshold and plot counts.

    Every combination of ``d1s`` x ``d2s`` with ``d1 < d2`` is evaluated for
    each algorithm key in ``algos``. Per algorithm this writes
    ``sweep_range_<key>.csv`` and a figure ``sweep_range_<key>`` under
    ``PLOTS``; afterwards a combined figure ``sweep_range_all`` is written.
    Legends carry the average runtime (ms) and average peak memory (KB).

    Args:
        args: Parsed CLI namespace; used for dataset selection (through
            ``_get_objects``) and for ``args.export_svg``.
        min_prev: Prevalence threshold passed to every run.
        d1s: Iterable of lower distance bounds.
        d2s: Iterable of upper distance bounds.
        algos: Iterable of keys into ``ALGOS``.

    Raises:
        ValueError: If no ``(d1, d2)`` pair satisfies ``d1 < d2`` or a bound
            is not numeric.
        KeyError: If ``algos`` contains a key that is not in ``ALGOS``.
    """
    # Validate inputs before loading data or running anything expensive, so
    # bad arguments fail fast instead of midway with partial output on disk.
    lows = [float(v) for v in d1s]
    highs = [float(v) for v in d2s]
    pairs = [(lo, hi) for lo in lows for hi in highs if lo < hi]
    if not pairs:
        raise ValueError("sweep_range requires at least one (d1, d2) pair with d1 < d2")
    algos = list(algos)
    unknown = [a for a in algos if a not in ALGOS]
    if unknown:
        raise KeyError(f"unknown algorithm key(s): {', '.join(map(str, unknown))}")

    # Tick labels drop the ".0" only when both bounds are whole numbers.
    xlabels = [
        f"{int(lo)}-{int(hi)}" if (lo.is_integer() and hi.is_integer()) else f"{lo}-{hi}"
        for lo, hi in pairs
    ]
    xs = list(range(len(pairs)))

    objs = _get_objects(args)
    series = {}  # algo key -> (pattern counts, legend label)
    for a in algos:
        name, fn = ALGOS[a]
        rows, ys, times, mems = [], [], [], []
        for d1, d2 in pairs:
            col, t_ms, pk_kb = _run_profiled(fn, objs, d1, d2, min_prev)
            cnt = _count_patterns(col)
            rows.append({
                "d1": d1,
                "d2": d2,
                "num_patterns": cnt,
                "time_ms": round(t_ms, 3),
                "peak_kb": round(pk_kb, 1),
            })
            ys.append(cnt)
            times.append(t_ms)
            mems.append(pk_kb)
        # Computed once and reused by the single and the combined figure.
        label = f"{name} (avg {stats.mean(times):.0f} ms, {stats.mean(mems):.0f} KB)"

        # Per-algorithm CSV.
        csv_path = PLOTS / f"sweep_range_{a}.csv"
        with open(csv_path, "w", newline="", encoding="utf-8") as f:
            w = csv.DictWriter(f, fieldnames=["d1","d2","num_patterns","time_ms","peak_kb"])
            w.writeheader()
            w.writerows(rows)

        # Per-algorithm figure.
        plt.figure()
        plt.plot(xs, ys, marker="o", label=label)
        plt.xticks(xs, xlabels, rotation=45, ha="right")
        plt.xlabel("Distance range [d1–d2]")
        plt.ylabel("Number of prevalent patterns")
        plt.title(f"Effect of distance range — {name}")
        plt.legend()
        plt.grid(True, linestyle="--", alpha=0.6)
        plt.tight_layout()
        _savefig(PLOTS / f"sweep_range_{a}", export_svg=args.export_svg)
        series[a] = (ys, label)

    # Combined comparison figure.
    plt.figure()
    for ys, label in series.values():
        plt.plot(xs, ys, marker="o", label=label)
    plt.xticks(xs, xlabels, rotation=45, ha="right")
    plt.xlabel("Distance range [d1–d2]")
    plt.ylabel("Number of prevalent patterns")
    plt.title("Effect of distance range — comparison")
    plt.legend()
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.tight_layout()
    _savefig(PLOTS / "sweep_range_all", export_svg=args.export_svg)
| 148 | + |
def parse_args(argv=None):
    """Parse the command-line options of the experiment script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the previous behaviour;
            passing a list allows programmatic use.

    Returns:
        The ``argparse.Namespace`` with ``mode``, ``mins``, ``d1``, ``d2``,
        ``min_prev``, ``d1s``, ``d2s``, ``features``, ``instances``,
        ``seed``, ``csv``, ``algos`` and ``export_svg``. List-like options
        (``mins``, ``d1s``, ``d2s``, ``algos``) are returned as the raw
        comma-separated strings.
    """
    ap = argparse.ArgumentParser(description="Experiments for Range–CoMine and baselines (with profiling overlays)")
    ap.add_argument("--mode", choices=["min_prev","range"], required=True, help="Sweep mode")
    ap.add_argument("--mins", type=str, default="0.2,0.3,0.4,0.5,0.6,0.7", help="CSV of min_prev values (mode=min_prev)")
    ap.add_argument("--d1", type=float, default=10.0, help="Lower distance bound (mode=min_prev)")
    ap.add_argument("--d2", type=float, default=35.0, help="Upper distance bound (mode=min_prev)")
    ap.add_argument("--min_prev", type=float, default=0.5, help="min_prev (mode=range)")
    ap.add_argument("--d1s", type=str, default="5,10,15", help="CSV of d1 values (mode=range)")
    ap.add_argument("--d2s", type=str, default="20,25,30,35", help="CSV of d2 values (mode=range)")
    ap.add_argument("--features", type=int, default=4, help="Number of features for synthetic data")
    ap.add_argument("--instances", type=int, default=8, help="Instances per feature for synthetic data")
    ap.add_argument("--seed", type=int, default=13, help="Random seed for synthetic data")
    ap.add_argument("--csv", type=str, default="", help="Path to CSV dataset (overrides synthetic)")
    ap.add_argument("--algos", type=str, default="range,naive,range_inc", help="CSV of algos to include (range,naive,range_inc)")
    ap.add_argument("--export_svg", action="store_true", help="Also export SVG versions of plots")
    return ap.parse_args(argv)
| 165 | + |
def main():
    """CLI entry point: parse the arguments and run the selected sweep.

    Exits through ``SystemExit`` with a readable message when ``--algos``
    names an algorithm that is not a key of ``ALGOS``, instead of failing
    later with a bare ``KeyError`` traceback.
    """
    args = parse_args()
    # Empty tokens (e.g. from a trailing comma) are ignored for every list.
    algos = [a for a in _ensure_list_str(args.algos) if a]
    unknown = [a for a in algos if a not in ALGOS]
    if unknown:
        raise SystemExit(
            f"error: unknown algorithm(s) {', '.join(unknown)}; "
            f"choose from {', '.join(ALGOS)}"
        )
    if args.mode == "min_prev":
        mins = [m for m in _ensure_list_str(args.mins) if m]
        sweep_min_prev(args, mins, d1=float(args.d1), d2=float(args.d2), algos=algos)
    else:
        d1s = [float(x) for x in _ensure_list_str(args.d1s) if x]
        d2s = [float(x) for x in _ensure_list_str(args.d2s) if x]
        sweep_range(args, min_prev=float(args.min_prev), d1s=d1s, d2s=d2s, algos=algos)


if __name__ == "__main__":
    main()
0 commit comments