-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakefile
More file actions
88 lines (75 loc) · 4.01 KB
/
Copy pathMakefile
File metadata and controls
88 lines (75 loc) · 4.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Makefile for the Schema Conversion Orchestrator evaluation pipeline.
#
# Dependency model (see also eval/README.md):
# benchmarks/*.py + conversion src --benchmark_runner.py--> accuracy_scores.json (+ benchmark figs/csvs)
# conversion src + real_world_inputs --evaluate.py--> review/*.csv (+ MANUAL annotation)
# (evaluate's ranking also reads accuracy_scores.json + edge_robustness_scores.json)
# conversion src --path_count_stats.py--> results/path_count_stats{,_core}.json
# review/*.csv + src --plot_orchestrator_evaluation.py--> plots/*.png (+ edge_robustness_scores.json)
# conversion src + benchmarks --cache_timing_analysis.py--> results/cache_timing_results.*
#
# Notes:
# * Review CSVs carry human G/L/I annotations; `eval` carries them over, so only
# new rows come back blank. Fill those in, then re-run `make plots` (bare `make`
# just prints the help, because the default goal is `help`).
# * The two *_scores.json are precomputed RANKING inputs (accuracy<-benchmark,
# robustness<-plots) consumed by `eval`. They are committed artifacts here (no
# recipe) so there is no Make cycle; `refresh` sequences the steps explicitly.
# * Slow targets (benchmark, eval, cache-timing) depend on the *conversion*
# source only, so editing a reporting/plotting module does not retrigger them.
# Python interpreter used by every recipe (relative path under ./venv).
PY := ./venv/bin/python

# Root of the Python package; the source lists and score files below hang off it.
PKG := src/schema_conversion_orchestrator

# Source file lists, each computed once at parse time.
#   SRC        every Python file under src/
#   CONV_SRC   Python files in the converters, domain and application packages
#   BENCH_DEFS Python files under eval/benchmarks
#   INPUTS     every regular file under eval/real_world_inputs
SRC := $(shell find src -name '*.py')
CONV_SRC := $(shell find $(addprefix $(PKG)/,converters domain application) -name '*.py' 2>/dev/null)
BENCH_DEFS := $(shell find eval/benchmarks -name '*.py' 2>/dev/null)
INPUTS := $(shell find eval/real_world_inputs -type f 2>/dev/null)

# Output locations, nested under the results directory.
RES := eval/results
OUT := $(RES)/orchestrator_outputs
REVIEW := $(OUT)/review
PLOTS := $(OUT)/plots
EDGE_CSV := $(REVIEW)/edge_outputs.csv
FINAL_CSV := $(REVIEW)/final_outputs.csv

# Precomputed ranking inputs (committed; regenerated by benchmark/plots).
ACC_SCORES := $(PKG)/data/accuracy_scores.json
ROB_SCORES := $(PKG)/data/edge_robustness_scores.json
# Command-style targets: none of these names a file on disk. Several .PHONY
# lines accumulate into one list.
.PHONY: all refresh
.PHONY: benchmark eval cache-timing
.PHONY: stats plots
.PHONY: clean help

# Bare `make` prints the target list instead of building anything.
.DEFAULT_GOAL := help

all: stats plots ## Build the cheap, deterministic outputs (stats + plots)
## refresh: run every automatable step in dependency order, each only if its
## inputs changed (annotation stays manual; in-text numbers stay manual)
# One sub-make per step, chained with && so the sequence stops at the first
# failing step. The order is deliberate: benchmark regenerates
# accuracy_scores.json before eval reads it, and plots runs last because it
# reads the review CSVs that eval produces.
refresh:
	$(MAKE) benchmark && \
	$(MAKE) eval && \
	$(MAKE) cache-timing && \
	$(MAKE) stats && \
	$(MAKE) plots
## benchmark: SHACL<->JSON accuracy benchmark -> accuracy_scores.json (+ figs/csvs)
# `benchmark` is a phony alias for the stamp file; the stamp is touched only
# after the runner exits successfully, so its mtime marks the last good run.
benchmark_stamp := $(RES)/.benchmark.stamp

benchmark: $(benchmark_stamp)

# Re-run when a conversion source file or a benchmark definition changes.
$(benchmark_stamp): $(CONV_SRC) $(BENCH_DEFS)
	$(PY) eval/benchmark_runner.py
	@touch $@
## eval: (re)run the orchestrator evaluation (carries over annotations)
eval: $(RES)/.eval.stamp

# The prerequisites are declared in two groups; Make merges them into a single
# list for the one recipe below.
# Code and data the evaluation runs on:
$(RES)/.eval.stamp: $(CONV_SRC) $(INPUTS)
# Precomputed ranking inputs.
# NOTE(review): the plots recipe also rewrites $(ROB_SCORES), so this stamp
# presumably looks out of date again after every plots run. Confirm that is
# the intended behaviour.
$(RES)/.eval.stamp: $(ACC_SCORES) $(ROB_SCORES)
	$(PY) eval/evaluate.py
	@touch $@
## cache-timing: re-measure the sub-path caching runtime effect
cache-timing: $(RES)/.cache-timing.stamp

# Stamp file: touched only after the analysis script succeeds. Re-run when a
# benchmark definition or a conversion source file changes.
$(RES)/.cache-timing.stamp: \
    $(BENCH_DEFS) \
    $(CONV_SRC)
	$(PY) eval/cache_timing_analysis.py
	@touch $@
## stats: path-count statistics for the full and core graphs
# Each JSON file is the target of its own rule, so a missing or stale core file
# is rebuilt as well (previously only the full-graph file was a target and the
# core file was an undeclared side effect of its recipe).
stats: $(RES)/path_count_stats.json $(RES)/path_count_stats_core.json

# Full graph. The script is run without --output, so this rule relies on its
# default output path being this target, as the original rule did.
# The script itself is a prerequisite so that editing it refreshes the stats.
$(RES)/path_count_stats.json: eval/path_count_stats.py $(CONV_SRC)
	$(PY) eval/path_count_stats.py

# Core graph. The order-only prerequisite keeps the two runs sequential under
# `make -j` (full graph first, as in the original single recipe) without making
# the core file depend on the full file's timestamp.
$(RES)/path_count_stats_core.json: eval/path_count_stats.py $(CONV_SRC) | $(RES)/path_count_stats.json
	$(PY) eval/path_count_stats.py --core --output $@
## plots: regenerate figures from the annotated review CSVs (robustness is
## recomputed here from edge_outputs.csv; also rewrites edge_robustness_scores.json)
plots: $(PLOTS)/.stamp

# Stamp file marking the last successful plotting run. Prerequisites:
#   * the plotting script itself, so editing it regenerates the figures;
#   * both annotated review CSVs;
#   * every Python file under src/.
# The stamp's directory is created before touching it, so the rule does not
# depend on the script having created $(PLOTS).
$(PLOTS)/.stamp: eval/plot_orchestrator_evaluation.py $(EDGE_CSV) $(FINAL_CSV) $(SRC)
	$(PY) eval/plot_orchestrator_evaluation.py
	@mkdir -p $(@D) && touch $@
# Deletes only the four stamp files, which forces the stamped steps to re-run;
# generated data and review CSVs are left alone.
clean: ## Remove make stamps (does not touch data or annotations)
	rm -f $(patsubst %,$(RES)/.%.stamp,benchmark eval cache-timing) $(PLOTS)/.stamp
# Self-documenting help: prints every `target: ... ## text` line and every
# `## text` comment line of the makefile(s), in file order.
#   grep  selects those lines (plain `.*`; the former `.*?` stacked two
#         duplication operators, which POSIX ERE leaves undefined);
#   sed   collapses `target: prereqs ## text` to `target: text`, strips a
#         leading `## `, then indents each line by one space.
help: ## Show this help
	@grep -hE '^[a-zA-Z_-]+:.*## |^## ' $(MAKEFILE_LIST) | sed -E 's/:.*## /: /; s/^## //; s/^/ /'