schema-conversion-orchestrator/Makefile at main · MetaConfigurator/schema-conversion-orchestrator · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Makefile for the Schema Conversion Orchestrator evaluation pipeline.
#
# Dependency model (see also eval/README.md):
#   benchmarks/*.py + conversion src --benchmark_runner.py-->  accuracy_scores.json (+ benchmark figs/csvs)
#   conversion src + real_world_inputs --evaluate.py-->  review/*.csv (+ MANUAL annotation)
#       (evaluate's ranking also reads accuracy_scores.json + edge_robustness_scores.json)
#   conversion src                   --path_count_stats.py-->  results/path_count_stats{,_core}.json
#   review/*.csv + src               --plot_orchestrator_evaluation.py-->  plots/*.png (+ edge_robustness_scores.json)
#   conversion src + benchmarks      --cache_timing_analysis.py-->  results/cache_timing_results.*
#
# Notes:
#  * Review CSVs carry human G/L/I annotations; `eval` carries them over, so only
#    new rows come back blank. Fill those in, then re-run `make plots` (a bare
#    `make` only prints the help).
#  * The two *_scores.json are precomputed RANKING inputs (accuracy<-benchmark,
#    robustness<-plots) consumed by `eval`. They are committed artifacts here (no
#    recipe) so there is no Make cycle; `refresh` sequences the steps explicitly.
#  * Slow targets (benchmark, eval, cache-timing) depend on the *conversion*
#    source only, so editing a reporting/plotting module does not retrigger them.

PY       := ./venv/bin/python
SRC      := $(shell find src -name '*.py')
CONV_SRC := $(shell find src/schema_conversion_orchestrator/converters \
                          src/schema_conversion_orchestrator/domain \
                          src/schema_conversion_orchestrator/application -name '*.py' 2>/dev/null)
BENCH_DEFS := $(shell find eval/benchmarks -name '*.py' 2>/dev/null)
INPUTS   := $(shell find eval/real_world_inputs -type f 2>/dev/null)

RES     := eval/results
OUT     := $(RES)/orchestrator_outputs
REVIEW  := $(OUT)/review
PLOTS   := $(OUT)/plots
EDGE_CSV  := $(REVIEW)/edge_outputs.csv
FINAL_CSV := $(REVIEW)/final_outputs.csv

# Precomputed ranking inputs (committed; regenerated by benchmark/plots).
ACC_SCORES := src/schema_conversion_orchestrator/data/accuracy_scores.json
ROB_SCORES := src/schema_conversion_orchestrator/data/edge_robustness_scores.json

.PHONY: all refresh stats plots eval benchmark cache-timing clean help
.DEFAULT_GOAL := help

all: stats plots          ## Build the cheap, deterministic outputs (stats + plots)

## refresh: run every automatable step in dependency order, each only if its
##          inputs changed (annotation stays manual; in-text numbers stay manual)
# The steps are sub-makes on separate recipe lines rather than prerequisites,
# so they run strictly one after another in the order written (also under
# `make -j`) and the sequence stops at the first step that fails. Each sub-make
# decides from its own stamp/target whether there is anything to do.
# NOTE(review): `plots` runs last and is documented as rewriting
# edge_robustness_scores.json, which is a prerequisite of the eval stamp; a
# later `refresh` may therefore find `eval` out of date again -- confirm that
# this is the intended behaviour.
refresh:
	$(MAKE) benchmark
	$(MAKE) eval
	$(MAKE) cache-timing
	$(MAKE) stats
	$(MAKE) plots

## benchmark: SHACL<->JSON accuracy benchmark -> accuracy_scores.json (+ figs/csvs)
# The stamp marks the last run that finished successfully; it goes stale when
# a conversion source file or a benchmark definition changes.
$(RES)/.benchmark.stamp: $(CONV_SRC) $(BENCH_DEFS)
	$(PY) eval/benchmark_runner.py
	@touch "$@"
benchmark: $(RES)/.benchmark.stamp

## eval: (re)run the orchestrator evaluation (carries over annotations)
# Stale when either precomputed score file, a real-world input, or a
# conversion source file changes; the stamp is touched only after
# evaluate.py has exited successfully.
$(RES)/.eval.stamp: $(ACC_SCORES) $(ROB_SCORES) $(INPUTS) $(CONV_SRC)
	$(PY) eval/evaluate.py
	@touch "$@"
eval: $(RES)/.eval.stamp

## cache-timing: re-measure the sub-path caching runtime effect
# Re-measured when a benchmark definition or a conversion source file
# changes; the stamp is written once the analysis script has succeeded.
$(RES)/.cache-timing.stamp: $(BENCH_DEFS) $(CONV_SRC)
	$(PY) eval/cache_timing_analysis.py
	@touch "$@"
cache-timing: $(RES)/.cache-timing.stamp

## stats: path-count statistics for the full and core graphs
# Each JSON file is the target of its own rule, so a missing or stale core
# file is rebuilt on its own, and a failed core run can no longer hide behind
# an already up-to-date full-graph file. The generator script is listed as a
# prerequisite as well: editing it must refresh these cheap outputs.
stats: $(RES)/path_count_stats.json $(RES)/path_count_stats_core.json

# Full graph. No --output is passed, so the script writes to its default
# location; this rule assumes that default is the target -- TODO confirm.
$(RES)/path_count_stats.json: $(CONV_SRC) eval/path_count_stats.py
	$(PY) eval/path_count_stats.py

# Core graph only, written explicitly to the target.
$(RES)/path_count_stats_core.json: $(CONV_SRC) eval/path_count_stats.py
	$(PY) eval/path_count_stats.py --core --output $@

## plots: regenerate figures from the annotated review CSVs (robustness is
##        recomputed here from edge_outputs.csv; also rewrites edge_robustness_scores.json)
# Prerequisites: both annotated review CSVs, every Python file under src/, and
# the plotting script itself, so that editing the script regenerates the
# figures instead of leaving the stamp "up to date".
plots: $(PLOTS)/.stamp
$(PLOTS)/.stamp: $(EDGE_CSV) $(FINAL_CSV) $(SRC) eval/plot_orchestrator_evaluation.py
	$(PY) eval/plot_orchestrator_evaluation.py
	@# Create the stamp directory if the script did not, so touch cannot fail.
	@mkdir -p $(@D) && touch $@

# Only the stamp files are removed; generated data and annotations are kept,
# so the next build re-runs each stamped step.
clean:                    ## Remove make stamps (does not touch data or annotations)
	$(RM) $(addprefix $(RES)/,.benchmark.stamp .eval.stamp .cache-timing.stamp) $(PLOTS)/.stamp

# Self-documenting help, done in a single awk pass over the parsed makefiles.
# A line is shown when it is a rule header carrying a trailing double-hash
# description, or when it starts with a double-hash marker. For rule headers
# everything between the first colon and the description collapses to ": ";
# a leading marker is stripped; each line is printed with a two-space indent.
help:                     ## Show this help
	@awk '/^[a-zA-Z_-]+:.*## |^## / { sub(/:.*## /, ": "); sub(/^## /, ""); printf "  %s\n", $$0 }' $(MAKEFILE_LIST)