# CommunityMech Justfile
# Task runner for common development commands
# Load environment variables from a `.env` file (if one is found) before running recipes.
set dotenv-load := true
# Show every available recipe
default:
    @just --list
# Sync the project environment with uv, including the `dev` dependency group
install:
    uv sync --group dev
# Check one community YAML file (FILE) against the CommunityMech LinkML schema
validate FILE:
    uv run linkml-validate \
        -s src/communitymech/schema/communitymech.yaml \
        {{ FILE }}
# Every file is checked even after an earlier one fails; the recipe exits 1
# if any file failed, so callers (e.g. `qc`) see the failure.
# Validate all community files under kb/communities/ against the schema
validate-all:
    #!/usr/bin/env bash
    set -euo pipefail
    failed=0
    for file in kb/communities/*.yaml; do
        echo "Validating $file..."
        # `if !` keeps the loop running under `set -e` so one run reports every bad file.
        if ! uv run linkml-validate -s src/communitymech/schema/communitymech.yaml "$file"; then
            failed=1
        fi
    done
    # A bare loop would only return the status of the last file validated.
    exit "$failed"
# Emits reports/instance_validation_failures.tsv and exits 1 on any ERROR.
# Closed mode rejects unknown fields, catching the drift class that gave
# CultureMech 59k silent errors. Extra arguments are forwarded to
# scripts/validate_strict.py. Use this for the corpus-wide health check.
# Strict in-process validation in *closed* mode (rejects unknown fields)
validate-strict *args:
    uv run python scripts/validate_strict.py {{ args }}
# Safeguards checked: curation_history append, --dry-run/--apply, validation
# before write, and wiring into this justfile. Writes
# reports/pipeline_writers_audit.tsv; useful for tracking adoption of
# write_validated_community + record_curation_event. Extra arguments are
# forwarded to scripts/audit_writers.py.
# Audit YAML-writing Python modules under scripts/ and src/communitymech/ for safeguards
audit-writers *args:
    uv run python scripts/audit_writers.py {{ args }}
# Check the evidence references in one community file (uses conf/reference_validator.yaml)
validate-references FILE:
    uv run linkml-reference-validator validate data {{ FILE }} \
        -s src/communitymech/schema/communitymech.yaml \
        --config conf/reference_validator.yaml
# Every file is checked even after an earlier one fails; the recipe exits 1
# if any file failed, so callers (e.g. `qc`) see the failure.
# Validate evidence references in all community files under kb/communities/
validate-references-all:
    #!/usr/bin/env bash
    set -euo pipefail
    failed=0
    for file in kb/communities/*.yaml; do
        # printf emits a real blank line; bash's echo printed "\n" literally.
        printf '\nValidating references in %s...\n' "$file"
        # `if !` keeps the loop running under `set -e` so one run reports every bad file.
        if ! uv run linkml-reference-validator validate data "$file" -s src/communitymech/schema/communitymech.yaml --config conf/reference_validator.yaml; then
            failed=1
        fi
    done
    # A bare loop would only return the status of the last file validated.
    exit "$failed"
# Pattern checks always run; existence checks run only when sibling-repo paths
# are configured via the COMMUNITYMECH_SIBLING_REPOS env var (Name=path,Name=path).
# Validate cross-repo IDs (CultureMech, MediaIngredientMech) in one community file
validate-cross-repo-ids FILE:
    PYTHONPATH=src uv run python scripts/validate_cross_repo_ids.py {{ FILE }}
# Validate cross-repo IDs in every kb/communities/*.yaml file with a single script run
validate-cross-repo-ids-all:
    PYTHONPATH=src uv run python scripts/validate_cross_repo_ids.py \
        kb/communities/*.yaml
# Check the ontology terms used in one community file (run with --labels)
validate-terms FILE:
    uv run linkml-term-validator validate-data {{ FILE }} \
        -s src/communitymech/schema/communitymech.yaml \
        --labels
# Every file is checked even after an earlier one fails; the recipe exits 1
# if any file failed, so callers (e.g. `qc`) see the failure.
# Validate ontology terms in all community files under kb/communities/
validate-terms-all:
    #!/usr/bin/env bash
    set -euo pipefail
    failed=0
    for file in kb/communities/*.yaml; do
        # printf emits a real blank line; bash's echo printed "\n" literally.
        printf '\nValidating terms in %s...\n' "$file"
        # `if !` keeps the loop running under `set -e` so one run reports every bad file.
        if ! uv run linkml-term-validator validate-data "$file" -s src/communitymech/schema/communitymech.yaml --labels; then
            failed=1
        fi
    done
    # A bare loop would only return the status of the last file validated.
    exit "$failed"
# Check the ontology term meanings declared in the schema itself
validate-schema-terms:
    uv run linkml-term-validator validate-schema \
        src/communitymech/schema/communitymech.yaml
# Preview suggested reference repairs for one community file (always --dry-run)
repair-references FILE:
    uv run linkml-reference-validator repair data {{ FILE }} \
        -s src/communitymech/schema/communitymech.yaml \
        --dry-run
# Run the pytest suite under tests/ in verbose mode
test:
    uv run pytest tests/ -v
# Output is written to a temporary file and moved into place only on success,
# so a failed generation never leaves an empty or truncated datamodel module.
# Generate Python datamodel from schema
gen-python:
    #!/usr/bin/env bash
    set -euo pipefail
    out=src/communitymech/datamodel/communitymech.py
    tmp="$out.tmp"
    # Remove the temp file on any exit; after a successful mv this is a no-op.
    trap 'rm -f "$tmp"' EXIT
    uv run gen-python src/communitymech/schema/communitymech.yaml > "$tmp"
    mv -f "$tmp" "$out"
# Build schema documentation from the LinkML schema into docs/
gen-doc:
    uv run gen-doc src/communitymech/schema/communitymech.yaml \
        -d docs/
# Export the data used by the faceted-search browser (communitymech.export.browser_export)
gen-browser:
    uv run python -m communitymech.export.browser_export
# Render the community HTML pages (communitymech.render)
gen-html:
    uv run python -m communitymech.render
# Build the UMAP visualization of the community embedding space, then report its location
gen-umap:
    uv run communitymech generate-umap
    @echo "✅ UMAP visualization generated at docs/community_umap.html"
# Regenerate all HTML by running gen-html and gen-umap first
gen-all: gen-html gen-umap
    @echo "✅ All HTML pages regenerated"
# Remove generated files: datamodel *.py, docs/*.md and the .linkml-cache directory
clean:
    rm -rf src/communitymech/datamodel/*.py
    rm -rf docs/*.md
    rm -rf .linkml-cache
# Reformat src/ and tests/ with black, then apply ruff's automatic fixes
format:
    uv run black src/ tests/
    uv run ruff check --fix src/ tests/
# Lint without modifying files: black --check, ruff check, then mypy on src/
lint:
    uv run black --check src/ tests/
    uv run ruff check src/ tests/
    uv run mypy src/
# Full QC: schema, strict, term and reference validation, then lint and tests
qc: validate-all validate-strict validate-terms-all validate-references-all lint test
    @echo "✅ All QC checks passed!"
# Check which community strains are represented in UniProt reference proteomes
# (COMMUNITY_PATH defaults to kb/communities)
uniprot-reference COMMUNITY_PATH="kb/communities":
    uv run python -m communitymech.uniprot_reference_proteomes \
        {{ COMMUNITY_PATH }}
# Build the proteome-oriented CSV of communities per UniProt proteome/taxon, written to OUT
uniprot-proteome-csv COMMUNITY_PATH="kb/communities" OUT="reports/uniprot_strain_proteome_communities.csv":
    uv run python -m communitymech.uniprot_reference_proteomes \
        {{ COMMUNITY_PATH }} \
        --proteome-csv-out {{ OUT }}
# Run the network-integrity audit across all communities
audit-network:
    uv run communitymech audit-network
# Network audit in --check-only mode (CI mode: exits with an error if issues are found)
check-network-quality:
    uv run communitymech audit-network --check-only
# Run the network-integrity audit and emit the result as JSON
audit-network-json:
    uv run communitymech audit-network --json
# Run the network-integrity audit and write the report to FILE
# (defaults to network_integrity_audit.txt)
audit-network-report FILE="network_integrity_audit.txt":
    uv run communitymech audit-network \
        --report {{ FILE }}
# LLM-assisted network repair for the community in FILE (requires ANTHROPIC_API_KEY)
repair-network FILE:
    uv run communitymech repair-network {{ FILE }}
# LLM-assisted network repair with --dry-run: show suggestions only
repair-network-dry FILE:
    uv run communitymech repair-network {{ FILE }} \
        --dry-run
# ============== Deep Research ==============
# Directory handed to the research script via --research-dir
research_dir := 'research'
# Directory holding the research prompt template(s)
templates_dir := 'templates'
# Examples:
#   just research-community falcon Yogurt_TwoSpecies_Starter_Culture --dry-run
#   just research-community falcon CommunityMech:000164
# Uses templates_dir/community_mechanism_research.md as the template and
# research_dir as --research-dir; extra arguments are forwarded to the script.
# Deep research on a community (target) using the given provider
research-community provider target *args="":
    uv run --extra dev python scripts/research_community.py \
        --provider {{ provider }} \
        --target {{ target }} \
        --template {{ templates_dir }}/community_mechanism_research.md \
        --research-dir {{ research_dir }} \
        {{ args }}
# Has no body of its own: it only runs `research-community` as a dependency,
# forwarding provider, target and any extra args unchanged.
# Alias for repo-specific entity research.
research-entity provider target *args="": (research-community provider target args)
# List the providers known to deep-research-client
research-providers:
    #!/usr/bin/env bash
    set -euo pipefail
    # Fall back to FUTUREHOUSE_API_KEY only when EDISON_API_KEY is unset or empty.
    if [[ -z "${EDISON_API_KEY:-}" && -n "${FUTUREHOUSE_API_KEY:-}" ]]; then
        export EDISON_API_KEY="${FUTUREHOUSE_API_KEY}"
    fi
    uv run --extra dev deep-research-client providers
# Show detailed availability and parameters for a single provider
research-provider provider:
    #!/usr/bin/env bash
    set -euo pipefail
    # Fall back to FUTUREHOUSE_API_KEY only when EDISON_API_KEY is unset or empty.
    if [[ -z "${EDISON_API_KEY:-}" && -n "${FUTUREHOUSE_API_KEY:-}" ]]; then
        export EDISON_API_KEY="${FUTUREHOUSE_API_KEY}"
    fi
    uv run --extra dev deep-research-client providers --provider {{ provider }}
# Produce LLM-assisted repair suggestions for all communities (--report-only)
suggest-network-repairs:
    uv run communitymech repair-network-batch --report-only
# Produce repair suggestions for at most MAX communities (default 10, --report-only)
suggest-network-repairs-limited MAX='10':
    uv run communitymech repair-network-batch --report-only \
        --max-communities {{ MAX }}
# Apply the approved suggestions recorded in the batch report REPORT
apply-batch-repairs REPORT:
    uv run communitymech repair-network-batch \
        --apply-from {{ REPORT }}
# Index files are read from sibling checkouts at ../../CultureMech and
# ../../MediaIngredientMech.
# Link growth media to CultureMech/MediaIngredientMech (dry-run)
link-media-dry:
    uv run python scripts/link_growth_media.py \
        --dry-run \
        --culturemech-index ../../CultureMech/data/normalized_yaml/recipe_index.json \
        --mediaingredientmech-index ../../MediaIngredientMech/data/curated/all_ingredients_index.json
# Runs the linking script without --dry-run. Index files are read from
# sibling checkouts at ../../CultureMech and ../../MediaIngredientMech.
# Link growth media to CultureMech/MediaIngredientMech (apply)
link-media:
    uv run python scripts/link_growth_media.py \
        --culturemech-index \
            ../../CultureMech/data/normalized_yaml/recipe_index.json \
        --mediaingredientmech-index \
            ../../MediaIngredientMech/data/curated/all_ingredients_index.json
# Runs the linking script with --dry-run and writes three files under reports/:
# ingredient_mapping.csv, media_mapping.csv and media_linking_summary.txt.
# Generate ingredient/media mapping reports
link-media-report:
    uv run python scripts/link_growth_media.py \
        --dry-run \
        --culturemech-index ../../CultureMech/data/normalized_yaml/recipe_index.json \
        --mediaingredientmech-index ../../MediaIngredientMech/data/curated/all_ingredients_index.json \
        --ingredient-report reports/ingredient_mapping.csv \
        --media-report reports/media_mapping.csv \
        --summary-report reports/media_linking_summary.txt
# Publications and supporting_text are propagated from evidence claims.
# Phase 3 of the dismech-pattern port. See
# ../../culturebotai-claw/docs/proposals/phase3_communitymech_kgx_export_with_publications.md
# The interpreter defaults to /opt/homebrew/bin/python3.13; set
# COMMUNITYMECH_PYTHON to use a different one on machines without that path.
# Export the community knowledge graph as KGX TSV (nodes.tsv + edges.tsv) into output/kgx
kgx-export:
    PYTHONPATH=src {{ env_var_or_default("COMMUNITYMECH_PYTHON", "/opt/homebrew/bin/python3.13") }} -m communitymech.export \
        --kb kb/communities --output output/kgx
# No external deps; checks columns, CURIE shape, biolink predicate names,
# duplicate IDs, dangling subjects/objects. Runs with --strict on output/kgx.
# The interpreter defaults to /opt/homebrew/bin/python3.13; set
# COMMUNITYMECH_PYTHON to use a different one on machines without that path.
# Lightweight structural validation of the KGX TSV outputs
kgx-validate:
    PYTHONPATH=src {{ env_var_or_default("COMMUNITYMECH_PYTHON", "/opt/homebrew/bin/python3.13") }} -m communitymech.export.validate_kgx \
        --kgx-dir output/kgx --strict
# Output goes into pages/community/. Includes a Mermaid membership flowchart
# via the shared kg_microbe_browser.graph builder in claw. See
# ../../culturebotai-claw/docs/proposals/phase5_mkdocs_material_and_browser_parity.md
# The interpreter defaults to /opt/homebrew/bin/python3.13; set
# COMMUNITYMECH_PYTHON to use a different one on machines without that path.
# Extra arguments are forwarded to render_community_pages.py.
# Render per-community HTML detail pages from kb/communities/*.yaml
gen-community-pages *args:
    {{ env_var_or_default("COMMUNITYMECH_PYTHON", "/opt/homebrew/bin/python3.13") }} src/communitymech/render_community_pages.py {{ args }}