Skip to content

Commit 7f83f7b

Browse files
committed
Add commercial license filter for doc index exports
1 parent f7d7c18 commit 7f83f7b

3 files changed

Lines changed: 224 additions & 1 deletion

File tree

Makefile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
# `make serve` - serve the documentation.
99
# `make doc-index` - build the Faust library documentation JSON index.
1010
# `make doc-index-split` - build a compact index plus one detailed JSON per module.
11+
# `make doc-index-commercial` - build a JSON index filtered to commercially compatible symbols.
1112

1213
FAUST ?= faust
1314
FAUST_OPT ?= -double -t 0
@@ -23,6 +24,8 @@ PYTHON ?= python3
2324
DOC_INDEX_SCRIPT ?= ./scripts/build_faust_doc_index.py
2425
DOC_INDEX_OUTPUT ?= tests/faust-doc-index.json
2526
DOC_INDEX_SPLIT_DIR ?= tests/faust-doc
27+
DOC_INDEX_LICENSE_ALLOWLIST_FILE ?=
28+
DOC_INDEX_LICENSE_DENYLIST_FILE ?=
2629

2730
ARCH := arch/print_arch.cpp
2831
BUILD_DIR := tests/build
@@ -32,7 +35,7 @@ DSP_TEST_DIR := tests
3235
DSP_FILES := $(shell find $(DSP_TEST_DIR) -maxdepth 1 -name '*.dsp' | sort)
3336
BENCH_LOG := tests/bench.log
3437

35-
.PHONY: reference check clean help bench doc-index doc-index-split
38+
.PHONY: reference check clean help bench doc-index doc-index-split doc-index-commercial
3639

3740
help: ## Show available targets and descriptions
3841
@printf "Usage:\n make \033[36m<target>\033[0m\n\n"
@@ -136,6 +139,11 @@ doc-index-split: ## Build a compact JSON index and one detailed JSON per library
136139
printf '[doc-index-split] writing %s and %s\n' '$(DOC_INDEX_OUTPUT)' '$(DOC_INDEX_SPLIT_DIR)'; \
137140
$(PYTHON) $(DOC_INDEX_SCRIPT) --repo-root . --output $(DOC_INDEX_OUTPUT) --split-output-dir $(DOC_INDEX_SPLIT_DIR) --pretty
138141

142+
# Split-layout doc index restricted to the `commercial-compatible` license
# policy. The allowlist/denylist flags are passed only when the matching
# DOC_INDEX_LICENSE_*_FILE variable is non-empty.
doc-index-commercial: ## Build a JSON index filtered to commercially compatible symbols
	@printf '[doc-index-commercial] writing %s and %s\n' '$(DOC_INDEX_OUTPUT)' '$(DOC_INDEX_SPLIT_DIR)'
	@$(PYTHON) $(DOC_INDEX_SCRIPT) --repo-root . \
		--output $(DOC_INDEX_OUTPUT) \
		--split-output-dir $(DOC_INDEX_SPLIT_DIR) \
		--license-policy commercial-compatible \
		$(if $(DOC_INDEX_LICENSE_ALLOWLIST_FILE),--license-allowlist-file $(DOC_INDEX_LICENSE_ALLOWLIST_FILE),) \
		$(if $(DOC_INDEX_LICENSE_DENYLIST_FILE),--license-denylist-file $(DOC_INDEX_LICENSE_DENYLIST_FILE),) \
		--pretty
146+
139147
build: ## Build the documentation
140148
$(MAKE) -C doc build
141149

doc/docs/contributing.md

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ extracts for each documented symbol:
273273
- `io` with `inSignals` / `outSignals` when derivable
274274
- `testCode`
275275
- `references`
276+
- `license` when a per-symbol `declare ... license|licence "..."` is present
276277
- `source`
277278

278279
Two JSON layouts are supported:
@@ -285,6 +286,7 @@ Default Make targets:
285286
```bash
286287
make doc-index
287288
make doc-index-split
289+
make doc-index-commercial
288290
```
289291

290292
Default output locations:
@@ -294,6 +296,9 @@ Default output locations:
294296
- `tests/faust-doc-index.json`
295297
- `tests/faust-doc/index.json`
296298
- `tests/faust-doc/modules/*.json`
299+
- `make doc-index-commercial` writes the same paths as `make doc-index-split`,
300+
but filters the exported symbols using the `commercial-compatible` license
301+
policy
297302

298303
You can override the output paths:
299304

@@ -307,6 +312,62 @@ You can also run the generator directly:
307312
```bash
308313
python3 scripts/build_faust_doc_index.py --repo-root . --output tests/faust-doc-index.json --pretty
309314
python3 scripts/build_faust_doc_index.py --repo-root . --output tests/faust-doc-index.json --split-output-dir tests/faust-doc --pretty
315+
python3 scripts/build_faust_doc_index.py --repo-root . --output tests/faust-doc-index.json --split-output-dir tests/faust-doc --license-policy commercial-compatible --pretty
316+
```
317+
318+
License-policy filtering is optional. The supported values are:
319+
320+
- `all`: export every documented symbol
321+
- `commercial-compatible`: keep only symbols that pass a conservative
322+
per-symbol license heuristic
323+
324+
The current `commercial-compatible` heuristic:
325+
326+
- accepts missing per-symbol licenses and treats them as falling back to the
327+
library default
328+
- accepts common permissive or weak-copyleft markers such as `MIT`, `BSD`,
329+
`Apache`, `LGPL`, `LGPL with exception`, `MPL`, `ISC`, `zlib`, `Boost`,
330+
`Unlicense`, `public domain`, and `STK-4.3`
331+
- rejects markers such as `GPL`, `AGPL`, and explicitly non-commercial terms
332+
333+
This is a practical export filter for tooling, not a legal opinion.
334+
335+
The policy can also be customized with external allowlist/denylist files:
336+
337+
```bash
338+
python3 scripts/build_faust_doc_index.py \
339+
--repo-root . \
340+
--output tests/faust-doc-index.json \
341+
--split-output-dir tests/faust-doc \
342+
--license-policy commercial-compatible \
343+
--license-allowlist-file /path/to/license-allowlist.txt \
344+
--license-denylist-file /path/to/license-denylist.txt \
345+
--pretty
346+
```
347+
348+
These files use a simple newline-based format:
349+
350+
- one token or pattern per line
351+
- matching is case-insensitive and based on substring inclusion
352+
- empty lines are ignored
353+
- lines starting with `#` are treated as comments
354+
355+
Example:
356+
357+
```text
358+
# Allow permissive licenses and a specific local marker
359+
mit
360+
bsd
361+
apache
362+
my-company-approved-license
363+
```
364+
365+
The Make target also supports these overrides:
366+
367+
```bash
368+
make doc-index-commercial \
369+
DOC_INDEX_LICENSE_ALLOWLIST_FILE=/path/to/license-allowlist.txt \
370+
DOC_INDEX_LICENSE_DENYLIST_FILE=/path/to/license-denylist.txt
310371
```
311372

312373
The split layout is recommended for LLM or retrieval-based use because it avoids

scripts/build_faust_doc_index.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,28 @@
5656
DECLARE_LICENSE_RE = re.compile(
5757
r'^declare\s+([A-Za-z_][A-Za-z0-9_\[\]]*)\s+(license|licence)\s+"([^"]*)"\s*;'
5858
)
59+
# Built-in allow tokens for the `commercial-compatible` license policy.
# They are the default `allow_tokens` of the license helpers and are matched as
# substrings of the lower-cased license string, so every entry is lower-case.
COMMERCIAL_COMPATIBLE_TOKENS = (
60+
"mit",
61+
"bsd",
62+
"apache",
63+
"lgpl with exception",
64+
"lgpl",
65+
"mpl",
66+
"unlicense",
67+
"isc",
68+
"zlib",
69+
"boost",
70+
"public domain",
71+
"stk-4.3",
72+
)
73+
# Built-in deny tokens for the `commercial-compatible` license policy.
# They are the default `deny_tokens` of the license helpers and are matched as
# substrings of the lower-cased license string, so every entry is lower-case.
COMMERCIAL_INCOMPATIBLE_TOKENS = (
74+
"agpl",
75+
"gpl",
76+
"non-commercial",
77+
"non commercial",
78+
"cc-by-nc",
79+
"creativecommons.org/licenses/by-nc",
80+
)
5981

6082

6183
@dataclass(frozen=True)
@@ -254,6 +276,92 @@ def extract_symbol_licenses(lines: Iterable[str]) -> dict[str, str]:
254276
return licenses
255277

256278

279+
def load_license_token_file(path: Path | None) -> tuple[str, ...]:
280+
"""Load one newline-based license token file.
281+
282+
Empty lines and lines starting with `#` are ignored. Matching is done with
283+
case-insensitive substring checks, so each non-empty line is interpreted as
284+
one token/pattern to search for in the normalized license string.
285+
"""
286+
287+
if path is None:
288+
return ()
289+
290+
tokens: list[str] = []
291+
for raw_line in path.read_text(encoding="utf-8").splitlines():
292+
token = raw_line.strip().lower()
293+
if not token or token.startswith("#"):
294+
continue
295+
tokens.append(token)
296+
return tuple(tokens)
297+
298+
299+
def is_commercial_compatible_license(
    license_name: str | None,
    allow_tokens: tuple[str, ...] = COMMERCIAL_COMPATIBLE_TOKENS,
    deny_tokens: tuple[str, ...] = COMMERCIAL_INCOMPATIBLE_TOKENS,
) -> bool:
    """Return whether a license looks commercially compatible.

    This heuristic is intentionally conservative for LLM-assisted code
    generation workflows. It allows common permissive licenses and LGPL-style
    cases, rejects GPL/AGPL/non-commercial markers as not suitable for a
    generic "commercial-compatible" export, and treats the absence of an
    explicit per-symbol license as compatible with the library default.

    Matching is a substring test against the trimmed, lower-cased license
    string, so tokens are expected to be lower-case. A missing or blank
    license returns `True`. Otherwise the license is rejected when a deny
    token matches and accepted only when at least one allow token matches.

    Some allow tokens textually contain a deny token ("lgpl" contains "gpl").
    Occurrences of such allow tokens are hidden from the deny check, so the
    longer, more specific token wins and "LGPL" is not rejected merely because
    it embeds "GPL". An allow token that is also listed in `deny_tokens` is
    never hidden; to block a more specific allowed marker, add that exact
    marker to the denylist.
    """

    if not license_name:
        return True

    normalized = str(license_name).strip().lower()
    if not normalized:
        return True

    denied = set(deny_tokens)
    # Allow tokens that embed a deny token and are not themselves denied.
    # Longest first, so "lgpl with exception" is hidden as a whole before the
    # shorter "lgpl" is considered.
    shadowing_tokens = sorted(
        (
            token for token in allow_tokens
            if token not in denied and any(deny in token for deny in deny_tokens)
        ),
        key=len,
        reverse=True,
    )

    # Build the text seen by the deny check. NUL is used as filler so that the
    # fragments around a hidden token cannot join into a new deny match.
    deny_view = normalized
    for token in shadowing_tokens:
        deny_view = deny_view.replace(token, "\x00")

    if any(token in deny_view for token in deny_tokens):
        return False
    return any(token in normalized for token in allow_tokens)
323+
324+
325+
def filter_index_for_license_policy(
    index: dict[str, object],
    policy: str,
    allow_tokens: tuple[str, ...] = COMMERCIAL_COMPATIBLE_TOKENS,
    deny_tokens: tuple[str, ...] = COMMERCIAL_INCOMPATIBLE_TOKENS,
) -> dict[str, object]:
    """Apply one license policy to an already-built index.

    With policy "all" the index is returned untouched. With
    "commercial-compatible" a shallow copy is returned in which each library
    keeps only the symbols whose `license` passes
    `is_commercial_compatible_license`, libraries left without symbols are
    dropped, the top-level `symbols` list is rebuilt from the kept symbols, and
    `licensePolicy` records the policy. Any other policy raises `ValueError`.
    """

    if policy not in ("all", "commercial-compatible"):
        raise ValueError(f"Unsupported license policy: {policy}")
    if policy == "all":
        return index

    def _passes_policy(entry: dict[str, object]) -> bool:
        return is_commercial_compatible_license(
            entry.get("license"),
            allow_tokens=allow_tokens,
            deny_tokens=deny_tokens,
        )

    surviving_libraries: list[dict[str, object]] = []
    surviving_symbols: list[dict[str, object]] = []

    for lib in index["libraries"]:
        retained = list(filter(_passes_policy, lib.get("symbols", [])))
        if retained:
            # Shallow copy, so the caller's library entry keeps its full list.
            surviving_libraries.append({**lib, "symbols": retained})
            surviving_symbols += retained

    return {
        **index,
        "libraries": surviving_libraries,
        "symbols": surviving_symbols,
        "licensePolicy": policy,
    }
363+
364+
257365
def extract_doc_block(lines: list[str], start_index: int) -> dict[str, object] | None:
258366
"""Extract the full documentation block starting at `start_index`.
259367
@@ -875,6 +983,34 @@ def parse_args() -> argparse.Namespace:
875983
default=None,
876984
help="Optional directory for a split index: compact index.json + detailed modules/*.json.",
877985
)
986+
parser.add_argument(
987+
"--license-policy",
988+
choices=["all", "commercial-compatible"],
989+
default="all",
990+
help=(
991+
"Optional license filter for exported symbols. "
992+
"'commercial-compatible' keeps only symbols whose per-function license "
993+
"matches a conservative allow-list heuristic."
994+
),
995+
)
996+
parser.add_argument(
997+
"--license-allowlist-file",
998+
type=Path,
999+
default=None,
1000+
help=(
1001+
"Optional newline-based file extending the built-in allowlist "
1002+
"used by --license-policy commercial-compatible."
1003+
),
1004+
)
1005+
parser.add_argument(
1006+
"--license-denylist-file",
1007+
type=Path,
1008+
default=None,
1009+
help=(
1010+
"Optional newline-based file extending the built-in denylist "
1011+
"used by --license-policy commercial-compatible."
1012+
),
1013+
)
8781014
return parser.parse_args()
8791015

8801016

@@ -894,8 +1030,21 @@ def main() -> int:
8941030
repo_root = args.repo_root.resolve()
8951031
stdlib = args.stdlib.resolve() if args.stdlib else (repo_root / "stdfaust.lib").resolve()
8961032
output = args.output.resolve()
1033+
allow_tokens = COMMERCIAL_COMPATIBLE_TOKENS
1034+
deny_tokens = COMMERCIAL_INCOMPATIBLE_TOKENS
1035+
1036+
if args.license_allowlist_file is not None:
1037+
allow_tokens = allow_tokens + load_license_token_file(args.license_allowlist_file.resolve())
1038+
if args.license_denylist_file is not None:
1039+
deny_tokens = deny_tokens + load_license_token_file(args.license_denylist_file.resolve())
8971040

8981041
index = build_index(repo_root=repo_root, stdlib=stdlib)
1042+
index = filter_index_for_license_policy(
1043+
index,
1044+
args.license_policy,
1045+
allow_tokens=allow_tokens,
1046+
deny_tokens=deny_tokens,
1047+
)
8991048
write_json_document(output, index, args.pretty)
9001049

9011050
split_summary = {}
@@ -907,7 +1056,12 @@ def main() -> int:
9071056
"rootLibPath": index["rootLibPath"],
9081057
"librariesCount": len(index["libraries"]),
9091058
"symbolsCount": len(index["symbols"]),
1059+
"licensePolicy": args.license_policy,
9101060
}
1061+
if args.license_allowlist_file is not None:
1062+
summary["licenseAllowlistFile"] = normalize_posix_path(args.license_allowlist_file.resolve())
1063+
if args.license_denylist_file is not None:
1064+
summary["licenseDenylistFile"] = normalize_posix_path(args.license_denylist_file.resolve())
9111065
summary.update(split_summary)
9121066
print(json.dumps(summary, ensure_ascii=True))
9131067
return 0

0 commit comments

Comments
 (0)