# Recognizes one per-symbol license declaration of the form
#     declare <symbol> license "<text>";
# anchored at the start of the string being matched. Capture groups:
#   1 - the symbol name (identifier characters plus `[` and `]`)
#   2 - the keyword as written ("license" or "licence")
#   3 - the license text between the double quotes (may be empty)
DECLARE_LICENSE_RE = re.compile(
    r'^declare\s+'
    r'([A-Za-z_][A-Za-z0-9_\[\]]*)'
    r'\s+(license|licence)'
    r'\s+"([^"]*)"'
    r'\s*;'
)
# Built-in allow-list for the "commercial-compatible" license policy.
# Entries are lowercase because they are compared against a stripped,
# lowercased license string; a license is accepted when at least one entry
# is found in it and no deny-list entry is.
COMMERCIAL_COMPATIBLE_TOKENS = (
    # Permissive licenses.
    "mit", "bsd", "apache",
    # Weak-copyleft licenses.
    "lgpl with exception", "lgpl", "mpl",
    # Other permissive / public-domain style markers.
    "unlicense", "isc", "zlib", "boost", "public domain",
    # STK license marker.
    "stk-4.3",
)
# Built-in deny-list for the "commercial-compatible" license policy.
# Entries are lowercase for the same reason as the allow-list. The deny-list
# is consulted first: a single hit rejects the license regardless of any
# allow-list entry that also appears in it.
COMMERCIAL_INCOMPATIBLE_TOKENS = (
    # Strong-copyleft licenses.
    "agpl", "gpl",
    # Explicit non-commercial wording, with and without the hyphen.
    "non-commercial", "non commercial",
    # Creative Commons NonCommercial, by short name or by license URL.
    "cc-by-nc", "creativecommons.org/licenses/by-nc",
)
5981
6082
6183@dataclass (frozen = True )
@@ -254,6 +276,92 @@ def extract_symbol_licenses(lines: Iterable[str]) -> dict[str, str]:
254276 return licenses
255277
256278
279+ def load_license_token_file (path : Path | None ) -> tuple [str , ...]:
280+ """Load one newline-based license token file.
281+
282+ Empty lines and lines starting with `#` are ignored. Matching is done with
283+ case-insensitive substring checks, so each non-empty line is interpreted as
284+ one token/pattern to search for in the normalized license string.
285+ """
286+
287+ if path is None :
288+ return ()
289+
290+ tokens : list [str ] = []
291+ for raw_line in path .read_text (encoding = "utf-8" ).splitlines ():
292+ token = raw_line .strip ().lower ()
293+ if not token or token .startswith ("#" ):
294+ continue
295+ tokens .append (token )
296+ return tuple (tokens )
297+
298+
def is_commercial_compatible_license(
    license_name: str | None,
    allow_tokens: tuple[str, ...] = COMMERCIAL_COMPATIBLE_TOKENS,
    deny_tokens: tuple[str, ...] = COMMERCIAL_INCOMPATIBLE_TOKENS,
) -> bool:
    """Return whether a license looks commercially compatible.

    This heuristic is intentionally conservative for LLM-assisted code
    generation workflows. It allows common permissive licenses and LGPL-style
    cases, rejects GPL/AGPL/non-commercial markers as not suitable for a
    generic "commercial-compatible" export, and treats the absence of an
    explicit per-symbol license as compatible with the library default.

    The license string is stripped and lowercased, then searched for each
    token. A token that starts with a letter or digit only counts when the
    character just before it is not an ASCII letter or digit, so the deny
    token "gpl" does not fire inside "lgpl" and the allow token "mit" does
    not fire inside "permitted". Nothing is required after the token, so
    version suffixes such as "GPLv3" or "Apache2" still match. Tokens that
    start with any other character are matched as plain substrings.

    Deny tokens win: one deny hit returns False even if an allow token is
    also present. A license that hits neither list returns False. A missing
    or blank `license_name` returns True.
    """

    if not license_name:
        return True

    normalized = str(license_name).strip().lower()
    if not normalized:
        return True

    def mentions(token: str) -> bool:
        # Escape the token so characters like "." or "-" stay literal.
        pattern = re.escape(token)
        if token[:1].isalnum():
            # Leading boundary only: reject "lgpl" for "gpl", keep "gplv3".
            pattern = r"(?<![a-z0-9])" + pattern
        return re.search(pattern, normalized) is not None

    if any(mentions(token) for token in deny_tokens):
        return False
    return any(mentions(token) for token in allow_tokens)
323+
324+
def filter_index_for_license_policy(
    index: dict[str, object],
    policy: str,
    allow_tokens: tuple[str, ...] = COMMERCIAL_COMPATIBLE_TOKENS,
    deny_tokens: tuple[str, ...] = COMMERCIAL_INCOMPATIBLE_TOKENS,
) -> dict[str, object]:
    """Apply one license policy to an index that has already been built.

    With policy "all" the index is returned untouched (same object). With
    policy "commercial-compatible" a shallow copy is returned in which:

    * each library keeps only the symbols whose "license" value passes
      `is_commercial_compatible_license` with the given token lists;
    * libraries left without any symbol are dropped;
    * "symbols" is rebuilt as the concatenation of the kept symbols, in
      library order;
    * "licensePolicy" records the policy that was applied.

    The input index and its library dicts are not modified. Any other policy
    value raises ValueError.
    """

    if policy == "all":
        return index
    if policy != "commercial-compatible":
        raise ValueError(f"Unsupported license policy: {policy} ")

    def passes_policy(symbol: dict[str, object]) -> bool:
        return is_commercial_compatible_license(
            symbol.get("license"),
            allow_tokens=allow_tokens,
            deny_tokens=deny_tokens,
        )

    surviving_libraries: list[dict[str, object]] = []
    surviving_symbols: list[dict[str, object]] = []

    for library in index["libraries"]:
        compatible = list(filter(passes_policy, library.get("symbols", [])))
        if compatible:
            # Copy the library so the caller's index keeps its full symbol list.
            surviving_libraries.append({**library, "symbols": compatible})
            surviving_symbols.extend(compatible)

    return {
        **index,
        "libraries": surviving_libraries,
        "symbols": surviving_symbols,
        "licensePolicy": policy,
    }
363+
364+
257365def extract_doc_block (lines : list [str ], start_index : int ) -> dict [str , object ] | None :
258366 """Extract the full documentation block starting at `start_index`.
259367
@@ -875,6 +983,34 @@ def parse_args() -> argparse.Namespace:
875983 default = None ,
876984 help = "Optional directory for a split index: compact index.json + detailed modules/*.json." ,
877985 )
986+ parser .add_argument (
987+ "--license-policy" ,
988+ choices = ["all" , "commercial-compatible" ],
989+ default = "all" ,
990+ help = (
991+ "Optional license filter for exported symbols. "
992+ "'commercial-compatible' keeps only symbols whose per-function license "
993+ "matches a conservative allow-list heuristic."
994+ ),
995+ )
996+ parser .add_argument (
997+ "--license-allowlist-file" ,
998+ type = Path ,
999+ default = None ,
1000+ help = (
1001+ "Optional newline-based file extending the built-in allowlist "
1002+ "used by --license-policy commercial-compatible."
1003+ ),
1004+ )
1005+ parser .add_argument (
1006+ "--license-denylist-file" ,
1007+ type = Path ,
1008+ default = None ,
1009+ help = (
1010+ "Optional newline-based file extending the built-in denylist "
1011+ "used by --license-policy commercial-compatible."
1012+ ),
1013+ )
8781014 return parser .parse_args ()
8791015
8801016
@@ -894,8 +1030,21 @@ def main() -> int:
8941030 repo_root = args .repo_root .resolve ()
8951031 stdlib = args .stdlib .resolve () if args .stdlib else (repo_root / "stdfaust.lib" ).resolve ()
8961032 output = args .output .resolve ()
1033+ allow_tokens = COMMERCIAL_COMPATIBLE_TOKENS
1034+ deny_tokens = COMMERCIAL_INCOMPATIBLE_TOKENS
1035+
1036+ if args .license_allowlist_file is not None :
1037+ allow_tokens = allow_tokens + load_license_token_file (args .license_allowlist_file .resolve ())
1038+ if args .license_denylist_file is not None :
1039+ deny_tokens = deny_tokens + load_license_token_file (args .license_denylist_file .resolve ())
8971040
8981041 index = build_index (repo_root = repo_root , stdlib = stdlib )
1042+ index = filter_index_for_license_policy (
1043+ index ,
1044+ args .license_policy ,
1045+ allow_tokens = allow_tokens ,
1046+ deny_tokens = deny_tokens ,
1047+ )
8991048 write_json_document (output , index , args .pretty )
9001049
9011050 split_summary = {}
@@ -907,7 +1056,12 @@ def main() -> int:
9071056 "rootLibPath" : index ["rootLibPath" ],
9081057 "librariesCount" : len (index ["libraries" ]),
9091058 "symbolsCount" : len (index ["symbols" ]),
1059+ "licensePolicy" : args .license_policy ,
9101060 }
1061+ if args .license_allowlist_file is not None :
1062+ summary ["licenseAllowlistFile" ] = normalize_posix_path (args .license_allowlist_file .resolve ())
1063+ if args .license_denylist_file is not None :
1064+ summary ["licenseDenylistFile" ] = normalize_posix_path (args .license_denylist_file .resolve ())
9111065 summary .update (split_summary )
9121066 print (json .dumps (summary , ensure_ascii = True ))
9131067 return 0
0 commit comments