Skip to content

Commit 59026a3

Browse files
Add --composite-rules to annotate multi license rules with required phrases
1 parent ea42c1d commit 59026a3

1 file changed

Lines changed: 136 additions & 0 deletions

File tree

src/licensedcode/required_phrases.py

Lines changed: 136 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -593,6 +593,123 @@ def update_rules_using_license_attributes(
593593
dry_run=dry_run,
594594
)
595595

596+
def update_composite_rules_using_license_attributes(
    license_expression=None,
    write_phrase_source=False,
    verbose=False,
    dry_run=False,
):
    """
    Add required phrases to composite (multi-license) rules using license attributes.

    The updatable rules are fetched for ``license_expression`` with
    ``simple_expression=False`` and grouped by expression. Each expression is split into its
    unique license keys; expressions that cannot be parsed, that have fewer than two keys, or
    that reference a key missing from the licenses database are skipped.

    For every remaining key, the candidate phrases are the license ``name``, ``short_name`` and
    ``spdx_license_key`` plus the stripped texts of the ``is_required_phrase`` rules whose own
    license expression is exactly that key. Candidates are tried longest first.

    A rule is marked only when EVERY key of its expression has at least one candidate phrase
    found in the rule text. In that case the first (longest) phrase found for each key is passed
    to ``add_required_phrase_to_rule``.

    ``write_phrase_source`` appends a " <key>.LICENSE : composite" trace to the source passed
    along with each phrase. ``dry_run`` is forwarded unchanged to
    ``add_required_phrase_to_rule``. ``verbose`` prints progress messages.
    """
    licenses_by_key = get_licenses_db()
    licensing = Licensing()

    # Texts of the is_required_phrase rules, indexed by the license expression of the rule that
    # carries them. This index is queried with plain license keys further down, so in practice
    # only rules with a single-key expression ever contribute candidates.
    required_phrase_texts = {}
    for base_expression, base_rules in get_base_rules_by_expression().items():
        for base_rule in base_rules:
            if base_rule.is_required_phrase:
                required_phrase_texts.setdefault(base_expression, []).append(
                    base_rule.text.strip()
                )

    def candidate_phrases(license_key, license_object):
        """
        Return the de-duplicated candidate phrases for ``license_key``, longest first.
        """
        phrases = []
        attribute_values = (
            license_object.name,
            license_object.short_name,
            license_object.spdx_license_key,
        )
        # empty or missing license attributes are never candidates
        for attribute_value in attribute_values:
            if attribute_value and attribute_value not in phrases:
                phrases.append(attribute_value)

        for phrase_text in required_phrase_texts.get(license_key, []):
            if phrase_text not in phrases:
                phrases.append(phrase_text)

        # longest first so that the most specific phrase wins
        return sorted(phrases, key=len, reverse=True)

    def first_phrase_in_text(rule_text, phrases):
        """
        Return the first of ``phrases`` that has spans in ``rule_text`` or None.
        """
        for phrase in phrases:
            if find_phrase_spans_in_text(text=rule_text, phrase_text=phrase):
                return phrase
        return None

    rules_by_expression = get_updatable_rules_by_expression(
        license_expression,
        simple_expression=False,
    )

    for expression, rules in rules_by_expression.items():
        try:
            keys = licensing.license_keys(expression, unique=True)
        except Exception:
            if verbose:
                click.echo(f' Skipping unparseable expression: {expression}')
            continue

        # single key expressions are handled by --from-license-attributes
        if len(keys) < 2:
            continue

        # every key of the expression must be a known license, otherwise skip the expression
        license_objects = [licenses_by_key.get(key) for key in keys]
        if not all(license_objects):
            continue

        phrases_by_key = {
            key: candidate_phrases(key, license_object)
            for key, license_object in zip(keys, license_objects)
        }

        if verbose:
            click.echo(f'Processing composite expression: {expression}')

        for rule in rules:
            # look for one phrase per key and give up on this rule at the first key without any
            matched_phrases = {}
            for key in keys:
                phrase = first_phrase_in_text(rule.text, phrases_by_key[key])
                if phrase is None:
                    break
                matched_phrases[key] = phrase

            if len(matched_phrases) != len(phrases_by_key):
                continue

            # NOTE(review): phrases are injected one after the other; whether phrases of two
            # keys that overlap in the text are handled correctly depends on
            # add_required_phrase_to_rule -- to be confirmed.
            for key, phrase in matched_phrases.items():
                source_trace = f" {key}.LICENSE : composite" if write_phrase_source else ""
                add_required_phrase_to_rule(
                    rule=rule,
                    required_phrase=phrase,
                    source=(rule.source or "") + source_trace,
                    dry_run=dry_run,
                )

            if verbose:
                marked = list(matched_phrases.values())
                click.echo(f' {rule.identifier}: marked phrases: ' f'{marked}')
711+
712+
596713
####################################################################################################
597714
#
598715
# Inject new required phrase in rules
@@ -629,6 +746,15 @@ def delete_required_phrase_rules_source_debug(rules_data_dir):
629746
"Mutually exclusive with --from-other-rule.",
630747
cls=PluggableCommandLineOption,
631748
)
749+
@click.option(
750+
"-c",
751+
"--composite-rules",
752+
is_flag=True,
753+
default=False,
754+
help="Add required phrases to composite (multi license) rules using license attributes. "
755+
"Only marks a rule if all license keys in the expression have a matching phrase in the text",
756+
cls=PluggableCommandLineOption,
757+
)
632758
@click.option(
633759
"-l",
634760
"--license-expression",
@@ -691,6 +817,7 @@ def delete_required_phrase_rules_source_debug(rules_data_dir):
691817
def add_required_phrases(
692818
from_other_rules,
693819
from_license_attributes,
820+
composite_rules,
694821
license_expression,
695822
validate,
696823
reindex,
@@ -726,6 +853,15 @@ def add_required_phrases(
726853
verbose=verbose,
727854
)
728855

856+
elif composite_rules:
857+
click.echo('Updating composite rules from license attributes.')
858+
update_composite_rules_using_license_attributes(
859+
license_expression=license_expression,
860+
write_phrase_source=write_phrase_source,
861+
dry_run=dry_run,
862+
verbose=verbose,
863+
)
864+
729865
validate_and_reindex(validate, reindex, verbose)
730866

731867

0 commit comments

Comments
 (0)