@@ -165,8 +165,15 @@ class LicenseDetection:
165165 license_expression = attr .ib (
166166 default = None ,
167167 metadata = dict (
168- help = 'Full license expression string '
169- 'using the SPDX license expression syntax and ScanCode license keys.' )
168+ help = 'A license expression string using the SPDX license expression'
169+ ' syntax and ScanCode license keys, the effective license expression'
170+ ' for this license detection.' )
171+ )
172+
173+ license_expression_spdx = attr .ib (
174+ default = None ,
175+ metadata = dict (
176+ help = 'SPDX license expression string with SPDX ids.' )
170177 )
171178
172179 matches = attr .ib (
@@ -248,8 +255,17 @@ def from_matches(
248255 detection_log = detection_log ,
249256 )
250257 detection .identifier = detection .identifier_with_expression
258+ detection .license_expression_spdx = detection .spdx_license_expression ()
251259 return detection
252260
def spdx_license_expression(self):
    """
    Return the SPDX license expression string for this detection.

    The value is built from this object's ``license_expression`` using the
    ``licensing`` object of the license cache, and converted to ``str``.
    """
    # Function-scope imports, as in the original.
    # NOTE(review): presumably kept local to avoid an import cycle with
    # licensedcode.cache -- confirm before moving to module level.
    from licensedcode.cache import get_cache
    from licensedcode.cache import build_spdx_license_expression

    expression = self.license_expression
    licensing = get_cache().licensing
    spdx_expression = build_spdx_license_expression(
        license_expression=expression,
        licensing=licensing,
    )
    return str(spdx_expression)
268+
253269 def __eq__ (self , other ):
254270 return (
255271 isinstance (other , LicenseDetection )
@@ -515,6 +531,7 @@ def from_license_detection_mapping(
515531
516532 detection = cls (
517533 license_expression = license_detection_mapping ["license_expression" ],
534+ license_expression_spdx = license_detection_mapping ["license_expression_spdx" ],
518535 detection_log = license_detection_mapping .get ("detection_log" , []) or None ,
519536 identifier = license_detection_mapping ["identifier" ],
520537 matches = matches ,
@@ -590,6 +607,12 @@ class LicenseMatchFromResult(LicenseMatch):
590607 help = 'Text which was matched' )
591608 )
592609
610+ matched_text_diagnostics = attr .ib (
611+ default = None ,
612+ metadata = dict (
613+ help = 'Text which was matched, with extra diagnostics information.' )
614+ )
615+
593616 def score (self ):
594617 return self .match_score
595618
@@ -615,15 +638,18 @@ def from_dict(cls, license_match_mapping):
615638 """
616639 rule = Rule .from_match_data (license_match_mapping )
617640 matched_text = license_match_mapping .get ("matched_text" ) or None
641+ matched_text_diagnostics = license_match_mapping .get ("matched_text_diagnostics" ) or None
618642
619643 return cls (
644+ from_file = license_match_mapping ["from_file" ],
620645 start_line = license_match_mapping ["start_line" ],
621646 end_line = license_match_mapping ["end_line" ],
622647 match_score = license_match_mapping ["score" ],
623648 matched_length = license_match_mapping ["matched_length" ],
624649 match_coverage = license_match_mapping ["match_coverage" ],
625650 matcher = license_match_mapping ["matcher" ],
626651 text = matched_text ,
652+ matched_text_diagnostics = matched_text_diagnostics ,
627653 rule = rule ,
628654 qspan = None ,
629655 ispan = None ,
@@ -642,35 +668,57 @@ def to_dict(
642668 include_text = False ,
643669 license_text_diagnostics = False ,
644670 whole_lines = True ,
671+ rule_details = False ,
645672 ):
646673 """
647674 Return a "result" scan data built from a LicenseMatch object.
648675 """
649- matched_text = None
650- if include_text :
651- matched_text = self .matched_text
652-
653676 result = {}
654677
655- # Detection Level Information
656- result ['score' ] = self .score ()
678+ result ['license_expression' ] = self .rule .license_expression
679+ result ['license_expression_spdx' ] = self .rule .spdx_license_expression ()
680+ result ['from_file' ] = self .from_file
657681 result ['start_line' ] = self .start_line
658682 result ['end_line' ] = self .end_line
683+ if rule_details :
684+ result .update (self .rule .get_flags_mapping ())
685+ result ['matcher' ] = self .matcher
686+ result ['score' ] = self .score ()
659687 result ['matched_length' ] = self .len ()
688+ if rule_details :
689+ result ["rule_length" ] = self .rule .length
660690 result ['match_coverage' ] = self .coverage ()
661- result ['matcher' ] = self .matcher
662-
663- # LicenseDB Level Information (Rule that was matched)
664- result ['license_expression' ] = self .rule .license_expression
665- result ['rule_identifier' ] = self .rule .identifier
666691 result ['rule_relevance' ] = self .rule .relevance
692+ result ['rule_identifier' ] = self .rule .identifier
667693 result ['rule_url' ] = self .rule .rule_url
694+ if rule_details :
695+ result ["rule_notes" ] = self .rule .notes
696+ result ["referenced_filenames" ] = self .rule .referenced_filenames
697+ if include_text and self .matched_text :
698+ result ['matched_text' ] = self .matched_text
699+ if license_text_diagnostics and self .matched_text_diagnostics :
700+ result ['matched_text_diagnostics' ] = self .matched_text_diagnostics
701+ if rule_details :
702+ result ["rule_text" ] = self .rule .text
668703
669- if include_text :
670- result ['matched_text' ] = matched_text
671704 return result
672705
673706
def populate_matches_with_path(matches, path):
    """
    Populate in place the ``from_file`` entry of each license match mapping
    in ``matches`` with ``path``, the path of the resource these matches
    originate from.

    ``matches`` is an iterable of mutable mappings (each item is read and
    written by key, not by attribute). A match whose ``from_file`` value is
    already truthy is left unchanged. A match whose ``from_file`` value is
    empty, ``None`` or missing altogether gets ``path`` as its ``from_file``.

    A ``None`` or empty ``matches`` is a no-op. Return None.
    """
    for match in matches or ():
        # A populated `from_file` means the match comes from another file and
        # must be kept as-is. An empty or absent `from_file` means the match
        # comes from the original resource itself, so it is set to the path
        # of that resource. `.get` is used so that a match mapping without
        # any `from_file` key is populated instead of raising a KeyError.
        if not match.get("from_file"):
            match["from_file"] = path
721+
674722def collect_license_detections (codebase , include_license_clues = True ):
675723 """
676724 Return a list of LicenseDetectionFromResult object rehydrated from
@@ -680,7 +728,10 @@ def collect_license_detections(codebase, include_license_clues=True):
680728 according to their license detections. This is required because package fields
681729 are populated in package plugin, which runs before the license plugin, and thus
682730 the license plugin step where unknown references to other files are dereferenced
683- does not show up automatically in package attributes.
731+ does not show up automatically in package attributes.
732+
733+ Also populate from_file attributes with resource paths for matches which have
734+ origin in the same file.
684735 """
685736 has_packages = hasattr (codebase .root , 'package_data' )
686737 has_licenses = hasattr (codebase .root , 'license_detections' )
@@ -692,7 +743,11 @@ def collect_license_detections(codebase, include_license_clues=True):
692743 resource_license_detections = []
693744 if has_licenses :
694745 license_detections = getattr (resource , 'license_detections' , []) or []
746+ for detection in license_detections :
747+ populate_matches_with_path (matches = detection ["matches" ], path = resource .path )
695748 license_clues = getattr (resource , 'license_clues' , []) or []
749+ populate_matches_with_path (matches = license_clues , path = resource .path )
750+ codebase .save_resource (resource )
696751
697752 if license_detections :
698753 license_detection_objects = detections_from_license_detection_mappings (
@@ -729,6 +784,9 @@ def collect_license_detections(codebase, include_license_clues=True):
729784
730785 package_license_detections = package ["license_detections" ]
731786 if package_license_detections :
787+ for detection in package_license_detections :
788+ populate_matches_with_path (matches = detection ["matches" ], path = resource .path )
789+ modified = True
732790 package_license_detection_mappings .extend (package_license_detections )
733791 detection_is_same , license_expression = verify_package_license_expression (
734792 license_detection_mappings = package_license_detections ,
@@ -828,6 +886,7 @@ class UniqueDetection:
828886 """
829887 identifier = attr .ib (default = None )
830888 license_expression = attr .ib (default = None )
889+ license_expression_spdx = attr .ib (default = None )
831890 detection_count = attr .ib (default = None )
832891 matches = attr .ib (default = attr .Factory (list ))
833892 detection_log = attr .ib (default = attr .Factory (list ))
@@ -860,12 +919,14 @@ def get_unique_detections(cls, license_detections):
860919 for match in detection .matches
861920 ]
862921 ))
922+ detection .license_expression_spdx = detection .spdx_license_expression ()
863923 detection .identifier = detection .identifier_with_expression
864924
865925 unique_license_detections .append (
866926 cls (
867927 identifier = detection .identifier ,
868928 license_expression = detection .license_expression ,
929+ license_expression_spdx = detection .license_expression_spdx ,
869930 detection_log = detection_log or [],
870931 matches = detection .matches ,
871932 detection_count = len (file_regions ),
@@ -875,7 +936,11 @@ def get_unique_detections(cls, license_detections):
875936
876937 return unique_license_detections
877938
878- def to_dict (self , license_diagnostics ):
939+ def to_dict (self ,
940+ include_text = False ,
941+ license_text_diagnostics = False ,
942+ license_diagnostics = False ,
943+ ):
879944
880945 def dict_fields (attr , value ):
881946
@@ -890,11 +955,20 @@ def dict_fields(attr, value):
890955
891956 return True
892957
893- return attr .asdict (self , filter = dict_fields )
958+ detection_mapping = attr .asdict (self , filter = dict_fields )
959+ detection_mapping ["reference_matches" ] = [
960+ match .to_dict (
961+ include_text = include_text ,
962+ license_text_diagnostics = license_text_diagnostics ,
963+ )
964+ for match in self .matches
965+ ]
966+ return detection_mapping
894967
895968 def get_license_detection_object (self ):
896969 return LicenseDetection (
897970 license_expression = self .license_expression ,
971+ license_expression_spdx = self .license_expression_spdx ,
898972 detection_log = self .detection_log ,
899973 matches = self .matches ,
900974 identifier = self .identifier ,
0 commit comments