77# See https://aboutcode.org for more information about nexB OSS projects.
88#
99
10- import logging
11- import os
10+ import io
1211
1312import saneyaml
1413
2019See https://github.com/publiccodeyml/publiccode.yml
2120"""
2221
23- TRACE = os .environ .get ('SCANCODE_DEBUG_PACKAGE' , False )
24-
25- logger = logging .getLogger (__name__ )
# Top-level publiccode.yml keys that have no dedicated package field and are
# copied as-is into the package ``extra_data`` mapping when they have a value.
EXTRA_DATA_KEYS = (
    'publiccodeYmlVersion',
    'platforms',
    'developmentStatus',
    'softwareType',
)
2628
2729
class PubliccodeYmlHandler(models.DatafileHandler):
    """
    Handle publiccode.yml metadata files.
    See https://github.com/publiccodeyml/publiccode.yml
    """
    datasource_id = 'publiccode_yml'
    path_patterns = ('*publiccode.yml', '*publiccode.yaml')
    default_package_type = 'publiccode'
    default_primary_language = None
    description = 'publiccode.yml metadata file'
    documentation_url = 'https://github.com/publiccodeyml/publiccode.yml'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData built from the publiccode.yml file at
        ``location``. Yield nothing when the loaded YAML is not a mapping
        with a 'publiccodeYmlVersion' key.

        ``package_only`` is passed through unchanged to
        ``models.PackageData.from_data``.
        """
        # Read as UTF-8 text with the builtin open().
        with open(location, encoding='utf-8') as loc:
            data = saneyaml.load(loc.read())

        if not is_publiccode_yml_data(data):
            return

        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=data.get('name'),
            version=data.get('softwareVersion'),
            vcs_url=data.get('url'),
            # Fall back to the repository URL when there is no landing page.
            homepage_url=data.get('landingURL') or data.get('url'),
            description=get_description(data),
            extracted_license_statement=get_extracted_license_statement(data),
            copyright=get_copyright_statement(data),
            keywords=get_categories(data),
            parties=get_parties(data),
            # Store None rather than an empty mapping.
            extra_data=get_extra_data(data) or None,
        )
        yield models.PackageData.from_data(package_data, package_only)
61+
11062
def is_publiccode_yml_data(data):
    """
    Return True if ``data`` is a dict that has a top-level
    'publiccodeYmlVersion' key, and False for anything else (including
    None, lists and strings).
    """
    return isinstance(data, dict) and 'publiccodeYmlVersion' in data
11165
112- def _get_description (data ):
66+
67+ def get_description (data ):
11368 """
11469 Extract the best available description from publiccode.yml's
11570 multilingual 'description' block. Prefer English, fall back to
@@ -119,15 +74,78 @@ def _get_description(data):
11974 if not description_block :
12075 return
12176
122- lang_data = (
123- description_block .get ('en' )
124- or description_block .get ('eng' )
125- or next (iter (description_block .values ()), None )
126- )
77+ lang_data = None
78+ for language , localized_description in description_block .items ():
79+ primary_language = language .lower ().split ('-' )[0 ]
80+ if primary_language == 'en' :
81+ lang_data = localized_description
82+ break
83+
84+ if not lang_data :
85+ lang_data = next (iter (description_block .values ()), None )
86+
12787 if not lang_data :
12888 return
12989
13090 long_desc = lang_data .get ('longDescription' , '' ).strip ()
13191 short_desc = lang_data .get ('shortDescription' , '' ).strip ()
13292
13393 return long_desc or short_desc or None
94+
95+
def get_extracted_license_statement(data):
    """
    Return the value stored under 'legal' -> 'license' in the ``data``
    mapping, or None when 'legal' is missing, empty or not a mapping, or
    when it has no 'license' entry.
    """
    legal = data.get('legal')
    # A malformed file may carry a scalar or list here: treat it as absent
    # rather than failing on ``.get``.
    if not isinstance(legal, dict):
        return
    return legal.get('license')
99+
100+
def get_copyright_statement(data):
    """
    Return a copyright statement built from the 'mainCopyrightOwner' and
    'repoOwner' entries of the 'legal' mapping of ``data``, in that order.

    Distinct values are joined one per line and a repeated value is kept
    only once. Return None when 'legal' is missing or not a mapping, or
    when neither entry has a value.
    """
    legal = data.get('legal')
    if not isinstance(legal, dict):
        return

    copyright_holders = []
    for key in ('mainCopyrightOwner', 'repoOwner'):
        value = legal.get(key)
        # Skip empty values and avoid listing the same holder twice.
        if value and value not in copyright_holders:
            copyright_holders.append(value)

    # Plain newline separator: no stray leading space on following lines.
    return '\n'.join(copyright_holders) or None
111+
112+
def get_categories(data):
    """
    Return the 'categories' entry of ``data`` as a list.

    A single string value is wrapped in a one-item list, a missing or
    empty value gives an empty list, and any other value is returned
    as-is.
    """
    categories = data.get('categories') or []
    if isinstance(categories, str):
        # A lone scalar is promoted to a list so that callers always get a
        # sequence of categories and never iterate over characters.
        return [categories]
    return categories
118+
119+
def get_parties(data):
    """
    Return a list of maintainer Party objects built from the
    'maintenance' -> 'contacts' entries of ``data``.

    Each contact mapping that has a 'name' or an 'email' yields one person
    Party with the 'maintainer' role. Contacts with neither are skipped,
    as are entries that are not mappings. Return an empty list when there
    is no usable 'maintenance' mapping or no contacts.
    """
    parties = []
    maintenance = data.get('maintenance')
    # A malformed file may carry a scalar here: there are no contacts then.
    if not isinstance(maintenance, dict):
        return parties

    for contact in maintenance.get('contacts') or []:
        # Ignore entries that are not mappings instead of failing on .get
        if not isinstance(contact, dict):
            continue

        contact_name = contact.get('name')
        contact_email = contact.get('email')

        if not (contact_name or contact_email):
            continue

        parties.append(
            models.Party(
                type=models.party_person,
                name=contact_name,
                email=contact_email,
                role='maintainer',
            )
        )

    return parties
141+
142+
def get_extra_data(data):
    """
    Return a mapping of the EXTRA_DATA_KEYS entries of ``data`` that have a
    truthy value, keyed by their original publiccode.yml key.

    Keys that are missing or have an empty value are left out, so the
    result is an empty dict when none of them has a value.
    """
    extra_data = {}

    for key in EXTRA_DATA_KEYS:
        value = data.get(key)
        if value:
            extra_data[key] = value

    return extra_data
0 commit comments