-
-
Notifications
You must be signed in to change notification settings - Fork 723
Expand file tree
/
Copy pathpubliccode.py
More file actions
133 lines (110 loc) · 4.4 KB
/
publiccode.py
File metadata and controls
133 lines (110 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import logging
import os
import saneyaml
from packagedcode import models
"""
Handle publiccode.yml metadata files.
publiccode.yml is a metadata standard for public sector open source software.
See https://github.com/publiccodeyml/publiccode.yml
"""
# Debug switch read once at import time: holds the raw string value of the
# SCANCODE_DEBUG_PACKAGE environment variable when it is set, otherwise False.
# Any non-empty string (including "0" or "false") is therefore truthy.
TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False)
# Module-level logger named after this module (__name__).
logger = logging.getLogger(__name__)
class PubliccodeYmlHandler(models.DatafileHandler):
    """
    Handle publiccode.yml / publiccode.yaml metadata files.

    A file is only treated as a publiccode.yml manifest when it parses to a
    mapping that contains the 'publiccodeYmlVersion' key.
    """
    datasource_id = 'publiccode_yml'
    path_patterns = ('*/publiccode.yml', '*/publiccode.yaml')
    default_package_type = 'publiccode'
    default_primary_language = None
    description = 'publiccode.yml metadata file'
    documentation_url = 'https://github.com/publiccodeyml/publiccode.yml'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData built from the publiccode.yml file at
        ``location``.

        Nothing is yielded when the file is empty, does not parse to a
        mapping, or lacks the mandatory 'publiccodeYmlVersion' key.

        ``package_only`` is forwarded to ``models.PackageData.from_data``.
        NOTE(review): this relies on ``from_data(package_data, package_only)``
        and on the ``extracted_license_statement`` / list-valued ``keywords``
        fields as used by the other packagedcode handlers -- confirm against
        the ``models`` module in this tree.

        Sections of the wrong type (for instance a string where a mapping is
        expected) are ignored rather than raising.
        """
        with open(location, 'rb') as f:
            data = saneyaml.load(f.read())

        if not data or not isinstance(data, dict):
            return

        # Validate: a publiccode.yml must have 'publiccodeYmlVersion'
        if 'publiccodeYmlVersion' not in data:
            return

        vcs_url = data.get('url')
        homepage_url = data.get('landingURL') or vcs_url

        # License is under legal.license (SPDX expression). It is reported as
        # the extracted statement, not as an already-resolved expression.
        legal = data.get('legal')
        if not isinstance(legal, dict):
            legal = {}
        extracted_license_statement = legal.get('license')
        copyright_statement = (
            legal.get('mainCopyrightOwner') or legal.get('repoOwner')
        )

        # Keywords from categories, kept as a list of strings. A lone string
        # is wrapped so that it is not split into single characters.
        categories = data.get('categories') or []
        if isinstance(categories, str):
            categories = [categories]
        elif not isinstance(categories, (list, tuple)):
            categories = []
        keywords = [
            category for category in categories
            if isinstance(category, str) and category
        ]

        # Parties from maintenance.contacts
        maintenance = data.get('maintenance')
        contacts = (
            maintenance.get('contacts')
            if isinstance(maintenance, dict)
            else None
        )
        if not isinstance(contacts, (list, tuple)):
            contacts = []

        parties = []
        for contact in contacts:
            if not isinstance(contact, dict):
                continue
            contact_name = contact.get('name')
            contact_email = contact.get('email')
            if contact_name or contact_email:
                parties.append(
                    models.Party(
                        type=models.party_person,
                        name=contact_name,
                        email=contact_email,
                        role='maintainer',
                    )
                )

        # Extra data: copy selected top-level keys through when they are set.
        extra_data = {}
        for key in (
            'publiccodeYmlVersion',
            'platforms',
            'developmentStatus',
            'softwareType',
        ):
            value = data.get(key)
            if value:
                extra_data[key] = value

        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=data.get('name'),
            version=data.get('softwareVersion'),
            vcs_url=vcs_url,
            homepage_url=homepage_url,
            description=_get_description(data),
            extracted_license_statement=extracted_license_statement,
            copyright=copyright_statement,
            keywords=keywords,
            parties=parties,
            extra_data=extra_data or None,
        )
        yield models.PackageData.from_data(package_data, package_only)
def _get_description(data):
"""
Extract the best available description from publiccode.yml's
multilingual 'description' block. Prefer English, fall back to
any available language. Returns longDescription, else shortDescription.
"""
description_block = data.get('description') or {}
if not description_block:
return
lang_data = (
description_block.get('en')
or description_block.get('eng')
or next(iter(description_block.values()), None)
)
if not lang_data:
return
long_desc = lang_data.get('longDescription', '').strip()
short_desc = lang_data.get('shortDescription', '').strip()
return long_desc or short_desc or None