Skip to content

Commit f3be189

Browse files
committed
Add publiccode.yml package handler

Implements a new DatafileHandler to parse publiccode.yml files.
publiccode.yml is a metadata standard for public sector open source
software. See https://github.com/publiccodeyml/publiccode.yml

Resolves #2851
1 parent 2eae344 commit f3be189

File tree

4 files changed

+229
-0
lines changed

4 files changed

+229
-0
lines changed

src/packagedcode/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from packagedcode import opam
3434
from packagedcode import phpcomposer
3535
from packagedcode import pubspec
36+
from packagedcode import publiccode
3637
from packagedcode import pypi
3738
from packagedcode import readme
3839
from packagedcode import rpm
@@ -77,6 +78,8 @@
7778
conda.CondaMetaYamlHandler,
7879
conda.CondaYamlHandler,
7980

81+
publiccode.PubliccodeYmlHandler,
82+
8083
conan.ConanFileHandler,
8184
conan.ConanDataHandler,
8285

src/packagedcode/publiccode.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# ScanCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/scancode-toolkit for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
import os
12+
13+
import saneyaml
14+
15+
from packagedcode import models
16+
17+
"""
18+
Handle publiccode.yml metadata files.
19+
publiccode.yml is a metadata standard for public sector open source software.
20+
See https://github.com/publiccodeyml/publiccode.yml
21+
"""
22+
23+
# Debug switch read once, at import time, from the SCANCODE_DEBUG_PACKAGE
# environment variable. When the variable is set the value is its raw string
# (so any non-empty string is truthy); when unset the value is False.
TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False)
24+
25+
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
26+
27+
28+
class PubliccodeYmlHandler(models.DatafileHandler):
    """
    Handle publiccode.yml / publiccode.yaml metadata files.
    """
    datasource_id = 'publiccode_yml'
    path_patterns = ('*/publiccode.yml', '*/publiccode.yaml')
    default_package_type = 'publiccode'
    default_primary_language = None
    description = 'publiccode.yml metadata file'
    documentation_url = 'https://github.com/publiccodeyml/publiccode.yml'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData built from the publiccode.yml file at
        ``location``.

        Yield nothing when the file is empty, is not a YAML mapping or lacks
        the 'publiccodeYmlVersion' key. ``package_only`` is forwarded to
        ``PackageData.from_data``.
        """
        with open(location, 'rb') as f:
            data = saneyaml.load(f.read())

        if not data or not isinstance(data, dict):
            return

        # A genuine publiccode.yml always declares its schema version: use
        # this to reject unrelated YAML files that happen to share the name.
        if 'publiccodeYmlVersion' not in data:
            return

        vcs_url = data.get('url')

        # License and ownership live under the "legal" section. Tolerate a
        # malformed (non-mapping) section rather than crashing the scan.
        legal = data.get('legal')
        if not isinstance(legal, dict):
            legal = {}

        # Only keep the optional top-level fields that carry a value.
        extra_data_keys = (
            'publiccodeYmlVersion',
            'platforms',
            'developmentStatus',
            'softwareType',
        )
        extra_data = {
            key: data[key] for key in extra_data_keys if data.get(key)
        }

        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=data.get('name'),
            version=data.get('softwareVersion'),
            vcs_url=vcs_url,
            homepage_url=data.get('landingURL') or vcs_url,
            description=_get_description(data),
            # The raw license value as written in the file (an SPDX
            # expression per the publiccode.yml standard). The declared
            # license expression fields are left for ScanCode to compute
            # from this statement.
            extracted_license_statement=legal.get('license'),
            copyright=(
                legal.get('mainCopyrightOwner') or legal.get('repoOwner')
            ),
            keywords=cls._get_keywords(data),
            parties=cls._get_parties(data),
            extra_data=extra_data,
        )
        # NOTE(review): from_data() is expected to honor package_only by
        # skipping license and holder detection -- confirm against the
        # packagedcode.models version in use.
        yield models.PackageData.from_data(package_data, package_only)

    @staticmethod
    def _get_keywords(data):
        """
        Return a list of keyword strings from the 'categories' entry of
        ``data``. A single string is treated as one category; any other
        non-list value yields an empty list.
        """
        categories = data.get('categories')
        if isinstance(categories, str):
            categories = [categories]
        elif not isinstance(categories, (list, tuple)):
            categories = []
        return [str(category) for category in categories if category]

    @staticmethod
    def _get_parties(data):
        """
        Return a list of maintainer Party objects built from the
        'maintenance.contacts' entries of ``data``. Entries that are not
        mappings, or that have neither a name nor an email, are skipped.
        """
        maintenance = data.get('maintenance')
        if not isinstance(maintenance, dict):
            return []

        contacts = maintenance.get('contacts')
        if not isinstance(contacts, (list, tuple)):
            return []

        parties = []
        for contact in contacts:
            if not isinstance(contact, dict):
                continue
            contact_name = contact.get('name')
            contact_email = contact.get('email')
            if not (contact_name or contact_email):
                continue
            parties.append(
                models.Party(
                    type=models.party_person,
                    name=contact_name,
                    email=contact_email,
                    role='maintainer',
                )
            )
        return parties
110+
111+
112+
def _get_description(data):
113+
"""
114+
Extract the best available description from publiccode.yml's
115+
multilingual 'description' block. Prefer English, fall back to
116+
any available language. Returns longDescription, else shortDescription.
117+
"""
118+
description_block = data.get('description') or {}
119+
if not description_block:
120+
return
121+
122+
lang_data = (
123+
description_block.get('en')
124+
or description_block.get('eng')
125+
or next(iter(description_block.values()), None)
126+
)
127+
if not lang_data:
128+
return
129+
130+
long_desc = lang_data.get('longDescription', '').strip()
131+
short_desc = lang_data.get('shortDescription', '').strip()
132+
133+
return long_desc or short_desc or None
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
publiccodeYmlVersion: "0.4"
2+
3+
name: Medusa
4+
url: "https://example.com/italia/medusa.git"
5+
landingURL: "https://example.com/medusa"
6+
softwareVersion: "1.0.3"
7+
8+
platforms:
9+
- web
10+
- linux
11+
12+
categories:
13+
- financial-reporting
14+
- accounting
15+
16+
developmentStatus: stable
17+
softwareType: "standalone/desktop"
18+
19+
description:
20+
en:
21+
shortDescription: >
22+
A short description of this software.
23+
longDescription: >
24+
A very long description of this software. It explains what it does,
25+
who it is for, and why you might want to use it in a public
26+
administration context.
27+
features:
28+
- Feature one
29+
- Feature two
30+
31+
legal:
32+
license: AGPL-3.0-or-later
33+
mainCopyrightOwner: City of Example
34+
repoOwner: City of Example
35+
36+
maintenance:
37+
type: "contract"
38+
contacts:
39+
- name: Francesco Rossi
40+
email: f.rossi@example.com
41+
affiliation: City of Example
42+
43+
localisation:
44+
localisationReady: true
45+
availableLanguages:
46+
- en
47+
- it
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# ScanCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
#
6+
7+
import os
8+
import pytest
9+
10+
from packagedcode.publiccode import PubliccodeYmlHandler
11+
12+
TESTDATA_DIR = os.path.join(os.path.dirname(__file__), 'data', 'publiccode')
13+
14+
15+
def test_publiccode_yml_basic():
    """
    Check that every mapped field of the sample publiccode.yml is extracted.
    """
    location = os.path.join(TESTDATA_DIR, 'publiccode.yml')
    packages = list(PubliccodeYmlHandler.parse(location))
    assert len(packages) == 1
    pkg = packages[0]

    assert pkg.datasource_id == 'publiccode_yml'
    assert pkg.type == 'publiccode'
    assert pkg.name == 'Medusa'
    assert pkg.version == '1.0.3'
    assert pkg.vcs_url == 'https://example.com/italia/medusa.git'
    assert pkg.homepage_url == 'https://example.com/medusa'

    # The SPDX id written under legal.license must be surfaced verbatim.
    # Accept it in either the extracted statement or the declared expression
    # so that the check does not depend on which of the two the handler fills.
    license_values = (
        pkg.extracted_license_statement,
        pkg.declared_license_expression,
    )
    assert 'AGPL-3.0-or-later' in license_values

    assert pkg.copyright == 'City of Example'
    assert pkg.description.startswith(
        'A very long description of this software.'
    )

    assert 'financial-reporting' in pkg.keywords
    assert 'accounting' in pkg.keywords

    assert len(pkg.parties) == 1
    maintainer = pkg.parties[0]
    assert maintainer.name == 'Francesco Rossi'
    assert maintainer.email == 'f.rossi@example.com'
    assert maintainer.role == 'maintainer'

    assert pkg.extra_data['publiccodeYmlVersion'] == '0.4'
    assert pkg.extra_data['platforms'] == ['web', 'linux']
    assert pkg.extra_data['developmentStatus'] == 'stable'
    assert pkg.extra_data['softwareType'] == 'standalone/desktop'
34+
def test_publiccode_yml_no_version_key_returns_nothing(tmp_path):
    """
    Parsing a YAML mapping that has no 'publiccodeYmlVersion' key must not
    produce any package.
    """
    datafile = tmp_path / 'publiccode.yml'
    datafile.write_text('name: something\nversion: 1.0\n')

    assert list(PubliccodeYmlHandler.parse(str(datafile))) == []
40+
41+
42+
def test_publiccode_yml_path_patterns():
    """
    The handler must match both the .yml and the .yaml file name variants.
    """
    expected_patterns = ('*/publiccode.yml', '*/publiccode.yaml')
    assert PubliccodeYmlHandler.path_patterns == expected_patterns

0 commit comments

Comments
 (0)