|
| 1 | +# |
| 2 | +# Copyright (c) nexB Inc. and others. All rights reserved. |
| 3 | +# ScanCode is a trademark of nexB Inc. |
| 4 | +# SPDX-License-Identifier: Apache-2.0 |
| 5 | +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. |
| 6 | +# See https://github.com/nexB/scancode-toolkit for support or download. |
| 7 | +# See https://aboutcode.org for more information about nexB OSS projects. |
| 8 | +# |
| 9 | + |
| 10 | +import logging |
| 11 | +import os |
| 12 | + |
| 13 | +import saneyaml |
| 14 | + |
| 15 | +from packagedcode import models |
| 16 | + |
| 17 | +""" |
| 18 | +Handle publiccode.yml metadata files. |
| 19 | +publiccode.yml is a metadata standard for public sector open source software. |
| 20 | +See https://github.com/publiccodeyml/publiccode.yml |
| 21 | +""" |
| 22 | + |
# Debug switch read once at import time from the SCANCODE_DEBUG_PACKAGE
# environment variable. os.environ.get() returns the raw string, so any
# non-empty value (including "0" or "false") is truthy; it is False when unset.
TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False)

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
| 26 | + |
| 27 | + |
class PubliccodeYmlHandler(models.DatafileHandler):
    """
    Handle publiccode.yml metadata files, a metadata standard for public
    sector open source software.
    """
    datasource_id = 'publiccode_yml'
    path_patterns = ('*/publiccode.yml', '*/publiccode.yaml')
    default_package_type = 'publiccode'
    default_primary_language = None
    description = 'publiccode.yml metadata file'
    documentation_url = 'https://github.com/publiccodeyml/publiccode.yml'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield at most one PackageData built from the publiccode.yml file at
        ``location``.

        Yield nothing when the file does not load as a non-empty mapping or
        when it lacks the 'publiccodeYmlVersion' key.

        ``package_only`` is forwarded to PackageData.from_data().
        """
        with open(location, 'rb') as f:
            data = saneyaml.load(f.read())

        if not data or not isinstance(data, dict):
            return

        # Validate: a publiccode.yml must have 'publiccodeYmlVersion'
        if 'publiccodeYmlVersion' not in data:
            return

        vcs_url = data.get('url')
        homepage_url = data.get('landingURL') or vcs_url

        # License is under legal.license (SPDX expression). Tolerate a
        # malformed 'legal' node (scalar or list) instead of crashing.
        legal = data.get('legal')
        if not isinstance(legal, dict):
            legal = {}
        extracted_license_statement = legal.get('license')
        copyright_statement = (
            legal.get('mainCopyrightOwner') or legal.get('repoOwner')
        )

        # Keywords from categories, kept as a list of strings.
        # NOTE(review): PackageData.keywords is presumably a list field, as
        # in the other packagedcode handlers -- confirm against models.py.
        categories = data.get('categories')
        if isinstance(categories, str):
            categories = [categories]
        elif not isinstance(categories, (list, tuple)):
            categories = []
        keywords = [str(category) for category in categories if category]

        # Parties from maintenance.contacts; skip entries that are not
        # mappings or that carry neither a name nor an email.
        parties = []
        maintenance = data.get('maintenance')
        if not isinstance(maintenance, dict):
            maintenance = {}
        contacts = maintenance.get('contacts')
        if not isinstance(contacts, (list, tuple)):
            contacts = []
        for contact in contacts:
            if not isinstance(contact, dict):
                continue
            contact_name = contact.get('name')
            contact_email = contact.get('email')
            if contact_name or contact_email:
                parties.append(
                    models.Party(
                        type=models.party_person,
                        name=contact_name,
                        email=contact_email,
                        role='maintainer',
                    )
                )

        # Extra data: copy selected top-level keys verbatim when non-empty.
        extra_data = {}
        extra_keys = (
            'publiccodeYmlVersion',
            'platforms',
            'developmentStatus',
            'softwareType',
        )
        for key in extra_keys:
            value = data.get(key)
            if value:
                extra_data[key] = value

        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=data.get('name'),
            version=data.get('softwareVersion'),
            vcs_url=vcs_url,
            homepage_url=homepage_url,
            description=_get_description(data),
            # The raw statement from the manifest goes in as the extracted
            # statement, not as an already-normalized declared expression.
            # NOTE(review): assumes from_data() derives the declared license
            # expression from it unless package_only is set -- confirm.
            extracted_license_statement=extracted_license_statement,
            copyright=copyright_statement,
            keywords=keywords,
            parties=parties,
            extra_data=extra_data,
        )
        # NOTE(review): relies on models.PackageData.from_data(mapping,
        # package_only) as used by other packagedcode handlers -- confirm.
        yield models.PackageData.from_data(package_data, package_only)
| 110 | + |
| 111 | + |
| 112 | +def _get_description(data): |
| 113 | + """ |
| 114 | + Extract the best available description from publiccode.yml's |
| 115 | + multilingual 'description' block. Prefer English, fall back to |
| 116 | + any available language. Returns longDescription, else shortDescription. |
| 117 | + """ |
| 118 | + description_block = data.get('description') or {} |
| 119 | + if not description_block: |
| 120 | + return |
| 121 | + |
| 122 | + lang_data = ( |
| 123 | + description_block.get('en') |
| 124 | + or description_block.get('eng') |
| 125 | + or next(iter(description_block.values()), None) |
| 126 | + ) |
| 127 | + if not lang_data: |
| 128 | + return |
| 129 | + |
| 130 | + long_desc = lang_data.get('longDescription', '').strip() |
| 131 | + short_desc = lang_data.get('shortDescription', '').strip() |
| 132 | + |
| 133 | + return long_desc or short_desc or None |
0 commit comments