Skip to content

Commit 72615b8

Browse files
authored
Merge pull request #964 from KnowledgeCaptureAndDiscovery/dev
Dev
2 parents 1e4d14d + 30a10fd commit 72615b8

5 files changed

Lines changed: 45 additions & 7 deletions

File tree

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,12 @@ Given a readme file (or a GitHub/Gitlab repository) SOMEF will extract the follo
2727
- **Build file**: Build file(s) of the project. For example, files used to create a Docker image for the target software, package files, etc.
2828
- **Citation**: Preferred citation(s) as the authors have stated in their readme file. SOMEF recognizes Bibtex, Citation File Format files and other means by which authors cite their papers (e.g., by in-text citation).
2929
For CITATION.cff files, SOMEF now generates two separate entries: one for the software tool and another for the preferred citation (if available). This ensures metadata like DOI or version is correctly assigned to each entity.
30-
We aim to recognize the following properties:
30+
We recognize the following properties:
3131
- Title: Title of the publication
3232
- Author: list of author names in the publication
3333
- URL: URL of the publication
3434
- DOI: Digital object identifier of the publication
3535
- Date published
36-
- Version: Software version (if applicable)
3736
- Journal: Journal name where the paper was published
3837
- Year: Year of publication
3938
- Pages: Page range in the journal

docs/citationcff.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ These fields are defined in the [CITATION.cff specification](https://citation-fi
1818
| license - value | license[i].result.value | license |
1919
| license - spdx_id | license[i].result.spdx_id | license |
2020
| license - name | license[i].result.name | license |
21-
21+
| version - value | version[i].result.value | version |
2222
---
2323

2424
*(1)*

docs/index.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ Given a readme file (or a GitHub repository) SOMEF will extract the following ca
3434
- **Build file**: Build file(s) of the project. For example, files used to create a Docker image for the target software, package files, etc.
3535
- **Citation**: Preferred citation(s) as the authors have stated in their readme file. SOMEF recognizes Bibtex, Citation File Format files and other means by which authors cite their papers (e.g., by in-text citation).
3636
For CITATION.cff files, SOMEF now generates two separate entries: one for the software tool and another for the preferred citation (if available). This ensures metadata like DOI or version is correctly assigned to each entity.
37-
We aim to recognize the following properties:
37+
We recognize the following properties:
3838
- Title: Title of the publication
3939
- Author: list of author names in the publication
4040
- URL: URL of the publication

src/somef/process_files.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -576,12 +576,20 @@ def get_file_content_or_link(repo_type, file_path, owner, repo_name, repo_defaul
576576

577577
if yaml_content:
578578
license_value = yaml_content.get("license")
579+
version_value = yaml_content.get("version")
580+
579581
logging.info(f"Extracted license value from CFF: {license_value}")
580582
if license_value:
581583
if isinstance(license_value, list):
582584
license_value = license_value[0]
583585
parse_license_cff(license_value, metadata_result, url)
584586

587+
logging.info(f"Extracted version value from CFF: {version_value}")
588+
if version_value:
589+
if isinstance(version_value, list):
590+
version_value = version_value[0]
591+
parse_version_cff(version_value, metadata_result, url)
592+
585593
root_result = parse_cff_root(yaml_content, metadata_result,url)
586594
root_result[constants.PROP_VALUE] = file_text
587595
# root_result[constants.PROP_TYPE] = constants.FILE_DUMP
@@ -722,7 +730,7 @@ def parse_cff_root(yaml_content, metadata_result, url):
722730

723731
result[constants.PROP_TITLE] = yaml_content.get("title")
724732
result["authors"] = parse_authors_citation(yaml_content.get("authors", []))
725-
result[constants.PROP_VERSION] = yaml_content.get("version")
733+
# result[constants.PROP_VERSION] = yaml_content.get("version")
726734
result[constants.PROP_DOI] = yaml_content.get("doi")
727735
result[constants.PROP_URL] = yaml_content.get("url")
728736
result[constants.PROP_TYPE] = constants.SOFTWARE_APPLICATION
@@ -789,4 +797,25 @@ def parse_license_cff(license_value, metadata_result, url):
789797
logging.error(f"Error parsing license from CFF: {str(e)}")
790798

791799

800+
def parse_version_cff(version_value, metadata_result, url):
    """
    Register the version read from a CFF file under the version category.

    The value is converted with ``str()`` and wrapped in a result dictionary
    typed as "String", which is then handed to ``metadata_result.add_result``
    together with the file-exploration technique and ``url``.

    Any exception raised while building or adding the entry is logged as an
    error and not propagated; the function returns nothing.
    """
    try:
        # str() normalises the value, whatever type the YAML loader produced.
        cff_version = {constants.PROP_VALUE: str(version_value), constants.PROP_TYPE: "String"}
        # NOTE(review): the literal 1 is presumably the confidence score and
        # url the source of the entry - confirm against add_result's signature.
        metadata_result.add_result(
            constants.CAT_VERSION, cff_version, 1, constants.TECHNIQUE_FILE_EXPLORATION, url
        )
    except Exception as e:
        logging.error(f"Error parsing version from CFF: {str(e)}")

src/somef/test/test_JSON_export.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -630,15 +630,25 @@ def test_new_properties_citation_issue_935(self):
630630
assert software_entry is not None, "Software citation (root) not found"
631631
sw_result = software_entry["result"]
632632
assert sw_result["title"] == 'SOMEF: Software metadata extraction framework'
633-
assert sw_result["version"] == "0.1.0"
633+
# assert sw_result["version"] == "0.1.0"
634634
assert "doi" not in sw_result or sw_result.get("doi") is None # it is in preferred (referencePublication) but not in the root
635635

636636
assert preferred_entry is not None, "Preferred citation (article) not found"
637637
pref_result = preferred_entry["result"]
638638
assert pref_result["title"] == "A Framework for Creating Knowledge Graphs of Scientific Software Metadata"
639639
assert pref_result["doi"] == "10.1162/qss_a_00167"
640640
assert pref_result["journal"] == "Quantitative Science Studies"
641-
assert "version" not in pref_result # it is in the root in citation but not in the preferred (referencePublication)
641+
# assert "version" not in pref_result # it is in the root in citation but not in the preferred (referencePublication)
642+
643+
versions = json_content.get(constants.CAT_VERSION, [])
644+
cff_version_entry = next(
645+
(v for v in versions if "CITATION.cff" in v.get("source", "")),
646+
None
647+
)
648+
649+
# 2. Validate that the version exists in its new location
650+
assert cff_version_entry is not None, "Version from CFF not found in global version field"
651+
assert cff_version_entry["result"]["value"] == "0.1.0"
642652

643653
os.remove(test_data_path + "test_new_properties_citation_issue_935.json")
644654

0 commit comments

Comments
 (0)