Skip to content

Commit 08ff74e

Browse files
Added patches for P001 and W004 addressing issues #78 #81
1 parent 4720b15 commit 08ff74e

9 files changed

Lines changed: 286 additions & 25 deletions

File tree

README.md

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,36 @@ The `repositories.json` file should be structured as follows:
118118
poetry run rsmetacheck --input repositories.json \
119119
--somef-output ./results/somef \
120120
--pitfalls-output ./results/pitfalls \
121-
--analysis-output ./results/summary.json
121+
--analysis-output ./results/summary.json \
122+
--notes-output ./results/notes.json
122123
```
123124

125+
#### Version Discrepancy Notes
126+
127+
When the metadata version is ahead of the release version by only a small margin (each of the major, minor, and patch components differs by less than 2, e.g., metadata `0.4.3.dev1` vs release `0.4.2`; pre-release suffixes such as `.dev1` are ignored in the comparison), MetaCheck records a **note** rather than a full pitfall. To capture these observations, use the `--notes-output` flag:
128+
129+
```bash
130+
poetry run rsmetacheck --input https://github.com/example/repo --notes-output ./notes.json
131+
```
132+
133+
The notes file is only created when there are observations to report and the `--notes-output` path is specified. Its structure is:
134+
135+
```json
136+
{
137+
"total_notes": 1,
138+
"notes": [
139+
{
140+
"repository": "example/repo",
141+
"file_name": "repo_output.json",
142+
"code": "P001",
143+
"note": "Version discrepancy: metadata '0.4.3.dev1' vs release '0.4.2'"
144+
}
145+
]
146+
}
147+
```
148+
149+
If the metadata version is ahead of the release version by a significant margin (any version component differs by 2 or more, e.g., metadata `0.12.4` vs release `0.12.1`), it is still flagged as a pitfall.
150+
124151
#### Skip SoMEF and Analyze Existing Outputs
125152

126153
If you've already run SoMEF separately:

src/rsmetacheck/cli.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ def cli():
4040
default=os.path.join(os.getcwd(), "analysis_results.json"),
4141
help="File path for summary results (default: ./analysis_results.json).",
4242
)
43+
parser.add_argument(
44+
"--notes-output",
45+
default=None,
46+
help="File path for notes output (default: None, notes file is not created unless specified).",
47+
)
4348
parser.add_argument(
4449
"--threshold",
4550
type=float,
@@ -89,6 +94,7 @@ def cli():
8994
args.pitfalls_output,
9095
args.analysis_output,
9196
verbose=args.verbose,
97+
notes_output=args.notes_output,
9298
)
9399

94100
else:
@@ -134,6 +140,7 @@ def cli():
134140
args.pitfalls_output,
135141
args.analysis_output,
136142
verbose=args.verbose,
143+
notes_output=args.notes_output,
137144
)
138145

139146

src/rsmetacheck/detect_pitfalls_main.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
from rsmetacheck.scripts.warnings.w010 import detect_git_remote_shorthand_pitfall
4141

4242

43-
def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[str, Path], output_file: Union[str, Path], verbose: bool = False):
43+
def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[str, Path], output_file: Union[str, Path], verbose: bool = False, notes_output: Union[str, Path] = None):
4444
"""
4545
Detect all software repository pitfalls in SoMEF output files using modular detectors.
4646
Now also generates individual JSON-LD files for each repository.
@@ -279,6 +279,7 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
279279
repos_with_target_languages = 0
280280
jsonld_files_created = 0
281281
pitfall_counts = [0] * 29
282+
notes_list = []
282283

283284
pitfall_detectors = [
284285
(detect_version_mismatch, "P001"), # Index 0 -> P001
@@ -357,13 +358,28 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
357358
issue_type = "Pitfall" if pitfall_result.get("has_pitfall", False) else "Warning"
358359
print(f"{pitfall_code} - {issue_type} found in {json_file.name}")
359360

361+
if pitfall_result.get("has_note", False):
362+
repo_name = json_file.name
363+
if "full_name" in somef_data and somef_data["full_name"]:
364+
for item in somef_data["full_name"]:
365+
if "result" in item and "value" in item["result"]:
366+
repo_name = item["result"]["value"]
367+
break
368+
notes_list.append({
369+
"repository": repo_name,
370+
"file_name": json_file.name,
371+
"code": pitfall_code,
372+
"note": pitfall_result.get("note_text", "")
373+
})
374+
print(f"{pitfall_code} - Note added for {json_file.name}")
375+
360376
except Exception as e:
361377
print(f"Error running {pitfall_code} detector on {json_file.name}: {e}")
362378
continue
363379

364380
try:
365381
has_any_issue = any(
366-
result.get("has_pitfall", False) or result.get("has_warning", False)
382+
result.get("has_pitfall", False) or result.get("has_warning", False) or result.get("has_note", False)
367383
for result in repo_pitfall_results
368384
)
369385

@@ -437,11 +453,28 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
437453

438454
print(f"Summary results saved to: {output_file}")
439455

456+
if notes_list and notes_output:
457+
try:
458+
notes_path = Path(notes_output)
459+
notes_data = {
460+
"total_notes": len(notes_list),
461+
"notes": notes_list
462+
}
463+
with open(notes_path, 'w', encoding='utf-8') as f:
464+
json.dump(notes_data, f, indent=2, ensure_ascii=False)
465+
print(f"\nNotes ({len(notes_list)}) saved to: {notes_path}")
466+
except Exception as e:
467+
print(f"Error writing notes file: {e}")
468+
elif notes_list:
469+
print(f"\n{len(notes_list)} note(s) were found but no --notes-output path was provided. Skipping notes file.")
470+
else:
471+
print("\nNo notes generated.")
472+
440473
except Exception as e:
441474
print(f"Error writing output file: {e}")
442475

443476

444-
def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_output=None, verbose=False):
477+
def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_output=None, verbose=False, notes_output=None):
445478
"""
446479
Main function to run all pitfall detections.
447480
@@ -451,6 +484,7 @@ def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_outp
451484
pitfalls_dir (str|Path, optional): Directory to save pitfall JSON-LD files.
452485
analysis_output (str|Path, optional): Path to save summary results JSON.
453486
verbose (bool, optional): Include both detected AND undetected pitfalls in JSON-LD.
487+
notes_output (str|Path, optional): Path to save notes JSON file.
454488
455489
Note: Provide either input_dir OR somef_json_paths, not both.
456490
If both are provided, somef_json_paths takes precedence.
@@ -481,7 +515,7 @@ def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_outp
481515
print("No JSON files found for analysis.")
482516
return
483517

484-
detect_all_pitfalls(json_files, pitfalls_directory, output_file, verbose)
518+
detect_all_pitfalls(json_files, pitfalls_directory, output_file, verbose, notes_output)
485519

486520
if __name__ == "__main__":
487521
main()

src/rsmetacheck/run_analyzer.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from rsmetacheck.detect_pitfalls_main import main
44

55

6-
def run_analysis(somef_input: Union[str, Path, Iterable[Path]], pitfalls_dir: Union[str, Path], analysis_file: Union[str, Path], verbose: bool = False):
6+
def run_analysis(somef_input: Union[str, Path, Iterable[Path]], pitfalls_dir: Union[str, Path], analysis_file: Union[str, Path], verbose: bool = False, notes_output: Union[str, Path] = None):
77
"""
88
Run metadata analysis using existing code.
99
@@ -13,17 +13,18 @@ def run_analysis(somef_input: Union[str, Path, Iterable[Path]], pitfalls_dir: Un
1313
pitfalls_dir: Directory to save pitfall JSON-LD files
1414
analysis_file: Path to save summary results JSON
1515
verbose: bool indicating if both detected and undetected checks should be logged.
16+
notes_output: Path to save notes JSON file.
1617
"""
1718
print(f"\nRunning analysis...")
1819

1920
if isinstance(somef_input, (str, Path)):
2021
somef_path = Path(somef_input)
2122
if somef_path.is_dir():
2223
print(f"Using directory: {somef_input}")
23-
main(input_dir=somef_input, pitfalls_dir=pitfalls_dir, analysis_output=analysis_file, verbose=verbose)
24+
main(input_dir=somef_input, pitfalls_dir=pitfalls_dir, analysis_output=analysis_file, verbose=verbose, notes_output=notes_output)
2425
else:
2526
print(f"Error: {somef_input} is not a valid directory")
2627
else:
2728
json_files = list(somef_input)
2829
print(f"Using {len(json_files)} specified JSON files")
29-
main(somef_json_paths=json_files, pitfalls_dir=pitfalls_dir, analysis_output=analysis_file, verbose=verbose)
30+
main(somef_json_paths=json_files, pitfalls_dir=pitfalls_dir, analysis_output=analysis_file, verbose=verbose, notes_output=notes_output)

src/rsmetacheck/scripts/pitfalls/p001.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,33 @@
1+
import re
12
from typing import Dict, Optional
23
from rsmetacheck.utils.pitfall_utils import normalize_version
34
from rsmetacheck.utils.pitfall_utils import extract_metadata_source_filename
45

6+
7+
def _parse_version_components(version_str: str) -> tuple:
8+
cleaned = re.sub(r"[-_.]?(dev|alpha|beta|rc|pre|post|a|b)\d*.*", "", version_str, flags=re.IGNORECASE)
9+
parts = re.findall(r"\d+", cleaned)
10+
components = [int(p) for p in parts[:3]]
11+
while len(components) < 3:
12+
components.append(0)
13+
return tuple(components)
14+
15+
16+
def _version_diff_significant(v1: str, v2: str) -> bool:
    """
    Report whether two versions are far apart.

    Both strings are reduced to their numeric components via
    ``_parse_version_components`` and compared position by position; the
    difference is significant as soon as one pair of components is at least
    2 apart.
    """
    left = _parse_version_components(v1)
    right = _parse_version_components(v2)
    return any(abs(x - y) >= 2 for x, y in zip(left, right))
23+
24+
25+
def _metadata_ahead_of_release(metadata_version: str, release_version: str) -> bool:
    """
    Return True when the metadata version is strictly newer than the release.

    The comparison is a tuple comparison of the numeric components produced by
    ``_parse_version_components``, so equal components yield False.
    """
    return (
        _parse_version_components(metadata_version)
        > _parse_version_components(release_version)
    )
29+
30+
531
def extract_version_from_metadata(somef_data: Dict) -> Optional[Dict[str, str]]:
632
"""
733
Extract version from metadata files (codemeta.json, DESCRIPTION, etc.) in SoMEF output.
@@ -69,11 +95,13 @@ def detect_version_mismatch(somef_data: Dict, file_name: str) -> Dict:
6995
"""
7096
result = {
7197
"has_pitfall": False,
98+
"has_note": False,
7299
"file_name": file_name,
73100
"metadata_version": None,
74101
"release_version": None,
75102
"metadata_source": None,
76-
"metadata_source_file": None
103+
"metadata_source_file": None,
104+
"note_text": None
77105
}
78106

79107
metadata_version_info = extract_version_from_metadata(somef_data)
@@ -89,7 +117,11 @@ def detect_version_mismatch(somef_data: Dict, file_name: str) -> Dict:
89117
result["metadata_source"] = metadata_version_info["source"]
90118
result["metadata_source_file"] = extract_metadata_source_filename(metadata_version_info["source"])
91119

92-
if metadata_version != normalized_release_version:
93-
result["has_pitfall"] = True
120+
if _metadata_ahead_of_release(metadata_version, normalized_release_version):
121+
if _version_diff_significant(metadata_version, normalized_release_version):
122+
result["has_pitfall"] = True
123+
else:
124+
result["has_note"] = True
125+
result["note_text"] = f"Version discrepancy: metadata '{metadata_version}' vs release '{normalized_release_version}'"
94126

95127
return result

src/rsmetacheck/scripts/warnings/w004.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
1+
import re
12
from typing import Dict
23

4+
5+
def _name_contains_version(name: str) -> bool:
6+
return bool(re.search(r"\d", name))
7+
8+
9+
def _value_is_list(result_data: Dict) -> bool:
10+
name_val = result_data.get("name")
11+
value_val = result_data.get("value")
12+
return isinstance(name_val, list) or isinstance(value_val, list)
13+
14+
315
def detect_programming_language_no_version_pitfall(somef_data: Dict, file_name: str) -> Dict:
416
"""
517
Detect when programming languages or requirements in codemeta.json do not have versions
@@ -26,6 +38,13 @@ def detect_programming_language_no_version_pitfall(somef_data: Dict, file_name:
2638

2739
if "version" not in result_data or result_data.get("version") is None:
2840
lang_name = result_data.get("name", "Unknown")
41+
42+
if _value_is_list(result_data):
43+
continue
44+
45+
if isinstance(lang_name, str) and _name_contains_version(lang_name):
46+
continue
47+
2948
result["programming_languages_without_version"].append(lang_name)
3049
result["source"] = source
3150
result["has_warning"] = True

src/rsmetacheck/utils/json_ld_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def fetch_latest_commit_id(repo_url: str) -> str:
6161

6262
elif repo_url.startswith("https://"):
6363
# Handles gitlab.com and any self-hosted GitLab instance.
64-
# The GitLab API v4 is tried; a failed request returns 'Unknown' gracefully.
64+
# The GitLab API v4 is tried; a failed request returns 'Unknown'.
6565
parsed = urllib.parse.urlparse(repo_url)
6666
host = f"{parsed.scheme}://{parsed.netloc}"
6767
project_path = parsed.path.strip("/")

0 commit comments

Comments
 (0)