Skip to content

Commit 04ca67c

Browse files
Merge pull request #88 from SoftwareUnderstanding/config
Add root config file support for analysis tuning and improve SoMEF failure handling
2 parents 8dd621b + f508923 commit 04ca67c

10 files changed

Lines changed: 491 additions & 29 deletions

File tree

README.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,54 @@ By default, the JSON-LD files generated by RsMetaCheck will only contain informa
170170
poetry run rsmetacheck --input https://github.com/tidyverse/tidyverse --verbose
171171
```
172172

173+
#### Configure Analysis with a Root Config File
174+
175+
You can configure RsMetaCheck with a TOML file at the repository root named `.rsmetacheck.toml` (auto-detected), or pass a custom path with `--config`.
176+
177+
Supported options:
178+
179+
- `ignore`: warnings/pitfalls to ignore (e.g. `P001`, `W002`)
180+
- `exclude_files`: metadata sources to ignore (glob, filename, or substring match)
181+
- `parameters`: per-check parameters for configurable checks
182+
- `profiles`: alternate configurations such as `unstable` or `prerelease`
183+
184+
Example:
185+
186+
```toml
187+
ignore = ["W002"]
188+
exclude_files = ["**/generated/**", "tmp_metadata.json"]
189+
190+
[parameters.P001]
191+
ahead_significant_diff = 2
192+
193+
[parameters.W002]
194+
stale_after_days = 3
195+
196+
[profiles.unstable]
197+
ignore = ["W002", "P017"]
198+
199+
[profiles.unstable.parameters.P001]
200+
ahead_significant_diff = 10
201+
202+
[profiles.prerelease]
203+
ignore = []
204+
205+
[profiles.prerelease.parameters.P001]
206+
ahead_significant_diff = 1
207+
```
208+
209+
Use a specific profile:
210+
211+
```bash
212+
poetry run rsmetacheck --input https://github.com/example/repo --config-profile unstable
213+
```
214+
215+
Use a custom config path:
216+
217+
```bash
218+
poetry run rsmetacheck --input https://github.com/example/repo --config ./ci/rsmetacheck.toml
219+
```
220+
173221
### Output
174222

175223
The tool will:

docs/usage.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,36 @@ Run the analysis:
3535
poetry run rsmetacheck --input repositories.json
3636
```
3737

38+
### Configure Analysis Rules
39+
40+
RsMetaCheck can load a root-level `.rsmetacheck.toml` file to customize analysis behavior.
41+
42+
```toml
43+
ignore = ["W002"]
44+
exclude_files = ["tmp_metadata.json"]
45+
46+
[parameters.P001]
47+
ahead_significant_diff = 10
48+
49+
[profiles.prerelease]
50+
ignore = []
51+
52+
[profiles.unstable]
53+
ignore = ["W002", "P017"]
54+
```
55+
56+
Use a profile:
57+
58+
```bash
59+
poetry run rsmetacheck --input https://github.com/example/repo --config-profile unstable
60+
```
61+
62+
Use an explicit config path:
63+
64+
```bash
65+
poetry run rsmetacheck --input https://github.com/example/repo --config ./ci/rsmetacheck.toml
66+
```
67+
3868
## GitHub Action
3969

4070
You can integrate RsMetaCheck into your GitHub workflows:

src/rsmetacheck/cli.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33
from pathlib import Path
44

5+
from rsmetacheck.config import load_analysis_config
56
from rsmetacheck.run_analyzer import run_analysis
67
from rsmetacheck.run_somef import (
78
ensure_somef_configured,
@@ -69,9 +70,28 @@ def cli():
6970
action="store_true",
7071
help="Include both detected AND undetected pitfalls in the output JSON-LD.",
7172
)
73+
parser.add_argument(
74+
"--config",
75+
default=None,
76+
help="Path to RsMetaCheck TOML config file (default: auto-detect .rsmetacheck.toml at repository root).",
77+
)
78+
parser.add_argument(
79+
"--config-profile",
80+
default=None,
81+
help="Name of config profile to apply (e.g., unstable, prerelease).",
82+
)
7283

7384
args = parser.parse_args()
7485

86+
try:
87+
analysis_config = load_analysis_config(
88+
config_path=args.config,
89+
profile=args.config_profile,
90+
)
91+
except (FileNotFoundError, ValueError, OSError, Exception) as exc:
92+
print(f"Error loading config: {exc}")
93+
return
94+
7595
if args.skip_somef:
7696
print(
7797
f"Skipping SoMEF execution. Analyzing {len(args.input)} existing SoMEF output files..."
@@ -95,6 +115,7 @@ def cli():
95115
args.analysis_output,
96116
verbose=args.verbose,
97117
notes_output=args.notes_output,
118+
analysis_config=analysis_config,
98119
)
99120

100121
else:
@@ -110,37 +131,51 @@ def cli():
110131
"Codemeta generation is ENABLED. Codemeta files will be created for each repository."
111132
)
112133

134+
any_somef_success = False
135+
113136
for input_item in args.input:
114137
if input_item.startswith("http://") or input_item.startswith("https://"):
115138
print(f"Processing repository URL: {input_item}")
116-
run_somef_single(
139+
success = run_somef_single(
117140
input_item,
118141
somef_output_dir,
119142
threshold,
120143
branch=args.branch,
121144
generate_codemeta=generate_codemeta,
122145
)
146+
any_somef_success = any_somef_success or bool(success)
123147
elif os.path.exists(input_item):
124148
print(f"Processing repositories from file: {input_item}")
125-
run_somef_batch(
149+
success = run_somef_batch(
126150
input_item,
127151
somef_output_dir,
128152
threshold,
129153
branch=args.branch,
130154
generate_codemeta=generate_codemeta,
131155
)
156+
any_somef_success = any_somef_success or bool(success)
132157
else:
133158
print(
134159
f"Warning: Skipping invalid input (not a URL or existing file): {input_item}"
135160
)
136161

162+
if not any_somef_success:
163+
print(
164+
"Error: SoMEF did not produce any outputs. Analysis is aborted."
165+
)
166+
print(
167+
"Fix SoMEF/authentication issues and rerun, or run with --skip-somef on existing SoMEF JSON files."
168+
)
169+
return
170+
137171
print(f"\nRunning analysis on outputs in {somef_output_dir}...")
138172
run_analysis(
139173
somef_output_dir,
140174
args.pitfalls_output,
141175
args.analysis_output,
142176
verbose=args.verbose,
143177
notes_output=args.notes_output,
178+
analysis_config=analysis_config,
144179
)
145180

146181

src/rsmetacheck/detect_pitfalls_main.py

Lines changed: 129 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import json
2+
import copy
3+
import fnmatch
4+
import inspect
25
from pathlib import Path
36
from typing import Iterable, Union
47
from rsmetacheck.run_somef import CODEMETA_DEFAULT_NAME
8+
from rsmetacheck.config import AnalysisConfig
59
from rsmetacheck.utils.pitfall_utils import extract_programming_languages
610
from rsmetacheck.utils.json_ld_utils import create_pitfall_jsonld, save_individual_pitfall_jsonld
711
from rsmetacheck.utils.somef_compat import normalize_somef_data
@@ -40,7 +44,89 @@
4044
from rsmetacheck.scripts.warnings.w010 import detect_git_remote_shorthand_pitfall
4145

4246

43-
def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[str, Path], output_file: Union[str, Path], verbose: bool = False, notes_output: Union[str, Path] = None):
47+
def _source_matches_exclude_patterns(source_value: str, exclude_patterns: list[str]) -> bool:
    """Return True when a metadata source matches any exclusion pattern.

    A pattern matches when it glob-matches the full source string,
    glob-matches the final path component of the source, or occurs in the
    source as a plain substring.

    Args:
        source_value: Source reference to test; it is coerced with ``str()``.
        exclude_patterns: Glob patterns, file names, or substrings.

    Returns:
        True if at least one non-empty pattern matches, otherwise False.
    """
    source = str(source_value)
    basename = Path(source).name

    for pattern in exclude_patterns:
        # An empty pattern is a substring of every string, so a stray ""
        # entry would otherwise exclude every source.
        if not pattern:
            continue
        if (
            fnmatch.fnmatch(source, pattern)
            or fnmatch.fnmatch(basename, pattern)
            or pattern in source
        ):
            return True

    return False
60+
61+
62+
def _filter_somef_data_by_excluded_files(data, exclude_patterns: list[str]):
    """Remove entries whose ``source`` matches an exclusion pattern.

    The structure is walked recursively. A dict is dropped when its
    ``"source"`` value (or every element of a ``"source"`` list) matches one
    of the patterns; partially matching source lists are trimmed to the
    sources that remain. Values unrelated to any excluded source, including
    explicit ``None`` values, are kept as they are.

    Args:
        data: Arbitrary nested structure of dicts, lists and scalars.
        exclude_patterns: Patterns passed to ``_source_matches_exclude_patterns``.

    Returns:
        The filtered structure, or ``None`` when ``data`` itself is excluded
        (or was ``None`` to begin with).
    """
    keep, filtered = _filter_excluded_sources(data, exclude_patterns)
    return filtered if keep else None


def _filter_excluded_sources(data, exclude_patterns):
    """Return ``(keep, filtered)``; ``keep`` is False when ``data`` is excluded."""
    if isinstance(data, dict):
        filtered_dict = {}

        for key, value in data.items():
            if key == "source":
                if isinstance(value, list):
                    kept_sources = [
                        src
                        for src in value
                        if not _source_matches_exclude_patterns(src, exclude_patterns)
                    ]
                    # No remaining source backs this entry: drop the whole dict.
                    if not kept_sources:
                        return False, None
                    filtered_dict[key] = kept_sources
                elif _source_matches_exclude_patterns(value, exclude_patterns):
                    return False, None
                else:
                    filtered_dict[key] = value
                continue

            # The explicit flag keeps a legitimate null value distinguishable
            # from "this child was excluded".
            keep, filtered_value = _filter_excluded_sources(value, exclude_patterns)
            if keep:
                filtered_dict[key] = filtered_value

        return True, filtered_dict

    if isinstance(data, list):
        filtered_list = []
        for item in data:
            keep, filtered_item = _filter_excluded_sources(item, exclude_patterns)
            if keep:
                filtered_list.append(filtered_item)
        return True, filtered_list

    return True, data
96+
97+
98+
def _run_detector_with_parameters(detector_func, somef_data, file_name: str, parameters: dict):
    """Call a detector, forwarding only the config parameters it can accept.

    ``somef_data`` and ``file_name`` are always passed positionally. Entries
    of ``parameters`` are forwarded as keyword arguments when the detector
    declares a matching keyword-capable parameter, or when it takes
    ``**kwargs``. Keys that would collide with the two positional arguments
    are never forwarded.

    Args:
        detector_func: Detector callable taking ``(somef_data, file_name, ...)``.
        somef_data: Data passed as the first positional argument.
        file_name: Name passed as the second positional argument.
        parameters: Optional mapping of parameter name to value; may be
            empty or ``None``.

    Returns:
        Whatever ``detector_func`` returns.
    """
    if not parameters:
        return detector_func(somef_data, file_name)

    try:
        signature = inspect.signature(detector_func)
    except (TypeError, ValueError):
        # No inspectable signature, so there is no safe way to tell which
        # keywords are accepted; call without any.
        return detector_func(somef_data, file_name)

    declared = list(signature.parameters.values())
    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
    )
    keyword_kinds = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )

    # The first two positional slots are filled by somef_data / file_name no
    # matter what the detector calls them, so reserve them by position too.
    leading_names = [p.name for p in declared if p.kind in positional_kinds][:2]
    reserved_names = {"somef_data", "file_name", *leading_names}

    accepts_kwargs = any(
        p.kind is inspect.Parameter.VAR_KEYWORD for p in declared
    )

    if accepts_kwargs:
        forwarded = {
            key: value
            for key, value in parameters.items()
            if key not in reserved_names
        }
    else:
        # Only parameters that can be bound by keyword qualify; *args and
        # positional-only names cannot.
        accepted_names = {
            p.name for p in declared if p.kind in keyword_kinds
        } - reserved_names
        forwarded = {
            key: value
            for key, value in parameters.items()
            if key in accepted_names
        }

    return detector_func(somef_data, file_name, **forwarded)
120+
121+
122+
def detect_all_pitfalls(
123+
json_files: Iterable[Path],
124+
pitfalls_output_dir: Union[str, Path],
125+
output_file: Union[str, Path],
126+
verbose: bool = False,
127+
notes_output: Union[str, Path] = None,
128+
analysis_config: AnalysisConfig = None,
129+
):
44130
"""
45131
Detect all software repository pitfalls in SoMEF output files using modular detectors.
46132
Now also generates individual JSON-LD files for each repository.
@@ -49,12 +135,21 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
49135
pitfalls_output_dir = Path(pitfalls_output_dir)
50136
pitfalls_output_dir.mkdir(exist_ok=True, parents=True)
51137
json_files = list(json_files)
138+
config = analysis_config or AnalysisConfig.empty()
52139

53140
if not json_files:
54141
print("No JSON files found for analysis.")
55142
return
56143

57144
print(f"Analyzing {len(json_files)} SoMEF JSON files...")
145+
if config.source_path:
146+
print(f"Using config file: {config.source_path}")
147+
if config.profile:
148+
print(f"Using config profile: {config.profile}")
149+
if config.ignored_checks:
150+
print(f"Ignoring checks: {', '.join(sorted(config.ignored_checks))}")
151+
if config.exclude_files:
152+
print(f"Excluded source patterns: {config.exclude_files}")
58153

59154
results = {
60155
"summary": {
@@ -321,6 +416,13 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
321416
somef_data = json.load(f)
322417

323418
somef_data = normalize_somef_data(somef_data)
419+
if config.exclude_files:
420+
somef_data = _filter_somef_data_by_excluded_files(
421+
copy.deepcopy(somef_data),
422+
config.exclude_files,
423+
)
424+
if somef_data is None:
425+
somef_data = {}
324426

325427
languages = extract_programming_languages(somef_data)
326428

@@ -330,8 +432,16 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
330432
repo_pitfall_results = []
331433

332434
for idx, (detector_func, pitfall_code) in enumerate(pitfall_detectors):
435+
if config.is_ignored(pitfall_code):
436+
continue
437+
333438
try:
334-
detector_results = detector_func(somef_data, json_file.name)
439+
detector_results = _run_detector_with_parameters(
440+
detector_func,
441+
somef_data,
442+
json_file.name,
443+
config.get_parameters(pitfall_code),
444+
)
335445
if not isinstance(detector_results, list):
336446
detector_results = [detector_results]
337447

@@ -500,7 +610,15 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
500610
print(f"Error writing output file: {e}")
501611

502612

503-
def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_output=None, verbose=False, notes_output=None):
613+
def main(
614+
input_dir=None,
615+
somef_json_paths=None,
616+
pitfalls_dir=None,
617+
analysis_output=None,
618+
verbose=False,
619+
notes_output=None,
620+
analysis_config: AnalysisConfig = None,
621+
):
504622
"""
505623
Main function to run all pitfall detections.
506624
@@ -541,7 +659,14 @@ def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_outp
541659
print("No JSON files found for analysis.")
542660
return
543661

544-
detect_all_pitfalls(json_files, pitfalls_directory, output_file, verbose, notes_output)
662+
detect_all_pitfalls(
663+
json_files,
664+
pitfalls_directory,
665+
output_file,
666+
verbose,
667+
notes_output,
668+
analysis_config=analysis_config,
669+
)
545670

546671
if __name__ == "__main__":
547672
main()

0 commit comments

Comments
 (0)