11import json
2+ import copy
3+ import fnmatch
4+ import inspect
25from pathlib import Path
36from typing import Iterable , Union
47from rsmetacheck .run_somef import CODEMETA_DEFAULT_NAME
8+ from rsmetacheck .config import AnalysisConfig
59from rsmetacheck .utils .pitfall_utils import extract_programming_languages
610from rsmetacheck .utils .json_ld_utils import create_pitfall_jsonld , save_individual_pitfall_jsonld
711from rsmetacheck .utils .somef_compat import normalize_somef_data
4044from rsmetacheck .scripts .warnings .w010 import detect_git_remote_shorthand_pitfall
4145
4246
43- def detect_all_pitfalls (json_files : Iterable [Path ], pitfalls_output_dir : Union [str , Path ], output_file : Union [str , Path ], verbose : bool = False , notes_output : Union [str , Path ] = None ):
47+ def _source_matches_exclude_patterns (source_value : str , exclude_patterns : list [str ]) -> bool :
48+ source = str (source_value )
49+ basename = Path (source ).name
50+
51+ for pattern in exclude_patterns :
52+ if fnmatch .fnmatch (source , pattern ):
53+ return True
54+ if fnmatch .fnmatch (basename , pattern ):
55+ return True
56+ if pattern in source :
57+ return True
58+
59+ return False
60+
61+
62+ def _filter_somef_data_by_excluded_files (data , exclude_patterns : list [str ]):
63+ if isinstance (data , dict ):
64+ filtered_dict = {}
65+
66+ for key , value in data .items ():
67+ if key == "source" :
68+ if isinstance (value , list ):
69+ kept_sources = [
70+ src for src in value if not _source_matches_exclude_patterns (src , exclude_patterns )
71+ ]
72+ if not kept_sources :
73+ return None
74+ filtered_dict [key ] = kept_sources
75+ else :
76+ if _source_matches_exclude_patterns (value , exclude_patterns ):
77+ return None
78+ filtered_dict [key ] = value
79+ continue
80+
81+ filtered_value = _filter_somef_data_by_excluded_files (value , exclude_patterns )
82+ if filtered_value is not None :
83+ filtered_dict [key ] = filtered_value
84+
85+ return filtered_dict
86+
87+ if isinstance (data , list ):
88+ filtered_list = []
89+ for item in data :
90+ filtered_item = _filter_somef_data_by_excluded_files (item , exclude_patterns )
91+ if filtered_item is not None :
92+ filtered_list .append (filtered_item )
93+ return filtered_list
94+
95+ return data
96+
97+
98+ def _run_detector_with_parameters (detector_func , somef_data , file_name : str , parameters : dict ):
99+ if not parameters :
100+ return detector_func (somef_data , file_name )
101+
102+ signature = inspect .signature (detector_func )
103+ accepts_kwargs = any (
104+ param .kind == inspect .Parameter .VAR_KEYWORD
105+ for param in signature .parameters .values ()
106+ )
107+
108+ if accepts_kwargs :
109+ return detector_func (somef_data , file_name , ** parameters )
110+
111+ accepted_parameter_names = set (signature .parameters .keys ()) - {"somef_data" , "file_name" }
112+ filtered_parameters = {
113+ key : value for key , value in parameters .items () if key in accepted_parameter_names
114+ }
115+
116+ if filtered_parameters :
117+ return detector_func (somef_data , file_name , ** filtered_parameters )
118+
119+ return detector_func (somef_data , file_name )
120+
121+
122+ def detect_all_pitfalls (
123+ json_files : Iterable [Path ],
124+ pitfalls_output_dir : Union [str , Path ],
125+ output_file : Union [str , Path ],
126+ verbose : bool = False ,
127+ notes_output : Union [str , Path ] = None ,
128+ analysis_config : AnalysisConfig = None ,
129+ ):
44130 """
45131 Detect all software repository pitfalls in SoMEF output files using modular detectors.
46132 Now also generates individual JSON-LD files for each repository.
@@ -49,12 +135,21 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
49135 pitfalls_output_dir = Path (pitfalls_output_dir )
50136 pitfalls_output_dir .mkdir (exist_ok = True , parents = True )
51137 json_files = list (json_files )
138+ config = analysis_config or AnalysisConfig .empty ()
52139
53140 if not json_files :
54141 print ("No JSON files found for analysis." )
55142 return
56143
57144 print (f"Analyzing { len (json_files )} SoMEF JSON files..." )
145+ if config .source_path :
146+ print (f"Using config file: { config .source_path } " )
147+ if config .profile :
148+ print (f"Using config profile: { config .profile } " )
149+ if config .ignored_checks :
150+ print (f"Ignoring checks: { ', ' .join (sorted (config .ignored_checks ))} " )
151+ if config .exclude_files :
152+ print (f"Excluded source patterns: { config .exclude_files } " )
58153
59154 results = {
60155 "summary" : {
@@ -321,6 +416,13 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
321416 somef_data = json .load (f )
322417
323418 somef_data = normalize_somef_data (somef_data )
419+ if config .exclude_files :
420+ somef_data = _filter_somef_data_by_excluded_files (
421+ copy .deepcopy (somef_data ),
422+ config .exclude_files ,
423+ )
424+ if somef_data is None :
425+ somef_data = {}
324426
325427 languages = extract_programming_languages (somef_data )
326428
@@ -330,8 +432,16 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
330432 repo_pitfall_results = []
331433
332434 for idx , (detector_func , pitfall_code ) in enumerate (pitfall_detectors ):
435+ if config .is_ignored (pitfall_code ):
436+ continue
437+
333438 try :
334- detector_results = detector_func (somef_data , json_file .name )
439+ detector_results = _run_detector_with_parameters (
440+ detector_func ,
441+ somef_data ,
442+ json_file .name ,
443+ config .get_parameters (pitfall_code ),
444+ )
335445 if not isinstance (detector_results , list ):
336446 detector_results = [detector_results ]
337447
@@ -500,7 +610,15 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
500610 print (f"Error writing output file: { e } " )
501611
502612
503- def main (input_dir = None , somef_json_paths = None , pitfalls_dir = None , analysis_output = None , verbose = False , notes_output = None ):
613+ def main (
614+ input_dir = None ,
615+ somef_json_paths = None ,
616+ pitfalls_dir = None ,
617+ analysis_output = None ,
618+ verbose = False ,
619+ notes_output = None ,
620+ analysis_config : AnalysisConfig = None ,
621+ ):
504622 """
505623 Main function to run all pitfall detections.
506624
@@ -541,7 +659,14 @@ def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_outp
541659 print ("No JSON files found for analysis." )
542660 return
543661
544- detect_all_pitfalls (json_files , pitfalls_directory , output_file , verbose , notes_output )
662+ detect_all_pitfalls (
663+ json_files ,
664+ pitfalls_directory ,
665+ output_file ,
666+ verbose ,
667+ notes_output ,
668+ analysis_config = analysis_config ,
669+ )
545670
546671if __name__ == "__main__" :
547672 main ()
0 commit comments