Skip to content

Commit 6cd699b

Browse files
committed
added fixed scripts
1 parent 1a3b3fa commit 6cd699b

2 files changed

Lines changed: 47 additions & 1 deletion

File tree

tools/import_validation/runner.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ class ValidationRunner:
4141

4242
def __init__(self, validation_config_path: str, differ_output: str,
4343
stats_summary: str, lint_report: str, validation_output: str):
44+
self.validation_config_path = validation_config_path
45+
self.stats_summary = stats_summary
4446
self.config = ValidationConfig(validation_config_path)
4547
self.validation_output = validation_output
4648
self.validator = Validator()
@@ -212,6 +214,48 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
212214
if output_dir:
213215
rule_params.setdefault('output_path', output_dir)
214216

217+
# Resolve paths relative to the directory of the validation config.
218+
if 'summary_report' in rule.get('rule_id', ''):
219+
# Helper to find a base directory containing target_sub_path by walking up
220+
def find_base_dir(start_path: str, target_sub_path: str, max_levels: int = 10):
    """Return the nearest directory at or above start_path holding target_sub_path.

    The search begins at start_path itself (made absolute first) and then
    moves to each successive parent. start_path may name a file or a path
    that does not exist; such a candidate simply fails the existence check
    and the walk continues from its parent.

    Args:
        start_path: Path to start searching from. An empty or None value
            short-circuits the search.
        target_sub_path: Relative path that must exist beneath the
            returned directory.
        max_levels: Maximum number of candidate paths to test, counting
            start_path itself. Defaults to 10.

    Returns:
        The absolute path of the first candidate containing
        target_sub_path, or None when start_path is empty, the limit is
        exhausted, or the filesystem root is reached without a match.
    """
    if not start_path:
        return None
    curr = os.path.abspath(start_path)
    for _ in range(max_levels):
        if os.path.exists(os.path.join(curr, target_sub_path)):
            return curr
        parent = os.path.dirname(curr)
        if parent == curr:
            # dirname() of the filesystem root is the root itself: nothing
            # further up to inspect.
            break
        curr = parent
    return None
232+
233+
config_dir = None
234+
# Walk up from validation_config_path, self.stats_summary, or CWD to find where 'golden_data' lives
235+
for start in [self.validation_config_path, self.stats_summary, os.getcwd()]:
236+
config_dir = find_base_dir(start, 'golden_data')
237+
if config_dir:
238+
break
239+
240+
if not config_dir:
241+
config_dir = os.path.dirname(os.path.abspath(self.validation_config_path))
242+
243+
print(f"DEBUG: Found summary_report rule: '{rule.get('rule_id')}'")
244+
print(f"DEBUG: Config directory resolved to: '{config_dir}'")
245+
for path_key in ['golden_files', 'input_files']:
246+
if path_key in rule_params:
247+
val = rule_params[path_key]
248+
print(f"DEBUG: Before resolve '{path_key}': '{val}'")
249+
if isinstance(val, str):
250+
if val and not os.path.isabs(val) and not val.startswith('gs://') and not val.startswith('http://') and not val.startswith('https://'):
251+
rule_params[path_key] = os.path.join(config_dir, val)
252+
elif isinstance(val, list):
253+
rule_params[path_key] = [
254+
os.path.join(config_dir, item) if isinstance(item, str) and item and not os.path.isabs(item) and not item.startswith('gs://') and not item.startswith('http://') and not item.startswith('https://') else item
255+
for item in val
256+
]
257+
print(f"DEBUG: After resolve '{path_key}': '{rule_params[path_key]}'")
258+
215259
if validator_name == 'SQL_VALIDATOR':
216260
result = validation_func(self.data_sources['stats'],
217261
self.data_sources['differ'],

tools/import_validation/validator_goldens.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,9 @@ def load_nodes_from_file(files: str) -> dict:
298298
file_nodes = file_util.file_load_csv_dict(input_file,
299299
key_index=True)
300300
for node in file_nodes.values():
301-
nodes[len(nodes)] = node
301+
# Clean up None/empty keys and strip whitespace from headers/keys to ensure robust parsing
302+
cleaned_node = {k.strip(): v for k, v in node.items() if k is not None and isinstance(k, str) and k.strip() != ''}
303+
nodes[len(nodes)] = cleaned_node
302304
else:
303305
# For MCF or JSON, we assume nodes are already keyed by DCID.
304306
file_nodes = mcf_file_util.load_mcf_nodes(input_file)

0 commit comments

Comments
 (0)