Skip to content

Commit a9512a3

Browse files
authored
218 jb 1 (#228)
* #218 * #218 * #218 * #218
1 parent 364d796 commit a9512a3

3 files changed

Lines changed: 68 additions & 50 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning].
77

88
## [Unreleased]
99

10+
## [0.0.27] - 2025-08-06
11+
12+
### Changed in 0.0.27
13+
14+
- Corrected relationship counts in sz_snapshot
15+
- Fixed reported bugs and addressed requests in sz_explorer
16+
1017
## [0.0.26] - 2025-07-11
1118

1219
### Changed in 0.0.26

sz_tools/sz_explorer

Lines changed: 46 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1206,12 +1206,9 @@ class EdaSdkWrapper:
12061206
distinct_usage_type = distinct_feat_record.get("USAGE_TYPE", "")
12071207
# search request does contain feat_desc_values
12081208
if not distinct_feat_record.get("FEAT_DESC_VALUES"):
1209-
feat_record = distinct_feat_record.copy()
1210-
distinct_feat_record["FEAT_DESC_VALUES"] = [
1211-
feat_record,
1212-
]
1209+
distinct_feat_record["FEAT_DESC_VALUES"] = [distinct_feat_record]
12131210
for feat_record in distinct_feat_record["FEAT_DESC_VALUES"]:
1214-
feat_record["RECORD_COUNT"] = 0 # will be incremented later
1211+
feat_record = feat_record.copy()
12151212
feat_record["FTYPE_CODE"] = ftype_code
12161213
feat_record["USAGE_TYPE"] = feat_record.get("USAGE_TYPE", distinct_usage_type)
12171214
if feat_record["USAGE_TYPE"] == "PRIMARY":
@@ -1220,25 +1217,26 @@ class EdaSdkWrapper:
12201217
feat_record["USAGE_TYPE_SORT"] = f"2-{feat_record['USAGE_TYPE']}"
12211218
else:
12221219
feat_record["USAGE_TYPE_SORT"] = "3-UNSPECIFIED"
1223-
features_by_type[ftype_code].append(feat_record)
12241220
lib_feat_id = feat_record["LIB_FEAT_ID"]
12251221
if lib_feat_id in features_by_id: # may be a different usage type
1226-
usage_type1 = features_by_id.get("USAGE_TYPE", "")
1222+
usage_type1 = features_by_id[lib_feat_id].get("USAGE_TYPE", "")
12271223
usage_type2 = feat_record.get("USAGE_TYPE", "")
1228-
if usage_type2 and usage_type2 != usage_type1:
1229-
delim = ", " if usage_type1 else ""
1230-
features_by_id[lib_feat_id]["USAGE_TYPE"] += delim + usage_type2
1224+
if usage_type1 and usage_type2 and usage_type2 != usage_type1:
1225+
features_by_id[lib_feat_id]["USAGE_TYPE"] += ", " + usage_type2
12311226
else:
12321227
features_by_id[lib_feat_id] = feat_record
1228+
1229+
# features_by_type is updated afterwards in case there are duplicate lib_feat_ids
1230+
for feat_record in features_by_id.values():
1231+
features_by_type[feat_record["FTYPE_CODE"]].append(feat_record)
1232+
12331233
return {"BY_ID": features_by_id, "BY_TYPE": features_by_type}
12341234

12351235
def get_record_features(self, entity_features, feature_list):
12361236
features_by_id = {}
1237-
features_by_type = {}
12381237
for feat_record in feature_list:
12391238
lib_feat_id = feat_record["LIB_FEAT_ID"]
12401239
if lib_feat_id in entity_features:
1241-
entity_features[lib_feat_id]["RECORD_COUNT"] += 1
12421240
feature_data = entity_features[lib_feat_id].copy()
12431241
feature_data["USAGE_TYPE"] = feat_record.get("USAGE_TYPE", "")
12441242
if feature_data["USAGE_TYPE"] == "PRIMARY":
@@ -1248,18 +1246,21 @@ class EdaSdkWrapper:
12481246
else:
12491247
feature_data["USAGE_TYPE_SORT"] = "3-UNSPECIFIED"
12501248
ftype_code = feature_data["FTYPE_CODE"]
1251-
if ftype_code not in features_by_type:
1252-
features_by_type[ftype_code] = [feature_data]
1253-
else:
1254-
features_by_type[ftype_code].append(feature_data)
12551249
if lib_feat_id in features_by_id: # may be a different usage type
1256-
usage_type1 = features_by_id.get("USAGE_TYPE", "")
1257-
usage_type2 = feature_data.get("USAGE_TYPE", "")
1258-
if usage_type2 and usage_type2 != usage_type1:
1259-
delim = ", " if usage_type1 else ""
1260-
features_by_id[lib_feat_id]["USAGE_TYPE"] += delim + usage_type2
1250+
usage_type1 = features_by_id[lib_feat_id].get("USAGE_TYPE", "")
1251+
usage_type2 = feat_record.get("USAGE_TYPE", "")
1252+
if usage_type1 and usage_type2 and usage_type2 != usage_type1:
1253+
features_by_id[lib_feat_id]["USAGE_TYPE"] += ", " + usage_type2
12611254
else:
12621255
features_by_id[lib_feat_id] = feature_data
1256+
# features_by_type is updated afterwards in case there are duplicate lib_feat_ids
1257+
features_by_type = {}
1258+
for feat_record in features_by_id.values():
1259+
if feat_record["FTYPE_CODE"] not in features_by_type:
1260+
features_by_type[feat_record["FTYPE_CODE"]] = [feat_record]
1261+
else:
1262+
features_by_type[feat_record["FTYPE_CODE"]].append(feat_record)
1263+
12631264
return {"BY_ID": features_by_id, "BY_TYPE": features_by_type}
12641265

12651266
def regroup_by_type(self, features_by_id):
@@ -1340,11 +1341,9 @@ class EdaSdkWrapper:
13401341
score_records = {}
13411342
if match_info.get("WHY_KEY_DETAILS"):
13421343
for best_score in match_info["WHY_KEY_DETAILS"].get("CONFIRMATIONS", []):
1343-
best_score["TOKEN"] = "+" + best_score["TOKEN"]
13441344
key = f"{best_score['INBOUND_FEAT_ID']}-{best_score['CANDIDATE_FEAT_ID']}"
13451345
score_records[key] = best_score
13461346
for best_score in match_info["WHY_KEY_DETAILS"].get("DENIALS", []):
1347-
best_score["TOKEN"] = "-" + best_score["TOKEN"]
13481347
key = f"{best_score['INBOUND_FEAT_ID']}-{best_score['CANDIDATE_FEAT_ID']}"
13491348
score_records[key] = best_score
13501349
for ftype_code in match_info.get("FEATURE_SCORES", {}):
@@ -1496,6 +1495,11 @@ class EdaSdkWrapper:
14961495

14971496
errule_code = step["MATCH_INFO"].get("ERRULE_CODE", "None")
14981497
match_key = step["MATCH_INFO"].get("MATCH_KEY", "None")
1498+
if not match_key.startswith("+NAME"):
1499+
# eventually get match_key details and check name score
1500+
reason = "Match_key has no or partial name match"
1501+
summary["INTERESTING_STEP"][step_num] = reason
1502+
14991503
if errule_code not in summary["PRINCIPLE"]:
15001504
summary["PRINCIPLE"][errule_code] = {"COUNT": 1, "MATCH_KEY": {}}
15011505
else:
@@ -1522,7 +1526,6 @@ class EdaSdkWrapper:
15221526
"STEPS": steps,
15231527
"FINAL_ENTITIES": final_entities,
15241528
}
1525-
15261529
return how_data
15271530

15281531
def reorder_search_results(self, resolved_entities):
@@ -1660,11 +1663,6 @@ class EdaSdkWrapper:
16601663
if any(x in stats for x in ("~", "!", "#")):
16611664
feat_color += ",dim" if feat_color else "dim"
16621665

1663-
# mixed_color = False / mixed_color and wrap text would not work well with each other
1664-
if feature.get("MATCH_KEY_TOKEN"):
1665-
feat_color = "good" if feature["MATCH_KEY_TOKEN"].startswith("+") else "bad"
1666-
# mixed_color = feature["FTYPE_CODE"] = feature["MATCH_KEY_TOKEN"][1:]
1667-
16681666
display_list.extend(wrap_text(f"{feat_desc} {stats}", attr_width, feat_color))
16691667

16701668
if feature.get("MATCHED_FEAT_DESC"):
@@ -1960,7 +1958,7 @@ class EdaSdkWrapper:
19601958

19611959
entity_only_features = []
19621960
for feat_data in entity_features.values():
1963-
if feat_data["RECORD_COUNT"] == 0:
1961+
if feat_data["FTYPE_CODE"] in self.senzing_features:
19641962
entity_only_features.append(f"{colorize(feat_data['FTYPE_CODE'], 'bad')}: {feat_data['FEAT_DESC']}")
19651963
if entity_only_features:
19661964
tbl.rows.insert(0, [colorize_dsrc("SENZING"), "\n".join(entity_only_features), ""])
@@ -2297,7 +2295,7 @@ class EdaSdkWrapper:
22972295
virtual_id1 = step_data["ENTITY_LIST"][0]["VIRTUAL_ID"]
22982296
virtual_id2 = step_data["ENTITY_LIST"][1]["VIRTUAL_ID"]
22992297

2300-
# debug_print(step_data["ENTITY_LIST"][0])
2298+
# debug_print(step_data)
23012299
# source_row.append(self.fmt_record_list(records, 1, attr_width))
23022300

23032301
tbl = eda_table()
@@ -2315,6 +2313,13 @@ class EdaSdkWrapper:
23152313
self.fmt_record_list(step_data["ENTITY_LIST"][1]["RECORDS"], **kwargs), # .replace("\n", " | ")[0:80],
23162314
],
23172315
]
2316+
if step_data["STEP_TYPE"] == "Create virtual entity":
2317+
left_side_feat = "CANDIDATE_FEAT_DESC"
2318+
right_side_feat = "INBOUND_FEAT_DESC"
2319+
else:
2320+
left_side_feat = "INBOUND_FEAT_DESC"
2321+
right_side_feat = "CANDIDATE_FEAT_DESC"
2322+
23182323
match_key = step_data["MATCH_INFO"].get("MATCH_KEY", "")
23192324
feature_scores = step_data["MATCH_INFO"]["FEATURE_SCORES"]
23202325
for ftype_code in sorted(set(feature_scores), key=lambda k: self.ftype_code_order[k]):
@@ -2344,9 +2349,9 @@ class EdaSdkWrapper:
23442349
tbl.rows.append(
23452350
[
23462351
colorize(ftype_code, "dim"),
2347-
best_score["INBOUND_FEAT_DESC"],
2352+
best_score[left_side_feat],
23482353
colorize(score_value, feat_color),
2349-
best_score["CANDIDATE_FEAT_DESC"],
2354+
best_score[right_side_feat],
23502355
]
23512356
)
23522357
# tbl.rows[0][0].append(colorize(ftype_code, "dim"))
@@ -2605,7 +2610,7 @@ class EdaSdkWrapper:
26052610

26062611
def how_summary(self, how_data):
26072612
summary_node = eda_node("summary")
2608-
summary_node.node_desc = colorize("HOW STATISTICS", "") # "highlight2")
2613+
summary_node.node_desc = f"How summary for {self.fmt_entity_desc(how_data)}"
26092614

26102615
category_node = eda_node("resolution")
26112616
category_node.node_desc = self.fmt_how_statistic_hdr("RESOLUTION SUMMARY")
@@ -2705,12 +2710,14 @@ class EdaSdkWrapper:
27052710
root_node.node_desc = "n/a"
27062711
else:
27072712
root_node.node_desc = colorize("Re-evaluation needed! ", "bad")
2713+
final_node_cnt = 0
27082714
for final_entity in final_entities:
27092715
final_id = final_entity["VIRTUAL_ENTITY_ID"]
27102716
if len(final_entities) == 1:
27112717
final_node_msg = "final entity"
27122718
else:
2713-
final_node_msg = "final entity"
2719+
final_node_cnt += 1
2720+
final_node_msg = f"final entity {final_node_cnt} of {len(final_entities)}"
27142721
final_node = eda_node(final_id)
27152722
final_node.node_desc = f"{colorize_entity(final_id)} {final_node_msg}"
27162723
final_node.step_created = 0
@@ -2768,7 +2775,7 @@ class EdaSdkWrapper:
27682775
node.node_desc += " was orphaned!"
27692776
ordered_nodes.append(node)
27702777
root_node.children = ordered_nodes
2771-
return root_node.render_tree()
2778+
return f"How decision tree for {self.fmt_entity_desc(how_data)}\n\n" + root_node.render_tree()
27722779

27732780
def how_columnar(self, how_data, **kwargs):
27742781
entity_id = how_data["ENTITY_ID"]
@@ -2786,7 +2793,8 @@ class EdaSdkWrapper:
27862793
"MATCH_INFO": step_data[step_num]["MATCH_INFO"],
27872794
}
27882795
# entities, how_data = self.why_how(entity_id)
2789-
kwargs["report_title"] = f"How for entity: {colorize_entity(entity_id)}"
2796+
2797+
kwargs["report_title"] = f"Columnar how for {self.fmt_entity_desc(how_data)}"
27902798
kwargs["row1_title"] = "VIRTUAL_ID"
27912799
if how_data["SUMMARY"]["REEVALUATION_MSG"]:
27922800
kwargs["report_title"] += " " + colorize(how_data["SUMMARY"]["REEVALUATION_MSG"], "bad")
@@ -3396,7 +3404,7 @@ class EdaCmd(cmd.Cmd):
33963404
elif token.upper() in sdk_wrapper.ftype_code_lookup:
33973405
feature_list.append(token)
33983406
elif token.lower() in sdk_wrapper.how_views.values():
3399-
kwarg_dict["how_view"] = token.lower
3407+
kwarg_dict["how_view"] = token.lower()
34003408
else:
34013409
remaining_tokens.append(token)
34023410
prior_token = token.upper()

sz_tools/sz_snapshot

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -479,10 +479,8 @@ class SnapshotWriter:
479479
stat_keys.extend(["PRINCIPLES", principle_matchkey])
480480
self.update_stat_pack(stat_keys, {"COUNT": 1, "SAMPLE": [entity_id]})
481481

482-
# if len(entity0_sources) > 1:
483-
# include single source so can find non-matches, ie customers not on watch list or in reference file
484-
multi_source_key = "||".join(sorted(entity0_sources.keys()))
485-
self.update_stat_pack(["ENTITY_SOURCES", multi_source_key], {"ENTITY_COUNT": 1, "SAMPLE": [entity_id]})
482+
source_key = "||".join(sorted(entity0_sources.keys()))
483+
self.update_stat_pack(["ENTITY_SOURCES", source_key], {"ENTITY_COUNT": 1, "SAMPLE": [entity_id]})
486484
elif related_id > entity_id:
487485
sample = f"{entity_id} {related_id}"
488486
principle_matchkey = list(resume_data[related_id]["PRINCIPLES"].keys())[0]
@@ -494,16 +492,21 @@ class SnapshotWriter:
494492

495493
for data_source1 in entity0_sources:
496494
for data_source2 in resume_data[related_id]["DATA_SOURCES"]:
495+
stat_key_list = []
497496
if data_source1 == data_source2:
498-
stat_keys = ["DATA_SOURCES", data_source1, match_level]
497+
stat_key_list.append(["DATA_SOURCES", data_source1, match_level])
499498
else:
500-
data_source_pair = f"{data_source1}||{data_source2}"
501-
if data_source_pair not in self.stat_pack["CROSS_SOURCES"]:
502-
self.initialize_match_levels(["CROSS_SOURCES", data_source_pair])
503-
stat_keys = ["CROSS_SOURCES", data_source_pair, match_level]
504-
self.update_stat_pack(stat_keys, {"RELATION_COUNT": 1})
505-
stat_keys.extend(["PRINCIPLES", principle_matchkey])
506-
self.update_stat_pack(stat_keys, {"COUNT": 1, "SAMPLE": [sample]})
499+
for data_source_pair in [
500+
f"{data_source1}||{data_source2}",
501+
f"{data_source2}||{data_source1}",
502+
]:
503+
if data_source_pair not in self.stat_pack["CROSS_SOURCES"]:
504+
self.initialize_match_levels(["CROSS_SOURCES", data_source_pair])
505+
stat_key_list.append(["CROSS_SOURCES", data_source_pair, match_level])
506+
for stat_keys in stat_key_list:
507+
self.update_stat_pack(stat_keys, {"RELATION_COUNT": 1})
508+
stat_keys.extend(["PRINCIPLES", principle_matchkey])
509+
self.update_stat_pack(stat_keys, {"COUNT": 1, "SAMPLE": [sample]})
507510

508511

509512
def check_stat_pack(stats_file_name, csv_file_name, args):

0 commit comments

Comments
 (0)