Skip to content

Commit 1c3352d

Browse files
committed
Harden exports and matching determinism
- SQLite/CSV/HTML exports: format u64 addresses as zero-padded hex strings to avoid signed-INTEGER overflow on kernel addresses; escape HTML in names/match_type; defuse CSV formula injection; narrow broad except clauses.
- FFI: wrap every extern "C" entry point in catch_unwind to prevent panics unwinding across the C boundary.
- Matching: switch to FxHashMap/FxHashSet and a total_cmp-based tie-breaker for deterministic results; skip auto-generated names (sub_/FUN_/loc_/...) in name matching; pre-bucket structural matching by BB count to cut O(n²) work on large binaries.
- Scoring: sanitize per-metric and aggregate scores (NaN→0, clamp [0,1]); fix normalized_edit_distance for two empty strings.
- types.rs: add #[serde(default)] + Default derives so older JSON exports deserialize cleanly.
- database.rs: rename export_to_sqlite → export_to_sql_script (writes a .sql script, not a .db).
1 parent 5d18aa3 commit 1c3352d

8 files changed

Lines changed: 306 additions & 174 deletions

File tree

diff_results_ui.py

Lines changed: 44 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import csv
66
import difflib
7+
import html
78
import json
89
import os
910
import sqlite3
@@ -1434,6 +1435,18 @@ def export_to_csv(self):
14341435
if not filename:
14351436
return
14361437

1438+
def _csv_safe(v):
1439+
s = str(v) if v is not None else ''
1440+
if s and s[0] in ('=', '+', '-', '@', '\t', '\r'):
1441+
return "'" + s
1442+
return s
1443+
1444+
def _fmt_addr(v):
1445+
try:
1446+
return f"0x{int(v):016x}"
1447+
except (TypeError, ValueError):
1448+
return str(v)
1449+
14371450
try:
14381451
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
14391452
writer = csv.writer(csvfile)
@@ -1451,13 +1464,13 @@ def export_to_csv(self):
14511464
func_b = result.get('function_b', {})
14521465

14531466
writer.writerow([
1454-
func_a.get('name', ''),
1455-
f"0x{func_a.get('address', 0):x}",
1456-
func_b.get('name', ''),
1457-
f"0x{func_b.get('address', 0):x}",
1467+
_csv_safe(func_a.get('name', '')),
1468+
_fmt_addr(func_a.get('address', 0)),
1469+
_csv_safe(func_b.get('name', '')),
1470+
_fmt_addr(func_b.get('address', 0)),
14581471
f"{result.get('similarity', 0):.4f}",
14591472
f"{result.get('confidence', 0):.4f}",
1460-
result.get('match_type', ''),
1473+
_csv_safe(result.get('match_type', '')),
14611474
func_a.get('size', 0),
14621475
func_b.get('size', 0),
14631476
len(func_a.get('basic_blocks', [])),
@@ -1468,7 +1481,7 @@ def export_to_csv(self):
14681481

14691482
QMessageBox.information(self, "Export Complete", f"Results exported to {filename}")
14701483

1471-
except Exception as e:
1484+
except (OSError, ValueError, TypeError) as e:
14721485
QMessageBox.critical(self, "Export Error", f"Failed to export CSV: {str(e)}")
14731486

14741487
def export_to_sqlite(self):
@@ -1540,7 +1553,7 @@ def _fmt_addr(v):
15401553

15411554
QMessageBox.information(self, "Export Complete", f"Results exported to {filename}")
15421555

1543-
except Exception as e:
1556+
except (OSError, sqlite3.Error, ValueError, TypeError) as e:
15441557
QMessageBox.critical(self, "Export Error", f"Failed to export SQLite: {str(e)}")
15451558

15461559
def export_to_json(self):
@@ -1565,7 +1578,7 @@ def export_to_json(self):
15651578

15661579
QMessageBox.information(self, "Export Complete", f"Results exported to {filename}")
15671580

1568-
except Exception as e:
1581+
except (OSError, ValueError, TypeError) as e:
15691582
QMessageBox.critical(self, "Export Error", f"Failed to export JSON: {str(e)}")
15701583

15711584
def export_to_html(self):
@@ -1581,7 +1594,7 @@ def export_to_html(self):
15811594

15821595
QMessageBox.information(self, "Export Complete", f"Results exported to {filename}")
15831596

1584-
except Exception as e:
1597+
except (OSError, ValueError, TypeError) as e:
15851598
QMessageBox.critical(self, "Export Error", f"Failed to export HTML: {str(e)}")
15861599

15871600
def export_current_diff_to_json(self):
@@ -1668,7 +1681,7 @@ def export_current_diff_to_json(self):
16681681

16691682
QMessageBox.information(self, "Export Complete", f"Current diff view exported to {filename}")
16701683

1671-
except Exception as e:
1684+
except (OSError, ValueError, TypeError) as e:
16721685
QMessageBox.critical(self, "Export Error", f"Failed to export current diff: {str(e)}")
16731686

16741687
def generate_html_report(self):
@@ -1699,8 +1712,8 @@ def generate_html_report(self):
16991712
17001713
<div class="summary">
17011714
<h2>Summary</h2>
1702-
<p><strong>Binary A:</strong> {self.results_data.get('binary_a_name', 'N/A')}</p>
1703-
<p><strong>Binary B:</strong> {self.results_data.get('binary_b_name', 'N/A')}</p>
1715+
<p><strong>Binary A:</strong> {html.escape(str(self.results_data.get('binary_a_name', 'N/A')))}</p>
1716+
<p><strong>Binary B:</strong> {html.escape(str(self.results_data.get('binary_b_name', 'N/A')))}</p>
17041717
<p><strong>Total Matches:</strong> {len(self.filtered_results)}</p>
17051718
<p><strong>Analysis Time:</strong> {self.results_data.get('analysis_time', 0):.2f} seconds</p>
17061719
</div>
@@ -1724,7 +1737,13 @@ def generate_html_report(self):
17241737

17251738
def generate_html_table_rows(self):
17261739
"""Generate HTML table rows for results"""
1727-
rows = ""
1740+
def _fmt_addr(v):
1741+
try:
1742+
return f"0x{int(v):016x}"
1743+
except (TypeError, ValueError):
1744+
return html.escape(str(v))
1745+
1746+
parts = []
17281747
for result in self.filtered_results:
17291748
func_a = result.get('function_a', {})
17301749
func_b = result.get('function_b', {})
@@ -1737,18 +1756,18 @@ def generate_html_table_rows(self):
17371756
else:
17381757
css_class = 'low-confidence'
17391758

1740-
rows += f'''
1741-
<tr class="{css_class}">
1742-
<td>{func_a.get('name', '')}</td>
1743-
<td>0x{func_a.get('address', 0):x}</td>
1744-
<td>{func_b.get('name', '')}</td>
1745-
<td>0x{func_b.get('address', 0):x}</td>
1746-
<td>{result.get('similarity', 0):.4f}</td>
1747-
<td>{result.get('confidence', 0):.4f}</td>
1748-
<td>{result.get('match_type', '')}</td>
1749-
</tr>
1750-
'''
1751-
return rows
1759+
parts.append(
1760+
f'<tr class="{css_class}">'
1761+
f'<td>{html.escape(str(func_a.get("name", "")))}</td>'
1762+
f'<td>{_fmt_addr(func_a.get("address", 0))}</td>'
1763+
f'<td>{html.escape(str(func_b.get("name", "")))}</td>'
1764+
f'<td>{_fmt_addr(func_b.get("address", 0))}</td>'
1765+
f'<td>{result.get("similarity", 0):.4f}</td>'
1766+
f'<td>{result.get("confidence", 0):.4f}</td>'
1767+
f'<td>{html.escape(str(result.get("match_type", "")))}</td>'
1768+
f'</tr>\n'
1769+
)
1770+
return ''.join(parts)
17521771

17531772

17541773
def show_diff_results(results_data, binary_view_a=None, binary_view_b=None):

src/algorithms.rs

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,31 @@ use sha2::{Sha256, Digest};
66

77
pub struct DiffAlgorithms;
88

9+
/// Clamp a score to [0.0, 1.0] and replace NaN with 0.0.
10+
#[inline]
11+
fn sanitize_score(x: f64) -> f64 {
12+
if x.is_nan() { 0.0 } else { x.clamp(0.0, 1.0) }
13+
}
14+
915
impl DiffAlgorithms {
1016
/// Calculate similarity between two functions using multiple metrics
1117
/// and return both the weighted score and detailed per-metric breakdown.
1218
pub fn compute_match_details(func_a: &FunctionInfo, func_b: &FunctionInfo) -> (f64, MatchDetails) {
13-
let cfg_similarity = Self::calculate_cfg_similarity(func_a, func_b);
14-
let bb_similarity = Self::calculate_basic_block_similarity(func_a, func_b);
15-
let instruction_similarity = Self::calculate_instruction_similarity(func_a, func_b);
16-
let edge_similarity = Self::calculate_edge_similarity(func_a, func_b);
17-
let name_similarity = SimilarityAnalyzer::normalized_edit_distance(&func_a.name, &func_b.name);
18-
let call_similarity = SimilarityAnalyzer::function_call_similarity(func_a, func_b);
19-
20-
let weighted_similarity = cfg_similarity * 0.30
21-
+ call_similarity * 0.20
22-
+ bb_similarity * 0.15
23-
+ instruction_similarity * 0.15
24-
+ name_similarity * 0.10
25-
+ edge_similarity * 0.10;
19+
let cfg_similarity = sanitize_score(Self::calculate_cfg_similarity(func_a, func_b));
20+
let bb_similarity = sanitize_score(Self::calculate_basic_block_similarity(func_a, func_b));
21+
let instruction_similarity = sanitize_score(Self::calculate_instruction_similarity(func_a, func_b));
22+
let edge_similarity = sanitize_score(Self::calculate_edge_similarity(func_a, func_b));
23+
let name_similarity = sanitize_score(SimilarityAnalyzer::normalized_edit_distance(&func_a.name, &func_b.name));
24+
let call_similarity = sanitize_score(SimilarityAnalyzer::function_call_similarity(func_a, func_b));
25+
26+
let weighted_similarity = sanitize_score(
27+
cfg_similarity * 0.30
28+
+ call_similarity * 0.20
29+
+ bb_similarity * 0.15
30+
+ instruction_similarity * 0.15
31+
+ name_similarity * 0.10
32+
+ edge_similarity * 0.10,
33+
);
2634

2735
let details = MatchDetails {
2836
cfg_similarity,
@@ -224,7 +232,7 @@ impl DiffAlgorithms {
224232
}
225233
}
226234

227-
confidence.min(1.0)
235+
sanitize_score(confidence)
228236
}
229237

230238
/// Perform isomorphic subgraph matching (edge-degree distribution check)

src/database.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,10 @@ impl DatabaseManager {
115115
Ok(())
116116
}
117117

118-
/// Export results to SQLite database
119-
pub fn export_to_sqlite(database: &DiffDatabase, output_path: &Path) -> Result<()> {
120-
// For now, create a simple SQL script that can be imported
118+
/// Export results as a `.sql` script (CREATE TABLE + INSERT statements)
119+
/// that can be imported into SQLite via `sqlite3 target.db < script.sql`.
120+
/// Does not open a SQLite database itself.
121+
pub fn export_to_sql_script(database: &DiffDatabase, output_path: &Path) -> Result<()> {
121122
let mut sql_content = String::new();
122123

123124
// Create table

0 commit comments

Comments
 (0)