Skip to content

Commit 7a47b96

Browse files
committed
feat: add comments and clean up blank lines
1 parent 8072da8 commit 7a47b96

3 files changed

Lines changed: 245 additions & 4 deletions

File tree

ghidra_common.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,66 @@ def should_skip_function(func, program):
298298
# ============================================================
299299

300300

301+
def clean_decompiled_code(code):
    """
    Clean up decompiled code by removing redundant comments and blank lines.

    Removes:
    - Function signature comments like /* FuncName(args) */ or /* FuncName */
      that sit outside any function body (brace depth 0)
    - All blank lines inside function bodies
    - Repeated blank lines outside function bodies (at most 1 is kept)
    - Trailing blank lines

    Keeps:
    - Decompiler diagnostics such as
      /* WARNING: Removing unreachable block (ram,0x...) */, even though
      they contain parentheses
    - Every comment located inside a function body

    Args:
        code: Raw decompiled C code (may be None or empty)

    Returns:
        Cleaned up code; None or empty input is returned unchanged
    """
    if not code:
        return code

    cleaned_lines = []
    prev_blank = False
    brace_depth = 0

    for line in code.split('\n'):
        stripped = line.strip()

        # Signature comments are emitted ahead of the function, so only
        # filter at depth 0; comments inside a body are left untouched.
        if brace_depth == 0 and _is_signature_comment(stripped):
            continue

        # Clamp at zero so a stray '}' cannot push the depth negative and
        # skew the inside/outside decision for every following function.
        brace_depth = max(0, brace_depth + _brace_delta(stripped))

        if not stripped:
            # Inside a body every blank line goes; outside, only repeats do.
            if brace_depth > 0 or prev_blank:
                continue
            prev_blank = True
        else:
            prev_blank = False

        cleaned_lines.append(line)

    # Remove trailing blank lines
    while cleaned_lines and not cleaned_lines[-1].strip():
        cleaned_lines.pop()

    return '\n'.join(cleaned_lines)


def _is_signature_comment(stripped):
    """Return True if a stripped line is a redundant /* Name(args) */ comment."""
    if not (stripped.startswith('/*') and stripped.endswith('*/')):
        return False
    inner = stripped[2:-2].strip()
    # Decompiler warnings often carry an address in parentheses; they are
    # diagnostics, not signatures, and must survive the cleanup.
    if not inner or inner.startswith('WARNING:'):
        return False
    if '(' in inner:
        return True
    # A bare identifier (no spaces, reasonably short) is a name-only comment.
    return ' ' not in inner and len(inner) < 100


def _brace_delta(text):
    """
    Return the number of '{' minus the number of '}' on one line.

    Braces inside string/char literals, /* ... */ comments and // comments
    are ignored. Only a single line is examined: a block comment left open
    on this line hides the rest of the line, but continuation lines of a
    multi-line comment are not tracked.
    """
    delta = 0
    i = 0
    n = len(text)
    while i < n:
        ch = text[i]
        if ch == '"' or ch == "'":
            # Skip the quoted literal, honouring backslash escapes.
            i += 1
            while i < n and text[i] != ch:
                i += 2 if text[i] == '\\' else 1
            i += 1
            continue
        if text.startswith('//', i):
            break
        if text.startswith('/*', i):
            end = text.find('*/', i + 2)
            if end == -1:
                break
            i = end + 2
            continue
        if ch == '{':
            delta += 1
        elif ch == '}':
            delta -= 1
        i += 1
    return delta
359+
360+
301361
def get_decompiled_function_basic(decomp_ifc, func, monitor):
302362
"""
303363
Decompile a single function and return C code.
@@ -315,7 +375,8 @@ def get_decompiled_function_basic(decomp_ifc, func, monitor):
315375
try:
316376
results = decomp_ifc.decompileFunction(func, 60, monitor)
317377
if results and results.decompileCompleted():
318-
return results.getDecompiledFunction().getC()
378+
code = results.getDecompiledFunction().getC()
379+
return clean_decompiled_code(code)
319380
except Exception as e:
320381
print(" [Error] Failed to decompile {}: {}".format(func.getName(), str(e)))
321382
return None

ghidra_decompile_lib.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,6 @@ def main():
172172
for func, demangled_name in funcs:
173173
decompiled = get_decompiled_function_basic(decomp_ifc, func, monitor)
174174
if decompiled:
175-
f.write("// Original: {}\n".format(func.getName()))
176-
f.write("// Demangled: {}\n".format(demangled_name))
177175
f.write(decompiled)
178176
f.write("\n")
179177
decompiled_count += 1
@@ -199,7 +197,6 @@ def main():
199197
for func, display_name in standalone_functions:
200198
decompiled = get_decompiled_function_basic(decomp_ifc, func, monitor)
201199
if decompiled:
202-
f.write("// Function: {}\n".format(display_name))
203200
f.write(decompiled)
204201
f.write("\n")
205202
decompiled_count += 1

tests/test_libsurgeon.py

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1436,3 +1436,186 @@ def test_master_header_includes_all_modules(self, temp_dir):
14361436

14371437
if __name__ == "__main__":
14381438
pytest.main([__file__, "-v"])
1439+
1440+
1441+
# ============================================================
1442+
# Test: Code Cleaning Functions
1443+
# ============================================================
1444+
1445+
1446+
class TestCleanDecompiledCode:
1447+
"""Tests for clean_decompiled_code function"""
1448+
1449+
def test_remove_function_signature_comment(self):
1450+
"""Test removal of function signature comments"""
1451+
from ghidra_common import clean_decompiled_code
1452+
1453+
code = """/* BESplitMatrix(sbematrix const*, sbevec3*, sbequat*, sbevec3*) */
1454+
void BESplitMatrix(sbematrix *param_1, sbevec3 *param_2)
1455+
{
1456+
int x = 1;
1457+
return;
1458+
}"""
1459+
cleaned = clean_decompiled_code(code)
1460+
assert "/* BESplitMatrix" not in cleaned
1461+
assert "void BESplitMatrix" in cleaned
1462+
1463+
def test_remove_simple_function_name_comment(self):
1464+
"""Test removal of simple function name comments"""
1465+
from ghidra_common import clean_decompiled_code
1466+
1467+
code = """/* TestFunc */
1468+
void TestFunc(void)
1469+
{
1470+
return;
1471+
}"""
1472+
cleaned = clean_decompiled_code(code)
1473+
assert "/* TestFunc */" not in cleaned
1474+
assert "void TestFunc" in cleaned
1475+
1476+
def test_remove_blank_lines_inside_function(self):
1477+
"""Test removal of blank lines inside function body"""
1478+
from ghidra_common import clean_decompiled_code
1479+
1480+
code = """void TestFunc(void)
1481+
1482+
{
1483+
1484+
int x;
1485+
1486+
int y;
1487+
1488+
x = 1;
1489+
1490+
y = 2;
1491+
1492+
return;
1493+
1494+
}"""
1495+
cleaned = clean_decompiled_code(code)
1496+
# Should have no blank lines inside the function
1497+
lines = cleaned.split('\n')
1498+
inside_braces = False
1499+
blank_inside = 0
1500+
for line in lines:
1501+
if '{' in line:
1502+
inside_braces = True
1503+
if '}' in line:
1504+
inside_braces = False
1505+
if inside_braces and not line.strip():
1506+
blank_inside += 1
1507+
assert blank_inside == 0, f"Found {blank_inside} blank lines inside function"
1508+
1509+
def test_preserve_blank_line_between_functions(self):
1510+
"""Test that one blank line is preserved between functions"""
1511+
from ghidra_common import clean_decompiled_code
1512+
1513+
code = """void Func1(void)
1514+
{
1515+
return;
1516+
}
1517+
1518+
void Func2(void)
1519+
{
1520+
return;
1521+
}"""
1522+
cleaned = clean_decompiled_code(code)
1523+
# Should have exactly one blank line between functions
1524+
assert "\n\nvoid Func2" in cleaned or "}\n\nvoid" in cleaned
1525+
1526+
def test_collapse_multiple_blank_lines_outside_function(self):
1527+
"""Test that multiple blank lines outside functions are collapsed to one"""
1528+
from ghidra_common import clean_decompiled_code
1529+
1530+
code = """void Func1(void)
1531+
{
1532+
return;
1533+
}
1534+
1535+
1536+
1537+
void Func2(void)
1538+
{
1539+
return;
1540+
}"""
1541+
cleaned = clean_decompiled_code(code)
1542+
# Should not have more than 2 consecutive newlines (one blank line)
1543+
assert "\n\n\n" not in cleaned
1544+
1545+
def test_handle_empty_code(self):
1546+
"""Test handling of empty or None code"""
1547+
from ghidra_common import clean_decompiled_code
1548+
1549+
assert clean_decompiled_code(None) is None
1550+
assert clean_decompiled_code("") == ""
1551+
1552+
def test_preserve_meaningful_comments(self):
1553+
"""Test that meaningful comments are preserved"""
1554+
from ghidra_common import clean_decompiled_code
1555+
1556+
code = """/* This is a meaningful comment about the function */
1557+
void TestFunc(void)
1558+
{
1559+
return;
1560+
}"""
1561+
cleaned = clean_decompiled_code(code)
1562+
assert "meaningful comment" in cleaned
1563+
1564+
def test_real_ghidra_output(self):
1565+
"""Test with real Ghidra-style output"""
1566+
from ghidra_common import clean_decompiled_code
1567+
1568+
code = """/* CMemStore::Alloc(unsigned int, unsigned char, unsigned char, unsigned char, unsigned char, unsigned char, unsigned int) */
1569+
1570+
void * CMemStore::Alloc(uint param_1,undefined param_2,undefined param_3,undefined param_4,
1571+
1572+
undefined param_5,undefined param_6,undefined4 param_7)
1573+
1574+
1575+
1576+
{
1577+
1578+
void *pvVar1;
1579+
1580+
undefined4 in_register_0000003c;
1581+
1582+
1583+
1584+
pvVar1 = operator_new__(CONCAT44(in_register_0000003c,param_1));
1585+
1586+
return pvVar1;
1587+
1588+
}"""
1589+
cleaned = clean_decompiled_code(code)
1590+
1591+
# Should remove the signature comment
1592+
assert "/* CMemStore::Alloc" not in cleaned
1593+
1594+
# Should remove blank lines inside function
1595+
lines = cleaned.split('\n')
1596+
# Count lines - should be much fewer
1597+
assert len(lines) < len(code.split('\n'))
1598+
1599+
# Function should still be valid
1600+
assert "void * CMemStore::Alloc" in cleaned
1601+
assert "return pvVar1;" in cleaned
1602+
1603+
def test_nested_braces(self):
1604+
"""Test handling of nested braces"""
1605+
from ghidra_common import clean_decompiled_code
1606+
1607+
code = """void TestFunc(void)
1608+
{
1609+
1610+
if (x) {
1611+
1612+
y = 1;
1613+
1614+
}
1615+
1616+
return;
1617+
1618+
}"""
1619+
cleaned = clean_decompiled_code(code)
1620+
# Should remove all internal blank lines
1621+
assert "\n\n" not in cleaned.split('{', 1)[1].rsplit('}', 1)[0]

0 commit comments

Comments
 (0)