-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscanner.py
More file actions
166 lines (133 loc) · 5.89 KB
/
Copy pathscanner.py
File metadata and controls
166 lines (133 loc) · 5.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import ast
import os
import json
import logging
# Set up logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s | %(levelname)s | %(message)s",
datefmt="%H:%M:%S",
)
logger = logging.getLogger("Archaeologist")
class ArchaeologistScanner:
def __init__(self):
self.definitions = {} # {name: node}
self.calls = set() # Track unique function calls via AST analysis
self.file_map = {} # {name: filepath}
def scan_file(self, filepath):
"""Scan a file and return parsed results"""
with open(filepath, "r", encoding="utf-8") as f:
try:
# Convert source code into tree for structural inspection
tree = ast.parse(f.read())
for node in ast.walk(tree):
# Capture function definitions
if isinstance(
node, (ast.FunctionDef, ast.AsyncFunctionDef)
):
# Create a unique key using the file path and function name
unique_id = f"{filepath}:{node.name}"
self.definitions[unique_id] = node
self.file_map[unique_id] = filepath
# Capture function calls (local and method chaining)
if isinstance(node, ast.Call):
# Standard calls: func()
if isinstance(node.func, ast.Name):
self.calls.add(node.func.id)
# Chained calls: obj.method()
elif isinstance(node.func, ast.Attribute):
self.calls.add(node.func.attr)
# Detailed trace
logger.debug(f"Successfully excavated {filepath}")
except Exception as e:
logger.error(f"Excavation failed {filepath}: {e}")
def calculate_confidence(self, name, node):
"""
Assigns a confidence score (0-100) that the code is 'Dead'.
Lower score = More likely its actually in use (protected)
"""
score = 100
# The Decorator Rule (Framework entry points)
# Covers Flask, Django, FastAPI, Celery, etc.
if node.decorator_list:
return 0 # 0% chance its dead; decorators mean it's an entry point
# Interface Inference (React-like or standard Dunders)
# Component-style names (Capitalised) or Python internals
if name.startswith("__") or name[0].isupper():
score -= 60
# Private Indicators
# Leading underscores often mean its only for local use
if name.startswith("_") and not name.startswith("__"):
score += 10
# Usage Check
if name in self.calls:
return 0 # Definitely in use
# Return the final result between 0 and 100 for consistent scoring
return min(max(score, 0), 100)
def run(self, directory):
"""Run the Tool."""
# Log the start of the process
logger.info(f"Surverying Site: {os.path.abspath(directory)}")
# looks through the directory tree to locate all source files
for root, _, filenames in os.walk(directory):
if any(
x in root for x in ["venv", ".git", "__pycache__", "tests"]
):
continue
for filename in filenames:
if filename.endswith(".py"):
self.scan_file(os.path.join(root, filename))
# Initialise an empty list to store the findings for the final summary
report = []
# Use unique_id (path:name) to iterate through definitions
for unique_id, node in self.definitions.items():
# Extract the actual function name for the confidence rules
name = node.name
# Assign a probability score based on naming patterns and
# framework usage
dead_confidence = self.calculate_confidence(name, node)
# Catalogue suspected dead code with
# its source location and probability score
if dead_confidence > 0:
report.append(
{
"function": name,
"file": self.file_map[unique_id],
"dead_confidence": f"{dead_confidence}%",
}
)
# Sort by most likely to be 'dead'
return sorted(
report,
key=lambda x: int(x["dead_confidence"].strip("%")),
reverse=True,
)
def save_report(self, report, filename="archaeology_report.json"):
"""Write analysis report to a JSON file"""
with open(filename, "w", encoding="utf-8") as f:
# Save to a JSON
json.dump(report, f, indent=4)
# Confirm the report has been stored.
logger.info(f"\n ✅Site Report saved to: {os.path.abspath(filename)}")
if __name__ == "__main__":
scanner = ArchaeologistScanner()
# Start the Action of scanner.py
logger.info("🕵🏽 Starting site excavation..")
# Execute the scan from the current root to map the entire project geography
results = scanner.run(".")
# Format and display the findings in a structured table
print(f"🕵🏽SITE REPORT: {len(results)} potential artifacts found. \n")
print(f"{'CONFIDENCE':<12} | {'FUNCTION':<25} | {'LOCATION'}")
print("-" * 85)
# Display each discovered artifact to the console for real-time review
for r in results:
# Truncate for a clean UI
location = (
(r["file"][:50] + "..") if len(r["file"]) > 50 else r["file"]
)
print(f"{r['dead_confidence']:<12} | {r['function']:<25} | {location}")
print("_" * 85) # Closing line for the table
# Log the end of the transition
logger.info("Excavation complete. Archiving results..")
# Save the report to a JSON file.
scanner.save_report(results)