forked from ghuntley/atlassian-rovo-source-code-z80-dump
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_embedded.py
More file actions
139 lines (108 loc) · 4.79 KB
/
extract_embedded.py
File metadata and controls
139 lines (108 loc) · 4.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env python3
import re
import os
import zipfile
from io import BytesIO
def extract_embedded_python(binary_path='acli'):
    """Extract the rovodev ``.py`` sources from a ZIP embedded in a binary.

    The binary is scanned for ZIP local-file-header signatures. The first
    header whose leading 300 bytes mention ``atlassian_cli_rovodev`` is taken
    as the start of the archive, and the last end-of-central-directory (EOCD)
    record in the file as its end. Every member whose name contains
    ``atlassian_cli_rovodev`` and ends with ``.py`` is extracted relative to
    the current working directory. If the slice cannot be opened as a ZIP,
    ``extract_individual_files`` is used as a fallback.

    Args:
        binary_path: Path of the binary to scan. Defaults to ``'acli'`` in
            the current working directory.

    Returns:
        None. Progress and errors are reported with ``print``.

    Raises:
        OSError: If ``binary_path`` cannot be opened or read.
    """
    with open(binary_path, 'rb') as f:
        data = f.read()

    # Find all PK headers
    pk_pattern = b'PK\x03\x04'
    matches = []
    pos = data.find(pk_pattern)
    while pos != -1:
        matches.append(pos)
        pos = data.find(pk_pattern, pos + 1)

    print(f"Found {len(matches)} ZIP entries")

    # Look for the start of the rovodev archive
    rovo_start = next(
        (pos for pos in matches
         if b'atlassian_cli_rovodev' in data[pos:pos + 300]),
        None,
    )

    # Compare against None explicitly: offset 0 is a valid archive start.
    if rovo_start is None:
        print("Could not find rovodev archive")
        return
    print(f"Found rovodev archive starting at position: {rovo_start}")

    # Find the end of central directory record
    eocd_pos = data.rfind(b'PK\x05\x06')
    if eocd_pos == -1:
        print("Could not find end of central directory")
        return

    print(f"Found EOCD at position: {eocd_pos}")

    # The fixed part of the EOCD record is 22 bytes; its last two bytes give
    # the length of the archive comment that follows, so include that too.
    comment_len = int.from_bytes(data[eocd_pos + 20:eocd_pos + 22], 'little')
    zip_end = min(eocd_pos + 22 + comment_len, len(data))
    zip_data = data[rovo_start:zip_end]

    # Try to process as ZIP
    try:
        with zipfile.ZipFile(BytesIO(zip_data), 'r') as zf:
            names = zf.namelist()
            print(f"ZIP file contains {len(names)} files")

            # Extract only rovodev related files
            for name in names:
                if 'atlassian_cli_rovodev' not in name or not name.endswith('.py'):
                    continue
                try:
                    # ZipFile.extract creates parent directories (including
                    # for members with no directory part) and strips
                    # absolute prefixes and ".." components, so a crafted
                    # name cannot write outside the working directory.
                    zf.extract(name)
                    print(f"Extracted: {name}")
                except Exception as e:
                    # Best effort: report the failing member and keep going.
                    print(f"Error extracting {name}: {e}")

    except zipfile.BadZipFile as e:
        print(f"Bad ZIP file: {e}")
        # Try to extract individual files manually
        extract_individual_files(data, matches)
def _manual_output_path(filename, out_dir='extracted_rovo'):
    """Map an archive member name to a path inside ``out_dir``.

    A leading ``lib/`` is dropped, empty and ``.`` components are removed,
    and names that contain ``..`` or would resolve outside ``out_dir`` are
    rejected.

    Returns:
        The output path, or None when the name is empty or unsafe.
    """
    relative = filename.replace('\\', '/').lstrip('/')
    if relative.startswith('lib/'):
        relative = relative[len('lib/'):]
    parts = [part for part in relative.split('/') if part not in ('', '.')]
    if not parts or os.pardir in parts:
        return None
    target = os.path.join(out_dir, *parts)
    # Final containment check (also covers drive-letter components that make
    # os.path.join discard out_dir on Windows).
    base = os.path.abspath(out_dir)
    if not os.path.abspath(target).startswith(base + os.sep):
        return None
    return target


def extract_individual_files(data, matches):
    """Salvage rovodev Python files by parsing ZIP local headers by hand.

    For each offset in ``matches`` the 30-byte local file header is parsed.
    Entries whose name contains ``atlassian_cli_rovodev`` and ``.py`` are
    written below ``extracted_rovo/``. Stored entries are copied as-is,
    other entries are inflated as raw deflate, and if inflation fails the
    first 1000 payload bytes are saved with a ``.raw`` suffix instead.

    Args:
        data: Complete contents of the binary, as bytes.
        matches: Offsets in ``data`` where ``PK\\x03\\x04`` occurs, in
            ascending order (the next offset bounds a payload whose size is
            not recorded in its header).

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    # Function-scope import: zlib is not imported at the top of this file.
    import zlib

    print("Attempting manual extraction...")

    os.makedirs('extracted_rovo', exist_ok=True)

    for i, pos in enumerate(matches):
        try:
            # Read local file header
            header = data[pos:pos + 30]
            if len(header) < 30 or header[:4] != b'PK\x03\x04':
                continue

            # Parse header
            flags = int.from_bytes(header[6:8], 'little')
            method = int.from_bytes(header[8:10], 'little')
            compressed_size = int.from_bytes(header[18:22], 'little')
            filename_len = int.from_bytes(header[26:28], 'little')
            extra_len = int.from_bytes(header[28:30], 'little')

            name_end = pos + 30 + filename_len
            if name_end > len(data):
                continue

            filename = data[pos + 30:name_end].decode('utf-8', errors='ignore')
            if 'atlassian_cli_rovodev' not in filename or '.py' not in filename:
                continue
            print(f"Found file: {filename}")

            target = _manual_output_path(filename)
            if target is None:
                print(f"Skipping unsafe path: {filename}")
                continue

            file_start = name_end + extra_len

            # Trust the header's compressed size unless it is deferred to a
            # data descriptor (flag bit 3), is the ZIP64 placeholder, runs
            # past the buffer, or is zero for a compressed entry.
            size_known = (
                not (flags & 0x08)
                and compressed_size != 0xFFFFFFFF
                and file_start + compressed_size <= len(data)
                and (compressed_size > 0 or method == 0)
            )
            if size_known:
                file_end = file_start + compressed_size
            elif i + 1 < len(matches):
                # Estimate: the payload ends before the next local header.
                file_end = matches[i + 1]
            else:
                file_end = min(file_start + 50000, len(data))

            file_data = data[file_start:file_end]

            if method == 0 and size_known:
                # Stored entry: the payload is the file content itself.
                payload = file_data
            else:
                try:
                    payload = zlib.decompress(file_data, -15)  # Raw deflate
                except zlib.error:
                    payload = None

            os.makedirs(os.path.dirname(target), exist_ok=True)
            if payload is None:
                # Save raw data
                raw_path = target + '.raw'
                with open(raw_path, 'wb') as f:
                    f.write(file_data[:1000])  # Just first 1KB
                print(f"Saved raw: {raw_path}")
            else:
                with open(target, 'wb') as f:
                    f.write(payload)
                print(f"Extracted: {target}")

        except Exception as e:
            # Best effort: report the failing entry and move on to the next.
            print(f"Error processing entry at offset {pos}: {e}")
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    extract_embedded_python()