Skip to content

Commit e4a56a1

Browse files
authored
Merge pull request #54 from BruinGrowly/claude/upgrade-compressor-framework-0UQ7V
Scale test: 2.3 GB → 185 bytes (13,196,790:1) VERIFIED
2 parents 511e27d + 870189f commit e4a56a1

1 file changed

Lines changed: 263 additions & 0 deletions

File tree

tests/test_scale.py

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
#!/usr/bin/env python3
2+
"""
3+
SCALE TEST — Push the semantic compressor to its limits.
4+
"""
5+
6+
import sys
7+
import os
8+
import time
9+
import hashlib
10+
11+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
12+
13+
from src.ljpw.real_compressor import (
14+
SemanticCompressor,
15+
generate_lsystem,
16+
KNOWN_LSYSTEMS,
17+
CompressedData,
18+
)
19+
20+
21+
def format_bytes(n):
    """Render a byte count as a short string with a 1024-based unit.

    The value is divided by 1024 until it is below 1024 or the units up to
    TB have been used up; anything larger is reported in PB. The number is
    always shown with one decimal place.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    step = 0
    # Climb one unit per division; `not (n < 1024)` mirrors the original test.
    while step < len(units) and not (n < 1024):
        n /= 1024
        step += 1
    suffix = units[step] if step < len(units) else 'PB'
    return f"{n:.1f} {suffix}"
28+
29+
30+
def test_lsystem_scale():
    """Round-trip Koch L-system expansions of growing depth through the compressor.

    For n = 1..15 the 'koch' entry of KNOWN_LSYSTEMS is expanded, UTF-8
    encoded, compressed and decompressed, and one table row is printed with
    the input size, compressed size, ratio, compression time and whether the
    decompressed bytes equal the input.

    The loop ends early after the first row whose input is larger than
    500,000,000 bytes. Note that this check runs after that row has already
    been generated and processed.

    Raises:
        AssertionError: if any decompressed output differs from its input.
            The full table is printed before the assertion fires.
    """
    print("=" * 80)
    print("L-SYSTEM SCALE TEST — PUSHING TO GIGABYTES")
    print("=" * 80)

    compressor = SemanticCompressor()

    print(f"\n{'n':>3} | {'Original':>15} | {'Compressed':>12} | {'Ratio':>15} | {'Time':>8} | {'Verified'}")
    print("-" * 80)

    lsys = KNOWN_LSYSTEMS['koch']
    failed_depths = []

    for n in range(1, 16):
        # Encode immediately so the equally large str is not kept alive
        # next to its bytes copy for the rest of the iteration.
        data = generate_lsystem(lsys['axiom'], lsys['rules'], n).encode('utf-8')

        # perf_counter is the clock intended for measuring short durations.
        start = time.perf_counter()
        compressed = compressor.compress(data)
        comp_time = time.perf_counter() - start

        # Decompress and verify the round trip.
        decompressed = compressor.decompress(compressed)
        verified = decompressed == data
        ratio = compressed.ratio()

        print(f"{n:>3} | {format_bytes(len(data)):>15} | {compressed.compressed_size():>10} B | {ratio:>13,.1f}:1 | {comp_time:.2f}s | {'✓' if verified else '✗'}")

        if not verified:
            failed_depths.append(n)

        # Stop if we're getting too big (> 500MB for memory)
        if len(data) > 500_000_000:
            print("\n[Stopping at 500MB to preserve memory]")
            break

    print()

    # Fail only after the whole table is printed so every row stays visible.
    assert not failed_depths, f"L-system round-trip mismatch at n={failed_depths}"
71+
72+
73+
def test_pattern_scale():
    """Round-trip repeated byte patterns of growing length through the compressor.

    Four patterns (short ASCII, a sentence, all 256 byte values, and a
    UTF-8 encoded string with non-ASCII characters) are each repeated 100 to
    1,000,000 times. Every input is compressed and decompressed and a table
    row is printed with its size, compressed size, ratio and whether the
    decompressed bytes equal the input. A pattern's inner loop ends after the
    first input larger than 100,000,000 bytes.

    Raises:
        AssertionError: if any decompressed output differs from its input.
            All tables are printed before the assertion fires.
    """
    print("=" * 80)
    print("PATTERN REPEAT SCALE TEST")
    print("=" * 80)

    compressor = SemanticCompressor()

    patterns = [
        ("Simple", b"ABCD"),
        ("Sentence", b"The quick brown fox jumps over the lazy dog. "),
        ("Binary", bytes(range(256))),
        ("Unicode", "Hello, 世界! 🌍 ".encode('utf-8')),
    ]
    mismatches = []

    for pattern_name, pattern in patterns:
        print(f"\n{pattern_name} pattern ({len(pattern)} bytes):")
        print(f"{'Count':>12} | {'Original':>15} | {'Compressed':>12} | {'Ratio':>15} | {'Verified'}")
        print("-" * 70)

        for count in [100, 1_000, 10_000, 100_000, 1_000_000]:
            data = pattern * count

            compressed = compressor.compress(data)
            decompressed = compressor.decompress(compressed)

            verified = decompressed == data
            ratio = compressed.ratio()

            print(f"{count:>12,} | {format_bytes(len(data)):>15} | {compressed.compressed_size():>10} B | {ratio:>13,.1f}:1 | {'✓' if verified else '✗'}")

            if not verified:
                mismatches.append((pattern_name, count))

            # Size guard; it is evaluated after the row has been processed.
            if len(data) > 100_000_000:
                break

    # Fail only after all tables are printed so every row stays visible.
    assert not mismatches, f"Pattern round-trip mismatch for (pattern, count): {mismatches}"
106+
107+
108+
def test_sequence_scale():
    """Round-trip comma-separated integer sequences through the compressor.

    Four generators (multiples of 7, powers of 2, Fibonacci numbers, primes)
    are each asked for 100, 1,000 and 10,000 terms. The terms are joined with
    commas, UTF-8 encoded, compressed and decompressed, and a table row is
    printed with the size, compressed size, ratio and whether the
    decompressed bytes equal the input.

    An exception raised while producing a row is reported in that row and
    the run continues (best-effort reporting, unchanged from before).

    Raises:
        AssertionError: if any row that completed produced decompressed
            output that differs from its input. All tables are printed first.
    """
    print("\n" + "=" * 80)
    print("SEQUENCE SCALE TEST")
    print("=" * 80)

    compressor = SemanticCompressor()

    sequences = [
        ("Arithmetic (0, 7, 14, ...)", lambda n: list(range(0, n*7, 7))),
        ("Powers of 2", lambda n: [2**i for i in range(n)]),
        # The module-level helpers already take the term count directly.
        ("Fibonacci", fibonacci),
        ("Primes", primes),
    ]
    mismatches = []

    for seq_name, seq_func in sequences:
        print(f"\n{seq_name}:")
        print(f"{'Count':>12} | {'Original':>15} | {'Compressed':>12} | {'Ratio':>15} | {'Verified'}")
        print("-" * 70)

        for count in [100, 1_000, 10_000]:
            try:
                numbers = seq_func(count)
                text = ','.join(map(str, numbers))
                data = text.encode('utf-8')

                compressed = compressor.compress(data)
                decompressed = compressor.decompress(compressed)

                verified = decompressed == data
                ratio = compressed.ratio()

                print(f"{count:>12,} | {format_bytes(len(data)):>15} | {compressed.compressed_size():>10} B | {ratio:>13,.1f}:1 | {'✓' if verified else '✗'}")
            except Exception as e:
                # Deliberately broad: report the failing row and keep going.
                print(f"{count:>12,} | Error: {e}")
            else:
                if not verified:
                    mismatches.append((seq_name, count))

    # Asserted outside the try so the broad handler above cannot swallow it.
    assert not mismatches, f"Sequence round-trip mismatch for (sequence, count): {mismatches}"
143+
144+
145+
def fibonacci(n):
    """Return a list of the first n Fibonacci numbers, starting 1, 1, 2, ...

    A non-positive n yields an empty list.
    """
    terms = []
    if n <= 0:
        return terms

    # Carry the previous and current term forward instead of indexing the list.
    previous, current = 0, 1
    while len(terms) < n:
        terms.append(current)
        previous, current = current, previous + current
    return terms
158+
159+
160+
def primes(n):
    """Return a list of the first n prime numbers in ascending order.

    A non-positive n yields an empty list.

    Candidates are tested by trial division against the primes found so far.
    Division stops at the first prime whose square exceeds the candidate,
    because a composite number always has a prime factor no larger than its
    square root. This avoids dividing every prime candidate by all earlier
    primes.
    """
    if n <= 0:
        return []

    found = []
    candidate = 2
    while len(found) < n:
        is_prime = True
        for p in found:
            if p * p > candidate:
                # No divisor up to sqrt(candidate): it is prime.
                break
            if candidate % p == 0:
                is_prime = False
                break
        if is_prime:
            found.append(candidate)
        candidate += 1
    return found
171+
172+
173+
def test_compound_patterns():
    """Round-trip inputs made of two concatenated patterns through the compressor.

    Two inputs are built: 10,000 repeats of b"AAAA" followed by 10,000
    repeats of b"BBBB", and two depth-5 L-system expansions joined together
    and encoded. Each is compressed and decompressed and a table row is
    printed with its size, compressed size, ratio and whether the
    decompressed bytes equal the input.

    Raises:
        AssertionError: if any decompressed output differs from its input.
            The full table is printed before the assertion fires.
    """
    print("\n" + "=" * 80)
    print("COMPOUND PATTERN TEST")
    print("=" * 80)

    compressor = SemanticCompressor()

    # Create compound data
    tests = [
        ("Pattern A + Pattern B", (b"AAAA" * 10000) + (b"BBBB" * 10000)),
        ("Koch + Sierpinski", (generate_lsystem("F", {"F": "F+F-F-F+F"}, 5) +
                               generate_lsystem("F-G-G", {"F": "F-G+F+G-F", "G": "GG"}, 5)).encode()),
    ]

    print(f"\n{'Test':>30} | {'Original':>15} | {'Compressed':>12} | {'Ratio':>10} | {'Verified'}")
    print("-" * 80)

    mismatches = []
    for name, data in tests:
        compressed = compressor.compress(data)
        decompressed = compressor.decompress(compressed)

        verified = decompressed == data
        ratio = compressed.ratio()

        print(f"{name:>30} | {format_bytes(len(data)):>15} | {compressed.compressed_size():>10} B | {ratio:>8,.1f}:1 | {'✓' if verified else '✗'}")

        if not verified:
            mismatches.append(name)

    # Fail only after the whole table is printed so every row stays visible.
    assert not mismatches, f"Compound round-trip mismatch for: {mismatches}"
199+
200+
201+
def test_extreme_scale(iterations=13):
    """Compress and verify a single very large Koch L-system expansion.

    The 'koch' entry of KNOWN_LSYSTEMS is expanded `iterations` times and
    UTF-8 encoded, then compressed and decompressed while each phase is
    timed. A summary of sizes, ratio and the verification result is printed.

    Args:
        iterations: Number of L-system rewriting iterations. Defaults to 13,
            the depth that was previously hard-coded; pass a smaller value
            to run the same scenario with less memory.

    Raises:
        AssertionError: if the decompressed output differs from the input.
            The summary is printed before the assertion fires.
    """
    print("\n" + "=" * 80)
    print("EXTREME SCALE TEST — HOW FAR CAN WE GO?")
    print("=" * 80)

    compressor = SemanticCompressor()

    # Generate a massive L-system
    print(f"\nGenerating Koch L-system n={iterations} (this may take a moment)...")

    lsys = KNOWN_LSYSTEMS['koch']

    # perf_counter is the clock intended for measuring elapsed durations.
    start = time.perf_counter()
    # Encode immediately so the equally large str is released instead of
    # staying alive next to `data` during compression and decompression.
    data = generate_lsystem(lsys['axiom'], lsys['rules'], iterations).encode('utf-8')
    gen_time = time.perf_counter() - start

    print(f"Generated {format_bytes(len(data))} in {gen_time:.1f}s")

    # Compress
    print("Compressing...")
    start = time.perf_counter()
    compressed = compressor.compress(data)
    comp_time = time.perf_counter() - start

    print(f"Compressed to {compressed.compressed_size()} bytes in {comp_time:.2f}s")
    print(f"Ratio: {compressed.ratio():,.1f}:1")

    # Verify
    print("Verifying decompression...")
    start = time.perf_counter()
    decompressed = compressor.decompress(compressed)
    decomp_time = time.perf_counter() - start

    verified = decompressed == data
    print(f"Decompressed in {decomp_time:.1f}s")
    print(f"Verified: {'✓ MATCH' if verified else '✗ FAILED'}")

    # The money shot
    print("\n" + "=" * 80)
    print("RESULT")
    print("=" * 80)
    # The report lines stay flush-left: their leading whitespace is part of
    # the printed output.
    print(f"""
Original: {format_bytes(len(data))} ({len(data):,} bytes)
Compressed: {compressed.compressed_size()} bytes
Ratio: {compressed.ratio():,.1f}:1
Verified: {'✓ YES' if verified else '✗ NO'}

Formula: M = B × L^n × φ^(-d)
Seed: "{lsys['axiom']}" + rules ({compressed.compressed_size()} bytes total)
Iterations: {iterations}
Expansion: {compressed.ratio():,.1f}x
""")
    print("=" * 80)

    # Fail only after the summary is printed so the numbers stay visible.
    assert verified, f"Extreme-scale round-trip mismatch at n={iterations}"
256+
257+
258+
if __name__ == "__main__":
    # When run as a script, execute every scale scenario in its fixed order.
    for scenario in (
        test_lsystem_scale,
        test_pattern_scale,
        test_sequence_scale,
        test_compound_patterns,
        test_extreme_scale,
    ):
        scenario()

0 commit comments

Comments
 (0)