Skip to content

Commit eedd174

Browse files
author
MPCoreDeveloper
committed
WIP: B-tree split logic bug investigation and partial fixes
Identified critical bug in BTree SplitChild causing key/value desynchronization. Fixed InsertKey to maintain keysCount/valuesCount synchronization. Fixed RemoveKeyAt to keep key/value arrays aligned. Updated SplitChild to copy values for both leaf and internal nodes. Added debug tests to isolate the split corruption issue. ISSUE: BTree still losing keys during split operations. Status: Requires complete rewrite of split logic for proper B+ tree implementation.
1 parent 4cfa323 commit eedd174

20 files changed

+962
-174
lines changed

DebugRangeQuery.cs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
using SharpCoreDB.DataStructures;
2+
using System;
3+
using System.Linq;
4+
5+
// Debug driver: fills a BTreeIndex with keys 1..100 (value = key * 10),
// then prints what a full sorted traversal and a [20, 80] range query return.
var index = new BTreeIndex<int>("test");

foreach (var key in Enumerable.Range(1, 100))
{
    index.Add(key, key * 10L);
}

// Full sorted traversal of the index.
var sortedEntries = index.GetSortedEntries().ToList();
Console.WriteLine($"Total entries in tree: {sortedEntries.Count}");

// Values reported for the key range 20-80.
var inRange = index.FindRange(20, 80).ToList();
var tailStart = Math.Max(0, inRange.Count - 10);
var firstTen = string.Join(", ", inRange.Take(10));
var lastTen = string.Join(", ", inRange.Skip(tailStart));
Console.WriteLine($"Range [20-80] results: {inRange.Count} items");
Console.WriteLine($"First 10: {firstTen}");
Console.WriteLine($"Last 10: {lastTen}");

// Values the range query should have produced (keys 20..80, times 10)
// that are absent from what it actually returned.
var expected = Enumerable.Range(20, 61).Select(k => k * 10L).ToHashSet();
var absent = expected.Where(value => !inRange.Contains(value)).ToList();
var absentPreview = string.Join(", ", absent.Take(20));
Console.WriteLine($"\nMissing {absent.Count} items: {absentPreview}");

// Minimal five-entry index queried for keys 2..4.
var tinyIndex = new BTreeIndex<int>("simple");
tinyIndex.Add(1, 10);
tinyIndex.Add(2, 20);
tinyIndex.Add(3, 30);
tinyIndex.Add(4, 40);
tinyIndex.Add(5, 50);

var tinyRange = tinyIndex.FindRange(2, 4).ToList();
var tinyJoined = string.Join(", ", tinyRange);
Console.WriteLine($"\nSimple test [2-4]: got {tinyRange.Count}, expected 3: {tinyJoined}");

fix_bounds_checking.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import re
2+
3+
# Read the file
4+
# One-shot patcher: adds bounds checks to the binary Read* helpers in
# DatabaseExtensions.cs by regex-rewriting each method's opening lines.
TARGET_PATH = 'src/SharpCoreDB/DatabaseExtensions.cs'

# (method name, regex capturing the method prologue, replacement that re-emits
# the prologue followed by the guard). Applied in this order.
BOUNDS_FIXES = [
    (
        'ReadString',
        r'(private static string ReadString\(ReadOnlySpan<byte> data, ref int offset\)\s*\{\s*var length = ReadInt32\(data, ref offset\);)',
        r'\1\n if (length < 0 || offset + length > data.Length)\n throw new InvalidOperationException($"String truncated at offset {offset}: length={length}, remaining={data.Length - offset}");',
    ),
    (
        'ReadBytes',
        r'(private static byte\[\] ReadBytes\(ReadOnlySpan<byte> data, ref int offset\)\s*\{\s*var length = ReadInt32\(data, ref offset\);)',
        r'\1\n if (length < 0 || offset + length > data.Length)\n throw new InvalidOperationException($"Bytes truncated at offset {offset}: length={length}, remaining={data.Length - offset}");',
    ),
    (
        'ReadGuid',
        r'(private static Guid ReadGuid\(ReadOnlySpan<byte> data, ref int offset\)\s*\{)',
        r'\1\n if (offset + 16 > data.Length)\n throw new InvalidOperationException($"Guid truncated at offset {offset}: need 16 bytes, have {data.Length - offset}");',
    ),
    (
        'ReadInt32',
        r'(private static int ReadInt32\(ReadOnlySpan<byte> data, ref int offset\)\s*\{)',
        r'\1\n if (offset + 4 > data.Length)\n throw new InvalidOperationException($"Int32 truncated at offset {offset}: need 4 bytes, have {data.Length - offset}");',
    ),
    (
        'ReadInt64',
        r'(private static long ReadInt64\(ReadOnlySpan<byte> data, ref int offset\)\s*\{)',
        r'\1\n if (offset + 8 > data.Length)\n throw new InvalidOperationException($"Int64 truncated at offset {offset}: need 8 bytes, have {data.Length - offset}");',
    ),
    (
        'ReadBoolean',
        r'(private static bool ReadBoolean\(ReadOnlySpan<byte> data, ref int offset\)\s*\{)',
        r'\1\n if (offset + 1 > data.Length)\n throw new InvalidOperationException($"Boolean truncated at offset {offset}: need 1 byte, have {data.Length - offset}");',
    ),
]

with open(TARGET_PATH, 'r', encoding='utf-8') as f:
    content = f.read()

# Track which patterns matched nothing so the final message cannot claim
# success for a method that was never patched (e.g. its signature changed).
# NOTE: the patterns do not detect an existing guard, so running this script
# twice on the same file inserts each check twice.
unmatched = []
for method_name, pattern, replacement in BOUNDS_FIXES:
    content, hits = re.subn(pattern, replacement, content)
    if hits == 0:
        unmatched.append(method_name)

# Write the fixed file
with open(TARGET_PATH, 'w', encoding='utf-8') as f:
    f.write(content)

if unmatched:
    print(f"WARNING: no bounds check added for: {', '.join(unmatched)} (pattern did not match)")
else:
    print("Bounds checking added to all Read methods successfully!")

fix_bounds_v2.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#!/usr/bin/env python3
2+
"""Add bounds checking to binary deserialization Read methods."""
3+
4+
def add_bounds_check(lines, start_pattern, check_code):
    """Insert ``check_code`` after every line that contains ``start_pattern``.

    A match is skipped when the line directly after it already contains the
    text 'truncated at offset'. The list is modified in place and the same
    list object is returned.
    """
    cursor = 0
    while cursor < len(lines):
        matched_line = lines[cursor]
        cursor += 1  # cursor now points at the line following matched_line
        if start_pattern not in matched_line:
            continue

        follower_is_guard = (
            cursor < len(lines) and 'truncated at offset' in lines[cursor]
        )
        if follower_is_guard:
            print(f"Skipping {start_pattern} - already has bounds check")
            continue

        lines.insert(cursor, check_code)
        print(f"Added bounds check after: {matched_line.strip()}")
        cursor += 1  # step over the entry that was just inserted
    return lines
21+
22+
# Read file
23+
# Patches DatabaseExtensions.cs in place, inserting a two-line bounds check
# (an `if` line followed by a `throw` line) into each binary Read* helper.
SOURCE_PATH = 'src/SharpCoreDB/DatabaseExtensions.cs'
GUARD_MARKER = 'truncated at offset'
LENGTH_READ = 'var length = ReadInt32(data, ref offset);'

# Length-prefixed readers: the guard goes right after the length is read.
# (signature fragment, label used in the exception and log messages)
LENGTH_PREFIXED_READERS = [
    ('private static string ReadString', 'String'),
    ('private static byte[] ReadBytes', 'Bytes'),
]

# Fixed-width readers: the guard goes right after the opening brace.
# (signature fragment, label, bytes required, unit word for the message)
FIXED_WIDTH_READERS = [
    ('private static Guid ReadGuid', 'Guid', 16, 'bytes'),
    ('private static bool ReadBoolean', 'Boolean', 1, 'byte'),
    ('private static int ReadInt32(ReadOnlySpan<byte> data, ref int offset)', 'Int32', 4, 'bytes'),
    ('private static long ReadInt64', 'Int64', 8, 'bytes'),
]


def _already_guarded(lines, start):
    """Return True if a previously inserted guard begins at ``lines[start]``.

    The marker text sits on the ``throw`` line, i.e. the second line of the
    guard, so both lines are inspected; looking only at the ``if`` line would
    never detect an existing guard.
    """
    return any(GUARD_MARKER in text for text in lines[start:start + 2])


def _guard_length_prefixed(lines, sig_index, label):
    """Add a length guard after the first length read found below the signature."""
    # Look ahead at most nine lines past the signature for the length read.
    for j in range(sig_index + 1, min(len(lines), sig_index + 10)):
        if LENGTH_READ not in lines[j]:
            continue
        if j + 1 < len(lines) and not _already_guarded(lines, j + 1):
            lines.insert(j + 1, ' if (length < 0 || offset + length > data.Length)\n')
            lines.insert(j + 2, f' throw new InvalidOperationException($"{label} truncated at offset {{offset}}: length={{length}}, remaining={{data.Length - offset}}");\n')
            print(f"Added Read{label} bounds check at line {j}")
        return


def _guard_fixed_width(lines, sig_index, label, size, unit):
    """Add a fixed-size guard right after the brace that follows the signature."""
    body_start = sig_index + 2
    # Requires the opening brace on the line after the signature and at least
    # one body line after it; otherwise the method is left untouched.
    if body_start >= len(lines) or '{' not in lines[sig_index + 1]:
        return
    if _already_guarded(lines, body_start):
        return
    lines.insert(body_start, f' if (offset + {size} > data.Length)\n')
    lines.insert(body_start + 1, f' throw new InvalidOperationException($"{label} truncated at offset {{offset}}: need {size} {unit}, have {{data.Length - offset}}");\n')
    print(f"Added Read{label} bounds check at line {body_start}")


with open(SOURCE_PATH, 'r', encoding='utf-8') as f:
    lines = f.readlines()

# The list grows while it is scanned, so walk it by index and re-read its
# length on every step. Inserted guard lines never match a signature fragment.
i = 0
while i < len(lines):
    line = lines[i]
    length_label = next(
        (label for fragment, label in LENGTH_PREFIXED_READERS if fragment in line),
        None,
    )
    if length_label is not None:
        _guard_length_prefixed(lines, i, length_label)
    else:
        for fragment, label, size, unit in FIXED_WIDTH_READERS:
            if fragment in line:
                _guard_fixed_width(lines, i, label, size, unit)
                break
    i += 1

# Write back
with open(SOURCE_PATH, 'w', encoding='utf-8') as f:
    f.writelines(lines)

print("\n✅ All bounds checks added successfully!")

fix_bounds_v3.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#!/usr/bin/env python3
2+
"""Add bounds checking to binary deserialization Read methods - v3."""
3+
4+
def find_and_fix_method(lines, method_signature, bytes_needed):
    """Insert a bounds check into the first method matching ``method_signature``.

    ``lines`` is modified in place; nothing is returned. Calling the function
    again on already-patched lines leaves them unchanged.

    Args:
        lines: Source lines of the C# file, each normally ending in a newline.
        method_signature: Text to look for; the first line containing it is
            treated as the method header.
        bytes_needed: The string ``'length'`` for length-prefixed readers (the
            guard is placed after the ``var length = ReadInt32(...)`` line), or
            an integer byte count for fixed-size readers (the guard is placed
            right after the opening brace).
    """
    guard_marker = 'truncated at offset'
    length_read = 'var length = ReadInt32(data, ref offset);'
    indent = ' '
    label = f"{method_signature[:40]}..."

    def has_guard(start):
        # A guard is two lines (`if`, then `throw`) and the marker text is on
        # the `throw` line, so inspect both. Compare case-insensitively because
        # the guard written below starts with a capital "Truncated".
        return any(guard_marker in text.lower() for text in lines[start:start + 2])

    for i, line in enumerate(lines):
        if method_signature not in line:
            continue

        # The opening brace is expected on the next line; if it is not there
        # (or there is no next line), assume it shares the signature line.
        next_has_brace = i + 1 < len(lines) and '{' in lines[i + 1]
        brace_line = i + 1 if next_has_brace else i
        check_line = brace_line + 1

        if has_guard(check_line):
            print(f"✓ {label} already has bounds check")
            return

        if bytes_needed == 'length':
            # Search the brace line and the nine lines after it for the length read.
            for j in range(brace_line, min(len(lines), brace_line + 10)):
                if length_read not in lines[j]:
                    continue
                if has_guard(j + 1):
                    print(f"✓ {label} already has bounds check")
                    return
                lines.insert(j + 1, f'{indent}if (length < 0 || offset + length > data.Length)\n')
                lines.insert(j + 2, f'{indent} throw new InvalidOperationException($"Truncated at offset {{offset}}: length={{length}}, remaining={{data.Length - offset}}");\n')
                print(f"✓ Added bounds check to {label}")
                return
            # No length read near this match: keep scanning for a later one.
        else:
            unit = 'byte' if bytes_needed == 1 else 'bytes'
            lines.insert(check_line, f'{indent}if (offset + {bytes_needed} > data.Length)\n')
            lines.insert(check_line + 1, f'{indent} throw new InvalidOperationException($"Truncated at offset {{offset}}: need {bytes_needed} {unit}, have {{data.Length - offset}}");\n')
            print(f"✓ Added bounds check to {label}")
            return

    print(f"✗ Could not find {label}")
42+
43+
# Read file
44+
# Driver: load the C# file, run find_and_fix_method once per Read* helper in
# the order listed below, then write the result back over the same path.
TARGET_FILE = 'src/SharpCoreDB/DatabaseExtensions.cs'

# (method signature, 'length' for length-prefixed readers or a byte count)
READ_METHODS = (
    ('private static string ReadString(ReadOnlySpan<byte> data, ref int offset)', 'length'),
    ('private static byte[] ReadBytes(ReadOnlySpan<byte> data, ref int offset)', 'length'),
    ('private static Guid ReadGuid(ReadOnlySpan<byte> data, ref int offset)', 16),
    ('private static bool ReadBoolean(ReadOnlySpan<byte> data, ref int offset)', 1),
    ('private static int ReadInt32(ReadOnlySpan<byte> data, ref int offset)', 4),
    ('private static long ReadInt64(ReadOnlySpan<byte> data, ref int offset)', 8),
)

with open(TARGET_FILE, 'r', encoding='utf-8') as source:
    lines = list(source)

print("Adding bounds checks...\n")

for signature, width in READ_METHODS:
    find_and_fix_method(lines, signature, width)

with open(TARGET_FILE, 'w', encoding='utf-8') as sink:
    sink.write(''.join(lines))

print("\n✅ Done!")

fix_duplicate.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import re
2+
3+
# Read the file
4+
# Removes one known duplicated "String truncated" guard line from
# DatabaseExtensions.cs, and reports truthfully whether anything was removed.
TARGET_PATH = 'src/SharpCoreDB/DatabaseExtensions.cs'
# 0-based index of the suspected duplicate, i.e. the 1416th line of the file;
# it is dropped only if the line before it carries the same marker.
DUPLICATE_INDEX = 1415
MARKER = 'String truncated'

with open(TARGET_PATH, 'r', encoding='utf-8') as f:
    lines = f.readlines()

is_duplicate = (
    len(lines) > DUPLICATE_INDEX
    and MARKER in lines[DUPLICATE_INDEX - 1]
    and MARKER in lines[DUPLICATE_INDEX]
)

if is_duplicate:
    del lines[DUPLICATE_INDEX]
    # Rewrite only when something changed, so an untouched file is not
    # needlessly rewritten.
    with open(TARGET_PATH, 'w', encoding='utf-8') as f:
        f.writelines(lines)
    print("Removed duplicate line!")
else:
    print("No duplicate line found - file left unchanged.")

0 commit comments

Comments
 (0)