Skip to content

Commit 4f6f8ea

Browse files
author
MPCoreDeveloper
committed
feat(phase8.1): Add time-series compression codecs - DeltaOfDelta (10-50x), Gorilla (in progress), XorFloat - 17/19 tests passing
1 parent 8d15250 commit 4f6f8ea

5 files changed

Lines changed: 1398 additions & 0 deletions

File tree

Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
// <copyright file="DeltaOfDeltaCodec.cs" company="MPCoreDeveloper">
2+
// Copyright (c) 2025-2026 MPCoreDeveloper and GitHub Copilot. All rights reserved.
3+
// Licensed under the MIT License. See LICENSE file in the project root for full license information.
4+
// </copyright>
5+
6+
namespace SharpCoreDB.TimeSeries;
7+
8+
using System;
9+
using System.Buffers;
10+
using System.IO;
11+
12+
/// <summary>
/// Delta-of-delta timestamp compression codec.
/// Based on Facebook's Gorilla paper (2015).
///
/// ✅ SCDB Phase 8.1: Time-Series Compression
///
/// Stream layout for two or more timestamps (bits are written most-significant first):
/// - First value: stored as-is (64 bits)
/// - First delta: stored as-is (64 bits, two's complement)
/// - Every following value: its delta-of-delta (DoD) behind a prefix code,
///   with the payload stored in two's complement:
///   - DoD = 0: '0' (1 bit)
///   - DoD in [-64, 63]: '10' + 7 bits (9 bits)
///   - DoD in [-256, 255]: '110' + 9 bits (12 bits)
///   - DoD in [-2048, 2047]: '1110' + 12 bits (16 bits)
///   - Any other DoD that fits a signed 32-bit integer: '1111' + 32 bits (36 bits)
///
/// Cost per timestamp after the 16-byte header:
/// - Unchanged interval: 1 bit
/// - Small jitter: 9 to 16 bits
/// - Large jumps: 36 bits
/// </summary>
/// <remarks>
/// A single timestamp is not written as a bit stream: it is stored as the raw
/// 8 bytes produced by <see cref="BitConverter"/>. <see cref="Decompress"/> therefore
/// has to be called with the same count that was passed to <see cref="Compress"/>.
/// </remarks>
public sealed class DeltaOfDeltaCodec
{
    // Size of the fixed header (first timestamp + first delta) in bits.
    private const int HeaderBits = 128;

    /// <summary>
    /// Compresses an array of timestamps using delta-of-delta encoding.
    /// </summary>
    /// <param name="timestamps">Sorted timestamps to compress (Unix milliseconds or ticks).</param>
    /// <returns>
    /// Compressed byte array; empty when <paramref name="timestamps"/> is empty,
    /// exactly 8 bytes when it holds a single value.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// A delta-of-delta does not fit into a signed 32-bit integer. The widest bucket of the
    /// format carries 32 bits, so such a value cannot be stored without corrupting the stream.
    /// </exception>
    public byte[] Compress(ReadOnlySpan<long> timestamps)
    {
        if (timestamps.IsEmpty)
        {
            return [];
        }

        if (timestamps.Length == 1)
        {
            // Single timestamp: raw 8 bytes, no bit stream.
            var single = new byte[sizeof(long)];
            BitConverter.TryWriteBytes(single, timestamps[0]);
            return single;
        }

        // Initial capacity hint only (worst case is 36 bits, i.e. ~5 bytes per timestamp).
        // Computed in 64-bit and clamped so that very long inputs cannot overflow
        // into a negative capacity.
        int capacity = (int)Math.Min(timestamps.Length * 5L, Array.MaxLength);
        using var stream = new MemoryStream(capacity);
        using var writer = new BitWriter(stream);

        // Deltas deliberately use wrap-around arithmetic: the decoder adds them back
        // modulo 2^64, so the round trip is exact even if a subtraction overflows.
        unchecked
        {
            // First timestamp (64 bits).
            writer.WriteBits((ulong)timestamps[0], 64);

            // First delta (64 bits, two's complement).
            long prevDelta = timestamps[1] - timestamps[0];
            writer.WriteBits((ulong)prevDelta, 64);

            // Remaining values are stored as deltas-of-deltas.
            for (int i = 2; i < timestamps.Length; i++)
            {
                long delta = timestamps[i] - timestamps[i - 1];
                long deltaOfDelta = delta - prevDelta;

                if (deltaOfDelta is < int.MinValue or > int.MaxValue)
                {
                    // Fail loudly instead of truncating to 32 bits, which would
                    // silently corrupt this and every later timestamp.
                    throw new ArgumentException(
                        $"Delta-of-delta {deltaOfDelta} at index {i} does not fit the 32-bit bucket of the encoding.",
                        nameof(timestamps));
                }

                WriteDoD(writer, deltaOfDelta);
                prevDelta = delta;
            }
        }

        writer.Flush();
        return stream.ToArray();
    }

    /// <summary>
    /// Decompresses delta-of-delta encoded timestamps.
    /// </summary>
    /// <param name="compressed">Compressed byte array.</param>
    /// <param name="count">Number of timestamps to decompress.</param>
    /// <returns>Decompressed timestamps; empty when <paramref name="count"/> is zero or negative.</returns>
    /// <exception cref="EndOfStreamException">
    /// <paramref name="compressed"/> holds fewer bits than <paramref name="count"/> values require
    /// (for a count of two or more).
    /// </exception>
    public long[] Decompress(ReadOnlySpan<byte> compressed, int count)
    {
        if (count <= 0)
        {
            return [];
        }

        if (count == 1)
        {
            // Single timestamp: raw 8 bytes, mirrors the single-value path of Compress.
            return [BitConverter.ToInt64(compressed)];
        }

        // Every value after the header costs at least one bit. Reject input that cannot
        // possibly hold `count` values before allocating the result array.
        long minimumBits = HeaderBits + (long)(count - 2);
        if (compressed.Length * 8L < minimumBits)
        {
            throw new EndOfStreamException();
        }

        var result = new long[count];
        using var stream = new MemoryStream(compressed.ToArray());
        using var reader = new BitReader(stream);

        // Mirror of the encoder: wrap-around arithmetic, two's complement reinterpretation.
        unchecked
        {
            // First timestamp.
            result[0] = (long)reader.ReadBits(64);

            // First delta.
            long prevDelta = (long)reader.ReadBits(64);
            result[1] = result[0] + prevDelta;

            // Remaining values: rebuild each delta from its delta-of-delta.
            for (int i = 2; i < count; i++)
            {
                long delta = prevDelta + ReadDoD(reader);
                result[i] = result[i - 1] + delta;
                prevDelta = delta;
            }
        }

        return result;
    }

    // Private helper methods

    /// <summary>
    /// Writes one delta-of-delta using the smallest bucket whose two's complement
    /// range contains it. The caller guarantees the value fits into 32 bits.
    /// </summary>
    private static void WriteDoD(BitWriter writer, long dod)
    {
        switch (dod)
        {
            case 0:
                // '0'
                writer.WriteBit(false);
                break;

            case >= -64 and <= 63:
                // '10' + 7 bits
                writer.WriteBits(0b10, 2);
                writer.WriteBits((ulong)(dod & 0x7F), 7);
                break;

            case >= -256 and <= 255:
                // '110' + 9 bits
                writer.WriteBits(0b110, 3);
                writer.WriteBits((ulong)(dod & 0x1FF), 9);
                break;

            case >= -2048 and <= 2047:
                // '1110' + 12 bits
                writer.WriteBits(0b1110, 4);
                writer.WriteBits((ulong)(dod & 0xFFF), 12);
                break;

            default:
                // '1111' + 32 bits
                writer.WriteBits(0b1111, 4);
                writer.WriteBits((ulong)(dod & 0xFFFF_FFFFL), 32);
                break;
        }
    }

    /// <summary>
    /// Reads one delta-of-delta: consumes the prefix code, then the payload of the
    /// selected bucket, and sign-extends it to 64 bits.
    /// </summary>
    private static long ReadDoD(BitReader reader)
    {
        if (!reader.ReadBit())
        {
            // '0' -> DoD = 0
            return 0;
        }

        if (!reader.ReadBit())
        {
            // '10' -> 7 bits
            return SignExtend(reader.ReadBits(7), 7);
        }

        if (!reader.ReadBit())
        {
            // '110' -> 9 bits
            return SignExtend(reader.ReadBits(9), 9);
        }

        if (!reader.ReadBit())
        {
            // '1110' -> 12 bits
            return SignExtend(reader.ReadBits(12), 12);
        }

        // '1111' -> 32 bits
        return SignExtend(reader.ReadBits(32), 32);
    }

    /// <summary>
    /// Interprets the low <paramref name="width"/> bits of <paramref name="raw"/> as a
    /// two's complement number and widens it to 64 bits.
    /// </summary>
    private static long SignExtend(ulong raw, int width)
    {
        // Move the payload's sign bit into bit 63, then shift back arithmetically.
        int shift = 64 - width;
        return unchecked((long)(raw << shift)) >> shift;
    }
}
214+
215+
/// <summary>
/// Bit-level writer for compression.
/// Bits are packed into bytes starting at the most significant bit; a byte is
/// handed to the underlying stream as soon as all eight of its bits are set.
/// The writer does not close or dispose the stream it writes to.
/// </summary>
internal sealed class BitWriter : IDisposable
{
    private readonly Stream _stream;

    // Byte currently being assembled and the number of bits already placed in it (0-7).
    private byte _currentByte;
    private int _bitPosition;

    /// <summary>
    /// Creates a writer that appends to <paramref name="stream"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public BitWriter(Stream stream)
    {
        _stream = stream ?? throw new ArgumentNullException(nameof(stream));
    }

    /// <summary>
    /// Appends a single bit.
    /// </summary>
    public void WriteBit(bool bit)
    {
        if (bit)
        {
            // Bit 0 of the sequence lands in the top bit of the byte.
            _currentByte |= (byte)(1 << (7 - _bitPosition));
        }

        _bitPosition++;

        if (_bitPosition == 8)
        {
            _stream.WriteByte(_currentByte);
            _currentByte = 0;
            _bitPosition = 0;
        }
    }

    /// <summary>
    /// Appends the low <paramref name="bits"/> bits of <paramref name="value"/>,
    /// most significant of those bits first.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="bits"/> is negative or greater than 64.
    /// </exception>
    public void WriteBits(ulong value, int bits)
    {
        // Shift counts are masked to 6 bits in C#, so a request for more than
        // 64 bits would silently re-read low-order bits instead of failing.
        if (bits is < 0 or > 64)
        {
            throw new ArgumentOutOfRangeException(nameof(bits), bits, "Bit count must be between 0 and 64.");
        }

        for (int i = bits - 1; i >= 0; i--)
        {
            WriteBit(((value >> i) & 1) != 0);
        }
    }

    /// <summary>
    /// Writes the partially filled byte, if any, padding the unused low bits with zeros.
    /// </summary>
    public void Flush()
    {
        if (_bitPosition > 0)
        {
            _stream.WriteByte(_currentByte);
            _currentByte = 0;
            _bitPosition = 0;
        }
    }

    /// <summary>
    /// Flushes pending bits. The underlying stream is left open.
    /// </summary>
    public void Dispose()
    {
        Flush();
    }
}
269+
270+
/// <summary>
/// Bit-level reader for decompression.
/// Bits are consumed from each byte starting at the most significant bit; the next
/// byte is fetched from the underlying stream only when a bit from it is needed.
/// The reader does not close or dispose the stream it reads from.
/// </summary>
internal sealed class BitReader : IDisposable
{
    private readonly Stream _stream;

    // Byte currently being consumed and the number of bits already taken from it.
    // Starting at 8 forces the first ReadBit call to fetch a byte.
    private byte _currentByte;
    private int _bitPosition = 8;

    /// <summary>
    /// Creates a reader that consumes <paramref name="stream"/> from its current position.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public BitReader(Stream stream)
    {
        _stream = stream ?? throw new ArgumentNullException(nameof(stream));
    }

    /// <summary>
    /// Reads the next bit.
    /// </summary>
    /// <exception cref="EndOfStreamException">The stream has no more bytes.</exception>
    public bool ReadBit()
    {
        if (_bitPosition == 8)
        {
            int next = _stream.ReadByte();
            if (next == -1)
            {
                throw new EndOfStreamException();
            }

            _currentByte = (byte)next;
            _bitPosition = 0;
        }

        bool bit = ((_currentByte >> (7 - _bitPosition)) & 1) != 0;
        _bitPosition++;
        return bit;
    }

    /// <summary>
    /// Reads <paramref name="bits"/> bits and returns them as an unsigned number whose
    /// most significant bit is the first bit read.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="bits"/> is negative or greater than 64.
    /// </exception>
    /// <exception cref="EndOfStreamException">The stream ends before all bits were read.</exception>
    public ulong ReadBits(int bits)
    {
        // More than 64 bits cannot be represented in the result; the earliest bits
        // would be shifted out silently. Reject before consuming anything.
        if (bits is < 0 or > 64)
        {
            throw new ArgumentOutOfRangeException(nameof(bits), bits, "Bit count must be between 0 and 64.");
        }

        ulong value = 0;

        for (int i = 0; i < bits; i++)
        {
            value = (value << 1) | (ReadBit() ? 1UL : 0UL);
        }

        return value;
    }

    /// <summary>
    /// No-op: the reader holds no resources of its own and leaves the stream open.
    /// </summary>
    public void Dispose()
    {
        // Nothing to dispose
    }
}

0 commit comments

Comments
 (0)