Skip to content

Commit bf63dc3

Browse files
vkuttypclaude
andcommitted
v2.5.13: bump CosmoKv 2.1.9 → 3.2.0, add encryption + admin surface
Brings the driver onto CosmoKv's audited 3.2 release line and exposes the v2.2 / v3.0 / v3.1 / v3.2 features the previous pin couldn't reach. CosmoSQLClient.CosmoKv - csproj: CosmoKv 2.1.9 → 3.2.0. - CosmoKvConfiguration: new EncryptionKey (byte[]?) opens or creates the store as an AES-256-GCM encrypted database; length validated by CosmoKv's own DbOptions check (32 bytes). New AllowPlaintextBackup (bool, default false) is the explicit opt-in before a plaintext BackupAsync is allowed against an encrypted DB. Connection-string parser accepts EncryptionKey=<base64> and AllowPlaintextBackup=true; programmatic construction is preferred for key material — connection strings are commonly logged. - CosmoKvConnection.OpenAsync now builds the full DbOptions (was DbOptions.Default(path)), threading EncryptionKey + AllowPlaintextBackup through. - New BackupAsync(Stream, byte[]? backupKey, CT) overload — non-null key produces an AEAD-encrypted COSMOBAK stream; null requires AllowPlaintextBackup on encrypted sources. - New RunValueLogGcAsync(CT) wrapper exposing vlog space reclaim without reaching for RawDb. - Storage/Catalog.cs: documented Item.ReadValueAsync lifetime at the two HydrateAsync scan loops — CosmoKv v3.1 made the iterator-only validity an explicit invariant; the call sites are already safe, but a future refactor that caches Item past the loop body would break. CosmoSQLClient.CosmoKv.Cli - .backup FILENAME writes a COSMOBAK snapshot (refuses to overwrite). - .vacuum runs RunValueLogGcAsync and reports files reclaimed. - .help + README updated. Opening an encrypted store from the shell works today via the connection-string EncryptionKey=<base64> form. Tests - +3 connection tests: COSMOBAK magic on BackupAsync, no-throw zero return on RunValueLogGcAsync against an empty store, base64-decoded EncryptionKey + AllowPlaintextBackup round-trip through Parse. - 276/276 driver pass; 6/6 Http and 10/10 Pipes pass against the transitive bump. 
Migration note: encrypted databases written by the previous driver (CosmoKv 2.x) are NOT readable here. CosmoKv v3.0 changed the WAL/vlog AEAD AAD scheme from fileId to (fileId, frameOffset) to defeat intra-file frame-relocation attacks. To migrate, take a backup with the previous driver, then restore into a fresh v3.x store. Plaintext databases upgrade transparently. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 06b2c8e commit bf63dc3

8 files changed

Lines changed: 165 additions & 6 deletions

File tree

Directory.Build.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@
77
<Authors>vkuttyp</Authors>
88
<PackageLicenseExpression>MIT</PackageLicenseExpression>
99
<RepositoryUrl>https://github.com/vkuttyp/CosmoSQLClient-Dotnet</RepositoryUrl>
10-
<Version>2.5.12</Version>
10+
<Version>2.5.13</Version>
1111
</PropertyGroup>
1212
</Project>

src/CosmoSQLClient.CosmoKv.Cli/Cli.cs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,8 @@ .tables List user tables
286286
.schema [TABLE] CREATE TABLE script (all if omitted)
287287
.indexes [TABLE] List indexes (all if omitted)
288288
.dump Schema + INSERTs for the whole database
289+
.backup FILENAME Write a COSMOBAK snapshot to FILENAME
290+
.vacuum Run value-log GC to reclaim space
289291
.format FMT Switch output (table|csv|tsv|json)
290292
.quit / .exit Leave the shell
291293
""");
@@ -332,6 +334,18 @@ .format FMT Switch output (table|csv|tsv|json)
332334
await DumpAsync();
333335
return true;
334336

337+
case ".backup":
338+
if (arg is null) { await Console.Error.WriteLineAsync("usage: .backup FILENAME"); return true; }
339+
await BackupAsync(arg);
340+
return true;
341+
342+
case ".vacuum":
343+
{
344+
int dropped = await _conn.RunValueLogGcAsync();
345+
Console.WriteLine($"vacuum: {dropped} vlog file(s) reclaimed");
346+
}
347+
return true;
348+
335349
case ".format":
336350
if (arg is null) Console.WriteLine($"format: {Format.ToString().ToLowerInvariant()}");
337351
else
@@ -347,6 +361,15 @@ .format FMT Switch output (table|csv|tsv|json)
347361
}
348362
}
349363

364+
/// <summary>
/// Writes a COSMOBAK snapshot of the open connection to <paramref name="path"/>
/// and prints the number of bytes written to stdout.
/// </summary>
/// <param name="path">Destination file. Must not already exist.</param>
/// <remarks>
/// If the backup fails after the destination file has been created, the
/// partial file is removed (best effort) so that a retry with the same name
/// is not rejected by <see cref="FileMode.CreateNew"/>. The original
/// exception is rethrown unchanged.
/// </remarks>
private async Task BackupAsync(string path)
{
    // FileMode.CreateNew so a typo on the destination doesn't silently
    // overwrite a prior backup. Users can rm + retry if they meant it.
    //
    // Opened outside the try block on purpose: if the open itself throws
    // (e.g. the file already exists) there is nothing of ours to clean up,
    // and we must never delete a file we did not create.
    var fs = new FileStream(path, FileMode.CreateNew, FileAccess.Write, FileShare.None);

    long bytes;
    try
    {
        await using (fs)
        {
            bytes = await _conn.BackupAsync(fs);
        }
    }
    catch
    {
        // The stream has already been disposed by the time we get here, so
        // the handle is released and the half-written file can be removed.
        try { File.Delete(path); }
        catch (IOException) { /* best effort — surface the original failure */ }
        catch (UnauthorizedAccessException) { /* best effort */ }
        throw;
    }

    Console.WriteLine($"backup: {bytes:n0} bytes written to {path}");
}
372+
350373
private async Task DumpAsync()
351374
{
352375
Console.WriteLine("-- cosmokv .dump");

src/CosmoSQLClient.CosmoKv.Cli/README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ Dot-commands work only at the start of a fresh line (not mid-statement) and cons
9292
| `.schema [TABLE]` | `CREATE TABLE` script for `TABLE` (or every table if omitted). Round-trips through the parser, so you can replay it. |
9393
| `.indexes [TABLE]` | `CREATE INDEX` scripts for `TABLE` (or every table). |
9494
| `.dump` | Schema + `INSERT` statements for the entire database — like `sqlite3`'s `.dump`. Safe to replay against an empty CosmoKv. |
95+
| `.backup FILENAME` | Write a COSMOBAK snapshot to `FILENAME`. The snapshot is taken via MVCC, so concurrent writes continue uninterrupted. Refuses to overwrite an existing file. |
96+
| `.vacuum` | Run value-log GC to reclaim space from tombstoned/overwritten large values. Prints the number of vlog files dropped. |
9597
| `.format FMT` | Switch the output format mid-session. |
9698
| `.quit` / `.exit` / `Ctrl-D` | Leave the shell. |
9799

@@ -121,7 +123,9 @@ cosmokv ./mydb - < migrations/001_schema.sql
121123

122124
- Statement splitting is lexical only — it understands single-quoted strings but not block comments or T-SQL `GO`. A trailing `;` ends the current statement.
123125
- No `readline`-style history or autocomplete; if you want those, wrap the CLI with `rlwrap`.
124-
- `.dump` rewrites every row as a fully-qualified `INSERT`. For tables with millions of rows, prefer `BackupAsync` (in code) to a COSMOBAK snapshot.
126+
- `.dump` rewrites every row as a fully-qualified `INSERT`. For tables with millions of rows, prefer `.backup FILENAME` — it writes a COSMOBAK snapshot directly without the parse/encode round-trip.
127+
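- `.backup` only writes plaintext snapshots; on an encrypted store it therefore requires `AllowPlaintextBackup=true` in the connection string, otherwise CosmoKv refuses the backup. To write an encrypted snapshot (optionally under a different key), use `BackupAsync(stream, backupKey)` in code.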
128+
- Opening an encrypted store from the shell requires a connection string with `EncryptionKey=<base64>` (32-byte AES-256 key, base64-encoded). Bare-path `.open ./mydb` opens unencrypted.
125129

126130
## Related
127131

src/CosmoSQLClient.CosmoKv/CosmoKvConfiguration.cs

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,32 @@ public sealed record CosmoKvConfiguration
2121
/// </summary>
2222
public bool CreateIfMissing { get; init; } = true;
2323

24+
/// <summary>
25+
/// 32-byte master key for AES-256-GCM encryption at rest. When set, the
26+
/// CosmoKv directory is opened (or created) as an encrypted store: every
27+
/// SST block, WAL frame, and vlog frame is AEAD-encrypted. The key never
28+
/// touches disk — only the wrapped DEK in <c>KEYREGISTRY</c> does.
29+
/// <para>
30+
/// Prefer programmatic construction over <see cref="Parse"/> for this
31+
/// field: connection strings are commonly logged. The parser's base64
32+
/// <c>EncryptionKey=</c> form exists only as a convenience.
33+
/// </para>
34+
/// <para>
35+
/// CosmoKv v3.0 changed the AEAD AAD scheme; encrypted databases written
36+
/// by CosmoKv v2.x are <b>not</b> readable here. Reseed from a backup
37+
/// taken with the old driver before upgrading.
38+
/// </para>
39+
/// </summary>
40+
public byte[]? EncryptionKey { get; init; }
41+
42+
/// <summary>
43+
/// When <c>true</c>, <c>BackupAsync(stream, backupKey: null)</c> on an
44+
/// encrypted database is allowed to emit plaintext bytes. Default
45+
/// <c>false</c> — encrypted DBs must supply a backup key. Set to true
46+
/// only for explicit export-to-plaintext workflows.
/// <see cref="Parse"/> reads this from the <c>AllowPlaintextBackup</c>
/// connection-string key.
47+
/// </summary>
48+
public bool AllowPlaintextBackup { get; init; }
49+
2450
/// <summary>
/// Connection-string form of this configuration. Only <c>Data Source</c> and
/// <c>CreateIfMissing</c> are emitted; <see cref="EncryptionKey"/> and
/// <see cref="AllowPlaintextBackup"/> are not included, so feeding the result
/// back into <see cref="Parse"/> does not restore those two settings.
/// NOTE(review): presumably intentional so key material never ends up in a
/// logged connection string — confirm.
/// </summary>
public string ConnectionString => $"Data Source={DataSource};CreateIfMissing={CreateIfMissing}";
2551

2652
/// <summary>
@@ -55,10 +81,28 @@ public static CosmoKvConfiguration Parse(string connectionString)
5581
if (Get("CreateIfMissing", "Create") is string s && bool.TryParse(s, out var b))
5682
create = b;
5783

84+
byte[]? key = null;
85+
if (Get("EncryptionKey", "Key") is string keyStr && keyStr.Length > 0)
86+
{
87+
try { key = Convert.FromBase64String(keyStr); }
88+
catch (FormatException fe)
89+
{
90+
throw new ArgumentException(
91+
"EncryptionKey must be base64-encoded 32-byte AES-256 key material.",
92+
nameof(connectionString), fe);
93+
}
94+
}
95+
96+
bool allowPlaintextBackup = false;
97+
if (Get("AllowPlaintextBackup") is string apb && bool.TryParse(apb, out var apbB))
98+
allowPlaintextBackup = apbB;
99+
58100
return new CosmoKvConfiguration
59101
{
60-
DataSource = dataSource,
61-
CreateIfMissing = create,
102+
DataSource = dataSource,
103+
CreateIfMissing = create,
104+
EncryptionKey = key,
105+
AllowPlaintextBackup = allowPlaintextBackup,
62106
};
63107
}
64108
}

src/CosmoSQLClient.CosmoKv/CosmoKvConnection.cs

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,13 +85,46 @@ public bool IdentityInsertEnabled
8585
/// Online backup of the underlying CosmoKv store. Writes a COSMOBAK
8686
/// snapshot to <paramref name="output"/> using MVCC, so writes
8787
/// continue uninterrupted. Returns the number of bytes written.
88+
/// <para>
89+
/// On encrypted databases this overload emits plaintext bytes and
90+
/// therefore requires <see cref="CosmoKvConfiguration.AllowPlaintextBackup"/>
91+
/// to be set; otherwise CosmoKv throws. Use
92+
/// <see cref="BackupAsync(Stream, byte[], CancellationToken)"/> to
93+
/// re-encrypt the backup with a different key.
94+
/// </para>
8895
/// </summary>
8996
public Task<long> BackupAsync(Stream output, CancellationToken ct = default)
9097
{
9198
// Check connection state before dereferencing _db; the call below is a
// straight pass-through to the store's own BackupAsync.
// NOTE(review): EnsureOpen presumably throws when the connection is not
// open — confirm against its definition.
EnsureOpen();
9299
return _db!.BackupAsync(output, ct);
93100
}
94101

102+
/// <summary>
103+
/// Online backup with an explicit backup key. When non-null, the COSMOBAK
104+
/// stream is AEAD-encrypted with <paramref name="backupKey"/> (32 bytes,
105+
/// AES-256-GCM) — independent of the source database's master key, so
106+
/// restore can rewrap with a fresh key. Pass <c>null</c> for plaintext
107+
/// (requires <see cref="CosmoKvConfiguration.AllowPlaintextBackup"/> on
108+
/// encrypted sources).
109+
/// </summary>
/// <param name="output">Stream that receives the COSMOBAK snapshot.</param>
/// <param name="backupKey">Key used to encrypt the snapshot, or <c>null</c> for a plaintext snapshot.</param>
/// <param name="ct">Cancellation token forwarded to the underlying store.</param>
/// <returns>
/// The value returned by the underlying store's <c>BackupAsync</c>
/// (presumably the number of bytes written, as for the keyless overload — confirm).
/// </returns>
110+
public Task<long> BackupAsync(Stream output, byte[]? backupKey, CancellationToken ct = default)
111+
{
112+
// Check connection state before dereferencing _db.
EnsureOpen();
113+
return _db!.BackupAsync(output, backupKey, ct);
114+
}
115+
116+
/// <summary>
117+
/// Reclaim space in the value log by rewriting live entries out of
118+
/// vlog files whose tombstone density crosses CosmoKv's GC threshold.
119+
/// Safe to call concurrently with reads and writes; runs at most one
120+
/// GC pass at a time. Returns the number of vlog files dropped.
121+
/// </summary>
/// <param name="ct">Cancellation token forwarded to the underlying store.</param>
/// <returns>The number of vlog files dropped, as reported by the underlying store.</returns>
122+
public Task<int> RunValueLogGcAsync(CancellationToken ct = default)
123+
{
124+
// Check connection state before dereferencing _db.
EnsureOpen();
125+
return _db!.RunValueLogGcAsync(ct);
126+
}
127+
95128
/// <summary>
96129
/// Pass-through to <see cref="CosmoKv.Db.GetStats"/>. Useful for
97130
/// admin/health surfaces (LSM level fan-out, table counts, optional
@@ -222,7 +255,14 @@ public static async Task<CosmoKvConnection> OpenAsync(
222255
throw new DirectoryNotFoundException(
223256
$"Data directory '{config.DataSource}' does not exist and CreateIfMissing=false.");
224257

225-
var db = await CosmoKvDb.OpenAsync(CosmoKvDbOptions.Default(config.DataSource));
258+
var dbOptions = CosmoKvDbOptions.Default(config.DataSource) with
259+
{
260+
// Length is validated by DbOptions (32 bytes for AES-256-GCM);
261+
// surfacing the throw here would just duplicate that check.
262+
EncryptionKey = config.EncryptionKey,
263+
AllowPlaintextBackup = config.AllowPlaintextBackup,
264+
};
265+
var db = await CosmoKvDb.OpenAsync(dbOptions);
226266
ct.ThrowIfCancellationRequested();
227267
var catalog = await Catalog.LoadAsync(db, ct);
228268
return new CosmoKvConnection(config, db, catalog);

src/CosmoSQLClient.CosmoKv/CosmoSQLClient.CosmoKv.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
</ItemGroup>
66

77
<ItemGroup>
8-
<PackageReference Include="CosmoKv" Version="2.1.9" />
8+
<PackageReference Include="CosmoKv" Version="3.2.0" />
99
<!-- Microsoft.VisualStudio.Threading.AsyncReaderWriterLock — battle-
1010
tested writer-priority RW lock for the CosmoKvConnection
1111
hot path. The v2.5.7 attempt at rolling our own failed under

src/CosmoSQLClient.CosmoKv/Storage/Catalog.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ private async Task HydrateAsync(CancellationToken ct)
4545
};
4646
await foreach (var item in _db.IterateAsync(iterOpts).WithCancellation(ct))
4747
{
48+
// Resolve the value inside the iterator scope: CosmoKv's Item may
49+
// hold a vlog pointer that is only valid for this iterator's
50+
// lifetime. Never cache `item` past the loop body.
4851
var bytes = await item.ReadValueAsync();
4952
var schema = JsonSerializer.Deserialize<TableSchemaDto>(bytes, Json)!;
5053
var resolved = schema.ToSchema();
@@ -57,6 +60,7 @@ private async Task HydrateAsync(CancellationToken ct)
5760
};
5861
await foreach (var item in _db.IterateAsync(idxOpts).WithCancellation(ct))
5962
{
63+
// Same lifetime contract as above — resolve before iteration advances.
6064
var bytes = await item.ReadValueAsync();
6165
var schema = JsonSerializer.Deserialize<IndexSchema>(bytes, Json)!;
6266
_indexes[schema.Name] = schema;

tests/CosmoSQLClient.CosmoKv.Tests/CosmoKvConnectionTests.cs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,48 @@ public void Configuration_Parse_MissingDataSource_Throws()
9292
{
9393
Assert.Throws<ArgumentException>(() => CosmoKvConfiguration.Parse("CreateIfMissing=true"));
9494
}
95+
96+
[Fact]
public async Task BackupAsync_WritesCosmobakHeader()
{
    // Arrange: open a store and put one table with one row in it so there
    // is something to back up.
    var config = new CosmoKvConfiguration { DataSource = _dir };
    await using var connection = await CosmoKvConnection.OpenAsync(config);
    await connection.ExecuteAsync("CREATE TABLE t (id INT PRIMARY KEY, name NVARCHAR(50))");
    await connection.ExecuteAsync("INSERT INTO t (id, name) VALUES (1, 'alice')");

    // Act: snapshot into memory.
    using var snapshot = new MemoryStream();
    long written = await connection.BackupAsync(snapshot);

    // Assert: the reported byte count matches what landed in the stream...
    Assert.True(written > 0);
    Assert.Equal(written, snapshot.Length);

    // ...and the first 8 bytes are the ASCII "COSMOBAK" magic.
    Assert.True(snapshot.Length >= 8);
    byte[] contents = snapshot.ToArray();
    string magic = System.Text.Encoding.ASCII.GetString(contents, 0, 8);
    Assert.Equal("COSMOBAK", magic);
}
114+
115+
[Fact]
public async Task RunValueLogGcAsync_OnEmptyStore_ReturnsZero()
{
    // Arrange: a freshly opened store with nothing written to it.
    var config = new CosmoKvConfiguration { DataSource = _dir };
    await using var connection = await CosmoKvConnection.OpenAsync(config);

    // Act
    int reclaimed = await connection.RunValueLogGcAsync();

    // Assert: nothing to reclaim — the call must not throw and must report 0.
    Assert.Equal(0, reclaimed);
}
124+
125+
[Fact]
public void Configuration_Parse_EncryptionKey_FromBase64()
{
    // Arrange: a recognisable 32-byte key (0, 1, 2, ... 31) and its base64 form.
    var expectedKey = new byte[32];
    for (int index = 0; index < 32; index++)
    {
        expectedKey[index] = (byte)index;
    }
    string encodedKey = Convert.ToBase64String(expectedKey);
    string connectionString =
        "Data Source=/tmp/x;EncryptionKey=" + encodedKey + ";AllowPlaintextBackup=true";

    // Act
    var parsed = CosmoKvConfiguration.Parse(connectionString);

    // Assert: the key is decoded back to the original bytes and the flag is picked up.
    Assert.NotNull(parsed.EncryptionKey);
    Assert.Equal(expectedKey, parsed.EncryptionKey);
    Assert.True(parsed.AllowPlaintextBackup);
}
95139
}

0 commit comments

Comments
 (0)