Skip to content

Commit e3f9c49

Browse files
vkuttyp and claude committed
release: 2.5.3 — fix composite UNIQUE indexes
Executor's index-write / unique-check / index-delete paths all read idx.Columns[0] only. Multi-column UNIQUE indexes silently degraded to "unique on first column", falsely rejecting any second row with the same first-column value regardless of the rest of the tuple. Found via a Postgres → CosmoKvD dry-run on a 200k-row mail database: UNIQUE (mailbox_id, uid) on the messages table rejected the second row with mailbox_id=1 even though Postgres had a million distinct (mailbox_id, uid) tuples with no duplicates. Fix encodes ALL indexed columns as a tuple (each value carries its own FoundationDB-style 00 00 terminator so the sequence is byte-comparable in the same order as SQL row comparison). NULL handling follows SQL semantics: any NULL anywhere in the tuple disables the uniqueness check, since NULL != NULL even for compound keys. Single-column WHERE prefix scans against composite indexes keep working because the new encoding starts with enc(first_column). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e361dbf commit e3f9c49

4 files changed

Lines changed: 172 additions & 16 deletions

File tree

Directory.Build.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@
77
<Authors>vkuttyp</Authors>
88
<PackageLicenseExpression>MIT</PackageLicenseExpression>
99
<RepositoryUrl>https://github.com/vkuttyp/CosmoSQLClient-Dotnet</RepositoryUrl>
10-
<Version>2.5.2</Version>
10+
<Version>2.5.3</Version>
1111
</PropertyGroup>
1212
</Project>

src/CosmoSQLClient.CosmoKv/Execution/Executor.cs

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -172,17 +172,33 @@ private async Task<int> ExecuteCreateIndex(CreateIndexStatement s, CancellationT
172172
await _catalog.CreateIndexAsync(index, ct);
173173

174174
// Backfill existing rows so the new index covers data already present.
175-
// Single-column-only in Phase 4; the multi-col schema field is reserved.
176-
int colIdx = table.IndexOf(index.Columns[0]);
175+
var colIdxs = ResolveIndexColumns(table, index);
177176
await foreach (var (row, rowId) in ScanRowsWithIdAsync(table, ct))
178177
{
179-
var enc = IndexKeyCodec.Encode(row[colIdx]);
178+
var enc = IndexKeyCodec.Encode(ProjectIndexedValues(row, colIdxs));
180179
var key = KvKeys.IndexEntry(table.Name, index.Name, enc, rowId);
181180
await _kv.SetAsync(key, Array.Empty<byte>(), ct);
182181
}
183182
return 0;
184183
}
185184

185+
/// <summary>Map each column name in <paramref name="idx"/> to its position in the table.</summary>
186+
private static int[] ResolveIndexColumns(TableSchema table, IndexSchema idx)
187+
{
188+
var result = new int[idx.Columns.Count];
189+
for (int i = 0; i < idx.Columns.Count; i++)
190+
result[i] = table.IndexOf(idx.Columns[i]);
191+
return result;
192+
}
193+
194+
/// <summary>Project a row's indexed columns into a SqlValue tuple.</summary>
195+
private static SqlValue[] ProjectIndexedValues(SqlValue[] row, int[] colIdxs)
196+
{
197+
var vals = new SqlValue[colIdxs.Length];
198+
for (int i = 0; i < colIdxs.Length; i++) vals[i] = row[colIdxs[i]];
199+
return vals;
200+
}
201+
186202
// ── DROP ────────────────────────────────────────────────────────────────
187203

188204
private async Task<int> ExecuteDropTable(DropTableStatement s, CancellationToken ct)
@@ -399,31 +415,36 @@ private async Task WriteIndexEntriesAsync(
399415
{
400416
foreach (var idx in _catalog.IndexesFor(table.Name))
401417
{
402-
int colIdx = table.IndexOf(idx.Columns[0]);
403-
var val = row[colIdx];
418+
var colIdxs = ResolveIndexColumns(table, idx);
419+
var vals = ProjectIndexedValues(row, colIdxs);
404420
// UNIQUE check before writing the new entry. NULL is treated
405421
// per SQL semantics: NULL <> NULL, so multiple NULL rows in a
406-
// UNIQUE column are allowed.
407-
if (idx.IsUnique && !val.IsNull)
422+
// UNIQUE column are allowed. For composite indexes, the row is
423+
// considered unique-violating only if EVERY indexed column has
424+
// a non-NULL value (any NULL anywhere preserves SQL's NULL-is-
425+
// not-comparable semantics).
426+
if (idx.IsUnique && !vals.Any(v => v.IsNull))
408427
{
409-
await EnforceUniqueAsync(table, idx, val, excludeRowId: rowId, ct);
428+
await EnforceUniqueAsync(table, idx, vals, excludeRowId: rowId, ct);
410429
}
411-
var enc = IndexKeyCodec.Encode(val);
430+
var enc = IndexKeyCodec.Encode(vals);
412431
var key = KvKeys.IndexEntry(table.Name, idx.Name, enc, rowId);
413432
await _kv.SetAsync(key, Array.Empty<byte>(), ct);
414433
}
415434
}
416435

417436
/// <summary>
418437
/// Scan the unique index for any existing entry with the same encoded
419-
/// value as <paramref name="val"/>, excluding the row that owns the
438+
/// tuple as <paramref name="vals"/>, excluding the row that owns the
420439
/// pending write (so UPDATE on the same key isn't a self-conflict).
421440
/// Throws <see cref="UniqueConstraintViolationException"/> on match.
441+
/// Tuple-aware: composite UNIQUE indexes match all columns, not just
442+
/// the first.
422443
/// </summary>
423444
private async Task EnforceUniqueAsync(
424-
TableSchema table, IndexSchema idx, SqlValue val, long excludeRowId, CancellationToken ct)
445+
TableSchema table, IndexSchema idx, SqlValue[] vals, long excludeRowId, CancellationToken ct)
425446
{
426-
var encWithTerm = IndexKeyCodec.Encode(val);
447+
var encWithTerm = IndexKeyCodec.Encode(vals);
427448
var prefix = KvKeys.IndexPrefixBytes(idx.TableName, idx.Name);
428449
var fullPrefix = new byte[prefix.Length + encWithTerm.Length];
429450
Buffer.BlockCopy(prefix, 0, fullPrefix, 0, prefix.Length);
@@ -434,8 +455,11 @@ private async Task EnforceUniqueAsync(
434455
{
435456
long otherRowId = KvKeys.ExtractRowId(item.Key);
436457
if (otherRowId == excludeRowId) continue;
458+
var formatted = vals.Length == 1
459+
? vals[0].ToString() ?? "NULL"
460+
: "(" + string.Join(", ", vals.Select(v => v.ToString() ?? "NULL")) + ")";
437461
throw new UniqueConstraintViolationException(
438-
$"Duplicate value for UNIQUE column on index '{idx.Name}': {val}.");
462+
$"Duplicate value for UNIQUE column on index '{idx.Name}': {formatted}.");
439463
}
440464
}
441465

@@ -444,8 +468,8 @@ private async Task DeleteIndexEntriesAsync(
444468
{
445469
foreach (var idx in _catalog.IndexesFor(table.Name))
446470
{
447-
int colIdx = table.IndexOf(idx.Columns[0]);
448-
var enc = IndexKeyCodec.Encode(row[colIdx]);
471+
var colIdxs = ResolveIndexColumns(table, idx);
472+
var enc = IndexKeyCodec.Encode(ProjectIndexedValues(row, colIdxs));
449473
var key = KvKeys.IndexEntry(table.Name, idx.Name, enc, rowId);
450474
await _kv.DeleteAsync(key, ct);
451475
}

src/CosmoSQLClient.CosmoKv/Storage/IndexKeyCodec.cs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,25 @@ public static byte[] Encode(SqlValue v)
5151
return ms.ToArray();
5252
}
5353

54+
/// <summary>
55+
/// Encode a tuple of values, in order, with terminators between them.
56+
/// Used for composite indexes — `(a, b, c)` encodes as `enc(a) enc(b)
57+
/// enc(c)`, where each `enc()` already includes its own terminator, so
58+
/// the whole tuple stays byte-comparable in the same lexicographic
59+
/// order as SQL row comparison `(a, b, c) < (a', b', c')`.
60+
/// </summary>
61+
public static byte[] Encode(IReadOnlyList<SqlValue> vals)
62+
{
63+
if (vals.Count == 1) return Encode(vals[0]);
64+
var ms = new MemoryStream(capacity: 16 * vals.Count);
65+
foreach (var v in vals)
66+
{
67+
byte[] part = Encode(v);
68+
ms.Write(part, 0, part.Length);
69+
}
70+
return ms.ToArray();
71+
}
72+
5473
/// <summary>
5574
/// Encode without the terminator suffix — used for range-scan seek
5675
/// targets where the boundary should be exclusive of the value's
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
using CosmoSQLClient.Core;
2+
using CosmoSQLClient.CosmoKv;
3+
4+
namespace CosmoSQLClient.CosmoKv.Tests;
5+
6+
/// <summary>
7+
/// v2.5.3 — composite UNIQUE index correctness regression. Pre-fix, the
8+
/// executor encoded only the first column of a multi-column index for
9+
/// both writes and the uniqueness check, so a UNIQUE (a, b) silently
10+
/// degraded to UNIQUE (a). A second row with same a but different b
11+
/// was falsely rejected. Found via the marivil → cosmokvd dry-run on
12+
/// the messages table's UNIQUE (mailbox_id, uid).
13+
/// </summary>
14+
public class Phase14CompositeUniqueTests : IAsyncLifetime
15+
{
16+
private readonly string _dir = Path.Combine(
17+
Path.GetTempPath(),
18+
"cosmosql-cosmokv-p14-" + Guid.NewGuid().ToString("N"));
19+
private CosmoKvConnection? _conn;
20+
21+
public async Task InitializeAsync()
22+
{
23+
_conn = await CosmoKvConnection.OpenAsync(
24+
new CosmoKvConfiguration { DataSource = _dir });
25+
await _conn.ExecuteAsync("""
26+
CREATE TABLE Messages (
27+
Id BIGINT IDENTITY PRIMARY KEY,
28+
MailboxId BIGINT NOT NULL,
29+
Uid BIGINT NOT NULL,
30+
Subject NVARCHAR(64) NOT NULL)
31+
""");
32+
await _conn.ExecuteAsync(
33+
"CREATE UNIQUE INDEX ix_msg_mbox_uid ON Messages(MailboxId, Uid)");
34+
}
35+
36+
public async Task DisposeAsync()
37+
{
38+
if (_conn is not null) await _conn.DisposeAsync();
39+
try { if (Directory.Exists(_dir)) Directory.Delete(_dir, recursive: true); } catch { }
40+
}
41+
42+
[Fact]
43+
public async Task SameFirstColumn_DifferentSecond_AreNotDuplicates()
44+
{
45+
// Both rows have MailboxId=1; only Uid differs. Pre-fix this
46+
// threw: Duplicate value for UNIQUE column on index 'ix_msg_mbox_uid': 1.
47+
await _conn!.ExecuteAsync(
48+
"INSERT INTO Messages (MailboxId, Uid, Subject) VALUES (1, 1, 'a')");
49+
await _conn.ExecuteAsync(
50+
"INSERT INTO Messages (MailboxId, Uid, Subject) VALUES (1, 2, 'b')");
51+
await _conn.ExecuteAsync(
52+
"INSERT INTO Messages (MailboxId, Uid, Subject) VALUES (1, 3, 'c')");
53+
54+
var rows = await _conn.QueryAsync(
55+
"SELECT COUNT(*) AS n FROM Messages WHERE MailboxId = @m",
56+
new[] { SqlParameter.Named("@m", SqlValue.From(1L)) });
57+
Assert.Equal(3L, rows[0]["n"].AsInt());
58+
}
59+
60+
[Fact]
61+
public async Task ExactDuplicateTuple_IsRejected()
62+
{
63+
await _conn!.ExecuteAsync(
64+
"INSERT INTO Messages (MailboxId, Uid, Subject) VALUES (1, 1, 'first')");
65+
var ex = await Assert.ThrowsAsync<UniqueConstraintViolationException>(() =>
66+
_conn.ExecuteAsync(
67+
"INSERT INTO Messages (MailboxId, Uid, Subject) VALUES (1, 1, 'dup')"));
68+
Assert.Contains("ix_msg_mbox_uid", ex.Message);
69+
}
70+
71+
[Fact]
72+
public async Task SingleColumnPrefixScan_StillFindsCompositeEntries()
73+
{
74+
// The planner uses idx.Columns[0] for a single-col WHERE; the
75+
// tuple-encoded entry must still be in [enc(first), nextlex(enc(first))).
76+
// 6 rows across 3 different MailboxId values.
77+
for (long m = 1; m <= 3; m++)
78+
for (long u = 1; u <= 2; u++)
79+
await _conn!.ExecuteAsync(
80+
"INSERT INTO Messages (MailboxId, Uid, Subject) VALUES (@m, @u, 's')",
81+
new[]
82+
{
83+
SqlParameter.Named("@m", SqlValue.From(m)),
84+
SqlParameter.Named("@u", SqlValue.From(u)),
85+
});
86+
87+
var rows = await _conn!.QueryAsync(
88+
"SELECT COUNT(*) AS n FROM Messages WHERE MailboxId = @m",
89+
new[] { SqlParameter.Named("@m", SqlValue.From(2L)) });
90+
Assert.Equal(2L, rows[0]["n"].AsInt());
91+
}
92+
93+
[Fact]
94+
public async Task NullInComposite_DoesNotEnforceUniqueness()
95+
{
96+
// SQL: NULL <> NULL even when comparing tuples. Two rows with
97+
// any NULL in the indexed columns must both be allowed.
98+
await _conn!.ExecuteAsync("""
99+
CREATE TABLE Tags (
100+
Id BIGINT IDENTITY PRIMARY KEY,
101+
A BIGINT NULL,
102+
B NVARCHAR(64) NULL)
103+
""");
104+
await _conn.ExecuteAsync("CREATE UNIQUE INDEX ix_tags_a_b ON Tags(A, B)");
105+
await _conn.ExecuteAsync("INSERT INTO Tags (A, B) VALUES (NULL, 'x')");
106+
await _conn.ExecuteAsync("INSERT INTO Tags (A, B) VALUES (NULL, 'x')");
107+
await _conn.ExecuteAsync("INSERT INTO Tags (A, B) VALUES (1, NULL)");
108+
await _conn.ExecuteAsync("INSERT INTO Tags (A, B) VALUES (1, NULL)");
109+
110+
var rows = await _conn.QueryAsync("SELECT COUNT(*) AS n FROM Tags");
111+
Assert.Equal(4L, rows[0]["n"].AsInt());
112+
}
113+
}

0 commit comments

Comments
 (0)