Skip to content

Commit 8192e09

Browse files
docs+test: document skip_duplicates behavior with secondary unique constraints
Resolves #1049. On PostgreSQL, skip_duplicates=True already enforces secondary unique constraints, because ON CONFLICT (pk) DO NOTHING targets only the primary key. On MySQL, ON DUPLICATE KEY UPDATE catches all unique keys, so secondary unique violations are silently skipped too.

Changes:
- Update insert() docstring to document the backend difference.
- Add integration tests covering: PK-only skip, secondary unique violation on PostgreSQL (raises), MySQL silent skip (documented asymmetry), composite unique indexes, batch inserts with mixed duplicates, and tables without secondary unique indexes.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 34acbbe commit 8192e09

File tree

2 files changed

+212
-1
lines changed

2 files changed

+212
-1
lines changed

src/datajoint/table.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -745,7 +745,13 @@ def insert(
745745
replace : bool, optional
746746
If True, replaces the existing tuple.
747747
skip_duplicates : bool, optional
748-
If True, silently skip duplicate inserts.
748+
If True, silently skip rows with duplicate primary key values.
749+
On **PostgreSQL**, secondary unique constraint violations still
750+
raise an error even when ``skip_duplicates=True``, because the
751+
generated ``ON CONFLICT (pk) DO NOTHING`` clause targets only
752+
the primary key. On **MySQL**, ``ON DUPLICATE KEY UPDATE``
753+
catches all unique-key conflicts, so secondary unique violations
754+
are also silently skipped.
749755
ignore_extra_fields : bool, optional
750756
If False (default), fields that are not in the heading raise error.
751757
allow_direct_insert : bool, optional
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
"""
2+
Tests for skip_duplicates behavior with secondary unique constraints.
3+
4+
Verifies that skip_duplicates=True skips primary key duplicates; PostgreSQL
5+
still raises on secondary unique violations, while MySQL silently skips them.
6+
Resolves #1049.
7+
"""
8+
9+
import time
10+
11+
import pytest
12+
13+
import datajoint as dj
14+
from datajoint.errors import DuplicateError
15+
16+
17+
def _drop_schema_if_exists(connection, schema_name):
    """Best-effort removal of ``schema_name``; failures are logged, never raised.

    Does nothing when ``connection`` is not connected.
    """
    import logging

    if not connection.is_connected:
        return
    try:
        connection.query(f"DROP DATABASE IF EXISTS {connection.adapter.quote_identifier(schema_name)}")
    except Exception:
        # Cleanup must never fail a test, but a leaked schema should be
        # visible in the log instead of being silently ignored.
        logging.getLogger(__name__).warning("Could not drop test schema %s", schema_name, exc_info=True)


@pytest.fixture(scope="function")
def schema_by_backend(connection_by_backend, db_creds_by_backend):
    """Yield a freshly created schema on the backend under test.

    The schema name combines the backend name from ``db_creds_by_backend``
    with the last eight digits of the current epoch time in milliseconds,
    truncated to 64 characters. Any schema with that name is dropped before
    the test (left over from an earlier run) and again after it.
    """
    backend = db_creds_by_backend["backend"]
    test_id = str(int(time.time() * 1000))[-8:]
    schema_name = f"djtest_skipdup_{backend}_{test_id}"[:64]

    _drop_schema_if_exists(connection_by_backend, schema_name)

    schema = dj.Schema(schema_name, connection=connection_by_backend)
    yield schema

    _drop_schema_if_exists(connection_by_backend, schema_name)
42+
43+
44+
def test_skip_duplicates_pk_match(schema_by_backend):
    """A row whose primary key already exists is dropped without raising."""

    @schema_by_backend
    class Item(dj.Manual):
        definition = """
        item_id : int
        ---
        name : varchar(100)
        email : varchar(100)
        unique index (email)
        """

    first_row = {"item_id": 1, "name": "Alice", "email": "alice@example.com"}
    same_pk_row = {"item_id": 1, "name": "Bob", "email": "bob@example.com"}

    Item.insert1(first_row)

    # Re-using item_id=1 with different attribute values must be a no-op.
    Item.insert1(same_pk_row, skip_duplicates=True)

    # The stored row still carries the values from the first insert.
    stored = (Item & "item_id=1").fetch1()
    assert stored["name"] == "Alice"
    assert stored["email"] == "alice@example.com"
69+
70+
71+
def test_skip_duplicates_unique_violation_raises_on_postgres(schema_by_backend, db_creds_by_backend):
    """PostgreSQL: a secondary unique conflict raises despite skip_duplicates=True.

    Regression test for #1049. The second row uses an unused primary key but
    repeats an ``email`` value covered by the unique index, so the insert is
    expected to raise DuplicateError.
    """
    backend = db_creds_by_backend["backend"]
    if backend != "postgresql":
        pytest.skip("PostgreSQL-specific: ON CONFLICT (pk) DO NOTHING preserves unique constraints")

    @schema_by_backend
    class Item(dj.Manual):
        definition = """
        item_id : int
        ---
        name : varchar(100)
        email : varchar(100)
        unique index (email)
        """

    Item.insert1({"item_id": 1, "name": "Alice", "email": "alice@example.com"})

    # item_id=2 is new, but the email collides with the row for item_id=1.
    clashing_row = {"item_id": 2, "name": "Bob", "email": "alice@example.com"}
    with pytest.raises(DuplicateError):
        Item.insert1(clashing_row, skip_duplicates=True)
98+
99+
100+
def test_skip_duplicates_unique_on_mysql(schema_by_backend, db_creds_by_backend):
    """MySQL: a secondary unique conflict is silently skipped with skip_duplicates=True.

    Records the known asymmetry with PostgreSQL: ON DUPLICATE KEY UPDATE
    reacts to every unique key, not only the primary key.
    """
    backend = db_creds_by_backend["backend"]
    if backend != "mysql":
        pytest.skip("MySQL-specific: ON DUPLICATE KEY UPDATE catches all unique keys")

    @schema_by_backend
    class Item(dj.Manual):
        definition = """
        item_id : int
        ---
        name : varchar(100)
        email : varchar(100)
        unique index (email)
        """

    Item.insert1({"item_id": 1, "name": "Alice", "email": "alice@example.com"})

    # item_id=2 is new, but the email collides; MySQL drops the row quietly.
    clashing_row = {"item_id": 2, "name": "Bob", "email": "alice@example.com"}
    Item.insert1(clashing_row, skip_duplicates=True)

    # The table still holds just the first row.
    assert len(Item()) == 1
    surviving = (Item & "item_id=1").fetch1()
    assert surviving["name"] == "Alice"
130+
131+
132+
def test_skip_duplicates_no_unique_index(schema_by_backend):
    """Without secondary unique indexes, only primary key duplicates are skipped."""

    @schema_by_backend
    class Simple(dj.Manual):
        definition = """
        item_id : int
        ---
        name : varchar(100)
        """

    Simple.insert1({"item_id": 1, "name": "Alice"})

    # An existing primary key with a different name leaves the stored row as is.
    Simple.insert1({"item_id": 1, "name": "Bob"}, skip_duplicates=True)
    stored = (Simple & "item_id=1").fetch1()
    assert stored["name"] == "Alice"

    # An unused primary key is inserted normally.
    Simple.insert1({"item_id": 2, "name": "Bob"}, skip_duplicates=True)
    assert len(Simple()) == 2
152+
153+
154+
def test_skip_duplicates_composite_unique(schema_by_backend, db_creds_by_backend):
    """PostgreSQL: a conflict on a composite unique index raises with skip_duplicates=True."""
    backend = db_creds_by_backend["backend"]
    if backend != "postgresql":
        pytest.skip("PostgreSQL-specific unique constraint enforcement")

    @schema_by_backend
    class Record(dj.Manual):
        definition = """
        record_id : int
        ---
        first_name : varchar(100)
        last_name : varchar(100)
        data : varchar(255)
        unique index (first_name, last_name)
        """

    Record.insert1({"record_id": 1, "first_name": "Alice", "last_name": "Smith", "data": "v1"})

    # record_id=2 is new, but (first_name, last_name) repeats the stored pair.
    clashing_row = {"record_id": 2, "first_name": "Alice", "last_name": "Smith", "data": "v2"}
    with pytest.raises(DuplicateError):
        Record.insert1(clashing_row, skip_duplicates=True)
178+
179+
180+
def test_skip_duplicates_batch_mixed(schema_by_backend, db_creds_by_backend):
    """PostgreSQL: a batch with a PK duplicate and a unique conflict raises DuplicateError."""
    backend = db_creds_by_backend["backend"]
    if backend != "postgresql":
        pytest.skip("PostgreSQL-specific unique constraint enforcement")

    @schema_by_backend
    class Item(dj.Manual):
        definition = """
        item_id : int
        ---
        email : varchar(100)
        unique index (email)
        """

    Item.insert1({"item_id": 1, "email": "alice@example.com"})

    batch = [
        {"item_id": 2, "email": "bob@example.com"},  # new row
        {"item_id": 1, "email": "duplicate-pk@example.com"},  # PK duplicate
        {"item_id": 3, "email": "alice@example.com"},  # email conflicts with item_id=1
    ]

    # The email conflict in the last row is expected to make the whole call raise.
    with pytest.raises(DuplicateError):
        Item.insert(batch, skip_duplicates=True)

0 commit comments

Comments
 (0)