2525from pyiceberg .exceptions import NoSuchTableError
2626from pyiceberg .expressions import And , EqualTo , Reference
2727from pyiceberg .expressions .literals import LongLiteral
28+ from pyiceberg .io .pyarrow import schema_to_pyarrow
2829from pyiceberg .schema import Schema
2930from pyiceberg .table import UpsertResult
3031from pyiceberg .table .upsert_util import create_match_filter
@@ -328,7 +329,7 @@ def test_upsert_with_identifier_fields(catalog: Catalog) -> None:
328329
329330 schema = Schema (
330331 NestedField (1 , "city" , StringType (), required = True ),
331- NestedField (2 , "inhabitants " , IntegerType (), required = True ),
332+ NestedField (2 , "population " , IntegerType (), required = True ),
332333 # Mark City as the identifier field, also known as the primary-key
333334 identifier_field_ids = [1 ],
334335 )
@@ -338,30 +339,30 @@ def test_upsert_with_identifier_fields(catalog: Catalog) -> None:
338339 arrow_schema = pa .schema (
339340 [
340341 pa .field ("city" , pa .string (), nullable = False ),
341- pa .field ("inhabitants " , pa .int32 (), nullable = False ),
342+ pa .field ("population " , pa .int32 (), nullable = False ),
342343 ]
343344 )
344345
345346 # Write some data
346347 df = pa .Table .from_pylist (
347348 [
348- {"city" : "Amsterdam" , "inhabitants " : 921402 },
349- {"city" : "San Francisco" , "inhabitants " : 808988 },
350- {"city" : "Drachten" , "inhabitants " : 45019 },
351- {"city" : "Paris" , "inhabitants " : 2103000 },
349+ {"city" : "Amsterdam" , "population " : 921402 },
350+ {"city" : "San Francisco" , "population " : 808988 },
351+ {"city" : "Drachten" , "population " : 45019 },
352+ {"city" : "Paris" , "population " : 2103000 },
352353 ],
353354 schema = arrow_schema ,
354355 )
355356 tbl .append (df )
356357
357358 df = pa .Table .from_pylist (
358359 [
359- # Will be updated, the inhabitants has been updated
360- {"city" : "Drachten" , "inhabitants " : 45505 },
360+ # Will be updated, the population has been updated
361+ {"city" : "Drachten" , "population " : 45505 },
361362 # New row, will be inserted
362- {"city" : "Berlin" , "inhabitants " : 3432000 },
363+ {"city" : "Berlin" , "population " : 3432000 },
363364 # Ignored, already exists in the table
364- {"city" : "Paris" , "inhabitants " : 2103000 },
365+ {"city" : "Paris" , "population " : 2103000 },
365366 ],
366367 schema = arrow_schema ,
367368 )
@@ -388,3 +389,32 @@ def test_create_match_filter_single_condition() -> None:
388389 EqualTo (term = Reference (name = "order_id" ), literal = LongLiteral (101 )),
389390 EqualTo (term = Reference (name = "order_line_id" ), literal = LongLiteral (1 )),
390391 )
392+
393+
394+ def test_upsert_without_identifier_fields (catalog : Catalog ) -> None :
395+ identifier = "default.test_upsert_without_identifier_fields"
396+ _drop_table (catalog , identifier )
397+
398+ schema = Schema (
399+ NestedField (1 , "city" , StringType (), required = True ),
400+ NestedField (2 , "population" , IntegerType (), required = True ),
401+ # No identifier field :o
402+ identifier_field_ids = [],
403+ )
404+
405+ tbl = catalog .create_table (identifier , schema = schema )
406+ # Write some data
407+ df = pa .Table .from_pylist (
408+ [
409+ {"city" : "Amsterdam" , "population" : 921402 },
410+ {"city" : "San Francisco" , "population" : 808988 },
411+ {"city" : "Drachten" , "population" : 45019 },
412+ {"city" : "Paris" , "population" : 2103000 },
413+ ],
414+ schema = schema_to_pyarrow (schema ),
415+ )
416+
417+ with pytest .raises (
418+ ValueError , match = "Join columns could not be found, please set identifier-field-ids or pass in explicitly."
419+ ):
420+ tbl .upsert (df )
0 commit comments