6161from pyiceberg .transforms import BucketTransform , IdentityTransform
6262from pyiceberg .typedef import UTF8
6363from pyiceberg .types import (
64+ BinaryType ,
6465 BooleanType ,
66+ DateType ,
67+ DecimalType ,
68+ DoubleType ,
69+ FixedType ,
70+ FloatType ,
6571 IntegerType ,
72+ ListType ,
6673 LongType ,
74+ MapType ,
6775 NestedField ,
6876 StringType ,
77+ StructType ,
78+ TimestampType ,
79+ TimestamptzType ,
80+ TimeType ,
81+ UUIDType ,
6982)
7083
7184HIVE_CATALOG_NAME = "hive"
@@ -181,15 +194,20 @@ def test_check_number_of_namespaces(table_schema_simple: Schema) -> None:
181194 catalog .create_table ("table" , schema = table_schema_simple )
182195
183196
197+ @pytest .mark .parametrize ("hive2_compatible" , [True , False ])
184198@patch ("time.time" , MagicMock (return_value = 12345 ))
185- def test_create_table (table_schema_simple : Schema , hive_database : HiveDatabase , hive_table : HiveTable ) -> None :
199+ def test_create_table (
200+ table_schema_with_all_types : Schema , hive_database : HiveDatabase , hive_table : HiveTable , hive2_compatible : bool
201+ ) -> None :
186202 catalog = HiveCatalog (HIVE_CATALOG_NAME , uri = HIVE_METASTORE_FAKE_URL )
203+ if hive2_compatible :
204+ catalog = HiveCatalog (HIVE_CATALOG_NAME , uri = HIVE_METASTORE_FAKE_URL , ** {"hive.hive2-compatible" : "true" })
187205
188206 catalog ._client = MagicMock ()
189207 catalog ._client .__enter__ ().create_table .return_value = None
190208 catalog ._client .__enter__ ().get_table .return_value = hive_table
191209 catalog ._client .__enter__ ().get_database .return_value = hive_database
192- catalog .create_table (("default" , "table" ), schema = table_schema_simple , properties = {"owner" : "javaberg" })
210+ catalog .create_table (("default" , "table" ), schema = table_schema_with_all_types , properties = {"owner" : "javaberg" })
193211
194212 called_hive_table : HiveTable = catalog ._client .__enter__ ().create_table .call_args [0 ][0 ]
195213 # This one is generated within the function itself, so we need to extract
@@ -207,9 +225,27 @@ def test_create_table(table_schema_simple: Schema, hive_database: HiveDatabase,
207225 retention = None ,
208226 sd = StorageDescriptor (
209227 cols = [
210- FieldSchema (name = "foo" , type = "string" , comment = None ),
211- FieldSchema (name = "bar" , type = "int" , comment = None ),
212- FieldSchema (name = "baz" , type = "boolean" , comment = None ),
228+ FieldSchema (name = 'boolean' , type = 'boolean' , comment = None ),
229+ FieldSchema (name = 'integer' , type = 'int' , comment = None ),
230+ FieldSchema (name = 'long' , type = 'bigint' , comment = None ),
231+ FieldSchema (name = 'float' , type = 'float' , comment = None ),
232+ FieldSchema (name = 'double' , type = 'double' , comment = None ),
233+ FieldSchema (name = 'decimal' , type = 'decimal(32,3)' , comment = None ),
234+ FieldSchema (name = 'date' , type = 'date' , comment = None ),
235+ FieldSchema (name = 'time' , type = 'string' , comment = None ),
236+ FieldSchema (name = 'timestamp' , type = 'timestamp' , comment = None ),
237+ FieldSchema (
238+ name = 'timestamptz' ,
239+ type = 'timestamp' if hive2_compatible else 'timestamp with local time zone' ,
240+ comment = None ,
241+ ),
242+ FieldSchema (name = 'string' , type = 'string' , comment = None ),
243+ FieldSchema (name = 'uuid' , type = 'string' , comment = None ),
244+ FieldSchema (name = 'fixed' , type = 'binary' , comment = None ),
245+ FieldSchema (name = 'binary' , type = 'binary' , comment = None ),
246+ FieldSchema (name = 'list' , type = 'array<string>' , comment = None ),
247+ FieldSchema (name = 'map' , type = 'map<string,int>' , comment = None ),
248+ FieldSchema (name = 'struct' , type = 'struct<inner_string:string,inner_int:int>' , comment = None ),
213249 ],
214250 location = f"{ hive_database .locationUri } /table" ,
215251 inputFormat = "org.apache.hadoop.mapred.FileInputFormat" ,
@@ -266,12 +302,46 @@ def test_create_table(table_schema_simple: Schema, hive_database: HiveDatabase,
266302 location = metadata .location ,
267303 table_uuid = metadata .table_uuid ,
268304 last_updated_ms = metadata .last_updated_ms ,
269- last_column_id = 3 ,
305+ last_column_id = 22 ,
270306 schemas = [
271307 Schema (
272- NestedField (field_id = 1 , name = "foo" , field_type = StringType (), required = False ),
273- NestedField (field_id = 2 , name = "bar" , field_type = IntegerType (), required = True ),
274- NestedField (field_id = 3 , name = "baz" , field_type = BooleanType (), required = False ),
308+ NestedField (field_id = 1 , name = 'boolean' , field_type = BooleanType (), required = True ),
309+ NestedField (field_id = 2 , name = 'integer' , field_type = IntegerType (), required = True ),
310+ NestedField (field_id = 3 , name = 'long' , field_type = LongType (), required = True ),
311+ NestedField (field_id = 4 , name = 'float' , field_type = FloatType (), required = True ),
312+ NestedField (field_id = 5 , name = 'double' , field_type = DoubleType (), required = True ),
313+ NestedField (field_id = 6 , name = 'decimal' , field_type = DecimalType (precision = 32 , scale = 3 ), required = True ),
314+ NestedField (field_id = 7 , name = 'date' , field_type = DateType (), required = True ),
315+ NestedField (field_id = 8 , name = 'time' , field_type = TimeType (), required = True ),
316+ NestedField (field_id = 9 , name = 'timestamp' , field_type = TimestampType (), required = True ),
317+ NestedField (field_id = 10 , name = 'timestamptz' , field_type = TimestamptzType (), required = True ),
318+ NestedField (field_id = 11 , name = 'string' , field_type = StringType (), required = True ),
319+ NestedField (field_id = 12 , name = 'uuid' , field_type = UUIDType (), required = True ),
320+ NestedField (field_id = 13 , name = 'fixed' , field_type = FixedType (length = 12 ), required = True ),
321+ NestedField (field_id = 14 , name = 'binary' , field_type = BinaryType (), required = True ),
322+ NestedField (
323+ field_id = 15 ,
324+ name = 'list' ,
325+ field_type = ListType (type = 'list' , element_id = 18 , element_type = StringType (), element_required = True ),
326+ required = True ,
327+ ),
328+ NestedField (
329+ field_id = 16 ,
330+ name = 'map' ,
331+ field_type = MapType (
332+ type = 'map' , key_id = 19 , key_type = StringType (), value_id = 20 , value_type = IntegerType (), value_required = True
333+ ),
334+ required = True ,
335+ ),
336+ NestedField (
337+ field_id = 17 ,
338+ name = 'struct' ,
339+ field_type = StructType (
340+ NestedField (field_id = 21 , name = 'inner_string' , field_type = StringType (), required = False ),
341+ NestedField (field_id = 22 , name = 'inner_int' , field_type = IntegerType (), required = True ),
342+ ),
343+ required = True ,
344+ ),
275345 schema_id = 0 ,
276346 identifier_field_ids = [2 ],
277347 )
0 commit comments