@@ -396,6 +396,22 @@ def test_nested_structured(self):
396396 assert inner .structured is True
397397 assert inner .fields [1 ].nullable is False
398398
def test_bare_object_returns_variant(self):
    """A bare ``OBJECT`` keyword (no field list) must parse to ``VariantType``."""
    assert _parse_structured_type_str("OBJECT", MAX_STRING_SIZE) == VariantType()
402+
def test_bare_map_returns_variant(self):
    """A bare ``MAP`` keyword (no key/value types) must parse to ``VariantType``."""
    assert _parse_structured_type_str("MAP", MAX_STRING_SIZE) == VariantType()
406+
def test_bare_array_returns_variant(self):
    """A bare ``ARRAY`` keyword (no element type) must parse to ``VariantType``."""
    assert _parse_structured_type_str("ARRAY", MAX_STRING_SIZE) == VariantType()
410+
def test_bare_object_lowercase_returns_variant(self):
    """The lowercase spelling ``object`` must parse to ``VariantType`` as well."""
    assert _parse_structured_type_str("object", MAX_STRING_SIZE) == VariantType()
414+
399415
400416# ---------------------------------------------------------------------------
401417# _infer_schema_for_file_format (mock-based)
@@ -427,7 +443,13 @@ def _build_infer_schema_rows(columns):
427443class TestInferSchemaStructuredTypePath :
428444 """Tests the structured-type branch inside _infer_schema_for_file_format."""
429445
430- def _run_infer (self , columns , use_structured = True , use_relaxed_types = False ):
446+ def _run_infer (
447+ self ,
448+ columns ,
449+ use_structured = True ,
450+ use_relaxed_types = False ,
451+ file_format = "PARQUET" ,
452+ ):
431453 session = _make_mock_session (use_structured = use_structured )
432454 rows = _build_infer_schema_rows (columns )
433455
@@ -453,7 +475,7 @@ def _run_infer(self, columns, use_structured=True, use_relaxed_types=False):
453475 schema_to_cast ,
454476 transformations ,
455477 exception ,
456- ) = reader ._infer_schema_for_file_format ("@stage/path" , "PARQUET" )
478+ ) = reader ._infer_schema_for_file_format ("@stage/path" , file_format )
457479 assert exception is None , f"Unexpected exception: { exception } "
458480 return new_schema , schema_to_cast , transformations
459481
@@ -624,3 +646,149 @@ def test_mixed_columns(self):
624646
625647 assert len (schema_to_cast ) == 5
626648 assert len (transformations ) == 5
649+
650+ # --- bare structured keywords (older backends) ---
651+
def test_bare_object_returns_variant_type(self):
    """Infer one column typed as bare ``OBJECT``.

    Expects a ``VariantType`` field and a cast entry that is just the
    quoted path, with no ``::`` cast appended.
    """
    inferred, cast_entries, _ = self._run_infer(
        [("address", "OBJECT", True, "$1:address::OBJECT")]
    )

    assert inferred[0].datatype == VariantType()
    assert cast_entries[0][0] == '$1:"address"'
660+
def test_bare_map_returns_variant_type(self):
    """Infer one column typed as bare ``MAP``.

    Expects a ``VariantType`` field and a cast entry that is just the
    quoted path, with no ``::`` cast appended.
    """
    inferred, cast_entries, _ = self._run_infer(
        [("props", "MAP", True, "$1:props::MAP")]
    )

    assert inferred[0].datatype == VariantType()
    assert cast_entries[0][0] == '$1:"props"'
669+
def test_bare_array_returns_variant_type(self):
    """Infer one column typed as bare ``ARRAY``.

    Expects a ``VariantType`` field and a cast entry that is just the
    quoted path, with no ``::`` cast appended.
    """
    inferred, cast_entries, _ = self._run_infer(
        [("tags", "ARRAY", True, "$1:tags::ARRAY")]
    )

    assert inferred[0].datatype == VariantType()
    assert cast_entries[0][0] == '$1:"tags"'
678+
def test_mixed_bare_and_detailed_structured(self):
    """Mix scalar, bare-keyword and fully specified structured columns.

    Bare ``OBJECT``/``MAP`` columns are expected to come back as
    ``VariantType`` with an uncast path; the detailed ``ARRAY(...)`` column
    is expected to be an ``ArrayType`` whose cast omits ``NOT NULL``.
    """
    id_col = ("id", "NUMBER(38,0)", True, "$1:id::NUMBER(38,0)")
    addr_col = ("addr", "OBJECT", True, "$1:addr::OBJECT")
    tags_col = (
        "tags",
        "ARRAY(VARCHAR NOT NULL)",
        True,
        "$1:tags::ARRAY(VARCHAR NOT NULL)",
    )
    meta_col = ("meta", "MAP", True, "$1:meta::MAP")

    inferred, cast_entries, _ = self._run_infer(
        [id_col, addr_col, tags_col, meta_col]
    )

    # Inferred datatypes, in column order.
    assert inferred[0].datatype == LongType()
    assert inferred[1].datatype == VariantType()
    assert isinstance(inferred[2].datatype, ArrayType)
    assert inferred[3].datatype == VariantType()

    # Only the detailed type keeps a cast; bare keywords are left as the raw path.
    assert cast_entries[1][0] == '$1:"addr"'
    assert "::ARRAY(VARCHAR)" in cast_entries[2][0]
    assert cast_entries[3][0] == '$1:"meta"'
701+
702+ # --- JSON format path ---
703+
def test_json_format_uses_structured_path(self):
    """Scalar columns inferred with ``file_format="JSON"`` keep their types and casts."""
    scalar_columns = [
        ("id", "NUMBER(38,0)", True, "$1:id::NUMBER(38,0)"),
        ("name", "TEXT", True, "$1:name::TEXT"),
    ]
    inferred, cast_entries, _ = self._run_infer(
        scalar_columns, file_format="JSON"
    )

    assert len(inferred) == 2
    id_field = inferred[0]
    name_field = inferred[1]
    assert id_field.datatype == LongType()
    assert name_field.datatype == StringType()

    # Each cast expression still carries the original type suffix.
    assert "::NUMBER(38,0)" in cast_entries[0][0]
    assert "::TEXT" in cast_entries[1][0]
716+
def test_json_format_structured_array(self):
    """A detailed ``ARRAY(VARCHAR NOT NULL)`` column inferred with ``file_format="JSON"``.

    Expects a structured ``ArrayType`` with non-nullable elements, and a
    cast expression from which the ``NOT NULL`` qualifier has been removed.
    """
    array_column = (
        "tags",
        "ARRAY(VARCHAR NOT NULL)",
        True,
        "$1:tags::ARRAY(VARCHAR NOT NULL)",
    )
    inferred, cast_entries, _ = self._run_infer(
        [array_column], file_format="JSON"
    )

    array_type = inferred[0].datatype
    assert isinstance(array_type, ArrayType)
    assert array_type.structured is True
    assert array_type.contains_null is False
    assert "NOT NULL" not in cast_entries[0][0]
733+
def test_json_format_bare_map(self):
    """A bare ``MAP`` column inferred with ``file_format="JSON"`` becomes ``VariantType``, uncast."""
    inferred, cast_entries, _ = self._run_infer(
        [("props", "MAP", True, "$1:props::MAP")],
        file_format="JSON",
    )

    assert inferred[0].datatype == VariantType()
    assert cast_entries[0][0] == '$1:"props"'
742+
743+
744+ # ---------------------------------------------------------------------------
745+ # Session parameter defaults
746+ # ---------------------------------------------------------------------------
747+
748+
class TestSessionParameterDefaults:
    """Exercise the ``_use_structured_type_infer_schema`` flag on mock sessions."""

    @staticmethod
    def _infer_parquet(columns, use_structured):
        """Run ``_infer_schema_for_file_format`` for PARQUET on a fresh mock session.

        Returns the call's 4-tuple result unchanged.
        """
        session = _make_mock_session(use_structured=use_structured)
        rows = _build_infer_schema_rows(columns)
        # Three canned run_query responses; the middle one carries the rows.
        session._conn.run_query.side_effect = [{}, {"data": rows}, {}]
        reader = DataFrameReader(session, _emit_ast=False)
        return reader._infer_schema_for_file_format("@stage/path", "PARQUET")

    def test_structured_infer_schema_default_is_false(self):
        # NOTE(review): the flag is passed explicitly here, so this checks the
        # mock helper's wiring; it may not cover the real Session default -- confirm.
        disabled_session = _make_mock_session(use_structured=False)
        assert disabled_session._use_structured_type_infer_schema is False

    def test_structured_infer_schema_can_be_enabled(self):
        enabled_session = _make_mock_session(use_structured=True)
        assert enabled_session._use_structured_type_infer_schema is True

    def test_flag_controls_parser_path(self):
        """Check inference results with the flag on and with it off.

        Flag on: a detailed ``OBJECT(...)`` column is inferred as ``StructType``.
        Flag off: a plain ``NUMBER(38,0)`` column is inferred as ``LongType``
        and keeps its cast.
        """
        # Flag ON: the detailed OBJECT definition should yield a StructType.
        nested_column = (
            "addr",
            "OBJECT(city VARCHAR, zip NUMBER(38,0))",
            True,
            "$1:addr::OBJECT(city VARCHAR, zip NUMBER(38,0))",
        )
        schema_on, _, _, exc_on = self._infer_parquet(
            [nested_column], use_structured=True
        )
        assert exc_on is None
        assert isinstance(schema_on[0].datatype, StructType)

        # Flag OFF: legacy path, checked with a scalar column only.
        # NOTE(review): the flag-on tests in TestInferSchemaStructuredTypePath
        # assert the same LongType / "::NUMBER(38,0)" results for this kind of
        # column, so these assertions alone may not tell the two paths apart.
        scalar_column = ("id", "NUMBER(38,0)", True, "$1:id::NUMBER(38,0)")
        schema_off, cast_off, _, exc_off = self._infer_parquet(
            [scalar_column], use_structured=False
        )
        assert exc_off is None
        assert schema_off[0].datatype == LongType()
        assert "::NUMBER(38,0)" in cast_off[0][0]
0 commit comments