@@ -49,6 +49,88 @@ def test_returns_correct_count(self, sample_dataframe):
4949 defs = load_feature_definitions_from_dataframe (sample_dataframe )
5050 assert len (defs ) == 3
5151
@pytest.mark.parametrize(
    "dtype",
    [
        "Int8",
        "Int16",
        "Int32",
        "Int64",
        "UInt8",
        "UInt16",
        "UInt32",
        "UInt64",
    ],
)
def test_infers_integral_type_with_pandas_nullable_int(self, dtype):
    """Check that a pandas nullable integer column yields "Integral".

    Runs once per signed and unsigned nullable integer dtype name.
    """
    id_column = pd.Series([1, 2, 3], dtype=dtype)
    frame = pd.DataFrame({"id": id_column})

    definitions = load_feature_definitions_from_dataframe(frame)

    # The frame's only column is "id"; inspect the first definition.
    first_definition = definitions[0]
    assert first_definition.feature_type == "Integral"
65+
@pytest.mark.parametrize("dtype", ["Float32", "Float64"])
def test_infers_fractional_type_with_pandas_nullable_float(self, dtype):
    """Check that a pandas nullable float column yields "Fractional".

    Runs once for each of the "Float32" and "Float64" dtype names.
    """
    value_column = pd.Series([1.1, 2.2, 3.3], dtype=dtype)
    frame = pd.DataFrame({"value": value_column})

    definitions = load_feature_definitions_from_dataframe(frame)

    # The frame's only column is "value"; inspect the first definition.
    first_definition = definitions[0]
    assert first_definition.feature_type == "Fractional"
77+
def test_infers_string_type_with_pandas_string_dtype(self):
    """Check that a column with the pandas "string" dtype yields "String"."""
    name_column = pd.Series(["a", "b", "c"], dtype="string")
    frame = pd.DataFrame({"name": name_column})

    definitions = load_feature_definitions_from_dataframe(frame)

    # The frame's only column is "name"; inspect the first definition.
    first_definition = definitions[0]
    assert first_definition.feature_type == "String"
82+
def test_infers_correct_types_after_convert_dtypes(self):
    """Check inferred feature types for a frame passed through convert_dtypes().

    The frame starts from plain Python lists of ints, floats and strings
    and is converted with ``DataFrame.convert_dtypes()`` before loading.
    """
    raw_frame = pd.DataFrame(
        {
            "id": [1, 2, 3],
            "price": [1.1, 2.2, 3.3],
            "name": ["a", "b", "c"],
        }
    )
    converted = raw_frame.convert_dtypes()

    definitions = load_feature_definitions_from_dataframe(converted)

    # Resolve every definition up front (first match per column name), so a
    # missing column surfaces before any type assertion runs.
    by_name = {
        column: next(d for d in definitions if d.feature_name == column)
        for column in ("id", "price", "name")
    }

    assert by_name["id"].feature_type == "Integral"
    assert by_name["price"].feature_type == "Fractional"
    assert by_name["name"].feature_type == "String"
96+
def test_infers_correct_types_with_mixed_nullable_and_numpy_dtypes(self):
    """Check inferred types when numpy and nullable dtypes share one frame.

    The frame interleaves numpy-backed ("int64", "float64") and pandas
    nullable ("Int64", "Float64") columns.
    """
    columns = {
        "numpy_int": pd.Series([1, 2, 3], dtype="int64"),
        "nullable_float": pd.Series([1.1, 2.2, 3.3], dtype="Float64"),
        "nullable_int": pd.Series([10, 20, 30], dtype="Int64"),
        "numpy_float": pd.Series([0.1, 0.2, 0.3], dtype="float64"),
    }
    frame = pd.DataFrame(columns)

    definitions = load_feature_definitions_from_dataframe(frame)

    def definition_for(column_name):
        # First definition whose feature_name matches the column.
        return next(
            d for d in definitions if d.feature_name == column_name
        )

    expected_types = (
        ("numpy_int", "Integral"),
        ("nullable_float", "Fractional"),
        ("nullable_int", "Integral"),
        ("numpy_float", "Fractional"),
    )
    for column_name, expected_type in expected_types:
        result = definition_for(column_name)
        assert result.feature_type == expected_type
133+
52134 def test_collection_type_with_in_memory_storage (self ):
53135 df = pd .DataFrame ({
54136 "id" : pd .Series ([1 , 2 ], dtype = "int64" ),
0 commit comments