Skip to content

Commit 1e5ad13

Browse files
committed
fix: address review comments (iteration #1)
1 parent 0d68f0a commit 1e5ad13

File tree

2 files changed

+66
-42
lines changed

2 files changed

+66
-42
lines changed

sagemaker-mlops/src/sagemaker/mlops/feature_store/feature_utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,18 @@
4444
"string": "String",
4545
"int64": "Integral",
4646
"float64": "Fractional",
47+
# pandas nullable integer dtypes
48+
"Int8": "Integral",
49+
"Int16": "Integral",
50+
"Int32": "Integral",
51+
"Int64": "Integral",
52+
"UInt8": "Integral",
53+
"UInt16": "Integral",
54+
"UInt32": "Integral",
55+
"UInt64": "Integral",
56+
# pandas nullable float dtypes
57+
"Float32": "Fractional",
58+
"Float64": "Fractional",
4759
}
4860

4961
_INTEGER_TYPES = {
@@ -329,6 +341,8 @@ def _generate_feature_definition(
329341
return IntegralFeatureDefinition(series.name, collection_type)
330342
if dtype in _FLOAT_TYPES:
331343
return FractionalFeatureDefinition(series.name, collection_type)
344+
if dtype in _STRING_TYPES:
345+
return StringFeatureDefinition(series.name, collection_type)
332346
return StringFeatureDefinition(series.name, collection_type)
333347

334348

sagemaker-mlops/tests/unit/sagemaker/mlops/feature_store/test_feature_utils.py

Lines changed: 52 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -49,43 +49,29 @@ def test_returns_correct_count(self, sample_dataframe):
4949
defs = load_feature_definitions_from_dataframe(sample_dataframe)
5050
assert len(defs) == 3
5151

52-
def test_infers_integral_type_with_pandas_nullable_Int64(self):
53-
df = pd.DataFrame({"id": pd.Series([1, 2, 3], dtype="Int64")})
54-
defs = load_feature_definitions_from_dataframe(df)
55-
assert defs[0].feature_type == "Integral"
56-
57-
def test_infers_integral_type_with_pandas_nullable_Int32(self):
58-
df = pd.DataFrame({"id": pd.Series([1, 2, 3], dtype="Int32")})
59-
defs = load_feature_definitions_from_dataframe(df)
60-
assert defs[0].feature_type == "Integral"
61-
62-
def test_infers_integral_type_with_pandas_nullable_Int16(self):
63-
df = pd.DataFrame({"id": pd.Series([1, 2, 3], dtype="Int16")})
64-
defs = load_feature_definitions_from_dataframe(df)
65-
assert defs[0].feature_type == "Integral"
66-
67-
def test_infers_integral_type_with_pandas_nullable_Int8(self):
68-
df = pd.DataFrame({"id": pd.Series([1, 2, 3], dtype="Int8")})
69-
defs = load_feature_definitions_from_dataframe(df)
70-
assert defs[0].feature_type == "Integral"
71-
72-
def test_infers_integral_type_with_pandas_nullable_UInt64(self):
73-
df = pd.DataFrame({"id": pd.Series([1, 2, 3], dtype="UInt64")})
74-
defs = load_feature_definitions_from_dataframe(df)
75-
assert defs[0].feature_type == "Integral"
76-
77-
def test_infers_integral_type_with_pandas_nullable_UInt32(self):
78-
df = pd.DataFrame({"id": pd.Series([1, 2, 3], dtype="UInt32")})
52+
@pytest.mark.parametrize(
53+
"dtype",
54+
["Int8", "Int16", "Int32", "Int64",
55+
"UInt8", "UInt16", "UInt32", "UInt64"],
56+
)
57+
def test_infers_integral_type_with_pandas_nullable_int(
58+
self, dtype
59+
):
60+
df = pd.DataFrame(
61+
{"id": pd.Series([1, 2, 3], dtype=dtype)}
62+
)
7963
defs = load_feature_definitions_from_dataframe(df)
8064
assert defs[0].feature_type == "Integral"
8165

82-
def test_infers_fractional_type_with_pandas_nullable_Float64(self):
83-
df = pd.DataFrame({"value": pd.Series([1.1, 2.2, 3.3], dtype="Float64")})
84-
defs = load_feature_definitions_from_dataframe(df)
85-
assert defs[0].feature_type == "Fractional"
86-
87-
def test_infers_fractional_type_with_pandas_nullable_Float32(self):
88-
df = pd.DataFrame({"value": pd.Series([1.1, 2.2], dtype="Float32")})
66+
@pytest.mark.parametrize(
67+
"dtype", ["Float32", "Float64"],
68+
)
69+
def test_infers_fractional_type_with_pandas_nullable_float(
70+
self, dtype
71+
):
72+
df = pd.DataFrame(
73+
{"value": pd.Series([1.1, 2.2, 3.3], dtype=dtype)}
74+
)
8975
defs = load_feature_definitions_from_dataframe(df)
9076
assert defs[0].feature_type == "Fractional"
9177

@@ -108,18 +94,42 @@ def test_infers_correct_types_after_convert_dtypes(self):
10894
assert price_def.feature_type == "Fractional"
10995
assert name_def.feature_type == "String"
11096

111-
def test_infers_correct_types_with_mixed_nullable_and_numpy_dtypes(self):
97+
def test_infers_correct_types_with_mixed_nullable_and_numpy_dtypes(
98+
self,
99+
):
112100
df = pd.DataFrame({
113101
"numpy_int": pd.Series([1, 2, 3], dtype="int64"),
114-
"nullable_float": pd.Series([1.1, 2.2, 3.3], dtype="Float64"),
115-
"nullable_int": pd.Series([10, 20, 30], dtype="Int64"),
116-
"numpy_float": pd.Series([0.1, 0.2, 0.3], dtype="float64"),
102+
"nullable_float": pd.Series(
103+
[1.1, 2.2, 3.3], dtype="Float64"
104+
),
105+
"nullable_int": pd.Series(
106+
[10, 20, 30], dtype="Int64"
107+
),
108+
"numpy_float": pd.Series(
109+
[0.1, 0.2, 0.3], dtype="float64"
110+
),
117111
})
118112
defs = load_feature_definitions_from_dataframe(df)
119-
assert next(d for d in defs if d.feature_name == "numpy_int").feature_type == "Integral"
120-
assert next(d for d in defs if d.feature_name == "nullable_float").feature_type == "Fractional"
121-
assert next(d for d in defs if d.feature_name == "nullable_int").feature_type == "Integral"
122-
assert next(d for d in defs if d.feature_name == "numpy_float").feature_type == "Fractional"
113+
114+
result = next(
115+
d for d in defs if d.feature_name == "numpy_int"
116+
)
117+
assert result.feature_type == "Integral"
118+
119+
result = next(
120+
d for d in defs if d.feature_name == "nullable_float"
121+
)
122+
assert result.feature_type == "Fractional"
123+
124+
result = next(
125+
d for d in defs if d.feature_name == "nullable_int"
126+
)
127+
assert result.feature_type == "Integral"
128+
129+
result = next(
130+
d for d in defs if d.feature_name == "numpy_float"
131+
)
132+
assert result.feature_type == "Fractional"
123133

124134
def test_collection_type_with_in_memory_storage(self):
125135
df = pd.DataFrame({

0 commit comments

Comments
 (0)