diff --git a/src/main/java/org/rumbledb/runtime/typing/ValidateTypeIterator.java b/src/main/java/org/rumbledb/runtime/typing/ValidateTypeIterator.java index 2c78585c4d..76d33187b8 100644 --- a/src/main/java/org/rumbledb/runtime/typing/ValidateTypeIterator.java +++ b/src/main/java/org/rumbledb/runtime/typing/ValidateTypeIterator.java @@ -24,13 +24,11 @@ import org.rumbledb.types.FieldDescriptor; import org.rumbledb.types.ItemType; -import sparksoniq.spark.DataFrameUtils; import sparksoniq.spark.SparkSessionManager; import java.sql.Date; import java.sql.Timestamp; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; @@ -68,11 +66,11 @@ public JSoundDataFrame getDataFrame(DynamicContext context) { if (inputDataIterator.isDataFrame()) { JSoundDataFrame inputDataAsDataFrame = inputDataIterator.getDataFrame(context); - validateItemTypeAgainstDataFrame( + checkAnnotationPossibleOrThrowError( this.itemType, - inputDataAsDataFrame.getDataFrame().schema() + inputDataAsDataFrame.getItemType() ); - return inputDataAsDataFrame; + return new JSoundDataFrame(inputDataAsDataFrame.getDataFrame(), this.itemType); } if (inputDataIterator.isRDDOrDataFrame()) { @@ -444,63 +442,94 @@ private static Object getRowColumnFromItemUsingDataType(Item item, ItemType item } - private static void validateItemTypeAgainstDataFrame( - ItemType itemType, - StructType dataFrameSchema + private static void checkAnnotationPossibleOrThrowError( + ItemType expectedType, + ItemType actualType ) { - StructType generatedSchema = convertToDataFrameSchema(itemType); - for (StructField column : dataFrameSchema.fields()) { - final String columnName = column.name(); - final DataType columnDataType = column.dataType(); - - boolean columnMatched = false; - for (StructField structField : generatedSchema.fields()) { - String generatedColumnName = structField.name(); - if (!generatedColumnName.equals(columnName)) { - continue; - } + if (expectedType.isObjectItemType()) { + if (!actualType.isObjectItemType()) { + throw new InvalidInstanceException( + "Unexpected type. Expected " + + expectedType + + " but actually encountered " + + actualType + ); + } + // StructType generatedSchema = convertToDataFrameSchema(itemType); + for (String columnName : actualType.getObjectContentFacet().keySet()) { + final ItemType actualColumnType = actualType.getObjectContentFacet().get(columnName).getType(); + + boolean columnMatched = false; + for (String generatedColumnName : expectedType.getObjectContentFacet().keySet()) { + if (!generatedColumnName.equals(columnName)) { + continue; + } - DataType generatedDataType = structField.dataType(); - if (DataFrameUtils.isUserTypeApplicable(generatedDataType, columnDataType)) { + ItemType expectedColumnType = expectedType.getObjectContentFacet() + .get(generatedColumnName) + .getType(); + checkAnnotationPossibleOrThrowError(expectedColumnType, actualColumnType); columnMatched = true; } - throw new InvalidInstanceException( - "Fields defined in schema must fully match the fields of input data: " - + "expected '" - + ItemParser.getItemTypeNameFromDataFrameDataType(columnDataType) - + "' type for field '" - + columnName - + "', but found '" - + ItemParser.getItemTypeNameFromDataFrameDataType(generatedDataType) - + "'" - ); + if (expectedType != null && expectedType.getClosedFacet()) { + if (!columnMatched) { + throw new InvalidInstanceException( + "Unexpected key in closed object type " + + expectedType.getIdentifierString() + + " : " + + columnName + ); + } + } } - if (itemType != null && itemType.getClosedFacet()) { - if (!columnMatched) { + for (String generatedSchemaColumnName : expectedType.getObjectContentFacet().keySet()) { + boolean userColumnMatched = actualType.getObjectContentFacet() + .keySet() + .contains(generatedSchemaColumnName); + + if (!userColumnMatched) { throw new InvalidInstanceException( - "Unexpected key in closed object type " - + itemType.getIdentifierString() - + " : " - + columnName + "Fields defined in schema must fully match the fields of input data: " + + "redundant type information for non-existent field '" + + generatedSchemaColumnName + + "'." ); } } } - - for (String generatedSchemaColumnName : generatedSchema.fieldNames()) { - boolean userColumnMatched = Arrays.asList(dataFrameSchema.fieldNames()).contains(generatedSchemaColumnName); - - if (!userColumnMatched) { + if (expectedType.isArrayItemType()) { + if (!actualType.isArrayItemType()) { throw new InvalidInstanceException( - "Fields defined in schema must fully match the fields of input data: " - + "redundant type information for non-existent field '" - + generatedSchemaColumnName - + "'." + "Unexpected type. Expected " + + expectedType + + " but actually encountered " + + actualType ); } } + if (expectedType.isAtomicItemType()) { + if (!actualType.isAtomicItemType()) { + throw new InvalidInstanceException( + "Unexpected type, and dataframe casts are not supported at this point (contact us to prioritize). Expected " + + expectedType + + " but actually encountered " + + actualType + ); + } + if (!actualType.isSubtypeOf(expectedType)) { + throw new InvalidInstanceException( + "Unexpected type, and dataframe casts are not supported at this point (contact us to prioritize). Expected " + + expectedType + + " but actually encountered " + + actualType + ); + } + } + if (expectedType.isUnionType()) { + throw new InvalidInstanceException("Union types are not supported at this point."); + } } @Override diff --git a/src/main/java/sparksoniq/spark/DataFrameUtils.java b/src/main/java/sparksoniq/spark/DataFrameUtils.java index 7ca241a805..f0b025580c 100644 --- a/src/main/java/sparksoniq/spark/DataFrameUtils.java +++ b/src/main/java/sparksoniq/spark/DataFrameUtils.java @@ -246,17 +246,4 @@ public static void validateSchemaItemAgainstDataFrame( } } } - - public static boolean isUserTypeApplicable( - DataType userSchemaColumnDataType, - DataType columnDataType - ) { - return userSchemaColumnDataType.equals(columnDataType) - || - (userSchemaColumnDataType.equals(ItemParser.decimalType) && columnDataType.equals(DataTypes.LongType)) - || - (userSchemaColumnDataType.equals(DataTypes.DoubleType) && columnDataType.equals(DataTypes.FloatType)) - || - (userSchemaColumnDataType.equals(DataTypes.IntegerType) && columnDataType.equals(DataTypes.ShortType)); - } } diff --git a/src/test/resources/test_files/runtime-native-flwor/udt/type-default.jq b/src/test/resources/test_files/runtime-native-flwor/udt/type-default.jq index f584499673..6acd82fe66 100644 --- a/src/test/resources/test_files/runtime-native-flwor/udt/type-default.jq +++ b/src/test/resources/test_files/runtime-native-flwor/udt/type-default.jq @@ -1,4 +1,4 @@ -(:JIQS: ShouldRun; Output="({ "foo" : "foo", "bar" : "foobar", "int" : 1 }, { "foo" : "foo", "bar" : "def", "int" : 42 }, { "foo" : "foo", "bar" : "foobar", "int" : 42 }, { "foo" : "foo", "bar" : "def", "int" : 42 }, { "bar" : "foobar", "int" : 42, "foo" : "foo" }, { "foo" : "foo", "bar" : "def", "int" : 42 }, { "bar" : "def", "int" : 42, "foo" : "foo" }, { "bar" : "foobar", "int" : 42, "foo" : "foo" }, { "bar" : "def", "int" : 42, "foo" : "foo" }, { "bar" : "foobar", "int" : 42, "foo" : "foo" })" :) +(:JIQS: ShouldRun; Output="({ "foo" : "foo", "bar" : "foobar", "int" : 1 }, { "foo" : "foo", "bar" : "def", "int" : 42 }, { "foo" : "foo", "bar" : "foobar", "int" : 42 }, { "foo" : "foo", "bar" : "def", "int" : 42 }, { "bar" : "foobar", "foo" : "foo", "int" : 42 }, { "foo" : "foo", "bar" : "def", "int" : 42 }, { "bar" : "def", "int" : 42, "foo" : "foo" }, { "bar" : "foobar", "int" : 42, "foo" : "foo" }, { "bar" : "def", "int" : 42, "foo" : "foo" }, { "bar" : "foobar", "int" : 42, "foo" : "foo" }, { "bar" : "foobar", "int" : 42, "foo" : "foo" })" :) declare type local:a as { "!foo" : "string", "bar" : "string=def", "int" : "integer=42" }; declare type local:b as { "foo" : "string=def" }; declare type local:c as { "foo" : "string" }; @@ -11,6 +11,6 @@ validate type local:a { validate type local:a* { { "foo" : "foo", "bar" : "fooba validate type local:a { validate type local:c* { { "foo" : "foo" } } }, validate type local:a* { { "foo" : "foo" }, { "foo" : "foo", "bar" : "foobar" } }, -validate type local:a* { parallelize(({ "foo" : "foo" }, { "foo" : "foo", "bar" : "foobar" })) }(:, -validate type local:a* { validate type local:a* { { "foo" : "foo", "bar" : "foobar" } } }, +validate type local:a* { parallelize(({ "foo" : "foo" }, { "foo" : "foo", "bar" : "foobar" })) }, +validate type local:a* { validate type local:a* { { "foo" : "foo", "bar" : "foobar" } } }(:, validate type local:a* { validate type local:c* { { "foo" : "foo" } } }:)