From 245e19d020c40f2b3b5589c4260e820138508c3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 26 Jun 2023 16:22:50 +0200 Subject: [PATCH 01/23] Add JSON schema for openPMD Written as .toml files for ease of documentation, maintainability and readability. --- share/openPMD/json_schema/attribute_defs.toml | 184 ++++++++++++++++++ share/openPMD/json_schema/attributes.toml | 74 +++++++ share/openPMD/json_schema/dataset_defs.toml | 79 ++++++++ share/openPMD/json_schema/iteration.toml | 77 ++++++++ share/openPMD/json_schema/mesh.toml | 85 ++++++++ .../json_schema/mesh_record_component.toml | 27 +++ .../openPMD/json_schema/particle_patches.toml | 48 +++++ .../openPMD/json_schema/particle_species.toml | 40 ++++ share/openPMD/json_schema/patch_record.toml | 37 ++++ share/openPMD/json_schema/record.toml | 72 +++++++ .../openPMD/json_schema/record_component.toml | 66 +++++++ share/openPMD/json_schema/series.toml | 118 +++++++++++ 12 files changed, 907 insertions(+) create mode 100644 share/openPMD/json_schema/attribute_defs.toml create mode 100644 share/openPMD/json_schema/attributes.toml create mode 100644 share/openPMD/json_schema/dataset_defs.toml create mode 100644 share/openPMD/json_schema/iteration.toml create mode 100644 share/openPMD/json_schema/mesh.toml create mode 100644 share/openPMD/json_schema/mesh_record_component.toml create mode 100644 share/openPMD/json_schema/particle_patches.toml create mode 100644 share/openPMD/json_schema/particle_species.toml create mode 100644 share/openPMD/json_schema/patch_record.toml create mode 100644 share/openPMD/json_schema/record.toml create mode 100644 share/openPMD/json_schema/record_component.toml create mode 100644 share/openPMD/json_schema/series.toml diff --git a/share/openPMD/json_schema/attribute_defs.toml b/share/openPMD/json_schema/attribute_defs.toml new file mode 100644 index 0000000000..d8d3d19c0f --- /dev/null +++ b/share/openPMD/json_schema/attribute_defs.toml @@ -0,0 +1,184 @@ + 
+["$defs"] + +###################### +# Vectors of strings # +###################### + +["$defs".vec_string_attribute] +required = ["value", "datatype"] + +["$defs".vec_string_attribute.properties] + +value.any_of = [ + { type = "string" }, + { type = "array", items = { "type" = "string" } }, +] + +datatype.enum = [ + "STRING", + "CHAR", + "SCHAR", + "UCHAR", + "VEC_STRING", + "VEC_CHAR", + "VEC_SCHAR", + "VEC_UCHAR", +] + +################## +# Vectors of int # +################## + +["$defs".vec_int_attribute] +required = ["value", "datatype"] + +["$defs".vec_int_attribute.properties] + +value.any_of = [ + { type = "integer" }, + { type = "array", items = { "type" = "integer" } }, +] + +datatype.enum = [ + "SHORT", + "INT", + "LONG", + "LONGLONG", + "USHORT", + "UINT", + "ULONG", + "ULONGLONG", + "VEC_SHORT", + "VEC_INT", + "VEC_LONG", + "VEC_LONGLONG", + "VEC_USHORT", + "VEC_UINT", + "VEC_ULONG", + "VEC_ULONGLONG", +] + +#################### +# Vectors of float # +#################### + +["$defs".vec_float_attribute] +required = ["value", "datatype"] + +["$defs".vec_float_attribute.properties] + +value.any_of = [ + { type = "number" }, + { type = "array", items = { "type" = "number" } }, +] + +datatype.enum = [ + "CHAR", + "UCHAR", + "SCHAR", + "SHORT", + "INT", + "LONG", + "LONGLONG", + "USHORT", + "UINT", + "ULONG", + "ULONGLONG", + "FLOAT", + "DOUBLE", + "LONG_DOUBLE", + "CFLOAT", + "CDOUBLE", + "CLONG_DOUBLE", + "VEC_SHORT", + "VEC_INT", + "VEC_LONG", + "VEC_LONGLONG", + "VEC_USHORT", + "VEC_UINT", + "VEC_ULONG", + "VEC_ULONGLONG", + "VEC_FLOAT", + "VEC_DOUBLE", + "VEC_LONG_DOUBLE", + "VEC_CFLOAT", + "VEC_CDOUBLE", + "VEC_CLONG_DOUBLE", +] + +########################### +# Special case: # +# unitDimension attribute # +########################### + +["$defs".unitDimension] +required = ["value", "datatype"] + +["$defs".unitDimension.properties] + +value = { type = "array", items = { type = "number" } } +datatype.const = "ARR_DBL_7" + +##################### +# 
string attributes # +##################### + +["$defs".string_attribute] +required = ["value", "datatype"] + +["$defs".string_attribute.properties] + +value.type = "string" +datatype.enum = ["STRING", "CHAR", "SCHAR", "UCHAR"] + +################## +# int attributes # +################## + +["$defs".int_attribute] +required = ["value", "datatype"] + +["$defs".int_attribute.properties] + +value.type = "integer" +datatype.enum = [ + "SHORT", + "INT", + "LONG", + "LONGLONG", + "USHORT", + "UINT", + "ULONG", + "ULONGLONG", +] + +#################### +# float attributes # +#################### + +["$defs".float_attribute] +required = ["value", "datatype"] + +["$defs".float_attribute.properties] + +value.type = "number" +datatype.enum = [ + "CHAR", + "UCHAR", + "SCHAR", + "SHORT", + "INT", + "LONG", + "LONGLONG", + "USHORT", + "UINT", + "ULONG", + "ULONGLONG", + "FLOAT", + "DOUBLE", + "LONG_DOUBLE", + "CFLOAT", + "CDOUBLE", + "CLONG_DOUBLE", +] diff --git a/share/openPMD/json_schema/attributes.toml b/share/openPMD/json_schema/attributes.toml new file mode 100644 index 0000000000..8137554a91 --- /dev/null +++ b/share/openPMD/json_schema/attributes.toml @@ -0,0 +1,74 @@ +title = "Attribute layout" + +[[oneOf]] +type = "null" +title = "No attributes" + +[[oneOf]] +type = "object" +title = "Dictionary of attributes" +description = "Generic layout of an attributes object." 
+ +[oneOf.patternProperties.".*"] +title = "A generic attribute" +type = "object" + +[oneOf.patternProperties.".*".properties] + +value.anyOf = [ + # Any primitive value + { not = { anyOf = [ + { type = "object", title = "An object" }, + { type = "array", title = "An array" }, + ] }, title = "No complex type" }, + # Or an array of any primitive value + { type = "array", items = { not = { anyOf = [ + { type = "object", title = "An object" }, + { type = "array", title = "An array" }, + ] } }, title = "An array of non-complex types" }, +] + +datatype.type = "string" +datatype.enum = [ + "CHAR", + "UCHAR", + "SCHAR", + "SHORT", + "INT", + "LONG", + "LONGLONG", + "USHORT", + "UINT", + "ULONG", + "ULONGLONG", + "FLOAT", + "DOUBLE", + "LONG_DOUBLE", + "CFLOAT", + "CDOUBLE", + "CLONG_DOUBLE", + "STRING", + "VEC_CHAR", + "VEC_SHORT", + "VEC_INT", + "VEC_LONG", + "VEC_LONGLONG", + "VEC_UCHAR", + "VEC_USHORT", + "VEC_UINT", + "VEC_ULONG", + "VEC_ULONGLONG", + "VEC_FLOAT", + "VEC_DOUBLE", + "VEC_LONG_DOUBLE", + "VEC_CFLOAT", + "VEC_CDOUBLE", + "VEC_CLONG_DOUBLE", + "VEC_SCHAR", + "VEC_STRING", + "ARR_DBL_7", + "BOOL", +] + +[oneOf.propertyNames] +pattern = "^\\w*$" diff --git a/share/openPMD/json_schema/dataset_defs.toml b/share/openPMD/json_schema/dataset_defs.toml new file mode 100644 index 0000000000..b2d710acf2 --- /dev/null +++ b/share/openPMD/json_schema/dataset_defs.toml @@ -0,0 +1,79 @@ +["$defs"] + +###################################### +# n-dimensional datasets of any type # +###################################### + +[["$defs".any_type_recursive_array.anyOf]] +title = "A numeric type" +type = "array" +items.anyOf = [{"type" = "number"}, {"type" = "null"}] + +[["$defs".any_type_recursive_array.anyOf]] +title = "A recursive array of numeric types" +type = "array" +items."$ref" = "#/$defs/any_type_recursive_array" + +["$defs".any_type_dataset] +title = "A dataset of any numeric type" +required = ["data", "datatype"] + +["$defs".any_type_dataset.properties] + 
+datatype.enum = [ + "CHAR", + "UCHAR", + "SCHAR", + "SHORT", + "INT", + "LONG", + "LONGLONG", + "USHORT", + "UINT", + "ULONG", + "ULONGLONG", + "FLOAT", + "DOUBLE", + "LONG_DOUBLE", + "CFLOAT", + "CDOUBLE", + "CLONG_DOUBLE", + "BOOL", +] +data."$ref" = "#/$defs/any_type_recursive_array" + +###################################### +# n-dimensional datasets of int type # +###################################### + +[["$defs".int_type_recursive_array.anyOf]] +title = "An integer type" +type = "array" +items.anyOf = [{"type" = "integer"}, {"type" = "null"}] + +[["$defs".int_type_recursive_array.anyOf]] +title = "A recursive array of integer types" +type = "array" +items."$ref" = "#/$defs/int_type_recursive_array" + +["$defs".int_type_dataset] +title = "A dataset of integer type" +required = ["data", "datatype"] + +["$defs".int_type_dataset.properties] + +datatype.enum = [ + "CHAR", + "UCHAR", + "SCHAR", + "SHORT", + "INT", + "LONG", + "LONGLONG", + "USHORT", + "UINT", + "ULONG", + "ULONGLONG", + "BOOL", +] +data."$ref" = "#/$defs/int_type_recursive_array" diff --git a/share/openPMD/json_schema/iteration.toml b/share/openPMD/json_schema/iteration.toml new file mode 100644 index 0000000000..31a6c74417 --- /dev/null +++ b/share/openPMD/json_schema/iteration.toml @@ -0,0 +1,77 @@ +type = "object" +required = ["attributes"] +title = "Iteration" +description = "One iteration/snapshot." + +[properties] + +################# +# Particle data # +################# + +[properties.particles] +type = "object" +title = "Particles" +description = "Dict of particle species types." + +[properties.particles.properties.attributes] +title = "Attribute layout" +description = "Custom attributes allowed, no required attributes defined." 
+"$ref" = "attributes.json" + +##################################### +# Particle data -> Particle Species # +##################################### + +[properties.particles.patternProperties."^(?!attributes).*"] +title = "Particle Species" +"$ref" = "particle_species.json" + +############# +# Mesh data # +############# + +[properties.meshes] +type = "object" +title = "Meshes" +description = "Dict of meshes." + +[properties.meshes.properties.attributes] +title = "Attribute layout" +description = "Custom attributes allowed, no required attributes defined." +"$ref" = "attributes.json" + + +########################### +# Mesh data -> Mesh types # +########################### + +[properties.meshes.patternProperties."^(?!attributes).*"] +title = "Mesh" +"$ref" = "mesh.json" + +######################## +# Iteration attributes # +######################## + + +[properties.attributes] +title = "Attributes" + +# First requirement: standard-defined attributes + +[[properties.attributes.allOf]] +required = ["dt", "time", "timeUnitSI"] +title = "Iteration attributes" +description = "Standard-defined attributes at the Iteration level." 
+ +[properties.attributes.allOf.properties] +timeUnitSI."$ref" = "attribute_defs.json#/$defs/float_attribute" +time."$ref" = "attribute_defs.json#/$defs/float_attribute" +dt."$ref" = "attribute_defs.json#/$defs/float_attribute" + +# Second condition: General layout of attributes + +[[properties.attributes.allOf]] +title = "Attribute layout" +"$ref" = "attributes.json" diff --git a/share/openPMD/json_schema/mesh.toml b/share/openPMD/json_schema/mesh.toml new file mode 100644 index 0000000000..f7c82cec9a --- /dev/null +++ b/share/openPMD/json_schema/mesh.toml @@ -0,0 +1,85 @@ +######################################################### +# Requirement 1: Mesh-specific structure and attributes # +######################################################### + +[[allOf]] + +type = "object" +required = ["attributes"] +title = "Mesh" +description = "A mesh/grid of cells." + +[allOf.properties.attributes] +title = "Attributes" + +#################################### +# Requirement 1.1: Mesh attributes # +#################################### + +[[allOf.properties.attributes.allOf]] + +title = "Mesh attributes" +description = "Standard-defined attributes at the Mesh level." 
+required = [ + "axisLabels", + "geometry", + "gridGlobalOffset", + "gridSpacing", + "gridUnitSI", + "timeOffset", + "unitDimension", +] + +[allOf.properties.attributes.allOf.properties] + +gridUnitSI."$ref" = "attribute_defs.json#/$defs/float_attribute" +gridSpacing."$ref" = "attribute_defs.json#/$defs/vec_float_attribute" +gridGlobalOffset."$ref" = "attribute_defs.json#/$defs/vec_float_attribute" +timeOffset."$ref" = "attribute_defs.json#/$defs/float_attribute" +geometryParameters."$ref" = "attribute_defs.json#/$defs/string_attribute" +unitDimension."$ref" = "attribute_defs.json#/$defs/unitDimension" +geometry."$ref" = "attribute_defs.json#/$defs/string_attribute" +dataOrder."$ref" = "attribute_defs.json#/$defs/string_attribute" +axisLabels."$ref" = "attribute_defs.json#/$defs/vec_string_attribute" + +################################################ +# Requirement 1.2: Generic attribute structure # +################################################ + +[[allOf.properties.attributes.allOf]] +title = "Attribute layout" +"$ref" = "attributes.json" + +###################################### +# Requirement 2: Contains components # +###################################### + +[[allOf]] + +title = "Contains components" + +#################################################### +# Requirement 2.1: Either this is a scalar mesh... # +#################################################### + +[[allOf.oneOf]] +title = "Scalar mesh component" +"$ref" = "mesh_record_component.json" + +################################################# +# Requirement 2.2: ... or it's a vector mesh. # +# Note that exactly one of these two conditions # +# must be true, not both at once (oneOf). # +################################################# + +[[allOf.oneOf]] +title = "Vector component" +description = "Additionally to the attributes, at least one component must be contained" +# The attributes are contained in this dict, and at least one further +# non-scalar component. 
Hence, we require at least two entries. +minProperties = 2 +propertyNames.pattern = "^\\w*$" + +[allOf.oneOf.patternProperties."^(?!attributes).*"] +title = "Vector mesh component" +"$ref" = "mesh_record_component.json" diff --git a/share/openPMD/json_schema/mesh_record_component.toml b/share/openPMD/json_schema/mesh_record_component.toml new file mode 100644 index 0000000000..6622ea2335 --- /dev/null +++ b/share/openPMD/json_schema/mesh_record_component.toml @@ -0,0 +1,27 @@ +title = "Mesh Record Component" + +############################################# +# Requirement 1: This is a record component # +############################################# + +[[allOf]] +title = "Record Component" +"$ref" = "record_component.json" + +################################################## +# Requirement 2: Mesh Record Component Specifics # +################################################## + +[[allOf]] +title = "Mesh Record Component" +description = "Single component in a mesh record." +type = "object" +required = ["attributes"] + +[allOf.properties.attributes] +required = ["position"] +title = "Record Component attributes" +description = "Standard-defined attributes at the Record Component level." + +[allOf.properties.attributes.properties] +position."$ref" = "attribute_defs.json#/$defs/vec_float_attribute" diff --git a/share/openPMD/json_schema/particle_patches.toml b/share/openPMD/json_schema/particle_patches.toml new file mode 100644 index 0000000000..0144acb5bd --- /dev/null +++ b/share/openPMD/json_schema/particle_patches.toml @@ -0,0 +1,48 @@ +type = "object" +title = "Particle Patches" +description = "Recommended group for post-processing. It logically orders the 1D arrays of attributes into local patches of particles that can be read and processed in parallel." 
 + +required = ["numParticles", "numParticlesOffset", "offset", "extent"] + +################################ +# General layout of attributes # +################################ + +[properties.attributes] +title = "Attribute layout" +description = "Custom attributes allowed, no required attributes defined." +"$ref" = "attributes.json" + +####################################################### +# Definition of particle patches via # +# numParticles, numParticlesOffset, offset and extent # +####################################################### + +[properties.numParticles] +title = "numParticles" +description = "number of particles in this patch" +allOf = [ + { "$ref" = "record_component.json" }, + { "$ref" = "dataset_defs.json#/$defs/int_type_dataset" }, +] + + +[properties.numParticlesOffset] +title = "numParticlesOffset" +description = "offset within the one-dimensional records of the particle species where the first particle in this patch is stored" +allOf = [ + { "$ref" = "record_component.json" }, + { "$ref" = "dataset_defs.json#/$defs/int_type_dataset" }, +] + + +[properties.offset] +title = "Offset" +description = "absolute position (position + positionOffset as defined above) where the particle patch begins: defines the (inclusive) lower bound with positions that are associated with the patch; the same requirements as for regular record components apply" +"$ref" = "patch_record.json" + + +[properties.extent] +title = "Extent" +description = "extent of the particle patch; the offset + extent must be larger than the maximum absolute position of particles in the patch as the exact upper bound of position offset + extent is excluded from the patch; the same requirements as for regular record components apply" +"$ref" = "patch_record.json" diff --git a/share/openPMD/json_schema/particle_species.toml b/share/openPMD/json_schema/particle_species.toml new file mode 100644 index 0000000000..43dc33444f --- /dev/null +++ b/share/openPMD/json_schema/particle_species.toml @@ 
-0,0 +1,40 @@ +type = "object" +title = "Particle Species" +description = "Dict of particle quantities." +propertyNames.pattern = "^\\w*$" + +# A particle species requires at least a "position" record +required = ["position"] + +[properties.attributes] + +################################################## +# First requirement: standard-defined attributes # +################################################## + +[[properties.attributes.allOf]] +title = "Particle Species attributes" +description = "Standard-defined attributes at the Particle Species level." +# No required attributes +required = [] + +[properties.attributes.allOf.properties] +id."$ref" = "attribute_defs.json#/$defs/int_attribute" + +#################################################### +# Second requirement: General layout of attributes # +#################################################### + +[[properties.attributes.allOf]] +title = "Attribute layout" +description = "Custom attributes allowed, no required attributes defined." +"$ref" = "attributes.json" + +[properties.particlePatches] +title = "Particle Patches" +"$ref" = "particle_patches.json" + + +[patternProperties."^(?!(attributes|particlePatches)).*"] +title = "Record" +"$ref" = "record.json" diff --git a/share/openPMD/json_schema/patch_record.toml b/share/openPMD/json_schema/patch_record.toml new file mode 100644 index 0000000000..1494410c57 --- /dev/null +++ b/share/openPMD/json_schema/patch_record.toml @@ -0,0 +1,37 @@ +######################################################### +# Requirement 1: Mesh-specific structure and attributes # +######################################################### + +[[allOf]] + +type = "object" +required = ["attributes"] +title = "Record" +description = "A list of particle quantities." 
+ +############################################## +# Requirement 1: Generic attribute structure # +############################################## + + +[allOf.properties.attributes] +title = "Attribute layout" +"$ref" = "attributes.json" + +####################################################### +# Requirement 2: Contains components of a vector mesh # +####################################################### + +[[allOf]] +title = "Contains vector components" +description = "Additionally to the attributes, at least one component must be contained" +# The attributes are contained in this dict, and at least one further +# non-scalar component. Hence, we require at least two entries. +minProperties = 2 +propertyNames.pattern = "^\\w*$" + +[allOf.patternProperties] + +[allOf.patternProperties."^(?!attributes).*"] +title = "Scalar component" +"$ref" = "record_component.json" diff --git a/share/openPMD/json_schema/record.toml b/share/openPMD/json_schema/record.toml new file mode 100644 index 0000000000..60209fd5a2 --- /dev/null +++ b/share/openPMD/json_schema/record.toml @@ -0,0 +1,72 @@ +######################################################### +# Requirement 1: Mesh-specific structure and attributes # +######################################################### + +[[allOf]] + +type = "object" +required = ["attributes"] +title = "Record" +description = "A list of particle quantities." + +[allOf.properties.attributes] +title = "Attributes" + +#################################### +# Requirement 1.1: Mesh attributes # +#################################### + +[[allOf.properties.attributes.allOf]] + +title = "Particle Record attributes" +description = "Standard-defined attributes at the Particle Record level." 
+required = ["timeOffset", "unitDimension"] + +[allOf.properties.attributes.allOf.properties] + +timeOffset."$ref" = "attribute_defs.json#/$defs/float_attribute" +unitDimension."$ref" = "attribute_defs.json#/$defs/unitDimension" +dataOrder."$ref" = "attribute_defs.json#/$defs/string_attribute" + +################################################ +# Requirement 1.2: Generic attribute structure # +################################################ + +[[allOf.properties.attributes.allOf]] +title = "Attribute layout" +"$ref" = "attributes.json" + +###################################### +# Requirement 2: Contains components # +###################################### + +[[allOf]] +title = "Contains components" + +#################################################### +# Requirement 2.1: Either this is a scalar mesh... # +#################################################### + +[[allOf.oneOf]] +title = "Scalar component" +"$ref" = "record_component.json" + +################################################# +# Requirement 2.2: ... or it's a vector mesh. # +# Note that exactly one of these two conditions # +# must be true, not both at once (oneOf). # +################################################# + +[[allOf.oneOf]] +title = "Vector component" +description = "Additionally to the attributes, at least one component must be contained" +# The attributes are contained in this dict, and at least one further +# non-scalar component. Hence, we require at least two entries. 
+minProperties = 2 +propertyNames.pattern = "^\\w*$" + +[allOf.oneOf.patternProperties] + +[allOf.oneOf.patternProperties."^(?!attributes).*"] +title = "Scalar component" +"$ref" = "record_component.json" diff --git a/share/openPMD/json_schema/record_component.toml b/share/openPMD/json_schema/record_component.toml new file mode 100644 index 0000000000..9f81908b68 --- /dev/null +++ b/share/openPMD/json_schema/record_component.toml @@ -0,0 +1,66 @@ +############################################## +# Requirement 1: Record Component attributes # +############################################## + +[[allOf]] +title = "Record Component" +description = "Single component in a record." +type = "object" +required = ["attributes"] + +[allOf.properties.attributes] +title = "Attributes" + +################################################ +# Requirement 1.1: Standard-defined attributes # +################################################ + +[[allOf.properties.attributes.allOf]] +title = "Record Component attributes" +description = "Standard-defined attributes at the Record Component level." +required = ["unitSI"] + +[allOf.properties.attributes.allOf.properties] +unitSI."$ref" = "attribute_defs.json#/$defs/float_attribute" + +############################################# +# Requirement 1.2: Generic attribute layout # +############################################# + +[[allOf.properties.attributes.allOf]] +title = "Attribute layout" +"$ref" = "attributes.json" + +########################################### +# Requirement 2: Either array or constant # +########################################### + +[[allOf]] +title = "Either array or constant" + +##################### +# Option 2.1: Array # +##################### + +[[allOf.oneOf]] +required = ["data", "datatype"] +description = "An n-dimensional dataset containing the payload." 
+title = "Array dataset" + +"$ref" = "dataset_defs.json#/$defs/any_type_dataset" + +######################## +# Option 2.2: Constant # +######################## + +[[allOf.oneOf]] +title = "Constant dataset" +description = "A dataset represented by two attributes: The constant value and its shape." +required = ["attributes"] + +[allOf.oneOf.properties.attributes] +required = ["shape", "value"] + +[allOf.oneOf.properties.attributes.properties] +value."$ref" = "attribute_defs.json#/$defs/float_attribute" +shape."$ref" = "attribute_defs.json#/$defs/vec_int_attribute" diff --git a/share/openPMD/json_schema/series.toml b/share/openPMD/json_schema/series.toml new file mode 100644 index 0000000000..46c57c7153 --- /dev/null +++ b/share/openPMD/json_schema/series.toml @@ -0,0 +1,118 @@ +type = "object" +required = ["attributes", "data"] +title = "Series" +description = "The root group in the hierarchical openPMD standard." + +######################################################### +# Requirement 1: Basic layout of the root path (Series) # +######################################################### + +[[allOf]] +title = "Basic Series layout" + +##################### +# Series attributes # +##################### + +[allOf.properties.attributes] +title = "Attributes" + +################################################ +# Requirement 1.1: standard-defined attributes # +################################################ + +[[allOf.properties.attributes.allOf]] +required = [ + "openPMD", + "openPMDextension", + "basePath", + "iterationEncoding", + "iterationFormat", +] +title = "Series attributes" +description = "Standard-defined attributes at the Series level." 
+ +[allOf.properties.attributes.allOf.properties] + +author."$ref" = "attribute_defs.json#/$defs/string_attribute" +comment."$ref" = "attribute_defs.json#/$defs/string_attribute" +date."$ref" = "attribute_defs.json#/$defs/string_attribute" +openPMD."$ref" = "attribute_defs.json#/$defs/string_attribute" +iterationEncoding."$ref" = "attribute_defs.json#/$defs/string_attribute" +softwareVersion."$ref" = "attribute_defs.json#/$defs/string_attribute" +basePath."$ref" = "attribute_defs.json#/$defs/string_attribute" +iterationFormat."$ref" = "attribute_defs.json#/$defs/string_attribute" +openPMDextension."$ref" = "attribute_defs.json#/$defs/int_attribute" +software."$ref" = "attribute_defs.json#/$defs/string_attribute" +machine."$ref" = "attribute_defs.json#/$defs/string_attribute" +softwareDependencies."$ref" = "attribute_defs.json#/$defs/string_attribute" + +meshesPath.description = "Note that the meshesPath is hardcoded as its semantics are impossible to model in a JSON schema." +meshesPath.const.value = "meshes/" +meshesPath.const.datatype = "STRING" + +particlesPath.description = "Note that the particlesPath is hardcoded as its semantics are impossible to model in a JSON schema." 
+particlesPath.const.value = "particles/" +particlesPath.const.datatype = "STRING" + +################################################# +# Requirement 1.2: General layout of attributes # +################################################# + +[[allOf.properties.attributes.allOf]] +title = "Attribute layout" +"$ref" = "attributes.json" + +################################################# +# Requirement 2: The Series contains iterations # +################################################# + +[[allOf]] +title = "Contains iterations" + +######################################################################### +# Requirement 2.1: Either a single iteration in variable-based encoding # +######################################################################### + +[[allOf.oneOf]] +title = "Variable-based encoding" +properties.attributes.properties.iterationEncoding.properties.value = { const = "variableBased" } + +[allOf.oneOf.properties.data] +type = "object" +title = "An iteration" +description = "A single iteration." +"$ref" = "iteration.json" + +############################################################################ +# Requirement 2.2: Or multiple iterations in group- or file-based encoding # +############################################################################ + +[[allOf.oneOf]] +title = "Group-based (or file-based) encoding" +properties.attributes.properties.iterationEncoding.properties.value = { oneOf = [ + { const = "groupBased" }, + { const = "fileBased" }, +] } + +# Base Path + +[allOf.oneOf.properties.data] +type = "object" +title = "Base path" +description = "A map of all iterations/snapshots in the Series." + +propertyNames.pattern = "^(-?[0-9]*|attributes)$" + +[allOf.oneOf.properties.data.properties] + +[allOf.oneOf.properties.data.properties.attributes] +title = "Attribute layout" +description = "Custom attributes allowed, no required attributes defined." 
+"$ref" = "attributes.json" + +# Base Path -> Iterations + +[allOf.oneOf.properties.data.patternProperties."^-?[0-9]*$"] +title = "Iteration" +"$ref" = "iteration.json" From a86026969f141d86f0352e1c75425ffe5ea05033 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 26 Jun 2023 16:27:43 +0200 Subject: [PATCH 02/23] Add convert-json-toml tool Needed for "compiling" the schema to JSON Also add a Makefile to further simplify this --- CMakeLists.txt | 6 +- include/openPMD/auxiliary/JSON_internal.hpp | 15 ++++- share/openPMD/json_schema/Makefile | 13 ++++ src/auxiliary/JSON.cpp | 38 +++++++++--- src/cli/convert-json-toml.cpp | 67 +++++++++++++++++++++ 5 files changed, 126 insertions(+), 13 deletions(-) create mode 100644 share/openPMD/json_schema/Makefile create mode 100644 src/cli/convert-json-toml.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 32007025a6..10fd236d40 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -685,11 +685,12 @@ set(openPMD_TEST_NAMES # command line tools set(openPMD_CLI_TOOL_NAMES ls + convert-json-toml ) set(openPMD_PYTHON_CLI_TOOL_NAMES pipe ) -set(openPMD_PYTHON_CLI_MODULE_NAMES ${openPMD_CLI_TOOL_NAMES}) +set(openPMD_PYTHON_CLI_MODULE_NAMES ls) # examples set(openPMD_EXAMPLE_NAMES 1_structure @@ -894,6 +895,9 @@ if(openPMD_BUILD_CLI_TOOLS) endif() target_link_libraries(openpmd-${toolname} PRIVATE openPMD) + target_include_directories(openpmd-${toolname} SYSTEM PRIVATE + $ + $) endforeach() endif() diff --git a/include/openPMD/auxiliary/JSON_internal.hpp b/include/openPMD/auxiliary/JSON_internal.hpp index c608bd7f9f..8096009f9e 100644 --- a/include/openPMD/auxiliary/JSON_internal.hpp +++ b/include/openPMD/auxiliary/JSON_internal.hpp @@ -219,16 +219,25 @@ namespace json * @param options as a parsed JSON object. * @param considerFiles If yes, check if `options` refers to a file and read * from there. 
+ * @param convertLowercase If yes, lowercase conversion is applied + * recursively to keys and values, except for some hardcoded places + * that should be left untouched. */ - ParsedConfig parseOptions(std::string const &options, bool considerFiles); + ParsedConfig parseOptions( + std::string const &options, + bool considerFiles, + bool convertLowercase = true); #if openPMD_HAVE_MPI /** * Parallel version of parseOptions(). MPI-collective. */ - ParsedConfig - parseOptions(std::string const &options, MPI_Comm comm, bool considerFiles); + ParsedConfig parseOptions( + std::string const &options, + MPI_Comm comm, + bool considerFiles, + bool convertLowercase = true); #endif diff --git a/share/openPMD/json_schema/Makefile b/share/openPMD/json_schema/Makefile new file mode 100644 index 0000000000..dcbc1584d3 --- /dev/null +++ b/share/openPMD/json_schema/Makefile @@ -0,0 +1,13 @@ +convert := openpmd-convert-json-toml + +json_files = attribute_defs.json attributes.json dataset_defs.json iteration.json mesh.json mesh_record_component.json particle_patches.json particle_species.json patch_record.json record.json record_component.json series.json + +.PHONY: all +all: $(json_files) + +$(json_files): %.json: %.toml + $(convert) @$^ > $@ + +.PHONY: clean +clean: + -rm $(json_files) diff --git a/src/auxiliary/JSON.cpp b/src/auxiliary/JSON.cpp index 84e7006cbb..e5c885a9f1 100644 --- a/src/auxiliary/JSON.cpp +++ b/src/auxiliary/JSON.cpp @@ -386,7 +386,8 @@ toml::value jsonToToml(nlohmann::json const &val) namespace { - ParsedConfig parseInlineOptions(std::string const &options) + ParsedConfig + parseInlineOptions(std::string const &options, bool convertLowercase) { // speed up default options ParsedConfig res; @@ -422,12 +423,16 @@ namespace res.config = json::tomlToJson(tomlVal); res.originallySpecifiedAs = SupportedLanguages::TOML; } - lowerCase(res.config); + if (convertLowercase) + { + lowerCase(res.config); + } return res; } } // namespace -ParsedConfig 
parseOptions(std::string const &options, bool considerFiles) +ParsedConfig parseOptions( + std::string const &options, bool considerFiles, bool convertLowercase) { if (considerFiles) { @@ -437,6 +442,12 @@ ParsedConfig parseOptions(std::string const &options, bool considerFiles) std::fstream handle; handle.open( filename.value(), std::ios_base::binary | std::ios_base::in); + if (!handle.good()) + { + throw std::runtime_error( + "Failed opening '" + filename.value() + + "': " + strerror(errno)); + } ParsedConfig res; if (auxiliary::ends_with(filename.value(), ".toml")) { @@ -456,16 +467,22 @@ ParsedConfig parseOptions(std::string const &options, bool considerFiles) "Failed reading JSON config from file " + filename.value() + "."); } - lowerCase(res.config); + if (convertLowercase) + { + lowerCase(res.config); + } return res; } } - return parseInlineOptions(options); + return parseInlineOptions(options, convertLowercase); } #if openPMD_HAVE_MPI -ParsedConfig -parseOptions(std::string const &options, MPI_Comm comm, bool considerFiles) +ParsedConfig parseOptions( + std::string const &options, + MPI_Comm comm, + bool considerFiles, + bool convertLowercase) { if (considerFiles) { @@ -489,11 +506,14 @@ parseOptions(std::string const &options, MPI_Comm comm, bool considerFiles) res.config = nlohmann::json::parse(fileContent); res.originallySpecifiedAs = SupportedLanguages::JSON; } - lowerCase(res.config); + if (convertLowercase) + { + lowerCase(res.config); + } return res; } } - return parseInlineOptions(options); + return parseInlineOptions(options, convertLowercase); } #endif diff --git a/src/cli/convert-json-toml.cpp b/src/cli/convert-json-toml.cpp new file mode 100644 index 0000000000..3abb6127f8 --- /dev/null +++ b/src/cli/convert-json-toml.cpp @@ -0,0 +1,67 @@ +#include +#include +#include + +#include +#include + +namespace json = openPMD::json; + +void parsed_main(std::string jsonOrToml) +{ + auto [config, originallySpecifiedAs] = json::parseOptions( + jsonOrToml, 
/* considerFiles = */ true, /* convertLowercase = */ false); + { + [[maybe_unused]] auto _ = std::move(jsonOrToml); + } + switch (originallySpecifiedAs) + { + using SL = json::SupportedLanguages; + case SL::JSON: { + auto asToml = json::jsonToToml(config); + std::cout << asToml; + } + break; + case SL::TOML: + std::cout << config << '\n'; + break; + } +} + +int main(int argc, char const **argv) +{ + std::string jsonOrToml; + switch (argc) + { + case 0: + case 1: + // Just read the whole stream into memory + // Not very elegant, but we'll hold the entire JSON/TOML dataset + // in memory at some point anyway, so it doesn't really matter + std::cin >> jsonOrToml; + break; + case 2: + if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) + { + std::cout << "Usage: " << std::string(argv[0]) << R"( [json_or_toml] +'json_or_toml' can be a JSON or TOML dataset specified inline or a reference +to a file prepended by an '@'. +Inline datasets will be interpreted as JSON if they start with an '{', as TOML +otherwise. Datasets from a file will be interpreted as JSON or TOML depending +on the file ending '.json' or '.toml' respectively. +Inline dataset specifications can be replaced by input read from stdin. + +If the input is JSON, then it will be converted to TOML and written to stdout, +equivalently from TOML to JSON. 
+)"; + exit(0); + } + jsonOrToml = argv[1]; + break; + default: + throw std::runtime_error( + std::string("Usage: ") + argv[0] + + " [file location or inline JSON/TOML]"); + } + parsed_main(std::move(jsonOrToml)); +} From 4c957b4f7b40d52b7c90834cce91b44a4406ac9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 26 Jun 2023 16:29:54 +0200 Subject: [PATCH 03/23] Add script for checking openPMD file against the schema Workflow documented in README.md --- share/openPMD/json_schema/README.md | 47 +++++++++++++++++++ share/openPMD/json_schema/check.py | 70 +++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 share/openPMD/json_schema/README.md create mode 100755 share/openPMD/json_schema/check.py diff --git a/share/openPMD/json_schema/README.md b/share/openPMD/json_schema/README.md new file mode 100644 index 0000000000..ae9c641e77 --- /dev/null +++ b/share/openPMD/json_schema/README.md @@ -0,0 +1,47 @@ +# JSON Validation + +This folder contains a JSON schema for validation of openPMD files written as `.json` files. + +## Usage + +### Generating the JSON schema + +For improved readability, maintainability and documentation purposes, the JSON schema is written in `.toml` format and needs to be "compiled" to `.json` files first before usage. +To do this, the openPMD-api installs a tool named `openpmd-convert-json-toml` which can be used to convert between JSON and TOML files in both directions, e.g.: + +```bash +openpmd_convert-json-toml @series.toml > series.json +``` + +A `Makefile` is provided in this folder to simplify the application of this conversion tool. + +### Verifying a file against the JSON schema + +In theory, the JSON schema should be applicable by any JSON validator. This JSON schema is written in terms of multiple files however, and most validators require special care to properly set up the links between the single files. 
A Python script `check.py` is provided in this folder which sets up the [Python jsonschema](https://python-jsonschema.readthedocs.io) library and verifies a file against it, e.g.: + +```bash +./check.py path/to/my/dataset.json +``` + +For further usage notes check the documentation of the script itself `./check.py --help`. + +## Caveats + +The openPMD standard is not entirely expressible in terms of a JSON schema: + +* Many semantic dependencies, e.g. that the `position/x` and `position/y` vector of a particle species be of the same size, or that the `axisLabels` have the same dimensionality as the dataset itself, will go unchecked. +* The `meshesPath` is assumed to be `meshes/` and the `particlesPath` is assumed to be `particles/`. This dependency cannot be expressed. + +While a large part of the openPMD standard can indeed be verified by checking against a JSON schema, the standard is generally large enough to make this approach come to its limits. Verification of a JSON schema is similar to the use of a naive recursive-descent parser. Error messages will often be unexpectedly verbose and not very informative. +A challenge for the JSON validator are disjunctive statements such as "A Record is either a scalar Record Component or a vector of non-scalar Record Components". If there is even a tiny mistake somewhere down in the hierarchy, the entire disjunctive branch will fail evaluating. + +The layout of attributes is assumed to be that which is created by the JSON backend of the openPMD-api, e.g.: + +```json +"meshesPath": { + "datatype": "STRING", + "value": "meshes/" +} +``` + +Support for an abbreviated notation such as `"meshesPath": "meshes/"` is currently not (yet) available. 
diff --git a/share/openPMD/json_schema/check.py b/share/openPMD/json_schema/check.py new file mode 100755 index 0000000000..07f76b6ce0 --- /dev/null +++ b/share/openPMD/json_schema/check.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +import argparse +import json +import os +from pathlib import Path +import sys + +import jsonschema.validators + + +def parse_args(program_name): + script_path = Path(os.path.dirname(os.path.realpath(sys.argv[0]))) + parser = argparse.ArgumentParser( + # we need this for line breaks + formatter_class=argparse.RawDescriptionHelpFormatter, + description=""" +Check JSON files against the openPMD JSON schema. + +This tool validates an openPMD-formatted JSON file against the openPMD JSON +schema, using the jsonschema Python library as a backend. +Please use this script instead of the jsonschema directly since the openPMD +schema consists of several JSON files and this script ensures that +cross-referencing is set up correctly. + +Note that the JSON schema is shipped in form of .toml files for ease +of reading, maintenance and documentation. +In order to perform a check, the .toml files need to be converted to .json +first. +The openPMD-api install a tool openpmd-convert-json-toml for this purpose. +Additionally, there is a Makefile shipped in the same folder as this Python +script which can be directly applied to generate the JSON schema. + + +Examples: + {0} --help + {0} --schema_root={1} +""".format(os.path.basename(program_name), script_path / "series.json")) + + parser.add_argument( + '--schema_root', + default=script_path / 'series.json', + help="""\ +The .json file describing the root file of the schema to validate against. 
+""" + ) + parser.add_argument('openpmd_file', + metavar='file', + nargs=1, + help="The file which to validate.") + + return parser.parse_args() + + +args = parse_args(sys.argv[0]) + +path = Path(os.path.dirname(os.path.realpath(args.schema_root))) +resolver = jsonschema.validators.RefResolver( + base_uri=f"{path.as_uri()}/", + referrer=True, +) + +with open(args.openpmd_file[0], "r") as instance: + jsonschema.validate( + instance=json.load(instance), + schema={"$ref": "./series.json"}, + resolver=resolver, + ) + print("File {} was validated successfully against schema {}.".format( + instance.name, args.schema_root)) From a5109576f529d550d6f8f1c440662a5e6dff4ccd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 26 Jun 2023 16:31:13 +0200 Subject: [PATCH 04/23] Don't use spaces in SerialIOTest attribute names The JSON schema verification package does not like that --- test/SerialIOTest.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index e4b3dead4f..881aced538 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -2437,8 +2437,8 @@ inline void bool_test(const std::string &backend) Access::CREATE, R"({"iteration_encoding": "variable_based"})"); - o.setAttribute("Bool attribute true", true); - o.setAttribute("Bool attribute false", false); + o.setAttribute("Bool_attribute_true", true); + o.setAttribute("Bool_attribute_false", false); } { Series o = @@ -2446,12 +2446,12 @@ inline void bool_test(const std::string &backend) auto attrs = o.attributes(); REQUIRE( - std::count(attrs.begin(), attrs.end(), "Bool attribute true") == 1); + std::count(attrs.begin(), attrs.end(), "Bool_attribute_true") == 1); REQUIRE( - std::count(attrs.begin(), attrs.end(), "Bool attribute false") == + std::count(attrs.begin(), attrs.end(), "Bool_attribute_false") == 1); - REQUIRE(o.getAttribute("Bool attribute true").get() == true); - REQUIRE(o.getAttribute("Bool attribute 
false").get() == false); + REQUIRE(o.getAttribute("Bool_attribute_true").get() == true); + REQUIRE(o.getAttribute("Bool_attribute_false").get() == false); } { Series list{"../samples/serial_bool." + backend, Access::READ_ONLY}; From facfa15a631892a06e90db5368c9a325aecd490b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 26 Jun 2023 16:33:12 +0200 Subject: [PATCH 05/23] Fix bugs detected by this verifier Both of the form "data not found in places where data was expected" --- src/Series.cpp | 44 ++++++++++++++++++++++++++++++++++++-------- test/CoreTest.cpp | 24 ++++++++++++++++++------ 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/src/Series.cpp b/src/Series.cpp index 142dfe4bf0..96a6ec9e70 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -1299,14 +1299,20 @@ void Series::flushFileBased( bool flushIOHandler) { auto &series = get(); - /* - * Iterations might have been present, but have been closed and deleted from - * internal structures. In this case, previous flushes were successful and - * the Series is now in written() state. - */ - if (end == begin && !written()) + if (end == begin && + /* + * At parsing time, this might happen since iterations might contain + * errors and be deleted. + */ + IOHandler()->m_seriesStatus != internal::SeriesStatus::Parsing && + /* + * Iterations might have been present, but have been closed and deleted + * from internal structures. In this case, previous flushes were + * successful and the Series is now in written() state. + */ + !written()) { - throw std::runtime_error( + throw error::WrongAPIUsage( "fileBased output can not be written with no iterations."); } @@ -1422,8 +1428,30 @@ void Series::flushGorVBased( internal::FlushParams const &flushParams, bool flushIOHandler) { - auto &series = get(); + if (iterationEncoding() == IterationEncoding::variableBased && + /* + * At parsing time, this might happen since iterations might contain + * errors and be deleted. 
+ */ + IOHandler()->m_seriesStatus != internal::SeriesStatus::Parsing && + iterations.empty()) + { + /* + * Note: Unlike flushFileBased, it's ok if `begin == end` since this + * method may be called without an explicit iteration. + * But since in variable-based encoding the base path is the same as the + * path to the (currently active) iteration, there must be at least one + * iteration present since the openPMD standard requires mandatory + * attributes. + * In group-based encoding, any number of iterations might be included + * in the base path, in variable-based encoding there must be exactly + * one iteration currently active. + */ + throw error::WrongAPIUsage( + "variableBased output can not be written with no iterations."); + } + auto &series = get(); if (access::readOnly(IOHandler()->m_frontendAccess)) { for (auto it = begin; it != end; ++it) diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp index f87be576bb..d3de988491 100644 --- a/test/CoreTest.cpp +++ b/test/CoreTest.cpp @@ -222,7 +222,8 @@ TEST_CASE("myPath", "[core]") REQUIRE( pathOf(scalarMesh) == vec_t{"data", "1234", "meshes", "e_chargeDensity"}); - auto scalarMeshComponent = scalarMesh[RecordComponent::SCALAR]; + auto scalarMeshComponent = scalarMesh[RecordComponent::SCALAR].resetDataset( + {Datatype::FLOAT, {10}}); REQUIRE( pathOf(scalarMeshComponent) == vec_t{"data", "1234", "meshes", "e_chargeDensity"}); @@ -230,7 +231,8 @@ TEST_CASE("myPath", "[core]") auto vectorMesh = iteration.meshes["E"]; REQUIRE(pathOf(vectorMesh) == vec_t{"data", "1234", "meshes", "E"}); - auto vectorMeshComponent = vectorMesh["x"]; + auto vectorMeshComponent = + vectorMesh["x"].resetDataset({Datatype::FLOAT, {10}}); REQUIRE( pathOf(vectorMeshComponent) == vec_t{"data", "1234", "meshes", "E", "x"}); @@ -245,7 +247,8 @@ TEST_CASE("myPath", "[core]") pathOf(speciesPosition) == vec_t{"data", "1234", "particles", "e", "position"}); - auto speciesPositionX = speciesPosition["x"]; + auto speciesPositionX = + 
speciesPosition["x"].resetDataset({Datatype::FLOAT, {10}}); REQUIRE( pathOf(speciesPositionX) == vec_t{"data", "1234", "particles", "e", "position", "x"}); @@ -256,7 +259,9 @@ TEST_CASE("myPath", "[core]") pathOf(speciesWeighting) == vec_t{"data", "1234", "particles", "e", "weighting"}); - auto speciesWeightingX = speciesWeighting[RecordComponent::SCALAR]; + auto speciesWeightingX = + speciesWeighting[RecordComponent::SCALAR].resetDataset( + {Datatype::FLOAT, {10}}); REQUIRE( pathOf(speciesWeightingX) == vec_t{"data", "1234", "particles", "e", "weighting"}); @@ -271,7 +276,7 @@ TEST_CASE("myPath", "[core]") pathOf(patchExtent) == vec_t{"data", "1234", "particles", "e", "particlePatches", "extent"}); - auto patchExtentX = patchExtent["x"]; + auto patchExtentX = patchExtent["x"].resetDataset({Datatype::INT, {10}}); REQUIRE( pathOf(patchExtentX) == vec_t{ @@ -295,7 +300,8 @@ TEST_CASE("myPath", "[core]") "numParticles"}); auto patchNumParticlesComponent = - patchNumParticles[RecordComponent::SCALAR]; + patchNumParticles[RecordComponent::SCALAR].resetDataset( + {Datatype::INT, {10}}); REQUIRE( pathOf(patchNumParticlesComponent) == vec_t{ @@ -305,6 +311,10 @@ TEST_CASE("myPath", "[core]") "e", "particlePatches", "numParticles"}); + + speciesE.particlePatches["offset"]["x"].resetDataset({Datatype::INT, {10}}); + speciesE.particlePatches["numParticlesOffset"][RecordComponent::SCALAR] + .resetDataset({Datatype::INT, {10}}); #endif } @@ -1119,6 +1129,7 @@ TEST_CASE("backend_via_json", "[core]") { Series series( "../samples/optionsViaJson", Access::CREATE, encodingVariableBased); + series.iterations[0]; // v-based encoding requires at least 1 iteration REQUIRE(series.backend() == "JSON"); REQUIRE(series.iterationEncoding() == IterationEncoding::variableBased); } @@ -1132,6 +1143,7 @@ TEST_CASE("backend_via_json", "[core]") "../samples/optionsViaJson.bp", Access::CREATE, encodingVariableBased); + series.iterations[0]; // v-based encoding requires at least 1 iteration 
REQUIRE(series.backend() == "JSON"); REQUIRE(series.iterationEncoding() == IterationEncoding::variableBased); } From 3749056b5aa2bc0e5b19e6cf37109b7a64e9c644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 26 Jun 2023 16:33:45 +0200 Subject: [PATCH 06/23] Add GitHub workflow Verify all JSON-openPMD files written by testing against the schema --- .github/workflows/linux.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 3a582aeaee..0980225d82 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -260,7 +260,7 @@ jobs: - name: Install run: | sudo apt-get update - sudo apt-get install g++ libopenmpi-dev libhdf5-openmpi-dev python3 python3-numpy python3-mpi4py python3-pandas python3-h5py-mpi + sudo apt-get install g++ libopenmpi-dev libhdf5-openmpi-dev python3 python3-numpy python3-mpi4py python3-pandas python3-h5py-mpi python3-pip # TODO ADIOS2 - name: Build env: {CXXFLAGS: -Werror, PKG_CONFIG_PATH: /usr/lib/x86_64-linux-gnu/pkgconfig} @@ -275,6 +275,22 @@ jobs: cmake --build build --parallel 4 ctest --test-dir build --output-on-failure + python3 -m pip install jsonschema + cd share/openPMD/json_schema + PATH="../../../build/bin:$PATH" make -j 2 + # We need to exclude the thetaMode example since that has a different + # meshesPath and the JSON schema needs to hardcode that. + find ../../../build/samples/ \ + ! -path '*thetaMode*' \ + ! -path '/*many_iterations/*' \ + ! -name 'profiling.json' \ + ! 
-name '*config.json' \ + -iname '*.json' \ + | while read i; do + echo "Checking $i" + ./check.py "$i" + done + musllinux_py10: runs-on: ubuntu-22.04 if: github.event.pull_request.draft == false From ba3f5b1bbffb837588d68729979beb65818177e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 7 Aug 2023 13:19:27 +0200 Subject: [PATCH 07/23] Shorthand attributes --- share/openPMD/json_schema/attribute_defs.toml | 86 +++++++++++++++---- share/openPMD/json_schema/attributes.toml | 21 ++++- share/openPMD/json_schema/series.toml | 12 ++- 3 files changed, 95 insertions(+), 24 deletions(-) diff --git a/share/openPMD/json_schema/attribute_defs.toml b/share/openPMD/json_schema/attribute_defs.toml index d8d3d19c0f..70f3b9ec16 100644 --- a/share/openPMD/json_schema/attribute_defs.toml +++ b/share/openPMD/json_schema/attribute_defs.toml @@ -5,12 +5,21 @@ # Vectors of strings # ###################### -["$defs".vec_string_attribute] +[["$defs".vec_string_attribute.oneOf]] +title = "Shorthand notation" +anyOf = [ + { type = "string" }, + { type = "array", items = { "type" = "string" } }, +] + +[["$defs".vec_string_attribute.oneOf]] +title = "Long notation" +type = "object" required = ["value", "datatype"] -["$defs".vec_string_attribute.properties] +["$defs".vec_string_attribute.oneOf.properties] -value.any_of = [ +value.anyOf = [ { type = "string" }, { type = "array", items = { "type" = "string" } }, ] @@ -30,12 +39,21 @@ datatype.enum = [ # Vectors of int # ################## -["$defs".vec_int_attribute] +[["$defs".vec_int_attribute.oneOf]] +title = "Shorthand notation" +anyOf = [ + { type = "integer" }, + { type = "array", items = { "type" = "integer" } }, +] + +[["$defs".vec_int_attribute.oneOf]] +title = "Long notation" +type = "object" required = ["value", "datatype"] -["$defs".vec_int_attribute.properties] +["$defs".vec_int_attribute.oneOf.properties] -value.any_of = [ +value.anyOf = [ { type = "integer" }, { type = "array", items = { "type" = 
"integer" } }, ] @@ -63,12 +81,21 @@ datatype.enum = [ # Vectors of float # #################### -["$defs".vec_float_attribute] +[["$defs".vec_float_attribute.oneOf]] +title = "Shorthand notation" +anyOf = [ + { type = "number" }, + { type = "array", items = { "type" = "number" } }, +] + +[["$defs".vec_float_attribute.oneOf]] +title = "Long notation" +type = "object" required = ["value", "datatype"] -["$defs".vec_float_attribute.properties] +["$defs".vec_float_attribute.oneOf.properties] -value.any_of = [ +value.anyOf = [ { type = "number" }, { type = "array", items = { "type" = "number" } }, ] @@ -112,10 +139,17 @@ datatype.enum = [ # unitDimension attribute # ########################### -["$defs".unitDimension] +[["$defs".unitDimension.oneOf]] +title = "Shorthand notation" +type = "array" +items.type = "number" + +[["$defs".unitDimension.oneOf]] +title = "Long notation" +type = "object" required = ["value", "datatype"] -["$defs".unitDimension.properties] +["$defs".unitDimension.oneOf.properties] value = { type = "array", items = { type = "number" } } datatype.const = "ARR_DBL_7" @@ -124,10 +158,16 @@ datatype.const = "ARR_DBL_7" # string attributes # ##################### -["$defs".string_attribute] +[["$defs".string_attribute.oneOf]] +title = "Shorthand notation" +type = "string" + +[["$defs".string_attribute.oneOf]] +title = "Long notation" +type = "object" required = ["value", "datatype"] -["$defs".string_attribute.properties] +["$defs".string_attribute.oneOf.properties] value.type = "string" datatype.enum = ["STRING", "CHAR", "SCHAR", "UCHAR"] @@ -136,10 +176,16 @@ datatype.enum = ["STRING", "CHAR", "SCHAR", "UCHAR"] # int attributes # ################## -["$defs".int_attribute] +[["$defs".int_attribute.oneOf]] +title = "Shorthand notation" +type = "integer" + +[["$defs".int_attribute.oneOf]] +title = "Long notation" +type = "object" required = ["value", "datatype"] -["$defs".int_attribute.properties] +["$defs".int_attribute.oneOf.properties] value.type = 
"integer" datatype.enum = [ @@ -157,10 +203,16 @@ datatype.enum = [ # float attributes # #################### -["$defs".float_attribute] +[["$defs".float_attribute.oneOf]] +title = "Shorthand notation" +type = "number" + +[["$defs".float_attribute.oneOf]] +title = "Long notation" +type = "object" required = ["value", "datatype"] -["$defs".float_attribute.properties] +["$defs".float_attribute.oneOf.properties] value.type = "number" datatype.enum = [ diff --git a/share/openPMD/json_schema/attributes.toml b/share/openPMD/json_schema/attributes.toml index 8137554a91..18cfc36101 100644 --- a/share/openPMD/json_schema/attributes.toml +++ b/share/openPMD/json_schema/attributes.toml @@ -9,11 +9,26 @@ type = "object" title = "Dictionary of attributes" description = "Generic layout of an attributes object." -[oneOf.patternProperties.".*"] -title = "A generic attribute" +[[oneOf.patternProperties.".*".oneOf]] +title = "A generic attribute - short form" +anyOf = [ + # Any primitive value + { not = { anyOf = [ + { type = "object", title = "An object" }, + { type = "array", title = "An array" }, + ] }, title = "No complex type" }, + # Or an array of any primitive value + { type = "array", items = { not = { anyOf = [ + { type = "object", title = "An object" }, + { type = "array", title = "An array" }, + ] } }, title = "An array of non-complex types" }, +] + +[[oneOf.patternProperties.".*".oneOf]] +title = "A generic attribute - long form" type = "object" -[oneOf.patternProperties.".*".properties] +[oneOf.patternProperties.".*".oneOf.properties] value.anyOf = [ # Any primitive value diff --git a/share/openPMD/json_schema/series.toml b/share/openPMD/json_schema/series.toml index 46c57c7153..ea5b4b1218 100644 --- a/share/openPMD/json_schema/series.toml +++ b/share/openPMD/json_schema/series.toml @@ -48,12 +48,16 @@ machine."$ref" = "attribute_defs.json#/$defs/string_attribute" softwareDependencies."$ref" = "attribute_defs.json#/$defs/string_attribute" meshesPath.description = "Note 
that the meshesPath is hardcoded as its semantics are impossible to model in a JSON schema." -meshesPath.const.value = "meshes/" -meshesPath.const.datatype = "STRING" +meshesPath.oneOf = [ + { const = { value = "meshes/", datatype = "STRING" } }, + { const = "meshes/" }, +] particlesPath.description = "Note that the particlesPath is hardcoded as its semantics are impossible to model in a JSON schema." -particlesPath.const.value = "particles/" -particlesPath.const.datatype = "STRING" +particlesPath.oneOf = [ + { const = { value = "particles/", datatype = "STRING" } }, + { const = "particles/" }, +] ################################################# # Requirement 1.2: General layout of attributes # From a719bb6ac52a8bec35655232c7ceb6465cbf3ddb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 7 Aug 2023 14:02:58 +0200 Subject: [PATCH 08/23] Add dataset template mode --- share/openPMD/json_schema/dataset_defs.toml | 76 ++++++++++++++++--- .../openPMD/json_schema/record_component.toml | 1 - 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/share/openPMD/json_schema/dataset_defs.toml b/share/openPMD/json_schema/dataset_defs.toml index b2d710acf2..32ac56a63c 100644 --- a/share/openPMD/json_schema/dataset_defs.toml +++ b/share/openPMD/json_schema/dataset_defs.toml @@ -7,19 +7,15 @@ [["$defs".any_type_recursive_array.anyOf]] title = "A numeric type" type = "array" -items.anyOf = [{"type" = "number"}, {"type" = "null"}] +items.anyOf = [{ "type" = "number" }, { "type" = "null" }] [["$defs".any_type_recursive_array.anyOf]] title = "A recursive array of numeric types" type = "array" items."$ref" = "#/$defs/any_type_recursive_array" -["$defs".any_type_dataset] -title = "A dataset of any numeric type" -required = ["data", "datatype"] - -["$defs".any_type_dataset.properties] +["$defs".any_type_dataset_properties.properties] datatype.enum = [ "CHAR", "UCHAR", @@ -41,6 +37,36 @@ datatype.enum = [ "BOOL", ] data."$ref" = 
"#/$defs/any_type_recursive_array" +extent = { type = "array", items = { type = "integer" } } + + +################################################### +# Either `datatype` and `data` must be defined... # +################################################### + +[["$defs".any_type_dataset.anyOf]] +title = "A dataset of any numeric type" +allOf = [ + { required = [ + "datatype", + "data", + ], title = "Required properties" }, + { "$ref" = "#/$defs/any_type_dataset_properties", title = "Property definitions" }, +] + +######################################################### +# ...or `datatype` and `extent` must be (template form) # +######################################################### + +[["$defs".any_type_dataset.anyOf]] +title = "A template dataset of any numeric type" +allOf = [ + { required = [ + "datatype", + "extent", + ], title = "Required properties" }, + { "$ref" = "#/$defs/any_type_dataset_properties", title = "Property definitions" }, +] ###################################### # n-dimensional datasets of int type # @@ -49,19 +75,15 @@ data."$ref" = "#/$defs/any_type_recursive_array" [["$defs".int_type_recursive_array.anyOf]] title = "An integer type" type = "array" -items.anyOf = [{"type" = "integer"}, {"type" = "null"}] +items.anyOf = [{ "type" = "integer" }, { "type" = "null" }] [["$defs".int_type_recursive_array.anyOf]] title = "A recursive array of integer types" type = "array" items."$ref" = "#/$defs/int_type_recursive_array" -["$defs".int_type_dataset] -title = "A dataset of integer type" -required = ["data", "datatype"] - -["$defs".int_type_dataset.properties] +["$defs".int_type_dataset_properties.properties] datatype.enum = [ "CHAR", "UCHAR", @@ -77,3 +99,33 @@ datatype.enum = [ "BOOL", ] data."$ref" = "#/$defs/int_type_recursive_array" +extent = { type = "array", items = { type = "integer" } } + + +################################################### +# Either `datatype` and `data` must be defined... 
# +################################################### + +[["$defs".int_type_dataset.anyOf]] +title = "A dataset of integer type" +allOf = [ + { required = [ + "datatype", + "data", + ], title = "Required properties" }, + { "$ref" = "#/$defs/int_type_dataset_properties", title = "Property definitions" }, +] + +######################################################### +# ...or `datatype` and `extent` must be (template form) # +######################################################### + +[["$defs".int_type_dataset.anyOf]] +title = "A dataset template of integer type" +allOf = [ + { required = [ + "datatype", + "extent", + ], title = "Required properties" }, + { "$ref" = "#/$defs/int_type_dataset_properties", title = "Property definitions" }, +] diff --git a/share/openPMD/json_schema/record_component.toml b/share/openPMD/json_schema/record_component.toml index 9f81908b68..b280db94b5 100644 --- a/share/openPMD/json_schema/record_component.toml +++ b/share/openPMD/json_schema/record_component.toml @@ -43,7 +43,6 @@ title = "Either array or constant" ##################### [[allOf.oneOf]] -required = ["data", "datatype"] description = "An n-dimensional dataset containing the payload." title = "Array dataset" From 3a5fc19f634ddb7d7a1a62877496f89bc756aa97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 16 Jul 2024 15:24:19 +0200 Subject: [PATCH 09/23] Fix path --- .github/workflows/linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 0980225d82..a333ffa551 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -276,7 +276,7 @@ jobs: ctest --test-dir build --output-on-failure python3 -m pip install jsonschema - cd share/openPMD/json_schema + cd ../share/openPMD/json_schema PATH="../../../build/bin:$PATH" make -j 2 # We need to exclude the thetaMode example since that has a different # meshesPath and the JSON schema needs to hardcode that. 
From 334f2d58f85245964783f4679afc5edc5a873d9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 16 Jul 2024 15:43:24 +0200 Subject: [PATCH 10/23] Fix reading from stdin --- src/cli/convert-json-toml.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/cli/convert-json-toml.cpp b/src/cli/convert-json-toml.cpp index 3abb6127f8..aa27fb0d82 100644 --- a/src/cli/convert-json-toml.cpp +++ b/src/cli/convert-json-toml.cpp @@ -3,6 +3,7 @@ #include #include +#include #include namespace json = openPMD::json; @@ -38,7 +39,11 @@ int main(int argc, char const **argv) // Just read the whole stream into memory // Not very elegant, but we'll hold the entire JSON/TOML dataset // in memory at some point anyway, so it doesn't really matter - std::cin >> jsonOrToml; + { + std::stringbuf readEverything; + std::cin >> &readEverything; + jsonOrToml = readEverything.str(); + } break; case 2: if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) From 90ce20151ea2346658e9e1474caa42b12cfede8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 5 Aug 2024 11:37:34 +0200 Subject: [PATCH 11/23] toml11 4.0 compatibility --- src/cli/convert-json-toml.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli/convert-json-toml.cpp b/src/cli/convert-json-toml.cpp index aa27fb0d82..f13317947d 100644 --- a/src/cli/convert-json-toml.cpp +++ b/src/cli/convert-json-toml.cpp @@ -20,7 +20,7 @@ void parsed_main(std::string jsonOrToml) using SL = json::SupportedLanguages; case SL::JSON: { auto asToml = json::jsonToToml(config); - std::cout << asToml; + std::cout << json::format_toml(asToml); } break; case SL::TOML: From 779cf12440cddd56b6dd81bd22ae35d501a3f7a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 17 Feb 2025 14:08:33 +0100 Subject: [PATCH 12/23] Only check for existing Iterations in writeOnly mode --- src/Series.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 
deletions(-) diff --git a/src/Series.cpp b/src/Series.cpp index 96a6ec9e70..c3a0b059ff 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -1429,12 +1429,7 @@ void Series::flushGorVBased( bool flushIOHandler) { if (iterationEncoding() == IterationEncoding::variableBased && - /* - * At parsing time, this might happen since iterations might contain - * errors and be deleted. - */ - IOHandler()->m_seriesStatus != internal::SeriesStatus::Parsing && - iterations.empty()) + access::writeOnly(IOHandler()->m_frontendAccess) && iterations.empty()) { /* * Note: Unlike flushFileBased, it's ok if `begin == end` since this From f6b1f24641af53018fa530528125a52ad3c4d9a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 17 Feb 2025 14:09:41 +0100 Subject: [PATCH 13/23] Some additions to schema 1. Support UNDEFINED datasets in template mode 2. gridUnitSI may now be a vector --- share/openPMD/json_schema/dataset_defs.toml | 13 +++++++++++++ share/openPMD/json_schema/mesh.toml | 4 +++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/share/openPMD/json_schema/dataset_defs.toml b/share/openPMD/json_schema/dataset_defs.toml index 32ac56a63c..030b94a745 100644 --- a/share/openPMD/json_schema/dataset_defs.toml +++ b/share/openPMD/json_schema/dataset_defs.toml @@ -68,6 +68,19 @@ allOf = [ { "$ref" = "#/$defs/any_type_dataset_properties", title = "Property definitions" }, ] +##################################################### +# ...or `datatype` is UNDEFINED, no extent required # +##################################################### + +[["$defs".any_type_dataset.anyOf]] +title = "An undefined template dataset" +allOf = [ + { required = [ + "datatype", + ], title = "Required properties" }, + { datatype.value = "UNDEFINED", extent = { type = "array", items.type = "integer" } }, +] + ###################################### # n-dimensional datasets of int type # ###################################### diff --git a/share/openPMD/json_schema/mesh.toml 
b/share/openPMD/json_schema/mesh.toml index f7c82cec9a..0fb387789a 100644 --- a/share/openPMD/json_schema/mesh.toml +++ b/share/openPMD/json_schema/mesh.toml @@ -32,7 +32,9 @@ required = [ [allOf.properties.attributes.allOf.properties] -gridUnitSI."$ref" = "attribute_defs.json#/$defs/float_attribute" +# in openPMD 1.0: float_attribute, openPMD 2.0: vec_float_attribute +# TODO: decide how to better deal with the 1.0/2.0 dichotomy +gridUnitSI."$ref" = "attribute_defs.json#/$defs/vec_float_attribute" gridSpacing."$ref" = "attribute_defs.json#/$defs/vec_float_attribute" gridGlobalOffset."$ref" = "attribute_defs.json#/$defs/vec_float_attribute" timeOffset."$ref" = "attribute_defs.json#/$defs/float_attribute" From f18a88b3c2e117de926018f1efd6753562ef8ebb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 17 Feb 2025 16:33:49 +0100 Subject: [PATCH 14/23] Remove deprecated jsonschema.validators.RefResolver Apparently it's better to make everything 100 times more complicated --- .github/workflows/linux.yml | 2 +- share/openPMD/json_schema/check.py | 26 ++++++++++++++++++-------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index a333ffa551..cb692fad83 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -275,7 +275,7 @@ jobs: cmake --build build --parallel 4 ctest --test-dir build --output-on-failure - python3 -m pip install jsonschema + python3 -m pip install jsonschema referencing cd ../share/openPMD/json_schema PATH="../../../build/bin:$PATH" make -j 2 # We need to exclude the thetaMode example since that has a different diff --git a/share/openPMD/json_schema/check.py b/share/openPMD/json_schema/check.py index 07f76b6ce0..155743007d 100755 --- a/share/openPMD/json_schema/check.py +++ b/share/openPMD/json_schema/check.py @@ -7,6 +7,8 @@ import sys import jsonschema.validators +from referencing import Registry, Resource +from referencing.jsonschema 
import DRAFT202012 def parse_args(program_name): @@ -39,9 +41,9 @@ def parse_args(program_name): parser.add_argument( '--schema_root', - default=script_path / 'series.json', + default=script_path, help="""\ -The .json file describing the root file of the schema to validate against. +Directory where to resolve JSON schema files to validate against. """ ) parser.add_argument('openpmd_file', @@ -55,16 +57,24 @@ def parse_args(program_name): args = parse_args(sys.argv[0]) path = Path(os.path.dirname(os.path.realpath(args.schema_root))) -resolver = jsonschema.validators.RefResolver( - base_uri=f"{path.as_uri()}/", - referrer=True, -) + + +def retrieve_from_filesystem(uri): + filepath = args.schema_root / uri + with open(filepath, "r") as referred: + loaded_json = json.load(referred) + return Resource.from_contents( + loaded_json, default_specification=DRAFT202012) + + +registry = Registry(retrieve=retrieve_from_filesystem) with open(args.openpmd_file[0], "r") as instance: + loaded_instance = json.load(instance) jsonschema.validate( - instance=json.load(instance), + instance=loaded_instance, schema={"$ref": "./series.json"}, - resolver=resolver, + registry=registry, ) print("File {} was validated successfully against schema {}.".format( instance.name, args.schema_root)) From 5e4a870f274ea817bfbff2f611221f61c68ea6e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 3 Mar 2025 10:41:13 +0100 Subject: [PATCH 15/23] Use most recent version of jsonschema --- .github/workflows/linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index cb692fad83..4eea8a4454 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -275,7 +275,7 @@ jobs: cmake --build build --parallel 4 ctest --test-dir build --output-on-failure - python3 -m pip install jsonschema referencing + python3 -m pip install jsonschema@4.2.3 referencing cd ../share/openPMD/json_schema 
PATH="../../../build/bin:$PATH" make -j 2 # We need to exclude the thetaMode example since that has a different From 252c0d44e1c5cab193c46b85df9e37daa4c4d650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 3 Mar 2025 12:12:59 +0100 Subject: [PATCH 16/23] Allow empty variable-based series --- .github/workflows/linux.yml | 2 +- share/openPMD/json_schema/Makefile | 6 ++++-- share/openPMD/json_schema/series.toml | 27 ++++++++++++++++++++------- src/Series.cpp | 18 ------------------ 4 files changed, 25 insertions(+), 28 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 4eea8a4454..49deb084ae 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -275,7 +275,7 @@ jobs: cmake --build build --parallel 4 ctest --test-dir build --output-on-failure - python3 -m pip install jsonschema@4.2.3 referencing + python3 -m pip install jsonschema==4.* referencing cd ../share/openPMD/json_schema PATH="../../../build/bin:$PATH" make -j 2 # We need to exclude the thetaMode example since that has a different diff --git a/share/openPMD/json_schema/Makefile b/share/openPMD/json_schema/Makefile index dcbc1584d3..32b403b003 100644 --- a/share/openPMD/json_schema/Makefile +++ b/share/openPMD/json_schema/Makefile @@ -5,9 +5,11 @@ json_files = attribute_defs.json attributes.json dataset_defs.json iteration.jso .PHONY: all all: $(json_files) +# The target file should only be created if the conversion succeeded $(json_files): %.json: %.toml - $(convert) @$^ > $@ + $(convert) @$^ > $@.tmp + mv $@.tmp $@ .PHONY: clean clean: - -rm $(json_files) + for file in $(json_files); do rm -f "$$file" "$$file.tmp"; done diff --git a/share/openPMD/json_schema/series.toml b/share/openPMD/json_schema/series.toml index ea5b4b1218..658e18370b 100644 --- a/share/openPMD/json_schema/series.toml +++ b/share/openPMD/json_schema/series.toml @@ -78,21 +78,34 @@ title = "Contains iterations" # Requirement 2.1: Either a single 
iteration in variable-based encoding # ######################################################################### -[[allOf.oneOf]] +[allOf.if] +properties.attributes.properties.iterationEncoding.oneOf = [ + { const = { value = "variableBased", datatype = "STRING" } }, + { const = "variableBased" }, +] + +[allOf.then] title = "Variable-based encoding" properties.attributes.properties.iterationEncoding.properties.value = { const = "variableBased" } -[allOf.oneOf.properties.data] +[allOf.then.properties.data] type = "object" title = "An iteration" description = "A single iteration." + +# Only require iteration data if snapshot attribute is defined +[allOf.then.properties.data.if] +required = ["attributes"] +properties.attributes.required = ["snapshot"] + +[allOf.then.properties.data.then] "$ref" = "iteration.json" ############################################################################ # Requirement 2.2: Or multiple iterations in group- or file-based encoding # ############################################################################ -[[allOf.oneOf]] +[allOf.else] title = "Group-based (or file-based) encoding" properties.attributes.properties.iterationEncoding.properties.value = { oneOf = [ { const = "groupBased" }, @@ -101,22 +114,22 @@ properties.attributes.properties.iterationEncoding.properties.value = { oneOf = # Base Path -[allOf.oneOf.properties.data] +[allOf.else.properties.data] type = "object" title = "Base path" description = "A map of all iterations/snapshots in the Series." propertyNames.pattern = "^(-?[0-9]*|attributes)$" -[allOf.oneOf.properties.data.properties] +[allOf.else.properties.data.properties] -[allOf.oneOf.properties.data.properties.attributes] +[allOf.else.properties.data.properties.attributes] title = "Attribute layout" description = "Custom attributes allowed, no required attributes defined." 
"$ref" = "attributes.json" # Base Path -> Iterations -[allOf.oneOf.properties.data.patternProperties."^-?[0-9]*$"] +[allOf.else.properties.data.patternProperties."^-?[0-9]*$"] title = "Iteration" "$ref" = "iteration.json" diff --git a/src/Series.cpp b/src/Series.cpp index c3a0b059ff..d3acdf4a50 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -1428,24 +1428,6 @@ void Series::flushGorVBased( internal::FlushParams const &flushParams, bool flushIOHandler) { - if (iterationEncoding() == IterationEncoding::variableBased && - access::writeOnly(IOHandler()->m_frontendAccess) && iterations.empty()) - { - /* - * Note: Unlike flushFileBased, it's ok if `begin == end` since this - * method may be called without an explicit iteration. - * But since in variable-based encoding the base path is the same as the - * path to the (currently active) iteration, there must be at least one - * iteration present since the openPMD standard requires mandatory - * attributes. - * In group-based encoding, any number of iterations might be included - * in the base path, in variable-based encoding there must be exactly - * one iteration currently active. - */ - throw error::WrongAPIUsage( - "variableBased output can not be written with no iterations."); - } - auto &series = get(); if (access::readOnly(IOHandler()->m_frontendAccess)) { From 65aa12d1e83d4187e490516a20086340ad38087b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 3 Mar 2025 14:08:08 +0100 Subject: [PATCH 17/23] Use if-then-else for better-steered parsing anyOf and oneOf now only used for trivial distinctions, this makes schemas much more robust since errors can be caught early and error messages become actually useful. 
--- share/openPMD/json_schema/attribute_defs.toml | 107 ++++++++++-------- share/openPMD/json_schema/attributes.toml | 43 +++---- share/openPMD/json_schema/mesh.toml | 30 +++-- share/openPMD/json_schema/record.toml | 31 +++-- .../openPMD/json_schema/record_component.toml | 34 +++--- share/openPMD/json_schema/series.toml | 4 +- 6 files changed, 148 insertions(+), 101 deletions(-) diff --git a/share/openPMD/json_schema/attribute_defs.toml b/share/openPMD/json_schema/attribute_defs.toml index 70f3b9ec16..dfd86bfdad 100644 --- a/share/openPMD/json_schema/attribute_defs.toml +++ b/share/openPMD/json_schema/attribute_defs.toml @@ -5,19 +5,15 @@ # Vectors of strings # ###################### -[["$defs".vec_string_attribute.oneOf]] -title = "Shorthand notation" -anyOf = [ - { type = "string" }, - { type = "array", items = { "type" = "string" } }, -] +["$defs".vec_string_attribute.if] +type = "object" -[["$defs".vec_string_attribute.oneOf]] +["$defs".vec_string_attribute.then] title = "Long notation" type = "object" required = ["value", "datatype"] -["$defs".vec_string_attribute.oneOf.properties] +["$defs".vec_string_attribute.then.properties] value.anyOf = [ { type = "string" }, @@ -35,23 +31,24 @@ datatype.enum = [ "VEC_UCHAR", ] +["$defs".vec_string_attribute.else] +title = "Shorthand notation" +anyOf = [{ type = "string" }, { type = "array", items = { "type" = "string" } }] + + ################## # Vectors of int # ################## -[["$defs".vec_int_attribute.oneOf]] -title = "Shorthand notation" -anyOf = [ - { type = "integer" }, - { type = "array", items = { "type" = "integer" } }, -] +["$defs".vec_int_attribute.if] +type = "object" -[["$defs".vec_int_attribute.oneOf]] +["$defs".vec_int_attribute.then] title = "Long notation" type = "object" required = ["value", "datatype"] -["$defs".vec_int_attribute.oneOf.properties] +["$defs".vec_int_attribute.then.properties] value.anyOf = [ { type = "integer" }, @@ -77,23 +74,26 @@ datatype.enum = [ "VEC_ULONGLONG", ] 
+["$defs".vec_int_attribute.else] +title = "Shorthand notation" +anyOf = [ + { type = "integer" }, + { type = "array", items = { "type" = "integer" } }, +] + #################### # Vectors of float # #################### -[["$defs".vec_float_attribute.oneOf]] -title = "Shorthand notation" -anyOf = [ - { type = "number" }, - { type = "array", items = { "type" = "number" } }, -] +["$defs".vec_float_attribute.if] +type = "object" -[["$defs".vec_float_attribute.oneOf]] +["$defs".vec_float_attribute.then] title = "Long notation" type = "object" required = ["value", "datatype"] -["$defs".vec_float_attribute.oneOf.properties] +["$defs".vec_float_attribute.then.properties] value.anyOf = [ { type = "number" }, @@ -134,58 +134,67 @@ datatype.enum = [ "VEC_CLONG_DOUBLE", ] +["$defs".vec_float_attribute.else] +title = "Shorthand notation" +anyOf = [{ type = "number" }, { type = "array", items = { "type" = "number" } }] + ########################### # Special case: # # unitDimension attribute # ########################### -[["$defs".unitDimension.oneOf]] -title = "Shorthand notation" -type = "array" -items.type = "number" +["$defs".unitDimension.if] +type = "object" -[["$defs".unitDimension.oneOf]] +["$defs".unitDimension.then] title = "Long notation" type = "object" required = ["value", "datatype"] -["$defs".unitDimension.oneOf.properties] +["$defs".unitDimension.then.properties] value = { type = "array", items = { type = "number" } } datatype.const = "ARR_DBL_7" +["$defs".unitDimension.else] +title = "Shorthand notation" +type = "array" +items.type = "number" + ##################### # string attributes # ##################### -[["$defs".string_attribute.oneOf]] -title = "Shorthand notation" -type = "string" +["$defs".string_attribute.if] +type = "object" -[["$defs".string_attribute.oneOf]] +["$defs".string_attribute.then] title = "Long notation" type = "object" required = ["value", "datatype"] -["$defs".string_attribute.oneOf.properties] 
+["$defs".string_attribute.then.properties] value.type = "string" datatype.enum = ["STRING", "CHAR", "SCHAR", "UCHAR"] +["$defs".string_attribute.else] +title = "Shorthand notation" +type = "string" + ################## # int attributes # ################## -[["$defs".int_attribute.oneOf]] -title = "Shorthand notation" -type = "integer" +["$defs".int_attribute.if] +type = "object" -[["$defs".int_attribute.oneOf]] +["$defs".int_attribute.then] title = "Long notation" type = "object" required = ["value", "datatype"] -["$defs".int_attribute.oneOf.properties] +["$defs".int_attribute.then.properties] value.type = "integer" datatype.enum = [ @@ -199,20 +208,23 @@ datatype.enum = [ "ULONGLONG", ] +["$defs".int_attribute.else] +title = "Shorthand notation" +type = "integer" + #################### # float attributes # #################### -[["$defs".float_attribute.oneOf]] -title = "Shorthand notation" -type = "number" +["$defs".float_attribute.if] +type = "object" -[["$defs".float_attribute.oneOf]] +["$defs".float_attribute.then] title = "Long notation" type = "object" required = ["value", "datatype"] -["$defs".float_attribute.oneOf.properties] +["$defs".float_attribute.then.properties] value.type = "number" datatype.enum = [ @@ -234,3 +246,8 @@ datatype.enum = [ "CDOUBLE", "CLONG_DOUBLE", ] + + +["$defs".float_attribute.else] +title = "Shorthand notation" +type = "number" diff --git a/share/openPMD/json_schema/attributes.toml b/share/openPMD/json_schema/attributes.toml index 18cfc36101..1ee28acb2a 100644 --- a/share/openPMD/json_schema/attributes.toml +++ b/share/openPMD/json_schema/attributes.toml @@ -9,28 +9,16 @@ type = "object" title = "Dictionary of attributes" description = "Generic layout of an attributes object." 
-[[oneOf.patternProperties.".*".oneOf]] -title = "A generic attribute - short form" -anyOf = [ - # Any primitive value - { not = { anyOf = [ - { type = "object", title = "An object" }, - { type = "array", title = "An array" }, - ] }, title = "No complex type" }, - # Or an array of any primitive value - { type = "array", items = { not = { anyOf = [ - { type = "object", title = "An object" }, - { type = "array", title = "An array" }, - ] } }, title = "An array of non-complex types" }, -] +[oneOf.patternProperties.".*".if] +type = "object" -[[oneOf.patternProperties.".*".oneOf]] +[oneOf.patternProperties.".*".then] title = "A generic attribute - long form" type = "object" +required = ["value", "datatype"] +maxProperties = 2 -[oneOf.patternProperties.".*".oneOf.properties] - -value.anyOf = [ +properties.value.anyOf = [ # Any primitive value { not = { anyOf = [ { type = "object", title = "An object" }, @@ -43,8 +31,8 @@ value.anyOf = [ ] } }, title = "An array of non-complex types" }, ] -datatype.type = "string" -datatype.enum = [ +properties.datatype.type = "string" +properties.datatype.enum = [ "CHAR", "UCHAR", "SCHAR", @@ -85,5 +73,20 @@ datatype.enum = [ "BOOL", ] +[oneOf.patternProperties.".*".else] +title = "A generic attribute - short form" +anyOf = [ + # Any primitive value + { not = { anyOf = [ + { type = "object", title = "An object" }, + { type = "array", title = "An array" }, + ] }, title = "No complex type" }, + # Or an array of any primitive value + { type = "array", items = { not = { anyOf = [ + { type = "object", title = "An object" }, + { type = "array", title = "An array" }, + ] } }, title = "An array of non-complex types" }, +] + [oneOf.propertyNames] pattern = "^\\w*$" diff --git a/share/openPMD/json_schema/mesh.toml b/share/openPMD/json_schema/mesh.toml index 0fb387789a..0911130ba8 100644 --- a/share/openPMD/json_schema/mesh.toml +++ b/share/openPMD/json_schema/mesh.toml @@ -64,17 +64,31 @@ title = "Contains components" # Requirement 2.1: Either 
this is a scalar mesh... # #################################################### -[[allOf.oneOf]] +[allOf.if] +anyOf = [ + # vector mesh + { required = [ + "datatype", + ] }, + # constant mesh + { required = [ + "attributes", + ], properties.attributes.required = [ + "shape", + "value", + ] }, +] + + +[allOf.then] title = "Scalar mesh component" "$ref" = "mesh_record_component.json" -################################################# -# Requirement 2.2: ... or it's a vector mesh. # -# Note that exactly one of these two conditions # -# must be true, not both at once (oneOf). # -################################################# +############################################### +# Requirement 2.2: ... or it's a vector mesh. # +############################################### -[[allOf.oneOf]] +[allOf.else] title = "Vector component" description = "Additionally to the attributes, at least one component must be contained" # The attributes are contained in this dict, and at least one further @@ -82,6 +96,6 @@ description = "Additionally to the attributes, at least one component must be co minProperties = 2 propertyNames.pattern = "^\\w*$" -[allOf.oneOf.patternProperties."^(?!attributes).*"] +[allOf.else.patternProperties."^(?!attributes).*"] title = "Vector mesh component" "$ref" = "mesh_record_component.json" diff --git a/share/openPMD/json_schema/record.toml b/share/openPMD/json_schema/record.toml index 60209fd5a2..eb331a248d 100644 --- a/share/openPMD/json_schema/record.toml +++ b/share/openPMD/json_schema/record.toml @@ -47,17 +47,30 @@ title = "Contains components" # Requirement 2.1: Either this is a scalar mesh... 
# #################################################### -[[allOf.oneOf]] +[allOf.if] +anyOf = [ + # vector mesh + { required = [ + "datatype", + ] }, + # constant mesh + { required = [ + "attributes", + ], properties.attributes.required = [ + "shape", + "value", + ] }, +] + +[allOf.then] title = "Scalar component" "$ref" = "record_component.json" -################################################# -# Requirement 2.2: ... or it's a vector mesh. # -# Note that exactly one of these two conditions # -# must be true, not both at once (oneOf). # -################################################# +############################################### +# Requirement 2.2: ... or it's a vector mesh. # +############################################### -[[allOf.oneOf]] +[allOf.else] title = "Vector component" description = "Additionally to the attributes, at least one component must be contained" # The attributes are contained in this dict, and at least one further @@ -65,8 +78,6 @@ description = "Additionally to the attributes, at least one component must be co minProperties = 2 propertyNames.pattern = "^\\w*$" -[allOf.oneOf.patternProperties] - -[allOf.oneOf.patternProperties."^(?!attributes).*"] +[allOf.else.patternProperties."^(?!attributes).*"] title = "Scalar component" "$ref" = "record_component.json" diff --git a/share/openPMD/json_schema/record_component.toml b/share/openPMD/json_schema/record_component.toml index b280db94b5..9a4741a9f2 100644 --- a/share/openPMD/json_schema/record_component.toml +++ b/share/openPMD/json_schema/record_component.toml @@ -38,28 +38,28 @@ title = "Attribute layout" [[allOf]] title = "Either array or constant" -##################### -# Option 2.1: Array # -##################### - -[[allOf.oneOf]] -description = "An n-dimensional dataset containing the payload." 
-title = "Array dataset" - -"$ref" = "dataset_defs.json#/$defs/any_type_dataset" - ######################## -# Option 2.2: Constant # +# Option 2.1: Constant # ######################## -[[allOf.oneOf]] -title = "Constant dataset" -description = "A dataset represented by two attributes: The constant value and its shape." +[allOf.if] required = ["attributes"] +properties.attributes.required = ["shape", "value"] -[allOf.oneOf.properties.attributes] -required = ["shape", "value"] +[allOf.then] +title = "Constant dataset" +description = "A dataset represented by two attributes: The constant value and its shape." -[allOf.oneOf.properties.attributes.properties] +[allOf.properties.attributes.properties] value."$ref" = "attribute_defs.json#/$defs/float_attribute" shape."$ref" = "attribute_defs.json#/$defs/vec_int_attribute" + +##################### +# Option 2.2: Array # +##################### + +[allOf.else] +description = "An n-dimensional dataset containing the payload." +title = "Array dataset" + +"$ref" = "dataset_defs.json#/$defs/any_type_dataset" diff --git a/share/openPMD/json_schema/series.toml b/share/openPMD/json_schema/series.toml index 658e18370b..f013a7e07d 100644 --- a/share/openPMD/json_schema/series.toml +++ b/share/openPMD/json_schema/series.toml @@ -107,9 +107,11 @@ properties.attributes.required = ["snapshot"] [allOf.else] title = "Group-based (or file-based) encoding" -properties.attributes.properties.iterationEncoding.properties.value = { oneOf = [ +properties.attributes.properties.iterationEncoding = { oneOf = [ { const = "groupBased" }, { const = "fileBased" }, + { const = { value = "groupBased", datatype = "STRING" } }, + { const = { value = "fileBased", datatype = "STRING" } }, ] } # Base Path From 83ed23ad14f5a698ad6cd9fc6324f68e76ec4c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 26 Mar 2025 17:44:12 +0100 Subject: [PATCH 18/23] hmm --- .github/workflows/linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 49deb084ae..376db31b71 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -276,7 +276,7 @@ jobs: ctest --test-dir build --output-on-failure python3 -m pip install jsonschema==4.* referencing - cd ../share/openPMD/json_schema + cd share/openPMD/json_schema PATH="../../../build/bin:$PATH" make -j 2 # We need to exclude the thetaMode example since that has a different # meshesPath and the JSON schema needs to hardcode that. From 579e7b073c22ee76117a464e5564f88e3a93113a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 7 Apr 2025 11:02:06 +0200 Subject: [PATCH 19/23] Remove json cfg after test Otherwise CI thinks this is an openPMD file --- test/python/unittest/API/APITest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index 6337807f33..4f4d7d59e0 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -2239,6 +2239,8 @@ def testSeriesConstructors(self): s = io.Series(f, io.Access.create, c) s.close() + os.remove(cfg_as_file) + def testScalarHdf5Fields(self): if "hdf5" not in io.variants: return From 81533b0257e1e5492319db0907346d87a2aaa75e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 15 Jul 2025 15:15:50 +0200 Subject: [PATCH 20/23] Update documentation, rename convert-toml-json tool --- CMakeLists.txt | 2 +- share/openPMD/json_schema/Makefile | 2 +- share/openPMD/json_schema/README.md | 16 +++++++--------- ...nvert-json-toml.cpp => convert-toml-json.cpp} | 0 4 files changed, 9 insertions(+), 11 deletions(-) rename src/cli/{convert-json-toml.cpp => convert-toml-json.cpp} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 10fd236d40..213ffb3171 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -685,7 +685,7 @@ set(openPMD_TEST_NAMES # command line tools set(openPMD_CLI_TOOL_NAMES ls - 
convert-json-toml + convert-toml-json ) set(openPMD_PYTHON_CLI_TOOL_NAMES pipe diff --git a/share/openPMD/json_schema/Makefile b/share/openPMD/json_schema/Makefile index 32b403b003..0680c45f23 100644 --- a/share/openPMD/json_schema/Makefile +++ b/share/openPMD/json_schema/Makefile @@ -1,4 +1,4 @@ -convert := openpmd-convert-json-toml +convert := openpmd-convert-toml-json json_files = attribute_defs.json attributes.json dataset_defs.json iteration.json mesh.json mesh_record_component.json particle_patches.json particle_species.json patch_record.json record.json record_component.json series.json diff --git a/share/openPMD/json_schema/README.md b/share/openPMD/json_schema/README.md index ae9c641e77..5bc24809b8 100644 --- a/share/openPMD/json_schema/README.md +++ b/share/openPMD/json_schema/README.md @@ -7,13 +7,13 @@ This folder contains a JSON schema for validation of openPMD files written as `. ### Generating the JSON schema For improved readability, maintainability and documentation purposes, the JSON schema is written in `.toml` format and needs to be "compiled" to `.json` files first before usage. -To do this, the openPMD-api installs a tool named `openpmd-convert-json-toml` which can be used to convert between JSON and TOML files in both directions, e.g.: +To do this, the openPMD-api installs a tool named `openpmd-convert-toml-json` which can be used to convert between JSON and TOML files in both directions, e.g.: ```bash -openpmd_convert-json-toml @series.toml > series.json +openpmd-convert-toml-json @series.toml > series.json ``` -A `Makefile` is provided in this folder to simplify the application of this conversion tool. +A `Makefile` is provided in this folder to automate generating the needed JSON files from the TOML files. ### Verifying a file against the JSON schema @@ -32,16 +32,14 @@ The openPMD standard is not entirely expressible in terms of a JSON schema: * Many semantic dependencies, e.g. 
that the `position/x` and `position/y` vector of a particle species be of the same size, or that the `axisLabels` have the same dimensionality as the dataset itself, will go unchecked. * The `meshesPath` is assumed to be `meshes/` and the `particlesPath` is assumed to be `particles/`. This dependency cannot be expressed. -While a large part of the openPMD standard can indeed be verified by checking against a JSON schema, the standard is generally large enough to make this approach come to its limits. Verification of a JSON schema is similar to the use of a naive recursive-descent parser. Error messages will often be unexpectedly verbose and not very informative. -A challenge for the JSON validator are disjunctive statements such as "A Record is either a scalar Record Component or a vector of non-scalar Record Components". If there is even a tiny mistake somewhere down in the hierarchy, the entire disjunctive branch will fail evaluating. +While a large part of the openPMD standard can indeed be verified by checking against a JSON schema, the standard is generally large enough to make this approach come to its limits. Verification of a JSON schema is similar to the use of a naive recursive-descent parser. Error messages may become unexpectedly verbose and not very informative, especially when parsing disjunctive statements such as "A Record is either a scalar Record Component or a vector of non-scalar Record Components". We have taken care to decide disjunctive statements early on, e.g. with json-schema's support for `if` statements, but error messages may in general become unwieldy even due to tiny mistakes far down in the parse tree. -The layout of attributes is assumed to be that which is created by the JSON backend of the openPMD-api, e.g.: +The layout of attributes is assumed to be that which is created by the JSON backend of the openPMD-api. 
Both the longhand and shorthand forms are recognized: ```json "meshesPath": { "datatype": "STRING", "value": "meshes/" -} +}, +"particlesPath": "particles/" ``` - -Support for an abbreviated notation such as `"meshesPath": "meshes/"` is currently not (yet) available. diff --git a/src/cli/convert-json-toml.cpp b/src/cli/convert-toml-json.cpp similarity index 100% rename from src/cli/convert-json-toml.cpp rename to src/cli/convert-toml-json.cpp From 985d505e79760b1beb9733cbdac91263b726e890 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 15 Jul 2025 15:17:08 +0200 Subject: [PATCH 21/23] Apply suggestions from code review Co-authored-by: Axel Huebl --- share/openPMD/json_schema/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/openPMD/json_schema/README.md b/share/openPMD/json_schema/README.md index 5bc24809b8..1e5c2f2f8c 100644 --- a/share/openPMD/json_schema/README.md +++ b/share/openPMD/json_schema/README.md @@ -29,7 +29,7 @@ For further usage notes check the documentation of the script itself `./check.py The openPMD standard is not entirely expressible in terms of a JSON schema: -* Many semantic dependencies, e.g. that the `position/x` and `position/y` vector of a particle species be of the same size, or that the `axisLabels` have the same dimensionality as the dataset itself, will go unchecked. +* Many semantic dependencies, e.g., that the `position/x` and `position/y` vectors of a particle species need to be of the same size, or that the `axisLabels` have the same dimensionality as the dataset itself, will go unchecked. * The `meshesPath` is assumed to be `meshes/` and the `particlesPath` is assumed to be `particles/`. This dependency cannot be expressed. While a large part of the openPMD standard can indeed be verified by checking against a JSON schema, the standard is generally large enough to make this approach come to its limits. 
Verification of a JSON schema is similar to the use of a naive recursive-descent parser. Error messages may become unexpectedly verbose and not very informative, especially when parsing disjunctive statements such as "A Record is either a scalar Record Component or a vector of non-scalar Record Components". We have taken care to decide disjunctive statements early on, e.g. with json-schema's support for `if` statements, but error messages may in general become unwieldy even due to tiny mistakes far down in the parse tree. From 3a2929fad3aed7f828200f32ddc6b36e926a99a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 15 Jul 2025 15:22:00 +0200 Subject: [PATCH 22/23] Add reference to openPMD-validator --- share/openPMD/json_schema/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/share/openPMD/json_schema/README.md b/share/openPMD/json_schema/README.md index 1e5c2f2f8c..8abb9a271a 100644 --- a/share/openPMD/json_schema/README.md +++ b/share/openPMD/json_schema/README.md @@ -43,3 +43,5 @@ The layout of attributes is assumed to be that which is created by the JSON back }, "particlesPath": "particles/" ``` + +For a custom-written verification of openPMD datasets, also consider using the [openPMD-validator](https://github.com/openPMD/openPMD-validator). 
From d5781673c922a98c2bd846e79371cea8cef1e33a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 18 Jul 2025 14:09:23 +0200 Subject: [PATCH 23/23] Update README.md --- share/openPMD/json_schema/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/openPMD/json_schema/README.md b/share/openPMD/json_schema/README.md index 8abb9a271a..e8ad343660 100644 --- a/share/openPMD/json_schema/README.md +++ b/share/openPMD/json_schema/README.md @@ -32,7 +32,7 @@ The openPMD standard is not entirely expressible in terms of a JSON schema: * Many semantic dependencies, e.g., that the `position/x` and `position/y` vectors of a particle species need to be of the same size, or that the `axisLabels` have the same dimensionality as the dataset itself, will go unchecked. * The `meshesPath` is assumed to be `meshes/` and the `particlesPath` is assumed to be `particles/`. This dependency cannot be expressed. -While a large part of the openPMD standard can indeed be verified by checking against a JSON schema, the standard is generally large enough to make this approach come to its limits. Verification of a JSON schema is similar to the use of a naive recursive-descent parser. Error messages may become unexpectedly verbose and not very informative, especially when parsing disjunctive statements such as "A Record is either a scalar Record Component or a vector of non-scalar Record Components". We have taken care to decide disjunctive statements early on, e.g. with json-schema's support for `if` statements, but error messages may in general become unwieldy even due to tiny mistakes far down in the parse tree. +While a large part of the openPMD standard can indeed be verified by checking against a static JSON schema, the standard is generally large enough to make this approach come to its limits. Verification of a JSON schema is similar to the use of a naive recursive-descent parser. 
Error messages may become unexpectedly verbose and not very informative, especially when parsing disjunctive statements such as "A Record is either a scalar Record Component or a vector of non-scalar Record Components". We have taken care to decide disjunctive statements early on, e.g. with json-schema's support for `if` statements, but error messages may in general become unwieldy even due to tiny mistakes far down in the parse tree. The layout of attributes is assumed to be that which is created by the JSON backend of the openPMD-api. Both the longhand and shorthand forms are recognized: