@@ -172,3 +172,127 @@ def test_json_schema_check_dataset_builder_invalid():
172172
173173 # Verify that the cache_service.add method was called
174174 cache_service .add .assert_called_once ()
175+
176+
177+ def test_json_schema_missing_instance_type_is_reported ():
178+ """
179+ When an entity in a discriminated anyOf union is missing its instanceType,
180+ the error must be captured and reported rather than silently dropped
181+ (regression for GitHub issue #1546).
182+
183+ The dataset assignment fallback chain is:
184+ 1. instanceType from data → 2. schema title/ref → 3. nearest ancestor
185+ DDF00125 checks for validator == "required" | "additionalProperties", so the
186+ inner `required` context error (not the top-level anyOf) must be surfaced with
187+ a meaningful dataset assignment.
188+ """
189+ schema = {
190+ "type" : "object" ,
191+ "properties" : {
192+ "study" : {"$ref" : "#/$defs/Study" },
193+ },
194+ "$defs" : {
195+ "Study" : {
196+ "title" : "Study" ,
197+ "type" : "object" ,
198+ "properties" : {
199+ "instanceType" : {"const" : "Study" },
200+ "id" : {"type" : "string" },
201+ "items" : {
202+ "type" : "array" ,
203+ "items" : {
204+ "anyOf" : [
205+ {"$ref" : "#/$defs/TypeA" },
206+ {"$ref" : "#/$defs/TypeB" },
207+ ]
208+ },
209+ },
210+ },
211+ "required" : ["instanceType" , "id" ],
212+ },
213+ "TypeA" : {
214+ # title is intentionally present so the dataset assignment falls
215+ # back to "TypeA" (schema title) rather than "Study" (ancestor).
216+ "title" : "TypeA" ,
217+ "type" : "object" ,
218+ "properties" : {
219+ "instanceType" : {"const" : "TypeA" },
220+ "value" : {"type" : "string" },
221+ },
222+ "required" : ["instanceType" , "value" ],
223+ },
224+ "TypeB" : {
225+ "title" : "TypeB" ,
226+ "type" : "object" ,
227+ "properties" : {
228+ "instanceType" : {"const" : "TypeB" },
229+ "data" : {"type" : "integer" },
230+ },
231+ "required" : ["instanceType" , "data" ],
232+ },
233+ },
234+ }
235+
236+ # instanceType is intentionally missing from the nested item
237+ instance = {
238+ "study" : {
239+ "instanceType" : "Study" ,
240+ "id" : "study-1" ,
241+ "items" : [
242+ {"value" : "some-value" }, # missing instanceType
243+ ],
244+ }
245+ }
246+
247+ data_service = MagicMock ()
248+ data_service .json = instance
249+ data_service .dataset_implementation = PandasDataset
250+ data_service .dataset_path = "dummy.json"
251+
252+ cache_service = MagicMock ()
253+ cache_service .get .return_value = None
254+
255+ # Filter to the entity that owns the problematic object.
256+ # Because TypeA.title == "TypeA", the dataset column will be set to "TypeA"
257+ # (schema title wins over ancestor "Study"). This is the correct value for
258+ # DDF00125 to report the issue under the right entity.
259+ dataset_metadata = MagicMock ()
260+ dataset_metadata .name = "TypeA"
261+
262+ builder = JsonSchemaCheckDatasetBuilder (
263+ rule = {},
264+ data_service = data_service ,
265+ cache_service = cache_service ,
266+ rule_processor = MagicMock (),
267+ data_processor = MagicMock (),
268+ dataset_metadata = dataset_metadata ,
269+ define_xml_path = None ,
270+ standard = "USDM" ,
271+ standard_version = "4.0" ,
272+ standard_substandard = None ,
273+ library_metadata = LibraryMetadataContainer (standard_schema_definition = schema ),
274+ )
275+
276+ ds = builder .get_dataset ()
277+ rows = ds .data .to_dict (orient = "records" )
278+
279+ # 1. Error must not be silently dropped
280+ assert len (rows ) >= 1 , "Missing instanceType error must produce at least one row"
281+
282+ # 2. dataset must be set to the schema class name ("TypeA"), not empty string.
283+ # This ensures DDF00125 associates the error with the correct entity.
284+ assert any (row .get ("dataset" ) == "TypeA" for row in rows ), (
285+ "Error with missing instanceType must be associated with the schema class "
286+ "('TypeA' via schema title), not silently dropped with dataset=''"
287+ )
288+
289+ # 3. The error must have validator == "required" so DDF00125's check condition
290+ # (validator is_contained_by ['required', 'additionalProperties']) fires.
291+ assert any (
292+ row .get ("validator" ) == "required" for row in rows
293+ ), "The surfaced error must have validator='required' so DDF00125 can match it"
294+
295+ # 4. The path should identify where in the document the problem is
296+ assert any (
297+ "/study/items/0" in (row .get ("_path" ) or "" ) for row in rows
298+ ), "Path should point to the entity missing instanceType"
0 commit comments