-
Notifications
You must be signed in to change notification settings - Fork 284
Expand file tree
/
Copy pathXsdInference.fs
More file actions
371 lines (299 loc) · 13.7 KB
/
XsdInference.fs
File metadata and controls
371 lines (299 loc) · 13.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
// --------------------------------------------------------------------------------------
// Implements XML type inference from XSD
// --------------------------------------------------------------------------------------
// The XML Provider infers a type from sample documents: an instance of InferedType
// represents elements having a structure compatible with the given samples.
// When a schema is available, we can use it to derive an InferedType representing
// valid elements according to the definitions in the given schema.
// The InferedType derived from a schema should be essentially the same as one
// inferred from a significant set of valid samples.
// Adopting this perspective we can support XSD leveraging the existing functionalities.
// The implementation uses a simplified XSD model to split the task of deriving an InferedType:
// - element definitions in xsd files map to this simplified xsd model
// - instances of this xsd model map to InferedType.
namespace ProviderImplementation
open System.Xml
open System.Xml.Schema
open FSharp.Data
/// Simplified model to represent schemas (XSD).
module XsdModel =
type IsOptional = bool
type Occurs = decimal * decimal
// reference equality and mutable type allow for cycles
[<ReferenceEquality>]
type XsdElement =
{ Name: XmlQualifiedName
mutable Type: XsdType
SubstitutionGroup: XsdElement list
IsAbstract: bool
IsNillable: bool }
and XsdType =
| SimpleType of XmlTypeCode
| ComplexType of XsdComplexType
and [<ReferenceEquality>] XsdComplexType =
{ Attributes: (XmlQualifiedName * XmlTypeCode * IsOptional) list
Contents: XsdContent }
and XsdContent =
| SimpleContent of XmlTypeCode
| ComplexContent of XsdParticle
and XsdParticle =
| Empty
| Any of Occurs
| Element of Occurs * XsdElement
| All of Occurs * XsdParticle list
| Choice of Occurs * XsdParticle list
| Sequence of Occurs * XsdParticle list
/// A simplified schema model is built from xsd.
/// The actual parsing is done using BCL classes.
module XsdParsing =
let ofType<'a> (sequence: System.Collections.IEnumerable) =
sequence |> Seq.cast<obj> |> Seq.filter (fun x -> x :? 'a) |> Seq.cast<'a>
type ParsingContext(xmlSchemaSet: XmlSchemaSet) =
let getElm name = // lookup elements by name
xmlSchemaSet.GlobalElements.Item name :?> XmlSchemaElement
let subst = // lookup of substitution group members
xmlSchemaSet.GlobalElements.Values
|> ofType<XmlSchemaElement>
|> Seq.filter (fun e -> not e.SubstitutionGroup.IsEmpty)
|> Seq.groupBy (fun e -> e.SubstitutionGroup)
|> Seq.map (fun (name, values) -> getElm name, values |> List.ofSeq)
|> dict
let getSubst =
// deep lookup for trees of substitution groups, see
// http://docstore.mik.ua/orelly/xml/schema/ch12_01.htm#xmlschema-CHP-12-SECT-1
let collectSubst elm =
let items = System.Collections.Generic.HashSet()
let rec collect elm =
match subst.TryGetValue elm with
| false, _ -> ()
| true, substVal ->
for x in substVal do
if items.Add x then
collect x
collect elm
items |> List.ofSeq
let subst' = subst.Keys |> Seq.map (fun x -> x, collectSubst x) |> dict
fun elm ->
match subst'.TryGetValue elm with
| true, elVal -> elVal
| false, _ -> []
let elements =
System.Collections.Generic.Dictionary<XmlSchemaElement, XsdModel.XsdElement>()
member x.GetElement name = getElm name
member x.GetSubstitutions elm = getSubst elm
member x.Elements = elements
open XsdModel
let getTypeCode (xmlSchemaDatatype: XmlSchemaDatatype) =
if xmlSchemaDatatype.Variety = XmlSchemaDatatypeVariety.Atomic then
xmlSchemaDatatype.TypeCode
else
XmlTypeCode.None // list and union not supported
let rec parseElement (ctx: ParsingContext) elm =
match ctx.Elements.TryGetValue elm with
| true, x -> x
| _ ->
let substitutionGroup =
ctx.GetSubstitutions elm
|> List.filter (fun x -> x <> elm)
|> List.map (parseElement ctx)
// another attempt in case the element is put while parsing substitution groups
match ctx.Elements.TryGetValue elm with
| true, x -> x
| _ ->
let result =
{ Name = elm.QualifiedName
Type = XsdType.SimpleType XmlTypeCode.None // temporary dummy value
SubstitutionGroup = substitutionGroup
IsAbstract = elm.IsAbstract
IsNillable = elm.IsNillable }
ctx.Elements.Add(elm, result)
// computing the real type after filling the dictionary allows for cycles
result.Type <-
match elm.ElementSchemaType with
| :? XmlSchemaSimpleType as x -> SimpleType(getTypeCode x.Datatype)
| :? XmlSchemaComplexType as x -> ComplexType(parseComplexType ctx x)
| x -> failwithf "unknown ElementSchemaType: %A" x
result
and parseComplexType ctx (x: XmlSchemaComplexType) =
{ Attributes =
x.AttributeUses.Values
|> ofType<XmlSchemaAttribute>
|> Seq.filter (fun a -> a.Use <> XmlSchemaUse.Prohibited)
|> Seq.map (fun a ->
a.QualifiedName, getTypeCode a.AttributeSchemaType.Datatype, a.Use <> XmlSchemaUse.Required)
|> List.ofSeq
Contents =
match x.ContentType with
| XmlSchemaContentType.TextOnly -> SimpleContent(getTypeCode x.Datatype)
| XmlSchemaContentType.Mixed
| XmlSchemaContentType.Empty
| XmlSchemaContentType.ElementOnly -> x.ContentTypeParticle |> parseParticle ctx |> ComplexContent
| _ -> failwithf "Unknown content type: %A." x.ContentType }
and parseParticle ctx (par: XmlSchemaParticle) =
let occurs = par.MinOccurs, par.MaxOccurs
let parseParticles (group: XmlSchemaGroupBase) =
let particles =
group.Items
|> ofType<XmlSchemaParticle>
|> Seq.map (parseParticle ctx)
|> List.ofSeq
match group with
| :? XmlSchemaAll -> All(occurs, particles)
| :? XmlSchemaChoice -> Choice(occurs, particles)
| :? XmlSchemaSequence -> Sequence(occurs, particles)
| _ -> failwithf "unknown group base: %A" group
match par with
| :? XmlSchemaAny -> Any occurs
| :? XmlSchemaGroupBase as grp -> parseParticles grp
| :? XmlSchemaGroupRef as grpRef -> parseParticle ctx grpRef.Particle
| :? XmlSchemaElement as elm ->
let e =
if elm.RefName.IsEmpty then
elm
else
ctx.GetElement elm.RefName
Element(occurs, parseElement ctx e)
| _ -> Empty // XmlSchemaParticle.EmptyParticle
let getElements schema =
let ctx = ParsingContext schema
schema.GlobalElements.Values
|> ofType<XmlSchemaElement>
|> Seq.filter (fun x -> x.ElementSchemaType :? XmlSchemaComplexType)
|> Seq.map (parseElement ctx)
/// Element definitions in a schema are mapped to InferedType instances
module internal XsdInference =
open XsdModel
open FSharp.Data.Runtime.StructuralTypes
// for now we map only the types supported
let getType =
function
| XmlTypeCode.Int -> typeof<int>
| XmlTypeCode.Long -> typeof<int64>
#if NET6_0_OR_GREATER
| XmlTypeCode.Date -> typeof<System.DateOnly>
#else
| XmlTypeCode.Date -> typeof<System.DateTime>
#endif
| XmlTypeCode.DateTime -> typeof<System.DateTimeOffset>
| XmlTypeCode.Boolean -> typeof<bool>
| XmlTypeCode.Decimal -> typeof<decimal>
| XmlTypeCode.Double -> typeof<double>
// fallback to string
| _ -> typeof<string>
let getMultiplicity =
function
| 1M, 1M -> Single
| 0M, 1M -> OptionalSingle
| _ -> Multiple
// how multiplicity is affected when nesting particles
let combineMultiplicity =
function
| Single, x -> x
| Multiple, _ -> Multiple
| _, Multiple -> Multiple
| OptionalSingle, _ -> OptionalSingle
// the effect of a choice is to make mandatory items optional
let makeOptional =
function
| Single -> OptionalSingle
| x -> x
let formatName (qName: XmlQualifiedName) =
if qName.Namespace = "" then
qName.Name
else
sprintf "{%s}%s" qName.Namespace qName.Name
let getElementName (elm: XsdElement) = Some(formatName elm.Name)
let nil =
{ InferedProperty.Name = "{http://www.w3.org/2001/XMLSchema-instance}nil"
Type = InferedType.Primitive(typeof<bool>, None, true, false) }
type InferenceContext = System.Collections.Generic.Dictionary<XsdComplexType, InferedProperty>
// derives an InferedType for an element definition
let rec inferElementType ctx elm =
let name = getElementName elm
if elm.IsAbstract then
InferedType.Record(name, [], optional = false)
else
match elm.Type with
| SimpleType typeCode ->
let ty = InferedType.Primitive(getType typeCode, None, elm.IsNillable, false)
let prop = { InferedProperty.Name = ""; Type = ty }
let props = if elm.IsNillable then [ prop; nil ] else [ prop ]
InferedType.Record(name, props, optional = false)
| ComplexType cty ->
let props = inferProperties ctx cty
let props =
if elm.IsNillable then
for prop in props do
prop.Type <- prop.Type.EnsuresHandlesMissingValues false
nil :: props
else
props
InferedType.Record(name, props, optional = false)
and inferProperties (ctx: InferenceContext) cty =
let attrs: InferedProperty list =
cty.Attributes
|> List.map (fun (name, typeCode, optional) ->
{ Name = formatName name
Type = InferedType.Primitive(getType typeCode, None, optional, false) })
match cty.Contents with
| SimpleContent typeCode ->
let body =
{ InferedProperty.Name = ""
Type = InferedType.Primitive(getType typeCode, None, false, false) }
body :: attrs
| ComplexContent xsdParticle ->
let body =
match ctx.TryGetValue cty with
| true, ctVal -> ctVal
| false, _ ->
let result =
{ InferedProperty.Name = ""
Type = InferedType.Top }
ctx.Add(cty, result)
let getRecordTag (e: XsdElement) = InferedTypeTag.Record(getElementName e)
result.Type <-
match getElements ctx Single xsdParticle with
| [] -> InferedType.Null
| items ->
let tags = items |> List.map (fst >> getRecordTag)
let types =
items
|> List.map (fun (e, m) -> m, inferElementType ctx e)
|> Seq.zip tags
|> Map.ofSeq
InferedType.Collection(tags, types)
result
if body.Type = InferedType.Null then
attrs
else
body :: attrs
// collects element definitions in a particle
and getElements ctx parentMultiplicity =
function
| XsdParticle.Element(occ, elm) ->
let mult = combineMultiplicity (parentMultiplicity, getMultiplicity occ)
match elm.IsAbstract, elm.SubstitutionGroup with
| _, [] -> [ (elm, mult) ]
| true, [ x ] -> [ (x, mult) ]
| true, x -> x |> List.map (fun e -> e, makeOptional mult)
| false, x -> elm :: x |> List.map (fun e -> e, makeOptional mult)
| XsdParticle.Sequence(occ, particles)
| XsdParticle.All(occ, particles) ->
let mult = combineMultiplicity (parentMultiplicity, getMultiplicity occ)
particles |> List.collect (getElements ctx mult)
| XsdParticle.Choice(occ, particles) ->
let mult = makeOptional (getMultiplicity occ)
let mult' = combineMultiplicity (parentMultiplicity, mult)
particles |> List.collect (getElements ctx mult')
| XsdParticle.Empty -> []
| XsdParticle.Any _ -> []
let inferElements elms =
let ctx = InferenceContext()
match elms |> List.filter (fun elm -> not elm.IsAbstract) with
| [] -> failwith "No suitable element definition found in the schema."
| [ elm ] -> inferElementType ctx elm
| elms ->
elms
|> List.map (fun elm -> InferedTypeTag.Record(getElementName elm), inferElementType ctx elm)
|> Map.ofList
|> (fun x -> InferedType.Heterogeneous(x, false))