|
| 1 | +//! `ogar-from-schema` — schema-as-input producer family. |
| 2 | +//! |
| 3 | +//! Sibling to source-AST producers (`ogar-from-rails`, `ogar-from-elixir`, |
| 4 | +//! `ogar-from-ruff`). The pair carves the producer surface along the same |
| 5 | +//! split [`SURREAL-AST-AS-ADAPTER.md`] already carved on the codegen side: |
| 6 | +//! |
| 7 | +//! ```text |
| 8 | +//! STRUCTURAL ARM ──► ogar-from-schema (this crate) |
| 9 | +//! (XSD, TTL, schemas are declarative & bijective |
| 10 | +//! JSON-Schema, → round-trip provable |
| 11 | +//! OpenAPI, Prisma) → 80 OGIT domains for free |
| 12 | +//! |
| 13 | +//! BEHAVIORAL ARM ──► ogar-from-{rails,elixir,ruff,…} |
| 14 | +//! (callbacks, FSMs, source code only — schemas can't carry it |
| 15 | +//! @api.depends, → best-effort, language-specific |
| 16 | +//! gen_statem,…) → adds what schemas can't see |
| 17 | +//! ``` |
| 18 | +//! |
| 19 | +//! **Why both — the funny insight** (`docs/HIRO-IN-CLASSES.md` §2): |
| 20 | +//! |
| 21 | +//! A schema gets you LESS, more RELIABLY. A source AST gets you MORE, |
| 22 | +//! less reliably. The two are not redundant — they cover disjoint surfaces |
| 23 | +//! that meet only at the structural arm, and at that meeting point they |
| 24 | +//! become each other's oracle: |
| 25 | +//! |
| 26 | +//! - Schema lift produces a `Class` set that is **byte-exact** (the schema |
| 27 | +//! IS the contract). |
| 28 | +//! - Source lift produces a `Class` set that is **best-effort** (Ruby is |
| 29 | +//! dynamic; `method_missing` defeats static extraction). |
| 30 | +//! - Where both cover the same domain, emitting a schema from the |
| 31 | +//! source-lifted `Class` and diffing against the committed schema is a |
| 32 | +//! **drift detector** for every PR. That is what Palantir Foundry charges |
| 33 | +//! for as "ontology change management" — we get it from this crate |
| 34 | +//! plus `extract_classes.py`. |
| 35 | +//! |
| 36 | +//! # v0 scope |
| 37 | +//! |
| 38 | +//! - `ttl` front-end: reads the line-oriented OGIT TTL dialect into |
| 39 | +//! [`Class`]. Demo target: `vocab/imports/ogit/MARS/`. |
| 40 | +//! - Cross-check: the **fixed-enum agreement test** asserts the TTL |
| 41 | +//! `ogit:validation-parameter` set matches the XSD oracle's extracted |
| 42 | +//! classifications (the chess-grade bijection, applied at the schema |
| 43 | +//! level — see `docs/calibration/mars/README.md`). |
| 44 | +//! |
| 45 | +//! # Out of v0 (queued) |
| 46 | +//! |
| 47 | +//! - `xsd` front-end (lift `MARSSchema2015.xsd` directly; cross-check vs TTL). |
| 48 | +//! - `json_schema`, `openapi`, `prisma` front-ends. |
| 49 | +//! - Full Turtle / RDF-XML / OWL import via `oxttl`/`oxrdf`. |
| 50 | +//! - Behavioral-arm fields on `Class` are intentionally left empty by this |
| 51 | +//! crate — schemas can't carry them; source-AST producers fill them in. |
| 52 | +//! |
| 53 | +//! [`SURREAL-AST-AS-ADAPTER.md`]: ../docs/SURREAL-AST-AS-ADAPTER.md |
| 54 | +
|
| 55 | +#![warn(missing_docs)] |
| 56 | +#![forbid(unsafe_code)] |
| 57 | + |
| 58 | +use ogar_vocab::{Attribute, Class, EnumDecl, EnumSource, Language}; |
| 59 | + |
| 60 | +pub mod sgo; |
| 61 | +pub mod ttl; |
| 62 | +pub mod ttl_emit; |
| 63 | + |
| 64 | +/// What a single TTL file describes — exactly one of: an entity (`Class`), |
| 65 | +/// a datatype attribute (`Attribute`), or a verb (`Association` shape). |
| 66 | +#[derive(Debug, Clone, PartialEq, Eq)] |
| 67 | +#[non_exhaustive] |
| 68 | +pub enum TtlDeclaration { |
| 69 | + /// An `rdfs:Class` declaration (`entities/<Name>.ttl`). |
| 70 | + Entity(EntityDecl), |
| 71 | + /// An `owl:DatatypeProperty` declaration (`<Entity>/attributes/<name>.ttl`). |
| 72 | + DatatypeAttribute(AttributeDecl), |
| 73 | +} |
| 74 | + |
/// One OGIT entity TTL file (for example `entities/Machine.ttl`) in lifted
/// form.
///
/// There is a field for every predicate the OGIT TTL dialect puts on an
/// `rdfs:Class` subject, so a `parse → emit → parse` round trip keeps the
/// semantic content. [`into_class`] lowers this shape into
/// [`ogar_vocab::Class`]; [`crate::ttl_emit::emit_entity`] writes it back out
/// as TTL.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct EntityDecl {
    /// Name in CURIE form, e.g. `ogit.MARS:Machine`.
    pub curie: String,
    /// Local name, e.g. `Machine`.
    pub name: String,
    /// Text of `rdfs:label`; frequently identical to `name`.
    pub label: String,
    /// Text of `dcterms:description`, multi-line content kept verbatim.
    pub description: String,
    /// Target of `rdfs:subClassOf` exactly as written, e.g. `ogit:Entity`.
    pub parent: Option<String>,
    /// Validity-range string from `dcterms:valid`, e.g. `"start=2018-06-01;"`.
    pub dcterms_valid: Option<String>,
    /// Text of `dcterms:creator`, e.g. `"fotto@arago.de"` or `"FCO"`.
    pub dcterms_creator: Option<String>,
    /// Text of `ogit:scope`, e.g. `"NTO"` or `"SGO"`.
    pub ogit_scope: Option<String>,
    /// Token of `ogit:parent`, e.g. `ogit:Node`. This is distinct from
    /// [`Self::parent`] (`rdfs:subClassOf`): both may be set at once and
    /// they carry different information.
    pub ogit_parent: Option<String>,
    /// Entries of the `ogit:mandatory-attributes` list, as written.
    pub mandatory_attributes: Vec<String>,
    /// Entries of the `ogit:optional-attributes` list, as written.
    pub optional_attributes: Vec<String>,
    /// Entries of the `ogit:indexed-attributes` list, as written.
    pub indexed_attributes: Vec<String>,
    /// Entries of the `ogit:allowed (...)` block, each stored as a
    /// `(verb, target)` pair as written, e.g.
    /// `(ogit:dependsOn, ogit.MARS:Resource)`.
    pub allowed: Vec<(String, String)>,
}
| 113 | + |
/// One OGIT attribute TTL file (for example
/// `Application/attributes/class.ttl`) in lifted form.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct AttributeDecl {
    /// Name in CURIE form, e.g. `ogit.MARS.Application:class`.
    pub curie: String,
    /// Local name, e.g. `class`.
    pub name: String,
    /// Text of `rdfs:label`.
    pub label: String,
    /// Text of `dcterms:description`.
    pub description: String,
    /// Validity-range string from `dcterms:valid`.
    pub dcterms_valid: Option<String>,
    /// Text of `dcterms:creator`.
    pub dcterms_creator: Option<String>,
    /// Value of `ogit:validation-type` (e.g. `"fixed"`), if the file has one.
    pub validation_type: Option<String>,
    /// Value of `ogit:validation-parameter`. For a `"fixed"`
    /// `validation_type` this holds the allowed values, comma-separated.
    pub validation_parameter: Option<String>,
}

impl AttributeDecl {
    /// Allowed values of a fixed-enum attribute, sorted and de-duplicated.
    ///
    /// The comma-separated `validation_parameter` is split, each piece is
    /// trimmed, and empty pieces are discarded. The resulting set is the
    /// shape that **must agree with the XSD-extracted classification set**
    /// for the same `(entity, attribute)` pair.
    ///
    /// Returns `None` unless `validation_type` is exactly `"fixed"` and a
    /// `validation_parameter` is present.
    #[must_use]
    pub fn fixed_enum_values(&self) -> Option<Vec<String>> {
        match (
            self.validation_type.as_deref(),
            self.validation_parameter.as_deref(),
        ) {
            (Some("fixed"), Some(list)) => {
                // An ordered set sorts and removes duplicates in one step.
                let unique: std::collections::BTreeSet<&str> = list
                    .split(',')
                    .map(str::trim)
                    .filter(|piece| !piece.is_empty())
                    .collect();
                Some(unique.into_iter().map(String::from).collect())
            }
            _ => None,
        }
    }
}
| 161 | + |
| 162 | +/// Lower a lifted [`EntityDecl`] into the canonical [`ogar_vocab::Class`]. |
| 163 | +/// Behavior-arm fields ([`Class::callbacks`], etc.) stay empty — this is |
| 164 | +/// the structural arm only, per the §`# v0 scope` invariant. |
| 165 | +#[must_use] |
| 166 | +pub fn into_class(entity: &EntityDecl, attributes: &[(&str, &AttributeDecl)]) -> Class { |
| 167 | + let enums: Vec<EnumDecl> = attributes |
| 168 | + .iter() |
| 169 | + .filter_map(|(col, attr)| { |
| 170 | + let values = attr.fixed_enum_values()?; |
| 171 | + // OGIT TTL carries the value list flat (`ogit:validation-parameter |
| 172 | + // "Data,Development,..."`) with no separate label vocabulary, so the |
| 173 | + // lowering renders each value as a self-labeled `(value, value)` |
| 174 | + // pair. A future XSD lift will source labels from |
| 175 | + // `<xs:documentation>` per `<xs:enumeration>`. |
| 176 | + let pairs: Vec<(String, String)> = values.into_iter().map(|v| (v.clone(), v)).collect(); |
| 177 | + let mut decl = EnumDecl::default(); |
| 178 | + decl.column = (*col).to_owned(); |
| 179 | + decl.source = EnumSource::Static(pairs); |
| 180 | + Some(decl) |
| 181 | + }) |
| 182 | + .collect(); |
| 183 | + |
| 184 | + let attrs: Vec<Attribute> = attributes |
| 185 | + .iter() |
| 186 | + .map(|(col, _attr)| { |
| 187 | + let mut a = Attribute::default(); |
| 188 | + a.name = (*col).to_owned(); |
| 189 | + a |
| 190 | + }) |
| 191 | + .collect(); |
| 192 | + |
| 193 | + let mut cls = Class::default(); |
| 194 | + cls.name = entity.name.clone(); |
| 195 | + cls.parent = entity.parent.clone(); |
| 196 | + cls.language = Language::Unknown; |
| 197 | + cls.attributes = attrs; |
| 198 | + cls.enums = enums; |
| 199 | + cls |
| 200 | +} |
0 commit comments