Skip to content

Commit bf6fe44

Browse files
authored
BE-582: HashQL: Change opaque types from invariant to covariant (#8835)
1 parent 455658d commit bf6fe44

13 files changed

Lines changed: 345 additions & 248 deletions

File tree

libs/@local/hashql/core/src/type/inference/solver/graph.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ impl Edges {
113113
}
114114

115115
impl Default for Edges {
116+
#[inline]
116117
fn default() -> Self {
117118
Self::new()
118119
}

libs/@local/hashql/core/src/type/inference/solver/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ struct VariableConstraintSatisfiability {
220220
}
221221

222222
impl Default for VariableConstraintSatisfiability {
223+
#[inline]
223224
fn default() -> Self {
224225
Self { upper: true }
225226
}

libs/@local/hashql/core/src/type/inference/solver/tests.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#![expect(clippy::similar_names)]
12
use hashql_diagnostics::Success;
23

34
use super::{Constraint, InferenceSolver, VariableConstraint};
@@ -2364,6 +2365,55 @@ fn deferred_upper_constraint() {
23642365
);
23652366
}
23662367

2368+
// Regression test: W(A()) as W<AB> where AB = A | B.
2369+
// The constraint system is: W<?1> <: W<A | B>, with ?1 having lower bound A from the
2370+
// constructor call. The solver should decompose through the opaque covariantly and resolve
2371+
// ?1 = A, since A <: A | B.
2372+
#[test]
2373+
fn opaque_covariant_subtype_through_union() {
2374+
scaffold!(heap, env, builder);
2375+
2376+
let hole = builder.fresh_hole();
2377+
2378+
let null = builder.null();
2379+
let integer = builder.integer();
2380+
2381+
// newtype A = Null
2382+
let type_a = builder.opaque("A", null);
2383+
// newtype B = Integer
2384+
let type_b = builder.opaque("B", integer);
2385+
// type AB = A | B
2386+
let type_ab = builder.union([type_a, type_b]);
2387+
2388+
// W<?1> (the value produced by the constructor call)
2389+
let w_hole = builder.opaque("W", builder.infer(hole));
2390+
// W<AB> (the target type from the `as` annotation)
2391+
let w_ab = builder.opaque("W", type_ab);
2392+
2393+
let mut inference = InferenceEnvironment::new(&env);
2394+
// ?1 has lower bound A (from the constructor argument A() flowing into W's parameter)
2395+
inference.add_constraint(Constraint::LowerBound {
2396+
variable: Variable::synthetic(VariableKind::Hole(hole)),
2397+
bound: type_a,
2398+
});
2399+
// W<?1> <: W<AB> (from the `as` expression)
2400+
inference.collect_constraints(Variance::Covariant, w_hole, w_ab);
2401+
2402+
let solver = inference.into_solver();
2403+
let Success {
2404+
value: substitution,
2405+
advisories,
2406+
} = solver.solve().expect("should have solved");
2407+
assert!(advisories.is_empty());
2408+
2409+
// ?1 should resolve to A
2410+
assert_equivalent(
2411+
&env,
2412+
substitution.infer(hole).expect("should be resolved"),
2413+
type_a,
2414+
);
2415+
}
2416+
23672417
// The problem that we currently have is that when we have a nested inference constraint we do
23682418
// **not** "freeze" a variable in place when we've already determined it's type. We require doing
23692419
// so, so that we can propagate the type.

libs/@local/hashql/core/src/type/kind/opaque.rs

Lines changed: 67 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,29 @@ use crate::{
2424
/// even if their representations are identical. This contrasts with structural typing where
2525
/// compatibility is based on structure alone.
2626
///
27-
/// # Type System Design
27+
/// # Semantic Model
2828
///
29-
/// This implementation uses a refined context-sensitive variance approach:
30-
/// - For concrete types or when inference is disabled: Opaque types are **invariant** with respect
31-
/// to their inner representation, enforcing strict nominal typing semantics.
32-
/// - Only for non-concrete types during inference: Opaque types use a **covariant-like** approach
33-
/// that allows them to act as "carriers" for inference variables.
29+
/// For each opaque name `N`, the type constructor is interpreted as:
3430
///
35-
/// This precise, context-sensitive approach provides several benefits:
36-
/// 1. Proper constraint propagation for types containing inference variables
37-
/// 2. Strict nominal type safety for all concrete types
38-
/// 3. Clear separation between inference and checking phases
39-
/// 4. Consistent variance behavior across the type system
31+
/// ```text
32+
/// ⟦N<T>⟧ = { wrap_N(v) | v ∈ ⟦T⟧ }
33+
/// ```
34+
///
35+
/// with `wrap_N` injective and distinct names having disjoint images. This gives:
36+
///
37+
/// - **Covariance**: `⟦A⟧ ⊆ ⟦B⟧ ⟹ ⟦N<A>⟧ ⊆ ⟦N<B>⟧`, so `N<A> <: N<B>`.
38+
/// - **Nominal separation**: if `N ≠ M`, then `⟦N<A>⟧ ∩ ⟦M<B>⟧ = ∅`.
39+
/// - **Meet**: `⟦N<A>⟧ ∩ ⟦N<B>⟧ = ⟦N<A ∧ B>⟧`.
40+
/// - **Join**: `⟦N<A>⟧ ∪ ⟦N<B>⟧ = ⟦N<A ∨ B>⟧`, extensionally.
41+
///
42+
/// Covariance is sound because HashQL values are immutable: the wrapper has no operation
43+
/// that can exploit a `W<B>` view to smuggle an arbitrary `B` back into a `W<A>`. This
44+
/// removes the classic mutation-based unsoundness argument (Pierce, TAPL Ch. 15).
45+
///
46+
/// An opaque type is never the global top: `W(⊤)` is only the top of the `W` fiber
47+
/// (all `W`-wrapped values), not the whole universe, because it does not contain
48+
/// unwrapped values or values of other opaque families. However, `W(⊥)` IS the
49+
/// global bottom: wrapping an empty set gives an empty set, and `∅ ⊆ S` for any `S`.
4050
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
4151
pub struct OpaqueType<'heap> {
4252
pub name: Symbol<'heap>,
@@ -134,21 +144,14 @@ impl<'heap> Lattice<'heap> for OpaqueType<'heap> {
134144

135145
/// Computes the meet (greatest lower bound) of two opaque types.
136146
///
137-
/// The meet operation uses a refined context-sensitive approach:
138-
///
139-
/// When inference is enabled AND at least one type contains inference variables:
140-
/// - If types have different names: Return Never (empty set).
141-
/// - If types have the same name: Meet their inner representations, allowing the opaque type to
142-
/// act as a "carrier" for inference variables until they are resolved.
147+
/// For same-name opaques, meets the inner representations covariantly:
148+
/// `W<A|B> & W<A>` produces `W<A>`, because `meet(A|B, A) = A`.
143149
///
144-
/// Otherwise (for concrete types or when inference is disabled):
145-
/// - If types have different names: Return Never (empty set).
146-
/// - If types have the same name but different representations: Return Never (empty set).
147-
/// - If types have the same name and equivalent representations: Return the type itself.
150+
/// During inference with non-concrete types, both representations are kept as a
151+
/// "carrier" to defer evaluation until inference variables are resolved.
148152
///
149-
/// This approach prevents premature failure during inference while maintaining strict
150-
/// nominal semantics once types are fully resolved. It allows constraint propagation while
151-
/// preserving type safety.
153+
/// For different-name opaques, returns Never (empty set): distinct nominal types have
154+
/// no common subtype.
152155
fn meet(
153156
self: Type<'heap, Self>,
154157
other: Type<'heap, Self>,
@@ -158,26 +161,38 @@ impl<'heap> Lattice<'heap> for OpaqueType<'heap> {
158161
return SmallVec::new();
159162
}
160163

164+
if env.is_equivalent(self.kind.repr, other.kind.repr) {
165+
return SmallVec::from_slice_copy(&[self.id]);
166+
}
167+
161168
if env.is_inference_enabled()
162169
&& (!env.is_concrete(self.kind.repr) || !env.is_concrete(other.kind.repr))
163170
{
171+
// During inference, keep both representations as carriers. The meet is deferred
172+
// until inference variables are resolved and the result is simplified.
164173
let self_repr = env.r#type(self.kind.repr);
165174
let other_repr = env.r#type(other.kind.repr);
166175

167-
// We circumvent `env.meet` here, by directly meeting the representations. This is
168-
// intentional, so that we can propagate the meet result instead of having a
169-
// `Intersection`.
170176
let result = self_repr.meet(other_repr, env);
171177

172-
// If any of the types aren't concrete, we effectively convert ourselves into a
173-
// "carrier" to defer evaluation of the term, once inference is completed we'll simplify
174-
// and postprocess the result.
175-
self.postprocess_lattice(result, env.environment)
176-
} else if env.is_equivalent(self.kind.repr, other.kind.repr) {
177-
SmallVec::from_slice_copy(&[self.id])
178-
} else {
179-
SmallVec::new()
178+
return self.postprocess_lattice(result, env.environment);
180179
}
180+
181+
// For concrete same-name opaques, meet the inner representations through the full
182+
// lattice meet path which handles simplification (e.g. `Number | Never` to `Number`).
183+
let repr = env.meet(self.kind.repr, other.kind.repr);
184+
185+
if env.is_bottom(repr) {
186+
return SmallVec::new();
187+
}
188+
189+
SmallVec::from_slice_copy(&[env.environment.intern_type(PartialType {
190+
span: self.span,
191+
kind: env.environment.intern_kind(TypeKind::Opaque(OpaqueType {
192+
name: self.kind.name,
193+
repr,
194+
})),
195+
})])
181196
}
182197

183198
fn projection(
@@ -201,8 +216,11 @@ impl<'heap> Lattice<'heap> for OpaqueType<'heap> {
201216
env.is_bottom(self.kind.repr)
202217
}
203218

204-
fn is_top(self: Type<'heap, Self>, env: &mut AnalysisEnvironment<'_, 'heap>) -> bool {
205-
env.is_top(self.kind.repr)
219+
fn is_top(self: Type<'heap, Self>, _env: &mut AnalysisEnvironment<'_, 'heap>) -> bool {
220+
// W(Unknown) is the top of the W fiber, not the global top. It does not contain
221+
// unwrapped values or values of other opaque families. Reporting true here would
222+
// cause `T | W(Unknown)` to collapse to `Unknown`, destroying nominal separation.
223+
false
206224
}
207225

208226
fn is_concrete(self: Type<'heap, Self>, env: &mut AnalysisEnvironment<'_, 'heap>) -> bool {
@@ -231,10 +249,14 @@ impl<'heap> Lattice<'heap> for OpaqueType<'heap> {
231249

232250
/// Determines if one opaque type is a subtype of another.
233251
///
234-
/// Implements invariant nominal typing semantics:
252+
/// Implements covariant nominal typing semantics:
235253
/// 1. Types with different names are always unrelated (neither is a subtype of the other)
236-
/// 2. Types with the same name follow invariant rules for their inner representation (enforced
237-
/// via the `in_invariant` context)
254+
/// 2. Types with the same name check their inner representations covariantly
255+
///
256+
/// Covariance is sound because HashQL values are immutable: there is no operation that
257+
/// could "put back" a value through the opaque boundary, so the classic mutation-based
258+
/// unsoundness argument does not apply. The nominal boundary (distinct names) is preserved
259+
/// regardless of variance.
238260
fn is_subtype_of(
239261
self: Type<'heap, Self>,
240262
supertype: Type<'heap, Self>,
@@ -248,7 +270,7 @@ impl<'heap> Lattice<'heap> for OpaqueType<'heap> {
248270
return false;
249271
}
250272

251-
env.is_subtype_of(Variance::Invariant, self.kind.repr, supertype.kind.repr)
273+
env.is_subtype_of(Variance::Covariant, self.kind.repr, supertype.kind.repr)
252274
}
253275

254276
fn is_equivalent(
@@ -297,8 +319,10 @@ impl<'heap> Inference<'heap> for OpaqueType<'heap> {
297319
return;
298320
}
299321

300-
// Opaque types are invariant in regards to their arguments
301-
env.collect_constraints(Variance::Invariant, self.kind.repr, supertype.kind.repr);
322+
// Opaque types are covariant: the inner representation of the subtype must be a subtype
323+
// of the inner representation of the supertype. This is sound because HashQL values are
324+
// immutable.
325+
env.collect_constraints(Variance::Covariant, self.kind.repr, supertype.kind.repr);
302326
}
303327

304328
fn instantiate(self: Type<'heap, Self>, env: &mut InstantiateEnvironment<'_, 'heap>) -> TypeId {

0 commit comments

Comments
 (0)