Skip to content

Commit affa0bf

Browse files
committed
feat: introduce maxCollectionSize limit for materialization of collections
- Added maxCollectionSize property to EngineRuntimeState and ComputeEngine to control the maximum number of elements a collection may have when materialized.
- Default value set to 10,000, with the ability to configure it or disable the cap by setting it to Infinity or a negative number.
- Implemented logic in the Repeat and Range functions to respect the maxCollectionSize limit during materialization.
- Added tests to verify the behavior of maxCollectionSize, including edge cases for empty lists, collection types, and user-defined function broadcasting.
- Enhanced the handling of mixed-kind and mixed-dimension lists in type definitions and evaluations.
- Updated dictionary definitions to include new operators and ensure compatibility with the changes.
1 parent 8a7cb87 commit affa0bf

22 files changed

Lines changed: 2908 additions & 1383 deletions

docs/plans/2026-05-23-058-a3-lists.md

Lines changed: 963 additions & 0 deletions
Large diffs are not rendered by default.

src/api.md

Lines changed: 1153 additions & 1301 deletions
Large diffs are not rendered by default.

src/common/type/subtype.ts

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -644,7 +644,57 @@ function widen2(a: Readonly<Type>, b: Readonly<Type>): Readonly<Type> {
644644
if (isSubtype(a, b)) return b;
645645
if (isSubtype(b, a)) return a;
646646

647-
return superType(a, b);
647+
// Two types that are not subtypes of each other. Try the common
648+
// supertype: this works well for related numeric types (e.g.
649+
// integer/real → real). But if the supertype collapses to a generic
650+
// category that loses information (e.g. 'scalar' for number+string,
651+
// or 'tuple' for two tuples of different shape), surface the
652+
// heterogeneity as an explicit union so downstream consumers (e.g.
653+
// the List operator's type handler) can detect mixed-kind content.
654+
const sup = superType(a, b);
655+
if (LOSSY_SUPERTYPE.has(sup as string)) return unionTypes(a, b);
656+
return sup;
657+
}
658+
659+
/**
 * Names of supertypes considered too generic to be a useful result when
 * widening two unrelated types. When the computed common supertype is one
 * of these, the caller returns an explicit union of the two operands
 * instead, so the heterogeneity stays visible.
 */
const LOSSY_SUPERTYPE = new Set<string>([
  // Generic value / callable categories
  'scalar', 'value', 'function', 'expression',
  // Generic collection categories
  'collection', 'indexed_collection', 'list', 'set',
  'tuple', 'record', 'dictionary', 'map',
  // Top type
  'any',
]);
674+
675+
/**
 * Build the union of two types.
 *
 * Operands that are already unions are flattened (recursively), and members
 * that are identical are kept only once, in first-seen order.
 *
 * Identity is decided by a string key: the type name itself for string
 * types, and the `JSON.stringify` serialization for object types. Because
 * the serialization follows property insertion order, two object types with
 * the same content but a different property order are NOT merged.
 *
 * @param a - first type (may itself be a union)
 * @param b - second type (may itself be a union)
 * @returns the single remaining member when everything collapses to one
 *   type, otherwise a `{ kind: 'union' }` type listing the distinct members.
 */
function unionTypes(a: Readonly<Type>, b: Readonly<Type>): Readonly<Type> {
  const members: Type[] = [];
  // Keys of the members collected so far. Each type is serialized exactly
  // once, instead of re-serializing every existing member on each insertion.
  const seen = new Set<string>();

  const push = (t: Readonly<Type>): void => {
    // Flatten nested unions so the result is always a flat list of members.
    if (typeof t === 'object' && t.kind === 'union') {
      for (const m of t.types) push(m);
      return;
    }
    const key = typeof t === 'string' ? t : JSON.stringify(t);
    if (seen.has(key)) return;
    seen.add(key);
    members.push(t as Type);
  };

  push(a);
  push(b);

  // A union with a single distinct member is just that member.
  if (members.length === 1) return members[0];
  return { kind: 'union', types: members };
}
649699

650700
/** Convert two or more types into a more specific type that is a subtype of

src/common/type/utils.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ export function collectionElementType(type: Readonly<Type>): Type | undefined {
5757
if (type.kind === 'tuple') return widen(...type.elements.map((x) => x.type));
5858

5959
if (type.kind === 'dictionary')
60-
return parseType(`tuple<string, ${type.values}>`);
60+
return parseType(`tuple<string, ${typeToString(type.values)}>`);
6161

6262
if (type.kind === 'record') {
6363
return parseType(

src/compute-engine/boxed-expression/box.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,15 @@ export function box(
403403
)
404404
return ce.number(expr);
405405

406+
//
407+
// Box a boolean primitive as the True/False symbol.
408+
// Tensors with `dtype: 'bool'` store JS booleans directly, so `.each()`
409+
// and `.at()` over such a tensor need this case to yield usable
410+
// symbolic values. Mirrors the `boolean → True/False` mapping in
411+
// `jsValueToExpression`.
412+
//
413+
if (typeof expr === 'boolean') return ce.symbol(expr ? 'True' : 'False');
414+
406415
//
407416
// Box a String, a Symbol or a number as a string shorthand
408417
//

src/compute-engine/boxed-expression/boxed-dictionary.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ export class BoxedDictionary
164164
const eltType = widen(
165165
...Object.values(this._keyValues).map((op) => op.type.type)
166166
);
167-
this._type = this.engine.type(`dictionary<${eltType}>`);
167+
this._type = new BoxedType({ kind: 'dictionary', values: eltType });
168168
return this._type;
169169
}
170170

src/compute-engine/boxed-expression/boxed-function.ts

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,33 @@ export class BoxedFunction
11361136
return this.engine._fn('List', results);
11371137
}
11381138

1139+
//
1140+
// 2b/ Broadcast user-defined function literals over indexed collections
1141+
// When a function defined via `ce.assign('f', x \mapsto ...)` is applied
1142+
// to a list (or other finite indexed collection) and the function's
1143+
// parameters are scalar, map the function over the collection.
1144+
// Note: tuples satisfy `isFiniteIndexedCollection` and are intentionally
1145+
// included — a fixed-size tuple of scalars behaves like a small vector.
1146+
//
1147+
if (
1148+
def._isLambda &&
1149+
this.ops!.some((x) => isFiniteIndexedCollection(x)) &&
1150+
paramsAreScalar(def)
1151+
) {
1152+
const items = zip(this._ops);
1153+
if (items) {
1154+
const results: Expression[] = [];
1155+
while (true) {
1156+
const { done, value } = items.next();
1157+
if (done) break;
1158+
results.push(
1159+
this.engine._fn(this.operator, value).evaluate(options)
1160+
);
1161+
}
1162+
return this.engine._fn('List', results);
1163+
}
1164+
}
1165+
11391166
//
11401167
// 3/ Handle evaluation of lazy collections
11411168
//
@@ -1226,6 +1253,31 @@ export class BoxedFunction
12261253
);
12271254
}
12281255

1256+
//
1257+
// 2b/ Broadcast user-defined function literals over indexed collections.
1258+
// Mirrors the sync path in `_computeValue`.
1259+
//
1260+
if (
1261+
def?._isLambda &&
1262+
this.ops!.some((x) => isFiniteIndexedCollection(x)) &&
1263+
paramsAreScalar(def)
1264+
) {
1265+
const items = zip(this._ops);
1266+
if (items) {
1267+
const results: Promise<Expression>[] = [];
1268+
while (true) {
1269+
const { done, value } = items.next();
1270+
if (done) break;
1271+
results.push(
1272+
this.engine._fn(this.operator, value).evaluateAsync(options)
1273+
);
1274+
}
1275+
return Promise.all(results).then((resolved) =>
1276+
this.engine._fn('List', resolved)
1277+
);
1278+
}
1279+
}
1280+
12291281
//
12301282
// 3/ Evaluate the applicable operands
12311283
//
@@ -1570,10 +1622,98 @@ function applyFunctionLiteral(
15701622
if (!value || value.type.isUnknown)
15711623
return expr.engine.function(expr.operator, ops);
15721624

1625+
// Broadcast if any operand is a finite indexed collection and the
1626+
// function's parameter types are scalar. Zip operands and apply
1627+
// pointwise, returning a List of results. Tuples count as indexed
1628+
// collections, so a tuple of scalars also triggers broadcasting.
1629+
if (
1630+
ops.some((x) => isFiniteIndexedCollection(x)) &&
1631+
paramsAreScalar(value.type.type)
1632+
) {
1633+
const items = zip(ops);
1634+
if (items) {
1635+
const results: Expression[] = [];
1636+
while (true) {
1637+
const { done, value: zipped } = items.next();
1638+
if (done) break;
1639+
results.push(apply(value, zipped).evaluate(options));
1640+
}
1641+
return expr.engine._fn('List', results);
1642+
}
1643+
}
1644+
15731645
// The value is a function literal. Apply the arguments to it
15741646
return apply(value, ops);
15751647
}
15761648

1649+
/**
 * Tell whether every formal parameter of a signature has a scalar type,
 * i.e. none of them is a collection/list/tuple/function type as decided by
 * `isScalarType`.
 *
 * `source` is either a `Type` (typically the type of a function-typed
 * value) or a `BoxedOperatorDefinition`, in which case its
 * `signature.type` is inspected.
 *
 * Permissive by design: a missing signature, a string (primitive) type, or
 * any type that is not a `signature` yields `true`.
 * @internal
 */
function paramsAreScalar(source: BoxedOperatorDefinition | Type): boolean {
  const sigType = isOperatorDefinition(source)
    ? source.signature?.type
    : source;

  // Nothing to inspect: treat as scalar.
  if (!sigType || typeof sigType === 'string' || sigType.kind !== 'signature')
    return true;

  // Required parameters, then optional ones, then the variadic one (if any).
  for (const param of sigType.args ?? [])
    if (!isScalarType(param.type)) return false;

  for (const param of sigType.optArgs ?? [])
    if (!isScalarType(param.type)) return false;

  const rest = sigType.variadicArg;
  if (rest && !isScalarType(rest.type)) return false;

  return true;
}
1672+
1673+
/**
 * Type guard: `source` is treated as an operator definition when it is a
 * non-null object that has a `signature` property (own or inherited).
 * String types and `null` are rejected.
 */
function isOperatorDefinition(
  source: BoxedOperatorDefinition | Type
): source is BoxedOperatorDefinition {
  if (typeof source !== 'object' || source === null) return false;
  return 'signature' in source;
}
1680+
1681+
/**
 * Decide whether a type counts as "scalar" for broadcasting purposes.
 *
 * A type is scalar unless it is one of the known collection-like or
 * function-like types. Anything not explicitly recognized (including
 * unknown/any) is treated as scalar.
 *
 * - Unions and intersections are scalar only if every member is scalar.
 * - A negation gets the same answer as the type it negates.
 */
function isScalarType(t: Type): boolean {
  // Primitive (string) type names
  if (typeof t === 'string') {
    switch (t) {
      case 'collection':
      case 'indexed_collection':
      case 'list':
      case 'tuple':
      case 'set':
      case 'dictionary':
      case 'record':
      case 'function':
        return false;
      default:
        return true;
    }
  }

  // Structured types, discriminated by `kind`
  switch (t.kind) {
    case 'collection':
    case 'indexed_collection':
    case 'list':
    case 'tuple':
    case 'set':
    case 'dictionary':
    case 'record':
    case 'signature':
      return false;

    case 'union':
    case 'intersection':
      return t.types.every((member) => isScalarType(member));

    case 'negation':
      return isScalarType(t.type);

    default:
      return true;
  }
}
1716+
15771717
/** Eagerly evaluate xs by iterating over its elements.
15781718
*
15791719
* If eager is true, evaluate DEFAULT_MATERIALIZATION elements.
@@ -1598,6 +1738,14 @@ function materialize(
15981738
const isIndexed = expr.isIndexedCollection;
15991739
const isFinite = expr.isFiniteCollection;
16001740

1741+
// Leave oversized indexed collections in their lazy form. Consumers
1742+
// can detect the size via `.count` without risking OOM.
1743+
if (isIndexed && isFinite) {
1744+
const count = expr.count;
1745+
if (count !== undefined && count > expr.engine.maxCollectionSize)
1746+
return expr;
1747+
}
1748+
16011749
const xs: Expression[] = [];
16021750

16031751
if (!expr.isEmptyCollection) {

src/compute-engine/boxed-expression/boxed-operator-definition.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,13 @@ export class _BoxedOperatorDefinition implements BoxedOperatorDefinition {
8686
signature: BoxedType;
8787
inferredSignature = true;
8888

89+
/** True if this operator definition was created from a user-defined
90+
* function literal (e.g. via `ce.assign('f', ce.parse('x \\mapsto x^2'))`).
91+
* Used to enable auto-broadcasting when applied to indexed collections.
92+
* @internal
93+
*/
94+
_isLambda = false;
95+
8996
type?: (
9097
ops: ReadonlyArray<Expression>,
9198
options: { engine: ComputeEngine }
@@ -336,6 +343,11 @@ export class _BoxedOperatorDefinition implements BoxedOperatorDefinition {
336343
);
337344
}
338345

346+
// Mark this operator definition as backed by a user-defined function
347+
// literal. Enables auto-broadcasting at apply time.
348+
if (isFunction(boxedFn) && boxedFn.operator === 'Function')
349+
this._isLambda = true;
350+
339351
const fn = applicable(boxedFn);
340352
evaluate = (xs, _options) => fn(xs);
341353
Object.defineProperty(evaluate, 'toString', {

src/compute-engine/boxed-expression/boxed-tensor.ts

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,9 +438,34 @@ export function expressionTensorInfo(
438438
}
439439
}
440440
}
441-
// 4b. all leaves → accumulate dtype
441+
// 4b. all leaves → accumulate dtype.
442+
// Reject collection-like leaves and strings so mixed-kind lists like
443+
// [1, 'hello'] and mixed-dim point lists like [Tuple(1,2), Tuple(3,4,5)]
444+
// aren't misidentified as tensors — the List operator's type handler
445+
// relies on this to surface a heterogeneous element type.
442446
else {
443447
for (const item of t) {
448+
// Operator-name check: tensor detection runs on raw boxed ops where
449+
// item.type may still be 'unknown', so we can't rely on type inspection.
450+
const op = item.operator;
451+
if (
452+
op === 'Tuple' ||
453+
op === 'Pair' ||
454+
op === 'Single' ||
455+
op === 'Triple' ||
456+
op === 'Quadruple' ||
457+
op === 'KeyValuePair' ||
458+
op === 'Dictionary' ||
459+
op === 'Set' ||
460+
op === 'Record'
461+
) {
462+
valid = false;
463+
return;
464+
}
465+
if (item.type.type === 'string') {
466+
valid = false;
467+
return;
468+
}
444469
dtype = getSupertype(dtype, getExpressionDatatype(item));
445470
}
446471
}

src/compute-engine/boxed-expression/inequality-bounds.ts

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,7 @@ export function extractIntervalBounds(
7979
// Less(a, b, c) means a < b < c
8080
// Greater(a, b) means a > b → treated as b < a (flipped to [b, a])
8181
const flipped =
82-
op === 'Greater' || op === 'GreaterEqual'
83-
? [...ops].reverse()
84-
: ops;
82+
op === 'Greater' || op === 'GreaterEqual' ? [...ops].reverse() : ops;
8583

8684
// Walk the (flipped) chain looking for `symbol` as an operand.
8785
// For chain [lower, symbol]: lower bound.
@@ -121,10 +119,7 @@ export function extractIntervalBounds(
121119

122120
function _mergeBounds(into: IntervalBounds, from: IntervalBounds): void {
123121
if (from.lower !== undefined) {
124-
if (
125-
into.lower === undefined ||
126-
from.lower.isGreater(into.lower) === true
127-
) {
122+
if (into.lower === undefined || from.lower.isGreater(into.lower) === true) {
128123
into.lower = from.lower;
129124
into.lowerStrict = from.lowerStrict;
130125
} else if (from.lower.isSame(into.lower)) {
@@ -133,10 +128,7 @@ function _mergeBounds(into: IntervalBounds, from: IntervalBounds): void {
133128
}
134129
}
135130
if (from.upper !== undefined) {
136-
if (
137-
into.upper === undefined ||
138-
from.upper.isLess(into.upper) === true
139-
) {
131+
if (into.upper === undefined || from.upper.isLess(into.upper) === true) {
140132
into.upper = from.upper;
141133
into.upperStrict = from.upperStrict;
142134
} else if (from.upper.isSame(into.upper)) {
@@ -239,10 +231,7 @@ export function getInequalityBoundsFromAssumptions(
239231
// Case 3: symbol < 0 => symbol has upper bound 0
240232
if (isSymbol(lhs, symbol)) {
241233
const bound = ce.Zero;
242-
if (
243-
result.upper === undefined ||
244-
bound.isLess(result.upper) === true
245-
) {
234+
if (result.upper === undefined || bound.isLess(result.upper) === true) {
246235
result.upper = bound;
247236
result.upperStrict = isStrict;
248237
}
@@ -271,10 +260,7 @@ export function getInequalityBoundsFromAssumptions(
271260
if (hasSymbol && constantSum !== 0) {
272261
// symbol + k < 0 => symbol < -k
273262
const bound = ce.expr(-constantSum);
274-
if (
275-
result.upper === undefined ||
276-
bound.isLess(result.upper) === true
277-
) {
263+
if (result.upper === undefined || bound.isLess(result.upper) === true) {
278264
result.upper = bound;
279265
result.upperStrict = isStrict;
280266
}

0 commit comments

Comments
 (0)