Skip to content

Commit 44a772b

Browse files
committed
feat(datastore): cost the stored-schema cache by estimated byte size
The stored-schema cache previously admitted every entry at a fixed cost of 1, so a MaxCost configured in bytes (e.g. the server's 32MiB default, or the embedded SchemaCacheMaxCostBytes) actually bounded the number of cached schema versions rather than memory. Cost each entry by ReadOnlyStoredSchema.EstimatedSize() instead: a rough schema byte size plus, for each registered schema-derived cache kind, an estimate of the bytes it adds once populated. Datastore readers pass the exact serialized size they just read; other callers fall back to a cheap schema-text proxy, avoiding a full proto walk (proto.Size is ~1.5ms on a 1k-definition schema). RegisterDerivedCache now takes a size estimator, and the compiled-caveat cache estimates per-caveat CEL overhead.
1 parent e110422 commit 44a772b

7 files changed

Lines changed: 111 additions & 23 deletions

File tree

internal/caveats/schemacache.go

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,33 @@ import (
1313
// and is discarded when the schema changes.
1414
var compiledCaveatCacheKey = datastore.NewDerivedCacheKey("caveats.compiled")
1515

16+
// compiledCaveatOverheadBytes is a rough per-caveat estimate of the memory a compiled caveat
17+
// adds on top of its serialized expression. A compiled caveat embeds a built CEL environment
18+
// and program, which dwarf the serialized expression; this is a deliberately conservative,
19+
// order-of-magnitude figure used only for cache-cost budgeting.
20+
const compiledCaveatOverheadBytes = 8 * 1024
21+
1622
func init() {
17-
if err := datastore.RegisterDerivedCache(compiledCaveatCacheKey, func() any { return &CompiledCaveatCache{} }); err != nil {
23+
if err := datastore.RegisterDerivedCache(compiledCaveatCacheKey, func() any { return &CompiledCaveatCache{} }, estimateCompiledCaveatCacheSize); err != nil {
1824
spiceerrors.MustPanicf("failed to register compiled caveat cache: %v", err)
1925
}
2026
}
2127

28+
// estimateCompiledCaveatCacheSize roughly estimates the bytes the compiled-caveat cache adds for
29+
// the given schema when fully populated: per caveat, the serialized expression plus a fixed
30+
// overhead for its compiled CEL environment.
31+
func estimateCompiledCaveatCacheSize(s *datastore.ReadOnlyStoredSchema) int64 {
32+
v1 := s.Get().GetV1()
33+
if v1 == nil {
34+
return 0
35+
}
36+
total := 0
37+
for _, caveat := range v1.GetCaveatDefinitions() {
38+
total += len(caveat.GetSerializedExpression()) + compiledCaveatOverheadBytes
39+
}
40+
return int64(total)
41+
}
42+
2243
// CompiledCaveatCache caches deserialized caveats (which embed a built CEL environment) by
2344
// caveat name, for a single schema version. Deserializing a caveat rebuilds its CEL
2445
// environment, which is expensive; caching it on the (shared) stored schema avoids paying

internal/datastore/common/sqlschema.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ func (s *SQLSingleStoreSchemaReaderWriter[T]) ReadStoredSchema(ctx context.Conte
5959
return nil, fmt.Errorf("failed to unmarshal schema: %w", err)
6060
}
6161

62-
return datastore.NewReadOnlyStoredSchema(storedSchema), nil
62+
// len(data) is the exact serialized size, used as the rough schema-size base for cache cost.
63+
return datastore.NewReadOnlyStoredSchemaWithSize(storedSchema, len(data)), nil
6364
}
6465

6566
// WriteStoredSchema writes the stored schema to the unified schema table.

internal/datastore/memdb/storedschema.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ func (r *memdbReader) ReadStoredSchema(_ context.Context) (*datastore.ReadOnlySt
3939
return nil, fmt.Errorf("failed to unmarshal schema: %w", err)
4040
}
4141

42-
return datastore.NewReadOnlyStoredSchema(storedSchema), nil
42+
// len(sd.data) is the exact serialized size, used as the rough schema-size base for cache cost.
43+
return datastore.NewReadOnlyStoredSchemaWithSize(storedSchema, len(sd.data)), nil
4344
}
4445

4546
// assertSchemaHash verifies the stored schema hash matches expectedHash.

pkg/datalayer/hashcache.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,14 @@ func (c *schemaHashCache) Set(schemaHash SchemaHash, schema *datastore.ReadOnlyS
113113
schema: schema,
114114
})
115115

116-
c.cache.Set(SchemaCacheKey(schemaHash), schema, 1)
116+
// Cost the entry by the schema's estimated byte size (schema blob plus the schema-derived
117+
// caches it will accrete), so the cache's max-cost budget is in bytes. Floor at 1 so an
118+
// (effectively empty) schema is still admitted with a non-zero weight.
119+
cost := schema.EstimatedSize()
120+
if cost < 1 {
121+
cost = 1
122+
}
123+
c.cache.Set(SchemaCacheKey(schemaHash), schema, cost)
117124
return nil
118125
}
119126

pkg/datastore/datastore.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -526,17 +526,31 @@ type RevisionedNamespace = RevisionedDefinition[*core.NamespaceDefinition]
526526
// when the schema changes. The underlying schema itself remains immutable; only the
527527
// concurrency-safe derived caches are populated on demand.
528528
type ReadOnlyStoredSchema struct {
529-
schema *core.StoredSchema
530-
derived sync.Map // map[DerivedCacheKey]any
529+
schema *core.StoredSchema
530+
schemaSize int64 // rough byte size of the schema, used as a cache-cost base (see EstimatedSize)
531+
derived sync.Map // map[DerivedCacheKey]any
531532
}
532533

533-
// NewReadOnlyStoredSchema wraps a StoredSchema as read-only.
534-
// Returns nil if the provided schema is nil.
534+
// NewReadOnlyStoredSchema wraps a StoredSchema as read-only. Returns nil if the provided
535+
// schema is nil. The schema's byte size is estimated cheaply from its schema text; callers
536+
// that have the exact serialized size on hand (e.g. datastore readers that just unmarshaled
537+
// it) should prefer NewReadOnlyStoredSchemaWithSize.
535538
func NewReadOnlyStoredSchema(schema *core.StoredSchema) *ReadOnlyStoredSchema {
536539
if schema == nil {
537540
return nil
538541
}
539-
return &ReadOnlyStoredSchema{schema: schema}
542+
return NewReadOnlyStoredSchemaWithSize(schema, len(schema.GetV1().GetSchemaText()))
543+
}
544+
545+
// NewReadOnlyStoredSchemaWithSize wraps a StoredSchema as read-only, recording sizeBytes as a
546+
// rough byte size of the schema for cache-cost accounting (see EstimatedSize). Returns nil if
547+
// the provided schema is nil. sizeBytes need not be exact; the serialized length the schema
548+
// was read from is a good value.
549+
func NewReadOnlyStoredSchemaWithSize(schema *core.StoredSchema, sizeBytes int) *ReadOnlyStoredSchema {
550+
if schema == nil {
551+
return nil
552+
}
553+
return &ReadOnlyStoredSchema{schema: schema, schemaSize: int64(sizeBytes)}
540554
}
541555

542556
// Get returns the underlying StoredSchema. Callers must not modify the returned value.

pkg/datastore/derivedcache.go

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,51 @@ func NewDerivedCacheKey(name string) DerivedCacheKey { return DerivedCacheKey{na
1818
// Name returns the human-readable name of the key.
1919
func (k DerivedCacheKey) Name() string { return k.name }
2020

21-
// derivedCacheFactories maps a DerivedCacheKey to a factory that builds an empty cache
22-
// instance. Registered once at init time via RegisterDerivedCache.
23-
var derivedCacheFactories sync.Map // map[DerivedCacheKey]func() any
21+
// derivedCacheRegistration bundles the lazy factory for a derived cache kind with an estimator
22+
// of how many bytes that cache adds, when fully populated, on top of the schema it hangs off.
23+
type derivedCacheRegistration struct {
24+
factory func() any
25+
estimator func(*ReadOnlyStoredSchema) int64
26+
}
27+
28+
// derivedCacheFactories maps a DerivedCacheKey to its registration. Registered once at init
29+
// time via RegisterDerivedCache.
30+
var derivedCacheFactories sync.Map // map[DerivedCacheKey]derivedCacheRegistration
2431

25-
// RegisterDerivedCache registers a factory used to lazily build a schema-derived cache of the
26-
// given kind. The factory returns a fresh, empty cache and is invoked at most once per
27-
// ReadOnlyStoredSchema instance (i.e. once per schema version). Intended to be called from an
28-
// init() function. It returns an error if a factory is already registered for the key.
29-
func RegisterDerivedCache(key DerivedCacheKey, factory func() any) error {
30-
if _, loaded := derivedCacheFactories.LoadOrStore(key, factory); loaded {
32+
// RegisterDerivedCache registers a derived cache kind. factory returns a fresh, empty cache and
33+
// is invoked at most once per ReadOnlyStoredSchema instance (i.e. once per schema version).
34+
// estimator returns a rough byte size that this cache adds, when populated, on top of the
35+
// schema; it is summed into the schema's cache cost (see ReadOnlyStoredSchema.EstimatedSize) and
36+
// may be nil to contribute nothing. Intended to be called from an init() function. It returns an
37+
// error if a kind is already registered for the key.
38+
func RegisterDerivedCache(key DerivedCacheKey, factory func() any, estimator func(*ReadOnlyStoredSchema) int64) error {
39+
reg := derivedCacheRegistration{factory: factory, estimator: estimator}
40+
if _, loaded := derivedCacheFactories.LoadOrStore(key, reg); loaded {
3141
return fmt.Errorf("derived schema cache already registered for key %q", key.name)
3242
}
3343
return nil
3444
}
3545

46+
// EstimatedSize returns a rough byte size for this stored schema: the schema's own size plus,
47+
// for every registered derived cache kind, that kind's estimate of the additional bytes it adds
48+
// when populated. It is intended as the cost when caching the schema, so the cache's max-cost
49+
// budget accounts for the derived caches the schema will accrete (compiled caveats, etc.), not
50+
// just the schema blob. The estimate is deliberately rough and conservative (it assumes every
51+
// kind will be populated).
52+
func (r *ReadOnlyStoredSchema) EstimatedSize() int64 {
53+
if r == nil {
54+
return 0
55+
}
56+
size := r.schemaSize
57+
derivedCacheFactories.Range(func(_, v any) bool {
58+
if reg := v.(derivedCacheRegistration); reg.estimator != nil {
59+
size += reg.estimator(r)
60+
}
61+
return true
62+
})
63+
return size
64+
}
65+
3666
// derivedCache returns the derived cache registered under key for this schema, building it
3767
// once (lazily) on first access. It returns an error if no factory is registered for key,
3868
// which indicates a programming error (a cache kind accessed without being registered at
@@ -41,11 +71,11 @@ func (r *ReadOnlyStoredSchema) derivedCache(key DerivedCacheKey) (any, error) {
4171
if v, ok := r.derived.Load(key); ok {
4272
return v, nil
4373
}
44-
factory, ok := derivedCacheFactories.Load(key)
74+
reg, ok := derivedCacheFactories.Load(key)
4575
if !ok {
4676
return nil, spiceerrors.MustBugf("no derived schema cache registered for key %q", key.name)
4777
}
48-
built := factory.(func() any)()
78+
built := reg.(derivedCacheRegistration).factory()
4979
actual, _ := r.derived.LoadOrStore(key, built)
5080
return actual, nil
5181
}

pkg/datastore/derivedcache_test.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ func TestDerivedCacheLazyAndShared(t *testing.T) {
3131
require.NoError(t, datastore.RegisterDerivedCache(key, func() any {
3232
built++
3333
return &testCache{id: built}
34-
}))
34+
}, nil))
3535

3636
s := newStoredSchema()
3737

@@ -62,6 +62,20 @@ func TestDerivedCacheUnregisteredKeyErrors(t *testing.T) {
6262

6363
func TestDerivedCacheDuplicateRegistrationErrors(t *testing.T) {
6464
key := uniqueDerivedCacheKey("dup")
65-
require.NoError(t, datastore.RegisterDerivedCache(key, func() any { return &testCache{} }))
66-
require.Error(t, datastore.RegisterDerivedCache(key, func() any { return &testCache{} }))
65+
require.NoError(t, datastore.RegisterDerivedCache(key, func() any { return &testCache{} }, nil))
66+
require.Error(t, datastore.RegisterDerivedCache(key, func() any { return &testCache{} }, nil))
67+
}
68+
69+
func TestEstimatedSizeIncludesSchemaAndDerivedEstimators(t *testing.T) {
70+
// Base size comes from the explicit byte size; registered estimators add on top. The
71+
// registry is process-global, so assert against the delta rather than an absolute total.
72+
s := datastore.NewReadOnlyStoredSchemaWithSize(&core.StoredSchema{}, 1000)
73+
before := s.EstimatedSize()
74+
require.GreaterOrEqual(t, before, int64(1000), "estimated size includes the schema byte size")
75+
76+
key := uniqueDerivedCacheKey("estimator")
77+
require.NoError(t, datastore.RegisterDerivedCache(key, func() any { return &testCache{} },
78+
func(*datastore.ReadOnlyStoredSchema) int64 { return 250 }))
79+
80+
require.Equal(t, before+250, s.EstimatedSize(), "a registered estimator adds to estimated size")
6781
}

0 commit comments

Comments
 (0)