Skip to content

Commit b4cc0ba

Browse files
stats: add memory monitoring to table statistics cache
Add a `mon.BytesMonitor` to the table statistics cache, tracking memory usage under the `server-cache-mon` → `root` SQL memory pool hierarchy. Each cache entry's memory footprint is estimated when the entry is populated (accounting for histogram buckets, datum sizes, column IDs, and string fields) and tracked via a `mon.BoundAccount`.

When a `Grow` or `Resize` call fails (e.g. because the SQL memory pool is exhausted), LRU entries are evicted until the cumulative freed memory exceeds the new entry's size, ensuring a net decrease even under concurrent memory pressure. If eviction is insufficient, the new entry is removed — callers who were already waiting still receive the stats, but the entry won't be retained for future lookups. On refresh, the size delta between the old and new stats is reserved or released.

A new `LRUEntry` method is added to `cache.UnorderedCache` to support the eviction logic.

Fixes: #54030
Release note: None
Co-Authored-By: roachdev-claude <roachdev-claude-bot@cockroachlabs.com>
1 parent 275f1cf commit b4cc0ba

9 files changed

Lines changed: 511 additions & 11 deletions

File tree

docs/generated/settings/settings-for-tenants.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -378,7 +378,7 @@ sql.stats.response.max integer 20000 the maximum number of statements and transa
378378
sql.stats.response.show_internal.enabled boolean false controls if statistics for internal executions should be returned by the CombinedStatements and if internal sessions should be returned by the ListSessions endpoints. These endpoints are used to display statistics on the SQL Activity pages application
379379
sql.stats.system_tables.enabled boolean true when true, enables use of statistics on system tables by the query optimizer application
380380
sql.stats.system_tables_autostats.enabled boolean true when true, enables automatic collection of statistics on system tables application
381-
sql.stats.table_statistics_cache.capacity integer 256 the maximum number of table statistics entries stored in the LRU cache application
381+
sql.stats.table_statistics_cache.capacity integer 256 the maximum number of table statistics entries stored in the LRU cache. Each cache entry corresponds to a single table. application
382382
sql.stats.virtual_computed_columns.enabled boolean true set to true to collect table statistics on virtual computed columns application
383383
sql.telemetry.query_sampling.enabled boolean false when set to true, executed queries will emit an event on the telemetry logging channel application
384384
sql.telemetry.query_sampling.internal.enabled boolean false when set to true, internal queries will be sampled in telemetry logging application

docs/generated/settings/settings.html

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -333,7 +333,7 @@
333333
<tr><td><div id="setting-sql-stats-response-show-internal-enabled" class="anchored"><code>sql.stats.response.show_internal.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>controls if statistics for internal executions should be returned by the CombinedStatements and if internal sessions should be returned by the ListSessions endpoints. These endpoints are used to display statistics on the SQL Activity pages</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
334334
<tr><td><div id="setting-sql-stats-system-tables-enabled" class="anchored"><code>sql.stats.system_tables.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>when true, enables use of statistics on system tables by the query optimizer</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
335335
<tr><td><div id="setting-sql-stats-system-tables-autostats-enabled" class="anchored"><code>sql.stats.system_tables_autostats.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>when true, enables automatic collection of statistics on system tables</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
336-
<tr><td><div id="setting-sql-stats-table-statistics-cache-capacity" class="anchored"><code>sql.stats.table_statistics_cache.capacity</code></div></td><td>integer</td><td><code>256</code></td><td>the maximum number of table statistics entries stored in the LRU cache</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
336+
<tr><td><div id="setting-sql-stats-table-statistics-cache-capacity" class="anchored"><code>sql.stats.table_statistics_cache.capacity</code></div></td><td>integer</td><td><code>256</code></td><td>the maximum number of table statistics entries stored in the LRU cache. Each cache entry corresponds to a single table.</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
337337
<tr><td><div id="setting-sql-stats-virtual-computed-columns-enabled" class="anchored"><code>sql.stats.virtual_computed_columns.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>set to true to collect table statistics on virtual computed columns</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
338338
<tr><td><div id="setting-sql-telemetry-query-sampling-enabled" class="anchored"><code>sql.telemetry.query_sampling.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>when set to true, executed queries will emit an event on the telemetry logging channel</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
339339
<tr><td><div id="setting-sql-telemetry-query-sampling-internal-enabled" class="anchored"><code>sql.telemetry.query_sampling.internal.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>when set to true, internal queries will be sampled in telemetry logging</td><td>Serverless/Dedicated/Self-Hosted</td></tr>

pkg/server/server_sql.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1039,9 +1039,11 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*SQLServer, error) {
10391039
),
10401040

10411041
TableStatsCache: stats.NewTableStatisticsCache(
1042+
ctx,
10421043
cfg.Settings,
10431044
cfg.internalDB,
10441045
cfg.stopper,
1046+
serverCacheMemoryMonitor,
10451047
),
10461048

10471049
QueryCache: querycache.New(cfg.QueryCacheSize),

pkg/sql/stats/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,7 @@ go_test(
138138
"//pkg/util/protoutil",
139139
"//pkg/util/randutil",
140140
"//pkg/util/retry",
141+
"//pkg/util/stop",
141142
"//pkg/util/timeutil",
142143
"//pkg/util/timeutil/pgdate",
143144
"@com_github_cockroachdb_errors//:errors",

pkg/sql/stats/automatic_stats_test.go

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,9 +67,11 @@ func TestMaybeRefreshStats(t *testing.T) {
6767
internalDB := s.InternalDB().(descs.DB)
6868
descA := desctestutils.TestingGetPublicTableDescriptor(s.DB(), codec, "t", "a")
6969
cache := NewTableStatisticsCache(
70+
ctx,
7071
s.ClusterSettings(),
7172
s.InternalDB().(descs.DB),
7273
s.AppStopper(),
74+
nil, /* parentMon */
7375
)
7476
require.NoError(t, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory)))
7577
refresher := MakeRefresher(s.AmbientCtx(), st, internalDB, cache, time.Microsecond /* asOfTime */, nil /* knobs */, false /* readOnlyTenant */)
@@ -228,9 +230,11 @@ func TestEnsureAllTablesQueries(t *testing.T) {
228230

229231
internalDB := s.InternalDB().(descs.DB)
230232
cache := NewTableStatisticsCache(
233+
ctx,
231234
s.ClusterSettings(),
232235
s.InternalDB().(descs.DB),
233236
s.AppStopper(),
237+
nil, /* parentMon */
234238
)
235239
require.NoError(t, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory)))
236240
r := MakeRefresher(s.AmbientCtx(), st, internalDB, cache, time.Microsecond /* asOfTime */, nil /* knobs */, false /* readOnlyTenant */)
@@ -330,9 +334,11 @@ func BenchmarkEnsureAllTables(b *testing.B) {
330334

331335
internalDB := s.InternalDB().(descs.DB)
332336
cache := NewTableStatisticsCache(
337+
ctx,
333338
s.ClusterSettings(),
334339
s.InternalDB().(descs.DB),
335340
s.AppStopper(),
341+
nil, /* parentMon */
336342
)
337343
require.NoError(b, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory)))
338344
r := MakeRefresher(s.AmbientCtx(), st, internalDB, cache, time.Microsecond /* asOfTime */, nil /* knobs */, false /* readOnlyTenant */)
@@ -403,9 +409,11 @@ func TestAverageRefreshTime(t *testing.T) {
403409
internalDB := s.InternalDB().(descs.DB)
404410
table := desctestutils.TestingGetPublicTableDescriptor(s.DB(), codec, "t", "a")
405411
cache := NewTableStatisticsCache(
412+
ctx,
406413
s.ClusterSettings(),
407414
s.InternalDB().(descs.DB),
408415
s.AppStopper(),
416+
nil, /* parentMon */
409417
)
410418
require.NoError(t, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory)))
411419
refresher := MakeRefresher(s.AmbientCtx(), st, internalDB, cache, time.Microsecond /* asOfTime */, nil /* knobs */, false /* readOnlyTenant */)
@@ -652,9 +660,11 @@ func TestAutoStatsReadOnlyTables(t *testing.T) {
652660

653661
internalDB := s.InternalDB().(descs.DB)
654662
cache := NewTableStatisticsCache(
663+
ctx,
655664
s.ClusterSettings(),
656665
s.InternalDB().(descs.DB),
657666
s.AppStopper(),
667+
nil, /* parentMon */
658668
)
659669
require.NoError(t, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory)))
660670
refresher := MakeRefresher(s.AmbientCtx(), st, internalDB, cache, time.Microsecond /* asOfTime */, nil /* knobs */, false /* readOnlyTenant */)
@@ -707,9 +717,11 @@ func TestAutoStatsOnStartupClusterSettingOff(t *testing.T) {
707717

708718
internalDB := s.InternalDB().(descs.DB)
709719
cache := NewTableStatisticsCache(
720+
ctx,
710721
s.ClusterSettings(),
711722
s.InternalDB().(descs.DB),
712723
s.AppStopper(),
724+
nil, /* parentMon */
713725
)
714726
require.NoError(t, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory)))
715727
refresher := MakeRefresher(s.AmbientCtx(), st, internalDB, cache, time.Microsecond /* asOfTime */, nil /* knobs */, false /* readOnlyTenant */)
@@ -754,9 +766,11 @@ func TestNoRetryOnFailure(t *testing.T) {
754766

755767
internalDB := s.InternalDB().(descs.DB)
756768
cache := NewTableStatisticsCache(
769+
ctx,
757770
s.ClusterSettings(),
758771
s.InternalDB().(descs.DB),
759772
s.AppStopper(),
773+
nil, /* parentMon */
760774
)
761775
require.NoError(t, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory)))
762776
r := MakeRefresher(s.AmbientCtx(), st, internalDB, cache, time.Microsecond /* asOfTime */, nil /* knobs */, false /* readOnlyTenant */)
@@ -871,9 +885,11 @@ func TestAnalyzeSystemTables(t *testing.T) {
871885
defer evalCtx.Stop(ctx)
872886
executor := s.InternalExecutor().(isql.Executor)
873887
cache := NewTableStatisticsCache(
888+
ctx,
874889
s.ClusterSettings(),
875890
s.InternalDB().(descs.DB),
876891
s.AppStopper(),
892+
nil, /* parentMon */
877893
)
878894
require.NoError(t, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory)))
879895

@@ -991,9 +1007,11 @@ func TestAutoStatsDisabledReadOnlyTenant(t *testing.T) {
9911007
internalDB := s.InternalDB().(descs.DB)
9921008
descA := desctestutils.TestingGetPublicTableDescriptor(s.DB(), codec, "t", "a")
9931009
cache := NewTableStatisticsCache(
1010+
ctx,
9941011
s.ClusterSettings(),
9951012
s.InternalDB().(descs.DB),
9961013
s.AppStopper(),
1014+
nil, /* parentMon */
9971015
)
9981016
require.NoError(t, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory)))
9991017
refresher := MakeRefresher(s.AmbientCtx(), st, internalDB, cache,

pkg/sql/stats/delete_stats_test.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,9 +40,11 @@ func TestDeleteOldStatsForColumns(t *testing.T) {
4040
s := srv.ApplicationLayer()
4141
db := s.InternalDB().(descs.DB)
4242
cache := NewTableStatisticsCache(
43+
ctx,
4344
s.ClusterSettings(),
4445
db,
4546
s.AppStopper(),
47+
nil, /* parentMon */
4648
)
4749
require.NoError(t, cache.Start(ctx, s.Codec(), s.RangeFeedFactory().(*rangefeed.Factory)))
4850

@@ -340,9 +342,11 @@ func TestDeleteOldStatsForOtherColumns(t *testing.T) {
340342
s := srv.ApplicationLayer()
341343
db := s.InternalDB().(isql.DB)
342344
cache := NewTableStatisticsCache(
345+
ctx,
343346
s.ClusterSettings(),
344347
s.InternalDB().(descs.DB),
345348
s.AppStopper(),
349+
nil, /* parentMon */
346350
)
347351
require.NoError(t, cache.Start(ctx, s.Codec(), s.RangeFeedFactory().(*rangefeed.Factory)))
348352
testData := []TableStatisticProto{

0 commit comments

Comments
 (0)