Skip to content

Commit bc2d7cf

Browse files
perf: cache calendar REST responses, aggressive TTL for past=1 (#246) (#249)
Adds a top-level full-response cache to the calendar REST handler, keyed on the COMPLETE CalendarRequest envelope (geo lat/lng/radius/radius_unit, paged, scope, all filters, archive context, search, past flag, cutoff_hour). TTL split: 1h for past=0 (upcoming), 24h for past=1 (immutable historical).

The bug exposed in production on 2026-05-10: Pinterestbot iterated every venue/artist archive with distinct geo params, each request taking 30-60s and saturating the PHP-FPM pool. The existing bucket cache was keyed WITHOUT geo params, so distinct radius searches collapsed onto one bucket and re-ran the full query on every request when the bucket missed.

Implementation:
- CalendarCache::generate_full_response_key() includes the full envelope, including geo params.
- CalendarCache::get_full_response/set_full_response use wp_cache_* (group: data-machine-calendar) primarily, with a transient fallback for non-persistent cache hosts. Redis Object Cache backs both layers on extrachill.com.
- Calendar::calendar() short-circuits to the cached response before CalendarAbilities runs. Authenticated users with manage_options bypass the cache so editors see fresh data immediately.
- CacheInvalidator::invalidate_all() now flushes the data-machine-calendar wp_cache group on event saves / taxonomy edits.

Co-authored-by: homeboy-ci[bot] <266378653+homeboy-ci[bot]@users.noreply.github.com>
1 parent 53f58d0 commit bc2d7cf

4 files changed

Lines changed: 470 additions & 28 deletions

File tree

inc/Api/Controllers/Calendar.php

Lines changed: 53 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44
*
55
* Thin wrapper around CalendarAbilities for REST API access.
66
* All business logic delegated to CalendarAbilities.
7+
*
8+
* Wraps the response in a top-level full-response cache to mitigate
9+
* crawler-driven DOS on `?past=1` historical archive variants. See
10+
* Extra-Chill/data-machine-events#246 — Pinterestbot iterating every
11+
* venue/artist archive with distinct geo params produced one expensive
12+
* query per request because the underlying bucket cache was keyed
13+
* without geo params.
714
*/
815

916
namespace DataMachineEvents\Api\Controllers;
@@ -12,6 +19,7 @@
1219

1320
use WP_REST_Request;
1421
use DataMachineEvents\Abilities\CalendarAbilities;
22+
use DataMachineEvents\Blocks\Calendar\Cache\CalendarCache;
1523
use DataMachineEvents\Blocks\Calendar\Query\CalendarRequest;
1624

1725
/**
@@ -27,27 +35,51 @@ class Calendar {
2735
*/
2836
public function calendar( WP_REST_Request $request ) {
	// Normalise the REST request into the argument envelope consumed by
	// CalendarAbilities. The same envelope drives the cache key and TTL.
	$envelope = CalendarRequest::fromRestRequest( $request )->toAbilitiesArgs();

	// Users with `manage_options` skip the cache entirely (no read, no
	// write) so they see fresh data right after publishing / editing
	// events. All other traffic, including anonymous crawlers, is cached.
	$use_cache = ! current_user_can( 'manage_options' );

	// Hashing the envelope is wasted work on the bypass path.
	$cache_key = $use_cache ? CalendarCache::generate_full_response_key( $envelope ) : '';

	if ( $use_cache ) {
		$cached = CalendarCache::get_full_response( $cache_key );
		// A miss is reported as `false`; anything that is not an array is
		// treated as a miss so a corrupted entry is simply recomputed.
		if ( is_array( $cached ) ) {
			return rest_ensure_response( $cached );
		}
	}

	$abilities = new CalendarAbilities();
	$result    = $abilities->executeGetCalendarPage( $envelope );

	// Hand failures straight back to the REST layer instead of indexing
	// into them and caching the broken output.
	// NOTE(review): confirm whether executeGetCalendarPage() can actually
	// return a WP_Error; this guard is a no-op if it cannot.
	if ( is_wp_error( $result ) ) {
		return rest_ensure_response( $result );
	}

	$response_body = array(
		'success'    => true,
		'html'       => $result['html']['events'],
		'pagination' => array(
			'html'         => $result['html']['pagination'],
			'current_page' => $result['current_page'],
			'max_pages'    => $result['max_pages'],
			'total_events' => $result['total_event_count'],
		),
		'counter'    => $result['html']['counter'],
		'navigation' => array(
			'html'         => $result['html']['navigation'],
			'past_count'   => $result['event_counts']['past'],
			'future_count' => $result['event_counts']['future'],
			'show_past'    => ! empty( $request->get_param( 'past' ) ),
		),
	);

	// Only persist results that have the expected shape. A malformed
	// result would otherwise be served to every visitor for the whole TTL
	// (up to 24h for `past=1`).
	$is_cacheable = is_array( $result ) && is_array( $result['html'] ?? null );

	if ( $use_cache && $is_cacheable ) {
		CalendarCache::set_full_response(
			$cache_key,
			$response_body,
			CalendarCache::ttl_for_envelope( $envelope )
		);
	}

	return rest_ensure_response( $response_body );
}
5385
}

inc/Blocks/Calendar/Cache/CacheInvalidator.php

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,21 @@ public static function invalidate_all(): void {
9292
wp_cache_delete( $key, 'transient' );
9393
wp_cache_delete( $key, 'site-transient' );
9494
}
95+
96+
// Flush the dedicated full-response cache group. This is safe to
97+
// flush wholesale because the group is private to the calendar —
98+
// nothing else writes to `data-machine-calendar`.
99+
if ( function_exists( 'wp_cache_flush_group' ) ) {
100+
wp_cache_flush_group( CalendarCache::GROUP );
101+
} else {
102+
// On WP < 6.1 / object-cache drop-ins lacking flush_group support,
103+
// the transient layer above still serves as the source of truth.
104+
// The wp_cache entries will age out within TTL_FULL_PAST (24h).
105+
// Acceptable downside for a fallback path that won't hit on
106+
// extrachill.com (Redis Object Cache supports flush_group).
107+
$noop = true;
108+
unset( $noop );
109+
}
95110
}
96111
}
97112

inc/Blocks/Calendar/Cache/CalendarCache.php

Lines changed: 140 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,24 @@
22
/**
33
* Calendar Cache Manager
44
*
5-
* Centralizes all transient caching for calendar queries.
5+
* Centralizes all caching for calendar queries.
66
* Handles cache key generation, TTLs, and get/set operations.
77
*
8+
* Two cache layers:
9+
* 1. Bucket caches (dates, counts) — keyed without geo params, used by
10+
* EventQueryBuilder/Pagination internals. Stored as transients (which
11+
* Redis object cache backs anyway on persistent-cache hosts).
12+
* 2. Full-response cache (this is the calendar REST envelope itself,
13+
* pre-rendered HTML included). Keyed on the COMPLETE CalendarRequest
14+
* envelope INCLUDING geo params. Stored in wp_cache (dedicated group
15+
* `data-machine-calendar`) with transient fallback for non-persistent
16+
* cache environments.
17+
*
18+
* The full-response cache is the DOS mitigation: bot crawlers hammering
19+
* `?past=1&lat=...&lng=...&archive_taxonomy=venue&archive_term_id=...`
20+
* variants now hit one expensive query per cache window instead of one
21+
* per request. See Extra-Chill/data-machine-events#246.
22+
*
823
* @package DataMachineEvents\Blocks\Calendar\Cache
924
* @since 0.14.0
1025
*/
@@ -17,12 +32,16 @@
1732

1833
class CalendarCache {
1934

20-
const PREFIX = 'data-machine_cal_';
21-
const TTL_DATES = 30 * MINUTE_IN_SECONDS;
22-
const TTL_COUNTS = 30 * MINUTE_IN_SECONDS;
35+
const PREFIX = 'data-machine_cal_';
36+
const FULL_PREFIX = 'data-machine_cal_full_';
37+
const GROUP = 'data-machine-calendar';
38+
const TTL_DATES = 30 * MINUTE_IN_SECONDS;
39+
const TTL_COUNTS = 30 * MINUTE_IN_SECONDS;
40+
const TTL_FULL_UPCOMING = HOUR_IN_SECONDS;
41+
const TTL_FULL_PAST = 24 * HOUR_IN_SECONDS;
2342

2443
/**
25-
* Get a cached value.
44+
* Get a cached value (transient-backed bucket cache).
2645
*
2746
* @param string $key Full cache key.
2847
* @return mixed Cached value or false if not found.
@@ -32,7 +51,7 @@ public static function get( string $key ) {
3251
}
3352

3453
/**
35-
* Set a cached value.
54+
* Set a cached value (transient-backed bucket cache).
3655
*
3756
* @param string $key Full cache key.
3857
* @param mixed $value Value to cache.
@@ -44,7 +63,10 @@ public static function set( string $key, $value, int $ttl ): bool {
4463
}
4564

4665
/**
47-
* Generate a cache key from query parameters.
66+
* Generate a cache key from query parameters (bucket caches).
67+
*
68+
* Does NOT include geo params — bucket caches operate on the broader
69+
* date/count slice and geo filtering happens downstream.
4870
*
4971
* @param array $params Query parameters.
5072
* @param string $prefix Key prefix (e.g. 'dates', 'counts').
@@ -66,4 +88,115 @@ public static function generate_key( array $params, string $prefix ): string {
6688

6789
return self::PREFIX . $prefix . '_' . md5( wp_json_encode( $key_data ) );
6890
}
91+
92+
/**
 * Generate a cache key for the full calendar REST response.
 *
 * Every field of the request envelope that is read below takes part in
 * the key, so distinct geo searches, scopes, paged windows and archive
 * contexts get isolated cache entries.
 *
 * Values are canonicalised before hashing so that equivalent requests
 * share one entry:
 * - Coordinates are formatted with a fixed number of decimals, so "40.5"
 *   and "40.50" produce the same key.
 * - The radius is kept as a float. An integer cast would merge e.g. 2.5
 *   and 2.9 into one key and serve the wrong result set.
 * - `tax_filter` is key-sorted recursively, so the order in which filters
 *   were supplied does not fragment the cache.
 *
 * Changing the canonical form changes the resulting hashes; entries
 * written under the previous form are simply never read again and expire
 * on their own TTL.
 *
 * @param array $envelope CalendarRequest::toAbilitiesArgs() output.
 * @return string Full cache key.
 */
public static function generate_full_response_key( array $envelope ): string {
	$tax_filter = $envelope['tax_filter'] ?? array();
	$geo_radius = $envelope['geo_radius'] ?? 0;

	$key_data = array(
		'paged'            => (int) ( $envelope['paged'] ?? 1 ),
		'past'             => (bool) ( $envelope['past'] ?? false ),
		'event_search'     => (string) ( $envelope['event_search'] ?? '' ),
		'date_start'       => (string) ( $envelope['date_start'] ?? '' ),
		'date_end'         => (string) ( $envelope['date_end'] ?? '' ),
		'scope'            => (string) ( $envelope['scope'] ?? '' ),
		'tax_filter'       => is_array( $tax_filter ) ? self::canonicalize_filter( $tax_filter ) : $tax_filter,
		'archive_taxonomy' => (string) ( $envelope['archive_taxonomy'] ?? '' ),
		'archive_term_id'  => (int) ( $envelope['archive_term_id'] ?? 0 ),
		'geo_lat'          => self::canonicalize_coordinate( $envelope['geo_lat'] ?? '' ),
		'geo_lng'          => self::canonicalize_coordinate( $envelope['geo_lng'] ?? '' ),
		'geo_radius'       => is_numeric( $geo_radius ) ? (float) $geo_radius : 0.0,
		'geo_radius_unit'  => (string) ( $envelope['geo_radius_unit'] ?? '' ),
		'cutoff_hour'      => \DataMachineEvents\Blocks\Calendar\Grouping\LateNightCutoff::cutoff_hour(),
	);

	return self::FULL_PREFIX . md5( wp_json_encode( $key_data ) );
}

/**
 * Render a coordinate in one canonical textual form for key hashing.
 *
 * Numeric input is formatted with six decimals (roughly 0.1 m), which is
 * far below any meaningful search radius. Adding 0.0 folds negative zero
 * into positive zero. Non-numeric scalars are passed through as strings;
 * anything else (null, arrays) becomes the empty string.
 *
 * @param mixed $value Raw latitude or longitude from the envelope.
 * @return string Canonical representation.
 */
private static function canonicalize_coordinate( $value ): string {
	if ( is_numeric( $value ) ) {
		return sprintf( '%.6F', (float) $value + 0.0 );
	}

	return is_scalar( $value ) ? (string) $value : '';
}

/**
 * Recursively key-sort a filter array so key order does not affect the hash.
 *
 * Only keys are sorted. The relative order of values inside list-style
 * arrays is preserved, because nothing here shows whether that order is
 * significant to the query.
 *
 * @param array $filter Filter array (possibly nested).
 * @return array Copy of the filter with keys sorted at every level.
 */
private static function canonicalize_filter( array $filter ): array {
	foreach ( $filter as $name => $value ) {
		if ( is_array( $value ) ) {
			$filter[ $name ] = self::canonicalize_filter( $value );
		}
	}

	ksort( $filter );

	return $filter;
}
124+
125+
/**
 * Get a cached full calendar REST response.
 *
 * Looks in the dedicated object-cache group first, then falls back to a
 * transient stored under the same key. Returns false on a miss so callers
 * can use the standard `false === $cached` check.
 *
 * @param string $key Full cache key from generate_full_response_key().
 * @return mixed Cached envelope array or false on miss.
 */
public static function get_full_response( string $key ) {
	$found  = false;
	$cached = wp_cache_get( $key, self::GROUP, false, $found );
	if ( $found && false !== $cached ) {
		return $cached;
	}

	// Transient fallback for hosts without a persistent object cache.
	$transient = get_transient( $key );
	if ( false === $transient ) {
		return false;
	}

	// Promote into the runtime object cache only when it is NOT backed by
	// a persistent store. There the entry lives for the current request
	// only, so it cannot outlive the transient. With a persistent object
	// cache the transient lookup is already an object-cache read, and
	// re-writing the value with a fresh TTL would let it live past the
	// transient's own expiry.
	if ( ! wp_using_ext_object_cache() ) {
		wp_cache_set( $key, $transient, self::GROUP );
	}

	return $transient;
}
155+
156+
/**
 * Set a cached full calendar REST response.
 *
 * Writes to BOTH the dedicated object-cache group and a transient under
 * the same key. On hosts without a persistent object cache the transient
 * is the layer that survives between requests; on persistent-cache hosts
 * the object-cache group is the fast path (and transients are themselves
 * routed through the object cache there).
 *
 * The write counts as successful when at least one layer accepted the
 * value, since either layer is enough for get_full_response() to hit.
 * set_transient() alone is not a reliable signal: it can return false
 * when the stored value is unchanged.
 *
 * @param string $key   Full cache key from generate_full_response_key().
 * @param mixed  $value Response envelope to cache.
 * @param int    $ttl   Time-to-live in seconds.
 * @return bool True if at least one cache layer stored the value.
 */
public static function set_full_response( string $key, $value, int $ttl ): bool {
	// Both writes are always attempted; the results are combined afterwards
	// so a failure in one layer does not skip the other.
	$stored_in_object_cache = (bool) wp_cache_set( $key, $value, self::GROUP, $ttl );
	$stored_in_transient    = (bool) set_transient( $key, $value, $ttl );

	return $stored_in_object_cache || $stored_in_transient;
}
173+
174+
/**
 * Pick the full-response TTL that matches a request envelope.
 *
 * Requests flagged as `past` get the long TTL (TTL_FULL_PAST), because
 * historical listings are treated as immutable. Everything else gets the
 * short TTL (TTL_FULL_UPCOMING), which acts as a safety net behind the
 * invalidation that runs on event saves and taxonomy edits.
 *
 * @param array $envelope CalendarRequest::toAbilitiesArgs() output.
 * @return int TTL in seconds.
 */
public static function ttl_for_envelope( array $envelope ): int {
	// A missing or falsy `past` flag means an upcoming-events request.
	if ( empty( $envelope['past'] ) ) {
		return self::TTL_FULL_UPCOMING;
	}

	return self::TTL_FULL_PAST;
}
190+
191+
/**
 * TTL applied when the envelope that produced an entry is not available.
 *
 * Without the envelope there is no way to tell a past entry from an
 * upcoming one, so the shorter of the two full-response TTLs
 * (TTL_FULL_UPCOMING) is returned.
 *
 * @return int TTL in seconds.
 */
private static function ttl_for_envelope_default(): int {
	$shortest_full_ttl = self::TTL_FULL_UPCOMING;

	return $shortest_full_ttl;
}
69202
}

0 commit comments

Comments
 (0)