Skip to content

Commit ecc4b17

Browse files
authored
Redis caching for container ancestors and children-page (open-metadata#27858)
* Cache resolved ancestor chains in Redis The /containers/name/{fqn}/ancestors endpoint runs on every detail-page render to populate breadcrumbs. The resolution itself is one indexed findReferencesByFqns call (already slim) plus FQN string walking, but the DB round-trip and JSON deserialization are repeated for every navigation. Bundle this behind Redis with the same shape as CachedReadBundle. Cache key: om:anc:container:{fqnHash} → JSON List<EntityReference>, TTL = entityTtlSeconds (default 5 min). Invalidation: - Writer drops its own key on update/delete (EntityRepository.invalidateCache) - Cross-instance: the existing CacheInvalidationPubSub handler now also drops the ancestors key for the published FQN. - Renames are self-healing: the new FQN is a different key, the old key TTL-expires. - Display-name drift on a remote ancestor is bounded by TTL — acceptable since breadcrumb metadata is cosmetic. The cache is wired into ContainerRepository.getAncestors only — generalising to other hierarchical entity types is straightforward when more /ancestors endpoints land.
1 parent 457afab commit ecc4b17

16 files changed

Lines changed: 1099 additions & 67 deletions

File tree

bootstrap/sql/migrations/native/1.13.0/mysql/schemaChanges.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,3 +288,10 @@ CREATE TABLE IF NOT EXISTS rdf_index_server_stats (
288288
UNIQUE INDEX idx_rdf_index_server_stats_job_server_entity (jobId, serverId, entityType),
289289
INDEX idx_rdf_index_server_stats_job_id (jobId)
290290
);
291+
292+
-- Speeds up the NOT EXISTS anti-join used by ContainerDAO root-only listings
293+
-- (?root=true&service=...). Covers the subquery's filter and projection so the
294+
-- planner can answer "does this container have a parent?" with an index-only
295+
-- scan instead of materializing the child-edge set.
296+
CREATE INDEX idx_er_fromentity_toentity_relation_toid
297+
ON entity_relationship (fromEntity, toEntity, relation, toId);

bootstrap/sql/migrations/native/1.13.0/postgres/schemaChanges.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,3 +309,10 @@ CREATE TABLE IF NOT EXISTS rdf_index_server_stats (
309309
);
310310

311311
CREATE INDEX IF NOT EXISTS idx_rdf_index_server_stats_job_id ON rdf_index_server_stats(jobId);
312+
313+
-- Speeds up the NOT EXISTS anti-join used by ContainerDAO root-only listings
314+
-- (?root=true&service=...). Covers the subquery's filter and projection so the
315+
-- planner can answer "does this container have a parent?" with an index-only
316+
-- scan instead of materializing the child-edge set.
317+
CREATE INDEX IF NOT EXISTS idx_er_fromentity_toentity_relation_toid
318+
ON entity_relationship (fromEntity, toEntity, relation, toId);

openmetadata-integration-tests/pom.xml

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,99 @@
373373
</plugins>
374374
</build>
375375
</profile>
376+
<!-- PostgreSQL + OpenSearch + Redis cache enabled. Mirror of postgres-opensearch but
377+
every IT runs against the Redis-fronted code path so we can spot any test that
378+
depends on always-fresh-from-DB reads. -->
379+
<profile>
380+
<id>postgres-os-redis</id>
381+
<build>
382+
<plugins>
383+
<plugin>
384+
<groupId>org.apache.maven.plugins</groupId>
385+
<artifactId>maven-failsafe-plugin</artifactId>
386+
<version>${maven.failsafe.version}</version>
387+
<executions>
388+
<execution>
389+
<id>sequential-tests</id>
390+
<goals>
391+
<goal>integration-test</goal>
392+
</goals>
393+
<configuration>
394+
<forkCount>1</forkCount>
395+
<reuseForks>true</reuseForks>
396+
<argLine>-Xmx4096m -XX:+UseG1GC</argLine>
397+
<includes>
398+
<include>**/TagRecognizerFeedbackIT.java</include>
399+
<include>**/WorkflowDefinitionResourceIT.java</include>
400+
<include>**/AppsResourceIT.java</include>
401+
<include>**/SystemResourceIT.java</include>
402+
<include>**/VectorEmbeddingIntegrationIT.java</include>
403+
<include>**/PatchTableEmbeddingIT.java</include>
404+
</includes>
405+
<systemPropertyVariables>
406+
<databaseType>postgres</databaseType>
407+
<databaseImage>postgres:15</databaseImage>
408+
<searchType>opensearch</searchType>
409+
<searchImage>opensearchproject/opensearch:3.4.0</searchImage>
410+
<cacheProvider>redis</cacheProvider>
411+
<redisImage>redis:7-alpine</redisImage>
412+
413+
<junit.jupiter.extensions.autodetection.enabled>true</junit.jupiter.extensions.autodetection.enabled>
414+
<junit.jupiter.execution.parallel.enabled>false</junit.jupiter.execution.parallel.enabled>
415+
</systemPropertyVariables>
416+
<trimStackTrace>false</trimStackTrace>
417+
<reportFormat>plain</reportFormat>
418+
<useFile>true</useFile>
419+
</configuration>
420+
</execution>
421+
<execution>
422+
<id>parallel-tests</id>
423+
<goals>
424+
<goal>integration-test</goal>
425+
</goals>
426+
<configuration>
427+
<forkCount>1</forkCount>
428+
<reuseForks>true</reuseForks>
429+
<argLine>-Xmx4096m -XX:+UseG1GC</argLine>
430+
<includes>
431+
<include>**/*IT.java</include>
432+
<include>**/*Test.java</include>
433+
</includes>
434+
<excludes>
435+
<exclude>**/TagRecognizerFeedbackIT.java</exclude>
436+
<exclude>**/WorkflowDefinitionResourceIT.java</exclude>
437+
<exclude>**/AppsResourceIT.java</exclude>
438+
<exclude>**/SystemResourceIT.java</exclude>
439+
<exclude>**/VectorEmbeddingIntegrationIT.java</exclude>
440+
<exclude>**/PatchTableEmbeddingIT.java</exclude>
441+
</excludes>
442+
<systemPropertyVariables>
443+
<databaseType>postgres</databaseType>
444+
<databaseImage>postgres:15</databaseImage>
445+
<searchType>opensearch</searchType>
446+
<searchImage>opensearchproject/opensearch:3.4.0</searchImage>
447+
<cacheProvider>redis</cacheProvider>
448+
<redisImage>redis:7-alpine</redisImage>
449+
450+
<junit.jupiter.extensions.autodetection.enabled>true</junit.jupiter.extensions.autodetection.enabled>
451+
<junit.jupiter.execution.parallel.enabled>true</junit.jupiter.execution.parallel.enabled>
452+
</systemPropertyVariables>
453+
<trimStackTrace>false</trimStackTrace>
454+
<reportFormat>plain</reportFormat>
455+
<useFile>true</useFile>
456+
</configuration>
457+
</execution>
458+
<execution>
459+
<id>verify</id>
460+
<goals>
461+
<goal>verify</goal>
462+
</goals>
463+
</execution>
464+
</executions>
465+
</plugin>
466+
</plugins>
467+
</build>
468+
</profile>
376469
<!-- PostgreSQL + Elasticsearch -->
377470
<profile>
378471
<id>postgres-elasticsearch</id>
@@ -640,6 +733,57 @@
640733
</plugins>
641734
</build>
642735
</profile>
736+
<!-- Redis cache correctness tests — fastest profile for verifying that mutations
737+
invalidate the right keys. Brings up a Redis container, runs only the cache
738+
correctness ITs, skips the heavier search/DB matrix. -->
739+
<profile>
740+
<id>cache-tests</id>
741+
<build>
742+
<plugins>
743+
<plugin>
744+
<groupId>org.apache.maven.plugins</groupId>
745+
<artifactId>maven-failsafe-plugin</artifactId>
746+
<version>${maven.failsafe.version}</version>
747+
<executions>
748+
<execution>
749+
<id>cache-correctness-tests</id>
750+
<goals>
751+
<goal>integration-test</goal>
752+
</goals>
753+
<configuration>
754+
<forkCount>1</forkCount>
755+
<reuseForks>true</reuseForks>
756+
<argLine>-Xmx2048m -XX:+UseG1GC</argLine>
757+
<includes>
758+
<include>**/cache/*IT.java</include>
759+
</includes>
760+
<systemPropertyVariables>
761+
<databaseType>postgres</databaseType>
762+
<databaseImage>postgres:16-alpine</databaseImage>
763+
<searchType>elasticsearch</searchType>
764+
<searchImage>docker.elastic.co/elasticsearch/elasticsearch:9.3.0</searchImage>
765+
<cacheProvider>redis</cacheProvider>
766+
<redisImage>redis:7-alpine</redisImage>
767+
768+
<junit.jupiter.extensions.autodetection.enabled>true</junit.jupiter.extensions.autodetection.enabled>
769+
<junit.jupiter.execution.parallel.enabled>false</junit.jupiter.execution.parallel.enabled>
770+
</systemPropertyVariables>
771+
<trimStackTrace>false</trimStackTrace>
772+
<reportFormat>plain</reportFormat>
773+
<useFile>true</useFile>
774+
</configuration>
775+
</execution>
776+
<execution>
777+
<id>verify</id>
778+
<goals>
779+
<goal>verify</goal>
780+
</goals>
781+
</execution>
782+
</executions>
783+
</plugin>
784+
</plugins>
785+
</build>
786+
</profile>
643787
<!-- RDF integration tests profile -->
644788
<profile>
645789
<id>postgres-rdf-tests</id>

openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/BaseEntityIT.java

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6415,4 +6415,87 @@ void get_changeSummaryNotFound_404(TestNamespace ns) {
64156415
thrown.getMessage().contains("404") || thrown.getMessage().contains("not found"),
64166416
"Should get 404 for non-existent entity, got: " + thrown.getMessage());
64176417
}
6418+
6419+
// ===================================================================
6420+
// Redis cache write-through correctness — fire on every entity subclass
6421+
// when the suite is configured with cacheProvider=redis. With Redis
6422+
// disabled these are no-ops. Each test warms the cache (by-id and
6423+
// by-name), mutates the entity, and re-reads to confirm the cached
6424+
// path returns the latest value rather than a pre-mutation snapshot.
6425+
// ===================================================================
6426+
6427+
@Test
6428+
void cache_displayNameUpdateReflectedOnReadById(TestNamespace ns) {
6429+
Assumptions.assumeTrue(
6430+
org.openmetadata.it.bootstrap.TestSuiteBootstrap.isRedisEnabled(),
6431+
"Skipped — cache write-through tests require cacheProvider=redis");
6432+
6433+
K request = createMinimalRequest(ns);
6434+
T created = createEntity(request);
6435+
String id = created.getId().toString();
6436+
6437+
// Warm by-id and by-name caches.
6438+
T warmById = getEntity(id);
6439+
getEntityByName(created.getFullyQualifiedName());
6440+
6441+
String newDisplayName = "cache-it-" + System.nanoTime();
6442+
warmById.setDisplayName(newDisplayName);
6443+
T patched = patchEntity(id, warmById);
6444+
assertEquals(
6445+
newDisplayName, patched.getDisplayName(), "PATCH response itself must show the update");
6446+
6447+
T fetchedById = getEntity(id);
6448+
assertEquals(
6449+
newDisplayName,
6450+
fetchedById.getDisplayName(),
6451+
"GET-by-id after PATCH must serve the new displayName, not a stale Redis snapshot");
6452+
}
6453+
6454+
@Test
6455+
void cache_displayNameUpdateReflectedOnReadByName(TestNamespace ns) {
6456+
Assumptions.assumeTrue(
6457+
org.openmetadata.it.bootstrap.TestSuiteBootstrap.isRedisEnabled(),
6458+
"Skipped — cache write-through tests require cacheProvider=redis");
6459+
6460+
K request = createMinimalRequest(ns);
6461+
T created = createEntity(request);
6462+
String id = created.getId().toString();
6463+
String fqn = created.getFullyQualifiedName();
6464+
6465+
// Warm both caches up front so PATCH's invalidation has something to invalidate.
6466+
T warm = getEntity(id);
6467+
getEntityByName(fqn);
6468+
6469+
String newDisplayName = "cache-by-name-" + System.nanoTime();
6470+
warm.setDisplayName(newDisplayName);
6471+
patchEntity(id, warm);
6472+
6473+
T fetchedByName = getEntityByName(fqn);
6474+
assertEquals(
6475+
newDisplayName,
6476+
fetchedByName.getDisplayName(),
6477+
"GET-by-name after PATCH must serve the new displayName, not a stale Redis snapshot");
6478+
}
6479+
6480+
@Test
6481+
void cache_hardDeleteReflectedOnReadById(TestNamespace ns) {
6482+
Assumptions.assumeTrue(
6483+
org.openmetadata.it.bootstrap.TestSuiteBootstrap.isRedisEnabled(),
6484+
"Skipped — cache write-through tests require cacheProvider=redis");
6485+
6486+
K request = createMinimalRequest(ns);
6487+
T created = createEntity(request);
6488+
String id = created.getId().toString();
6489+
6490+
// Warm the cache, then hard-delete.
6491+
getEntity(id);
6492+
hardDeleteEntity(id);
6493+
6494+
// Subsequent reads must 404 — a stale cache entry would let the entity stay
6495+
// resolvable until TTL.
6496+
Exception thrown = assertThrows(Exception.class, () -> getEntity(id));
6497+
assertTrue(
6498+
thrown.getMessage().contains("404") || thrown.getMessage().contains("not found"),
6499+
"GET-by-id after hard delete must 404, got: " + thrown.getMessage());
6500+
}
64186501
}

openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/ContainerResourceIT.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,38 @@ void test_rootContainerFiltering(TestNamespace ns) {
845845
boolean childInRootList =
846846
rootContainers.getData().stream().anyMatch(c -> c.getId().equals(child.getId()));
847847
assertFalse(childInRootList, "Child container should not appear in root containers list");
848+
849+
// Default `?service=...` listing (no root flag) MUST include child containers.
850+
// Regression guard: a previous JDBI override on ContainerDAO that shared its Java
851+
// signature with the EntityDAO base accidentally applied the root-only NOT EXISTS
852+
// predicate to every list call, silently dropping children. That broke
853+
// `metadata.list_all_entities(Container, ...)` in the Python ingestion side and
854+
// produced 0-record auto-classification runs.
855+
ListParams allParams = new ListParams();
856+
allParams.setService(service.getFullyQualifiedName());
857+
858+
ListResponse<Container> allContainers = listEntities(allParams);
859+
assertNotNull(allContainers);
860+
assertNotNull(allContainers.getData());
861+
862+
boolean childInAllList =
863+
allContainers.getData().stream().anyMatch(c -> c.getId().equals(child.getId()));
864+
assertTrue(
865+
childInAllList,
866+
"Child container must appear in default `?service=...` listing (without root=true)");
867+
868+
long allMatchingCount =
869+
allContainers.getData().stream()
870+
.filter(
871+
c ->
872+
c.getId().equals(root1.getId())
873+
|| c.getId().equals(root2.getId())
874+
|| c.getId().equals(child.getId()))
875+
.count();
876+
assertEquals(
877+
3,
878+
allMatchingCount,
879+
"`?service=...` must return roots and children (got " + allMatchingCount + ")");
848880
}
849881

850882
@Test

openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/TaskResourceIT.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3711,7 +3711,11 @@ private BotWithUser createBotWithJwtUser(TestNamespace ns, String suffix) {
37113711

37123712
private void awaitSuggestionTaskDeleted(UUID creatorId, String aboutEntity, UUID taskId) {
37133713
Awaitility.await("suggestion task cleanup for creator " + creatorId)
3714-
.atMost(Duration.ofSeconds(15))
3714+
// 30s window gives the bot-delete cascade headroom under heavy parallel load. The
3715+
// cleanup is synchronous in postDelete, but the bot+user cascade itself runs through
3716+
// the entity-cache hot path — under the postgres-os-redis profile it can take 2–3s
3717+
// for the bot delete alone, which leaves little slack inside a 15s budget.
3718+
.atMost(Duration.ofSeconds(30))
37153719
.pollInterval(Duration.ofMillis(250))
37163720
.untilAsserted(
37173721
() -> {

0 commit comments

Comments
 (0)