Skip to content

Commit 938dddf

Browse files
committed
Merge branch 'feature/h001-decision-tree-recommendations' into 'main'
feat(H001): add decision tree recommendations for invalid indexes. See merge request postgres-ai/postgres_ai!166.
2 parents 6d64dfc + 277da8b commit 938dddf

8 files changed

Lines changed: 561 additions & 89 deletions

File tree

cli/bin/postgres-ai.ts

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1550,6 +1550,16 @@ const MONITORING_CONTAINERS = [
15501550
"postgres-reports",
15511551
];
15521552

1553+
/**
1554+
* Network cleanup constants.
1555+
* Docker Compose creates a default network named "{project}_default".
1556+
* In CI environments, network cleanup can fail if containers are slow to disconnect.
* NOTE(review): the project name is hard-coded below; presumably it must match the
* project name the compose invocation actually uses, otherwise the derived network
* name will not match the real network — confirm against the compose runner.
1557+
*/
1558+
/** Compose project name from which the default network name is derived. */
const COMPOSE_PROJECT_NAME = "postgres_ai";
1559+
/** Default network name for the project above, i.e. "postgres_ai_default". */
const DOCKER_NETWORK_NAME = `${COMPOSE_PROJECT_NAME}_default`;
1560+
/** Delay in milliseconds before retrying network cleanup (allows container network disconnections to complete) */
1561+
const NETWORK_CLEANUP_DELAY_MS = 2000;
1562+
15531563
/** Remove orphaned containers that docker compose down might miss */
15541564
async function removeOrphanedContainers(): Promise<void> {
15551565
for (const container of MONITORING_CONTAINERS) {
@@ -1565,7 +1575,33 @@ mon
15651575
.command("stop")
15661576
.description("stop monitoring services")
15671577
.action(async () => {
1568-
const code = await runCompose(["down"]);
1578+
// Multi-stage cleanup strategy for reliable shutdown in CI environments:
1579+
// Stage 1: Standard compose down with orphan removal
1580+
// Stage 2: Force remove any orphaned containers, then retry compose down
1581+
// Stage 3: Force remove the Docker network directly
1582+
// This handles edge cases where containers are slow to disconnect from networks.
1583+
let code = await runCompose(["down", "--remove-orphans"]);
1584+
1585+
// Stage 2: If initial cleanup fails, try removing orphaned containers first
1586+
if (code !== 0) {
1587+
await removeOrphanedContainers();
1588+
// Wait a moment for container network disconnections to complete
1589+
await new Promise(resolve => setTimeout(resolve, NETWORK_CLEANUP_DELAY_MS));
1590+
// Retry compose down
1591+
code = await runCompose(["down", "--remove-orphans"]);
1592+
}
1593+
1594+
// Final cleanup: force remove the network if it still exists
1595+
if (code !== 0) {
1596+
try {
1597+
await execFilePromise("docker", ["network", "rm", DOCKER_NETWORK_NAME]);
1598+
// Network removal succeeded - cleanup is complete
1599+
code = 0;
1600+
} catch {
1601+
// Network doesn't exist or couldn't be removed, ignore
1602+
}
1603+
}
1604+
15691605
if (code !== 0) process.exitCode = code;
15701606
});
15711607

cli/lib/checkup.ts

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,12 @@ export interface ClusterMetric {
109109

110110
/**
111111
* Invalid index entry (H001) - matches H001.schema.json invalidIndex
112+
*
113+
* Decision tree for remediation recommendations:
114+
* 1. has_valid_duplicate=true → DROP (valid duplicate exists, safe to remove)
115+
* 2. is_pk=true or is_unique=true → RECREATE (backs a constraint, must restore)
116+
* 3. table_row_estimate < 10000 → RECREATE (small table, quick rebuild)
117+
* 4. Otherwise → UNCERTAIN (needs manual analysis of query plans)
112118
*/
113119
export interface InvalidIndex {
114120
schema_name: string;
@@ -117,9 +123,61 @@ export interface InvalidIndex {
117123
relation_name: string;
118124
index_size_bytes: number;
119125
index_size_pretty: string;
120-
/** Full CREATE INDEX statement from pg_get_indexdef(), useful for DROP/CREATE migrations */
126+
/** Full CREATE INDEX statement from pg_get_indexdef() - useful for DROP/RECREATE migrations */
121127
index_definition: string;
122128
supports_fk: boolean;
129+
/** True if this index backs a PRIMARY KEY constraint */
130+
is_pk: boolean;
131+
/** True if this is a UNIQUE index (includes PK indexes) */
132+
is_unique: boolean;
133+
/** Name of the constraint this index backs, or null if none */
134+
constraint_name: string | null;
135+
/** Estimated row count of the table from pg_class.reltuples */
136+
table_row_estimate: number;
137+
/** True if there is a valid index on the same column(s) */
138+
has_valid_duplicate: boolean;
139+
/** Name of the valid duplicate index if one exists */
140+
valid_duplicate_name: string | null;
141+
/** Full CREATE INDEX statement of the valid duplicate index */
142+
valid_duplicate_definition: string | null;
143+
}
144+
145+
/** Recommendation for handling an invalid index */
export type InvalidIndexRecommendation = "DROP" | "RECREATE" | "UNCERTAIN";

/** Estimated row count below which a table is considered "small" (quick to rebuild) */
const SMALL_TABLE_ROW_THRESHOLD = 10000;

/**
 * Compute remediation recommendation for an invalid index using a decision tree.
 *
 * Decision tree logic (first matching rule wins):
 * 1. If has_valid_duplicate is true → DROP (valid duplicate exists, safe to remove)
 * 2. If is_pk or is_unique is true → RECREATE (index must be restored)
 * 3. If table_row_estimate is a known value (finite, >= 0) below
 *    SMALL_TABLE_ROW_THRESHOLD → RECREATE (small table, quick rebuild)
 * 4. Otherwise → UNCERTAIN (needs manual analysis of query plans)
 *
 * An unknown row estimate (negative or non-finite) never qualifies as "small":
 * pg_class.reltuples is -1 on PostgreSQL 14+ for tables that have never been
 * vacuumed or analyzed, so such a table may in fact be arbitrarily large.
 *
 * @param index - Invalid index with observation data
 * @returns Recommendation: "DROP", "RECREATE", or "UNCERTAIN"
 */
export function getInvalidIndexRecommendation(index: InvalidIndex): InvalidIndexRecommendation {
  // 1. Valid duplicate exists - safe to drop
  if (index.has_valid_duplicate) {
    return "DROP";
  }

  // 2. Primary key or unique index - must recreate
  if (index.is_pk || index.is_unique) {
    return "RECREATE";
  }

  // 3. Small table - quick to recreate. Only trust the estimate when it is a
  //    real count; a negative or NaN value means "unknown", not "empty".
  const rowEstimate = index.table_row_estimate;
  const estimateIsKnown = Number.isFinite(rowEstimate) && rowEstimate >= 0;
  if (estimateIsKnown && rowEstimate < SMALL_TABLE_ROW_THRESHOLD) {
    return "RECREATE";
  }

  // 4. Large table or unknown size without a clear path - needs manual analysis
  return "UNCERTAIN";
}
124182

125183
/**
@@ -564,18 +622,19 @@ export async function getClusterInfo(client: Client, pgMajorVersion: number = 16
564622

565623
/**
566624
* Get invalid indexes from the database (H001).
567-
* Invalid indexes are indexes that failed to build (e.g., due to CONCURRENTLY failure).
625+
* Invalid indexes have indisvalid = false, typically from failed CREATE INDEX CONCURRENTLY.
568626
*
569627
* @param client - Connected PostgreSQL client
570628
* @param pgMajorVersion - PostgreSQL major version (default: 16)
571-
* @returns Array of invalid index entries with size and FK support info
629+
* @returns Array of invalid index entries with observation data for decision tree analysis
572630
*/
573631
export async function getInvalidIndexes(client: Client, pgMajorVersion: number = 16): Promise<InvalidIndex[]> {
574632
const sql = getMetricSql(METRIC_NAMES.H001, pgMajorVersion);
575633
const result = await client.query(sql);
576634
return result.rows.map((row) => {
577635
const transformed = transformMetricRow(row);
578636
const indexSizeBytes = parseInt(String(transformed.index_size_bytes || 0), 10);
637+
579638
return {
580639
schema_name: String(transformed.schema_name || ""),
581640
table_name: String(transformed.table_name || ""),
@@ -585,6 +644,13 @@ export async function getInvalidIndexes(client: Client, pgMajorVersion: number =
585644
index_size_pretty: formatBytes(indexSizeBytes),
586645
index_definition: String(transformed.index_definition || ""),
587646
supports_fk: toBool(transformed.supports_fk),
647+
is_pk: toBool(transformed.is_pk),
648+
is_unique: toBool(transformed.is_unique),
649+
constraint_name: transformed.constraint_name ? String(transformed.constraint_name) : null,
650+
table_row_estimate: parseInt(String(transformed.table_row_estimate || 0), 10),
651+
has_valid_duplicate: toBool(transformed.has_valid_duplicate),
652+
valid_duplicate_name: transformed.valid_index_name ? String(transformed.valid_index_name) : null,
653+
valid_duplicate_definition: transformed.valid_index_definition ? String(transformed.valid_index_definition) : null,
588654
};
589655
});
590656
}

0 commit comments

Comments
 (0)