Skip to content

Commit 205e41a

Browse files
committed
feat(I001): add pg_stat_io metrics support for PostgreSQL 16+ I/O monitoring
Implements comprehensive I/O statistics collection and reporting using the pg_stat_io view introduced in PostgreSQL 16.

Changes:
- Add pg_stat_io metric definition to metrics.yml with PG16 and PG18 variants
- Add metric to 'full' preset with 30s collection interval
- Create I001 report generator in postgres_reports.py with analysis calculations
- Add I001 JSON schema (reporter/schemas/I001.schema.json)
- Add express checkup support in CLI (checkup.ts, metrics-loader.ts)
- Create Grafana dashboard (Dashboard_14_IO_Statistics.json) with panels for:
  - Total I/O throughput (read/write MB/s)
  - I/O time (read/write/fsync time)
  - Buffer hit ratio gauge
  - I/O operations by backend type (stacked)
  - Writebacks, fsyncs, and extends
  - Buffer evictions and reuses
- Add unit tests for I001 report generator

The I001 report provides:
- I/O stats broken down by backend type (client backend, autovacuum, etc.)
- Total statistics with aggregated 'total' row via ROLLUP
- Analysis section with hit ratio, average read/write times
- Graceful degradation for PostgreSQL < 16 (returns available=false)
1 parent 938dddf commit 205e41a

7 files changed

Lines changed: 1804 additions & 2 deletions

File tree

cli/lib/checkup.ts

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,41 @@ export interface RedundantIndex {
242242
redundant_to_parse_error?: string;
243243
}
244244

245+
/**
 * One row of I/O statistics for a single backend type (I001).
 *
 * Mirrors the `backendIOStats` definition in I001.schema.json. An aggregated
 * row uses the backend type "total".
 *
 * NOTE(review): the `_mb` / `_ms` suffixes presumably mean megabytes and
 * milliseconds; confirm against the `pg_stat_io` metric SQL in metrics.yml.
 */
export interface BackendIOStats {
  /** Backend type label (e.g. "total" for the aggregated row). */
  backend_type: string;

  // Reads
  reads: number;
  read_bytes_mb: number;
  read_time_ms: number;

  // Writes
  writes: number;
  write_bytes_mb: number;
  write_time_ms: number;

  // Writebacks
  writebacks: number;
  writeback_bytes_mb: number;
  writeback_time_ms: number;

  // Fsyncs
  fsyncs: number;
  fsync_time_ms: number;

  // Relation extends (optional: not every producer of this shape fills them in)
  extends?: number;
  extend_bytes_mb?: number;

  // Buffer activity
  hits: number;
  evictions: number;
  reuses: number;
}
267+
268+
/**
 * Summary figures derived from the aggregated I/O statistics (I001).
 */
export interface IOAnalysis {
  /** Read volume taken from the total row's `read_bytes_mb`. */
  total_read_mb: number;
  /** Write volume taken from the total row's `write_bytes_mb`. */
  total_write_mb: number;
  /** Sum of the total row's read time and write time. */
  total_io_time_ms: number;
  /** hits / (hits + reads) * 100; 0 when there were no requests. */
  read_hit_ratio_pct: number;
  /** Read time divided by read count; null when there were no reads. */
  avg_read_time_ms: number | null;
  /** Write time divided by write count; null when there were no writes. */
  avg_write_time_ms: number | null;
}
279+
245280
/**
246281
* Node result for reports
247282
*/
@@ -1326,6 +1361,173 @@ async function generateG001(client: Client, nodeName: string): Promise<Report> {
13261361
return report;
13271362
}
13281363

1364+
/**
 * Get I/O statistics from pg_stat_io (PostgreSQL 16+).
 * Uses 'pg_stat_io' metric from metrics.yml.
 *
 * Operation counters (reads, writes, hits, ...) are parsed as integers.
 * Size (`*_mb`) and timing (`*_ms`) columns are parsed as floating-point
 * numbers so fractional values are not truncated. Any value that cannot be
 * parsed becomes 0 instead of NaN.
 *
 * Errors raised while loading or running the metric SQL are logged and
 * swallowed; the caller receives an empty array in that case.
 *
 * @param client - Connected PostgreSQL client
 * @param pgMajorVersion - PostgreSQL major version
 * @returns Array of I/O stats by backend type, or empty array if unavailable
 */
export async function getIOStatistics(client: Client, pgMajorVersion: number = 16): Promise<BackendIOStats[]> {
  // pg_stat_io requires PostgreSQL 16+
  if (pgMajorVersion < 16) {
    return [];
  }

  // Counters are whole numbers; the driver may hand them over as strings.
  const toCount = (value: unknown): number => {
    const parsed = parseInt(String(value ?? 0), 10);
    return Number.isFinite(parsed) ? parsed : 0;
  };

  // Sizes and timings may be fractional, so parseInt would silently truncate them.
  const toMeasure = (value: unknown): number => {
    const parsed = Number(value ?? 0);
    return Number.isFinite(parsed) ? parsed : 0;
  };

  try {
    const sql = getMetricSql(METRIC_NAMES.I001, pgMajorVersion);
    // Skip if metric returns empty/placeholder SQL
    if (!sql || sql.trim().startsWith(";")) {
      return [];
    }

    const result = await client.query(sql);
    return result.rows.map((row) => {
      const transformed = transformMetricRow(row);
      return {
        backend_type: String(transformed.backend_type || "unknown"),
        reads: toCount(transformed.reads),
        read_bytes_mb: toMeasure(transformed.read_bytes_mb),
        read_time_ms: toMeasure(transformed.read_time_ms),
        writes: toCount(transformed.writes),
        write_bytes_mb: toMeasure(transformed.write_bytes_mb),
        write_time_ms: toMeasure(transformed.write_time_ms),
        writebacks: toCount(transformed.writebacks),
        writeback_bytes_mb: toMeasure(transformed.writeback_bytes_mb),
        writeback_time_ms: toMeasure(transformed.writeback_time_ms),
        fsyncs: toCount(transformed.fsyncs),
        fsync_time_ms: toMeasure(transformed.fsync_time_ms),
        extends: toCount(transformed.extends),
        extend_bytes_mb: toMeasure(transformed.extend_bytes_mb),
        hits: toCount(transformed.hits),
        evictions: toCount(transformed.evictions),
        reuses: toCount(transformed.reuses),
      };
    });
  } catch (err) {
    // Best-effort: the report degrades to "no data" rather than failing the checkup.
    const errorMsg = err instanceof Error ? err.message : String(err);
    console.log(`[I001] Error fetching I/O statistics: ${errorMsg}`);
    return [];
  }
}
1414+
1415+
/**
 * Build the I001 report: I/O statistics taken from pg_stat_io.
 *
 * For servers older than PostgreSQL 16 the report is returned with
 * `available: false`, an empty row list and a zeroed analysis. Otherwise the
 * per-backend rows are ordered ('total' first, the rest alphabetically) and
 * summarized into totals, a read hit ratio and average read/write times. The
 * totals come from the 'total' row when present, or from summing all rows.
 *
 * @param client - Connected PostgreSQL client
 * @param nodeName - Key under which the result is stored in `report.results`
 * @returns The populated I001 report
 */
async function generateI001(client: Client, nodeName: string): Promise<Report> {
  const report = createBaseReport("I001", "I/O statistics (pg_stat_io)", nodeName);
  const postgresVersion = await getPostgresVersion(client);
  // An unparsable major version falls back to 16.
  const pgMajorVersion = parseInt(postgresVersion.server_major_ver, 10) || 16;

  // pg_stat_io does not exist before PostgreSQL 16: report it as unavailable.
  if (pgMajorVersion < 16) {
    report.results[nodeName] = {
      data: {
        available: false,
        min_version_required: "16",
        by_backend_type: [],
        analysis: {
          total_read_mb: 0,
          total_write_mb: 0,
          total_io_time_ms: 0,
          read_hit_ratio_pct: 0,
          avg_read_time_ms: null,
          avg_write_time_ms: null,
        },
        stats_reset_s: null,
      },
      postgres_version: postgresVersion,
    };
    return report;
  }

  const ioStats = await getIOStatistics(client, pgMajorVersion);

  // 'total' leads; every other backend type follows in locale order.
  ioStats.sort((left, right) =>
    left.backend_type === "total"
      ? -1
      : right.backend_type === "total"
        ? 1
        : left.backend_type.localeCompare(right.backend_type)
  );

  // Adds up one numeric column across all rows.
  const sumBy = (pick: (row: (typeof ioStats)[number]) => number): number =>
    ioStats.reduce((acc, row) => acc + pick(row), 0);

  // Prefer the aggregated row; otherwise synthesize one from the individual rows.
  let totalStats = ioStats.find((row) => row.backend_type === "total");
  if (totalStats === undefined && ioStats.length !== 0) {
    totalStats = {
      backend_type: "total",
      reads: sumBy((row) => row.reads),
      read_bytes_mb: sumBy((row) => row.read_bytes_mb),
      read_time_ms: sumBy((row) => row.read_time_ms),
      writes: sumBy((row) => row.writes),
      write_bytes_mb: sumBy((row) => row.write_bytes_mb),
      write_time_ms: sumBy((row) => row.write_time_ms),
      writebacks: sumBy((row) => row.writebacks),
      writeback_bytes_mb: sumBy((row) => row.writeback_bytes_mb),
      writeback_time_ms: sumBy((row) => row.writeback_time_ms),
      fsyncs: sumBy((row) => row.fsyncs),
      fsync_time_ms: sumBy((row) => row.fsync_time_ms),
      hits: sumBy((row) => row.hits),
      evictions: sumBy((row) => row.evictions),
      reuses: sumBy((row) => row.reuses),
    };
  }

  // Inputs for the analysis section; everything is 0 when there is no data.
  const totalReadMb = totalStats?.read_bytes_mb || 0;
  const totalWriteMb = totalStats?.write_bytes_mb || 0;
  const totalReadTime = totalStats?.read_time_ms || 0;
  const totalWriteTime = totalStats?.write_time_ms || 0;
  const totalReads = totalStats?.reads || 0;
  const totalWrites = totalStats?.writes || 0;
  const totalHits = totalStats?.hits || 0;
  const totalIoTimeMs = totalReadTime + totalWriteTime;

  // Hit ratio: hits / (hits + reads) * 100, kept to two decimals.
  const totalRequests = totalHits + totalReads;
  let readHitRatioPct = 0;
  if (totalRequests > 0) {
    readHitRatioPct = Math.round((totalHits / totalRequests) * 10000) / 100;
  }

  // Average time per operation, kept to three decimals; null without operations.
  const roundToThousandths = (value: number): number => Math.round(value * 1000) / 1000;
  const avgReadTimeMs = totalReads > 0 ? roundToThousandths(totalReadTime / totalReads) : null;
  const avgWriteTimeMs = totalWrites > 0 ? roundToThousandths(totalWriteTime / totalWrites) : null;

  // Seconds since the statistics were last reset, fetched with a separate query.
  let statsResetS: number | null = null;
  try {
    const resetResult = await client.query(`
      SELECT max(extract(epoch from now() - stats_reset)::int) as stats_reset_s
      FROM pg_stat_io
    `);
    const hasRow = resetResult.rows.length > 0;
    if (hasRow && resetResult.rows[0].stats_reset_s !== null) {
      statsResetS = parseInt(resetResult.rows[0].stats_reset_s, 10);
    }
  } catch (err) {
    // Not critical: leave stats_reset_s as null when the lookup fails.
  }

  const analysis = {
    total_read_mb: totalReadMb,
    total_write_mb: totalWriteMb,
    total_io_time_ms: totalIoTimeMs,
    read_hit_ratio_pct: readHitRatioPct,
    avg_read_time_ms: avgReadTimeMs,
    avg_write_time_ms: avgWriteTimeMs,
  };

  report.results[nodeName] = {
    data: {
      available: ioStats.length > 0,
      by_backend_type: ioStats,
      analysis,
      stats_reset_s: statsResetS,
    },
    postgres_version: postgresVersion,
  };

  return report;
}
1530+
13291531
/**
13301532
* Available report generators
13311533
*/
@@ -1341,6 +1543,7 @@ export const REPORT_GENERATORS: Record<string, (client: Client, nodeName: string
13411543
H001: generateH001,
13421544
H002: generateH002,
13431545
H004: generateH004,
1546+
I001: generateI001,
13441547
};
13451548

13461549
/**
@@ -1358,6 +1561,7 @@ export const CHECK_INFO: Record<string, string> = {
13581561
H001: "Invalid indexes",
13591562
H002: "Unused indexes",
13601563
H004: "Redundant indexes",
1564+
I001: "I/O statistics (pg_stat_io)",
13611565
};
13621566

13631567
/**

cli/lib/metrics-loader.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ export function listMetricNames(): string[] {
6363
export const METRIC_NAMES = {
6464
// Index health checks
6565
H001: "pg_invalid_indexes",
66-
H002: "unused_indexes",
66+
H002: "unused_indexes",
6767
H004: "redundant_indexes",
6868
// Settings and version info (A002, A003, A007, A013)
6969
settings: "settings",
@@ -72,6 +72,8 @@ export const METRIC_NAMES = {
7272
dbSize: "db_size",
7373
// Stats reset info (H002)
7474
statsReset: "stats_reset",
75+
// I/O statistics (I001) - PostgreSQL 16+
76+
I001: "pg_stat_io",
7577
} as const;
7678

7779
/**

0 commit comments

Comments
 (0)