Skip to content

Commit 1b0d944

Browse files
committed
Merge branch 'claude/calculate-solution-metrics-zs1IB' into 'main'
feat(I001): add pg_stat_io metrics support for PostgreSQL 16+ I/O monitoring. Closes #117. See merge request postgres-ai/postgresai!168.
2 parents 0e5bff2 + 4bed739 commit 1b0d944

11 files changed

Lines changed: 2659 additions & 4 deletions

File tree

cli/lib/checkup.ts

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,50 @@ export interface RedundantIndex {
243243
redundant_to_parse_error?: string;
244244
}
245245

246+
/**
 * I/O statistics by backend type (I001) - matches I001.schema.json backendIOStats.
 *
 * `getIOStatistics` builds one of these per row returned by the pg_stat_io
 * metric query, parsing every numeric field as a base-10 integer and using 0
 * for missing values. When the query returns no row whose `backend_type` is
 * "total", `generateI001` sums all rows into a synthetic "total" entry that it
 * uses only for its analysis.
 *
 * NOTE(review): the counter fields presumably mirror the pg_stat_io columns of
 * the same name - confirm against the metric SQL in metrics.yml.
 */
export interface BackendIOStats {
  /** Backend type label; "unknown" when the column is missing, "total" for an aggregate row. */
  backend_type: string;
  /** Read count; combined with `hits` for the buffer hit ratio and with `read_time_ms` for average read latency. */
  reads: number;
  /** Read MiB. The historical `_mb` suffix is retained for schema compatibility. */
  read_bytes_mb: number;
  /** Time spent on reads; presumably milliseconds, per the `_ms` suffix. */
  read_time_ms: number;
  /** Write count; combined with `write_time_ms` for average write latency. */
  writes: number;
  /** Written MiB. The historical `_mb` suffix is retained for schema compatibility. */
  write_bytes_mb: number;
  /** Time spent on writes; presumably milliseconds, per the `_ms` suffix. */
  write_time_ms: number;
  /** Writeback count. */
  writebacks: number;
  /** Writeback MiB. The historical `_mb` suffix is retained for schema compatibility. */
  writeback_bytes_mb: number;
  /** Time spent on writebacks; not included in `IOAnalysis.total_io_time_ms`. */
  writeback_time_ms: number;
  /** Fsync count. */
  fsyncs: number;
  /** Time spent on fsyncs; not included in `IOAnalysis.total_io_time_ms`. */
  fsync_time_ms: number;
  /**
   * Relation extension operations reported by pg_stat_io for PostgreSQL 16+.
   * Optional in the type, but `getIOStatistics` always sets it (0 when absent).
   */
  extends?: number;
  /**
   * Extended MiB; PG16 derives extends * op_bytes, PG18+ uses native extend_bytes.
   * Optional in the type, but `getIOStatistics` always sets it (0 when absent).
   */
  extend_bytes_mb?: number;
  /** Buffer hit count; numerator of the hit ratio hits / (hits + reads). */
  hits: number;
  /** Eviction count. */
  evictions: number;
  /** Reuse count. */
  reuses: number;
}
273+
274+
/**
 * I/O statistics analysis summary (I001).
 *
 * `generateI001` fills an object of this shape from the "total" backend row,
 * or from the sum of all backend rows when no "total" row is returned. When
 * the server is older than PostgreSQL 16 the numeric totals are 0 and both
 * averages are null.
 */
export interface IOAnalysis {
  /** `read_bytes_mb` of the totals (MiB read). */
  total_read_mb: number;
  /** `write_bytes_mb` of the totals (MiB written). */
  total_write_mb: number;
  /** read_time_ms + write_time_ms across backends. Excludes writeback and fsync time. */
  total_io_time_ms: number;
  /** Buffer hit ratio: hits / (hits + reads) * 100. Rounded to two decimals; 0 when hits + reads is 0. */
  read_hit_ratio_pct: number;
  /** Average read latency (read_time_ms / reads, rounded to three decimals), or null when there are no reads. */
  avg_read_time_ms: number | null;
  /** Average write latency (write_time_ms / writes, rounded to three decimals), or null when there are no writes. */
  avg_write_time_ms: number | null;
}
289+
246290
/**
247291
* Node result for reports
248292
*/
@@ -1664,6 +1708,186 @@ async function generateG003(client: Client, nodeName: string): Promise<Report> {
16641708
return report;
16651709
}
16661710

1711+
/**
 * Get I/O statistics from pg_stat_io (PostgreSQL 16+).
 * Uses 'pg_stat_io' metric from metrics.yml.
 *
 * Every numeric column is parsed as a base-10 integer. Missing, null, or
 * unparseable values are reported as 0, so NaN never reaches the report or
 * the totals derived from it.
 *
 * Errors raised while resolving or running the query are logged with the
 * `[I001]` prefix and swallowed; the caller receives an empty array.
 *
 * @param client - Connected PostgreSQL client
 * @param pgMajorVersion - PostgreSQL major version; defaults to 0 so omitted versions return unavailable.
 *   NaN is treated the same way.
 * @param metricSqlOverride - Optional SQL override; empty or placeholder SQL returns [] without querying
 * @returns Array of I/O stats by backend type, or empty array if unavailable
 */
export async function getIOStatistics(
  client: Client,
  pgMajorVersion: number = 0,
  metricSqlOverride?: string
): Promise<BackendIOStats[]> {
  // pg_stat_io requires PostgreSQL 16+. NaN must be rejected explicitly
  // because `NaN < 16` is false and would otherwise slip through the gate.
  if (Number.isNaN(pgMajorVersion) || pgMajorVersion < 16) {
    return [];
  }

  // Parse a raw column value as an integer. parseInt yields NaN for
  // null/undefined/garbage input, and all of those collapse to 0.
  const toInt = (value: unknown): number => {
    const parsed = parseInt(String(value ?? 0), 10);
    return Number.isFinite(parsed) ? parsed : 0;
  };

  try {
    const sql = metricSqlOverride ?? getMetricSql(METRIC_NAMES.I001, pgMajorVersion);
    // Skip if metric returns empty/placeholder SQL
    if (!sql || sql.trim().startsWith(";")) {
      return [];
    }

    const result = await client.query(sql);
    return result.rows.map((row) => {
      const transformed = transformMetricRow(row);
      return {
        backend_type: String(transformed.backend_type || "unknown"),
        reads: toInt(transformed.reads),
        read_bytes_mb: toInt(transformed.read_bytes_mb),
        read_time_ms: toInt(transformed.read_time_ms),
        writes: toInt(transformed.writes),
        write_bytes_mb: toInt(transformed.write_bytes_mb),
        write_time_ms: toInt(transformed.write_time_ms),
        writebacks: toInt(transformed.writebacks),
        writeback_bytes_mb: toInt(transformed.writeback_bytes_mb),
        writeback_time_ms: toInt(transformed.writeback_time_ms),
        fsyncs: toInt(transformed.fsyncs),
        fsync_time_ms: toInt(transformed.fsync_time_ms),
        extends: toInt(transformed.extends),
        extend_bytes_mb: toInt(transformed.extend_bytes_mb),
        hits: toInt(transformed.hits),
        evictions: toInt(transformed.evictions),
        reuses: toInt(transformed.reuses),
      };
    });
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    console.log(`[I001] Error fetching I/O statistics: ${errorMsg}`);
    return [];
  }
}
1766+
1767+
/**
 * Build the I001 report: I/O statistics sourced from pg_stat_io.
 *
 * pg_stat_io only exists on PostgreSQL 16 and newer. For older servers, or
 * when the major version cannot be parsed, the report is returned right away
 * with `available: false`, `min_version_required: "16"` and zeroed analysis
 * values. Otherwise the per-backend rows are fetched, ordered with the
 * "total" row first, and summarised into totals, a buffer hit ratio and
 * average read/write latencies.
 *
 * @param client - Connected PostgreSQL client
 * @param nodeName - Node name for the report payload
 * @returns I001 report payload
 */
async function generateI001(client: Client, nodeName: string): Promise<Report> {
  const report = createBaseReport("I001", "I/O statistics (pg_stat_io)", nodeName);
  const postgresVersion = await getPostgresVersion(client);

  // An unparseable major version becomes 0 and therefore takes the "unavailable" branch.
  const majorCandidate = parseInt(postgresVersion.server_major_ver, 10);
  const pgMajorVersion = Number.isFinite(majorCandidate) ? majorCandidate : 0;

  // pg_stat_io requires PostgreSQL 16+
  if (pgMajorVersion < 16) {
    report.results[nodeName] = {
      data: {
        available: false,
        min_version_required: "16",
        by_backend_type: [],
        analysis: {
          total_read_mb: 0,
          total_write_mb: 0,
          total_io_time_ms: 0,
          read_hit_ratio_pct: 0,
          avg_read_time_ms: null,
          avg_write_time_ms: null,
        },
        stats_reset_s: null,
      },
      postgres_version: postgresVersion,
    };
    return report;
  }

  const ioStats = await getIOStatistics(client, pgMajorVersion);

  // Order rows alphabetically by backend type, with the 'total' row pinned to the front.
  ioStats.sort((left, right) => {
    if (left.backend_type === "total") return -1;
    if (right.backend_type === "total") return 1;
    return left.backend_type.localeCompare(right.backend_type);
  });

  // Adds up one numeric field across every backend row.
  const sumOver = (pick: (entry: BackendIOStats) => number): number =>
    ioStats.reduce((acc, entry) => acc + pick(entry), 0);

  // Use the query's own 'total' row when present; otherwise synthesise one from
  // all rows. With no rows at all there are no totals.
  const totalStats: BackendIOStats | undefined =
    ioStats.find((entry) => entry.backend_type === "total") ??
    (ioStats.length > 0
      ? {
          backend_type: "total",
          reads: sumOver((entry) => entry.reads),
          read_bytes_mb: sumOver((entry) => entry.read_bytes_mb),
          read_time_ms: sumOver((entry) => entry.read_time_ms),
          writes: sumOver((entry) => entry.writes),
          write_bytes_mb: sumOver((entry) => entry.write_bytes_mb),
          write_time_ms: sumOver((entry) => entry.write_time_ms),
          writebacks: sumOver((entry) => entry.writebacks),
          writeback_bytes_mb: sumOver((entry) => entry.writeback_bytes_mb),
          writeback_time_ms: sumOver((entry) => entry.writeback_time_ms),
          fsyncs: sumOver((entry) => entry.fsyncs),
          fsync_time_ms: sumOver((entry) => entry.fsync_time_ms),
          extends: sumOver((entry) => entry.extends || 0),
          extend_bytes_mb: sumOver((entry) => entry.extend_bytes_mb || 0),
          hits: sumOver((entry) => entry.hits),
          evictions: sumOver((entry) => entry.evictions),
          reuses: sumOver((entry) => entry.reuses),
        }
      : undefined);

  // Inputs for the analysis section; every value falls back to 0 when there are no totals.
  const totalReadMb = totalStats?.read_bytes_mb || 0;
  const totalWriteMb = totalStats?.write_bytes_mb || 0;
  const totalReadTime = totalStats?.read_time_ms || 0;
  const totalWriteTime = totalStats?.write_time_ms || 0;
  const totalReads = totalStats?.reads || 0;
  const totalWrites = totalStats?.writes || 0;
  const totalHits = totalStats?.hits || 0;
  const totalIoTimeMs = totalReadTime + totalWriteTime;

  // Hit ratio: hits / (hits + reads) * 100, kept to two decimals.
  const totalRequests = totalHits + totalReads;
  let readHitRatioPct = 0;
  if (totalRequests > 0) {
    readHitRatioPct = Math.round((totalHits / totalRequests) * 10000) / 100;
  }

  // Average latency per operation to three decimals; null when nothing was counted.
  const averageMs = (elapsedMs: number, operations: number): number | null =>
    operations > 0 ? Math.round((elapsedMs / operations) * 1000) / 1000 : null;
  const avgReadTimeMs = averageMs(totalReadTime, totalReads);
  const avgWriteTimeMs = averageMs(totalWriteTime, totalWrites);

  // Direct-connect checkup queries stats_reset separately instead of reading it from pgwatch metrics.
  let statsResetS: number | null = null;
  try {
    const { rows } = await client.query(`
      select max(extract(epoch from now() - stats_reset)::int) as stats_reset_s
      from pg_stat_io
    `);
    if (rows.length > 0 && rows[0].stats_reset_s !== null) {
      const seconds = parseInt(rows[0].stats_reset_s, 10);
      statsResetS = Number.isFinite(seconds) ? seconds : null;
    }
  } catch {
    // Best effort only: the reset age is informational, so a failure leaves it null.
  }

  report.results[nodeName] = {
    data: {
      available: ioStats.length > 0,
      by_backend_type: ioStats,
      analysis: {
        total_read_mb: totalReadMb,
        total_write_mb: totalWriteMb,
        total_io_time_ms: totalIoTimeMs,
        read_hit_ratio_pct: readHitRatioPct,
        avg_read_time_ms: avgReadTimeMs,
        avg_write_time_ms: avgWriteTimeMs,
      },
      stats_reset_s: statsResetS,
    },
    postgres_version: postgresVersion,
  };

  return report;
}
1890+
16671891
/**
16681892
* Available report generators
16691893
*/
@@ -1683,6 +1907,7 @@ export const REPORT_GENERATORS: Record<string, (client: Client, nodeName: string
16831907
H001: generateH001,
16841908
H002: generateH002,
16851909
H004: generateH004,
1910+
I001: generateI001,
16861911
};
16871912

16881913
/**

cli/lib/metrics-loader.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ export function listMetricNames(): string[] {
6363
export const METRIC_NAMES = {
6464
// Index health checks
6565
H001: "pg_invalid_indexes",
66-
H002: "unused_indexes",
66+
H002: "unused_indexes",
6767
H004: "redundant_indexes",
6868
// Bloat estimation
6969
F004: "pg_table_bloat",
@@ -75,6 +75,8 @@ export const METRIC_NAMES = {
7575
dbSize: "db_size",
7676
// Stats reset info (H002)
7777
statsReset: "stats_reset",
78+
// I/O statistics (I001) - PostgreSQL 16+
79+
I001: "pg_stat_io",
7880
} as const;
7981

8082
/**

cli/scripts/embed-metrics.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ const REQUIRED_METRICS = [
4949
// Bloat estimation (F004, F005)
5050
"pg_table_bloat",
5151
"pg_btree_bloat",
52+
// I/O statistics (I001)
53+
"pg_stat_io",
5254
];
5355

5456
function main() {

0 commit comments

Comments
 (0)