Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
473 changes: 198 additions & 275 deletions mcp-db/data/components-programs.csv

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import { MigrationBuilder } from 'node-pg-migrate'
import { fileURLToPath } from 'url'
import path from 'path'
import { readFileSync } from 'fs'
import { parse } from 'csv-parse/sync'

// ES modules do not provide __filename/__dirname, so derive both from this
// module's URL; __dirname is used below to locate the CSV data file.
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)

/**
 * Columns of components-programs.csv that this migration reads. Property names
 * are the CSV header names (rows are parsed with `columns: true`).
 */
interface CsvRow {
/** Program acronym; matched against programs.acronym in the backfill. */
PROGRAM_STRING: string
/** Program description; written to programs.description. */
PROGRAM_DESCRIPTION: string
/** Component identifier; matched against components.component_id in the backfill. */
COMPONENT_STRING: string
/** Written to components.frequency. */
FREQUENCY: string
/** Written to components.frequency_notes. */
FREQUENCY_NOTES: string
}

// Read the CSV that sits in ../data relative to this migration and parse it
// into one record per non-empty line, keyed by the header row.
const csvPath = path.resolve(__dirname, '../data/components-programs.csv')
const csvText = readFileSync(csvPath, 'utf8')
const csvData = parse(csvText, { columns: true, skip_empty_lines: true })

/**
 * Render a CSV cell as a SQL literal.
 *
 * @param value - Raw cell text; may be missing.
 * @returns The bare keyword `NULL` for null, undefined, empty or
 *   whitespace-only input; otherwise the text (not trimmed) wrapped in single
 *   quotes with every embedded single quote doubled.
 */
const escapeSql = (value: string | null | undefined): string => {
  // `== null` matches both null and undefined.
  if (value == null) return 'NULL'
  if (value.trim().length === 0) return 'NULL'
  const doubledQuotes = value.split("'").join("''")
  return "'" + doubledQuotes + "'"
}
// Collect one description per program acronym. Several rows may share an
// acronym; only the first occurrence is kept.
const programMap = new Map<string, string>()
for (const { PROGRAM_STRING, PROGRAM_DESCRIPTION } of csvData as CsvRow[]) {
  const key = PROGRAM_STRING?.trim()
  const text = PROGRAM_DESCRIPTION?.trim()
  // Rows without an acronym cannot be matched to a programs row; skip them,
  // as well as acronyms that were already recorded.
  if (!key || programMap.has(key)) continue
  programMap.set(key, text ?? '')
}

// SQL VALUES tuples "(acronym, description)" for the programs backfill, one per
// distinct program, joined with a comma and line break.
const programValues = [...programMap]
  .map((entry) => {
    const [acronym, description] = entry
    return '(' + escapeSql(acronym) + ', ' + escapeSql(description) + ')'
  })
  .join(',\n ')

// SQL VALUES tuples "(component_id, frequency, frequency_notes)" for the
// components backfill, one per CSV row, joined with a comma and line break.
const componentValues = (csvData as CsvRow[])
  .map(({ COMPONENT_STRING, FREQUENCY, FREQUENCY_NOTES }) => {
    const literals = [COMPONENT_STRING, FREQUENCY, FREQUENCY_NOTES].map(
      (cell) => escapeSql(cell),
    )
    return `(${literals.join(', ')})`
  })
  .join(',\n ')

/**
 * SQL that creates (or replaces) list_survey_programs(): one row per program,
 * ordered by label, carrying the program label, acronym, a description and the
 * number of distinct data tables reachable through the program's components
 * and datasets. The description falls back to a component description when
 * programs.description is NULL or blank.
 */
export const listSurveyProgramsSQL = `
CREATE OR REPLACE FUNCTION list_survey_programs()
RETURNS TABLE (
program_label TEXT,
program_string VARCHAR(15),
description TEXT,
table_count INT
)
LANGUAGE sql STABLE
AS $$
SELECT
p.label AS program_label,
p.acronym AS program_string,
-- Prefer the explicit program description; fall back to the first
-- non-null component description so that programs without a
-- programs.description still get orientation text.
COALESCE(
NULLIF(TRIM(p.description), ''),
(
SELECT c2.description
FROM components c2
WHERE c2.program_id = p.id
AND c2.description IS NOT NULL
ORDER BY c2.id
LIMIT 1
)
) AS description,
COUNT(DISTINCT dt.id)::INT AS table_count
FROM programs p
LEFT JOIN components c ON c.program_id = p.id
LEFT JOIN datasets d ON d.component_id = c.id
LEFT JOIN data_table_datasets dtd ON dtd.dataset_id = d.id
LEFT JOIN data_tables dt ON dt.id = dtd.data_table_id
GROUP BY p.id, p.label, p.acronym, p.description
ORDER BY p.label
$$;
`

/**
 * SQL that creates (or replaces) list_survey_components(p_program_string): one
 * row per component of the program whose acronym equals the argument, ordered
 * by component label. Each row carries the component's label, component_id,
 * API endpoint, frequency, frequency notes and description, plus the earliest
 * and latest year of its linked datasets, a has_gaps flag, and the number of
 * distinct data tables reachable through those datasets.
 */
export const listSurveyComponentsSQL = `
CREATE OR REPLACE FUNCTION list_survey_components(p_program_string TEXT)
RETURNS TABLE (
component_label TEXT,
component_string VARCHAR(60),
api_endpoint VARCHAR(60),
frequency VARCHAR(50),
frequency_notes TEXT,
vintage_start INT,
vintage_end INT,
has_gaps BOOLEAN,
table_count INT,
description TEXT
)
LANGUAGE sql STABLE
AS $$
SELECT
c.label AS component_label,
c.component_id AS component_string,
c.api_endpoint,
c.frequency,
c.frequency_notes,
MIN(y.year)::INT AS vintage_start,
MAX(y.year)::INT AS vintage_end,
-- has_gaps: true when the year range is not fully contiguous.
-- NULL when no datasets are linked (nothing to compare).
CASE
WHEN COUNT(DISTINCT y.year) = 0 THEN NULL
ELSE (MAX(y.year) - MIN(y.year) + 1) != COUNT(DISTINCT y.year)
END AS has_gaps,
COUNT(DISTINCT dt.id)::INT AS table_count,
c.description
FROM programs p
JOIN components c ON c.program_id = p.id
LEFT JOIN datasets d ON d.component_id = c.id
LEFT JOIN years y ON y.id = d.year_id
LEFT JOIN data_table_datasets dtd ON dtd.dataset_id = d.id
LEFT JOIN data_tables dt ON dt.id = dtd.data_table_id
WHERE p.acronym = p_program_string
GROUP BY c.id, c.label, c.component_id, c.api_endpoint,
c.frequency, c.frequency_notes, c.description
ORDER BY c.label
$$;
`
/**
 * Backfill statement generated from components-programs.csv at module load.
 * It sets programs.description (rows matched on acronym) and
 * components.frequency / components.frequency_notes (rows matched on
 * component_id).
 *
 * The updates run only when both tables already contain rows; otherwise the DO
 * block just raises a NOTICE. Blank CSV cells are rendered as NULL by
 * escapeSql, so a blank cell overwrites the matching column with NULL.
 *
 * NOTE(review): the values are inlined as SQL literals inside a `$$`-quoted
 * block, so a CSV cell containing `$$`, or a CSV with no data rows (empty
 * VALUES list), would presumably make this statement invalid — confirm the
 * data file can contain neither.
 */
export const updateProgramComponentColumns = `
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM programs LIMIT 1) AND EXISTS (SELECT 1 FROM components LIMIT 1) THEN

UPDATE programs SET description = v.description
FROM (VALUES
${programValues}
) AS v(acronym, description)
WHERE programs.acronym = v.acronym;

UPDATE components SET
frequency = v.frequency,
frequency_notes = v.frequency_notes
FROM (VALUES
${componentValues}
) AS v(component_id, frequency, frequency_notes)
WHERE components.component_id = v.component_id;

ELSE
RAISE NOTICE 'programs or components table is empty, skipping backfill for frequency and description columns.';
END IF;
END $$;
`

/**
 * Apply the migration: add the frequency columns to components, backfill them
 * (and the program descriptions) from the CSV, then create the two listing
 * functions.
 *
 * @param pgm - node-pg-migrate builder that receives the operations.
 */
export async function up(pgm: MigrationBuilder): Promise<void> {
  const frequencyColumns = {
    frequency: { type: 'varchar(50)' },
    frequency_notes: { type: 'text' },
  }
  pgm.addColumns('components', frequencyColumns)

  // Order matters: the backfill writes to the columns added above, and the
  // component listing function selects them.
  const statements = [
    updateProgramComponentColumns,
    listSurveyProgramsSQL,
    listSurveyComponentsSQL,
  ]
  for (const statement of statements) {
    pgm.sql(statement)
  }
}

/**
 * Revert the migration: drop both listing functions, remove the frequency
 * columns from components, and clear programs.description.
 *
 * NOTE(review): the last statement nulls every program description, not only
 * the ones written by `up` — confirm no descriptions pre-date this migration.
 *
 * @param pgm - node-pg-migrate builder that receives the operations.
 */
export async function down(pgm: MigrationBuilder): Promise<void> {
  // Functions go first so nothing still references the columns being dropped.
  const dropFunctionStatements = [
    'DROP FUNCTION IF EXISTS list_survey_components(TEXT)',
    'DROP FUNCTION IF EXISTS list_survey_programs()',
  ]
  for (const statement of dropFunctionStatements) {
    pgm.sql(statement)
  }

  const frequencyColumns = ['frequency', 'frequency_notes']
  pgm.dropColumns('components', frequencyColumns)
  pgm.sql('UPDATE programs SET description = NULL;')
}
6 changes: 6 additions & 0 deletions mcp-db/src/schema/components.schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ export const RawComponentSchema = z.object({
COMPONENT_DESCRIPTION: z.string(),
API_SHORT_NAME: z.string(),
PROGRAM_STRING: z.string(),
FREQUENCY: z.string().optional(),
FREQUENCY_NOTES: z.string().optional(),
})

export const RawComponentsArraySchema = z.array(RawComponentSchema)
Expand All @@ -16,6 +18,8 @@ export const ComponentRecordSchema = z.object({
description: z.string(),
api_endpoint: z.string(),
program_id: z.number(),
frequency: z.string().optional(),
frequency_notes: z.string().optional(),
})

export type ComponentRecord = z.infer<typeof ComponentRecordSchema>
Expand Down Expand Up @@ -65,6 +69,8 @@ export function transformComponentData(
description: row.COMPONENT_DESCRIPTION,
api_endpoint: row.API_SHORT_NAME,
program_id: programId,
frequency: row.FREQUENCY,
frequency_notes: row.FREQUENCY_NOTES,
})
}
})
Expand Down
3 changes: 3 additions & 0 deletions mcp-db/src/schema/program.schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@ import { z } from 'zod'
// Shape of one raw input row as far as program data is concerned (upper-case
// source column names). PROGRAM_DESCRIPTION is optional, so rows that lack it
// still validate.
export const RawProgramSchema = z.object({
PROGRAM_STRING: z.string(),
PROGRAM_LABEL: z.string(),
PROGRAM_DESCRIPTION: z.string().optional(),
})

// Validates a whole input: an array of raw program rows.
export const RawProgramsArraySchema = z.array(RawProgramSchema)

// Program record in its transformed form. transformProgramData fills acronym,
// label and description from PROGRAM_STRING, PROGRAM_LABEL and
// PROGRAM_DESCRIPTION respectively; description may be absent.
export const ProgramRecordSchema = z.object({
acronym: z.string(),
label: z.string(),
description: z.string().optional(),
})

// Static type inferred from ProgramRecordSchema.
export type ProgramRecord = z.infer<typeof ProgramRecordSchema>
Expand Down Expand Up @@ -42,6 +44,7 @@ export function transformProgramData(rawData: unknown): {
uniquePrograms.set(row.PROGRAM_STRING, {
acronym: row.PROGRAM_STRING,
label: row.PROGRAM_LABEL,
description: row.PROGRAM_DESCRIPTION,
})
}
})
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import { afterEach, beforeEach, describe, it, expect, vi } from 'vitest'
import { MigrationBuilder } from 'node-pg-migrate'

// Replace fs.readFileSync with a fixed two-row CSV so the migration module
// builds its SQL from known data: the first row has an empty FREQUENCY_NOTES
// cell; the second has an empty FREQUENCY cell and single quotes in its notes
// and program description, which must come out escaped.
vi.mock('fs', () => ({
readFileSync: vi
.fn()
.mockReturnValue(
`COMPONENT_STRING,COMPONENT_LABEL,COMPONENT_DESCRIPTION,API_SHORT_NAME,PROGRAM_STRING,PROGRAM_LABEL,FREQUENCY,FREQUENCY_NOTES,PROGRAM_DESCRIPTION\n` +
`acs/acs1,1-Year Estimates,ACS 1-year estimates,acs/acs1,ACS,American Community Survey,Annual,,Continuous monthly survey pooled into 1-year and 5-year estimates\n` +
`dec/dec1,Summary File 1,Decennial summary file,dec/dec1,DEC,It's the Decennial Census,,Notes with 'quotes',It's the Decennial Census description\n`,
),
}))

describe('Migration - Create Programs and Components Search Functions', () => {
// Shared fixtures: a stand-in MigrationBuilder exposing addColumns, dropColumns
// and sql, each backed by its own spy.
let mockPgm: MigrationBuilder
let addColumnsSpy: ReturnType<typeof vi.fn>
let dropColumnsSpy: ReturnType<typeof vi.fn>
let sqlSpy: ReturnType<typeof vi.fn>

// Rebuild the spies and the mock builder before every test.
beforeEach(async () => {
addColumnsSpy = vi.fn().mockResolvedValue(undefined)
dropColumnsSpy = vi.fn().mockResolvedValue(undefined)
sqlSpy = vi.fn().mockResolvedValue(undefined)

mockPgm = {
addColumns: addColumnsSpy,
dropColumns: dropColumnsSpy,
sql: sqlSpy,
// Double cast: the object implements only part of MigrationBuilder.
} as Partial<MigrationBuilder> as MigrationBuilder
})

// Clear recorded calls and reset the module registry so the next dynamic
// import re-evaluates the migration module.
afterEach(() => {
vi.clearAllMocks()
vi.resetModules()
})

// Checks the generated backfill SQL text against the mocked CSV rows: quote
// escaping, blank cells rendered as NULL, and ordinary values passed through.
describe('updateProgramComponentColumns SQL', () => {
let updateProgramComponentColumns: string

// Import the migration per test; the SQL string is built when the module loads.
beforeEach(async () => {
const migration = await import(
'../../migrations/1773071716018_create-programs-and-components-search-functions'
)
updateProgramComponentColumns = migration.updateProgramComponentColumns
})

it('escapes single quotes in program descriptions', () => {
expect(updateProgramComponentColumns).toContain(
`'It''s the Decennial Census description'`,
)
})

it('escapes single quotes in component frequency_notes', () => {
expect(updateProgramComponentColumns).toContain(`'Notes with ''quotes'''`)
})

it('converts empty frequency to NULL', () => {
expect(updateProgramComponentColumns).toContain(`'dec/dec1', NULL,`)
})

it('converts empty frequency_notes to NULL', () => {
expect(updateProgramComponentColumns).toContain(
`'acs/acs1', 'Annual', NULL`,
)
})

it('includes valid program values', () => {
expect(updateProgramComponentColumns).toContain(
`'ACS', 'Continuous monthly survey pooled into 1-year and 5-year estimates'`,
)
})
})

// Verifies what up() sends to the migration builder and in which order.
describe('up', () => {
  // Loaded in beforeEach so the exported SQL strings can be compared with the
  // statements up() actually executed.
  let migration: typeof import('../../migrations/1773071716018_create-programs-and-components-search-functions')

  beforeEach(async () => {
    migration = await import(
      '../../migrations/1773071716018_create-programs-and-components-search-functions'
    )
    await migration.up(mockPgm)
  })

  // Global invocation order of the pgm.sql call that received exactly
  // `statement`; fails the test when no such call was made.
  const sqlCallOrder = (statement: string): number => {
    const callIndex = sqlSpy.mock.calls.findIndex(([sql]) => sql === statement)
    expect(callIndex).toBeGreaterThanOrEqual(0)
    return sqlSpy.mock.invocationCallOrder[callIndex]
  }

  it('adds frequency and frequency_notes columns to components', () => {
    expect(addColumnsSpy).toHaveBeenCalledWith('components', {
      frequency: { type: 'varchar(50)' },
      frequency_notes: { type: 'text' },
    })
  })

  it('runs the backfill SQL for program and component columns', () => {
    expect(sqlSpy).toHaveBeenCalledWith(migration.updateProgramComponentColumns)
  })

  it('adds the columns before running the backfill', () => {
    const addColumnsOrder = addColumnsSpy.mock.invocationCallOrder[0]

    expect(addColumnsOrder).toBeLessThan(
      sqlCallOrder(migration.updateProgramComponentColumns),
    )
  })

  it('runs backfill before creating survey functions', () => {
    // Look each statement up by its SQL text. Comparing call #0 with call #1
    // and #2 of the same spy would pass regardless of the order up() used.
    const backfillOrder = sqlCallOrder(migration.updateProgramComponentColumns)
    const programsFnOrder = sqlCallOrder(migration.listSurveyProgramsSQL)
    const componentsFnOrder = sqlCallOrder(migration.listSurveyComponentsSQL)

    expect(backfillOrder).toBeLessThan(programsFnOrder)
    expect(backfillOrder).toBeLessThan(componentsFnOrder)
  })
})

// Verifies what down() sends to the migration builder and in which order.
describe('down', () => {
  const dropComponentsFnSql =
    'DROP FUNCTION IF EXISTS list_survey_components(TEXT)'
  const dropProgramsFnSql = 'DROP FUNCTION IF EXISTS list_survey_programs()'

  beforeEach(async () => {
    const { down } = await import(
      '../../migrations/1773071716018_create-programs-and-components-search-functions'
    )
    await down(mockPgm)
  })

  // Global invocation order of the pgm.sql call that received exactly
  // `statement`; fails the test when no such call was made.
  const sqlCallOrder = (statement: string): number => {
    const callIndex = sqlSpy.mock.calls.findIndex(([sql]) => sql === statement)
    expect(callIndex).toBeGreaterThanOrEqual(0)
    return sqlSpy.mock.invocationCallOrder[callIndex]
  }

  it('drops the list_survey_components function', () => {
    expect(sqlSpy).toHaveBeenCalledWith(dropComponentsFnSql)
  })

  it('drops the list_survey_programs function', () => {
    expect(sqlSpy).toHaveBeenCalledWith(dropProgramsFnSql)
  })

  it('drops frequency and frequency_notes columns from components', () => {
    expect(dropColumnsSpy).toHaveBeenCalledWith('components', [
      'frequency',
      'frequency_notes',
    ])
  })

  it('nulls out program descriptions', () => {
    expect(sqlSpy).toHaveBeenCalledWith(
      'UPDATE programs SET description = NULL;',
    )
  })

  it('drops functions before dropping columns', () => {
    // Check both DROP FUNCTION statements by their SQL text, not just
    // whichever pgm.sql call happened to come first.
    const dropColumnsOrder = dropColumnsSpy.mock.invocationCallOrder[0]

    expect(sqlCallOrder(dropComponentsFnSql)).toBeLessThan(dropColumnsOrder)
    expect(sqlCallOrder(dropProgramsFnSql)).toBeLessThan(dropColumnsOrder)
  })
})
})
Loading
Loading