uscensusbureau · luke-keller-census · Mar 11, 2026 · Mar 13, 2026
diff --git a/mcp-db/data/components-programs.csv b/mcp-db/data/components-programs.csv
diff --git a/mcp-db/migrations/1773071716018_create-programs-and-components-search-functions.ts b/mcp-db/migrations/1773071716018_create-programs-and-components-search-functions.ts
@@ -0,0 +1,176 @@
+import { MigrationBuilder } from 'node-pg-migrate'
+import { fileURLToPath } from 'url'
+import path from 'path'
+import { readFileSync } from 'fs'
+import { parse } from 'csv-parse/sync'
+
+// ES modules do not expose the CommonJS `__filename` / `__dirname` globals,
+// so both are rebuilt from this module's own URL.
+const __filename = fileURLToPath(new URL(import.meta.url))
+const __dirname = path.dirname(__filename)
+
+/** Names of the CSV columns this migration reads. */
+type CsvColumn =
+  | 'PROGRAM_STRING'
+  | 'PROGRAM_DESCRIPTION'
+  | 'COMPONENT_STRING'
+  | 'FREQUENCY'
+  | 'FREQUENCY_NOTES'
+
+/**
+ * One parsed CSV record, keyed by column header. Every field is a string;
+ * blank cells arrive as empty strings and are turned into SQL NULL later by
+ * `escapeSql`.
+ */
+type CsvRow = Record<CsvColumn, string>
+
+// Load the CSV that sits next to the migrations folder. This runs once, at
+// module-evaluation time, so the SQL strings below are fixed as soon as the
+// migration file is imported.
+const csvPath = path.resolve(__dirname, '../data/components-programs.csv')
+const csvText = readFileSync(csvPath, 'utf8')
+// `columns: true` keys each record by the header row; blank lines are dropped.
+const csvData = parse(csvText, { columns: true, skip_empty_lines: true })
+
+/**
+ * Renders a CSV field as a SQL literal.
+ *
+ * @param value - Raw field text; may be missing.
+ * @returns The bare keyword `NULL` when the value is null, undefined, or
+ *   only whitespace; otherwise the original (untrimmed) text wrapped in
+ *   single quotes with every embedded single quote doubled.
+ */
+const escapeSql = (value: string | null | undefined): string => {
+  if (value == null) return 'NULL'
+  if (value.trim().length === 0) return 'NULL'
+
+  const doubledQuotes = value.split("'").join("''")
+  return "'" + doubledQuotes + "'"
+}
+// Collapse the per-component CSV rows into one entry per program:
+// acronym -> description. Rows without a PROGRAM_STRING are ignored.
+const programMap = new Map<string, string>()
+for (const row of csvData as CsvRow[]) {
+  const acronym = row.PROGRAM_STRING?.trim()
+  if (!acronym) continue
+  const description = row.PROGRAM_DESCRIPTION?.trim() ?? ''
+  // Keep the first NON-BLANK description seen for a program. A program whose
+  // first row has an empty PROGRAM_DESCRIPTION must not lock in '' (which the
+  // backfill would write as NULL) when a later row carries the real text.
+  // Re-setting an existing key keeps the Map's original insertion order.
+  if (!programMap.get(acronym)) {
+    programMap.set(acronym, description)
+  }
+}
+
+// Build the body of a SQL VALUES list: one `(acronym, description)` tuple per
+// program, separated so the tuples line up inside the backfill statement.
+const programTuples: string[] = []
+for (const [acronym, description] of programMap) {
+  programTuples.push(`(${escapeSql(acronym)}, ${escapeSql(description)})`)
+}
+const programValues = programTuples.join(',\n        ')
+
+// Build the body of a SQL VALUES list: one
+// `(component_id, frequency, frequency_notes)` tuple per CSV row.
+//
+// Fields are trimmed and rows without a COMPONENT_STRING are skipped, the
+// same way program rows are handled: the backfill joins on
+// `components.component_id = v.component_id`, so a key with stray whitespace
+// would silently match nothing and a NULL key can never match at all.
+const componentValues = (csvData as CsvRow[])
+  .map((row) => ({
+    componentId: row.COMPONENT_STRING?.trim(),
+    frequency: row.FREQUENCY?.trim(),
+    frequencyNotes: row.FREQUENCY_NOTES?.trim(),
+  }))
+  .filter(({ componentId }) => Boolean(componentId))
+  .map(
+    ({ componentId, frequency, frequencyNotes }) =>
+      `(${escapeSql(componentId)}, ${escapeSql(frequency)}, ${escapeSql(frequencyNotes)})`,
+  )
+  .join(',\n        ')
+
+/**
+ * DDL that creates (or replaces) the `list_survey_programs()` SQL function.
+ *
+ * The function returns one row per row of `programs`, ordered by label:
+ * - `program_label` / `program_string`: the program's label and acronym.
+ * - `description`: the trimmed, non-empty `programs.description`; otherwise
+ *   the description of the lowest-id component of that program whose
+ *   description is not NULL.
+ * - `table_count`: number of distinct `data_tables` reachable through
+ *   components -> datasets -> data_table_datasets. All joins are LEFT joins,
+ *   so a program with nothing linked still appears, with a count of 0.
+ */
+export const listSurveyProgramsSQL = `
+  CREATE OR REPLACE FUNCTION list_survey_programs()
+  RETURNS TABLE (
+    program_label  TEXT,
+    program_string VARCHAR(15),
+    description    TEXT,
+    table_count    INT
+  )
+  LANGUAGE sql STABLE
+  AS $$
+    SELECT
+      p.label                                                    AS program_label,
+      p.acronym                                                  AS program_string,
+      -- Prefer the explicit program description; fall back to the first
+      -- non-null component description so that programs without a
+      -- programs.description still get orientation text.
+      COALESCE(
+        NULLIF(TRIM(p.description), ''),
+        (
+          SELECT c2.description
+          FROM   components c2
+          WHERE  c2.program_id = p.id
+            AND  c2.description IS NOT NULL
+          ORDER  BY c2.id
+          LIMIT  1
+        )
+      )                                                          AS description,
+      COUNT(DISTINCT dt.id)::INT                                 AS table_count
+    FROM  programs p
+    LEFT  JOIN components          c   ON c.program_id   = p.id
+    LEFT  JOIN datasets            d   ON d.component_id = c.id
+    LEFT  JOIN data_table_datasets dtd ON dtd.dataset_id = d.id
+    LEFT  JOIN data_tables         dt  ON dt.id = dtd.data_table_id
+    GROUP BY p.id, p.label, p.acronym, p.description
+    ORDER BY p.label
+  $$;
+`
+
+/**
+ * DDL that creates (or replaces) the `list_survey_components(TEXT)` SQL
+ * function.
+ *
+ * The argument is compared with `programs.acronym` using plain equality. The
+ * function returns one row per component of the matching program, ordered by
+ * label. `programs` is inner-joined to `components`, so an unknown acronym or
+ * a program without components yields no rows.
+ *
+ * Per component it reports:
+ * - `vintage_start` / `vintage_end`: smallest and largest `years.year` across
+ *   the component's datasets (NULL when none are linked).
+ * - `has_gaps`: NULL when no years are linked; otherwise true when the number
+ *   of distinct years differs from `vintage_end - vintage_start + 1`.
+ * - `table_count`: number of distinct `data_tables` linked through
+ *   datasets -> data_table_datasets.
+ */
+export const listSurveyComponentsSQL = `
+  CREATE OR REPLACE FUNCTION list_survey_components(p_program_string TEXT)
+  RETURNS TABLE (
+    component_label  TEXT,
+    component_string VARCHAR(60),
+    api_endpoint     VARCHAR(60),
+    frequency        VARCHAR(50),
+    frequency_notes  TEXT,
+    vintage_start    INT,
+    vintage_end      INT,
+    has_gaps         BOOLEAN,
+    table_count      INT,
+    description      TEXT
+  )
+  LANGUAGE sql STABLE
+  AS $$
+    SELECT
+      c.label                                                        AS component_label,
+      c.component_id                                                 AS component_string,
+      c.api_endpoint,
+      c.frequency,
+      c.frequency_notes,
+      MIN(y.year)::INT                                               AS vintage_start,
+      MAX(y.year)::INT                                               AS vintage_end,
+      -- has_gaps: true when the year range is not fully contiguous.
+      -- NULL when no datasets are linked (nothing to compare).
+      CASE
+        WHEN COUNT(DISTINCT y.year) = 0 THEN NULL
+        ELSE (MAX(y.year) - MIN(y.year) + 1) != COUNT(DISTINCT y.year)
+      END                                                            AS has_gaps,
+      COUNT(DISTINCT dt.id)::INT                                     AS table_count,
+      c.description
+    FROM   programs p
+    JOIN   components          c   ON c.program_id   = p.id
+    LEFT   JOIN datasets       d   ON d.component_id = c.id
+    LEFT   JOIN years          y   ON y.id = d.year_id
+    LEFT   JOIN data_table_datasets dtd ON dtd.dataset_id = d.id
+    LEFT   JOIN data_tables    dt  ON dt.id = dtd.data_table_id
+    WHERE  p.acronym = p_program_string
+    GROUP  BY c.id, c.label, c.component_id, c.api_endpoint,
+              c.frequency, c.frequency_notes, c.description
+    ORDER  BY c.label
+  $$;
+`
+// Dollar-quote tag for the DO block below. CSV text is spliced into the block
+// body, and a dollar-quoted body ends at the first occurrence of its own tag,
+// even inside a single-quoted literal. A bare `$$` is plausible in free-text
+// notes, so a distinctive tag is used, and the data is checked for it so a
+// collision fails loudly here instead of producing malformed SQL.
+const BACKFILL_QUOTE_TAG = '$backfill$'
+if (
+  programValues.includes(BACKFILL_QUOTE_TAG) ||
+  componentValues.includes(BACKFILL_QUOTE_TAG)
+) {
+  throw new Error(
+    `components-programs.csv contains "${BACKFILL_QUOTE_TAG}", which would terminate the backfill DO block early`,
+  )
+}
+
+// `FROM (VALUES ) AS v(...)` is a syntax error, so each UPDATE is only emitted
+// when the CSV actually produced tuples for it; otherwise a notice is raised.
+const programBackfillSQL = programValues
+  ? `UPDATE programs SET description = v.description
+      FROM (VALUES
+        ${programValues}
+      ) AS v(acronym, description)
+      WHERE programs.acronym = v.acronym;`
+  : `RAISE NOTICE 'no program rows found in CSV, skipping programs.description backfill.';`
+
+const componentBackfillSQL = componentValues
+  ? `UPDATE components SET
+        frequency = v.frequency,
+        frequency_notes = v.frequency_notes
+      FROM (VALUES
+        ${componentValues}
+      ) AS v(component_id, frequency, frequency_notes)
+      WHERE components.component_id = v.component_id;`
+  : `RAISE NOTICE 'no component rows found in CSV, skipping components frequency backfill.';`
+
+/**
+ * Anonymous PL/pgSQL block that backfills `programs.description` and
+ * `components.frequency` / `components.frequency_notes` from the CSV values
+ * captured at module load.
+ *
+ * The updates run only when both `programs` and `components` already contain
+ * rows; otherwise the block raises a notice and changes nothing. Rows are
+ * matched on `programs.acronym` and `components.component_id` respectively.
+ */
+export const updateProgramComponentColumns = `
+  DO ${BACKFILL_QUOTE_TAG}
+  BEGIN
+    IF EXISTS (SELECT 1 FROM programs LIMIT 1) AND EXISTS (SELECT 1 FROM components LIMIT 1) THEN
+
+      ${programBackfillSQL}
+
+      ${componentBackfillSQL}
+
+    ELSE
+      RAISE NOTICE 'programs or components table is empty, skipping backfill for frequency and description columns.';
+    END IF;
+  END ${BACKFILL_QUOTE_TAG};
+`
+
+/**
+ * Applies the migration: adds `frequency` and `frequency_notes` to
+ * `components`, backfills them (and `programs.description`) from the CSV,
+ * then creates the two listing functions.
+ *
+ * @param pgm - node-pg-migrate builder that collects the operations.
+ */
+export async function up(pgm: MigrationBuilder): Promise<void> {
+  const newComponentColumns = {
+    frequency: { type: 'varchar(50)' },
+    frequency_notes: { type: 'text' },
+  }
+  pgm.addColumns('components', newComponentColumns)
+
+  // The columns above must exist before any of these statements run: the
+  // backfill writes to them and list_survey_components selects them.
+  const statements = [
+    updateProgramComponentColumns,
+    listSurveyProgramsSQL,
+    listSurveyComponentsSQL,
+  ]
+  for (const statement of statements) {
+    pgm.sql(statement)
+  }
+}
+
+/**
+ * Reverts the migration: drops both listing functions, removes the two
+ * columns added to `components`, and clears `programs.description`.
+ *
+ * NOTE: the final UPDATE has no WHERE clause, so it nulls the description of
+ * every program, not only the ones this migration backfilled.
+ *
+ * @param pgm - node-pg-migrate builder that collects the operations.
+ */
+export async function down(pgm: MigrationBuilder): Promise<void> {
+  // Functions go first: list_survey_components reads the columns dropped below.
+  const dropFunctionStatements = [
+    'DROP FUNCTION IF EXISTS list_survey_components(TEXT)',
+    'DROP FUNCTION IF EXISTS list_survey_programs()',
+  ]
+  dropFunctionStatements.forEach((statement) => {
+    pgm.sql(statement)
+  })
+
+  const addedColumns = ['frequency', 'frequency_notes']
+  pgm.dropColumns('components', addedColumns)
+
+  pgm.sql('UPDATE programs SET description = NULL;')
+}
diff --git a/mcp-db/src/schema/components.schema.ts b/mcp-db/src/schema/components.schema.ts
@@ -6,6 +6,8 @@ export const RawComponentSchema = z.object({
   COMPONENT_DESCRIPTION: z.string(),
   API_SHORT_NAME: z.string(),
   PROGRAM_STRING: z.string(),
+  FREQUENCY: z.string().optional(),
+  FREQUENCY_NOTES: z.string().optional(),
 })
 
 export const RawComponentsArraySchema = z.array(RawComponentSchema)
@@ -16,6 +18,8 @@ export const ComponentRecordSchema = z.object({
   description: z.string(),
   api_endpoint: z.string(),
   program_id: z.number(),
+  frequency: z.string().optional(),
+  frequency_notes: z.string().optional(),
 })
 
 export type ComponentRecord = z.infer<typeof ComponentRecordSchema>
@@ -65,6 +69,8 @@ export function transformComponentData(
         description: row.COMPONENT_DESCRIPTION,
         api_endpoint: row.API_SHORT_NAME,
         program_id: programId,
+        frequency: row.FREQUENCY,
+        frequency_notes: row.FREQUENCY_NOTES,
       })
     }
   })

diff --git a/mcp-db/src/schema/program.schema.ts b/mcp-db/src/schema/program.schema.ts
@@ -3,13 +3,15 @@ import { z } from 'zod'
 export const RawProgramSchema = z.object({
   PROGRAM_STRING: z.string(),
   PROGRAM_LABEL: z.string(),
+  PROGRAM_DESCRIPTION: z.string().optional(),
 })
 
 export const RawProgramsArraySchema = z.array(RawProgramSchema)
 
 export const ProgramRecordSchema = z.object({
   acronym: z.string(),
   label: z.string(),
+  description: z.string().optional(),
 })
 
 export type ProgramRecord = z.infer<typeof ProgramRecordSchema>
@@ -42,6 +44,7 @@ export function transformProgramData(rawData: unknown): {
       uniquePrograms.set(row.PROGRAM_STRING, {
         acronym: row.PROGRAM_STRING,
         label: row.PROGRAM_LABEL,
+        description: row.PROGRAM_DESCRIPTION,
       })
     }
   })

diff --git a/...db/tests/migrations/1773071716018_create-programs-and-components-search-functions.test.ts b/...db/tests/migrations/1773071716018_create-programs-and-components-search-functions.test.ts
@@ -0,0 +1,148 @@
+import { afterEach, beforeEach, describe, it, expect, vi } from 'vitest'
+import { MigrationBuilder } from 'node-pg-migrate'
+
+// Replace `fs` so the migration module parses this in-memory CSV instead of
+// the real data file. The fixture has one row with a blank FREQUENCY_NOTES
+// and one row with a blank FREQUENCY plus embedded single quotes, which the
+// escaping tests below rely on. The fixture is built inside the factory
+// because vitest hoists `vi.mock` above the rest of the module.
+vi.mock('fs', () => {
+  const fixtureCsv = [
+    `COMPONENT_STRING,COMPONENT_LABEL,COMPONENT_DESCRIPTION,API_SHORT_NAME,PROGRAM_STRING,PROGRAM_LABEL,FREQUENCY,FREQUENCY_NOTES,PROGRAM_DESCRIPTION\n`,
+    `acs/acs1,1-Year Estimates,ACS 1-year estimates,acs/acs1,ACS,American Community Survey,Annual,,Continuous monthly survey pooled into 1-year and 5-year estimates\n`,
+    `dec/dec1,Summary File 1,Decennial summary file,dec/dec1,DEC,It's the Decennial Census,,Notes with 'quotes',It's the Decennial Census description\n`,
+  ].join('')
+
+  return {
+    readFileSync: vi.fn().mockReturnValue(fixtureCsv),
+  }
+})
+
+// Unit tests for the programs/components migration. The migration is driven
+// through a stub MigrationBuilder whose methods are spies, so the tests check
+// which operations were requested and in what order, not their effect on a
+// database. The module is re-imported for every test (modules are reset in
+// afterEach) so its CSV-derived strings are rebuilt from the mocked `fs`.
+describe('Migration - Create Programs and Components Search Functions', () => {
+  let mockPgm: MigrationBuilder
+  let addColumnsSpy: ReturnType<typeof vi.fn>
+  let dropColumnsSpy: ReturnType<typeof vi.fn>
+  let sqlSpy: ReturnType<typeof vi.fn>
+
+  beforeEach(async () => {
+    addColumnsSpy = vi.fn().mockResolvedValue(undefined)
+    dropColumnsSpy = vi.fn().mockResolvedValue(undefined)
+    sqlSpy = vi.fn().mockResolvedValue(undefined)
+
+    mockPgm = {
+      addColumns: addColumnsSpy,
+      dropColumns: dropColumnsSpy,
+      sql: sqlSpy,
+    } as Partial<MigrationBuilder> as MigrationBuilder
+  })
+
+  afterEach(() => {
+    vi.clearAllMocks()
+    vi.resetModules()
+  })
+
+  describe('updateProgramComponentColumns SQL', () => {
+    let updateProgramComponentColumns: string
+
+    beforeEach(async () => {
+      const migration = await import(
+        '../../migrations/1773071716018_create-programs-and-components-search-functions'
+      )
+      updateProgramComponentColumns = migration.updateProgramComponentColumns
+    })
+
+    it('escapes single quotes in program descriptions', () => {
+      expect(updateProgramComponentColumns).toContain(
+        `'It''s the Decennial Census description'`,
+      )
+    })
+
+    it('escapes single quotes in component frequency_notes', () => {
+      expect(updateProgramComponentColumns).toContain(`'Notes with ''quotes'''`)
+    })
+
+    it('converts empty frequency to NULL', () => {
+      expect(updateProgramComponentColumns).toContain(`'dec/dec1', NULL,`)
+    })
+
+    it('converts empty frequency_notes to NULL', () => {
+      expect(updateProgramComponentColumns).toContain(
+        `'acs/acs1', 'Annual', NULL`,
+      )
+    })
+
+    it('includes valid program values', () => {
+      expect(updateProgramComponentColumns).toContain(
+        `'ACS', 'Continuous monthly survey pooled into 1-year and 5-year estimates'`,
+      )
+    })
+  })
+
+  describe('up', () => {
+    beforeEach(async () => {
+      const { up } = await import(
+        '../../migrations/1773071716018_create-programs-and-components-search-functions'
+      )
+      await up(mockPgm)
+    })
+
+    it('adds frequency and frequency_notes columns to components', () => {
+      expect(addColumnsSpy).toHaveBeenCalledWith('components', {
+        frequency: { type: 'varchar(50)' },
+        frequency_notes: { type: 'text' },
+      })
+    })
+
+    it('runs the backfill SQL for program and component columns', async () => {
+      // The migration module itself is not mocked (only `fs` is), so a plain
+      // import is enough to read the exported SQL string.
+      const { updateProgramComponentColumns } = await import(
+        '../../migrations/1773071716018_create-programs-and-components-search-functions'
+      )
+      expect(sqlSpy).toHaveBeenCalledWith(updateProgramComponentColumns)
+    })
+
+    it('runs backfill before creating survey functions', async () => {
+      const {
+        updateProgramComponentColumns,
+        listSurveyProgramsSQL,
+        listSurveyComponentsSQL,
+      } = await import(
+        '../../migrations/1773071716018_create-programs-and-components-search-functions'
+      )
+
+      // Compare the statements themselves. Comparing invocationCallOrder of
+      // calls 0, 1 and 2 on the same spy is always ascending and would pass
+      // no matter which SQL was issued at each position.
+      expect(sqlSpy.mock.calls.map(([statement]) => statement)).toEqual([
+        updateProgramComponentColumns,
+        listSurveyProgramsSQL,
+        listSurveyComponentsSQL,
+      ])
+
+      // The backfill writes to the new columns, so they must be added first.
+      expect(addColumnsSpy.mock.invocationCallOrder[0]).toBeLessThan(
+        sqlSpy.mock.invocationCallOrder[0],
+      )
+    })
+  })
+
+  describe('down', () => {
+    beforeEach(async () => {
+      const { down } = await import(
+        '../../migrations/1773071716018_create-programs-and-components-search-functions'
+      )
+      await down(mockPgm)
+    })
+
+    it('drops the list_survey_components function', () => {
+      expect(sqlSpy).toHaveBeenCalledWith(
+        'DROP FUNCTION IF EXISTS list_survey_components(TEXT)',
+      )
+    })
+
+    it('drops the list_survey_programs function', () => {
+      expect(sqlSpy).toHaveBeenCalledWith(
+        'DROP FUNCTION IF EXISTS list_survey_programs()',
+      )
+    })
+
+    it('drops frequency and frequency_notes columns from components', () => {
+      expect(dropColumnsSpy).toHaveBeenCalledWith('components', [
+        'frequency',
+        'frequency_notes',
+      ])
+    })
+
+    it('nulls out program descriptions', () => {
+      expect(sqlSpy).toHaveBeenCalledWith(
+        'UPDATE programs SET description = NULL;',
+      )
+    })
+
+    it('drops functions before dropping columns', () => {
+      const statements = sqlSpy.mock.calls.map(([statement]) => statement)
+      const dropColumnsOrder = dropColumnsSpy.mock.invocationCallOrder[0]
+
+      // Locate each DROP FUNCTION call by its text instead of assuming it is
+      // the first sql() call, then check it was issued before dropColumns.
+      for (const dropFunction of [
+        'DROP FUNCTION IF EXISTS list_survey_components(TEXT)',
+        'DROP FUNCTION IF EXISTS list_survey_programs()',
+      ]) {
+        const callIndex = statements.indexOf(dropFunction)
+        expect(callIndex).toBeGreaterThanOrEqual(0)
+        expect(sqlSpy.mock.invocationCallOrder[callIndex]).toBeLessThan(
+          dropColumnsOrder,
+        )
+      }
+    })
+  })
+})