diff --git a/apps/admin/src/api/ai.ts b/apps/admin/src/api/ai.ts index b2656306936..7db231660a0 100644 --- a/apps/admin/src/api/ai.ts +++ b/apps/admin/src/api/ai.ts @@ -134,6 +134,7 @@ export interface ProviderModel { } export interface ProviderModelsResponse { + embeddingModels?: ProviderModel[] error?: string models: ProviderModel[] providerId: string diff --git a/apps/admin/src/features/settings/components/ai/AIConfigEditor.tsx b/apps/admin/src/features/settings/components/ai/AIConfigEditor.tsx index 983bd997b85..a7f55d59477 100644 --- a/apps/admin/src/features/settings/components/ai/AIConfigEditor.tsx +++ b/apps/admin/src/features/settings/components/ai/AIConfigEditor.tsx @@ -36,23 +36,52 @@ export function AIConfigEditor(props: { enabled: hasEnabledProvider, queryFn: async () => { const response = await getModels() - return response.reduce>( + return response.reduce<{ + chat: Record + embedding: Record + }>( (result, provider) => ({ - ...result, - [provider.providerId]: provider.models ?? [], + chat: { + ...result.chat, + [provider.providerId]: provider.models ?? [], + }, + embedding: { + ...result.embedding, + [provider.providerId]: provider.embeddingModels ?? [], + }, }), - {}, + { chat: {}, embedding: {} }, ) }, queryKey: props.modelCacheKey, staleTime: 24 * 60 * 60 * 1000, }) - const providerModels = modelsQuery.data ?? {} + const providerModels = modelsQuery.data?.chat ?? {} + const embeddingProviderModels = modelsQuery.data?.embedding ?? {} const providers = props.value.providers ?? [] const updateConfig = (patch: Partial) => props.onChange({ ...props.value, ...patch }) + const updateNumber = (key: keyof AIConfig, raw: string) => { + const trimmed = raw.trim() + updateConfig({ [key]: trimmed ? Number(trimmed) : undefined }) + } + + const updateNestedNumber = ( + section: 'aiEmbedding' | 'aiMemory' | 'aiPersona', + key: string, + raw: string, + ) => { + const trimmed = raw.trim() + updateConfig({ + [section]: { + ...(props.value[section] ?? 
{}), + [key]: trimmed ? Number(trimmed) : undefined, + }, + }) + } + const updateProvider = (id: string, patch: Partial) => { updateConfig({ providers: providers.map((provider) => @@ -335,6 +364,183 @@ export function AIConfigEditor(props: { /> + + updateConfig({ echoModel })} + providers={providers} + value={props.value.echoModel} + /> + updateConfig({ embeddingModel })} + providers={providers} + value={props.value.embeddingModel} + /> + + updateConfig({ personaDistillModel }) + } + providers={providers} + value={props.value.personaDistillModel} + /> + + } + description={t('settings.ai.section.echoDescription')} + enabled={Boolean(props.value.enableEcho)} + onEnabledChange={(enableEcho) => updateConfig({ enableEcho })} + title={t('settings.ai.section.echo')} + toggleLabel={t('settings.ai.switch.enableEcho')} + > + + updateConfig({ enableAutoGenerateEchoOnCreate }) + } + /> + + updateNumber('echoDailyQuota', value)} + type="number" + value={String(props.value.echoDailyQuota ?? 200)} + /> + updateNumber('echoRetrievalTopK', value)} + type="number" + value={String(props.value.echoRetrievalTopK ?? 5)} + /> + + updateNumber('echoRetrievalMinSimilarity', value) + } + step={0.01} + type="number" + value={String(props.value.echoRetrievalMinSimilarity ?? 0.72)} + /> + updateNumber('echoExemplarsCount', value)} + type="number" + value={String(props.value.echoExemplarsCount ?? 4)} + /> + + + + updateNestedNumber('aiEmbedding', 'chunkMaxTokens', value) + } + type="number" + value={String(props.value.aiEmbedding?.chunkMaxTokens ?? 500)} + /> + + updateNestedNumber('aiEmbedding', 'chunkOverlapTokens', value) + } + type="number" + value={String(props.value.aiEmbedding?.chunkOverlapTokens ?? 50)} + /> + + updateNestedNumber('aiEmbedding', 'backfillBatchSize', value) + } + type="number" + value={String(props.value.aiEmbedding?.backfillBatchSize ?? 
50)} + /> + + updateNestedNumber('aiEmbedding', 'defaultTopK', value) + } + type="number" + value={String(props.value.aiEmbedding?.defaultTopK ?? 5)} + /> + + updateNestedNumber('aiEmbedding', 'defaultMinSimilarity', value) + } + step={0.01} + type="number" + value={String( + props.value.aiEmbedding?.defaultMinSimilarity ?? 0.7, + )} + /> + + updateNestedNumber('aiMemory', 'recallTopK', value) + } + type="number" + value={String(props.value.aiMemory?.recallTopK ?? 5)} + /> + + updateNestedNumber('aiMemory', 'recallMinSimilarity', value) + } + step={0.01} + type="number" + value={String(props.value.aiMemory?.recallMinSimilarity ?? 0.7)} + /> + + updateNestedNumber('aiPersona', 'distillSampleMaxTokens', value) + } + type="number" + value={String( + props.value.aiPersona?.distillSampleMaxTokens ?? 60000, + )} + /> + + + ) } + +function NumberGrid(props: { children: ReactNode }) { + return ( +
+ {props.children} +
+ ) +} diff --git a/apps/admin/src/features/settings/types/settings.ts b/apps/admin/src/features/settings/types/settings.ts index 94bf87cc9f0..9fd217d701f 100644 --- a/apps/admin/src/features/settings/types/settings.ts +++ b/apps/admin/src/features/settings/types/settings.ts @@ -24,21 +24,47 @@ export interface AIModelAssignment { } export interface AIConfig { + aiEmbedding?: { + backfillBatchSize?: number + chunkMaxTokens?: number + chunkOverlapTokens?: number + defaultMinSimilarity?: number + defaultTopK?: number + } + aiMemory?: { + recallMinSimilarity?: number + recallTopK?: number + } + aiPersona?: { + distillSampleMaxTokens?: number + exemplarsCandidateCacheTtlSec?: number + exemplarsLengthMax?: number + exemplarsLengthMin?: number + } commentReviewModel?: AIModelAssignment + echoDailyQuota?: number + echoExemplarsCount?: number + echoModel?: AIModelAssignment + echoRetrievalMinSimilarity?: number + echoRetrievalTopK?: number enableAutoGenerateInsightsOnCreate?: boolean enableAutoGenerateInsightsOnUpdate?: boolean + enableAutoGenerateEchoOnCreate?: boolean enableAutoGenerateSummaryOnCreate?: boolean enableAutoGenerateSummaryOnUpdate?: boolean enableAutoGenerateTranslation?: boolean enableAutoTranslateInsights?: boolean + enableEcho?: boolean enableInsights?: boolean enableSummary?: boolean enableTranslation?: boolean enableTranslationReview?: boolean + embeddingModel?: AIModelAssignment insightsMinTextLength?: number insightsModel?: AIModelAssignment insightsTargetLanguages?: string[] insightsTranslationModel?: AIModelAssignment + personaDistillModel?: AIModelAssignment providers?: AIProviderConfig[] summaryMinTextLength?: number summaryModel?: AIModelAssignment diff --git a/apps/admin/src/i18n/resources/en-US.ts b/apps/admin/src/i18n/resources/en-US.ts index 77f9e042520..1a2b9b36527 100644 --- a/apps/admin/src/i18n/resources/en-US.ts +++ b/apps/admin/src/i18n/resources/en-US.ts @@ -1652,6 +1652,10 @@ export const enUS = { 
'settings.ai.assignment.commentReviewDescription': 'Model used to review comments.', 'settings.ai.assignment.commentReviewLabel': 'Comment review', + 'settings.ai.assignment.echoLabel': 'Echo generation', + 'settings.ai.assignment.embeddingDescription': + 'Embedding requires an explicit assignment; it does not fall back to the provider default chat model.', + 'settings.ai.assignment.embeddingLabel': 'Embedding', 'settings.ai.assignment.insightsDescription': 'Model used to generate long-form insights.', 'settings.ai.assignment.insightsLabel': 'Insights', @@ -1661,6 +1665,7 @@ export const enUS = { 'settings.ai.assignment.modelPlaceholder': 'Use provider default model', 'settings.ai.assignment.providerAriaLabel': '{label} provider', 'settings.ai.assignment.providerNone': 'Unassigned', + 'settings.ai.assignment.personaDistillLabel': 'Persona distill', 'settings.ai.assignment.summaryDescription': 'Model used to generate article summaries.', 'settings.ai.assignment.summaryLabel': 'Summary', @@ -1701,6 +1706,9 @@ export const enUS = { 'settings.ai.provider.editAction': 'Edit', 'settings.ai.provider.row.empty': 'No model assigned', 'settings.ai.section.featureToggles': 'Feature toggles', + 'settings.ai.section.echo': 'AI echo', + 'settings.ai.section.echoDescription': + 'Generate persona replies for recently entries, with retrieval, memories, and persona distillation.', 'settings.ai.section.insights': 'AI insights', 'settings.ai.section.insightsDescription': 'Generate long-form insights from article content; optional translation.', @@ -1722,7 +1730,23 @@ export const enUS = { 'Auto-generate summary on create', 'settings.ai.switch.enableAutoSummaryUpdate': 'Regenerate summary on update', 'settings.ai.switch.enableAutoTranslate': 'Auto-generate translations', + 'settings.ai.switch.enableAutoEchoCreate': + 'Auto-generate echo on recently create', 'settings.ai.switch.enableAutoTranslateInsights': 'Auto-translate insights', + 'settings.ai.switch.echoDailyQuota': 'Echo daily 
quota', + 'settings.ai.switch.echoExemplarsCount': 'Echo exemplars count', + 'settings.ai.switch.echoRetrievalMinSimilarity': + 'Echo retrieval min similarity', + 'settings.ai.switch.echoRetrievalTopK': 'Echo retrieval top-K', + 'settings.ai.switch.embeddingBackfillBatchSize': + 'Embedding backfill batch size', + 'settings.ai.switch.embeddingChunkMaxTokens': 'Embedding chunk max tokens', + 'settings.ai.switch.embeddingChunkOverlapTokens': + 'Embedding chunk overlap tokens', + 'settings.ai.switch.embeddingDefaultMinSimilarity': + 'Embedding default min similarity', + 'settings.ai.switch.embeddingDefaultTopK': 'Embedding default top-K', + 'settings.ai.switch.enableEcho': 'Enable AI echo', 'settings.ai.switch.enableInsights': 'Enable AI insights', 'settings.ai.switch.enableSummary': 'Enable AI summary', 'settings.ai.switch.enableTranslation': 'Enable AI translation', @@ -1732,6 +1756,11 @@ export const enUS = { 'settings.ai.switch.insightsMinTextLength': 'Insights auto-generate minimum text length', 'settings.ai.switch.insightsTargetLanguages': 'Insights target languages', + 'settings.ai.switch.memoryRecallMinSimilarity': + 'Memory recall min similarity', + 'settings.ai.switch.memoryRecallTopK': 'Memory recall top-K', + 'settings.ai.switch.personaDistillSampleMaxTokens': + 'Persona distill sample max tokens', 'settings.ai.switch.summaryMinTextLength': 'Summary auto-generate minimum text length', 'settings.ai.switch.summaryTargetLanguages': 'Summary target languages', diff --git a/apps/admin/src/i18n/resources/zh-CN.ts b/apps/admin/src/i18n/resources/zh-CN.ts index 5e0f54ae5b8..50915137135 100644 --- a/apps/admin/src/i18n/resources/zh-CN.ts +++ b/apps/admin/src/i18n/resources/zh-CN.ts @@ -1571,6 +1571,10 @@ export const zhCN = { 'settings.ai.action.testConnection': '测试连接', 'settings.ai.assignment.commentReviewDescription': '用于审核评论的模型。', 'settings.ai.assignment.commentReviewLabel': '评论审核', + 'settings.ai.assignment.echoLabel': 'Echo 生成', + 
'settings.ai.assignment.embeddingDescription': + 'Embedding 需要显式配置,不会回退到 Provider 的默认聊天模型。', + 'settings.ai.assignment.embeddingLabel': 'Embedding', 'settings.ai.assignment.insightsDescription': '用于生成长篇精读的模型。', 'settings.ai.assignment.insightsLabel': '精读生成', 'settings.ai.assignment.insightsTranslationDescription': @@ -1579,6 +1583,7 @@ export const zhCN = { 'settings.ai.assignment.modelPlaceholder': '使用 Provider 默认模型', 'settings.ai.assignment.providerAriaLabel': '{label}服务商', 'settings.ai.assignment.providerNone': '不指定', + 'settings.ai.assignment.personaDistillLabel': '人格蒸馏', 'settings.ai.assignment.summaryDescription': '用于生成文章摘要的模型。', 'settings.ai.assignment.summaryLabel': '摘要功能', 'settings.ai.assignment.translationDescription': '用于生成文章翻译的模型。', @@ -1615,6 +1620,9 @@ export const zhCN = { 'settings.ai.provider.editAction': '编辑', 'settings.ai.provider.row.empty': '未指定模型', 'settings.ai.section.featureToggles': '功能开关', + 'settings.ai.section.echo': 'AI Echo', + 'settings.ai.section.echoDescription': + '为树洞/最近动态生成人格化回复,可结合检索、记忆与人格蒸馏。', 'settings.ai.section.insights': 'AI 精读', 'settings.ai.section.insightsDescription': '基于文章内容生成长篇精读,可附自动翻译。', @@ -1632,7 +1640,19 @@ export const zhCN = { 'settings.ai.switch.enableAutoSummaryCreate': '文章创建时自动生成摘要', 'settings.ai.switch.enableAutoSummaryUpdate': '文章更新时重新生成摘要', 'settings.ai.switch.enableAutoTranslate': '自动生成翻译', + 'settings.ai.switch.enableAutoEchoCreate': '创建树洞时自动生成 Echo', 'settings.ai.switch.enableAutoTranslateInsights': '自动翻译精读', + 'settings.ai.switch.echoDailyQuota': 'Echo 每日额度', + 'settings.ai.switch.echoExemplarsCount': 'Echo 示例数量', + 'settings.ai.switch.echoRetrievalMinSimilarity': 'Echo 检索相似度下限', + 'settings.ai.switch.echoRetrievalTopK': 'Echo 检索 Top-K', + 'settings.ai.switch.embeddingBackfillBatchSize': 'Embedding 回填批大小', + 'settings.ai.switch.embeddingChunkMaxTokens': 'Embedding 分块最大 tokens', + 'settings.ai.switch.embeddingChunkOverlapTokens': 'Embedding 分块重叠 tokens', + 
'settings.ai.switch.embeddingDefaultMinSimilarity': + 'Embedding 默认相似度下限', + 'settings.ai.switch.embeddingDefaultTopK': 'Embedding 默认 Top-K', + 'settings.ai.switch.enableEcho': '启用 AI Echo', 'settings.ai.switch.enableInsights': '启用 AI 精读', 'settings.ai.switch.enableSummary': '启用 AI 摘要', 'settings.ai.switch.enableTranslation': '启用 AI 翻译', @@ -1641,6 +1661,9 @@ export const zhCN = { '开启后走 writer → reviewer → editor 流水线:reviewer 按本地化规范评分,低于阈值时由 editor 修订有问题的段落。', 'settings.ai.switch.insightsMinTextLength': '精读自动生成最小文本长度', 'settings.ai.switch.insightsTargetLanguages': '精读目标语言', + 'settings.ai.switch.memoryRecallMinSimilarity': '记忆召回相似度下限', + 'settings.ai.switch.memoryRecallTopK': '记忆召回 Top-K', + 'settings.ai.switch.personaDistillSampleMaxTokens': '人格蒸馏采样最大 tokens', 'settings.ai.switch.summaryMinTextLength': '摘要自动生成最小文本长度', 'settings.ai.switch.summaryTargetLanguages': '摘要目标语言', 'settings.ai.switch.translationReviewScoreThreshold': '审稿评分阈值', diff --git a/apps/admin/src/models/options.ts b/apps/admin/src/models/options.ts index bc2c6569580..e3c68268e75 100644 --- a/apps/admin/src/models/options.ts +++ b/apps/admin/src/models/options.ts @@ -153,13 +153,39 @@ export namespace MxServerOptions { } export interface AIOption { + aiEmbedding?: { + backfillBatchSize?: number + chunkMaxTokens?: number + chunkOverlapTokens?: number + defaultMinSimilarity?: number + defaultTopK?: number + } + aiMemory?: { + recallMinSimilarity?: number + recallTopK?: number + } + aiPersona?: { + distillSampleMaxTokens?: number + exemplarsCandidateCacheTtlSec?: number + exemplarsLengthMax?: number + exemplarsLengthMin?: number + } providers: AIProviderConfig[] summaryModel?: AIModelAssignment writerModel?: AIModelAssignment commentReviewModel?: AIModelAssignment + echoModel?: AIModelAssignment + embeddingModel?: AIModelAssignment + personaDistillModel?: AIModelAssignment enableSummary: boolean enableAutoGenerateSummaryOnCreate: boolean enableAutoGenerateSummaryOnUpdate: boolean + enableEcho?: boolean 
+ enableAutoGenerateEchoOnCreate?: boolean + echoDailyQuota?: number + echoRetrievalTopK?: number + echoRetrievalMinSimilarity?: number + echoExemplarsCount?: number summaryTargetLanguages: string[] summaryMinTextLength?: number insightsModel?: AIModelAssignment @@ -179,6 +205,7 @@ export namespace MxServerOptions { } export interface ProviderModelsResponse { + embeddingModels?: ModelInfo[] providerId: string providerName: string providerType: AIProviderType diff --git a/apps/admin/src/ui/primitives/text-field.tsx b/apps/admin/src/ui/primitives/text-field.tsx index 3ff296c8475..c01f03bb02f 100644 --- a/apps/admin/src/ui/primitives/text-field.tsx +++ b/apps/admin/src/ui/primitives/text-field.tsx @@ -25,6 +25,7 @@ interface TextInputProps { labelClassName?: string list?: string maxLength?: number + max?: ComponentPropsWithoutRef<'input'>['max'] min?: ComponentPropsWithoutRef<'input'>['min'] name?: string onBlur?: FocusEventHandler @@ -33,6 +34,7 @@ interface TextInputProps { placeholder?: string required?: boolean spellCheck?: boolean + step?: ComponentPropsWithoutRef<'input'>['step'] style?: CSSProperties type?: TextInputType value: string @@ -52,6 +54,7 @@ export const TextInput = forwardRef( id={props.id} inputMode={props.inputMode} list={props.list} + max={props.max} maxLength={props.maxLength} min={props.min} name={props.name} @@ -62,6 +65,7 @@ export const TextInput = forwardRef( ref={ref} required={props.required} spellCheck={props.spellCheck} + step={props.step} style={props.style} type={props.type ?? 
'text'} value={props.value} diff --git a/apps/core/src/common/errors/app-error-code.ts b/apps/core/src/common/errors/app-error-code.ts index 69893fd351d..c1c81d0bd0d 100644 --- a/apps/core/src/common/errors/app-error-code.ts +++ b/apps/core/src/common/errors/app-error-code.ts @@ -47,6 +47,31 @@ export enum AppErrorCode { AI_TASK_CANNOT_RETRY = 'AI_TASK_CANNOT_RETRY', AI_TRANSLATION_NOT_FOUND = 'AI_TRANSLATION_NOT_FOUND', + // ai-embeddings + AI_EMBEDDING_MODEL_NOT_CONFIGURED = 'AI_EMBEDDING_MODEL_NOT_CONFIGURED', + AI_EMBEDDING_BATCH_FAILED = 'AI_EMBEDDING_BATCH_FAILED', + + // ai-persona + AI_PERSONA_NOT_FOUND = 'AI_PERSONA_NOT_FOUND', + AI_PERSONA_PROFILE_NOT_FOUND = 'AI_PERSONA_PROFILE_NOT_FOUND', + AI_PERSONA_NOT_DISTILLABLE = 'AI_PERSONA_NOT_DISTILLABLE', + AI_PERSONA_REFRESH_IN_PROGRESS = 'AI_PERSONA_REFRESH_IN_PROGRESS', + AI_PERSONA_DISTILL_MODEL_NOT_CONFIGURED = 'AI_PERSONA_DISTILL_MODEL_NOT_CONFIGURED', + + // ai-memory + AI_MEMORY_NOT_FOUND = 'AI_MEMORY_NOT_FOUND', + AI_MEMORY_INVALID_SCOPE = 'AI_MEMORY_INVALID_SCOPE', + AI_MEMORY_INVALID_TYPE = 'AI_MEMORY_INVALID_TYPE', + + // ai-echo + AI_ECHO_NOT_FOUND = 'AI_ECHO_NOT_FOUND', + AI_ECHO_SUBJECT_NOT_FOUND = 'AI_ECHO_SUBJECT_NOT_FOUND', + AI_ECHO_SCENARIO_NOT_REGISTERED = 'AI_ECHO_SCENARIO_NOT_REGISTERED', + AI_ECHO_GENERATION_FAILED = 'AI_ECHO_GENERATION_FAILED', + AI_ECHO_REGENERATE_IN_PROGRESS = 'AI_ECHO_REGENERATE_IN_PROGRESS', + AI_ECHO_MODEL_NOT_CONFIGURED = 'AI_ECHO_MODEL_NOT_CONFIGURED', + AI_ECHO_DAILY_QUOTA_EXCEEDED = 'AI_ECHO_DAILY_QUOTA_EXCEEDED', + // auth AUTH_DEVICE_FLOW_PENDING = 'AUTH_DEVICE_FLOW_PENDING', AUTH_INVALID_CREDENTIALS = 'AUTH_INVALID_CREDENTIALS', diff --git a/apps/core/src/common/errors/app-error-definitions.ts b/apps/core/src/common/errors/app-error-definitions.ts index 3a4bd501a35..b0cb7f070be 100644 --- a/apps/core/src/common/errors/app-error-definitions.ts +++ b/apps/core/src/common/errors/app-error-definitions.ts @@ -209,6 +209,111 @@ export const APP_ERROR_DEFINITIONS 
= { message: 'Translation not found', }, + // ai-embeddings + [AppErrorCode.AI_EMBEDDING_MODEL_NOT_CONFIGURED]: { + status: 400, + message: 'AI embedding model is not configured', + }, + [AppErrorCode.AI_EMBEDDING_BATCH_FAILED]: { + status: 502, + message: (p) => p?.message ?? 'AI embedding batch failed', + }, + + // ai-persona + [AppErrorCode.AI_PERSONA_NOT_FOUND]: { + status: 404, + message: (p) => + p?.key ? `Persona "${p.key}" not found` : 'Persona not found', + details: (p) => (p?.key ? { key: p.key } : undefined), + }, + [AppErrorCode.AI_PERSONA_PROFILE_NOT_FOUND]: { + status: 404, + message: (p) => + p?.key + ? `Persona profile "${p.key}" not found` + : 'Persona profile not found', + details: (p) => (p?.key ? { key: p.key } : undefined), + }, + [AppErrorCode.AI_PERSONA_NOT_DISTILLABLE]: { + status: 400, + message: (p) => + p?.key + ? `Persona "${p.key}" cannot be distilled` + : 'Persona cannot be distilled', + details: (p) => (p?.key ? { key: p.key } : undefined), + }, + [AppErrorCode.AI_PERSONA_REFRESH_IN_PROGRESS]: { + status: 409, + message: 'Persona refresh is already in progress', + details: (p) => (p?.key ? { key: p.key } : undefined), + }, + [AppErrorCode.AI_PERSONA_DISTILL_MODEL_NOT_CONFIGURED]: { + status: 400, + message: 'AI persona distill model is not configured', + }, + + // ai-memory + [AppErrorCode.AI_MEMORY_NOT_FOUND]: { + status: 404, + message: 'Memory not found', + details: (p) => (p?.id ? { id: p.id } : undefined), + }, + [AppErrorCode.AI_MEMORY_INVALID_SCOPE]: { + status: 400, + message: (p) => + p?.scope ? `Invalid memory scope: ${p.scope}` : 'Invalid memory scope', + details: (p) => (p?.scope ? { scope: p.scope } : undefined), + }, + [AppErrorCode.AI_MEMORY_INVALID_TYPE]: { + status: 400, + message: (p) => + p?.type ? `Invalid memory type: ${p.type}` : 'Invalid memory type', + details: (p) => (p?.type ? 
{ type: p.type } : undefined), + }, + + // ai-echo + [AppErrorCode.AI_ECHO_NOT_FOUND]: { + status: 404, + message: 'Echo not found', + details: (p) => (p?.id ? { id: p.id } : undefined), + }, + [AppErrorCode.AI_ECHO_SUBJECT_NOT_FOUND]: { + status: 404, + message: 'Echo subject not found', + details: (p) => + p?.subjectId + ? { subjectType: p.subjectType, subjectId: p.subjectId } + : undefined, + }, + [AppErrorCode.AI_ECHO_SCENARIO_NOT_REGISTERED]: { + status: 400, + message: (p) => + p?.scenarioKey + ? `Echo scenario "${p.scenarioKey}" is not registered` + : 'Echo scenario is not registered', + details: (p) => + p?.scenarioKey ? { scenarioKey: p.scenarioKey } : undefined, + }, + [AppErrorCode.AI_ECHO_GENERATION_FAILED]: { + status: 500, + message: (p) => p?.message ?? 'Echo generation failed', + }, + [AppErrorCode.AI_ECHO_REGENERATE_IN_PROGRESS]: { + status: 409, + message: 'Echo regeneration already in progress', + details: (p) => (p?.echoId ? { echoId: p.echoId } : undefined), + }, + [AppErrorCode.AI_ECHO_MODEL_NOT_CONFIGURED]: { + status: 400, + message: 'AI echo model is not configured', + }, + [AppErrorCode.AI_ECHO_DAILY_QUOTA_EXCEEDED]: { + status: 429, + message: 'Echo daily quota exceeded', + details: (p) => + p?.quota !== undefined ? 
{ used: p?.used, quota: p?.quota } : undefined, + }, + // auth [AppErrorCode.AUTH_DEVICE_FLOW_PENDING]: { status: 202, diff --git a/apps/core/src/common/errors/app-error-payload.ts b/apps/core/src/common/errors/app-error-payload.ts index cd6e4d9be88..6c430c31205 100644 --- a/apps/core/src/common/errors/app-error-payload.ts +++ b/apps/core/src/common/errors/app-error-payload.ts @@ -54,6 +54,37 @@ export type AppErrorPayloadMap = { [AppErrorCode.AI_TASK_CANNOT_RETRY]: { reason?: string } | undefined [AppErrorCode.AI_TRANSLATION_NOT_FOUND]: undefined + // ai-embeddings + [AppErrorCode.AI_EMBEDDING_MODEL_NOT_CONFIGURED]: undefined + [AppErrorCode.AI_EMBEDDING_BATCH_FAILED]: OptMessage + + // ai-persona + [AppErrorCode.AI_PERSONA_NOT_FOUND]: { key?: string } | undefined + [AppErrorCode.AI_PERSONA_PROFILE_NOT_FOUND]: { key?: string } | undefined + [AppErrorCode.AI_PERSONA_NOT_DISTILLABLE]: { key?: string } | undefined + [AppErrorCode.AI_PERSONA_REFRESH_IN_PROGRESS]: { key?: string } | undefined + [AppErrorCode.AI_PERSONA_DISTILL_MODEL_NOT_CONFIGURED]: undefined + + // ai-memory + [AppErrorCode.AI_MEMORY_NOT_FOUND]: WithId + [AppErrorCode.AI_MEMORY_INVALID_SCOPE]: { scope?: string } | undefined + [AppErrorCode.AI_MEMORY_INVALID_TYPE]: { type?: string } | undefined + + // ai-echo + [AppErrorCode.AI_ECHO_NOT_FOUND]: WithId + [AppErrorCode.AI_ECHO_SUBJECT_NOT_FOUND]: + | { subjectType?: string; subjectId?: string } + | undefined + [AppErrorCode.AI_ECHO_SCENARIO_NOT_REGISTERED]: + | { scenarioKey?: string } + | undefined + [AppErrorCode.AI_ECHO_GENERATION_FAILED]: OptMessage + [AppErrorCode.AI_ECHO_REGENERATE_IN_PROGRESS]: { echoId?: string } | undefined + [AppErrorCode.AI_ECHO_MODEL_NOT_CONFIGURED]: undefined + [AppErrorCode.AI_ECHO_DAILY_QUOTA_EXCEEDED]: + | { used?: number; quota?: number } + | undefined + // auth [AppErrorCode.AUTH_DEVICE_FLOW_PENDING]: undefined [AppErrorCode.AUTH_INVALID_CREDENTIALS]: undefined diff --git 
a/apps/core/src/constants/business-event.constant.ts b/apps/core/src/constants/business-event.constant.ts index 18aa2b1a027..19ceca5cdfe 100644 --- a/apps/core/src/constants/business-event.constant.ts +++ b/apps/core/src/constants/business-event.constant.ts @@ -43,6 +43,9 @@ export enum BusinessEvents { RECENTLY_CREATE = 'RECENTLY_CREATE', RECENTLY_UPDATE = 'RECENTLY_UPDATE', RECENTLY_DELETE = 'RECENTLY_DELETE', + RECENTLY_ECHO_LANDED = 'RECENTLY_ECHO_LANDED', + + PERSONA_PROFILE_REFRESHED = 'PERSONA_PROFILE_REFRESHED', AGGREGATE_UPDATE = 'AGGREGATE_UPDATE', diff --git a/apps/core/src/database/app-migrations/20260524-ai-corpus-initial-backfill.ts b/apps/core/src/database/app-migrations/20260524-ai-corpus-initial-backfill.ts new file mode 100644 index 00000000000..338a53137e8 --- /dev/null +++ b/apps/core/src/database/app-migrations/20260524-ai-corpus-initial-backfill.ts @@ -0,0 +1,12 @@ +import type { AppMigration } from './types' + +export const migration: AppMigration = { + id: '20260524-ai-corpus-initial-backfill', + description: + 'Mark initial corpus_embeddings backfill window; actual backfill runs via POST /ai-embeddings/backfill once the embedding model is configured.', + async up({ logger }) { + logger.log( + 'Initial AI corpus backfill marker recorded. 
Run POST /ai-embeddings/backfill after configuring an embedding model to populate corpus_embeddings.', + ) + }, +} diff --git a/apps/core/src/database/app-migrations/registry.ts b/apps/core/src/database/app-migrations/registry.ts index ebc05dece3f..21ee089de2c 100644 --- a/apps/core/src/database/app-migrations/registry.ts +++ b/apps/core/src/database/app-migrations/registry.ts @@ -1,4 +1,5 @@ import { migration as recentlyDropEnrichmentColumns } from './20260515-recently-drop-enrichment-columns' +import { migration as aiCorpusInitialBackfill } from './20260524-ai-corpus-initial-backfill' import type { AppMigration } from './types' /** @@ -9,4 +10,7 @@ import type { AppMigration } from './types' * Migrations removed from this list never re-run; the ledger row of a * previously applied one is left in place and simply goes unreferenced. */ -export const migrations: AppMigration[] = [recentlyDropEnrichmentColumns] +export const migrations: AppMigration[] = [ + recentlyDropEnrichmentColumns, + aiCorpusInitialBackfill, +] diff --git a/apps/core/src/database/migrations/0017_ai_echo_system.sql b/apps/core/src/database/migrations/0017_ai_echo_system.sql new file mode 100644 index 00000000000..f1505246390 --- /dev/null +++ b/apps/core/src/database/migrations/0017_ai_echo_system.sql @@ -0,0 +1,76 @@ +-- migration-lint:allow=no-bare-create-index reason=ai_echoes, ai_memories, corpus_embeddings, persona_profiles are brand-new tables, empty at deploy time +CREATE EXTENSION IF NOT EXISTS vector;--> statement-breakpoint +CREATE TABLE "ai_echoes" ( + "id" text PRIMARY KEY NOT NULL, + "scenario_key" text NOT NULL, + "subject_type" text NOT NULL, + "subject_id" text NOT NULL, + "persona_key" text NOT NULL, + "content" text, + "status" text NOT NULL, + "model" text, + "metadata" jsonb DEFAULT '{}'::jsonb NOT NULL, + "generated_at" timestamp with time zone, + "edited_at" timestamp with time zone, + "edited_by" text, + "created_at" timestamp with time zone DEFAULT now() NOT NULL, + 
"updated_at" timestamp with time zone +); +--> statement-breakpoint +CREATE TABLE "ai_memories" ( + "id" text PRIMARY KEY NOT NULL, + "scope" text NOT NULL, + "type" text NOT NULL, + "content" text NOT NULL, + "confidence" real DEFAULT 1 NOT NULL, + "salience" real DEFAULT 1 NOT NULL, + "source" jsonb DEFAULT '{}'::jsonb NOT NULL, + "embedding" vector, + "embedding_model" text, + "dim" integer, + "first_seen_at" timestamp with time zone DEFAULT now() NOT NULL, + "last_seen_at" timestamp with time zone DEFAULT now() NOT NULL, + "expires_at" timestamp with time zone, + "supersedes_id" text, + "status" text DEFAULT 'active' NOT NULL, + "metadata" jsonb DEFAULT '{}'::jsonb NOT NULL, + "created_at" timestamp with time zone DEFAULT now() NOT NULL, + "updated_at" timestamp with time zone +); +--> statement-breakpoint +CREATE TABLE "corpus_embeddings" ( + "id" text PRIMARY KEY NOT NULL, + "source_type" text NOT NULL, + "source_id" text NOT NULL, + "chunk_index" integer NOT NULL, + "content" text NOT NULL, + "content_hash" text NOT NULL, + "embedding" vector NOT NULL, + "embedding_model" text NOT NULL, + "dim" integer NOT NULL, + "created_at" timestamp with time zone DEFAULT now() NOT NULL +); +--> statement-breakpoint +CREATE TABLE "persona_profiles" ( + "id" text PRIMARY KEY NOT NULL, + "persona_key" text NOT NULL, + "profile" text NOT NULL, + "profile_summary" text, + "corpus_version" integer NOT NULL, + "distill_model" text NOT NULL, + "refreshed_at" timestamp with time zone NOT NULL, + "auto_next_at" timestamp with time zone, + "metadata" jsonb DEFAULT '{}'::jsonb NOT NULL, + "created_at" timestamp with time zone DEFAULT now() NOT NULL, + "updated_at" timestamp with time zone, + CONSTRAINT "persona_profiles_persona_key_unique" UNIQUE("persona_key") +); +--> statement-breakpoint +ALTER TABLE "ai_memories" ADD CONSTRAINT "ai_memories_supersedes_id_ai_memories_id_fk" FOREIGN KEY ("supersedes_id") REFERENCES "public"."ai_memories"("id") ON DELETE set null ON UPDATE no 
action;--> statement-breakpoint +CREATE INDEX "ai_echoes_subject_idx" ON "ai_echoes" USING btree ("scenario_key","subject_type","subject_id");--> statement-breakpoint +CREATE INDEX "ai_echoes_status_idx" ON "ai_echoes" USING btree ("scenario_key","status");--> statement-breakpoint +CREATE INDEX "ai_echoes_persona_subject_idx" ON "ai_echoes" USING btree ("subject_type","subject_id","persona_key");--> statement-breakpoint +CREATE INDEX "ai_memories_scope_status_idx" ON "ai_memories" USING btree ("scope","status");--> statement-breakpoint +CREATE INDEX "ai_memories_active_idx" ON "ai_memories" USING btree ("status") WHERE "ai_memories"."status" = 'active';--> statement-breakpoint +CREATE UNIQUE INDEX "corpus_embeddings_source_chunk_model_uniq" ON "corpus_embeddings" USING btree ("source_type","source_id","chunk_index","embedding_model");--> statement-breakpoint +CREATE INDEX "corpus_embeddings_source_idx" ON "corpus_embeddings" USING btree ("source_type","source_id"); diff --git a/apps/core/src/database/migrations/meta/0017_snapshot.json b/apps/core/src/database/migrations/meta/0017_snapshot.json new file mode 100644 index 00000000000..5e3a033dbd0 --- /dev/null +++ b/apps/core/src/database/migrations/meta/0017_snapshot.json @@ -0,0 +1,6247 @@ +{ + "id": "b3f7cd53-02dd-4af1-9767-6495de6b105f", + "prevId": "46155142-76cd-478e-aace-d7cbaa0c4b47", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.ai_agent_conversations": { + "name": "ai_agent_conversations", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "ref_id": { + "name": "ref_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + 
"ref_type": { + "name": "ref_type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "messages": { + "name": "messages", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "provider_id": { + "name": "provider_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "review_state": { + "name": "review_state", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "diff_state": { + "name": "diff_state", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "message_count": { + "name": "message_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + } + }, + "indexes": { + "ai_agent_conversations_ref_idx": { + "name": "ai_agent_conversations_ref_idx", + "columns": [ + { + "expression": "ref_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "ref_type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "ai_agent_conversations_updated_at_idx": { + "name": "ai_agent_conversations_updated_at_idx", + "columns": [ + { + "expression": "updated_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ai_echoes": { + "name": "ai_echoes", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "scenario_key": { + "name": "scenario_key", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "subject_type": { + 
"name": "subject_type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "subject_id": { + "name": "subject_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "persona_key": { + "name": "persona_key", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'{}'::jsonb" + }, + "generated_at": { + "name": "generated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "edited_at": { + "name": "edited_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "edited_by": { + "name": "edited_by", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "ai_echoes_subject_idx": { + "name": "ai_echoes_subject_idx", + "columns": [ + { + "expression": "scenario_key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "subject_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "subject_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "ai_echoes_status_idx": { + "name": "ai_echoes_status_idx", + "columns": [ + { + "expression": "scenario_key", + "isExpression": false, + "asc": true, 
+ "nulls": "last" + }, + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "ai_echoes_persona_subject_idx": { + "name": "ai_echoes_persona_subject_idx", + "columns": [ + { + "expression": "subject_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "subject_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "persona_key", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ai_insights": { + "name": "ai_insights", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "ref_id": { + "name": "ref_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "lang": { + "name": "lang", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "hash": { + "name": "hash", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "is_translation": { + "name": "is_translation", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "source_insights_id": { + "name": "source_insights_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "source_lang": { + "name": "source_lang", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "model_info": { + "name": "model_info", + "type": "jsonb", + "primaryKey": false, + 
"notNull": false + } + }, + "indexes": { + "ai_insights_ref_lang_uniq": { + "name": "ai_insights_ref_lang_uniq", + "columns": [ + { + "expression": "ref_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "lang", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "ai_insights_source_insights_id_ai_insights_id_fk": { + "name": "ai_insights_source_insights_id_ai_insights_id_fk", + "tableFrom": "ai_insights", + "tableTo": "ai_insights", + "columnsFrom": [ + "source_insights_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "set null", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ai_memories": { + "name": "ai_memories", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "scope": { + "name": "scope", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "confidence": { + "name": "confidence", + "type": "real", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "salience": { + "name": "salience", + "type": "real", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "source": { + "name": "source", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'{}'::jsonb" + }, + "embedding": { + "name": "embedding", + "type": "vector", + "primaryKey": false, + "notNull": false + }, + "embedding_model": { + "name": "embedding_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "dim": { + "name": "dim", + "type": "integer", + "primaryKey": false, + 
"notNull": false + }, + "first_seen_at": { + "name": "first_seen_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "last_seen_at": { + "name": "last_seen_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "supersedes_id": { + "name": "supersedes_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "'active'" + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'{}'::jsonb" + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "ai_memories_scope_status_idx": { + "name": "ai_memories_scope_status_idx", + "columns": [ + { + "expression": "scope", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "ai_memories_active_idx": { + "name": "ai_memories_active_idx", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"ai_memories\".\"status\" = 'active'", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "ai_memories_supersedes_id_ai_memories_id_fk": { + "name": "ai_memories_supersedes_id_ai_memories_id_fk", + "tableFrom": "ai_memories", + "tableTo": "ai_memories", + 
"columnsFrom": [ + "supersedes_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "set null", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ai_summaries": { + "name": "ai_summaries", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "hash": { + "name": "hash", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "summary": { + "name": "summary", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ref_id": { + "name": "ref_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "lang": { + "name": "lang", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "ai_summaries_ref_id_idx": { + "name": "ai_summaries_ref_id_idx", + "columns": [ + { + "expression": "ref_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.ai_translations": { + "name": "ai_translations", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "hash": { + "name": "hash", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ref_id": { + "name": "ref_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ref_type": { + "name": "ref_type", + "type": "text", + "primaryKey": 
false, + "notNull": true + }, + "lang": { + "name": "lang", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "source_lang": { + "name": "source_lang", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "text": { + "name": "text", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "subtitle": { + "name": "subtitle", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "summary": { + "name": "summary", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "tags": { + "name": "tags", + "type": "text[]", + "primaryKey": false, + "notNull": true, + "default": "'{}'::text[]" + }, + "source_modified_at": { + "name": "source_modified_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "ai_model": { + "name": "ai_model", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "ai_provider": { + "name": "ai_provider", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "content_format": { + "name": "content_format", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "source_block_snapshots": { + "name": "source_block_snapshots", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "source_meta_hashes": { + "name": "source_meta_hashes", + "type": "jsonb", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "ai_translations_ref_lang_uniq": { + "name": "ai_translations_ref_lang_uniq", + "columns": [ + { + "expression": "ref_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "ref_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "lang", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + 
"concurrently": false, + "method": "btree", + "with": {} + }, + "ai_translations_ref_id_idx": { + "name": "ai_translations_ref_id_idx", + "columns": [ + { + "expression": "ref_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.corpus_embeddings": { + "name": "corpus_embeddings", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "source_type": { + "name": "source_type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "source_id": { + "name": "source_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "chunk_index": { + "name": "chunk_index", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "content_hash": { + "name": "content_hash", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "embedding": { + "name": "embedding", + "type": "vector", + "primaryKey": false, + "notNull": true + }, + "embedding_model": { + "name": "embedding_model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "dim": { + "name": "dim", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "corpus_embeddings_source_chunk_model_uniq": { + "name": "corpus_embeddings_source_chunk_model_uniq", + "columns": [ + { + "expression": "source_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "source_id", + "isExpression": false, + "asc": true, + "nulls": "last" + 
}, + { + "expression": "chunk_index", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "embedding_model", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "corpus_embeddings_source_idx": { + "name": "corpus_embeddings_source_idx", + "columns": [ + { + "expression": "source_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "source_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.persona_profiles": { + "name": "persona_profiles", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "persona_key": { + "name": "persona_key", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "profile": { + "name": "profile", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "profile_summary": { + "name": "profile_summary", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "corpus_version": { + "name": "corpus_version", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "distill_model": { + "name": "distill_model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "refreshed_at": { + "name": "refreshed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "auto_next_at": { + "name": "auto_next_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'{}'::jsonb" + }, + "created_at": { + "name": "created_at", + 
"type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "persona_profiles_persona_key_unique": { + "name": "persona_profiles_persona_key_unique", + "nullsNotDistinct": false, + "columns": [ + "persona_key" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.translation_entries": { + "name": "translation_entries", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "key_path": { + "name": "key_path", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "lang": { + "name": "lang", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "key_type": { + "name": "key_type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "lookup_key": { + "name": "lookup_key", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "source_text": { + "name": "source_text", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "translated_text": { + "name": "translated_text", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "source_updated_at": { + "name": "source_updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "translation_entries_key_uniq": { + "name": "translation_entries_key_uniq", + "columns": [ + { + "expression": "key_path", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "lang", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "key_type", 
+ "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "lookup_key", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "translation_entries_path_lang_idx": { + "name": "translation_entries_path_lang_idx", + "columns": [ + { + "expression": "key_path", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "lang", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "translation_entries_lookup_key_idx": { + "name": "translation_entries_lookup_key_idx", + "columns": [ + { + "expression": "lookup_key", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.accounts": { + "name": "accounts", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "account_id": { + "name": "account_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "provider_id": { + "name": "provider_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "provider_account_id": { + "name": "provider_account_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "password": { + "name": "password", + "type": 
"text", + "primaryKey": false, + "notNull": false + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "access_token": { + "name": "access_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "refresh_token": { + "name": "refresh_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "access_token_expires_at": { + "name": "access_token_expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "refresh_token_expires_at": { + "name": "refresh_token_expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "scope": { + "name": "scope", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "id_token": { + "name": "id_token", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "raw": { + "name": "raw", + "type": "jsonb", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "accounts_provider_uniq": { + "name": "accounts_provider_uniq", + "columns": [ + { + "expression": "provider_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "provider_account_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "accounts_user_id_idx": { + "name": "accounts_user_id_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "accounts_user_id_readers_id_fk": { + "name": "accounts_user_id_readers_id_fk", + "tableFrom": "accounts", + "tableTo": "readers", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + 
"checkConstraints": {}, + "isRLSEnabled": false + }, + "public.api_keys": { + "name": "api_keys", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "reference_id": { + "name": "reference_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "config_id": { + "name": "config_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "key": { + "name": "key", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "start": { + "name": "start", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "prefix": { + "name": "prefix", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "enabled": { + "name": "enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "rate_limit_enabled": { + "name": "rate_limit_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "rate_limit_time_window": { + "name": "rate_limit_time_window", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "rate_limit_max": { + "name": "rate_limit_max", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "request_count": { + "name": "request_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "remaining": { + "name": "remaining", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "refill_interval": { + "name": 
"refill_interval", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "refill_amount": { + "name": "refill_amount", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "last_refill_at": { + "name": "last_refill_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "last_request": { + "name": "last_request", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "permissions": { + "name": "permissions", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "api_keys_key_uniq": { + "name": "api_keys_key_uniq", + "columns": [ + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "api_keys_user_id_idx": { + "name": "api_keys_user_id_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "api_keys_user_id_readers_id_fk": { + "name": "api_keys_user_id_readers_id_fk", + "tableFrom": "api_keys", + "tableTo": "readers", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "api_keys_reference_id_readers_id_fk": { + "name": "api_keys_reference_id_readers_id_fk", + "tableFrom": "api_keys", + "tableTo": "readers", + "columnsFrom": [ + "reference_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + 
"isRLSEnabled": false + }, + "public.device_codes": { + "name": "device_codes", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "device_code": { + "name": "device_code", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_code": { + "name": "user_code", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "last_polled_at": { + "name": "last_polled_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "polling_interval": { + "name": "polling_interval", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "client_id": { + "name": "client_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "scope": { + "name": "scope", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "device_codes_device_code_uniq": { + "name": "device_codes_device_code_uniq", + "columns": [ + { + "expression": "device_code", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "device_codes_user_code_uniq": { + "name": "device_codes_user_code_uniq", + "columns": [ + { + "expression": "user_code", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": 
false, + "method": "btree", + "with": {} + }, + "device_codes_expires_at_idx": { + "name": "device_codes_expires_at_idx", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "device_codes_user_id_readers_id_fk": { + "name": "device_codes_user_id_readers_id_fk", + "tableFrom": "device_codes", + "tableTo": "readers", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.owner_profiles": { + "name": "owner_profiles", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "reader_id": { + "name": "reader_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "mail": { + "name": "mail", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "introduce": { + "name": "introduce", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "last_login_ip": { + "name": "last_login_ip", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "last_login_time": { + "name": "last_login_time", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "social_ids": { + "name": "social_ids", + "type": "jsonb", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "owner_profiles_reader_id_uniq": { + "name": "owner_profiles_reader_id_uniq", + "columns": [ + { + "expression": "reader_id", + "isExpression": false, + 
"asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "owner_profiles_reader_id_readers_id_fk": { + "name": "owner_profiles_reader_id_readers_id_fk", + "tableFrom": "owner_profiles", + "tableTo": "readers", + "columnsFrom": [ + "reader_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.passkeys": { + "name": "passkeys", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "credential_id": { + "name": "credential_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "public_key": { + "name": "public_key", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "counter": { + "name": "counter", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "device_type": { + "name": "device_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "backed_up": { + "name": "backed_up", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "transports": { + "name": "transports", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "aaguid": { + "name": "aaguid", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + 
"passkeys_credential_id_uniq": { + "name": "passkeys_credential_id_uniq", + "columns": [ + { + "expression": "credential_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "passkeys_user_id_idx": { + "name": "passkeys_user_id_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "passkeys_user_id_readers_id_fk": { + "name": "passkeys_user_id_readers_id_fk", + "tableFrom": "passkeys", + "tableTo": "readers", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.readers": { + "name": "readers", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "email_verified": { + "name": "email_verified", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "handle": { + "name": "handle", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "username": { + "name": "username", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "display_username": { + "name": "display_username", + "type": "text", + 
"primaryKey": false, + "notNull": false + }, + "image": { + "name": "image", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "role": { + "name": "role", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "'reader'" + }, + "banned_at": { + "name": "banned_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "ban_reason": { + "name": "ban_reason", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "readers_email_uniq": { + "name": "readers_email_uniq", + "columns": [ + { + "expression": "email", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "where": "\"readers\".\"email\" is not null", + "concurrently": false, + "method": "btree", + "with": {} + }, + "readers_username_uniq": { + "name": "readers_username_uniq", + "columns": [ + { + "expression": "username", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "where": "\"readers\".\"username\" is not null", + "concurrently": false, + "method": "btree", + "with": {} + }, + "readers_role_idx": { + "name": "readers_role_idx", + "columns": [ + { + "expression": "role", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sessions": { + "name": "sessions", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": 
"user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "token": { + "name": "token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "ip_address": { + "name": "ip_address", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "user_agent": { + "name": "user_agent", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "provider": { + "name": "provider", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "sessions_token_uniq": { + "name": "sessions_token_uniq", + "columns": [ + { + "expression": "token", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "sessions_user_id_idx": { + "name": "sessions_user_id_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "sessions_user_id_readers_id_fk": { + "name": "sessions_user_id_readers_id_fk", + "tableFrom": "sessions", + "tableTo": "readers", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.verifications": { + "name": "verifications", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": 
false + }, + "identifier": { + "name": "identifier", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "value": { + "name": "value", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "verifications_identifier_idx": { + "name": "verifications_identifier_idx", + "columns": [ + { + "expression": "identifier", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.categories": { + "name": "categories", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + } + }, + "indexes": { + "categories_name_uniq": { + "name": "categories_name_uniq", + "columns": [ + { + "expression": "name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "categories_slug_uniq": { + "name": "categories_slug_uniq", + "columns": [ + { + "expression": "slug", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + 
"uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.comments": { + "name": "comments", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "ref_type": { + "name": "ref_type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ref_id": { + "name": "ref_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "author": { + "name": "author", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "mail": { + "name": "mail", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "text": { + "name": "text", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "state": { + "name": "state", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "parent_comment_id": { + "name": "parent_comment_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "root_comment_id": { + "name": "root_comment_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "reply_count": { + "name": "reply_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "latest_reply_at": { + "name": "latest_reply_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "is_deleted": { + "name": "is_deleted", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "deleted_at": { + "name": "deleted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "ip": { + "name": "ip", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "agent": { + "name": "agent", + "type": 
"text", + "primaryKey": false, + "notNull": false + }, + "pin": { + "name": "pin", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "location": { + "name": "location", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "is_whispers": { + "name": "is_whispers", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "avatar": { + "name": "avatar", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "auth_provider": { + "name": "auth_provider", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "meta": { + "name": "meta", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "reader_id": { + "name": "reader_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "edited_at": { + "name": "edited_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "anchor": { + "name": "anchor", + "type": "jsonb", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "comments_thread_idx": { + "name": "comments_thread_idx", + "columns": [ + { + "expression": "ref_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "ref_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "parent_comment_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "pin", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "comments_root_idx": { + "name": "comments_root_idx", + "columns": [ + { + "expression": "root_comment_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + 
"concurrently": false, + "method": "btree", + "with": {} + }, + "comments_reader_idx": { + "name": "comments_reader_idx", + "columns": [ + { + "expression": "reader_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "comments_parent_comment_id_comments_id_fk": { + "name": "comments_parent_comment_id_comments_id_fk", + "tableFrom": "comments", + "tableTo": "comments", + "columnsFrom": [ + "parent_comment_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "comments_root_comment_id_comments_id_fk": { + "name": "comments_root_comment_id_comments_id_fk", + "tableFrom": "comments", + "tableTo": "comments", + "columnsFrom": [ + "root_comment_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "comments_reader_id_readers_id_fk": { + "name": "comments_reader_id_readers_id_fk", + "tableFrom": "comments", + "tableTo": "readers", + "columnsFrom": [ + "reader_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "set null", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.draft_histories": { + "name": "draft_histories", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "draft_id": { + "name": "draft_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "version": { + "name": "version", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "text": { + "name": "text", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": false + }, + 
"content_format": { + "name": "content_format", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type_specific_data": { + "name": "type_specific_data", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "saved_at": { + "name": "saved_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "is_full_snapshot": { + "name": "is_full_snapshot", + "type": "boolean", + "primaryKey": false, + "notNull": true + }, + "ref_version": { + "name": "ref_version", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "base_version": { + "name": "base_version", + "type": "integer", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "draft_histories_draft_version_uniq": { + "name": "draft_histories_draft_version_uniq", + "columns": [ + { + "expression": "draft_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "version", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "draft_histories_draft_id_drafts_id_fk": { + "name": "draft_histories_draft_id_drafts_id_fk", + "tableFrom": "draft_histories", + "tableTo": "drafts", + "columnsFrom": [ + "draft_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.drafts": { + "name": "drafts", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "ref_type": { + "name": 
"ref_type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ref_id": { + "name": "ref_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "''" + }, + "text": { + "name": "text", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "''" + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "content_format": { + "name": "content_format", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "images": { + "name": "images", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "meta": { + "name": "meta", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "type_specific_data": { + "name": "type_specific_data", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "history": { + "name": "history", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "version": { + "name": "version", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "published_version": { + "name": "published_version", + "type": "integer", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "drafts_ref_idx": { + "name": "drafts_ref_idx", + "columns": [ + { + "expression": "ref_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "ref_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"drafts\".\"ref_id\" is not null", + "concurrently": false, + "method": "btree", + "with": {} + }, + "drafts_updated_at_idx": { + "name": "drafts_updated_at_idx", + "columns": [ + { + "expression": "updated_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + 
"compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.notes": { + "name": "notes", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "nid": { + "name": "nid", + "type": "integer", + "primaryKey": false, + "notNull": true, + "identity": { + "type": "byDefault", + "name": "notes_nid_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "text": { + "name": "text", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "content_format": { + "name": "content_format", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "images": { + "name": "images", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "meta": { + "name": "meta", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "is_published": { + "name": "is_published", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "password": { + "name": "password", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "public_at": { + "name": "public_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "mood": { + "name": "mood", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "weather": { + "name": "weather", + "type": "text", + "primaryKey": false, + "notNull": false + }, + 
"bookmark": { + "name": "bookmark", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "coordinates": { + "name": "coordinates", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "location": { + "name": "location", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "read_count": { + "name": "read_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "like_count": { + "name": "like_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "topic_id": { + "name": "topic_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "modified_at": { + "name": "modified_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "notes_nid_uniq": { + "name": "notes_nid_uniq", + "columns": [ + { + "expression": "nid", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "notes_slug_uniq": { + "name": "notes_slug_uniq", + "columns": [ + { + "expression": "slug", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "where": "\"notes\".\"slug\" is not null", + "concurrently": false, + "method": "btree", + "with": {} + }, + "notes_nid_desc_idx": { + "name": "notes_nid_desc_idx", + "columns": [ + { + "expression": "nid", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "notes_modified_at_idx": { + "name": "notes_modified_at_idx", + "columns": [ + { + "expression": "modified_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "notes_created_at_idx": { + "name": "notes_created_at_idx", + "columns": [ + { + "expression": "created_at", 
+ "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "notes_topic_id_idx": { + "name": "notes_topic_id_idx", + "columns": [ + { + "expression": "topic_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "notes_published_public_created_idx": { + "name": "notes_published_public_created_idx", + "columns": [ + { + "expression": "is_published", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + }, + { + "expression": "public_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": true, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "notes_topic_id_topics_id_fk": { + "name": "notes_topic_id_topics_id_fk", + "tableFrom": "notes", + "tableTo": "topics", + "columnsFrom": [ + "topic_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "set null", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.pages": { + "name": "pages", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "subtitle": { + "name": "subtitle", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "text": { + "name": "text", + "type": "text", + "primaryKey": false, + "notNull": false + }, + 
"content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "content_format": { + "name": "content_format", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "images": { + "name": "images", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "meta": { + "name": "meta", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "order": { + "name": "order", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "modified_at": { + "name": "modified_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "pages_slug_uniq": { + "name": "pages_slug_uniq", + "columns": [ + { + "expression": "slug", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "pages_order_idx": { + "name": "pages_order_idx", + "columns": [ + { + "expression": "order", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.post_related_posts": { + "name": "post_related_posts", + "schema": "", + "columns": { + "post_id": { + "name": "post_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "related_post_id": { + "name": "related_post_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "position": { + "name": "position", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + } + }, + "indexes": { + "post_related_posts_pk": { + "name": "post_related_posts_pk", + "columns": [ + { + "expression": "post_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "related_post_id", + 
"isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "post_related_posts_related_idx": { + "name": "post_related_posts_related_idx", + "columns": [ + { + "expression": "related_post_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "post_related_posts_post_id_posts_id_fk": { + "name": "post_related_posts_post_id_posts_id_fk", + "tableFrom": "post_related_posts", + "tableTo": "posts", + "columnsFrom": [ + "post_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "post_related_posts_related_post_id_posts_id_fk": { + "name": "post_related_posts_related_post_id_posts_id_fk", + "tableFrom": "post_related_posts", + "tableTo": "posts", + "columnsFrom": [ + "related_post_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.posts": { + "name": "posts", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "text": { + "name": "text", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "content_format": { + "name": "content_format", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "summary": { + 
"name": "summary", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "images": { + "name": "images", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "meta": { + "name": "meta", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "tags": { + "name": "tags", + "type": "text[]", + "primaryKey": false, + "notNull": true, + "default": "'{}'::text[]" + }, + "modified_at": { + "name": "modified_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "category_id": { + "name": "category_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "copyright": { + "name": "copyright", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "is_published": { + "name": "is_published", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "read_count": { + "name": "read_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "like_count": { + "name": "like_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "pin_at": { + "name": "pin_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "pin_order": { + "name": "pin_order", + "type": "integer", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "posts_slug_uniq": { + "name": "posts_slug_uniq", + "columns": [ + { + "expression": "slug", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "posts_modified_at_idx": { + "name": "posts_modified_at_idx", + "columns": [ + { + "expression": "modified_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "posts_created_at_idx": { + "name": "posts_created_at_idx", + "columns": [ + { + 
"expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "posts_category_id_idx": { + "name": "posts_category_id_idx", + "columns": [ + { + "expression": "category_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "posts_published_created_at_idx": { + "name": "posts_published_created_at_idx", + "columns": [ + { + "expression": "is_published", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "pin_at", + "isExpression": false, + "asc": false, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": true, + "method": "btree", + "with": {} + }, + "posts_category_published_created_idx": { + "name": "posts_category_published_created_idx", + "columns": [ + { + "expression": "category_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_published", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "pin_at", + "isExpression": false, + "asc": false, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": true, + "method": "btree", + "with": {} + }, + "posts_tags_gin_idx": { + "name": "posts_tags_gin_idx", + "columns": [ + { + "expression": "tags", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": true, + "method": "gin", + "with": {} + } + }, + "foreignKeys": { + "posts_category_id_categories_id_fk": { + "name": "posts_category_id_categories_id_fk", + "tableFrom": "posts", + "tableTo": "categories", + "columnsFrom": [ + "category_id" + ], + "columnsTo": [ + "id" + ], + 
"onDelete": "restrict", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.recentlies": { + "name": "recentlies", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "''" + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "ref_type": { + "name": "ref_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "ref_id": { + "name": "ref_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "comments_index": { + "name": "comments_index", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "allow_comment": { + "name": "allow_comment", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "modified_at": { + "name": "modified_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "up": { + "name": "up", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "down": { + "name": "down", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + } + }, + "indexes": { + "recentlies_ref_idx": { + "name": "recentlies_ref_idx", + "columns": [ + { + "expression": "ref_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "ref_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + 
"with": {} + }, + "recentlies_created_at_idx": { + "name": "recentlies_created_at_idx", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.topics": { + "name": "topics", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "''" + }, + "introduce": { + "name": "introduce", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "icon": { + "name": "icon", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "topics_name_uniq": { + "name": "topics_name_uniq", + "columns": [ + { + "expression": "name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "topics_slug_uniq": { + "name": "topics_slug_uniq", + "columns": [ + { + "expression": "slug", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.enrichment_cache": { + "name": "enrichment_cache", 
+ "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "provider": { + "name": "provider", + "type": "varchar(64)", + "primaryKey": false, + "notNull": true + }, + "external_id": { + "name": "external_id", + "type": "varchar(256)", + "primaryKey": false, + "notNull": true + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "locale": { + "name": "locale", + "type": "varchar(8)", + "primaryKey": false, + "notNull": true, + "default": "''" + }, + "normalized": { + "name": "normalized", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "raw": { + "name": "raw", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "fetched_at": { + "name": "fetched_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "failure_count": { + "name": "failure_count", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "last_error": { + "name": "last_error", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "enrichment_provider_external_id_locale_uniq": { + "name": "enrichment_provider_external_id_locale_uniq", + "columns": [ + { + "expression": "provider", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "external_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "locale", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "enrichment_expires_at_idx": { + "name": 
"enrichment_expires_at_idx", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.enrichment_captures": { + "name": "enrichment_captures", + "schema": "", + "columns": { + "enrichment_id": { + "name": "enrichment_id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "object_key": { + "name": "object_key", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "bytes": { + "name": "bytes", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "width": { + "name": "width", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "height": { + "name": "height", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "thumbhash": { + "name": "thumbhash", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "palette": { + "name": "palette", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "last_accessed_at": { + "name": "last_accessed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "enrichment_captures_lru_idx": { + "name": "enrichment_captures_lru_idx", + "columns": [ + { + "expression": "last_accessed_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "enrichment_captures_enrichment_id_enrichment_cache_id_fk": { + "name": "enrichment_captures_enrichment_id_enrichment_cache_id_fk", + "tableFrom": 
"enrichment_captures", + "tableTo": "enrichment_cache", + "columnsFrom": [ + "enrichment_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public._app_migrations": { + "name": "_app_migrations", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "applied_at": { + "name": "applied_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.auth_id_map": { + "name": "auth_id_map", + "schema": "", + "columns": { + "collection": { + "name": "collection", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "mongo_id": { + "name": "mongo_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "pg_id": { + "name": "pg_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "auth_id_map_collection_mongo_uniq": { + "name": "auth_id_map_collection_mongo_uniq", + "columns": [ + { + "expression": "collection", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "mongo_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "auth_id_map_collection_pg_uniq": { + "name": "auth_id_map_collection_pg_uniq", + "columns": [ + { + "expression": "collection", + 
"isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "pg_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.data_migration_runs": { + "name": "data_migration_runs", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "started_at": { + "name": "started_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "finished_at": { + "name": "finished_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "error": { + "name": "error", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.mongo_id_map": { + "name": "mongo_id_map", + "schema": "", + "columns": { + "collection": { + "name": "collection", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "mongo_id": { + "name": "mongo_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "snowflake_id": { + "name": "snowflake_id", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "mongo_id_map_pk": { + "name": "mongo_id_map_pk", + "columns": [ + { + "expression": "collection", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "mongo_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } 
+ ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "mongo_id_map_snowflake_uniq": { + "name": "mongo_id_map_snowflake_uniq", + "columns": [ + { + "expression": "snowflake_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.schema_migrations": { + "name": "schema_migrations", + "schema": "", + "columns": { + "name": { + "name": "name", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "applied_at": { + "name": "applied_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.activities": { + "name": "activities", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "type": { + "name": "type", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "payload": { + "name": "payload", + "type": "jsonb", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "activities_created_at_idx": { + "name": "activities_created_at_idx", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + 
"public.analyzes": { + "name": "analyzes", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "timestamp": { + "name": "timestamp", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "ip": { + "name": "ip", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "ua": { + "name": "ua", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "country": { + "name": "country", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "path": { + "name": "path", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "referer": { + "name": "referer", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "analyzes_timestamp_idx": { + "name": "analyzes_timestamp_idx", + "columns": [ + { + "expression": "timestamp", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "analyzes_timestamp_path_idx": { + "name": "analyzes_timestamp_path_idx", + "columns": [ + { + "expression": "timestamp", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "path", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "analyzes_timestamp_referer_idx": { + "name": "analyzes_timestamp_referer_idx", + "columns": [ + { + "expression": "timestamp", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "referer", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "analyzes_timestamp_ip_idx": { + "name": "analyzes_timestamp_ip_idx", + "columns": [ + { + "expression": "timestamp", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": 
"ip", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.file_references": { + "name": "file_references", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "file_url": { + "name": "file_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "file_name": { + "name": "file_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ref_id": { + "name": "ref_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "ref_type": { + "name": "ref_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "s3_object_key": { + "name": "s3_object_key", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "reader_id": { + "name": "reader_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "uploaded_by": { + "name": "uploaded_by", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "mime_type": { + "name": "mime_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "byte_size": { + "name": "byte_size", + "type": "bigint", + "primaryKey": false, + "notNull": false + }, + "detached_at": { + "name": "detached_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "file_references_file_url_idx": { + "name": "file_references_file_url_idx", + "columns": [ + { + "expression": "file_url", + "isExpression": false, + "asc": true, + "nulls": 
"last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "file_references_ref_idx": { + "name": "file_references_ref_idx", + "columns": [ + { + "expression": "ref_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "ref_type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "file_references_status_created_idx": { + "name": "file_references_status_created_idx", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "file_references_reader_status_created_idx": { + "name": "file_references_reader_status_created_idx", + "columns": [ + { + "expression": "reader_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "file_references_status_detached_idx": { + "name": "file_references_status_detached_idx", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "detached_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "file_references_reader_id_readers_id_fk": { + "name": "file_references_reader_id_readers_id_fk", + "tableFrom": "file_references", + "tableTo": "readers", + "columnsFrom": [ + "reader_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "set null", 
+ "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.links": { + "name": "links", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "avatar": { + "name": "avatar", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "type": { + "name": "type", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "state": { + "name": "state", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "links_name_uniq": { + "name": "links_name_uniq", + "columns": [ + { + "expression": "name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "links_url_uniq": { + "name": "links_url_uniq", + "columns": [ + { + "expression": "url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.meta_presets": { + "name": "meta_presets", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + 
"created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "content_type": { + "name": "content_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "fields": { + "name": "fields", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + } + }, + "indexes": { + "meta_presets_name_uniq": { + "name": "meta_presets_name_uniq", + "columns": [ + { + "expression": "name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.options": { + "name": "options", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "value": { + "name": "value", + "type": "jsonb", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "options_name_uniq": { + "name": "options_name_uniq", + "columns": [ + { + "expression": "name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.poll_vote_options": { + "name": "poll_vote_options", + 
"schema": "", + "columns": { + "vote_id": { + "name": "vote_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "option_id": { + "name": "option_id", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "poll_vote_options_pk": { + "name": "poll_vote_options_pk", + "columns": [ + { + "expression": "vote_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "option_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "poll_vote_options_option_idx": { + "name": "poll_vote_options_option_idx", + "columns": [ + { + "expression": "option_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "poll_vote_options_vote_id_poll_votes_id_fk": { + "name": "poll_vote_options_vote_id_poll_votes_id_fk", + "tableFrom": "poll_vote_options", + "tableTo": "poll_votes", + "columnsFrom": [ + "vote_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.poll_votes": { + "name": "poll_votes", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "poll_id": { + "name": "poll_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "voter_fingerprint": { + "name": "voter_fingerprint", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "poll_votes_poll_voter_uniq": { + "name": "poll_votes_poll_voter_uniq", + "columns": [ + { + "expression": 
"poll_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "voter_fingerprint", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "poll_votes_poll_id_idx": { + "name": "poll_votes_poll_id_idx", + "columns": [ + { + "expression": "poll_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.projects": { + "name": "projects", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "preview_url": { + "name": "preview_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "doc_url": { + "name": "doc_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "project_url": { + "name": "project_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "images": { + "name": "images", + "type": "text[]", + "primaryKey": false, + "notNull": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "avatar": { + "name": "avatar", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "text": { + "name": "text", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "projects_name_uniq": { + "name": "projects_name_uniq", + "columns": [ + { + "expression": "name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + 
], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.says": { + "name": "says", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "text": { + "name": "text", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "source": { + "name": "source", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "author": { + "name": "author", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "says_created_at_idx": { + "name": "says_created_at_idx", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.serverless_logs": { + "name": "serverless_logs", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "function_id": { + "name": "function_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "reference": { + "name": "reference", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "method": { + "name": "method", + "type": "text", + "primaryKey": false, + "notNull": 
false + }, + "ip": { + "name": "ip", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "execution_time": { + "name": "execution_time", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "logs": { + "name": "logs", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "error": { + "name": "error", + "type": "jsonb", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "serverless_logs_created_at_idx": { + "name": "serverless_logs_created_at_idx", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "serverless_logs_function_idx": { + "name": "serverless_logs_function_idx", + "columns": [ + { + "expression": "function_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "serverless_logs_reference_idx": { + "name": "serverless_logs_reference_idx", + "columns": [ + { + "expression": "reference", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.serverless_storages": { + "name": "serverless_storages", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, 
+ "namespace": { + "name": "namespace", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "key": { + "name": "key", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "value": { + "name": "value", + "type": "jsonb", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "serverless_storages_ns_key_uniq": { + "name": "serverless_storages_ns_key_uniq", + "columns": [ + { + "expression": "namespace", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.slug_trackers": { + "name": "slug_trackers", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "target_id": { + "name": "target_id", + "type": "text", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "slug_trackers_type_target_idx": { + "name": "slug_trackers_type_target_idx", + "columns": [ + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "target_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "slug_trackers_slug_type_idx": { + "name": "slug_trackers_slug_type_idx", + "columns": [ + { + "expression": "slug", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + 
"concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.snippets": { + "name": "snippets", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "private": { + "name": "private", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "raw": { + "name": "raw", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "reference": { + "name": "reference", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "'root'" + }, + "comment": { + "name": "comment", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "metatype": { + "name": "metatype", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "schema": { + "name": "schema", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "method": { + "name": "method", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "custom_path": { + "name": "custom_path", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "secret": { + "name": "secret", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "enable": { + "name": "enable", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "built_in": { + "name": "built_in", + "type": "boolean", + "primaryKey": false, 
+ "notNull": true, + "default": false + }, + "compiled_code": { + "name": "compiled_code", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "snippets_name_reference_idx": { + "name": "snippets_name_reference_idx", + "columns": [ + { + "expression": "name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "reference", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "snippets_type_idx": { + "name": "snippets_type_idx", + "columns": [ + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "snippets_custom_path_uniq": { + "name": "snippets_custom_path_uniq", + "columns": [ + { + "expression": "custom_path", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "where": "\"snippets\".\"custom_path\" is not null", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.subscribes": { + "name": "subscribes", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "cancel_token": { + "name": "cancel_token", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "subscribe": { + "name": "subscribe", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "verified": { + "name": "verified", + "type": "boolean", + "primaryKey": false, + 
"notNull": true, + "default": false + } + }, + "indexes": { + "subscribes_email_uniq": { + "name": "subscribes_email_uniq", + "columns": [ + { + "expression": "email", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "subscribes_cancel_token_uniq": { + "name": "subscribes_cancel_token_uniq", + "columns": [ + { + "expression": "cancel_token", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.webhook_events": { + "name": "webhook_events", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "timestamp": { + "name": "timestamp", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "headers": { + "name": "headers", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "payload": { + "name": "payload", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "event": { + "name": "event", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "response": { + "name": "response", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "success": { + "name": "success", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "hook_id": { + "name": "hook_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + } + }, + "indexes": { + "webhook_events_hook_id_idx": { + "name": "webhook_events_hook_id_idx", + "columns": [ + { + "expression": "hook_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + 
"isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "webhook_events_timestamp_idx": { + "name": "webhook_events_timestamp_idx", + "columns": [ + { + "expression": "timestamp", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "webhook_events_hook_id_webhooks_id_fk": { + "name": "webhook_events_hook_id_webhooks_id_fk", + "tableFrom": "webhook_events", + "tableTo": "webhooks", + "columnsFrom": [ + "hook_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.webhooks": { + "name": "webhooks", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "timestamp": { + "name": "timestamp", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "payload_url": { + "name": "payload_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "events": { + "name": "events", + "type": "text[]", + "primaryKey": false, + "notNull": true + }, + "enabled": { + "name": "enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "secret": { + "name": "secret", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "scope": { + "name": "scope", + "type": "integer", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "webhooks_enabled_idx": { + "name": "webhooks_enabled_idx", + "columns": [ + { + "expression": "enabled", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + 
"checkConstraints": {}, + "isRLSEnabled": false + }, + "public.search_documents": { + "name": "search_documents", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true + }, + "ref_type": { + "name": "ref_type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ref_id": { + "name": "ref_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "lang": { + "name": "lang", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "source_hash": { + "name": "source_hash", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "''" + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "search_text": { + "name": "search_text", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "terms": { + "name": "terms", + "type": "text[]", + "primaryKey": false, + "notNull": true, + "default": "'{}'::text[]" + }, + "title_term_freq": { + "name": "title_term_freq", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'{}'::jsonb" + }, + "body_term_freq": { + "name": "body_term_freq", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'{}'::jsonb" + }, + "title_length": { + "name": "title_length", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "body_length": { + "name": "body_length", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "nid": { + "name": "nid", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "is_published": { + "name": "is_published", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "public_at": { + "name": "public_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "has_password": { + 
"name": "has_password", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "modified_at": { + "name": "modified_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "search_documents_ref_lang_uniq": { + "name": "search_documents_ref_lang_uniq", + "columns": [ + { + "expression": "ref_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "ref_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "lang", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "search_documents_published_idx": { + "name": "search_documents_published_idx", + "columns": [ + { + "expression": "is_published", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "public_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "search_documents_lang_idx": { + "name": "search_documents_lang_idx", + "columns": [ + { + "expression": "lang", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": {}, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git a/apps/core/src/database/migrations/meta/_journal.json b/apps/core/src/database/migrations/meta/_journal.json 
index 290b58e99dc..5df81c44396 100644 --- a/apps/core/src/database/migrations/meta/_journal.json +++ b/apps/core/src/database/migrations/meta/_journal.json @@ -120,6 +120,13 @@ "when": 1779990739617, "tag": "0016_milky_mystique", "breakpoints": true + }, + { + "idx": 17, + "version": "7", + "when": 1780047030233, + "tag": "0017_ai_echo_system", + "breakpoints": true } ] } \ No newline at end of file diff --git a/apps/core/src/modules/ai/ai-echo/ai-echo.constants.ts b/apps/core/src/modules/ai/ai-echo/ai-echo.constants.ts new file mode 100644 index 00000000000..a33f1b8b40e --- /dev/null +++ b/apps/core/src/modules/ai/ai-echo/ai-echo.constants.ts @@ -0,0 +1,11 @@ +/** Unique symbol described as ECHO_SCENARIO. Its consumers are not visible in this file; presumably a DI or metadata token - TODO confirm. */ export const ECHO_SCENARIO = Symbol('ECHO_SCENARIO') + +/** Built-in fallback tuning values for the echo feature, frozen with as const. */ export const ECHO_DEFAULTS = { + dailyQuota: 200, + retrievalTopK: 5, + retrievalMinSimilarity: 0.72, + exemplarsCount: 4, + upstreamMessageMaxLen: 1000, +} as const + +/** Prefix of the Redis key that counts echo generations; AiEchoService appends a YYYY-MM-DD day key to it. */ export const ECHO_QUOTA_REDIS_KEY_PREFIX = 'ai-echo:quota:' diff --git a/apps/core/src/modules/ai/ai-echo/ai-echo.controller.ts b/apps/core/src/modules/ai/ai-echo/ai-echo.controller.ts new file mode 100644 index 00000000000..2614d2b76b5 --- /dev/null +++ b/apps/core/src/modules/ai/ai-echo/ai-echo.controller.ts @@ -0,0 +1,134 @@ +import { + Body, + Delete, + Get, + HttpCode, + Param, + Post, + Put, + Query, +} from '@nestjs/common' + +import { ApiController } from '~/common/decorators/api-controller.decorator' +import { Auth } from '~/common/decorators/auth.decorator' +import { CurrentUser } from '~/common/decorators/current-user.decorator' +import { withMeta } from '~/common/response/envelope.types' +import { MetaObjectBuilder } from '~/common/response/meta-builder' +import type { SessionUser } from '~/modules/auth/auth.types' +import { EntityIdDto } from '~/shared/dto/id.dto' + +import { + AdminListEchoQueryDto, + EditEchoDto, + RegenerateEchoDto, + SubjectParamsDto, +} from './ai-echo.schema' +import { AiEchoService } from './ai-echo.service' +import type { AiEcho } from
'./ai-echo.types' +import { + type AiEchoAdminView, + type AiEchoPublicView, + AiEchoViews, +} from './ai-echo.views' + +/** Controller registered under the ai-echo route; it delegates to AiEchoService and maps rows to validated view objects. */ @ApiController('ai-echo') +export class AiEchoController { + constructor(private readonly service: AiEchoService) {} + + /** Lists the publicly visible echoes of one subject. When the scenarioKey query is omitted, subjectType is used as the scenario key. This handler carries no Auth decorator, unlike adminList and regenerate below. */ @Get('/by-subject/:subjectType/:subjectId') + async listBySubject( + @Param() params: SubjectParamsDto, + @Query('personaKey') personaKey?: string, + @Query('scenarioKey') scenarioKey?: string, + ) { + const rows = await this.service.listPublicBySubject( + scenarioKey ?? params.subjectType, + params.subjectType, + params.subjectId, + personaKey, + ) + return rows.map((row) => this.toPublicView(row)) + } + + /** Paginated admin listing: rows go through the admin view and the pagination info is attached as response meta. */ @Get('/') + @Auth() + async adminList(@Query() query: AdminListEchoQueryDto) { + const result = await this.service.adminList(query) + return withMeta( + result.data.map((row) => this.toAdminView(row)), + new MetaObjectBuilder().pagination(result.pagination).build(), + ) + } + + /** Queues a fresh generation for one subject and persona by delegating to AiEchoService.regenerate; force falls back to false when absent. */ @Post('/regenerate/:subjectType/:subjectId') + @Auth() + async regenerate( + @Param() params: SubjectParamsDto, + @Body() body: RegenerateEchoDto, + @Query('scenarioKey') scenarioKey?: string, + ) { + return this.service.regenerate( + params.subjectType, + params.subjectId, + body.personaKey, + body.force ??
false, + scenarioKey, + ) + } + + /** Manual edit of an echo body; the id of the current session user is passed to the service as the editor. */ @Put('/:id') + @Auth() + async edit( + @Param() params: EntityIdDto, + @Body() body: EditEchoDto, + @CurrentUser() user: SessionUser, + ) { + const row = await this.service.edit(params.id, body.content, user.id) + return this.toAdminView(row) + } + + /** Responds with 204 and delegates to AiEchoService.softDelete. */ @Delete('/:id') + @Auth() + @HttpCode(204) + async delete(@Param() params: EntityIdDto) { + await this.service.softDelete(params.id) + } + + /** Builds the public payload. Only profileRefreshedAt, retrievalIds and memoryIds are copied from metadata, and the result is parsed by AiEchoViews.public. */ private toPublicView(row: AiEcho): AiEchoPublicView { + return AiEchoViews.public.parse({ + id: row.id, + scenarioKey: row.scenarioKey, + subjectType: row.subjectType, + subjectId: row.subjectId, + personaKey: row.personaKey, + content: row.content, + status: row.status, + generatedAt: row.generatedAt, + editedAt: row.editedAt, + metadata: { + profileRefreshedAt: row.metadata.profileRefreshedAt, + retrievalIds: row.metadata.retrievalIds, + memoryIds: row.metadata.memoryIds, + }, + }) + } + + /** Builds the admin payload with model, full metadata, editor and timestamps, parsed by AiEchoViews.admin. */ private toAdminView(row: AiEcho): AiEchoAdminView { + return AiEchoViews.admin.parse({ + id: row.id, + scenarioKey: row.scenarioKey, + subjectType: row.subjectType, + subjectId: row.subjectId, + personaKey: row.personaKey, + content: row.content, + status: row.status, + model: row.model, + metadata: row.metadata, + generatedAt: row.generatedAt, + editedAt: row.editedAt, + editedBy: row.editedBy, + createdAt: row.createdAt, + updatedAt: row.updatedAt, + }) + } +} diff --git a/apps/core/src/modules/ai/ai-echo/ai-echo.module.ts b/apps/core/src/modules/ai/ai-echo/ai-echo.module.ts new file mode 100644 index 00000000000..56a1fdd0dc8 --- /dev/null +++ b/apps/core/src/modules/ai/ai-echo/ai-echo.module.ts @@ -0,0 +1,31 @@ +import { forwardRef, Module } from '@nestjs/common' + +import { AiModule } from '../ai.module' +import { AiEmbeddingsModule } from '../ai-embeddings/ai-embeddings.module' +import { AiMemoryModule } from '../ai-memory/ai-memory.module' +import { AiPersonaModule } from '../ai-persona/ai-persona.module' +import { AiTaskModule } from
'../ai-task/ai-task.module' +import { AiEchoController } from './ai-echo.controller' +import { AiEchoRepository } from './ai-echo.repository' +import { AiEchoService } from './ai-echo.service' +import { EchoScenarioRegistry } from './echo-scenario.registry' +import { EchoGenerateTaskProcessor } from './tasks/echo-generate.processor' + +/** Nest module wiring the echo controller, repository, service, scenario registry and generate-task processor. AiModule is imported through forwardRef; the service, repository and registry are exported. */ @Module({ + imports: [ + AiTaskModule, + AiEmbeddingsModule, + AiPersonaModule, + AiMemoryModule, + forwardRef(() => AiModule), + ], + controllers: [AiEchoController], + providers: [ + AiEchoRepository, + AiEchoService, + EchoScenarioRegistry, + EchoGenerateTaskProcessor, + ], + exports: [AiEchoService, AiEchoRepository, EchoScenarioRegistry], +}) +export class AiEchoModule {} diff --git a/apps/core/src/modules/ai/ai-echo/ai-echo.repository.ts b/apps/core/src/modules/ai/ai-echo/ai-echo.repository.ts new file mode 100644 index 00000000000..512cf13fb14 --- /dev/null +++ b/apps/core/src/modules/ai/ai-echo/ai-echo.repository.ts @@ -0,0 +1,197 @@ +import { Inject, Injectable } from '@nestjs/common' +import { and, desc, eq, type SQL, sql } from 'drizzle-orm' + +import { PG_DB_TOKEN } from '~/constants/system.constant' +import { aiEchoes } from '~/database/schema' +import { + BaseRepository, + type PaginationResult, + toEntityId, +} from '~/processors/database/base.repository' +import type { AppDatabase } from '~/processors/database/postgres.provider' +import { type EntityId, parseEntityId } from '~/shared/id/entity-id' +import { SnowflakeService } from '~/shared/id/snowflake.service' + +import type { + AiEcho, + AiEchoCreateInput, + AiEchoListFilters, + AiEchoMetadata, + AiEchoStatus, + AiEchoUpdateInput, +} from './ai-echo.types' + +/** Row shape inferred for a select on the aiEchoes table. */ type AiEchoRow = typeof aiEchoes.$inferSelect + +/** Converts a database row into the AiEcho domain object; id, subjectId and editedBy pass through toEntityId. */ const mapRow = (row: AiEchoRow): AiEcho => ({ + id: toEntityId(row.id) as EntityId, + scenarioKey: row.scenarioKey, + subjectType: row.subjectType, + subjectId: toEntityId(row.subjectId) as EntityId, + personaKey: row.personaKey, + content: row.content, +
status: row.status as AiEchoStatus, + model: row.model, + /* a null or undefined metadata column is exposed as an empty object */ metadata: (row.metadata ?? {}) as AiEchoMetadata, + generatedAt: row.generatedAt, + editedAt: row.editedAt, + editedBy: row.editedBy ? (toEntityId(row.editedBy) as EntityId) : null, + createdAt: row.createdAt, + updatedAt: row.updatedAt, +}) + +/** Data-access layer for the aiEchoes table; every read is mapped to AiEcho through mapRow. */ @Injectable() +export class AiEchoRepository extends BaseRepository { + constructor( + @Inject(PG_DB_TOKEN) db: AppDatabase, + private readonly snowflake: SnowflakeService, + ) { + super(db) + } + + /** Loads one echo by id, or null when no row matches. */ async findById(id: EntityId | string): Promise { + const [row] = await this.db + .select() + .from(aiEchoes) + .where(eq(aiEchoes.id, parseEntityId(id))) + .limit(1) + return row ? mapRow(row) : null + } + + /** Returns the most recently created echo matching scenario, subject and persona, or null when there is none. Status is not part of the match. */ async findOne(criteria: { + scenarioKey: string + subjectType: string + subjectId: string + personaKey: string + }): Promise { + const [row] = await this.db + .select() + .from(aiEchoes) + .where( + and( + eq(aiEchoes.scenarioKey, criteria.scenarioKey), + eq(aiEchoes.subjectType, criteria.subjectType), + eq(aiEchoes.subjectId, parseEntityId(criteria.subjectId)), + eq(aiEchoes.personaKey, criteria.personaKey), + )!, + ) + .orderBy(desc(aiEchoes.createdAt)) + .limit(1) + return row ?
mapRow(row) : null + } + + /** Returns every echo of a scenario and subject, newest first, regardless of status or persona. */ async findAllBySubject( + scenarioKey: string, + subjectType: string, + subjectId: EntityId | string, + ): Promise { + const rows = await this.db + .select() + .from(aiEchoes) + .where( + and( + eq(aiEchoes.scenarioKey, scenarioKey), + eq(aiEchoes.subjectType, subjectType), + eq(aiEchoes.subjectId, parseEntityId(subjectId)), + )!, + ) + .orderBy(desc(aiEchoes.createdAt)) + return rows.map(mapRow) + } + + /** Paginated admin listing, newest first. page is clamped to at least 1 and size to the range 1 to 100; the page query and the count query are issued together through Promise.all. */ async findAdmin( + filters: AiEchoListFilters, + page = 1, + size = 20, + ): Promise> { + page = Math.max(1, page) + size = Math.min(100, Math.max(1, size)) + const offset = (page - 1) * size + const where = this.buildListWhere(filters) + const [rows, [{ count }]] = await Promise.all([ + this.db + .select() + .from(aiEchoes) + .where(where) + .orderBy(desc(aiEchoes.createdAt)) + .limit(size) + .offset(offset), + this.db + .select({ count: sql`count(*)::int` }) + .from(aiEchoes) + .where(where), + ]) + return { + data: rows.map(mapRow), + pagination: this.paginationOf(Number(count ?? 0), page, size), + } + } + + /** Inserts a new echo row whose id comes from SnowflakeService.nextId and returns it mapped to AiEcho; missing metadata is stored as an empty object. */ async create(input: AiEchoCreateInput): Promise { + const id = this.snowflake.nextId() + const [row] = await this.db + .insert(aiEchoes) + .values({ + id, + scenarioKey: input.scenarioKey, + subjectType: input.subjectType, + subjectId: parseEntityId(input.subjectId), + personaKey: input.personaKey, + status: input.status, + metadata: input.metadata ??
{}, + }) + .returning() + return mapRow(row) + } + + /** Applies a partial update: only patch fields that are not undefined are written (null is written as null), and updatedAt is always refreshed. Returns null when no row has that id. */ async update( + id: EntityId | string, + patch: AiEchoUpdateInput, + ): Promise { + const update: Partial = { + updatedAt: new Date(), + } + if (patch.status !== undefined) update.status = patch.status + if (patch.content !== undefined) update.content = patch.content + if (patch.model !== undefined) update.model = patch.model + if (patch.metadata !== undefined) update.metadata = patch.metadata + if (patch.generatedAt !== undefined) update.generatedAt = patch.generatedAt + if (patch.editedAt !== undefined) update.editedAt = patch.editedAt + if (patch.editedBy !== undefined) { + update.editedBy = patch.editedBy ? parseEntityId(patch.editedBy) : null + } + + const [row] = await this.db + .update(aiEchoes) + .set(update) + .where(eq(aiEchoes.id, parseEntityId(id))) + .returning() + return row ? mapRow(row) : null + } + + /** Sets the status and shallow-merges metadataPatch over the stored metadata. The row is read first and written afterwards, so this is two statements and not one. Returns null when the row does not exist. */ async setStatus( + id: EntityId | string, + status: AiEchoStatus, + metadataPatch?: AiEchoMetadata, + ): Promise { + const existing = await this.findById(id) + if (!existing) return null + const nextMetadata = metadataPatch + ?
{ ...existing.metadata, ...metadataPatch } + : existing.metadata + return this.update(id, { status, metadata: nextMetadata }) + } + + /** Combines the filters that are set with AND; returns undefined when none is set, which leaves the query unfiltered. */ private buildListWhere(filters: AiEchoListFilters): SQL | undefined { + const conds: SQL[] = [] + if (filters.scenarioKey) + conds.push(eq(aiEchoes.scenarioKey, filters.scenarioKey)) + if (filters.status) conds.push(eq(aiEchoes.status, filters.status)) + if (filters.personaKey) + conds.push(eq(aiEchoes.personaKey, filters.personaKey)) + if (filters.subjectType) + conds.push(eq(aiEchoes.subjectType, filters.subjectType)) + if (!conds.length) return undefined + return and(...conds) + } +} diff --git a/apps/core/src/modules/ai/ai-echo/ai-echo.schema.ts b/apps/core/src/modules/ai/ai-echo/ai-echo.schema.ts new file mode 100644 index 00000000000..dc4867e323f --- /dev/null +++ b/apps/core/src/modules/ai/ai-echo/ai-echo.schema.ts @@ -0,0 +1,48 @@ +import { createZodDto } from 'nestjs-zod' +import { z } from 'zod' + +/** Status values accepted by the admin list filter; the same six values as the AiEchoStatus union in ai-echo.types. */ const ECHO_STATUSES = [ + 'pending', + 'generating', + 'ready', + 'edited', + 'failed', + 'archived', +] as const + +/** Body of the regenerate endpoint: a non-empty personaKey and an optional force flag that defaults to false. */ export const RegenerateEchoSchema = z.object({ + personaKey: z.string().min(1), + force: z.boolean().optional().default(false), +}) + +export class RegenerateEchoDto extends createZodDto(RegenerateEchoSchema) {} + +/** Body of the edit endpoint: content of 1 to 8000 characters. */ export const EditEchoSchema = z.object({ + content: z.string().min(1).max(8000), +}) + +export class EditEchoDto extends createZodDto(EditEchoSchema) {} + +/** Query of the admin list endpoint. All filters are optional; page and size are coerced to integers with defaults 1 and 20, and size is capped at 100. */ export const AdminListEchoQuerySchema = z.object({ + scenarioKey: z.string().optional(), + status: z.enum(ECHO_STATUSES).optional(), + personaKey: z.string().optional(), + subjectType: z.string().optional(), + page: z.coerce.number().int().min(1).default(1), + size: z.coerce.number().int().min(1).max(100).default(20), +}) + +export class AdminListEchoQueryDto extends createZodDto( + AdminListEchoQuerySchema, +) {} + +/** Route params identifying a subject; both must be non-empty strings. */ export const SubjectParamsSchema = z.object({ + subjectType: z.string().min(1), + subjectId: z.string().min(1), +}) + +export class
SubjectParamsDto extends createZodDto(SubjectParamsSchema) {} + +export type RegenerateEchoInput = z.infer +export type EditEchoInput = z.infer +export type AdminListEchoQueryInput = z.infer diff --git a/apps/core/src/modules/ai/ai-echo/ai-echo.service.ts b/apps/core/src/modules/ai/ai-echo/ai-echo.service.ts new file mode 100644 index 00000000000..5b3a630d484 --- /dev/null +++ b/apps/core/src/modules/ai/ai-echo/ai-echo.service.ts @@ -0,0 +1,334 @@ +import { Injectable, Logger, type OnApplicationBootstrap } from '@nestjs/common' + +import { AppErrorCode, createAppException } from '~/common/errors' +import { EventManagerService } from '~/processors/helper/helper.event.service' +import { RedisService } from '~/processors/redis/redis.service' + +import { ConfigsService } from '../../configs/configs.service' +import { AiTaskService } from '../ai-task/ai-task.service' +import { + AITaskType, + type EchoGenerateTaskPayload, +} from '../ai-task/ai-task.types' +import { ECHO_DEFAULTS, ECHO_QUOTA_REDIS_KEY_PREFIX } from './ai-echo.constants' +import { AiEchoRepository } from './ai-echo.repository' +import type { AdminListEchoQueryInput } from './ai-echo.schema' +import type { AiEcho, AiEchoStatus } from './ai-echo.types' +import { EchoScenarioRegistry } from './echo-scenario.registry' +import type { EchoScenario } from './scenario.types' + +/** Outcome of queuing one echo: the created row id, the task id (null when no task was created) and the row status. */ interface DispatchResult { + echoId: string + taskId: string | null + status: AiEchoStatus +} + +/** Orchestrates echoes: subscribes scenarios to their trigger events, applies the daily quota, creates rows and generation tasks, and serves the list, edit and delete operations used by the controller. */ @Injectable() +export class AiEchoService implements OnApplicationBootstrap { + private readonly logger = new Logger(AiEchoService.name) + + constructor( + private readonly repository: AiEchoRepository, + private readonly aiTaskService: AiTaskService, + private readonly eventManager: EventManagerService, + private readonly configsService: ConfigsService, + private readonly redisService: RedisService, + private readonly registry: EchoScenarioRegistry, + ) {} + + /** Subscribes every registered scenario that declares a triggerEvent. Errors thrown by the handler are logged and not rethrown. Only scenarios present in the registry at this point are subscribed. */ onApplicationBootstrap() { + for (const scenario of this.registry.list()) { + if
(!scenario.triggerEvent) continue + this.eventManager.on(scenario.triggerEvent, async (payload) => { + try { + await this.handleTrigger(scenario, payload) + } catch (error) { + this.logger.error( + `Echo trigger handler failed: scenario=${scenario.key} error=${(error as Error).message}`, + (error as Error).stack, + ) + } + }) + this.logger.log( + `Echo scenario "${scenario.key}" subscribed to ${scenario.triggerEvent}`, + ) + } + } + + /** Looks up a scenario by key; undefined when it is not registered. */ getScenario(key: string): EchoScenario | undefined { + return this.registry.get(key) + } + + /** Same lookup as getScenario, but throws AI_ECHO_SCENARIO_NOT_REGISTERED for an unknown key. */ requireScenario(key: string): EchoScenario { + const scenario = this.registry.get(key) + if (!scenario) { + throw createAppException(AppErrorCode.AI_ECHO_SCENARIO_NOT_REGISTERED, { + scenarioKey: key, + }) + } + return scenario + } + + /** Returns all registered scenarios. */ listScenarios(): EchoScenario[] { + return this.registry.list() + } + + /** Body of the trigger-event handler. Does nothing unless both enableEcho and enableAutoGenerateEchoOnCreate are set in the ai config; a failed config read counts as disabled. The scenario key is passed as both scenario key and subject type. */ private async handleTrigger( + scenario: EchoScenario, + payload: unknown, + ): Promise { + const aiConfig = await this.configsService.get('ai').catch(() => null) + if (!aiConfig?.enableEcho) return + if (!aiConfig.enableAutoGenerateEchoOnCreate) return + + const subjectId = this.extractSubjectId(payload) + if (!subjectId) { + this.logger.warn( + `Echo trigger for scenario "${scenario.key}" missing subject id`, + ) + return + } + await this.dispatch(scenario.key, scenario.key, subjectId) + } + + /** Reads a string id from payload.id or, failing that, from payload.data.id. Any other payload shape, including a non-string id, yields null. */ private extractSubjectId(payload: unknown): string | null { + if (!payload || typeof payload !== 'object') return null + const obj = payload as Record + const direct = obj.id + if (typeof direct === 'string') return direct + const nested = obj.data + if (nested && typeof nested === 'object') { + const nestedId = (nested as Record).id + if (typeof nestedId === 'string') return nestedId + } + return null + } + + /** Queues one echo per default persona of the scenario, one after another, and returns the per-persona results. Throws when the scenario is not registered. */ async dispatch( + scenarioKey: string, + subjectType: string, + subjectId: string, + ): Promise { + const scenario = this.requireScenario(scenarioKey) + const results: DispatchResult[] = [] + for (const personaKey of scenario.defaultPersonas) {
+ results.push( + await this.enqueueOne(scenarioKey, subjectType, subjectId, personaKey), + ) + } + return results + } + + /** Queues a new echo for one persona; scenarioKey falls back to subjectType. Throws AI_ECHO_REGENERATE_IN_PROGRESS when the latest matching echo is pending or generating and force is false. With force, the latest matching echo is archived first. NOTE(review): without force, a latest echo in any other status is left as it is and a new row is added beside it - confirm that this is intended. */ async regenerate( + subjectType: string, + subjectId: string, + personaKey: string, + force: boolean, + scenarioKey?: string, + ): Promise<{ echoId: string; taskId: string | null }> { + const resolvedScenarioKey = scenarioKey ?? subjectType + this.requireScenario(resolvedScenarioKey) + const existing = await this.repository.findOne({ + scenarioKey: resolvedScenarioKey, + subjectType, + subjectId, + personaKey, + }) + if ( + existing && + (existing.status === 'pending' || existing.status === 'generating') && + !force + ) { + throw createAppException(AppErrorCode.AI_ECHO_REGENERATE_IN_PROGRESS, { + echoId: existing.id, + }) + } + if (existing && force) { + await this.repository.update(existing.id, { status: 'archived' }) + } + const result = await this.enqueueOne( + resolvedScenarioKey, + subjectType, + subjectId, + personaKey, + ) + return { echoId: result.echoId, taskId: result.taskId } + } + + /** Creates the echo row and its generation task. The quota is consumed first; when it is exceeded the row is stored as failed with the quota details and no task is created. When task creation throws, the row is marked failed with a truncated error message and the error is not rethrown. */ private async enqueueOne( + scenarioKey: string, + subjectType: string, + subjectId: string, + personaKey: string, + ): Promise { + const quotaOk = await this.consumeQuota() + const row = await this.repository.create({ + scenarioKey, + subjectType, + subjectId, + personaKey, + status: quotaOk.allowed ? 'pending' : 'failed', + metadata: quotaOk.allowed + ?
{} + : { + errorCode: AppErrorCode.AI_ECHO_DAILY_QUOTA_EXCEEDED, + quota: quotaOk.quota, + used: quotaOk.used, + }, + }) + if (!quotaOk.allowed) { + this.logger.warn( + `Echo daily quota exceeded: used=${quotaOk.used} quota=${quotaOk.quota}`, + ) + return { echoId: row.id, taskId: null, status: row.status } + } + /* quota available: create the task, then record its id in the row metadata */ try { + const payload: EchoGenerateTaskPayload = { echoId: row.id } + const { taskId } = await this.aiTaskService.crud.createTask({ + type: AITaskType.EchoGenerate, + payload: payload as unknown as Record, + dedupKey: `echo:generate:${row.id}`, + }) + await this.repository.update(row.id, { + metadata: { ...row.metadata, taskId }, + }) + return { echoId: row.id, taskId, status: row.status } + } catch (error) { + this.logger.error( + `Failed to enqueue echo task: echoId=${row.id} error=${(error as Error).message}`, + ) + await this.repository.update(row.id, { + status: 'failed', + metadata: { + ...row.metadata, + errorCode: AppErrorCode.AI_ECHO_GENERATION_FAILED, + upstreamMessage: (error as Error).message?.slice( + 0, + ECHO_DEFAULTS.upstreamMessageMaxLen, + ), + }, + }) + return { echoId: row.id, taskId: null, status: 'failed' } + } + } + + /** Increments the per-day Redis counter and reports whether this request fits the quota. The quota is the numeric ai config value echoDailyQuota, else ECHO_DEFAULTS.dailyQuota; a quota of 0 or less disables the limit without touching Redis. The key receives a 26 hour expiry when the counter is first created. Redis errors are logged and the request is allowed. NOTE(review): the counter is incremented before the task is created and is not decremented if creation fails - confirm that this is acceptable. */ private async consumeQuota(): Promise<{ + allowed: boolean + used: number + quota: number + }> { + let quota: number = ECHO_DEFAULTS.dailyQuota + try { + const aiConfig = await this.configsService.get('ai') + if (typeof aiConfig?.echoDailyQuota === 'number') { + quota = aiConfig.echoDailyQuota + } + } catch { + // fall through with default + } + if (quota <= 0) { + return { allowed: true, used: 0, quota: 0 } + } + const key = `${ECHO_QUOTA_REDIS_KEY_PREFIX}${this.dayKey()}` + try { + const redis = this.redisService.getClient() + const used = await redis.incr(key) + if (used === 1) { + await redis.expire(key, 60 * 60 * 26) + } + return { allowed: used <= quota, used, quota } + } catch (error) { + this.logger.warn( + `Echo quota redis unavailable; allowing request: ${(error as Error).message}`, + ) + return { allowed:
true, used: 0, quota } + } + } + + /** Current date as YYYY-MM-DD, taken from the first ten characters of toISOString and therefore in UTC. */ private dayKey(): string { + return new Date().toISOString().slice(0, 10) + } + + /** Returns the echoes of a subject whose status is ready or edited, optionally restricted to one persona. All rows of the subject are loaded and then filtered in memory. */ async listPublicBySubject( + scenarioKey: string, + subjectType: string, + subjectId: string, + personaKey?: string, + ): Promise { + const rows = await this.repository.findAllBySubject( + scenarioKey, + subjectType, + subjectId, + ) + return rows.filter( + (row) => + (row.status === 'ready' || row.status === 'edited') && + (!personaKey || row.personaKey === personaKey), + ) + } + + /** Loads an echo by id or throws AI_ECHO_NOT_FOUND. */ async getById(id: string): Promise { + const row = await this.repository.findById(id) + if (!row) { + throw createAppException(AppErrorCode.AI_ECHO_NOT_FOUND, { id }) + } + return row + } + + /** Replaces the content, sets the status to edited and records the editor and the edit time. Throws AI_ECHO_NOT_FOUND when the row is missing. NOTE(review): the current status is not checked, so an archived echo becomes edited again - confirm that this is intended. */ async edit(id: string, content: string, actorId: string): Promise { + await this.getById(id) + const updated = await this.repository.update(id, { + status: 'edited', + content, + editedAt: new Date(), + editedBy: actorId, + }) + if (!updated) { + throw createAppException(AppErrorCode.AI_ECHO_NOT_FOUND, { id }) + } + return updated + } + + /** Marks the echo as archived and keeps the row; throws AI_ECHO_NOT_FOUND when it does not exist. */ async softDelete(id: string): Promise { + const updated = await this.repository.setStatus(id, 'archived') + if (!updated) { + throw createAppException(AppErrorCode.AI_ECHO_NOT_FOUND, { id }) + } + } + + /** Forwards the validated admin query to the repository as filters plus page and size. */ async adminList(query: AdminListEchoQueryInput) { + return this.repository.findAdmin( + { + scenarioKey: query.scenarioKey, + status: query.status, + personaKey: query.personaKey, + subjectType: query.subjectType, + }, + query.page, + query.size, + ) + } + + /** Cleans up the echoes of a deleted subject: pending or generating rows become failed with metadata.aborted set, ready or edited rows become archived, other rows are left alone. NOTE(review): the registry lookup matches on key equal to subjectType, so scenarioKey always ends up equal to subjectType. */ async handleSubjectDeleted( + subjectType: string, + subjectId: string, + ): Promise { + const scenario = this.registry.list().find((s) => s.key === subjectType) + const scenarioKey = scenario?.key ??
subjectType + const rows = await this.repository.findAllBySubject( + scenarioKey, + subjectType, + subjectId, + ) + for (const row of rows) { + if (row.status === 'pending' || row.status === 'generating') { + await this.repository.update(row.id, { + status: 'failed', + metadata: { ...row.metadata, aborted: true }, + }) + } else if (row.status === 'ready' || row.status === 'edited') { + await this.repository.update(row.id, { status: 'archived' }) + } + } + } +} diff --git a/apps/core/src/modules/ai/ai-echo/ai-echo.types.ts b/apps/core/src/modules/ai/ai-echo/ai-echo.types.ts new file mode 100644 index 00000000000..8cbe8a6d669 --- /dev/null +++ b/apps/core/src/modules/ai/ai-echo/ai-echo.types.ts @@ -0,0 +1,64 @@ +import type { EntityId } from '~/shared/id/entity-id' + +/** Lifecycle states of an echo row. */ export type AiEchoStatus = + | 'pending' + | 'generating' + | 'ready' + | 'edited' + | 'failed' + | 'archived' + +/** Metadata stored with an echo. The named keys are the ones this module reads or writes; arbitrary extra keys are allowed by the index signature. */ export interface AiEchoMetadata { + taskId?: string + retrievalIds?: string[] + retrievalSimilarities?: number[] + memoryIds?: string[] + profileRefreshedAt?: string | null + errorCode?: string + upstreamMessage?: string + aborted?: boolean + [key: string]: unknown +} + +/** Domain representation of an echo row as returned by the repository. */ export interface AiEcho { + id: EntityId + scenarioKey: string + subjectType: string + subjectId: EntityId + personaKey: string + content: string | null + status: AiEchoStatus + model: string | null + metadata: AiEchoMetadata + generatedAt: Date | null + editedAt: Date | null + editedBy: EntityId | null + createdAt: Date + updatedAt: Date | null +} + +/** Fields accepted by AiEchoRepository.create. */ export interface AiEchoCreateInput { + scenarioKey: string + subjectType: string + subjectId: string + personaKey: string + status: AiEchoStatus + metadata?: AiEchoMetadata +} + +/** Patch accepted by AiEchoRepository.update; fields left undefined are not written. */ export interface AiEchoUpdateInput { + status?: AiEchoStatus + content?: string | null + model?: string | null + metadata?: AiEchoMetadata + generatedAt?: Date | null + editedAt?: Date | null + editedBy?: string | null +} + +/** Optional equality filters for the admin listing. */ export interface AiEchoListFilters { + scenarioKey?: string + status?:
AiEchoStatus + personaKey?: string + subjectType?: string +} diff --git a/apps/core/src/modules/ai/ai-echo/ai-echo.views.ts b/apps/core/src/modules/ai/ai-echo/ai-echo.views.ts new file mode 100644 index 00000000000..09c3ad183a9 --- /dev/null +++ b/apps/core/src/modules/ai/ai-echo/ai-echo.views.ts @@ -0,0 +1,53 @@ +import { z } from 'zod' + +/** Accepts either a Date instance or a string for timestamp fields. */ const dateOrString = z.union([z.date(), z.string()]) + +/** Strict allow-list of the metadata keys exposed publicly; any other key fails validation. */ const PublicMetadataSchema = z + .object({ + profileRefreshedAt: z.union([dateOrString, z.null()]).optional(), + retrievalIds: z.array(z.string()).optional(), + memoryIds: z.array(z.string()).optional(), + }) + .strict() + +/** Strict shape of an echo in public responses; it has no model, editedBy, createdAt or updatedAt field. */ const AiEchoPublicSchema = z + .object({ + id: z.string(), + scenarioKey: z.string(), + subjectType: z.string(), + subjectId: z.string(), + personaKey: z.string(), + content: z.string().nullable(), + status: z.string(), + generatedAt: dateOrString.nullable(), + editedAt: dateOrString.nullable(), + metadata: PublicMetadataSchema, + }) + .strict() + +/** Strict shape of an echo in admin responses, including model, unrestricted metadata, editor and timestamps. */ const AiEchoAdminSchema = z + .object({ + id: z.string(), + scenarioKey: z.string(), + subjectType: z.string(), + subjectId: z.string(), + personaKey: z.string(), + content: z.string().nullable(), + status: z.string(), + model: z.string().nullable(), + metadata: z.record(z.string(), z.unknown()), + generatedAt: dateOrString.nullable(), + editedAt: dateOrString.nullable(), + editedBy: z.string().nullable(), + createdAt: dateOrString, + updatedAt: dateOrString.nullable(), + }) + .strict() + +/** View schemas keyed by audience. */ export const AiEchoViews = { + public: AiEchoPublicSchema, + admin: AiEchoAdminSchema, +} as const + +export type AiEchoPublicView = z.infer +export type AiEchoAdminView = z.infer diff --git a/apps/core/src/modules/ai/ai-echo/echo-prompt-builder.ts b/apps/core/src/modules/ai/ai-echo/echo-prompt-builder.ts new file mode 100644 index 00000000000..62700fbe200 --- /dev/null +++ b/apps/core/src/modules/ai/ai-echo/echo-prompt-builder.ts @@ -0,0 +1,68 @@ +import type { RecentlyRow } from '../../recently/recently.types' +import {
AI_PERSONA_PROMPTS } from '../ai-persona/prompts'
+import type { ChatMessage, EchoPromptInput } from './scenario.types'
+
+const NO_UNVERIFIED_MEMORY_RULE =
+  'Do NOT claim to remember the author\'s past ("you wrote", "back when", "I remember", "我记得").'
+
+/**
+ * Assemble the chat messages used to generate an echo for a "recently" entry.
+ *
+ * The `passerby` persona receives only its fixed system prompt. Every other
+ * persona receives the `innerSelf` prompt followed by whichever optional
+ * sections have data (voice summary, exemplars, memories, retrieval) and a
+ * closing RULES list. The no-unverified-memory rule is added only when both
+ * `retrieval` and `memories` are empty.
+ *
+ * @param input - Subject, persona, profile and context gathered by the caller.
+ * @returns A system message followed by a user message carrying the subject
+ *   content (empty string when the subject has none).
+ */
+export function buildRecentlyEchoPrompt(
+  input: EchoPromptInput,
+): ChatMessage[] {
+  const { persona, profile, exemplars, memories, retrieval } = input
+  const userMessage: ChatMessage = {
+    role: 'user',
+    content: input.subject?.content ?? '',
+  }
+
+  // The passerby persona uses its fixed prompt; no context sections are added.
+  if (persona.key === 'passerby') {
+    return [
+      { role: 'system', content: AI_PERSONA_PROMPTS.passerby },
+      userMessage,
+    ]
+  }
+
+  const lines: string[] = [AI_PERSONA_PROMPTS.innerSelf]
+
+  // Prefer the trimmed summary; fall back to the full profile text.
+  const voice = profile
+    ? profile.profileSummary?.trim() || profile.profile.trim()
+    : ''
+  if (voice) {
+    lines.push('', 'Voice summary:', voice)
+  }
+
+  if (exemplars.length > 0) {
+    lines.push('', 'Mimic the cadence of these passages:')
+    for (const [position, passage] of exemplars.entries()) {
+      lines.push(`${position + 1}. ${passage.content}`)
+    }
+  }
+
+  if (memories.length > 0) {
+    lines.push('', 'Canonical facts (apply only if relevant):')
+    for (const memory of memories) {
+      lines.push(`- ${memory.content}`)
+    }
+  }
+
+  if (retrieval.length > 0) {
+    lines.push(
+      '',
+      'Relevant past thoughts (reference only if directly applicable):',
+    )
+    for (const hit of retrieval) {
+      lines.push(`[${hit.sourceType}:${hit.sourceId}] ${hit.content}`)
+    }
+  }
+
+  // Without retrieved passages or memories there is nothing to back a
+  // "I remember…" claim, so the prompt forbids it explicitly.
+  const hasGrounding = retrieval.length > 0 || memories.length > 0
+  lines.push('', 'RULES:', '- Reply in 1–3 short sentences.')
+  if (!hasGrounding) {
+    lines.push(`- ${NO_UNVERIFIED_MEMORY_RULE}`)
+  }
+  lines.push(
+    "- Match the author's first-person voice.",
+    '- Plain markdown only; no code fences.',
+  )
+
+  return [{ role: 'system', content: lines.join('\n') }, userMessage]
+}
diff --git a/apps/core/src/modules/ai/ai-echo/echo-scenario.registry.ts
b/apps/core/src/modules/ai/ai-echo/echo-scenario.registry.ts new file mode 100644 index 00000000000..0d91abc2cb5 --- /dev/null +++ b/apps/core/src/modules/ai/ai-echo/echo-scenario.registry.ts @@ -0,0 +1,31 @@ +import { Injectable, Logger } from '@nestjs/common' + +import type { EchoScenario } from './scenario.types' + +@Injectable() +export class EchoScenarioRegistry { + private readonly logger = new Logger(EchoScenarioRegistry.name) + private readonly byKey = new Map() + + register(scenario: EchoScenario): void { + if (this.byKey.has(scenario.key)) { + this.logger.warn( + `Echo scenario "${scenario.key}" already registered; overwriting`, + ) + } + this.byKey.set(scenario.key, scenario) + this.logger.log(`Echo scenario registered: ${scenario.key}`) + } + + get(key: string): EchoScenario | undefined { + return this.byKey.get(key) + } + + list(): EchoScenario[] { + return [...this.byKey.values()] + } + + clear(): void { + this.byKey.clear() + } +} diff --git a/apps/core/src/modules/ai/ai-echo/scenario.types.ts b/apps/core/src/modules/ai/ai-echo/scenario.types.ts new file mode 100644 index 00000000000..7075eec2ee9 --- /dev/null +++ b/apps/core/src/modules/ai/ai-echo/scenario.types.ts @@ -0,0 +1,37 @@ +import type { BusinessEvents } from '~/constants/business-event.constant' + +import type { RetrievalResult } from '../ai-embeddings/ai-embeddings.types' +import type { AiMemory } from '../ai-memory/ai-memory.types' +import type { + ExemplarPassage, + PersonaDefinition, + PersonaKey, + PersonaProfile, +} from '../ai-persona/ai-persona.types' + +export interface ChatMessage { + role: 'system' | 'user' | 'assistant' + content: string +} + +export interface EchoPromptInput { + subject: Subject + persona: PersonaDefinition + profile: PersonaProfile | null + retrieval: RetrievalResult[] + memories: AiMemory[] + exemplars: ExemplarPassage[] +} + +export interface EchoScenario { + readonly key: string + readonly triggerEvent?: BusinessEvents + readonly defaultPersonas: 
PersonaKey[] + readonly persistEchoes?: boolean + readonly emitOnReady?: BusinessEvents + + loadSubject: (subjectId: string) => Promise + extractRetrievalQuery: (subject: Subject) => string | null + buildPrompt: (input: EchoPromptInput) => ChatMessage[] + postProcess?: (content: string, subject: Subject) => string +} diff --git a/apps/core/src/modules/ai/ai-echo/tasks/echo-generate.processor.ts b/apps/core/src/modules/ai/ai-echo/tasks/echo-generate.processor.ts new file mode 100644 index 00000000000..10a90ba2a3f --- /dev/null +++ b/apps/core/src/modules/ai/ai-echo/tasks/echo-generate.processor.ts @@ -0,0 +1,256 @@ +import { + forwardRef, + Inject, + Injectable, + Logger, + type OnModuleInit, +} from '@nestjs/common' + +import { AppErrorCode } from '~/common/errors' +import { BusinessEvents, EventScope } from '~/constants/business-event.constant' +import { EventManagerService } from '~/processors/helper/helper.event.service' +import { + type TaskExecuteContext, + TaskQueueProcessor, +} from '~/processors/task-queue' + +import { ConfigsService } from '../../../configs/configs.service' +import { AiService } from '../../ai.service' +import { AiEmbeddingsService } from '../../ai-embeddings/ai-embeddings.service' +import { AiMemoryService } from '../../ai-memory/ai-memory.service' +import { AiPersonaService } from '../../ai-persona/ai-persona.service' +import { tryGetPersonaDefinition } from '../../ai-persona/persona-registry' +import { + AITaskType, + type EchoGenerateTaskPayload, +} from '../../ai-task/ai-task.types' +import { ECHO_DEFAULTS } from '../ai-echo.constants' +import { AiEchoRepository } from '../ai-echo.repository' +import type { AiEchoMetadata } from '../ai-echo.types' +import { EchoScenarioRegistry } from '../echo-scenario.registry' + +@Injectable() +export class EchoGenerateTaskProcessor implements OnModuleInit { + private readonly logger = new Logger(EchoGenerateTaskProcessor.name) + + constructor( + private readonly taskProcessor: TaskQueueProcessor, + 
private readonly repository: AiEchoRepository, + @Inject(forwardRef(() => AiService)) + private readonly aiService: AiService, + private readonly aiEmbeddingsService: AiEmbeddingsService, + private readonly aiMemoryService: AiMemoryService, + private readonly aiPersonaService: AiPersonaService, + private readonly configsService: ConfigsService, + private readonly eventManager: EventManagerService, + private readonly registry: EchoScenarioRegistry, + ) {} + + onModuleInit() { + this.taskProcessor.registerHandler({ + type: AITaskType.EchoGenerate, + execute: (payload, context) => this.handle(payload, context), + }) + this.logger.log('Echo generate task handler registered') + } + + async handle( + payload: EchoGenerateTaskPayload, + context: TaskExecuteContext, + ): Promise { + const { echoId } = payload + const row = await this.repository.findById(echoId) + if (!row) { + await context.appendLog('warn', `Echo not found: ${echoId}`) + return + } + if (row.status !== 'pending' && row.status !== 'generating') { + await context.appendLog( + 'info', + `Echo status=${row.status}; skipping generation`, + ) + return + } + + await this.repository.update(echoId, { status: 'generating' }) + + const scenario = this.registry.get(row.scenarioKey) + if (!scenario) { + await this.terminalFail( + echoId, + AppErrorCode.AI_ECHO_SCENARIO_NOT_REGISTERED, + `Scenario "${row.scenarioKey}" not registered`, + ) + return + } + + let subject: unknown + try { + subject = await scenario.loadSubject(row.subjectId) + } catch (error) { + await this.terminalFail( + echoId, + AppErrorCode.AI_ECHO_SUBJECT_NOT_FOUND, + (error as Error).message, + ) + return + } + if (!subject) { + await this.terminalFail( + echoId, + AppErrorCode.AI_ECHO_SUBJECT_NOT_FOUND, + `subject ${row.subjectType}:${row.subjectId} not found`, + ) + return + } + + const persona = tryGetPersonaDefinition(row.personaKey) + if (!persona) { + await this.terminalFail( + echoId, + AppErrorCode.AI_PERSONA_NOT_FOUND, + `persona 
"${row.personaKey}" not found`, + ) + return + } + + const aiConfig = await this.configsService.get('ai').catch(() => null) + const topK = aiConfig?.echoRetrievalTopK ?? ECHO_DEFAULTS.retrievalTopK + const minSimilarity = + aiConfig?.echoRetrievalMinSimilarity ?? + ECHO_DEFAULTS.retrievalMinSimilarity + const exemplarsCount = + aiConfig?.echoExemplarsCount ?? ECHO_DEFAULTS.exemplarsCount + + const profile = persona.needsProfile + ? await this.aiPersonaService + .getProfileOrNull(persona.key) + .catch(() => null) + : null + + const query = scenario.extractRetrievalQuery(subject) + + const retrieval = + persona.needsRetrieval && query + ? await this.aiEmbeddingsService + .search(query, { + topK, + minSimilarity, + sourceTypes: ['note', 'page'], + }) + .catch((error) => { + this.logger.warn( + `Echo retrieval failed: ${(error as Error).message}`, + ) + return [] + }) + : [] + + const memories = await this.aiMemoryService + .recall({ + scope: ['global', `persona:${persona.key}`], + query: query ?? undefined, + topK, + minSimilarity, + }) + .catch((error) => { + this.logger.warn(`Echo recall failed: ${(error as Error).message}`) + return [] + }) + + const exemplars = persona.usesExemplars + ? await this.aiPersonaService + .pickExemplars(persona.key, { count: exemplarsCount }) + .catch(() => []) + : [] + + const messages = scenario.buildPrompt({ + subject, + persona, + profile, + retrieval, + memories, + exemplars, + }) + + let runtime + try { + runtime = await this.aiService.getEchoModel() + } catch (error) { + await this.terminalFail( + echoId, + AppErrorCode.AI_ECHO_MODEL_NOT_CONFIGURED, + (error as Error).message, + ) + return + } + + let result + try { + result = await runtime.generateText({ + messages, + temperature: 0.7, + maxRetries: 2, + }) + } catch (error) { + await this.terminalFail( + echoId, + AppErrorCode.AI_ECHO_GENERATION_FAILED, + (error as Error).message, + ) + throw error + } + const finalContent = + scenario.postProcess?.(result.text, subject) ?? 
result.text + + const metadataPatch: AiEchoMetadata = { + ...row.metadata, + retrievalIds: retrieval.map( + (r) => `${r.sourceType}:${r.sourceId}#${r.chunkIndex}`, + ), + retrievalSimilarities: retrieval.map((r) => r.similarity), + memoryIds: memories.map((m) => m.id), + profileRefreshedAt: profile?.refreshedAt + ? profile.refreshedAt.toISOString() + : null, + } + + const saved = await this.repository.update(echoId, { + status: 'ready', + content: finalContent, + model: runtime.providerInfo.model, + generatedAt: new Date(), + metadata: metadataPatch, + }) + + if (saved && scenario.emitOnReady) { + await this.eventManager.emit( + scenario.emitOnReady as BusinessEvents, + saved, + { scope: EventScope.TO_SYSTEM_VISITOR }, + ) + } + + await context.appendLog('info', `Echo generated: ${echoId}`) + if (result.usage?.totalTokens) { + await context.incrementTokens(result.usage.totalTokens) + } + } + + private async terminalFail( + echoId: string, + code: AppErrorCode, + message?: string, + ): Promise { + const existing = await this.repository.findById(echoId) + const truncated = message?.slice(0, ECHO_DEFAULTS.upstreamMessageMaxLen) + await this.repository.update(echoId, { + status: 'failed', + metadata: { + ...existing?.metadata, + errorCode: code, + upstreamMessage: truncated, + }, + }) + } +} diff --git a/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.constants.ts b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.constants.ts new file mode 100644 index 00000000000..bc1b4235025 --- /dev/null +++ b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.constants.ts @@ -0,0 +1,11 @@ +export const EMBEDDINGS_DEFAULTS = { + chunkMaxTokens: 500, + chunkOverlapTokens: 50, + backfillBatchSize: 50, + defaultMinSimilarity: 0.7, + defaultTopK: 5, +} as const + +export const SUPPORTED_SOURCE_TYPES = ['post', 'note', 'page'] as const + +export type SupportedSourceType = (typeof SUPPORTED_SOURCE_TYPES)[number] diff --git 
a/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.controller.ts b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.controller.ts new file mode 100644 index 00000000000..4dd08f6bcfa --- /dev/null +++ b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.controller.ts @@ -0,0 +1,24 @@ +import { Body, Get, Post } from '@nestjs/common' + +import { ApiController } from '~/common/decorators/api-controller.decorator' +import { Auth } from '~/common/decorators/auth.decorator' + +import { BackfillDto } from './ai-embeddings.schema' +import { AiEmbeddingsService } from './ai-embeddings.service' + +@ApiController('ai-embeddings') +export class AiEmbeddingsController { + constructor(private readonly service: AiEmbeddingsService) {} + + @Post('backfill') + @Auth() + async backfill(@Body() body: BackfillDto) { + return this.service.runBackfill({ sourceTypes: body.sourceTypes }) + } + + @Get('stats') + @Auth() + async stats() { + return this.service.getStats() + } +} diff --git a/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.module.ts b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.module.ts new file mode 100644 index 00000000000..371bcaaa269 --- /dev/null +++ b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.module.ts @@ -0,0 +1,26 @@ +import { forwardRef, Module } from '@nestjs/common' + +import { AiModule } from '../ai.module' +import { AiTaskModule } from '../ai-task/ai-task.module' +import { AiEmbeddingsController } from './ai-embeddings.controller' +import { AiEmbeddingsRepository } from './ai-embeddings.repository' +import { AiEmbeddingsService } from './ai-embeddings.service' +import { NoteEmbeddingEventsListener } from './listeners/note-events.listener' +import { PageEmbeddingEventsListener } from './listeners/page-events.listener' +import { PostEmbeddingEventsListener } from './listeners/post-events.listener' +import { EmbedSyncTaskProcessor } from './tasks/embed-sync.processor' + +@Module({ + imports: [AiTaskModule, forwardRef(() => 
AiModule)], + controllers: [AiEmbeddingsController], + providers: [ + AiEmbeddingsRepository, + AiEmbeddingsService, + EmbedSyncTaskProcessor, + NoteEmbeddingEventsListener, + PageEmbeddingEventsListener, + PostEmbeddingEventsListener, + ], + exports: [AiEmbeddingsService, AiEmbeddingsRepository], +}) +export class AiEmbeddingsModule {} diff --git a/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.repository.ts b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.repository.ts new file mode 100644 index 00000000000..575d80dbe7c --- /dev/null +++ b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.repository.ts @@ -0,0 +1,230 @@ +import { Inject, Injectable } from '@nestjs/common' +import { and, eq, inArray, sql } from 'drizzle-orm' + +import { PG_DB_TOKEN } from '~/constants/system.constant' +import { corpusEmbeddings } from '~/database/schema' +import { + BaseRepository, + toEntityId, +} from '~/processors/database/base.repository' +import type { AppDatabase } from '~/processors/database/postgres.provider' +import { type EntityId, parseEntityId } from '~/shared/id/entity-id' +import { SnowflakeService } from '~/shared/id/snowflake.service' + +import type { + CorpusEmbeddingRow, + EmbeddingStats, + RetrievalResult, +} from './ai-embeddings.types' + +const mapRow = ( + row: typeof corpusEmbeddings.$inferSelect, +): CorpusEmbeddingRow => ({ + id: toEntityId(row.id)!, + sourceType: row.sourceType, + sourceId: toEntityId(row.sourceId)!, + chunkIndex: row.chunkIndex, + content: row.content, + contentHash: row.contentHash, + embedding: row.embedding, + embeddingModel: row.embeddingModel, + dim: row.dim, + createdAt: row.createdAt, +}) + +const vectorLiteral = (vec: number[]) => `[${vec.join(',')}]` + +export interface UpsertChunkInput { + sourceType: string + sourceId: string + chunkIndex: number + content: string + contentHash: string + embedding: number[] + embeddingModel: string + dim: number +} + +@Injectable() +export class AiEmbeddingsRepository extends 
BaseRepository { + constructor( + @Inject(PG_DB_TOKEN) db: AppDatabase, + private readonly snowflake: SnowflakeService, + ) { + super(db) + } + + async findBySource( + sourceType: string, + sourceId: EntityId | string, + embeddingModel: string, + ): Promise { + const rows = await this.db + .select() + .from(corpusEmbeddings) + .where( + and( + eq(corpusEmbeddings.sourceType, sourceType), + eq(corpusEmbeddings.sourceId, parseEntityId(sourceId)), + eq(corpusEmbeddings.embeddingModel, embeddingModel), + )!, + ) + return rows.map(mapRow) + } + + async deleteBySource( + sourceType: string, + sourceId: EntityId | string, + ): Promise { + const res = await this.db + .delete(corpusEmbeddings) + .where( + and( + eq(corpusEmbeddings.sourceType, sourceType), + eq(corpusEmbeddings.sourceId, parseEntityId(sourceId)), + )!, + ) + .returning({ id: corpusEmbeddings.id }) + return res.length + } + + async deleteByIndices( + sourceType: string, + sourceId: EntityId | string, + embeddingModel: string, + chunkIndices: number[], + ): Promise { + if (chunkIndices.length === 0) return 0 + const res = await this.db + .delete(corpusEmbeddings) + .where( + and( + eq(corpusEmbeddings.sourceType, sourceType), + eq(corpusEmbeddings.sourceId, parseEntityId(sourceId)), + eq(corpusEmbeddings.embeddingModel, embeddingModel), + inArray(corpusEmbeddings.chunkIndex, chunkIndices), + )!, + ) + .returning({ id: corpusEmbeddings.id }) + return res.length + } + + async upsertChunks(inputs: UpsertChunkInput[]): Promise { + if (inputs.length === 0) return 0 + const rows = inputs.map((input) => ({ + id: this.snowflake.nextId(), + sourceType: input.sourceType, + sourceId: parseEntityId(input.sourceId), + chunkIndex: input.chunkIndex, + content: input.content, + contentHash: input.contentHash, + embedding: input.embedding, + embeddingModel: input.embeddingModel, + dim: input.dim, + })) + const res = await this.db + .insert(corpusEmbeddings) + .values(rows) + .onConflictDoUpdate({ + target: [ + 
corpusEmbeddings.sourceType,
+          corpusEmbeddings.sourceId,
+          corpusEmbeddings.chunkIndex,
+          corpusEmbeddings.embeddingModel,
+        ],
+        set: {
+          content: sql`excluded.content`,
+          contentHash: sql`excluded.content_hash`,
+          embedding: sql`excluded.embedding`,
+          dim: sql`excluded.dim`,
+        },
+      })
+      .returning({ id: corpusEmbeddings.id })
+    return res.length
+  }
+
+  /**
+   * Return the `topK` stored chunks closest to `queryVector`.
+   *
+   * Rows are restricted to `options.embeddingModel` and, when
+   * `options.sourceTypes` is a non-empty array, to those source types. They
+   * are ordered by the pgvector `<=>` distance to the query vector.
+   *
+   * @param queryVector - Query embedding; serialised as a `[a,b,c]` literal
+   *   and cast to `vector` in SQL.
+   * @param options - Model filter, row limit and optional source-type filter.
+   * @returns One entry per row with `distance` as returned by the database
+   *   and `similarity` computed as `1 - distance`.
+   */
+  async searchByVector(
+    queryVector: number[],
+    options: {
+      embeddingModel: string
+      topK: number
+      sourceTypes?: string[]
+    },
+  ): Promise {
+    const literal = vectorLiteral(queryVector)
+    // A JS array interpolated into a drizzle `sql` template is expanded into a
+    // parenthesised parameter list, which Postgres does not accept as the
+    // array operand of `= ANY(...)`. `inArray` renders a regular
+    // `IN ($1, $2, ...)` predicate instead.
+    const filterSourceTypes =
+      options.sourceTypes && options.sourceTypes.length > 0
+        ? sql`AND ${inArray(corpusEmbeddings.sourceType, options.sourceTypes)}`
+        : sql``
+
+    const rows = await this.db.execute<{
+      source_type: string
+      source_id: string
+      chunk_index: number
+      content: string
+      distance: number
+    }>(sql`
+      SELECT
+        ${corpusEmbeddings.sourceType} AS source_type,
+        ${corpusEmbeddings.sourceId} AS source_id,
+        ${corpusEmbeddings.chunkIndex} AS chunk_index,
+        ${corpusEmbeddings.content} AS content,
+        (${corpusEmbeddings.embedding} <=> ${literal}::vector) AS distance
+      FROM ${corpusEmbeddings}
+      WHERE ${corpusEmbeddings.embeddingModel} = ${options.embeddingModel}
+        ${filterSourceTypes}
+      ORDER BY ${corpusEmbeddings.embedding} <=> ${literal}::vector
+      LIMIT ${options.topK}
+    `)
+
+    // The result is either a bare array of rows or an object exposing `.rows`;
+    // both shapes are handled here.
+    const data = Array.isArray(rows) ? rows : (rows.rows ?? [])
+    return data.map((row: any) => {
+      const distance = Number(row.distance)
+      return {
+        sourceType: row.source_type,
+        sourceId: toEntityId(row.source_id)!,
+        chunkIndex: Number(row.chunk_index),
+        content: row.content,
+        distance,
+        similarity: 1 - distance,
+      }
+    })
+  }
+
+  async stats(): Promise {
+    const [totalRow] = await this.db
+      .select({ count: sql`count(*)::int` })
+      .from(corpusEmbeddings)
+    const total = Number(totalRow?.count ??
0) + + const byModelRows = await this.db + .select({ + model: corpusEmbeddings.embeddingModel, + dim: corpusEmbeddings.dim, + rows: sql`count(*)::int`, + }) + .from(corpusEmbeddings) + .groupBy(corpusEmbeddings.embeddingModel, corpusEmbeddings.dim) + + const bySourceRows = await this.db + .select({ + type: corpusEmbeddings.sourceType, + rows: sql`count(*)::int`, + }) + .from(corpusEmbeddings) + .groupBy(corpusEmbeddings.sourceType) + + return { + total, + byModel: byModelRows.map((r) => ({ + model: r.model, + dim: Number(r.dim), + rows: Number(r.rows), + })), + bySourceType: bySourceRows.map((r) => ({ + type: r.type, + rows: Number(r.rows), + })), + } + } +} diff --git a/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.schema.ts b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.schema.ts new file mode 100644 index 00000000000..eea6937e820 --- /dev/null +++ b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.schema.ts @@ -0,0 +1,10 @@ +import { createZodDto } from 'nestjs-zod' +import { z } from 'zod' + +export const BackfillSchema = z.object({ + sourceTypes: z.array(z.enum(['post', 'note', 'page'])).optional(), +}) + +export class BackfillDto extends createZodDto(BackfillSchema) {} + +export type BackfillInput = z.infer diff --git a/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.service.ts b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.service.ts new file mode 100644 index 00000000000..459c4f1b7dc --- /dev/null +++ b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.service.ts @@ -0,0 +1,208 @@ +import { forwardRef, Inject, Injectable, Logger } from '@nestjs/common' + +import { AppErrorCode, createAppException } from '~/common/errors' +import { PG_DB_TOKEN } from '~/constants/system.constant' +import { DatabaseService } from '~/processors/database/database.service' +import type { AppDatabase } from '~/processors/database/postgres.provider' + +import { ConfigsService } from '../../configs/configs.service' +import { AiService } from 
'../ai.service' +import { AIFeatureKey } from '../ai.types' +import { EMBEDDINGS_DEFAULTS } from './ai-embeddings.constants' +import { AiEmbeddingsRepository } from './ai-embeddings.repository' +import type { + EmbeddingStats, + RetrievalResult, + SearchOptions, +} from './ai-embeddings.types' +import { chunk } from './chunker' +import { + type BackfillOptions, + type BackfillSummary, + runCorpusBackfill, +} from './tasks/corpus-backfill.driver' + +interface ResolvedEmbeddingParams { + chunkMaxTokens: number + chunkOverlapTokens: number + backfillBatchSize: number + defaultMinSimilarity: number + defaultTopK: number +} + +@Injectable() +export class AiEmbeddingsService { + private readonly logger = new Logger(AiEmbeddingsService.name) + + constructor( + private readonly repository: AiEmbeddingsRepository, + private readonly configService: ConfigsService, + @Inject(forwardRef(() => AiService)) + private readonly aiService: AiService, + private readonly databaseService: DatabaseService, + @Inject(PG_DB_TOKEN) private readonly db: AppDatabase, + ) {} + + async runBackfill(options: BackfillOptions = {}): Promise { + return runCorpusBackfill(this, this.db, options) + } + + async resolveParams(): Promise { + const aiConfig = await this.configService.get('ai') + const params = aiConfig.aiEmbedding ?? {} + return { + chunkMaxTokens: + params.chunkMaxTokens ?? EMBEDDINGS_DEFAULTS.chunkMaxTokens, + chunkOverlapTokens: + params.chunkOverlapTokens ?? EMBEDDINGS_DEFAULTS.chunkOverlapTokens, + backfillBatchSize: + params.backfillBatchSize ?? EMBEDDINGS_DEFAULTS.backfillBatchSize, + defaultMinSimilarity: + params.defaultMinSimilarity ?? EMBEDDINGS_DEFAULTS.defaultMinSimilarity, + defaultTopK: params.defaultTopK ?? 
EMBEDDINGS_DEFAULTS.defaultTopK, + } + } + + async isEmbeddingConfigured(): Promise { + return this.aiService.hasFeatureModel(AIFeatureKey.Embedding) + } + + async embedBatch(inputs: string[]): Promise<{ + vectors: number[][] + model: string + dim: number + }> { + if (inputs.length === 0) { + return { vectors: [], model: '', dim: 0 } + } + const runtime = await this.aiService.getEmbeddingModel() + if (!runtime.embedBatch) { + throw createAppException(AppErrorCode.AI_EMBEDDING_MODEL_NOT_CONFIGURED) + } + try { + const result = await runtime.embedBatch({ inputs }) + return result + } catch (error) { + throw createAppException(AppErrorCode.AI_EMBEDDING_BATCH_FAILED, { + message: (error as Error)?.message, + }) + } + } + + async search( + query: string, + options: SearchOptions = {}, + ): Promise { + if (!query?.trim()) return [] + if (!(await this.isEmbeddingConfigured())) { + throw createAppException(AppErrorCode.AI_EMBEDDING_MODEL_NOT_CONFIGURED) + } + const params = await this.resolveParams() + const topK = options.topK ?? params.defaultTopK + const minSimilarity = options.minSimilarity ?? params.defaultMinSimilarity + + const { vectors, model } = await this.embedBatch([query]) + if (vectors.length === 0) return [] + const effectiveModel = options.model ?? 
model + + const results = await this.repository.searchByVector(vectors[0], { + embeddingModel: effectiveModel, + topK, + sourceTypes: options.sourceTypes, + }) + + return results.filter((r) => r.similarity >= minSimilarity) + } + + async syncSource( + sourceType: string, + sourceId: string, + op: 'upsert' | 'delete', + ): Promise<{ deleted?: number; embedded?: number }> { + if (op === 'delete') { + const deleted = await this.repository.deleteBySource(sourceType, sourceId) + return { deleted } + } + + if (!(await this.isEmbeddingConfigured())) { + this.logger.debug( + `Embedding model unconfigured; skipping sync for ${sourceType}:${sourceId}`, + ) + return {} + } + + const source = await this.databaseService.findGlobalById(sourceId) + if (!source || !source.document) return {} + if (source.type !== sourceType) return {} + + const document = source.document as { text?: string; content?: string } + const markdown = (document.text || document.content || '').trim() + + const params = await this.resolveParams() + + if (!markdown) { + const deleted = await this.repository.deleteBySource(sourceType, sourceId) + return { deleted } + } + + const chunks = chunk(markdown, { + maxTokens: params.chunkMaxTokens, + overlapTokens: params.chunkOverlapTokens, + }) + + const runtime = await this.aiService.getEmbeddingModel() + const modelId = runtime.providerInfo.model + + const existing = await this.repository.findBySource( + sourceType, + sourceId, + modelId, + ) + const existingByIndex = new Map(existing.map((e) => [e.chunkIndex, e])) + + const staleIndices = existing + .map((e) => e.chunkIndex) + .filter((i) => i >= chunks.length) + let deleted = 0 + if (staleIndices.length > 0) { + deleted = await this.repository.deleteByIndices( + sourceType, + sourceId, + modelId, + staleIndices, + ) + } + + const toEmbed = chunks.filter( + (c) => existingByIndex.get(c.index)?.contentHash !== c.hash, + ) + + if (toEmbed.length === 0) { + return { deleted, embedded: 0 } + } + + const { vectors, 
model, dim } = await this.embedBatch( + toEmbed.map((c) => c.content), + ) + const effectiveModel = model || modelId + + const embedded = await this.repository.upsertChunks( + toEmbed.map((c, i) => ({ + sourceType, + sourceId, + chunkIndex: c.index, + content: c.content, + contentHash: c.hash, + embedding: vectors[i], + embeddingModel: effectiveModel, + dim: dim || vectors[i].length, + })), + ) + + return { deleted, embedded } + } + + async getStats(): Promise { + return this.repository.stats() + } +} diff --git a/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.types.ts b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.types.ts new file mode 100644 index 00000000000..1d1e0fa2a7b --- /dev/null +++ b/apps/core/src/modules/ai/ai-embeddings/ai-embeddings.types.ts @@ -0,0 +1,46 @@ +export interface ChunkSpec { + index: number + content: string + hash: string +} + +export interface RetrievalResult { + sourceType: string + sourceId: string + chunkIndex: number + content: string + distance: number + similarity: number +} + +export interface SearchOptions { + topK?: number + minSimilarity?: number + model?: string + sourceTypes?: string[] +} + +export interface SyncOperation { + sourceType: string + sourceId: string + op: 'upsert' | 'delete' +} + +export interface CorpusEmbeddingRow { + id: string + sourceType: string + sourceId: string + chunkIndex: number + content: string + contentHash: string + embedding: number[] + embeddingModel: string + dim: number + createdAt: Date +} + +export interface EmbeddingStats { + total: number + byModel: Array<{ model: string; dim: number; rows: number }> + bySourceType: Array<{ type: string; rows: number }> +} diff --git a/apps/core/src/modules/ai/ai-embeddings/chunker.ts b/apps/core/src/modules/ai/ai-embeddings/chunker.ts new file mode 100644 index 00000000000..9c2672b4d74 --- /dev/null +++ b/apps/core/src/modules/ai/ai-embeddings/chunker.ts @@ -0,0 +1,124 @@ +import { createHash } from 'node:crypto' + +import type { ChunkSpec 
} from './ai-embeddings.types'
+
+export interface ChunkOptions {
+  maxTokens: number
+  overlapTokens: number
+}
+
+const FENCED_CODE_RE = /```.*?```/gs
+const PARAGRAPH_SPLIT_RE = /\n{2,}/
+const SENTENCE_SPLIT_RE = /(?<=[!.?。!?])\s+/
+
+const CJK_RE =
+  /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/gu
+const ASCII_RE = /[\x20-\x7E]/g
+
+function estimateTokens(text: string): number {
+  if (!text) return 0
+  const cjk = (text.match(CJK_RE) || []).length
+  const ascii = (text.match(ASCII_RE) || []).length
+  const total = text.length
+  if (cjk === 0 && ascii === 0) return Math.ceil(total / 4)
+  const cjkTokens = cjk / 3
+  const asciiTokens = ascii / 4
+  const otherTokens = Math.max(0, total - cjk - ascii) / 4
+  return Math.ceil(cjkTokens + asciiTokens + otherTokens)
+}
+
+/**
+ * Cut `text` into consecutive slices of roughly `maxTokens` estimated tokens.
+ *
+ * The slice width is `maxTokens` times 3 UTF-16 code units when CJK
+ * characters are at least as numerous as printable-ASCII characters, times 4
+ * otherwise, and never less than 1. A cut never lands between the two halves
+ * of a surrogate pair, so no slice starts or ends with a lone surrogate.
+ *
+ * @param text - Text to cut; an empty string yields `[]`.
+ * @param maxTokens - Estimated token budget per slice.
+ * @returns Slices that concatenate back to `text`.
+ */
+function sliceByTokenBudget(text: string, maxTokens: number): string[] {
+  const out: string[] = []
+  if (!text) return out
+  const cjk = (text.match(CJK_RE) || []).length
+  const ascii = (text.match(ASCII_RE) || []).length
+  const charsPerToken = cjk >= ascii ? 3 : 4
+  const window = Math.max(1, maxTokens * charsPerToken)
+  let start = 0
+  while (start < text.length) {
+    let end = Math.min(text.length, start + window)
+    if (end < text.length) {
+      const last = text.charCodeAt(end - 1)
+      const next = text.charCodeAt(end)
+      const splitsPair =
+        last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff
+      // Pull the cut back before the pair, or push it past the pair when the
+      // slice would otherwise be empty (window of one code unit).
+      if (splitsPair) end += end - start > 1 ? -1 : 1
+    }
+    out.push(text.slice(start, end))
+    start = end
+  }
+  return out
+}
+
+function splitOversized(paragraph: string, maxTokens: number): string[] {
+  if (estimateTokens(paragraph) <= maxTokens) return [paragraph]
+  const sentences = paragraph.split(SENTENCE_SPLIT_RE).filter(Boolean)
+  const result: string[] = []
+  for (const sentence of sentences) {
+    if (estimateTokens(sentence) <= maxTokens) {
+      result.push(sentence)
+    } else {
+      result.push(...sliceByTokenBudget(sentence, maxTokens))
+    }
+  }
+  return result
+}
+
+function tailTokens(text: string, overlapTokens: number): string {
+  if (overlapTokens <= 0 || !text) return ''
+  const cjk = (text.match(CJK_RE) || []).length
+  const ascii = (text.match(ASCII_RE) || []).length
+  const charsPerToken = cjk >= ascii ?
3 : 4
+  const tailChars = Math.min(text.length, overlapTokens * charsPerToken)
+  return text.slice(text.length - tailChars)
+}
+
+function normalize(content: string): string {
+  return content.replaceAll('\r\n', '\n').trim()
+}
+
+function hashContent(content: string): string {
+  return createHash('sha256').update(normalize(content)).digest('hex')
+}
+
+/**
+ * Split markdown into overlapping chunks for embedding.
+ *
+ * Fenced code blocks are removed, the remaining text is split on blank lines,
+ * oversized paragraphs are broken down further, and the resulting units are
+ * packed greedily until the estimated token budget is reached. Every chunk
+ * after the first is then prefixed with the tail of the chunk before it. The
+ * budget is applied before the overlap is prepended, so a final chunk may
+ * exceed `maxTokens` by up to the overlap.
+ *
+ * @param markdown - Source text; empty or whitespace-only input yields `[]`.
+ * @param opts - Estimated token budget per chunk and overlap size.
+ * @returns Chunks with a zero-based `index`, their `content`, and a hex
+ *   SHA-256 `hash` of the normalised content.
+ */
+export function chunk(markdown: string, opts: ChunkOptions): ChunkSpec[] {
+  const stripped = (markdown || '').replaceAll(FENCED_CODE_RE, '\n\n')
+  const normalized = stripped.replaceAll('\r\n', '\n').trim()
+  if (!normalized) return []
+
+  const paragraphs = normalized
+    .split(PARAGRAPH_SPLIT_RE)
+    .map((p) => p.trim())
+    .filter(Boolean)
+
+  const units: string[] = []
+  for (const p of paragraphs) {
+    units.push(...splitOversized(p, opts.maxTokens))
+  }
+
+  const packed: string[] = []
+  let buffer = ''
+  let bufferTokens = 0
+  for (const unit of units) {
+    const unitTokens = estimateTokens(unit)
+    if (!buffer) {
+      buffer = unit
+      bufferTokens = unitTokens
+      continue
+    }
+    if (bufferTokens + unitTokens <= opts.maxTokens) {
+      buffer = `${buffer}\n\n${unit}`
+      bufferTokens += unitTokens
+    } else {
+      packed.push(buffer)
+      buffer = unit
+      bufferTokens = unitTokens
+    }
+  }
+  if (buffer) packed.push(buffer)
+
+  if (opts.overlapTokens > 0 && packed.length > 1) {
+    // Walk backwards so packed[i - 1] is still the un-prefixed chunk when its
+    // tail is taken. Iterating forwards would read a chunk that already
+    // carries its own overlap prefix, letting overlap cascade across chunks
+    // and making an edit in one chunk change the hashes of later ones.
+    for (let i = packed.length - 1; i >= 1; i--) {
+      const overlap = tailTokens(packed[i - 1], opts.overlapTokens)
+      if (overlap) {
+        packed[i] = `${overlap}\n\n${packed[i]}`
+      }
+    }
+  }
+
+  return packed.map((content, index) => ({
+    index,
+    content,
+    hash: hashContent(content),
+  }))
+}
diff --git a/apps/core/src/modules/ai/ai-embeddings/listeners/note-events.listener.ts b/apps/core/src/modules/ai/ai-embeddings/listeners/note-events.listener.ts
new file mode 100644
index 00000000000..8e9925fcf24
--- /dev/null
+++ b/apps/core/src/modules/ai/ai-embeddings/listeners/note-events.listener.ts
@@ -0,0 +1,44 @@
+import { Injectable, Logger } from '@nestjs/common'
+import {
/** The only field this listener reads from note events. */
interface NoteEventPayload {
  id: string
}

/**
 * Turns note create/update/delete events into embed-sync tasks.
 * A failure to enqueue is logged as a warning and never rethrown.
 */
@Injectable()
export class NoteEmbeddingEventsListener {
  private readonly logger = new Logger(NoteEmbeddingEventsListener.name)

  constructor(private readonly aiTaskService: AiTaskService) {}

  /** Note created or updated: queue an `upsert` sync; payloads without an id are ignored. */
  @OnEvent(BusinessEvents.NOTE_CREATE)
  @OnEvent(BusinessEvents.NOTE_UPDATE)
  async handleUpsert(event: NoteEventPayload) {
    const noteId = event?.id
    if (noteId) {
      await this.enqueue(noteId, 'upsert')
    }
  }

  /** Note deleted: queue a `delete` sync; payloads without an id are ignored. */
  @OnEvent(BusinessEvents.NOTE_DELETE)
  async handleDelete(event: NoteEventPayload) {
    const noteId = event?.id
    if (noteId) {
      await this.enqueue(noteId, 'delete')
    }
  }

  /** Creates the embed-sync task for a note, downgrading any error to a warning. */
  private async enqueue(id: string, op: 'upsert' | 'delete') {
    try {
      await this.aiTaskService.createEmbedSyncTask({
        sourceType: 'note',
        sourceId: id,
        op,
      })
    } catch (error) {
      const reason = (error as Error)?.message
      this.logger.warn(
        `Failed to enqueue note embed sync: id=${id} op=${op} error=${reason}`,
      )
    }
  }
}
/** The only field this listener reads from page events. */
interface PageEventPayload {
  id: string
}

/**
 * Turns page create/update/delete events into embed-sync tasks.
 * A failure to enqueue is logged as a warning and never rethrown.
 */
@Injectable()
export class PageEmbeddingEventsListener {
  private readonly logger = new Logger(PageEmbeddingEventsListener.name)

  constructor(private readonly aiTaskService: AiTaskService) {}

  /** Page created or updated: queue an `upsert` sync; payloads without an id are ignored. */
  @OnEvent(BusinessEvents.PAGE_CREATE)
  @OnEvent(BusinessEvents.PAGE_UPDATE)
  async handleUpsert(event: PageEventPayload) {
    const pageId = event?.id
    if (pageId) {
      await this.enqueue(pageId, 'upsert')
    }
  }

  /** Page deleted: queue a `delete` sync; payloads without an id are ignored. */
  @OnEvent(BusinessEvents.PAGE_DELETE)
  async handleDelete(event: PageEventPayload) {
    const pageId = event?.id
    if (pageId) {
      await this.enqueue(pageId, 'delete')
    }
  }

  /** Creates the embed-sync task for a page, downgrading any error to a warning. */
  private async enqueue(id: string, op: 'upsert' | 'delete') {
    try {
      await this.aiTaskService.createEmbedSyncTask({
        sourceType: 'page',
        sourceId: id,
        op,
      })
    } catch (error) {
      const reason = (error as Error)?.message
      this.logger.warn(
        `Failed to enqueue page embed sync: id=${id} op=${op} error=${reason}`,
      )
    }
  }
}

// --- apps/core/src/modules/ai/ai-embeddings/listeners/post-events.listener.ts ---

/** The only field this listener reads from post events. */
interface PostEventPayload {
  id: string
}

/**
 * Turns post create/update/delete events into embed-sync tasks.
 * A failure to enqueue is logged as a warning and never rethrown.
 */
@Injectable()
export class PostEmbeddingEventsListener {
  private readonly logger = new Logger(PostEmbeddingEventsListener.name)

  constructor(private readonly aiTaskService: AiTaskService) {}

  /** Post created or updated: queue an `upsert` sync; payloads without an id are ignored. */
  @OnEvent(BusinessEvents.POST_CREATE)
  @OnEvent(BusinessEvents.POST_UPDATE)
  async handleUpsert(event: PostEventPayload) {
    const postId = event?.id
    if (postId) {
      await this.enqueue(postId, 'upsert')
    }
  }

  /** Post deleted: queue a `delete` sync; payloads without an id are ignored. */
  @OnEvent(BusinessEvents.POST_DELETE)
  async handleDelete(event: PostEventPayload) {
    const postId = event?.id
    if (postId) {
      await this.enqueue(postId, 'delete')
    }
  }

  /** Creates the embed-sync task for a post, downgrading any error to a warning. */
  private async enqueue(id: string, op: 'upsert' | 'delete') {
    try {
      await this.aiTaskService.createEmbedSyncTask({
        sourceType: 'post',
        sourceId: id,
        op,
      })
    } catch (error) {
      const reason = (error as Error)?.message
      this.logger.warn(
        `Failed to enqueue post embed sync: id=${id} op=${op} error=${reason}`,
      )
    }
  }
}
/** Caller overrides for {@link runCorpusBackfill}. */
export interface BackfillOptions {
  /** Source types to process; entries outside `SUPPORTED_SOURCE_TYPES` are dropped. Empty or missing means all supported types. */
  sourceTypes?: readonly string[]
  /** Page size for id listing; falls back to the resolved params, then to `EMBEDDINGS_DEFAULTS`. */
  batchSize?: number
}

/** Counters reported by {@link runCorpusBackfill}. */
export interface BackfillSummary {
  configured: boolean
  sourceTypes: string[]
  /** Sources whose `syncSource` call resolved. */
  scanned: number
  embedded: number
  deleted: number
  /** Sources whose `syncSource` call threw. */
  skipped: number
}

const sourceTables = {
  post: posts,
  note: notes,
  page: pages,
} as const

/**
 * Lists up to `limit` ids of one source table in ascending id order,
 * starting strictly after `cursor` (or from the beginning when it is null).
 */
export async function listSourceIdsAfter(
  db: AppDatabase,
  sourceType: SupportedSourceType,
  cursor: string | null,
  limit: number,
): Promise<string[]> {
  const table = sourceTables[sourceType]
  const condition = cursor ? gt(table.id, cursor) : undefined
  const rows = await db
    .select({ id: table.id })
    .from(table)
    .where(condition)
    .orderBy(asc(table.id))
    .limit(limit)
  return rows.map((r) => String(r.id))
}

/**
 * Walks every row of the selected source tables in id order and calls
 * `service.syncSource(type, id, 'upsert')` for each, one at a time.
 *
 * Returns early with zeroed counters when embeddings are not configured or
 * no supported source type was requested. A failing source is counted in
 * `skipped` and does not stop the run.
 */
export async function runCorpusBackfill(
  service: AiEmbeddingsService,
  db: AppDatabase,
  options: BackfillOptions = {},
): Promise<BackfillSummary> {
  const configured = await service.isEmbeddingConfigured()
  const requested =
    options.sourceTypes && options.sourceTypes.length > 0
      ? options.sourceTypes
      : SUPPORTED_SOURCE_TYPES
  const sourceTypes = requested.filter((t): t is SupportedSourceType =>
    (SUPPORTED_SOURCE_TYPES as readonly string[]).includes(t),
  )

  const summary: BackfillSummary = {
    configured,
    sourceTypes: [...sourceTypes],
    scanned: 0,
    embedded: 0,
    deleted: 0,
    skipped: 0,
  }

  if (!configured || sourceTypes.length === 0) {
    return summary
  }

  const params = await service.resolveParams()
  const batchSize =
    options.batchSize ??
    params.backfillBatchSize ??
    EMBEDDINGS_DEFAULTS.backfillBatchSize

  for (const sourceType of sourceTypes) {
    let cursor: string | null = null
    while (true) {
      const batch = await listSourceIdsAfter(db, sourceType, cursor, batchSize)
      if (batch.length === 0) break
      for (const id of batch) {
        try {
          const result = await service.syncSource(sourceType, id, 'upsert')
          summary.scanned++
          summary.embedded += result.embedded ?? 0
          summary.deleted += result.deleted ?? 0
        } catch {
          summary.skipped++
        }
      }
      cursor = batch.at(-1) ?? null
      // A short page means the table is exhausted; skip the extra empty query.
      if (batch.length < batchSize) break
    }
  }

  return summary
}
/**
 * Registers the task-queue handler for `AITaskType.EmbedSync` on module init.
 */
@Injectable()
export class EmbedSyncTaskProcessor implements OnModuleInit {
  private readonly logger = new Logger(EmbedSyncTaskProcessor.name)

  constructor(
    private readonly taskProcessor: TaskQueueProcessor,
    private readonly embeddingsService: AiEmbeddingsService,
  ) {}

  /**
   * The handler logs the task, runs `syncSource` with the payload's source
   * type, id and op, then stores whatever `syncSource` returned as the task
   * result.
   */
  onModuleInit() {
    this.taskProcessor.registerHandler({
      type: AITaskType.EmbedSync,
      execute: async (payload, context) => {
        const summaryLine = `embed sync ${payload.sourceType}:${payload.sourceId} op=${payload.op}`
        await context.appendLog('info', summaryLine)

        const outcome = await this.embeddingsService.syncSource(
          payload.sourceType,
          payload.sourceId,
          payload.op,
        )
        await context.setResult(outcome)
      },
    })
    this.logger.log('Embed sync task handler registered')
  }
}

// --- apps/core/src/modules/ai/ai-memory/ai-memory.constants.ts ---

/** Accepted scope strings: `global`, `persona:<slug>` or `scenario:<slug>`, where a slug is lowercase letters, digits and hyphens. */
export const AI_MEMORY_SCOPE_REGEX =
  /^(global|persona:[\da-z-]+|scenario:[\da-z-]+)$/

export const AI_MEMORY_TYPES = [
  'fact',
  'event',
  'preference',
  'thread',
  'pattern',
] as const

export const AI_MEMORY_STATUSES = [
  'active',
  'superseded',
  'archived',
  'pending_review',
] as const

/** Recall fallbacks used when the AI config does not set its own values. */
export const AI_MEMORY_DEFAULT_RECALL_TOP_K = 5
export const AI_MEMORY_DEFAULT_RECALL_MIN_SIMILARITY = 0.7
/**
 * HTTP endpoints under `ai-memory`. Every route carries `@Auth()` and
 * responds with the detail view built by `toDetailView`.
 */
@ApiController('ai-memory')
export class AiMemoryController {
  constructor(private readonly service: AiMemoryService) {}

  /** Paginated listing; pagination is returned as response meta. */
  @Get('/')
  @Auth()
  async list(@Query() query: ListMemoryQueryDto) {
    const { data, pagination } = await this.service.list(query)
    const views = data.map((row) => this.toDetailView(row))
    const meta = new MetaObjectBuilder().pagination(pagination).build()
    return withMeta(views, meta)
  }

  // Declared before '/:id' so that 'kpi' is not captured as an id.
  @Get('/kpi')
  @Auth()
  async kpi() {
    return this.service.getKpi()
  }

  @Get('/:id')
  @Auth()
  async findById(@Param() params: EntityIdDto) {
    const memory = await this.service.findById(params.id)
    return this.toDetailView(memory)
  }

  /** Creates a memory on behalf of the current user. */
  @Post('/')
  @Auth()
  async create(
    @Body() body: CreateMemoryDto,
    @CurrentUser() user: SessionUser,
  ) {
    const created = await this.service.create(body, user.id)
    return this.toDetailView(created)
  }

  @Put('/:id')
  @Auth()
  async update(
    @Param() params: EntityIdDto,
    @Body() body: UpdateMemoryDto,
    @CurrentUser() user: SessionUser,
  ) {
    const updated = await this.service.update(params.id, body, user.id)
    return this.toDetailView(updated)
  }

  /** Archives the memory (no row is removed) and answers 204. */
  @Delete('/:id')
  @Auth()
  @HttpCode(204)
  async delete(@Param() params: EntityIdDto) {
    await this.service.archive(params.id)
  }

  /**
   * Projects a memory onto the strict detail schema. The embedding vector is
   * not exposed; only whether one exists.
   */
  private toDetailView(row: AiMemory): AiMemoryDetailView {
    const hasEmbedding = row.embedding !== null
    return AiMemoryViews.detail.parse({
      id: row.id,
      scope: row.scope,
      type: row.type,
      content: row.content,
      confidence: row.confidence,
      salience: row.salience,
      source: row.source,
      status: row.status,
      firstSeenAt: row.firstSeenAt,
      lastSeenAt: row.lastSeenAt,
      expiresAt: row.expiresAt,
      metadata: row.metadata,
      createdAt: row.createdAt,
      updatedAt: row.updatedAt,
      hasEmbedding,
    })
  }
}

// --- apps/core/src/modules/ai/ai-memory/ai-memory.module.ts ---

/** Wires the AI memory repository, service, controller and embed task processor; exports the service. */
@Module({
  imports: [AiTaskModule],
  providers: [
    AiMemoryRepository,
    AiMemoryService,
    AiService,
    MemoryEmbedTaskProcessor,
  ],
  controllers: [AiMemoryController],
  exports: [AiMemoryService],
})
export class AiMemoryModule {}
type AiMemoryRow = typeof aiMemories.$inferSelect

/** Converts a database row into the domain shape, defaulting missing `source`/`metadata` to `{}`. */
const mapRow = (row: AiMemoryRow): AiMemory => ({
  id: toEntityId(row.id) as EntityId,
  scope: row.scope,
  type: row.type as AiMemoryType,
  content: row.content,
  confidence: row.confidence,
  salience: row.salience,
  source: (row.source ?? {}) as AiMemorySource,
  embedding: row.embedding,
  embeddingModel: row.embeddingModel,
  dim: row.dim,
  firstSeenAt: row.firstSeenAt,
  lastSeenAt: row.lastSeenAt,
  expiresAt: row.expiresAt,
  supersedesId: row.supersedesId
    ? (toEntityId(row.supersedesId) as EntityId)
    : null,
  status: row.status as AiMemoryStatus,
  metadata: (row.metadata ?? {}) as Record<string, unknown>,
  createdAt: row.createdAt,
  updatedAt: row.updatedAt,
})

/** Values accepted by {@link AiMemoryRepository.create}. */
export interface CreateMemoryRow {
  scope: string
  type: AiMemoryType
  content: string
  confidence?: number
  salience?: number
  source?: AiMemorySource
  expiresAt?: Date | null
  metadata?: Record<string, unknown>
}

/** Values accepted by {@link AiMemoryRepository.update}; `undefined` fields are left untouched. */
export interface UpdateMemoryRow {
  scope?: string
  type?: AiMemoryType
  content?: string
  confidence?: number
  salience?: number
  expiresAt?: Date | null
  metadata?: Record<string, unknown>
}

export interface ListMemoryFilters {
  scope?: string
  type?: AiMemoryType
  status?: AiMemoryStatus
}

export interface RecallQueryRow {
  scope: string | string[]
  embedding: number[]
  embeddingModel: string
  limit: number
}

/** Persistence for the `aiMemories` table. */
@Injectable()
export class AiMemoryRepository extends BaseRepository {
  constructor(
    @Inject(PG_DB_TOKEN) db: AppDatabase,
    private readonly snowflake: SnowflakeService,
  ) {
    super(db)
  }

  /** Returns the memory with the given id, or `null` when no row matches. */
  async findById(id: EntityId | string): Promise<AiMemory | null> {
    const [row] = await this.db
      .select()
      .from(aiMemories)
      .where(eq(aiMemories.id, parseEntityId(id)))
      .limit(1)
    return row ? mapRow(row) : null
  }

  /**
   * Newest-first page of memories matching `filters`, together with the
   * total count. `page` is clamped to at least 1 and `size` to 1..100.
   */
  async list(
    filters: ListMemoryFilters,
    page = 1,
    size = 20,
  ): Promise<PaginationResult<AiMemory>> {
    const safePage = Math.max(1, page)
    const safeSize = Math.min(100, Math.max(1, size))
    const offset = (safePage - 1) * safeSize
    const where = this.buildListWhere(filters)
    const [rows, [{ count }]] = await Promise.all([
      this.db
        .select()
        .from(aiMemories)
        .where(where)
        .orderBy(desc(aiMemories.createdAt))
        .limit(safeSize)
        .offset(offset),
      this.db
        .select({ count: sql`count(*)::int` })
        .from(aiMemories)
        .where(where),
    ])
    return {
      data: rows.map(mapRow),
      pagination: this.paginationOf(Number(count ?? 0), safePage, safeSize),
    }
  }

  /** Inserts a memory with a fresh snowflake id; confidence and salience default to 1. */
  async create(input: CreateMemoryRow): Promise<AiMemory> {
    const id = this.snowflake.nextId()
    const [row] = await this.db
      .insert(aiMemories)
      .values({
        id,
        scope: input.scope,
        type: input.type,
        content: input.content,
        confidence: input.confidence ?? 1,
        salience: input.salience ?? 1,
        source: input.source ?? {},
        expiresAt: input.expiresAt ?? null,
        metadata: input.metadata ?? {},
      })
      .returning()
    return mapRow(row)
  }

  /**
   * Applies the defined fields of `input` and bumps `updatedAt`.
   * Returns `null` when no row has the given id.
   */
  async update(
    id: EntityId | string,
    input: UpdateMemoryRow,
  ): Promise<AiMemory | null> {
    const patch: Partial<AiMemoryRow> = {
      updatedAt: new Date(),
    }
    if (input.scope !== undefined) patch.scope = input.scope
    if (input.type !== undefined) patch.type = input.type
    if (input.content !== undefined) patch.content = input.content
    if (input.confidence !== undefined) patch.confidence = input.confidence
    if (input.salience !== undefined) patch.salience = input.salience
    // `null` is a real value here (clears the expiry); only `undefined` is skipped.
    if (input.expiresAt !== undefined) patch.expiresAt = input.expiresAt
    if (input.metadata !== undefined) patch.metadata = input.metadata

    const [row] = await this.db
      .update(aiMemories)
      .set(patch)
      .where(eq(aiMemories.id, parseEntityId(id)))
      .returning()
    return row ? mapRow(row) : null
  }

  /** Stores a vector with the model that produced it; `dim` is the vector length. */
  async updateEmbedding(
    id: EntityId | string,
    embedding: number[],
    embeddingModel: string,
  ): Promise<void> {
    await this.db
      .update(aiMemories)
      .set({
        embedding,
        embeddingModel,
        dim: embedding.length,
        updatedAt: new Date(),
      })
      .where(eq(aiMemories.id, parseEntityId(id)))
  }

  /** Sets the status and bumps `updatedAt`; returns `null` when no row has the given id. */
  async setStatus(
    id: EntityId | string,
    status: AiMemoryStatus,
  ): Promise<AiMemory | null> {
    const [row] = await this.db
      .update(aiMemories)
      .set({ status, updatedAt: new Date() })
      .where(eq(aiMemories.id, parseEntityId(id)))
      .returning()
    return row ? mapRow(row) : null
  }

  /** Active, unexpired memories in any of `scopes`, ordered by salience then `lastSeenAt`, both descending. */
  async listActiveByScope(
    scopes: string[],
    limit: number,
  ): Promise<AiMemory[]> {
    if (!scopes.length) return []
    const rows = await this.db
      .select()
      .from(aiMemories)
      .where(
        and(
          eq(aiMemories.status, 'active'),
          inArray(aiMemories.scope, scopes),
          sql`(${aiMemories.expiresAt} is null or ${aiMemories.expiresAt} > now())`,
        )!,
      )
      .orderBy(desc(aiMemories.salience), desc(aiMemories.lastSeenAt))
      .limit(limit)
    return rows.map(mapRow)
  }

  /**
   * Nearest active, unexpired memories in `scopes` that were embedded with
   * `embeddingModel`, ordered by the `<=>` distance to `embedding`.
   * `similarity` is `1 - distance`.
   */
  async vectorSearch(
    scopes: string[],
    embedding: number[],
    embeddingModel: string,
    limit: number,
  ): Promise<RecallScoredMemory[]> {
    if (!scopes.length) return []
    const vectorLiteral = `[${embedding.join(',')}]`
    const distance = sql`(${aiMemories.embedding} <=> ${vectorLiteral}::vector)`
    const rows = await this.db
      .select({
        row: aiMemories,
        distance,
      })
      .from(aiMemories)
      .where(
        and(
          eq(aiMemories.status, 'active'),
          inArray(aiMemories.scope, scopes),
          sql`${aiMemories.embedding} is not null`,
          eq(aiMemories.embeddingModel, embeddingModel),
          sql`(${aiMemories.expiresAt} is null or ${aiMemories.expiresAt} > now())`,
        )!,
      )
      .orderBy(distance)
      .limit(limit)

    return rows.map((entry) => {
      const dist = Number(entry.distance ?? 1)
      return {
        ...mapRow(entry.row),
        similarity: 1 - dist,
      }
    })
  }

  /** Row counts per status; statuses with no rows are reported as 0. */
  async countByStatus(): Promise<Record<AiMemoryStatus, number>> {
    const rows = await this.db
      .select({
        status: aiMemories.status,
        count: sql`count(*)::int`,
      })
      .from(aiMemories)
      .groupBy(aiMemories.status)
    const result: Record<string, number> = {}
    for (const r of rows) {
      result[r.status] = Number(r.count ?? 0)
    }
    return {
      active: result.active ?? 0,
      superseded: result.superseded ?? 0,
      archived: result.archived ?? 0,
      pending_review: result.pending_review ?? 0,
    }
  }

  /** Number of rows whose status is `active`. */
  async totalActive(): Promise<number> {
    const [row] = await this.db
      .select({ count: sql`count(*)::int` })
      .from(aiMemories)
      .where(eq(aiMemories.status, 'active'))
    return Number(row?.count ?? 0)
  }

  /** ANDs the provided filters; `undefined` means no restriction. */
  private buildListWhere(filters: ListMemoryFilters): SQL | undefined {
    const conds: SQL[] = []
    if (filters.scope) conds.push(eq(aiMemories.scope, filters.scope))
    if (filters.type) conds.push(eq(aiMemories.type, filters.type))
    if (filters.status) conds.push(eq(aiMemories.status, filters.status))
    if (!conds.length) return undefined
    return and(...conds)
  }
}
/** Field validators shared by the create and update schemas, without optionality or defaults. */
const memoryFields = {
  scope: z.string().regex(AI_MEMORY_SCOPE_REGEX),
  type: z.enum(AI_MEMORY_TYPES),
  content: z.string().min(1).max(2000),
  confidence: z.number().min(0).max(1),
  salience: z.number().min(0).max(10),
  expiresAt: z.string().datetime(),
  metadata: z.record(z.string(), z.unknown()),
}

/** Body of a create request; `confidence` and `salience` default to 1. */
export const CreateMemorySchema = z.object({
  scope: memoryFields.scope,
  type: memoryFields.type,
  content: memoryFields.content,
  confidence: memoryFields.confidence.optional().default(1),
  salience: memoryFields.salience.optional().default(1),
  expiresAt: memoryFields.expiresAt.optional(),
  metadata: memoryFields.metadata.optional(),
})

export class CreateMemoryDto extends createZodDto(CreateMemorySchema) {}

/**
 * Body of an update request: every field optional and none defaulted.
 *
 * Built from the bare fields rather than `CreateMemorySchema.partial()`:
 * Zod 4 still applies a field's default when it is wrapped in an optional,
 * so the derived form would turn an omitted `confidence`/`salience` into 1
 * and overwrite the stored value on every partial update.
 */
export const UpdateMemorySchema = z.object(memoryFields).partial()

export class UpdateMemoryDto extends createZodDto(UpdateMemorySchema) {}

/** Query string of the list endpoint; `page` and `size` are coerced from strings. */
export const ListMemoryQuerySchema = z.object({
  scope: z.string().optional(),
  type: z.enum(AI_MEMORY_TYPES).optional(),
  status: z.enum(AI_MEMORY_STATUSES).optional(),
  page: z.coerce.number().int().min(1).default(1),
  size: z.coerce.number().int().min(1).max(100).default(20),
})

export class ListMemoryQueryDto extends createZodDto(ListMemoryQuerySchema) {}

export type CreateMemoryInput = z.infer<typeof CreateMemorySchema>
export type UpdateMemoryInput = z.infer<typeof UpdateMemorySchema>
export type ListMemoryQueryInput = z.infer<typeof ListMemoryQuerySchema>
/** Application logic for AI memories: CRUD, recall and embedding upkeep. */
@Injectable()
export class AiMemoryService {
  private readonly logger = new Logger(AiMemoryService.name)

  constructor(
    private readonly repository: AiMemoryRepository,
    private readonly aiTaskService: AiTaskService,
    private readonly aiService: AiService,
    private readonly configService: ConfigsService,
  ) {}

  /** Paginated listing filtered by the optional scope, type and status. */
  async list(query: ListMemoryQueryInput) {
    const filters: ListMemoryFilters = {
      scope: query.scope,
      type: query.type,
      status: query.status,
    }
    return this.repository.list(filters, query.page, query.size)
  }

  /** @throws `AI_MEMORY_NOT_FOUND` when no memory has the given id. */
  async findById(id: string): Promise<AiMemory> {
    const row = await this.repository.findById(id)
    if (!row) {
      throw createAppException(AppErrorCode.AI_MEMORY_NOT_FOUND, { id })
    }
    return row
  }

  /** Stores a manually authored memory and queues its embedding. */
  async create(input: CreateMemoryInput, actorId: string): Promise<AiMemory> {
    const expiresAt = input.expiresAt ? new Date(input.expiresAt) : null
    const created = await this.repository.create({
      scope: input.scope,
      type: input.type,
      content: input.content,
      confidence: input.confidence,
      salience: input.salience,
      expiresAt,
      metadata: input.metadata,
      source: { kind: 'manual', authorId: actorId },
    })
    await this.enqueueEmbed(created.id)
    return created
  }

  /**
   * Applies the provided fields and queues a re-embed only when `content`
   * actually changed.
   *
   * @throws `AI_MEMORY_NOT_FOUND` when no memory has the given id.
   */
  async update(
    id: string,
    input: UpdateMemoryInput,
    _actorId: string,
  ): Promise<AiMemory> {
    const existing = await this.findById(id)
    // undefined = leave as is, null = clear, string = parse.
    const expiresAt =
      input.expiresAt === undefined
        ? undefined
        : input.expiresAt === null
          ? null
          : new Date(input.expiresAt)
    const updated = await this.repository.update(id, {
      scope: input.scope,
      type: input.type,
      content: input.content,
      confidence: input.confidence,
      salience: input.salience,
      expiresAt,
      metadata: input.metadata,
    })
    if (!updated) {
      throw createAppException(AppErrorCode.AI_MEMORY_NOT_FOUND, { id })
    }
    const contentChanged =
      input.content !== undefined && input.content !== existing.content
    if (contentChanged) {
      await this.enqueueEmbed(updated.id)
    }
    return updated
  }

  /** Marks the memory `archived`. @throws `AI_MEMORY_NOT_FOUND` when no memory has the given id. */
  async archive(id: string): Promise<void> {
    const updated = await this.repository.setStatus(id, 'archived')
    if (!updated) {
      throw createAppException(AppErrorCode.AI_MEMORY_NOT_FOUND, { id })
    }
  }

  /**
   * Recalls memories for one or more scopes.
   *
   * Without `opts.query`, returns the top-K active memories as ordered by the
   * repository, with no similarity or score. With a query, embeds it, fetches
   * `2 * topK` nearest candidates, drops those below the similarity floor and
   * ranks the rest by `similarity * salience * confidence`.
   *
   * Returns `[]` when no embedding runtime is available or embedding fails.
   */
  async recall(opts: RecallOptions): Promise<RecallScoredMemory[]> {
    const scopes = Array.isArray(opts.scope) ? opts.scope : [opts.scope]
    if (!scopes.length) return []
    const memoryConfig = await this.getMemoryConfig()
    const topK = opts.topK ?? memoryConfig.recallTopK
    const minSimilarity = opts.minSimilarity ?? memoryConfig.recallMinSimilarity

    if (!opts.query) {
      const rows = await this.repository.listActiveByScope(scopes, topK)
      return rows.map((row) => ({ ...row }))
    }

    const runtime = await this.tryGetEmbeddingRuntime()
    if (!runtime) return []

    let queryVec: number[]
    let modelId: string
    try {
      const result = await runtime.embedBatch!({ inputs: [opts.query] })
      queryVec = result.vectors[0]
      modelId = result.model
    } catch (error) {
      this.logger.warn(
        `Memory recall embed failed: ${(error as Error).message}`,
      )
      return []
    }

    if (!queryVec?.length) return []

    const candidates = await this.repository.vectorSearch(
      scopes,
      queryVec,
      modelId,
      Math.max(1, topK * 2),
    )

    return candidates
      .filter((row) => (row.similarity ?? 0) >= minSimilarity)
      .map((row) => ({
        ...row,
        score: (row.similarity ?? 0) * row.salience * row.confidence,
      }))
      .sort((a, b) => (b.score ?? 0) - (a.score ?? 0))
      .slice(0, topK)
  }

  async totalActive(): Promise<number> {
    return this.repository.totalActive()
  }

  /** Totals for the dashboard: all rows, active rows and archived rows. */
  async getKpi(): Promise<{
    total: number
    active: number
    archived: number
  }> {
    const counts = await this.repository.countByStatus()
    const total = (Object.values(counts) as number[]).reduce(
      (sum, n) => sum + n,
      0,
    )
    return {
      total,
      active: counts.active ?? 0,
      archived: counts.archived ?? 0,
    }
  }

  /**
   * Task entry point: embeds the memory's content and stores the vector.
   * Does nothing when the memory is missing, is neither `active` nor
   * `pending_review`, or no embedding runtime is available. Embedding errors
   * are logged as warnings and not rethrown.
   */
  async handleEmbedTask(memoryId: string): Promise<void> {
    const row = await this.repository.findById(memoryId)
    if (!row) return
    const allowed: AiMemoryStatus[] = ['active', 'pending_review']
    if (!allowed.includes(row.status)) return

    const runtime = await this.tryGetEmbeddingRuntime()
    if (!runtime) return

    try {
      const result = await runtime.embedBatch!({ inputs: [row.content] })
      const vec = result.vectors[0]
      if (!vec?.length) return
      await this.repository.updateEmbedding(memoryId, vec, result.model)
    } catch (error) {
      this.logger.warn(
        `Memory embed task failed for ${memoryId}: ${(error as Error).message}`,
      )
    }
  }

  /** Queues a `MemoryEmbed` task keyed per memory; failures are logged, not thrown. */
  private async enqueueEmbed(memoryId: string): Promise<void> {
    try {
      await this.aiTaskService.crud.createTask({
        type: AITaskType.MemoryEmbed,
        payload: { memoryId } as unknown as Record<string, unknown>,
        dedupKey: `memory:embed:${memoryId}`,
      })
    } catch (error) {
      this.logger.warn(
        `Failed to enqueue MEMORY_EMBED task for ${memoryId}: ${(error as Error).message}`,
      )
    }
  }

  /** The embedding runtime when it exposes `embedBatch`; `null` otherwise or on any error. */
  private async tryGetEmbeddingRuntime() {
    try {
      const runtime = await this.aiService.getEmbeddingModel()
      if (!runtime?.embedBatch) return null
      return runtime
    } catch {
      return null
    }
  }

  /** Recall settings from the `ai` config, falling back to the module defaults. */
  private async getMemoryConfig(): Promise<{
    recallTopK: number
    recallMinSimilarity: number
  }> {
    try {
      const aiConfig = await this.configService.get('ai')
      const cfg = aiConfig?.aiMemory
      return {
        recallTopK: cfg?.recallTopK ?? AI_MEMORY_DEFAULT_RECALL_TOP_K,
        recallMinSimilarity:
          cfg?.recallMinSimilarity ?? AI_MEMORY_DEFAULT_RECALL_MIN_SIMILARITY,
      }
    } catch {
      return {
        recallTopK: AI_MEMORY_DEFAULT_RECALL_TOP_K,
        recallMinSimilarity: AI_MEMORY_DEFAULT_RECALL_MIN_SIMILARITY,
      }
    }
  }
}
export type AiMemoryType =
  | 'fact'
  | 'event'
  | 'preference'
  | 'thread'
  | 'pattern'

export type AiMemoryStatus =
  | 'active'
  | 'superseded'
  | 'archived'
  | 'pending_review'

/** Provenance of a memory; open-ended beyond the two known fields. */
export interface AiMemorySource {
  kind?: string
  authorId?: string
  [key: string]: unknown
}

/** A stored memory as exposed by the repository. */
export interface AiMemory {
  id: EntityId
  scope: string
  type: AiMemoryType
  content: string
  confidence: number
  salience: number
  source: AiMemorySource
  /** `null` until an embedding has been stored. */
  embedding: number[] | null
  embeddingModel: string | null
  dim: number | null
  firstSeenAt: Date
  lastSeenAt: Date
  expiresAt: Date | null
  supersedesId: EntityId | null
  status: AiMemoryStatus
  metadata: Record<string, unknown>
  createdAt: Date
  updatedAt: Date | null
}

export interface RecallOptions {
  scope: string | string[]
  /** When omitted, recall skips vector search and lists active memories instead. */
  query?: string
  topK?: number
  minSimilarity?: number
}

/** A memory annotated by recall; both fields are absent when no query was given. */
export interface RecallScoredMemory extends AiMemory {
  similarity?: number
  score?: number
}

// --- apps/core/src/modules/ai/ai-memory/ai-memory.views.ts ---

const dateOrString = z.union([z.date(), z.string()])

/** Strict response shape for a single memory; unknown keys are rejected. */
const AiMemoryDetailSchema = z
  .object({
    id: z.string(),
    scope: z.string(),
    type: z.string(),
    content: z.string(),
    confidence: z.number(),
    salience: z.number(),
    source: z.record(z.string(), z.unknown()),
    status: z.string(),
    firstSeenAt: dateOrString,
    lastSeenAt: dateOrString,
    expiresAt: dateOrString.nullable(),
    metadata: z.record(z.string(), z.unknown()),
    createdAt: dateOrString,
    updatedAt: dateOrString.nullable(),
    hasEmbedding: z.boolean(),
  })
  .strict()

export const AiMemoryViews = {
  detail: AiMemoryDetailSchema,
} as const

export type AiMemoryDetailView = z.infer<typeof AiMemoryDetailSchema>
/**
 * Registers the task-queue handler for `AITaskType.MemoryEmbed` on module init.
 */
@Injectable()
export class MemoryEmbedTaskProcessor implements OnModuleInit {
  private readonly logger = new Logger(MemoryEmbedTaskProcessor.name)

  constructor(
    private readonly taskProcessor: TaskQueueProcessor,
    private readonly memoryService: AiMemoryService,
  ) {}

  /** The handler embeds the memory named in the payload unless the task was already aborted. */
  onModuleInit() {
    const execute = async (
      payload: MemoryEmbedTaskPayload,
      context: TaskExecuteContext,
    ) => {
      if (!context.isAborted()) {
        await this.memoryService.handleEmbedTask(payload.memoryId)
      }
    }

    this.taskProcessor.registerHandler({
      type: AITaskType.MemoryEmbed,
      execute,
    })
    this.logger.log('Memory embed task handler registered')
  }
}
PERSONA_DISTILL_LOCK_KEY_PREFIX = 'persona:distill:' +export const PERSONA_DISTILL_LOCK_TTL_SEC = 600 + +export const PERSONA_EXEMPLAR_CANDIDATES_CACHE_KEY_PREFIX = + 'persona:exemplars:candidates:' +export const PERSONA_EXEMPLAR_CANDIDATES_CACHE_TTL_SEC = 3600 + +export const PERSONA_DEFAULTS = { + distillSampleMaxTokens: 60_000, + exemplarsLengthMin: 200, + exemplarsLengthMax: 800, + exemplarsCandidateCacheTtlSec: 3600, + exemplarsCandidatesMax: 200, + charsPerToken: 4, + perTypeQuota: { post: 0.5, note: 0.3, page: 0.2 }, + recencyHalfLifeDays: 365, +} diff --git a/apps/core/src/modules/ai/ai-persona/ai-persona.controller.ts b/apps/core/src/modules/ai/ai-persona/ai-persona.controller.ts new file mode 100644 index 00000000000..f3c6695622b --- /dev/null +++ b/apps/core/src/modules/ai/ai-persona/ai-persona.controller.ts @@ -0,0 +1,30 @@ +import { Get, Param, Post } from '@nestjs/common' + +import { ApiController } from '~/common/decorators/api-controller.decorator' +import { Auth } from '~/common/decorators/auth.decorator' + +import { PersonaKeyParamDto } from './ai-persona.schema' +import { AiPersonaService } from './ai-persona.service' + +@ApiController('ai-persona') +export class AiPersonaController { + constructor(private readonly service: AiPersonaService) {} + + @Get('/') + @Auth() + list() { + return this.service.listPersonasWithStatus() + } + + @Get('/:key/profile') + @Auth() + getProfile(@Param() params: PersonaKeyParamDto) { + return this.service.getProfile(params.key) + } + + @Post('/:key/refresh') + @Auth() + refresh(@Param() params: PersonaKeyParamDto) { + return this.service.refresh(params.key) + } +} diff --git a/apps/core/src/modules/ai/ai-persona/ai-persona.module.ts b/apps/core/src/modules/ai/ai-persona/ai-persona.module.ts new file mode 100644 index 00000000000..f796a828f3f --- /dev/null +++ b/apps/core/src/modules/ai/ai-persona/ai-persona.module.ts @@ -0,0 +1,23 @@ +import { Module } from '@nestjs/common' + +import { AiService } from 
'../ai.service' +import { AiTaskModule } from '../ai-task/ai-task.module' +import { AiPersonaController } from './ai-persona.controller' +import { PersonaProfileRepository } from './ai-persona.repository' +import { AiPersonaService } from './ai-persona.service' +import { ExemplarSelector } from './exemplar-selector' +import { PersonaDistillProcessor } from './tasks/persona-distill.processor' + +@Module({ + imports: [AiTaskModule], + controllers: [AiPersonaController], + providers: [ + AiService, + AiPersonaService, + PersonaProfileRepository, + ExemplarSelector, + PersonaDistillProcessor, + ], + exports: [AiPersonaService, PersonaProfileRepository, ExemplarSelector], +}) +export class AiPersonaModule {} diff --git a/apps/core/src/modules/ai/ai-persona/ai-persona.repository.ts b/apps/core/src/modules/ai/ai-persona/ai-persona.repository.ts new file mode 100644 index 00000000000..60194d91912 --- /dev/null +++ b/apps/core/src/modules/ai/ai-persona/ai-persona.repository.ts @@ -0,0 +1,108 @@ +import { Inject, Injectable } from '@nestjs/common' +import { eq } from 'drizzle-orm' + +import { PG_DB_TOKEN } from '~/constants/system.constant' +import { personaProfiles } from '~/database/schema' +import { + BaseRepository, + toEntityId, +} from '~/processors/database/base.repository' +import type { AppDatabase } from '~/processors/database/postgres.provider' +import type { EntityId } from '~/shared/id/entity-id' +import { SnowflakeService } from '~/shared/id/snowflake.service' + +import type { PersonaProfile } from './ai-persona.types' + +const mapRow = (row: typeof personaProfiles.$inferSelect): PersonaProfile => ({ + id: toEntityId(row.id) as EntityId, + personaKey: row.personaKey, + profile: row.profile, + profileSummary: row.profileSummary ?? null, + corpusVersion: row.corpusVersion, + distillModel: row.distillModel, + refreshedAt: row.refreshedAt, + autoNextAt: row.autoNextAt ?? null, + metadata: (row.metadata ?? 
{}) as Record, + createdAt: row.createdAt, + updatedAt: row.updatedAt ?? row.createdAt, +}) + +export interface UpsertPersonaProfileInput { + personaKey: string + profile: string + profileSummary: string | null + corpusVersion: number + distillModel: string + refreshedAt: Date + metadata: Record +} + +@Injectable() +export class PersonaProfileRepository extends BaseRepository { + constructor( + @Inject(PG_DB_TOKEN) db: AppDatabase, + private readonly snowflake: SnowflakeService, + ) { + super(db) + } + + async findByKey(personaKey: string): Promise { + const [row] = await this.db + .select() + .from(personaProfiles) + .where(eq(personaProfiles.personaKey, personaKey)) + .limit(1) + return row ? mapRow(row) : null + } + + async listKeysWithProfiles(): Promise> { + const rows = await this.db + .select({ personaKey: personaProfiles.personaKey }) + .from(personaProfiles) + return new Set(rows.map((r) => r.personaKey)) + } + + async upsert(input: UpsertPersonaProfileInput): Promise { + const existing = await this.findByKey(input.personaKey) + if (existing) { + const [row] = await this.db + .update(personaProfiles) + .set({ + profile: input.profile, + profileSummary: input.profileSummary, + corpusVersion: input.corpusVersion, + distillModel: input.distillModel, + refreshedAt: input.refreshedAt, + metadata: input.metadata, + updatedAt: new Date(), + }) + .where(eq(personaProfiles.personaKey, input.personaKey)) + .returning() + return mapRow(row) + } + const id = this.snowflake.nextId() + const [row] = await this.db + .insert(personaProfiles) + .values({ + id, + personaKey: input.personaKey, + profile: input.profile, + profileSummary: input.profileSummary, + corpusVersion: input.corpusVersion, + distillModel: input.distillModel, + refreshedAt: input.refreshedAt, + metadata: input.metadata, + updatedAt: new Date(), + }) + .returning() + return mapRow(row) + } + + async deleteByKey(personaKey: string): Promise { + const result = await this.db + .delete(personaProfiles) + 
.where(eq(personaProfiles.personaKey, personaKey)) + .returning({ id: personaProfiles.id }) + return result.length > 0 + } +} diff --git a/apps/core/src/modules/ai/ai-persona/ai-persona.schema.ts b/apps/core/src/modules/ai/ai-persona/ai-persona.schema.ts new file mode 100644 index 00000000000..9e8d6bf9984 --- /dev/null +++ b/apps/core/src/modules/ai/ai-persona/ai-persona.schema.ts @@ -0,0 +1,23 @@ +import { createZodDto } from 'nestjs-zod' +import { z } from 'zod' + +export const PersonaKeyParamSchema = z.object({ + key: z.string().min(1).max(64), +}) + +export class PersonaKeyParamDto extends createZodDto(PersonaKeyParamSchema) {} + +export const DistillOutputSchema = z.object({ + profile: z.string().min(1), + profile_summary: z.string().nullish(), + metadata: z + .object({ + tone_tags: z.array(z.string()).default([]), + recurring_themes: z.array(z.string()).default([]), + signature_phrases: z.array(z.string()).default([]), + }) + .partial() + .default({}), +}) + +export type DistillOutputInput = z.infer diff --git a/apps/core/src/modules/ai/ai-persona/ai-persona.service.ts b/apps/core/src/modules/ai/ai-persona/ai-persona.service.ts new file mode 100644 index 00000000000..56a01395afd --- /dev/null +++ b/apps/core/src/modules/ai/ai-persona/ai-persona.service.ts @@ -0,0 +1,128 @@ +import { Injectable, Logger } from '@nestjs/common' + +import { AppErrorCode, createAppException } from '~/common/errors' +import { RedisService } from '~/processors/redis/redis.service' + +import { ConfigsService } from '../../configs/configs.service' +import { AiTaskService } from '../ai-task/ai-task.service' +import { + AITaskType, + type PersonaDistillTaskPayload, +} from '../ai-task/ai-task.types' +import { PERSONA_DISTILL_LOCK_KEY_PREFIX } from './ai-persona.constants' +import { PersonaProfileRepository } from './ai-persona.repository' +import type { + ExemplarPassage, + PersonaDefinitionWithStatus, + PersonaProfile, +} from './ai-persona.types' +import { ExemplarSelector } from 
'./exemplar-selector' +import { + getPersonaDefinition, + listPersonas, + tryGetPersonaDefinition, +} from './persona-registry' + +@Injectable() +export class AiPersonaService { + private readonly logger = new Logger(AiPersonaService.name) + + constructor( + private readonly profileRepo: PersonaProfileRepository, + private readonly aiTaskService: AiTaskService, + private readonly redisService: RedisService, + private readonly configsService: ConfigsService, + private readonly exemplarSelector: ExemplarSelector, + ) {} + + async listPersonasWithStatus(): Promise { + const personas = listPersonas() + const profileKeys = await this.profileRepo.listKeysWithProfiles() + return personas.map((p) => ({ + ...p, + hasProfile: p.needsProfile && profileKeys.has(p.key), + })) + } + + async getProfile(personaKey: string): Promise { + const def = tryGetPersonaDefinition(personaKey) + if (!def || !def.needsProfile) { + throw createAppException(AppErrorCode.AI_PERSONA_PROFILE_NOT_FOUND, { + key: personaKey, + }) + } + const row = await this.profileRepo.findByKey(personaKey) + if (!row) { + throw createAppException(AppErrorCode.AI_PERSONA_PROFILE_NOT_FOUND, { + key: personaKey, + }) + } + return row + } + + async getProfileOrNull(personaKey: string): Promise { + const def = tryGetPersonaDefinition(personaKey) + if (!def || !def.needsProfile) return null + return this.profileRepo.findByKey(personaKey) + } + + async refresh(personaKey: string): Promise<{ taskId: string }> { + const def = getPersonaDefinition(personaKey) + if (!def.needsProfile) { + throw createAppException(AppErrorCode.AI_PERSONA_NOT_DISTILLABLE, { + key: personaKey, + }) + } + + await this.assertDistillModelConfigured() + + const lockKey = `${PERSONA_DISTILL_LOCK_KEY_PREFIX}${personaKey}` + const redis = this.redisService.getClient() + const exists = await redis.exists(lockKey) + if (exists) { + throw createAppException(AppErrorCode.AI_PERSONA_REFRESH_IN_PROGRESS, { + key: personaKey, + }) + } + + const payload: 
PersonaDistillTaskPayload = { personaKey } + const { taskId } = await this.aiTaskService.crud.createTask({ + type: AITaskType.PersonaDistill, + payload: payload as unknown as Record, + dedupKey: `persona:distill:${personaKey}`, + }) + + this.logger.log( + `Persona distill task enqueued: key=${personaKey} taskId=${taskId}`, + ) + + return { taskId } + } + + async pickExemplars( + personaKey: string, + opts: { + count: number + lengthMin?: number + lengthMax?: number + rng?: () => number + bypassCache?: boolean + }, + ): Promise { + const def = getPersonaDefinition(personaKey) + if (!def.usesExemplars) return [] + return this.exemplarSelector.pickExemplars(personaKey, opts) + } + + private async assertDistillModelConfigured(): Promise { + const aiConfig = await this.configsService.get('ai') + const personaModel = aiConfig?.personaDistillModel + const echoModel = aiConfig?.echoModel + const hasProviders = !!aiConfig?.providers?.some((p) => p.enabled) + if (!hasProviders || (!personaModel && !echoModel)) { + throw createAppException( + AppErrorCode.AI_PERSONA_DISTILL_MODEL_NOT_CONFIGURED, + ) + } + } +} diff --git a/apps/core/src/modules/ai/ai-persona/ai-persona.types.ts b/apps/core/src/modules/ai/ai-persona/ai-persona.types.ts new file mode 100644 index 00000000000..02ae3502dba --- /dev/null +++ b/apps/core/src/modules/ai/ai-persona/ai-persona.types.ts @@ -0,0 +1,56 @@ +import type { EntityId } from '~/shared/id/entity-id' + +export type PersonaKey = 'inner-self' | 'passerby' | (string & {}) + +export interface PersonaDefinition { + key: PersonaKey + displayName: string + description: string + needsProfile: boolean + needsRetrieval: boolean + usesExemplars: boolean + staticPrompt: string +} + +export interface PersonaProfile { + id: EntityId + personaKey: string + profile: string + profileSummary: string | null + corpusVersion: number + distillModel: string + refreshedAt: Date + autoNextAt: Date | null + metadata: Record + createdAt: Date + updatedAt: Date +} + 
+export interface ExemplarPassage { + sourceType: 'note' | 'page' + sourceId: string + content: string + createdAt: Date +} + +export interface PersonaDefinitionWithStatus extends PersonaDefinition { + hasProfile: boolean +} + +export interface ParsedDistillOutput { + profile: string + profileSummary: string | null + metadata: { + toneTags: string[] + recurringThemes: string[] + signaturePhrases: string[] + } +} + +export interface CorpusSample { + sourceType: 'post' | 'note' | 'page' + sourceId: string + title: string | null + createdAt: Date + body: string +} diff --git a/apps/core/src/modules/ai/ai-persona/exemplar-selector.ts b/apps/core/src/modules/ai/ai-persona/exemplar-selector.ts new file mode 100644 index 00000000000..9fc79e2c2a6 --- /dev/null +++ b/apps/core/src/modules/ai/ai-persona/exemplar-selector.ts @@ -0,0 +1,256 @@ +import { Inject, Injectable, Logger } from '@nestjs/common' +import { and, desc, eq, isNotNull } from 'drizzle-orm' + +import { PG_DB_TOKEN } from '~/constants/system.constant' +import { notes, pages } from '~/database/schema' +import type { AppDatabase } from '~/processors/database/postgres.provider' +import { RedisService } from '~/processors/redis/redis.service' + +import { ConfigsService } from '../../configs/configs.service' +import { + PERSONA_DEFAULTS, + PERSONA_EXEMPLAR_CANDIDATES_CACHE_KEY_PREFIX, +} from './ai-persona.constants' +import type { ExemplarPassage } from './ai-persona.types' + +interface CandidatePassage extends ExemplarPassage { + recencyWeight: number +} + +interface RawCandidate { + sourceType: 'note' | 'page' + sourceId: string + content: string + createdAt: Date +} + +const PARAGRAPH_SPLIT_RE = /\n{2,}/ + +@Injectable() +export class ExemplarSelector { + private readonly logger = new Logger(ExemplarSelector.name) + + constructor( + @Inject(PG_DB_TOKEN) private readonly db: AppDatabase, + private readonly redisService: RedisService, + private readonly configsService: ConfigsService, + ) {} + + async 
pickExemplars( + personaKey: string, + opts: { + count: number + lengthMin?: number + lengthMax?: number + rng?: () => number + bypassCache?: boolean + }, + ): Promise { + if (opts.count <= 0) return [] + + const personaCfg = await this.getPersonaConfig() + const lengthMin = opts.lengthMin ?? personaCfg.lengthMin + const lengthMax = opts.lengthMax ?? personaCfg.lengthMax + const cacheTtl = personaCfg.cacheTtl + + const candidates = await this.loadCandidates({ + personaKey, + lengthMin, + lengthMax, + cacheTtl, + bypassCache: opts.bypassCache ?? false, + }) + + if (!candidates.length) return [] + + const rng = opts.rng ?? Math.random + return this.weightedRandomPick(candidates, opts.count, rng) + } + + private async getPersonaConfig(): Promise<{ + lengthMin: number + lengthMax: number + cacheTtl: number + }> { + try { + const aiCfg = await this.configsService.get('ai') + const personaCfg = aiCfg?.aiPersona + return { + lengthMin: + personaCfg?.exemplarsLengthMin ?? PERSONA_DEFAULTS.exemplarsLengthMin, + lengthMax: + personaCfg?.exemplarsLengthMax ?? PERSONA_DEFAULTS.exemplarsLengthMax, + cacheTtl: + personaCfg?.exemplarsCandidateCacheTtlSec ?? 
+ PERSONA_DEFAULTS.exemplarsCandidateCacheTtlSec, + } + } catch { + return { + lengthMin: PERSONA_DEFAULTS.exemplarsLengthMin, + lengthMax: PERSONA_DEFAULTS.exemplarsLengthMax, + cacheTtl: PERSONA_DEFAULTS.exemplarsCandidateCacheTtlSec, + } + } + } + + private async loadCandidates(input: { + personaKey: string + lengthMin: number + lengthMax: number + cacheTtl: number + bypassCache: boolean + }): Promise { + const cacheKey = `${PERSONA_EXEMPLAR_CANDIDATES_CACHE_KEY_PREFIX}${input.personaKey}:${input.lengthMin}:${input.lengthMax}` + const redis = this.redisService.getClient() + + if (!input.bypassCache) { + try { + const cached = await redis.get(cacheKey) + if (cached) { + const parsed = JSON.parse(cached) as Array< + Omit & { createdAt: string } + > + return parsed.map((p) => ({ + ...p, + createdAt: new Date(p.createdAt), + })) + } + } catch (error) { + this.logger.warn( + `Exemplar cache read failed: ${(error as Error).message}`, + ) + } + } + + const raws = await this.loadRawPassages() + const candidates = this.toCandidates(raws, input.lengthMin, input.lengthMax) + + if (candidates.length) { + try { + await redis.set( + cacheKey, + JSON.stringify( + candidates.map((c) => ({ + ...c, + createdAt: c.createdAt.toISOString(), + })), + ), + 'EX', + input.cacheTtl, + ) + } catch (error) { + this.logger.warn( + `Exemplar cache write failed: ${(error as Error).message}`, + ) + } + } + + return candidates + } + + private async loadRawPassages(): Promise { + const noteRows = await this.db + .select({ + sourceId: notes.id, + content: notes.text, + createdAt: notes.createdAt, + }) + .from(notes) + .where(and(eq(notes.isPublished, true), isNotNull(notes.text))!) 
+ .orderBy(desc(notes.createdAt)) + .limit(500) + + const pageRows = await this.db + .select({ + sourceId: pages.id, + content: pages.text, + createdAt: pages.createdAt, + }) + .from(pages) + .where(isNotNull(pages.text)) + .orderBy(desc(pages.createdAt)) + .limit(200) + + const out: RawCandidate[] = [] + for (const row of noteRows) { + if (!row.content) continue + out.push({ + sourceType: 'note', + sourceId: String(row.sourceId), + content: row.content, + createdAt: row.createdAt, + }) + } + for (const row of pageRows) { + if (!row.content) continue + out.push({ + sourceType: 'page', + sourceId: String(row.sourceId), + content: row.content, + createdAt: row.createdAt, + }) + } + return out + } + + private toCandidates( + raws: RawCandidate[], + lengthMin: number, + lengthMax: number, + ): CandidatePassage[] { + const now = Date.now() + const halfLifeMs = + PERSONA_DEFAULTS.recencyHalfLifeDays * 24 * 60 * 60 * 1000 + const out: CandidatePassage[] = [] + for (const raw of raws) { + const paragraphs = raw.content + .split(PARAGRAPH_SPLIT_RE) + .map((p) => p.trim()) + .filter((p) => p.length >= lengthMin && p.length <= lengthMax) + const ageMs = Math.max(0, now - raw.createdAt.getTime()) + const recencyWeight = Math.pow(0.5, ageMs / halfLifeMs) + for (const paragraph of paragraphs) { + out.push({ + sourceType: raw.sourceType, + sourceId: raw.sourceId, + content: paragraph, + createdAt: raw.createdAt, + recencyWeight, + }) + if (out.length >= PERSONA_DEFAULTS.exemplarsCandidatesMax) { + return out + } + } + } + return out + } + + private weightedRandomPick( + candidates: CandidatePassage[], + count: number, + rng: () => number, + ): ExemplarPassage[] { + const pool = candidates.slice() + const picked: ExemplarPassage[] = [] + const want = Math.min(count, pool.length) + for (let i = 0; i < want; i++) { + const totalWeight = pool.reduce((acc, c) => acc + c.recencyWeight, 0) + if (totalWeight <= 0) break + let target = rng() * totalWeight + let idx = 0 + for (; idx < 
pool.length; idx++) { + target -= pool[idx].recencyWeight + if (target <= 0) break + } + if (idx >= pool.length) idx = pool.length - 1 + const chosen = pool.splice(idx, 1)[0] + picked.push({ + sourceType: chosen.sourceType, + sourceId: chosen.sourceId, + content: chosen.content, + createdAt: chosen.createdAt, + }) + } + return picked + } +} diff --git a/apps/core/src/modules/ai/ai-persona/persona-registry.ts b/apps/core/src/modules/ai/ai-persona/persona-registry.ts new file mode 100644 index 00000000000..af0e593299b --- /dev/null +++ b/apps/core/src/modules/ai/ai-persona/persona-registry.ts @@ -0,0 +1,48 @@ +import { AppErrorCode, createAppException } from '~/common/errors' + +import type { PersonaDefinition, PersonaKey } from './ai-persona.types' +import { AI_PERSONA_PROMPTS } from './prompts' + +export const PERSONA_REGISTRY: Record = { + 'inner-self': { + key: 'inner-self', + displayName: 'Inner Self', + description: + "The author's alternate voice — distilled from their own writing.", + needsProfile: true, + needsRetrieval: true, + usesExemplars: true, + staticPrompt: AI_PERSONA_PROMPTS.innerSelf, + }, + passerby: { + key: 'passerby', + displayName: 'Passerby', + description: 'A visiting stranger; brief, fresh-eyed reactions.', + needsProfile: false, + needsRetrieval: false, + usesExemplars: false, + staticPrompt: AI_PERSONA_PROMPTS.passerby, + }, +} + +export function listPersonas(): PersonaDefinition[] { + return Object.values(PERSONA_REGISTRY) +} + +export function getPersonaDefinition(key: string): PersonaDefinition { + const definition = PERSONA_REGISTRY[key] + if (!definition) { + throw createAppException(AppErrorCode.AI_PERSONA_NOT_FOUND, { key }) + } + return definition +} + +export function tryGetPersonaDefinition( + key: string, +): PersonaDefinition | undefined { + return PERSONA_REGISTRY[key] +} + +export function isKnownPersonaKey(key: string): key is PersonaKey { + return key in PERSONA_REGISTRY +} diff --git 
a/apps/core/src/modules/ai/ai-persona/prompts.ts b/apps/core/src/modules/ai/ai-persona/prompts.ts new file mode 100644 index 00000000000..162d390bf6b --- /dev/null +++ b/apps/core/src/modules/ai/ai-persona/prompts.ts @@ -0,0 +1,16 @@ +export const AI_PERSONA_PROMPTS = { + innerSelf: [ + "You are the author's inner-self echo — an alternate voice distilled from the author's own writing.", + 'Speak with the same cadence, vocabulary, and value tendencies as the author. Keep it short (1–3 sentences) and honest.', + 'You may receive a distilled profile, a retrieval section, recalled memories, and exemplar passages.', + "Use those signals to ground your reply in the author's actual style and concerns.", + 'When no retrieval section is present, never claim "I remember" — speak only from voice and general sensibility.', + 'Do not greet, do not introduce yourself, do not moralize.', + ].join(' '), + passerby: [ + 'You are a passerby — a stranger pausing briefly at this wishing well.', + 'React with fresh eyes, in 1–2 sentences. Tone: warm, curious, slightly distant.', + 'Do not claim to know the author. Do not give advice. 
Do not greet.', + 'Stay light; this is a glance, not a conversation.', + ].join(' '), +} as const diff --git a/apps/core/src/modules/ai/ai-persona/tasks/persona-distill.processor.ts b/apps/core/src/modules/ai/ai-persona/tasks/persona-distill.processor.ts new file mode 100644 index 00000000000..8ac670e74be --- /dev/null +++ b/apps/core/src/modules/ai/ai-persona/tasks/persona-distill.processor.ts @@ -0,0 +1,387 @@ +import { Inject, Injectable, Logger, type OnModuleInit } from '@nestjs/common' +import { and, desc, eq, isNotNull } from 'drizzle-orm' + +import { AppErrorCode, createAppException } from '~/common/errors' +import { BusinessEvents } from '~/constants/business-event.constant' +import { PG_DB_TOKEN } from '~/constants/system.constant' +import { notes, pages, posts } from '~/database/schema' +import type { AppDatabase } from '~/processors/database/postgres.provider' +import { EventManagerService } from '~/processors/helper/helper.event.service' +import { RedisService } from '~/processors/redis/redis.service' +import { + type TaskExecuteContext, + TaskQueueProcessor, + TaskStatus, +} from '~/processors/task-queue' + +import { ConfigsService } from '../../../configs/configs.service' +import { AiService } from '../../ai.service' +import { + AITaskType, + type PersonaDistillTaskPayload, +} from '../../ai-task/ai-task.types' +import { + PERSONA_DEFAULTS, + PERSONA_DISTILL_LOCK_KEY_PREFIX, + PERSONA_DISTILL_LOCK_TTL_SEC, +} from '../ai-persona.constants' +import { PersonaProfileRepository } from '../ai-persona.repository' +import { + type DistillOutputInput, + DistillOutputSchema, +} from '../ai-persona.schema' +import type { CorpusSample, ParsedDistillOutput } from '../ai-persona.types' +import { getPersonaDefinition, isKnownPersonaKey } from '../persona-registry' + +const STRIP_FENCE_RE = /^```(?:json)?\n?(.*?)\n?```$/s + +@Injectable() +export class PersonaDistillProcessor implements OnModuleInit { + private readonly logger = new 
Logger(PersonaDistillProcessor.name) + + constructor( + @Inject(PG_DB_TOKEN) private readonly db: AppDatabase, + private readonly taskProcessor: TaskQueueProcessor, + private readonly aiService: AiService, + private readonly configsService: ConfigsService, + private readonly profileRepo: PersonaProfileRepository, + private readonly redisService: RedisService, + private readonly eventManager: EventManagerService, + ) {} + + onModuleInit() { + this.taskProcessor.registerHandler({ + type: AITaskType.PersonaDistill, + execute: async (payload, context) => this.handle(payload, context), + }) + } + + private async handle( + payload: PersonaDistillTaskPayload, + context: TaskExecuteContext, + ): Promise { + const { personaKey } = payload + if (!isKnownPersonaKey(personaKey)) { + throw createAppException(AppErrorCode.AI_PERSONA_NOT_FOUND, { + key: personaKey, + }) + } + const definition = getPersonaDefinition(personaKey) + if (!definition.needsProfile) { + throw createAppException(AppErrorCode.AI_PERSONA_NOT_DISTILLABLE, { + key: personaKey, + }) + } + + const redis = this.redisService.getClient() + const lockKey = `${PERSONA_DISTILL_LOCK_KEY_PREFIX}${personaKey}` + const acquired = await redis.set( + lockKey, + '1', + 'EX', + PERSONA_DISTILL_LOCK_TTL_SEC, + 'NX', + ) + + if (!acquired) { + await context.appendLog( + 'warn', + `Persona "${personaKey}" distill lock held; skipping`, + ) + context.setStatus(TaskStatus.Cancelled) + return + } + + try { + await context.appendLog('info', `Persona distill started: ${personaKey}`) + await context.updateProgress(5, 'Sampling corpus') + + const aiConfig = await this.configsService.get('ai') + const maxTokens = + aiConfig?.aiPersona?.distillSampleMaxTokens ?? 
+ PERSONA_DEFAULTS.distillSampleMaxTokens + + const corpus = await this.sampleCorpus({ maxTokens }) + const corpusVersion = corpus.length + if (!corpus.length) { + await context.appendLog( + 'warn', + 'Corpus is empty; aborting persona distill', + ) + context.setStatus(TaskStatus.Failed) + return + } + + await context.updateProgress(30, 'Calling distill model') + + const runtime = await this.aiService.getPersonaDistillModel() + const messages = this.buildDistillPrompt(corpus) + const result = await runtime.generateText({ + messages, + temperature: 0.4, + maxRetries: 2, + }) + await context.incrementTokens(result.usage?.totalTokens ?? 0) + + await context.updateProgress(80, 'Persisting profile') + const parsed = this.parseDistillOutput(result.text) + + const row = await this.profileRepo.upsert({ + personaKey, + profile: parsed.profile, + profileSummary: parsed.profileSummary, + corpusVersion, + distillModel: runtime.providerInfo.model, + refreshedAt: new Date(), + metadata: { + toneTags: parsed.metadata.toneTags, + recurringThemes: parsed.metadata.recurringThemes, + signaturePhrases: parsed.metadata.signaturePhrases, + }, + }) + + await context.setResult({ + personaKey, + profileId: row.id, + corpusVersion, + refreshedAt: row.refreshedAt.toISOString(), + }) + + await this.eventManager.emit(BusinessEvents.PERSONA_PROFILE_REFRESHED, { + personaKey, + profileId: row.id, + refreshedAt: row.refreshedAt.toISOString(), + corpusVersion, + }) + } catch (error) { + this.logger.error( + `Persona distill failed: ${(error as Error).message}`, + (error as Error).stack, + ) + throw error + } finally { + try { + await redis.del(lockKey) + } catch (error) { + this.logger.warn( + `Failed to release persona distill lock: ${(error as Error).message}`, + ) + } + } + } + + async sampleCorpus(opts: { + maxTokens: number + rng?: () => number + }): Promise { + const rng = opts.rng ?? 
Math.random + const charBudget = opts.maxTokens * PERSONA_DEFAULTS.charsPerToken + const overshoot = charBudget * 1.1 + + const quota = PERSONA_DEFAULTS.perTypeQuota + const perType: Record<'post' | 'note' | 'page', number> = { + post: Math.floor(overshoot * quota.post), + note: Math.floor(overshoot * quota.note), + page: Math.floor(overshoot * quota.page), + } + + const [postRows, noteRows, pageRows] = await Promise.all([ + this.db + .select({ + id: posts.id, + title: posts.title, + createdAt: posts.createdAt, + text: posts.text, + }) + .from(posts) + .where(and(eq(posts.isPublished, true), isNotNull(posts.text))!) + .orderBy(desc(posts.createdAt)) + .limit(500), + this.db + .select({ + id: notes.id, + title: notes.title, + createdAt: notes.createdAt, + text: notes.text, + }) + .from(notes) + .where(and(eq(notes.isPublished, true), isNotNull(notes.text))!) + .orderBy(desc(notes.createdAt)) + .limit(500), + this.db + .select({ + id: pages.id, + title: pages.title, + createdAt: pages.createdAt, + text: pages.text, + }) + .from(pages) + .where(isNotNull(pages.text)) + .orderBy(desc(pages.createdAt)) + .limit(200), + ]) + + const samples: CorpusSample[] = [] + const collectType = ( + type: 'post' | 'note' | 'page', + rows: Array<{ + id: string + title: string | null + createdAt: Date + text: string | null + }>, + ) => { + const budget = perType[type] + const pool = rows.slice() + const weighted = this.recencyWeighted(pool, rng) + let used = 0 + for (const row of weighted) { + if (!row.text) continue + const len = row.text.length + if (used + len > budget && used > 0) break + samples.push({ + sourceType: type, + sourceId: String(row.id), + title: row.title ?? 
null, + createdAt: row.createdAt, + body: row.text, + }) + used += len + } + } + + collectType('post', postRows) + collectType('note', noteRows) + collectType('page', pageRows) + + samples.sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime()) + return samples + } + + private recencyWeighted( + items: T[], + rng: () => number, + ): T[] { + if (!items.length) return items + const now = Date.now() + const halfLifeMs = + PERSONA_DEFAULTS.recencyHalfLifeDays * 24 * 60 * 60 * 1000 + const pool = items.slice() + const out: T[] = [] + while (pool.length) { + const weights = pool.map((item) => { + const age = Math.max(0, now - item.createdAt.getTime()) + return Math.pow(0.5, age / halfLifeMs) + }) + const total = weights.reduce((a, b) => a + b, 0) + if (total <= 0) { + out.push(...pool) + break + } + let target = rng() * total + let idx = 0 + for (; idx < weights.length; idx++) { + target -= weights[idx] + if (target <= 0) break + } + if (idx >= pool.length) idx = pool.length - 1 + out.push(pool.splice(idx, 1)[0]) + } + return out + } + + buildDistillPrompt(corpus: CorpusSample[]): Array<{ + role: 'system' | 'user' + content: string + }> { + const system = [ + 'You are profiling a single author from their own writing.', + 'Read the passages below and produce a JSON object with three fields:', + '', + '- "profile": a description (200–600 words) covering the author\'s voice,', + ' cadence, vocabulary, recurring themes, value tendencies, signature', + ' phrases. Write in second person ("the author tends to…"). 
Be specific', + ' and citable, not generic.', + '', + '- "profile_summary": a 60–120 word condensation suitable for embedding', + ' into another prompt.', + '', + '- "metadata": {', + ' "tone_tags": [string],', + ' "recurring_themes": [string],', + ' "signature_phrases": [string]', + ' }', + '', + 'Reply with raw JSON, no markdown fences.', + ].join('\n') + + const userBody: string[] = ['Passages (oldest first):', ''] + for (const sample of corpus) { + const dateStr = sample.createdAt.toISOString().slice(0, 10) + const head = sample.title + ? `[${sample.sourceType}:${sample.sourceId} — ${dateStr} — ${sample.title}]` + : `[${sample.sourceType}:${sample.sourceId} — ${dateStr}]` + userBody.push(head, sample.body, '') + } + + return [ + { role: 'system', content: system }, + { role: 'user', content: userBody.join('\n') }, + ] + } + + parseDistillOutput(raw: string): ParsedDistillOutput { + const trimmed = (raw ?? '').trim() + if (!trimmed) { + throw new Error('Empty distill output') + } + + const candidate = this.stripFences(trimmed) + let parsedJson: unknown + try { + parsedJson = JSON.parse(candidate) + } catch { + this.logger.warn('Distill output is not valid JSON; using text fallback') + return this.textFallback(trimmed) + } + + const validated = DistillOutputSchema.safeParse(parsedJson) + if (!validated.success) { + this.logger.warn( + `Distill output JSON failed validation: ${validated.error.message}`, + ) + return this.textFallback(trimmed) + } + return this.toParsed(validated.data) + } + + private stripFences(text: string): string { + const match = STRIP_FENCE_RE.exec(text) + return match ? match[1].trim() : text + } + + private toParsed(input: DistillOutputInput): ParsedDistillOutput { + const md = input.metadata ?? {} + return { + profile: input.profile.trim(), + profileSummary: input.profile_summary?.trim() ?? null, + metadata: { + toneTags: md.tone_tags ?? [], + recurringThemes: md.recurring_themes ?? [], + signaturePhrases: md.signature_phrases ?? 
[], + }, + } + } + + private textFallback(raw: string): ParsedDistillOutput { + return { + profile: raw.slice(0, 4000), + profileSummary: null, + metadata: { + toneTags: [], + recurringThemes: [], + signaturePhrases: [], + }, + } + } +} diff --git a/apps/core/src/modules/ai/ai-task/ai-task.service.ts b/apps/core/src/modules/ai/ai-task/ai-task.service.ts index 7b764a31607..6725cd38146 100644 --- a/apps/core/src/modules/ai/ai-task/ai-task.service.ts +++ b/apps/core/src/modules/ai/ai-task/ai-task.service.ts @@ -9,6 +9,7 @@ import { type AITaskPayload, AITaskType, computeAITaskDedupKey, + type EmbedSyncTaskPayload, type InsightsTaskPayload, type InsightsTranslationTaskPayload, type SlugBackfillTaskPayload, @@ -75,6 +76,12 @@ export class AiTaskService { return this.createTask(AITaskType.InsightsTranslation, payload) } + async createEmbedSyncTask( + payload: EmbedSyncTaskPayload, + ): Promise<{ taskId: string; created: boolean }> { + return this.createTask(AITaskType.EmbedSync, payload) + } + async retryTaskWithFailedOnly( taskId: string, ): Promise<{ taskId: string; created: boolean }> { diff --git a/apps/core/src/modules/ai/ai-task/ai-task.types.ts b/apps/core/src/modules/ai/ai-task/ai-task.types.ts index 20c5614ec8d..26f176c343c 100644 --- a/apps/core/src/modules/ai/ai-task/ai-task.types.ts +++ b/apps/core/src/modules/ai/ai-task/ai-task.types.ts @@ -6,6 +6,11 @@ export enum AITaskType { SlugBackfill = 'ai:slug:backfill', Insights = 'ai:insights', InsightsTranslation = 'ai:insights:translation', + EmbedSync = 'ai:embed:sync', + EmbedBackfill = 'ai:embed:backfill', + PersonaDistill = 'ai:persona:distill', + MemoryEmbed = 'ai:memory:embed', + EchoGenerate = 'ai:echo:generate', } export interface SummaryTaskPayload { @@ -56,6 +61,29 @@ export interface InsightsTranslationTaskPayload { refType?: string } +export interface EmbedSyncTaskPayload { + sourceType: string + sourceId: string + op: 'upsert' | 'delete' +} + +export interface EmbedBackfillTaskPayload { + 
sourceTypes?: string[] + batchSize?: number +} + +export interface PersonaDistillTaskPayload { + personaKey: string +} + +export interface MemoryEmbedTaskPayload { + memoryId: string +} + +export interface EchoGenerateTaskPayload { + echoId: string +} + export type AITaskPayload = | SummaryTaskPayload | TranslationTaskPayload @@ -64,6 +92,11 @@ export type AITaskPayload = | SlugBackfillTaskPayload | InsightsTaskPayload | InsightsTranslationTaskPayload + | EmbedSyncTaskPayload + | EmbedBackfillTaskPayload + | PersonaDistillTaskPayload + | MemoryEmbedTaskPayload + | EchoGenerateTaskPayload export function computeAITaskDedupKey( type: AITaskType, @@ -101,5 +134,25 @@ export function computeAITaskDedupKey( const p = payload as InsightsTranslationTaskPayload return `${p.refId}:${p.targetLang}` } + case AITaskType.EmbedSync: { + const p = payload as EmbedSyncTaskPayload + return `embed:${p.sourceType}:${p.sourceId}:${p.op}` + } + case AITaskType.EmbedBackfill: { + const p = payload as EmbedBackfillTaskPayload + return `embed:backfill:${(p.sourceTypes || []).slice().sort().join(',')}` + } + case AITaskType.PersonaDistill: { + const p = payload as PersonaDistillTaskPayload + return `persona:distill:${p.personaKey}` + } + case AITaskType.MemoryEmbed: { + const p = payload as MemoryEmbedTaskPayload + return `memory:embed:${p.memoryId}` + } + case AITaskType.EchoGenerate: { + const p = payload as EchoGenerateTaskPayload + return `echo:generate:${p.echoId}` + } } } diff --git a/apps/core/src/modules/ai/ai.controller.ts b/apps/core/src/modules/ai/ai.controller.ts index 1d130ed91ce..bb5b45e490f 100644 --- a/apps/core/src/modules/ai/ai.controller.ts +++ b/apps/core/src/modules/ai/ai.controller.ts @@ -24,6 +24,7 @@ interface ProviderModelsResponse { providerName: string providerType: AIProviderType models: ModelInfo[] + embeddingModels?: ModelInfo[] error?: string } @@ -66,7 +67,10 @@ export class AiController { try { const runtime = createModelRuntime(provider) const models = 
await this.fetchModelsFromRuntime(runtime) + const embeddingModels = + await this.fetchEmbeddingModelsFromRuntime(runtime) results.push({ + embeddingModels, providerId: provider.id, providerName: provider.name, providerType: provider.type, @@ -77,6 +81,7 @@ export class AiController { providerId: provider.id, providerName: provider.name, providerType: provider.type, + embeddingModels: [], models: [], error: error.message || 'Unknown error', }) @@ -293,7 +298,10 @@ export class AiController { try { const runtime = createModelRuntime(provider) const models = await this.fetchModelsFromRuntime(runtime) + const embeddingModels = + await this.fetchEmbeddingModelsFromRuntime(runtime) return { + embeddingModels, providerId: provider.id, providerName: provider.name, providerType: provider.type, @@ -304,6 +312,7 @@ export class AiController { providerId: provider.id, providerName: provider.name, providerType: provider.type, + embeddingModels: [], models: [], error: error.message || 'Unknown error', } @@ -318,4 +327,13 @@ export class AiController { } return runtime.listModels() } + + private async fetchEmbeddingModelsFromRuntime( + runtime: IModelRuntime, + ): Promise { + if (!runtime.listEmbeddingModels) { + return [] + } + return runtime.listEmbeddingModels() + } } diff --git a/apps/core/src/modules/ai/ai.module.ts b/apps/core/src/modules/ai/ai.module.ts index 7d961b92644..138d7305035 100644 --- a/apps/core/src/modules/ai/ai.module.ts +++ b/apps/core/src/modules/ai/ai.module.ts @@ -8,11 +8,15 @@ import { AiAgentController } from './ai-agent/ai-agent.controller' import { AiAgentChatService } from './ai-agent/ai-agent-chat.service' import { AiAgentConversationRepository } from './ai-agent/ai-agent-conversation.repository' import { AiAgentConversationService } from './ai-agent/ai-agent-conversation.service' +import { AiEchoModule } from './ai-echo/ai-echo.module' +import { AiEmbeddingsModule } from './ai-embeddings/ai-embeddings.module' import { AiInFlightService } from 
'./ai-inflight/ai-inflight.service' import { AiInsightsController } from './ai-insights/ai-insights.controller' import { AiInsightsRepository } from './ai-insights/ai-insights.repository' import { AiInsightsService } from './ai-insights/ai-insights.service' import { AiInsightsTranslationService } from './ai-insights/ai-insights-translation.service' +import { AiMemoryModule } from './ai-memory/ai-memory.module' +import { AiPersonaModule } from './ai-persona/ai-persona.module' import { AiSummaryController } from './ai-summary/ai-summary.controller' import { AiSummaryRepository } from './ai-summary/ai-summary.repository' import { AiSummaryService } from './ai-summary/ai-summary.service' @@ -40,7 +44,15 @@ import { AiWriterController } from './ai-writer/ai-writer.controller' import { AiWriterService } from './ai-writer/ai-writer.service' @Module({ - imports: [AiTaskModule, TopicModule, forwardRef(() => NoteModule)], + imports: [ + AiTaskModule, + TopicModule, + forwardRef(() => NoteModule), + AiEmbeddingsModule, + AiPersonaModule, + AiMemoryModule, + AiEchoModule, + ], providers: [ AiSummaryService, AiSummaryRepository, diff --git a/apps/core/src/modules/ai/ai.service.ts b/apps/core/src/modules/ai/ai.service.ts index b3bfc8e08cb..b8c0189044e 100644 --- a/apps/core/src/modules/ai/ai.service.ts +++ b/apps/core/src/modules/ai/ai.service.ts @@ -70,6 +70,38 @@ export class AiService { return this.getModelForFeature(AIFeatureKey.InsightsTranslation) } + public async getEchoModel(): Promise { + return this.getModelForFeature(AIFeatureKey.Echo) + } + + public async getEmbeddingModel(): Promise { + const aiConfig = await this.configService.get('ai') + const assignment = this.getAssignment(aiConfig, AIFeatureKey.Embedding) + const provider = this.resolveAssignedProvider(aiConfig, assignment) + if (!provider) { + throw createAppException(AppErrorCode.AI_EMBEDDING_MODEL_NOT_CONFIGURED) + } + return createModelRuntime(provider, assignment?.model) + } + + public async 
getPersonaDistillModel(): Promise { + const aiConfig = await this.configService.get('ai') + const assignment = this.getAssignment(aiConfig, AIFeatureKey.PersonaDistill) + if (!assignment) { + return this.getEchoModel() + } + return this.getModelForFeature(AIFeatureKey.PersonaDistill) + } + + public async hasFeatureModel(feature: AIFeatureKey): Promise { + const aiConfig = await this.configService.get('ai') + const assignment = this.getAssignment(aiConfig, feature) + if (feature === AIFeatureKey.Embedding) { + return Boolean(this.resolveAssignedProvider(aiConfig, assignment)) + } + return Boolean(this.resolveProvider(aiConfig, assignment?.providerId)) + } + private async resolveFeatureRuntime(feature: AIFeatureKey): Promise<{ runtime: IModelRuntime provider: AIProviderConfig @@ -126,6 +158,9 @@ export class AiService { [AIFeatureKey.TranslationReview]: 'translationReviewModel', [AIFeatureKey.Insights]: 'insightsModel', [AIFeatureKey.InsightsTranslation]: 'insightsTranslationModel', + [AIFeatureKey.Echo]: 'echoModel', + [AIFeatureKey.Embedding]: 'embeddingModel', + [AIFeatureKey.PersonaDistill]: 'personaDistillModel', } return config[featureToConfigKey[feature]] as AIModelAssignment | undefined } @@ -149,4 +184,18 @@ export class AiService { // Fallback to first enabled provider return config.providers.find((p) => p.enabled) || null } + + private resolveAssignedProvider( + config: AIConfig, + assignment?: AIModelAssignment, + ): AIProviderConfig | null { + if (!assignment?.providerId || !config.providers?.length) { + return null + } + return ( + config.providers.find( + (provider) => provider.id === assignment.providerId && provider.enabled, + ) || null + ) + } } diff --git a/apps/core/src/modules/ai/ai.types.ts b/apps/core/src/modules/ai/ai.types.ts index 019a13277c2..12de1933347 100644 --- a/apps/core/src/modules/ai/ai.types.ts +++ b/apps/core/src/modules/ai/ai.types.ts @@ -13,6 +13,9 @@ export enum AIFeatureKey { TranslationReview = 'translationReview', Insights = 
'insights', InsightsTranslation = 'insightsTranslation', + Echo = 'echo', + Embedding = 'embedding', + PersonaDistill = 'personaDistill', } export interface AIProviderConfig { diff --git a/apps/core/src/modules/ai/runtime/model-runtime.interface.ts b/apps/core/src/modules/ai/runtime/model-runtime.interface.ts index 2577a250f20..ba16b383f18 100644 --- a/apps/core/src/modules/ai/runtime/model-runtime.interface.ts +++ b/apps/core/src/modules/ai/runtime/model-runtime.interface.ts @@ -1,5 +1,8 @@ import type { z } from 'zod' + import type { + EmbedBatchOptions, + EmbedBatchResult, GenerateStructuredOptions, GenerateStructuredResult, GenerateTextOptions, @@ -22,5 +25,9 @@ export interface IModelRuntime { options: GenerateStructuredOptions, ) => Promise>> + embedBatch?: (options: EmbedBatchOptions) => Promise + listModels?: () => Promise + listChatModels?: () => Promise + listEmbeddingModels?: () => Promise } diff --git a/apps/core/src/modules/ai/runtime/openai-compatible.runtime.ts b/apps/core/src/modules/ai/runtime/openai-compatible.runtime.ts index 198f46290c5..106ac616046 100644 --- a/apps/core/src/modules/ai/runtime/openai-compatible.runtime.ts +++ b/apps/core/src/modules/ai/runtime/openai-compatible.runtime.ts @@ -7,6 +7,8 @@ import { AIProviderType } from '../ai.types' import { buildAiSdkDefaultHeaders } from './ai-sdk-attribution' import { BaseRuntime } from './base.runtime' import type { + EmbedBatchOptions, + EmbedBatchResult, GenerateStructuredOptions, GenerateStructuredResult, GenerateTextOptions, @@ -18,6 +20,18 @@ import type { TextStreamChunk, } from './types' +const NON_CHAT_MODEL_TOKENS = [ + 'embedding', + 'whisper', + 'tts', + 'dall-e', + 'moderation', + 'davinci', + 'babbage', + 'ada', + 'curie', +] + export class OpenAICompatibleRuntime extends BaseRuntime { readonly providerInfo: RuntimeProviderInfo private readonly client: OpenAI @@ -293,6 +307,38 @@ export class OpenAICompatibleRuntime extends BaseRuntime { } async listModels(): Promise { + return 
this.listChatModels() + } + + async listChatModels(): Promise { + return this.listFilteredModels((id) => + NON_CHAT_MODEL_TOKENS.every((token) => !id.includes(token)), + ) + } + + async listEmbeddingModels(): Promise { + return this.listFilteredModels((id) => id.includes('embedding')) + } + + async embedBatch(options: EmbedBatchOptions): Promise { + if (options.inputs.length === 0) { + return { vectors: [], model: this.providerInfo.model, dim: 0 } + } + const response = await this.client.embeddings.create( + { + model: this.providerInfo.model, + input: options.inputs, + }, + { signal: options.signal }, + ) + const vectors = response.data.map((entry) => entry.embedding as number[]) + const dim = vectors[0]?.length ?? 0 + return { vectors, model: response.model ?? this.providerInfo.model, dim } + } + + private async listFilteredModels( + keep: (lowercasedId: string) => boolean, + ): Promise { const controller = new AbortController() const timeoutId = setTimeout(() => controller.abort(), 10000) @@ -306,20 +352,7 @@ export class OpenAICompatibleRuntime extends BaseRuntime { const models: ModelInfo[] = [] for await (const model of response) { const id = (model.id || '').toLowerCase() - // Filter out non-chat models - if ( - id.includes('embedding') || - id.includes('whisper') || - id.includes('tts') || - id.includes('dall-e') || - id.includes('moderation') || - id.includes('davinci') || - id.includes('babbage') || - id.includes('ada') || - id.includes('curie') - ) { - continue - } + if (!keep(id)) continue models.push({ id: model.id, name: model.id, @@ -327,7 +360,6 @@ export class OpenAICompatibleRuntime extends BaseRuntime { }) } - // Sort by creation time, newest first return models.sort((a, b) => (b.created || 0) - (a.created || 0)) } catch (error: any) { clearTimeout(timeoutId) diff --git a/apps/core/src/modules/ai/runtime/types.ts b/apps/core/src/modules/ai/runtime/types.ts index e484931f176..14531782b97 100644 --- a/apps/core/src/modules/ai/runtime/types.ts +++ 
b/apps/core/src/modules/ai/runtime/types.ts @@ -64,6 +64,17 @@ export interface ModelInfo { created?: number } +export interface EmbedBatchOptions { + inputs: string[] + signal?: AbortSignal +} + +export interface EmbedBatchResult { + vectors: number[][] + model: string + dim: number +} + export interface RuntimeConfig { apiKey: string endpoint?: string diff --git a/apps/core/src/modules/configs/configs.default.ts b/apps/core/src/modules/configs/configs.default.ts index 98a2b26681e..831a5e970b3 100644 --- a/apps/core/src/modules/configs/configs.default.ts +++ b/apps/core/src/modules/configs/configs.default.ts @@ -156,6 +156,32 @@ export const generateDefaultConfig: () => IConfig = () => ({ enableAutoTranslateInsights: false, insightsTargetLanguages: [], insightsMinTextLength: 300, + echoModel: undefined, + embeddingModel: undefined, + personaDistillModel: undefined, + enableEcho: false, + enableAutoGenerateEchoOnCreate: false, + echoDailyQuota: 200, + echoRetrievalTopK: 5, + echoRetrievalMinSimilarity: 0.72, + echoExemplarsCount: 4, + aiEmbedding: { + chunkMaxTokens: 500, + chunkOverlapTokens: 50, + backfillBatchSize: 50, + defaultMinSimilarity: 0.7, + defaultTopK: 5, + }, + aiPersona: { + distillSampleMaxTokens: 60_000, + exemplarsLengthMin: 200, + exemplarsLengthMax: 800, + exemplarsCandidateCacheTtlSec: 3600, + }, + aiMemory: { + recallTopK: 5, + recallMinSimilarity: 0.7, + }, }, oauth: { providers: [], diff --git a/apps/core/src/modules/configs/configs.schema.ts b/apps/core/src/modules/configs/configs.schema.ts index 8e60bccb01e..f0033be1788 100644 --- a/apps/core/src/modules/configs/configs.schema.ts +++ b/apps/core/src/modules/configs/configs.schema.ts @@ -847,6 +847,101 @@ export const AISchema = section('AI settings', { 'Skips automatic hooks (OnCreate/OnUpdate) when the body has fewer characters than this; only affects automatic triggers. 0 means no limit. 
Default 300', }, ), + echoModel: field.plain(AIModelAssignmentSchema.optional(), 'Echo model'), + embeddingModel: field.plain( + AIModelAssignmentSchema.optional(), + 'Embedding model', + ), + personaDistillModel: field.plain( + AIModelAssignmentSchema.optional(), + 'Persona distill model', + { description: 'Falls back to the echo model when empty' }, + ), + enableEcho: field.toggle(z.boolean().optional(), 'Allow AI echo', { + description: 'Master switch for AI echo generation', + }), + enableAutoGenerateEchoOnCreate: field.toggle( + z.boolean().optional(), + 'Auto-generate echo on recently create', + { description: 'Requires enableEcho to also be enabled' }, + ), + echoDailyQuota: field.number( + z.preprocess( + (val) => + val === '' || val === null || val === undefined ? val : Number(val), + z.number().int().min(0).optional(), + ), + 'Echo daily quota', + { + description: + 'Max echo generation calls per day; 0 means unlimited. Default 200', + }, + ), + echoRetrievalTopK: field.number( + z.preprocess( + (val) => + val === '' || val === null || val === undefined ? val : Number(val), + z.number().int().min(1).optional(), + ), + 'Echo retrieval top-K', + { description: 'Default 5' }, + ), + echoRetrievalMinSimilarity: field.number( + z.preprocess( + (val) => + val === '' || val === null || val === undefined ? val : Number(val), + z.number().min(0).max(1).optional(), + ), + 'Echo retrieval min similarity', + { + description: + 'Cosine similarity threshold; below this, no retrieval section is injected. Default 0.72', + }, + ), + echoExemplarsCount: field.number( + z.preprocess( + (val) => + val === '' || val === null || val === undefined ? 
val : Number(val), + z.number().int().min(0).optional(), + ), + 'Echo exemplars count', + { description: 'Default 4' }, + ), + aiEmbedding: field.plain( + z + .object({ + chunkMaxTokens: z.number().int().min(64).optional(), + chunkOverlapTokens: z.number().int().min(0).optional(), + backfillBatchSize: z.number().int().min(1).optional(), + defaultMinSimilarity: z.number().min(0).max(1).optional(), + defaultTopK: z.number().int().min(1).optional(), + }) + .optional(), + 'Embedding parameters', + ), + aiPersona: field.plain( + z + .object({ + distillSampleMaxTokens: z.number().int().min(1000).optional(), + exemplarsLengthMin: z.number().int().min(40).optional(), + exemplarsLengthMax: z.number().int().min(80).optional(), + exemplarsCandidateCacheTtlSec: z.number().int().min(60).optional(), + autoRefreshCron: z.string().optional(), + autoRefreshThreshold: z.number().int().min(1).optional(), + }) + .optional(), + 'Persona parameters', + ), + aiMemory: field.plain( + z + .object({ + recallTopK: z.number().int().min(1).optional(), + recallMinSimilarity: z.number().min(0).max(1).optional(), + nudgeIfReferencedBelow: z.number().int().min(0).optional(), + }) + .optional(), + 'Memory parameters', + ), }) export class AIDto extends createZodDto(AISchema) {} export type AIConfig = z.infer diff --git a/apps/core/src/modules/recently/recently.module.ts b/apps/core/src/modules/recently/recently.module.ts index 9181279680f..8f5021edf5f 100644 --- a/apps/core/src/modules/recently/recently.module.ts +++ b/apps/core/src/modules/recently/recently.module.ts @@ -1,15 +1,29 @@ import { forwardRef, Module } from '@nestjs/common' +import { AiEchoModule } from '../ai/ai-echo/ai-echo.module' import { CommentModule } from '../comment/comment.module' import { EnrichmentModule } from '../enrichment/enrichment.module' import { RecentlyController } from './recently.controller' import { RecentlyRepository } from './recently.repository' import { RecentlyService } from './recently.service' +import { + 
RecentlyEchoScenarioRegistrar, + RecentlyEchoSubjectDeleteListener, +} from './scenarios/recently-echo.scenario' @Module({ controllers: [RecentlyController], - providers: [RecentlyService, RecentlyRepository], + providers: [ + RecentlyService, + RecentlyRepository, + RecentlyEchoScenarioRegistrar, + RecentlyEchoSubjectDeleteListener, + ], exports: [RecentlyService, RecentlyRepository], - imports: [forwardRef(() => CommentModule), EnrichmentModule], + imports: [ + forwardRef(() => CommentModule), + EnrichmentModule, + forwardRef(() => AiEchoModule), + ], }) export class RecentlyModule {} diff --git a/apps/core/src/modules/recently/scenarios/recently-echo.scenario.ts b/apps/core/src/modules/recently/scenarios/recently-echo.scenario.ts new file mode 100644 index 00000000000..dcabe5892d5 --- /dev/null +++ b/apps/core/src/modules/recently/scenarios/recently-echo.scenario.ts @@ -0,0 +1,63 @@ +import { Injectable, Logger, type OnModuleInit } from '@nestjs/common' +import { OnEvent } from '@nestjs/event-emitter' + +import { BusinessEvents } from '~/constants/business-event.constant' + +import { AiEchoService } from '../../ai/ai-echo/ai-echo.service' +import { buildRecentlyEchoPrompt } from '../../ai/ai-echo/echo-prompt-builder' +import { EchoScenarioRegistry } from '../../ai/ai-echo/echo-scenario.registry' +import type { EchoScenario } from '../../ai/ai-echo/scenario.types' +import { RecentlyService } from '../recently.service' +import type { RecentlyRow } from '../recently.types' + +export const buildRecentlyEchoScenario = ( + recentlyService: RecentlyService, +): EchoScenario => ({ + key: 'recently', + triggerEvent: BusinessEvents.RECENTLY_CREATE, + defaultPersonas: ['inner-self', 'passerby'], + persistEchoes: true, + emitOnReady: BusinessEvents.RECENTLY_ECHO_LANDED, + async loadSubject(id: string) { + const row = await recentlyService.repository.findById(id) + return row ?? 
null + }, + extractRetrievalQuery(recently) { + return recently.content?.trim() || null + }, + buildPrompt(input) { + return buildRecentlyEchoPrompt(input) + }, +}) + +@Injectable() +export class RecentlyEchoScenarioRegistrar implements OnModuleInit { + constructor( + private readonly registry: EchoScenarioRegistry, + private readonly recentlyService: RecentlyService, + ) {} + + onModuleInit() { + this.registry.register(buildRecentlyEchoScenario(this.recentlyService)) + } +} + +@Injectable() +export class RecentlyEchoSubjectDeleteListener { + private readonly logger = new Logger(RecentlyEchoSubjectDeleteListener.name) + + constructor(private readonly aiEchoService: AiEchoService) {} + + @OnEvent(BusinessEvents.RECENTLY_DELETE) + async handleDelete(payload: { id?: string } | undefined) { + const id = payload?.id + if (!id) return + try { + await this.aiEchoService.handleSubjectDeleted('recently', id) + } catch (error) { + this.logger.warn( + `Failed to cascade recently delete to ai-echo: id=${id} error=${(error as Error).message}`, + ) + } + } +} diff --git a/apps/core/src/processors/database/repository.tokens.ts b/apps/core/src/processors/database/repository.tokens.ts index 80b5495ce74..a5de9b780b0 100644 --- a/apps/core/src/processors/database/repository.tokens.ts +++ b/apps/core/src/processors/database/repository.tokens.ts @@ -23,6 +23,10 @@ export const POSTGRES_REPOSITORY_TOKENS = { aiTranslation: Symbol('AiTranslationRepository'), translationEntry: Symbol('TranslationEntryRepository'), aiAgentConversation: Symbol('AiAgentConversationRepository'), + corpusEmbedding: Symbol('CorpusEmbeddingRepository'), + personaProfile: Symbol('PersonaProfileRepository'), + aiMemory: Symbol('AiMemoryRepository'), + aiEcho: Symbol('AiEchoRepository'), activity: Symbol('ActivityRepository'), analyze: Symbol('AnalyzeRepository'), fileReference: Symbol('FileReferenceRepository'), diff --git a/apps/core/test/helper/pg-testcontainer.ts b/apps/core/test/helper/pg-testcontainer.ts 
index 72254b5c792..aec6b4d0253 100644 --- a/apps/core/test/helper/pg-testcontainer.ts +++ b/apps/core/test/helper/pg-testcontainer.ts @@ -58,7 +58,7 @@ export async function startPgTestContainer(): Promise { return externalDatabase } - container = await new PostgreSqlContainer('postgres:17-alpine') + container = await new PostgreSqlContainer('pgvector/pgvector:pg17') .withDatabase('mx_verify') .withUsername('mx') .withPassword('mx') diff --git a/apps/core/test/mock/processors/ai-embedding.mock.ts b/apps/core/test/mock/processors/ai-embedding.mock.ts new file mode 100644 index 00000000000..474d81c6b3c --- /dev/null +++ b/apps/core/test/mock/processors/ai-embedding.mock.ts @@ -0,0 +1,42 @@ +import { createHash } from 'node:crypto' + +import type { + EmbedBatchOptions, + EmbedBatchResult, +} from '~/modules/ai/runtime/types' + +const MOCK_MODEL_ID = 'mock-embedding-model' +const MOCK_DIM = 8 + +export function deterministicEmbedding(text: string): number[] { + const digest = createHash('sha256').update(text).digest() + const vector: number[] = [] + for (let i = 0; i < MOCK_DIM; i++) { + const byte = digest[i] ?? 
0 + vector.push((byte / 255) * 2 - 1) + } + const norm = Math.hypot(...vector) || 1 + return vector.map((value) => value / norm) +} + +export function createMockEmbeddingRuntime() { + return { + providerInfo: { + id: 'mock-embedding-provider', + type: 'openai-compatible' as const, + model: MOCK_MODEL_ID, + }, + async embedBatch({ inputs }: EmbedBatchOptions): Promise { + const vectors = inputs.map((input) => deterministicEmbedding(input)) + return { vectors, model: MOCK_MODEL_ID, dim: MOCK_DIM } + }, + async generateText() { + throw new Error('mock embedding runtime does not support generateText') + }, + async generateStructured() { + throw new Error( + 'mock embedding runtime does not support generateStructured', + ) + }, + } +} diff --git a/apps/core/test/mock/processors/ai-runtime.mock.ts b/apps/core/test/mock/processors/ai-runtime.mock.ts new file mode 100644 index 00000000000..e343c02d7a0 --- /dev/null +++ b/apps/core/test/mock/processors/ai-runtime.mock.ts @@ -0,0 +1,53 @@ +import type { GenerateTextOptions } from '~/modules/ai/runtime/types' + +export type MockChatBehavior = + | { kind: 'text'; text: string } + | { kind: 'throw'; error: Error } + | { + kind: 'fn' + fn: (options: GenerateTextOptions) => string | Promise + } + +export interface MockAiRuntimeOptions { + modelId?: string + providerId?: string + behavior?: MockChatBehavior +} + +export function createMockAiRuntime(options: MockAiRuntimeOptions = {}) { + let nextBehavior: MockChatBehavior = options.behavior ?? { + kind: 'text', + text: 'mock-response', + } + const calls: GenerateTextOptions[] = [] + + const runtime = { + providerInfo: { + id: options.providerId ?? 'mock-provider', + type: 'openai-compatible' as const, + model: options.modelId ?? 'mock-model', + }, + async generateText(opts: GenerateTextOptions) { + calls.push(opts) + const b = nextBehavior + if (b.kind === 'throw') throw b.error + const text = b.kind === 'fn' ? 
await b.fn(opts) : b.text + return { text } + }, + async generateStructured() { + throw new Error('mock chat runtime does not support generateStructured') + }, + } + + return { + runtime, + calls, + setBehavior(behavior: MockChatBehavior) { + nextBehavior = behavior + }, + reset() { + nextBehavior = options.behavior ?? { kind: 'text', text: 'mock-response' } + calls.length = 0 + }, + } +} diff --git a/apps/core/test/src/modules/ai/ai-echo/ai-echo.integration.spec.ts b/apps/core/test/src/modules/ai/ai-echo/ai-echo.integration.spec.ts new file mode 100644 index 00000000000..4bbdae4df6b --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-echo/ai-echo.integration.spec.ts @@ -0,0 +1,406 @@ +import { drizzle, type NodePgDatabase } from 'drizzle-orm/node-postgres' +import { Pool } from 'pg' +import { + afterAll, + beforeAll, + beforeEach, + describe, + expect, + it, + vi, +} from 'vitest' + +import { startPgTestContainer } from '@/helper/pg-testcontainer' +import { createMockAiRuntime } from '@/mock/processors/ai-runtime.mock' +import { AppErrorCode } from '~/common/errors' +import { AppException } from '~/common/errors/exception.types' +import * as schema from '~/database/schema' +import { aiEchoes } from '~/database/schema' +import { AiEchoRepository } from '~/modules/ai/ai-echo/ai-echo.repository' +import { AiEchoService } from '~/modules/ai/ai-echo/ai-echo.service' +import { EchoScenarioRegistry } from '~/modules/ai/ai-echo/echo-scenario.registry' +import type { EchoScenario } from '~/modules/ai/ai-echo/scenario.types' +import { EchoGenerateTaskProcessor } from '~/modules/ai/ai-echo/tasks/echo-generate.processor' +import { SnowflakeService } from '~/shared/id/snowflake.service' + +type Drizzle = NodePgDatabase + +const taskContextStub = () => ({ + taskId: 'task', + signal: new AbortController().signal, + updateProgress: vi.fn(async () => {}), + incrementTokens: vi.fn(async () => {}), + appendLog: vi.fn(async () => {}), + setResult: vi.fn(async () => {}), + setStatus: 
vi.fn(), + isAborted: () => false, +}) + +const subjectStore = new Map() + +const recentlyScenario = (): EchoScenario => ({ + key: 'recently', + defaultPersonas: ['inner-self', 'passerby'], + emitOnReady: 'RECENTLY_ECHO_LANDED' as any, + async loadSubject(id: string) { + return subjectStore.get(id) ?? null + }, + extractRetrievalQuery(s: any) { + return s.content ?? null + }, + buildPrompt() { + return [ + { role: 'system' as const, content: 'sys' }, + { role: 'user' as const, content: 'user' }, + ] + }, +}) + +const hypotheticalScenario: EchoScenario = { + key: 'comment', + defaultPersonas: ['passerby'], + async loadSubject(id: string) { + return { id, content: 'hello from comment' } + }, + extractRetrievalQuery() { + return null + }, + buildPrompt() { + return [ + { role: 'system' as const, content: 'sys' }, + { role: 'user' as const, content: 'user' }, + ] + }, +} + +interface Harness { + service: AiEchoService + repo: AiEchoRepository + processor: EchoGenerateTaskProcessor + runtime: ReturnType + eventEmitterMock: { + emit: ReturnType + on: ReturnType + } + emittedEvents: Array<{ event: string; data: any }> + registry: EchoScenarioRegistry + taskCalls: any[] +} + +const buildHarness = ( + db: Drizzle, + opts: { + extraScenarios?: EchoScenario[] + quota?: number + runtimeBehavior?: Parameters[0] + } = {}, +): Harness => { + const repo = new AiEchoRepository(db as any, new SnowflakeService()) + const emittedEvents: Array<{ event: string; data: any }> = [] + const eventEmitterMock = { + emit: vi.fn(async (event: string, data: any) => { + emittedEvents.push({ event, data }) + }), + on: vi.fn(), + } + const taskCalls: any[] = [] + const taskService = { + crud: { + createTask: vi.fn(async (input: any) => { + taskCalls.push(input) + return { taskId: 'task-' + taskCalls.length, created: true } + }), + }, + } + const configsService = { + get: vi.fn(async () => ({ + enableEcho: true, + enableAutoGenerateEchoOnCreate: true, + echoDailyQuota: opts.quota ?? 
200, + })), + } + let counter = 0 + const redisService = { + getClient: () => ({ + incr: vi.fn(async () => { + counter += 1 + return counter + }), + expire: vi.fn(async () => 1), + }), + } + const registry = new EchoScenarioRegistry() + registry.register(recentlyScenario()) + for (const s of opts.extraScenarios ?? []) registry.register(s) + + const service = new AiEchoService( + repo, + taskService as any, + eventEmitterMock as any, + configsService as any, + redisService as any, + registry, + ) + + const runtime = createMockAiRuntime({ + modelId: 'mock-echo-model', + behavior: opts.runtimeBehavior?.behavior ?? { + kind: 'text', + text: 'mock echo content', + }, + }) + const aiService = { + getEchoModel: vi.fn(async () => runtime.runtime), + } + const aiEmbeddingsService = { search: vi.fn(async () => []) } + const aiMemoryService = { recall: vi.fn(async () => []) } + const aiPersonaService = { + getProfileOrNull: vi.fn(async () => null), + pickExemplars: vi.fn(async () => []), + } + const processor = new EchoGenerateTaskProcessor( + { registerHandler: vi.fn() } as any, + repo, + aiService as any, + aiEmbeddingsService as any, + aiMemoryService as any, + aiPersonaService as any, + configsService as any, + eventEmitterMock as any, + registry, + ) + + return { + service, + repo, + processor, + runtime, + eventEmitterMock, + emittedEvents, + registry, + taskCalls, + } +} + +describe('AiEchoService integration (pg testcontainer)', () => { + let pool: Pool + let db: Drizzle + + beforeAll(async () => { + const container = await startPgTestContainer() + pool = new Pool({ connectionString: container.getConnectionUri(), max: 4 }) + db = drizzle(pool, { schema, casing: 'snake_case' }) + await pool.query('CREATE EXTENSION IF NOT EXISTS vector') + }) + + afterAll(async () => { + await pool?.end() + }) + + beforeEach(async () => { + await pool.query('DELETE FROM ai_echoes') + subjectStore.clear() + }) + + it('dispatch creates one row per persona, processor brings each to ready 
and emits RECENTLY_ECHO_LANDED', async () => { + const harness = buildHarness(db) + subjectStore.set('100001', { id: '100001', content: 'a thought' }) + + const dispatched = await harness.service.dispatch( + 'recently', + 'recently', + '100001', + ) + expect(dispatched).toHaveLength(2) + expect(harness.taskCalls).toHaveLength(2) + + for (const r of dispatched) { + await harness.processor.handle( + { echoId: r.echoId }, + taskContextStub() as any, + ) + } + + const rows = await db.select().from(aiEchoes) + expect(rows).toHaveLength(2) + for (const r of rows) { + expect(r.status).toBe('ready') + expect(r.content).toBe('mock echo content') + expect(r.model).toBe('mock-echo-model') + } + + const landed = harness.emittedEvents.filter( + (e) => e.event === 'RECENTLY_ECHO_LANDED', + ) + expect(landed).toHaveLength(2) + }) + + it('replay: second handle invocation for the same echoId is a no-op', async () => { + const harness = buildHarness(db) + subjectStore.set('100002', { id: '100002', content: 'replay subject' }) + const [first] = await harness.service.dispatch( + 'recently', + 'recently', + '100002', + ) + await harness.processor.handle( + { echoId: first.echoId }, + taskContextStub() as any, + ) + const beforeRowsSql = await db.select().from(aiEchoes) + const beforeRow = beforeRowsSql.find((r) => r.id === first.echoId)! + const beforeUpdatedAt = beforeRow.updatedAt?.getTime() + + await new Promise((r) => setTimeout(r, 10)) + await harness.processor.handle( + { echoId: first.echoId }, + taskContextStub() as any, + ) + + const afterRowsSql = await db.select().from(aiEchoes) + const afterRow = afterRowsSql.find((r) => r.id === first.echoId)! 
+ expect(afterRow.status).toBe('ready') + expect(afterRow.updatedAt?.getTime()).toBe(beforeUpdatedAt) + }) + + it('subject delete mid-flight marks in-flight rows failed/aborted; next task wake is a no-op', async () => { + const harness = buildHarness(db) + subjectStore.set('100003', { + id: '100003', + content: 'deleted before generate', + }) + const [first] = await harness.service.dispatch( + 'recently', + 'recently', + '100003', + ) + + await harness.service.handleSubjectDeleted('recently', '100003') + + const rowsAfterDelete = await db.select().from(aiEchoes) + const target = rowsAfterDelete.find((r) => r.id === first.echoId)! + expect(target.status).toBe('failed') + expect((target.metadata as any).aborted).toBe(true) + + await harness.processor.handle( + { echoId: first.echoId }, + taskContextStub() as any, + ) + const rowsAfterReplay = await db.select().from(aiEchoes) + const targetAfter = rowsAfterReplay.find((r) => r.id === first.echoId)! + expect(targetAfter.status).toBe('failed') + }) + + it('regenerate with force archives old row and creates fresh', async () => { + const harness = buildHarness(db) + subjectStore.set('100004', { id: '100004', content: 'regen subject' }) + + const [first] = await harness.service.dispatch( + 'recently', + 'recently', + '100004', + ) + await harness.processor.handle( + { echoId: first.echoId }, + taskContextStub() as any, + ) + + const result = await harness.service.regenerate( + 'recently', + '100004', + 'inner-self', + true, + ) + expect(result.echoId).not.toBe(first.echoId) + + const rows = await db.select().from(aiEchoes) + const old = rows.find((r) => r.id === first.echoId)! + expect(old.status).toBe('archived') + const fresh = rows.find((r) => r.id === result.echoId)! 
+ expect(fresh.status).toBe('pending') + }) + + it('edit returns status="edited" with editedBy populated', async () => { + const harness = buildHarness(db) + subjectStore.set('100005', { id: '100005', content: 'editme' }) + const [first] = await harness.service.dispatch( + 'recently', + 'recently', + '100005', + ) + await harness.processor.handle( + { echoId: first.echoId }, + taskContextStub() as any, + ) + + const editor = '7000000000000000001' + const edited = await harness.service.edit(first.echoId, 'rewritten', editor) + expect(edited.status).toBe('edited') + expect(edited.content).toBe('rewritten') + expect(edited.editedBy).toBe(editor) + expect(edited.editedAt).not.toBeNull() + }) + + it('runtime throws -> status=failed, errorCode=AI_ECHO_GENERATION_FAILED', async () => { + const harness = buildHarness(db, { + runtimeBehavior: { + behavior: { kind: 'throw', error: new Error('rate limited') }, + }, + }) + subjectStore.set('100006', { id: '100006', content: 'fail me' }) + const [first] = await harness.service.dispatch( + 'recently', + 'recently', + '100006', + ) + await expect( + harness.processor.handle( + { echoId: first.echoId }, + taskContextStub() as any, + ), + ).rejects.toThrow() + + const rows = await db.select().from(aiEchoes) + const target = rows.find((r) => r.id === first.echoId)! + expect(target.status).toBe('failed') + expect((target.metadata as any).errorCode).toBe( + AppErrorCode.AI_ECHO_GENERATION_FAILED, + ) + expect((target.metadata as any).upstreamMessage).toContain('rate limited') + }) + + it('echoDailyQuota=1 -> second enqueue terminates with AI_ECHO_DAILY_QUOTA_EXCEEDED', async () => { + const harness = buildHarness(db, { quota: 1 }) + subjectStore.set('100007', { id: '100007', content: 'quota me' }) + await harness.service.dispatch('recently', 'recently', '100007') + const rows = await db.select().from(aiEchoes) + expect(rows).toHaveLength(2) + const failed = rows.find((r) => r.status === 'failed')! 
+ expect((failed.metadata as any).errorCode).toBe( + AppErrorCode.AI_ECHO_DAILY_QUOTA_EXCEEDED, + ) + }) + + it('hypothetical extra ECHO_SCENARIO consumed by engine with zero engine changes', async () => { + const harness = buildHarness(db, { extraScenarios: [hypotheticalScenario] }) + const result = await harness.service.dispatch( + 'comment', + 'comment', + '700001', + ) + expect(result).toHaveLength(1) + expect(result[0].status).toBe('pending') + const rows = await db.select().from(aiEchoes) + expect(rows[0].scenarioKey).toBe('comment') + expect(rows[0].personaKey).toBe('passerby') + }) + + it('regenerate (force=false) on pending row throws AI_ECHO_REGENERATE_IN_PROGRESS', async () => { + const harness = buildHarness(db) + subjectStore.set('100008', { id: '100008', content: 'pending row' }) + await harness.service.dispatch('recently', 'recently', '100008') + await expect( + harness.service.regenerate('recently', '100008', 'inner-self', false), + ).rejects.toBeInstanceOf(AppException) + }) +}) diff --git a/apps/core/test/src/modules/ai/ai-echo/ai-echo.service.spec.ts b/apps/core/test/src/modules/ai/ai-echo/ai-echo.service.spec.ts new file mode 100644 index 00000000000..2ebcbc14bcd --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-echo/ai-echo.service.spec.ts @@ -0,0 +1,247 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +import { AppErrorCode } from '~/common/errors' +import { AppException } from '~/common/errors/exception.types' +import { AiEchoService } from '~/modules/ai/ai-echo/ai-echo.service' +import { EchoScenarioRegistry } from '~/modules/ai/ai-echo/echo-scenario.registry' +import type { EchoScenario } from '~/modules/ai/ai-echo/scenario.types' + +const fakeRepoFactory = () => { + const rows: any[] = [] + let nextId = 1 + return { + rows, + findById: vi.fn( + async (id: string) => rows.find((r) => r.id === id) ?? 
null, + ), + findOne: vi.fn(async (criteria: any) => { + return ( + rows.find( + (r) => + r.scenarioKey === criteria.scenarioKey && + r.subjectType === criteria.subjectType && + r.subjectId === criteria.subjectId && + r.personaKey === criteria.personaKey, + ) ?? null + ) + }), + findAllBySubject: vi.fn( + async (scenarioKey: string, subjectType: string, subjectId: string) => + rows.filter( + (r) => + r.scenarioKey === scenarioKey && + r.subjectType === subjectType && + r.subjectId === subjectId, + ), + ), + findAdmin: vi.fn(async () => ({ + data: rows.slice(), + pagination: { + currentPage: 1, + totalPage: 1, + total: rows.length, + size: 20, + hasNextPage: false, + hasPrevPage: false, + }, + })), + create: vi.fn(async (input: any) => { + const row = { + id: String(nextId++), + ...input, + metadata: input.metadata ?? {}, + content: null, + model: null, + generatedAt: null, + editedAt: null, + editedBy: null, + createdAt: new Date(), + updatedAt: new Date(), + } + rows.push(row) + return row + }), + update: vi.fn(async (id: string, patch: any) => { + const row = rows.find((r) => r.id === id) + if (!row) return null + Object.assign(row, patch) + return row + }), + setStatus: vi.fn(async function (this: any, id: string, status: string) { + return this.update(id, { status }) + }), + } +} + +const fakeTaskService = () => ({ + crud: { + createTask: vi.fn(async () => ({ taskId: 'task-1', created: true })), + }, +}) + +const fakeEventManager = () => ({ + on: vi.fn(), + emit: vi.fn(), +}) + +const fakeConfigsService = (overrides: any = {}) => ({ + get: vi.fn(async () => ({ + enableEcho: true, + enableAutoGenerateEchoOnCreate: true, + echoDailyQuota: 200, + ...overrides, + })), +}) + +const fakeRedis = (initial = 0) => { + let counter = initial + return { + getClient: () => ({ + incr: vi.fn(async () => { + counter += 1 + return counter + }), + expire: vi.fn(async () => 1), + }), + } +} + +const registryWith = (scenarios: EchoScenario[]) => { + const registry = new 
EchoScenarioRegistry() + scenarios.forEach((s) => registry.register(s)) + return registry +} + +const scenarioStub: EchoScenario = { + key: 'recently', + defaultPersonas: ['inner-self', 'passerby'], + async loadSubject() { + return null + }, + extractRetrievalQuery() { + return null + }, + buildPrompt() { + return [] + }, +} + +describe('AiEchoService', () => { + let repo: ReturnType + let task: ReturnType + let event: ReturnType + let config: ReturnType + let redis: ReturnType + + beforeEach(() => { + repo = fakeRepoFactory() + task = fakeTaskService() + event = fakeEventManager() + config = fakeConfigsService() + redis = fakeRedis() + }) + + it('requireScenario throws AI_ECHO_SCENARIO_NOT_REGISTERED on unknown key', () => { + const service = new AiEchoService( + repo as any, + task as any, + event as any, + config as any, + redis as any, + registryWith([]), + ) + expect(() => service.requireScenario('unknown')).toThrow(AppException) + try { + service.requireScenario('unknown') + } catch (error) { + expect((error as AppException).code).toBe( + AppErrorCode.AI_ECHO_SCENARIO_NOT_REGISTERED, + ) + } + }) + + it('dispatch creates one row per persona and enqueues tasks', async () => { + const service = new AiEchoService( + repo as any, + task as any, + event as any, + config as any, + redis as any, + registryWith([scenarioStub]), + ) + const results = await service.dispatch('recently', 'recently', '1001') + expect(results).toHaveLength(2) + expect(repo.rows).toHaveLength(2) + expect(task.crud.createTask).toHaveBeenCalledTimes(2) + for (const row of repo.rows) { + expect(row.status).toBe('pending') + expect(row.metadata.taskId).toBe('task-1') + } + }) + + it('enforces echoDailyQuota — second enqueue marked failed with AI_ECHO_DAILY_QUOTA_EXCEEDED', async () => { + config = fakeConfigsService({ echoDailyQuota: 1 }) + const service = new AiEchoService( + repo as any, + task as any, + event as any, + config as any, + redis as any, + registryWith([scenarioStub]), + ) + 
await service.dispatch('recently', 'recently', '1001') + expect(repo.rows[0].status).toBe('pending') + expect(repo.rows[1].status).toBe('failed') + expect(repo.rows[1].metadata.errorCode).toBe( + AppErrorCode.AI_ECHO_DAILY_QUOTA_EXCEEDED, + ) + expect(task.crud.createTask).toHaveBeenCalledTimes(1) + }) + + it('regenerate without force throws when in-flight row exists', async () => { + const service = new AiEchoService( + repo as any, + task as any, + event as any, + config as any, + redis as any, + registryWith([scenarioStub]), + ) + await repo.create({ + scenarioKey: 'recently', + subjectType: 'recently', + subjectId: '2002', + personaKey: 'inner-self', + status: 'pending', + }) + await expect( + service.regenerate('recently', '2002', 'inner-self', false), + ).rejects.toBeInstanceOf(AppException) + }) + + it('regenerate with force archives old row and creates new', async () => { + const service = new AiEchoService( + repo as any, + task as any, + event as any, + config as any, + redis as any, + registryWith([scenarioStub]), + ) + const existing = await repo.create({ + scenarioKey: 'recently', + subjectType: 'recently', + subjectId: '3003', + personaKey: 'inner-self', + status: 'ready', + }) + const result = await service.regenerate( + 'recently', + '3003', + 'inner-self', + true, + ) + expect(repo.rows.find((r) => r.id === existing.id)?.status).toBe('archived') + expect(result.echoId).not.toBe(existing.id) + }) +}) diff --git a/apps/core/test/src/modules/ai/ai-echo/echo-generate.processor.spec.ts b/apps/core/test/src/modules/ai/ai-echo/echo-generate.processor.spec.ts new file mode 100644 index 00000000000..451b1478e8a --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-echo/echo-generate.processor.spec.ts @@ -0,0 +1,128 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +import { EchoScenarioRegistry } from '~/modules/ai/ai-echo/echo-scenario.registry' +import type { EchoScenario } from '~/modules/ai/ai-echo/scenario.types' +import { 
EchoGenerateTaskProcessor } from '~/modules/ai/ai-echo/tasks/echo-generate.processor' + +const makeContext = () => ({ + taskId: 'task-1', + signal: new AbortController().signal, + updateProgress: vi.fn(async () => {}), + incrementTokens: vi.fn(async () => {}), + appendLog: vi.fn(async () => {}), + setResult: vi.fn(async () => {}), + setStatus: vi.fn(() => {}), + isAborted: () => false, +}) + +const stubScenario: EchoScenario = { + key: 'recently', + defaultPersonas: ['inner-self'], + async loadSubject() { + return { id: 'subject', content: 'hi' } + }, + extractRetrievalQuery() { + return null + }, + buildPrompt() { + return [ + { role: 'system', content: 'sys' }, + { role: 'user', content: 'hi' }, + ] + }, +} + +const buildProcessor = (rowOverrides: Partial = {}) => { + const row = { + id: 'echo-1', + scenarioKey: 'recently', + subjectType: 'recently', + subjectId: '10001', + personaKey: 'inner-self', + status: 'pending', + metadata: {}, + ...rowOverrides, + } + const repository = { + findById: vi.fn(async () => row), + update: vi.fn(async (id: string, patch: any) => { + Object.assign(row, patch) + return row + }), + } + const registry = new EchoScenarioRegistry() + registry.register(stubScenario) + const eventManager = { emit: vi.fn() } + const taskProcessor = { registerHandler: vi.fn() } + const aiService = { + getEchoModel: vi.fn(async () => ({ + providerInfo: { id: 'mock', type: 'openai-compatible', model: 'm' }, + generateText: vi.fn(async () => ({ text: 'echo content' })), + })), + } + const aiEmbeddingsService = { search: vi.fn(async () => []) } + const aiMemoryService = { recall: vi.fn(async () => []) } + const aiPersonaService = { + getProfileOrNull: vi.fn(async () => null), + pickExemplars: vi.fn(async () => []), + } + const configsService = { + get: vi.fn(async () => ({})), + } + const processor = new EchoGenerateTaskProcessor( + taskProcessor as any, + repository as any, + aiService as any, + aiEmbeddingsService as any, + aiMemoryService as any, + 
aiPersonaService as any, + configsService as any, + eventManager as any, + registry, + ) + return { processor, repository, row, eventManager, aiService } +} + +describe('EchoGenerateTaskProcessor', () => { + let context: ReturnType + + beforeEach(() => { + context = makeContext() + }) + + it('step-2 guard: status NOT IN (pending|generating) -> no writes, no events', async () => { + for (const status of ['ready', 'edited', 'failed', 'archived']) { + const { processor, repository, eventManager } = buildProcessor({ status }) + await processor.handle({ echoId: 'echo-1' }, context as any) + expect(repository.update).not.toHaveBeenCalled() + expect(eventManager.emit).not.toHaveBeenCalled() + } + }) + + it('proceeds when status is pending', async () => { + const { processor, repository } = buildProcessor({ status: 'pending' }) + await processor.handle({ echoId: 'echo-1' }, context as any) + expect(repository.update).toHaveBeenCalled() + const finalCall = repository.update.mock.calls.at(-1)! + expect(finalCall[1].status).toBe('ready') + expect(finalCall[1].content).toBe('echo content') + }) + + it('returns silently when row not found', async () => { + const repository = { findById: vi.fn(async () => null), update: vi.fn() } + const registry = new EchoScenarioRegistry() + const processor = new EchoGenerateTaskProcessor( + { registerHandler: vi.fn() } as any, + repository as any, + {} as any, + {} as any, + {} as any, + {} as any, + {} as any, + {} as any, + registry, + ) + await processor.handle({ echoId: 'echo-missing' }, context as any) + expect(repository.update).not.toHaveBeenCalled() + }) +}) diff --git a/apps/core/test/src/modules/ai/ai-echo/echo-prompt-builder.spec.ts b/apps/core/test/src/modules/ai/ai-echo/echo-prompt-builder.spec.ts new file mode 100644 index 00000000000..88dcef53ca4 --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-echo/echo-prompt-builder.spec.ts @@ -0,0 +1,127 @@ +import { describe, expect, it } from 'vitest' + +import { 
buildRecentlyEchoPrompt } from '~/modules/ai/ai-echo/echo-prompt-builder' +import type { EchoPromptInput } from '~/modules/ai/ai-echo/scenario.types' +import { PERSONA_REGISTRY } from '~/modules/ai/ai-persona/persona-registry' +import { AI_PERSONA_PROMPTS } from '~/modules/ai/ai-persona/prompts' +import type { RecentlyRow } from '~/modules/recently/recently.types' + +const baseSubject = { content: 'hello world' } as RecentlyRow + +const baseInput = ( + overrides: Partial> = {}, +): EchoPromptInput => ({ + subject: baseSubject, + persona: PERSONA_REGISTRY['inner-self'], + profile: null, + retrieval: [], + memories: [], + exemplars: [], + ...overrides, +}) + +const NO_MEMORY_RULE = 'Do NOT claim to remember' + +describe('buildRecentlyEchoPrompt', () => { + it('inner-self with no retrieval/memories adds the no-unverified-memory rule', () => { + const messages = buildRecentlyEchoPrompt(baseInput()) + expect(messages).toHaveLength(2) + expect(messages[0].role).toBe('system') + expect(messages[0].content).toContain(NO_MEMORY_RULE) + expect(messages[1]).toEqual({ role: 'user', content: 'hello world' }) + }) + + it('inner-self with retrieval omits the no-unverified-memory rule', () => { + const messages = buildRecentlyEchoPrompt( + baseInput({ + retrieval: [ + { + sourceType: 'note', + sourceId: 'n1', + chunkIndex: 0, + content: 'past thought', + distance: 0.1, + similarity: 0.9, + }, + ], + }), + ) + expect(messages[0].content).not.toContain(NO_MEMORY_RULE) + expect(messages[0].content).toContain('past thought') + }) + + it('inner-self with memories omits the no-unverified-memory rule', () => { + const messages = buildRecentlyEchoPrompt( + baseInput({ + memories: [ + { + id: 'm1', + content: 'remembered fact', + } as any, + ], + }), + ) + expect(messages[0].content).not.toContain(NO_MEMORY_RULE) + expect(messages[0].content).toContain('remembered fact') + }) + + it('inner-self includes profile summary when present', () => { + const messages = buildRecentlyEchoPrompt( + 
baseInput({ + profile: { + profileSummary: 'a quiet voice', + profile: 'long form', + } as any, + }), + ) + expect(messages[0].content).toContain('a quiet voice') + }) + + it('inner-self includes exemplars when present', () => { + const messages = buildRecentlyEchoPrompt( + baseInput({ + exemplars: [ + { + sourceType: 'note', + sourceId: '1', + content: 'sample passage', + createdAt: new Date(), + }, + ], + }), + ) + expect(messages[0].content).toContain('sample passage') + }) + + it('passerby uses the fixed prompt without profile/exemplars/memories/retrieval', () => { + const messages = buildRecentlyEchoPrompt( + baseInput({ + persona: PERSONA_REGISTRY['passerby'], + profile: { profileSummary: 'never used' } as any, + exemplars: [ + { + sourceType: 'note', + sourceId: '1', + content: 'never used exemplar', + createdAt: new Date(), + }, + ], + memories: [{ id: 'm1', content: 'never used memory' } as any], + retrieval: [ + { + sourceType: 'note', + sourceId: 'n1', + chunkIndex: 0, + content: 'never used retrieval', + distance: 0.1, + similarity: 0.9, + }, + ], + }), + ) + expect(messages).toHaveLength(2) + expect(messages[0].content).toBe(AI_PERSONA_PROMPTS.passerby) + expect(messages[0].content).not.toContain('never used') + expect(messages[1]).toEqual({ role: 'user', content: 'hello world' }) + }) +}) diff --git a/apps/core/test/src/modules/ai/ai-embeddings/ai-embeddings.integration.spec.ts b/apps/core/test/src/modules/ai/ai-embeddings/ai-embeddings.integration.spec.ts new file mode 100644 index 00000000000..9eedc347f0a --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-embeddings/ai-embeddings.integration.spec.ts @@ -0,0 +1,243 @@ +import { drizzle, type NodePgDatabase } from 'drizzle-orm/node-postgres' +import { Pool } from 'pg' +import { + afterAll, + beforeAll, + beforeEach, + describe, + expect, + it, + vi, +} from 'vitest' + +import { startPgTestContainer } from '@/helper/pg-testcontainer' +import { createMockEmbeddingRuntime } from 
'@/mock/processors/ai-embedding.mock' +import { AppException } from '~/common/errors/exception.types' +import * as schema from '~/database/schema' +import { corpusEmbeddings } from '~/database/schema' +import { AiEmbeddingsRepository } from '~/modules/ai/ai-embeddings/ai-embeddings.repository' +import { AiEmbeddingsService } from '~/modules/ai/ai-embeddings/ai-embeddings.service' +import { runCorpusBackfill } from '~/modules/ai/ai-embeddings/tasks/corpus-backfill.driver' +import { SnowflakeService } from '~/shared/id/snowflake.service' + +type Drizzle = NodePgDatabase + +const wireService = ( + db: Drizzle, + opts: { + configured?: boolean + sources?: Record + } = {}, +) => { + const repository = new AiEmbeddingsRepository( + db as any, + new SnowflakeService(), + ) + const sources = opts.sources ?? {} + const databaseService = { + findGlobalById: vi.fn(async (id: string) => sources[id] ?? null), + } + const runtime = createMockEmbeddingRuntime() + const aiService = { + hasFeatureModel: vi.fn(async () => opts.configured ?? 
true), + getEmbeddingModel: vi.fn(async () => runtime), + } + const configService = { + get: vi.fn(async () => ({ + aiEmbedding: { + chunkMaxTokens: 80, + chunkOverlapTokens: 0, + backfillBatchSize: 50, + defaultMinSimilarity: 0.6, + defaultTopK: 5, + }, + })), + } + const service = new AiEmbeddingsService( + repository, + configService as any, + aiService as any, + databaseService as any, + db as any, + ) + return { service, repository, runtime, databaseService, aiService } +} + +describe('AiEmbeddingsService integration (pg testcontainer)', () => { + let pool: Pool + let db: Drizzle + + beforeAll(async () => { + const container = await startPgTestContainer() + pool = new Pool({ connectionString: container.getConnectionUri(), max: 4 }) + db = drizzle(pool, { schema, casing: 'snake_case' }) + await pool.query('CREATE EXTENSION IF NOT EXISTS vector') + }) + + afterAll(async () => { + await pool?.end() + }) + + beforeEach(async () => { + await pool.query('DELETE FROM corpus_embeddings') + }) + + it('upserts chunks for a new note', async () => { + const noteId = '7000000000000000001' + const { service } = wireService(db, { + sources: { + [noteId]: { + type: 'note', + document: { text: 'first paragraph.\n\nsecond paragraph.' 
}, + }, + }, + }) + + const res = await service.syncSource('note', noteId, 'upsert') + expect(res.embedded).toBeGreaterThan(0) + + const rows = await db.select().from(corpusEmbeddings) + expect(rows.length).toBeGreaterThan(0) + expect(rows[0].sourceType).toBe('note') + expect(rows[0].embeddingModel).toBe('mock-embedding-model') + expect(rows[0].dim).toBe(8) + }) + + it('is a no-op when content unchanged', async () => { + const noteId = '7000000000000000002' + const sources = { + [noteId]: { + type: 'note', + document: { text: 'alpha\n\nbeta\n\ngamma' }, + }, + } + const { service } = wireService(db, { sources }) + + await service.syncSource('note', noteId, 'upsert') + const firstRows = await db.select().from(corpusEmbeddings) + + const second = await service.syncSource('note', noteId, 'upsert') + expect(second.embedded).toBe(0) + const secondRows = await db.select().from(corpusEmbeddings) + expect(secondRows.map((r) => r.id).sort()).toEqual( + firstRows.map((r) => r.id).sort(), + ) + }) + + it('re-embeds only chunks whose content hash changed', async () => { + const noteId = '7000000000000000003' + const initial = { + [noteId]: { + type: 'note', + document: { + text: 'paragraph A original\n\nparagraph B original\n\nparagraph C original', + }, + }, + } + const wired = wireService(db, { sources: initial }) + await wired.service.syncSource('note', noteId, 'upsert') + + const before = await db.select().from(corpusEmbeddings) + expect(before.length).toBeGreaterThanOrEqual(1) + + const updated = { + [noteId]: { + type: 'note', + document: { + text: 'paragraph A original\n\nparagraph B CHANGED\n\nparagraph C original', + }, + }, + } + const wired2 = wireService(db, { sources: updated }) + const res2 = await wired2.service.syncSource('note', noteId, 'upsert') + expect(res2.embedded).toBeGreaterThan(0) + expect(res2.embedded! 
< before.length + 1).toBe(true) + }) + + it('removes all rows on delete', async () => { + const noteId = '7000000000000000004' + const { service } = wireService(db, { + sources: { + [noteId]: { type: 'note', document: { text: 'hello world' } }, + }, + }) + await service.syncSource('note', noteId, 'upsert') + + const res = await service.syncSource('note', noteId, 'delete') + expect(res.deleted).toBeGreaterThan(0) + const rows = await db.select().from(corpusEmbeddings) + expect(rows.length).toBe(0) + }) + + it('search returns rows ordered and threshold-filtered', async () => { + const ids = [ + '7000000000000000010', + '7000000000000000011', + '7000000000000000012', + ] + const texts = ['cats love fish', 'dogs love bones', 'parrots mimic words'] + const sources: Record = {} + ids.forEach((id, i) => { + sources[id] = { type: 'note', document: { text: texts[i] } } + }) + const { service } = wireService(db, { sources }) + for (const id of ids) { + await service.syncSource('note', id, 'upsert') + } + + const results = await service.search('cats love fish', { + topK: 3, + minSimilarity: 0.5, + }) + expect(results.length).toBeGreaterThan(0) + for (let i = 1; i < results.length; i++) { + expect(results[i - 1].distance).toBeLessThanOrEqual(results[i].distance) + } + for (const r of results) { + expect(r.similarity).toBeGreaterThanOrEqual(0.5) + expect(r.similarity).toBeCloseTo(1 - r.distance, 5) + } + expect(results[0].content).toContain('cats') + }) + + it('graceful no-op when embedding model unconfigured', async () => { + const noteId = '7000000000000000020' + const { service } = wireService(db, { + configured: false, + sources: { + [noteId]: { type: 'note', document: { text: 'hello' } }, + }, + }) + const res = await service.syncSource('note', noteId, 'upsert') + expect(res).toEqual({}) + const rows = await db.select().from(corpusEmbeddings) + expect(rows).toHaveLength(0) + + await expect(service.search('hello')).rejects.toBeInstanceOf(AppException) + }) + + it('backfill 
driver runs idempotently with no duplicates', async () => { + const noteIds = ['7000000000000000030', '7000000000000000031'] + const sources: Record = {} + for (const id of noteIds) { + sources[id] = { + type: 'note', + document: { text: `unique text for ${id}` }, + } + } + const { service } = wireService(db, { sources }) + + for (const id of noteIds) { + await service.syncSource('note', id, 'upsert') + } + const before = (await db.select().from(corpusEmbeddings)).length + + const summary = await runCorpusBackfill(service, db as any, { + sourceTypes: [], + }) + expect(summary.configured).toBe(true) + + const after = (await db.select().from(corpusEmbeddings)).length + expect(after).toBe(before) + }) +}) diff --git a/apps/core/test/src/modules/ai/ai-embeddings/ai-embeddings.service.spec.ts b/apps/core/test/src/modules/ai/ai-embeddings/ai-embeddings.service.spec.ts new file mode 100644 index 00000000000..6cd0f50bb49 --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-embeddings/ai-embeddings.service.spec.ts @@ -0,0 +1,145 @@ +import { describe, expect, it, vi } from 'vitest' + +import { createMockEmbeddingRuntime } from '@/mock/processors/ai-embedding.mock' +import { AppException } from '~/common/errors/exception.types' +import { AiEmbeddingsService } from '~/modules/ai/ai-embeddings/ai-embeddings.service' + +const createService = (overrides: { configured?: boolean } = {}) => { + const repository = { + findBySource: vi.fn().mockResolvedValue([]), + deleteBySource: vi.fn().mockResolvedValue(0), + deleteByIndices: vi.fn().mockResolvedValue(0), + upsertChunks: vi.fn().mockResolvedValue(0), + searchByVector: vi.fn(), + stats: vi.fn(), + } + const configService = { + get: vi.fn().mockResolvedValue({ aiEmbedding: {} }), + } + const runtime = createMockEmbeddingRuntime() + const aiService = { + hasFeatureModel: vi.fn().mockResolvedValue(overrides.configured ?? 
true), + getEmbeddingModel: vi.fn().mockResolvedValue(runtime), + } + const databaseService = { + findGlobalById: vi.fn(), + } + const db = {} as never + const service = new AiEmbeddingsService( + repository as any, + configService as any, + aiService as any, + databaseService as any, + db, + ) + return { + repository, + configService, + aiService, + databaseService, + runtime, + service, + } +} + +describe('AiEmbeddingsService.search', () => { + it('annotates similarity as 1 - distance and filters by threshold', async () => { + const { repository, service } = createService() + repository.searchByVector.mockResolvedValue([ + { + sourceType: 'note', + sourceId: 'note-1', + chunkIndex: 0, + content: 'a', + distance: 0.1, + similarity: 0.9, + }, + { + sourceType: 'note', + sourceId: 'note-2', + chunkIndex: 0, + content: 'b', + distance: 0.5, + similarity: 0.5, + }, + ]) + + const out = await service.search('hello', { minSimilarity: 0.7 }) + expect(out).toHaveLength(1) + expect(out[0].sourceId).toBe('note-1') + expect(out[0].similarity).toBeCloseTo(0.9) + }) + + it('throws AI_EMBEDDING_MODEL_NOT_CONFIGURED when embedding model missing', async () => { + const { service } = createService({ configured: false }) + await expect(service.search('hello')).rejects.toBeInstanceOf(AppException) + }) + + it('returns empty array on blank query', async () => { + const { service } = createService() + expect(await service.search(' ')).toEqual([]) + }) +}) + +describe('AiEmbeddingsService.syncSource', () => { + it('deletes when op === delete regardless of model configuration', async () => { + const { repository, service } = createService({ configured: false }) + repository.deleteBySource.mockResolvedValue(3) + const res = await service.syncSource('note', 'note-1', 'delete') + expect(res.deleted).toBe(3) + expect(repository.deleteBySource).toHaveBeenCalledWith('note', 'note-1') + }) + + it('no-ops gracefully when embedding model unconfigured for upsert', async () => { + const { 
repository, service, databaseService } = createService({ + configured: false, + }) + const res = await service.syncSource('note', 'note-1', 'upsert') + expect(res).toEqual({}) + expect(databaseService.findGlobalById).not.toHaveBeenCalled() + expect(repository.upsertChunks).not.toHaveBeenCalled() + }) + + it('skips when source not found', async () => { + const { service, databaseService } = createService() + databaseService.findGlobalById.mockResolvedValue(null) + const res = await service.syncSource('note', 'note-1', 'upsert') + expect(res).toEqual({}) + }) + + it('skips re-embedding when content hash unchanged', async () => { + const { service, databaseService, repository, runtime } = createService() + databaseService.findGlobalById.mockResolvedValue({ + type: 'note', + document: { text: 'paragraph alpha.\n\nparagraph beta.' }, + }) + + const embedSpy = vi.spyOn(runtime, 'embedBatch') + + await service.syncSource('note', 'note-1', 'upsert') + expect(embedSpy).toHaveBeenCalled() + const upsertedInputs = repository.upsertChunks.mock.calls[0]?.[0] ?? 
[] + expect(upsertedInputs.length).toBeGreaterThan(0) + + repository.findBySource.mockResolvedValue( + upsertedInputs.map((u: any) => ({ + id: '1', + sourceType: u.sourceType, + sourceId: u.sourceId, + chunkIndex: u.chunkIndex, + content: u.content, + contentHash: u.contentHash, + embedding: u.embedding, + embeddingModel: u.embeddingModel, + dim: u.dim, + createdAt: new Date(), + })), + ) + embedSpy.mockClear() + repository.upsertChunks.mockClear() + + const second = await service.syncSource('note', 'note-1', 'upsert') + expect(embedSpy).not.toHaveBeenCalled() + expect(second.embedded).toBe(0) + }) +}) diff --git a/apps/core/test/src/modules/ai/ai-embeddings/chunker.spec.ts b/apps/core/test/src/modules/ai/ai-embeddings/chunker.spec.ts new file mode 100644 index 00000000000..6f498de6020 --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-embeddings/chunker.spec.ts @@ -0,0 +1,63 @@ +import { describe, expect, it } from 'vitest' + +import { chunk } from '~/modules/ai/ai-embeddings/chunker' + +describe('chunker', () => { + it('returns no chunks for empty input', () => { + expect(chunk('', { maxTokens: 100, overlapTokens: 0 })).toEqual([]) + expect(chunk(' \n\n ', { maxTokens: 100, overlapTokens: 0 })).toEqual([]) + }) + + it('is deterministic — same input yields identical chunks (content + hash)', () => { + const text = 'first paragraph.\n\nsecond paragraph.\n\nthird paragraph.' 
+ const a = chunk(text, { maxTokens: 100, overlapTokens: 0 }) + const b = chunk(text, { maxTokens: 100, overlapTokens: 0 }) + expect(a).toEqual(b) + expect(a[0].hash).toMatch(/^[\da-f]{64}$/) + }) + + it('strips fenced code blocks before chunking', () => { + const text = 'prose A\n\n```ts\nconst secret = "ignored"\n```\n\nprose B' + const chunks = chunk(text, { maxTokens: 100, overlapTokens: 0 }) + for (const c of chunks) { + expect(c.content).not.toContain('secret') + } + }) + + it('greedy-packs small paragraphs into a single chunk under budget', () => { + const text = 'one.\n\ntwo.\n\nthree.\n\nfour.' + const chunks = chunk(text, { maxTokens: 200, overlapTokens: 0 }) + expect(chunks.length).toBe(1) + expect(chunks[0].content).toContain('one') + expect(chunks[0].content).toContain('four') + }) + + it('splits oversized paragraphs into multiple chunks', () => { + const big = 'a'.repeat(2000) + const chunks = chunk(big, { maxTokens: 50, overlapTokens: 0 }) + expect(chunks.length).toBeGreaterThan(1) + expect(chunks.every((c) => c.content.length > 0)).toBe(true) + }) + + it('falls back to sentence then character window for oversized paragraphs', () => { + const sentence = `sentence one. ${'x'.repeat(1500)}. 
sentence three.` + const chunks = chunk(sentence, { maxTokens: 40, overlapTokens: 0 }) + expect(chunks.length).toBeGreaterThan(1) + }) + + it('applies overlap between consecutive chunks', () => { + const text = `${'first paragraph stuff '.repeat(80)}\n\n${'second paragraph different '.repeat(80)}` + const chunks = chunk(text, { maxTokens: 100, overlapTokens: 20 }) + expect(chunks.length).toBeGreaterThan(1) + const tail = chunks[0].content.slice(-30) + expect(chunks[1].content.startsWith(tail.slice(-10))).toBe(false) + expect(chunks[1].content.length).toBeGreaterThan(0) + }) + + it('handles CJK content', () => { + const text = `中文段落一。${'中'.repeat(500)}\n\n中文段落二。` + const chunks = chunk(text, { maxTokens: 80, overlapTokens: 10 }) + expect(chunks.length).toBeGreaterThan(1) + expect(chunks.every((c) => c.hash.length === 64)).toBe(true) + }) +}) diff --git a/apps/core/test/src/modules/ai/ai-memory/ai-memory.controller.e2e-spec.ts b/apps/core/test/src/modules/ai/ai-memory/ai-memory.controller.e2e-spec.ts new file mode 100644 index 00000000000..043b3574601 --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-memory/ai-memory.controller.e2e-spec.ts @@ -0,0 +1,176 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +import { createE2EApp } from '@/helper/create-e2e-app' +import { authPassHeader } from '@/mock/guard/auth.guard' +import { apiRoutePrefix } from '~/common/decorators/api-controller.decorator' +import { AiMemoryController } from '~/modules/ai/ai-memory/ai-memory.controller' +import { AiMemoryService } from '~/modules/ai/ai-memory/ai-memory.service' + +const baseRow = (overrides: Record = {}) => ({ + id: '7000000000000000010', + scope: 'global', + type: 'fact', + content: 'be brief', + confidence: 1, + salience: 1, + source: { kind: 'manual', authorId: '1' }, + embedding: null, + embeddingModel: null, + dim: null, + firstSeenAt: new Date('2026-01-01T00:00:00.000Z'), + lastSeenAt: new Date('2026-01-01T00:00:00.000Z'), + expiresAt: null, + 
supersedesId: null, + status: 'active', + metadata: {}, + createdAt: new Date('2026-01-01T00:00:00.000Z'), + updatedAt: null, + ...overrides, +}) + +describe('AiMemoryController e2e', () => { + const mockService = { + list: vi.fn(), + findById: vi.fn(), + create: vi.fn(), + update: vi.fn(), + archive: vi.fn(), + getKpi: vi.fn(), + } + + const proxy = createE2EApp({ + controllers: [AiMemoryController], + providers: [{ provide: AiMemoryService, useValue: mockService }], + }) + + beforeEach(() => { + vi.clearAllMocks() + }) + + it('GET /ai-memory returns paginated envelope with snake_case data', async () => { + mockService.list.mockResolvedValue({ + data: [baseRow()], + pagination: { + currentPage: 1, + totalPage: 1, + total: 1, + size: 20, + hasNextPage: false, + hasPrevPage: false, + }, + }) + + const res = await proxy.app.inject({ + method: 'GET', + url: `${apiRoutePrefix}/ai-memory?scope=global&page=1&size=20`, + headers: authPassHeader, + }) + + expect(res.statusCode).toBe(200) + const body = res.json() + expect(Array.isArray(body.data)).toBe(true) + expect(body.data[0]).toMatchObject({ + id: '7000000000000000010', + has_embedding: false, + scope: 'global', + type: 'fact', + }) + expect(body.meta?.pagination).toMatchObject({ page: 1, total: 1 }) + }) + + it('GET /ai-memory/:id returns detail view', async () => { + mockService.findById.mockResolvedValue(baseRow()) + + const res = await proxy.app.inject({ + method: 'GET', + url: `${apiRoutePrefix}/ai-memory/7000000000000000010`, + headers: authPassHeader, + }) + + expect(res.statusCode).toBe(200) + expect(res.json().data).toMatchObject({ + id: '7000000000000000010', + has_embedding: false, + }) + }) + + it('POST /ai-memory accepts CreateMemoryDto and forwards actor id', async () => { + mockService.create.mockResolvedValue(baseRow({ id: '7000000000000000020' })) + + const res = await proxy.app.inject({ + method: 'POST', + url: `${apiRoutePrefix}/ai-memory`, + headers: authPassHeader, + payload: { + scope: 
'persona:inner-self', + type: 'preference', + content: 'sleep early', + }, + }) + + expect(res.statusCode).toBe(201) + expect(mockService.create).toHaveBeenCalledWith( + expect.objectContaining({ + scope: 'persona:inner-self', + content: 'sleep early', + }), + '1', + ) + }) + + it('POST /ai-memory rejects malformed scope', async () => { + const res = await proxy.app.inject({ + method: 'POST', + url: `${apiRoutePrefix}/ai-memory`, + headers: authPassHeader, + payload: { scope: 'invalid:Scope', type: 'fact', content: 'x' }, + }) + + expect([400, 422]).toContain(res.statusCode) + expect(mockService.create).not.toHaveBeenCalled() + }) + + it('PUT /ai-memory/:id calls service.update', async () => { + mockService.update.mockResolvedValue(baseRow({ content: 'updated' })) + + const res = await proxy.app.inject({ + method: 'PUT', + url: `${apiRoutePrefix}/ai-memory/7000000000000000010`, + headers: authPassHeader, + payload: { content: 'updated' }, + }) + + expect(res.statusCode).toBe(200) + expect(mockService.update).toHaveBeenCalledWith( + '7000000000000000010', + expect.objectContaining({ content: 'updated' }), + '1', + ) + }) + + it('DELETE /ai-memory/:id returns 204 and calls archive', async () => { + mockService.archive.mockResolvedValue(undefined) + + const res = await proxy.app.inject({ + method: 'DELETE', + url: `${apiRoutePrefix}/ai-memory/7000000000000000010`, + headers: authPassHeader, + }) + + expect(res.statusCode).toBe(204) + expect(mockService.archive).toHaveBeenCalledWith('7000000000000000010') + }) + + it('GET /ai-memory/kpi returns counts', async () => { + mockService.getKpi.mockResolvedValue({ total: 5, active: 3, archived: 2 }) + + const res = await proxy.app.inject({ + method: 'GET', + url: `${apiRoutePrefix}/ai-memory/kpi`, + headers: authPassHeader, + }) + + expect(res.statusCode).toBe(200) + expect(res.json().data).toEqual({ total: 5, active: 3, archived: 2 }) + }) +}) diff --git a/apps/core/test/src/modules/ai/ai-memory/ai-memory.integration.spec.ts 
b/apps/core/test/src/modules/ai/ai-memory/ai-memory.integration.spec.ts new file mode 100644 index 00000000000..6ac18a9bede --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-memory/ai-memory.integration.spec.ts @@ -0,0 +1,275 @@ +import { drizzle, type NodePgDatabase } from 'drizzle-orm/node-postgres' +import { Pool } from 'pg' +import { + afterAll, + beforeAll, + beforeEach, + describe, + expect, + it, + vi, +} from 'vitest' + +import { startPgTestContainer } from '@/helper/pg-testcontainer' +import { createMockEmbeddingRuntime } from '@/mock/processors/ai-embedding.mock' +import * as schema from '~/database/schema' +import { aiMemories } from '~/database/schema' +import { AiMemoryRepository } from '~/modules/ai/ai-memory/ai-memory.repository' +import { AiMemoryService } from '~/modules/ai/ai-memory/ai-memory.service' +import { SnowflakeService } from '~/shared/id/snowflake.service' + +type Drizzle = NodePgDatabase + +const createEnqueueSpy = () => { + const enqueued: string[] = [] + return { + enqueued, + crud: { + createTask: vi.fn(async (input: { payload: { memoryId: string } }) => { + enqueued.push(input.payload.memoryId) + return { taskId: 'task-' + input.payload.memoryId, created: true } + }), + }, + } +} + +const wireService = ( + db: Drizzle, + opts: { + embeddingRuntime?: ReturnType | null + } = {}, +) => { + const repository = new AiMemoryRepository(db as any, new SnowflakeService()) + const taskService = createEnqueueSpy() + const aiService = { + async getEmbeddingModel() { + if (opts.embeddingRuntime === null) { + throw new Error('no embedding model configured') + } + return opts.embeddingRuntime ?? 
createMockEmbeddingRuntime() + }, + } + const configService = { + async get() { + return { + aiMemory: { recallTopK: 5, recallMinSimilarity: 0.7 }, + } + }, + } + const service = new AiMemoryService( + repository, + taskService as any, + aiService as any, + configService as any, + ) + return { service, repository, taskService } +} + +describe('AiMemoryService integration (pg testcontainer)', () => { + let pool: Pool + let db: Drizzle + + beforeAll(async () => { + const container = await startPgTestContainer() + pool = new Pool({ connectionString: container.getConnectionUri(), max: 4 }) + db = drizzle(pool, { schema, casing: 'snake_case' }) + await pool.query('CREATE EXTENSION IF NOT EXISTS vector') + }) + + afterAll(async () => { + await pool?.end() + }) + + beforeEach(async () => { + await pool.query('DELETE FROM ai_memories') + }) + + it('creates memory with embedding=null and enqueues MEMORY_EMBED', async () => { + const { service, taskService } = wireService(db) + const created = await service.create( + { + scope: 'global', + type: 'fact', + content: 'Brevity matters', + confidence: 1, + salience: 1, + }, + 'actor-1', + ) + + expect(created.embedding).toBeNull() + expect(created.source).toEqual({ kind: 'manual', authorId: 'actor-1' }) + expect(taskService.enqueued).toEqual([created.id]) + }) + + it('embed task populates embedding column', async () => { + const { service } = wireService(db) + const created = await service.create( + { + scope: 'global', + type: 'fact', + content: 'I value brevity', + confidence: 1, + salience: 1, + }, + 'actor-2', + ) + + await service.handleEmbedTask(created.id) + + const rows = await db.select().from(aiMemories) + const row = rows.find((r) => r.id === created.id)! 
+ expect(row.embedding).not.toBeNull() + expect(row.embeddingModel).toBe('mock-embedding-model') + expect(row.dim).toBe(8) + }) + + it('re-enqueues embed on content change but not on metadata-only update', async () => { + const { service, taskService } = wireService(db) + const row = await service.create( + { + scope: 'global', + type: 'fact', + content: 'original', + confidence: 1, + salience: 1, + }, + 'actor', + ) + taskService.enqueued.length = 0 + + await service.update(row.id, { salience: 2 }, 'actor') + expect(taskService.enqueued).toEqual([]) + + await service.update(row.id, { content: 'changed' }, 'actor') + expect(taskService.enqueued).toEqual([row.id]) + }) + + it('archive sets status and excludes from recall', async () => { + const { service } = wireService(db) + const row = await service.create( + { + scope: 'global', + type: 'fact', + content: 'archived later', + confidence: 1, + salience: 1, + }, + 'actor', + ) + + await service.archive(row.id) + + const reloaded = (await db.select().from(aiMemories)).find( + (r) => r.id === row.id, + )! 
+ expect(reloaded.status).toBe('archived') + + const recalled = await service.recall({ scope: 'global' }) + expect(recalled.find((m) => m.id === row.id)).toBeUndefined() + }) + + it('recall (no query) skips expired memories', async () => { + const { service } = wireService(db) + const active = await service.create( + { + scope: 'global', + type: 'fact', + content: 'active mem', + confidence: 1, + salience: 1, + }, + 'actor', + ) + const expired = await service.create( + { + scope: 'global', + type: 'fact', + content: 'expired mem', + confidence: 1, + salience: 5, + expiresAt: '2020-01-01T00:00:00.000Z', + }, + 'actor', + ) + + const recalled = await service.recall({ scope: 'global' }) + + const ids = recalled.map((r) => r.id) + expect(ids).toContain(active.id) + expect(ids).not.toContain(expired.id) + }) + + it('recall returns [] when query set but embedding model unconfigured', async () => { + const { service } = wireService(db, { embeddingRuntime: null }) + await service.create( + { + scope: 'global', + type: 'fact', + content: 'hello world', + confidence: 1, + salience: 1, + }, + 'actor', + ) + + const result = await service.recall({ scope: 'global', query: 'world' }) + expect(result).toEqual([]) + }) + + it('recall query path retrieves vector matches ordered by score', async () => { + const { service } = wireService(db) + const created: string[] = [] + for (const content of [ + 'cats love fish', + 'dogs love bones', + 'parrots mimic words', + ]) { + const row = await service.create( + { scope: 'global', type: 'fact', content, confidence: 1, salience: 1 }, + 'actor', + ) + created.push(row.id) + await service.handleEmbedTask(row.id) + } + + const recalled = await service.recall({ + scope: 'global', + query: 'cats love fish', + topK: 3, + minSimilarity: 0.5, + }) + expect(recalled.length).toBeGreaterThan(0) + expect(recalled[0].content).toBe('cats love fish') + }) + + it('getKpi reports total / active / archived counts', async () => { + const { service } = 
wireService(db) + const a = await service.create( + { + scope: 'global', + type: 'fact', + content: 'a', + confidence: 1, + salience: 1, + }, + 'actor', + ) + await service.create( + { + scope: 'global', + type: 'fact', + content: 'b', + confidence: 1, + salience: 1, + }, + 'actor', + ) + await service.archive(a.id) + + const kpi = await service.getKpi() + expect(kpi.total).toBe(2) + expect(kpi.active).toBe(1) + expect(kpi.archived).toBe(1) + }) +}) diff --git a/apps/core/test/src/modules/ai/ai-memory/ai-memory.schema.spec.ts b/apps/core/test/src/modules/ai/ai-memory/ai-memory.schema.spec.ts new file mode 100644 index 00000000000..a9eabdaf842 --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-memory/ai-memory.schema.spec.ts @@ -0,0 +1,82 @@ +import { describe, expect, it } from 'vitest' + +import { CreateMemorySchema } from '~/modules/ai/ai-memory/ai-memory.schema' + +describe('CreateMemorySchema scope regex', () => { + it.each([ + 'global', + 'persona:inner-self', + 'persona:passerby', + 'scenario:recently', + 'scenario:comment-reply', + ])('accepts %s', (scope) => { + const result = CreateMemorySchema.safeParse({ + scope, + type: 'fact', + content: 'hello', + }) + expect(result.success).toBe(true) + }) + + it.each([ + 'Global', + 'persona:', + 'persona:Inner-Self', + 'persona:inner_self', + 'scenario:Recently', + 'random', + 'global:extra', + 'persona', + 'scenario:', + ])('rejects %s', (scope) => { + const result = CreateMemorySchema.safeParse({ + scope, + type: 'fact', + content: 'hello', + }) + expect(result.success).toBe(false) + }) + + it('rejects unknown memory type', () => { + const result = CreateMemorySchema.safeParse({ + scope: 'global', + type: 'unknown', + content: 'x', + }) + expect(result.success).toBe(false) + }) + + it('enforces content length 1..2000', () => { + expect( + CreateMemorySchema.safeParse({ + scope: 'global', + type: 'fact', + content: '', + }).success, + ).toBe(false) + expect( + CreateMemorySchema.safeParse({ + scope: 'global', + 
type: 'fact', + content: 'a'.repeat(2001), + }).success, + ).toBe(false) + expect( + CreateMemorySchema.safeParse({ + scope: 'global', + type: 'fact', + content: 'a'.repeat(2000), + }).success, + ).toBe(true) + }) + + it('applies default confidence / salience', () => { + const parsed = CreateMemorySchema.parse({ + scope: 'global', + type: 'fact', + content: 'x', + }) + expect(parsed.confidence).toBe(1) + expect(parsed.salience).toBe(1) + }) +}) diff --git a/apps/core/test/src/modules/ai/ai-memory/ai-memory.service.spec.ts b/apps/core/test/src/modules/ai/ai-memory/ai-memory.service.spec.ts new file mode 100644 index 00000000000..164221cb5de --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-memory/ai-memory.service.spec.ts @@ -0,0 +1,258 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +import { createPgRepositoryMock } from '@/helper/pg-repository-mock' +import { createMockEmbeddingRuntime } from '@/mock/processors/ai-embedding.mock' +import type { AiMemoryRepository } from '~/modules/ai/ai-memory/ai-memory.repository' +import { AiMemoryService } from '~/modules/ai/ai-memory/ai-memory.service' +import type { + AiMemory, + RecallScoredMemory, +} from '~/modules/ai/ai-memory/ai-memory.types' + +const baseMemory = (overrides: Partial = {}): AiMemory => ({ + id: '7000000000000000010' as any, + scope: 'global', + type: 'fact', + content: 'I prefer brevity', + confidence: 1, + salience: 1, + source: { kind: 'manual' }, + embedding: null, + embeddingModel: null, + dim: null, + firstSeenAt: new Date('2026-01-01T00:00:00.000Z'), + lastSeenAt: new Date('2026-01-01T00:00:00.000Z'), + expiresAt: null, + supersedesId: null, + status: 'active', + metadata: {}, + createdAt: new Date('2026-01-01T00:00:00.000Z'), + updatedAt: null, + ...overrides, +}) + +const createService = () => { + const repository = createPgRepositoryMock() + const aiTaskService = { + crud: { + createTask: vi.fn().mockResolvedValue({ taskId: 't1', created: true }), + }, + } + const 
aiService = { + getEmbeddingModel: vi.fn(), + } + const configService = { + get: vi.fn().mockResolvedValue({ + aiMemory: { recallTopK: 5, recallMinSimilarity: 0.7 }, + }), + } + const service = new AiMemoryService( + repository as any, + aiTaskService as any, + aiService as any, + configService as any, + ) + return { repository, aiTaskService, aiService, configService, service } +} + +describe('AiMemoryService.recall', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('returns salience-ordered active memories when no query supplied', async () => { + const { repository, service } = createService() + const rows = [ + baseMemory({ id: '1' as any, salience: 3 }), + baseMemory({ id: '2' as any, salience: 5 }), + ] + repository.listActiveByScope.mockResolvedValue(rows) + + const result = await service.recall({ scope: 'global' }) + + expect(repository.listActiveByScope).toHaveBeenCalledWith(['global'], 5) + expect(result.map((r) => r.id)).toEqual(['1', '2']) + }) + + it('returns [] when query is set but embedding model unavailable', async () => { + const { aiService, repository, service } = createService() + aiService.getEmbeddingModel.mockRejectedValue(new Error('not configured')) + + const result = await service.recall({ scope: 'global', query: 'foo' }) + + expect(result).toEqual([]) + expect(repository.vectorSearch).not.toHaveBeenCalled() + }) + + it('applies similarity threshold and re-ranks by similarity * salience * confidence', async () => { + const { aiService, repository, service } = createService() + aiService.getEmbeddingModel.mockResolvedValue(createMockEmbeddingRuntime()) + + const candidates: RecallScoredMemory[] = [ + { + ...baseMemory({ id: 'a' as any, salience: 1, confidence: 1 }), + similarity: 0.95, + }, + { + ...baseMemory({ id: 'b' as any, salience: 5, confidence: 1 }), + similarity: 0.8, + }, + { + ...baseMemory({ id: 'c' as any, salience: 1, confidence: 1 }), + similarity: 0.4, + }, + ] + 
repository.vectorSearch.mockResolvedValue(candidates) + + const result = await service.recall({ + scope: ['global', 'persona:inner-self'], + query: 'brevity', + topK: 2, + minSimilarity: 0.7, + }) + + expect(result).toHaveLength(2) + expect(result[0].id).toBe('b') + expect(result[1].id).toBe('a') + expect(repository.vectorSearch).toHaveBeenCalledWith( + ['global', 'persona:inner-self'], + expect.any(Array), + expect.any(String), + 4, + ) + }) + + it('filters out below-threshold candidates', async () => { + const { aiService, repository, service } = createService() + aiService.getEmbeddingModel.mockResolvedValue(createMockEmbeddingRuntime()) + repository.vectorSearch.mockResolvedValue([ + { ...baseMemory({ id: 'x' as any }), similarity: 0.5 }, + ]) + + const result = await service.recall({ + scope: 'global', + query: 'q', + minSimilarity: 0.7, + }) + + expect(result).toEqual([]) + }) +}) + +describe('AiMemoryService.create / update', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('persists then enqueues MEMORY_EMBED on create', async () => { + const { repository, aiTaskService, service } = createService() + const row = baseMemory({ id: '9001' as any }) + repository.create.mockResolvedValue(row) + + const created = await service.create( + { + scope: 'global', + type: 'fact', + content: 'x', + confidence: 1, + salience: 1, + }, + 'actor-1', + ) + + expect(created.id).toBe('9001') + expect(repository.create).toHaveBeenCalledWith( + expect.objectContaining({ + source: { kind: 'manual', authorId: 'actor-1' }, + }), + ) + expect(aiTaskService.crud.createTask).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'ai:memory:embed', + payload: { memoryId: '9001' }, + }), + ) + }) + + it('does not enqueue embed when update omits content', async () => { + const { repository, aiTaskService, service } = createService() + const row = baseMemory({ id: '9002' as any, content: 'unchanged' }) + repository.findById.mockResolvedValue(row) + 
repository.update.mockResolvedValue(row) + + await service.update('9002', { salience: 5 }, 'actor') + + expect(aiTaskService.crud.createTask).not.toHaveBeenCalled() + }) + + it('enqueues embed when update changes content', async () => { + const { repository, aiTaskService, service } = createService() + const existing = baseMemory({ id: '9003' as any, content: 'old' }) + const updated = baseMemory({ id: '9003' as any, content: 'new' }) + repository.findById.mockResolvedValue(existing) + repository.update.mockResolvedValue(updated) + + await service.update('9003', { content: 'new' }, 'actor') + + expect(aiTaskService.crud.createTask).toHaveBeenCalledWith( + expect.objectContaining({ payload: { memoryId: '9003' } }), + ) + }) +}) + +describe('AiMemoryService.handleEmbedTask', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('skips when memory is archived', async () => { + const { repository, aiService, service } = createService() + repository.findById.mockResolvedValue( + baseMemory({ id: '9004' as any, status: 'archived' }), + ) + aiService.getEmbeddingModel.mockResolvedValue(createMockEmbeddingRuntime()) + + await service.handleEmbedTask('9004') + + expect(repository.updateEmbedding).not.toHaveBeenCalled() + }) + + it('no-ops when embedding model unavailable', async () => { + const { repository, aiService, service } = createService() + repository.findById.mockResolvedValue(baseMemory({ id: '9005' as any })) + aiService.getEmbeddingModel.mockRejectedValue(new Error('unset')) + + await service.handleEmbedTask('9005') + + expect(repository.updateEmbedding).not.toHaveBeenCalled() + }) + + it('writes vector when runtime returns embedding', async () => { + const { repository, aiService, service } = createService() + repository.findById.mockResolvedValue(baseMemory({ id: '9006' as any })) + aiService.getEmbeddingModel.mockResolvedValue(createMockEmbeddingRuntime()) + + await service.handleEmbedTask('9006') + + 
expect(repository.updateEmbedding).toHaveBeenCalledWith( + '9006', + expect.any(Array), + 'mock-embedding-model', + ) + }) +}) + +describe('AiMemoryService.getKpi', () => { + it('aggregates total / active / archived counts', async () => { + const { repository, service } = createService() + repository.countByStatus.mockResolvedValue({ + active: 3, + archived: 2, + superseded: 1, + pending_review: 0, + }) + + const kpi = await service.getKpi() + expect(kpi).toEqual({ total: 6, active: 3, archived: 2 }) + }) +}) diff --git a/apps/core/test/src/modules/ai/ai-persona/ai-persona.integration.spec.ts b/apps/core/test/src/modules/ai/ai-persona/ai-persona.integration.spec.ts new file mode 100644 index 00000000000..cf2306479f0 --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-persona/ai-persona.integration.spec.ts @@ -0,0 +1,293 @@ +import { drizzle, type NodePgDatabase } from 'drizzle-orm/node-postgres' +import { Pool } from 'pg' +import { + afterAll, + beforeAll, + beforeEach, + describe, + expect, + it, + vi, +} from 'vitest' + +import { startPgTestContainer } from '@/helper/pg-testcontainer' +import { redisHelper } from '@/helper/redis-mock.helper' +import { createMockAiRuntime } from '@/mock/processors/ai-runtime.mock' +import { AppErrorCode } from '~/common/errors' +import { AppException } from '~/common/errors/exception.types' +import { BusinessEvents } from '~/constants/business-event.constant' +import * as schema from '~/database/schema' +import { notes, pages, personaProfiles } from '~/database/schema' +import { PERSONA_DISTILL_LOCK_KEY_PREFIX } from '~/modules/ai/ai-persona/ai-persona.constants' +import { PersonaProfileRepository } from '~/modules/ai/ai-persona/ai-persona.repository' +import { AiPersonaService } from '~/modules/ai/ai-persona/ai-persona.service' +import { ExemplarSelector } from '~/modules/ai/ai-persona/exemplar-selector' +import { PersonaDistillProcessor } from '~/modules/ai/ai-persona/tasks/persona-distill.processor' +import { AITaskType } from 
'~/modules/ai/ai-task/ai-task.types' +import { SnowflakeService } from '~/shared/id/snowflake.service' + +type Drizzle = NodePgDatabase + +const seedCorpus = async (db: Drizzle, snowflake: SnowflakeService) => { + await db.insert(notes).values({ + id: snowflake.nextId(), + title: 'A Note', + text: 'Reading again about solitude.\n\nThe author writes with quiet precision. I noticed how a single careful sentence can dismantle a whole afternoon of restlessness.', + contentFormat: 'markdown', + isPublished: true, + } as any) + await db.insert(pages).values({ + id: snowflake.nextId(), + title: 'About', + slug: `about-${Date.now()}`, + text: 'I have been writing here for years.\n\nThis page collects the slow accumulations of a life spent paying attention to small things — drafts, half-thoughts, partial sketches.', + contentFormat: 'markdown', + } as any) +} + +const DISTILL_JSON = JSON.stringify({ + profile: + 'The author writes with quiet precision. They tend to anchor abstract reflections in concrete sensory details and prefer short, declarative sentences with careful pauses.', + profile_summary: + 'A precise, quiet voice that anchors abstractions in concrete details.', + metadata: { + tone_tags: ['quiet', 'precise'], + recurring_themes: ['attention', 'solitude'], + signature_phrases: ['quiet precision'], + }, +}) + +describe('ai-persona integration (pg + redis)', () => { + let pool: Pool + let db: Drizzle + let snowflake: SnowflakeService + let redisService: any + let mockRuntime: ReturnType + let service: AiPersonaService + let processor: PersonaDistillProcessor + let profileRepo: PersonaProfileRepository + let exemplarSelector: ExemplarSelector + let aiTaskService: any + let configsService: any + let eventManager: any + let enqueuedTasks: Array<{ type: string; payload: any; dedupKey?: string }> + + beforeAll(async () => { + const container = await startPgTestContainer() + pool = new Pool({ + connectionString: container.getConnectionUri(), + max: 4, + }) + db = 
drizzle(pool, { schema, casing: 'snake_case' }) + await pool.query('CREATE EXTENSION IF NOT EXISTS vector') + + snowflake = new SnowflakeService() + const helper = await redisHelper + redisService = { + getClient: () => helper.RedisService.getClient(), + } + mockRuntime = createMockAiRuntime({ + modelId: 'mock-distill', + behavior: { kind: 'text', text: DISTILL_JSON }, + }) + + profileRepo = new PersonaProfileRepository(db as any, snowflake) + exemplarSelector = new ExemplarSelector(db as any, redisService, {} as any) + + configsService = { + get: vi.fn(async (key: string) => { + if (key === 'ai') { + return { + providers: [{ id: 'p', enabled: true }], + personaDistillModel: { providerId: 'p', model: 'm' }, + echoModel: undefined, + aiPersona: { + distillSampleMaxTokens: 8000, + exemplarsLengthMin: 50, + exemplarsLengthMax: 600, + exemplarsCandidateCacheTtlSec: 60, + }, + } + } + return {} + }), + } + enqueuedTasks = [] + aiTaskService = { + crud: { + createTask: vi.fn(async (opts: any) => { + enqueuedTasks.push(opts) + return { taskId: `task-${enqueuedTasks.length}`, created: true } + }), + }, + } + eventManager = { + emit: vi.fn(async () => {}), + broadcast: vi.fn(async () => {}), + } + + service = new AiPersonaService( + profileRepo, + aiTaskService, + redisService, + configsService, + exemplarSelector, + ) + + processor = new PersonaDistillProcessor( + db as any, + { registerHandler: vi.fn() } as any, + { + async getPersonaDistillModel() { + return mockRuntime.runtime + }, + } as any, + configsService, + profileRepo, + redisService, + eventManager, + ) + }) + + afterAll(async () => { + await pool?.end() + }) + + beforeEach(async () => { + await pool.query('DELETE FROM persona_profiles') + await pool.query('DELETE FROM notes') + await pool.query('DELETE FROM pages') + const redis = redisService.getClient() + const keys = await redis.keys(`${PERSONA_DISTILL_LOCK_KEY_PREFIX}*`) + if (keys.length) await redis.del(...keys) + enqueuedTasks.length = 0 + 
mockRuntime.reset() + mockRuntime.setBehavior({ kind: 'text', text: DISTILL_JSON }) + eventManager.emit.mockClear() + }) + + it('refresh enqueues a PERSONA_DISTILL task', async () => { + const { taskId } = await service.refresh('inner-self') + expect(taskId).toBeTruthy() + expect(enqueuedTasks).toHaveLength(1) + expect(enqueuedTasks[0].type).toBe(AITaskType.PersonaDistill) + expect(enqueuedTasks[0].payload).toMatchObject({ + personaKey: 'inner-self', + }) + }) + + it('persists a row and emits PERSONA_PROFILE_REFRESHED after processor runs', async () => { + await seedCorpus(db, snowflake) + const ctx = makeContext() + await invokeProcessor(processor, ctx) + const row = await profileRepo.findByKey('inner-self') + expect(row).not.toBeNull() + expect(row?.profile).toContain('quiet precision') + expect(row?.profileSummary).toContain('precise, quiet voice') + expect(row?.distillModel).toBe('mock-distill') + expect(eventManager.emit).toHaveBeenCalledWith( + BusinessEvents.PERSONA_PROFILE_REFRESHED, + expect.objectContaining({ personaKey: 'inner-self' }), + ) + }) + + it('second concurrent refresh returns 409 when lock held', async () => { + const redis = redisService.getClient() + await redis.set( + `${PERSONA_DISTILL_LOCK_KEY_PREFIX}inner-self`, + '1', + 'EX', + 60, + ) + try { + await service.refresh('inner-self') + expect.fail('should have thrown') + } catch (error) { + expect(error).toBeInstanceOf(AppException) + expect((error as AppException).code).toBe( + AppErrorCode.AI_PERSONA_REFRESH_IN_PROGRESS, + ) + } + }) + + it('listPersonasWithStatus marks inner-self hasProfile=true after distill', async () => { + await seedCorpus(db, snowflake) + await invokeProcessor(processor, makeContext()) + const list = await service.listPersonasWithStatus() + const inner = list.find((p) => p.key === 'inner-self')! + const passerby = list.find((p) => p.key === 'passerby')! 
+ expect(inner.hasProfile).toBe(true) + expect(passerby.hasProfile).toBe(false) + }) + + it('getProfile returns AI_PERSONA_PROFILE_NOT_FOUND for passerby', async () => { + try { + await service.getProfile('passerby') + expect.fail('should have thrown') + } catch (error) { + expect(error).toBeInstanceOf(AppException) + expect((error as AppException).code).toBe( + AppErrorCode.AI_PERSONA_PROFILE_NOT_FOUND, + ) + } + }) + + it('refresh fails with AI_PERSONA_DISTILL_MODEL_NOT_CONFIGURED when no model is set', async () => { + configsService.get.mockResolvedValueOnce({ + providers: [{ id: 'p', enabled: true }], + personaDistillModel: undefined, + echoModel: undefined, + }) + try { + await service.refresh('inner-self') + expect.fail('should have thrown') + } catch (error) { + expect(error).toBeInstanceOf(AppException) + expect((error as AppException).code).toBe( + AppErrorCode.AI_PERSONA_DISTILL_MODEL_NOT_CONFIGURED, + ) + } + }) + + it('upsert updates an existing row in place', async () => { + await seedCorpus(db, snowflake) + await invokeProcessor(processor, makeContext()) + mockRuntime.setBehavior({ + kind: 'text', + text: JSON.stringify({ + profile: 'Revised profile content.', + profile_summary: 'Revised summary.', + metadata: { + tone_tags: ['revised'], + recurring_themes: [], + signature_phrases: [], + }, + }), + }) + await invokeProcessor(processor, makeContext()) + const rows = await db + .select({ id: personaProfiles.id, profile: personaProfiles.profile }) + .from(personaProfiles) + expect(rows).toHaveLength(1) + expect(rows[0].profile).toBe('Revised profile content.') + }) +}) + +const makeContext = () => ({ + taskId: 'test-task', + signal: new AbortController().signal, + appendLog: vi.fn(async () => {}), + updateProgress: vi.fn(async () => {}), + incrementTokens: vi.fn(async () => {}), + setResult: vi.fn(async () => {}), + setStatus: vi.fn(), + isAborted: () => false, +}) + +const invokeProcessor = async ( + proc: PersonaDistillProcessor, + ctx: ReturnType<typeof makeContext>, +) 
=> { + const handle = (proc as any).handle.bind(proc) + await handle({ personaKey: 'inner-self' }, ctx) +} diff --git a/apps/core/test/src/modules/ai/ai-persona/exemplar-selector.spec.ts b/apps/core/test/src/modules/ai/ai-persona/exemplar-selector.spec.ts new file mode 100644 index 00000000000..7a3571b40d2 --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-persona/exemplar-selector.spec.ts @@ -0,0 +1,182 @@ +import { describe, expect, it, vi } from 'vitest' + +import { ExemplarSelector } from '~/modules/ai/ai-persona/exemplar-selector' + +type NoteRow = { + id: string + text: string | null + createdAt: Date + isPublished: boolean +} +type PageRow = { + id: string + text: string | null + createdAt: Date +} + +const makeSelector = ({ + notes, + pages, + redisStore, +}: { + notes: NoteRow[] + pages: PageRow[] + redisStore?: Map<string, string> +}) => { + const store = redisStore ?? new Map<string, string>() + + const buildQueryChain = (rows: any[]) => { + const chain: any = { + from: () => chain, + where: () => chain, + orderBy: () => chain, + limit: () => Promise.resolve(rows), + } + return chain + } + + let selectCallCount = 0 + const db: any = { + select: vi.fn().mockImplementation(() => { + selectCallCount += 1 + if (selectCallCount === 1) { + return buildQueryChain( + notes.map((n) => ({ + sourceId: n.id, + content: n.text, + createdAt: n.createdAt, + })), + ) + } + return buildQueryChain( + pages.map((p) => ({ + sourceId: p.id, + content: p.text, + createdAt: p.createdAt, + })), + ) + }), + } + + const redisClient: any = { + get: vi.fn(async (key: string) => store.get(key) ?? 
null), + set: vi.fn(async (key: string, value: string) => { + store.set(key, value) + return 'OK' + }), + } + const redisService: any = { getClient: () => redisClient } + const configsService: any = { + get: vi.fn(async () => ({ + aiPersona: { + exemplarsLengthMin: 50, + exemplarsLengthMax: 600, + exemplarsCandidateCacheTtlSec: 60, + }, + })), + } + + const selector = new ExemplarSelector(db, redisService, configsService) + return { selector, store, redisClient } +} + +describe('ExemplarSelector', () => { + it('returns empty array when no passages match the length window', async () => { + const { selector } = makeSelector({ + notes: [ + { + id: 'n1', + text: 'short', + createdAt: new Date('2025-01-01T00:00:00Z'), + isPublished: true, + }, + ], + pages: [], + }) + const out = await selector.pickExemplars('inner-self', { count: 3 }) + expect(out).toEqual([]) + }) + + it('honors count and length window with a seeded RNG', async () => { + const longBody = 'A'.repeat(120) + '\n\n' + 'B'.repeat(120) + const { selector } = makeSelector({ + notes: [ + { + id: 'n1', + text: longBody, + createdAt: new Date('2025-06-01T00:00:00Z'), + isPublished: true, + }, + { + id: 'n2', + text: 'C'.repeat(120) + '\n\n' + 'D'.repeat(120), + createdAt: new Date('2025-07-01T00:00:00Z'), + isPublished: true, + }, + ], + pages: [], + }) + const seededRng = mulberry32(42) + const out = await selector.pickExemplars('inner-self', { + count: 2, + lengthMin: 100, + lengthMax: 200, + rng: seededRng, + bypassCache: true, + }) + expect(out).toHaveLength(2) + for (const passage of out) { + expect(passage.content.length).toBeGreaterThanOrEqual(100) + expect(passage.content.length).toBeLessThanOrEqual(200) + expect(['note', 'page']).toContain(passage.sourceType) + } + }) + + it('is deterministic given the same seeded RNG', async () => { + const longText = + 'A'.repeat(150) + + '\n\n' + + 'B'.repeat(150) + + '\n\n' + + 'C'.repeat(150) + + '\n\n' + + 'D'.repeat(150) + const params = { + notes: [ + { + id: 
'n1', + text: longText, + createdAt: new Date('2025-06-01T00:00:00Z'), + isPublished: true, + }, + ], + pages: [], + } + const a = makeSelector(params) + const b = makeSelector(params) + const opts = { + count: 3, + lengthMin: 100, + lengthMax: 200, + rng: mulberry32(123), + bypassCache: true, + } + const out1 = await a.selector.pickExemplars('inner-self', opts) + const out2 = await b.selector.pickExemplars('inner-self', { + ...opts, + rng: mulberry32(123), + }) + expect(out1.map((p) => p.content)).toEqual(out2.map((p) => p.content)) + }) +}) + +function mulberry32(seed: number): () => number { + let s = seed >>> 0 + return () => { + s = (s + 0x6d2b79f5) >>> 0 + let t = s + t = Math.imul(t ^ (t >>> 15), t | 1) + t ^= t + Math.imul(t ^ (t >>> 7), t | 61) + return ((t ^ (t >>> 14)) >>> 0) / 4294967296 + } +} diff --git a/apps/core/test/src/modules/ai/ai-persona/persona-distill.processor.spec.ts b/apps/core/test/src/modules/ai/ai-persona/persona-distill.processor.spec.ts new file mode 100644 index 00000000000..748131865eb --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-persona/persona-distill.processor.spec.ts @@ -0,0 +1,113 @@ +import { describe, expect, it, vi } from 'vitest' + +import { PersonaDistillProcessor } from '~/modules/ai/ai-persona/tasks/persona-distill.processor' + +const makeProcessor = () => { + const db = {} as any + const taskProcessor = { registerHandler: vi.fn() } as any + const aiService = {} as any + const configsService = {} as any + const profileRepo = {} as any + const redisService = {} as any + const eventManager = {} as any + return new PersonaDistillProcessor( + db, + taskProcessor, + aiService, + configsService, + profileRepo, + redisService, + eventManager, + ) +} + +describe('PersonaDistillProcessor.parseDistillOutput', () => { + const processor = makeProcessor() + + it('parses a well-formed JSON envelope', () => { + const raw = JSON.stringify({ + profile: 'The author writes with quiet precision.', + profile_summary: 'Quiet, precise 
voice.', + metadata: { + tone_tags: ['quiet', 'precise'], + recurring_themes: ['solitude'], + signature_phrases: ['it is what it is'], + }, + }) + const parsed = processor.parseDistillOutput(raw) + expect(parsed.profile).toContain('quiet precision') + expect(parsed.profileSummary).toBe('Quiet, precise voice.') + expect(parsed.metadata.toneTags).toEqual(['quiet', 'precise']) + expect(parsed.metadata.recurringThemes).toEqual(['solitude']) + expect(parsed.metadata.signaturePhrases).toEqual(['it is what it is']) + }) + + it('strips markdown fences before parsing', () => { + const raw = + '```json\n' + + JSON.stringify({ + profile: 'hello', + profile_summary: null, + metadata: {}, + }) + + '\n```' + const parsed = processor.parseDistillOutput(raw) + expect(parsed.profile).toBe('hello') + expect(parsed.profileSummary).toBeNull() + }) + + it('falls back to text-only profile on malformed JSON', () => { + const raw = 'This is not JSON at all; just prose about voice.' + const parsed = processor.parseDistillOutput(raw) + expect(parsed.profile).toBe(raw) + expect(parsed.profileSummary).toBeNull() + expect(parsed.metadata.toneTags).toEqual([]) + }) + + it('throws on empty input', () => { + expect(() => processor.parseDistillOutput('')).toThrow( + 'Empty distill output', + ) + expect(() => processor.parseDistillOutput(' \n ')).toThrow( + 'Empty distill output', + ) + }) + + it('falls back when JSON shape is invalid', () => { + const raw = JSON.stringify({ profile: 123, metadata: 'oops' }) + const parsed = processor.parseDistillOutput(raw) + expect(parsed.profile.length).toBeGreaterThan(0) + expect(parsed.profileSummary).toBeNull() + }) +}) + +describe('PersonaDistillProcessor.buildDistillPrompt', () => { + const processor = makeProcessor() + + it('emits system + user with passage headers and JSON instruction', () => { + const messages = processor.buildDistillPrompt([ + { + sourceType: 'post', + sourceId: '1', + title: 'First Post', + createdAt: new Date('2025-01-15T00:00:00Z'), 
+ body: 'lorem ipsum', + }, + { + sourceType: 'note', + sourceId: '2', + title: null, + createdAt: new Date('2025-02-20T00:00:00Z'), + body: 'dolor sit amet', + }, + ]) + expect(messages).toHaveLength(2) + expect(messages[0].role).toBe('system') + expect(messages[0].content).toContain('raw JSON') + expect(messages[1].role).toBe('user') + expect(messages[1].content).toContain('[post:1 — 2025-01-15 — First Post]') + expect(messages[1].content).toContain('[note:2 — 2025-02-20]') + expect(messages[1].content).toContain('lorem ipsum') + expect(messages[1].content).toContain('dolor sit amet') + }) +}) diff --git a/apps/core/test/src/modules/ai/ai-persona/persona-registry.spec.ts b/apps/core/test/src/modules/ai/ai-persona/persona-registry.spec.ts new file mode 100644 index 00000000000..7f664b58ae7 --- /dev/null +++ b/apps/core/test/src/modules/ai/ai-persona/persona-registry.spec.ts @@ -0,0 +1,60 @@ +import { describe, expect, it } from 'vitest' + +import { AppErrorCode } from '~/common/errors' +import { AppException } from '~/common/errors/exception.types' +import { + getPersonaDefinition, + isKnownPersonaKey, + listPersonas, + PERSONA_REGISTRY, + tryGetPersonaDefinition, +} from '~/modules/ai/ai-persona/persona-registry' + +describe('persona-registry', () => { + it('exposes inner-self with profile/retrieval/exemplar capabilities', () => { + const def = PERSONA_REGISTRY['inner-self'] + expect(def).toBeDefined() + expect(def.key).toBe('inner-self') + expect(def.needsProfile).toBe(true) + expect(def.needsRetrieval).toBe(true) + expect(def.usesExemplars).toBe(true) + expect(def.staticPrompt.length).toBeGreaterThan(0) + }) + + it('exposes passerby as fully fixed', () => { + const def = PERSONA_REGISTRY.passerby + expect(def.needsProfile).toBe(false) + expect(def.needsRetrieval).toBe(false) + expect(def.usesExemplars).toBe(false) + expect(def.staticPrompt.length).toBeGreaterThan(0) + }) + + it('list returns both shipped personas', () => { + const list = listPersonas() + const 
keys = list.map((p) => p.key) + expect(keys).toContain('inner-self') + expect(keys).toContain('passerby') + }) + + it('isKnownPersonaKey gates on the registry', () => { + expect(isKnownPersonaKey('inner-self')).toBe(true) + expect(isKnownPersonaKey('passerby')).toBe(true) + expect(isKnownPersonaKey('ghost')).toBe(false) + }) + + it('tryGetPersonaDefinition returns undefined for unknown keys', () => { + expect(tryGetPersonaDefinition('unknown-key')).toBeUndefined() + }) + + it('getPersonaDefinition throws AI_PERSONA_NOT_FOUND for unknown key', () => { + try { + getPersonaDefinition('does-not-exist') + expect.fail('should have thrown') + } catch (error) { + expect(error).toBeInstanceOf(AppException) + expect((error as AppException).code).toBe( + AppErrorCode.AI_PERSONA_NOT_FOUND, + ) + } + }) +}) diff --git a/apps/core/test/src/modules/ai/ai.service.spec.ts b/apps/core/test/src/modules/ai/ai.service.spec.ts index b3bba5aefc8..f9669157180 100644 --- a/apps/core/test/src/modules/ai/ai.service.spec.ts +++ b/apps/core/test/src/modules/ai/ai.service.spec.ts @@ -3,7 +3,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' import { AppException } from '~/common/errors/exception.types' import { AiService } from '~/modules/ai/ai.service' -import { AIProviderType } from '~/modules/ai/ai.types' +import { AIFeatureKey, AIProviderType } from '~/modules/ai/ai.types' import { ConfigsService } from '~/modules/configs/configs.service' // Mock the runtime factory @@ -159,4 +159,53 @@ describe('AiService', () => { expect(runtime.providerInfo.id).toBe('enabled-second') }) }) + + describe('embedding model', () => { + it('reports unconfigured when no embedding assignment exists', async () => { + configsService.get.mockResolvedValueOnce({ + ...mockAiConfig, + embeddingModel: undefined, + }) + + await expect( + service.hasFeatureModel(AIFeatureKey.Embedding), + ).resolves.toBe(false) + }) + + it('throws when resolving embedding without explicit assignment', async () => { + 
configsService.get.mockResolvedValueOnce({ + ...mockAiConfig, + embeddingModel: undefined, + }) + + await expect(service.getEmbeddingModel()).rejects.toBeInstanceOf( + AppException, + ) + }) + + it('does not fallback when embedding assignment points to a missing provider', async () => { + configsService.get.mockResolvedValueOnce({ + ...mockAiConfig, + embeddingModel: { providerId: 'missing' }, + }) + + await expect( + service.hasFeatureModel(AIFeatureKey.Embedding), + ).resolves.toBe(false) + await expect(service.getEmbeddingModel()).rejects.toBeInstanceOf( + AppException, + ) + }) + + it('uses the configured embedding assignment', async () => { + configsService.get.mockResolvedValueOnce({ + ...mockAiConfig, + embeddingModel: { providerId: 'main', model: 'text-embedding-3-small' }, + }) + + const runtime = await service.getEmbeddingModel() + expect(runtime.providerInfo.id).toBe('main') + expect(runtime.providerInfo.model).toBe('text-embedding-3-small') + }) + }) }) diff --git a/docs/superpowers/specs/2026-05-23-ai-echo-engine-design.md b/docs/superpowers/specs/2026-05-23-ai-echo-engine-design.md new file mode 100644 index 00000000000..83e63af41c2 --- /dev/null +++ b/docs/superpowers/specs/2026-05-23-ai-echo-engine-design.md @@ -0,0 +1,441 @@ +# ai-echo Engine — Design + +- **Date:** 2026-05-23 +- **Status:** Design — pending review +- **Author:** Innei (brainstormed with assistant) +- **Parent:** [AI Echo System Root](./2026-05-23-ai-echo-system-root.md) +- **Sibling specs:** [ai-embeddings](./2026-05-23-ai-embeddings-design.md), [ai-persona](./2026-05-23-ai-persona-design.md), [ai-memory](./2026-05-23-ai-memory-design.md) + +## 1. 
Scope + +This spec covers the **generic echo engine** and its **first concrete consumer** (the `recently` scenario): + +- New module: `apps/core/src/modules/ai/ai-echo/` +- New table: `ai_echoes` (polymorphic across scenarios) +- New abstraction: `EchoScenario` interface + Nest multi-provider registration +- New task: `ECHO_GENERATE` +- One scenario provider shipped in MVP: `recentlyEchoScenarioProvider`, registered from `apps/core/src/modules/recently/scenarios/recently-echo.scenario.ts` +- New endpoints: public read + admin manage + +Cross-cutting decisions (IDs, model config, scenario registration pattern, failure principles, visibility) are defined in the root spec; this spec applies them. + +## 2. Module layout + +``` +apps/core/src/modules/ai/ai-echo/ +├── ai-echo.module.ts +├── ai-echo.controller.ts # admin + public endpoints +├── ai-echo.service.ts # orchestrator: takes (scenarioKey, subjectType, subjectId), enqueues, returns echo rows +├── ai-echo.repository.ts # extends BaseRepository +├── ai-echo.schema.ts # Zod DTOs (RegenerateDto, EditEchoDto, …) +├── ai-echo.types.ts +├── ai-echo.views.ts # AiEchoViews.public / .admin +├── ai-echo.constants.ts # ECHO_SCENARIO token, default config keys +├── ai-echo.errors.ts # registers new AppErrorCode entries +├── scenario.types.ts # EchoScenario interface + helper types +├── echo-prompt-builder.ts # uniform prompt assembly given profile / retrieval / memories / exemplars +└── tasks/ + └── echo-generate.processor.ts # AiTaskType.ECHO_GENERATE processor + +apps/core/src/modules/recently/scenarios/ +└── recently-echo.scenario.ts # Nest provider {provide: ECHO_SCENARIO, useValue: ..., multi: true} +``` + +`recently.module.ts` imports `AiEchoModule` and adds the scenario provider. Nothing in `recently.service.ts` or `recently.controller.ts` changes. + +## 3. 
Data model + +### 3.1 `ai_echoes` table + +```sql +-- migration 00XX_ai_echoes.sql +CREATE TABLE ai_echoes ( + id text PRIMARY KEY, -- snowflake string (pkText) + scenario_key text NOT NULL, -- 'recently' | future + subject_type text NOT NULL, -- 'recently' | 'comment' | 'post' | … + subject_id text NOT NULL, -- snowflake string of the subject + persona_key text NOT NULL, -- 'inner-self' | 'passerby' | … + content text, -- null until status='ready' + status text NOT NULL, -- pending|generating|ready|edited|failed|archived + model text, -- resolved model id at generation time + metadata jsonb NOT NULL DEFAULT '{}', -- {taskId, retrievalIds[], retrievalSimilarities[], memoryIds[], profileRefreshedAt, errorCode, aborted, …} + generated_at timestamptz, + edited_at timestamptz, + edited_by text, -- user id (text) + created_at timestamptz NOT NULL DEFAULT now(), + updated_at timestamptz NOT NULL DEFAULT now() +); + +CREATE INDEX ai_echoes_subject + ON ai_echoes (scenario_key, subject_type, subject_id); + +CREATE INDEX ai_echoes_status + ON ai_echoes (scenario_key, status); + +CREATE INDEX ai_echoes_persona_subject + ON ai_echoes (subject_type, subject_id, persona_key); +``` + +Drizzle definition added in `packages/db-schema/src/schema/ai.ts`. + +### 3.2 Status state machine + +``` +pending ──► generating ──► ready ──► edited (terminal-but-mutable) + │ │ │ + ▼ ▼ ▼ + failed failed archived (e.g. force-regenerate, subject deleted) +``` + +`failed` is terminal; operator action moves it via regenerate (which creates a new row, leaves the failed row archived). + +`archived` is terminal; rows are kept for audit, hidden from public reads. + +## 4. 
EchoScenario abstraction + +### 4.1 Interface + +```ts +// scenario.types.ts +export interface EchoScenario<Subject = unknown> { + /** Unique key per scenario; used as ai_echoes.scenario_key */ + readonly key: string + + /** Business event that auto-triggers; omit for purely on-demand scenarios */ + readonly triggerEvent?: BusinessEvents + + /** Personas invoked by default on each generation */ + readonly defaultPersonas: PersonaKey[] + + /** Whether generated content should persist to ai_echoes (false for streaming/ephemeral) */ + readonly persistEchoes?: boolean // default true + + /** Event emitted when an echo row reaches status='ready' */ + readonly emitOnReady?: BusinessEvents + + /** Fetch the subject by id; null when subject no longer exists (echo aborts) */ + loadSubject(subjectId: string): Promise<Subject | null> + + /** Extract the natural-language query used for retrieval and memory recall; null skips both */ + extractRetrievalQuery(subject: Subject): string | null + + /** Build the chat message list from all available context */ + buildPrompt(input: EchoPromptInput<Subject>): ChatMessage[] + + /** Optional post-processing (strip code fences, trim, etc.) 
*/ + postProcess?(content: string, subject: Subject): string +} + +export interface EchoPromptInput<Subject = unknown> { + subject: Subject + persona: PersonaDefinition + profile: PersonaProfile | null // null if persona.needsProfile=false or no row yet + retrieval: RetrievalResult[] // [] when below similarity threshold or query null + memories: AiMemory[] // [] when none above similarity threshold + exemplars: ExemplarPassage[] // [] for personas without exemplars +} +``` + +### 4.2 Registration + +```ts +// ai-echo.constants.ts +export const ECHO_SCENARIO = Symbol('ECHO_SCENARIO') + +// recently/scenarios/recently-echo.scenario.ts +export const recentlyEchoScenarioProvider: Provider = { + provide: ECHO_SCENARIO, + useValue: { + key: 'recently', + triggerEvent: BusinessEvents.RECENTLY_CREATE, + defaultPersonas: ['inner-self', 'passerby'], + persistEchoes: true, + emitOnReady: BusinessEvents.RECENTLY_ECHO_LANDED, + async loadSubject(id) { /* RecentlyService.findById(id) via injected service */ }, + extractRetrievalQuery(recently) { return recently.content ?? null }, + buildPrompt(input) { return buildRecentlyEchoPrompt(input) }, + } satisfies EchoScenario, + multi: true, +} + +// ai-echo.service.ts +constructor(@Inject(ECHO_SCENARIO) private readonly scenarios: EchoScenario[]) { + this.byKey = new Map(scenarios.map((s) => [s.key, s])) +} +``` + +The orchestrator subscribes to each `scenario.triggerEvent` via the event manager during `OnModuleInit`. On the event, it calls `dispatch(scenario.key, subjectType, subjectId)`. + +Subjects whose deletion should cascade to echoes are handled by per-scenario listeners. The recently scenario also subscribes to `RECENTLY_DELETE` and calls `aiEchoService.handleSubjectDeleted('recently', id)` — which marks in-flight rows `failed/aborted` and `ready` rows `archived`. + +### 4.3 Dependency direction + +`recently.module` imports `AiEchoModule` and provides `recentlyEchoScenarioProvider`. 
`ai-echo.module` does **not** import `recently.module` — it only knows about `ECHO_SCENARIO` providers. The recently scenario file's `loadSubject` resolves `RecentlyService` via constructor injection (the provider is `useFactory` or accesses services through a small adapter pattern; see implementation plan). + +This direction avoids circular imports and keeps the engine generic. + +## 5. Generation pipeline + +### 5.1 Orchestrator (synchronous, fast) + +``` +event RECENTLY_CREATE { recently } received + └─► AiEchoService.dispatch('recently', 'recently', recently.id) + 1. scenario = scenarios.byKey('recently') + 2. for each persona in scenario.defaultPersonas: + a. INSERT ai_echoes (status='pending', persona_key, scenario_key, subject_type, subject_id, metadata={}) + b. taskId = aiTaskService.enqueue(ECHO_GENERATE, { echoId }) + c. UPDATE ai_echoes SET metadata = jsonb_set(metadata, '{taskId}', $taskId) WHERE id = $echoId + 3. return +``` + +The HTTP `recently.create` response is already returned by this point — dispatch runs from an event listener, not the HTTP path. + +### 5.2 `ECHO_GENERATE` task processor + +``` +EchoGenerateTaskProcessor.handle({ echoId }): + 1. row = repo.findById(echoId) + 2. if !row OR row.status NOT IN ('pending', 'generating'): return // no-op (idempotent replay guard) + 3. UPDATE status='generating', updated_at=now() + 4. scenario = scenariosByKey.get(row.scenario_key) + if !scenario: fail('AI_ECHO_SCENARIO_NOT_REGISTERED'); return + 5. subject = await scenario.loadSubject(row.subject_id) + if !subject: fail('AI_ECHO_SUBJECT_NOT_FOUND', terminal=true); return + 6. persona = personaRegistry.get(row.persona_key) + if !persona: fail('AI_PERSONA_NOT_FOUND', terminal=true); return + 7. profile = persona.needsProfile ? aiPersonaService.getProfile(persona.key) : null + 8. query = scenario.extractRetrievalQuery(subject) + 9. retrieval = (persona.needsRetrieval && query) + ? 
await aiEmbeddingsService.search(query, { topK, minSimilarity, sourceTypes: ['note','page'] }) + : [] + 10. memories = query ? await aiMemoryService.recall({ scope: ['global', `persona:${persona.key}`], query, topK, minSimilarity }) : [] + 11. exemplars = persona.usesExemplars + ? await aiPersonaService.pickExemplars(persona.key, { count, query }) + : [] + 12. messages = scenario.buildPrompt({ subject, persona, profile, retrieval, memories, exemplars }) + 13. runtime = await aiService.getEchoModel() + 14. content = await runtime.chat(messages) + 15. content = scenario.postProcess?.(content, subject) ?? content + 16. UPDATE row: + status='ready', content, model=runtime.modelId, generated_at=now(), + metadata = metadata || { + retrievalIds: retrieval.map(r => `${r.sourceType}:${r.sourceId}#${r.chunkIndex}`), + retrievalSimilarities: retrieval.map(r => r.similarity), + memoryIds: memories.map(m => m.id), + profileRefreshedAt: profile?.refreshedAt ?? null, + } + 17. if scenario.emitOnReady: emit event with the row, scope=TO_SYSTEM_VISITOR +``` + +### 5.3 Failure handling + +- Transient failures (network, 5xx from provider): task queue retries with backoff `[5_000, 30_000, 120_000]`, `maxRetries=3`. On final failure, write `status='failed'`, `metadata.errorCode='AI_ECHO_GENERATION_FAILED'`, `metadata.upstreamMessage` (truncated to 1k chars). +- Structural failures (subject gone, scenario missing, model unconfigured): terminate immediately, no retry. +- Quota exceeded (`AI_ECHO_DAILY_QUOTA_EXCEEDED`): terminate immediately, no retry; surfaced via admin list filter. + +The `recently` HTTP create path never sees any of this. + +### 5.4 Regenerate flow + +``` +POST /ai-echo/regenerate/recently/:subjectId body: { personaKey, force?: boolean } + + 1. existing = repo.findOne({ scenario_key:'recently', subject_type:'recently', subject_id, persona_key }) + 2. if existing && existing.status IN ('pending','generating') && !force: + throw 409 AI_ECHO_REGENERATE_IN_PROGRESS + 3. 
if existing && force: + UPDATE existing SET status='archived', updated_at=now() + 4. dispatch new row via same flow as 5.1 + 5. return { echoId: newRow.id, taskId: newRow.metadata.taskId } +``` + +Force-archived rows are not deleted; admin list can still see them with `?status=archived`. + +### 5.5 Subject-delete cascade + +Recently scenario listener for `RECENTLY_DELETE`: + +``` +aiEchoService.handleSubjectDeleted('recently', id): + - rows = repo.findAll({ scenario_key:'recently', subject_type:'recently', subject_id: id }) + - for r in rows: + if r.status IN ('pending', 'generating'): + UPDATE status='failed', metadata.aborted=true + (the running task will check status on next step and no-op) + else: + UPDATE status='archived' +``` + +The task processor's step-2 guard ensures any in-flight work either completes harmlessly (if it already passed the guard) or stops before writing. + +## 6. Prompt assembly (recently scenario) + +`echo-prompt-builder.ts` exports `buildRecentlyEchoPrompt(input)`. For `inner-self`: + +``` +SYSTEM: + {persona.staticPrompt} + + Voice summary: + {input.profile.profileSummary || input.profile.profile} + + Mimic the cadence of these passages: + 1. {exemplars[0].content} + 2. {exemplars[1].content} + ... + + Canonical facts (apply only if relevant): + - {memories[0].content} + - {memories[1].content} + ... + + Relevant past thoughts (reference only if directly applicable): + [{sourceType}:{sourceId}@{date}] {retrieval[0].content} + ... + + RULES: + - Reply in 1–3 short sentences. + - {if retrieval and memories are both empty:} + Do NOT claim to remember the author's past ("you wrote", "back when", "I remember", "我记得"). + {end if} + - Match the author's first-person voice. + - Plain markdown only; no code fences. + +USER: + {subject.content} +``` + +For `passerby` the SYSTEM is the fixed prompt from `ai-persona/prompts.ts` with no profile, exemplars, memories, or retrieval. USER is the same `subject.content`. 
+ +The no-unverified-memory rule is the most important MVP guardrail; it's enforced by prompt and verified by unit test (assert the rule appears whenever retrieval+memories are empty). + +## 7. API surface + +All endpoints under `@ApiController('ai-echo')` and the V2 envelope. Errors use `BizException` with codes from §8. + +### 7.1 Public + +| Method | Path | Auth | Body / Query | Returns | +| --- | --- | --- | --- | --- | +| GET | `/ai-echo/by-subject/:subjectType/:subjectId` | — | `?personaKey=&scenarioKey=` | `AiEchoViews.public[]` (filters to `status IN ('ready','edited')`) | + +### 7.2 Admin (all `@Auth()`) + +| Method | Path | Body | Returns | +| --- | --- | --- | --- | +| POST | `/ai-echo/regenerate/:subjectType/:subjectId` | `{ personaKey, force?: boolean }` | `{ echoId, taskId }` | +| PUT | `/ai-echo/:id` | `{ content }` | `AiEchoViews.admin` (`status='edited'`, `edited_at`, `edited_by` populated) | +| DELETE | `/ai-echo/:id` | — | 204; soft-deletes to `status='archived'` | +| GET | `/ai-echo` | `?scenarioKey=&status=&personaKey=&subjectType=&page=&size=` | Paginated `AiEchoViews.admin[]` with `MetaObjectBuilder` pagination | + +Rating endpoint, KPI endpoints, and streaming endpoints are not part of this MVP. + +### 7.3 Views + +```ts +// ai-echo.views.ts +export const AiEchoViews = { + public: ZodObject<{ + id, scenarioKey, subjectType, subjectId, personaKey, + content, status, generatedAt, editedAt, + metadata: { profileRefreshedAt?, retrievalIds?, memoryIds? }, // public-safe subset + }>, + admin: ZodObject<{ ...full row... }>, +} +``` + +Recently `getList` / `getOne` do **not** embed echoes. The Yohaku frontend issues a separate `GET /ai-echo/by-subject/recently/:id` per item (or batch). This decision keeps recently endpoints cacheable and decouples failure modes. + +## 8. 
Errors + +New entries in `AppErrorCode`: + +| Code | HTTP | Notes | +| --- | --- | --- | +| `AI_ECHO_NOT_FOUND` | 404 | | +| `AI_ECHO_SUBJECT_NOT_FOUND` | 404 | Returned by regenerate when subject is gone; processor uses internally to terminal-fail. | +| `AI_ECHO_SCENARIO_NOT_REGISTERED` | 400 | Defensive; only on misconfigured deploy. | +| `AI_ECHO_GENERATION_FAILED` | 500 | Persisted on row; not normally surfaced to public. | +| `AI_ECHO_REGENERATE_IN_PROGRESS` | 409 | When `force=false` and a row is already pending/generating. | +| `AI_ECHO_MODEL_NOT_CONFIGURED` | 400 | `AIFeatureKey.Echo` has no assignment. | +| `AI_ECHO_DAILY_QUOTA_EXCEEDED` | 429 | Task-level fail; queue does not retry. | + +## 9. Configuration + +Schema additions in `configs.schema.ts → AISchema` (see root §4.4 for conventions): + +```ts +echoModel: field.plain(AIModelAssignmentSchema.optional(), 'Echo model'), +enableEcho: field.toggle(z.boolean().optional(), 'Allow AI echo'), +enableAutoGenerateEchoOnCreate: field.toggle(z.boolean().optional(), 'Auto-generate echo on recently create', + { description: 'Requires enableEcho to also be enabled' }), +echoDailyQuota: field.number( + z.preprocess(/* numeric coerce */, z.number().int().min(0).optional()), + 'Echo daily quota', + { description: 'Max echo generation calls per day; 0 means unlimited. Default 200' }), +echoRetrievalTopK: field.number(/* … */, 'Echo retrieval top-K', { description: 'Default 5' }), +echoRetrievalMinSimilarity: field.number(/* numeric 0..1 */, 'Echo retrieval min similarity', + { description: 'Cosine similarity threshold; below this no retrieval section is injected. Default 0.72' }), +echoExemplarsCount: field.number(/* … */, 'Echo exemplars count', { description: 'Default 4' }), +``` + +The `enableAutoGenerateEchoOnCreate` toggle gates the orchestrator's event listener; when off, echoes are only generated via the admin regenerate endpoint. + +## 10. 
Testing + +Project conventions: `vitest`, `@testcontainers/postgresql`, `redis-mock`, `createE2EApp`. + +### 10.1 Unit + +- `echo-prompt-builder`: deterministic given inputs; the no-unverified-memory rule appears iff retrieval+memories are empty. +- Scenario registration: `AiEchoService` resolves scenarios by key, throws when unknown. +- `EchoGenerateTaskProcessor` step-2 guard: row with status NOT IN ('pending','generating') causes no writes and emits no events. + +### 10.2 Integration (pg + redis containers) + +- `recently.create` → mock runtime → two `ai_echoes` rows reach `status='ready'`; `RECENTLY_ECHO_LANDED` fires twice on the event bus. +- `recently.delete` mid-flight → in-flight row marked `failed/aborted`; subsequent task wake is a no-op. +- `POST /ai-echo/regenerate` (force=true) archives old row, inserts fresh one. +- `PUT /ai-echo/:id` → `status='edited'`, `edited_by` matches authenticated user. +- Runtime throws → row `status='failed'`, `metadata.errorCode='AI_ECHO_GENERATION_FAILED'`, retries observed up to `maxRetries`. +- `echoDailyQuota=1` → second enqueue terminates with `AI_ECHO_DAILY_QUOTA_EXCEEDED`. +- Replay (task fires twice for same echoId after first ready) → second invocation no-ops. + +### 10.3 Mocks + +- Extend or add `test/mock/processors/ai-runtime.mock.ts` with controllable chat responses + throw modes. Shared with persona + embeddings tests. + +## 11. Migration + +``` +00XX_ai_echoes.sql -- this spec +``` + +Additive only. Drizzle schema for `ai_echoes` added in `packages/db-schema/src/schema/ai.ts`. Repository registered in `repository.tokens.ts` per project convention. + +## 12. 
MVP / v2 boundary + +**MVP (this spec):** +- ai-echo module, `ai_echoes` table, EchoScenario abstraction, multi-provider registration +- Recently scenario provider in `recently/scenarios/` +- Generate / regenerate / edit / delete / list endpoints +- Public list endpoint +- ECHO_GENERATE task with idempotency guards +- Subject-delete cascade for recently +- Configuration (`echoModel`, `enableEcho`, `enableAutoGenerateEchoOnCreate`, quota, retrieval thresholds, exemplar count) + +**v2 (not this spec):** +- `POST /ai-echo/:id/rating` endpoint and rating field +- Comment-reply scenario (subscribes to `COMMENT_CREATE` with article filter) +- Reader-companion scenario (`persistEchoes=false`, uses ai-inflight streaming) +- Per-scenario quota / pricing surfaces +- Streaming echo delivery (ai-inflight integration) + +## 13. Acceptance criteria (engine-specific) + +- `EchoScenario` providers register cleanly via Nest DI; unit test verifies multi-injection works. +- A new `recently` row produces two echoes within seconds; force-regenerate (`force=true`) archives the previous row, inserts a fresh one, and enqueues a new task for it. +- Step-2 status guard verified by an integration test that double-fires the same task. +- All Echo-related errors carry stable codes from §8. +- Adding a hypothetical new scenario in test (`provide: ECHO_SCENARIO, multi: true`) requires zero changes to `ai-echo.service.ts`. diff --git a/docs/superpowers/specs/2026-05-23-ai-echo-system-root.md b/docs/superpowers/specs/2026-05-23-ai-echo-system-root.md new file mode 100644 index 00000000000..0299ef0725d --- /dev/null +++ b/docs/superpowers/specs/2026-05-23-ai-echo-system-root.md @@ -0,0 +1,183 @@ +# AI Echo System — Root + +- **Date:** 2026-05-23 +- **Status:** Design — pending review +- **Author:** Innei (brainstormed with assistant) +- **Scope:** `apps/core/` server. Admin UI (admin-vue3) and Yohaku frontend are downstream and tracked separately. 
+ +This is the root spec for an AI-driven response system anchored on the `recently` (wishing-well / 树洞) module. It coordinates four sibling sub-specs and records cross-cutting decisions that apply to all of them. + +--- + +## 1. Vision + +The site owner publishes short markdown thoughts via `recently`. The owner's framing: *if it's a wishing well, it should have echoes.* The system gives each new thought a small set of AI-written replies, voiced by configurable **personas**: + +- **inner-self** (另我) — dynamic voice distilled from the owner's own writing (notes + pages + selected past recently). +- **passerby** (路人) — a fixed prompt; no distillation, no retrieval. + +Echoes are public; visitors see them under each thought. The system is intentionally generic so that future scenarios (comment auto-reply, reader companion) plug into the same engine without rewriting it. + +## 2. Module map + +``` +apps/core/src/modules/ai/ +├── ai/ (existing — provider/runtime/feature resolution) +├── ai-task/ (existing — async task queue) +├── ai-inflight/ (existing — streaming substrate, not used in MVP echoes) +├── ai-embeddings/ (NEW — see ai-embeddings spec) +├── ai-persona/ (NEW — see ai-persona spec) +├── ai-memory/ (NEW — see ai-memory spec) +└── ai-echo/ (NEW — see ai-echo engine spec) +``` + +Dependency direction: + +``` +ai-echo ──► ai-persona ──► ai-embeddings ──► ai + │ │ + ├──► ai-memory ──► ai-embeddings + │ + └──► ai-task, gateway, event-manager, configs +``` + +## 3. 
Sub-specs + +| Spec | Owns | Status | +| --- | --- | --- | +| [ai-echo engine](./2026-05-23-ai-echo-engine-design.md) | `ai-echo` module, `ai_echoes` table, `EchoScenario` abstraction, recently scenario | Design | +| [ai-embeddings](./2026-05-23-ai-embeddings-design.md) | `ai-embeddings` module, `corpus_embeddings` table, pgvector custom type, chunking, retrieval | Design | +| [ai-persona](./2026-05-23-ai-persona-design.md) | `ai-persona` module, `persona_profiles` table, `PersonaRegistry`, distillation | Design | +| [ai-memory](./2026-05-23-ai-memory-design.md) | `ai-memory` module, `ai_memories` table, recall pipeline | Design | + +## 4. Cross-cutting decisions + +These bind every sub-spec; do not redecide them locally. + +### 4.1 Storage / IDs + +All new tables use **text PK/FK** via the existing `pkText()` / `refText()` helpers in `packages/db-schema/src/schema/columns.ts`. IDs are snowflake values serialized as text — same as `recentlies.id`. No `bigserial` or `bigint` for IDs in new tables. + +### 4.2 pgvector substrate + +Retrieval is built on pgvector, not on Postgres `tsvector`. Rationale: at personal-blog Chinese corpus scale, full-text search produces enough silent misses to make the "voice that remembers" persona feel incoherent. + +Operational constraints (the **"A-lite"** profile): + +- Async embed writes; missing embeddings are normal and do not block reads. +- Hybrid retrieval gate: top-k followed by a minimum similarity threshold. If empty after the gate, the echo prompt is built without a retrieval section and the prompt explicitly forbids "I remember" claims. +- No ANN index in MVP (rows ≤ ~10k → exact scan suffices). A future migration adds HNSW or IVFFLAT. +- The `vector` column stores no fixed dimension at the type level; each row records its own `embedding_model` and `dim` so multiple models can coexist during gradual rebuilds. + +### 4.3 Distance vs similarity + +pgvector operators return **distance** (`<=>` is cosine distance). 
All sub-specs and config keys use **similarity** in user-facing names, defined as `similarity = 1 - cosine_distance`. Helper functions in `ai-embeddings` compute and expose similarity scores; threshold config keys are named `minSimilarity`, never `minScore` or `maxDistance` (the latter only appears in SQL fragments where it is the actual operator output). + +### 4.4 AI model configuration + +Model selection follows the existing `AIFeatureKey` + `AIModelAssignment` pattern (see `apps/core/src/modules/configs/configs.schema.ts` and `apps/core/src/modules/ai/ai.types.ts`). + +Add to `AIFeatureKey`: + +- `Echo` — echo generation +- `Embedding` — embedding model +- `PersonaDistill` — persona profile distillation (optional; falls back to `Echo` when unset) + +Add to `AISchema` as `field.plain(AIModelAssignmentSchema.optional(), ...)`: + +- `echoModel` +- `embeddingModel` +- `personaDistillModel` + +Add toggles consistent with existing `enableSummary` / `enableInsights` style: + +- `enableEcho` (master switch) +- `enableAutoGenerateEchoOnCreate` (auto-generate on `recently` create) + +Do **not** introduce parallel `xxxProvider` keys; provider is resolved via `AIModelAssignment.providerId`. + +The OpenAI-compatible runtime's model listing (see `runtime/openai-compatible.runtime.ts`) is split into `listChatModels()` (existing behavior) and `listEmbeddingModels()` (reverses the filter). Admin UI calls the appropriate variant per slot. + +### 4.5 Scenario registration + +The `ai-echo` engine is generic and does not hard-code `recently`. Scenarios register via **Nest multi-provider injection**: an `ECHO_SCENARIO` injection token marked `provide` with `multi: true`. Each scenario module declares one provider; the engine constructor receives `EchoScenario[]`. No global registry, no module-init side effects, no race between two ways of subscribing. 
+ +The `recently` module ships one provider (`recentlyEchoScenarioProvider`); the engine consumes it without knowing about `recently` specifically. Future scenarios (comment-reply, reader-companion) ship the same way. + +### 4.6 Task lifecycle and idempotency + +`ai_echoes` rows persist `metadata.taskId` once a task is enqueued. The task processor reloads the row before doing work and **only proceeds when status is `pending` or `generating`**. Rows in `ready`, `edited`, `failed`, or `archived` are skipped and the task completes as a no-op. This prevents: + +- Zombie tasks re-running after `regenerate force=true` archived the original row +- Replay of stale tasks after queue recovery / restart +- Two writers racing on the same `(scenarioKey, subjectType, subjectId, personaKey)` tuple + +Subject-delete handlers mark in-flight echoes `failed` with `metadata.aborted=true` and suppress the ready broadcast. + +### 4.7 Failure principles + +- Public read endpoints never crash on missing AI rows. Empty data is normal. +- `recently` create is never blocked by AI work. If echo generation fails for every persona, the recently entry still publishes. +- Transient upstream failures retry via `ai-task` backoff (max 3). Structural errors (subject missing, model unconfigured, scenario unknown) terminate immediately and persist `metadata.errorCode`. +- Operator triage runs through the admin echo list (`GET /ai-echo?status=failed`). + +### 4.8 Visibility + +Echoes are **public** under each `recently` entry. Visitors see them via `GET /ai-echo/by-subject/recently/:id`. Hidden states (`pending`, `generating`, `failed`, `archived`) are filtered out of public reads; `ready` and `edited` are shown. + +## 5. Phasing + +Each sub-spec marks its own MVP scope and v2 roadmap. The system-level cuts: + +**MVP (this sprint):** +- ai-echo: engine + `ai_echoes` + recently scenario + public list + admin regenerate/edit/delete/list. No rating endpoint. 
+- ai-embeddings: corpus for `page` and `note` only; sync events; retrieval; admin backfill + stats. No reindex endpoint, no recently embedding. +- ai-persona: single-pass LLM distill (no map/reduce), manual refresh only. Exemplar selection by length window + recency-weighted random (no vector-by-query selection). +- ai-memory: CRUD + recall + total count. No `/from-passage` LLM draft, no KPI nudge widget, no STM. + +**v2 (next slice):** +- ai-echo: rating endpoint, comment-reply scenario, reader-companion scenario. +- ai-embeddings: recently embedding (eligibility threshold), reindex endpoint, HNSW/IVFFLAT index migration. +- ai-persona: map/reduce distill, auto-refresh cron + threshold, vector-based exemplar selection, optional `persona_exemplars` table. +- ai-memory: `/from-passage` LLM draft, extract task with forced review workflow, decay task, supersede detection, KPI nudge widget, STM as cached rolling summary. + +The MVP slice is self-coherent: the operator can configure providers/models, run backfill, refresh persona once, seed a few memories, and start receiving echoes on new recently posts. Every v2 item slots into an existing table or interface; no v2 work invalidates an MVP migration. + +## 6. Migration order + +All migrations are additive (expand-only); rolling deploys tolerate them. + +1. `00XX_ai_vector_extension.sql` — `CREATE EXTENSION IF NOT EXISTS vector;` +2. `00XX_ai_corpus_embeddings.sql` (ai-embeddings spec) +3. `00XX_ai_persona_profiles.sql` (ai-persona spec) +4. `00XX_ai_memories.sql` (ai-memory spec) +5. `00XX_ai_echoes.sql` (ai-echo engine spec) + +Drizzle pgvector helper (`vector` custom type) is added in `packages/db-schema/src/schema/columns.ts` as part of the ai-embeddings work; sub-spec owns the implementation details. + +Data backfill (`corpus_embeddings` for existing notes/pages) runs as an app-migration entry (`src/database/app-migrations/`) using the existing ledger pattern — see ai-embeddings spec. + +## 7. 
System-level acceptance criteria + +After deploying all four sub-specs and configuring providers/models: + +- A new `recently` entry produces two `ai_echoes` rows (one per default persona) within seconds, both reaching `status='ready'` with non-empty content under normal AI conditions. +- WebSocket subscribers receive a `RECENTLY_ECHO_LANDED` event per echo. +- Public `GET /ai-echo/by-subject/recently/:id` returns only `ready` and `edited` echoes. +- Admin can edit, delete, and regenerate any echo. Regenerate with `force=true` archives the old row and produces a fresh one. +- Persona refresh produces a populated `persona_profiles` row for `inner-self`. Concurrent refresh requests return 409. +- Embedding sync produces correct `corpus_embeddings` rows for new notes/pages; re-syncing an unchanged source is a no-op (content-hash dedupe). +- Memory CRUD works; recall integrates into echo prompts and is reflected in `ai_echoes.metadata.memoryIds`. +- With embedding model unconfigured, sync and retrieval no-op gracefully; echoes still generate (without a retrieval section). +- All existing `recently` and AI tests pass unchanged. + +## 8. Cross-spec open questions + +- **`AI_ECHO_DAILY_QUOTA_EXCEEDED` granularity**: global vs per-scenario. MVP: global. If reader-companion v2 has much higher volume, split. +- **Public visibility of `failed` echoes**: MVP hides. Confirm — alternative is a subtle placeholder ("the well is quiet today"). +- **STM cadence**: MVP defers entirely. If echoes start repeating phrasing within a week of usage, ship the cached rolling summary from v2 early. +- **Recently embed eligibility threshold**: MVP does not embed recently at all. v2 calibrates the threshold from observed usage. 
+ +--- + +*This root cross-references each sub-spec; sub-specs do not duplicate cross-cutting decisions — they cite this root.* diff --git a/docs/superpowers/specs/2026-05-23-ai-embeddings-design.md b/docs/superpowers/specs/2026-05-23-ai-embeddings-design.md new file mode 100644 index 00000000000..54aabf724ec --- /dev/null +++ b/docs/superpowers/specs/2026-05-23-ai-embeddings-design.md @@ -0,0 +1,355 @@ +# ai-embeddings — Design + +- **Date:** 2026-05-23 +- **Status:** Design — pending review +- **Author:** Innei (brainstormed with assistant) +- **Parent:** [AI Echo System Root](./2026-05-23-ai-echo-system-root.md) +- **Sibling specs:** [ai-echo engine](./2026-05-23-ai-echo-engine-design.md), [ai-persona](./2026-05-23-ai-persona-design.md), [ai-memory](./2026-05-23-ai-memory-design.md) + +## 1. Scope + +The corpus embedding substrate consumed by `ai-echo`, `ai-persona`, and `ai-memory`: + +- New module: `apps/core/src/modules/ai/ai-embeddings/` +- New extension: `vector` (pgvector) +- New table: `corpus_embeddings` +- New Drizzle helper: `vector` custom column type in `packages/db-schema/src/schema/columns.ts` +- New task: `EMBED_SYNC` +- Event listeners that keep the corpus in sync with source content (posts, notes, pages — see §6.1; recently deferred to v2) +- New endpoints: admin backfill + stats +- New retrieval API consumed by sibling modules + +Cross-cutting decisions (IDs, distance vs similarity, model config) are defined in the root spec. + +## 2. 
Module layout + +``` +apps/core/src/modules/ai/ai-embeddings/ +├── ai-embeddings.module.ts +├── ai-embeddings.controller.ts +├── ai-embeddings.service.ts # public API: embedBatch, search, syncSource +├── ai-embeddings.repository.ts +├── ai-embeddings.schema.ts # Zod DTOs (BackfillDto, …) +├── ai-embeddings.types.ts # RetrievalResult, ChunkSpec, … +├── ai-embeddings.constants.ts +├── ai-embeddings.errors.ts +├── chunker.ts # paragraph-aware chunking, deterministic +├── listeners/ +│ ├── note-events.listener.ts +│ └── page-events.listener.ts +└── tasks/ + ├── embed-sync.processor.ts + └── corpus-backfill.driver.ts # batched driver used by both endpoint and app-migration +``` + +`recently` event listener is intentionally deferred to v2 (see root §5). + +## 3. pgvector setup + +### 3.1 Extension + +```sql +-- migration 00XX_ai_vector_extension.sql +CREATE EXTENSION IF NOT EXISTS vector; +``` + +### 3.2 Drizzle custom type + +Added in `packages/db-schema/src/schema/columns.ts`: + +```ts +import { customType } from 'drizzle-orm/pg-core' + +export const vector = customType<{ data: number[]; driverData: string }>({ + dataType() { return 'vector' }, // dimension-less; per-row dim stored in a separate column + toDriver(value) { + if (!Array.isArray(value)) throw new TypeError('vector expects number[]') + return `[${value.join(',')}]` + }, + fromDriver(value) { + if (typeof value !== 'string') throw new TypeError('expected pgvector string repr') + return JSON.parse(value) as number[] + }, +}) +``` + +Rationale for omitting the dimension at type level: we want multiple embedding models to coexist during gradual rebuilds. Each row records its own `embedding_model` and `dim`. Indexes (added in v2) will be expression/partial indexes constrained to a single model so dimensions match within the index. + +## 4. 
Data model + +```sql +-- migration 00XX_ai_corpus_embeddings.sql +CREATE TABLE corpus_embeddings ( + id text PRIMARY KEY, -- snowflake string (pkText) + source_type text NOT NULL, -- 'post' | 'note' | 'page' | 'recently' (v2) + source_id text NOT NULL, -- snowflake string of source + chunk_index integer NOT NULL, + content text NOT NULL, + content_hash text NOT NULL, -- sha256(content); change detection + embedding vector NOT NULL, -- dim per-row, see embedding_model + embedding_model text NOT NULL, -- e.g. 'text-embedding-3-small' + dim integer NOT NULL, -- redundant, enables multi-model coexistence + created_at timestamptz NOT NULL DEFAULT now() +); + +CREATE UNIQUE INDEX corpus_embeddings_source_chunk_model + ON corpus_embeddings (source_type, source_id, chunk_index, embedding_model); + +CREATE INDEX corpus_embeddings_source + ON corpus_embeddings (source_type, source_id); + +-- ANN index deferred to v2. Exact search is fine at ≤10k rows: +-- SELECT … ORDER BY embedding <=> $query LIMIT $k; +``` + +Drizzle definition in `packages/db-schema/src/schema/ai.ts`. + +## 5. Chunking + +`chunker.ts` is pure and deterministic. + +```ts +chunk(markdown: string, opts: { maxTokens: number, overlapTokens: number }): ChunkSpec[] +interface ChunkSpec { index: number, content: string, hash: string } +``` + +Algorithm: + +1. Strip fenced code blocks (replace with a placeholder); they distort embedding signal. +2. Split on paragraph boundaries (`\n\n+`). +3. Greedy pack paragraphs into chunks until adding the next would exceed `maxTokens`. If a single paragraph exceeds `maxTokens`, fall back to sentence splitting; if a single sentence exceeds, fall back to character window. +4. Overlap: prepend the last `overlapTokens` tokens of chunk N to chunk N+1. +5. Token counting uses a cheap byte-pair-equivalent approximation (chars / 3 for CJK-heavy text, chars / 4 for ASCII-heavy); precise token counting is not required because the model handles slight overflow gracefully. +6. 
`hash = sha256(normalized content)`. + +Defaults (`configs.aiEmbedding.*`): + +- `chunkMaxTokens` = 500 +- `chunkOverlapTokens` = 50 + +## 6. Sync pipeline + +### 6.1 Event listeners + +Subscribe to: + +- `NOTE_CREATE`, `NOTE_UPDATE`, `NOTE_DELETE` +- `PAGE_CREATE`, `PAGE_UPDATE`, `PAGE_DELETE` +- `POST_CREATE`, `POST_UPDATE`, `POST_DELETE` (also embed posts for retrieval) + +Each handler enqueues `EMBED_SYNC { sourceType, sourceId, op }` and returns immediately. + +Recently event listeners are not subscribed in MVP. Adding them in v2 requires only a new listener file + a length-eligibility filter; the rest of the pipeline is unchanged. + +### 6.2 `EMBED_SYNC` task processor + +``` +EmbedSyncTaskProcessor.handle({ sourceType, sourceId, op }): + if op === 'delete': + DELETE FROM corpus_embeddings WHERE source_type=$sourceType AND source_id=$sourceId + return + + source = await sourceLoader(sourceType, sourceId) // routes to NoteService / PageService / PostService + if !source: return // raced with delete; idempotent no-op + + if !aiEmbeddingModelConfigured: return // graceful no-op; admin can backfill later + + markdown = source.text ?? source.content ?? '' + chunks = chunker.chunk(markdown, configs.aiEmbedding) + + existing = repo.findAll({ source_type, source_id, embedding_model: configs.embeddingModel }) + existingByIndex = Map(existing.map(e => [e.chunk_index, e])) + + // 1. Delete chunks whose index no longer exists + staleIndices = existing.map(e => e.chunk_index).filter(i => i >= chunks.length) + if staleIndices.length: repo.delete({ source_type, source_id, embedding_model, chunk_index IN staleIndices }) + + // 2. 
Embed only changed/new chunks + toEmbed = chunks.filter(c => existingByIndex.get(c.index)?.content_hash !== c.hash) + if toEmbed.length === 0: return + + runtime = await aiService.getEmbeddingModel() + vectors = await runtime.embedBatch(toEmbed.map(c => c.content)) + rows = toEmbed.map((c, i) => ({ + id: snowflake(), + source_type, source_id, + chunk_index: c.index, content: c.content, content_hash: c.hash, + embedding: vectors[i], embedding_model: runtime.modelId, dim: vectors[i].length, + })) + repo.upsert(rows, conflictTarget: ['source_type','source_id','chunk_index','embedding_model']) +``` + +Idempotency: `(source_type, source_id, chunk_index, embedding_model)` unique constraint + content-hash diff. Re-running on unchanged source is a no-op. + +### 6.3 Backfill + +Two callers, same driver (`corpus-backfill.driver.ts`): + +- `POST /ai-embeddings/backfill { sourceTypes?: string[] }` — admin-triggered. +- `2026XXXX-ai-corpus-initial-backfill.ts` app-migration entry — runs once at first deploy after this feature lands. + +Driver: + +``` +async function backfill({ sourceTypes }): + for st in sourceTypes ?? ['post','note','page']: + cursor = null + while: + batch = await fetchSourceIds(st, { cursor, limit: configs.aiEmbedding.backfillBatchSize }) + if batch.length === 0: break + await Promise.all(batch.map(id => embedSyncProcessor.handle({ sourceType: st, sourceId: id, op:'update' }))) + cursor = batch[batch.length - 1] +``` + +The driver calls the processor directly (not the queue), bypassing the queue's quota concerns for backfill operations. Honors rate limits via a small per-batch delay. + +## 7. 
Retrieval API + +```ts +class AiEmbeddingsService { + async search( + query: string, + opts: { + topK?: number // default 5 + minSimilarity?: number // default 0.7 + model?: string // default: resolved embedding model + sourceTypes?: string[] // default: all + }, + ): Promise<RetrievalResult[]> +} + +interface RetrievalResult { + sourceType: string + sourceId: string + chunkIndex: number + content: string + distance: number // raw pgvector cosine distance + similarity: number // 1 - distance +} +``` + +Implementation (exact search, MVP): + +```sql +SELECT + source_type, source_id, chunk_index, content, + (embedding <=> $query) AS distance +FROM corpus_embeddings +WHERE embedding_model = $model + AND ($sourceTypes IS NULL OR source_type = ANY($sourceTypes)) +ORDER BY embedding <=> $query +LIMIT $topK; +``` + +Service-layer post-processing: + +```ts +results + .map((r) => ({ ...r, similarity: 1 - r.distance })) + .filter((r) => r.similarity >= (opts.minSimilarity ?? 0.7)) +``` + +Consumers (`ai-echo`, `ai-memory`) always read `similarity`. The `distance` field is exposed for debugging / admin tooling only. + +## 8. API surface + +| Method | Path | Auth | Body / Query | Returns | +| --- | --- | --- | --- | --- | +| POST | `/ai-embeddings/backfill` | @Auth | `{ sourceTypes?: ('post'\|'note'\|'page')[] }` | `{ taskId }` (queued backfill driver invocation) | +| GET | `/ai-embeddings/stats` | @Auth | — | `{ byModel: [{ model, dim, rows }], bySourceType: [{ type, rows }], total }` | + +`/ai-embeddings/reindex` is a v2 scaffold; not exposed in MVP. + +## 9. Errors + +| Code | HTTP | Notes | +| --- | --- | --- | +| `AI_EMBEDDING_MODEL_NOT_CONFIGURED` | 400 | Backfill / search called without `AIFeatureKey.Embedding` assignment. Sync listener treats this as a graceful no-op. | +| `AI_EMBEDDING_BATCH_FAILED` | 502 | Upstream embedding API failure during sync; task queue retries. | + +Sync failures persist no row state on `corpus_embeddings`; the task itself owns retry. 
A permanently failing source is visible in task queue's dead-letter list. + +## 10. Configuration + +Additions in `configs.schema.ts → AISchema`: + +```ts +embeddingModel: field.plain(AIModelAssignmentSchema.optional(), 'Embedding model'), +aiEmbedding: field.plain(z.object({ + chunkMaxTokens: z.number().int().min(64).default(500), + chunkOverlapTokens: z.number().int().min(0).default(50), + backfillBatchSize: z.number().int().min(1).default(50), + defaultMinSimilarity: z.number().min(0).max(1).default(0.7), + defaultTopK: z.number().int().min(1).default(5), +}).optional(), 'Embedding parameters'), +``` + +The `defaultMinSimilarity` and `defaultTopK` are fallbacks used when consumers don't pass per-call overrides; ai-echo overrides them via `echoRetrievalMinSimilarity` / `echoRetrievalTopK` (see ai-echo engine spec §9). + +`AIFeatureKey.Embedding` added to the enum. `AiService.getEmbeddingModel()` resolves it. `openai-compatible.runtime.ts` gains `listEmbeddingModels()` (reverses the chat-model filter — only ids containing `embedding` are returned). + +## 11. Testing + +### 11.1 Unit + +- `chunker.chunk` is deterministic on fixtures; correctly handles code blocks, oversized paragraphs, overlap. +- `vector` custom type round-trips through `toDriver` / `fromDriver`. +- `AiEmbeddingsService.search` correctly computes `similarity = 1 - distance` and applies the threshold filter. + +### 11.2 Integration (pg container) + +- `NOTE_CREATE` → `corpus_embeddings` rows written for new note; row count matches chunk count. +- `NOTE_UPDATE` with unchanged content → no new rows, no deletions. +- `NOTE_UPDATE` with one paragraph changed → only affected chunk re-embedded; `content_hash` updated. +- `NOTE_DELETE` → all rows for that source removed. +- `POST /ai-embeddings/backfill` → expected row count after run. +- `POST /ai-embeddings/backfill` is idempotent (run twice → no duplicates due to unique constraint). 
+- `search` returns rows ordered by similarity descending, all above threshold. +- Embedding model unconfigured: `NOTE_CREATE` → no rows written, no error thrown; `search` throws `AI_EMBEDDING_MODEL_NOT_CONFIGURED`. + +### 11.3 Mocks + +- `test/mock/processors/ai-embedding.mock.ts` — deterministic embedding model returning `vector = sha-derived float[8]` (small dim for fast tests). Used across all consumer tests. + +## 12. Migration + +``` +00XX_ai_vector_extension.sql -- CREATE EXTENSION (this spec, prereq to all others) +00XX_ai_corpus_embeddings.sql -- table (this spec) +``` + +Plus Drizzle column helper added in `packages/db-schema/src/schema/columns.ts` (no migration; package change only). + +Data: + +``` +src/database/app-migrations/2026XXXX-ai-corpus-initial-backfill.ts +``` + +Uses the existing app-migration ledger; runs once after deploy. Skips gracefully when embedding model unconfigured (admin can re-run via the backfill endpoint later). + +## 13. MVP / v2 boundary + +**MVP:** +- Extension + table + Drizzle helper +- Sync listeners for `post`, `note`, `page` +- `EMBED_SYNC` task with hash-diff idempotency +- Backfill (admin endpoint + initial app-migration) +- Retrieval API +- Stats endpoint +- Config additions + +**v2:** +- Sync listener for `recently` with `embedRecentlyMinChars` threshold +- `POST /ai-embeddings/reindex` — change embedding model gracefully (writes new rows under new model, validates, then drops old model rows) +- ANN index migration (HNSW or IVFFLAT) when row count crosses threshold +- Embedding cost telemetry per task + +## 14. Acceptance criteria + +- New note → embeddings appear within seconds; chunk count matches expected. +- Repeated sync on unchanged note → no row changes. +- Note delete → all rows removed. +- Retrieval over a fixture corpus returns expected ordering and filters by similarity threshold. +- Stats endpoint reflects row counts by model and source type. 
+- Config switch of embedding model → new sync writes go under new model; old rows remain queryable until cleanup (no immediate breakage). diff --git a/docs/superpowers/specs/2026-05-23-ai-memory-design.md b/docs/superpowers/specs/2026-05-23-ai-memory-design.md new file mode 100644 index 00000000000..fa230f0e334 --- /dev/null +++ b/docs/superpowers/specs/2026-05-23-ai-memory-design.md @@ -0,0 +1,289 @@ +# ai-memory — Design + +- **Date:** 2026-05-23 +- **Status:** Design — pending review +- **Author:** Innei (brainstormed with assistant) +- **Parent:** [AI Echo System Root](./2026-05-23-ai-echo-system-root.md) +- **Sibling specs:** [ai-echo engine](./2026-05-23-ai-echo-engine-design.md), [ai-embeddings](./2026-05-23-ai-embeddings-design.md), [ai-persona](./2026-05-23-ai-persona-design.md) + +## 1. Scope + +A **human-authored canonical-facts layer** that the ai-echo prompt can recall from. Rationale: these are structured "this is who I am / what I think" facts that raw-passage retrieval cannot crystallize on its own (e.g., "I dislike morning meetings", "I value brevity"). Echo personas use them as ambient constraints, not as evidence to quote. + +MVP is intentionally minimal: CRUD + recall. Autonomous extraction, decay, and supersession are explicitly v2 — small-corpus scale doesn't justify the variance of LLM-driven memory pipelines without a forced operator review workflow. + +- New module: `apps/core/src/modules/ai/ai-memory/` +- New table: `ai_memories` +- New endpoints: CRUD, list, total count +- Recall consumed by `ai-echo` + +Cross-cutting decisions (IDs, model config, similarity semantics) are defined in the root spec. + +## 2. 
Module layout + +``` +apps/core/src/modules/ai/ai-memory/ +├── ai-memory.module.ts +├── ai-memory.controller.ts +├── ai-memory.service.ts # CRUD + recall + embed-on-write +├── ai-memory.repository.ts +├── ai-memory.schema.ts # Zod DTOs (CreateMemoryDto, UpdateMemoryDto) +├── ai-memory.types.ts +├── ai-memory.constants.ts +├── ai-memory.errors.ts +└── tasks/ + └── memory-embed.processor.ts # async embedding for new/updated memory content +``` + +## 3. Data model + +```sql +-- migration 00XX_ai_memories.sql +CREATE TABLE ai_memories ( + id text PRIMARY KEY, -- snowflake string (pkText) + scope text NOT NULL, -- 'global' | 'persona:<key>' | 'scenario:<key>' + type text NOT NULL, -- 'fact' | 'event' | 'preference' | 'thread' | 'pattern' + content text NOT NULL, + confidence real NOT NULL DEFAULT 1.0, + salience real NOT NULL DEFAULT 1.0, + source jsonb NOT NULL DEFAULT '{}', -- {kind:'manual', authorId} | {kind:'extraction', from:'recently:<id>'} (v2) + embedding vector, -- nullable; embedded async after write + embedding_model text, + dim integer, + first_seen_at timestamptz NOT NULL DEFAULT now(), + last_seen_at timestamptz NOT NULL DEFAULT now(), + expires_at timestamptz, -- nullable; respected by recall + supersedes_id text REFERENCES ai_memories(id), -- v2: extraction can chain supersessions + status text NOT NULL DEFAULT 'active', -- 'active' | 'superseded' | 'archived' | 'pending_review' (v2) + metadata jsonb NOT NULL DEFAULT '{}', + created_at timestamptz NOT NULL DEFAULT now(), + updated_at timestamptz NOT NULL DEFAULT now() +); + +CREATE INDEX ai_memories_scope_status ON ai_memories (scope, status); +CREATE INDEX ai_memories_status_active ON ai_memories (status) WHERE status = 'active'; +``` + +All v2-oriented fields (`confidence`, `salience`, `last_seen_at`, `supersedes_id`, `expires_at`, `pending_review` status value) are present from MVP. They are **inert** only in the sense that MVP code never changes them automatically: recall still reads `salience`, `confidence`, and `expires_at` (see §4.2), so values an operator sets through the DTOs take effect immediately. Shipping the columns now avoids a future migration when v2 lights up. 
+ +`embedding` is nullable: a memory is queryable as soon as it's written; once the async embed lands, it becomes vector-recallable. Without an embedding, the row is excluded from query-based recall but remains visible to scope-only listing. + +Drizzle definition in `packages/db-schema/src/schema/ai.ts`. + +## 4. Service API + +```ts +class AiMemoryService { + // CRUD + list(opts: { scope?: string|string[], type?: string, status?: string, page?: number, size?: number }): Promise> + findById(id: string): Promise + create(input: CreateMemoryDto, actorId: string): Promise // enqueues embed task + update(id: string, input: UpdateMemoryDto, actorId: string): Promise // re-enqueues embed if content changed + archive(id: string): Promise // status='archived' + + // Recall (consumed by ai-echo) + recall(opts: { + scope: string | string[] + query?: string // when present, vector-rank; when absent, salience-only + topK?: number // default 5 + minSimilarity?: number // default 0.7 + }): Promise + + // KPI + totalActive(): Promise +} +``` + +### 4.1 `create` / `update` embed flow + +``` +create(input, actorId): + 1. row = INSERT ai_memories with source.kind='manual', source.authorId=actorId + 2. enqueue MEMORY_EMBED { memoryId } + 3. return row (embedding=null at this moment) + +MemoryEmbedTaskProcessor.handle({ memoryId }): + 1. row = repo.findById(memoryId) + 2. if !row OR row.status NOT IN ('active', 'pending_review'): return + 3. if !aiEmbeddingModelConfigured: return // graceful no-op + 4. runtime = await aiService.getEmbeddingModel() + 5. vec = await runtime.embedBatch([row.content]).then(v => v[0]) + 6. UPDATE ai_memories SET embedding=vec, embedding_model=runtime.modelId, dim=vec.length WHERE id=$memoryId +``` + +`update` re-embeds only when `content` changes (other field updates skip the task). + +### 4.2 `recall` + +``` +recall({ scope, query, topK=5, minSimilarity=0.7 }): + scopeList = Array.isArray(scope) ? 
scope : [scope] + + if !query: + // Salience-only ranking + SELECT * FROM ai_memories + WHERE status='active' AND scope = ANY($scopeList) + AND (expires_at IS NULL OR expires_at > now()) + ORDER BY salience DESC, last_seen_at DESC + LIMIT $topK; + return. + + // Query-based: vector search with embedding model + if !embeddingModelConfigured: return [] + + q = await runtime.embed(query) + rows = SELECT *, + (embedding <=> $q) AS distance + FROM ai_memories + WHERE status='active' AND scope = ANY($scopeList) + AND embedding IS NOT NULL + AND embedding_model = $modelId + AND (expires_at IS NULL OR expires_at > now()) + ORDER BY embedding <=> $q + LIMIT $topK * 2; // overfetch, filter, re-rank + + // Compute similarity, filter by threshold, re-rank by similarity × salience × confidence + return rows + .map(r => ({ ...r, similarity: 1 - r.distance })) + .filter(r => r.similarity >= minSimilarity) + .map(r => ({ ...r, score: r.similarity * r.salience * r.confidence })) + .sort((a,b) => b.score - a.score) + .slice(0, topK) +``` + +The ai-echo task processor passes both 'global' and `persona:` scopes. The returned memories appear in the echo prompt under the "Canonical facts" section (see ai-echo engine spec §6). + +## 5. API surface + +| Method | Path | Auth | Body / Query | Returns | +| --- | --- | --- | --- | --- | +| GET | `/ai-memory` | @Auth | `?scope=&type=&status=&page=&size=` | Paginated `AiMemoryViews.detail[]` with `MetaObjectBuilder` pagination | +| GET | `/ai-memory/:id` | @Auth | — | `AiMemoryViews.detail` | +| POST | `/ai-memory` | @Auth | `CreateMemoryDto` | created row | +| PUT | `/ai-memory/:id` | @Auth | `UpdateMemoryDto` | updated row | +| DELETE | `/ai-memory/:id` | @Auth | — | 204; soft delete (`status='archived'`) | +| GET | `/ai-memory/kpi` | @Auth | — | `{ total, active, archived }` | + +`/ai-memory/from-passage` (LLM-drafted memory from a highlighted passage) and KPI nudge fields (`referencedThisWeek`, `seedRecommended`) are v2. 
+ +### 5.1 DTOs + +```ts +const CreateMemoryDto = z.object({ + scope: z.string().regex(/^(global|persona:[a-z0-9-]+|scenario:[a-z0-9-]+)$/), + type: z.enum(['fact','event','preference','thread','pattern']), + content: z.string().min(1).max(2000), + confidence: z.number().min(0).max(1).optional().default(1.0), + salience: z.number().min(0).max(10).optional().default(1.0), + expiresAt: z.string().datetime().optional(), + metadata: z.record(z.unknown()).optional(), +}) + +const UpdateMemoryDto = CreateMemoryDto.partial() +``` + +### 5.2 Views + +```ts +// ai-memory.views.ts +export const AiMemoryViews = { + detail: ZodObject<{ + id, scope, type, content, confidence, salience, source, status, + firstSeenAt, lastSeenAt, expiresAt, metadata, createdAt, updatedAt, + hasEmbedding: boolean, // derived; never expose vector to admin UI + }>, +} +``` + +## 6. Errors + +| Code | HTTP | Notes | +| --- | --- | --- | +| `AI_MEMORY_NOT_FOUND` | 404 | | +| `AI_MEMORY_INVALID_SCOPE` | 400 | Rejected by Zod regex; surfaced via VALIDATION_FAILED if Zod catches first. | +| `AI_MEMORY_INVALID_TYPE` | 400 | Same as above. | + +Recall itself never throws on missing embedding model — it returns `[]` (graceful no-op). + +## 7. Configuration + +Additions in `configs.schema.ts → AISchema`: + +```ts +aiMemory: field.plain(z.object({ + recallTopK: z.number().int().min(1).default(5), + recallMinSimilarity: z.number().min(0).max(1).default(0.7), + // v2-only, accepted but ignored: + nudgeIfReferencedBelow: z.number().int().min(0).default(1), +}).optional(), 'Memory parameters'), +``` + +The actual recall thresholds used by ai-echo are read via `configs.aiMemory.*`; ai-echo doesn't introduce parallel memory thresholds. + +## 8. Operational guidance (downstream-visible) + +The MVP layer is only valuable if memories are actually seeded. The admin UI (downstream PR) should provide: + +- A "Seed memories" call-to-action when `GET /ai-memory/kpi { active }` < 10. 
+- A simple add-memory form with type/scope dropdowns. +- A list view with type/scope filters and a soft-delete action. + +The server exposes everything needed; the operational forcing is the admin UX's job. This spec documents the contract; v2 ships the `seedRecommended` and `referencedThisWeek` flags so the nudge widget has data. + +## 9. Testing + +### 9.1 Unit + +- `CreateMemoryDto` regex accepts `global`, `persona:inner-self`, `scenario:recently`; rejects malformed. +- `recall` salience-only path: returns rows ordered by `salience DESC`, filters expired. +- `recall` query path: applies threshold; re-ranks by `similarity × salience × confidence`. + +### 9.2 Integration (pg + redis containers) + +- `POST /ai-memory` → row created with `embedding=null`; `MEMORY_EMBED` task enqueued. +- After task runs (mock embedding model) → `embedding` populated, `dim` matches. +- `PUT /ai-memory/:id` with content change → re-enqueues embed; row's embedding updated. +- `PUT /ai-memory/:id` without content change → no embed task enqueued. +- `DELETE /ai-memory/:id` → `status='archived'`; subsequent `recall` excludes it. +- `recall` with two memories (active + expired) returns only the active one. +- `recall` with embedding model unconfigured returns `[]` without throwing. +- `recall` with query → expected ordering across fixture memories. + +### 9.3 Mocks + +- Reuse `test/mock/processors/ai-embedding.mock.ts` (from ai-embeddings spec) for deterministic vectors. + +## 10. Migration + +``` +00XX_ai_memories.sql -- this spec +``` + +Additive only. Drizzle definition in `packages/db-schema/src/schema/ai.ts`. + +## 11. 
MVP / v2 boundary + +**MVP:** +- Table with all fields (v2 fields present but inert) +- CRUD endpoints +- Recall (salience-only when no query; vector-ranked when query provided) +- Async embed-on-write task +- KPI total/active/archived +- Config additions for recall parameters + +**v2:** +- `POST /ai-memory/from-passage` — accepts a passage + ref, runs a small synchronous LLM call to draft `content`; operator reviews before saving. +- `MEMORY_EXTRACT` task — periodically extracts candidate memories from new corpus; writes as `status='pending_review'`. +- Forced review workflow — weekly digest email + blocking dashboard badge when `pending_review` count > 0. +- `MEMORY_DECAY` task — cron lowers `confidence` over time without reinforcement; below `archive_threshold` → archived. +- Auto-supersession — extract phase detects contradiction with existing active memories and proposes `supersedes_id` resolutions. +- KPI nudge fields (`referencedThisWeek`, `seedRecommended`) + admin dashboard widget. + +## 12. Acceptance criteria + +- Operator can create, list, edit, soft-delete memories via admin endpoints. +- `recall` returns the expected memories given fixture data and a query string. +- Memories without embeddings yet are excluded from query-based recall but included in salience-only listings. +- Recall integrates into echo prompts (verified end-to-end in ai-echo engine integration tests, not duplicated here). +- Embedding model unconfigured does not crash recall; returns `[]`. +- All v2 fields exist in schema and are exposed (read-only) via the detail view so future automation has no migration cost. 
diff --git a/docs/superpowers/specs/2026-05-23-ai-persona-design.md b/docs/superpowers/specs/2026-05-23-ai-persona-design.md new file mode 100644 index 00000000000..28bd52adaed --- /dev/null +++ b/docs/superpowers/specs/2026-05-23-ai-persona-design.md @@ -0,0 +1,320 @@ +# ai-persona — Design + +- **Date:** 2026-05-23 +- **Status:** Design — pending review +- **Author:** Innei (brainstormed with assistant) +- **Parent:** [AI Echo System Root](./2026-05-23-ai-echo-system-root.md) +- **Sibling specs:** [ai-echo engine](./2026-05-23-ai-echo-engine-design.md), [ai-embeddings](./2026-05-23-ai-embeddings-design.md), [ai-memory](./2026-05-23-ai-memory-design.md) + +## 1. Scope + +Persona definitions consumed by `ai-echo` prompts: + +- New module: `apps/core/src/modules/ai/ai-persona/` +- New table: `persona_profiles` +- Code-level `PersonaRegistry` (no admin CRUD; personas are added via code at MVP) +- Two personas shipped at MVP: `inner-self` (dynamic) and `passerby` (static) +- Persona profile distillation (single-pass LLM call in MVP; map-reduce deferred to v2) +- Exemplar selection (length window + recency-weighted random in MVP; vector-by-query selection deferred to v2) +- New task: `PERSONA_DISTILL` +- New endpoints: list personas, get profile, manual refresh + +Cross-cutting decisions (IDs, model config, distance vs similarity) are defined in the root spec. + +## 2. 
Module layout + +``` +apps/core/src/modules/ai/ai-persona/ +├── ai-persona.module.ts +├── ai-persona.controller.ts # admin endpoints +├── ai-persona.service.ts # getProfile, refresh, pickExemplars +├── ai-persona.repository.ts +├── ai-persona.schema.ts # Zod DTOs +├── ai-persona.types.ts # PersonaKey, PersonaDefinition, PersonaProfile, ExemplarPassage +├── ai-persona.constants.ts +├── ai-persona.errors.ts +├── persona-registry.ts # code-level const map +├── prompts.ts # static prompt templates +├── exemplar-selector.ts # MVP: length + recency; v2: vector +└── tasks/ + └── persona-distill.processor.ts +``` + +## 3. Persona registry + +```ts +// ai-persona.types.ts +export type PersonaKey = 'inner-self' | 'passerby' // closed union; add a member here when introducing a new persona + +export interface PersonaDefinition { + key: PersonaKey + displayName: string + description: string + needsProfile: boolean // inner-self: true; passerby: false + needsRetrieval: boolean // inner-self: true; passerby: false + usesExemplars: boolean // inner-self: true; passerby: false + staticPrompt: string // base system instruction (always present) +} + +// persona-registry.ts +export const PERSONA_REGISTRY: Record<PersonaKey, PersonaDefinition> = { + 'inner-self': { + key: 'inner-self', + displayName: 'Inner Self (另我)', + description: 'The author\'s alternate voice — distilled from their own writing.', + needsProfile: true, + needsRetrieval: true, + usesExemplars: true, + staticPrompt: AI_PERSONA_PROMPTS.innerSelf, // from prompts.ts + }, + passerby: { + key: 'passerby', + displayName: 'Passerby (路人)', + description: 'A visiting stranger; brief, fresh-eyed reactions.', + needsProfile: false, + needsRetrieval: false, + usesExemplars: false, + staticPrompt: AI_PERSONA_PROMPTS.passerby, + }, +} +``` + +Adding a future persona is a code edit only (new key + entry); no schema migration, no admin UI. + +## 4. 
Data model + +```sql +-- migration 00XX_ai_persona_profiles.sql +CREATE TABLE persona_profiles ( + id text PRIMARY KEY, -- snowflake string (pkText) + persona_key text NOT NULL UNIQUE, -- 'inner-self'; passerby never appears here + profile text NOT NULL, -- full distilled voice description (≤ 2k tokens) + profile_summary text, -- shorter version for prompt embedding (≤ 300 tokens) + corpus_version integer NOT NULL, -- corpus_embeddings row count snapshot at distill time + distill_model text NOT NULL, -- resolved model id at distill time + refreshed_at timestamptz NOT NULL, + auto_next_at timestamptz, -- v2: schedule next auto refresh; nullable in MVP + metadata jsonb NOT NULL DEFAULT '{}', -- tone tags, recurring themes, signal words + created_at timestamptz NOT NULL DEFAULT now(), + updated_at timestamptz NOT NULL DEFAULT now() +); +``` + +Only personas with `needsProfile=true` get a row. Drizzle definition in `packages/db-schema/src/schema/ai.ts`. + +## 5. Distillation pipeline + +### 5.1 MVP: single-pass distill + +``` +PersonaDistillTaskProcessor.handle({ personaKey }): + if personaKey !== 'inner-self': fail('AI_PERSONA_NOT_DISTILLABLE') + + lock = redis.setNX(`persona:distill:${personaKey}`, '1', EX=600) + if !lock: skip (concurrent run; another worker picked it up) + + try: + 1. corpus = await sampleCorpus({ + sourceTypes: ['post','note','page'], // recently excluded in MVP (see ai-embeddings v2) + maxTokens: configs.aiPersona.distillSampleMaxTokens, // default 60_000 + recencyWeighted: true, + perTypeQuota: { post: 0.5, note: 0.3, page: 0.2 }, + }) + 2. runtime = await aiService.getPersonaDistillModel() + 3. messages = buildDistillPrompt(corpus) // single call; see §5.3 + 4. result = await runtime.chat(messages) + 5. { profile, profileSummary, metadata } = parseDistillOutput(result) + 6. 
UPSERT persona_profiles WHERE persona_key='inner-self' SET: + profile, profile_summary=profileSummary, corpus_version=count(corpus_embeddings), + distill_model=runtime.modelId, refreshed_at=now(), metadata + 7. emit PERSONA_PROFILE_REFRESHED + finally: + redis.del(`persona:distill:${personaKey}`) +``` + +The MVP single-pass approach passes the entire sampled corpus (capped at ~60k tokens) directly to one LLM call. This trades a higher per-call cost for radical simplicity; v2 replaces it with a map-reduce path for larger corpora and reduced token cost. + +### 5.2 Sampling + +`sampleCorpus`: + +1. List all source IDs of the requested types. +2. Weight each by recency (exponential decay; half-life ~ 365 days). +3. Probabilistic sample without replacement until cumulative token estimate ≥ `maxTokens` × 1.1 (slight overshoot then trim). +4. Maintain per-type ratios via stratified sampling. +5. Return as a structured list of `{ sourceType, sourceId, title?, createdAt, body }`. + +### 5.3 Distill prompt (single-pass) + +``` +SYSTEM: + You are profiling a single author from their own writing. + Read the passages below and produce a JSON object with three fields: + + - "profile": a description (200–600 words) covering the author's voice, + cadence, vocabulary, recurring themes, value tendencies, signature + phrases. Write in third person ("the author tends to…"). Be specific + and citable, not generic. + + - "profile_summary": a 60–120 word condensation suitable for embedding + into another prompt. + + - "metadata": { + "tone_tags": [string], // e.g. ["wry", "self-deprecating", "quiet-confident"] + "recurring_themes": [string], + "signature_phrases": [string] // verbatim or near-verbatim phrasings the author returns to + } + + Reply with raw JSON, no markdown fences. + +USER: + Passages (oldest first): + + [post:abc — 2025-03-14] {body} + [note:def — 2025-04-02] {body} + ... 
+``` + +`parseDistillOutput` accepts the JSON, validates with Zod, and falls back to a textual profile (no `profile_summary`, empty metadata) if parsing fails. The fallback is logged but does not fail the task — operator can refresh manually. + +### 5.4 Refresh triggers + +**MVP**: admin endpoint only. + +**v2 (deferred)**: + +- Cron: `configs.aiPersona.autoRefreshCron` (default `'0 4 * * 1'`). +- Threshold: a Redis counter increments on `POST_CREATE` / `NOTE_CREATE` / `PAGE_CREATE`; when counter ≥ `configs.aiPersona.autoRefreshThreshold` (default 30), enqueue refresh and reset counter. + +Both v2 triggers reuse the same task processor; only the trigger logic is new. + +## 6. Exemplar selection + +`pickExemplars(personaKey, opts)` is called by the ai-echo task processor for personas with `usesExemplars=true`. + +### 6.1 MVP: length window + recency-weighted random + +```ts +async pickExemplars(personaKey: 'inner-self', opts: { count: number, query?: string }): Promise<ExemplarPassage[]> { + // Read raw passages directly from source tables (notes/pages) — NOT from corpus_embeddings. + // Rationale: chunk boundaries optimize for retrieval, not style. We want intact paragraphs. + const candidates = await loadCandidates({ + sourceTypes: ['note', 'page'], + paragraphLengthRange: [200, 800], // characters + maxCandidates: 200, // recency-weighted sample + }) + return weightedRandomPick(candidates, opts.count) +} +``` + +`loadCandidates` runs a small SQL query that joins notes/pages, splits each by paragraph boundaries (a pure function), and keeps paragraphs whose length is in the configured window. Result is cached in Redis for 1h (key: `persona:exemplars:candidates:${personaKey}`). + +### 6.2 v2: vector-by-query selection + +Defer to v2: when `opts.query` is provided, compute its embedding and pick paragraphs whose chunk-level embedding is in the upper similarity quartile of `query`. 
This requires either (a) embedding paragraphs separately from corpus_embeddings (preserving chunk boundaries) or (b) accepting the corpus chunk boundaries as exemplars (lossy but cheap). Choice deferred. + +### 6.3 Output shape + +```ts +interface ExemplarPassage { + sourceType: 'note' | 'page' + sourceId: string + content: string // paragraph text, no markdown fences + createdAt: Date +} +``` + +## 7. API surface + +| Method | Path | Auth | Body / Query | Returns | +| --- | --- | --- | --- | --- | +| GET | `/ai-persona` | @Auth | — | List of `PersonaDefinition[]` from registry (with `hasProfile: boolean` derived from db). | +| GET | `/ai-persona/:key/profile` | @Auth | — | `PersonaProfile` row or 404. Returns 404 for personas with `needsProfile=false`. | +| POST | `/ai-persona/:key/refresh` | @Auth | — | `{ taskId }`. 409 (`AI_PERSONA_REFRESH_IN_PROGRESS`) when Redis lock held. | + +The v2 endpoint `/ai-persona/:key/refresh/status/:taskId` is deferred; admin UI can poll `ai-task` queue's existing status endpoint. + +## 8. Errors + +| Code | HTTP | Notes | +| --- | --- | --- | +| `AI_PERSONA_NOT_FOUND` | 404 | Unknown `personaKey` in registry. | +| `AI_PERSONA_PROFILE_NOT_FOUND` | 404 | Persona has `needsProfile=false`, or no row yet. | +| `AI_PERSONA_NOT_DISTILLABLE` | 400 | Distill called for a persona with `needsProfile=false`. | +| `AI_PERSONA_REFRESH_IN_PROGRESS` | 409 | Redis lock held. | +| `AI_PERSONA_DISTILL_MODEL_NOT_CONFIGURED` | 400 | Neither `personaDistillModel` nor `echoModel` configured. | + +## 9. 
Configuration + +Additions in `configs.schema.ts → AISchema`: + +```ts +personaDistillModel: field.plain(AIModelAssignmentSchema.optional(), 'Persona distill model', + { description: 'Falls back to echoModel when empty' }), +aiPersona: field.plain(z.object({ + distillSampleMaxTokens: z.number().int().min(1000).default(60_000), + exemplarsLengthMin: z.number().int().min(40).default(200), + exemplarsLengthMax: z.number().int().min(80).default(800), + exemplarsCandidateCacheTtlSec: z.number().int().min(60).default(3600), + // v2 fields, accepted but ignored in MVP: + autoRefreshCron: z.string().default('0 4 * * 1'), + autoRefreshThreshold: z.number().int().min(1).default(30), +}).optional(), 'Persona parameters'), +``` + +`AIFeatureKey.PersonaDistill` added. `AiService.getPersonaDistillModel()` resolves it with fallback to `AIFeatureKey.Echo` when unset. + +## 10. Testing + +### 10.1 Unit + +- `persona-registry`: returns expected entries; unknown key throws `AI_PERSONA_NOT_FOUND`. +- `parseDistillOutput`: valid JSON → struct; malformed JSON → fallback profile with text-only content; empty input → throws. +- `exemplar-selector` (MVP): deterministic given seeded random; respects length window; honors `count`. + +### 10.2 Integration (pg + redis containers) + +- `POST /ai-persona/inner-self/refresh` → row appears in `persona_profiles`; `PERSONA_PROFILE_REFRESHED` event emitted. +- Two concurrent refresh requests → second returns 409. +- `GET /ai-persona` lists registry; `hasProfile` true for `inner-self` after refresh. +- `GET /ai-persona/passerby/profile` returns 404 (`AI_PERSONA_PROFILE_NOT_FOUND`). +- Distill model unconfigured → refresh returns 400 (`AI_PERSONA_DISTILL_MODEL_NOT_CONFIGURED`). +- `pickExemplars` with a fixture corpus returns the expected count of paragraph passages in the length window. + +### 10.3 Mocks + +- Reuse `test/mock/processors/ai-runtime.mock.ts` (declared in ai-echo engine spec). 
For persona distillation tests, the mock returns a fixed JSON profile. + +## 11. Migration + +``` +00XX_ai_persona_profiles.sql -- this spec +``` + +Additive; no changes to existing tables. Drizzle definition in `packages/db-schema/src/schema/ai.ts`. + +## 12. MVP / v2 boundary + +**MVP:** +- Module + table + registry (`inner-self`, `passerby`) +- Single-pass distill with parsed JSON profile +- Manual refresh endpoint +- Exemplar selection by length window + recency (no vector-by-query) +- Static `passerby` prompt +- Config additions for distill sample size and exemplar window + +**v2:** +- Map-reduce distill for larger corpora and reduced token cost +- Auto-refresh: cron + threshold counter +- Vector-by-query exemplar selection +- Optional `persona_exemplars` curated table if auto selection proves insufficient +- Per-persona prompt overrides editable in admin (turning the code-level registry into a hybrid) + +## 13. Acceptance criteria + +- `inner-self` profile is created on first refresh; profile content reflects sampled corpus content. +- Refresh emits `PERSONA_PROFILE_REFRESHED`; subsequent runs upsert the same row. +- Concurrent refresh requests return 409. +- `passerby` has no profile row and is never distilled. +- `pickExemplars` returns paragraphs (not chunks) from notes/pages within the length window. +- ai-echo task processor consumes the profile via `getProfile('inner-self')` and the exemplars via `pickExemplars('inner-self', ...)`. 
diff --git a/packages/db-schema/src/schema/ai.ts b/packages/db-schema/src/schema/ai.ts index 34f2441cb41..e3eccc66888 100644 --- a/packages/db-schema/src/schema/ai.ts +++ b/packages/db-schema/src/schema/ai.ts @@ -6,11 +6,12 @@ import { integer, jsonb, pgTable, + real, text, uniqueIndex, } from 'drizzle-orm/pg-core' -import { createdAt, pkText, refText, tsCol, updatedAt } from './columns' +import { createdAt, pkText, refText, tsCol, updatedAt, vector } from './columns' export const aiTranslations = pgTable( 'ai_translations', @@ -129,3 +130,120 @@ export const aiAgentConversations = pgTable( index('ai_agent_conversations_updated_at_idx').on(table.updatedAt), ], ) + +export const corpusEmbeddings = pgTable( + 'corpus_embeddings', + { + id: pkText(), + sourceType: text('source_type').notNull(), + sourceId: refText('source_id').notNull(), + chunkIndex: integer('chunk_index').notNull(), + content: text('content').notNull(), + contentHash: text('content_hash').notNull(), + embedding: vector('embedding').notNull(), + embeddingModel: text('embedding_model').notNull(), + dim: integer('dim').notNull(), + createdAt: createdAt(), + }, + (table) => [ + uniqueIndex('corpus_embeddings_source_chunk_model_uniq').on( + table.sourceType, + table.sourceId, + table.chunkIndex, + table.embeddingModel, + ), + index('corpus_embeddings_source_idx').on(table.sourceType, table.sourceId), + ], +) + +export const personaProfiles = pgTable('persona_profiles', { + id: pkText(), + personaKey: text('persona_key').notNull().unique(), + profile: text('profile').notNull(), + profileSummary: text('profile_summary'), + corpusVersion: integer('corpus_version').notNull(), + distillModel: text('distill_model').notNull(), + refreshedAt: tsCol('refreshed_at').notNull(), + autoNextAt: tsCol('auto_next_at'), + metadata: jsonb('metadata') + .$type>() + .notNull() + .default(sql`'{}'::jsonb`), + createdAt: createdAt(), + updatedAt: updatedAt(), +}) + +export const aiMemories = pgTable( + 'ai_memories', + { + 
id: pkText(), + scope: text('scope').notNull(), + type: text('type').notNull(), + content: text('content').notNull(), + confidence: real('confidence').notNull().default(1), + salience: real('salience').notNull().default(1), + source: jsonb('source') + .$type>() + .notNull() + .default(sql`'{}'::jsonb`), + embedding: vector('embedding'), + embeddingModel: text('embedding_model'), + dim: integer('dim'), + firstSeenAt: tsCol('first_seen_at').notNull().defaultNow(), + lastSeenAt: tsCol('last_seen_at').notNull().defaultNow(), + expiresAt: tsCol('expires_at'), + supersedesId: refText('supersedes_id').references( + (): AnyPgColumn => aiMemories.id, + { onDelete: 'set null' }, + ), + status: text('status').notNull().default('active'), + metadata: jsonb('metadata') + .$type>() + .notNull() + .default(sql`'{}'::jsonb`), + createdAt: createdAt(), + updatedAt: updatedAt(), + }, + (table) => [ + index('ai_memories_scope_status_idx').on(table.scope, table.status), + index('ai_memories_active_idx') + .on(table.status) + .where(sql`${table.status} = 'active'`), + ], +) + +export const aiEchoes = pgTable( + 'ai_echoes', + { + id: pkText(), + scenarioKey: text('scenario_key').notNull(), + subjectType: text('subject_type').notNull(), + subjectId: refText('subject_id').notNull(), + personaKey: text('persona_key').notNull(), + content: text('content'), + status: text('status').notNull(), + model: text('model'), + metadata: jsonb('metadata') + .$type>() + .notNull() + .default(sql`'{}'::jsonb`), + generatedAt: tsCol('generated_at'), + editedAt: tsCol('edited_at'), + editedBy: refText('edited_by'), + createdAt: createdAt(), + updatedAt: updatedAt(), + }, + (table) => [ + index('ai_echoes_subject_idx').on( + table.scenarioKey, + table.subjectType, + table.subjectId, + ), + index('ai_echoes_status_idx').on(table.scenarioKey, table.status), + index('ai_echoes_persona_subject_idx').on( + table.subjectType, + table.subjectId, + table.personaKey, + ), + ], +) diff --git 
a/packages/db-schema/src/schema/columns.ts b/packages/db-schema/src/schema/columns.ts index 6ea9b4f88b0..8427f2352e6 100644 --- a/packages/db-schema/src/schema/columns.ts +++ b/packages/db-schema/src/schema/columns.ts @@ -1,4 +1,4 @@ -import { text, timestamp } from 'drizzle-orm/pg-core' +import { customType, text, timestamp } from 'drizzle-orm/pg-core' /** * Snowflake primary key column stored as text. IDs are generated as Snowflake @@ -20,3 +20,25 @@ export const updatedAt = (name = 'updated_at') => export const tsCol = (name: string) => timestamp(name, { withTimezone: true, mode: 'date' }) + +/** + * pgvector column. Dimension-less at the type level so multiple embedding + * models can coexist; each row records its own `embedding_model` and `dim`. + */ +export const vector = customType<{ data: number[]; driverData: string }>({ + dataType() { + return 'vector' + }, + toDriver(value) { + if (!Array.isArray(value)) { + throw new TypeError('vector expects number[]') + } + return `[${value.join(',')}]` + }, + fromDriver(value) { + if (typeof value !== 'string') { + throw new TypeError('expected pgvector string repr') + } + return JSON.parse(value) as number[] + }, +})