Skip to content

Commit 7aaf016

Browse files
committed
Refactor analytics and add LLM translation utilities
Moved analytics modules from 'optional' to 'utilities' for better organization and updated all imports accordingly. Added shared translation utilities in 'utilities/translation/translationProcessor.ts' to support LLM-based translation with symbol preservation across AAC formats. Updated processors (gridset, obf, touchchat) to use these utilities, and added comprehensive documentation in 'utilities/translation/README.md'.
1 parent d48522a commit 7aaf016

38 files changed

Lines changed: 819 additions & 100 deletions

scripts/analysis/analyze_obl_data.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import * as fs from 'fs';
22
import * as path from 'path';
3-
import { OblUtil } from '../../src/optional/analytics/index';
3+
import { OblUtil } from '../../src/utilities/analytics/index';
44

55
/**
66
* Script to bulk-analyze OBLA clinical data and extract utterances to a CSV.

scripts/translation/gemini-translate-gridset.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,17 @@ async function main() {
103103
// Step 1: Extract symbol information
104104
console.log('STEP 1: Extracting symbol information from gridset...');
105105
const symbolInfo = processor.extractSymbolsForLLM(inputPath);
106+
107+
// NOTE: This script uses batch processing - sends ALL buttons in a single API call.
108+
// For large vocabularies (1000+ buttons), consider chunking by page to avoid:
109+
// - Hitting LLM context window limits
110+
// - API rate limits
111+
// - Losing all progress when a single request fails (per-page calls allow error recovery and progress tracking)
112+
//
113+
// Example chunking approach:
114+
// - Group buttons by page
115+
// - Process each page separately
116+
// - Combine results before applying to gridset
106117

107118
console.log(` Found ${symbolInfo.length} buttons with symbols`);
108119
console.log(` Processing first ${Math.min(maxItems, symbolInfo.length)} items`);

src/core/baseProcessor.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,45 @@
1+
/**
2+
* Base Processor for AAC File Formats
3+
*
4+
* This module provides base functionality for processing AAC (Augmentative and Alternative
5+
* Communication) files across various formats (gridset, OBF, Snap, TouchChat, etc.).
6+
*
7+
* ## LLM-Based Translation with Symbol Preservation
8+
*
9+
* All processor formats support LLM-based translation that preserves symbol-to-word
10+
* associations across languages. This is critical for AAC systems where visual symbols
11+
* are attached to specific words.
12+
*
13+
* ### Usage Example:
14+
*
15+
* ```typescript
16+
* import { extractAllButtonsForTranslation, createTranslationPrompt } from '../utilities/translation/translationProcessor';
17+
*
18+
* // 1. Extract buttons from your format
19+
* const buttons = extractAllButtonsForTranslation(myFormatButtons, (button) => ({
20+
* pageId: button.pageId,
21+
* pageName: button.pageName
22+
* }));
23+
*
24+
* // 2. Create prompt for LLM
25+
* const prompt = createTranslationPrompt(buttons, 'Spanish');
26+
*
27+
* // 3. Send to LLM (Gemini, GPT, etc.) and get response
28+
* const llmResponse = await callLLMAPI(prompt);
29+
*
30+
* // 4. Apply translations to your format
31+
* processor.processLLMTranslations(filePath, llmResponse, outputPath);
32+
* ```
33+
*
34+
* ### Format-Specific Implementation:
35+
*
36+
* Each processor should implement:
37+
* - `extractSymbolsForLLM()` - Uses extractAllButtonsForTranslation() utility
38+
* - `processLLMTranslations()` - Applies translations using format-specific logic
39+
*
40+
* See `src/utilities/translation/translationProcessor.ts` for shared utilities.
41+
*/
42+
143
import { AACTree, AACButton, AACSemanticCategory } from './treeStructure';
244
import { StringCasing, detectCasing, isNumericOrEmpty } from './stringCasing';
345
import { ValidationResult } from '../validation/validationTypes';

src/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@ export * from './core/baseProcessor';
44
export * from './core/stringCasing';
55
export * from './processors';
66
export * from './validation';
7-
export * as Analytics from './optional/analytics';
7+
export * as Analytics from './utilities/analytics';
88
export {
99
collectUnifiedHistory,
1010
listGrid3Users as listHistoryGrid3Users,
1111
listSnapUsers as listHistorySnapUsers,
12-
} from './optional/analytics/history';
12+
} from './utilities/analytics/history';
1313

1414
import { BaseProcessor } from './core/baseProcessor';
1515
import { DotProcessor } from './processors/dotProcessor';

src/processors/gridsetProcessor.ts

Lines changed: 31 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ import AdmZip from 'adm-zip';
1818
import fs from 'fs';
1919
import { XMLParser, XMLBuilder } from 'fast-xml-parser';
2020
import { resolveGrid3CellImage } from './gridset/resolver';
21+
import {
22+
extractAllButtonsForTranslation,
23+
validateTranslationResults,
24+
type ButtonForTranslation,
25+
type LLMLTranslationResult,
26+
} from '../utilities/translation/translationProcessor';
2127
import { getZipEntriesWithPassword, resolveGridsetPassword } from './gridset/password';
2228
import crypto from 'crypto';
2329
import zlib from 'zlib';
@@ -28,7 +34,7 @@ import { detectPluginCellType, Grid3CellType } from './gridset/pluginTypes';
2834
import { detectCommand } from './gridset/commands';
2935
import { type SymbolReference, parseSymbolReference } from './gridset/symbols';
3036
import { isSymbolLibraryReference } from './gridset/resolver';
31-
import { generateCloneId } from '../optional/analytics/utils/idGenerator';
37+
import { generateCloneId } from '../utilities/analytics/utils/idGenerator';
3238
import { translateWithSymbols, extractSymbolsFromButton } from './gridset/symbolAlignment';
3339

3440
class GridsetProcessor extends BaseProcessor {
@@ -1245,122 +1251,56 @@ class GridsetProcessor extends BaseProcessor {
12451251
* Extract symbol information from a gridset for LLM-based translation.
12461252
* Returns a structured format showing which buttons have symbols and their context.
12471253
*
1254+
* This method uses shared translation utilities that work across all AAC formats.
1255+
*
12481256
* @param filePathOrBuffer - Path to gridset file or buffer
12491257
* @returns Array of symbol information for LLM processing
12501258
*/
1251-
extractSymbolsForLLM(filePathOrBuffer: string | Buffer): Array<{
1252-
buttonId: string;
1253-
pageId: string;
1254-
pageName: string;
1255-
label: string;
1256-
message: string;
1257-
textToTranslate: string;
1258-
symbols: Array<{
1259-
text: string;
1260-
image?: string;
1261-
symbolLibrary?: string;
1262-
symbolPath?: string;
1263-
}>;
1264-
}> {
1259+
extractSymbolsForLLM(filePathOrBuffer: string | Buffer): ButtonForTranslation[] {
12651260
const tree = this.loadIntoTree(filePathOrBuffer);
1266-
const symbolInfo: Array<{
1267-
buttonId: string;
1268-
pageId: string;
1269-
pageName: string;
1270-
label: string;
1271-
message: string;
1272-
textToTranslate: string;
1273-
symbols: Array<{
1274-
text: string;
1275-
image?: string;
1276-
symbolLibrary?: string;
1277-
symbolPath?: string;
1278-
}>;
1279-
}> = [];
12801261

1262+
// Collect all buttons from all pages
1263+
const allButtons: any[] = [];
12811264
Object.values(tree.pages).forEach((page) => {
12821265
page.buttons.forEach((button) => {
1283-
// Extract symbols from various sources
1284-
const symbols: Array<{
1285-
text: string;
1286-
image?: string;
1287-
symbolLibrary?: string;
1288-
symbolPath?: string;
1289-
}> = [];
1290-
1291-
// Check richText.symbols
1292-
if (button.semanticAction?.richText?.symbols) {
1293-
symbols.push(...button.semanticAction.richText.symbols);
1294-
}
1295-
1296-
// Check symbolLibrary + symbolPath
1297-
if (button.symbolLibrary && button.symbolPath) {
1298-
const text = button.label || button.message || '';
1299-
if (text) {
1300-
symbols.push({
1301-
text,
1302-
symbolLibrary: button.symbolLibrary,
1303-
symbolPath: button.symbolPath,
1304-
});
1305-
}
1306-
}
1307-
1308-
// Check image field for symbol reference
1309-
if (button.image && button.image.startsWith('[')) {
1310-
const text = button.label || button.message || '';
1311-
if (text) {
1312-
symbols.push({
1313-
text,
1314-
image: button.image,
1315-
});
1316-
}
1317-
}
1318-
1319-
// Only include buttons that have symbols
1320-
if (symbols.length > 0) {
1321-
const textToTranslate = button.message || button.label || '';
1322-
if (textToTranslate) {
1323-
symbolInfo.push({
1324-
buttonId: button.id,
1325-
pageId: page.id,
1326-
pageName: page.name || page.id,
1327-
label: button.label || '',
1328-
message: button.message || '',
1329-
textToTranslate,
1330-
symbols,
1331-
});
1332-
}
1333-
}
1266+
// Add page context to each button
1267+
(button as any).pageId = page.id;
1268+
(button as any).pageName = page.name || page.id;
1269+
allButtons.push(button);
13341270
});
13351271
});
13361272

1337-
return symbolInfo;
1273+
// Use shared utility to extract buttons with translation context
1274+
return extractAllButtonsForTranslation(allButtons, (button) => ({
1275+
pageId: button.pageId,
1276+
pageName: button.pageName,
1277+
}));
13381278
}
13391279

13401280
/**
13411281
* Apply LLM translations with symbol information.
13421282
* The LLM should provide translations with symbol attachments in the correct positions.
13431283
*
1284+
* This method uses shared translation utilities that work across all AAC formats.
1285+
*
13441286
* @param filePathOrBuffer - Path to gridset file or buffer
13451287
* @param llmTranslations - Array of LLM translations with symbol info
13461288
* @param outputPath - Where to save the translated gridset
1289+
* @param options - Translation options (e.g., allowPartial for testing)
13471290
* @returns Buffer of the translated gridset
13481291
*/
13491292
processLLMTranslations(
13501293
filePathOrBuffer: string | Buffer,
1351-
llmTranslations: Array<{
1352-
buttonId: string;
1353-
translatedLabel?: string;
1354-
translatedMessage?: string;
1355-
symbols?: Array<{
1356-
text: string;
1357-
image?: string;
1358-
}>;
1359-
}>,
1360-
outputPath: string
1294+
llmTranslations: LLMLTranslationResult[],
1295+
outputPath: string,
1296+
options?: { allowPartial?: boolean }
13611297
): Buffer {
13621298
const tree = this.loadIntoTree(filePathOrBuffer);
13631299

1300+
// Validate translations using shared utility
1301+
const buttonIds = Object.values(tree.pages).flatMap((page) => page.buttons.map((b) => b.id));
1302+
validateTranslationResults(llmTranslations, buttonIds, options);
1303+
13641304
// Create a map for quick lookup
13651305
const translationMap = new Map(llmTranslations.map((t) => [t.buttonId, t]));
13661306

src/processors/obfProcessor.ts

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,17 @@ import {
1313
AACSemanticCategory,
1414
AACSemanticIntent,
1515
} from '../core/treeStructure';
16-
import { generateCloneId } from '../optional/analytics/utils/idGenerator';
16+
import { generateCloneId } from '../utilities/analytics/utils/idGenerator';
1717
import AdmZip from 'adm-zip';
1818
import fs from 'fs';
1919
import { ObfValidator } from '../validation/obfValidator';
2020
import { ValidationResult } from '../validation/validationTypes';
21+
import {
22+
extractAllButtonsForTranslation,
23+
validateTranslationResults,
24+
type ButtonForTranslation,
25+
type LLMLTranslationResult,
26+
} from '../utilities/translation/translationProcessor';
2127

2228
const OBF_FORMAT_VERSION = 'open-board-0.1';
2329

@@ -496,6 +502,102 @@ class ObfProcessor extends BaseProcessor {
496502
async validate(filePath: string): Promise<ValidationResult> {
497503
return ObfValidator.validateFile(filePath);
498504
}
505+
506+
/**
507+
* Extract symbol information from an OBF/OBZ file for LLM-based translation.
508+
* Returns a structured format showing which buttons have symbols and their context.
509+
*
510+
* This method uses shared translation utilities that work across all AAC formats.
511+
*
512+
* @param filePathOrBuffer - Path to OBF/OBZ file or buffer
513+
* @returns Array of symbol information for LLM processing
514+
*/
515+
extractSymbolsForLLM(filePathOrBuffer: string | Buffer): ButtonForTranslation[] {
516+
const tree = this.loadIntoTree(filePathOrBuffer);
517+
518+
// Collect all buttons from all pages
519+
const allButtons: any[] = [];
520+
Object.values(tree.pages).forEach((page) => {
521+
page.buttons.forEach((button) => {
522+
// Add page context to each button
523+
(button as any).pageId = page.id;
524+
(button as any).pageName = page.name || page.id;
525+
allButtons.push(button);
526+
});
527+
});
528+
529+
// Use shared utility to extract buttons with translation context
530+
return extractAllButtonsForTranslation(allButtons, (button) => ({
531+
pageId: button.pageId,
532+
pageName: button.pageName,
533+
}));
534+
}
535+
536+
/**
537+
* Apply LLM translations with symbol information.
538+
* The LLM should provide translations with symbol attachments in the correct positions.
539+
*
540+
* This method uses shared translation utilities that work across all AAC formats.
541+
*
542+
* @param filePathOrBuffer - Path to OBF/OBZ file or buffer
543+
* @param llmTranslations - Array of LLM translations with symbol info
544+
* @param outputPath - Where to save the translated OBF/OBZ file
545+
* @param options - Translation options (e.g., allowPartial for testing)
546+
* @returns Buffer of the translated OBF/OBZ file
547+
*/
548+
processLLMTranslations(
549+
filePathOrBuffer: string | Buffer,
550+
llmTranslations: LLMLTranslationResult[],
551+
outputPath: string,
552+
options?: { allowPartial?: boolean }
553+
): Buffer {
554+
const tree = this.loadIntoTree(filePathOrBuffer);
555+
556+
// Validate translations using shared utility
557+
const buttonIds = Object.values(tree.pages).flatMap((page) => page.buttons.map((b) => b.id));
558+
validateTranslationResults(llmTranslations, buttonIds, options);
559+
560+
// Create a map for quick lookup
561+
const translationMap = new Map(llmTranslations.map((t) => [t.buttonId, t]));
562+
563+
// Apply translations
564+
Object.values(tree.pages).forEach((page) => {
565+
page.buttons.forEach((button) => {
566+
const translation = translationMap.get(button.id);
567+
if (!translation) return;
568+
569+
// Apply label translation
570+
if (translation.translatedLabel) {
571+
button.label = translation.translatedLabel;
572+
}
573+
574+
// Apply message translation (vocalization in OBF)
575+
if (translation.translatedMessage) {
576+
button.message = translation.translatedMessage;
577+
578+
// Update semantic action if symbols provided
579+
if (translation.symbols && translation.symbols.length > 0) {
580+
if (!button.semanticAction) {
581+
button.semanticAction = {
582+
category: AACSemanticCategory.COMMUNICATION,
583+
intent: AACSemanticIntent.SPEAK_TEXT,
584+
text: translation.translatedMessage,
585+
};
586+
}
587+
588+
button.semanticAction.richText = {
589+
text: translation.translatedMessage,
590+
symbols: translation.symbols,
591+
};
592+
}
593+
}
594+
});
595+
});
596+
597+
// Save and return
598+
this.saveFromTree(tree, outputPath);
599+
return fs.readFileSync(outputPath);
600+
}
499601
}
500602

501603
export { ObfProcessor };

src/processors/snapProcessor.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import {
1313
AACSemanticCategory,
1414
AACSemanticIntent,
1515
} from '../core/treeStructure';
16-
import { generateCloneId } from '../optional/analytics/utils/idGenerator';
16+
import { generateCloneId } from '../utilities/analytics/utils/idGenerator';
1717
import Database from 'better-sqlite3';
1818
import path from 'path';
1919
import fs from 'fs';

0 commit comments

Comments
 (0)