/**
 * Tests for CLI output formatting, input parsing, and model-cache helpers.
 * These are pure unit tests — no workers, no network.
 *
 * All helpers are imported directly from cli.js now that it is guarded by an
 * ESM entry-point check and will not invoke main() when imported.
 */
58
6- import { test , describe } from 'node:test' ;
9+ import { test , describe , mock } from 'node:test' ;
710import assert from 'node:assert/strict' ;
811import { homedir } from 'os' ;
912import { join } from 'path' ;
10- import { existsSync } from 'fs' ;
13+ import { existsSync , readFileSync , mkdtempSync , rmSync } from 'fs' ;
14+ import { tmpdir } from 'os' ;
15+
16+ // Import the real helpers from cli.js instead of duplicating them here.
17+ // The entry guard in cli.js ensures main() is not called on import.
18+ const { parseDelimiter, parseTexts, formatOutput, writeOutput } = await import ( '../src/cli.js' ) ;
1119
1220describe ( 'model-cache' , async ( ) => {
1321 test ( 'DEFAULT_CACHE_DIR is ~/.embedeer/models' , async ( ) => {
@@ -30,73 +38,6 @@ describe('model-cache', async () => {
3038 } ) ;
3139} ) ;
3240
33- // ── Inline helpers mirroring cli.js (cli.js runs main() on import) ──────────
34-
// Maps the character that follows a backslash to the control character it denotes.
const DELIMITER_ESCAPES = Object.freeze({
  0: '\0',
  n: '\n',
  t: '\t',
  r: '\r',
});

/**
 * Expands the textual escape sequences `\0`, `\n`, `\t` and `\r` (a literal
 * backslash followed by the character) into the control characters they name.
 * Every other character, including a backslash followed by anything else, is
 * left untouched.
 *
 * @param {string} str - Delimiter as typed by the user, e.g. the two characters `\` and `n`.
 * @returns {string} The delimiter with the supported escapes expanded.
 */
function parseDelimiter(str) {
  // One pass over the input; the replacements are control characters, so they
  // can never form a new escape sequence. The result is therefore the same as
  // chaining four separate replace() calls.
  return str.replace(/\\([0ntr])/g, (_match, code) => DELIMITER_ESCAPES[code]);
}
42-
/**
 * Turns raw input into a list of texts.
 *
 * If `raw` is valid JSON and the parsed value is an array, that array is
 * returned as-is. In every other case (invalid JSON, or valid JSON that is not
 * an array such as `42` or `"hello"`), `raw` is split on `delimiter` and empty
 * segments are dropped.
 *
 * @param {string} raw - Raw input text.
 * @param {string} [delimiter='\n'] - Separator used when `raw` is not a JSON array.
 * @returns {Array} The parsed JSON array, or the non-empty delimiter-separated segments.
 */
function parseTexts(raw, delimiter = '\n') {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Not JSON at all: deliberately ignored, the input is treated as
    // delimiter-separated text below.
  }

  if (Array.isArray(parsed)) {
    return parsed;
  }
  return raw.split(delimiter).filter(Boolean);
}
52-
/**
 * Serialises texts and their embedding vectors into one of the CLI output
 * formats. `embeddings[i]` is paired with `texts[i]`.
 *
 * Formats:
 * - `'jsonl'`: one `{"text", "embedding"}` JSON object per line.
 * - `'csv'`: header `text,dim_0,…,dim_{n-1}` (n taken from the first vector)
 *   followed by one row per text; the text is double-quoted with embedded
 *   quotes doubled. Returns `''` when there are no embeddings.
 * - `'txt'`: one space-separated vector per line; with `withText`, each line
 *   is prefixed by the text and a tab.
 * - `'sql'`: a single `INSERT INTO embeddings (text, vector) VALUES …;`
 *   statement, with single quotes in the text doubled and the vector stored
 *   as its JSON string.
 * - anything else: JSON — the array of vectors, or with `withText` an array
 *   of `{text, embedding}` objects.
 *
 * @param {string[]} texts - Input texts.
 * @param {number[][]} embeddings - One vector per text, in the same order.
 * @param {string} format - Output format name; unknown names produce JSON.
 * @param {boolean} [withText=false] - Include the text in `'txt'` and JSON output.
 * @returns {string} The formatted output, without a trailing newline.
 */
function formatOutput(texts, embeddings, format, withText = false) {
  switch (format) {
    case 'jsonl':
      return texts
        .map((text, i) => JSON.stringify({ text, embedding: embeddings[i] }))
        .join('\n');

    case 'csv': {
      if (embeddings.length === 0) return '';
      const dims = embeddings[0].length;
      const header = ['text', ...Array.from({ length: dims }, (_, k) => `dim_${k}`)].join(',');
      const rows = texts.map((text, i) => {
        // CSV quoting: wrap in double quotes and double any embedded quote.
        const safeText = `"${text.replaceAll('"', '""')}"`;
        return [safeText, ...embeddings[i]].join(',');
      });
      return [header, ...rows].join('\n');
    }

    case 'txt':
      if (withText) {
        return texts.map((text, i) => `${text}\t${embeddings[i].join(' ')}`).join('\n');
      }
      return embeddings.map((vec) => vec.join(' ')).join('\n');

    case 'sql': {
      const rows = texts.map((text, i) => {
        // SQL string literal escaping: double every single quote.
        const safeText = text.replaceAll("'", "''");
        const vector = JSON.stringify(embeddings[i]);
        return ` ('${safeText}', '${vector}')`;
      });
      return `INSERT INTO embeddings (text, vector) VALUES\n${rows.join(',\n')};`;
    }

    default: // json
      if (withText) {
        return JSON.stringify(
          texts.map((text, i) => ({ text, embedding: embeddings[i] }))
        );
      }
      return JSON.stringify(embeddings);
  }
}
99-
10041// ── parseDelimiter ───────────────────────────────────────────────────────────
10142
10243describe ( 'parseDelimiter' , ( ) => {
@@ -162,6 +103,18 @@ describe('parseTexts', () => {
162103 const result = parseTexts ( '["x","y"]' , '|' ) ;
163104 assert . deepEqual ( result , [ 'x' , 'y' ] ) ;
164105 } ) ;
106+
test('non-array JSON falls through to delimiter splitting', () => {
  // `42` parses as JSON but is not an array, so the raw string is expected to
  // come back as a single delimiter-split segment.
  assert.deepEqual(parseTexts('42'), ['42']);
});
112+
test('JSON string (not array) falls through to delimiter splitting', () => {
  // A quoted string is valid JSON yet not an array; the input, quotes
  // included, should be returned unchanged as the only element.
  const input = '"hello"';
  assert.deepEqual(parseTexts(input), [input]);
});
165118} ) ;
166119
167120// ── CLI output formatting ────────────────────────────────────────────────────
@@ -216,6 +169,14 @@ describe('CLI output formatting', () => {
216169 assert . equal ( formatOutput ( [ ] , [ ] , 'csv' ) , '' ) ;
217170 } ) ;
218171
test('csv with a single embedding still includes header', () => {
  // Exactly two lines are expected: the header, then the single data row.
  const csv = formatOutput(['one'], [[0.5, 0.6, 0.7]], 'csv');
  const [header, row, ...extra] = csv.split('\n');
  assert.equal(header, 'text,dim_0,dim_1,dim_2');
  assert.equal(row, '"one",0.5,0.6,0.7');
  assert.equal(extra.length, 0);
});
179+
219180 test ( 'txt output is one space-separated line per embedding' , ( ) => {
220181 const out = formatOutput ( texts , embeddings , 'txt' ) ;
221182 const lines = out . split ( '\n' ) ;
@@ -246,3 +207,45 @@ describe('CLI output formatting', () => {
246207 assert . deepEqual ( parsed , embeddings ) ;
247208 } ) ;
248209} ) ;
210+
211+ // ── writeOutput ──────────────────────────────────────────────────────────────
212+
describe('writeOutput', () => {
  /**
   * Runs `action` while `console.log` is replaced by a silent spy and returns
   * the first argument of every `console.log` call made during the run.
   * The original `console.log` is restored even if `action` throws.
   *
   * @param {() => void} action - Synchronous code expected to log.
   * @returns {Array} First argument of each captured `console.log` call, in order.
   */
  const captureConsoleLog = (action) => {
    const spy = mock.method(console, 'log', () => {});
    try {
      action();
      return spy.mock.calls.map((call) => call.arguments[0]);
    } finally {
      spy.mock.restore();
    }
  };

  test('writes content + newline to a file when dumpPath is provided', () => {
    // Fresh temp directory per run so the test never touches existing files.
    const tmp = mkdtempSync(join(tmpdir(), 'embedeer-test-'));
    try {
      const dumpPath = join(tmp, 'out.json');
      writeOutput('[1,2,3]', dumpPath);
      assert.equal(readFileSync(dumpPath, 'utf8'), '[1,2,3]\n');
    } finally {
      rmSync(tmp, { recursive: true, force: true });
    }
  });

  test('logs content to console when no dumpPath given', () => {
    const logged = captureConsoleLog(() => writeOutput('hello output', null));
    assert.deepEqual(logged, ['hello output']);
  });

  test('logs to console when dumpPath is undefined', () => {
    const logged = captureConsoleLog(() => writeOutput('no path', undefined));
    assert.deepEqual(logged, ['no path']);
  });
});
0 commit comments