DocSpring
diff --git a/src/extraction/context-window.test.ts b/src/extraction/context-window.test.ts
Lines changed: 1 addition & 26 deletions
diff --git a/src/extraction/context-window.ts b/src/extraction/context-window.ts
Lines changed: 4 additions & 13 deletions
diff --git a/src/extraction/heuristics/context-window.test.ts b/src/extraction/heuristics/context-window.test.ts
Lines changed: 6 additions & 7 deletions
diff --git a/src/parser/imessage.ts b/src/parser/imessage.ts
Lines changed: 38 additions & 31 deletions
@@ -4,14 +4,7 @@
 
 import { describe, expect, it } from 'vitest'
 import type { ParsedMessage } from '../types'
-import {
-  getMessageContext,
-  isInContextWindow,
-  MAX_MESSAGE_CHARS,
-  MIN_CONTEXT_MESSAGES,
-  TRUNCATION_MARKER,
-  truncateContent
-} from './context-window'
+import { getMessageContext, isInContextWindow, MIN_CONTEXT_MESSAGES } from './context-window'
 
 function createMessage(id: number, content: string, sender = 'User'): ParsedMessage {
   return {
@@ -25,24 +18,6 @@ function createMessage(id: number, content: string, sender = 'User'): ParsedMess
   }
 }
 
-describe('truncateContent', () => {
-  it('returns short messages unchanged', () => {
-    const msg = 'Hello world'
-    expect(truncateContent(msg)).toBe(msg)
-  })
-
-  it('truncates messages over 280 chars', () => {
-    const longMsg = 'x'.repeat(300)
-    const result = truncateContent(longMsg)
-    expect(result).toBe('x'.repeat(MAX_MESSAGE_CHARS) + TRUNCATION_MARKER)
-  })
-
-  it('returns exactly 280 chars unchanged', () => {
-    const msg = 'x'.repeat(280)
-    expect(truncateContent(msg)).toBe(msg)
-  })
-})
-
 describe('getMessageContext', () => {
   it('throws for invalid index', () => {
     const messages = [createMessage(1, 'Hello')]
 
@@ -8,7 +8,7 @@
  * Rules:
  * - Minimum 280 chars before and 280 chars after
  * - Minimum 2 messages on each side
- * - Each message truncated to max 280 chars with "[truncated to 280 chars]" suffix
+ * - Messages are already chunked at parse time (normally ≤280 chars each; a message is left whole when the remainder would be too small to chunk), no truncation needed
  * - Snaps to message boundaries
  * - Messages include timestamps in WhatsApp format so AI understands time gaps
  */
@@ -17,8 +17,6 @@ import type { CandidateMessage, ContextMessage, ParsedMessage } from '../types'
 
 const MIN_CONTEXT_CHARS = 280
 export const MIN_CONTEXT_MESSAGES = 2
-export const MAX_MESSAGE_CHARS = 280
-export const TRUNCATION_MARKER = ' [truncated to 280 chars]'
 
 export interface MessageContext {
   /** Context messages before target */
@@ -34,21 +32,14 @@ export interface MessageContext {
 }
 
 /**
- * Truncate content to max chars with marker.
- */
-export function truncateContent(content: string): string {
-  if (content.length <= MAX_MESSAGE_CHARS) return content
-  return content.slice(0, MAX_MESSAGE_CHARS) + TRUNCATION_MARKER
-}
-
-/**
- * Convert a ParsedMessage to a ContextMessage with truncated content.
+ * Convert a ParsedMessage to a ContextMessage.
+ * No truncation needed - messages are already chunked at parse time.
  */
 function toContextMessage(msg: ParsedMessage): ContextMessage {
   return {
     id: msg.id,
     sender: msg.sender,
-    content: truncateContent(msg.content),
+    content: msg.content,
     timestamp: msg.timestamp
   }
 }
 
@@ -4,8 +4,8 @@
  * Rules:
  * - Minimum 280 chars before and 280 chars after
  * - Minimum 2 messages on each side
- * - Each message truncated to max 280 chars with "[truncated to 280 chars]" suffix
- * - For prior context: snap to message boundaries, then truncate
+ * - Messages are chunked at parse time (normally ≤280 chars each; left whole when the remainder would be too small to chunk), no truncation in context
+ * - For prior context: snap to message boundaries
  */
 
 import { readFileSync } from 'node:fs'
@@ -58,7 +58,7 @@ describe('Context Window', () => {
     expect(visitCandidate.contextAfter.length).toBeGreaterThanOrEqual(2)
   })
 
-  it('should truncate long messages with marker', async () => {
+  it('should preserve long messages when remainder would be too small to chunk', async () => {
     const chat = readFileSync(join(FIXTURES_DIR, 'context-window.txt'), 'utf-8')
     const messages = parseWhatsAppChat(chat)
     const result = await extractCandidatesByHeuristics(messages)
@@ -71,11 +71,10 @@ describe('Context Window', () => {
       ...candidate.contextAfter.map((m) => m.content)
     ].join('\n')
 
-    // Long message about The Golden Fork should be truncated
-    expect(allContext).toContain('[truncated to 280 chars]')
+    // Long message about The Golden Fork is NOT chunked because remainder < 32 chars
+    // All content is preserved in a single message
     expect(allContext).toContain('Golden Fork')
-    // Should not contain text beyond 280 chars (the end of the message)
-    expect(allContext).not.toContain('with friends and family')
+    expect(allContext).toContain('friends and family')
   })
 
   it('should get at least 280 chars of context before', async () => {
 
@@ -16,7 +16,7 @@
  */
 
 import type { ParsedMessage } from '../types'
-import { normalizeApostrophes } from './index'
+import { chunkMessage, createChunkedMessages, normalizeApostrophes } from './index'
 
 // Timestamp line pattern: Apr 02, 2025  8:52:29 AM (optional read receipt)
 const TIMESTAMP_PATTERN =
@@ -100,23 +100,26 @@ interface MessageBuilder {
 type ParserState = 'timestamp' | 'sender' | 'content'
 
 /**
- * Finalize a message builder into a ParsedMessage.
+ * Finalize a message builder into ParsedMessage(s), chunking long content.
+ * Returns an array of messages: empty if the content is blank, multiple if the content was chunked.
  */
-function finalizeBuilder(builder: MessageBuilder, messageId: number): ParsedMessage | null {
+function finalizeBuilder(builder: MessageBuilder, startId: number): ParsedMessage[] {
   const content = builder.contentLines.join('\n').trim()
-  if (content.length === 0) return null
+  if (content.length === 0) return []
 
   const urls = extractUrls(content)
-  return {
-    id: messageId,
+  const rawLine = builder.rawLines.join('\n')
+  const chunks = chunkMessage(content)
+
+  return createChunkedMessages(chunks, {
+    startId,
     timestamp: builder.timestamp,
     sender: builder.sender,
-    content,
-    rawLine: builder.rawLines.join('\n'),
-    hasMedia: false,
-    urls: urls.length > 0 ? urls : undefined,
-    source: 'imessage'
-  }
+    rawLine,
+    source: 'imessage',
+    urls,
+    hasMedia: false
+  })
 }
 
 interface IMessageParserState {
@@ -138,12 +141,12 @@ function createInitialState(): IMessageParserState {
 function handleTimestampLine(
   timestampMatch: RegExpExecArray,
   parserState: IMessageParserState
-): ParsedMessage | null {
-  let message: ParsedMessage | null = null
+): ParsedMessage[] {
+  let messages: ParsedMessage[] = []
 
   if (parserState.currentBuilder && parserState.currentBuilder.contentLines.length > 0) {
-    message = finalizeBuilder(parserState.currentBuilder, parserState.messageId)
-    if (message) parserState.messageId++
+    messages = finalizeBuilder(parserState.currentBuilder, parserState.messageId)
+    parserState.messageId += messages.length
   }
 
   const [, dateStr, timeStr] = timestampMatch
@@ -153,7 +156,7 @@ function handleTimestampLine(
   parserState.state = 'sender'
   parserState.currentBuilder = null
 
-  return message
+  return messages
 }
 
 function handleSenderLine(
@@ -185,9 +188,9 @@ function handleContentLine(
 }
 
 /**
- * Process a single line and return a message if a complete one was found.
+ * Process a single line and return the messages it completes (an empty array if none).
  */
-function processLine(line: string, parserState: IMessageParserState): ParsedMessage | null {
+function processLine(line: string, parserState: IMessageParserState): ParsedMessage[] {
   const trimmedLine = line.trim()
   const timestampMatch = TIMESTAMP_PATTERN.exec(trimmedLine)
 
@@ -201,17 +204,17 @@ function processLine(line: string, parserState: IMessageParserState): ParsedMess
     handleContentLine(line, trimmedLine, parserState)
   }
 
-  return null
+  return []
 }
 
 /**
- * Finalize the parser state and return any remaining message.
+ * Finalize the parser state and return any remaining messages.
  */
-function finalizeParserState(parserState: IMessageParserState): ParsedMessage | null {
+function finalizeParserState(parserState: IMessageParserState): ParsedMessage[] {
   if (parserState.currentBuilder && parserState.currentBuilder.contentLines.length > 0) {
     return finalizeBuilder(parserState.currentBuilder, parserState.messageId)
   }
-  return null
+  return []
 }
 
 /**
@@ -225,12 +228,12 @@ export function parseIMessageChat(raw: string): ParsedMessage[] {
   const parserState = createInitialState()
 
   for (const line of lines) {
-    const msg = processLine(line, parserState)
-    if (msg) messages.push(msg)
+    const parsed = processLine(line, parserState)
+    messages.push(...parsed)
   }
 
-  const finalMsg = finalizeParserState(parserState)
-  if (finalMsg) messages.push(finalMsg)
+  const finalized = finalizeParserState(parserState)
+  messages.push(...finalized)
 
   return messages
 }
@@ -246,10 +249,14 @@ export async function* parseIMessageChatStream(
   for await (const rawLine of lines) {
     // Normalize apostrophe variants (curly → straight) for regex matching
     const line = normalizeApostrophes(rawLine)
-    const msg = processLine(line, parserState)
-    if (msg) yield msg
+    const parsed = processLine(line, parserState)
+    for (const msg of parsed) {
+      yield msg
+    }
   }
 
-  const finalMsg = finalizeParserState(parserState)
-  if (finalMsg) yield finalMsg
+  const finalized = finalizeParserState(parserState)
+  for (const msg of finalized) {
+    yield msg
+  }
 }