1616 */
1717
1818import type { ParsedMessage } from '../types'
19- import { normalizeApostrophes } from './index'
19+ import { chunkMessage , createChunkedMessages , normalizeApostrophes } from './index'
2020
2121// Timestamp line pattern: Apr 02, 2025 8:52:29 AM (optional read receipt)
2222const TIMESTAMP_PATTERN =
@@ -100,23 +100,26 @@ interface MessageBuilder {
100100type ParserState = 'timestamp' | 'sender' | 'content'
101101
102102/**
103- * Finalize a message builder into a ParsedMessage.
103+ * Finalize a message builder into ParsedMessage(s), chunking long content.
104+ * Returns an array of messages (multiple if content was chunked).
104105 */
105- function finalizeBuilder ( builder : MessageBuilder , messageId : number ) : ParsedMessage | null {
106+ function finalizeBuilder ( builder : MessageBuilder , startId : number ) : ParsedMessage [ ] {
106107 const content = builder . contentLines . join ( '\n' ) . trim ( )
107- if ( content . length === 0 ) return null
108+ if ( content . length === 0 ) return [ ]
108109
109110 const urls = extractUrls ( content )
110- return {
111- id : messageId ,
111+ const rawLine = builder . rawLines . join ( '\n' )
112+ const chunks = chunkMessage ( content )
113+
114+ return createChunkedMessages ( chunks , {
115+ startId,
112116 timestamp : builder . timestamp ,
113117 sender : builder . sender ,
114- content,
115- rawLine : builder . rawLines . join ( '\n' ) ,
116- hasMedia : false ,
117- urls : urls . length > 0 ? urls : undefined ,
118- source : 'imessage'
119- }
118+ rawLine,
119+ source : 'imessage' ,
120+ urls,
121+ hasMedia : false
122+ } )
120123}
121124
122125interface IMessageParserState {
@@ -138,12 +141,12 @@ function createInitialState(): IMessageParserState {
138141function handleTimestampLine (
139142 timestampMatch : RegExpExecArray ,
140143 parserState : IMessageParserState
141- ) : ParsedMessage | null {
142- let message : ParsedMessage | null = null
144+ ) : ParsedMessage [ ] {
145+ let messages : ParsedMessage [ ] = [ ]
143146
144147 if ( parserState . currentBuilder && parserState . currentBuilder . contentLines . length > 0 ) {
145- message = finalizeBuilder ( parserState . currentBuilder , parserState . messageId )
146- if ( message ) parserState . messageId ++
148+ messages = finalizeBuilder ( parserState . currentBuilder , parserState . messageId )
149+ parserState . messageId += messages . length
147150 }
148151
149152 const [ , dateStr , timeStr ] = timestampMatch
@@ -153,7 +156,7 @@ function handleTimestampLine(
153156 parserState . state = 'sender'
154157 parserState . currentBuilder = null
155158
156- return message
159+ return messages
157160}
158161
159162function handleSenderLine (
@@ -185,9 +188,9 @@ function handleContentLine(
185188}
186189
187190/**
188- * Process a single line and return a message if a complete one was found.
191+ * Process a single line and return any messages that it completed.
189192 */
190- function processLine ( line : string , parserState : IMessageParserState ) : ParsedMessage | null {
193+ function processLine ( line : string , parserState : IMessageParserState ) : ParsedMessage [ ] {
191194 const trimmedLine = line . trim ( )
192195 const timestampMatch = TIMESTAMP_PATTERN . exec ( trimmedLine )
193196
@@ -201,17 +204,17 @@ function processLine(line: string, parserState: IMessageParserState): ParsedMess
201204 handleContentLine ( line , trimmedLine , parserState )
202205 }
203206
204- return null
207+ return [ ]
205208}
206209
207210/**
208- * Finalize the parser state and return any remaining message.
211+ * Finalize the parser state and return any remaining messages.
209212 */
210- function finalizeParserState ( parserState : IMessageParserState ) : ParsedMessage | null {
213+ function finalizeParserState ( parserState : IMessageParserState ) : ParsedMessage [ ] {
211214 if ( parserState . currentBuilder && parserState . currentBuilder . contentLines . length > 0 ) {
212215 return finalizeBuilder ( parserState . currentBuilder , parserState . messageId )
213216 }
214- return null
217+ return [ ]
215218}
216219
217220/**
@@ -225,12 +228,12 @@ export function parseIMessageChat(raw: string): ParsedMessage[] {
225228 const parserState = createInitialState ( )
226229
227230 for ( const line of lines ) {
228- const msg = processLine ( line , parserState )
229- if ( msg ) messages . push ( msg )
231+ const parsed = processLine ( line , parserState )
232+ messages . push ( ... parsed )
230233 }
231234
232- const finalMsg = finalizeParserState ( parserState )
233- if ( finalMsg ) messages . push ( finalMsg )
235+ const finalized = finalizeParserState ( parserState )
236+ messages . push ( ... finalized )
234237
235238 return messages
236239}
@@ -246,10 +249,14 @@ export async function* parseIMessageChatStream(
246249 for await ( const rawLine of lines ) {
247250 // Normalize apostrophe variants (curly → straight) for regex matching
248251 const line = normalizeApostrophes ( rawLine )
249- const msg = processLine ( line , parserState )
250- if ( msg ) yield msg
252+ const parsed = processLine ( line , parserState )
253+ for ( const msg of parsed ) {
254+ yield msg
255+ }
251256 }
252257
253- const finalMsg = finalizeParserState ( parserState )
254- if ( finalMsg ) yield finalMsg
258+ const finalized = finalizeParserState ( parserState )
259+ for ( const msg of finalized ) {
260+ yield msg
261+ }
255262}
0 commit comments