Skip to content

Commit af256f6

Browse files
committed
detects bracketed 24-hour exports as iOS format
1 parent 79e85b2 commit af256f6

File tree

2 files changed

+93
-22
lines changed

2 files changed

+93
-22
lines changed

src/parser/whatsapp.test.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@ describe('WhatsApp Parser', () => {
1919
const content = 'Random text without patterns'
2020
expect(detectFormat(content)).toBe('ios')
2121
})
22+
23+
// Bracketed timestamps without an AM/PM suffix must still be classified as 'ios'.
it('detects bracketed 24-hour exports as iOS format', () => {
  const content = `[15/03/24, 10:30:15] Sarah: Good morning! ☀️
[15/03/24, 10:31:22] Mike: Ready for the weekend?`

  expect(detectFormat(content)).toBe('ios')
})
2228
})
2329

2430
describe('parseWhatsAppChat - iOS format', () => {
@@ -97,6 +103,26 @@ and line three
97103
expect(messages[0]?.timestamp.getHours()).toBe(0)
98104
expect(messages[1]?.timestamp.getHours()).toBe(12)
99105
})
106+
107+
// A bracketed export with 24-hour times: "15/03/24" must be read as day 15, month 3.
it('parses bracketed 24-hour day-first exports', () => {
  const content = `[15/03/24, 10:30:15] Sarah: Good morning! ☀️
[15/03/24, 10:32:45] Sarah: We should try that new Italian place on Ponsonby Road
[15/03/24, 10:36:28] Sarah: Definitely! Let's do the Waitakere Ranges trail`

  const messages = parseWhatsAppChat(content)

  // One message per input line.
  expect(messages).toHaveLength(3)

  // Sender and body of the first message.
  expect(messages[0]?.sender).toBe('Sarah')
  expect(messages[0]?.content).toBe('Good morning! ☀️')

  // Date components: two-digit year expanded, month is zero-based on Date.
  expect(messages[0]?.timestamp.getFullYear()).toBe(2024)
  expect(messages[0]?.timestamp.getMonth()).toBe(2)
  expect(messages[0]?.timestamp.getDate()).toBe(15)

  // Time components are taken as-is (no AM/PM adjustment).
  expect(messages[0]?.timestamp.getHours()).toBe(10)
  expect(messages[0]?.timestamp.getMinutes()).toBe(30)
  expect(messages[0]?.timestamp.getSeconds()).toBe(15)

  // Remaining messages keep their text.
  expect(messages[1]?.content).toContain('Ponsonby Road')
  expect(messages[2]?.content).toContain('Waitakere Ranges trail')
})
100126
})
101127

102128
describe('parseWhatsAppChat - Android format', () => {

src/parser/whatsapp.ts

Lines changed: 67 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@
1010
import type { MediaType, ParsedMessage, ParserOptions, WhatsAppFormat } from '../types'
1111
import { chunkMessage, createChunkedMessages, normalizeApostrophes } from './index'
1212

13-
// WhatsApp iOS format:
// - [MM/DD/YY, H:MM:SS AM/PM] Sender: Message
// - [DD/MM/YY, HH:MM:SS] Sender: Message (locale-dependent 24-hour export)
// Notes:
// - \u200E is the left-to-right mark that WhatsApp iOS exports include at line start
// - \u202F is the narrow no-break space between time and AM/PM
// - The seconds field and the AM/PM suffix are both optional in the pattern
// Capture groups: 1 = date, 2 = time (including any AM/PM suffix), 3 = sender, 4 = message text
const IOS_MESSAGE_PATTERN =
  /^[\u200E]?\[(\d{1,2}\/\d{1,2}\/\d{2,4}),\s*(\d{1,2}:\d{2}(?::\d{2})?[\s\u202F]*(?:[AP]M)?)\]\s*([^:]+):\s*(.*)$/
1921

2022
// WhatsApp Android format: MM/DD/YY, H:MM - Sender: Message
2123
const ANDROID_MESSAGE_PATTERN =
@@ -53,20 +55,69 @@ const SYSTEM_PATTERNS: readonly RegExp[] = [
5355
// URL extraction pattern
5456
const URL_PATTERN = /https?:\/\/[^\s<>"')\]]+/gi
5557

56-
/**
57-
* Parse a WhatsApp timestamp (iOS format: MM/DD/YY H:MM:SS AM/PM)
58-
*/
59-
function parseIosTimestamp(dateStr: string, timeStr: string): Date {
58+
type DateOrder = 'month-first' | 'day-first'
59+
60+
/**
 * Convert the year component of a date string to a number.
 *
 * @param year - Year text taken from a split date string; may be missing.
 * @returns 2025 when `year` is undefined or empty (a fixed fallback, not the
 *   current year); otherwise the integer parsed from `year`, with
 *   two-character years prefixed by "20" first.
 */
function normalizeYear(year: string | undefined): number {
  if (!year) {
    return 2025
  }

  // Two-character years are assumed to be in the 2000s.
  return Number.parseInt(year.length === 2 ? `20${year}` : year, 10)
}
67+
68+
/**
 * Decide whether a slash-separated date is written month-first or day-first.
 *
 * A component greater than 12 cannot be a month, so when exactly one of the
 * first two components exceeds 12 the order is determined by the values.
 * In every other case (both at most 12, both above 12, or non-numeric
 * components) `defaultOrder` is returned unchanged.
 *
 * @param dateStr - Date text with exactly three `/`-separated components.
 * @param defaultOrder - Order to return when the values do not settle it.
 * @returns The resolved component order.
 * @throws Error if `dateStr` does not split into exactly three components.
 */
function resolveDateOrder(dateStr: string, defaultOrder: DateOrder): DateOrder {
  const dateParts = dateStr.split('/')
  if (dateParts.length !== 3) {
    throw new Error(`Invalid date format: ${dateStr}`)
  }

  const [first, second] = dateParts
  const firstValue = Number.parseInt(first ?? '0', 10)
  const secondValue = Number.parseInt(second ?? '0', 10)

  // Only the first component is too large to be a month.
  if (firstValue > 12 && secondValue <= 12) {
    return 'day-first'
  }

  // Only the second component is too large to be a month.
  if (secondValue > 12 && firstValue <= 12) {
    return 'month-first'
  }

  return defaultOrder
}
88+
89+
/**
 * Split a slash-separated date string into numeric year, month and day.
 *
 * Which of the first two components is the month is decided by
 * `resolveDateOrder`; the third component is passed to `normalizeYear`.
 * Month and day are returned as written in the string; no range validation
 * is performed here.
 *
 * @param dateStr - Date text with exactly three `/`-separated components.
 * @param defaultOrder - Component order to assume when the date is ambiguous.
 * @returns The parsed `year`, `month` and `day`.
 * @throws Error if `dateStr` does not split into exactly three components.
 */
function parseDateParts(
  dateStr: string,
  defaultOrder: DateOrder
): { year: number; month: number; day: number } {
  const dateParts = dateStr.split('/')
  if (dateParts.length !== 3) {
    throw new Error(`Invalid date format: ${dateStr}`)
  }

  const [first, second, year] = dateParts
  const dateOrder = resolveDateOrder(dateStr, defaultOrder)
  const month = dateOrder === 'month-first' ? first : second
  const day = dateOrder === 'month-first' ? second : first

  return {
    year: normalizeYear(year),
    month: Number.parseInt(month ?? '1', 10),
    day: Number.parseInt(day ?? '1', 10)
  }
}
109+
110+
/**
111+
* Parse a WhatsApp timestamp (iOS format: bracketed 12-hour or 24-hour time)
112+
*/
113+
function parseIosTimestamp(dateStr: string, timeStr: string): Date {
114+
const { year, month, day } = parseDateParts(
115+
dateStr,
116+
/[AP]M/i.test(timeStr) ? 'month-first' : 'day-first'
117+
)
67118

68119
// Parse time; the seconds field and the AM/PM suffix are both optional
69-
const timeMatch = timeStr.match(/(\d{1,2}):(\d{2}):(\d{2})\s*([AP]M)/i)
120+
const timeMatch = timeStr.match(/(\d{1,2}):(\d{2})(?::(\d{2}))?[\s\u202F]*([AP]M)?/i)
70121
if (!timeMatch) {
71122
throw new Error(`Invalid time format: ${timeStr}`)
72123
}
@@ -80,9 +131,9 @@ function parseIosTimestamp(dateStr: string, timeStr: string): Date {
80131
}
81132

82133
return new Date(
83-
Number.parseInt(fullYear ?? '2025', 10),
84-
Number.parseInt(month ?? '1', 10) - 1,
85-
Number.parseInt(day ?? '1', 10),
134+
year,
135+
month - 1,
136+
day,
86137
hour,
87138
Number.parseInt(minute ?? '0', 10),
88139
Number.parseInt(second ?? '0', 10)
@@ -93,13 +144,7 @@ function parseIosTimestamp(dateStr: string, timeStr: string): Date {
93144
* Parse a WhatsApp timestamp (Android format: MM/DD/YY H:MM)
94145
*/
95146
function parseAndroidTimestamp(dateStr: string, timeStr: string): Date {
96-
const dateParts = dateStr.split('/')
97-
if (dateParts.length !== 3) {
98-
throw new Error(`Invalid date format: ${dateStr}`)
99-
}
100-
101-
const [month, day, year] = dateParts
102-
const fullYear = year?.length === 2 ? `20${year}` : year
147+
const { year, month, day } = parseDateParts(dateStr, 'month-first')
103148

104149
const timeParts = timeStr.split(':')
105150
if (timeParts.length !== 2) {
@@ -109,9 +154,9 @@ function parseAndroidTimestamp(dateStr: string, timeStr: string): Date {
109154
const [hour, minute] = timeParts
110155

111156
return new Date(
112-
Number.parseInt(fullYear ?? '2025', 10),
113-
Number.parseInt(month ?? '1', 10) - 1,
114-
Number.parseInt(day ?? '1', 10),
157+
year,
158+
month - 1,
159+
day,
115160
Number.parseInt(hour ?? '0', 10),
116161
Number.parseInt(minute ?? '0', 10)
117162
)

0 commit comments

Comments
 (0)