Skip to content

Commit 860f65f

Browse files
committed
fix(ingest): use title-based checkpoint for YNA news
1 parent 28ba925 commit 860f65f

2 files changed

Lines changed: 246 additions & 66 deletions

File tree

src/modules/ingest/app/sources/yna-news.source.test.ts

Lines changed: 204 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,16 @@ import { afterEach, describe, expect, it, vi } from 'vitest';
22
import { EventKinds } from '@/modules/events/domain/event.enums';
33
import type { LlmLabelClassifierService } from '../llm-label-classifier.service';
44

5+
type MockClassifier = Pick<LlmLabelClassifierService, 'isEnabled' | 'classifyBatch'>;
6+
57
describe('YnaNewsSource', () => {
68
afterEach(() => {
79
delete process.env.YNA_SERVICE_KEY;
810
vi.useRealTimers();
911
vi.unstubAllGlobals();
1012
});
1113

12-
it('should classify and emit news events', async () => {
14+
it('should initialize checkpoint and not emit events on first run', async () => {
1315
vi.useFakeTimers();
1416
vi.setSystemTime(new Date('2025-01-02T03:00:00.000Z'));
1517

@@ -43,14 +45,67 @@ describe('YnaNewsSource', () => {
4345
);
4446
vi.stubGlobal('fetch', fetchMock);
4547

46-
const classifier = {
48+
const classifier: MockClassifier = {
4749
isEnabled: vi.fn(() => true),
4850
classifyBatch: vi.fn(() => Promise.resolve(new Map([['123', '호우']]))),
49-
} as unknown as LlmLabelClassifierService;
51+
};
5052

5153
const source = new YnaNewsSource(classifier);
5254
const result = await source.run(null);
5355

56+
expect(result.events).toHaveLength(0);
57+
expect(classifier.classifyBatch).not.toHaveBeenCalled();
58+
59+
const parsedState = JSON.parse(result.nextState ?? '{}') as {
60+
version?: number;
61+
seen?: Record<string, string>;
62+
};
63+
expect(parsedState.version).toBe(2);
64+
expect(parsedState.seen?.['호우로 인한 침수 발생']).toBe('2025-01-02T03:00:00.000Z');
65+
});
66+
67+
it('should classify and emit news events when state is initialized', async () => {
68+
vi.useFakeTimers();
69+
vi.setSystemTime(new Date('2025-01-02T03:00:00.000Z'));
70+
71+
process.env.YNA_SERVICE_KEY = 'test-key';
72+
vi.resetModules();
73+
const { YnaNewsSource } = await import('./yna-news.source');
74+
75+
const responseBody = {
76+
header: { resultCode: '00', resultMsg: 'OK' },
77+
numOfRows: 1,
78+
pageNo: 1,
79+
totalCount: 1,
80+
body: [
81+
{
82+
YNA_NO: 123,
83+
YNA_TTL: '호우로 인한 침수 발생',
84+
YNA_CN: '침수 피해가 보고되었습니다.',
85+
YNA_YMD: '20250102',
86+
CRT_DT: '2025-01-02 11:00:00',
87+
},
88+
],
89+
};
90+
91+
const fetchMock = vi.fn().mockImplementation(() =>
92+
Promise.resolve(
93+
new Response(JSON.stringify(responseBody), {
94+
status: 200,
95+
headers: { 'Content-Type': 'application/json' },
96+
}),
97+
),
98+
);
99+
vi.stubGlobal('fetch', fetchMock);
100+
101+
const classifier: MockClassifier = {
102+
isEnabled: vi.fn(() => true),
103+
classifyBatch: vi.fn(() => Promise.resolve(new Map([['123', '호우']]))),
104+
};
105+
106+
const source = new YnaNewsSource(classifier);
107+
const result = await source.run(JSON.stringify({ version: 2, seen: {} }));
108+
54109
expect(result.events).toHaveLength(1);
55110
expect(result.events[0].kind).toBe(EventKinds.Rain);
56111
expect(result.events[0].title).toBe('호우로 인한 침수 발생');
@@ -90,12 +145,155 @@ describe('YnaNewsSource', () => {
90145
);
91146
vi.stubGlobal('fetch', fetchMock);
92147

93-
const classifier = {
148+
const classifier: MockClassifier = {
94149
isEnabled: vi.fn(() => true),
95150
classifyBatch: vi.fn(() => Promise.resolve(null)),
96-
} as unknown as LlmLabelClassifierService;
151+
};
152+
153+
const source = new YnaNewsSource(classifier);
154+
await expect(source.run(JSON.stringify({ version: 2, seen: {} }))).rejects.toThrow(
155+
'LLM label classification failed',
156+
);
157+
});
158+
159+
it('should not emit duplicate title news across runs', async () => {
160+
vi.useFakeTimers();
161+
vi.setSystemTime(new Date('2025-01-02T03:00:00.000Z'));
162+
163+
process.env.YNA_SERVICE_KEY = 'test-key';
164+
vi.resetModules();
165+
const { YnaNewsSource } = await import('./yna-news.source');
166+
167+
const firstResponseBody = {
168+
header: { resultCode: '00', resultMsg: 'OK' },
169+
numOfRows: 1,
170+
pageNo: 1,
171+
totalCount: 1,
172+
body: [
173+
{
174+
YNA_NO: 200,
175+
YNA_TTL: '산불 대응 작업 진행',
176+
YNA_CN: '첫 번째 본문',
177+
YNA_YMD: '20250102',
178+
CRT_DT: '2025-01-02 12:00:00',
179+
},
180+
],
181+
};
182+
183+
const secondResponseBody = {
184+
header: { resultCode: '00', resultMsg: 'OK' },
185+
numOfRows: 1,
186+
pageNo: 1,
187+
totalCount: 1,
188+
body: [
189+
{
190+
YNA_NO: 201,
191+
YNA_TTL: '산불 대응 작업 진행',
192+
YNA_CN: '두 번째 본문',
193+
YNA_YMD: '20250102',
194+
CRT_DT: '2025-01-02 12:10:00',
195+
},
196+
],
197+
};
198+
199+
const fetchMock = vi
200+
.fn()
201+
.mockImplementationOnce(() =>
202+
Promise.resolve(
203+
new Response(JSON.stringify(firstResponseBody), {
204+
status: 200,
205+
headers: { 'Content-Type': 'application/json' },
206+
}),
207+
),
208+
)
209+
.mockImplementationOnce(() =>
210+
Promise.resolve(
211+
new Response(JSON.stringify(secondResponseBody), {
212+
status: 200,
213+
headers: { 'Content-Type': 'application/json' },
214+
}),
215+
),
216+
);
217+
vi.stubGlobal('fetch', fetchMock);
218+
219+
const classifier: MockClassifier = {
220+
isEnabled: vi.fn(() => true),
221+
classifyBatch: vi.fn((request: { items: Array<{ id: string }> }) => {
222+
const entries: Array<[string, string]> = [];
223+
for (const item of request.items) {
224+
entries.push([item.id, '산불']);
225+
}
226+
return Promise.resolve(new Map(entries));
227+
}),
228+
};
97229

98230
const source = new YnaNewsSource(classifier);
99-
await expect(source.run(null)).rejects.toThrow('LLM label classification failed');
231+
const firstResult = await source.run(JSON.stringify({ version: 2, seen: {} }));
232+
expect(firstResult.events).toHaveLength(1);
233+
expect(firstResult.events[0].title).toBe('산불 대응 작업 진행');
234+
235+
const secondResult = await source.run(firstResult.nextState);
236+
expect(secondResult.events).toHaveLength(0);
237+
expect(classifier.classifyBatch).toHaveBeenCalledTimes(1);
238+
});
239+
240+
it('should treat legacy state as first run and skip emission', async () => {
241+
vi.useFakeTimers();
242+
vi.setSystemTime(new Date('2025-01-02T03:00:00.000Z'));
243+
244+
process.env.YNA_SERVICE_KEY = 'test-key';
245+
vi.resetModules();
246+
const { YnaNewsSource } = await import('./yna-news.source');
247+
248+
const responseBody = {
249+
header: { resultCode: '00', resultMsg: 'OK' },
250+
numOfRows: 1,
251+
pageNo: 1,
252+
totalCount: 1,
253+
body: [
254+
{
255+
YNA_NO: 301,
256+
YNA_TTL: '호우로 인한 침수 발생',
257+
YNA_CN: '본문',
258+
YNA_YMD: '20250102',
259+
CRT_DT: '2025-01-02 11:00:00',
260+
},
261+
],
262+
};
263+
264+
const fetchMock = vi.fn().mockImplementation(() =>
265+
Promise.resolve(
266+
new Response(JSON.stringify(responseBody), {
267+
status: 200,
268+
headers: { 'Content-Type': 'application/json' },
269+
}),
270+
),
271+
);
272+
vi.stubGlobal('fetch', fetchMock);
273+
274+
const classifier: MockClassifier = {
275+
isEnabled: vi.fn(() => true),
276+
classifyBatch: vi.fn(() => Promise.resolve(new Map([['301', '호우']]))),
277+
};
278+
279+
const source = new YnaNewsSource(classifier);
280+
const legacyState = JSON.stringify({
281+
seen: {
282+
300: '2025-01-02T02:30:00.000Z',
283+
},
284+
});
285+
286+
const result = await source.run(legacyState);
287+
expect(result.events).toHaveLength(0);
288+
expect(classifier.classifyBatch).not.toHaveBeenCalled();
289+
290+
const parsedState = JSON.parse(result.nextState ?? '{}') as {
291+
version?: number;
292+
seen?: Record<string, string>;
293+
};
294+
295+
expect(parsedState.version).toBe(2);
296+
expect(parsedState.seen).toBeDefined();
297+
expect(parsedState.seen?.['호우로 인한 침수 발생']).toBe('2025-01-02T03:00:00.000Z');
100298
});
101299
});

0 commit comments

Comments
 (0)