11package com.team1.hangsha.batch.job
22
3+ import com.fasterxml.jackson.databind.ObjectMapper
34import com.team1.hangsha.batch.crawler.DetailSession
45import com.team1.hangsha.batch.crawler.ExtraSnuCrawler
56import com.team1.hangsha.batch.crawler.ProgramEvent
@@ -10,12 +11,15 @@ import com.team1.hangsha.event.service.EventSyncService
1011import org.springframework.boot.ApplicationArguments
1112import org.springframework.boot.ApplicationRunner
1213import org.springframework.stereotype.Component
14+ import java.nio.file.Files
15+ import java.nio.file.Path
1316import kotlin.system.exitProcess
1417
1518@Component
1619class ExtraSnuSyncRunner (
1720 private val eventSyncService : EventSyncService ,
1821 private val ociUploadService : OciUploadService ,
22+ private val objectMapper : ObjectMapper ,
1923) : ApplicationRunner {
2024
2125 override fun run (args : ApplicationArguments ) {
@@ -26,6 +30,7 @@ class ExtraSnuSyncRunner(
2630 var totalUpserted = 0
2731 var totalCrawled = 0
2832 var totalSkipped = 0
33+ val dumpBuffer = mutableListOf<CrawledProgramEvent >()
2934
3035 ExtraSnuCrawler (
3136 delayMsBetweenPages = opt.delayMs,
@@ -48,25 +53,46 @@ class ExtraSnuSyncRunner(
4853 crawler.enrichDetails(baseEvents, ociUploadService) // { e -> e.status != "모집마감" } // @TODO: 위의 0001, 0002, ... 와 같이 매직 넘버라, ENUM화?
4954 }
5055
51- val eventsWithUploadedImages = if (! opt.withDetails) {
52- events
53- } else {
54- crawler.uploadEventImages(events, ociUploadService)
56+ // dumpOnly 여부와 상관없이 이미지 업로드는 항상 수행한다.
57+ val eventsWithUploadedImages = crawler.uploadEventImages(events, ociUploadService)
58+
59+ val crawledEvents = eventsWithUploadedImages.map { it.toCrawledProgramEvent() }
60+ if (opt.outFile != null ) {
61+ dumpBuffer + = crawledEvents
5562 }
5663
57- val result = eventSyncService.sync(eventsWithUploadedImages.map { it.toCrawledProgramEvent() })
64+ totalCrawled + = crawledEvents.size
65+ if (opt.dumpOnly) {
66+ println (" Page $page crawled: total=${crawledEvents.size} " )
67+ continue
68+ }
5869
70+ val result = eventSyncService.sync(crawledEvents)
5971 totalUpserted + = result.upserted
60- totalCrawled + = result.total
6172 totalSkipped + = result.skipped
6273
6374 println (" Page $page synced: upserted=${result.upserted} , total=${result.total} , skipped=${result.skipped} " )
6475 }
6576 }
6677
67- println (" Synced $totalUpserted rows from $totalCrawled crawled events (skipped=$totalSkipped )" )
78+ if (opt.outFile != null ) {
79+ writeDumpFile(opt.outFile, dumpBuffer)
80+ println (" Saved crawled events to ${opt.outFile} (count=${dumpBuffer.size} )" )
81+ }
82+
83+ if (opt.dumpOnly) {
84+ println (" Crawled $totalCrawled rows (dump-only mode)" )
85+ } else {
86+ println (" Synced $totalUpserted rows from $totalCrawled crawled events (skipped=$totalSkipped )" )
87+ }
6888 exitProcess(0 )
6989 }
90+
/**
 * Writes [rows] to [outFile] as pretty-printed JSON using the injected object mapper.
 *
 * The given path is resolved to an absolute, normalized form, and its parent
 * directory (when it has one) is created before the file is written.
 */
private fun writeDumpFile(outFile: String, rows: List<CrawledProgramEvent>) {
    val target = Path.of(outFile).toAbsolutePath().normalize()

    // A path without a parent (e.g. a filesystem root) needs no directory creation.
    val parentDir = target.parent
    if (parentDir != null) {
        Files.createDirectories(parentDir)
    }

    val prettyWriter = objectMapper.writerWithDefaultPrettyPrinter()
    prettyWriter.writeValue(target.toFile(), rows)
}
7096}
7197
7298private data class BatchArgs (
@@ -75,6 +101,8 @@ private data class BatchArgs(
75101 val delayMs : Long = 200 ,
76102 val withDetails : Boolean = true ,
77103 val detailDelayMs : Long = 100 ,
104+ val outFile : String? = null ,
105+ val dumpOnly : Boolean = false ,
78106) {
79107 companion object {
80108 fun from (args : ApplicationArguments ): BatchArgs {
@@ -92,6 +120,8 @@ private data class BatchArgs(
92120 delayMs = single(" delayMs" )?.toLong() ? : 200L ,
93121 withDetails = withDetails,
94122 detailDelayMs = single(" detailDelayMs" )?.toLong() ? : 100L ,
123+ outFile = single(" outFile" ),
124+ dumpOnly = args.containsOption(" dumpOnly" ),
95125 )
96126 }
97127 }
@@ -124,4 +154,4 @@ private fun DetailSession.toCrawledDetailSession(): CrawledDetailSession =
124154 endDate = endDate,
125155 startTime = startTime,
126156 endTime = endTime
127- )
157+ )
0 commit comments