@@ -84,7 +84,7 @@ class ExtraSnuCrawler(
8484 var sessions = parseDetailSessions(html1)
8585 var mainHtml = parseMainContentHtml(html1, ociUploadService)
8686
87- if (sessions.isEmpty()) {
87+ if (sessions.isEmpty()) { // fallback once
8888 val html2 = fetchDetailPageByPlaywright(dataSeq)
8989 if (html2 != null ) {
9090 sessions = parseDetailSessions(html2)
@@ -140,20 +140,21 @@ class ExtraSnuCrawler(
140140 if (debug) println (" \n [PW] goto(view) => $viewUrl " )
141141
142142 fun isWait (u : String ) = u.contains(" /wait.jsp" )
143- fun isDetail (u : String ) = u.contains(" /ptfol/imng/icmpNsbjtPgm/findIcmpNsbjtPgmInfo.do" )
143+ fun isDetail (u : String ) =
144+ u.contains(" /ptfol/imng/icmpNsbjtPgm/findIcmpNsbjtPgmInfo.do" ) ||
145+ u.contains(" /ptfol/cous/staGrp/rcri/view.do" )
144146
145147 val page = pwContext.newPage()
146148
147149 return try {
148- // ✅ domcontentloaded 기다리지 말고 'commit'까지만 (응답만 받으면 됨)
149150 page.navigate(
150151 viewUrl,
151152 Page .NavigateOptions ()
152153 .setWaitUntil(WaitUntilState .COMMIT )
153154 .setTimeout(20_000.0 )
154155 )
155156
156- val hardDeadlineMs = System .currentTimeMillis() + 120_000L // 총 2분까지 기다림
157+ val hardDeadlineMs = System .currentTimeMillis() + 120_000L
157158 var lastUrl = page.url()
158159
159160 while (System .currentTimeMillis() < hardDeadlineMs) {
@@ -165,50 +166,87 @@ class ExtraSnuCrawler(
165166 return null
166167 }
167168
168- // 1) wait.jsp면: 서버가 대기열 처리 중. 재navigate 하지 말고 잠깐 기다림.
169169 if (isWait(curUrl)) {
170170 if (debug) println (" [PW] wait.jsp... (sleep)" )
171- page.waitForTimeout(800.0 + Math .random() * 1200.0 ) // 0.8~2.0s
171+ page.waitForTimeout(800.0 + Math .random() * 1200.0 )
172172 continue
173173 }
174174
175- // 2) 최종 상세(findIcmp...)로 왔으면: 실제 기간 값이 들어올 때까지 기다렸다가 content
176175 if (isDetail(curUrl)) {
177- runCatching {
178- page.waitForFunction(
179- """
180- () => {
181- const ths = Array.from(document.querySelectorAll("th"));
182- const th = ths.find(x => (x.textContent || "").includes("교육(활동)기간"));
183- if (!th) return false;
184- const td = th.nextElementSibling;
185- if (!td) return false;
186- const txt = (td.textContent || "").replace(/\s+/g, " ").trim();
187- return /\d{4}\.\d{2}\.\d{2}\./.test(txt) && /\d{2}:\d{2}/.test(txt);
176+ val detailDeadlineMs = System .currentTimeMillis() + 10_000L
177+
178+ while (System .currentTimeMillis() < detailDeadlineMs) {
179+ val titles = runCatching {
180+ @Suppress(" UNCHECKED_CAST" )
181+ page.evalOnSelectorAll(
182+ " div.cont_box p.cont_tit" ,
183+ " els => els.map(el => (el.textContent || '').replace(/\\ s+/g, ' ').trim())"
184+ ) as List <String >
185+ }.getOrDefault(emptyList())
186+
187+ if (debug) println (" [PW] titles=$titles " )
188+
189+ // 아직 본문 골격이 안 뜬 상태
190+ if (titles.isEmpty()) {
191+ page.waitForTimeout(500.0 )
192+ continue
188193 }
189- """ .trimIndent(),
190- Page .WaitForFunctionOptions ().setTimeout(10_000.0 )
191- )
192- }.onFailure {
193- runCatching {
194- page.waitForLoadState(LoadState .NETWORKIDLE )
194+
195+ // 강좌 정보 자체가 없는 페이지면 더 기다리지 말고 그냥 반환
196+ if (! titles.contains(" 강좌 정보" )) {
197+ val html = try {
198+ page.content()
199+ } catch (e: Exception ) {
200+ page.waitForTimeout(500.0 )
201+ page.content()
202+ }
203+ if (debug) println (" [PW] OK detail(no lecture info) url=$curUrl htmlLen=${html.length} " )
204+ return html
205+ }
206+
207+ // 강좌 정보가 있으면 교육(활동)기간 값이 실제로 채워질 때까지 조금 더 기다림
208+ val hasPeriod = runCatching {
209+ page.evaluate(
210+ """
211+ () => {
212+ const ths = Array.from(document.querySelectorAll("th"));
213+ const th = ths.find(x => (x.textContent || "").includes("교육(활동)기간"));
214+ if (!th) return false;
215+ const td = th.nextElementSibling;
216+ if (!td) return false;
217+ const txt = (td.textContent || "").replace(/\s+/g, " ").trim();
218+ return /\d{4}\.\d{2}\.\d{2}\./.test(txt) && /\d{2}:\d{2}/.test(txt);
219+ }
220+ """ .trimIndent()
221+ ) as Boolean
222+ }.getOrDefault(false )
223+
224+ if (hasPeriod) {
225+ val html = try {
226+ page.content()
227+ } catch (e: Exception ) {
228+ page.waitForTimeout(500.0 + Math .random() * 600.0 )
229+ page.content()
230+ }
231+ if (debug) println (" [PW] OK detail(with lecture info) url=$curUrl htmlLen=${html.length} " )
232+ return html
195233 }
196- page.waitForTimeout(1500.0 )
234+
235+ page.waitForTimeout(500.0 )
197236 }
198237
199- // navigating 중 content() 터질 수 있어 방어
238+ // detail 페이지까지는 왔는데 10초 동안 period가 안 떴음
239+ // 그래도 HTML은 넘기고, 실제 판정은 Kotlin parse 쪽에서 처리
200240 val html = try {
201241 page.content()
202242 } catch (e: Exception ) {
203- page.waitForTimeout(500.0 + Math .random() * 600.0 )
243+ page.waitForTimeout(500.0 )
204244 page.content()
205245 }
206-
207- if (debug) println (" [PW] OK detail url=$curUrl htmlLen=${html.length} " )
246+ if (debug) println (" [PW] OK detail(timeout fallback) url=$curUrl htmlLen=${html.length} " )
208247 return html
209248 }
210249
211- // 3) 그 외 상태: 아직 view.do이거나 중간 이동 중
212250 page.waitForTimeout(400.0 + Math .random() * 600.0 )
213251 }
214252
@@ -258,8 +296,8 @@ class ExtraSnuCrawler(
258296 label to value
259297 }.orEmpty()
260298
261- val applyCount = counts[" 신청" ]?.toIntOrNullSafe()
262- val capacity = counts[" 정원" ]?.toIntOrNullSafe()
299+ val applyCount = counts[" 신청" ]?.toIntOrNullSafe() ? : 0
300+ val capacity = counts[" 정원" ]?.toIntOrNullSafe() ? : 0
263301
264302 val imageUrl = card.selectFirst(" .img_wrap img" )
265303 ?.absUrl(" src" )
0 commit comments