Skip to content

Commit 9516548

Browse files
authored
Merge pull request #126 from Pinback-Team/dev
refactor: 아티클 메타데이터 저장 기능 리팩토링
2 parents 13bc306 + 48219cb commit 9516548

1 file changed

Lines changed: 10 additions & 4 deletions

File tree

infrastructure/src/main/java/com/pinback/infrastructure/article/service/ArticleMetadataAdapter.java

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
@RequiredArgsConstructor
2626
public class ArticleMetadataAdapter implements ArticleMetadataPort {
2727
private static final String COMMON_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
28-
private static final int TIMEOUT_MILLIS = 5000;
28+
private static final int TIMEOUT_MILLIS = 8000;
2929
private final S3StorageService s3StorageService;
3030
@Value("${default-thumbnail}")
3131
private String DEFAULT_THUMBNAIL_URL;
@@ -37,11 +37,13 @@ public ArticleMetadataResponse extractMetadata(String url) {
3737
// 1. 웹페이지 접속
3838
Document doc = Jsoup.connect(processedUrl)
3939
.userAgent(COMMON_USER_AGENT)
40+
.referrer("https://www.google.com/")
41+
.header("Accept-Language", "ko-KR,ko;q=0.9,en-US;q=0.8")
4042
.timeout(TIMEOUT_MILLIS)
4143
.get();
4244

4345
// 2. 제목 추출 (Open Graph -> HTML Title 순)
44-
String title = extractMetaContent(doc, "meta[property=og:title]");
46+
String title = extractMetaContent(doc, "meta[property=og:title]", false);
4547
if (title.isBlank()) {
4648
title = doc.title();
4749
}
@@ -53,7 +55,7 @@ public ArticleMetadataResponse extractMetadata(String url) {
5355
}
5456

5557
// 3. 썸네일 추출 (Open Graph)
56-
String originalThumbnail = extractMetaContent(doc, "meta[property=og:image]");
58+
String originalThumbnail = extractMetaContent(doc, "meta[property=og:image]", true);
5759

5860
// 썸네일이 없는 경우 기본 이미지로 처리
5961
String finalThumbnail;
@@ -72,7 +74,11 @@ public ArticleMetadataResponse extractMetadata(String url) {
7274

7375
}
7476

75-
private String extractMetaContent(Document doc, String selector) {
77+
private String extractMetaContent(Document doc, String selector, boolean isUrl) {
78+
if (isUrl) {
79+
String absContent = doc.select(selector).attr("abs:content").trim();
80+
return absContent.isEmpty() ? doc.select(selector).attr("content").trim() : absContent;
81+
}
7682
return doc.select(selector).attr("content").trim();
7783
}
7884

0 commit comments

Comments
 (0)