2525@ RequiredArgsConstructor
2626public class ArticleMetadataAdapter implements ArticleMetadataPort {
2727 private static final String COMMON_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" ;
28- private static final int TIMEOUT_MILLIS = 5000 ;
28+ private static final int TIMEOUT_MILLIS = 8000 ;
2929 private final S3StorageService s3StorageService ;
3030 @ Value ("${default-thumbnail}" )
3131 private String DEFAULT_THUMBNAIL_URL ;
@@ -37,11 +37,13 @@ public ArticleMetadataResponse extractMetadata(String url) {
3737 // 1. 웹페이지 접속
3838 Document doc = Jsoup .connect (processedUrl )
3939 .userAgent (COMMON_USER_AGENT )
40+ .referrer ("https://www.google.com/" )
41+ .header ("Accept-Language" , "ko-KR,ko;q=0.9,en-US;q=0.8" )
4042 .timeout (TIMEOUT_MILLIS )
4143 .get ();
4244
4345 // 2. 제목 추출 (Open Graph -> HTML Title 순)
44- String title = extractMetaContent (doc , "meta[property=og:title]" );
46+ String title = extractMetaContent (doc , "meta[property=og:title]" , false );
4547 if (title .isBlank ()) {
4648 title = doc .title ();
4749 }
@@ -53,7 +55,7 @@ public ArticleMetadataResponse extractMetadata(String url) {
5355 }
5456
5557 // 3. 썸네일 추출 (Open Graph)
56- String originalThumbnail = extractMetaContent (doc , "meta[property=og:image]" );
58+ String originalThumbnail = extractMetaContent (doc , "meta[property=og:image]" , true );
5759
5860 // 썸네일이 없는 경우 기본 이미지로 처리
5961 String finalThumbnail ;
@@ -72,7 +74,11 @@ public ArticleMetadataResponse extractMetadata(String url) {
7274
7375 }
7476
75- private String extractMetaContent (Document doc , String selector ) {
77+ private String extractMetaContent (Document doc , String selector , boolean isUrl ) {
78+ if (isUrl ) {
79+ String absContent = doc .select (selector ).attr ("abs:content" ).trim ();
80+ return absContent .isEmpty () ? doc .select (selector ).attr ("content" ).trim () : absContent ;
81+ }
7682 return doc .select (selector ).attr ("content" ).trim ();
7783 }
7884
0 commit comments