@@ -56,7 +56,33 @@ function isLongStoryText(value) {
/**
 * Heuristically decides whether a value looks like a direct link to a media
 * file (image, video, or audio), judged purely by its file extension.
 *
 * The value is stringified, trimmed and lower-cased before matching, so the
 * check is case-insensitive. The extension must be the last thing in the
 * string, optionally followed by a query string and/or a fragment.
 *
 * @param {*} value - Candidate URL; falsy values are treated as empty.
 * @returns {boolean} True when the value ends in a known media extension.
 */
function isLikelyMediaUrl(value) {
  const url = String(value || '').trim().toLowerCase();
  if (!url) return false;
  // Accept "?query" and "#fragment" suffixes: media links are commonly shared
  // with a temporal fragment such as "clip.mp4#t=10".
  return /\.(png|jpe?g|gif|webp|bmp|svg|mp4|mov|avi|mkv|webm|m4v|wmv|flv|mp3|wav|ogg|m4a|aac|flac|opus)(?:[?#].*)?$/.test(url);
}
61+
/**
 * Heuristically decides whether a value looks like a direct link to an image
 * file, judged purely by its file extension (png, jpg/jpeg, gif, webp, bmp,
 * svg).
 *
 * The value is stringified, trimmed and lower-cased before matching, so the
 * check is case-insensitive. The extension must be the last thing in the
 * string, optionally followed by a query string and/or a fragment.
 *
 * @param {*} value - Candidate URL; falsy values are treated as empty.
 * @returns {boolean} True when the value ends in a known image extension.
 */
function isLikelyImageUrl(value) {
  const url = String(value || '').trim().toLowerCase();
  if (!url) return false;
  // Accept both "?query" and "#fragment" suffixes after the extension so that
  // links such as "diagram.svg#layer1" are still recognised.
  return /\.(png|jpe?g|gif|webp|bmp|svg)(?:[?#].*)?$/.test(url);
}
67+
/**
 * Heuristically decides whether a value looks like a direct link to an audio
 * file, judged purely by its file extension (mp3, wav, ogg, m4a, aac, flac,
 * opus).
 *
 * The value is stringified, trimmed and lower-cased before matching, so the
 * check is case-insensitive. The extension must be the last thing in the
 * string, optionally followed by a query string and/or a fragment.
 *
 * @param {*} value - Candidate URL; falsy values are treated as empty.
 * @returns {boolean} True when the value ends in a known audio extension.
 */
function isLikelyAudioUrl(value) {
  const url = String(value || '').trim().toLowerCase();
  if (!url) return false;
  // Accept both "?query" and "#fragment" suffixes after the extension so that
  // time-offset links such as "episode.mp3#t=30" are still recognised.
  return /\.(mp3|wav|ogg|m4a|aac|flac|opus)(?:[?#].*)?$/.test(url);
}
73+
/**
 * Heuristically decides whether a value looks like a link to a PDF document.
 *
 * The value is first passed through `normalizeUrlCandidate` and rejected
 * unless `looksLikeUrl` accepts the result. A parseable URL counts as a PDF
 * when its path or query string contains ".pdf" (case-insensitive), or when
 * its `format` query parameter is "pdf" or "application/pdf". If the URL
 * cannot be parsed, the raw candidate is checked for a trailing ".pdf"
 * extension (optionally followed by a query string) instead.
 *
 * @param {*} value - Candidate URL text.
 * @returns {boolean} True when the value appears to point at a PDF.
 */
function isLikelyPdfUrl(value) {
  const candidate = normalizeUrlCandidate(value);
  if (!looksLikeUrl(candidate)) {
    return false;
  }

  let parsedUrl;
  try {
    parsedUrl = new URL(candidate);
  } catch (_) {
    // Not parseable as an absolute URL: fall back to a plain extension check.
    return /\.pdf(\?.*)?$/i.test(candidate);
  }

  const pathAndQuery = `${parsedUrl.pathname || ''}${parsedUrl.search || ''}`.toLowerCase();
  if (pathAndQuery.includes('.pdf')) {
    return true;
  }

  // Some endpoints advertise the document type via "?format=..." instead of
  // an extension.
  const formatHint = String(parsedUrl.searchParams.get('format') || '').toLowerCase();
  return formatHint === 'pdf' || formatHint === 'application/pdf';
}
6187
6288function isLikelyWebPageUrl ( value ) {
@@ -106,6 +132,60 @@ function extractTextFallbackFromUrlMessage(value) {
106132 return candidate ;
107133}
108134
/**
 * Finds the first PDF-looking URL in a piece of text.
 *
 * The URL returned by `extractFirstUrl` is preferred when `isLikelyPdfUrl`
 * accepts it. Otherwise the text is split on whitespace, each piece is run
 * through `normalizeUrlCandidate`, and the first piece for which
 * `inferLikelyWebUrlFromText` yields a PDF-looking URL wins.
 *
 * @param {*} value - Message text; falsy values are treated as empty.
 * @returns {string} The matching URL, or '' when none is found.
 */
function extractFirstPdfUrlLikeToken(value) {
  const text = String(value || '').trim();
  if (text === '') return '';

  const explicitUrl = extractFirstUrl(text);
  if (explicitUrl && isLikelyPdfUrl(explicitUrl)) {
    return explicitUrl;
  }

  const candidates = text
    .split(/\s+/)
    .map((piece) => normalizeUrlCandidate(piece))
    .filter(Boolean);

  for (let i = 0; i < candidates.length; i += 1) {
    const guessedUrl = inferLikelyWebUrlFromText(candidates[i]);
    if (guessedUrl && isLikelyPdfUrl(guessedUrl)) {
      return guessedUrl;
    }
  }

  return '';
}
147+
/**
 * Finds the first image-looking URL in a piece of text.
 *
 * The URL returned by `extractFirstUrl` is preferred when `isLikelyImageUrl`
 * accepts it. Otherwise the text is split on whitespace, each piece is run
 * through `normalizeUrlCandidate`, and the first piece for which
 * `inferLikelyWebUrlFromText` yields an image-looking URL wins.
 *
 * @param {*} value - Message text; falsy values are treated as empty.
 * @returns {string} The matching URL, or '' when none is found.
 */
function extractFirstImageUrlLikeToken(value) {
  const text = String(value || '').trim();
  if (text === '') return '';

  const explicitUrl = extractFirstUrl(text);
  if (explicitUrl && isLikelyImageUrl(explicitUrl)) {
    return explicitUrl;
  }

  const candidates = text
    .split(/\s+/)
    .map((piece) => normalizeUrlCandidate(piece))
    .filter(Boolean);

  for (let i = 0; i < candidates.length; i += 1) {
    const guessedUrl = inferLikelyWebUrlFromText(candidates[i]);
    if (guessedUrl && isLikelyImageUrl(guessedUrl)) {
      return guessedUrl;
    }
  }

  return '';
}
160+
/**
 * Finds the first audio-looking URL in a piece of text.
 *
 * The URL returned by `extractFirstUrl` is preferred when `isLikelyAudioUrl`
 * accepts it. Otherwise the text is split on whitespace and each piece is run
 * through `normalizeUrlCandidate`. For each piece, in order:
 *   1. If `inferLikelyWebUrlFromText` yields a URL, it is returned when it
 *      looks like audio; otherwise the piece is skipped.
 *   2. If nothing was inferred, the piece is kept only when it already has an
 *      http(s) scheme, or starts with "www." / matches a bare-domain pattern
 *      (in which case "https://" is prepended). It is then parsed with `URL`
 *      and its serialized form is returned when it looks like audio.
 *
 * @param {*} value - Message text; falsy values are treated as empty.
 * @returns {string} The matching URL, or '' when none is found.
 */
function extractFirstAudioUrlLikeToken(value) {
  const text = String(value || '').trim();
  if (text === '') return '';

  const explicitUrl = extractFirstUrl(text);
  if (explicitUrl && isLikelyAudioUrl(explicitUrl)) {
    return explicitUrl;
  }

  const pieces = text
    .split(/\s+/)
    .map((piece) => normalizeUrlCandidate(piece))
    .filter(Boolean);

  for (const piece of pieces) {
    const guessedUrl = inferLikelyWebUrlFromText(piece);
    if (guessedUrl) {
      if (isLikelyAudioUrl(guessedUrl)) return guessedUrl;
      // An inferred URL that is not audio never reaches the manual fallback.
      continue;
    }

    let candidate = piece;
    const hasHttpScheme = /^https?:\/\//i.test(candidate);
    if (!hasHttpScheme) {
      const looksLikeHost =
        /^www\./i.test(candidate) ||
        /^[a-z0-9.-]+\.[a-z]{2,}(?:[/:?#].*)?$/i.test(candidate);
      if (!looksLikeHost) continue;
      candidate = `https://${candidate}`;
    }

    try {
      const serialized = new URL(candidate).toString();
      if (isLikelyAudioUrl(serialized)) return serialized;
    } catch (_) {
      // Best effort: an unparseable piece is simply not a match.
    }
  }

  return '';
}
188+
109189function extractLinksFromEntities ( baseText , entities ) {
110190 const text = String ( baseText || '' ) ;
111191 const list = Array . isArray ( entities ) ? entities : [ ] ;
@@ -189,9 +269,15 @@ module.exports = {
189269 extractFirstUrl,
190270 classifyMessageInput,
191271 isLikelyWebPageUrl,
272+ isLikelyPdfUrl,
273+ isLikelyImageUrl,
274+ isLikelyAudioUrl,
192275 extractTextFallbackFromUrlMessage,
193276 inferLikelyWebUrlFromText,
194277 extractMessageInputText,
195- extractFirstUrlLikeToken
278+ extractFirstUrlLikeToken,
279+ extractFirstPdfUrlLikeToken,
280+ extractFirstImageUrlLikeToken,
281+ extractFirstAudioUrlLikeToken
196282} ;
197283const { LONG_STORY_TEXT_MIN_CHARS } = require ( './data/thresholds' ) ;
0 commit comments