Skip to content

Commit fdef552

Browse files
authored
External Resource Validation (#301)
* fix(media-library): keep existing index visible during discovery * fix(media-library): validate markdown external media during indexing * fix(media-library): validate external media responses more conservatively, addressing review comment
1 parent 4edc4b3 commit fdef552

9 files changed

Lines changed: 603 additions & 60 deletions

File tree

nx/blocks/media-library/core/constants.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ export const IndexConfig = Object.freeze({
1616
USAGE_MAP_PROGRESSIVE_BATCH_SIZE: 1000,
1717
/* Index chunking configuration */
1818
MEDIA_INDEX_CHUNK_SIZE: 20_000, /* Entries per chunk (~15-20MB per chunk) */
19+
LOCK_HEARTBEAT_INTERVAL_MS: 60_000,
20+
LOCK_STALE_THRESHOLD_MS: 10 * 60_000,
1921
});
2022

2123
export const Operation = Object.freeze({

nx/blocks/media-library/core/state.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ let appState = {
2626
selectedMediaTab: 'usage',
2727

2828
isIndexing: false,
29+
isBackgroundRefreshInProgress: false,
2930
indexProgress: null,
3031
indexStartTime: null,
3132
indexLockedByOther: false,

nx/blocks/media-library/indexing/admin-api.js

Lines changed: 183 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { daFetch, initIms } from '../../../utils/daFetch.js';
22
import { etcFetch } from '../core/urls.js';
33
import { AEM_ORIGIN, DA_ORIGIN } from '../../../public/utils/constants.js';
4-
import { IndexFiles, ExternalMedia } from '../core/constants.js';
4+
import { IndexFiles, ExternalMedia, DA_ETC_ORIGIN } from '../core/constants.js';
55
import { MediaLibraryError, ErrorCodes, logMediaLibraryError } from '../core/errors.js';
66
import { isPerfEnabled } from '../core/params.js';
77
import { t } from '../core/messages.js';
@@ -1148,37 +1148,202 @@ export async function saveIndexMeta(meta, path) {
11481148
});
11491149
}
11501150

1151-
// Fetches Last-Modified timestamp for absolute URLs (PDFs, SVGs, fragments)
1152-
export async function fetchFileLastModified(url, timeoutMs = 5000) {
1153-
if (!url) return null;
1151+
/**
 * Checks whether a URL is served from the `<ref>--<repo>--<org>.aem.page` host
 * of the given site.
 *
 * @param {string} url - Absolute URL to inspect.
 * @param {string} org - Organization segment of the site host.
 * @param {string} repo - Repository segment of the site host.
 * @param {string} [ref='main'] - Ref segment of the site host.
 * @returns {boolean} True when the URL's hostname matches the site host;
 *   false for missing arguments, unparsable URLs, or any other host.
 */
function isProtectedSiteAssetUrl(url, org, repo, ref = 'main') {
  if (!url || !org || !repo) return false;

  try {
    // URL#hostname is always lower-cased by the parser, so the expected host
    // must be normalized the same way or mixed-case org/repo never match.
    const expectedHost = `${ref}--${repo}--${org}.aem.page`.toLowerCase();
    return new URL(url).hostname === expectedHost;
  } catch {
    // Relative or malformed URLs cannot belong to the site host.
    return false;
  }
}
}
1161+
1162+
/**
 * Removes the fragment (`#...`) from a URL before it is used for a request.
 *
 * @param {string} url - URL to clean; falsy values are returned unchanged.
 * @returns {string} The serialized URL without its hash. Input that cannot be
 *   parsed as an absolute URL is cut at the first `#` instead.
 */
function sanitizeValidationUrl(url) {
  if (!url) return url;

  let parsedUrl;
  try {
    parsedUrl = new URL(url);
  } catch {
    // Not an absolute URL: fall back to plain string trimming.
    const [withoutFragment] = url.split('#');
    return withoutFragment;
  }

  parsedUrl.hash = '';
  return parsedUrl.toString();
}
}
1173+
1174+
/**
 * Issues a single request for a file through `etcFetch`, retrying once with
 * freshly obtained site token headers when a protected site asset is denied.
 *
 * @param {string} url - URL to request; its fragment is stripped first.
 * @param {object} [options]
 * @param {string} [options.method='HEAD'] - HTTP method.
 * @param {string} [options.redirectMode='manual'] - Value passed as fetch `redirect`.
 * @param {string} [options.org=''] - Site organization, used for the host check and tokens.
 * @param {string} [options.repo=''] - Site repository, used for the host check and tokens.
 * @param {string} [options.ref='main'] - Site ref, used for the host check and tokens.
 * @param {AbortSignal} [options.signal] - Signal forwarded to every request.
 * @returns {Promise<Response>} The first response, or the retried response
 *   when the first status was in AEM_SITE_AUTH_DENIED and new headers were available.
 */
async function fetchFileResponse(url, {
  method = 'HEAD',
  redirectMode = 'manual',
  org = '',
  repo = '',
  ref = 'main',
  signal,
} = {}) {
  const requestUrl = sanitizeValidationUrl(url);

  // Builds and sends one request; headers are only attached when provided.
  const send = (headers) => {
    const init = { method, signal, redirect: redirectMode };
    if (headers) {
      init.headers = headers;
    }
    return etcFetch(appendNoCacheParam(requestUrl), 'cors', init);
  };

  const isProtectedSiteAsset = isProtectedSiteAssetUrl(requestUrl, org, repo, ref);
  const cachedSiteHeaders = isProtectedSiteAsset
    ? getCachedSiteTokenHeaders(org, repo, ref)
    : null;

  const firstResponse = await send(cachedSiteHeaders);

  const wasDenied = isProtectedSiteAsset && AEM_SITE_AUTH_DENIED.has(firstResponse.status);
  if (!wasDenied) {
    return firstResponse;
  }

  // The cached headers were rejected, so drop them before asking for new ones.
  if (cachedSiteHeaders) {
    clearCachedAemSiteToken(org, repo, ref);
  }

  const siteTokenHeaders = await getSiteTokenHeaders(org, repo, ref);
  return siteTokenHeaders ? send(siteTokenHeaders) : firstResponse;
}
1217+
1218+
function resolveProxyRedirectUrl(originalUrl, response) {
1219+
try {
1220+
const originalUrlObj = new URL(sanitizeValidationUrl(originalUrl));
1221+
const etcHostname = new URL(DA_ETC_ORIGIN).hostname;
1222+
const location = (response.headers.get('location') || '').trim();
1223+
1224+
if (location && response.status >= 300 && response.status < 400) {
1225+
const locationUrl = new URL(location, originalUrlObj.origin);
1226+
if (locationUrl.hostname === etcHostname) {
1227+
return `${originalUrlObj.origin}${locationUrl.pathname}${locationUrl.search}`;
1228+
}
1229+
return locationUrl.toString();
1230+
}
1231+
} catch {
1232+
// ignore malformed URL/redirects
1233+
}
1234+
1235+
return '';
1236+
}
1237+
1238+
/**
 * Follows redirects by hand, re-requesting each resolved target until the
 * response no longer redirects, the target repeats, or the hop limit is hit.
 *
 * @param {string} requestUrl - URL that produced `response`.
 * @param {Response} response - Response received for `requestUrl`.
 * @param {object} [options={}] - Options forwarded to `fetchFileResponse`.
 * @param {number} [redirectCount=0] - Hops already taken; following stops at 5.
 * @returns {Promise<{requestUrl: string, response: Response}>} The last URL
 *   requested together with its response.
 */
async function followFileRedirects(requestUrl, response, options = {}, redirectCount = 0) {
  const nextUrl = resolveProxyRedirectUrl(requestUrl, response);
  const limitReached = redirectCount >= 5;

  if (nextUrl && nextUrl !== requestUrl && !limitReached) {
    const nextResponse = await fetchFileResponse(nextUrl, options);
    return followFileRedirects(nextUrl, nextResponse, options, redirectCount + 1);
  }

  return { requestUrl, response };
}
}
1250+
1251+
/**
 * Requests a file (following redirects by hand) and summarizes the final
 * response. Never throws: failures are reported through the returned object.
 *
 * @param {string} url - URL to request.
 * @param {object} [options]
 * @param {string} [options.method='HEAD'] - HTTP method.
 * @param {string} [options.redirectMode='manual'] - Value passed as fetch `redirect`.
 * @param {number} [options.timeoutMs=5000] - Time budget for the whole
 *   request chain before it is aborted.
 * @param {string} [options.org=''] - Site organization forwarded to the request helper.
 * @param {string} [options.repo=''] - Site repository forwarded to the request helper.
 * @param {string} [options.ref='main'] - Site ref forwarded to the request helper.
 * @returns {Promise<{ok: boolean, status: number, contentType: string,
 *   finalUrl: string, redirected: boolean, lastModified: (number|null),
 *   error: (string|undefined)}>} `lastModified` is epoch milliseconds parsed
 *   from the `last-modified` header, or null when absent or unparsable.
 *   `error` is only present when the request threw.
 */
async function fetchFileResponseInfo(url, {
  method = 'HEAD',
  redirectMode = 'manual',
  timeoutMs = 5000,
  org = '',
  repo = '',
  ref = 'main',
} = {}) {
  if (!url) {
    // Same shape as every other return path, including `redirected`.
    return {
      ok: false,
      status: 0,
      contentType: '',
      finalUrl: '',
      redirected: false,
      lastModified: null,
    };
  }

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
  const requestOptions = {
    method,
    redirectMode,
    org,
    repo,
    ref,
    signal: controller.signal,
  };

  try {
    const response = await fetchFileResponse(url, requestOptions);
    const {
      requestUrl,
      response: finalResponse,
    } = await followFileRedirects(url, response, requestOptions);

    const rawLastModified = finalResponse.headers.get('last-modified');
    const parsedLastModified = rawLastModified ? new Date(rawLastModified).getTime() : Number.NaN;

    return {
      ok: finalResponse.ok,
      status: finalResponse.status,
      contentType: finalResponse.headers.get('content-type') || '',
      finalUrl: requestUrl,
      redirected: finalResponse.redirected || requestUrl !== url,
      lastModified: Number.isNaN(parsedLastModified) ? null : parsedLastModified,
    };
  } catch (error) {
    if (isPerfEnabled()) {
      // eslint-disable-next-line no-console
      console.log(`[fetchFileResponseInfo:${method}] Failed for ${url}:`, error.message);
    }

    return {
      ok: false,
      status: 0,
      contentType: '',
      finalUrl: '',
      redirected: false,
      lastModified: null,
      error: error.message,
    };
  } finally {
    // Always release the abort timer, including when the request throws.
    clearTimeout(timeoutId);
  }
}
11821323

1183-
return null;
1324+
/**
 * Summarizes a file via a HEAD request with manual redirect handling.
 *
 * @param {string} url - URL to request.
 * @param {object} [options={}] - Options forwarded to `fetchFileResponseInfo`;
 *   any `method` or `redirectMode` supplied here is overridden.
 * @returns {Promise<object>} The result of `fetchFileResponseInfo`.
 */
export async function fetchFileHeadInfo(url, options = {}) {
  const headOptions = { ...options, method: 'HEAD', redirectMode: 'manual' };
  return fetchFileResponseInfo(url, headOptions);
}
1331+
1332+
export async function fetchFileGetInfo(url, options = {}) {
1333+
return fetchFileResponseInfo(url, {
1334+
...options,
1335+
method: 'GET',
1336+
redirectMode: 'follow',
1337+
});
1338+
}
1339+
1340+
/**
 * Fetches the Last-Modified timestamp for absolute URLs (PDFs, SVGs, fragments).
 *
 * @param {string} url - URL to inspect.
 * @param {number} [timeoutMs=5000] - Time budget forwarded to the HEAD request.
 * @param {string} [org=''] - Site organization forwarded to the HEAD request.
 * @param {string} [repo=''] - Site repository forwarded to the HEAD request.
 * @param {string} [ref='main'] - Site ref forwarded to the HEAD request.
 * @returns {Promise<number|null>} The `lastModified` value reported by
 *   `fetchFileHeadInfo`.
 */
export async function fetchFileLastModified(url, timeoutMs = 5000, org = '', repo = '', ref = 'main') {
  const requestOptions = { timeoutMs, org, repo, ref };
  const { lastModified } = await fetchFileHeadInfo(url, requestOptions);
  return lastModified;
}

0 commit comments

Comments
 (0)