Skip to content

Commit 793f8c3

Browse files
fix(sitemap-scraper): remove skipNavigation and parse sitemap from body
1 parent 4bc51d9 commit 793f8c3

1 file changed

Lines changed: 5 additions & 4 deletions

File tree

packages/actor-scraper/sitemap-scraper/src/internals/crawler_setup.ts

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -200,8 +200,6 @@ export class CrawlerSetup {
200200
url: sitemapUrl,
201201
useExtendedUniqueKey: true,
202202
keepUrlFragment: this.input.keepUrlFragments,
203-
// sitemaps are fetched inside the handler
204-
skipNavigation: true,
205203
}),
206204
);
207205

@@ -249,6 +247,7 @@ export class CrawlerSetup {
249247

250248
const options: HttpCrawlerOptions = {
251249
proxyConfiguration: this.proxyConfiguration,
250+
httpClient: this.sitemapHttpClient,
252251
requestHandler: this._createRequestHandler(),
253252
preNavigationHooks: [],
254253
postNavigationHooks: [],
@@ -323,15 +322,17 @@ export class CrawlerSetup {
323322
protected async _handleSitemapRequest(
324323
crawlingContext: HttpCrawlingContext,
325324
) {
326-
const { request } = crawlingContext;
325+
const { request, body } = crawlingContext;
327326

328327
// Make sure that an object containing internal metadata
329328
// is present on every request.
330329
tools.ensureMetaData(request as any);
331330

332331
log.info('Processing sitemap', { url: request.url });
332+
const sitemapContent =
333+
typeof body === 'string' ? body : body.toString('utf8');
333334
const parsed = parseSitemap(
334-
[{ type: 'url', url: request.url }],
335+
[{ type: 'raw', content: sitemapContent }],
335336
await this.proxyConfiguration?.newUrl(),
336337
{
337338
emitNestedSitemaps: true,

0 commit comments

Comments
 (0)