Skip to content

Commit 1741930

Browse files
feat: bot detection logic (#170)
* bot detection logic * test fix * fix * fix tests * fix intermittent test failures * tests * merge main * update shared lib * read bot protection from scrape metadata * test * test * improve tests * merge main + update lib * read content scraper logs when bot protection detected * read bot protection flag from scrape results * test coverage * empty scrape.json to check bot protection * updated lib * simplify logic * simplify logic to just read logs for bot protection * adjust logs * refactor to simplify logic * increase tests * debug logs * use jobId to check content scraper logs * add scraper as dependency in opportunity map * log fix * tests * test fix * remove debug logs * update lib * new lib * package * package * test fix * update lib * simplify logic * test fixes * test fix * test fix * refactor to read abort info stats from database * test fix * test fix * fix to scraper client lib * debugs * add sqsWrapper dependency needed for scrape client * Add SCRAPE_JOB_CONFIGURATION * debug logs * use siteUrl * update lib * updated libs * resolveCanonicalUrl null return handling * debug logs * retry logic to get abort info * adjust wait time * refactor to have bot detection logic in new file * cleanup * address review comments * update lib and log fix * slack lib * test fix * updated lib * add logs + updated libs * remove unused s3 lib * add test * revert slack client lib to old version * lock file * restore s3 lib + slack client * update helix deploy lib version * update lib * support for multiple jobIds for siteUrl * test improvement * show stats always irrespective of bot protection * adjust slack messaging * new tests
1 parent 5f30b92 commit 1741930

File tree

19 files changed

+9871
-6199
lines changed

19 files changed

+9871
-6199
lines changed

package-lock.json

Lines changed: 5272 additions & 5698 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,18 +76,19 @@
7676
"@adobe/helix-status": "10.1.5",
7777
"@adobe/helix-universal": "5.4.0",
7878
"@adobe/helix-universal-logger": "3.0.28",
79-
"@adobe/spacecat-shared-data-access": "2.100.1",
79+
"@adobe/spacecat-shared-data-access": "2.105.0",
8080
"@adobe/spacecat-shared-google-client": "1.4.63",
8181
"@adobe/spacecat-shared-gpt-client": "1.6.16",
8282
"@adobe/spacecat-shared-http-utils": "1.19.4",
8383
"@adobe/spacecat-shared-ims-client": "1.11.9",
8484
"@adobe/spacecat-shared-rum-api-client": "2.40.6",
85-
"@adobe/spacecat-shared-scrape-client": "2.3.7",
86-
"@adobe/spacecat-shared-slack-client": "1.5.33",
87-
"@adobe/spacecat-shared-utils": "1.90.1",
88-
"@aws-sdk/client-cloudwatch-logs": "3.985.0",
85+
"@adobe/spacecat-shared-scrape-client": "2.5.0",
86+
"@adobe/spacecat-shared-slack-client": "1.6.0",
87+
"@adobe/spacecat-shared-utils": "1.91.0",
8988
"@aws-sdk/client-lambda": "3.985.0",
9089
"@aws-sdk/client-sqs": "3.985.0",
90+
"@aws-sdk/client-s3": "3.985.0",
91+
"@aws-sdk/client-cloudwatch-logs": "3.985.0",
9192
"@aws-sdk/credential-provider-node": "3.972.6",
9293
"aws-xray-sdk": "3.12.0",
9394
"cheerio": "1.2.0",
@@ -98,7 +99,7 @@
9899
},
99100
"devDependencies": {
100101
"@adobe/eslint-config-helix": "3.0.17",
101-
"@adobe/helix-deploy": "13.2.11",
102+
"@adobe/helix-deploy": "13.2.12",
102103
"@adobe/helix-universal": "5.4.0",
103104
"@adobe/helix-universal-devserver": "1.1.145",
104105
"@adobe/semantic-release-coralogix": "1.1.40",
@@ -134,7 +135,7 @@
134135
},
135136
"overrides": {
136137
"@adobe/fetch": "4.2.3",
137-
"@adobe/spacecat-shared-data-access": "2.100.1"
138+
"@adobe/spacecat-shared-data-access": "2.105.0"
138139
},
139140
"lint-staged": {
140141
"*.js": "eslint",

src/index.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import {
2020
badRequest,
2121
} from '@adobe/spacecat-shared-http-utils';
2222
import { imsClientWrapper } from '@adobe/spacecat-shared-ims-client';
23-
import { isNonEmptyObject, sqsEventAdapter } from '@adobe/spacecat-shared-utils';
23+
import { isNonEmptyObject, sqsEventAdapter, sqsWrapper } from '@adobe/spacecat-shared-utils';
2424

2525
import { runOpportunityStatusProcessor as opportunityStatusProcessor } from './tasks/opportunity-status-processor/handler.js';
2626
import { runDisableImportAuditProcessor as disableImportAuditProcessor } from './tasks/disable-import-audit-processor/handler.js';
@@ -86,13 +86,15 @@ async function processTask(message, context) {
8686

8787
const runSQS = wrap(processTask)
8888
.with(dataAccess)
89+
.with(sqsWrapper)
8990
.with(sqsEventAdapter)
9091
.with(imsClientWrapper)
9192
.with(secrets, { name: getSecretName })
9293
.with(helixStatus);
9394

9495
const runDirect = wrap(processTask)
9596
.with(dataAccess)
97+
.with(sqsWrapper)
9698
.with(imsClientWrapper)
9799
.with(secrets, { name: getSecretName })
98100
.with(helixStatus);

src/tasks/cwv-demo-suggestions-processor/handler.js

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import fs from 'fs';
1414
import path from 'path';
1515
import { isNonEmptyArray } from '@adobe/spacecat-shared-utils';
16+
import { ok } from '@adobe/spacecat-shared-http-utils';
1617

1718
import { say } from '../../utils/slack-utils.js';
1819

@@ -299,32 +300,32 @@ export async function runCwvDemoSuggestionsProcessor(message, context) {
299300

300301
try {
301302
if (!profile || profile !== DEMO) {
302-
return {
303+
return ok({
303304
message: 'CWV processing skipped - not a demo profile',
304305
reason: 'non-demo-profile',
305306
profile,
306307
suggestionsAdded: 0,
307-
};
308+
});
308309
}
309310

310311
const site = await Site.findById(siteId);
311312
if (!site) {
312313
log.error(`Site not found for siteId: ${siteId}`);
313-
return {
314+
return ok({
314315
message: 'Site not found',
315316
suggestionsAdded: 0,
316-
};
317+
});
317318
}
318319

319320
const opportunities = await site.getOpportunities();
320321
const cwvOpportunities = opportunities.filter((opp) => opp.getType() === 'cwv');
321322

322323
if (cwvOpportunities.length === 0) {
323324
await say(env, log, slackContext, 'No CWV opportunities found for site, skipping generic suggestions');
324-
return {
325+
return ok({
325326
message: 'No CWV opportunities found',
326327
suggestionsAdded: 0,
327-
};
328+
});
328329
}
329330

330331
const suggestionsUpdated = await processCWVOpportunity(
@@ -334,18 +335,18 @@ export async function runCwvDemoSuggestionsProcessor(message, context) {
334335
slackContext,
335336
);
336337

337-
return {
338+
return ok({
338339
message: 'CWV demo suggestions processor completed',
339340
opportunitiesProcessed: 1,
340341
suggestionsAdded: suggestionsUpdated,
341-
};
342+
});
342343
} catch (error) {
343344
log.error('Error in CWV demo suggestions processor:', error);
344-
return {
345+
return ok({
345346
message: 'CWV demo suggestions processor completed with errors',
346347
error: error.message,
347348
suggestionsAdded: 0,
348-
};
349+
});
349350
}
350351
}
351352

src/tasks/demo-url-processor/handler.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,13 +91,13 @@ export async function runDemoUrlProcessor(message, context) {
9191
}
9292

9393
const demoUrl = `${experienceUrl}?organizationId=${organizationId}#/@${imsTenantId}/sites-optimizer/sites/${siteId}/home`;
94-
const slackMessage = `:white_check_mark: Onboarding setup completed successfully for the site ${siteUrl}!\nAccess your environment here: ${demoUrl}`;
94+
const slackMessage = `:white_check_mark: Onboarding setup completed for the site ${siteUrl}!\nAccess your environment here: ${demoUrl}`;
9595

9696
if (slackContext) {
9797
await say(env, log, slackContext, slackMessage);
9898
}
9999

100-
log.info(`Onboarding setup completed successfully for the site ${siteUrl}! Access your environment here: ${demoUrl}`);
100+
log.info(`Onboarding setup completed for the site ${siteUrl}! Access your environment here: ${demoUrl}`);
101101

102102
return ok({ message: 'Demo URL processor completed' });
103103
}

0 commit comments

Comments
 (0)