Skip to content

Commit e06d296

Browse files
committed
Added new spiders
- Meta-WebIndexer - Meta-ExternalAds - Meta-ExternalAgent - Meta-ExternalFetcher - OAI-SearchBot - ChatGPT-User - Perplexity-User - Bytespider - PlagAwareBot - DuckAssistBot - CCBot - DataForSeoBot - Gemini-Deep-Research
Note: PerplexityBot does not appear in https://assets.woltlab.com/spiderlist/typhoon/list.xml (used by RefreshSearchRobotsCronjob prior to 6.1), and it has been missing from SpiderCollecting since 6.1, so it is added to SpiderCollecting as well.
1 parent a6150ef commit e06d296

File tree

2 files changed

+101
-0
lines changed

2 files changed

+101
-0
lines changed

spiderList/spiderList.xml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1719,8 +1719,59 @@
17191719
<name>GPTBot</name>
17201720
<url>https://openai.com/gptbot</url>
17211721
</spider>
1722+
<spider ident="OAI-SearchBot/">
1723+
<name>OAI-SearchBot</name>
1724+
<url>https://openai.com/searchbot</url>
1725+
</spider>
1726+
<spider ident="ChatGPT-User/">
1727+
<name>ChatGPT-User</name>
1728+
<url>https://openai.com/bot</url>
1729+
</spider>
17221730
<spider ident="PerplexityBot/">
17231731
<name>PerplexityBot</name>
17241732
<url>https://perplexity.ai/perplexitybot</url>
17251733
</spider>
1734+
<spider ident="Perplexity-User/">
1735+
<name>Perplexity-User</name>
1736+
<url>https://perplexity.ai/perplexity-user</url>
1737+
</spider>
1738+
<spider ident="meta-webindexer/">
1739+
<name>Meta-WebIndexer</name>
1740+
<url>https://developers.facebook.com/docs/sharing/webmasters/web-crawlers</url>
1741+
</spider>
1742+
<spider ident="meta-externalads/">
1743+
<name>Meta-ExternalAds</name>
1744+
<url>https://developers.facebook.com/docs/sharing/webmasters/web-crawlers</url>
1745+
</spider>
1746+
<spider ident="meta-externalagent/">
1747+
<name>Meta-ExternalAgent</name>
1748+
<url>https://developers.facebook.com/docs/sharing/webmasters/web-crawlers</url>
1749+
</spider>
1750+
<spider ident="meta-externalfetcher/">
1751+
<name>Meta-ExternalFetcher</name>
1752+
<url>https://developers.facebook.com/docs/sharing/webmasters/web-crawlers</url>
1753+
</spider>
1754+
<spider ident="Bytespider/">
1755+
<name>Bytespider</name>
1756+
</spider>
1757+
<spider ident="PlagAwareBot/">
1758+
<name>PlagAware</name>
1759+
<url>https://www.plagaware.com/bot</url>
1760+
</spider>
1761+
<spider ident="DuckAssistBot/">
1762+
<name>DuckAssistBot</name>
1763+
<url>https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot</url>
1764+
</spider>
1765+
<spider ident="CCBot/">
1766+
<name>Common Crawl Bot</name>
1767+
<url>https://commoncrawl.org/faq</url>
1768+
</spider>
1769+
<spider ident="DataForSeoBot/">
1770+
<name>DataForSEO Link Bot</name>
1771+
<url>https://dataforseo.com/dataforseo-bot</url>
1772+
</spider>
1773+
<spider ident="Gemini-Deep-Research/">
1774+
<name>Gemini Deep Research</name>
1775+
<url>https://gemini.google/overview/deep-research/</url>
1776+
</spider>
17261777
</data>

wcfsetup/install/files/lib/event/spider/SpiderCollecting.class.php

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,56 @@ public function __construct()
641641
);
642642
$this->register(new Spider('360Spider', '360Spider'));
643643
$this->register(new Spider('GPTBot', 'GPTBot', 'https://openai.com/gptbot'));
644+
$this->register(new Spider('OAI-SearchBot', 'OAI-SearchBot', 'https://openai.com/searchbot'));
645+
$this->register(new Spider('ChatGPT-User', 'ChatGPT-User', 'https://openai.com/bot'));
646+
$this->register(new Spider('PerplexityBot', 'PerplexityBot', 'https://perplexity.ai/perplexitybot'));
647+
$this->register(new Spider('Perplexity-User', 'Perplexity-User', 'https://perplexity.ai/perplexity-user'));
648+
$this->register(
649+
new Spider(
650+
'meta-webindexer',
651+
'Meta-WebIndexer',
652+
'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers'
653+
)
654+
);
655+
$this->register(
656+
new Spider(
657+
'meta-externalads',
658+
'Meta-ExternalAds',
659+
'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers'
660+
)
661+
);
662+
$this->register(
663+
new Spider(
664+
'meta-externalagent',
665+
'Meta-ExternalAgent',
666+
'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers'
667+
)
668+
);
669+
$this->register(
670+
new Spider(
671+
'meta-externalfetcher',
672+
'Meta-ExternalFetcher',
673+
'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers'
674+
)
675+
);
676+
$this->register(new Spider('Bytespider', 'Bytespider'));
677+
$this->register(new Spider('PlagAwareBot', 'PlagAware', 'https://www.plagaware.com/bot'));
678+
$this->register(
679+
new Spider(
680+
'DuckAssistBot',
681+
'DuckAssistBot',
682+
'https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot'
683+
)
684+
);
685+
$this->register(new Spider('CCBot', 'Common Crawl Bot', 'https://commoncrawl.org/faq'));
686+
$this->register(new Spider('DataForSeoBot', 'DataForSEO Link Bot', 'https://dataforseo.com/dataforseo-bot'));
687+
$this->register(
688+
new Spider(
689+
'Gemini-Deep-Research',
690+
'Gemini Deep Research',
691+
'https://gemini.google/overview/deep-research/'
692+
)
693+
);
644694
}
645695

646696
/**

0 commit comments

Comments
 (0)