@@ -37,7 +37,7 @@ public static function getApp(string $value, array $data = []) : array {
3737 'duckduckgo-favicons-bot ' => 'search ' ,
3838 'coccocbot-image ' => 'search ' ,
3939 'coccocbot-web ' => 'search ' ,
40- 'applebot ' => 'search ' ,
40+ 'applebot ' => 'ai ' ,
4141 'yandexbot ' => 'search ' ,
4242 'mj12bot ' => 'search ' ,
4343 'mail.ru_bot ' => 'search ' ,
@@ -59,7 +59,13 @@ public static function getApp(string $value, array $data = []) : array {
5959 'telegrambot ' => 'feed ' ,
6060 'semrushbot ' => 'crawler ' ,
6161 'mediatoolkitbot ' => 'crawler ' ,
62- 'iploggerbot ' => 'monitor '
62+ 'iploggerbot ' => 'monitor ' ,
63+ 'baiduspider ' => 'search ' ,
64+ 'haosouspider ' => 'search ' ,
65+ 'yisouspider ' => 'search ' ,
66+ '360spider ' => 'search ' ,
67+ 'sogou web spider ' => 'search ' ,
68+ 'bytespider ' => 'crawler '
6369 ];
6470 $ apps = [
6571 'yacybot ' => 'YacyBot ' ,
@@ -94,6 +100,7 @@ public static function getApp(string $value, array $data = []) : array {
94100 'mediatoolkitbot ' => 'MediaToolkitBot ' ,
95101 'cfnetwork ' => 'Apple Core Foundation Network ' ,
96102 'ncsc web check feedback.webcheck@digital.ncsc.gov.uk ' => 'NCSC Web Check ' ,
103+ 'enhanced webcheck feedback@digital.ncsc.gov.uk ' => 'NCSC Enhanced Web Check ' ,
97104 'the national archives uk government web archive: ' => 'UK Government National Archives ' ,
98105 'google-site-verification ' => 'Google Site Verification ' ,
99106 'google-inspectiontool ' => 'Google Inspection Tool ' ,
@@ -115,17 +122,20 @@ public static function getApp(string $value, array $data = []) : array {
115122 'citoid ' => 'Wikimedia Citoid ' ,
116123 'censysinspect ' => 'Censys Inspect ' ,
117124 'googledocs ' => 'Google Docs ' ,
118- 'user-agent: seolyt ' => 'SEOlyt '
125+ 'user-agent: seolyt ' => 'SEOlyt ' ,
126+ 'bytespider ' => 'ByteDance Spider ' ,
127+ 'spider-feedback@bytedance.com ' => 'ByteDance Spider '
119128 ];
120129
121130 $ lower = \mb_strtolower ($ parts [0 ]);
122131 return \array_merge ([
123132 'type ' => 'robot ' ,
124- 'category ' => $ category [$ lower ] ?? (\mb_stripos ($ value , 'crawl ' ) !== false || \mb_stripos ($ value , 'bot ' ) !== false ? 'crawler ' : 'scraper ' ),
125133 'app ' => $ apps [$ lower ] ?? $ parts [0 ],
126134 'appname ' => $ parts [0 ],
127135 'appversion ' => empty ($ parts [1 ]) ? null : $ parts [1 ]
128- ], $ data );
136+ ], $ data , [
137+ 'category ' => $ category [$ lower ] ?? $ data ['category ' ] ?? (\mb_stripos ($ value , 'crawl ' ) !== false || \mb_stripos ($ value , 'bot ' ) !== false ? 'crawler ' : 'scraper ' )
138+ ]);
129139 }
130140 return [];
131141 }
@@ -150,18 +160,7 @@ public static function get() : array {
150160 ]
151161 )),
152162 'crawler ' => function (string $ value ) : array {
153- $ parts = \explode ('/ ' , $ value , 2 );
154- $ map = [
155- 'baiduspider ' => 'search ' ,
156- 'haosouspider ' => 'search ' ,
157- 'yisouspider ' => 'search ' ,
158- '360spider ' => 'search ' ,
159- 'sogou web spider ' => 'search ' ,
160- 'bytespider ' => 'search ' ,
161- ];
162- return self ::getApp ($ value , [
163- 'category ' => $ map [\mb_strtolower ($ parts [0 ])] ?? 'crawler '
164- ]);
163+ return self ::getApp ($ value , ['category ' => 'crawler ' ]);
165164 },
166165 'monitor ' => fn (string $ value ) : array => self ::getApp ($ value , ['category ' => 'monitor ' ]),
167166 'scraper ' => fn (string $ value ) : array => self ::getApp ($ value , ['category ' => 'scraper ' ]),
@@ -253,6 +252,7 @@ public static function get() : array {
253252 'Uptime/ ' => new props ('start ' , $ fn ['monitor ' ]),
254253 'HostTracker/ ' => new props ('start ' , $ fn ['monitor ' ]),
255254 'NCSC Web Check feedback.webcheck@digital.ncsc.gov.uk ' => new props ('exact ' , $ fn ['monitor ' ]),
255+ 'Enhanced WebCheck feedback@digital.ncsc.gov.uk ' => new props ('exact ' , $ fn ['monitor ' ]),
256256 'Pingdom.com ' => new props ('start ' , function (string $ value ) : array {
257257 $ version = \explode ('_ ' , \trim ($ value , '_ ' ));
258258 return [
0 commit comments