Skip to content

Commit 9b56e4b

Browse files
committed
User-Agent and GuzzleHttp bug fixes
1 parent 1084b15 commit 9b56e4b

1 file changed

Lines changed: 33 additions & 17 deletions

File tree

src/XRobotsTagParser.php

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
class XRobotsTagParser
2424
{
25-
const HEADER_RULE_IDENTIFIER = 'x-robots-tag';
25+
const HEADER_RULE_IDENTIFIER = 'X-Robots-Tag';
2626
const USERAGENT_DEFAULT = '';
2727

2828
const DIRECTIVE_ALL = 'all';
@@ -38,6 +38,7 @@ class XRobotsTagParser
3838

3939
protected $url = '';
4040
protected $userAgent = self::USERAGENT_DEFAULT;
41+
protected $userAgentMatch = self::USERAGENT_DEFAULT;
4142
protected $config = [];
4243

4344
protected $headers = [];
@@ -60,16 +61,15 @@ public function __construct($url, $userAgent = self::USERAGENT_DEFAULT, array $c
6061
if (!filter_var($this->url, FILTER_VALIDATE_URL)) {
6162
throw new XRobotsTagParserException('Invalid URL provided');
6263
}
64+
// User-Agent for HTTP request
65+
$this->userAgent = $userAgent;
6366
// Set any optional configuration options
6467
$this->config = $config;
65-
if (isset($this->config['headers']) && is_array($this->config['headers'])) {
66-
$this->headers = $this->config['headers'];
67-
}
68-
// Set User-Agent
69-
$parser = new UserAgentParser($userAgent);
70-
$this->userAgent = $parser->match(array_keys($this->rules), self::USERAGENT_DEFAULT);
7168
// Parse rules
7269
$this->parse();
70+
// User-Agent matching rules
71+
$parser = new UserAgentParser($this->userAgent);
72+
$this->userAgentMatch = $parser->match(array_keys($this->rules), self::USERAGENT_DEFAULT);
7373
}
7474

7575
/**
@@ -79,12 +79,10 @@ public function __construct($url, $userAgent = self::USERAGENT_DEFAULT, array $c
7979
*/
8080
protected function parse()
8181
{
82-
if (empty($this->headers)) {
83-
$this->headers = $this->getHeaders();
84-
}
82+
$this->headers = $this->selectHeaderSource();
8583
foreach ($this->headers as $header) {
8684
$parts = array_map('trim', explode(':', mb_strtolower($header), 2));
87-
if (count($parts) < 2 || $parts[0] != self::HEADER_RULE_IDENTIFIER) {
85+
if (count($parts) < 2 || $parts[0] != mb_strtolower(self::HEADER_RULE_IDENTIFIER)) {
8886
// Header is not a rule
8987
continue;
9088
}
@@ -93,6 +91,20 @@ protected function parse()
9391
}
9492
}
9593

94+
/**
 * Decide where the HTTP headers to parse come from.
 *
 * When the `headers` configuration option is set and holds an array
 * (an empty array included), that array is used as-is. In every other
 * case the headers are obtained by calling getHeaders().
 *
 * @return array
 */
protected function selectHeaderSource()
{
    $hasProvidedHeaders = isset($this->config['headers']) && is_array($this->config['headers']);

    // Caller-supplied headers win; otherwise fall back to getHeaders()
    return $hasProvidedHeaders ? $this->config['headers'] : $this->getHeaders();
}
107+
96108
/**
97109
* Request the HTTP headers from a URL
98110
*
@@ -107,7 +119,11 @@ protected function getHeaders()
107119
}
108120
$client = new GuzzleHttp\Client();
109121
$res = $client->head($this->url, $this->config['guzzle']);
110-
return $res->getHeaders();
122+
$headers = [];
123+
foreach ($res->getHeader(self::HEADER_RULE_IDENTIFIER) as $name => $values) {
124+
$headers[] = $name . ': ' . implode(' ', $values) . "\r\n";
125+
}
126+
return $headers;
111127
} catch (GuzzleHttp\Exception\TransferException $e) {
112128
throw new XRobotsTagParserException($e->getMessage());
113129
}
@@ -129,7 +145,7 @@ protected function detectDirectives()
129145
foreach ($directives as $rule) {
130146
$directive = trim(explode(':', $rule, 2)[0]);
131147
if (in_array($directive, array_keys($this->directiveClasses()))) {
132-
$this->addRule($this->directiveClasses()[$directive]);
148+
$this->addRule($directive);
133149
}
134150
}
135151
$this->cleanup();
@@ -168,7 +184,7 @@ protected function addRule($directive)
168184
if (!isset($this->rules[$this->currentUserAgent])) {
169185
$this->rules[$this->currentUserAgent] = [];
170186
}
171-
$class = __NAMESPACE__ . "\\XRobotsTagParser\\directives\\$directive";
187+
$class = "\\" . __CLASS__ . "\\directives\\" . $this->directiveClasses()[$directive];
172188
$object = new $class($this->currentRule);
173189
if (!$object instanceof XRobotsTagParser\directives\directiveInterface) {
174190
throw new XRobotsTagParserException('Unsupported directive class');
@@ -201,8 +217,8 @@ public function getRules($raw = false)
201217
$rules = array_merge($rules, $this->rules[self::USERAGENT_DEFAULT]);
202218
}
203219
// Matching UserAgent
204-
if (isset($this->rules[$this->userAgent])) {
205-
$rules = array_merge($rules, $this->rules[$this->userAgent]);
220+
if (isset($this->rules[$this->userAgentMatch])) {
221+
$rules = array_merge($rules, $this->rules[$this->userAgentMatch]);
206222
}
207223
if (!$raw) {
208224
$rebuild = new Rebuild($rules);
@@ -234,7 +250,7 @@ public function getDirectiveMeaning($directive)
234250
if (!in_array($directive, array_keys($this->directiveClasses()))) {
235251
throw new XRobotsTagParserException('Unknown directive');
236252
}
237-
$class = "XRobotsTagParser\\directives\\$directive";
253+
$class = "\\" . __CLASS__ . "\\directives\\" . $this->directiveClasses()[$directive];
238254
$object = new $class($this->directiveClasses()[$directive]);
239255
if (!$object instanceof XRobotsTagParser\directives\directiveInterface) {
240256
throw new XRobotsTagParserException('Unsupported directive class');

0 commit comments

Comments
 (0)