2222
2323class XRobotsTagParser
2424{
25- const HEADER_RULE_IDENTIFIER = 'x-robots-tag ' ;
25+ const HEADER_RULE_IDENTIFIER = 'X-Robots-Tag ' ;
2626 const USERAGENT_DEFAULT = '' ;
2727
2828 const DIRECTIVE_ALL = 'all ' ;
@@ -38,6 +38,7 @@ class XRobotsTagParser
3838
3939 protected $ url = '' ;
4040 protected $ userAgent = self ::USERAGENT_DEFAULT ;
41+ protected $ userAgentMatch = self ::USERAGENT_DEFAULT ;
4142 protected $ config = [];
4243
4344 protected $ headers = [];
@@ -60,16 +61,15 @@ public function __construct($url, $userAgent = self::USERAGENT_DEFAULT, array $c
6061 if (!filter_var ($ this ->url , FILTER_VALIDATE_URL )) {
6162 throw new XRobotsTagParserException ('Invalid URL provided ' );
6263 }
64+ // User-Agent for HTTP request
65+ $ this ->userAgent = $ userAgent ;
6366 // Set any optional configuration options
6467 $ this ->config = $ config ;
65- if (isset ($ this ->config ['headers ' ]) && is_array ($ this ->config ['headers ' ])) {
66- $ this ->headers = $ this ->config ['headers ' ];
67- }
68- // Set User-Agent
69- $ parser = new UserAgentParser ($ userAgent );
70- $ this ->userAgent = $ parser ->match (array_keys ($ this ->rules ), self ::USERAGENT_DEFAULT );
7168 // Parse rules
7269 $ this ->parse ();
70+ // User-Agent matching rules
71+ $ parser = new UserAgentParser ($ this ->userAgent );
72+ $ this ->userAgentMatch = $ parser ->match (array_keys ($ this ->rules ), self ::USERAGENT_DEFAULT );
7373 }
7474
7575 /**
@@ -79,12 +79,10 @@ public function __construct($url, $userAgent = self::USERAGENT_DEFAULT, array $c
7979 */
8080 protected function parse ()
8181 {
82- if (empty ($ this ->headers )) {
83- $ this ->headers = $ this ->getHeaders ();
84- }
82+ $ this ->headers = $ this ->selectHeaderSource ();
8583 foreach ($ this ->headers as $ header ) {
8684 $ parts = array_map ('trim ' , explode (': ' , mb_strtolower ($ header ), 2 ));
87- if (count ($ parts ) < 2 || $ parts [0 ] != self ::HEADER_RULE_IDENTIFIER ) {
85+ if (count ($ parts ) < 2 || $ parts [0 ] != mb_strtolower ( self ::HEADER_RULE_IDENTIFIER ) ) {
8886 // Header is not a rule
8987 continue ;
9088 }
@@ -93,6 +91,20 @@ protected function parse()
9391 }
9492 }
9593
94+ /**
95+ * Select the HTTP header source: the headers entry of $this->config when it is set and is an array, otherwise the result of getHeaders()
96+ *
97+ * @return array the configured headers array, or whatever getHeaders() returns
98+ */
99+ protected function selectHeaderSource ()
100+ {
101+ if (isset ($ this ->config ['headers ' ]) && is_array ($ this ->config ['headers ' ])) {
102+ return $ this ->config ['headers ' ];
103+ }
104+ // No usable headers entry in the config (missing or not an array): fall back to getHeaders()
105+ return $ this ->getHeaders ();
106+ }
107+
96108 /**
97109 * Request the HTTP headers from a URL
98110 *
@@ -107,7 +119,11 @@ protected function getHeaders()
107119 }
108120 $ client = new GuzzleHttp \Client ();
109121 $ res = $ client ->head ($ this ->url , $ this ->config ['guzzle ' ]);
110- return $ res ->getHeaders ();
122+ $ headers = [];
123+ foreach ($ res ->getHeader (self ::HEADER_RULE_IDENTIFIER ) as $ name => $ values ) {
124+ $ headers [] = $ name . ': ' . implode (' ' , $ values ) . "\r\n" ;
125+ }
126+ return $ headers ;
111127 } catch (GuzzleHttp \Exception \TransferException $ e ) {
112128 throw new XRobotsTagParserException ($ e ->getMessage ());
113129 }
@@ -129,7 +145,7 @@ protected function detectDirectives()
129145 foreach ($ directives as $ rule ) {
130146 $ directive = trim (explode (': ' , $ rule , 2 )[0 ]);
131147 if (in_array ($ directive , array_keys ($ this ->directiveClasses ()))) {
132- $ this ->addRule ($ this -> directiveClasses ()[ $ directive] );
148+ $ this ->addRule ($ directive );
133149 }
134150 }
135151 $ this ->cleanup ();
@@ -168,7 +184,7 @@ protected function addRule($directive)
168184 if (!isset ($ this ->rules [$ this ->currentUserAgent ])) {
169185 $ this ->rules [$ this ->currentUserAgent ] = [];
170186 }
171- $ class = __NAMESPACE__ . "\\XRobotsTagParser \\ directives \\$ directive" ;
187+ $ class = "\\" . __CLASS__ . "\\directives \\" . $ this -> directiveClasses ()[ $ directive] ;
172188 $ object = new $ class ($ this ->currentRule );
173189 if (!$ object instanceof XRobotsTagParser \directives \directiveInterface) {
174190 throw new XRobotsTagParserException ('Unsupported directive class ' );
@@ -201,8 +217,8 @@ public function getRules($raw = false)
201217 $ rules = array_merge ($ rules , $ this ->rules [self ::USERAGENT_DEFAULT ]);
202218 }
203219 // Matching UserAgent
204- if (isset ($ this ->rules [$ this ->userAgent ])) {
205- $ rules = array_merge ($ rules , $ this ->rules [$ this ->userAgent ]);
220+ if (isset ($ this ->rules [$ this ->userAgentMatch ])) {
221+ $ rules = array_merge ($ rules , $ this ->rules [$ this ->userAgentMatch ]);
206222 }
207223 if (!$ raw ) {
208224 $ rebuild = new Rebuild ($ rules );
@@ -234,7 +250,7 @@ public function getDirectiveMeaning($directive)
234250 if (!in_array ($ directive , array_keys ($ this ->directiveClasses ()))) {
235251 throw new XRobotsTagParserException ('Unknown directive ' );
236252 }
237- $ class = "XRobotsTagParser \\ directives \\$ directive" ;
253+ $ class = "\\" . __CLASS__ . "\\ directives \\" . $ this -> directiveClasses ()[ $ directive] ;
238254 $ object = new $ class ($ this ->directiveClasses ()[$ directive ]);
239255 if (!$ object instanceof XRobotsTagParser \directives \directiveInterface) {
240256 throw new XRobotsTagParserException ('Unsupported directive class ' );
0 commit comments