Skip to content

Commit a16bc2d

Browse files
committed
Refactoring
1 parent b3545a6 commit a16bc2d

18 files changed

Lines changed: 87 additions & 231 deletions

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
11
composer.lock
22
composer.phar
33
/vendor/
4-
/build/logs/*

.travis.yml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,13 @@
11
sudo: false
22
language: php
33
php:
4+
- 7.1
45
- 7.0
56
- 5.6
67
- hhvm
78
matrix:
89
allow_failures:
910
- php: hhvm
10-
before_install:
11-
- composer selfupdate
1211
install:
1312
- composer install
1413
after_script:

README.md

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
1-
[![Build Status](https://travis-ci.org/VIPnytt/X-Robots-Tag-parser.svg?branch=master)](https://travis-ci.org/VIPnytt/X-Robots-Tag-parser)
2-
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/VIPnytt/X-Robots-Tag-parser/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/VIPnytt/X-Robots-Tag-parser/?branch=master)
3-
[![Code Climate](https://codeclimate.com/github/VIPnytt/X-Robots-Tag-parser/badges/gpa.svg)](https://codeclimate.com/github/VIPnytt/X-Robots-Tag-parser)
4-
[![Test Coverage](https://codeclimate.com/github/VIPnytt/X-Robots-Tag-parser/badges/coverage.svg)](https://codeclimate.com/github/VIPnytt/X-Robots-Tag-parser/coverage)
5-
[![License](https://poser.pugx.org/VIPnytt/X-Robots-Tag-parser/license)](https://github.com/VIPnytt/X-Robots-Tag-parser/blob/master/LICENSE)
6-
[![Packagist](https://img.shields.io/packagist/v/vipnytt/x-robots-tag-parser.svg)](https://packagist.org/packages/vipnytt/x-robots-tag-parser)
7-
[![Join the chat at https://gitter.im/VIPnytt/X-Robots-Tag-parser](https://badges.gitter.im/VIPnytt/X-Robots-Tag-parser.svg)](https://gitter.im/VIPnytt/X-Robots-Tag-parser)
1+
[![Build Status](https://travis-ci.org/VIPnytt/RobotsTagParser.svg?branch=master)](https://travis-ci.org/VIPnytt/RobotsTagParser)
2+
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/VIPnytt/RobotsTagParser/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/VIPnytt/RobotsTagParser/?branch=master)
3+
[![Code Climate](https://codeclimate.com/github/VIPnytt/RobotsTagParser/badges/gpa.svg)](https://codeclimate.com/github/VIPnytt/RobotsTagParser)
4+
[![Test Coverage](https://codeclimate.com/github/VIPnytt/RobotsTagParser/badges/coverage.svg)](https://codeclimate.com/github/VIPnytt/RobotsTagParser/coverage)
5+
[![License](https://poser.pugx.org/VIPnytt/RobotsTagParser/license)](https://github.com/VIPnytt/RobotsTagParser/blob/master/LICENSE)
6+
[![Packagist](https://img.shields.io/packagist/v/vipnytt/robotstagparser.svg)](https://packagist.org/packages/vipnytt/robotstagparser)
7+
[![Gitter](https://badges.gitter.im/VIPnytt/RobotsTagParser.svg)](https://gitter.im/VIPnytt/RobotsTagParser)
88

99
# X-Robots-Tag HTTP header parser
1010
PHP class to parse X-Robots-Tag HTTP headers according to [Google X-Robots-Tag HTTP header specifications](https://developers.google.com/webmasters/control-crawl-index/docs/robots_meta_tag#using-the-x-robots-tag-http-header).
@@ -23,7 +23,7 @@ The library is available via [Composer](https://getcomposer.org). Add this to yo
2323
```json
2424
{
2525
"require": {
26-
"vipnytt/x-robots-tag-parser": "~0.2"
26+
"vipnytt/robotstagparser": "~0.3"
2727
}
2828
}
2929
```
@@ -94,16 +94,16 @@ $parser = new XRobotsTagParser('myUserAgent', $headers);
9494
$array = $parser->export();
9595
```
9696

97-
## Supported directives:
98-
- [x] ````all```` - There are no restrictions for indexing or serving.
99-
- [x] ````none```` - Equivalent to ````noindex```` and ````nofollow````.
100-
- [x] ````noindex```` - Do not show this page in search results and do not show a "Cached" link in search results.
101-
- [x] ````nofollow```` - Do not follow the links on this page.
102-
- [x] ````noarchive```` - Do not show a "Cached" link in search results.
103-
- [x] ````nosnippet```` - Do not show a snippet in the search results for this page.
104-
- [x] ````noodp```` - Do not use metadata from the [Open Directory project](http://dmoz.org/) for titles or snippets shown for this page.
105-
- [x] ````notranslate```` - Do not offer translation of this page in search results.
106-
- [x] ````noimageindex```` - Do not index images on this page.
107-
- [x] ````unavailable_after```` - Do not show this page in search results after the specified date/time.
97+
## Directives:
98+
- [x] `all` - There are no restrictions for indexing or serving.
99+
- [x] `none` - Equivalent to `noindex` and `nofollow`.
100+
- [x] `noindex` - Do not show this page in search results and do not show a "Cached" link in search results.
101+
- [x] `nofollow` - Do not follow the links on this page.
102+
- [x] `noarchive` - Do not show a "Cached" link in search results.
103+
- [x] `nosnippet` - Do not show a snippet in the search results for this page.
104+
- [x] `noodp` - Do not use metadata from the [Open Directory project](http://dmoz.org/) for titles or snippets shown for this page.
105+
- [x] `notranslate` - Do not offer translation of this page in search results.
106+
- [x] `noimageindex` - Do not index images on this page.
107+
- [x] `unavailable_after` - Do not show this page in search results after the specified date/time.
108108

109109
Source: [https://developers.google.com/webmasters/control-crawl-index/docs/robots_meta_tag](https://developers.google.com/webmasters/control-crawl-index/docs/robots_meta_tag#valid-indexing--serving-directives)

build/.gitkeep

Whitespace-only changes.

build/logs/.gitkeep

Whitespace-only changes.

composer.json

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
{
2-
"name": "vipnytt/x-robots-tag-parser",
2+
"name": "vipnytt/robotstagparser",
33
"description": "X-Robots-Tag HTTP header parser class",
44
"type": "library",
55
"keywords": [
@@ -13,7 +13,7 @@
1313
"Spider",
1414
"Web-Crawler"
1515
],
16-
"homepage": "https://github.com/VIPnytt/X-Robots-Tag-parser",
16+
"homepage": "https://github.com/VIPnytt/RobotsTagParser",
1717
"license": "MIT",
1818
"authors": [
1919
{
@@ -29,21 +29,24 @@
2929
],
3030
"support": {
3131
"email": "vipnytt@gmail.com",
32-
"issues": "https://github.com/VIPnytt/X-Robots-Tag-parser/issues",
33-
"wiki": "https://github.com/VIPnytt/X-Robots-Tag-parser/wiki",
34-
"source": "https://github.com/VIPnytt/X-Robots-Tag-parser",
35-
"docs": "https://github.com/VIPnytt/X-Robots-Tag-parser/wiki"
32+
"issues": "https://github.com/VIPnytt/RobotsTagParser/issues",
33+
"wiki": "https://github.com/VIPnytt/RobotsTagParser/wiki",
34+
"source": "https://github.com/VIPnytt/RobotsTagParser",
35+
"docs": "https://github.com/VIPnytt/RobotsTagParser/wiki"
3636
},
3737
"require": {
3838
"php": ">=5.6",
3939
"guzzlehttp/guzzle": "6.*",
40-
"vipnytt/useragentparser": "~0.2",
40+
"vipnytt/useragentparser": "~1.0",
4141
"ext-mbstring": "*"
4242
},
4343
"require-dev": {
4444
"phpunit/phpunit": ">=4.0",
4545
"codeclimate/php-test-reporter": ">=0.2.0"
4646
},
47+
"suggest": {
48+
"vipnytt/robotstxtparser": "Robots.txt parser."
49+
},
4750
"autoload": {
4851
"psr-4": {
4952
"vipnytt\\": "src/"

src/XRobotsTagParser.php

Lines changed: 13 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33

44
use vipnytt\XRobotsTagParser\Directives;
55
use vipnytt\XRobotsTagParser\Exceptions\XRobotsTagParserException;
6-
use vipnytt\XRobotsTagParser\Rebuild;
76
use vipnytt\XRobotsTagParser\RobotsTagInterface;
87

98
/**
@@ -16,7 +15,7 @@
1615
* @author Jan-Petter Gundersen (europe.jpg@gmail.com)
1716
*
1817
* Project:
19-
* @link https://github.com/VIPnytt/X-Robots-Tag-parser
18+
* @link https://github.com/VIPnytt/RobotsTagParser
2019
* @license https://opensource.org/licenses/MIT MIT license
2120
*
2221
* Specification:
@@ -29,14 +28,14 @@ class XRobotsTagParser implements RobotsTagInterface
2928
*
3029
* @var string
3130
*/
32-
protected $userAgent = '';
31+
protected $userAgent = self::USER_AGENT;
3332

3433
/**
3534
* User-Agent for rule selection
3635
*
3736
* @var string
3837
*/
39-
protected $userAgentMatch = '';
38+
protected $userAgentMatch = self::USER_AGENT;
4039

4140
/**
4241
* Current rule
@@ -63,12 +62,12 @@ class XRobotsTagParser implements RobotsTagInterface
6362
* Constructor
6463
*
6564
* @param string $userAgent
66-
* @param array $headers
65+
* @param array|null $headers
6766
*/
68-
public function __construct($userAgent = '', $headers = null)
67+
public function __construct($userAgent = self::USER_AGENT, $headers = null)
6968
{
7069
$this->userAgent = $userAgent;
71-
if (isset($headers)) {
70+
if (!empty($headers)) {
7271
$this->parse($headers);
7372
}
7473
}
@@ -102,13 +101,13 @@ protected function detectDirectives()
102101
{
103102
$directives = array_map('trim', mb_split(',', $this->currentRule));
104103
$pair = array_map('trim', mb_split(':', $directives[0], 2));
105-
if (count($pair) == 2 && !in_array($pair[0], array_keys(self::DIRECTIVES))) {
104+
if (count($pair) == 2 && !in_array($pair[0], self::DIRECTIVES)) {
106105
$this->currentUserAgent = $pair[0];
107106
$directives[0] = $pair[1];
108107
}
109108
foreach ($directives as $rule) {
110109
$directive = trim(mb_split(':', $rule, 2)[0]);
111-
if (in_array($directive, array_keys(self::DIRECTIVES))) {
110+
if (in_array($directive, self::DIRECTIVES)) {
112111
$this->addRule($directive);
113112
}
114113
}
@@ -145,7 +144,7 @@ protected function addRule($directive)
145144
protected function cleanup()
146145
{
147146
$this->currentRule = '';
148-
$this->currentUserAgent = '';
147+
$this->currentUserAgent = self::USER_AGENT;
149148
}
150149

151150
/**
@@ -156,32 +155,26 @@ protected function cleanup()
156155
protected function matchUserAgent()
157156
{
158157
$userAgentParser = new UserAgentParser($this->userAgent);
159-
$match = $userAgentParser->match(array_keys($this->rules));
160-
$this->userAgentMatch = ($match !== false) ? $match : '';
158+
$this->userAgentMatch = (($match = $userAgentParser->getMostSpecific(array_keys($this->rules))) !== false) ? $match : self::USER_AGENT;
161159
return $this->userAgentMatch;
162160
}
163161

164162
/**
165163
* Return all applicable rules
166164
*
167-
* @param bool $raw
168165
* @return array
169166
*/
170-
public function getRules($raw = false)
167+
public function getRules()
171168
{
172169
$rules = [];
173170
// Default UserAgent
174-
if (isset($this->rules[''])) {
175-
$rules = array_merge($rules, $this->rules['']);
171+
if (isset($this->rules[self::USER_AGENT])) {
172+
$rules = array_merge($rules, $this->rules[self::USER_AGENT]);
176173
}
177174
// Matching UserAgent
178175
if (isset($this->rules[$this->userAgentMatch])) {
179176
$rules = array_merge($rules, $this->rules[$this->userAgentMatch]);
180177
}
181-
if (!$raw) {
182-
$rebuild = new Rebuild($rules);
183-
$rules = $rebuild->getResult();
184-
}
185178
// Result
186179
return $rules;
187180
}
@@ -195,20 +188,4 @@ public function export()
195188
{
196189
return $this->rules;
197190
}
198-
199-
/**
200-
* Get the meaning of a Directive
201-
*
202-
* @param string $directive
203-
* @return string
204-
* @throws XRobotsTagParserException
205-
*/
206-
public function getDirectiveMeaning($directive)
207-
{
208-
$directive = mb_strtolower($directive);
209-
if (!in_array($directive, array_keys(self::DIRECTIVES))) {
210-
throw new XRobotsTagParserException('Unknown directive');
211-
}
212-
return self::DIRECTIVES[$directive];
213-
}
214191
}

src/XRobotsTagParser/Adapters/TextString.php

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ class TextString extends XRobotsTagParser
2020
*/
2121
public function __construct($string, $userAgent = '')
2222
{
23-
$array = array_map('trim', mb_split('\r\n|\n|\r', $string));
23+
$array = array_map('trim', mb_split('\r\n|\r|\n', $string));
2424
parent::__construct($userAgent, $array);
2525
}
2626
}

src/XRobotsTagParser/Adapters/Url.php

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,11 +18,18 @@ class Url extends XRobotsTagParser\Adapters\GuzzleHttp
1818
* @var array
1919
*/
2020
protected $config = [
21+
'allow_redirects' => [
22+
'referer' => true,
23+
'strict' => false,
24+
],
25+
'connect_timeout' => 30,
26+
'decode_content' => true,
2127
'headers' => [
22-
'User-Agent' => 'X-Robots-Tag-parser-VIPnytt/1.0 (+https://github.com/VIPnytt/X-Robots-Tag-parser/blob/master/README.md)',
28+
'user-agent' => 'XRobotsTagParser-VIPnytt/1.0 (+https://github.com/VIPnytt/RobotsTagParser/blob/master/README.md)',
2329
],
2430
'http_errors' => false,
25-
'timeout' => 30,
31+
'timeout' => 120,
32+
'verify' => true,
2633
];
2734

2835
/**

src/XRobotsTagParser/Directives/DirectiveInterface.php

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,6 @@
11
<?php
22
namespace vipnytt\XRobotsTagParser\Directives;
33

4-
/**
5-
* Interface DirectiveInterface
6-
*
7-
* @package vipnytt\XRobotsTagParser\Directives
8-
*/
94
interface DirectiveInterface
105
{
116
/**

0 commit comments

Comments
 (0)