-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.js
More file actions
70 lines (67 loc) · 1.77 KB
/
index.js
File metadata and controls
70 lines (67 loc) · 1.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
const EventEmitter = require('events');
const http = require('http');
const https = require('https');
const URI = require('url');
const Async = require('async');
const cheerio = require('cheerio');
/**
 * Event-emitting web crawler that feeds URLs through an `async` queue.
 *
 * Events emitted by this class:
 *  - `commit`: whatever value `parse` passes to its second argument
 *  - `done`:   the value returned by `parse` for one finished URL
 *  - `error`:  the failure raised while fetching or parsing one URL
 *  - `end`:    the queue reported that it drained
 */
class Crawler extends EventEmitter {
  /**
   * @param {object} [options] Copied onto the instance on top of the defaults
   *   `page: 1` and `concurrency: 5`. Because the properties are assigned to
   *   the instance, `options` may also carry `url` (read by `start`) or
   *   replacement methods such as `parse`.
   */
  constructor(options) {
    super();
    Object.assign(this, { page: 1, concurrency: 5 }, options);
    this.queue = Async.queue(this.crawl.bind(this), this.concurrency);
    // NOTE(review): assigning `drain` is the async v2 style; async v3 expects
    // `queue.drain(fn)` instead - confirm against the installed version.
    this.queue.drain = () => this.emit('end');
  }

  /**
   * Queue the first URL.
   *
   * @param {string} [url] URL to crawl; falls back to `this.url` when falsy.
   * @returns {Crawler} this instance, for chaining.
   */
  start(url) {
    this.push(url || this.url);
    return this;
  }

  /**
   * Add a URL to the work queue. When its task finishes, `done` is emitted
   * with the task result, or `error` is emitted with the failure.
   *
   * @param {string} url URL to crawl.
   * @returns {Crawler} this instance, for chaining.
   */
  push(url) {
    this.queue.push(url, (err, result) => {
      if (err) {
        this.emit('error', err);
        return;
      }
      this.emit('done', result);
    });
    return this;
  }

  /**
   * Perform an HTTP(S) request and buffer the whole response body.
   *
   * @param {string} url Target URL; its parsed parts become request options.
   * @param {object} [params] Extra request options that override the parsed
   *   URL parts. `params.body`, when truthy, is written as the request body.
   * @returns {Promise<object>} Resolves with the response object, extended
   *   with `data` (Buffer) and the helpers `blob()`, `text()` and `json()`.
   *   Rejects on an unsupported protocol or on a request/response error.
   */
  request(url, params) {
    const options = Object.assign({ headers: {} }, URI.parse(url), params);
    const { body, protocol } = options;
    return new Promise((resolve, reject) => {
      // Only the two known transports are allowed, so the URL can never
      // decide which module gets loaded.
      let transport = null;
      if (protocol === 'http:') {
        transport = http;
      } else if (protocol === 'https:') {
        transport = https;
      }
      if (transport === null) {
        reject(new Error(`Unsupported protocol: ${protocol}`));
        return;
      }
      const req = transport.request(options, (response) => {
        const chunks = [];
        response.on('data', (chunk) => chunks.push(chunk));
        response.on('error', reject);
        response.on('end', () => {
          response.data = Buffer.concat(chunks);
          response.blob = () => response.data;
          response.text = () => response.data.toString();
          response.json = () => JSON.parse(response.text());
          resolve(response);
        });
      });
      // Without this listener a connection failure is an unhandled 'error'
      // event on the request and the promise never settles.
      req.on('error', reject);
      if (body) {
        req.write(body);
      }
      req.end();
    });
  }

  /**
   * Default page parser: extracts the document title.
   *
   * @param {Function} $ Loaded cheerio document; `crawl` stores the page URL
   *   on it as `$.url`.
   * @returns {{title: string, url: string}} Title text and page URL.
   */
  parse($) {
    const { url } = $;
    const title = $('head > title').text();
    return { title, url };
  }

  /**
   * Queue worker: fetch one URL, load it with cheerio, and hand it to `parse`.
   *
   * @param {string} url URL to fetch.
   * @param {Function} done Node-style callback, `done(err, result)`.
   * @returns {Promise<void>} Settles after `done` has been called.
   */
  crawl(url, done) {
    const commit = (item) => this.emit('commit', item);
    // Starting from a resolved promise also routes synchronous throws from
    // `request` (e.g. a non-string URL) into the failure path below.
    return Promise.resolve()
      .then(() => this.request(url))
      .then((res) => res.text())
      .then((html) => {
        const $ = cheerio.load(html);
        // `parse` reads the page URL from `$.url`.
        $.url = url;
        return this.parse($, commit);
      })
      .then(
        (result) => done(null, result),
        // Always report failures so the queue slot is released.
        (err) => done(err),
      );
  }
}
// Expose the Crawler class as this module's export.
module.exports = Crawler;