Skip to content

Commit d239d0b

Browse files
committed
Add Cycle.js documentation scraper
1 parent 881d8cb commit d239d0b

3 files changed

Lines changed: 134 additions & 0 deletions

File tree

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
module Docs
  class Cyclejs
    # Normalizes the markup of a Cycle.js documentation page.
    #
    # `css` and `doc` are not defined in this file; they are expected to be
    # provided by the Filter superclass.
    class CleanHtmlFilter < Filter
      # Cleans the current document in place and returns it.
      #
      # Steps, in order: drop every <br>, flatten `pre > code` blocks into
      # their <pre> parent (recording the highlighted language), strip inline
      # styles from table elements, and make sure every image has an `alt`.
      def call
        css('br').remove

        css('pre > code').each do |code|
          pre = code.parent
          # Capture group 1 of "language-xxx"; nil when the class attribute is
          # missing or does not carry a language marker.
          language = code['class'].to_s[/language-(\w+)/, 1]
          pre['data-language'] = language if language
          # Replacing the content discards the inner <code> wrapper and keeps
          # only its trimmed text.
          pre.content = code.content.strip
        end

        styled_cells = css('table[style]', 'tr[style]', 'td[style]', 'th[style]')
        styled_cells.remove_attr('style')

        css('img').each do |image|
          # Missing or whitespace-only alt text is normalized to an empty string.
          alt_text = image['alt'].presence
          image['alt'] = alt_text || ''
        end

        doc
      end
    end
  end
end
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
module Docs
  class Cyclejs
    # Derives index entries (name, type, in-page anchors) for Cycle.js pages.
    #
    # `at_css`, `css`, `slug`, `subpath` and `root_page?` are not defined in
    # this file; they are expected to come from Docs::EntriesFilter.
    class EntriesFilter < Docs::EntriesFilter
      # Name of the page entry.
      #
      # The API index is always "API Reference" and the root page is always
      # "Cycle.js"; the API index check comes first so it wins if both apply.
      # Any other page uses its first <h1>, falling back to a titleized
      # version of the path without the ".html" suffix.
      def get_name
        return 'API Reference' if slug == 'api/index'
        return 'Cycle.js' if root_page?

        heading = at_css('h1')
        return heading.content.strip if heading

        subpath.sub(/\.html\z/, '').titleize
      end

      # Pages under "api/" are grouped as "API"; everything else is a "Guide".
      def get_type
        if slug.start_with?('api/')
          'API'
        else
          'Guide'
        end
      end

      # One [name, fragment id] pair per <h2>/<h3> that carries an id.
      #
      # A leading "#" (with any following whitespace) and a trailing " #" are
      # stripped from the heading text.
      def additional_entries
        css('h2[id], h3[id]').map do |heading|
          label = heading.content.strip.sub(/\A#\s*/, '').sub(/\s+#\z/, '')
          [label, heading['id']]
        end
      end
    end
  end
end

lib/docs/scrapers/cyclejs.rb

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
require 'cgi'
require 'redcarpet'
2+
3+
module Docs
  # Scraper definition for the Cycle.js documentation at https://cycle.js.org/.
  #
  # Only the guide pages listed below and everything under "api/" are kept.
  # Pages that embed their content as Markdown inside a `script#markdown`
  # element are rendered to HTML with Redcarpet before the filters run.
  class Cyclejs < UrlScraper
    self.name = 'Cycle.js'
    self.slug = 'cyclejs'
    self.type = 'cyclejs'
    self.release = '23.1.0'
    self.base_url = 'https://cycle.js.org/'
    self.root_path = 'index.html'
    self.initial_paths = %w(
      getting-started.html
      model-view-intent.html
      streams.html
      drivers.html
      components.html
      basic-examples.html
      dialogue.html
      releases.html
      api/index.html
      api/run.html
      api/rxjs-run.html
      api/most-run.html
      api/dom.html
      api/html.html
      api/http.html
      api/history.html
      api/isolate.html
      api/state.html
    )

    self.links = {
      home: 'https://cycle.js.org/',
      code: 'https://github.com/cyclejs/cyclejs'
    }

    html_filters.push 'cyclejs/clean_html', 'cyclejs/entries'

    # Whitelist of paths: the root page, the guide pages, and the API section.
    options[:only_patterns] = [
      /\Aindex\.html\z/,
      /\Agetting-started\.html\z/,
      /\Amodel-view-intent\.html\z/,
      /\Astreams\.html\z/,
      /\Adrivers\.html\z/,
      /\Acomponents\.html\z/,
      /\Abasic-examples\.html\z/,
      /\Adialogue\.html\z/,
      /\Areleases\.html\z/,
      /\Aapi\//
    ]

    options[:attribution] = <<-HTML
      &copy; 2014&ndash;present Cycle.js contributors.<br>
      Licensed under the MIT License.
    HTML

    # Latest published version, taken from the `@cycle/dom` npm package.
    # `get_npm_version` is not defined in this file (presumably inherited).
    def get_latest_version(opts)
      get_npm_version('@cycle/dom', opts)
    end

    private

    # Parses a response into a `[document, title]` pair.
    #
    # When the page has no `script#markdown` element the default
    # implementation (`super`) is used. Otherwise the embedded Markdown is
    # rendered and wrapped in a minimal HTML document.
    #
    # @param response [#body] the fetched page
    # @return [Array] the parsed document and the page title (nil when the
    #   source page has no <title>)
    def parse(response)
      document = Parser.new(response.body).html
      markdown = document.at_css('script#markdown')

      return super unless markdown

      html = markdown_renderer.render(markdown.content.strip)
      title = document.at_css('title').try(:content).try(:strip)

      # `title` is decoded text, so it must be re-escaped before being placed
      # back into markup; otherwise "&", "<" or ">" in a page title would
      # produce malformed HTML or inject elements into the rebuilt document.
      page = "<html><head><title>#{CGI.escapeHTML(title.to_s)}</title></head><body>#{html}</body></html>"
      [Parser.new(page).html, title]
    end

    # Memoized Redcarpet renderer. `with_toc_data` makes headings carry ids,
    # which the entries filter selects on (`h2[id], h3[id]`).
    def markdown_renderer
      @markdown_renderer ||= Redcarpet::Markdown.new(
        Redcarpet::Render::HTML.new(with_toc_data: true),
        autolink: true,
        fenced_code_blocks: true,
        no_intra_emphasis: true,
        tables: true
      )
    end
  end
end

0 commit comments

Comments
 (0)