Skip to content

Commit 7b0c412

Browse files
authored
Merge pull request #2685 from mcagriardic/add-polars-documentation
Add Polars documentation
2 parents 766f359 + 59e038a commit 7b0c412

6 files changed

Lines changed: 155 additions & 0 deletions

File tree

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
module Docs
  class Polars
    # HTML clean-up pass for scraped Polars reference pages. It runs after the
    # generic sphinx/clean_html filter (see the scraper's filter order).
    class CleanHtmlFilter < Filter
      # pydata-sphinx-theme chrome that either survives the container
      # extraction or lives inside the article itself: sidebars, in-page TOC,
      # article header, prev/next navigation, footer, heading permalinks and
      # any forms.
      CHROME_SELECTORS = %w[
        .bd-sidebar-primary
        .bd-sidebar-secondary
        .bd-toc
        .bd-header-article
        .prev-next-area
        .prev-next-footer
        .bd-footer
        .headerlink
        form
      ].freeze

      # Strips theme chrome, removes images on the root page and marks every
      # highlighted code block as Python.
      #
      # Returns the (mutated) document.
      def call
        css(*CHROME_SELECTORS).remove

        # The landing page only carries banner/logo imagery.
        css('img').remove if root_page?

        css('.highlight pre').each do |pre|
          # Writing the node's own text back as its content; with Nokogiri
          # this replaces any child markup with a single plain-text node.
          plain_text = pre.content
          pre.content = plain_text
          # Tag the block so Prism highlights it as Python.
          pre['data-language'] = 'python'
        end

        doc
      end
    end
  end
end

lib/docs/filters/polars/entries.rb

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
module Docs
  class Polars
    # Derives the index entry name and type for each Polars reference page.
    class EntriesFilter < Docs::EntriesFilter
      # Leading path segment of a reference page => human readable type.
      # The Polars reference is laid out as <section>/... under the base url
      # (e.g. dataframe/api/polars.DataFrame.count.html). Top-level members
      # (plain functions, datatypes, IO, config, ...) live flat under api/
      # instead and are classified by name through API_TYPES.
      SECTION_TYPES = {
        'dataframe' => 'DataFrame',
        'lazyframe' => 'LazyFrame',
        'series' => 'Series',
        'expressions' => 'Expressions',
        'functions' => 'Functions',
        'selectors' => 'Selectors',
        'datatypes' => 'Data Types',
        'datatype_expr' => 'Data Types',
        'config' => 'Config',
        'io' => 'Input/output',
        'sql' => 'SQL',
        'exceptions' => 'Exceptions',
        'testing' => 'Testing',
        'catalog' => 'Catalog',
        'metadata' => 'Metadata',
        'schema' => 'Schema',
        'plugins' => 'Plugins'
      }.freeze

      # Ordered [pattern, type] pairs for members stored flat under api/.
      # The first pattern that matches the entry name wins, so order matters.
      API_TYPES = [
        [%r{\Apolars\.datatypes\.}, 'Data Types'],
        [%r{\Apolars\.Config\b}, 'Config'],
        [%r{\Apolars\.exceptions\.}, 'Exceptions'],
        [%r{\Apolars\.testing\.}, 'Testing'],
        [%r{\Apolars\.(api|plugins)\.}, 'Plugins'],
        [%r{\Apolars\.io\.}, 'Input/output'],
        [%r{\Apolars\.DataFrame\.}, 'DataFrame'],
        [%r{\Apolars\.LazyFrame\.}, 'LazyFrame'],
        [%r{\Apolars\.(read_|scan_|write_|from_)}, 'Input/output'],
        [%r{\Apolars\.json_normalize\b}, 'Input/output']
      ].freeze

      # Entry name: the text of the page's <h1>, trimmed.
      # This filter runs before clean_html removes the headerlink, so the
      # trailing permalink character ("#" or the pilcrow) is stripped here.
      def get_name
        heading = at_css('h1').content.strip
        heading.sub(%r{\s*[#\u{00B6}]+\s*\z}, '')
      end

      # Entry type: 'Manual' for the root page, a name-based classification
      # for pages under api/, otherwise the SECTION_TYPES label for the
      # leading slug segment ('Manual' when the segment is not listed).
      def get_type
        return 'Manual' if root_page?

        section = slug.split('/').first
        return classify_api(get_name) if section == 'api'

        SECTION_TYPES.fetch(section, 'Manual')
      end

      private

      # Classifies a top-level polars.* member by its qualified name.
      # Falls back to 'Functions' when no API_TYPES pattern matches.
      def classify_api(name)
        _pattern, type = API_TYPES.find { |pattern, _label| pattern.match?(name) }
        type || 'Functions'
      end
    end
  end
end

lib/docs/scrapers/polars.rb

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
module Docs
  # Scraper for the Polars Python API reference (Sphinx, pydata-sphinx-theme).
  class Polars < UrlScraper
    self.name = 'Polars'
    self.type = 'sphinx'
    self.release = '1.41.0'
    self.base_url = 'https://docs.pola.rs/api/python/stable/reference/'
    self.root_path = 'index.html'
    self.links = {
      home: 'https://pola.rs/',
      code: 'https://github.com/pola-rs/polars'
    }

    html_filters.push 'polars/entries', 'sphinx/clean_html', 'polars/clean_html'

    # pydata-sphinx-theme keeps the page content in the article body.
    options[:container] = 'article.bd-article'

    options[:skip_patterns] = [/_changelog/, /whatsnew/]

    # https://github.com/pola-rs/polars/blob/main/LICENSE
    options[:attribution] = <<~HTML
      &copy; 2020 Ritchie Vink<br>
      &copy; 2022 Polars contributors<br>
      Licensed under the MIT License.
    HTML

    # Polars tags both Rust (rs-*) and Python (py-*) releases in the same repo.
    # The tags API only lists recent Rust ones, but the latest GitHub release is
    # always the Python one, so use that and drop the py- prefix.
    def get_latest_version(opts)
      get_latest_github_release('pola-rs', 'polars', opts).sub(/\Apy-/, '')
    end

    private

    # Rewrites every `.sig` element of the response into a
    # <pre data-language="python"> holding the signature as plain text, one
    # parameter per line, then hands the response to the parent implementation.
    # Responses whose body contains no `class="sig` are passed through untouched.
    def parse(response)
      if response.body.include?('class="sig')
        doc = Nokogiri::HTML5(response.body)
        doc.css('.sig').each do |node|
          # Drop the permalink anchor and the "[source]" link so they do not
          # end up in the signature text.
          node.css('.headerlink').remove
          node.css('.reference.external').each { |a| a.remove if a.text.strip == '[source]' }

          pre = Nokogiri::XML::Node.new('pre', doc)
          pre['data-language'] = 'python'
          pre.content = multiline_signature(node.text.gsub(/\s+/, ' ').strip)
          node.replace(pre)
        end
        response.body.replace(doc.to_html)
      end
      super
    end

    # Reflows a single-line signature so each top-level parameter sits on its
    # own line (with a trailing comma after the last one).
    #
    # The parameter list is delimited by the first "(" and its *matching* ")",
    # so defaults such as `(0.25, 0.5, 0.75)` neither end the list early nor
    # get split, and commas nested in brackets (`dict[str, Any]`) or quoted
    # literals (`','`) are not treated as separators.
    #
    # Returns sig unchanged when it has no "(" after its first character, no
    # matching ")", or no parameters.
    def multiline_signature(sig)
      open_idx = sig.index('(')
      return sig unless open_idx&.positive?

      params, close_idx = scan_signature_params(sig, open_idx + 1)
      return sig if params.nil? || params.empty?

      "#{sig[0..open_idx]}\n #{params.join(",\n ")},\n#{sig[close_idx..-1]}"
    end

    # Walks sig from index `start` (just past the opening parenthesis),
    # splitting on commas that are outside any bracket pair or quoted literal.
    #
    # Returns [params, close_idx], where params are the stripped, non-empty
    # parameter strings and close_idx is the index of the ")" closing the
    # list; returns nil when the list is unbalanced or never closed.
    def scan_signature_params(sig, start)
      params = []
      current = +''
      depth = 0
      quote = nil
      escaped = false

      sig.each_char.with_index do |char, idx|
        next if idx < start

        if quote
          # Inside a quoted literal: copy verbatim until the closing quote,
          # skipping over backslash-escaped characters.
          current << char
          if escaped
            escaped = false
          elsif char == '\\'
            escaped = true
          elsif char == quote
            quote = nil
          end
          next
        end

        case char
        when "'", '"'
          quote = char
        when '(', '[', '{'
          depth += 1
        when ')', ']', '}'
          if depth.zero?
            # Only ")" may close the parameter list itself.
            return nil unless char == ')'

            params << current
            return [params.map(&:strip).reject(&:empty?), idx]
          end
          depth -= 1
        when ','
          if depth.zero?
            params << current
            current = +''
            next
          end
        end
        current << char
      end

      nil
    end
  end
end

public/icons/docs/polars/16.png

411 Bytes
Loading

public/icons/docs/polars/16@2x.png

247 Bytes
Loading

public/icons/docs/polars/SOURCE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
https://raw.githubusercontent.com/pola-rs/polars-static/master/icons/favicon-32x32.png

0 commit comments

Comments
 (0)