|
module Docs
  class Coldfusion
    # Reduces a cfdocs page to its reference content: merges the page header,
    # the breadcrumb (for its engine-availability labels) and the main content
    # container into a single root, then strips site chrome, interactive
    # widgets and layout-only wrappers.
    class CleanHtmlFilter < Filter
      # Entry point invoked by the filter pipeline.
      #
      # Returns the cleaned document node.
      def call
        # Work on a root holding only the header, breadcrumb and main content.
        # When a page lacks that structure, build_root hands back the whole
        # page instead, which is then cleaned in place by the steps below.
        @doc = build_root

        # Remove site chrome and interactive widgets.
        css('nav', 'footer', 'script', 'noscript', '#cfbreak', '.newsletter').remove
        css('.modal', '.add-example-modal-lg', '.example-modal').remove
        css('.example-btn', '.copy-btn', '.issuebutton', '.issuecount').remove
        css('button').remove
        css('br').remove
        css('#search2').remove

        # Drop the "Add An Example" / edit / fork affordances.
        css('a[href*="github.com"]', '#forkme', '#foundeo').remove
        css('a.label.label-danger').remove # Edit links

        # Clean up the breadcrumb: keep the engine-version labels (they convey
        # ColdFusion/Lucee/BoxLang availability) but drop navigation links and
        # the issue tracker widget.
        if (crumb = at_css('.breadcrumb'))
          crumb.css('.label-warning').remove
          crumb.css('.divider').remove
          crumb.css('a[rel="nofollow"]').remove
          # Remove navigation breadcrumb items that are not engine-availability
          # labels. Items marked .pull-right are never considered here.
          crumb.css('li:not(.pull-right)').each do |li|
            li.remove unless li.at_css('.label-acf, .label-lucee, .label-boxlang, .label-railo')
          end
        end

        # Code blocks: drop the source site's classes (e.g. prettyprint) and tag
        # every <pre> so DevDocs applies CFML syntax highlighting.
        css('pre').each do |node|
          node.remove_attribute('class')
          node['data-language'] = 'coldfusion'
        end

        # Inline code: nothing special needed, but strip prettyprint hints.
        css('code').each { |node| node.remove_attribute('class') }

        # Unwrap Bootstrap `.container` layout wrappers; DevDocs supplies its own
        # page width, so these only add centering/padding we don't want.
        # `before` returns the receiver, so the chained `remove` deletes the
        # wrapper once its children have been moved out in front of it.
        # NOTE(review): build_root allows for a `.container` that is itself the
        # `.breadcrumb`; if that occurs, this also strips the breadcrumb's list
        # element and leaves its items bare. Confirm against real pages.
        css('.container').each { |node| node.before(node.children).remove }

        # Remove wrappers that are empty or hold only whitespace. Results come
        # back in document order, so walking them in reverse visits descendants
        # before their ancestors: a wrapper whose only content was other empty
        # wrappers is itself empty by the time it is checked. Blank inner HTML
        # already implies there are no child elements.
        css('div', 'p', 'span', 'ul', 'ol').reverse_each do |node|
          node.remove if node.inner_html.strip.empty?
        end

        doc
      end

      # cfdocs splits an entry's content across the `.jumbotron` header (name,
      # description, syntax), the `.breadcrumb`, and the main `.container`
      # (arguments, compatibility, links, examples). Merge them into one root.
      #
      # NOTE: between filters the document is re-parsed as an HTML *fragment*
      # (there is no <body>), so selectors must not depend on `body >`.
      #
      # Returns a new <div> holding copies of the parts that were found, or the
      # existing <body>/document when none of them are present.
      def build_root
        # First .jumbotron is the page header; #cfbreak is the trailing
        # newsletter jumbotron, which we ignore. Prefer the header's inner
        # `.container` when it has one.
        header = css('.jumbotron').reject { |n| n['id'] == 'cfbreak' }
                                  .map { |n| n.at_css('.container') || n }
                                  .first
        breadcrumb = at_css('.breadcrumb')

        # The main content container holds the reference sections. It is a
        # `.container` that is not the breadcrumb and not inside a jumbotron or
        # nav. Identify it by the section headings it contains.
        main = css('.container').find do |node|
          next false if node.matches?('.breadcrumb')
          next false if node.ancestors('.jumbotron').any? || node.ancestors('nav').any?
          node.at_css('h2, .param, .panel') || node.at_css('#examples')
        end

        root = Nokogiri::HTML.fragment('<div></div>').at_css('div')
        root << header.dup if header
        # A breadcrumb nested inside the header or main container is already
        # carried along by that part's copy; appending it again would duplicate
        # it, and `call` only cleans the first `.breadcrumb` it finds.
        root << breadcrumb.dup if breadcrumb && !nested_in?(breadcrumb, header, main)
        root << main.dup if main

        # Fall back to the full document/fragment if the expected structure is
        # missing (e.g. some guide pages).
        root.element_children.any? ? root : (at_css('body') || doc)
      end

      private

      # True when `node` is a descendant of any of the given containers.
      # `nil` containers are ignored.
      def nested_in?(node, *containers)
        lineage = node.ancestors
        containers.compact.any? { |container| lineage.include?(container) }
      end
    end
  end
end
0 commit comments