Skip to content

Commit 01f0746

Browse files
authored
docs: improve autogenerated API docs (#606)
Related to #324
1 parent fef0874 commit 01f0746

10 files changed

Lines changed: 504 additions & 55 deletions

File tree

src/crawlee/configuration.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ class Configuration(BaseSettings):
1818
"""Configuration of the Crawler.
1919
2020
Args:
21-
internal_timeout: timeout for internal operations such as marking a request as processed
22-
verbose_log: allows verbose logging
21+
internal_timeout: Timeout for internal operations such as marking a request as processed.
22+
verbose_log: Allows verbose logging.
2323
default_storage_id: The default storage ID.
2424
purge_on_start: Whether to purge the storage on start.
2525
"""

website/build_api_reference.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ sed_no_backup() {
1111
}
1212

1313
# Create docspec dump of this package's source code through pydoc-markdown
14-
poetry run pydoc-markdown --quiet --dump > docspec-dump.jsonl
14+
python ./pydoc-markdown/generate_ast.py > docspec-dump.jsonl
1515
sed_no_backup "s#${PWD}/..#REPO_ROOT_PLACEHOLDER#g" docspec-dump.jsonl
1616

1717
rm -rf "${apify_shared_tempdir}"

website/docusaurus.config.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ module.exports = {
8080
sortSidebar: groupSort,
8181
pathToCurrentVersionTypedocJSON: `${__dirname}/api-typedoc-generated.json`,
8282
routeBasePath: 'api',
83+
python: true,
8384
},
8485
],
8586
// [

website/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
"typescript": "5.6.2"
3434
},
3535
"dependencies": {
36-
"@apify/docusaurus-plugin-typedoc-api": "^4.2.2",
36+
"@apify/docusaurus-plugin-typedoc-api": "^4.2.6",
3737
"@apify/utilities": "^2.8.0",
3838
"@docusaurus/core": "^3.5.2",
3939
"@docusaurus/mdx-loader": "^3.5.2",
@@ -56,5 +56,5 @@
5656
"stream-browserify": "^3.0.0",
5757
"unist-util-visit": "^5.0.0"
5858
},
59-
"packageManager": "yarn@4.4.1"
59+
"packageManager": "yarn@4.5.1"
6060
}

website/pydoc-markdown/__init__.py

Whitespace-only changes.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""
2+
Replaces the default pydoc-markdown shell script with a custom Python script calling the pydoc-markdown API directly.
3+
4+
This script generates an AST from the Python source code in the `src` directory and prints it as a JSON object.
5+
"""
6+
7+
from pydoc_markdown.interfaces import Context
8+
from pydoc_markdown.contrib.loaders.python import PythonLoader
9+
from pydoc_markdown.contrib.processors.filter import FilterProcessor
10+
from pydoc_markdown.contrib.processors.crossref import CrossrefProcessor
11+
from pydoc_markdown.contrib.renderers.markdown import MarkdownReferenceResolver
12+
from google_docstring_processor import ApifyGoogleProcessor
13+
from docspec import dump_module
14+
15+
import json
16+
import os
17+
18+
# Directory holding the package sources, resolved relative to this script's
# real location so the result does not depend on the current working directory.
project_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../src')

context = Context(directory='.')
loader = PythonLoader(search_path=[project_path])
# Named `filter_processor` (not `filter`) so the builtin `filter` is not shadowed.
filter_processor = FilterProcessor(
    documented_only=False,
    skip_empty_modules=False,
)
crossref = CrossrefProcessor()
google = ApifyGoogleProcessor()

# Every pydoc-markdown component is initialised with the shared context
# before it is used.
loader.init(context)
filter_processor.init(context)
google.init(context)
crossref.init(context)

# Processors are applied in this order: filter, Google docstrings, cross-references.
processors = [filter_processor, google, crossref]

# Materialise the loader output once; each processor receives the same list.
modules = list(loader.load())

for processor in processors:
    # No reference resolver is supplied; processors get `None` for it.
    processor.process(modules, None)

# Serialise each processed module to a plain structure and print the whole
# dump as one JSON array on stdout (the build script redirects it to a file).
dump = [dump_module(module) for module in modules]

print(json.dumps(dump, indent=4))
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
# -*- coding: utf8 -*-
2+
# Copyright (c) 2019 Niklas Rosenstein
3+
# !!! Modified 2024 Jindřich Bär
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to
7+
# deal in the Software without restriction, including without limitation the
8+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
9+
# sell copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in
13+
# all copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21+
# IN THE SOFTWARE.
22+
23+
import dataclasses
24+
import re
25+
import typing as t
26+
27+
import docspec
28+
29+
from pydoc_markdown.contrib.processors.sphinx import generate_sections_markdown
30+
from pydoc_markdown.interfaces import Processor, Resolver
31+
32+
import json
33+
34+
35+
@dataclasses.dataclass
class ApifyGoogleProcessor(Processor):
    """
    Preprocessor for Google-style and PEP 257 docstrings.

    Unlike the upstream pydoc-markdown Google processor this class is derived
    from, it does not render Markdown. Each docstring is replaced by a JSON
    string of the form::

        {"text": "<free text before the first section>", "sections": [...]}

    ``sections`` is a list of single-key objects. The key is the normalised
    section title (see ``_keywords_map``) and the value is the list of entries
    collected for that section. An entry is either a plain string (a line that
    did not look like a parameter) or an object with ``param``, ``desc`` and,
    when the docstring supplied one, ``type``.

    References:

    * https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
    * https://www.python.org/dev/peps/pep-0257/

    Example input:

    ```
    Attributes:
        module_level_variable1 (int): Module level variables may be documented in
            either the ``Attributes`` section of the module docstring, or in an
            inline docstring immediately following the variable.

    Todo:
        * For module TODOs
        * You have to also use ``sphinx.ext.todo`` extension
    ```

    Here ``module_level_variable1`` becomes one entry of the ``Attributes``
    section with ``type`` set to ``int``; the indented continuation lines are
    appended to its ``desc``. The two bullet lines become plain string entries
    of the ``Todo`` section.

    @doc:fmt:google
    """

    # Patterns recognising a parameter line, tried in order; the first match wins.
    _param_res = [
        re.compile(r"^(?P<param>\S+):\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+\((?P<type>[^)]+)\):\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+--\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+\{\[(?P<type>\S+)\]\}\s+--\s+(?P<desc>.+)$"),
        re.compile(r"^(?P<param>\S+)\s+\{(?P<type>\S+)\}\s+--\s+(?P<desc>.+)$"),
    ]

    # Section header (as written in the docstring) -> normalised section title.
    _keywords_map = {
        "Args:": "Arguments",
        "Arguments:": "Arguments",
        "Attributes:": "Attributes",
        "Example:": "Example",
        "Examples:": "Examples",
        "Keyword Args:": "Arguments",
        "Keyword Arguments:": "Arguments",
        "Methods:": "Methods",
        "Note:": "Notes",
        "Notes:": "Notes",
        "Other Parameters:": "Arguments",
        "Parameters:": "Arguments",
        "Return:": "Returns",
        "Returns:": "Returns",
        "Raises:": "Raises",
        "References:": "References",
        "See Also:": "See Also",
        "Todo:": "Todo",
        "Warning:": "Warnings",
        "Warnings:": "Warnings",
        "Warns:": "Warns",
        "Yield:": "Yields",
        "Yields:": "Yields",
    }

    def check_docstring_format(self, docstring: str) -> bool:
        """Return True if any known section header occurs anywhere in `docstring`.

        This is a plain substring test, so a header appearing mid-line also counts.
        """
        return any(section_name in docstring for section_name in self._keywords_map)

    def process(self, modules: t.List[docspec.Module], resolver: t.Optional[Resolver]) -> None:
        """Rewrite the docstring of every API object in `modules` in place.

        `resolver` is accepted for interface compatibility and is not used.
        """
        docspec.visit(modules, self._process)

    def _match_param(self, line: str) -> t.Optional[t.Dict[str, str]]:
        """Return the named groups of the first parameter pattern matching `line`, or None."""
        for param_re in self._param_res:
            param_match = param_re.match(line)
            if param_match:
                return param_match.groupdict()
        return None

    def _process(self, node: docspec.ApiObject) -> None:
        """Replace `node.docstring.content` with its JSON representation.

        Nodes without a docstring are left untouched.
        """
        if not node.docstring:
            return

        # Free text that precedes the first section header.
        lines: t.List[str] = []
        # Finished sections, each a single-key dict {title: entries}.
        sections: t.List[t.Dict[str, list]] = []
        # Entries buffered for the section (or code block) currently being read;
        # holds plain strings as well as parameter dicts.
        current_lines: t.List[t.Union[str, t.Dict[str, str]]] = []
        in_codeblock = False
        keyword = None
        # Number of lines seen since the last parameter entry was created or
        # extended; exactly 1 means "the line directly after a parameter".
        multiline_argument_offset = -1

        def _commit():
            # Flush the buffer: into a section when one is open, otherwise into
            # the free text. An open section with an empty buffer still yields
            # an (empty) section entry.
            if keyword:
                sections.append({keyword: list(current_lines)})
            else:
                lines.extend(current_lines)
            current_lines.clear()

        for line in node.docstring.content.split("\n"):
            multiline_argument_offset += 1

            if line.lstrip().startswith("```"):
                # Fence line: toggles code-block mode and is kept verbatim.
                in_codeblock = not in_codeblock
                current_lines.append(line)
                if not in_codeblock:
                    # Closing fence: flush the whole block at once.
                    _commit()
                continue

            if in_codeblock:
                # Inside a code block lines keep their original indentation.
                current_lines.append(line)
                continue

            line = line.strip()
            if line in self._keywords_map:
                # A section header closes whatever was buffered and opens a new section.
                _commit()
                keyword = self._keywords_map[line]
                continue

            if keyword is None:
                lines.append(line)
                continue

            param = self._match_param(line)
            if param is not None:
                current_lines.append(param)
                multiline_argument_offset = 0
            elif multiline_argument_offset == 1:
                # Continuation of the parameter just above: the offset is only
                # reset to 0 right after a dict was appended or extended, so the
                # last buffered entry is guaranteed to be that parameter dict.
                current_lines[-1]["desc"] += "\n" + line
                multiline_argument_offset = 0
            else:
                current_lines.append(line)

        # Flush the trailing section (or an unterminated code block).
        _commit()
        node.docstring.content = json.dumps({
            "text": "\n".join(lines),
            "sections": sections,
        }, indent=None)
185+

website/src/css/custom.css

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,3 +541,20 @@ div[class^=announcementBar_] button {
541541
box-shadow: var(--ifm-alert-shadow);
542542
padding: var(--ifm-alert-padding-vertical) var(--ifm-alert-padding-horizontal);
543543
}
544+
545+
.tsd-parameters li {
546+
margin-bottom: 16px;
547+
}
548+
549+
.tsd-parameters-title {
550+
font-size: 16px;
551+
margin-bottom: 16px !important;
552+
}
553+
554+
.tsd-returns-title {
555+
font-size: 16px;
556+
}
557+
558+
.tsd-api-options {
559+
display: none;
560+
}

0 commit comments

Comments
 (0)