diff --git a/README.md b/README.md index 882c082..73ffc38 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Lazydocs makes it easy to generate beautiful markdown documentation for your Pyt ### Installation -> _Requirements: Python 3.9+._ +> _Requirements: Python 3.6+._ ```bash pip install lazydocs diff --git a/setup.py b/setup.py index 332fb4b..b036aa2 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ EMAIL = "team@mltooling.org" AUTHOR = "ML Tooling Team" LICENSE = "MIT" -REQUIRES_PYTHON = ">=3.9" +REQUIRES_PYTHON = ">=3.6" VERSION = None # Only set version if you like to overwrite the version in _about.py PWD = os.path.abspath(os.path.dirname(__file__)) diff --git a/src/lazydocs/_about.py b/src/lazydocs/_about.py index 1092556..1ee0764 100644 --- a/src/lazydocs/_about.py +++ b/src/lazydocs/_about.py @@ -1,5 +1,5 @@ """Information about this library. This file will automatically changed.""" -__version__ = "0.6.0" +__version__ = "0.6.2" # __author__ # __email__ diff --git a/src/lazydocs/generation.py b/src/lazydocs/generation.py index 7013dd9..4a41350 100755 --- a/src/lazydocs/generation.py +++ b/src/lazydocs/generation.py @@ -8,15 +8,17 @@ import pkgutil import re import subprocess +import sys import types from dataclasses import dataclass, is_dataclass from enum import Enum from pydoc import locate -from typing import Any, Callable, Dict, List, Optional +from types import ModuleType +from typing import Any, Callable, Dict, List, Set, Optional, Union from urllib.parse import quote _RE_BLOCKSTART_LIST = re.compile( - r"^(Args:|Arg:|Arguments:|Parameters:|Kwargs:|Attributes:|Returns:|Yields:|Kwargs:|Raises:).{0,2}$", + r"^((?:Arg[s]?|Arguments|Parameters|Kwargs|Attributes|Returns|Yields|Raises|Methods):).{0,2}$", re.IGNORECASE, ) @@ -32,8 +34,43 @@ re.IGNORECASE ) -_RE_TYPED_ARGSTART = re.compile(r"^([\w\[\]_]{1,}?)[ ]*?\((.*?)\):[ ]+(.{2,})", re.IGNORECASE) -_RE_ARGSTART = re.compile(r"^(.+):[ ]+(.{2,})$", re.IGNORECASE) +_RE_TYPED_ARGSTART = 
re.compile( + r""" + ^ # start of the string + (?: + # Case 1: Argument with optional type + (?P<arg>[a-zA-Z_][a-zA-Z0-9_]*) # argument name: strict python variable name + \s* # optional spaces between argument name and type + # Optional group for argument type + ( + \(\s* # optional space after opening '(' + (?P<arg_type>[^)]+?) # type inside parentheses (non-greedy match) + \s*\) # optional space before closing ')' + )? # make parentheses optional for type + + | # Case 2: Return and Exception type (no argument name) + (?![Oo]r\s+|\|\s*) # Prevents starting with 'or' or '|' + (?P<alt_name> # Named capture group 'alt_name' + (?: + (?:\s+[Oo]r\s+|\s*\|\s*)? # Optional separator: ' or ' or '|' + (?![Oo]r|\|) # Prevent consecutive 'or' or '|' + [a-zA-Z_][\w.]+ # type_token, support lead _ and `.` + # Optional group for square bracket typed i.e list[dict[str, int]] + (?: + # No padding to square bracket conents + \[\b + [\w\|,.\[\]\ \t]* # support empty and nested, greedy match + \b\] + )? + )+ # One or more tokens + ) + ) + :\s+ # colon followed by at least one space + (?P<arg_desc>.{2,}) # description (>= 2 chars) + + """, + re.IGNORECASE | re.VERBOSE +) _RE_CODE_TEXT = re.compile(r"^```[\w\-\.]*[ ]*$", re.IGNORECASE) @@ -328,19 +365,22 @@ def _is_object_ignored(obj: Any) -> bool: return False -def _is_module_ignored(module_name: str, ignored_modules: List[str], private_modules: bool = False) -> bool: +def _is_module_ignored(module_name: str, ignored_modules: Set[str], private_modules: bool = False) -> bool: """Checks if a given module is ignored.""" if module_name.split(".")[-1].startswith("_") and module_name[1] != "_" and not private_modules: return True - for ignored_module in ignored_modules: - if module_name == ignored_module: - return True + # Trivial exact match + if module_name in ignored_modules: + return True - # Check is module is subpackage of an ignored package - if module_name.startswith(ignored_module + "."): + # Check is module is subpackage of an ignored package + # Check parent 
packages from deepest to top level + parts = module_name.split(".") + for i in range(len(parts) - 1, 0, -1): + parent = ".".join(parts[:i]) + if parent in ignored_modules: return True - return False @@ -361,8 +401,15 @@ def _get_src_root_path(obj: Any) -> str: def _get_doc_summary(obj: Any) -> str: - # First line should contain the summary - return _get_docstring(obj).split("\n")[0] + # Summary should continue until blank space + summary = [] + for line in _get_docstring(obj).split("\n"): + if line == "" or line is None: + break + if line.endswith(" "): + line = f"{line}\n" + summary.append(line.lstrip()) + return " ".join(summary) def _get_anchor_tag(header: str) -> str: @@ -410,7 +457,7 @@ class SectionBlock(): indent: int offset: int - def _get_section_offset(lines: list, start_index: int, blockindent: int): + def _get_section_offset(lines: list, start_index: int, blockindent: int) -> int: """Determine base padding offset for section. Args: @@ -421,7 +468,7 @@ def _get_section_offset(lines: list, start_index: int, blockindent: int): Returns: int: Padding offset. """ - offset = [] + offset: List[int] = [] try: for line in lines[start_index:]: indent = len(line) - len(line.lstrip()) @@ -436,9 +483,9 @@ def _get_section_offset(lines: list, start_index: int, blockindent: int): return -min(offset) if offset else 0 def _lines_isvalid(lines: list, start_index: int, blockindent: int, - allow_same_level: bool = False, - require_next_is_blank: bool = False, - max_blank: int = None): + allow_same_level: bool = False, + require_next_is_blank: bool = False, + max_blank: Optional[int] = None) -> bool: """Determine following lines fit section rules. 
Args: @@ -517,11 +564,11 @@ def _lines_isvalid(lines: list, start_index: int, blockindent: int, line = line + "\n```" elif doctest_block and \ not _lines_isvalid(docstring, line_indx + 1, doctest_block.indent, - True, False, 1): + True, False, 1): # Doctest block Exit Condition offset = doctest_block.indent - indent - line = " " * (indent - doctest_block.indent + - doctest_block.offset) + line + "\n```" + line = " " * (indent - doctest_block.indent + + doctest_block.offset) + line + "\n```" block_exit = True elif line.endswith("::") and not (literal_block) and \ _lines_isvalid(docstring, line_indx + 1, indent, False, True, None): @@ -538,11 +585,11 @@ def _lines_isvalid(lines: list, start_index: int, blockindent: int, line = "```" + line indent = literal_block.indent elif not _lines_isvalid(docstring, line_indx + 1, literal_block.indent, - False, False, None): + False, False, None): # Literal block exit condition offset += literal_block.indent - indent - line = " " * (indent - literal_block.indent + - literal_block.offset) + line + "\n```" + line = " " * (indent - literal_block.indent + + literal_block.offset) + line + "\n```" block_exit = True elif line: offset += literal_block.offset @@ -561,7 +608,7 @@ def _lines_isvalid(lines: list, start_index: int, blockindent: int, offset = admonition_block.indent - indent line = "> {}".format(line.replace("\n", "\n> ")) if not _lines_isvalid(docstring, line_indx + 1, admonition_block.indent, - False, False, None): + False, False, None): admonition_block = None if (blockstart_result or blocktext_result): @@ -574,19 +621,24 @@ def _lines_isvalid(lines: list, start_index: int, blockindent: int, out.append("\n") out.append("**{}**\n".format(line.strip())) elif indent > blockindent and (arg_list or section_block): - if arg_list and not literal_block and _RE_TYPED_ARGSTART.match(line): + param_match = _RE_TYPED_ARGSTART.match(line) + + if arg_list and not md_code_snippet and \ + (indent <= argindent if argindent else indent) and 
param_match: # start of new argument - out.append( - "- " - + _RE_TYPED_ARGSTART.sub(r"`\1` (\2): \3", line) - ) - argindent = indent - elif arg_list and not literal_block and _RE_ARGSTART.match(line): - # start of an exception-type block - out.append( - "- " - + _RE_ARGSTART.sub(r"`\1`: \2", line) - ) + # Extract the matched groups + alt_name = param_match.group("alt_name") + arg_type = param_match.group("arg_type") + desc = param_match.group("arg_desc") + # support for exception style and return type + arg_name = alt_name if alt_name else param_match.group("arg") + + # output whether arg_type exists + if arg_type: + out.append("- `" + arg_name + "` (" + arg_type + "): " + + desc) + else: + out.append("- `" + arg_name + "`: " + desc) argindent = indent elif indent > argindent: # attach docs text of argument @@ -600,6 +652,11 @@ def _lines_isvalid(lines: list, start_index: int, blockindent: int, out.append(line.replace("\n", "\n" + " " * padding)) elif line: + # Assume require force markdown newline if line ends with ":" + if line.endswith(":") and \ + not any([md_code_snippet, literal_block, doctest_block]): + line = line + " " + padding = max(indent - blockindent + offset, 0) out.append(" " * padding + line.replace("\n", @@ -625,13 +682,6 @@ def _lines_isvalid(lines: list, start_index: int, blockindent: int, return "".join(out) -def get_module(loader, module_name: str) -> Optional[Any]: - spec = loader.find_spec(module_name) - if spec is None: - raise ImportError(f"Cannot find module {module_name}") - return spec.loader.load_module(spec.name) - - class MarkdownGenerator(object): """Markdown generator class.""" @@ -838,9 +888,6 @@ def class2md(self, cls: Any, depth: int = 2, is_mdx: bool = False) -> str: ) elif is_dataclass(cls): kind = "dataclass" - variables.append( - "%s attributes\n" % (sectionheader) - ) elif issubclass(cls, Exception): kind = "exception" else: @@ -889,10 +936,16 @@ def class2md(self, cls: Any, depth: int = 2, is_mdx: bool = False) -> str: 
variables.append( "- **%s** = %s\n" % (full_name, obj.value) ) - elif name == "__dataclass_fields__": + elif name == "__dataclass_fields__" and obj: + variables.append( + "%s fields\n" % (sectionheader) + ) for name, field in sorted((obj).items()): + full_name = f"{clsname}.{name}" + if self.remove_package_prefix: + full_name = name variables.append( - "- ```%s``` (%s)\n" % (name, + "- ```%s``` (%s)\n" % (full_name, field.type.__name__) ) @@ -1021,9 +1074,11 @@ def module2md(self, module: types.ModuleType, depth: int = 1, is_mdx: bool = Fal continue if hasattr(obj, "__name__") and not obj.__name__.startswith(modname): continue + if isinstance(obj, types.ModuleType): + continue comments = inspect.getcomments(obj) - comments = ": %s" % comments if comments else "" - variables.append("- **%s**%s" % (name, comments)) + comments = " \n\t*%s*" % comments if comments else "" + variables.append("- **%s** = %s%s" % (name, obj, comments)) line_nos.append(_get_line_no(obj) or 0) variables = _order_by_line_nos(variables, line_nos) @@ -1142,24 +1197,31 @@ def overview2md(self, is_mdx: bool = False) -> str: modules=modules_md, classes=classes_md, functions=functions_md ) - def toc2md(self, module: types.ModuleType = None, is_mdx: bool = False) -> str: - """Generates table of contents for imported object.""" + def toc2md(self, module: types.ModuleType, is_mdx: bool = False) -> str: + """Generates table of contents for imported object. + + Args: + module (ModuleType): Parsed module object for TOC generation. + is_mdx (bool, optional): JSX support. Default to False. + + Returns: + str: Markdown documentation of TOC file if TOC exist. 
+ """ toc = [] for obj in self.generated_objects: if module and (module.__name__ != obj["module"] or obj["type"] == "module"): continue - # module_name = obj["module"].split(".")[-1] full_name = obj["full_name"] name = obj["name"] - if is_mdx: - link = "./" + obj["module"] + ".mdx#" + obj["anchor_tag"] - else: - link = "./" + obj["module"] + ".md#" + obj["anchor_tag"] - line = f"- [`{name}`]({link})" + summary = obj["description"] + link = f"./{obj['module']}.md{'x' if is_mdx else ''}#{obj['anchor_tag']}" + line = f"- [`{name}`]({link}){': ' if summary else ''}{summary}" depth = max(len(full_name.split(".")) - 1, 0) if depth: line = "\t" * depth + line toc.append(line) + if not toc: + return "" return _TOC_TEMPLATE.format(toc="\n".join(toc)) @@ -1169,7 +1231,7 @@ def generate_docs( src_root_path: Optional[str] = None, src_base_url: Optional[str] = None, remove_package_prefix: bool = False, - ignored_modules: Optional[List[str]] = None, + ignored_modules: Optional[Union[Set[str],List[str]]] = None, output_format: Optional[str] = None, overview_file: Optional[str] = None, watermark: bool = True, @@ -1186,22 +1248,27 @@ def generate_docs( src_root_path: The root folder name containing all the sources. Fallback to git repo root. src_base_url: The base url of the github link. Should include branch name. All source links are generated with this prefix. remove_package_prefix: If `True`, the package prefix will be removed from all functions and methods. - ignored_modules: A list of modules that should be ignored. + ignored_modules: A set of modules that should be ignored. output_format: Markdown file extension and format. overview_file: Filename of overview file. If not provided, no overview file will be generated. watermark: If `True`, add a watermark with a timestamp to bottom of the markdown files. validate: If `True`, validate the docstrings via pydocstyle. Requires pydocstyle to be installed. private_modules: If `True`, includes modules with `_` prefix. 
+ include_toc: Include table of contents in module file. Defaults to False. url_line_prefix: Line prefix for git repository line url anchors. Default: None - github "L". """ stdout_mode = output_path.lower() == "stdout" - if not stdout_mode and not os.path.exists(output_path): - # Create output path - os.makedirs(output_path) + if not stdout_mode: + print(f"Output Location: {output_path}") + if not os.path.exists(output_path): + # Create output path + os.makedirs(output_path) if not ignored_modules: - ignored_modules = list() + ignored_modules = set() + elif isinstance(ignored_modules, list): + ignored_modules = set(ignored_modules) if output_format and output_format != 'md' and output_format != 'mdx': raise Exception(f"Unsupported output format: {output_format}. Choose either 'md' or 'mdx'.") @@ -1246,32 +1313,62 @@ def generate_docs( if not stdout_mode: print(f"Generating docs for python package at: {path}") + path_abs = os.path.abspath(path) + + # Work around for relative imports in top level modules + # requires adding parent directory as base package to sys modules namespace + parent_package = os.path.basename(path) + mod = ModuleType(parent_package) + mod.__path__ = [path_abs] + mod.__package__ = parent_package + if parent_package not in sys.modules: + sys.modules[parent_package] = mod # Add module to current namespace + # Generate one file for every discovered module - for loader, module_name, _ in pkgutil.walk_packages([path]): + for loader, module_name, is_pkg in pkgutil.walk_packages([path_abs]): if _is_module_ignored(module_name, ignored_modules, private_modules): # Add module to ignore list, so submodule will also be ignored - ignored_modules.append(module_name) + ignored_modules.add(module_name) continue + try: + # Modern PEP 451 path try: - mod_spec = importlib.util.spec_from_loader(module_name, loader) - mod = importlib.util.module_from_spec(mod_spec) - mod_spec.loader.exec_module(mod) + mod_spec = loader.find_spec(module_name) except AttributeError: 
- mod = get_module(loader, module_name) - module_md = generator.module2md(mod, is_mdx=is_mdx, include_toc=include_toc) + # Fallback if Loader object has no attribute `find_spec` + module_filepath = os.path.join( + path_abs, *module_name.split(".")) + ".py" + mod_spec = importlib.util.spec_from_file_location( + module_name, + os.path.join(loader.path, module_filepath) + ) + if mod_spec is None or mod_spec.loader is None: + raise ImportError(f"Cannot load module {module_name} from {path}") + mod = importlib.util.module_from_spec(mod_spec) + full_module_name = f"{parent_package}.{module_name}" + mod.__package__ = (module_name if is_pkg + else full_module_name).rsplit(".", 1)[0] + # Add module to current namespace + if mod.__name__ not in sys.modules: + sys.modules[mod.__name__] = mod + mod_spec.loader.exec_module(mod) + + module_md = generator.module2md(module=mod, + is_mdx=is_mdx, + include_toc=include_toc) if not module_md: # Module md is empty -> ignore module and all submodules # Add module to ignore list, so submodule will also be ignored - ignored_modules.append(module_name) + ignored_modules.add(module_name) continue if stdout_mode: print(module_md) else: to_md_file( - module_md, - mod.__name__, + markdown_str=module_md, + filename=mod.__name__, out_path=output_path, watermark=watermark, is_mdx=is_mdx, @@ -1285,26 +1382,34 @@ def generate_docs( raise Exception(f"Validation for {path} failed.") if not stdout_mode: - print(f"Generating docs for python module at: {path}") - - module_name = os.path.basename(path) - - spec = importlib.util.spec_from_file_location( - module_name, - path, - ) - assert spec is not None - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) # type: ignore + print(f"Generating docs for python module: {path}") + + path_abs = os.path.abspath(path) + src_dir, filename = os.path.split(path_abs) + # TODO: Possible new feature?? 
Dynamically locating package root to define full dotted module name and package + module_name, _ = os.path.splitext(filename) + parent_package = os.path.basename(src_dir) + + mod_spec = importlib.util.spec_from_file_location(module_name, path_abs) + if mod_spec is None or mod_spec.loader is None: + raise ImportError(f"Cannot load module {module_name} from {path}") + mod = importlib.util.module_from_spec(mod_spec) + # Define parent directory as package to support relative imports + mod.__package__ = parent_package + if mod.__name__ not in sys.modules: + sys.modules[mod.__name__] = mod # Add module to current namespace + mod_spec.loader.exec_module(mod) # type: ignore if mod: - module_md = generator.module2md(mod, is_mdx=is_mdx, include_toc=include_toc) + module_md = generator.module2md(module=mod, + is_mdx=is_mdx, + include_toc=include_toc) if stdout_mode: print(module_md) else: to_md_file( - module_md, - module_name, + markdown_str=module_md, + filename=mod.__name__, out_path=output_path, watermark=watermark, is_mdx=is_mdx, @@ -1330,32 +1435,39 @@ def generate_docs( path=obj.__path__, # type: ignore prefix=obj.__name__ + ".", # type: ignore ): + # Add module to ignore list, so submodule will also be ignored if _is_module_ignored(module_name, ignored_modules, private_modules): - # Add module to ignore list, so submodule will also be ignored - ignored_modules.append(module_name) + ignored_modules.add(module_name) continue try: - try: - mod_spec = importlib.util.spec_from_loader(module_name, loader) - mod = importlib.util.module_from_spec(mod_spec) - mod_spec.loader.exec_module(mod) - except AttributeError: - mod = get_module(loader, module_name) - module_md = generator.module2md(mod, is_mdx=is_mdx, include_toc=include_toc) + # Modern PEP 451 path + # mod = importlib.import_module(module_name) + mod_spec = importlib.util.find_spec(module_name) + if mod_spec is None or mod_spec.loader is None: + raise ImportError(f"Cannot load module {module_name} from {loader.path}") 
+ mod = importlib.util.module_from_spec(mod_spec) + # Add module to current namespace + if mod.__name__ not in sys.modules: + sys.modules[mod.__name__] = mod + mod_spec.loader.exec_module(mod) + + module_md = generator.module2md(module=mod, + is_mdx=is_mdx, + include_toc=include_toc) if not module_md: # Module MD is empty -> ignore module and all submodules # Add module to ignore list, so submodule will also be ignored - ignored_modules.append(module_name) + ignored_modules.add(module_name) continue if stdout_mode: print(module_md) else: to_md_file( - module_md, - mod.__name__, + markdown_str=module_md, + filename=mod.__name__, out_path=output_path, watermark=watermark, is_mdx=is_mdx @@ -1366,15 +1478,19 @@ def generate_docs( + repr(ex) ) else: - import_md = generator.import2md(obj, is_mdx=is_mdx) + import_md = generator.import2md(obj, is_mdx=is_mdx, include_toc=include_toc) if stdout_mode: print(import_md) else: to_md_file( - import_md, path, out_path=output_path, watermark=watermark, is_mdx=is_mdx + markdown_str=import_md, + filename=path, + out_path=output_path, + watermark=watermark, + is_mdx=is_mdx ) else: - raise Exception(f"Failed to generate markdown for {path}.") + raise Exception(f"Failed to generate markdown. 
Path `{path}` not recognized.") if overview_file and not stdout_mode: if is_mdx: @@ -1395,5 +1511,5 @@ def generate_docs( # Write mkdocs pages file print("Writing mkdocs .pages file.") # TODO: generate navigation items to fix problem with naming - with open(os.path.join(output_path, ".pages"), "w", encoding="utf-8", newline="\n") as f: + with open(os.path.join(output_path, ".pages"), "w", encoding="utf-8", newline="\n") as f: f.write(_MKDOCS_PAGES_TEMPLATE.format(overview_file=overview_file)) diff --git a/tests/test_generation.py b/tests/test_generation.py index 1c81d9e..c53f489 100644 --- a/tests/test_generation.py +++ b/tests/test_generation.py @@ -1,7 +1,6 @@ import hashlib -from lazydocs import MarkdownGenerator, generate_docs -from tempfile import TemporaryDirectory +from lazydocs import MarkdownGenerator def test_import2md() -> None: @@ -36,31 +35,3 @@ def test_func2md() -> None: # Remove whitespaces: fix changes between py version 3.6 3.7 in signature method md_hash = hashlib.md5(markdown.replace(" ", "").encode("utf-8")).hexdigest() assert md_hash == "797bad8c00ee6f189cb6f578eaec02c4" - - -def test_integration_generate_docs(capsys) -> None: - test_class = """ -class TestClass: - \"\"\"just a test class\"\"\" - """ - with TemporaryDirectory() as d: - test_module_name = "test_module" - with open(f"{d}/{test_module_name}.py", "w") as f: - f.write(test_class) - - overview_file_name = "DOCS.md" - overview_file = f"{d}/output/{overview_file_name}" - generate_docs( - paths=[d], - output_path=f"{d}/output/", - overview_file=overview_file_name - ) - - captured = capsys.readouterr() - - with open(overview_file) as f: - result = f.read() - - assert test_module_name in result - assert f"{test_module_name}.TestClass" in result - assert "Failed to generate docs for module" not in captured.out