Merge pull request #718 from pyathena-dev/ci/markdownlint-cli2

laughingman7743 · web-flow · commit f41404c669cd · 2026-05-24T14:40:41.000+09:00
ci: introduce markdownlint-cli2 and verify docs build on PRs
diff --git a/.github/workflows/docs-lint.yaml b/.github/workflows/docs-lint.yaml
@@ -0,0 +1,36 @@
+name: Docs Lint
+
+on:
+  pull_request:  # PR-only trigger, narrowed by the path filter below
+    paths:  # Docs, Markdown, and the tooling/config that drives lint and build
+      - 'docs/**'
+      - '**.md'
+      - '.markdownlint-cli2.jsonc'
+      - '.mise.toml'
+      - 'Makefile'
+      - '.github/workflows/docs-lint.yaml'
+
+permissions:
+  contents: read  # Workflow token is limited to reading repository contents
+
+jobs:
+  lint:  # Markdown lint through the Makefile's docs/lint target
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1
+      - run: make docs/lint  # Invokes markdownlint-cli2 via `mise exec`
+
+  build:  # Builds the Sphinx site through the Makefile's docs/build target
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0  # Fetch all history for sphinx-multiversion
+      - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4.0.1
+      - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
+        with:
+          enable-cache: true
+      - run: |
+          uv sync --group dev
+          make docs/build
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
@@ -32,7 +32,7 @@ jobs:
           enable-cache: true
       - run: |
           uv sync --group dev
-          make docs
+          make docs/build
       - name: Upload artifact
         uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b # v4.0.0
         with:
diff --git a/.markdownlint-cli2.jsonc b/.markdownlint-cli2.jsonc
@@ -0,0 +1,34 @@
+{
+  // https://github.com/DavidAnson/markdownlint/blob/main/doc/Rules.md
+  "config": {
+    "default": true,
+    // Docs have long URLs and code lines that would be awkward to wrap
+    "MD013": false,
+    // Match existing style: dash bullets, 2-space nested indent
+    "MD004": { "style": "dash" },
+    "MD007": { "indent": 2 },
+    // Allow inline HTML used by README (centered images) and MyST admonitions
+    "MD033": {
+      "allowed_elements": ["details", "summary", "br", "kbd", "sub", "sup", "div", "img", "p", "a"]
+    },
+    // Cursor docs intentionally repeat subsection names ("Basic usage") under different cursors
+    "MD024": { "siblings_only": true },
+    // `$ command` style is intentional in docs/testing.md and README shell snippets
+    "MD014": false,
+    // MyST `(label)=` ref targets must come before the first heading, so the
+    // first line of many docs/*.md files is not an H1
+    "MD041": false
+  },
+  "globs": [
+    "docs/**/*.md",
+    "*.md"
+  ],
+  "ignores": [
+    "docs/_build/**",
+    "node_modules/**",
+    ".venv/**",
+    ".tox/**",
+    ".pytest_cache/**",
+    ".serena/**"
+  ]
+}
diff --git a/.mise.toml b/.mise.toml
@@ -1,2 +1,3 @@
 [tools]
 python = "3.12"
+"npm:markdownlint-cli2" = "0.18.1"
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -1,39 +1,47 @@
 # PyAthena Development Guide for AI Assistants
 
 ## Project Overview
+
 PyAthena is a Python DB API 2.0 (PEP 249) compliant client for Amazon Athena. See `pyproject.toml` for Python version support and dependencies.
 
 ## Rules and Constraints
 
 ### Git Workflow
+
 - **NEVER** commit directly to `master` — always create a feature branch and PR
 - Create PRs as drafts: `gh pr create --draft`
 
 ### Import Rules
+
 - **NEVER** use runtime imports (inside functions, methods, or conditional blocks)
 - All imports must be at the top of the file, after the license header
 - Exception: the existing codebase uses runtime imports for optional dependencies (`pyarrow`, `pandas`, etc.) in source code. For new code, use `TYPE_CHECKING` instead when possible
 
 ### Code Quality — Always Run Before Committing
+
 ```bash
 make format   # Auto-fix formatting and imports
 make lint   # Lint + format check + mypy
 ```
 
 ### Testing
+
 ```bash
 # ALWAYS run `make lint` first — tests will fail if lint doesn't pass
-make test       # Unit tests (runs chk first)
-make test-sqla  # SQLAlchemy dialect tests
+make test/pyathena    # Unit tests (runs lint first)
+make test/sqla        # SQLAlchemy dialect tests
+make test/sqla-async  # SQLAlchemy async dialect tests
 ```
 
 Tests require AWS environment variables. Use a `.env` file (gitignored):
+
 ```bash
 AWS_DEFAULT_REGION=<region>
 AWS_ATHENA_S3_STAGING_DIR=s3://<bucket>/<path>/
 AWS_ATHENA_WORKGROUP=<workgroup>
 AWS_ATHENA_SPARK_WORKGROUP=<spark-workgroup>
 ```
+
 ```bash
 export $(cat .env | xargs) && uv run pytest tests/pyathena/test_file.py -v
 ```
@@ -43,36 +51,58 @@ export $(cat .env | xargs) && uv run pytest tests/pyathena/test_file.py -v
-- New features require tests; changes to SQLAlchemy dialects must pass `make test-sqla`
+- New features require tests; changes to SQLAlchemy dialects must pass `make test/sqla`
 
 #### Test Conventions
+
 - **Class-based tests** for integration tests that use fixtures (cursors, engines): `class TestCursor:` with methods like `def test_fetchone(self, cursor):`
 - **Standalone functions** for unit tests of pure logic (converters, parsers, utils): `def test_to_struct_json_formats(input_value, expected):`
 - Test file naming mirrors source: `pyathena/parser.py` → `tests/pyathena/test_parser.py`
 - **Fixtures**: Cursor/engine fixtures are defined in `conftest.py` and injected by name (e.g., `cursor`, `engine`, `async_cursor`). Use `indirect=True` parametrization to pass connection options:
+
   ```python
   @pytest.mark.parametrize("engine", [{"driver": "rest"}], indirect=True)
   def test_query(self, engine):
       engine, conn = engine
   ```
+
 - **Parametrize** with `@pytest.mark.parametrize(("input", "expected"), [...])` for data-driven tests
 - **Integration tests** (need AWS) use cursor/engine fixtures with real Athena queries; **unit tests** (no AWS) call functions directly with test data
 
+### Markdown Lint
+
+`docs/**/*.md` and project-root `*.md` files are linted with [markdownlint-cli2](https://github.com/DavidAnson/markdownlint-cli2). The config lives at `.markdownlint-cli2.jsonc`. CI runs lint + Sphinx build on PRs that touch docs (`.github/workflows/docs-lint.yaml`).
+
+`markdownlint-cli2` is pinned in `.mise.toml`, so [`mise`](https://mise.jdx.dev/) installs the exact version used in CI. Run locally:
+
+```bash
+mise install          # one-time: installs markdownlint-cli2
+make docs/lint        # check
+make docs/format      # auto-fix what's possible
+make docs/build       # build the Sphinx site under docs/_build/html
+```
+
 ## Architecture — Key Design Decisions
 
 These are non-obvious conventions that can't be discovered by reading code alone.
 
 ### PEP 249 Compliance
+
 All cursor types must implement: `execute()`, `fetchone()`, `fetchmany()`, `fetchall()`, `close()`. New cursor features must follow the DB API 2.0 specification.
 
 ### Cursor Module Pattern
+
 Each cursor type lives in its own subpackage (`pandas/`, `arrow/`, `polars/`, `s3fs/`, `spark/`) with a consistent structure: `cursor.py`, `async_cursor.py`, `converter.py`, `result_set.py`. When adding features, consider impact on all cursor types.
 
 ### Filesystem (fsspec) Compatibility
+
 `pyathena/filesystem/s3.py` implements fsspec's `AbstractFileSystem`. When modifying:
+
 - Match `s3fs` library behavior where possible (users migrate from it)
 - Use `delimiter="/"` in S3 API calls to minimize requests
 - Handle edge cases: empty paths, trailing slashes, bucket-only paths
 
 ### Version Management
+
 Versions are derived from git tags via `hatch-vcs` — never edit `pyathena/_version.py` manually.
 
 ### Google-style Docstrings
+
 Use Google-style docstrings for public methods. See existing code for examples.
diff --git a/Makefile b/Makefile
@@ -13,29 +13,37 @@ lint:
 	uvx ruff@$(RUFF_VERSION) format --check .
 	uv run mypy .
 
-.PHONY: test
-test: lint
+.PHONY: test/pyathena
+test/pyathena: lint
 	uv run pytest -n 8 --cov pyathena --cov-report html --cov-report term tests/pyathena/
 
-.PHONY: test-sqla
-test-sqla:
+.PHONY: test/sqla
+test/sqla:
 	uv run pytest -n 8 --cov pyathena --cov-report html --cov-report term tests/sqlalchemy/
 
-.PHONY: test-sqla-async
-test-sqla-async:
+.PHONY: test/sqla-async
+test/sqla-async:
 	uv run pytest -n 8 --cov pyathena --cov-report html --cov-report term tests/sqlalchemy/ --dburi async
 
 .PHONY: tox
 tox:
 	uvx tox@$(TOX_VERSION) -c pyproject.toml run
 
-.PHONY: docs
-docs:
+.PHONY: docs/build
+docs/build:
 	uv run sphinx-multiversion docs docs/_build/html
 	echo '<meta http-equiv="refresh" content="0; url=./master/index.html">' > docs/_build/html/index.html
 	echo 'pyathena.dev' > docs/_build/html/CNAME
 	touch docs/_build/html/.nojekyll
 
+.PHONY: docs/lint
+docs/lint:
+	mise exec -- markdownlint-cli2
+
+.PHONY: docs/format
+docs/format:
+	mise exec -- markdownlint-cli2 --fix
+
 .PHONY: tool
 tool:
 	uv tool install ruff@$(RUFF_VERSION)
diff --git a/README.md b/README.md
@@ -21,7 +21,7 @@ PyAthena is a Python [DB API 2.0 (PEP 249)](https://www.python.org/dev/peps/pep-
 
 ## Requirements
 
-* Python
+- Python
 
   - CPython 3.10, 3.11, 3.12, 3.13, 3.14
 
@@ -77,10 +77,10 @@ Many of the implementations in this library are based on [PyHive](https://github
 
 ## Links
 
-- Documentation: https://pyathena.dev/
-- PyPI Releases: https://pypi.org/project/PyAthena/
-- Source Code: https://github.com/pyathena-dev/PyAthena/
-- Issue Tracker: https://github.com/pyathena-dev/PyAthena/issues
+- Documentation: <https://pyathena.dev/>
+- PyPI Releases: <https://pypi.org/project/PyAthena/>
+- Source Code: <https://github.com/pyathena-dev/PyAthena/>
+- Issue Tracker: <https://github.com/pyathena-dev/PyAthena/issues>
 
 ## Logo
 
diff --git a/docs/cursor.md b/docs/cursor.md
@@ -293,7 +293,6 @@ cursor = connect(s3_staging_dir="s3://YOUR_S3_BUCKET/path/to/",
                  region_name="us-west-2").cursor(cursor=AsyncDictCursor, dict_type=OrderedDict)
 ```
 
-
 ## AioCursor
 
 See {ref}`aio-cursor`.
diff --git a/docs/introduction.md b/docs/introduction.md
@@ -6,7 +6,7 @@
 
 ## Requirements
 
-* Python
+- Python
 
   - CPython 3.10, 3.11, 3.12, 3.13, 3.14
 
@@ -35,23 +35,26 @@ Extra packages:
 PyAthena provides comprehensive support for Amazon Athena's data types and features:
 
 **Core Features:**
-  - **DB API 2.0 Compliance**: Full PEP 249 compatibility for database operations
-  - **SQLAlchemy Integration**: Native dialect support with table reflection and ORM capabilities
-  - **Multiple Cursor Types**: Standard, Pandas, Arrow, Polars, S3FS and Spark cursor implementations
-  - **Async Support**: Asynchronous query execution for non-blocking operations
+
+- **DB API 2.0 Compliance**: Full PEP 249 compatibility for database operations
+- **SQLAlchemy Integration**: Native dialect support with table reflection and ORM capabilities
+- **Multiple Cursor Types**: Standard, Pandas, Arrow, Polars, S3FS and Spark cursor implementations
+- **Async Support**: Asynchronous query execution for non-blocking operations
 
 **Data Type Support:**
-  - **STRUCT/ROW Types**: {ref}`Complete support <sqlalchemy>` for complex nested data structures
-  - **ARRAY Types**: {ref}`Complete support <sqlalchemy>` for ordered collections with automatic Python list conversion
-  - **MAP Types**: {ref}`Complete support <sqlalchemy>` for key-value dictionary-like data structures
-  - **JSON Integration**: Seamless JSON data parsing and conversion
-  - **Performance Optimized**: Smart format detection for efficient data processing
+
+- **STRUCT/ROW Types**: {ref}`Complete support <sqlalchemy>` for complex nested data structures
+- **ARRAY Types**: {ref}`Complete support <sqlalchemy>` for ordered collections with automatic Python list conversion
+- **MAP Types**: {ref}`Complete support <sqlalchemy>` for key-value dictionary-like data structures
+- **JSON Integration**: Seamless JSON data parsing and conversion
+- **Performance Optimized**: Smart format detection for efficient data processing
 
 **Additional Features:**
-  - **Connection Management**: Efficient connection pooling and configuration
-  - **Result Caching**: Athena query result reuse capabilities
-  - **Error Handling**: Comprehensive exception handling and recovery
-  - **S3 Integration**: Direct S3 data access and staging support
+
+- **Connection Management**: Efficient connection pooling and configuration
+- **Result Caching**: Athena query result reuse capabilities
+- **Error Handling**: Comprehensive exception handling and recovery
+- **S3 Integration**: Direct S3 data access and staging support
 
 (license)=
 
diff --git a/docs/pandas.md b/docs/pandas.md
@@ -33,7 +33,7 @@ df = as_pandas(cursor)
 print(df.describe())
 ```
 
-If you want to use the query results output to S3 directly, you can use [PandasCursor](#pandas-cursor).
+If you want to use the query results output to S3 directly, you can use {ref}`pandas-cursor`.
 This cursor fetches query results faster than the default cursor. (See [benchmark results](https://github.com/pyathena-dev/PyAthena/tree/master/benchmarks).)
 
 (to-sql)=
@@ -392,7 +392,7 @@ for df in df_iter:
     print(df.head())
 ```
 
-**Memory-efficient iteration with iter_chunks()**
+#### Memory-efficient iteration with iter_chunks()
 
 PandasCursor provides an `iter_chunks()` method for convenient chunked processing:
 
@@ -456,7 +456,7 @@ df_iter.get_chunk(10)
 df_iter.get_chunk(10)  # raise StopIteration
 ```
 
-**Auto-optimization of chunksize**
+#### Auto-optimization of chunksize
 
 PandasCursor can automatically determine optimal chunksize based on result file size when enabled:
 
@@ -506,7 +506,7 @@ AthenaPandasResultSet.AUTO_CHUNK_SIZE_LARGE = 200_000  # Larger chunks
 AthenaPandasResultSet.AUTO_CHUNK_SIZE_MEDIUM = 100_000
 ```
 
-**Performance tuning options**
+#### Performance tuning options
 
 PandasCursor accepts additional pandas.read_csv() options for performance optimization:
 
@@ -829,4 +829,3 @@ async with await aio_connect(s3_staging_dir="s3://YOUR_S3_BUCKET/path/to/",
     await cursor.execute("SELECT * FROM many_rows")
     df = cursor.as_pandas()
 ```
-
diff --git a/docs/polars.md b/docs/polars.md
@@ -649,4 +649,3 @@ async with await aio_connect(s3_staging_dir="s3://YOUR_S3_BUCKET/path/to/",
     await cursor.execute("SELECT * FROM many_rows")
     df = cursor.as_polars()
 ```
-
diff --git a/docs/sqlalchemy.md b/docs/sqlalchemy.md
diff --git a/docs/testing.md b/docs/testing.md
diff --git a/docs/usage.md b/docs/usage.md
diff --git a/pyproject.toml b/pyproject.toml

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,3 @@`
`1`	`1`	`[tools]`
`2`	`2`	`python = "3.12"`
	`3`	`+"npm:markdownlint-cli2" = "0.18.1"`