Enable single doctest (opensearch-project#4130)

ykmr1224 · web-flow · commit d1a1519ce32f · 2025-08-26T11:31:16.000-07:00
Signed-off-by: Tomoyuki Morita <moritato@amazon.com>
diff --git a/DEVELOPER_GUIDE.rst b/DEVELOPER_GUIDE.rst
@@ -294,7 +294,7 @@ Most of the time you just need to run ./gradlew build which will make sure you p
    * - ./gradlew :integ-test:yamlRestTest
      - Run rest integration test.
    * - ./gradlew :doctest:doctest
-     - Run doctests
+     - Run the doctests in the docs folder. Use ``-Pdocs=file1,file2`` to run specific file(s); see the `Documentation <#documentation>`_ section for details.
    * - ./gradlew build
      - Build plugin by run all tasks above (this takes time).
    * - ./gradlew pitest
@@ -466,6 +466,18 @@ Doctest
 
 Python doctest library makes our document executable which keeps it up-to-date to source code. The doc generator aforementioned served as scaffolding and generated many docs in short time. Now the examples inside is changed to doctest gradually. For more details please read `testing-doctest <./docs/dev/testing-doctest.md>`_.
 
+.. code-block:: bash
+
+   # Test all docs
+   ./gradlew :doctest:doctest
+
+   # Test single file using main doctest task
+   ./gradlew :doctest:doctest -Pdocs=search
+   # Test multiple files at once
+   ./gradlew :doctest:doctest -Pdocs=search,fields,basics
+
+   # With verbose output
+   ./gradlew :doctest:doctest -Pdocs=stats -Pverbose=true
 
 Backports
 >>>>>>>>>
diff --git a/doctest/build.gradle b/doctest/build.gradle
@@ -76,13 +76,44 @@ task startOpenSearch(type: SpawnProcessTask) {
 }
 
 task doctest(type: Exec, dependsOn: ['bootstrap']) {
-
-    commandLine "$projectDir/bin/test-docs"
+    description = 'Run doctest for all files, or specific files if provided'
+
+    // Optional project properties (-Pname=value):
+    //   docs     - comma-separated doc names/paths; without it the full suite runs
+    //   verbose  - 'true' adds --verbose (only used together with docs)
+    //   endpoint - forwarded as --endpoint (only used together with docs)
+    def docs = project.findProperty('docs')
+    def verbose = project.findProperty('verbose')
+    def endpoint = project.findProperty('endpoint')
+
+    // split() also covers a single name; blank entries (e.g. from a trailing
+    // comma) are dropped so test_docs.py never receives an empty file name.
+    def docFiles = docs ? docs.split(',').collect { it.trim() }.findAll { it } : []
+
+    if (docFiles) {
+        // Selected-files mode: run test_docs.py directly with the requested names
+        def args = ['.venv/bin/python', 'test_docs.py'] + docFiles
+        if (verbose == 'true') {
+            args.add('--verbose')
+        }
+        if (endpoint) {
+            args.addAll(['--endpoint', endpoint])
+        }
+
+        commandLine args
+    } else {
+        // Full test suite mode (original behavior)
+        commandLine "$projectDir/bin/test-docs"
+    }
 
     doLast {
         // remove the cloned sql-cli folder
         file("$projectDir/sql-cli").deleteDir()
-        println("Doctest Done")
+        if (docFiles) {
+            println("Doctest done for: ${docFiles.join(', ')}")
+        } else {
+            println("Full doctest suite done")
+        }
     }
 }
 
diff --git a/doctest/test_docs.py b/doctest/test_docs.py
@@ -1,21 +1,23 @@
 # Copyright OpenSearch Contributors
 # SPDX-License-Identifier: Apache-2.0
 
+import argparse
 import doctest
+import json
 import os
 import os.path
-import zc.customdoctests
-import json
-import re
 import random
+import re
 import subprocess
+import sys
 import unittest
-import click
-
 from functools import partial
+
+import click
+import zc.customdoctests
+from opensearch_sql_cli.formatter import Formatter
 from opensearch_sql_cli.opensearch_connection import OpenSearchConnection
 from opensearch_sql_cli.utils import OutputSettings
-from opensearch_sql_cli.formatter import Formatter
 from opensearchpy import OpenSearch, helpers
 
 ENDPOINT = "http://localhost:9200"
@@ -244,3 +246,292 @@ def load_tests(loader, suite, ignore):
     random.shuffle(tests)
 
     return DocTests(tests)
+
+
+# Single file doctest functionality
+def find_doc_file(filename_or_path):
+    """Resolve a doc name or path to a file path for the doctest runner.
+
+    An existing path is returned as is. A bare file name (``.rst`` optional) is
+    looked up in ``../docs/category.json``: an exact file-name match wins over a
+    suffix match, so ``stats`` is not confused with ``eventstats.rst``. Returns
+    ``None`` if a bare name matches no file or several, or the JSON cannot be
+    read; otherwise a path, which may simply be the unchanged input.
+    """
+    if os.path.exists(filename_or_path):
+        return filename_or_path
+
+    if os.path.sep not in filename_or_path:
+        try:
+            with open('../docs/category.json') as json_file:
+                category = json.load(json_file)
+            all_docs = category['bash'] + category['ppl_cli'] + category['sql_cli']
+        except (OSError, ValueError, KeyError, TypeError) as e:
+            print(f"Error searching for file: {e}")
+            return None
+
+        search_filename = filename_or_path
+        if not search_filename.endswith('.rst'):
+            search_filename += '.rst'
+
+        # Prefer exact file-name matches; fall back to the looser suffix match.
+        exact = [doc for doc in all_docs if doc.rsplit('/', 1)[-1] == search_filename]
+        matches = exact or [doc for doc in all_docs if doc.endswith(search_filename)]
+        if len(matches) == 1:
+            found_path = f"../docs/{matches[0]}"
+            print(f"Found: {found_path}")
+            return found_path
+        if matches:
+            print(f"Multiple files found matching '{search_filename}':")
+            for match in matches:
+                print(f"  ../docs/{match}")
+            print("Please specify the full path or a more specific filename.")
+        else:
+            print(f"No documentation file found matching '{search_filename}'")
+            print("Use --list to see all available files")
+        return None
+
+    # Path with a separator: also try it relative to the docs directory.
+    if not filename_or_path.startswith('../docs/'):
+        potential_path = f"../docs/{filename_or_path}"
+        if os.path.exists(potential_path):
+            return potential_path
+
+    return filename_or_path
+
+
+def determine_doc_type(file_path):
+    """Classify a doc file as ``'bash'``, ``'ppl_cli'`` or ``'sql_cli'``.
+
+    The path relative to ``../docs`` is looked up in the lists stored in
+    ``../docs/category.json``. If it is not listed, or the lookup raises (a
+    warning is printed), the type is guessed from the path: ``/ppl/`` means
+    ``'ppl_cli'``, ``/sql/`` or ``/dql/`` means ``'sql_cli'``, else ``'bash'``.
+    """
+
+    def guess_from_path():
+        # Heuristic shared by the "not listed" and the error path.
+        if '/ppl/' in file_path:
+            return 'ppl_cli'
+        if '/sql/' in file_path or '/dql/' in file_path:
+            return 'sql_cli'
+        return 'bash'  # default fallback
+
+    try:
+        with open('../docs/category.json') as json_file:
+            listed = json.load(json_file)
+
+        # Category entries are compared against the path relative to ../docs.
+        doc_key = os.path.relpath(file_path, '../docs')
+        for kind in ('bash', 'ppl_cli', 'sql_cli'):
+            if doc_key in listed[kind]:
+                return kind
+        # Not listed in any category.
+        return guess_from_path()
+    except Exception as e:
+        print(f"Warning: Could not determine doc type from category.json: {e}")
+        # Same heuristic when category.json could not be used.
+        return guess_from_path()
+
+
+def run_single_doctest(file_path, verbose=False, endpoint=None):
+    """Run the doctests of one documentation file and print a summary.
+
+    ``file_path`` may be absolute or relative to the working directory.
+    ``verbose`` enables ndiff reports, runner verbosity 2 and a dump of every
+    failure/error. ``endpoint``, when given, replaces the module-level
+    ``ENDPOINT``. Returns ``True`` only if the file exists and all tests pass.
+    """
+    if not os.path.exists(file_path):
+        print(f"Error: File {file_path} does not exist")
+        return False
+
+    if endpoint:
+        # NOTE(review): only code that reads ENDPOINT after this assignment sees
+        # the override; confirm no client is built from it at import time.
+        global ENDPOINT
+        ENDPOINT = endpoint
+        print(f"Using custom endpoint: {endpoint}")
+
+    doc_type = determine_doc_type(file_path)
+    print(f"Detected doc type: {doc_type}")
+    print(f"Running doctest for: {file_path}")
+
+    optionflags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS
+    if verbose:
+        optionflags |= doctest.REPORT_NDIFF
+
+    # Pick the parser (and extra globals) matching the doc type.
+    if doc_type == 'bash':
+        parser = bash_parser
+        globs = {
+            'sh': partial(
+                subprocess.run,
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                timeout=60,
+                shell=True
+            ),
+            'pretty_print': pretty_print
+        }
+    elif doc_type == 'ppl_cli':
+        parser = ppl_cli_parser
+        globs = {}
+    else:  # sql_cli
+        parser = sql_cli_parser
+        globs = {}
+
+    try:
+        print("Setting up test environment...")
+        # doctest's default module-relative mode rejects absolute paths and
+        # resolves relative ones against the calling module's directory, not the
+        # working directory checked above -- so pass an absolute, OS-style path.
+        suite = doctest.DocFileSuite(
+            os.path.abspath(file_path),
+            module_relative=False,
+            parser=parser,
+            setUp=set_up_test_indices,
+            tearDown=tear_down,
+            optionflags=optionflags,
+            encoding='utf-8',
+            globs=globs
+        )
+        runner = unittest.TextTestRunner(verbosity=2 if verbose else 1)
+        result = runner.run(suite)
+    except Exception as e:
+        print(f"Error running doctest: {e}")
+        if verbose:
+            import traceback
+            traceback.print_exc()
+        return False
+
+    failed = len(result.failures) + len(result.errors)
+    counts = f"Tests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}"
+    if result.wasSuccessful():
+        print(f"\nSUCCESS: All tests in {os.path.basename(file_path)} passed!")
+        print(counts)
+        return True
+
+    print(f"\nFAILED: {failed} test(s) failed in {os.path.basename(file_path)}")
+    print(counts)
+    if verbose:
+        print("\nDetailed failure information:")
+        for failure in result.failures:
+            print(f"\n--- FAILURE in {failure[0]} ---")
+            print(failure[1])
+        for error in result.errors:
+            print(f"\n--- ERROR in {error[0]} ---")
+            print(error[1])
+    else:
+        print("Use --verbose for detailed failure information")
+    return False
+
+
+def list_available_docs():
+    """Print every doc file listed in ``../docs/category.json``, grouped by type.
+
+    Each group shows its size and its sorted paths prefixed with ``../docs/``,
+    followed by a grand total. Any exception raised on the way is reported
+    with a printed message instead of propagating.
+    """
+    sections = (
+        ("Bash-based docs", 'bash'),
+        ("PPL CLI docs", 'ppl_cli'),
+        ("SQL CLI docs", 'sql_cli'),
+    )
+    try:
+        with open('../docs/category.json') as json_file:
+            listed = json.load(json_file)
+        print("Available documentation files for testing:")
+        for title, key in sections:
+            print(f"\n{title} ({len(listed[key])} files):")
+            for doc in sorted(listed[key]):
+                print(f"  ../docs/{doc}")
+        total_docs = sum(len(listed[key]) for _, key in sections)
+        print(f"\nTotal: {total_docs} documentation files available for testing")
+    except Exception as e:
+        print(f"Error reading category.json: {e}")
+
+
+def main():
+    """Command-line entry point: list docs, run named files, or run everything.
+
+    ``--list`` prints the available files and returns. Without file arguments
+    the full suite runs through unittest discovery. Otherwise each named file
+    is resolved and tested in turn. Both test modes end with ``sys.exit``:
+    status 1 if a test failed or a file could not be resolved, else 0.
+    """
+    parser = argparse.ArgumentParser(
+        description="Run doctest for one or more documentation files, or all files if no arguments provided",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python test_docs.py                    # Run all tests (default behavior)
+  python test_docs.py stats              # Run single file (extension optional)
+  python test_docs.py stats.rst --verbose
+  python test_docs.py stats fields basics
+  python test_docs.py ../docs/user/ppl/cmd/stats.rst --endpoint http://localhost:9201
+
+Performance Tips:
+  - Use --verbose for detailed debugging information
+  - Ensure OpenSearch is running on the specified endpoint before testing
+  - Extension .rst can be omitted for convenience
+        """
+    )
+
+    parser.add_argument('file_paths', nargs='*', help='Path(s) to the documentation file(s) to test')
+    parser.add_argument('--verbose', '-v', action='store_true',
+                       help='Enable verbose output with detailed diff information')
+    parser.add_argument('--endpoint', '-e', default=None,
+                       help='Custom OpenSearch endpoint (default: http://localhost:9200)')
+    parser.add_argument('--list', '-l', action='store_true',
+                       help='List all available documentation files')
+
+    args = parser.parse_args()
+
+    if args.list:
+        list_available_docs()
+        return
+
+    if not args.file_paths:
+        print("No specific files provided. Running full doctest suite...")
+        # exit=False keeps control here so the result can be turned into an
+        # explicit exit status; previously a failing suite still exited with 0.
+        program = unittest.main(module=None, argv=['test_docs.py'], exit=False)
+        sys.exit(0 if program.result.wasSuccessful() else 1)
+
+    all_success = True
+    total_files = len(args.file_paths)
+    banner = '=' * 60
+
+    for i, file_path in enumerate(args.file_paths, 1):
+        if total_files > 1:
+            print(f"\n{banner}")
+            print(f"Testing file {i}/{total_files}: {file_path}")
+            print(banner)
+
+        # Accepts full paths as well as bare file names.
+        actual_file_path = find_doc_file(file_path)
+        if not actual_file_path:
+            print(f"Skipping {file_path} - file not found")
+            all_success = False
+            continue
+
+        passed = run_single_doctest(
+            actual_file_path, verbose=args.verbose, endpoint=args.endpoint
+        )
+        if not passed:
+            all_success = False
+
+    if total_files > 1:
+        print(f"\n{banner}")
+        print(f"SUMMARY: Tested {total_files} files")
+        print("All tests passed!" if all_success else "Some tests failed!")
+        print(banner)
+
+    sys.exit(0 if all_success else 1)
+
+
+if __name__ == '__main__':
+    main()