Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 35 additions & 25 deletions .github/workflows/regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,27 @@ jobs:
runs-on: ubuntu-latest
#if: "!contains(github.event.pull_request.title, '[NO-REGRESSION-TEST]')"
env:
LANGS: "go rust python java typescript"
LANGS: "typescript"
DIFFJSON_IGNORE: " ['id'] ['Path'] "
steps:
- name: Checkout pull request code
uses: actions/checkout@v4
with:
path: 'pr_repo'

- name: Checkout main branch code
uses: actions/checkout@v4
with:
ref: 'main'
path: 'main_repo'

- name: Setup Go environment
uses: actions/setup-go@v5
with:
go-version: '1.22'
cache-dependency-path: |
main_repo/go.sum
pr_repo/go.sum

- name: Setup Rust toolchain
uses: dtolnay/rust-toolchain@stable
Expand All @@ -42,17 +57,6 @@ jobs:
with:
node-version: '22'

- name: Checkout pull request code
uses: actions/checkout@v4
with:
path: 'pr_repo'

- name: Checkout main branch code
uses: actions/checkout@v4
with:
ref: 'main'
path: 'main_repo'

- name: Compile both binaries
run: |
(cd main_repo && go build -o ../abcoder_old)
Expand All @@ -61,25 +65,32 @@ jobs:
- name: Install evaluation dependencies
run: pip install -r ./pr_repo/script/requirements.txt

- name: Install LSPs
##############################################################################
- name: Install dependencies for old
run: |
OUTDIR=out_new ABCEXE=./abcoder_new ./pr_repo/script/run_testdata.sh first
# use the same JDTLS for consistency and to avoid wasting time installing a duplicate JDTLS
echo "JDTLS_ROOT_PATH=$(realpath ./pr_repo/lang/java/lsp/jdtls/jdt-language-server-*)" >> $GITHUB_ENV
# HACK: auto installation uses the published version, not our local version
(cd ./main_repo/ts-parser && npm install && npm run build && npm install -g .)
OUTDIR=out_old ABCEXE=./abcoder_old ./main_repo/script/run_testdata.sh first
# avoid wasting time installing a new jdtls
echo "JDTLS_ROOT_PATH=$(realpath ./main_repo/lang/java/lsp/jdtls/jdt-language-server-*)" >> $GITHUB_ENV

- name: Run OLD abcoder
run:
OUTDIR=out_old ABCEXE=./abcoder_old ./pr_repo/script/run_testdata.sh all
OUTDIR=out_old ABCEXE=./abcoder_old ./main_repo/script/run_testdata.sh all

- name: Reset dependencies
run: |
npm uninstall -g abcoder-ts-parser

- name: Install dependencies for new
run: |
(cd ./pr_repo/ts-parser && npm install && npm run build && npm install -g .)
OUTDIR=out_new ABCEXE=./abcoder_new ./pr_repo/script/run_testdata.sh first

- name: Run NEW abcoder
run:
OUTDIR=out_new ABCEXE=./abcoder_new ./pr_repo/script/run_testdata.sh all

- name: Compare outputs and check for regression
id: diff_check
run: ./pr_repo/script/diffjson.py out_old out_new || echo "failed=true" >> $GITHUB_OUTPUT
continue-on-error: true

- name: Upload output directories
uses: actions/upload-artifact@v4
if: always()
Expand All @@ -90,6 +101,5 @@ jobs:
out_new
retention-days: 3

- name: Status check
if: steps.diff_check.outputs.failed == 'true'
run: exit 1
- name: Compare outputs and check for regression
run: ./pr_repo/script/diffjson.py out_old out_new
2 changes: 0 additions & 2 deletions lang/python/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,8 @@ func (c *PythonSpec) NameSpace(path string, file *uniast.File) (string, string,
}

for _, sysPath := range c.sysPaths {
log.Error("PythonSpec: path %s sysPath %s\n", path, sysPath)
if strings.HasPrefix(path, sysPath) {
relPath, err := filepath.Rel(sysPath, path)
log.Error("PythonSpec: matched relPath %s, sysPath %s\n", relPath, sysPath)
if err != nil {
return "", "", err
}
Expand Down
115 changes: 96 additions & 19 deletions script/diffjson.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/usr/bin/env python3
import argparse
import json
import os
import re
import sys
from pathlib import Path
from typing import Literal
Expand All @@ -11,6 +13,56 @@
Status = Literal["OK", "BAD", "FILE_ERROR"]


def parse_accessor(accessor_string: str) -> list[str | int]:
"""
Parses a field accessor string like "['key'][0]" into a list ['key', 0].
This allows for programmatic access to nested JSON elements.
"""
# Regex to find content within brackets, e.g., ['key'] or [0]
parts = re.findall(r"\[([^\]]+)\]", accessor_string)
keys = []
for part in parts:
try:
# Try to convert to an integer for list indices
keys.append(int(part))
except ValueError:
# Otherwise, it's a string key; strip surrounding quotes
keys.append(part.strip("'\""))
return keys


def delete_path(data: dict | list, path: list[str | int]):
"""
Deletes a value from a nested dictionary or list based on a path.
This function modifies the data in place. If the path is invalid
or doesn't exist, it does nothing.
"""
if not path:
return

# Traverse to the parent of the target element to delete it
parent = data
key_to_delete = path[-1]
path_to_parent = path[:-1]

try:
for key in path_to_parent:
parent = parent[key]

# Check if the final key/index exists in the parent before deleting
if isinstance(parent, dict) and key_to_delete in parent:
del parent[key_to_delete]
elif (
isinstance(parent, list)
and isinstance(key_to_delete, int)
and 0 <= key_to_delete < len(parent)
):
del parent[key_to_delete]
except (KeyError, IndexError, TypeError):
# Path is invalid (e.g., key missing, index out of bounds). Ignore and proceed.
pass


def format_diff_custom(diff: DeepDiff) -> str:
"""
Formats a DeepDiff object into a custom human-readable string.
Expand Down Expand Up @@ -65,27 +117,39 @@ def format_value(value):
return "\n".join(output)


def compare_json_files(
    file1_path: Path, file2_path: Path, ignore_fields: list[str] | None = None
) -> tuple[Status, DeepDiff | None]:
    """
    Compare two JSON files, optionally ignoring specified fields.

    Args:
        file1_path: Path to the first ("old") JSON file.
        file2_path: Path to the second ("new") JSON file.
        ignore_fields: Accessor strings (see parse_accessor) naming elements
            to delete from both documents before comparison.

    Returns:
        A tuple containing the status ("OK", "BAD", "FILE_ERROR")
        and the DeepDiff object if differences were found, else None.
    """
    try:
        with open(file1_path, "r", encoding="utf-8") as f1:
            json1 = json.load(f1)
        with open(file2_path, "r", encoding="utf-8") as f2:
            json2 = json.load(f2)
    except (OSError, json.JSONDecodeError):
        # OSError covers FileNotFoundError plus permission/IsADirectory and
        # other I/O failures that would otherwise escape as tracebacks.
        return "FILE_ERROR", None

    # Delete ignored fields from both JSON objects before comparison
    if ignore_fields:
        for field_accessor in ignore_fields:
            path = parse_accessor(field_accessor)
            delete_path(json1, path)
            delete_path(json2, path)

    diff = DeepDiff(json1, json2, ignore_order=True)

    return ("BAD", diff) if diff else ("OK", None)


def process_directory_comparison(old_dir: Path, new_dir: Path) -> bool:
def process_directory_comparison(
old_dir: Path, new_dir: Path, ignore_fields: list[str] | None = None
) -> bool:
"""
Compares JSON files across two directories and prints results in a list format.
"""
Expand All @@ -94,7 +158,9 @@ def process_directory_comparison(old_dir: Path, new_dir: Path) -> bool:
new_files = {p.name for p in new_dir.glob("*.json")}

for filename in sorted(old_files.intersection(new_files)):
status = compare_json_files(old_dir / filename, new_dir / filename)
status, _ = compare_json_files(
old_dir / filename, new_dir / filename, ignore_fields
)
results["BAD" if status != "OK" else "OK"].append(filename)

for filename in sorted(old_files - new_files):
Expand Down Expand Up @@ -125,8 +191,25 @@ def main():
parser.add_argument(
"path2", type=Path, help="Path to the second file or 'new' directory."
)
parser.add_argument(
"-i",
"--ignore",
action="append",
default=[],
help="Field to ignore, as an accessor string. Can be used multiple times. "
"Also reads whitespace-separated values from $DIFFJSON_IGNORE. "
"Example: -i \"['metadata']['timestamp']\"",
)
args = parser.parse_args()

# --- Combine ignore fields from CLI and environment variable ---
cli_ignore_fields = args.ignore
env_ignore_str = os.environ.get("DIFFJSON_IGNORE", "")
env_ignore_fields = env_ignore_str.split() if env_ignore_str else []

# Combine both sources and remove duplicates
all_ignore_fields = list(set(cli_ignore_fields + env_ignore_fields))

path1, path2 = args.path1, args.path2

if not path1.exists() or not path2.exists():
Expand All @@ -139,7 +222,7 @@ def main():
# --- Handle Directory Comparison ---
if path1.is_dir() and path2.is_dir():
print(f"Comparing directories:\n- Old: {path1}\n- New: {path2}\n")
if process_directory_comparison(path1, path2):
if process_directory_comparison(path1, path2, all_ignore_fields):
print("\nComparison finished with errors.", file=sys.stderr)
return 1
else:
Expand All @@ -148,23 +231,17 @@ def main():

# --- Handle Single File Comparison ---
elif path1.is_file() and path2.is_file():
try:
with open(path1, "r", encoding="utf-8") as f1:
json1 = json.load(f1)
with open(path2, "r", encoding="utf-8") as f2:
json2 = json.load(f2)
except (FileNotFoundError, json.JSONDecodeError) as e:
print(f"Error reading or parsing file: {e}", file=sys.stderr)
return 1
status, diff = compare_json_files(path1, path2, all_ignore_fields)

diff = DeepDiff(json1, json2, ignore_order=True)
if status == "FILE_ERROR":
print("Error reading or parsing a file.", file=sys.stderr)
return 1

if diff:
if status == "BAD" and diff:
print(
f"Differences found between '{path1.name}' and '{path2.name}':\n",
file=sys.stderr,
)
# Format the diff into a custom readable format and print to stderr
custom_output = format_diff_custom(diff)
print(custom_output, file=sys.stderr)
return 1
Expand Down
Loading
Loading