QData · qiyanjun · Apr 17, 2026 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/.github/workflows/check-formatting.yml b/.github/workflows/check-formatting.yml
@@ -1,6 +1,3 @@
-# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
-# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
-
 name: Formatting with black & isort
 
 on:
@@ -10,25 +7,21 @@ on:
     branches: [ master ]
 
 jobs:
-  build:
-
+  lint:  # single lint job; replaces the old `build` job and its one-entry Python matrix
     runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: [3.9]
 
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+    - uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
       with:
-        python-version: ${{ matrix.python-version }}
+        python-version: "3.9"  # same version the removed matrix used; quoted so YAML keeps it a string
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip setuptools wheel
-        python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
-        pip install -e .[dev]
-        pip install black flake8 isort --upgrade # Testing packages
+        python setup.py install_egg_info  # Workaround https://github.com/pypa/pip/issues/4537
+        pip install "click<8.1.0"  # NOTE(review): presumably pinned for compatibility with the black version used by `make lint` - confirm
+        pip install -e .[test]  # NOTE(review): assumes the "test" extra supplies black, flake8 and isort (no longer installed explicitly) - confirm in setup.py
     - name: Check code format with black and isort
       run: |
         make lint
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
@@ -1,21 +1,9 @@
-# For most projects, this workflow file will not need changing; you simply need
-# to commit it to your repository.
-#
-# You may wish to alter this file to override the set of languages analyzed,
-# or to provide custom queries or build logic.
-#
-# ******** NOTE ********
-# We have attempted to detect the languages in your repository. Please check
-# the `language` matrix defined below to confirm you have the correct set of
-# supported CodeQL languages.
-#
 name: "CodeQL"
 
 on:
   push:
     branches: [ master, master* ]
   pull_request:
-    # The branches below must be a subset of the branches above
     branches: [ master ]
   schedule:
     - cron: '24 1 * * 0'
@@ -29,39 +17,18 @@ jobs:
       fail-fast: false
       matrix:
         language: [ 'python' ]
-        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
-        # Learn more:
-        # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@v2
+      uses: actions/checkout@v4
 
-    # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v1
+      uses: github/codeql-action/init@v3  # Initializes the CodeQL tools for scanning.
       with:
         languages: ${{ matrix.language }}
-        # If you wish to specify custom queries, you can do so here or in a config file.
-        # By default, queries listed here will override any specified in a config file.
-        # Prefix the list here with "+" to use these queries and those in the config file.
-        # queries: ./path/to/local/query, your-org/your-repo/queries@main
 
-    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
-    # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@v1
-
-    # ℹ️ Command-line programs to run using the OS shell.
-    # 📚 https://git.io/JvXDl
-
-    # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
-    #    and modify them (or add more) to build your code if your project
-    #    uses a compiled language
-
-    #- run: |
-    #   make bootstrap
-    #   make release
+      uses: github/codeql-action/autobuild@v3  # Autobuild attempts to build compiled languages (C/C++, C#, or Java); the language matrix here lists only python
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v1
+      uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/make-docs.yml b/.github/workflows/make-docs.yml
@@ -1,6 +1,3 @@
-# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
-# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
-
 name: Build documentation with Sphinx
 
 on:
@@ -10,29 +7,23 @@ on:
     branches: [ master ]
 
 jobs:
-  build:
-
+  docs:  # single docs job; replaces the old `build` job and its one-entry Python matrix
     runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: [3.8]
 
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+    - uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
       with:
-        python-version: ${{ matrix.python-version }}
+        python-version: "3.11"  # docs previously built on 3.8 (removed matrix); quoted so YAML keeps it a string
     - name: Install dependencies
       run: |
-        sudo sed -i 's/azure\.//' /etc/apt/sources.list # workaround for flaky pandoc install
-        sudo apt-get update # from here https://github.com/actions/virtual-environments/issues/675
-        sudo apt-get install pandoc -o Acquire::Retries=3 # install pandoc
-        python -m pip install --upgrade pip setuptools wheel # update python
-        pip install ipython --upgrade # needed for Github for whatever reason
-        python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
-        pip install -e .[dev]
-        pip install jupyter 'ipykernel<5.0.0' 'ipython<7.0.0' # ipykernel workaround: github.com/jupyter/notebook/issues/4050
+        sudo apt-get update
+        sudo apt-get install pandoc -o Acquire::Retries=3  # install pandoc
+        python -m pip install --upgrade pip setuptools wheel
+        python setup.py install_egg_info  # Workaround https://github.com/pypa/pip/issues/4537
+        pip install -e .[docs]  # NOTE(review): assumes setup.py defines a "docs" extra (this step used .[dev] before) - confirm
+        pip install jupyter ipykernel  # the earlier 'ipykernel<5.0.0' / 'ipython<7.0.0' pins are no longer applied
     - name: Build docs with Sphinx and check for errors
       run: |
         sphinx-build -b html docs docs/_build/html
diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
@@ -1,6 +1,3 @@
-# This workflows will upload a Python Package using Twine when a release is created
-# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
-
 name: Upload Python Package to PyPI
 
 on:
@@ -9,19 +6,17 @@ on:
 
 jobs:
   deploy:
-
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v5
       with:
-        python-version: '3.x'
+        python-version: "3.11"  # fixed interpreter for releases; previously the floating '3.x'
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip setuptools wheel
-        pip install setuptools wheel twine
+        python -m pip install --upgrade pip setuptools wheel twine  # one command now installs the build tooling and twine (formerly two pip calls)
     - name: Build and publish
       env:
         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}

diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml
@@ -1,6 +1,3 @@
-# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
-# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
-
 name: Test with PyTest
 
 on:
@@ -10,48 +7,34 @@ on:
     branches: [ master ]
 
 jobs:
-  build:
-
+  test:  # test job (replaces the old `build` job); runs once per matrix Python version
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.8, 3.9]
+        python-version: ["3.10", "3.11"]  # quoted so YAML does not read 3.10 as the float 3.1
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip setuptools wheel
-        pip install pytest pytest-xdist # Testing packages
-        pip uninstall textattack --yes # Remove TA if it's already installed 
-        python setup.py install_egg_info # Workaround https://github.com/pypa/pip/issues/4537
-        pip install -e .[dev]
+        pip install pytest pytest-xdist  # Testing packages
+        pip uninstall textattack --yes  # Remove TA if it's already installed
+        python setup.py install_egg_info  # Workaround https://github.com/pypa/pip/issues/4537
+        pip install -e .[test]  # NOTE(review): assumes setup.py defines a "test" extra (this step used .[dev] before) - confirm
         pip freeze
+    - name: Download NLTK data  # fetches the NLTK resources named in the command below before the tests run
+      run: |
+        python -c "import nltk; nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger'); nltk.download('stopwords'); nltk.download('omw-1.4'); nltk.download('wordnet')"
     - name: Free disk space
       run: |
-        sudo apt-get remove mysql-client libmysqlclient-dev -y >/dev/null 2>&1
-        sudo apt-get remove php* -y >/dev/null 2>&1
-        sudo apt-get autoremove -y >/dev/null 2>&1
-        sudo apt-get autoclean -y >/dev/null 2>&1
         sudo rm -rf /usr/local/lib/android >/dev/null 2>&1
-        docker rmi $(docker image ls -aq) >/dev/null 2>&1
+        sudo rm -rf /usr/share/dotnet >/dev/null 2>&1  # delete this directory as well; output is discarded
         df -h
-    - name: Increase swap space
-      run: |
-        swapon --show
-        export SWAP_FILE=$(swapon --show=NAME | tail -n 1)
-        sudo swapoff $SWAP_FILE
-        sudo dd if=/dev/zero of=$SWAP_FILE bs=1M count=8k oflag=append conv=notrunc # Increase by 8GB
-        sudo chmod 0600 $SWAP_FILE
-        sudo mkswap $SWAP_FILE
-        sudo swapon $SWAP_FILE
-        swapon --show
     - name: Test with pytest
       run: |
-        echo "skipping tests!"
-        # pytest tests -v
-
+        pytest tests -v  # the suite actually runs now; this step previously only echoed a "skipping" message
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -0,0 +1,101 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+TextAttack (v0.3.10) is a Python framework for adversarial attacks, data augmentation, and model training in NLP. It provides a modular system where attacks are composed of four pluggable components: goal functions, constraints, transformations, and search methods. The project is maintained by UVA QData Lab.
+
+## Common Commands
+
+### Installation (dev mode)
+```bash
+pip install -e .[dev]
+```
+
+### Testing
+```bash
+make test                    # Run full test suite (pytest --dist=loadfile -n auto)
+pytest tests -v              # Verbose test run
+pytest tests/test_augment_api.py  # Run a single test file
+pytest --lf                  # Re-run only last failed tests
+```
+
+### Formatting & Linting
+```bash
+make format    # Auto-format with black, isort, docformatter
+make lint      # Check formatting (black --check, isort --check-only, flake8)
+```
+
+### Building Docs
+```bash
+make docs       # Build HTML docs with Sphinx
+make docs-auto  # Hot-reload docs server on port 8765
+```
+
+### CLI Usage
+```bash
+textattack attack --recipe textfooler --model bert-base-uncased-mr --num-examples 100
+textattack augment --input-csv examples.csv --output-csv output.csv --input-column text --recipe embedding
+textattack train --model-name-or-path lstm --dataset yelp_polarity --epochs 50
+textattack list attack-recipes
+textattack peek-dataset --dataset-from-huggingface snli
+```
+
+## Architecture
+
+### Core Attack Pipeline (`textattack/attack.py`, `textattack/attacker.py`)
+
+An `Attack` is composed of exactly four components:
+1. **GoalFunction** (`textattack/goal_functions/`) - Determines if an attack succeeded. Categories: `classification/` (untargeted, targeted), `text/` (BLEU, translation overlap), `custom/`.
+2. **Constraints** (`textattack/constraints/`) - Filter invalid perturbations. Categories: `semantics/` (sentence encoders, word embeddings), `grammaticality/` (POS, language models, grammar tools), `overlap/` (edit distance, BLEU), `pre_transformation/` (restrict search space before transforming).
+3. **Transformation** (`textattack/transformations/`) - Generate candidate perturbations. Types: `word_swaps/` (embedding, gradient, homoglyph, WordNet), `word_insertions/`, `word_merges/`, `sentence_transformations/`, `WordDeletion`, `CompositeTransformation`.
+4. **SearchMethod** (`textattack/search_methods/`) - Traverse the perturbation space. Includes: `BeamSearch`, `GreedySearch`, `GreedyWordSwapWIR`, `GeneticAlgorithm`, `ParticleSwarmOptimization`, `DifferentialEvolution`.
+
+The `Attacker` class orchestrates running attacks on datasets with parallel processing, checkpointing, and logging.
+
+### Attack Recipes (`textattack/attack_recipes/`)
+
+Pre-built attack configurations from the literature (e.g., TextFooler, DeepWordBug, BAE, BERT-Attack, CLARE, CheckList, etc.). Each recipe subclasses `AttackRecipe` and implements a `build(model_wrapper)` classmethod that returns a configured `Attack` object. Includes multi-lingual recipes for French, Spanish, and Chinese.
+
+### Key Abstractions
+
+- **`AttackedText`** (`textattack/shared/attacked_text.py`) - Central text representation that maintains both token list and original text with punctuation. Used throughout the pipeline instead of raw strings.
+- **`ModelWrapper`** (`textattack/models/wrappers/`) - Abstract interface for models. Implementations for PyTorch, HuggingFace, TensorFlow, sklearn. Models must accept string input and return predictions.
+- **`Dataset`** (`textattack/datasets/`) - Iterable of `(input, output)` pairs. Supports HuggingFace datasets and custom files.
+- **`Augmenter`** (`textattack/augmentation/`) - Uses transformations and constraints for data augmentation (not adversarial attacks). Built-in recipes: wordnet, embedding, charswap, eda, checklist, clare, back_trans.
+- **`PromptAugmentationPipeline`** (`textattack/prompt_augmentation/`) - Augments prompts and generates LLM responses.
+- **LLM Wrappers** (`textattack/llms/`) - Wrappers for using LLMs (HuggingFace, ChatGPT) with prompt augmentation.
+
+### CLI Commands (`textattack/commands/`)
+
+Entry point: `textattack/commands/textattack_cli.py`. Each command (attack, augment, train, eval-model, list, peek-dataset, benchmark-recipe, attack-resume) is a subclass of `TextAttackCommand` with `register_subcommand()` and `run()` methods.
+
+### Configuration
+
+- Version tracked in `docs/conf.py` (imported by `setup.py`)
+- Cache directory: `~/.cache/textattack/` (override with `TA_CACHE_DIR` env var)
+- Formatting: black (line length 88), isort (skip `__init__.py`), flake8 (ignores: E203, E266, E501, W503, D203)
+
+### CI Workflows (`.github/workflows/`)
+
+- `check-formatting.yml` - Runs `make lint` on Python 3.9
+- `run-pytest.yml` - Runs `pytest tests -v` on Python 3.10 and 3.11 (after downloading the required NLTK data)
+- `publish-to-pypi.yml` - PyPI publishing
+- `make-docs.yml` - Documentation build
+- `codeql-analysis.yml` - Security analysis
+
+### Test Structure
+
+Tests are in `tests/` organized by feature:
+- `test_command_line/` - CLI command integration tests (attack, augment, train, eval, list, loggers)
+- `test_constraints/` - Constraint unit tests
+- `test_augment_api.py`, `test_transformations.py`, `test_attacked_text.py`, `test_tokenizers.py`, `test_word_embedding.py`, `test_metric_api.py`, `test_prompt_augmentation.py`
+- `test_command_line/update_test_outputs.py` - Script to regenerate expected test outputs
+
+### Adding New Components
+
+- **Attack recipe**: Subclass `AttackRecipe` in `textattack/attack_recipes/`, implement `build(model_wrapper)`, add import to `__init__.py`, add doc reference in `docs/attack_recipes.rst`.
+- **Transformation**: Subclass `Transformation` in appropriate subfolder under `textattack/transformations/`.
+- **Constraint**: Subclass `Constraint` or `PreTransformationConstraint` in appropriate subfolder under `textattack/constraints/`.
+- **Search method**: Subclass `SearchMethod` in `textattack/search_methods/`.