diff --git a/.github/scripts/unicode_conversion.py b/.github/scripts/unicode_conversion.py index 1a37e27b..77d2a1b2 100644 --- a/.github/scripts/unicode_conversion.py +++ b/.github/scripts/unicode_conversion.py @@ -45,7 +45,7 @@ def convert_module(module_path): logging.info(f'Converting {module_path}') result = subprocess.run( - [tlauc_path, module_path] + (['--ascii'] if to_ascii else []), + [tlauc_path, module_path, '--skip', 'numsets'] + (['--ascii'] if to_ascii else []), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True diff --git a/.github/scripts/unicode_number_set_shim.py b/.github/scripts/unicode_number_set_shim.py deleted file mode 100644 index 0e959f08..00000000 --- a/.github/scripts/unicode_number_set_shim.py +++ /dev/null @@ -1,145 +0,0 @@ -""" -While Unicode support in the Java tools goes through a trial period, the core -Naturals/Integers/Reals modules will remain Unicode-free. So, the Unicode -number sets ℕ, ℤ, and ℝ must be defined in any module that wishes to use -them. This script iterates through all modules in the manifests and replaces -their imports of the Naturals/Integers/Reals modules with shims containing -a definition of the Unicode number sets. -""" - -from argparse import ArgumentParser -from dataclasses import dataclass -import logging -from os.path import dirname, normpath, join -import tla_utils -from typing import List -import tree_sitter_tlaplus -from tree_sitter import Language, Parser - -logging.basicConfig(level=logging.INFO) - -@dataclass(frozen=True) -class NumberSetShim: - module : str - ascii : str - unicode : str - capture : str - imports : List[str] - -def shim_module_name(shim_module): - """ - The name of the shim module. - """ - return f'{shim_module}_UnicodeShim' - -shims = [ - NumberSetShim('Naturals', 'Nat', 'ℕ', 'nat', []), - NumberSetShim('Integers', 'Int', 'ℤ', 'int', [shim_module_name('Naturals')]), - NumberSetShim('Reals', 'Real', 'ℝ', 'real', [shim_module_name('Integers')]) -] - -def build_shim_module(shim): - """ - Derives the contents of a shim module. - """ - module_name = shim_module_name(shim.module) - imports = ', '.join(shim.imports + [shim.module]) - return f'---- MODULE {module_name} ----\nEXTENDS {imports}\n{shim.unicode} ≜ {shim.ascii}\n====' - -def create_shim_module(module_dir, shim): - """ - Creates a shim module for the given shim. - """ - shim_path = join(module_dir, f'{shim_module_name(shim.module)}.tla') - with open(shim_path, 'w', encoding='utf-8') as module: - module.write(build_shim_module(shim)) - -def create_shim_modules(examples_root, module_path): - """ - Creates shim modules in the same directory as the module so they are - automatically imported. Since this creates quite a few .tla files, they - can be easily deleted with find -iname *_UnicodeShim.tla -delete. - """ - module_path = tla_utils.from_cwd(examples_root, module_path) - module_dir = dirname(module_path) - for shim in shims: - create_shim_module(module_dir, shim) - -def build_imports_query(language): - """ - Builds query to get import locations for shim insertion. - """ - queries = [ - '(extends (identifier_ref) @import)', - '(instance (identifier_ref) @import)' - ] - return language.query(' '.join(queries)) - -def replace_with_shim(module_bytes, node, byte_offset, shim): - """ - Replace the text covered by the given parse tree node with a reference to - a shim module. - """ - source_len = node.byte_range[1] - node.byte_range[0] - target = bytes(shim_module_name(shim.module), 'utf-8') - target_len = len(target) - module_bytes[node.byte_range[0]+byte_offset:node.byte_range[1]+byte_offset] = target - return byte_offset + target_len - source_len - -def replace_imports(module_bytes, tree, query): - """ - Replaces imports with unicode shim version. - """ - shim_modules = {shim.module : shim for shim in shims} - imported_modules = [ - (imported_module, shim_modules[module_name]) - for imported_module in tla_utils.all_nodes_of(query.captures(tree.root_node)) - if (module_name := tla_utils.node_to_string(module_bytes, imported_module)) in shim_modules - ] - byte_offset = 0 - for imported_module, shim in imported_modules: - byte_offset = replace_with_shim(module_bytes, imported_module, byte_offset, shim) - -def write_module(examples_root, module_path, module_bytes): - """ - Overwrites a module with the given bytes. - """ - module_path = tla_utils.from_cwd(examples_root, module_path) - with open(module_path, 'wb') as module: - module.write(module_bytes) - -if __name__ == '__main__': - parser = ArgumentParser(description='Adds ℕ/ℤ/ℝ Unicode number set shim definitions to modules as needed.') - parser.add_argument('--examples_root', help='Root directory of the tlaplus/examples repository', required=True) - parser.add_argument('--skip', nargs='+', help='Space-separated list of .tla modules to skip', required=False, default=[]) - parser.add_argument('--only', nargs='+', help='If provided, only modify models in this space-separated list', required=False, default=[]) - args = parser.parse_args() - - examples_root = args.examples_root - manifest = tla_utils.load_all_manifests(examples_root) - skip_modules = args.skip - only_modules = args.only - - TLAPLUS_LANGUAGE = Language(tree_sitter_tlaplus.language()) - parser = Parser(TLAPLUS_LANGUAGE) - imports_query = build_imports_query(TLAPLUS_LANGUAGE) - - modules = [ - module['path'] - for spec in manifest - for module in spec['modules'] - if module['path'] not in skip_modules - and (only_modules == [] or module['path'] in only_modules) - ] - - for module_path in modules: - logging.info(f'Processing {module_path}') - tree, module_bytes, parse_failure = tla_utils.parse_module(examples_root, parser, module_path) - if parse_failure: - logging.error(f'Failed to parse {module_path}') - exit(1) - module_bytes = bytearray(module_bytes) - replace_imports(module_bytes, tree, imports_query) - write_module(examples_root, module_path, module_bytes) - create_shim_modules(examples_root, module_path) - diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index de05abf0..6d696a08 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -67,11 +67,6 @@ jobs: python "$SCRIPT_DIR/unicode_conversion.py" \ --tlauc_path "$DEPS_DIR/tlauc/tlauc" \ --examples_root . - - name: Add unicode shims - if: matrix.unicode - run: | - python "$SCRIPT_DIR/unicode_number_set_shim.py" \ - --examples_root . - name: Translate PlusCal # PlusCal translations will be reverted at the end of this step, # since we want to support people manually editing the generated TLA+ diff --git a/DEVELOPING.md b/DEVELOPING.md index 0a063d5a..7b1d083b 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -15,9 +15,6 @@ Here is a brief overview of each script in the order they are run in [the CI](.g 1. [`check_markdown_table.py`](.github/scripts/check_markdown_table.py): this uses the [mistletoe](https://pypi.org/project/mistletoe/) markdown parser python package to parse [`README.md`](README.md), extract the spec table, then validate its format & contents against each `manifest.json`; before this script was developed the table tended to diverge wildly from the actual content of this repo. 1. [`unicode_conversion.py`](.github/scripts/unicode_conversion.py): this script uses [tlauc](https://github.com/tlaplus-community/tlauc) to convert each spec into its Unicode form to ensure TLA⁺ tooling functions identically on ASCII & Unicode specs. The CI spawns separate runs with and without conversion of specs to Unicode. -1. [`unicode_number_set_shim.py`](.github/scripts/unicode_number_set_shim.py): since Unicode adoption is not yet fully ratified, Unicode definitions like `ℕ`, `ℤ`, and `ℝ` (as synonyms for `Nat`, `Int`, and `Real` respectively) are not yet included in the standard modules. - This script works around that by using the [tree-sitter-tlaplus](https://pypi.org/project/tree-sitter-tlaplus/) python package to rewrite all imports of the `Naturals`, `Integers`, and `Reals` modules to import shim modules instead that define `ℕ`, `ℤ`, and `ℝ`. - Once TLA⁺ fully adopts Unicode this (quite hackish) script will no longer be necessary. 1. [`translate_pluscal.py`](.github/scripts/translate_pluscal.py): this script runs the PlusCal translator on all modules containing PlusCal, to ensure their PlusCal syntax is valid. 1. [`parse_modules.py`](.github/scripts/parse_modules.py): this script runs the SANY parser against all `.tla` files in the repository to ensure they are syntactically & semantically valid. This can get quite complicated as many modules import specs defined in Apalache, TLAPM, or the community modules. @@ -69,14 +66,10 @@ python .github/scripts/check_manifest_features.py --examples_root . ```sh python .github/scripts/check_markdown_table.py --readme_path README.md ``` -**WARNING:** the `unicode_conversion.py`, `unicode_number_set_shim.py`, and `translate_pluscal.py` scripts make large numbers of changes to files in your local repository, so be sure to run them on a clean branch where your own changes can't be clobbered and you can easily revert with `git reset --hard HEAD`. +**WARNING:** the `unicode_conversion.py` and `translate_pluscal.py` scripts make large numbers of changes to files in your local repository, so be sure to run them on a clean branch where your own changes can't be clobbered and you can easily revert with `git reset --hard HEAD`. ```sh python .github/scripts/unicode_conversion.py --tlauc_path deps/tlauc/tlauc --examples_root . ``` -Delete all the shim files generated by `unicode_number_set_shim.py` with `find . -iname "*_UnicodeShim.tla" -delete`. -```sh -python .github/scripts/unicode_number_set_shim.py --examples_root . -``` ```sh python .github/scripts/translate_pluscal.py --tools_jar_path deps/tools/tla2tools.jar --examples_root . ```