Skip to content

Commit a51c83d

Browse files
mpcabd and claude committed
Modernise packaging and clean up Python 2 residue
- Replace setup.py with pyproject.toml (hatchling build backend, uv-native)
- Add uv.lock and dependency-groups for dev dependencies (pytest, pytest-cov)
- Drop tox.ini in favour of direct `uv run pytest`
- Replace .travis.yml with GitHub Actions CI (uv, Python 3.10–3.13 matrix)
- Drop Python <3.10 support (3.6–3.9 all EOL)
- Remove (object) base class and super() call on ArabicReshaper
- Switch _ligatures_re from @property with hidden mutation to @cached_property
- Add -> str type hint and sentinel comment to reshape()
- Add ArabicReshaperConfigurationError(ValueError); replace bare Exception raises
- Add docstring and ImportError to config_for_true_type_font()
- Fix LETTERS -> letters parameter naming in letters.py helper functions
- Add comment explaining the RIAL SIGN regex pattern in ligatures.py
- Remove unused import os from __init__.py; export ArabicReshaperConfigurationError
- Remove debug print() from test_003_reshaping.py

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent c12b08f commit a51c83d

11 files changed

Lines changed: 500 additions & 106 deletions

File tree

.github/workflows/ci.yml

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,32 @@
1+
name: CI
2+
3+
on:
4+
push:
5+
branches: [master]
6+
pull_request:
7+
branches: [master]
8+
9+
jobs:
10+
test:
11+
runs-on: ubuntu-latest
12+
strategy:
13+
fail-fast: false
14+
matrix:
15+
python-version: ["3.10", "3.11", "3.12", "3.13"]
16+
17+
steps:
18+
- uses: actions/checkout@v4
19+
20+
- name: Install uv
21+
uses: astral-sh/setup-uv@v5
22+
with:
23+
enable-cache: true
24+
25+
- name: Set up Python ${{ matrix.python-version }}
26+
run: uv python install ${{ matrix.python-version }}
27+
28+
- name: Install dependencies
29+
run: uv sync --group dev
30+
31+
- name: Run tests
32+
run: uv run pytest --cov=arabic_reshaper --cov-report=term-missing

arabic_reshaper/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
1-
import os
2-
31
from .arabic_reshaper import reshape, default_reshaper, ArabicReshaper
42
from .reshaper_config import (config_for_true_type_font,
3+
ArabicReshaperConfigurationError,
54
ENABLE_NO_LIGATURES,
65
ENABLE_SENTENCES_LIGATURES,
76
ENABLE_WORDS_LIGATURES,

arabic_reshaper/arabic_reshaper.py

Lines changed: 19 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99

1010
import re
1111

12+
from functools import cached_property
1213
from itertools import repeat
1314

1415
from .ligatures import LIGATURES
@@ -35,7 +36,7 @@
3536
)
3637

3738

38-
class ArabicReshaper(object):
39+
class ArabicReshaper:
3940
"""
4041
A class for Arabic reshaper, it allows for fine-tune configuration over the
4142
API.
@@ -56,7 +57,7 @@ class ArabicReshaper(object):
5657
"""
5758

5859
def __init__(self, configuration=None, configuration_file=None):
59-
super(ArabicReshaper, self).__init__()
60+
super().__init__()
6061

6162
self.configuration = auto_config(configuration, configuration_file)
6263
self.language = self.configuration.get('language')
@@ -68,33 +69,25 @@ def __init__(self, configuration=None, configuration_file=None):
6869
else:
6970
self.letters = LETTERS_ARABIC
7071

71-
@property
72+
@cached_property
7273
def _ligatures_re(self):
73-
if not hasattr(self, '__ligatures_re'):
74-
patterns = []
75-
re_group_index_to_ligature_forms = {}
76-
index = 0
77-
FORMS = 1
78-
MATCH = 0
79-
for ligature_record in LIGATURES:
80-
ligature, replacement = ligature_record
81-
if not self.configuration.getboolean(ligature):
82-
continue
83-
re_group_index_to_ligature_forms[index] = replacement[FORMS]
84-
patterns.append('({})'.format(replacement[MATCH]))
85-
index += 1
86-
self._re_group_index_to_ligature_forms = (
87-
re_group_index_to_ligature_forms
88-
)
89-
self.__ligatures_re = re.compile('|'.join(patterns), re.UNICODE)
90-
return self.__ligatures_re
74+
patterns = []
75+
self._re_group_index_to_ligature_forms = {}
76+
index = 0
77+
FORMS = 1
78+
MATCH = 0
79+
for ligature, replacement in LIGATURES:
80+
if not self.configuration.getboolean(ligature):
81+
continue
82+
self._re_group_index_to_ligature_forms[index] = replacement[FORMS]
83+
patterns.append(f'({replacement[MATCH]})')
84+
index += 1
85+
return re.compile('|'.join(patterns), re.UNICODE)
9186

9287
def _get_ligature_forms_from_re_group_index(self, group_index):
93-
if not hasattr(self, '_re_group_index_to_ligature_forms'):
94-
return self._ligatures_re
9588
return self._re_group_index_to_ligature_forms[group_index]
9689

97-
def reshape(self, text):
90+
def reshape(self, text: str) -> str:
9891
if not text:
9992
return ''
10093

@@ -217,6 +210,8 @@ def reshape(self, text):
217210
if not forms[ligature_form]:
218211
continue
219212
output[a] = (forms[ligature_form], NOT_SUPPORTED)
213+
# Pad the replaced positions with empty sentinels so that
214+
# Harakat position indices remain aligned with the output list.
220215
output[a+1:b] = repeat(('', NOT_SUPPORTED), b - 1 - a)
221216

222217
result = []

arabic_reshaper/letters.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -506,22 +506,22 @@
506506
ZWJ: (ZWJ, ZWJ, ZWJ, ZWJ),
507507
}
508508

509-
def connects_with_letter_before(letter,LETTERS):
510-
if letter not in LETTERS:
509+
def connects_with_letter_before(letter, letters):
510+
if letter not in letters:
511511
return False
512-
forms = LETTERS[letter]
512+
forms = letters[letter]
513513
return forms[FINAL] or forms[MEDIAL]
514514

515515

516-
def connects_with_letter_after(letter,LETTERS):
517-
if letter not in LETTERS:
516+
def connects_with_letter_after(letter, letters):
517+
if letter not in letters:
518518
return False
519-
forms = LETTERS[letter]
519+
forms = letters[letter]
520520
return forms[INITIAL] or forms[MEDIAL]
521521

522522

523-
def connects_with_letters_before_and_after(letter,LETTERS):
524-
if letter not in LETTERS:
523+
def connects_with_letters_before_and_after(letter, letters):
524+
if letter not in letters:
525525
return False
526-
forms = LETTERS[letter]
526+
forms = letters[letter]
527527
return forms[MEDIAL]

arabic_reshaper/ligatures.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@
7171
'\u0648\u0633\u0644\u0645', ('\uFDF8', '', '', ''),
7272
)),
7373
('RIAL SIGN', (
74+
# Regex (not a literal string): matches both Farsi YEH (U+06CC) and Arabic YEH (U+064A).
7475
'\u0631[\u06CC\u064A]\u0627\u0644', ('\uFDFC', '', '', ''),
7576
)),
7677
)

arabic_reshaper/reshaper_config.py

Lines changed: 35 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,10 @@
2222
except ImportError:
2323
with_font_config = False
2424

25+
class ArabicReshaperConfigurationError(ValueError):
26+
"""Raised when the reshaper configuration is invalid or missing."""
27+
28+
2529
ENABLE_NO_LIGATURES = 0b000
2630
ENABLE_SENTENCES_LIGATURES = 0b001
2731
ENABLE_WORDS_LIGATURES = 0b010
@@ -378,14 +382,12 @@ def auto_config(configuration=None, configuration_file=None):
378382

379383
if configuration_file:
380384
if not os.path.exists(configuration_file):
381-
raise Exception(
382-
'Configuration file {} not found{}.'.format(
383-
configuration_file,
384-
loaded_from_envvar and (
385-
' it is set in your environment variable ' +
386-
'PYTHON_ARABIC_RESHAPER_CONFIGURATION_FILE'
387-
) or ''
388-
)
385+
env_note = (
386+
' (set via PYTHON_ARABIC_RESHAPER_CONFIGURATION_FILE)'
387+
if loaded_from_envvar else ''
388+
)
389+
raise ArabicReshaperConfigurationError(
390+
f'Configuration file not found: {configuration_file}{env_note}'
389391
)
390392
configuration_parser.read((configuration_file,))
391393

@@ -395,22 +397,40 @@ def auto_config(configuration=None, configuration_file=None):
395397
})
396398

397399
if 'ArabicReshaper' not in configuration_parser:
398-
raise ValueError(
400+
raise ArabicReshaperConfigurationError(
399401
'Invalid configuration: '
400402
'A section with the name ArabicReshaper was not found'
401403
)
402404

403405
return configuration_parser['ArabicReshaper']
404406

405407

406-
def config_for_true_type_font(font_file_path,
407-
ligatures_config=ENABLE_ALL_LIGATURES):
408+
def config_for_true_type_font(font_file_path, ligatures_config=ENABLE_ALL_LIGATURES):
409+
"""Return a reshaper configuration dict tuned to the capabilities of a TrueType font.
410+
411+
Inspects the font's cmap table to determine which positional Arabic letter
412+
forms are present, and checks each ligature glyph so that only ligatures
413+
the font actually supports are enabled.
414+
415+
Args:
416+
font_file_path: Path to the .ttf/.otf font file.
417+
ligatures_config: Bitmask of ENABLE_*_LIGATURES flags controlling which
418+
ligature categories to probe. Defaults to ENABLE_ALL_LIGATURES.
419+
420+
Returns:
421+
A configuration dict suitable for passing to ArabicReshaper().
422+
423+
Raises:
424+
ImportError: If fonttools is not installed.
425+
ArabicReshaperConfigurationError: If the font path is invalid.
426+
"""
408427
if not with_font_config:
409-
raise Exception('fonttools not installed, ' +
410-
'install it then rerun this.\n' +
411-
'$ pip install arabic-teshaper[with-fonttools]')
428+
raise ImportError(
429+
'fonttools is not installed. '
430+
'Install it with: pip install arabic-reshaper[with-fonttools]'
431+
)
412432
if not font_file_path or not os.path.exists(font_file_path):
413-
raise Exception('Invalid path to font file')
433+
raise ArabicReshaperConfigurationError(f'Invalid path to font file: {font_file_path}')
414434
ttfont = TTFont(font_file_path)
415435
has_isolated = True
416436
for k, v in LETTERS_ARABIC.items():

arabic_reshaper/tests/test_003_reshaping.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,8 +28,6 @@ def setUp(self):
2828

2929

3030
)
31-
print(self.cases[0][0])
32-
3331
def test_reshaping(self):
3432
_reshaping_test(self)
3533

pyproject.toml

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,47 @@
1+
[build-system]
2+
requires = ["hatchling"]
3+
build-backend = "hatchling.build"
4+
5+
[project]
6+
name = "arabic-reshaper"
7+
version = "3.0.0"
8+
description = "Reconstruct Arabic sentences to be used in applications that do not support Arabic"
9+
readme = "README.md"
10+
license = { text = "MIT" }
11+
authors = [
12+
{ name = "Abdullah Diab", email = "mpcabd@gmail.com" },
13+
]
14+
maintainers = [
15+
{ name = "Abdullah Diab", email = "mpcabd@gmail.com" },
16+
]
17+
keywords = ["arabic", "shaping", "reshaping", "reshaper"]
18+
classifiers = [
19+
"Natural Language :: Arabic",
20+
"Operating System :: OS Independent",
21+
"Programming Language :: Python :: 3",
22+
"Programming Language :: Python :: 3.10",
23+
"Programming Language :: Python :: 3.11",
24+
"Programming Language :: Python :: 3.12",
25+
"Programming Language :: Python :: 3.13",
26+
"Topic :: Software Development :: Libraries :: Python Modules",
27+
]
28+
requires-python = ">=3.10"
29+
30+
[project.optional-dependencies]
31+
with-fonttools = ["fonttools>=4.0"]
32+
33+
[project.urls]
34+
Homepage = "https://github.com/mpcabd/python-arabic-reshaper/"
35+
Repository = "https://github.com/mpcabd/python-arabic-reshaper/"
36+
37+
[dependency-groups]
38+
dev = ["pytest>=8", "pytest-cov"]
39+
40+
[tool.hatch.build]
41+
exclude = ["arabic_reshaper/tests"]
42+
43+
[tool.pytest.ini_options]
44+
testpaths = ["arabic_reshaper/tests"]
45+
46+
[tool.uv]
47+
package = true

setup.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

tox.ini

Lines changed: 0 additions & 7 deletions
This file was deleted.

0 commit comments

Comments
 (0)