Skip to content

Commit 3348757

Browse files
author
jayeshmepani
committed
Initial commit: 1:1 C-FFI wrapper for libpostal with cross-platform build system
1 parent 98a244a commit 3348757

17 files changed

Lines changed: 1172 additions & 0 deletions

File tree

.github/workflows/ci.yml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Continuous integration for pushes and pull requests that target main.
# Runs the test suite across an OS x Python matrix and builds the
# wheel/sdist once, publishing the result as a workflow artifact.
name: CI

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

# Neither job writes to the repository, so keep the token read-only.
permissions:
  contents: read

jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      # Let every OS/Python combination run to completion; with the default
      # (fail-fast: true) the first failure cancels the remaining jobs and
      # hides platform-specific breakage.
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
          - macos-latest
          - windows-latest
        # Versions stay quoted so "3.10" is not parsed as the float 3.1.
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
          - "3.12"

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[dev]"

      - name: Test with pytest
        run: pytest tests/

  build-wheels:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install build tools
        run: python -m pip install build

      - name: Build wheel and sdist
        run: python -m build

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: python-packages
          path: dist/*
          # Fail instead of silently uploading nothing when the build
          # produced no files (same policy as the release workflow).
          if-no-files-found: error
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
# Builds the libpostal shared library on every supported platform and
# attaches the resulting archives, plus one SHA256 checksum file per archive,
# to a GitHub Release.
#
# Triggers:
#   - release published: assets go to the release that was just published.
#   - manual dispatch:   assets go to the release for the tag given as input.
name: Release Prebuilt Libraries

on:
  release:
    types:
      - published
  workflow_dispatch:
    inputs:
      tag:
        description: Tag name to upload assets to, for example v1.0.0
        required: true
        type: string

# Read-only by default; only the upload job below is granted write access.
permissions:
  contents: read

env:
  # Tag the assets belong to. Resolved with an expression rather than inside
  # a shell script, so the user-supplied value is never interpolated into
  # executable bash (avoids script injection through the tag input).
  RELEASE_TAG: ${{ github.event.release.tag_name || inputs.tag }}

jobs:
  build-linux-x64:
    name: Build Linux x64
    runs-on: ubuntu-24.04
    steps:
      - uses: actions/checkout@v4
        with:
          # Build from the tagged commit, including on manual dispatch,
          # so the binaries match the release they are attached to.
          ref: refs/tags/${{ env.RELEASE_TAG }}

      - name: Install build tools
        run: sudo apt-get update && sudo apt-get install -y curl autoconf automake libtool pkg-config git

      - name: Compile shared library
        run: bash build/compile-linux.sh

      - name: Create release archive
        run: tar -czf build/libpostal-linux-x64.tar.gz -C postalkit/libs/linux-x64 libpostal.so

      - name: Upload workflow artifact
        uses: actions/upload-artifact@v4
        with:
          name: libpostal-linux-x64
          path: build/libpostal-linux-x64.tar.gz
          if-no-files-found: error

  build-linux-arm64:
    name: Build Linux arm64
    runs-on: ubuntu-24.04-arm
    steps:
      - uses: actions/checkout@v4
        with:
          ref: refs/tags/${{ env.RELEASE_TAG }}

      - name: Install build tools
        run: sudo apt-get update && sudo apt-get install -y curl autoconf automake libtool pkg-config git

      - name: Compile shared library
        run: bash build/compile-linux.sh

      - name: Create release archive
        run: tar -czf build/libpostal-linux-arm64.tar.gz -C postalkit/libs/linux-arm64 libpostal.so

      - name: Upload workflow artifact
        uses: actions/upload-artifact@v4
        with:
          name: libpostal-linux-arm64
          path: build/libpostal-linux-arm64.tar.gz
          if-no-files-found: error

  build-macos-x64:
    name: Build macOS x64
    # NOTE(review): GitHub has announced retirement of the macos-13 hosted
    # image; confirm this label is still available and plan a move to a
    # newer Intel runner label.
    runs-on: macos-13
    steps:
      - uses: actions/checkout@v4
        with:
          ref: refs/tags/${{ env.RELEASE_TAG }}

      - name: Install build tools
        run: brew install autoconf automake libtool pkg-config

      - name: Compile shared library
        run: bash build/compile-macos.sh

      - name: Create release archive
        run: tar -czf build/libpostal-macos-x64.tar.gz -C postalkit/libs/macos-x64 libpostal.dylib

      - name: Upload workflow artifact
        uses: actions/upload-artifact@v4
        with:
          name: libpostal-macos-x64
          path: build/libpostal-macos-x64.tar.gz
          if-no-files-found: error

  build-macos-arm64:
    name: Build macOS arm64
    runs-on: macos-14
    steps:
      - uses: actions/checkout@v4
        with:
          ref: refs/tags/${{ env.RELEASE_TAG }}

      - name: Install build tools
        run: brew install autoconf automake libtool pkg-config

      - name: Compile shared library
        run: bash build/compile-macos.sh

      - name: Create release archive
        run: tar -czf build/libpostal-macos-arm64.tar.gz -C postalkit/libs/macos-arm64 libpostal.dylib

      - name: Upload workflow artifact
        uses: actions/upload-artifact@v4
        with:
          name: libpostal-macos-arm64
          path: build/libpostal-macos-arm64.tar.gz
          if-no-files-found: error

  build-windows-x64:
    name: Build Windows x64
    runs-on: windows-2022
    steps:
      - uses: actions/checkout@v4
        with:
          ref: refs/tags/${{ env.RELEASE_TAG }}

      - name: Set up MSYS2 with MinGW
        uses: msys2/setup-msys2@v2
        with:
          msystem: MINGW64
          update: true
          install: >-
            base-devel
            mingw-w64-x86_64-toolchain
            mingw-w64-x86_64-autotools
            git

      - name: Compile shared library
        shell: pwsh
        run: .\build\compile-windows.ps1

      - name: Create release archive
        shell: pwsh
        run: Compress-Archive -Path postalkit\libs\windows-x64\postal.dll -DestinationPath build\libpostal-windows-x64.zip -Force

      - name: Upload workflow artifact
        uses: actions/upload-artifact@v4
        with:
          name: libpostal-windows-x64
          path: build/libpostal-windows-x64.zip
          if-no-files-found: error

  upload-release-assets:
    name: Upload Release Assets
    if: github.event_name == 'release' || github.event_name == 'workflow_dispatch'
    needs:
      - build-linux-x64
      - build-linux-arm64
      - build-macos-x64
      - build-macos-arm64
      - build-windows-x64
    runs-on: ubuntu-latest
    # The only job that modifies the repository (it edits a release).
    permissions:
      contents: write
    steps:
      - name: Download workflow artifacts
        uses: actions/download-artifact@v4
        with:
          path: dist
          # Flatten all per-platform artifacts into dist/ directly.
          merge-multiple: true

      - name: Generate SHA256 Checksums
        run: |
          cd dist
          # The glob is expanded once, before the loop body creates the
          # .sha256 files, so the checksum files are not themselves hashed.
          for file in *; do
            sha256sum "$file" > "${file}.sha256"
          done

      - name: Attach assets to GitHub Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ env.RELEASE_TAG }}
          files: |
            dist/libpostal-linux-x64.tar.gz
            dist/libpostal-linux-x64.tar.gz.sha256
            dist/libpostal-linux-arm64.tar.gz
            dist/libpostal-linux-arm64.tar.gz.sha256
            dist/libpostal-macos-x64.tar.gz
            dist/libpostal-macos-x64.tar.gz.sha256
            dist/libpostal-macos-arm64.tar.gz
            dist/libpostal-macos-arm64.tar.gz.sha256
            dist/libpostal-windows-x64.zip
            dist/libpostal-windows-x64.zip.sha256
          # Abort rather than publish an incomplete set of assets.
          fail_on_unmatched_files: true

.gitignore

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
# build/ also holds the compile scripts used by the release workflow
# (compile-linux.sh, compile-macos.sh, compile-windows.ps1). Ignore the
# directory's contents rather than the directory itself: files cannot be
# re-included once their parent directory is excluded.
build/*
!build/*.sh
!build/*.ps1
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Environment
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# IDEs
.idea/
.vscode/
*.swp
*.swo

# Project specific ignores
# Per-platform library directories: ignore everything inside each of them.
postalkit/libs/*/*
# NOTE(review): README.md directly under postalkit/libs/ is not matched by
# the pattern above, so this negation currently has no effect; confirm intent.
!postalkit/libs/README.md

# Build artifacts
libpostal/
# Release archives written by the release workflow (also covered by build/*).
build/*.tar.gz
build/*.zip

README.md

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# PostalKit
2+
3+
**Zero-setup, one-command install Python package for libpostal. Designed as a strict 1:1 C-FFI wrapper.**
4+
5+
Parsing international street addresses shouldn't require a Ph.D. in C compilation. `postalkit` provides the ultimate zero-friction environment to run the amazing [libpostal](https://github.com/openvenues/libpostal) C library natively in Python, without abstracting away its raw power.
6+
7+
Like PHP's `FFI` extension, PostalKit exposes the exact C structs, constants, and functions, so you can port C logic directly to Python.
8+
9+
## ✨ Why PostalKit?
10+
11+
The standard `postal` package requires you to manually compile C code, install `autoconf`, `make`, `pkg-config`, and manually download a ~2GB machine learning model.
12+
13+
**PostalKit handles everything automatically:**
14+
- **Zero C compilation:** Downloads pre-compiled `libpostal` shared binaries for your OS and architecture.
15+
- **Auto-downloads models:** Fetches the required libpostal ML models transparently on first use.
16+
- **Strict 1:1 C Mapping:** Exposes `libpostal_parse_address`, `libpostal_expand_address`, and all `ctypes` structs exactly as defined in `libpostal.h`.
17+
- **Cross-platform:** Works on Linux (x86_64, arm64), macOS (Intel, Apple Silicon), and Windows.
18+
19+
## 📦 Installation
20+
21+
```bash
22+
pip install postalkit
23+
```
24+
25+
## 🚀 Quickstart
26+
27+
Because this is a **true 1:1 FFI wrapper**, you use the exact function names and C-structs defined in the upstream libpostal C headers. Memory is managed precisely as it is in C.
28+
29+
```python
30+
import ctypes
31+
import postalkit
32+
33+
# 1. Get the C-struct for parser options
34+
options = postalkit.libpostal_get_address_parser_default_options()
35+
36+
# 2. Call the C-function directly (strings must be passed as bytes)
37+
address = b"221B Baker St London"
38+
response_ptr = postalkit.libpostal_parse_address(address, options)
39+
40+
# 3. Access the raw C-arrays
41+
response = response_ptr.contents
42+
for i in range(response.num_components):
43+
component = response.components[i].decode('utf-8')
44+
label = response.labels[i].decode('utf-8')
45+
print(f"{label}: {component}")
46+
47+
# 4. Manually destroy the C pointer to free memory, exactly as in C!
48+
postalkit.libpostal_address_parser_response_destroy(response_ptr)
49+
```
50+
51+
## 🧠 True 1:1 FFI Coverage
52+
53+
This package leaves absolutely nothing behind. It natively exposes:
54+
- **All 46 C functions** (`libpostal_tokenize`, `libpostal_classify_language`, `libpostal_is_name_duplicate_fuzzy`, etc.)
55+
- **All 10 C Structs** (`libpostal_normalize_options_t`, `libpostal_duplicate_options_t`, etc.)
56+
- **All 42 C Constants & Bitwise Flags** (`LIBPOSTAL_ADDRESS_HOUSE_NUMBER`, `LIBPOSTAL_NORMALIZE_TOKEN_DELETE_HYPHENS`, etc.)
57+
58+
You can directly port any libpostal C/C++ tutorial code into Python line-by-line.
59+
60+
## 🛠️ Advanced Usage
61+
62+
**Pre-downloading assets (e.g., for Docker images or CI):**
63+
```python
64+
from postalkit.data.manager import ensure_all_assets
65+
ensure_all_assets()
66+
```
67+
68+
## 📄 License
69+
70+
MIT License.

postalkit/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
"""
2+
PostalKit: Zero-setup, one-command install Python package for libpostal.
3+
Provides a 1:1 FFI mapping to the underlying libpostal C library.
4+
"""
5+
6+
from .core.ffi import *
7+
8+
# Package version string, exposed as `postalkit.__version__`.
__version__ = "0.1.0"

postalkit/core/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .ffi import initialize
2+
from ..exceptions import PostalKitError, InitializationError, DependencyMissingError
3+
4+
# Names exported by `from postalkit.core import *`: the FFI initializer plus
# the package's exception classes imported above.
__all__ = ["initialize", "PostalKitError", "InitializationError", "DependencyMissingError"]

0 commit comments

Comments
 (0)