Skip to content

Commit 268e9f6

Browse files
committed
Add native pHash deduplication acceleration
1 parent 2c34b24 commit 268e9f6

8 files changed

Lines changed: 883 additions & 11 deletions

File tree

.circleci/config.yml

Lines changed: 162 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ version: 2.1
44
orbs:
55
slack: circleci/slack@4.4.2
66
python: circleci/python@2.1.0
7+
win: circleci/windows@5.0
78

89
jobs:
910
build_test:
@@ -75,11 +76,16 @@ jobs:
7576
event: fail
7677
template: basic_fail_1
7778

78-
pypi_publish:
79+
build_sdist:
7980
docker:
8081
- image: cimg/python:3.10
8182
steps:
8283
- checkout # checkout source code to working directory
84+
- run:
85+
name: Install Build Tools
86+
command: |
87+
pip install --upgrade pip
88+
pip install poetry
8389
- run:
8490
name: Validate Tag Version # Check if the tag name matches the package version
8591
command: |
@@ -99,9 +105,121 @@ jobs:
99105
exit 1;
100106
fi
101107
- run:
102-
name: Build
108+
name: Build sdist
103109
command: | # build only the source distribution (wheels are built by the separate wheel jobs)
104-
poetry build
110+
rm -rf dist
111+
poetry build --format sdist
112+
- persist_to_workspace:
113+
root: .
114+
paths:
115+
- dist
116+
117+
build_linux_wheels:
118+
docker:
119+
- image: cimg/python:3.10
120+
steps:
121+
- checkout # checkout source code to working directory
122+
- setup_remote_docker
123+
- run:
124+
name: Build Linux wheels
125+
command: |
126+
pip install --upgrade pip
127+
pip install cibuildwheel
128+
129+
# cibuildwheel only reads options whose names start with CIBW_; any other
# prefix is silently ignored, which would drop the build selection, the skip
# list and the post-build smoke test.
# NOTE(review): recent cibuildwheel releases may require a newer host Python
# than the 3.10 image used here, in which case pip resolves an older release
# that may not know cp314 - confirm the cp314 wheel is actually produced.
export CIBW_ARCHS_LINUX="x86_64"
130+
export CIBW_BUILD="cp310-manylinux_x86_64 cp311-manylinux_x86_64 cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp314-manylinux_x86_64"
131+
export CIBW_SKIP="pp* *-musllinux_*"
132+
# Smoke test run against each built wheel: imports the native module and
# checks one known deduplication result.
export CIBW_TEST_COMMAND='python -c "import nucleus._native_dedup as native; assert native.deduplicate_phashes([0, (1 << 10) - 1, (1 << 11) - 1], 10) == [0, 2]"'
133+
python -m cibuildwheel --platform linux --output-dir dist
134+
135+
ls -lh dist
136+
- persist_to_workspace:
137+
root: .
138+
paths:
139+
- dist
140+
141+
build_macos_wheels:
142+
macos:
143+
xcode: 16.4.0
144+
resource_class: m4pro.medium
145+
steps:
146+
- checkout # checkout source code to working directory
147+
- run:
148+
name: Build macOS wheels
149+
command: |
150+
python3 -m pip install --upgrade pip
151+
python3 -m pip install cibuildwheel
152+
softwareupdate --install-rosetta --agree-to-license || true
153+
154+
# cibuildwheel only reads options whose names start with CIBW_; any other
# prefix is silently ignored, which would drop the universal2 arch selection,
# the build selection and the post-build smoke test.
export CIBW_ARCHS_MACOS="universal2"
155+
export CIBW_BUILD="cp310-macosx_* cp311-macosx_* cp312-macosx_* cp313-macosx_* cp314-macosx_*"
156+
export CIBW_SKIP="pp*"
157+
# Smoke test run against each built wheel: imports the native module and
# checks one known deduplication result.
export CIBW_TEST_COMMAND='python -c "import nucleus._native_dedup as native; assert native.deduplicate_phashes([0, (1 << 10) - 1, (1 << 11) - 1], 10) == [0, 2]"'
158+
python3 -m cibuildwheel --platform macos --output-dir dist
159+
160+
ls -lh dist
161+
- persist_to_workspace:
162+
root: .
163+
paths:
164+
- dist
165+
166+
build_windows_wheels:
167+
executor:
168+
name: win/default
169+
size: medium
170+
steps:
171+
- checkout # checkout source code to working directory
172+
- run:
173+
name: Build Windows wheels
174+
command: |
175+
python -m pip install --upgrade pip
176+
python -m pip install cibuildwheel
177+
178+
# cibuildwheel only reads options whose names start with CIBW_; any other
# prefix is silently ignored, which would drop the build selection and the
# post-build smoke test.
$env:CIBW_BUILD = "cp310-win_amd64 cp311-win_amd64 cp312-win_amd64 cp313-win_amd64 cp314-win_amd64"
179+
$env:CIBW_SKIP = "pp*"
180+
# Smoke test run against each built wheel: imports the native module and
# checks one known deduplication result.
$env:CIBW_TEST_COMMAND = 'python -c "import nucleus._native_dedup as native; assert native.deduplicate_phashes([0, (1 << 10) - 1, (1 << 11) - 1], 10) == [0, 2]"'
181+
python -m cibuildwheel --platform windows --output-dir dist
182+
183+
Get-ChildItem dist
184+
- persist_to_workspace:
185+
root: .
186+
paths:
187+
- dist
188+
189+
pypi_publish:
190+
docker:
191+
- image: cimg/python:3.10
192+
steps:
193+
- checkout # checkout source code to working directory
194+
- attach_workspace:
195+
at: .
196+
- run:
197+
name: Install Publish Tools
198+
command: |
199+
pip install --upgrade pip
200+
pip install poetry
201+
- run:
202+
name: Validate Tag Version # Check if the tag name matches the package version
203+
command: |
204+
PKG_VERSION=$(sed -n 's/^version = //p' pyproject.toml | sed -e 's/^"//' -e 's/"$//')
205+
if [[ "$CIRCLE_TAG" != "v${PKG_VERSION}" ]]; then
206+
echo "ERROR: Tag name ($CIRCLE_TAG) must match package version (v${PKG_VERSION})."
207+
exit 1;
208+
fi
209+
- run:
210+
name: Validate SDK Version Increment # Check if the version is already on PyPI
211+
command: |
212+
PKG_VERSION=$(sed -n 's/^version = //p' pyproject.toml | sed -e 's/^"//' -e 's/"$//')
213+
if pip install "scale-nucleus>=${PKG_VERSION}" > /dev/null 2>&1;
214+
then
215+
echo "ERROR: You need to increment to a new version before publishing!"
216+
echo "Version (${PKG_VERSION}) already exists on PyPI."
217+
exit 1;
218+
fi
219+
- run:
220+
name: List artifacts to publish
221+
command: |
222+
ls -lh dist
105223
- run:
106224
name: Publish to PyPI
107225
command: |
@@ -165,12 +283,51 @@ workflows:
165283
filters:
166284
tags:
167285
only: /^v\d+\.\d+\.\d+$/ # Runs only for tags with the format [v1.2.3]
168-
- pypi_publish:
286+
- build_sdist:
287+
context: Nucleus
169288
requires:
170289
- build_test
171290
filters:
172291
branches:
173292
ignore: /.*/ # Runs for none of the branches
174293
tags:
175294
only: /^v\d+\.\d+\.\d+$/ # Runs only for tags with the format [v1.2.3]
176-
295+
- build_linux_wheels:
296+
context: Nucleus
297+
requires:
298+
- build_test
299+
filters:
300+
branches:
301+
ignore: /.*/ # Runs for none of the branches
302+
tags:
303+
only: /^v\d+\.\d+\.\d+$/ # Runs only for tags with the format [v1.2.3]
304+
- build_macos_wheels:
305+
context: Nucleus
306+
requires:
307+
- build_test
308+
filters:
309+
branches:
310+
ignore: /.*/ # Runs for none of the branches
311+
tags:
312+
only: /^v\d+\.\d+\.\d+$/ # Runs only for tags with the format [v1.2.3]
313+
- build_windows_wheels:
314+
context: Nucleus
315+
requires:
316+
- build_test
317+
filters:
318+
branches:
319+
ignore: /.*/ # Runs for none of the branches
320+
tags:
321+
only: /^v\d+\.\d+\.\d+$/ # Runs only for tags with the format [v1.2.3]
322+
- pypi_publish:
323+
context: Nucleus
324+
requires:
325+
- build_sdist
326+
- build_linux_wheels
327+
- build_macos_wheels
328+
- build_windows_wheels
329+
filters:
330+
branches:
331+
ignore: /.*/ # Runs for none of the branches
332+
tags:
333+
only: /^v\d+\.\d+\.\d+$/ # Runs only for tags with the format [v1.2.3]

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.18.5](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.18.5) - 2026-06-08
9+
10+
### Added
11+
- Native C acceleration for `deduplicate_by_phash`. When the compiled extension is available, all threshold values are handled in native code: thresholds `0` through `11` use the chunked Hamming index, thresholds `12` through `63` use a native linear scan, and threshold `64` uses the keep-first fast path. The public Python API is unchanged and falls back to the pure-Python implementation when the native extension is unavailable.
12+
13+
### Tooling / CI
14+
- Publish Linux `x86_64`, macOS `universal2`, and Windows `amd64` wheels for Python 3.10 through 3.14 using `cibuildwheel`, alongside the source distribution.
15+
816
## [0.18.4](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.18.4) - 2026-06-08
917

1018
### Added

build.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from __future__ import annotations
2+
3+
import sys
4+
5+
from setuptools import Extension
6+
7+
8+
# Register the native pHash deduplication C extension in the setup() keyword
# arguments. `setup_kwargs` is a dict that is updated in place; nothing is
# returned.
# NOTE(review): presumably called by the Poetry build-script hook - confirm
# against the build configuration in pyproject.toml.
def build(setup_kwargs):
9+
extra_compile_args = []
10+
# The GCC/Clang-style flags below are only added off Windows; on win32 the
# compiler defaults are used.
if sys.platform != "win32":
11+
extra_compile_args.extend(["-std=c11", "-O3"])
12+
13+
setup_kwargs.update(
14+
{
15+
"ext_modules": [
16+
Extension(
17+
"nucleus._native_dedup",
18+
["nucleus/_native_dedup.c"],
19+
extra_compile_args=extra_compile_args,
20+
# optional=True: a failed compile of this extension is skipped
# rather than aborting the whole build.
optional=True,
21+
)
22+
],
23+
}
24+
)

0 commit comments

Comments
 (0)