diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 18c7e488..6e9bb403 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,10 +16,10 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v6 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: 3.9 @@ -57,29 +57,13 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [macos-latest, ubuntu-latest, windows-latest] - python-version: [36, 37, 38, 39, 310, 311, 312] - cibw-arch: [auto, aarch64] - exclude: - - os: macos-latest - cibw-arch: aarch64 - - os: windows-latest - cibw-arch: aarch64 + os: [macos-latest, ubuntu-latest, windows-latest, ubuntu-24.04-arm] steps: - - uses: actions/checkout@v3 - - - name: Set up QEMU - if: runner.os == 'Linux' - uses: docker/setup-qemu-action@v3 - with: - platforms: all + - uses: actions/checkout@v6 - name: Build wheels - uses: pypa/cibuildwheel@v2.16.2 - env: - CIBW_BUILD: cp${{matrix.python-version}}-* - CIBW_ARCHS_LINUX: ${{ matrix.cibw-arch }} + uses: pypa/cibuildwheel@v3.4.1 - name: Build sdist run: | @@ -93,28 +77,28 @@ jobs: if: runner.os == 'Linux' - name: Upload Binaries - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v7 with: - name: wheels - path: wheelhouse + name: wheels-${{ matrix.os }} + path: ./wheelhouse/* test-wheels: needs: [build-wheels] runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.7, 3.8, 3.9, '3.10', '3.11', '3.12'] - os: [macos-latest, ubuntu-latest, windows-latest] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] + os: [macos-latest, ubuntu-latest, windows-latest, ubuntu-24.04-arm] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v6 with: path: implicit_source - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v8 with: - name: wheels + name: wheels-${{ matrix.os }} - name: Set up Python ${{ 
matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -122,13 +106,9 @@ jobs: python -m pip install --upgrade pip pip install pytest pip install -r implicit_source/requirements.txt - - name: Install h5py - run: pip install h5py - - name: Install ANN Libraries run: pip install annoy nmslib - if: ${{ matrix.python-version != '3.12' && matrix.python-version != '3.11' && runner.os == 'Linux' }} - + if: ${{ matrix.python-version != '3.11' && matrix.python-version != '3.12' && matrix.python-version != '3.13' && matrix.python-version != '3.14' && runner.os == 'Linux' }} - name: Install wheel run: | pip install --force-reinstall --no-deps --no-index --find-links . implicit @@ -142,15 +122,16 @@ jobs: if: "startsWith(github.ref, 'refs/tags/')" needs: [test-wheels] steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v8 with: - name: wheels + pattern: wheels-* + merge-multiple: true - name: Create GitHub Release uses: fnkr/github-action-ghr@v1.3 env: GHR_PATH: . 
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v6 with: python-version: 3.9 - name: Push to PyPi diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 05a10a43..97af9980 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,19 +1,19 @@ repos: - repo: https://github.com/timothycrosley/isort - rev: 5.12.0 + rev: 8.0.1 hooks: - id: isort additional_dependencies: [toml] - repo: https://github.com/python/black - rev: 23.3.0 + rev: 26.1.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 + rev: 7.3.0 hooks: - id: flake8 - repo: https://github.com/pycqa/pylint - rev: v2.17.4 + rev: v4.0.5 hooks: - id: pylint - repo: https://github.com/codespell-project/codespell diff --git a/.pylintrc b/.pylintrc index ead351a9..515aeaef 100644 --- a/.pylintrc +++ b/.pylintrc @@ -45,3 +45,6 @@ disable=fixme, min-similarity-lines=64 ignore-docstrings=yes ignore-imports=yes + +[DESIGN] +max-positional-arguments=12 diff --git a/benchmarks/benchmark_als.py b/benchmarks/benchmark_als.py index 5803d5c1..35122992 100644 --- a/benchmarks/benchmark_als.py +++ b/benchmarks/benchmark_als.py @@ -1,4 +1,5 @@ -""" test script to verify the CG method works, and time it versus cholesky """ +"""test script to verify the CG method works, and time it versus cholesky""" + import argparse import json import logging diff --git a/examples/lastfm.py b/examples/lastfm.py index 6de0b8a4..8dc1fd56 100644 --- a/examples/lastfm.py +++ b/examples/lastfm.py @@ -1,4 +1,4 @@ -""" An example of using this library to calculate related artists +"""An example of using this library to calculate related artists from the last.fm dataset. More details can be found at http://www.benfrederickson.com/matrix-factorization/ @@ -6,8 +6,8 @@ GitHub when it is first run. 
The original dataset can also be found at http://ocelma.net/MusicRecommendationDataset/lastfm-360K.html """ + import argparse -import codecs import logging import time @@ -102,7 +102,7 @@ def calculate_similar_artists(output_filename, model_name="als"): # write out as a TSV of artistid, otherartistid, score logging.debug("writing similar items") with tqdm.tqdm(total=len(to_generate)) as progress: - with codecs.open(output_filename, "w", "utf8") as o: + with open(output_filename, "w", encoding="utf8") as o: batch_size = 1000 for startidx in range(0, len(to_generate), batch_size): batch = to_generate[startidx : startidx + batch_size] @@ -146,7 +146,7 @@ def calculate_recommendations(output_filename, model_name="als"): # generate recommendations for each user and write out to a file start = time.time() with tqdm.tqdm(total=len(users)) as progress: - with codecs.open(output_filename, "w", "utf8") as o: + with open(output_filename, "w", encoding="utf8") as o: batch_size = 1000 to_generate = np.arange(len(users)) for startidx in range(0, len(to_generate), batch_size): diff --git a/examples/movielens.py b/examples/movielens.py index a9c144ed..8df0b7ec 100644 --- a/examples/movielens.py +++ b/examples/movielens.py @@ -13,7 +13,6 @@ from __future__ import print_function import argparse -import codecs import logging import time @@ -87,7 +86,7 @@ def calculate_similar_movies(output_filename, model_name="als", min_rating=4.0, log.debug("calculating similar movies") with tqdm.tqdm(total=len(to_generate)) as progress: - with codecs.open(output_filename, "w", "utf8") as o: + with open(output_filename, "w", encoding="utf8") as o: batch_size = 1000 for startidx in range(0, len(to_generate), batch_size): batch = to_generate[startidx : startidx + batch_size] diff --git a/examples/tutorial_lastfm.ipynb b/examples/tutorial_lastfm.ipynb index cd39397f..7fbf7b39 100644 --- a/examples/tutorial_lastfm.ipynb +++ b/examples/tutorial_lastfm.ipynb @@ -293,7 +293,7 @@ "# Use pandas to display 
the output in a table, pandas isn't a dependency of implicit otherwise\n", "import numpy as np\n", "import pandas as pd\n", - "pd.DataFrame({\"artist\": artists[ids], \"score\": scores, \"already_liked\": np.in1d(ids, user_plays[userid].indices)})" + "pd.DataFrame({\"artist\": artists[ids], \"score\": scores, \"already_liked\": np.isin(ids, user_plays[userid].indices)})" ] }, { diff --git a/implicit/_nearest_neighbours.pyx b/implicit/_nearest_neighbours.pyx index 84fa810f..85f32f09 100644 --- a/implicit/_nearest_neighbours.pyx +++ b/implicit/_nearest_neighbours.pyx @@ -11,6 +11,7 @@ from cython cimport floating, integral from cython.operator import dereference from cython.parallel import parallel, prange +from libc.stdint cimport int32_t from libcpp cimport bool from libcpp.algorithm cimport sort_heap from libcpp.utility cimport pair @@ -131,8 +132,8 @@ def all_pairs_knn(users, unsigned int K=100, int num_threads=0, show_progress=Tr # holds triples of output cdef double[:] values = np.zeros(item_count * K) - cdef long[:] rows = np.zeros(item_count * K, dtype=int) - cdef long[:] cols = np.zeros(item_count * K, dtype=int) + cdef int32_t[:] rows = np.zeros(item_count * K, dtype="int32") + cdef int32_t[:] cols = np.zeros(item_count * K, dtype="int32") progress = tqdm(total=item_count, disable=not show_progress) with nogil, parallel(num_threads=num_threads): diff --git a/implicit/ann/annoy.py b/implicit/ann/annoy.py index 87d1c44a..29231407 100644 --- a/implicit/ann/annoy.py +++ b/implicit/ann/annoy.py @@ -12,7 +12,6 @@ class AnnoyModel(RecommenderBase): - """Speeds up inference calls to MatrixFactorization models by using an `Annoy `_ index to calculate similar items and recommend items. 
diff --git a/implicit/ann/nmslib.py b/implicit/ann/nmslib.py index 320e9fdf..18f49ed9 100644 --- a/implicit/ann/nmslib.py +++ b/implicit/ann/nmslib.py @@ -12,7 +12,6 @@ class NMSLibModel(RecommenderBase): - """Speeds up inference calls to MatrixFactorization models by using `NMSLib `_ to create approximate nearest neighbours indices of the latent factors. diff --git a/implicit/approximate_als.py b/implicit/approximate_als.py index f3e3a04f..daa7f7d6 100644 --- a/implicit/approximate_als.py +++ b/implicit/approximate_als.py @@ -1,8 +1,9 @@ -""" Models that use various Approximate Nearest Neighbours libraries in order to quickly +"""Models that use various Approximate Nearest Neighbours libraries in order to quickly generate recommendations and lists of similar items. See http://www.benfrederickson.com/approximate-nearest-neighbours-for-recommender-systems/ """ + import implicit.gpu diff --git a/implicit/cpu/als.py b/implicit/cpu/als.py index 1ec7a919..9032a1a5 100644 --- a/implicit/cpu/als.py +++ b/implicit/cpu/als.py @@ -1,4 +1,5 @@ -""" Implicit Alternating Least Squares """ +"""Implicit Alternating Least Squares""" + import functools import heapq import logging diff --git a/implicit/cpu/matrix_factorization_base.py b/implicit/cpu/matrix_factorization_base.py index dcd37fa8..a448e839 100644 --- a/implicit/cpu/matrix_factorization_base.py +++ b/implicit/cpu/matrix_factorization_base.py @@ -1,4 +1,5 @@ -""" Base class for recommendation algorithms in this package """ +"""Base class for recommendation algorithms in this package""" + import warnings import numpy as np diff --git a/implicit/evaluation.pyx b/implicit/evaluation.pyx index bd590401..403176fb 100644 --- a/implicit/evaluation.pyx +++ b/implicit/evaluation.pyx @@ -431,7 +431,7 @@ def ranking_metrics_at_k(model, train_user_items, test_user_items, int K=10, while start_idx < len(to_generate): batch = to_generate[start_idx: start_idx + batch_size] - ids, _ = model.recommend(batch, train_user_items[batch], N=K) 
+ ids, _ = model.recommend(batch, train_user_items[np.asarray(batch)], N=K) start_idx += batch_size with nogil: diff --git a/implicit/nearest_neighbours.py b/implicit/nearest_neighbours.py index d33717dd..a1f36c1e 100644 --- a/implicit/nearest_neighbours.py +++ b/implicit/nearest_neighbours.py @@ -89,15 +89,15 @@ def recommend( ) if filter_items is not None: - mask = np.in1d(ids, filter_items, invert=True) + mask = np.isin(ids, filter_items, invert=True) ids, scores = ids[mask][:N], scores[mask][:N] elif items is not None: - mask = np.in1d(ids, items) + mask = np.isin(ids, items) ids, scores = ids[mask], scores[mask] # returned items should be equal to input selected items - missing = items[np.in1d(items, ids, invert=True)] + missing = items[np.isin(items, ids, invert=True)] if missing.size: ids = np.append(ids, missing) scores = np.append(scores, np.full(missing.size, -np.finfo(scores.dtype).max)) @@ -128,15 +128,15 @@ def similar_items( scores = self.similarity[itemid].data if filter_items is not None: - mask = np.in1d(ids, filter_items, invert=True) + mask = np.isin(ids, filter_items, invert=True) ids, scores = ids[mask], scores[mask] elif items is not None: - mask = np.in1d(ids, items) + mask = np.isin(ids, items) ids, scores = ids[mask], scores[mask] # returned items should be equal to input selected items - missing = items[np.in1d(items, ids, invert=True)] + missing = items[np.isin(items, ids, invert=True)] if missing.size: ids = np.append(ids, missing) scores = np.append(scores, np.full(missing.size, -np.finfo(scores.dtype).max)) diff --git a/implicit/recommender_base.py b/implicit/recommender_base.py index 25c7c0a1..acb3cb37 100644 --- a/implicit/recommender_base.py +++ b/implicit/recommender_base.py @@ -1,4 +1,5 @@ -""" Base class for recommendation algorithms in this package """ +"""Base class for recommendation algorithms in this package""" + import warnings from abc import ABCMeta, abstractmethod diff --git a/implicit/utils.py b/implicit/utils.py index 
4e86ee71..f61da18d 100644 --- a/implicit/utils.py +++ b/implicit/utils.py @@ -138,14 +138,14 @@ def _batch_call(func, ids, *args, N=10, **kwargs): def _filter_items_from_results(queryid, ids, scores, filter_items, N): if np.isscalar(queryid): - mask = np.in1d(ids, filter_items, invert=True) + mask = np.isin(ids, filter_items, invert=True) ids, scores = ids[mask][:N], scores[mask][:N] else: rows = len(queryid) filtered_scores = np.zeros((rows, N), dtype=scores.dtype) filtered_ids = np.zeros((rows, N), dtype=ids.dtype) for row in range(rows): - mask = np.in1d(ids[row], filter_items, invert=True) + mask = np.isin(ids[row], filter_items, invert=True) filtered_ids[row] = ids[row][mask][:N] filtered_scores[row] = scores[row][mask][:N] ids, scores = filtered_ids, filtered_scores diff --git a/pyproject.toml b/pyproject.toml index 5e95e224..e4e7169a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ build-backend = "setuptools.build_meta" # skip testing in the cibuildwheel phase, will install the wheels later # and verify test-command = "" -skip = ["pp*", "*musl*", "*-manylinux_i686", "*win32"] +skip = ["*t-*", "*musl*", "*-manylinux_i686", "*win32"] [[tool.cibuildwheel.overrides]] select = "*-manylinux_x86_64*" diff --git a/tests/recommender_base_test.py b/tests/recommender_base_test.py index b2e2edec..48d853e5 100644 --- a/tests/recommender_base_test.py +++ b/tests/recommender_base_test.py @@ -1,4 +1,5 @@ -""" Common test functions for all recommendation models """ +"""Common test functions for all recommendation models""" + import os import pickle import random