refactor: initial commit

flavioschneider · flavioschneider · commit b0241ce354dc · 2022-11-09T16:34:10.000+01:00
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -0,0 +1,39 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [published]
+
+permissions:
+  contents: read
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v3
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build
+    - name: Build package
+      run: python -m build
+    - name: Publish package
+      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+      with:
+        user: __token__
+        password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__
+.mypy_cache
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,41 @@
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v2.3.0
+    hooks:
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+
+# Formats code correctly
+-   repo: https://github.com/psf/black
+    rev: 22.3.0
+    hooks:
+    -   id: black
+        args: [
+            '--experimental-string-processing'
+        ]
+
+# Sorts imports
+-   repo: https://github.com/pycqa/isort
+    rev: 5.10.1
+    hooks:
+    -   id: isort
+        name: isort (python)
+        args: ["--profile", "black"]
+
+# Checks unused imports, line lengths, etc
+# NOTE: flake8 is hosted on GitHub; the former GitLab repository no longer exists.
+-   repo: https://github.com/pycqa/flake8
+    rev: 4.0.0
+    hooks:
+    -   id: flake8
+        args: [
+            '--per-file-ignores=__init__.py:F401',
+            '--max-line-length=88',
+            '--ignore=E1,W1,E2,W2,E4,W4,E5,W5' # Handled by black
+        ]
+
+# Checks types
+-   repo: https://github.com/pre-commit/mirrors-mypy
+    rev: 'v0.971'
+    hooks:
+    -   id: mypy
+        additional_dependencies: [data-science-types>=0.2, torch>=1.6]
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 archinet.ai
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,75 @@
+
+# Bitcodes - PyTorch
+
+A new vector quantization method with binary codes, in PyTorch.
+
+```bash
+pip install bitcodes-pytorch
+```
+[![PyPI - Python Version](https://img.shields.io/pypi/v/bitcodes-pytorch?style=flat&colorA=black&colorB=black)](https://pypi.org/project/bitcodes-pytorch/)
+
+
+## Usage
+
+### Quantize
+```python
+import torch
+from bitcodes_pytorch import Bitcodes
+
+bitcodes = Bitcodes(
+    features=8,
+    num_bits=4,
+    temperature=10,
+)
+
+# Set to eval during inference to make deterministic
+bitcodes.eval()
+
+x = torch.randn(1, 6, 8)
+# Computes y, the quantized version of x, and the bitcodes
+y, bits = bitcodes(x)
+
+"""
+y.shape = torch.Size([1, 6, 8])
+
+bits = tensor([[
+  [0, 0, 0, 0],
+  [1, 0, 1, 1],
+  [1, 0, 0, 1],
+  [1, 0, 0, 0],
+  [0, 1, 1, 1],
+  [0, 0, 1, 0]
+]])
+"""
+```
+
+### Recover Output from Bits
+```python
+y_decoded = bitcodes.from_bits(bits)
+
+assert torch.allclose(y, y_decoded) # Assert passes in eval mode!
+```
+
+### Decimal-Binary Conversion
+```python
+from bitcodes_pytorch import to_decimal, to_bits
+
+indices = to_decimal(bits)
+# tensor([[ 0, 11,  9,  8,  7,  2]])
+
+bits = to_bits(indices, num_bits=4)
+
+"""
+bits = tensor([[
+  [0, 0, 0, 0],
+  [1, 0, 1, 1],
+  [1, 0, 0, 1],
+  [1, 0, 0, 0],
+  [0, 1, 1, 1],
+  [0, 0, 1, 0]
+]])
+"""
+```
+
+## Explanation
+
+TODO
diff --git a/bitcodes_pytorch/__init__.py b/bitcodes_pytorch/__init__.py
@@ -0,0 +1 @@
+from .bitcodes import Bitcodes, to_bits, to_decimal
diff --git a/bitcodes_pytorch/bitcodes.py b/bitcodes_pytorch/bitcodes.py
@@ -0,0 +1,49 @@
+from typing import Tuple
+
+import torch
+import torch.nn.functional as F
+from einops import rearrange
+from torch import Tensor, einsum, nn
+
+""" Utils """
+
+
+def to_bits(indices: Tensor, num_bits: int) -> Tensor:
+    bitmask = 2 ** torch.arange(num_bits - 1, -1, -1)
+    return indices.unsqueeze(-1).bitwise_and(bitmask).ne(0).long()
+
+
+def to_decimal(bits: Tensor) -> Tensor:
+    num_bits = bits.shape[-1]
+    bitmask = 2 ** torch.arange(num_bits - 1, -1, -1)
+    return torch.sum(bitmask * bits, dim=-1)
+
+
+""" Bincodes """
+
+
+class Bitcodes(nn.Module):
+    def __init__(self, features: int, num_bits: int, temperature: int):
+        super().__init__()
+        self.temperature = temperature
+        self.codebook = nn.Parameter(torch.randn(2 * num_bits, features))
+
+    def from_bits(self, bits: Tensor) -> Tensor:
+        attn = F.one_hot(bits.long(), num_classes=2).float()
+        attn = rearrange(attn, "b m p q -> b m (p q)")
+        out = einsum("b m n, n d -> b m d", attn, self.codebook)
+        return out
+
+    def forward(self, x: Tensor) -> Tuple[Tensor, Tensor]:
+        sim = einsum("b m d, n d -> b m n", x, self.codebook)
+        pairs = rearrange(sim, "b m (p q) -> b m p q", q=2)
+
+        if self.training:
+            attn = F.gumbel_softmax(pairs, tau=self.temperature, dim=-1, hard=True)
+        else:
+            attn = F.one_hot(pairs.argmax(dim=-1), num_classes=2).float()
+
+        attn = rearrange(attn, "b m p q -> b m (p q)")
+        out = einsum("b m n, n d -> b m d", attn, self.codebook)
+        bits = pairs.argmax(dim=-1)
+        return out, bits
diff --git a/setup.py b/setup.py
@@ -0,0 +1,26 @@
+from setuptools import find_packages, setup
+
+setup(
+    name="bitcodes-pytorch",
+    packages=find_packages(exclude=[]),
+    version="0.0.1",
+    license="MIT",
+    description="Bitcodes - Pytorch",
+    long_description_content_type="text/markdown",
+    author="Flavio Schneider",
+    author_email="archinetai@protonmail.com",
+    url="https://github.com/archinetai/bitcodes-pytorch",
+    keywords=["artificial intelligence", "deep learning"],
+    install_requires=[
+        "torch>=1.6",
+        "data-science-types>=0.2",
+        "einops>=0.4",
+    ],
+    classifiers=[
+        "Development Status :: 4 - Beta",
+        "Intended Audience :: Developers",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Python :: 3.6",
+    ],
+)

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+from .bitcodes import Bitcodes, to_bits, to_decimal`