CLMBRs
diff --git a/‎.github/workflows/black.yml‎
Lines changed: 27 additions & 0 deletions b/‎.github/workflows/black.yml‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎.github/workflows/test.yml‎
Lines changed: 19 additions & 0 deletions b/‎.github/workflows/test.yml‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 674 additions & 0 deletions b/‎LICENSE‎
Lines changed: 674 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 21 additions & 1 deletion b/‎README.md‎
Lines changed: 21 additions & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 30 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎src/eff_conv/convexity.py‎
Lines changed: 168 additions & 0 deletions b/‎src/eff_conv/convexity.py‎
Lines changed: 168 additions & 0 deletions
diff --git a/‎src/eff_conv/ib/language.py‎
Lines changed: 106 additions & 0 deletions b/‎src/eff_conv/ib/language.py‎
Lines changed: 106 additions & 0 deletions
@@ -0,0 +1,27 @@
+# Formats pull-request branches with black and pushes the result back to the branch.
+name: auto-format
+
+on: pull_request
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    # The job commits and pushes, so the GITHUB_TOKEN needs write access to repository
+    # contents; the default token may be read-only depending on repository settings.
+    permissions:
+      contents: write
+    steps:
+      # Check out the PR's head branch (not the merge commit) so a commit can be added to it.
+      - uses: actions/checkout@v3
+        with:
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+          ref: ${{ github.event.pull_request.head.ref }}
+      - name: black
+        uses: psf/black@stable
+        with:
+          # Empty options replace the action's default check-only flags so files are rewritten in place.
+          options: ""
+      - name: Check for modified files
+        id: git-check
+        run: echo "modified=$(if git diff-index --quiet HEAD --; then echo "false"; else echo "true"; fi)" >> "$GITHUB_OUTPUT"
+      - name: Push changes
+        # Only push for branches that live in this repository: the remote below points at
+        # github.repository, and the token cannot write to a contributor's fork anyway.
+        if: steps.git-check.outputs.modified == 'true' && github.event.pull_request.head.repo.full_name == github.repository
+        run: |
+          git config --global user.name 'github-actions[bot]'
+          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
+          git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}
+          git commit -am "Automated black formatting"
+          git push
@@ -0,0 +1,19 @@
+# Runs the pytest suite on every push.
+name: test
+
+# Triggered by pushes only; pull_request events are not listed here.
+on: [push]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+      # Editable install of the package from the repository root.
+      - name: Install package 
+        run: pip install -e .
+      # pytest is installed explicitly here and then run with its default discovery.
+      - name: Test with pytest
+        run: |
+          pip install pytest 
+          pytest
@@ -205,3 +205,6 @@ cython_debug/
 marimo/_static/
 marimo/_lsp/
 __marimo__/
+
+# macOS Finder metadata
+.DS_Store
@@ -1 +1,21 @@
-# efficiency-convexity
+# Efficiency & Convexity
+
+## Installing Efficiency & Convexity
+
+First, set up a virtual environment (e.g. via [miniconda](https://docs.conda.io/en/latest/miniconda.html), `conda create -n eff_conv python=3.11`, and `conda activate eff_conv`).
+
+1. Download or clone this repository and navigate to the root folder.
+
+2. Install Efficiency & Convexity in editable mode (we recommend doing this inside a virtual environment):
+
+   `pip install -e .`
+
+## References
+
+<details>
+<summary>Links:</summary>
+
+> Tishby, N., Pereira, F. C., & Bialek, W. (2000). The information bottleneck method. Proceedings of the 37th Annual Allerton Conference on Communication, Control, and Computing, pp. 368–377. https://doi.org/10.48550/arXiv.physics/0004057
+
+> Skinner, L. (2025). Convexity is a Fundamental Feature of Efficient Semantic Compression in Probability Spaces. https://hdl.handle.net/1773/53008
+
+</details>
@@ -0,0 +1,30 @@
+[build-system]
+# The `[project]` table below is only understood by setuptools 61.0 and newer.
+requires = ["setuptools>=61", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "eff_conv"
+version = "0.1.0"
+authors = [
+  { name="Ashvin Ranjan", email="ar31@uw.edu"},
+  { name="Shane Steinert-Threlkeld", email="shanest@uw.edu" },
+]
+description = "Efficiency & Convexity is a toolkit for generating and analyzing encoders under the Information Bottleneck framework and calculating the quasi-convexity of probability distributions."
+readme = "README.md"
+# The package uses functools.cached_property, which was added in Python 3.8.
+requires-python = ">=3.8"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
+    "Operating System :: OS Independent",
+]
+# The license text is stored in the repository's `LICENSE` file (no extension).
+license = {file = "LICENSE"}
+dependencies = [
+    "scipy",
+    "numpy",
+    "pytest",
+    "black",
+]
+
+[project.urls]
+"Homepage" = "https://github.com/CLMBRs/efficiency-convexity"
+"Bug Tracker" = "https://github.com/CLMBRs/efficiency-convexity/issues"
@@ -0,0 +1,168 @@
+from eff_conv.ib.language import IBLanguage
+
+from scipy.spatial import ConvexHull
+
+import numpy as np
+
+
+class SimilaritySpace:
+    """A similarity space contains points (which should correspond in order to referents or meanings) and the priors upon those points.
+
+    Properties:
+        sim_space: A matrix which stores a list of points, each point should correspond to a referent or meaning. Dimensions are D x ||P||.
+        Where D is the dimension of the points and P is the set of points.
+
+        point_prior: Probability distribution for the points. Length must be ||P||. Cannot have any 0s in it.
+        If no value is passed in then a uniform distribution will be given.
+    """
+
+    sim_space: np.ndarray
+    point_prior: np.ndarray
+
+    def __init__(self, sim_space: np.ndarray, point_prior: np.ndarray = None):
+        if len(sim_space.shape) != 2:
+            raise ValueError("Similarity space input must be a 2d matrix")
+        self.sim_space = sim_space
+        if point_prior is not None:
+            if (
+                len(point_prior.shape) != 1
+                or point_prior.shape[0] != sim_space.shape[0]
+            ):
+                raise ValueError("Point priors not of correct size")
+            self.point_prior = point_prior
+        else:
+            self.point_prior = np.array(
+                [1.0 / sim_space.shape[0] for _ in range(sim_space.shape[0])]
+            )
+
+    def _1d_convexity_amount(self, points: np.ndarray, level: np.ndarray) -> int:
+        """Finds the number of points which are contained within a 1d range.
+
+        Args:
+            points (np.ndarray): The points to check.
+            level (np.ndarray): The points which make up the 1d range
+
+        Returns:
+            int: The number of points which are in the range spanned by levels.
+        """
+        l_flat = level.flatten()
+        p_flat = points.flatten()
+        lo, hi = min(l_flat), max(l_flat)
+        return sum((p_flat <= hi) & (p_flat >= lo)) + level.shape[0]
+
+    def quasi_convexity(self, point_dist: np.ndarray, steps: int) -> float:
+        """Finds the quasi-convexity of a probability. Algorithm from Skinner L. (2025).
+
+        Args:
+            point_dist (np.ndarray): The probability distribution to be evaluated.
+            steps (int): The number of steps to interate over the probability (higher is more accurate but slower)
+
+        Returns:
+            float: The quasi-convexity of the probabilty distribution.
+        """
+
+        if len(point_dist.shape) != 1:
+            raise ValueError("Quasi-Convexity input must be a probability distribution")
+        if np.size(point_dist) != self.sim_space.shape[0]:
+            raise ValueError("Quasi-Convexity input must map to all points")
+        if steps <= 0:
+            raise ValueError("Steps must be positive")
+
+        mesh = 1.0 / steps
+
+        qc = 0
+
+        steps = np.linspace(0, np.max(point_dist), steps)[::-1]
+
+        for i in steps:
+            level = self.sim_space[point_dist >= i]
+            out_points = self.sim_space[point_dist < i]
+            # If everything is on a line a very simple calculation can be done
+            if self.sim_space.shape[1] == 1:
+                qc += (
+                    mesh * level.shape[0] / self._1d_convexity_amount(out_points, level)
+                )
+            else:
+                # See if the points don't span the space (If so, ConvexHull will throw an error)
+                consider = out_points
+                rank = np.linalg.matrix_rank(level - level[0])
+                if rank < self.sim_space.shape[1]:
+                    consider = []
+                    for p in out_points:
+                        check = np.concatenate((level, [p]))
+                        if rank == np.linalg.matrix_rank(check - check[0]):
+                            consider.append(p)
+
+                    # Project down
+                    U, _, _ = np.linalg.svd(level.T, full_matrices=False)
+                    proj = U[:, :rank].T
+                    if len(consider) > 0:
+                        consider = (proj @ np.array(consider).T).T
+                    else:
+                        qc += mesh
+                        continue
+                    level = (proj @ level.T).T
+
+                if rank == 1:
+                    amount = self._1d_convexity_amount(consider, level)
+                else:
+                    hull = ConvexHull(level)
+                    eqs = hull.equations[:, :-1]
+                    end = hull.equations[:, -1]
+                    amount = level.shape[0] + sum(
+                        np.all(eqs @ consider.T + end[:, None] <= 1e-12, axis=0)
+                    )
+
+                qc += mesh * level.shape[0] / amount
+        return qc
+
+    def encoder_convexity(
+        self, distrubitions: np.ndarray, prior: np.ndarray, steps: int = 100
+    ) -> float:
+        """Finds the quasi-convexity of a conditional probabilty matrix, typically an IB encoder. Algorithm from Skinner L. (2025).
+
+        Args:
+            distrubitions (np.ndarray): The conditional probaility matrix to be evaluated. Shape is of ||P|| x n where n > 0.
+            Each column of the matrix should be a probability distrubtion over P.
+
+            prior (np.ndarray): The probability distribution of inputs into the encoder. Must be of size n.
+            steps (int, default: 100): The number of steps to interate over the probability (higher is more accurate but slower)
+
+        Returns:
+            float: The quasi-convexity of the matrix.
+        """
+
+        # Apply Bayes' rule
+        reconstructed = distrubitions.T * prior[:, None] / self.point_prior
+        maximums = np.max(reconstructed, axis=0)
+
+        reconstructed[~(reconstructed == maximums)] = 0
+        reconstructed[reconstructed == maximums] = 1
+
+        weighted_sum = np.sum(reconstructed.T, axis=0)
+        weighted_sum = weighted_sum / np.sum(weighted_sum)
+
+        convexities = []
+        for word in distrubitions.T:
+            convexities.append(self.quasi_convexity(word, steps))
+        return np.sum(np.array(convexities) * weighted_sum)
+
+    def language_convexity(
+        self, lang: IBLanguage, steps: int = 100, referents=False
+    ) -> float:
+        """Finds the quasi-convexity of an IBLanguage by evaluating the Q(m|w) matrix.
+
+        Args:
+            lang (IBLanguage): The language to be evaluated.
+            steps (int, default: 100): The number of steps to iterate over the probability (higher is more accurate but slower).
+            refeernts (boolean, default: False): Whether to evaluate q(u|w) or q(m|w).
+
+        Returns:
+            float: The quasi-convexity of the language.
+        """
+
+        return self.encoder_convexity(
+            lang.reconstructed_meanings if referents else lang.qmw,
+            lang.expressions_prior,
+            steps=steps,
+        )
@@ -0,0 +1,106 @@
+from functools import cached_property
+from eff_conv.ib.structure import IBStructure
+from eff_conv.ib.utils import IB_EPSILON, kl_divergence, mutual_information
+
+import numpy as np
+
+
+class IBLanguage:
+    """A language has expressions which are mapped to from meanings and which can map to expressions.
+
+    Properties:
+        structure: This is the structure in which the language exists.
+
+        qwm: This is a conditional probaiblity matrix which maps a meaning distribution to expressions. Dimensions are ||W|| x ||M||.
+        Note: The columns of the matrix are the probability distributions. This differs from other implementations.
+
+        qmw: Reconstructed conditional probability matrix which maps an expression distrubution to meanings. Created using Bayes' rule.
+        Dimensions are ||M|| x ||W||.
+
+        complexity: Mutual information between expressions and meanings. Formally I(W; M).
+
+        expressions_prior: Probability distribution for expressions. Constructed from the structure's meaning priors and qwm. Formally p(w).
+
+        reconstructed_meanings: Conditional probability matrix which maps an expression distrubition to referents. Created using qmw and structure.pum.
+        Dimensions are ||U|| x ||W||.
+
+        divergence_array: Matrix which stores the different KL Divergences between the referent probability distrubutions per meaning and per expression.
+        Dimensions are ||W|| x ||M||. (It is important to note that the KL Divergence function uses base 2 logarithms)
+
+        expected_divergence: This is the expected KL Divergence between the language's reconstructed meanings and the structure's meanings.
+        expected divergence = I(U; M) - I(W; U)
+
+        iwu: The mutual information between the expressions of a language and the referents. Also referred to as accuracy. Formally I(W; U)
+    """
+
+    structure: IBStructure
+    qwm: np.ndarray
+
+    def __init__(
+        self,
+        structure: IBStructure,
+        qwm: np.ndarray,
+    ):
+        if len(qwm.shape) != 2:
+            raise ValueError("Must be a 2d matrix")
+        if qwm.shape[1] != structure.pum.shape[1]:
+            raise ValueError(
+                f"Input matrix is for {qwm.shape[1]} meanings, not {structure.pum.shape[1]}"
+            )
+        if (np.abs(np.sum(qwm, axis=0) - 1) > IB_EPSILON).any():
+            raise ValueError(
+                "All columns of conditional probability matrix must sum to 1"
+            )
+        if (qwm < 0).any():
+            raise ValueError(
+                "No negative numbers are allowed in the probability matrix"
+            )
+        self.structure = structure
+        self.qwm = qwm
+
+    @cached_property
+    def qmw(self) -> np.ndarray:
+        # Apply Bayes' rule
+        return (
+            self.qwm.T * self.structure.meanings_prior[:, None] / self.expressions_prior
+        )
+
+    @cached_property
+    def complexity(self) -> float:
+        return mutual_information(
+            self.qwm, self.expressions_prior, self.structure.meanings_prior
+        )
+
+    @cached_property
+    def expressions_prior(self) -> np.ndarray:
+        # Normalization does become important at really small values
+        intermediate = self.qwm @ self.structure.meanings_prior
+        return intermediate / np.sum(intermediate)
+
+    @cached_property
+    def reconstructed_meanings(self) -> np.ndarray:
+        # Normalization does become important at really small values
+        intermediate = self.structure.pum @ self.qmw
+        return intermediate / np.sum(intermediate, axis=0)
+
+    @cached_property
+    def divergence_array(self) -> np.ndarray:
+        return np.array(
+            [
+                [kl_divergence(k, r) for k in self.structure.pum.T]
+                for r in self.reconstructed_meanings.T
+            ]
+        )
+
+    @cached_property
+    def expected_divergence(self) -> float:
+        left = self.qwm * self.structure.meanings_prior
+        return np.sum(left * self.divergence_array)
+
+    @cached_property
+    def iwu(self) -> float:
+        return mutual_information(
+            self.reconstructed_meanings,
+            self.structure.referents_prior,
+            self.expressions_prior,
+        )