Skip to content

Commit 06e8b63

Browse files
authored
Merge pull request #1 from CLMBRs/eff_conv
Add IB Optimization & convexity
2 parents 2817654 + b43a990 commit 06e8b63

15 files changed

Lines changed: 1731 additions & 1 deletion

.github/workflows/black.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: auto-format
2+
3+
on: pull_request
4+
5+
jobs:
6+
lint:
7+
runs-on: ubuntu-latest
8+
steps:
9+
- uses: actions/checkout@v3
10+
with:
11+
repository: ${{ github.event.pull_request.head.repo.full_name }}
12+
ref: ${{ github.event.pull_request.head.ref }}
13+
- name: black
14+
uses: psf/black@stable
15+
with:
16+
options: ""
17+
- name: Check for modified files
18+
id: git-check
19+
run: echo "modified=$(if git diff-index --quiet HEAD --; then echo "false"; else echo "true"; fi)" >> $GITHUB_OUTPUT
20+
- name: Push changes
21+
if: steps.git-check.outputs.modified == 'true'
22+
run: |
23+
git config --global user.name 'github-actions[bot]'
24+
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
25+
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}
26+
git commit -am "Automated black formatting"
27+
git push

.github/workflows/test.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
name: test
2+
3+
on: [push]
4+
5+
jobs:
6+
build:
7+
runs-on: ubuntu-latest
8+
steps:
9+
- uses: actions/checkout@v3
10+
- name: Set up Python
11+
uses: actions/setup-python@v4
12+
with:
13+
python-version: '3.11'
14+
- name: Install package
15+
run: pip install -e .
16+
- name: Test with pytest
17+
run: |
18+
pip install pytest
19+
pytest

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,3 +205,6 @@ cython_debug/
205205
marimo/_static/
206206
marimo/_lsp/
207207
__marimo__/
208+
209+
# .DS_Store
210+
.DS_Store

LICENSE

Lines changed: 674 additions & 0 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,21 @@
1-
# efficiency-convexity
1+
# Efficiency & Convexity
2+
3+
## Installing Efficiency & Convexity
4+
5+
First, set up a virtual environment (e.g. via [miniconda](https://docs.conda.io/en/latest/miniconda.html), `conda create -n eff_conv python=3.11`, and `conda activate eff_conv`).
6+
7+
1. Download or clone this repository and navigate to the root folder.
8+
9+
2. Install the IB Optimizer (We recommend doing this inside a virtual environment)
10+
11+
`pip install -e .`
12+
13+
## References
14+
15+
<details>
<summary>Links:</summary>
16+
17+
> Tishby, N., Pereira, F. C., & Bialek, W. (2000). The information bottleneck method. The 37th annual Allerton Conference on Communication, Control, and Computing. pp. 368–377. https://doi.org/10.48550/arXiv.physics/0004057
18+
19+
> Skinner, L. (2025). Convexity is a Fundamental Feature of Efficient Semantic Compression in Probability Spaces. https://hdl.handle.net/1773/53008
20+
21+
</details>

pyproject.toml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
[build-system]
2+
requires = ["setuptools>=42", "wheel"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[project]
6+
name = "eff_conv"
7+
version = "0.1.0"
8+
authors = [
9+
{ name="Ashvin Ranjan", email="ar31@uw.edu"},
10+
{ name="Shane Steinert-Threlkeld", email="shanest@uw.edu" },
11+
]
12+
description = "Efficiency & Convexity is a toolkit for generating and analyzing encoders under the Information Bottleneck framework and calculating the quasi-convexity of probability distributions."
13+
readme = "README.md"
14+
requires-python = ">=3.7"
15+
classifiers = [
16+
"Programming Language :: Python :: 3",
17+
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
18+
"Operating System :: OS Independent",
19+
]
20+
license = {file = "LICENSE.txt"}
21+
dependencies = [
22+
"scipy",
23+
"numpy",
24+
"pytest",
25+
"black",
26+
]
27+
28+
[project.urls]
29+
"Homepage" = "https://github.com/CLMBRs/efficiency-convexity"
30+
"Bug Tracker" = "https://github.com/CLMBRs/efficiency-convexity/issues"

src/eff_conv/convexity.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
from eff_conv.ib.language import IBLanguage
2+
3+
from scipy.spatial import ConvexHull
4+
5+
import numpy as np
6+
7+
8+
class SimilaritySpace:
    """A set of points in a similarity space together with a prior over those points.

    Each point should correspond, in order, to a referent or meaning.

    Properties:
        sim_space: Matrix which stores one point per row. Dimensions are ||P|| x D,
            where P is the set of points and D is the dimension of each point.

        point_prior: Probability distribution over the points. Length must be ||P||.
            It is used as a divisor in `encoder_convexity`, so it should not contain any 0s.
            If no value is passed in, a uniform distribution is used.
    """

    sim_space: np.ndarray
    point_prior: np.ndarray

    def __init__(self, sim_space: np.ndarray, point_prior: np.ndarray = None):
        """Store the points and their prior.

        Args:
            sim_space (np.ndarray): 2d matrix with one point per row.
            point_prior (np.ndarray, default: None): 1d distribution with one entry per point.
                A uniform distribution is created when omitted.

        Raises:
            ValueError: If `sim_space` is not 2d, or `point_prior` is not 1d with one entry per point.
        """
        if len(sim_space.shape) != 2:
            raise ValueError("Similarity space input must be a 2d matrix")
        self.sim_space = sim_space

        n_points = sim_space.shape[0]
        if point_prior is None:
            self.point_prior = np.ones(n_points) / n_points
            return
        if len(point_prior.shape) != 1 or point_prior.shape[0] != n_points:
            raise ValueError("Point priors not of correct size")
        self.point_prior = point_prior

    def _1d_convexity_amount(self, points: np.ndarray, level: np.ndarray) -> int:
        """Finds the number of points which are contained within a 1d range.

        Args:
            points (np.ndarray): The points to check (may be empty).
            level (np.ndarray): The points which make up the 1d range. Must not be empty.

        Returns:
            int: The number of `points` inside the closed range spanned by `level`,
                plus the number of rows in `level` itself.
        """
        level_values = np.asarray(level).ravel()
        point_values = np.asarray(points).ravel()
        lo, hi = level_values.min(), level_values.max()
        inside = np.count_nonzero((point_values >= lo) & (point_values <= hi))
        return int(inside) + level.shape[0]

    def _nd_convexity_amount(self, points: np.ndarray, level: np.ndarray) -> int:
        """Counts the level points plus the other points lying in the convex hull of the level set.

        Used when the space has two or more dimensions. If the level set does not span
        the whole space, everything is first expressed in coordinates of the affine
        subspace the level set does span, because ConvexHull rejects degenerate input.

        Args:
            points (np.ndarray): Points outside the level set, one per row (may be empty).
            level (np.ndarray): Points of the level set, one per row. Must not be empty.

        Returns:
            int: Number of rows in `level` plus the number of `points` inside its convex hull.
        """
        origin = level[0]
        centered = level - origin
        rank = np.linalg.matrix_rank(centered)

        if rank < level.shape[1]:
            # Only points in the same affine subspace as the level set can be in its hull:
            # appending such a point must not raise the rank.
            coplanar = [
                p
                for p in points
                if np.linalg.matrix_rank(np.vstack((centered, p - origin))) == rank
            ]
            if not coplanar:
                return level.shape[0]
            if rank == 0:
                # The level set is a single location; the remaining candidates coincide with it.
                return level.shape[0] + len(coplanar)

            # Build the basis from the *centered* level set. A basis taken from the raw
            # coordinates can be orthogonal to a subspace that misses the origin, which
            # collapses every point onto the same projected value.
            _, _, vt = np.linalg.svd(centered, full_matrices=False)
            basis = vt[:rank]
            points = (np.array(coplanar) - origin) @ basis.T
            level = centered @ basis.T

            if rank == 1:
                return self._1d_convexity_amount(points, level)

        hull = ConvexHull(level)
        # Each row of hull.equations is [normal, offset]; a point x is inside the hull
        # when normal @ x + offset <= 0 for every facet (small tolerance for round-off).
        normals = hull.equations[:, :-1]
        offsets = hull.equations[:, -1]
        inside = np.all(normals @ points.T + offsets[:, None] <= 1e-12, axis=0)
        return level.shape[0] + int(np.count_nonzero(inside))

    def quasi_convexity(self, point_dist: np.ndarray, steps: int) -> float:
        """Finds the quasi-convexity of a probability distribution. Algorithm from Skinner L. (2025).

        The distribution is cut at `steps` evenly spaced thresholds between 0 and its maximum.
        For each threshold, the fraction of points inside the convex hull of the level set
        (points with probability >= threshold) that belong to the level set is averaged.

        Args:
            point_dist (np.ndarray): The probability distribution to be evaluated. One entry per point.
            steps (int): The number of steps to iterate over the probability (higher is more accurate but slower)

        Returns:
            float: The quasi-convexity of the probability distribution.

        Raises:
            ValueError: If `point_dist` is not 1d, does not have one entry per point, or `steps` is not positive.
        """

        if len(point_dist.shape) != 1:
            raise ValueError("Quasi-Convexity input must be a probability distribution")
        if np.size(point_dist) != self.sim_space.shape[0]:
            raise ValueError("Quasi-Convexity input must map to all points")
        if steps <= 0:
            raise ValueError("Steps must be positive")

        mesh = 1.0 / steps
        on_a_line = self.sim_space.shape[1] == 1
        thresholds = np.linspace(0, np.max(point_dist), steps)[::-1]

        qc = 0.0
        for threshold in thresholds:
            level = self.sim_space[point_dist >= threshold]
            out_points = self.sim_space[point_dist < threshold]
            # If everything is on a line a very simple calculation can be done
            if on_a_line:
                amount = self._1d_convexity_amount(out_points, level)
            else:
                amount = self._nd_convexity_amount(out_points, level)
            qc += mesh * level.shape[0] / amount
        return qc

    def encoder_convexity(
        self, distrubitions: np.ndarray, prior: np.ndarray, steps: int = 100
    ) -> float:
        """Finds the quasi-convexity of a conditional probability matrix, typically an IB encoder. Algorithm from Skinner L. (2025).

        Args:
            distrubitions (np.ndarray): The conditional probability matrix to be evaluated. Shape is of ||P|| x n where n > 0.
                Each column of the matrix should be a probability distribution over P.
                (The parameter name keeps its original spelling for keyword-argument compatibility.)

            prior (np.ndarray): The probability distribution of inputs into the encoder. Must be of size n.
            steps (int, default: 100): The number of steps to iterate over the probability (higher is more accurate but slower)

        Returns:
            float: The quasi-convexity of the matrix.
        """

        # Apply Bayes' rule: entry [i, p] is column i's probability of point p, times prior[i], over point_prior[p]
        posterior = distrubitions.T * prior[:, None] / self.point_prior

        # Mark, for every point, the column(s) attaining the maximum for that point
        is_best = posterior == np.max(posterior, axis=0)

        # Each column is weighted by the share of points for which it is a maximum
        weights = is_best.sum(axis=1).astype(float)
        weights = weights / np.sum(weights)

        convexities = np.array(
            [self.quasi_convexity(word, steps) for word in distrubitions.T]
        )
        return np.sum(convexities * weights)

    def language_convexity(
        self, lang: "IBLanguage", steps: int = 100, referents: bool = False
    ) -> float:
        """Finds the quasi-convexity of an IBLanguage by evaluating one of its reconstructed matrices.

        Args:
            lang (IBLanguage): The language to be evaluated.
            steps (int, default: 100): The number of steps to iterate over the probability (higher is more accurate but slower).
            referents (boolean, default: False): If True, `lang.reconstructed_meanings` is evaluated; otherwise `lang.qmw`.

        Returns:
            float: The quasi-convexity of the language.
        """

        matrix = lang.reconstructed_meanings if referents else lang.qmw
        return self.encoder_convexity(matrix, lang.expressions_prior, steps=steps)

src/eff_conv/ib/language.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from functools import cached_property
2+
from eff_conv.ib.structure import IBStructure
3+
from eff_conv.ib.utils import IB_EPSILON, kl_divergence, mutual_information
4+
5+
import numpy as np
6+
7+
8+
class IBLanguage:
    """An encoder from meanings to expressions, living inside an IBStructure.

    Properties:
        structure: The structure in which the language exists.

        qwm: Conditional probability matrix mapping a meaning distribution to expressions. Dimensions are ||W|| x ||M||.
            Note: The columns of the matrix are the probability distributions. This differs from other implementations.

        qmw: Reconstructed conditional probability matrix mapping an expression distribution to meanings. Created using Bayes' rule.
            Dimensions are ||M|| x ||W||.

        complexity: Mutual information between expressions and meanings. Formally I(W; M).

        expressions_prior: Probability distribution for expressions. Constructed from the structure's meaning priors and qwm. Formally p(w).

        reconstructed_meanings: Conditional probability matrix mapping an expression distribution to referents. Created using qmw and structure.pum.
            Dimensions are ||U|| x ||W||.

        divergence_array: Matrix of KL Divergences between the referent distribution of each meaning and that of each expression.
            Dimensions are ||W|| x ||M||. (It is important to note that the KL Divergence function uses base 2 logarithms)

        expected_divergence: The expected KL Divergence between the language's reconstructed meanings and the structure's meanings.
            expected divergence = I(U; M) - I(W; U)

        iwu: The mutual information between the expressions of a language and the referents. Also referred to as accuracy. Formally I(W; U)
    """

    structure: IBStructure
    qwm: np.ndarray

    def __init__(self, structure: IBStructure, qwm: np.ndarray):
        """Validate the encoder matrix against the structure and store both.

        Raises:
            ValueError: If `qwm` is not 2d, has the wrong number of meaning columns,
                has a column that does not sum to 1 (within IB_EPSILON), or has a negative entry.
        """
        if qwm.ndim != 2:
            raise ValueError("Must be a 2d matrix")

        expected_meanings = structure.pum.shape[1]
        if qwm.shape[1] != expected_meanings:
            raise ValueError(
                f"Input matrix is for {qwm.shape[1]} meanings, not {expected_meanings}"
            )

        column_error = np.abs(np.sum(qwm, axis=0) - 1)
        if (column_error > IB_EPSILON).any():
            raise ValueError(
                "All columns of conditional probability matrix must sum to 1"
            )

        if (qwm < 0).any():
            raise ValueError(
                "No negative numbers are allowed in the probability matrix"
            )

        self.structure = structure
        self.qwm = qwm

    @cached_property
    def qmw(self) -> np.ndarray:
        # Bayes' rule: q(m|w) = q(w|m) p(m) / p(w)
        joint = self.qwm.T * self.structure.meanings_prior[:, None]
        return joint / self.expressions_prior

    @cached_property
    def complexity(self) -> float:
        meanings_prior = self.structure.meanings_prior
        return mutual_information(self.qwm, self.expressions_prior, meanings_prior)

    @cached_property
    def expressions_prior(self) -> np.ndarray:
        # Re-normalise: rounding drift matters at really small values
        unnormalized = self.qwm @ self.structure.meanings_prior
        return unnormalized / np.sum(unnormalized)

    @cached_property
    def reconstructed_meanings(self) -> np.ndarray:
        # Re-normalise each column: rounding drift matters at really small values
        unnormalized = self.structure.pum @ self.qmw
        return unnormalized / np.sum(unnormalized, axis=0)

    @cached_property
    def divergence_array(self) -> np.ndarray:
        # One row per expression, one column per meaning
        rows = []
        for reconstructed in self.reconstructed_meanings.T:
            row = []
            for meaning in self.structure.pum.T:
                row.append(kl_divergence(meaning, reconstructed))
            rows.append(row)
        return np.array(rows)

    @cached_property
    def expected_divergence(self) -> float:
        # Weight each divergence by the joint probability of its (expression, meaning) pair
        joint = self.qwm * self.structure.meanings_prior
        return np.sum(joint * self.divergence_array)

    @cached_property
    def iwu(self) -> float:
        referents_prior = self.structure.referents_prior
        return mutual_information(
            self.reconstructed_meanings, referents_prior, self.expressions_prior
        )

0 commit comments

Comments
 (0)