Skip to content

Commit d1d807d

Browse files
authored
Merge pull request #2 from CLMBRs/conv_update
Change encoder_convexity weighted summation
2 parents 06e8b63 + 16db3e1 commit d1d807d

5 files changed

Lines changed: 197 additions & 34 deletions

File tree

src/eff_conv/convexity.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
import numpy as np
66

7+
from eff_conv.ib.utils import IB_EPSILON
8+
79

810
class SimilaritySpace:
911
"""A similarity space contains points (which should correspond in order to referents or meanings) and the priors upon those points.
@@ -29,6 +31,10 @@ def __init__(self, sim_space: np.ndarray, point_prior: np.ndarray = None):
2931
or point_prior.shape[0] != sim_space.shape[0]
3032
):
3133
raise ValueError("Point priors not of correct size")
34+
if np.abs(np.sum(point_prior) - 1) > IB_EPSILON:
35+
raise ValueError("Point priors must sum to 1")
36+
if (point_prior <= 0).any():
37+
raise ValueError("Prior values must be greater than 0")
3238
self.point_prior = point_prior
3339
else:
3440
self.point_prior = np.array(
@@ -65,6 +71,10 @@ def quasi_convexity(self, point_dist: np.ndarray, steps: int) -> float:
6571
raise ValueError("Quasi-Convexity input must be a probability distribution")
6672
if np.size(point_dist) != self.sim_space.shape[0]:
6773
raise ValueError("Quasi-Convexity input must map to all points")
74+
if np.abs(np.sum(point_dist) - 1) > IB_EPSILON:
75+
raise ValueError("Quasi-Convexity input must sum to 1")
76+
if (point_dist < 0).any():
77+
raise ValueError("Quasi-Convexity input must be greater than or equal to 0")
6878
if steps <= 0:
6979
raise ValueError("Steps must be positive")
7080

@@ -116,10 +126,10 @@ def quasi_convexity(self, point_dist: np.ndarray, steps: int) -> float:
116126
qc += mesh * level.shape[0] / amount
117127
return qc
118128

119-
def encoder_convexity(
129+
def skinner_encoder_convexity(
120130
self, distrubitions: np.ndarray, prior: np.ndarray, steps: int = 100
121131
) -> float:
122-
"""Finds the quasi-convexity of a conditional probabilty matrix, typically an IB encoder. Algorithm from Skinner L. (2025).
132+
"""Finds the quasi-convexity of a conditional probability matrix, typically an IB encoder. Weighting algorithm from Skinner L. (2025).
123133
124134
Args:
125135
distrubitions (np.ndarray): The conditional probability matrix to be evaluated. Shape is of ||P|| x n where n > 0.
@@ -132,6 +142,11 @@ def encoder_convexity(
132142
float: The quasi-convexity of the matrix.
133143
"""
134144

145+
if np.abs(np.sum(prior) - 1) > IB_EPSILON:
146+
raise ValueError("Prior must sum to 1")
147+
if (prior <= 0).any():
148+
raise ValueError("Prior must be greater than 0")
149+
135150
# Apply Bayes' rule
136151
reconstructed = distrubitions.T * prior[:, None] / self.point_prior
137152
maximums = np.max(reconstructed, axis=0)
@@ -147,6 +162,32 @@ def encoder_convexity(
147162
convexities.append(self.quasi_convexity(word, steps))
148163
return np.sum(np.array(convexities) * weighted_sum)
149164

165+
def encoder_convexity(
166+
self, distrubitions: np.ndarray, prior: np.ndarray, steps: int = 100
167+
) -> float:
168+
"""Finds the quasi-convexity of a conditional probability matrix, typically an IB encoder. Weighting is based on p(y) for each p(x|y).
169+
170+
Args:
171+
distrubitions (np.ndarray): The conditional probability matrix to be evaluated. Shape is of ||P|| x n where n > 0.
172+
Each column of the matrix should be a probability distribution over P.
173+
174+
prior (np.ndarray): The probability distribution of inputs into the encoder. Must be of size n.
175+
steps (int, default: 100): The number of steps to iterate over the probability (higher is more accurate but slower)
176+
177+
Returns:
178+
float: The quasi-convexity of the matrix.
179+
"""
180+
181+
if np.abs(np.sum(prior) - 1) > IB_EPSILON:
182+
raise ValueError("Prior must sum to 1")
183+
if (prior <= 0).any():
184+
raise ValueError("Prior must be greater than 0")
185+
186+
convexities = []
187+
for word in distrubitions.T:
188+
convexities.append(self.quasi_convexity(word, steps))
189+
return np.sum(np.array(convexities) * prior)
190+
150191
def language_convexity(
151192
self, lang: IBLanguage, steps: int = 100, referents=False
152193
) -> float:

src/eff_conv/ib/language.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ def __init__(
5151
raise ValueError(
5252
"All columns of conditional probability matrix must sum to 1"
5353
)
54+
if (np.sum(qwm, axis=1) <= 0).any():
55+
raise ValueError(
56+
"All rows of conditional probability matrix must sum to a value greater than 0"
57+
)
5458
if (qwm < 0).any():
5559
raise ValueError(
5660
"No negative numbers are allowed in the probability matrix"

src/eff_conv/ib/structure.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def __init__(
4545
)
4646
if (pum <= 0).any():
4747
raise ValueError("Priors must all be greater than 0")
48-
if abs(np.sum(prior)) - 1 > IB_EPSILON:
48+
if abs(np.sum(prior) - 1) > IB_EPSILON:
4949
raise ValueError("Priors must sum to 1")
5050

5151
# If priors are not passed in, make it uniform

src/tests/test_convexity.py

Lines changed: 143 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
import pytest
23

34
from eff_conv.convexity import SimilaritySpace
45
from eff_conv.ib.language import IBLanguage
@@ -32,56 +33,127 @@ class TestConvexity:
3233
)
3334

3435
# Tests for convexity.py
36+
def test_similarity_space_check(self):
37+
with pytest.raises(ValueError):
38+
space_wrong_shape = SimilaritySpace(np.array([1]))
39+
with pytest.raises(ValueError):
40+
space_wrong_prior_len = SimilaritySpace(
41+
np.array([[1]]), point_prior=np.array([1, 2])
42+
)
43+
with pytest.raises(ValueError):
44+
space_wrong_prior_shape = SimilaritySpace(
45+
np.array([[1]]), point_prior=np.array([[1]])
46+
)
47+
with pytest.raises(ValueError):
48+
space_invalid_prior = SimilaritySpace(
49+
np.array([[1], [2]]), point_prior=np.array([1, 2])
50+
)
51+
with pytest.raises(ValueError):
52+
space_zero_prior = SimilaritySpace(
53+
np.array([[1], [2]]), point_prior=np.array([1, 0])
54+
)
55+
with pytest.raises(ValueError):
56+
space_negative_prior = SimilaritySpace(
57+
np.array([[1], [2]]), point_prior=np.array([2, -1])
58+
)
59+
3560
def test_convexity_calculation(self):
3661
assert (
37-
self.sim_space.encoder_convexity(np.ones((9, 1)) / 9, np.array([1])) - 1
62+
abs(
63+
self.sim_space.encoder_convexity(np.ones((9, 1)) / 9, np.array([1])) - 1
64+
)
3865
< IB_EPSILON
3966
)
4067
# Epsilon is higher because rounding errors are more common due to the nature of the algorithm
4168
assert (
42-
self.sim_space.encoder_convexity(
43-
np.array([[0.2], [0], [0.2], [0], [0.2], [0], [0.2], [0], [0.2]]),
44-
np.array([1]),
69+
abs(
70+
self.sim_space.skinner_encoder_convexity(
71+
np.array(
72+
[
73+
# fmt: off
74+
[0.2, 0],
75+
[0, 1],
76+
[0.2, 0],
77+
[0, 0],
78+
[0.2, 0],
79+
[0, 0],
80+
[0.2, 0],
81+
[0, 0],
82+
[0.2, 0],
83+
# fmt: on
84+
]
85+
),
86+
np.array([0.5, 0.5]),
87+
)
88+
- 19 / 27
89+
)
90+
< 0.005
91+
)
92+
assert (
93+
abs(
94+
self.sim_space.encoder_convexity(
95+
np.array(
96+
[
97+
# fmt: off
98+
[0.2, 0],
99+
[0, 1],
100+
[0.2, 0],
101+
[0, 0],
102+
[0.2, 0],
103+
[0, 0],
104+
[0.2, 0],
105+
[0, 0],
106+
[0.2, 0],
107+
# fmt: on
108+
]
109+
),
110+
np.array([0.5, 0.5]),
111+
)
112+
- 14 / 18
45113
)
46-
- 5 / 9
47114
< 0.005
48115
)
49116

50117
def test_projection_down(self):
51118
# This also tests 1d spaces
52119
assert (
53-
self.sim_space_large.encoder_convexity(
54-
np.array(
55-
[
56-
# fmt: off
120+
abs(
121+
self.sim_space_large.encoder_convexity(
122+
np.array(
123+
[
124+
# fmt: off
57125
[0.33], [0], [0], [0],
58126
[0], [0], [0], [0],
59127
[0.33], [0], [0], [0],
60128
[0.33], [0], [0], [0],
61-
# fmt: on
62-
]
63-
),
64-
np.array([1]),
129+
# fmt: on
130+
]
131+
)
132+
/ 0.99,
133+
np.array([1]),
134+
)
135+
- 3 / 4
65136
)
66-
- 3 / 4
67137
< 0.005
68138
)
69139
assert (
70-
self.sim_space_large.encoder_convexity(
71-
np.array(
72-
[
73-
# fmt: off
140+
abs(
141+
self.sim_space_large.encoder_convexity(
142+
np.array(
143+
[
144+
# fmt: off
74145
[0.5], [0], [0], [0],
75146
[0], [0], [0], [0],
76147
[0], [0], [0], [0],
77148
[0.5], [0], [0], [0],
78-
# fmt: on
79-
]
80-
),
81-
np.array([1]),
82-
steps=1000,
149+
# fmt: on
150+
]
151+
),
152+
np.array([1]),
153+
steps=1000,
154+
)
155+
- 1 / 2
83156
)
84-
- 1 / 2
85157
< 0.005
86158
)
87159

@@ -96,17 +168,57 @@ def test_language_convexity(self):
96168
)
97169
simple_lang = IBLanguage(simple_struct, np.ones((1, 9)))
98170
assert (
99-
self.sim_space.language_convexity(simple_lang)
100-
- self.sim_space.encoder_convexity(np.ones((9, 1)) / 9, np.array([1]))
171+
abs(
172+
self.sim_space.language_convexity(simple_lang)
173+
- self.sim_space.encoder_convexity(np.ones((9, 1)) / 9, np.array([1]))
174+
)
101175
< IB_EPSILON
102176
)
103177
# This will fail on remote if the epsilon is not higher
104178
assert (
105-
self.sim_space.language_convexity(simple_lang, referents=True, steps=1000)
106-
- self.sim_space.encoder_convexity(
107-
np.array([[0.465, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.465]]).T,
108-
np.array([1]),
109-
steps=1000,
179+
abs(
180+
self.sim_space.language_convexity(
181+
simple_lang, referents=True, steps=1000
182+
)
183+
- self.sim_space.encoder_convexity(
184+
np.array(
185+
[[0.465, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.465]]
186+
).T,
187+
np.array([1]),
188+
steps=1000,
189+
)
110190
)
111191
< 0.005
112192
)
193+
194+
def test_convexity_check(self):
195+
with pytest.raises(ValueError):
196+
input_wrong_shape = self.sim_space.quasi_convexity(np.ones((9, 1)), 100)
197+
with pytest.raises(ValueError):
198+
input_wrong_size = self.sim_space.quasi_convexity(np.ones(8) / 8, 100)
199+
with pytest.raises(ValueError):
200+
input_invalid = self.sim_space.quasi_convexity(np.ones(9), 100)
201+
with pytest.raises(ValueError):
202+
input_negative = self.sim_space.quasi_convexity(
203+
np.array([-7, 1, 1, 1, 1, 1, 1, 1, 1, 1]), 100
204+
)
205+
with pytest.raises(ValueError):
206+
steps_negative = self.sim_space.quasi_convexity(np.ones(9) / 9, -1)
207+
208+
def test_encoder_check(self):
209+
with pytest.raises(ValueError):
210+
prior_invalid = self.sim_space.encoder_convexity(
211+
np.ones((9, 1)) / 9, np.array([0.9])
212+
)
213+
with pytest.raises(ValueError):
214+
prior_invalid_skinner = self.sim_space.skinner_encoder_convexity(
215+
np.ones((9, 1)) / 9, np.array([0.9])
216+
)
217+
with pytest.raises(ValueError):
218+
prior_negative = self.sim_space.encoder_convexity(
219+
np.ones((9, 2)) / 9, np.array([-1, 2])
220+
)
221+
with pytest.raises(ValueError):
222+
prior_negative_skinner = self.sim_space.skinner_encoder_convexity(
223+
np.ones((9, 2)) / 9, np.array([-1, 2])
224+
)

src/tests/test_ib.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ def test_ib_structure_check(self):
5555
struct_invalid_prior = IBStructure(
5656
np.array([[0.25, 0.25, 0.25, 0.25]]), np.array([1, 2, 3, 4])
5757
)
58+
with pytest.raises(ValueError):
59+
struct_small_prior = IBStructure(
60+
np.array([[0.25, 0.25, 0.25, 0.25]]), np.array([0.1, 0.1, 0.1, 0.1])
61+
)
5862

5963
# Tests for utils.py
6064
def test_safe_log(self):
@@ -182,3 +186,5 @@ def test_ib_language_check(self):
182186
lang_negative_probability = IBLanguage(
183187
self.simple_struct, np.array([[-1, 1], [2, 0]])
184188
)
189+
with pytest.raises(ValueError):
190+
lang_zero_row = IBLanguage(self.simple_struct, np.array([[1, 1], [0, 0]]))

0 commit comments

Comments
 (0)