Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/examples/colors/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ uv run -m colors.utils.ols_model color_model

#### Data Generation

To generate the test models for experiments 2 and 3, you can run the followign command in the `examples` directory:
To generate the test models for experiments 2 and 3, you can run the following command in the `examples` directory:
```sh
uv run -m colors.exp2_3.generate_models
```
Expand Down
32,438 changes: 16,219 additions & 16,219 deletions src/examples/colors/data/minimized/color_model.csv

Large diffs are not rendered by default.

29,876 changes: 14,938 additions & 14,938 deletions src/examples/colors/data/minimized/model-cpdm.csv

Large diffs are not rendered by default.

29,772 changes: 14,886 additions & 14,886 deletions src/examples/colors/data/minimized/model-cpdm_adj.csv

Large diffs are not rendered by default.

29,924 changes: 14,962 additions & 14,962 deletions src/examples/colors/data/minimized/model-cpdm_convex.csv

Large diffs are not rendered by default.

29,898 changes: 14,949 additions & 14,949 deletions src/examples/colors/data/minimized/model-cpdm_dual.csv

Large diffs are not rendered by default.

29,422 changes: 14,711 additions & 14,711 deletions src/examples/colors/data/minimized/model-cpdm_split.csv

Large diffs are not rendered by default.

29,722 changes: 14,861 additions & 14,861 deletions src/examples/colors/data/minimized/model-cpum.csv

Large diffs are not rendered by default.

29,550 changes: 14,775 additions & 14,775 deletions src/examples/colors/data/minimized/model-cpum_dual.csv

Large diffs are not rendered by default.

29,018 changes: 14,509 additions & 14,509 deletions src/examples/colors/data/minimized/model-cpum_split.csv

Large diffs are not rendered by default.

29,968 changes: 14,984 additions & 14,984 deletions src/examples/colors/data/minimized/model-manhattan_5_5.csv

Large diffs are not rendered by default.

29,936 changes: 14,968 additions & 14,968 deletions src/examples/colors/data/minimized/model-npdm.csv

Large diffs are not rendered by default.

29,864 changes: 14,932 additions & 14,932 deletions src/examples/colors/data/minimized/model-npdm_adj.csv

Large diffs are not rendered by default.

29,876 changes: 14,938 additions & 14,938 deletions src/examples/colors/data/minimized/model-npdm_dual.csv

Large diffs are not rendered by default.

29,902 changes: 14,951 additions & 14,951 deletions src/examples/colors/data/minimized/model-npdm_shift.csv

Large diffs are not rendered by default.

29,406 changes: 14,703 additions & 14,703 deletions src/examples/colors/data/minimized/model-npum.csv

Large diffs are not rendered by default.

29,260 changes: 14,630 additions & 14,630 deletions src/examples/colors/data/minimized/model-npum_dual.csv

Large diffs are not rendered by default.

31 changes: 29 additions & 2 deletions src/examples/colors/utils/minimize_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import pickle
import os

from eff_conv.ib.utils import IB_EPSILON


def find_frontier_optimality(frontier: np.ndarray, point: np.ndarray) -> float:
"""
Expand All @@ -18,6 +20,28 @@ def find_frontier_optimality(frontier: np.ndarray, point: np.ndarray) -> float:
return -np.min(np.linalg.norm(frontier - point, axis=1))


def find_epsilon_measure(
    frontier: np.ndarray, point: np.ndarray, betas: np.ndarray
) -> float:
    """
    Computes the negative epsilon measure of a language relative to a frontier.

    For every frontier entry `i`, the objective `x - betas[i] * y` is evaluated both for
    the frontier point itself and for the input point. The entry with the smallest gap
    (input objective minus frontier objective) is selected, and that gap is negated and
    divided by the selected entry's beta value (plus `IB_EPSILON`).

    Args:
        frontier (np.ndarray): A list of 2-dimensional points. These are the points the input point is checked against.
        point (np.ndarray): A 2-dimensional point. This is the point which is being checked.
        betas (np.ndarray): One beta value per frontier point; `betas[i]` is paired element-wise with `frontier[i]`.

    Returns:
        float: the negative epsilon measure for the language.
    """
    # Objective of the input point under each beta, and of each frontier point
    # under its own beta.
    candidate_objective = point[0] - point[1] * betas
    frontier_objective = frontier[:, 0] - frontier[:, 1] * betas

    gaps = candidate_objective - frontier_objective

    # NOTE(review): the entry is chosen by the smallest unscaled gap, and only then
    # scaled by 1 / beta -- confirm this ordering is the intended definition.
    closest = np.argmin(gaps)

    # IB_EPSILON is added to the divisor; presumably to avoid dividing by a zero beta.
    scale = betas[closest] + IB_EPSILON
    return -gaps[closest] / scale


def minimize_model(name: str):
"""
Converts a model file consisting of various languages to a `.csv` file which has a row per encoder contains the following columns:
Expand Down Expand Up @@ -66,6 +90,9 @@ def minimize_model(name: str):
frontier.append([lang.complexity, lang.iwu])

frontier = np.array(frontier)
with open(f"./colors/data/model.pkl", "rb") as f:
optimal_model = pickle.load(f)
betas = np.array(optimal_model["betas"][::-1])

if not artificial:
offset = len(model["optimal"])
Expand All @@ -76,7 +103,7 @@ def minimize_model(name: str):
df_data["convexity-quw"].append(model["convexity"]["quw"]["natural"][i])
df_data["type"].append("natural")
df_data["optimality"].append(
find_frontier_optimality(frontier, np.array([n.complexity, n.iwu]))
find_epsilon_measure(frontier, np.array([n.complexity, n.iwu]), betas)
)
df_data["base_item_id"].append(i + offset)

Expand All @@ -94,7 +121,7 @@ def minimize_model(name: str):
df_data["convexity-quw"].append(model["convexity"]["quw"]["suboptimal"][i])
df_data["type"].append("suboptimal")
df_data["optimality"].append(
find_frontier_optimality(frontier, np.array([s.complexity, s.iwu]))
find_epsilon_measure(frontier, np.array([s.complexity, s.iwu]), betas)
)
df_data["base_item_id"].append(i // 10)

Expand Down
Loading