Improve PointMass Reward Heatmap Script (#9)

AdamGleave · web-flow · commit 24d90b21b2bb · 2020-01-23T15:43:23.000-08:00
* Change labels in divergence heatmaps

* Have plot_pm_reward use stylesheets too

* Fix MPL import and backend

* Change figure label positions, tweak parameters

* Fix docs

* Bugfix: do not delete TEXINPUTS if it never existed

* Avoid unnecessary TeX (the `fast` test config no longer uses the "tex" style)

* Fix TeX and os.environ handling for real
diff --git a/src/evaluating_rewards/analysis/latex/figsymbols.sty b/src/evaluating_rewards/analysis/latex/figsymbols.sty
@@ -10,4 +10,9 @@
 \newcommand{\backward}[1]{\reflectbox{#1}}
 
 \newcommand{\controlpenalty}{\reflectbox{\emoji{noto_snail}}}
-\newcommand{\nocontrolpenalty}{\reflectbox{\emoji{mozilla_cheetah}}}
+\newcommand{\nocontrolpenalty}{\reflectbox{\emoji{mozilla_cheetah}}}
+
+\newcommand{\sparse}{\texttt{S}}
+\newcommand{\dense}{\texttt{D}}
+\newcommand{\magnitude}{\texttt{M}}
+\newcommand{\zeroreward}{\texttt{Zero}}
diff --git a/src/evaluating_rewards/analysis/plot_divergence_heatmap.py b/src/evaluating_rewards/analysis/plot_divergence_heatmap.py
@@ -19,6 +19,7 @@
 from typing import Any, Iterable, Mapping, Optional
 
 from imitation import util
+import matplotlib.pyplot as plt
 import sacred
 
 from evaluating_rewards import serialize
@@ -30,8 +31,6 @@
 
 def horizontal_ticks() -> None:
-    # lazy import to allow custom backend
-    import matplotlib.pyplot as plt  # pylint:disable=import-outside-toplevel
-
+    """Set the x- and y-axis tick label rotation to horizontal via `pyplot`."""
     plt.xticks(rotation="horizontal")
     plt.yticks(rotation="horizontal")
 
@@ -87,6 +86,18 @@ def fast():
     del _
 
 
+@plot_divergence_heatmap_ex.named_config
+def dataset_transition():
+    """Named config: set the `search` dataset factory to `random_transition_generator`."""
+    search = {  # noqa: F841  pylint:disable=unused-variable
+        "dataset_factory": {
+            "escape/py/function": (
+                "evaluating_rewards.experiments.datasets.random_transition_generator"
+            ),
+        },
+    }
+
+
 def _norm(args: Iterable[str]) -> bool:
     return any(visualize.match("evaluating_rewards/PointMassGroundTruth-v0")(args))
 
@@ -97,7 +108,6 @@ def point_mass():
     search = {  # noqa: F841  pylint:disable=unused-variable
         "env_name": "evaluating_rewards/PointMassLine-v0",
         "dataset_factory": {
-            # can also use evaluating_rewards.experiments.datasets.random_transition_generator
             "escape/py/function": "evaluating_rewards.experiments.datasets.random_policy_generator",
         },
     }
@@ -109,7 +119,7 @@ def point_mass():
         "norm": [visualize.zero, visualize.same, _norm],
         "all": [visualize.always_true],
     }
-    order = ["SparseNoCtrl", "Sparse", "DenseNoCtrl", "Dense", "GroundTruth"]
+    order = ["SparseNoCtrl", "SparseWithCtrl", "DenseNoCtrl", "DenseWithCtrl", "GroundTruth"]
     heatmap_kwargs["order"] = [f"evaluating_rewards/PointMass{label}-v0" for label in order]
     heatmap_kwargs["after_plot"] = horizontal_ticks
     del order
@@ -210,16 +220,7 @@ def plot_divergence_heatmap(
         log_dir: directory to write figures and other logging to.
         save_kwargs: passed through to `analysis.save_figs`.
         """
-    if "tex" in styles:
-        import matplotlib  # pylint:disable=import-outside-toplevel
-
-        matplotlib.use("pgf")  # PGF backend best for LaTeX
-        os.environ["TEXINPUTS"] = stylesheets.LATEX_DIR + ":"
-    styles = [stylesheets.STYLES[style] for style in styles]
-
-    import matplotlib.pyplot as plt  # pylint:disable=import-outside-toplevel
-
-    with plt.style.context(styles):
+    with stylesheets.setup_styles(styles):
         data_dir = data_root
         if data_subdir is not None:
             data_dir = os.path.join(data_dir, data_subdir)
diff --git a/src/evaluating_rewards/analysis/plot_pm_reward.py b/src/evaluating_rewards/analysis/plot_pm_reward.py
@@ -18,7 +18,7 @@
 """
 
 import os
-from typing import Any, Mapping, Sequence, Tuple
+from typing import Any, Iterable, Mapping, Sequence, Tuple
 
 import gym
 from imitation import util
@@ -28,7 +28,7 @@
 import xarray as xr
 
 from evaluating_rewards import serialize
-from evaluating_rewards.analysis import visualize
+from evaluating_rewards.analysis import stylesheets, visualize
 from evaluating_rewards.experiments import point_mass_analysis
 from evaluating_rewards.scripts import script_utils
 
@@ -55,10 +55,9 @@ def default_config():
     act_lim = lim  # action point range
 
     # Figure parameters
+    styles = ["paper", "pointmass-2col", "tex"]
     ncols = 3  # number of heatmaps per row
-    width = 5  # in
-    height = 4  # in
-    cbar_kwargs = {"fraction": 0.1, "pad": 0.05}
+    cbar_kwargs = {"fraction": 0.07, "pad": 0.02}
     fmt = "pdf"  # file type
     _ = locals()  # quieten flake8 unused variable warning
     del _
@@ -69,12 +68,11 @@ def default_config():
 
 @plot_pm_reward_ex.config
 def logging_config(log_root, models, reward_type, reward_path):
+    data_root = os.path.join(log_root, "model_comparison")
     if models is None:
-        save_path = os.path.join(
+        log_dir = os.path.join(
             log_root, reward_type.replace("/", "_"), reward_path.replace("/", "_")
         )
-    else:
-        save_path = util.make_unique_timestamp()
     _ = locals()  # quieten flake8 unused variable warning
     del _
 
@@ -87,7 +85,7 @@ def reward_config(models, reward_type, reward_path):
     del _
 
 
-STRIP_CONFIG = dict(pos_density=7, ncols=7, width=9.5, height=1.5)
+STRIP_CONFIG = dict(pos_density=7, ncols=7)
 
 
 @plot_pm_reward_ex.named_config
@@ -100,7 +98,6 @@ def strip():
 def dense_no_ctrl_sparsified():
     """PointMassDenseNoCtrl along with sparsified and ground-truth sparse reward."""
     locals().update(**STRIP_CONFIG)
-    height = 4.5
     pos_lim = 0.15
     # Use lists of tuples rather than OrderedDict as Sacred reorders dictionaries
     models = [
@@ -109,8 +106,6 @@ def dense_no_ctrl_sparsified():
             "Sparsified",
             "evaluating_rewards/RewardModel-v0",
             os.path.join(
-                serialize.get_output_dir(),
-                "model_comparison",
                 "evaluating_rewards_PointMassLine-v0",
                 "20190921_190606_58935eb0a51849508381daf1055d0360",
                 "model",
@@ -125,13 +120,18 @@ def dense_no_ctrl_sparsified():
 @plot_pm_reward_ex.named_config
 def fast():
     """Small config, intended for tests / debugging."""
-    density = 5  # noqa: F841  pylint:disable=unused-variable
+    density = 5
+    styles = ["paper", "pointmass-2col"]  # don't use TeX for tests
+    _ = locals()
+    del _
 
 
 @plot_pm_reward_ex.main
 def plot_pm_reward(
+    styles: Iterable[str],
     env_name: str,
     models: Sequence[Tuple[str, str, str]],
+    data_root: str,
     # Mesh parameters
     pos_lim: float,
     pos_density: int,
@@ -140,46 +140,45 @@ def plot_pm_reward(
     density: int,
     # Figure parameters
     ncols: int,
-    width: float,
-    height: float,
     cbar_kwargs: Mapping[str, Any],
-    save_path: str,
+    log_dir: str,
     fmt: str,
 ) -> xr.DataArray:
     """Entry-point into script to visualize a reward model for point mass."""
-    env = gym.make(env_name)
-    venv = vec_env.DummyVecEnv([lambda: env])
-    goal = np.array([0.0])
-
-    rewards = {}
-    with util.make_session():
-        for model_name, reward_type, reward_path in models:
-            model = serialize.load_reward(reward_type, reward_path, venv)
-            reward = point_mass_analysis.evaluate_reward_model(
-                env,
-                model,
-                goal=goal,
-                pos_lim=pos_lim,
-                pos_density=pos_density,
-                vel_lim=vel_lim,
-                act_lim=act_lim,
-                density=density,
-            )
-            rewards[model_name] = reward
-
-    if len(rewards) == 1:
-        reward = next(iter(rewards.values()))
-        kwargs = {"col_wrap": ncols}
-    else:
-        reward = xr.Dataset(rewards).to_array("model")
-        kwargs = {"row": "Model"}
-
-    fig = point_mass_analysis.plot_reward(
-        reward, figsize=(width, height), cbar_kwargs=cbar_kwargs, **kwargs
-    )
-    visualize.save_fig(save_path, fig, fmt=fmt)
-
-    return reward
+    with stylesheets.setup_styles(styles):
+        env = gym.make(env_name)
+        venv = vec_env.DummyVecEnv([lambda: env])
+        goal = np.array([0.0])
+
+        rewards = {}
+        with util.make_session():
+            for model_name, reward_type, reward_path in models:
+                reward_path = os.path.join(data_root, reward_path)
+                model = serialize.load_reward(reward_type, reward_path, venv)
+                reward = point_mass_analysis.evaluate_reward_model(
+                    env,
+                    model,
+                    goal=goal,
+                    pos_lim=pos_lim,
+                    pos_density=pos_density,
+                    vel_lim=vel_lim,
+                    act_lim=act_lim,
+                    density=density,
+                )
+                rewards[model_name] = reward
+
+        if len(rewards) == 1:
+            reward = next(iter(rewards.values()))
+            kwargs = {"col_wrap": ncols}
+        else:
+            reward = xr.Dataset(rewards).to_array("model")
+            kwargs = {"row": "Model"}
+
+        fig = point_mass_analysis.plot_reward(reward, cbar_kwargs=cbar_kwargs, **kwargs)
+        save_path = os.path.join(log_dir, "reward")
+        visualize.save_fig(save_path, fig, fmt=fmt)
+
+        return reward
 
 
 if __name__ == "__main__":
diff --git a/src/evaluating_rewards/analysis/stylesheets.py b/src/evaluating_rewards/analysis/stylesheets.py
@@ -1,6 +1,8 @@
 """matplotlib styles."""
 
+import contextlib
 import os
+from typing import Iterable, Iterator
 
 LATEX_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "latex")
 
@@ -16,6 +18,15 @@
         "xtick.labelsize": 10,
         "ytick.labelsize": 10,
     },
+    "pointmass-2col": {
+        "figure.figsize": (6.75, 2.5),
+        "figure.subplot.left": 0.2,
+        "figure.subplot.right": 1.0,
+        "figure.subplot.top": 0.92,
+        "figure.subplot.bottom": 0.16,
+        "figure.subplot.hspace": 0.2,
+        "figure.subplot.wspace": 0.25,
+    },
     "heatmap-2col": {"figure.figsize": (6.75, 5.0625)},
     "heatmap-1col": {
         "font.size": 8,
@@ -24,14 +35,49 @@
         "figure.figsize": (3.25, 2.4375),
         "figure.subplot.top": 0.99,
         "figure.subplot.bottom": 0.16,
-        "figure.subplot.left": 0.15,
+        "figure.subplot.left": 0.16,
         "figure.subplot.right": 0.91,
     },
     "tex": {
-        "backend": "pgf",
         "text.usetex": True,
         "pgf.texsystem": "pdflatex",
         "pgf.rcfonts": False,
-        "pgf.preamble": [r"\usepackage{figemojis}", r"\usepackage{times}"],
+        "pgf.preamble": [r"\usepackage{figsymbols}", r"\usepackage{times}"],
     },
 }
+
+
+@contextlib.contextmanager
+def setup_styles(styles: Iterable[str]) -> Iterator[None]:
+    """Context manager: uses specified matplotlib styles while in context.
+
+    Side-effect: if "tex" is in styles, will switch `matplotlib` backend to `pgf`.
+
+    Args:
+        styles: keys of styles defined in `STYLES`.
+
+    Returns:
+        A ContextManager. While entered in the context, the specified styles are applied,
+        and (if "tex" is one of the styles) the environment variable "TEXINPUTS" is set
+        to support custom macros. "TEXINPUTS" is restored to its previous state on exit."""
+    style_names = list(styles)  # materialize: `styles` may be a one-shot iterable
+    use_tex = "tex" in style_names  # decide once, so the cleanup below sees the same answer
+    old_tex_inputs = os.environ.get("TEXINPUTS")
+    try:
+        if use_tex:
+            import matplotlib  # pylint:disable=import-outside-toplevel
+
+            # PGF backend best for LaTeX. matplotlib probably already imported:
+            # but should be able to switch as non-interactive.
+            matplotlib.use("pgf", warn=False, force=True)
+            os.environ["TEXINPUTS"] = LATEX_DIR + ":"
+        import matplotlib.pyplot as plt  # pylint:disable=import-outside-toplevel
+
+        with plt.style.context([STYLES[name] for name in style_names]):
+            yield
+    finally:
+        if use_tex:
+            if old_tex_inputs is None:
+                os.environ.pop("TEXINPUTS", None)  # may be unset if setup failed early
+            else:
+                os.environ["TEXINPUTS"] = old_tex_inputs
diff --git a/src/evaluating_rewards/analysis/visualize.py b/src/evaluating_rewards/analysis/visualize.py
@@ -31,12 +31,10 @@
 TRANSFORMATIONS = {
     r"^evaluating_rewards[_/](.*)-v0": r"\1",
     r"^imitation[_/](.*)-v0": r"\1",
-    "^Zero-v0": "Zero",
-    "^PointMassDense": "Dense",
-    "^PointMassDenseNoCtrl": "Dense\nNo Ctrl",
-    "^PointMassGroundTruth": "Norm",
-    "^PointMassSparse": "Sparse",
-    "^PointMassSparseNoCtrl": "Sparse\nNo Ctrl",
+    "^Zero": r"\\zeroreward{}",
+    "^PointMassDense": r"\\dense{}",
+    "^PointMassGroundTruth": r"\\magnitude{}\\controlpenalty{}",
+    "^PointMassSparse": r"\\sparse{}",
     "^PointMazeGroundTruth": "GT",
     r"(.*)(Hopper|HalfCheetah)GroundTruth(.*)": r"\1\2\\running{}\3",
     r"(.*)(Hopper|HalfCheetah)Backflip(.*)": r"\1\2\\backflipping{}\3",
diff --git a/src/evaluating_rewards/experiments/point_mass_analysis.py b/src/evaluating_rewards/experiments/point_mass_analysis.py