Merge pull request #447 from pints-team/i444-eggbox

MichaelClerx · web-flow · commit 273e1fcee150 · 2018-08-24T19:02:25.000+01:00
Simple egg box toy logpdf for functional testing
diff --git a/docs/source/toy/index.rst b/docs/source/toy/index.rst
@@ -23,5 +23,6 @@ examples.
     repressilator_model
     rosenbrock
     sir_model
+    simple_egg_box_logpdf
     twisted_gaussian_logpdf
 
diff --git a/docs/source/toy/simple_egg_box_logpdf.rst b/docs/source/toy/simple_egg_box_logpdf.rst
@@ -0,0 +1,8 @@
+***************************
+Simple Egg Box Distribution
+***************************
+
+.. module:: pints.toy
+
+.. autoclass:: SimpleEggBoxLogPDF
+
diff --git a/examples/README.md b/examples/README.md
@@ -69,5 +69,6 @@ relevant code.
 
 - [Multimodal normal distribution](./toy-distribution-multimodal-normal.ipynb)
 - [Rosenbrock function](./toy-distribution-rosenbrock.ipynb)
+- [Simple Egg Box](./toy-distribution-simple-egg-box.ipynb)
 - [Twisted Gaussian Banana](./toy-distribution-twisted-gaussian.ipynb)
 
diff --git a/examples/toy-distribution-simple-egg-box.ipynb b/examples/toy-distribution-simple-egg-box.ipynb
diff --git a/pints/tests/test_toy_simple_egg_box_logpdf.py b/pints/tests/test_toy_simple_egg_box_logpdf.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+#
+# Tests the simple egg box toy LogPDF.
+#
+# This file is part of PINTS.
+#  Copyright (c) 2017-2018, University of Oxford.
+#  For licensing information, see the LICENSE file distributed with the PINTS
+#  software package.
+#
+import pints
+import pints.toy
+import unittest
+import numpy as np
+
+
+class TestSimpleEggBoxLogPDF(unittest.TestCase):
+    """
+    Tests the simple egg box logpdf toy distribution.
+    """
+    def test_simple_egg_box_logpdf(self):
+        # Test basics: two parameters, and a scalar value when evaluated
+        f = pints.toy.SimpleEggBoxLogPDF()
+        self.assertEqual(f.n_parameters(), 2)
+        self.assertTrue(np.isscalar(f(np.zeros(2))))
+
+        # Test construction errors (sigma and r must both be positive)
+        self.assertRaises(
+            ValueError, pints.toy.SimpleEggBoxLogPDF, sigma=0)
+        self.assertRaises(
+            ValueError, pints.toy.SimpleEggBoxLogPDF, r=0)
+
+    def test_sampling_and_divergence(self):
+        """
+        Tests ``sample()`` and ``kl_score()`` of :class:`SimpleEggBoxLogPDF`.
+        """
+        # Ensure consistent output: the assertions below depend on the seed
+        np.random.seed(1)
+
+        # Create some log pdfs
+        log_pdf1 = pints.toy.SimpleEggBoxLogPDF(2, 4)
+        log_pdf2 = pints.toy.SimpleEggBoxLogPDF(3, 6)
+
+        # Generate samples from each
+        n = 100
+        samples1 = log_pdf1.sample(n)
+        samples2 = log_pdf2.sample(n)
+
+        # Test divergence scores: own samples must score lower than foreign
+        s11 = log_pdf1.kl_score(samples1)
+        s12 = log_pdf1.kl_score(samples2)
+        self.assertLess(s11, s12)
+        s21 = log_pdf2.kl_score(samples1)
+        s22 = log_pdf2.kl_score(samples2)
+        self.assertLess(s22, s21)
+
+        # Test penalising if a mode is missing (x < 0, y > 0 is left empty)
+        samples3 = np.vstack((
+            samples2[samples2[:, 0] > 0],   # Right half (x > 0)
+            samples2[samples2[:, 1] < 0],   # Lower half (y < 0)
+        ))
+        s23 = log_pdf2.kl_score(samples3)
+        self.assertLess(s22, s23)
+        self.assertGreater(s23 / s22, 100)
+
+        # Test sample arguments
+        self.assertRaises(ValueError, log_pdf1.sample, -1)
+
+        # Test shape checking of sample() output and kl_score() input
+        self.assertEqual(samples1.shape, (n, 2))
+        x = np.ones((n, 3))
+        self.assertRaises(ValueError, log_pdf1.kl_score, x)
+        x = np.ones((n, 2, 2))
+        self.assertRaises(ValueError, log_pdf1.kl_score, x)
+
+
+if __name__ == '__main__':
+    # No explicit handling of -v is needed: unittest.main() parses sys.argv
+    # itself and raises its verbosity when that flag is present, and
+    # nothing in this module reads a separate debug switch.
+    print('Add -v for more debug output')
+    unittest.main()
+
diff --git a/pints/toy/__init__.py b/pints/toy/__init__.py
@@ -23,6 +23,7 @@
 from ._parabola import ParabolicError                               # noqa
 from ._repressilator_model import RepressilatorModel                # noqa
 from ._rosenbrock import RosenbrockError, RosenbrockLogPDF          # noqa
+from ._simple_egg_box import SimpleEggBoxLogPDF                     # noqa
 from ._sir_model import SIRModel                                    # noqa
 from ._twisted_gaussian_banana import TwistedGaussianLogPDF         # noqa
 
diff --git a/pints/toy/_simple_egg_box.py b/pints/toy/_simple_egg_box.py
@@ -0,0 +1,137 @@
+#
+# Simple egg-box LogPDF
+#
+# This file is part of PINTS.
+#  Copyright (c) 2017, University of Oxford.
+#  For licensing information, see the LICENSE file distributed with the PINTS
+#  software package.
+#
+from __future__ import absolute_import, division
+from __future__ import print_function, unicode_literals
+import pints
+import numpy as np
+import scipy.stats
+
+
+class SimpleEggBoxLogPDF(pints.LogPDF):
+    """
+    Two-dimensional multimodal Normal distribution, with four more-or-less
+    independent modes, each centered in a different quadrant.
+
+    Arguments:
+
+    ``sigma``
+        The variance of each mode. Must be greater than zero.
+    ``r``
+        The modes will be located at ``(d, d)``, ``(-d, d)``, ``(-d, -d)``,
+        and ``(d, -d)``, where ``d = r * sigma``. Must be greater than zero.
+
+    *Extends:* :class:`pints.LogPDF`.
+    """
+    def __init__(self, sigma=2, r=4):
+        # Sigma for every mode
+        self._sigma = float(sigma)
+        if self._sigma <= 0:
+            raise ValueError('Sigma must be greater than zero.')
+
+        # Set modes
+        r = float(r)
+        if r <= 0:
+            raise ValueError('Argument r must be greater than zero.')
+        d = r * self._sigma
+        self._modes = [
+            [d, d],
+            [-d, d],
+            [-d, -d],
+            [d, -d],
+        ]
+
+        # Set covariances (from the validated float, not the raw argument)
+        self._covs = [np.eye(2) * self._sigma] * 4
+
+        # Create scipy 'random variables'
+        self._vars = [
+            scipy.stats.multivariate_normal(mode, self._covs[i])
+            for i, mode in enumerate(self._modes)]
+
+    def __call__(self, x):
+        """ Returns the log of the summed densities of the four modes. """
+        f = np.sum([var.pdf(x) for var in self._vars])
+        return -float('inf') if f == 0 else np.log(f)
+
+    def n_parameters(self):
+        """ See :meth:`pints.LogPDF.n_parameters()`. """
+        return 2
+
+    def kl_score(self, samples):
+        """
+        Calculates a heuristic score for how well a given set of samples
+        matches this LogPDF's underlying distribution, based on
+        Kullback-Leibler divergence of the individual modes. This only works
+        well if the modes are nicely separated, i.e. for larger values of
+        ``r``.
+
+        The ``samples`` must be given as an ``n x 2`` array (a ``ValueError``
+        is raised otherwise). Lower scores indicate a better match. As a
+        quadrant holding only one or two samples has no usable covariance
+        estimate, scores for very small sample sets are not meaningful.
+        """
+        dimension = 2
+
+        # Check size of input
+        samples = np.asarray(samples)
+        if samples.ndim != 2:
+            raise ValueError('Given samples list must be n x 2.')
+        if samples.shape[1] != dimension:
+            raise ValueError(
+                'Given samples must have length ' + str(dimension))
+
+        # Separate samples into quadrants
+        q12 = samples[samples[:, 1] >= 0]
+        q34 = samples[samples[:, 1] < 0]
+        q1 = q12[q12[:, 0] >= 0]
+        q2 = q12[q12[:, 0] < 0]
+        q3 = q34[q34[:, 0] < 0]
+        q4 = q34[q34[:, 0] >= 0]
+        qs = [q1, q2, q3, q4]
+
+        # Calculate kullback-leibler for each quadrant-mode pair
+        dkls = np.array([0, 0, 0, 0], dtype=float)
+        for i, q in enumerate(qs):
+            if len(q) == 0:
+                continue
+            m0 = np.mean(q, axis=0)
+            s0 = np.cov(q.T)
+            m1 = self._modes[i]
+            s1 = self._covs[i]
+            cov_inv = np.linalg.inv(s1)
+            dkl1 = np.trace(cov_inv.dot(s0))
+            dkl2 = np.dot((m1 - m0).T, cov_inv).dot(m1 - m0)
+            dkl3 = np.log(np.linalg.det(s1) / np.linalg.det(s0))
+            dkls[i] = 0.5 * (dkl1 + dkl2 + dkl3 - dimension)
+
+        # No samples in a given quadrant? Then use 100 times max divergence
+        dkls[dkls == 0] = 100 * np.max(dkls)
+
+        # Sum divergences, penalise unequal distribution of the points
+        ns = [len(q) for q in qs]
+        penalty2 = np.max(ns) / max(1, np.min(ns))
+        return np.sum(dkls) * penalty2
+
+    def sample(self, n):
+        """
+        Returns ``n`` samples from the underlying distribution, as an
+        ``n x 2`` array. Raises a ``ValueError`` if ``n`` is negative.
+        """
+        if n < 0:
+            raise ValueError('Number of samples cannot be negative.')
+
+        # Calculate number of samples from each distribution
+        weights = [0.25] * 4
+        ns = np.sum(scipy.stats.multinomial.rvs(1, weights, n), axis=0)
+
+        # Draw samples from each distribution, join them together, shuffle
+        x = np.vstack([v.rvs(ns[i]) for i, v in enumerate(self._vars)])
+        np.random.shuffle(x)
+        return x
+