|
| 1 | +# Copyright 2017 Joachim van der Herten, Ivo Couckuyt |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +from .acquisition import Acquisition |
| 16 | +from ..pareto import Pareto |
| 17 | + |
| 18 | +from GPflow.param import DataHolder |
| 19 | +from GPflow import settings |
| 20 | + |
| 21 | +import numpy as np |
| 22 | +import tensorflow as tf |
| 23 | + |
# Shorthands for GPflow's global numerical settings: the jitter added to
# variances to keep them strictly positive, and the float dtype used when
# building TensorFlow graphs.
stability = settings.numerics.jitter_level
float_type = settings.dtypes.float_type
| 26 | + |
| 27 | + |
class HVProbabilityOfImprovement(Acquisition):
    """
    Hypervolume-based Probability of Improvement.

    A multiobjective acquisition function for multiobjective optimization. It is used to identify a complete Pareto set
    of non-dominated solutions.

    Key reference:

    ::

        @article{Couckuyt:2014,
            title={Fast calculation of multiobjective probability of improvement and expected improvement criteria for Pareto optimization},
            author={Couckuyt, Ivo and Deschrijver, Dirk and Dhaene, Tom},
            journal={Journal of Global Optimization},
            volume={60},
            number={3},
            pages={575--594},
            year={2014},
            publisher={Springer}
        }

    For a Pareto set :math:`\\mathcal{P}`, the non-dominated section of the objective space is denoted by :math:`A`.
    The :meth:`~..pareto.Pareto.hypervolume` of the dominated part of the space is denoted by :math:`\\mathcal{H}`
    and can be used as indicator for the optimality of the Pareto set (the higher the better).

    .. math::
       \\boldsymbol{\\mu} &= \\left[ \\mathbb{E} \\left[ f^{(1)}_{\\star}\\,|\\, \\mathbf x, \\mathbf y, \\mathbf x_{\\star} \\right],
       ..., \\mathbb{E} \\left[ f^{(p)}_{\\star}\\,|\\, \\mathbf x, \\mathbf y, \\mathbf x_{\\star} \\right]\\right] \\\\
       I\\left(\\boldsymbol{\\mu}, \\mathcal{P}\\right) &=
       \\begin{cases} \\left( \\mathcal{H} \\left( \\mathcal{P} \\cup \\boldsymbol{\\mu} \\right) - \\mathcal{H}
       \\left( \\mathcal{P} \\right) \\right) ~ if ~ \\boldsymbol{\\mu} \\in A
       \\\\ 0 ~ \\mbox{otherwise} \\end{cases} \\\\
       \\alpha(\\mathbf x_{\\star}) &= I\\left(\\boldsymbol{\\mu}, \\mathcal{P}\\right) p\\left(\\mathbf x_{\\star} \\in A \\right)

    Attributes:
        pareto: An instance of :class:`~..pareto.Pareto`.
    """

    def __init__(self, models):
        """
        :param models: A list of (possibly multioutput) GPflow models representing our belief of the objectives.
        """
        super(HVProbabilityOfImprovement, self).__init__(models)
        # Multiobjective criterion: requires at least two objective columns.
        assert self.data[1].shape[1] > 1
        # NOTE: np.hstack must receive a sequence of arrays; a generator
        # expression is deprecated/rejected by newer NumPy releases, hence the
        # list comprehension.
        self.pareto = Pareto(np.hstack([m.predict_f(self.data[0])[0] for m in self.models]))
        self.reference = DataHolder(self._estimate_reference())

    def _estimate_reference(self):
        """
        Estimate an anti-ideal reference point for the hypervolume computation.

        Takes the per-objective maximum of the current Pareto front and adds a
        margin proportional to the front's range (the margin shrinks as the
        front contains more points).

        :return: reference point, (1 x objectives) ndarray
        """
        pf = self.pareto.front.value
        f = np.max(pf, axis=0, keepdims=True) - np.min(pf, axis=0, keepdims=True)
        return np.max(pf, axis=0, keepdims=True) + 2 * f / pf.shape[0]

    def setup(self):
        """
        Pre-computes the Pareto set and cell bounds for integrating over the non-dominated region.
        """
        super(HVProbabilityOfImprovement, self).setup()

        # Obtain hypervolume cell bounds, use prediction mean.
        # List (not generator) input for np.hstack, see __init__.
        F = np.hstack([m.predict_f(self.data[0])[0] for m in self.models])
        self.pareto.update(F)

        # Recalculate the reference point for the updated front.
        self.reference = self._estimate_reference()

    def build_acquisition(self, Xcand):
        """
        Build the TF graph computing the hypervolume-based PoI for candidates.

        :param Xcand: candidate points, (N x dim) tensor
        :return: acquisition scores, (N x 1) tensor
        """
        outdim = tf.shape(self.data[1])[1]
        num_cells = tf.shape(self.pareto.bounds.lb)[0]
        N = tf.shape(Xcand)[0]

        # Extended Pareto front: prepend -inf and append the reference point so
        # cell bound indices can address both extremes.
        pf_ext = tf.concat([-np.inf * tf.ones([1, outdim], dtype=float_type), self.pareto.front, self.reference], 0)

        # Predictions for candidates, concatenate columns over the models.
        preds = [m.build_predict(Xcand) for m in self.models]
        candidate_mean, candidate_var = (tf.concat(moment, 1) for moment in zip(*preds))
        candidate_var = tf.maximum(candidate_var, stability)  # avoid zeros

        # Calculate the cdf's for all candidates for every predictive distribution in the data points.
        normal = tf.contrib.distributions.Normal(candidate_mean, tf.sqrt(candidate_var))
        Phi = tf.transpose(normal.cdf(tf.expand_dims(pf_ext, 1)), [1, 0, 2])  # N x pf_ext_size x outdim

        # tf.gather_nd indices for bound points.
        col_idx = tf.tile(tf.range(outdim), (num_cells,))
        ub_idx = tf.stack((tf.reshape(self.pareto.bounds.ub, [-1]), col_idx), axis=1)  # (num_cells*outdim x 2)
        lb_idx = tf.stack((tf.reshape(self.pareto.bounds.lb, [-1]), col_idx), axis=1)  # (num_cells*outdim x 2)

        # Calculate PoI: per cell, the probability mass between lower and upper
        # bound, multiplied over objectives and summed over cells.
        P1 = tf.transpose(tf.gather_nd(tf.transpose(Phi, perm=[1, 2, 0]), ub_idx))  # N x num_cell*outdim
        P2 = tf.transpose(tf.gather_nd(tf.transpose(Phi, perm=[1, 2, 0]), lb_idx))  # N x num_cell*outdim
        P = tf.reshape(P1 - P2, [N, num_cells, outdim])
        # keep_dims is the TF1-era spelling (renamed keepdims in later TF).
        PoI = tf.reduce_sum(tf.reduce_prod(P, axis=2), axis=1, keep_dims=True)  # N x 1

        # Calculate hypervolume contribution of the candidate means: sum the
        # volumes of the cells whose upper bound strictly dominates the mean.
        ub_points = tf.reshape(tf.gather_nd(pf_ext, ub_idx), [num_cells, outdim])
        lb_points = tf.reshape(tf.gather_nd(pf_ext, lb_idx), [num_cells, outdim])

        splus_valid = tf.reduce_all(tf.tile(tf.expand_dims(ub_points, 1), [1, N, 1]) > candidate_mean,
                                    axis=2)  # num_cells x N
        splus_idx = tf.expand_dims(tf.cast(splus_valid, dtype=float_type), -1)  # num_cells x N x 1
        splus_lb = tf.tile(tf.expand_dims(lb_points, 1), [1, N, 1])  # num_cells x N x outdim
        splus_lb = tf.maximum(splus_lb, candidate_mean)  # num_cells x N x outdim
        splus_ub = tf.tile(tf.expand_dims(ub_points, 1), [1, N, 1])  # num_cells x N x outdim
        splus = tf.concat([splus_idx, splus_ub - splus_lb], axis=2)  # num_cells x N x (outdim+1)
        Hv = tf.transpose(tf.reduce_sum(tf.reduce_prod(splus, axis=2), axis=0, keep_dims=True))  # N x 1

        # Acquisition is the product of hypervolume contribution and PoI.
        return tf.multiply(Hv, PoI)