Merge pull request #299 from SubstraFoundation/grad-fus-v2

arthurPignet · web-flow · commit c7426f5012b8 · 2021-01-05T17:58:37.000+01:00
Add gradient fusion method for multi-partner-learning
diff --git a/mplc/doc/documentation.md b/mplc/doc/documentation.md
@@ -301,7 +301,10 @@ There are several parameters influencing how the collaborative and distributed l
   - `'fedavg'`: stands for federated averaging
 
     ![Schema fedavg](../../img/collaborative_rounds_fedavg.png)
-
+    
+  - `'fedgrads'`: stands for gradient averaging. It is quite similar to federated averaging, but the gradients of each partner's loss are averaged before the optimization step, instead of averaging the models' weights after the optimization step.
+  Warning: this method requires a Keras model. In the current implementation, `gradient_pass_per_update` is set to 1.
+ 
   - `'seq-...'`: stands for sequential and comes with 2 variations, `'seq-pure'` with no aggregation at all, and `'seq-with-final-agg'` where an aggregation is performed before evaluating on the validation set and test set (on last mini-batch of each epoch) for mitigating impact when the very last subset on which the model is trained is of low quality, or corrupted, or just detrimental to the model performance.
 
     ![Schema seq](../../img/collaborative_rounds_seq.png)
@@ -358,7 +361,7 @@ There are several parameters influencing how the collaborative and distributed l
   - "Federated SBS linear"
   - "Federated SBS quadratic"
   - "Federated SBS constant"
-  - "LFlip"
+  - "Smodel"
   - "PVRL"
   ```
 
diff --git a/mplc/mpl_utils.py b/mplc/mpl_utils.py
@@ -6,6 +6,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import tensorflow as tf
 
 
 class History:
@@ -109,6 +110,23 @@ def save_data(self, binary=False):
         plt.close()
 
 
+#####################################
+#
+# tensorflow functions for aggregator
+#
+#####################################
+
+@tf.function
+def _tf_aggregete_grads(grads, agg_w):
+    global_grad = list()
+    for grad_per_layer in zip(*grads):
+        g = list()
+        for g_p, w in zip(grad_per_layer, agg_w):
+            g.append(g_p)
+        global_grad.append(tf.reduce_mean(g, axis=0))
+    return global_grad
+
+
 class Aggregator(ABC):
     name = 'abstract'
 
@@ -117,7 +135,7 @@ def __init__(self, mpl):
         :type mpl: MultiPartnerLearning
         """
         self.mpl = mpl
-        self.aggregation_weights = np.zeros(self.mpl.partners_count)
+        self.aggregation_weights = np.zeros(self.mpl.partners_count, dtype='float32')
 
     def __str__(self):
         return f'{self.name} aggregator'
@@ -136,13 +154,17 @@ def aggregate_model_weights(self):
 
         return new_weights
 
+    def aggregate_gradients(self):
+        assert isinstance(self.aggregation_weights, list), 'Aggregation weights must be a list.'
+        return _tf_aggregete_grads([p.grads for p in self.mpl.partners_list], self.aggregation_weights)
+
 
 class UniformAggregator(Aggregator):
     name = 'Uniform'
 
     def __init__(self, mpl):
         super(UniformAggregator, self).__init__(mpl)
-        self.aggregation_weights = [1 / self.mpl.partners_count] * self.mpl.partners_count
+        self.aggregation_weights = list(np.ones(self.mpl.partners_count, dtype='float32') * self.mpl.partners_count)
 
 
 class DataVolumeAggregator(Aggregator):
@@ -151,7 +173,7 @@ class DataVolumeAggregator(Aggregator):
     def __init__(self, mpl):
         super(DataVolumeAggregator, self).__init__(mpl)
         partners_sizes = [partner.data_volume for partner in self.mpl.partners_list]
-        self.aggregation_weights = partners_sizes / np.sum(partners_sizes)
+        self.aggregation_weights = list((partners_sizes / np.sum(partners_sizes).astype('float32')))
 
 
 class ScoresAggregator(Aggregator):
@@ -162,12 +184,16 @@ def __init__(self, mpl):
 
     def prepare_aggregation_weights(self):
         last_scores = [partner.last_round_score for partner in self.mpl.partners_list]
-        self.aggregation_weights = last_scores / np.sum(last_scores)
+        self.aggregation_weights = list((last_scores / np.sum(last_scores)).astype('float32'))
 
     def aggregate_model_weights(self):
         self.prepare_aggregation_weights()
         super(ScoresAggregator, self).aggregate_model_weights()
 
+    def aggregate_gradients(self):
+        self.prepare_aggregation_weights()
+        super(ScoresAggregator, self).aggregate_gradients()
+
 
 # Supported _aggregation weights approaches
 AGGREGATORS = {
diff --git a/mplc/multi_partner_learning.py b/mplc/multi_partner_learning.py
@@ -9,6 +9,7 @@
 from timeit import default_timer as timer
 
 import numpy as np
+import tensorflow as tf
 from loguru import logger
 from sklearn.metrics import confusion_matrix
 from tensorflow.keras import Input, Model
@@ -603,12 +604,73 @@ def fit_minibatch(self):
         logger.debug("End of S-Model collaborative round.")
 
 
+class FederatedGradients(MultiPartnerLearning):
+    def __init__(self, scenario, **kwargs):
+        super(FederatedGradients, self).__init__(scenario, **kwargs)
+        if self.partners_count == 1:
+            raise ValueError('Only one partner is provided. Please use the dedicated SinglePartnerLearning class')
+        self.model = self.build_model()
+
+    def fit_epoch(self):
+        # Split the train dataset in mini-batches
+        self.split_in_minibatches()
+        # Iterate over mini-batches and train
+        for i in range(self.minibatch_count):
+            self.minibatch_index = i
+            self.fit_minibatch()
+
+        self.minibatch_index = 0
+
+    def fit_minibatch(self):
+        """Proceed to a collaborative round with a federated averaging approach"""
+
+        logger.debug("Start new gradients fusion collaborative round ...")
+
+        # Starting model for each partner is the aggregated model from the previous mini-batch iteration
+        logger.info(f"(gradient fusion) Minibatch n°{self.minibatch_index} of epoch n°{self.epoch_index}, "
+                    f"init each partner's models with a copy of the global model")
+
+        for partner in self.partners_list:
+            # Evaluate and store accuracy of mini-batch start model
+            partner.model_weights = self.model_weights
+        self.eval_and_log_model_val_perf()
+
+        # Iterate over partners for training each individual model
+        for partner_index, partner in enumerate(self.partners_list):
+            with tf.GradientTape() as tape:
+                loss = self.model.loss(partner.minibatched_y_train[self.minibatch_index],
+                                       self.model(partner.minibatched_x_train[self.minibatch_index]))
+            partner.grads = tape.gradient(loss, self.model.trainable_weights)
+
+        global_grad = self.aggregator.aggregate_gradients()
+        self.model.optimizer.apply_gradients(zip(global_grad, self.model.trainable_weights))
+        self.model_weights = self.model.get_weights()
+
+        for partner_index, partner in enumerate(self.partners_list):
+            val_history = self.model.evaluate(self.val_data[0], self.val_data[1], verbose=False)
+            history = self.model.evaluate(partner.minibatched_x_train[self.minibatch_index],
+                                          partner.minibatched_y_train[self.minibatch_index], verbose=False)
+            history = {
+                "loss": [history[0]],
+                'accuracy': [history[1]],
+                'val_loss': [val_history[0]],
+                'val_accuracy': [val_history[1]]
+            }
+
+            # Log results of the round
+            self.log_partner_perf(partner.id, partner_index, history)
+
+        logger.debug("End of grads-fusion collaborative round.")
+
+
 # Supported multi-partner learning approaches
 
 MULTI_PARTNER_LEARNING_APPROACHES = {
     "fedavg": FederatedAverageLearning,
+    'fedgrads': FederatedGradients,
     "seq-pure": SequentialLearning,
     "seq-with-final-agg": SequentialWithFinalAggLearning,
     "seqavg": SequentialAverageLearning,
-    "lflip": MplSModel
+    "smodel": MplSModel,
+
 }
diff --git a/mplc/partner.py b/mplc/partner.py
@@ -76,6 +76,7 @@ def __init__(self, partner_parent, mpl):
         :type partner_parent: Partner
         :type mpl: MultiPartnerLearning
         """
+        self.grads = None
         self.mpl = mpl
         self.id = partner_parent.id
         self.batch_size = partner_parent.batch_size