diff --git a/mlfromscratch/supervised_learning/naive_bayes.py b/mlfromscratch/supervised_learning/naive_bayes.py index 9cbc4e08..317914bd 100644 --- a/mlfromscratch/supervised_learning/naive_bayes.py +++ b/mlfromscratch/supervised_learning/naive_bayes.py @@ -5,8 +5,13 @@ from mlfromscratch.utils import Plot, accuracy_score class NaiveBayes(): - """The Gaussian Naive Bayes classifier. """ + + def __init__(self): + """The Gaussian Naive Bayes classifier. """ + self.eps = 1e-30 # Added in denominator to prevent division by zero + def fit(self, X, y): + """Fit the model to a Dataset. """ self.X, self.y = X, y self.classes = np.unique(y) self.parameters = [] @@ -22,9 +27,8 @@ def fit(self, X, y): def _calculate_likelihood(self, mean, var, x): """ Gaussian likelihood of the data x given mean and var """ - eps = 1e-4 # Added in denominator to prevent division by zero - coeff = 1.0 / math.sqrt(2.0 * math.pi * var + eps) - exponent = math.exp(-(math.pow(x - mean, 2) / (2 * var + eps))) + coeff = 1.0 / math.sqrt(2.0 * math.pi * var + self.eps) + exponent = math.exp(-(math.pow(x - mean, 2) / (2 * var + self.eps))) return coeff * exponent def _calculate_prior(self, c): @@ -36,7 +40,7 @@ def _calculate_prior(self, c): def _classify(self, sample): """ Classification using Bayes Rule P(Y|X) = P(X|Y)*P(Y)/P(X), or Posterior = Likelihood * Prior / Scaling Factor - + P(Y|X) - The posterior is the probability that sample x is of class y given the feature values of x being distributed according to distribution of y and the prior. P(X|Y) - Likelihood of data X given class distribution Y. @@ -45,21 +49,21 @@ def _classify(self, sample): P(X) - Scales the posterior to make it a proper probability distribution. This term is ignored in this implementation since it doesn't affect which class distribution the sample is most likely to belong to. - Classifies the sample as the class that results in the largest P(Y|X) (posterior) - """ + """ posteriors = [] # Go through list of classes for i, c in enumerate(self.classes): # Initialize posterior as prior - posterior = self._calculate_prior(c) + posterior = np.log(self._calculate_prior(c)) # Naive assumption (independence): # P(x1,x2,x3|Y) = P(x1|Y)*P(x2|Y)*P(x3|Y) # Posterior is product of prior and likelihoods (ignoring scaling factor) for feature_value, params in zip(sample, self.parameters[i]): # Likelihood of feature value given distribution of feature values given y likelihood = self._calculate_likelihood(params["mean"], params["var"], feature_value) - posterior *= likelihood + # Calculate Loglikelihood to prevent overflowing in multiplications. + posterior += np.log(likelihood + self.eps) posteriors.append(posterior) # Return the class with the largest posterior probability return self.classes[np.argmax(posteriors)]