|
| 1 | +# perceptron.r |
| 2 | +# Perceptron classifier implementation in R |
| 3 | +# A simple linear classifier using the perceptron learning rule. |
| 4 | +# Supports binary classification and multiclass classification |
| 5 | +# using direct multiclass perceptron updates. |
| 6 | +# Time Complexity: O(n_epochs * n_samples * n_features) |
| 7 | +# Space Complexity: O(n_classes * n_features) |
| 8 | + |
| 9 | +library(R6) |
| 10 | + |
| 11 | +Perceptron <- R6Class("Perceptron", |
| 12 | + public = list( |
| 13 | + learning_rate = NULL, |
| 14 | + n_epochs = NULL, |
| 15 | + shuffle = NULL, |
| 16 | + fit_intercept = NULL, |
| 17 | + random_state = NULL, |
| 18 | + classes = NULL, |
| 19 | + weights = NULL, |
| 20 | + is_multiclass = NULL, |
| 21 | + |
| 22 | + initialize = function(learning_rate = 0.1, |
| 23 | + n_epochs = 100, |
| 24 | + shuffle = TRUE, |
| 25 | + fit_intercept = TRUE, |
| 26 | + random_state = NULL) { |
| 27 | + self$learning_rate <- learning_rate |
| 28 | + self$n_epochs <- n_epochs |
| 29 | + self$shuffle <- shuffle |
| 30 | + self$fit_intercept <- fit_intercept |
| 31 | + self$random_state <- random_state |
| 32 | + }, |
| 33 | + |
| 34 | + fit = function(X, y) { |
| 35 | + if (is.data.frame(X)) X <- as.matrix(X) |
| 36 | + if (!is.matrix(X)) stop("X must be a numeric matrix or data.frame.") |
| 37 | + if (!is.numeric(X)) stop("X must contain numeric features.") |
| 38 | + if (any(is.na(X))) stop("X must not contain missing values.") |
| 39 | + |
| 40 | + if (is.character(y)) y <- factor(y) |
| 41 | + if (is.factor(y)) { |
| 42 | + self$classes <- levels(y) |
| 43 | + } else { |
| 44 | + self$classes <- sort(unique(y)) |
| 45 | + } |
| 46 | + |
| 47 | + if (length(y) != nrow(X)) stop("Length of y must match rows of X.") |
| 48 | + if (length(self$classes) < 2) stop("Perceptron requires at least two classes.") |
| 49 | + |
| 50 | + X <- as.matrix(X) |
| 51 | + n_samples <- nrow(X) |
| 52 | + n_features <- ncol(X) |
| 53 | + if (self$fit_intercept) { |
| 54 | + X <- cbind(1, X) |
| 55 | + n_features <- n_features + 1 |
| 56 | + } |
| 57 | + |
| 58 | + if (length(self$classes) == 2) { |
| 59 | + self$is_multiclass <- FALSE |
| 60 | + self$weights <- rep(0, n_features) |
| 61 | + } else { |
| 62 | + self$is_multiclass <- TRUE |
| 63 | + self$weights <- matrix(0, nrow = length(self$classes), ncol = n_features) |
| 64 | + } |
| 65 | + |
| 66 | + if (!is.null(self$random_state)) { |
| 67 | + set.seed(self$random_state) |
| 68 | + } |
| 69 | + |
| 70 | + y_encoded <- self$encode_labels(y) |
| 71 | + |
| 72 | + for (epoch in seq_len(self$n_epochs)) { |
| 73 | + indices <- seq_len(n_samples) |
| 74 | + if (self$shuffle) { |
| 75 | + indices <- sample(indices) |
| 76 | + } |
| 77 | + |
| 78 | + for (i in indices) { |
| 79 | + x_i <- X[i, ] |
| 80 | + y_i <- y_encoded[i] |
| 81 | + |
| 82 | + if (self$is_multiclass) { |
| 83 | + scores <- self$weights %*% x_i |
| 84 | + predicted <- which.max(scores) |
| 85 | + if (predicted != y_i) { |
| 86 | + self$weights[y_i, ] <- self$weights[y_i, ] + self$learning_rate * x_i |
| 87 | + self$weights[predicted, ] <- self$weights[predicted, ] - self$learning_rate * x_i |
| 88 | + } |
| 89 | + } else { |
| 90 | + score <- sum(self$weights * x_i) |
| 91 | + if (y_i * score <= 0) { |
| 92 | + self$weights <- self$weights + self$learning_rate * y_i * x_i |
| 93 | + } |
| 94 | + } |
| 95 | + } |
| 96 | + } |
| 97 | + |
| 98 | + invisible(self) |
| 99 | + }, |
| 100 | + |
| 101 | + predict = function(X_new) { |
| 102 | + if (is.data.frame(X_new)) X_new <- as.matrix(X_new) |
| 103 | + if (is.vector(X_new)) X_new <- matrix(X_new, nrow = 1) |
| 104 | + if (!is.matrix(X_new)) stop("X_new must be a numeric matrix, data.frame, or vector.") |
| 105 | + if (!is.numeric(X_new)) stop("X_new must contain numeric features.") |
| 106 | + if (any(is.na(X_new))) stop("X_new must not contain missing values.") |
| 107 | + |
| 108 | + if (self$fit_intercept) { |
| 109 | + X_new <- cbind(1, X_new) |
| 110 | + } |
| 111 | + |
| 112 | + if (self$is_multiclass) { |
| 113 | + scores <- X_new %*% t(self$weights) |
| 114 | + predicted_idx <- apply(scores, 1, which.max) |
| 115 | + return(self$classes[predicted_idx]) |
| 116 | + } |
| 117 | + |
| 118 | + raw_scores <- as.numeric(X_new %*% self$weights) |
| 119 | + labels <- self$classes |
| 120 | + predictions <- ifelse(raw_scores >= 0, labels[2], labels[1]) |
| 121 | + return(predictions) |
| 122 | + }, |
| 123 | + |
| 124 | + score = function(X, y) { |
| 125 | + predictions <- self$predict(X) |
| 126 | + if (is.factor(y) || is.character(y)) { |
| 127 | + y <- as.character(y) |
| 128 | + predictions <- as.character(predictions) |
| 129 | + } |
| 130 | + mean(predictions == y) |
| 131 | + }, |
| 132 | + |
| 133 | + encode_labels = function(y) { |
| 134 | + if (self$is_multiclass) { |
| 135 | + if (is.factor(y)) { |
| 136 | + return(as.integer(y)) |
| 137 | + } |
| 138 | + return(match(y, self$classes)) |
| 139 | + } |
| 140 | + |
| 141 | + if (is.factor(y)) { |
| 142 | + y <- as.character(y) |
| 143 | + } |
| 144 | + labels <- self$classes |
| 145 | + if (is.factor(labels)) { |
| 146 | + labels <- as.character(labels) |
| 147 | + } |
| 148 | + if (is.null(labels) || length(labels) == 0) { |
| 149 | + labels <- unique(y) |
| 150 | + if (length(labels) != 2) stop("Binary perceptron requires exactly two classes.") |
| 151 | + self$classes <- labels |
| 152 | + } else { |
| 153 | + if (length(labels) != 2) stop("Binary perceptron requires exactly two classes.") |
| 154 | + } |
| 155 | + if (any(!y %in% labels)) { |
| 156 | + stop("Binary perceptron received labels not present in self$classes.") |
| 157 | + } |
| 158 | + y_bin <- ifelse(y == labels[2], 1, -1) |
| 159 | + return(y_bin) |
| 160 | + } |
| 161 | + ) |
| 162 | +) |
| 163 | + |
| 164 | +# Example usage: |
| 165 | +# data(iris) |
| 166 | +# X <- as.matrix(iris[, 1:4]) |
| 167 | +# y <- iris$Species |
| 168 | +# model <- Perceptron$new(learning_rate = 0.1, n_epochs = 50, shuffle = TRUE) |
| 169 | +# model$fit(X, y) |
| 170 | +# preds <- model$predict(X) |
| 171 | +# cat('Training accuracy:', model$score(X, y), '\n') |
0 commit comments