From 13f837737f56c7d74318b22f16ca7e52f2b5f527 Mon Sep 17 00:00:00 2001 From: Diogo Ribeiro Date: Fri, 10 Apr 2026 15:37:47 +0100 Subject: [PATCH 1/2] feat(dynamic_programming): add Levenshtein edit distance algorithm with documentation --- DIRECTORY.md | 1 + documentation/edit_distance.md | 16 +++++ dynamic_programming/edit_distance.r | 105 ++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+) create mode 100644 documentation/edit_distance.md create mode 100644 dynamic_programming/edit_distance.r diff --git a/DIRECTORY.md b/DIRECTORY.md index a23287b5..964b0ae8 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -60,6 +60,7 @@ * [Matrix Chain Multiplication](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/matrix_chain_multiplication.r) * [Minimum Path Sum](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/minimum_path_sum.r) * [Subset Sum](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/subset_sum.r) + * [Edit Distance](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/edit_distance.r) ## Graph Algorithms * [Bellman Ford Shortest Path](https://github.com/TheAlgorithms/R/blob/HEAD/graph_algorithms/bellman_ford_shortest_path.r) diff --git a/documentation/edit_distance.md b/documentation/edit_distance.md new file mode 100644 index 00000000..0ac30449 --- /dev/null +++ b/documentation/edit_distance.md @@ -0,0 +1,16 @@ +# Edit Distance + +Levenshtein edit distance calculates the minimum number of single-character insertions, deletions, and substitutions required to transform one string into another. 
# edit_distance.r
# Levenshtein edit distance algorithm in R
# Computes the minimum number of insertions, deletions, and substitutions
# required to transform one string into another.
# Time Complexity: O(m * n)
# Space Complexity: O(m * n)

# Validate that both arguments are single, non-missing character strings.
# NA is rejected explicitly: nchar(NA_character_) is NA, which would otherwise
# surface later as an unrelated matrix-dimension error.
.edit_distance_check <- function(str1, str2) {
  if (!is.character(str1) || !is.character(str2)) {
    stop("Both inputs must be character strings.", call. = FALSE)
  }
  if (length(str1) != 1 || length(str2) != 1) {
    stop("Each input must be a single string.", call. = FALSE)
  }
  if (is.na(str1) || is.na(str2)) {
    stop("Inputs must not be NA.", call. = FALSE)
  }
  invisible(NULL)
}

# Split one string into a vector of single characters (character(0) for "").
.edit_distance_chars <- function(s) {
  vapply(
    seq_len(nchar(s)),
    function(k) substr(s, k, k),
    character(1)
  )
}

# Build the (m + 1) x (n + 1) integer table in which entry [i + 1, j + 1] is
# the edit distance between the first i characters of chars1 and the first j
# characters of chars2.
.edit_distance_table <- function(chars1, chars2) {
  m <- length(chars1)
  n <- length(chars2)
  dp <- matrix(0L, nrow = m + 1L, ncol = n + 1L)

  # Base cases: turning a prefix into the empty string (or back) costs its
  # length.
  dp[, 1] <- 0L:m
  dp[1, ] <- 0L:n

  # seq_len() yields an empty loop when a string is empty; 2:(m + 1) would
  # count backwards (2, 1) and index past the table.
  for (i in seq_len(m)) {
    for (j in seq_len(n)) {
      cost <- if (chars1[i] == chars2[j]) 0L else 1L
      dp[i + 1L, j + 1L] <- min(
        dp[i, j + 1L] + 1L, # deletion
        dp[i + 1L, j] + 1L, # insertion
        dp[i, j] + cost     # substitution (or free match)
      )
    }
  }

  dp
}

#' Compute the Levenshtein distance between two strings
#'
#' @param str1 First string: a single, non-NA character value.
#' @param str2 Second string: a single, non-NA character value.
#' @return Integer edit distance.
edit_distance <- function(str1, str2) {
  .edit_distance_check(str1, str2)
  dp <- .edit_distance_table(
    .edit_distance_chars(str1),
    .edit_distance_chars(str2)
  )
  dp[nrow(dp), ncol(dp)]
}

#' Compute the edit distance and reconstruct an optimal alignment path
#'
#' The path is recovered by walking the table back from the bottom-right
#' corner, preferring a diagonal step (match / substitute), then a deletion,
#' then an insertion.
#'
#' @param str1 First string: a single, non-NA character value.
#' @param str2 Second string: a single, non-NA character value.
#' @return List with `distance` (integer), `operations` (character vector, in
#'   left-to-right order) and `dp_table` (the full integer DP matrix).
edit_distance_with_path <- function(str1, str2) {
  .edit_distance_check(str1, str2)
  chars1 <- .edit_distance_chars(str1)
  chars2 <- .edit_distance_chars(str2)
  dp <- .edit_distance_table(chars1, chars2)

  m <- length(chars1)
  n <- length(chars2)

  # i / j count the characters of each string that are still unaligned.
  i <- m
  j <- n

  # A path has at most m + n steps; fill the buffer from the back so the
  # operations come out in forward order without repeated concatenation.
  ops <- character(m + n)
  pos <- m + n

  while (i > 0L || j > 0L) {
    here <- dp[i + 1L, j + 1L]
    differs <- FALSE
    diagonal <- FALSE
    if (i > 0L && j > 0L) {
      differs <- chars1[i] != chars2[j]
      diagonal <- here == dp[i, j] + differs
    }

    if (diagonal) {
      ops[pos] <- if (differs) {
        sprintf("substitute '%s' -> '%s'", chars1[i], chars2[j])
      } else {
        "match"
      }
      i <- i - 1L
      j <- j - 1L
    } else if (i > 0L && here == dp[i, j + 1L] + 1L) {
      ops[pos] <- sprintf("delete '%s'", chars1[i])
      i <- i - 1L
    } else {
      ops[pos] <- sprintf("insert '%s'", chars2[j])
      j <- j - 1L
    }
    pos <- pos - 1L
  }

  # Drop the unused slots at the front of the buffer.
  ops <- ops[pos + seq_len(m + n - pos)]

  list(
    distance = dp[m + 1L, n + 1L],
    operations = ops,
    dp_table = dp
  )
}

# Example usage:
# print(edit_distance("kitten", "sitting"))
# result <- edit_distance_with_path("kitten", "sitting")
# print(result$distance)
# print(result$operations)
Multiplication](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/matrix_chain_multiplication.r) * [Minimum Path Sum](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/minimum_path_sum.r) * [Subset Sum](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/subset_sum.r) - * [Edit Distance](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/edit_distance.r) + * [Edit Distance (Levenshtein, Dynamic Programming)](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/edit_distance.r) ## Graph Algorithms * [Bellman Ford Shortest Path](https://github.com/TheAlgorithms/R/blob/HEAD/graph_algorithms/bellman_ford_shortest_path.r) diff --git a/documentation/edit_distance.md b/documentation/edit_distance.md index 0ac30449..aa2c6786 100644 --- a/documentation/edit_distance.md +++ b/documentation/edit_distance.md @@ -3,7 +3,7 @@ Levenshtein edit distance calculates the minimum number of single-character insertions, deletions, and substitutions required to transform one string into another. 
``` r -source("../dynamic_programming/edit_distance.r") +source("dynamic_programming/edit_distance.r") # Compute the edit distance distance <- edit_distance("kitten", "sitting") diff --git a/dynamic_programming/edit_distance.r b/dynamic_programming/edit_distance.r index 5e0e6427..09ed434b 100644 --- a/dynamic_programming/edit_distance.r +++ b/dynamic_programming/edit_distance.r @@ -25,8 +25,8 @@ edit_distance <- function(str1, str2) { dp[, 1] <- seq(0L, m) dp[1, ] <- seq(0L, n) - for (i in 2:(m + 1)) { - for (j in 2:(n + 1)) { + for (i in seq_len(m) + 1L) { + for (j in seq_len(n) + 1L) { cost <- if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) 0L else 1L dp[i, j] <- min( dp[i - 1, j] + 1L, # deletion @@ -57,8 +57,8 @@ edit_distance_with_path <- function(str1, str2) { dp[, 1] <- seq(0L, m) dp[1, ] <- seq(0L, n) - for (i in 2:(m + 1)) { - for (j in 2:(n + 1)) { + for (i in seq_len(m) + 1L) { + for (j in seq_len(n) + 1L) { cost <- if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) 0L else 1L dp[i, j] <- min( dp[i - 1, j] + 1L,