|
| 1 | +//! Longest Common Subsequence problem implementation. |
| 2 | +//! |
| 3 | +//! Given k strings over an alphabet, find the longest string that is a |
| 4 | +//! subsequence of every input string. NP-hard for variable k (Maier, 1978). |
| 5 | +
|
| 6 | +use serde::{Deserialize, Serialize}; |
| 7 | + |
| 8 | +use crate::{ |
| 9 | + registry::{FieldInfo, ProblemSchemaEntry}, |
| 10 | + traits::{OptimizationProblem, Problem}, |
| 11 | + types::{Direction, SolutionSize}, |
| 12 | +}; |
| 13 | + |
| 14 | +inventory::submit! { |
| 15 | + ProblemSchemaEntry { |
| 16 | + name: "LongestCommonSubsequence", |
| 17 | + module_path: module_path!(), |
| 18 | + description: "Find the longest string that is a subsequence of every input string", |
| 19 | + fields: &[ |
| 20 | + FieldInfo { name: "strings", type_name: "Vec<Vec<u8>>", description: "The input strings" }, |
| 21 | + ], |
| 22 | + } |
| 23 | +} |
| 24 | + |
| 25 | +/// The Longest Common Subsequence problem. |
| 26 | +/// |
| 27 | +/// Given `k` strings `s_1, ..., s_k` over an alphabet, find a longest |
| 28 | +/// string `w` that is a subsequence of every `s_i`. |
| 29 | +/// |
| 30 | +/// A string `w` is a **subsequence** of `s` if `w` can be obtained by |
| 31 | +/// deleting zero or more characters from `s` without changing the order |
| 32 | +/// of the remaining characters. |
| 33 | +/// |
| 34 | +/// # Representation |
| 35 | +/// |
| 36 | +/// Configuration is binary selection over the characters of the shortest |
| 37 | +/// string. Each variable in `{0, 1}` indicates whether the corresponding |
| 38 | +/// character of the shortest string is included in the candidate subsequence. |
| 39 | +/// The candidate is valid if the resulting subsequence is also a subsequence |
| 40 | +/// of every other input string. |
| 41 | +/// |
| 42 | +/// # Example |
| 43 | +/// |
| 44 | +/// ``` |
| 45 | +/// use problemreductions::{models::misc::LongestCommonSubsequence, BruteForce, Problem, Solver}; |
| 46 | +/// |
| 47 | +/// let problem = LongestCommonSubsequence::new(vec![ |
| 48 | +/// vec![b'A', b'B', b'C', b'D', b'A', b'B'], |
| 49 | +/// vec![b'B', b'D', b'C', b'A', b'B', b'A'], |
| 50 | +/// vec![b'B', b'C', b'A', b'D', b'B', b'A'], |
| 51 | +/// ]); |
| 52 | +/// let solver = BruteForce::new(); |
| 53 | +/// let solution = solver.find_best(&problem); |
| 54 | +/// assert!(solution.is_some()); |
| 55 | +/// ``` |
| 56 | +#[derive(Debug, Clone, Serialize, Deserialize)] |
| 57 | +pub struct LongestCommonSubsequence { |
| 58 | + /// The input strings. |
| 59 | + strings: Vec<Vec<u8>>, |
| 60 | +} |
| 61 | + |
| 62 | +impl LongestCommonSubsequence { |
| 63 | + /// Create a new LCS problem from a list of strings. |
| 64 | + /// |
| 65 | + /// # Panics |
| 66 | + /// |
| 67 | + /// Panics if `strings` is empty. |
| 68 | + pub fn new(strings: Vec<Vec<u8>>) -> Self { |
| 69 | + assert!(!strings.is_empty(), "must have at least one string"); |
| 70 | + Self { strings } |
| 71 | + } |
| 72 | + |
| 73 | + /// Get the input strings. |
| 74 | + pub fn strings(&self) -> &[Vec<u8>] { |
| 75 | + &self.strings |
| 76 | + } |
| 77 | + |
| 78 | + /// Get the number of input strings. |
| 79 | + pub fn num_strings(&self) -> usize { |
| 80 | + self.strings.len() |
| 81 | + } |
| 82 | + |
| 83 | + /// Get the total length of all input strings. |
| 84 | + pub fn total_length(&self) -> usize { |
| 85 | + self.strings.iter().map(|s| s.len()).sum() |
| 86 | + } |
| 87 | + |
| 88 | + /// Index of the shortest string. |
| 89 | + fn shortest_index(&self) -> usize { |
| 90 | + self.strings |
| 91 | + .iter() |
| 92 | + .enumerate() |
| 93 | + .min_by_key(|(_, s)| s.len()) |
| 94 | + .map(|(i, _)| i) |
| 95 | + .unwrap_or(0) |
| 96 | + } |
| 97 | + |
| 98 | + /// Length of the shortest string. |
| 99 | + fn shortest_len(&self) -> usize { |
| 100 | + self.strings.iter().map(|s| s.len()).min().unwrap_or(0) |
| 101 | + } |
| 102 | +} |
| 103 | + |
| 104 | +impl Problem for LongestCommonSubsequence { |
| 105 | + const NAME: &'static str = "LongestCommonSubsequence"; |
| 106 | + type Metric = SolutionSize<i32>; |
| 107 | + |
| 108 | + fn variant() -> Vec<(&'static str, &'static str)> { |
| 109 | + crate::variant_params![] |
| 110 | + } |
| 111 | + |
| 112 | + fn dims(&self) -> Vec<usize> { |
| 113 | + vec![2; self.shortest_len()] |
| 114 | + } |
| 115 | + |
| 116 | + fn evaluate(&self, config: &[usize]) -> SolutionSize<i32> { |
| 117 | + let si = self.shortest_index(); |
| 118 | + let shortest = &self.strings[si]; |
| 119 | + if config.len() != shortest.len() { |
| 120 | + return SolutionSize::Invalid; |
| 121 | + } |
| 122 | + if config.iter().any(|&v| v > 1) { |
| 123 | + return SolutionSize::Invalid; |
| 124 | + } |
| 125 | + // Build the candidate subsequence from selected characters |
| 126 | + let candidate: Vec<u8> = config |
| 127 | + .iter() |
| 128 | + .enumerate() |
| 129 | + .filter(|(_, &v)| v == 1) |
| 130 | + .map(|(i, _)| shortest[i]) |
| 131 | + .collect(); |
| 132 | + // Check that candidate is a subsequence of every other string |
| 133 | + for (j, s) in self.strings.iter().enumerate() { |
| 134 | + if j == si { |
| 135 | + continue; |
| 136 | + } |
| 137 | + if !is_subsequence(&candidate, s) { |
| 138 | + return SolutionSize::Invalid; |
| 139 | + } |
| 140 | + } |
| 141 | + SolutionSize::Valid(candidate.len() as i32) |
| 142 | + } |
| 143 | +} |
| 144 | + |
| 145 | +impl OptimizationProblem for LongestCommonSubsequence { |
| 146 | + type Value = i32; |
| 147 | + |
| 148 | + fn direction(&self) -> Direction { |
| 149 | + Direction::Maximize |
| 150 | + } |
| 151 | +} |
| 152 | + |
/// Reports whether `sub` can be obtained from `full` by deleting bytes.
///
/// Scans `full` left to right, matching each byte of `sub` at its earliest
/// remaining occurrence. An empty `sub` is a subsequence of anything.
fn is_subsequence(sub: &[u8], full: &[u8]) -> bool {
    let mut rest = full;
    for &wanted in sub {
        match rest.iter().position(|&byte| byte == wanted) {
            // Continue searching strictly after the matched byte.
            Some(at) => rest = &rest[at + 1..],
            None => return false,
        }
    }
    true
}
| 163 | + |
| 164 | +crate::declare_variants! { |
| 165 | + LongestCommonSubsequence => "2^total_length", |
| 166 | +} |
| 167 | + |
// Unit tests are kept in a separate file and attached here via `#[path]`;
// they are only compiled in test builds.
#[cfg(test)]
#[path = "../../unit_tests/models/misc/longest_common_subsequence.rs"]
mod tests;
0 commit comments