|
59 | 59 | "SubsetSum": [Subset Sum], |
60 | 60 | "MinimumFeedbackArcSet": [Minimum Feedback Arc Set], |
61 | 61 | "MinimumFeedbackVertexSet": [Minimum Feedback Vertex Set], |
| 62 | + "ShortestCommonSupersequence": [Shortest Common Supersequence], |
62 | 63 | "MinimumSumMulticenter": [Minimum Sum Multicenter], |
63 | 64 | "SubgraphIsomorphism": [Subgraph Isomorphism], |
64 | 65 | "SubsetSum": [Subset Sum], |
@@ -1038,6 +1039,66 @@ Biclique Cover is equivalent to factoring the biadjacency matrix $M$ of the bipa |
1038 | 1039 | *Example.* Let $A = {3, 7, 1, 8, 2, 4}$ ($n = 6$) and target $B = 11$. Selecting $A' = {3, 8}$ gives sum $3 + 8 = 11 = B$. Another solution: $A' = {7, 4}$ with sum $7 + 4 = 11 = B$. |
1039 | 1040 | ] |
1040 | 1041 |
|
| 1042 | +#problem-def("ShortestCommonSupersequence")[ |
| 1043 | + Given a finite alphabet $Sigma$, a set $R = {r_1, dots, r_m}$ of strings over $Sigma$, and a positive integer $K$, determine whether there exists a string $w in Sigma^*$ with $|w| lt.eq K$ such that every string $r_i in R$ is a _subsequence_ of $w$: for each $r_i$ there exist indices $1 lt.eq j_1 < j_2 < dots < j_(|r_i|) lt.eq |w|$ with $w[j_k] = r_i [k]$ for all $1 lt.eq k lt.eq |r_i|$. |
| 1044 | +][ |
| 1045 | + A classic NP-complete string problem, listed as problem SR8 in Garey and Johnson @garey1979. #cite(<maier1978>, form: "prose") proved NP-completeness; #cite(<raiha1981>, form: "prose") showed the problem remains NP-complete even over a binary alphabet ($|Sigma| = 2$). Note that _subsequence_ (characters may be non-contiguous) differs from _substring_ (contiguous block): the Shortest Common Supersequence asks that each input string can be embedded into $w$ by selecting characters in order but not necessarily adjacently. |
| 1046 | + |
| 1047 | + For two strings ($|R| = 2$), the problem is solvable in polynomial time via the duality with the Longest Common Subsequence (LCS): if $"LCS"(r_1, r_2)$ has length $ell$, then the shortest common supersequence has length $|r_1| + |r_2| - ell$, computable in $O(|r_1| dot |r_2|)$ time by dynamic programming. For general $|R| = m$, the brute-force search over all strings of length at most $K$ examines $O(|Sigma|^K)$ candidate strings (for $|Sigma| gt.eq 2$), each of which can be checked against every $r_i in R$ in polynomial time. Applications include bioinformatics (reconstructing ancestral sequences from fragments), data compression (representing multiple strings compactly), and scheduling (merging instruction sequences). |
| 1048 | + |
| 1049 | + *Example.* Let $Sigma = {a, b, c}$ and $R = {"abc", "bac"}$. We seek the shortest string $w$ containing both $"abc"$ and $"bac"$ as subsequences. |
| 1050 | + |
| 1051 | + #figure({ |
| 1052 | + let w = ("b", "a", "b", "c") |
| 1053 | + let r1 = ("a", "b", "c") // "abc" |
| 1054 | + let r2 = ("b", "a", "c") // "bac" |
| 1055 | + let embed1 = (1, 2, 3) // positions of a, b, c in w (0-indexed) |
| 1056 | + let embed2 = (0, 1, 3) // positions of b, a, c in w (0-indexed) |
| 1057 | + let blue = graph-colors.at(0) |
| 1058 | + let teal = rgb("#76b7b2") |
| 1059 | + let red = graph-colors.at(1) |
| 1060 | + align(center, stack(dir: ttb, spacing: 0.6cm, |
| 1061 | + // Row 1: the supersequence w |
| 1062 | + stack(dir: ltr, spacing: 0pt, |
| 1063 | + box(width: 1.2cm, height: 0.5cm, align(center + horizon, text(8pt)[$w =$])), |
| 1064 | + ..w.enumerate().map(((i, ch)) => { |
| 1065 | + let is1 = embed1.contains(i) |
| 1066 | + let is2 = embed2.contains(i) |
| 1067 | + let fill = if is1 and is2 { blue.transparentize(60%) } else if is1 { blue.transparentize(80%) } else if is2 { teal.transparentize(80%) } else { white } |
| 1068 | + box(width: 0.55cm, height: 0.55cm, fill: fill, stroke: 0.5pt + luma(120), |
| 1069 | + align(center + horizon, text(9pt, weight: "bold", ch))) |
| 1070 | + }), |
| 1071 | + ), |
| 1072 | + // Row 2: embedding of r1 |
| 1073 | + stack(dir: ltr, spacing: 0pt, |
| 1074 | + box(width: 1.2cm, height: 0.5cm, align(center + horizon, text(8pt, fill: blue)[$r_1 =$])), |
| 1075 | + ..range(w.len()).map(i => { |
| 1076 | + let idx = embed1.position(j => j == i) |
| 1077 | + let ch = if idx != none { r1.at(idx) } else { sym.dot.c } |
| 1078 | + let col = if idx != none { blue } else { luma(200) } |
| 1079 | + box(width: 0.55cm, height: 0.55cm, |
| 1080 | + align(center + horizon, text(9pt, fill: col, weight: if idx != none { "bold" } else { "regular" }, ch))) |
| 1081 | + }), |
| 1082 | + ), |
| 1083 | + // Row 3: embedding of r2 |
| 1084 | + stack(dir: ltr, spacing: 0pt, |
| 1085 | + box(width: 1.2cm, height: 0.5cm, align(center + horizon, text(8pt, fill: teal)[$r_2 =$])), |
| 1086 | + ..range(w.len()).map(i => { |
| 1087 | + let idx = embed2.position(j => j == i) |
| 1088 | + let ch = if idx != none { r2.at(idx) } else { sym.dot.c } |
| 1089 | + let col = if idx != none { teal } else { luma(200) } |
| 1090 | + box(width: 0.55cm, height: 0.55cm, |
| 1091 | + align(center + horizon, text(9pt, fill: col, weight: if idx != none { "bold" } else { "regular" }, ch))) |
| 1092 | + }), |
| 1093 | + ), |
| 1094 | + )) |
| 1095 | + }, |
| 1096 | + caption: [Shortest Common Supersequence: $w = "babc"$ (length 4) contains $r_1 = "abc"$ (blue, 0-indexed positions 1, 2, 3) and $r_2 = "bac"$ (teal, 0-indexed positions 0, 1, 3) as subsequences. In the top row, each cell of $w$ is shaded according to the embedding(s) that use it; dots mark positions of $w$ unused by an embedding.], |
| 1097 | + ) <fig:scs> |
| 1098 | + |
| 1099 | + The supersequence $w = "babc"$ has length 4 and contains both input strings as subsequences. This is optimal because a longest common subsequence of $"abc"$ and $"bac"$ is $"ac"$ (length 2; $"bc"$ is another), so the shortest common supersequence has length $3 + 3 - 2 = 4$. |
| 1100 | +] |
| 1101 | + |
1041 | 1102 | #problem-def("MinimumFeedbackArcSet")[ |
1042 | 1103 | Given a directed graph $G = (V, A)$, find a minimum-size subset $A' subset.eq A$ such that $G - A'$ is a directed acyclic graph (DAG). Equivalently, $A'$ must contain at least one arc from every directed cycle in $G$. |
1043 | 1104 | ][ |
|
0 commit comments