Skip to content

Commit 3811aff

Browse files
zazabap, claude, and GiggleLiu
authored
Fix #412: Add ShortestCommonSupersequence model (#627)
* Add plan for #412: ShortestCommonSupersequence model
* Implement #412: Add ShortestCommonSupersequence model
* style: apply rustfmt formatting
* Address review: fix docs, add --alphabet-size flag, add edge case tests
* chore: remove plan file after implementation
* Address Copilot review comments
  - Align docs/schema wording: bound is exact config length, equivalent to |w| ≤ B via padding
  - Add alphabet_size > 0 validation in constructor
  - Handle empty string segments in --strings CLI parsing
  - Add --alphabet-size to CLI help text
  - Regenerate problem_schemas.json
  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Fix agentic test issues: add SCS to prelude, fix CLI duplicate fields
  - Add ShortestCommonSupersequence to prelude re-exports (was missing unlike all other misc models)
  - Fix duplicate CLI struct fields (strings, bound) by sharing between LCS/SCS and RuralPostman/SCS with i64 type and casts at usage sites
  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* chore: trigger CI
  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* style: apply rustfmt formatting
  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: GiggleLiu <cacate0129@gmail.com>
1 parent f1a6b72 commit 3811aff

13 files changed

Lines changed: 447 additions & 10 deletions

File tree

docs/paper/reductions.typ

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
"SubsetSum": [Subset Sum],
6060
"MinimumFeedbackArcSet": [Minimum Feedback Arc Set],
6161
"MinimumFeedbackVertexSet": [Minimum Feedback Vertex Set],
62+
"ShortestCommonSupersequence": [Shortest Common Supersequence],
6263
"MinimumSumMulticenter": [Minimum Sum Multicenter],
6364
"SubgraphIsomorphism": [Subgraph Isomorphism],
6465
"SubsetSum": [Subset Sum],
@@ -1038,6 +1039,66 @@ Biclique Cover is equivalent to factoring the biadjacency matrix $M$ of the bipa
10381039
*Example.* Let $A = {3, 7, 1, 8, 2, 4}$ ($n = 6$) and target $B = 11$. Selecting $A' = {3, 8}$ gives sum $3 + 8 = 11 = B$. Another solution: $A' = {7, 4}$ with sum $7 + 4 = 11 = B$.
10391040
]
10401041

1042+
#problem-def("ShortestCommonSupersequence")[
1043+
Given a finite alphabet $Sigma$, a set $R = {r_1, dots, r_m}$ of strings over $Sigma$, and a positive integer $K$, determine whether there exists a string $w in Sigma^*$ with $|w| lt.eq K$ such that every string $r_i in R$ is a _subsequence_ of $w$, i.e., there exist indices $1 lt.eq j_1 < j_2 < dots < j_(|r_i|) lt.eq |w|$ with $w[j_k] = r_i [k]$ for all $k$.
1044+
][
1045+
A classic NP-complete string problem, listed as problem SR8 in Garey and Johnson @garey1979. #cite(<maier1978>, form: "prose") proved NP-completeness; #cite(<raiha1981>, form: "prose") showed the problem remains NP-complete even over a binary alphabet ($|Sigma| = 2$). Note that _subsequence_ (characters may be non-contiguous) differs from _substring_ (contiguous block): the Shortest Common Supersequence asks that each input string can be embedded into $w$ by selecting characters in order but not necessarily adjacently.
1046+
1047+
For $|R| = 2$ strings, the problem is solvable in polynomial time via the duality with the Longest Common Subsequence (LCS): if $"LCS"(r_1, r_2)$ has length $ell$, then the shortest common supersequence has length $|r_1| + |r_2| - ell$, computable in $O(|r_1| dot |r_2|)$ time by dynamic programming. For general $|R| = m$, the brute-force search over all strings of length at most $K$ takes $O(|Sigma|^K)$ time. Applications include bioinformatics (reconstructing ancestral sequences from fragments), data compression (representing multiple strings compactly), and scheduling (merging instruction sequences).
1048+
1049+
*Example.* Let $Sigma = {a, b, c}$ and $R = {"abc", "bac"}$. We seek the shortest string $w$ containing both $"abc"$ and $"bac"$ as subsequences.
1050+
1051+
#figure({
1052+
let w = ("b", "a", "b", "c")
1053+
let r1 = ("a", "b", "c") // "abc"
1054+
let r2 = ("b", "a", "c") // "bac"
1055+
let embed1 = (1, 2, 3) // positions of a, b, c in w (0-indexed)
1056+
let embed2 = (0, 1, 3) // positions of b, a, c in w (0-indexed)
1057+
let blue = graph-colors.at(0)
1058+
let teal = rgb("#76b7b2")
1059+
let red = graph-colors.at(1)
1060+
align(center, stack(dir: ttb, spacing: 0.6cm,
1061+
// Row 1: the supersequence w
1062+
stack(dir: ltr, spacing: 0pt,
1063+
box(width: 1.2cm, height: 0.5cm, align(center + horizon, text(8pt)[$w =$])),
1064+
..w.enumerate().map(((i, ch)) => {
1065+
let is1 = embed1.contains(i)
1066+
let is2 = embed2.contains(i)
1067+
let fill = if is1 and is2 { blue.transparentize(60%) } else if is1 { blue.transparentize(80%) } else if is2 { teal.transparentize(80%) } else { white }
1068+
box(width: 0.55cm, height: 0.55cm, fill: fill, stroke: 0.5pt + luma(120),
1069+
align(center + horizon, text(9pt, weight: "bold", ch)))
1070+
}),
1071+
),
1072+
// Row 2: embedding of r1
1073+
stack(dir: ltr, spacing: 0pt,
1074+
box(width: 1.2cm, height: 0.5cm, align(center + horizon, text(8pt, fill: blue)[$r_1 =$])),
1075+
..range(w.len()).map(i => {
1076+
let idx = embed1.position(j => j == i)
1077+
let ch = if idx != none { r1.at(idx) } else { sym.dot.c }
1078+
let col = if idx != none { blue } else { luma(200) }
1079+
box(width: 0.55cm, height: 0.55cm,
1080+
align(center + horizon, text(9pt, fill: col, weight: if idx != none { "bold" } else { "regular" }, ch)))
1081+
}),
1082+
),
1083+
// Row 3: embedding of r2
1084+
stack(dir: ltr, spacing: 0pt,
1085+
box(width: 1.2cm, height: 0.5cm, align(center + horizon, text(8pt, fill: teal)[$r_2 =$])),
1086+
..range(w.len()).map(i => {
1087+
let idx = embed2.position(j => j == i)
1088+
let ch = if idx != none { r2.at(idx) } else { sym.dot.c }
1089+
let col = if idx != none { teal } else { luma(200) }
1090+
box(width: 0.55cm, height: 0.55cm,
1091+
align(center + horizon, text(9pt, fill: col, weight: if idx != none { "bold" } else { "regular" }, ch)))
1092+
}),
1093+
),
1094+
))
1095+
},
1096+
caption: [Shortest Common Supersequence: $w = "babc"$ (length 4) contains $r_1 = "abc"$ (blue, positions 1,2,3) and $r_2 = "bac"$ (teal, positions 0,1,3) as subsequences. Dots mark unused positions in each embedding.],
1097+
) <fig:scs>
1098+
1099+
The supersequence $w = "babc"$ has length 4 and contains both input strings as subsequences. This is optimal because a longest common subsequence of $"abc"$ and $"bac"$ has length 2 (e.g., $"ac"$ or $"bc"$), so the shortest common supersequence has length $3 + 3 - 2 = 4$.
1100+
]
1101+
10411102
#problem-def("MinimumFeedbackArcSet")[
10421103
Given a directed graph $G = (V, A)$, find a minimum-size subset $A' subset.eq A$ such that $G - A'$ is a directed acyclic graph (DAG). Equivalently, $A'$ must contain at least one arc from every directed cycle in $G$.
10431104
][

docs/paper/references.bib

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,17 @@ @article{cygan2014
489489
doi = {10.1137/140990255}
490490
}
491491

492+
@article{raiha1981,
493+
author = {Kari-Jouko R{\"a}ih{\"a} and Esko Ukkonen},
494+
title = {The Shortest Common Supersequence Problem over Binary Alphabet is {NP}-Complete},
495+
journal = {Theoretical Computer Science},
496+
volume = {16},
497+
number = {2},
498+
pages = {187--198},
499+
year = {1981},
500+
doi = {10.1016/0304-3975(81)90075-X}
501+
}
502+
492503
@article{bodlaender2012,
493504
author = {Hans L. Bodlaender and Fedor V. Fomin and Arie M. C. A. Koster and Dieter Kratsch and Dimitrios M. Thilikos},
494505
title = {A Note on Exact Algorithms for Vertex Ordering Problems on Graphs},

docs/src/reductions/problem_schemas.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,27 @@
488488
}
489489
]
490490
},
491+
{
492+
"name": "ShortestCommonSupersequence",
493+
"description": "Find a common supersequence of bounded length for a set of strings",
494+
"fields": [
495+
{
496+
"name": "alphabet_size",
497+
"type_name": "usize",
498+
"description": "Size of the alphabet"
499+
},
500+
{
501+
"name": "strings",
502+
"type_name": "Vec<Vec<usize>>",
503+
"description": "Input strings over the alphabet {0, ..., alphabet_size-1}"
504+
},
505+
{
506+
"name": "bound",
507+
"type_name": "usize",
508+
"description": "Bound on supersequence length (configuration has exactly this many symbols)"
509+
}
510+
]
511+
},
491512
{
492513
"name": "SpinGlass",
493514
"description": "Minimize Ising Hamiltonian on a graph",

problemreductions-cli/src/cli.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ Flags by problem type:
224224
LCS --strings
225225
FAS --arcs [--weights] [--num-vertices]
226226
FVS --arcs [--weights] [--num-vertices]
227+
SCS --strings, --bound [--alphabet-size]
227228
ILP, CircuitSAT (via reduction only)
228229
229230
Geometry graph variants (use slash notation, e.g., MIS/KingsSubgraph):
@@ -338,18 +339,21 @@ pub struct CreateArgs {
338339
/// Required edge indices for RuralPostman (comma-separated, e.g., "0,2,4")
339340
#[arg(long)]
340341
pub required_edges: Option<String>,
341-
/// Upper bound B for RuralPostman
342+
/// Upper bound (for RuralPostman or SCS)
342343
#[arg(long)]
343-
pub bound: Option<i32>,
344+
pub bound: Option<i64>,
344345
/// Pattern graph edge list for SubgraphIsomorphism (e.g., 0-1,1-2,2-0)
345346
#[arg(long)]
346347
pub pattern: Option<String>,
347-
/// Input strings for LCS (semicolon-separated, e.g., "ABAC;BACA")
348+
/// Input strings for LCS (e.g., "ABAC;BACA") or SCS (e.g., "0,1,2;1,2,0")
348349
#[arg(long)]
349350
pub strings: Option<String>,
350351
/// Directed arcs for directed graph problems (e.g., 0>1,1>2,2>0)
351352
#[arg(long)]
352353
pub arcs: Option<String>,
354+
/// Alphabet size for SCS (optional; inferred from max symbol + 1 if omitted)
355+
#[arg(long)]
356+
pub alphabet_size: Option<usize>,
353357
}
354358

355359
#[derive(clap::Args)]

problemreductions-cli/src/commands/create.rs

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ use crate::util;
66
use anyhow::{bail, Context, Result};
77
use problemreductions::models::algebraic::{ClosestVectorProblem, BMF};
88
use problemreductions::models::graph::{GraphPartitioning, HamiltonianPath};
9-
use problemreductions::models::misc::{BinPacking, LongestCommonSubsequence, PaintShop, SubsetSum};
9+
use problemreductions::models::misc::{
10+
BinPacking, LongestCommonSubsequence, PaintShop, ShortestCommonSupersequence, SubsetSum,
11+
};
1012
use problemreductions::prelude::*;
1113
use problemreductions::registry::collect_schemas;
1214
use problemreductions::topology::{
@@ -52,6 +54,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
5254
&& args.pattern.is_none()
5355
&& args.strings.is_none()
5456
&& args.arcs.is_none()
57+
&& args.alphabet_size.is_none()
5558
}
5659

5760
fn type_format_hint(type_name: &str, graph_type: Option<&str>) -> &'static str {
@@ -103,6 +106,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str {
103106
}
104107
"SubgraphIsomorphism" => "--graph 0-1,1-2,2-0 --pattern 0-1",
105108
"SubsetSum" => "--sizes 3,7,1,8,2,4 --target 11",
109+
"ShortestCommonSupersequence" => "--strings \"0,1,2;1,2,0\" --bound 4",
106110
_ => "",
107111
}
108112
}
@@ -280,7 +284,7 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> {
280284
"RuralPostman requires --bound\n\n\
281285
Usage: pred create RuralPostman --graph 0-1,1-2,2-3 --edge-weights 1,1,1 --required-edges 0,2 --bound 6"
282286
)
283-
})?;
287+
})? as i32;
284288
(
285289
ser(RuralPostman::new(
286290
graph,
@@ -667,6 +671,57 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> {
667671
)
668672
}
669673

674+
// ShortestCommonSupersequence
675+
"ShortestCommonSupersequence" => {
676+
let usage = "Usage: pred create SCS --strings \"0,1,2;1,2,0\" --bound 4";
677+
let strings_str = args.strings.as_deref().ok_or_else(|| {
678+
anyhow::anyhow!("ShortestCommonSupersequence requires --strings\n\n{usage}")
679+
})?;
680+
let bound = args.bound.ok_or_else(|| {
681+
anyhow::anyhow!("ShortestCommonSupersequence requires --bound\n\n{usage}")
682+
})? as usize;
683+
let strings: Vec<Vec<usize>> = strings_str
684+
.split(';')
685+
.map(|s| {
686+
let trimmed = s.trim();
687+
if trimmed.is_empty() {
688+
return Ok(Vec::new());
689+
}
690+
trimmed
691+
.split(',')
692+
.map(|v| {
693+
v.trim()
694+
.parse::<usize>()
695+
.map_err(|e| anyhow::anyhow!("Invalid alphabet index: {}", e))
696+
})
697+
.collect::<Result<Vec<_>>>()
698+
})
699+
.collect::<Result<Vec<_>>>()?;
700+
let inferred = strings
701+
.iter()
702+
.flat_map(|s| s.iter())
703+
.copied()
704+
.max()
705+
.map(|m| m + 1)
706+
.unwrap_or(0);
707+
let alphabet_size = args.alphabet_size.unwrap_or(inferred);
708+
if alphabet_size < inferred {
709+
anyhow::bail!(
710+
"--alphabet-size {} is smaller than the largest symbol + 1 ({}) in the strings",
711+
alphabet_size,
712+
inferred
713+
);
714+
}
715+
(
716+
ser(ShortestCommonSupersequence::new(
717+
alphabet_size,
718+
strings,
719+
bound,
720+
))?,
721+
resolved_variant.clone(),
722+
)
723+
}
724+
670725
// MinimumFeedbackVertexSet
671726
"MinimumFeedbackVertexSet" => {
672727
let arcs_str = args.arcs.as_deref().ok_or_else(|| {

problemreductions-cli/src/dispatch.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use anyhow::{bail, Context, Result};
22
use problemreductions::models::algebraic::{ClosestVectorProblem, ILP};
3-
use problemreductions::models::misc::{BinPacking, Knapsack, LongestCommonSubsequence, SubsetSum};
3+
use problemreductions::models::misc::{
4+
BinPacking, Knapsack, LongestCommonSubsequence, ShortestCommonSupersequence, SubsetSum,
5+
};
46
use problemreductions::prelude::*;
57
use problemreductions::rules::{MinimizeSteps, ReductionGraph};
68
use problemreductions::solvers::{BruteForce, ILPSolver, Solver};
@@ -254,6 +256,7 @@ pub fn load_problem(
254256
"LongestCommonSubsequence" => deser_opt::<LongestCommonSubsequence>(data),
255257
"MinimumFeedbackVertexSet" => deser_opt::<MinimumFeedbackVertexSet<i32>>(data),
256258
"SubsetSum" => deser_sat::<SubsetSum>(data),
259+
"ShortestCommonSupersequence" => deser_sat::<ShortestCommonSupersequence>(data),
257260
"MinimumFeedbackArcSet" => deser_opt::<MinimumFeedbackArcSet<i32>>(data),
258261
_ => bail!("{}", crate::problem_name::unknown_problem_error(&canonical)),
259262
}
@@ -324,6 +327,7 @@ pub fn serialize_any_problem(
324327
"LongestCommonSubsequence" => try_ser::<LongestCommonSubsequence>(any),
325328
"MinimumFeedbackVertexSet" => try_ser::<MinimumFeedbackVertexSet<i32>>(any),
326329
"SubsetSum" => try_ser::<SubsetSum>(any),
330+
"ShortestCommonSupersequence" => try_ser::<ShortestCommonSupersequence>(any),
327331
"MinimumFeedbackArcSet" => try_ser::<MinimumFeedbackArcSet<i32>>(any),
328332
_ => bail!("{}", crate::problem_name::unknown_problem_error(&canonical)),
329333
}

problemreductions-cli/src/problem_name.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ pub const ALIASES: &[(&str, &str)] = &[
2424
("LCS", "LongestCommonSubsequence"),
2525
("MaxMatching", "MaximumMatching"),
2626
("FVS", "MinimumFeedbackVertexSet"),
27+
("SCS", "ShortestCommonSupersequence"),
2728
("FAS", "MinimumFeedbackArcSet"),
2829
("pmedian", "MinimumSumMulticenter"),
2930
];
@@ -66,6 +67,7 @@ pub fn resolve_alias(input: &str) -> String {
6667
"fas" | "minimumfeedbackarcset" => "MinimumFeedbackArcSet".to_string(),
6768
"minimumsummulticenter" | "pmedian" => "MinimumSumMulticenter".to_string(),
6869
"subsetsum" => "SubsetSum".to_string(),
70+
"scs" | "shortestcommonsupersequence" => "ShortestCommonSupersequence".to_string(),
6971
"hamiltonianpath" => "HamiltonianPath".to_string(),
7072
_ => input.to_string(), // pass-through for exact names
7173
}

src/lib.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,12 @@ pub mod prelude {
4646
pub use crate::models::graph::{
4747
KColoring, MaxCut, MaximalIS, MaximumClique, MaximumIndependentSet, MaximumMatching,
4848
MinimumDominatingSet, MinimumFeedbackArcSet, MinimumFeedbackVertexSet,
49-
MinimumSumMulticenter, MinimumVertexCover,
50-
PartitionIntoTriangles, RuralPostman, TravelingSalesman,
49+
MinimumSumMulticenter, MinimumVertexCover, PartitionIntoTriangles,
50+
RuralPostman, TravelingSalesman,
5151
};
5252
pub use crate::models::misc::{
53-
BinPacking, Factoring, Knapsack, LongestCommonSubsequence, PaintShop, SubsetSum,
53+
BinPacking, Factoring, Knapsack, LongestCommonSubsequence, PaintShop,
54+
ShortestCommonSupersequence, SubsetSum,
5455
};
5556
pub use crate::models::set::{MaximumSetPacking, MinimumSetCovering};
5657

src/models/misc/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,21 @@
66
//! - [`Knapsack`]: 0-1 Knapsack (maximize value subject to weight capacity)
77
//! - [`LongestCommonSubsequence`]: Longest Common Subsequence
88
//! - [`PaintShop`]: Minimize color switches in paint shop scheduling
9+
//! - [`ShortestCommonSupersequence`]: Find a common supersequence of bounded length
910
//! - [`SubsetSum`]: Find a subset summing to exactly a target value
1011
1112
mod bin_packing;
1213
pub(crate) mod factoring;
1314
mod knapsack;
1415
mod longest_common_subsequence;
1516
pub(crate) mod paintshop;
17+
pub(crate) mod shortest_common_supersequence;
1618
mod subset_sum;
1719

1820
pub use bin_packing::BinPacking;
1921
pub use factoring::Factoring;
2022
pub use knapsack::Knapsack;
2123
pub use longest_common_subsequence::LongestCommonSubsequence;
2224
pub use paintshop::PaintShop;
25+
pub use shortest_common_supersequence::ShortestCommonSupersequence;
2326
pub use subset_sum::SubsetSum;

0 commit comments

Comments
 (0)