Skip to content

Commit 08d5574

Browse files
GiggleLiuisPANNclaude
authored
Fix #439: Add StringToStringCorrection model (#675)
* Add plan for #439: StringToStringCorrection * Implement #439: Add StringToStringCorrection model Add the String-to-String Correction satisfaction problem (Garey & Johnson SR20). Given source/target strings and bound K, determine if the target can be derived from the source using at most K deletions and adjacent swaps. - Model: src/models/misc/string_to_string_correction.rs - Tests: 11 unit tests + 1 doctest (creation, evaluation, solver, paper example) - CLI: --source-string, --target-string, --bound, --alphabet-size flags - Paper: problem-def with figure, Wagner 1974/1975 references - Example-db: canonical instance with 2 satisfying solutions * chore: remove plan file after implementation * fix: address PR #675 review feedback - reject negative StringToStringCorrection bounds in CLI creation - tighten model pruning and add integration coverage for help/create/solve flows - align StringToStringCorrection help and docs with brute-force solver usage * Fix paper figure highlighting logic for repeated symbols The previous highlighting used value-based comparison via `.position()`, which finds the first occurrence and would produce incorrect highlights for strings with repeated symbols. Now compares by position index directly. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Rename bound_k to bound for consistency with other models ShortestCommonSupersequence and other models use `bound` as the field name. Rename for consistency across the codebase. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Fix formatting issues in PR code Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Xiwei Pan <xiwei.pan@connect.hkust-gz.edu.cn> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6d2981d commit 08d5574

12 files changed

Lines changed: 685 additions & 19 deletions

File tree

docs/paper/reductions.typ

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@
119119
"MinimumTardinessSequencing": [Minimum Tardiness Sequencing],
120120
"SequencingWithinIntervals": [Sequencing Within Intervals],
121121
"DirectedTwoCommodityIntegralFlow": [Directed Two-Commodity Integral Flow],
122+
"StringToStringCorrection": [String-to-String Correction],
122123
)
123124

124125
// Definition label: "def:<ProblemName>" — each definition block must have a matching label
@@ -2260,6 +2261,72 @@ NP-completeness was established by Garey, Johnson, and Stockmeyer @gareyJohnsonS
22602261
]
22612262
}
22622263

2264+
#{
2265+
let x = load-model-example("StringToStringCorrection")
2266+
let source = x.instance.source
2267+
let target = x.instance.target
2268+
let alpha-size = x.instance.alphabet_size
2269+
let bound-k = x.instance.bound
2270+
let n = source.len()
2271+
// Alphabet mapping: 0->a, 1->b, 2->c, 3->d
2272+
let alpha-map = range(alpha-size).map(i => str.from-unicode(97 + i))
2273+
let fmt-str(s) = s.map(c => alpha-map.at(c)).join("")
2274+
let src-str = fmt-str(source)
2275+
let tgt-str = fmt-str(target)
2276+
// Use solution [8, 5]: swap(2,3) then delete(5)
2277+
let sol = x.optimal.at(1)
2278+
// Trace the operations
2279+
let after-swap = (source.at(0), source.at(1), source.at(3), source.at(2), source.at(4), source.at(5))
2280+
let after-swap-str = after-swap.map(c => alpha-map.at(c)).join("")
2281+
[
2282+
#problem-def("StringToStringCorrection")[
2283+
Given a finite alphabet $Sigma$, a source string $x in Sigma^*$, a target string $y in Sigma^*$, and a positive integer $K$, determine whether $y$ can be derived from $x$ by a sequence of at most $K$ operations, where each operation is either a _single-symbol deletion_ (remove one character at a chosen position) or an _adjacent-symbol interchange_ (swap two neighboring characters).
2284+
][
2285+
A classical NP-complete problem listed as SR20 in Garey and Johnson @garey1979. #cite(<wagner1975>, form: "prose") proved NP-completeness via transformation from Set Covering. The standard edit distance problem --- allowing insertion, deletion, and substitution --- is solvable in $O(|x| dot |y|)$ time by the Wagner--Fischer dynamic programming algorithm @wagner1974. However, restricting the operation set to only deletions and adjacent swaps makes the problem NP-complete for unbounded alphabets. When only adjacent swaps are allowed (no deletions), the problem reduces to counting inversions and is polynomial @wagner1975.#footnote[No algorithm improving on brute-force is known for the general swap-and-delete variant.]
2286+
2287+
*Example.* Let $Sigma = {#alpha-map.join(", ")}$, source $x = #src-str$ (length #n), target $y = #tgt-str$ (length #target.len()), and $K = #bound-k$.
2288+
2289+
#figure({
2290+
let blue = graph-colors.at(0)
2291+
let red = rgb("#e15759")
2292+
let cell(ch, highlight: false, strike: false) = {
2293+
let fill = if highlight { blue.transparentize(70%) } else { white }
2294+
box(width: 0.55cm, height: 0.55cm, fill: fill, stroke: 0.5pt + luma(120),
2295+
align(center + horizon, text(9pt, weight: "bold",
2296+
if strike { text(fill: red, [#sym.times]) } else { ch })))
2297+
}
2298+
align(center, stack(dir: ttb, spacing: 0.5cm,
2299+
// Step 0: source
2300+
stack(dir: ltr, spacing: 0pt,
2301+
box(width: 2.2cm, height: 0.5cm, align(right + horizon, text(8pt)[$x: quad$])),
2302+
..source.map(c => cell(alpha-map.at(c))),
2303+
),
2304+
// Step 1: after swap at positions 2,3
2305+
stack(dir: ltr, spacing: 0pt,
2306+
box(width: 2.2cm, height: 0.5cm, align(right + horizon, text(8pt)[swap$(2,3)$: quad])),
2307+
..range(after-swap.len()).map(i => cell(alpha-map.at(after-swap.at(i)), highlight: after-swap.at(i) != source.at(i))),
2308+
),
2309+
// Step 2: after delete at position 5
2310+
stack(dir: ltr, spacing: 0pt,
2311+
box(width: 2.2cm, height: 0.5cm, align(right + horizon, text(8pt)[del$(5)$: quad])),
2312+
..target.map(c => cell(alpha-map.at(c))),
2313+
cell([], strike: true),
2314+
),
2315+
// Result
2316+
stack(dir: ltr, spacing: 0pt,
2317+
box(width: 2.2cm, height: 0.5cm, align(right + horizon, text(8pt)[$= y$: quad])),
2318+
..target.map(c => cell(alpha-map.at(c), highlight: true)),
2319+
),
2320+
))
2321+
},
2322+
caption: [String-to-String Correction: transforming $x = #src-str$ into $y = #tgt-str$ with $K = #bound-k$ operations. Step 1 swaps adjacent symbols at positions 2 and 3; step 2 deletes the symbol at position 5.],
2323+
) <fig:stsc>
2324+
2325+
The transformation uses exactly $K = #bound-k$ operations (1 swap + 1 deletion), which is the minimum: a single operation cannot account for both the transposition of two symbols and the removal of one.
2326+
]
2327+
]
2328+
}
2329+
22632330
#problem-def("MinimumFeedbackArcSet")[
22642331
Given a directed graph $G = (V, A)$, find a minimum-size subset $A' subset.eq A$ such that $G - A'$ is a directed acyclic graph (DAG). Equivalently, $A'$ must contain at least one arc from every directed cycle in $G$.
22652332
][

docs/paper/references.bib

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
@inproceedings{wagner1975,
2+
author = {Robert A. Wagner},
3+
title = {On the Complexity of the Extended String-to-String Correction Problem},
4+
booktitle = {Proceedings of the 7th Annual ACM Symposium on Theory of Computing (STOC)},
5+
pages = {218--223},
6+
year = {1975},
7+
doi = {10.1145/800116.803771}
8+
}
9+
10+
@article{wagner1974,
11+
author = {Robert A. Wagner and Michael J. Fischer},
12+
title = {The String-to-String Correction Problem},
13+
journal = {Journal of the ACM},
14+
volume = {21},
15+
number = {1},
16+
pages = {168--173},
17+
year = {1974},
18+
doi = {10.1145/321796.321811}
19+
}
20+
121
@article{juttner2018,
222
author = {Alpár Jüttner and Péter Madarasi},
323
title = {VF2++ — An improved subgraph isomorphism algorithm},

docs/src/cli.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,8 @@ pred reduce problem.json --to QUBO -o reduced.json
8787
pred solve reduced.json --solver brute-force
8888

8989
# Pipe commands together (use - to read from stdin)
90-
pred create MIS --graph 0-1,1-2,2-3 | pred solve -
90+
pred create MIS --graph 0-1,1-2,2-3 | pred solve - # when an ILP reduction path exists
91+
pred create StringToStringCorrection --source-string "0,1,2,3,1,0" --target-string "0,1,3,2,1" --bound 2 | pred solve - --solver brute-force
9192
pred create MIS --graph 0-1,1-2,2-3 | pred reduce - --to QUBO | pred solve -
9293
```
9394

@@ -352,6 +353,7 @@ pred create Factoring --target 15 --bits-m 4 --bits-n 4 -o factoring.json
352353
pred create Factoring --target 21 --bits-m 3 --bits-n 3 -o factoring2.json
353354
pred create X3C --universe 9 --sets "0,1,2;0,2,4;3,4,5;3,5,7;6,7,8;1,4,6;2,5,8" -o x3c.json
354355
pred create MinimumTardinessSequencing --n 5 --deadlines 5,5,5,3,3 --precedence-pairs "0>3,1>3,1>4,2>4" -o mts.json
356+
pred create StringToStringCorrection --source-string "0,1,2,3,1,0" --target-string "0,1,3,2,1" --bound 2 | pred solve - --solver brute-force
355357
pred create StrongConnectivityAugmentation --arcs "0>1,1>2,2>0,3>4,4>3,2>3,4>5,5>3" --candidate-arcs "3>0:5,3>1:3,3>2:4,4>0:6,4>1:2,4>2:7,5>0:4,5>1:3,5>2:1,0>3:8,0>4:3,0>5:2,1>3:6,1>4:4,1>5:5,2>4:3,2>5:7,1>0:2" --bound 1 -o sca.json
356358
```
357359
@@ -375,7 +377,8 @@ pred create MaxCut --random --num-vertices 20 --edge-prob 0.5 -o maxcut.json
375377
Without `-o`, the problem JSON is printed to stdout, which can be piped to other commands:
376378
377379
```bash
378-
pred create MIS --graph 0-1,1-2,2-3 | pred solve -
380+
pred create MIS --graph 0-1,1-2,2-3 | pred solve - # when an ILP reduction path exists
381+
pred create StringToStringCorrection --source-string "0,1,2,3,1,0" --target-string "0,1,3,2,1" --bound 2 | pred solve - --solver brute-force
379382
pred create MIS --random --num-vertices 10 | pred inspect -
380383
```
381384

problemreductions-cli/src/cli.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ Typical workflow:
1313
pred evaluate problem.json --config 1,0,1,0
1414
1515
Piping (use - to read from stdin):
16-
pred create MIS --graph 0-1,1-2 | pred solve -
16+
pred create MIS --graph 0-1,1-2 | pred solve - # when an ILP reduction path exists
17+
pred create StringToStringCorrection --source-string \"0,1,2,3,1,0\" --target-string \"0,1,3,2,1\" --bound 2 | pred solve - --solver brute-force
1718
pred create MIS --graph 0-1,1-2 | pred evaluate - --config 1,0,1
1819
pred create MIS --graph 0-1,1-2 | pred reduce - --to QUBO
1920
@@ -257,6 +258,7 @@ Flags by problem type:
257258
StaffScheduling --schedules, --requirements, --num-workers, --k
258259
MinimumTardinessSequencing --n, --deadlines [--precedence-pairs]
259260
SCS --strings, --bound [--alphabet-size]
261+
StringToStringCorrection --source-string, --target-string, --bound [--alphabet-size]
260262
D2CIF --arcs, --capacities, --source-1, --sink-1, --source-2, --sink-2, --requirement-1, --requirement-2
261263
ILP, CircuitSAT (via reduction only)
262264
@@ -275,6 +277,7 @@ Examples:
275277
pred create SAT --num-vars 3 --clauses \"1,2;-1,3\"
276278
pred create QUBO --matrix \"1,0.5;0.5,2\"
277279
pred create MultipleChoiceBranching/i32 --arcs \"0>1,0>2,1>3,2>3,1>4,3>5,4>5,2>4\" --weights 3,2,4,1,2,3,1,3 --partition \"0,1;2,3;4,7;5,6\" --bound 10
280+
pred create StringToStringCorrection --source-string \"0,1,2,3,1,0\" --target-string \"0,1,3,2,1\" --bound 2 | pred solve - --solver brute-force
278281
pred create MIS/KingsSubgraph --positions \"0,0;1,0;1,1;0,1\"
279282
pred create MIS/UnitDiskGraph --positions \"0,0;1,0;0.5,0.8\" --radius 1.5
280283
pred create MIS --random --num-vertices 10 --edge-prob 0.3
@@ -447,7 +450,7 @@ pub struct CreateArgs {
447450
/// Required edge indices for RuralPostman (comma-separated, e.g., "0,2,4")
448451
#[arg(long)]
449452
pub required_edges: Option<String>,
450-
/// Upper bound or length bound (for BoundedComponentSpanningForest, LengthBoundedDisjointPaths, MultipleChoiceBranching, OptimalLinearArrangement, RuralPostman, or SCS)
453+
/// Upper bound or length bound (for BoundedComponentSpanningForest, LengthBoundedDisjointPaths, MultipleChoiceBranching, OptimalLinearArrangement, RuralPostman, SCS, or StringToStringCorrection)
451454
#[arg(long, allow_hyphen_values = true)]
452455
pub bound: Option<i64>,
453456
/// Pattern graph edge list for SubgraphIsomorphism (e.g., 0-1,1-2,2-0)
@@ -492,9 +495,15 @@ pub struct CreateArgs {
492495
/// Number of available workers for StaffScheduling
493496
#[arg(long)]
494497
pub num_workers: Option<u64>,
495-
/// Alphabet size for SCS (optional; inferred from max symbol + 1 if omitted)
498+
/// Alphabet size for SCS or StringToStringCorrection (optional; inferred from max symbol + 1 if omitted)
496499
#[arg(long)]
497500
pub alphabet_size: Option<usize>,
501+
/// Source string for StringToStringCorrection (comma-separated symbol indices, e.g., "0,1,2,3")
502+
#[arg(long)]
503+
pub source_string: Option<String>,
504+
/// Target string for StringToStringCorrection (comma-separated symbol indices, e.g., "0,1,3,2")
505+
#[arg(long)]
506+
pub target_string: Option<String>,
498507
}
499508

500509
#[derive(clap::Args)]
@@ -504,7 +513,8 @@ Examples:
504513
pred solve problem.json --solver brute-force # brute-force (exhaustive search)
505514
pred solve reduced.json # solve a reduction bundle
506515
pred solve reduced.json -o solution.json # save result to file
507-
pred create MIS --graph 0-1,1-2 | pred solve - # read from stdin
516+
pred create MIS --graph 0-1,1-2 | pred solve - # read from stdin when an ILP path exists
517+
pred create StringToStringCorrection --source-string \"0,1,2,3,1,0\" --target-string \"0,1,3,2,1\" --bound 2 | pred solve - --solver brute-force
508518
pred solve problem.json --timeout 10 # abort after 10 seconds
509519
510520
Typical workflow:
@@ -518,6 +528,8 @@ Solve via explicit reduction:
518528
Input: a problem JSON from `pred create`, or a reduction bundle from `pred reduce`.
519529
When given a bundle, the target is solved and the solution is mapped back to the source.
520530
The ILP solver auto-reduces non-ILP problems before solving.
531+
Problems without an ILP reduction path, such as `LengthBoundedDisjointPaths` and
532+
`StringToStringCorrection`, currently need `--solver brute-force`.
521533
522534
ILP backend (default: HiGHS). To use a different backend:
523535
cargo install problemreductions-cli --features coin-cbc

problemreductions-cli/src/commands/create.rs

Lines changed: 92 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use problemreductions::models::graph::{
1515
use problemreductions::models::misc::{
1616
BinPacking, FlowShopScheduling, LongestCommonSubsequence, MinimumTardinessSequencing,
1717
MultiprocessorScheduling, PaintShop, SequencingWithinIntervals, ShortestCommonSupersequence,
18-
SubsetSum,
18+
StringToStringCorrection, SubsetSum,
1919
};
2020
use problemreductions::models::BiconnectivityAugmentation;
2121
use problemreductions::prelude::*;
@@ -94,6 +94,15 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
9494
&& args.requirements.is_none()
9595
&& args.num_workers.is_none()
9696
&& args.alphabet_size.is_none()
97+
&& args.source_string.is_none()
98+
&& args.target_string.is_none()
99+
&& args.capacities.is_none()
100+
&& args.source_1.is_none()
101+
&& args.sink_1.is_none()
102+
&& args.source_2.is_none()
103+
&& args.sink_2.is_none()
104+
&& args.requirement_1.is_none()
105+
&& args.requirement_2.is_none()
97106
}
98107

99108
fn emit_problem_output(output: &ProblemJsonOutput, out: &OutputConfig) -> Result<()> {
@@ -321,6 +330,9 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str {
321330
}
322331
"SetBasis" => "--universe 4 --sets \"0,1;1,2;0,2;0,1,2\" --k 3",
323332
"ShortestCommonSupersequence" => "--strings \"0,1,2;1,2,0\" --bound 4",
333+
"StringToStringCorrection" => {
334+
"--source-string \"0,1,2,3,1,0\" --target-string \"0,1,3,2,1\" --bound 2"
335+
}
324336
_ => "",
325337
}
326338
}
@@ -408,12 +420,7 @@ fn print_problem_help(canonical: &str, graph_type: Option<&str>) -> Result<()> {
408420
);
409421
} else {
410422
let hint = help_flag_hint(canonical, &field.name, &field.type_name, graph_type);
411-
eprintln!(
412-
" --{:<16} {} ({})",
413-
help_flag_name(canonical, &field.name),
414-
field.description,
415-
hint
416-
);
423+
eprintln!(" --{:<16} {} ({})", flag_name, field.description, hint);
417424
}
418425
}
419426
} else {
@@ -450,10 +457,15 @@ fn problem_help_flag_name(
450457
if canonical == "LengthBoundedDisjointPaths" && field_name == "max_length" {
451458
return "bound".to_string();
452459
}
453-
if canonical == "StaffScheduling" && field_name == "shifts_per_schedule" {
454-
return "k".to_string();
460+
if canonical == "StringToStringCorrection" {
461+
return match field_name {
462+
"source" => "source-string".to_string(),
463+
"target" => "target-string".to_string(),
464+
"bound" => "bound".to_string(),
465+
_ => help_flag_name(canonical, field_name),
466+
};
455467
}
456-
field_name.replace('_', "-")
468+
help_flag_name(canonical, field_name)
457469
}
458470

459471
fn lbdp_validation_error(message: &str, usage: Option<&str>) -> anyhow::Error {
@@ -1779,6 +1791,58 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> {
17791791
resolved_variant.clone(),
17801792
)
17811793
}
1794+
1795+
// StringToStringCorrection
1796+
"StringToStringCorrection" => {
1797+
let usage = "Usage: pred create StringToStringCorrection --source-string \"0,1,2,3,1,0\" --target-string \"0,1,3,2,1\" --bound 2";
1798+
let source_str = args.source_string.as_deref().ok_or_else(|| {
1799+
anyhow::anyhow!("StringToStringCorrection requires --source-string\n\n{usage}")
1800+
})?;
1801+
let target_str = args.target_string.as_deref().ok_or_else(|| {
1802+
anyhow::anyhow!("StringToStringCorrection requires --target-string\n\n{usage}")
1803+
})?;
1804+
let bound = parse_nonnegative_usize_bound(
1805+
args.bound.ok_or_else(|| {
1806+
anyhow::anyhow!("StringToStringCorrection requires --bound\n\n{usage}")
1807+
})?,
1808+
"StringToStringCorrection",
1809+
usage,
1810+
)?;
1811+
let parse_symbols = |s: &str| -> Result<Vec<usize>> {
1812+
if s.trim().is_empty() {
1813+
return Ok(Vec::new());
1814+
}
1815+
s.split(',')
1816+
.map(|v| v.trim().parse::<usize>().context("invalid symbol index"))
1817+
.collect()
1818+
};
1819+
let source = parse_symbols(source_str)?;
1820+
let target = parse_symbols(target_str)?;
1821+
let inferred = source
1822+
.iter()
1823+
.chain(target.iter())
1824+
.copied()
1825+
.max()
1826+
.map_or(0, |m| m + 1);
1827+
let alphabet_size = args.alphabet_size.unwrap_or(inferred);
1828+
if alphabet_size < inferred {
1829+
anyhow::bail!(
1830+
"--alphabet-size {} is smaller than max symbol + 1 ({}) in the strings",
1831+
alphabet_size,
1832+
inferred
1833+
);
1834+
}
1835+
(
1836+
ser(StringToStringCorrection::new(
1837+
alphabet_size,
1838+
source,
1839+
target,
1840+
bound,
1841+
))?,
1842+
resolved_variant.clone(),
1843+
)
1844+
}
1845+
17821846
_ => bail!("{}", crate::problem_name::unknown_problem_error(canonical)),
17831847
};
17841848

@@ -2987,6 +3051,22 @@ mod tests {
29873051
);
29883052
}
29893053

3054+
#[test]
3055+
fn test_problem_help_uses_string_to_string_correction_cli_flags() {
3056+
assert_eq!(
3057+
problem_help_flag_name("StringToStringCorrection", "source", "Vec<usize>", false),
3058+
"source-string"
3059+
);
3060+
assert_eq!(
3061+
problem_help_flag_name("StringToStringCorrection", "target", "Vec<usize>", false),
3062+
"target-string"
3063+
);
3064+
assert_eq!(
3065+
problem_help_flag_name("StringToStringCorrection", "bound", "usize", false),
3066+
"bound"
3067+
);
3068+
}
3069+
29903070
#[test]
29913071
fn test_problem_help_uses_k_for_staff_scheduling() {
29923072
assert_eq!(
@@ -3160,6 +3240,8 @@ mod tests {
31603240
deadline: None,
31613241
num_processors: None,
31623242
alphabet_size: None,
3243+
source_string: None,
3244+
target_string: None,
31633245
schedules: None,
31643246
requirements: None,
31653247
num_workers: None,

0 commit comments

Comments
 (0)