Skip to content

Commit 837bc1e

Browse files
committed
Merge branch 'main' of github.com:CodingThrust/problem-reductions
2 parents 0a51e51 + 1a923f0 commit 837bc1e

15 files changed

Lines changed: 951 additions & 12 deletions

File tree

docs/paper/reductions.typ

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,11 @@
5959
"SubsetSum": [Subset Sum],
6060
"MinimumFeedbackArcSet": [Minimum Feedback Arc Set],
6161
"MinimumFeedbackVertexSet": [Minimum Feedback Vertex Set],
62+
"ShortestCommonSupersequence": [Shortest Common Supersequence],
6263
"MinimumSumMulticenter": [Minimum Sum Multicenter],
6364
"SubgraphIsomorphism": [Subgraph Isomorphism],
6465
"SubsetSum": [Subset Sum],
66+
"FlowShopScheduling": [Flow Shop Scheduling],
6567
)
6668

6769
// Definition label: "def:<ProblemName>" — each definition block must have a matching label
@@ -1038,6 +1040,66 @@ Biclique Cover is equivalent to factoring the biadjacency matrix $M$ of the bipa
10381040
*Example.* Let $A = {3, 7, 1, 8, 2, 4}$ ($n = 6$) and target $B = 11$. Selecting $A' = {3, 8}$ gives sum $3 + 8 = 11 = B$. Another solution: $A' = {7, 4}$ with sum $7 + 4 = 11 = B$.
10391041
]
10401042

1043+
#problem-def("ShortestCommonSupersequence")[
1044+
Given a finite alphabet $Sigma$, a set $R = {r_1, dots, r_m}$ of strings over $Sigma$, and a positive integer $K$, determine whether there exists a string $w in Sigma^*$ with $|w| lt.eq K$ such that every string $r_i in R$ is a _subsequence_ of $w$, i.e., there exist indices $1 lt.eq j_1 < j_2 < dots < j_(|r_i|) lt.eq |w|$ with $w[j_k] = r_i [k]$ for all $1 lt.eq k lt.eq |r_i|$.
1045+
][
1046+
A classic NP-complete string problem, listed as problem SR8 in Garey and Johnson @garey1979. #cite(<maier1978>, form: "prose") proved NP-completeness; #cite(<raiha1981>, form: "prose") showed the problem remains NP-complete even over a binary alphabet ($|Sigma| = 2$). Note that _subsequence_ (characters may be non-contiguous) differs from _substring_ (contiguous block): the Shortest Common Supersequence asks that each input string can be embedded into $w$ by selecting characters in order but not necessarily adjacently.
1047+
1048+
For $|R| = 2$ strings, the problem is solvable in polynomial time via the duality with the Longest Common Subsequence (LCS): if $"LCS"(r_1, r_2)$ has length $ell$, then the shortest common supersequence has length $|r_1| + |r_2| - ell$, computable in $O(|r_1| dot |r_2|)$ time by dynamic programming. For general $|R| = m$, the brute-force search over all strings of length at most $K$ takes $O(|Sigma|^K)$ time. Applications include bioinformatics (reconstructing ancestral sequences from fragments), data compression (representing multiple strings compactly), and scheduling (merging instruction sequences).
1049+
1050+
*Example.* Let $Sigma = {a, b, c}$ and $R = {"abc", "bac"}$. We seek the shortest string $w$ containing both $"abc"$ and $"bac"$ as subsequences.
1051+
1052+
#figure({
1053+
let w = ("b", "a", "b", "c")
1054+
let r1 = ("a", "b", "c") // "abc"
1055+
let r2 = ("b", "a", "c") // "bac"
1056+
let embed1 = (1, 2, 3) // positions of a, b, c in w (0-indexed)
1057+
let embed2 = (0, 1, 3) // positions of b, a, c in w (0-indexed)
1058+
let blue = graph-colors.at(0)
1059+
let teal = rgb("#76b7b2")
1060+
let red = graph-colors.at(1)
1061+
align(center, stack(dir: ttb, spacing: 0.6cm,
1062+
// Row 1: the supersequence w
1063+
stack(dir: ltr, spacing: 0pt,
1064+
box(width: 1.2cm, height: 0.5cm, align(center + horizon, text(8pt)[$w =$])),
1065+
..w.enumerate().map(((i, ch)) => {
1066+
let is1 = embed1.contains(i)
1067+
let is2 = embed2.contains(i)
1068+
let fill = if is1 and is2 { blue.transparentize(60%) } else if is1 { blue.transparentize(80%) } else if is2 { teal.transparentize(80%) } else { white }
1069+
box(width: 0.55cm, height: 0.55cm, fill: fill, stroke: 0.5pt + luma(120),
1070+
align(center + horizon, text(9pt, weight: "bold", ch)))
1071+
}),
1072+
),
1073+
// Row 2: embedding of r1
1074+
stack(dir: ltr, spacing: 0pt,
1075+
box(width: 1.2cm, height: 0.5cm, align(center + horizon, text(8pt, fill: blue)[$r_1 =$])),
1076+
..range(w.len()).map(i => {
1077+
let idx = embed1.position(j => j == i)
1078+
let ch = if idx != none { r1.at(idx) } else { sym.dot.c }
1079+
let col = if idx != none { blue } else { luma(200) }
1080+
box(width: 0.55cm, height: 0.55cm,
1081+
align(center + horizon, text(9pt, fill: col, weight: if idx != none { "bold" } else { "regular" }, ch)))
1082+
}),
1083+
),
1084+
// Row 3: embedding of r2
1085+
stack(dir: ltr, spacing: 0pt,
1086+
box(width: 1.2cm, height: 0.5cm, align(center + horizon, text(8pt, fill: teal)[$r_2 =$])),
1087+
..range(w.len()).map(i => {
1088+
let idx = embed2.position(j => j == i)
1089+
let ch = if idx != none { r2.at(idx) } else { sym.dot.c }
1090+
let col = if idx != none { teal } else { luma(200) }
1091+
box(width: 0.55cm, height: 0.55cm,
1092+
align(center + horizon, text(9pt, fill: col, weight: if idx != none { "bold" } else { "regular" }, ch)))
1093+
}),
1094+
),
1095+
))
1096+
},
1097+
caption: [Shortest Common Supersequence: $w = "babc"$ (length 4) contains $r_1 = "abc"$ (blue, positions 1,2,3) and $r_2 = "bac"$ (teal, positions 0,1,3) as subsequences. Dots mark unused positions in each embedding.],
1098+
) <fig:scs>
1099+
1100+
The supersequence $w = "babc"$ has length 4 and contains both input strings as subsequences. This is optimal because $"LCS"("abc", "bac") = "ac"$ (length 2), so the shortest common supersequence has length $3 + 3 - 2 = 4$.
1101+
]
1102+
10411103
#problem-def("MinimumFeedbackArcSet")[
10421104
Given a directed graph $G = (V, A)$, find a minimum-size subset $A' subset.eq A$ such that $G - A'$ is a directed acyclic graph (DAG). Equivalently, $A'$ must contain at least one arc from every directed cycle in $G$.
10431105
][
@@ -1046,6 +1108,77 @@ Biclique Cover is equivalent to factoring the biadjacency matrix $M$ of the bipa
10461108
*Example.* Consider $G$ with $V = {0, 1, 2, 3, 4, 5}$ and arcs $(0 arrow 1), (1 arrow 2), (2 arrow 0), (1 arrow 3), (3 arrow 4), (4 arrow 1), (2 arrow 5), (5 arrow 3), (3 arrow 0)$. This graph contains five directed cycles: $0 arrow 1 arrow 2 arrow 0$, $1 arrow 3 arrow 4 arrow 1$, $0 arrow 1 arrow 3 arrow 0$, $2 arrow 5 arrow 3 arrow 0 arrow 1 arrow 2$, and $1 arrow 2 arrow 5 arrow 3 arrow 4 arrow 1$. Removing $A' = {(0 arrow 1), (3 arrow 4)}$ breaks all five cycles (vertex 0 becomes a sink in the residual graph, and every cycle avoiding vertex 0 uses the arc $3 arrow 4$). Since the cycles $0 arrow 1 arrow 2 arrow 0$ and $1 arrow 3 arrow 4 arrow 1$ share no arc, no single arc suffices, giving a minimum FAS of size 2.
10471109
]
10481110

1111+
#problem-def("FlowShopScheduling")[
1112+
Given $m$ processors (machines) and a set $J$ of $n$ jobs, where each job $j in J$ consists of $m$ tasks $t_1 [j], t_2 [j], dots, t_m [j]$ with lengths $ell(t_i [j]) in ZZ^+_0$, and a deadline $D in ZZ^+$, determine whether there exists a permutation schedule $pi$ of the jobs such that all jobs complete by time $D$. In a permutation schedule, every machine processes the jobs in the same order $pi$. Each job must be processed on machines $1, 2, dots, m$ in order: job $j$ cannot start on machine $i+1$ until its task on machine $i$ is completed, and each machine processes at most one task at a time.
1113+
][
1114+
Flow Shop Scheduling is a classical NP-complete problem from Garey & Johnson (A5 SS15), strongly NP-hard for $m >= 3$ @garey1976. For $m = 2$, it is solvable in $O(n log n)$ by Johnson's rule @johnson1954. The problem is fundamental in operations research, manufacturing planning, and VLSI design. When restricted to permutation schedules (same job order on all machines), the search space is $n!$ orderings. The best known exact algorithm for $m = 3$ runs in $O^*(3^n)$ time @shang2018; for general $m$, brute-force over $n!$ permutations gives $O(n! dot m n)$.
1115+
1116+
*Example.* Let $m = 3$ machines, $n = 5$ jobs with task lengths:
1117+
$ ell = mat(
1118+
3, 4, 2;
1119+
2, 3, 5;
1120+
4, 1, 3;
1121+
1, 5, 4;
1122+
3, 2, 3;
1123+
) $
1124+
and deadline $D = 25$. The job order $pi = (j_4, j_1, j_5, j_3, j_2)$ (0-indexed: $3, 0, 4, 2, 1$) yields makespan $23 <= 25$, so a feasible schedule exists.
1125+
1126+
#figure(
1127+
canvas(length: 1cm, {
1128+
import draw: *
1129+
// Gantt chart for job order [3, 0, 4, 2, 1] on 3 machines
1130+
// Schedule computed greedily:
1131+
// M1: j3[0,1], j0[1,4], j4[4,7], j2[7,11], j1[11,13]
1132+
// M2: j3[1,6], j0[6,10], j4[10,12], j2[12,13], j1[13,16]
1133+
// M3: j3[6,10], j0[10,12], j4[12,15], j2[15,18], j1[18,23]
1134+
let colors = (rgb("#4e79a7"), rgb("#e15759"), rgb("#76b7b2"), rgb("#f28e2b"), rgb("#59a14f"))
1135+
let job-names = ("$j_1$", "$j_2$", "$j_3$", "$j_4$", "$j_5$")
1136+
let scale = 0.38
1137+
let row-h = 0.6
1138+
let gap = 0.15
1139+
1140+
// Machine labels
1141+
for (mi, label) in ("M1", "M2", "M3").enumerate() {
1142+
let y = -mi * (row-h + gap)
1143+
content((-0.8, y), text(8pt, label))
1144+
}
1145+
1146+
// Draw schedule blocks: (machine, job-index, start, end)
1147+
let blocks = (
1148+
(0, 3, 0, 1), (0, 0, 1, 4), (0, 4, 4, 7), (0, 2, 7, 11), (0, 1, 11, 13),
1149+
(1, 3, 1, 6), (1, 0, 6, 10), (1, 4, 10, 12), (1, 2, 12, 13), (1, 1, 13, 16),
1150+
(2, 3, 6, 10), (2, 0, 10, 12), (2, 4, 12, 15), (2, 2, 15, 18), (2, 1, 18, 23),
1151+
)
1152+
1153+
for (mi, ji, s, e) in blocks {
1154+
let x0 = s * scale
1155+
let x1 = e * scale
1156+
let y = -mi * (row-h + gap)
1157+
rect((x0, y - row-h / 2), (x1, y + row-h / 2),
1158+
fill: colors.at(ji).transparentize(30%), stroke: 0.4pt + colors.at(ji))
1159+
content(((x0 + x1) / 2, y), text(6pt, job-names.at(ji)))
1160+
}
1161+
1162+
// Time axis
1163+
let max-t = 23
1164+
let y-axis = -2 * (row-h + gap) - row-h / 2 - 0.2
1165+
line((0, y-axis), (max-t * scale, y-axis), stroke: 0.4pt)
1166+
for t in (0, 5, 10, 15, 20, 23) {
1167+
let x = t * scale
1168+
line((x, y-axis), (x, y-axis - 0.1), stroke: 0.4pt)
1169+
content((x, y-axis - 0.25), text(6pt, str(t)))
1170+
}
1171+
content((max-t * scale / 2, y-axis - 0.5), text(7pt)[$t$])
1172+
1173+
// Deadline marker
1174+
let dl-x = 25 * scale
1175+
line((dl-x, row-h / 2 + 0.1), (dl-x, y-axis), stroke: (paint: red, thickness: 0.8pt, dash: "dashed"))
1176+
content((dl-x, row-h / 2 + 0.25), text(6pt, fill: red)[$D = 25$])
1177+
}),
1178+
caption: [Flow shop schedule for 5 jobs on 3 machines. Job order $(j_4, j_1, j_5, j_3, j_2)$ achieves makespan 23, within deadline $D = 25$ (dashed red line).],
1179+
) <fig:flowshop>
1180+
]
1181+
10491182
// Completeness check: warn about problem types in JSON but missing from paper
10501183
#{
10511184
let json-models = {

docs/paper/references.bib

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,17 @@ @article{cygan2014
489489
doi = {10.1137/140990255}
490490
}
491491

492+
@article{raiha1981,
493+
author = {Kari-Jouko R{\"a}ih{\"a} and Esko Ukkonen},
494+
title = {The Shortest Common Supersequence Problem over Binary Alphabet is {NP}-Complete},
495+
journal = {Theoretical Computer Science},
496+
volume = {16},
497+
number = {2},
498+
pages = {187--198},
499+
year = {1981},
500+
doi = {10.1016/0304-3975(81)90075-X}
501+
}
502+
492503
@article{bodlaender2012,
493504
author = {Hans L. Bodlaender and Fedor V. Fomin and Arie M. C. A. Koster and Dieter Kratsch and Dimitrios M. Thilikos},
494505
title = {A Note on Exact Algorithms for Vertex Ordering Problems on Graphs},

docs/src/reductions/problem_schemas.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,27 @@
488488
}
489489
]
490490
},
491+
{
492+
"name": "ShortestCommonSupersequence",
493+
"description": "Find a common supersequence of bounded length for a set of strings",
494+
"fields": [
495+
{
496+
"name": "alphabet_size",
497+
"type_name": "usize",
498+
"description": "Size of the alphabet"
499+
},
500+
{
501+
"name": "strings",
502+
"type_name": "Vec<Vec<usize>>",
503+
"description": "Input strings over the alphabet {0, ..., alphabet_size-1}"
504+
},
505+
{
506+
"name": "bound",
507+
"type_name": "usize",
508+
"description": "Bound on supersequence length (configuration has exactly this many symbols)"
509+
}
510+
]
511+
},
491512
{
492513
"name": "SpinGlass",
493514
"description": "Minimize Ising Hamiltonian on a graph",

problemreductions-cli/src/cli.rs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ Flags by problem type:
224224
LCS --strings
225225
FAS --arcs [--weights] [--num-vertices]
226226
FVS --arcs [--weights] [--num-vertices]
227+
FlowShopScheduling --task-lengths, --deadline [--num-processors]
228+
SCS --strings, --bound [--alphabet-size]
227229
ILP, CircuitSAT (via reduction only)
228230
229231
Geometry graph variants (use slash notation, e.g., MIS/KingsSubgraph):
@@ -338,18 +340,30 @@ pub struct CreateArgs {
338340
/// Required edge indices for RuralPostman (comma-separated, e.g., "0,2,4")
339341
#[arg(long)]
340342
pub required_edges: Option<String>,
341-
/// Upper bound B for RuralPostman
343+
/// Upper bound (for RuralPostman or SCS)
342344
#[arg(long)]
343-
pub bound: Option<i32>,
345+
pub bound: Option<i64>,
344346
/// Pattern graph edge list for SubgraphIsomorphism (e.g., 0-1,1-2,2-0)
345347
#[arg(long)]
346348
pub pattern: Option<String>,
347-
/// Input strings for LCS (semicolon-separated, e.g., "ABAC;BACA")
349+
/// Input strings for LCS (e.g., "ABAC;BACA") or SCS (e.g., "0,1,2;1,2,0")
348350
#[arg(long)]
349351
pub strings: Option<String>,
350352
/// Directed arcs for directed graph problems (e.g., 0>1,1>2,2>0)
351353
#[arg(long)]
352354
pub arcs: Option<String>,
355+
/// Task lengths for FlowShopScheduling (semicolon-separated rows: "3,4,2;2,3,5;4,1,3")
356+
#[arg(long)]
357+
pub task_lengths: Option<String>,
358+
/// Deadline for FlowShopScheduling
359+
#[arg(long)]
360+
pub deadline: Option<u64>,
361+
/// Number of processors/machines for FlowShopScheduling
362+
#[arg(long)]
363+
pub num_processors: Option<usize>,
364+
/// Alphabet size for SCS (optional; inferred from max symbol + 1 if omitted)
365+
#[arg(long)]
366+
pub alphabet_size: Option<usize>,
353367
}
354368

355369
#[derive(clap::Args)]

problemreductions-cli/src/commands/create.rs

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ use crate::util;
66
use anyhow::{bail, Context, Result};
77
use problemreductions::models::algebraic::{ClosestVectorProblem, BMF};
88
use problemreductions::models::graph::{GraphPartitioning, HamiltonianPath};
9-
use problemreductions::models::misc::{BinPacking, LongestCommonSubsequence, PaintShop, SubsetSum};
9+
use problemreductions::models::misc::{
10+
BinPacking, FlowShopScheduling, LongestCommonSubsequence, PaintShop,
11+
ShortestCommonSupersequence, SubsetSum,
12+
};
1013
use problemreductions::prelude::*;
1114
use problemreductions::registry::collect_schemas;
1215
use problemreductions::topology::{
@@ -52,6 +55,10 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
5255
&& args.pattern.is_none()
5356
&& args.strings.is_none()
5457
&& args.arcs.is_none()
58+
&& args.task_lengths.is_none()
59+
&& args.deadline.is_none()
60+
&& args.num_processors.is_none()
61+
&& args.alphabet_size.is_none()
5562
}
5663

5764
fn type_format_hint(type_name: &str, graph_type: Option<&str>) -> &'static str {
@@ -103,6 +110,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str {
103110
}
104111
"SubgraphIsomorphism" => "--graph 0-1,1-2,2-0 --pattern 0-1",
105112
"SubsetSum" => "--sizes 3,7,1,8,2,4 --target 11",
113+
"ShortestCommonSupersequence" => "--strings \"0,1,2;1,2,0\" --bound 4",
106114
_ => "",
107115
}
108116
}
@@ -280,7 +288,7 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> {
280288
"RuralPostman requires --bound\n\n\
281289
Usage: pred create RuralPostman --graph 0-1,1-2,2-3 --edge-weights 1,1,1 --required-edges 0,2 --bound 6"
282290
)
283-
})?;
291+
})? as i32;
284292
(
285293
ser(RuralPostman::new(
286294
graph,
@@ -563,6 +571,49 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> {
563571
)
564572
}
565573

574+
// FlowShopScheduling
575+
"FlowShopScheduling" => {
576+
let task_str = args.task_lengths.as_deref().ok_or_else(|| {
577+
anyhow::anyhow!(
578+
"FlowShopScheduling requires --task-lengths and --deadline\n\n\
579+
Usage: pred create FlowShopScheduling --task-lengths \"3,4,2;2,3,5;4,1,3\" --deadline 25 --num-processors 3"
580+
)
581+
})?;
582+
let deadline = args.deadline.ok_or_else(|| {
583+
anyhow::anyhow!(
584+
"FlowShopScheduling requires --deadline\n\n\
585+
Usage: pred create FlowShopScheduling --task-lengths \"3,4,2;2,3,5;4,1,3\" --deadline 25 --num-processors 3"
586+
)
587+
})?;
588+
let task_lengths: Vec<Vec<u64>> = task_str
589+
.split(';')
590+
.map(|row| util::parse_comma_list(row.trim()))
591+
.collect::<Result<Vec<_>>>()?;
592+
let num_processors = if let Some(np) = args.num_processors {
593+
np
594+
} else if let Some(m) = args.m {
595+
m
596+
} else if let Some(first) = task_lengths.first() {
597+
first.len()
598+
} else {
599+
bail!("Cannot infer num_processors from empty task list; use --num-processors");
600+
};
601+
for (j, row) in task_lengths.iter().enumerate() {
602+
if row.len() != num_processors {
603+
bail!(
604+
"task_lengths row {} has {} entries, expected {} (num_processors)",
605+
j,
606+
row.len(),
607+
num_processors
608+
);
609+
}
610+
}
611+
(
612+
ser(FlowShopScheduling::new(num_processors, task_lengths, deadline))?,
613+
resolved_variant.clone(),
614+
)
615+
}
616+
566617
// MinimumFeedbackArcSet
567618
"MinimumFeedbackArcSet" => {
568619
let arcs_str = args.arcs.as_deref().ok_or_else(|| {
@@ -667,6 +718,57 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> {
667718
)
668719
}
669720

721+
// ShortestCommonSupersequence
722+
"ShortestCommonSupersequence" => {
723+
let usage = "Usage: pred create SCS --strings \"0,1,2;1,2,0\" --bound 4";
724+
let strings_str = args.strings.as_deref().ok_or_else(|| {
725+
anyhow::anyhow!("ShortestCommonSupersequence requires --strings\n\n{usage}")
726+
})?;
727+
let bound = args.bound.ok_or_else(|| {
728+
anyhow::anyhow!("ShortestCommonSupersequence requires --bound\n\n{usage}")
729+
})? as usize;
730+
let strings: Vec<Vec<usize>> = strings_str
731+
.split(';')
732+
.map(|s| {
733+
let trimmed = s.trim();
734+
if trimmed.is_empty() {
735+
return Ok(Vec::new());
736+
}
737+
trimmed
738+
.split(',')
739+
.map(|v| {
740+
v.trim()
741+
.parse::<usize>()
742+
.map_err(|e| anyhow::anyhow!("Invalid alphabet index: {}", e))
743+
})
744+
.collect::<Result<Vec<_>>>()
745+
})
746+
.collect::<Result<Vec<_>>>()?;
747+
let inferred = strings
748+
.iter()
749+
.flat_map(|s| s.iter())
750+
.copied()
751+
.max()
752+
.map(|m| m + 1)
753+
.unwrap_or(0);
754+
let alphabet_size = args.alphabet_size.unwrap_or(inferred);
755+
if alphabet_size < inferred {
756+
anyhow::bail!(
757+
"--alphabet-size {} is smaller than the largest symbol + 1 ({}) in the strings",
758+
alphabet_size,
759+
inferred
760+
);
761+
}
762+
(
763+
ser(ShortestCommonSupersequence::new(
764+
alphabet_size,
765+
strings,
766+
bound,
767+
))?,
768+
resolved_variant.clone(),
769+
)
770+
}
771+
670772
// MinimumFeedbackVertexSet
671773
"MinimumFeedbackVertexSet" => {
672774
let arcs_str = args.arcs.as_deref().ok_or_else(|| {

0 commit comments

Comments
 (0)