Skip to content

Commit d9625ee

Browse files
committed
paper(v2): fix overfull hboxes — emergencystretch + resizebox on wide tables
1 parent 1625255 commit d9625ee

2 files changed

Lines changed: 35 additions & 16 deletions

File tree

15.3 KB
Binary file not shown.

docs/Context-Selection-for-Git-Diff/v2/main.tex

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@
1111
\usepackage{algorithm}
1212
\usepackage{algpseudocode}
1313
\usepackage[margin=1in]{geometry}
14+
% Allow TeX to stretch interword space in emergencies to avoid overfull hboxes
15+
% caused by long unbreakable \texttt{} or compound identifiers.
16+
\setlength{\emergencystretch}{5em}
17+
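% Raise the hyphenation penalties (LaTeX default: 50) so TeX prefers stretching interword space over hyphenating; this does not by itself add break points inside \texttt{} or \url.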
18+
\hyphenpenalty=200
19+
\exhyphenpenalty=200
20+
\sloppy
1421

1522
\title{diffctx: Budgeted Typed-Graph Retrieval for Diff-Aware Code Context Selection}
1623

@@ -121,17 +128,18 @@ \subsection{Inputs and Definitions}
121128
\small
122129
\caption{Algorithm $\times$ constraint $\times$ guarantee map. The deployed default is a heuristic; analyzable variants of the framework admit the listed guarantees on their respective surrogate problems, and a submodular concept-coverage extension is described in Section~\ref{sec:utility} but is not the deployed default.}
123130
\label{tab:algo-constraint-guarantee}
124-
\begin{tabular}{p{3.4cm}p{3.0cm}p{2.6cm}p{2.4cm}p{2.6cm}}
131+
\resizebox{\textwidth}{!}{%
132+
\begin{tabular}{p{3.6cm}p{3.0cm}p{2.6cm}p{2.6cm}p{3.6cm}}
125133
\toprule
126134
\textbf{Variant} & \textbf{Objective} & \textbf{Constraint} & \textbf{Algorithm} & \textbf{Claim} \\
127135
\midrule
128-
Deployed default (this paper) & modular relevance + adaptive stopping + rescue phase & budget $\cap$ partition matroid & lazy density-greedy with heuristics & empirical only \\
129-
Modular cost-blind variant & modular & partition matroid only & best representative per class & exact (Edmonds) \\
130-
Modular + knapsack with $\arg\max$-singleton modification & modular & knapsack & modified density-greedy & $\frac{1}{2}(1-1/e)$ (degenerates from~\cite{khuller1999budgeted}) \\
131-
Submodular extension (concept coverage, Section~\ref{sec:utility}) & monotone submodular & knapsack & modified density-greedy & $\frac{1}{2}(1-1/e)$~\cite{khuller1999budgeted,sviridenko2004note} \\
132-
Submodular extension under matroid only & monotone submodular & partition matroid & greedy / continuous greedy & $1/2$ greedy; $(1-1/e)$ continuous~\cite{nemhauser1978analysis,calinescu2011maximizing} \\
136+
Deployed default (this paper) & modular relevance + adaptive stopping + rescue & budget $\cap$ partition matroid & lazy density-greedy with heuristics & empirical only \\
137+
Modular cost-blind variant & modular & partition matroid & best representative per class & exact (Edmonds) \\
138+
Modular + knapsack, $\arg\max$-singleton modification & modular & knapsack & modified density-greedy & $\frac{1}{2}(1-1/e)$ (from~\cite{khuller1999budgeted}) \\
139+
Submodular extension (Section~\ref{sec:utility}) & monotone submodular & knapsack & modified density-greedy & $\frac{1}{2}(1-1/e)$~\cite{khuller1999budgeted,sviridenko2004note} \\
140+
Submodular extension, matroid only & monotone submodular & partition matroid & greedy; continuous greedy & $1/2$ greedy; $(1-1/e)$ continuous~\cite{nemhauser1978analysis,calinescu2011maximizing} \\
133141
\bottomrule
134-
\end{tabular}
142+
\end{tabular}}
135143
\end{table}
136144

137145
\paragraph{Objective.} The objective takes the same algebraic form in both modes but with different sources for the per-fragment weight $w(f, \Delta) \geq 0$:
@@ -501,9 +509,11 @@ \subsection{Interim Results: Hybrid Mode}
501509

502510
\begin{table}[h]
503511
\centering
512+
\small
504513
\caption{Per-benchmark file-level metrics, scoring=hybrid, $B{=}8000$ tokens, with 95\% percentile bootstrap CIs ($10{,}000$ resamples, seed=42). Status \texttt{ok} excludes \texttt{clone\_fail} (4 Java instances on ContextBench Verified) and pending instances. SWE-bench Verified row is a placeholder pending completion of the in-flight run; the full table will be re-emitted when $n=1500$.}
505514
\label{tab:prelim-bench}
506-
\begin{tabular}{lrll r}
515+
\resizebox{\textwidth}{!}{%
516+
\begin{tabular}{lrllr}
507517
\toprule
508518
\textbf{Test set} & \textbf{n} & \textbf{File recall} & \textbf{File precision} & \textbf{ok\%} \\
509519
\midrule
@@ -513,13 +523,15 @@ \subsection{Interim Results: Hybrid Mode}
513523
\midrule
514524
Pooled (interim) & 845 & 0.855 [0.837, 0.873] & 0.122 [0.114, 0.131] & 99.5\% \\
515525
\bottomrule
516-
\end{tabular}
526+
\end{tabular}}
517527
\end{table}
518528

519529
\begin{table}[h]
520530
\centering
531+
\small
521532
\caption{Per-language interim metrics with 95\% percentile bootstrap CIs ($10{,}000$ resamples). Pooled across the two completed test sets, status=ok only.}
522533
\label{tab:prelim-lang}
534+
\resizebox{\textwidth}{!}{%
523535
\begin{tabular}{lrll}
524536
\toprule
525537
\textbf{Language} & \textbf{n} & \textbf{File recall} & \textbf{File precision} \\
@@ -533,7 +545,7 @@ \subsection{Interim Results: Hybrid Mode}
533545
Rust & 20 & 0.899 [0.831, 0.959] & 0.201 [0.140, 0.270] \\
534546
C++ & 10 & 0.702 [0.583, 0.835] & 0.257 [0.115, 0.447] \\
535547
\bottomrule
536-
\end{tabular}
548+
\end{tabular}}
537549
\end{table}
538550

539551
\subsection{Observations}
@@ -555,8 +567,10 @@ \subsection{Baseline Comparisons}
555567

556568
\begin{table}[h]
557569
\centering
570+
\small
558571
\caption{diffctx (hybrid) vs.\ external baselines at $B{=}8000$ tokens. $\Delta$ is the per-instance paired delta in file recall (positive favors diffctx). 95\% paired-bootstrap percentile CI on $\Delta$; $p$-value from Wilcoxon signed-rank. The \emph{Aider (oracle)} row is an upper-bound stress test, not a comparison baseline. \emph{Placeholder: cells to be filled when baseline runs complete.}}
559572
\label{tab:prelim-baselines}
573+
\resizebox{\textwidth}{!}{%
560574
\begin{tabular}{lrlllc}
561575
\toprule
562576
\textbf{Test set} & \textbf{n} & \textbf{diffctx} & \textbf{baseline} & \textbf{$\Delta$ recall [95\% CI]} & \textbf{Wilcoxon $p$} \\
@@ -582,7 +596,7 @@ \subsection{Baseline Comparisons}
582596
SWE-bench Verified & \emph{500} & \emph{TBD} & \emph{TBD} & \emph{TBD} & \emph{TBD} \\
583597
\textbf{Pooled} & \emph{1500} & \emph{TBD} & \emph{TBD} & \emph{TBD} & \emph{TBD} \\
584598
\bottomrule
585-
\end{tabular}
599+
\end{tabular}}
586600
\end{table}
587601

588602
\subsection{Scoring-Mode Ablation}
@@ -594,8 +608,10 @@ \subsection{Scoring-Mode Ablation}
594608

595609
\begin{table}[h]
596610
\centering
611+
\small
597612
\caption{Scoring-mode ablation at $B{=}8000$, hybrid-optimal operational hyperparameters. File recall with 95\% percentile bootstrap CIs. \emph{Placeholder: cells to be filled once each non-hybrid mode completes on the ablation subset.}}
598613
\label{tab:prelim-ablation}
614+
\resizebox{\textwidth}{!}{%
599615
\begin{tabular}{lrllll}
600616
\toprule
601617
\textbf{Test set} & \textbf{n} & \textbf{Hybrid} & \textbf{PPR} & \textbf{EGO} & \textbf{BM25 (internal)} \\
@@ -605,7 +621,7 @@ \subsection{Scoring-Mode Ablation}
605621
SWE-bench Verified & \emph{TBD} & \emph{TBD} & \emph{TBD} & \emph{TBD} & \emph{TBD} \\
606622
\textbf{Pooled} & \emph{TBD} & \emph{TBD} & \emph{TBD} & \emph{TBD} & \emph{TBD} \\
607623
\bottomrule
608-
\end{tabular}
624+
\end{tabular}}
609625
\end{table}
610626

611627
\subsection{Budget Curve}
@@ -615,8 +631,10 @@ \subsection{Budget Curve}
615631

616632
\begin{table}[h]
617633
\centering
634+
\small
618635
\caption{Budget curve: pooled file recall under hybrid mode at three budgets, on the full 1500-instance test set. \emph{Placeholder: $B{=}16{,}000$ and $B{=}32{,}000$ runs queued.}}
619636
\label{tab:prelim-budget}
637+
\resizebox{\textwidth}{!}{%
620638
\begin{tabular}{lrlll}
621639
\toprule
622640
\textbf{Budget $B$} & \textbf{n} & \textbf{Mean recall [95\% CI]} & \textbf{Mean used tokens} & \textbf{Recall / used token (k)} \\
@@ -625,7 +643,7 @@ \subsection{Budget Curve}
625643
$16{,}000$ & \emph{TBD} & \emph{TBD} & \emph{TBD} & \emph{TBD} \\
626644
$32{,}000$ & \emph{TBD} & \emph{TBD} & \emph{TBD} & \emph{TBD} \\
627645
\bottomrule
628-
\end{tabular}
646+
\end{tabular}}
629647
\end{table}
630648

631649
$^{\dagger}$ Mean used tokens is presently zero in v1 result rows due to a key-mapping bug in \texttt{benchmarks/diffctx\_eval\_fn.py} that reads a key not emitted by the pipeline. Recall and precision are unaffected. The bug is documented in our project tracker and fixed runs are scheduled before the budget curve is finalized; once \texttt{used\_tokens} reflects the actual encoder count, recall-per-used-token (rather than recall-per-nominal-budget) becomes the reportable efficiency metric.
@@ -782,8 +800,9 @@ \section{Symbol-to-Code Map}
782800

783801
\begin{table}[h]
784802
\centering
785-
\small
786-
\begin{tabular}{llp{4.5cm}l}
803+
\scriptsize
804+
\resizebox{\textwidth}{!}{%
805+
\begin{tabular}{llp{5cm}l}
787806
\toprule
788807
\textbf{Paper symbol} & \textbf{Meaning} & \textbf{Code identifier} & \textbf{Source} \\
789808
\midrule
@@ -811,7 +830,7 @@ \section{Symbol-to-Code Map}
811830
Selection & Lazy greedy / Boltzmann & \texttt{select::lazy\_greedy\_select} & \texttt{select.rs} \\
812831
Coherence post-pass & Rescue dangling references & \texttt{postpass::coherence\_post\_pass} & \texttt{postpass.rs} \\
813832
\bottomrule
814-
\end{tabular}
833+
\end{tabular}}
815834
\caption{Symbol-to-code map. Paper symbols on the left correspond to the named code identifiers on the right, located in the listed source file under \texttt{diffctx/src/}. Implementation-only parameters are documented inline in Appendix~A and are not duplicated here.}
816835
\label{tab:symbol-map}
817836
\end{table}

0 commit comments

Comments
 (0)