Skip to content

Commit b2d0c58

Browse files
Copilotalexr00
andauthored
Improve commit line break unwrapping for variable-length list markers (#8417)
* Initial plan * Improve commit line break unwrapping for lists - Fix list continuation to use content indent (not marker indent) - Support variable-length numbered list markers (1., 10., 100., etc.) - Allow flexible continuation spacing (1+ spaces, but not 4+ which is code) - Preserve multi-paragraph list formatting (blank lines within list items) - Track list context across blank lines for proper paragraph unwrapping - Add comprehensive test cases for all scenarios Co-authored-by: alexr00 <38270282+alexr00@users.noreply.github.com> * Fix plain text joining to trim whitespace Use appendWithSpace with trimmed text to avoid double spaces when joining lines with leading whitespace. Co-authored-by: alexr00 <38270282+alexr00@users.noreply.github.com> * Use named captures in LIST_ITEM_PATTERN regex Renamed capture groups for clarity: - leadingWhitespace: spaces/tabs before marker - marker: the list item marker (*, +, -, or digits.) - markerTrailingWhitespace: spaces after marker Co-authored-by: alexr00 <38270282+alexr00@users.noreply.github.com> * Undo proposal changes * Fix list continuation unwrapping logic The previous implementation incorrectly prevented joining list continuations when the next line was a list item at the same level. This caused wrapped list content to not be unwrapped properly. Changes: - Remove incorrect check that prevented joining based on next line - Remove unused getNextNonBlankLineInfo function - Update test names and expectations to reflect correct behavior: - List continuations should be unwrapped (joined), not preserved - Renamed 'preserves list item continuations' to 'unwraps list item continuations' - Renamed 'unwraps regular text but preserves list item continuations' to 'unwraps regular text and list item continuations' - Fixed expected output for nested list content tests Co-authored-by: alexr00 <38270282+alexr00@users.noreply.github.com> * Add test * Fix unwrapping of non-indented list continuations When a list item is followed by a wrapped line that has no indentation (but is not itself a list item), the line should be joined to the previous list item. This is common in commit messages wrapped at 72 chars. The fix moves the handling of non-indented continuations BEFORE calling getActiveListContentIndent, which was clearing the list stack and preventing the join from happening. Co-authored-by: alexr00 <38270282+alexr00@users.noreply.github.com> * Refactor unwrapCommitMessageBody to gitUtils.ts Move the unwrapCommitMessageBody function from folderRepositoryManager.ts to gitUtils.ts as a reusable utility function. Co-authored-by: alexr00 <38270282+alexr00@users.noreply.github.com> * Revert proposal changes --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: alexr00 <38270282+alexr00@users.noreply.github.com>
1 parent cc5ddb5 commit b2d0c58

File tree

3 files changed

+462
-252
lines changed

3 files changed

+462
-252
lines changed

src/common/gitUtils.ts

Lines changed: 344 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,350 @@ import * as vscode from 'vscode';
77
import { Repository } from '../api/api';
88
import { GitApiImpl } from '../api/api1';
99

10+
/**
11+
* Unwraps lines that were wrapped for conventional commit message formatting (typically at 72 characters).
12+
* Similar to GitHub's behavior when converting commit messages to PR descriptions.
13+
*
14+
* Rules:
15+
* - Preserves blank lines as paragraph breaks
16+
* - Preserves fenced code blocks (```)
17+
* - Preserves list items (-, *, +, numbered)
18+
* - Preserves blockquotes (>)
19+
* - Preserves indented code blocks (4+ spaces at start, when not in a list context)
20+
* - Joins consecutive plain text lines that appear to be wrapped mid-sentence
21+
*/
22+
export function unwrapCommitMessageBody(body: string): string {
23+
if (!body) {
24+
return body;
25+
}
26+
27+
// Pattern to detect list item markers at the start of a line and capture the marker
28+
const LIST_ITEM_PATTERN = /^(?<leadingWhitespace>[ \t]*)(?<marker>[*+\-]|\d+\.)(?<markerTrailingWhitespace>[ \t]+)/;
29+
// Pattern to detect blockquote markers
30+
const BLOCKQUOTE_PATTERN = /^[ \t]*>/;
31+
// Pattern to detect fenced code block markers
32+
const FENCE_PATTERN = /^[ \t]*```/;
33+
34+
const getLeadingWhitespaceLength = (text: string): number => text.match(/^[ \t]*/)?.[0].length ?? 0;
35+
const hasHardLineBreak = (text: string): boolean => / {2}$/.test(text);
36+
const appendWithSpace = (base: string, addition: string): string => {
37+
if (!addition) {
38+
return base;
39+
}
40+
return base.length > 0 && !/\s$/.test(base) ? `${base} ${addition}` : `${base}${addition}`;
41+
};
42+
43+
// Get the content indent for a list item (position where actual content starts)
44+
const getListItemContentIndent = (line: string): number => {
45+
const match = line.match(LIST_ITEM_PATTERN);
46+
if (!match?.groups) {
47+
return 0;
48+
}
49+
// Content indent = leading whitespace + marker + space after marker
50+
return match.groups.leadingWhitespace.length + match.groups.marker.length + match.groups.markerTrailingWhitespace.length;
51+
};
52+
53+
const lines = body.split('\n');
54+
const result: string[] = [];
55+
let i = 0;
56+
let inFencedBlock = false;
57+
// Stack stores { markerIndent, contentIndent } for each nesting level
58+
const listStack: { markerIndent: number; contentIndent: number }[] = [];
59+
60+
// Find the active list context for a given line indent
61+
// Returns the content indent if the line is within an active list context
62+
const getActiveListContentIndent = (lineIndent: number): number | undefined => {
63+
for (let idx = listStack.length - 1; idx >= 0; idx--) {
64+
const { markerIndent, contentIndent } = listStack[idx];
65+
// A line is part of a list item if it has at least 1 space indent
66+
// (but less than contentIndent + 4 which would be a code block)
67+
if (lineIndent >= 1 && lineIndent >= markerIndent) {
68+
listStack.length = idx + 1;
69+
return contentIndent;
70+
}
71+
listStack.pop();
72+
}
73+
return undefined;
74+
};
75+
76+
const shouldJoinListContinuation = (lineIndex: number, contentIndent: number, baseLine: string): boolean => {
77+
const currentLine = lines[lineIndex];
78+
if (!currentLine) {
79+
return false;
80+
}
81+
82+
const trimmed = currentLine.trim();
83+
if (!trimmed) {
84+
return false;
85+
}
86+
87+
if (hasHardLineBreak(baseLine) || hasHardLineBreak(currentLine)) {
88+
return false;
89+
}
90+
91+
if (LIST_ITEM_PATTERN.test(currentLine)) {
92+
return false;
93+
}
94+
95+
if (BLOCKQUOTE_PATTERN.test(currentLine) || FENCE_PATTERN.test(currentLine)) {
96+
return false;
97+
}
98+
99+
const currentIndent = getLeadingWhitespaceLength(currentLine);
100+
// Need at least 1 space to be a continuation
101+
if (currentIndent < 1) {
102+
return false;
103+
}
104+
105+
// 4+ spaces beyond content indent is an indented code block
106+
if (currentIndent >= contentIndent + 4) {
107+
return false;
108+
}
109+
110+
return true;
111+
};
112+
113+
while (i < lines.length) {
114+
const line = lines[i];
115+
116+
// Preserve blank lines but don't clear list context
117+
// (multi-paragraph lists are allowed in GitHub markdown)
118+
if (line.trim() === '') {
119+
result.push(line);
120+
i++;
121+
continue;
122+
}
123+
124+
// Check for fenced code block markers
125+
if (FENCE_PATTERN.test(line)) {
126+
inFencedBlock = !inFencedBlock;
127+
result.push(line);
128+
i++;
129+
continue;
130+
}
131+
132+
// Preserve everything inside fenced code blocks
133+
if (inFencedBlock) {
134+
result.push(line);
135+
i++;
136+
continue;
137+
}
138+
139+
const lineIndent = getLeadingWhitespaceLength(line);
140+
const listItemMatch = line.match(LIST_ITEM_PATTERN);
141+
142+
if (listItemMatch?.groups) {
143+
const markerIndent = listItemMatch.groups.leadingWhitespace.length;
144+
const contentIndent = getListItemContentIndent(line);
145+
146+
// Pop list levels that are at or beyond this indent
147+
while (listStack.length && markerIndent <= listStack[listStack.length - 1].markerIndent) {
148+
listStack.pop();
149+
}
150+
151+
listStack.push({ markerIndent, contentIndent });
152+
result.push(line);
153+
i++;
154+
continue;
155+
}
156+
157+
// Handle non-indented lines that should be joined to a previous list item
158+
// This happens when commit messages are wrapped at 72 characters
159+
// Check this BEFORE calling getActiveListContentIndent which would clear the stack
160+
if (listStack.length > 0 && lineIndent === 0 && !LIST_ITEM_PATTERN.test(line)) {
161+
const isBlockquote = BLOCKQUOTE_PATTERN.test(line);
162+
if (!isBlockquote) {
163+
const baseIndex = result.length - 1;
164+
const baseLine = baseIndex >= 0 ? result[baseIndex] : '';
165+
const previousLineIsBlank = baseLine.trim() === '';
166+
167+
if (!previousLineIsBlank && baseIndex >= 0) {
168+
// Join this line and any following non-list-item lines with the previous list item
169+
let joinedLine = baseLine;
170+
let currentIndex = i;
171+
172+
while (currentIndex < lines.length) {
173+
const currentLine = lines[currentIndex];
174+
const trimmed = currentLine.trim();
175+
176+
// Stop at blank lines
177+
if (!trimmed) {
178+
break;
179+
}
180+
181+
// Stop at list items
182+
if (LIST_ITEM_PATTERN.test(currentLine)) {
183+
break;
184+
}
185+
186+
// Stop at blockquotes or fences
187+
if (BLOCKQUOTE_PATTERN.test(currentLine) || FENCE_PATTERN.test(currentLine)) {
188+
break;
189+
}
190+
191+
// Stop at indented code blocks
192+
const currentLineIndent = getLeadingWhitespaceLength(currentLine);
193+
if (currentLineIndent >= 4) {
194+
break;
195+
}
196+
197+
// Stop if previous line has hard line break
198+
if (hasHardLineBreak(joinedLine)) {
199+
break;
200+
}
201+
202+
joinedLine = appendWithSpace(joinedLine, trimmed);
203+
currentIndex++;
204+
}
205+
206+
if (currentIndex > i) {
207+
result[baseIndex] = joinedLine;
208+
i = currentIndex;
209+
continue;
210+
}
211+
}
212+
}
213+
}
214+
215+
const activeContentIndent = getActiveListContentIndent(lineIndent);
216+
const codeIndentThreshold = activeContentIndent !== undefined ? activeContentIndent + 4 : 4;
217+
const isBlockquote = BLOCKQUOTE_PATTERN.test(line);
218+
const isIndentedCode = lineIndent >= codeIndentThreshold;
219+
220+
if (isBlockquote || isIndentedCode) {
221+
result.push(line);
222+
i++;
223+
continue;
224+
}
225+
226+
// Handle list item continuations
227+
if (activeContentIndent !== undefined && lineIndent >= 1) {
228+
const baseIndex = result.length - 1;
229+
// Only try to join with previous line if it's not blank
230+
// Multi-paragraph lists have blank lines that should be preserved
231+
const baseLine = baseIndex >= 0 ? result[baseIndex] : '';
232+
const previousLineIsBlank = baseLine.trim() === '';
233+
234+
if (!previousLineIsBlank && baseIndex >= 0) {
235+
let joinedLine = baseLine;
236+
let appended = false;
237+
let currentIndex = i;
238+
239+
while (
240+
currentIndex < lines.length &&
241+
shouldJoinListContinuation(currentIndex, activeContentIndent, joinedLine)
242+
) {
243+
const continuationText = lines[currentIndex].trim();
244+
if (continuationText) {
245+
joinedLine = appendWithSpace(joinedLine, continuationText);
246+
appended = true;
247+
}
248+
currentIndex++;
249+
}
250+
251+
if (appended) {
252+
result[baseIndex] = joinedLine;
253+
i = currentIndex;
254+
continue;
255+
}
256+
}
257+
258+
// For multi-paragraph continuations or standalone indented lines,
259+
// preserve indentation but unwrap consecutive continuation lines
260+
let joinedLine = line;
261+
i++;
262+
263+
while (i < lines.length) {
264+
const nextLine = lines[i];
265+
266+
if (nextLine.trim() === '') {
267+
break;
268+
}
269+
270+
if (FENCE_PATTERN.test(nextLine)) {
271+
break;
272+
}
273+
274+
if (LIST_ITEM_PATTERN.test(nextLine)) {
275+
break;
276+
}
277+
278+
if (BLOCKQUOTE_PATTERN.test(nextLine)) {
279+
break;
280+
}
281+
282+
const nextIndent = getLeadingWhitespaceLength(nextLine);
283+
// Check for code block
284+
if (nextIndent >= activeContentIndent + 4) {
285+
break;
286+
}
287+
288+
// Must have at least 1 space to be a continuation
289+
if (nextIndent < 1) {
290+
break;
291+
}
292+
293+
// Check for hard line break
294+
if (hasHardLineBreak(joinedLine)) {
295+
break;
296+
}
297+
298+
// Join this line - preserve the original indentation for the first line
299+
joinedLine = appendWithSpace(joinedLine, nextLine.trim());
300+
i++;
301+
}
302+
303+
result.push(joinedLine);
304+
continue;
305+
}
306+
307+
// Start accumulating lines that should be joined (plain text)
308+
let joinedLine = line;
309+
i++;
310+
311+
// Keep joining lines until we hit a blank line or a line that shouldn't be joined
312+
while (i < lines.length) {
313+
const nextLine = lines[i];
314+
315+
// Stop at blank lines
316+
if (nextLine.trim() === '') {
317+
break;
318+
}
319+
320+
// Stop at fenced code blocks
321+
if (FENCE_PATTERN.test(nextLine)) {
322+
break;
323+
}
324+
325+
// Stop at list items
326+
if (LIST_ITEM_PATTERN.test(nextLine)) {
327+
break;
328+
}
329+
330+
// Stop at blockquotes
331+
if (BLOCKQUOTE_PATTERN.test(nextLine)) {
332+
break;
333+
}
334+
335+
// Check if next line is indented code (4+ spaces, when not in a list context)
336+
const nextLeadingSpaces = getLeadingWhitespaceLength(nextLine);
337+
const nextIsIndentedCode = nextLeadingSpaces >= 4;
338+
339+
if (nextIsIndentedCode) {
340+
break;
341+
}
342+
343+
// Join this line with a space
344+
joinedLine = appendWithSpace(joinedLine, nextLine.trim());
345+
i++;
346+
}
347+
348+
result.push(joinedLine);
349+
}
350+
351+
return result.join('\n');
352+
}
353+
10354
/**
11355
* Determines if a repository is a submodule by checking if its path
12356
* appears in any other repository's submodules list.

0 commit comments

Comments
 (0)