1+ #include < algorithm>
12#include < cstdio>
3+ #include < cstring>
24#include < string>
35#include < vector>
46
@@ -16,59 +18,216 @@ constexpr cfbox::help::HelpEntry HELP = {
1618 .extra = " " ,
1719};
1820
19- static auto lcs_diff (const std::vector<std::string>& a, const std::vector<std::string>& b) -> void {
20- auto m = a.size (), n = b.size ();
21- std::vector<std::vector<int >> dp (m + 1 , std::vector<int >(n + 1 , 0 ));
22- for (std::size_t i = 1 ; i <= m; ++i)
23- for (std::size_t j = 1 ; j <= n; ++j)
24- dp[i][j] = (a[i-1 ] == b[j-1 ]) ? dp[i-1 ][j-1 ] + 1 : std::max (dp[i-1 ][j], dp[i][j-1 ]);
25-
26- std::vector<std::pair<char , std::string>> edits;
27- std::size_t i = m, j = n;
28- while (i > 0 || j > 0 ) {
29- if (i > 0 && j > 0 && a[i-1 ] == b[j-1 ]) {
30- edits.push_back ({' ' , a[i-1 ]});
31- --i; --j;
32- } else if (j > 0 && (i == 0 || dp[i][j-1 ] >= dp[i-1 ][j])) {
33- edits.push_back ({' +' , b[j-1 ]});
34- --j;
/// One step of an edit script that turns sequence `a` into sequence `b`.
struct Edit {
    char op;           ///< ' ' = line common to both, '-' = only in a, '+' = only in b
    std::size_t line;  ///< 0-based index into a for ' ' and '-', into b for '+'
};

/// Computes a shortest edit script from `a` to `b` with Myers' O(ND) greedy
/// algorithm.
///
/// @param a  lines of the "old" sequence
/// @param b  lines of the "new" sequence
/// @return   edits in output order; every line of `a` appears exactly once as
///           ' ' or '-', and every line of `b` exactly once as ' ' or '+'.
static auto myers_diff(const std::vector<std::string>& a, const std::vector<std::string>& b)
    -> std::vector<Edit> {
    const auto N = static_cast<int>(a.size());
    const auto M = static_cast<int>(b.size());

    std::vector<Edit> edits;

    // Trivial cases: with one side empty the script is pure deletes or pure
    // inserts (and empty when both sides are empty).
    if (N == 0 || M == 0) {
        edits.reserve(static_cast<std::size_t>(N + M));
        for (int i = 0; i < N; ++i) edits.push_back({'-', static_cast<std::size_t>(i)});
        for (int j = 0; j < M; ++j) edits.push_back({'+', static_cast<std::size_t>(j)});
        return edits;
    }

    // v[k + off] is the furthest x reached so far on diagonal k (k = x - y).
    const int max_d = N + M;
    const int off = max_d;  // shifts k into a non-negative index
    const auto at = [off](int k) { return static_cast<std::size_t>(k + off); };
    std::vector<int> v(static_cast<std::size_t>(2 * max_d + 1), 0);

    // trace[d] is the slice of V for diagonals [-(d-1), d-1] as it was just
    // before round d. Round d only reads those diagonals, so this is all the
    // backtrack needs. trace[0] is an unused placeholder.
    std::vector<std::vector<int>> trace;

    bool reached = false;
    for (int d = 0; d <= max_d && !reached; ++d) {
        if (d == 0) {
            trace.emplace_back();
        } else {
            trace.emplace_back(v.begin() + (off - (d - 1)), v.begin() + (off + d));
        }
        for (int k = -d; k <= d; k += 2) {
            // Round d writes diagonals of d's parity and reads the other
            // parity, so reading v in place is safe.
            const bool down = (k == -d) || (k != d && v[at(k - 1)] < v[at(k + 1)]);
            int x = down ? v[at(k + 1)] : v[at(k - 1)] + 1;
            int y = x - k;
            // Follow the snake of equal lines.
            while (x < N && y < M &&
                   a[static_cast<std::size_t>(x)] == b[static_cast<std::size_t>(y)]) {
                ++x;
                ++y;
            }
            v[at(k)] = x;
            if (x >= N && y >= M) {
                reached = true;
                break;
            }
        }
    }

    // Walk back from (N, M), one round at a time, re-deriving the choice the
    // forward pass made from the saved slice.
    int x = N;
    int y = M;
    for (int d = static_cast<int>(trace.size()) - 1; d > 0; --d) {
        const auto& prev = trace[static_cast<std::size_t>(d)];
        const auto pv = [&prev, d](int kk) { return prev[static_cast<std::size_t>(kk + d - 1)]; };

        const int k = x - y;
        const bool down = (k == -d) || (k != d && pv(k - 1) < pv(k + 1));
        const int prev_k = down ? k + 1 : k - 1;
        const int prev_x = pv(prev_k);
        const int prev_y = prev_x - prev_k;

        // The snake of this round starts right after the non-diagonal step:
        // at x = prev_x for an insert, at x = prev_x + 1 for a delete.
        const int snake_x = down ? prev_x : prev_x + 1;
        while (x > snake_x) {
            --x;
            --y;
            edits.push_back({' ', static_cast<std::size_t>(x)});
        }

        if (down) {
            edits.push_back({'+', static_cast<std::size_t>(prev_y)});  // b[prev_y] inserted
        } else {
            edits.push_back({'-', static_cast<std::size_t>(prev_x)});  // a[prev_x] deleted
        }
        x = prev_x;
        y = prev_y;
    }

    // Round 0 consists only of the leading snake from (0, 0).
    while (x > 0 && y > 0) {
        --x;
        --y;
        edits.push_back({' ', static_cast<std::size_t>(x)});
    }

    std::reverse(edits.begin(), edits.end());
    return edits;
}
44130
45- static auto unified_diff (const std::string& file1, const std::string& file2,
46- const std::vector<std::string>& a, const std::vector<std::string>& b) -> void {
47- std::printf (" --- %s\n +++ %s\n @@ -1,%zu +1,%zu @@\n " , file1.c_str (), file2.c_str (), a.size (), b.size ());
48- auto m = a.size (), n = b.size ();
49- std::vector<std::vector<int >> dp (m + 1 , std::vector<int >(n + 1 , 0 ));
50- for (std::size_t i = 1 ; i <= m; ++i)
51- for (std::size_t j = 1 ; j <= n; ++j)
52- dp[i][j] = (a[i-1 ] == b[j-1 ]) ? dp[i-1 ][j-1 ] + 1 : std::max (dp[i-1 ][j], dp[i][j-1 ]);
53-
54- std::vector<std::pair<char , std::string>> edits;
55- std::size_t i = m, j = n;
56- while (i > 0 || j > 0 ) {
57- if (i > 0 && j > 0 && a[i-1 ] == b[j-1 ]) {
58- edits.push_back ({' ' , a[i-1 ]});
59- --i; --j;
60- } else if (j > 0 && (i == 0 || dp[i][j-1 ] >= dp[i-1 ][j])) {
61- edits.push_back ({' +' , b[j-1 ]});
62- --j;
131+ static auto print_edits (const std::vector<Edit>& edits,
132+ const std::vector<std::string>& a,
133+ const std::vector<std::string>& b) -> void {
134+ for (auto & e : edits) {
135+ if (e.op == ' ' || e.op == ' -' ) {
136+ std::printf (" %c%s\n " , e.op , a[e.line ].c_str ());
63137 } else {
64- edits.push_back ({' -' , a[i-1 ]});
65- --i;
138+ std::printf (" +%s\n " , b[e.line ].c_str ());
66139 }
67140 }
68- for (auto it = edits.rbegin (); it != edits.rend (); ++it) {
69- std::printf (" %c%s\n " , it->first , it->second .c_str ());
141+ }
142+
143+ struct Hunk {
144+ int a_start, a_count;
145+ int b_start, b_count;
146+ std::vector<Edit> edits;
147+ };
148+
149+ static auto build_hunks (const std::vector<Edit>& edits,
150+ int context = 3 ) -> std::vector<Hunk> {
151+ if (edits.empty ()) return {};
152+
153+ // Find change positions
154+ std::vector<int > change_idx;
155+ for (int i = 0 ; i < static_cast <int >(edits.size ()); ++i) {
156+ if (edits[static_cast <std::size_t >(i)].op != ' ' )
157+ change_idx.push_back (i);
158+ }
159+ if (change_idx.empty ()) return {};
160+
161+ // Group changes into hunks with context
162+ std::vector<Hunk> hunks;
163+ int hunk_start = std::max (0 , change_idx[0 ] - context);
164+
165+ for (int ci = 1 ; ci < static_cast <int >(change_idx.size ()); ++ci) {
166+ int gap_start = change_idx[static_cast <std::size_t >(ci - 1 )] + 1 ;
167+ int gap_end = change_idx[static_cast <std::size_t >(ci)] - 1 ;
168+ // If gap between changes exceeds 2*context, split into new hunk
169+ if (gap_end - gap_start + 1 > 2 * context) {
170+ int hunk_end = std::min (static_cast <int >(edits.size ()) - 1 ,
171+ change_idx[static_cast <std::size_t >(ci - 1 )] + context);
172+ Hunk h;
173+ h.edits .assign (edits.begin () + hunk_start, edits.begin () + hunk_end + 1 );
174+ // Count a/b lines for this hunk
175+ h.a_start = 1 ; h.a_count = 0 ;
176+ h.b_start = 1 ; h.b_count = 0 ;
177+ bool a_init = false , b_init = false ;
178+ for (auto & e : h.edits ) {
179+ if (e.op == ' ' || e.op == ' -' ) {
180+ if (!a_init) { h.a_start = static_cast <int >(e.line ) + 1 ; a_init = true ; }
181+ ++h.a_count ;
182+ }
183+ if (e.op == ' ' || e.op == ' +' ) {
184+ if (!b_init) { h.b_start = static_cast <int >(e.line ) + 1 ; b_init = true ; }
185+ ++h.b_count ;
186+ }
187+ }
188+ hunks.push_back (std::move (h));
189+ hunk_start = std::max (0 , change_idx[static_cast <std::size_t >(ci)] - context);
190+ }
191+ }
192+ // Last hunk
193+ int hunk_end = std::min (static_cast <int >(edits.size ()) - 1 ,
194+ change_idx.back () + context);
195+ Hunk h;
196+ h.edits .assign (edits.begin () + hunk_start, edits.begin () + hunk_end + 1 );
197+ h.a_start = 1 ; h.a_count = 0 ;
198+ h.b_start = 1 ; h.b_count = 0 ;
199+ bool a_init = false , b_init = false ;
200+ for (auto & e : h.edits ) {
201+ if (e.op == ' ' || e.op == ' -' ) {
202+ if (!a_init) { h.a_start = static_cast <int >(e.line ) + 1 ; a_init = true ; }
203+ ++h.a_count ;
204+ }
205+ if (e.op == ' ' || e.op == ' +' ) {
206+ if (!b_init) { h.b_start = static_cast <int >(e.line ) + 1 ; b_init = true ; }
207+ ++h.b_count ;
208+ }
209+ }
210+ hunks.push_back (std::move (h));
211+ return hunks;
212+ }
213+
214+ static auto unified_diff (const std::string& file1, const std::string& file2,
215+ const std::vector<std::string>& a, const std::vector<std::string>& b) -> void {
216+ std::printf (" --- %s\n +++ %s\n " , file1.c_str (), file2.c_str ());
217+ auto edits = myers_diff (a, b);
218+ auto hunks = build_hunks (edits);
219+ for (auto & h : hunks) {
220+ std::printf (" @@ -%d,%d +%d,%d @@\n " ,
221+ h.a_start , h.a_count , h.b_start , h.b_count );
222+ for (auto & e : h.edits ) {
223+ if (e.op == ' ' || e.op == ' -' )
224+ std::printf (" %c%s\n " , e.op , a[e.line ].c_str ());
225+ else
226+ std::printf (" +%s\n " , b[e.line ].c_str ());
227+ }
70228 }
71229}
230+
72231} // namespace
73232
74233auto diff_main (int argc, char * argv[]) -> int {
@@ -96,7 +255,8 @@ auto diff_main(int argc, char* argv[]) -> int {
96255 if (unified) {
97256 unified_diff (std::string{pos[0 ]}, std::string{pos[1 ]}, *a_result, *b_result);
98257 } else {
99- lcs_diff (*a_result, *b_result);
258+ auto edits = myers_diff (*a_result, *b_result);
259+ print_edits (edits, *a_result, *b_result);
100260 }
101261 return 1 ;
102262}
0 commit comments