Skip to content

Commit 5a62a8e

Browse files
authored
match beginning and end of line correctly in FindNext and ReplaceRegex (micro-editor#3575)
1 parent bf4156c commit 5a62a8e

2 files changed

Lines changed: 132 additions & 87 deletions

File tree

internal/buffer/loc.go

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,16 @@ func (l Loc) LessEqual(b Loc) bool {
4747
return l == b
4848
}
4949

50+
// Clamp returns l restricted to the range delimited by start and end:
// end if l is greater than or equal to end, start if l is less than start,
// and l itself otherwise. The upper bound is tested first, so when both
// conditions would hold the result is end.
51+
func (l Loc) Clamp(start, end Loc) Loc {
52+
if l.GreaterEqual(end) {
53+
return end
54+
} else if l.LessThan(start) {
55+
return start
56+
}
57+
return l
58+
}
59+
5060
// The following functions require a buffer to know where newlines are
5161

5262
// Diff returns the distance between two locations
@@ -139,10 +149,5 @@ func ByteOffset(pos Loc, buf *Buffer) int {
139149

140150
// clamps a loc within a buffer
141151
func clamp(pos Loc, la *LineArray) Loc {
142-
if pos.GreaterEqual(la.End()) {
143-
return la.End()
144-
} else if pos.LessThan(la.Start()) {
145-
return la.Start()
146-
}
147-
return pos
152+
return pos.Clamp(la.Start(), la.End())
148153
}

internal/buffer/search.go

Lines changed: 121 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,56 @@ package buffer
22

33
import (
44
"regexp"
5+
"unicode/utf8"
56

67
"github.com/zyedidia/micro/v2/internal/util"
78
)
89

10+
// We want "^" and "$" to match only the beginning/end of a line, not the
11+
// beginning/end of the search region if it is in the middle of a line.
12+
// In that case we use padded regexps to require a rune before or after
13+
// the match. (This also affects other empty-string patterns like "\\b".)
14+
// The following two flags indicate the padding used.
15+
const (
16+
padStart = 1 << iota
17+
padEnd
18+
)
19+
20+
// findLineParams prepares the search of line i of b for the region
// delimited by start and end. It returns, in order:
//   - the bytes of the line to search, sliced when i is the first and/or
//     last line of the region,
//   - charpos, the character offset that callers add to positions found in
//     those bytes (0 unless the line was sliced at the start),
//   - the padding flags used (0, padStart, padEnd or padStart|padEnd),
//   - the regexp to run: r itself when no padding is used, otherwise r
//     wrapped in a non-capturing group with a "." before and/or after it.
//
// start and end are received by value, so clamping their X fields here
// does not affect the caller's copies.
//
// NOTE(review): whenever padding is needed the padded regexp is rebuilt
// with regexp.MustCompile on each call, and MustCompile panics if the
// combined expression does not parse — confirm that wrapping r.String()
// this way is always valid, and consider caching the padded variants.
func findLineParams(b *Buffer, start, end Loc, i int, r *regexp.Regexp) ([]byte, int, int, *regexp.Regexp) {
21+
l := b.LineBytes(i)
22+
charpos := 0
23+
padMode := 0
24+
25+
// Last line of the region: if end.X lies before the end of the line, the
// line is sliced at end.X+1 instead of end.X (presumably keeping one extra
// character after the region — confirm util.SliceStart semantics) and
// padEnd is recorded so the regexp must consume that character.
if i == end.Y {
26+
nchars := util.CharacterCount(l)
27+
end.X = util.Clamp(end.X, 0, nchars)
28+
if end.X < nchars {
29+
l = util.SliceStart(l, end.X+1)
30+
padMode |= padEnd
31+
}
32+
}
33+
34+
// First line of the region: if start.X is past the beginning of the line,
// the slice begins at start.X-1 and padStart is recorded. Note that nchars
// is counted on l as already sliced by the end-of-region step above.
if i == start.Y {
35+
nchars := util.CharacterCount(l)
36+
start.X = util.Clamp(start.X, 0, nchars)
37+
if start.X > 0 {
38+
charpos = start.X - 1
39+
l = util.SliceEnd(l, charpos)
40+
padMode |= padStart
41+
}
42+
}
43+
44+
// Only the non-zero padding modes need a new regexp; with padMode == 0
// the caller's r is returned unchanged.
if padMode == padStart {
45+
r = regexp.MustCompile(".(?:" + r.String() + ")")
46+
} else if padMode == padEnd {
47+
r = regexp.MustCompile("(?:" + r.String() + ").")
48+
} else if padMode == padStart|padEnd {
49+
r = regexp.MustCompile(".(?:" + r.String() + ").")
50+
}
51+
52+
return l, charpos, padMode, r
53+
}
54+
955
func (b *Buffer) findDown(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) {
1056
lastcn := util.CharacterCount(b.LineBytes(b.LinesNum() - 1))
1157
if start.Y > b.LinesNum()-1 {
@@ -22,30 +68,19 @@ func (b *Buffer) findDown(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) {
2268
}
2369

2470
for i := start.Y; i <= end.Y; i++ {
25-
l := b.LineBytes(i)
26-
charpos := 0
27-
28-
if i == start.Y && start.Y == end.Y {
29-
nchars := util.CharacterCount(l)
30-
start.X = util.Clamp(start.X, 0, nchars)
31-
end.X = util.Clamp(end.X, 0, nchars)
32-
l = util.SliceStart(l, end.X)
33-
l = util.SliceEnd(l, start.X)
34-
charpos = start.X
35-
} else if i == start.Y {
36-
nchars := util.CharacterCount(l)
37-
start.X = util.Clamp(start.X, 0, nchars)
38-
l = util.SliceEnd(l, start.X)
39-
charpos = start.X
40-
} else if i == end.Y {
41-
nchars := util.CharacterCount(l)
42-
end.X = util.Clamp(end.X, 0, nchars)
43-
l = util.SliceStart(l, end.X)
44-
}
71+
l, charpos, padMode, rPadded := findLineParams(b, start, end, i, r)
4572

46-
match := r.FindIndex(l)
73+
match := rPadded.FindIndex(l)
4774

4875
if match != nil {
76+
if padMode&padStart != 0 {
77+
_, size := utf8.DecodeRune(l[match[0]:])
78+
match[0] += size
79+
}
80+
if padMode&padEnd != 0 {
81+
_, size := utf8.DecodeLastRune(l[:match[1]])
82+
match[1] -= size
83+
}
4984
start := Loc{charpos + util.RunePos(l, match[0]), i}
5085
end := Loc{charpos + util.RunePos(l, match[1]), i}
5186
return [2]Loc{start, end}, true
@@ -70,39 +105,39 @@ func (b *Buffer) findUp(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) {
70105
}
71106

72107
for i := end.Y; i >= start.Y; i-- {
73-
l := b.LineBytes(i)
74-
charpos := 0
75-
76-
if i == start.Y && start.Y == end.Y {
77-
nchars := util.CharacterCount(l)
78-
start.X = util.Clamp(start.X, 0, nchars)
79-
end.X = util.Clamp(end.X, 0, nchars)
80-
l = util.SliceStart(l, end.X)
81-
l = util.SliceEnd(l, start.X)
82-
charpos = start.X
83-
} else if i == start.Y {
84-
nchars := util.CharacterCount(l)
85-
start.X = util.Clamp(start.X, 0, nchars)
86-
l = util.SliceEnd(l, start.X)
87-
charpos = start.X
88-
} else if i == end.Y {
89-
nchars := util.CharacterCount(l)
90-
end.X = util.Clamp(end.X, 0, nchars)
91-
l = util.SliceStart(l, end.X)
92-
}
93-
94-
allMatches := r.FindAllIndex(l, -1)
108+
charCount := util.CharacterCount(b.LineBytes(i))
109+
from := Loc{0, i}.Clamp(start, end)
110+
to := Loc{charCount, i}.Clamp(start, end)
95111

112+
allMatches := b.findAll(r, from, to)
96113
if allMatches != nil {
97114
match := allMatches[len(allMatches)-1]
98-
start := Loc{charpos + util.RunePos(l, match[0]), i}
99-
end := Loc{charpos + util.RunePos(l, match[1]), i}
100-
return [2]Loc{start, end}, true
115+
return [2]Loc{match[0], match[1]}, true
101116
}
102117
}
103118
return [2]Loc{}, false
104119
}
105120

121+
// findAll collects the matches of r between start and end by calling
// findDown repeatedly, each time from a position derived from the previous
// match. It returns the matches in the order they were found, or nil when
// findDown finds nothing on the first call.
func (b *Buffer) findAll(r *regexp.Regexp, start, end Loc) [][2]Loc {
122+
var matches [][2]Loc
123+
loc := start
124+
for {
125+
match, found := b.findDown(r, loc, end)
126+
if !found {
127+
break
128+
}
129+
matches = append(matches, match)
130+
// Choose where the next search starts:
//   - non-empty match: continue from its end;
//   - empty match before end: step one position forward with Move(1, b)
//     (presumably so the same empty match is not returned again);
//   - empty match located exactly at end: nothing is left to search.
if match[0] != match[1] {
131+
loc = match[1]
132+
} else if match[1] != end {
133+
loc = match[1].Move(1, b)
134+
} else {
135+
break
136+
}
137+
}
138+
return matches
139+
}
140+
106141
// FindNext finds the next occurrence of a given string in the buffer
107142
// It returns the start and end location of the match (if found) and
108143
// a boolean indicating if it was found
@@ -146,53 +181,58 @@ func (b *Buffer) FindNext(s string, start, end, from Loc, down bool, useRegex bo
146181
}
147182

148183
// ReplaceRegex replaces all occurrences of 'search' with 'replace' in the given area
149-
// and returns the number of replacements made and the number of runes
184+
// and returns the number of replacements made and the number of characters
150185
// added or removed on the last line of the range
151186
func (b *Buffer) ReplaceRegex(start, end Loc, search *regexp.Regexp, replace []byte, captureGroups bool) (int, int) {
152187
if start.GreaterThan(end) {
153188
start, end = end, start
154189
}
155190

156-
netrunes := 0
157-
191+
charsEnd := util.CharacterCount(b.LineBytes(end.Y))
158192
found := 0
159193
var deltas []Delta
194+
160195
for i := start.Y; i <= end.Y; i++ {
161-
l := b.lines[i].data
162-
charpos := 0
163-
164-
if start.Y == end.Y && i == start.Y {
165-
l = util.SliceStart(l, end.X)
166-
l = util.SliceEnd(l, start.X)
167-
charpos = start.X
168-
} else if i == start.Y {
169-
l = util.SliceEnd(l, start.X)
170-
charpos = start.X
171-
} else if i == end.Y {
172-
l = util.SliceStart(l, end.X)
173-
}
174-
newText := search.ReplaceAllFunc(l, func(in []byte) []byte {
175-
var result []byte
176-
if captureGroups {
177-
for _, submatches := range search.FindAllSubmatchIndex(in, -1) {
178-
result = search.Expand(result, replace, in, submatches)
196+
l := b.LineBytes(i)
197+
charCount := util.CharacterCount(l)
198+
if (i == start.Y && start.X > 0) || (i == end.Y && end.X < charCount) {
199+
// This replacement code works in general, but it creates a separate
200+
// modification for each match. We only use it for the first and last
201+
// lines, which may use padded regexps
202+
203+
from := Loc{0, i}.Clamp(start, end)
204+
to := Loc{charCount, i}.Clamp(start, end)
205+
matches := b.findAll(search, from, to)
206+
found += len(matches)
207+
208+
for j := len(matches) - 1; j >= 0; j-- {
209+
// if we counted upwards, the different deltas would interfere
210+
match := matches[j]
211+
var newText []byte
212+
if captureGroups {
213+
newText = search.ReplaceAll(b.Substr(match[0], match[1]), replace)
214+
} else {
215+
newText = replace
179216
}
180-
} else {
181-
result = replace
217+
deltas = append(deltas, Delta{newText, match[0], match[1]})
182218
}
183-
found++
184-
if i == end.Y {
185-
netrunes += util.CharacterCount(result) - util.CharacterCount(in)
186-
}
187-
return result
188-
})
189-
190-
from := Loc{charpos, i}
191-
to := Loc{charpos + util.CharacterCount(l), i}
192-
193-
deltas = append(deltas, Delta{newText, from, to})
219+
} else {
220+
newLine := search.ReplaceAllFunc(l, func(in []byte) []byte {
221+
found++
222+
var result []byte
223+
if captureGroups {
224+
match := search.FindSubmatchIndex(in)
225+
result = search.Expand(result, replace, in, match)
226+
} else {
227+
result = replace
228+
}
229+
return result
230+
})
231+
deltas = append(deltas, Delta{newLine, Loc{0, i}, Loc{charCount, i}})
232+
}
194233
}
234+
195235
b.MultipleReplace(deltas)
196236

197-
return found, netrunes
237+
return found, util.CharacterCount(b.LineBytes(end.Y)) - charsEnd
198238
}

0 commit comments

Comments
 (0)