diff --git a/go.mod b/go.mod index 9f9f549b..0447dde9 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/Masterminds/sprig/v3 v3.3.0 github.com/google/go-querystring v1.1.0 // indirect github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 - github.com/sourcegraph/go-diff v0.6.1 + github.com/sourcegraph/go-diff v0.8.0 github.com/spf13/viper v1.20.1 github.com/stretchr/testify v1.11.1 golang.org/x/oauth2 v0.33.0 diff --git a/go.sum b/go.sum index af9f185f..527f2b8c 100644 --- a/go.sum +++ b/go.sum @@ -206,12 +206,10 @@ github.com/sagikazarmark/locafero v0.7.0 h1:5MqpDsTGNDhY8sGp0Aowyf0qKsPrhewaLSsF github.com/sagikazarmark/locafero v0.7.0/go.mod h1:2za3Cg5rMaTMoG/2Ulr9AwtFaIppKXTRYnozin4aB5k= github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= -github.com/shurcooL/go v0.0.0-20180423040247-9e1955d9fb6e/go.mod h1:TDJrrUr11Vxrven61rcy3hJMUqaf/CLWYhHNPmT14Lk= -github.com/shurcooL/go-goon v0.0.0-20170922171312-37c2f522c041/go.mod h1:N5mDOmsrJOB+vfqUK+7DmDyjhSLIIBnXo9lvZJj3MWQ= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= -github.com/sourcegraph/go-diff v0.6.1 h1:hmA1LzxW0n1c3Q4YbrFgg4P99GSnebYa3x8gr0HZqLQ= -github.com/sourcegraph/go-diff v0.6.1/go.mod h1:iBszgVvyxdc8SFZ7gm69go2KDdt3ag071iBaWPF6cjs= +github.com/sourcegraph/go-diff v0.8.0 h1:ipIyu4cTsLbIrln4l0qtHA3r0a7gyK4ntKjtQytHhvY= +github.com/sourcegraph/go-diff v0.8.0/go.mod h1:hWlcO7Al+UZStZAP8rBumHpCK5ZHQ5BXsMls8p4+F5E= github.com/spf13/afero v1.12.0 h1:UcOPyRBYczmFn6yvphxkn9ZEOY65cpwGKb5mL36mrqs= github.com/spf13/afero v1.12.0/go.mod h1:ZTlWwG4/ahT8W7T0WQ5uYmjI9duaLQGy3Q2OAl4sk/4= github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y= diff --git a/vendor/github.com/sourcegraph/go-diff/LICENSE b/vendor/github.com/sourcegraph/go-diff/LICENSE index 0733b6e5..5ba1c443 100644 --- a/vendor/github.com/sourcegraph/go-diff/LICENSE +++ b/vendor/github.com/sourcegraph/go-diff/LICENSE @@ -33,3 +33,14 @@ in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/sourcegraph/go-diff/diff/diff.go b/vendor/github.com/sourcegraph/go-diff/diff/diff.go index 0f465b9e..cc19fe52 100644 --- a/vendor/github.com/sourcegraph/go-diff/diff/diff.go +++ b/vendor/github.com/sourcegraph/go-diff/diff/diff.go @@ -5,12 +5,18 @@ import ( "time" ) +// ParseOptions specifies options for parsing diffs. +type ParseOptions struct { + // KeepCR specifies whether to keep trailing carriage return characters (\r) in lines. + KeepCR bool +} + // A FileDiff represents a unified diff for a single file. // // A file unified diff has a header that resembles the following: // -// --- oldname 2009-10-11 15:12:20.000000000 -0700 -// +++ newname 2009-10-11 15:12:30.000000000 -0700 +// --- oldname 2009-10-11 15:12:20.000000000 -0700 +// +++ newname 2009-10-11 15:12:30.000000000 -0700 type FileDiff struct { // the original name of the file OrigName string @@ -120,6 +126,10 @@ const onlyInMessage = "Only in %s: %s\n" // See https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html. const diffTimeParseLayout = "2006-01-02 15:04:05 -0700" +// Apple's diff is based on freebsd diff, which uses a timestamp format that does +// not include the timezone offset. +const diffTimeParseWithoutTZLayout = "2006-01-02 15:04:05" + // diffTimeFormatLayout is the layout used to format (i.e., print) the time in unified diff file // header timestamps. // See https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html. diff --git a/vendor/github.com/sourcegraph/go-diff/diff/parse.go b/vendor/github.com/sourcegraph/go-diff/diff/parse.go index 8d5cfc23..b73e2301 100644 --- a/vendor/github.com/sourcegraph/go-diff/diff/parse.go +++ b/vendor/github.com/sourcegraph/go-diff/diff/parse.go @@ -1,7 +1,6 @@ package diff import ( - "bufio" "bytes" "errors" "fmt" @@ -17,20 +16,31 @@ import ( // case of per-file errors. If it cannot detect when the diff of the next file // begins, the hunks are added to the FileDiff of the previous file. func ParseMultiFileDiff(diff []byte) ([]*FileDiff, error) { - return NewMultiFileDiffReader(bytes.NewReader(diff)).ReadAllFiles() + return ParseMultiFileDiffOptions(diff, ParseOptions{}) +} + +// ParseMultiFileDiffOptions parses a multi-file unified diff with the given options. +func ParseMultiFileDiffOptions(diff []byte, opts ParseOptions) ([]*FileDiff, error) { + return NewMultiFileDiffReaderOptions(bytes.NewReader(diff), opts).ReadAllFiles() } // NewMultiFileDiffReader returns a new MultiFileDiffReader that reads // a multi-file unified diff from r. func NewMultiFileDiffReader(r io.Reader) *MultiFileDiffReader { - return &MultiFileDiffReader{reader: bufio.NewReader(r)} + return NewMultiFileDiffReaderOptions(r, ParseOptions{}) +} + +// NewMultiFileDiffReaderOptions returns a new MultiFileDiffReader that reads +// a multi-file unified diff from r with the given options. +func NewMultiFileDiffReaderOptions(r io.Reader, opts ParseOptions) *MultiFileDiffReader { + return &MultiFileDiffReader{reader: newLineReaderOptions(r, opts)} } // MultiFileDiffReader reads a multi-file unified diff. type MultiFileDiffReader struct { line int offset int64 - reader *bufio.Reader + reader *lineReader // TODO(sqs): line and offset tracking in multi-file diffs is broken; add tests and fix @@ -46,6 +56,14 @@ type MultiFileDiffReader struct { // all hunks) from r. If there are no more files in the diff, it // returns error io.EOF. func (r *MultiFileDiffReader) ReadFile() (*FileDiff, error) { + fd, _, err := r.ReadFileWithTrailingContent() + return fd, err +} + +// ReadFileWithTrailingContent reads the next file unified diff (including +// headers and all hunks) from r, also returning any trailing content. If there +// are no more files in the diff, it returns error io.EOF. +func (r *MultiFileDiffReader) ReadFileWithTrailingContent() (*FileDiff, string, error) { fr := &FileDiffReader{ line: r.line, offset: r.offset, @@ -59,23 +77,33 @@ func (r *MultiFileDiffReader) ReadFile() (*FileDiff, error) { switch e := err.(type) { case *ParseError: if e.Err == ErrNoFileHeader || e.Err == ErrExtendedHeadersEOF { - return nil, io.EOF + // Any non-diff content preceding a valid diff is included in the + // extended headers of the following diff. In this way, mixed diff / + // non-diff content can be parsed. Trailing non-diff content is + // different: it doesn't make sense to return a FileDiff with only + // extended headers populated. Instead, we return any trailing content + // in case the caller needs it. + trailing := "" + if fd != nil { + trailing = strings.Join(fd.Extended, "\n") + } + return nil, trailing, io.EOF } - return nil, err + return nil, "", err case OverflowError: r.nextFileFirstLine = []byte(e) - return fd, nil + return fd, "", nil default: - return nil, err + return nil, "", err } } // FileDiff is added/deleted file // No further collection of hunks needed if fd.NewName == "" { - return fd, nil + return fd, "", nil } // Before reading hunks, check to see if there are any. If there @@ -85,9 +113,9 @@ func (r *MultiFileDiffReader) ReadFile() (*FileDiff, error) { // caused by the lack of any hunks, or a malformatted hunk, so we // need to perform the check here. hr := fr.HunksReader() - line, err := readLine(r.reader) + line, err := r.reader.readLine() if err != nil && err != io.EOF { - return fd, err + return fd, "", err } line = bytes.TrimSuffix(line, []byte{'\n'}) if bytes.HasPrefix(line, hunkPrefix) { @@ -101,10 +129,10 @@ func (r *MultiFileDiffReader) ReadFile() (*FileDiff, error) { // This just means we finished reading the hunks for the // current file. See the ErrBadHunkLine doc for more info. r.nextFileFirstLine = e.Line - return fd, nil + return fd, "", nil } } - return nil, err + return nil, "", err } } else { // There weren't any hunks, so that line we peeked ahead at @@ -112,7 +140,7 @@ func (r *MultiFileDiffReader) ReadFile() (*FileDiff, error) { r.nextFileFirstLine = line } - return fd, nil + return fd, "", nil } // ReadAllFiles reads all file unified diffs (including headers and all @@ -135,20 +163,31 @@ func (r *MultiFileDiffReader) ReadAllFiles() ([]*FileDiff, error) { // ParseFileDiff parses a file unified diff. func ParseFileDiff(diff []byte) (*FileDiff, error) { - return NewFileDiffReader(bytes.NewReader(diff)).Read() + return ParseFileDiffOptions(diff, ParseOptions{}) +} + +// ParseFileDiffOptions parses a file unified diff with the given options. +func ParseFileDiffOptions(diff []byte, opts ParseOptions) (*FileDiff, error) { + return NewFileDiffReaderOptions(bytes.NewReader(diff), opts).Read() } // NewFileDiffReader returns a new FileDiffReader that reads a file // unified diff. func NewFileDiffReader(r io.Reader) *FileDiffReader { - return &FileDiffReader{reader: bufio.NewReader(r)} + return NewFileDiffReaderOptions(r, ParseOptions{}) +} + +// NewFileDiffReaderOptions returns a new FileDiffReader that reads a file +// unified diff with the given options. +func NewFileDiffReaderOptions(r io.Reader, opts ParseOptions) *FileDiffReader { + return &FileDiffReader{reader: newLineReaderOptions(r, opts)} } // FileDiffReader reads a unified file diff. type FileDiffReader struct { line int offset int64 - reader *bufio.Reader + reader *lineReader // fileHeaderLine is the first file header line, set by: // @@ -236,7 +275,6 @@ func (r *FileDiffReader) ReadFileHeaders() (origName, newName string, origTimest "", nil, nil, nil } } - origName, origTimestamp, err = r.readOneFileHeader([]byte("--- ")) if err != nil { return "", "", nil, nil, err @@ -266,7 +304,7 @@ func (r *FileDiffReader) readOneFileHeader(prefix []byte) (filename string, time if r.fileHeaderLine == nil { var err error - line, err = readLine(r.reader) + line, err = r.reader.readLine() if err == io.EOF { return "", nil, &ParseError{r.line, r.offset, ErrNoFileHeader} } else if err != nil { @@ -289,10 +327,16 @@ func (r *FileDiffReader) readOneFileHeader(prefix []byte) (filename string, time parts := strings.SplitN(trimmedLine, "\t", 2) filename = parts[0] if len(parts) == 2 { + var ts time.Time // Timestamp is optional, but this header has it. - ts, err := time.Parse(diffTimeParseLayout, parts[1]) + ts, err = time.Parse(diffTimeParseLayout, parts[1]) if err != nil { - return "", nil, err + var err1 error + ts, err1 = time.Parse(diffTimeParseWithoutTZLayout, parts[1]) + if err1 != nil { + return "", nil, err + } + err = nil } timestamp = &ts } @@ -318,7 +362,7 @@ func (r *FileDiffReader) ReadExtendedHeaders() ([]string, error) { var line []byte if r.fileHeaderLine == nil { var err error - line, err = readLine(r.reader) + line, err = r.reader.readLine() if err == io.EOF { return xheaders, &ParseError{r.line, r.offset, ErrExtendedHeadersEOF} } else if err != nil { @@ -354,65 +398,194 @@ func (r *FileDiffReader) ReadExtendedHeaders() ([]string, error) { } } +// readQuotedFilename extracts a quoted filename from the beginning of a string, +// returning the unquoted filename and any remaining text after the filename. +func readQuotedFilename(text string) (value string, remainder string, err error) { + if text == "" || text[0] != '"' { + return "", "", fmt.Errorf(`string must start with a '"': %s`, text) + } + + // The end quote is the first quote NOT preceeded by an uneven number of backslashes. + numberOfBackslashes := 0 + for i, c := range text { + if c == '"' && i > 0 && numberOfBackslashes%2 == 0 { + value, err = strconv.Unquote(text[:i+1]) + remainder = text[i+1:] + return + } else if c == '\\' { + numberOfBackslashes++ + } else { + numberOfBackslashes = 0 + } + } + return "", "", fmt.Errorf(`end of string found while searching for '"': %s`, text) +} + +// parseDiffGitArgs extracts the two filenames from a 'diff --git' line. +// Returns false on syntax error, true if syntax is valid. Even with a +// valid syntax, it may be impossible to extract filenames; if so, the +// function returns ("", "", true). +func parseDiffGitArgs(diffArgs string) (string, string, bool) { + diffArgs = strings.TrimSuffix(diffArgs, "\r") + length := len(diffArgs) + if length < 3 { + return "", "", false + } + + if diffArgs[0] != '"' && diffArgs[length-1] != '"' { + // Both filenames are unquoted. + firstSpace := strings.IndexByte(diffArgs, ' ') + if firstSpace <= 0 || firstSpace == length-1 { + return "", "", false + } + + secondSpace := strings.IndexByte(diffArgs[firstSpace+1:], ' ') + if secondSpace == -1 { + if diffArgs[firstSpace+1] == '"' { + // The second filename begins with '"', but doesn't end with one. + return "", "", false + } + return diffArgs[:firstSpace], diffArgs[firstSpace+1:], true + } + + // One or both filenames contain a space, but the names are + // unquoted. Here, the 'diff --git' syntax is ambiguous, and + // we have to obtain the filenames elsewhere (e.g. from the + // hunk headers or extended headers). HOWEVER, if the file + // is newly created and empty, there IS no other place to + // find the filename. In this case, the two filenames are + // identical (except for the leading 'a/' prefix), and we have + // to handle that case here. + first := diffArgs[:length/2] + second := diffArgs[length/2+1:] + + // If the two strings could be equal, based on length, proceed. + if length%2 == 1 { + // If the name minus the a/ b/ prefixes is equal, proceed. + if len(first) >= 3 && first[1] == '/' && first[1:] == second[1:] { + return first, second, true + } + // If the names don't have the a/ and b/ prefixes and they're equal, proceed. + if !(first[:2] == "a/" && second[:2] == "b/") && first == second { + return first, second, true + } + } + + // The syntax is (unfortunately) valid, but we could not extract + // the filenames. + return "", "", true + } + + if diffArgs[0] == '"' { + first, remainder, err := readQuotedFilename(diffArgs) + if err != nil || len(remainder) < 2 || remainder[0] != ' ' { + return "", "", false + } + if remainder[1] == '"' { + second, remainder, err := readQuotedFilename(remainder[1:]) + if remainder != "" || err != nil { + return "", "", false + } + return first, second, true + } + return first, remainder[1:], true + } + + // In this case, second argument MUST be quoted (or it's a syntax error) + i := strings.IndexByte(diffArgs, '"') + if i == -1 || i+2 >= length || diffArgs[i-1] != ' ' { + return "", "", false + } + + second, remainder, err := readQuotedFilename(diffArgs[i:]) + if remainder != "" || err != nil { + return "", "", false + } + return diffArgs[:i-1], second, true +} + // handleEmpty detects when FileDiff was an empty diff and will not have any hunks // that follow. It updates fd fields from the parsed extended headers. func handleEmpty(fd *FileDiff) (wasEmpty bool) { - var err error lineCount := len(fd.Extended) if lineCount > 0 && !strings.HasPrefix(fd.Extended[0], "diff --git ") { return false } - switch { - case (lineCount == 3 || lineCount == 4 && strings.HasPrefix(fd.Extended[3], "Binary files ") || lineCount > 4 && strings.HasPrefix(fd.Extended[3], "GIT binary patch")) && - strings.HasPrefix(fd.Extended[1], "new file mode "): - names := strings.SplitN(fd.Extended[0][len("diff --git "):], " ", 2) + lineHasPrefix := func(idx int, prefix string) bool { + return strings.HasPrefix(fd.Extended[idx], prefix) + } + + linesHavePrefixes := func(idx1 int, prefix1 string, idx2 int, prefix2 string) bool { + return lineHasPrefix(idx1, prefix1) && lineHasPrefix(idx2, prefix2) + } + + isCopy := (lineCount == 4 && linesHavePrefixes(2, "copy from ", 3, "copy to ")) || + (lineCount == 6 && linesHavePrefixes(2, "copy from ", 3, "copy to ") && lineHasPrefix(5, "Binary files ")) || + (lineCount == 6 && linesHavePrefixes(1, "old mode ", 2, "new mode ") && linesHavePrefixes(4, "copy from ", 5, "copy to ")) + + isRename := (lineCount == 4 && linesHavePrefixes(2, "rename from ", 3, "rename to ")) || + (lineCount == 5 && linesHavePrefixes(2, "rename from ", 3, "rename to ") && lineHasPrefix(4, "Binary files ")) || + (lineCount == 6 && linesHavePrefixes(2, "rename from ", 3, "rename to ") && lineHasPrefix(5, "Binary files ")) || + (lineCount == 6 && linesHavePrefixes(1, "old mode ", 2, "new mode ") && linesHavePrefixes(4, "rename from ", 5, "rename to ")) + + isDeletedFile := (lineCount == 3 || lineCount == 4 && lineHasPrefix(3, "Binary files ") || lineCount > 4 && lineHasPrefix(3, "GIT binary patch")) && + lineHasPrefix(1, "deleted file mode ") + + isNewFile := (lineCount == 3 || lineCount == 4 && lineHasPrefix(3, "Binary files ") || lineCount > 4 && lineHasPrefix(3, "GIT binary patch")) && + lineHasPrefix(1, "new file mode ") + + isModeChange := lineCount == 3 && linesHavePrefixes(1, "old mode ", 2, "new mode ") + + isBinaryPatch := lineCount == 3 && lineHasPrefix(2, "Binary files ") || lineCount > 3 && lineHasPrefix(2, "GIT binary patch") + + if !isModeChange && !isCopy && !isRename && !isBinaryPatch && !isNewFile && !isDeletedFile { + return false + } + + var success bool + fd.OrigName, fd.NewName, success = parseDiffGitArgs(fd.Extended[0][len("diff --git "):]) + if isNewFile { fd.OrigName = "/dev/null" - fd.NewName, err = strconv.Unquote(names[1]) - if err != nil { - fd.NewName = names[1] - } - return true - case (lineCount == 3 || lineCount == 4 && strings.HasPrefix(fd.Extended[3], "Binary files ") || lineCount > 4 && strings.HasPrefix(fd.Extended[3], "GIT binary patch")) && - strings.HasPrefix(fd.Extended[1], "deleted file mode "): + } - names := strings.SplitN(fd.Extended[0][len("diff --git "):], " ", 2) - fd.OrigName, err = strconv.Unquote(names[0]) - if err != nil { - fd.OrigName = names[0] - } + if isDeletedFile { fd.NewName = "/dev/null" - return true - case lineCount == 4 && strings.HasPrefix(fd.Extended[2], "rename from ") && strings.HasPrefix(fd.Extended[3], "rename to "): - names := strings.SplitN(fd.Extended[0][len("diff --git "):], " ", 2) - fd.OrigName, err = strconv.Unquote(names[0]) - if err != nil { - fd.OrigName = names[0] - } - fd.NewName, err = strconv.Unquote(names[1]) - if err != nil { - fd.NewName = names[1] - } - return true - case lineCount == 6 && strings.HasPrefix(fd.Extended[5], "Binary files ") && strings.HasPrefix(fd.Extended[2], "rename from ") && strings.HasPrefix(fd.Extended[3], "rename to "): - names := strings.SplitN(fd.Extended[0][len("diff --git "):], " ", 2) - fd.OrigName = names[0] - fd.NewName = names[1] - return true - case lineCount == 3 && strings.HasPrefix(fd.Extended[2], "Binary files ") || lineCount > 3 && strings.HasPrefix(fd.Extended[2], "GIT binary patch"): - names := strings.SplitN(fd.Extended[0][len("diff --git "):], " ", 2) - fd.OrigName, err = strconv.Unquote(names[0]) - if err != nil { - fd.OrigName = names[0] + } + + // For ambiguous 'diff --git' lines, try to reconstruct filenames using extended headers. + if success && (isCopy || isRename) && fd.OrigName == "" && fd.NewName == "" { + diffArgs := fd.Extended[0][len("diff --git "):] + + tryReconstruct := func(header string, prefix string, whichFile int, result *string) { + if !strings.HasPrefix(header, prefix) { + return + } + rawFilename := header[len(prefix):] + rawFilename = strings.TrimSuffix(rawFilename, "\r") + + // extract the filename prefix (e.g. "a/") from the 'diff --git' line. + var prefixLetterIndex int + if whichFile == 1 { + prefixLetterIndex = 0 + } else if whichFile == 2 { + prefixLetterIndex = len(diffArgs) - len(rawFilename) - 2 + } + if prefixLetterIndex < 0 || diffArgs[prefixLetterIndex+1] != '/' { + return + } + + *result = diffArgs[prefixLetterIndex:prefixLetterIndex+2] + rawFilename } - fd.NewName, err = strconv.Unquote(names[1]) - if err != nil { - fd.NewName = names[1] + + for _, header := range fd.Extended { + tryReconstruct(header, "copy from ", 1, &fd.OrigName) + tryReconstruct(header, "copy to ", 2, &fd.NewName) + tryReconstruct(header, "rename from ", 1, &fd.OrigName) + tryReconstruct(header, "rename to ", 2, &fd.NewName) } - return true - default: - return false } + return success } var ( @@ -436,7 +609,12 @@ var ( // only of hunks and not include a file header; if it has a file // header, use ParseFileDiff. func ParseHunks(diff []byte) ([]*Hunk, error) { - r := NewHunksReader(bytes.NewReader(diff)) + return ParseHunksOptions(diff, ParseOptions{}) +} + +// ParseHunksOptions parses hunks from a unified diff with the given options. +func ParseHunksOptions(diff []byte, opts ParseOptions) ([]*Hunk, error) { + r := NewHunksReaderOptions(bytes.NewReader(diff), opts) hunks, err := r.ReadAllHunks() if err != nil { return nil, err @@ -447,7 +625,13 @@ func ParseHunks(diff []byte) ([]*Hunk, error) { // NewHunksReader returns a new HunksReader that reads unified diff hunks // from r. func NewHunksReader(r io.Reader) *HunksReader { - return &HunksReader{reader: bufio.NewReader(r)} + return NewHunksReaderOptions(r, ParseOptions{}) +} + +// NewHunksReaderOptions returns a new HunksReader that reads unified diff hunks +// from r with the given options. +func NewHunksReaderOptions(r io.Reader, opts ParseOptions) *HunksReader { + return &HunksReader{reader: newLineReaderOptions(r, opts)} } // A HunksReader reads hunks from a unified diff. @@ -455,7 +639,7 @@ type HunksReader struct { line int offset int64 hunk *Hunk - reader *bufio.Reader + reader *lineReader nextHunkHeaderLine []byte } @@ -474,7 +658,7 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) { line = r.nextHunkHeaderLine r.nextHunkHeaderLine = nil } else { - line, err = readLine(r.reader) + line, err = r.reader.readLine() if err != nil { if err == io.EOF && r.hunk != nil { return r.hunk, nil @@ -518,12 +702,15 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) { // If the line starts with `---` and the next one with `+++` we're // looking at a non-extended file header and need to abort. if bytes.HasPrefix(line, []byte("---")) { - ok, err := peekPrefix(r.reader, "+++") + ok, err := r.reader.nextLineStartsWith("+++") if err != nil { return r.hunk, err } if ok { - return r.hunk, &ParseError{r.line, r.offset, &ErrBadHunkLine{Line: line}} + ok2, _ := r.reader.nextNextLineStartsWith(string(hunkPrefix)) + if ok2 { + return r.hunk, &ParseError{r.line, r.offset, &ErrBadHunkLine{Line: line}} + } } } @@ -548,7 +735,7 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) { // handle that case. return r.hunk, &ParseError{r.line, r.offset, &ErrBadHunkLine{Line: line}} } - if bytes.Equal(line, []byte(noNewlineMessage)) { + if bytes.Equal(bytes.TrimSuffix(line, []byte("\r")), []byte(noNewlineMessage)) { if lastLineFromOrig { // Retain the newline in the body (otherwise the // diff line would be like "-a+b", where "+b" is @@ -593,19 +780,6 @@ func linePrefix(c byte) bool { return false } -// peekPrefix peeks into the given reader to check whether the next -// bytes match the given prefix. -func peekPrefix(reader *bufio.Reader, prefix string) (bool, error) { - next, err := reader.Peek(len(prefix)) - if err != nil { - if err == io.EOF { - return false, nil - } - return false, err - } - return bytes.HasPrefix(next, []byte(prefix)), nil -} - // normalizeHeader takes a header of the form: // "@@ -linestart[,chunksize] +linestart[,chunksize] @@ section" // and returns two strings, with the first in the form: @@ -615,6 +789,7 @@ func peekPrefix(reader *bufio.Reader, prefix string) (bool, error) { // if its value is 1. normalizeHeader returns an error if the header // is not in the correct format. func normalizeHeader(header string) (string, string, error) { + header = strings.TrimSuffix(header, "\r") // Split the header into five parts: the first '@@', the two // ranges, the last '@@', and the optional section. pieces := strings.SplitN(header, " ", 5) @@ -675,7 +850,8 @@ func parseOnlyInMessage(line []byte) (bool, []byte, []byte) { if idx < 0 { return false, nil, nil } - return true, line[:idx], line[idx+2:] + filename := bytes.TrimSuffix(line[idx+2:], []byte("\r")) + return true, line[:idx], filename } // A ParseError is a description of a unified diff syntax error. diff --git a/vendor/github.com/sourcegraph/go-diff/diff/reader_util.go b/vendor/github.com/sourcegraph/go-diff/diff/reader_util.go index 395fb7ba..3356283d 100644 --- a/vendor/github.com/sourcegraph/go-diff/diff/reader_util.go +++ b/vendor/github.com/sourcegraph/go-diff/diff/reader_util.go @@ -2,30 +2,117 @@ package diff import ( "bufio" + "bytes" + "errors" "io" ) +var ErrLineReaderUninitialized = errors.New("line reader not initialized") + +func newLineReader(r io.Reader) *lineReader { + return &lineReader{reader: bufio.NewReader(r)} +} + +func newLineReaderOptions(r io.Reader, opts ParseOptions) *lineReader { + return &lineReader{ + reader: bufio.NewReader(r), + keepCR: opts.KeepCR, + } +} + +// lineReader is a wrapper around a bufio.Reader that caches the next line to +// provide lookahead functionality for the next two lines. +type lineReader struct { + reader *bufio.Reader + + cachedNextLine []byte + cachedNextLineErr error + + keepCR bool +} + +func (l *lineReader) ensureCachedNextLine() { + if l.cachedNextLine == nil && l.cachedNextLineErr == nil { + l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR) + } +} + +// readLine returns the next unconsumed line and advances the internal cache of +// the lineReader. +func (l *lineReader) readLine() ([]byte, error) { + l.ensureCachedNextLine() + + if l.cachedNextLineErr != nil { + return nil, l.cachedNextLineErr + } + + next := l.cachedNextLine + + l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR) + + return next, nil +} + +// nextLineStartsWith looks at the line that would be returned by the next call +// to readLine to check whether it has the given prefix. +// +// io.EOF and bufio.ErrBufferFull errors are ignored so that the function can +// be used when at the end of the file. +func (l *lineReader) nextLineStartsWith(prefix string) (bool, error) { + l.ensureCachedNextLine() + + return l.lineHasPrefix(l.cachedNextLine, prefix, l.cachedNextLineErr) +} + +// nextNextLineStartsWith checks the prefix of the line *after* the line that +// would be returned by the next readLine. +// +// io.EOF and bufio.ErrBufferFull errors are ignored so that the function can +// be used when at the end of the file. +func (l *lineReader) nextNextLineStartsWith(prefix string) (bool, error) { + l.ensureCachedNextLine() + + next, err := l.reader.Peek(len(prefix)) + return l.lineHasPrefix(next, prefix, err) +} + +// lineHasPrefix checks whether the given line has the given prefix with +// bytes.HasPrefix. +// +// The readErr should be the error that was returned when the line was read. +// lineHasPrefix checks the error to adjust its return value to, e.g., return +// false and ignore the error when readErr is io.EOF. +func (l *lineReader) lineHasPrefix(line []byte, prefix string, readErr error) (bool, error) { + if readErr != nil { + if readErr == io.EOF || readErr == bufio.ErrBufferFull { + return false, nil + } + return false, readErr + } + + return bytes.HasPrefix(line, []byte(prefix)), nil +} + // readLine is a helper that mimics the functionality of calling bufio.Scanner.Scan() and // bufio.Scanner.Bytes(), but without the token size limitation. It will read and return // the next line in the Reader with the trailing newline stripped. It will return an // io.EOF error when there is nothing left to read (at the start of the function call). It // will return any other errors it receives from the underlying call to ReadBytes. -func readLine(r *bufio.Reader) ([]byte, error) { - line_, err := r.ReadBytes('\n') - if err == io.EOF { - if len(line_) == 0 { - return nil, io.EOF - } - - // ReadBytes returned io.EOF, because it didn't find another newline, but there is - // still the remainder of the file to return as a line. - line := line_ - return line, nil - } else if err != nil { +func readLine(r *bufio.Reader, keepCR bool) ([]byte, error) { + line, err := r.ReadBytes('\n') + if err == io.EOF && len(line) == 0 { + return nil, io.EOF + } + if err != nil && err != io.EOF { return nil, err } - line := line_[0 : len(line_)-1] - return dropCR(line), nil + if line[len(line)-1] == '\n' { + line = line[:len(line)-1] + } + if !keepCR { + return dropCR(line), nil + } + return line, nil } // dropCR drops a terminal \r from the data. diff --git a/vendor/github.com/sourcegraph/go-diff/diff/reverse.go b/vendor/github.com/sourcegraph/go-diff/diff/reverse.go new file mode 100644 index 00000000..87715efb --- /dev/null +++ b/vendor/github.com/sourcegraph/go-diff/diff/reverse.go @@ -0,0 +1,192 @@ +package diff + +import ( + "bytes" + "errors" + "fmt" +) + +// ReverseFileDiff takes a diff.FileDiff, and returns the reverse operation. +// This is a FileDiff that undoes the edit of the original. +func ReverseFileDiff(fd *FileDiff) (*FileDiff, error) { + reverse := FileDiff{ + OrigName: fd.NewName, + OrigTime: fd.NewTime, + NewName: fd.OrigName, + NewTime: fd.OrigTime, + Extended: fd.Extended, + } + for _, hunk := range fd.Hunks { + invHunk, err := reverseHunk(hunk) + if err != nil { + return nil, err + } + reverse.Hunks = append(reverse.Hunks, invHunk) + } + return &reverse, nil +} + +// ReverseMultiFileDiff reverses a series of FileDiffs. +func ReverseMultiFileDiff(fds []*FileDiff) ([]*FileDiff, error) { + var reverse []*FileDiff + for _, fd := range fds { + r, err := ReverseFileDiff(fd) + if err != nil { + return nil, err + } + reverse = append(reverse, r) + } + return reverse, nil +} + +// A subhunk represents a portion of a Hunk.Body, split into three sections. +// It consists of zero or more context lines, followed by zero or more orig +// lines and then zero or more new lines. +// +// Each line is stored WITHOUT its starting character, but with the newlines +// included. The final entry in a section may be missing a trailing newline. +// +// A missing newline in orig is represented in a Hunk by OrigNoNewlineAt, +// but is represented here as a missing newline. +type contextLine struct { + body []byte + bare bool +} + +type subhunk struct { + context []contextLine + orig [][]byte + new [][]byte +} + +// reverseHunk converts a Hunk into its reverse operation. +func reverseHunk(forward *Hunk) (*Hunk, error) { + reverse := Hunk{ + OrigStartLine: forward.NewStartLine, + OrigLines: forward.NewLines, + OrigNoNewlineAt: 0, // we may change this below + NewStartLine: forward.OrigStartLine, + NewLines: forward.OrigLines, + Section: forward.Section, + StartPosition: forward.StartPosition, + } + subs, err := toSubhunks(forward) + if err != nil { + return nil, err + } + for _, sub := range subs { + invSub := subhunk{ + context: sub.context, + orig: sub.new, + new: sub.orig, + } + for _, line := range invSub.context { + if line.bare { + reverse.Body = append(reverse.Body, line.body...) + continue + } + reverse.Body = append(reverse.Body, ' ') + reverse.Body = append(reverse.Body, line.body...) + } + for _, line := range invSub.orig { + reverse.Body = append(reverse.Body, '-') + reverse.Body = append(reverse.Body, line...) + } + if len(invSub.orig) > 0 && reverse.Body[len(reverse.Body)-1] != '\n' { + // There was a missing newline in `orig`, which we encode in a + // hunk with an offset. + reverse.Body = append(reverse.Body, '\n') + reverse.OrigNoNewlineAt = int32(len(reverse.Body)) + } + for _, line := range invSub.new { + reverse.Body = append(reverse.Body, '+') + reverse.Body = append(reverse.Body, line...) + } + } + return &reverse, nil +} + +func extractContextLines(from *[]byte) []contextLine { + var lines []contextLine + for len(*from) > 0 { + if (*from)[0] == '\n' { + lines = append(lines, contextLine{body: []byte{'\n'}, bare: true}) + *from = (*from)[1:] + continue + } + if (*from)[0] != ' ' { + break + } + + newline := bytes.IndexByte(*from, '\n') + if newline < 0 { + lines = append(lines, contextLine{body: (*from)[1:]}) + *from = nil + continue + } + + lines = append(lines, contextLine{body: (*from)[1 : newline+1]}) + *from = (*from)[newline+1:] + } + return lines +} + +func extractLinesStartingWith(from *[]byte, startingWith byte) [][]byte { + var lines [][]byte + for len(*from) > 0 { + if (*from)[0] != startingWith { + break + } + + newline := bytes.IndexByte(*from, '\n') + if newline < 0 { + lines = append(lines, (*from)[1:]) + *from = nil + continue + } + + lines = append(lines, (*from)[1:newline+1]) + *from = (*from)[newline+1:] + } + return lines +} + +// Extracts the subhunks from a diff.Hunk. +// +// This groups a Hunk's buffer into one or more subhunks, matching the conditions +// of `subhunk` above. This function groups, strips prefix characters, and strips +// a newline for `OrigNoNewlineAt` if necessary. +func toSubhunks(hunk *Hunk) ([]subhunk, error) { + var body []byte = hunk.Body + var subhunks []subhunk + if len(body) == 0 { + return nil, nil + } + for len(body) > 0 { + sh := subhunk{ + context: extractContextLines(&body), + orig: extractLinesStartingWith(&body, '-'), + new: extractLinesStartingWith(&body, '+'), + } + if len(sh.context) == 0 && len(sh.orig) == 0 && len(sh.new) == 0 { + // The first line didn't start with any expected prefix. + return nil, fmt.Errorf("unexpected character %q at start of line", body[0]) + } + subhunks = append(subhunks, sh) + } + if hunk.OrigNoNewlineAt > 0 { + // The Hunk represents a missing newline at the end of an "orig" line with a + // OrigNoNewlineAt index. We represent it here as an actual missing newline. + var lastSubhunk *subhunk = &subhunks[len(subhunks)-1] + s := len(lastSubhunk.orig) + if s == 0 { + return nil, errors.New("inconsistent OrigNoNewlineAt in input") + } + var cut bool + lastSubhunk.orig[s-1], cut = bytes.CutSuffix(lastSubhunk.orig[s-1], []byte("\n")) + if !cut { + return nil, errors.New("missing newline in input") + } + } + return subhunks, nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index f2a1c645..39d739e9 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -162,8 +162,8 @@ github.com/sourcegraph/conc github.com/sourcegraph/conc/internal/multierror github.com/sourcegraph/conc/iter github.com/sourcegraph/conc/panics -# github.com/sourcegraph/go-diff v0.6.1 -## explicit; go 1.14 +# github.com/sourcegraph/go-diff v0.8.0 +## explicit; go 1.20 github.com/sourcegraph/go-diff/diff # github.com/spf13/afero v1.12.0 ## explicit; go 1.21