peridot/vendor/github.com/bluekeyes/go-gitdiff/gitdiff/file_header.go
2022-07-07 22:13:21 +02:00

470 lines
12 KiB
Go

package gitdiff
import (
"fmt"
"io"
"os"
"strconv"
"strings"
"time"
)
const (
devNull = "/dev/null"
)
// ParseNextFileHeader finds and parses the next file header in the stream. If
// a header is found, it returns a file and all input before the header. It
// returns nil if no headers are found before the end of the input.
func (p *parser) ParseNextFileHeader() (*File, string, error) {
var preamble strings.Builder
var file *File
for {
// check for disconnected fragment headers (corrupt patch)
frag, err := p.ParseTextFragmentHeader()
if err != nil {
// not a valid header, nothing to worry about
goto NextLine
}
if frag != nil {
return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header())
}
// check for a git-generated patch
file, err = p.ParseGitFileHeader()
if err != nil {
return nil, "", err
}
if file != nil {
return file, preamble.String(), nil
}
// check for a "traditional" patch
file, err = p.ParseTraditionalFileHeader()
if err != nil {
return nil, "", err
}
if file != nil {
return file, preamble.String(), nil
}
NextLine:
preamble.WriteString(p.Line(0))
if err := p.Next(); err != nil {
if err == io.EOF {
break
}
return nil, "", err
}
}
return nil, "", nil
}
func (p *parser) ParseGitFileHeader() (*File, error) {
const prefix = "diff --git "
if !strings.HasPrefix(p.Line(0), prefix) {
return nil, nil
}
header := p.Line(0)[len(prefix):]
defaultName, err := parseGitHeaderName(header)
if err != nil {
return nil, p.Errorf(0, "git file header: %v", err)
}
f := &File{}
for {
end, err := parseGitHeaderData(f, p.Line(1), defaultName)
if err != nil {
return nil, p.Errorf(1, "git file header: %v", err)
}
if err := p.Next(); err != nil {
if err == io.EOF {
break
}
return nil, err
}
if end {
break
}
}
if f.OldName == "" && f.NewName == "" {
if defaultName == "" {
return nil, p.Errorf(0, "git file header: missing filename information")
}
f.OldName = defaultName
f.NewName = defaultName
}
if (f.NewName == "" && !f.IsDelete) || (f.OldName == "" && !f.IsNew) {
return nil, p.Errorf(0, "git file header: missing filename information")
}
return f, nil
}
func (p *parser) ParseTraditionalFileHeader() (*File, error) {
const shortestValidFragHeader = "@@ -1 +1 @@\n"
const (
oldPrefix = "--- "
newPrefix = "+++ "
)
oldLine, newLine := p.Line(0), p.Line(1)
if !strings.HasPrefix(oldLine, oldPrefix) || !strings.HasPrefix(newLine, newPrefix) {
return nil, nil
}
// heuristic: only a file header if followed by a (probable) fragment header
if len(p.Line(2)) < len(shortestValidFragHeader) || !strings.HasPrefix(p.Line(2), "@@ -") {
return nil, nil
}
// advance past the first two lines so parser is after the header
// no EOF check needed because we know there are >=3 valid lines
if err := p.Next(); err != nil {
return nil, err
}
if err := p.Next(); err != nil {
return nil, err
}
oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0)
if err != nil {
return nil, p.Errorf(0, "file header: %v", err)
}
newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0)
if err != nil {
return nil, p.Errorf(1, "file header: %v", err)
}
f := &File{}
switch {
case oldName == devNull || hasEpochTimestamp(oldLine):
f.IsNew = true
f.NewName = newName
case newName == devNull || hasEpochTimestamp(newLine):
f.IsDelete = true
f.OldName = oldName
default:
// if old name is a prefix of new name, use that instead
// this avoids picking variants like "file.bak" or "file~"
if strings.HasPrefix(newName, oldName) {
f.OldName = oldName
f.NewName = oldName
} else {
f.OldName = newName
f.NewName = newName
}
}
return f, nil
}
// parseGitHeaderName extracts a default file name from the Git file header
// line. This is required for mode-only changes and creation/deletion of empty
// files. Other types of patch include the file name(s) in the header data.
// If the names in the header do not match because the patch is a rename,
// return an empty default name.
func parseGitHeaderName(header string) (string, error) {
firstName, n, err := parseName(header, -1, 1)
if err != nil {
return "", err
}
if n < len(header) && (header[n] == ' ' || header[n] == '\t') {
n++
}
secondName, _, err := parseName(header[n:], -1, 1)
if err != nil {
return "", err
}
if firstName != secondName {
return "", nil
}
return firstName, nil
}
// parseGitHeaderData parses a single line of metadata from a Git file header.
// It returns true when header parsing is complete; in that case, line was the
// first line of non-header content.
func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) {
if len(line) > 0 && line[len(line)-1] == '\n' {
line = line[:len(line)-1]
}
for _, hdr := range []struct {
prefix string
end bool
parse func(*File, string, string) error
}{
{"@@ -", true, nil},
{"--- ", false, parseGitHeaderOldName},
{"+++ ", false, parseGitHeaderNewName},
{"old mode ", false, parseGitHeaderOldMode},
{"new mode ", false, parseGitHeaderNewMode},
{"deleted file mode ", false, parseGitHeaderDeletedMode},
{"new file mode ", false, parseGitHeaderCreatedMode},
{"copy from ", false, parseGitHeaderCopyFrom},
{"copy to ", false, parseGitHeaderCopyTo},
{"rename old ", false, parseGitHeaderRenameFrom},
{"rename new ", false, parseGitHeaderRenameTo},
{"rename from ", false, parseGitHeaderRenameFrom},
{"rename to ", false, parseGitHeaderRenameTo},
{"similarity index ", false, parseGitHeaderScore},
{"dissimilarity index ", false, parseGitHeaderScore},
{"index ", false, parseGitHeaderIndex},
} {
if strings.HasPrefix(line, hdr.prefix) {
if hdr.parse != nil {
err = hdr.parse(f, line[len(hdr.prefix):], defaultName)
}
return hdr.end, err
}
}
// unknown line indicates the end of the header
// this usually happens if the diff is empty
return true, nil
}
func parseGitHeaderOldName(f *File, line, defaultName string) error {
name, _, err := parseName(line, '\t', 1)
if err != nil {
return err
}
if f.OldName == "" && !f.IsNew {
f.OldName = name
return nil
}
return verifyGitHeaderName(name, f.OldName, f.IsNew, "old")
}
func parseGitHeaderNewName(f *File, line, defaultName string) error {
name, _, err := parseName(line, '\t', 1)
if err != nil {
return err
}
if f.NewName == "" && !f.IsDelete {
f.NewName = name
return nil
}
return verifyGitHeaderName(name, f.NewName, f.IsDelete, "new")
}
func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) {
f.OldMode, err = parseMode(line)
return
}
func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) {
f.NewMode, err = parseMode(line)
return
}
func parseGitHeaderDeletedMode(f *File, line, defaultName string) error {
f.IsDelete = true
f.OldName = defaultName
return parseGitHeaderOldMode(f, line, defaultName)
}
func parseGitHeaderCreatedMode(f *File, line, defaultName string) error {
f.IsNew = true
f.NewName = defaultName
return parseGitHeaderNewMode(f, line, defaultName)
}
func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) {
f.IsCopy = true
f.OldName, _, err = parseName(line, -1, 0)
return
}
func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) {
f.IsCopy = true
f.NewName, _, err = parseName(line, -1, 0)
return
}
func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) {
f.IsRename = true
f.OldName, _, err = parseName(line, -1, 0)
return
}
func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) {
f.IsRename = true
f.NewName, _, err = parseName(line, -1, 0)
return
}
func parseGitHeaderScore(f *File, line, defaultName string) error {
score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32)
if err != nil {
nerr := err.(*strconv.NumError)
return fmt.Errorf("invalid score line: %v", nerr.Err)
}
if score <= 100 {
f.Score = int(score)
}
return nil
}
func parseGitHeaderIndex(f *File, line, defaultName string) error {
const sep = ".."
// note that git stops parsing if the OIDs are too long to be valid
// checking this requires knowing if the repository uses SHA1 or SHA256
// hashes, which we don't know, so we just skip that check
parts := strings.SplitN(line, " ", 2)
oids := strings.SplitN(parts[0], sep, 2)
if len(oids) < 2 {
return fmt.Errorf("invalid index line: missing %q", sep)
}
f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1]
if len(parts) > 1 {
return parseGitHeaderOldMode(f, parts[1], defaultName)
}
return nil
}
func parseMode(s string) (os.FileMode, error) {
mode, err := strconv.ParseInt(s, 8, 32)
if err != nil {
nerr := err.(*strconv.NumError)
return os.FileMode(0), fmt.Errorf("invalid mode line: %v", nerr.Err)
}
return os.FileMode(mode), nil
}
// parseName extracts a file name from the start of a string and returns the
// name and the index of the first character after the name. If the name is
// unquoted and term is non-negative, parsing stops at the first occurrence of
// term. Otherwise parsing of unquoted names stops at the first space or tab.
//
// If the name is exactly "/dev/null", no further processing occurs. Otherwise,
// if dropPrefix is greater than zero, that number of prefix components
// separated by forward slashes are dropped from the name and any duplicate
// slashes are collapsed.
func parseName(s string, term rune, dropPrefix int) (name string, n int, err error) {
if len(s) > 0 && s[0] == '"' {
name, n, err = parseQuotedName(s)
} else {
name, n, err = parseUnquotedName(s, term)
}
if err != nil {
return "", 0, err
}
if name == devNull {
return name, n, nil
}
return cleanName(name, dropPrefix), n, nil
}
func parseQuotedName(s string) (name string, n int, err error) {
for n = 1; n < len(s); n++ {
if s[n] == '"' && s[n-1] != '\\' {
n++
break
}
}
if n == 2 {
return "", 0, fmt.Errorf("missing name")
}
if name, err = strconv.Unquote(s[:n]); err != nil {
return "", 0, err
}
return name, n, err
}
func parseUnquotedName(s string, term rune) (name string, n int, err error) {
for n = 0; n < len(s); n++ {
if s[n] == '\n' {
break
}
if term >= 0 && rune(s[n]) == term {
break
}
if term < 0 && (s[n] == ' ' || s[n] == '\t') {
break
}
}
if n == 0 {
return "", 0, fmt.Errorf("missing name")
}
return s[:n], n, nil
}
// verifyGitHeaderName checks a parsed name against state set by previous lines
func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error {
if existing != "" {
if isNull {
return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing)
}
if existing != parsed {
return fmt.Errorf("inconsistent %s filename", side)
}
}
if isNull && parsed != devNull {
return fmt.Errorf("expected %s", devNull)
}
return nil
}
// cleanName removes double slashes and drops prefix segments.
func cleanName(name string, drop int) string {
var b strings.Builder
for i := 0; i < len(name); i++ {
if name[i] == '/' {
if i < len(name)-1 && name[i+1] == '/' {
continue
}
if drop > 0 {
drop--
b.Reset()
continue
}
}
b.WriteByte(name[i])
}
return b.String()
}
// hasEpochTimestamp returns true if the string ends with a POSIX-formatted
// timestamp for the UNIX epoch after a tab character. According to git, this
// is used by GNU diff to mark creations and deletions.
func hasEpochTimestamp(s string) bool {
const posixTimeLayout = "2006-01-02 15:04:05.9 -0700"
start := strings.IndexRune(s, '\t')
if start < 0 {
return false
}
ts := strings.TrimSuffix(s[start+1:], "\n")
// a valid timestamp can have optional ':' in zone specifier
// remove that if it exists so we have a single format
if ts[len(ts)-3] == ':' {
ts = ts[:len(ts)-3] + ts[len(ts)-2:]
}
t, err := time.Parse(posixTimeLayout, ts)
if err != nil {
return false
}
if !t.Equal(time.Unix(0, 0)) {
return false
}
return true
}