mirror of
https://github.com/rocky-linux/peridot.git
synced 2024-11-27 15:36:25 +00:00
547 lines
13 KiB
Go
547 lines
13 KiB
Go
package gitdiff
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
const (
|
|
// devNull is the name that appears in a patch header in place of a file name
// when one side of the change does not exist; the traditional header parser
// in this file treats it as marking a file creation or deletion.
devNull = "/dev/null"
|
|
)
|
|
|
|
// ParseNextFileHeader finds and parses the next file header in the stream. If
|
|
// a header is found, it returns a file and all input before the header. It
|
|
// returns nil if no headers are found before the end of the input.
|
|
func (p *parser) ParseNextFileHeader() (*File, string, error) {
|
|
var preamble strings.Builder
|
|
var file *File
|
|
for {
|
|
// check for disconnected fragment headers (corrupt patch)
|
|
frag, err := p.ParseTextFragmentHeader()
|
|
if err != nil {
|
|
// not a valid header, nothing to worry about
|
|
goto NextLine
|
|
}
|
|
if frag != nil {
|
|
return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header())
|
|
}
|
|
|
|
// check for a git-generated patch
|
|
file, err = p.ParseGitFileHeader()
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
if file != nil {
|
|
return file, preamble.String(), nil
|
|
}
|
|
|
|
// check for a "traditional" patch
|
|
file, err = p.ParseTraditionalFileHeader()
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
if file != nil {
|
|
return file, preamble.String(), nil
|
|
}
|
|
|
|
NextLine:
|
|
preamble.WriteString(p.Line(0))
|
|
if err := p.Next(); err != nil {
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
return nil, "", err
|
|
}
|
|
}
|
|
return nil, "", nil
|
|
}
|
|
|
|
func (p *parser) ParseGitFileHeader() (*File, error) {
|
|
const prefix = "diff --git "
|
|
|
|
if !strings.HasPrefix(p.Line(0), prefix) {
|
|
return nil, nil
|
|
}
|
|
header := p.Line(0)[len(prefix):]
|
|
|
|
defaultName, err := parseGitHeaderName(header)
|
|
if err != nil {
|
|
return nil, p.Errorf(0, "git file header: %v", err)
|
|
}
|
|
|
|
f := &File{}
|
|
for {
|
|
end, err := parseGitHeaderData(f, p.Line(1), defaultName)
|
|
if err != nil {
|
|
return nil, p.Errorf(1, "git file header: %v", err)
|
|
}
|
|
|
|
if err := p.Next(); err != nil {
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
if end {
|
|
break
|
|
}
|
|
}
|
|
|
|
if f.OldName == "" && f.NewName == "" {
|
|
if defaultName == "" {
|
|
return nil, p.Errorf(0, "git file header: missing filename information")
|
|
}
|
|
f.OldName = defaultName
|
|
f.NewName = defaultName
|
|
}
|
|
|
|
if (f.NewName == "" && !f.IsDelete) || (f.OldName == "" && !f.IsNew) {
|
|
return nil, p.Errorf(0, "git file header: missing filename information")
|
|
}
|
|
|
|
return f, nil
|
|
}
|
|
|
|
func (p *parser) ParseTraditionalFileHeader() (*File, error) {
|
|
const shortestValidFragHeader = "@@ -1 +1 @@\n"
|
|
const (
|
|
oldPrefix = "--- "
|
|
newPrefix = "+++ "
|
|
)
|
|
|
|
oldLine, newLine := p.Line(0), p.Line(1)
|
|
|
|
if !strings.HasPrefix(oldLine, oldPrefix) || !strings.HasPrefix(newLine, newPrefix) {
|
|
return nil, nil
|
|
}
|
|
// heuristic: only a file header if followed by a (probable) fragment header
|
|
if len(p.Line(2)) < len(shortestValidFragHeader) || !strings.HasPrefix(p.Line(2), "@@ -") {
|
|
return nil, nil
|
|
}
|
|
|
|
// advance past the first two lines so parser is after the header
|
|
// no EOF check needed because we know there are >=3 valid lines
|
|
if err := p.Next(); err != nil {
|
|
return nil, err
|
|
}
|
|
if err := p.Next(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0)
|
|
if err != nil {
|
|
return nil, p.Errorf(0, "file header: %v", err)
|
|
}
|
|
|
|
newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0)
|
|
if err != nil {
|
|
return nil, p.Errorf(1, "file header: %v", err)
|
|
}
|
|
|
|
f := &File{}
|
|
switch {
|
|
case oldName == devNull || hasEpochTimestamp(oldLine):
|
|
f.IsNew = true
|
|
f.NewName = newName
|
|
case newName == devNull || hasEpochTimestamp(newLine):
|
|
f.IsDelete = true
|
|
f.OldName = oldName
|
|
default:
|
|
// if old name is a prefix of new name, use that instead
|
|
// this avoids picking variants like "file.bak" or "file~"
|
|
if strings.HasPrefix(newName, oldName) {
|
|
f.OldName = oldName
|
|
f.NewName = oldName
|
|
} else {
|
|
f.OldName = newName
|
|
f.NewName = newName
|
|
}
|
|
}
|
|
|
|
return f, nil
|
|
}
|
|
|
|
// parseGitHeaderName extracts a default file name from the Git file header
|
|
// line. This is required for mode-only changes and creation/deletion of empty
|
|
// files. Other types of patch include the file name(s) in the header data.
|
|
// If the names in the header do not match because the patch is a rename,
|
|
// return an empty default name.
|
|
func parseGitHeaderName(header string) (string, error) {
|
|
header = strings.TrimSuffix(header, "\n")
|
|
if len(header) == 0 {
|
|
return "", nil
|
|
}
|
|
|
|
var err error
|
|
var first, second string
|
|
|
|
// there are 4 cases to account for:
|
|
//
|
|
// 1) unquoted unquoted
|
|
// 2) unquoted "quoted"
|
|
// 3) "quoted" unquoted
|
|
// 4) "quoted" "quoted"
|
|
//
|
|
quote := strings.IndexByte(header, '"')
|
|
switch {
|
|
case quote < 0:
|
|
// case 1
|
|
first = header
|
|
|
|
case quote > 0:
|
|
// case 2
|
|
first = header[:quote-1]
|
|
if !isSpace(header[quote-1]) {
|
|
return "", fmt.Errorf("missing separator")
|
|
}
|
|
|
|
second, _, err = parseQuotedName(header[quote:])
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
case quote == 0:
|
|
// case 3 or case 4
|
|
var n int
|
|
first, n, err = parseQuotedName(header)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// git accepts multiple spaces after a quoted name, but not after an
|
|
// unquoted name, since the name might end with one or more spaces
|
|
for n < len(header) && isSpace(header[n]) {
|
|
n++
|
|
}
|
|
if n == len(header) {
|
|
return "", nil
|
|
}
|
|
|
|
if header[n] == '"' {
|
|
second, _, err = parseQuotedName(header[n:])
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
} else {
|
|
second = header[n:]
|
|
}
|
|
}
|
|
|
|
first = trimTreePrefix(first, 1)
|
|
if second != "" {
|
|
if first == trimTreePrefix(second, 1) {
|
|
return first, nil
|
|
}
|
|
return "", nil
|
|
}
|
|
|
|
// at this point, both names are unquoted (case 1)
|
|
// since names may contain spaces, we can't use a known separator
|
|
// instead, look for a split that produces two equal names
|
|
|
|
for i := 0; i < len(first)-1; i++ {
|
|
if !isSpace(first[i]) {
|
|
continue
|
|
}
|
|
second = trimTreePrefix(first[i+1:], 1)
|
|
if name := first[:i]; name == second {
|
|
return name, nil
|
|
}
|
|
}
|
|
return "", nil
|
|
}
|
|
|
|
// parseGitHeaderData parses a single line of metadata from a Git file header.
|
|
// It returns true when header parsing is complete; in that case, line was the
|
|
// first line of non-header content.
|
|
func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) {
|
|
if len(line) > 0 && line[len(line)-1] == '\n' {
|
|
line = line[:len(line)-1]
|
|
}
|
|
|
|
for _, hdr := range []struct {
|
|
prefix string
|
|
end bool
|
|
parse func(*File, string, string) error
|
|
}{
|
|
{"@@ -", true, nil},
|
|
{"--- ", false, parseGitHeaderOldName},
|
|
{"+++ ", false, parseGitHeaderNewName},
|
|
{"old mode ", false, parseGitHeaderOldMode},
|
|
{"new mode ", false, parseGitHeaderNewMode},
|
|
{"deleted file mode ", false, parseGitHeaderDeletedMode},
|
|
{"new file mode ", false, parseGitHeaderCreatedMode},
|
|
{"copy from ", false, parseGitHeaderCopyFrom},
|
|
{"copy to ", false, parseGitHeaderCopyTo},
|
|
{"rename old ", false, parseGitHeaderRenameFrom},
|
|
{"rename new ", false, parseGitHeaderRenameTo},
|
|
{"rename from ", false, parseGitHeaderRenameFrom},
|
|
{"rename to ", false, parseGitHeaderRenameTo},
|
|
{"similarity index ", false, parseGitHeaderScore},
|
|
{"dissimilarity index ", false, parseGitHeaderScore},
|
|
{"index ", false, parseGitHeaderIndex},
|
|
} {
|
|
if strings.HasPrefix(line, hdr.prefix) {
|
|
if hdr.parse != nil {
|
|
err = hdr.parse(f, line[len(hdr.prefix):], defaultName)
|
|
}
|
|
return hdr.end, err
|
|
}
|
|
}
|
|
|
|
// unknown line indicates the end of the header
|
|
// this usually happens if the diff is empty
|
|
return true, nil
|
|
}
|
|
|
|
func parseGitHeaderOldName(f *File, line, defaultName string) error {
|
|
name, _, err := parseName(line, '\t', 1)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if f.OldName == "" && !f.IsNew {
|
|
f.OldName = name
|
|
return nil
|
|
}
|
|
return verifyGitHeaderName(name, f.OldName, f.IsNew, "old")
|
|
}
|
|
|
|
func parseGitHeaderNewName(f *File, line, defaultName string) error {
|
|
name, _, err := parseName(line, '\t', 1)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if f.NewName == "" && !f.IsDelete {
|
|
f.NewName = name
|
|
return nil
|
|
}
|
|
return verifyGitHeaderName(name, f.NewName, f.IsDelete, "new")
|
|
}
|
|
|
|
func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) {
|
|
f.OldMode, err = parseMode(strings.TrimSpace(line))
|
|
return
|
|
}
|
|
|
|
func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) {
|
|
f.NewMode, err = parseMode(strings.TrimSpace(line))
|
|
return
|
|
}
|
|
|
|
func parseGitHeaderDeletedMode(f *File, line, defaultName string) error {
|
|
f.IsDelete = true
|
|
f.OldName = defaultName
|
|
return parseGitHeaderOldMode(f, line, defaultName)
|
|
}
|
|
|
|
func parseGitHeaderCreatedMode(f *File, line, defaultName string) error {
|
|
f.IsNew = true
|
|
f.NewName = defaultName
|
|
return parseGitHeaderNewMode(f, line, defaultName)
|
|
}
|
|
|
|
func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) {
|
|
f.IsCopy = true
|
|
f.OldName, _, err = parseName(line, 0, 0)
|
|
return
|
|
}
|
|
|
|
func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) {
|
|
f.IsCopy = true
|
|
f.NewName, _, err = parseName(line, 0, 0)
|
|
return
|
|
}
|
|
|
|
func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) {
|
|
f.IsRename = true
|
|
f.OldName, _, err = parseName(line, 0, 0)
|
|
return
|
|
}
|
|
|
|
func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) {
|
|
f.IsRename = true
|
|
f.NewName, _, err = parseName(line, 0, 0)
|
|
return
|
|
}
|
|
|
|
func parseGitHeaderScore(f *File, line, defaultName string) error {
|
|
score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32)
|
|
if err != nil {
|
|
nerr := err.(*strconv.NumError)
|
|
return fmt.Errorf("invalid score line: %v", nerr.Err)
|
|
}
|
|
if score <= 100 {
|
|
f.Score = int(score)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func parseGitHeaderIndex(f *File, line, defaultName string) error {
|
|
const sep = ".."
|
|
|
|
// note that git stops parsing if the OIDs are too long to be valid
|
|
// checking this requires knowing if the repository uses SHA1 or SHA256
|
|
// hashes, which we don't know, so we just skip that check
|
|
|
|
parts := strings.SplitN(line, " ", 2)
|
|
oids := strings.SplitN(parts[0], sep, 2)
|
|
|
|
if len(oids) < 2 {
|
|
return fmt.Errorf("invalid index line: missing %q", sep)
|
|
}
|
|
f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1]
|
|
|
|
if len(parts) > 1 {
|
|
return parseGitHeaderOldMode(f, parts[1], defaultName)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// parseMode converts an octal string such as "100644" into an os.FileMode.
//
// The input must consist of octal digits only. A leading '+' or '-' is
// rejected explicitly: strconv.ParseInt would accept a sign, and a negative
// value converted to os.FileMode turns into a mode with unrelated high bits
// set. Any parse failure is reported as an "invalid mode line" error that
// carries only the underlying cause.
func parseMode(s string) (os.FileMode, error) {
	if s != "" && (s[0] == '+' || s[0] == '-') {
		return os.FileMode(0), fmt.Errorf("invalid mode line: %v", strconv.ErrSyntax)
	}

	mode, err := strconv.ParseInt(s, 8, 32)
	if err != nil {
		nerr := err.(*strconv.NumError)
		return os.FileMode(0), fmt.Errorf("invalid mode line: %v", nerr.Err)
	}
	return os.FileMode(mode), nil
}
|
|
|
|
// parseName extracts a file name from the start of a string and returns the
|
|
// name and the index of the first character after the name. If the name is
|
|
// unquoted and term is non-zero, parsing stops at the first occurrence of
|
|
// term.
|
|
//
|
|
// If the name is exactly "/dev/null", no further processing occurs. Otherwise,
|
|
// if dropPrefix is greater than zero, that number of prefix components
|
|
// separated by forward slashes are dropped from the name and any duplicate
|
|
// slashes are collapsed.
|
|
func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) {
|
|
if len(s) > 0 && s[0] == '"' {
|
|
name, n, err = parseQuotedName(s)
|
|
} else {
|
|
name, n, err = parseUnquotedName(s, term)
|
|
}
|
|
if err != nil {
|
|
return "", 0, err
|
|
}
|
|
if name == devNull {
|
|
return name, n, nil
|
|
}
|
|
return cleanName(name, dropPrefix), n, nil
|
|
}
|
|
|
|
// parseQuotedName parses a double-quoted name from the beginning of s. It
// returns the unquoted name and the index of the first character after the
// closing quote. The caller is expected to pass a string whose first byte is
// the opening quote.
//
// The closing quote is found by tracking escape state while scanning, so an
// escaped backslash immediately before the closing quote (as in "dir\\") is
// not mistaken for an escaped quote. An empty quoted name ("") and an empty
// input are reported as a missing name; an unterminated or otherwise
// malformed quoted string is reported through the strconv.Unquote error.
func parseQuotedName(s string) (name string, n int, err error) {
	if len(s) == 0 {
		return "", 0, fmt.Errorf("missing name")
	}

	escaped := false
scan:
	for n = 1; n < len(s); n++ {
		switch {
		case escaped:
			// this byte is consumed by the preceding backslash
			escaped = false
		case s[n] == '\\':
			escaped = true
		case s[n] == '"':
			n++
			break scan
		}
	}

	// n == 2 with a quote at index 1 means the input started with `""`
	if n == 2 && s[1] == '"' {
		return "", 0, fmt.Errorf("missing name")
	}
	if name, err = strconv.Unquote(s[:n]); err != nil {
		return "", 0, err
	}
	return name, n, nil
}
|
|
|
|
// parseUnquotedName reads an unquoted name from the beginning of s. The name
// runs up to the first newline, or up to the first occurrence of term when
// term is non-zero, or to the end of s if neither is present. It returns the
// name and its length; a zero-length name is an error.
func parseUnquotedName(s string, term byte) (name string, n int, err error) {
	n = len(s)
	for i := 0; i < len(s); i++ {
		if c := s[i]; c == '\n' || (term > 0 && c == term) {
			n = i
			break
		}
	}

	if n == 0 {
		return "", 0, fmt.Errorf("missing name")
	}
	return s[:n], n, nil
}
|
|
|
|
// verifyGitHeaderName checks a parsed name against state set by previous lines
|
|
func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error {
|
|
if existing != "" {
|
|
if isNull {
|
|
return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing)
|
|
}
|
|
if existing != parsed {
|
|
return fmt.Errorf("inconsistent %s filename", side)
|
|
}
|
|
}
|
|
if isNull && parsed != devNull {
|
|
return fmt.Errorf("expected %s", devNull)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// cleanName collapses runs of consecutive slashes in name into a single
// slash and removes up to drop leading slash-separated components. Each
// separator encountered while drop is still positive discards everything
// collected so far, including the separator itself.
func cleanName(name string, drop int) string {
	var out strings.Builder

	last := len(name) - 1
	for i := 0; i <= last; i++ {
		c := name[i]
		if c != '/' {
			out.WriteByte(c)
			continue
		}

		switch {
		case i < last && name[i+1] == '/':
			// not the final slash of a run: skip it
		case drop > 0:
			// end of a component that is to be dropped
			drop--
			out.Reset()
		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}
|
|
|
|
// trimTreePrefix strips leading bytes from name until n slashes have been
// consumed, which removes up to n leading directory components. If name
// holds fewer than n slashes, the whole string is consumed and the result is
// empty. A non-positive n returns name unchanged.
func trimTreePrefix(name string, n int) string {
	rest := name
	for n > 0 && len(rest) > 0 {
		c := rest[0]
		rest = rest[1:]
		if c == '/' {
			n--
		}
	}
	return rest
}
|
|
|
|
// hasEpochTimestamp reports whether the text after the first tab in s is a
// POSIX-formatted timestamp equal to the UNIX epoch. According to git, GNU
// diff uses such a timestamp to mark file creations and deletions.
func hasEpochTimestamp(s string) bool {
	const posixTimeLayout = "2006-01-02 15:04:05.9 -0700"

	tab := strings.IndexRune(s, '\t')
	if tab < 0 {
		return false
	}
	stamp := strings.TrimSuffix(s[tab+1:], "\n")

	// The zone may be written with an optional ':' (as in "+00:00"); drop it
	// so that a single layout covers both spellings.
	if colon := len(stamp) - 3; colon >= 0 && stamp[colon] == ':' {
		stamp = stamp[:colon] + stamp[colon+1:]
	}

	parsed, err := time.Parse(posixTimeLayout, stamp)
	return err == nil && parsed.Equal(time.Unix(0, 0))
}
|
|
|
|
// isSpace reports whether c is a space, a tab, or a newline.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\n':
		return true
	}
	return false
}
|