peridot/vendor/github.com/bluekeyes/go-gitdiff/gitdiff/patch_header.go
2022-07-07 22:13:21 +02:00

456 lines
11 KiB
Go

package gitdiff
import (
"bufio"
"errors"
"fmt"
"io"
"net/mail"
"strconv"
"strings"
"time"
"unicode"
)
// Prefixes of the first non-blank preamble line. ParsePatchHeader uses them
// to decide which header format it is looking at.
const (
	// mailHeaderPrefix starts the first line of a preamble that is handled
	// as an email (mailbox) header.
	mailHeaderPrefix = "From "
	// prettyHeaderPrefix starts the first line of a preamble that is handled
	// as a pretty-format header.
	prettyHeaderPrefix = "commit "
)
// PatchHeader is a parsed version of the preamble content that appears before
// the first diff in a patch. It includes metadata about the patch, such as the
// author and a subject. Any field may be left at its zero value when the
// corresponding information is not present in the preamble.
type PatchHeader struct {
	// The SHA of the commit the patch was generated from. Empty if the SHA is
	// not included in the header.
	SHA string

	// The author details of the patch. If these details are not included in
	// the header, Author is nil and AuthorDate is the zero time.
	Author     *PatchIdentity
	AuthorDate time.Time

	// The committer details of the patch. If these details are not included in
	// the header, Committer is nil and CommitterDate is the zero time.
	Committer     *PatchIdentity
	CommitterDate time.Time

	// The title and body of the commit message describing the changes in the
	// patch. Empty if no message is included in the header.
	Title string
	Body  string

	// If the preamble looks like an email, ParsePatchHeader will
	// remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the
	// Title and place them here, including any whitespace that
	// separated them from the title.
	SubjectPrefix string

	// If the preamble looks like an email, and it contains a `---`
	// line, that line will be removed and everything after it will be
	// placed in BodyAppendix.
	BodyAppendix string
}
// Message returns the commit message for the header: the title alone when
// there is no body, otherwise the title and the body separated by an empty
// line. A nil header yields the empty string.
func (h *PatchHeader) Message() string {
	if h == nil {
		return ""
	}
	if h.Body == "" {
		return h.Title
	}
	return h.Title + "\n\n" + h.Body
}
// PatchIdentity identifies a person who authored or committed a patch.
type PatchIdentity struct {
	Name  string
	Email string
}

// String formats the identity as `Name <Email>`. An empty name is rendered
// as a pair of double quotes so the result still has a name component.
func (i PatchIdentity) String() string {
	if i.Name != "" {
		return fmt.Sprintf("%s <%s>", i.Name, i.Email)
	}
	return fmt.Sprintf("%s <%s>", `""`, i.Email)
}
// ParsePatchIdentity parses a patch identity string of the form
// `Name <email>`. Both parts must be non-empty after surrounding whitespace
// is trimmed. Like Git, ParsePatchIdentity does not require that the email
// address is valid or properly formatted, only that it is non-empty. The
// name ends at the first left angle bracket, '<', and the email ends at the
// first right angle bracket, '>', that follows it; anything after that
// bracket is ignored.
func ParsePatchIdentity(s string) (PatchIdentity, error) {
	open := strings.IndexByte(s, '<')
	if open < 0 {
		// Without an email section there is nothing to treat as a name either.
		return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s)
	}

	// Length of the email section, measured from just past the '<'.
	width := strings.IndexByte(s[open+1:], '>')
	if width < 0 {
		return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s)
	}

	name := strings.TrimSpace(s[:open])
	email := strings.TrimSpace(s[open+1 : open+1+width])
	if name == "" || email == "" {
		return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s)
	}
	return PatchIdentity{Name: name, Email: email}, nil
}
// ParsePatchDate parses a patch date string. It returns the parsed time or an
// error if s has an unknown format. ParsePatchDate supports the iso, rfc,
// short, raw, unix, and default formats (with local variants) used by the
// --date flag in Git.
//
// An empty string is not an error: it yields the zero time. Layouts that
// carry no zone offset are interpreted in the local time zone.
func ParsePatchDate(s string) (time.Time, error) {
	if s == "" {
		return time.Time{}, nil
	}

	// Textual layouts, tried in order; the first one that parses wins.
	layouts := [...]string{
		"2006-01-02 15:04:05 -0700",      // iso
		"2006-01-02T15:04:05-07:00",      // iso-strict
		"Mon, 2 Jan 2006 15:04:05 -0700", // rfc2822
		"2006-01-02",                     // short
		"Mon Jan 2 15:04:05 2006 -0700",  // default
		"Mon Jan 2 15:04:05 2006",        // default-local
	}
	for _, layout := range layouts {
		parsed, err := time.ParseInLocation(layout, s, time.Local)
		if err == nil {
			return parsed, nil
		}
	}

	// unix format: the whole string is a count of seconds since the epoch.
	if secs, err := strconv.ParseInt(s, 10, 64); err == nil {
		return time.Unix(secs, 0), nil
	}

	// raw format: epoch seconds, a single space, then a numeric zone offset.
	if sp := strings.IndexByte(s, ' '); sp > 0 {
		secs, secsErr := strconv.ParseInt(s[:sp], 10, 64)
		zone, zoneErr := time.Parse("-0700", s[sp+1:])
		if secsErr == nil && zoneErr == nil {
			return time.Unix(secs, 0).In(zone.Location()), nil
		}
	}

	return time.Time{}, fmt.Errorf("unknown date format: %s", s)
}
// ParsePatchHeader parses a preamble string as returned by Parse into a
// PatchHeader. Due to the variety of header formats, some fields of the parsed
// PatchHeader may be unset after parsing.
//
// Supported formats are the short, medium, full, fuller, and email pretty
// formats used by git diff, git log, and git show and the UNIX mailbox format
// used by git format-patch. The format is chosen from the first non-blank
// line, ignoring surrounding whitespace: a line starting with "From " is
// parsed as an email and a line starting with "commit " as a pretty header.
// Any other input, including an empty or all-blank string, is an error.
//
// If ParsePatchHeader detects that it is handling an email, it will
// remove extra content at the beginning of the title line, such as
// `[PATCH]` or `Re:` in the same way that `git mailinfo` does.
// SubjectPrefix will be set to the value of this removed string.
// (`git mailinfo` is the core part of `git am` that pulls information
// out of an individual mail.)
//
// Additionally, if ParsePatchHeader detects that it's handling an
// email, it will remove a `---` line and put anything after it into
// BodyAppendix.
//
// Those wishing the effect of a plain `git am` should use
// `PatchHeader.Message()`, which joins the title and the body with an
// empty line (`PatchHeader.Title + "\n\n" + PatchHeader.Body`). Those
// wishing to retain the subject prefix and appendix material should
// additionally prepend `PatchHeader.SubjectPrefix` to the title and
// append `PatchHeader.BodyAppendix` after the body.
func ParsePatchHeader(s string) (*PatchHeader, error) {
	r := bufio.NewReader(strings.NewReader(s))

	// Skip leading blank lines; stop at the first line with content or at
	// the end of the input.
	var line string
	for {
		raw, err := r.ReadString('\n')
		atEOF := errors.Is(err, io.EOF)
		if err != nil && !atEOF {
			return nil, err
		}

		// Trim before acting on EOF: ReadString returns a final line that
		// lacks a trailing newline together with io.EOF, and that line must
		// be normalized the same way as every other line.
		line = strings.TrimSpace(raw)
		if line != "" || atEOF {
			break
		}
	}

	switch {
	case strings.HasPrefix(line, mailHeaderPrefix):
		return parseHeaderMail(line, r)
	case strings.HasPrefix(line, prettyHeaderPrefix):
		return parseHeaderPretty(line, r)
	}
	return nil, errors.New("unrecognized patch header format")
}
// parseHeaderPretty parses a header in one of the pretty formats.
// prettyLine is the first line of the header, which must begin with the
// "commit " prefix; r supplies everything that follows it.
//
// The field lines up to the first blank line fill in the author, committer
// and date fields; lines with an unrecognized prefix are skipped. The
// paragraph after that becomes the title and the remainder becomes the
// body. A malformed identity or date aborts parsing with that error.
func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) {
	const (
		authorPrefix     = "Author:"
		commitPrefix     = "Commit:"
		datePrefix       = "Date:"
		authorDatePrefix = "AuthorDate:"
		commitDatePrefix = "CommitDate:"
	)

	// The SHA is the first space-delimited word after the prefix; anything
	// after it on the same line is dropped.
	rest := prettyLine[len(prettyHeaderPrefix):]
	sha := rest
	if sp := strings.IndexByte(rest, ' '); sp > 0 {
		sha = rest[:sp]
	}
	h := &PatchHeader{SHA: sha}

	sc := bufio.NewScanner(r)
	for sc.Scan() {
		field := sc.Text()

		// empty line marks end of fields, remaining lines are title/message
		if strings.TrimSpace(field) == "" {
			break
		}

		var err error
		switch {
		case strings.HasPrefix(field, authorPrefix):
			var id PatchIdentity
			if id, err = ParsePatchIdentity(field[len(authorPrefix):]); err == nil {
				h.Author = &id
			}

		case strings.HasPrefix(field, commitPrefix):
			var id PatchIdentity
			if id, err = ParsePatchIdentity(field[len(commitPrefix):]); err == nil {
				h.Committer = &id
			}

		case strings.HasPrefix(field, datePrefix):
			// A plain "Date:" field carries the author date.
			h.AuthorDate, err = ParsePatchDate(strings.TrimSpace(field[len(datePrefix):]))

		case strings.HasPrefix(field, authorDatePrefix):
			h.AuthorDate, err = ParsePatchDate(strings.TrimSpace(field[len(authorDatePrefix):]))

		case strings.HasPrefix(field, commitDatePrefix):
			h.CommitterDate, err = ParsePatchDate(strings.TrimSpace(field[len(commitDatePrefix):]))
		}
		if err != nil {
			return nil, err
		}
	}
	if err := sc.Err(); err != nil {
		return nil, err
	}

	title, indent := scanMessageTitle(sc)
	if err := sc.Err(); err != nil {
		return nil, err
	}
	h.Title = title
	if title == "" {
		// No message at all, so there is no body to look for.
		return h, nil
	}

	// Don't check for an appendix
	body, _ := scanMessageBody(sc, indent, false)
	if err := sc.Err(); err != nil {
		return nil, err
	}
	h.Body = body

	return h, nil
}
// scanMessageTitle reads lines from s up to and including the first blank
// line (or the end of input) and joins them, trimmed, with single spaces to
// form the title. indent is the leading whitespace of the first title line,
// or empty if that line is not indented. Scanner errors are left for the
// caller to check with s.Err().
func scanMessageTitle(s *bufio.Scanner) (title string, indent string) {
	var parts []string
	for s.Scan() {
		raw := s.Text()
		text := strings.TrimSpace(raw)
		if text == "" {
			break
		}

		if len(parts) == 0 {
			// Only the first line determines the indentation of the message.
			lead := len(raw) - len(strings.TrimLeftFunc(raw, unicode.IsSpace))
			if lead > 0 {
				indent = raw[:lead]
			}
		}
		parts = append(parts, text)
	}
	return strings.Join(parts, " "), indent
}
// scanMessageBody reads the remaining lines from s and returns the message
// body and, optionally, its appendix.
//
// Trailing whitespace and then the leading indent are removed from every
// line. Blank lines at the start or end of a section are dropped, and any
// run of blank lines between two text lines collapses to one empty line.
//
// When separateAppendix is true, the first line that is exactly `---` is
// removed and all following text is returned as the appendix (often added
// by `git format-patch` and removed by `git am`). Otherwise the appendix is
// empty. Scanner errors are left for the caller to check with s.Err().
func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) {
	const (
		bodySection = iota
		appendixSection
	)

	var sections [2]strings.Builder
	current := bodySection
	blankPending := false

	for s.Scan() {
		text := strings.TrimRightFunc(s.Text(), unicode.IsSpace)
		text = strings.TrimPrefix(text, indent)

		switch {
		case text == "":
			// Remember the gap; it is only emitted if more text follows.
			blankPending = true

		case separateAppendix && current == bodySection && text == "---":
			// Everything after the separator goes to the appendix.
			current = appendixSection

		default:
			out := &sections[current]
			if out.Len() > 0 {
				out.WriteByte('\n')
				if blankPending {
					out.WriteByte('\n')
				}
			}
			blankPending = false
			out.WriteString(text)
		}
	}

	return sections[bodySection].String(), sections[appendixSection].String()
}
// parseHeaderMail parses a header in mailbox format. mailLine is the first
// line of the header, which must begin with the "From " prefix; r supplies
// the mail message that follows it.
//
// The SHA is the first word after the prefix on mailLine. The author comes
// from the first address in the From header, which must include a name. The
// author date comes from the Date header. The Subject header is split into
// SubjectPrefix and Title, and the message body into Body and BodyAppendix.
// Missing From, Date and Subject headers leave their fields unset.
func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) {
	msg, err := mail.ReadMessage(r)
	if err != nil {
		return nil, err
	}

	h := &PatchHeader{}

	rest := mailLine[len(mailHeaderPrefix):]
	if end := strings.IndexByte(rest, ' '); end > 0 {
		h.SHA = rest[:end]
	}

	from, err := msg.Header.AddressList("From")
	switch {
	case err != nil && !errors.Is(err, mail.ErrHeaderNotPresent):
		return nil, err
	case len(from) == 0:
		// No From header: leave the author unset.
	case from[0].Name == "":
		return nil, fmt.Errorf("invalid user string: %s", from[0])
	default:
		h.Author = &PatchIdentity{Name: from[0].Name, Email: from[0].Address}
	}

	if raw := msg.Header.Get("Date"); raw != "" {
		when, err := ParsePatchDate(raw)
		if err != nil {
			return nil, err
		}
		h.AuthorDate = when
	}

	h.SubjectPrefix, h.Title = parseSubject(msg.Header.Get("Subject"))

	// Mail bodies are not indented, and may carry a `---` appendix.
	body := bufio.NewScanner(msg.Body)
	h.Body, h.BodyAppendix = scanMessageBody(body, "", true)
	if err := body.Err(); err != nil {
		return nil, err
	}

	return h, nil
}
// parseSubject takes an email subject and returns the patch prefix and the
// commit title, i.e., `[PATCH v3 3/5] Implement foo` would return
// `[PATCH v3 3/5] ` and `Implement foo`. The two results always concatenate
// back to s.
//
// This is meant to be compatible with
// https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject().
// If compatibility with `git am` drifts, go there to see if there
// are any updates.
func parseSubject(s string) (string, string) {
	// tokenLen reports how many leading bytes of rest form one removable
	// prefix token, or 0 if rest starts with title text. rest is non-empty.
	tokenLen := func(rest string) int {
		switch rest[0] {
		case ' ', '\t', ':':
			// Whitespace and duplicate ':' characters
			return 1
		case 'r', 'R':
			// re:, Re:, rE: and RE:
			if len(rest) > 2 && (rest[1] == 'e' || rest[1] == 'E') && rest[2] == ':' {
				return 3
			}
		case '[':
			// A bracketed tag, but only if the closing bracket exists
			for k := 1; k < len(rest); k++ {
				if rest[k] == ']' {
					return k + 1
				}
			}
		}
		return 0
	}

	cut := 0
	for cut < len(s) {
		n := tokenLen(s[cut:])
		if n == 0 {
			// Nothing more to remove: the title starts here.
			break
		}
		cut += n
	}
	return s[:cut], s[cut:]
}