peridot/vendor/github.com/ProtonMail/go-mime/parser.go
2022-07-07 22:13:21 +02:00

528 lines
15 KiB
Go

package gomime
import (
"bufio"
"bytes"
"io"
"io/ioutil"
"mime"
"mime/multipart"
"net/http"
"net/mail"
"net/textproto"
"regexp"
"strings"
log "github.com/sirupsen/logrus"
)
// VisitAcceptor decides what to do with each MIME part that is processed.
// It is the callback interface used by MimeVisitor and VisitAll.
type VisitAcceptor interface {
// Accept is handed the body reader and the header of a part.
// hasPlainSibling is computed by the caller from the content types of the
// surrounding parts. MimeVisitor calls Accept with isFirst set when a part is
// first reached, and again with isFirst unset after each child of a multipart
// part has been visited; isLast is set on the call that follows the final child.
// A non-nil error aborts the traversal.
Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error)
}
// VisitAll hands a single part to accepter as one complete visit
// (isFirst and isLast both set). The hasPlainSibling flag passed along is
// true exactly when the part's own media type is text/plain.
//
// An unparsable Content-Type header is returned as the error and accepter
// is not called.
func VisitAll(part io.Reader, h textproto.MIMEHeader, accepter VisitAcceptor) (err error) {
	mediaType, _, parseErr := getContentType(h)
	if parseErr != nil {
		return parseErr
	}
	isPlain := mediaType == "text/plain"
	return accepter.Accept(part, h, isPlain, true, true)
}
// IsLeaf reports whether the part described by h is a leaf of the MIME tree,
// i.e. whether its Content-Type is anything other than multipart/*.
//
// Media types are case-insensitive, so the header value is lower-cased (and
// stripped of surrounding whitespace) before it is compared; a part declared
// as "Multipart/Mixed" is a container, not a leaf. A missing Content-Type
// header counts as a leaf.
func IsLeaf(h textproto.MIMEHeader) bool {
	contentType := strings.ToLower(strings.TrimSpace(h.Get("Content-Type")))
	return !strings.HasPrefix(contentType, "multipart/")
}
// MimeVisitor is the main object used to parse (visit) and process (accept)
// all parts of a MIME message.
type MimeVisitor struct {
// target receives a callback for every part the visitor reaches.
target VisitAcceptor
}
// Accept walks part and, when it is a multipart container, all of its
// descendants, notifying the wrapped target along the way.
//
// Calls with isFirst unset are ignored. Otherwise the target is notified of
// the part itself (isFirst set, isLast unset). For a container, every child
// is then visited recursively, and after each child the target is notified
// about the container again with isFirst unset; isLast is set on the
// notification that follows the final child.
//
// hasPlainSibling is forwarded to the target unchanged. The flag handed to
// the children is true when one of them is text/plain, or when this part is
// multipart/related and itself has a plain sibling.
//
// The first error from header parsing, multipart splitting or the target
// stops the walk and is returned.
func (mv *MimeVisitor) Accept(part io.Reader, h textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
	if !isFirst {
		return nil
	}

	containerType, params, err := getContentType(h)
	if err != nil {
		return err
	}

	if err = mv.target.Accept(part, h, hasPlainSibling, true, false); err != nil {
		return err
	}
	if IsLeaf(h) {
		return nil
	}

	children, childHeaders, err := GetMultipartParts(part, params)
	if err != nil {
		return err
	}

	// A multipart/related container passes its own plain sibling on to its children.
	plainAmongChildren := hasPlainSibling && containerType == "multipart/related"
	for _, childHeader := range childHeaders {
		// Children with an unparsable Content-Type simply do not count as plain.
		if childType, _, _ := getContentType(childHeader); childType == "text/plain" {
			plainAmongChildren = true
		}
	}

	finalIndex := len(children) - 1
	for idx, child := range children {
		if err = mv.Accept(child, childHeaders[idx], plainAmongChildren, true, true); err != nil {
			return err
		}
		if err = mv.target.Accept(part, h, hasPlainSibling, false, idx == finalIndex); err != nil {
			return err
		}
	}
	return nil
}
// NewMimeVisitor returns a MimeVisitor that reports every visited part to
// targetAccepter.
func NewMimeVisitor(targetAccepter VisitAcceptor) *MimeVisitor {
	return &MimeVisitor{target: targetAccepter}
}
// GetRawMimePart extracts the raw bytes of the first body part delimited by
// boundary from rawdata.
//
// boundary is matched as a line prefix, so callers pass it including the
// leading dashes. Two readers are returned:
//
//   - a reader over a complete copy of everything read from rawdata, and
//   - a reader over the bytes that lie between the first line starting with
//     boundary and the next such line. Line endings inside the part are kept
//     exactly as they appear in the input; only the line ending that directly
//     precedes the closing boundary line is removed.
//
// If no opening boundary line exists the second reader is empty. If the input
// ends before a closing boundary line is seen, everything after the opening
// boundary line is returned untrimmed.
func GetRawMimePart(rawdata io.Reader, boundary string) (io.Reader, io.Reader) {
	// A read error is deliberately not reported: the function has no error
	// result and works on whatever could be read.
	raw, _ := ioutil.ReadAll(rawdata)
	tee := bytes.NewReader(raw)

	reader := bufio.NewReader(bytes.NewReader(raw))
	delimiter := []byte(boundary)

	// Skip everything up to and including the opening boundary line.
	for {
		line, err := reader.ReadBytes('\n')
		if len(line) == 0 && err != nil {
			return tee, bytes.NewReader(nil)
		}
		if bytes.HasPrefix(line, delimiter) {
			break
		}
		if err != nil {
			// Final, unterminated line and still no boundary.
			return tee, bytes.NewReader(nil)
		}
	}

	// Copy whole lines, terminators included, until the next boundary line.
	var body bytes.Buffer
	lineEndingLength := 0
	for {
		line, err := reader.ReadBytes('\n')
		if len(line) == 0 && err != nil {
			return tee, bytes.NewReader(body.Bytes())
		}
		if bytes.HasPrefix(line, delimiter) {
			break
		}
		body.Write(line)

		// Remember how long the terminator of the most recent line was, so it
		// can be cut off if the closing boundary follows immediately.
		switch {
		case bytes.HasSuffix(line, []byte("\r\n")):
			lineEndingLength = 2
		case bytes.HasSuffix(line, []byte("\n")):
			lineEndingLength = 1
		default:
			lineEndingLength = 0
		}

		if err != nil {
			// Input ended without a closing boundary.
			return tee, bytes.NewReader(body.Bytes())
		}
	}

	data := body.Bytes()
	return tee, bytes.NewReader(data[:len(data)-lineEndingLength])
}
// GetAllChildParts flattens the MIME tree rooted at part into its leaf parts
// and returns them together with their headers, in document order.
//
// A non-multipart part is returned as the single result. For a multipart
// container every child is flattened recursively, except for containers whose
// media type contains "alternative": there only the alternative chosen by
// pickAlternativePart is followed. When no alternative is acceptable the
// container contributes nothing, rather than a nil reader and nil header.
//
// On error the leaves collected so far (if any) are returned along with it.
func GetAllChildParts(part io.Reader, h textproto.MIMEHeader) (parts []io.Reader, headers []textproto.MIMEHeader, err error) {
	mediaType, params, err := getContentType(h)
	if err != nil {
		return nil, nil, err
	}

	if !strings.HasPrefix(mediaType, "multipart/") {
		return append(parts, part), append(headers, h), nil
	}

	multiparts, multipartHeaders, err := GetMultipartParts(part, params)
	if err != nil {
		return nil, nil, err
	}

	if strings.Contains(mediaType, "alternative") {
		chosenPart, chosenHeader, pickErr := pickAlternativePart(multiparts, multipartHeaders)
		if pickErr != nil {
			return nil, nil, pickErr
		}
		if chosenPart == nil {
			// No usable alternative: recursing with a nil part would put a
			// nil io.Reader into the result.
			return nil, nil, nil
		}
		childParts, childHeaders, childErr := GetAllChildParts(chosenPart, chosenHeader)
		if childErr != nil {
			return nil, nil, childErr
		}
		parts = append(parts, childParts...)
		headers = append(headers, childHeaders...)
		return parts, headers, nil
	}

	for i, p := range multiparts {
		childParts, childHeaders, childErr := GetAllChildParts(p, multipartHeaders[i])
		if childErr != nil {
			return parts, headers, childErr
		}
		parts = append(parts, childParts...)
		headers = append(headers, childHeaders...)
	}
	return parts, headers, nil
}
// GetMultipartParts splits the multipart body read from r into its direct
// child parts, using params["boundary"] as the delimiter.
//
// Each child body is read fully into memory and returned as a reader;
// headers[i] is the header of parts[i]. Both slices are non-nil even when no
// part is found. Errors while reading a child's body are ignored (the bytes
// read so far are kept); an error while advancing to the next part is
// returned together with the parts collected up to that point.
func GetMultipartParts(r io.Reader, params map[string]string) (parts []io.Reader, headers []textproto.MIMEHeader, err error) {
	parts = make([]io.Reader, 0)
	headers = make([]textproto.MIMEHeader, 0)

	splitter := multipart.NewReader(r, params["boundary"])
	for {
		next, nextErr := splitter.NextPart()
		if nextErr == io.EOF {
			// Regular end of the multipart body.
			return parts, headers, nil
		}
		if nextErr != nil {
			return parts, headers, nextErr
		}

		content, _ := ioutil.ReadAll(next)
		parts = append(parts, bytes.NewBuffer(content))
		headers = append(headers, next.Header)
	}
}
// pickAlternativePart selects which child of a multipart/alternative
// container should be used. Preference order: the first nested multipart/*
// part, then the first text/html part, then the first text/plain part.
// Children whose Content-Type cannot be parsed are skipped.
//
// When nothing matches, the returned part and header are nil. The error
// result is always nil.
func pickAlternativePart(parts []io.Reader, headers []textproto.MIMEHeader) (part io.Reader, h textproto.MIMEHeader, err error) {
	preferences := []func(mediaType string) bool{
		func(mediaType string) bool { return strings.HasPrefix(mediaType, "multipart/") },
		func(mediaType string) bool { return mediaType == "text/html" },
		func(mediaType string) bool { return mediaType == "text/plain" },
	}

	for _, wanted := range preferences {
		for idx := range headers {
			mediaType, _, parseErr := getContentType(headers[idx])
			if parseErr != nil {
				continue
			}
			if wanted(mediaType) {
				return parts[idx], headers[idx], nil
			}
		}
	}

	// No acceptable alternative: part stays nil.
	return nil, nil, nil
}
// Patterns used by parseAddressComment, compiled once instead of on every
// call and loop iteration.
var (
	// addressSeparatorPattern splits an address list on ',' and ';'.
	addressSeparatorPattern = regexp.MustCompile("[,;]")
	// addressCommentPattern matches one parenthesised comment.
	addressCommentPattern = regexp.MustCompile("[(][^)]*[)]")
	// addressCommentParens strips the parentheses from matched comments.
	addressCommentParens = strings.NewReplacer("(", "", ")", "")
)

// parseAddressComment parses an address list that may carry RFC 822 style
// comments, as defined in http://tools.wordtothewise.com/rfc/822
//
// raw is split on ',' and ';'. For each item the parenthesised comments are
// removed and the remainder is parsed with mail.ParseAddress; items that do
// not parse are dropped. When the parsed address has no display name, the
// text of the comments (joined with spaces) is used as the name. The
// formatted addresses are returned joined with ", ".
//
// FIXME: Does not work for address groups
// NOTE: This should be removed for go>1.10 (please check)
func parseAddressComment(raw string) string {
	parsed := []string{}
	for _, item := range addressSeparatorPattern.Split(raw, -1) {
		comments := strings.Join(addressCommentPattern.FindAllString(item, -1), " ")
		comments = addressCommentParens.Replace(comments)

		withoutComments := addressCommentPattern.ReplaceAllString(item, "")
		addr, err := mail.ParseAddress(withoutComments)
		if err != nil {
			continue
		}
		if addr.Name == "" {
			addr.Name = comments
		}
		parsed = append(parsed, addr.String())
	}
	return strings.Join(parsed, ", ")
}
// checkHeaders reports whether a text/* part appears anywhere after the first
// non-text part in headers. Headers with a missing Content-Type count as
// text/plain; headers whose Content-Type cannot be parsed are classified by
// whatever media type getContentType returns alongside the error.
func checkHeaders(headers []textproto.MIMEHeader) bool {
	seenNonText := false
	for _, header := range headers {
		mediaType, _, _ := getContentType(header)
		isText := strings.HasPrefix(mediaType, "text/")
		switch {
		case !isText:
			seenNonText = true
		case seenNonText:
			// A text part follows the first attachment, so the body will have
			// to be converted from plain to HTML.
			return true
		}
	}
	return false
}
// decodePart wraps partReader in a decoder for the part's
// Content-Transfer-Encoding. When DecodeContentEncoding returns nil for the
// encoding, a warning is logged and partReader is returned as is.
func decodePart(partReader io.Reader, header textproto.MIMEHeader) (decodedPart io.Reader) {
	encoding := header.Get("Content-Transfer-Encoding")
	if decoded := DecodeContentEncoding(partReader, encoding); decoded != nil {
		return decoded
	}
	log.Warnf("Unsupported Content-Transfer-Encoding '%v'", encoding)
	return partReader
}
// getContentType parses the Content-Type header into its media type and
// parameters using mime.ParseMediaType. A missing or empty header is treated
// as "text/plain".
func getContentType(header textproto.MIMEHeader) (mediatype string, params map[string]string, err error) {
	const fallback = "text/plain"
	if declared := header.Get("Content-Type"); declared != "" {
		return mime.ParseMediaType(declared)
	}
	return mime.ParseMediaType(fallback)
}
// ===================== MIME Printer ===================================
// Simply print resulting MIME tree into text form
// TODO to file mime_printer.go
// stack is a minimal LIFO of strings. All methods have value receivers and
// return the updated stack, so callers must keep the returned value.
type stack []string

// Push returns the stack with v added on top.
func (s stack) Push(v string) stack {
	return append(s, v)
}

// Pop returns the stack without its top element, and that element.
// Popping an empty stack returns it unchanged together with "" instead of
// panicking.
func (s stack) Pop() (stack, string) {
	if len(s) == 0 {
		return s, ""
	}
	last := len(s) - 1
	return s[:last], s[last]
}

// Peek returns the top element without removing it, or "" when the stack is
// empty.
func (s stack) Peek() string {
	if len(s) == 0 {
		return ""
	}
	return s[len(s)-1]
}
// MIMEPrinter is a VisitAcceptor that renders the visited MIME tree as text.
type MIMEPrinter struct {
// result accumulates the printed output.
result *bytes.Buffer
// boundaryStack holds the boundaries of the multipart parts that have been
// opened but not yet closed, innermost last.
boundaryStack stack
}
// NewMIMEPrinter returns a printer with an empty output buffer and no open
// multipart boundaries.
func NewMIMEPrinter() (pd *MIMEPrinter) {
	pd = new(MIMEPrinter)
	pd.result = bytes.NewBufferString("")
	pd.boundaryStack = stack{}
	return pd
}
// Accept appends the textual form of one visiting step to the output.
//
// On the first visit of a part (isFirst) the header is written followed by an
// empty line; a leaf's body is then copied verbatim, while a multipart part
// gets the standard preamble and its opening boundary, and the boundary is
// remembered on the stack. On later visits of a multipart part a separating
// boundary is written, or, when isLast is set, the closing boundary, which
// also removes the boundary from the stack.
//
// Write errors are not checked; the returned error is always nil.
func (pd *MIMEPrinter) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
	switch {
	case isFirst:
		http.Header(header).Write(pd.result)
		pd.result.WriteString("\n")
		if IsLeaf(header) {
			pd.result.ReadFrom(partReader)
		} else {
			_, params, _ := getContentType(header)
			delimiter := params["boundary"]
			pd.boundaryStack = pd.boundaryStack.Push(delimiter)
			pd.result.WriteString("\nThis is a multi-part message in MIME format.\n--" + delimiter + "\n")
		}
	case isLast:
		var delimiter string
		pd.boundaryStack, delimiter = pd.boundaryStack.Pop()
		pd.result.WriteString("\n--" + delimiter + "--\n.\n")
	default:
		pd.result.WriteString("\n--" + pd.boundaryStack.Peek() + "\n")
	}
	return nil
}
// String returns everything printed so far.
func (pd *MIMEPrinter) String() string {
return pd.result.String()
}
// ======================== PlainText Collector =========================
// PlainTextCollector collects the contents of all non-attachment text/plain
// parts and returns them as a string. Every visited part is also forwarded
// to the wrapped target acceptor.
// TODO to file collector_plaintext.go
type PlainTextCollector struct {
// target is the next acceptor in the chain.
target VisitAcceptor
// plainTextContents accumulates the decoded text of the collected parts.
plainTextContents *bytes.Buffer
}
// NewPlainTextCollector returns a collector with an empty text buffer that
// forwards every visited part to targetAccepter.
func NewPlainTextCollector(targetAccepter VisitAcceptor) *PlainTextCollector {
	collector := new(PlainTextCollector)
	collector.target = targetAccepter
	collector.plainTextContents = bytes.NewBufferString("")
	return collector
}
// Accept records the text of leaf parts that are text/plain and not marked
// as attachments, then forwards the call to the wrapped target.
//
// A collected part is read fully, passed through decodePart and
// DecodeCharset, and appended to the collected text. A charset error is only
// logged; whatever DecodeCharset returned is still appended. If the decoded
// part cannot be read, nothing is appended. The target receives a fresh
// reader over the unmodified part data. All other calls are forwarded with
// partReader untouched.
//
// The returned error is the one reported by the target.
func (ptc *PlainTextCollector) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
	if !isFirst || !IsLeaf(header) {
		return ptc.target.Accept(partReader, header, hasPlainSibling, isFirst, isLast)
	}

	mediaType, params, _ := getContentType(header)
	disposition, _, _ := mime.ParseMediaType(header.Get("Content-Disposition"))
	if mediaType != "text/plain" || disposition == "attachment" {
		return ptc.target.Accept(partReader, header, hasPlainSibling, isFirst, isLast)
	}

	rawPart, _ := ioutil.ReadAll(partReader)
	decoded, readErr := ioutil.ReadAll(decodePart(bytes.NewReader(rawPart), header))
	if readErr == nil {
		text, charsetErr := DecodeCharset(decoded, mediaType, params)
		if charsetErr != nil {
			// Decoding problems must not fail the parsing.
			log.Warnln("Decode charset error:", charsetErr)
		}
		ptc.plainTextContents.Write(text)
	}

	return ptc.target.Accept(bytes.NewReader(rawPart), header, hasPlainSibling, isFirst, isLast)
}
// GetPlainText returns the text collected so far from all accepted
// text/plain parts, concatenated in the order they were visited.
func (ptc PlainTextCollector) GetPlainText() string {
return ptc.plainTextContents.String()
}
// ======================== Body Collector ==============
// BodyCollector collects the contents and headers of all non-attachment
// text/html and text/plain parts and returns them as strings. Every visited
// part is also forwarded to the wrapped target acceptor.
// TODO to file collector_body.go
type BodyCollector struct {
// target is the next acceptor in the chain.
target VisitAcceptor
// htmlBodyBuffer accumulates the decoded bodies of text/html parts.
htmlBodyBuffer *bytes.Buffer
// plainBodyBuffer accumulates the decoded bodies of text/plain parts.
plainBodyBuffer *bytes.Buffer
// htmlHeaderBuffer accumulates the headers of text/html parts.
htmlHeaderBuffer *bytes.Buffer
// plainHeaderBuffer accumulates the headers of text/plain parts.
plainHeaderBuffer *bytes.Buffer
// hasHtml is set once a text/html part has been collected; the getters then
// return the HTML variant instead of the plain one.
hasHtml bool
}
// NewBodyCollector returns a collector with empty body and header buffers
// that forwards every visited part to targetAccepter.
func NewBodyCollector(targetAccepter VisitAcceptor) *BodyCollector {
	collector := new(BodyCollector)
	collector.target = targetAccepter
	collector.htmlBodyBuffer = bytes.NewBufferString("")
	collector.plainBodyBuffer = bytes.NewBufferString("")
	collector.htmlHeaderBuffer = bytes.NewBufferString("")
	collector.plainHeaderBuffer = bytes.NewBufferString("")
	return collector
}
// Accept records body and header of leaf parts that are not marked as
// attachments, then forwards the call to the wrapped target.
//
// Such a part is read fully and passed through decodePart and DecodeCharset.
// text/html content goes to the HTML buffers (and marks the collector as
// having HTML), text/plain content goes to the plain buffers; other media
// types are decoded but not stored. A charset error is only logged; whatever
// DecodeCharset returned is still stored. If the decoded part cannot be read,
// nothing is stored. The target receives a fresh reader over the unmodified
// part data. All other calls are forwarded with partReader untouched.
//
// The returned error is the one reported by the target.
func (bc *BodyCollector) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
	// TODO: collect html and plaintext - if there's html with plain sibling don't include plain/text
	if !isFirst || !IsLeaf(header) {
		return bc.target.Accept(partReader, header, hasPlainSibling, isFirst, isLast)
	}

	mediaType, params, _ := getContentType(header)
	disposition, _, _ := mime.ParseMediaType(header.Get("Content-Disposition"))
	if disposition == "attachment" {
		return bc.target.Accept(partReader, header, hasPlainSibling, isFirst, isLast)
	}

	rawPart, _ := ioutil.ReadAll(partReader)
	decoded, readErr := ioutil.ReadAll(decodePart(bytes.NewReader(rawPart), header))
	if readErr == nil {
		text, charsetErr := DecodeCharset(decoded, mediaType, params)
		if charsetErr != nil {
			// Decoding problems must not fail the parsing.
			log.Warnln("Decode charset error:", charsetErr)
		}
		switch mediaType {
		case "text/html":
			bc.hasHtml = true
			http.Header(header).Write(bc.htmlHeaderBuffer)
			bc.htmlBodyBuffer.Write(text)
		case "text/plain":
			http.Header(header).Write(bc.plainHeaderBuffer)
			bc.plainBodyBuffer.Write(text)
		}
	}

	return bc.target.Accept(bytes.NewReader(rawPart), header, hasPlainSibling, isFirst, isLast)
}
// GetBody returns the collected body and its media type: the HTML body with
// "text/html" once any text/html part has been collected, otherwise the plain
// body with "text/plain".
func (bc *BodyCollector) GetBody() (string, string) {
	if !bc.hasHtml {
		return bc.plainBodyBuffer.String(), "text/plain"
	}
	return bc.htmlBodyBuffer.String(), "text/html"
}
// GetHeaders returns the collected headers that belong to the body returned
// by GetBody: those of the text/html parts once any has been collected,
// otherwise those of the text/plain parts.
func (bc *BodyCollector) GetHeaders() string {
	if !bc.hasHtml {
		return bc.plainHeaderBuffer.String()
	}
	return bc.htmlHeaderBuffer.String()
}
// ======================== Attachments Collector ==============
// AttachmentsCollector collects the contents and headers of all attachment
// parts and returns them as slices of strings, one entry per attachment.
// Every visited part is also forwarded to the wrapped target acceptor.
// TODO to file collector_attachment.go
type AttachmentsCollector struct {
// target is the next acceptor in the chain.
target VisitAcceptor
// attBuffers holds the decoded content of each collected attachment.
attBuffers []string
// attHeaders holds the serialized header of each collected attachment;
// attHeaders[i] belongs to attBuffers[i].
attHeaders []string
}
// NewAttachmentsCollector returns a collector with no attachments recorded
// that forwards every visited part to targetAccepter.
func NewAttachmentsCollector(targetAccepter VisitAcceptor) *AttachmentsCollector {
	collector := new(AttachmentsCollector)
	collector.target = targetAccepter
	collector.attBuffers = []string{}
	collector.attHeaders = []string{}
	return collector
}
// Accept records leaf parts that count as attachments, then forwards the call
// to the wrapped target.
//
// A leaf is an attachment when its Content-Disposition is "attachment" or its
// media type is neither text/html nor text/plain. Such a part is read fully
// and passed through decodePart and DecodeCharset; its serialized header and
// decoded content are appended as a pair. A charset error is only logged;
// whatever DecodeCharset returned is still stored. If the decoded part cannot
// be read, nothing is stored. The target receives a fresh reader over the
// unmodified part data. All other calls are forwarded with partReader
// untouched.
//
// The returned error is the one reported by the target.
func (ac *AttachmentsCollector) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
	if !isFirst || !IsLeaf(header) {
		return ac.target.Accept(partReader, header, hasPlainSibling, isFirst, isLast)
	}

	mediaType, params, _ := getContentType(header)
	disposition, _, _ := mime.ParseMediaType(header.Get("Content-Disposition"))
	isBodyText := mediaType == "text/html" || mediaType == "text/plain"
	if isBodyText && disposition != "attachment" {
		return ac.target.Accept(partReader, header, hasPlainSibling, isFirst, isLast)
	}

	rawPart, _ := ioutil.ReadAll(partReader)
	decoded, readErr := ioutil.ReadAll(decodePart(bytes.NewReader(rawPart), header))
	if readErr == nil {
		content, charsetErr := DecodeCharset(decoded, mediaType, params)
		if charsetErr != nil {
			// Decoding problems must not fail the parsing.
			log.Warnln("Decode charset error:", charsetErr)
		}
		var serializedHeader bytes.Buffer
		http.Header(header).Write(&serializedHeader)
		ac.attHeaders = append(ac.attHeaders, serializedHeader.String())
		ac.attBuffers = append(ac.attBuffers, string(content))
	}

	return ac.target.Accept(bytes.NewReader(rawPart), header, hasPlainSibling, isFirst, isLast)
}
// GetAttachments returns the decoded contents of the attachments collected so
// far, in the order they were visited. The returned slice is the collector's
// own slice, not a copy.
func (ac AttachmentsCollector) GetAttachments() []string {
return ac.attBuffers
}
// GetAttHeaders returns the serialized headers of the attachments collected
// so far, in the order they were visited. The returned slice is the
// collector's own slice, not a copy.
func (ac AttachmentsCollector) GetAttHeaders() []string {
return ac.attHeaders
}