mirror of
https://github.com/rocky-linux/peridot.git
synced 2024-11-05 06:21:24 +00:00
528 lines
15 KiB
Go
528 lines
15 KiB
Go
package gomime
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"io"
|
|
"io/ioutil"
|
|
"mime"
|
|
"mime/multipart"
|
|
"net/http"
|
|
"net/mail"
|
|
"net/textproto"
|
|
"regexp"
|
|
"strings"
|
|
|
|
log "github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// VisitAcceptor decides what to do with each part that is being processed.
// It is used by MimeVisitor, which reports every visited part through Accept.
type VisitAcceptor interface {
	// Accept is called with the part's reader and MIME header.
	//
	// hasPlainSibling tells the acceptor whether the visitor found a
	// text/plain part next to this one, so alternatives can be checked.
	// isFirst is true for the call that opens a part; MimeVisitor issues
	// further calls with isFirst == false after each child of a multipart
	// part, with isLast == true after the final child.
	Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error)
}
|
|
|
|
func VisitAll(part io.Reader, h textproto.MIMEHeader, accepter VisitAcceptor) (err error) {
|
|
mediaType, _, err := getContentType(h)
|
|
if err != nil {
|
|
return
|
|
}
|
|
return accepter.Accept(part, h, mediaType == "text/plain", true, true)
|
|
}
|
|
|
|
// IsLeaf reports whether the part described by h is a leaf, i.e. its
// Content-Type is not one of the multipart/* container types. A missing
// Content-Type counts as a leaf.
//
// Media type names are case-insensitive (RFC 2045), so the header value is
// lower-cased before the comparison; this keeps IsLeaf in agreement with
// getContentType, whose result is lower-cased by mime.ParseMediaType.
func IsLeaf(h textproto.MIMEHeader) bool {
	contentType := strings.ToLower(strings.TrimSpace(h.Get("Content-Type")))
	return !strings.HasPrefix(contentType, "multipart/")
}
|
|
|
|
// MIMEVisitor is main object to parse (visit) and process (accept) all parts of MIME message
|
|
type MimeVisitor struct {
|
|
target VisitAcceptor
|
|
}
|
|
|
|
// Accept reads part recursively if needed
|
|
// hasPlainSibling is there when acceptor want to check alternatives
|
|
func (mv *MimeVisitor) Accept(part io.Reader, h textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
|
|
if !isFirst {
|
|
return
|
|
}
|
|
|
|
parentMediaType, params, err := getContentType(h)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
if err = mv.target.Accept(part, h, hasPlainSibling, true, false); err != nil {
|
|
return
|
|
}
|
|
|
|
if !IsLeaf(h) {
|
|
var multiparts []io.Reader
|
|
var multipartHeaders []textproto.MIMEHeader
|
|
if multiparts, multipartHeaders, err = GetMultipartParts(part, params); err != nil {
|
|
return
|
|
}
|
|
hasPlainChild := false
|
|
for _, header := range multipartHeaders {
|
|
mediaType, _, _ := getContentType(header)
|
|
if mediaType == "text/plain" {
|
|
hasPlainChild = true
|
|
}
|
|
}
|
|
if hasPlainSibling && parentMediaType == "multipart/related" {
|
|
hasPlainChild = true
|
|
}
|
|
|
|
for i, p := range multiparts {
|
|
if err = mv.Accept(p, multipartHeaders[i], hasPlainChild, true, true); err != nil {
|
|
return
|
|
}
|
|
if err = mv.target.Accept(part, h, hasPlainSibling, false, i == (len(multiparts)-1)); err != nil {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// NewMIMEVisitor initialiazed with acceptor
|
|
func NewMimeVisitor(targetAccepter VisitAcceptor) *MimeVisitor {
|
|
return &MimeVisitor{targetAccepter}
|
|
}
|
|
|
|
// GetRawMimePart reads all of rawdata and returns two readers: the first
// replays the complete input, the second yields the raw bytes found between
// the first and the second line that start with boundary.
//
// Line endings inside the extracted part are preserved exactly as they
// appear in the input (CRLF stays CRLF). The line ending directly in front
// of the closing boundary line is not part of the returned bytes. If no
// closing boundary line exists, everything after the opening boundary is
// returned untrimmed; if no opening boundary line exists, the second reader
// is empty.
//
// boundary is compared as a line prefix, so it must be passed exactly as it
// appears at the start of the delimiter line.
func GetRawMimePart(rawdata io.Reader, boundary string) (io.Reader, io.Reader) {
	// The signature has no error result, so a failing source simply yields
	// whatever could be read before the failure.
	raw, _ := ioutil.ReadAll(rawdata)
	tee := bytes.NewReader(raw)

	delimiter := []byte(boundary)
	reader := bufio.NewReader(bytes.NewReader(raw))

	// Skip everything up to and including the opening boundary line.
	for {
		line, err := reader.ReadBytes('\n')
		if len(line) > 0 && bytes.HasPrefix(line, delimiter) {
			break
		}
		if err != nil {
			return tee, bytes.NewReader(nil)
		}
	}

	var body bytes.Buffer
	lastEnding := 0 // length of the line ending of the most recent body line
	for {
		// ReadBytes returns whole lines including their terminator, so the
		// original line ending is kept and long lines are never split.
		line, err := reader.ReadBytes('\n')
		if len(line) > 0 {
			if bytes.HasPrefix(line, delimiter) {
				// The line ending before the boundary belongs to the
				// delimiter, not to the part.
				data := body.Bytes()
				return tee, bytes.NewReader(data[:len(data)-lastEnding])
			}
			body.Write(line)
			lastEnding = rawLineEndingLen(line)
		}
		if err != nil {
			return tee, bytes.NewReader(body.Bytes())
		}
	}
}

// rawLineEndingLen returns the length of the line terminator at the end of
// line: 2 for CRLF, 1 for a bare LF and 0 when the line is unterminated.
func rawLineEndingLen(line []byte) int {
	switch {
	case bytes.HasSuffix(line, []byte("\r\n")):
		return 2
	case bytes.HasSuffix(line, []byte("\n")):
		return 1
	default:
		return 0
	}
}
|
|
|
|
func GetAllChildParts(part io.Reader, h textproto.MIMEHeader) (parts []io.Reader, headers []textproto.MIMEHeader, err error) {
|
|
mediaType, params, err := getContentType(h)
|
|
if err != nil {
|
|
return
|
|
}
|
|
if strings.HasPrefix(mediaType, "multipart/") {
|
|
var multiparts []io.Reader
|
|
var multipartHeaders []textproto.MIMEHeader
|
|
if multiparts, multipartHeaders, err = GetMultipartParts(part, params); err != nil {
|
|
return
|
|
}
|
|
if strings.Contains(mediaType, "alternative") {
|
|
var chosenPart io.Reader
|
|
var chosenHeader textproto.MIMEHeader
|
|
if chosenPart, chosenHeader, err = pickAlternativePart(multiparts, multipartHeaders); err != nil {
|
|
return
|
|
}
|
|
var childParts []io.Reader
|
|
var childHeaders []textproto.MIMEHeader
|
|
if childParts, childHeaders, err = GetAllChildParts(chosenPart, chosenHeader); err != nil {
|
|
return
|
|
}
|
|
parts = append(parts, childParts...)
|
|
headers = append(headers, childHeaders...)
|
|
} else {
|
|
for i, p := range multiparts {
|
|
var childParts []io.Reader
|
|
var childHeaders []textproto.MIMEHeader
|
|
if childParts, childHeaders, err = GetAllChildParts(p, multipartHeaders[i]); err != nil {
|
|
return
|
|
}
|
|
parts = append(parts, childParts...)
|
|
headers = append(headers, childHeaders...)
|
|
}
|
|
}
|
|
} else {
|
|
parts = append(parts, part)
|
|
headers = append(headers, h)
|
|
}
|
|
return
|
|
}
|
|
|
|
// GetMultipartParts splits the multipart body read from r into its direct
// child parts, using params["boundary"] as the delimiter. Each child body is
// read fully into memory and returned as a reader, alongside the child's
// header at the same index.
//
// Both result slices are non-nil even when there are no parts. When the
// multipart reader reports an error other than io.EOF, that error is
// returned together with the parts read so far.
func GetMultipartParts(r io.Reader, params map[string]string) (parts []io.Reader, headers []textproto.MIMEHeader, err error) {
	parts = make([]io.Reader, 0)
	headers = make([]textproto.MIMEHeader, 0)

	reader := multipart.NewReader(r, params["boundary"])
	for {
		next, nextErr := reader.NextPart()
		switch {
		case nextErr == io.EOF:
			return parts, headers, nil
		case nextErr != nil:
			return parts, headers, nextErr
		}

		// Best effort: a body that cannot be read completely is kept with
		// whatever content was readable.
		content, _ := ioutil.ReadAll(next)

		parts = append(parts, bytes.NewBuffer(content))
		headers = append(headers, next.Header)
	}
}
|
|
|
|
func pickAlternativePart(parts []io.Reader, headers []textproto.MIMEHeader) (part io.Reader, h textproto.MIMEHeader, err error) {
|
|
|
|
for i, h := range headers {
|
|
mediaType, _, err := getContentType(h)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
if strings.HasPrefix(mediaType, "multipart/") {
|
|
return parts[i], headers[i], nil
|
|
}
|
|
}
|
|
for i, h := range headers {
|
|
mediaType, _, err := getContentType(h)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
if mediaType == "text/html" {
|
|
return parts[i], headers[i], nil
|
|
}
|
|
}
|
|
for i, h := range headers {
|
|
mediaType, _, err := getContentType(h)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
if mediaType == "text/plain" {
|
|
return parts[i], headers[i], nil
|
|
}
|
|
}
|
|
//if we get all the way here, part will be nil
|
|
return
|
|
}
|
|
|
|
// Patterns used by parseAddressComment, compiled once instead of on every
// call (and, for the comment pattern, on every list item).
var (
	// addressListSeparator splits an address list on commas and semicolons.
	addressListSeparator = regexp.MustCompile("[,;]")
	// addressCommentPattern matches one parenthesized comment.
	addressCommentPattern = regexp.MustCompile("[(][^)]*[)]")
)

// parseAddressComment parses address comments as defined in
// http://tools.wordtothewise.com/rfc/822
//
// raw is split on "," and ";". For every item the parenthesized comments are
// removed before the rest is handed to mail.ParseAddress; when the parsed
// address has no display name, the comment text (without parentheses,
// multiple comments joined by a space) is used as the name. Items that do
// not parse are dropped. The result is the formatted addresses joined by
// ", ".
//
// FIXME: Does not work for address groups
// NOTE: This should be removed for go>1.10 (please check)
func parseAddressComment(raw string) string {
	parsed := []string{}
	for _, item := range addressListSeparator.Split(raw, -1) {
		comments := strings.Join(addressCommentPattern.FindAllString(item, -1), " ")
		comments = strings.Replace(comments, "(", "", -1)
		comments = strings.Replace(comments, ")", "", -1)

		withoutComments := addressCommentPattern.ReplaceAllString(item, "")
		addr, err := mail.ParseAddress(withoutComments)
		if err != nil {
			// Not a usable address; skip this item.
			continue
		}
		if addr.Name == "" {
			addr.Name = comments
		}
		parsed = append(parsed, addr.String())
	}
	return strings.Join(parsed, ", ")
}
|
|
|
|
func checkHeaders(headers []textproto.MIMEHeader) bool {
|
|
foundAttachment := false
|
|
|
|
for i := 0; i < len(headers); i++ {
|
|
h := headers[i]
|
|
|
|
mediaType, _, _ := getContentType(h)
|
|
|
|
if !strings.HasPrefix(mediaType, "text/") {
|
|
foundAttachment = true
|
|
} else if foundAttachment {
|
|
//this means that there is a text part after the first attachment, so we will have to convert the body from plain->HTML
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func decodePart(partReader io.Reader, header textproto.MIMEHeader) (decodedPart io.Reader) {
|
|
decodedPart = DecodeContentEncoding(partReader, header.Get("Content-Transfer-Encoding"))
|
|
if decodedPart == nil {
|
|
log.Warnf("Unsupported Content-Transfer-Encoding '%v'", header.Get("Content-Transfer-Encoding"))
|
|
decodedPart = partReader
|
|
}
|
|
return
|
|
}
|
|
|
|
// getContentType parses the Content-Type header with mime.ParseMediaType and
// returns the media type, its parameters and any parse error.
// 'text/plain' is assumed if the header is missing or empty.
func getContentType(header textproto.MIMEHeader) (mediatype string, params map[string]string, err error) {
	if value := header.Get("Content-Type"); value != "" {
		return mime.ParseMediaType(value)
	}
	return mime.ParseMediaType("text/plain")
}
|
|
|
|
// ===================== MIME Printer ===================================
|
|
// Simply print resulting MIME tree into text form
|
|
// TODO to file mime_printer.go
|
|
// stack is a LIFO stack of strings backed by a slice. Its methods do not
// modify the receiver; they return the resulting stack, which the caller has
// to keep.
type stack []string

// Push returns the stack with v added on top.
func (s stack) Push(v string) stack {
	return append(s, v)
}

// Pop returns the stack without its top element, and that element.
// Popping an empty stack returns the stack unchanged and "" instead of
// panicking.
func (s stack) Pop() (stack, string) {
	if len(s) == 0 {
		return s, ""
	}
	last := len(s) - 1
	return s[:last], s[last]
}

// Peek returns the top element without removing it, or "" when the stack is
// empty.
func (s stack) Peek() string {
	if len(s) == 0 {
		return ""
	}
	return s[len(s)-1]
}
|
|
|
|
type MIMEPrinter struct {
|
|
result *bytes.Buffer
|
|
boundaryStack stack
|
|
}
|
|
|
|
func NewMIMEPrinter() (pd *MIMEPrinter) {
|
|
return &MIMEPrinter{
|
|
result: bytes.NewBuffer([]byte("")),
|
|
boundaryStack: stack{},
|
|
}
|
|
}
|
|
|
|
func (pd *MIMEPrinter) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
|
|
if isFirst {
|
|
http.Header(header).Write(pd.result)
|
|
pd.result.Write([]byte("\n"))
|
|
if IsLeaf(header) {
|
|
pd.result.ReadFrom(partReader)
|
|
} else {
|
|
_, params, _ := getContentType(header)
|
|
boundary := params["boundary"]
|
|
pd.boundaryStack = pd.boundaryStack.Push(boundary)
|
|
pd.result.Write([]byte("\nThis is a multi-part message in MIME format.\n--" + boundary + "\n"))
|
|
}
|
|
} else {
|
|
if !isLast {
|
|
pd.result.Write([]byte("\n--" + pd.boundaryStack.Peek() + "\n"))
|
|
} else {
|
|
var boundary string
|
|
pd.boundaryStack, boundary = pd.boundaryStack.Pop()
|
|
pd.result.Write([]byte("\n--" + boundary + "--\n.\n"))
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (pd *MIMEPrinter) String() string {
|
|
return pd.result.String()
|
|
}
|
|
|
|
// ======================== PlainText Collector =========================
|
|
// Collect contents of all non-attachment text/plain parts and return
|
|
// it as a string
|
|
// TODO to file collector_plaintext.go
|
|
|
|
type PlainTextCollector struct {
|
|
target VisitAcceptor
|
|
plainTextContents *bytes.Buffer
|
|
}
|
|
|
|
func NewPlainTextCollector(targetAccepter VisitAcceptor) *PlainTextCollector {
|
|
return &PlainTextCollector{
|
|
target: targetAccepter,
|
|
plainTextContents: bytes.NewBuffer([]byte("")),
|
|
}
|
|
}
|
|
|
|
func (ptc *PlainTextCollector) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
|
|
if isFirst {
|
|
if IsLeaf(header) {
|
|
mediaType, params, _ := getContentType(header)
|
|
disp, _, _ := mime.ParseMediaType(header.Get("Content-Disposition"))
|
|
if mediaType == "text/plain" && disp != "attachment" {
|
|
partData, _ := ioutil.ReadAll(partReader)
|
|
decodedPart := decodePart(bytes.NewReader(partData), header)
|
|
|
|
if buffer, err := ioutil.ReadAll(decodedPart); err == nil {
|
|
buffer, err = DecodeCharset(buffer, mediaType, params)
|
|
if err != nil {
|
|
log.Warnln("Decode charset error:", err)
|
|
err = nil // Don't fail parsing on decoding errors, use original
|
|
}
|
|
ptc.plainTextContents.Write(buffer)
|
|
}
|
|
|
|
err = ptc.target.Accept(bytes.NewReader(partData), header, hasPlainSibling, isFirst, isLast)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
err = ptc.target.Accept(partReader, header, hasPlainSibling, isFirst, isLast)
|
|
return
|
|
}
|
|
|
|
func (ptc PlainTextCollector) GetPlainText() string {
|
|
return ptc.plainTextContents.String()
|
|
}
|
|
|
|
// ======================== Body Collector ==============
|
|
// Collect contents of all non-attachment parts and return
|
|
// it as a string
|
|
// TODO to file collector_body.go
|
|
|
|
type BodyCollector struct {
|
|
target VisitAcceptor
|
|
htmlBodyBuffer *bytes.Buffer
|
|
plainBodyBuffer *bytes.Buffer
|
|
htmlHeaderBuffer *bytes.Buffer
|
|
plainHeaderBuffer *bytes.Buffer
|
|
hasHtml bool
|
|
}
|
|
|
|
func NewBodyCollector(targetAccepter VisitAcceptor) *BodyCollector {
|
|
return &BodyCollector{
|
|
target: targetAccepter,
|
|
htmlBodyBuffer: bytes.NewBuffer([]byte("")),
|
|
plainBodyBuffer: bytes.NewBuffer([]byte("")),
|
|
htmlHeaderBuffer: bytes.NewBuffer([]byte("")),
|
|
plainHeaderBuffer: bytes.NewBuffer([]byte("")),
|
|
}
|
|
}
|
|
|
|
func (bc *BodyCollector) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
|
|
// TODO: collect html and plaintext - if there's html with plain sibling don't include plain/text
|
|
if isFirst {
|
|
if IsLeaf(header) {
|
|
mediaType, params, _ := getContentType(header)
|
|
disp, _, _ := mime.ParseMediaType(header.Get("Content-Disposition"))
|
|
if disp != "attachment" {
|
|
partData, _ := ioutil.ReadAll(partReader)
|
|
decodedPart := decodePart(bytes.NewReader(partData), header)
|
|
if buffer, err := ioutil.ReadAll(decodedPart); err == nil {
|
|
buffer, err = DecodeCharset(buffer, mediaType, params)
|
|
if err != nil {
|
|
log.Warnln("Decode charset error:", err)
|
|
err = nil // Don't fail parsing on decoding errors, use original
|
|
}
|
|
if mediaType == "text/html" {
|
|
bc.hasHtml = true
|
|
http.Header(header).Write(bc.htmlHeaderBuffer)
|
|
bc.htmlBodyBuffer.Write(buffer)
|
|
} else if mediaType == "text/plain" {
|
|
http.Header(header).Write(bc.plainHeaderBuffer)
|
|
bc.plainBodyBuffer.Write(buffer)
|
|
}
|
|
}
|
|
|
|
err = bc.target.Accept(bytes.NewReader(partData), header, hasPlainSibling, isFirst, isLast)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
err = bc.target.Accept(partReader, header, hasPlainSibling, isFirst, isLast)
|
|
return
|
|
}
|
|
|
|
func (bc *BodyCollector) GetBody() (string, string) {
|
|
if bc.hasHtml {
|
|
return bc.htmlBodyBuffer.String(), "text/html"
|
|
} else {
|
|
return bc.plainBodyBuffer.String(), "text/plain"
|
|
}
|
|
}
|
|
|
|
func (bc *BodyCollector) GetHeaders() string {
|
|
if bc.hasHtml {
|
|
return bc.htmlHeaderBuffer.String()
|
|
} else {
|
|
return bc.plainHeaderBuffer.String()
|
|
}
|
|
}
|
|
|
|
// ======================== Attachments Collector ==============
|
|
// Collect contents of all attachment parts and return
|
|
// them as strings
|
|
// TODO to file collector_attachment.go
|
|
|
|
type AttachmentsCollector struct {
|
|
target VisitAcceptor
|
|
attBuffers []string
|
|
attHeaders []string
|
|
}
|
|
|
|
func NewAttachmentsCollector(targetAccepter VisitAcceptor) *AttachmentsCollector {
|
|
return &AttachmentsCollector{
|
|
target: targetAccepter,
|
|
attBuffers: []string{},
|
|
attHeaders: []string{},
|
|
}
|
|
}
|
|
|
|
func (ac *AttachmentsCollector) Accept(partReader io.Reader, header textproto.MIMEHeader, hasPlainSibling bool, isFirst, isLast bool) (err error) {
|
|
if isFirst {
|
|
if IsLeaf(header) {
|
|
mediaType, params, _ := getContentType(header)
|
|
disp, _, _ := mime.ParseMediaType(header.Get("Content-Disposition"))
|
|
if (mediaType != "text/html" && mediaType != "text/plain") || disp == "attachment" {
|
|
partData, _ := ioutil.ReadAll(partReader)
|
|
decodedPart := decodePart(bytes.NewReader(partData), header)
|
|
|
|
if buffer, err := ioutil.ReadAll(decodedPart); err == nil {
|
|
buffer, err = DecodeCharset(buffer, mediaType, params)
|
|
if err != nil {
|
|
log.Warnln("Decode charset error:", err)
|
|
err = nil // Don't fail parsing on decoding errors, use original
|
|
}
|
|
headerBuf := new(bytes.Buffer)
|
|
http.Header(header).Write(headerBuf)
|
|
ac.attHeaders = append(ac.attHeaders, headerBuf.String())
|
|
ac.attBuffers = append(ac.attBuffers, string(buffer))
|
|
}
|
|
|
|
err = ac.target.Accept(bytes.NewReader(partData), header, hasPlainSibling, isFirst, isLast)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
err = ac.target.Accept(partReader, header, hasPlainSibling, isFirst, isLast)
|
|
return
|
|
}
|
|
|
|
func (ac AttachmentsCollector) GetAttachments() []string {
|
|
return ac.attBuffers
|
|
}
|
|
|
|
func (ac AttachmentsCollector) GetAttHeaders() []string {
|
|
return ac.attHeaders
|
|
}
|