2019-08-05 09:16:07 +02:00
|
|
|
package lib
|
|
|
|
|
|
|
|
import (
|
2022-04-22 13:38:41 +02:00
|
|
|
"bufio"
|
2019-08-05 09:16:07 +02:00
|
|
|
"bytes"
|
2020-05-05 11:26:43 +02:00
|
|
|
"errors"
|
2019-08-05 09:16:07 +02:00
|
|
|
"fmt"
|
|
|
|
"io"
|
2020-05-05 11:26:43 +02:00
|
|
|
"regexp"
|
2019-08-05 09:16:07 +02:00
|
|
|
"strings"
|
2020-05-05 11:26:43 +02:00
|
|
|
"time"
|
2019-08-05 09:16:07 +02:00
|
|
|
|
2022-09-21 00:27:58 +02:00
|
|
|
"git.sr.ht/~rjarry/aerc/logging"
|
2021-11-05 10:19:46 +01:00
|
|
|
"git.sr.ht/~rjarry/aerc/models"
|
2019-08-05 09:16:07 +02:00
|
|
|
"github.com/emersion/go-message"
|
|
|
|
_ "github.com/emersion/go-message/charset"
|
|
|
|
"github.com/emersion/go-message/mail"
|
|
|
|
)
|
|
|
|
|
2020-05-05 11:26:43 +02:00
|
|
|
// dateRe locates an RFC 1123Z style timestamp ("Mon, 02 Jan 2006 15:04:05
// -0700") inside a larger string such as a Received header field.
//
// The pattern is deliberately more lenient than time.RFC1123Z: the comma
// after the weekday and the seconds are optional and the day of the month may
// have a single digit. The zone offset must start with '+' or '-'.
var dateRe = regexp.MustCompile(`(((Mon|Tue|Wed|Thu|Fri|Sat|Sun))[,]?\s[0-9]{1,2})\s` +
	`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s` +
	`([0-9]{4})\s([0-9]{2}):([0-9]{2})(:([0-9]{2}))?\s([+-][0-9]{4})`)
|
2020-05-05 11:26:43 +02:00
|
|
|
|
2019-08-05 09:16:07 +02:00
|
|
|
func FetchEntityPartReader(e *message.Entity, index []int) (io.Reader, error) {
|
2020-06-19 17:58:08 +02:00
|
|
|
if len(index) == 0 {
|
|
|
|
// non multipart, simply return everything
|
|
|
|
return bufReader(e)
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
|
|
|
if mpr := e.MultipartReader(); mpr != nil {
|
|
|
|
idx := 0
|
|
|
|
for {
|
|
|
|
idx++
|
|
|
|
part, err := mpr.NextPart()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if idx == index[0] {
|
|
|
|
rest := index[1:]
|
|
|
|
if len(rest) < 1 {
|
2020-01-04 21:13:53 +01:00
|
|
|
return bufReader(part)
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
|
|
|
return FetchEntityPartReader(part, index[1:])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-06-19 17:58:08 +02:00
|
|
|
return nil, fmt.Errorf("FetchEntityPartReader: unexpected code reached")
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
|
|
|
|
2022-07-31 22:16:40 +02:00
|
|
|
// TODO: the UI doesn't seem to like readers which aren't buffers
|
2020-01-04 21:13:53 +01:00
|
|
|
func bufReader(e *message.Entity) (io.Reader, error) {
|
2019-08-05 09:16:07 +02:00
|
|
|
var buf bytes.Buffer
|
|
|
|
if _, err := io.Copy(&buf, e.Body); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return &buf, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// splitMIME splits a MIME type of the form "major/minor" into its two halves.
// If m does not contain exactly one slash, only the text in front of the first
// slash (or all of m when there is none) is returned and the minor part is
// empty.
func splitMIME(m string) (string, string) {
	first := strings.IndexByte(m, '/')
	if first < 0 {
		return m, ""
	}
	major, remainder := m[:first], m[first+1:]
	if strings.Contains(remainder, "/") {
		// more than one separator: the minor part is ambiguous
		return major, ""
	}
	return major, remainder
}
|
|
|
|
|
2022-06-02 14:43:34 +02:00
|
|
|
func fixContentType(h message.Header) (string, map[string]string) {
|
|
|
|
ct, rest := h.Get("Content-Type"), ""
|
|
|
|
if i := strings.Index(ct, ";"); i > 0 {
|
|
|
|
ct, rest = ct[:i], ct[i:]
|
|
|
|
}
|
|
|
|
|
|
|
|
// check if there are quotes around the content type
|
|
|
|
if strings.Contains(ct, "\"") {
|
|
|
|
header := strings.ReplaceAll(ct, "\"", "")
|
|
|
|
if rest != "" {
|
|
|
|
header += rest
|
|
|
|
}
|
|
|
|
h.Set("Content-Type", header)
|
|
|
|
if contenttype, params, err := h.ContentType(); err == nil {
|
|
|
|
return contenttype, params
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// if all else fails, return text/plain
|
|
|
|
return "text/plain", nil
|
|
|
|
}
|
|
|
|
|
2020-03-03 22:20:07 +01:00
|
|
|
func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
|
2019-08-05 09:16:07 +02:00
|
|
|
var body models.BodyStructure
|
|
|
|
contentType, ctParams, err := e.Header.ContentType()
|
|
|
|
if err != nil {
|
2022-06-02 14:43:34 +02:00
|
|
|
// try to fix the error; if all measures fail, then return a
|
|
|
|
// text/plain content type to display at least plaintext
|
|
|
|
contentType, ctParams = fixContentType(e.Header)
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
2022-06-02 14:43:34 +02:00
|
|
|
|
2019-08-05 09:16:07 +02:00
|
|
|
mimeType, mimeSubType := splitMIME(contentType)
|
|
|
|
body.MIMEType = mimeType
|
|
|
|
body.MIMESubType = mimeSubType
|
|
|
|
body.Params = ctParams
|
|
|
|
body.Description = e.Header.Get("content-description")
|
|
|
|
body.Encoding = e.Header.Get("content-transfer-encoding")
|
|
|
|
if cd := e.Header.Get("content-disposition"); cd != "" {
|
|
|
|
contentDisposition, cdParams, err := e.Header.ContentDisposition()
|
|
|
|
if err != nil {
|
2022-07-31 15:15:27 +02:00
|
|
|
return nil, fmt.Errorf("could not parse content disposition: %w", err)
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
|
|
|
body.Disposition = contentDisposition
|
|
|
|
body.DispositionParams = cdParams
|
|
|
|
}
|
|
|
|
body.Parts = []*models.BodyStructure{}
|
|
|
|
if mpr := e.MultipartReader(); mpr != nil {
|
|
|
|
for {
|
|
|
|
part, err := mpr.NextPart()
|
2022-07-31 15:15:27 +02:00
|
|
|
if errors.Is(err, io.EOF) {
|
2019-08-05 09:16:07 +02:00
|
|
|
return &body, nil
|
|
|
|
} else if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-03-03 22:20:07 +01:00
|
|
|
ps, err := ParseEntityStructure(part)
|
2019-08-05 09:16:07 +02:00
|
|
|
if err != nil {
|
2020-08-21 20:58:30 +02:00
|
|
|
return nil, fmt.Errorf("could not parse child entity structure: %w", err)
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
|
|
|
body.Parts = append(body.Parts, ps)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return &body, nil
|
|
|
|
}
|
|
|
|
|
2020-07-27 01:48:22 +02:00
|
|
|
// DateParseError is the sentinel error that parseEnvelope wraps when only the
// date of a message could not be parsed. Check for it with errors.Is.
var DateParseError = errors.New("date parsing failed")
|
|
|
|
|
2019-08-05 09:16:07 +02:00
|
|
|
func parseEnvelope(h *mail.Header) (*models.Envelope, error) {
|
|
|
|
from, err := parseAddressList(h, "from")
|
|
|
|
if err != nil {
|
2022-07-31 15:15:27 +02:00
|
|
|
return nil, fmt.Errorf("could not read from address: %w", err)
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
|
|
|
to, err := parseAddressList(h, "to")
|
|
|
|
if err != nil {
|
2022-07-31 15:15:27 +02:00
|
|
|
return nil, fmt.Errorf("could not read to address: %w", err)
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
|
|
|
cc, err := parseAddressList(h, "cc")
|
|
|
|
if err != nil {
|
2022-07-31 15:15:27 +02:00
|
|
|
return nil, fmt.Errorf("could not read cc address: %w", err)
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
|
|
|
bcc, err := parseAddressList(h, "bcc")
|
|
|
|
if err != nil {
|
2022-07-31 15:15:27 +02:00
|
|
|
return nil, fmt.Errorf("could not read bcc address: %w", err)
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
2019-11-15 11:00:44 +01:00
|
|
|
replyTo, err := parseAddressList(h, "reply-to")
|
|
|
|
if err != nil {
|
2022-07-31 15:15:27 +02:00
|
|
|
return nil, fmt.Errorf("could not read reply-to address: %w", err)
|
2019-11-15 11:00:44 +01:00
|
|
|
}
|
2019-08-05 09:16:07 +02:00
|
|
|
subj, err := h.Subject()
|
|
|
|
if err != nil {
|
2022-07-31 15:15:27 +02:00
|
|
|
return nil, fmt.Errorf("could not read subject: %w", err)
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
2020-11-08 13:39:51 +01:00
|
|
|
msgID, err := h.MessageID()
|
2019-08-05 09:16:07 +02:00
|
|
|
if err != nil {
|
2022-07-31 22:16:40 +02:00
|
|
|
// proper parsing failed, so fall back to whatever is there
|
2020-11-08 13:39:51 +01:00
|
|
|
msgID, err = h.Text("message-id")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
2020-07-27 01:48:22 +02:00
|
|
|
date, err := parseDate(h)
|
|
|
|
if err != nil {
|
|
|
|
// still return a valid struct plus a sentinel date parsing error
|
|
|
|
// if only the date parsing failed
|
2022-08-04 22:48:14 +02:00
|
|
|
err = fmt.Errorf("%w: %v", DateParseError, err)
|
2020-07-27 01:48:22 +02:00
|
|
|
}
|
2019-08-05 09:16:07 +02:00
|
|
|
return &models.Envelope{
|
|
|
|
Date: date,
|
|
|
|
Subject: subj,
|
|
|
|
MessageId: msgID,
|
|
|
|
From: from,
|
2019-11-15 11:00:44 +01:00
|
|
|
ReplyTo: replyTo,
|
2019-08-05 09:16:07 +02:00
|
|
|
To: to,
|
|
|
|
Cc: cc,
|
|
|
|
Bcc: bcc,
|
2020-07-27 01:48:22 +02:00
|
|
|
}, err
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
|
|
|
|
2020-07-27 01:48:22 +02:00
|
|
|
// parseDate tries to parse the date from the Date header with non std formats
|
|
|
|
// if this fails it tries to parse the received header as well
|
2020-05-05 11:26:43 +02:00
|
|
|
func parseDate(h *mail.Header) (time.Time, error) {
|
2020-07-27 01:48:22 +02:00
|
|
|
t, err := h.Date()
|
|
|
|
if err == nil {
|
2020-05-05 11:26:43 +02:00
|
|
|
return t, nil
|
|
|
|
}
|
|
|
|
text, err := h.Text("date")
|
2020-07-27 01:48:22 +02:00
|
|
|
// sometimes, no error occurs but the date is empty.
|
|
|
|
// In this case, guess time from received header field
|
|
|
|
if err != nil || text == "" {
|
|
|
|
t, err := parseReceivedHeader(h)
|
|
|
|
if err == nil {
|
|
|
|
return t, nil
|
2020-05-05 11:26:43 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
layouts := []string{
|
|
|
|
// X-Mailer: EarthLink Zoo Mail 1.0
|
|
|
|
"Mon, _2 Jan 2006 15:04:05 -0700 (GMT-07:00)",
|
|
|
|
}
|
|
|
|
for _, layout := range layouts {
|
|
|
|
if t, err := time.Parse(layout, text); err == nil {
|
|
|
|
return t, nil
|
|
|
|
}
|
|
|
|
}
|
2020-07-27 01:48:22 +02:00
|
|
|
// still no success, try the received header as a last resort
|
|
|
|
t, err = parseReceivedHeader(h)
|
|
|
|
if err != nil {
|
|
|
|
return time.Time{}, fmt.Errorf("unrecognized date format: %s", text)
|
|
|
|
}
|
|
|
|
return t, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func parseReceivedHeader(h *mail.Header) (time.Time, error) {
|
|
|
|
guess, err := h.Text("received")
|
|
|
|
if err != nil {
|
2022-07-31 15:15:27 +02:00
|
|
|
return time.Time{}, fmt.Errorf("received header not parseable: %w",
|
2020-07-27 01:48:22 +02:00
|
|
|
err)
|
|
|
|
}
|
|
|
|
return time.Parse(time.RFC1123Z, dateRe.FindString(guess))
|
2020-05-05 11:26:43 +02:00
|
|
|
}
|
|
|
|
|
2020-11-10 19:57:09 +01:00
|
|
|
func parseAddressList(h *mail.Header, key string) ([]*mail.Address, error) {
|
2021-02-22 11:46:58 +01:00
|
|
|
hdr, err := h.Text(key)
|
2022-09-28 23:18:42 +02:00
|
|
|
if err != nil && !message.IsUnknownCharset(err) {
|
2019-08-05 09:16:07 +02:00
|
|
|
return nil, err
|
|
|
|
}
|
2022-09-28 23:18:42 +02:00
|
|
|
if hdr == "" {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
add, err := mail.ParseAddressList(hdr)
|
|
|
|
if err != nil {
|
|
|
|
return []*mail.Address{{Name: hdr}}, nil
|
|
|
|
}
|
|
|
|
return add, err
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// RawMessage is an interface that describes a raw message
|
|
|
|
type RawMessage interface {
|
2022-01-19 19:10:08 +01:00
|
|
|
NewReader() (io.ReadCloser, error)
|
2019-08-05 09:16:07 +02:00
|
|
|
ModelFlags() ([]models.Flag, error)
|
2019-12-23 12:51:58 +01:00
|
|
|
Labels() ([]string, error)
|
2019-08-05 09:16:07 +02:00
|
|
|
UID() uint32
|
|
|
|
}
|
|
|
|
|
|
|
|
// MessageInfo populates a models.MessageInfo struct for the message.
|
|
|
|
// based on the reader returned by NewReader
|
|
|
|
func MessageInfo(raw RawMessage) (*models.MessageInfo, error) {
|
2020-08-21 20:58:30 +02:00
|
|
|
var parseErr error
|
2019-08-05 09:16:07 +02:00
|
|
|
r, err := raw.NewReader()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2022-01-19 19:10:08 +01:00
|
|
|
defer r.Close()
|
2022-09-21 00:27:58 +02:00
|
|
|
msg, err := ReadMessage(r)
|
2019-08-05 09:16:07 +02:00
|
|
|
if err != nil {
|
2022-07-31 15:15:27 +02:00
|
|
|
return nil, fmt.Errorf("could not read message: %w", err)
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
2020-03-03 22:20:07 +01:00
|
|
|
bs, err := ParseEntityStructure(msg)
|
2020-08-21 20:58:30 +02:00
|
|
|
if errors.As(err, new(message.UnknownEncodingError)) {
|
|
|
|
parseErr = err
|
|
|
|
} else if err != nil {
|
2022-07-31 15:15:27 +02:00
|
|
|
return nil, fmt.Errorf("could not get structure: %w", err)
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
2022-03-18 09:53:02 +01:00
|
|
|
h := &mail.Header{Header: msg.Header}
|
2020-07-27 01:48:22 +02:00
|
|
|
env, err := parseEnvelope(h)
|
|
|
|
if err != nil && !errors.Is(err, DateParseError) {
|
2022-07-31 15:15:27 +02:00
|
|
|
return nil, fmt.Errorf("could not parse envelope: %w", err)
|
2020-07-27 01:48:22 +02:00
|
|
|
// if only the date parsing failed we still get the rest of the
|
|
|
|
// envelop structure in a valid state.
|
|
|
|
// Date parsing errors are fairly common and it's better to be
|
|
|
|
// slightly off than to not be able to read the mails at all
|
|
|
|
// hence we continue here
|
|
|
|
}
|
|
|
|
recDate, _ := parseReceivedHeader(h)
|
|
|
|
if recDate.IsZero() {
|
|
|
|
// better than nothing, if incorrect
|
|
|
|
recDate = env.Date
|
2019-08-05 09:16:07 +02:00
|
|
|
}
|
|
|
|
flags, err := raw.ModelFlags()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2019-12-23 12:51:58 +01:00
|
|
|
labels, err := raw.Labels()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2019-08-05 09:16:07 +02:00
|
|
|
return &models.MessageInfo{
|
|
|
|
BodyStructure: bs,
|
|
|
|
Envelope: env,
|
|
|
|
Flags: flags,
|
2019-12-23 12:51:58 +01:00
|
|
|
Labels: labels,
|
2020-07-27 01:48:22 +02:00
|
|
|
InternalDate: recDate,
|
2022-03-18 09:53:02 +01:00
|
|
|
RFC822Headers: &mail.Header{Header: msg.Header},
|
2019-08-05 09:16:07 +02:00
|
|
|
Size: 0,
|
|
|
|
Uid: raw.UID(),
|
2020-08-21 20:58:30 +02:00
|
|
|
Error: parseErr,
|
2019-08-05 09:16:07 +02:00
|
|
|
}, nil
|
|
|
|
}
|
2022-04-22 13:38:41 +02:00
|
|
|
|
|
|
|
// NewCRLFReader returns a reader with CRLF line endings.
//
// r is consumed completely and buffered in memory. Every line, whether it
// ended in "\n", "\r\n" or was the unterminated last line, is emitted with a
// trailing "\r\n". Lines may be arbitrarily long.
//
// The signature cannot report errors: if reading from r fails, the data read
// up to that point is still returned and the error is dropped.
func NewCRLFReader(r io.Reader) io.Reader {
	var buf bytes.Buffer
	br := bufio.NewReader(r)
	for {
		// ReadBytes has no line length limit, unlike bufio.Scanner which
		// gives up on lines longer than 64 KiB and would silently truncate
		// the output at that point.
		line, err := br.ReadBytes('\n')
		if len(line) > 0 {
			line = bytes.TrimSuffix(line, []byte("\n"))
			line = bytes.TrimSuffix(line, []byte("\r"))
			buf.Write(line)
			buf.WriteString("\r\n")
		}
		if err != nil {
			// io.EOF or a read failure: nothing more to fetch
			break
		}
	}
	return &buf
}
|
2022-09-21 00:27:58 +02:00
|
|
|
|
|
|
|
// ReadMessage is a wrapper for the message.Read function to read a message
|
|
|
|
// from r. The message's encoding and charset are automatically decoded to
|
|
|
|
// UTF-8. If an unknown charset is encountered, the error is logged but a nil
|
|
|
|
// error is returned since the entity object can still be read.
|
|
|
|
func ReadMessage(r io.Reader) (*message.Entity, error) {
|
|
|
|
entity, err := message.Read(r)
|
|
|
|
if message.IsUnknownCharset(err) {
|
|
|
|
logging.Warnf("unknown charset encountered")
|
|
|
|
} else if err != nil {
|
|
|
|
return nil, fmt.Errorf("could not read message: %w", err)
|
|
|
|
}
|
|
|
|
return entity, nil
|
|
|
|
}
|