From e1d8bc4d17cb7bd79e19fdf866d1da2fedda4de5 Mon Sep 17 00:00:00 2001 From: Koni Marti Date: Tue, 14 Jun 2022 21:10:48 +0200 Subject: [PATCH] msgviewer: open http links from messages Parse http links from a message and display them as completions in the :open-link command. Add the following binds to the [view] section in your binds.conf: = :open-link Parsing can be disabled in aerc.conf by setting parse-http-links to false in the viewer section. Thanks to Moritz for the help with the regular expression. Signed-off-by: Koni Marti Reviewed-by: Moritz Poldrack Acked-by: Robin Jarry --- commands/msgview/open.go | 20 +++++++- config/aerc.conf | 6 +++ config/binds.conf | 2 + config/config.go | 2 + doc/aerc-config.5.scd | 6 +++ lib/parse/hyperlinks.go | 44 ++++++++++++++++ lib/parse/hyperlinks_test.go | 99 ++++++++++++++++++++++++++++++++++++ widgets/msgviewer.go | 14 ++++- widgets/providesmessage.go | 1 + 9 files changed, 192 insertions(+), 2 deletions(-) create mode 100644 lib/parse/hyperlinks.go create mode 100644 lib/parse/hyperlinks_test.go diff --git a/commands/msgview/open.go b/commands/msgview/open.go index d8a4de0..3d62fe2 100644 --- a/commands/msgview/open.go +++ b/commands/msgview/open.go @@ -8,6 +8,7 @@ import ( "os" "time" + "git.sr.ht/~rjarry/aerc/commands" "git.sr.ht/~rjarry/aerc/lib" "git.sr.ht/~rjarry/aerc/logging" "git.sr.ht/~rjarry/aerc/widgets" @@ -20,10 +21,16 @@ func init() { } func (Open) Aliases() []string { - return []string{"open"} + return []string{"open", "open-link"} } func (Open) Complete(aerc *widgets.Aerc, args []string) []string { + mv := aerc.SelectedTab().(*widgets.MessageViewer) + if mv != nil { + if p := mv.SelectedMessagePart(); p != nil { + return commands.CompletionFromList(aerc, p.Links, args) + } + } return nil } @@ -31,6 +38,17 @@ func (Open) Execute(aerc *widgets.Aerc, args []string) error { mv := aerc.SelectedTab().(*widgets.MessageViewer) p := mv.SelectedMessagePart() + if args[0] == "open-link" && len(args) > 1 { + if link 
:= args[1]; link != "" { + go func() { + if err := lib.NewXDGOpen(link).Start(); err != nil { + aerc.PushError(fmt.Sprintf("%s: %s", args[0], err.Error())) + } + }() + } + return nil + } + store := mv.Store() store.FetchBodyPart(p.Msg.Uid, p.Index, func(reader io.Reader) { extension := "" diff --git a/config/aerc.conf b/config/aerc.conf index 1a3c1f4..8043412 100644 --- a/config/aerc.conf +++ b/config/aerc.conf @@ -209,6 +209,12 @@ header-layout=From|To,Cc|Bcc,Date,Subject # Default: false always-show-mime=false +# Parses and extracts http links when viewing a message. Links can then be +# accessed with the open-link command. +# +# Default: true +parse-http-links=true + # How long to wait after the last input before auto-completion is triggered. # # Default: 250ms diff --git a/config/binds.conf b/config/binds.conf index 4f67bbe..589dc55 100644 --- a/config/binds.conf +++ b/config/binds.conf @@ -64,6 +64,8 @@ S = :save D = :delete A = :archive flat + = :open-link + f = :forward rr = :reply -a rq = :reply -aq diff --git a/config/config.go b/config/config.go index 923a1a0..dae32b7 100644 --- a/config/config.go +++ b/config/config.go @@ -160,6 +160,7 @@ type ViewerConfig struct { Alternatives []string ShowHeaders bool `ini:"show-headers"` AlwaysShowMime bool `ini:"always-show-mime"` + ParseHttpLinks bool `ini:"parse-http-links"` HeaderLayout [][]string `ini:"-"` KeyPassthrough bool `ini:"-"` } @@ -705,6 +706,7 @@ func LoadConfigFromFile(root *string, logger *log.Logger) (*AercConfig, error) { {"Date"}, {"Subject"}, }, + ParseHttpLinks: true, }, Statusline: StatuslineConfig{ diff --git a/doc/aerc-config.5.scd b/doc/aerc-config.5.scd index 15c6ca3..47ae752 100644 --- a/doc/aerc-config.5.scd +++ b/doc/aerc-config.5.scd @@ -401,6 +401,12 @@ These options are configured in the *[viewer]* section of aerc.conf. Default: false +*parse-http-links* + Parses and extracts http links when viewing a message. Links can then be + accessed with the open-link command. 
// submatch is the capture group that matches a single http or https URL.
var submatch = `(https?:\/\/[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,10}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*))`

// httpRe matches a URL enclosed in double quotes, parentheses or angle
// brackets, or a bare URL. Every alternative carries its own capture group,
// so for a given match only one of the four groups is non-empty.
var httpRe = regexp.MustCompile("\"" + submatch + "\"" + "|" + "\\(" + submatch + "\\)" + "|" + "<" + submatch + ">" + "|" + submatch)

// HttpLinks reads r until EOF (or until a read error occurs), collects the
// http and https links found in the text and returns a reader holding a copy
// of everything that was read, together with the links.
//
// Lines of any length are handled: the input is read with
// bufio.Reader.ReadString rather than bufio.Scanner, because the scanner
// gives up on a line longer than its default token limit, which would stop
// the copy early and hand a truncated reader back to the caller.
//
// Links are de-duplicated and returned in order of first appearance. The
// returned slice is never nil.
func HttpLinks(r io.Reader) (io.Reader, []string) {
	var buf bytes.Buffer
	// Everything read through tr is mirrored into buf, which becomes the
	// replacement reader for the caller.
	tr := io.TeeReader(r, &buf)
	br := bufio.NewReader(tr)

	seen := make(map[string]struct{})
	results := []string{}
	for {
		// ReadString returns the data read so far even when it also
		// returns an error, so the final line without a trailing newline
		// is still inspected before the loop ends.
		line, err := br.ReadString('\n')
		if strings.Contains(line, "http") {
			for _, word := range strings.Fields(line) {
				groups := httpRe.FindStringSubmatch(word)
				if len(groups) == 0 {
					continue
				}
				for _, l := range groups[1:] {
					if l == "" {
						continue
					}
					l = strings.TrimSpace(l)
					if _, dup := seen[l]; dup {
						continue
					}
					seen[l] = struct{}{}
					results = append(results, l)
				}
			}
		}
		if err != nil {
			// io.EOF or a read error: buf holds whatever could be read.
			break
		}
	}

	return &buf, results
}
[]string{"https://aerc-mail.org"}, + }, + { + text: "text (https://aerc-mail.org) more text", + links: []string{"https://aerc-mail.org"}, + }, + { + text: "text \"https://aerc-mail.org\" more text", + links: []string{"https://aerc-mail.org"}, + }, + { + text: "text more text", + links: []string{"https://aerc-mail.org"}, + }, + { + text: "", + links: []string{"https://aerc-mail.org"}, + }, + { + text: "text https://aerc-mail.org more text https://aerc-mail.org more text", + links: []string{"https://aerc-mail.org"}, + }, + { + text: "text https://aerc-mail.org more text http://git.sr.ht/~rjarry/aerc more text", + links: []string{"https://aerc-mail.org", "http://git.sr.ht/~rjarry/aerc"}, + }, + { + text: "text http://www.ietf.org/rfc/rfc2396.txt more text", + links: []string{"http://www.ietf.org/rfc/rfc2396.txt"}, + }, + { + text: "text more text", + links: []string{"http://example.com:8042/over/there?name=ferret#nose"}, + }, + { + text: "text http://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm more text", + links: []string{"http://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm"}, + }, + { + text: "text https://www.ics.uci.edu/pub/ietf/uri/#Related more text", + links: []string{"https://www.ics.uci.edu/pub/ietf/uri/#Related"}, + }, + { + text: "text https://www.example.com/index.php?id_sezione=360&sid=3a5ebc944f41daa6f849f730f1 more text", + links: []string{"https://www.example.com/index.php?id_sezione=360&sid=3a5ebc944f41daa6f849f730f1"}, + }, + } + + for _, test := range tests { + + // make sure reader is exact copy of input reader + reader, links := parse.HttpLinks(strings.NewReader(test.text)) + if data, err := ioutil.ReadAll(reader); err != nil { + t.Errorf("could not read text: %v", err) + } else if string(data) != test.text { + t.Errorf("did not copy input reader correctly") + } + + // check correct parsed links + if len(links) != len(test.links) { + t.Errorf("different number of links: got %d but expected %d", len(links), 
len(test.links)) + } + linkMap := make(map[string]struct{}) + for _, got := range links { + linkMap[got] = struct{}{} + } + for _, expected := range test.links { + if _, ok := linkMap[expected]; !ok { + t.Errorf("link not parsed: %s", expected) + } + } + + } +} diff --git a/widgets/msgviewer.go b/widgets/msgviewer.go index 9876467..b41b57c 100644 --- a/widgets/msgviewer.go +++ b/widgets/msgviewer.go @@ -19,6 +19,7 @@ import ( "git.sr.ht/~rjarry/aerc/lib" "git.sr.ht/~rjarry/aerc/lib/auth" "git.sr.ht/~rjarry/aerc/lib/format" + "git.sr.ht/~rjarry/aerc/lib/parse" "git.sr.ht/~rjarry/aerc/lib/ui" "git.sr.ht/~rjarry/aerc/logging" "git.sr.ht/~rjarry/aerc/models" @@ -325,6 +326,7 @@ func (mv *MessageViewer) SelectedMessagePart() *PartInfo { Index: part.index, Msg: part.msg.MessageInfo(), Part: part.part, + Links: part.links, } } @@ -518,6 +520,8 @@ type PartViewer struct { term *Terminal grid *ui.Grid uiConfig config.UIConfig + + links []string } func NewPartViewer(acct *AccountView, conf *config.AercConfig, @@ -670,6 +674,14 @@ func (pv *PartViewer) writeMailHeaders() { } } +func (pv *PartViewer) hyperlinks(r io.Reader) (reader io.Reader) { + if !pv.conf.Viewer.ParseHttpLinks { + return r + } + reader, pv.links = parse.HttpLinks(r) + return reader +} + func (pv *PartViewer) copyFilterOutToPager() { stdout, _ := pv.filter.StdoutPipe() stderr, _ := pv.filter.StderrPipe() @@ -708,7 +720,7 @@ func (pv *PartViewer) copyFilterOutToPager() { } func (pv *PartViewer) copySourceToSinkStripAnsi() { - scanner := bufio.NewScanner(pv.source) + scanner := bufio.NewScanner(pv.hyperlinks(pv.source)) // some people send around huge html without any newline in between // this did overflow the default 64KB buffer of bufio.Scanner. 
// If something can't fit in a GB there's no hope left diff --git a/widgets/providesmessage.go b/widgets/providesmessage.go index c1d821c..b0f261d 100644 --- a/widgets/providesmessage.go +++ b/widgets/providesmessage.go @@ -10,6 +10,7 @@ type PartInfo struct { Index []int Msg *models.MessageInfo Part *models.BodyStructure + Links []string } type ProvidesMessage interface {