From 897d12ed4e20fca3abb4cdcd7992938620d9d90a Mon Sep 17 00:00:00 2001 From: Fabio Manganiello Date: Fri, 20 Jan 2023 23:17:14 +0100 Subject: [PATCH] Support for Atom feeds. Closes: #2 --- src/background.ts | 14 ++- src/main.ts | 269 +++++++++++++++++++++++++++++++-------------- src/viewer/App.vue | 32 +++++- 3 files changed, 221 insertions(+), 94 deletions(-) diff --git a/src/background.ts b/src/background.ts index 00821c3..a32b390 100644 --- a/src/background.ts +++ b/src/background.ts @@ -38,13 +38,14 @@ const onFeedDownloaded = (req: XMLHttpRequest) => { await browser.tabs.sendMessage( tab.id, { type: 'renderFeed', + url: req.responseURL, document: req.responseText } ) } } -const renderFeed = (url: string) => { +const downloadFeed = (url: string) => { state.awaitingResponse = true const req = new XMLHttpRequest() req.onload = onFeedDownloaded(req) @@ -66,7 +67,7 @@ const updateFeedUrl = (tabId: number, feedUrl: string | null) => { browser.pageAction.onClicked.addListener( async () => { if (state.feedUrl?.length) - renderFeed(state.feedUrl) + downloadFeed(state.feedUrl) } ) @@ -74,8 +75,8 @@ browser.webNavigation.onCompleted.addListener( async (event: {tabId: number}) => { const { tabId } = event const feedUrl = await browser.tabs.sendMessage(tabId, {type: 'extractFeedUrl'}) + await browser.tabs.sendMessage(tabId, {type: 'renderFeed', url: feedUrl}) updateFeedUrl(tabId, feedUrl) - await browser.tabs.sendMessage(tabId, {type: 'renderFeed'}) } ) @@ -92,8 +93,11 @@ browser.webRequest.onHeadersReceived.addListener( h => h.name.toLowerCase() === 'content-type' )?.value || '' - if (contentType.startsWith('application/rss+xml')) - renderFeed(url) + if ( + contentType.startsWith('application/rss+xml') || + contentType.startsWith('application/atom+xml') + ) + downloadFeed(url) }, {urls: ['']}, ['blocking', 'responseHeaders'] diff --git a/src/main.ts b/src/main.ts index cc11e53..bd61ca9 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,36 +1,119 @@ import browser from 'webextension-polyfill'; -const parseItemImage = (item: Element) => { - const images = - Array.from(item.getElementsByTagName('media:content')) - .filter((content) => - (content.getAttribute('type') || '').startsWith('image/') || - content.getAttribute('medium') === 'image' - ) +// Parse a feed, given as a DOM element +const parseFeed = (feed: Element, url: string | null) => { + if (feed.tagName.toLowerCase() == 'channel') + return parseRSSFeed(feed, url) - if (!images.length) - return - - const { url } = images.reduce((maxImage, content) => { - const width = parseFloat(content.getAttribute('width') || '0') - if (width > maxImage.width) { - maxImage.url = content.getAttribute('url') || '' - maxImage.width = width - } - - return maxImage - }, { - width: parseFloat(images[0].getAttribute('width') || '0'), - url: images[0].getAttribute('url'), - }) - - return { - url: url - } + return parseAtomFeed(feed, url) } -const pubDateToInterval = (item: Element) => { - const dateStr = getNodeContent(item, 'pubDate') +// Parse an RSS feed +const parseRSSFeed = (feed: Element, url: string | null) => { + const imageElement = feed.getElementsByTagName('image')[0] + return { + feedData: { + title: getNodeContent(feed, 'title'), + description: getNodeContent(feed, 'description'), + feedUrl: url?.length ? url : window.location.href, + homeUrl: getNodeContent(feed, 'link'), + image: imageElement ? { + title: getNodeContent(imageElement, 'title'), + imageUrl: getNodeContent(imageElement, 'url'), + targetUrl: getNodeContent(imageElement, 'link'), + } : null, + html: { + title: false, + description: false, + }, + + items: Array.from(feed.getElementsByTagName('item')).map((item) => { + return { + title: getNodeContent(item, 'title'), + description: getNodeContent(item, 'description'), + url: getNodeContent(item, 'link'), + image: parseRSSItemImage(item), + pubDate: getNodeContent(item, 'pubDate'), + age: pubDateToInterval(getNodeContent(item, 'pubDate'),), + categories: Array.from(item.getElementsByTagName('category')).map((cat) => + cat.firstChild?.textContent + ), + html: { + title: false, + }, + } + }).sort((a, b) => itemTime(b) - itemTime(a)) + } + } +} + +// Parse an Atom feed +const parseAtomFeed = (feed: Element, url: string | null) => { + const homeURL = getAtomLinksByType(feed, 'text/html')?.[0] + const logoURL = toAbsoluteURL(getNodeContent(feed, 'logo')) + const iconURL = toAbsoluteURL(getNodeContent(feed, 'icon')) + + return { + feedData: { + title: getNodeContent(feed, 'title'), + description: getNodeContent(feed, 'subtitle'), + feedUrl: url?.length ? url : window.location.href, + homeUrl: homeURL, + image: { + imageUrl: logoURL || iconURL, + targetUrl: homeURL, + }, + html: { + title: feed.getElementsByTagName('title')[0]?.getAttribute('type') === 'html', + description: feed.getElementsByTagName('subtitle')[0]?.getAttribute('type') === 'html', + }, + items: Array.from(feed.getElementsByTagName('entry')).map((item) => { + return { + title: getNodeContent(item, 'title'), + description: getNodeContent(item, 'content') || getNodeContent(item, 'summary'), + url: getAtomLinksByType(item, 'text/html')?.[0], + image: getAtomLinksByType(item, 'image/')?.[0], + pubDate: getNodeContent(item, 'updated'), + age: pubDateToInterval(getNodeContent(item, 'updated')), + categories: Array.from(item.getElementsByTagName('category')).map((cat) => + cat.firstChild?.textContent + ), + html: { + title: item.getElementsByTagName('title')[0]?.getAttribute('type') === 'html', + }, + } + }).sort((a, b) => itemTime(b) - itemTime(a)) + } + } +} + +// Convert relative URLs to absolute +const toAbsoluteURL = (link: string | null): string | null => { + if (link?.length && !link.match(/^https?:\/\//)) { + let port = window.location.port + if (port.length) + port = `:${port}` + link = `${window.location.protocol}//${window.location.hostname}${port}${link}` + } + + return link +} + +// Get the raw text content of an XML node +const getNodeContent = (parent: Element, tagName: string) => + // @ts-ignore + parent.getElementsByTagName(tagName)[0]?.firstChild?.wholeText + +// Extract the publication time of an item as a timestamp +const itemTime = (item: {pubDate: string}) => { + const dateStr = item.pubDate + if (!dateStr?.length) + return 0 + return (new Date(dateStr)).getTime() +} + +// Convert the publication date to an age string +const pubDateToInterval = (dateStr: string) => { if (!dateStr?.length) return @@ -61,53 +144,62 @@ const pubDateToInterval = (item: Element) => { return `${interval.toFixed(0)} ${unit}` } -const getNodeContent = (parent: Element, tagName: string) => - // @ts-ignore - parent.getElementsByTagName(tagName)[0]?.firstChild?.wholeText +// Extract the main image of an RSS item +const parseRSSItemImage = (item: Element) => { + const images = + Array.from(item.getElementsByTagName('media:content')) + .filter((content) => + (content.getAttribute('type') || '').startsWith('image/') || + content.getAttribute('medium') === 'image' + ) -const parseFeed = (channel: Element) => { - const imageElement = channel.getElementsByTagName('image')[0] - const itemTime = (item: {pubDate: string}) => { - const dateStr = item.pubDate - if (!dateStr?.length) - return 0 - return (new Date(dateStr)).getTime() - } + if (!images.length) + return - return { - feedData: { - title: getNodeContent(channel, 'title'), - description: getNodeContent(channel, 'description'), - feedUrl: window.location.href, - homeUrl: getNodeContent(channel, 'link'), - image: imageElement ? { - title: getNodeContent(imageElement, 'title'), - imageUrl: getNodeContent(imageElement, 'url'), - targetUrl: getNodeContent(imageElement, 'link'), - } : null, + const { url } = images.reduce((maxImage, content) => { + const width = parseFloat(content.getAttribute('width') || '0') + if (width > maxImage.width) { + maxImage.url = content.getAttribute('url') || '' + maxImage.width = width + } - items: Array.from(channel.getElementsByTagName('item')).map((item) => { - return { - title: getNodeContent(item, 'title'), - description: getNodeContent(item, 'description'), - url: getNodeContent(item, 'link'), - image: parseItemImage(item), - pubDate: getNodeContent(item, 'pubDate'), - age: pubDateToInterval(item), - categories: Array.from(item.getElementsByTagName('category')).map((cat) => - cat.firstChild?.textContent - ), - } - }).sort((a, b) => itemTime(b) - itemTime(a)) + return maxImage + }, { + width: parseFloat(images[0].getAttribute('width') || '0'), + url: images[0].getAttribute('url'), + }) + + return { + url: url } - } } +// Get the HREFs of an Atom element +const getAtomLinksByType = (parent: Element, type: string): Array => { + // @ts-ignore + return Array.from(parent.children). + filter( + (e) => + e.tagName.toLowerCase() == 'link' && + (e.getAttribute('type') || '').toLowerCase().startsWith(type.toLowerCase()) && + (e.getAttribute('href') || '').length > 0 + ). + map((e) => toAbsoluteURL(e.getAttribute('href'))). + filter((l) => l != null) +} + +// Get the RSS/Atom root element of the current page, if available const getFeedRoot = (): HTMLElement | null => { const xmlDoc = document.documentElement + + // Check if it's an RSS feed if (xmlDoc.tagName.toLowerCase() === 'rss') return xmlDoc + // Check if it's an Atom feed + if (xmlDoc.tagName.toLowerCase() === 'feed') + return xmlDoc + // Chrome-based browsers may wrap the XML into an HTML view const webkitSource = document.getElementById('webkit-xml-viewer-source-xml') if (webkitSource) @@ -121,6 +213,7 @@ const getFeedRoot = (): HTMLElement | null => { return preElements[0] } +// Convert an XML string to a DOM object if it's a valid feed const textToDOM = (text: string) => { const parser = new DOMParser() let xmlDoc = null @@ -135,55 +228,65 @@ const textToDOM = (text: string) => { // @ts-ignore const root = xmlDoc.documentElement - if (root.tagName.toLowerCase() === 'rss') + if ( + root.tagName.toLowerCase() === 'rss' || + root.tagName.toLowerCase() === 'feed' + ) return root } -const renderFeed = (text: string) => { +// Render a feed. It accepts an XML string as an argument. +// If not passed, it will try to render any feeds on the current page. +const renderFeed = (text: string, url: string | null) => { const xmlDoc = text?.length ? textToDOM(text) : getFeedRoot() if (!xmlDoc) - // Not an RSS feed + // Not a feed return - const channel = xmlDoc.getElementsByTagName('channel')[0] - if (!channel) - return + // Check if it's an RSS feed + let feed = xmlDoc.getElementsByTagName('channel')[0] - browser.storage.local.set(parseFeed(channel)) + if (!feed) { + // Check if it's an Atom feed + if (xmlDoc.tagName.toLowerCase() !== 'feed') + return + + feed = xmlDoc + } + + // Save the parsed feed to the storage and redirect to the viewer + browser.storage.local.set(parseFeed(feed, url)) window.location.href = browser.runtime.getURL('viewer/index.html') } +// Extract any feed URL published on the page const extractFeedUrl = () => { const links = Array.from(document.getElementsByTagName('link')) .filter((link) => link.getAttribute('rel') === 'alternate' && - link.getAttribute('type')?.startsWith('application/rss+xml') + ( + link.getAttribute('type')?.startsWith('application/rss+xml') || + link.getAttribute('type')?.startsWith('application/atom+xml') + ) ) if (!links.length) return - let link = links[0].getAttribute('href') || '' - if (link.length && !link.match(/^https?:\/\//)) { - let port = window.location.port - if (port.length) - port = `:${port}` - link = `${window.location.protocol}//${window.location.hostname}${port}${link}` - } - - return link.length ? link : null + return toAbsoluteURL(links[0].getAttribute('href')) } +// Main message listener browser.runtime.onMessage.addListener( async ( message: { type: string, - url: string, + url: string | null, document: string, } ) => { if (message.type === 'renderFeed') - return renderFeed(message.document) + return renderFeed(message.document, message.url) if (message.type === 'extractFeedUrl') return extractFeedUrl() diff --git a/src/viewer/App.vue b/src/viewer/App.vue index ff75ae0..47ffc95 100644 --- a/src/viewer/App.vue +++ b/src/viewer/App.vue @@ -3,17 +3,21 @@
-

+
+

+

+

@@ -21,10 +25,11 @@