Extract title and subtitle from initial paragraphs

Medium guides each post to have a Title and Subtitle. They are rendered
as the first two paragraphs: H3 and H4 respectively. If they exist, a
new PageConverter class extracts them and sets them on the page.

However, they aren't required. If the first two paragraphs aren't H3
and H4, the PageConverter falls back to using the first paragraph as
the title, and setting the subtitle to blank.

The remaining paragraphs are passed into the ParagraphConverter as
normal.
This commit is contained in:
Edward Loveall 2021-08-14 17:36:10 -04:00
parent f48f7c2932
commit 05c18f6451
No known key found for this signature in database
GPG key ID: 789A4AE983AC8901
5 changed files with 288 additions and 99 deletions

View file

@ -0,0 +1,97 @@
require "../spec_helper"
include Nodes
# Specs for PageConverter#convert: how the page title, subtitle and body
# nodes are derived from the leading paragraphs of a post.
describe PageConverter do
# An H3 paragraph followed by an H4 paragraph supplies the title and the
# subtitle respectively.
it "sets the title and subtitle if present" do
paragraphs = Array(PostResponse::Paragraph).from_json <<-JSON
[
{
"text": "Title",
"type": "H3",
"markups": [],
"href": null,
"iframe": null,
"layout": null,
"metadata": null
},
{
"text": "Subtitle",
"type": "H4",
"markups": [],
"href": null,
"iframe": null,
"layout": null,
"metadata": null
}
]
JSON
page = PageConverter.new.convert(paragraphs)
page.title.should eq "Title"
page.subtitle.should eq "Subtitle"
end
# Fallback: when the post does not open with H3 + H4, the text of the first
# paragraph (here a plain "P") is used as the title and the subtitle is nil.
it "sets the title to the first paragraph if no title" do
paragraphs = Array(PostResponse::Paragraph).from_json <<-JSON
[
{
"text": "Not a title",
"type": "P",
"markups": [],
"href": null,
"iframe": null,
"layout": null,
"metadata": null
}
]
JSON
page = PageConverter.new.convert(paragraphs)
page.title.should eq "Not a title"
page.subtitle.should eq nil
end
# The H3/H4 paragraphs consumed as title and subtitle must not show up in
# page.nodes; only the paragraphs after them are converted to nodes.
it "calls ParagraphConverter to convert the remaining paragraph content" do
paragraphs = Array(PostResponse::Paragraph).from_json <<-JSON
[
{
"text": "Title",
"type": "H3",
"markups": [],
"href": null,
"iframe": null,
"layout": null,
"metadata": null
},
{
"text": "Subtitle",
"type": "H4",
"markups": [],
"href": null,
"iframe": null,
"layout": null,
"metadata": null
},
{
"text": "Content",
"type": "P",
"markups": [],
"href": null,
"iframe": null,
"layout": null,
"metadata": null
}
]
JSON
page = PageConverter.new.convert(paragraphs)
page.nodes.should eq [
Paragraph.new([
Text.new("Content"),
] of Child),
]
end
end

View file

@ -4,11 +4,15 @@ include Nodes
describe PageContent do
it "renders a single parent/child node structure" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Paragraph.new(children: [
Text.new(content: "hi"),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -16,7 +20,10 @@ describe PageContent do
end
it "renders multiple childrens" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Paragraph.new(children: [
Text.new(content: "Hello, "),
Emphasis.new(children: [
@ -31,7 +38,8 @@ describe PageContent do
Text.new(content: "Again!"),
] of Child),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -39,9 +47,13 @@ describe PageContent do
end
it "renders an anchor" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Anchor.new(children: [Text.new("link")] of Child, href: "https://example.com"),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -49,11 +61,15 @@ describe PageContent do
end
it "renders a blockquote" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
BlockQuote.new(children: [
Text.new("Wayne Gretzky. Michael Scott."),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -61,11 +77,15 @@ describe PageContent do
end
it "renders code" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Code.new(children: [
Text.new("foo = bar"),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -73,14 +93,18 @@ describe PageContent do
end
it "renders empasis" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Paragraph.new(children: [
Text.new(content: "This is "),
Emphasis.new(children: [
Text.new(content: "neat!"),
] of Child),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -88,14 +112,18 @@ describe PageContent do
end
it "renders a figure and figure caption" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Figure.new(children: [
Image.new(src: "image.png", originalWidth: 100, originalHeight: 200),
FigureCaption.new(children: [
Text.new("A caption"),
] of Child),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -108,11 +136,15 @@ describe PageContent do
end
it "renders an H3" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Heading2.new(children: [
Text.new(content: "Title!"),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -120,11 +152,15 @@ describe PageContent do
end
it "renders an H4" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Heading3.new(children: [
Text.new(content: "In Conclusion..."),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -132,11 +168,15 @@ describe PageContent do
end
it "renders an image" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Paragraph.new(children: [
Image.new(src: "image.png", originalWidth: 100, originalHeight: 200),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -148,11 +188,15 @@ describe PageContent do
end
it "renders an iframe container" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Paragraph.new(children: [
IFrame.new(href: "https://example.com"),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -166,12 +210,16 @@ describe PageContent do
end
it "renders an ordered list" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
OrderedList.new(children: [
ListItem.new(children: [Text.new("One")] of Child),
ListItem.new(children: [Text.new("Two")] of Child),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -179,11 +227,15 @@ describe PageContent do
end
it "renders an preformatted text" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Paragraph.new(children: [
Text.new("Hello, world!"),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -191,11 +243,15 @@ describe PageContent do
end
it "renders an preformatted text" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Preformatted.new(children: [
Text.new("New\nline"),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -203,11 +259,15 @@ describe PageContent do
end
it "renders strong text" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
Strong.new(children: [
Text.new("Oh yeah!"),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -215,12 +275,16 @@ describe PageContent do
end
it "renders an unordered list" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
UnorderedList.new(children: [
ListItem.new(children: [Text.new("Apple")] of Child),
ListItem.new(children: [Text.new("Banana")] of Child),
] of Child),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string
@ -228,9 +292,13 @@ describe PageContent do
end
it "renders a user anchor" do
page = Page.new(nodes: [
page = Page.new(
title: "Title",
subtitle: nil,
nodes: [
UserAnchor.new(children: [Text.new("Some User")] of Child, userId: "abc123"),
] of Child)
] of Child
)
html = PageContent.new(page: page).render_to_string

View file

@ -7,10 +7,9 @@ class Articles::Show < BrowserAction
else
response = MediumClient.post_data(post_id)
end
content = ParagraphConverter.new.convert(
page = PageConverter.new.convert(
response.data.post.content.bodyModel.paragraphs
)
page = Page.new(nodes: content)
html ShowPage, page: page
end
end

View file

@ -0,0 +1,19 @@
# Builds a Page from a post's paragraphs, lifting the title and subtitle out
# of the leading paragraphs and converting the rest into content nodes.
class PageConverter
  # Converts *paragraphs* into a Page.
  #
  # * If the first two paragraphs are an H3 followed by an H4, their text
  #   becomes the title and subtitle, and the paragraphs after them become
  #   the page nodes.
  # * Otherwise the first paragraph's text becomes the title, the subtitle
  #   is nil, and every paragraph after the first becomes the page nodes.
  #
  # NOTE(review): an empty *paragraphs* array is not handled here; indexing
  # into it raises. Confirm callers never pass an empty post body.
  def convert(paragraphs : Array(PostResponse::Paragraph)) : Page
    leading = paragraphs.first(2)
    title_then_subtitle = [
      PostResponse::ParagraphType::H3,
      PostResponse::ParagraphType::H4,
    ]

    # Decide how many leading paragraphs are consumed as page metadata.
    if leading.map(&.type) == title_then_subtitle
      subtitle = leading[1].text
      body = paragraphs[2..]
    else
      subtitle = nil
      body = paragraphs[1..]
    end

    Page.new(
      title: leading[0].text,
      subtitle: subtitle,
      nodes: ParagraphConverter.new.convert(body),
    )
  end
end

View file

@ -1,6 +1,12 @@
class Page
getter nodes : Nodes::Children
getter title : String
getter subtitle : String?
def initialize(@nodes : Nodes::Children)
def initialize(
@title : String,
@subtitle : String?,
@nodes : Nodes::Children = [] of Nodes::Child
)
end
end