From 05c18f6451e8015af81082ae3abfa7cb3ebdba1c Mon Sep 17 00:00:00 2001 From: Edward Loveall Date: Sat, 14 Aug 2021 17:36:10 -0400 Subject: [PATCH] Extract title and subtitle from initial paragraphs Medium guides each post to have a Title and Subtitle. They are rendered as the first two paragraphs: H3 and H4 respectively. If they exist, a new PageConverter class extracts them and sets them on the page. However, they aren't required. If the first two paragraphs aren't H3 and H4, the PageConverter falls back to using the first paragraph as the title, and setting the subtitle to blank. The remaining paragraphs are passed into the ParagraphConverter as normal. --- spec/classes/page_converter_spec.cr | 97 ++++++++++ spec/components/page_content_spec.cr | 260 +++++++++++++++++---------- src/actions/articles/show.cr | 3 +- src/classes/page_converter.cr | 19 ++ src/models/page.cr | 8 +- 5 files changed, 288 insertions(+), 99 deletions(-) create mode 100644 spec/classes/page_converter_spec.cr create mode 100644 src/classes/page_converter.cr diff --git a/spec/classes/page_converter_spec.cr b/spec/classes/page_converter_spec.cr new file mode 100644 index 0000000..3a832d7 --- /dev/null +++ b/spec/classes/page_converter_spec.cr @@ -0,0 +1,97 @@ +require "../spec_helper" + +include Nodes + +describe PageConverter do + it "sets the title and subtitle if present" do + paragraphs = Array(PostResponse::Paragraph).from_json <<-JSON + [ + { + "text": "Title", + "type": "H3", + "markups": [], + "href": null, + "iframe": null, + "layout": null, + "metadata": null + }, + { + "text": "Subtitle", + "type": "H4", + "markups": [], + "href": null, + "iframe": null, + "layout": null, + "metadata": null + } + ] + JSON + + page = PageConverter.new.convert(paragraphs) + + page.title.should eq "Title" + page.subtitle.should eq "Subtitle" + end + + it "sets the title to the first paragraph if no title" do + paragraphs = Array(PostResponse::Paragraph).from_json <<-JSON + [ + { + "text": "Not a title", 
+ "type": "P", + "markups": [], + "href": null, + "iframe": null, + "layout": null, + "metadata": null + } + ] + JSON + page = PageConverter.new.convert(paragraphs) + + page.title.should eq "Not a title" + page.subtitle.should eq nil + end + + it "calls ParagraphConverter to convert the remaining paragraph content" do + paragraphs = Array(PostResponse::Paragraph).from_json <<-JSON + [ + { + "text": "Title", + "type": "H3", + "markups": [], + "href": null, + "iframe": null, + "layout": null, + "metadata": null + }, + { + "text": "Subtitle", + "type": "H4", + "markups": [], + "href": null, + "iframe": null, + "layout": null, + "metadata": null + }, + { + "text": "Content", + "type": "P", + "markups": [], + "href": null, + "iframe": null, + "layout": null, + "metadata": null + } + ] + JSON + + page = PageConverter.new.convert(paragraphs) + + page.nodes.should eq [ + Paragraph.new([ + Text.new("Content"), + ] of Child), + ] + end +end diff --git a/spec/components/page_content_spec.cr b/spec/components/page_content_spec.cr index e0fbc7f..6028dfe 100644 --- a/spec/components/page_content_spec.cr +++ b/spec/components/page_content_spec.cr @@ -4,11 +4,15 @@ include Nodes describe PageContent do it "renders a single parent/child node structure" do - page = Page.new(nodes: [ - Paragraph.new(children: [ - Text.new(content: "hi"), - ] of Child), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Paragraph.new(children: [ + Text.new(content: "hi"), + ] of Child), + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -16,22 +20,26 @@ describe PageContent do end it "renders multiple childrens" do - page = Page.new(nodes: [ - Paragraph.new(children: [ - Text.new(content: "Hello, "), - Emphasis.new(children: [ - Text.new(content: "World!"), + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Paragraph.new(children: [ + Text.new(content: "Hello, "), + Emphasis.new(children: [ + Text.new(content: "World!"), + ] of Child), 
] of Child), - ] of Child), - UnorderedList.new(children: [ - ListItem.new(children: [ - Text.new(content: "List!"), + UnorderedList.new(children: [ + ListItem.new(children: [ + Text.new(content: "List!"), + ] of Child), + ListItem.new(children: [ + Text.new(content: "Again!"), + ] of Child), ] of Child), - ListItem.new(children: [ - Text.new(content: "Again!"), - ] of Child), - ] of Child), - ] of Child) + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -39,9 +47,13 @@ describe PageContent do end it "renders an anchor" do - page = Page.new(nodes: [ - Anchor.new(children: [Text.new("link")] of Child, href: "https://example.com"), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Anchor.new(children: [Text.new("link")] of Child, href: "https://example.com"), + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -49,11 +61,15 @@ describe PageContent do end it "renders a blockquote" do - page = Page.new(nodes: [ - BlockQuote.new(children: [ - Text.new("Wayne Gretzky. Michael Scott."), - ] of Child), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + BlockQuote.new(children: [ + Text.new("Wayne Gretzky. 
Michael Scott."), + ] of Child), + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -61,11 +77,15 @@ describe PageContent do end it "renders code" do - page = Page.new(nodes: [ - Code.new(children: [ - Text.new("foo = bar"), - ] of Child), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Code.new(children: [ + Text.new("foo = bar"), + ] of Child), + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -73,14 +93,18 @@ describe PageContent do end it "renders empasis" do - page = Page.new(nodes: [ - Paragraph.new(children: [ - Text.new(content: "This is "), - Emphasis.new(children: [ - Text.new(content: "neat!"), + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Paragraph.new(children: [ + Text.new(content: "This is "), + Emphasis.new(children: [ + Text.new(content: "neat!"), + ] of Child), ] of Child), - ] of Child), - ] of Child) + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -88,14 +112,18 @@ describe PageContent do end it "renders a figure and figure caption" do - page = Page.new(nodes: [ - Figure.new(children: [ - Image.new(src: "image.png", originalWidth: 100, originalHeight: 200), - FigureCaption.new(children: [ - Text.new("A caption"), + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Figure.new(children: [ + Image.new(src: "image.png", originalWidth: 100, originalHeight: 200), + FigureCaption.new(children: [ + Text.new("A caption"), + ] of Child), ] of Child), - ] of Child), - ] of Child) + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -108,11 +136,15 @@ describe PageContent do end it "renders an H3" do - page = Page.new(nodes: [ - Heading2.new(children: [ - Text.new(content: "Title!"), - ] of Child), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Heading2.new(children: [ + Text.new(content: "Title!"), + ] of Child), + ] of Child + ) html = PageContent.new(page: 
page).render_to_string @@ -120,11 +152,15 @@ describe PageContent do end it "renders an H4" do - page = Page.new(nodes: [ - Heading3.new(children: [ - Text.new(content: "In Conclusion..."), - ] of Child), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Heading3.new(children: [ + Text.new(content: "In Conclusion..."), + ] of Child), + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -132,11 +168,15 @@ describe PageContent do end it "renders an image" do - page = Page.new(nodes: [ - Paragraph.new(children: [ - Image.new(src: "image.png", originalWidth: 100, originalHeight: 200), - ] of Child), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Paragraph.new(children: [ + Image.new(src: "image.png", originalWidth: 100, originalHeight: 200), + ] of Child), + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -148,11 +188,15 @@ describe PageContent do end it "renders an iframe container" do - page = Page.new(nodes: [ - Paragraph.new(children: [ - IFrame.new(href: "https://example.com"), - ] of Child), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Paragraph.new(children: [ + IFrame.new(href: "https://example.com"), + ] of Child), + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -166,12 +210,16 @@ describe PageContent do end it "renders an ordered list" do - page = Page.new(nodes: [ - OrderedList.new(children: [ - ListItem.new(children: [Text.new("One")] of Child), - ListItem.new(children: [Text.new("Two")] of Child), - ] of Child), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + OrderedList.new(children: [ + ListItem.new(children: [Text.new("One")] of Child), + ListItem.new(children: [Text.new("Two")] of Child), + ] of Child), + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -179,11 +227,15 @@ describe PageContent do end it "renders an preformatted text" do 
- page = Page.new(nodes: [ - Paragraph.new(children: [ - Text.new("Hello, world!"), - ] of Child), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Paragraph.new(children: [ + Text.new("Hello, world!"), + ] of Child), + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -191,11 +243,15 @@ describe PageContent do end it "renders an preformatted text" do - page = Page.new(nodes: [ - Preformatted.new(children: [ - Text.new("New\nline"), - ] of Child), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Preformatted.new(children: [ + Text.new("New\nline"), + ] of Child), + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -203,11 +259,15 @@ describe PageContent do end it "renders strong text" do - page = Page.new(nodes: [ - Strong.new(children: [ - Text.new("Oh yeah!"), - ] of Child), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + Strong.new(children: [ + Text.new("Oh yeah!"), + ] of Child), + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -215,12 +275,16 @@ describe PageContent do end it "renders an unordered list" do - page = Page.new(nodes: [ - UnorderedList.new(children: [ - ListItem.new(children: [Text.new("Apple")] of Child), - ListItem.new(children: [Text.new("Banana")] of Child), - ] of Child), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + UnorderedList.new(children: [ + ListItem.new(children: [Text.new("Apple")] of Child), + ListItem.new(children: [Text.new("Banana")] of Child), + ] of Child), + ] of Child + ) html = PageContent.new(page: page).render_to_string @@ -228,9 +292,13 @@ describe PageContent do end it "renders a user anchor" do - page = Page.new(nodes: [ - UserAnchor.new(children: [Text.new("Some User")] of Child, userId: "abc123"), - ] of Child) + page = Page.new( + title: "Title", + subtitle: nil, + nodes: [ + UserAnchor.new(children: [Text.new("Some User")] 
of Child, userId: "abc123"), + ] of Child + ) html = PageContent.new(page: page).render_to_string diff --git a/src/actions/articles/show.cr b/src/actions/articles/show.cr index 842e17f..4a8ba44 100644 --- a/src/actions/articles/show.cr +++ b/src/actions/articles/show.cr @@ -7,10 +7,9 @@ class Articles::Show < BrowserAction else response = MediumClient.post_data(post_id) end - content = ParagraphConverter.new.convert( + page = PageConverter.new.convert( response.data.post.content.bodyModel.paragraphs ) - page = Page.new(nodes: content) html ShowPage, page: page end end diff --git a/src/classes/page_converter.cr b/src/classes/page_converter.cr new file mode 100644 index 0000000..f3a8e1b --- /dev/null +++ b/src/classes/page_converter.cr @@ -0,0 +1,19 @@ +class PageConverter + def convert(paragraphs : Array(PostResponse::Paragraph)) : Page + first_two_paragraphs = paragraphs.first(2) + first_two_types = first_two_paragraphs.map(&.type) + if first_two_types == [PostResponse::ParagraphType::H3, PostResponse::ParagraphType::H4] + Page.new( + title: first_two_paragraphs[0].text, + subtitle: first_two_paragraphs[1].text, + nodes: ParagraphConverter.new.convert(paragraphs[2..]), + ) + else + Page.new( + title: first_two_paragraphs[0].text, + subtitle: nil, + nodes: ParagraphConverter.new.convert(paragraphs[1..]), + ) + end + end +end diff --git a/src/models/page.cr b/src/models/page.cr index 06e103d..a350981 100644 --- a/src/models/page.cr +++ b/src/models/page.cr @@ -1,6 +1,12 @@ class Page getter nodes : Nodes::Children + getter title : String + getter subtitle : String? - def initialize(@nodes : Nodes::Children) + def initialize( + @title : String, + @subtitle : String?, + @nodes : Nodes::Children = [] of Nodes::Child + ) end end