From 24d3ab9ab34634f640a73443ec966c4e3860e3bd Mon Sep 17 00:00:00 2001 From: Edward Loveall Date: Sun, 13 Feb 2022 10:08:16 -0500 Subject: [PATCH] Better article ID parsing A new ArticleIdParser class takes in an HTTP::Request object and parses the article ID from it. It intentinoally fails on tag, user, and search pages and attempts to only catch articles. --- CHANGELOG | 4 ++ spec/classes/article_id_parser_spec.cr | 96 ++++++++++++++++++++++++++ spec/requests/articles/show_spec.cr | 77 --------------------- src/actions/articles/show.cr | 23 +----- src/classes/article_id_parser.cr | 31 +++++++++ 5 files changed, 132 insertions(+), 99 deletions(-) create mode 100644 spec/classes/article_id_parser_spec.cr delete mode 100644 spec/requests/articles/show_spec.cr create mode 100644 src/classes/article_id_parser.cr diff --git a/CHANGELOG b/CHANGELOG index 67357db..e08c01d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,7 @@ +2022-02-13 + +* Better article ID parsing + 2022-02-12 * Error page matches Scribe styles diff --git a/spec/classes/article_id_parser_spec.cr b/spec/classes/article_id_parser_spec.cr new file mode 100644 index 0000000..ec91493 --- /dev/null +++ b/spec/classes/article_id_parser_spec.cr @@ -0,0 +1,96 @@ +require "../spec_helper" + +def resource_request(resource : String) + headers = HTTP::Headers{"Host" => "example.com"} + HTTP::Request.new("GET", resource, headers) +end + +describe ArticleIdParser do + it "parses the post id for urls like /@user/:post_slug" do + request = resource_request("/@user/my-post-111111abcdef") + + result = ArticleIdParser.parse(request) + + result.should eq(Monads::Just.new("111111abcdef")) + end + + it "parses the post id for urls like /user/:post_slug" do + request = resource_request("/user/my-post-222222abcdef") + + result = ArticleIdParser.parse(request) + + result.should eq(Monads::Just.new("222222abcdef")) + end + + it "parses the post id for urls like /p/:post_slug" do + request = resource_request("/p/my-post-333333abcdef") + + result = ArticleIdParser.parse(request) + + result.should eq(Monads::Just.new("333333abcdef")) + end + + it "parses the post id for urls like /posts/:post_slug" do + request = resource_request("/posts/my-post-444444abcdef") + + result = ArticleIdParser.parse(request) + + result.should eq(Monads::Just.new("444444abcdef")) + end + + it "parses the post id for urls like /p/:post_id" do + request = resource_request("/p/555555abcdef") + + result = ArticleIdParser.parse(request) + + result.should eq(Monads::Just.new("555555abcdef")) + end + + it "parses the post id for urls like /:post_slug" do + request = resource_request("/my-post-666666abcdef") + + result = ArticleIdParser.parse(request) + + result.should eq(Monads::Just.new("666666abcdef")) + end + + it "parses the post id for urls like /https:/medium.com/@user/:post_slug" do + request = resource_request("/https:/medium.com/@user/777777abcdef") + + result = ArticleIdParser.parse(request) + + result.should eq(Monads::Just.new("777777abcdef")) + end + + it "parses the post id for global identity redirects" do + request = resource_request("/m/global-identity?redirectUrl=https%3A%2F%2Fexample.com%2Fmy-post-888888abcdef") + + result = ArticleIdParser.parse(request) + + result.should eq(Monads::Just.new("888888abcdef")) + end + + it "returns Nothing if path is a username" do + request = resource_request("/@ba5eba11") + + result = ArticleIdParser.parse(request) + + result.should eq(Monads::Nothing(String).new) + end + + it "returns Nothing if path is a tag" do + request = resource_request("/tag/0ddba11") + + result = ArticleIdParser.parse(request) + + result.should eq(Monads::Nothing(String).new) + end + + it "returns Nothing if path is search result" do + request = resource_request("/search?q=ba5eba11") + + result = ArticleIdParser.parse(request) + + result.should eq(Monads::Nothing(String).new) + end +end diff --git a/spec/requests/articles/show_spec.cr b/spec/requests/articles/show_spec.cr deleted file mode 100644 index 0a2fadc..0000000 --- a/spec/requests/articles/show_spec.cr +++ /dev/null @@ -1,77 +0,0 @@ -require "../../spec_helper" - -class TestClient < MediumClient - class_property last_post_id : String = "" - - def self.post_data(post_id : String) : PostResponse::Root - self.last_post_id = post_id - PostResponse::Root.from_json <<-JSON - { - "data": { - "post": { - "title": "a title", - "createdAt": 0, - "creator": { "id": "0", "name": "username" }, - "content": { "bodyModel": { "paragraphs": [] } } - } - } - } - JSON - end -end - -class Articles::Show - def client_class - TestClient - end -end - -describe Articles::Show do - it "parses the post id for urls like /@user/:post_slug" do - HttpClient.get("/@user/my-post-111111abcdef") - - TestClient.last_post_id.should eq("111111abcdef") - end - - it "parses the post id for urls like /user/:post_slug" do - HttpClient.get("/user/my-post-222222abcdef") - - TestClient.last_post_id.should eq("222222abcdef") - end - - it "parses the post id for urls like /p/:post_slug" do - HttpClient.get("/p/my-post-333333abcdef") - - TestClient.last_post_id.should eq("333333abcdef") - end - - it "parses the post id for urls like /posts/:post_slug" do - HttpClient.get("/posts/my-post-444444abcdef") - - TestClient.last_post_id.should eq("444444abcdef") - end - - it "parses the post id for urls like /p/:post_id" do - HttpClient.get("/p/555555abcdef") - - TestClient.last_post_id.should eq("555555abcdef") - end - - it "parses the post id for urls like /:post_slug" do - HttpClient.get("/my-post-666666abcdef") - - TestClient.last_post_id.should eq("666666abcdef") - end - - it "parses the post id for urls like /https:/medium.com/@user/:post_slug" do - HttpClient.get("/https:/medium.com/@user/777777abcdef") - - TestClient.last_post_id.should eq("777777abcdef") - end - - it "parses the post id for global identity redirects" do - HttpClient.get("/m/global-identity?redirectUrl=https%3A%2F%2Fexample.com%2Fmy-post-888888abcdef") - - TestClient.last_post_id.should eq("888888abcdef") - end -end diff --git a/src/actions/articles/show.cr b/src/actions/articles/show.cr index 9c95157..7bd1e30 100644 --- a/src/actions/articles/show.cr +++ b/src/actions/articles/show.cr @@ -2,7 +2,7 @@ require "json" class Articles::Show < BrowserAction fallback do - post_id = maybe_post_id(context.request) + post_id = ArticleIdParser.parse(context.request) case post_id in Monads::Just response = client_class.post_data(post_id.value!) @@ -18,27 +18,6 @@ class Articles::Show < BrowserAction end end - def maybe_post_id(request : HTTP::Request) - from_params = post_id_from_params(request.query_params) - from_path = post_id_from_path(request.path) - from_path.or(from_params) - end - - def post_id_from_path(request_path : String) - Monads::Try(Regex::MatchData) - .new(->{ request_path.match(/([0-9a-f]+)$/i) }) - .to_maybe - .fmap(->(matches : Regex::MatchData) { matches[1] }) - end - - def post_id_from_params(params : URI::Params) - maybe_uri = Monads::Try(String) - .new(->{ params["redirectUrl"] }) - .to_maybe - .fmap(->(url : String) { URI.parse(url) }) - .bind(->(uri : URI) { post_id_from_path(uri.path) }) - end - def client_class if use_local? LocalClient diff --git a/src/classes/article_id_parser.cr b/src/classes/article_id_parser.cr new file mode 100644 index 0000000..524be6b --- /dev/null +++ b/src/classes/article_id_parser.cr @@ -0,0 +1,31 @@ +class ArticleIdParser + include Monads + + ID_REGEX = /[\/\-]([0-9a-f]+)/i + + def self.parse(request : HTTP::Request) + new.parse(request) + end + + def parse(request : HTTP::Request) : Maybe + from_params = post_id_from_params(request.query_params) + from_path = post_id_from_path(request.path) + from_path.or(from_params) + end + + private def post_id_from_path(request_path : String) + return Nothing(String).new if request_path.starts_with?("/tag/") + Try(Regex::MatchData) + .new(->{ request_path.match(ID_REGEX) }) + .to_maybe + .fmap(->(matches : Regex::MatchData) { matches[1] }) + end + + private def post_id_from_params(params : URI::Params) + maybe_uri = Try(String) + .new(->{ params["redirectUrl"] }) + .to_maybe + .fmap(->(url : String) { URI.parse(url) }) + .bind(->(uri : URI) { post_id_from_path(uri.path) }) + end +end