Better article ID parsing

A new ArticleIdParser class takes in an HTTP::Request object and parses
the article ID from it. It intentinoally fails on tag, user, and search
pages and attempts to only catch articles.
This commit is contained in:
Edward Loveall 2022-02-13 10:08:16 -05:00
parent f056a0b68a
commit 24d3ab9ab3
No known key found for this signature in database
GPG Key ID: A7606DFEC2BA731F
5 changed files with 132 additions and 99 deletions

View File

@ -1,3 +1,7 @@
2022-02-13
* Better article ID parsing
2022-02-12
* Error page matches Scribe styles

View File

@ -0,0 +1,96 @@
require "../spec_helper"
def resource_request(resource : String)
headers = HTTP::Headers{"Host" => "example.com"}
HTTP::Request.new("GET", resource, headers)
end
describe ArticleIdParser do
it "parses the post id for urls like /@user/:post_slug" do
request = resource_request("/@user/my-post-111111abcdef")
result = ArticleIdParser.parse(request)
result.should eq(Monads::Just.new("111111abcdef"))
end
it "parses the post id for urls like /user/:post_slug" do
request = resource_request("/user/my-post-222222abcdef")
result = ArticleIdParser.parse(request)
result.should eq(Monads::Just.new("222222abcdef"))
end
it "parses the post id for urls like /p/:post_slug" do
request = resource_request("/p/my-post-333333abcdef")
result = ArticleIdParser.parse(request)
result.should eq(Monads::Just.new("333333abcdef"))
end
it "parses the post id for urls like /posts/:post_slug" do
request = resource_request("/posts/my-post-444444abcdef")
result = ArticleIdParser.parse(request)
result.should eq(Monads::Just.new("444444abcdef"))
end
it "parses the post id for urls like /p/:post_id" do
request = resource_request("/p/555555abcdef")
result = ArticleIdParser.parse(request)
result.should eq(Monads::Just.new("555555abcdef"))
end
it "parses the post id for urls like /:post_slug" do
request = resource_request("/my-post-666666abcdef")
result = ArticleIdParser.parse(request)
result.should eq(Monads::Just.new("666666abcdef"))
end
it "parses the post id for urls like /https:/medium.com/@user/:post_slug" do
request = resource_request("/https:/medium.com/@user/777777abcdef")
result = ArticleIdParser.parse(request)
result.should eq(Monads::Just.new("777777abcdef"))
end
it "parses the post id for global identity redirects" do
request = resource_request("/m/global-identity?redirectUrl=https%3A%2F%2Fexample.com%2Fmy-post-888888abcdef")
result = ArticleIdParser.parse(request)
result.should eq(Monads::Just.new("888888abcdef"))
end
it "returns Nothing if path is a username" do
request = resource_request("/@ba5eba11")
result = ArticleIdParser.parse(request)
result.should eq(Monads::Nothing(String).new)
end
it "returns Nothing if path is a tag" do
request = resource_request("/tag/0ddba11")
result = ArticleIdParser.parse(request)
result.should eq(Monads::Nothing(String).new)
end
it "returns Nothing if path is search result" do
request = resource_request("/search?q=ba5eba11")
result = ArticleIdParser.parse(request)
result.should eq(Monads::Nothing(String).new)
end
end

View File

@ -1,77 +0,0 @@
require "../../spec_helper"
class TestClient < MediumClient
class_property last_post_id : String = ""
def self.post_data(post_id : String) : PostResponse::Root
self.last_post_id = post_id
PostResponse::Root.from_json <<-JSON
{
"data": {
"post": {
"title": "a title",
"createdAt": 0,
"creator": { "id": "0", "name": "username" },
"content": { "bodyModel": { "paragraphs": [] } }
}
}
}
JSON
end
end
class Articles::Show
def client_class
TestClient
end
end
describe Articles::Show do
it "parses the post id for urls like /@user/:post_slug" do
HttpClient.get("/@user/my-post-111111abcdef")
TestClient.last_post_id.should eq("111111abcdef")
end
it "parses the post id for urls like /user/:post_slug" do
HttpClient.get("/user/my-post-222222abcdef")
TestClient.last_post_id.should eq("222222abcdef")
end
it "parses the post id for urls like /p/:post_slug" do
HttpClient.get("/p/my-post-333333abcdef")
TestClient.last_post_id.should eq("333333abcdef")
end
it "parses the post id for urls like /posts/:post_slug" do
HttpClient.get("/posts/my-post-444444abcdef")
TestClient.last_post_id.should eq("444444abcdef")
end
it "parses the post id for urls like /p/:post_id" do
HttpClient.get("/p/555555abcdef")
TestClient.last_post_id.should eq("555555abcdef")
end
it "parses the post id for urls like /:post_slug" do
HttpClient.get("/my-post-666666abcdef")
TestClient.last_post_id.should eq("666666abcdef")
end
it "parses the post id for urls like /https:/medium.com/@user/:post_slug" do
HttpClient.get("/https:/medium.com/@user/777777abcdef")
TestClient.last_post_id.should eq("777777abcdef")
end
it "parses the post id for global identity redirects" do
HttpClient.get("/m/global-identity?redirectUrl=https%3A%2F%2Fexample.com%2Fmy-post-888888abcdef")
TestClient.last_post_id.should eq("888888abcdef")
end
end

View File

@ -2,7 +2,7 @@ require "json"
class Articles::Show < BrowserAction
fallback do
post_id = maybe_post_id(context.request)
post_id = ArticleIdParser.parse(context.request)
case post_id
in Monads::Just
response = client_class.post_data(post_id.value!)
@ -18,27 +18,6 @@ class Articles::Show < BrowserAction
end
end
def maybe_post_id(request : HTTP::Request)
from_params = post_id_from_params(request.query_params)
from_path = post_id_from_path(request.path)
from_path.or(from_params)
end
def post_id_from_path(request_path : String)
Monads::Try(Regex::MatchData)
.new(->{ request_path.match(/([0-9a-f]+)$/i) })
.to_maybe
.fmap(->(matches : Regex::MatchData) { matches[1] })
end
def post_id_from_params(params : URI::Params)
maybe_uri = Monads::Try(String)
.new(->{ params["redirectUrl"] })
.to_maybe
.fmap(->(url : String) { URI.parse(url) })
.bind(->(uri : URI) { post_id_from_path(uri.path) })
end
def client_class
if use_local?
LocalClient

View File

@ -0,0 +1,31 @@
class ArticleIdParser
include Monads
ID_REGEX = /[\/\-]([0-9a-f]+)/i
def self.parse(request : HTTP::Request)
new.parse(request)
end
def parse(request : HTTP::Request) : Maybe
from_params = post_id_from_params(request.query_params)
from_path = post_id_from_path(request.path)
from_path.or(from_params)
end
private def post_id_from_path(request_path : String)
return Nothing(String).new if request_path.starts_with?("/tag/")
Try(Regex::MatchData)
.new(->{ request_path.match(ID_REGEX) })
.to_maybe
.fmap(->(matches : Regex::MatchData) { matches[1] })
end
private def post_id_from_params(params : URI::Params)
maybe_uri = Try(String)
.new(->{ params["redirectUrl"] })
.to_maybe
.fmap(->(url : String) { URI.parse(url) })
.bind(->(uri : URI) { post_id_from_path(uri.path) })
end
end