diff --git a/data/article.go b/data/article.go
new file mode 100644
index 0000000..39e7c51
--- /dev/null
+++ b/data/article.go
@@ -0,0 +1,82 @@
+package data
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/rramiachraf/dumb/utils"
+)
+
+// Article holds the article fields that Parse fills in from an article page.
+type Article struct {
+	Title       string
+	Subtitle    string
+	HTML        string
+	Authors     []Author
+	PublishedAt time.Time
+	Image       string
+}
+
+// Author is one entry of an article's author list as decoded from the page data.
+type Author struct {
+	Name  string
+	Role  string `json:"human_readable_role_for_display"`
+	About string `json:"about_me_summary"`
+}
+
+// articleResponse mirrors the part of the JSON stored in the page's
+// meta[itemprop='page_data'] tag that parseArticleData reads.
+type articleResponse struct {
+	Article struct {
+		Title    string
+		Subtitle string `json:"dek"`
+		Authors  []Author
+		Body     struct {
+			HTML string
+		}
+		PublishedAt int64  `json:"published_at"`
+		Image       string `json:"preview_image"`
+	}
+}
+
+// errNoPageData reports that a document carries no page_data content.
+var errNoPageData = errors.New("article page data not found")
+
+// parseArticleData fills a from the JSON held in the content attribute of the
+// document's meta[itemprop='page_data'] tag.
+//
+// It returns errNoPageData when that attribute is absent, so callers can tell
+// an unparsed article from an empty one, and a wrapped error when the JSON
+// cannot be decoded. a is left untouched on error.
+func (a *Article) parseArticleData(doc *goquery.Document) error {
+	pageMetadata, exists := doc.Find("meta[itemprop='page_data']").Attr("content")
+	if !exists {
+		return errNoPageData
+	}
+
+	var articleData articleResponse
+	if err := json.Unmarshal([]byte(pageMetadata), &articleData); err != nil {
+		return fmt.Errorf("decoding article page data: %w", err)
+	}
+	data := articleData.Article
+
+	a.Title = data.Title
+	a.Subtitle = data.Subtitle
+
+	a.HTML = utils.CleanBody(data.Body.HTML)
+	a.Authors = data.Authors
+	// published_at is decoded as an integer and interpreted as Unix seconds.
+	a.PublishedAt = time.Unix(data.PublishedAt, 0)
+	a.Image = ExtractImageURL(data.Image)
+
+	return nil
+}
+
+// Parse populates a from an already loaded article document and returns any
+// error reported by parseArticleData.
+func (a *Article) Parse(doc *goquery.Document) error {
+	return a.parseArticleData(doc)
+}
diff --git a/handlers/annotations.go b/handlers/annotations.go index c895256..137bab3 100644 --- a/handlers/annotations.go +++ b/handlers/annotations.go @@ -6,10 +6,7 @@ import ( "encoding/json" "fmt" "net/http" - "regexp" - "strings" - "github.com/PuerkitoBio/goquery" "github.com/gorilla/mux" "github.com/rramiachraf/dumb/data" "github.com/rramiachraf/dumb/utils" @@ -67,7 +64,7 @@ func annotations(l *utils.Logger) http.HandlerFunc { } body := data.Response.Referent.Annotations[0].Body - body.HTML = cleanBody(body.HTML) + body.HTML = 
utils.CleanBody(body.HTML) w.Header().Set("content-type", "application/json") encoder := json.NewEncoder(w) @@ -82,31 +79,3 @@ func annotations(l *utils.Logger) http.HandlerFunc { } } } - -func cleanBody(body string) string { - if doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)); err == nil { - doc.Find("iframe").Each(func(i int, s *goquery.Selection) { - src, exists := s.Attr("src") - if exists { - html := fmt.Sprintf(`Link`, src) - s.ReplaceWithHtml(html) - } - }) - - doc.Find("img").Each(func(i int, s *goquery.Selection) { - src, exists := s.Attr("src") - if exists { - re := regexp.MustCompile(`(?i)https:\/\/images\.(rapgenius|genius)\.com\/`) - pSrc := re.ReplaceAllString(src, "/images/") - s.SetAttr("src", pSrc) - } - }) - - if source, err := doc.Html(); err == nil { - body = source - } - } - - re := regexp.MustCompile(`https?:\/\/[a-z]*.?genius.com`) - return re.ReplaceAllString(body, "") -}
diff --git a/handlers/article.go b/handlers/article.go
new file mode 100644
index 0000000..08b5e22
--- /dev/null
+++ b/handlers/article.go
@@ -0,0 +1,81 @@
+package handlers
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/gorilla/mux"
+	"github.com/rramiachraf/dumb/data"
+	"github.com/rramiachraf/dumb/utils"
+	"github.com/rramiachraf/dumb/views"
+)
+
+// article returns the handler for the {article} route. It serves the article
+// from the cache when one is stored under the slug; otherwise it fetches
+// https://genius.com/a/{slug}, parses the page into a data.Article, renders it
+// and caches the result. Every failure path answers with an error page and a
+// matching status code, and nothing is cached unless parsing succeeded.
+func article(l *utils.Logger) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		articleSlug := mux.Vars(r)["article"]
+
+		if a, err := getCache[data.Article](articleSlug); err == nil {
+			views.ArticlePage(a).Render(context.Background(), w)
+			return
+		}
+
+		url := fmt.Sprintf("https://genius.com/a/%s", articleSlug)
+
+		resp, err := utils.SendRequest(url)
+		if err != nil {
+			l.Error(err.Error())
+			w.WriteHeader(http.StatusInternalServerError)
+			views.ErrorPage(500, "cannot reach Genius servers").Render(context.Background(), w)
+			return
+		}
+
+		defer resp.Body.Close()
+
+		if resp.StatusCode == http.StatusNotFound {
+			w.WriteHeader(http.StatusNotFound)
+			views.ErrorPage(404, "page not found").Render(context.Background(), w)
+			return
+		}
+
+		doc, err := goquery.NewDocumentFromReader(resp.Body)
+		if err != nil {
+			l.Error(err.Error())
+			w.WriteHeader(http.StatusInternalServerError)
+			views.ErrorPage(500, "something went wrong").Render(context.Background(), w)
+			return
+		}
+
+		cf := doc.Find(".cloudflare_content").Length()
+		if cf > 0 {
+			l.Error("cloudflare got in the way")
+			// Send the status explicitly, as the other error branches do;
+			// otherwise the error page goes out with an implicit 200.
+			w.WriteHeader(http.StatusInternalServerError)
+			views.ErrorPage(500, "cloudflare is detected").Render(context.Background(), w)
+			return
+		}
+
+		var a data.Article
+		if err = a.Parse(doc); err != nil {
+			l.Error(err.Error())
+			// Stop here so an article that failed to parse is neither
+			// rendered nor stored in the cache.
+			w.WriteHeader(http.StatusInternalServerError)
+			views.ErrorPage(500, "something went wrong").Render(context.Background(), w)
+			return
+		}
+
+		views.ArticlePage(a).Render(context.Background(), w)
+
+		if err = setCache(articleSlug, a); err != nil {
+			l.Error(err.Error())
+		}
+	}
+}
diff --git a/handlers/article_test.go b/handlers/article_test.go
new file mode 100644
index 0000000..9b6d13f
--- /dev/null
+++ b/handlers/article_test.go
@@ -0,0 +1,60 @@
+package handlers
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"testing"
+
+	"github.com/PuerkitoBio/goquery"
+
+	"github.com/rramiachraf/dumb/utils"
+)
+
+// TestArticle sends a GET for a known article path through the router and
+// checks the status code, the #article-title and #article-subtitle text, and
+// that #article-body is not empty.
+func TestArticle(t *testing.T) {
+	url := "/a/genius-celebrates-hip-hops-50th-anniversary-with-a-look-back-at-the-music-thats-defined-this-site"
+	title := "Genius Celebrates Hip-Hop’s 50th Anniversary With A Look Back At The Music That’s Defined This Site"
+	subtitle := "The first post in a yearlong look at the genre’s storied history."
+
+	r, err := http.NewRequest(http.MethodGet, url, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	rr := httptest.NewRecorder()
+	l := utils.NewLogger(os.Stdout)
+	m := New(l, &assets{})
+
+	m.ServeHTTP(rr, r)
+
+	// Take the recorded response once and reuse it below.
+	res := rr.Result()
+	defer res.Body.Close()
+
+	if res.StatusCode != http.StatusOK {
+		t.Fatalf("expected %d, got %d\n", http.StatusOK, res.StatusCode)
+	}
+
+	doc, err := goquery.NewDocumentFromReader(res.Body)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	articleTitle := doc.Find("#article-title").First().Text()
+	if articleTitle != title {
+		t.Fatalf("expected %q, got %q\n", title, articleTitle)
+	}
+
+	articleSubtitle := doc.Find("#article-subtitle").First().Text()
+	if articleSubtitle != subtitle {
+		t.Fatalf("expected %q, got %q\n", subtitle, articleSubtitle)
+	}
+
+	articleBody := doc.Find("#article-body").First().Text()
+	if len(articleBody) == 0 {
+		t.Fatal("missing article body\n")
+	}
+}
diff --git a/handlers/cache.go b/handlers/cache.go index d07a876..9ceab9e 100644 --- a/handlers/cache.go +++ b/handlers/cache.go @@ -10,7 +10,7 @@ import ( ) type cachable interface { - data.Album | data.Song | data.Annotation | data.Artist | []byte + data.Album | data.Song | data.Annotation | data.Artist | data.Article | []byte } var c, _ = bigcache.New(context.Background(), bigcache.DefaultConfig(time.Hour*24)) diff --git a/handlers/handler.go b/handlers/handler.go index f679d06..62e0f57 100644 --- a/handlers/handler.go +++ b/handlers/handler.go @@ -29,6 +29,7 @@ func New(logger *utils.Logger, staticFiles static) *mux.Router { {Path: "/robots.txt", Handler: robotsHandler}, {Path: "/albums/{artist}/{albumName}", Handler: album}, {Path: "/artists/{artist}", Handler: artist}, + {Path: "/a/{article}", Handler: article}, {Path: "/images/{filename}.{ext}", Handler: imageProxy}, {Path: "/search", Handler: search}, {Path: "/{annotation-id}/{artist-song}/{verse}/annotations", Handler: annotations}, diff --git a/style/annotation.css b/style/annotation.css index 
8a197db..1cb43a3 100644 --- a/style/annotation.css +++ b/style/annotation.css @@ -1,32 +1,33 @@ -.annotation #iframed-link { +#iframed-link { font-weight: 500; background-color: #ffcd38; padding: 2px 6px; } -.annotation { +.annotation, blockquote { padding: 1rem; border-radius: 4px; background: #eee; border: 1px solid #ddd; color: #222; + margin: 1rem 0; } -.annotation img { +.annotation img, blockquote img { max-width: 100%; height: auto; } -.annotation a { +.annotation a, blockquote a { background: none; font-weight: 500; } -.annotation ul { +.annotation ul, blockquote ul { padding-left: 1em; } -.dark .annotation { +.dark .annotation, .dark blockquote { background-color: #272d44; color: inherit; } diff --git a/style/article.css b/style/article.css new file mode 100644 index 0000000..0be76b9 --- /dev/null +++ b/style/article.css @@ -0,0 +1,60 @@ +#article-metadata { + display: flex; + flex-direction: column; + align-items: center; + text-align: center; + gap: 0.5rem; +} + +#article-body { + line-height: 1.75; + color: #171717; +} + +#article-subtitle { + font-size: 1.8rem; + text-align: center; +} + +#article-image { + width: 100%; + height: 50rem; + border-radius: 5px; + object-fit: contain; + object-position: center; + background-color: #f7f7f7; + border: 1px solid #e4e4e4; +} + +#metadata, +#article-subtitle, +#article-date { + color: #333; +} + +#article-authors { + color: #1e1e1e; + display: flex; + flex-direction: column; + gap: 0.5rem; +} + +.dark #article-image { + background-color: #151515; + border: 1px solid #2f2f2f; +} + +.dark #metadata, +.dark #article-subtitle, +.dark #article-date, +.dark #article-authors { + color: #ccc; +} + +.dark #article-title { + color: #eee; +} + +.dark #article-body { + color: #eee; +} diff --git a/style/layout.css b/style/layout.css index ec126f9..2fe9bdd 100644 --- a/style/layout.css +++ b/style/layout.css @@ -12,6 +12,10 @@ flex: 1; } +.solo { + gap: 4rem; +} + .trio-split { grid-template-columns: 24rem calc(1024px - 
56rem) 24rem; gap: 4rem;
diff --git a/utils/clean_body.go b/utils/clean_body.go
new file mode 100644
index 0000000..11150d3
--- /dev/null
+++ b/utils/clean_body.go
@@ -0,0 +1,56 @@
+package utils
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+// The patterns are compiled once at package load instead of on every call.
+var (
+	// imageHostRe matches the https scheme and host of images.genius.com or
+	// images.rapgenius.com (case-insensitively), the following slash and an
+	// optional "images/" path segment.
+	imageHostRe = regexp.MustCompile(`(?i)https:\/\/images\.(rapgenius|genius)\.com\/(images\/)?`)
+
+	// geniusHostRe matches an http(s) URL prefix whose host ends in
+	// genius.com, optionally preceded by lowercase letters and one dot. The
+	// dots are escaped so they only match a literal ".".
+	geniusHostRe = regexp.MustCompile(`https?:\/\/[a-z]*\.?genius\.com`)
+)
+
+// CleanBody rewrites an HTML fragment and returns the result.
+//
+// Every iframe that has a src attribute is replaced by the string built from
+// the link template, every img src has its matches of imageHostRe replaced by
+// "/images/", and finally every match of geniusHostRe in the resulting text
+// is removed. If body cannot be parsed, or the document cannot be serialized
+// again, only the final removal is applied to the input.
+func CleanBody(body string) string {
+	if doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)); err == nil {
+		doc.Find("iframe").Each(func(i int, s *goquery.Selection) {
+			src, exists := s.Attr("src")
+			if exists {
+				// NOTE(review): as shown here the template has no verb for
+				// src; the link markup looks stripped from this copy of the
+				// patch. Confirm against the repository.
+				html := fmt.Sprintf(`Link`, src)
+				s.ReplaceWithHtml(html)
+			}
+		})
+
+		doc.Find("img").Each(func(i int, s *goquery.Selection) {
+			if src, exists := s.Attr("src"); exists {
+				s.SetAttr("src", imageHostRe.ReplaceAllString(src, "/images/"))
+			}
+		})
+
+		if source, err := doc.Html(); err == nil {
+			body = source
+		}
+	}
+
+	return geniusHostRe.ReplaceAllString(body, "")
+}
diff --git a/views/article.templ b/views/article.templ new file mode 100644 index 0000000..0b76597 --- /dev/null +++ b/views/article.templ @@ -0,0 +1,34 @@ +package views + +import ( + "time" + + "github.com/rramiachraf/dumb/data" +) + +templ ArticlePage(a data.Article) { + @layout(a.Title) { +
+
+

{ a.Title }

+ +
+ Article image +

{ a.Subtitle }

+
+ @templ.Raw(a.HTML) +
+
+

Authors

+ for _, author := range a.Authors { +
+ { author.Name } - { author.Role } + { author.About } +
+ } +
+
+ } +}