feat: add article page

This commit is contained in:
Solomon Victorino 2024-06-23 16:38:53 -06:00
parent 941cafc664
commit 0ced2495ee
10 changed files with 279 additions and 34 deletions

64
data/article.go Normal file
View File

@ -0,0 +1,64 @@
package data
import (
"encoding/json"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/rramiachraf/dumb/utils"
)
// Article is the parsed form of a Genius article page. Its fields are filled
// in by parseArticleData.
type Article struct {
Title string
Subtitle string
// HTML is the article body after it has been passed through utils.CleanBody.
HTML string
Authors []Author
// PublishedAt is built with time.Unix from the page's published_at value.
PublishedAt time.Time
// Image is the page's preview_image after it has been passed through ExtractImageURL.
Image string
}
// Author is one entry of an article's authors list as decoded from the page JSON.
type Author struct {
Name string
// Role is decoded from the "human_readable_role_for_display" JSON key.
Role string `json:"human_readable_role_for_display"`
// About is decoded from the "about_me_summary" JSON key.
About string `json:"about_me_summary"`
}
// articleResponse mirrors the part of the JSON document that parseArticleData
// decodes from the content attribute of the page's meta[itemprop='page_data'] tag.
type articleResponse struct {
Article struct {
Title string
// Subtitle is decoded from the "dek" JSON key.
Subtitle string `json:"dek"`
Authors []Author
Body struct {
HTML string
}
// PublishedAt is handed to time.Unix as the seconds argument by parseArticleData.
PublishedAt int64 `json:"published_at"`
// Image is decoded from the "preview_image" JSON key.
Image string `json:"preview_image"`
}
}
// parseArticleData populates a from the JSON embedded in the document's
// meta[itemprop='page_data'] tag.
//
// When the tag (or its content attribute) is absent, a is left untouched and
// nil is returned. A JSON decoding failure is returned as-is and also leaves a
// untouched.
func (a *Article) parseArticleData(doc *goquery.Document) error {
	raw, found := doc.Find("meta[itemprop='page_data']").Attr("content")
	if !found {
		return nil
	}

	var payload articleResponse
	err := json.Unmarshal([]byte(raw), &payload)
	if err != nil {
		return err
	}

	src := payload.Article
	// Article has exactly these six fields, so replacing the whole value is
	// equivalent to assigning each field in turn.
	*a = Article{
		Title:       src.Title,
		Subtitle:    src.Subtitle,
		HTML:        utils.CleanBody(src.Body.HTML),
		Authors:     src.Authors,
		PublishedAt: time.Unix(src.PublishedAt, 0),
		Image:       ExtractImageURL(src.Image),
	}
	return nil
}
// Parse fills a from a fetched article document. It delegates to
// parseArticleData and returns that method's error unchanged.
func (a *Article) Parse(doc *goquery.Document) error {
return a.parseArticleData(doc)
}

View File

@ -6,10 +6,7 @@ import (
"encoding/json"
"fmt"
"net/http"
"regexp"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/gorilla/mux"
"github.com/rramiachraf/dumb/data"
"github.com/rramiachraf/dumb/utils"
@ -67,7 +64,7 @@ func annotations(l *utils.Logger) http.HandlerFunc {
}
body := data.Response.Referent.Annotations[0].Body
body.HTML = cleanBody(body.HTML)
body.HTML = utils.CleanBody(body.HTML)
w.Header().Set("content-type", "application/json")
encoder := json.NewEncoder(w)
@ -82,31 +79,3 @@ func annotations(l *utils.Logger) http.HandlerFunc {
}
}
}
// cleanBody rewrites a fragment of Genius HTML so it can be served from this
// site: iframes become plain links, Genius image URLs are pointed at the local
// /images/ path, and any remaining absolute Genius URL prefix is removed.
//
// If the fragment cannot be parsed, or the parsed document cannot be
// serialised, only the final URL-prefix removal is applied to the input.
func cleanBody(body string) string {
if doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)); err == nil {
// Replace every iframe that carries a src with an anchor to that src.
doc.Find("iframe").Each(func(i int, s *goquery.Selection) {
src, exists := s.Attr("src")
if exists {
// NOTE(review): src is interpolated into the markup without HTML escaping.
html := fmt.Sprintf(`<a id="iframed-link" href="%s">Link</a>`, src)
s.ReplaceWithHtml(html)
}
})
// Point images hosted on images.genius.com / images.rapgenius.com at /images/.
doc.Find("img").Each(func(i int, s *goquery.Selection) {
src, exists := s.Attr("src")
if exists {
// NOTE(review): this pattern is recompiled for every <img> element.
re := regexp.MustCompile(`(?i)https:\/\/images\.(rapgenius|genius)\.com\/`)
pSrc := re.ReplaceAllString(src, "/images/")
s.SetAttr("src", pSrc)
}
})
if source, err := doc.Html(); err == nil {
body = source
}
}
// Strip the scheme and host from remaining Genius URLs so they become
// site-relative. NOTE(review): the dots in this pattern are unescaped, so
// they match any character, not just a literal ".".
re := regexp.MustCompile(`https?:\/\/[a-z]*.?genius.com`)
return re.ReplaceAllString(body, "")
}

68
handlers/article.go Normal file
View File

@ -0,0 +1,68 @@
package handlers
import (
"context"
"fmt"
"net/http"
"github.com/PuerkitoBio/goquery"
"github.com/gorilla/mux"
"github.com/rramiachraf/dumb/data"
"github.com/rramiachraf/dumb/utils"
"github.com/rramiachraf/dumb/views"
)
// article returns the handler for the /a/{article} route.
//
// The handler serves the article from the cache when present. Otherwise it
// fetches https://genius.com/a/<slug>, parses the page into a data.Article,
// renders it, and stores it in the cache.
//
// Error responses:
//   - 500 when Genius cannot be reached, the response body cannot be parsed
//     as HTML, or a Cloudflare interstitial is detected;
//   - 404 when Genius answers 404.
//
// A parse failure is logged and the (possibly partial) article is still
// rendered, but it is NOT cached, so a bad parse is not pinned in the cache.
func article(l *utils.Logger) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// logIfErr records failures (template rendering, cache writes) that
		// cannot be reported to the client once the response has started.
		logIfErr := func(err error) {
			if err != nil {
				l.Error(err.Error())
			}
		}

		articleSlug := mux.Vars(r)["article"]

		if a, err := getCache[data.Article](articleSlug); err == nil {
			logIfErr(views.ArticlePage(a).Render(context.Background(), w))
			return
		}

		url := fmt.Sprintf("https://genius.com/a/%s", articleSlug)

		resp, err := utils.SendRequest(url)
		if err != nil {
			l.Error(err.Error())
			w.WriteHeader(http.StatusInternalServerError)
			logIfErr(views.ErrorPage(500, "cannot reach Genius servers").Render(context.Background(), w))
			return
		}
		defer resp.Body.Close()

		if resp.StatusCode == http.StatusNotFound {
			w.WriteHeader(http.StatusNotFound)
			logIfErr(views.ErrorPage(404, "page not found").Render(context.Background(), w))
			return
		}

		doc, err := goquery.NewDocumentFromReader(resp.Body)
		if err != nil {
			l.Error(err.Error())
			w.WriteHeader(http.StatusInternalServerError)
			logIfErr(views.ErrorPage(500, "something went wrong").Render(context.Background(), w))
			return
		}

		if doc.Find(".cloudflare_content").Length() > 0 {
			l.Error("cloudflare got in the way")
			// Set the status explicitly: without it the "500" error page is
			// delivered with an implicit 200 OK.
			w.WriteHeader(http.StatusInternalServerError)
			logIfErr(views.ErrorPage(500, "cloudflare is detected").Render(context.Background(), w))
			return
		}

		var a data.Article
		parseErr := a.Parse(doc)
		logIfErr(parseErr)

		logIfErr(views.ArticlePage(a).Render(context.Background(), w))

		// Only cache articles that parsed cleanly; caching a failed parse would
		// keep serving the broken page until the entry expires.
		if parseErr != nil {
			return
		}
		logIfErr(setCache(articleSlug, a))
	}
}

55
handlers/article_test.go Normal file
View File

@ -0,0 +1,55 @@
package handlers
import (
"net/http"
"net/http/httptest"
"os"
"testing"
"github.com/PuerkitoBio/goquery"
"github.com/rramiachraf/dumb/utils"
)
// TestArticle sends a GET for a known article path through the router built
// by New and checks the rendered page: status 200, the expected text in
// #article-title and #article-subtitle, and a non-empty #article-body.
func TestArticle(t *testing.T) {
	const (
		path         = "/a/genius-celebrates-hip-hops-50th-anniversary-with-a-look-back-at-the-music-thats-defined-this-site"
		wantTitle    = "Genius Celebrates Hip-Hops 50th Anniversary With A Look Back At The Music Thats Defined This Site"
		wantSubtitle = "The first post in a yearlong look at the genres storied history."
	)

	req, err := http.NewRequest(http.MethodGet, path, nil)
	if err != nil {
		t.Fatal(err)
	}

	rec := httptest.NewRecorder()
	logger := utils.NewLogger(os.Stdout)
	router := New(logger, &assets{})
	router.ServeHTTP(rec, req)

	// Result returns the same response on every call, so hold on to it once.
	res := rec.Result()
	defer res.Body.Close()

	if got := res.StatusCode; got != http.StatusOK {
		t.Fatalf("expected %d, got %d\n", http.StatusOK, got)
	}

	page, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		t.Fatal(err)
	}

	if got := page.Find("#article-title").First().Text(); got != wantTitle {
		t.Fatalf("expected %q, got %q\n", wantTitle, got)
	}

	if got := page.Find("#article-subtitle").First().Text(); got != wantSubtitle {
		t.Fatalf("expected %q, got %q\n", wantSubtitle, got)
	}

	if page.Find("#article-body").First().Text() == "" {
		t.Fatal("missing article body\n")
	}
}

View File

@ -10,7 +10,7 @@ import (
)
// cachable is a type-set constraint listing the value types that may be
// stored in the cache. This change adds data.Article to the union so that
// article pages can be cached alongside albums, songs, annotations and artists.
type cachable interface {
data.Album | data.Song | data.Annotation | data.Artist | []byte
data.Album | data.Song | data.Annotation | data.Artist | data.Article | []byte
}
// c is the package-level cache, built from bigcache.DefaultConfig with a
// 24-hour duration.
// NOTE(review): the error returned by bigcache.New is discarded here.
var c, _ = bigcache.New(context.Background(), bigcache.DefaultConfig(time.Hour*24))

View File

@ -29,6 +29,7 @@ func New(logger *utils.Logger, staticFiles static) *mux.Router {
{Path: "/robots.txt", Handler: robotsHandler},
{Path: "/albums/{artist}/{albumName}", Handler: album},
{Path: "/artists/{artist}", Handler: artist},
{Path: "/a/{article}", Handler: article},
{Path: "/images/{filename}.{ext}", Handler: imageProxy},
{Path: "/search", Handler: search},
{Path: "/{annotation-id}/{artist-song}/{verse}/annotations", Handler: annotations},

View File

@ -1,4 +1,4 @@
.annotation #iframed-link {
#iframed-link {
font-weight: 500;
background-color: #ffcd38;
padding: 2px 6px;

19
style/article.css Normal file
View File

@ -0,0 +1,19 @@
/* Article page styles. */

/* Looser line spacing for the article body text. */
#article-body {
line-height: 1.75;
}
/* Secondary text (author sidebar, subtitle, date) in dark grey by default. */
#metadata, #article-subtitle, #article-date {
color: #333;
}
/* Overrides applied when an ancestor carries the .dark class. */
.dark #metadata, .dark #article-subtitle, .dark #article-date {
color: #ccc;
}
.dark #article-title {
color: #eee;
}
.dark #article-body {
color: #eee;
}

37
utils/clean_body.go Normal file
View File

@ -0,0 +1,37 @@
package utils
import (
	"fmt"
	"html"
	"regexp"
	"strings"

	"github.com/PuerkitoBio/goquery"
)
// Patterns used by CleanBody, compiled once at package scope instead of on
// every call (and, for the image pattern, on every <img> element).
var (
	// cleanBodyImageHostRe matches the Genius image hosts, optionally followed
	// by an "images/" path segment, case-insensitively.
	cleanBodyImageHostRe = regexp.MustCompile(`(?i)https://images\.(rapgenius|genius)\.com/(images/)?`)

	// cleanBodyGeniusOriginRe matches the scheme and host of a genius.com URL,
	// with or without a subdomain. The dots are escaped so they only match a
	// literal ".".
	cleanBodyGeniusOriginRe = regexp.MustCompile(`https?://[a-z]*\.?genius\.com`)
)

// CleanBody rewrites a fragment of Genius HTML so it can be served from this
// site:
//
//   - every <iframe> with a src attribute is replaced by a plain link to that
//     src;
//   - <img> sources on images.genius.com / images.rapgenius.com are pointed
//     at the local /images/ path;
//   - the scheme and host of any remaining genius.com URL are removed, which
//     leaves a site-relative path.
//
// If body cannot be parsed, or the parsed document cannot be serialised, the
// element rewrites are skipped and only the URL-prefix removal is applied to
// the input. The rewritten HTML is returned.
func CleanBody(body string) string {
	if doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)); err == nil {
		doc.Find("iframe").Each(func(_ int, s *goquery.Selection) {
			src, exists := s.Attr("src")
			if !exists {
				return
			}
			// Attr returns the decoded attribute value, so it must be escaped
			// again before being interpolated into markup; otherwise a quote
			// in src could break out of the href attribute.
			link := fmt.Sprintf(`<a id="iframed-link" href="%s">Link</a>`, html.EscapeString(src))
			s.ReplaceWithHtml(link)
		})

		doc.Find("img").Each(func(_ int, s *goquery.Selection) {
			src, exists := s.Attr("src")
			if !exists {
				return
			}
			s.SetAttr("src", cleanBodyImageHostRe.ReplaceAllString(src, "/images/"))
		})

		if source, err := doc.Html(); err == nil {
			body = source
		}
	}

	return cleanBodyGeniusOriginRe.ReplaceAllString(body, "")
}

32
views/article.templ Normal file
View File

@ -0,0 +1,32 @@
package views
import (
"time"
"github.com/rramiachraf/dumb/data"
)
// ArticlePage renders an article inside the shared layout, using the article
// title as the layout argument. The left column (#metadata) shows the article
// image and one collapsible <details> entry per author; the right column shows
// the title, publication date, subtitle and body.
//
// a.HTML is emitted with templ.Raw, i.e. without escaping.
// NOTE(review): presumably it has already been passed through utils.CleanBody
// by the parser; confirm for any new caller.
templ ArticlePage(a data.Article) {
@layout(a.Title) {
<div id="container" class="duo-split">
<div id="metadata">
<img src={ a.Image } alt="Article image"/>
<h3>Authored By</h3>
for _, author := range a.Authors {
<details>
<summary>{ author.Name } - { author.Role }</summary>
{ author.About }
</details>
}
</div>
<div>
<h1 id="article-title">{ a.Title }</h1>
<time datetime={ a.PublishedAt.Format(time.RFC3339) } id="article-date">{ a.PublishedAt.Format(time.DateOnly) }</time>
<h2 id="article-subtitle">{ a.Subtitle }</h2>
<div id="article-body">
@templ.Raw(a.HTML)
</div>
</div>
</div>
}
}