feat: add article page
This commit is contained in:
parent
941cafc664
commit
0ced2495ee
64
data/article.go
Normal file
64
data/article.go
Normal file
@ -0,0 +1,64 @@
|
||||
package data
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/rramiachraf/dumb/utils"
|
||||
)
|
||||
|
||||
// Article is the parsed content of a Genius article page. Its fields are
// filled in by (*Article).Parse and it is rendered by views.ArticlePage.
type Article struct {
|
||||
// Title is the article headline.
Title string
|
||||
// Subtitle is taken from the "dek" field of the page metadata.
Subtitle string
|
||||
// HTML is the article body markup after utils.CleanBody has been applied.
HTML string
|
||||
// Authors lists the authors decoded from the page metadata.
Authors []Author
|
||||
// PublishedAt is built with time.Unix from the "published_at" value.
PublishedAt time.Time
|
||||
// Image is the result of ExtractImageURL on the "preview_image" value.
Image string
|
||||
}
|
||||
|
||||
// Author describes one author entry of an article as decoded from the page
// metadata JSON.
type Author struct {
|
||||
// Name carries no JSON tag, so encoding/json matches it against a key
// named "Name" (case-insensitively).
Name string
|
||||
// Role is decoded from "human_readable_role_for_display".
Role string `json:"human_readable_role_for_display"`
|
||||
// About is decoded from "about_me_summary".
About string `json:"about_me_summary"`
|
||||
}
|
||||
|
||||
// articleResponse mirrors the part of the JSON document stored in the
// content attribute of the page's meta[itemprop='page_data'] tag that
// parseArticleData reads.
type articleResponse struct {
|
||||
// Article holds the article payload; untagged fields rely on
// encoding/json's case-insensitive key matching.
Article struct {
|
||||
Title string
|
||||
// Subtitle is stored under the key "dek".
Subtitle string `json:"dek"`
|
||||
Authors []Author
|
||||
// Body wraps the raw article markup.
Body struct {
|
||||
HTML string
|
||||
}
|
||||
// PublishedAt is an integer timestamp; parseArticleData passes it to
// time.Unix as the seconds argument.
PublishedAt int64 `json:"published_at"`
|
||||
// Image is the raw "preview_image" value, later passed through
// ExtractImageURL.
Image string `json:"preview_image"`
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Article) parseArticleData(doc *goquery.Document) error {
|
||||
pageMetadata, exists := doc.Find("meta[itemprop='page_data']").Attr("content")
|
||||
if !exists {
|
||||
return nil
|
||||
}
|
||||
|
||||
var articleData articleResponse
|
||||
if err := json.Unmarshal([]byte(pageMetadata), &articleData); err != nil {
|
||||
return err
|
||||
}
|
||||
data := articleData.Article
|
||||
|
||||
a.Title = data.Title
|
||||
a.Subtitle = data.Subtitle
|
||||
|
||||
a.HTML = utils.CleanBody(data.Body.HTML)
|
||||
a.Authors = data.Authors
|
||||
a.PublishedAt = time.Unix(data.PublishedAt, 0)
|
||||
a.Image = ExtractImageURL(data.Image)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parse populates a from doc by delegating to parseArticleData and returns
// whatever error that call reports.
func (a *Article) Parse(doc *goquery.Document) error {
|
||||
return a.parseArticleData(doc)
|
||||
}
|
@ -6,10 +6,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/rramiachraf/dumb/data"
|
||||
"github.com/rramiachraf/dumb/utils"
|
||||
@ -67,7 +64,7 @@ func annotations(l *utils.Logger) http.HandlerFunc {
|
||||
}
|
||||
|
||||
body := data.Response.Referent.Annotations[0].Body
|
||||
body.HTML = cleanBody(body.HTML)
|
||||
body.HTML = utils.CleanBody(body.HTML)
|
||||
|
||||
w.Header().Set("content-type", "application/json")
|
||||
encoder := json.NewEncoder(w)
|
||||
@ -82,31 +79,3 @@ func annotations(l *utils.Logger) http.HandlerFunc {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func cleanBody(body string) string {
|
||||
if doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)); err == nil {
|
||||
doc.Find("iframe").Each(func(i int, s *goquery.Selection) {
|
||||
src, exists := s.Attr("src")
|
||||
if exists {
|
||||
html := fmt.Sprintf(`<a id="iframed-link" href="%s">Link</a>`, src)
|
||||
s.ReplaceWithHtml(html)
|
||||
}
|
||||
})
|
||||
|
||||
doc.Find("img").Each(func(i int, s *goquery.Selection) {
|
||||
src, exists := s.Attr("src")
|
||||
if exists {
|
||||
re := regexp.MustCompile(`(?i)https:\/\/images\.(rapgenius|genius)\.com\/`)
|
||||
pSrc := re.ReplaceAllString(src, "/images/")
|
||||
s.SetAttr("src", pSrc)
|
||||
}
|
||||
})
|
||||
|
||||
if source, err := doc.Html(); err == nil {
|
||||
body = source
|
||||
}
|
||||
}
|
||||
|
||||
re := regexp.MustCompile(`https?:\/\/[a-z]*.?genius.com`)
|
||||
return re.ReplaceAllString(body, "")
|
||||
}
|
||||
|
68
handlers/article.go
Normal file
68
handlers/article.go
Normal file
@ -0,0 +1,68 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/rramiachraf/dumb/data"
|
||||
"github.com/rramiachraf/dumb/utils"
|
||||
"github.com/rramiachraf/dumb/views"
|
||||
)
|
||||
|
||||
func article(l *utils.Logger) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
articleSlug := mux.Vars(r)["article"]
|
||||
|
||||
if a, err := getCache[data.Article](articleSlug); err == nil {
|
||||
views.ArticlePage(a).Render(context.Background(), w)
|
||||
return
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("https://genius.com/a/%s", articleSlug)
|
||||
|
||||
resp, err := utils.SendRequest(url)
|
||||
if err != nil {
|
||||
l.Error(err.Error())
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
views.ErrorPage(500, "cannot reach Genius servers").Render(context.Background(), w)
|
||||
return
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == http.StatusNotFound {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
views.ErrorPage(404, "page not found").Render(context.Background(), w)
|
||||
return
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
l.Error(err.Error())
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
views.ErrorPage(500, "something went wrong").Render(context.Background(), w)
|
||||
return
|
||||
}
|
||||
|
||||
cf := doc.Find(".cloudflare_content").Length()
|
||||
if cf > 0 {
|
||||
l.Error("cloudflare got in the way")
|
||||
views.ErrorPage(500, "cloudflare is detected").Render(context.Background(), w)
|
||||
return
|
||||
}
|
||||
|
||||
var a data.Article
|
||||
if err = a.Parse(doc); err != nil {
|
||||
l.Error(err.Error())
|
||||
}
|
||||
|
||||
views.ArticlePage(a).Render(context.Background(), w)
|
||||
|
||||
if err = setCache(articleSlug, a); err != nil {
|
||||
l.Error(err.Error())
|
||||
}
|
||||
}
|
||||
}
|
55
handlers/article_test.go
Normal file
55
handlers/article_test.go
Normal file
@ -0,0 +1,55 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
|
||||
"github.com/rramiachraf/dumb/utils"
|
||||
)
|
||||
|
||||
func TestArticle(t *testing.T) {
|
||||
url := "/a/genius-celebrates-hip-hops-50th-anniversary-with-a-look-back-at-the-music-thats-defined-this-site"
|
||||
title := "Genius Celebrates Hip-Hop’s 50th Anniversary With A Look Back At The Music That’s Defined This Site"
|
||||
subtitle := "The first post in a yearlong look at the genre’s storied history."
|
||||
|
||||
r, err := http.NewRequest(http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rr := httptest.NewRecorder()
|
||||
l := utils.NewLogger(os.Stdout)
|
||||
m := New(l, &assets{})
|
||||
|
||||
m.ServeHTTP(rr, r)
|
||||
|
||||
defer rr.Result().Body.Close()
|
||||
|
||||
if rr.Result().StatusCode != http.StatusOK {
|
||||
t.Fatalf("expected %d, got %d\n", http.StatusOK, rr.Result().StatusCode)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(rr.Result().Body)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
articleTitle := doc.Find("#article-title").First().Text()
|
||||
if articleTitle != title {
|
||||
t.Fatalf("expected %q, got %q\n", title, articleTitle)
|
||||
}
|
||||
|
||||
articleSubtitle := doc.Find("#article-subtitle").First().Text()
|
||||
if articleSubtitle != subtitle {
|
||||
t.Fatalf("expected %q, got %q\n", subtitle, articleSubtitle)
|
||||
}
|
||||
|
||||
articleBody := doc.Find("#article-body").First().Text()
|
||||
if len(articleBody) == 0 {
|
||||
t.Fatal("missing article body\n")
|
||||
}
|
||||
}
|
@ -10,7 +10,7 @@ import (
|
||||
)
|
||||
|
||||
type cachable interface {
|
||||
data.Album | data.Song | data.Annotation | data.Artist | []byte
|
||||
data.Album | data.Song | data.Annotation | data.Artist | data.Article | []byte
|
||||
}
|
||||
|
||||
// c is the package-level cache instance. The error returned by bigcache.New
// is discarded here.
// NOTE(review): time.Hour*24 is the argument given to bigcache.DefaultConfig,
// presumably the entry lifetime — confirm against the bigcache documentation.
var c, _ = bigcache.New(context.Background(), bigcache.DefaultConfig(time.Hour*24))
|
||||
|
@ -29,6 +29,7 @@ func New(logger *utils.Logger, staticFiles static) *mux.Router {
|
||||
{Path: "/robots.txt", Handler: robotsHandler},
|
||||
{Path: "/albums/{artist}/{albumName}", Handler: album},
|
||||
{Path: "/artists/{artist}", Handler: artist},
|
||||
{Path: "/a/{article}", Handler: article},
|
||||
{Path: "/images/{filename}.{ext}", Handler: imageProxy},
|
||||
{Path: "/search", Handler: search},
|
||||
{Path: "/{annotation-id}/{artist-song}/{verse}/annotations", Handler: annotations},
|
||||
|
@ -1,4 +1,4 @@
|
||||
.annotation #iframed-link {
|
||||
#iframed-link {
|
||||
font-weight: 500;
|
||||
background-color: #ffcd38;
|
||||
padding: 2px 6px;
|
||||
|
19
style/article.css
Normal file
19
style/article.css
Normal file
@ -0,0 +1,19 @@
|
||||
/* Article page: body text spacing. */
#article-body {
  line-height: 1.75;
}

/* Secondary text (sidebar, subtitle, date) in the default theme. */
#metadata,
#article-subtitle,
#article-date {
  color: #333;
}

/* Secondary text in the dark theme. */
.dark #metadata,
.dark #article-subtitle,
.dark #article-date {
  color: #ccc;
}

/* Primary text (title and body) in the dark theme. */
.dark #article-title,
.dark #article-body {
  color: #eee;
}
|
37
utils/clean_body.go
Normal file
37
utils/clean_body.go
Normal file
@ -0,0 +1,37 @@
|
||||
package utils
|
||||
|
||||
import (
	"fmt"
	"html"
	"regexp"
	"strings"

	"github.com/PuerkitoBio/goquery"
)
|
||||
|
||||
func CleanBody(body string) string {
|
||||
if doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)); err == nil {
|
||||
doc.Find("iframe").Each(func(i int, s *goquery.Selection) {
|
||||
src, exists := s.Attr("src")
|
||||
if exists {
|
||||
html := fmt.Sprintf(`<a id="iframed-link" href="%s">Link</a>`, src)
|
||||
s.ReplaceWithHtml(html)
|
||||
}
|
||||
})
|
||||
|
||||
doc.Find("img").Each(func(i int, s *goquery.Selection) {
|
||||
src, exists := s.Attr("src")
|
||||
if exists {
|
||||
re := regexp.MustCompile(`(?i)https:\/\/images\.(rapgenius|genius)\.com\/(images\/)?`)
|
||||
pSrc := re.ReplaceAllString(src, "/images/")
|
||||
s.SetAttr("src", pSrc)
|
||||
}
|
||||
})
|
||||
|
||||
if source, err := doc.Html(); err == nil {
|
||||
body = source
|
||||
}
|
||||
}
|
||||
|
||||
re := regexp.MustCompile(`https?:\/\/[a-z]*.?genius.com`)
|
||||
return re.ReplaceAllString(body, "")
|
||||
}
|
32
views/article.templ
Normal file
32
views/article.templ
Normal file
@ -0,0 +1,32 @@
|
||||
package views
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/rramiachraf/dumb/data"
|
||||
)
|
||||
|
||||
// ArticlePage renders an article inside the shared layout: a metadata column
// with the preview image and the authors, and a content column with the
// title, publication date, subtitle and the pre-cleaned body markup.
//
// The image and the "Authored By" section are only emitted when the article
// actually has them, so an article without that data does not produce an
// <img> with an empty src or a heading with nothing under it.
templ ArticlePage(a data.Article) {
	@layout(a.Title) {
		<div id="container" class="duo-split">
			<div id="metadata">
				if a.Image != "" {
					<img src={ a.Image } alt="Article image"/>
				}
				if len(a.Authors) > 0 {
					<h3>Authored By</h3>
					for _, author := range a.Authors {
						<details>
							<summary>{ author.Name } - { author.Role }</summary>
							{ author.About }
						</details>
					}
				}
			</div>
			<div>
				<h1 id="article-title">{ a.Title }</h1>
				<time datetime={ a.PublishedAt.Format(time.RFC3339) } id="article-date">{ a.PublishedAt.Format(time.DateOnly) }</time>
				<h2 id="article-subtitle">{ a.Subtitle }</h2>
				<div id="article-body">
					@templ.Raw(a.HTML)
				</div>
			</div>
		</div>
	}
}
|
Loading…
x
Reference in New Issue
Block a user