dumb/data/article.go
2024-06-23 17:03:18 -06:00

65 lines
1.3 KiB
Go

package data
import (
"encoding/json"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/rramiachraf/dumb/utils"
)
// Article is the display-ready form of an article page. Its fields are
// filled in by (*Article).parseArticleData from the page's embedded JSON.
type Article struct {
	// Title and Subtitle are copied as-is from the decoded payload
	// (Subtitle is read from the payload's "dek" key).
	Title, Subtitle string

	// HTML is the payload's body markup after utils.CleanBody has run on it.
	HTML string

	// Authors is the author list taken directly from the decoded payload.
	Authors []Author

	// PublishedAt is built with time.Unix from the payload's published_at
	// value, treated as whole seconds.
	PublishedAt time.Time

	// Image is the result of ExtractImageURL applied to the payload's
	// preview_image value.
	Image string
}
// Author is one entry of an article's author list as decoded from JSON.
type Author struct {
	// Name carries no tag, so encoding/json uses the field name itself as
	// the key.
	Name string

	// Role is read from the "human_readable_role_for_display" key.
	Role string `json:"human_readable_role_for_display"`

	// About is read from the "about_me_summary" key.
	About string `json:"about_me_summary"`
}
// articleResponse describes the part of the page_data JSON document that
// parseArticleData decodes. Only the nested "article" object is modelled;
// fields without an explicit tag are keyed by their Go field name.
type articleResponse struct {
	Article struct {
		Title string

		// Subtitle is stored under the "dek" key in the payload.
		Subtitle string `json:"dek"`

		Authors []Author

		// Body wraps the article markup in its own JSON object.
		Body struct{ HTML string }

		// PublishedAt is an integer that parseArticleData hands to
		// time.Unix as a seconds value.
		PublishedAt int64 `json:"published_at"`

		// Image is the raw preview_image value, before ExtractImageURL.
		Image string `json:"preview_image"`
	}
}
// parseArticleData fills a from the JSON stored in the content attribute of
// the document's meta[itemprop='page_data'] element.
//
// If the attribute lookup reports that it does not exist, the method returns
// nil and leaves a unmodified. If the attribute value cannot be decoded, the
// error from json.Unmarshal is returned as-is and a is likewise unmodified.
func (a *Article) parseArticleData(doc *goquery.Document) error {
	raw, ok := doc.Find("meta[itemprop='page_data']").Attr("content")
	if !ok {
		// Nothing to parse; this is reported as success, not as an error.
		return nil
	}

	var resp articleResponse
	err := json.Unmarshal([]byte(raw), &resp)
	if err != nil {
		return err
	}

	src := &resp.Article
	a.Title, a.Subtitle = src.Title, src.Subtitle
	a.HTML = utils.CleanBody(src.Body.HTML)
	a.Authors = src.Authors
	// The payload timestamp is interpreted as seconds since the Unix epoch.
	a.PublishedAt = time.Unix(src.PublishedAt, 0)
	a.Image = ExtractImageURL(src.Image)

	return nil
}
// Parse populates a from doc by delegating to parseArticleData and returns
// whatever error that call produces.
func (a *Article) Parse(doc *goquery.Document) error {
	if err := a.parseArticleData(doc); err != nil {
		return err
	}
	return nil
}