refactor: highlight code blocks function
This commit is contained in:
parent
634c7f1ad0
commit
42ad68fe34
@ -180,7 +180,7 @@ func extractQuestionData(doc *goquery.Document, domain string) (question types.F
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return question, err
|
return question, err
|
||||||
}
|
}
|
||||||
question.Body = template.HTML(utils.ReplaceImgTags(questionBodyParentHTML))
|
question.Body = template.HTML(processHTMLBody(questionBodyParentHTML))
|
||||||
|
|
||||||
// Extract the shortened body description.
|
// Extract the shortened body description.
|
||||||
shortenedBody := strings.TrimSpace(questionBodyParent.Text())
|
shortenedBody := strings.TrimSpace(questionBodyParent.Text())
|
||||||
@ -212,8 +212,6 @@ func extractMetadata(selection *goquery.Selection, question *types.FilteredQuest
|
|||||||
questionAuthorURL += questionAuthor.AttrOr("href", "")
|
questionAuthorURL += questionAuthor.AttrOr("href", "")
|
||||||
question.AuthorURL = questionAuthorURL
|
question.AuthorURL = questionAuthorURL
|
||||||
|
|
||||||
fmt.Printf("Author name is: %s\n", question.AuthorName)
|
|
||||||
|
|
||||||
// Determine if the question has been edited and update author details accordingly.
|
// Determine if the question has been edited and update author details accordingly.
|
||||||
isQuestionEdited := selection.Find("a.js-gps-track").Text() == "edited"
|
isQuestionEdited := selection.Find("a.js-gps-track").Text() == "edited"
|
||||||
if isQuestionEdited {
|
if isQuestionEdited {
|
||||||
@ -247,9 +245,7 @@ func extractAnswersData(doc *goquery.Document, domain string) ([]types.FilteredA
|
|||||||
answerBodyHTML, _ := answerBody.Html()
|
answerBodyHTML, _ := answerBody.Html()
|
||||||
|
|
||||||
// Process code blocks within the answer.
|
// Process code blocks within the answer.
|
||||||
processedAnswerBody := processAnswerBody(answerBodyHTML, domain)
|
processedAnswerBody := processHTMLBody(answerBodyHTML)
|
||||||
processedAnswerBody = utils.ReplaceImgTags(processedAnswerBody)
|
|
||||||
fmt.Println(processedAnswerBody)
|
|
||||||
answer.Body = template.HTML(html.UnescapeString(processedAnswerBody))
|
answer.Body = template.HTML(html.UnescapeString(processedAnswerBody))
|
||||||
|
|
||||||
// Extract author information and timestamp.
|
// Extract author information and timestamp.
|
||||||
@ -261,10 +257,11 @@ func extractAnswersData(doc *goquery.Document, domain string) ([]types.FilteredA
|
|||||||
return answers, nil
|
return answers, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// processAnswerBody highlights syntax and processes code blocks within an answer's body.
|
// processHTMLBody highlights syntax and replaces images with proxied versions.
|
||||||
func processAnswerBody(bodyHTML string, domain string) string {
|
func processHTMLBody(bodyHTML string) string {
|
||||||
highlightedBody := utils.HighlightSyntaxViaContent(bodyHTML)
|
highlightedBody := utils.HighlightCodeBlocks(bodyHTML)
|
||||||
return highlightedBody
|
imageProxiedBody := utils.ReplaceImgTags(highlightedBody)
|
||||||
|
return imageProxiedBody
|
||||||
}
|
}
|
||||||
|
|
||||||
// extractAnswerAuthorInfo extracts the author name, URL, and timestamp from an answer block.
|
// extractAnswerAuthorInfo extracts the author name, URL, and timestamp from an answer block.
|
||||||
|
@ -12,7 +12,9 @@ import (
|
|||||||
"github.com/alecthomas/chroma/styles"
|
"github.com/alecthomas/chroma/styles"
|
||||||
)
|
)
|
||||||
|
|
||||||
func HighlightSyntaxViaContent(content string) (htmlOut string) {
|
// highlightSyntaxViaContent uses Chroma to lex code content and apply the appropriate tokenizer engine.
|
||||||
|
// If it can't find one, it defaults to JavaScript syntax highlighting.
|
||||||
|
func highlightSyntaxViaContent(content string) (htmlOut string) {
|
||||||
content = html.UnescapeString(content)
|
content = html.UnescapeString(content)
|
||||||
|
|
||||||
fallbackOut := html.EscapeString(content)
|
fallbackOut := html.EscapeString(content)
|
||||||
@ -20,9 +22,7 @@ func HighlightSyntaxViaContent(content string) (htmlOut string) {
|
|||||||
// identify the language
|
// identify the language
|
||||||
lexer := lexers.Analyse(content)
|
lexer := lexers.Analyse(content)
|
||||||
if lexer == nil {
|
if lexer == nil {
|
||||||
// unable to identify, so just return the wrapped content
|
lexer = lexers.Get(".js")
|
||||||
htmlOut = fallbackOut
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
style := styles.Get("xcode")
|
style := styles.Get("xcode")
|
||||||
@ -54,7 +54,9 @@ func HighlightSyntaxViaContent(content string) (htmlOut string) {
|
|||||||
|
|
||||||
var preClassRegex = regexp.MustCompile(`(?s)<pre class=".+">`)
|
var preClassRegex = regexp.MustCompile(`(?s)<pre class=".+">`)
|
||||||
|
|
||||||
func StripBlockTags(content string) (result string) {
|
// stripBlockTags takes an extracted code block from HTML and strips it of its pre and code tags.
|
||||||
|
// What's returned is just the code.
|
||||||
|
func stripBlockTags(content string) (result string) {
|
||||||
// strip all "<code>" tags
|
// strip all "<code>" tags
|
||||||
content = strings.Replace(content, "<code>", "", -1)
|
content = strings.Replace(content, "<code>", "", -1)
|
||||||
content = strings.Replace(content, "</code>", "", -1)
|
content = strings.Replace(content, "</code>", "", -1)
|
||||||
@ -68,3 +70,26 @@ func StripBlockTags(content string) (result string) {
|
|||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var codeBlockRegex = regexp.MustCompile(`(?s)<pre><code>(.*?)<\/code><\/pre>`)
|
||||||
|
|
||||||
|
// HighlightCodeBlocks uses both highlightSyntaxViaContent stripCodeBlocks and returns the newly highlighted code HTML.
|
||||||
|
func HighlightCodeBlocks(html string) string {
|
||||||
|
// Replace each code block with the highlighted version
|
||||||
|
highlightedHTML := codeBlockRegex.ReplaceAllStringFunc(html, func(codeBlock string) string {
|
||||||
|
// Extract the code content from the code block
|
||||||
|
codeContent := codeBlockRegex.FindStringSubmatch(codeBlock)[1]
|
||||||
|
|
||||||
|
codeContent = stripBlockTags(codeContent)
|
||||||
|
|
||||||
|
// Highlight the code content
|
||||||
|
highlightedCode := highlightSyntaxViaContent(codeContent)
|
||||||
|
|
||||||
|
// Replace the original code block with the highlighted version
|
||||||
|
highlightedCodeBlock := "<pre>" + highlightedCode + "</pre>"
|
||||||
|
|
||||||
|
return highlightedCodeBlock
|
||||||
|
})
|
||||||
|
|
||||||
|
return highlightedHTML
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user