refactor: highlight code blocks function
This commit is contained in:
parent
634c7f1ad0
commit
42ad68fe34
@ -180,7 +180,7 @@ func extractQuestionData(doc *goquery.Document, domain string) (question types.F
|
||||
if err != nil {
|
||||
return question, err
|
||||
}
|
||||
question.Body = template.HTML(utils.ReplaceImgTags(questionBodyParentHTML))
|
||||
question.Body = template.HTML(processHTMLBody(questionBodyParentHTML))
|
||||
|
||||
// Extract the shortened body description.
|
||||
shortenedBody := strings.TrimSpace(questionBodyParent.Text())
|
||||
@ -212,8 +212,6 @@ func extractMetadata(selection *goquery.Selection, question *types.FilteredQuest
|
||||
questionAuthorURL += questionAuthor.AttrOr("href", "")
|
||||
question.AuthorURL = questionAuthorURL
|
||||
|
||||
fmt.Printf("Author name is: %s\n", question.AuthorName)
|
||||
|
||||
// Determine if the question has been edited and update author details accordingly.
|
||||
isQuestionEdited := selection.Find("a.js-gps-track").Text() == "edited"
|
||||
if isQuestionEdited {
|
||||
@ -247,9 +245,7 @@ func extractAnswersData(doc *goquery.Document, domain string) ([]types.FilteredA
|
||||
answerBodyHTML, _ := answerBody.Html()
|
||||
|
||||
// Process code blocks within the answer.
|
||||
processedAnswerBody := processAnswerBody(answerBodyHTML, domain)
|
||||
processedAnswerBody = utils.ReplaceImgTags(processedAnswerBody)
|
||||
fmt.Println(processedAnswerBody)
|
||||
processedAnswerBody := processHTMLBody(answerBodyHTML)
|
||||
answer.Body = template.HTML(html.UnescapeString(processedAnswerBody))
|
||||
|
||||
// Extract author information and timestamp.
|
||||
@ -261,10 +257,11 @@ func extractAnswersData(doc *goquery.Document, domain string) ([]types.FilteredA
|
||||
return answers, nil
|
||||
}
|
||||
|
||||
// processAnswerBody highlights syntax and processes code blocks within an answer's body.
|
||||
func processAnswerBody(bodyHTML string, domain string) string {
|
||||
highlightedBody := utils.HighlightSyntaxViaContent(bodyHTML)
|
||||
return highlightedBody
|
||||
// processHTMLBody highlights syntax and replaces images with proxied versions.
|
||||
func processHTMLBody(bodyHTML string) string {
|
||||
highlightedBody := utils.HighlightCodeBlocks(bodyHTML)
|
||||
imageProxiedBody := utils.ReplaceImgTags(highlightedBody)
|
||||
return imageProxiedBody
|
||||
}
|
||||
|
||||
// extractAnswerAuthorInfo extracts the author name, URL, and timestamp from an answer block.
|
||||
|
@ -12,7 +12,9 @@ import (
|
||||
"github.com/alecthomas/chroma/styles"
|
||||
)
|
||||
|
||||
func HighlightSyntaxViaContent(content string) (htmlOut string) {
|
||||
// highlightSyntaxViaContent uses Chroma to lex code content and apply the appropriate tokenizer engine.
|
||||
// If it can't find one, it defaults to JavaScript syntax highlighting.
|
||||
func highlightSyntaxViaContent(content string) (htmlOut string) {
|
||||
content = html.UnescapeString(content)
|
||||
|
||||
fallbackOut := html.EscapeString(content)
|
||||
@ -20,9 +22,7 @@ func HighlightSyntaxViaContent(content string) (htmlOut string) {
|
||||
// identify the language
|
||||
lexer := lexers.Analyse(content)
|
||||
if lexer == nil {
|
||||
// unable to identify, so just return the wrapped content
|
||||
htmlOut = fallbackOut
|
||||
return
|
||||
lexer = lexers.Get(".js")
|
||||
}
|
||||
|
||||
style := styles.Get("xcode")
|
||||
@ -54,7 +54,9 @@ func HighlightSyntaxViaContent(content string) (htmlOut string) {
|
||||
|
||||
var preClassRegex = regexp.MustCompile(`(?s)<pre class=".+">`)
|
||||
|
||||
func StripBlockTags(content string) (result string) {
|
||||
// stripBlockTags takes an extracted code block from HTML and strips it of its pre and code tags.
|
||||
// What's returned is just the code.
|
||||
func stripBlockTags(content string) (result string) {
|
||||
// strip all "<code>" tags
|
||||
content = strings.Replace(content, "<code>", "", -1)
|
||||
content = strings.Replace(content, "</code>", "", -1)
|
||||
@ -68,3 +70,26 @@ func StripBlockTags(content string) (result string) {
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
var codeBlockRegex = regexp.MustCompile(`(?s)<pre><code>(.*?)<\/code><\/pre>`)
|
||||
|
||||
// HighlightCodeBlocks uses both highlightSyntaxViaContent stripCodeBlocks and returns the newly highlighted code HTML.
|
||||
func HighlightCodeBlocks(html string) string {
|
||||
// Replace each code block with the highlighted version
|
||||
highlightedHTML := codeBlockRegex.ReplaceAllStringFunc(html, func(codeBlock string) string {
|
||||
// Extract the code content from the code block
|
||||
codeContent := codeBlockRegex.FindStringSubmatch(codeBlock)[1]
|
||||
|
||||
codeContent = stripBlockTags(codeContent)
|
||||
|
||||
// Highlight the code content
|
||||
highlightedCode := highlightSyntaxViaContent(codeContent)
|
||||
|
||||
// Replace the original code block with the highlighted version
|
||||
highlightedCodeBlock := "<pre>" + highlightedCode + "</pre>"
|
||||
|
||||
return highlightedCodeBlock
|
||||
})
|
||||
|
||||
return highlightedHTML
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user