diff --git a/src/routes/question.go b/src/routes/question.go index e797d60..7c0b603 100644 --- a/src/routes/question.go +++ b/src/routes/question.go @@ -180,7 +180,7 @@ func extractQuestionData(doc *goquery.Document, domain string) (question types.F if err != nil { return question, err } - question.Body = template.HTML(utils.ReplaceImgTags(questionBodyParentHTML)) + question.Body = template.HTML(processHTMLBody(questionBodyParentHTML)) // Extract the shortened body description. shortenedBody := strings.TrimSpace(questionBodyParent.Text()) @@ -212,8 +212,6 @@ func extractMetadata(selection *goquery.Selection, question *types.FilteredQuest questionAuthorURL += questionAuthor.AttrOr("href", "") question.AuthorURL = questionAuthorURL - fmt.Printf("Author name is: %s\n", question.AuthorName) - // Determine if the question has been edited and update author details accordingly. isQuestionEdited := selection.Find("a.js-gps-track").Text() == "edited" if isQuestionEdited { @@ -247,9 +245,7 @@ func extractAnswersData(doc *goquery.Document, domain string) ([]types.FilteredA answerBodyHTML, _ := answerBody.Html() // Process code blocks within the answer. - processedAnswerBody := processAnswerBody(answerBodyHTML, domain) - processedAnswerBody = utils.ReplaceImgTags(processedAnswerBody) - fmt.Println(processedAnswerBody) + processedAnswerBody := processHTMLBody(answerBodyHTML) answer.Body = template.HTML(html.UnescapeString(processedAnswerBody)) // Extract author information and timestamp. @@ -261,10 +257,11 @@ func extractAnswersData(doc *goquery.Document, domain string) ([]types.FilteredA return answers, nil } -// processAnswerBody highlights syntax and processes code blocks within an answer's body. -func processAnswerBody(bodyHTML string, domain string) string { - highlightedBody := utils.HighlightSyntaxViaContent(bodyHTML) - return highlightedBody +// processHTMLBody highlights syntax and replaces images with proxied versions. 
+func processHTMLBody(bodyHTML string) string { + highlightedBody := utils.HighlightCodeBlocks(bodyHTML) + imageProxiedBody := utils.ReplaceImgTags(highlightedBody) + return imageProxiedBody } // extractAnswerAuthorInfo extracts the author name, URL, and timestamp from an answer block. diff --git a/src/utils/syntax.go b/src/utils/syntax.go index cb4c4ea..24c5d38 100644 --- a/src/utils/syntax.go +++ b/src/utils/syntax.go @@ -12,7 +12,9 @@ import ( "github.com/alecthomas/chroma/styles" ) -func HighlightSyntaxViaContent(content string) (htmlOut string) { +// highlightSyntaxViaContent uses Chroma to lex code content and apply the appropriate tokenizer engine. +// If it can't find one, it defaults to JavaScript syntax highlighting. +func highlightSyntaxViaContent(content string) (htmlOut string) { content = html.UnescapeString(content) fallbackOut := html.EscapeString(content) @@ -20,9 +22,7 @@ func HighlightSyntaxViaContent(content string) (htmlOut string) { // identify the language lexer := lexers.Analyse(content) if lexer == nil { - // unable to identify, so just return the wrapped content - htmlOut = fallbackOut - return + lexer = lexers.Get(".js") } style := styles.Get("xcode") @@ -54,7 +54,9 @@ func HighlightSyntaxViaContent(content string) (htmlOut string) { var preClassRegex = regexp.MustCompile(`(?s)
`)
-func StripBlockTags(content string) (result string) {
+// stripBlockTags takes an extracted code block from HTML and strips it of its pre and code tags.
+// What's returned is just the code.
+func stripBlockTags(content string) (result string) {
// strip all "" tags
content = strings.Replace(content, "", "", -1)
content = strings.Replace(content, "
", "", -1)
@@ -68,3 +70,26 @@ func StripBlockTags(content string) (result string) {
return
}
+
+var codeBlockRegex = regexp.MustCompile(`(?s)(.*?)<\/code><\/pre>`)
+
+// HighlightCodeBlocks passes every codeBlockRegex match in html through stripBlockTags and highlightSyntaxViaContent, and returns html with each match replaced by its highlighted version.
+func HighlightCodeBlocks(html string) string {
+ // Rewrite each codeBlockRegex match; ReplaceAllStringFunc leaves the text outside the matches as it is.
+ highlightedHTML := codeBlockRegex.ReplaceAllStringFunc(html, func(codeBlock string) string {
+ // The callback only receives the whole match, so the regex is run again to get capture group 1 (presumably always matches, since codeBlock came from the same regex; a nil result would panic on the index).
+ codeContent := codeBlockRegex.FindStringSubmatch(codeBlock)[1]
+
+ codeContent = stripBlockTags(codeContent)
+
+ // Produce the highlighted markup for the extracted code.
+ highlightedCode := highlightSyntaxViaContent(codeContent)
+
+ // Build the replacement text for this match around the highlighted code.
+ highlightedCodeBlock := "" + highlightedCode + "
"
+
+ return highlightedCodeBlock
+ })
+
+ return highlightedHTML
+}