Replace StackOverflow Links (#90)

* feat: replace stackoverflow and exchange links

* fix: replace stackoverflow.com links with path

* feat: run stack overflow link replacer on process

* feat: process HTML on comment text
This commit is contained in:
httpjamesm
2024-03-09 12:06:41 -05:00
committed by GitHub
parent ff66f41f47
commit e82646635e
7 changed files with 112 additions and 10 deletions

View File

@ -50,7 +50,7 @@ func FindAndReturnComments(inHtml, domain string, postLayout *goquery.Selection)
commentTimestamp := commentBody.Find("span.relativetime-clean").Text()
newFilteredComment := types.FilteredComment{
Text: template.HTML(commentCopy),
Text: template.HTML(ProcessHTMLBody(commentCopy)),
Timestamp: commentTimestamp,
AuthorName: commentAuthor.Text(),
AuthorURL: commentAuthorURL,

45
src/utils/links.go Normal file
View File

@ -0,0 +1,45 @@
package utils
import (
"net/url"
"regexp"
"strings"
)
// stackOverflowLinkQualifierRegex matches every anchor element that meets all
// of the following conditions:
//   - it is an <a> element with a double-quoted href attribute
//   - the href path begins with /q/ or /questions/
//   - if the href has a host, it is stackoverflow.com or stackexchange.com
//     (optionally prefixed by www. and/or any number of subdomains), reached
//     over http or https
//
// Because `.` does not match newlines here, the anchor's content up to the
// closing </a> must sit on a single line.
var stackOverflowLinkQualifierRegex = regexp.MustCompile(`<a\s[^>]*href="(?:https?://(?:www\.)?(?:\w+\.)*(?:stackoverflow|stackexchange)\.com)?/(?:q|questions)/[^"]*"[^>]*>.*?</a>`)

// hrefAttrRegex captures the value of a double-quoted href attribute. It is
// compiled once at package scope instead of once per matched anchor.
var hrefAttrRegex = regexp.MustCompile(`href="([^"]*)"`)

// ReplaceStackOverflowLinks rewrites the href of every anchor matched by
// stackOverflowLinkQualifierRegex into a host-less, site-local URL.
//
// The scheme and host are dropped. When the host carries a subdomain (for
// example security.stackexchange.com), the path is prefixed with
// "/exchange/<subdomain>"; a leading "www." is not treated as a subdomain.
// The query string and fragment are preserved together with their "?" and "#"
// separators, and percent-encoding in the path is kept intact.
//
// Anchors whose href cannot be located or parsed are returned unchanged, as
// is all text outside matched anchors.
func ReplaceStackOverflowLinks(html string) string {
	return stackOverflowLinkQualifierRegex.ReplaceAllStringFunc(html, func(anchor string) string {
		// Work with byte offsets so that exactly the href value is replaced,
		// even if the same text also appears in another attribute.
		loc := hrefAttrRegex.FindStringSubmatchIndex(anchor)
		if loc == nil {
			return anchor
		}
		hrefStart, hrefEnd := loc[2], loc[3]

		parsed, err := url.Parse(anchor[hrefStart:hrefEnd])
		if err != nil {
			return anchor
		}

		// Start from a copy so query, fragment and their raw forms carry over,
		// then strip everything that makes the URL absolute.
		local := *parsed
		local.Scheme = ""
		local.Host = ""
		local.User = nil

		// "www" is just an alias of the bare domain, not a site subdomain.
		host := strings.TrimPrefix(parsed.Host, "www.")
		if labels := strings.Split(host, "."); len(labels) > 2 {
			prefix := "/exchange/" + labels[0]
			// Keep Path and RawPath in sync so String() preserves the
			// original percent-encoding of the path.
			local.RawPath = prefix + parsed.EscapedPath()
			local.Path = prefix + parsed.Path
		}

		return anchor[:hrefStart] + local.String() + anchor[hrefEnd:]
	})
}

49
src/utils/links_test.go Normal file
View File

@ -0,0 +1,49 @@
package utils
import (
"fmt"
"github.com/stretchr/testify/assert"
"strings"
"testing"
)
// sampleInput is an HTML fixture for TestReplaceStackOverflowLinks. It holds
// three question anchors, one for each link shape the replacer handles:
// a relative /questions/ path, an absolute stackoverflow.com URL, and an
// absolute URL on a stackexchange.com subdomain (security).
var sampleInput = `<div class="d-flex fd-column fw-nowrap">
<div class="d-flex fw-nowrap">
<div class="flex--item wmn0 fl1 lh-lg">
<div class="flex--item fl1 lh-lg">
<div>
<b>This question already has answers here</b>:
</div>
</div>
</div>
</div>
<div class="flex--item mb0 mt4">
<a href="/questions/55083952/is-it-possible-to-populate-a-large-set-at-compile-time" dir="ltr">Is it possible to populate a large set at compile time?</a>
<span class="question-originals-answer-count">
(3 answers)
</span>
</div>
<div class="flex--item mb0 mt4">
<a href="https://stackoverflow.com/questions/27221504/how-can-you-make-a-safe-static-singleton-in-rust" dir="ltr">How can you make a safe static singleton in Rust?</a>
<span class="question-originals-answer-count">
(5 answers)
</span>
</div>
<div class="flex--item mb0 mt4">
<a href="https://security.stackexchange.com/questions/25371/brute-force-an-ssh-login-that-has-only-a-4-letter-password" dir="ltr">Brute-force an SSH-login that has only a 4-letter password</a>
<span class="question-originals-answer-count">
(9 answers)
</span>
</div>
<div class="flex--item mb0 mt8">Closed <span title="2020-01-29 14:28:42Z" class="relativetime">4 years ago</span>.</div>
</div>`
// TestReplaceStackOverflowLinks runs the replacer over sampleInput and checks
// both directions: no absolute Stack Overflow / Stack Exchange host survives,
// and each of the three anchors ends up with the expected site-local href.
// The positive checks matter because the negative ones alone would also pass
// if the links were dropped or mangled.
func TestReplaceStackOverflowLinks(t *testing.T) {
	replacedLinks := ReplaceStackOverflowLinks(sampleInput)

	// No external host may remain anywhere in the output.
	for _, host := range []string{"stackoverflow.com", "stackexchange.com"} {
		assert.False(t, strings.Contains(replacedLinks, host),
			fmt.Sprintf("output still contains %q:\n%s", host, replacedLinks))
	}

	// Every anchor must carry its rewritten, host-less href.
	expectedHrefs := []string{
		`href="/questions/55083952/is-it-possible-to-populate-a-large-set-at-compile-time"`,
		`href="/questions/27221504/how-can-you-make-a-safe-static-singleton-in-rust"`,
		`href="/exchange/security/questions/25371/brute-force-an-ssh-login-that-has-only-a-4-letter-password"`,
	}
	for _, want := range expectedHrefs {
		assert.True(t, strings.Contains(replacedLinks, want),
			fmt.Sprintf("output is missing %s:\n%s", want, replacedLinks))
	}
}

9
src/utils/process.go Normal file
View File

@ -0,0 +1,9 @@
package utils
// ProcessHTMLBody prepares an HTML body for display by piping it through the
// preparation steps in a fixed order: HighlightCodeBlocks first, then
// ReplaceImgTags, and finally ReplaceStackOverflowLinks. The output of each
// step is the input of the next; the result of the last step is returned.
func ProcessHTMLBody(bodyHTML string) string {
	processed := bodyHTML
	processed = HighlightCodeBlocks(processed)
	processed = ReplaceImgTags(processed)
	return ReplaceStackOverflowLinks(processed)
}