tren/extract/extractor.go
ngn 4df8b90dae
All checks were successful
Build and publish the docker image / build (push) Successful in 58s
initial commit
Signed-off-by: ngn <ngn@ngn.tf>
2025-04-05 09:58:58 +03:00

88 lines
1.7 KiB
Go

package extract
import (
	"fmt"
	"net/http"
	"net/url"
	"time"

	"github.com/PuerkitoBio/goquery"
)
// BASE_URL is the root that every request path is joined onto.
const BASE_URL = "https://tureng.com/en/"

// USER_AGENT is the value sent in the User-Agent header of every request.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.3"

// REFERER is the value sent in the Referer header of every request.
const REFERER = "https://tureng.com/en/turkish-english"
// Extractor requests pages below BASE_URL and turns them into translations.
// Obtain one through New; the zero value has a nil client and cannot send
// requests.
type Extractor struct {
// client is the HTTP client every request is sent through.
client *http.Client
}
// get sends a GET request for path, resolved relative to BASE_URL, with the
// USER_AGENT and REFERER headers set.
//
// On success (status 200) the response is returned and the caller is
// responsible for closing its body. On any failure the returned response is
// nil; this includes a non-200 status, in which case the body has already been
// closed here.
func (e *Extractor) get(path string) (*http.Response, error) {
	full_url, err := url.JoinPath(BASE_URL, path)
	if err != nil {
		return nil, err
	}

	req, err := http.NewRequest(http.MethodGet, full_url, nil)
	if err != nil {
		return nil, err
	}

	// headers
	req.Header.Set("User-Agent", USER_AGENT)
	req.Header.Set("Referer", REFERER)

	res, err := e.client.Do(req)
	if err != nil {
		return nil, err
	}

	if res.StatusCode != http.StatusOK {
		// The caller never sees this response, so the body has to be closed
		// here; otherwise the underlying connection is leaked.
		res.Body.Close()
		return nil, fmt.Errorf("bad response code: %d", res.StatusCode)
	}

	return res, nil
}
// Translate fetches the page for term in the dictionary named dict_name and
// returns the results extracted from it.
//
// When find_dict does not recognise dict_name the result is (nil, nil), so
// callers must check the returned pointer as well as the error. A failure to
// fetch the page is wrapped, keeping the underlying error reachable through
// errors.Is and errors.As. If extracting the results fails, the error is
// returned together with a non-nil Translation.
func (e *Extractor) Translate(dict_name string, term string) (*Translation, error) {
	var (
		res *http.Response
		doc *goquery.Document
		err error
	)

	dict := find_dict(dict_name)
	if dict == nil {
		return nil, nil
	}

	term_path := dict.Path(term)

	if res, err = e.get(term_path); err != nil {
		// %w yields the same message as before but preserves the error chain.
		return nil, fmt.Errorf("failed to get %s: %w", term_path, err)
	}
	defer res.Body.Close()

	if doc, err = goquery.NewDocumentFromReader(res.Body); err != nil {
		return nil, err
	}

	trans := Translation{}
	err = trans.GetResults(doc)
	return &trans, err
}
// New returns an Extractor whose HTTP client gives up on a request after a
// fixed timeout, so a stalled server cannot block a call indefinitely.
//
// The returned error is currently always nil.
func New() (*Extractor, error) {
	// Upper bound for one whole request: connecting, redirects and reading
	// the response body.
	const requestTimeout = 30 * time.Second

	return &Extractor{
		client: &http.Client{Timeout: requestTimeout},
	}, nil
}