Compare commits

...

11 Commits

Author SHA1 Message Date
ngn
4f7e9ff3d8 get rid of old about and donate pages
All checks were successful
docker / docker (push) Successful in 11s
ups / ups (push) Successful in 1m21s
Signed-off-by: ngn <ngn@ngn.tf>
2025-07-11 18:55:46 +03:00
ngn
c5eeb3c911 get rid of the background of the banner
All checks were successful
docker / docker (push) Successful in 11s
Signed-off-by: ngn <ngn@ngn.tf>
2025-07-11 16:32:24 +03:00
ngn
5aff5e9277 get rid of the captcha stuff, put proxies in data
All checks were successful
docker / docker (push) Successful in 11s
Signed-off-by: ngn <ngn@ngn.tf>
2025-07-11 16:28:11 +03:00
ngn
54b3588c3a clean config, support read-only non-root docker
All checks were successful
docker / docker (push) Successful in 22s
Signed-off-by: ngn <ngn@ngn.tf>
2025-07-11 15:33:40 +03:00
ngn
16f954f1e5 ups: update to 430c0a2
All checks were successful
docker / docker (push) Successful in 31s
2025-07-11 11:11:58 +03:00
lolcat
f7db6ba295 fix potential xss woops 2025-07-11 11:11:48 +03:00
lolcat
8bea7758f0 duckduckgo spelling fix 2025-07-11 11:10:21 +03:00
lolcat
b794dcbcd3 marginalia crash fix 2025-07-11 11:10:03 +03:00
ngn
088e64f37d ups: update to a2bc1e6
Some checks failed
docker / docker (push) Successful in 10s
ups / ups (push) Failing after 43s
2025-06-23 01:03:09 +03:00
lolcat
f2bc43c48d bypass anubis bullshit on marginalia 2025-06-23 01:03:04 +03:00
lolcat
2d74cce367 fix yandex web 2025-06-23 01:02:02 +03:00
26 changed files with 684 additions and 723 deletions

33
.gitignore vendored
View File

@@ -1,28 +1,5 @@
lib/test.html
lib/postdata.json
lib/nextpage.json
scraper/brave.html
scraper/yandex.json
scraper/marginalia.json
banner_og/
scraper/mojeek.html
scraper/google.html
scraper/google-img.html
scraper/google-video.html
scraper/google-news.html
scraper/google-img-nextpage.html
scraper/brave-image.html
scraper/brave-video.html
scraper/facebook.html
scraper/facebook-nextpage.json
scraper/yandex-video.json
scraper/yandex.html
scraper/soundcloud.json
scraper/mp3-pm.html
banner/*
data/captcha/birds/
data/captcha/fumo_plushies/
data/captcha/minecraft/
!banner/*default*
scraper/curlie.html
icons/*
/compose.yml
/docker-compose.yml
/banner
/favicon.ico
/config.php

View File

@@ -1,24 +1,20 @@
FROM alpine:latest
FROM alpine
RUN apk update
RUN apk upgrade
RUN apk add php apache2-ssl php83-fileinfo php83-openssl \
php83-iconv php83-common php83-dom php83-sodium \
php83-curl curl php83-pecl-apcu php83-apache2 \
imagemagick php83-pecl-imagick php-mbstring \
imagemagick-webp imagemagick-jpeg
RUN apk update && \
apk upgrade && \
apk add \
php php83-fileinfo php83-iconv php83-common php83-dom php83-sodium \
php83-curl php83-pecl-apcu php83-apache2 php-mbstring \
php83-pecl-imagick imagemagick-webp imagemagick-jpeg
COPY ./docker/httpd.conf /etc/apache2/httpd.conf
COPY ./docker/init.sh /
WORKDIR /var/www/html
WORKDIR /srv
COPY ./src ./4get
WORKDIR /var/www/html/4get
COPY ./docker/gen_config.php .
RUN chmod 777 /var/www/html/4get/icons
RUN chmod +x /init.sh
RUN adduser -DSH -u 1000 -h /srv runner
RUN chown -R runner /srv && chmod +x /init.sh
USER runner
CMD ["/init.sh"]

18
compose.example.yml Normal file
View File

@@ -0,0 +1,18 @@
services:
fourget:
container_name: 4get
image: git.ngn.tf/ngn/4get
ports:
- 80:8080
volumes:
- ./config.php:/srv/4get/data/config.php:ro
- ./favicon.ico:/srv/4get/favicon.ico:ro
- ./banner:/srv/4get/banner:ro
- type: tmpfs
target: /tmp/icons
cap_drop:
- ALL
security_opt:
- no-new-privileges:true
read_only: true
restart: unless-stopped

View File

@@ -1,12 +0,0 @@
services:
fourget:
container_name: 4get
image: git.ngn.tf/ngn/4get
environment:
- FOURGET_SERVER_NAME=example.com
ports:
- 80:80
volumes:
- ./banners:/var/www/html/4get/banner
- ./captcha:/var/www/html/4get/data/captcha
restart: unless-stopped

View File

@@ -1,90 +0,0 @@
<?php
// Generates data/config.php for the docker image by overlaying FOURGET_*
// environment variables on top of the constants of the shipped config class.

// the shipped configuration declares the "config" class we reflect on below
include "/var/www/html/4get/data/config.php";

// constant name => default value, as declared in the shipped config class
$from_config = (new ReflectionClass('config'))->getConstants();

// constant name => raw string override taken from the environment
$from_env = [];

foreach(getenv() as $key => $val){
	
	// only variables carrying the full "FOURGET_" prefix are overrides;
	// matching on "FOURGET" alone would let names such as "FOURGETX" leak
	// through unstripped and become constants of their own
	if(!str_starts_with($key, "FOURGET_")){
		
		continue;
	}
	
	$target_key = substr($key, strlen("FOURGET_"));
	
	// the name ends up verbatim after "const" in the generated file, so it
	// has to be a non-empty, valid PHP identifier
	if(preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $target_key) !== 1){
		
		continue;
	}
	
	// drop quotes that commonly survive from compose/env files
	$from_env[$target_key] = trim($val, '\'"');
}

// environment values win over the shipped defaults
$merged_config = array_merge($from_config, $from_env);
// Render a configuration value as PHP source text for the generated config.
//
// - null and booleans become the bare keywords null/true/false
// - numeric strings are emitted unquoted, so "8080" becomes the number 8080
// - every other string becomes a double-quoted literal; backslash, double
//   quote and dollar sign are escaped so the literal evaluates back to the
//   exact input instead of breaking (or injecting code into) the output
// - arrays are emitted as JSON, which is valid PHP array syntax for lists only
//   NOTE(review): an associative array would yield {...}, which is not PHP
// - anything else (int, float) is returned unchanged
function type_to_string($n) {
	
	if ($n === null) {
		return "null";
	}
	
	if (is_bool($n)) {
		return $n ? 'true' : 'false';
	}
	
	if (is_string($n)) {
		
		if (is_numeric($n)) {
			return $n;
		}
		
		return '"' . addcslashes($n, "\\\"\$") . '"';
	}
	
	if (is_array($n)) {
		return json_encode($n, JSON_UNESCAPED_SLASHES);
	}
	
	return $n;
}
// List the captcha datasets found on disk.
//
// $captcha_dir is the directory holding one sub-directory per dataset; it
// defaults to the location used inside the docker image.
//
// Returns a list of [dataset name, number of entries inside it] pairs, in the
// order glob() reports them. An unreadable or missing directory gives [].
function detect_captcha_dirs($captcha_dir = "/var/www/html/4get/data/captcha/") {
	
	$base = rtrim($captcha_dir, "/") . "/";
	
	$entries = glob($base . "*");
	
	// glob() reports errors as false rather than as an empty list
	if ($entries === false) {
		return [];
	}
	
	$result = [];
	
	foreach ($entries as $entry) {
		
		$images = glob($entry . "/*");
		
		// basename() replaces the old fixed path-segment index, which only
		// worked for one specific directory depth
		$result[] = [
			basename($entry),
			$images === false ? 0 : count($images)
		];
	}
	
	return $result;
}
// Keys that are never copied verbatim: CAPTCHA_DATASET is rebuilt from the
// captcha directory on disk, PROTO is left out of the generated file.
$special_keys = ["PROTO", "CAPTCHA_DATASET"];

$output = "<?php\n // This file was generated by docker/gen_config.php\n";
$output .= "class config {\n";

foreach($merged_config as $key => $val){
	
	if($key === "CAPTCHA_DATASET"){
		
		$output .= "\tconst " . $key . " = " . type_to_string(detect_captcha_dirs()) . ";\n";
		continue;
	}
	
	if(in_array($key, $special_keys, true)){
		
		continue;
	}
	
	// keys that only exist in the environment have no default to compare with
	$default = $from_config[$key] ?? null;
	$stored_value = $val;
	
	// a string here means the value came from the environment; coerce it to
	// the shape of the default it replaces
	if(is_string($val)){
		
		if(is_array($default)){
			
			// comma separated env value -> list; an empty value is an empty list
			$stored_value = $val === "" ? [] : explode(",", $val);
			
		}elseif(
			is_bool($default) &&
			in_array(strtolower($val), ["true", "false"], true)
		){
			
			// otherwise "false" would be written as a non-empty (truthy) string
			$stored_value = strtolower($val) === "true";
		}
	}
	
	$output .= "\tconst " . $key . " = " . type_to_string($stored_value) . ";\n";
}

$output .= "}\n";
$output .= "?>";

// fail loudly so the calling init script does not continue without a config
if(file_put_contents("./data/config.php", $output) === false){
	
	fwrite(STDERR, "gen_config: failed to write ./data/config.php\n");
	exit(1);
}
?>

View File

@@ -1,16 +1,19 @@
Listen 80
ServerTokens OS
Listen 8080
ServerRoot /var/www
ServerSignature On
ServerName localhost
DocumentRoot "/var/www/html/4get"
ServerSignature Off
ServerTokens Prod
PidFile /dev/shm/httpd.pid
DocumentRoot /srv/4get
LogLevel error
CustomLog /dev/null common
ErrorLog /dev/null
ErrorLog /dev/stderr
<Directory "/var/www/html/4get">
<Directory /srv/4get>
RewriteEngine On
RewriteCond %{THE_REQUEST} ^\w+\ /(.*)\.php(\?.*)?\ HTTP/
RewriteRule ^ http://%{HTTP_HOST}/%1 [R=301]
@@ -22,68 +25,32 @@ ErrorLog /dev/null
</Directory>
# deny access to private resources
<Directory "/var/www/html/4get/data">
<Directory /srv/4get/data>
Require all denied
<Files "*">
<Files *>
Require all denied
</Files>
</Directory>
LoadModule rewrite_module modules/mod_rewrite.so
LoadModule mpm_prefork_module modules/mod_mpm_prefork.so
LoadModule authn_file_module modules/mod_authn_file.so
LoadModule authn_core_module modules/mod_authn_core.so
LoadModule authz_host_module modules/mod_authz_host.so
LoadModule authz_groupfile_module modules/mod_authz_groupfile.so
LoadModule authz_user_module modules/mod_authz_user.so
LoadModule authz_core_module modules/mod_authz_core.so
LoadModule access_compat_module modules/mod_access_compat.so
LoadModule auth_basic_module modules/mod_auth_basic.so
LoadModule reqtimeout_module modules/mod_reqtimeout.so
LoadModule filter_module modules/mod_filter.so
LoadModule mime_module modules/mod_mime.so
LoadModule log_config_module modules/mod_log_config.so
LoadModule env_module modules/mod_env.so
LoadModule headers_module modules/mod_headers.so
LoadModule setenvif_module modules/mod_setenvif.so
LoadModule version_module modules/mod_version.so
LoadModule unixd_module modules/mod_unixd.so
LoadModule status_module modules/mod_status.so
LoadModule autoindex_module modules/mod_autoindex.so
LoadModule dir_module modules/mod_dir.so
LoadModule alias_module modules/mod_alias.so
LoadModule negotiation_module modules/mod_negotiation.so
<IfModule unixd_module>
User apache
Group apache
</IfModule>
LoadModule dir_module modules/mod_dir.so
<Directory />
AllowOverride none
Require all denied
</Directory>
<IfModule dir_module>
DirectoryIndex index.html
</IfModule>
<Files ".ht*">
Require all denied
</Files>
<IfModule headers_module>
RequestHeader unset Proxy early
</IfModule>
<IfModule mime_module>
TypesConfig /etc/apache2/mime.types
AddType application/x-compress .Z
AddType application/x-gzip .gz .tgz
</IfModule>
<IfModule mime_magic_module>
MIMEMagicFile /etc/apache2/magic
</IfModule>
IncludeOptional /etc/apache2/conf.d/*.conf
Include /etc/apache2/conf.d/languages.conf
Include /etc/apache2/conf.d/php83-module.conf

View File

@@ -1,11 +1,17 @@
#!/bin/sh
set -e
#!/bin/sh -e
if [ ! -f '/var/www/html/4get/data/config.php' ] && [ -f './gen_config.php' ]
then
php ./gen_config.php
rm -f ./gen_config.php
config='/srv/4get/data/config.php'
defconfig='/srv/4get/data/config.def.php'
# check for the configuration file
if [ ! -f "${config}" ]; then
echo "configuration file not specified"
echo "here's the default configuration, modify and mount this to ${config}"
echo
cat "${defconfig}"
exit 1
fi
echo "Starting up apache2"
exec httpd -DFOREGROUND
# execute apache
echo "starting apache web server"
exec httpd -D FOREGROUND

28
src/.gitignore vendored Normal file
View File

@@ -0,0 +1,28 @@
lib/test.html
lib/postdata.json
lib/nextpage.json
scraper/brave.html
scraper/yandex.json
scraper/marginalia.json
scraper/mojeek.html
scraper/google.html
scraper/google-img.html
scraper/google-video.html
scraper/google-news.html
scraper/google-img-nextpage.html
scraper/brave-image.html
scraper/brave-video.html
scraper/facebook.html
scraper/facebook-nextpage.json
scraper/yandex-video.json
scraper/yandex.html
scraper/soundcloud.json
scraper/mp3-pm.html
scraper/curlie.html
banner/*
data/*
!banner/*default*
!data/*.def.*

View File

@@ -1,39 +0,0 @@
<?php

// About page: prints the filter-less header, then the static about template
// inside the regular search layout.

include "data/config.php";
include "lib/frontend.php";

$frontend = new frontend();

$header_fields = [
	"title" => "About",
	"class" => " class=\"about\""
];

echo $frontend->load("header_nofilters.html", $header_fields);

// flatten the template: trim every line and glue them together
$template_lines = explode("\n", file_get_contents("template/about.html"));
$body = implode("", array_map("trim", $template_lines));

$page_fields = [
	"timetaken" => null,
	"class" => "",
	"right-left" => "",
	"right-right" => "",
	"left" => $body
];

echo $frontend->load("search.html", $page_fields);

87
src/data/config.def.php Normal file
View File

@@ -0,0 +1,87 @@
<?php
// Default 4get configuration (data/config.def.php).
// Every setting is a class constant read elsewhere as config::NAME.
class config{
// Welcome to the 4get configuration file
// When updating your instance, please make sure this file isn't missing
// any parameters.
// 4get version. Please keep this updated
const VERSION = 8;
// Will be shown pretty much everywhere.
const SERVER_NAME = "4get";
// Will be shown in the <meta> tag on the home page
const SERVER_SHORT_DESCRIPTION = "4get is a proxy search engine that doesn't suck.";
// Will be shown in the server list ping (null for no description)
const SERVER_LONG_DESCRIPTION = null;
// Add your own themes in "static/themes" and specify the theme name here.
// Eg. To use "static/themes/Cream.css", specify "Cream".
// NOTE(review): this comment used to name "Dark" as the default theme while
// the value below is "black" - confirm which theme name is the intended default.
const DEFAULT_THEME = "black";
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages.
// Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0";
// Temporary directory for saving the page icons. Icons are stored as
// "<ICON_DIR>/<hostname>.png", so the directory must be writable.
const ICON_DIR = "/tmp/icons";
// List of domains that point to your servers. Include your tor/i2p
// addresses here! Each entry must be a valid URL. Won't affect links placed on
// the homepage.
const ALT_ADDRESSES = [
//"https://4get.alt-tld",
//"http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion"
];
// Proxy pool assignments for each scraper
// false = Use the server's raw IP
// string = load a proxy list from the data directory
// Eg. "tor" will load data/tor.txt
const PROXY_DDG = false; // duckduckgo
const PROXY_BRAVE = false;
const PROXY_FB = false; // facebook
const PROXY_GOOGLE = false;
const PROXY_GOOGLE_API = false;
const PROXY_GOOGLE_CSE = false;
const PROXY_STARTPAGE = false;
const PROXY_QWANT = false;
const PROXY_GHOSTERY = false;
const PROXY_MARGINALIA = false;
const PROXY_MOJEEK = false;
const PROXY_SC = false; // soundcloud
const PROXY_SPOTIFY = false;
const PROXY_SOLOFIELD = false;
const PROXY_WIBY = false;
const PROXY_CURLIE = false;
const PROXY_YT = false; // youtube
const PROXY_YEP = false;
const PROXY_PINTEREST = false;
const PROXY_SANKAKUCOMPLEX = false;
const PROXY_FLICKR = false;
const PROXY_FIVEHPX = false;
const PROXY_VSCO = false;
const PROXY_SEZNAM = false;
const PROXY_NAVER = false;
const PROXY_GREPPR = false;
const PROXY_CROWDVIEW = false;
const PROXY_MWMBL = false;
const PROXY_FTM = false; // findthatmeme
const PROXY_IMGUR = false;
const PROXY_YANDEX_W = false; // yandex web
const PROXY_YANDEX_I = false; // yandex images
const PROXY_YANDEX_V = false; // yandex videos
//
// Scraper-specific parameters
//
// GOOGLE CSE & GOOGLE API
// NOTE(review): presumably the search engine ("cx") identifier sent with
// Google CSE/API requests - confirm against the google scrapers.
const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
// MARGINALIA
// Use null to fall back to HTML scraping OR specify a string to
// use the API (Eg: "public"). The API has fewer filters.
const MARGINALIA_API_KEY = null;
}

View File

@@ -1,173 +0,0 @@
<?php
class config{
// Welcome to the 4get configuration file
// When updating your instance, please make sure this file isn't missing
// any parameters.
// 4get version. Please keep this updated
const VERSION = 8;
// Will be shown pretty much everywhere.
const SERVER_NAME = "4get";
// Will be shown in <meta> tag on home page
const SERVER_SHORT_DESCRIPTION = "4get is a proxy search engine that doesn't suck.";
// Will be shown in server list ping (null for no description)
const SERVER_LONG_DESCRIPTION = null;
// Add your own themes in "static/themes". Set to "Dark" for default theme.
// Eg. To use "static/themes/Cream.css", specify "Cream".
const DEFAULT_THEME = "black";
// Enable the API?
const API_ENABLED = true;
//
// BOT PROTECTION
//
// 0 = disabled, 1 = ask for image captcha, @TODO: 2 = invite only (users needs a pass)
// VERY useful against a targeted attack
const BOT_PROTECTION = 0;
// if BOT_PROTECTION is set to 1, specify the available datasets here
// images should be named from 1.png to X.png, and be 100x100 in size
// Eg. data/captcha/birds/1.png up to 2263.png
const CAPTCHA_DATASET = [
// example:
//["birds", 2263],
//["fumo_plushies", 1006],
//["minecraft", 848]
];
// If this regular expression matches the user agent, the request is blocked
// Not useful at all against a targeted attack
const HEADER_REGEX = '/bot|wget|curl|python-requests|scrapy|go-http-client|ruby|yahoo|spider|qwant/i';
// Block clients who present any of the following headers in their request (SPECIFY IN !!lowercase!!)
// Eg: ["x-forwarded-for", "x-via", "forwarded-for", "via"];
// Useful for blocking *some* proxies used for botting
const FILTERED_HEADER_KEYS = [
//"x-forwarded-for",
//"x-cluster-client-ip",
//"x-client-ip",
//"x-real-ip",
//"client-ip",
//"real-ip",
//"forwarded-for",
//"forwarded-for-ip",
//"forwarded",
//"proxy-connection",
//"remote-addr",
//"via"
];
// Block SSL ciphers used by CLI tools used for botting
// Basically a primitive version of Cloudflare's browser integrity check
// ** If curl can still access the site (with spoofed headers), please make sure you use the new apache2 config **
// https://git.lolcat.ca/lolcat/4get/docs/apache2.md
const DISALLOWED_SSL = [
// "TLS_AES_256_GCM_SHA384" // used by WGET and CURL
];
// Maximal number of searches per captcha key/pass issued. Counter gets
// reset on every APCU cache clear (should happen once a day).
// Only useful when BOT_PROTECTION is NOT set to 0
const MAX_SEARCHES = 100;
// List of domains that point to your servers. Include your tor/i2p
// addresses here! Must be a valid URL. Won't affect links placed on
// the homepage.
const ALT_ADDRESSES = [
//"https://4get.alt-tld",
//"http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion"
];
// Known 4get instances. MUST use the https protocol if your instance uses
// it. Is used to generate a distributed list of instances.
// To appear in the list of an instance, contact its host; once everyone has added
// each other, your server should appear everywhere.
const INSTANCES = [
"https://4get.ca",
"https://4get.zzls.xyz",
"https://4getus.zzls.xyz",
"https://4get.silly.computer",
"https://4get.konakona.moe",
"https://4get.lvkaszus.pl",
"https://4g.ggtyler.dev",
"https://4get.perennialte.ch",
"https://4get.sijh.net",
"https://4get.hbubli.cc",
"https://4get.plunked.party",
"https://4get.seitan-ayoub.lol",
"https://4get.etenie.pl",
"https://4get.lunar.icu",
"https://4get.dcs0.hu",
"https://4get.kizuki.lol",
"https://4get.psily.garden",
"https://search.milivojevic.in.rs",
"https://4get.snine.nl",
"https://4get.datura.network",
"https://4get.neco.lol",
"https://4get.lol",
"https://4get.ch",
"https://4get.edmateo.site",
"https://4get.sudovanilla.org",
"https://search.mint.lgbt"
];
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0";
// Proxy pool assignments for each scraper
// false = Use server's raw IP
// string = will load a proxy list from data/proxies
// Eg. "onion" will load data/proxies/onion.txt
const PROXY_DDG = false; // duckduckgo
const PROXY_BRAVE = false;
const PROXY_FB = false; // facebook
const PROXY_GOOGLE = false;
const PROXY_GOOGLE_API = false;
const PROXY_GOOGLE_CSE = false;
const PROXY_STARTPAGE = false;
const PROXY_QWANT = false;
const PROXY_GHOSTERY = false;
const PROXY_MARGINALIA = false;
const PROXY_MOJEEK = false;
const PROXY_SC = false; // soundcloud
const PROXY_SPOTIFY = false;
const PROXY_SOLOFIELD = false;
const PROXY_WIBY = false;
const PROXY_CURLIE = false;
const PROXY_YT = false; // youtube
const PROXY_YEP = false;
const PROXY_PINTEREST = false;
const PROXY_SANKAKUCOMPLEX = false;
const PROXY_FLICKR = false;
const PROXY_FIVEHPX = false;
const PROXY_VSCO = false;
const PROXY_SEZNAM = false;
const PROXY_NAVER = false;
const PROXY_GREPPR = false;
const PROXY_CROWDVIEW = false;
const PROXY_MWMBL = false;
const PROXY_FTM = false; // findthatmeme
const PROXY_IMGUR = false;
const PROXY_YANDEX_W = false; // yandex web
const PROXY_YANDEX_I = false; // yandex images
const PROXY_YANDEX_V = false; // yandex videos
//
// Scraper-specific parameters
//
// GOOGLE CSE & GOOGLE API
const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
// MARGINALIA
// Use "null" to default out to HTML scraping OR specify a string to
// use the API (Eg: "public"). The API has fewer filters.
const MARGINALIA_API_KEY = null;
}

Binary file not shown.

View File

@@ -1,3 +0,0 @@
*
!.gitignore
!onion.txt

View File

@@ -1,39 +0,0 @@
<?php

// Donate page: prints the filter-less header, then the static donate template
// inside the regular search layout.

include "data/config.php";
include "lib/frontend.php";

$frontend = new frontend();

$header_fields = [
	"title" => "Donate to the project",
	"class" => " class=\"about\""
];

echo $frontend->load("header_nofilters.html", $header_fields);

// flatten the template: trim every line and glue them together
$template_lines = explode("\n", file_get_contents("template/donate.html"));
$body = implode("", array_map("trim", $template_lines));

$page_fields = [
	"timetaken" => null,
	"class" => "",
	"right-left" => "",
	"right-right" => "",
	"left" => $body
];

echo $frontend->load("search.html", $page_fields);

View File

@@ -15,32 +15,38 @@ class favicon{
header("Content-Type: image/png");
if(
preg_match(
'/^https?:\/\/[A-Za-z0-9.-]+$/',
$url
) === 0
){
// first check the URL with regex
if(preg_match('/^https?:\/\/[A-Za-z0-9.-]+$/', $url) === 0){
header("X-Error: Only provide the protocol and domain");
$this->defaulticon();
}
$filename = str_replace(["https://", "http://"], "", $url);
header("Content-Disposition: inline; filename=\"{$filename}.png\"");
// validate the URL
$url = filter_var($url, FILTER_VALIDATE_URL);
if(!$url) {
header("X-Error: Invalid URL");
$this->defaulticon();
}
// extract the hostname
$this->filename = parse_url($url, PHP_URL_HOST);
if(!$this->filename || is_null($this->filename) || $this->filename === ""){
header("X-Error: Invalid URL");
$this->defaulticon();
}
// specify the filename in content-disposition
header("Content-Disposition: inline; filename=\"{$this->filename}.png\"");
include "lib/curlproxy.php";
$this->proxy = new proxy(false);
$this->filename = parse_url($url, PHP_URL_HOST);
/*
Check if we have the favicon stored locally
*/
if(file_exists("icons/" . $filename . ".png")){
$handle = fopen("icons/" . $filename . ".png", "r");
echo fread($handle, filesize("icons/" . $filename . ".png"));
if(file_exists($this->iconpath())){
$handle = fopen($this->iconpath(), "r");
echo fread($handle, filesize($this->iconpath()));
fclose($handle);
return;
}
@@ -249,7 +255,7 @@ class favicon{
$image = $image->getImageBlob();
// save favicon
$handle = fopen("icons/" . $this->filename . ".png", "w");
$handle = fopen($this->iconpath(), "w");
fwrite($handle, $image, strlen($image));
fclose($handle);
@@ -329,6 +335,11 @@ class favicon{
return $json["icons"][0]["src"];
}
// Path of the cached icon for the current host: "<ICON_DIR>/<filename>.png".
// The original author notes that $this->filename can be trusted at this point.
private function iconpath() {
	return sprintf("%s/%s.png", config::ICON_DIR, $this->filename);
}
private function favicon404(){
// fallback to google favicons
@@ -346,7 +357,7 @@ class favicon{
}
// write favicon from google
$handle = fopen("icons/" . $this->filename . ".png", "w");
$handle = fopen($this->iconpath(), "w");
fwrite($handle, $image["body"], strlen($image["body"]));
fclose($handle);

Binary file not shown.

Before

Width:  |  Height:  |  Size: 753 B

100
src/lib/anubis.php Normal file
View File

@@ -0,0 +1,100 @@
<?php
//
// Reference
// https://github.com/TecharoHQ/anubis/blob/ecc716940e34ebe7249974f2789a99a2c7115e4e/web/js/proof-of-work.mjs
//
// Solves the anubis proof-of-work challenge embedded in a challenge page.
class anubis{
	
	// a SHA-256 digest is 32 bytes long, i.e. 64 nibbles; no difficulty above
	// that can ever be satisfied
	private const HASH_NIBBLES = 64;
	
	// HTML parser instance. Declared explicitly because creating properties
	// dynamically is deprecated since PHP 8.2; kept public, as the dynamic
	// property was.
	public $fuckhtml;
	
	public function __construct(){
		
		include_once "fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	// Extract the challenge from the page's "anubis_challenge" script element
	// and solve it.
	//
	// $html: markup of the challenge page
	// Returns the array produced by rape() ("response" and "nonce").
	// Throws Exception when the challenge is missing, is not valid JSON or
	// has an unexpected shape.
	public function scrape($html){
		
		$this->fuckhtml->load($html);
		
		$script =
			$this->fuckhtml
			->getElementById(
				"anubis_challenge",
				"script"
			);
		
		if($script === false){
			
			throw new Exception("Failed to scrape anubis challenge data");
		}
		
		$script =
			json_decode(
				$this->fuckhtml
				->getTextContent(
					$script
				),
				true
			);
		
		if($script === null){
			
			throw new Exception("Failed to decode anubis challenge data");
		}
		
		// the payload comes from a remote server: check its shape before use
		if(
			!is_array($script) ||
			!isset($script["challenge"]) ||
			!isset($script["rules"]["difficulty"]) ||
			!is_int($script["rules"]["difficulty"]) ||
			!is_string($script["challenge"])
		){
			
			throw new Exception("Found invalid challenge data");
		}
		
		return $this->rape($script["challenge"], $script["rules"]["difficulty"]);
	}
	
	// Tell whether the first $difficulty nibbles (half-bytes, high nibble
	// first) of the raw binary digest $hash are all zero.
	private function is_valid_hash($hash, $difficulty){
		
		for($i=0; $i<$difficulty; $i++){
			
			$byte = ord($hash[intdiv($i, 2)]);
			
			// even positions address the high nibble, odd ones the low nibble
			$nibble = $i % 2 === 0 ? $byte >> 4 : $byte & 0x0f;
			
			if($nibble !== 0){
				
				return false;
			}
		}
		
		return true;
	}
	
	// Brute-force the proof of work: find the smallest nonce (counting up
	// from 0) for which sha256($data . $nonce) starts with $difficulty zero
	// nibbles.
	//
	// Returns ["response" => hex digest, "nonce" => int].
	// Throws Exception when $difficulty exceeds the digest length; such a
	// challenge used to read past the end of the digest and never finish.
	public function rape($data, $difficulty = 5){
		
		if($difficulty > self::HASH_NIBBLES){
			
			throw new Exception("Anubis challenge difficulty is out of range");
		}
		
		$nonce = 0;
		
		while(true){
			
			$hash_binary = hash("sha256", $data . $nonce, true);
			
			if($this->is_valid_hash($hash_binary, $difficulty)){
				
				return [
					"response" => bin2hex($hash_binary),
					"nonce" => $nonce
				];
			}
			
			$nonce++;
		}
	}
}

View File

@@ -21,7 +21,7 @@ class backend{
// indent
$proxy_index_raw = apcu_inc("p." . $this->scraper);
$proxylist = file_get_contents("data/proxies/" . $pool . ".txt");
$proxylist = file_get_contents("data/" . $pool . ".txt");
$proxylist = explode("\n", $proxylist);
// ignore empty or commented lines

View File

@@ -73,7 +73,6 @@ class frontend{
}
public function loadheader(array $get, array $filters, string $page){
echo
$this->load("header.html", [
"title" => trim(htmlspecialchars($get["s"]) . " ({$page})"),
@@ -83,60 +82,6 @@ class frontend{
"tabs" => $this->generatehtmltabs($page, $get["s"]),
"filters" => $this->generatehtmlfilters($filters, $get)
]);
$headers_raw = getallheaders();
$header_keys = [];
$user_agent = "";
$bad_header = false;
// block bots that present X-Forwarded-For, Via, etc
foreach($headers_raw as $headerkey => $headervalue){
$headerkey = strtolower($headerkey);
if($headerkey == "user-agent"){
$user_agent = $headervalue;
continue;
}
// check header key
if(in_array($headerkey, config::FILTERED_HEADER_KEYS)){
$bad_header = true;
break;
}
}
// SSL check
$bad_ssl = false;
if(
isset($_SERVER["https"]) &&
$_SERVER["https"] == "on" &&
isset($_SERVER["SSL_CIPHER"]) &&
in_array($_SERVER["SSL_CIPHER"], config::FILTERED_HEADER_KEYS)
){
$bad_ssl = true;
}
if(
$bad_header === true ||
$bad_ssl === true ||
$user_agent == "" ||
// user agent check
preg_match(
config::HEADER_REGEX,
$user_agent
)
){
// bot detected !!
$this->drawerror(
"Tshh, blocked!",
'Your browser, IP or IP range has been blocked from this 4get instance.'
);
die();
}
}
public function drawerror($title, $error, $timetaken = null){
@@ -178,7 +123,6 @@ class frontend{
'<li>Remove keywords that could cause errors</li>' .
'<li><a href="/instances?target=' . $target . "&" . $this->buildquery($get, false) . '">Try your search on another 4get instance</a></li>' .
'</ul><br>' .
'If the error persists, please <a href="/about">contact the administrator</a>.',
$timetaken
);
}

View File

@@ -719,7 +719,7 @@ class ddg{
->getTextContent(
$json["suggestion"]
),
"correction" => $json["recourseText"]
"correction" => html_entity_decode($json["recourseText"])
];
}
}

View File

@@ -3,7 +3,10 @@
class marginalia{
public function __construct(){
include "lib/fuckhtml.php";
include "lib/anubis.php";
$this->anubis = new anubis();
include_once "lib/fuckhtml.php";
$this->fuckhtml = new fuckhtml();
include "lib/backend.php";
@@ -102,7 +105,40 @@ class marginalia{
);
}
private function get($proxy, $url, $get = []){
private function get($proxy, $url, $get = [], $get_cookies = 1){
$curlproc = curl_init();
switch($get_cookies){
case 0:
$cookies = "";
$cookies_tmp = [];
curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
$length = strlen($header);
$header = explode(":", $header, 2);
if(trim(strtolower($header[0])) == "set-cookie"){
$cookie_tmp = explode("=", trim($header[1]), 2);
$cookies_tmp[trim($cookie_tmp[0])] =
explode(";", $cookie_tmp[1], 2)[0];
}
return $length;
});
break;
case 1:
$cookies = "";
break;
default:
$cookies = "Cookie: " . $get_cookies;
}
$headers = [
"User-Agent: " . config::USER_AGENT,
@@ -110,6 +146,7 @@ class marginalia{
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
"DNT: 1",
$cookies,
"Connection: keep-alive",
"Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document",
@@ -118,8 +155,6 @@ class marginalia{
"Sec-Fetch-User: ?1"
];
$curlproc = curl_init();
if($get !== []){
$get = http_build_query($get);
$url .= "?" . $get;
@@ -145,7 +180,19 @@ class marginalia{
throw new Exception(curl_error($curlproc));
}
if($get_cookies === 0){
$cookie = [];
foreach($cookies_tmp as $key => $value){
$cookie[] = $key . "=" . $value;
}
curl_close($curlproc);
return implode(";", $cookie);
}
return $data;
}
@@ -267,6 +314,60 @@ class marginalia{
// HTML parser
$proxy = $this->backend->get_ip();
//
// Bypass anubis check
//
/*
if(($anubis_key = apcu_fetch("marginalia_cookie")) === false){
try{
$html =
$this->get(
$proxy,
"https://old-search.marginalia.nu/search",
[
"query" => $search
]
);
}catch(Exception $error){
throw new Exception("Failed to get anubis challenge");
}
try{
$anubis_data = $this->anubis->scrape($html);
}catch(Exception $error){
throw new Exception($error);
}
// send anubis response & get cookies
// https://old-search.marginalia.nu/.within.website/x/cmd/anubis/api/pass-challenge?response=0000018966b086834f738bacba6031028adb5aa875974ead197a8b75778baf3a&nonce=39947&redir=https%3A%2F%2Fold-search.marginalia.nu%2F&elapsedTime=1164
try{
$anubis_key =
$this->get(
$proxy,
"https://old-search.marginalia.nu/.within.website/x/cmd/anubis/api/pass-challenge",
[
"response" => $anubis_data["response"],
"nonce" => $anubis_data["nonce"],
"redir" => "https://old-search.marginalia.nu/",
"elapsedTime" => random_int(1000, 2000)
],
0
);
}catch(Exception $error){
throw new Exception("Failed to submit anubis challenge");
}
apcu_store("marginalia_cookie", $anubis_key);
}*/
if($get["npt"]){
[$params, $proxy] =
@@ -279,7 +380,9 @@ class marginalia{
$html =
$this->get(
$proxy,
"https://old-search.marginalia.nu/search?" . $params
"https://old-search.marginalia.nu/search?" . $params,
[],
//$anubis_key
);
}catch(Exception $error){
@@ -309,7 +412,8 @@ class marginalia{
$this->get(
$proxy,
"https://old-search.marginalia.nu/search",
$params
$params,
//$anubis_key
);
}catch(Exception $error){

View File

@@ -14,7 +14,7 @@ class yandex{
// backend included in the scraper functions
}
private function get($proxy, $url, $get = [], $nsfw){
private function get($proxy, $url, $get = [], $nsfw, $get_cookie = 1){
$curlproc = curl_init();
@@ -25,19 +25,55 @@ class yandex{
curl_setopt($curlproc, CURLOPT_URL, $url);
// extract "i" cookie
if($get_cookie === 0){
$cookies_tmp = [];
curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
$length = strlen($header);
$header = explode(":", $header, 2);
if(trim(strtolower($header[0])) == "set-cookie"){
$cookie_tmp = explode("=", trim($header[1]), 2);
$cookies_tmp[trim($cookie_tmp[0])] =
explode(";", $cookie_tmp[1], 2)[0];
}
return $length;
});
}
switch($nsfw){
case "yes": $nsfw = "0"; break;
case "maybe": $nsfw = "1"; break;
case "no": $nsfw = "2"; break;
}
switch($get_cookie){
case 0:
$cookie = "";
break;
case 1:
$cookie = "Cookie: yp=" . (time() - 4000033) . ".szm.1:1920x1080:876x1000#" . time() . ".sp.family:" . $nsfw;
break;
default:
$cookie = "Cookie: i=" . $get_cookie;
}
$headers =
["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Encoding: gzip",
"Accept-Language: en-US,en;q=0.5",
"DNT: 1",
"Cookie: yp=" . (time() - 4000033) . ".szm.1:1920x1080:876x1000#" . time() . ".sp.family:" . $nsfw,
$cookie,
"Referer: https://yandex.com/images/search",
"Connection: keep-alive",
"Upgrade-Insecure-Requests: 1",
@@ -59,6 +95,17 @@ class yandex{
$data = curl_exec($curlproc);
if($get_cookie === 0){
if(isset($cookies_tmp["i"])){
return $cookies_tmp["i"];
}else{
throw new Exception("Failed to get Yandex clearance cookie");
}
}
if(curl_errno($curlproc)){
throw new Exception(curl_error($curlproc));
@@ -217,6 +264,23 @@ class yandex{
// https://yandex.com/search/site/?text=minecraft&web=1&frame=1&v=2.0&searchid=3131712
// &within=777&from_day=26&from_month=8&from_year=2023&to_day=26&to_month=8&to_year=2023
// get clearance cookie
if(($cookie = apcu_fetch("yandexweb_cookie")) === false){
$proxy = $this->backend->get_ip();
$cookie =
$this->get(
$proxy,
"https://yandex.ru/support2/smart-captcha/ru/",
[],
false,
0
);
apcu_store("yandexweb_cookie", $cookie);
}
if($get["npt"]){
[$npt, $proxy] = $this->backend->get($get["npt"], "web");
@@ -226,7 +290,8 @@ class yandex{
$proxy,
"https://yandex.com" . $npt,
[],
"yes"
"yes",
$cookie
);
}else{
@@ -236,7 +301,7 @@ class yandex{
throw new Exception("Search term is empty!");
}
$proxy = $this->backend->get_ip();
$proxy = !isset($proxy) ? $this->backend->get_ip() : $proxy;
$lang = $get["lang"];
$older = $get["older"];
$newer = $get["newer"];
@@ -283,7 +348,8 @@ class yandex{
$proxy,
"https://yandex.com/search/site/",
$params,
"yes"
"yes",
$cookie
);
}catch(Exception $error){
@@ -314,6 +380,19 @@ class yandex{
$this->fuckhtml->load($html);
// Scrape page blocked error
$title =
$this->fuckhtml
->getElementsByTagName("title");
if(
count($title) !== 0 &&
$title[0]["innerHTML"] == "403"
){
throw new Exception("Yandex blocked this proxy or 4get instance.");
}
// get nextpage
$npt =
$this->fuckhtml

View File

@@ -258,7 +258,7 @@ h3,h4,h5,h6{
font-family:Times;
width:100%;
height:100%;
background:var(--282828);
background: none;
display:block;
object-fit:contain;
}

View File

@@ -84,7 +84,7 @@ if($results["spelling"]["type"] != "no_correction"){
'&' .
$frontend->buildquery($get, true) .
'&spellcheck=no">' .
$results["spelling"]["correction"] .
htmlspecialchars($results["spelling"]["correction"]) .
'</a>?' .
'</div>';
}

View File

@@ -1,7 +1,7 @@
{
"upstream": "https://git.lolcat.ca/lolcat/4get",
"provider": "gitea",
"commit": "3e1487e614f3bb5d86ddda6da63a39a8cdaadf15",
"commit": "430c0a2f0f72f1254ab65d53f13640fe02418f05",
"scripts": [
"s/--- a\\//--- a\\/src\\//g",
"s/+++ b\\//+++ b\\/src\\//g"