Compare commits
11 Commits
40fa9b3f62
...
main
Author | SHA1 | Date | |
---|---|---|---|
4f7e9ff3d8
|
|||
c5eeb3c911
|
|||
5aff5e9277
|
|||
54b3588c3a
|
|||
16f954f1e5
|
|||
![]() |
f7db6ba295
|
||
![]() |
8bea7758f0
|
||
![]() |
b794dcbcd3
|
||
088e64f37d
|
|||
![]() |
f2bc43c48d
|
||
![]() |
2d74cce367
|
33
.gitignore
vendored
33
.gitignore
vendored
@@ -1,28 +1,5 @@
|
|||||||
lib/test.html
|
/compose.yml
|
||||||
lib/postdata.json
|
/docker-compose.yml
|
||||||
lib/nextpage.json
|
/banner
|
||||||
scraper/brave.html
|
/favicon.ico
|
||||||
scraper/yandex.json
|
/config.php
|
||||||
scraper/marginalia.json
|
|
||||||
banner_og/
|
|
||||||
scraper/mojeek.html
|
|
||||||
scraper/google.html
|
|
||||||
scraper/google-img.html
|
|
||||||
scraper/google-video.html
|
|
||||||
scraper/google-news.html
|
|
||||||
scraper/google-img-nextpage.html
|
|
||||||
scraper/brave-image.html
|
|
||||||
scraper/brave-video.html
|
|
||||||
scraper/facebook.html
|
|
||||||
scraper/facebook-nextpage.json
|
|
||||||
scraper/yandex-video.json
|
|
||||||
scraper/yandex.html
|
|
||||||
scraper/soundcloud.json
|
|
||||||
scraper/mp3-pm.html
|
|
||||||
banner/*
|
|
||||||
data/captcha/birds/
|
|
||||||
data/captcha/fumo_plushies/
|
|
||||||
data/captcha/minecraft/
|
|
||||||
!banner/*default*
|
|
||||||
scraper/curlie.html
|
|
||||||
icons/*
|
|
||||||
|
26
Dockerfile
26
Dockerfile
@@ -1,24 +1,20 @@
|
|||||||
FROM alpine:latest
|
FROM alpine
|
||||||
|
|
||||||
RUN apk update
|
RUN apk update && \
|
||||||
RUN apk upgrade
|
apk upgrade && \
|
||||||
|
apk add \
|
||||||
RUN apk add php apache2-ssl php83-fileinfo php83-openssl \
|
php php83-fileinfo php83-iconv php83-common php83-dom php83-sodium \
|
||||||
php83-iconv php83-common php83-dom php83-sodium \
|
php83-curl php83-pecl-apcu php83-apache2 php-mbstring \
|
||||||
php83-curl curl php83-pecl-apcu php83-apache2 \
|
php83-pecl-imagick imagemagick-webp imagemagick-jpeg
|
||||||
imagemagick php83-pecl-imagick php-mbstring \
|
|
||||||
imagemagick-webp imagemagick-jpeg
|
|
||||||
|
|
||||||
COPY ./docker/httpd.conf /etc/apache2/httpd.conf
|
COPY ./docker/httpd.conf /etc/apache2/httpd.conf
|
||||||
COPY ./docker/init.sh /
|
COPY ./docker/init.sh /
|
||||||
|
|
||||||
WORKDIR /var/www/html
|
WORKDIR /srv
|
||||||
COPY ./src ./4get
|
COPY ./src ./4get
|
||||||
|
|
||||||
WORKDIR /var/www/html/4get
|
RUN adduser -DSH -u 1000 -h /srv runner
|
||||||
COPY ./docker/gen_config.php .
|
RUN chown -R runner /srv && chmod +x /init.sh
|
||||||
|
|
||||||
RUN chmod 777 /var/www/html/4get/icons
|
|
||||||
RUN chmod +x /init.sh
|
|
||||||
|
|
||||||
|
USER runner
|
||||||
CMD ["/init.sh"]
|
CMD ["/init.sh"]
|
||||||
|
18
compose.example.yml
Normal file
18
compose.example.yml
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
services:
|
||||||
|
fourget:
|
||||||
|
container_name: 4get
|
||||||
|
image: git.ngn.tf/ngn/4get
|
||||||
|
ports:
|
||||||
|
- 80:8080
|
||||||
|
volumes:
|
||||||
|
- ./config.php:/srv/4get/data/config.php:ro
|
||||||
|
- ./favicon.ico:/srv/4get/favicon.ico:ro
|
||||||
|
- ./banner:/srv/4get/banner:ro
|
||||||
|
- type: tmpfs
|
||||||
|
target: /tmp/icons
|
||||||
|
cap_drop:
|
||||||
|
- ALL
|
||||||
|
security_opt:
|
||||||
|
- no-new-privileges:true
|
||||||
|
read_only: true
|
||||||
|
restart: unless-stopped
|
@@ -1,12 +0,0 @@
|
|||||||
services:
|
|
||||||
fourget:
|
|
||||||
container_name: 4get
|
|
||||||
image: git.ngn.tf/ngn/4get
|
|
||||||
environment:
|
|
||||||
- FOURGET_SERVER_NAME=example.com
|
|
||||||
ports:
|
|
||||||
- 80:80
|
|
||||||
volumes:
|
|
||||||
- ./banners:/var/www/html/4get/banner
|
|
||||||
- ./captcha:/var/www/html/4get/data/captcha
|
|
||||||
restart: unless-stopped
|
|
@@ -1,90 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
include "/var/www/html/4get/data/config.php";
|
|
||||||
|
|
||||||
$refl = new ReflectionClass('config');
|
|
||||||
$from_config = ($refl->getConstants());
|
|
||||||
$from_env = array();
|
|
||||||
|
|
||||||
$env = getenv();
|
|
||||||
$fourget_env = array_filter($env, function($v, $k) {
|
|
||||||
return str_starts_with($k, "FOURGET");
|
|
||||||
}, ARRAY_FILTER_USE_BOTH);
|
|
||||||
|
|
||||||
foreach($fourget_env as $key => $val) {
|
|
||||||
$target_key = preg_replace('/^FOURGET_/', '', $key);
|
|
||||||
$from_env[$target_key] = trim($val, '\'"');
|
|
||||||
};
|
|
||||||
|
|
||||||
$merged_config = array_merge($from_config, $from_env);
|
|
||||||
|
|
||||||
function type_to_string($n) {
|
|
||||||
$type = gettype($n);
|
|
||||||
if ($type === "NULL") {
|
|
||||||
return "null";
|
|
||||||
}
|
|
||||||
if ($type === "boolean") {
|
|
||||||
return $n ? 'true' : 'false';
|
|
||||||
}
|
|
||||||
if ($type === "string") {
|
|
||||||
if(is_numeric($n)) {
|
|
||||||
return $n;
|
|
||||||
}
|
|
||||||
return "\"$n\"";
|
|
||||||
}
|
|
||||||
if ($type === "array") {
|
|
||||||
return json_encode($n, JSON_UNESCAPED_SLASHES);
|
|
||||||
}
|
|
||||||
return $n;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
function detect_captcha_dirs() {
|
|
||||||
$captcha_dir = "/var/www/html/4get/data/captcha/";
|
|
||||||
$categories = (array_map(function ($n) {
|
|
||||||
return explode("/", $n)[7];
|
|
||||||
}, glob($captcha_dir . "*")));
|
|
||||||
|
|
||||||
|
|
||||||
$result = array_map(function($category) {
|
|
||||||
return [$category, count(glob("/var/www/html/4get/data/captcha/" . $category . "/*" ))];
|
|
||||||
}, $categories);
|
|
||||||
|
|
||||||
return $result;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
$special_keys = ["PROTO", "CAPTCHA_DATASET"];
|
|
||||||
|
|
||||||
$output = "<?php\n // This file was generated by docker/gen_config.php\n";
|
|
||||||
|
|
||||||
$output = $output . "class config {\n";
|
|
||||||
foreach(($merged_config) as $key => $val){
|
|
||||||
if(!in_array($key, $special_keys)) {
|
|
||||||
$stored_value = $val;
|
|
||||||
// conversion between arrays and comma separated env value.
|
|
||||||
// Handle case when original type of field is array and there is a type mismatch when a comma separted string is passed,
|
|
||||||
// then split on comma if string (and not numeric, boolean, null, etc)
|
|
||||||
//
|
|
||||||
// except in the case where the inital value in default config is null or boolean. Assuming null and boolean
|
|
||||||
// in default config will be never be assigned an array
|
|
||||||
|
|
||||||
if(gettype($from_config[$key]) != gettype($val) && !is_numeric($val) && !is_null($from_config[$key]) && gettype($from_config[$key]) != "boolean") {
|
|
||||||
$stored_value = explode(",", $val);
|
|
||||||
}
|
|
||||||
$output = $output . "\tconst " . $key . " = " . type_to_string($stored_value) . ";\n";
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if($key === "CAPTCHA_DATASET") {
|
|
||||||
$output = $output . "\tconst " . $key . " = " . type_to_string(detect_captcha_dirs()) . ";\n";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$output = $output . "}\n";
|
|
||||||
$output = $output . "?>";
|
|
||||||
|
|
||||||
file_put_contents("./data/config.php", $output);
|
|
||||||
?>
|
|
@@ -1,16 +1,19 @@
|
|||||||
Listen 80
|
Listen 8080
|
||||||
ServerTokens OS
|
|
||||||
ServerRoot /var/www
|
ServerRoot /var/www
|
||||||
ServerSignature On
|
|
||||||
ServerName localhost
|
ServerName localhost
|
||||||
|
|
||||||
DocumentRoot "/var/www/html/4get"
|
ServerSignature Off
|
||||||
|
ServerTokens Prod
|
||||||
|
|
||||||
|
PidFile /dev/shm/httpd.pid
|
||||||
|
DocumentRoot /srv/4get
|
||||||
|
|
||||||
LogLevel error
|
LogLevel error
|
||||||
CustomLog /dev/null common
|
CustomLog /dev/null common
|
||||||
ErrorLog /dev/null
|
ErrorLog /dev/stderr
|
||||||
|
|
||||||
<Directory "/var/www/html/4get">
|
<Directory /srv/4get>
|
||||||
RewriteEngine On
|
RewriteEngine On
|
||||||
RewriteCond %{THE_REQUEST} ^\w+\ /(.*)\.php(\?.*)?\ HTTP/
|
RewriteCond %{THE_REQUEST} ^\w+\ /(.*)\.php(\?.*)?\ HTTP/
|
||||||
RewriteRule ^ http://%{HTTP_HOST}/%1 [R=301]
|
RewriteRule ^ http://%{HTTP_HOST}/%1 [R=301]
|
||||||
@@ -22,68 +25,32 @@ ErrorLog /dev/null
|
|||||||
</Directory>
|
</Directory>
|
||||||
|
|
||||||
# deny access to private resources
|
# deny access to private resources
|
||||||
<Directory "/var/www/html/4get/data">
|
<Directory /srv/4get/data>
|
||||||
Require all denied
|
Require all denied
|
||||||
<Files "*">
|
<Files *>
|
||||||
Require all denied
|
Require all denied
|
||||||
</Files>
|
</Files>
|
||||||
</Directory>
|
</Directory>
|
||||||
|
|
||||||
LoadModule rewrite_module modules/mod_rewrite.so
|
LoadModule rewrite_module modules/mod_rewrite.so
|
||||||
LoadModule mpm_prefork_module modules/mod_mpm_prefork.so
|
LoadModule mpm_prefork_module modules/mod_mpm_prefork.so
|
||||||
LoadModule authn_file_module modules/mod_authn_file.so
|
|
||||||
LoadModule authn_core_module modules/mod_authn_core.so
|
|
||||||
LoadModule authz_host_module modules/mod_authz_host.so
|
|
||||||
LoadModule authz_groupfile_module modules/mod_authz_groupfile.so
|
|
||||||
LoadModule authz_user_module modules/mod_authz_user.so
|
|
||||||
LoadModule authz_core_module modules/mod_authz_core.so
|
LoadModule authz_core_module modules/mod_authz_core.so
|
||||||
LoadModule access_compat_module modules/mod_access_compat.so
|
LoadModule access_compat_module modules/mod_access_compat.so
|
||||||
LoadModule auth_basic_module modules/mod_auth_basic.so
|
|
||||||
LoadModule reqtimeout_module modules/mod_reqtimeout.so
|
|
||||||
LoadModule filter_module modules/mod_filter.so
|
LoadModule filter_module modules/mod_filter.so
|
||||||
LoadModule mime_module modules/mod_mime.so
|
LoadModule mime_module modules/mod_mime.so
|
||||||
LoadModule log_config_module modules/mod_log_config.so
|
LoadModule log_config_module modules/mod_log_config.so
|
||||||
LoadModule env_module modules/mod_env.so
|
|
||||||
LoadModule headers_module modules/mod_headers.so
|
|
||||||
LoadModule setenvif_module modules/mod_setenvif.so
|
|
||||||
LoadModule version_module modules/mod_version.so
|
|
||||||
LoadModule unixd_module modules/mod_unixd.so
|
LoadModule unixd_module modules/mod_unixd.so
|
||||||
LoadModule status_module modules/mod_status.so
|
|
||||||
LoadModule autoindex_module modules/mod_autoindex.so
|
|
||||||
LoadModule dir_module modules/mod_dir.so
|
|
||||||
LoadModule alias_module modules/mod_alias.so
|
|
||||||
LoadModule negotiation_module modules/mod_negotiation.so
|
LoadModule negotiation_module modules/mod_negotiation.so
|
||||||
|
LoadModule dir_module modules/mod_dir.so
|
||||||
<IfModule unixd_module>
|
|
||||||
User apache
|
|
||||||
Group apache
|
|
||||||
</IfModule>
|
|
||||||
|
|
||||||
<Directory />
|
<Directory />
|
||||||
AllowOverride none
|
AllowOverride none
|
||||||
Require all denied
|
Require all denied
|
||||||
</Directory>
|
</Directory>
|
||||||
|
|
||||||
<IfModule dir_module>
|
|
||||||
DirectoryIndex index.html
|
|
||||||
</IfModule>
|
|
||||||
|
|
||||||
<Files ".ht*">
|
<Files ".ht*">
|
||||||
Require all denied
|
Require all denied
|
||||||
</Files>
|
</Files>
|
||||||
|
|
||||||
<IfModule headers_module>
|
Include /etc/apache2/conf.d/languages.conf
|
||||||
RequestHeader unset Proxy early
|
Include /etc/apache2/conf.d/php83-module.conf
|
||||||
</IfModule>
|
|
||||||
|
|
||||||
<IfModule mime_module>
|
|
||||||
TypesConfig /etc/apache2/mime.types
|
|
||||||
AddType application/x-compress .Z
|
|
||||||
AddType application/x-gzip .gz .tgz
|
|
||||||
</IfModule>
|
|
||||||
|
|
||||||
<IfModule mime_magic_module>
|
|
||||||
MIMEMagicFile /etc/apache2/magic
|
|
||||||
</IfModule>
|
|
||||||
|
|
||||||
IncludeOptional /etc/apache2/conf.d/*.conf
|
|
||||||
|
@@ -1,11 +1,17 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh -e
|
||||||
set -e
|
|
||||||
|
|
||||||
if [ ! -f '/var/www/html/4get/data/config.php' ] && [ -f './gen_config.php' ]
|
config='/srv/4get/data/config.php'
|
||||||
then
|
defconfig='/srv/4get/data/config.def.php'
|
||||||
php ./gen_config.php
|
|
||||||
rm -f ./gen_config.php
|
# check for the configuration file
|
||||||
|
if [ ! -f "${config}" ]; then
|
||||||
|
echo "configuration file not specified"
|
||||||
|
echo "here's the default configuration, modify and mount this to ${config}"
|
||||||
|
echo
|
||||||
|
cat "${defconfig}"
|
||||||
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Starting up apache2"
|
# execute apache
|
||||||
exec httpd -DFOREGROUND
|
echo "starting apache web server"
|
||||||
|
exec httpd -D FOREGROUND
|
||||||
|
28
src/.gitignore
vendored
Normal file
28
src/.gitignore
vendored
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
lib/test.html
|
||||||
|
lib/postdata.json
|
||||||
|
lib/nextpage.json
|
||||||
|
|
||||||
|
scraper/brave.html
|
||||||
|
scraper/yandex.json
|
||||||
|
scraper/marginalia.json
|
||||||
|
scraper/mojeek.html
|
||||||
|
scraper/google.html
|
||||||
|
scraper/google-img.html
|
||||||
|
scraper/google-video.html
|
||||||
|
scraper/google-news.html
|
||||||
|
scraper/google-img-nextpage.html
|
||||||
|
scraper/brave-image.html
|
||||||
|
scraper/brave-video.html
|
||||||
|
scraper/facebook.html
|
||||||
|
scraper/facebook-nextpage.json
|
||||||
|
scraper/yandex-video.json
|
||||||
|
scraper/yandex.html
|
||||||
|
scraper/soundcloud.json
|
||||||
|
scraper/mp3-pm.html
|
||||||
|
scraper/curlie.html
|
||||||
|
|
||||||
|
banner/*
|
||||||
|
data/*
|
||||||
|
|
||||||
|
!banner/*default*
|
||||||
|
!data/*.def.*
|
@@ -1,39 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
include "data/config.php";
|
|
||||||
include "lib/frontend.php";
|
|
||||||
$frontend = new frontend();
|
|
||||||
|
|
||||||
echo
|
|
||||||
$frontend->load(
|
|
||||||
"header_nofilters.html",
|
|
||||||
[
|
|
||||||
"title" => "About",
|
|
||||||
"class" => " class=\"about\""
|
|
||||||
]
|
|
||||||
);
|
|
||||||
|
|
||||||
$left =
|
|
||||||
explode(
|
|
||||||
"\n",
|
|
||||||
file_get_contents("template/about.html")
|
|
||||||
);
|
|
||||||
|
|
||||||
$out = "";
|
|
||||||
|
|
||||||
foreach($left as $line){
|
|
||||||
|
|
||||||
$out .= trim($line);
|
|
||||||
}
|
|
||||||
|
|
||||||
echo
|
|
||||||
$frontend->load(
|
|
||||||
"search.html",
|
|
||||||
[
|
|
||||||
"timetaken" => null,
|
|
||||||
"class" => "",
|
|
||||||
"right-left" => "",
|
|
||||||
"right-right" => "",
|
|
||||||
"left" => $out
|
|
||||||
]
|
|
||||||
);
|
|
87
src/data/config.def.php
Normal file
87
src/data/config.def.php
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
<?php
|
||||||
|
class config{
|
||||||
|
// Welcome to the 4get configuration file
|
||||||
|
// When updating your instance, please make sure this file isn't missing
|
||||||
|
// any parameters.
|
||||||
|
|
||||||
|
// 4get version. Please keep this updated
|
||||||
|
const VERSION = 8;
|
||||||
|
|
||||||
|
// Will be shown pretty much everywhere.
|
||||||
|
const SERVER_NAME = "4get";
|
||||||
|
|
||||||
|
// Will be shown in <meta> tag on home page
|
||||||
|
const SERVER_SHORT_DESCRIPTION = "4get is a proxy search engine that doesn't suck.";
|
||||||
|
|
||||||
|
// Will be shown in server list ping (null for no description)
|
||||||
|
const SERVER_LONG_DESCRIPTION = null;
|
||||||
|
|
||||||
|
// Add your own themes in "static/themes". Set to "Dark" for default theme.
|
||||||
|
// Eg. To use "static/themes/Cream.css", specify "Cream".
|
||||||
|
const DEFAULT_THEME = "black";
|
||||||
|
|
||||||
|
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
||||||
|
// Changing this might break things.
|
||||||
|
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0";
|
||||||
|
|
||||||
|
// Temporary directory for saving the page icons
|
||||||
|
const ICON_DIR = "/tmp/icons";
|
||||||
|
|
||||||
|
// List of domains that point to your servers. Include your tor/i2p
|
||||||
|
// addresses here! Must be a valid URL. Won't affect links placed on
|
||||||
|
// the homepage.
|
||||||
|
const ALT_ADDRESSES = [
|
||||||
|
//"https://4get.alt-tld",
|
||||||
|
//"http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion"
|
||||||
|
];
|
||||||
|
|
||||||
|
// Proxy pool assignments for each scraper
|
||||||
|
// false = Use server's raw IP
|
||||||
|
// string = will load a proxy list from data directory
|
||||||
|
// Eg. "tor" will load data/tor.txt
|
||||||
|
const PROXY_DDG = false; // duckduckgo
|
||||||
|
const PROXY_BRAVE = false;
|
||||||
|
const PROXY_FB = false; // facebook
|
||||||
|
const PROXY_GOOGLE = false;
|
||||||
|
const PROXY_GOOGLE_API = false;
|
||||||
|
const PROXY_GOOGLE_CSE = false;
|
||||||
|
const PROXY_STARTPAGE = false;
|
||||||
|
const PROXY_QWANT = false;
|
||||||
|
const PROXY_GHOSTERY = false;
|
||||||
|
const PROXY_MARGINALIA = false;
|
||||||
|
const PROXY_MOJEEK = false;
|
||||||
|
const PROXY_SC = false; // soundcloud
|
||||||
|
const PROXY_SPOTIFY = false;
|
||||||
|
const PROXY_SOLOFIELD = false;
|
||||||
|
const PROXY_WIBY = false;
|
||||||
|
const PROXY_CURLIE = false;
|
||||||
|
const PROXY_YT = false; // youtube
|
||||||
|
const PROXY_YEP = false;
|
||||||
|
const PROXY_PINTEREST = false;
|
||||||
|
const PROXY_SANKAKUCOMPLEX = false;
|
||||||
|
const PROXY_FLICKR = false;
|
||||||
|
const PROXY_FIVEHPX = false;
|
||||||
|
const PROXY_VSCO = false;
|
||||||
|
const PROXY_SEZNAM = false;
|
||||||
|
const PROXY_NAVER = false;
|
||||||
|
const PROXY_GREPPR = false;
|
||||||
|
const PROXY_CROWDVIEW = false;
|
||||||
|
const PROXY_MWMBL = false;
|
||||||
|
const PROXY_FTM = false; // findthatmeme
|
||||||
|
const PROXY_IMGUR = false;
|
||||||
|
const PROXY_YANDEX_W = false; // yandex web
|
||||||
|
const PROXY_YANDEX_I = false; // yandex images
|
||||||
|
const PROXY_YANDEX_V = false; // yandex videos
|
||||||
|
|
||||||
|
//
|
||||||
|
// Scraper-specific parameters
|
||||||
|
//
|
||||||
|
|
||||||
|
// GOOGLE CSE & GOOGLE API
|
||||||
|
const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
|
||||||
|
|
||||||
|
// MARGINALIA
|
||||||
|
// Use "null" to default out to HTML scraping OR specify a string to
|
||||||
|
// use the API (Eg: "public"). API has less filters.
|
||||||
|
const MARGINALIA_API_KEY = null;
|
||||||
|
}
|
@@ -1,173 +0,0 @@
|
|||||||
<?php
|
|
||||||
class config{
|
|
||||||
// Welcome to the 4get configuration file
|
|
||||||
// When updating your instance, please make sure this file isn't missing
|
|
||||||
// any parameters.
|
|
||||||
|
|
||||||
// 4get version. Please keep this updated
|
|
||||||
const VERSION = 8;
|
|
||||||
|
|
||||||
// Will be shown pretty much everywhere.
|
|
||||||
const SERVER_NAME = "4get";
|
|
||||||
|
|
||||||
// Will be shown in <meta> tag on home page
|
|
||||||
const SERVER_SHORT_DESCRIPTION = "4get is a proxy search engine that doesn't suck.";
|
|
||||||
|
|
||||||
// Will be shown in server list ping (null for no description)
|
|
||||||
const SERVER_LONG_DESCRIPTION = null;
|
|
||||||
|
|
||||||
// Add your own themes in "static/themes". Set to "Dark" for default theme.
|
|
||||||
// Eg. To use "static/themes/Cream.css", specify "Cream".
|
|
||||||
const DEFAULT_THEME = "black";
|
|
||||||
|
|
||||||
// Enable the API?
|
|
||||||
const API_ENABLED = true;
|
|
||||||
|
|
||||||
//
|
|
||||||
// BOT PROTECTION
|
|
||||||
//
|
|
||||||
|
|
||||||
// 0 = disabled, 1 = ask for image captcha, @TODO: 2 = invite only (users needs a pass)
|
|
||||||
// VERY useful against a targetted attack
|
|
||||||
const BOT_PROTECTION = 0;
|
|
||||||
|
|
||||||
// if BOT_PROTECTION is set to 1, specify the available datasets here
|
|
||||||
// images should be named from 1.png to X.png, and be 100x100 in size
|
|
||||||
// Eg. data/captcha/birds/1.png up to 2263.png
|
|
||||||
const CAPTCHA_DATASET = [
|
|
||||||
// example:
|
|
||||||
//["birds", 2263],
|
|
||||||
//["fumo_plushies", 1006],
|
|
||||||
//["minecraft", 848]
|
|
||||||
];
|
|
||||||
|
|
||||||
// If this regex expression matches on the user agent, it blocks the request
|
|
||||||
// Not useful at all against a targetted attack
|
|
||||||
const HEADER_REGEX = '/bot|wget|curl|python-requests|scrapy|go-http-client|ruby|yahoo|spider|qwant/i';
|
|
||||||
|
|
||||||
// Block clients who present any of the following headers in their request (SPECIFY IN !!lowercase!!)
|
|
||||||
// Eg: ["x-forwarded-for", "x-via", "forwarded-for", "via"];
|
|
||||||
// Useful for blocking *some* proxies used for botting
|
|
||||||
const FILTERED_HEADER_KEYS = [
|
|
||||||
//"x-forwarded-for",
|
|
||||||
//"x-cluster-client-ip",
|
|
||||||
//"x-client-ip",
|
|
||||||
//"x-real-ip",
|
|
||||||
//"client-ip",
|
|
||||||
//"real-ip",
|
|
||||||
//"forwarded-for",
|
|
||||||
//"forwarded-for-ip",
|
|
||||||
//"forwarded",
|
|
||||||
//"proxy-connection",
|
|
||||||
//"remote-addr",
|
|
||||||
//"via"
|
|
||||||
];
|
|
||||||
|
|
||||||
// Block SSL ciphers used by CLI tools used for botting
|
|
||||||
// Basically a primitive version of Cloudflare's browser integrity check
|
|
||||||
// ** If curl can still access the site (with spoofed headers), please make sure you use the new apache2 config **
|
|
||||||
// https://git.lolcat.ca/lolcat/4get/docs/apache2.md
|
|
||||||
const DISALLOWED_SSL = [
|
|
||||||
// "TLS_AES_256_GCM_SHA384" // used by WGET and CURL
|
|
||||||
];
|
|
||||||
|
|
||||||
// Maximal number of searches per captcha key/pass issued. Counter gets
|
|
||||||
// reset on every APCU cache clear (should happen once a day).
|
|
||||||
// Only useful when BOT_PROTECTION is NOT set to 0
|
|
||||||
const MAX_SEARCHES = 100;
|
|
||||||
|
|
||||||
// List of domains that point to your servers. Include your tor/i2p
|
|
||||||
// addresses here! Must be a valid URL. Won't affect links placed on
|
|
||||||
// the homepage.
|
|
||||||
const ALT_ADDRESSES = [
|
|
||||||
//"https://4get.alt-tld",
|
|
||||||
//"http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion"
|
|
||||||
];
|
|
||||||
|
|
||||||
// Known 4get instances. MUST use the https protocol if your instance uses
|
|
||||||
// it. Is used to generate a distributed list of instances.
|
|
||||||
// To appear in the list of an instance, contact the host and if everyone added
|
|
||||||
// eachother your serber should appear everywhere.
|
|
||||||
const INSTANCES = [
|
|
||||||
"https://4get.ca",
|
|
||||||
"https://4get.zzls.xyz",
|
|
||||||
"https://4getus.zzls.xyz",
|
|
||||||
"https://4get.silly.computer",
|
|
||||||
"https://4get.konakona.moe",
|
|
||||||
"https://4get.lvkaszus.pl",
|
|
||||||
"https://4g.ggtyler.dev",
|
|
||||||
"https://4get.perennialte.ch",
|
|
||||||
"https://4get.sijh.net",
|
|
||||||
"https://4get.hbubli.cc",
|
|
||||||
"https://4get.plunked.party",
|
|
||||||
"https://4get.seitan-ayoub.lol",
|
|
||||||
"https://4get.etenie.pl",
|
|
||||||
"https://4get.lunar.icu",
|
|
||||||
"https://4get.dcs0.hu",
|
|
||||||
"https://4get.kizuki.lol",
|
|
||||||
"https://4get.psily.garden",
|
|
||||||
"https://search.milivojevic.in.rs",
|
|
||||||
"https://4get.snine.nl",
|
|
||||||
"https://4get.datura.network",
|
|
||||||
"https://4get.neco.lol",
|
|
||||||
"https://4get.lol",
|
|
||||||
"https://4get.ch",
|
|
||||||
"https://4get.edmateo.site",
|
|
||||||
"https://4get.sudovanilla.org",
|
|
||||||
"https://search.mint.lgbt"
|
|
||||||
];
|
|
||||||
|
|
||||||
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
|
||||||
// Changing this might break things.
|
|
||||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0";
|
|
||||||
|
|
||||||
// Proxy pool assignments for each scraper
|
|
||||||
// false = Use server's raw IP
|
|
||||||
// string = will load a proxy list from data/proxies
|
|
||||||
// Eg. "onion" will load data/proxies/onion.txt
|
|
||||||
const PROXY_DDG = false; // duckduckgo
|
|
||||||
const PROXY_BRAVE = false;
|
|
||||||
const PROXY_FB = false; // facebook
|
|
||||||
const PROXY_GOOGLE = false;
|
|
||||||
const PROXY_GOOGLE_API = false;
|
|
||||||
const PROXY_GOOGLE_CSE = false;
|
|
||||||
const PROXY_STARTPAGE = false;
|
|
||||||
const PROXY_QWANT = false;
|
|
||||||
const PROXY_GHOSTERY = false;
|
|
||||||
const PROXY_MARGINALIA = false;
|
|
||||||
const PROXY_MOJEEK = false;
|
|
||||||
const PROXY_SC = false; // soundcloud
|
|
||||||
const PROXY_SPOTIFY = false;
|
|
||||||
const PROXY_SOLOFIELD = false;
|
|
||||||
const PROXY_WIBY = false;
|
|
||||||
const PROXY_CURLIE = false;
|
|
||||||
const PROXY_YT = false; // youtube
|
|
||||||
const PROXY_YEP = false;
|
|
||||||
const PROXY_PINTEREST = false;
|
|
||||||
const PROXY_SANKAKUCOMPLEX = false;
|
|
||||||
const PROXY_FLICKR = false;
|
|
||||||
const PROXY_FIVEHPX = false;
|
|
||||||
const PROXY_VSCO = false;
|
|
||||||
const PROXY_SEZNAM = false;
|
|
||||||
const PROXY_NAVER = false;
|
|
||||||
const PROXY_GREPPR = false;
|
|
||||||
const PROXY_CROWDVIEW = false;
|
|
||||||
const PROXY_MWMBL = false;
|
|
||||||
const PROXY_FTM = false; // findthatmeme
|
|
||||||
const PROXY_IMGUR = false;
|
|
||||||
const PROXY_YANDEX_W = false; // yandex web
|
|
||||||
const PROXY_YANDEX_I = false; // yandex images
|
|
||||||
const PROXY_YANDEX_V = false; // yandex videos
|
|
||||||
|
|
||||||
//
|
|
||||||
// Scraper-specific parameters
|
|
||||||
//
|
|
||||||
|
|
||||||
// GOOGLE CSE & GOOGLE API
|
|
||||||
const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
|
|
||||||
|
|
||||||
// MARGINALIA
|
|
||||||
// Use "null" to default out to HTML scraping OR specify a string to
|
|
||||||
// use the API (Eg: "public"). API has less filters.
|
|
||||||
const MARGINALIA_API_KEY = null;
|
|
||||||
}
|
|
Binary file not shown.
3
src/data/proxies/.gitignore
vendored
3
src/data/proxies/.gitignore
vendored
@@ -1,3 +0,0 @@
|
|||||||
*
|
|
||||||
!.gitignore
|
|
||||||
!onion.txt
|
|
@@ -1,39 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
include "data/config.php";
|
|
||||||
include "lib/frontend.php";
|
|
||||||
$frontend = new frontend();
|
|
||||||
|
|
||||||
echo
|
|
||||||
$frontend->load(
|
|
||||||
"header_nofilters.html",
|
|
||||||
[
|
|
||||||
"title" => "Donate to the project",
|
|
||||||
"class" => " class=\"about\""
|
|
||||||
]
|
|
||||||
);
|
|
||||||
|
|
||||||
$left =
|
|
||||||
explode(
|
|
||||||
"\n",
|
|
||||||
file_get_contents("template/donate.html")
|
|
||||||
);
|
|
||||||
|
|
||||||
$out = "";
|
|
||||||
|
|
||||||
foreach($left as $line){
|
|
||||||
|
|
||||||
$out .= trim($line);
|
|
||||||
}
|
|
||||||
|
|
||||||
echo
|
|
||||||
$frontend->load(
|
|
||||||
"search.html",
|
|
||||||
[
|
|
||||||
"timetaken" => null,
|
|
||||||
"class" => "",
|
|
||||||
"right-left" => "",
|
|
||||||
"right-right" => "",
|
|
||||||
"left" => $out
|
|
||||||
]
|
|
||||||
);
|
|
@@ -15,32 +15,38 @@ class favicon{
|
|||||||
|
|
||||||
header("Content-Type: image/png");
|
header("Content-Type: image/png");
|
||||||
|
|
||||||
if(
|
// first check the URL with regex
|
||||||
preg_match(
|
if(preg_match('/^https?:\/\/[A-Za-z0-9.-]+$/', $url) === 0){
|
||||||
'/^https?:\/\/[A-Za-z0-9.-]+$/',
|
|
||||||
$url
|
|
||||||
) === 0
|
|
||||||
){
|
|
||||||
|
|
||||||
header("X-Error: Only provide the protocol and domain");
|
header("X-Error: Only provide the protocol and domain");
|
||||||
$this->defaulticon();
|
$this->defaulticon();
|
||||||
}
|
}
|
||||||
|
|
||||||
$filename = str_replace(["https://", "http://"], "", $url);
|
// validate the URL
|
||||||
header("Content-Disposition: inline; filename=\"{$filename}.png\"");
|
$url = filter_var($url, FILTER_VALIDATE_URL);
|
||||||
|
if(!$url) {
|
||||||
|
header("X-Error: Invalid URL");
|
||||||
|
$this->defaulticon();
|
||||||
|
}
|
||||||
|
|
||||||
|
// extract the hostname
|
||||||
|
$this->filename = parse_url($url, PHP_URL_HOST);
|
||||||
|
if(!$this->filename || is_null($this->filename) || $this->filename === ""){
|
||||||
|
header("X-Error: Invalid URL");
|
||||||
|
$this->defaulticon();
|
||||||
|
}
|
||||||
|
|
||||||
|
// specify the filename in content-disposition
|
||||||
|
header("Content-Disposition: inline; filename=\"{$this->filename}.png\"");
|
||||||
|
|
||||||
include "lib/curlproxy.php";
|
include "lib/curlproxy.php";
|
||||||
$this->proxy = new proxy(false);
|
$this->proxy = new proxy(false);
|
||||||
|
|
||||||
$this->filename = parse_url($url, PHP_URL_HOST);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Check if we have the favicon stored locally
|
Check if we have the favicon stored locally
|
||||||
*/
|
*/
|
||||||
if(file_exists("icons/" . $filename . ".png")){
|
if(file_exists($this->iconpath())){
|
||||||
|
$handle = fopen($this->iconpath(), "r");
|
||||||
$handle = fopen("icons/" . $filename . ".png", "r");
|
echo fread($handle, filesize($this->iconpath()));
|
||||||
echo fread($handle, filesize("icons/" . $filename . ".png"));
|
|
||||||
fclose($handle);
|
fclose($handle);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -249,7 +255,7 @@ class favicon{
|
|||||||
$image = $image->getImageBlob();
|
$image = $image->getImageBlob();
|
||||||
|
|
||||||
// save favicon
|
// save favicon
|
||||||
$handle = fopen("icons/" . $this->filename . ".png", "w");
|
$handle = fopen($this->iconpath(), "w");
|
||||||
fwrite($handle, $image, strlen($image));
|
fwrite($handle, $image, strlen($image));
|
||||||
fclose($handle);
|
fclose($handle);
|
||||||
|
|
||||||
@@ -329,6 +335,11 @@ class favicon{
|
|||||||
return $json["icons"][0]["src"];
|
return $json["icons"][0]["src"];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private function iconpath() {
|
||||||
|
// $this->filename can be trusted
|
||||||
|
return config::ICON_DIR . "/" . $this->filename . ".png";
|
||||||
|
}
|
||||||
|
|
||||||
private function favicon404(){
|
private function favicon404(){
|
||||||
|
|
||||||
// fallback to google favicons
|
// fallback to google favicons
|
||||||
@@ -346,7 +357,7 @@ class favicon{
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write favicon from google
|
// write favicon from google
|
||||||
$handle = fopen("icons/" . $this->filename . ".png", "w");
|
$handle = fopen($this->iconpath(), "w");
|
||||||
fwrite($handle, $image["body"], strlen($image["body"]));
|
fwrite($handle, $image["body"], strlen($image["body"]));
|
||||||
fclose($handle);
|
fclose($handle);
|
||||||
|
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 753 B |
100
src/lib/anubis.php
Normal file
100
src/lib/anubis.php
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
//
|
||||||
|
// Reference
|
||||||
|
// https://github.com/TecharoHQ/anubis/blob/ecc716940e34ebe7249974f2789a99a2c7115e4e/web/js/proof-of-work.mjs
|
||||||
|
//
|
||||||
|
|
||||||
|
/**
 * Solver for the Anubis proof-of-work challenge.
 *
 * scrape() pulls the challenge string and difficulty out of a challenge page,
 * and solve() brute-forces a nonce such that sha256(challenge . nonce) starts
 * with `difficulty` zero hex digits (nibbles).
 */
class anubis{
	
	/**
	 * Number of 4-bit nibbles in a raw SHA-256 digest (32 bytes * 2).
	 * A difficulty above this value can never be satisfied.
	 */
	const HASH_NIBBLES = 64;
	
	/**
	 * HTML parser used to locate the challenge <script> element.
	 * Declared explicitly because creating properties dynamically is
	 * deprecated since PHP 8.2. Kept public, as the dynamic property was.
	 */
	public $fuckhtml;
	
	public function __construct(){
		
		include_once "fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * Extract the challenge from an Anubis page and solve it.
	 *
	 * @param mixed $html Challenge page, passed as-is to fuckhtml::load()
	 * @return array ["response" => hex sha256 digest, "nonce" => int]
	 * @throws Exception If the challenge element is missing, its JSON cannot
	 *                   be decoded, the decoded data has the wrong shape, or
	 *                   the difficulty cannot be satisfied
	 */
	public function scrape($html){
		
		$this->fuckhtml->load($html);
		
		$script =
			$this->fuckhtml
			->getElementById(
				"anubis_challenge",
				"script"
			);
		
		if($script === false){
			
			throw new Exception("Failed to scrape anubis challenge data");
		}
		
		$challenge =
			json_decode(
				$this->fuckhtml
				->getTextContent(
					$script
				),
				true
			);
		
		if($challenge === null){
			
			throw new Exception("Failed to decode anubis challenge data");
		}
		
		// the payload comes from a remote server: check shape and types
		// before using any of it
		if(
			!is_array($challenge) ||
			!isset($challenge["challenge"]) ||
			!is_string($challenge["challenge"]) ||
			!isset($challenge["rules"]["difficulty"]) ||
			!is_int($challenge["rules"]["difficulty"])
		){
			
			throw new Exception("Found invalid challenge data");
		}
		
		return $this->solve($challenge["challenge"], $challenge["rules"]["difficulty"]);
	}
	
	/**
	 * Check whether a raw digest starts with $difficulty zero nibbles.
	 *
	 * @param string $hash Raw (binary) digest
	 * @param int $difficulty Required count of leading zero nibbles
	 * @return bool
	 */
	private function is_valid_hash($hash, $difficulty){
		
		// never index past the end of the digest
		if($difficulty > strlen($hash) * 2){
			
			return false;
		}
		
		for($i=0; $i<$difficulty; $i++){
			
			$byte = ord($hash[intdiv($i, 2)]);
			
			// even positions are the high nibble of the byte, odd the low one
			$nibble = $i % 2 === 0 ? $byte >> 4 : $byte & 0x0f;
			
			if($nibble !== 0){
				
				return false;
			}
		}
		
		return true;
	}
	
	/**
	 * Brute-force the proof of work: try nonces 0, 1, 2, ... until
	 * sha256($data . $nonce) has $difficulty leading zero nibbles.
	 *
	 * The loop has no iteration cap; each extra difficulty step multiplies
	 * the expected work by 16.
	 *
	 * @param string $data Challenge string
	 * @param int $difficulty Required count of leading zero nibbles
	 * @return array ["response" => hex sha256 digest, "nonce" => int]
	 * @throws Exception If $difficulty exceeds the digest length, which
	 *                   would otherwise loop without ever finding a match
	 */
	public function solve($data, $difficulty = 5){
		
		if($difficulty > self::HASH_NIBBLES){
			
			throw new Exception("Anubis difficulty exceeds SHA-256 hash length");
		}
		
		$nonce = 0;
		
		while(true){
			
			$hash_binary = hash("sha256", $data . $nonce, true);
			
			if($this->is_valid_hash($hash_binary, $difficulty)){
				
				return [
					"response" => bin2hex($hash_binary),
					"nonce" => $nonce
				];
			}
			
			$nonce++;
		}
	}
	
	/**
	 * Backward-compatible alias of solve(); same parameters and return value.
	 *
	 * @deprecated Use solve() instead
	 */
	public function rape($data, $difficulty = 5){
		
		return $this->solve($data, $difficulty);
	}
}
|
@@ -21,7 +21,7 @@ class backend{
|
|||||||
// indent
|
// indent
|
||||||
$proxy_index_raw = apcu_inc("p." . $this->scraper);
|
$proxy_index_raw = apcu_inc("p." . $this->scraper);
|
||||||
|
|
||||||
$proxylist = file_get_contents("data/proxies/" . $pool . ".txt");
|
$proxylist = file_get_contents("data/" . $pool . ".txt");
|
||||||
$proxylist = explode("\n", $proxylist);
|
$proxylist = explode("\n", $proxylist);
|
||||||
|
|
||||||
// ignore empty or commented lines
|
// ignore empty or commented lines
|
||||||
|
@@ -73,7 +73,6 @@ class frontend{
|
|||||||
}
|
}
|
||||||
|
|
||||||
public function loadheader(array $get, array $filters, string $page){
|
public function loadheader(array $get, array $filters, string $page){
|
||||||
|
|
||||||
echo
|
echo
|
||||||
$this->load("header.html", [
|
$this->load("header.html", [
|
||||||
"title" => trim(htmlspecialchars($get["s"]) . " ({$page})"),
|
"title" => trim(htmlspecialchars($get["s"]) . " ({$page})"),
|
||||||
@@ -83,60 +82,6 @@ class frontend{
|
|||||||
"tabs" => $this->generatehtmltabs($page, $get["s"]),
|
"tabs" => $this->generatehtmltabs($page, $get["s"]),
|
||||||
"filters" => $this->generatehtmlfilters($filters, $get)
|
"filters" => $this->generatehtmlfilters($filters, $get)
|
||||||
]);
|
]);
|
||||||
|
|
||||||
$headers_raw = getallheaders();
|
|
||||||
$header_keys = [];
|
|
||||||
$user_agent = "";
|
|
||||||
$bad_header = false;
|
|
||||||
|
|
||||||
// block bots that present X-Forwarded-For, Via, etc
|
|
||||||
foreach($headers_raw as $headerkey => $headervalue){
|
|
||||||
|
|
||||||
$headerkey = strtolower($headerkey);
|
|
||||||
if($headerkey == "user-agent"){
|
|
||||||
|
|
||||||
$user_agent = $headervalue;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// check header key
|
|
||||||
if(in_array($headerkey, config::FILTERED_HEADER_KEYS)){
|
|
||||||
|
|
||||||
$bad_header = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// SSL check
|
|
||||||
$bad_ssl = false;
|
|
||||||
if(
|
|
||||||
isset($_SERVER["https"]) &&
|
|
||||||
$_SERVER["https"] == "on" &&
|
|
||||||
isset($_SERVER["SSL_CIPHER"]) &&
|
|
||||||
in_array($_SERVER["SSL_CIPHER"], config::FILTERED_HEADER_KEYS)
|
|
||||||
){
|
|
||||||
|
|
||||||
$bad_ssl = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(
|
|
||||||
$bad_header === true ||
|
|
||||||
$bad_ssl === true ||
|
|
||||||
$user_agent == "" ||
|
|
||||||
// user agent check
|
|
||||||
preg_match(
|
|
||||||
config::HEADER_REGEX,
|
|
||||||
$user_agent
|
|
||||||
)
|
|
||||||
){
|
|
||||||
|
|
||||||
// bot detected !!
|
|
||||||
$this->drawerror(
|
|
||||||
"Tshh, blocked!",
|
|
||||||
'Your browser, IP or IP range has been blocked from this 4get instance.'
|
|
||||||
);
|
|
||||||
die();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function drawerror($title, $error, $timetaken = null){
|
public function drawerror($title, $error, $timetaken = null){
|
||||||
@@ -178,7 +123,6 @@ class frontend{
|
|||||||
'<li>Remove keywords that could cause errors</li>' .
|
'<li>Remove keywords that could cause errors</li>' .
|
||||||
'<li><a href="/instances?target=' . $target . "&" . $this->buildquery($get, false) . '">Try your search on another 4get instance</a></li>' .
|
'<li><a href="/instances?target=' . $target . "&" . $this->buildquery($get, false) . '">Try your search on another 4get instance</a></li>' .
|
||||||
'</ul><br>' .
|
'</ul><br>' .
|
||||||
'If the error persists, please <a href="/about">contact the administrator</a>.',
|
|
||||||
$timetaken
|
$timetaken
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@@ -719,7 +719,7 @@ class ddg{
|
|||||||
->getTextContent(
|
->getTextContent(
|
||||||
$json["suggestion"]
|
$json["suggestion"]
|
||||||
),
|
),
|
||||||
"correction" => $json["recourseText"]
|
"correction" => html_entity_decode($json["recourseText"])
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -3,7 +3,10 @@
|
|||||||
class marginalia{
|
class marginalia{
|
||||||
public function __construct(){
|
public function __construct(){
|
||||||
|
|
||||||
include "lib/fuckhtml.php";
|
include "lib/anubis.php";
|
||||||
|
$this->anubis = new anubis();
|
||||||
|
|
||||||
|
include_once "lib/fuckhtml.php";
|
||||||
$this->fuckhtml = new fuckhtml();
|
$this->fuckhtml = new fuckhtml();
|
||||||
|
|
||||||
include "lib/backend.php";
|
include "lib/backend.php";
|
||||||
@@ -102,7 +105,40 @@ class marginalia{
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function get($proxy, $url, $get = []){
|
private function get($proxy, $url, $get = [], $get_cookies = 1){
|
||||||
|
|
||||||
|
$curlproc = curl_init();
|
||||||
|
|
||||||
|
switch($get_cookies){
|
||||||
|
|
||||||
|
case 0:
|
||||||
|
$cookies = "";
|
||||||
|
$cookies_tmp = [];
|
||||||
|
curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
|
||||||
|
|
||||||
|
$length = strlen($header);
|
||||||
|
|
||||||
|
$header = explode(":", $header, 2);
|
||||||
|
|
||||||
|
if(trim(strtolower($header[0])) == "set-cookie"){
|
||||||
|
|
||||||
|
$cookie_tmp = explode("=", trim($header[1]), 2);
|
||||||
|
|
||||||
|
$cookies_tmp[trim($cookie_tmp[0])] =
|
||||||
|
explode(";", $cookie_tmp[1], 2)[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
return $length;
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
$cookies = "";
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
$cookies = "Cookie: " . $get_cookies;
|
||||||
|
}
|
||||||
|
|
||||||
$headers = [
|
$headers = [
|
||||||
"User-Agent: " . config::USER_AGENT,
|
"User-Agent: " . config::USER_AGENT,
|
||||||
@@ -110,6 +146,7 @@ class marginalia{
|
|||||||
"Accept-Language: en-US,en;q=0.5",
|
"Accept-Language: en-US,en;q=0.5",
|
||||||
"Accept-Encoding: gzip",
|
"Accept-Encoding: gzip",
|
||||||
"DNT: 1",
|
"DNT: 1",
|
||||||
|
$cookies,
|
||||||
"Connection: keep-alive",
|
"Connection: keep-alive",
|
||||||
"Upgrade-Insecure-Requests: 1",
|
"Upgrade-Insecure-Requests: 1",
|
||||||
"Sec-Fetch-Dest: document",
|
"Sec-Fetch-Dest: document",
|
||||||
@@ -118,8 +155,6 @@ class marginalia{
|
|||||||
"Sec-Fetch-User: ?1"
|
"Sec-Fetch-User: ?1"
|
||||||
];
|
];
|
||||||
|
|
||||||
$curlproc = curl_init();
|
|
||||||
|
|
||||||
if($get !== []){
|
if($get !== []){
|
||||||
$get = http_build_query($get);
|
$get = http_build_query($get);
|
||||||
$url .= "?" . $get;
|
$url .= "?" . $get;
|
||||||
@@ -145,7 +180,19 @@ class marginalia{
|
|||||||
throw new Exception(curl_error($curlproc));
|
throw new Exception(curl_error($curlproc));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if($get_cookies === 0){
|
||||||
|
|
||||||
|
$cookie = [];
|
||||||
|
|
||||||
|
foreach($cookies_tmp as $key => $value){
|
||||||
|
|
||||||
|
$cookie[] = $key . "=" . $value;
|
||||||
|
}
|
||||||
|
|
||||||
curl_close($curlproc);
|
curl_close($curlproc);
|
||||||
|
return implode(";", $cookie);
|
||||||
|
}
|
||||||
|
|
||||||
return $data;
|
return $data;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -267,6 +314,60 @@ class marginalia{
|
|||||||
// HTML parser
|
// HTML parser
|
||||||
$proxy = $this->backend->get_ip();
|
$proxy = $this->backend->get_ip();
|
||||||
|
|
||||||
|
//
|
||||||
|
// Bypass anubis check
|
||||||
|
//
|
||||||
|
/*
|
||||||
|
if(($anubis_key = apcu_fetch("marginalia_cookie")) === false){
|
||||||
|
|
||||||
|
try{
|
||||||
|
$html =
|
||||||
|
$this->get(
|
||||||
|
$proxy,
|
||||||
|
"https://old-search.marginalia.nu/search",
|
||||||
|
[
|
||||||
|
"query" => $search
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
}catch(Exception $error){
|
||||||
|
|
||||||
|
throw new Exception("Failed to get anubis challenge");
|
||||||
|
}
|
||||||
|
|
||||||
|
try{
|
||||||
|
|
||||||
|
$anubis_data = $this->anubis->scrape($html);
|
||||||
|
}catch(Exception $error){
|
||||||
|
|
||||||
|
throw new Exception($error);
|
||||||
|
}
|
||||||
|
|
||||||
|
// send anubis response & get cookies
|
||||||
|
// https://old-search.marginalia.nu/.within.website/x/cmd/anubis/api/pass-challenge?response=0000018966b086834f738bacba6031028adb5aa875974ead197a8b75778baf3a&nonce=39947&redir=https%3A%2F%2Fold-search.marginalia.nu%2F&elapsedTime=1164
|
||||||
|
|
||||||
|
try{
|
||||||
|
|
||||||
|
$anubis_key =
|
||||||
|
$this->get(
|
||||||
|
$proxy,
|
||||||
|
"https://old-search.marginalia.nu/.within.website/x/cmd/anubis/api/pass-challenge",
|
||||||
|
[
|
||||||
|
"response" => $anubis_data["response"],
|
||||||
|
"nonce" => $anubis_data["nonce"],
|
||||||
|
"redir" => "https://old-search.marginalia.nu/",
|
||||||
|
"elapsedTime" => random_int(1000, 2000)
|
||||||
|
],
|
||||||
|
0
|
||||||
|
);
|
||||||
|
}catch(Exception $error){
|
||||||
|
|
||||||
|
throw new Exception("Failed to submit anubis challenge");
|
||||||
|
}
|
||||||
|
|
||||||
|
apcu_store("marginalia_cookie", $anubis_key);
|
||||||
|
}*/
|
||||||
|
|
||||||
if($get["npt"]){
|
if($get["npt"]){
|
||||||
|
|
||||||
[$params, $proxy] =
|
[$params, $proxy] =
|
||||||
@@ -279,7 +380,9 @@ class marginalia{
|
|||||||
$html =
|
$html =
|
||||||
$this->get(
|
$this->get(
|
||||||
$proxy,
|
$proxy,
|
||||||
"https://old-search.marginalia.nu/search?" . $params
|
"https://old-search.marginalia.nu/search?" . $params,
|
||||||
|
[],
|
||||||
|
//$anubis_key
|
||||||
);
|
);
|
||||||
}catch(Exception $error){
|
}catch(Exception $error){
|
||||||
|
|
||||||
@@ -309,7 +412,8 @@ class marginalia{
|
|||||||
$this->get(
|
$this->get(
|
||||||
$proxy,
|
$proxy,
|
||||||
"https://old-search.marginalia.nu/search",
|
"https://old-search.marginalia.nu/search",
|
||||||
$params
|
$params,
|
||||||
|
//$anubis_key
|
||||||
);
|
);
|
||||||
}catch(Exception $error){
|
}catch(Exception $error){
|
||||||
|
|
||||||
|
@@ -14,7 +14,7 @@ class yandex{
|
|||||||
// backend included in the scraper functions
|
// backend included in the scraper functions
|
||||||
}
|
}
|
||||||
|
|
||||||
private function get($proxy, $url, $get = [], $nsfw){
|
private function get($proxy, $url, $get = [], $nsfw, $get_cookie = 1){
|
||||||
|
|
||||||
$curlproc = curl_init();
|
$curlproc = curl_init();
|
||||||
|
|
||||||
@@ -25,19 +25,55 @@ class yandex{
|
|||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_URL, $url);
|
curl_setopt($curlproc, CURLOPT_URL, $url);
|
||||||
|
|
||||||
|
// extract "i" cookie
|
||||||
|
if($get_cookie === 0){
|
||||||
|
|
||||||
|
$cookies_tmp = [];
|
||||||
|
curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
|
||||||
|
|
||||||
|
$length = strlen($header);
|
||||||
|
|
||||||
|
$header = explode(":", $header, 2);
|
||||||
|
|
||||||
|
if(trim(strtolower($header[0])) == "set-cookie"){
|
||||||
|
|
||||||
|
$cookie_tmp = explode("=", trim($header[1]), 2);
|
||||||
|
|
||||||
|
$cookies_tmp[trim($cookie_tmp[0])] =
|
||||||
|
explode(";", $cookie_tmp[1], 2)[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
return $length;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
switch($nsfw){
|
switch($nsfw){
|
||||||
case "yes": $nsfw = "0"; break;
|
case "yes": $nsfw = "0"; break;
|
||||||
case "maybe": $nsfw = "1"; break;
|
case "maybe": $nsfw = "1"; break;
|
||||||
case "no": $nsfw = "2"; break;
|
case "no": $nsfw = "2"; break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch($get_cookie){
|
||||||
|
|
||||||
|
case 0:
|
||||||
|
$cookie = "";
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
$cookie = "Cookie: yp=" . (time() - 4000033) . ".szm.1:1920x1080:876x1000#" . time() . ".sp.family:" . $nsfw;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
$cookie = "Cookie: i=" . $get_cookie;
|
||||||
|
}
|
||||||
|
|
||||||
$headers =
|
$headers =
|
||||||
["User-Agent: " . config::USER_AGENT,
|
["User-Agent: " . config::USER_AGENT,
|
||||||
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||||
"Accept-Encoding: gzip",
|
"Accept-Encoding: gzip",
|
||||||
"Accept-Language: en-US,en;q=0.5",
|
"Accept-Language: en-US,en;q=0.5",
|
||||||
"DNT: 1",
|
"DNT: 1",
|
||||||
"Cookie: yp=" . (time() - 4000033) . ".szm.1:1920x1080:876x1000#" . time() . ".sp.family:" . $nsfw,
|
$cookie,
|
||||||
"Referer: https://yandex.com/images/search",
|
"Referer: https://yandex.com/images/search",
|
||||||
"Connection: keep-alive",
|
"Connection: keep-alive",
|
||||||
"Upgrade-Insecure-Requests: 1",
|
"Upgrade-Insecure-Requests: 1",
|
||||||
@@ -59,6 +95,17 @@ class yandex{
|
|||||||
|
|
||||||
$data = curl_exec($curlproc);
|
$data = curl_exec($curlproc);
|
||||||
|
|
||||||
|
if($get_cookie === 0){
|
||||||
|
|
||||||
|
if(isset($cookies_tmp["i"])){
|
||||||
|
|
||||||
|
return $cookies_tmp["i"];
|
||||||
|
}else{
|
||||||
|
|
||||||
|
throw new Exception("Failed to get Yandex clearance cookie");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if(curl_errno($curlproc)){
|
if(curl_errno($curlproc)){
|
||||||
|
|
||||||
throw new Exception(curl_error($curlproc));
|
throw new Exception(curl_error($curlproc));
|
||||||
@@ -217,6 +264,23 @@ class yandex{
|
|||||||
// https://yandex.com/search/site/?text=minecraft&web=1&frame=1&v=2.0&searchid=3131712
|
// https://yandex.com/search/site/?text=minecraft&web=1&frame=1&v=2.0&searchid=3131712
|
||||||
// &within=777&from_day=26&from_month=8&from_year=2023&to_day=26&to_month=8&to_year=2023
|
// &within=777&from_day=26&from_month=8&from_year=2023&to_day=26&to_month=8&to_year=2023
|
||||||
|
|
||||||
|
// get clearance cookie
|
||||||
|
if(($cookie = apcu_fetch("yandexweb_cookie")) === false){
|
||||||
|
|
||||||
|
$proxy = $this->backend->get_ip();
|
||||||
|
|
||||||
|
$cookie =
|
||||||
|
$this->get(
|
||||||
|
$proxy,
|
||||||
|
"https://yandex.ru/support2/smart-captcha/ru/",
|
||||||
|
[],
|
||||||
|
false,
|
||||||
|
0
|
||||||
|
);
|
||||||
|
|
||||||
|
apcu_store("yandexweb_cookie", $cookie);
|
||||||
|
}
|
||||||
|
|
||||||
if($get["npt"]){
|
if($get["npt"]){
|
||||||
|
|
||||||
[$npt, $proxy] = $this->backend->get($get["npt"], "web");
|
[$npt, $proxy] = $this->backend->get($get["npt"], "web");
|
||||||
@@ -226,7 +290,8 @@ class yandex{
|
|||||||
$proxy,
|
$proxy,
|
||||||
"https://yandex.com" . $npt,
|
"https://yandex.com" . $npt,
|
||||||
[],
|
[],
|
||||||
"yes"
|
"yes",
|
||||||
|
$cookie
|
||||||
);
|
);
|
||||||
}else{
|
}else{
|
||||||
|
|
||||||
@@ -236,7 +301,7 @@ class yandex{
|
|||||||
throw new Exception("Search term is empty!");
|
throw new Exception("Search term is empty!");
|
||||||
}
|
}
|
||||||
|
|
||||||
$proxy = $this->backend->get_ip();
|
$proxy = !isset($proxy) ? $this->backend->get_ip() : $proxy;
|
||||||
$lang = $get["lang"];
|
$lang = $get["lang"];
|
||||||
$older = $get["older"];
|
$older = $get["older"];
|
||||||
$newer = $get["newer"];
|
$newer = $get["newer"];
|
||||||
@@ -283,7 +348,8 @@ class yandex{
|
|||||||
$proxy,
|
$proxy,
|
||||||
"https://yandex.com/search/site/",
|
"https://yandex.com/search/site/",
|
||||||
$params,
|
$params,
|
||||||
"yes"
|
"yes",
|
||||||
|
$cookie
|
||||||
);
|
);
|
||||||
}catch(Exception $error){
|
}catch(Exception $error){
|
||||||
|
|
||||||
@@ -314,6 +380,19 @@ class yandex{
|
|||||||
|
|
||||||
$this->fuckhtml->load($html);
|
$this->fuckhtml->load($html);
|
||||||
|
|
||||||
|
// Scrape page blocked error
|
||||||
|
$title =
|
||||||
|
$this->fuckhtml
|
||||||
|
->getElementsByTagName("title");
|
||||||
|
|
||||||
|
if(
|
||||||
|
count($title) !== 0 &&
|
||||||
|
$title[0]["innerHTML"] == "403"
|
||||||
|
){
|
||||||
|
|
||||||
|
throw new Exception("Yandex blocked this proxy or 4get instance.");
|
||||||
|
}
|
||||||
|
|
||||||
// get nextpage
|
// get nextpage
|
||||||
$npt =
|
$npt =
|
||||||
$this->fuckhtml
|
$this->fuckhtml
|
||||||
|
@@ -258,7 +258,7 @@ h3,h4,h5,h6{
|
|||||||
font-family:Times;
|
font-family:Times;
|
||||||
width:100%;
|
width:100%;
|
||||||
height:100%;
|
height:100%;
|
||||||
background:var(--282828);
|
background: none;
|
||||||
display:block;
|
display:block;
|
||||||
object-fit:contain;
|
object-fit:contain;
|
||||||
}
|
}
|
||||||
|
@@ -84,7 +84,7 @@ if($results["spelling"]["type"] != "no_correction"){
|
|||||||
'&' .
|
'&' .
|
||||||
$frontend->buildquery($get, true) .
|
$frontend->buildquery($get, true) .
|
||||||
'&spellcheck=no">' .
|
'&spellcheck=no">' .
|
||||||
$results["spelling"]["correction"] .
|
htmlspecialchars($results["spelling"]["correction"]) .
|
||||||
'</a>?' .
|
'</a>?' .
|
||||||
'</div>';
|
'</div>';
|
||||||
}
|
}
|
||||||
|
2
ups.json
2
ups.json
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"upstream": "https://git.lolcat.ca/lolcat/4get",
|
"upstream": "https://git.lolcat.ca/lolcat/4get",
|
||||||
"provider": "gitea",
|
"provider": "gitea",
|
||||||
"commit": "3e1487e614f3bb5d86ddda6da63a39a8cdaadf15",
|
"commit": "430c0a2f0f72f1254ab65d53f13640fe02418f05",
|
||||||
"scripts": [
|
"scripts": [
|
||||||
"s/--- a\\//--- a\\/src\\//g",
|
"s/--- a\\//--- a\\/src\\//g",
|
||||||
"s/+++ b\\//+++ b\\/src\\//g"
|
"s/+++ b\\//+++ b\\/src\\//g"
|
||||||
|
Reference in New Issue
Block a user