clean config, support read-only non-root docker
All checks were successful
docker / docker (push) Successful in 22s
All checks were successful
docker / docker (push) Successful in 22s
Signed-off-by: ngn <ngn@ngn.tf>
This commit is contained in:
33
.gitignore
vendored
33
.gitignore
vendored
@ -1,28 +1,5 @@
|
||||
lib/test.html
|
||||
lib/postdata.json
|
||||
lib/nextpage.json
|
||||
scraper/brave.html
|
||||
scraper/yandex.json
|
||||
scraper/marginalia.json
|
||||
banner_og/
|
||||
scraper/mojeek.html
|
||||
scraper/google.html
|
||||
scraper/google-img.html
|
||||
scraper/google-video.html
|
||||
scraper/google-news.html
|
||||
scraper/google-img-nextpage.html
|
||||
scraper/brave-image.html
|
||||
scraper/brave-video.html
|
||||
scraper/facebook.html
|
||||
scraper/facebook-nextpage.json
|
||||
scraper/yandex-video.json
|
||||
scraper/yandex.html
|
||||
scraper/soundcloud.json
|
||||
scraper/mp3-pm.html
|
||||
banner/*
|
||||
data/captcha/birds/
|
||||
data/captcha/fumo_plushies/
|
||||
data/captcha/minecraft/
|
||||
!banner/*default*
|
||||
scraper/curlie.html
|
||||
icons/*
|
||||
/compose.yml
|
||||
/docker-compose.yml
|
||||
/banners
|
||||
/captcha
|
||||
/config.php
|
||||
|
26
Dockerfile
26
Dockerfile
@ -1,24 +1,20 @@
|
||||
FROM alpine:latest
|
||||
FROM alpine
|
||||
|
||||
RUN apk update
|
||||
RUN apk upgrade
|
||||
|
||||
RUN apk add php apache2-ssl php83-fileinfo php83-openssl \
|
||||
php83-iconv php83-common php83-dom php83-sodium \
|
||||
php83-curl curl php83-pecl-apcu php83-apache2 \
|
||||
imagemagick php83-pecl-imagick php-mbstring \
|
||||
imagemagick-webp imagemagick-jpeg
|
||||
RUN apk update && \
|
||||
apk upgrade && \
|
||||
apk add \
|
||||
php php83-fileinfo php83-iconv php83-common php83-dom php83-sodium \
|
||||
php83-curl php83-pecl-apcu php83-apache2 php-mbstring \
|
||||
php83-pecl-imagick imagemagick-webp imagemagick-jpeg
|
||||
|
||||
COPY ./docker/httpd.conf /etc/apache2/httpd.conf
|
||||
COPY ./docker/init.sh /
|
||||
|
||||
WORKDIR /var/www/html
|
||||
WORKDIR /srv
|
||||
COPY ./src ./4get
|
||||
|
||||
WORKDIR /var/www/html/4get
|
||||
COPY ./docker/gen_config.php .
|
||||
|
||||
RUN chmod 777 /var/www/html/4get/icons
|
||||
RUN chmod +x /init.sh
|
||||
RUN adduser -DSH -u 1000 -h /srv runner
|
||||
RUN chown -R runner /srv && chmod +x /init.sh
|
||||
|
||||
USER runner
|
||||
CMD ["/init.sh"]
|
||||
|
18
compose.example.yml
Normal file
18
compose.example.yml
Normal file
@ -0,0 +1,18 @@
|
||||
services:
|
||||
fourget:
|
||||
container_name: 4get
|
||||
image: git.ngn.tf/ngn/4get
|
||||
ports:
|
||||
- 80:8080
|
||||
volumes:
|
||||
- ./config.php:/srv/4get/data/config.php:ro
|
||||
- ./banners:/srv/4get/banner:ro
|
||||
- ./captcha:/srv/4get/data/captcha:ro
|
||||
- type: tmpfs
|
||||
target: /tmp/icons
|
||||
cap_drop:
|
||||
- ALL
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
read_only: true
|
||||
restart: unless-stopped
|
@ -1,12 +0,0 @@
|
||||
services:
|
||||
fourget:
|
||||
container_name: 4get
|
||||
image: git.ngn.tf/ngn/4get
|
||||
environment:
|
||||
- FOURGET_SERVER_NAME=example.com
|
||||
ports:
|
||||
- 80:80
|
||||
volumes:
|
||||
- ./banners:/var/www/html/4get/banner
|
||||
- ./captcha:/var/www/html/4get/data/captcha
|
||||
restart: unless-stopped
|
@ -1,90 +0,0 @@
|
||||
<?php
|
||||
|
||||
include "/var/www/html/4get/data/config.php";
|
||||
|
||||
$refl = new ReflectionClass('config');
|
||||
$from_config = ($refl->getConstants());
|
||||
$from_env = array();
|
||||
|
||||
$env = getenv();
|
||||
$fourget_env = array_filter($env, function($v, $k) {
|
||||
return str_starts_with($k, "FOURGET");
|
||||
}, ARRAY_FILTER_USE_BOTH);
|
||||
|
||||
foreach($fourget_env as $key => $val) {
|
||||
$target_key = preg_replace('/^FOURGET_/', '', $key);
|
||||
$from_env[$target_key] = trim($val, '\'"');
|
||||
};
|
||||
|
||||
$merged_config = array_merge($from_config, $from_env);
|
||||
|
||||
function type_to_string($n) {
|
||||
$type = gettype($n);
|
||||
if ($type === "NULL") {
|
||||
return "null";
|
||||
}
|
||||
if ($type === "boolean") {
|
||||
return $n ? 'true' : 'false';
|
||||
}
|
||||
if ($type === "string") {
|
||||
if(is_numeric($n)) {
|
||||
return $n;
|
||||
}
|
||||
return "\"$n\"";
|
||||
}
|
||||
if ($type === "array") {
|
||||
return json_encode($n, JSON_UNESCAPED_SLASHES);
|
||||
}
|
||||
return $n;
|
||||
}
|
||||
|
||||
|
||||
function detect_captcha_dirs() {
|
||||
$captcha_dir = "/var/www/html/4get/data/captcha/";
|
||||
$categories = (array_map(function ($n) {
|
||||
return explode("/", $n)[7];
|
||||
}, glob($captcha_dir . "*")));
|
||||
|
||||
|
||||
$result = array_map(function($category) {
|
||||
return [$category, count(glob("/var/www/html/4get/data/captcha/" . $category . "/*" ))];
|
||||
}, $categories);
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
|
||||
$special_keys = ["PROTO", "CAPTCHA_DATASET"];
|
||||
|
||||
$output = "<?php\n // This file was generated by docker/gen_config.php\n";
|
||||
|
||||
$output = $output . "class config {\n";
|
||||
foreach(($merged_config) as $key => $val){
|
||||
if(!in_array($key, $special_keys)) {
|
||||
$stored_value = $val;
|
||||
// conversion between arrays and comma separated env value.
|
||||
// Handle case when original type of field is array and there is a type mismatch when a comma separated string is passed,
|
||||
// then split on comma if string (and not numeric, boolean, null, etc)
|
||||
//
|
||||
// except in the case where the initial value in default config is null or boolean. Assuming null and boolean
|
||||
// in default config will never be assigned an array
|
||||
|
||||
if(gettype($from_config[$key]) != gettype($val) && !is_numeric($val) && !is_null($from_config[$key]) && gettype($from_config[$key]) != "boolean") {
|
||||
$stored_value = explode(",", $val);
|
||||
}
|
||||
$output = $output . "\tconst " . $key . " = " . type_to_string($stored_value) . ";\n";
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if($key === "CAPTCHA_DATASET") {
|
||||
$output = $output . "\tconst " . $key . " = " . type_to_string(detect_captcha_dirs()) . ";\n";
|
||||
}
|
||||
}
|
||||
|
||||
$output = $output . "}\n";
|
||||
$output = $output . "?>";
|
||||
|
||||
file_put_contents("./data/config.php", $output);
|
||||
?>
|
@ -1,16 +1,19 @@
|
||||
Listen 80
|
||||
ServerTokens OS
|
||||
Listen 8080
|
||||
|
||||
ServerRoot /var/www
|
||||
ServerSignature On
|
||||
ServerName localhost
|
||||
|
||||
DocumentRoot "/var/www/html/4get"
|
||||
ServerSignature Off
|
||||
ServerTokens Prod
|
||||
|
||||
PidFile /dev/shm/httpd.pid
|
||||
DocumentRoot /srv/4get
|
||||
|
||||
LogLevel error
|
||||
CustomLog /dev/null common
|
||||
ErrorLog /dev/null
|
||||
ErrorLog /dev/stderr
|
||||
|
||||
<Directory "/var/www/html/4get">
|
||||
<Directory /srv/4get>
|
||||
RewriteEngine On
|
||||
RewriteCond %{THE_REQUEST} ^\w+\ /(.*)\.php(\?.*)?\ HTTP/
|
||||
RewriteRule ^ http://%{HTTP_HOST}/%1 [R=301]
|
||||
@ -22,68 +25,32 @@ ErrorLog /dev/null
|
||||
</Directory>
|
||||
|
||||
# deny access to private resources
|
||||
<Directory "/var/www/html/4get/data">
|
||||
<Directory /srv/4get/data>
|
||||
Require all denied
|
||||
<Files "*">
|
||||
<Files *>
|
||||
Require all denied
|
||||
</Files>
|
||||
</Directory>
|
||||
|
||||
LoadModule rewrite_module modules/mod_rewrite.so
|
||||
LoadModule mpm_prefork_module modules/mod_mpm_prefork.so
|
||||
LoadModule authn_file_module modules/mod_authn_file.so
|
||||
LoadModule authn_core_module modules/mod_authn_core.so
|
||||
LoadModule authz_host_module modules/mod_authz_host.so
|
||||
LoadModule authz_groupfile_module modules/mod_authz_groupfile.so
|
||||
LoadModule authz_user_module modules/mod_authz_user.so
|
||||
LoadModule authz_core_module modules/mod_authz_core.so
|
||||
LoadModule access_compat_module modules/mod_access_compat.so
|
||||
LoadModule auth_basic_module modules/mod_auth_basic.so
|
||||
LoadModule reqtimeout_module modules/mod_reqtimeout.so
|
||||
LoadModule filter_module modules/mod_filter.so
|
||||
LoadModule mime_module modules/mod_mime.so
|
||||
LoadModule log_config_module modules/mod_log_config.so
|
||||
LoadModule env_module modules/mod_env.so
|
||||
LoadModule headers_module modules/mod_headers.so
|
||||
LoadModule setenvif_module modules/mod_setenvif.so
|
||||
LoadModule version_module modules/mod_version.so
|
||||
LoadModule unixd_module modules/mod_unixd.so
|
||||
LoadModule status_module modules/mod_status.so
|
||||
LoadModule autoindex_module modules/mod_autoindex.so
|
||||
LoadModule dir_module modules/mod_dir.so
|
||||
LoadModule alias_module modules/mod_alias.so
|
||||
LoadModule negotiation_module modules/mod_negotiation.so
|
||||
|
||||
<IfModule unixd_module>
|
||||
User apache
|
||||
Group apache
|
||||
</IfModule>
|
||||
LoadModule dir_module modules/mod_dir.so
|
||||
|
||||
<Directory />
|
||||
AllowOverride none
|
||||
Require all denied
|
||||
</Directory>
|
||||
|
||||
<IfModule dir_module>
|
||||
DirectoryIndex index.html
|
||||
</IfModule>
|
||||
|
||||
<Files ".ht*">
|
||||
Require all denied
|
||||
</Files>
|
||||
|
||||
<IfModule headers_module>
|
||||
RequestHeader unset Proxy early
|
||||
</IfModule>
|
||||
|
||||
<IfModule mime_module>
|
||||
TypesConfig /etc/apache2/mime.types
|
||||
AddType application/x-compress .Z
|
||||
AddType application/x-gzip .gz .tgz
|
||||
</IfModule>
|
||||
|
||||
<IfModule mime_magic_module>
|
||||
MIMEMagicFile /etc/apache2/magic
|
||||
</IfModule>
|
||||
|
||||
IncludeOptional /etc/apache2/conf.d/*.conf
|
||||
Include /etc/apache2/conf.d/languages.conf
|
||||
Include /etc/apache2/conf.d/php83-module.conf
|
||||
|
@ -1,11 +1,17 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
#!/bin/sh -e
|
||||
|
||||
if [ ! -f '/var/www/html/4get/data/config.php' ] && [ -f './gen_config.php' ]
|
||||
then
|
||||
php ./gen_config.php
|
||||
rm -f ./gen_config.php
|
||||
config='/srv/4get/data/config.php'
|
||||
defconfig='/srv/4get/data/config.def.php'
|
||||
|
||||
# check for the configuration file
|
||||
if [ ! -f "${config}" ]; then
|
||||
echo "configuration file not specified"
|
||||
echo "here's the default configuration, modify and mount this to ${config}"
|
||||
echo
|
||||
cat "${defconfig}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Starting up apache2"
|
||||
# execute apache
|
||||
echo "starting apache web server"
|
||||
exec httpd -D FOREGROUND
|
||||
|
29
src/.gitignore
vendored
Normal file
29
src/.gitignore
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
lib/test.html
|
||||
lib/postdata.json
|
||||
lib/nextpage.json
|
||||
|
||||
scraper/brave.html
|
||||
scraper/yandex.json
|
||||
scraper/marginalia.json
|
||||
scraper/mojeek.html
|
||||
scraper/google.html
|
||||
scraper/google-img.html
|
||||
scraper/google-video.html
|
||||
scraper/google-news.html
|
||||
scraper/google-img-nextpage.html
|
||||
scraper/brave-image.html
|
||||
scraper/brave-video.html
|
||||
scraper/facebook.html
|
||||
scraper/facebook-nextpage.json
|
||||
scraper/yandex-video.json
|
||||
scraper/yandex.html
|
||||
scraper/soundcloud.json
|
||||
scraper/mp3-pm.html
|
||||
scraper/curlie.html
|
||||
|
||||
icons/*
|
||||
banner/*
|
||||
!banner/*default*
|
||||
|
||||
data/captcha
|
||||
data/config.php
|
87
src/data/config.def.php
Normal file
87
src/data/config.def.php
Normal file
@ -0,0 +1,87 @@
|
||||
<?php
|
||||
class config{
|
||||
// Welcome to the 4get configuration file
|
||||
// When updating your instance, please make sure this file isn't missing
|
||||
// any parameters.
|
||||
|
||||
// 4get version. Please keep this updated
|
||||
const VERSION = 8;
|
||||
|
||||
// Will be shown pretty much everywhere.
|
||||
const SERVER_NAME = "4get";
|
||||
|
||||
// Will be shown in <meta> tag on home page
|
||||
const SERVER_SHORT_DESCRIPTION = "4get is a proxy search engine that doesn't suck.";
|
||||
|
||||
// Will be shown in server list ping (null for no description)
|
||||
const SERVER_LONG_DESCRIPTION = null;
|
||||
|
||||
// Add your own themes in "static/themes". Set to "Dark" for default theme.
|
||||
// Eg. To use "static/themes/Cream.css", specify "Cream".
|
||||
const DEFAULT_THEME = "black";
|
||||
|
||||
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
||||
// Changing this might break things.
|
||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0";
|
||||
|
||||
// Temporary directory for saving the page icons
|
||||
const ICON_DIR = "/tmp/icons";
|
||||
|
||||
// List of domains that point to your servers. Include your tor/i2p
|
||||
// addresses here! Must be a valid URL. Won't affect links placed on
|
||||
// the homepage.
|
||||
const ALT_ADDRESSES = [
|
||||
//"https://4get.alt-tld",
|
||||
//"http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion"
|
||||
];
|
||||
|
||||
// Proxy pool assignments for each scraper
|
||||
// false = Use server's raw IP
|
||||
// string = will load a proxy list from data/proxies
|
||||
// Eg. "onion" will load data/proxies/onion.txt
|
||||
const PROXY_DDG = false; // duckduckgo
|
||||
const PROXY_BRAVE = false;
|
||||
const PROXY_FB = false; // facebook
|
||||
const PROXY_GOOGLE = false;
|
||||
const PROXY_GOOGLE_API = false;
|
||||
const PROXY_GOOGLE_CSE = false;
|
||||
const PROXY_STARTPAGE = false;
|
||||
const PROXY_QWANT = false;
|
||||
const PROXY_GHOSTERY = false;
|
||||
const PROXY_MARGINALIA = false;
|
||||
const PROXY_MOJEEK = false;
|
||||
const PROXY_SC = false; // soundcloud
|
||||
const PROXY_SPOTIFY = false;
|
||||
const PROXY_SOLOFIELD = false;
|
||||
const PROXY_WIBY = false;
|
||||
const PROXY_CURLIE = false;
|
||||
const PROXY_YT = false; // youtube
|
||||
const PROXY_YEP = false;
|
||||
const PROXY_PINTEREST = false;
|
||||
const PROXY_SANKAKUCOMPLEX = false;
|
||||
const PROXY_FLICKR = false;
|
||||
const PROXY_FIVEHPX = false;
|
||||
const PROXY_VSCO = false;
|
||||
const PROXY_SEZNAM = false;
|
||||
const PROXY_NAVER = false;
|
||||
const PROXY_GREPPR = false;
|
||||
const PROXY_CROWDVIEW = false;
|
||||
const PROXY_MWMBL = false;
|
||||
const PROXY_FTM = false; // findthatmeme
|
||||
const PROXY_IMGUR = false;
|
||||
const PROXY_YANDEX_W = false; // yandex web
|
||||
const PROXY_YANDEX_I = false; // yandex images
|
||||
const PROXY_YANDEX_V = false; // yandex videos
|
||||
|
||||
//
|
||||
// Scraper-specific parameters
|
||||
//
|
||||
|
||||
// GOOGLE CSE & GOOGLE API
|
||||
const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
|
||||
|
||||
// MARGINALIA
|
||||
// Use "null" to default out to HTML scraping OR specify a string to
|
||||
// use the API (Eg: "public"). API has fewer filters.
|
||||
const MARGINALIA_API_KEY = null;
|
||||
}
|
@ -1,173 +0,0 @@
|
||||
<?php
|
||||
class config{
|
||||
// Welcome to the 4get configuration file
|
||||
// When updating your instance, please make sure this file isn't missing
|
||||
// any parameters.
|
||||
|
||||
// 4get version. Please keep this updated
|
||||
const VERSION = 8;
|
||||
|
||||
// Will be shown pretty much everywhere.
|
||||
const SERVER_NAME = "4get";
|
||||
|
||||
// Will be shown in <meta> tag on home page
|
||||
const SERVER_SHORT_DESCRIPTION = "4get is a proxy search engine that doesn't suck.";
|
||||
|
||||
// Will be shown in server list ping (null for no description)
|
||||
const SERVER_LONG_DESCRIPTION = null;
|
||||
|
||||
// Add your own themes in "static/themes". Set to "Dark" for default theme.
|
||||
// Eg. To use "static/themes/Cream.css", specify "Cream".
|
||||
const DEFAULT_THEME = "black";
|
||||
|
||||
// Enable the API?
|
||||
const API_ENABLED = true;
|
||||
|
||||
//
|
||||
// BOT PROTECTION
|
||||
//
|
||||
|
||||
// 0 = disabled, 1 = ask for image captcha, @TODO: 2 = invite only (users needs a pass)
|
||||
// VERY useful against a targeted attack
|
||||
const BOT_PROTECTION = 0;
|
||||
|
||||
// if BOT_PROTECTION is set to 1, specify the available datasets here
|
||||
// images should be named from 1.png to X.png, and be 100x100 in size
|
||||
// Eg. data/captcha/birds/1.png up to 2263.png
|
||||
const CAPTCHA_DATASET = [
|
||||
// example:
|
||||
//["birds", 2263],
|
||||
//["fumo_plushies", 1006],
|
||||
//["minecraft", 848]
|
||||
];
|
||||
|
||||
// If this regex expression matches on the user agent, it blocks the request
|
||||
// Not useful at all against a targeted attack
|
||||
const HEADER_REGEX = '/bot|wget|curl|python-requests|scrapy|go-http-client|ruby|yahoo|spider|qwant/i';
|
||||
|
||||
// Block clients who present any of the following headers in their request (SPECIFY IN !!lowercase!!)
|
||||
// Eg: ["x-forwarded-for", "x-via", "forwarded-for", "via"];
|
||||
// Useful for blocking *some* proxies used for botting
|
||||
const FILTERED_HEADER_KEYS = [
|
||||
//"x-forwarded-for",
|
||||
//"x-cluster-client-ip",
|
||||
//"x-client-ip",
|
||||
//"x-real-ip",
|
||||
//"client-ip",
|
||||
//"real-ip",
|
||||
//"forwarded-for",
|
||||
//"forwarded-for-ip",
|
||||
//"forwarded",
|
||||
//"proxy-connection",
|
||||
//"remote-addr",
|
||||
//"via"
|
||||
];
|
||||
|
||||
// Block SSL ciphers used by CLI tools used for botting
|
||||
// Basically a primitive version of Cloudflare's browser integrity check
|
||||
// ** If curl can still access the site (with spoofed headers), please make sure you use the new apache2 config **
|
||||
// https://git.lolcat.ca/lolcat/4get/docs/apache2.md
|
||||
const DISALLOWED_SSL = [
|
||||
// "TLS_AES_256_GCM_SHA384" // used by WGET and CURL
|
||||
];
|
||||
|
||||
// Maximal number of searches per captcha key/pass issued. Counter gets
|
||||
// reset on every APCU cache clear (should happen once a day).
|
||||
// Only useful when BOT_PROTECTION is NOT set to 0
|
||||
const MAX_SEARCHES = 100;
|
||||
|
||||
// List of domains that point to your servers. Include your tor/i2p
|
||||
// addresses here! Must be a valid URL. Won't affect links placed on
|
||||
// the homepage.
|
||||
const ALT_ADDRESSES = [
|
||||
//"https://4get.alt-tld",
|
||||
//"http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion"
|
||||
];
|
||||
|
||||
// Known 4get instances. MUST use the https protocol if your instance uses
|
||||
// it. Is used to generate a distributed list of instances.
|
||||
// To appear in the list of an instance, contact the host and if everyone added
|
||||
// each other, your server should appear everywhere.
|
||||
const INSTANCES = [
|
||||
"https://4get.ca",
|
||||
"https://4get.zzls.xyz",
|
||||
"https://4getus.zzls.xyz",
|
||||
"https://4get.silly.computer",
|
||||
"https://4get.konakona.moe",
|
||||
"https://4get.lvkaszus.pl",
|
||||
"https://4g.ggtyler.dev",
|
||||
"https://4get.perennialte.ch",
|
||||
"https://4get.sijh.net",
|
||||
"https://4get.hbubli.cc",
|
||||
"https://4get.plunked.party",
|
||||
"https://4get.seitan-ayoub.lol",
|
||||
"https://4get.etenie.pl",
|
||||
"https://4get.lunar.icu",
|
||||
"https://4get.dcs0.hu",
|
||||
"https://4get.kizuki.lol",
|
||||
"https://4get.psily.garden",
|
||||
"https://search.milivojevic.in.rs",
|
||||
"https://4get.snine.nl",
|
||||
"https://4get.datura.network",
|
||||
"https://4get.neco.lol",
|
||||
"https://4get.lol",
|
||||
"https://4get.ch",
|
||||
"https://4get.edmateo.site",
|
||||
"https://4get.sudovanilla.org",
|
||||
"https://search.mint.lgbt"
|
||||
];
|
||||
|
||||
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
|
||||
// Changing this might break things.
|
||||
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0";
|
||||
|
||||
// Proxy pool assignments for each scraper
|
||||
// false = Use server's raw IP
|
||||
// string = will load a proxy list from data/proxies
|
||||
// Eg. "onion" will load data/proxies/onion.txt
|
||||
const PROXY_DDG = false; // duckduckgo
|
||||
const PROXY_BRAVE = false;
|
||||
const PROXY_FB = false; // facebook
|
||||
const PROXY_GOOGLE = false;
|
||||
const PROXY_GOOGLE_API = false;
|
||||
const PROXY_GOOGLE_CSE = false;
|
||||
const PROXY_STARTPAGE = false;
|
||||
const PROXY_QWANT = false;
|
||||
const PROXY_GHOSTERY = false;
|
||||
const PROXY_MARGINALIA = false;
|
||||
const PROXY_MOJEEK = false;
|
||||
const PROXY_SC = false; // soundcloud
|
||||
const PROXY_SPOTIFY = false;
|
||||
const PROXY_SOLOFIELD = false;
|
||||
const PROXY_WIBY = false;
|
||||
const PROXY_CURLIE = false;
|
||||
const PROXY_YT = false; // youtube
|
||||
const PROXY_YEP = false;
|
||||
const PROXY_PINTEREST = false;
|
||||
const PROXY_SANKAKUCOMPLEX = false;
|
||||
const PROXY_FLICKR = false;
|
||||
const PROXY_FIVEHPX = false;
|
||||
const PROXY_VSCO = false;
|
||||
const PROXY_SEZNAM = false;
|
||||
const PROXY_NAVER = false;
|
||||
const PROXY_GREPPR = false;
|
||||
const PROXY_CROWDVIEW = false;
|
||||
const PROXY_MWMBL = false;
|
||||
const PROXY_FTM = false; // findthatmeme
|
||||
const PROXY_IMGUR = false;
|
||||
const PROXY_YANDEX_W = false; // yandex web
|
||||
const PROXY_YANDEX_I = false; // yandex images
|
||||
const PROXY_YANDEX_V = false; // yandex videos
|
||||
|
||||
//
|
||||
// Scraper-specific parameters
|
||||
//
|
||||
|
||||
// GOOGLE CSE & GOOGLE API
|
||||
const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
|
||||
|
||||
// MARGINALIA
|
||||
// Use "null" to default out to HTML scraping OR specify a string to
|
||||
// use the API (Eg: "public"). API has fewer filters.
|
||||
const MARGINALIA_API_KEY = null;
|
||||
}
|
@ -15,32 +15,38 @@ class favicon{
|
||||
|
||||
header("Content-Type: image/png");
|
||||
|
||||
if(
|
||||
preg_match(
|
||||
'/^https?:\/\/[A-Za-z0-9.-]+$/',
|
||||
$url
|
||||
) === 0
|
||||
){
|
||||
|
||||
// first check the URL with regex
|
||||
if(preg_match('/^https?:\/\/[A-Za-z0-9.-]+$/', $url) === 0){
|
||||
header("X-Error: Only provide the protocol and domain");
|
||||
$this->defaulticon();
|
||||
}
|
||||
|
||||
$filename = str_replace(["https://", "http://"], "", $url);
|
||||
header("Content-Disposition: inline; filename=\"{$filename}.png\"");
|
||||
// validate the URL
|
||||
$url = filter_var($url, FILTER_VALIDATE_URL);
|
||||
if(!$url) {
|
||||
header("X-Error: Invalid URL");
|
||||
$this->defaulticon();
|
||||
}
|
||||
|
||||
// extract the hostname
|
||||
$this->filename = parse_url($url, PHP_URL_HOST);
|
||||
if(!$this->filename || is_null($this->filename) || $this->filename === ""){
|
||||
header("X-Error: Invalid URL");
|
||||
$this->defaulticon();
|
||||
}
|
||||
|
||||
// specify the filename in content-disposition
|
||||
header("Content-Disposition: inline; filename=\"{$this->filename}.png\"");
|
||||
|
||||
include "lib/curlproxy.php";
|
||||
$this->proxy = new proxy(false);
|
||||
|
||||
$this->filename = parse_url($url, PHP_URL_HOST);
|
||||
|
||||
/*
|
||||
Check if we have the favicon stored locally
|
||||
*/
|
||||
if(file_exists("icons/" . $filename . ".png")){
|
||||
|
||||
$handle = fopen("icons/" . $filename . ".png", "r");
|
||||
echo fread($handle, filesize("icons/" . $filename . ".png"));
|
||||
if(file_exists($this->iconpath())){
|
||||
$handle = fopen($this->iconpath(), "r");
|
||||
echo fread($handle, filesize($this->iconpath()));
|
||||
fclose($handle);
|
||||
return;
|
||||
}
|
||||
@ -249,7 +255,7 @@ class favicon{
|
||||
$image = $image->getImageBlob();
|
||||
|
||||
// save favicon
|
||||
$handle = fopen("icons/" . $this->filename . ".png", "w");
|
||||
$handle = fopen($this->iconpath(), "w");
|
||||
fwrite($handle, $image, strlen($image));
|
||||
fclose($handle);
|
||||
|
||||
@ -329,6 +335,11 @@ class favicon{
|
||||
return $json["icons"][0]["src"];
|
||||
}
|
||||
|
||||
private function iconpath() {
|
||||
// $this->filename can be trusted
|
||||
return config::ICON_DIR . "/" . $this->filename . ".png";
|
||||
}
|
||||
|
||||
private function favicon404(){
|
||||
|
||||
// fallback to google favicons
|
||||
@ -346,7 +357,7 @@ class favicon{
|
||||
}
|
||||
|
||||
// write favicon from google
|
||||
$handle = fopen("icons/" . $this->filename . ".png", "w");
|
||||
$handle = fopen($this->iconpath(), "w");
|
||||
fwrite($handle, $image["body"], strlen($image["body"]));
|
||||
fclose($handle);
|
||||
|
||||
|
@ -73,7 +73,6 @@ class frontend{
|
||||
}
|
||||
|
||||
public function loadheader(array $get, array $filters, string $page){
|
||||
|
||||
echo
|
||||
$this->load("header.html", [
|
||||
"title" => trim(htmlspecialchars($get["s"]) . " ({$page})"),
|
||||
@ -83,60 +82,6 @@ class frontend{
|
||||
"tabs" => $this->generatehtmltabs($page, $get["s"]),
|
||||
"filters" => $this->generatehtmlfilters($filters, $get)
|
||||
]);
|
||||
|
||||
$headers_raw = getallheaders();
|
||||
$header_keys = [];
|
||||
$user_agent = "";
|
||||
$bad_header = false;
|
||||
|
||||
// block bots that present X-Forwarded-For, Via, etc
|
||||
foreach($headers_raw as $headerkey => $headervalue){
|
||||
|
||||
$headerkey = strtolower($headerkey);
|
||||
if($headerkey == "user-agent"){
|
||||
|
||||
$user_agent = $headervalue;
|
||||
continue;
|
||||
}
|
||||
|
||||
// check header key
|
||||
if(in_array($headerkey, config::FILTERED_HEADER_KEYS)){
|
||||
|
||||
$bad_header = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// SSL check
|
||||
$bad_ssl = false;
|
||||
if(
|
||||
isset($_SERVER["https"]) &&
|
||||
$_SERVER["https"] == "on" &&
|
||||
isset($_SERVER["SSL_CIPHER"]) &&
|
||||
in_array($_SERVER["SSL_CIPHER"], config::FILTERED_HEADER_KEYS)
|
||||
){
|
||||
|
||||
$bad_ssl = true;
|
||||
}
|
||||
|
||||
if(
|
||||
$bad_header === true ||
|
||||
$bad_ssl === true ||
|
||||
$user_agent == "" ||
|
||||
// user agent check
|
||||
preg_match(
|
||||
config::HEADER_REGEX,
|
||||
$user_agent
|
||||
)
|
||||
){
|
||||
|
||||
// bot detected !!
|
||||
$this->drawerror(
|
||||
"Tshh, blocked!",
|
||||
'Your browser, IP or IP range has been blocked from this 4get instance.'
|
||||
);
|
||||
die();
|
||||
}
|
||||
}
|
||||
|
||||
public function drawerror($title, $error, $timetaken = null){
|
||||
|
Reference in New Issue
Block a user