clean config, support read-only non-root docker
All checks were successful
docker / docker (push) Successful in 22s

Signed-off-by: ngn <ngn@ngn.tf>
This commit is contained in:
ngn
2025-07-11 15:33:40 +03:00
parent 16f954f1e5
commit 54b3588c3a
12 changed files with 300 additions and 539 deletions

33
.gitignore vendored
View File

@ -1,28 +1,5 @@
lib/test.html
lib/postdata.json
lib/nextpage.json
scraper/brave.html
scraper/yandex.json
scraper/marginalia.json
banner_og/
scraper/mojeek.html
scraper/google.html
scraper/google-img.html
scraper/google-video.html
scraper/google-news.html
scraper/google-img-nextpage.html
scraper/brave-image.html
scraper/brave-video.html
scraper/facebook.html
scraper/facebook-nextpage.json
scraper/yandex-video.json
scraper/yandex.html
scraper/soundcloud.json
scraper/mp3-pm.html
banner/*
data/captcha/birds/
data/captcha/fumo_plushies/
data/captcha/minecraft/
!banner/*default*
scraper/curlie.html
icons/*
/compose.yml
/docker-compose.yml
/banners
/captcha
/config.php

View File

@ -1,24 +1,20 @@
FROM alpine:latest
FROM alpine
RUN apk update
RUN apk upgrade
RUN apk add php apache2-ssl php83-fileinfo php83-openssl \
php83-iconv php83-common php83-dom php83-sodium \
php83-curl curl php83-pecl-apcu php83-apache2 \
imagemagick php83-pecl-imagick php-mbstring \
imagemagick-webp imagemagick-jpeg
RUN apk update && \
apk upgrade && \
apk add \
php php83-fileinfo php83-iconv php83-common php83-dom php83-sodium \
php83-curl php83-pecl-apcu php83-apache2 php-mbstring \
php83-pecl-imagick imagemagick-webp imagemagick-jpeg
COPY ./docker/httpd.conf /etc/apache2/httpd.conf
COPY ./docker/init.sh /
WORKDIR /var/www/html
WORKDIR /srv
COPY ./src ./4get
WORKDIR /var/www/html/4get
COPY ./docker/gen_config.php .
RUN adduser -DSH -u 1000 -h /srv runner
RUN chown -R runner /srv && chmod +x /init.sh
RUN chmod 777 /var/www/html/4get/icons
RUN chmod +x /init.sh
CMD ["/init.sh"]
USER runner
CMD ["/init.sh"]

18
compose.example.yml Normal file
View File

@ -0,0 +1,18 @@
services:
fourget:
container_name: 4get
image: git.ngn.tf/ngn/4get
ports:
- 80:8080
volumes:
- ./config.php:/srv/4get/data/config.php:ro
- ./banners:/srv/4get/banner:ro
- ./captcha:/srv/4get/data/captcha:ro
- type: tmpfs
target: /tmp/icons
cap_drop:
- ALL
security_opt:
- no-new-privileges:true
read_only: true
restart: unless-stopped

View File

@ -1,12 +0,0 @@
services:
fourget:
container_name: 4get
image: git.ngn.tf/ngn/4get
environment:
- FOURGET_SERVER_NAME=example.com
ports:
- 80:80
volumes:
- ./banners:/var/www/html/4get/banner
- ./captcha:/var/www/html/4get/data/captcha
restart: unless-stopped

View File

@ -1,90 +0,0 @@
<?php
// Load the default configuration class shipped with the image so that its
// constants can be used as the baseline for the generated config.
include "/var/www/html/4get/data/config.php";

// Every constant declared on the default `config` class, keyed by name.
$refl = new ReflectionClass('config');
$from_config = $refl->getConstants();

// Collect overrides from environment variables named FOURGET_<CONSTANT>.
// The prefix check and the prefix removal use the same string, so a variable
// such as "FOURGETFOO" (no underscore) is ignored instead of being emitted
// as a constant under its full name.
$env_prefix = "FOURGET_";
$from_env = [];
foreach (getenv() as $key => $val) {
	if (!str_starts_with($key, $env_prefix)) {
		continue;
	}

	$target_key = substr($key, strlen($env_prefix));
	if ($target_key === '') {
		// A bare "FOURGET_" variable would produce a constant with no name.
		continue;
	}

	// Strip surrounding quotes that are often left in by .env files.
	$from_env[$target_key] = trim($val, '\'"');
}

// Environment values take precedence over the defaults.
$merged_config = array_merge($from_config, $from_env);
/**
 * Render a value as the PHP source literal written after `const NAME = `.
 *
 * - null             -> null
 * - booleans         -> true / false
 * - numeric strings  -> emitted unquoted, so they become numbers in the output
 * - other strings    -> double-quoted, with `\`, `"` and `$` escaped so the
 *                       literal reads back as exactly the same text
 * - arrays           -> JSON encoding (only valid PHP for list-shaped arrays;
 *                       string elements are NOT escaped for PHP here)
 * - anything else    -> returned unchanged (ints/floats concatenate as-is)
 *
 * @param mixed $n value to render
 * @return mixed the literal as a string, or $n itself for ints/floats
 */
function type_to_string($n) {
	if ($n === null) {
		return "null";
	}

	if (is_bool($n)) {
		return $n ? 'true' : 'false';
	}

	if (is_string($n)) {
		if (is_numeric($n)) {
			return $n;
		}

		// Without escaping, a value containing a double quote terminates the
		// literal early, and `$name` or backslash sequences would be
		// interpreted by PHP when the generated file is loaded.
		return '"' . addcslashes($n, '"\\$') . '"';
	}

	if (is_array($n)) {
		return json_encode($n, JSON_UNESCAPED_SLASHES);
	}

	return $n;
}
/**
 * List the captcha datasets found under the captcha directory.
 *
 * Each sub-directory is one dataset; its entry is [name, number of entries
 * inside it], which is the shape written to the CAPTCHA_DATASET constant.
 *
 * @return array list of [string $category, int $count] pairs
 */
function detect_captcha_dirs() {
	$captcha_dir = "/var/www/html/4get/data/captcha/";

	// Only directories can be datasets; glob() may return false on error.
	$category_paths = glob($captcha_dir . "*", GLOB_ONLYDIR) ?: [];

	$result = [];
	foreach ($category_paths as $path) {
		// basename() keeps this independent of how deep $captcha_dir is,
		// unlike indexing a fixed segment of the exploded path.
		$category = basename($path);
		$entries = glob($captcha_dir . $category . "/*") ?: [];
		$result[] = [$category, count($entries)];
	}

	return $result;
}
// Keys that are not copied verbatim from the merged configuration:
// CAPTCHA_DATASET is regenerated from the captcha directory, and PROTO is
// left out of the generated class.
$special_keys = ["PROTO", "CAPTCHA_DATASET"];

$output = "<?php\n // This file was generated by docker/gen_config.php\n";
$output .= "class config {\n";

foreach ($merged_config as $key => $val) {
	if ($key === "CAPTCHA_DATASET") {
		$output .= "\tconst " . $key . " = " . type_to_string(detect_captcha_dirs()) . ";\n";
		continue;
	}

	if (in_array($key, $special_keys, true)) {
		continue;
	}

	// Keys supplied only through the environment have no default; treat the
	// default as null instead of reading an undefined array key.
	$default = $from_config[$key] ?? null;
	$stored_value = $val;

	// Conversion between arrays and comma separated env values: when the
	// default has a different type than the supplied value, and the value is
	// a non-numeric string, split it on commas. Defaults that are null or
	// boolean are excluded, on the assumption that such settings are never
	// assigned an array.
	if (
		is_string($val) &&
		gettype($default) != gettype($val) &&
		!is_numeric($val) &&
		!is_null($default) &&
		gettype($default) != "boolean"
	) {
		$stored_value = explode(",", $val);
	}

	$output .= "\tconst " . $key . " = " . type_to_string($stored_value) . ";\n";
}

$output .= "}\n";
$output .= "?>";

// Fail loudly when the config cannot be written, so the caller does not go on
// with a missing or stale configuration file.
if (file_put_contents("./data/config.php", $output) === false) {
	fwrite(STDERR, "gen_config.php: failed to write ./data/config.php\n");
	exit(1);
}
?>

View File

@ -1,16 +1,19 @@
Listen 80
ServerTokens OS
Listen 8080
ServerRoot /var/www
ServerSignature On
ServerName localhost
DocumentRoot "/var/www/html/4get"
ServerSignature Off
ServerTokens Prod
LogLevel error
PidFile /dev/shm/httpd.pid
DocumentRoot /srv/4get
LogLevel error
CustomLog /dev/null common
ErrorLog /dev/null
ErrorLog /dev/stderr
<Directory "/var/www/html/4get">
<Directory /srv/4get>
RewriteEngine On
RewriteCond %{THE_REQUEST} ^\w+\ /(.*)\.php(\?.*)?\ HTTP/
RewriteRule ^ http://%{HTTP_HOST}/%1 [R=301]
@ -22,68 +25,32 @@ ErrorLog /dev/null
</Directory>
# deny access to private resources
<Directory "/var/www/html/4get/data">
<Directory /srv/4get/data>
Require all denied
<Files "*">
<Files *>
Require all denied
</Files>
</Directory>
LoadModule rewrite_module modules/mod_rewrite.so
LoadModule mpm_prefork_module modules/mod_mpm_prefork.so
LoadModule authn_file_module modules/mod_authn_file.so
LoadModule authn_core_module modules/mod_authn_core.so
LoadModule authz_host_module modules/mod_authz_host.so
LoadModule authz_groupfile_module modules/mod_authz_groupfile.so
LoadModule authz_user_module modules/mod_authz_user.so
LoadModule authz_core_module modules/mod_authz_core.so
LoadModule rewrite_module modules/mod_rewrite.so
LoadModule mpm_prefork_module modules/mod_mpm_prefork.so
LoadModule authz_core_module modules/mod_authz_core.so
LoadModule access_compat_module modules/mod_access_compat.so
LoadModule auth_basic_module modules/mod_auth_basic.so
LoadModule reqtimeout_module modules/mod_reqtimeout.so
LoadModule filter_module modules/mod_filter.so
LoadModule mime_module modules/mod_mime.so
LoadModule log_config_module modules/mod_log_config.so
LoadModule env_module modules/mod_env.so
LoadModule headers_module modules/mod_headers.so
LoadModule setenvif_module modules/mod_setenvif.so
LoadModule version_module modules/mod_version.so
LoadModule unixd_module modules/mod_unixd.so
LoadModule status_module modules/mod_status.so
LoadModule autoindex_module modules/mod_autoindex.so
LoadModule dir_module modules/mod_dir.so
LoadModule alias_module modules/mod_alias.so
LoadModule negotiation_module modules/mod_negotiation.so
<IfModule unixd_module>
User apache
Group apache
</IfModule>
LoadModule filter_module modules/mod_filter.so
LoadModule mime_module modules/mod_mime.so
LoadModule log_config_module modules/mod_log_config.so
LoadModule unixd_module modules/mod_unixd.so
LoadModule negotiation_module modules/mod_negotiation.so
LoadModule dir_module modules/mod_dir.so
<Directory />
AllowOverride none
Require all denied
</Directory>
<IfModule dir_module>
DirectoryIndex index.html
</IfModule>
<Files ".ht*">
Require all denied
</Files>
<IfModule headers_module>
RequestHeader unset Proxy early
</IfModule>
<IfModule mime_module>
TypesConfig /etc/apache2/mime.types
AddType application/x-compress .Z
AddType application/x-gzip .gz .tgz
</IfModule>
<IfModule mime_magic_module>
MIMEMagicFile /etc/apache2/magic
</IfModule>
IncludeOptional /etc/apache2/conf.d/*.conf
Include /etc/apache2/conf.d/languages.conf
Include /etc/apache2/conf.d/php83-module.conf

View File

@ -1,11 +1,17 @@
#!/bin/sh
set -e
#!/bin/sh -e
if [ ! -f '/var/www/html/4get/data/config.php' ] && [ -f './gen_config.php' ]
then
php ./gen_config.php
rm -f ./gen_config.php
config='/srv/4get/data/config.php'
defconfig='/srv/4get/data/config.def.php'
# check for the configuration file
if [ ! -f "${config}" ]; then
echo "configuration file not specified"
echo "here's the default configuration, modify and mount this to ${config}"
echo
cat "${defconfig}"
exit 1
fi
echo "Starting up apache2"
exec httpd -DFOREGROUND
# execute apache
echo "starting apache web server"
exec httpd -D FOREGROUND

29
src/.gitignore vendored Normal file
View File

@ -0,0 +1,29 @@
lib/test.html
lib/postdata.json
lib/nextpage.json
scraper/brave.html
scraper/yandex.json
scraper/marginalia.json
scraper/mojeek.html
scraper/google.html
scraper/google-img.html
scraper/google-video.html
scraper/google-news.html
scraper/google-img-nextpage.html
scraper/brave-image.html
scraper/brave-video.html
scraper/facebook.html
scraper/facebook-nextpage.json
scraper/yandex-video.json
scraper/yandex.html
scraper/soundcloud.json
scraper/mp3-pm.html
scraper/curlie.html
icons/*
banner/*
!banner/*default*
data/captcha
data/config.php

87
src/data/config.def.php Normal file
View File

@ -0,0 +1,87 @@
<?php
// Default 4get configuration (config.def.php).
// All settings are class constants and are read as config::NAME.
class config{
// Welcome to the 4get configuration file
// When updating your instance, please make sure this file isn't missing
// any parameters.
// 4get version. Please keep this updated
const VERSION = 8;
// Will be shown pretty much everywhere.
const SERVER_NAME = "4get";
// Will be shown in <meta> tag on home page
const SERVER_SHORT_DESCRIPTION = "4get is a proxy search engine that doesn't suck.";
// Will be shown in server list ping (null for no description)
const SERVER_LONG_DESCRIPTION = null;
// Name of the default theme. Add your own themes in "static/themes".
// Eg. To use "static/themes/Cream.css", specify "Cream".
// NOTE(review): this comment used to name "Dark" as the default theme while
// the value below is "black" -- confirm that a "black" theme is shipped.
const DEFAULT_THEME = "black";
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0";
// Temporary directory for saving the page icons.
// The favicon class builds its cache paths as ICON_DIR . "/" . <host> . ".png",
// so this directory must be writable. compose.example.yml mounts a tmpfs at
// /tmp/icons for this purpose.
const ICON_DIR = "/tmp/icons";
// List of domains that point to your servers. Include your tor/i2p
// addresses here! Must be a valid URL. Won't affect links placed on
// the homepage.
const ALT_ADDRESSES = [
//"https://4get.alt-tld",
//"http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion"
];
// Proxy pool assignments for each scraper
// false = Use server's raw IP
// string = will load a proxy list from data/proxies
// Eg. "onion" will load data/proxies/onion.txt
const PROXY_DDG = false; // duckduckgo
const PROXY_BRAVE = false;
const PROXY_FB = false; // facebook
const PROXY_GOOGLE = false;
const PROXY_GOOGLE_API = false;
const PROXY_GOOGLE_CSE = false;
const PROXY_STARTPAGE = false;
const PROXY_QWANT = false;
const PROXY_GHOSTERY = false;
const PROXY_MARGINALIA = false;
const PROXY_MOJEEK = false;
const PROXY_SC = false; // soundcloud
const PROXY_SPOTIFY = false;
const PROXY_SOLOFIELD = false;
const PROXY_WIBY = false;
const PROXY_CURLIE = false;
const PROXY_YT = false; // youtube
const PROXY_YEP = false;
const PROXY_PINTEREST = false;
const PROXY_SANKAKUCOMPLEX = false;
const PROXY_FLICKR = false;
const PROXY_FIVEHPX = false;
const PROXY_VSCO = false;
const PROXY_SEZNAM = false;
const PROXY_NAVER = false;
const PROXY_GREPPR = false;
const PROXY_CROWDVIEW = false;
const PROXY_MWMBL = false;
const PROXY_FTM = false; // findthatmeme
const PROXY_IMGUR = false;
const PROXY_YANDEX_W = false; // yandex web
const PROXY_YANDEX_I = false; // yandex images
const PROXY_YANDEX_V = false; // yandex videos
//
// Scraper-specific parameters
//
// GOOGLE CSE & GOOGLE API
const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
// MARGINALIA
// Use null to fall back to HTML scraping OR specify a string to
// use the API (Eg: "public"). The API has fewer filters.
const MARGINALIA_API_KEY = null;
}

View File

@ -1,173 +0,0 @@
<?php
// 4get configuration as it was before this commit (this file is deleted here).
// All settings are class constants and are read as config::NAME.
class config{
// Welcome to the 4get configuration file
// When updating your instance, please make sure this file isn't missing
// any parameters.
// 4get version. Please keep this updated
const VERSION = 8;
// Will be shown pretty much everywhere.
const SERVER_NAME = "4get";
// Will be shown in <meta> tag on home page
const SERVER_SHORT_DESCRIPTION = "4get is a proxy search engine that doesn't suck.";
// Will be shown in server list ping (null for no description)
const SERVER_LONG_DESCRIPTION = null;
// Name of the default theme. Add your own themes in "static/themes".
// Eg. To use "static/themes/Cream.css", specify "Cream".
// NOTE(review): this comment used to name "Dark" as the default theme while
// the value below is "black" -- confirm that a "black" theme is shipped.
const DEFAULT_THEME = "black";
// Enable the API?
const API_ENABLED = true;
//
// BOT PROTECTION
//
// 0 = disabled, 1 = ask for image captcha, @TODO: 2 = invite only (users need a pass)
// VERY useful against a targeted attack
const BOT_PROTECTION = 0;
// if BOT_PROTECTION is set to 1, specify the available datasets here
// images should be named from 1.png to X.png, and be 100x100 in size
// Eg. data/captcha/birds/1.png up to 2263.png
const CAPTCHA_DATASET = [
// example:
//["birds", 2263],
//["fumo_plushies", 1006],
//["minecraft", 848]
];
// If this regular expression matches the user agent, the request is blocked
// Not useful at all against a targeted attack
const HEADER_REGEX = '/bot|wget|curl|python-requests|scrapy|go-http-client|ruby|yahoo|spider|qwant/i';
// Block clients who present any of the following headers in their request (SPECIFY IN !!lowercase!!)
// Eg: ["x-forwarded-for", "x-via", "forwarded-for", "via"];
// Useful for blocking *some* proxies used for botting
const FILTERED_HEADER_KEYS = [
//"x-forwarded-for",
//"x-cluster-client-ip",
//"x-client-ip",
//"x-real-ip",
//"client-ip",
//"real-ip",
//"forwarded-for",
//"forwarded-for-ip",
//"forwarded",
//"proxy-connection",
//"remote-addr",
//"via"
];
// Block SSL ciphers used by CLI tools used for botting
// Basically a primitive version of Cloudflare's browser integrity check
// ** If curl can still access the site (with spoofed headers), please make sure you use the new apache2 config **
// https://git.lolcat.ca/lolcat/4get/docs/apache2.md
// NOTE(review): the SSL check in frontend::loadheader compares the cipher
// against FILTERED_HEADER_KEYS rather than this list -- looks like a bug; confirm.
const DISALLOWED_SSL = [
// "TLS_AES_256_GCM_SHA384" // used by WGET and CURL
];
// Maximum number of searches per captcha key/pass issued. Counter gets
// reset on every APCU cache clear (should happen once a day).
// Only useful when BOT_PROTECTION is NOT set to 0
const MAX_SEARCHES = 100;
// List of domains that point to your servers. Include your tor/i2p
// addresses here! Must be a valid URL. Won't affect links placed on
// the homepage.
const ALT_ADDRESSES = [
//"https://4get.alt-tld",
//"http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion"
];
// Known 4get instances. MUST use the https protocol if your instance uses
// it. Used to generate a distributed list of instances.
// To appear in the list of an instance, contact its host; once everyone has
// added each other, your server should appear everywhere.
const INSTANCES = [
"https://4get.ca",
"https://4get.zzls.xyz",
"https://4getus.zzls.xyz",
"https://4get.silly.computer",
"https://4get.konakona.moe",
"https://4get.lvkaszus.pl",
"https://4g.ggtyler.dev",
"https://4get.perennialte.ch",
"https://4get.sijh.net",
"https://4get.hbubli.cc",
"https://4get.plunked.party",
"https://4get.seitan-ayoub.lol",
"https://4get.etenie.pl",
"https://4get.lunar.icu",
"https://4get.dcs0.hu",
"https://4get.kizuki.lol",
"https://4get.psily.garden",
"https://search.milivojevic.in.rs",
"https://4get.snine.nl",
"https://4get.datura.network",
"https://4get.neco.lol",
"https://4get.lol",
"https://4get.ch",
"https://4get.edmateo.site",
"https://4get.sudovanilla.org",
"https://search.mint.lgbt"
];
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0";
// Proxy pool assignments for each scraper
// false = Use server's raw IP
// string = will load a proxy list from data/proxies
// Eg. "onion" will load data/proxies/onion.txt
const PROXY_DDG = false; // duckduckgo
const PROXY_BRAVE = false;
const PROXY_FB = false; // facebook
const PROXY_GOOGLE = false;
const PROXY_GOOGLE_API = false;
const PROXY_GOOGLE_CSE = false;
const PROXY_STARTPAGE = false;
const PROXY_QWANT = false;
const PROXY_GHOSTERY = false;
const PROXY_MARGINALIA = false;
const PROXY_MOJEEK = false;
const PROXY_SC = false; // soundcloud
const PROXY_SPOTIFY = false;
const PROXY_SOLOFIELD = false;
const PROXY_WIBY = false;
const PROXY_CURLIE = false;
const PROXY_YT = false; // youtube
const PROXY_YEP = false;
const PROXY_PINTEREST = false;
const PROXY_SANKAKUCOMPLEX = false;
const PROXY_FLICKR = false;
const PROXY_FIVEHPX = false;
const PROXY_VSCO = false;
const PROXY_SEZNAM = false;
const PROXY_NAVER = false;
const PROXY_GREPPR = false;
const PROXY_CROWDVIEW = false;
const PROXY_MWMBL = false;
const PROXY_FTM = false; // findthatmeme
const PROXY_IMGUR = false;
const PROXY_YANDEX_W = false; // yandex web
const PROXY_YANDEX_I = false; // yandex images
const PROXY_YANDEX_V = false; // yandex videos
//
// Scraper-specific parameters
//
// GOOGLE CSE & GOOGLE API
const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
// MARGINALIA
// Use null to fall back to HTML scraping OR specify a string to
// use the API (Eg: "public"). The API has fewer filters.
const MARGINALIA_API_KEY = null;
}

View File

@ -15,32 +15,38 @@ class favicon{
header("Content-Type: image/png");
if(
preg_match(
'/^https?:\/\/[A-Za-z0-9.-]+$/',
$url
) === 0
){
// first check the URL with regex
if(preg_match('/^https?:\/\/[A-Za-z0-9.-]+$/', $url) === 0){
header("X-Error: Only provide the protocol and domain");
$this->defaulticon();
}
$filename = str_replace(["https://", "http://"], "", $url);
header("Content-Disposition: inline; filename=\"{$filename}.png\"");
// validate the URL
$url = filter_var($url, FILTER_VALIDATE_URL);
if(!$url) {
header("X-Error: Invalid URL");
$this->defaulticon();
}
// extract the hostname
$this->filename = parse_url($url, PHP_URL_HOST);
if(!$this->filename || is_null($this->filename) || $this->filename === ""){
header("X-Error: Invalid URL");
$this->defaulticon();
}
// specify the filename in content-disposition
header("Content-Disposition: inline; filename=\"{$this->filename}.png\"");
include "lib/curlproxy.php";
$this->proxy = new proxy(false);
$this->filename = parse_url($url, PHP_URL_HOST);
/*
Check if we have the favicon stored locally
*/
if(file_exists("icons/" . $filename . ".png")){
$handle = fopen("icons/" . $filename . ".png", "r");
echo fread($handle, filesize("icons/" . $filename . ".png"));
if(file_exists($this->iconpath())){
$handle = fopen($this->iconpath(), "r");
echo fread($handle, filesize($this->iconpath()));
fclose($handle);
return;
}
@ -249,7 +255,7 @@ class favicon{
$image = $image->getImageBlob();
// save favicon
$handle = fopen("icons/" . $this->filename . ".png", "w");
$handle = fopen($this->iconpath(), "w");
fwrite($handle, $image, strlen($image));
fclose($handle);
@ -329,6 +335,11 @@ class favicon{
return $json["icons"][0]["src"];
}
private function iconpath() {
	// Path of the cached icon for the current host, inside the configured
	// icon directory. $this->filename can be trusted: it is the hostname
	// extracted from the already validated URL.
	$dir = config::ICON_DIR;
	return "{$dir}/{$this->filename}.png";
}
private function favicon404(){
// fallback to google favicons
@ -346,7 +357,7 @@ class favicon{
}
// write favicon from google
$handle = fopen("icons/" . $this->filename . ".png", "w");
$handle = fopen($this->iconpath(), "w");
fwrite($handle, $image["body"], strlen($image["body"]));
fclose($handle);

View File

@ -73,7 +73,6 @@ class frontend{
}
public function loadheader(array $get, array $filters, string $page){
echo
$this->load("header.html", [
"title" => trim(htmlspecialchars($get["s"]) . " ({$page})"),
@ -83,60 +82,6 @@ class frontend{
"tabs" => $this->generatehtmltabs($page, $get["s"]),
"filters" => $this->generatehtmlfilters($filters, $get)
]);
$headers_raw = getallheaders();
$header_keys = [];
$user_agent = "";
$bad_header = false;
// block bots that present X-Forwarded-For, Via, etc
foreach($headers_raw as $headerkey => $headervalue){
$headerkey = strtolower($headerkey);
if($headerkey == "user-agent"){
$user_agent = $headervalue;
continue;
}
// check header key
if(in_array($headerkey, config::FILTERED_HEADER_KEYS)){
$bad_header = true;
break;
}
}
// SSL check
$bad_ssl = false;
if(
isset($_SERVER["https"]) &&
$_SERVER["https"] == "on" &&
isset($_SERVER["SSL_CIPHER"]) &&
in_array($_SERVER["SSL_CIPHER"], config::FILTERED_HEADER_KEYS)
){
$bad_ssl = true;
}
if(
$bad_header === true ||
$bad_ssl === true ||
$user_agent == "" ||
// user agent check
preg_match(
config::HEADER_REGEX,
$user_agent
)
){
// bot detected !!
$this->drawerror(
"Tshh, blocked!",
'Your browser, IP or IP range has been blocked from this 4get instance.'
);
die();
}
}
public function drawerror($title, $error, $timetaken = null){