general cleanup
All checks were successful
Build and publish the docker image / build (push) Successful in 18s

Signed-off-by: ngn <ngn@ngn.tf>
This commit is contained in:
ngn
2025-01-19 06:42:44 +03:00
parent 033e4cb959
commit ce81a54de1
147 changed files with 90 additions and 2222 deletions

39
src/about.php Normal file
View File

@ -0,0 +1,39 @@
<?php
include "data/config.php";
include "lib/frontend.php";

// Template helper used to render both page fragments below.
$frontend = new frontend();

// Header without the search filter bar.
$header_vars = [
	"title" => "About",
	"class" => " class=\"about\""
];
echo $frontend->load("header_nofilters.html", $header_vars);

// Collapse the static template onto a single line: every line is stripped of
// surrounding whitespace and the pieces are glued together without separator.
$template_lines = explode("\n", file_get_contents("template/about.html"));
$collapsed = implode("", array_map("trim", $template_lines));

// The page body reuses the search layout with only the left column filled in.
$body_vars = [
	"timetaken" => null,
	"class" => "",
	"right-left" => "",
	"right-right" => "",
	"left" => $collapsed
];
echo $frontend->load("search.html", $body_vars);

27
src/ami4get.php Normal file
View File

@ -0,0 +1,27 @@
<?php
header("Content-Type: application/json");
header("Access-Control-Allow-Origin: *");

include "data/config.php";

// Request counters live in APCu. apcu_fetch() yields false for a key that is
// not stored, in which case the counter is reported as 0.
$counters = [];
foreach(
	[
		"real_requests" => "real_requests",
		"bot_requests" => "captcha_gen"
	] as $field => $apcu_key
){
	$stored = apcu_fetch($apcu_key);
	$counters[$field] = ($stored !== false) ? $stored : 0;
}

// Instance description, built from the configuration constants.
$server = [
	"name" => config::SERVER_NAME,
	"description" => config::SERVER_LONG_DESCRIPTION,
	"bot_protection" => config::BOT_PROTECTION,
	"real_requests" => $counters["real_requests"],
	"bot_requests" => $counters["bot_requests"],
	"api_enabled" => config::API_ENABLED,
	"alt_addresses" => config::ALT_ADDRESSES,
	"version" => config::VERSION
];

$response = [
	"status" => "ok",
	"service" => "4get",
	"server" => $server,
	"instances" => config::INSTANCES
];

echo json_encode($response);

10
src/api/index.php Normal file
View File

@ -0,0 +1,10 @@
<?php
// Nothing is served from this path: reply with a JSON-encoded 404.
http_response_code(404);
header("Content-Type: application/json");

$response = ["status" => "Unknown endpoint"];
echo json_encode($response);

243
src/api/v1/ac.php Normal file
View File

@ -0,0 +1,243 @@
<?php
include "../../data/config.php";
new autocomplete();
/**
 * Autocomplete endpoint.
 *
 * Instantiating the class handles the current request: it reads the search
 * string from $_GET["s"], picks a suggestion provider ($_GET["scraper"], then
 * the "scraper_ac" cookie, then "brave"), queries it and prints an
 * OpenSearch-style JSON array: [ <query>, [ <suggestion>, ... ] ].
 *
 * Errors are printed as {"error": "..."} and terminate the script.
 */
class autocomplete{
	
	// scraper name => suggestion URL; "{searchTerms}" is replaced by the
	// url-encoded query in get(). Declared explicitly so no dynamic property
	// is created at runtime.
	public $scrapers = [];
	
	public function __construct(){
		
		header("Content-Type: application/json");
		
		$this->scrapers = [
			"brave" => "https://search.brave.com/api/suggest?q={searchTerms}",
			"ddg" => "https://duckduckgo.com/ac/?q={searchTerms}&type=list",
			"yandex" => "https://suggest.yandex.com/suggest-ff.cgi?part={searchTerms}&uil=en&v=3&sn=5&lr=21276&yu=4861394161661655015",
			"google" => "https://www.google.com/complete/search?client=mobile-gws-lite&q={searchTerms}",
			"qwant" => "https://api.qwant.com/v3/suggest/?q={searchTerms}&client=opensearch",
			"yep" => "https://api.yep.com/ac/?query={searchTerms}",
			"marginalia" => "https://search.marginalia.nu/suggest/?partial={searchTerms}",
			"yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}",
			"sc" => "", // soundcloud builds its URL in the switch below
			"startpage" => "https://www.startpage.com/suggestions?q={searchTerms}&format=opensearch&segment=startpage.defaultffx&lui=english",
			"kagi" => "https://kagi.com/api/autosuggest?q={searchTerms}",
			"ghostery" => "https://ghosterysearch.com/suggest?q={searchTerms}"
		];
		
		/*
			Sanitize input
		*/
		if(!isset($_GET["s"])){
			
			$this->do404("Missing search(s) parameter");
		}
		
		if(is_string($_GET["s"]) === false){
			
			$this->do404("Invalid search(s) parameter");
		}
		
		if(strlen($_GET["s"]) > 500){
			
			$this->do404("Search(s) exceeds the 500 char length");
		}
		
		/*
			Get $scraper
		*/
		if(isset($_GET["scraper"])){
			
			$scraper = $_GET["scraper"];
		}elseif(isset($_COOKIE["scraper_ac"])){
			
			$scraper = $_COOKIE["scraper_ac"];
		}else{
			
			$scraper = "brave"; // default option
		}
		
		// "scraper[]=x" style input arrives as an array, which cannot be used
		// as an array key below: fall back to the default instead of erroring
		if(!is_string($scraper)){
			
			$scraper = "brave";
		}
		
		if($scraper == "disabled"){
			
			// this shouldnt happen, but let's handle it anyways
			$this->doempty();
		}
		
		// make sure it exists
		if(!isset($this->scrapers[$scraper])){
			
			$scraper = "brave"; // default option
		}
		
		// return results
		switch($scraper){
			
			case "google":
			case "yt":
				// these answer with a JS callback; extract the JSON array
				// found between the parentheses
				$js = $this->get($this->scrapers[$scraper], $_GET["s"]);
				
				preg_match(
					'/\((\[.*\])\)/',
					$js,
					$match
				);
				
				if(!isset($match[1])){
					
					$this->doempty();
				}
				
				$decoded = json_decode($match[1]);
				
				// bail out on undecodable payloads instead of indexing null
				if(
					!is_array($decoded) ||
					!isset($decoded[1]) ||
					!is_array($decoded[1])
				){
					
					$this->doempty();
				}
				
				$json = [];
				
				foreach($decoded[1] as $item){
					
					if(
						!is_array($item) ||
						!isset($item[0]) ||
						!is_string($item[0])
					){
						
						continue;
					}
					
					$json[] = htmlspecialchars_decode(strip_tags($item[0]));
				}
				
				$this->output($json);
				break;
			
			case "sc":
				// soundcloud
				chdir("../../");
				include "scraper/sc.php";
				$sc = new sc();
				
				$token = $sc->get_token("raw_ip::::");
				
				$js = $this->get(
					"https://api-v2.soundcloud.com/search/queries?q={searchTerms}&client_id=" . $token . "&limit=10&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en",
					$_GET["s"]
				);
				
				$js = json_decode($js, true);
				
				if(
					!is_array($js) ||
					!isset($js["collection"]) ||
					!is_array($js["collection"])
				){
					
					$this->doempty();
				}
				
				$json = [];
				
				foreach($js["collection"] as $item){
					
					if(isset($item["query"])){
						
						$json[] = $item["query"];
					}
				}
				
				$this->output($json);
				break;
			
			case "marginalia":
				// the response body is already the list of suggestions
				$json = $this->get($this->scrapers[$scraper], $_GET["s"]);
				
				$json = json_decode($json, true);
				
				if($json === null){
					
					$this->doempty();
				}
				
				$this->output($json);
				break;
			
			default:
				// if it respects the openSearch protocol
				$json = json_decode($this->get($this->scrapers[$scraper], $_GET["s"]), true);
				
				// index 1 holds the suggestions; without it there is nothing
				// to return
				if(
					!is_array($json) ||
					!isset($json[1])
				){
					
					$this->doempty();
				}
				
				$this->output($json[1]);
				break;
		}
	}
	
	/*
		Print the [query, suggestions] pair
	*/
	private function output($suggestions){
		
		echo json_encode(
			[
				$_GET["s"],
				$suggestions
			],
			JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE
		);
	}
	
	/*
		Fetch $url with "{searchTerms}" replaced by the url-encoded $query.
		Returns the response body; a curl failure ends the request through
		do404().
	*/
	private function get($url, $query){
		
		$curlproc = curl_init();
		
		$url = str_replace("{searchTerms}", urlencode($query), $url);
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
			"Accept: application/json, text/javascript, */*; q=0.01",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-site"]
		);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			$message = curl_error($curlproc);
			curl_close($curlproc);
			
			// must be called on $this: there is no global do404() function
			$this->do404("Curl error: " . $message);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/*
		Print {"error": $error} and stop
	*/
	private function do404($error){
		
		echo json_encode(
			["error" => $error],
			JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE
		);
		die();
	}
	
	/*
		Print the query with an empty suggestion list and stop
	*/
	private function doempty(){
		
		echo json_encode(
			[
				$_GET["s"],
				[]
			],
			JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_IGNORE
		);
		die();
	}
}

10
src/api/v1/index.php Normal file
View File

@ -0,0 +1,10 @@
<?php
// The bare /api/v1/ path is not an endpoint: answer 404 with a JSON body.
$body = json_encode(
	["status" => "Unknown endpoint"]
);

header("Content-Type: application/json");
http_response_code(404);

echo $body;

20
src/audio/linear.php Normal file
View File

@ -0,0 +1,20 @@
<?php
// The sound URL must be a single string. Array-style input ("?s[]=...")
// passes isset() but is not a usable URL, so it is rejected here as well.
if(
	!isset($_GET["s"]) ||
	!is_string($_GET["s"])
){
	
	http_response_code(404);
	header("X-Error: No SOUND(s) provided!");
	die();
}

include "../data/config.php";
include "../lib/curlproxy.php";

$proxy = new proxy();

try{
	
	// stream the remote audio resource to the client
	$proxy->stream_linear_audio($_GET["s"]);
}catch(Exception $error){
	
	// report the failure reason through a response header
	header("X-Error: " . $error->getMessage());
}

224
src/audio/sc.php Normal file
View File

@ -0,0 +1,224 @@
<?php
new sc_audio();
/**
 * SoundCloud audio endpoint.
 *
 * Instantiating the class handles the current request in one of two modes:
 *
 *  - ?s=<soundcloud stream URL>: fetches the JSON behind the URL, reads the
 *    playlist it points to, collects the end byte position of every
 *    /media/<id>/<start>/<end> segment and redirects to
 *    /audio/sc?u=<segment url>&r=<comma separated end positions>.
 *
 *  - ?u=<segment url>&r=<positions>: maps the client's Range header onto one
 *    of the segments and streams that segment with a 206 response.
 */
class sc_audio{
	
	// curl proxy helper from lib/curlproxy.php, assigned in the constructor
	public $proxy;
	
	public function __construct(){
		
		include "../data/config.php";
		include "../lib/curlproxy.php";
		$this->proxy = new proxy();
		
		if(isset($_GET["u"])){
			
			/*
				we're now proxying audio
			*/
			if(!is_string($_GET["u"])){
				
				$this->do404("Invalid URL(u) parameter");
			}
			
			$viewkey = $_GET["u"];
			
			if(
				!isset($_GET["r"]) ||
				!is_string($_GET["r"])
			){
				
				$this->do404("Ranges(r) are missing");
			}
			
			$ranges = explode(",", $_GET["r"]);
			
			// sanitize ranges: only plain non-negative integers are byte
			// positions (is_numeric would also let "-5" or "1e3" through)
			foreach($ranges as &$range){
				
				if(!ctype_digit($range)){
					
					$this->do404("Invalid range specified");
				}
				
				$range = (int)$range;
			}
			unset($range); // drop the reference before $ranges is reused
			
			// sort ranges (just to make sure)
			sort($ranges);
			
			// convert ranges to [start, end] pairs; each segment starts one
			// byte after the previous segment's end
			$last = -1;
			foreach($ranges as &$r){
				
				$tmp = $r;
				$r = [$last + 1, $r];
				
				$last = $tmp;
			}
			unset($r);
			
			$browser_headers = getallheaders();
			
			// get the requested range from client
			$client_range = 0;
			foreach($browser_headers as $key => $value){
				
				if(strtolower($key) == "range"){
					
					preg_match(
						'/bytes=([0-9]+)/',
						$value,
						$client_regex
					);
					
					if(isset($client_regex[1])){
						
						$client_range = (int)$client_regex[1];
					}else{
						
						$client_range = 0;
					}
					break;
				}
			}
			
			if(
				$client_range < 0 ||
				$client_range > $ranges[count($ranges) - 1][1]
			){
				
				// range is not satisfiable
				http_response_code(416);
				header("Content-Type: text/plain");
				die();
			}
			
			// pick the last segment that starts at or before the requested byte
			$rng = null;
			for($i=0; $i<count($ranges); $i++){
				
				if($ranges[$i][0] <= $client_range){
					
					$rng = $ranges[$i];
				}
			}
			
			// proxy data!
			http_response_code(206); // partial content
			header("Accept-Ranges: bytes");
			header("Content-Range: bytes {$rng[0]}-{$rng[1]}/" . ($ranges[count($ranges) - 1][1] + 1));
			
			// point the segment URL at the chosen byte window
			$viewkey =
				preg_replace(
					'/\/media\/([0-9]+)\/[0-9]+\/[0-9]+/',
					'/media/$1/' . $rng[0] . '/' . $rng[1],
					$viewkey
				);
			
			try{
				
				$this->proxy->stream_linear_audio(
					$viewkey
				);
			}catch(Exception $error){
				
				$this->do404("Could not read stream");
			}
			
			die();
		}
		
		/*
			redirect user to correct resource
			we need to scrape and store the byte positions in the result URL
		*/
		if(
			!isset($_GET["s"]) ||
			!is_string($_GET["s"])
		){
			
			$this->do404("The URL(s) parameter is missing");
		}
		
		$viewkey = $_GET["s"];
		
		// preg_match() returns 0 (not false) when the pattern does not match,
		// so the result has to be compared against 1. The host must be
		// soundcloud.com itself or one of its subdomains.
		$host = parse_url($viewkey, PHP_URL_HOST);
		
		if(
			!is_string($host) ||
			preg_match(
				'/(^|\.)soundcloud\.com$/i',
				$host
			) !== 1
		){
			
			$this->do404("This endpoint can only be used for soundcloud streams");
		}
		
		try{
			
			$json = $this->proxy->get($viewkey)["body"];
		}catch(Exception $error){
			
			$this->do404("Curl error: " . $error->getMessage());
		}
		
		$json = json_decode($json, true);
		
		if(!isset($json["url"])){
			
			$this->do404("Could not get URL from JSON");
		}
		
		$viewkey = $json["url"];
		
		try{
			
			$m3u8 = $this->proxy->get($viewkey)["body"];
		}catch(Exception $error){
			
			$this->do404("Curl error: " . $error->getMessage());
		}
		
		$m3u8 = explode("\n", $m3u8);
		
		$lineout = null;
		$streampos_arr = [];
		foreach($m3u8 as $line){
			
			$line = trim($line);
			
			// skip blank lines and playlist directives
			if(
				$line === "" ||
				$line[0] == "#"
			){
				
				continue;
			}
			
			// remember the first segment URL, it becomes the redirect target
			if($lineout === null){
				
				$lineout = $line;
			}
			
			preg_match(
				'/\/media\/[0-9]+\/([0-9]+)\/([0-9]+)/',
				$line,
				$matches
			);
			
			if(isset($matches[0])){
				
				$streampos_arr[] = [
					(int)$matches[1],
					(int)$matches[2]
				];
			}
		}
		
		if($lineout === null){
			
			$this->do404("Could not get stream URL");
		}
		
		// zero out the byte window, the proxy branch fills it in again
		$lineout =
			preg_replace(
				'/\/media\/([0-9]+)\/[0-9]+\/[0-9]+/',
				'/media/$1/0/0',
				$lineout
			);
		
		// only the end position of each segment is passed along
		$streampos = [];
		
		foreach($streampos_arr as $pos){
			
			$streampos[] = $pos[1];
		}
		
		$streampos = implode(",", $streampos);
		
		header("Location: /audio/sc?u=" . urlencode($lineout) . "&r=$streampos");
		header("Accept-Ranges: bytes");
	}
	
	/*
		Reply 404 with the reason in the X-Error header and stop
	*/
	private function do404($error){
		
		http_response_code(404);
		header("Content-Type: text/plain");
		header("X-Error: $error");
		die();
	}
}

20
src/audio/seekable.php Normal file
View File

@ -0,0 +1,20 @@
<?php
// Reject a missing sound URL, and also array-style input ("?s[]=...") which
// passes isset() but cannot be used as a URL.
if(
	!isset($_GET["s"]) ||
	!is_string($_GET["s"])
){
	
	http_response_code(404);
	header("X-Error: No SOUND(s) provided!");
	die();
}

include "../data/config.php";
include "../lib/curlproxy.php";

$proxy = new proxy();

try{
	
	// NOTE(review): this endpoint calls stream_linear_audio(), the same
	// method audio/linear.php uses -- confirm that is intended for the
	// "seekable" route.
	$proxy->stream_linear_audio($_GET["s"]);
}catch(Exception $error){
	
	// report the failure reason through a response header
	header("X-Error: " . $error->getMessage());
}

214
src/audio/spotify.php Normal file
View File

@ -0,0 +1,214 @@
<?php
include "../data/config.php";
new spotify();
/**
 * Spotify audio endpoint.
 *
 * Instantiating the class handles the current request. $_GET["s"] must look
 * like "track.<22 chars>" or "episode.<22 chars>". The matching embed page is
 * fetched, its __NEXT_DATA__ JSON is decoded and the client is redirected to
 * /audio/linear with the playable URL found in it.
 *
 * Every failure ends the request through do404().
 */
class spotify{
	
	// HTML parser from lib/fuckhtml.php, assigned in the constructor
	public $fuckhtml;
	
	public function __construct(){
		
		include "../lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
		
		// is_string() guards preg_match() against array input ("?s[]=...")
		if(
			!isset($_GET["s"]) ||
			!is_string($_GET["s"]) ||
			!preg_match(
				'/^(track|episode)\.([A-Za-z0-9]{22})$/',
				$_GET["s"],
				$matches
			)
		){
			
			$this->do404("The track ID(s) parameter is missing or invalid");
		}
		
		try{
			
			// episodes are requested through the "show" embed path
			if($matches[1] == "episode"){
				
				$uri = "show";
			}else{
				
				$uri = $matches[1];
			}
			
			$embed =
				$this->get("https://embed.spotify.com/{$uri}/" . $matches[2]);
		}catch(Exception $error){
			
			$this->do404("Failed to fetch embed data");
		}
		
		$this->fuckhtml->load($embed);
		
		$json =
			$this->fuckhtml
			->getElementById(
				"__NEXT_DATA__",
				"script"
			);
		
		if($json === null){
			
			$this->do404("Failed to extract JSON");
		}
		
		$json =
			json_decode($json["innerHTML"], true);
		
		if($json === null){
			
			$this->do404("Failed to decode JSON");
		}
		
		// everything needed below sits under this node
		$entity =
			$json
			["props"]
			["pageProps"]
			["state"]
			["data"]
			["entity"] ?? null;
		
		switch($matches[1]){
			
			case "track":
				if(!isset($entity["audioPreview"]["url"])){
					
					$this->do404("Could not extract playback URL");
				}
				
				header("Content-type: audio/mpeg");
				header(
					"Location: /audio/linear?s=" .
					urlencode($entity["audioPreview"]["url"])
				);
				break;
			
			case "episode":
				if(!isset($entity["id"])){
					
					$this->do404("Failed to find episode ID");
				}
				
				try{
					$json =
						$this->get(
							"https://spclient.wg.spotify.com/soundfinder/v1/unauth/episode/" .
							$entity["id"] .
							"/com.widevine.alpha"
						);
				}catch(Exception $error){
					
					$this->do404("Failed to fetch audio resource");
				}
				
				$json = json_decode($json, true);
				
				if($json === null){
					
					$this->do404("Failed to decode audio resource JSON");
				}
				
				if(
					!isset($json["passthrough"]) ||
					$json["passthrough"] != "ALLOWED" ||
					!isset($json["passthroughUrl"])
				){
					
					$this->do404("Failed to find passthroughUrl");
				}
				
				// same extension-less target as the track branch
				header(
					"Location: /audio/linear?s=" .
					urlencode(
						str_replace(
							"http://",
							"https://",
							$json["passthroughUrl"]
						)
					)
				);
				break;
		}
	}
	
	/*
		GET $url with browser-like headers and return the response body.
		Throws Exception carrying the curl error message on failure.
	*/
	private function get($url){
		
		$headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"
		];
		
		$curlproc = curl_init();
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message before releasing the handle
			$message = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($message);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/*
		Reply 404 with the reason in the X-Error header and stop
	*/
	private function do404($error){
		
		http_response_code(404);
		header("Content-Type: text/plain");
		header("X-Error: $error");
		die();
	}
}

BIN
src/banner/4get-default.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

170
src/data/config.php Normal file
View File

@ -0,0 +1,170 @@
<?php
// Instance-wide settings, exposed as class constants and read elsewhere as
// config::NAME.
class config{
// Welcome to the 4get configuration file
// When updating your instance, please make sure this file isn't missing
// any parameters.
// 4get version. Please keep this updated
const VERSION = 8;
// Will be shown pretty much everywhere.
const SERVER_NAME = "4get";
// Will be shown in <meta> tag on home page
const SERVER_SHORT_DESCRIPTION = "4get is a proxy search engine that doesn't suck.";
// Will be shown in server list ping (null for no description)
const SERVER_LONG_DESCRIPTION = null;
// Add your own themes in "static/themes". Set to "Dark" for default theme.
// Eg. To use "static/themes/Cream.css", specify "Cream".
const DEFAULT_THEME = "Dark";
// Enable the API?
const API_ENABLED = true;
//
// BOT PROTECTION
//
// 0 = disabled, 1 = ask for image captcha, @TODO: 2 = invite only (users need a pass)
// VERY useful against a targeted attack
const BOT_PROTECTION = 0;
// if BOT_PROTECTION is set to 1, specify the available datasets here
// images should be named from 1.png to X.png, and be 100x100 in size
// Eg. data/captcha/birds/1.png up to 2263.png
const CAPTCHA_DATASET = [
// example:
//["birds", 2263],
//["fumo_plushies", 1006],
//["minecraft", 848]
];
// If this regular expression matches the user agent, the request is blocked
// Not useful at all against a targeted attack
const HEADER_REGEX = '/bot|wget|curl|python-requests|scrapy|go-http-client|ruby|yahoo|spider|qwant/i';
// Block clients who present any of the following headers in their request (SPECIFY IN !!lowercase!!)
// Eg: ["x-forwarded-for", "x-via", "forwarded-for", "via"];
// Useful for blocking *some* proxies used for botting
const FILTERED_HEADER_KEYS = [
//"x-forwarded-for",
//"x-cluster-client-ip",
//"x-client-ip",
//"x-real-ip",
//"client-ip",
//"real-ip",
//"forwarded-for",
//"forwarded-for-ip",
//"forwarded",
//"proxy-connection",
//"remote-addr",
//"via"
];
// Block SSL ciphers used by CLI tools used for botting
// Basically a primitive version of Cloudflare's browser integrity check
// ** If curl can still access the site (with spoofed headers), please make sure you use the new apache2 config **
// https://git.lolcat.ca/lolcat/4get/docs/apache2.md
const DISALLOWED_SSL = [
// "TLS_AES_256_GCM_SHA384" // used by WGET and CURL
];
// Maximum number of searches per captcha key/pass issued. Counter gets
// reset on every APCU cache clear (should happen once a day).
// Only useful when BOT_PROTECTION is NOT set to 0
const MAX_SEARCHES = 100;
// List of domains that point to your servers. Include your tor/i2p
// addresses here! Must be a valid URL. Won't affect links placed on
// the homepage.
const ALT_ADDRESSES = [
//"https://4get.alt-tld",
//"http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion"
];
// Known 4get instances. MUST use the https protocol if your instance uses
// it. Is used to generate a distributed list of instances.
// To appear in the list of an instance, contact the host; once everyone has
// added each other, your server should appear everywhere.
const INSTANCES = [
"https://4get.ca",
"https://4get.zzls.xyz",
"https://4getus.zzls.xyz",
"https://4get.silly.computer",
"https://4get.konakona.moe",
"https://4get.lvkaszus.pl",
"https://4g.ggtyler.dev",
"https://4get.perennialte.ch",
"https://4get.sijh.net",
"https://4get.hbubli.cc",
"https://4get.plunked.party",
"https://4get.seitan-ayoub.lol",
"https://4get.etenie.pl",
"https://4get.lunar.icu",
"https://4get.dcs0.hu",
"https://4get.kizuki.lol",
"https://4get.psily.garden",
"https://search.milivojevic.in.rs",
"https://4get.snine.nl",
"https://4get.datura.network",
"https://4get.neco.lol",
"https://4get.lol",
"https://4get.ch",
"https://4get.edmateo.site",
"https://4get.sudovanilla.org",
"https://search.mint.lgbt"
];
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things.
const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0";
// Proxy pool assignments for each scraper
// false = Use server's raw IP
// string = will load a proxy list from data/proxies
// Eg. "onion" will load data/proxies/onion.txt
const PROXY_DDG = false; // duckduckgo
const PROXY_BRAVE = false;
const PROXY_FB = false; // facebook
const PROXY_GOOGLE = false;
const PROXY_GOOGLE_CSE = false;
const PROXY_STARTPAGE = false;
const PROXY_QWANT = false;
const PROXY_GHOSTERY = false;
const PROXY_MARGINALIA = false;
const PROXY_MOJEEK = false;
const PROXY_SC = false; // soundcloud
const PROXY_SPOTIFY = false;
const PROXY_SOLOFIELD = false;
const PROXY_WIBY = false;
const PROXY_CURLIE = false;
const PROXY_YT = false; // youtube
const PROXY_YEP = false;
const PROXY_PINTEREST = false;
const PROXY_FIVEHPX = false;
const PROXY_VSCO = false;
const PROXY_SEZNAM = false;
const PROXY_NAVER = false;
const PROXY_GREPPR = false;
const PROXY_CROWDVIEW = false;
const PROXY_MWMBL = false;
const PROXY_FTM = false; // findthatmeme
const PROXY_IMGUR = false;
const PROXY_YANDEX_W = false; // yandex web
const PROXY_YANDEX_I = false; // yandex images
const PROXY_YANDEX_V = false; // yandex videos
//
// Scraper-specific parameters
//
// GOOGLE CSE
const GOOGLE_CX_ENDPOINT = "d4e68b99b876541f0";
// MARGINALIA
// Use null to fall back to HTML scraping OR specify a string to
// use the API (Eg: "public"). API has fewer filters.
const MARGINALIA_API_KEY = null;
}

BIN
src/data/fonts/captcha.ttf Normal file

Binary file not shown.

3
src/data/proxies/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*
!.gitignore
!onion.txt

View File

@ -0,0 +1,13 @@
# Specify proxies by following this format:
# <protocol>:<address>:<port>:<username>:<password>
#
# Examples:
# https:1.3.3.7:6969:abcd:efg
# socks4:1.2.3.4:8080::
# raw_ip::::
#
# Available protocols:
# raw_ip, http, https, socks4, socks5, socks4a, socks5_hostname
# Local tor proxy
socks5:localhost:9050::

39
src/donate.php Normal file
View File

@ -0,0 +1,39 @@
<?php
include "data/config.php";
include "lib/frontend.php";

$frontend = new frontend();

/*
	Header (variant without search filters)
*/
echo $frontend->load(
	"header_nofilters.html",
	[
		"title" => "Donate to the project",
		"class" => " class=\"about\""
	]
);

/*
	Body: the donate template with per-line whitespace trimmed away and all
	lines joined into a single string
*/
$markup = "";

$pieces = explode("\n", file_get_contents("template/donate.html"));
for($i = 0; $i < count($pieces); $i++){
	
	$markup .= trim($pieces[$i]);
}

$page = [
	"timetaken" => null,
	"class" => "",
	"right-left" => "",
	"right-right" => "",
	"left" => $markup
];

echo $frontend->load("search.html", $page);

BIN
src/favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 193 B

363
src/favicon.php Normal file
View File

@ -0,0 +1,363 @@
<?php
// The site must be passed as a single string; array-style input ("?s[]=...")
// passes isset() but would break the string handling in the favicon class.
if(
	!isset($_GET["s"]) ||
	!is_string($_GET["s"])
){
	
	header("X-Error: Missing parameter (s)ite");
	die();
}

include "data/config.php";

// constructing the object serves the favicon for the requested site
new favicon($_GET["s"]);
/**
 * Favicon endpoint.
 *
 * Constructing the object with a "<protocol>://<domain>" URL prints a 16x16
 * PNG favicon for that site:
 *
 *  1. a copy cached under icons/<host>.png is served when present;
 *  2. otherwise the site's HTML is fetched, its <link> tags are inspected
 *     and the best icon candidate is downloaded, converted with Imagick,
 *     cached and printed;
 *  3. when fetching or converting fails, favicon404() asks Google's favicon
 *     service, and defaulticon() serves lib/favicon404.png as a last resort.
 */
class favicon{
	
	// curl proxy helper from lib/curlproxy.php
	public $proxy;
	
	// host part of the requested URL; also the cache file name (without .png)
	public $filename;
	
	public function __construct($url){
		
		header("Content-Type: image/png");
		
		if(
			!is_string($url) ||
			substr_count($url, "/") !== 2
		){
			
			header("X-Error: Only provide the protocol and domain");
			$this->defaulticon();
		}
		
		// The cache file name is derived from the parsed host only. Building
		// it from the raw URL allowed values such as "../../x" to escape the
		// icons directory, and made reads and writes use different names.
		$host = parse_url($url, PHP_URL_HOST);
		
		if(
			!is_string($host) ||
			$host === "" ||
			strpbrk($host, "/\\\"\0") !== false
		){
			
			header("X-Error: Only provide the protocol and domain");
			$this->defaulticon();
		}
		
		$this->filename = $host;
		
		header("Content-Disposition: inline; filename=\"{$host}.png\"");
		
		include "lib/curlproxy.php";
		$this->proxy = new proxy(false);
		
		/*
			Check if we have the favicon stored locally
		*/
		$cachefile = "icons/" . $this->filename . ".png";
		
		if(is_file($cachefile)){
			
			// readfile() also copes with an empty file, where
			// fread($handle, 0) would throw
			readfile($cachefile);
			return;
		}
		
		/*
			Scrape html
		*/
		try{
			
			$payload = $this->proxy->get($url, $this->proxy::req_web, true);
			
		}catch(Exception $error){
			
			header("X-Error: Could not fetch HTML (" . $error->getMessage() . ")");
			$this->favicon404();
		}
		
		// get link tags
		preg_match_all(
			'/< *link +(.*)[\/]?>/Uixs',
			$payload["body"],
			$linktags
		);
		
		/*
			Get relevant tags
		*/
		$linktags = $linktags[1];
		$attributes = [];
		
		for($i=0; $i<count($linktags); $i++){
			
			// get attributes
			preg_match_all(
				'/([A-Za-z0-9]+) *= *("[^"]*"|[^" ]+)/s',
				$linktags[$i],
				$tags
			);
			
			for($k=0; $k<count($tags[1]); $k++){
				
				$attributes[$i][] = [
					"name" => $tags[1][$k],
					"value" => trim($tags[2][$k], "\" \n\r\t\v\x00")
				];
			}
		}
		
		unset($payload);
		unset($linktags);
		
		// rel value => href of the link tags we can use
		$candidates = [];
		
		foreach($attributes as $group){
			
			$tmp_href = null;
			$tmp_rel = null;
			$badtype = false;
			
			foreach($group as $attribute){
				
				$value = $attribute["value"];
				
				switch($attribute["name"]){
					
					case "rel":
						$value = strtolower($value);
						
						if(
							in_array(
								$value,
								[
									"icon",
									"manifest",
									"shortcut icon",
									"apple-touch-icon",
									"mask-icon"
								],
								true
							)
						){
							
							$tmp_rel = $value;
						}
						break;
					
					case "type":
						// only image/* types are acceptable
						$type = explode("/", $value, 2);
						
						if(strtolower($type[0]) != "image"){
							
							$badtype = true;
						}
						break;
					
					case "href":
						// must not contain invalid characters
						// must not be empty
						if(
							filter_var($value, FILTER_SANITIZE_URL) === $value &&
							strlen($value) > 0
						){
							
							$tmp_href = $value;
						}
						break;
				}
			}
			
			if(
				$badtype === false &&
				$tmp_rel !== null &&
				$tmp_href !== null
			){
				
				$candidates[$tmp_rel] = $tmp_href;
			}
		}
		
		/*
			Priority list
		*/
		$href = null;
		
		if(isset($candidates["icon"])){
			
			$href = $candidates["icon"];
		}elseif(isset($candidates["apple-touch-icon"])){
			
			$href = $candidates["apple-touch-icon"];
		}elseif(isset($candidates["manifest"])){
			
			// attempt to parse manifest; parsemanifest() returns [] on failure
			$manifest_icon = $this->parsemanifest($candidates["manifest"], $url);
			
			if(is_string($manifest_icon)){
				
				$href = $manifest_icon;
			}
		}
		
		// nothing usable so far: try the remaining rel types. The candidate
		// list is kept separate from $href so a failed manifest does not
		// discard these fallbacks.
		if($href === null){
			
			if(isset($candidates["mask-icon"])){
				
				$href = $candidates["mask-icon"];
			}elseif(isset($candidates["shortcut icon"])){
				
				$href = $candidates["shortcut icon"];
			}else{
				
				$href = "/favicon.ico";
			}
		}
		
		$href = $this->proxy->getabsoluteurl($href, $url);
		
		/*
			Download the favicon
		*/
		try{
			$payload =
				$this->proxy->get(
					$href,
					$this->proxy::req_image,
					true,
					$url
				);
			
		}catch(Exception $error){
			
			header("X-Error: Could not fetch the favicon (" . $error->getMessage() . ")");
			$this->favicon404();
		}
		
		/*
			Parse the file format
		*/
		$image = null;
		$format = $this->proxy->getimageformat($payload, $image);
		
		/*
			Convert the image
		*/
		try{
			
			/*
				@todo: fix issues with avif+transparency
				maybe using GD as fallback?
			*/
			if($format !== false){
				
				$image->setFormat($format);
			}
			
			$image->setBackgroundColor(new ImagickPixel("transparent"));
			$image->readImageBlob($payload["body"]);
			$image->resizeImage(16, 16, imagick::FILTER_LANCZOS, 1);
			$image->setFormat("png");
			$image = $image->getImageBlob();
			
			// save favicon; a failed write must not prevent serving the icon
			file_put_contents("icons/" . $this->filename . ".png", $image);
			
			echo $image;
			
		}catch(ImagickException $error){
			
			header("X-Error: Could not convert the favicon: (" . $error->getMessage() . ")");
			$this->favicon404();
		}
		
		return;
	}
	
	/*
		Return the "src" of the first icon listed in a web manifest.
		
		$href is the manifest location taken from the <link> tag: either a
		base64 data: URI or a URL resolved against $url.
		Returns [] when the manifest cannot be read or lists no icon.
	*/
	private function parsemanifest($href, $url){
		
		if(
			// check if base64-encoded JSON manifest
			// (the base64 alphabet includes "+" and "/")
			preg_match(
				'/^data:application\/json;base64,([A-Za-z0-9+\/=]*)$/',
				$href,
				$json
			)
		){
			
			$json = base64_decode($json[1], true);
			
			if($json === false){
				
				// could not decode the embedded manifest
				return [];
			}
		}else{
			
			try{
				$json =
					$this->proxy->get(
						$this->proxy->getabsoluteurl($href, $url),
						$this->proxy::req_web,
						false,
						$url
					);
				
				$json = $json["body"];
				
			}catch(Exception $error){
				
				// could not fetch the manifest
				return [];
			}
		}
		
		$json = json_decode($json, true);
		
		if($json === null){
			
			// manifest did not return valid json
			return [];
		}
		
		// NOTE(review): $url is reassigned here but not read again in this
		// method -- confirm whether the icon path was meant to be resolved
		// against start_url.
		if(
			isset($json["start_url"]) &&
			$this->proxy->validateurl($json["start_url"])
		){
			
			$url = $json["start_url"];
		}
		
		if(!isset($json["icons"][0]["src"])){
			
			// manifest does not contain a path to the favicon
			return [];
		}
		
		// hooray, return the favicon path
		return $json["icons"][0]["src"];
	}
	
	/*
		Fallback: fetch the icon from Google's favicon service, cache it,
		print it and stop. Falls through to defaulticon() when that request
		fails too.
	*/
	private function favicon404(){
		
		try{
			
			$image =
				$this->proxy->get(
					"https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=http://{$this->filename}&size=16",
					$this->proxy::req_image
				);
		}catch(Exception $error){
			
			$this->defaulticon();
		}
		
		// write favicon from google
		file_put_contents("icons/" . $this->filename . ".png", $image["body"]);
		
		echo $image["body"];
		
		die();
	}
	
	/*
		Last resort: reply 404 with the bundled placeholder icon and stop
	*/
	private function defaulticon(){
		
		http_response_code(404);
		
		readfile("lib/favicon404.png");
		
		die();
	}
}

BIN
src/icons/lolcat.ca.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 753 B

77
src/images.php Normal file
View File

@ -0,0 +1,77 @@
<?php
/*
	Set up the frontend, the selected image scraper and the parsed filters
*/
include "data/config.php";
include "lib/frontend.php";

$frontend = new frontend();

[$scraper, $filters] = $frontend->getscraperfilters("images");

$get = $frontend->parsegetfilters($_GET, $filters);

/*
	Captcha
*/
include "lib/bot_protection.php";
new bot_protection($frontend, $get, $filters, "images", true);

// template variables; "timetaken" starts out as the request start timestamp
$payload = [
	"timetaken" => microtime(true),
	"images" => "",
	"nextpage" => ""
];

/*
	Query the scraper
*/
try{
	
	$results = $scraper->image($get);
	
}catch(Exception $error){
	
	$frontend->drawscrapererror($error->getMessage(), $get, "images", $payload["timetaken"]);
}

/*
	Build the result grid
*/
$markup = "";

if(count($results["image"]) === 0){
	
	// empty result set: show some hints instead
	$markup =
		'<div class="infobox">' .
			"<h1>Nobody here but us chickens!</h1>" .
			'Have you tried:' .
			'<ul>' .
				'<li>Using a different scraper</li>' .
				'<li>Using fewer keywords</li>' .
				'<li>Defining broader filters (Is NSFW turned off?)</li>' .
			'</ul>' .
		'</div>';
}

foreach($results["image"] as $image){
	
	// the first source is linked and supplies the dimensions, the last one
	// is used for the thumbnail
	$sources = $image["source"];
	$first = $sources[0];
	$last = $sources[count($sources) - 1];
	
	$title_attr = htmlspecialchars($image["title"]);
	$json_attr = htmlspecialchars(json_encode($sources));
	$first_href = htmlspecialchars($first["url"]);
	$thumb_src = $frontend->htmlimage($last["url"], "thumb");
	
	$markup .=
		'<div class="image-wrapper" title="' . $title_attr .'" data-json="' . $json_attr . '">' .
			'<div class="image">' .
				'<a href="' . $first_href . '" rel="noreferrer nofollow" class="thumb">' .
					'<img src="' . $thumb_src . '" alt="thumbnail">';
	
	// dimensions badge, only when the width is known
	if($first["width"] !== null){
		
		$markup .= '<div class="duration">' . $first["width"] . 'x' . $first["height"] . '</div>';
	}
	
	$page_href = htmlspecialchars($image["url"]);
	$page_host = htmlspecialchars(parse_url($image["url"], PHP_URL_HOST));
	$description = $frontend->highlighttext($get["s"], $image["title"]);
	
	$markup .=
				'</a>' .
				'<a href="' . $page_href . '" rel="noreferrer nofollow">' .
					'<div class="title">' . $page_host . '</div>' .
					'<div class="description">' . $description . '</div>' .
				'</a>' .
			'</div>' .
		'</div>';
}

$payload["images"] = $markup;

/*
	Pagination link
*/
if($results["npt"] !== null){
	
	$payload["nextpage"] =
		'<a href="' . $frontend->htmlnextpage($get, $results["npt"], "images") . '" class="nextpage img">Next page &gt;</a>';
}

echo $frontend->load("images.html", $payload);

15
src/index.php Normal file
View File

@ -0,0 +1,15 @@
<?php
include "data/config.php";
include "lib/frontend.php";

$frontend = new frontend();

// Pick a random banner. glob() may return false on error or an empty list
// when the directory holds no files; indexing into that would fail, so fall
// back to the path of the default banner in that case.
$images = glob("banner/*");

if(
	$images === false ||
	count($images) === 0
){
	
	$banner = "banner/4get-default.png";
}else{
	
	$banner = $images[array_rand($images)];
}

echo $frontend->load(
	"home.html",
	[
		"server_short_description" => htmlspecialchars(config::SERVER_SHORT_DESCRIPTION),
		"banner" => $banner
	]
);

178
src/lib/backend.php Normal file
View File

@ -0,0 +1,178 @@
<?php
class backend{
public function __construct($scraper){
$this->scraper = $scraper;
}
/*
Proxy stuff
*/
public function get_ip(){
$pool = constant("config::PROXY_" . strtoupper($this->scraper));
if($pool === false){
// we don't want a proxy, fuck off!
return 'raw_ip::::';
}
// indent
$proxy_index_raw = apcu_inc("p." . $this->scraper);
$proxylist = file_get_contents("data/proxies/" . $pool . ".txt");
$proxylist = explode("\n", $proxylist);
// ignore empty or commented lines
$proxylist = array_filter($proxylist, function($entry){
$entry = ltrim($entry);
return strlen($entry) > 0 && substr($entry, 0, 1) != "#";
});
$proxylist = array_values($proxylist);
return $proxylist[$proxy_index_raw % count($proxylist)];
}
// this function is also called directly on nextpage
public function assign_proxy(&$curlproc, string $ip){
// parse proxy line
[
$type,
$address,
$port,
$username,
$password
] = explode(":", $ip, 5);
switch($type){
case "raw_ip":
return;
break;
case "http":
case "https":
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
curl_setopt($curlproc, CURLOPT_PROXY, $type . "://" . $address . ":" . $port);
break;
case "socks4":
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4);
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
break;
case "socks5":
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
break;
case "socks4a":
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
break;
case "socks5_hostname":
case "socks5h":
case "socks5a":
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
break;
}
if($username != ""){
curl_setopt($curlproc, CURLOPT_PROXYUSERPWD, $username . ":" . $password);
}
}
/*
Next page stuff
*/
public function store(string $payload, string $page, string $proxy){
$key = sodium_crypto_secretbox_keygen();
$nonce = random_bytes(SODIUM_CRYPTO_SECRETBOX_NONCEBYTES);
$requestid = apcu_inc("requestid");
apcu_store(
$page[0] . "." . // first letter of page name
$this->scraper . // scraper name
$requestid,
[
$nonce,
$proxy,
// compress and encrypt
sodium_crypto_secretbox(
gzdeflate($payload),
$nonce,
$key
)
],
900 // cache information for 15 minutes
);
return
$this->scraper . $requestid . "." .
rtrim(strtr(base64_encode($key), '+/', '-_'), '=');
}
public function get(string $npt, string $page){

    /*
        Resolve a next page token back into its stored payload.

        $npt is "<scraper><requestid>.<key>", $page is the page name
        (only its first letter is used for the APCu key).

        Returns [ decrypted payload, proxy ] and deletes the APCu entry
        once it has been decrypted successfully. Throws an Exception when
        the token is malformed, unknown, expired or fails to decrypt.
    */

    $page = $page[0];
    $explode = explode(".", $npt, 2);

    if(count($explode) !== 2){

        throw new Exception("Malformed nextPageToken!");
    }

    $apcu = $page . "." . $explode[0];

    $payload = apcu_fetch($apcu);

    if($payload === false){

        throw new Exception("The next page token is invalid or has expired!");
    }

    // undo the url-safe alphabet and restore the "=" padding that was
    // stripped when the token was created (length must be a multiple of 4)
    $key = strtr($explode[1], '-_', '+/');
    $key .= str_repeat("=", (4 - strlen($key) % 4) % 4);
    $key = base64_decode($key, true);

    // the key comes from the client: reject anything that is not a
    // well-formed secretbox key instead of letting libsodium throw
    if(
        $key === false ||
        strlen($key) !== SODIUM_CRYPTO_SECRETBOX_KEYBYTES
    ){

        throw new Exception("The next page token is invalid or has expired!");
    }

    // decrypt data
    $decrypted =
        sodium_crypto_secretbox_open(
            $payload[2], // data
            $payload[0], // nonce
            $key
        );

    if($decrypted === false){

        // wrong key
        throw new Exception("The next page token is invalid or has expired!");
    }

    // decompress data
    $data = gzinflate($decrypted);

    if($data === false){

        throw new Exception("The next page token is invalid or has expired!");
    }

    // remove the key after using successfully
    apcu_delete($apcu);

    return [
        $data, // data
        $payload[1] // proxy
    ];
}
}

View File

@ -0,0 +1,144 @@
<?php
// https://www.bing.com/search?q=url%3Ahttps%3A%2F%2Flolcat.ca
// https://cc.bingj.com/cache.aspx?q=url%3ahttps%3a%2f%2flolcat.ca&d=4769685974291356&mkt=en-CA&setlang=en-US&w=tEsWuE7HW3Z5AIPQMVkDH4WaotS4LrK-
// <div class="b_attribution" u="0N|5119|4769685974291356|tEsWuE7HW3Z5AIPQMVkDH4WaotS4LrK-" tabindex="0">
new bingcache();
class bingcache{

    /*
        Looks up the URL given in $_GET["s"] on Bing and redirects the
        client to Bing's cached copy of it (cc.bingj.com).

        All the work happens in the constructor. Every failure path ends
        in do404(), which prints the error page and terminates the script.
    */
    public function __construct(){

        // isset() is checked first so that validate_url() never sees an
        // undefined index (leftover var_dump() debugging was removed)
        if(
            !isset($_GET["s"]) ||
            !is_string($_GET["s"]) ||
            $this->validate_url($_GET["s"]) === false
        ){

            $this->do404("Please provide a valid URL.");
        }

        $url = $_GET["s"];

        $curlproc = curl_init();

        curl_setopt(
            $curlproc,
            CURLOPT_URL,
            "https://www.bing.com/search?q=url%3A" .
            urlencode($url)
        );

        curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
        curl_setopt(
            $curlproc,
            CURLOPT_HTTPHEADER,
            ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0",
            "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language: en-US,en;q=0.5",
            "Accept-Encoding: gzip",
            "DNT: 1",
            "Connection: keep-alive",
            "Upgrade-Insecure-Requests: 1",
            "Sec-Fetch-Dest: document",
            "Sec-Fetch-Mode: navigate",
            "Sec-Fetch-Site: none",
            "Sec-Fetch-User: ?1"]
        );

        curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
        curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 5);

        $data = curl_exec($curlproc);

        if(curl_errno($curlproc)){

            $this->do404("Failed to connect to bing servers. Please try again later.");
        }

        curl_close($curlproc);

        // <div class="b_attribution" u="0N|5119|<d>|<w>" tabindex="0">
        // [^"]* instead of .* so the match cannot run past the end of
        // the u="" attribute when the page is served on a single line
        preg_match(
            '/<div class="b_attribution" u="([^"]*)" tabindex="0">/',
            $data,
            $keys
        );

        if(count($keys) === 0){

            $this->do404("Bing has not archived this URL.");
        }

        // the last two pipe-separated fields are the "d" and "w" keys
        $keys = explode("|", $keys[1]);
        $count = count($keys);

        if($count < 2){

            $this->do404("Bing has not archived this URL.");
        }

        // send a real redirect (this used to be echo'd for debugging)
        header(
            "Location: https://cc.bingj.com/cache.aspx?d=" . urlencode($keys[$count - 2]) .
            "&w=" . urlencode($keys[$count - 1])
        );
    }

    /*
        Print the error page with the given text and stop the script.
    */
    public function do404($text){

        include "lib/frontend.php";
        $frontend = new frontend();

        echo
            $frontend->load(
                "error.html",
                [
                    "title" => "Shit",
                    "text" => $text
                ]
            );

        die();
    }

    /*
        Check that $url is an http(s) URL pointing at a public address.

        Returns false when the URL is rejected. Otherwise returns the
        IP address that was checked (filter_var() result), so callers
        must compare against false with ===.
    */
    public function validate_url($url){

        $url_parts = parse_url($url);

        // check if required parts are there
        if(
            !isset($url_parts["scheme"]) ||
            !(
                $url_parts["scheme"] == "http" ||
                $url_parts["scheme"] == "https"
            ) ||
            !isset($url_parts["host"])
        ){
            return false;
        }

        if(
            // if its not an RFC-valid URL
            !filter_var($url, FILTER_VALIDATE_URL)
        ){
            return false;
        }

        $ip =
            str_replace(
                ["[", "]"], // handle ipv6
                "",
                $url_parts["host"]
            );

        // if its not an IP
        if(!filter_var($ip, FILTER_VALIDATE_IP)){

            // resolve domain's IP
            // (trailing dot makes the name fully qualified)
            $ip = gethostbyname($url_parts["host"] . ".");
        }

        // reject private and reserved ranges (localhost, LAN, ...)
        return filter_var(
            $ip,
            FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
        );
    }
}

281
src/lib/bot_protection.php Normal file
View File

@ -0,0 +1,281 @@
<?php
class bot_protection{

    /*
        Gatekeeper that runs before a search is served.

        $frontend   frontend instance used to draw the page
        $get        parsed GET parameters, passed through to loadheader()
        $filters    filter list, passed through to loadheader()
        $page       page name, passed through to loadheader()
        $output     true for HTML pages. false for callers that only want
                    a JSON error (no captcha page, no header is drawn)

        When the constructor returns, the request is allowed to continue.
        Otherwise it prints the captcha page (or a JSON error) and die()s.
    */
    public function __construct($frontend, $get, $filters, $page, $output){

        // check if we want captcha
        if(config::BOT_PROTECTION !== 1){

            apcu_inc("real_requests");
            if($output === true){

                $frontend->loadheader(
                    $get,
                    $filters,
                    $page
                );
            }
            return;
        }

        /*
            Validate cookie, if it exists
        */
        if(isset($_COOKIE["pass"])){

            if(
                // check if key is not malformed
                preg_match(
                    '/^k[0-9]+\.[A-Za-z0-9_]{20}$/',
                    $_COOKIE["pass"]
                ) &&
                // does key exist
                apcu_exists($_COOKIE["pass"])
            ){

                // exists, increment counter
                $inc = apcu_inc($_COOKIE["pass"]);

                // we start counting from 1
                // when it has been incremented to MAX_SEARCHES + 2, it
                // has reached MAX_SEARCHES reqs
                if($inc >= config::MAX_SEARCHES + 2){

                    // reached limit, delete and give captcha
                    apcu_delete($_COOKIE["pass"]);
                }else{

                    // the cookie is OK! dont die() and give results
                    apcu_inc("real_requests");

                    if($output === true){
                        $frontend->loadheader(
                            $get,
                            $filters,
                            $page
                        );
                    }
                    return;
                }
            }
        }

        if($output === false){

            http_response_code(401); // unauthorized
            echo json_encode([
                "status" => "The \"pass\" token in your cookies is missing or has expired!!"
            ]);
            die();
        }

        /*
            Validate form data
        */
        // the captcha form is sent as text/plain: one "name=value" per line
        $lines =
            explode(
                "\r\n",
                file_get_contents("php://input")
            );

        $invalid = false;
        $answers = [];
        $key = false;
        $error = "";

        foreach($lines as $line){

            $line = explode("=", $line, 2);

            if(count($line) !== 2){

                $invalid = true;
                break;
            }

            preg_match(
                '/^c\[([0-9]+)\]$/',
                $line[0],
                $regex
            );

            if(
                $line[1] != "on" ||
                !isset($regex[0][1])
            ){

                // not a ticked checkbox
                // check if its the v key
                if(
                    $line[0] == "v" &&
                    preg_match(
                        '/^c[0-9]+\.[A-Za-z0-9_]{20}$/',
                        $line[1]
                    )
                ){

                    // a captcha can only be attempted once
                    $key = apcu_fetch($line[1]);
                    apcu_delete($line[1]);
                }
                break;
            }

            $regex = (int)$regex[1];

            if(
                $regex >= 16 ||
                $regex <= -1
            ){

                $invalid = true;
                break;
            }

            $answers[] = $regex;
        }

        if(
            !$invalid &&
            $key !== false // has captcha been gen'd?
        ){

            if(
                is_array($key) &&
                $this->validate_answers($answers, $key)
            ){

                // we passed the captcha
                // set cookie
                $inc = apcu_inc("cookie");

                $key = "k" . $inc . "." . $this->randomchars();

                apcu_inc($key, 1, $stupid, 86400);

                apcu_inc("real_requests");

                setcookie(
                    "pass",
                    $key,
                    [
                        "expires" => time() + 86400, // expires in 24 hours
                        "samesite" => "Lax",
                        "path" => "/"
                    ]
                );

                $frontend->loadheader(
                    $get,
                    $filters,
                    $page
                );
                return;

            }else{

                $error = "<div class=\"quote\">You were <a href=\"https://www.youtube.com/watch?v=e1d7fkQx2rk\" target=\"_BLANK\" rel=\"noreferrer nofollow\">kicked out of Mensa.</a> Please try again.</div>";
            }
        }

        // one checkbox + label per captcha cell (c[0] to c[15])
        $checkboxes = "";

        for($i=0; $i<16; $i++){

            $checkboxes .=
                '<input type="checkbox" name="c[' . $i . ']" id="c' . $i . '">' .
                '<label for="c' . $i . '"></label>';
        }

        $key = "c" . apcu_inc("captcha_gen", 1) . "." . $this->randomchars();

        $payload = [
            "timetaken" => microtime(true),
            "class" => "",
            "right-left" => "",
            "right-right" => "",
            "left" =>
                '<div class="infobox">' .
                    '<h1>IQ test</h1>' .
                    'IQ test has been enabled due to bot abuse on the network.<br>' .
                    'Solving this IQ test will let you make ' . config::MAX_SEARCHES . ' searches today. I will add an invite system to bypass this soon...' .
                    $error .
                    '<form method="POST" enctype="text/plain" autocomplete="off">' .
                        '<div class="captcha-wrapper">' .
                            '<div class="captcha">' .
                                '<img src="captcha?v=' . $key . '" alt="Captcha image">' .
                                '<div class="captcha-controls">' .
                                    $checkboxes .
                                '</div>' .
                            '</div>' .
                        '</div>' .
                        '<input type="hidden" name="v" value="' . $key . '">' .
                        '<input type="submit" value="Check IQ" class="captcha-submit">' .
                    '</form>' .
                '</div>'
        ];

        $frontend->loadheader(
            $get,
            $filters,
            $page
        );

        echo $frontend->load("search.html", $payload);
        die();
    }

    /*
        Returns true when the submitted cells are exactly the cells stored
        for the captcha: every answer must be in $key and every cell of
        $key must have been answered.

        Both lists are de-duplicated first. Without that, repeating a
        single correct cell count($key) times would pass the check.
    */
    private function validate_answers(array $answers, array $key){

        $answers = array_unique($answers);
        $key = array_unique($key);

        $remaining = count($key);

        foreach($answers as $answer){

            if(!in_array($answer, $key)){

                // ticked a wrong cell
                return false;
            }

            $remaining--;
        }

        return $remaining === 0;
    }

    /*
        Returns 20 random characters from [A-Za-z0-9_], picked with
        random_int().
    */
    private function randomchars(){

        $chars =
            array_merge(
                range("A", "Z"),
                range("a", "z"),
                range(0, 9)
            );

        $chars[] = "_";

        $c = count($chars) - 1;

        $key = "";

        for($i=0; $i<20; $i++){

            $key .= $chars[random_int(0, $c)];
        }

        return $key;
    }
}

660
src/lib/curlproxy.php Normal file
View File

@ -0,0 +1,660 @@
<?php
class proxy{

    /*
        Fetches remote web pages, images and audio on behalf of the
        client, refusing URLs that point at private or reserved addresses.
    */

    public const req_web = 0;
    public const req_image = 1;

    // declared explicitly: creating properties on the fly is deprecated
    // since PHP 8.2
    public $cache;          // bool, see clientcache()
    public $url;            // URL currently being streamed
    public $format;         // "image" or "audio" while streaming
    public $empty_header;   // stream(): last header line was blank
    public $cont;           // stream(): current response is a HTTP 200
    public $headers_tmp;    // stream(): raw header lines of current response
    public $headers;        // stream(): parsed headers, lowercase keys

    /*
        $cache: when not false, get() and stream() tell the client to
        cache the response (see clientcache()).
    */
    public function __construct($cache = true){

        $this->cache = $cache;
    }

    /*
        Reply with HTTP 404 and the placeholder image, then stop the script.
    */
    public function do404(){

        http_response_code(404);
        header("Content-Type: image/png");

        $handle = fopen("lib/img404.png", "r");
        echo fread($handle, filesize("lib/img404.png"));
        fclose($handle);

        die();
    }

    /*
        Turn $path (for example a Location header) into an absolute URL,
        using $relative as the base URL.

        - $path that already passes validateurl() is returned untouched
        - "//host/path" is given the https scheme
        - "/path" is resolved against the root of the base host
        - anything else is resolved against the directory of the base path
    */
    public function getabsoluteurl($path, $relative){

        if($this->validateurl($path)){

            return $path;
        }

        if(substr($path, 0, 2) == "//"){

            return "https:" . $path;
        }

        $relative = parse_url($relative);
        $url = $relative["scheme"] . "://";

        if(
            isset($relative["user"]) &&
            isset($relative["pass"])
        ){

            $url .= $relative["user"] . ":" . $relative["pass"] . "@";
        }

        $url .= $relative["host"];

        // keep non-default ports
        if(isset($relative["port"])){

            $url .= ":" . $relative["port"];
        }

        if(
            strlen($path) !== 0 &&
            $path[0] === "/"
        ){

            // absolute path: replaces the whole base path instead of
            // being appended to the base directory
            return $url . $path;
        }

        if(isset($relative["path"])){

            // strip the last segment (file name) off the base path
            $relative["path"] = explode(
                "/",
                $relative["path"]
            );

            unset($relative["path"][count($relative["path"]) - 1]);
            $relative["path"] = implode("/", $relative["path"]);

            $url .= $relative["path"];
        }

        if(strlen($path) !== 0){

            $url .= "/";
        }

        $url .= $path;

        return $url;
    }

    /*
        Returns true when $url is an http(s) URL whose host is (or
        resolves to) an address outside the private and reserved ranges,
        false otherwise.
    */
    public function validateurl($url){

        $url_parts = parse_url($url);

        // check if required parts are there
        if(
            !isset($url_parts["scheme"]) ||
            !(
                $url_parts["scheme"] == "http" ||
                $url_parts["scheme"] == "https"
            ) ||
            !isset($url_parts["host"])
        ){
            return false;
        }

        $ip =
            str_replace(
                ["[", "]"], // handle ipv6
                "",
                $url_parts["host"]
            );

        // if its not an IP
        if(!filter_var($ip, FILTER_VALIDATE_IP)){

            // resolve domain's IP
            $ip = gethostbyname($url_parts["host"] . ".");
        }

        // check if its localhost
        if(
            filter_var(
                $ip,
                FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
            ) === false
        ){

            return false;
        }

        return true;
    }

    /*
        Download $url into memory.

        $reqtype          self::req_web or self::req_image, selects the
                          request headers that are sent
        $acceptallcodes   when false, HTTP codes above 300 throw
        $referer          Referer for image requests. null derives
                          "scheme://host" from $url
        $redirectcount    internal, number of redirects followed so far

        Redirects (HTTP 300-309) are followed by hand, so that every hop
        goes through validateurl() again. At most 5 hops.

        Returns [ "http" => [version, code], "headers" => [...],
        "body" => string ], plus "format" for image requests (image
        subtype from Content-Type, or false).
        Throws an Exception on any failure.
    */
    public function get($url, $reqtype = self::req_web, $acceptallcodes = false, $referer = null, $redirectcount = 0){

        if($redirectcount === 5){

            throw new Exception("Too many redirects");
        }

        if($url == "https://i.imgur.com/removed.png"){

            throw new Exception("Encountered imgur 404");
        }

        // sanitize URL
        if($this->validateurl($url) === false){

            throw new Exception("Invalid URL");
        }

        $this->clientcache();

        $curl = curl_init();

        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_ENCODING, ""); // default encoding
        curl_setopt($curl, CURLOPT_HEADER, 1);

        switch($reqtype){
            case self::req_web:
                curl_setopt(
                    $curl,
                    CURLOPT_HTTPHEADER,
                    [
                        "User-Agent: " . config::USER_AGENT,
                        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
                        "Accept-Language: en-US,en;q=0.5",
                        "Accept-Encoding: gzip, deflate",
                        "DNT: 1",
                        "Connection: keep-alive",
                        "Upgrade-Insecure-Requests: 1",
                        "Sec-Fetch-Dest: document",
                        "Sec-Fetch-Mode: navigate",
                        "Sec-Fetch-Site: none",
                        "Sec-Fetch-User: ?1"
                    ]
                );
                break;

            case self::req_image:

                if($referer === null){
                    // "https://host/path" -> "https://host"
                    $referer = explode("/", $url, 4);
                    array_pop($referer);

                    $referer = implode("/", $referer);
                }

                curl_setopt(
                    $curl,
                    CURLOPT_HTTPHEADER,
                    [
                        "User-Agent: " . config::USER_AGENT,
                        "Accept: image/avif,image/webp,*/*",
                        "Accept-Language: en-US,en;q=0.5",
                        "Accept-Encoding: gzip, deflate",
                        "DNT: 1",
                        "Connection: keep-alive",
                        "Referer: {$referer}"
                    ]
                );
                break;
        }

        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 30);
        curl_setopt($curl, CURLOPT_TIMEOUT, 30);

        // limit size of payloads
        curl_setopt($curl, CURLOPT_BUFFERSIZE, 1024);
        curl_setopt($curl, CURLOPT_NOPROGRESS, false);
        curl_setopt(
            $curl,
            CURLOPT_PROGRESSFUNCTION,
            function($downloadsize, $downloaded, $uploadsize, $uploaded
        ){

            // if $downloaded exceeds 100MB, abort the transfer
            return ($downloaded > 100000000) ? 1 : 0;
        });

        $body = curl_exec($curl);

        if(curl_errno($curl)){

            throw new Exception(curl_error($curl));
        }

        curl_close($curl);

        // CURLOPT_HEADER is on: peel the header lines off the front of
        // the response, one line per iteration
        $headers = [];
        $http = null;

        while(true){

            $header = explode("\n", $body, 2);
            $body = $header[1];

            if($http === null){

                // http/1.1 200 ok
                $header = explode("/", $header[0], 2);
                $header = explode(" ", $header[1], 3);

                $http = [
                    "version" => (float)$header[0],
                    "code" => (int)$header[1]
                ];

                continue;
            }

            if(trim($header[0]) == ""){

                // reached end of headers
                break;
            }

            $header = explode(":", $header[0], 2);

            // malformed headers
            if(count($header) !== 2){ continue; }

            $headers[strtolower(trim($header[0]))] = trim($header[1]);
        }

        // check http code
        if(
            $http["code"] >= 300 &&
            $http["code"] <= 309
        ){

            // redirect
            if(!isset($headers["location"])){

                throw new Exception("Broken redirect");
            }

            $redirectcount++;

            return $this->get($this->getabsoluteurl($headers["location"], $url), $reqtype, $acceptallcodes, $referer, $redirectcount);
        }else{
            if(
                $acceptallcodes === false &&
                $http["code"] > 300
            ){

                throw new Exception("Remote server returned an error code! ({$http["code"]})");
            }
        }

        // check if data is okay
        switch($reqtype){

            case self::req_image:

                $format = false;

                if(isset($headers["content-type"])){

                    if(stripos($headers["content-type"], "text/html") !== false){

                        throw new Exception("Server returned html");
                    }

                    if(
                        preg_match(
                            '/image\/([^ ]+)/i',
                            $headers["content-type"],
                            $match
                        )
                    ){

                        $format = strtolower($match[1]);

                        // image/x-icon -> icon
                        if(substr(strtolower($format), 0, 2) == "x-"){

                            $format = substr($format, 2);
                        }
                    }
                }

                return [
                    "http" => $http,
                    "format" => $format,
                    "headers" => $headers,
                    "body" => $body
                ];

            default:

                return [
                    "http" => $http,
                    "headers" => $headers,
                    "body" => $body
                ];
        }
    }

    /*
        Stream a remote image straight to the client.
    */
    public function stream_linear_image($url, $referer = null){

        $this->stream($url, $referer, "image");
    }

    /*
        Stream a remote audio file straight to the client.
    */
    public function stream_linear_audio($url, $referer = null){

        $this->stream($url, $referer, "audio");
    }

    /*
        Pipe $url to the client while it downloads.

        $format is "image" or "audio". It selects the request headers and
        is the media type the upstream Content-Type has to contain
        (application/octet-stream is let through and reported as jpeg).

        Throws an Exception when the URL is invalid, on cURL errors, on
        non-200 responses and on unexpected Content-Types.
    */
    private function stream($url, $referer, $format){

        $this->clientcache();

        $this->url = $url;
        $this->format = $format;

        // sanitize URL
        if($this->validateurl($url) === false){

            throw new Exception("Invalid URL");
        }

        $curl = curl_init();

        // set headers
        if($referer === null){
            // "https://host/path" -> "https://host"
            $referer = explode("/", $url, 4);
            array_pop($referer);

            $referer = implode("/", $referer);
        }

        switch($format){

            case "image":
                curl_setopt(
                    $curl,
                    CURLOPT_HTTPHEADER,
                    [
                        "User-Agent: " . config::USER_AGENT,
                        "Accept: image/avif,image/webp,*/*",
                        "Accept-Language: en-US,en;q=0.5",
                        "Accept-Encoding: gzip, deflate, br",
                        "DNT: 1",
                        "Connection: keep-alive",
                        "Referer: {$referer}"
                    ]
                );
                break;

            case "audio":
                curl_setopt(
                    $curl,
                    CURLOPT_HTTPHEADER,
                    [
                        "User-Agent: " . config::USER_AGENT,
                        "Accept: audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5",
                        "Accept-Language: en-US,en;q=0.5",
                        "Accept-Encoding: gzip, deflate, br",
                        "DNT: 1",
                        "Connection: keep-alive",
                        "Referer: {$referer}"
                    ]
                );
                break;
        }

        // follow redirects
        // NOTE(review): only the first URL goes through validateurl(),
        // hops followed by cURL itself are not re-validated
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($curl, CURLOPT_MAXREDIRS, 5);
        curl_setopt($curl, CURLOPT_AUTOREFERER, true);

        // set url
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_ENCODING, ""); // default encoding

        // timeouts + ssl verification (verification stays enabled)
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($curl, CURLOPT_TIMEOUT, 30);

        // body chunks are echoed to the client as they arrive
        curl_setopt(
            $curl,
            CURLOPT_WRITEFUNCTION,
            function($c, $data){

                if(curl_getinfo($c, CURLINFO_HTTP_CODE) !== 200){

                    throw new Exception("Serber returned a non-200 code");
                }

                echo $data;
                return strlen($data);
            }
        );

        $this->empty_header = false;
        $this->cont = false;
        $this->headers_tmp = [];
        $this->headers = [];

        // called once per header line, for every response in the
        // redirect chain. a blank line marks the end of one response's
        // headers
        curl_setopt(
            $curl,
            CURLOPT_HEADERFUNCTION,
            function($c, $header){

                $head = trim($header);
                $len = strlen($head);

                if($len === 0){

                    $this->empty_header = true;
                    $this->headers_tmp = [];
                }else{

                    $this->empty_header = false;
                    $this->headers_tmp[] = $head;
                }

                foreach($this->headers_tmp as $h){

                    // parse headers
                    $h = explode(":", $h, 2);

                    if(count($h) !== 2){

                        // no colon: this is the status line
                        if(curl_getinfo($c, CURLINFO_HTTP_CODE) !== 200){

                            // not HTTP 200, probably a redirect
                            $this->cont = false;
                        }else{

                            $this->cont = true;
                        }

                        // is HTTP 200, just ignore that line
                        continue;
                    }

                    $this->headers[strtolower(trim($h[0]))] = trim($h[1]);
                }

                if(
                    $this->cont &&
                    $this->empty_header
                ){

                    // end of the headers of a HTTP 200 response:
                    // validate them and send our own headers

                    // get content type
                    if(isset($this->headers["content-type"])){

                        $octet_check = stripos($this->headers["content-type"], "octet-stream");

                        if(
                            stripos($this->headers["content-type"], $this->format) === false &&
                            $octet_check === false
                        ){

                            throw new Exception("Resource reported invalid Content-Type");
                        }

                    }else{

                        throw new Exception("Resource is not an {$this->format} (no Content-Type)");
                    }

                    $filetype = explode("/", $this->headers["content-type"]);

                    if(!isset($filetype[1])){

                        throw new Exception("Malformed Content-Type header");
                    }

                    if($octet_check !== false){

                        $filetype[1] = "jpeg";
                    }

                    header("Content-Type: {$this->format}/{$filetype[1]}");

                    // give payload size
                    if(isset($this->headers["content-length"])){

                        header("Content-Length: {$this->headers["content-length"]}");
                    }

                    // give filename
                    $this->getfilenameheader($this->headers, $this->url, $filetype[1]);
                }

                return strlen($header);
            }
        );

        curl_exec($curl);

        if(curl_errno($curl)){

            throw new Exception(curl_error($curl));
        }

        curl_close($curl);
    }

    /*
        Send a Content-Disposition header with a file name for the
        payload. The name comes from, in that order: the upstream
        Content-Disposition header, the last path segment of $url, or the
        host name of $url. "." . $filetype is always appended.
    */
    public function getfilenameheader($headers, $url, $filetype = "jpg"){

        // get filename from content-disposition header
        if(isset($headers["content-disposition"])){

            preg_match(
                '/filename=([^;]+)/',
                $headers["content-disposition"],
                $filename
            );

            if(isset($filename[1])){

                // "inline;" added: a disposition type is mandatory and
                // the URL branch below already sends it
                header("Content-Disposition: inline; filename=\"" . trim($filename[1], "\"'") . "." . $filetype . "\"");
                return;
            }
        }

        // get filename from URL
        $filename = parse_url($url, PHP_URL_PATH);

        if(
            $filename === null ||
            $filename === false // parse_url() failed
        ){

            // everything failed! rename file to domain name
            header("Content-Disposition: inline; filename=\"" . parse_url($url, PHP_URL_HOST) . "." . $filetype . "\"");
            return;
        }

        // remove extension from filename
        $filename =
            explode(
                ".",
                basename($filename)
            );

        if(count($filename) > 1){
            array_pop($filename);
        }

        $filename = implode(".", $filename);

        header("Content-Disposition: inline; filename=\"" . $filename . "." . $filetype . "\"");
    }

    /*
        Work out the image format of a payload returned by get().

        The body is sniffed with finfo first. The "format" reported by
        get() is only used when sniffing fails. $imagick is set to a
        fresh Imagick instance.

        Returns the format name, or false when Imagick does not list it
        as a supported format.
    */
    public function getimageformat($payload, &$imagick){

        $finfo = new finfo(FILEINFO_MIME_TYPE);
        $format = $finfo->buffer($payload["body"]);

        if($format === false){

            if($payload["format"] === false){

                header("X-Error: Could not parse format");

                // this class does not define favicon404(): only call it
                // when a subclass provides it, otherwise serve the
                // generic 404 image. both terminate the script
                if(method_exists($this, "favicon404")){

                    $this->favicon404();
                }else{

                    $this->do404();
                }
            }

            $format = $payload["format"];
        }else{

            $format_tmp = explode("/", $format, 2);

            if($format_tmp[0] == "image"){

                $format_tmp = strtolower($format_tmp[1]);

                if(substr($format_tmp, 0, 2) == "x-"){

                    $format_tmp = substr($format_tmp, 2);
                }

                $format = $format_tmp;
            }
        }

        // map mime subtypes to the names used for the Imagick lookup
        switch($format){

            case "tiff": $format = "gif"; break;
            case "vnd.microsoft.icon": $format = "ico"; break;
            case "icon": $format = "ico"; break;
            case "svg+xml": $format = "svg"; break;
        }

        $imagick = new Imagick();

        if(
            !in_array(
                $format,
                array_map("strtolower", $imagick->queryFormats())
            )
        ){

            // format is not in imagemagick's list. the caller may still
            // let imagemagick try to detect it
            $format = false;
        }

        return $format;
    }

    /*
        Client side caching, skipped when $this->cache is false.

        Sends a fixed Last-Modified date. When the client makes a
        conditional request, it is answered with 304 and the script stops.
    */
    public function clientcache(){

        if($this->cache === false){

            return;
        }

        header("Last-Modified: Thu, 01 Oct 1970 00:00:00 GMT");

        $headers = getallheaders();

        if(
            isset($headers["If-Modified-Since"]) ||
            isset($headers["If-Unmodified-Since"])
        ){

            http_response_code(304); // 304: Not Modified
            die();
        }
    }
}

BIN
src/lib/favicon404.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 744 B

1342
src/lib/frontend.php Normal file

File diff suppressed because it is too large Load Diff

610
src/lib/fuckhtml.php Normal file
View File

@ -0,0 +1,610 @@
<?php
class fuckhtml{

    /*
        Small regex based HTML scraper. Elements are plain arrays:
        [
            "tagName" => lowercase tag name,
            "startPos" => offset of the opening tag in the loaded HTML,
            "endPos" => offset just past the closing tag (0 if none found),
            "attributes" => [ name => value ],
            "innerHTML" => string, or null if no closing tag was found,
            "outerHTML" => string (only set if a closing tag was found)
        ]
    */

    // declared explicitly: creating properties on the fly is deprecated
    // since PHP 8.2
    public $html;   // currently loaded HTML
    public $strlen; // byte length of $html

    public function __construct($html = null, $isfile = false){

        if($html !== null){

            $this->load($html, $isfile);
        }
    }

    /*
        Load HTML to work on.

        $html is a HTML string, an element array (its innerHTML is
        loaded), or a file path when $isfile is true.
    */
    public function load($html, $isfile = false){

        if(is_array($html)){

            if(!isset($html["innerHTML"])){

                throw new Exception("(load) Supplied array doesn't contain an innerHTML index");
            }
            $html = $html["innerHTML"];
        }

        if($isfile){

            $handle = fopen($html, "r");
            $fetch = fread($handle, filesize($html));
            fclose($handle);

            $this->html = $fetch;
        }else{

            $this->html = $html;
        }

        $this->strlen = strlen($this->html);
    }

    public function getloadedhtml(){

        return $this->html;
    }

    /*
        Returns every element with the given tag name ("*" for all tags),
        in document order.
    */
    public function getElementsByTagName(string $tagname){

        $out = [];

        /*
            Scrape start of the tag. Example
            <div class="mydiv"> ...
        */

        if($tagname == "*"){

            $tagname = '[A-Za-z0-9._-]+';
        }else{

            $tagname = preg_quote(strtolower($tagname), "/");
        }

        preg_match_all(
            '/<\s*(' . $tagname . ')(\s(?:[^>\'"]*|"[^"]*"|\'[^\']*\')+)?\s*>/i',
            /* '/<\s*(' . $tagname . ')(\s[\S\s]*?)?>/i', */
            $this->html,
            $starting_tags,
            PREG_OFFSET_CAPTURE
        );

        for($i=0; $i<count($starting_tags[0]); $i++){

            /*
                Parse attributes
            */
            $attributes = [];

            preg_match_all(
                '/([^\/\s\\=]+)(?:\s*=\s*("[^"]*"|\'[^\']*\'|[^\s]*))?/i',
                $starting_tags[2][$i][0],
                $regex_attributes
            );

            for($k=0; $k<count($regex_attributes[0]); $k++){

                if(trim($regex_attributes[2][$k]) == ""){

                    // attribute without a value, eg. <input disabled>
                    // name is lowercased like every other attribute name
                    $attributes[strtolower($regex_attributes[1][$k])] =
                        "true";

                    continue;
                }

                $attributes[strtolower($regex_attributes[1][$k])] =
                    trim($regex_attributes[2][$k], "'\" \n\r\t\v\x00");
            }

            $out[] = [
                "tagName" => strtolower($starting_tags[1][$i][0]),
                "startPos" => $starting_tags[0][$i][1],
                "endPos" => 0,
                "startTag" => $starting_tags[0][$i][0],
                "attributes" => $attributes,
                "innerHTML" => null
            ];
        }

        /*
            Get innerHTML
        */
        // get closing tag positions
        preg_match_all(
            '/<\s*\/\s*(' . $tagname . ')\s*>/i',
            $this->html,
            $regex_closing_tags,
            PREG_OFFSET_CAPTURE
        );

        // merge opening and closing tags together
        for($i=0; $i<count($regex_closing_tags[1]); $i++){

            $out[] = [
                "tagName" => strtolower($regex_closing_tags[1][$i][0]),
                "endTag" => $regex_closing_tags[0][$i][0],
                "startPos" => $regex_closing_tags[0][$i][1]
            ];
        }

        // document order. the callback has to return an integer,
        // returning a boolean is deprecated
        usort(
            $out,
            function($a, $b){

                return $a["startPos"] <=> $b["startPos"];
            }
        );

        // compute the indent level for each element
        $level = [];
        $count = count($out);

        for($i=0; $i<$count; $i++){

            if(!isset($level[$out[$i]["tagName"]])){

                $level[$out[$i]["tagName"]] = 0;
            }

            if(isset($out[$i]["startTag"])){

                // encountered starting tag
                $level[$out[$i]["tagName"]]++;
                $out[$i]["level"] = $level[$out[$i]["tagName"]];
            }else{

                // encountered closing tag
                $out[$i]["level"] = $level[$out[$i]["tagName"]];
                $level[$out[$i]["tagName"]]--;
            }
        }

        // if the indent level is the same for a div,
        // we encountered _THE_ closing tag
        for($i=0; $i<$count; $i++){

            if(!isset($out[$i]["startTag"])){

                continue;
            }

            for($k=$i; $k<$count; $k++){

                if(
                    isset($out[$k]["endTag"]) &&
                    $out[$i]["tagName"] == $out[$k]["tagName"] &&
                    $out[$i]["level"]
                    === $out[$k]["level"]
                ){

                    $startlen = strlen($out[$i]["startTag"]);
                    $endlen = strlen($out[$k]["endTag"]);

                    $out[$i]["endPos"] = $out[$k]["startPos"] + $endlen;

                    $out[$i]["innerHTML"] =
                        substr(
                            $this->html,
                            $out[$i]["startPos"] + $startlen,
                            $out[$k]["startPos"] - ($out[$i]["startPos"] + $startlen)
                        );

                    $out[$i]["outerHTML"] =
                        substr(
                            $this->html,
                            $out[$i]["startPos"],
                            $out[$k]["startPos"] - $out[$i]["startPos"] + $endlen
                        );

                    break;
                }
            }
        }

        // filter out ending divs
        for($i=0; $i<$count; $i++){

            if(isset($out[$i]["endTag"])){

                unset($out[$i]);
            }else{

                unset($out[$i]["startTag"]);
            }
        }

        return array_values($out);
    }

    /*
        Returns the elements that carry an attribute called $name.

        $collection is a list of elements to search in, a tag name, or
        null to search every element of the loaded HTML.
    */
    public function getElementsByAttributeName(string $name, $collection = null){

        if($collection === null){

            $collection = $this->getElementsByTagName("*");
        }elseif(is_string($collection)){

            $collection = $this->getElementsByTagName($collection);
        }

        $return = [];
        foreach($collection as $elem){

            foreach($elem["attributes"] as $attrib_name => $attrib_value){

                if($attrib_name == $name){

                    $return[] = $elem;
                    continue 2;
                }
            }
        }

        return $return;
    }

    /*
        Returns the elements whose $name attribute contains every
        space-separated word of $value, in any order (class matching).
    */
    public function getElementsByFuzzyAttributeValue(string $name, string $value, $collection = null){

        $elems = $this->getElementsByAttributeName($name, $collection);

        $value =
            explode(
                " ",
                trim(
                    preg_replace(
                        '/ +/',
                        " ",
                        $value
                    )
                )
            );

        $return = [];

        foreach($elems as $elem){

            // only look at the requested attribute. looking at all of
            // them made class="x" lookups match eg. id="x" as well
            $attrib_value = $this->getAttributeValue($elem, $name);

            if($attrib_value === null){

                continue;
            }

            // every wanted word must be present
            if(count(array_diff($value, explode(" ", $attrib_value))) === 0){

                $return[] = $elem;
            }
        }

        return $return;
    }

    /*
        Returns the elements whose $name attribute equals $value.
    */
    public function getElementsByAttributeValue(string $name, string $value, $collection = null){

        $elems = $this->getElementsByAttributeName($name, $collection);

        $return = [];

        foreach($elems as $elem){

            // only compare the requested attribute, not all of them
            $attrib_value = $this->getAttributeValue($elem, $name);

            if(
                $attrib_value !== null &&
                $attrib_value == $value
            ){

                $return[] = $elem;
            }
        }

        return $return;
    }

    /*
        Returns the first element with id="$idname", or false.
    */
    public function getElementById(string $idname, $collection = null){

        $id = $this->getElementsByAttributeValue("id", $idname, $collection);

        if(count($id) !== 0){

            return $id[0];
        }

        return false;
    }

    public function getElementsByClassName(string $classname, $collection = null){

        return $this->getElementsByFuzzyAttributeValue("class", $classname, $collection);
    }

    /*
        Returns the value of attribute $name of $elem, or null when the
        element has no such attribute. Uses the same comparison as
        getElementsByAttributeName().
    */
    private function getAttributeValue(array $elem, string $name){

        foreach($elem["attributes"] as $attrib_name => $attrib_value){

            if($attrib_name == $name){

                return $attrib_value;
            }
        }

        return null;
    }

    /*
        Strip tags and decode entities.

        $html is a HTML string or an element array. Newlines and <br>
        split the text into lines, which are joined with "\n" when
        $whitespace is true and with a space otherwise. $trim trims every
        line and the final result.
    */
    public function getTextContent($html, $whitespace = false, $trim = true){

        if(is_array($html)){

            if(!isset($html["innerHTML"])){

                throw new Exception("(getTextContent) Supplied array doesn't contain an innerHTML index");
            }

            $html = $html["innerHTML"];
        }

        $html = preg_split('/\n|<\/?br>/i', $html);

        $out = "";
        for($i=0; $i<count($html); $i++){

            $tmp =
                html_entity_decode(
                    strip_tags(
                        $html[$i]
                    ),
                    ENT_QUOTES | ENT_XML1, "UTF-8"
                );

            if($trim){

                $tmp = trim($tmp);
            }

            $out .= $tmp;

            if($whitespace === true){

                $out .= "\n";
            }else{

                $out .= " ";
            }
        }

        if($trim){

            return trim($out);
        }

        return $out;
    }

    /*
        Best-effort conversion of a javascript object literal into a PHP
        array: unquoted keys and values get quoted, then the result goes
        through json_decode() (which returns null on failure).
    */
    public function parseJsObject(string $json){

        $bracket = false;           // quote character we are inside of
        $is_close_bracket = false;  // current char closed a quoted string
        $escape = false;            // previous char was an unescaped backslash
        $json_out = null;
        $last_char = null;
        $keyword_check = null;

        for($i=0; $i<strlen($json); $i++){

            switch($json[$i]){

                case "\"":
                case "'":
                    if($escape === true){

                        // escaped quote, does not open or close anything
                        break;
                    }

                    if($json[$i] == $bracket){

                        $bracket = false;
                        $is_close_bracket = true;

                    }else{

                        if($bracket === false){

                            $bracket = $json[$i];
                        }
                    }
                    break;

                default:
                    $is_close_bracket = false;
                    break;
            }

            // a backslash escapes the next char, unless the backslash is
            // escaped itself. (this used to test $lastchar, which was
            // never updated, so "foo\\" never closed its string)
            $escape = ($json[$i] == "\\" && $escape === false);

            if(
                $bracket === false &&
                $is_close_bracket === false
            ){

                // do keyword check
                $keyword_check .= $json[$i];

                if(in_array($json[$i], [":", "{"])){

                    $keyword_check = substr($keyword_check, 0, -1);

                    if(
                        preg_match(
                            '/function|array|return/i',
                            $keyword_check
                        )
                    ){

                        $json_out =
                            preg_replace(
                                '/[{"]*' . preg_quote($keyword_check, "/") . '$/',
                                "",
                                $json_out
                            );
                    }

                    $keyword_check = null;
                }

                // here we know we're not iterating over a quoted string
                switch($json[$i]){

                    case "[":
                    case "{":
                        $json_out .= $json[$i];
                        break;

                    case "]":
                    case "}":
                    case ",":
                    case ":":
                        // close the quote of an unquoted token
                        if(!in_array($last_char, ["[", "{", "}", "]", "\""])){

                            $json_out .= "\"";
                        }

                        $json_out .= $json[$i];
                        break;

                    default:
                        // open the quote of an unquoted token
                        if(in_array($last_char, ["{", "[", ",", ":"])){

                            $json_out .= "\"";
                        }

                        $json_out .= $json[$i];
                        break;
                }
            }else{

                $json_out .= $json[$i];
            }

            $last_char = $json[$i];
        }

        return json_decode($json_out, true);
    }

    /*
        Decode \uXXXX and \xXX escapes of a javascript string. \n and \r
        escapes are replaced with a space.
    */
    public function parseJsString($string){

        return
            preg_replace_callback(
                '/\\\u[A-Fa-f0-9]{4}|\\\x[A-Fa-f0-9]{2}|\\\n|\\\r/',
                function($match){

                    switch($match[0][1]){

                        case "u":
                            return json_decode('"' . $match[0] . '"');

                        case "x":
                            return mb_convert_encoding(
                                stripcslashes($match[0]),
                                "utf-8",
                                "windows-1252"
                            );

                        default:
                            return " ";
                    }
                },
                $string
            );
    }

    /*
        Returns the first balanced [...] or {...} found in $json, as a
        string. Brackets inside quoted strings are ignored. Returns null
        when nothing balanced is found.
    */
    public function extract_json($json){

        $len = strlen($json);
        $array_level = 0;
        $object_level = 0;
        $in_quote = null;
        $start = null;

        for($i=0; $i<$len; $i++){

            switch($json[$i]){

                case "[":
                    if($in_quote === null){

                        $array_level++;
                        if($start === null){

                            $start = $i;
                        }
                    }
                    break;

                case "]":
                    if($in_quote === null){

                        $array_level--;
                    }
                    break;

                case "{":
                    if($in_quote === null){

                        $object_level++;
                        if($start === null){

                            $start = $i;
                        }
                    }
                    break;

                case "}":
                    if($in_quote === null){

                        $object_level--;
                    }
                    break;

                case "\"":
                case "'":
                    if(
                        $i !== 0 &&
                        $json[$i - 1] !== "\\"
                    ){
                        // found a non-escaped quote

                        if($in_quote === null){

                            // open quote
                            $in_quote = $json[$i];

                        }elseif($in_quote === $json[$i]){

                            // close quote
                            $in_quote = null;
                        }
                    }
                    break;
            }

            if(
                $start !== null &&
                $array_level === 0 &&
                $object_level === 0
            ){

                return substr($json, $start, $i - $start + 1);
            }
        }
    }
}

BIN
src/lib/img404.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 216 B

132
src/lib/type-todo.php Normal file
View File

@ -0,0 +1,132 @@
public function type($get){

    /*
        Autocomplete handler.

        $get["s"] is the search typed so far, $get["bang"] the bang
        typed so far (without the "!").

        Returns a list of suggestions: "s" is the suggested text, "n" the
        bang name (when a bang is involved) and "u" the bang URL (default
        bang list only).

        NOTE(review): database credentials are hard-coded below, move
        them to the config before this file is put to use.
    */

    $search = $get["s"];
    $bang = $get["bang"];

    // only set when a bang was looked up in the database
    $row = null;

    if(empty($search)){

        if(!empty($bang)){

            // !youtube
            $conn = pg_connect("host=localhost dbname=4get user=postgres password=postgres");

            if($conn === false){

                throw new Exception("Failed to connect to the bang database");
            }

            pg_prepare($conn, "bang_get", "SELECT bang,name FROM bangs WHERE bang LIKE $1 ORDER BY bang ASC LIMIT 8");

            // prefix search. % and _ typed by the user are escaped so
            // they are not treated as LIKE wildcards
            $q = pg_execute($conn, "bang_get", [addcslashes($bang, "\\%_") . "%"]);

            $results = [];
            while($row = pg_fetch_array($q, null, PGSQL_ASSOC)){

                $results[] = [
                    "s" => "!" . $row["bang"],
                    "n" => $row["name"]
                ];
            }

            return $results;
        }else{

            // everything is empty
            // lets just return a bang list
            return [
                [
                    "s" => "!w",
                    "n" => "Wikipedia",
                    "u" => "https://en.wikipedia.org/wiki/Special:Search?search={%q%}"
                ],
                [
                    "s" => "!4ch",
                    "n" => "4chan Board",
                    "u" => "https://find.4chan.org/?q={%q%}"
                ],
                [
                    "s" => "!a",
                    "n" => "Amazon",
                    "u" => "https://www.amazon.com/s?k={%q%}"
                ],
                [
                    "s" => "!e",
                    "n" => "eBay",
                    "u" => "https://www.ebay.com/sch/items/?_nkw={%q%}"
                ],
                [
                    "s" => "!so",
                    "n" => "Stack Overflow",
                    "u" => "http://stackoverflow.com/search?q={%q%}"
                ],
                [
                    "s" => "!gh",
                    "n" => "GitHub",
                    "u" => "https://github.com/search?utf8=%E2%9C%93&q={%q%}"
                ],
                [
                    "s" => "!tw",
                    "n" => "Twitter",
                    "u" => "https://twitter.com/search?q={%q%}"
                ],
                [
                    "s" => "!r",
                    "n" => "Reddit",
                    "u" => "https://www.reddit.com/search?q={%q%}"
                ],
            ];
        }
    }

    // now we know search isnt empty
    if(!empty($bang)){

        // check if the bang exists
        $conn = pg_connect("host=localhost dbname=4get user=postgres password=postgres");

        if($conn === false){

            throw new Exception("Failed to connect to the bang database");
        }

        pg_prepare($conn, "bang_get_single", "SELECT bang,name FROM bangs WHERE bang = $1 LIMIT 1");
        $q = pg_execute($conn, "bang_get_single", [$bang]);

        $row = pg_fetch_array($q, null, PGSQL_ASSOC);

        if(isset($row["bang"])){

            // known bang: prefix every suggestion with it
            $bang = "!$bang ";
        }else{

            // unknown bang: suggest plain phrases
            $bang = "";
        }
    }

    try{
        $res = $this->get(
            "https://duckduckgo.com/ac/",
            [
                "q" => strtolower($search)
            ],
            ddg::req_xhr
        );

        $res = json_decode($res, true);

    }catch(Exception $e){

        throw new Exception("Failed to get /ac/");
    }

    // json_decode() returns null on garbage, count() would choke on it
    if(!is_array($res)){

        throw new Exception("Failed to decode /ac/");
    }

    $arr = [];
    for($i=0; $i<count($res); $i++){

        // 8 suggestions at most
        if($i === 8){break;}

        if(empty($bang)){

            $arr[] = [
                "s" => $res[$i]["phrase"]
            ];
        }else{

            $arr[] = [
                "s" => $bang . $res[$i]["phrase"],
                "n" => $row["name"]
            ];
        }
    }

    return $arr;
}

222
src/music.php Normal file
View File

@ -0,0 +1,222 @@
<?php
/*
	Bootstrap: configuration, frontend helper, scraper and its filters
*/
include "data/config.php";
include "lib/frontend.php";

$frontend = new frontend();
[$scraper, $filters] = $frontend->getscraperfilters("music");
$get = $frontend->parsegetfilters($_GET, $filters);

/*
	Bot protection (captcha)
*/
include "lib/bot_protection.php";
new bot_protection($frontend, $get, $filters, "music", true);

// template variables handed to search.html at the very end of the script
$payload = [
	"timetaken" => microtime(true),
	"class" => "",
	"right-left" => "",
	"right-right" => "",
	"left" => ""
];

try{
	$results = $scraper->music($get);
}catch(Exception $error){
	$frontend->drawscrapererror($error->getMessage(), $get, "music", $payload["timetaken"]);
}

// rendered HTML per result category; key order is the order used for
// the right-hand panes further down
$categories = [
	"song" => "",
	"author" => "",
	"playlist" => "",
	"album" => "",
	"podcast" => "",
	"user" => ""
];

/*
	Pick the main (left) container: the first non-empty category,
	checked in this priority order
*/
$main = null;
foreach(["song", "album", "playlist", "podcast", "author", "user"] as $candidate){
	if(count($results[$candidate]) !== 0){
		$main = $candidate;
		break;
	}
}

if($main === null){
	// every category is empty: show the "no results" page and stop
	echo
		$frontend->drawerror(
			"Nobody here but us chickens!",
			'Have you tried:' .
			'<ul>' .
			'<li>Using a different scraper</li>' .
			'<li>Using fewer keywords</li>' .
			'<li>Defining broader filters (Is NSFW turned off?)</li>' .
			'</ul>' .
			'</div>'
		);
	die();
}

/*
	Render every result of every category
*/
foreach($categories as $name => $data){
	foreach($results[$name] as $item){
		// metadata line: date, views, followers, author (only those present)
		$meta = [];

		// isset() is false for null values, so it covers both checks
		if(isset($item["date"])){
			$meta[] = date("jS M y @ g:ia", $item["date"]);
		}
		if(isset($item["views"])){
			$meta[] = number_format($item["views"]) . " views";
		}
		if(isset($item["followers"])){
			$meta[] = number_format($item["followers"]) . " followers";
		}
		if(isset($item["author"]["name"])){
			$meta[] = $item["author"]["name"];
		}
		$greentext = implode("", $meta);

		$duration =
			isset($item["duration"])
			? $frontend->s_to_timestamp($item["duration"])
			: null;

		// only results of the main category are flagged for tabindex
		$tabindex = ($name == $main);

		// songs and podcasts with a stream endpoint get an inline player
		$customhtml = null;
		if(
			in_array($name, ["song", "podcast"], true) &&
			$item["stream"]["endpoint"] !== null
		){
			$customhtml =
				'<audio src="/audio/' . $item["stream"]["endpoint"] . '?s=' . urlencode($item["stream"]["url"]) . '" controls autostart="false" preload="none">';
		}

		$categories[$name] .= $frontend->drawtextresult($item, $greentext, $duration, $get["s"], $tabindex, $customhtml);
	}
}

$payload["left"] = $categories[$main];

// dont re-draw the main category on the right
unset($categories[$main]);

/*
	Populate the right-hand side, alternating between both columns
*/
$i = 1;
foreach($categories as $name => $value){
	if($value == ""){
		continue;
	}

	$write = ($i % 2 === 1) ? "right-left" : "right-right";

	$payload[$write] .=
		'<div class="answer-wrapper">' .
		'<input id="answer' . $i . '" class="spoiler" type="checkbox">' .
		'<div class="answer">' .
		'<div class="answer-title">' .
		'<a class="answer-title" href="?s=' . urlencode($get["s"]) .
		'&type=' . $name . '"><h2>' . ucfirst($name) . 's</h2></a>' .
		'</div>' .
		$value .
		'</div>' .
		'<label class="spoiler-button" for="answer' . $i . '"></label></div>';

	$i++;
}

// at least one pane was written on the right
if($i !== 1){
	$payload["class"] = " has-answer";
}

if($results["npt"] !== null){
	$payload["left"] .=
		'<a href="' . $frontend->htmlnextpage($get, $results["npt"], "music") . '" class="nextpage">Next page &gt;</a>';
}

echo $frontend->load("search.html", $payload);

86
src/news.php Normal file
View File

@ -0,0 +1,86 @@
<?php
/*
	Bootstrap: configuration, frontend helper, scraper and its filters
*/
include "data/config.php";
include "lib/frontend.php";

$frontend = new frontend();
[$scraper, $filters] = $frontend->getscraperfilters("news");
$get = $frontend->parsegetfilters($_GET, $filters);

/*
	Bot protection (captcha)
*/
include "lib/bot_protection.php";
new bot_protection($frontend, $get, $filters, "news", true);

// template variables handed to search.html at the very end of the script
$payload = [
	"timetaken" => microtime(true),
	"class" => "",
	"right-left" => "",
	"right-right" => "",
	"left" => ""
];

try{
	$results = $scraper->news($get);
}catch(Exception $error){
	$frontend->drawscrapererror($error->getMessage(), $get, "news", $payload["timetaken"]);
}

/*
	Populate links
*/
$articles = $results["news"];

if(count($articles) === 0){
	// nothing to list: show the hint box instead
	$payload["left"] =
		'<div class="infobox">' .
		"<h1>Nobody here but us chickens!</h1>" .
		'Have you tried:' .
		'<ul>' .
		'<li>Using a different scraper</li>' .
		'<li>Using fewer keywords</li>' .
		'<li>Defining broader filters (Is NSFW turned off?)</li>' .
		'</ul>' .
		'</div>';
}

foreach($articles as $news){
	// metadata line: publication date and author, when available
	$meta = [];

	if($news["date"] !== null){
		$meta[] = date("jS M y @ g:ia", $news["date"]);
	}
	if($news["author"] !== null){
		$meta[] = htmlspecialchars($news["author"]);
	}

	$greentext = count($meta) !== 0 ? implode("", $meta) : null;

	// news results carry no duration
	$n = null;
	$payload["left"] .= $frontend->drawtextresult($news, $greentext, $n, $get["s"]);
}

if($results["npt"] !== null){
	$payload["left"] .=
		'<a href="' . $frontend->htmlnextpage($get, $results["npt"], "news") . '" class="nextpage">Next page &gt;</a>';
}

echo $frontend->load("search.html", $payload);

42
src/opensearch.php Normal file
View File

@ -0,0 +1,42 @@
<?php
/*
	Emits the OpenSearch description document for this instance.

	GET parameters:
		ac   optional autocomplete scraper name; any value other than
		     "disabled" adds a suggestions <Url> pointing at /api/v1/ac
*/
header("Content-Type: application/xml");
include "data/config.php";

// scheme + host as seen by the client, escaped for use in XML text/attributes
$domain =
	htmlspecialchars(
		(isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == "on" ? "https" : "http") .
		'://' . $_SERVER["HTTP_HOST"]
	);

// the Host header may carry an explicit port ("xyz.onion:8080"), so allow
// an optional ":port" suffix when detecting onion addresses
$onion = preg_match('/\.onion(:[0-9]+)?$/', $domain) === 1;

echo
	'<?xml version="1.0" encoding="UTF-8"?>' .
	'<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">' .
	'<ShortName>' . htmlspecialchars(config::SERVER_NAME) . ($onion ? " (onion)" : "") . '</ShortName>' .
	'<InputEncoding>UTF-8</InputEncoding>' .
	'<Image width="16" height="16">' . $domain . '/favicon.ico</Image>' .
	'<Url type="text/html" method="GET" template="' . $domain . '/web?s={searchTerms}"/>';

if(
	isset($_GET["ac"]) &&
	is_string($_GET["ac"]) &&
	$_GET["ac"] != "disabled"
){
	// the value ends up inside a URL query string: URL-encode it first
	// (so "&", "{", spaces... cannot alter the template), then escape the
	// result for the XML attribute
	echo '<Url rel="suggestions" type="application/x-suggestions+json" template="' . $domain . '/api/v1/ac?s={searchTerms}&amp;scraper=' . htmlspecialchars(urlencode($_GET["ac"])) . '"/>';
}

echo '</OpenSearchDescription>';

36
src/oracles/base.php Normal file
View File

@ -0,0 +1,36 @@
<?php
/**
 * Base class for "oracles": small built-in answer providers.
 *
 * Subclasses override check_query() to claim a query and
 * generate_response() to produce the answer for it.
 */
abstract class oracle {
	/**
	 * Metadata shown alongside the answer so the user can tell which
	 * oracle produced it (useful for ambiguous queries).
	 */
	public $info = [
		"name" => "some oracle"
	];

	/**
	 * Tell whether the query is meant for this oracle.
	 *
	 * @param string $q search string taken from $_GET
	 * @return bool true when this oracle wants to answer; the base
	 *              implementation never does
	 */
	public function check_query($q) {
		return false;
	}

	/**
	 * Produce the answer for the query.
	 *
	 * Answer format: ["ans1 title" => "ans1", ...]
	 * If it only becomes apparent during generation that the query is
	 * not for this oracle after all, return an empty string: that kills
	 * the oracle pane.
	 *
	 * @param string $q search string taken from $_GET
	 * @return array|string answer map, or "" for no answer (the base
	 *                      implementation always returns "")
	 */
	public function generate_response($q) {
		return "";
	}
}
// backwards compatibility: PHP < 8.0 has no native str_starts_with()
if (!function_exists('str_starts_with')) {
	/**
	 * @param string $haystack string to inspect
	 * @param string $needle   prefix to look for
	 * @return bool true when $haystack begins with $needle
	 */
	function str_starts_with($haystack, $needle) {
		$prefix_length = strlen($needle);
		return 0 === strncmp($haystack, $needle, $prefix_length);
	}
}
// backwards compatibility: PHP < 8.0 has no native str_contains()
if (!function_exists('str_contains')) {
	/**
	 * @param string $haystack string to search in
	 * @param string $needle   substring to look for
	 * @return bool true when $needle occurs anywhere in $haystack
	 */
	function str_contains($haystack, $needle) {
		$needle = (string)$needle;
		// the native function reports the empty string as always contained;
		// strpos() on the PHP versions this polyfill targets warns about an
		// empty needle and returns false, so handle that case explicitly
		return $needle === '' || strpos((string)$haystack, $needle) !== false;
	}
}
?>

165
src/oracles/calc.php Normal file
View File

@ -0,0 +1,165 @@
<?php
include_once("oracles/base.php");
/**
 * Oracle answering plain arithmetic expressions such as
 * "21+9*(3+2^9)+1" with a small hand-written tokenizer and evaluator.
 */
class calculator extends oracle {
	public $info = [
		"name" => "calculator"
	];

	/**
	 * Accept queries made up exclusively of digits, operators,
	 * parentheses and spaces.
	 *
	 * @param string $q search string
	 * @return bool
	 */
	public function check_query($q) {
		// straight numerics should go to that oracle
		if (is_numeric($q)) {
			return false;
		}
		// an empty/blank query is not an expression
		if (trim($q) === "") {
			return false;
		}
		// all chars should be number-y or operator-y
		return strspn($q, "1234567890.+-/*^%() ") === strlen($q);
	}

	/**
	 * Tokenize and evaluate the expression.
	 *
	 * @param string $q search string
	 * @return array|string ["<expression> = " => <result>], or "" when the
	 *                      input is not a well-formed expression
	 */
	public function generate_response($q)
	{
		$nums = str_split("1234567890.");
		$ops = str_split("+-/*^%;");
		$grouping = str_split("()");

		$q = str_replace(" ", "", $q);

		// backstop for the parser so it catches the last
		// numeric token
		$q .= ";";

		// the following comments refer to this example input:
		// 21+9*(3+2^9)+1

		// 2-length lists of the following patterns:
		// ["n" (umeric), <some number>]
		// ["o" (perator), "<some operator>"]
		// ["g" (roup explicit), <"(" or ")">]
		// e.g. [["n", 21], ["o", "+"], ["n", 9], ["o", *],
		//       ["g", "("], ["n", 3], ["o", "+"], ["n", 2],
		//       ["o", "^"], ["n", 9], ["g", ")"], ["o", "+"],
		//       ["n", "1"]]
		$tokens = array();

		// index of the first char of the number currently being read
		$dragline = 0;

		foreach (str_split($q) as $i => $char) {
			if (in_array($char, $nums, true)) {
				continue;
			}
			elseif (in_array($char, $ops, true) || in_array($char, $grouping, true)) {
				// hitting a non-numeric implies everything since the
				// last hit has been part of a number
				$capture = substr($q, $dragline, $i - $dragline);

				// prevent the cast from creating imaginary
				// ["n", 0] tokens
				if ($capture != "") {
					if (substr_count($capture, ".") > 1) {
						return "";
					}
					array_push($tokens, ["n", (float)$capture]);
				}

				// reset to one past the current (non-numeric) char
				$dragline = $i + 1;

				// the `;' backstop is not a real token and this should
				// never be present in the token list
				if ($char != ";") {
					array_push($tokens, [
						($char == "(" || $char == ")") ? "g" : "o",
						$char
					]);
				}
			}
			else {
				return "";
			}
		}

		// two operators back to back should fail
		for ($i = 1; $i < count($tokens); $i++) {
			if ($tokens[$i][0] == "o" && $tokens[$i-1][0] == "o") {
				return "";
			}
		}

		// no implicit multiplication
		for ($i = 0; $i < count($tokens) - 1; $i++) {
			if ($tokens[$i][0] == "n" && $tokens[$i+1] == ["g", "("]) {
				return "";
			}
		}

		// expression as typed (minus spaces), without the `;' backstop
		$expression = substr($q, 0, -1);

		try {
			$result = $this->executeBlock($tokens);
			return [
				$expression." = " => $result[0][1]
			];
		}
		catch (\DivisionByZeroError $e) {
			return [
				$expression." = " => "Division by Zero Error!!"
			];
		}
		catch (\Throwable $e) {
			// malformed expression: no answer
			return "";
		}
	}

	/**
	 * Reduce a token list to a single numeric token.
	 *
	 * Strategy: resolve parenthesised groups recursively, then apply the
	 * operators one precedence level at a time, left to right within a
	 * level ("^", then "*" "/" "%", then "+" "-").
	 *
	 * @param array $tokens list of ["n"|"o"|"g", value] tokens
	 * @return array list holding exactly one ["n", value] token
	 * @throws Exception on empty input, dangling operators, unbalanced
	 *                   parentheses or leftover tokens
	 * @throws DivisionByZeroError on division/modulo by zero
	 */
	public function executeBlock($tokens) {
		// fold a leading unary minus into the number that follows it
		if (count($tokens) >= 2 && $tokens[0][0] == "o" && $tokens[0][1] == "-" && $tokens[1][0] == "n") {
			array_splice($tokens, 0, 2, [["n", -1 * (float)$tokens[1][1]]]);
		}

		if (count($tokens) === 0) {
			throw new Exception("Error Processing Request", 1);
		}

		if ($tokens[0][0] == "o" || $tokens[count($tokens)-1][0] == "o") {
			throw new Exception("Error Processing Request", 1);
		}

		// resolve groups, always starting with the first opening parenthesis
		while (($first_open = array_search(["g", "("], $tokens, true)) !== false) {
			$depth = 1;
			$close = null;

			for ($i = $first_open + 1; $i < count($tokens); $i++) {
				if ($tokens[$i][0] == "g") {
					$depth += ($tokens[$i][1] == "(") ? 1 : -1;
				}
				if ($depth == 0) {
					$close = $i;
					break;
				}
			}

			// without this check an unmatched "(" would never be removed
			// and the loop would spin forever
			if ($close === null) {
				throw new Exception("Error Processing Request", 1);
			}

			array_splice(
				$tokens,
				$first_open,
				$close + 1 - $first_open,
				$this->executeBlock(
					array_slice($tokens, $first_open + 1, $close - 1 - $first_open)
				)
			);
		}

		// operators grouped by precedence level, highest first. operators
		// sharing a level MUST be applied together from left to right:
		// 5-2+1 is 4 and 8/2*3 is 12
		$precedence_levels = [
			[
				"^" => (fn($x, $y) => $x ** $y)
			],
			[
				"*" => (fn($x, $y) => $x * $y),
				"/" => (fn($x, $y) => $x / $y),
				// modulo works on integers; cast explicitly
				"%" => (fn($x, $y) => (int)$x % (int)$y)
			],
			[
				"+" => (fn($x, $y) => $x + $y),
				"-" => (fn($x, $y) => $x - $y)
			]
		];

		foreach ($precedence_levels as $level) {
			$i = 0;
			while ($i < count($tokens)) {
				$token = $tokens[$i];

				if ($token[0] != "o" || !isset($level[$token[1]])) {
					$i++;
					continue;
				}

				// a binary operator needs a number on both sides
				if (
					$i === 0 ||
					$i + 1 >= count($tokens) ||
					$tokens[$i-1][0] != "n" ||
					$tokens[$i+1][0] != "n"
				) {
					throw new Exception("Error Processing Request", 1);
				}

				$value = $level[$token[1]]((float)$tokens[$i-1][1], (float)$tokens[$i+1][1]);

				// replace "<left> <op> <right>" with the result. the result
				// lands at $i-1, so the next unread token is now at $i
				array_splice($tokens, $i - 1, 3, [["n", (string)$value]]);
			}
		}

		// anything but a single number left over (stray ")", adjacent
		// numbers, ...) means the expression was malformed
		if (count($tokens) !== 1 || $tokens[0][0] != "n") {
			throw new Exception("Error Processing Request", 1);
		}

		return $tokens;
	}
}
?>

40
src/oracles/encoder.php Normal file
View File

@ -0,0 +1,40 @@
<?php
include_once("oracles/base.php");
/**
 * Oracle that encodes or hashes text: "<type> <text>", where <type> is
 * rot13, base64 or any algorithm name reported by hash_algos().
 */
class encoder extends oracle {
	public $info = [
		"name" => "text encoder/hasher"
	];

	// encodings handled by hand, on top of the hash algorithms
	private $special_types = [
		"rot13",
		"base64"
	];

	/**
	 * The query is ours when it starts with a known type followed by
	 * a space.
	 *
	 * @param string $q search string
	 * @return bool
	 */
	public function check_query($q) {
		$known_types = array_merge($this->special_types, hash_algos());

		foreach ($known_types as $candidate) {
			$prefix = $candidate." ";
			if (str_starts_with($q, $prefix)) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Encode/hash everything after the first word of the query.
	 *
	 * @param string $q search string
	 * @return array|string answer map, or "" for an unknown type
	 */
	public function generate_response($q)
	{
		// first word selects the operation, the rest is the payload
		[$type] = explode(" ", $q);
		$victim = substr($q, strlen($type)+1);

		if (in_array($type, hash_algos())) {
			return [$type." hash" => hash($type, $victim)];
		}

		if ($type == "rot13") {
			return ["rot13 encoded" => str_rot13($victim)];
		}

		if ($type == "base64") {
			return [
				"base64 encoded" => base64_encode($victim),
				"base64 decoded" => base64_decode($victim)
			];
		}

		return "";
	}
}
?>

54
src/oracles/numerics.php Normal file
View File

@ -0,0 +1,54 @@
<?php
include_once("oracles/base.php");
/**
 * Oracle converting a single number between decimal, hexadecimal
 * ("0x" prefix) and binary ("b" prefix).
 */
class numerics extends oracle {
	public $info = [
		"name" => "numeric base conversion"
	];

	/**
	 * Accept a single word that is a decimal number, a "0x"-prefixed
	 * hexadecimal number or a "b"-prefixed binary number
	 * (case-insensitive). The prefix alone is not a number.
	 *
	 * @param string $q search string
	 * @return bool
	 */
	public function check_query($q) {
		if (str_contains($q, " ")) {
			return false;
		}

		$q = strtolower($q);

		// [prefix, digits allowed after the prefix]
		$profiles = [
			["0x", "0123456789abcdef"],
			["", "1234567890"],
			["b", "10"]
		];

		foreach ($profiles as $profile) {
			if (!str_starts_with($q, $profile[0])) {
				continue;
			}

			// cast: substr() returns false instead of "" on old PHP versions
			$digits = (string)substr($q, strlen($profile[0]));

			// require at least one digit, otherwise "", "b" and "0x" would
			// be treated as numbers
			if ($digits !== "" && strspn($digits, $profile[1]) === strlen($digits)) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Convert the number to all three bases.
	 *
	 * @param string $q search string accepted by check_query()
	 * @return array answer map
	 */
	public function generate_response($q) {
		// check_query() matches case-insensitively, so normalise here as
		// well; otherwise "0XFF" or "B101" falls through to the decimal
		// branch and is reported as 0
		$q = strtolower($q);

		$n = 0;
		if (str_starts_with($q, "0x")) {
			$nq = substr($q, strlen("0x"));
			$n = hexdec($nq);
		}
		elseif (str_starts_with($q, "b")) {
			$nq = substr($q, strlen("b"));
			$n = bindec($nq);
		}
		else {
			$n = (int)$q;
		}

		return [
			"decimal (base 10)" => (string)$n,
			"hexadecimal (base 16)" => "0x".(string)dechex($n),
			"binary (base 2)" => "b".(string)decbin($n),
			"" => "binary inputs should be prefixed with 'b', hex with '0x'."
		];
	}
}
?>

45
src/oracles/time.php Normal file
View File

@ -0,0 +1,45 @@
<?php
include_once("oracles/base.php");
/**
 * Oracle answering "what time is it?"-style questions, optionally for
 * a timezone given as a "tz:XXX" word.
 */
class time extends oracle {
	public $info = [
		"name" => "what time is it?"
	];

	/**
	 * The query is ours when more than three quarters of its words are
	 * time-question words. One word is discounted when a "tz:" timezone
	 * specifier is present.
	 *
	 * @param string $q search string
	 * @return bool
	 */
	public function check_query($q) {
		$prompts = [
			"what", "time", "is", "it",
			"right", "now", "the", "current",
			"get", "date"
		];

		$q = str_replace(",", "", $q);
		$q = str_replace("?", "", $q);
		$q = str_replace("what's", "what is", $q);

		$oq = $q;
		$q = explode(" ", $q);

		$count = 0;
		foreach ($q as $word) {
			if (in_array($word, $prompts, true)) {
				$count++;
			}
		}

		// remove one from total count if a timezone is specified
		$total = count($q) + (str_contains($oq, "tz:") ? -1 : 0);

		// a query consisting only of the timezone word leaves nothing to
		// score; bail out instead of dividing by zero
		if ($total <= 0) {
			return false;
		}

		return ($count / $total) > 3/4;
	}

	/**
	 * Report the current time and date, in UTC unless a recognised
	 * "tz:XXX" abbreviation is part of the query.
	 *
	 * Note: switches the default timezone for the rest of the request.
	 *
	 * @param string $q search string
	 * @return array answer map
	 */
	public function generate_response($q) {
		$timezone = timezone_name_from_abbr("UTC");

		foreach (explode(" ", $q) as $word) {
			if (str_starts_with($word, "tz:")) {
				// only the three chars after "tz:" are looked up
				$decltz = timezone_name_from_abbr(substr($word, 3, 3));
				if ($decltz) {
					$timezone = $decltz;
				}
			}
		}

		date_default_timezone_set($timezone);

		return [
			"The time in ".$timezone => date("H:i:s"),
			" " => date("l, F jS"),
			"" => "include the string \"tz:XXX\" to use timezone XXX"
		];
	}
}
?>

156
src/proxy.php Normal file
View File

@ -0,0 +1,156 @@
<?php
/*
	Image proxy.

	GET parameters:
		i   URL of the image to fetch (required)
		s   size: "original" (default) streams the file untouched,
		    any key of $sizes below returns a downscaled JPEG

	Failures are reported with an X-Error header and a 404.
*/
include "data/config.php";
include "lib/curlproxy.php";

$proxy = new proxy();

// bounding box [width, height] for every supported thumbnail size
$sizes = [
	"portrait" => [50, 90],
	"landscape" => [160, 90],
	"square" => [90, 90],
	"thumb" => [236, 180],
	"cover" => [207, 270]
];

if(
	!isset($_GET["i"]) ||
	!is_string($_GET["i"])
){
	header("X-Error: No URL(i) provided!");
	$proxy->do404();
	die();
}

try{
	// original size request, stream file to browser
	if(
		!isset($_GET["s"]) ||
		$_GET["s"] == "original"
	){
		$proxy->stream_linear_image($_GET["i"]);
		die();
	}

	// reject unknown sizes up front: both resize paths below need a
	// width and a height
	if(
		!is_string($_GET["s"]) ||
		!isset($sizes[$_GET["s"]])
	){
		header("X-Error: Invalid size(s) provided!");
		$proxy->do404();
		die();
	}

	[$width, $height] = $sizes[$_GET["s"]];

	// bing request, ask bing to resize and stream to browser
	$image = parse_url($_GET["i"]);

	if(
		isset($image["host"]) &&
		// bing.com / bing.net and their subdomains only; hosts that merely
		// end in "bing.com" (e.g. "evilbing.com") must not match
		preg_match(
			'/^(?:[a-z0-9-]+\.)*bing\.(?:net|com)$/i',
			$image["host"]
		)
	){
		if(
			!isset($image["query"]) ||
			!isset($image["path"]) ||
			$image["path"] != "/th"
		){
			header("X-Error: Invalid bing image path");
			$proxy->do404();
			die();
		}

		parse_str($image["query"], $str);

		if(!isset($str["id"])){
			header("X-Error: Missing bing ID");
			$proxy->do404();
			die();
		}

		$req = "&w=" . $width . "&h=" . $height . "&p=0&qlt=90";

		$proxy->stream_linear_image("https://" . $image["host"] . "/th?id=" . urlencode($str["id"]) . $req, "https://www.bing.com");
		die();
	}

	// resize image ourselves
	$payload = $proxy->get($_GET["i"], $proxy::req_image, true);

	// get image format & set imagick
	$image = null;
	$format = $proxy->getimageformat($payload, $image);

	try{
		if($format !== false){
			$image->setFormat($format);
		}

		$image->readImageBlob($payload["body"]);

		$image_width = $image->getImageWidth();
		$image_height = $image->getImageHeight();

		// shrink to fit the bounding box while keeping the aspect ratio;
		// images already smaller than the box are left at their size
		$ratio = $image_width / $image_height;

		if($image_width > $width){
			$image_width = $width;
			$image_height = (int)round($image_width / $ratio);
		}

		if($image_height > $height){
			$ratio = $image_width / $image_height;
			$image_height = $height;
			$image_width = (int)round($image_height * $ratio);
		}

		$image->setImageBackgroundColor("#504945");
		$image->setImageAlphaChannel(Imagick::ALPHACHANNEL_REMOVE);
		$image->stripImage();
		$image->setFormat("jpeg");
		$image->setImageCompressionQuality(90);
		$image->setImageCompression(Imagick::COMPRESSION_JPEG2000);
		$image->resizeImage($image_width, $image_height, Imagick::FILTER_LANCZOS, 1);

		$proxy->getfilenameheader($payload["headers"], $_GET["i"]);
		header("Content-Type: image/jpeg");

		echo $image->getImageBlob();

	}catch(ImagickException $error){
		header("X-Error: Could not convert the image: (" . $error->getMessage() . ")");
		$proxy->do404();
	}

}catch(Exception $error){
	header("X-Error: " . $error->getMessage());
	$proxy->do404();
	die();
}

1877
src/scraper/brave.php Normal file

File diff suppressed because it is too large Load Diff

145
src/scraper/crowdview.php Normal file
View File

@ -0,0 +1,145 @@
<?php
/**
 * Scraper for the CrowdView search API (web results only).
 */
class crowdview{

	// declared explicitly: creating properties on the fly is deprecated
	// as of PHP 8.2
	public $backend;
	public $fuckhtml;

	public function __construct(){
		include "lib/backend.php";
		$this->backend = new backend("crowdview");

		include "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}

	/**
	 * This scraper exposes no filters.
	 *
	 * @param string $page result page type
	 * @return array always empty
	 */
	public function getfilters($page){
		return [];
	}

	/**
	 * Perform a GET request through the given proxy.
	 *
	 * @param mixed  $proxy proxy handed to backend::assign_proxy()
	 * @param string $url   URL without query string
	 * @param array  $get   query parameters
	 * @return string response body
	 * @throws Exception with the curl error message when the transfer fails
	 */
	private function get($proxy, $url, $get = []){
		$curlproc = curl_init();

		if($get !== []){
			$get = http_build_query($get);
			$url .= "?" . $get;
		}

		curl_setopt($curlproc, CURLOPT_URL, $url);
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"]
		);
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

		$this->backend->assign_proxy($curlproc, $proxy);

		$data = curl_exec($curlproc);

		if(curl_errno($curlproc)){
			// read the message before releasing the handle
			$message = curl_error($curlproc);
			curl_close($curlproc);
			throw new Exception($message);
		}

		curl_close($curlproc);
		return $data;
	}

	/**
	 * Run a web search.
	 *
	 * @param array $get parsed request; "s" holds the search term
	 * @return array result structure ("web" holds the hits)
	 * @throws Exception on empty search term, network or JSON failure
	 */
	public function web($get){
		$search = $get["s"];

		if(strlen($search) === 0){
			throw new Exception("Search term is empty!");
		}

		$proxy = $this->backend->get_ip();

		try{
			$json = $this->get(
				$proxy,
				"https://crowdview-next-js.onrender.com/api/search-v3",
				[
					"query" => $search
				]
			);
		}catch(Exception $error){
			throw new Exception("Failed to fetch JSON");
		}

		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];

		$json = json_decode($json, true);

		if($json === NULL){
			throw new Exception("Failed to decode JSON");
		}

		// a response without a result list is treated as "no results"
		$results =
			is_array($json) &&
			isset($json["results"]) &&
			is_array($json["results"])
			? $json["results"]
			: [];

		foreach($results as $item){
			// the snippet is split at its first <b>: the text before it is
			// parsed as the date, the rest is the description
			// NOTE(review): snippet layout inferred from this parsing only
			$parts = explode("<b>", $item["snippet"], 2);

			if(count($parts) === 2){
				$date = strtotime($parts[0]);
				if($date === false){
					// unparsable date: report "no date", not a false timestamp
					$date = null;
				}
				$description = $parts[1];
			}else{
				// no <b> marker: the whole snippet is the description
				$date = null;
				$description = $parts[0];
			}

			$out["web"][] = [
				"title" => $this->sanitize($item["title"]),
				"description" => $this->sanitize($description),
				"url" => $item["link"],
				"date" => $date,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => [],
				"table" => []
			];
		}

		return $out;
	}

	/**
	 * Decode HTML entities, reduce the markup to its text content and
	 * trim dots/spaces from both ends.
	 *
	 * @param string $html markup fragment
	 * @return string plain text
	 */
	private function sanitize($html){
		return
			trim(
				$this->fuckhtml
				->getTextContent(
					html_entity_decode(
						$html
					)
				),
				". "
			);
	}
}

309
src/scraper/curlie.php Normal file
View File

@ -0,0 +1,309 @@
<?php
/**
 * Scraper for the curlie.org directory search (web results only).
 */
class curlie{

	// declared explicitly: creating properties on the fly is deprecated
	// as of PHP 8.2
	public $backend;
	public $fuckhtml;

	public function __construct(){
		include "lib/backend.php";
		$this->backend = new backend("curlie");

		include "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}

	/**
	 * Filters offered for a page type; only "web" has any (language).
	 *
	 * @param string $page result page type
	 * @return array filter definitions
	 */
	public function getfilters($page){
		if($page != "web"){
			return [];
		}

		return [
			"lang" => [
				"display" => "Language",
				"option" => [
					"any" => "Any language",
					"en" => "English",
					"de" => "German",
					"fr" => "French",
					"ja" => "Japanese",
					"it" => "Italian",
					"es" => "Spanish",
					"ru" => "Russian",
					"nl" => "Dutch",
					"pl" => "Polish",
					"tr" => "Turkish",
					"da" => "Danish",
					"sv" => "Swedish",
					"no" => "Norwegian",
					"is" => "Icelandic",
					"fo" => "Faroese",
					"fi" => "Finnish",
					"et" => "Estonian",
					"lt" => "Lithuanian",
					"lv" => "Latvian",
					"cy" => "Welsh",
					"ga" => "Irish",
					"gd" => "Scottish Gaelic",
					"br" => "Breton",
					"fy" => "Frisian",
					"frr" => "North Frisian",
					"gem" => "Saterland Frisian",
					"lb" => "Luxembourgish",
					"rm" => "Romansh",
					"pt" => "Portuguese",
					"ca" => "Catalan",
					"gl" => "Galician",
					"eu" => "Basque",
					"ast" => "Asturian",
					"an" => "Aragonese",
					"fur" => "Friulan",
					"sc" => "Sardinian",
					"scn" => "Sicilian",
					"oc" => "Occitan",
					"be" => "Belarusian",
					"cs" => "Czech",
					"hu" => "Hungarian",
					"sk" => "Slovak",
					"uk" => "Ukrainian",
					"csb" => "Kashubian",
					"tt" => "Tatar",
					"ba" => "Bashkir",
					"os" => "Ossetian",
					"sl" => "Slovene",
					"sr" => "Serbian",
					"hr" => "Croatian",
					"bs" => "Bosnian",
					"bg" => "Bulgarian",
					"sq" => "Albanian",
					"ro" => "Romanian",
					"mk" => "Macedonian",
					"el" => "Greek",
					"iw" => "Hebrew",
					"fa" => "Persian",
					"ar" => "Arabic",
					"ku" => "Kurdish",
					"az" => "Azerbaijani",
					"hy" => "Armenian",
					"af" => "Afrikaans",
					"sw" => "Kiswahili",
					"uz" => "Uzbek",
					"kk" => "Kazakh",
					"ky" => "Kyrgyz",
					"tg" => "Tajik",
					"tk" => "Turkmen",
					"ug" => "Uyghurche",
					"hi" => "Hindi",
					"si" => "Sinhalese",
					"gu" => "Gujarati",
					"ur" => "Urdu",
					"mr" => "Marathi",
					"pa" => "Punjabi",
					"bn" => "Bengali",
					"ta" => "Tamil",
					"te" => "Telugu",
					"kn" => "Kannada",
					"zh_CN" => "Chinese Simplified",
					"zh_TW" => "Chinese Traditional",
					"ko" => "Korean",
					"cfr" => "Taiwanese",
					"th" => "Thai",
					"vi" => "Vietnamese",
					"in" => "Indonesian",
					"ms" => "Malay",
					"tl" => "Tagalog",
					"eo" => "Esperanto",
					"ia" => "Interlingua",
					"la" => "Latin"
				]
			]
		];
	}

	/**
	 * Perform a GET request through the given proxy.
	 *
	 * @param mixed  $proxy proxy handed to backend::assign_proxy()
	 * @param string $url   URL without query string
	 * @param array  $get   query parameters
	 * @return string response body
	 * @throws Exception with the curl error message when the transfer fails
	 */
	private function get($proxy, $url, $get = []){
		$curlproc = curl_init();

		if($get !== []){
			$get = http_build_query($get);
			$url .= "?" . $get;
		}

		curl_setopt($curlproc, CURLOPT_URL, $url);
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"]
		);
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

		$this->backend->assign_proxy($curlproc, $proxy);

		$data = curl_exec($curlproc);

		if(curl_errno($curlproc)){
			// read the message before releasing the handle
			$message = curl_error($curlproc);
			curl_close($curlproc);
			throw new Exception($message);
		}

		curl_close($curlproc);
		return $data;
	}

	/**
	 * Run a web search, or continue one from a next-page token.
	 *
	 * @param array $get parsed request: "s" search term, "lang" language
	 *                   filter, "npt" next-page token (falsy on first page)
	 * @return array result structure ("web" holds the hits, "npt" the
	 *               token for the following page or null)
	 * @throws Exception when the search page cannot be fetched
	 */
	public function web($get){
		if($get["npt"]){
			// the token stores the relative URL of the next page together
			// with the proxy that served the previous one
			[$query, $proxy] = $this->backend->get($get["npt"], "web");

			try{
				$html = $this->get(
					$proxy,
					"https://curlie.org/" . $query,
					[]
				);
			}catch(Exception $error){
				throw new Exception("Failed to fetch search page");
			}

		}else{
			$proxy = $this->backend->get_ip();

			$query = [
				"q" => $get["s"],
				"start" => 0,
				"stime" => 92452189 // ?
			];

			if($get["lang"] !== "any"){
				$query["lang"] = $get["lang"];
			}

			try{
				$html = $this->get(
					$proxy,
					"https://curlie.org/search",
					$query
				);
			}catch(Exception $error){
				throw new Exception("Failed to fetch search page");
			}
		}

		$this->fuckhtml->load($html);

		$nextpage =
			$this->fuckhtml
			->getElementsByClassName(
				"next-page",
				"a"
			);

		if(count($nextpage) !== 0){
			$nextpage =
				$this->backend->store(
					$nextpage[0]["attributes"]["href"],
					"web",
					$proxy
				);
		}else{
			$nextpage = null;
		}

		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => $nextpage,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];

		$items =
			$this->fuckhtml
			->getElementsByClassName(
				"site-item",
				"div"
			);

		foreach($items as $item){
			$this->fuckhtml->load($item);

			$links =
				$this->fuckhtml
				->getElementsByAttributeValue(
					"target",
					"_blank",
					"a"
				);

			// an entry without an outbound link has neither title nor URL
			if(count($links) === 0){
				continue;
			}

			$a = $links[0];

			$description =
				$this->fuckhtml
				->getElementsByClassName("site-descr");

			if(count($description) !== 0){
				$description =
					$this->fuckhtml
					->getTextContent(
						$description[0]
					);
			}else{
				$description = null;
			}

			$out["web"][] = [
				"title" =>
					$this->fuckhtml
					->getTextContent(
						$a
					),
				"description" => $description,
				"url" =>
					$this->fuckhtml
					->getTextContent(
						$a["attributes"]["href"]
					),
				"date" => null,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => [],
				"table" => []
			];
		}

		return $out;
	}
}

1967
src/scraper/ddg.php Normal file

File diff suppressed because it is too large Load Diff

820
src/scraper/facebook.php Normal file
View File

@ -0,0 +1,820 @@
<?php
class facebook{
const get = 0;
const post = 1;
/**
 * Load the helper libraries and create the next-page store ("fb")
 * and the proxy pool ("facebook") used by this scraper.
 */
public function __construct(){
	// next-page token storage
	include "lib/nextpage.php";
	$nextpage_store = new nextpage("fb");
	$this->nextpage = $nextpage_store;

	// outgoing proxy selection
	include "lib/proxy_pool.php";
	$pool = new proxy_pool("facebook");
	$this->proxy = $pool;
}
/**
 * Filters offered by this scraper; identical for every page type.
 *
 * @param string $page result page type (not used)
 * @return array filter definitions: sort order, date range, livestream
 */
public function getfilters($page){
	$filters = [];

	$filters["sort"] = [
		"display" => "Sort by",
		"option" => [
			"relevance" => "Relevance",
			"most_recent" => "Most recent"
		]
	];

	// date range; "_DATE" is passed through as the option value
	$filters["newer"] = [
		"display" => "Newer than",
		"option" => "_DATE"
	];
	$filters["older"] = [
		"display" => "Older than",
		"option" => "_DATE"
	];

	$filters["live"] = [
		"display" => "Livestream",
		"option" => [
			"no" => "No",
			"yes" => "Yes"
		]
	];

	return $filters;
}
/**
 * Perform an HTTP request against facebook through the proxy pool.
 *
 * @param string $url     target URL (without query string)
 * @param array  $get     parameters: appended as query string for GET,
 *                        sent as form body for POST
 * @param int    $reqtype self::get or self::post
 * @return string response body
 * @throws Exception with the curl error message when the transfer fails
 */
private function get($url, $get = [], $reqtype = self::get){
	$curlproc = curl_init();

	$payload = $get !== [] ? http_build_query($get) : null;

	if(
		$payload !== null &&
		$reqtype !== self::get
	){
		// form POST (used for the graphql pagination endpoint)
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);

		$headers = [
			"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0",
			"Accept: */*",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip, deflate, br",
			"Content-Type: application/x-www-form-urlencoded",
			"X-FB-Friendly-Name: SearchCometResultsPaginatedResultsQuery",
			//"X-FB-LSD: AVptQC4a16c",
			//"X-ASBD-ID: 129477",
			"Content-Length: " . strlen($payload),
			"Origin: https://www.facebook.com",
			"DNT: 1",
			"Connection: keep-alive",
			"Referer: https://www.facebook.com/watch/",
			"Cookie: datr=__GMZCgwVF5BbyvAtfJojQwg; oo=v1%7C3%3A1691641171; wd=955x995",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-origin",
			"TE: trailers"
		];

		curl_setopt($curlproc, CURLOPT_POST, true);
		curl_setopt($curlproc, CURLOPT_POSTFIELDS, $payload);

	}else{
		// plain page load. the headers are always set, so a call without
		// parameters no longer hands an undefined variable to
		// CURLOPT_HTTPHEADER
		$headers = [
			"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0",
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"
		];

		if(
			$payload !== null &&
			$reqtype === self::get
		){
			$url .= "?" . $payload;
		}
	}

	curl_setopt($curlproc, CURLOPT_URL, $url);
	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
	curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

	$this->proxy->assign_proxy($curlproc);

	$data = curl_exec($curlproc);

	if(curl_errno($curlproc)){
		// read the message before releasing the handle
		$message = curl_error($curlproc);
		curl_close($curlproc);
		throw new Exception($message);
	}

	curl_close($curlproc);
	return $data;
}
public function video($get){
$search = $get["s"];
$npt = $get["npt"];
$this->out = [
"status" => "ok",
"npt" => null,
"video" => [],
"author" => [],
"livestream" => [],
"playlist" => [],
"reel" => []
];
if($get["npt"]){
$nextpage =
json_decode(
$this->nextpage->get(
$npt,
"videos"
),
true
);
// parse next page
$this->video_nextpage($nextpage);
return $this->out;
}
// generate filter data
// {
// "rp_creation_time:0":"{\"name\":\"creation_time\",\"args\":\"{\\\"start_year\\\":\\\"2023\\\",\\\"start_month\\\":\\\"2023-08\\\",\\\"end_year\\\":\\\"2023\\\",\\\"end_month\\\":\\\"2023-08\\\",\\\"start_day\\\":\\\"2023-08-10\\\",\\\"end_day\\\":\\\"2023-08-10\\\"}\"}",
// "videos_sort_by:0":"{\"name\":\"videos_sort_by\",\"args\":\"Most Recent\"}",
// "videos_live:0":"{\"name\":\"videos_live\",\"args\":\"\"}"
// }
$filter = [];
$sort = $get["sort"];
$live = $get["live"];
$older = $get["older"];
$newer = $get["newer"];
if(
$older !== false ||
$newer !== false
){
if($older === false){
$older = time();
}
if($newer === false){
$newer = 0;
}
$filter["rp_creation_time:0"] =
json_encode(
[
"name" => "creation_time",
"args" =>
json_encode(
[
"start_year" => date("Y", $newer),
"start_month" => date("Y-m", $newer),
"end_year" => date("Y", $older),
"end_month" => date("Y-m", $older),
"start_day" => date("Y-m-d", $newer),
"end_day" => date("Y-m-d", $older)
]
)
]
);
}
if($sort != "relevance"){
$filter["videos_sort_by:0"] =
json_encode(
[
"name" => "videos_sort_by",
"args" => "Most Recent"
]
);
}
if($live != "no"){
$filter["videos_live:0"] = json_encode(
[
"name" => "videos_live",
"args" => ""
]
);
}
$req = [
"q" => $search
];
if(count($filter) !== 0){
$req["filters"] =
base64_encode(
json_encode(
$filter
)
);
}
/*
$html =
$this->get(
"https://www.facebook.com/watch/search/",
$req
);*/
$handle = fopen("scraper/facebook.html", "r");
$html = fread($handle, filesize("scraper/facebook.html"));
fclose($handle);
preg_match_all(
'/({"__bbox":.*,"sequence_number":0}})\]\]/',
$html,
$json
);
if(!isset($json[1][1])){
throw new Exception("Could not grep JSON body");
}
$json = json_decode($json[1][1], true);
foreach(
$json
["__bbox"]
["result"]
["data"]
["serpResponse"]
["results"]
["edges"]
as $result
){
$this->parse_edge($result);
}
// get nextpage data
if(
$json
["__bbox"]
["result"]
["data"]
["serpResponse"]
["results"]
["page_info"]
["has_next_page"]
== 1
){
preg_match(
'/handleWithCustomApplyEach\(ScheduledApplyEach,({.*})\);}\);}\);<\/script>/',
$html,
$nextpagedata
);
// [POST] https://www.facebook.com/api/graphql/
// FORM data, not JSON!
$nextpage = [
"av" => "0",
"__user" => null,
"__a" => null,
"__req" => "2",
"__hs" => null,
"dpr" => "1",
"__ccg" => null,
"__rev" => null,
// another client side token
"__s" => $this->randomstring(6) . ":" . $this->randomstring(6) . ":" . $this->randomstring(6),
"__hsi" => null,
// tracking fingerprint (probably generated using webgl)
"__dyn" => "7xeUmwlE7ibwKBWo2vwAxu13w8CewSwMwNw9G2S0im3y4o0B-q1ew65xO2O1Vw8G1Qw5Mx61vw9m1YwBgao6C0Mo5W3S7Udo5q4U2zxe2Gew9O222SUbEaU2eU5O0GpovU19pobodEGdw46wbS1LwTwNwLw8O1pwr86C16w",
"__csr" => $this->randomstring(null),
"__comet_req" => null,
"lsd" => null,
"jazoest" => null,
"__spin_r" => null,
"__spin_b" => null,
"__spin_t" => null,
"fb_api_caller_class" => "RelayModern",
"fb_api_req_friendly_name" => "SearchCometResultsPaginatedResultsQuery",
"variables" => [ // this is json
"UFI2CommentsProvider_commentsKey" => "SearchCometResultsInitialResultsQuery",
"allow_streaming" => false,
"args" => [
"callsite" => "comet:watch_search",
"config" => [
"exact_match" => false,
"high_confidence_config" => null,
"intercept_config" => null,
"sts_disambiguation" => null,
"watch_config" => null
],
"context" => [
"bsid" => null,
"tsid" => null
],
"experience" => [
"encoded_server_defined_params" => null,
"fbid" => null,
"type" => "WATCH_TAB_GLOBAL"
],
"filters" => [],
"text" => $search
],
"count" => 5,
"cursor" =>
$json
["__bbox"]
["result"]
["data"]
["serpResponse"]
["results"]
["page_info"]
["end_cursor"],
"displayCommentsContextEnableComment" => false,
"displayCommentsContextIsAdPreview" => false,
"displayCommentsContextIsAggregatedShare" => false,
"displayCommentsContextIsStorySet" => false,
"displayCommentsFeedbackContext" => null,
"feedLocation" => "SEARCH",
"feedbackSource" => 23,
"fetch_filters" => true,
"focusCommentID" => null,
"locale" => null,
"privacySelectorRenderLocation" => "COMET_STREAM",
"renderLocation" => "search_results_page",
"scale" => 1,
"stream_initial_count" => 0,
"useDefaultActor" => false,
"__relay_internal__pv__IsWorkUserrelayprovider" => false,
"__relay_internal__pv__IsMergQAPollsrelayprovider" => false,
"__relay_internal__pv__StoriesArmadilloReplyEnabledrelayprovider" => false,
"__relay_internal__pv__StoriesRingrelayprovider" => false
],
"server_timestamps" => "true",
"doc_id" => "6761275837251607" // is actually dynamic
];
// append filters to nextpage
foreach($filter as $key => $value){
$nextpage["variables"]["args"]["filters"][] =
$value;
}
$nextpagedata = json_decode($nextpagedata[1], true);
// get bsid
foreach($nextpagedata["require"] as $key){
foreach($key as $innerkey){
if(is_array($innerkey)){
foreach($innerkey as $inner_innerkey){
if(is_array($inner_innerkey)){
foreach($inner_innerkey as $inner_inner_innerkey){
if(
isset(
$inner_inner_innerkey
["variables"]
["args"]
["context"]
["bsid"]
)
){
$nextpage
["variables"]
["args"]
["context"]
["bsid"] =
$inner_inner_innerkey
["variables"]
["args"]
["context"]
["bsid"];
}
}
}
}
}
}
}
foreach($nextpagedata["define"] as $key){
if(isset($key[2]["haste_session"])){
$nextpage["__hs"] = $key[2]["haste_session"];
}
if(isset($key[2]["connectionClass"])){
$nextpage["__ccg"] = $key[2]["connectionClass"];
}
if(isset($key[2]["__spin_r"])){
$nextpage["__spin_r"] = (string)$key[2]["__spin_r"];
}
if(isset($key[2]["hsi"])){
$nextpage["__hsi"] = (string)$key[2]["hsi"];
}
if(
isset($key[2]["token"]) &&
!empty($key[2]["token"])
){
$nextpage["lsd"] = $key[2]["token"];
}
if(isset($key[2]["__spin_r"])){
$nextpage["__spin_r"] = (string)$key[2]["__spin_r"];
$nextpage["__rev"] = $nextpage["__spin_r"];
}
if(isset($key[2]["__spin_b"])){
$nextpage["__spin_b"] = $key[2]["__spin_b"];
}
if(isset($key[2]["__spin_t"])){
$nextpage["__spin_t"] = (string)$key[2]["__spin_t"];
}
}
preg_match(
'/{"u":"\\\\\/ajax\\\\\/qm\\\\\/\?__a=([0-9]+)&__user=([0-9]+)&__comet_req=([0-9]+)&jazoest=([0-9]+)"/',
$html,
$ajaxparams
);
if(count($ajaxparams) !== 5){
throw new Exception("Could not grep the AJAX parameters");
}
$nextpage["__a"] = $ajaxparams[1];
$nextpage["__user"] = $ajaxparams[2];
$nextpage["__comet_req"] = $ajaxparams[3];
$nextpage["jazoest"] = $ajaxparams[4];
/*
$handle = fopen("scraper/facebook-nextpage.json", "r");
$json = fread($handle, filesize("scraper/facebook-nextpage.json"));
fclose($handle);*/
$nextpage["variables"] = json_encode($nextpage["variables"]);
$this->video_nextpage($nextpage);
}
return $this->out;
}
/**
 * Fetch one page of video results from the GraphQL endpoint, feed every
 * returned edge to parse_edge(), and store a next-page token in
 * $this->out["npt"] when the response reports that more pages exist.
 *
 * @param array $nextpage  POST fields for the request; "variables" is a JSON string
 * @param bool  $getcursor Unused by this method
 * @throws Exception When the response body is not valid JSON
 */
private function video_nextpage($nextpage, $getcursor = false){
	
	$response =
		json_decode(
			$this->get(
				"https://www.facebook.com/api/graphql/",
				$nextpage,
				self::post
			),
			true
		);
	
	if($response === null){
		
		throw new Exception("Failed to decode next page JSON");
	}
	
	// every field read below lives under this node
	$results =
		$response
		["data"]
		["serpResponse"]
		["results"];
	
	foreach($results["edges"] as $edge){
		
		$this->parse_edge($edge);
	}
	
	if($results["page_info"]["has_next_page"] != 1){
		
		// last page: nothing to store
		return;
	}
	
	// "variables" travels as a JSON string: decode, swap the cursor, re-encode
	$variables = json_decode($nextpage["variables"], true);
	$variables["cursor"] = $results["page_info"]["end_cursor"];
	$nextpage["variables"] = json_encode($variables);
	
	//change this for second call. after, it's static.
	// TODO: csr also updates to longer string
	$nextpage["__dyn"] = "7xeUmwlEnwn8K2WnFw9-2i5U4e0yoW3q322aew9G2S0zU20xi3y4o0B-q1ew65xOfxO1Vw8G11xmfz81s8hwGwQw9m1YwBgao6C2O0B85W3S7Udo5qfK0EUjwGzE2swwwJK2W2K0zK5o4q0GpovU19pobodEGdw46wbS1LwTwNwLw8O1pwr86C16w";
	
	// TODO: change this on third and 6th call
	//$nextpage["__s"] = $this->randomstring(6) . ":" . explode(":", $nextpage["__s"], 2)[1];
	
	$this->out["npt"] = $this->nextpage->store(json_encode($nextpage), "videos");
}
/**
 * Convert one search-result edge into a result entry and append it to
 * $this->out["video"] or, for live broadcasts, $this->out["livestream"].
 *
 * Three layouts are handled, selected by video_broadcast_status:
 *  - "live": duration is the "_LIVE" marker, no date
 *  - contains "vod": relative_time_string is cast straight to a view count
 *  - anything else: relative_time_string is "<time> · <views>"
 *
 * @param array $edge A single element of the "edges" list
 */
private function parse_edge($edge){
	
	$view =
		$edge
		["relay_rendering_strategy"]
		["view_model"];
	
	$meta = $view["video_metadata_model"];
	$status = strtolower($meta["video_broadcast_status"]);
	
	$append = "video";
	$timetext = null;
	
	if($status == "live"){
		
		// handle livestream
		$duration = "_LIVE";
		$append = "livestream";
		$views = (int)$meta["relative_time_string"];
		$url_prefix = "https://www.facebook.com/watch/live/?v=";
		
	}elseif(strpos($status, "vod") !== false){
		
		// handle VOD format
		$views = (int)$meta["relative_time_string"];
		$duration =
			$this->hms2int(
				$view
				["video_thumbnail_model"]
				["video_duration_text"]
			);
		$url_prefix = "https://www.facebook.com/watch/live/?v=";
		
	}else{
		
		// handle normal format
		$parts =
			explode(
				" · ",
				$meta["relative_time_string"],
				2
			);
		
		if(count($parts) === 2){
			
			$views = $this->truncatedcount2int($parts[1]);
		}else{
			
			$views = null;
		}
		
		$timetext = strtotime($parts[0]);
		
		if($timetext === false){
			
			// unparseable date: report "unknown" rather than boolean false
			$timetext = null;
		}
		
		$duration =
			$this->hms2int(
				$view
				["video_thumbnail_model"]
				["video_duration_text"]
			);
		$url_prefix = "https://www.facebook.com/watch/?v=";
	}
	
	$owner = $meta["video_owner_profile"];
	
	if(isset($owner["uri_token"])){
		
		$profileurl = "https://www.facebook.com/watch/" . $owner["uri_token"];
	}else{
		
		$profileurl = $owner["url"];
	}
	
	if(empty($meta["save_description"])){
		
		$description = null;
	}else{
		
		// flatten newlines BEFORE truncating: limitstrlen() keeps only the
		// text up to the first "\n", so the reverse order would cut a
		// multi-line description at its first line break
		$description =
			$this->limitstrlen(
				str_replace(
					"\n",
					" ",
					$meta["save_description"]
				)
			);
	}
	
	$this->out[$append][] = [
		"title" =>
			$this->limitstrlen(
				str_replace(
					"\n",
					" ",
					$meta["title"]
				),
				100
			),
		"description" => $description,
		"author" => [
			"name" => $owner["name"],
			"url" => $profileurl,
			"avatar" => null
		],
		"date" => $timetext,
		"duration" => $duration,
		"views" => $views,
		"thumb" =>
			[
				"url" =>
					$view
					["video_thumbnail_model"]
					["thumbnail_image"]
					["uri"],
				"ratio" => "16:9"
			],
		"url" =>
			$url_prefix .
			$view
			["video_click_model"]
			["click_metadata_model"]
			["video_id"]
	];
}
/**
 * Build a random string.
 *
 * With $len === null the string is 141 to 145 characters long and drawn from
 * letters of both cases, the digits 1-9 and "-". With an integer $len it is
 * exactly that long and drawn from lowercase letters and the digits 1-9.
 *
 * @param int|null $len Desired length, or null for the long mixed-case variant
 * @return string|null  The generated string; null when the length is 0
 */
private function randomstring($len){
	
	if($len === null){
		
		$alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789-";
		$len = rand(141, 145);
	}else{
		
		$alphabet = "abcdefghijklmnopqrstuvwxyz123456789";
	}
	
	// highest valid index into the alphabet
	$last = strlen($alphabet) - 1;
	
	$out = null;
	$written = 0;
	
	while($written < $len){
		
		$out .= $alphabet[rand(0, $last)];
		$written++;
	}
	
	return $out;
}
/**
 * Shorten text to its first word-wrapped line.
 *
 * The text is wrapped at $len columns and everything from the first line
 * break onward (whether added by the wrap or already present) is dropped.
 *
 * @param string $text Text to shorten
 * @param int    $len  Wrap width
 * @return string      The first line of the wrapped text
 */
private function limitstrlen($text, $len = 300){
	
	$wrapped = wordwrap($text, $len, "\n");
	$cut = strpos($wrapped, "\n");
	
	if($cut === false){
		
		// single line: nothing to drop
		return $wrapped;
	}
	
	return substr($wrapped, 0, $cut);
}
/**
 * Convert a "H:MM:SS", "M:SS" or "S" duration string into seconds.
 *
 * @param string $time Colon-separated duration
 * @return int         Duration in seconds
 */
private function hms2int($time){
	
	$seconds = 0;
	
	// at most three fields; each earlier field is worth 60 of the next one,
	// so a running base-60 fold gives hours*3600 + minutes*60 + seconds
	foreach(explode(":", $time, 3) as $field){
		
		$seconds = ($seconds * 60) + (int)$field;
	}
	
	return $seconds;
}
/**
 * Convert an abbreviated counter such as "1.2K views" into an integer.
 *
 * Only the first space-separated token is read. A trailing k/m/b suffix
 * (case-insensitive) multiplies the value by 10^3 / 10^6 / 10^9. Commas are
 * treated as thousands separators and removed.
 *
 * @param string $number Counter text, e.g. "1.2K views", "3M", "123 views"
 * @return int           The expanded count; 0 for empty input
 */
private function truncatedcount2int($number){
	
	$token = explode(" ", trim((string)$number), 2)[0];
	$token = str_replace(",", "", $token);
	
	if($token === ""){
		
		// nothing to parse (also avoids reading offset -1 of an empty string)
		return 0;
	}
	
	switch(strtolower(substr($token, -1))){
		
		case "k":
			$multiplier = 1000;
			break;
		
		case "m":
			$multiplier = 1000000;
			break;
		
		case "b":
			$multiplier = 1000000000;
			break;
		
		default:
			$multiplier = 1;
			break;
	}
	
	// the float cast reads the leading numeric part ("1.25M" -> 1.25), so any
	// number of decimals works; round() absorbs binary float error and the
	// int cast keeps the result an integer even when there is no suffix
	return (int)round((float)$token * $multiplier);
}
}

262
src/scraper/fivehpx.php Normal file
View File

@ -0,0 +1,262 @@
<?php
/**
 * Image scraper for 500px, backed by its public GraphQL search endpoint.
 */
class fivehpx{
	
	// declared explicitly: creating properties dynamically is deprecated
	// since PHP 8.2. Kept public, as dynamic properties were.
	public $backend;
	public $fuckhtml;
	
	public function __construct(){
		
		// include_once: a plain include would fatally redeclare the class
		// if the library had already been loaded during this request
		include_once "lib/backend.php";
		$this->backend = new backend("fivehpx");
		
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * @param string $page Result page type (unused: the filters are the same for all)
	 * @return array       Filter definitions
	 */
	public function getfilters($page){
		
		return [
			"sort" => [
				"display" => "Sort",
				"option" => [
					"relevance" => "Relevance",
					"pulse" => "Pulse",
					"newest" => "Newest"
				]
			]
		];
	}
	
	/**
	 * Perform an HTTP request through the given proxy.
	 *
	 * @param mixed       $proxy     Proxy handle passed to backend::assign_proxy()
	 * @param string      $url       Target URL
	 * @param array       $get       Query-string parameters
	 * @param string|null $post_data JSON body; when set the request is a POST
	 * @return string                Response body
	 * @throws Exception             On any cURL error
	 */
	private function get($proxy, $url, $get = [], $post_data = null){
		
		$curlproc = curl_init();
		
		if($get !== []){
			$get = http_build_query($get);
			$url .= "?" . $get;
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		
		if($post_data === null){
			
			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"DNT: 1",
				"Sec-GPC: 1",
				"Connection: keep-alive",
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: same-origin",
				"Sec-Fetch-User: ?1",
				"Priority: u=0, i",
				"TE: trailers"]
			);
		}else{
			
			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: */*",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"Referer: https://500px.com/",
				"content-type: application/json",
				//"x-csrf-token: undefined",
				"x-500px-source: Search",
				"Content-Length: " . strlen($post_data),
				"Origin: https://500px.com",
				"DNT: 1",
				"Sec-GPC: 1",
				"Connection: keep-alive",
				// "Cookie: _pin_unauth, _fbp, _sharedID, _sharedID_cst",
				"Sec-Fetch-Dest: empty",
				"Sec-Fetch-Mode: cors",
				"Sec-Fetch-Site: same-site",
				"Priority: u=4",
				"TE: trailers"]
			);
			
			// set post data
			curl_setopt($curlproc, CURLOPT_POST, true);
			curl_setopt($curlproc, CURLOPT_POSTFIELDS, $post_data);
		}
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		// http2 bypass
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			throw new Exception(curl_error($curlproc));
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/**
	 * Run an image search, or continue one from a next-page token.
	 *
	 * @param array $get Request parameters: "s" (search), "sort", "npt"
	 * @return array     ["status", "npt", "image" => [...]]
	 * @throws Exception On empty search, transport failure or API error
	 */
	public function image($get){
		
		if($get["npt"]){
			
			// the stored token is the GraphQL variables object of the next page
			[$pagination, $proxy] =
				$this->backend->get(
					$get["npt"], "images"
				);
			
			$pagination = json_decode($pagination, true);
			$search = $pagination["search"];
			
		}else{
			
			$search = $get["s"];
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$proxy = $this->backend->get_ip();
			
			$pagination = [
				"sort" => strtoupper($get["sort"]),
				"search" => $search,
				"filters" => [],
				"nlp" => false,
			];
		}
		
		try{
			$json =
				$this->get(
					$proxy,
					"https://api.500px.com/graphql",
					[],
					json_encode([
						"operationName" => "PhotoSearchPaginationContainerQuery",
						"variables" => $pagination,
						// $cursor is only declared and used when paginating
						"query" =>
							'query PhotoSearchPaginationContainerQuery(' .
							(isset($pagination["cursor"]) ? '$cursor: String, ' : "") .
							'$sort: PhotoSort, $search: String!, $filters: [PhotoSearchFilter!], $nlp: Boolean) { ...PhotoSearchPaginationContainer_query_1vzAZD} fragment PhotoSearchPaginationContainer_query_1vzAZD on Query { photoSearch(sort: $sort, first: 100, ' .
							(isset($pagination["cursor"]) ? 'after: $cursor, ' : "") .
							'search: $search, filters: $filters, nlp: $nlp) { edges { node { id legacyId canonicalPath name description width height images(sizes: [33, 36]) { size url id } } } totalCount pageInfo { endCursor hasNextPage } }}'
					])
				);
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch graphQL object");
		}
		
		$json = json_decode($json, true);
		
		if($json === null){
			
			throw new Exception("Failed to decode graphQL object");
		}
		
		if(isset($json["errors"][0]["message"])){
			
			throw new Exception("500px returned an API error: " . $json["errors"][0]["message"]);
		}
		
		if(!isset($json["data"]["photoSearch"]["edges"])){
			
			throw new Exception("No edges returned by API");
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		foreach($json["data"]["photoSearch"]["edges"] as $image){
			
			$image = $image["node"];
			
			if(
				!isset(
					$image["images"][0]["url"],
					$image["images"][1]["url"]
				)
			){
				
				// a node without both renditions cannot be displayed
				continue;
			}
			
			// "name: description", with stray separators trimmed when either is empty
			$title =
				trim(
					$this->fuckhtml
					->getTextContent(
						$image["name"]
					) . ": " .
					$this->fuckhtml
					->getTextContent(
						$image["description"]
					)
					, " :"
				);
			
			$small = $this->image_ratio(600, $image["width"], $image["height"]);
			$large = $this->image_ratio(2048, $image["width"], $image["height"]);
			
			$out["image"][] = [
				"title" => $title,
				"source" => [
					[
						"url" => $image["images"][1]["url"],
						"width" => $large[0],
						"height" => $large[1]
					],
					[
						"url" => $image["images"][0]["url"],
						"width" => $small[0],
						"height" => $small[1]
					]
				],
				"url" => "https://500px.com" . $image["canonicalPath"]
			];
		}
		
		// get NPT token
		if($json["data"]["photoSearch"]["pageInfo"]["hasNextPage"] === true){
			
			$out["npt"] =
				$this->backend->store(
					json_encode([
						"cursor" => $json["data"]["photoSearch"]["pageInfo"]["endCursor"],
						"search" => $search,
						"sort" => $pagination["sort"],
						"filters" => [],
						"nlp" => false
					]),
					"images",
					$proxy
				);
		}
		
		return $out;
	}
	
	/**
	 * Scale a width/height pair so that it fits inside a square of side
	 * $longest_edge while keeping the aspect ratio.
	 *
	 * @param int   $longest_edge Bounding size in pixels
	 * @param mixed $width        Original width
	 * @param mixed $height       Original height
	 * @return array              [width, height]; [null, null] when the
	 *                            original dimensions are missing or not positive
	 */
	private function image_ratio($longest_edge, $width, $height){
		
		if(
			!is_numeric($width) ||
			!is_numeric($height) ||
			$width <= 0 ||
			$height <= 0
		){
			
			// would otherwise divide by zero
			return [null, null];
		}
		
		// the smaller of the two factors is the one that makes both edges fit
		$ratio =
			min(
				$longest_edge / $width,
				$longest_edge / $height
			);
		
		return [
			floor($width * $ratio),
			floor($height * $ratio)
		];
	}
}

161
src/scraper/ftm.php Normal file
View File

@ -0,0 +1,161 @@
<?php
/**
 * Image scraper for findthatmeme.com, backed by its JSON search API.
 */
class ftm{
	
	// declared explicitly: creating properties dynamically is deprecated
	// since PHP 8.2. Kept public, as the dynamic property was.
	public $backend;
	
	public function __construct(){
		
		// include_once: a plain include would fatally redeclare the class
		// if the library had already been loaded during this request
		include_once "lib/backend.php";
		$this->backend = new backend("ftm");
	}
	
	/**
	 * @param string $page Result page type (unused)
	 * @return array       This scraper exposes no filters
	 */
	public function getfilters($page){
		
		return [];
	}
	
	/**
	 * POST a search request as JSON through the given proxy.
	 *
	 * @param mixed  $proxy  Proxy handle passed to backend::assign_proxy()
	 * @param string $url    API endpoint
	 * @param string $search Search term
	 * @param int    $offset Number of results to skip
	 * @return string        Raw response body
	 * @throws Exception     On any cURL error
	 */
	private function get($proxy, $url, $search, $offset){
		
		$curlproc = curl_init();
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		$payload =
			json_encode(
				[
					"search" => $search,
					"offset" => $offset
				]
			);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"Content-Length: " . strlen($payload),
			"Content-Type: application/json",
			"DNT: 1",
			"Connection: keep-alive",
			"Origin: https://findthatmeme.com",
			"Referer: https://findthatmeme.com/?search=" . urlencode($search),
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1",
			"X-Auth-Key: undefined",
			"X-CSRF-Validation-Header: true"]
		);
		
		curl_setopt($curlproc, CURLOPT_POST, true);
		curl_setopt($curlproc, CURLOPT_POSTFIELDS, $payload);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			throw new Exception(curl_error($curlproc));
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/**
	 * Run an image search, or continue one from a next-page token.
	 *
	 * @param array $get Request parameters: "s" (search), "npt"
	 * @return array     ["status", "npt", "image" => [...]]
	 * @throws Exception On empty search, transport failure or undecodable JSON
	 */
	public function image($get){
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		if($get["npt"]){
			
			[$data, $proxy] = $this->backend->get($get["npt"], "images");
			
			$data = json_decode($data, true);
			
			$count = $data["count"];
			$search = $data["search"];
		}else{
			
			$search = $get["s"];
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$count = 0;
			$proxy = $this->backend->get_ip();
		}
		
		try{
			$json =
				json_decode(
					$this->get(
						$proxy,
						"https://findthatmeme.com/api/v1/search",
						$search,
						$count
					),
					true
				);
			
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch JSON");
		}
		
		// a scalar payload decodes fine but cannot be iterated as a result list
		if(
			$json === null ||
			!is_array($json)
		){
			
			throw new Exception("Failed to decode JSON");
		}
		
		foreach($json as $item){
			
			// running total doubles as the offset of the next page
			$count++;
			
			if($item["type"] == "VIDEO"){
				
				$thumb = "thumb/" . $item["thumbnail"];
			}else{
				
				$thumb = $item["image_path"];
			}
			
			$out["image"][] = [
				"title" => date("jS \of F Y @ g:ia", strtotime($item["created_at"])),
				"source" => [
					[
						"url" =>
							"https://s3.thehackerblog.com/findthatmeme/" .
							$thumb,
						"width" => null,
						"height" => null
					]
				],
				"url" => $item["source_page_url"]
			];
		}
		
		// only offer a next page when this one returned something; otherwise
		// every empty page would advertise yet another empty page
		if(count($json) !== 0){
			
			$out["npt"] =
				$this->backend->store(
					json_encode([
						"count" => $count,
						"search" => $search
					]),
					"images",
					$proxy
				);
		}
		
		return $out;
	}
}

320
src/scraper/ghostery.php Normal file
View File

@ -0,0 +1,320 @@
<?php
/**
 * Web scraper for Ghostery Private Search (ghosterysearch.com).
 */
class ghostery{
	
	// declared explicitly: creating properties dynamically is deprecated
	// since PHP 8.2. Kept public, as dynamic properties were.
	public $backend;
	public $fuckhtml;
	
	public function __construct(){
		
		// include_once: a plain include would fatally redeclare the class
		// if the library had already been loaded during this request
		include_once "lib/backend.php";
		$this->backend = new backend("ghostery");
		
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * @param string $page Result page type; only "web" has filters
	 * @return array       Filter definitions
	 */
	public function getfilters($page){
		
		if($page != "web"){
			
			return [];
		}
		
		return [
			"country" => [
				"display" => "Country",
				"option" => [
					"any" => "All regions",
					"AR" => "Argentina",
					"AU" => "Australia",
					"AT" => "Austria",
					"BE" => "Belgium",
					"BR" => "Brazil",
					"CA" => "Canada",
					"CL" => "Chile",
					"DK" => "Denmark",
					"FI" => "Finland",
					"FR" => "France",
					"DE" => "Germany",
					"HK" => "Hong Kong",
					"IN" => "India",
					"ID" => "Indonesia",
					"IT" => "Italy",
					"JP" => "Japan",
					"KR" => "Korea",
					"MY" => "Malaysia",
					"MX" => "Mexico",
					"NL" => "Netherlands",
					"NZ" => "New Zealand",
					"NO" => "Norway",
					"CN" => "People's Republic of China",
					"PL" => "Poland",
					"PT" => "Portugal",
					"PH" => "Republic of the Philippines",
					"RU" => "Russia",
					"SA" => "Saudi Arabia",
					"ZA" => "South Africa",
					"ES" => "Spain",
					"SE" => "Sweden",
					"CH" => "Switzerland",
					"TW" => "Taiwan",
					"TR" => "Turkey",
					"GB" => "United Kingdom",
					"US" => "United States"
				]
			]
		];
	}
	
	/**
	 * Fetch a page through the given proxy; the region is sent as a cookie.
	 *
	 * $country has a default so that the optional $get no longer precedes a
	 * required parameter (deprecated since PHP 8.0).
	 *
	 * @param mixed  $proxy   Proxy handle passed to backend::assign_proxy()
	 * @param string $url     Target URL
	 * @param array  $get     Query-string parameters
	 * @param string $country Country code from getfilters(), or "any"
	 * @return string         Response body
	 * @throws Exception      On any cURL error
	 */
	private function get($proxy, $url, $get = [], $country = "any"){
		
		$curlproc = curl_init();
		
		if($get !== []){
			$get = http_build_query($get);
			$url .= "?" . $get;
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"Referer: https://ghosterysearch.com",
			"DNT: 1",
			"Sec-GPC: 1",
			"Connection: keep-alive",
			// "--" is what gets sent when no region is selected
			"Cookie: ctry=" . ($country == "any" ? "--" : $country) . "; noads=true",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: same-origin",
			"Sec-Fetch-User: ?1",
			"Priority: u=0, i"]
		);
		
		// http2 bypass
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			throw new Exception(curl_error($curlproc));
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/**
	 * Run a web search, or continue one from a next-page token.
	 *
	 * @param array $get Request parameters: "s" (search), "country", "npt"
	 * @return array     Result object with "web" entries and an optional "npt"
	 * @throws Exception On empty search, transport failure or unexpected markup
	 */
	public function web($get){
		
		if($get["npt"]){
			
			[$query, $proxy] = $this->backend->get($get["npt"], "web");
			
			parse_str($query, $query);
			
			// country
			$country = $query["c"];
			unset($query["c"]);
			
			$query = http_build_query($query);
			
			try{
				$html =
					$this->get(
						$proxy,
						"https://ghosterysearch.com/search?" . $query,
						[],
						$country
					);
				
			}catch(Exception $error){
				
				throw new Exception("Failed to fetch search page");
			}
			
		}else{
			
			$search = $get["s"];
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$country = $get["country"];
			$proxy = $this->backend->get_ip();
			
			try{
				$html =
					$this->get(
						$proxy,
						"https://ghosterysearch.com/search",
						[
							"q" => $search
						],
						$country
					);
				
			}catch(Exception $error){
				
				throw new Exception("Failed to fetch search page");
			}
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		$this->fuckhtml->load($html);
		
		$results_wrapper =
			$this->fuckhtml
			->getElementsByClassName(
				"results",
				"section"
			);
		
		if(count($results_wrapper) === 0){
			
			throw new Exception("Failed to grep result section");
		}
		
		$this->fuckhtml->load($results_wrapper[0]);
		
		// get search results
		$results =
			$this->fuckhtml
			->getElementsByClassName(
				"result",
				"li"
			);
		
		if(count($results) === 0){
			
			return $out;
		}
		
		foreach($results as $result){
			
			$this->fuckhtml->load($result);
			
			$a =
				$this->fuckhtml
				->getElementsByClassName(
					"url",
					"a"
				);
			
			if(count($a) === 0){
				
				continue;
			}
			
			$a = $a[0];
			
			$out["web"][] = [
				"title" =>
					$this->titledots(
						$this->fuckhtml
						->getTextContent(
							$this->fuckhtml
							->getElementsByTagName(
								"h2"
							)[0]
						)
					),
				"description" =>
					$this->titledots(
						$this->fuckhtml
						->getTextContent(
							$this->fuckhtml
							->getElementsByTagName(
								"p"
							)[0]
						)
					),
				"url" =>
					$this->fuckhtml
					->getTextContent(
						$a
						["attributes"]
						["href"]
					),
				"date" => null,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => [],
				"table" => []
			];
		}
		
		// go back to the whole document: pagination sits outside the results section
		$this->fuckhtml->load($html);
		
		// get pagination token
		$pagination_wrapper =
			$this->fuckhtml
			->getElementsByClassName(
				"pagination",
				"div"
			);
		
		if(count($pagination_wrapper) !== 0){
			
			// found next page!
			$this->fuckhtml->load($pagination_wrapper[0]);
			
			$a =
				$this->fuckhtml
				->getElementsByTagName(
					"a"
				);
			
			if(count($a) !== 0){
				
				// the last link of the pagination block is taken as the next page
				$q =
					parse_url(
						$this->fuckhtml
						->getTextContent(
							$a[count($a) - 1]
							["attributes"]
							["href"]
						),
						PHP_URL_QUERY
					);
				
				// carry over the country actually used for THIS request, so
				// the region survives across every following page
				$out["npt"] =
					$this->backend
					->store(
						$q . "&c=" . $country,
						"web",
						$proxy
					);
			}
		}
		
		return $out;
	}
	
	/**
	 * Strip surrounding whitespace and dots (e.g. a trailing "...") from text.
	 *
	 * @param string $title Text to clean
	 * @return string       Trimmed text
	 */
	private function titledots($title){
		
		return trim($title, " .\t\n\r\0\x0B");
	}
}

5049
src/scraper/google.php Normal file

File diff suppressed because it is too large Load Diff

1054
src/scraper/google_cse.php Normal file

File diff suppressed because it is too large Load Diff

435
src/scraper/greppr.php Normal file
View File

@ -0,0 +1,435 @@
<?php
/**
 * Web scraper for greppr.org. Searches need a set of tokens scraped from the
 * front page; these are cached in APCu under "greppr_token".
 */
class greppr{
	
	// declared explicitly: creating properties dynamically is deprecated
	// since PHP 8.2. Kept public, as dynamic properties were.
	public $backend;
	public $fuckhtml;
	
	public function __construct(){
		
		// include_once: a plain include would fatally redeclare the class
		// if the library had already been loaded during this request
		include_once "lib/backend.php";
		$this->backend = new backend("greppr");
		
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * @param string $page Result page type (unused)
	 * @return array       This scraper exposes no filters
	 */
	public function getfilters($page){
		
		return [];
	}
	
	/**
	 * Fetch a page through the given proxy and return body plus headers.
	 *
	 * @param mixed        $proxy  Proxy handle passed to backend::assign_proxy()
	 * @param string       $url    Target URL
	 * @param array        $get    Query-string parameters
	 * @param string|false $cookie PHPSESSID value to send, or false for none
	 * @return array               ["headers" => lowercase-name => value, "data" => body]
	 * @throws Exception           On any cURL error
	 */
	private function get($proxy, $url, $get = [], $cookie = false){
		
		$curlproc = curl_init();
		
		if($get !== []){
			$get = http_build_query($get);
			$url .= "?" . $get;
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		
		// both variants send the same headers; the session cookie, when
		// there is one, goes right after Accept-Encoding
		$request_headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip"
		];
		
		if($cookie !== false){
			
			$request_headers[] = "Cookie: PHPSESSID=" . $cookie;
		}
		
		array_push(
			$request_headers,
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"
		);
		
		curl_setopt($curlproc, CURLOPT_HTTPHEADER, $request_headers);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		// collect response headers (needed to read the session cookie)
		$headers = [];
		
		curl_setopt(
			$curlproc,
			CURLOPT_HEADERFUNCTION,
			function($curlproc, $header) use (&$headers){
				
				// cURL expects the number of bytes consumed
				$len = strlen($header);
				$header = explode(':', $header, 2);
				
				if(count($header) < 2){
					
					// ignore invalid headers
					return $len;
				}
				
				$headers[strtolower(trim($header[0]))] = trim($header[1]);
				
				return $len;
			}
		);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			throw new Exception(curl_error($curlproc));
		}
		
		curl_close($curlproc);
		
		return [
			"headers" => $headers,
			"data" => $data
		];
	}
	
	/**
	 * Run a web search, or continue one from a next-page token.
	 *
	 * If the cached tokens turn out to be stale (empty response body) the
	 * method calls itself once more with $first_attempt = false, which
	 * forces a fresh token fetch.
	 *
	 * @param array $get           Request parameters: "s" (search), "npt"
	 * @param bool  $first_attempt false on the internal retry
	 * @return array               Result object with "web" entries and an optional "npt"
	 * @throws Exception           On empty search, transport or token failure
	 */
	public function web($get, $first_attempt = true){
		
		if($get["npt"]){
			
			[$q, $proxy] = $this->backend->get($get["npt"], "web");
			
			$q = json_decode($q, true);
			
		}else{
			
			$search = $get["s"];
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$proxy = $this->backend->get_ip();
		}
		
		// get token
		// token[0] = static token that changes once a day
		// token[1] = dynamic token that changes on every request
		// token[2] = PHPSESSID cookie
		$tokens = apcu_fetch("greppr_token");
		
		if(
			$tokens === false ||
			$first_attempt === false // force token fetch
		){
			
			// we haven't gotten the token yet, get it
			try{
				
				$response =
					$this->get(
						$proxy,
						"https://greppr.org",
						[]
					);
			}catch(Exception $error){
				
				throw new Exception("Failed to fetch search tokens");
			}
			
			$tokens = $this->parse_token($response);
			
			if($tokens === false){
				
				throw new Exception("Failed to grep search tokens");
			}
		}
		
		try{
			
			// the static token is used as the NAME of the query parameter
			if($get["npt"]){
				
				$params = [
					$tokens[0] => $q["q"],
					"s" => $q["s"],
					"l" => 30,
					"n" => $tokens[1]
				];
			}else{
				
				$params = [
					$tokens[0] => $search,
					"n" => $tokens[1]
				];
			}
			
			$searchresults = $this->get(
				$proxy,
				"https://greppr.org/search",
				$params,
				$tokens[2]
			);
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch search page");
		}
		
		if(strlen($searchresults["data"]) === 0){
			
			// redirected to main page, which means we got old token
			// generate a new one
			
			// ... unless we just tried to do that
			if($first_attempt === false){
				
				throw new Exception("Failed to get a new search token");
			}
			
			return $this->web($get, false);
		}
		
		// refresh the token with new data (this also triggers fuckhtml load)
		$this->parse_token($searchresults, $tokens[2]);
		
		// response object
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		// get results for later
		$results =
			$this->fuckhtml
			->getElementsByClassName(
				"result",
				"div"
			);
		
		// check for next page
		$next_elem =
			$this->fuckhtml
			->getElementsByClassName(
				"pagination",
				"ul"
			);
		
		if(count($next_elem) !== 0){
			
			$this->fuckhtml->load($next_elem[0]);
			
			$as =
				$this->fuckhtml
				->getElementsByClassName(
					"page-link",
					"a"
				);
			
			// the link with href="#" is treated as the current page; the
			// link that follows it is the next page
			$break = false;
			foreach($as as $a){
				
				if($break === true){
					
					parse_str(
						$this->fuckhtml
						->getTextContent(
							$a["attributes"]["href"]
						),
						$values
					);
					
					// parameter names are not fixed, so go by position
					$values = array_values($values);
					
					$out["npt"] =
						$this->backend->store(
							json_encode(
								[
									"q" => $values[0],
									"s" => $values[1]
								]
							),
							"web",
							$proxy
						);
					break;
				}
				
				if($a["attributes"]["href"] == "#"){
					
					$break = true;
				}
			}
		}
		
		// scrape results
		foreach($results as $result){
			
			$this->fuckhtml->load($result);
			
			$links =
				$this->fuckhtml
				->getElementsByTagName(
					"a"
				);
			
			if(count($links) === 0){
				
				// no link, no result
				continue;
			}
			
			$a = $links[0];
			
			$description =
				$this->fuckhtml
				->getElementsByClassName(
					"highlightedDesc",
					"p"
				);
			
			if(count($description) === 0){
				
				$description = null;
			}else{
				
				$description =
					$this->limitstrlen(
						$this->fuckhtml
						->getTextContent(
							$description[0]
						)
					);
			}
			
			// the date is read from the last paragraph, after its "label:" prefix
			$paragraphs =
				$this->fuckhtml
				->getElementsByTagName(
					"p"
				);
			
			$date = null;
			
			if(count($paragraphs) !== 0){
				
				// limit 2: keep any colons that belong to the date itself
				// (e.g. a time of day) in one piece
				$date_parts =
					explode(
						":",
						$this->fuckhtml
						->getTextContent(
							$paragraphs[count($paragraphs) - 1]["innerHTML"]
						),
						2
					);
				
				if(isset($date_parts[1])){
					
					$date = strtotime($date_parts[1]);
					
					if($date === false){
						
						// unparseable date: report "unknown"
						$date = null;
					}
				}
			}
			
			$out["web"][] = [
				"title" =>
					$this->fuckhtml
					->getTextContent(
						$a["innerHTML"]
					),
				"description" => $description,
				"url" =>
					$this->fuckhtml
					->getTextContent(
						$a["attributes"]["href"]
					),
				"date" => $date,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => [],
				"table" => []
			];
		}
		
		return $out;
	}
	
	/**
	 * Extract the search tokens from a fetched page and cache them in APCu.
	 *
	 * Side effect: loads the page into $this->fuckhtml.
	 *
	 * @param array        $response Return value of get()
	 * @param string|false $cookie   Session cookie already in use; when false
	 *                               it is read from the Set-Cookie header
	 * @return array|false           [static token, dynamic token, cookie],
	 *                               or false when any of them is missing
	 */
	private function parse_token($response, $cookie = false){
		
		$this->fuckhtml->load($response["data"]);
		
		$scripts =
			$this->fuckhtml
			->getElementsByTagName("script");
		
		$found = false;
		foreach($scripts as $script){
			
			// tokens are embedded in an inline redirect to /search
			preg_match(
				'/window\.location ?= ?\'\/search\?([^=]+).*&n=([0-9]+)/',
				$script["innerHTML"],
				$tokens
			);
			
			if(isset($tokens[1])){
				
				$found = true;
				break;
			}
		}
		
		if($found === false){
			
			return false;
		}
		
		$tokens = [
			$tokens[1],
			$tokens[2]
		];
		
		if($cookie !== false){
			
			// we already specified a cookie, so use the one we have already
			$tokens[] = $cookie;
			apcu_store("greppr_token", $tokens);
			
			return $tokens;
		}
		
		if(!isset($response["headers"]["set-cookie"])){
			
			// server didn't send a cookie
			return false;
		}
		
		// get cookie
		preg_match(
			'/PHPSESSID=([^;]+)/',
			$response["headers"]["set-cookie"],
			$cookie
		);
		
		if(!isset($cookie[1])){
			
			// server sent an unexpected cookie
			return false;
		}
		
		$tokens[] = $cookie[1];
		apcu_store("greppr_token", $tokens);
		
		return $tokens;
	}
	
	/**
	 * Shorten text to the first line of a 300-column word wrap.
	 *
	 * @param string $text Text to shorten
	 * @return string      Shortened text
	 */
	private function limitstrlen($text){
		
		return explode("\n", wordwrap($text, 300, "\n"))[0];
	}
}

258
src/scraper/imgur.php Normal file
View File

@ -0,0 +1,258 @@
<?php
/**
 * Image scraper for imgur.com search result pages.
 */
class imgur{
	
	// declared explicitly: creating properties dynamically is deprecated
	// since PHP 8.2. Kept public, as dynamic properties were.
	public $fuckhtml;
	public $backend;
	
	public function __construct(){
		
		// include_once: a plain include would fatally redeclare the class
		// if the library had already been loaded during this request
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
		
		include_once "lib/backend.php";
		$this->backend = new backend("imgur");
	}
	
	/**
	 * @param string $page Result page type (unused: the filters are the same for all)
	 * @return array       Filter definitions
	 */
	public function getfilters($page){
		
		return [
			"sort" => [ // /score/
				"display" => "Sort by",
				"option" => [
					"score" => "Highest scoring",
					"relevance" => "Most relevant",
					"time" => "Newest first"
				]
			],
			"time" => [ // /score/day/
				"display" => "Time posted",
				"option" => [
					"all" => "All time",
					"day" => "Today",
					"week" => "This week",
					"month" => "This month",
					"year" => "This year"
				]
			],
			"format" => [ // q_type
				"display" => "Format",
				"option" => [
					"any" => "Any format",
					"jpg" => "JPG",
					"png" => "PNG",
					"gif" => "GIF",
					"anigif" => "Animated GIF",
					"album" => "Albums"
				]
			],
			"size" => [ // q_size_px
				"display" => "Size",
				"option" => [
					"any" => "Any size",
					"small" => "Small (500px or less)",
					"med" => "Medium (500px to 2000px)",
					"big" => "Big (2000px to 5000px)",
					"lrg" => "Large (5000px to 10000px)",
					"huge" => "Huge (10000px and above)"
				]
			]
		];
	}
	
	/**
	 * Fetch a search page fragment through the given proxy.
	 *
	 * @param mixed  $proxy Proxy handle passed to backend::assign_proxy()
	 * @param string $url   Target URL
	 * @param array  $get   Query-string parameters (sent after a "scrolled" flag)
	 * @return string       Response body
	 * @throws Exception    On any cURL error
	 */
	private function get($proxy, $url, $get = []){
		
		$curlproc = curl_init();
		
		if($get !== []){
			$get = http_build_query($get);
			$url .= "?scrolled&" . $get;
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Referer: https://imgur.com/search/",
			"Connection: keep-alive",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-origin",
			"TE: trailers",
			"X-Requested-With: XMLHttpRequest"]
		);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			throw new Exception(curl_error($curlproc));
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/**
	 * Run an image search, or continue one from a next-page token.
	 *
	 * @param array $get Request parameters: "s", "sort", "time", "format", "size", "npt"
	 * @return array     ["status", "npt", "image" => [...]]
	 * @throws Exception On empty search or transport failure
	 */
	public function image($get){
		
		if($get["npt"]){
			
			[$filter, $proxy] =
				$this->backend->get(
					$get["npt"],
					"images"
				);
			
			$filter = json_decode($filter, true);
			
			// the token carries the query-string filters plus bookkeeping
			// fields; pull the latter out so only the former reach the URL
			$search = $filter["s"];
			unset($filter["s"]);
			
			$sort = $filter["sort"];
			unset($filter["sort"]);
			
			$time = $filter["time"];
			unset($filter["time"]);
			
			$format = $filter["format"];
			unset($filter["format"]);
			
			$size = $filter["size"];
			unset($filter["size"]);
			
			$page = $filter["page"];
			unset($filter["page"]);
		}else{
			
			$search = $get["s"];
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$proxy = $this->backend->get_ip();
			$sort = $get["sort"];
			$time = $get["time"];
			$format = $get["format"];
			$size = $get["size"];
			$page = 0;
			
			$filter = [
				"q" => $search
			];
			
			if($format != "any"){
				
				$filter["q_type"] = $format;
			}
			
			if($size != "any"){
				
				$filter["q_size_px"] = $size;
				$filter["q_size_is_mpx"] = "off";
			}
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		try{
			$html =
				$this->get(
					$proxy,
					"https://imgur.com/search/$sort/$time/page/$page",
					$filter
				);
			
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch HTML");
		}
		
		$this->fuckhtml->load($html);
		
		$posts =
			$this->fuckhtml
			->getElementsByClassName(
				"post",
				"div"
			);
		
		foreach($posts as $post){
			
			$this->fuckhtml->load($post);
			
			$images =
				$this->fuckhtml
				->getElementsByTagName("img");
			
			$links =
				$this->fuckhtml
				->getElementsByClassName(
					"image-list-link",
					"a"
				);
			
			if(
				count($images) === 0 ||
				count($links) === 0 ||
				!isset($images[0]["attributes"]["src"]) ||
				!isset($links[0]["attributes"]["href"])
			){
				
				// not an image post we can represent, skip it
				continue;
			}
			
			$image = $images[0];
			
			// drop the last five characters of the thumbnail src (size letter
			// plus extension) so a size suffix can be re-appended below
			$image_url = "https:" . substr($this->fuckhtml->getTextContent($image["attributes"]["src"]), 0, -5);
			
			$out["image"][] = [
				"title" =>
					$this->fuckhtml
					->getTextContent(
						$image["attributes"]["alt"]
					),
				"source" => [
					[
						"url" => $image_url . ".jpg",
						"width" => null,
						"height" => null
					],
					[
						"url" => $image_url . "m.jpg",
						"width" => null,
						"height" => null
					]
				],
				"url" =>
					"https://imgur.com" .
					$this->fuckhtml
					->getTextContent(
						$links[0]
						["attributes"]
						["href"]
					)
			];
		}
		
		if(isset($out["image"][0])){
			
			// store nextpage
			$filter["s"] = $search;
			$filter["sort"] = $sort;
			$filter["time"] = $time;
			$filter["format"] = $format;
			$filter["size"] = $size;
			$filter["page"] = $page + 1;
			
			$out["npt"] =
				$this->backend->store(
					json_encode($filter),
					"images",
					$proxy
				);
		}
		
		return $out;
	}
}

476
src/scraper/marginalia.php Normal file
View File

@ -0,0 +1,476 @@
<?php
class marginalia{
/**
 * Load the HTML parser and backend libraries and create the helper
 * instances used by this scraper.
 */
public function __construct(){
	
	// include_once: a plain include would fatally redeclare the class
	// if the library had already been loaded during this request
	include_once "lib/fuckhtml.php";
	$this->fuckhtml = new fuckhtml();
	
	include_once "lib/backend.php";
	$this->backend = new backend("marginalia");
}
public function getfilters($page){
if(config::MARGINALIA_API_KEY === null){
$base = [
"adtech" => [
"display" => "Reduce adtech",
"option" => [
"no" => "No",
"yes" => "Yes"
]
],
"recent" => [
"display" => "Recent results",
"option" => [
"no" => "No",
"yes" => "Yes"
]
],
"intitle" => [
"display" => "Search in title",
"option" => [
"no" => "No",
"yes" => "Yes"
]
]
];
}else{
$base = [];
}
return array_merge(
$base,
[
"format" => [
"display" => "Format",
"option" => [
"any" => "Any format",
"html5" => "html5",
"xhtml" => "xhtml",
"html123" => "html123"
]
],
"file" => [
"display" => "Filetype",
"option" => [
"any" => "Any filetype",
"nomedia" => "Deny media",
"media" => "Contains media",
"audio" => "Contains audio",
"video" => "Contains video",
"archive" => "Contains archive",
"document" => "Contains document"
]
],
"javascript" => [
"display" => "Javascript",
"option" => [
"any" => "Allow JS",
"deny" => "Deny JS",
"require" => "Require JS"
]
],
"trackers" => [
"display" => "Trackers",
"option" => [
"any" => "Allow trackers",
"deny" => "Deny trackers",
"require" => "Require trackers"
]
],
"cookies" => [
"display" => "Cookies",
"option" => [
"any" => "Allow cookies",
"deny" => "Deny cookies",
"require" => "Require cookies"
]
],
"affiliate" => [
"display" => "Affiliate links in body",
"option" => [
"any" => "Allow affiliate links",
"deny" => "Deny affiliate links",
"require" => "Require affiliate links"
]
]
]
);
}
private function get($proxy, $url, $get = []){
$headers = [
"User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
"DNT: 1",
"Connection: keep-alive",
"Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document",
"Sec-Fetch-Mode: navigate",
"Sec-Fetch-Site: none",
"Sec-Fetch-User: ?1"
];
$curlproc = curl_init();
if($get !== []){
$get = http_build_query($get);
$url .= "?" . $get;
}
curl_setopt($curlproc, CURLOPT_URL, $url);
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
$this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
if(curl_errno($curlproc)){
throw new Exception(curl_error($curlproc));
}
curl_close($curlproc);
return $data;
}
public function web($get){
$search = [$get["s"]];
if(strlen($get["s"]) === 0){
throw new Exception("Search term is empty!");
}
$format = $get["format"];
$file = $get["file"];
foreach(
[
"javascript" => $get["javascript"],
"trackers" => $get["trackers"],
"cookies" => $get["cookies"],
"affiliate" => $get["affiliate"]
]
as $key => $value
){
if($value == "any"){ continue; }
switch($key){
case "javascript": $str = "js:true"; break;
case "trackers": $str = "special:tracking"; break;
case "cookies": $str = "special:cookies"; break;
case "affiliate": $str = "special:affiliate"; break;
}
if($value == "deny"){
$str = "-" . $str;
}
$search[] = $str;
}
if($format != "any"){
$search[] = "format:$format";
}
switch($file){
case "any": break;
case "nomedia": $search[] = "-special:media"; break;
case "media": $search[] = "special:media"; break;
default:
$search[] = "file:$file";
}
$search = implode(" ", $search);
$out = [
"status" => "ok",
"spelling" => [
"type" => "no_correction",
"using" => null,
"correction" => null
],
"npt" => null,
"answer" => [],
"web" => [],
"image" => [],
"video" => [],
"news" => [],
"related" => []
];
// API scraper
if(config::MARGINALIA_API_KEY !== null){
try{
$json =
$this->get(
$this->backend->get_ip(), // no nextpage
"https://api.marginalia-search.com/" . config::MARGINALIA_API_KEY . "/search/" . urlencode($search),
[
"count" => 20
]
);
}catch(Exception $error){
throw new Exception("Failed to get JSON");
}
if($json == "Slow down"){
throw new Exception("The API key used is rate limited. Please try again in a few minutes.");
}
$json = json_decode($json, true);
foreach($json["results"] as $result){
$out["web"][] = [
"title" => $result["title"],
"description" => str_replace("\n", " ", $result["description"]),
"url" => $result["url"],
"date" => null,
"type" => "web",
"thumb" => [
"url" => null,
"ratio" => null
],
"sublink" => [],
"table" => []
];
}
return $out;
}
// HTML parser
$proxy = $this->backend->get_ip();
if($get["npt"]){
[$params, $proxy] =
$this->backend->get(
$get["npt"],
"web"
);
try{
$html =
$this->get(
$proxy,
"https://old-search.marginalia.nu/search?" . $params
);
}catch(Exception $error){
throw new Exception("Failed to get HTML");
}
}else{
$params = [
"query" => $search
];
foreach(["adtech", "recent", "intitle"] as $v){
if($get[$v] == "yes"){
switch($v){
case "adtech": $params["adtech"] = "reduce"; break;
case "recent": $params["recent"] = "recent"; break;
case "adtech": $params["searchTitle"] = "title"; break;
}
}
}
try{
$html =
$this->get(
$proxy,
"https://old-search.marginalia.nu/search",
$params
);
}catch(Exception $error){
throw new Exception("Failed to get HTML");
}
}
$this->fuckhtml->load($html);
$sections =
$this->fuckhtml
->getElementsByClassName(
"card search-result",
"section"
);
foreach($sections as $section){
$this->fuckhtml->load($section);
$title =
$this->fuckhtml
->getElementsByClassName(
"title",
"a"
)[0];
$description =
$this->fuckhtml
->getElementsByClassName(
"description",
"p"
);
if(count($description) !== 0){
$description =
$this->fuckhtml
->getTextContent(
$description[0]
);
}else{
$description = null;
}
$sublinks = [];
$sublink_html =
$this->fuckhtml
->getElementsByClassName("additional-results");
if(count($sublink_html) !== 0){
$this->fuckhtml->load($sublink_html[0]);
$links =
$this->fuckhtml
->getElementsByTagName("a");
foreach($links as $link){
$sublinks[] = [
"title" =>
$this->fuckhtml
->getTextContent(
$link
),
"date" => null,
"description" => null,
"url" =>
$this->fuckhtml
->getTextContent(
$link["attributes"]["href"]
)
];
}
}
$out["web"][] = [
"title" =>
$this->fuckhtml
->getTextContent(
$title
),
"description" => $description,
"url" =>
$this->fuckhtml
->getTextContent(
$title["attributes"]["href"]
),
"date" => null,
"type" => "web",
"thumb" => [
"url" => null,
"ratio" => null
],
"sublink" => $sublinks,
"table" => []
];
}
// get next page
$this->fuckhtml->load($html);
$pagination =
$this->fuckhtml
->getElementsByAttributeValue(
"aria-label",
"pagination",
"nav"
);
if(count($pagination) === 0){
// no pagination
return $out;
}
$this->fuckhtml->load($pagination[0]);
$pages =
$this->fuckhtml
->getElementsByClassName(
"page-link",
"a"
);
$found_current_page = false;
foreach($pages as $page){
if(
stripos(
$page["attributes"]["class"],
"active"
) !== false
){
$found_current_page = true;
continue;
}
if($found_current_page){
// we found current page index, and we iterated over
// the next page <a>
$out["npt"] =
$this->backend->store(
parse_url(
$page["attributes"]["href"],
PHP_URL_QUERY
),
"web",
$proxy
);
break;
}
}
return $out;
}
}

1174
src/scraper/mojeek.php Normal file

File diff suppressed because it is too large Load Diff

236
src/scraper/mwmbl.php Normal file
View File

@ -0,0 +1,236 @@
<?php
/*
	Mwmbl scraper: parses the HTML result list served by beta.mwmbl.org.
	There is no pagination, so "npt" is always null.
*/
class mwmbl{
	
	// declared explicitly; creating properties dynamically is deprecated
	// as of PHP 8.2
	public $backend;
	public $fuckhtml;
	
	public function __construct(){
		
		include "lib/backend.php";
		$this->backend = new backend("mwmbl");
		
		include "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/*
		This scraper exposes no filters.
	*/
	public function getfilters($page){
		
		return [];
	}
	
	/*
		Performs an HTTP/2 GET request through the proxy assigned by the
		backend.
		
		$get, when not empty, is appended to $url as a query string.
		Returns the response body. Throws Exception with the curl error
		message when the transfer fails.
	*/
	private function get($proxy, $url, $get = []){
		
		$curlproc = curl_init();
		
		if($get !== []){
			$get = http_build_query($get);
			$url .= "?" . $get;
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		// use http2
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"Referer: https://beta.mwmbl.org/",
			"DNT: 1",
			"Sec-GPC: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: same-origin",
			"Priority: u=0, i",
			"Sec-Fetch-User: ?1"]
		);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			throw new Exception(curl_error($curlproc));
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/*
		Web search.
		
		Reads the search term from $get["s"] and returns the result array
		("status", "spelling", "npt", "answer", "web", ...). Every <li
		class="result"> becomes one web result; its "result-link-more"
		blocks become sublinks.
		Throws Exception on an empty search term or when the fetch fails.
	*/
	public function web($get){
		
		$search = $get["s"];
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		try{
			$html = $this->get(
				$this->backend->get_ip(), // no next page!
				"https://beta.mwmbl.org/",
				[
					"q" => $search
				]
			);
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch HTML. If you're getting a timeout, make sure you have curl-impersonate setup.");
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		$this->fuckhtml->load($html);
		
		$results =
			$this->fuckhtml
			->getElementsByClassName(
				"result",
				"li"
			);
		
		foreach($results as $result){
			
			$this->fuckhtml->load($result);
			
			// paragraphs of the result, searched below for title/extract
			$p =
				$this->fuckhtml
				->getElementsByTagName("p");
			
			$sublinks = [];
			
			$mores =
				$this->fuckhtml
				->getElementsByClassName(
					"result-link-more",
					"div"
				);
			
			foreach($mores as $more){
				
				$this->fuckhtml->load($more);
				
				$as =
					$this->fuckhtml
					->getElementsByClassName(
						"more",
						"a"
					);
				
				if(count($as) === 0){
					
					// ?? invalid
					continue;
				}
				
				$sublinks[] = [
					"title" =>
						$this->titledots(
							$this->fuckhtml
							->getTextContent(
								$this->fuckhtml
								->getElementsByClassName(
									"more-title",
									"span"
								)[0]
							)
						),
					"description" =>
						$this->titledots(
							$this->fuckhtml
							->getTextContent(
								$this->fuckhtml
								->getElementsByClassName(
									"more-extract",
									"span"
								)[0]
							)
						),
					"url" =>
						$this->fuckhtml
						->getTextContent(
							$as[0]
							["attributes"]
							["href"]
						)
				];
			}
			
			// reset
			$this->fuckhtml->load($result);
			
			$out["web"][] = [
				"title" =>
					$this->titledots(
						$this->fuckhtml
						->getTextContent(
							$this->fuckhtml
							->getElementsByClassName(
								"title",
								$p
							)[0]
						)
					),
				"description" =>
					$this->titledots(
						$this->fuckhtml
						->getTextContent(
							$this->fuckhtml
							->getElementsByClassName(
								"extract",
								$p
							)[0]
						)
					),
				"url" =>
					$this->fuckhtml
					->getTextContent(
						$this->fuckhtml
						->getElementsByTagName("a")
						[0]
						["attributes"]
						["href"]
					),
				"date" => null,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => $sublinks,
				"table" => []
			];
		}
		
		return $out;
	}
	
	/*
		Strips trailing horizontal ellipsis characters (U+2026) from $title.
		
		The ellipsis is matched as a whole UTF-8 code point rather than
		through rtrim()'s byte mask, so a title ending in another multibyte
		character that shares one of its bytes is left intact. rtrim() with
		an empty mask would not strip anything at all.
		
		If $title is not valid UTF-8 it is returned unchanged.
	*/
	private function titledots($title){
		
		$trimmed = preg_replace('/\x{2026}+\z/u', '', (string)$title);
		
		// preg_replace() returns null on error (e.g. malformed UTF-8)
		return $trimmed === null ? $title : $trimmed;
	}
}

439
src/scraper/pinterest.php Normal file
View File

@ -0,0 +1,439 @@
<?php
/*
	Pinterest image scraper, built on the BaseSearchResource endpoint of
	ca.pinterest.com. The first page is fetched with GET and yields the
	session cookies + CSRF token; following pages are fetched with POST
	using those stored cookies and the bookmark returned by the API.
*/
class pinterest{
	
	// declared explicitly; creating properties dynamically is deprecated
	// as of PHP 8.2
	public $backend;
	
	public function __construct(){
		
		include "lib/backend.php";
		$this->backend = new backend("pinterest");
	}
	
	/*
		This scraper exposes no filters.
	*/
	public function getfilters($page){
		
		return [];
	}
	
	/*
		Performs the HTTP request through the proxy assigned by the backend.
		
		$header_data_post === null: GET request. $get is sent as query
		string, Set-Cookie response headers are collected and $cookies is
		overwritten with ["csrf" => ..., "cookie" => ...].
		
		Otherwise: POST request (pagination). $get is sent as form body,
		$cookies["csrf"] / $cookies["cookie"] are sent back and
		$header_data_post (the search term) is used for the source-url header.
		
		$cookies has a default so that no required parameter follows the
		optional $get (deprecated since PHP 8.0).
		
		Returns the response body. Throws Exception on curl errors or when
		the GET response carries no csrftoken cookie.
	*/
	private function get($proxy, $url, $get = [], &$cookies = [], $header_data_post = null){
		
		$curlproc = curl_init();
		
		if($header_data_post === null){
			
			// handling GET
			
			// extract cookies
			$cookies_tmp = [];
			
			curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
				
				$length = strlen($header);
				
				$header = explode(":", $header, 2);
				
				if(
					count($header) === 2 &&
					trim(strtolower($header[0])) == "set-cookie"
				){
					
					$cookie_tmp = explode("=", trim($header[1]), 2);
					
					// ignore malformed cookies that carry no value
					if(count($cookie_tmp) === 2){
						
						$cookies_tmp[trim($cookie_tmp[0])] =
							explode(";", $cookie_tmp[1], 2)[0];
					}
				}
				
				// curl expects the number of bytes handled
				return $length;
			});
			
			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: application/json, text/javascript, */*, q=0.01",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"Referer: https://ca.pinterest.com/",
				"X-Requested-With: XMLHttpRequest",
				"X-APP-VERSION: 78f8764",
				"X-Pinterest-AppState: active",
				"X-Pinterest-Source-Url: /",
				"X-Pinterest-PWS-Handler: www/index.js",
				"screen-dpr: 1",
				"is-preload-enabled: 1",
				"DNT: 1",
				"Sec-GPC: 1",
				"Sec-Fetch-Dest: empty",
				"Sec-Fetch-Mode: cors",
				"Sec-Fetch-Site: same-origin",
				"Connection: keep-alive",
				"Alt-Used: ca.pinterest.com",
				"Priority: u=0",
				"TE: trailers"]
			);
			
			if($get !== []){
				$get = http_build_query($get);
				$url .= "?" . $get;
			}
		}else{
			
			// handling POST (pagination)
			$get = http_build_query($get);
			
			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: application/json, text/javascript, */*, q=0.01",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"Content-Type: application/x-www-form-urlencoded",
				"Content-Length: " . strlen($get),
				"Referer: https://ca.pinterest.com/",
				"X-Requested-With: XMLHttpRequest",
				"X-APP-VERSION: 78f8764",
				"X-CSRFToken: " . $cookies["csrf"],
				"X-Pinterest-AppState: active",
				"X-Pinterest-Source-Url: /search/pins/?rs=ac&len=2&q=" . urlencode($header_data_post) . "&eq=" . urlencode($header_data_post),
				"X-Pinterest-PWS-Handler: www/search/[scope].js",
				"screen-dpr: 1",
				"is-preload-enabled: 1",
				"Origin: https://ca.pinterest.com",
				"DNT: 1",
				"Sec-GPC: 1",
				"Sec-Fetch-Dest: empty",
				"Sec-Fetch-Mode: cors",
				"Sec-Fetch-Site: same-origin",
				"Connection: keep-alive",
				"Alt-Used: ca.pinterest.com",
				"Cookie: " . $cookies["cookie"],
				"TE: trailers"]
			);
			
			curl_setopt($curlproc, CURLOPT_POST, true);
			curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		
		// http2 bypass
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			throw new Exception(curl_error($curlproc));
		}
		
		if($header_data_post === null){
			
			if(!isset($cookies_tmp["csrftoken"])){
				
				throw new Exception("Failed to grep CSRF token");
			}
			
			// flatten the collected cookies into a Cookie header value
			$cookies = "";
			
			foreach($cookies_tmp as $cookie_name => $cookie_value){
				
				$cookies .= $cookie_name . "=" . $cookie_value . "; ";
			}
			
			$cookies = [
				"csrf" => $cookies_tmp["csrftoken"],
				"cookie" => rtrim($cookies, " ;")
			];
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/*
		Converts one "pin"/"board" result item into an image result.
		
		The last entry of $item["images"] is used as the full size source,
		the second entry as thumbnail (falling back to the first entry when
		there is only one). The title is built from "grid_title" and
		"description", falling back to the board name, else null.
		
		Returns null when the item carries no images.
	*/
	private function item_to_image($item){
		
		if(
			!isset($item["images"]) ||
			!is_array($item["images"]) ||
			count($item["images"]) === 0
		){
			
			return null;
		}
		
		$images = array_values($item["images"]);
		$image = $images[count($images) - 1]; // original
		$thumb = isset($images[1]) ? $images[1] : $images[0]; // 236x
		
		$title = [];
		
		foreach(["grid_title", "description"] as $field){
			
			if(
				isset($item[$field]) &&
				trim($item[$field]) != ""
			){
				
				$title[] = $item[$field];
			}
		}
		
		$title = implode(": ", $title);
		
		if(
			$title == "" &&
			isset($item["board"]["name"]) &&
			trim($item["board"]["name"]) != ""
		){
			
			$title = $item["board"]["name"];
		}
		
		if($title == ""){
			
			$title = null;
		}
		
		return [
			"title" => $title,
			"source" => [
				[
					"url" => $image["url"],
					"width" => (int)$image["width"],
					"height" => (int)$image["height"]
				],
				[
					"url" => $thumb["url"],
					"width" => (int)$thumb["width"],
					"height" => (int)$thumb["height"]
				]
			],
			"url" =>
				isset($item["link"]) ?
				$item["link"] :
				"https://ca.pinterest.com/pin/" . $item["id"]
		];
	}
	
	/*
		Image search.
		
		With $get["npt"] set, the stored search term, bookmark and cookies
		are restored and the next page is requested. Otherwise $get["s"] is
		searched from the first page.
		
		Returns ["status" => "ok", "npt" => ..., "image" => [...]].
		Throws Exception on an empty search term, fetch/decode failures,
		non-success API responses and sensitivity notices.
	*/
	public function image($get){
		
		if($get["npt"]){
			
			[$data, $proxy] =
				$this->backend->get(
					$get["npt"], "images"
				);
			
			$data = json_decode($data, true);
			
			$search = $data["q"];
			$cookies = $data["cookies"];
			
			try{
				$json =
					$this->get(
						$proxy,
						"https://ca.pinterest.com/resource/BaseSearchResource/get/",
						[
							"source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed",
							"data" => json_encode(
								[
									"options" => [
										"applied_unified_filters" => null,
										"appliedProductFilters" => "---",
										"article" => null,
										"auto_correction_disabled" => false,
										"corpus" => null,
										"customized_rerank_type" => null,
										"domains" => null,
										"dynamicPageSizeExpGroup" => null,
										"filters" => null,
										"journey_depth" => null,
										"page_size" => null,
										"price_max" => null,
										"price_min" => null,
										"query_pin_sigs" => null,
										"query" => $data["q"],
										"redux_normalize_feed" => true,
										"request_params" => null,
										"rs" => "typed",
										"scope" => "pins",
										"selected_one_bar_modules" => null,
										"source_id" => null,
										"source_module_id" => null,
										"source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed",
										"top_pin_id" => null,
										"top_pin_ids" => null,
										"bookmarks" => [
											$data["bookmark"]
										]
									],
									"context" => []
								],
								JSON_UNESCAPED_SLASHES
							)
						],
						$cookies,
						$search
					);
				
			}catch(Exception $error){
				
				throw new Exception("Failed to fetch JSON");
			}
			
		}else{
			
			$search = $get["s"];
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			// The request mirrors the browser's XHR: "source_url", "data"
			// (JSON options) and "_" (timestamp).
			// NOTE(review): a previously captured browser request used
			// "rs=ac" inside source_url and sent "context":{}; here rs
			// carries the search term and the empty context array encodes
			// as [] -- confirm whether Pinterest cares.
			$source_url = "/search/pins/?q=" . urlencode($search) . "&rs=" . urlencode($search);
			
			$filter = [
				"source_url" => $source_url,
				"rs" => "typed",
				"data" =>
					json_encode(
						[
							"options" => [
								"applied_unified_filters" => null,
								"appliedProductFilters" => "---",
								"article" => null,
								"corpus" => null,
								"customized_rerank_type" => null,
								"domains" => null,
								"dynamicPageSizeExpGroup" => null,
								"filters" => null,
								"journey_depth" => null,
								"page_size" => null,
								"price_max" => null,
								"price_min" => null,
								"query_pin_sigs" => null,
								"query" => $search,
								"redux_normalize_feed" => true,
								"request_params" => null,
								"rs" => "ac",
								"scope" => "pins", // pins, boards, videos,
								"selected_one_bar_modules" => null,
								"source_id" => null,
								"source_module_id" => null,
								"source_url" => $source_url,
								"top_pin_id" => null,
								"top_pin_ids" => null
							],
							"context" => []
						]
					),
				"_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1)
			];
			
			$proxy = $this->backend->get_ip();
			$cookies = [];
			
			try{
				$json =
					$this->get(
						$proxy,
						"https://ca.pinterest.com/resource/BaseSearchResource/get/",
						$filter,
						$cookies,
						null
					);
				
			}catch(Exception $error){
				
				throw new Exception("Failed to fetch JSON");
			}
		}
		
		$json = json_decode($json, true);
		
		if($json === null){
			
			throw new Exception("Failed to decode JSON");
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		if(
			!isset(
				$json["resource_response"]
				["status"]
			)
		){
			
			throw new Exception("Unknown API failure");
		}
		
		if($json["resource_response"]["status"] != "success"){
			
			$status = "Got non-OK response: " . $json["resource_response"]["status"];
			
			if(
				isset(
					$json["resource_response"]["message"]
				)
			){
				
				$status .= " - " . $json["resource_response"]["message"];
			}
			
			throw new Exception($status);
		}
		
		if(
			isset(
				$json["resource_response"]["sensitivity"]
				["notices"][0]["description"]["text"]
			)
		){
			
			throw new Exception(
				"Pinterest returned a notice: " .
				$json["resource_response"]["sensitivity"]["notices"][0]["description"]["text"]
			);
		}
		
		// get NPT
		if(isset($json["resource_response"]["bookmark"])){
			
			$out["npt"] =
				$this->backend->store(
					json_encode([
						"q" => $search,
						"bookmark" => $json["resource_response"]["bookmark"],
						"cookies" => $cookies
					]),
					"images",
					$proxy
				);
		}
		
		$items =
			isset($json["resource_response"]["data"]["results"]) &&
			is_array($json["resource_response"]["data"]["results"]) ?
			$json["resource_response"]["data"]["results"] : [];
		
		foreach($items as $item){
			
			// only pins and boards carry an image object
			if(
				!isset($item["type"]) ||
				!in_array($item["type"], ["pin", "board"], true)
			){
				
				continue;
			}
			
			$image = $this->item_to_image($item);
			
			if($image !== null){
				
				$out["image"][] = $image;
			}
		}
		
		return $out;
	}
}

937
src/scraper/qwant.php Normal file
View File

@ -0,0 +1,937 @@
<?php
class qwant{
// Loads lib/backend.php and creates the backend instance for "qwant",
// stored in $this->backend for use by the request methods.
public function __construct(){
include "lib/backend.php";
$this->backend = new backend("qwant");
}
/*
	Returns the filter definitions for the given result page.
	
	"nsfw" and "country" are always present; "web", "images", "videos"
	and "news" each append their own filters. Any other $page value
	returns only the two common filters.
*/
public function getfilters($page){
	
	// shared by the web and images pages
	$time = [
		"display" => "Time posted",
		"option" => [
			"any" => "Any time",
			"day" => "Past 24 hours",
			"week" => "Past week",
			"month" => "Past month"
		]
	];
	
	$base = [
		"nsfw" => [
			"display" => "NSFW",
			"option" => [
				"yes" => "Yes",
				"maybe" => "Maybe",
				"no" => "No"
			]
		],
		"country" => [
			"display" => "Country",
			"option" => [
				"en_US" => "United States",
				"fr_FR" => "France",
				"en_GB" => "Great Britain",
				"de_DE" => "Germany",
				"it_IT" => "Italy",
				"es_AR" => "Argentina",
				"en_AU" => "Australia",
				"es_ES" => "Spain (es)",
				"ca_ES" => "Spain (ca)",
				"cs_CZ" => "Czech Republic",
				"ro_RO" => "Romania",
				"el_GR" => "Greece",
				"zh_CN" => "China",
				"zh_HK" => "Hong Kong",
				"en_NZ" => "New Zealand",
				"th_TH" => "Thailand",
				"ko_KR" => "South Korea",
				"sv_SE" => "Sweden",
				"nb_NO" => "Norway",
				"da_DK" => "Denmark",
				"hu_HU" => "Hungary",
				"et_EE" => "Estonia",
				"es_MX" => "Mexico",
				"es_CL" => "Chile",
				"en_CA" => "Canada (en)",
				"fr_CA" => "Canada (fr)",
				"en_MY" => "Malaysia",
				"bg_BG" => "Bulgaria",
				"fi_FI" => "Finland",
				"pl_PL" => "Poland",
				"nl_NL" => "Netherlands",
				"pt_PT" => "Portugal",
				"de_CH" => "Switzerland (de)",
				"fr_CH" => "Switzerland (fr)",
				"it_CH" => "Switzerland (it)",
				"de_AT" => "Austria",
				"fr_BE" => "Belgium (fr)",
				"nl_BE" => "Belgium (nl)",
				"en_IE" => "Ireland",
				"he_IL" => "Israel"
			]
		]
	];
	
	switch($page){
		
		case "web":
			$base = array_merge(
				$base,
				[
					"time" => $time,
					"extendedsearch" => [
						// no display, wont show in interface
						"option" => [
							"yes" => "Yes",
							"no" => "No"
						]
					]
				]
			);
			break;
		
		case "images":
			$base = array_merge(
				$base,
				[
					"time" => $time,
					"size" => [
						"display" => "Size",
						"option" => [
							"any" => "Any size",
							"large" => "Large",
							"medium" => "Medium",
							"small" => "Small"
						]
					],
					"color" => [
						"display" => "Color",
						"option" => [
							"any" => "Any color",
							"coloronly" => "Color only",
							"monochrome" => "Monochrome",
							"black" => "Black",
							"brown" => "Brown",
							"gray" => "Gray",
							"white" => "White",
							"yellow" => "Yellow",
							"orange" => "Orange",
							"red" => "Red",
							"pink" => "Pink",
							"purple" => "Purple",
							"blue" => "Blue",
							"teal" => "Teal",
							"green" => "Green"
						]
					],
					"imagetype" => [
						"display" => "Type",
						"option" => [
							"any" => "Any type",
							"animatedgif" => "Animated GIF",
							"photo" => "Photograph",
							"transparent" => "Transparent"
						]
					],
					"license" => [
						"display" => "License",
						"option" => [
							"any" => "Any license",
							"share" => "Non-commercial reproduction and sharing",
							"sharecommercially" => "Reproduction and sharing",
							"modify" => "Non-commercial reproduction, sharing and modification",
							"modifycommercially" => "Reproduction, sharing and modification",
							"public" => "Public domain"
						]
					]
				]
			);
			break;
		
		case "videos":
			$base = array_merge(
				$base,
				[
					"order" => [
						"display" => "Order by",
						"option" => [
							"relevance" => "Relevance",
							"views" => "Views",
							"date" => "Most recent",
						]
					],
					"source" => [
						"display" => "Source",
						"option" => [
							"any" => "Any source",
							"youtube" => "YouTube",
							"dailymotion" => "Dailymotion",
						]
					]
				]
			);
			break;
		
		case "news":
			$base = array_merge(
				$base,
				[
					// news additionally offers a one hour window
					"time" => [
						"display" => "Time posted",
						"option" => [
							"any" => "Any time",
							"hour" => "Less than 1 hour ago",
							"day" => "Past 24 hours",
							"week" => "Past week",
							"month" => "Past month"
						]
					],
					"order" => [
						"display" => "Order by",
						"option" => [
							"relevance" => "Relevance",
							"date" => "Most recent"
						]
					]
				]
			);
			break;
	}
	
	return $base;
}
/*
	Sends a GET request over HTTP/2 through the proxy assigned by the
	backend and returns the raw response body.
	
	$get, when not empty, is appended to $url as a query string.
	Throws Exception carrying the curl error message on failure.
*/
private function get($proxy, $url, $get = []){
	
	// only append a query string when there is something to send
	if($get !== []){
		
		$url .= "?" . http_build_query($get);
	}
	
	$curlproc = curl_init();
	
	curl_setopt_array(
		$curlproc,
		[
			CURLOPT_URL => $url,
			CURLOPT_ENCODING => "", // default encoding
			CURLOPT_HTTPHEADER => [
				"User-Agent: " . config::USER_AGENT,
				"Accept: application/json, text/plain, */*",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"DNT: 1",
				"Connection: keep-alive",
				"Origin: https://www.qwant.com",
				"Referer: https://www.qwant.com/",
				"Sec-Fetch-Dest: empty",
				"Sec-Fetch-Mode: cors",
				"Sec-Fetch-Site: same-site",
				"TE: trailers"
			],
			// Bypass HTTP/2 check
			CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0,
			CURLOPT_RETURNTRANSFER => true,
			CURLOPT_SSL_VERIFYHOST => 2,
			CURLOPT_SSL_VERIFYPEER => true,
			CURLOPT_CONNECTTIMEOUT => 30,
			CURLOPT_TIMEOUT => 30
		]
	);
	
	$this->backend->assign_proxy($curlproc, $proxy);
	
	$response = curl_exec($curlproc);
	
	if(curl_errno($curlproc)){
		
		throw new Exception(curl_error($curlproc));
	}
	
	curl_close($curlproc);
	
	return $response;
}
/*
	Web search.
	
	Reads from $get: "npt" (next page token), "s" (search term), "time",
	"country", "nsfw" and "extendedsearch".
	
	Returns an array with the keys "status", "spelling", "npt", "answer",
	"web", "image", "video", "news" and "related".
	
	Throws Exception when the search term is empty or longer than 2048
	bytes, when the JSON cannot be fetched or decoded, when the server
	reports an error message, when no result object is returned and when
	the first web result lacks "urlPingSuffix".
*/
public function web($get){
if($get["npt"]){
// get next page data
[$params, $proxy] = $this->backend->get($get["npt"], "web");
$params = json_decode($params, true);
}else{
// get _GET data instead
$search = $get["s"];
if(strlen($search) === 0){
throw new Exception("Search term is empty!");
}
if(strlen($search) > 2048){
throw new Exception("Search term is too long!");
}
$proxy = $this->backend->get_ip();
$params = [
"q" => $search,
"freshness" => $get["time"],
"count" => 10,
"locale" => $get["country"],
"offset" => 0,
"device" => "desktop",
"tgp" => 3,
"safesearch" => 0,
"displayed" => "true"
];
// map the nsfw filter onto the safesearch level; any other value
// keeps the default of 0 set above
switch($get["nsfw"]){
case "yes": $params["safesearch"] = 0; break;
case "maybe": $params["safesearch"] = 1; break;
case "no": $params["safesearch"] = 2; break;
}
}
/*
$handle = fopen("scraper/qwant_web.json", "r");
$json = fread($handle, filesize("scraper/qwant_web.json"));
fclose($handle);*/
try{
$json =
$this->get(
$proxy,
"https://fdn.qwant.com/v3/search/web",
$params
);
}catch(Exception $error){
throw new Exception("Could not fetch JSON");
}
$json = json_decode($json, true);
if($json === NULL){
throw new Exception("Failed to decode JSON");
}
if(isset($json["data"]["message"][0])){
throw new Exception("Server returned an error:\n" . $json["data"]["message"][0]);
}
$out = [
"status" => "ok",
"spelling" => [
"type" => "no_correction",
"using" => null,
"correction" => null
],
"npt" => null,
"answer" => [],
"web" => [],
"image" => [],
"video" => [],
"news" => [],
"related" => []
];
// error_code 5 on a non-success status is treated as an empty result
// set rather than a failure
if(
$json["status"] != "success" &&
$json["data"]["error_code"] === 5
){
// no results
return $out;
}
$this->detect_errors($json);
if(!isset($json["data"]["result"]["items"]["mainline"])){
throw new Exception("Server did not return a result object");
}
// data is OK, parse
// get instant answer
// (only when extended search is enabled and the sidebar names an
// endpoint to fetch it from; costs one extra request)
if(
$get["extendedsearch"] == "yes" &&
isset($json["data"]["result"]["items"]["sidebar"][0]["endpoint"])
){
try{
$answer =
$this->get(
$proxy,
"https://api.qwant.com/v3" .
$json["data"]["result"]["items"]["sidebar"][0]["endpoint"],
[]
);
$answer = json_decode($answer, true);
if(
$answer === null ||
$answer["status"] != "success" ||
$answer["data"]["result"] === null
){
// jump to the catch block below, which skips the answer
throw new Exception();
}
// parse answer
$out["answer"][] = [
"title" => $answer["data"]["result"]["title"],
"description" => [
[
"type" => "text",
"value" => $this->trimdots($answer["data"]["result"]["description"])
]
],
"url" => $answer["data"]["result"]["url"],
"thumb" =>
$answer["data"]["result"]["thumbnail"]["landscape"] == null ?
null :
$this->unshitimage(
$answer["data"]["result"]["thumbnail"]["landscape"],
false
),
"table" => [],
"sublink" => []
];
}catch(Exception $error){
// do nothing in case of failure
}
}
// get word correction
if(isset($json["data"]["query"]["queryContext"]["alteredQuery"])){
$out["spelling"] = [
"type" => "including",
"using" => $json["data"]["query"]["queryContext"]["alteredQuery"],
"correction" => $json["data"]["query"]["queryContext"]["alterationOverrideQuery"]
];
}
// check for next page
if($json["data"]["result"]["lastPage"] === false){
// advance by the page size ("count" is 10) and store the request
// parameters together with the proxy as next page token
$params["offset"] = $params["offset"] + 10;
$out["npt"] =
$this->backend->store(
json_encode($params),
"web",
$proxy
);
}
// parse results
foreach($json["data"]["result"]["items"]["mainline"] as $item){
switch($item["type"]){ // ignores ads
case "web":
$first_iteration = true;
foreach($item["items"] as $result){
if(isset($result["thumbnailUrl"])){
$thumb = [
"url" => $this->unshitimage($result["thumbnailUrl"]),
"ratio" => "16:9"
];
}else{
$thumb = [
"url" => null,
"ratio" => null
];
}
$sublinks = [];
if(isset($result["links"])){
foreach($result["links"] as $link){
$sublinks[] = [
"title" => $this->trimdots($link["title"]),
"date" => null,
"description" => isset($link["desc"]) ? $this->trimdots($link["desc"]) : null,
"url" => $link["url"]
];
}
}
// detect gibberish results
// (only the first result of each web group is checked)
if(
$first_iteration &&
!isset($result["urlPingSuffix"])
){
throw new Exception("Qwant returned gibberish results");
}
$out["web"][] = [
"title" => $this->trimdots($result["title"]),
"description" => $this->trimdots($result["desc"]),
"url" => $result["url"],
"date" => null,
"type" => "web",
"thumb" => $thumb,
"sublink" => $sublinks,
"table" => []
];
$first_iteration = false;
}
break;
case "images":
foreach($item["items"] as $image){
$out["image"][] = [
"title" => $image["title"],
"source" => [
[
"url" => $image["media"],
"width" => (int)$image["width"],
"height" => (int)$image["height"]
],
[
"url" => $this->unshitimage($image["thumbnail"]),
"width" => $image["thumb_width"],
"height" => $image["thumb_height"]
]
],
"url" => $image["url"]
];
}
break;
case "videos":
foreach($item["items"] as $video){
$out["video"][] = [
"title" => $video["title"],
"description" => null,
"date" => (int)$video["date"],
// NOTE(review): the division by 1000 suggests the API reports
// milliseconds -- confirm against an actual response
"duration" => $video["duration"] === null ? null : $video["duration"] / 1000,
"views" => null,
"thumb" =>
$video["thumbnail"] === null ?
[
"url" => null,
"ratio" => null,
] :
[
"url" => $this->unshitimage($video["thumbnail"]),
"ratio" => "16:9",
],
"url" => $video["url"]
];
}
break;
case "related_searches":
foreach($item["items"] as $related){
$out["related"][] = $related["text"];
}
break;
}
}
return $out;
}
/**
 * Run a Qwant image search, or continue one from a pagination token.
 *
 * @param array $get Request parameters: "npt" (pagination token), "s" (search term),
 *                   "country", "time", "size", "color", "imagetype", "license", "nsfw".
 * @return array ["status" => "ok", "npt" => next-page token or null, "image" => results]
 * @throws Exception On an empty search term, a failed request, undecodable JSON,
 *                   an API error, or a response shaped like a web search.
 */
public function image($get){
	
	if($get["npt"]){
		
		// continuation: the stored payload is the JSON-encoded parameter set
		[$stored, $proxy] =
			$this->backend->get(
				$get["npt"],
				"images"
			);
		
		$params = json_decode($stored, true);
	}else{
		
		$search = $get["s"];
		
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		$proxy = $this->backend->get_ip();
		
		$params = [
			"t" => "images",
			"q" => $search,
			"count" => 125,
			"locale" => $get["country"],
			"offset" => 0, // advanced by 125 for each following page
			"device" => "desktop",
			"tgp" => 3
		];
		
		// filters left on "any" are not sent at all
		if($get["time"] != "any"){
			
			$params["freshness"] = $get["time"];
		}
		
		foreach(["size", "color", "imagetype", "license"] as $filter){
			
			if($get[$filter] != "any"){
				
				$params[$filter] = $get[$filter];
			}
		}
		
		$nsfw = $get["nsfw"];
		
		if($nsfw == "yes"){
			
			$params["safesearch"] = 0;
		}elseif($nsfw == "maybe"){
			
			$params["safesearch"] = 1;
		}elseif($nsfw == "no"){
			
			$params["safesearch"] = 2;
		}
	}
	
	try{
		
		$response =
			$this->get(
				$proxy,
				"https://api.qwant.com/v3/search/images",
				$params
			);
	}catch(Exception $err){
		
		throw new Exception("Failed to get JSON");
	}
	
	$json = json_decode($response, true);
	
	if($json === null){
		
		throw new Exception("Failed to decode JSON");
	}
	
	$this->detect_errors($json);
	
	// a "mainline" key means the answer is shaped like a web search, not an image list
	if(isset($json["data"]["result"]["items"]["mainline"])){
		
		throw new Exception("Qwant returned gibberish results");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"image" => []
	];
	
	$result = $json["data"]["result"];
	
	if($result["lastPage"] === false){
		
		// more pages exist: store the parameters with the offset moved forward
		$params["offset"] += 125;
		
		$out["npt"] =
			$this->backend->store(
				json_encode($params),
				"images",
				$proxy
			);
	}
	
	foreach($result["items"] as $image){
		
		$full = [
			"url" => $image["media"],
			"width" => $image["width"],
			"height" => $image["height"]
		];
		
		$preview = [
			"url" => $this->unshitimage($image["thumbnail"]),
			"width" => $image["thumb_width"],
			"height" => $image["thumb_height"]
		];
		
		$out["image"][] = [
			"title" => $this->trimdots($image["title"]),
			"source" => [$full, $preview],
			"url" => $image["url"]
		];
	}
	
	return $out;
}
/**
 * Run a Qwant video search (first page only).
 *
 * @param array $get Request parameters: "s" (search term), "country", "nsfw".
 * @return array Result set; only the "video" list is ever filled, "npt" stays null.
 * @throws Exception On an empty search term, a failed request, undecodable JSON,
 *                   an API error, or a response shaped like a web search.
 */
public function video($get){
	
	$search = $get["s"];
	
	if(strlen($search) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$params = [
		"t" => "videos",
		"q" => $search,
		"count" => 50,
		"locale" => $get["country"],
		"offset" => 0, // pagination is deliberately not implemented
		"device" => "desktop",
		"tgp" => 3
	];
	
	$nsfw = $get["nsfw"];
	
	if($nsfw == "yes"){
		
		$params["safesearch"] = 0;
	}elseif($nsfw == "maybe"){
		
		$params["safesearch"] = 1;
	}elseif($nsfw == "no"){
		
		$params["safesearch"] = 2;
	}
	
	try{
		
		$response =
			$this->get(
				$this->backend->get_ip(),
				"https://api.qwant.com/v3/search/videos",
				$params
			);
	}catch(Exception $error){
		
		throw new Exception("Could not fetch JSON");
	}
	
	$json = json_decode($response, true);
	
	if($json === null){
		
		throw new Exception("Could not parse JSON");
	}
	
	$this->detect_errors($json);
	
	// a "mainline" key means the answer is shaped like a web search, not a video list
	if(isset($json["data"]["result"]["items"]["mainline"])){
		
		throw new Exception("Qwant returned gibberish results");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"video" => [],
		"author" => [],
		"livestream" => [],
		"playlist" => [],
		"reel" => []
	];
	
	foreach($json["data"]["result"]["items"] as $video){
		
		if(!empty($video["thumbnail"])){
			
			$thumb = [
				"url" => $this->unshitimage($video["thumbnail"], false),
				"ratio" => "16:9"
			];
		}else{
			
			$thumb = [
				"url" => null,
				"ratio" => null
			];
		}
		
		// a duration of zero is reported as "unknown"
		$seconds = (int)$video["duration"];
		
		if($seconds === 0){
			
			$seconds = null;
		}
		
		$out["video"][] = [
			"title" => $video["title"],
			"description" => $this->limitstrlen($video["desc"]),
			"author" => [
				"name" => $video["channel"],
				"url" => null,
				"avatar" => null
			],
			"date" => (int)$video["date"],
			"duration" => $seconds,
			"views" => null,
			"thumb" => $thumb,
			// drop the syndication query string from the result link
			"url" => preg_replace("/\?syndication=.+/", "", $video["url"])
		];
	}
	
	return $out;
}
/**
 * Run a Qwant news search (first page only).
 *
 * @param array $get Request parameters: "s" (search term), "country", "nsfw".
 * @return array ["status" => "ok", "npt" => null, "news" => results]
 * @throws Exception On an empty search term, a failed request, undecodable JSON,
 *                   an API error, or a response shaped like a web search.
 */
public function news($get){
	
	$search = $get["s"];
	
	if(strlen($search) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$params = [
		"t" => "news",
		"q" => $search,
		"count" => 50,
		"locale" => $get["country"],
		"offset" => 0, // pagination is deliberately not implemented
		"device" => "desktop",
		"tgp" => 3
	];
	
	$nsfw = $get["nsfw"];
	
	if($nsfw == "yes"){
		
		$params["safesearch"] = 0;
	}elseif($nsfw == "maybe"){
		
		$params["safesearch"] = 1;
	}elseif($nsfw == "no"){
		
		$params["safesearch"] = 2;
	}
	
	try{
		
		$response =
			$this->get(
				$this->backend->get_ip(),
				"https://api.qwant.com/v3/search/news",
				$params
			);
	}catch(Exception $error){
		
		throw new Exception("Could not fetch JSON");
	}
	
	$json = json_decode($response, true);
	
	if($json === null){
		
		throw new Exception("Could not parse JSON");
	}
	
	$this->detect_errors($json);
	
	// a "mainline" key means the answer is shaped like a web search, not a news list
	if(isset($json["data"]["result"]["items"]["mainline"])){
		
		throw new Exception("Qwant returned gibberish results");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"news" => []
	];
	
	foreach($json["data"]["result"]["items"] as $article){
		
		if(!empty($article["media"][0]["pict_big"]["url"])){
			
			$thumb = [
				"url" => $this->unshitimage($article["media"][0]["pict_big"]["url"], false),
				"ratio" => "16:9"
			];
		}else{
			
			$thumb = [
				"url" => null,
				"ratio" => null
			];
		}
		
		$out["news"][] = [
			"title" => $article["title"],
			"author" => $article["press_name"],
			"description" => $this->trimdots($article["desc"]),
			"date" => (int)$article["date"],
			"thumb" => $thumb,
			"url" => $article["url"]
		];
	}
	
	return $out;
}
/**
 * Throw when a decoded Qwant response reports an error status.
 *
 * @param mixed $json Decoded API response.
 * @throws Exception With a captcha message, the API error code, or a generic message.
 */
private function detect_errors($json){
	
	$failed =
		isset($json["status"]) &&
		$json["status"] == "error";
	
	if(!$failed){
		
		return;
	}
	
	if(isset($json["data"]["error_data"]["captchaUrl"])){
		
		throw new Exception("Qwant returned a captcha");
	}
	
	if(isset($json["data"]["error_data"]["error_code"])){
		
		$code = $json["data"]["error_data"]["error_code"];
		
		throw new Exception("Qwant returned an API error: " . $code);
	}
	
	throw new Exception("Qwant returned an API error");
}
/**
 * Shorten text to its first wrapped line: word-wrap at 300 columns and keep
 * everything before the first line break.
 *
 * @param string $text Text to shorten.
 * @return string First line of the wrapped text.
 */
private function limitstrlen($text){
	
	$wrapped = wordwrap($text, 300, "\n");
	$lines = explode("\n", $wrapped);
	
	return $lines[0];
}
/**
 * Strip leading and trailing dots and spaces from a string.
 *
 * @param string $text Text to clean.
 * @return string Cleaned text.
 */
private function trimdots($text){
	
	$strip = ". ";
	
	return trim($text, $strip);
}
/**
 * Unwrap a Qwant thumbnail-proxy URL into the image URL it points at.
 *
 * Example input:
 * https://s1.qwant.com/thumbr/0x0/8/d/f6de4deb2c2b12f55d8bdcaae576f9f62fd58a05ec0feeac117b354d1bf5c2/th.jpg?u=https%3A%2F%2Fwww.bing.com%2Fth%3Fid%3DOIP.vvDWsagzxjoKKP_rOqhwrQAAAA%26w%3D160%26h%3D160%26c%3D7%26pid%3D5.1&q=0&b=1&p=0&a=0
 *
 * @param string $url     Proxy URL carrying the real address in its "u" parameter.
 * @param bool   $is_bing When true, the embedded URL is rebuilt as
 *                        "https://<host>/th?id=<id>", dropping its other parameters.
 * @return string The unwrapped URL. Input that is not a proxy wrapper is returned
 *                unchanged; an embedded URL without host or "id" is returned as found.
 */
private function unshitimage($url, $is_bing = true){
	
	$url = (string)$url;
	
	$query = parse_url($url, PHP_URL_QUERY);
	
	if(!is_string($query)){
		
		// no query string: nothing to unwrap
		return $url;
	}
	
	parse_str($query, $parts);
	
	if(
		!isset($parts["u"]) ||
		!is_string($parts["u"])
	){
		
		return $url;
	}
	
	if(!$is_bing){
		
		return $parts["u"];
	}
	
	$bing = parse_url($parts["u"]);
	
	if(!isset($bing["host"], $bing["query"])){
		
		return $parts["u"];
	}
	
	parse_str($bing["query"], $bing_parts);
	
	if(
		!isset($bing_parts["id"]) ||
		!is_string($bing_parts["id"])
	){
		
		return $parts["u"];
	}
	
	return "https://" . $bing["host"] . "/th?id=" . urlencode($bing_parts["id"]);
}
}

512
src/scraper/sc.php Normal file
View File

@ -0,0 +1,512 @@
<?php
class sc{
/**
 * Set up the scraper: a proxy/token backend scoped to "sc" and an HTML parser.
 */
public function __construct(){
	
	// include_once: a plain include would re-declare the class (fatal error)
	// if this constructor runs more than once in a request
	include_once "lib/backend.php";
	include_once "lib/fuckhtml.php";
	
	$this->backend = new backend("sc");
	$this->fuckhtml = new fuckhtml();
}
/**
 * Describe the search filters this scraper offers.
 *
 * @param string $page Requested page type (not used: the filters are the same for all).
 * @return array Filter definitions keyed by filter name.
 */
public function getfilters($page){
	
	$types = [
		"any" => "Any type",
		"track" => "Tracks",
		"author" => "People",
		"album" => "Albums",
		"playlist" => "Playlists",
		"goplus" => "Go+ Tracks"
	];
	
	$filters = [];
	
	$filters["type"] = [
		"display" => "Type",
		"option" => $types
	];
	
	return $filters;
}
/**
 * Perform an HTTP GET through the given proxy and return the response body.
 *
 * @param mixed  $proxy   Proxy handle passed on to backend->assign_proxy().
 * @param string $url     Target URL.
 * @param array  $get     Query parameters appended to the URL when not empty.
 * @param bool   $web_req False sends the API header set (SoundCloud referer/origin),
 *                        anything else sends the page-navigation header set.
 * @return string|bool Value returned by curl_exec().
 * @throws Exception With the cURL error message when the transfer fails.
 */
private function get($proxy, $url, $get = [], $web_req = false){
	
	$curlproc = curl_init();
	
	if($get !== []){
		
		$url .= "?" . http_build_query($get);
	}
	
	if($web_req === false){
		
		// API call made on behalf of the SoundCloud web app
		$headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"Referer: https://soundcloud.com/",
			"Origin: https://soundcloud.com",
			"DNT: 1",
			"Connection: keep-alive",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-site",
			"Priority: u=1"
		];
	}else{
		
		// top-level page navigation
		$headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: cross-site",
			"Priority: u=1",
			"TE: trailers"
		];
	}
	
	curl_setopt($curlproc, CURLOPT_URL, $url);
	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
	curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); // use http2
	curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
	
	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
	
	$this->backend->assign_proxy($curlproc, $proxy);
	
	$body = curl_exec($curlproc);
	
	if(curl_errno($curlproc)){
		
		throw new Exception(curl_error($curlproc));
	}
	
	curl_close($curlproc);
	
	return $body;
}
/**
 * Search SoundCloud, or continue a search from a pagination token.
 *
 * Every search type uses the api-v2 search endpoint with the same base
 * parameters; only the path suffix and the facet/filter parameters differ.
 *
 * @param array $get          Request parameters: "npt" (pagination token),
 *                            "s" (search term), "type" (see getfilters()).
 * @param bool  $last_attempt When true, an undecodable response is not retried
 *                            with a fresh client ID.
 * @return array Result set with "song", "playlist", "author" lists and "npt".
 * @throws Exception On an empty search term, an unknown type, a failed request,
 *                   or a response that is still undecodable after the retry.
 */
public function music($get, $last_attempt = false){
	
	if($get["npt"]){
		
		[$params, $proxy] = $this->backend->get($get["npt"], "music");
		$params = json_decode($params, true);
		
		$url = $params["url"];
		unset($params["url"]);
		
		// stream URLs built below need the client ID; it travels with the stored parameters
		$token = $params["client_id"];
	}else{
		
		$search = $get["s"];
		
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		$type = $get["type"];
		
		// search type => [path suffix, type-specific parameters]
		// "goplus" is the track search restricted to the SUB_HIGH_TIER content tier
		$endpoints = [
			"any" => ["", ["facet" => "model"]],
			"track" => ["/tracks", ["facet_genre" => ""]],
			"author" => ["/users", ["facet" => "place"]],
			"album" => ["/albums", ["facet" => "genre"]],
			"playlist" => ["/playlists_without_albums", ["facet" => "genre"]],
			"goplus" => ["/tracks", ["filter.content_tier" => "SUB_HIGH_TIER", "facet" => "genre"]]
		];
		
		if(
			!is_string($type) ||
			!isset($endpoints[$type])
		){
			
			throw new Exception("Unknown search type");
		}
		
		$proxy = $this->backend->get_ip();
		$token = $this->get_token($proxy);
		
		[$path, $specific] = $endpoints[$type];
		
		$url = "https://api-v2.soundcloud.com/search" . $path;
		
		// array union keeps the parameter order: base, type-specific, common tail
		$params =
			[
				"q" => $search,
				"variant_ids" => ""
			] +
			$specific +
			[
				"client_id" => $token,
				"limit" => 20,
				"offset" => 0,
				"linked_partitioning" => 1,
				"app_version" => 1713542117,
				"app_locale" => "en"
			];
	}
	
	$tries_left = $last_attempt === true ? 1 : 2;
	
	while(true){
		
		try{
			
			$json = $this->get($proxy, $url, $params);
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch JSON");
		}
		
		$json = json_decode($json, true);
		
		if($json !== null){
			
			break;
		}
		
		$tries_left--;
		
		if($tries_left === 0){
			
			throw new Exception("Fetched an invalid token (please report!!)");
		}
		
		// the client ID might have expired. get_token() returns the cached
		// value while one exists, so drop it first. Retrying here rather than
		// re-entering music() also avoids reading the pagination token twice.
		apcu_delete("sc_token");
		
		$token = $this->get_token($proxy);
		$params["client_id"] = $token;
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"song" => [],
		"playlist" => [],
		"album" => [],
		"podcast" => [],
		"author" => [],
		"user" => []
	];
	
	/*
		Get next page
	*/
	if(isset($json["next_href"])){
		
		$params["query_urn"] = $json["query_urn"] ?? null;
		$params["offset"] = $params["offset"] + 20;
		$params["url"] = $url; // removed again when the token is read back
		
		$out["npt"] =
			$this->backend->store(
				json_encode($params),
				"music",
				$proxy
			);
	}
	
	/*
		Scrape items
	*/
	foreach(($json["collection"] ?? []) as $item){
		
		switch($item["kind"]){
			
			case "user":
				// parse author
				$out["author"][] = [
					"title" => $item["username"],
					"followers" => $item["followers_count"],
					"description" =>
						trim(
							$item["track_count"] . " songs. " .
							$this->limitstrlen($item["description"] ?? "")
						),
					"thumb" => [
						"url" => $item["avatar_url"],
						"ratio" => "1:1"
					],
					"url" => $item["permalink_url"]
				];
				break;
			
			case "playlist":
				// parse playlist: the description lists the known track titles
				$description = [];
				$count = 0;
				
				foreach($item["tracks"] as $song){
					
					$count++;
					
					if(!isset($song["title"])){
						
						continue;
					}
					
					$description[] = $song["title"];
				}
				
				if(count($description) !== 0){
					
					$description = trim($count . " songs. " . implode(", ", $description));
				}else{
					
					$description = "";
				}
				
				// prefer the playlist artwork, fall back to the first track's
				if(!empty($item["artwork_url"])){
					
					$thumb = [
						"ratio" => "1:1",
						"url" => $item["artwork_url"]
					];
				}elseif(!empty($item["tracks"][0]["artwork_url"])){
					
					$thumb = [
						"ratio" => "1:1",
						"url" => $item["tracks"][0]["artwork_url"]
					];
				}else{
					
					$thumb = [
						"ratio" => null,
						"url" => null
					];
				}
				
				$out["playlist"][] = [
					"title" => $item["title"],
					"description" => $this->limitstrlen($description),
					"author" => [
						"name" => $item["user"]["username"],
						"url" => $item["user"]["permalink_url"],
						"avatar" => $item["user"]["avatar_url"]
					],
					"thumb" => $thumb,
					"date" => strtotime($item["created_at"]),
					"duration" => $item["duration"] / 1000,
					"url" => $item["permalink_url"]
				];
				break;
			
			case "track":
				// tracks whose monetization model mentions a tier get no stream link
				if(
					stripos((string)($item["monetization_model"] ?? ""), "TIER") === false &&
					isset($item["media"]["transcodings"][0]["url"])
				){
					
					$stream = [
						"endpoint" => "sc",
						"url" =>
							$item["media"]["transcodings"][0]["url"] .
							"?client_id=" . $token .
							"&track_authorization=" .
							$item["track_authorization"]
					];
				}else{
					
					$stream = [
						"endpoint" => null,
						"url" => null
					];
				}
				
				// parse track
				$out["song"][] = [
					"title" => $item["title"],
					"description" => $item["description"] == "" ? null : $this->limitstrlen($item["description"]),
					"url" => $item["permalink_url"],
					"views" => $item["playback_count"],
					"author" => [
						"name" => $item["user"]["username"],
						"url" => $item["user"]["permalink_url"],
						"avatar" => $item["user"]["avatar_url"]
					],
					"thumb" => [
						"ratio" => "1:1",
						"url" => $item["artwork_url"]
					],
					"date" => strtotime($item["created_at"]),
					"duration" => (int)$item["full_duration"] / 1000,
					"stream" => $stream
				];
				break;
		}
	}
	
	return $out;
}
/**
 * Return the SoundCloud client ID, scraping it when it is not cached.
 *
 * The cached value lives in APCu under "sc_token". On a miss, the front page
 * is fetched and each script served from sndcdn.com is searched for a
 * "client_id=" assignment; the first hit is cached and returned.
 *
 * @param mixed $proxy Proxy handle used for all requests.
 * @return string Client ID.
 * @throws Exception When the front page or a script cannot be fetched, or no
 *                   script contains a client ID.
 */
public function get_token($proxy){
	
	$cached = apcu_fetch("sc_token");
	
	if($cached !== false){
		
		return $cached;
	}
	
	try{
		
		$html =
			$this->get(
				$proxy,
				"https://soundcloud.com",
				[],
				true
			);
	}catch(Exception $error){
		
		throw new Exception("Failed to fetch front page");
	}
	
	$this->fuckhtml->load($html);
	
	// search through all javascript components on the main page
	foreach($this->fuckhtml->getElementsByTagName("script") as $script){
		
		$from_cdn =
			isset($script["attributes"]["src"]) &&
			strpos($script["attributes"]["src"], "sndcdn.com") !== false;
		
		if(!$from_cdn){
			
			continue;
		}
		
		try{
			
			$js =
				$this->get(
					$proxy,
					$script["attributes"]["src"],
					[]
				);
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch search token");
		}
		
		preg_match(
			'/client_id=([^"]+)/',
			$js,
			$found
		);
		
		if(isset($found[1])){
			
			apcu_store("sc_token", $found[1]);
			return $found[1];
		}
	}
	
	throw new Exception("Did not find a Soundcloud token in the Javascript blobs");
}
/**
 * Flatten text to a single line and cut it to about 300 characters on a word
 * boundary.
 *
 * @param string|null $text Text to shorten; null is treated as an empty string
 *                          (the API leaves descriptions unset for some items).
 * @return string First wrapped line of the flattened text.
 */
private function limitstrlen($text){
	
	// every kind of line break becomes a space, so the only "\n" left
	// afterwards are the ones wordwrap() inserts
	$flat =
		str_replace(
			["\n\r", "\r\n", "\n", "\r"],
			" ",
			(string)$text
		);
	
	return
		explode(
			"\n",
			wordwrap($flat, 300, "\n"),
			2
		)[0];
}
}

668
src/scraper/solofield.php Normal file
View File

@ -0,0 +1,668 @@
<?php
class solofield{
/**
 * Set up the scraper: a proxy/token backend scoped to "solofield" and an HTML parser.
 */
public function __construct(){
	
	// include_once: a plain include would re-declare the class (fatal error)
	// if this constructor runs more than once in a request
	include_once "lib/backend.php";
	include_once "lib/fuckhtml.php";
	
	$this->backend = new backend("solofield");
	$this->fuckhtml = new fuckhtml();
}
/**
 * Describe the search filters this scraper offers.
 *
 * @param string $page Requested page type (not used: the filters are the same for all).
 * @return array Filter definitions keyed by filter name.
 */
public function getfilters($page){
	
	$nsfw = [
		"display" => "NSFW",
		"option" => [
			"yes" => "Yes",
			"no" => "No"
		]
	];
	
	return ["nsfw" => $nsfw];
}
/**
 * Perform an HTTP GET against solofield.net through the given proxy.
 *
 * @param mixed  $proxy Proxy handle passed on to backend->assign_proxy().
 * @param string $url   Target URL.
 * @param array  $get   Query parameters appended to the URL when not empty.
 * @return string|bool Value returned by curl_exec().
 * @throws Exception With the cURL error message when the transfer fails.
 */
private function get($proxy, $url, $get = []){
	
	$curlproc = curl_init();
	
	if($get !== []){
		
		$url .= "?" . http_build_query($get);
	}
	
	$headers = [
		"User-Agent: " . config::USER_AGENT,
		"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
		"Accept-Language: en-US,en;q=0.5",
		"Accept-Encoding: gzip",
		"Referer: https://solofield.net",
		"DNT: 1",
		"Connection: keep-alive",
		"Cookie: cross-site-cookie=name; lno=35842050",
		"Upgrade-Insecure-Requests: 1",
		"Sec-Fetch-Dest: document",
		"Sec-Fetch-Mode: navigate",
		"Sec-Fetch-Site: same-origin",
		"Sec-Fetch-User: ?1"
	];
	
	curl_setopt($curlproc, CURLOPT_URL, $url);
	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
	curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
	
	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
	
	$this->backend->assign_proxy($curlproc, $proxy);
	
	$body = curl_exec($curlproc);
	
	if(curl_errno($curlproc)){
		
		throw new Exception(curl_error($curlproc));
	}
	
	curl_close($curlproc);
	
	return $body;
}
/**
 * Run a Solofield web search, or continue one from a pagination token.
 *
 * @param array $get Request parameters: "npt" (pagination token), "s" (search term), "nsfw".
 * @return array Result set; only "web" and "npt" are ever filled.
 * @throws Exception When the page cannot be fetched or the result list is missing.
 */
public function web($get){
	
	if($get["npt"]){
		
		// continuation: the stored payload is the next page's query string
		[$query, $proxy] = $this->backend->get($get["npt"], "web");
		
		$url = "https://solofield.net/search?" . $query;
		$params = [];
	}else{
		
		$proxy = $this->backend->get_ip();
		
		$url = "https://solofield.net/search";
		$params = [
			"q" => $get["s"],
			"ie" => "UTF-8",
			"oe" => "UTF-8",
			"hl" => "ja", // changing this doesnt do anything
			"lr" => "lang_ja", // same here
			//"ls" => "", // ??
			"f" => ($get["nsfw"] == "yes" ? "off" : "on")
		];
	}
	
	try{
		
		$html = $this->get($proxy, $url, $params);
	}catch(Exception $error){
		
		throw new Exception("Failed to fetch search page");
	}
	
	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];
	
	// check for errors and load the result div
	if($this->error_and_load($html)){
		
		return $out;
	}
	
	$items =
		$this->fuckhtml
		->getElementsByClassName(
			"g0",
			"li"
		);
	
	foreach($items as $item){
		
		$this->fuckhtml->load($item);
		
		$title_tag =
			$this->fuckhtml
			->getElementsByClassName(
				"r",
				"h3"
			);
		
		if(count($title_tag) === 0){
			
			continue;
		}
		
		// the result link is the anchor inside the title
		$this->fuckhtml->load($title_tag[0]);
		
		$anchors =
			$this->fuckhtml
			->getElementsByTagName(
				"a"
			);
		
		if(!isset($anchors[0]["attributes"]["href"])){
			
			// a result without a link is of no use
			continue;
		}
		
		$link =
			$this->fuckhtml
			->getTextContent(
				$anchors[0]["attributes"]["href"]
			);
		
		// back to the whole item for the thumbnail and description
		$this->fuckhtml->load($item);
		
		$thumb = [
			"ratio" => null,
			"url" => null
		];
		
		$webshot =
			$this->fuckhtml
			->getElementsByClassName(
				"webshot",
				"img"
			);
		
		if(isset($webshot[0]["attributes"]["src"])){
			
			$uri =
				$this->fuckhtml
				->getTextContent(
					$webshot[0]["attributes"]["src"]
				);
			
			// images with "now_printing" in their path are not used as thumbnails
			if(stripos($uri, "now_printing") === false){
				
				$thumb = [
					"ratio" => "1:1",
					"url" => "https://solofield.net" . $uri
				];
			}
		}
		
		$snippet =
			$this->fuckhtml
			->getElementsByClassName(
				"s",
				"div"
			);
		
		$out["web"][] = [
			"title" =>
				$this->fuckhtml
				->getTextContent(
					$title_tag[0]
				),
			"description" =>
				count($snippet) === 0 ?
				null :
				$this->fuckhtml
				->getTextContent(
					$snippet[0]
				),
			"url" => $link,
			"date" => null,
			"type" => "web",
			"thumb" => $thumb,
			"sublink" => [],
			"table" => []
		];
	}
	
	// get next page
	$this->get_npt($html, $proxy, $out, "web");
	
	return $out;
}
/**
 * Run a Solofield image search (no pagination).
 *
 * @param array $get Request parameters: "s" (search term), "nsfw".
 * @return array ["status" => "ok", "npt" => null, "image" => results]
 * @throws Exception When the page cannot be fetched or the result list is missing.
 */
public function image($get){
	
	// no pagination
	try{
		
		$html =
			$this->get(
				$this->backend->get_ip(),
				"https://solofield.net/isearch",
				[
					"q" => $get["s"],
					"ie" => "UTF-8",
					"oe" => "UTF-8",
					"hl" => "ja", // changing this doesnt do anything
					//"lr" => "lang_ja", // same here
					"ls" => "", // ??
					"f" => ($get["nsfw"] == "yes" ? "off" : "on")
				]
			);
	}catch(Exception $error){
		
		// same message as the web and video searches
		throw new Exception("Failed to fetch search page");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"image" => []
	];
	
	// check for errors and load the result div
	if($this->error_and_load($html)){
		
		return $out;
	}
	
	$images =
		$this->fuckhtml
		->getElementsByTagName(
			"li"
		);
	
	foreach($images as $image){
		
		$this->fuckhtml->load($image);
		
		$img =
			$this->fuckhtml
			->getElementsByTagName(
				"img"
			);
		
		$anchors =
			$this->fuckhtml
			->getElementsByTagName(
				"a"
			);
		
		if(
			!isset($img[0]["attributes"]["src"]) ||
			!isset($anchors[0]["attributes"]["href"])
		){
			
			// ?? invalid
			continue;
		}
		
		// the item's text is read as "<width>x<height>"
		$size =
			explode(
				"x",
				$this->fuckhtml
				->getTextContent(
					$image
				),
				2
			);
		
		// a missing or unparsable half becomes 0
		$width = (int)trim($size[0]);
		$height = isset($size[1]) ? (int)trim($size[1]) : 0;
		
		$out["image"][] = [
			"title" => null,
			"source" => [
				[
					"url" =>
						"https://solofield.net/" .
						$this->fuckhtml
						->getTextContent(
							$img[0]["attributes"]["src"]
						),
					"width" => $width,
					"height" => $height
				]
			],
			"url" =>
				$this->fuckhtml
				->getTextContent(
					$anchors[0]["attributes"]["href"]
				)
		];
	}
	
	return $out;
}
/**
 * Run a Solofield video search, or continue one from a pagination token.
 *
 * @param array $get Request parameters: "npt" (pagination token), "s" (search term), "nsfw".
 * @return array Result set; only "video" and "npt" are ever filled.
 * @throws Exception When the page cannot be fetched or the result list is missing.
 */
public function video($get){
	
	if($get["npt"]){
		
		// continuation: the stored payload is the next page's query string
		[$query, $proxy] = $this->backend->get($get["npt"], "videos");
		
		$url = "https://solofield.net/vsearch?" . $query;
		$params = [];
	}else{
		
		$proxy = $this->backend->get_ip();
		
		$url = "https://solofield.net/vsearch";
		$params = [
			"q" => $get["s"],
			"ie" => "UTF-8",
			"oe" => "UTF-8",
			"hl" => "ja", // changing this doesnt do anything
			//"lr" => "lang_ja", // same here
			"ls" => "", // ??
			"f" => ($get["nsfw"] == "yes" ? "off" : "on")
		];
	}
	
	try{
		
		$html = $this->get($proxy, $url, $params);
	}catch(Exception $error){
		
		throw new Exception("Failed to fetch search page");
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"video" => [],
		"author" => [],
		"livestream" => [],
		"playlist" => [],
		"reel" => []
	];
	
	// check for errors and load the result div
	if($this->error_and_load($html)){
		
		return $out;
	}
	
	$items =
		$this->fuckhtml
		->getElementsByTagName(
			"li"
		);
	
	foreach($items as $item){
		
		$this->fuckhtml->load($item);
		
		$as =
			$this->fuckhtml
			->getElementsByTagName(
				"a"
			);
		
		if(!isset($as[0]["attributes"]["href"])){
			
			continue;
		}
		
		$thumb =
			$this->fuckhtml
			->getElementsByTagName(
				"img"
			);
		
		if(isset($thumb[0]["attributes"]["src"])){
			
			$thumb = [
				"ratio" => "16:9",
				"url" =>
					"https://solofield.net/" .
					$this->fuckhtml
					->getTextContent(
						$thumb[0]["attributes"]["src"]
					)
			];
		}else{
			
			$thumb = [
				"ratio" => null,
				"url" => null
			];
		}
		
		// the date is taken from the small-print span
		$date =
			$this->fuckhtml
			->getElementsByAttributeValue(
				"style",
				"font-size: 10px;",
				"span"
			);
		
		if(count($date) !== 0){
			
			$date =
				$this->unfuckdate(
					$this->fuckhtml
					->getTextContent(
						$date[0]
					)
				);
		}else{
			
			$date = null;
		}
		
		$center_td =
			$this->fuckhtml
			->getElementsByAttributeValue(
				"align",
				"center",
				"td"
			);
		
		if(count($center_td) === 2){
			
			// extract the cell's text first, then convert it:
			// hms2int() works on a string, not on an element
			$duration =
				$this->hms2int(
					$this->fuckhtml
					->getTextContent(
						$center_td[0]
					)
				);
		}else{
			
			$duration = null;
		}
		
		$out["video"][] = [
			"title" =>
				$this->fuckhtml
				->getTextContent(
					// the second anchor is used as the title; fall back
					// to the first when it is the only one
					$as[1] ?? $as[0]
				),
			"description" => null,
			"author" => [
				"name" => null,
				"url" => null,
				"avatar" => null
			],
			"date" => $date,
			"duration" => $duration,
			"views" => null,
			"thumb" => $thumb,
			"url" =>
				$this->fuckhtml
				->getTextContent(
					$as[0]["attributes"]["href"]
				)
		];
	}
	
	// get next page
	$this->get_npt($html, $proxy, $out, "videos");
	
	return $out;
}
/**
 * Look for a "Next" link on the page and store its query string as the
 * pagination token in $out["npt"].
 *
 * @param string $html  Full page HTML.
 * @param mixed  $proxy Proxy handle stored alongside the token.
 * @param array  $out   Result set, modified in place.
 * @param string $type  Page type the token is stored under.
 */
private function get_npt($html, $proxy, &$out, $type){
	
	$this->fuckhtml->load($html);
	
	$pager =
		$this->fuckhtml
		->getElementById(
			"pjs"
		);
	
	if(!$pager){
		
		// no element with id "pjs": there is no next page to look for
		return;
	}
	
	$candidates =
		$this->fuckhtml
		->getElementsByClassName(
			"alnk",
			"span"
		);
	
	foreach($candidates as $candidate){
		
		$label =
			$this->fuckhtml
			->getTextContent(
				$candidate
			);
		
		if(stripos($label, "Next") === false){
			
			continue;
		}
		
		$this->fuckhtml->load($candidate);
		
		$href =
			$this->fuckhtml
			->getElementsByTagName(
				"a"
			)[0]
			["attributes"]
			["href"];
		
		$out["npt"] =
			$this->backend->store(
				parse_url($href, PHP_URL_QUERY),
				$type,
				$proxy
			);
	}
}
/**
 * Check a response page and point the HTML parser at its result list.
 *
 * @param string $html Full page HTML.
 * @return bool True when the page has a "nosearch" div instead of a result
 *              list; false once the "list" div has been loaded into the parser.
 * @throws Exception When the response is empty, or has neither a result list
 *                   nor a "nosearch" div.
 */
private function error_and_load($html){
	
	if(strlen($html) === 0){
		
		throw new Exception("Solofield blocked the request IP");
	}
	
	$this->fuckhtml->load($html);
	
	$results =
		$this->fuckhtml
		->getElementById(
			"list",
			"div"
		);
	
	if($results !== false){
		
		$this->fuckhtml->load($results);
		return false;
	}
	
	$empty_notice =
		$this->fuckhtml
		->getElementById(
			"nosearch",
			"div"
		);
	
	if(!$empty_notice){
		
		throw new Exception("Failed to grep search list");
	}
	
	return true;
}
/**
 * Convert a "<label>:<date>" string into a UNIX timestamp.
 *
 * Everything after the first colon is kept, each run of non-digit bytes
 * becomes a single "-", trailing dashes are dropped and the result is handed
 * to strtotime().
 *
 * @param string $date Raw date text.
 * @return int|false Result of strtotime().
 */
private function unfuckdate($date){
	
	$pieces = explode(":", $date, 2);
	
	$dashed =
		preg_replace(
			'/[^0-9]+/',
			"-",
			$pieces[1]
		);
	
	return strtotime(rtrim($dashed, "-"));
}
/**
 * Convert "h:m:s", "m:s" or "s" into a number of seconds.
 *
 * @param string $time Colon-separated duration.
 * @return int Duration in seconds.
 */
private function hms2int($time){
	
	$seconds = 0;
	
	// each field is worth 60 times the one to its right; at most three fields are split off
	foreach(explode(":", $time, 3) as $field){
		
		$seconds = ($seconds * 60) + (int)$field;
	}
	
	return $seconds;
}
}

726
src/scraper/spotify.php Normal file
View File

@ -0,0 +1,726 @@
<?php
class spotify{
private const req_web = 0;
private const req_api = 1;
private const req_clientid = 2;
/**
 * Set up the scraper: a proxy/token backend scoped to "spotify" and an HTML parser.
 */
public function __construct(){
	
	// include_once: a plain include would re-declare the class (fatal error)
	// if this constructor runs more than once in a request
	include_once "lib/backend.php";
	include_once "lib/fuckhtml.php";
	
	$this->backend = new backend("spotify");
	$this->fuckhtml = new fuckhtml();
}
/**
 * Describe the search filters this scraper offers.
 *
 * @param string $page Requested page type (not used: the filters are the same for all).
 * @return array Filter definitions keyed by filter name.
 */
public function getfilters($page){
	
	$categories = [
		"any" => "All (no pagination)",
		"audiobooks" => "Audiobooks",
		"tracks" => "Songs",
		"artists" => "Artists",
		"playlists" => "Playlists",
		"albums" => "Albums",
		"podcastAndEpisodes" => "Podcasts & Shows (no pagination)",
		"episodes" => "Episodes",
		"users" => "Profiles"
	];
	
	$filters = [];
	
	$filters["category"] = [
		"display" => "Category",
		"option" => $categories
	];
	
	return $filters;
}
/**
 * Perform an HTTP request against Spotify through the given proxy.
 *
 * @param mixed       $proxy   Proxy handle passed on to backend->assign_proxy().
 * @param string      $url     Target URL.
 * @param array       $get     Query parameters, or for req_clientid the data
 *                             sent as a JSON POST body.
 * @param int         $reqtype One of self::req_web, self::req_api, self::req_clientid;
 *                             selects the header set and the request method.
 * @param string|null $bearer  Value for the authorization header (req_api only).
 * @param string|null $token   Value for the client-token header (req_api only).
 * @return string|bool Value returned by curl_exec().
 * @throws Exception With the cURL error message when the transfer fails.
 */
private function get($proxy, $url, $get = [], $reqtype = self::req_web, $bearer = null, $token = null){
	
	$curlproc = curl_init();
	
	if($reqtype == self::req_api){
		
		$headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: application/json",
			"Accept-Language: en",
			"app-platform: WebPlayer",
			"authorization: Bearer {$bearer}",
			"client-token: {$token}",
			"content-type: application/json;charset=UTF-8",
			"Origin: https://open.spotify.com",
			"Referer: https://open.spotify.com/",
			"DNT: 1",
			"Connection: keep-alive",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-site",
			"spotify-app-version: 1.2.27.93.g7aee53d4",
			"TE: trailers"
		];
	}elseif($reqtype == self::req_web){
		
		$headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Sec-GPC: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: cross-site"
		];
	}elseif($reqtype == self::req_clientid){
		
		// this request carries its parameters as a JSON POST body
		$get = json_encode($get);
		
		curl_setopt($curlproc, CURLOPT_POST, true);
		curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
		
		$headers = [
			"User-Agent:" . config::USER_AGENT,
			"Accept: application/json",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip, deflate, br",
			"Referer: https://open.spotify.com/",
			"content-type: application/json",
			"Content-Length: " . strlen($get),
			"Origin: https://open.spotify.com",
			"DNT: 1",
			"Sec-GPC: 1",
			"Connection: keep-alive",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-site",
			"TE: trailers"
		];
	}
	
	// every other request type carries its parameters in the query string
	if(
		$reqtype !== self::req_clientid &&
		$get !== []
	){
		
		$url .= "?" . http_build_query($get);
	}
	
	curl_setopt($curlproc, CURLOPT_URL, $url);
	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
	curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
	
	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
	
	$this->backend->assign_proxy($curlproc, $proxy);
	
	$body = curl_exec($curlproc);
	
	if(curl_errno($curlproc)){
		
		throw new Exception(curl_error($curlproc));
	}
	
	curl_close($curlproc);
	
	return $body;
}
public function music($get){
$search = $get["s"];
$ip = $this->backend->get_ip();
$category = $get["category"];
/*
audiobooks first and second page decoded
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAudiobooks&variables={"searchTerm":"freddie+dredd","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"8758e540afdba5afa3c5246817f6bd31d86a15b3f5666c363dd017030f35d785"}}
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAudiobooks&variables={"searchTerm":"freddie+dredd","offset":30,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"8758e540afdba5afa3c5246817f6bd31d86a15b3f5666c363dd017030f35d785"}}
*/
/*
songs
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchTracks&variables={"searchTerm":"asmr","offset":0,"limit":100,"numberOfTopResults":20,"includeAudiobooks":false}&extensions={"persistedQuery":{"version":1,"sha256Hash":"16c02d6304f5f721fc2eb39dacf2361a4543815112506a9c05c9e0bc9733a679"}}
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchTracks&variables={"searchTerm":"asmr","offset":100,"limit":100,"numberOfTopResults":20,"includeAudiobooks":false}&extensions={"persistedQuery":{"version":1,"sha256Hash":"16c02d6304f5f721fc2eb39dacf2361a4543815112506a9c05c9e0bc9733a679"}}
*/
/*
artists
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchArtists&variables={"searchTerm":"asmr","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"b8840daafdda9a9ceadb7c5774731f63f9eca100445d2d94665f2dc58b45e2b9"}}
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchArtists&variables={"searchTerm":"asmr","offset":30,"limit":23,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"b8840daafdda9a9ceadb7c5774731f63f9eca100445d2d94665f2dc58b45e2b9"}}
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchArtists&variables={"searchTerm":"asmr","offset":53,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"b8840daafdda9a9ceadb7c5774731f63f9eca100445d2d94665f2dc58b45e2b9"}}
*/
/*
playlists
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchPlaylists&variables={"searchTerm":"asmr","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"19b4143a0500ccec189ca0f4a0316bc2c615ecb51ce993ba4d7d08afd1d87aa4"}}
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchPlaylists&variables={"searchTerm":"asmr","offset":30,"limit":3,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"19b4143a0500ccec189ca0f4a0316bc2c615ecb51ce993ba4d7d08afd1d87aa4"}}
*/
/*
albums
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAlbums&variables={"searchTerm":"asmr","offset":33,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"e93b13cda461482da2940467eb2beed9368e9bb2fff37df3fb6633fc61271a27"}}
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAlbums&variables={"searchTerm":"asmr","offset":33,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"e93b13cda461482da2940467eb2beed9368e9bb2fff37df3fb6633fc61271a27"}}
*/
/*
podcasts & shows (contains authors, no pagination)
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchFullEpisodes&variables={"searchTerm":"asmr","offset":0,"limit":30}&extensions={"persistedQuery":{"version":1,"sha256Hash":"9f996251c9781fabce63f1a9980b5287ea33bc5e8c8953d0c4689b09936067a1"}}
*/
/*
episodes
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchDesktop&variables={"searchTerm":"asmr","offset":0,"limit":10,"numberOfTopResults":5,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"da03293d92a2cfc5e24597dcdc652c0ad135e1c64a78fddbf1478a7e096bea44"}}
??? https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchFullEpisodes&variables={"searchTerm":"asmr","offset":60,"limit":30}&extensions={"persistedQuery":{"version":1,"sha256Hash":"9f996251c9781fabce63f1a9980b5287ea33bc5e8c8953d0c4689b09936067a1"}}
*/
/*
profiles
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchUsers&variables={"searchTerm":"asmr","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"02026f48ab5001894e598904079b620ebc64f2d53b55ca20c3858abd3a46c5fb"}}
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchUsers&variables={"searchTerm":"asmr","offset":30,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"02026f48ab5001894e598904079b620ebc64f2d53b55ca20c3858abd3a46c5fb"}}
*/
// get HTML
try{
$html =
$this->get(
$ip,
"https://open.spotify.com/search/" .
rawurlencode($search) .
($category != "any" ? "/" . $category : ""),
[]
);
}catch(Exception $error){
throw new Exception("Failed to get initial search page");
}
// grep bearer and client ID
$this->fuckhtml->load($html);
$script =
$this->fuckhtml
->getElementById(
"session",
"script"
);
if($script === null){
throw new Exception("Failed to grep bearer token");
}
$script =
json_decode(
$script["innerHTML"],
true
);
$bearer = $script["accessToken"];
$client_id = $script["clientId"];
// hit client ID endpoint
try{
$token =
json_decode(
$this->get(
$ip,
"https://clienttoken.spotify.com/v1/clienttoken",
[ // !! that shit must be sent as json data
"client_data" => [
"client_id" => $client_id,
"client_version" => "1.2.27.93.g7aee53d4",
"js_sdk_data" => [
"device_brand" => "unknown",
"device_id" => "4c7ca20117ca12288ea8fc7118a9118c",
"device_model" => "unknown",
"device_name" => "computer",
"os" => "windows",
"os_version" => "NT 10.0"
]
]
],
self::req_clientid
),
true
);
}catch(Exception $error){
throw new Exception("Failed to fetch token");
}
if($token === null){
throw new Exception("Failed to decode token");
}
$token = $token["granted_token"]["token"];
try{
switch($get["option"]){
case "any":
$variables = [
"searchTerm" => $search,
"offset" => 0,
"limit" => 10,
"numberOfTopResults" => 5,
"includeAudiobooks" => true
];
break;
case "audiobooks":
break;
}
$payload =
$this->get(
$ip,
"https://api-partner.spotify.com/pathfinder/v1/query",
[
"operationName" => "searchDesktop",
"variables" =>
json_encode(
[
"searchTerm" => $search,
"offset" => 0,
"limit" => 10,
"numberOfTopResults" => 5,
"includeAudiobooks" => true
]
),
"extensions" =>
json_encode(
[
"persistedQuery" => [
"version" => 1,
"sha256Hash" => "21969b655b795601fb2d2204a4243188e75fdc6d3520e7b9cd3f4db2aff9591e" // ?
]
]
)
],
self::req_api,
$bearer,
$token
);
}catch(Exception $error){
throw new Exception("Failed to fetch JSON results");
}
if($payload == "Token expired"){
throw new Exception("Grepped spotify token has expired");
}
$payload = json_decode($payload, true);
if($payload === null){
throw new Exception("Failed to decode JSON results");
}
//$payload = json_decode(file_get_contents("scraper/spotify.json"), true);
$out = [
"status" => "ok",
"npt" => null,
"song" => [],
"playlist" => [],
"album" => [],
"podcast" => [],
"author" => [],
"user" => []
];
// get songs
foreach($payload["data"]["searchV2"]["tracksV2"]["items"] as $result){
if(isset($result["item"])){
$result = $result["item"];
}
if(isset($result["data"])){
$result = $result["data"];
}
[$artist, $artist_link] = $this->get_artists($result["artists"]);
$out["song"][] = [
"title" => $result["name"],
"description" => null,
"url" => "https://open.spotify.com/track/" . $result["id"],
"views" => null,
"author" => [
"name" => $artist,
"url" => $artist_link,
"avatar" => null
],
"thumb" => $this->get_thumb($result["albumOfTrack"]["coverArt"]),
"date" => null,
"duration" => $result["duration"]["totalMilliseconds"] / 1000,
"stream" => [
"endpoint" => "spotify",
"url" => "track." . $result["id"]
]
];
}
// get playlists
foreach($payload["data"]["searchV2"]["playlists"]["items"] as $playlist){
if(isset($playlist["data"])){
$playlist = $playlist["data"];
}
$avatar = $this->get_thumb($playlist["ownerV2"]["data"]["avatar"]);
$out["playlist"][] = [
"title" => $playlist["name"],
"description" => null,
"author" => [
"name" => $playlist["ownerV2"]["data"]["name"],
"url" =>
"https://open.spotify.com/user/" .
explode(
":",
$playlist["ownerV2"]["data"]["uri"],
3
)[2],
"avatar" => $avatar["url"]
],
"thumb" => $this->get_thumb($playlist["images"]["items"][0]),
"date" => null,
"duration" => null,
"url" =>
"https://open.spotify.com/playlist/" .
explode(
":",
$playlist["uri"],
3
)[2]
];
}
// get albums
foreach($payload["data"]["searchV2"]["albums"]["items"] as $album){
if(isset($album["data"])){
$album = $album["data"];
}
[$artist, $artist_link] = $this->get_artists($album["artists"]);
$out["album"][] = [
"title" => $album["name"],
"description" => null,
"author" => [
"name" => $artist,
"url" => $artist_link,
"avatar" => null
],
"thumb" => $this->get_thumb($album["coverArt"]),
"date" => mktime(0, 0, 0, 0, 32, $album["date"]["year"]),
"duration" => null,
"url" =>
"https://open.spotify.com/album/" .
explode(
":",
$album["uri"],
3
)[2]
];
}
// get podcasts
foreach($payload["data"]["searchV2"]["podcasts"]["items"] as $podcast){
if(isset($podcast["data"])){
$podcast = $podcast["data"];
}
$description = [];
foreach($podcast["topics"]["items"] as $subject){
$description[] = $subject["title"];
}
$description = implode(", ", $description);
if($description == ""){
$description = null;
}
$out["podcast"][] = [
"title" => $podcast["name"],
"description" => $description,
"author" => [
"name" => $podcast["publisher"]["name"],
"url" => null,
"avatar" => null
],
"thumb" => $this->get_thumb($podcast["coverArt"]),
"date" => null,
"duration" => null,
"url" =>
"https://open.spotify.com/show/" .
explode(
":",
$podcast["uri"],
3
)[2],
"stream" => [
"endpoint" => null,
"url" => null
]
];
}
// get audio books (put in podcasts)
foreach($payload["data"]["searchV2"]["audiobooks"]["items"] as $podcast){
if(isset($podcast["data"])){
$podcast = $podcast["data"];
}
$description = [];
foreach($podcast["topics"]["items"] as $subject){
$description[] = $subject["title"];
}
$description = implode(", ", $description);
if($description == ""){
$description = null;
}
$authors = [];
foreach($podcast["authors"] as $author){
$authors[] = $author["name"];
}
$authors = implode(", ", $authors);
if($authors == ""){
$authors = null;
}
$uri =
explode(
":",
$podcast["uri"],
3
)[2];
$out["podcast"][] = [
"title" => $podcast["name"],
"description" => $description,
"author" => [
"name" => $authors,
"url" => null,
"avatar" => null
],
"thumb" => $this->get_thumb($podcast["coverArt"]),
"date" => strtotime($podcast["publishDate"]["isoString"]),
"duration" => null,
"url" => "https://open.spotify.com/show/" . $uri,
"stream" => [
"endpoint" => "spotify",
"url" => "episode." . $uri
]
];
}
// get episodes (and place them in podcasts)
foreach($payload["data"]["searchV2"]["episodes"]["items"] as $podcast){
if(isset($podcast["data"])){
$podcast = $podcast["data"];
}
$out["podcast"][] = [
"title" => $podcast["name"],
"description" => $this->limitstrlen($podcast["description"]),
"author" => [
"name" =>
isset(
$podcast["podcastV2"]["data"]["publisher"]["name"]
) ?
$podcast["podcastV2"]["data"]["publisher"]["name"]
: null,
"url" => null,
"avatar" => null
],
"thumb" => $this->get_thumb($podcast["coverArt"]),
"date" => strtotime($podcast["releaseDate"]["isoString"]),
"duration" => $podcast["duration"]["totalMilliseconds"] / 1000,
"url" =>
"https://open.spotify.com/show/" .
explode(
":",
$podcast["uri"],
3
)[2],
"stream" => [
"endpoint" => null,
"url" => null
]
];
}
// get authors
foreach($payload["data"]["searchV2"]["artists"]["items"] as $user){
if(isset($user["data"])){
$user = $user["data"];
}
$avatar = $this->get_thumb($user["visuals"]["avatarImage"]);
$out["author"][] = [
"title" =>
(
$user["profile"]["verified"] === true ?
"" : ""
) .
$user["profile"]["name"],
"followers" => null,
"description" => null,
"thumb" => $avatar,
"url" =>
"https://open.spotify.com/artist/" .
explode(
":",
$user["uri"],
3
)[2]
];
}
// get users
foreach($payload["data"]["searchV2"]["users"]["items"] as $user){
if(isset($user["data"])){
$user = $user["data"];
}
$avatar = $this->get_thumb($user["avatar"]);
$out["user"][] = [
"title" => $user["displayName"] . " (@{$user["id"]})",
"followers" => null,
"description" => null,
"thumb" => $avatar,
"url" => "https://open.spotify.com/user/" . $user["id"]
];
}
return $out;
}
/*
	Flatten a Spotify "artists" object into a display name and a link.
	
	@param array $artists  Object holding an "items" list; each item is read
	                       for ["profile"]["name"], the first one also for ["uri"]
	@return array          [names, link]: names is every artist name joined
	                       with ", ", link points to the FIRST artist's page.
	                       Both are null when no name could be collected;
	                       link alone is null when the first item has no
	                       usable "uri".
*/
private function get_artists($artists){
	
	$names = [];
	
	foreach(($artists["items"] ?? []) as $artist){
		
		$names[] = $artist["profile"]["name"];
	}
	
	$names = implode(", ", $names);
	
	if($names == ""){
		
		return [null, null];
	}
	
	// The previous code tested the leaked foreach variable against null,
	// which could never be true here. Guard the value we actually read.
	$link = null;
	
	if(isset($artists["items"][0]["uri"])){
		
		// the third ":"-separated segment of the uri is used as the ID,
		// split the same way as the other uri lookups in this scraper
		$parts =
			explode(
				":",
				$artists["items"][0]["uri"],
				3
			);
		
		if(isset($parts[2])){
			
			$link = "https://open.spotify.com/artist/" . $parts[2];
		}
	}
	
	return [$names, $link];
}
/*
	Pick the widest entry of a cover's "sources" list.
	
	@param array|null $cover  Object with a "sources" list of entries
	                          carrying "url" and "width", or null
	@return array             ["url" => ..., "ratio" => "1:1"] for the widest
	                          source, or both keys null when nothing is available
*/
private function get_thumb($cover){
	
	$nothing = [
		"url" => null,
		"ratio" => null
	];
	
	if($cover === null){
		
		return $nothing;
	}
	
	$widest = null;
	
	foreach($cover["sources"] as $candidate){
		
		// keep the current pick unless this one is strictly wider
		if(
			$widest !== null &&
			!((int)$candidate["width"] > $widest["width"])
		){
			
			continue;
		}
		
		$widest = $candidate;
	}
	
	if($widest === null){
		
		return $nothing;
	}
	
	return [
		"url" => $widest["url"],
		"ratio" => "1:1"
	];
}
/*
	Collapse a text to a single line and cut it at a word boundary.
	
	Line breaks become spaces, the text is word-wrapped at 300 characters
	and only the first wrapped line is returned.
	
	@param string $text
	@return string
*/
private function limitstrlen($text){
	
	$single_line =
		str_replace(
			["\n\r", "\r\n", "\n", "\r"],
			" ",
			$text
		);
	
	$wrapped = wordwrap($single_line, 300, "\n");
	
	// only the first wrapped line is of interest
	$pieces = explode("\n", $wrapped, 2);
	
	return $pieces[0];
}
}

1579
src/scraper/startpage.php Normal file

File diff suppressed because it is too large Load Diff

257
src/scraper/vsco.php Normal file
View File

@ -0,0 +1,257 @@
<?php
/*
	VSCO image search scraper.
	
	Greps a bearer token from the vsco.co feed page, then queries the
	/api/2.0/search/images endpoint and maps the results to the image
	output format (status / npt / image).
*/
class vsco{
	
	// Backend helper used for proxy selection and next-page token storage.
	// Declared explicitly because dynamic properties are deprecated as of
	// PHP 8.2; public so it stays as reachable as the dynamic one was.
	public $backend;
	
	public function __construct(){
		
		include "lib/backend.php";
		$this->backend = new backend("vsco");
	}
	
	// This scraper offers no filters, whatever the page.
	public function getfilters($page){
		
		return [];
	}
	
	/*
		Perform a GET request.
		
		@param mixed       $proxy   Proxy handed to backend::assign_proxy()
		@param string      $url     Target URL, without query string
		@param array       $get     Query parameters; when $bearer is set,
		                            "query" is also used to build the Referer
		@param string|null $bearer  When set, API headers (with the
		                            Authorization bearer) are sent instead
		                            of the browser navigation headers
		@return string              Response body
		@throws Exception           With the curl error message on failure
	*/
	private function get($proxy, $url, $get = [], $bearer = null){
		
		$curlproc = curl_init();
		
		if($get !== []){
			
			// $get itself is kept as an array, the Referer needs it below
			$url .= "?" . http_build_query($get);
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		
		if($bearer === null){
			
			$headers =
				["User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"DNT: 1",
				"Sec-GPC: 1",
				"Connection: keep-alive",
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: same-origin",
				"Sec-Fetch-User: ?1",
				"Priority: u=0, i",
				"TE: trailers"];
		}else{
			
			$headers =
				["User-Agent: " . config::USER_AGENT,
				"Accept: */*",
				"Accept-Language: en-US",
				"Accept-Encoding: gzip",
				"Referer: https://vsco.co/search/images/" . urlencode($get["query"]),
				"authorization: Bearer " . $bearer,
				"content-type: application/json",
				"x-client-build: 1",
				"x-client-platform: web",
				"DNT: 1",
				"Sec-GPC: 1",
				"Connection: keep-alive",
				"Sec-Fetch-Dest: empty",
				"Sec-Fetch-Mode: cors",
				"Sec-Fetch-Site: same-origin",
				"Priority: u=0",
				"TE: trailers"];
		}
		
		curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		// http2 bypass
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message first, then release the handle: the
			// previous code threw before curl_close() was reached
			$message = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($message);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/*
		Image search.
		
		@param array $get  Reads "npt" (next page token, falsy on the first
		                   page) and "s" (search term)
		@return array      ["status" => "ok", "npt" => token|null, "image" => [...]]
		@throws Exception  On empty search term, failed requests, a missing
		                   bearer token or an undecodable/unexpected payload
	*/
	public function image($get){
		
		if($get["npt"]){
			
			// the stored token carries the pagination state and the bearer
			[$data, $proxy] =
				$this->backend->get(
					$get["npt"], "images"
				);
			
			$data = json_decode($data, true);
			
		}else{
			
			$search = $get["s"];
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$proxy = $this->backend->get_ip();
			
			// get bearer token
			try{
				
				$html =
					$this->get(
						$proxy,
						"https://vsco.co/feed"
					);
				
			}catch(Exception $error){
				
				throw new Exception("Failed to fetch feed page");
			}
			
			// NOTE(review): the "A-z" range also matches [ \ ] ^ _ and `;
			// left untouched since the real token alphabet is not visible here
			preg_match(
				'/"tkn":"([A-z0-9]+)"/',
				$html,
				$bearer
			);
			
			if(!isset($bearer[1])){
				
				throw new Exception("Failed to grep bearer token");
			}
			
			$data = [
				"pagination" => [
					"query" => $search,
					"page" => 0,
					"size" => 100
				],
				"bearer" => $bearer[1]
			];
		}
		
		try{
			
			$json =
				$this->get(
					$proxy,
					"https://vsco.co/api/2.0/search/images",
					$data["pagination"],
					$data["bearer"]
				);
			
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch JSON");
		}
		
		$json = json_decode($json, true);
		
		if($json === null){
			
			throw new Exception("Failed to decode JSON");
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		if(!isset($json["results"])){
			
			throw new Exception("Failed to access results object");
		}
		
		foreach($json["results"] as $image){
			
			// keep only the path of responsive_url (scheme-less host + path)
			$image_domain = parse_url("https://" . $image["responsive_url"], PHP_URL_HOST);
			$thumbnail = explode($image_domain, $image["responsive_url"], 2)[1];
			
			if(substr($thumbnail, 0, 3) != "/1/"){
				
				// drop the first path segment when the path does not start with /1/
				$thumbnail =
					preg_replace(
						'/^\/[^\/]+/',
						"",
						$thumbnail
					);
			}
			
			$thumbnail = "https://img.vsco.co/cdn-cgi/image/width=480,height=360" . $thumbnail;
			
			$width = (int)$image["dimensions"]["width"];
			$height = (int)$image["dimensions"]["height"];
			
			$size = $this->image_ratio($width, $height);
			
			$out["image"][] = [
				"title" => $image["description"],
				"source" => [
					[
						"url" => "https://" . $image["responsive_url"],
						"width" => $width,
						"height" => $height
					],
					[
						"url" => $thumbnail,
						"width" => $size[0],
						"height" => $size[1]
					]
				],
				"url" => "https://" . $image["grid"]["domain"] . "/media/" . $image["imageId"]
			];
		}
		
		// get NPT: pages hold 100 results, offer a next page while some remain
		$data["pagination"]["page"]++;
		
		if(
			isset($json["total"]) &&
			ceil($json["total"] / 100) > $data["pagination"]["page"]
		){
			
			$out["npt"] =
				$this->backend->store(
					json_encode($data),
					"images",
					$proxy
				);
		}
		
		return $out;
	}
	
	/*
		Scale dimensions so that they fit inside a 480x360 box, keeping
		the aspect ratio.
		
		@param int $width
		@param int $height
		@return array  [width, height] of the scaled image (floored).
		               When either input is not positive the box itself
		               is returned instead of dividing by zero.
	*/
	private function image_ratio($width, $height){
		
		if(
			$width <= 0 ||
			$height <= 0
		){
			
			// unknown dimensions: fall back to the size asked of the CDN
			return [480, 360];
		}
		
		// the smaller factor is the one that makes both sides fit
		$ratio =
			min(
				480 / $width,
				360 / $height
			);
		
		return [
			floor($width * $ratio),
			floor($height * $ratio)
		];
	}
}

246
src/scraper/wiby.php Normal file
View File

@ -0,0 +1,246 @@
<?php
/*
	wiby.me web search scraper.
	
	Fetches the HTML result page and extracts links with regular
	expressions. Only the "web" page is supported.
*/
class wiby{
	
	// Backend helper used for proxy selection and next-page token storage.
	// Declared explicitly because dynamic properties are deprecated as of
	// PHP 8.2; public so it stays as reachable as the dynamic one was.
	public $backend;
	
	public function __construct(){
		
		include "lib/backend.php";
		$this->backend = new backend("wiby");
	}
	
	/*
		@param string $page  Page the filters are requested for
		@return array        NSFW and date filters for "web", nothing otherwise
	*/
	public function getfilters($page){
		
		if($page != "web"){
			
			return [];
		}
		
		return [
			"nsfw" => [
				"display" => "NSFW",
				"option" => [
					"yes" => "Yes",
					"no" => "No"
				]
			],
			"date" => [
				"display" => "Time posted",
				"option" => [
					"any" => "Any time",
					"day" => "Past day",
					"week" => "Past week",
					"month" => "Past month",
					"year" => "Past year",
				]
			]
		];
	}
	
	/*
		Perform a GET request.
		
		$get used to carry a default value while being followed by the
		required $nsfw. PHP already treated it as required in that
		position and reports the declaration as deprecated since 8.0,
		so the default was dropped.
		
		@param mixed  $proxy  Proxy handed to backend::assign_proxy()
		@param string $url    Target URL, without query string
		@param array  $get    Query parameters ([] for none)
		@param string $nsfw   Value sent in the "ws" cookie
		@return string        Response body
		@throws Exception     With the curl error message on failure
	*/
	private function get($proxy, $url, $get, $nsfw){
		
		$curlproc = curl_init();
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"Cookie: ws={$nsfw}",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"]
		);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message first, then release the handle: the
			// previous code threw before curl_close() was reached
			$message = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($message);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/*
		Web search.
		
		@param array $get  Reads "npt" (next page token, falsy on the first
		                   page), "s" (search term), "date" and "nsfw"
		@return array      Web result structure; only "web" and "npt" are
		                   ever populated by this scraper
		@throws Exception  On empty search term or failed request
	*/
	public function web($get){
		
		if($get["npt"]){
			
			// the stored token holds the query, the page number and
			// the cookie value used on the first page
			[$q, $proxy] = $this->backend->get($get["npt"], "web");
			$q = json_decode($q, true);
			
			$nsfw = $q["nsfw"];
			unset($q["nsfw"]);
		}else{
			
			$search = $get["s"];
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$proxy = $this->backend->get_ip();
			$date = $get["date"];
			$nsfw = $get["nsfw"] == "yes" ? "0" : "1";
			
			$search = $this->strip_bangs($search);
			
			// the date filter is expressed as a prefix on the query
			switch($date){
				
				case "day": $search = "!td " . $search; break;
				case "week": $search = "!tw " . $search; break;
				case "month": $search = "!tm " . $search; break;
				case "year": $search = "!ty " . $search; break;
			}
			
			$q = [
				"q" => $search
			];
		}
		
		try{
			$html = $this->get(
				$proxy,
				"https://wiby.me/",
				$q,
				$nsfw
			);
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch search page");
		}
		
		// the "Find more..." link carries the next page number
		preg_match(
			'/<p class="pin"><blockquote>(?:<\/p>)?<br><a class="more" href="\/\?q=[^"]+&p=([0-9]+)">Find more\.\.\.<\/a><\/blockquote>/',
			$html,
			$nextpage
		);
		
		if(count($nextpage) === 0){
			
			$nextpage = null;
		}else{
			
			$nextpage =
				$this->backend->store(
					json_encode([
						"q" => $q["q"],
						"p" => (int)$nextpage[1],
						"nsfw" => $nsfw
					]),
					"web",
					$proxy
				);
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => $nextpage,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		// 1: href, 2: link text, 3: description paragraph
		preg_match_all(
			'/<blockquote>[\s]*<a .* href="(.*)">(.*)<\/a>.*<p>(.*)<\/p>[\s]*<\/blockquote>/Ui',
			$html,
			$links
		);
		
		foreach($links[0] as $i => $unused){
			
			$out["web"][] = [
				"title" => $this->unescapehtml(trim($links[2][$i])),
				"description" => $this->unescapehtml(trim(strip_tags($links[3][$i]), ".\n\r ")),
				"url" => trim($links[1][$i]),
				"date" => null,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => [],
				"table" => []
			];
		}
		
		return $out;
	}
	
	/*
		Remove the "!"/"&" prefixed command tokens from a user query.
		
		The 3-character tokens are listed before the 2-character ones.
		str_replace() applies its needles in order, so with "!g" first
		a query such as "!gi cats" used to come out as "i cats".
		
		@param string $search
		@return string
	*/
	private function strip_bangs($search){
		
		return
			str_replace(
				[
					"!gi",
					"!gv",
					"!gm",
					"!bi",
					"!bv",
					"!bm",
					"!td",
					"!tw",
					"!tm",
					"!ty",
					"&gi",
					"&gv",
					"&gm",
					"&bi",
					"&bv",
					"&bm",
					"&td",
					"&tw",
					"&tm",
					"&ty",
					"!g",
					"!b",
					"&g",
					"&b"
				],
				"",
				$search
			);
	}
	
	/*
		Turn <br> variants into newlines and decode HTML entities.
		
		@param string $str
		@return string
	*/
	private function unescapehtml($str){
		
		return html_entity_decode(
			str_replace(
				[
					"<br>",
					"<br/>",
					"</br>",
					"<BR>",
					"<BR/>",
					"</BR>",
				],
				"\n",
				$str
			),
			ENT_QUOTES | ENT_XML1, 'UTF-8'
		);
	}
}

1170
src/scraper/yandex.php Normal file

File diff suppressed because it is too large Load Diff

741
src/scraper/yep.php Normal file
View File

@ -0,0 +1,741 @@
<?php
/*
	yep.com scraper (web, image and news search).
	
	All three searches hit the same JSON endpoint
	(https://api.yep.com/fs/2/search) with a different "type".
	No next page token is ever produced.
*/
class yep{
	
	// Collaborators created in the constructor. Declared explicitly because
	// dynamic properties are deprecated as of PHP 8.2; public so they stay
	// as reachable as the dynamic ones were.
	public $backend;
	public $fuckhtml;
	
	public function __construct(){
		
		include "lib/backend.php";
		$this->backend = new backend("yep");
		
		include "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/*
		@param string $page  Unused, every page gets the same filters
		@return array        Country and NSFW filters
	*/
	public function getfilters($page){
		
		return [
			"country" => [
				"display" => "Country",
				"option" => [
					"all" => "All regions",
					"af" => "Afghanistan",
					"al" => "Albania",
					"dz" => "Algeria",
					"as" => "American Samoa",
					"ad" => "Andorra",
					"ao" => "Angola",
					"ai" => "Anguilla",
					"ag" => "Antigua and Barbuda",
					"ar" => "Argentina",
					"am" => "Armenia",
					"aw" => "Aruba",
					"au" => "Australia",
					"at" => "Austria",
					"az" => "Azerbaijan",
					"bs" => "Bahamas",
					"bh" => "Bahrain",
					"bd" => "Bangladesh",
					"bb" => "Barbados",
					"by" => "Belarus",
					"be" => "Belgium",
					"bz" => "Belize",
					"bj" => "Benin",
					"bt" => "Bhutan",
					"bo" => "Bolivia",
					"ba" => "Bosnia and Herzegovina",
					"bw" => "Botswana",
					"br" => "Brazil",
					"bn" => "Brunei Darussalam",
					"bg" => "Bulgaria",
					"bf" => "Burkina Faso",
					"bi" => "Burundi",
					"cv" => "Cabo Verde",
					"kh" => "Cambodia",
					"cm" => "Cameroon",
					"ca" => "Canada",
					"ky" => "Cayman Islands",
					"cf" => "Central African Republic",
					"td" => "Chad",
					"cl" => "Chile",
					"cn" => "China",
					"co" => "Colombia",
					"cg" => "Congo",
					"cd" => "Congo, Democratic Republic",
					"ck" => "Cook Islands",
					"cr" => "Costa Rica",
					"hr" => "Croatia",
					"cu" => "Cuba",
					"cy" => "Cyprus",
					"cz" => "Czechia",
					"ci" => "Côte d'Ivoire",
					"dk" => "Denmark",
					"dj" => "Djibouti",
					"dm" => "Dominica",
					"do" => "Dominican Republic",
					"ec" => "Ecuador",
					"eg" => "Egypt",
					"sv" => "El Salvador",
					"gq" => "Equatorial Guinea",
					"ee" => "Estonia",
					"et" => "Ethiopia",
					"fo" => "Faroe Islands",
					"fj" => "Fiji",
					"fi" => "Finland",
					"fr" => "France",
					"gf" => "French Guiana",
					"pf" => "French Polynesia",
					"ga" => "Gabon",
					"gm" => "Gambia",
					"ge" => "Georgia",
					"de" => "Germany",
					"gh" => "Ghana",
					"gi" => "Gibraltar",
					"gr" => "Greece",
					"gl" => "Greenland",
					"gd" => "Grenada",
					"gp" => "Guadeloupe",
					"gu" => "Guam",
					"gt" => "Guatemala",
					"gg" => "Guernsey",
					"gn" => "Guinea",
					"gy" => "Guyana",
					"ht" => "Haiti",
					"hn" => "Honduras",
					"hk" => "Hong Kong",
					"hu" => "Hungary",
					"is" => "Iceland",
					"in" => "India",
					"id" => "Indonesia",
					"iq" => "Iraq",
					"ie" => "Ireland",
					"im" => "Isle of Man",
					"il" => "Israel",
					"it" => "Italy",
					"jm" => "Jamaica",
					"jp" => "Japan",
					"je" => "Jersey",
					"jo" => "Jordan",
					"kz" => "Kazakhstan",
					"ke" => "Kenya",
					"ki" => "Kiribati",
					"kw" => "Kuwait",
					"kg" => "Kyrgyzstan",
					"la" => "Lao People's Democratic Republic",
					"lv" => "Latvia",
					"lb" => "Lebanon",
					"ls" => "Lesotho",
					"ly" => "Libya",
					"li" => "Liechtenstein",
					"lt" => "Lithuania",
					"lu" => "Luxembourg",
					"mk" => "Macedonia",
					"mg" => "Madagascar",
					"mw" => "Malawi",
					"my" => "Malaysia",
					"mv" => "Maldives",
					"ml" => "Mali",
					"mt" => "Malta",
					"mq" => "Martinique",
					"mr" => "Mauritania",
					"mu" => "Mauritius",
					"yt" => "Mayotte",
					"mx" => "Mexico",
					"fm" => "Micronesia, Federated States of",
					"md" => "Moldova",
					"mc" => "Monaco",
					"mn" => "Mongolia",
					"me" => "Montenegro",
					"ms" => "Montserrat",
					"ma" => "Morocco",
					"mz" => "Mozambique",
					"mm" => "Myanmar",
					"na" => "Namibia",
					"nr" => "Nauru",
					"np" => "Nepal",
					"nl" => "Netherlands",
					"nc" => "New Caledonia",
					"nz" => "New Zealand",
					"ni" => "Nicaragua",
					"ne" => "Niger",
					"ng" => "Nigeria",
					"nu" => "Niue",
					"no" => "Norway",
					"om" => "Oman",
					"pk" => "Pakistan",
					"ps" => "Palestine, State of",
					"pa" => "Panama",
					"pg" => "Papua New Guinea",
					"py" => "Paraguay",
					"pe" => "Peru",
					"ph" => "Philippines",
					"pn" => "Pitcairn",
					"pl" => "Poland",
					"pt" => "Portugal",
					"pr" => "Puerto Rico",
					"qa" => "Qatar",
					"ro" => "Romania",
					"ru" => "Russian Federation",
					"rw" => "Rwanda",
					"re" => "Réunion",
					"sh" => "Saint Helena",
					"kn" => "Saint Kitts and Nevis",
					"lc" => "Saint Lucia",
					"vc" => "Saint Vincent and the Grenadines",
					"ws" => "Samoa",
					"sm" => "San Marino",
					"st" => "Sao Tome and Principe",
					"sa" => "Saudi Arabia",
					"sn" => "Senegal",
					"rs" => "Serbia",
					"sc" => "Seychelles",
					"sl" => "Sierra Leone",
					"sg" => "Singapore",
					"sk" => "Slovakia",
					"si" => "Slovenia",
					"sb" => "Solomon Islands",
					"so" => "Somalia",
					"kr" => "South Korea",
					"za" => "South Africa",
					"es" => "Spain",
					"lk" => "Sri Lanka",
					"sr" => "Suriname",
					"se" => "Sweden",
					"ch" => "Switzerland",
					"tw" => "Taiwan",
					"tj" => "Tajikistan",
					"tz" => "Tanzania",
					"th" => "Thailand",
					"tl" => "Timor-Leste",
					"tg" => "Togo",
					"tk" => "Tokelau",
					"to" => "Tonga",
					"tt" => "Trinidad and Tobago",
					"tn" => "Tunisia",
					"tr" => "Turkey",
					"tm" => "Turkmenistan",
					"ug" => "Uganda",
					"ua" => "Ukraine",
					"ae" => "United Arab Emirates",
					"gb" => "United Kingdom",
					"us" => "United States",
					"uy" => "Uruguay",
					"uz" => "Uzbekistan",
					"vu" => "Vanuatu",
					"ve" => "Venezuela",
					"vn" => "Vietnam",
					"vg" => "Virgin Islands, British",
					"vi" => "Virgin Islands, U.S.",
					"ye" => "Yemen",
					"zm" => "Zambia",
					"zw" => "Zimbabwe"
				]
			],
			"nsfw" => [
				"display" => "NSFW",
				"option" => [
					"yes" => "Yes",
					"maybe" => "Maybe",
					"no" => "No"
				]
			]
		];
	}
	
	/*
		Perform a GET request.
		
		@param mixed  $proxy  Proxy handed to backend::assign_proxy()
		@param string $url    Target URL, without query string
		@param array  $get    Query parameters
		@return string        Response body
		@throws Exception     With the curl error message on failure
	*/
	private function get($proxy, $url, $get = []){
		
		$curlproc = curl_init();
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		// use http2
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		// set ciphers
		curl_setopt(
			$curlproc,
			CURLOPT_SSL_CIPHER_LIST,
			"aes_128_gcm_sha_256,chacha20_poly1305_sha_256,aes_256_gcm_sha_384,ecdhe_ecdsa_aes_128_gcm_sha_256,ecdhe_rsa_aes_128_gcm_sha_256,ecdhe_ecdsa_chacha20_poly1305_sha_256,ecdhe_rsa_chacha20_poly1305_sha_256,ecdhe_ecdsa_aes_256_gcm_sha_384,ecdhe_rsa_aes_256_gcm_sha_384,ecdhe_ecdsa_aes_256_sha,ecdhe_ecdsa_aes_128_sha,ecdhe_rsa_aes_128_sha,ecdhe_rsa_aes_256_sha,rsa_aes_128_gcm_sha_256,rsa_aes_256_gcm_sha_384,rsa_aes_128_sha,rsa_aes_256_sha"
		);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: */*",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip, deflate, br, zstd",
			"Referer: https://yep.com/",
			"Origin: https://yep.com",
			"DNT: 1",
			"Connection: keep-alive",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-site",
			"Priority: u=4",
			"TE: trailers"]
		);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message first, then release the handle: the
			// previous code threw before curl_close() was reached
			$message = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($message);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/*
		Web search. Also collects the featured news and images that the
		web payload carries.
		
		@param array $get  Reads "s", "country" and "nsfw"
		@return array      Web result structure
		@throws Exception  See search_api()
	*/
	public function web($get){
		
		// https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
		$json = $this->search_api($get, "web", "99999");
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		if(isset($json[1]["correction"])){
			
			$out["spelling"] = [
				"type" => "not_many",
				"using" => $get["s"],
				"correction" => $json[1]["correction"][1]
			];
		}
		
		if(isset($json[1]["results"])){
			
			foreach($json[1]["results"] as $item){
				
				// only organic results are mapped, other types are ignored
				if(strtolower($item["type"]) !== "organic"){
					
					continue;
				}
				
				$sublinks = [];
				
				if(isset($item["sitelinks"]["full"])){
					
					foreach($item["sitelinks"]["full"] as $link){
						
						$sublinks[] = [
							"title" => $link["title"],
							"date" => null,
							"description" => $this->clean_snippet($link["snippet"]),
							"url" => $link["url"]
						];
					}
				}
				
				$out["web"][] = [
					"title" => $item["title"],
					"description" => $this->clean_snippet($item["snippet"]),
					"url" => $item["url"],
					"date" => strtotime($item["first_seen"]),
					"type" => "web",
					"thumb" => [
						"url" => null,
						"ratio" => null
					],
					"sublink" => $sublinks,
					"table" => []
				];
			}
		}
		
		if(isset($json[1]["featured_news"])){
			
			foreach($json[1]["featured_news"] as $news){
				
				$out["news"][] = [
					"title" => $news["title"],
					"description" => $this->clean_snippet($news["snippet"]),
					"date" => strtotime($news["first_seen"]),
					"thumb" => $this->news_thumb($news),
					"url" => $news["url"]
				];
			}
		}
		
		if(isset($json[1]["featured_images"])){
			
			foreach($json[1]["featured_images"] as $image){
				
				$out["image"][] = $this->format_image($image);
			}
		}
		
		return $out;
	}
	
	/*
		Image search. The request carries no "limit" parameter and there
		is no next page.
		
		@param array $get  Reads "s", "country" and "nsfw"
		@return array      ["status" => "ok", "npt" => null, "image" => [...]]
		@throws Exception  See search_api()
	*/
	public function image($get){
		
		$json = $this->search_api($get, "images");
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		if(isset($json[1]["results"])){
			
			foreach($json[1]["results"] as $item){
				
				$out["image"][] = $this->format_image($item);
			}
		}
		
		return $out;
	}
	
	/*
		News search.
		
		@param array $get  Reads "s", "country" and "nsfw"
		@return array      ["status" => "ok", "npt" => null, "news" => [...]]
		@throws Exception  See search_api()
	*/
	public function news($get){
		
		$json = $this->search_api($get, "news", "99999");
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"news" => []
		];
		
		if(isset($json[1]["results"])){
			
			foreach($json[1]["results"] as $item){
				
				$out["news"][] = [
					"title" => $item["title"],
					"author" => null,
					"description" => $this->clean_snippet($item["snippet"]),
					"date" => strtotime($item["first_seen"]),
					"thumb" => $this->news_thumb($item),
					"url" => $item["url"]
				];
			}
		}
		
		return $out;
	}
	
	/*
		Validate the search term, query the search endpoint and decode
		the JSON payload. Shared by web(), image() and news().
		
		@param array       $get    Reads "s", "country" and "nsfw"
		@param string      $type   Value of the "type" parameter
		@param string|null $limit  Value of the "limit" parameter, which
		                           is left out of the request when null
		@return array              Decoded payload
		@throws Exception          On empty search term, failed request,
		                           Cloudflare block page or undecodable JSON
	*/
	private function search_api($get, $type, $limit = null){
		
		$search = $get["s"];
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		$country = $get["country"];
		
		// parameters are kept in the order they were always sent in
		$params = [
			"client" => "web",
			"gl" => $country == "all" ? $country : strtoupper($country)
		];
		
		if($limit !== null){
			
			$params["limit"] = $limit;
		}
		
		$params["no_correct"] = "false";
		$params["q"] = $search;
		$params["safeSearch"] = $this->safesearch($get["nsfw"]);
		$params["type"] = $type;
		
		try{
			
			$json =
				$this->get(
					$this->backend->get_ip(), // no nextpage!
					"https://api.yep.com/fs/2/search",
					$params
				);
			
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch JSON");
		}
		
		$this->detect_cf($json);
		
		$json = json_decode($json, true);
		
		if($json === null){
			
			throw new Exception("Failed to decode JSON");
		}
		
		return $json;
	}
	
	/*
		Map the NSFW filter value to the "safeSearch" request value.
		Unknown values are passed through unchanged.
	*/
	private function safesearch($nsfw){
		
		switch($nsfw){
			
			case "yes": return "off";
			case "maybe": return "moderate";
			case "no": return "strict";
		}
		
		return $nsfw;
	}
	
	/*
		Decode entities, drop tags and strip the trailing dots of a snippet.
	*/
	private function clean_snippet($snippet){
		
		return
			$this->titledots(
				strip_tags(
					html_entity_decode(
						$snippet
					)
				)
			);
	}
	
	/*
		Thumbnail object of a news item: its "img" (unwrapped through
		unshiturl()) with a 16:9 ratio when present, null values otherwise.
	*/
	private function news_thumb($item){
		
		if(isset($item["img"])){
			
			return [
				"url" => $this->unshiturl($item["img"]),
				"ratio" => "16:9"
			];
		}
		
		return [
			"url" => null,
			"ratio" => null
		];
	}
	
	/*
		Map an image result to the output format.
		
		The thumbnail size is derived from the image size, capped at a
		width of 260. All four sizes are null when the payload reports
		a width or height of 0.
	*/
	private function format_image($image){
		
		if(
			$image["width"] !== 0 &&
			$image["height"] !== 0
		){
			
			$thumb_width = $image["width"] >= 260 ? 260 : $image["width"];
			$thumb_height = ceil($image["height"] * ($thumb_width / $image["width"]));
			
			$width = $image["width"];
			$height = $image["height"];
		}else{
			
			$thumb_width = null;
			$thumb_height = null;
			
			$width = null;
			$height = null;
		}
		
		return [
			"title" => $image["title"],
			"source" => [
				[
					"url" => $image["image_id"],
					"width" => $width,
					"height" => $height
				],
				[
					"url" => $image["src"],
					"width" => $thumb_width,
					"height" => $thumb_height
				]
			],
			"url" => $image["host_page"]
		];
	}
	
	/*
		Throw when the payload is a Cloudflare block page, recognised by
		a div carrying the "cf-wrapper" class.
	*/
	private function detect_cf($payload){
		
		// detect cloudflare page
		$this->fuckhtml->load($payload);
		
		$wrappers =
			$this->fuckhtml
			->getElementsByClassName(
				"cf-wrapper",
				"div"
			);
		
		if(count($wrappers) !== 0){
			
			throw new Exception("Blocked by Cloudflare. Please follow curl-impersonate installation instructions");
		}
	}
	
	/*
		Trim a text and strip one trailing "..." or Unicode ellipsis.
		
		The previous version searched the last 4 bytes for an EMPTY
		needle. On PHP 8 strpos() returns 0 for an empty needle, so the
		test was always true and every text lost its last 4 bytes.
		
		@param string $title
		@return string
	*/
	private function titledots($title){
		
		$title = trim($title);
		
		foreach(["...", "\u{2026}"] as $dots){
			
			$length = strlen($dots);
			
			if(substr($title, -$length) === $dots){
				
				return rtrim(substr($title, 0, -$length));
			}
		}
		
		return $title;
	}
	
	/*
		Return the "url" query parameter of a URL when it has one,
		the URL itself otherwise.
		
		@param string $url
		@return string
	*/
	private function unshiturl($url){
		
		$query = parse_url($url, PHP_URL_QUERY);
		
		// no query string (null) or unparsable URL (false): passing that
		// on to parse_str() is deprecated since PHP 8.1
		if(!is_string($query)){
			
			return $url;
		}
		
		parse_str($query, $params);
		
		if(
			isset($params["url"]) &&
			is_string($params["url"])
		){
			
			return $params["url"];
		}
		
		return $url;
	}
}

1727
src/scraper/yt.php Normal file

File diff suppressed because it is too large Load Diff

573
src/settings.php Normal file
View File

@ -0,0 +1,573 @@
<?php
include "data/config.php";
/*
	Define settings
	
	$settings is a list of groups ("name" + "settings"). Every setting has
	a "description" (HTML), the "parameter" name it is stored under and a
	list of "options" (value/text pairs). The first option of each list is
	the one the cookie code further down treats as the default.
*/

// expands [value => text] pairs into the option rows used everywhere else
$build_options = static function(array $labels){
	
	$rows = [];
	
	foreach($labels as $value => $text){
		
		$rows[] = [
			"value" => $value,
			"text" => $text
		];
	}
	
	return $rows;
};

$settings = [
	[
		"name" => "General",
		"settings" => [
			[
				"description" => "Allow NSFW content",
				"parameter" => "nsfw",
				"options" => $build_options([
					"yes" => "Yes",
					"maybe" => "Maybe",
					"no" => "No"
				])
			],
			[
				"description" => "Theme",
				"parameter" => "theme",
				"options" => [] // filled in once the theme folder is scanned
			],
			[
				"description" => "Prevent clicking background elements when image viewer is open",
				"parameter" => "bg_noclick",
				"options" => $build_options([
					"no" => "No",
					"yes" => "Yes"
				])
			]
		]
	],
	[
		"name" => "Scrapers to use",
		"settings" => [
			[
				"description" => "Autocomplete<br><i>Picking <span class=\"code-inline\">Auto</span> changes the source dynamically depending of the page's scraper<br><b>Warning:</b> If you edit this field, you will need to re-add the search engine so that the new autocomplete settings are applied!</i>",
				"parameter" => "scraper_ac",
				"options" => $build_options([
					"disabled" => "Disabled",
					"auto" => "Auto",
					"brave" => "Brave",
					"ddg" => "DuckDuckGo",
					"yandex" => "Yandex",
					"google" => "Google",
					"startpage" => "Startpage",
					"kagi" => "Kagi",
					"qwant" => "Qwant",
					"ghostery" => "Ghostery",
					"yep" => "Yep",
					"marginalia" => "Marginalia",
					"yt" => "YouTube",
					"sc" => "SoundCloud"
				])
			],
			[
				"description" => "Web",
				"parameter" => "scraper_web",
				"options" => $build_options([
					"ddg" => "DuckDuckGo",
					"brave" => "Brave",
					"yandex" => "Yandex",
					"google" => "Google",
					"google_cse" => "Google CSE",
					"startpage" => "Startpage",
					"qwant" => "Qwant",
					"ghostery" => "Ghostery",
					"yep" => "Yep",
					"greppr" => "Greppr",
					"crowdview" => "Crowdview",
					"mwmbl" => "Mwmbl",
					"mojeek" => "Mojeek",
					"solofield" => "Solofield",
					"marginalia" => "Marginalia",
					"wiby" => "wiby",
					"curlie" => "Curlie"
				])
			],
			[
				"description" => "Images",
				"parameter" => "scraper_images",
				"options" => $build_options([
					"ddg" => "DuckDuckGo",
					"yandex" => "Yandex",
					"brave" => "Brave",
					"google" => "Google",
					"google_cse" => "Google CSE",
					"startpage" => "Startpage",
					"qwant" => "Qwant",
					"yep" => "Yep",
					"solofield" => "Solofield",
					"pinterest" => "Pinterest",
					"fivehpx" => "500px",
					"vsco" => "VSCO",
					"imgur" => "Imgur",
					"ftm" => "FindThatMeme"
				])
			],
			[
				"description" => "Videos",
				"parameter" => "scraper_videos",
				"options" => $build_options([
					"yt" => "YouTube",
					"ddg" => "DuckDuckGo",
					"brave" => "Brave",
					"yandex" => "Yandex",
					"google" => "Google",
					"startpage" => "Startpage",
					"qwant" => "Qwant",
					"solofield" => "Solofield"
				])
			],
			[
				"description" => "News",
				"parameter" => "scraper_news",
				"options" => $build_options([
					"ddg" => "DuckDuckGo",
					"brave" => "Brave",
					"google" => "Google",
					"startpage" => "Startpage",
					"qwant" => "Qwant",
					"yep" => "Yep",
					"mojeek" => "Mojeek"
				])
			],
			[
				"description" => "Music",
				"parameter" => "scraper_music",
				"options" => $build_options([
					"sc" => "SoundCloud"
					// "spotify" => "Spotify" (currently disabled)
				])
			]
		]
	]
];

// the helper is only needed while building the array
unset($build_options);
/*
	Set theme collection
*/
// "Dark" is always offered first
$settings[0]["settings"][1]["options"][] = [
	"value" => "Dark",
	"text" => "Dark"
];

// every entry of static/themes/ adds one option, named after the part
// of its file name that precedes the first dot
$themes = glob("static/themes/*");

foreach($themes as $theme_path){
	
	$name_parts = explode(".", basename($theme_path));
	
	$settings[0]["settings"][1]["options"][] = [
		"value" => $name_parts[0],
		"text" => $name_parts[0]
	];
}
/*
	Set cookies
	
	The key=value pairs come from one of three places:
	 - POST: the settings form was submitted
	 - GET: a preference link was followed (the user is then redirected)
	 - otherwise: the current cookies, re-sent with a fresh expiry date
	
	A pair holding the default value of a setting removes its cookie
	instead of storing it.
*/
if($_POST){
	
	$loop = &$_POST;
}elseif(count($_GET) !== 0){
	
	// redirect user to front page
	$loop = &$_GET;
	header("Location: /");
}else{
	
	// refresh cookie dates
	$loop = &$_COOKIE;
}

foreach($loop as $key => $value){
	
	if($key == "theme"){
		
		// the default theme is configured, not the first option of the list
		if($value == config::DEFAULT_THEME){
			
			unset($_COOKIE[$key]);
			
			setcookie(
				"theme",
				"",
				[
					"expires" => -1, // removes cookie
					"samesite" => "Lax",
					"path" => "/"
				]
			);
			continue;
		}
	}else{
		
		foreach($settings as $title){
			
			foreach($title["settings"] as $list){
				
				if(
					$list["parameter"] == $key &&
					$list["options"][0]["value"] == $value
				){
					
					unset($_COOKIE[$key]);
					
					setcookie(
						$key,
						"",
						[
							"expires" => -1, // removes cookie
							"samesite" => "Lax",
							"path" => "/"
						]
					);
					
					// jump to the next key=value pair
					continue 3;
				}
			}
		}
	}
	
	// array parameters (foo[]=bar) cannot be stored
	if(!is_string($value)){
		
		continue;
	}
	
	$key = trim($key);
	$value = trim($value);
	
	// the name comes straight from the request: setcookie() refuses
	// empty names and names containing these characters (ValueError on
	// PHP 8), so skip them instead of crashing the page
	if(
		$key === "" ||
		strpbrk($key, "=,; \t\r\n\013\014") !== false
	){
		
		continue;
	}
	
	$_COOKIE[$key] = $value;
	
	setcookie(
		$key,
		$value,
		[
			"expires" => strtotime("+400 days"), // maximal cookie ttl in chrome
			"samesite" => "Lax",
			"path" => "/"
		]
	);
}
include "lib/frontend.php";
$frontend = new frontend();

// page header
echo
	$frontend->load(
		"header_nofilters.html",
		[
			"title" => "Settings",
			"class" => ""
		]
	);

// heading, opening form tag and the explanation of how settings are stored
$left =
	'<h1>Settings</h1>' .
	'<form method="post" autocomplete="off">' .
	'By clicking <div class="code-inline">Update settings!</div>, a plaintext <div class="code-inline">key=value</div> cookie will be stored on your browser. When selecting a default setting, the parameter is removed from your cookies.';

// show the cookies as a "key=value; key=value" string
$code = "";

if(count($_COOKIE) === 0){
	
	$left .=
		'<br><br>You currently don\'t have any cookies set.';
}else{
	
	$pairs = [];
	
	foreach($_COOKIE as $key => $value){
		
		$pairs[] = $key . "=" . $value;
	}
	
	$code = implode("; ", $pairs);
	
	$left .=
		'<br><br>Your current cookie looks like this:' .
		'<div class="code">' .
		$frontend->highlightcode($code) .
		'</div>';
}
// one <h2> per group, one <select> per setting
$left .=
	'<div class="settings">';

foreach($settings as $group){
	
	$left .= '<h2>' . $group["name"] . '</h2>';
	
	foreach($group["settings"] as $setting){
		
		$parameter = $setting["parameter"];
		
		// without a theme cookie, preselect the configured default theme
		if(
			$parameter == "theme" &&
			!isset($_COOKIE["theme"])
		){
			
			$_COOKIE["theme"] = config::DEFAULT_THEME;
		}
		
		// value currently stored for this setting, null when there is none
		$current = $_COOKIE[$parameter] ?? null;
		
		$left .=
			'<div class="setting">' .
			'<div class="title">' . $setting["description"] . '</div>' .
			'<select name="' . $parameter . '">';
		
		foreach($setting["options"] as $option){
			
			$selected =
				(
					$current !== null &&
					$current == $option["value"]
				) ? ' selected' : '';
			
			$left .=
				'<option value="' . $option["value"] . '"' . $selected . '>' .
				$option["text"] .
				'</option>';
		}
		
		$left .= '</select></div>';
	}
}

// submit button, back link and closing form tag
$left .=
	'</div>' .
	'<div class="settings-submit">' .
	'<input type="submit" value="Update settings!">' .
	'<a href="../">&lt; Go back</a>' .
	'</div>' .
	'</form>';
// the page body is only rendered when no GET parameters were given
// (requests with GET parameters are redirected by the cookie code)
if(count($_GET) === 0){
	
	// build the "preference link" query string out of the cookies.
	// http_build_query() with PHP_QUERY_RFC3986 percent-encodes keys and
	// values like rawurlencode() does, and also copes with array cookies
	// (foo[bar]=1), for which rawurlencode() throws a TypeError on PHP 8
	$code =
		http_build_query(
			$_COOKIE,
			"",
			"&",
			PHP_QUERY_RFC3986
		);
	
	if($code != ""){
		
		$code = "?" . $code;
	}
	
	echo
		$frontend->load(
			"search.html",
			[
				"timetaken" => null,
				"class" => "",
				"right-left" =>
					'<div class="infobox"><h2>Preference link</h2>Following this link will re-apply all cookies configured here and will redirect you to the front page. Useful if your browser clears out cookies after a browsing session.<br><br>' .
					'<a href="settings' . $code . '">Bookmark me!</a>' .
					'</div>',
				"right-right" => "",
				"left" => $left
			]
		);
}

BIN
src/static/404.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 216 B

985
src/static/client.js Normal file
View File

@ -0,0 +1,985 @@
/*
Global functions
*/
// Escape the characters & < > " ' so a string can be placed inside HTML
// text or a quoted attribute. Expects a string.
function htmlspecialchars(str){
	
	return str.replace(
		/[&<>"']/g,
		function(character){
			
			switch(character){
				
				case "&": return "&amp;";
				case "<": return "&lt;";
				case ">": return "&gt;";
				case '"': return "&quot;";
				case "'": return "&#039;";
			}
		}
	);
}
// Reverse of htmlspecialchars(): turns the five entities it produces back
// into characters. Single pass, so "&amp;lt;" becomes "&lt;", not "<".
function htmlspecialchars_decode(str){
	
	return str.replace(
		/&amp;|&lt;|&gt;|&quot;|&#039;/g,
		function(entity){
			
			switch(entity){
				
				case "&amp;": return "&";
				case "&lt;": return "<";
				case "&gt;": return ">";
				case "&quot;": return '"';
				case "&#039;": return "'";
			}
		}
	);
}
// Walk up from elem through its parents and return the first element whose
// className (or id, when is_id is true) equals classname exactly.
// Returns false when the chain ends without a match.
function is_click_within(elem, classname, is_id = false){
	
	for(var node = elem; node !== null; node = node.parentElement){
		
		var found =
			is_id === true ?
				node.id == classname :
				(is_id === false && node.className == classname);
		
		if(found){
			
			return node;
		}
	}
	
	return false;
}
/*
	Prevent GET parameter pollution
	
	On every page that has a form, except the front page and the settings
	page, submitting is taken over by submit(), which builds the URL by hand.
*/
var form = document.getElementsByTagName("form");

if(
	form.length !== 0 &&
	["/", "/settings.php", "/settings"].indexOf(window.location.pathname) === -1
){
	
	form = form[0];
	
	// changing the scraper reloads the results right away
	var scraper_dropdown = document.getElementsByName("scraper")[0];
	
	scraper_dropdown.addEventListener("change", function(choice){
		
		submit(form);
	});
	
	// replace the browser's own form submission
	form.addEventListener("submit", function(e){
		
		e.preventDefault();
		submit(e.srcElement);
	});
}
// Build the query string for form element `e` and navigate to it.
//
// The search term (element named "s") always comes first. A <select> or
// <input> is only added when it has a name and a value and, for selects,
// when the value differs from the first option. "scraper" and "nsfw" are
// always added.
function submit(e){
	
	var GET = "";
	var first = true;
	
	// declared locally: it used to leak into the global scope
	var s = document.getElementsByName("s");
	
	if(s.length !== 0){
		
		GET += "?s=" + encodeURIComponent(s[0].value).replaceAll("%20", "+");
		first = false;
	}
	
	Array.from(
		e.getElementsByTagName("select")
	).concat(
		Array.from(
			e.getElementsByTagName("input")
		)
	).forEach(function(el){
		
		// <option> children; empty for <input> elements
		var firstelem = el.getElementsByTagName("option");
		
		if(
			(
				(
					firstelem.length === 0 ||
					firstelem[0].value != el.value
				) &&
				el.name != "" &&
				el.value != "" &&
				el.name != "s"
			) ||
			el.name == "scraper" ||
			el.name == "nsfw"
		){
			
			if(first){
				
				GET += "?";
				first = false;
			}else{
				
				GET += "&";
			}
			
			GET += encodeURIComponent(el.name).replaceAll("%20", "+") + "=" + encodeURIComponent(el.value).replaceAll("%20", "+");
		}
	});
	
	window.location.href = GET;
}
/*
	Hide show more button when it's not needed on answers
*/
var answer_div = document.getElementsByClassName("answer");

if(answer_div.length !== 0){
	
	answer_div = Array.from(answer_div);
	var spoiler_button_div = Array.from(document.getElementsByClassName("spoiler-button"));
	
	// execute on pageload
	hide_show_more();
	
	window.addEventListener("resize", hide_show_more);
	
	// answers shorter than the height limit get their button hidden and
	// the element the button is bound to (htmlFor) checked
	function hide_show_more(){
		
		var height = window.innerWidth >= 1000 ? 600 : 200;
		
		// "var i": the counter used to leak into the global scope
		for(var i=0; i<answer_div.length; i++){
			
			if(answer_div[i].scrollHeight < height){
				
				spoiler_button_div[i].style.display = "none";
				
				document.getElementById(spoiler_button_div[i].htmlFor).checked = true;
			}else{
				
				spoiler_button_div[i].style.display = "block";
			}
		}
	}
}
// class name that marks clickable images on the current page;
// stays null on pages without an image viewer
var image_class = null;

switch(document.location.pathname){
	
	case "/web":
	case "/web.php":
		image_class = "image";
		break;
	
	case "/images":
	case "/images.php":
		image_class = "thumb";
		break;
}
if(image_class !== null){
/*
Add popup to document

Creates three elements at the end of <body>: #popup-bg, #popup-status and
#popup, followed by the state shared by the image viewer handlers below.
*/
var popup_bg = document.createElement("div");
popup_bg.id = "popup-bg";
document.body.appendChild(popup_bg);
// enable/disable pointer events
// (unless the bg_noclick=yes cookie is set, clicks are not caught by the background)
if(!document.cookie.includes("bg_noclick=yes")){
popup_bg.style.pointerEvents = "none";
}
var popup_status = document.createElement("div");
popup_status.id = "popup-status";
document.body.appendChild(popup_status);
var popup_body = document.createElement("div");
popup_body.id = "popup";
document.body.appendChild(popup_body);
// import popup
// (re-reads the two elements created above by their id)
var popup_body = document.getElementById("popup");
var popup_status = document.getElementById("popup-status");
var popup_image = null; // is set later on popup click
// image metadata
var collection = []; // will contain width, height, image URL
var collection_index = 0; // index of the entry of `collection` being displayed
// event handling helper variables
var is_popup_shown = false;
var mouse_down = false; // left button currently held
var mouse_move = false; // pointer moved the popup since the last mousedown
var move_x = 0; // pointer offset inside the popup when the drag started
var move_y = 0;
var target_is_popup = false; // last mousedown landed inside #popup
var mirror_x = false;
var mirror_y = false;
var rotation = 0; // degrees
/*
	Image dragging (mousedown)
	
	Remembers whether the press started inside the popup and where the
	pointer sits relative to its top left corner.
*/
document.addEventListener("mousedown", function(div){
	
	// primary button only
	if(div.buttons !== 1){
		
		return;
	}
	
	mouse_down = true;
	mouse_move = false;
	target_is_popup = is_click_within(div.target, "popup", true) !== false;
	
	if(target_is_popup){
		
		var pos = popup_body.getBoundingClientRect();
		
		move_x = div.x - pos.x;
		move_y = div.y - pos.y;
	}
});

/*
	Image dragging (mousemove)
*/
document.addEventListener("mousemove", function(pos){
	
	if(
		!target_is_popup ||
		!mouse_down
	){
		
		return;
	}
	
	// keep the grabbed point under the pointer
	mouse_move = true;
	movepopup(popup_body, pos.clientX - move_x, pos.clientY - move_y);
});

/*
	Image dragging (mouseup)
*/
document.addEventListener("mouseup", function(){
	
	mouse_down = false;
});
/*
	Image popup open
	
	One click handler for the whole document:
	 - click on an image: open the viewer
	 - click inside the open viewer (without dragging): enlarge it
	 - click anywhere else but the status bar: close the viewer
*/
document.addEventListener("click", function(click){
	
	// should our click trigger image open?
	// elem is the matching ancestor element, `true` for a plain ".openimg"
	// preview, or `false` when the click is unrelated to images
	var elem =
		is_click_within(click.target, image_class) ||
		click.target.classList.contains("openimg");
	
	if(elem){
		
		// use the handler's own event object, not the deprecated
		// window.event global
		click.preventDefault();
		
		is_popup_shown = true;
		
		// reset position params
		mirror_x = false;
		mirror_y = false;
		rotation = 0;
		scale = 60; // deliberately global: shared with the wheel handler and changeimage()
		collection_index = 0;
		
		// get popup data
		if(elem === true){
			
			// we clicked a simple image preview
			elem = click.target;
			
			var image_url = elem.getAttribute("src");
			
			if(image_url.startsWith("/proxy")){
				
				// recover the original URL from the proxy link
				var match = image_url.match(/i=([^&]+)/);
				
				if(match !== null){
					
					image_url = decodeURIComponent(match[1]);
				}
			}else{
				
				image_url = htmlspecialchars_decode(image_url);
			}
			
			var w = Math.round(click.target.naturalWidth);
			var h = Math.round(click.target.naturalHeight);
			
			// fallback size when the image reports no dimensions
			if(
				w === 0 ||
				h === 0
			){
				
				w = 100;
				h = 100;
			}
			
			collection = [
				{
					"url": image_url,
					"width": w,
					"height": h
				}
			];
			
			var title = "No description provided";
			
			if(click.target.title != ""){
				
				title = click.target.title;
			}else{
				
				if(click.target.alt != ""){
					
					title = click.target.alt;
				}
			}
		}else{
			
			if(image_class == "thumb"){
				
				// we're inside image.php
				elem =
					elem
					.parentElement
					.parentElement;
				
				var image_url = elem.getElementsByTagName("a")[1].href;
			}else{
				
				// we're inside web.php
				var image_url = elem.href;
			}
			
			// list of sources for this image, stored on the element
			collection =
				JSON.parse(
					elem.getAttribute("data-json")
				);
			
			var imagesize = elem.getElementsByTagName("img")[0];
			
			var imagesize_w = 0;
			var imagesize_h = 0;
			
			if(imagesize.complete){
				
				imagesize_w = imagesize.naturalWidth;
				imagesize_h = imagesize.naturalHeight;
			}
			
			if(
				imagesize_w === 0 ||
				imagesize_h === 0
			){
				
				imagesize_w = 100;
				imagesize_h = 100;
			}
			
			// sources without a known size borrow the thumbnail's size
			for(var i=0; i<collection.length; i++){
				
				if(collection[i].width === null){
					
					collection[i].width = imagesize_w;
					collection[i].height = imagesize_h;
				}
			}
			
			var title = elem.title;
		}
		
		// prepare HTML
		var html =
			'<div id="popup-num">(' + collection.length + ')</div>' +
			'<div id="popup-dropdown">' +
			'<select name="viewer-res" onchange="changeimage(event)">';
		
		for(i=0; i<collection.length; i++){
			
			if(collection[i].url.startsWith("data:")){
				
				var domain = "&lt;Base64 Data&gt;";
			}else{
				
				var domain = new URL(collection[i].url).hostname;
			}
			
			html += '<option value="' + i + '">' + '(' + collection[i].width + 'x' + collection[i].height + ') ' + domain + '</option>';
		}
		
		// the rel attribute used to swallow the space before id=
		popup_status.innerHTML =
			html + '</select></div>' +
			'<a href="' + htmlspecialchars(image_url) + '" rel="noreferrer nofollow" id="popup-title">' + htmlspecialchars(title) + '</a>';
		
		popup_body.innerHTML =
			'<img src="' + getproxylink(collection[0].url) + '" draggable="false" id="popup-image">';
		
		// make changes to DOM
		popup_body.style.display = "block";
		popup_bg.style.display = "block";
		popup_status.style.display = "table";
		
		// store for rotation functions & changeimage()
		popup_image = document.getElementById("popup-image");
		
		scalepopup(collection[collection_index], scale);
		centerpopup();
	}else{
		
		// click inside the image viewer
		// resize image
		if(is_click_within(click.target, "popup", true)){
			
			// a drag also ends with a click event, ignore that one
			if(mouse_move === false){
				
				scale = 80;
				scalepopup(collection[collection_index], scale);
				centerpopup();
			}
		}else{
			
			if(is_click_within(click.target, "popup-status", true) === false){
				
				// click outside the popup while its open
				// close it
				if(is_popup_shown){
					
					hidepopup();
				}
			}
		}
	}
});
/*
	Scale image viewer
	
	The wheel zooms the popup while keeping the point under the pointer
	in place. Holding alt, ctrl or shift halves the step.
*/
popup_body.addEventListener("wheel", function(scroll){
	
	// use the handler's own event object, not the deprecated
	// window.event global
	scroll.preventDefault();
	
	if(
		scroll.altKey ||
		scroll.ctrlKey ||
		scroll.shiftKey
	){
		
		var increment = 7;
	}else{
		
		var increment = 14;
	}
	
	if(scroll.wheelDelta > 0){
		
		// scrolling up
		scale = scale + increment;
	}else{
		
		// scrolling down
		// (never shrink to 7 or below)
		if(scale - increment > 7){
			
			scale = scale - increment;
		}
	}
	
	// calculate relative size before scroll
	var pos = popup_body.getBoundingClientRect();
	
	var x = (scroll.x - pos.x) / pos.width;
	var y = (scroll.y - pos.y) / pos.height;
	
	scalepopup(collection[collection_index], scale);
	
	// move popup to % we found
	pos = popup_body.getBoundingClientRect();
	
	movepopup(
		popup_body,
		scroll.clientX - (x * pos.width),
		scroll.clientY - (y * pos.height)
	);
});
/*
	Keyboard controls
	
	Only active while the viewer is open:
	 - Escape closes it
	 - arrow keys rotate the image
	 - arrow keys with alt, ctrl or shift mirror it
*/
document.addEventListener("keydown", function(key){
	
	if(is_popup_shown === false){
		
		return;
	}
	
	// close popup
	if(key.keyCode === 27){
		
		hidepopup();
		return;
	}
	
	var mirror_mode =
		key.altKey ||
		key.ctrlKey ||
		key.shiftKey;
	
	switch(key.keyCode){
		
		case 37:
			// left
			key.preventDefault();
			
			if(mirror_mode){
				mirror_x = true;
			}else{
				rotation = -90;
			}
			break;
		
		case 38:
			// up
			key.preventDefault();
			
			if(mirror_mode){
				mirror_y = false;
			}else{
				rotation = 0;
			}
			break;
		
		case 39:
			// right
			key.preventDefault();
			
			if(mirror_mode){
				mirror_x = false;
			}else{
				rotation = 90;
			}
			break;
		
		case 40:
			// down
			key.preventDefault();
			
			if(mirror_mode){
				mirror_y = true;
			}else{
				rotation = -180;
			}
			break;
	}
	
	// the transform is re-applied after every key press
	popup_image.style.transform =
		"scale(" +
			(mirror_x ? "-1" : "1") +
			", " +
			(mirror_y ? "-1" : "1") +
		") " +
		"rotate(" +
			rotation + "deg" +
		")";
});
}
// Return the value to put in an <img src="...">: data: URLs are only
// HTML-escaped, everything else is routed through /proxy.
function getproxylink(url){
	
	return url.startsWith("data:") ?
		htmlspecialchars(url) :
		"/proxy?i=" + encodeURIComponent(url);
}
// Close the image viewer: hide the background, the popup and its status
// bar, and mark the viewer as closed.
function hidepopup(){
	
	[popup_status, popup_body, popup_bg].forEach(function(layer){
		
		layer.style.display = "none";
	});
	
	is_popup_shown = false;
}
// Resize the popup so the current image fits inside `scale` percent of the
// window while keeping its aspect ratio.
// size: object with width & height. The ratio itself is computed from the
// currently selected entry of `collection`.
function scalepopup(size, scale){
	
	var factor = scale / 100;
	var current = collection[collection_index];
	
	var ratio =
		Math.min(
			(window.innerWidth * factor) / current.width,
			(window.innerHeight * factor) / current.height
		);
	
	popup_body.style.width = size.width * ratio + "px";
	popup_body.style.height = size.height * ratio + "px";
}
// Move the popup to the middle of the window, using its integer
// (truncated) width and height.
function centerpopup(){
	
	var rect = popup_body.getBoundingClientRect();
	
	var width = parseInt(rect.width);
	var height = parseInt(rect.height);
	
	movepopup(
		popup_body,
		(window.innerWidth / 2) - (width / 2),
		(window.innerHeight / 2) - (height / 2)
	);
}
// Place the given element at x/y (pixels) through its style.left/top.
function movepopup(popup_body, x, y){
	
	var style = popup_body.style;
	
	style.left = `${x}px`;
	style.top = `${y}px`;
}
// onchange handler of the viewer's source dropdown: display the selected
// entry of `collection` with default scale, rotation and mirroring.
function changeimage(event){
	
	collection_index = parseInt(event.target.value);
	
	// reset rotation params
	rotation = 0;
	mirror_x = false;
	mirror_y = false;
	scale = 60;
	
	var selected = collection[collection_index];
	
	// we set innerHTML otherwise old image lingers a little
	popup_body.innerHTML =
		'<img src="' + getproxylink(selected.url) + '" draggable="false" id="popup-image">';
	
	// store for rotation functions & changeimage()
	popup_image = document.getElementById("popup-image");
	
	scalepopup(selected, scale);
	centerpopup();
}
var searchbox_wrapper = document.getElementsByClassName("searchbox");
if(searchbox_wrapper.length !== 0){
searchbox_wrapper = searchbox_wrapper[0];

// the text field is the second <input> of the wrapper
var searchbox = searchbox_wrapper.getElementsByTagName("input")[1];

/*
	Textarea shortcuts
	
	"/" closes the image viewer if there is one, scrolls to the top and
	focuses the searchbox.
*/
document.addEventListener("keydown", function(key){
	
	// 191 = /
	if(key.keyCode !== 191){
		
		return;
	}
	
	if(document.activeElement.tagName == "INPUT"){
		
		// already focused, ignore
		return;
	}
	
	if(
		typeof is_popup_shown != "undefined" &&
		is_popup_shown
	){
		
		hidepopup();
	}
	
	window.scrollTo(0, 0);
	searchbox.focus();
	key.preventDefault();
});
/*
Autocompleter
*/
if( // make sure the user wants it
document.cookie.includes("scraper_ac=") &&
document.cookie.includes("scraper_ac=disabled") === false
){
// suggestions already fetched, keyed by normalized query
var autocomplete_cache = [];

// index of the entry focused with the keyboard, -1 = the searchbox itself
var focuspos = -1;
var list = [];
var autocomplete_div = document.getElementsByClassName("autocomplete")[0];

// in "auto" mode the suggestions come from the scraper picked on the page
var ac_req_appendix =
	(
		document.cookie.includes("scraper_ac=auto") &&
		typeof scraper_dropdown != "undefined"
	) ?
	"&scraper=" + scraper_dropdown.value :
	"";
// Normalized searchbox content: trimmed, runs of spaces collapsed into
// one, lowercased. Used as the cache key.
function getsearchboxtext(){
	
	var trimmed = searchbox.value.trim();
	var collapsed = trimmed.replace(/ +/g, " ");
	
	return collapsed.toLowerCase();
}
searchbox.addEventListener("input", async function(){
	
	// ratelimit on input only
	// dont ratelimit if we already have res
	var is_cached = typeof autocomplete_cache[getsearchboxtext()] != "undefined";
	
	await (is_cached ? getac() : getac_ratelimit());
});
// Show suggestions for the current searchbox text, fetching them from
// /api/v1/ac when they are not cached yet.
async function getac(){
	
	var curvalue = getsearchboxtext();
	
	if(curvalue == ""){
		
		// hide autocompleter
		autocomplete_div.style.display = "none";
		return;
	}
	
	if(typeof autocomplete_cache[curvalue] != "undefined"){
		
		// cache hit
		render_ac(curvalue, autocomplete_cache[curvalue]);
		return;
	}
	
	/*
		Fetch autocomplete
	*/
	// make sure we dont fetch same thing twice
	autocomplete_cache[curvalue] = [];
	
	var query = encodeURIComponent(curvalue).replaceAll("%20", "+");
	var res = await fetch("/api/v1/ac?s=" + query + ac_req_appendix);
	
	if(!res.ok){
		
		return;
	}
	
	var json = await res.json();
	
	// suggestions are read from the second element of the response
	autocomplete_cache[curvalue] = json[1];
	
	// only render if the user did not type anything else in the meantime
	if(curvalue == getsearchboxtext()){
		
		render_ac(curvalue, autocomplete_cache[curvalue]);
	}
}
// pending timer of getac_ratelimit(), null when none is running
var ac_func = null;

// Debounced getac(): every call restarts a 200ms timer, getac() only runs
// once the timer expires. The promise of a call whose timer got cancelled
// never resolves.
function getac_ratelimit(){
	
	return new Promise(function(resolve, reject){
		
		if(ac_func !== null){
			
			clearTimeout(ac_func);
		}
		
		ac_func =
			setTimeout(function(){
				
				ac_func = null;
				getac(); // get results after 200ms of no keystroke
				resolve();
			}, 200);
	});
}
// Render the suggestion list, underlining every occurrence of the query's
// words. An empty list hides the autocompleter.
// query: normalized searchbox text, list: array of suggestion strings
function render_ac(query, list){
	
	if(list.length === 0){
		
		autocomplete_div.style.display = "none";
		return;
	}
	
	// declared locally: it used to leak into the global scope
	var html = "";
	
	// prepare regex
	var highlight = query.split(" ");
	var tokens = [];
	
	for(var k=0; k<highlight.length; k++){
		
		if(highlight[k] == ""){
			
			// an empty alternative would match everywhere
			continue;
		}
		
		// escape regex metacharacters, so that a query like "c++"
		// cannot produce an invalid pattern
		tokens.push(
			highlight[k].replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
		);
	}
	
	// the pattern is built from the ESCAPED tokens
	var regex =
		tokens.length === 0 ?
		null :
		new RegExp(tokens.join("|"), "gi");
	
	for(var i=0; i<list.length; i++){
		
		// match against the raw text and escape every piece on its own,
		// so a match can never land inside an entity such as "&amp;"
		var text = list[i];
		var entry = "";
		var cursor = 0;
		
		if(regex !== null){
			
			var match;
			regex.lastIndex = 0;
			
			while((match = regex.exec(text)) !== null){
				
				entry +=
					htmlspecialchars(text.slice(cursor, match.index)) +
					'<u>' + htmlspecialchars(match[0]) + '</u>';
				
				cursor = match.index + match[0].length;
			}
		}
		
		entry += htmlspecialchars(text.slice(cursor));
		
		html +=
			'<div tabindex="0" class="entry" onclick="handle_entry_click(this);">' +
				entry +
			'</div>';
	}
	
	autocomplete_div.innerHTML = html;
	autocomplete_div.style.display = "block";
}
// set when Enter is pressed; the keyup handler then gives focus back to the searchbox
var should_focus = false;
/*
Keyboard navigation of the suggestion list.
focuspos is the index of the focused entry, -1 meaning the searchbox itself.
*/
document.addEventListener("keydown", function(event){
// Escape always closes the list, wherever the focus is
if(event.key == "Escape"){
document.activeElement.blur();
focuspos = -1;
autocomplete_div.style.display = "none";
return;
}
// ignore keys pressed outside the searchbox area, or while there is
// no cache entry for the current text
if(
is_click_within(event.target, "searchbox") === false ||
typeof autocomplete_cache[getsearchboxtext()] == "undefined"
){
return;
}
switch(event.key){
case "ArrowUp":
event.preventDefault();
focuspos--;
// going up from the searchbox (-1) wraps around to the last entry
if(focuspos === -2){
focuspos = autocomplete_cache[getsearchboxtext()].length - 1;
}
break;
case "ArrowDown":
case "Tab":
event.preventDefault();
focuspos++;
// going past the last entry wraps around to the searchbox
if(focuspos >= autocomplete_cache[getsearchboxtext()].length){
focuspos = -1;
}
break;
case "Enter":
should_focus = true;
// with an entry focused, Enter copies it into the searchbox and the
// default action is cancelled; otherwise Enter keeps its default action
if(focuspos !== -1){
// replace input content
event.preventDefault();
searchbox.value =
autocomplete_div.getElementsByClassName("entry")[focuspos].innerText;
break;
}
break;
default:
// any other key moves the focus back to the searchbox
focuspos = -1;
break;
}
if(focuspos === -1){
searchbox.focus();
return;
}
autocomplete_div.getElementsByClassName("entry")[focuspos].focus();
});
// leaving the window hides the list
window.addEventListener("blur", function(){
	
	autocomplete_div.style.display = "none";
});

document.addEventListener("keyup", function(event){
	
	// handle ENTER key on entry
	if(!should_focus){
		
		return;
	}
	
	should_focus = false;
	searchbox.focus();
});

document.addEventListener("mousedown", function(event){
	
	// hide input if click is outside
	if(is_click_within(event.target, "searchbox") === false){
		
		autocomplete_div.style.display = "none";
	}
});

// inline onclick handler of the entries
// NOTE: `event` receives the clicked entry element (`this`), not an event
function handle_entry_click(event){
	
	focuspos = -1;
	searchbox.value = event.innerText;
	searchbox.focus();
}

// show suggestions again when the searchbox regains focus
searchbox.addEventListener("focus", function(){
	
	focuspos = -1;
	getac();
});
}
}

BIN
src/static/icon/amazon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

BIN
src/static/icon/call.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 555 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

BIN
src/static/icon/github.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 508 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 KiB

BIN
src/static/icon/imdb.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 KiB

BIN
src/static/icon/itunes.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

BIN
src/static/icon/quora.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 835 B

BIN
src/static/icon/reddit.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 468 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 354 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 592 B

BIN
src/static/icon/spotify.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 689 B

BIN
src/static/icon/steam.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 622 B

BIN
src/static/icon/twitter.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 698 B

BIN
src/static/icon/w3html.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1000 B

BIN
src/static/icon/website.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 KiB

BIN
src/static/icon/youtube.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

BIN
src/static/misc/snow.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 188 KiB

473
src/static/serverping.js Normal file
View File

@ -0,0 +1,473 @@
// Escape & < > " ' for use inside HTML.
// null is rendered as an italic "<Empty>" placeholder instead.
function htmlspecialchars(str){
	
	if(str === null){
		
		return "<i>&lt;Empty&gt;</i>";
	}
	
	return str.replace(
		/[&<>"']/g,
		function(character){
			
			switch(character){
				
				case "&": return "&amp;";
				case "<": return "&lt;";
				case ">": return "&gt;";
				case '"': return "&quot;";
				case "'": return "&#039;";
			}
		}
	);
}
// initialize garbage
var list = []; // origins that were already queried
var pinged_list = []; // validated responses
var reqs = 0; // servers that answered properly
var errors = 0; // failed requests

// sort codes are (column index * 2) + orientation, see render_list()
// 12 = version column, highest version first. This matches the arrow the
// table header starts with (6 would sort by real requests instead)
var sort = 12;
// check for instance redirect stuff
// every parameter of the current URL except "target" and "npt" is
// collected into `redir`; "target" picks the page (default: /web)
var redir = [];
var target = "/web?";

for(var [key, value] of new URL(window.location.href).searchParams){
	
	if(key == "target"){
		
		target = "/" + encodeURIComponent(value) + "?";
		continue;
	}
	
	if(key == "npt"){
		
		continue;
	}
	
	redir.push(encodeURIComponent(key) + "=" + encodeURIComponent(value));
}

// path + query string, or an empty string when there is nothing to forward
redir =
	redir.length !== 0 ?
	target + redir.join("&") :
	"";
// status line showing the number of pinged servers and failed requests
var quote = document.createElement("div");
quote.className = "quote";
quote.innerHTML = 'Pinged <b>0</b> servers (<b>0</b> failed requests)';
// the two <b> elements are the counters, updated by fetch_server()
var [div_servercount, div_failedreqs] =
quote.getElementsByTagName("b");
// insert the status line right after the <noscript> element
var noscript = document.getElementsByTagName("noscript")[0];
document.body.insertBefore(quote, noscript.nextSibling);
// create table
// (the Version header starts out with the sorting arrow)
var table = document.createElement("table");
table.innerHTML =
'<thead>' +
'<tr>' +
'<th class="extend">Server</th>' +
'<th>Address</th>' +
'<th>Bot protection</th>' +
'<th title="Amount of legit requests processed since the last APCU cache clear (usually happens at midnight)">Real reqs (?)</th>' +
'<th title="Amount of filtered requests processed since the last APCU cache clear (usually happens at midnight)">Bot reqs (?)</th>' +
'<th>API</th>' +
'<th><div class="arrow up"></div>Version</th>' +
'</tr>' +
'</thead>' +
'<tbody></tbody>';
// the table goes right after the status line
document.body.insertBefore(table, quote.nextSibling);
// handle sorting clicks
// clicking a header moves the arrow to it (pointing up); clicking the
// header that already owns the arrow flips it
var tbody = table.getElementsByTagName("tbody")[0];
var th = table.getElementsByTagName("th");

for(var i=0; i<th.length; i++){
	
	th[i].addEventListener("click", function(event){
		
		// the click may land on the arrow itself
		var div =
			event.target.className.includes("arrow") ?
			event.target.parentElement :
			event.target;
		
		var arrow = div.getElementsByClassName("arrow");
		var orientation = 0; // up
		
		if(arrow.length === 0){
			
			// delete arrow and add new one
			document.getElementsByClassName("arrow")[0].remove();
			
			arrow = document.createElement("div");
			arrow.className = "arrow up";
			div.insertBefore(arrow, event.target.firstChild);
			
		}else if(arrow[0].className == "arrow down"){
			
			// switch arrow position
			arrow[0].className = "arrow up";
		}else{
			
			arrow[0].className = "arrow down";
			orientation = 1;
		}
		
		// sort code = (column index * 2) + orientation
		var column =
			[
				"server",
				"address",
				"bot protection",
				"real reqs (?)",
				"bot reqs (?)",
				"api",
				"version"
			].indexOf(div.textContent.toLowerCase());
		
		if(column !== -1){
			
			sort = (column * 2) + orientation;
		}
		
		render_list();
	});
}
// Returns true for parseable https: URLs. http: URLs are only accepted
// when allow_http is true or when the page itself was loaded over http.
function validate_url(url, allow_http = false){
	
	var parsed;
	
	try{
		
		parsed = new URL(url);
	}catch(error){
		
		// not a URL at all
		return false;
	}
	
	if(parsed.protocol == "https:"){
		
		return true;
	}
	
	if(parsed.protocol != "http:"){
		
		return false;
	}
	
	return (
		allow_http === true ||
		window.location.protocol == "http:"
	);
}
// Format a number with the default locale's Intl.NumberFormat.
function number_format(int){
	
	var formatter = new Intl.NumberFormat();
	
	return formatter.format(int);
}
// parse initial server list
fetch_server(window.location.origin);

// Query <server>/ami4get, validate the response, add it to the table and
// then do the same for every instance that server lists.
// Each origin is only queried once. Failures are counted and logged.
async function fetch_server(server){
	
	if(!validate_url(server)){
		
		console.warn("Invalid server URL: " + server);
		return;
	}
	
	// make sure baseURL is origin
	server = new URL(server).origin;
	
	// prevent multiple fetches
	if(list.includes(server)){
		
		// server was already fetched
		return;
	}
	
	// prevent future fetches
	list.push(server);
	
	var data = null;
	
	try{
		
		var payload = await fetch(server + "/ami4get");
		
		if(payload.status !== 200){
			
			// endpoint is not available
			errors++;
			div_failedreqs.textContent = number_format(errors);
			console.warn(server + ": Invalid HTTP code " + payload.status);
			return;
		}
		
		data = await payload.json();
		
	}catch(error){
		
		errors++;
		div_failedreqs.textContent = number_format(errors);
		console.warn(server + ": Could not fetch or decode JSON");
		return;
	}
	
	// sanitize data
	// the response comes from a remote server: typeof null is "object",
	// so null has to be ruled out before any property is read
	if(
		typeof data != "object" ||
		data === null ||
		typeof data.status != "string" ||
		data.status != "ok" ||
		typeof data.server != "object" ||
		data.server === null ||
		!(
			typeof data.server.name == "string" ||
			(
				typeof data.server.name == "object" &&
				data.server.name === null
			)
		) ||
		typeof data.service != "string" ||
		data.service != "4get" ||
		(
			typeof data.server.description != "string" &&
			data.server.description !== null
		) ||
		typeof data.server.bot_protection != "number" ||
		typeof data.server.real_requests != "number" ||
		typeof data.server.bot_requests != "number" ||
		typeof data.server.api_enabled != "boolean" ||
		typeof data.server.alt_addresses != "object" ||
		typeof data.server.version != "number" ||
		typeof data.instances != "object" ||
		data.instances === null
	){
		
		errors++;
		div_failedreqs.textContent = number_format(errors);
		console.warn(server + ": Malformed JSON");
		return;
	}
	
	// remember where the data came from
	data.server.ip = server;
	
	reqs++;
	div_servercount.textContent = number_format(reqs);
	
	// index = position in pinged_list, used by the row's onclick
	var total = pinged_list.push(data) - 1;
	pinged_list[total].index = total;
	
	render_list();
	
	// get more servers
	// (not awaited: the instances are pinged in parallel)
	for(var i=0; i<data.instances.length; i++){
		
		fetch_server(data.instances[i]);
	}
}
// Return a copy of `object` sorted numerically by server[element]:
// ascending when `order` is truthy, descending otherwise.
function sorta(object, element, order){
	
	function ascending(a, b){
		
		return a.server[element] - b.server[element];
	}
	
	function descending(a, b){
		
		return b.server[element] - a.server[element];
	}
	
	return object.slice().sort(order ? ascending : descending);
}
// Return a copy of `object` sorted alphabetically by server[element]:
// A-Z when `order` is truthy, reversed otherwise.
// null values (a server without a name) are sorted as empty strings
// instead of throwing on localeCompare.
function textsort(object, element, order){
	
	function text(entry){
		
		var value = entry.server[element];
		
		return value === null ? "" : value;
	}
	
	var sort = object.slice().sort(
		function(a, b){
			
			return text(a).localeCompare(text(b));
		}
	);
	
	if(!order){
		
		return sort.reverse();
	}
	
	return sort;
}
// Rebuild the table body from pinged_list, ordered according to `sort`.
// sort = (column index * 2) + orientation; an odd code is the flipped
// order of the even code before it.
function render_list(){
	
	var sorted_list = [];
	
	// sort
	var filter = Boolean(sort % 2);
	
	switch(sort){
		
		case 0:
		case 1:
			sorted_list = textsort(pinged_list, "name", !filter);
			break;
		
		case 2:
		case 3:
			sorted_list = textsort(pinged_list, "ip", !filter);
			break;
		
		case 4:
		case 5:
			sorted_list = sorta(pinged_list, "bot_protection", !filter);
			break;
		
		case 6:
		case 7:
			sorted_list = sorta(pinged_list, "real_requests", filter);
			break;
		
		case 8:
		case 9:
			sorted_list = sorta(pinged_list, "bot_requests", filter);
			break;
		
		case 10:
		case 11:
			sorted_list = sorta(pinged_list, "api_enabled", filter);
			break;
		
		case 12:
		case 13:
			sorted_list = sorta(pinged_list, "version", filter);
			break;
	}
	
	// render table
	var html = "";
	
	for(var k=0; k<sorted_list.length; k++){
		
		var server = sorted_list[k].server;
		
		// bot protection
		var bot_protection;
		
		switch(server.bot_protection){
			
			case 0:
				bot_protection = '<span style="color:var(--green);">Disabled</span>';
				break;
			
			case 1:
				bot_protection = '<span style="color:var(--yellow);">Image captcha</span>';
				break;
			
			case 2:
				bot_protection = '<span style="color:var(--red);">Invite only</span>';
				break;
			
			default:
				bot_protection = 'Unknown';
		}
		
		// api enabled
		var api =
			server.api_enabled ?
			'<span style="color:var(--green);">Yes</span>' :
			'<span style="color:var(--red);">No</span>';
		
		// each cell starts right after "<td": attributes, ">" and content
		var cells = [
			' class="extend">' + htmlspecialchars(server.name), // server name
			'>' + htmlspecialchars(new URL(server.ip).host), // address
			'>' + bot_protection,
			'>' + number_format(server.real_requests), // real reqs
			'>' + number_format(server.bot_requests), // bot reqs
			'>' + api,
			'>v' + server.version // version
		];
		
		html +=
			'<tr onclick="show_server(' + sorted_list[k].index + ');">' +
				'<td' + cells.join('</td><td') + '</td>' +
			'</tr>';
	}
	
	// (the leftover console.log() of the whole table was removed)
	tbody.innerHTML = html;
}
// Popup elements: the dimmed backdrop, the wrapper that is shown/hidden,
// and the inner box that receives the server details markup.
var popup_bg = document.getElementById("popup-bg"),
	popup_wrapper = document.getElementsByClassName("popup-wrapper")[0],
	popup = popup_wrapper.getElementsByClassName("popup")[0],
	popup_shown = false;

// clicking the backdrop dismisses the popup
popup_bg.addEventListener("click", function(){
	[popup_wrapper, popup_bg].forEach(function(element){
		element.style.display = "none";
	});
});
/*
	Fill the popup with the details of one pinged server and display it.

	serverid: index into pinged_list (the `index` stored on each entry).

	Shows the name, description, origin URL (with `redir` appended to the
	link target) and the list of alternative addresses, each with an
	IP lookup link.

	Alternative addresses come from the remote instance and are untrusted:
	every entry is validated before it is parsed, and entries that fail
	are logged and skipped instead of aborting the whole popup.
*/
function show_server(serverid){
	var server = pinged_list[serverid].server;

	var html =
		'<h2>' + htmlspecialchars(server.name) + '</h2>' +
		'Description' +
		'<div class="code">' + htmlspecialchars(server.description) + '</div>';

	var url_obj = new URL(server.ip);
	var url = htmlspecialchars(url_obj.origin);
	var domain = url_obj.hostname;

	// rel must be spelled "noreferrer" for the browser to honour it
	html +=
		'URL: <a rel="noreferrer" target="_BLANK" href="' + url + redir + '">' + url + '</a> <a rel="noreferrer" target="_BLANK" href="https://browserleaks.com/ip/' + encodeURIComponent(domain) + '">(IP lookup)</a>' +
		'<br><br>Alt addresses:';

	// the upstream check only guarantees typeof == "object", which also
	// matches null and plain objects; treat anything but an array as empty
	var alt_addresses = Array.isArray(server.alt_addresses) ? server.alt_addresses : [];
	var len = alt_addresses.length;

	if(len === 0){
		html += ' <i>&lt;Empty&gt;</i>';
	}else{
		html += '<ul>';

		for(var i=0; i<len; i++){
			// validate first: new URL() throws on malformed input
			var alt_obj = null;

			if(validate_url(alt_addresses[i], true)){
				try{
					alt_obj = new URL(alt_addresses[i]);
				}catch(error){
					alt_obj = null;
				}
			}

			if(alt_obj === null){
				console.warn(server.ip + ": Invalid peer URL => " + alt_addresses[i]);
				continue;
			}

			var alt_url = htmlspecialchars(alt_obj.origin);
			var alt_domain = alt_obj.hostname;

			html += '<li><a rel="noreferrer" href="' + alt_url + redir + '" target="_BLANK">' + alt_url + '</a> <a rel="noreferrer" target="_BLANK" href="https://browserleaks.com/ip/' + encodeURIComponent(alt_domain) + '">(IP lookup)</a></li>';
		}

		html += '</ul>';
	}

	popup.innerHTML = html;
	popup_wrapper.style.display = "block";
	popup_bg.style.display = "block";
}
/*
	Hide the server details popup together with its backdrop.
*/
function hide_server(){
	[popup_wrapper, popup_bg].forEach(function(element){
		element.style.display = "none";
	});
}

1384
src/static/style.css Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,20 @@
/*
	Theme override: reassigns the palette custom properties on :root.
	The property names look like hex colour codes (presumably the colours
	of the default theme -- confirm against the base stylesheet); the
	value on the right is the colour this theme actually applies.
	Both background entries --1d2021 and --282828 map to the same colour here.
*/
:root{
/* background */
--1d2021: #eff1f5;
--282828: #eff1f5;
--3c3836: #dce0e8;
--504945: #5c5f77;
/* font */
--928374: #8c8fa1;
--a89984: #4c4f69;
--bdae93: #4c4f69;
--8ec07c: #df8e1d;
--ebdbb2: #4c4f69;
/* code highlighter */
--comment: #e64553;
--default: #eff1f5;
--keyword: #df8e1d;
--string: #209fb5;
}

View File

@ -0,0 +1,20 @@
/*
	Theme override: reassigns the palette custom properties on :root.
	The property names look like hex colour codes (presumably the colours
	of the default theme -- confirm against the base stylesheet); the
	value on the right is the colour this theme actually applies.
*/
:root{
/* background */
--1d2021: #1e1e2e;
--282828: #313244;
--3c3836: #45475a;
--504945: #585b70;
/* font */
--928374: #bac2de;
--a89984: #a6adc8;
--bdae93: #cdd6f4;
--8ec07c: #a6e3a1;
--ebdbb2: #f9e2af;
/* code highlighter */
--comment: #f5e0dc;
--default: #f2cdcd;
--keyword: #fab387;
--string: #74c7ec;
}

View File

@ -0,0 +1,31 @@
/*
	Theme override: reassigns the palette custom properties on :root.
	The property names look like hex colour codes (presumably the colours
	of the default theme -- confirm against the base stylesheet); the
	value on the right is the colour this theme actually applies.
	This theme also sets the --green/--yellow/--red status colours that
	the instance list uses for its coloured labels.
*/
:root{
/* background */
--1d2021: #bdae93;
--282828: #a89984;
--3c3836: #a89984;
--504945: #504945;
/* font */
--928374: #1d2021;
--a89984: #282828;
--bdae93: #3c3836;
--8ec07c: #52520e;
--ebdbb2: #1d2021;
/* code highlighter */
--comment: #6a4400;
--default: #d4be98;
--keyword: #4a4706;
--string: #076678;
/* color codes for instance list */
--green: #636311;
--yellow: #8a6214;
--red: #711410;
}
/* hover highlight uses a literal colour rather than one of the variables above */
.autocomplete .entry:hover,
.instances th:hover
{
background:#928374;
}

Some files were not shown because too many files have changed in this diff Show More