get rid of the captcha stuff, put proxies in data
All checks were successful
docker / docker (push) Successful in 11s
All checks were successful
docker / docker (push) Successful in 11s
Signed-off-by: ngn <ngn@ngn.tf>
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,5 +1,5 @@
|
|||||||
/compose.yml
|
/compose.yml
|
||||||
/docker-compose.yml
|
/docker-compose.yml
|
||||||
/banners
|
/banner
|
||||||
/captcha
|
/favicon.ico
|
||||||
/config.php
|
/config.php
|
||||||
|
@ -6,8 +6,8 @@ services:
|
|||||||
- 80:8080
|
- 80:8080
|
||||||
volumes:
|
volumes:
|
||||||
- ./config.php:/srv/4get/data/config.php:ro
|
- ./config.php:/srv/4get/data/config.php:ro
|
||||||
- ./banners:/srv/4get/banner:ro
|
- ./favicon.ico:/srv/4get/favicon.ico:ro
|
||||||
- ./captcha:/srv/4get/data/captcha:ro
|
- ./banner:/srv/4get/banner:ro
|
||||||
- type: tmpfs
|
- type: tmpfs
|
||||||
target: /tmp/icons
|
target: /tmp/icons
|
||||||
cap_drop:
|
cap_drop:
|
||||||
|
7
src/.gitignore
vendored
7
src/.gitignore
vendored
@ -21,9 +21,8 @@ scraper/soundcloud.json
|
|||||||
scraper/mp3-pm.html
|
scraper/mp3-pm.html
|
||||||
scraper/curlie.html
|
scraper/curlie.html
|
||||||
|
|
||||||
icons/*
|
|
||||||
banner/*
|
banner/*
|
||||||
!banner/*default*
|
data/*
|
||||||
|
|
||||||
data/captcha
|
!banner/*default*
|
||||||
data/config.php
|
!data/*.def.*
|
||||||
|
@ -37,8 +37,8 @@ class config{
|
|||||||
|
|
||||||
// Proxy pool assignments for each scraper
|
// Proxy pool assignments for each scraper
|
||||||
// false = Use server's raw IP
|
// false = Use server's raw IP
|
||||||
// string = will load a proxy list from data/proxies
|
// string = will load a proxy list from data directory
|
||||||
// Eg. "onion" will load data/proxies/onion.txt
|
// Eg. "tor" will load data/tor.txt
|
||||||
const PROXY_DDG = false; // duckduckgo
|
const PROXY_DDG = false; // duckduckgo
|
||||||
const PROXY_BRAVE = false;
|
const PROXY_BRAVE = false;
|
||||||
const PROXY_FB = false; // facebook
|
const PROXY_FB = false; // facebook
|
||||||
|
Binary file not shown.
3
src/data/proxies/.gitignore
vendored
3
src/data/proxies/.gitignore
vendored
@ -1,3 +0,0 @@
|
|||||||
*
|
|
||||||
!.gitignore
|
|
||||||
!onion.txt
|
|
Binary file not shown.
Before Width: | Height: | Size: 753 B |
@ -1,27 +1,27 @@
|
|||||||
<?php
|
<?php
|
||||||
class backend{
|
class backend{
|
||||||
|
|
||||||
public function __construct($scraper){
|
public function __construct($scraper){
|
||||||
|
|
||||||
$this->scraper = $scraper;
|
$this->scraper = $scraper;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Proxy stuff
|
Proxy stuff
|
||||||
*/
|
*/
|
||||||
public function get_ip(){
|
public function get_ip(){
|
||||||
|
|
||||||
$pool = constant("config::PROXY_" . strtoupper($this->scraper));
|
$pool = constant("config::PROXY_" . strtoupper($this->scraper));
|
||||||
if($pool === false){
|
if($pool === false){
|
||||||
|
|
||||||
// we don't want a proxy, fuck off!
|
// we don't want a proxy, fuck off!
|
||||||
return 'raw_ip::::';
|
return 'raw_ip::::';
|
||||||
}
|
}
|
||||||
|
|
||||||
// indent
|
// indent
|
||||||
$proxy_index_raw = apcu_inc("p." . $this->scraper);
|
$proxy_index_raw = apcu_inc("p." . $this->scraper);
|
||||||
|
|
||||||
$proxylist = file_get_contents("data/proxies/" . $pool . ".txt");
|
$proxylist = file_get_contents("data/" . $pool . ".txt");
|
||||||
$proxylist = explode("\n", $proxylist);
|
$proxylist = explode("\n", $proxylist);
|
||||||
|
|
||||||
// ignore empty or commented lines
|
// ignore empty or commented lines
|
||||||
@ -29,15 +29,15 @@ class backend{
|
|||||||
$entry = ltrim($entry);
|
$entry = ltrim($entry);
|
||||||
return strlen($entry) > 0 && substr($entry, 0, 1) != "#";
|
return strlen($entry) > 0 && substr($entry, 0, 1) != "#";
|
||||||
});
|
});
|
||||||
|
|
||||||
$proxylist = array_values($proxylist);
|
$proxylist = array_values($proxylist);
|
||||||
|
|
||||||
return $proxylist[$proxy_index_raw % count($proxylist)];
|
return $proxylist[$proxy_index_raw % count($proxylist)];
|
||||||
}
|
}
|
||||||
|
|
||||||
// this function is also called directly on nextpage
|
// this function is also called directly on nextpage
|
||||||
public function assign_proxy(&$curlproc, string $ip){
|
public function assign_proxy(&$curlproc, string $ip){
|
||||||
|
|
||||||
// parse proxy line
|
// parse proxy line
|
||||||
[
|
[
|
||||||
$type,
|
$type,
|
||||||
@ -46,34 +46,34 @@ class backend{
|
|||||||
$username,
|
$username,
|
||||||
$password
|
$password
|
||||||
] = explode(":", $ip, 5);
|
] = explode(":", $ip, 5);
|
||||||
|
|
||||||
switch($type){
|
switch($type){
|
||||||
|
|
||||||
case "raw_ip":
|
case "raw_ip":
|
||||||
return;
|
return;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "http":
|
case "http":
|
||||||
case "https":
|
case "https":
|
||||||
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
|
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
|
||||||
curl_setopt($curlproc, CURLOPT_PROXY, $type . "://" . $address . ":" . $port);
|
curl_setopt($curlproc, CURLOPT_PROXY, $type . "://" . $address . ":" . $port);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "socks4":
|
case "socks4":
|
||||||
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4);
|
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4);
|
||||||
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
|
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "socks5":
|
case "socks5":
|
||||||
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
|
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
|
||||||
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
|
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "socks4a":
|
case "socks4a":
|
||||||
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
|
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
|
||||||
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
|
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "socks5_hostname":
|
case "socks5_hostname":
|
||||||
case "socks5h":
|
case "socks5h":
|
||||||
case "socks5a":
|
case "socks5a":
|
||||||
@ -81,25 +81,25 @@ class backend{
|
|||||||
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
|
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if($username != ""){
|
if($username != ""){
|
||||||
|
|
||||||
curl_setopt($curlproc, CURLOPT_PROXYUSERPWD, $username . ":" . $password);
|
curl_setopt($curlproc, CURLOPT_PROXYUSERPWD, $username . ":" . $password);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Next page stuff
|
Next page stuff
|
||||||
*/
|
*/
|
||||||
public function store(string $payload, string $page, string $proxy){
|
public function store(string $payload, string $page, string $proxy){
|
||||||
|
|
||||||
$key = sodium_crypto_secretbox_keygen();
|
$key = sodium_crypto_secretbox_keygen();
|
||||||
$nonce = random_bytes(SODIUM_CRYPTO_SECRETBOX_NONCEBYTES);
|
$nonce = random_bytes(SODIUM_CRYPTO_SECRETBOX_NONCEBYTES);
|
||||||
|
|
||||||
$requestid = apcu_inc("requestid");
|
$requestid = apcu_inc("requestid");
|
||||||
|
|
||||||
apcu_store(
|
apcu_store(
|
||||||
$page[0] . "." . // first letter of page name
|
$page[0] . "." . // first letter of page name
|
||||||
$this->scraper . // scraper name
|
$this->scraper . // scraper name
|
||||||
@ -117,31 +117,31 @@ class backend{
|
|||||||
900 // cache information for 15 minutes
|
900 // cache information for 15 minutes
|
||||||
);
|
);
|
||||||
|
|
||||||
return
|
return
|
||||||
$this->scraper . $requestid . "." .
|
$this->scraper . $requestid . "." .
|
||||||
rtrim(strtr(base64_encode($key), '+/', '-_'), '=');
|
rtrim(strtr(base64_encode($key), '+/', '-_'), '=');
|
||||||
}
|
}
|
||||||
|
|
||||||
public function get(string $npt, string $page){
|
public function get(string $npt, string $page){
|
||||||
|
|
||||||
$page = $page[0];
|
$page = $page[0];
|
||||||
$explode = explode(".", $npt, 2);
|
$explode = explode(".", $npt, 2);
|
||||||
|
|
||||||
if(count($explode) !== 2){
|
if(count($explode) !== 2){
|
||||||
|
|
||||||
throw new Exception("Malformed nextPageToken!");
|
throw new Exception("Malformed nextPageToken!");
|
||||||
}
|
}
|
||||||
|
|
||||||
$apcu = $page . "." . $explode[0];
|
$apcu = $page . "." . $explode[0];
|
||||||
$key = $explode[1];
|
$key = $explode[1];
|
||||||
|
|
||||||
$payload = apcu_fetch($apcu);
|
$payload = apcu_fetch($apcu);
|
||||||
|
|
||||||
if($payload === false){
|
if($payload === false){
|
||||||
|
|
||||||
throw new Exception("The next page token is invalid or has expired!");
|
throw new Exception("The next page token is invalid or has expired!");
|
||||||
}
|
}
|
||||||
|
|
||||||
$key =
|
$key =
|
||||||
base64_decode(
|
base64_decode(
|
||||||
str_pad(
|
str_pad(
|
||||||
@ -151,7 +151,7 @@ class backend{
|
|||||||
STR_PAD_RIGHT
|
STR_PAD_RIGHT
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
// decrypt and decompress data
|
// decrypt and decompress data
|
||||||
$payload[2] =
|
$payload[2] =
|
||||||
gzinflate(
|
gzinflate(
|
||||||
@ -161,15 +161,15 @@ class backend{
|
|||||||
$key
|
$key
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
if($payload[2] === false){
|
if($payload[2] === false){
|
||||||
|
|
||||||
throw new Exception("The next page token is invalid or has expired!");
|
throw new Exception("The next page token is invalid or has expired!");
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove the key after using successfully
|
// remove the key after using successfully
|
||||||
apcu_delete($apcu);
|
apcu_delete($apcu);
|
||||||
|
|
||||||
return [
|
return [
|
||||||
$payload[2], // data
|
$payload[2], // data
|
||||||
$payload[1] // proxy
|
$payload[1] // proxy
|
||||||
|
Reference in New Issue
Block a user