merge brave scraper fix from upstream

f9f3c919d6

Signed-off-by: ngn <ngn@ngn.tf>
This commit is contained in:
ngn 2025-01-28 07:29:42 +03:00
parent 52145d2fc3
commit ebd5101d44
Signed by: ngn
GPG Key ID: A3654DF5AD9F641D

View File

@ -209,9 +209,66 @@ class brave{
curl_close($curlproc); curl_close($curlproc);
return $data; return $data;
} }
/**
 * Find the inline <script> element that calls kit.start( in the document
 * currently held by $this->fuckhtml, and decode the JavaScript object that
 * follows the first "data:" marker inside that script.
 *
 * Callers in this class invoke $this->fuckhtml->load($html) right before
 * calling this method; the method itself does not load anything.
 *
 * @return mixed The decoded structure as returned by parseJsObject()
 *               (never null; callers index into it as an array).
 * @throws Exception When no script contains kit.start(, when that script
 *                   has no "data:" marker, or when decoding yields null.
 */
private function get_js(){
	
	$scripts =
		$this->fuckhtml
		->getElementsByTagName(
			"script"
		);
	
	$payload = null;
	
	// Iterate by value: nothing is written back into the element list, and
	// a by-reference loop variable would stay bound to the last element
	// after the loop ends.
	foreach($scripts as $script){
		
		if(
			!preg_match(
				'/kit\.start\(/',
				$script["innerHTML"]
			)
		){
			
			// not the bootstrap script, keep looking
			continue;
		}
		
		// Split only on the first "data:" so the remainder of the script
		// (including any later "data:" occurrences) stays intact.
		$parts =
			explode(
				"data:",
				$script["innerHTML"],
				2
			);
		
		if(count($parts) !== 2){
			
			throw new Exception("Failed to split up data field");
		}
		
		$payload = $parts[1];
		
		// only the first matching script is used
		break;
	}
	
	if($payload === null){
		
		throw new Exception("Could not grep JavaScript object");
	}
	
	// extract_json() presumably trims the text down to the leading
	// object/array literal before it is parsed -- TODO confirm against
	// the fuckhtml helper.
	$data =
		$this->fuckhtml
		->parseJsObject(
			$this->fuckhtml
			->extract_json(
				$payload
			)
		);
	
	if($data === null){
		
		throw new Exception("Failed to decode JavaScript object");
	}
	
	return $data;
}
public function web($get){ public function web($get){
if($get["npt"]){ if($get["npt"]){
// get next page data // get next page data
@ -381,57 +438,11 @@ class brave{
} }
} }
} }
// do some magic
$this->fuckhtml->load($html); $this->fuckhtml->load($html);
$data = $this->get_js();
$script_disc =
$this->fuckhtml
->getElementsByTagName(
"script"
);
$grep = [];
foreach($script_disc as $discs){
preg_match(
'/const data ?= ?(\[{.*}]);/',
$discs["innerHTML"],
$grep
);
if(isset($grep[1])){
break;
}
}
if(!isset($grep[1])){
throw new Exception("Could not grep JavaScript object");
}
$data =
rtrim(
preg_replace(
'/\(Array\(0\)\)\).*$/',
"",
$grep[1]
),
" ]"
) . "]";
$data =
$this->fuckhtml
->parseJsObject(
$data
);
unset($grep);
if($data === null){
throw new Exception("Failed to decode JavaScript object");
}
if( if(
isset($data[2]["data"]["title"]) && isset($data[2]["data"]["title"]) &&
stripos($data[2]["data"]["title"], "PoW Captcha") !== false stripos($data[2]["data"]["title"], "PoW Captcha") !== false
@ -1178,25 +1189,10 @@ class brave{
"news", "news",
$proxy $proxy
); );
preg_match( $this->fuckhtml->load($html);
'/const data ?= ?(\[{.*}]);/', $json = $this->get_js();
$html,
$json
);
if(!isset($json[1])){
throw new Exception("Failed to grep javascript object");
}
$json = $this->fuckhtml->parseJsObject($json[1], true);
if($json === null){
throw new Exception("Failed to parse javascript object");
}
foreach( foreach(
$json[1]["data"]["body"]["response"]["news"]["results"] $json[1]["data"]["body"]["response"]["news"]["results"]
as $news as $news
@ -1276,24 +1272,10 @@ class brave{
$handle = fopen("scraper/brave-image.html", "r"); $handle = fopen("scraper/brave-image.html", "r");
$html = fread($handle, filesize("scraper/brave-image.html")); $html = fread($handle, filesize("scraper/brave-image.html"));
fclose($handle);*/ fclose($handle);*/
preg_match( $this->fuckhtml->load($html);
'/const data = (\[{.*}\]);/', $json = $this->get_js();
$html,
$json
);
if(!isset($json[1])){
throw new Exception("Failed to get data object");
}
$json =
$this->fuckhtml
->parseJsObject(
$json[1]
);
foreach( foreach(
$json[1] $json[1]
["data"] ["data"]
@ -1421,24 +1403,10 @@ class brave{
$handle = fopen("scraper/brave-video.html", "r"); $handle = fopen("scraper/brave-video.html", "r");
$html = fread($handle, filesize("scraper/brave-video.html")); $html = fread($handle, filesize("scraper/brave-video.html"));
fclose($handle);*/ fclose($handle);*/
preg_match( $this->fuckhtml->load($html);
'/const data = (\[{.*}\]);/', $json = $this->get_js();
$html,
$json
);
if(!isset($json[1])){
throw new Exception("Failed to get data object");
}
$json =
$this->fuckhtml
->parseJsObject(
$json[1]
);
foreach( foreach(
$json $json
[1] [1]