From 445bdce612738f5c997b5bd6d5cd9773e652cd65 Mon Sep 17 00:00:00 2001 From: lolcat Date: Sun, 1 Jun 2025 13:03:39 -0400 Subject: [PATCH] fix google image crash --- src/scraper/google.php | 71 ++++++++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/src/scraper/google.php b/src/scraper/google.php index d5202fe..4b34120 100644 --- a/src/scraper/google.php +++ b/src/scraper/google.php @@ -634,24 +634,51 @@ class google{ private function scrape_imagearr($html){ // get image links arrays preg_match_all( - '/\[0,"([^"]+)",\["([^"]+)\",([0-9]+),([0-9]+)\],\["([^"]+)",([0-9]+),([0-9]+)\]/', + '/\[[0-9]+,"([^"]+)",\["([^"]+)\",([0-9]+),([0-9]+)\],\["([^"]+)",([0-9]+),([0-9]+)\]/', $html, $image_arr ); $this->image_arr = []; if(isset($image_arr[1])){ - + for($i=0; $ifuckhtml + ->parseJsString( + $image_arr[5][$i] + ); + + if( + preg_match( + '/^x-raw-image/', + $original + ) + ){ + + // only add thumbnail, google doesnt have OG resolution + $this->image_arr[$image_arr[1][$i]] = [ + [ + "url" => + $this->unshit_thumb( + $this->fuckhtml + ->parseJsString( + $image_arr[2][$i] + ) + ), + "width" => (int)$image_arr[7][$i], // pass the OG image width & height + "height" => (int)$image_arr[6][$i] + ] + ]; + + continue; + } + $this->image_arr[$image_arr[1][$i]] = [ [ - "url" => - $this->fuckhtml - ->parseJsString( - $image_arr[5][$i] - ), + "url" => $original, "width" => (int)$image_arr[7][$i], "height" => (int)$image_arr[6][$i] ], @@ -2635,10 +2662,10 @@ class google{ } } /* - $handle = fopen("scraper/google-img.html", "r"); - $html = fread($handle, filesize("scraper/google-img.html")); + $handle = fopen("scraper/page.html", "r"); + $html = fread($handle, filesize("scraper/page.html")); fclose($handle);*/ - + try{ $html = $this->get( @@ -2678,7 +2705,22 @@ class google{ $image = $this->fuckhtml ->getElementsByTagName("img")[0]; - + + // make sure we dont attempt to show an image we dont have data for + if( + isset($div["attributes"]["data-docid"]) && + isset($this->image_arr[$div["attributes"]["data-docid"]]) + ){ + + $source = + $this->image_arr[ + $div["attributes"]["data-docid"] + ]; + }else{ + + continue; + } + $out["image"][] = [ "title" => $this->titledots( @@ -2687,10 +2729,7 @@ class google{ $image["attributes"]["alt"] ) ), - "source" => - $this->image_arr[ - $div["attributes"]["data-docid"] - ], + "source" => $source, "url" => $this->fuckhtml ->getTextContent(