2015-11-06 1 views
1

J'ai un scraper très simple qui fait maintenant ce dont j'ai besoin, mais il est très lent : il récupère 2 photos en 3 secondes, alors qu'il m'en faudrait au moins 1000 en quelques secondes. Sujet : cURL multi-threadé avec SSL et redirection.

Voici le code que j'utilise maintenant

<?php 
require_once('config.php'); 

// PHasher perceptual-image-hashing class.
include_once('classes/phasher.class.php'); 
$I = PHasher::Instance(); 

// Prevent execution timeout: this scrape loop runs indefinitely.
set_time_limit(0); 

// Stream context for file_get_contents(): skip SSL peer verification
// (works around certificate/CA issues on this host).
$arrContextOptions=array(
    "ssl"=>array(
     "verify_peer"=>false, 
     "verify_peer_name"=>false, 
    ), 
); 

// Resume from the highest already-hashed profile id in the database,
// or start from id 4 when the table is empty.
$check = mysqli_query($con, "SELECT fid FROM images ORDER BY fid DESC LIMIT 1;"); 
if(mysqli_num_rows($check) > 0){ 

    $max_fid = mysqli_fetch_row($check); 

    $fid = $max_fid[0]+1; 
} else { 
    $fid = 4; 
} 

// URL that Facebook redirects deleted profiles' pictures to.
$deletedProfile = "https://z-1-static.xx.fbcdn.net/rsrc.php/v2/yo/r/UlIqmHJn-SK.gif"; 

// Prepared statement: prevents SQL injection through $hex and avoids
// re-parsing the INSERT on every iteration.
$insert = mysqli_prepare($con, "INSERT INTO images(fid, hash) VALUES (?, ?)");

// Infinite loop: fetch profile pictures one by one, hash them, store them.
// (Original code juggled $fid and $initial with `while($fid = $initial)` —
// an assignment, not a comparison; a single counter is equivalent and clear.)
while(true){ 

    $url = 'https://graph.facebook.com/'.$fid.'/picture?width=378&height=378'; 

    // Body-less request whose only purpose is to discover the final
    // redirect target (CURLINFO_EFFECTIVE_URL).
    $ch = curl_init(); 
    curl_setopt($ch, CURLOPT_URL, $url); 
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // follow the redirects 
    curl_setopt($ch, CURLOPT_HEADER, false); // no need to pass headers to the data stream 
    curl_setopt($ch, CURLOPT_NOBODY, true); // get the resource without a body 
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // accept any server certificate 
    curl_exec($ch); 

    // Get the last used URL after all redirects.
    $lastUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); 

    curl_close($ch); 

    // Deleted profiles all redirect to the same placeholder GIF — skip them.
    if($lastUrl != $deletedProfile){ 
        $imageData = file_get_contents($url, false, stream_context_create($arrContextOptions)); 
        $savedImage = dirname(__FILE__).'/avatar/image.jpg'; 
        file_put_contents($savedImage, $imageData); 

        // Exclude corrupted downloads: getimagesize() returns false on failure
        // (an array otherwise, so comparing it with `> 0` was meaningless).
        if(getimagesize($savedImage) !== false){ 

            // Hash the image to a hex string via PHasher.
            $hash = $I->FastHashImage($savedImage); 
            $hex = $I->HashAsString($hash); 

            // Store Facebook id and hashed value (parameterized — no injection).
            mysqli_stmt_bind_param($insert, "is", $fid, $hex);
            mysqli_stmt_execute($insert);
        } 
    } 

    // Advance to the next profile id in every case (deleted, corrupted, or stored).
    $fid++;
} 

?> 

je ne figure pas comment le faire, mais ce que je pense est maintenant:

1- Diviser en 1000 profils pour chaque bouclez et stockez-les dans un tableau.

// Build a flat fid => url map, 1000 profiles per batch.
// Fix 1: store the URL string directly instead of wrapping it in array($url),
//         which produced the nested [fid][0] => url output the asker saw.
// Fix 2: use `<` instead of `<=` so the batch contains exactly 1000 entries,
//         not 1001.
$items = array(); 
for($i=$fid; $i < $fid+1000; $i++){ 

    $url = 'https://graph.facebook.com/'.$i.'/picture?width=378&height=378'; 
    $items[$i] = $url; 
} 

mais les résultats sont incorrects ; je veux savoir comment corriger la sortie du tableau.

Array ([28990] => Array ([0] => https://graph.facebook.com/28990/picture?width=378&height=378) 
[28991] => Array ([0] => https://graph.facebook.com/28991/picture?width=378&height=378) 
[28992] => Array ([0] => https://graph.facebook.com/28992/picture?width=378&height=378) 
[28993] => Array ([0] => https://graph.facebook.com/28993/picture?width=378&height=378) 
[28994] => Array ([0] => https://graph.facebook.com/28994/picture?width=378&height=378) 
[28995] => Array ([0] => https://graph.facebook.com/28995/picture?width=378&height=378) 
[28996] => Array ([0] => https://graph.facebook.com/28996/picture?width=378&height=378) 
[28997] => Array ([0] => https://graph.facebook.com/28997/picture?width=378&height=378) 

2- Ensuite, je souhaite utiliser le tableau obtenu à l'intérieur d'une boucle curl_multi, qui permet de traiter plusieurs handles cURL de façon asynchrone.

3- Vérifiez les URL de sortie si elles sont égales au profil supprimé si elles ne le transmettent pas pour être converties en valeur de hachage à l'aide de PHasher et stockez-les dans le DB.

Répondre

1

J'ai justement ce dont vous avez besoin, même si je n'ai pas été en mesure d'atteindre ce genre de débit (1000 requêtes parallèles par seconde).

J'ai oublié où je suis arrivé avant, mais je me sers de ce télécharger reddit contenu:

/**
 * ParallelCurl — runs multiple HTTP downloads concurrently via curl_multi.
 *
 * Usage: call startRequest() for each URL with a callback; at most
 * $max_requests transfers are in flight at once (startRequest() blocks
 * until a slot frees up). Call finishAllRequests() at the end of the
 * script so pending callbacks fire (the destructor does this too).
 */
class ParallelCurl { 

    public $max_requests;          // max concurrent transfers (<= 0 means unlimited)
    public $options;               // curl_setopt_array() options applied to every handle
    public $outstanding_requests;  // handle-key => array(link_url, callback, user_data)
    public $multi_handle;          // the shared curl_multi handle

    public function __construct($in_max_requests = 10, $in_options = array()) { 
     $this->max_requests = $in_max_requests; 
     $this->options = $in_options; 

     $this->outstanding_requests = array(); 
     $this->multi_handle = curl_multi_init(); 
    } 

    // Ensure all the requests finish nicely 
    public function __destruct() { 
     $this->finishAllRequests(); 
    } 

    // Sets how many requests can be outstanding at once before we block and wait for one to 
    // finish before starting the next one 
    public function setMaxRequests($in_max_requests) { 
     $this->max_requests = $in_max_requests; 
    } 

    // Sets the options to pass to curl, using the format of curl_setopt_array() 
    public function setOptions($in_options) { 
     $this->options = $in_options; 
    } 

    // Maps a curl handle to a stable array key. On PHP >= 8.0 curl handles
    // are CurlHandle objects, where `(int) $ch` is a fatal error — use
    // spl_object_id() there; keep the legacy resource-to-int cast otherwise.
    private function handleKey($ch) {
     return is_object($ch) ? spl_object_id($ch) : (int) $ch;
    }

    // Start a fetch from the $url address, calling the $callback function passing the optional 
    // $user_data value. The callback should accept 4 arguments: the content, url, curl handle 
    // and user data, eg on_request_done($content, $url, $ch, $user_data); 
    public function startRequest($url, $callback, $user_data = array(), $post_fields = null, $headers = null) { 
     // Block until there is a free slot (when a concurrency cap is set).
     if ($this->max_requests > 0) 
      $this->waitForOutstandingRequestsToDropBelow($this->max_requests); 

     $ch = curl_init(); 
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); 
     curl_setopt_array($ch, $this->options); 
     curl_setopt($ch, CURLOPT_URL, $url); 
     if (isset($post_fields)) { 
      curl_setopt($ch, CURLOPT_POST, TRUE); 
      curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields); 
     } 
     if (is_array($headers)) { 
      curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); 
     } 

     curl_multi_add_handle($this->multi_handle, $ch); 

     // Remember the request so the completion loop can find its callback.
     $ch_array_key = $this->handleKey($ch); 
     $this->outstanding_requests[$ch_array_key] = array(
      'link_url' => $url, 
      'callback' => $callback, 
      'user_data' => $user_data, 
     ); 

     $this->checkForCompletedRequests(); 
    } 

    // You *MUST* call this function at the end of your script. It waits for any running requests 
    // to complete, and calls their callback functions 
    public function finishAllRequests() { 
     $this->waitForOutstandingRequestsToDropBelow(1); 
    } 

    // Checks to see if any of the outstanding requests have finished 
    private function checkForCompletedRequests() { 
     // Pump the multi handle until it has no more immediate work.
     // Structured this way as a fix for https://bugs.php.net/bug.php?id=63411
     // (curl_multi_select() could return -1 forever on some PHP builds).
     do { 
      $mrc = curl_multi_exec($this->multi_handle, $active); 
     } while ($mrc == CURLM_CALL_MULTI_PERFORM); 
     while ($active && $mrc == CURLM_OK) { 
      if (curl_multi_select($this->multi_handle) != -1) { 
       do { 
        $mrc = curl_multi_exec($this->multi_handle, $active); 
       } while ($mrc == CURLM_CALL_MULTI_PERFORM); 
      } else 
       return; 
     } 

     // Now grab the information about the completed requests and fire callbacks.
     while ($info = curl_multi_info_read($this->multi_handle)) { 

      $ch = $info['handle']; 
      $ch_array_key = $this->handleKey($ch); 

      if (!isset($this->outstanding_requests[$ch_array_key])) { 
       // Should be impossible: every added handle is recorded in startRequest().
       die("Error - handle wasn't found in requests: '$ch' in " . 
        print_r($this->outstanding_requests, true)); 
      } 

      $request = $this->outstanding_requests[$ch_array_key]; 
      $url = $request['link_url']; 
      $content = curl_multi_getcontent($ch); 
      $callback = $request['callback']; 
      $user_data = $request['user_data']; 

      call_user_func($callback, $content, $url, $ch, $user_data); 

      unset($this->outstanding_requests[$ch_array_key]); 

      curl_multi_remove_handle($this->multi_handle, $ch); 
     } 
    } 

    // Blocks until there's less than the specified number of requests outstanding 
    private function waitForOutstandingRequestsToDropBelow($max) { 
     while (1) { 
      $this->checkForCompletedRequests(); 
      if (count($this->outstanding_requests) < $max) 
       break; 

      // Brief sleep so the wait loop doesn't spin at 100% CPU.
      usleep(10000); 
     } 
    } 

} 

La façon dont cela fonctionne : vous passez à ParallelCurl::startRequest() une URL et une fonction de rappel (éventuellement anonyme) ; le téléchargement de cette URL est mis en file d'attente, puis la fonction de rappel est appelée lorsque le téléchargement se termine.

// At most 10 downloads in flight at once; these curl options are applied
// to every handle the pool creates.
$curlOptions = [
    CURLOPT_RETURNTRANSFER => 1, 
    CURLOPT_FOLLOWLOCATION => 1, 
    CURLOPT_SSL_VERIFYPEER => 1, 
];
$pcurl = new ParallelCurl(10, $curlOptions);

// Queue one download; the callback fires when it completes.
$onDownloadDone = function($data) { 
    // download finished. $data is html or binary, whatever you requested 
    echo $data; 
};
$pcurl->startRequest($url, $onDownloadDone);
+0

Merci, ça fonctionne, mais pouvez-vous me dire comment enregistrer la sortie de la boucle sous forme de tableau ? Elle n'enregistre qu'un seul élément, à l'indice [0]. – Jadolyo

+0

Que voulez-vous dire enregistrer comme un tableau? Collectez simplement vos URL dans un tableau et faites une boucle dans ce tableau en passant chaque URL à $ pcurl-> startRequest. Voir ceci: http://pastebin.com/rMJG1QjC. Exécutez ceci (avec la classe ici) en ligne de commande et vous remarquerez que les téléchargements peuvent ne pas être en ordre car ils ne sont pas sauvegardés en séquence mais en parallèle –