抓取需要安全登录的网站

时间:2013-11-28 00:33:52

标签: php

我正在尝试从我的一个经销商的网站上抓取每件产品的库存数量。他们不知道如何导出这些数据。所以我想知道是否有人可以告诉我如何使用PHP抓取该网站以获取数据?

我尝试过以下脚本,但它不起作用。能帮忙看一下吗?

// Form body for the login request: e-mail address (URL-encoded), password,
// and the x/y fields, joined as application/x-www-form-urlencoded pairs.
$postData="email_address=".urlencode("benkrish.mk@gmail.com")."&password=18&x=24&y=11";

$ch = curl_init(); 
// First request: the login page is fetched right away, before any of the
// options below (return-transfer, cookies, POST data) have been set on the
// handle, so it goes out with default settings and its result is not stored.
curl_setopt($ch, CURLOPT_URL,"https://www.wonatrading.com/login.php"); 
curl_exec($ch); 

// From here on curl_exec() returns the response body instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); 
// NOTE(review): $url is not assigned anywhere in this snippet, so the target
// of the next request is undefined here — confirm what it was meant to be.
curl_setopt($ch, CURLOPT_URL, $url); 
$cookie = 'cookies.txt';
$timeout = 30;

// Follow redirects; read cookies from and write cookies to the same file.
curl_setopt ($ch, CURLOPT_FOLLOWLOCATION, 1);
// NOTE(review): the overall timeout (10) is shorter than the connect timeout
// ($timeout = 30) — presumably these were meant the other way round; confirm.
curl_setopt($ch, CURLOPT_TIMEOUT,         10); 
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT,  $timeout);
curl_setopt($ch, CURLOPT_COOKIEJAR,       $cookie);
curl_setopt($ch, CURLOPT_COOKIEFILE,      $cookie);

// Switch the handle to POST and attach the login form body built above.
curl_setopt ($ch, CURLOPT_POST, 1); 
curl_setopt ($ch,CURLOPT_POSTFIELDS,$postData);     

// Second request: sent to whatever CURLOPT_URL currently holds ($url); the
// returned body is discarded because the result is not assigned.
curl_exec($ch); 

// Third request: only the URL is changed, so the POST settings and form body
// configured above are still active on the handle for the product page.
curl_setopt($ch, CURLOPT_URL, "http://www.wonatrading.com/product_info.php?products_id=213754&kind=2&cPath=172_185"); 
echo curl_exec($ch); 

1 个答案:

答案 0 :(得分:2)

好的,我把这个分享给你……这是我用来抓取网页的类。请随意使用。

<?php

/**
 * Small cURL-based helper that logs in to wonatrading.com.
 *
 * The login page is fetched first so that every field of the login form
 * (including hidden ones) can be submitted back together with the account
 * credentials. Cookies are persisted in a file so the session set up by one
 * request is available to the next.
 */
class Scrape
{
    /** @var string File used both to read cookies from and to write cookies to. */
    public $cookies = 'cookies.txt';

    /** @var string|null Value submitted in the "email_address" form field. */
    private $user = null;

    /** @var string|null Value submitted in the "password" form field. */
    private $pass = null;

    /** @var string|bool|null Body of the most recent response (false if the transfer failed). */
    public $content = null;

    /** @var array|null curl_getinfo() details of the most recent request. */
    public $response = null;

    /** @var array Named URLs; cURL() additionally stores the final URL under 'last_url'. */
    private $url = array(
        'login'  => 'https://www.wonatrading.com/account.php',
        'submit' => 'https://www.wonatrading.com/login.php?action=process',
    );

    /** @var array Free-form field storage; not used by the class itself. */
    public $data = array();

    /**
     * @param string $user Account e-mail address.
     * @param string $pass Account password.
     */
    public function __construct($user, $pass)
    {
        $this->user = $user;
        $this->pass = $pass;
    }

    /**
     * Fetch the login page, fill in the login form and submit it.
     *
     * Prints the body of the last response (the post-login page when the form
     * was found, otherwise the page that was fetched) and then terminates the
     * script.
     *
     * @throws RuntimeException When a request cannot be completed.
     */
    public function login()
    {
        $this->cURL($this->url['login']);

        $form = $this->getFormFields($this->content, 'login');
        if ($form) {
            $form['email_address'] = $this->user;
            $form['password']      = $this->pass;
            $this->cURL($this->url['submit'], $form);
        }

        echo $this->content;
        exit;
    }

    /**
     * Locate the <form> whose name attribute equals $id and collect its inputs.
     *
     * The name must appear inside the opening <form ...> tag itself, so an
     * unrelated form placed earlier in the page is never merged into the match.
     *
     * @param string $data HTML document to search.
     * @param string $id   Exact value of the form's name attribute.
     *
     * @return array|false Map of input name => value, or false when no such form exists.
     */
    private function getFormFields($data, $id)
    {
        $pattern = '/<form\b[^>]*(?<![\w-])name\s*=\s*(["\']?)'
            . preg_quote($id, '/')
            . '\1(?=[\s>\/])[^>]*>.*?<\/form>/is';

        if (!preg_match($pattern, (string) $data, $matches)) {
            return false;
        }

        return $this->getInputs($matches[0]);
    }

    /**
     * Collect the name/value pairs of every <input> element in an HTML fragment.
     *
     * Values may be double-quoted, single-quoted or unquoted; HTML entities are
     * decoded so the value matches what a browser would submit. Inputs without
     * a name are skipped, and a later input overrides an earlier one that has
     * the same name.
     *
     * @param string $form HTML fragment, typically one complete <form> element.
     *
     * @return array Map of input name => value ('' when the input has no value).
     */
    private function getInputs($form)
    {
        $inputs = array();

        // Quoted attribute values are consumed as a unit so a ">" inside one
        // does not end the tag early.
        if (!preg_match_all('/<input\b(?:[^>"\']|"[^"]*"|\'[^\']*\')*>/i', $form, $tags)) {
            return $inputs;
        }

        foreach ($tags[0] as $tag) {
            // Drop the leading "<input" and the trailing ">" or "/>".
            $attributeText = rtrim(substr($tag, 6, -1), " \t\r\n/");
            $attributes    = $this->parseAttributes($attributeText);

            if (!isset($attributes['name']) || $attributes['name'] === '') {
                continue;
            }

            $inputs[$attributes['name']] = isset($attributes['value']) ? $attributes['value'] : '';
        }

        return $inputs;
    }

    /**
     * Parse the attribute list of a single tag into a lower-cased name => value map.
     *
     * Attributes are read left to right, which keeps text inside one
     * attribute's quoted value from being mistaken for another attribute.
     * When an attribute is repeated, the first occurrence is kept.
     *
     * @param string $attributeText Text between the tag name and the closing ">".
     *
     * @return array Map of attribute name => entity-decoded value ('' for bare attributes).
     */
    private function parseAttributes($attributeText)
    {
        $attributes = array();
        $pattern    = '/([^\s"\'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+)))?/';

        if (!preg_match_all($pattern, $attributeText, $matches, PREG_SET_ORDER)) {
            return $attributes;
        }

        foreach ($matches as $match) {
            $key = strtolower($match[1]);
            if (array_key_exists($key, $attributes)) {
                continue;
            }

            // Groups 2-4 hold the double-quoted, single-quoted and unquoted
            // forms; at most one of them is non-empty.
            $raw = '';
            foreach (array(2, 3, 4) as $group) {
                if (isset($match[$group]) && $match[$group] !== '') {
                    $raw = $match[$group];
                    break;
                }
            }

            $attributes[$key] = html_entity_decode($raw, ENT_QUOTES, 'UTF-8');
        }

        return $attributes;
    }

    /**
     * Perform an HTTP request and record its outcome on the instance.
     *
     * Sets $content to the response body, $response to the curl_getinfo()
     * array and $url['last_url'] to the effective URL after redirects.
     * The server certificate and host name are verified, because this method
     * is used to transmit account credentials.
     *
     * @param string             $url  Address to request.
     * @param array|string|false $post Form fields (or a ready-made body) to send
     *                                 as POST; any falsy value performs a GET.
     *
     * @throws RuntimeException When the transfer fails.
     */
    public function cURL($url, $post = false)
    {
        $ch = curl_init();

        curl_setopt_array($ch, array(
            CURLOPT_URL            => $url,
            CURLOPT_USERAGENT      => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HEADER         => false,
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_COOKIEJAR      => $this->cookies,
            CURLOPT_COOKIEFILE     => $this->cookies,
            CURLOPT_CONNECTTIMEOUT => 120,
            CURLOPT_TIMEOUT        => 120,
            CURLOPT_FOLLOWLOCATION => true,
        ));

        if ($post) {
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, is_array($post) ? http_build_query($post) : $post);
        }

        $body                  = curl_exec($ch);
        $this->content         = $body;
        $this->response        = curl_getinfo($ch);
        $this->url['last_url'] = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        $error                 = ($body === false) ? curl_error($ch) : '';

        // Closing the handle is what writes the cookies to the jar file.
        curl_close($ch);

        if ($body === false) {
            throw new RuntimeException('cURL request to ' . $url . ' failed: ' . $error);
        }
    }
}


// Account credentials are read from the environment so that no secrets are
// stored in the source file.
$email    = getenv('WONA_EMAIL');
$password = getenv('WONA_PASSWORD');

if ($email === false || $password === false) {
    throw new RuntimeException('Set the WONA_EMAIL and WONA_PASSWORD environment variables before running this script.');
}

// login() prints the page returned after the login attempt and ends the script.
$sc = new Scrape($email, $password);
$sc->login();


?>