如何检查代理是否处于活动状态

时间:2013-04-30 21:30:43

标签: php mysql curl proxy

我有一个代理列表,正在尝试检查其中哪些处于活动状态。到目前为止,我的做法是通过 cURL 逐个连接它们,看它们是否有响应,但我想找一种更快的方法,类似于 http://www.ip-adress.com/Proxy_Checker/ 。我在考虑检查端口是否打开之类的办法。我目前使用的代码如下:

<?php




// Report only fatal run-time errors (E_ERROR); warnings and notices are not reported.
error_reporting(E_ERROR);
//ini_set('memory_limit', '256M');
/**
 * Perform an HTTP GET request through cURL and hand back the response body.
 *
 * The requested URL is $loginURL with $loginFields appended verbatim. Cookies
 * are loaded from and saved to "cookies.txt", redirects are followed, and TLS
 * peer/host verification is switched off.
 *
 * @param string $loginURL     Base URL to request.
 * @param string $loginFields  Text appended directly to $loginURL.
 * @param string $referer      Value sent as the Referer header.
 * @param string $cookieString Not used by this function.
 * @param mixed  $code         Taken by value: the HTTP status assigned to it
 *                             below never reaches the caller.
 *
 * @return string|false Response body, or false when the transfer fails.
 */
function hitFormGet($loginURL, $loginFields, $referer, $cookieString, $code)
{
    $browserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11";

    // Option table, applied one entry at a time in the order listed.
    $settings = array(
        CURLOPT_COOKIEJAR      => "cookies.txt",
        CURLOPT_COOKIEFILE     => "cookies.txt",
        CURLOPT_COOKIESESSION  => true,
        CURLOPT_FAILONERROR    => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_VERBOSE        => 0,
        CURLOPT_ENCODING       => 'gzip,deflate,sdch',
        CURLOPT_SSL_VERIFYHOST => 0,
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_FRESH_CONNECT  => true,
        CURLOPT_HEADER         => false,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CONNECTTIMEOUT => 30,
        CURLOPT_TIMEOUT        => 35,
        CURLOPT_USERAGENT      => $browserAgent,
        CURLOPT_URL            => $loginURL . $loginFields,
        CURLOPT_REFERER        => $referer,
    );

    $handle = curl_init();
    foreach ($settings as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    $body    = curl_exec($handle);
    $details = curl_getinfo($handle);
    // Only changes the local copy of $code (the parameter is not a reference).
    $code    = $details['http_code'];
    curl_close($handle);

    return $body;
}
/**
 * Perform an HTTP GET request through the given proxy and return the response body.
 *
 * The requested URL is $loginURL with $loginFields appended verbatim. Cookies
 * are loaded from and saved to "cookies.txt", up to 10 redirects are followed,
 * and TLS peer/host verification is switched off. A progress line of the form
 * "<proxy>><url>" is echoed before the request is made.
 *
 * @param string $loginURL     Base URL to request.
 * @param string $loginFields  Text appended directly to $loginURL.
 * @param string $referer      Sent as the Referer header only when longer than 4 characters.
 * @param string $cookieString Value sent via CURLOPT_COOKIE.
 * @param mixed  $code         Output parameter: receives the complete curl_getinfo() result
 *                             for the transfer (e.g. 'http_code', 'total_time').
 * @param string $proxy        Proxy passed to CURLOPT_PROXY.
 * @param bool   $js           When truthy, "Accept: *" + "/*" style wildcard is sent
 *                             ("Accept: * / *" without spaces); otherwise an HTML-oriented
 *                             Accept header is sent.
 *
 * @return string|false Response body, or false when the transfer fails.
 */
function hitFormGetProxyINI($loginURL, $loginFields, $referer, $cookieString, &$code, $proxy, $js)
{
    echo $proxy . ">$loginURL\n";

    // The original read and split "bot.ini" on every call but never used the
    // result; that dead file access has been removed.
    $ch    = curl_init();
    $encod = "gzip,deflate,sdch";
    $agent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11";

    curl_setopt($ch, CURLOPT_COOKIEJAR, "cookies.txt");
    curl_setopt($ch, CURLOPT_COOKIEFILE, "cookies.txt");

    curl_setopt($ch, CURLOPT_AUTOREFERER, true);
    curl_setopt($ch, CURLOPT_COOKIESESSION, true);
    curl_setopt($ch, CURLOPT_COOKIE, $cookieString);
    curl_setopt($ch, CURLOPT_FAILONERROR, false);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_VERBOSE, 0);
    curl_setopt($ch, CURLOPT_PROXY, $proxy);

    // Fail fast on unreachable proxies. This value used to be overwritten by a
    // second CURLOPT_CONNECTTIMEOUT of 30 further down, so a dead proxy was only
    // abandoned when the 20-second overall timeout fired.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_TIMEOUT, 20);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
    curl_setopt($ch, CURLOPT_ENCODING, $encod);

    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
    curl_setopt($ch, CURLOPT_FRESH_CONNECT, true);
    curl_setopt($ch, CURLOPT_HEADER, false);

    // Only the Accept header differs between the two request flavours.
    $accept = $js
        ? 'Accept: */*'
        : 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
    curl_setopt($ch, CURLOPT_HTTPHEADER, array(
        'Accept-Language: en-US,en;q=0.8',
        $accept,
        'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3'
    ));

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_USERAGENT, $agent);

    curl_setopt($ch, CURLOPT_URL, $loginURL . $loginFields);

    if (strlen($referer) > 4) {
        curl_setopt($ch, CURLOPT_REFERER, $referer);
    }

    $ret  = curl_exec($ch);
    // Hand the whole transfer-info array back to the caller through $code.
    $code = curl_getinfo($ch);
    curl_close($ch);

    return $ret;
}
/**
 * Perform an HTTP POST request through cURL and hand back the response body.
 *
 * $loginFields is sent as the POST payload to $loginURL. Cookies are loaded
 * from and saved to "cookies.txt", redirects are followed, and TLS peer/host
 * verification is switched off.
 *
 * @param string       $loginURL     URL to post to.
 * @param string|array $loginFields  Payload passed to CURLOPT_POSTFIELDS.
 * @param string       $referer      Value sent as the Referer header.
 * @param string       $cookieString Not used by this function.
 *
 * @return string|false Response body, or false when the transfer fails.
 */
function hitFormPost($loginURL, $loginFields, $referer, $cookieString)
{
    $browserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11";

    // Option table, applied one entry at a time in the order listed.
    $settings = array(
        CURLOPT_COOKIEJAR      => "cookies.txt",
        CURLOPT_COOKIEFILE     => "cookies.txt",
        CURLOPT_COOKIESESSION  => true,
        CURLOPT_FAILONERROR    => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_VERBOSE        => 0,
        CURLOPT_ENCODING       => 'gzip,deflate,sdch',
        CURLOPT_SSL_VERIFYHOST => 0,
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_FRESH_CONNECT  => true,
        CURLOPT_HEADER         => false,
        CURLOPT_POST           => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CONNECTTIMEOUT => 30,
        CURLOPT_USERAGENT      => $browserAgent,
        CURLOPT_URL            => $loginURL,
        CURLOPT_REFERER        => $referer,
        CURLOPT_POSTFIELDS     => $loginFields,
    );

    $handle = curl_init();
    foreach ($settings as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}
// Main script: take the 100 least recently checked 'anon' proxies, probe each
// one in its own forked child process, and have every child append the
// resulting UPDATE statement(s), '|'-terminated, to its own mysql_dumpB<i>.txt.

// Truncate (or create) the 100 per-slot dump files the children append to.
for ($in = 0; $in < 100; $in++) {
    file_put_contents("mysql_dumpB$in.txt", '');
}

// NOTE(review): the reason for this 3-minute pause is not visible in this file;
// presumably it gives the companion script time to finish with the previous
// dump files - confirm before changing it.
sleep(180);

// NOTE(review): the mysql_* extension was removed in PHP 7; this script needs
// porting to mysqli or PDO before it can run on a current PHP version.
$link = mysql_connect("localhost:3306", "userkdo_botuser1", "dvd6000") or die(mysql_error());
mysql_select_db("userkdo_botdb1", $link) or die(mysql_error());

// Country tiers. Only the country classification that is currently disabled
// referred to these lists; they are kept for when it is re-enabled.
$tier1_countries = array(
    "United States",
    "Canada",
    "Japan",
    "United Kingdom",
    "Germany",
    "France",
    "Belgium",
    "Netherlands",
    "Sweden",
    "Norway",
    "Denmark",
    "Ireland",
    "Switzerland",
    "Spain",
    "Luxembourg",
    "Liechtenstein",
    "Monaco",
    "Italy",
    "Finland",
    "Austria",
    "Australia"
);
$tier2_countries = array(
    "Cyprus",
    "Greece",
    "Hong Kong",
    "Israel",
    "Republic of Korea",
    "New Zealand",
    "Poland",
    "Portugal"
);

$result = mysql_query("SELECT *
FROM  `proxies` WHERE `type`='anon'
ORDER BY  `proxies`.`last_checked` ASC
LIMIT 0 , 100");
if ($result === false) {
    // Same failure style as the connection calls above.
    die(mysql_error());
}

$nexturl = "http://DOMAIN.net/bot/headers.php";
$pids    = array();
$i       = 0;
while ($row = mysql_fetch_assoc($result)) {
    $pid = pcntl_fork();

    if ($pid === -1) {
        // The fork failed, so there is no child to wait for. Previously the -1
        // was stored and later passed to pcntl_waitpid(), which then waits for
        // an arbitrary child instead of a specific one.
        fwrite(STDERR, "pcntl_fork failed for proxy " . $row['proxy'] . "\n");
        $i++;
        continue;
    }

    if ($pid === 0) {
        // child process
        $proxy = $row['proxy'];
        $cd    = array();
        $res   = hitFormGetProxyINI($nexturl, "", "", "", $cd, $proxy, false);
        echo $res . "\n";

        $dumpFile = "mysql_dumpB$i.txt";
        $elapsed  = $cd['total_time'];
        $where    = " WHERE  `proxies`.`proxy` =  '" . $proxy . "'";
        // NOTE(review): $proxy is concatenated into the SQL text unescaped; the
        // script that executes the dump files has to be able to trust this column.

        // The country lookup is disabled, so there is no country to report; the
        // marker line is still printed, with an empty value as before.
        $country = '';

        if ($res === "FALSE") {
            // A response body of exactly "FALSE" from headers.php is what marks
            // the proxy as active.
            $now = time();
            file_put_contents($dumpFile, "UPDATE  `proxies` SET `status` =  'active',`last_checked` =  '" . $now . "',`last_active` =  '" . $now . "',`response_time`='" . $elapsed . "'" . $where . "|", FILE_APPEND);
            echo "UPDATE  `proxies` SET `status` =  'active',`last_active` =  '" . $now . "',`response_time`='" . $elapsed . "'" . $where . "\n";
            echo "\n>>$country<<\n";
        } else {
            // The geo-IP request that used to run here (up to 35 seconds per
            // dead proxy) has been removed: its decoded result was never used.
            file_put_contents($dumpFile, "UPDATE  `proxies` SET `status` =  'inactive',`last_checked` =  '" . time() . "',`response_time`='" . $elapsed . "'" . $where . "|", FILE_APPEND);
            echo "UPDATE  `proxies` SET `status` =  'inactive',`response_time`='" . $elapsed . "'" . $where . "\n";
            echo "\n>>$country<<\n";

            if (is_string($res) && strlen($res) > 1) {
                // Something other than "FALSE" came back: a second statement is
                // appended after the 'inactive' one, setting the status to '404'.
                file_put_contents($dumpFile, "UPDATE  `proxies` SET `status` =  '404',`last_checked` =  '" . time() . "',`response_time`='" . $elapsed . "'" . $where . "|", FILE_APPEND);
            }
        }

        exit();
    }

    // parent process: remember the child so it can be waited for below
    $pids[$i] = $pid;
    $i++;
}

// Wait for every child that was started.
foreach ($pids as $pid) {
    pcntl_waitpid($pid, $status, WUNTRACED);
}




?>

该脚本从数据库中取出 100 个代理,然后通过每个代理向目标地址发送请求并检查响应。我只需要知道它们当前是否处于活动状态,因此多余的部分可以去掉。检查结果以 SQL 查询语句的形式写入文件,之后由第二个脚本执行这些语句来更新数据库。

1 个答案:

答案 0 :(得分:6)

也许你可以逐个 ping 它们?

类似这样的事情

$host = '192.168.0.1';
$port = 80;
$waitTimeoutInSeconds = 1;
// Try to open a TCP connection to the host/port, giving up after the timeout.
$fp = fsockopen($host, $port, $errCode, $errStr, $waitTimeoutInSeconds);
if ($fp) {
   // It worked
   // Close the socket only here: on failure $fp is false, not a stream,
   // so it must not be passed to fclose().
   fclose($fp);
} else {
   // It didn't work
}

做法来自:here