使用curl从asp抓取数据

时间:2014-12-07 05:51:26

标签: php curl samplegrabber webgrabber

我在下面的代码中显示了来自此网站的所有作物价格:http://agmarknet.nic.in/agnew/NationalBEnglish/MarketWiseDailyReport.aspx?ss=1

但我只能得到西孟加拉邦 Ranaghat(Kalyani)市场的价格。我希望能获得所有地区对应的 __EVENTVALIDATION 和 __EVENTARGUMENT 值。有没有办法获取所有地区的这些值,并在一个页面上显示所有地区的价格?

<?php

$debug = 1;
//__EVENTTARGET=Calendar1&__EVENTARGUMENT=5450
$post_params = '__EVENTTARGET=Calendar1&__EVENTARGUMENT=5450&__LASTFOCUS=&__VIEWSTATE=%2FwEPDwULLTE3NTAzMTM4MzYPZBYCAgMPZBYMAgcPEGQPFg9mAgECAgIDAgQCBQIGAgcCCAIJAgoCCwIMAg0CDhYPEAUEMjAwMAUEMjAwMGcQBQQyMDAxBQQyMDAxZxAFBDIwMDIFBDIwMDJnEAUEMjAwMwUEMjAwM2cQBQQyMDA0BQQyMDA0ZxAFBDIwMDUFBDIwMDVnEAUEMjAwNgUEMjAwNmcQBQQyMDA3BQQyMDA3ZxAFBDIwMDgFBDIwMDhnEAUEMjAwOQUEMjAwOWcQBQQyMDEwBQQyMDEwZxAFBDIwMTEFBDIwMTFnEAUEMjAxMgUEMjAxMmcQBQQyMDEzBQQyMDEzZxAFBDIwMTQFBDIwMTRnFgECDmQCCQ8QZGQWAQILZAILDzwrAAoBAA8WBB4LVmlzaWJsZURhdGUGAEBi4hDb0QgeAlNEFgEGAMA1QqZuyAhkZAINDxYCHgdWaXNpYmxlaGQCDw8WAh8CaGQCEQ8QZGQWAGQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgIFDEltYWdlQnV0dG9uMgUMSW1hZ2VCdXR0b24x5LvtLZ2qkO71%2BB%2FYvmnfZbHLL18%3D&__EVENTVALIDATION=%2FwEWTwLwtt7gBQLSwtXkAgLSwpnTCALM9PumDwKhkfnTCAKJv7TUCAKJv8D%2FDwKJv9yCBgKJv%2BilDQKJv4TJBQKJv5DsDAKJv6y3AwKJv7jaCwKJv5SzDQKJv6DWBQLihpLPAgLihq6SCQLihro1AuKG1tgIAuKG4uMPArDenUMC0KSt6AgCn4vJagLh8LLyDQLJ86aRBAKw35mbCQKK%2FN%2FwCgK0jqvCCgKfkaS6DAKJ5paiAwKTgaGmAgLs0%2B3ZDgL%2F%2B9P4DAKh6eFRAqHpnboFAqHpnboFAqHpidUOAqHpidUOApyGm%2F4JApyGm%2F4JApyG95kBApyG47QKApyG3%2B8DApyGy4oLApyGp6YMApyGk8EFApyGj%2FwOApyGu8UDApyGl%2BAEAvefuYkGAveflaQPAvefgV8C95%2F9%2BgkC95%2FplQEC95%2FFsAoC95%2Bx7AMC95%2BthwsC95%2FZ7wkC95%2B1iwEC0rTHkwwC0rSzzwUC0rSv6g4C0rSbhQYC0rT3oA8C0rTjWwLStN%2F2CQLStMuRAQLStOf6BwLStNOVDwL93ofSBQL93vONDQL93u%2BoBgL93tvDDwL93rd%2FAv3eo5oIAv3en7UBAv3ei9AKAv3ep7kPAo7TxPYIYM6jG2mBqNWgIsnXliZHO%2FTCAdk%3D&drpDwnYear=2014&drpDwnMonth=December&hidStates=';
// Step 1 of the captured browser session: replay the calendar postback held
// in $post_params against the report page and optionally echo the response.
$fb_page_url = "http://agmarknet.nic.in/agnew/NationalBEnglish/MarketWiseDailyReport.aspx?ss=1";

// Browser-like User-Agent string sent with the request.
$uagent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36';

// Cookie file used both to load and to store cookies; touch() makes sure it
// exists before cURL is pointed at it.
$cookies = 'cookies.txt';
touch($cookies);

$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL            => $fb_page_url,
    CURLOPT_COOKIEJAR      => $cookies,
    CURLOPT_COOKIEFILE     => $cookies,
    CURLOPT_USERAGENT      => $uagent,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,
    // TLS peer/host verification is intentionally left at libcurl's secure
    // defaults; the target URL is plain HTTP, so disabling it gained nothing.
    CURLOPT_POST           => true,
    CURLOPT_POSTFIELDS     => $post_params,
));

$data = curl_exec($ch);
if ($data === false) {
    // curl_exec() returns false on transport errors; report the reason
    // instead of silently echoing an empty string.
    error_log('cURL request to ' . $fb_page_url . ' failed: ' . curl_error($ch));
} elseif ($debug) {
    echo $data;
}
curl_close($ch);
?>
<hr><hr>
<?php

$debug = 1;
$post_params = '__EVENTTARGET=&__EVENTARGUMENT=&__LASTFOCUS=&__VIEWSTATE=%2FwEPDwULLTE3NTAzMTM4MzYPZBYCAgMPZBYQAgcPEGQPFg9mAgECAgIDAgQCBQIGAgcCCAIJAgoCCwIMAg0CDhYPEAUEMjAwMAUEMjAwMGcQBQQyMDAxBQQyMDAxZxAFBDIwMDIFBDIwMDJnEAUEMjAwMwUEMjAwM2cQBQQyMDA0BQQyMDA0ZxAFBDIwMDUFBDIwMDVnEAUEMjAwNgUEMjAwNmcQBQQyMDA3BQQyMDA3ZxAFBDIwMDgFBDIwMDhnEAUEMjAwOQUEMjAwOWcQBQQyMDEwBQQyMDEwZxAFBDIwMTEFBDIwMTFnEAUEMjAxMgUEMjAxMmcQBQQyMDEzBQQyMDEzZxAFBDIwMTQFBDIwMTRnFgECDmQCCQ8QZGQWAQILZAILDzwrAAoBAA8WBB4LVmlzaWJsZURhdGUGAEBi4hDb0QgeAlNEFgBkZAINDxYCHgdWaXNpYmxlZ2QCDw8WAh8CZ2QCEQ8QDxYEHgtfIURhdGFCb3VuZGcfAmdkEBUWDkFuZGhyYSBQcmFkZXNoBUFzc2FtC0NoYXR0aXNnYXJoB0d1amFyYXQHSGFyeWFuYRBIaW1hY2hhbCBQcmFkZXNoEUphbW11IGFuZCBLYXNobWlyCUpoYXJraGFuZAlLYXJuYXRha2EGS2VyYWxhDk1hZGh5YSBQcmFkZXNoC01haGFyYXNodHJhB01hbmlwdXIGT3Jpc3NhBlB1bmphYglSYWphc3RoYW4KVGFtaWwgTmFkdQlUZWxhbmdhbmEHVHJpcHVyYQ1VdHRhciBQcmFkZXNoClV0dHJha2hhbmQLV2VzdCBCZW5nYWwVFgJBUAJBUwJDRwJHSgJIUgJIUAJKSwJKUgJLSwJLTAJNUAJNSAJNTgJPUgJQQgJSSgJUTgJUTAJUUgJVUAJVQwJXQhQrAxZnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZGQCEw8PFgIfAmdkZAIVDw8WAh8CZ2RkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYDBQxJbWFnZUJ1dHRvbjIFDEltYWdlQnV0dG9uMQUITGlzdEJveDFPyGwEH3yOh9hDMn%2FT8iVQ9gyq%2Bw%3D%3D&__EVENTVALIDATION=%2FwEWZwKR5dH1BgLSwtXkAgLSwpnTCALM9PumDwKhkfnTCAKJv7TUCAKJv8D%2FDwKJv9yCBgKJv%2BilDQKJv4TJBQKJv5DsDAKJv6y3AwKJv7jaCwKJv5SzDQKJv6DWBQLihpLPAgLihq6SCQLihro1AuKG1tgIAuKG4uMPArDenUMC0KSt6AgCn4vJagLh8LLyDQLJ86aRBAKw35mbCQKK%2FN%2FwCgK0jqvCCgKfkaS6DAKJ5paiAwKTgaGmAgLs0%2B3ZDgL%2F%2B9P4DAKh6eFRAqHpnboFAqHpnboFAqHpidUOAqHpidUOApyGm%2F4JApyGm%2F4JApyG95kBApyG47QKApyG3%2B8DApyGy4oLApyGp6YMApyGk8EFApyGj%2FwOApyGu8UDApyGl%2BAEAvefuYkGAveflaQPAvefgV8C95%2F9%2BgkC95%2FplQEC95%2FFsAoC95%2Bx7AMC95%2BthwsC95%2FZ7wkC95%2B1iwEC0rTHkwwC0rSzzwUC0rSv6g4C0rSbhQYC0rT3oA8C0rTjWwLStN%2F2CQLStMuRAQLStOf6BwLStNOVDwL93ofSBQL93vONDQL93u%2BoBgL93tvDDwL93rd%2FAv3eo5oIAv3en7UBAv3ei9AKAv3ep7kPAuLWifwFAuLWvfwFAuDW7fwFAuTWofwFAtXWgfwFAtXWifwFAtvW3fwFAtvWgfwFAtjW3fwFAtjW2fwFAt7WifwFAt7WqfwFAt7W0fwFAtzWgfwFAs
3WwfwFAtPWofwFAtHW0fwFAtHW2fwFAtHWgfwFAtbWifwFAtbW%2FfwFAtTWwfwFAvGWg84EAp3rx40DAo7TxPYIEpT4xxK8ljYGtgaUlQSLR5YBjI4%3D&drpDwnYear=2014&drpDwnMonth=December&ListBox1=WB&Submit_list=Submit&hidStates=';
// Step 2 of the captured browser session: submit the state selection
// (ListBox1=WB in $post_params) to the report page and optionally echo the
// response.
$fb_page_url = "http://agmarknet.nic.in/agnew/NationalBEnglish/MarketWiseDailyReport.aspx";

// Browser-like User-Agent string sent with the request.
$uagent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36';

// Cookie file used both to load and to store cookies; touch() makes sure it
// exists before cURL is pointed at it.
$cookies = 'cookies.txt';
touch($cookies);

// The captured payload literal is split across two source lines, which puts a
// raw line break inside the URL-encoded __EVENTVALIDATION value. A URL-encoded
// body never contains literal CR/LF, so strip them before sending.
$post_body = str_replace(array("\r", "\n"), '', $post_params);

$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL            => $fb_page_url,
    CURLOPT_COOKIEJAR      => $cookies,
    CURLOPT_COOKIEFILE     => $cookies,
    CURLOPT_USERAGENT      => $uagent,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,
    // TLS peer/host verification is intentionally left at libcurl's secure
    // defaults; the target URL is plain HTTP, so disabling it gained nothing.
    CURLOPT_POST           => true,
    CURLOPT_POSTFIELDS     => $post_body,
));

$data = curl_exec($ch);
if ($data === false) {
    // curl_exec() returns false on transport errors; report the reason
    // instead of silently echoing an empty string.
    error_log('cURL request to ' . $fb_page_url . ' failed: ' . curl_error($ch));
} elseif ($debug) {
    echo $data;
}
curl_close($ch);
?>
<hr><hr>
<?php

$debug = 1;
$post_params = '__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=OW49wEHXOlJcAEYhXvEGoPpDJUx%2FzJxHO49oHzFmlAOZqvz0Ll5y0zdrRrtT2Z53TS8wbAtpRDzt8SI0qSSdBYS4WGfYiM%2FKeqLJua2c%2FJ2GcdN2kL0Qr8BIt4D9qjVlPeRUlQUh0HVDbsEXV5Ri6NPfDWartnk7vQkMrd2I9md1tgbP69qaPQkN4QavFrIJTPleUV3aDOPUeNdU1i4uJ%2B0q23sapRzcMOPb9cw7ClG%2F1dWf4wemDZE7ihHj7I7uL1A94sPWmE8Fa6Jc5ftrGyBfl5JxjAO47zGYpj1ICHHtW%2FXg8MNa5KrUDjPzwq0IfAWjCjI%2BmnOEy7mX6ZOvUKTpSHYdnlgbRtGx1zalwRU%2BGBDfC1MfREJDmr2VfiLtsjRutB5tiD8J%2BZdr7vEKsojyvldAVB2y8i5t3fUeF3hebV1s5DfiU6HGbq5xghASkIvNM5CShCIY9QfMNwV%2BN%2Fd7smd%2B%2FENTtTB5Qoa563c6QvGFYULUPKwVu6urMoFHMFNHGWkrhTA%2BxFgSc3tVs7205S14RguQQHvOAq1v8tecv7ROOyNu1j72tKvwzQ86QIU6BTq58W6nq36fOBLHAuFB1xzxrjrgWfOw1vMqEwjBQqemIvQmAmLGHFm6fxONgvxDwVZqZ3tA1rxauEtjbXRc9xpvJxkqH0bJPewwrnHQ0aoBfCGtO3quy6G2sUxCdSGF3%2BvLLYBGSBU31D3UTKjck5m9eJPuDDjBDe5Nqkmomzcu0OZ%2BebT65xC5zrXI%2BYys%2B%2F1mMCh1Bz5HvUZg2asDa2Motwo3UP1pVIbPoNc8Dsm7GvacJJw%2FTdMa2h5VObcFCc8m3zsI9FBUfQtzFZBXwqQMMNA7nYielcLqsg%2B7bXMykw%2F%2BFhV1UDkRutPJXYB7pm8vqWLFaa4Bkn54g4A4vPRBlXr3ZTadKvD%2Bt0m7yIgBKaEGWWiMrPt4%2FjzwQ4PhxfZ%2FUJ2kcIuwdItBQJ8LHRmQeKkKDfYUI6%2B5tXFFlNa86Q3qfpuiPzMrbTRIoVvjtwa3rE1xVfH2bZ6XWJknt%2FWhzug5iTwAOjsk7S0ntHpigPvKS7bPJOkxlHZeymTh5OFbuv7sQrealGEUvOYGrfa6D1UBTQFel7BMXdyFQbkxziG7iwxLJ9gRGBuhFg10hnWDDhVkePI59zrKXeHVETHK%2BvHQE37qJ6zRy5S%2FkKezwnizbtLVl3F%2BOHCVqDbi3CGc7SMtgEPULwPkyNqQ9%2BymVeAXztIuFhDW8I87a1cZNny4c6xyIp2AWqTDzxPGXz%2B0s5PhBzW2HiJB6I9MslSWPMAUUkazPCq0yxyTX5Tj9dO3E0qtn7QZ3L9B0u4qtuJuK2M3PwrV%2FwuhWInRKyokV2NazjfjKy3agPXCDxAW6GRfL7uv1NaGckeSouAY9WmZT0nbfBJn8pRO9Fr2yR3gSM7X38haYhTMqIwisu6t%2BsbbFx9AZup%2BW3dzyIp9310DKNAZiX709PeLGzzAcojnk%2BjPW%2B1d2g0plSHK4PG9p7GzyZGO%2FFUdcx8stQNXow%3D%3D&__VIEWSTATEENCRYPTED=&__PREVIOUSPAGE=6CvuK3sr8tHZD65YwwwgBTJ0Rrj7OTiua9sandbZdigxXZMvaRwht0m4g-fyuAJiiTSBh3ou-n9J7XjuXP1UJ5GllwM3xtQQmjJtMygkNYux0AkixCoqQJ6HGXMVJXNX_PGWGA2&__EVENTVALIDATION=qikfNlSjty%2FhkScW6o3q9rqoFGqn3weRdW9lwMSuw3NE2AJ7CxL%2F7ossEvo3lTDh4SRzDAxgEb1mWVymzRGfwaGD9m%2FN6B0VMbyBLkjVapCbAMVQR4P0
9iyZduW%2BYRloraF4IGulVNfUTGNAZfHl0wiIDx8C1ckppOZXEzR%2BCw9BEopp&GridView1%24ctl04%24RowLevelCheckBox2=on&btnSubmit=Submit';
// Step 3 of the captured browser session: submit the market checkbox
// selection in $post_params to the second report page and optionally echo the
// response.
$fb_page_url = "http://agmarknet.nic.in/agnew/NationalBEnglish/MarketWiseDailyReport2.aspx";

// Browser-like User-Agent string sent with the request.
$uagent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36';

// Cookie file used both to load and to store cookies; touch() makes sure it
// exists before cURL is pointed at it.
$cookies = 'cookies.txt';
touch($cookies);

// The captured payload literal is split across two source lines, which puts a
// raw line break inside the URL-encoded __EVENTVALIDATION value. A URL-encoded
// body never contains literal CR/LF, so strip them before sending.
$post_body = str_replace(array("\r", "\n"), '', $post_params);

$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL            => $fb_page_url,
    CURLOPT_COOKIEJAR      => $cookies,
    CURLOPT_COOKIEFILE     => $cookies,
    CURLOPT_USERAGENT      => $uagent,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,
    // TLS peer/host verification is intentionally left at libcurl's secure
    // defaults; the target URL is plain HTTP, so disabling it gained nothing.
    CURLOPT_POST           => true,
    CURLOPT_POSTFIELDS     => $post_body,
));

$data = curl_exec($ch);
if ($data === false) {
    // curl_exec() returns false on transport errors; report the reason
    // instead of silently echoing an empty string.
    error_log('cURL request to ' . $fb_page_url . ' failed: ' . curl_error($ch));
} elseif ($debug) {
    echo $data;
}
curl_close($ch);
?>

1 个答案:

答案 0 :(得分:0)

我们不需要硬编码 __VIEWSTATE 和 __EVENTVALIDATION 的值,因为可以用下面的代码动态获取这些值:

<?php

// When truthy, the HTML returned by the final POST is echoed.
$debug = 1;

// Explicit scheme so libcurl does not have to guess the protocol.
$fb_page_url = "http://agmarknet.nic.in/agnew/NationalBEnglish/MarketWiseDailyReport.aspx?ss=1";

// Cookie file shared by both requests; touch() makes sure it exists.
$cookies = 'cookies.txt';
touch($cookies);

// Browser-like User-Agent string sent with both requests.
$uagent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36';


/**
 * Step 1: GET the page and read the current __VIEWSTATE and
 * __EVENTVALIDATION hidden fields out of the returned HTML, so they do not
 * have to be hard-coded.
 */
$ch = curl_init($fb_page_url);
curl_setopt_array($ch, array(
    CURLOPT_COOKIEJAR      => $cookies,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_USERAGENT      => $uagent,
));

$html = curl_exec($ch);
if ($html === false) {
    // Capture the error before the handle is closed.
    $curl_error = curl_error($ch);
    curl_close($ch);
    throw new RuntimeException('Initial GET of ' . $fb_page_url . ' failed: ' . $curl_error);
}
curl_close($ch);

$hidden = array();
foreach (array('__VIEWSTATE', '__EVENTVALIDATION') as $field) {
    // Match the hidden <input> by its name attribute and capture its value.
    $pattern = '~<input\b[^>]*\bname="' . preg_quote($field, '~') . '"[^>]*\bvalue="([^"]*)"~i';
    if (preg_match($pattern, $html, $match) !== 1) {
        // Without these tokens the POST below cannot be built; fail loudly
        // rather than posting an empty value.
        throw new RuntimeException('Hidden field ' . $field . ' was not found in the response');
    }
    // Attribute values are HTML-escaped in the markup; decode before re-posting.
    $hidden[$field] = html_entity_decode($match[1], ENT_QUOTES, 'UTF-8');
}

$viewstate = $hidden['__VIEWSTATE'];
$eventValidation = $hidden['__EVENTVALIDATION'];


/**
 * Step 2: POST the calendar postback together with the freshly read tokens.
 */
$postfields = array(
    '__EVENTTARGET'     => "Calendar1",
    // NOTE(review): presumably the Calendar control's identifier for the
    // selected day -- confirm against the page before changing it.
    '__EVENTARGUMENT'   => 5450,
    '__LASTFOCUS'       => "",
    '__VIEWSTATE'       => $viewstate,
    '__EVENTVALIDATION' => $eventValidation,
    'drpDwnYear'        => 2014,
    'drpDwnMonth'       => "December",
    'hidStates'         => "",
);

$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL            => $fb_page_url,
    // TLS peer/host verification is intentionally left at libcurl's secure
    // defaults instead of being switched off.
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_COOKIEJAR      => $cookies,
    CURLOPT_COOKIEFILE     => $cookies,
    CURLOPT_FOLLOWLOCATION => true,
    // Bounded timeouts so a stalled server cannot hang the script for hours.
    CURLOPT_CONNECTTIMEOUT => 30,
    CURLOPT_TIMEOUT        => 300,
    CURLOPT_USERAGENT      => $uagent,
    CURLOPT_POST           => true,
    // Passing an array would make cURL send multipart/form-data; encode the
    // fields as application/x-www-form-urlencoded like a browser form post.
    CURLOPT_POSTFIELDS     => http_build_query($postfields, '', '&'),
));

$ret = curl_exec($ch); // Fetched page as a string, or false on failure.
if ($ret === false) {
    error_log('cURL POST to ' . $fb_page_url . ' failed: ' . curl_error($ch));
} elseif ($debug) {
    echo $ret;
}
curl_close($ch);
?>