使用Puppeteer通过循环或递归反复获取page.content,以抓取分页列表的所有结果

时间:2017-09-10 14:43:01

标签: puppeteer

我想用puppeteer获取分页数据列表的所有结果。

如果我使用for循环,就会出现下面这个错误:

(node:54961) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 1): Error: Protocol error (Runtime.evaluate): Cannot find context with specified id undefined

(node:54961) DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.

这是我的代码:

const puppeteer = require('puppeteer');
var sleep = require('sleep');

/**
 * Picks a pseudo-random integer between two bounds using Math.random().
 *
 * For integer arguments the result lies in the closed range [min, max].
 *
 * @param {number} min - Lower bound (inclusive for integer input).
 * @param {number} max - Upper bound (inclusive for integer input).
 * @returns {number} The chosen value.
 */
function getRandomInt(min, max) {
    // Number of distinct integers in [min, max].
    const span = max - min + 1;
    const offset = Math.floor(Math.random() * span);
    return offset + min;
}

// Script entry point: opens 'pageUrl' in a visible browser, then for ten
// iterations saves the current page HTML under htmls/ and clicks the
// 'a.next' link, pausing a random number of seconds before and after the click.
(async () => {
    // Picks a random duration in [lo, hi] seconds, logs it, then sleeps.
    // NOTE(review): sleep.sleep is invoked without await, so it is presumably a
    // synchronous call that holds up the event loop for the whole pause —
    // confirm against the sleep module's documentation.
    const pause = (lo, hi) => {
        const seconds = getRandomInt(lo, hi);
        console.log(`We are waiting ${seconds} seconds`);
        sleep.sleep(seconds);
    };

    const browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();

    console.log('start');
    // Mirror the page's console output into this process's log.
    page.on('console', (...messages) => console.log('PAGE LOG:', ...messages));

    await page.goto('pageUrl');

    const fs = require('fs');
    let step = 0;
    while (step < 10) {
        // File names advance by 15 per iteration: 0, 15, 30, ...
        const offset = step * 15;

        const html = await page.content();
        console.log(`we have content of page ${offset}`);
        fs.writeFileSync(`htmls/${offset}.html`, html);
        console.log(`The file of page ${offset} was saved!`);

        pause(20, 40);

        const nextLink = await page.$('a.next');
        await nextLink.click();
        console.log('Click on next');

        pause(40, 80);
        step += 1;
    }

    browser.close();
})();

那么该如何解决这个错误?又该如何把这个for循环改写成递归的形式?

1 个答案:

答案 0 :(得分:-1)

我发现把整段代码包在try / catch中就解决了这个问题。我之前一直在找.then()和.catch()的示例,但这篇帖子里有一个例子,让这件事变得出乎意料地简单。

// Script entry point: opens 'pageUrl' in a visible browser and walks up to ten
// pages of the listing. On each page it saves the HTML to htmls/<num>.html,
// waits a random interval, clicks the 'a.next' link and waits again.
// Relies on `puppeteer` and `getRandomInt` being defined earlier in the file.
(async () => {
    const fs = require('fs');

    // Non-blocking pause. A synchronous sleep would stall the Node event loop,
    // so Puppeteer could not process browser protocol messages while the page
    // is loading after the click.
    const delay = (seconds) =>
        new Promise((resolve) => setTimeout(resolve, seconds * 1000));

    try {
        const browser = await puppeteer.launch({headless: false});
        try {
            const page = await browser.newPage();
            console.log('start');
            page.on('console', (...args) => console.log('PAGE LOG:', ...args));
            await page.goto('pageUrl');

            // File names advance by 15 per page: 0, 15, 30, ...
            let num = 0;
            for (let i = 0; i < 10; i++) {
                const content = await page.content();
                console.log('we have content of page ' + num);
                fs.writeFileSync('htmls/' + num + '.html', content);
                console.log('The file of page ' + num + ' was saved!');

                let sleepSecond = getRandomInt(20, 40);
                console.log('We are waiting ' + sleepSecond + ' seconds');
                await delay(sleepSecond);

                // page.$ resolves to null when nothing matches, e.g. on the
                // last page of the listing; stop instead of throwing.
                const nextLink = await page.$('a.next');
                if (nextLink === null) {
                    console.log('No next link found, stopping');
                    break;
                }
                await nextLink.click();
                console.log('Click on next');

                // NOTE(review): if the click triggers a full page load, waiting
                // on page.waitForNavigation() together with the click would be
                // more precise than a fixed delay — confirm for the target site.
                sleepSecond = getRandomInt(40, 80);
                console.log('We are waiting ' + sleepSecond + ' seconds');
                await delay(sleepSecond);
                num += 15;
            }
        } finally {
            // Always shut the browser down, even when a step above failed.
            await browser.close();
        }
    } catch (e) {
        console.error(e);
        // Report the failure to the caller instead of exiting with status 0.
        process.exitCode = 1;
    }
})();
相关问题