抓取网页时,NodeJs请求错误(ETIMEDOUT)

时间:2018-11-22 21:02:09

标签: node.js promise request

在 Node.js 中抓取网页时(使用 localhost 作为服务器),有时会出现 ETIMEDOUT 错误。我正在使用 request-promise 发出请求。我找不到处理它的方法:这个错误并不是每次都出现,而是随机出现。我尝试过降低并发数,但是没有用。毕竟,我的抓取强度并不高。有什么建议吗(例如使用其他库等)?

这是错误代码:

{ RequestError: Error: connect ETIMEDOUT 52.232.0.90:443
    at new RequestError (C:\Users\HHS\NodeJs 
Projects\News_Scraper\node_modules\request-promise-core\lib\errors.js:14:15)
    at Request.plumbing.callback (C:\Users\HHS\NodeJs 
Projects\News_Scraper\node_modules\request-promise-core\lib\plumbing.js:87:29)
    at Request.RP$callback [as _callback] (C:\Users\HHS\NodeJs 
Projects\News_Scraper\node_modules\request-promise-core\lib\plumbing.js:46:31)
    at self.callback (C:\Users\HHS\NodeJs 
Projects\News_Scraper\node_modules\request\request.js:185:22)
    at emitOne (events.js:116:13)
    at Request.emit (events.js:211:7)
    at Request.onRequestError (C:\Users\HHS\NodeJs 
Projects\News_Scraper\node_modules\request\request.js:881:8)
    at emitOne (events.js:116:13)
    at ClientRequest.emit (events.js:211:7)
    at TLSSocket.socketErrorListener (_http_client.js:387:9)
    at emitOne (events.js:116:13)
    at TLSSocket.emit (events.js:211:7)
    at emitErrorNT (internal/streams/destroy.js:66:8)
    at _combinedTickCallback (internal/process/next_tick.js:139:11)
    at process._tickCallback (internal/process/next_tick.js:181:9)

  name: 'RequestError',
  message: 'Error: connect ETIMEDOUT 52.232.0.90:443',
  cause:
   { Error: connect ETIMEDOUT 52.232.0.90:443
    at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1191:14)
     errno: 'ETIMEDOUT',
     code: 'ETIMEDOUT',
     syscall: 'connect',
     address: '52.232.0.90',
     port: 443 },
  error:
   { Error: connect ETIMEDOUT 52.232.0.90:443
    at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1191:14)
     errno: 'ETIMEDOUT',
     code: 'ETIMEDOUT',
     syscall: 'connect',
     address: '52.232.0.90',
     port: 443 },
  options:
   { uri: 'https://www.ntv.com.tr/ekonomi/hazine-ve-maliye-bakani-albayrakenflasyondaki-asagiya-dogru-trendi-cok-daha-guc,400oDcsHMUq1nXl6-52C9w',
     callback: [Function: RP$callback],
     transform: undefined,
     simple: true,
     resolveWithFullResponse: false,
     transform2xxOnly: false },
     response: undefined }

用于抓取的代码:

const rp = require('request-promise');
const cheerio = require('cheerio');
const fs = require("fs");
const Promise = require("bluebird");
const moment = require('moment');

/**
 * Scrapes the ntv.com.tr front page, follows every slide link found there,
 * and collects title / URL / image / timestamp metadata for each article.
 *
 * The collected array is written to `./ntv.json` and also returned.
 *
 * Notes on behavior:
 * - A link whose request or parsing fails is logged and produces an
 *   `undefined` entry in the result (serialized as `null` in the JSON file).
 * - If fetching the front page itself fails, the error is logged and an
 *   empty array is written and returned.
 * - The file write is not awaited; the function returns before the write
 *   completes, and a write error is thrown from the callback (outside the
 *   returned promise).
 *
 * @returns {Promise<Array<{title: *, newsUrl: string, imageUrl: string, time: number}|undefined>>}
 *   The scraped posts, one entry per slide link.
 */
async function Gundem () {
    let posts = [];
    try {
        const baseUrl = 'https://www.ntv.com.tr';
        const mainHtml = await rp(baseUrl);
        const $ = cheerio.load(mainHtml);
        // The selector string was previously missing its closing quote,
        // which made the whole file fail to parse.
        const links = $(".swiper-slide.color-white").map((i, el) => {
            return baseUrl + $(el).children("a").first().attr("href");
        }).get();

        // Promise.map with a `concurrency` option caps how many article
        // requests are in flight at once.
        posts = await Promise.map(links, async (link) => {
            try {
                const newsHtml = await rp(link);
                // Separate name so the article document does not shadow the
                // front-page `$` above.
                const $article = cheerio.load(newsHtml);
                return {
                    title: getTitle($article),
                    newsUrl: $article("meta[property='og:url']").attr("content"),
                    imageUrl: $article("meta[property='og:image']").attr("content"),
                    time: moment($article("time").attr("datetime")).valueOf()
                };
            } catch (e) {
                // Best effort: a single failed article must not abort the run.
                console.log('error scraping ' + link + '\n', e);
            }
        },
        {concurrency: 10});
    } catch (e) {
        console.log('error scraping ntv' + '\n', e);
    }
    fs.writeFile('./ntv.json', JSON.stringify(posts, null, 3), (err) => {
        if (err) throw err;
    });
    return posts;
}

0 个答案:

没有答案