Nightmare.js有条件浏览

时间:2016-06-16 11:15:22

标签: node.js web-scraping nightmare

我试图理解如何使用“if-then”逻辑制作一个nightmare.js脚本。例如

var Nightmare = require('nightmare');

// Options handed to the Nightmare constructor.
var browserOptions = {
    show: true,
    paths: { userData: '/dev/null' }
};
var nightmare = Nightmare(browserOptions);

// Passed to .evaluate(); returns the text of the page's <title> element.
function readPageTitle() {
    var titleElement = document.querySelector('title');
    return titleElement.innerText;
}

// Fulfilment handler: logs whatever the chain resolved with.
function reportResult() {
    console.log('then', arguments);
}

// Rejection handler: logs whatever the chain rejected with.
function reportFailure() {
    console.log('end', arguments);
}

nightmare
    .goto('http://www.example.com/')
    .wait('h1')
    .evaluate(readPageTitle)
    // here: go to url1 if title == '123' otherwise to url2
    .end()
    .then(reportResult)
    .catch(reportFailure);

如何根据评估结果将此脚本转到其他网址?

1 个答案:

答案 0 :(得分:10)

由于 Nightmare 是 thenable 的(具有 `.then()` 方法),你可以在 `.then()` 回调中返回它来继续链式调用,就像普通的 Promise 一样。

var Nightmare = require('nightmare');
var nightmare = Nightmare({
  show: true,
  paths: {
    userData: '/dev/null'
  }
});

// Load the start page, read its <title>, then branch to a different URL
// depending on that title before shutting the browser down.
nightmare
  .goto('http://www.example.com/')
  .wait('h1')
  .evaluate(function() {
    return document.querySelector('title')
      .innerText;
  })
  .then(function(title) {
    // `title` is the value returned by the evaluate() callback above.
    if (title === 'someTitle') {
      return nightmare.goto('http://www.yahoo.com');
    } else {
      return nightmare.goto('http://w3c.org');
    }
  })
  .then(function() {
    //since nightmare is `then`able, this `.then()` will
    //execute the call chain described and returned in 
    //the previous `.then()`
    return nightmare
      //... other actions...
      .end();
  })
  .then(function() {
    console.log('done');
  })
  .catch(function() {
    console.log('caught', arguments);
    // `.end()` above is only reached on the success path; if any earlier
    // step rejected, the browser process would otherwise be left running.
    // Returning the instance makes the chain wait for the shutdown.
    return nightmare.end();
  });

如果您想要更具同步风格的逻辑,可以考虑将 generators 与 vo 或 co 一起使用。例如,上面的代码用 vo 重写:

var Nightmare = require('nightmare');
var vo = require('vo');

// Generator-based version of the conditional navigation: each `yield`
// waits for the queued Nightmare actions to finish before continuing.
vo(function * () {
  const nightmare = Nightmare({
    show: true,
    paths: {
      userData: '/dev/null'
    }
  });

  try {
    // Resolves to the value returned by the evaluate() callback.
    const title = yield nightmare
      .goto('http://www.example.com/')
      .wait('h1')
      .evaluate(function() {
        return document.querySelector('title')
          .innerText;
      });

    if (title === 'someTitle') {
      yield nightmare.goto('http://www.yahoo.com');
    } else {
      yield nightmare.goto('http://w3c.org');
    }

    //... other actions...
  } finally {
    // Always shut the browser down, even when one of the steps above
    // throws; otherwise the failed run would skip `.end()` entirely.
    yield nightmare.end();
  }
})(function(err) {
  // Node-style completion callback: `err` is set when the generator threw.
  if (err) {
    console.log('caught', err);
  } else {
    console.log('done');
  }
});