我的网页中有这样一个链接:
<a href="http://foo.com/home.do?SID=3443132">...
我需要使用XPath提取“href”属性。CasperJS的API文档中提到了一个相关的方法:clientutils.getElementByXPath。
这是我的代码:
// Question script: reproduces the "Can't find variable: __utils__" failure.
// Tell PhantomJS where the CasperJS checkout lives and load its bootstrap file.
phantom.casperPath = '..n1k0-casperjs-5428865';
phantom.injectJs(phantom.casperPath + '\\bin\\bootstrap.js');
var casper = require('casper').create();
var url = "...";
// First step: open the page and log a marker once it has started.
casper.start(url, function() {
casper.echo("started");
});
// x() wraps an XPath expression so Casper methods accept it as a selector.
var x = require('casper').selectXPath;
casper.then(function()
{
casper.echo("getsid");
// This assertion passes in the reported run, so the XPath itself matches.
this.test.assertExists(x('//a[contains(@href, "home.do?SID=")]'), 'the element exists');
// The reported run fails on this line with a ReferenceError: __utils__ is
// referenced directly in the script here, but it is injected into the page,
// so it is only reachable from inside an evaluate() callback.
var element = __utils__.getElementByXPath('//a[contains(@href, "home.do?SID=")]');
});
但它失败了。它返回:
false
undefined
started
getsid
PASS the element exists <== XPATH WORKS
FAIL ReferenceError: Can't find variable: __utils__
# type: uncaughtError
# error: "ReferenceError: Can't find variable: __utils__"
ReferenceError: Can't find variable: __utils__
答案 0 :(得分:3)
试试这个:
// Answer script: assert that the SID link exists, then read its href.
// Tell PhantomJS where the CasperJS checkout lives and load its bootstrap file.
phantom.casperPath = '..n1k0-casperjs-5428865';
phantom.injectJs(phantom.casperPath + '\\bin\\bootstrap.js');
var url = "...";
var casper = require('casper').create();
// x() wraps an XPath expression so Casper methods accept it as a selector.
var x = require('casper').selectXPath;
casper.start(url, function() {
    casper.echo("started");
});
casper.then(function() {
    casper.echo("getsid");
    // Build the selector once; x() replaces the hand-written
    // { type: 'xpath', path: ... } descriptor.
    var sidLink = x('//a[contains(@href, "home.do?SID=")]');
    this.test.assertExists(sidLink, 'the element exists');
    // x() alone only describes the selector; it never touches the page.
    // getElementAttribute resolves it against the page and returns the value.
    var href = this.getElementAttribute(sidLink, 'href');
    casper.echo(href);
});
答案 1 :(得分:3)
正如评论中指出的那样,您必须在evaluate回调中使用__utils__,因为它是被注入到页面中的。由于您需要的是href(已编辑),您可以使用:
// Step that reads the SID link's href from inside the page. __utils__ is
// injected into the page, so it is used within the evaluate() callback.
casper.then(function () {
    casper.echo("getsid");
    var sidLink = x('//a[contains(@href, "home.do?SID=")]');
    this.test.assertExists(sidLink, 'the element exists');
    var href = this.evaluate(function () {
        // The XPath literal stays inline: this function runs in the page and
        // cannot see variables from the surrounding script.
        return __utils__.getElementByXPath('//a[contains(@href, "home.do?SID=")]').href;
    });
});
这可以简写为:
// Shorter variant: let Casper resolve the XPath selector and return the
// href attribute directly, without a hand-written evaluate() callback.
casper.then(function () {
    casper.echo("getsid");
    var sidLink = x('//a[contains(@href, "home.do?SID=")]');
    this.test.assertExists(sidLink, 'the element exists');
    var href = this.getElementAttribute(sidLink, "href");
});
您还可以使用casper.getElementInfo获取元素的完整信息,其中包括所有HTML属性(attribute),但只包含部分DOM属性(property)。