使用phantom.js生成多个HAR文件

时间:2017-01-30 08:01:09

标签: javascript phantomjs har

我正在使用netsniff.js中的代码生成har文件,并希望对其进行改进,以便根据数组(即下面代码中名为links的数组)中给出的多个链接生成har文件。

这里有另一个问题Using Multiple page.open in Single Script可能对我有所帮助,但我不知道如何在我的代码中实现给定的解决方案..

下面是我的代码(如果links数组包含多个项目,它会在输出文件中记录FAIL to load the address):

"use strict";
/**
 * Fallback implementation of Date.prototype.toISOString.
 *
 * Must be invoked with a Date as `this`. Every component is read with the
 * getUTC* accessors because the result ends with the 'Z' (UTC) designator;
 * reading the local-time fields instead would mislabel the timestamp on any
 * machine whose time zone is not UTC.
 *
 * @returns {string} UTC timestamp, e.g. "2017-01-30T08:01:09.005Z".
 */
function toISOStringFallback() {
    // Left-pads a value below 10 to two digits.
    function pad(n) { return n < 10 ? '0' + n : n; }
    // Left-pads milliseconds to three digits.
    function ms(n) { return n < 10 ? '00' + n : n < 100 ? '0' + n : n; }
    return this.getUTCFullYear() + '-' +
        pad(this.getUTCMonth() + 1) + '-' +
        pad(this.getUTCDate()) + 'T' +
        pad(this.getUTCHours()) + ':' +
        pad(this.getUTCMinutes()) + ':' +
        pad(this.getUTCSeconds()) + '.' +
        ms(this.getUTCMilliseconds()) + 'Z';
}

// Install the fallback only on runtimes that lack a native implementation.
if (!Date.prototype.toISOString) {
    Date.prototype.toISOString = toISOStringFallback;
}
// Kept for backward compatibility: processSites still assigns to this global
// before each createHAR call. createHAR itself no longer reads or writes it.
var entries = [];

/**
 * Builds a HAR 1.2 log object describing a single page load.
 *
 * Each call collects its entries in a fresh local list, so the result never
 * contains entries left over from a previously processed page.
 *
 * @param {string} address   Page URL; used as the page id and as each entry's pageref.
 * @param {string} title     Page title.
 * @param {Date}   startTime Moment the page load started.
 * @param {Array}  resources Records of the form {request, startReply, endReply},
 *                           indexed by request id (may be sparse). Records that
 *                           are missing any of the three parts are skipped.
 * @param {Date}   [endTime] Moment the page load finished. Optional; when
 *                           omitted the global `page.endTime` is used.
 * @returns {Object} Object with a single `log` property holding the HAR data.
 */
function createHAR(address, title, startTime, resources, endTime)
{
    var pageEntries = [];
    var loadEnd = endTime === undefined ? page.endTime : endTime;

    resources.forEach(function (resource) {
        if (!resource) {
            return;
        }

        var request = resource.request,
            startReply = resource.startReply,
            endReply = resource.endReply;

        // Only fully answered requests can be described.
        if (!request || !startReply || !endReply) {
            return;
        }

        // Exclude image Data URIs from the HAR file because
        // they aren't included in the specification
        if (request.url.match(/(^data:image\/.*)/i)) {
            return;
        }

        pageEntries.push({
            startedDateTime: request.time.toISOString(),
            time: endReply.time - request.time,
            request: {
                method: request.method,
                url: request.url,
                httpVersion: "HTTP/1.1",
                cookies: [],
                headers: request.headers,
                queryString: [],
                headersSize: -1,
                bodySize: -1
            },
            response: {
                status: endReply.status,
                statusText: endReply.statusText,
                httpVersion: "HTTP/1.1",
                cookies: [],
                headers: endReply.headers,
                redirectURL: "",
                headersSize: -1,
                bodySize: startReply.bodySize,
                content: {
                    size: startReply.bodySize,
                    mimeType: endReply.contentType
                }
            },
            cache: {},
            timings: {
                blocked: 0,
                dns: -1,
                connect: -1,
                send: 0,
                wait: startReply.time - request.time,
                receive: endReply.time - startReply.time,
                ssl: -1
            },
            pageref: address
        });
    });

    return {
        log: {
            version: '1.2',
            creator: {
                name: "PhantomJS",
                version: phantom.version.major + '.' + phantom.version.minor +
                    '.' + phantom.version.patch
            },
            pages: [{
                startedDateTime: startTime.toISOString(),
                id: address,
                title: title,
                pageTimings: {
                    // Measured against the startTime argument so it agrees
                    // with startedDateTime above.
                    onLoad: loadEnd - startTime
                }
            }],
            entries: pageEntries
        }
    };
}
// Single WebPage instance that processSites reuses for every site.
var page = require('webpage').create()
// File-system module; processSites uses it to write each HAR file.
var fs = require('fs');
// Number of sites started so far; used to build the 'file<count>.har' output name.
var count = 0;
/**
 * Loads the URLs in `links` one at a time and writes one HAR file per site.
 *
 * Sites are taken from the END of `links` (the array is consumed with pop(),
 * so it is empty once processing finishes). Each site is written to
 * 'file<count>.har' using the global `count`. The next site is started from
 * inside the completion callback of the current one, so loads never overlap.
 * PhantomJS exits when the list is exhausted, or with exit code 1 as soon as
 * a site fails to load.
 *
 * @param {string[]} links URLs still to be processed; mutated by this function.
 */
function processSites(links)
{
    // Empty (or missing) work list: exit cleanly instead of handing
    // `undefined` to page.open().
    if (!links || links.length === 0) {
        phantom.exit();
        return;
    }

    page.address = links.pop();
    var path = 'file' + count + '.har';
    page.resources = [];
    console.log("page resources:", page.resources)
    count = count + 1;

    page.onLoadStarted = function () {
        page.startTime = new Date();
    };

    page.onResourceRequested = function (req) {
        page.resources[req.id] = {
            request: req,
            startReply: null,
            endReply: null
        };
    };

    page.onResourceReceived = function (res) {
        // A response whose request was not recorded for the current site has
        // no slot in page.resources; ignore it rather than throw.
        var record = page.resources[res.id];
        if (!record) {
            return;
        }
        if (res.stage === 'start') {
            record.startReply = res;
        }
        if (res.stage === 'end') {
            record.endReply = res;
        }
    };

    page.open(page.address, function (status) {
        // Stamp the end of the load now. Stamping it after the delay below
        // would add the whole delay to the reported onLoad time.
        page.endTime = new Date();

        // NOTE(review): the 10 s delay presumably gives late resources time
        // to be recorded before the HAR is built - confirm.
        setTimeout(function () {
            var har;

            if (status !== 'success') {
                console.log('FAIL to load the address');
                phantom.exit(1);
                return;
            }

            page.title = page.evaluate(function () {
                return document.title;
            });
            // Reset the shared accumulator so this HAR only holds this site's entries.
            entries = [];
            har = createHAR(page.address, page.title, page.startTime, page.resources);
            fs.write(path, JSON.stringify(har), 'w');

            if (links.length > 0) {
                processSites(links);
            } else {
                phantom.exit();
            }
        }, 10000);
    });

}

// URLs to capture. processSites consumes this array from the end (pop),
// so the last URL listed is loaded first.
var links = ["http://stackoverflow.com", "http://marvel.com"];

processSites(links);

更新:
上面的代码会生成两个har文件(file0.har和file1.har),但是第二个har文件还包含了两个链接各自生成的har内容,而它本应只包含第一个链接的har内容...

该问题已通过设置 var har = " " 解决。

1 个答案:

答案 0 :(得分:2)

由于page.open方法是异步的,因此在PhantomJS中无法用简单的循环依次打开多个页面:它不会等待第一个网站处理完毕,而是会立即打开第二个网站。

我已经重写了您的脚本以使用递归:只有在处理完当前网站后才会打开下一个网站。 (注意:如果队列中的任何站点无法加载,整个过程将停止,但您可以轻松地重写脚本以避免这种情况。)

// Define Date.prototype.toISOString only when the runtime does not provide one.
if (!Date.prototype.toISOString) {
    Date.prototype.toISOString = function () {
        // ... (function body elided in this listing)
    }
}

// Script-level list of HAR entries.
// NOTE(review): presumably filled by createHAR, whose body is elided below - confirm.
var entries = [];

// Returns the HAR object for one page; processSites serializes the result with JSON.stringify.
function createHAR(address, title, startTime, resources)
{
    // ... (function body elided in this listing)
}

var page = require('webpage').create()

/**
 * Loads the URLs in `links` one at a time and prints one HAR document per
 * site to the console.
 *
 * Sites are taken from the END of `links` (the array is consumed with pop(),
 * so it is empty once processing finishes). The next site is started from
 * inside the completion callback of the current one, so loads never overlap.
 * PhantomJS exits when the list is exhausted, or with exit code 1 as soon as
 * a site fails to load.
 *
 * @param {string[]} links URLs still to be processed; mutated by this function.
 */
function processSites(links)
{
    // Empty (or missing) work list: exit cleanly instead of handing
    // `undefined` to page.open().
    if (!links || links.length === 0) {
        phantom.exit();
        return;
    }

    page.address = links.pop();

    console.log("PAGE ADDRESS: ", page.address);
    page.resources = [];

    page.onLoadStarted = function () {
        page.startTime = new Date();
    };

    page.onResourceRequested = function (req) {
        page.resources[req.id] = {
            request: req,
            startReply: null,
            endReply: null
        };
    };

    page.onResourceReceived = function (res) {
        // A response whose request was not recorded for the current site has
        // no slot in page.resources; ignore it rather than throw.
        var record = page.resources[res.id];
        if (!record) {
            return;
        }
        if (res.stage === 'start') {
            record.startReply = res;
        }
        if (res.stage === 'end') {
            record.endReply = res;
        }
    };

    page.open(page.address, function (status) {
        // Stamp the end of the load now. Stamping it after the delay below
        // would add the whole delay to the reported load time.
        page.endTime = new Date();

        // NOTE(review): the 10 s delay presumably gives late resources time
        // to be recorded before the HAR is built - confirm.
        setTimeout(function () {
            var har;

            if (status !== 'success') {
                console.log('FAIL to load the address');
                phantom.exit(1);
                return;
            }

            page.title = page.evaluate(function () {
                return document.title;
            });
            // Start every site with an empty script-level entries list so the
            // HAR for this site cannot include entries from earlier sites.
            entries = [];
            har = createHAR(page.address, page.title, page.startTime, page.resources);
            console.log(JSON.stringify(har, undefined, 4));

            if (links.length > 0) {
                processSites(links);
            } else {
                phantom.exit();
            }
        }, 10000);
    });

}

// URLs to capture. processSites consumes this array from the end (pop),
// so the last URL listed is loaded first.
var links = ["http://edition.cnn.com", "http://stackoverflow.com"];

processSites(links);
相关问题