NodeJS内存使用情况

时间:2015-03-09 21:14:23

标签: javascript node.js memory

我正在尝试使用 NodeJS,并为此编写了一个电子邮件提取器。不知为何,当我发起多个 HTTP 请求时,Windows 任务管理器中 node.exe 的内存占用会不断增加。我知道 Node 需要更多内存来处理请求,但我注意到,即使所有请求都已成功处理完毕,内存占用也没有下降。

刚启动 nodejs 时,它占用大约 35000K 内存,但在处理了大约 80-100 个请求之后,内存占用会上升到 50000K 并一直保持在这个水平。

这是我的简单电子邮件提取程序模块:

    var request = require('request'), 
    cheerio = require('cheerio'), 
    async = require('async'), 
    urlHelper = require('url');

/**
 * Holds the state of one e-mail extraction run.
 *
 * @constructor
 * @param {{url: string}} config - Settings for the run; `config.url` is the
 *     page the extraction starts from.
 */
function Extractor(config) {
    var startUrl = config.url;

    // Keep both the raw start URL and its parsed form.
    this.baseUrl = startUrl;
    this.parsedUrl = urlHelper.parse(startUrl);

    // Both collections start empty.
    this.urls = [];
    this.emails = [];
}

/**
 * Scan a chunk of markup/text for e-mail-looking tokens and append every
 * match to `this.emails` (duplicates are kept, in order of appearance).
 *
 * @param {?string} html - Text to scan. Anything that is not a non-empty
 *     string is ignored, so a missing page body cannot crash the caller.
 */
Extractor.prototype.getEmails = function getEmails(html) {
    // Callers in this file pass `$('body').html()` straight through; guard
    // here so a null/undefined value does not throw on `.match`.
    if (typeof html !== 'string' || html.length === 0) {
        return;
    }

    var foundEmails = html.match(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi) || [];

    if (foundEmails.length) {
        this.emails = this.emails.concat(foundEmails);
    }
};

/**
 * Parse the base page: collect e-mail addresses from its body and, when none
 * are found, queue the page's same-site links in `this.urls` for a later
 * crawl.
 *
 * @param {string} html - Markup of the base page.
 */
Extractor.prototype.extract = function extract(html) {
    var $ = cheerio.load(html),
        that = this,
        host = this.parsedUrl.host,
        bodyHtml = $('body').html();

    // A selection object is always truthy, so test the extracted markup
    // itself; it is presumably null/empty when the page has no <body>.
    if (bodyHtml) {
        this.getEmails(bodyHtml);
    }

    // Links are only needed as a fallback when the base page had no address.
    if (this.emails.length) {
        return;
    }

    // Absolute links on the same host plus root- and dot-relative links.
    var linkSelector = [
        "a[href^='http://" + host + "']",
        "a[href^='https://" + host + "']",
        "a[href^='/']",
        "a[href^='./']",
        "a[href^='../']"
    ].join(', ');

    $(linkSelector).each(function (index, link) {
        // Resolve against the base URL so relative hrefs become absolute.
        that.urls.push(urlHelper.resolve(that.baseUrl, $(link).attr('href')));
    });
};


/**
 * Fetch the base URL and hand the outcome to `next`.
 *
 * @param {function(*, *)} next - Called as `next(err, body)` with whatever
 *     error and response body `request` reports.
 */
Extractor.prototype.processBase = function processBase(next) {
    var startUrl = this.baseUrl;

    // Only the error and the body are forwarded; the response object is dropped.
    function onResponse(error, res, pageBody) {
        return next(error, pageBody);
    }

    request(startUrl, onResponse);
};

/**
 * Crawl the queued internal pages one at a time until an e-mail address is
 * found or the queue in `this.urls` is empty.
 *
 * Pages that fail to download or come back without a body are skipped, so a
 * single bad link does not abort the crawl.
 *
 * @param {function(Extractor)} cb - Called once with this extractor when the
 *     loop finishes.
 */
Extractor.prototype.processInternal = function processInternal(cb) {
    var that = this;

    // NOTE(review): this uses the synchronous-test form of `async.whilst`;
    // newer major versions of the library expect a callback-style test —
    // confirm against the installed version.
    async.whilst(
        // Keep going while nothing has been found and pages remain.
        function () { return that.emails.length === 0 && that.urls.length > 0; },
        // Fetch the next queued page and scan its body.
        function (callback) {
            request(that.urls.shift(), function (err, response, body) {
                // Skip failed requests: `body` is not usable markup then.
                if (!err && body) {
                    var pageHtml = cheerio.load(body)('body').html();

                    if (pageHtml) {
                        that.getEmails(pageHtml);
                    }
                }

                // Always signal completion so the loop can advance; no error
                // is forwarded, so one bad page never stops the series.
                callback();
            });
        },
        // Final callback of the loop; the iteration above never passes an
        // error to it.
        function (err) {
            cb(that);
        }
    );
};

/**
 * Run the whole extraction: fetch and scan the base page, then fall back to
 * crawling the queued internal links when the base page has no addresses.
 *
 * `next` is invoked exactly once on every path.
 *
 * @param {function(*, Extractor=)} next - Called as `next(err)` when the
 *     base page cannot be fetched, otherwise as `next(null, extractor)` once
 *     the extraction has finished.
 */
Extractor.prototype.process = function process(next) {
    var that = this;

    this.processBase(function (err, html) {
        if (err) {
            // Kept so existing deployments still see the failure in their logs.
            console.log(err);
            return next(err);
        }

        that.extract(html);

        // The base page already produced results: finish without crawling.
        if (that.emails.length) {
            return next(null, that);
        }

        that.processInternal(function () {
            return next(null, that);
        });
    });
};

// The Extractor constructor is this module's only export.
module.exports = Extractor;

以下是我的调用方式:

var express = require('express');
var router = express.Router();
var Extractor = require('../services/Extractor');

/**
 * GET / — reply to the client right away, then kick off an extraction run
 * whose outcome is not reported back to the client.
 */
router.get('/', function (req, res) {
    var crawlTarget = {url: 'http://lior-197784.use1-2.nitrousbox.com:4000/crawl'};

    // The response is sent before any extraction work starts.
    res.json({msg: 'okay'});

    // Fire-and-forget: the completion callback deliberately does nothing.
    new Extractor(crawlTarget).process(function (err, result) {});
});

// Export the configured router as this module's only export.
module.exports = router;

0 个答案:

没有答案
相关问题