Node.js backup script for AWS S3 that can back up millions of files synchronously

Date: 2015-09-04 03:29:26

Tags: node.js amazon-web-services amazon-s3

I have searched Google, Stack Overflow, and Bing extensively, but I have not found an answer to my problem.

How can I make my backup script run synchronously? The problem is that the folder and its subfolders contain so many files that the script aborts with a "too many open files" error (fs.createReadStream).

I hope someone can help me. Thanks. Regards, Sven

var AWS = require('aws-sdk')
    , s3 = new AWS.S3()
    , fs = require('fs')
    , wrench = require('wrench')
    , util = require('util')

var smadir = "/Users/myuser/myfolder"
    , smafiles = wrench.readdirSyncRecursive(smadir)

// starts an upload (and opens a read stream) for every file at once
smafiles.forEach(function (file) {
    var params = {Bucket: 'mybucked', Key: file, Body: fs.createReadStream(smadir + '/' + file)};
    var options = {partSize: 10 * 1024 * 1024, queueSize: 1};
    s3.upload(params, options, function (err, data) {
        console.log(err, data);
    })
})

2 Answers:

Answer 0 (score: 1)

You can write code that manually controls how many uploads are in flight at once, like this:

var AWS = require('aws-sdk')
    , s3 = new AWS.S3()
    , fs = require('fs')
    , wrench = require('wrench')
    , util = require('util')

var smadir = "/Users/myuser/myfolder"
    , smafiles = wrench.readdirSyncRecursive(smadir);

var index = 0, maxAtOnce = 10;

function uploadNext() {
    if (index >= smafiles.length) return;
    var file = smafiles[index++];
    var params = {Bucket: 'mybucked', Key: file, Body: fs.createReadStream(smadir + '/' + file)};
    var options = {partSize: 10 * 1024 * 1024, queueSize: 1};
    s3.upload(params, options, function(err, data) {
        console.log(err, data);
        // need to decide what to do here if an error occurred

        // since one just finished, launch another one
        uploadNext();
    });
}

// now launch the initial set
for (var i = 0; i < maxAtOnce; i++) {
    uploadNext();
}

Alternatively, you can use a library such as Async or Bluebird that has built-in support for capping the maximum number of parallel operations in flight at once.

With the Async library, you can use the .eachLimit() method, which iterates over an array while limiting the maximum number of operations in flight at the same time, as sketched below.
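For illustration, here is a minimal sketch of the .eachLimit() approach (not part of the original answer), reusing the smadir/smafiles setup and the placeholder bucket name from the question:

var AWS = require('aws-sdk')
    , s3 = new AWS.S3()
    , fs = require('fs')
    , wrench = require('wrench')
    , async = require('async');

var smadir = "/Users/myuser/myfolder"
    , smafiles = wrench.readdirSyncRecursive(smadir);

// at most 10 uploads (and open read streams) are in flight at any time;
// eachLimit only starts the next file once a previous callback has fired
async.eachLimit(smafiles, 10, function (file, done) {
    var params = {Bucket: 'mybucked', Key: file, Body: fs.createReadStream(smadir + '/' + file)};
    var options = {partSize: 10 * 1024 * 1024, queueSize: 1};
    s3.upload(params, options, function (err, data) {
        console.log(err, data);
        done(err);
    });
}, function (err) {
    console.log(err ? 'upload stopped: ' + err : 'all uploads finished');
});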

With the Bluebird promise library, you can use Promise.map() with the concurrency option to control the maximum number of operations in flight at the same time, as sketched below.
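And a comparable sketch with Bluebird (also not from the original answer), wrapping each callback-style s3.upload in a promise and letting the concurrency option cap how many files are uploaded at once:

var AWS = require('aws-sdk')
    , s3 = new AWS.S3()
    , fs = require('fs')
    , wrench = require('wrench')
    , Promise = require('bluebird');

var smadir = "/Users/myuser/myfolder"
    , smafiles = wrench.readdirSyncRecursive(smadir);

Promise.map(smafiles, function (file) {
    var params = {Bucket: 'mybucked', Key: file, Body: fs.createReadStream(smadir + '/' + file)};
    var options = {partSize: 10 * 1024 * 1024, queueSize: 1};
    // wrap the callback-style upload in a promise so Promise.map can track it
    return new Promise(function (resolve, reject) {
        s3.upload(params, options, function (err, data) {
            if (err) reject(err);
            else resolve(data);
        });
    });
}, { concurrency: 10 }).then(function () {
    console.log('all uploads finished');
}).catch(function (err) {
    console.log(err);
});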

Answer 1 (score: 0)

I have worked out the solution. Here is my code. Regards, Sven

var AWS = require('aws-sdk')
    , s3 = new AWS.S3()
    , fs = require('fs')
    , wrench = require('wrench')
    , util = require('util')

var smadir = "/home/user/folder"
  , smas3rootdir = "destination_folder"
  , smafiles = wrench.readdirSyncRecursive(smadir)

var index = 0, maxAtOnce = 1;

function uploadNext() {
    if (index >= smafiles.length) return;
    var file = smafiles[index++];
    var smafoldertocheck = smadir + '/' + file
    var smaprepare = fs.statSync(smafoldertocheck).isDirectory()

    // upload only regular files; directories are skipped and the next entry is processed
    if (!smaprepare) {
        var params = {Bucket: 'mybucked', Key: smas3rootdir + '/' + file, Body: fs.createReadStream(smadir + '/' + file)};
        var options = {partSize: 10 * 1024 * 1024, queueSize: 1};
        s3.upload(params, options, function(err, data) {
            console.log(err, data);
            // need to decide what to do here if an error occurred

            // since one just finished, launch another one
            uploadNext();
        })
    } else {
        uploadNext();
    }
}

// now launch the initial set
for (var i = 0; i < maxAtOnce; i++) {
    uploadNext();
}