Question

我创建了一个脚本，用于将数据从 DynamoDB 迁移到 MySQL 数据库。起初我没有使用 async，但后来在 SQL 一侧遇到了瓶颈，于是决定用 async 库对 Dynamo 部分进行“节流”（throttle）。问题是：流程中间有一处递归——只要 Dynamo 里还有数据，我就必须继续这个过程（一个超简单的 ETL），但我不知道如何在 waterfall 内部执行递归。我的代码：

/**
 * Entry point: runs the migration steps in order with async.waterfall,
 * each step handing its result to the next one.
 *
 * Logs the error if any step fails; otherwise logs the value produced by
 * the last step.
 */
function main() {
    async.waterfall([getMaxTimestamp, scanDynamoDB, printout, saveToMySQL], function (err, result) {
        if (err) {
            // Stop here so a failed run does not also print an undefined result.
            console.error(err);
            return;
        }
        console.log(result);
    });
}

/**
 * First waterfall step: asks MySQL for the newest created_at value already
 * stored in the Tracking table.
 *
 * @param {Function} callback - called as callback(err, rows) with whatever
 *   the query returned; the value is exposed as rows[0].start_date.
 */
function getMaxTimestamp(callback) {
    const timerLabel = "max query";
    const sql = "SELECT MAX(created_at) as start_date from Tracking;";

    console.time(timerLabel);
    connection.query(sql, (queryErr, rows) => {
        console.timeEnd(timerLabel);
        callback(queryErr, rows);
    });
}

/**
 * Second waterfall step: runs a single DynamoDB scan with the shared `query`
 * and hands the raw scan response to the next step.
 *
 * @param {Array} data - rows from the previous step; when data[0].start_date
 *   is truthy it replaces the ':v_ca' value of the shared `query`.
 * @param {Function} callback - called as callback(err, scanResponse).
 */
function scanDynamoDB(data, callback) {
    const startDate = data[0].start_date;
    if (startDate) {
        query.ExpressionAttributeValues[':v_ca'].N = startDate;
    }

    const timerLabel = "dynamo read";
    console.time(timerLabel);
    dynamoDB.scan(query, (scanErr, scanResponse) => {
        console.timeEnd(timerLabel);
        callback(scanErr, scanResponse);
        // Earlier paging attempt, left disabled (it is not wired into the waterfall):
        // if (!err) {
        //     if (data != undefined && data.Count > 0) {
        //         printout(data.Items) // Print out the subset of results.
        //         if (data.LastEvaluatedKey) { // Result is incomplete; there is more to come.
        //             query.ExclusiveStartKey = data.LastEvaluatedKey;
        //             scanDynamoDB(query);
        //         }
        //     } else {
        //         console.log('No fresh data found on Dynamo')
        // } else console.dir(err);
    });
}

/**
 * Builds the bulk-insert statement for the Tracking table from the shared
 * `headers` list and stores it in `insertSql` (and `previousInsertSql`).
 *
 * The statement has the form "insert into Tracking (a,b,c) values ?;".
 */
function assembleSql() {
    // NOTE(review): column names are concatenated unescaped; presumably they
    // are attribute names collected by printout — confirm they are trusted.
    insertSql = `insert into Tracking (${headers.join(",")}) values ?;`;
    previousInsertSql = insertSql;
}

/**
 * Last waterfall step: bulk-inserts the prepared rows into MySQL and keeps
 * the running `totalInserts` counter up to date.
 *
 * @param {Array<Array>} items - rows to insert, one array of column values
 *   per row, in `headers` order.
 * @param {Function} callback - called as callback(err, totalInserts); the
 *   second argument is null when the insert fails.
 */
function saveToMySQL(items, callback) {
    // Nothing to insert: skip the query, since an empty batch cannot form a
    // valid VALUES list, and report the current total unchanged.
    if (!items || items.length === 0) {
        callback(null, totalInserts);
        return;
    }

    assembleSql();
    console.time("insert sql");
    // The rows are wrapped in an outer array so the whole batch binds to the
    // single "?" placeholder of the statement.
    connection.query(insertSql, [items], function (err, result) {
        console.timeEnd("insert sql");
        if (err) {
            callback(err, null);
            return;
        }

        totalInserts += result.affectedRows;
        callback(null, totalInserts);
    });
}

/**
 * Converts one typed DynamoDB attribute value ({ N: ... }, { S: ... }, ...)
 * into the plain value stored in MySQL; unknown or empty values become "".
 */
function dynamoAttributeToString(attribute) {
    if (attribute.N) return attribute.N;
    if (attribute.S) return attribute.S;
    if (attribute.SS) return attribute.SS.toString();
    if (attribute.NS) return attribute.NS.toString();
    if (attribute.B) return attribute.B.toString('base64');
    if (attribute.M) return JSON.stringify(attribute.M);
    if (attribute.L) return JSON.stringify(attribute.L);
    // BOOL may legitimately be false, so test for presence instead of truthiness.
    if (attribute.BOOL !== undefined) return attribute.BOOL.toString();
    return "";
}

/**
 * Third waterfall step: flattens DynamoDB items into rows of plain values,
 * one column per entry of the shared `headers` list.
 *
 * When `headers` is still empty it is filled with every attribute name found
 * in the given items, in first-seen order.
 *
 * @param {Array<Object>|Object} items - the items to convert, or a whole scan
 *   response whose `Items` array holds them (the previous step passes the
 *   response object as-is).
 * @param {Function} callback - called as callback(null, rows), where each row
 *   is an array of values in `headers` order; missing attributes become "".
 */
function printout(items, callback) {
    let records = [];
    if (Array.isArray(items)) {
        records = items;
    } else if (items && Array.isArray(items.Items)) {
        records = items.Items;
    }

    if (headers.length === 0) {
        // A plain object keeps the key ordering the column list had before.
        const seenKeys = {};
        for (const record of records) {
            for (const key of Object.keys(record)) {
                seenKeys[key] = true;
            }
        }
        for (const key of Object.keys(seenKeys)) {
            headers.push(key);
        }
    }

    const out = records.map((record) =>
        headers.map((header) =>
            Object.prototype.hasOwnProperty.call(record, header)
                ? dynamoAttributeToString(record[header])
                : ""
        )
    );
    callback(null, out);
}
// Start the migration as soon as the script is loaded.
main();

被注释掉的那段代码就是递归发生的地方，但我不知道该把它放在流程中的哪个位置！

任何帮助将不胜感激！

Answer 1

在数据尚未全部取完时，不要在 scanDynamoDB 内调用回调函数。您可以另外实现一个函数，并在没有出错时递归调用它，如下所示：

/**
 * Scans DynamoDB page by page and calls back once with every item found.
 *
 * The waterfall callback is deliberately not invoked per page: readNext keeps
 * requesting the next page until the response carries no LastEvaluatedKey.
 *
 * @param {Array} data - rows from the previous step; when data[0].start_date
 *   is truthy it replaces the ':v_ca' value of the shared `query`.
 * @param {Function} callback - called as callback(err) on failure, or
 *   callback(null, items) with the items of all pages concatenated.
 */
function scanDynamoDB(data, callback) {
    if (data[0].start_date != null && data[0].start_date)
        query.ExpressionAttributeValues[':v_ca'].N = data[0].start_date;

    console.time("dynamo read");

    // Page through a shallow copy so the paging cursor never leaks into the
    // shared `query` object.
    var params = Object.assign({}, query);
    var result = []; // accumulates the items of every page

    function readNext(err, page) {
        if (err) {
            console.timeEnd("dynamo read");
            return callback(err);
        }

        if (page && page.Items) {
            for (var i = 0; i < page.Items.length; i++) {
                result.push(page.Items[i]);
            }
        }

        // A page may be empty and still have more data behind it, so the
        // stop condition is the missing LastEvaluatedKey, not Count.
        if (!page || !page.LastEvaluatedKey) {
            console.timeEnd("dynamo read");
            return callback(null, result);
        }

        params.ExclusiveStartKey = page.LastEvaluatedKey;
        dynamoDB.scan(params, readNext);
    }

    dynamoDB.scan(params, readNext);
}

Answer 2

其实我能够自己解决这个问题。

// Keep scanning for as long as canInsert is truthy; scanDynamoDB updates it
// after every page. The final callback runs once the loop stops or fails.
async.whilst(
    function () { return canInsert; },
    function (callback) {
        scanDynamoDB(query, callback);
    },
    function (err, res) {
        // Report a failed run instead of dropping the error silently.
        if (err) console.error(err);
    }
);
/**
 * Reads one page from DynamoDB and records whether more pages remain.
 *
 * Sets the shared `canInsert` flag to the page's LastEvaluatedKey (undefined
 * on the last page) and, when there is one, stores it as
 * query.ExclusiveStartKey so the next call continues from there.
 *
 * @param {*} data - unused; kept so existing callers keep working.
 * @param {Function} callback - called as callback(err) on failure or
 *   callback(null, page) after each page, so a surrounding loop can advance.
 */
function scanDynamoDB(data, callback) {
    console.time("dynamo read");

    dynamoDB.scan(query, function (err, page) {
        console.timeEnd("dynamo read");
        if (err) {
            console.dir(err);
            return callback(err);
        }

        // Update the flag on every page, including empty ones: an empty page
        // can still carry a LastEvaluatedKey, meaning there is more to come.
        canInsert = page ? page.LastEvaluatedKey : undefined;
        if (canInsert) {
            query.ExclusiveStartKey = canInsert;
        }
        callback(null, page);
    });
}

其实我也可以用 while(canInsert) 来完成。总之，这样我避免了递归，内存占用也低了很多。

在nodejs中使用Async with waterfall和Recursion

2 个答案: