使用fast-csv同步(按顺序)读取多个文件

时间:2018-07-26 12:59:34

标签: javascript node.js csv parsing asynchronous

我正在尝试使用fast-csv同步读取多个文件,看起来应该像这样:

read file 1
execute something while reading
read file 2
execute something while reading (it must be execute after first execution's file that's why I need to do this synchronously)
...

这是我的代码简化了:

const csv = require('fast-csv');
const PROCEDURES = [
    { "name": "p1", "file": "p1.csv" },
    { "name": "p2", "file": "p2.csv" },
];

/**
 * Opens `file` as a UTF-8 read stream, skips its first line, and feeds the
 * rest of the stream to fast-csv, logging one message per parsed row.
 *
 * The function returns nothing and all of the parsing happens later inside
 * event callbacks, so a caller has no way to know when the file is finished.
 *
 * @param name - Procedure name; not used anywhere in this body.
 * @param file - Path handed to fs.createReadStream.
 */
const launchProcedure = (name, file) => {

    try {   
        const fs = require("fs");
        const stream = fs.createReadStream(file, {
            encoding: 'utf8'
        });
        console.log('launching parsing...');

        // once(): the handler runs for the first 'readable' event only.
        stream.once('readable', () => {
        // ignore first line
        // Reads one character at a time until the first '\n' (or until read() returns null).
        let chunk;
        while (null !== (chunk = stream.read(1))) {
            if (chunk == '\n') {
                break;
            }
        }

            // CSV parsing
            // The parser is given the same stream, which has already been read past the first line.
            const csvStream = csv.fromStream(stream, {
                renameHeaders: false,
                headers: true,
                delimiter: ',',
                rowDelimiter: '\n',
                quoteHeaders: false,
                quoteColumns: false
            }).on("data", data => {
                console.log('procedure execution...');
                // I execute a procedure...

            }).on("error", error => {
                // NOTE(review): `logger` is not defined in this snippet; presumably provided elsewhere.
                logger.error(error);
            }).on("end", data => {
                logger.info(data);
            });
         });

    }
    // Only synchronous throws from the try body land here; failures reported
    // through stream/parser events are handled (or not) by the listeners above.
    catch (e) {
        logger.info(e);
    }
}

// forEach calls launchProcedure for every entry back-to-back. launchProcedure
// returns nothing and does its parsing inside event callbacks, so every file is
// launched before any CSV row of an earlier file has been handled.
PROCEDURES.forEach(procedure => {
    launchProcedure(procedure.name, procedure.file);
});

输出将是:

launching parsing...
launching parsing...
procedure execution...
procedure execution...

问题出在stream.once上,但我需要用它来跳过第一行。我尝试过把函数Promise化(promisify)并使用async/await……(在执行过程时我也遇到过类似的问题,当时是通过csvStream.pause()和csvStream.resume()解决的。)

有什么想法吗?

3 个答案:

答案 0 :(得分:0)

使用Promise似乎是解决此问题的好方法。但请注意,当您使用new Promise(executor)创建新的Promise时,executor会被立即执行(executed immediately)。因此,您需要把后一个Promise的创建推迟到前一个Promise完成之后。

要把launchProcedure函数“Promise化”(promisify),需要在函数开头返回一个新的Promise:

const launchProcedure = (name, file) => {
    return new Promise((resolve, reject) => {

然后,只要解析完成,您就需要调用resolve(代表成功)和reject(代表失败)。

最后,我们需要把这些Promise串联起来:

// The first file is started immediately; every later file is started only from
// inside a then() callback, i.e. after the promise of the previous file fulfilled.
let promise = launchProcedure(PROCEDURES[0].name, PROCEDURES[0].file);
for (let i = 1; i < PROCEDURES.length; i++) {
    // `let i` gives each iteration its own binding, so the deferred callback reads the right index.
    promise = promise.then(() => launchProcedure(PROCEDURES[i].name, PROCEDURES[i].file));
}

请注意,我在then中使用了lambda(箭头)函数来延迟Promise的创建。(顺便说一下,还有更好的方式(nicer ways)可以把多个Promise串联起来。)

最终代码如下:

const csv = require('fast-csv');
const PROCEDURES = [
    { "name": "p1", "file": "p1.csv" },
    { "name": "p2", "file": "p2.csv" },
];

/**
 * Parses one CSV file with fast-csv and reports completion through a Promise.
 *
 * The first line of the file is skipped before the stream is handed to the
 * parser.
 *
 * @param {string} name - Procedure name (not used inside this function).
 * @param {string} file - Path of the CSV file to read.
 * @returns {Promise<void>} Resolves when the parser emits "end". Rejects when
 *     the file stream or the parser emits "error", or when setup throws.
 */
const launchProcedure = (name, file) => {
    return new Promise((resolve, reject) => {
        try {
            const fs = require("fs");
            const stream = fs.createReadStream(file, {
                encoding: 'utf8'
            });
            console.log('launching parsing...');

            // Failures of the file stream itself (e.g. the file does not exist)
            // arrive as 'error' events, not as exceptions, so the try/catch
            // below never sees them. Without this listener the promise would
            // never settle and the chain of files would stall.
            stream.on('error', error => {
                logger.error(error);
                reject(error);
            });

            stream.once('readable', () => {
                // ignore first line
                let chunk;
                while (null !== (chunk = stream.read(1))) {
                    if (chunk === '\n') {
                        break;
                    }
                }

                // CSV parsing
                csv.fromStream(stream, {
                    renameHeaders: false,
                    headers: true,
                    delimiter: ',',
                    rowDelimiter: '\n',
                    quoteHeaders: false,
                    quoteColumns: false
                }).on("data", data => {
                    console.log('procedure execution...');
                    // I execute a procedure...

                }).on("error", error => {
                    logger.error(error);
                    reject(error);
                }).on("end", data => {
                    logger.info(data);
                    // Settling a promise twice is a no-op, so an "end" that
                    // follows an "error" cannot turn the rejection into success.
                    resolve();
                });
            });
        }
        catch (e) {
            logger.info(e);
            reject(e);
        }
    });
}

// Chain the files strictly one after another. Starting from an already
// resolved promise lets the same loop handle every entry and makes an empty
// PROCEDURES list a no-op instead of a TypeError on PROCEDURES[0].
let promise = Promise.resolve();
for (const { name, file } of PROCEDURES) {
    // The arrow function delays the call (and thereby the creation of the next
    // promise) until the previous file has been fully parsed.
    promise = promise.then(() => launchProcedure(name, file));
}
promise
    .then(() => { console.log('all files parsed'); })
    // A rejection skips the remaining files; handle it here so it does not
    // end up as an unhandled promise rejection.
    .catch(error => { console.error('parsing aborted:', error); });

答案 1 :(得分:0)

这里的问题是,launchProcedure必须是async函数才能在其中使用await。另一个问题是,将async/await与Array.forEach一起使用并不是最佳选择(see here)。

您可以使用“ for-of”循环执行此操作,然后在循环体内等待:

const csv = require('fast-csv');
const fs = require('fs');
const PROCEDURES = [
    { "name": "p1", "file": "p1.csv" },
    { "name": "p2", "file": "p2.csv" },
];

/**
 * Parses one CSV file with fast-csv and only returns once parsing is finished.
 *
 * Errors are logged and swallowed, so the returned promise always fulfils and
 * a caller looping over several files continues with the next one.
 *
 * @param {string} name - Procedure name (not used inside this function).
 * @param {string} file - Path of the CSV file to read.
 * @returns {Promise<void>} Fulfils after the parser emitted "end" or after a
 *     failure has been logged.
 */
const launchProcedure = async (name, file) => {

    try {
        // `fs` is required once at the top of the file.
        const stream = fs.createReadStream(file, {
            encoding: 'utf8'
        });

        console.log('launching parsing...');

        // Wait for the first 'readable' (or 'error') event. once() matters:
        // a 'readable' listener that stays attached keeps the stream from
        // flowing when it is later piped into the parser.
        const ready = await new Promise(resolve => {
            stream.once('readable', () => resolve(true));
            stream.once('error', () => resolve(false));
        });

        console.log('file is ready: ', ready)

        if (!ready) {
            throw new Error(`Unable to read file (file-name: "${file}")`);
        }

        // ignore first line
        let chunk;
        while (null !== (chunk = stream.read(1))) {
            if (chunk === '\n') {
                break;
            }
        }

        // CSV parsing: wait for the parser to finish. Otherwise this function
        // would return right after wiring up the listeners and the next file
        // would start while this one is still being parsed.
        await new Promise((resolve, reject) => {
            // A read error after the first chunk must end the wait as well.
            stream.once('error', reject);

            csv.fromStream(stream, {
                renameHeaders: false,
                headers: true,
                delimiter: ',',
                rowDelimiter: '\n',
                quoteHeaders: false,
                quoteColumns: false
            }).on("data", data => {
                console.log('procedure execution...');
                // I execute a procedure...

            }).on("error", reject)
            .on("end", data => {
                logger.info(data);
                resolve();
            });
        });

        console.log(`Done reading file (file-name: "${file}")`);
    }
    catch (e) {
        // Every failure (unreadable file, stream error, parser error) ends up
        // here, so it is logged at error level.
        logger.error(e);
    }
}


// Wrap the iteration into an async function (this makes `await` available inside it)
// and use "for-of" instead of forEach so that every call is awaited before the next one starts.
/**
 * Runs launchProcedure for each entry, one after another.
 *
 * @param {Array<{name: string, file: string}>} procedures - Entries to process, in order.
 * @returns {Promise<void>} Fulfils after the last entry has been processed.
 */
const runProcedures = async (procedures) => {
    // Iterate the argument (not the outer PROCEDURES constant) and declare the
    // loop variable; an undeclared one is an implicit global and throws a
    // ReferenceError in strict mode / ES modules.
    for (const procedure of procedures) {
        await launchProcedure(procedure.name, procedure.file);
    }
}

runProcedures(PROCEDURES);

输出:

launching parsing...
file is ready:  true
Done reading file (file-name: "p1.csv")
launching parsing...
file is ready:  true
Done reading file (file-name: "p2.csv")

答案 2 :(得分:0)

嗨,在看到这些回答之前,我已经自己找到了解决方案(抱歉,发布得晚了)!

/**
 * Parses one CSV file with fast-csv and signals completion through a
 * Node-style callback.
 *
 * @param {string} name - Procedure name (not used inside this function).
 * @param {string} file - Path of the CSV file to read.
 * @param {function(Error=): void} callback - Called exactly once: without an
 *     argument when the parser emitted "end", or with the error when the file
 *     stream or the parser failed or setup threw.
 */
const launchProcedure = (name, file, callback) => {
    // A series runner waits for this callback before starting the next file,
    // so every exit path has to call it, and only once (an "end" event may
    // still follow an "error").
    let finished = false;
    const finish = error => {
        if (finished) {
            return;
        }
        finished = true;
        callback(error);
    };

    try {
        const fs = require("fs");
        const stream = fs.createReadStream(file, {
            encoding: 'utf8'
        });
        console.log('launching parsing...');

        // Failures of the file stream itself (e.g. a missing file) arrive as
        // 'error' events, not as exceptions caught by the try/catch.
        stream.on('error', error => {
            logger.error(error);
            finish(error);
        });

        stream.once('readable', () => {
            // ignore first line
            let chunk;
            while (null !== (chunk = stream.read(1))) {
                if (chunk === '\n') {
                    break;
                }
            }

            // CSV parsing
            csv.fromStream(stream, {
                renameHeaders: false,
                headers: true,
                delimiter: ',',
                rowDelimiter: '\n',
                quoteHeaders: false,
                quoteColumns: false
            }).on("data", data => {
                console.log('procedure execution...');
                // I execute a procedure...

            }).on("error", error => {
                logger.error(error);
                finish(error);
            }).on("end", data => {
                logger.info(data);
                finish();
            });
        });

    }
    catch (e) {
        logger.info(e);
        finish(e);
    }
}
// Process the files strictly in order: eachSeries starts the next entry only
// after the callback of the current one has been called.
async.eachSeries(PROCEDURES, (procedure, callback) => {
    // launchProcedure takes (name, file, callback). An extra leading argument
    // would shift all three and `callback` would receive the file path.
    launchProcedure(procedure.name, procedure.file, callback);
}, error => {
    if (error) {
        logger.error(error);
    }
    else {
        logger.info("done");
    }
});