nodejs列出巨大目录挂起

时间:2019-07-17 11:15:06

标签: node.js

我正在使用下面这个方法列出包含大量文件(1万个以上,甚至更多)的目录:

scanDirStream = (needle, params) => {
            var options = {
                // find -type
                type: 'f',
                // find -name
                name: '*',
                limit: 100
            };
            for (var attrname in params) { options[attrname] = params[attrname]; }
            return new Promise((resolve, reject) => {
                var opt = [needle];
                for (var k in options) {
                    var v = options[k];
                    if (!Util.empty(v) && k != 'limit') {
                        opt.push('-' + k);
                        opt.push(v);
                    }
                };
                var res = '';
                var find = spawn('find', opt);
                var head = spawn('head',[ '-n', options.limit]);
                find.stdout.pipe(head.stdin);

                head.stdout.on('data', _data => {
                    var buff = Buffer.from(_data, 'utf-8').toString();
                    if (buff != '') res += buff;
                })
                find.stderr.on('data', error => {
                    return reject(Buffer.from(error, 'utf-8').toString());
                });
                head.stderr.on('data', error => {
                    return reject(Buffer.from(error, 'utf-8').toString());
                });
                head.on('close', (_) => {
                    data = res.split('\n');
                    data = data.slice(0, data.length - 1);
                    return resolve(data);
                });
            });
        }//scanDirStream

我调用它时传入目录路径以及要列出的文件数量上限(该上限会传给head -n命令),find命令则列出名称匹配的文件,像这样:

test.js的内容如下:
// Ask for at most 100 MP3 files under mediaRoot and report how many came back.
const mp3Search = { name: "*.mp3", limit: 100 };
scanDirStream(mediaRoot, mp3Search)
    .then(files => {
        console.debug("files %d", files.length);
    })
    .catch(failure => console.error(failure))

假设当前mediaRoot中有10000个文件,那么会发生

  • 当limit的值小于10000时,Node.js的主循环会挂起:我能得到控制台输出,但用node test.js运行的脚本不会终止。
  • 当limit的值大于或等于10000时,文件能被正确列出并输出,脚本也会正常终止。

要进行尝试,请指向一个包含大约8000到100000个文件的目录并运行test.js脚本,先将limit的值设为100,然后再改为10000:

// Hangs node: the limit is lower than the number of files in mediaRoot.
scanDirStream(mediaRoot, { name: "*.mp3", limit: 100 })

您有时也会收到此错误:

// Does not hang node: the limit is at least the number of files in mediaRoot.
scanDirStream(mediaRoot, { name: "*.mp3", limit: 10000 })

1 个答案:

答案 0 :(得分:1)

我猜想您是从head进程那里收到了EPIPE(管道中断)错误,因为head一旦读取到所需数量的文件名,该进程便会终止。

我看不出使用head的理由;最好在Node代码中跟踪文件计数。

下面是我的实现——请注意,它本身不会记录找到的文件名;这由fileCallback负责。如果该回调认为某个文件应计入limit,则必须返回真值(truthy)。

const { spawn } = require("child_process");

/**
 * Walk `rootPath` with `/usr/bin/find` and count the files accepted by
 * `fileCallback`, optionally stopping early once `limit` files were accepted.
 *
 * The function does not collect filenames itself; `fileCallback` receives each
 * path and decides (by returning a truthy value) whether it counts.
 *
 * @param {string} rootPath - Starting point handed to `find`.
 * @param {string[]} findParams - Extra `find` arguments (e.g. `["-type", "f"]`).
 * @param {number} [limit=0] - Stop after this many accepted files; 0 means no limit.
 * @param {(filename: string) => *} [fileCallback] - Called once per path found.
 * @returns {Promise<number>} Resolves with the number of accepted files.
 *   Rejects with a string when `find` exits non-zero or dies from a signal we
 *   did not send, with the spawn Error when `find` cannot be started, or with
 *   whatever `fileCallback` threw.
 */
function findFiles(
  rootPath,
  findParams,
  limit = 0,
  fileCallback = () => true,
) {
  return new Promise((resolve, reject) => {
    // Files found so far.
    let nFound = 0;

    // Whether we killed `find` on our own (limit reached or callback failed).
    let killed = false;

    // Bytes received so far that do not yet form a complete filename.
    let buffer = Buffer.alloc(0);

    // `-print0` delimits paths with NUL bytes, so filenames containing
    // newlines are handled correctly.
    const args = [rootPath].concat(findParams).concat("-print0");

    const findProc = spawn("/usr/bin/find", args, { stdio: "pipe" });

    // A failed spawn is reported through 'error'; without a listener it
    // would be thrown as an uncaught exception.
    findProc.on("error", reject);

    // Nobody consumes stderr here; drain it so `find` cannot block on a
    // full stderr pipe while printing many warnings.
    findProc.stderr.resume();

    // The process ends either because it is done or because we stopped it.
    findProc.on("close", (code, signal) => {
      if (code) {
        return reject("find died with error " + code);
      }
      if (!killed && signal) {
        return reject("find died from signal " + signal);
      }
      resolve(nFound);
    });

    findProc.stdout.on("data", chunk => {
      // Chunks that were already in flight when we killed `find` must not
      // reach the callback or push the count past `limit`.
      if (killed) {
        return;
      }

      buffer = buffer.length === 0 ? chunk : Buffer.concat([buffer, chunk]);

      let searchOffset = 0;
      try {
        for (;;) {
          // Find the next NUL byte (which terminates a filename).
          const nextOffset = buffer.indexOf(0, searchOffset);
          // None left: the remainder is the start of a filename that will
          // be completed by a later chunk.
          if (nextOffset === -1) {
            break;
          }
          const filename = buffer.toString("utf8", searchOffset, nextOffset);

          if (fileCallback(filename)) {
            nFound++;
            if (limit > 0 && nFound >= limit) {
              killed = true;
              findProc.kill();
              break;
            }
          }
          // Continue with the byte after the NUL.
          searchOffset = nextOffset + 1;
        }
      } catch (err) {
        // A throwing callback ends the walk and rejects instead of crashing.
        killed = true;
        findProc.kill();
        reject(err);
        return;
      }

      // Drop the part of the buffer that has already been processed.
      if (searchOffset > 0) {
        buffer = buffer.subarray(searchOffset);
      }
    });
  });
}

// Print every regular file found under the root (stopping after 1000),
// then print the final count; a rejection is rethrown wrapped in an Error.
const printAndAccept = path => {
  console.log(path);
  return true;
};

findFiles("/Users/akx", ["-type", "f"], 1000, printAndAccept).then(
  count => {
    console.log(count);
  },
  reason => {
    throw new Error(reason);
  },
);