动机：

Question

动机：

我有一个涉及许多 worker 的架构，它们会像这样“消费”文档：

// Pseudo-code: whenever a worker reports it is ready, hand it the next document.
// The handler has to be `async`; `await` inside a plain arrow function is a syntax error.
worker.on('readyForAnotherDoc', async () => worker.consume( await cursor.next() ));

那种伪代码 - 我正在检查真实代码中的cursor.hasNext()。有数百名工人，cursor.next()可能会突然突然发生200次请求。

我试图绕过 mongodb node.js 驱动程序中的一个错误/怪癖：如果对 cursor.next() 的“重叠”请求过多（即多个请求同时进行），就会导致错误。

背景：

似乎MongoDB Node.js驱动程序没有正确处理cursor.next发出一连串请求的情况。尝试运行此代码：

// Reproduction script: fills a test collection, then fires many un-awaited
// cursor.next() calls at a single shared cursor.
(async function() {

  // create a collection for testing:
  let db = await require('mongodb').MongoClient.connect('mongodb://localhost:27017/tester-db-478364');
  await db.collection("test").drop();
  // insert 1000 documents one at a time; `num` runs from 0 to 999
  for(let i = 0; i < 1000; i++) {
    await db.collection("test").insertOne({num:i, foo:'bar'});
  }

  let cursor = await db.collection("test").find({});

  // Pull one document from the shared cursor and print its `num`.
  async function go() {
    let doc = await cursor.next();
    console.log(doc.num);
  }

  // start 1000 simultaneous requests to `cursor.next()`:
  // go() is not awaited, so every call is issued before any of them has resolved
  for(let i = 0; i < 1000; i++) {
    go();
  }

})();

这就是它为我输出的内容：

0
1
2
3
4
5
6
7
8
9
/home/joe/Downloads/testtt/node_modules/mongodb-core/lib/connection/pool.js:410
    if(workItem.socketTimeout) {
               ^

TypeError: Cannot read property 'socketTimeout' of null
    at Connection.messageHandler (/home/me/Downloads/testtt/node_modules/mongodb-core/lib/connection/pool.js:410:16)
    at Socket.<anonymous> (/home/me/Downloads/testtt/node_modules/mongodb-core/lib/connection/connection.js:361:20)
    at emitOne (events.js:115:13)
    at Socket.emit (events.js:210:7)
    at addChunk (_stream_readable.js:252:12)
    at readableAddChunk (_stream_readable.js:239:11)
    at Socket.Readable.push (_stream_readable.js:197:10)
    at TCP.onread (net.js:589:20)

所以在当前批次耗尽之前看起来一切正常。但这很奇怪，因为即使您在 .find({}) 之后添加 .batchSize(100)，也无法修复它。但有趣的是，如果你添加 .batchSize(5)，你会得到这个：

0
1
2
3
4
0
1
2
3
/home/joe/Downloads/testtt/node_modules/mongodb-core/lib/connection/pool.js:410
    if(workItem.socketTimeout) {
               ^

TypeError: Cannot read property 'socketTimeout' of null
etc...

不确定那里发生了什么......

尝试解决它：

但是，让我们说我们刚刚解决了这个问题。我们假设我们稍微更改了go函数：

// Flag intended to mark that a cursor.next() call is currently in flight.
let cursorBusy = false;
// Fetch one document and print its `num`, waiting first if the cursor is flagged busy.
async function go() {
  if(cursorBusy) await waitForCursor();
  // NOTE(review): the flag is set only here, after the await has resumed, not at the
  // moment waitForCursor() saw it cleared — the check and the set are separate steps.
  cursorBusy = true;
  let doc = await cursor.next();
  cursorBusy = false;
  console.log(doc.num);
}
// Returns a promise that resolves once a 50 ms poll finds cursorBusy cleared.
// It only observes the flag; it does not claim it.
function waitForCursor() {
  return new Promise(resolve => {
    let si = setInterval(() => {
      if(!cursorBusy) {
        resolve();
        clearInterval(si);
      }
    }, 50);
  });
}

这导致了一个新的错误，似乎出现在console.log(doc.num)这里和那里：

...
359
415
466
(node:16259) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 12): MongoError: clientcursor already in use? driver problem?
427
433
459
...

我认为这并不能避免该错误，因为 setInterval 这种做法存在某种“竞争条件”。有趣的是，这次是一条不同的错误信息。

问题：有没有办法测试光标当前是否“繁忙”？在修复此错误之前（如果它确实是一个错误），还有其他可行的解决方法吗？

This question有一些相似（但绝对不一样）的行为，similar issues似乎出现在第三方node.js libs中。

Answer 1

您的代码清单中存在一些错误。所以实际上只需要清理一下：

const MongoClient = require('mongodb').MongoClient;

/**
 * Rebuilds the `test` collection with 1000 documents, then starts 100
 * cursor.next() calls against one cursor and prints each document's `num`.
 * Errors are logged; the connection is closed once every call has settled.
 */
(async function() {

  let db;

  try {
    db = await MongoClient.connect('mongodb://localhost/test');

    await db.collection('test').drop();

    await db.collection('test').insertMany(
      Array(1000).fill(1).map((e,num) => ({ num, foo: 'bar' }))
    );

    // This is not async. It returns immediately
    let cursor = db.collection('test').find();

    // Fetch one document from the shared cursor and print its `num`.
    async function go() {
      let doc = await cursor.next();   // Suspends this call only, until its document arrives
      console.log(doc.num);
    }

    // Keep every promise: the loop itself does not wait, so without collecting
    // them the `finally` below would close the connection while fetches are
    // still pending and any rejection would go unhandled.
    const pending = [];
    for ( let i = 0; i < 100; i++ ) {
      pending.push(go());  // Note that these "await" internally
    }
    await Promise.all(pending);

  } catch(e) {
    console.error(e);
  } finally {
    // `db` is still undefined when connect() itself failed
    if (db) await db.close();
  }

})();

要真正做到万无一失，你应该 await 每一次操作。因此，在循环中改用 await go()，并在 go() 返回时加上 Promise.resolve() 以求稳妥，同时通过减小批量大小来强制触发出错条件：

const MongoClient = require('mongodb').MongoClient;

/**
 * Rebuilds the `test` collection with 1000 documents, then reads 100 of them
 * strictly one after another from a cursor with batchSize(1), printing each
 * `num` and finally 'done'. Errors are logged; the connection is always closed.
 */
(async function() {

  let db;

  try {
    db = await MongoClient.connect('mongodb://localhost/test');

    await db.collection('test').drop();

    await db.collection('test').insertMany(
      Array(1000).fill(1).map((e,num) => ({ num, foo: 'bar' }))
    );

    // batchSize(1) keeps each batch to a single document
    let cursor = db.collection('test').find().batchSize(1);

    // Fetch one document from the cursor and print its `num`.
    async function go() {
      let doc = await cursor.next();
      console.log(doc.num);
      return Promise.resolve();
    }

    // Each fetch is awaited before the next one starts
    for ( let i = 0; i < 100; i++ ) {
      await go();
    }

    console.log('done');


  } catch(e) {
    console.error(e);
  } finally {
    // `db` is still undefined when connect() itself failed; calling close()
    // on it would throw a TypeError out of the finally block
    if (db) await db.close();
  }


})();

按顺序正确打印出来。缩短了，但实际上按预期转到99：

0
1
2
3
4
5
6
7
8
9
10
(etc..)

解释主要在代码的注释中：您似乎没有弄清哪些操作是 async 的，哪些不是。

所以 .find() 返回一个 Cursor，但它不是 async 方法，会立即返回。这是因为它只是一个操作句柄，此时不做任何事情。MongoDB 驱动程序（所有这些驱动程序）在实际请求“获取”数据之前，不会联系服务器，也不会在服务器那一端建立游标。

当您调用 .next() 时，才会与服务器进行实际通信并返回一“批”结果。“批处理”实际上只影响后续调用是否需要再次访问服务器来检索数据，因为当前“批”中可能还有“更多”结果，可以在发出另一个“批处理”请求之前先被“取空”。无论如何，每次对 .next() 的调用都被视为 async，无论是否有外部 I/O。

通常，您会在每次迭代时配合调用 .hasNext()（它也是 async 的），因为在没有更多结果的 Cursor 上调用 .next() 是错误的。它通常也是一种“循环控制”的方法，如下所示：

/**
 * Rebuilds the `test` collection with 1000 documents, then walks the cursor
 * to its end, using hasNext() as the loop condition and printing each `num`.
 * Errors are logged; the connection is always closed.
 */
(async function() {

  let db;

  try {
    db = await MongoClient.connect('mongodb://localhost/test');

    await db.collection('test').drop();

    await db.collection('test').insertMany(
      Array(1000).fill(1).map((e,num) => ({ num, foo: 'bar' }))
    );

    let cursor = db.collection('test').find();

    // Fetch one document from the cursor and print its `num`.
    async function go() {
      let doc = await cursor.next();
      console.log(doc.num);
    }

    // Loop on the cursor itself rather than on a fixed count
    while( await cursor.hasNext() ) {  // Check the cursor still has results
      // Finish this fetch before asking hasNext() again; otherwise the two
      // calls overlap and the connection may be closed with fetches pending
      await go();
    }

  } catch(e) {
    console.error(e);
  } finally {
    // `db` is still undefined when connect() itself failed
    if (db) await db.close();
  }

})();

然后循环变化直到光标结束。

关于“并发”的注意事项也不是你在这里所期望的。如果你确实想要并行发出多个请求，那么你仍然需要等待当前的游标提取。如果您不这样做，那么您要求服务器返回所有请求的相同数据，而不是“迭代”游标中的顺序数据。

你似乎感到困惑的是某些实用函数（特别是 mongoose 的 asyncEach()）在实现并行“fetch”时的做法。其代码（凭记忆）基本上是人为地插入 setTimeout() 以等待“下一个 tick”，这基本上意味着每个 .next() 仍然必须实际触发。

如上所述，它是“人为的”，因为批次只是有效地.map()（在底层代码中的某个地方）成为一个更大的批次。

但正如所展示的那样。由于实际“等待”每个.next()，基本的预期用法确实按预期工作。就像你应该的那样。

Answer 2

~~编辑：虽然这个答案可行，但 my new answer 是解决此问题的更好办法。保留此答案以供后人参考。~~ 编辑2：另一个答案是错的 :(

好的，所以我整理了waitForCursor函数，因此它没有竞争条件的东西，因此似乎工作正常：

// True while some caller holds the cursor; claimed in waitForCursorLock(), released in go().
let cursorBusy = false;

/**
 * Fetches one document from the shared `cursor` and prints its `num`,
 * holding the cursor lock for the duration of the fetch.
 * @returns {Promise<void>} rejects with whatever cursor.next() rejects with
 */
async function go() {
  await waitForCursorLock();
  let doc;
  try {
    doc = await cursor.next();
  } finally {
    // Release even when next() rejects; otherwise the flag stays set and
    // every other waiter keeps polling forever.
    cursorBusy = false;
  }
  console.log(doc.num);
}

/**
 * Polls every 50 ms until the cursor is free, then claims it.
 * The flag is checked and set inside the same timer callback, so two
 * waiters cannot both observe it as free.
 * @returns {Promise<void>} resolves once this caller holds the lock
 */
function waitForCursorLock() {
  return new Promise(resolve => {
    const si = setInterval(() => {
      if(!cursorBusy) {
        cursorBusy = true;
        clearInterval(si);
        resolve();
      }
    }, 50);
  });
}

虽然这很糟糕，所以我可能不会接受这个答案。如果您能想出更好的产品，请发布！

Answer 3

编辑：这不起作用。（见评论）

受到@ NeilLunn解释的启发，我们修复原始代码所需要做的就是在创建游标后立即添加await cursor.hasNext();：

// Variant of the reproduction that awaits cursor.hasNext() once before
// firing the un-awaited cursor.next() calls.
// NOTE(review): the surrounding answer is marked by its author as not working.
(async function() {

  // create a collection for testing:
  let db = await require('mongodb').MongoClient.connect('mongodb://localhost:27017/tester-db-478364');

  await db.collection("test").drop();

  // insert 1000 documents in one call; `num` is the array index
  await db.collection('test').insertMany(
    Array(1000).fill(1).map((e,num) => ({ num, foo: 'bar' }))
  );

  let cursor = db.collection("test").find({});
  await cursor.hasNext(); // <-- add this line to "pre-instantiate" cursor

  // Pull one document from the shared cursor and print its `num`.
  async function go() {
    let doc = await cursor.next();
    console.log(doc.num);
  }

  // start 100 simultaneous requests to `cursor.next()`:
  // go() is not awaited, so every call is issued before any of them has resolved
  for(let i = 0; i < 100; i++) {
    go();
  }

})();

这是因为光标实际上没有被实例化，直到使用（例如.next，.hasNext等），并且驱动程序似乎有一个怪癖它不检查光标是否已被实例化，因此最终向服务器发送许多请求，这可能导致错误（可能是太多连接）。我们通过在前面调用“无用的”await cursor.hasNext()来解决这个问题，以便将光标实例化，然后我们就可以对cursor.next进行尽可能多的“并发”调用而不会出错。

所以尽管在这个帖子中进行了其他讨论，但似乎可以尽可能快地调用cursor.next（并行）而不会出现错误或异常行为，只要你首先触发（并等待）游标实例化。

理想情况下，驱动程序只需检查游标初始化是否已经发生，并等待它完成，但也许有一些奇怪的技术原因导致无法/不会完成。

多次同时调用`cursor.next（）`会导致驱动程序

动机：

背景：

尝试解决它：

3 个答案:

编辑：这不起作用。（见评论）

多次同时调用`cursor.next（）`会导致驱动程序

动机：

背景：

尝试解决它：

3 个答案:

编辑：这不起作用。 （见评论）

编辑：这不起作用。（见评论）