Question

在下面的文档集合中，我想统计所有唯一句子的总单词数。总单词数应为 5（"hello\nworld, how are you?"）+ 5（"hello world, I am fine"）+ 3（"Is it raining?"）+ 5（"Look at the beautiful tiger!"）= 18

[
    {
        "sourceList": [
        {
            "source": "hello\nworld, how are you?",
            "_id": ObjectId("5f0eb9946db57c0007841153")
        },
        {
            "source": "hello world, I am fine",
            "_id": ObjectId("5f0eb9946db57c0007841153")
        },
        {
            "source": "Is it raining?",
            "_id": ObjectId("5f0eb9946db57c0007841153")
        }
        ]
    },
    {
        "sourceList": [
        {
            "source": "Look at the beautiful tiger!",
            "_id": ObjectId("5f0eb9946db57c0007841153")
        },
        {
            "source": "Is it raining?",
            "_id": ObjectId("5f0eb9946db57c0007841153")
        }
        ]
    }
]

但是使用以下查询

    // Attempts to sum the word counts of all distinct sentences stored in sourceList.
    db.collection.aggregate([
    {
        // Emit one document per element of the sourceList array.
        "$unwind": "$sourceList"
    },
    {
        $project: {
        // NOTE(review): the object below repeats the `$split` key. In a JavaScript
        // object literal a later duplicate key replaces the earlier one, so only the
        // split on " " takes effect; the split on "\n" is silently dropped and
        // "hello\nworld," stays a single token. That is why the total is 17, not 18.
        "sp": {
            $split: [
                "$sourceList.source",
                "\n"
            ],
            $split: [
                "$sourceList.source",
                " "
            ]
        }
        }
    },
    {
        // Collect the word arrays into a set, so identical sentences are kept once.
        "$group": {
            "_id": null,
            "elements": {
                $addToSet: "$sp"
            }
        }
    },
    {
        // Emit one document per distinct word array.
        "$unwind": "$elements"
    },
    {
        // sizes = number of tokens in this sentence.
        "$project": {
            "sizes": {
                "$size": "$elements"
            }
        }
    },
    {
        // Add up the per-sentence token counts into a single total.
        "$group": {
            "_id": null,
            "count": {
                "$sum": "$sizes"
            }
        }
    }
])

结果却是17。这可能是什么原因？我的意图是先按\n拆分，然后再按空格拆分。

编辑

我想同时得到唯一句子的数量，以及这些唯一句子的总单词数。

Answer 1

问题出在这里：

    "sp": {
        $split: [ "$sourceList.source", "\n" ],
        $split: [ "$sourceList.source", " " ]
    }

MongoDB只会执行第二个$split，因此hello\nworld会被当作一个字符串返回。并不存在这种“级联”语法：两处使用的是同一个JSON键$split，所以后一个会覆盖前一个。

为了解决此问题，您可以使用$reduce，对按\n拆分得到的数组中的每个元素再按空格应用$split：

    {
        $project: {
            "sp": {
                $reduce: {
                    input: { $split: [ "$sourceList.source", "\n" ] },
                    initialValue: [],
                    in: { $concatArrays: [ "$$value", { $split: [ "$$this", " " ] } ] }
                }
            }
        }
    }

# NOTE(review): this Python snippet looks unrelated to the MongoDB answer around it
# (probably an extraction artifact) -- confirm before relying on it.
# Initial value only; the for loop below rebinds i on every iteration.
i = 0
lista = []
for i in range(50):
        # Shift the loop value from 0..49 to 1..50 for this iteration.
        i += 1
        # Skip 5 so that it is never appended to lista.
        if i == 5:
            continue
        lista.append(i)

# Runs once, after the loop has finished, so it prints only the final value of i (50).
print(i)
# what I wanted from this code is like :
1
2
3
4
5
.
.
.
# repeat printing 1-5 for 10 times

Mongo Playground

Answer 2

根据评论以及@micki的答案和我以前的答案，

play

// Reports, over every sourceList entry in the collection:
//   unique_sen   - how many distinct sentences there are
//   unique_count - how many words those distinct sentences contain in total
// A word is any token obtained by splitting a sentence on "\n" and then on " ".
db.collection.aggregate([
  // One document per sourceList entry.
  { $unwind: "$sourceList" },

  // sp = the sentence's words: split on "\n", then split each piece on " " and
  // concatenate the pieces into one flat array.
  {
    $project: {
      sp: {
        $reduce: {
          input: { $split: ["$sourceList.source", "\n"] },
          initialValue: [],
          in: { $concatArrays: ["$$value", { $split: ["$$this", " "] }] }
        }
      }
    }
  },

  // Keep each distinct word array once (identical sentences collapse).
  { $group: { _id: null, elements: { $addToSet: "$sp" } } },

  // Record the number of distinct sentences before the set is unwound.
  { $project: { unique_sen: { $size: "$elements" }, elements: 1 } },

  // One document per distinct sentence, each carrying its own word count.
  { $unwind: "$elements" },
  { $project: { sizes: { $size: "$elements" }, unique_sen: 1 } },

  // Total the word counts; keep the per-sentence documents so unique_sen survives.
  {
    $group: {
      _id: null,
      unique_count: { $sum: "$sizes" },
      data: { $push: "$$ROOT" }
    }
  },

  // Every entry of data holds the same unique_sen, so take the first one.
  {
    $project: {
      unique_count: 1,
      unique_sen: { $first: "$data.unique_sen" }
    }
  }
])

更新：

您无需在查询中转义。

play

// For the documents whose url is "https://www.rootsresource.in", reports:
//   unique_sen   - how many distinct translation sources there are
//   unique_count - how many words those distinct sources contain in total
// A word is any token obtained by splitting a source on "\n" and then on " ".
db.collection.aggregate([
  // Restrict the pipeline to a single site.
  { $match: { url: "https://www.rootsresource.in" } },

  // One document per translations entry.
  { $unwind: "$translations" },

  // sp = the source's words: split on "\n", then split each piece on " " and
  // concatenate the pieces into one flat array.
  {
    $project: {
      sp: {
        $reduce: {
          input: { $split: ["$translations.source", "\n"] },
          initialValue: [],
          in: { $concatArrays: ["$$value", { $split: ["$$this", " "] }] }
        }
      }
    }
  },

  // Keep each distinct word array once (identical sentences collapse).
  { $group: { _id: null, elements: { $addToSet: "$sp" } } },

  // Record the number of distinct sentences before the set is unwound.
  { $project: { unique_sen: { $size: "$elements" }, elements: 1 } },

  // One document per distinct sentence, each carrying its own word count.
  { $unwind: "$elements" },
  { $project: { sizes: { $size: "$elements" }, unique_sen: 1 } },

  // Total the word counts; keep the per-sentence documents so unique_sen survives.
  {
    $group: {
      _id: null,
      unique_count: { $sum: "$sizes" },
      data: { $push: "$$ROOT" }
    }
  },

  // Every entry of data holds the same unique_sen, so take the first one.
  {
    $project: {
      unique_count: 1,
      unique_sen: { $first: "$data.unique_sen" }
    }
  }
])

更新：

以上查询适用于MongoDB 4.4：作为表达式的$first从4.4版本起才可以在$project阶段中使用。

对于较旧的版本，可以改用$arrayElemAt：

// Variant that avoids the $first expression by using $arrayElemAt instead.
// For the documents whose url is "https://www.rootsresource.in", reports:
//   unique_sen   - how many distinct translation sources there are
//   unique_count - how many words those distinct sources contain in total
// A word is any token obtained by splitting a source on "\n" and then on " ".
db.test.aggregate([
  // Restrict the pipeline to a single site.
  { $match: { url: "https://www.rootsresource.in" } },

  // One document per translations entry.
  { $unwind: "$translations" },

  // sp = the source's words: split on "\n", then split each piece on " " and
  // concatenate the pieces into one flat array.
  {
    $project: {
      sp: {
        $reduce: {
          input: { $split: ["$translations.source", "\n"] },
          initialValue: [],
          in: { $concatArrays: ["$$value", { $split: ["$$this", " "] }] }
        }
      }
    }
  },

  // Keep each distinct word array once (identical sentences collapse).
  { $group: { _id: null, elements: { $addToSet: "$sp" } } },

  // Record the number of distinct sentences before the set is unwound.
  { $project: { unique_sen: { $size: "$elements" }, elements: 1 } },

  // One document per distinct sentence, each carrying its own word count.
  { $unwind: "$elements" },
  { $project: { sizes: { $size: "$elements" }, unique_sen: 1 } },

  // Total the word counts; keep the per-sentence documents so unique_sen survives.
  {
    $group: {
      _id: null,
      unique_count: { $sum: "$sizes" },
      data: { $push: "$$ROOT" }
    }
  },

  // Every entry of data holds the same unique_sen, so read it from index 0.
  {
    $project: {
      unique_count: 1,
      unique_sen: { $arrayElemAt: ["$data.unique_sen", 0] }
    }
  }
])

聚合查询中的计数不正确

2 个答案: