如何创建以单词为键、以单词出现频率为值的字典?

时间:2016-06-02 20:15:08

标签: javascript dictionary

我正在编写一个脚本,想用一个数组中的单词创建一个字典。该数组名为 removeArray,如下所示:

["RBD", "X", "RBD3", "C", "92173", "GJHGWO.NAYE", "SAMBORNSiPOSSSTHRa1", "GJHGX4.NAYE", "SAMBORNSiPOSSSTHRa", "X3", "GJHGX6.NAYE", "GJHGX8.NAYE", "SAMBORNSiPOSSSTHRa2", "GJHGXA.NAYE", "GJHGXC.NAYE"]

我从文本区域中提取了这些单词,如下所示:

"RBD|X|RBD3|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa1"
"RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X3|RBD3|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa1"
"RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa2"
"RBD|X|RBD|C|92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa2"
"RBD|X3|RBD|C|92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"

我想按以下方式创建一个字典:键是 removeArray 数组中包含的单词,值是该单词在 textarea 中出现的频率。例如,第一个键是“RBD”,它的频率是 10,因为该单词在 textarea 中出现了 10 次。

// Sketch of the desired result: each key is a word, each value is how often it occurs.
// The "..." stands for the entries not listed, so this line is not runnable as written.
myDictionary = { "RBD" : 10, X: 4, 92173: 6, ...}; 

也就是说,第一个键是“RBD”,其频率为 10,因为该单词在文本区域中出现了 10 次。为了达到这个目的,我首先尝试提取不重复的单词,如下所示:

// Read the text of the element with id "texto" and split it wherever one of the
// characters in the class /[["\|~]/ occurs: a literal "[", a double quote, "|" or "~".
var splitWords = document.getElementById("texto").value.split(/[["\|~]/);
// Performs the same read-and-split again and stores it in the same variable.
splitWords = document.getElementById("texto").value.split(/[["\|~]/)
// De-duplicate the tokens with lodash. Note: uniqueWords is assigned without a
// var declaration here.
uniqueWords = _.uniq(splitWords);
console.log(uniqueWords);
// Per the lodash documentation, _.remove mutates its array argument and returns the
// elements for which the predicate is truthy. The predicate accepts every token that
// is not '', ' ' or '\n', so removeArray receives those tokens and they are taken
// out of uniqueWords.
var removeArray  = _.remove(uniqueWords, function (word) {return word !== '' && word !== ' ' && word !== '\n'});

但是我不知道如何将这个数组转换成符合上述要求的字典。非常感谢大家的帮助,我的完整代码如下:

<!DOCTYPE html>
<html>
<script src="lodash.js"></script> 
<body>
<p id="demo"></p>
<textarea cols=150 rows=15 id="texto">
"RBD|X|RBD3|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa1"
"RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X3|RBD3|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa1"
"RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa2"
"RBD|X|RBD|C|92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa2"
"RBD|X3|RBD|C|92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"
</textarea>
<script>
// Read the textarea with id "texto" and split its text wherever one of the characters
// in the class /[["\|~]/ occurs: a literal "[", a double quote, "|" or "~".
var splitWords = document.getElementById("texto").value.split(/[["\|~]/);
// Performs the same read-and-split again and stores it in the same variable.
splitWords = document.getElementById("texto").value.split(/[["\|~]/)
// De-duplicate the tokens with lodash. Note: uniqueWords is assigned without a
// var declaration here.
uniqueWords = _.uniq(splitWords);
console.log(uniqueWords);
// Per the lodash documentation, _.remove mutates its array argument and returns the
// elements for which the predicate is truthy. The predicate accepts every token that
// is not '', ' ' or '\n', so removeArray receives those tokens and they are taken
// out of uniqueWords.
var removeArray  = _.remove(uniqueWords, function (word) {return word !== '' && word !== ' ' && word !== '\n'});
// Log the filtered, de-duplicated word list.
console.log(removeArray);
</script>
</body>
</html>

2 个答案:

答案 0 :(得分:2)

这可以通过Array.prototype.reduce非常简单地完成。像这样:

// Build a word -> frequency dictionary from the text of the element with id "texto".
// The text is split wherever one of the characters in the class /[["\|~]/ occurs:
// a literal "[", a double quote, "|" or "~".
var splitWords = document.getElementById("texto").value.split(/[["\|~]/);

// The accumulator is created with Object.create(null) so that it has no prototype:
// words such as "constructor" or "toString" would otherwise find an inherited
// member, fail the "=== undefined" test and be "incremented" into NaN.
var dict = splitWords.reduce(function(p,c) {
    // Splitting on the quotes leaves "", " " and "\n" tokens between the quoted
    // groups; trimming reduces all of them to "" so they can be skipped instead of
    // being counted as words.
    var word = c.trim();
    if (word === '') {
        return p;
    }
    if (p[word] === undefined) {
        // First occurrence of this word.
        p[word] = 1;
    } else {
        p[word]++;
    }
    return p;
},Object.create(null));

console.log(dict);
<textarea cols=150 rows=15 id="texto">
"RBD|X|RBD3|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa1"
"RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X3|RBD3|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa1"
"RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa2"
"RBD|X|RBD|C|92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa2"
"RBD|X3|RBD|C|92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"
</textarea>

这段代码循环遍历所有单词,并检查每个单词是否已存在于字典中。如果不存在,则将其添加到字典中并把计数设为 1;否则只需将计数加 1。

答案 1 :(得分:1)

您可以使用 reduce 构建字典对象,并巧妙地利用 || 运算符来累加计数。无需事先对列表去重,因为去重会在计数过程中同时完成。空字符串也可以在遍历时顺便剔除,如下所示:

// Build a word -> frequency dictionary from the text of the element with id "texto".
// The text is split wherever one of the characters in the class /[["\|~]/ occurs:
// a literal "[", a double quote, "|" or "~".
var splitWords = document.getElementById("texto").value.split(/[["\|~]/);
// The accumulator is created with Object.create(null) so that it has no prototype.
// With a plain {} the lookup counts[word] would return inherited members for words
// such as "constructor", and "|| 0" would then not fall back to 0.
var obj = splitWords.reduce(function (counts, rawWord) {
    // Trimming turns the whitespace-only tokens left by the split into "".
    var word = rawWord.trim();
    if (word.length) {
        // A missing entry is undefined, so "|| 0" starts the count at zero.
        counts[word] = (counts[word] || 0) + 1;
    }
    return counts;
}, Object.create(null));
console.log(obj);
<p id="demo"></p>
<textarea cols=150 rows=15 id="texto">
"RBD|X|RBD3|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa1"
"RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X3|RBD3|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa1"
"RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa2"
"RBD|X|RBD|C|92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa2"
"RBD|X3|RBD|C|92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"
</textarea>