比较字符串和字符串数组的最快方法

时间:2016-06-05 21:37:22

标签: javascript performance

我有一个数组,让我们说:

   // Keywords to look for; every entry is lowercase.
   var myArray = [
     "ibira", "garmin", "hide", "park", "parque", "corrida", "trote",
     "personal", "sports", "esportes", "health", "saúde", "academia"
   ];
   // Sample sentence that is checked against the keywords.
   var myString = "I went to the park with my garmin watch";

检查我的String是否包含myArray中的任何单词的快速方法是什么?

下面是我的代码,但我不确定这是否是最好的方式...

   /**
    * Counts how many keywords occur as substrings of a sentence.
    *
    * The sentence is lowercased before matching; keywords are compared as
    * given, so they are expected to be lowercase already.
    *
    * @param {string[]} arKeywords - Keywords to look for (matched literally).
    * @param {string} [frase] - Sentence to scan.
    * @returns {number} Number of keywords found; 0 when frase is null/undefined.
    */
   function score(arKeywords, frase) {
     // `== null` is true for both null and undefined.
     if (frase == null) {
       return 0;
     }
     const sentencaMin = frase.toLowerCase();
     let indice = 0;
     for (const keyword of arKeywords) {
       // includes() matches the keyword literally (search() would compile it
       // as a regular expression) and also reports a hit at index 0, which
       // the former `search(...) > 0` test silently skipped.
       if (sentencaMin.includes(keyword)) {
         indice++;
       }
     }
     return indice;
   }

请帮助我。该函数将以很多字符串运行!

谢谢大家:)

7 个答案:

答案 0 :(得分:3)

  

检查我的String是否包含myArray中的任何单词的快速方法是什么?

将您的myArray编译为正则表达式并测试myString - 请参阅FizzyTea's answer

如果您因任何原因不想使用正则表达式,则第二快的替代方法是将String.includes()与Array.some()结合使用:

 // Keywords to search for.
 var myArray = [
   "ibira", "garmin", "hide", "park", "parque", "corrida", "trote",
   "personal", "sports", "esportes", "health", "saúde", "academia"
 ];
 // Text that is scanned for any of the keywords.
 var myString = "I went to the park with my garmin watch";

 // Logs true: some() stops at the first keyword that is a substring of myString.
 console.log(myArray.some(function (keyword) {
   return myString.includes(keyword);
 }));

有关不同方法的效果比较,请参阅https://jsfiddle.net/usq9zs61/5/

在Chrome 48 / Firefox 46,Ubuntu中完成超过100000次迭代:

  • compiledregextest(FizzyTea):16.046ms / 21.84ms
  • someincludes(此答案):76.707ms / 62.55ms
  • compiledregexmatch(FizzyTea):104.682ms / 170.58ms
  • someset(Comment by Bergi):488.474ms / 749.46ms
  • splitregexsome(David Thomas):529.529ms / 677.20ms
  • filterset(Comment by Bergi):742.857ms / 875.86ms
  • ahocorasick(ordi):1790.654ms / 1642.19ms

ordi提出的Aho-Corasick algorithm具有最佳的运行时复杂度,但在当前的JavaScript引擎上,其他方法执行得更快,除非您的myArray中的搜索字符串数量大得多。

答案 1 :(得分:3)

基于这句话,来自问题:

  

检查我的字符串是否包含myArray中 任何 单词的快速方法是什么?

(强调是我加的。)

我建议采用如下做法:它会测试所提供字符串中的“某些”(some)单词是否存在于所提供的数组中。理论上,一旦字符串中的任何一个单词与数组中的某个元素匹配,比较就会停止:

myArray

// Keywords that count as a hit.
var myArray = [
  "ibira", "garmin", "hide", "park", "parque", "corrida", "trote",
  "personal", "sports", "esportes", "health", "saúde", "academia"
];
// Sentence whose words are looked up in myArray.
var myString = "I went to the park with my garmin watch";

/**
 * Reports whether at least one word of a sentence is an element of a list.
 *
 * Note the argument order: the sentence comes first, the list second.
 * Lookup is exact (case-sensitive, whole element).
 *
 * @param {string} needles - Sentence that is split into words.
 * @param {string[]} haystack - List the words are looked up in.
 * @returns {boolean} true as soon as one word is found, otherwise false.
 */
function anyInArray(needles, haystack) {
  // Cut the sentence wherever a run of whitespace sits between two word
  // boundaries.
  var words = needles.split(/\b\s+\b/);
  for (var idx = 0; idx < words.length; idx++) {
    // indexOf() yields -1 when the word is not an element of the list.
    if (haystack.indexOf(words[idx]) !== -1) {
      return true;
    }
  }
  return false;
}

console.log(anyInArray(myString, myArray));

JS Fiddle demo

参考文献:

答案 2 :(得分:1)

对于速度,请尝试预编译的RegExp:

// Compile the pattern once, outside the loop, so it is reused for every string.
// The flags must be the string 'gi'; a bare `gi` is an undeclared identifier
// and throws a ReferenceError before the loop ever runs.
var re = new RegExp('\\b' + myArray.join('\\b|\\b') + '\\b', 'gi');
var i, matches;
for (i = 0; i < lotsOfStrings.length; i += 1) {
    // note that this retrieves the total number
    // of matches, not unique matches, which may
    // not be what you want.
    // With the g flag, match() returns an array of all matches,
    // or null when there is none.
    matches = lotsOfStrings[i].match(re);
    // do something with matches
}

请注意,RegExp是在循环外构造的。

或者,简单地测试匹配:

// Compile the pattern once, outside the loop. The flags must be a string; a
// bare `gi` is an undeclared identifier and throws a ReferenceError.
// Only 'i' is used here: with the g flag, test() resumes from re.lastIndex
// left by the previous call, so reusing the regex on a different string
// could skip matches and report false negatives.
var re = new RegExp('\\b' + myArray.join('\\b|\\b') + '\\b', 'i');
var i, matched;
for (i = 0; i < lotsOfStrings.length; i += 1) {
    // true when at least one keyword occurs in the current string
    matched = re.test(lotsOfStrings[i]);
    // do something with matched
}

答案 3 :(得分:0)

这是一种方法: https://jsbin.com/fiqegu/1/edit?js,console

// Keep only the space-separated words of myString that are elements of myArray.
var result = myString.split(' ').filter(word => myArray.indexOf(word) !== -1);

这将返回单词

显然,您可以通过在上面的代码末尾添加.length来获取计数:

// Count the space-separated words of myString that are elements of myArray.
var result = myString
  .split(' ')
  .reduce(function (count, word) {
    return myArray.indexOf(word) > -1 ? count + 1 : count;
  }, 0);

答案 4 :(得分:0)

如果您只想知道是否有匹配项,可以将数组转换为正则表达式。

我的正则表达式还使用\b来匹配单词边界,因此关键字park不会匹配字符串中的spark。

var myArray = ["ibira", "garmin", "hide", "park", "parque", "corrida", "trote", "personal", "sports", "esportes", "health", "saúde", "academia"];
var myString = "I went to the park with my garmin watch";

/**
 * Tells whether a sentence contains any of the keywords as a whole word.
 *
 * Matching is case-insensitive and anchored on \b word boundaries, so the
 * keyword "park" does not match inside "spark".
 *
 * @param {string[]} arKeywords - Keywords to look for (matched literally).
 * @param {string} [frase] - Sentence to scan.
 * @returns {boolean|number} true/false for a given sentence; 0 when frase is
 *   null or undefined (kept for backward compatibility).
 */
function score(arKeywords, frase) {
  // `== null` is true for both null and undefined.
  if (frase == null) {
    return 0;
  }
  // An empty alternation would match at every word boundary, so any
  // sentence containing a word would wrongly be reported as a hit.
  if (arKeywords.length === 0) {
    return false;
  }
  // Escape regex metacharacters so keywords such as "c++" or "a.c" are
  // matched literally instead of breaking or widening the pattern.
  const escaped = arKeywords.map(function (keyword) {
    return keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  });
  const re = new RegExp('\\b(?:' + escaped.join('|') + ')\\b', 'i');
  return re.test(frase);
}

console.log(score(myArray, myString));

答案 5 :(得分:0)

您可以用|把数组连接起来并构建一个正则表达式,这不是最快的方法,但相当简洁:

/**
 * Counts the whole-word, case-insensitive occurrences of the keywords in a
 * text. Every occurrence is counted, so a keyword that appears twice adds 2.
 *
 * @param {string[]} myArray - Keywords to look for (matched literally).
 * @param {string} [text] - Text to scan.
 * @returns {number} Total number of matches; 0 for a null/undefined text or
 *   an empty keyword list.
 */
function score(myArray, text) {
  // Nothing to scan, or nothing to look for. An empty alternation would
  // otherwise produce one empty match per word boundary.
  if (text == null || myArray.length === 0) {
    return 0;
  }
  // Escape regex metacharacters so each keyword is matched literally.
  const escaped = myArray.map(function (keyword) {
    return keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  });
  const regex = new RegExp('\\b(?:' + escaped.join('|') + ')\\b', 'gi');
  // With the g flag, match() returns every match, or null when there is none.
  const matches = text.match(regex);
  return matches ? matches.length : 0;
}

用法:

// Keywords handed to score().
var myArray = [
  "ibira", "garmin", "hide", "park", "parque", "corrida", "trote",
  "personal", "sports", "esportes", "health", "saúde", "academia"
];
// Sample sentence containing two of the keywords.
var myString = "I went to the park with my garmin watch";

// "park" and "garmin" are found.
score(myArray, myString); // 2
// Matching ignores case: "Ibira", "sports" and "Park" are found.
score(myArray, 'Ibira is doing sports in the Park'); // 3

这假设myArray不包含任何特殊字符。

答案 6 :(得分:0)

针对此问题的最有效解决方案可能是Aho-Corasick algorithm:它先用O(列表中所有字符串长度之和)的时间根据字符串列表创建初始DAG,之后每次搜索只需O(被搜索字符串的长度)的时间。