有没有办法在javascript中使用正则表达式来实现有效且准确的搜索功能

问题描述

我一直在从事一个项目,该项目要求其用户搜索沃尔玛或亚马逊搜索功能之类的产品。但是似乎随时都可以找到一个可行的解决方案,我会遇到另一个问题。这是我当前的代码段。我将在下面解释代码的作用。

const keywords = [
  'oranges','red bull','red bull energy drink','carnation breakfast essentials','carnation instant breakfast','organic oranges','oargens','nesquik powder',"welch's orange juice",'mandarin oranges','bananas','nesquik chocolate powder','jimmy dean sausage','organic bananas','nesquik','nesquik no sugar',"welch's white grape",'great value','great value apple juice','lemon','lemon fruit','avocados','avocados,each','apple juice',];

class SearchApi {
  constructor(keywords,query) {
    this.keywords = keywords;
    this.query = query;
  }

  findWithRegex(keyword,query) {
    const pattern = query
      .split('')
      .map(q => {
        return `(?=.*${q})`;
      })
      .join('');

    const regex = new RegExp(`${pattern}`,'g');

    return keyword.match(regex);
  }

  matchKeywords() {
    const str = this.query.trim().toLowerCase().substring(0,3);
    const queryLength = this.query.trim().length;

      return this.keywords.filter(keyword => {
        const keywordSubstr = keyword.substring(0,3);
        const equalInitials = keyword.substring(0,1) === this.query.toLowerCase().substring(0,1);

        return this.findWithRegex(keywordSubstr,str) && equalInitials && this.findWithRegex(keyword.substring(queryLength,queryLength - 3),this.query.trim().substring(queryLength,queryLength - 3));
    });
  }
}

const searchApi = new SearchApi(keywords,'organic banan');
searchApi.matchKeywords();

代码解释

在这里所做的基本上是查询后,我比较查询和关键字的前三个字符和最后三个字符,还检查查询和关键字的首字母是否相同,因为如果有人键入字母“ o ”,我只想显示以该字母开头的关键字。

当我输入“ organic banan ”作为查询得到["organic oranges","organic bananas"](应为["organic bananas"])作为查询时,它工作正常,但很不幸。

这是因为正则表达式函数会在使用的有机橘子的最后三个字母中找到字符“ a ”和“ n ”。从这里获得的有关如何有效执行此操作的任何进一步帮助将对我有所帮助。

解决方法

“按需搜索”或“自动完成”功能通常使用专门的数据结构和算法来实现,而不使用正则表达式(除非您的搜索功能全部与正则表达式搜索有关。)

您可能希望使用二进制搜索来搜索数组中的字符串,如下所示(我在下面选择此功能仅用于演示目的,不推荐使用此功能。Source)。 您可能会发现许多适合您的环境和需求的软件包,例如fast-string-search,它使用N-API和boyer-moore-magiclen来使事情变得更快。

在数据结构方面,经常建议使用前缀树(TRIE)并将其用于实现快速自动完成功能。 Here是显示TRIE基本概念的简单实现。

var movies = [
    "ACADEMY DINOSAUR","ACE GOLDFINGER","ADAPTATION HOLES","AFFAIR PREJUDICE","BENEATH RUSH","BERETS AGENT","BETRAYED REAR","BEVERLY OUTLAW","BIKINI BORROWERS","YENTL IDAHO","YOUNG LANGUAGE","YOUTH KICK","ZHIVAGO CORE","ZOOLANDER FICTION","ZORRO ARK"
];

var searchBinary = function (needle,haystack,case_insensitive) {
    if (needle == "") return [];
    var haystackLength = haystack.length;
    var letterNumber = needle.length;
    case_insensitive = (typeof (case_insensitive) === 'undefined' || case_insensitive) ? true : false;
    needle = (case_insensitive) ? needle.toLowerCase() : needle;

    /* start binary search,Get middle position */
    var getElementPosition = findElement()

    /* get interval and return result array */
    if (getElementPosition == -1) return [];
    return getRangeElement = findRangeElement()

    function findElement() {
        if (typeof (haystack) === 'undefined' || !haystackLength) return -1;

        var high = haystack.length - 1;
        var low = 0;

        while (low <= high) {
            mid = parseInt((low + high) / 2);
            var element = haystack[mid].substr(0,letterNumber);
            element = (case_insensitive) ? element.toLowerCase() : element;

            if (element > needle) {
                high = mid - 1;
            } else if (element < needle) {
                low = mid + 1;
            } else {

                return mid;
            }
        }
        return -1;
    }

    function findRangeElement() {    
        for (i = getElementPosition; i > 0; i--) {
            var element = (case_insensitive) ? haystack[i].substr(0,letterNumber).toLowerCase() : haystack[i].substr(0,letterNumber);
            if (element != needle) {
                var start = i + 1;
                break;
            } else {
                var start = 0;
            }
        }
        for (i = getElementPosition; i < haystackLength; i++) {
            var element = (case_insensitive) ? haystack[i].substr(0,letterNumber);
            if (element != needle) {
                var end = i;
                break;
            } else {
                var end = haystackLength - 1;
            }
        }
        var result = [];
        for (i = start; i < end; i++) {
            result.push(haystack[i])
        }    
        return result;
    }    
};


testBinary = searchBinary("BIKINI",movies,false);
console.log('searchBinary("BIKINI",false) = [' + testBinary + ']');

testBinary = searchBinary("b",false);
console.log('searchBinary("b",true);
console.log('searchBinary("b",true) = [' + testBinary + ']');