根据带占位符的模式过滤字符串数组

问题描述

我一直在努力做到这一点（每天！）至少一个月了。我搜索了stackoverflow，我阅读了MDN 数组、字符串、正则表达式等，一遍又一遍地引用，但没有任何帮助。我对正则表达式有些熟悉，但这超出了我的理解。我相信这里有人会用一行代码解决这个问题，这就是为什么我一直等到我要把我的电脑扔出窗外才寻求帮助。我真的很想为自己找到解决方案，但我就是做不到。

我正在玩一个密码游戏，其中随机字母被用来对一首诗或故事进行“编码”，我可能不需要在这里描述它，但这里有一张图片以防万一。

所以我认为创建一个表单是一个很好的练习，您可以在其中输入由字母、数字和“？”的组合组成的模式。对于未知。在图像中，您会看到用“YACAZ”表示的单词，该单词中有两个 A，因此您知道这两个字母是相同的。因此，在我的函数中，您可以使用 0 - 9 之间的任意数字作为占位符，因此使用相同的示例，您将输入“?1a1?”。

这是我目前所拥有的。每次我尝试遍历正则表达式给我的数组时，我最终都会在同一个地方，尝试 - 并且失败 - 将两组嵌套数组相互比较。无论我如何尝试分解它们并进行比较，它最终都会变成一个巨大的无功能混乱。我可以获取占位符索引，但接下来呢？

我不反对 lodash，但我对它的经验很少，所以也许它可以帮助解决这个问题？它不会做任何普通的 javascript 无法完成的事情，是吗？

const words = [
  { word: 'bargain',score: 1700 },{ word: 'balloon',score: 1613 },{ word: 'bastion',score: 1299 },{ word: 'babylon',score: 634 },{ word: 'based on',score: 425 },{ word: 'bassoon',score: 371 },{ word: 'baldwin',score: 359 },{ word: 'bahrain',score: 318 },{ word: 'balmain',score: 249 },{ word: 'basilan',score: 218 },{ word: 'bang on',score: 209 },{ word: 'baseman',score: 204 },{ word: 'batsman',{ word: 'bakunin',score: 143 },{ word: 'barchan',score: 135 },{ word: 'bastian',score: 133 },{ word: 'balagan',score: 118 },{ word: 'balafon',score: 113 },{ word: 'bank on',{ word: 'ballpen',score: 111 },]

const input = 'ba1122n' // those are numeric 1's,not lowercase L's

//matching words from the list above should be 'balloon' and 'bassoon',using the input 'ba1122n'.

export const stringDiff = (a,b) => {
  let match = false,error = ''
  const results = []

  // Idk why I have a reducer inside a loop. I have tried many,many,MANY other
  // ways of looping,usually 'for (const i in whatever)` but they all end up with
  // the same problem. I usually have a loop inside a reducer,not the other way around.
  
  const forLoop = (array) => {
   
    a.reduce((acc,curr,next) => {
      const aa = [...curr.input.matchAll(curr[0])] // this tells me how many 0's,1's,etc.

      const bChar = b.charat(curr.index) // this tells me what letters are at those index positions
      const bb = [...b.matchAll(bChar)] // if the array 'bb' is not the same length,it's not a match
      if (aa.length === bb.length) {
        /* console output:
        word bargain

        aa:
        0: ["2",index: 4,input: "ba1122n",groups: undefined]
        1: ["2",index: 5,groups: undefined]

        bb:
        0: ["a",index: 1,input: "bargain",groups: undefined]
        1: ["a",groups: undefined]
        */
       
        // matching the lengths only helps narrow down ***some*** of the non-matching words.
        // How do I match each index of each letter in each word with
        // each index of each placeholder character??? And check the letters match ***EACH OTHER***????
        // with any number of placholders for any digit 0 - 9?
      }
    },[])

    return array
  }

  console.log('forLoop',forLoop([]))

  return { match,results,error }
}

stringDiff(words,input)

解决方法

根据我的上述评论，我仍然不太确定下一个提供的方法是否在某种程度上符合 OP 的目标。

但如果是关于从自定义替换/替换模式创建正则表达式，然后仅通过此正则表达式过滤单词列表（甚至可能捕获正确的字符，可以尝试以下代码。

不过它有一个限制；用于描述自定义占位符模式的数字范围限制为从 1 到 9（零将被排除在外），因为这与正则表达式捕获组的定义/限制完全匹配（和如何访问它们）。

function createRegexFromSubstitutePattern(pattern) {
  // - turn e.g. `ba1122n` into `/ba(\w)\1(\w)\2n/`
  // - turn e.g. `?1a1?` into `/.(\w)a\1./`
  // - turn e.g. `?1b22a1?` into `/.(\w)b(\w)\2a\1./`
  return RegExp(
    [1,2,3,4,5,6,7,8,9].reduce((regXString,placeholder) =>

      // programmatically replace the first occurrence of
      // any digit (from 1 to 9) with a capture group pattern
      // for a single word character.
      regXString.replace(RegExp(placeholder,''),'(\\w)'),// provide the initial input/pattern as start value.
      String(pattern)
    )
    // replace any further occurrence of any digit (from 1 to 9)
    // by a back reference pattern which matches the group's index.
    .replace((/([1-9])/g),'\\$1')

    // replace the wildcard placeholder with the regex wildcard.
    .replace((/\?/g),'.'),'');
}

const wordList = [
  { word: 'bargain',score: 1700 },{ word: 'balloon',score: 1613 },{ word: 'bastion',score: 1299 },{ word: 'babylon',score: 634 },{ word: 'based on',score: 425 },{ word: 'bassoon',score: 371 },{ word: 'baldwin',score: 359 },{ word: 'bahrain',score: 318 },{ word: 'balmain',score: 249 },{ word: 'basilan',score: 218 },{ word: 'bang on',score: 209 },{ word: 'baseman',score: 204 },{ word: 'batsman',{ word: 'bakunin',score: 143 },{ word: 'barchan',score: 135 },{ word: 'bastian',score: 133 },{ word: 'balagan',score: 118 },{ word: 'balafon',score: 113 },{ word: 'bank on',{ word: 'ballpen',score: 111 },];
const input = 'ba1122n';

const regXWord = createRegexFromSubstitutePattern(input);

console.log(
  'filter word list ...',wordList
    .filter(item => regXWord.test(item.word))
);
console.log(
  "filter word list and map each word's match and captures ...",wordList
    .filter(item => regXWord.test(item.word))
    .map(item => item.word.match(regXWord))
);

console.log(
  "createRegexFromSubstitutePattern('ba1122n')",createRegexFromSubstitutePattern('ba1122n')
);
console.log(
  "createRegexFromSubstitutePattern('?1a1?')",createRegexFromSubstitutePattern('?1a1?')
);
console.log(
  "createRegexFromSubstitutePattern('?1b22a1?')",createRegexFromSubstitutePattern('?1b22a1?')
);

.as-console-wrapper { min-height: 100%!important; top: 0; }

arrays arrays arrays javascript reduce reduce regex regex regex string string