问题描述
例如我的字符串是:
var str = 'Hello "Counts1 [ignore1] Counts2 [ignore2] Counts3 [ignore3] Count these too"';
如何在忽略括号内的字符的情况下获取引号内的字符串中的所有内容?
例如,希望正则表达式匹配出:“Counts1”、“Counts2”、“Counts3”、“Count these too”
到目前为止我只得到:
var regex = /".*?"/g;
它输出(方括号中的内容也被包含在内):
"Counts1 [ignore1] Counts2 [ignore2] Counts3 [ignore3] Count these too"
解决方法
应该这样做:/(?<="|\] )[\w ]+(?="| \[)/g
正向后视 `(?<="|\] )` 确保 `[\w ]+` 的左侧紧邻 `"` 或 `] `(右方括号加一个空格)。
正向前瞻 `(?="| \[)` 确保 `[\w ]+` 的右侧紧邻 `"` 或 ` [`(一个空格加左方括号)。
您还可以使用下面这种不依赖正则表达式的方法,它还支持嵌套的方括号:
/**
 * Collects the pieces of text that lie inside double quotes but outside
 * square brackets. Brackets may be nested; everything down to the matching
 * outermost `]` is skipped.
 *
 * Segments are returned exactly as they appear in the input (surrounding
 * whitespace is NOT trimmed) and empty segments are omitted. Brackets that
 * occur outside of quotes are ignored. Text following an opening quote that
 * is never closed is not returned.
 *
 * @param {string} str - The text to scan.
 * @returns {string[]} The quoted, non-bracketed segments in order of appearance.
 */
const extract = (str) => {
  const result = [];
  let start = 0; // index where the current candidate segment begins
  let level = 0; // current bracket nesting depth (only tracked inside quotes)
  let inquote = false;

  for (let i = 0; i < str.length; ++i) {
    switch (str[i]) {
      case '"':
        // A closing quote at bracket depth 0 ends the current segment.
        if (inquote && level === 0 && start < i) {
          result.push(str.slice(start, i));
        }
        // Opening or closing, the next segment can only begin after this quote.
        start = i + 1;
        inquote = !inquote;
        break;
      case '[':
        if (inquote) {
          // Only the outermost '[' terminates a segment; nested ones just
          // increase the depth.
          if (level === 0 && start < i) {
            result.push(str.slice(start, i));
          }
          ++level;
        }
        break;
      case ']':
        if (inquote) {
          if (level > 0) {
            --level;
          }
          // Back at depth 0: the next segment starts right after this ']'.
          if (level === 0) {
            start = i + 1;
          }
        }
        break;
      default:
        break;
    }
  }

  // Nothing is flushed after the loop: segments are only emitted when a
  // closing quote or an outermost '[' is reached.
  return result;
};
// Demo: print the extracted segments for a flat input, an input with nested
// brackets, and an input with several quoted sections.
const samples = [
  'Hello "Counts1 [ignore1] Counts2 [ignore2] Counts3 [ignore3] Count these too"',
  'Hello "Counts1 [[ignore1] this [2]] Counts2 [ignore2] Counts3 [ignore3] Count these too"',
  '"Counts1 [[ignore1] [2]] Counts2 [ignore2] Count these too" and [junk],and "maybe this,too [not this]"',
];
for (const sample of samples) {
  console.log(extract(sample));
}