问题描述
很久以前,有一个非常有钱的人和他的三个女儿住在一起。两个大女儿嘲笑任何穿着不如她们讲究的人。如果她们两个不在家休息,就是在外面购物,买下她们能带回家的尽可能多的精美衣服和帽子。
<span> <div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0)"> <p class="p1" style="margin: 0px; font: 17px; font-family: Helvetica Neue"><b>Once upon a time there was a very rich man who <span style="color: blue">lived</span> with his three daughters.<span class="Apple-converted-space"> </span>The two older daughters laughed at anyone who di<span style="color: orange">d n</span>ot dress <span style="color: green">as</span> wel as they did.<span class="Apple-converted-space"> </span>If the two of them were not resting at home,they were out shopping for as many fine dresses and hats as they Could <span style="color: red">carry</span> home. <span class="Apple-converted-space"> </span></b></p><br> </div> <div style="font-family:Calibri,0)"> </div> </span>
需要一个通用的解决方案,用来查找纯文本中的单词/短语在 HTML 字符串中的位置。问题是单词/短语内部可能夹有带样式的标签,例如:
di<span style="color: orange">d n</span>ot
曾尝试使用 Levenshtein 距离来检测位置偏移,但这是一个非常“困难”的解决方案。
解决方法
// Demo driver: only runs where a DOM is available, so the snippet can also be
// loaded in non-browser environments without throwing on `document`.
if (typeof document !== 'undefined') {
  const html = document.getElementById('input').innerHTML;
  const word = 'did not';
  console.log(searchPositions(html, word));
}

/**
 * Locates a plain-text word/phrase inside an HTML string, even when the
 * phrase is interrupted by markup (e.g. `di<span>d n</span>ot` for "did not").
 *
 * Tags are recognised with a regular expression, not a real HTML parser:
 * anything from `<` up to the next `>` (or the end of the string) is treated
 * as a tag. Character references such as `&amp;` are not decoded, so
 * `issueText` must match the raw text exactly as it appears between tags.
 *
 * @param {string} html - HTML source to search in.
 * @param {string} issueText - Plain text to look for (first occurrence only).
 * @returns {{start: number, end: number} | null} UTF-16 offsets into `html`
 *   of the first and the last (inclusive) character of the match, or `null`
 *   when `issueText` is empty or does not occur in the tag-free text.
 */
function searchPositions(html, issueText) {
  // An empty needle has no first/last character to report.
  if (issueText.length === 0) {
    return null;
  }

  const tagPattern = /<\/?[^>]+(>|$)/g;

  // plainText is `html` without tags; htmlOffsets[i] is the offset in `html`
  // of plainText[i]. Both are built in one pass, using UTF-16 code-unit
  // offsets throughout so they agree with regex indices and indexOf().
  const htmlOffsets = [];
  let plainText = '';
  let cursor = 0;

  const appendTextRun = (from, to) => {
    for (let offset = from; offset < to; offset++) {
      htmlOffsets.push(offset);
    }
    plainText += html.slice(from, to);
  };

  for (const tag of html.matchAll(tagPattern)) {
    appendTextRun(cursor, tag.index);
    cursor = tag.index + tag[0].length;
  }
  appendTextRun(cursor, html.length);

  const textStart = plainText.indexOf(issueText);
  if (textStart === -1) {
    return null;
  }
  const textEnd = textStart + issueText.length - 1;

  return {
    start: htmlOffsets[textStart],
    end: htmlOffsets[textEnd],
  };
}
<div id='input'> <span> <div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0)"> <p class="p1" style="margin: 0px; font: 17px; font-family: Helvetica Neue"><b>Once upon a time there was a very rich man who <span style="color: blue">lived</span> with his three daughters.<span class="Apple-converted-space"> </span>The two older daughters laughed at anyone who di<span style="color: orange">d n</span>ot dress <span style="color: green">as</span> wel as they did.<span class="Apple-converted-space"> </span>If the two of them were not resting at home,they were out shopping for as many fine dresses and hats as they could <span style="color: red">carry</span> home. <span class="Apple-converted-space"> </span></b> </p><br> </div> <div style="font-family:Calibri,0)"> </div> </span> </div>