Matching text and html. Search for a position
There is a text and its html version:
Once upon a time there was a very rich man who lived with his three daughters. The two older daughters laughed at anyone who did not dress as wel as they did. If the two of them were not resting at home, they were out shopping for as many fine dresses and hats as they could carry home.
<span>
<div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
<p class="p1" style="margin: 0px; font: 17px; font-family: Helvetica Neue"><b>Once upon a time there was
a
very rich man who <span style="color: blue">lived</span> with his three daughters.<span class="Apple-converted-space">
</span>The two older daughters laughed at anyone who di<span style="color: orange">d n</span>ot dress <span style="color: green">as</span> wel as they did.<span
class="Apple-converted-space"> </span>If the two of them were not resting at home,
they were out shopping for as many fine dresses and hats as they could <span style="color: red">carry</span> home. <span
class="Apple-converted-space"> </span></b></p><br>
</div>
<div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
</div>
</span>
A universal solution is needed to find the position of a word/phrase from the text within the HTML. The problem is that the word/phrase can be broken up by arbitrary styling markup, for example:
di<span style="color: orange">d n</span>ot
This makes it difficult to find.
I tried to work out the offsets using the Levenshtein distance, but that is a very "heavy" solution.
1
2 answers
Js file:
// Live word/phrase search over the visible text of the `.p1` paragraph.
// Typing in the <input> writes the result into the <output> element.
const text = document.querySelector('.p1');
const input = document.querySelector('input');
const output = document.querySelector('output');

// `textContent` never contains markup, so only whitespace has to be
// normalized. Every run of whitespace (including the single newlines left by
// line breaks in the HTML source) is collapsed to one space; otherwise words
// separated only by a newline would stay glued together as one "word".
const fullText = text.textContent.replace(/\s+/g, ' ').trim().split(' ');
console.log(fullText);

input.addEventListener('input', ({ target }) => {
  const query = target.value;
  console.log(Boolean(query));

  // An emptied field must not keep showing the previous result.
  if (!query) {
    output.textContent = '';
    return;
  }

  // Phrase search: more than one space-separated part, so match against the
  // normalized text as a whole.
  if (query.split(' ').length > 1) {
    output.textContent = fullText.join(' ').includes(query)
      ? query
      : 'not found';
    return;
  }

  // Single-word search: list every word that contains the query, each
  // followed by a space.
  let all = '';
  for (const word of fullText) {
    if (word.includes(query)) {
      console.log(word);
      all += `${word} `;
    }
  }
  output.textContent = all;
});
Html:
<input type="text">
<output></output>
<span>
<div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
<p class="p1" style="margin: 0px; font: 17px; font-family: Helvetica Neue"><b>Once upon a time there was
a
very rich man who <span style="color: blue">lived</span> with his three daughters.<span
class="Apple-converted-space">
</span>The two older daughters laughed at anyone who di<span style="color: orange">d n</span>ot dress <span
style="color: green">as</span> wel as they did.<span
class="Apple-converted-space"> </span>If the two of them were not resting at home,
they were out shopping for as many fine dresses and hats as they could <span style="color: red">carry</span> home. <span
class="Apple-converted-space"> </span></b></p><br>
</div>
<div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
</div>
</span>
1
Author: WebFox, 2020-09-08 13:39:00
I found a clumsy solution, but it works:
// Demo: locate the phrase inside the raw markup of the #input container and
// log the resulting positions.
const word = 'did not';
const html = document.getElementById('input').innerHTML;
console.log(searchPositions(html, word));
/**
 * Finds where a plain-text phrase sits inside an HTML string, even when the
 * phrase is interrupted by tags (e.g. `di<span ...>d n</span>ot`).
 *
 * The HTML is scanned once: everything matched by the tag pattern is skipped,
 * and for each remaining character its offset in `html` is recorded. The
 * phrase is then searched for in the tag-free text and the match boundaries
 * are translated back to offsets in `html`.
 *
 * All offsets are UTF-16 code-unit indices, i.e. directly usable with
 * `html.slice(start, end + 1)`.
 *
 * @param {string} html - Markup to search in.
 * @param {string} issueText - Plain-text phrase to locate (first occurrence).
 * @returns {{start: number, end: number}} Offsets in `html` of the first and
 *   last character of the phrase (both inclusive), or `{ start: -1, end: -1 }`
 *   when the phrase is empty or does not occur in the tag-free text.
 */
function searchPositions(html, issueText) {
  const tagPattern = /<\/?[^>]+(>|$)/g;

  // textToHtml[i] is the offset in `html` of the i-th character of plainText.
  const textToHtml = [];
  let plainText = '';
  let cursor = 0;

  const collectText = (from, to) => {
    for (let i = from; i < to; i++) {
      textToHtml.push(i);
    }
    plainText += html.slice(from, to);
  };

  for (const match of html.matchAll(tagPattern)) {
    collectText(cursor, match.index);
    // Jump over the tag itself.
    cursor = match.index + match[0].length;
  }
  // Text after the last tag (or the whole string when there are no tags).
  collectText(cursor, html.length);

  const startInText = issueText.length === 0 ? -1 : plainText.indexOf(issueText);
  if (startInText === -1) {
    // Same result shape as a hit, so callers can test `start === -1`.
    return { start: -1, end: -1 };
  }

  const endInText = startInText + issueText.length - 1;
  return {
    start: textToHtml[startInText],
    end: textToHtml[endInText],
  };
}
<div id='input'>
<span>
<div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
<p class="p1" style="margin: 0px; font: 17px; font-family: Helvetica Neue"><b>Once upon a time there was
a
very rich man who <span style="color: blue">lived</span> with his three daughters.<span
class="Apple-converted-space">
</span>The two older daughters laughed at anyone who di<span style="color: orange">d n</span>ot
dress <span style="color: green">as</span> wel as they did.<span
class="Apple-converted-space"> </span>If the two of them were not resting at home,
they were out shopping for as many fine dresses and hats as they could <span
style="color: red">carry</span> home. <span class="Apple-converted-space"> </span></b>
</p><br>
</div>
<div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
</div>
</span>
</div>
0
Author: grvctr, 2020-09-08 14:30:55