Matching text and html. Search for a position

There is a text and its html version:

Once upon a time there was a very rich man who lived with his three daughters. The two older daughters laughed at anyone who did not dress as wel as they did. If the two of them were not resting at home, they were out shopping for as many fine dresses and hats as they could carry home.

<span>
        <div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
            <p class="p1" style="margin: 0px; font: 17px; font-family: Helvetica Neue"><b>Once upon a time there was
                    a
                    very rich man who <span style="color: blue">lived</span> with his three daughters.<span class="Apple-converted-space">&nbsp;
                    </span>The two older daughters laughed at anyone who di<span style="color: orange">d n</span>ot dress <span style="color: green">as</span> wel as they did.<span
                        class="Apple-converted-space">&nbsp; </span>If the two of them were not resting at home,
                    they were out shopping for as many fine dresses and hats as they could <span style="color: red">carry</span> home. <span
                        class="Apple-converted-space">&nbsp;</span></b></p><br>
        </div>
        <div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
        </div>
    </span>

I need a universal solution for finding the position of a word/phrase from the text inside the HTML. The problem is that the word/phrase can be split by arbitrary styling markup, for example:

di<span style="color: orange">d n</span>ot

This makes the phrase difficult to find.

I tried to track the offsets using the Levenshtein distance, but that is a very "heavy" solution.

Author: grvctr, 2020-09-08

2 answers

Js file:

// Live search of a word or phrase inside the plain text of the `.p1` paragraph.
// The result is written to the <output> element on every `input` event.
const text = document.querySelector('.p1');
const input = document.querySelector('input');
const output = document.querySelector('output');

// `textContent` carries no markup, so only whitespace needs normalizing.
// `\s` also matches the non-breaking space (U+00A0) produced by `&nbsp;`,
// so every run of spaces, newlines, tabs and nbsp collapses to one space.
const normalizedText = text.textContent.replace(/\s+/g, ' ').trim();

// Individual words of the paragraph, used for single-word lookups.
const fullText = normalizedText.split(' ');

input.addEventListener('input', ({ target }) => {
    const query = target.value;

    // An emptied field must not keep showing the previous result.
    if (!query) {
        output.textContent = '';
        return;
    }

    // A query containing a space is treated as a phrase and matched against
    // the whole normalized text.
    if (query.split(' ').length > 1) {
        output.textContent = normalizedText.includes(query) ? query : 'not found';
        return;
    }

    // A single-word query lists every word that contains it, each followed
    // by one space.
    output.textContent = fullText
        .filter((word) => word.includes(query))
        .map((word) => `${word} `)
        .join('');
});

Html:

<input type="text">
<output></output>

<span>
        <div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
            <p class="p1" style="margin: 0px; font: 17px; font-family: Helvetica Neue"><b>Once upon a time there was
                    a
                    very rich man who <span style="color: blue">lived</span> with his three daughters.<span
                        class="Apple-converted-space">&nbsp;
                    </span>The two older daughters laughed at anyone who di<span style="color: orange">d n</span>ot dress <span
                        style="color: green">as</span> wel as they did.<span
                        class="Apple-converted-space">&nbsp; </span>If the two of them were not resting at home,
                    they were out shopping for as many fine dresses and hats as they could <span style="color: red">carry</span> home. <span
                        class="Apple-converted-space">&nbsp;</span></b></p><br>
        </div>
        <div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
        </div>
    </span>
 1
Author: WebFox, 2020-09-08 13:39:00

I found a clumsy solution, but it works:

// Demo: look up the phrase inside the markup of the #input container and
// print the resulting positions.
const html = document.getElementById('input').innerHTML;
const word = 'did not';
const positions = searchPositions(html, word);

console.log(positions);

/**
 * Finds where a plain-text phrase sits inside an HTML string, even when the
 * phrase is interrupted by tags (e.g. `di<span>d n</span>ot` for "did not").
 *
 * The tags are stripped to obtain the visible text, the phrase is searched in
 * that text, and the hit is mapped back to indices in the original HTML.
 * All indices are UTF-16 code-unit offsets, i.e. directly usable with
 * `html.slice(start, end + 1)`.
 *
 * Only tags are removed: entities such as `&nbsp;` and the whitespace of the
 * markup are compared literally, exactly as they appear in `html`.
 *
 * @param {string} html - HTML source to search in.
 * @param {string} issueText - Phrase to look for (first occurrence is used).
 * @returns {{start: number, end: number}} Index of the first and of the last
 *   character of the phrase inside `html` (both inclusive), or
 *   `{ start: -1, end: -1 }` when the phrase is empty or not present.
 */
function searchPositions(html, issueText) {
    const notFound = { start: -1, end: -1 };

    // An empty phrase has no first/last character to report.
    if (!issueText) {
        return notFound;
    }

    const regexp = /<\/?[^>]+(>|$)/g;

    // Visible text (HTML without tags) and, for each of its characters,
    // the index that character has in the original HTML.
    let textTrue = '';
    const htmlIndexes = [];

    const appendText = (from, to) => {
        for (let i = from; i < to; i++) {
            htmlIndexes.push(i);
        }
        textTrue += html.slice(from, to);
    };

    // Single pass: copy the text between consecutive tags, skip the tags.
    let cursor = 0;
    for (const match of html.matchAll(regexp)) {
        appendText(cursor, match.index);
        cursor = match.index + match[0].length;
    }
    appendText(cursor, html.length);

    const inTextStartPosition = textTrue.indexOf(issueText);
    if (inTextStartPosition === -1) {
        return notFound;
    }
    const inTextEndPosition = inTextStartPosition + issueText.length - 1;

    return {
        start: htmlIndexes[inTextStartPosition],
        end: htmlIndexes[inTextEndPosition]
    };
}
<div id='input'>
        <span>
            <div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
                <p class="p1" style="margin: 0px; font: 17px; font-family: Helvetica Neue"><b>Once upon a time there was
                        a
                        very rich man who <span style="color: blue">lived</span> with his three daughters.<span
                            class="Apple-converted-space">&nbsp;
                        </span>The two older daughters laughed at anyone who di<span style="color: orange">d n</span>ot
                        dress <span style="color: green">as</span> wel as they did.<span
                            class="Apple-converted-space">&nbsp; </span>If the two of them were not resting at home,
                        they were out shopping for as many fine dresses and hats as they could <span
                            style="color: red">carry</span> home. <span class="Apple-converted-space">&nbsp;</span></b>
                </p><br>
            </div>
            <div style="font-family:Calibri,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
            </div>
        </span>
    </div>
 0
Author: grvctr, 2020-09-08 14:30:55