import ArticleTranscriptGenerationStatus from "src/types/article-transcript-generation-status";
import { generateRandomString, removeHtmlFromText } from "src/utils/string";
import FileSaver from "file-saver";
import { IDiffExtendedItem, IDiffExtendedItemWithGroup, ILineBreak, IPron, IQaPron, ITranscript } from "./types";
import ArticleNarrationTypes from "src/types/article-narration-types";
import IArticle from "src/types/article";
import _ from "lodash";
import ITemplate from "src/types/template";

/** Returns true only when transcript generation finished successfully. */
export const isTranscriptAvailable = (transcriptStatus?: ArticleTranscriptGenerationStatus) => transcriptStatus === ArticleTranscriptGenerationStatus.SUCCESS;

/** Returns true only when transcript generation ended in failure. */
export const isTranscriptFailed = (transcriptStatus?: ArticleTranscriptGenerationStatus) => transcriptStatus === ArticleTranscriptGenerationStatus.FAILURE;

/** Returns true only while transcript generation is still pending. */
export const isTranscriptPending = (transcriptStatus?: ArticleTranscriptGenerationStatus) => transcriptStatus === ArticleTranscriptGenerationStatus.PENDING;

/**
 * Tags every run of two or more consecutive items of the SAME groupable type
 * (INSERTION or DELETION) with a shared `groupId` and a zero-based `groupItemPos`.
 *
 * Items that are not part of such a run (including a lone INSERTION/DELETION) are
 * returned untouched, i.e. without `groupId`/`groupItemPos`. The input array and its
 * items are not mutated; tagged items are shallow copies.
 *
 * @param diff - Diff items in document order.
 * @returns A new array with the same length and order as `diff`.
 */
export const detectGroupsAndAddIdsToDiff = (diff: IDiffExtendedItem[]): IDiffExtendedItemWithGroup[] => {
    const groupTypes = ["INSERTION", "DELETION"];

    const diffTransformed = [...diff];

    // Assigns one fresh group id to the inclusive range [startPos, endPos]; single-item runs are left ungrouped.
    const tagRun = (startPos: number | null, endPos: number | null) => {
        if (startPos === null || endPos === null || startPos === endPos) return;

        const groupId = generateRandomString();
        for (let pos = startPos; pos <= endPos; pos++) {
            const groupItemPos = pos - startPos;
            const taggedItem = { ...diffTransformed[pos], groupId, groupItemPos };
            diffTransformed[pos] = taggedItem;
        }
    };

    let startPos: number | null = null;
    let endPos: number | null = null;
    // Type of the run currently being collected; null when no run is open.
    let runType: string | null = null;

    for (let pos = 0; pos < diff.length; pos++) {
        const itemType: string = diff[pos].type;
        const isGroupable = groupTypes.includes(itemType);

        if (isGroupable && itemType === runType) {
            // Same type as the open run: just extend it.
            endPos = pos;
            continue;
        }

        // The run (if any) ends here: either a non-groupable item or a switch between
        // INSERTION and DELETION. Close it, then possibly open a new run at this item.
        tagRun(startPos, endPos);

        if (isGroupable) {
            startPos = pos;
            endPos = pos;
            runType = itemType;
        } else {
            startPos = null;
            endPos = null;
            runType = null;
        }
    }

    // Close a run that reaches the end of the diff.
    tagRun(startPos, endPos);

    return diffTransformed as IDiffExtendedItemWithGroup[];
};

/**
 * Normalizes a word/phrase for comparison: trims it, collapses runs of spaces into a
 * single space, lowercases it and strips the punctuation characters listed in the regex.
 */
const normalizeWord = (text: string) => {
    const tokens = text
        .trim()
        .split(" ")
        .filter((token) => token.trim().length > 0);

    const collapsed = tokens.join(" ").toLowerCase();

    return collapsed.replace(/[#$%&'()*+,-./:;<=>?@[\]^_`{|}~]/g, "");
};

/**
 * Collapses all diff items that share a `groupId` into the first item of that group.
 *
 * The first item of each group keeps its position in the output. For every later item of
 * the same group, its text is appended (space separated) to the merged item: `hypothesis`
 * for INSERTION items, `reference` for DELETION items. Items without a `groupId` are
 * passed through unchanged. Neither the input array nor its items are mutated.
 *
 * Runs in a single pass with a groupId -> output-index map instead of copying the
 * accumulator and scanning it for every item.
 *
 * @param diff - Diff items, typically the output of the group detection step.
 * @returns A new array with one entry per ungrouped item and one per group.
 */
export function mergeDiffItemsWithByGroupId<T extends IDiffExtendedItemWithGroup>(diff: T[]): T[] {
    const merged: T[] = [];
    // Position in `merged` of the first item seen for each group id.
    const indexByGroupId = new Map<unknown, number>();

    for (const curr of diff) {
        const groupItemIndex = curr.groupId ? indexByGroupId.get(curr.groupId) : undefined;

        if (groupItemIndex === undefined) {
            if (curr.groupId) {
                indexByGroupId.set(curr.groupId, merged.length);
            }
            merged.push(curr);
            continue;
        }

        // Copy before editing so the caller's item objects stay untouched.
        const item = { ...merged[groupItemIndex] };

        if (curr.type === "INSERTION") {
            item.hypothesis = `${item.hypothesis} ${curr.hypothesis}`;
        } else if (curr.type === "DELETION") {
            item.reference = `${item.reference} ${curr.reference}`;
        }

        merged[groupItemIndex] = item;
    }

    return merged;
}

/**
 * Returns a shallow copy of `diff` in which every item carries `startTime` / `endTime`
 * looked up from the transcript words (or `null` when no position could be determined).
 *
 * Strategy, as implemented below:
 * - The transcript is flattened into one list of words (all results, all alternatives).
 * - A DELETION is not spoken, so it gets the gap between the transcript word matching the
 *   previous diff item and the one matching the next non-deletion diff item.
 * - Any other item is located by its own text plus up to four context words on each side
 *   (the immediate neighbours are skipped, see the comments further down).
 * - Items with no previous word are pinned to the first transcript word, items with no next
 *   word to the last transcript word.
 *
 * NOTE(review): all timestamps are built as `Number(`${seconds}.${nanos}`)`. That is only the
 * correct decimal value if `nanos` already reads as the fractional digits; if `nanos` is an
 * integer nanosecond count, values below 100,000,000 lose their leading zeros — confirm
 * against the transcript producer.
 *
 * @param diff - Diff items in document order; not mutated.
 * @param transcript - Transcript results, each with `alternatives[].words[]` carrying `word`, `startTime` and `endTime`.
 * @returns A new array of diff items extended with `startTime` and `endTime`.
 */
export const addTimestampsToDiff = (diff: IDiffExtendedItem[], transcript: ITranscript["transcript"]) => {
    // Flatten transcript -> alternatives -> words into a single list of word entries.
    const words = transcript.map((i) => i.alternatives.map((a) => a.words.map((w) => w))).flat(2);

    // Text used to match a diff item against the transcript: `reference` for DELETION/EQUAL,
    // `hypothesis` for INSERTION/SUBSTITUTION. Empty text and unknown types yield null.
    const getTextByType = (item: IDiffExtendedItem | null) => {
        if (!item) return null;

        switch (item.type) {
            case "DELETION":
                return item.reference || null;
            case "INSERTION":
                return item.hypothesis || null;
            case "SUBSTITUTION":
                return item.hypothesis || null;
            case "EQUAL":
                return item.reference || null;
            default:
                return null;
        }
    };

    // Finds the first pair of adjacent transcript words matching (actualWord, nextWord) after
    // normalization and returns the gap between them: st = end of the first, et = start of the second.
    // The caller passes the word BEFORE the deletion as `actualWord`. Returns null when no pair matches.
    const getDeletionItemStartAndEndTime = (actualWord: string, nextWord: string) => {
        let wordFound = null;
        let nWordFound = null;

        for (let i = 0; i < words.length; i++) {
            const word = words[i];
            if (normalizeWord(word.word) === normalizeWord(actualWord)) {
                const nWord = words?.[i + 1];

                if (!!nWord && normalizeWord(nWord.word) === normalizeWord(nextWord)) {
                    wordFound = word;
                    nWordFound = nWord;
                    break;
                }
            }
        }

        return wordFound && nWordFound && { st: Number(`${wordFound.endTime.seconds}.${wordFound.endTime.nanos}`), et: Number(`${nWordFound.startTime.seconds}.${nWordFound.startTime.nanos}`) };
    };

    // Finds the first transcript word equal to `actualWord` whose surrounding words also match the
    // supplied context. prevWord2..5 / nextWord2..5 are compared with the transcript words 2..5
    // positions before/after the candidate; a null context word is not checked.
    // Returns the candidate's own start/end time, or null when nothing matches.
    const getItemStartAndEndTime = (
        actualWord: string,
        prevItem: IDiffExtendedItem | null,
        nextItem: IDiffExtendedItem | null,
        prevWord2: string | null,
        prevWord3: string | null,
        prevWord4: string | null,
        prevWord5: string | null,
        nextWord2: string | null,
        nextWord3: string | null,
        nextWord4: string | null,
        nextWord5: string | null,
    ) => {
        let wordFound = null;

        for (let i = 0; i < words.length; i++) {
            const word = words[i];

            if (normalizeWord(word.word) === normalizeWord(actualWord)) {
                //? If the actual word has a deletion right before/after it, the transcript offset must be reduced by one.
                //? The caller skips the immediate previous/next diff item when collecting context words,
                //? and in the diff a deletion counts as such a skippable item.
                //? The deletion is not present in the transcript though, so the decrement keeps
                //? the skipping in the diff and in the transcript aligned.
                const optionalPrevDec = prevItem?.type === "DELETION" ? 1 : 0;
                const optionalNextDec = nextItem?.type === "DELETION" ? 1 : 0;

                const pWord2 = words?.[i - (2 - optionalPrevDec)];
                const pWord3 = words?.[i - (3 - optionalPrevDec)];
                const pWord4 = words?.[i - (4 - optionalPrevDec)];
                const pWord5 = words?.[i - (5 - optionalPrevDec)];

                const nWord2 = words?.[i + (2 - optionalNextDec)];
                const nWord3 = words?.[i + (3 - optionalNextDec)];
                const nWord4 = words?.[i + (4 - optionalNextDec)];
                const nWord5 = words?.[i + (5 - optionalNextDec)];

                const isPrev2Matched = (prevWord2 && normalizeWord(prevWord2)) === (pWord2 && normalizeWord(pWord2.word));
                const isPrev3Matched = (prevWord3 && normalizeWord(prevWord3)) === (pWord3 && normalizeWord(pWord3.word));
                const isPrev4Matched = (prevWord4 && normalizeWord(prevWord4)) === (pWord4 && normalizeWord(pWord4.word));
                const isPrev5Matched = (prevWord5 && normalizeWord(prevWord5)) === (pWord5 && normalizeWord(pWord5.word));

                const isNext2Matched = (nextWord2 && normalizeWord(nextWord2)) === (nWord2 && normalizeWord(nWord2.word));
                const isNext3Matched = (nextWord3 && normalizeWord(nextWord3)) === (nWord3 && normalizeWord(nWord3.word));
                const isNext4Matched = (nextWord4 && normalizeWord(nextWord4)) === (nWord4 && normalizeWord(nWord4.word));
                const isNext5Matched = (nextWord5 && normalizeWord(nextWord5)) === (nWord5 && normalizeWord(nWord5.word));

                // Accept the candidate only if every context word that was supplied matches.
                if (
                    !(prevWord2 && !isPrev2Matched) &&
                    !(prevWord3 && !isPrev3Matched) &&
                    !(prevWord4 && !isPrev4Matched) &&
                    !(prevWord5 && !isPrev5Matched) &&
                    !(nextWord2 && !isNext2Matched) &&
                    !(nextWord3 && !isNext3Matched) &&
                    !(nextWord4 && !isNext4Matched) &&
                    !(nextWord5 && !isNext5Matched)
                ) {
                    wordFound = word;
                    break;
                }
            }
        }

        return wordFound && { st: Number(`${wordFound.startTime.seconds}.${wordFound.startTime.nanos}`), et: Number(`${wordFound.endTime.seconds}.${wordFound.endTime.nanos}`) };
    };

    // Results are written to a per-item shallow copy; `diff` itself is only read.
    const diffClone = diff.map((i) => ({ ...i }));
    const diffLength = diff.length;

    let i = 0;
    while (i < diffLength) {
        const item = diff[i];
        const prevItem = i > 0 ? diff[i - 1] : null;
        let nextItem = i < diffLength - 1 ? diff[i + 1] : null;

        // Set when a run of deletions starts at `i`: index at which the outer loop resumes.
        let updatedI: number | null = null;

        // Deletions do not exist in the transcript, so when several deletions follow each other
        // look ahead for the next non-deletion item and use that as the "next" item.
        if (item?.type === "DELETION" && nextItem?.type === "DELETION") {
            for (let j = i + 1; j < diffLength; j++) {
                // NOTE(review): the last diff item (j === diffLength - 1) is always treated as null here,
                // even when it is not a deletion; this also guarantees the loop breaks. Confirm the bound is intended.
                const currItem = j < diffLength - 1 ? diff[j] : null;

                if (currItem === null || currItem.type !== "DELETION") {
                    nextItem = currItem;
                    updatedI = j;
                    break;
                }
            }
        }

        const word = getTextByType(item);
        const prevWord = getTextByType(prevItem);
        const nextWord = getTextByType(nextItem);

        let startTime = null;
        let endTime = null;

        if (!word) startTime = null;
        else if (!prevWord && item.type === "DELETION") startTime = null;
        else if (!prevWord) {
            // No usable previous word: pin the item to the first transcript word, starting at 0.
            const firstWord = words?.[0];
            if (!firstWord) {
                console.log("IF NOT PREV WORD -- FIRST WORD NOT FOUND!");
                startTime = null;
                endTime = null;
            } else {
                startTime = 0;
                endTime = Number(`${firstWord.endTime.seconds}.${firstWord.endTime.nanos}`);
            }
        } else if (!nextWord) {
            // No usable next word: pin the item to the last transcript word.
            const lastWord = words?.[words.length - 1];
            if (!lastWord) {
                // NOTE(review): this branch is about the LAST word, although the logged text says "FIRST WORD".
                console.log("IF NOT NEXT WORD -- FIRST WORD NOT FOUND!");
                startTime = null;
                endTime = null;
            } else {
                // NOTE(review): unlike every other timestamp here, this uses `seconds` only (no nanos, no Number()).
                startTime = words[words.length - 1].startTime.seconds;
                endTime = Number(`${lastWord.endTime.seconds}.${lastWord.endTime.nanos}`);
            }
        } else {
            if (item.type === "DELETION") {
                // A deletion is timed as the gap between the previous word and the next non-deletion word.
                const stAndEt = getDeletionItemStartAndEndTime(prevWord, nextWord);

                if (!stAndEt) {
                    startTime = null;
                    endTime = null;
                } else {
                    startTime = stAndEt.st;
                    endTime = stAndEt.et;
                }
            } else {
                // const prevWord2 = i > 1 ? getTextByType(diff[i - 2]) : null;
                // const prevWord3 = i > 2 ? getTextByType(diff[i - 3]) : null;
                // const prevWord4 = i > 3 ? getTextByType(diff[i - 4]) : null;
                // const prevWord5 = i > 4 ? getTextByType(diff[i - 5]) : null;
                // const nextWord2 = i < diffLength - 2 ? getTextByType(diff[i + 2]) : null;
                // const nextWord3 = i < diffLength - 3 ? getTextByType(diff[i + 3]) : null;
                // const nextWord4 = i < diffLength - 4 ? getTextByType(diff[i + 4]) : null;
                // const nextWord5 = i < diffLength - 5 ? getTextByType(diff[i + 5]) : null;

                //? The code below collects the same values as the commented-out variables above from the "diff".
                //? The difference is that the diff contains DELETIONS while the transcript does not,
                //? so it keeps looking further until it has 4 non-DELETION entries before and after the actual word.
                //? getItemStartAndEndTime does not compare the word right before/after the actual word;
                //? it compares the 4 entries that precede the immediate previous word and the 4 that follow the immediate next word.

                let prevWords: (IDiffExtendedItem | null)[] = [];
                let nextWords: (IDiffExtendedItem | null)[] = [];

                const numberOfWordsToMatch = 4;

                // Walk backwards starting at diff[i - 2]; positions before the start of the diff are recorded
                // as null (nulls are kept by the filter, so they count towards the 4 entries).
                let prevIterationCounter = 1;
                while (prevWords.length < numberOfWordsToMatch) {
                    const prevWordsLength = prevWords.length;

                    for (let current = numberOfWordsToMatch * (prevIterationCounter - 1) + 1; current <= numberOfWordsToMatch * prevIterationCounter - prevWordsLength; current += 1) {
                        const prevWord = i > current ? diff[i - (current + 1)] : null;
                        prevWords.push(prevWord);
                    }

                    prevWords = prevWords.filter((word) => word?.type !== "DELETION");
                    prevIterationCounter += 1;
                }

                // Same walk forwards, starting at diff[i + 2]; positions past the end are recorded as null.
                let nextIterationCounter = 1;
                while (nextWords.length < numberOfWordsToMatch) {
                    const nextWordsLength = nextWords.length;

                    for (let current = numberOfWordsToMatch * (nextIterationCounter - 1) + 1; current <= numberOfWordsToMatch * nextIterationCounter - nextWordsLength; current += 1) {
                        const prevWord = i < diffLength - (current + 1) ? diff[i + (current + 1)] : null;
                        nextWords.push(prevWord);
                    }

                    nextWords = nextWords.filter((word) => word?.type !== "DELETION");
                    nextIterationCounter += 1;
                }

                const prevWordsText = prevWords.map((word) => getTextByType(word));
                const nextWordsText = nextWords.map((word) => getTextByType(word));

                const stAndEt = getItemStartAndEndTime(
                    word,
                    prevItem,
                    nextItem,
                    prevWordsText[0],
                    prevWordsText[1],
                    prevWordsText[2],
                    prevWordsText[3],
                    nextWordsText[0],
                    nextWordsText[1],
                    nextWordsText[2],
                    nextWordsText[3],
                );

                if (!stAndEt) {
                    startTime = null;
                    endTime = null;
                } else {
                    startTime = stAndEt.st;
                    endTime = stAndEt.et;
                }
            }
        }

        diffClone[i] = { ...diffClone[i], startTime, endTime };

        if (updatedI) {
            // For a run of consecutive deletions, give the skipped deletions the same
            // start and end time as the first one, then resume at updatedI (see above).
            for (let j = i + 1; j < updatedI; j++) {
                diffClone[j] = { ...diffClone[j], startTime, endTime };
            }

            i = updatedI;
        } else {
            i += 1;
        }
    }

    return diffClone;
};

/**
 * Shortens a long phrase to "<leading words> ... <trailing words>".
 *
 * Text that already fits into `maxLength` characters is returned unchanged. Otherwise
 * whole words are taken from the start and from the end, each side limited to half of
 * `maxLength` characters. Words on both sides are contiguous and keep their original
 * order, and the same word is never used on both sides. If not even one word fits on
 * either side (e.g. a single very long word), the text is cut by characters instead.
 *
 * @param text - Phrase to shorten; words are separated by spaces.
 * @param maxLength - Maximum length before truncation kicks in; defaults to 34 (also used for 0).
 * @returns The original or the shortened text, without leading/trailing spaces added.
 */
export const truncateTheWordsText = (text: string, maxLength?: number): string => {
    const mLength = maxLength || 34;
    if (text.length <= mLength) {
        return text;
    }

    const sideBudget = mLength / 2;
    const words = text.split(" ").filter((word) => word.length > 0);

    // Leading words: stop at the first word that no longer fits so the head stays contiguous.
    let head = "";
    let headCount = 0;
    while (headCount < words.length) {
        const candidate = head ? `${head} ${words[headCount]}` : words[headCount];
        if (candidate.length > sideBudget) break;

        head = candidate;
        headCount += 1;
    }

    // Trailing words: prepend so they keep reading order, never reaching into the head.
    let tail = "";
    let tailStart = words.length;
    while (tailStart > headCount) {
        const candidate = tail ? `${words[tailStart - 1]} ${tail}` : words[tailStart - 1];
        if (candidate.length > sideBudget) break;

        tail = candidate;
        tailStart -= 1;
    }

    if (!head && !tail) {
        // No whole word fits on either side: fall back to a character based cut.
        const sideChars = Math.max(1, Math.floor(sideBudget));
        return `${text.slice(0, sideChars)}...${text.slice(-sideChars)}`;
    }

    return [head, "...", tail].filter((part) => part.length > 0).join(" ");
};

/**
 * Counts how many diff items are insertions, deletions and substitutions.
 * Items of any other type are ignored.
 */
export const qaIssuesCount = (diff: IDiffExtendedItemWithGroup[]): { insertionsCount: number; deletionsCount: number; substitutionsCount: number } => {
    let insertionsCount = 0;
    let deletionsCount = 0;
    let substitutionsCount = 0;

    for (const item of diff) {
        if (item.type === "INSERTION") {
            insertionsCount += 1;
        } else if (item.type === "DELETION") {
            deletionsCount += 1;
        } else if (item.type === "SUBSTITUTION") {
            substitutionsCount += 1;
        }
    }

    return { insertionsCount, deletionsCount, substitutionsCount };
};

/**
 * Serializes a list of flat records into CSV text (CRLF line endings).
 *
 * The column set and order come from the keys of the first record. String cells are always
 * wrapped in double quotes with embedded quotes doubled (CSV convention), numbers are
 * written bare, and null/undefined cells become an empty quoted cell (`""`).
 *
 * @param items - Records to export; every record is read using the first record's keys.
 * @returns The CSV text, or an empty string when `items` is empty.
 */
export const jsonToCsv = (items: Array<{ [key: string]: string | number | null | undefined }>) => {
    // Without a first record there are no columns to derive.
    if (items.length === 0) return "";

    const header = Object.keys(items[0]);

    // CSV quoting: wrap in double quotes and double any embedded quote. Unlike JSON escaping
    // this leaves backslashes and line breaks as they are.
    const quote = (value: string) => `"${value.replace(/"/g, '""')}"`;

    // Plain column names stay unquoted; only names containing CSV special characters get quoted.
    const formatHeaderCell = (name: string) => (/[",\r\n]/.test(name) ? quote(name) : name);

    const formatCell = (value: string | number | null | undefined) => {
        if (typeof value === "string") return quote(value);

        // Numbers are written as JSON numbers; null/undefined become "".
        return JSON.stringify(value ?? "");
    };

    const headerString = header.map(formatHeaderCell).join(",");
    const rowItems = items.map((row) => header.map((fieldName) => formatCell(row[fieldName])).join(","));

    return [headerString, ...rowItems].join("\r\n");
};

/**
 * Triggers a download of `text` as a CSV file named `<title>.csv`.
 *
 * @param title - File name without the extension.
 * @param text - CSV content to save.
 */
export const exportCsvReport = (title: string, text: string) => {
    const fileName = `${title}.csv`;
    const blob = new Blob([text], { type: "text/csv;charset=utf-8;" });

    FileSaver.saveAs(blob, fileName);
};

/**
 * Formats a duration in milliseconds as zero-padded "HH:MM:SS".
 * Hours wrap around at 24.
 */
export const formatTime = (milliseconds: number) => {
    const totalSeconds = milliseconds / 1000;

    const units = [
        Math.floor((totalSeconds / 60 / 60) % 24), // hours
        Math.floor((totalSeconds / 60) % 60), // minutes
        Math.floor(totalSeconds % 60), // seconds
    ];

    return units.map((unit) => unit.toString().padStart(2, "0")).join(":");
};

/**
 * Builds the article text used for pronunciation matching: every reference word is
 * followed by a marker produced by `getDiffId`, all separated by single spaces.
 *
 * INSERTION and LINEBREAK items are skipped, as are items without reference text.
 * The possessive suffix "’s" is stripped from each word; casing is left as is.
 *
 * @param diff - Diff items in document order.
 * @param getDiffId - Produces the marker for a diff item id and its (stripped) word.
 * @returns The joined text, trimmed.
 */
export const collectArticleTextForPronunciationsQa = (diff: IDiffExtendedItemWithGroup[], getDiffId: (id: string, word: string) => string) => {
    let joined = "";

    for (const item of diff) {
        if (item.type === "INSERTION" || item.type === "LINEBREAK") continue;

        const referenceWord = item.reference?.replaceAll("’s", "");
        if (!referenceWord) continue;

        joined = `${joined} ${referenceWord} ${getDiffId(item.id, referenceWord)}`;
    }

    return joined.trim();
};

/**
 * Prepares the article text for pronunciation matching by attaching a diff-id marker to every
 * word, and returns helpers to match pronunciation names against that text and to recover
 * the diff ids from a match afterwards.
 *
 * @param diff - Diff items in document order.
 * @returns `articleText` (each word followed by its marker), `getPronNameRegex` (pattern for a
 *          pronunciation name that tolerates the markers) and `getPronsAndDiffIds` (splits
 *          matched text back into the plain words and their diff ids).
 */
export const collectArticleTextForPronsQaAndAddDiffId = (diff: IDiffExtendedItemWithGroup[]) => {
    //! CHANGE THESE VALUES WITH CARE
    //! DIFF IDs must contain only letters, digits and dashes (-); the marker regex below relies on it.
    const DIFF_ID_PREFIX = "//DIFFID";
    const DIFF_ID_POSTFIX = "DIFFID//";

    // Marker format: <word><prefix><id><postfix><word>. Wrapping the id in the word itself makes
    // the match more specific and keeps word boundaries in regexes intact.
    const getDiffId = (id: string, word: string) => `${word}${DIFF_ID_PREFIX}${id}${DIFF_ID_POSTFIX}${word}`;
    const diffIdRegexStr = (word: string) => `${word}${DIFF_ID_PREFIX}[-A-Za-z0-9]+${DIFF_ID_POSTFIX}${word}`;

    // Pattern matching `pronName` inside the marked-up text: every word followed by its marker.
    // With `plainRegex` the pattern source is returned instead of a compiled global RegExp.
    const getPronNameRegex = (pronName: string, plainRegex?: boolean) => {
        const wordsWithMarkers = _.escapeRegExp(pronName)
            .split(" ")
            .filter((token) => token.length > 0)
            .map((token) => `${token} ${diffIdRegexStr(token)}`);

        const pattern = wordsWithMarkers.join(" ");

        return plainRegex ? pattern : new RegExp("(?<=\\s|^|[^\\w])" + pattern + "(?=\\s|$|[^\\w])", "g");
    };

    // Splits each matched text (alternating word / marker tokens) back into the plain phrase
    // and the diff id belonging to each of its words.
    const getPronsAndDiffIds = (prons: IPron[]) =>
        prons.map((pron) => {
            const tokens = pron.word.split(" ").filter((token) => token.length > 0);

            // Pair up tokens: position 0 is the word, position 1 its marker.
            const pairs: string[][] = [];
            for (let pos = 0; pos < tokens.length; pos += 2) {
                pairs.push(pos + 1 < tokens.length ? [tokens[pos], tokens[pos + 1]] : [tokens[pos]]);
            }

            const actualWord = pairs.map((pair) => pair[0]).join(" ");

            const diffIds = pairs.map(([itemWord, rawId]) => {
                // Strip the "<word><prefix>" head and the "<postfix><word>" tail to get the bare id.
                const id = rawId.replace(`${itemWord}${DIFF_ID_PREFIX}`, "").replace(`${DIFF_ID_POSTFIX}${itemWord}`, "");

                return { word: itemWord, id };
            });

            return { pron, actualWord, diffIds };
        });

    const articleText = collectArticleTextForPronunciationsQa(diff, getDiffId);

    return { articleText, getPronNameRegex, getPronsAndDiffIds };
};

/** Returns true when the article's narration type is one of the AI narration types (gold or silver). */
export const isAiGenerationType = (article?: IArticle) => {
    const aiNarrationTypes = [ArticleNarrationTypes.AI_GOLD, ArticleNarrationTypes.AI_SILVER] as any[];

    return aiNarrationTypes.includes(article?.articleNarrationType);
};

/**
 * Normalizes a pronunciation word: dashes become spaces, surrounding whitespace is
 * trimmed and runs of spaces are collapsed into one.
 */
export const normalizeQaPronWord = (word: string) => {
    const withoutDashes = word.split("-").join(" ");
    const tokens = withoutDashes
        .trim()
        .split(" ")
        .filter((token) => token.length > 0);

    return tokens.join(" ");
};

/**
 * Drops pronunciation matches that are known false positives: the words on the
 * ignore list ("for", "do") and any word of one character or less.
 */
export const filterPronsFalsePositives = (prons: IQaPron[]) => {
    const ignoreList = ["for", "do"];

    const isFalsePositive = (candidate: string) => ignoreList.includes(candidate) || candidate.length <= 1;

    return prons.filter(({ word }) => !isFalsePositive(word));
};

/**
 * Removes duplicate pronunciations when a shorter word appears within a longer phrase.
 * This prevents redundant QA checks for the same word when it's part of a longer phrase.
 *
 * Example:
 * If we have pronunciations for "Latif" and "Shams Latif":
 * - "Latif" appears within "Shams Latif"
 * - The diffItems of "Latif" that are also diffItems of "Shams Latif" are dropped
 * - If "Latif" has no remaining diffItems, it is removed entirely
 *
 * The caller's array and its entries are left untouched; entries whose diffItems were
 * reduced are returned as shallow copies.
 *
 * @param prons - Array of pronunciations to process
 * @returns Filtered array of pronunciations with duplicates removed
 */
export const removeDuplicatedPronunciations = (prons: IQaPron[]) => {
    // Work on a copy so that writing back filtered entries never leaks into the caller's array.
    const working = [...prons];
    const pronsIdsToRemove = new Set<string>();

    // Compare each pronunciation (A) against all others (B)
    for (let pronAIndex = 0; pronAIndex < working.length; pronAIndex++) {
        const pronA = { ...working[pronAIndex] };
        const pronAWord = pronA.word;

        for (const pronB of working) {
            const pronBWord = pronB.word;

            // Only a strictly longer phrase that contains A can shadow A's diffItems
            if (pronBWord.includes(pronAWord) && pronBWord.length > pronAWord.length) {
                const pronBDiffIds = new Set(pronB.diffItems.map((bdi) => bdi.id));
                pronA.diffItems = pronA.diffItems.filter((adi) => !pronBDiffIds.has(adi.id));
            }
        }

        if (!pronA.diffItems.length) {
            // Nothing left for A: drop it from the result
            pronsIdsToRemove.add(pronA.qaPronId);
        } else {
            // Later comparisons see A with its reduced diffItems
            working[pronAIndex] = pronA;
        }
    }

    return working.filter((pron) => !pronsIdsToRemove.has(pron.qaPronId));
};

/**
 * Collects the reference text around a selection: up to 20 diff items before the first
 * selected item, the selection itself, and up to 20 items after the last selected item.
 *
 * @param diff - Diff items in document order.
 * @param firstItemId - Id of the first selected diff item.
 * @param lastItemId - Id of the last selected diff item.
 * @returns The reference words, each preceded by a single space (items without reference
 *          text contribute only the space), or "" when either id is not found.
 */
export const collectBeforeAndAfterWordsForSelectionItem = (diff: IDiffExtendedItemWithGroup[], firstItemId: string, lastItemId: string) => {
    const contextSize = 20;

    const firstItemIndex = diff.findIndex((di) => di.id === firstItemId);
    const lastItemIndex = diff.findIndex((di) => di.id === lastItemId);

    if (firstItemIndex === -1 || lastItemIndex === -1) return "";

    // Clamp the window to the bounds of the diff; both ends are inclusive.
    const windowStart = Math.max(0, firstItemIndex - contextSize);
    const windowEnd = Math.min(diff.length - 1, lastItemIndex + contextSize);

    let joined = "";
    for (let pos = windowStart; pos <= windowEnd; pos++) {
        joined = `${joined} ${diff[pos].reference || ""}`;
    }

    return joined;
};

/**
 * Builds the plain-text intro and outro of an article for QA.
 *
 * For each side, the text comes from the template referenced by `article.introId` /
 * `article.outroId`; when no template text is available the article's `manualIntro` /
 * `manualOutro` is used instead. Placeholders ({title}, {section}, {publisher}, {narrator},
 * [narrator], {journalists}, {journalists_write}, {date}, {view_date}, {publication_date},
 * {subtitle}) are filled from the article, line breaks become `<br/>`, and the result is
 * passed through `removeHtmlFromText`.
 *
 * Placeholder values are inserted literally: characters such as `$` in a title or name are
 * never interpreted as replacement patterns.
 *
 * @param article - Article providing the template ids, manual texts and placeholder values.
 * @param templates - Available templates, looked up by `templateId`.
 * @returns `{ introText, outroText }`, each possibly empty.
 */
export const getArticleIntroOutroTextForQa = (article: IArticle, templates: any[]) => {
    const months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"];

    const templateTypes = {
        intro: 0,
        outro: 1,
    };

    // English ordinal suffix for a day of the month (1st, 2nd, 3rd, 4th, ..., 11th-13th, 21st, ...).
    const getDaySuffix = (day: number) => {
        if (day >= 11 && day <= 13) return "th";

        switch (day % 10) {
            case 1:
                return "st";
            case 2:
                return "nd";
            case 3:
                return "rd";
            default:
                return "th";
        }
    };

    // Formats a date by its UTC calendar day, e.g. "22nd of March 2024" (enhanced) or "22nd March 2024".
    // Missing or unparseable dates produce an empty string.
    const formatDate = (date?: Date | string, options?: { enhancedFormatting?: boolean }) => {
        if (!date) {
            return "";
        }

        const dateParsed = typeof date === "string" ? new Date(date) : date;
        if (Number.isNaN(dateParsed.getTime())) {
            return "";
        }

        // UTC getters avoid the off-by-one-hour risk of shifting by the local timezone offset around DST changes.
        const day = dateParsed.getUTCDate();
        const month = dateParsed.getUTCMonth();
        const year = dateParsed.getUTCFullYear();

        const dayWithSuffix = `${day}${getDaySuffix(day)}`;

        if (options?.enhancedFormatting) {
            return `${dayWithSuffix} of ${months[month]} ${year}`;
        }

        return `${dayWithSuffix} ${months[month]} ${year}`;
    };

    const getTemplate = (type: number) => {
        const isIntro = type === templateTypes.intro;

        const templateId = isIntro ? article?.introId || 0 : article?.outroId || 0;
        const template = templates && templates.find((t) => t.templateId === +templateId);

        // Template text first; fall back to the manually entered intro/outro when there is none.
        let templateText: string = (template && template.templateText) || "";
        if (!templateText) {
            templateText = (isIntro ? article.manualIntro : article.manualOutro) || "";
        }

        const title = article.altNameForNarration ? article.altNameForNarration : article.articleName;
        const section = article.articleSection || "";
        const subtitle = article.articleSubName || "";
        const newspaperName = article.newspaper ? article.newspaper.newspaperName : "";
        const narratorName = article.articleReader ? `${article.articleReader.userFirstName} ${article.articleReader.userLastName}` : "";

        const date = formatDate(article.articleAddedDateTime, { enhancedFormatting: true });
        const viewDate = formatDate(article.articleViewDateTime, { enhancedFormatting: true });
        const publicationDate = formatDate(article.articleOriginalPublicationDateTime, { enhancedFormatting: true });

        // "A", "A and B", "A, B, and C"
        const journalistsLength = article.journalists?.length || 0;
        const journalistsName = (article.journalists || [])
            .map((j) => j.journalistName)
            .reduce((joined, journalist, currentIndex) => {
                if (journalistsLength === 1 || currentIndex === journalistsLength - 1) return `${joined}${journalist}`;

                if (journalistsLength === 2) {
                    return `${joined}${journalist} and `;
                }

                if (journalistsLength > 2 && currentIndex === journalistsLength - 2) {
                    return `${joined}${journalist}, and `;
                }

                return `${joined}${journalist}, `;
            }, "");

        // eslint-disable-next-line no-nested-ternary
        const journalistsWriteOrWrites = journalistsLength < 1 ? "" : journalistsLength > 1 ? "write" : "writes";

        // Applied in this order. A replacer function is used so that "$" sequences inside a value
        // (e.g. "$$" or "$&" in a title) are inserted as-is instead of being treated as patterns.
        const substitutions: Array<[RegExp, string]> = [
            [/\{title\}/g, title],
            [/\{section\}/g, section],
            [/\{publisher\}/g, newspaperName],
            [/\{narrator\}/g, narratorName],
            [/\[narrator\]/g, narratorName],
            [/\{journalists\}/g, journalistsName],
            [/\{journalists_write\}/g, journalistsWriteOrWrites],
            [/\{date\}/g, date],
            [/\{view_date\}/g, viewDate],
            [/\{publication_date\}/g, publicationDate],
            [/\{subtitle\}/g, subtitle],
        ];

        return substitutions.reduce((text, [placeholder, value]) => text.replace(placeholder, () => value), templateText.replace(/\n/g, "<br/>"));
    };

    const introHtml = getTemplate(templateTypes.intro);
    const outroHtml = getTemplate(templateTypes.outro);

    return { introText: removeHtmlFromText(introHtml), outroText: removeHtmlFromText(outroHtml) };
};

/**
 * Re-writes a transcript diff so that it carries the article's original wording
 * (casing, punctuation) and explicit line-break markers.
 *
 * The intro template text, the article body (HTML stripped through a detached DOM
 * element) and the outro template text are joined into one plain-text string, which
 * is aligned against the reference words of the diff with `mapTranscriptToOriginal`.
 * The alignment is then replayed onto the diff:
 *   - matched words get their `reference` replaced by the original token;
 *   - original-only tokens (mostly punctuation) are inserted as new diff items;
 *   - every line break becomes a `LINEBREAK` diff item placed after the closest
 *     preceding word, and is also recorded in `lineBreakDiffItems`.
 *
 * Both `diffWithTimestamps` and `lineBreakDiffItems` are mutated in place; nothing is
 * returned. Requires a DOM (`document`) to be available.
 *
 * @param article - The article object containing text and metadata
 * @param templates - Array of templates for intro/outro text
 * @param diffWithTimestamps - Diff items containing transcript words with timestamps (mutated)
 * @param lineBreakDiffItems - Receives one entry per inserted line break (mutated)
 */
export function formatArticleTextWordUsingOriginalText(article: IArticle, templates: ITemplate[], diffWithTimestamps: IDiffExtendedItem[], lineBreakDiffItems: ILineBreak[]) {
    // Drops the "--" separator lines/edges and collapses runs of line breaks.
    const stripSeparators = (text: string) =>
        text
            .replace(/\n--\n/g, "\n")
            .replace(/^--/, "")
            .replace(/--$/, "")
            .replace(/\n+/g, "\n");

    // Get intro and outro text from templates
    const { introText, outroText } = getArticleIntroOutroTextForQa(article, templates);

    // Extract plain text from article HTML. A missing article text is treated as empty
    // instead of being stringified to "undefined" by the innerHTML setter.
    const tempDivElement = document.createElement("div");
    tempDivElement.innerHTML = article.articleText ?? "";
    const bodyText = stripSeparators(tempDivElement.textContent || tempDivElement.innerText || "");

    // Combine intro, article text and outro with single line breaks
    const articleText = `${stripSeparators(introText)}\n${bodyText}\n${stripSeparators(outroText)}`.replace(/\n+/g, "\n");

    // Filter diff items to only include EQUAL, SUBSTITUTION and DELETION types
    // Insertions are excluded since they don't have corresponding original text
    const dataForMapping = diffWithTimestamps.filter((item) => ["EQUAL", "SUBSTITUTION", "DELETION"].includes(item.type)).map((item) => ({ id: item.id, text: item.reference! }));

    // Map transcript words to original text positions
    const mappedResponse = mapTranscriptToOriginal(articleText, dataForMapping);

    // Walks mappedResponse from `start` in direction `step` and returns the diff id of the
    // first entry that is tied to exactly one transcript word, or null when there is none.
    const findAnchorId = (start: number, step: 1 | -1): string | null => {
        for (let i = start; i >= 0 && i < mappedResponse.length; i += step) {
            const candidate = mappedResponse[i].transcript;
            if (candidate && !Array.isArray(candidate)) {
                return candidate.id;
            }
        }

        return null;
    };

    const findDiffIndex = (id: string) => diffWithTimestamps.findIndex((diffItem) => diffItem.id === id);

    // Records the line break and inserts a LINEBREAK diff item right after `anchorPos`.
    const insertLineBreakAfter = (anchorPos: number, anchor: IDiffExtendedItem) => {
        lineBreakDiffItems.push({ diffItem: anchor });
        diffWithTimestamps.splice(anchorPos + 1, 0, { ...anchor, lineBreakDiffItem: anchor, type: "LINEBREAK", pos: 0, id: generateRandomString() });
    };

    // Process each mapped word to handle line breaks and format text
    for (let pos = 0; pos < mappedResponse.length; pos += 1) {
        const item = mappedResponse[pos];
        const transcript = item.transcript;

        // `original` is an array when several original tokens were merged to match one
        // transcript word (e.g. ["up", "lifting"] -> "uplifting"). Currently skipped.
        if (Array.isArray(item.original)) {
            continue;
        }

        // `transcript` is an array when one original token matched several transcript
        // words (e.g. "uplifting" -> "up" + "lifting"). Currently skipped.
        if (Array.isArray(transcript)) {
            continue;
        }

        if (item.isLineBreak) {
            // Anchor the line break to the closest preceding real word. The scan starts at
            // the previous entry and includes index 0, so a break that directly follows the
            // very first word is no longer lost.
            const anchorId = findAnchorId(pos - 1, -1);

            if (anchorId === null) continue;

            const anchorPos = findDiffIndex(anchorId);

            if (anchorPos === -1) continue;

            insertLineBreakAfter(anchorPos, { ...diffWithTimestamps[anchorPos] });

            continue;
        }

        if (!transcript) {
            // The token exists only in the original text (mostly punctuation and special
            // characters), so a new diff item has to be created for it.
            const previousId = findAnchorId(pos - 1, -1);
            const nextId = findAnchorId(pos + 1, 1);

            let insertAt = -1;
            let isInsideDeletionGroup = false;

            if (previousId !== null) {
                // Middle or very last word: place it right after the previous word.
                const previousPos = findDiffIndex(previousId);

                if (previousPos !== -1) {
                    insertAt = previousPos + 1;

                    // If the word sits between two deleted words it belongs to that deletion group.
                    if (nextId !== null) {
                        const nextDiffItem = diffWithTimestamps.find((diffItem) => diffItem.id === nextId);
                        isInsideDeletionGroup = diffWithTimestamps[previousPos].type === "DELETION" && nextDiffItem?.type === "DELETION";
                    }
                }
            } else if (nextId !== null) {
                // Very first word: place it right before the next word.
                insertAt = findDiffIndex(nextId);
            }

            // No anchor at all, or the anchor is not part of the diff: nothing sensible to do.
            // (Passing -1 on to splice would insert at an unrelated position.)
            if (insertAt === -1) {
                continue;
            }

            const originalWord = item.original!;

            // Create new diff item for the word
            const diffItemData: IDiffExtendedItem = {
                id: generateRandomString(),
                type: "EQUAL",
                pos: 0,
                reference: originalWord,
                startTime: null,
                endTime: null,
                isSpaceBefore: item.isSpaceBefore,
                isSpaceAfter: item.isSpaceAfter,
            };

            if (isInsideDeletionGroup) {
                diffItemData.type = "DELETION";
            }

            diffWithTimestamps.splice(insertAt, 0, diffItemData);

            // Give the entry a transcript so that later backward scans can anchor to it.
            mappedResponse[pos] = { ...item, transcript: { id: diffItemData.id, text: originalWord, normalized: "" } };

            // Handle line break after current word if needed: emit it here, anchored to the
            // freshly inserted item, and step over the line-break entry.
            if (mappedResponse[pos + 1]?.isLineBreak) {
                insertLineBreakAfter(insertAt, diffItemData);
                pos += 1;
            }

            continue;
        }

        // A single transcript word: find it in the diff and replace the word text.
        const diffItemPos = findDiffIndex(transcript.id);

        if (diffItemPos === -1) {
            continue;
        }

        // NOTE(review): for transcript-only entries `item.original` is null, so the existing
        // reference is overwritten with null here — confirm that this is intended.
        diffWithTimestamps[diffItemPos] = { ...diffWithTimestamps[diffItemPos], reference: item.original! };
    }
}

/**
 * mapTranscriptToOriginal
 *
 * Aligns a list of transcript objects to an original text with a greedy, single-pass
 * two-pointer walk and returns one entry per alignment step:
 *   - `original`: the original token, an array of tokens (several original tokens were
 *     merged to match one transcript word), or null (transcript-only word / line break);
 *   - `transcript`: the transcript object (extended with `normalized`), an array of them
 *     (several transcript words were merged to match one original token), or null
 *     (original-only token / line break);
 *   - `isSpaceBefore` / `isSpaceAfter`: whether whitespace surrounded the token in its line;
 *   - `isLineBreak`: true for the entry emitted for every "\n" of the original text.
 *
 * Matching ignores case, apostrophes, double quotes and whitespace.
 *
 * @param originalText - The properly cased original text with punctuation, etc.
 * @param transcriptObjects - e.g. [{ id: "1", text: "financial" }, { id: "2", text: "up" }, ...]
 * @returns The alignment entries in reading order.
 */
export function mapTranscriptToOriginal(originalText: string, transcriptObjects: { id: string; text: string }[]) {
    type OriginalToken = { text: string; isSpaceBefore: boolean; isSpaceAfter: boolean };

    // Placeholder token emitted by the tokenizer for every "\n".
    const LINE_BREAK_MARKER = "__LINE_BREAK__";
    // Latin letters (incl. accented), digits and Cyrillic.
    const WORD_CHAR = /[A-Za-z0-9À-ž\u0400-\u04FF]/;
    // Characters that are peeled off the edges of a whitespace-delimited chunk.
    const SPLITTABLE_PUNCTUATION = /[.,!?;:\-–(){}[\]"“”'‘’…]/;
    const HYPHEN_OR_EN_DASH = /[-–]/;

    // 1) Tokenize the original text: line breaks become marker tokens, edge punctuation
    //    becomes separate tokens and hyphenated words are split into their parts.
    const originalTokens = tokenizeOriginal(originalText);

    // 2) Prepare transcript tokens with a normalized field for matching.
    const transcriptTokens = transcriptObjects.map((obj) => ({
        ...obj,
        normalized: normalizeForMatching(obj.text),
    }));

    const result: Array<{
        original: string | string[] | null;
        transcript: { id: string; text: string; normalized: string } | { id: string; text: string; normalized: string }[] | null;
        isSpaceBefore?: boolean;
        isSpaceAfter?: boolean;
        isLineBreak?: boolean;
    }> = [];

    let i = 0; // pointer for originalTokens
    let j = 0; // pointer for transcriptTokens

    while (i < originalTokens.length || j < transcriptTokens.length) {
        // A. If we've used all original tokens, the rest of transcript is insertion.
        if (i >= originalTokens.length) {
            result.push({ original: null, transcript: transcriptTokens[j] });
            j++;
            continue;
        }

        const origTokObj = originalTokens[i];

        // Line breaks are reported as-is, whether or not transcript tokens remain.
        if (origTokObj.text === LINE_BREAK_MARKER) {
            result.push({ original: null, transcript: null, isLineBreak: true });
            i++;
            continue;
        }

        // B. If we've used all transcript tokens, the rest of original is insertion.
        if (j >= transcriptTokens.length) {
            result.push(originalOnly(origTokObj));
            i++;
            continue;
        }

        const origTok = origTokObj.text;
        const normOrig = normalizeForMatching(origTok);
        const tranTok = transcriptTokens[j];

        // 1) Direct single-token match?
        if (normOrig === tranTok.normalized) {
            result.push({
                original: origTok,
                transcript: tranTok,
                isSpaceBefore: origTokObj.isSpaceBefore,
                isSpaceAfter: origTokObj.isSpaceAfter,
            });
            i++;
            j++;
            continue;
        }

        // 2) One side is pure punctuation and the other is a word: emit the punctuation
        //    on its own and retry the word against the next token.
        const origIsPunct = isPunctuationOrSymbol(origTok);
        const tranIsPunct = isPunctuationOrSymbol(tranTok.text);
        if (origIsPunct && !tranIsPunct) {
            result.push(originalOnly(origTokObj));
            i++;
            continue;
        }
        if (!origIsPunct && tranIsPunct) {
            result.push({ original: null, transcript: tranTok });
            j++;
            continue;
        }

        // 2.5) Look-ahead: if skipping the current token on one side lets the other side
        //      match, treat the current token as an insertion on its side.
        if (j + 1 < transcriptTokens.length && canMergeTranscriptToMatch(normOrig, transcriptTokens, j + 1)) {
            result.push({ original: null, transcript: tranTok });
            j++;
            continue; // do not advance i
        }
        if (i + 1 < originalTokens.length && canMergeOriginalToMatch(tranTok.normalized, originalTokens, i + 1)) {
            result.push(originalOnly(origTokObj));
            i++;
            continue; // do not advance j
        }

        // 3) Try merging multiple transcript tokens => single original token.
        //    The normalized merge can only grow, so stop once it is longer than the target
        //    instead of concatenating the whole remaining transcript on every mismatch.
        let mergedText = tranTok.text;
        let mergedNormalized = tranTok.normalized;
        const usedTranscriptObjs = [tranTok];
        let mergedFound = false;
        for (let k = j + 1; k < transcriptTokens.length && mergedNormalized.length <= normOrig.length; k++) {
            mergedText += transcriptTokens[k].text;
            usedTranscriptObjs.push(transcriptTokens[k]);
            mergedNormalized = normalizeForMatching(mergedText);
            if (mergedNormalized === normOrig) {
                result.push({
                    original: origTok,
                    transcript: usedTranscriptObjs,
                    isSpaceBefore: origTokObj.isSpaceBefore,
                    isSpaceAfter: origTokObj.isSpaceAfter,
                });
                i++;
                j = k + 1;
                mergedFound = true;
                break;
            }
        }
        if (mergedFound) continue;

        // 4) Try merging multiple original tokens => single transcript token.
        //    Never merges across a line break; bounded by length like step 3.
        let mergedOrigText = origTok;
        let mergedOrigNorm = normOrig;
        const usedOriginals = [origTokObj];
        let multiOrigFound = false;
        for (let x = i + 1; x < originalTokens.length && mergedOrigNorm.length <= tranTok.normalized.length; x++) {
            if (originalTokens[x].text === LINE_BREAK_MARKER) break;
            mergedOrigText += originalTokens[x].text;
            usedOriginals.push(originalTokens[x]);
            mergedOrigNorm = normalizeForMatching(mergedOrigText);
            if (mergedOrigNorm === tranTok.normalized) {
                // For merged tokens, use the first token's isSpaceBefore and the last token's isSpaceAfter.
                result.push({
                    original: usedOriginals.map((tok) => tok.text),
                    transcript: tranTok,
                    isSpaceBefore: usedOriginals[0].isSpaceBefore,
                    isSpaceAfter: usedOriginals[usedOriginals.length - 1].isSpaceAfter,
                });
                i = x + 1;
                j++;
                multiOrigFound = true;
                break;
            }
        }
        if (multiOrigFound) continue;

        // 5) If still no match, treat it as a “substitution.”
        result.push({
            original: origTok,
            transcript: tranTok,
            isSpaceBefore: origTokObj.isSpaceBefore,
            isSpaceAfter: origTokObj.isSpaceAfter,
        });
        i++;
        j++;
    }

    return result;

    // ---------------- HELPER FUNCTIONS ----------------

    /** Builds the entry for an original token that has no transcript counterpart. */
    function originalOnly(token: OriginalToken) {
        return {
            original: token.text,
            transcript: null,
            isSpaceBefore: token.isSpaceBefore,
            isSpaceAfter: token.isSpaceAfter,
        };
    }

    /**
     * tokenizeOriginal:
     * Splits the text on "\n" (emitting a marker token for each), then splits every line
     * on whitespace into chunks. Punctuation at the edges of a chunk becomes one token per
     * character, and the remaining core is split on hyphens / en-dashes (the dash itself
     * is dropped). Within a chunk only the very first token gets `isSpaceBefore` and only
     * the very last token gets `isSpaceAfter`, each set when the chunk has a neighbour on
     * that side in the same line.
     */
    function tokenizeOriginal(text: string): OriginalToken[] {
        const tokens: OriginalToken[] = [];
        for (const part of text.split(/(\n)/)) {
            if (part === "\n") {
                tokens.push({ text: LINE_BREAK_MARKER, isSpaceBefore: false, isSpaceAfter: false });
                continue;
            }

            const chunks = part.split(/\s+/).filter(Boolean);
            for (let c = 0; c < chunks.length; c++) {
                let leading = "";
                let trailing = "";
                let core = chunks[c];
                // Peel punctuation off the front and the back of the chunk; punctuation
                // inside the core (e.g. the apostrophe of "don't") is left untouched.
                while (core.length > 0 && isPuncSymbolForSplit(core[0])) {
                    leading += core[0];
                    core = core.slice(1);
                }
                while (core.length > 0 && isPuncSymbolForSplit(core[core.length - 1])) {
                    trailing = core[core.length - 1] + trailing;
                    core = core.slice(0, -1);
                }

                const chunkTokens: OriginalToken[] = [];
                for (const ch of leading) {
                    chunkTokens.push({ text: ch, isSpaceBefore: false, isSpaceAfter: false });
                }
                // Splitting also covers the no-dash case (single part) and drops the empty
                // parts produced by consecutive dashes or an empty core.
                for (const piece of core.split(HYPHEN_OR_EN_DASH)) {
                    if (piece) {
                        chunkTokens.push({ text: piece, isSpaceBefore: false, isSpaceAfter: false });
                    }
                }
                for (const ch of trailing) {
                    chunkTokens.push({ text: ch, isSpaceBefore: false, isSpaceAfter: false });
                }

                // The surrounding whitespace belongs to the outermost tokens of the chunk only,
                // so inner tokens such as "well" in "(well-known)" keep both flags false.
                if (chunkTokens.length > 0) {
                    chunkTokens[0].isSpaceBefore = c > 0;
                    chunkTokens[chunkTokens.length - 1].isSpaceAfter = c < chunks.length - 1;
                }
                tokens.push(...chunkTokens);
            }
        }
        return tokens;
    }

    /**
     * normalizeForMatching:
     * Lowercases, removes apostrophes, quotes and spaces.
     */
    function normalizeForMatching(str: string) {
        return str
            .toLowerCase()
            .replace(/[’'"]/g, "")
            .replace(/\s+/g, "")
            .trim();
    }

    /**
     * isPunctuationOrSymbol:
     * Returns true if the token contains no letter or digit at all (this includes "").
     */
    function isPunctuationOrSymbol(token: string) {
        return !WORD_CHAR.test(token);
    }

    /**
     * isPuncSymbolForSplit:
     * Returns true if the character is punctuation or a symbol that should be split off.
     */
    function isPuncSymbolForSplit(ch: string) {
        return SPLITTABLE_PUNCTUATION.test(ch);
    }

    /**
     * canMergeTranscriptToMatch:
     * Returns true if concatenating transcript tokens from `startIndex` onwards yields
     * `normOrig` before the concatenation becomes longer than it.
     */
    function canMergeTranscriptToMatch(normOrig: string, transcriptArr: Array<{ text: string; normalized: string }>, startIndex: number): boolean {
        let mergedText = "";
        for (let t = startIndex; t < transcriptArr.length; t++) {
            mergedText += transcriptArr[t].text;
            const mergedNormalized = normalizeForMatching(mergedText);
            if (mergedNormalized === normOrig) {
                return true;
            }
            if (mergedNormalized.length > normOrig.length) {
                break;
            }
        }
        return false;
    }

    /**
     * canMergeOriginalToMatch:
     * Returns true if concatenating original tokens from `startIndex` onwards yields
     * `normTran` before the concatenation becomes longer than it or a line break is hit.
     */
    function canMergeOriginalToMatch(normTran: string, originalArr: OriginalToken[], startIndex: number): boolean {
        let mergedOrigText = "";
        for (let o = startIndex; o < originalArr.length; o++) {
            if (originalArr[o].text === LINE_BREAK_MARKER) {
                break;
            }
            mergedOrigText += originalArr[o].text;
            const mergedNorm = normalizeForMatching(mergedOrigText);
            if (mergedNorm === normTran) {
                return true;
            }
            if (mergedNorm.length > normTran.length) {
                break;
            }
        }
        return false;
    }
}

// /**
//  * mapTranscriptToOriginal
//  *
//  * Aligns a list of transcript objects to an original text, returning an array
//  * of { original, transcript, isSpaceBefore?, isSpaceAfter?, isLineBreak? }.
//  * The "original" can be a single string token or an array of strings (if multiple original tokens were merged),
//  * or null if the token is purely an insertion from the transcript.
//  * The "transcript" can be one object or an array of objects (if multiple transcript tokens merge),
//  * or null if it was an insertion in the original text.
//  *
//  * Additionally, whenever a "\n" is encountered in the original text a token
//  * {original: null, transcript: null, isLineBreak: true} is returned.
//  *
//  * Each token coming from the original text also has two flags:
//  *   - isSpaceBefore: whether there was whitespace before it (in its "chunk")
//  *   - isSpaceAfter: whether there was whitespace after it (in its "chunk")
//  *
//  * @param {string} originalText - The properly cased original text with punctuation, etc.
//  * @param {Array<Object>} transcriptObjects - e.g. [{ id: "1", text: "financial" }, { id: "2", text: "up" }, ...]
//  * @returns {Array<{ original: string|string[]|null, transcript: Object|Object[]|null, isSpaceBefore?: boolean, isSpaceAfter?: boolean, isLineBreak?: boolean }>}
//  */
// export function mapTranscriptToOriginal(originalText: string, transcriptObjects: { id: string; text: string }[]) {
//     // 1) Tokenize the original text into tokens that include space info.
//     const originalTokens = tokenizeOriginal(originalText);
//     // 2) Prepare transcript tokens with a normalized field for matching.
//     const transcriptTokens = transcriptObjects.map((obj) => ({
//         ...obj,
//         normalized: normalizeForMatching(obj.text),
//     }));

//     let i = 0; // pointer for originalTokens
//     let j = 0; // pointer for transcriptTokens
//     const result: Array<{
//         original: string | string[] | null;
//         transcript: { id: string; text: string; normalized: string } | { id: string; text: string; normalized: string }[] | null;
//         isSpaceBefore?: boolean;
//         isSpaceAfter?: boolean;
//         isLineBreak?: boolean;
//     }> = [];

//     while (i < originalTokens.length || j < transcriptTokens.length) {
//         // A. If we've used all original tokens, the rest of transcript is insertion.
//         if (i >= originalTokens.length) {
//             result.push({ original: null, transcript: transcriptTokens[j] });
//             j++;
//             continue;
//         }
//         // B. If we've used all transcript tokens, the rest of original is insertion.
//         if (j >= transcriptTokens.length) {
//             const origTokObj = originalTokens[i];
//             if (origTokObj.text === "__LINE_BREAK__") {
//                 result.push({ original: null, transcript: null, isLineBreak: true });
//             } else {
//                 result.push({ original: origTokObj.text, transcript: null, isSpaceBefore: origTokObj.isSpaceBefore, isSpaceAfter: origTokObj.isSpaceAfter });
//             }
//             i++;
//             continue;
//         }

//         const origTokObj = originalTokens[i];
//         // Special handling for line breaks.
//         if (origTokObj.text === "__LINE_BREAK__") {
//             result.push({ original: null, transcript: null, isLineBreak: true });
//             i++;
//             continue;
//         }
//         const origTok = origTokObj.text;
//         const normOrig = normalizeForMatching(origTok);
//         const tranTok = transcriptTokens[j];

//         // 1) Direct single-token match?
//         if (normOrig === tranTok.normalized) {
//             result.push({
//                 original: origTok,
//                 transcript: tranTok,
//                 isSpaceBefore: origTokObj.isSpaceBefore,
//                 isSpaceAfter: origTokObj.isSpaceAfter,
//             });
//             i++;
//             j++;
//             continue;
//         }

//         // 2) Check if one side is punctuation/dash-like and the other is a normal word.
//         const origIsPunct = isPunctuationOrSymbol(origTok);
//         const tranIsPunct = isPunctuationOrSymbol(tranTok.text);
//         if (origIsPunct && !tranIsPunct) {
//             result.push({
//                 original: origTok,
//                 transcript: null,
//                 isSpaceBefore: origTokObj.isSpaceBefore,
//                 isSpaceAfter: origTokObj.isSpaceAfter,
//             });
//             i++;
//             continue;
//         }
//         if (!origIsPunct && tranIsPunct) {
//             result.push({ original: null, transcript: tranTok });
//             j++;
//             continue;
//         }

//         // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
//         // INSERTION CHECK (NEW STEP 2.5)
//         // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
//         if (j + 1 < transcriptTokens.length) {
//             if (canMergeTranscriptToMatch(origTok, normOrig, transcriptTokens, j + 1)) {
//                 result.push({ original: null, transcript: tranTok });
//                 j++;
//                 continue;
//             }
//         }
//         if (i + 1 < originalTokens.length) {
//             if (canMergeOriginalToMatch(tranTok, transcriptTokens, j, originalTokens, i + 1)) {
//                 result.push({
//                     original: origTok,
//                     transcript: null,
//                     isSpaceBefore: origTokObj.isSpaceBefore,
//                     isSpaceAfter: origTokObj.isSpaceAfter,
//                 });
//                 i++;
//                 continue;
//             }
//         }

//         // 3) Try merging multiple transcript tokens => single original token.
//         let mergedText = tranTok.text;
//         let usedTranscriptObjs = [tranTok];
//         let mergedNormalized = normalizeForMatching(mergedText);
//         let mergedFound = false;
//         let k = j + 1;
//         while (k < transcriptTokens.length && mergedNormalized !== normOrig) {
//             mergedText += transcriptTokens[k].text;
//             usedTranscriptObjs.push(transcriptTokens[k]);
//             mergedNormalized = normalizeForMatching(mergedText);
//             if (mergedNormalized === normOrig) {
//                 result.push({
//                     original: origTok,
//                     transcript: usedTranscriptObjs.length === 1 ? usedTranscriptObjs[0] : usedTranscriptObjs,
//                     isSpaceBefore: origTokObj.isSpaceBefore,
//                     isSpaceAfter: origTokObj.isSpaceAfter,
//                 });
//                 i++;
//                 j = k + 1;
//                 mergedFound = true;
//                 break;
//             }
//             k++;
//         }
//         if (mergedFound) continue;

//         // 4) Try merging multiple original tokens => single transcript token.
//         let mergedOrigText = origTok;
//         let usedOriginals = [origTokObj];
//         let mergedOrigNorm = normOrig;
//         let x = i + 1;
//         let multiOrigFound = false;
//         while (x < originalTokens.length && mergedOrigNorm !== tranTok.normalized) {
//             // If the next token is a line break, break out.
//             if (originalTokens[x].text === "__LINE_BREAK__") break;
//             mergedOrigText += originalTokens[x].text;
//             usedOriginals.push(originalTokens[x]);
//             mergedOrigNorm = normalizeForMatching(mergedOrigText);
//             if (mergedOrigNorm === tranTok.normalized) {
//                 // For merged tokens, use the first token's isSpaceBefore and last token's isSpaceAfter.
//                 result.push({
//                     original: usedOriginals.length === 1 ? usedOriginals[0].text : usedOriginals.map((tok) => tok.text),
//                     transcript: tranTok,
//                     isSpaceBefore: usedOriginals[0].isSpaceBefore,
//                     isSpaceAfter: usedOriginals[usedOriginals.length - 1].isSpaceAfter,
//                 });
//                 i = x + 1;
//                 j++;
//                 multiOrigFound = true;
//                 break;
//             }
//             x++;
//         }
//         if (multiOrigFound) continue;

//         // 5) If still no match, treat it as a substitution.
//         result.push({
//             original: origTok,
//             transcript: tranTok,
//             isSpaceBefore: origTokObj.isSpaceBefore,
//             isSpaceAfter: origTokObj.isSpaceAfter,
//         });
//         i++;
//         j++;
//     }

//     return result;

//     // ---------------- HELPER FUNCTIONS ----------------

//     /**
//      * tokenizeOriginal:
//      * Splits the text by newline boundaries first (inserting a special token for "\n"),
//      * then for each non-newline part splits on whitespace. Each chunk is further processed to
//      * separate leading/trailing punctuation (unless internal). For each resulting token, we record:
//      *   - text: the token text
//      *   - isSpaceBefore: true only for the first token of a chunk that is not the first chunk on its line
//      *   - isSpaceAfter: true only for the last token of a chunk that is not the last chunk on its line
//      */
//     function tokenizeOriginal(text: string): Array<{ text: string; isSpaceBefore: boolean; isSpaceAfter: boolean }> {
//         const parts = text.split(/(\n)/);
//         let tokens: Array<{ text: string; isSpaceBefore: boolean; isSpaceAfter: boolean }> = [];
//         for (const part of parts) {
//             if (part === "\n") {
//                 tokens.push({ text: "__LINE_BREAK__", isSpaceBefore: false, isSpaceAfter: false });
//             } else {
//                 // Split the part on whitespace.
//                 const chunks = part.split(/\s+/).filter(Boolean);
//                 for (let i = 0; i < chunks.length; i++) {
//                     const chunk = chunks[i];
//                     let tokenObjs: Array<{ text: string; isSpaceBefore: boolean; isSpaceAfter: boolean }> = [];
//                     let front = "";
//                     let back = "";
//                     let middle = chunk;
//                     // Trim leading punctuation.
//                     while (middle.length > 0 && isPuncSymbolForSplit(middle[0]) && !isInternalChar(middle, 0)) {
//                         front += middle[0];
//                         middle = middle.slice(1);
//                     }
//                     // Trim trailing punctuation.
//                     while (middle.length > 0 && isPuncSymbolForSplit(middle[middle.length - 1]) && !isInternalChar(middle, middle.length - 1)) {
//                         back = middle[middle.length - 1] + back;
//                         middle = middle.slice(0, -1);
//                     }
//                     // Create tokens for front punctuation.
//                     for (const c of front) {
//                         tokenObjs.push({ text: c, isSpaceBefore: false, isSpaceAfter: false });
//                     }
//                     // Create token for the middle part.
//                     if (middle) {
//                         tokenObjs.push({ text: middle, isSpaceBefore: false, isSpaceAfter: false });
//                     }
//                     // Create tokens for back punctuation.
//                     for (const c of back) {
//                         tokenObjs.push({ text: c, isSpaceBefore: false, isSpaceAfter: false });
//                     }
//                     // For tokens in this chunk, mark the first token with isSpaceBefore true if not the first chunk,
//                     // and the last token with isSpaceAfter true if not the last chunk.
//                     if (tokenObjs.length > 0) {
//                         tokenObjs[0].isSpaceBefore = i > 0;
//                         tokenObjs[tokenObjs.length - 1].isSpaceAfter = i < chunks.length - 1;
//                     }
//                     tokens.push(...tokenObjs);
//                 }
//             }
//         }
//         return tokens;
//     }

//     /**
//      * normalizeForMatching:
//      * Lowercases the string, then removes apostrophes, double quotes and all whitespace.
//      */
//     function normalizeForMatching(str: string) {
//         let lower = str.toLowerCase();
//         lower = lower.replace(/[''"]/g, "");
//         lower = lower.replace(/\s+/g, "");
//         return lower.trim();
//     }

//     /**
//      * isPunctuationOrSymbol:
//      * Returns true if the token contains no Latin/Cyrillic letters or ASCII digits
//      * (i.e. stripping them leaves the token unchanged); an empty token also returns true.
//      */
//     function isPunctuationOrSymbol(token: string) {
//         const stripped = token.replace(/[A-Za-z0-9À-ž\u0400-\u04FF]+/g, "");
//         return stripped.length === token.length;
//     }

//     /**
//      * isPuncSymbolForSplit:
//      * Returns true if the character is punctuation or a symbol that should be split off.
//      */
//     function isPuncSymbolForSplit(ch: string) {
//         return /[.,!?;:\-–(){}[\]"“”'‘'…]/.test(ch);
//     }

//     /**
//      * isInternalChar:
//      * Returns true if the character at the given index in a string is "internal"
//      * (i.e. surrounded by letters/digits).
//      */
//     function isInternalChar(str: string, idx: number) {
//         if (idx <= 0 || idx >= str.length - 1) return false;
//         return /[A-Za-z0-9À-ž\u0400-\u04FF]/.test(str[idx - 1]) && /[A-Za-z0-9À-ž\u0400-\u04FF]/.test(str[idx + 1]);
//     }

//     // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
//     // EXTRA HELPERS FOR THE "SKIP = MERGE MATCH" CHECK
//     // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

//     function canMergeTranscriptToMatch(origTok: string, normOrig: string, transcriptArr: Array<{ text: string; normalized: string }>, startIndex: number): boolean {
//         let mergedText = "";
//         let mergedNormalized = "";
//         for (let t = startIndex; t < transcriptArr.length; t++) {
//             mergedText += transcriptArr[t].text;
//             mergedNormalized = normalizeForMatching(mergedText);
//             if (mergedNormalized === normOrig) {
//                 return true;
//             }
//             if (mergedNormalized.length > normOrig.length) {
//                 break;
//             }
//         }
//         return false;
//     }

//     function canMergeOriginalToMatch(
//         tranTok: { text: string; normalized: string },
//         transcriptArr: Array<{ text: string; normalized: string }>,
//         tranIndex: number,
//         originalArr: Array<{ text: string; isSpaceBefore: boolean; isSpaceAfter: boolean }>,
//         startIndex: number,
//     ): boolean {
//         const normTran = tranTok.normalized;
//         let mergedOrigText = "";
//         let mergedNorm = "";
//         for (let o = startIndex; o < originalArr.length; o++) {
//             mergedOrigText += originalArr[o].text;
//             mergedNorm = normalizeForMatching(mergedOrigText);
//             if (mergedNorm === normTran) {
//                 return true;
//             }
//             if (mergedNorm.length > normTran.length) {
//                 break;
//             }
//         }
//         return false;
//     }
// }
