// The DOM tree is flattened into a queue so that the main routine can use simple
// linear loops instead of having to deal with tree traversal itself

import {imageUrlToBase64} from './imageUrlToBase64';

// functions with traversals
// Flattens every descendant of `root` (not `root` itself) into a single array.
//
// Ordering: all direct children of a node come first, followed by the flattened
// subtree of the first child, then of the second child, and so on.
//
// A single shared accumulator is used instead of spreading each sub-result into
// `push(...)`: spreading a very large array as call arguments throws a
// RangeError, and re-copying every sub-result at each level is wasted work.
const convertTreeToQueue = (root: Element): Element[] => {
    const queue: Element[] = [];

    const collect = (parent: Element): void => {
        // snapshot the children so both passes below see the same list
        const children = Array.from(parent.children);

        // first the direct children...
        for (const child of children) {
            queue.push(child);
        }

        // ...then each child's own subtree, in order
        for (const child of children) {
            collect(child);
        }
    };

    collect(root);

    return queue;
};

// Strips every class from the element except the ones on the allow-list below:
// .mce-content-body, .html-editor__page-break and .mis-merge-field
// (the last one is what reformatMergeField looks for).
const removeUnnecessaryClasses = (element: Element) => {
    const keptClasses = new Set([
        'mce-content-body',
        'html-editor__page-break',
        'mis-merge-field',
    ]);
    const tokens = element.classList;

    // iterate over a copy, since we remove from the list as we go
    for (const token of Array.from(tokens)) {
        if (keptClasses.has(token)) {
            continue;
        }
        tokens.remove(token);
    }
};

// Drops the id attribute. MS Office generates clashing IDs that aren't actually
// used for styling, so there is nothing worth keeping.
// Elements whose id is empty are left untouched here.
const removeID = (element: Element) => {
    if (!element.id) {
        return;
    }

    element.removeAttribute('id');
};

// Mirrors the element's inline style into the data-mce-style attribute, which
// TinyMCE uses internally. Elements that are not HTMLElements, or that have no
// inline style, are left alone.
const copyInlineStyles = (element: Element) => {
    if (!(element instanceof HTMLElement)) {
        return;
    }

    const {cssText} = element.style;
    if (cssText) {
        element.setAttribute('data-mce-style', cssText);
    }
};

// Removes the line-height / margin-bottom pair that MS Office sticks on all
// paragraphs by default. Both properties are only removed when they carry
// exactly the Office default values; anything else is kept as is.
const stripMSFormatting = (element: Element) => {
    if (!(element instanceof HTMLElement)) {
        return;
    }

    const {style} = element;
    const hasOfficeDefaults =
        style.lineHeight === 'normal' && style.marginBottom === '0.0001pt';
    if (!hasOfficeDefaults) {
        return;
    }

    for (const property of ['line-height', 'margin-bottom']) {
        style.removeProperty(property);
    }
};

// Since we have regex based formatting and some previous steps can leave attributes
// such as class="" or style="", we remove any attribute with an empty value here.
// A class attribute that contains no actual class names (e.g. only whitespace) is
// removed as well.
// NOTE: this also strips attributes that are legitimately empty, e.g. boolean
// attributes written without a value.
const removeEmptyProperties = (element: Element) => {
    // element.attributes is a live collection: removing an attribute while
    // iterating it directly shifts the remaining ones and causes the next
    // attribute to be skipped, so we iterate over a snapshot instead
    for (const attr of Array.from(element.attributes)) {
        if (!attr.value) {
            element.removeAttribute(attr.name);
        }
    }

    if (element.classList.length === 0) {
        element.removeAttribute('class');
    }
};

// Drops the dir attribute, which Google docs adds (as dir="ltr") on most elements.
// Nothing happens when the attribute is missing or empty.
const removeDir = (element: Element) => {
    const direction = element.getAttribute('dir');
    if (!direction) {
        return;
    }

    element.removeAttribute('dir');
};

// MS occasionally includes tags in the o or v namespaces, but they're of no use to us.
// Inline style tags are also skipped to avoid weirdness with TinyMCE.
//
// Returns a new queue without those elements; the input queue is not modified.
// NOTE(review): the elements are only dropped from the queue, they are not
// detached from the DOM here.
const removeProprietaryNamespaceAndStyleTags = (queue: Element[]) => {
    return queue.filter((element) => {
        // tagName is upper-case for HTML elements in an HTML document
        // (e.g. "O:P", "STYLE"), so normalise before comparing
        const tagName = element.tagName.toLowerCase();

        return !(
            tagName.startsWith('o:') ||
            tagName.startsWith('v:') ||
            tagName === 'style'
        );
    });
};

// MS office will occasionally leave empty spans behind, so we tidy them up here.
// A span counts as empty when it has no child elements and no text. Empty spans
// are detached from the DOM and left out of the returned queue; the input queue
// itself is not modified.
// The sweep is repeated until a pass removes nothing, so that spans which only
// become empty once their nested spans are gone are taken care of too.
const removeEmptySpans = (queue: Element[]) => {
    const isEmptySpan = (candidate: Element) =>
        candidate instanceof HTMLSpanElement &&
        candidate.children.length === 0 &&
        candidate.textContent === '';

    let remaining = [...queue];

    for (;;) {
        const survivors: Element[] = [];
        let removedAny = false;

        remaining.forEach((candidate) => {
            if (isEmptySpan(candidate)) {
                candidate.remove();
                removedAny = true;
                return;
            }
            survivors.push(candidate);
        });

        remaining = survivors;

        if (!removedAny) {
            return remaining;
        }
    }
};

// Rewrites the src of merge field placeholder images. Only <img> elements that
// carry the mis-merge-field class and whose src starts with
// /image/placeholder/value are touched; their src is replaced with whatever
// imageUrlToBase64 resolves to for the original src (presumably a base64 data
// URL - confirm against that helper).
const reformatMergeField = async (element: Element) => {
    const src = element.getAttribute('src');

    const isMergeFieldImage =
        element instanceof HTMLImageElement &&
        element.classList.contains('mis-merge-field');

    if (
        !isMergeFieldImage ||
        typeof src !== 'string' ||
        !src.startsWith('/image/placeholder/value')
    ) {
        return;
    }

    const converted = await imageUrlToBase64(src);
    element.setAttribute('src', converted);
};

/**
 * Cleans up HTML pasted from MS Office / Google Docs before it is handed on.
 *
 * The steps for this function are largely taken from this blog post:
 * https://mitcho.com/blog/projects/disgusting-word-formatted-html-and-how-to-fix-it/
 *
 * @param content - the raw pasted HTML
 * @returns the cleaned-up HTML
 */
export const preprocessPastedContent = async (
    content: string,
): Promise<string> => {
    // Parse the pasted markup inside a separate, inert document. Markup assigned
    // to an element owned by the live document can start loading images and run
    // inline handlers such as onerror even though the element is never attached.
    const inertDocument = document.implementation.createHTMLDocument('');
    const placeholderDiv = inertDocument.createElement('div');
    placeholderDiv.innerHTML = content;

    const queue = removeEmptySpans(
        removeProprietaryNamespaceAndStyleTags(
            convertTreeToQueue(placeholderDiv),
        ),
    );

    // The order of the steps matters: class/style stripping runs first so that
    // removeEmptyProperties can clean up any class="" or style="" it leaves
    // behind, and copyInlineStyles runs afterwards so that it mirrors the final
    // inline style.
    for (const element of queue) {
        removeUnnecessaryClasses(element);
        stripMSFormatting(element);
        removeEmptyProperties(element);
        removeDir(element);
        copyInlineStyles(element);
        removeID(element);
    }

    // Each merge field image is converted independently of the others, so the
    // conversions run concurrently instead of one after another.
    await Promise.all(queue.map((element) => reformatMergeField(element)));

    // placeholderDiv is never attached to a document tree, so there is nothing
    // to detach afterwards; it is simply garbage collected.
    return placeholderDiv.innerHTML;
};
