import { decode } from './entities';

/*
A simple HTML parser with no external dependencies.

Use it to parse HTML CMS content returned by your API.
The CMS modules expect a DOM tree in nearly the format
returned by this parser, because this tree maps almost
directly onto the shape used by component libraries like
React or Vue.

The parser is a predictive recursive descent parser,
meaning it does not backtrack. It runs in O(n) time.
*/

// Void elements from the HTML 5.3 spec. They never have a closing tag, so
// they are treated as standalone tags rather than as elements that expect a
// matching close tag.
//
// The table is built on a null-prototype object so that membership tests
// with `in` only ever see the names listed here; on a plain object literal,
// inherited keys such as `constructor` or `toString` would also test true.
const voidElement: Readonly<Record<string, true>> = Object.assign(
    Object.create(null),
    {
        area: true,
        base: true,
        br: true,
        col: true,
        embed: true,
        hr: true,
        img: true,
        input: true,
        link: true,
        meta: true,
        param: true,
        source: true,
        track: true,
        wbr: true,
    },
);

/**
 * Splits the contents of a tag (the text between `<` and `>`) into the tag
 * name and the raw remainder that holds the attributes.
 *
 * @param tag - tag contents without the angle brackets, e.g. `a href="x"`.
 * @returns a two-element array `[name, rest]`: `name` is the leading run of
 * non-whitespace characters and `rest` is everything after it, including the
 * separating whitespace. Either element may be the empty string.
 */
function splitTag(tag: string): string[] {
    // `[\s\S]` instead of `.` so the remainder keeps attributes written on
    // following lines; `.` stops at the first line terminator and would
    // silently drop them.
    const parts = /^(\S*)([\s\S]*)/.exec(tag);
    return parts ? parts.slice(1) : [];
}

// Kinds of token produced by the lexer:
//   'OpenTag'  - `<name ...>`
//   'CloseTag' - `</name>`
//   'Tag'      - a standalone tag: `<name ... />`, or any tag whose name is
//                a void element
//   'Text'     - a run of characters containing neither `<` nor `>`
type LexTokenType = 'OpenTag'|'Text'|'CloseTag'|'Tag';

// A single token produced by lexHTML.
interface ILexToken {
    // Tag name; filled in for non-Text tokens only.
    tag?: string;
    // First capture group of the matching rule: the tag contents without the
    // angle brackets (and without the leading `/` of a close tag or the
    // trailing `/>` of a self-closing tag), or the text itself for Text.
    value: string;
    // Attribute name -> value map; filled in for non-Text tokens only.
    attributes?: { [p: string]: string };
    type: LexTokenType;
    // The complete matched source text, angle brackets included.
    all: string;
    // Offset of the token's first character in the comment-stripped input.
    pos: number;
}

// Result of parsing the contents of a tag (see parseTag).
interface ITagInfo {
    // Present (and always 'Tag') only when the tag name is a void element;
    // left absent otherwise so it does not override a token's own type when
    // the two objects are merged.
    type?: 'Tag';
    tag: string;
    attributes: { [p: string]: string };
}

/**
 * Parses the contents of a tag (the text between `<` and `>`) into a tag
 * name and an attribute map.
 *
 * Only attributes written as `name="value"` with double quotes are picked
 * up. The value is stored verbatim and the name is trimmed of surrounding
 * whitespace; anything else in the tag is ignored.
 *
 * @param tag - tag contents without the angle brackets.
 * @returns the tag name and its attributes. `type` is set to `'Tag'` only
 * when the name is a void element and is left absent otherwise, so that
 * spreading the result over a token does not overwrite the token's own type.
 */
function parseTag(tag: string): ITagInfo {
    const [tagName, rawAtts] = splitTag(tag);

    const atts: {[k: string]: string} = {};
    const attPattern = /(.*?)="(.*?)"/g;
    let found = attPattern.exec(rawAtts);
    while (found !== null) {
        atts[found[1].trim()] = found[2];
        found = attPattern.exec(rawAtts);
    }

    const retval: ITagInfo = {
        tag: tagName,
        attributes: atts,
    };

    // Own-property check: the `in` operator also sees inherited keys, which
    // would classify tags named e.g. `constructor` or `toString` as void.
    if (Object.prototype.hasOwnProperty.call(voidElement, tagName)) {
        retval.type = 'Tag';
    }

    return retval;
}

/**
 * Splits an HTML string into a flat list of tokens.
 *
 * HTML comments are removed first; token positions therefore refer to the
 * comment-stripped text. Every non-Text token is then enriched with the tag
 * name and attributes returned by parseTag (which also turns void elements
 * into 'Tag' tokens).
 *
 * @param inputHTML - the HTML to tokenise; `null`/`undefined` yields `[]`.
 * @returns the tokens in source order.
 * @throws Error `Invalid HTML at <pos>` when nothing can be consumed at a
 * position, e.g. a `<` that is never closed or a stray `>` in text.
 */
function lexHTML(inputHTML: string): Array<ILexToken> {
    if (inputHTML === null || inputHTML === undefined) {
        return [];
    }

    // Sticky (`y`) patterns only match starting exactly at `lastIndex`, so
    // the input can be scanned in place instead of re-slicing the remaining
    // text for every token.
    // Order matters: close tags and self-closing tags must be tried before
    // the more general open-tag rule, and text comes last.
    const rules: Array<{ pattern: RegExp; lexeme: LexTokenType }> = [
        { pattern: /<\/([^>]*?)>/y, lexeme: 'CloseTag' },
        { pattern: /<([^/>]*?)\/>/y, lexeme: 'Tag' },
        { pattern: /<([^>]*?)>/y, lexeme: 'OpenTag' },
        { pattern: /([^<>]*)/y, lexeme: 'Text' },
    ];

    // strip HTML comments; `[\s\S]` matches every character, including all
    // line terminators, without a per-character alternation
    const html = inputHTML.replace(/<!--[\s\S]*?-->/g, '');

    const tokens: Array<ILexToken> = [];
    let pos = 0;
    while (pos < html.length) {
        let foundMatch = false;
        for (let i = 0; i < rules.length; i += 1) {
            const { pattern, lexeme } = rules[i];
            pattern.lastIndex = pos;
            const matches = pattern.exec(html);
            if (matches) {
                const [all, value] = matches;

                // The Text rule can match the empty string; that happens
                // when the next character is `<` or `>` and no tag rule
                // applies, i.e. the input is malformed at this position.
                if (all.length === 0) {
                    throw new Error(`Invalid HTML at ${pos}`);
                }

                tokens.push({
                    type: lexeme,
                    value,
                    pos,
                    all,
                });
                pos += all.length;

                foundMatch = true;
                break;
            }
        }

        if (!foundMatch) {
            throw new Error(`Invalid HTML at ${pos}`);
        }
    }

    return tokens.map((t) => {
        if (t.type === 'Text') {
            return t;
        }
        // Tag info is spread last so that a void element's 'Tag' type
        // replaces the type assigned by the lexer rule.
        return { ...t, ...parseTag(t.value) };
    });
}

/**
 * A node of the tree returned by `parse`.
 *
 * The parser in this file produces 'Document', 'Element' and 'TextNode'
 * nodes only. NOTE(review): 'Widget' nodes and the `widget` / `data` fields
 * are never set here — presumably they are filled in by consumers of the
 * tree; confirm against the CMS modules.
 */
export interface IHtmlTreeNode {
    type: 'Document'|'Element'|'TextNode'|'Widget';
    widget?: string;
    /** Tag name; set on 'Element' nodes. */
    tag?: string;
    /** Entity-decoded text; set on 'TextNode' nodes. */
    text?: string;
    /** Attribute name -> value map; set on 'Element' nodes. */
    attributes?: ILexToken['attributes'];
    /** Child nodes; set on 'Document' and 'Element' nodes. */
    children?: Array<IHtmlTreeNode>;
    data?: {[key: string]: any};
}

/**
 * Builds a document tree from the token list produced by the lexer.
 *
 * This is a recursive descent parser with a single token of lookahead; it
 * never backtracks.
 *
 * Everything between `<script>` and its `</script>` is collapsed into one
 * text node built from the raw source text of the tokens in between.
 *
 * @param tokens - tokens in source order.
 * @returns a 'Document' node holding the top-level nodes.
 * @throws Error when an element is not closed by a matching close tag,
 * including when the input ends before the close tag.
 */
function parseHTML(tokens: Array<ILexToken>): IHtmlTreeNode {
    let pos = 0;

    // Nesting depth of <script> elements currently being parsed.
    const context = {
        scriptCount: 0,
    };

    // Consumes and returns the current token; undefined at end of input.
    function next(): ILexToken | undefined {
        const token = tokens[pos];
        pos += 1;
        return token;
    }

    // Peeks at a token without consuming it; undefined past end of input.
    function ahead(n = 0): ILexToken | undefined {
        return tokens[pos + n];
    }

    // Parses `<tag> children... </tag>`, starting at the open tag.
    function parseElement(): IHtmlTreeNode {
        const t = next();

        if (!t || t.type !== 'OpenTag') {
            throw new Error('Expected open tag');
        }

        if (t.tag === 'script') {
            context.scriptCount += 1;
        }

        let children: Array<IHtmlTreeNode> = [];
        // eslint-disable-next-line @typescript-eslint/no-use-before-define
        for (let child = parseNode(); child; child = parseNode()) {
            children.push(child);
        }

        // The input may end before the element is closed; report that as a
        // parse error rather than failing on an undefined token.
        const t2 = next();
        if (!t2 || t2.type !== 'CloseTag') {
            throw new Error('Expected close tag');
        }

        if (t2.tag !== t.tag) {
            throw new Error(`Expected closing '${t.tag}'`);
        }

        if (t.tag === 'script') {
            context.scriptCount -= 1;
            // Inside a script every child is a text node; merge them.
            children = [{ type: 'TextNode', text: children.map(e => e.text).join('') }];
        }

        return {
            type: 'Element',
            tag: t.tag,
            attributes: t.attributes,
            children,
        };
    }

    // Parses one node, or returns null when the next token cannot start a
    // node (end of input or a close tag).
    function parseNode(): IHtmlTreeNode|null {
        const t = ahead();

        if (!t) { return null; }

        if (context.scriptCount > 0) {
            if (t.type === 'CloseTag' && t.tag === 'script') {
                return null; // end of script
            }
            // in script, so every token is kept as its raw source text
            next();
            return { type: 'TextNode', text: decode(t.all) };
        }

        switch (t.type) {
            case 'Text':
                next();
                return { type: 'TextNode', text: decode(t.value) };
            case 'OpenTag':
                return parseElement();
            case 'Tag':
                next();
                return {
                    type: 'Element', tag: t.tag, attributes: t.attributes, children: [],
                };
            default:
                return null;
        }
    }

    // Collects top-level nodes until parseNode yields null.
    // NOTE(review): a close tag with no matching open tag also yields null,
    // so parsing stops there and the remaining tokens are silently ignored.
    function parseDocument(): IHtmlTreeNode {
        const children: Array<IHtmlTreeNode> = [];
        for (let child = parseNode(); child; child = parseNode()) {
            children.push(child);
        }

        return {
            type: 'Document',
            children,
        };
    }

    return parseDocument();
}

/**
 * Parses an HTML string into a document tree.
 *
 * The input is tokenised first and the tokens are then assembled into
 * nested nodes.
 *
 * @param html - the HTML source to parse.
 * @returns the root 'Document' node.
 */
export function parse(html: string): IHtmlTreeNode {
    return parseHTML(lexHTML(html));
}
