import { HTMLElement, Node, NodeType, parse } from "node-html-parser";
import { InsertPosition } from "node-html-parser/dist/nodes/html";
import sanitize from "sanitize-html";

/**
 * Tells whether `node` is a text node whose text contains `keyword`,
 * compared case-insensitively as a plain substring.
 *
 * @param node - Node to inspect; anything that is not a text node never matches.
 * @param keyword - Text to look for.
 * @returns `false` for non-text nodes, otherwise the result of the substring test.
 */
function keywordMatch(node: HTMLElement, keyword: string): boolean {
  if (node.nodeType !== NodeType.TEXT_NODE) {
    return false;
  }
  const haystack = node.textContent?.toLowerCase();
  return haystack?.includes(keyword.toLowerCase());
}

/**
 * Produces a freshly parsed copy of the whole tree that `node` belongs to.
 *
 * Follows `parentNode` links up to the top-most ancestor, serializes that
 * ancestor with `toString()` and parses the resulting markup again.
 *
 * @param node - Any node of the tree to copy.
 * @returns The root of the newly parsed tree.
 */
function cloneRoot(node: Node): HTMLElement {
  let topMost: Node = node;
  for (let parent = topMost.parentNode; parent; parent = topMost.parentNode) {
    topMost = parent;
  }
  const serialized = topMost.toString();
  return parse(serialized) as HTMLElement;
}

/**
 * Returns the closest node accepted by `where`, starting with `node` itself
 * and then moving up through its ancestors via `parentNode`.
 *
 * @param node - Starting node; a falsy value yields `undefined`.
 * @param where - Predicate deciding whether a node qualifies. Defaults to
 *   "is an element node".
 * @returns The first qualifying node, or `undefined` when the chain is
 *   exhausted without a match.
 */
function findFirstParentWhere(
  node: Node,
  where = (pointer: Node) => pointer.nodeType === NodeType.ELEMENT_NODE
): HTMLElement | undefined {
  for (let current: Node = node; current; current = current.parentNode) {
    if (where(current)) {
      return current as HTMLElement;
    }
  }
  return undefined;
}

/**
 * Collects every text node under `html` whose text contains `keyword`
 * (case-insensitive substring match), in document order.
 *
 * Heading elements (`h1`-`h6`) are skipped together with everything inside them.
 *
 * @param html - Root of the tree to search.
 * @param keyword - Text to look for.
 * @returns The matching nodes, or `undefined` when `html` is falsy.
 */
export function findKeywordInHTML(html: HTMLElement, keyword: string): HTMLElement[] | undefined {
  if (!html) {
    return undefined;
  }

  const headingPattern = /^h[1-6]$/i;

  const collect = (current: HTMLElement): HTMLElement[] => {
    // Headings contribute nothing and are not descended into.
    if (headingPattern.test(current.tagName)) {
      return [];
    }

    const own: HTMLElement[] = keywordMatch(current, keyword) ? [current] : [];
    if (current.nodeType !== NodeType.ELEMENT_NODE) {
      return own;
    }

    const nested = current.childNodes.flatMap((child: Node) => collect(child as HTMLElement));
    return [...own, ...nested];
  };

  return collect(html);
}

/**
 * Wraps the first whole-token occurrence of `keyword` in `wholeText` with
 * `wrapperElement` and returns the resulting markup.
 *
 * The match is case-insensitive and also accepts a trailing `s` or `'s`
 * (e.g. "cat" matches "cats" and "cat's"). The matched text, with its
 * original casing, becomes the wrapper's text content.
 *
 * @param wholeText - Text or markup to search.
 * @param keyword - Literal keyword; regex metacharacters are matched verbatim.
 * @param wrapperElement - Element whose `textContent` is overwritten with the
 *   match and whose `outerHTML` replaces the match.
 * @returns The text with the first match wrapped, or `undefined` when the
 *   keyword is blank or does not occur.
 */
function moveKeywordIntoWrapper(
  wholeText: string,
  keyword: string,
  wrapperElement: HTMLElement
): string | undefined {
  // A blank keyword would otherwise match the empty string and insert an empty wrapper.
  if (!wholeText || !keyword || keyword.trim() === "") {
    return undefined;
  }

  // Escape metacharacters so keywords such as "c++" or "a.b" are treated literally
  // instead of throwing or matching unrelated text.
  const escapedKeyword = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // Lookarounds behave like \b for ordinary words, but still work when the
  // keyword itself starts or ends with a non-word character.
  const regex = new RegExp(`(?<!\\w)(${escapedKeyword}(s|'s)?)(?!\\w)`, "i");

  const match = regex.exec(wholeText);
  if (!match) {
    return undefined;
  }

  wrapperElement.textContent = match[1];
  const wrappedHtml = wrapperElement.outerHTML;

  // A replacer function keeps "$1", "$&" etc. inside the wrapper HTML from being
  // interpreted as replacement patterns.
  return wholeText.replace(regex, () => wrappedHtml);
}

/**
 * Wraps the first occurrence of `keyword` found in the markup of the nearest
 * element at or above `node` in a `<mark>` element.
 *
 * The element's `innerHTML` is rewritten in place when a match is found; the
 * returned value is a re-parsed copy of the whole tree.
 *
 * NOTE(review): the search runs over `innerHTML`, so it presumably can also
 * match text inside tag attributes - confirm whether that matters for callers.
 *
 * @param node - Node at which the upward search for an element starts.
 * @param keyword - Keyword to highlight.
 * @returns A copy of the (possibly updated) tree, or `undefined` when no
 *   element is found at or above `node`.
 */
export function highlightNode(node: HTMLElement, keyword: string): HTMLElement | undefined {
  const container = findFirstParentWhere(node);
  if (!container) {
    return undefined;
  }

  const markElement = new HTMLElement("mark", {}, "", undefined, [0, 0]);
  const highlightedMarkup = moveKeywordIntoWrapper(container.innerHTML, keyword, markElement);

  // Leave the element untouched when the keyword was not found.
  if (highlightedMarkup) {
    container.innerHTML = highlightedMarkup;
  }

  return cloneRoot(container);
}

/**
 * Unwraps the nearest `<mark>` at or above `node`, leaving its children in the
 * exact position the `<mark>` occupied.
 *
 * The tree is modified in place; the returned value is a re-parsed copy of the
 * whole tree.
 *
 * @param node - Node inside (or equal to) the highlight to remove.
 * @returns A copy of the updated tree, or `undefined` when there is no
 *   enclosing `<mark>` or the `<mark>` is not attached to a parent.
 */
export function removeHighlight(node: Node): HTMLElement | undefined {
  const highlightPointer = findFirstParentWhere(
    node,
    (pointer) =>
      pointer.nodeType === NodeType.ELEMENT_NODE && (pointer as HTMLElement).rawTagName === "mark"
  );
  if (!highlightPointer) {
    return undefined;
  }

  const highlightContainer = highlightPointer.parentNode;
  if (!highlightContainer) {
    // A detached <mark> has no surrounding tree to unwrap into.
    return undefined;
  }

  // Replace the <mark> where it stands. Appending its children to the end of
  // the container would reorder the surrounding text.
  highlightPointer.replaceWith(...highlightPointer.childNodes);

  return cloneRoot(highlightContainer);
}

/**
 * Links the first occurrence of `keyword` inside `node` to `url`.
 *
 * - When `node` already sits directly inside an `<a>`, only that anchor's
 *   `href` is updated.
 * - Otherwise the matched keyword is wrapped in a new `<a>` (opening in a new
 *   tab) and `node` is swapped for the re-parsed result inside its parent.
 *
 * The tree is modified in place; the returned value is a re-parsed copy of
 * the whole tree.
 *
 * @param node - Text node containing the keyword.
 * @param url - Link target.
 * @param keyword - Keyword to turn into a link.
 * @returns A copy of the (possibly updated) tree, or `undefined` when `node`
 *   is falsy.
 */
export function addLink(node: Node, url: string, keyword: string): HTMLElement | undefined {
  if (!node) {
    return undefined;
  }

  const parent = node.parentNode;

  if (parent?.rawTagName === "a") {
    parent.setAttribute("href", url);
  } else if (parent) {
    // The attribute string is written verbatim, so a double quote in the URL
    // must be encoded or it would terminate the href value early.
    const safeUrl = url.replace(/"/g, "&quot;");
    const anchor = new HTMLElement(
      "a",
      {},
      `href="${safeUrl}" target="_blank" rel="noreferrer" style="text-decoration: underline;"`,
      undefined,
      [0, 0]
    );

    // Work on the raw (still entity-encoded) text: the result is parsed as
    // HTML again, and decoded text such as "&lt;b&gt;" would turn into markup.
    // Consequently a keyword containing "&", "<" or ">" only matches if it is
    // given in its encoded form.
    const linkedText = moveKeywordIntoWrapper(node.rawText, keyword, anchor);

    if (linkedText) {
      parent.exchangeChild(node, parse(linkedText));
    }
  }
  // A node without a parent has nowhere to be replaced, so it is left as is.

  return cloneRoot(node);
}

/**
 * Unwraps the nearest `<a>` at or above `node`: the anchor is swapped for a
 * re-parsed copy of its inner markup inside its parent.
 *
 * The tree is modified in place; the returned value is a re-parsed copy of
 * the whole tree.
 *
 * @param node - Node inside (or equal to) the anchor to remove.
 * @returns A copy of the updated tree, or `undefined` when there is no
 *   enclosing `<a>`.
 */
export function removeLink(node: HTMLElement): HTMLElement | undefined {
  const isAnchor = (candidate: Node): boolean =>
    candidate.nodeType === NodeType.ELEMENT_NODE && (candidate as HTMLElement).rawTagName === "a";

  const anchor = findFirstParentWhere(node, isAnchor);
  if (!anchor) {
    return undefined;
  }

  const container = anchor.parentNode;
  const unwrappedContent = parse(anchor.innerHTML);
  container.exchangeChild(anchor, unwrappedContent);

  return cloneRoot(container);
}

/**
 * Returns a copy of the tree containing `node` with every highlight removed.
 *
 * Two highlight forms are handled:
 * - elements carrying the inline style `background-color: #FFFF00`, whose
 *   `style` attribute is dropped;
 * - `<mark>` elements (the form produced by `highlightNode`), which are
 *   unwrapped so that their children take their place.
 *
 * The input tree is not modified.
 *
 * @param node - Any node of the tree to clean.
 * @returns The cleaned copy.
 */
export function removeAllHighlights(node: HTMLElement): HTMLElement {
  const root = cloneRoot(node);

  for (const styled of root.querySelectorAll('[style="background-color: #FFFF00"]')) {
    styled.removeAttribute("style");
  }

  // Highlights in this module are <mark> wrappers; without this step they
  // would survive a "remove all".
  for (const mark of root.querySelectorAll("mark")) {
    mark.replaceWith(...mark.childNodes);
  }

  return root;
}

/**
 * Returns a copy of the tree containing `node` in which every `<a>` element
 * has been replaced by its own child nodes.
 *
 * The input tree is not modified.
 *
 * @param node - Any node of the tree to clean.
 * @returns The copy without anchors.
 */
export function removeAllLinks(node: HTMLElement): HTMLElement {
  const copy = cloneRoot(node);
  for (const anchor of copy.querySelectorAll("a")) {
    anchor.replaceWith(...anchor.childNodes);
  }
  return copy;
}

/**
 * Parses `content` and links the first text node that contains `keyword`
 * to `url`.
 *
 * @param content - HTML markup to process.
 * @param keyword - Keyword to link.
 * @param url - Link target.
 * @param overrideExistingAnchor - When `false`, text nodes whose direct
 *   parent is an `<a>` are skipped instead of having their link retargeted.
 * @returns The updated tree, or `undefined` when no suitable text node exists.
 */
export function addLinkToFirstKeyword(
  content: string,
  keyword: string,
  url: string,
  overrideExistingAnchor: boolean = true
): HTMLElement | undefined {
  const tree = parse(content);
  const matches = findKeywordInHTML(tree, keyword) || [];

  const candidates = overrideExistingAnchor
    ? matches
    : matches.filter((item) => item.parentNode.rawTagName !== "a");

  const target = candidates[0];
  if (!target) {
    return undefined;
  }

  return addLink(target, url, keyword);
}

/**
 * Parses `content` and removes the link around the first text node that
 * contains `keyword` and whose direct parent has `href` equal to `url`.
 *
 * @param content - HTML markup to process.
 * @param keyword - Keyword whose link should be removed.
 * @param url - Exact `href` value the link must have.
 * @returns The updated tree, or `undefined` when no such link exists.
 */
export function removeLinkFromFirstKeyword(
  content: string,
  keyword: string,
  url: string
): HTMLElement | undefined {
  const matches = findKeywordInHTML(parse(content), keyword) || [];

  for (const match of matches) {
    if (match.parentNode.getAttribute("href") === url) {
      return removeLink(match);
    }
  }

  return undefined;
}

/**
 * Returns a copy of the tree containing `html` with `newTagContent` inserted
 * next to the first element matching `insertionPointTag`.
 *
 * The input tree is not modified. When nothing matches the selector the copy
 * is returned unchanged.
 *
 * @param html - Any node of the tree to copy.
 * @param insertionPointTag - Selector passed to `querySelector`.
 * @param newTagContent - Markup to insert.
 * @param where - Position relative to the matched element.
 * @returns The copy, with the insertion applied if a target was found.
 */
export function insertTag(
  html: HTMLElement,
  insertionPointTag: string,
  newTagContent: string,
  where: InsertPosition
): HTMLElement {
  const copy = cloneRoot(html);
  const insertionPoint = copy.querySelector(insertionPointTag);

  if (insertionPoint != null) {
    insertionPoint.insertAdjacentHTML(where, newTagContent);
  }

  return copy;
}

/**
 * Reduces an HTML string to its text-bearing markup.
 *
 * Only the tags listed below are allowed (plus the sitemap tags `urlset`,
 * `url` and `loc`); other tags are handled with the "discard" mode. Attribute
 * names starting with `on` are dropped, and the only attribute allow-list is
 * for `<a>` (`href`, `name`, `target`, `rel`). Whitespace in the result is
 * then compacted and the string trimmed.
 *
 * @param html - The HTML string to be processed.
 * @returns The cleaned and whitespace-compacted HTML string.
 */
export function keepTextHtmlOnly(html: string): string {
  const sanitized = sanitize(html, {
    allowedTags: [
      // headings
      "h1",
      "h2",
      "h3",
      "h4",
      "h5",
      "h6",
      // blocks, links and lists
      "blockquote",
      "p",
      "a",
      "ul",
      "ol",
      "nl",
      "li",
      // inline formatting
      "b",
      "i",
      "strong",
      "em",
      "strike",
      "code",
      // separators and containers
      "hr",
      "br",
      "div",
      // tables
      "table",
      "thead",
      "caption",
      "tbody",
      "tr",
      "th",
      "td",
      "pre",
      // sitemaps
      "urlset",
      "url",
      "loc",
    ],

    allowedAttributes: {
      a: ["href", "name", "target", "rel"],
    },

    disallowedTagsMode: "discard",

    transformTags: {
      "*": (tagName, attribs) => {
        // Copy every attribute except those named like event handlers
        // ('onclick', 'onmouseover', ...).
        const withoutHandlers: Record<string, string> = {};
        for (const attributeName of Object.keys(attribs)) {
          if (/^on/.test(attributeName)) {
            continue;
          }
          withoutHandlers[attributeName] = attribs[attributeName];
        }

        return { tagName, attribs: withoutHandlers };
      },
    },

    allowVulnerableTags: false,
  });

  // Applied in order; each rule sees the output of the previous one.
  const whitespaceRules: Array<[RegExp, string]> = [
    [/ +/g, " "], // runs of spaces -> one space
    [/\n{4,}/g, "\n"], // four or more consecutive newlines -> one newline
    [/^\s+$/gm, "\n"], // whitespace-only lines -> a single newline
    [/\t+/g, "\t"], // runs of tabs -> one tab
  ];

  let compacted = sanitized;
  for (const [pattern, replacement] of whitespaceRules) {
    compacted = compacted.replace(pattern, replacement);
  }

  return compacted.trim();
}

/**
 * Sanitizes `html` with the sanitize-html defaults (minus the requested tags
 * and attributes) and normalizes the result for comparison.
 *
 * Note that the output is not meant for display: after sanitizing, ALL
 * whitespace is removed and the string is lower-cased.
 *
 * @param html - Markup to sanitize.
 * @param removedAttributes - Attribute names to drop from every default
 *   per-tag allow-list.
 * @param removedTags - Tag names to drop from the default tag allow-list.
 * @returns The normalized markup; an empty string when `html` is empty or
 *   missing.
 */
export function sanitizeHtml(
  html: string,
  removedAttributes: string[] = [],
  removedTags: string[] = []
): string {
  if (!html) {
    // Honour the declared `string` return type instead of returning undefined.
    return "";
  }

  // Build fresh collections so the library defaults are never mutated.
  const allowedTags: string[] = sanitize.defaults.allowedTags.filter(
    (tag) => !removedTags.includes(tag)
  );

  const allowedAttributes: { [key: string]: sanitize.AllowedAttribute[] } = {};
  for (const [tag, attributes] of Object.entries(sanitize.defaults.allowedAttributes)) {
    // Only plain string entries can match a removed attribute name; object
    // entries are always kept.
    allowedAttributes[tag] = attributes.filter(
      (entry) => !(typeof entry === "string" && removedAttributes.includes(entry))
    );
  }

  const cleanHtml = sanitize(html, {
    allowedAttributes,
    allowedTags,
  });

  return cleanHtml.replace(/\s/g, "").toLowerCase();
}

/**
 * Compares two HTML strings after passing both through `sanitizeHtml` with
 * the `target` attribute removed.
 *
 * @param htmlStr1 - First markup string.
 * @param htmlStr2 - Second markup string.
 * @returns `true` when both normalized strings are identical; `false` when
 *   they differ or when either input is empty or missing.
 */
export function compareHTMLStrings(htmlStr1: string, htmlStr2: string): boolean {
  if (!(htmlStr1 && htmlStr2)) {
    return false;
  }

  const ignoredAttributes = ["target"];
  const [first, second] = [htmlStr1, htmlStr2].map((markup) =>
    sanitizeHtml(markup, ignoredAttributes)
  );

  return first === second;
}

/**
 * Returns every `h1`-`h6` element found in `root`.
 *
 * @param root - Either an already parsed tree or an HTML string, which is
 *   parsed first.
 * @returns The heading elements matched by the selector.
 */
export function getAllHtags(root: HTMLElement | string): HTMLElement[] {
  const tree = typeof root === "string" ? parse(root) : root;
  return tree.querySelectorAll("h1, h2, h3, h4, h5, h6");
}

/**
 * Splits an HTML article into sections, starting a new section at every
 * top-level `<h2>`.
 *
 * Only direct element children of the parsed root are considered; other
 * top-level nodes (such as text between elements) are left out. Elements that
 * precede the first `<h2>` form a section of their own.
 *
 * @param content - The HTML content of the article.
 * @returns The sections as HTML strings, in document order.
 */
export function chunkArticleByH2(content: string): string[] {
  const root = parse(content);
  const sections: string[] = [];
  let currentSection = "";

  for (const node of root.childNodes) {
    if (!(node instanceof HTMLElement)) {
      continue;
    }

    if (node.tagName.toLowerCase() === "h2") {
      // Close the section collected so far before starting the next one.
      if (currentSection) {
        sections.push(currentSection);
      }
      currentSection = node.outerHTML;
    } else {
      currentSection += node.outerHTML;
    }
  }

  // Flush the trailing section.
  if (currentSection) {
    sections.push(currentSection);
  }

  return sections;
}

/**
 * Tells whether a section contains at least two paragraph-like elements.
 *
 * Every `p`, `ul`, `ol` and `li` element counts individually, so a list and
 * its items each add to the total.
 *
 * @param sectionHtml - Markup of the section.
 * @returns `true` when two or more such elements are present.
 */
export function sectionHasTwoParagraphs(sectionHtml: string): boolean {
  const paragraphLikeCount = parse(sectionHtml).querySelectorAll("p, ul, ol, li").length;
  return paragraphLikeCount >= 2;
}

/**
 * Checks whether at least half of the article's h2-delimited sections pass
 * `sectionHasTwoParagraphs`.
 *
 * The counts are written to the console. An article that yields no sections
 * satisfies the condition (0 >= 0).
 *
 * @param contentHtml - The HTML content of the article.
 * @returns `true` if the condition is met, `false` otherwise.
 */
export function twoParagraphsAfterMostHtag(contentHtml: string): boolean {
  const sections = chunkArticleByH2(contentHtml);
  const validSectionCount = sections.filter((section) => sectionHasTwoParagraphs(section)).length;

  console.log(
    `twoParagraphsAfterMostHtag validSectionCount=${validSectionCount} totalSectionCount=${sections.length}`
  );

  return validSectionCount >= sections.length / 2;
}

/**
 * Logs the text of every `h1`-`h6` heading in `contentHtml` to the console,
 * prefixed with `label` and the number of headings.
 *
 * @param label - Prefix identifying the log line.
 * @param contentHtml - Markup whose headings are listed.
 */
export function logSectionHeaders(label: string, contentHtml: string): void {
  const headerTexts: string[] = [];
  for (const header of getAllHtags(contentHtml)) {
    headerTexts.push(header.textContent);
  }
  console.log(`${label} - all headers [${headerTexts.length}]:`, headerTexts);
}

/**
 * Ensures every `<a>` in `htmlContent` carries `nofollow` in its `rel`
 * attribute, keeping the link types that are already present.
 *
 * @param htmlContent - Markup to process.
 * @returns The serialized markup with the updated `rel` attributes.
 */
export function makeAhrefsNoFollow(htmlContent: string): string {
  const root = parse(htmlContent);

  for (const anchor of root.querySelectorAll("a")) {
    // `rel` is a whitespace-separated token list: split on any whitespace run
    // and drop empty tokens so irregular spacing cannot produce bogus entries.
    const tokens = (anchor.getAttribute("rel") ?? "").split(/\s+/).filter(Boolean);
    const relTokens = Array.from(new Set(tokens));

    // Link types are case-insensitive, so "NoFollow" already counts.
    const hasNoFollow = relTokens.some((token) => token.toLowerCase() === "nofollow");
    if (!hasNoFollow) {
      relTokens.push("nofollow");
    }

    anchor.setAttribute("rel", relTokens.join(" "));
  }

  return root.toString();
}

/**
 * Extracts one section per heading (`h1`-`h6`): the heading itself followed
 * by its following element siblings up to, but not including, the next
 * heading of any level.
 *
 * @param htmlContent - Markup to split.
 * @returns The sections as HTML strings, one per heading, in document order.
 */
export function extractSections(htmlContent: string): string[] {
  const root = parse(htmlContent);

  // Match real headings only. A plain "starts with H" test would also stop at
  // <hr>, <header> or <hgroup>.
  const headingTag = /^h[1-6]$/i;

  return root.querySelectorAll("h1, h2, h3, h4, h5, h6").map((header) => {
    let sectionHtml = header.outerHTML;

    for (
      let sibling = header.nextElementSibling;
      sibling && !headingTag.test(sibling.tagName ?? "");
      sibling = sibling.nextElementSibling
    ) {
      sectionHtml += sibling.outerHTML;
    }

    return sectionHtml;
  });
}

/**
 * Sets an attribute on every element of `html` matched by `tag`.
 *
 * @param html - Markup to process.
 * @param tag - Selector passed to `querySelectorAll`.
 * @param attribute - Name (`key`) and value of the attribute to set.
 * @returns The serialized markup with the attribute applied.
 */
export function addAttributeToTag(
  html: string,
  tag: string,
  attribute: {
    key: string;
    value: string;
  }
): string {
  const tree = parse(html);

  for (const element of tree.querySelectorAll(tag)) {
    element.setAttribute(attribute.key, attribute.value);
  }

  return tree.toString();
}
