/**
 * Produces a canonical string form of a URL so that equivalent URLs compare
 * equal.
 *
 * Normalization steps:
 * 1. Prepend `https://` when the input has no http(s) scheme.
 * 2. Optionally upgrade a leading `http://` to `https://`.
 * 3. Parse with the WHATWG `URL` class, which lowercases the scheme and host,
 *    guarantees a leading `/` on the path and drops the scheme's default port.
 * 4. Strip one trailing `/` from any non-root path.
 * 5. Sort query parameters by name (stable, so repeated names keep their
 *    relative order) and re-serialize them with form-urlencoding.
 *
 * @param inputUrl - The URL to canonicalize; the scheme may be omitted.
 * @param forceHttps - When true (default), an `http://` scheme is rewritten to
 *   `https://` before parsing.
 * @returns The canonical URL, or `inputUrl` unchanged (after logging an error)
 *   when it cannot be parsed.
 */
export function canonicalizeUrl(inputUrl: string, forceHttps = true): string {
  try {
    // Scheme detection is case-insensitive so "HTTP://host" is not mistaken
    // for a scheme-less input and given a second scheme.
    const hasHttpScheme = /^https?:\/\//i.test(inputUrl);
    const urlWithProtocol = hasHttpScheme ? inputUrl : "https://" + inputUrl;

    // Anchor the rewrite to the start of the string: an "http://" that appears
    // later (e.g. inside a redirect query parameter) must not be touched.
    const forceHttpsUrl = forceHttps
      ? urlWithProtocol.replace(/^http:\/\//i, "https://")
      : urlWithProtocol;

    // Parsing already lowercases scheme/host, ensures the path starts with
    // "/" and removes the default port for the (possibly rewritten) scheme.
    const parsedUrl = new URL(forceHttpsUrl);

    // Remove a single trailing '/' from the path, but keep the root path.
    if (parsedUrl.pathname !== "/" && parsedUrl.pathname.endsWith("/")) {
      parsedUrl.pathname = parsedUrl.pathname.slice(0, -1);
    }

    // Sort query parameters. Using sort()/toString() keeps every value of a
    // repeated key and re-encodes reserved characters such as '&' and '='.
    if (parsedUrl.search) {
      const params = new URLSearchParams(parsedUrl.search);
      params.sort();
      parsedUrl.search = params.toString();
    }

    return parsedUrl.toString();
  } catch (err) {
    console.error(`Unable to canonicalize URL: ${inputUrl}`);
    return inputUrl;
  }
}

/**
 * Extracts the hostname (without port, path or query) from a URL.
 *
 * @param url - The URL to inspect. When it has no http(s) scheme, `http://`
 *   is assumed for parsing purposes.
 * @param includeProtocol - When true, the result is prefixed with the scheme,
 *   e.g. `https://example.com`. Defaults to false.
 * @returns The lowercased hostname, optionally prefixed with the scheme. If
 *   the URL cannot be parsed the error is logged and the original `url`
 *   argument is returned unchanged.
 */
export function getDomain(url: string, includeProtocol: boolean = false): string {
  try {
    // Work on a copy so the failure path can hand back the caller's input
    // rather than a string with a scheme glued onto it.
    const urlWithProtocol = /^https?:\/\//i.test(url) ? url : "http://" + url;

    const urlObj = new URL(urlWithProtocol);
    return includeProtocol ? `${urlObj.protocol}//${urlObj.hostname}` : urlObj.hostname;
  } catch (e) {
    console.error("Invalid URL:", e);
    return url;
  }
}

/**
 * Tries to discover the sitemap URL for the site that `url` belongs to.
 *
 * Lookup order:
 * 1. The first `Sitemap:` directive in `/robots.txt` (only when that request
 *    succeeds with a 2xx status).
 * 2. `/sitemap.xml` at the site root, when it responds with status 200.
 *
 * @param url - Any URL on the target site; it is canonicalized first.
 * @returns The sitemap URL, or `null` when none is found.
 * @throws Rejects when the canonicalized `url` still cannot be parsed by
 *   `new URL()` or when either `fetch` call rejects.
 */
export async function getSitemap(url: string): Promise<string | null> {
  // Parse the URL.
  const baseURL = new URL(canonicalizeUrl(url));

  // Check robots.txt and look for a sitemap directive in it.
  const robotsTxtURL = new URL("/robots.txt", baseURL);
  const robotsTxtResponse = await fetch(robotsTxtURL);

  // Only trust the body of a successful response; an error page could contain
  // arbitrary text.
  if (robotsTxtResponse.ok) {
    const robotsTxt = await robotsTxtResponse.text();

    // Anchored per line (m flag) so text inside comments is ignored. Optional
    // whitespace is allowed around the colon, and \S+ stops before a trailing
    // "\r" on CRLF files.
    const sitemapMatch = robotsTxt.match(/^[ \t]*sitemap:[ \t]*(\S+)/im);
    if (sitemapMatch && sitemapMatch[1]) {
      return sitemapMatch[1]; // Sitemap URL as written in robots.txt
    }
  }

  // If not found in robots.txt, try the conventional sitemap path.
  const sitemapURL = new URL("/sitemap.xml", baseURL);
  const sitemapResponse = await fetch(sitemapURL);

  if (sitemapResponse.status === 200) {
    return sitemapURL.href; // Default sitemap URL exists
  }

  // No sitemap could be located.
  return null;
}

/**
 * Checks whether a string looks like an absolute http(s) URL with a dotted
 * domain name.
 *
 * This is a pattern check, not a full URL parse. The pattern requires an
 * `http://` or `https://` scheme (lowercase), a host ending in a dot followed
 * by a 2-6 lowercase-letter TLD, and then only a limited set of path/query
 * characters. As a consequence bare hosts such as `localhost`, IP addresses
 * and TLDs longer than six letters are rejected.
 *
 * @param url - The candidate URL string.
 * @returns True when `url` matches the pattern.
 */
export function isValidUrl(url: string): boolean {
  // `https?` makes the "s" optional so plain http:// URLs are accepted too.
  const urlPattern =
    /^(https?:\/\/.)[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)$/;
  return urlPattern.test(url);
}

/**
 * Resolves `additionalPath` against `baseURL` using WHATWG URL resolution and
 * returns the resulting absolute URL.
 *
 * Note that this is reference resolution, not string concatenation: a path
 * starting with `/` replaces the base path entirely, and a relative path
 * replaces the last segment of a base that does not end in `/`.
 *
 * @param baseURL - Absolute URL to resolve against.
 * @param additionalPath - Path (or URL reference) to resolve.
 * @returns The serialized resolved URL.
 * @throws TypeError when `baseURL` is not a valid absolute URL.
 */
export function appendPath(baseURL: string, additionalPath: string): string {
  const resolved = new URL(additionalPath, baseURL);
  return resolved.href;
}
