import { deburr } from 'lodash';

/**
 * Options accepted by `normalize`. Every field is optional.
 */
type NormalizeOptions = {
  /**
   * Collapse each run of consecutive whitespace into a single space.
   * Has no effect when `removeWhiteSpace` is also set.
   */
  singleWhiteSpace?: boolean;
  /**
   * Remove all whitespace.
   */
  removeWhiteSpace?: boolean;
  /**
   * List of words to remove from the normalized string.
   */
  stopWords?: string[];
  /**
   * Stop words are only removed when the string left after removing them
   * (trimmed) is strictly longer than this value; otherwise they are all kept.
   */
  minLengthStopWordRemoval?: number;
  /**
   * Characters to remove from the normalized string (matched case-insensitively).
   */
  removeCharacters?: string;
};

/**
 * Escape every character of `source` that has a special meaning in a regular
 * expression, so the result can be embedded in a `RegExp` pattern and match
 * `source` literally.
 *
 * `-` is escaped as well: it keeps the result safe when it is placed inside a
 * character class (`[...]`), where an unescaped `-` would otherwise create a
 * range (or an invalid, out-of-order range that makes `new RegExp` throw).
 *
 * NOTE: the result is meant for patterns compiled without the `u` flag; in
 * unicode mode `\-` is only a valid escape inside a character class.
 *
 * @param source raw text to embed in a pattern
 * @returns the escaped text
 */
function escapeRegExp(source: string): string {
  return source.replace(/[.*+?^${}()|[\]\\-]/g, '\\$&'); // $& means the whole matched string
}

/**
 * Normalize a string so it can be compared or searched loosely.
 *
 * Steps, in order:
 * 1. NFKD-decompose, deburr, lowercase and trim the source.
 * 2. Remove every character listed in `removeCharacters`.
 * 3. Remove stop words (whole, whitespace-delimited words only), unless the
 *    string left without them is not longer than `minLengthStopWordRemoval`.
 * 4. Remove all whitespace (`removeWhiteSpace`) or collapse runs of whitespace
 *    into one space (`singleWhiteSpace`); `removeWhiteSpace` wins if both are set.
 * 5. Trim the result.
 *
 * @param source string to normalize (`null` and `undefined` are supported)
 * @param options see `NormalizeOptions`
 * @returns a normalized string
 */
export function normalize(
  source: string | null | undefined,
  {
    singleWhiteSpace = false,
    removeWhiteSpace = false,
    stopWords = undefined,
    minLengthStopWordRemoval = 0,
    removeCharacters = undefined,
  }: NormalizeOptions = {},
): string {
  /**
   * @NOTE: deburr is not removing some vietnamese diacritics.
   * In combination with String.prototype.normalize with
   * "Compatibility Decomposition" mode (NFKD) seems to do the trick.
   *
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize
   */
  let normalizedStr = deburr(source?.normalize('NFKD')).toLowerCase().trim();

  if (removeCharacters) {
    // Inside a character class only `\`, `]`, `^` and `-` are special.
    // `-` must be escaped: otherwise ".-_" would mean "every character from
    // `.` to `_`", and "+-*" would be an out-of-order range that throws.
    const characterClass = removeCharacters.replace(/[\\\]^-]/g, '\\$&');
    const regexp = new RegExp(`[${characterClass}]`, 'gi');

    normalizedStr = normalizedStr.replace(regexp, '');
  }

  if (stopWords && stopWords.length > 0) {
    const normalizedTmp = stopWords.reduce((str, stopWord) => {
      // Run the stop word through the same NFKD + deburr steps as the source,
      // otherwise a stop word with diacritics could never match it.
      const escapedStopWord = escapeRegExp(deburr(stopWord.normalize('NFKD')));
      // The leading boundary is captured and restored; the trailing boundary
      // is a lookahead so it is not consumed and can serve as the leading
      // boundary of the next occurrence (e.g. "the the cat").
      const regexp = new RegExp(`(^|\\s)(?:${escapedStopWord})(?=\\s|$)`, 'gi');
      return str.replace(regexp, '$1');
    }, normalizedStr);

    // Don't remove stop words if the string is empty or not longer than
    // minLengthStopWordRemoval characters without them
    if (normalizedTmp.trim().length > minLengthStopWordRemoval) {
      normalizedStr = normalizedTmp;
    }
  }

  if (removeWhiteSpace) {
    normalizedStr = normalizedStr.replace(/\s/g, '');
  } else if (singleWhiteSpace) {
    normalizedStr = normalizedStr.replace(/\s+/g, ' ');
  }

  return normalizedStr.trim();
}
