import { bigram } from './n-gram';

/**
 * Computes the Sørensen–Dice coefficient of two strings, based on their
 * lowercased bigrams.
 *
 * Inspiration from dice-coefficient package
 * Recreated because it's ESM only
 *
 * Bigrams are compared as multisets: a bigram occurring several times only
 * counts as often as it occurs in both inputs. When neither input yields any
 * bigram, the result is 1 if the lowercased strings are equal and 0 otherwise.
 *
 * @see https://github.com/words/dice-coefficient/blob/main/index.js
 * @param str1 - First string to compare.
 * @param str2 - Second string to compare.
 * @returns Twice the number of shared bigrams divided by the total number of
 *   bigrams of both inputs, i.e. a value between 0 and 1.
 */
export function sorensenDiceCoefficient(str1: string, str2: string): number {
  // Lowercase once; both the bigrams and the fallback comparison use it.
  const lower1 = str1.toLowerCase();
  const lower2 = str2.toLowerCase();
  const left = bigram(lower1);
  const right = bigram(lower2);

  if (left.length === 0 && right.length === 0) {
    return lower1 === lower2 ? 1 : 0;
  }

  // How many occurrences of each right-hand bigram are still unmatched.
  const unmatched = new Map<string, number>();
  for (const pair of right) {
    unmatched.set(pair, (unmatched.get(pair) ?? 0) + 1);
  }

  let intersections = 0;
  for (const pair of left) {
    const remaining = unmatched.get(pair) ?? 0;
    if (remaining > 0) {
      intersections++;
      // Consume one occurrence so this pair never matches again.
      unmatched.set(pair, remaining - 1);
    }
  }

  return (2 * intersections) / (left.length + right.length);
}

/**
 * Compute best dice-coefficient from string arrays
 *
 * Every string of `arr1` is compared with every string of `arr2` and the
 * highest coefficient found is returned.
 *
 * @param arr1 - First list of candidate strings.
 * @param arr2 - Second list of candidate strings.
 * @returns The largest pairwise coefficient, or 0 when either array is empty
 *   (there is no pair to compare).
 */
export function maxSorensenDiceCoefficient(
  arr1: readonly string[],
  arr2: readonly string[],
): number {
  // Keep a running maximum instead of spreading all scores into Math.max:
  // that avoids the -Infinity result for empty input and the argument-count
  // limit on very large inputs. Coefficients are never negative, so 0 is a
  // safe starting point.
  let best = 0;
  for (const str1 of arr1) {
    for (const str2 of arr2) {
      best = Math.max(best, sorensenDiceCoefficient(str1, str2));
    }
  }
  return best;
}
