import { Graph, alg } from "@dagrejs/graphlib";

import { distance, token_set_ratio } from "fuzzball";
import {
  DuplicateCheck,
  DuplicateIssue,
  StandardCheck,
  UploadDataContainer,
} from "../../../types/quickAnalysis";
import { normalizeString } from "../../../utils/strings";

/**
 * Minimum `duplicateCheck.meta.threshold` at which the fuzzy duplicate check
 * additionally tries fuzzball's `token_set_ratio` on row pairs that the
 * edit-distance test did not link.
 *
 * NOTE(review): the meaning of "[0.05, 0.2]" in the trailing comment is not
 * visible in this file — presumably the range of thresholds this value was
 * picked from; confirm with the owner of the threshold setting.
 */
const APPLY_FUZZBALL_MATCHING_QUADRANT = 0.1125; // threshold [0.05, 0.2]

/**
 * Flags rows whose selected columns are approximately equal.
 *
 * Every row is reduced to the space-joined values of the selected columns.
 * Two rows are linked when the fuzzball `distance` between their normalized
 * texts is below `threshold * (length of the first row's un-normalized text)`,
 * or — only when the threshold is at least `APPLY_FUZZBALL_MATCHING_QUADRANT` —
 * when `token_set_ratio / 100` of the pair exceeds 0.8. Links are transitive:
 * each connected group of two or more rows is reported under one `groupId`,
 * numbered from 1.
 *
 * Rows whose joined text is blank are never compared, on either side of a
 * pair, so they can never be reported.
 *
 * @param dataContainer - Container whose `data` rows are indexed by column and expose a `value` per cell.
 * @param duplicateCheck - Check configuration; its selected columns and `meta.threshold` are read.
 * @returns One "info" issue per row that belongs to a group of fuzzy duplicates.
 */
export const findDuplicateIssuesFuzzy = (
  dataContainer: UploadDataContainer,
  duplicateCheck: DuplicateCheck
): DuplicateIssue[] => {
  // Lower bound (exclusive) for `token_set_ratio / 100` to count as a match.
  const minTokenSetScore = 0.8;

  const columnIndexes = getColumnIndexes(duplicateCheck);
  const threshold = duplicateCheck.meta.threshold;
  // Loop-invariant: the token-set fallback is only active for larger thresholds.
  const useTokenSetFallback = threshold >= APPLY_FUZZBALL_MATCHING_QUADRANT;

  const rowsAsStrings: string[] = dataContainer.data.map((row) => {
    return columnIndexes.map((column) => row[column].value).join(" ");
  });
  const isBlank = rowsAsStrings.map((text: string) => text.trim().length === 0);
  // Normalize each row once up front instead of once per compared pair.
  // Blank rows are never compared, so they are not normalized at all.
  const normalizedRows = rowsAsStrings.map((text: string, i: number) =>
    isBlank[i] ? "" : normalizeString(text)
  );

  // Every row is a node; an edge means "these two rows look like duplicates".
  const graph = new Graph();
  rowsAsStrings.forEach((_: string, i: number) => graph.setNode(i.toString()));

  for (let row = 0; row < rowsAsStrings.length; ++row) {
    if (isBlank[row]) continue;
    // The edit budget scales with the length of the raw (un-normalized) text.
    const maxEditDistance = threshold * rowsAsStrings[row].length;

    for (let otherRow = row + 1; otherRow < rowsAsStrings.length; ++otherRow) {
      // A blank row must not be linked from the other side of the pair either;
      // otherwise a row that normalizes to nothing would match every blank row.
      if (isBlank[otherRow]) continue;

      const string1 = normalizedRows[row];
      const string2 = normalizedRows[otherRow];
      const isDuplicate =
        distance(string1, string2) < maxEditDistance ||
        (useTokenSetFallback && token_set_ratio(string1, string2) / 100 > minTokenSetScore);
      if (isDuplicate) graph.setEdge(row.toString(), otherRow.toString());
    }
  }

  const issues: DuplicateIssue[] = [];
  let groupId = 1;
  alg.components(graph).forEach((components: string[]) => {
    // Components of size one are rows without any duplicate.
    if (components.length < 2) return;
    components.forEach((row) =>
      issues.push({
        // NOTE(review): this is the index into `dataContainer.data`, whereas
        // findDuplicateIssuesExact reports the first cell's `.row` — confirm
        // which one consumers of DuplicateIssue expect.
        row: parseInt(row, 10),
        columns: columnIndexes,
        type: "duplicate",
        groupId,
        severity: "info",
        comment: "fuzzy_duplicate_comment",
        isIgnored: false,
      })
    );
    groupId++;
  });
  return issues;
};

/**
 * Flags rows whose selected columns are textually identical.
 *
 * Every row is reduced to the space-joined values of the selected columns;
 * rows with the same joined text are grouped, and every group of two or more
 * rows is reported under one `groupId`, numbered from 1. Rows whose joined
 * text is blank are ignored.
 *
 * @param dataContainer - Container whose `data` rows are indexed by column and expose `value` and `row` per cell.
 * @param duplicateCheck - Check configuration; only its selected columns are read.
 * @returns One "warning" issue per row that belongs to a group of exact
 *   duplicates; `row` is taken from the `row` field of the row's first cell.
 */
export const findDuplicateIssuesExact = (
  dataContainer: UploadDataContainer,
  duplicateCheck: DuplicateCheck
): DuplicateIssue[] => {
  const columnIndexes = getColumnIndexes(duplicateCheck);

  // Every row is a node; an edge ties a row to the first row with the same text.
  const graph = new Graph();
  dataContainer.data.forEach((_: unknown, i: number) => graph.setNode(i.toString()));

  // A Map instead of a plain object: cell text is arbitrary user data, and on
  // an object literal keys such as "constructor", "toString" or "__proto__"
  // resolve to inherited members, so such a row was mistaken for one that had
  // already been seen.
  const firstRowByText = new Map<string, number>();

  for (let row = 0; row < dataContainer.data.length; ++row) {
    // NOTE(review): joining with a space makes ["a b", "c"] and ["a", "b c"]
    // produce the same text; confirm whether those should count as "exact".
    const rowAsString = columnIndexes
      .map((column) => dataContainer.data[row][column].value)
      .join(" ");
    if (!rowAsString.trim().length) continue;

    const firstRow = firstRowByText.get(rowAsString);
    if (firstRow === undefined) firstRowByText.set(rowAsString, row);
    else graph.setEdge(row.toString(), firstRow.toString());
  }

  const issues: DuplicateIssue[] = [];
  let groupId = 1;
  alg.components(graph).forEach((components: string[]) => {
    // Components of size one are rows without any duplicate.
    if (components.length < 2) return;

    components.forEach((row) => {
      const originalRow = dataContainer.data[parseInt(row, 10)][0].row;
      issues.push({
        row: originalRow,
        columns: columnIndexes,
        type: "duplicate",
        groupId,
        comment: "exact_duplicate_comment",
        severity: "warning",
        isIgnored: false,
      });
    });
    groupId++;
  });

  return issues;
};

/**
 * Lists the positions of the columns that are marked as selected on a check.
 *
 * @param check - Check whose `columns` entries carry a `selected` flag.
 * @returns Indexes into `check.columns` of every entry with a truthy
 *   `selected`, in ascending order.
 */
export const getColumnIndexes = (check: StandardCheck): number[] => {
  const selectedIndexes: number[] = [];
  check.columns.forEach((column, index) => {
    if (column.selected) selectedIndexes.push(index);
  });
  return selectedIndexes;
};

/**
 * Entry point of the duplicate check.
 *
 * Returns no issues when the check is disabled. Otherwise a threshold of
 * exactly 0 runs the exact comparison and any other threshold runs the fuzzy
 * comparison.
 *
 * @param dataContainer - Data handed through unchanged to the selected detector.
 * @param duplicateCheck - Check configuration; `enabled` and `meta.threshold` choose the detector.
 * @returns The issues produced by the selected detector, or an empty array when disabled.
 */
export const findDuplicateIssues = (
  dataContainer: UploadDataContainer,
  duplicateCheck: DuplicateCheck
): DuplicateIssue[] => {
  if (duplicateCheck.enabled) {
    const detect =
      duplicateCheck.meta.threshold === 0 ? findDuplicateIssuesExact : findDuplicateIssuesFuzzy;
    return detect(dataContainer, duplicateCheck);
  }
  return [];
};
