import { ColumnDefinition, UploadDataContainer } from "../../../types/fileUploader";
import { RuleCandidateWithoutMeta } from "../../../types/rules";
import { getAllPossiblePairs } from "../../../utils/combinatorics";
import { sampleDeterministically } from "../../../utils/sampling";

// Size argument passed to `sampleDeterministically` when picking the rows to compare.
const SAMPLE_SIZE = 30;
// Upper bound on the per-pair mismatch cutoff; the effective cutoff is the smaller of
// this value and one tenth of the number of sampled rows.
const MAX_MISMATCH_COUNT = 3;

/**
 * Proposes "cross column matching" rule candidates for pairs of columns whose
 * values agree on (almost) every sampled row.
 *
 * Every pair of columns whose `dataType` is not `"Empty"` is compared on a sample
 * of the rows. A pair is accepted while the number of rows with differing values
 * stays at or below the cutoff `min(MAX_MISMATCH_COUNT, sampledRows.length / 10)`.
 *
 * @param dataContainer - Uploaded data; `data` (rows), `columns` and `fileName` are read.
 * @returns One rule candidate per accepted column pair, in the order the pairs are
 *   produced by `getAllPossiblePairs`. Empty when there are no rows to sample.
 */
export const predictColumnMatching = (
  dataContainer: UploadDataContainer
): RuleCandidateWithoutMeta[] => {
  const { data, columns } = dataContainer;
  // NOTE(review): presumably returns at most SAMPLE_SIZE rows, chosen reproducibly — confirm in utils/sampling.
  const sampledRows = sampleDeterministically(data, SAMPLE_SIZE);

  // With no rows to inspect, no pair can ever register a mismatch, so every pair would be
  // accepted vacuously. There is no evidence for any rule in that case.
  if (sampledRows.length === 0) return [];

  const notEmptyColumns = columns.filter((column) => column.dataType !== "Empty");
  // The cutoff may be fractional (e.g. 0.5 for 5 rows); the `>` comparison below then
  // rejects a pair on its first mismatch.
  const maxMismatchCutoff = Math.min(MAX_MISMATCH_COUNT, sampledRows.length / 10);
  const ruleCandidates: RuleCandidateWithoutMeta[] = [];

  getAllPossiblePairs(notEmptyColumns).forEach(({ item1: column1, item2: column2 }) => {
    let valueMismatchCount = 0;
    for (const row of sampledRows) {
      if (row[column1.index].value !== row[column2.index].value) {
        valueMismatchCount++;
        // Too many differing rows: drop this pair without scanning the rest of the sample.
        if (valueMismatchCount > maxMismatchCutoff) return;
      }
    }
    ruleCandidates.push(getColumnMatchingRule(column1, column2, dataContainer));
  });

  return ruleCandidates;
};

/**
 * Builds the rule candidate stating that two columns must hold equal values.
 *
 * The rule id is derived from the container's file name and the two column indexes;
 * the description is derived from the two column names. All other fields are fixed.
 *
 * @param column1 - First column of the pair.
 * @param column2 - Second column of the pair.
 * @param dataContainer - Container the columns belong to; only `fileName` is read.
 * @returns The rule candidate for the `columnMatching` quality test.
 */
export const getColumnMatchingRule = (
  column1: ColumnDefinition,
  column2: ColumnDefinition,
  dataContainer: UploadDataContainer
): RuleCandidateWithoutMeta => {
  const { fileName } = dataContainer;
  const ruleId = `${fileName}::inconsistent::column_matching::${column1.index}:${column2.index}`;
  const ruleDescription = `Values in ${column1.name} must equal values in ${column2.name}`;

  const candidate: RuleCandidateWithoutMeta = {
    id: ruleId,
    columns: [column1, column2],
    dimension: "inconsistent",
    severity: "warning",
    confidence: 4,
    isAccepted: true,
    name: "Cross column matching",
    description: ruleDescription,
    qualityTest: { testFunctionName: "columnMatching", meta: {} },
    fileName,
  };
  return candidate;
};
