import { ColumnDefinition, UploadDataContainer } from "../../../types/fileUploader";
import { IQROutlierTest, RuleCandidateWithoutMeta, StdDevOutlierTest } from "../../../types/rules";
import jerzy from "jerzy";
import { getNumericCellsForColumn } from "../getNumericCells";

/**
 * Quality-test descriptor for the "stdDevOutlier" test function, with
 * `stdDevCutoff` fixed at 3.
 *
 * `getOutlierRule` attaches this descriptor to rules built for columns that
 * were classified as normally distributed.
 *
 * NOTE(review): the cutoff is presumably the number of standard deviations
 * from the mean beyond which a value counts as an outlier — confirm against
 * the test implementation.
 */
export const stdDevOutlierTest: StdDevOutlierTest = {
  testFunctionName: "stdDevOutlier",
  meta: {
    stdDevCutoff: 3,
  },
};
/**
 * Quality-test descriptor for the "iqrOutlier" test function, with
 * `iqrCutoff` fixed at 2.
 *
 * `getOutlierRule` attaches this descriptor to rules built for columns that
 * were NOT classified as normally distributed.
 *
 * NOTE(review): the cutoff is presumably the multiple of the interquartile
 * range used to fence off outliers — confirm against the test implementation.
 */
export const iqrCutoffOutlierTest: IQROutlierTest = {
  testFunctionName: "iqrOutlier",
  meta: {
    iqrCutoff: 2,
  },
};

/**
 * Decides whether a sample can be treated as normally distributed, based on
 * the Shapiro-Wilk test.
 *
 * The sample is accepted as normal only when BOTH conditions hold:
 *  - the W statistic is above 0.98 (an arbitrary cutoff; the closer W is to 1,
 *    the more normal the distribution looks), and
 *  - the p-value is above 0.05, i.e. normality cannot be rejected at the 95%
 *    confidence level (alpha = 0.05).
 *
 * Samples with fewer than three values are never reported as normal: the
 * Shapiro-Wilk statistic is not defined for them, so the test is not run.
 *
 * @param numbers the sample values to test
 * @returns `true` if the sample passes both normality criteria, `false` otherwise
 */
export const isNormallyDistributed = (numbers: number[]): boolean => {
  // Shapiro-Wilk needs at least three observations; bail out explicitly
  // instead of relying on NaN comparisons (or a throw) inside the library.
  const minSampleSize = 3;
  if (numbers.length < minSampleSize) return false;

  const v = new jerzy.Vector(numbers);
  const { w, p } = jerzy.Normality.shapiroWilk(v);
  // w = 0.98 is an arbitrary cutoff, the closer to 1 the more normal a distribution is
  // p below 0.05 signifies we are 95+% sure the distribution is not normal
  return w > 0.98 && p > 0.05;
};

/**
 * Proposes one outlier rule candidate for every numeric column of an upload.
 *
 * A column counts as numeric when its `dataType` is "Integer" or "Double";
 * every other column (including "GeoCoordinate") is skipped. For each numeric
 * column the numeric cell values are tested for normality, and the result
 * selects which outlier test `getOutlierRule` attaches to the rule.
 *
 * @param dataContainer the uploaded file: its column definitions, cell data and file name
 * @returns the rule candidates, in the same order as the numeric columns appear
 */
export const predictOutlierRules = (
  dataContainer: UploadDataContainer
): RuleCandidateWithoutMeta[] => {
  const rules: RuleCandidateWithoutMeta[] = [];
  for (const column of dataContainer.columns) {
    // Only Integer/Double columns qualify; this single check also excludes
    // GeoCoordinate columns, so no separate guard is needed for them.
    if (column.dataType !== "Integer" && column.dataType !== "Double") continue;

    const numericCells = getNumericCellsForColumn(dataContainer.data, column.index);
    const values = numericCells.map((cell) => cell.value);
    rules.push(getOutlierRule(column, isNormallyDistributed(values), dataContainer.fileName));
  }
  return rules;
};

/**
 * Builds the "No outlier" rule candidate for a single column.
 *
 * The detection method depends on `isNormalDistribution`: the standard
 * deviation test is used for normally distributed data, the IQR test
 * otherwise. The choice is reflected in the rule id, its description and the
 * attached quality test.
 *
 * @param column the column the rule applies to
 * @param isNormalDistribution whether the column's values were classified as normally distributed
 * @param fileName name of the file the column belongs to; also used as id prefix
 * @returns the rule candidate (accepted by default, severity "info", confidence 3)
 */
export const getOutlierRule = (
  column: ColumnDefinition,
  isNormalDistribution: boolean,
  fileName: string
): RuleCandidateWithoutMeta => {
  // Everything that differs between the two detection methods lives here.
  const method = isNormalDistribution
    ? {
        tag: "stdDev",
        description: "Detects outlier in normally distributed data",
        qualityTest: stdDevOutlierTest,
      }
    : {
        tag: "iqr",
        description: "Detects outlier /w IQR method",
        qualityTest: iqrCutoffOutlierTest,
      };

  const ruleId = `${fileName}::outlier::${method.tag}::${column.index}`;

  return {
    id: ruleId,
    columns: [column],
    dimension: "outlier",
    severity: "info",
    confidence: 3,
    isAccepted: true,
    name: "No outlier",
    description: method.description,
    qualityTest: method.qualityTest,
    fileName,
  };
};
