import { mean, standardDeviation } from "simple-statistics";
import { UploadDataContainer, UploadNumberType } from "../../../types/fileUploader";
import { QuickAnalysisIssue } from "../../../types/quickAnalysis";
import { RuleCandidate } from "../../../types/rules";
import { EMPTY_CELL_VALUES } from "../../quickAnalysis/qualityChecks/constants";
import { addIssueId } from "../../quickAnalysis/qualityChecks/qualityUtils";

/**
 * Flags numeric cells in one column whose value lies more than a configured
 * number of standard deviations away from the column mean.
 *
 * Only the first column listed on the rule is inspected. Cells are considered
 * when their `type` is "Integer" or "Double" and their stringified value is
 * not listed in `EMPTY_CELL_VALUES`. The statistical check is skipped
 * entirely unless more than half of the rows hold such a cell.
 *
 * @param dataContainer - Uploaded data; `data` is read as rows indexed by column.
 * @param rule - Rule to apply. Ignored unless its test function name is
 *   "stdDevOutlier"; supplies the column, the cutoff (`meta.stdDevCutoff`),
 *   the severity and the rule id copied onto every issue.
 * @returns One "outlier" issue per offending cell, passed through
 *   `addIssueId`; an empty array when the rule does not apply, names no
 *   column, or the column is not predominantly numeric.
 */
export const stdDevOutlier = (
  dataContainer: UploadDataContainer,
  rule: RuleCandidate
): QuickAnalysisIssue[] => {
  const issues: QuickAnalysisIssue[] = [];
  if (rule.qualityTest.testFunctionName !== "stdDevOutlier") return issues;

  // A rule without any column has nothing to inspect; bail out instead of
  // throwing on `undefined.index`.
  const targetColumn = rule.columns[0];
  if (targetColumn === undefined || targetColumn === null) return issues;
  const column = targetColumn.index;

  const values = dataContainer.data.map((row) => row[column]);

  const nonEmptyNumericItems: UploadNumberType[] = [];
  for (const cell of values) {
    // Rows shorter than the target column yield no cell at all.
    if (cell === undefined || cell === null) continue;
    // Check the type first so the emptiness test below only ever touches
    // the value of a numeric cell.
    if (cell.type !== "Integer" && cell.type !== "Double") continue;
    if (EMPTY_CELL_VALUES.includes(cell.value.toString())) continue;
    nonEmptyNumericItems.push(cell);
  }

  // Don't do statistical analysis unless numeric cells are the majority.
  // This also covers the "no numeric cells" case, since 0 <= length / 2.
  // NOTE(review): the threshold is taken over ALL rows, empty cells included,
  // not only over non-missing ones — confirm that this is intended.
  if (nonEmptyNumericItems.length <= values.length / 2) return issues;

  const numericValues: number[] = nonEmptyNumericItems.map((cell) => cell.value);
  const columnMean = mean(numericValues);
  const columnStdDev = standardDeviation(numericValues);
  const cutOff = rule.qualityTest.meta.stdDevCutoff;
  const max = columnMean + cutOff * columnStdDev;
  const min = columnMean - cutOff * columnStdDev;

  // Severity and comment key are the same for every issue of this rule.
  const severity = rule.severity;
  const comment = severity === "warning" ? "extreme_outlier_comment" : "outlier_comment";

  for (const cell of nonEmptyNumericItems) {
    if (cell.value > max || cell.value < min) {
      issues.push({
        row: cell.row,
        column: cell.column,
        type: "outlier",
        comment,
        severity,
        // Placeholder; the returned list is passed through addIssueId below.
        id: "",
        rule_id: rule.id,
      });
    }
  }
  return addIssueId(issues);
};
