import { linearRegression, linearRegressionLine, median, quantile } from "simple-statistics";
import { UploadDQCDataType, UploadDataContainer } from "../../../types/fileUploader";
import { QuickAnalysisIssue } from "../../../types/quickAnalysis";
import { RuleCandidate } from "../../../types/rules";
import { addIssueId } from "../../quickAnalysis/qualityChecks/qualityUtils";
import { transpose } from "../../../utils/transpose";
import { getNumericCellsForTwoColumns } from "../getNumericCells";

// Base width of the accepted residual band, measured in interquartile ranges of
// the regression residuals. columnCorrelation scales it by the inverse of the
// rule's correlation, so weaker correlations get a wider band.
const IQRS_CONSIDERED_OUTLIER = 2;

/**
 * Flags rows whose pair of values deviates too far from the linear relationship
 * between the two columns of a `columnCorrelation` rule.
 *
 * A regression line is fitted that predicts the second column from the first.
 * The absolute residual of every row is then compared against the band
 * `median ± (IQRS_CONSIDERED_OUTLIER / |correlation|) * IQR` computed over all
 * residuals; each row outside the band produces one issue per cell.
 *
 * @param dataContainer - Upload container whose `data` is searched for cell pairs
 *   via `getNumericCellsForTwoColumns`.
 * @param rule - Rule candidate; ignored unless its quality test is named
 *   `columnCorrelation`. Its first two columns are the ones compared.
 * @returns Two issues per flagged row, passed through `addIssueId`. An empty
 *   array is returned when the rule is of another kind, when no cell pairs are
 *   available, or when the correlation is zero or not a finite number.
 */
export const columnCorrelation = (
  dataContainer: UploadDataContainer,
  rule: RuleCandidate
): QuickAnalysisIssue[] => {
  const issues: QuickAnalysisIssue[] = [];
  const testFunction = rule.qualityTest;
  if (testFunction.testFunctionName !== "columnCorrelation") return issues;

  // Only the strength of the correlation matters for the tolerance. Using the
  // sign would produce a negative cut-off (max < min) for anti-correlated
  // columns and flag every row. A zero or non-finite correlation gives no usable
  // tolerance at all, so nothing is reported.
  const correlationStrength = Math.abs(testFunction.meta.correlation);
  if (!Number.isFinite(correlationStrength) || correlationStrength === 0) return issues;

  const [column1, column2] = rule.columns;
  const numericCells = getNumericCellsForTwoColumns(
    dataContainer.data,
    column1.index,
    column2.index
  );
  // quantile() and median() throw on an empty sample, so bail out early.
  if (numericCells.length === 0) return issues;

  const values1 = numericCells.map(({ cell1 }) => cell1.value);
  const values2 = numericCells.map(({ cell2 }) => cell2.value);
  const data = transpose([values1, values2]);
  const predictValue2 = linearRegressionLine(linearRegression(data));

  // Absolute distance between the actual second value and the fitted line.
  const residuals = values2.map((value2, index) =>
    Math.abs(value2 - predictValue2(values1[index]))
  );

  const upperQuartile = quantile(residuals, 0.75);
  const lowerQuartile = quantile(residuals, 0.25);
  const medianResidual = median(residuals);
  const interquartileRange = upperQuartile - lowerQuartile;
  // The more correlated the columns are, the smaller the gap allowed between
  // the linear regression prediction and the actual value.
  const cutOff = IQRS_CONSIDERED_OUTLIER / correlationStrength;
  const max = medianResidual + cutOff * interquartileRange;
  const min = medianResidual - cutOff * interquartileRange;

  residuals.forEach((residual, rowIndex) => {
    // A NaN residual (e.g. from a degenerate fit) cannot be judged; skip it.
    if (Number.isNaN(residual)) return;
    // NOTE(review): residuals are absolute values, so the lower bound only
    // triggers when the median exceeds cutOff * IQR, in which case unusually
    // small residuals are flagged too. Kept as in the original; confirm intent.
    if (residual <= max && residual >= min) return;
    const { cell1, cell2 } = numericCells[rowIndex];
    issues.push(
      getIssueForCellIfNumeric(cell1, rule.severity, rule.id),
      getIssueForCellIfNumeric(cell2, rule.severity, rule.id)
    );
  });
  return addIssueId(issues);
};

/**
 * Builds the outlier issue reported for a single cell.
 *
 * Despite its name, this helper performs no numeric check itself; it only
 * copies the cell's position into a new issue object.
 *
 * @param cell - Cell whose `row` and `column` locate the issue.
 * @param severity - Severity copied onto the issue.
 * @param rule_id - Identifier of the rule that produced the issue.
 * @returns An issue of type `"outlier"` with an empty `id`.
 */
const getIssueForCellIfNumeric = (
  cell: UploadDQCDataType,
  severity: "warning" | "info",
  rule_id: string
): QuickAnalysisIssue => {
  const { row, column } = cell;
  const issue: QuickAnalysisIssue = {
    row,
    column,
    type: "outlier",
    comment: "correlation_outlier_comment",
    severity,
    // Left blank here; the caller in this file runs the issues through addIssueId.
    id: "",
    rule_id,
  };
  return issue;
};
