Seminar 4

Chi-Square Tests for Categorical Data

Hypothesis Testing: Goodness-of-Fit and Test of Independence (No Association)¶


1. Categorical data and frequency tables¶

1.1 Categorical variable¶

A categorical (qualitative) variable takes values in a finite set of categories (labels), e.g.

  • blood type: A, B, AB, O
  • device type: Mac, Windows, Linux
  • satisfaction: low, medium, high

Data for categorical variables are summarized by counts (frequencies).

1.2 Observed counts¶

Suppose a variable has $k$ categories. We observe counts $$ O_1, O_2, \dots, O_k, $$ with total sample size $$ n = \sum_{i=1}^k O_i. $$
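As a quick illustration, observed counts can be tabulated from raw labels with Python's standard library (the sample below is hypothetical):

```python
from collections import Counter

# Hypothetical raw categorical observations (blood types)
data = ["A", "B", "A", "O", "AB", "A", "O", "B", "A", "O"]

counts = Counter(data)       # category -> frequency O_i
n = sum(counts.values())     # total sample size

print(counts)  # e.g. Counter({'A': 4, 'O': 3, 'B': 2, 'AB': 1})
print(n)       # 10
```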


2. Hypothesis testing recap¶

A hypothesis test compares:

  • Null hypothesis $H_0$ (default model)
  • Alternative hypothesis $H_1$ (departure from the model)

Given a test statistic $T$:

  • compute the observed value $T_{\text{obs}}$
  • find the distribution of $T$ under $H_0$
  • compute a p-value or compare to a critical value
  • decide to reject or fail to reject $H_0$

2.1 Significance level, p-value¶

  • $\alpha$ = significance level (commonly $0.05$)
  • p-value = probability (under $H_0$) of observing a statistic at least as extreme as $T_{\text{obs}}$

Decision:

  • reject $H_0$ if p-value $< \alpha$

3. Why chi-square tests work (core theory)¶

Chi-square tests are built from the idea: compare observed counts to expected counts under $H_0$.

Let $E_i$ be the expected count in category $i$ under $H_0$.

A natural measure of discrepancy is $$ \sum_{i=1}^k (O_i - E_i)^2, $$ but this depends on the scale of $E_i$. So we standardize by dividing by $E_i$: $$ \chi^2 = \sum_{i=1}^k \frac{(O_i - E_i)^2}{E_i}. $$

3.1 Asymptotic chi-square distribution (informal but essential)¶

Under mild conditions (large enough expected counts), and assuming $H_0$ is true, $$ \chi^2 \ \approx\ \chi^2(\text{df}), $$ a chi-square distribution with appropriate degrees of freedom.

Why “approx”? Because the chi-square distribution is an asymptotic (large-sample) result based on:

  • multinomial sampling for counts
  • central limit theorem / normal approximation
  • quadratic form convergence
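The asymptotic claim can be checked empirically. The sketch below (hypothetical probabilities, seeded for reproducibility) draws multinomial counts under $H_0$ and confirms that the average of the statistic is close to the theoretical mean of $\chi^2(k-1)$, namely $k-1$:

```python
import numpy as np

rng = np.random.default_rng(0)
n = 500
p = np.array([0.2, 0.3, 0.1, 0.4])   # hypothetical H0 probabilities, k = 4

# Simulate many samples under H0 and compute the statistic for each
stats = []
for _ in range(5000):
    O = rng.multinomial(n, p)        # observed counts under H0
    E = n * p                        # expected counts
    stats.append(np.sum((O - E) ** 2 / E))

# Under H0 the statistic is approximately chi^2(k-1); its mean should be near k-1 = 3
print(round(float(np.mean(stats)), 2))
```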

3.2 Chi-square distribution definition¶

If $Z_1,\dots,Z_\nu$ are independent standard normals, $Z_j \sim \mathcal{N}(0,1)$, then $$ Q = \sum_{j=1}^{\nu} Z_j^2 $$ follows a chi-square distribution with $\nu$ degrees of freedom: $$ Q \sim \chi^2(\nu). $$

Properties:

  • support: $[0,\infty)$
  • mean: $E[Q] = \nu$
  • variance: $\mathrm{Var}(Q) = 2\nu$
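These moments can be confirmed with `scipy.stats.chi2` (the choice $\nu = 7$ is arbitrary):

```python
from scipy.stats import chi2

nu = 7                                     # arbitrary degrees of freedom
mean, var = chi2.stats(nu, moments="mv")   # mean = nu, variance = 2 * nu
print(mean, var)  # 7.0 14.0
```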

4. Chi-square Goodness-of-Fit (GoF) test¶

4.1 Goal¶

Test whether one categorical variable follows a specified distribution.

4.2 Setup (multinomial model)¶

Suppose there are $k$ categories. Under $H_0$ we assume probabilities $$ p_1, p_2, \dots, p_k,\quad p_i \ge 0,\quad \sum_{i=1}^k p_i = 1. $$

If we observe $n$ independent outcomes, the count vector $(O_1,\dots,O_k)$ follows a multinomial distribution under $H_0$: $$ (O_1,\dots,O_k) \sim \mathrm{Multinomial}\left(n; p_1,\dots,p_k\right). $$

4.3 Hypotheses¶

  • Null hypothesis: $$ H_0: \text{The true category probabilities equal } (p_1,\dots,p_k). $$
  • Alternative hypothesis: $$ H_1: \text{The true probabilities differ from } (p_1,\dots,p_k). $$

4.4 Expected counts¶

Under $H_0$, expected counts are: $$ E_i = n p_i,\quad i=1,\dots,k. $$

4.5 Test statistic¶

$$ \chi^2 = \sum_{i=1}^{k} \frac{(O_i - E_i)^2}{E_i}. $$

4.6 Degrees of freedom (GoF)¶

If $p_1,\dots,p_k$ are fully specified (no parameters estimated from the data), then $$ \text{df} = k - 1. $$

If the model contains $m$ unknown parameters estimated from the data, then: $$ \text{df} = k - 1 - m. $$

Explanation of $k-1$: counts sum to $n$, so only $k-1$ counts are free.
Estimating parameters uses up additional constraints, reducing df further.

4.7 Decision rule and p-value¶

Compute $\chi^2_{\text{obs}}$ from the sample. Under $H_0$: $$ \chi^2_{\text{obs}} \approx \chi^2(\text{df}). $$

  • p-value: $$ \text{p-value} = P\left(\chi^2(\text{df}) \ge \chi^2_{\text{obs}}\right). $$

Reject $H_0$ if p-value $< \alpha$.

4.8 Typical example: fair die¶

$k=6$ categories, $p_i = 1/6$, df $= 5$.
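A minimal end-to-end sketch for this case, using hypothetical counts from 120 rolls:

```python
from scipy.stats import chi2

# Hypothetical counts from 120 rolls of a die
observed = [18, 22, 16, 25, 17, 22]
n = sum(observed)                # 120
expected = [n / 6] * 6           # E_i = n * (1/6) = 20

chi2_obs = sum((o - e) ** 2 / e for o, e in zip(observed, expected))
df = 6 - 1
p_value = chi2.sf(chi2_obs, df)  # P(chi^2(5) >= chi2_obs)

print(round(chi2_obs, 2), round(p_value, 3))
```

Here the p-value is large, so these hypothetical counts would not give evidence against fairness at $\alpha = 0.05$.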


5. Chi-square Test of Independence (No Association)¶

5.1 Goal¶

Test whether two categorical variables are independent.

Example:

  • gender (M/F) and preference (A/B/C)
  • treatment group (control/drug) and outcome (success/failure)

5.2 Contingency table¶

Let variable $A$ have $r$ categories and variable $B$ have $c$ categories.

Observed counts $O_{ij}$ arranged in an $r \times c$ table.

Row sums: $$ O_{i\cdot} = \sum_{j=1}^c O_{ij} $$ Column sums: $$ O_{\cdot j} = \sum_{i=1}^r O_{ij} $$ Total: $$ n = \sum_{i=1}^r \sum_{j=1}^c O_{ij}. $$

5.3 Hypotheses¶

  • Null hypothesis (no association / independence): $$ H_0: A \text{ and } B \text{ are independent.} $$ Formally, for all $i,j$: $$ P(A=i, B=j) = P(A=i)P(B=j). $$

  • Alternative hypothesis: $$ H_1: A \text{ and } B \text{ are not independent (associated).} $$

5.4 Expected counts under independence¶

Under independence, $$ P(A=i, B=j) = P(A=i)P(B=j). $$ Estimate $P(A=i)$ and $P(B=j)$ by sample proportions: $$ \widehat{P}(A=i) = \frac{O_{i\cdot}}{n}, \quad \widehat{P}(B=j) = \frac{O_{\cdot j}}{n}. $$

Thus the expected count in cell $(i,j)$ is: $$ E_{ij} = n \cdot \widehat{P}(A=i)\widehat{P}(B=j) = n \cdot \frac{O_{i\cdot}}{n}\cdot \frac{O_{\cdot j}}{n} = \frac{O_{i\cdot} O_{\cdot j}}{n}. $$
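A minimal sketch of this computation for a hypothetical 2×2 table:

```python
# Hypothetical 2x2 observed table
O = [[30, 20],
     [10, 40]]

row = [sum(r) for r in O]                                  # row totals [50, 50]
col = [sum(O[i][j] for i in range(2)) for j in range(2)]   # column totals [40, 60]
n = sum(row)                                               # grand total 100

# E_ij = (row_i total) * (col_j total) / n
E = [[row[i] * col[j] / n for j in range(2)] for i in range(2)]
print(E)  # [[20.0, 30.0], [20.0, 30.0]]
```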

5.5 Test statistic¶

$$ \chi^2 = \sum_{i=1}^{r}\sum_{j=1}^{c} \frac{(O_{ij} - E_{ij})^2}{E_{ij}}. $$

5.6 Degrees of freedom (independence test)¶

$$ \text{df} = (r-1)(c-1). $$

Why?
An $r\times c$ table has $rc$ cells, but:

  • row totals impose $r$ constraints
  • column totals impose $c$ constraints
  • but one constraint is redundant (the row totals and the column totals both fix the grand total $n$), leaving $r+c-1$ independent constraints

Hence free cells: $$ rc - (r+c-1) = (r-1)(c-1). $$

5.7 p-value¶

Under $H_0$: $$ \chi^2_{\text{obs}} \approx \chi^2((r-1)(c-1)). $$ p-value: $$ \text{p-value} = P\left(\chi^2(\text{df}) \ge \chi^2_{\text{obs}}\right). $$

Reject $H_0$ if p-value $< \alpha$.


6. Assumptions and practical rules¶

6.1 Independence of observations¶

Each observation (person, trial, unit) should contribute to exactly one cell and be independent of others.

6.2 Expected counts should not be too small¶

Common rule of thumb:

  • all expected counts $E_{ij} \ge 5$

More nuanced guideline:

  • no more than 20% of cells with $E_{ij}<5$
  • none with $E_{ij}<1$

If violated:

  • merge rare categories
  • use Fisher’s exact test for $2\times 2$ (small sample)
  • consider exact / Monte Carlo methods
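For instance, Fisher's exact test for a small hypothetical 2×2 table via `scipy.stats.fisher_exact`:

```python
from scipy.stats import fisher_exact

# Hypothetical small 2x2 table where expected counts would be too small for chi-square
table = [[3, 1],
         [1, 5]]

odds_ratio, p_value = fisher_exact(table, alternative="two-sided")
print(round(p_value, 3))
```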

7. Relationship to other chi-square tests (for context)¶

7.1 Test of homogeneity¶

Very similar to independence, but framing differs:

  • independence: one sample, two variables
  • homogeneity: multiple samples, one categorical variable, compare distributions

Mathematically, both use the same $\chi^2$ statistic and the same degrees of freedom.

8. Summary¶

8.1 Goodness-of-Fit (GoF)¶

  • one categorical variable
  • compare observed counts $O_i$ to expected $E_i = np_i$
  • test statistic: $$ \chi^2 = \sum_{i=1}^{k} \frac{(O_i - E_i)^2}{E_i} $$
  • df: $k-1-m$, where $m$ is the number of parameters estimated from the data ($m=0$ when the probabilities are fully specified)

8.2 Independence (No Association)¶

  • two categorical variables
  • expected: $$ E_{ij} = \frac{O_{i\cdot} O_{\cdot j}}{n} $$
  • test statistic: $$ \chi^2 = \sum_{i=1}^{r}\sum_{j=1}^{c} \frac{(O_{ij} - E_{ij})^2}{E_{ij}} $$
  • df: $(r-1)(c-1)$

9. What chi-square tests do not tell you¶

  • causality
  • direction of association
  • which cells drive the association (without post-hoc residual analysis)

(If needed: analyze standardized residuals to see which cells contribute most.)
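A minimal sketch of Pearson residuals $(O_{ij}-E_{ij})/\sqrt{E_{ij}}$ for a hypothetical table; cells with large absolute residuals (roughly $|r_{ij}| > 2$) contribute most to the statistic:

```python
import numpy as np

# Hypothetical 2x2 observed table
O = np.array([[30.0, 20.0],
              [10.0, 40.0]])
n = O.sum()

# Expected counts under independence: E_ij = row_i * col_j / n (outer product via broadcasting)
E = O.sum(axis=1, keepdims=True) * O.sum(axis=0, keepdims=True) / n

# Pearson residuals
residuals = (O - E) / np.sqrt(E)
print(np.round(residuals, 2))
```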


Example: Chi-square Goodness-of-Fit (GoF) test

An instructor claims that the grade distribution of their students is different from the department’s grade distribution.

The department-wide grade distribution for introductory statistics courses is:

  • A: 35%
  • B: 23%
  • C: 25%
  • D: 10%
  • F: 7%

A random sample of 250 introductory statistics students taught by this instructor produced the following grades:

  • A: 80
  • B: 50
  • C: 58
  • D: 38
  • F: 24

Using a 5% level of significance, test the instructor’s claim that their students’ grade distribution differs from the department’s distribution.

In [6]:
import math
from scipy.stats import chi2


def chi_square_gof_test(
    observed,
    expected=None,
    probs=None,
    n=None,
    ddof=0,                 # extra parameters estimated from data (e.g., ddof=1 if you estimated 1 parameter)
    alpha=0.05
):
    """
    Chi-square Goodness-of-Fit (GoF) test.

    H0: The data follow the specified categorical distribution.
    Test statistic:
        X^2 = sum_i (O_i - E_i)^2 / E_i
    Degrees of freedom:
        df = k - 1 - ddof
      where k = number of categories, ddof = number of parameters estimated from the data.

    Inputs
    ------
    observed : list/tuple of nonnegative counts O_i
    expected : list/tuple of expected counts E_i (same length as observed), optional
    probs    : list/tuple of category probabilities p_i (same length as observed), optional
    n        : total sample size (required if probs is provided and observed does not already sum to n)
    ddof     : int, number of fitted parameters (reduces df)
    alpha    : significance level

    Provide either:
      - expected, OR
      - probs (then expected counts are computed as E_i = n * p_i)

    Returns
    -------
    dict with statistic, df, p-value, and critical region decision.
    """

    # ---------- Validate observed ----------
    if observed is None:
        raise ValueError("`observed` must be provided.")
    if len(observed) < 2:
        raise ValueError("Need at least 2 categories.")
    if any(o < 0 for o in observed):
        raise ValueError("Observed counts must be nonnegative.")

    k = len(observed)
    obs_sum = sum(observed)

    # ---------- Build expected counts ----------
    if expected is not None and probs is not None:
        raise ValueError("Provide only one of `expected` or `probs`, not both.")

    if expected is not None:
        if len(expected) != k:
            raise ValueError("`expected` must have the same length as `observed`.")
        if any(e <= 0 for e in expected):
            raise ValueError("All expected counts must be > 0.")
        exp = list(expected)

    elif probs is not None:
        if len(probs) != k:
            raise ValueError("`probs` must have the same length as `observed`.")
        if any(p < 0 for p in probs):
            raise ValueError("Probabilities must be nonnegative.")
        p_sum = sum(probs)
        if p_sum <= 0:
            raise ValueError("Sum of probabilities must be > 0.")
        # normalize just in case
        probs = [p / p_sum for p in probs]

        if n is None:
            n = obs_sum
        if n <= 0:
            raise ValueError("`n` must be positive.")
        exp = [n * p for p in probs]

    else:
        raise ValueError("Provide either `expected` or `probs`.")

    # ---------- (Optional) sanity check: totals ----------
    # In GoF, typically sum(expected) == sum(observed) == n
    # If expected provided directly, we won't force equality, but we can warn via return field.
    exp_sum = sum(exp)
    totals_match = math.isclose(exp_sum, obs_sum, rel_tol=1e-9, abs_tol=1e-9)

    # ---------- Compute chi-square statistic ----------
    chi2_obs = 0.0
    for o, e in zip(observed, exp):
        if e <= 0:
            raise ValueError("All expected counts must be > 0.")
        chi2_obs += (o - e) ** 2 / e

    # ---------- Degrees of freedom ----------
    df = k - 1 - ddof
    if df <= 0:
        raise ValueError("Degrees of freedom must be positive. Check k and ddof.")

    # ---------- p-value method ----------
    p_value = 1 - chi2.cdf(chi2_obs, df)
    reject_by_pvalue = p_value < alpha

    # ---------- Critical region method ----------
    chi2_crit = chi2.ppf(1 - alpha, df)
    reject_by_critical = chi2_obs > chi2_crit
    critical_region = f"X^2 > {chi2_crit:.4f}"

    return {
        "inputs": {
            "observed": list(observed),
            "expected": exp,
            "alpha": alpha,
            "ddof": ddof
        },
        "sanity_checks": {
            "sum_observed": obs_sum,
            "sum_expected": exp_sum,
            "totals_match": totals_match
        },
        "statistic": {
            "chi2_obs": chi2_obs,
            "df": df
        },
        "p_value_method": {
            "p_value": p_value,
            "reject_H0": reject_by_pvalue
        },
        "critical_region_method": {
            "critical_region": critical_region,
            "chi2_crit": chi2_crit,
            "reject_H0": reject_by_critical
        }
    }

GoF test with probabilities given¶

In [8]:
# Observed counts
observed = [80, 50, 58, 38, 24]

# Hypothesized probabilities
probs = [0.35, 0.23, 0.25, 0.1, 0.07]

# Run Chi-square GoF test
result_probs = chi_square_gof_test(
    observed=observed,
    probs=probs,
    alpha=0.05
)

result_probs
Out[8]:
{'inputs': {'observed': [80, 50, 58, 38, 24],
  'expected': [87.5, 57.5, 62.5, 25.0, 17.5],
  'alpha': 0.05,
  'ddof': 0},
 'sanity_checks': {'sum_observed': 250,
  'sum_expected': 250.0,
  'totals_match': True},
 'statistic': {'chi2_obs': 11.119403726708075, 'df': 4},
 'p_value_method': {'p_value': 0.025254353833125798, 'reject_H0': True},
 'critical_region_method': {'critical_region': 'X^2 > 9.4877',
  'chi2_crit': 9.487729036781154,
  'reject_H0': True}}
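As a cross-check, `scipy.stats.chisquare` implements the same statistic and reproduces the result above:

```python
from scipy.stats import chisquare

observed = [80, 50, 58, 38, 24]
expected = [250 * p for p in [0.35, 0.23, 0.25, 0.10, 0.07]]

# df defaults to k - 1 (ddof=0), matching the hand computation
stat, p = chisquare(f_obs=observed, f_exp=expected)
print(round(stat, 4), round(p, 4))  # 11.1194 0.0253
```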

Example 2

Problem: Chi-Square Goodness-of-Fit Test (Shelf Placement Preference)¶

A research company is investigating whether the proportion of consumers who purchase a cereal differs by shelf placement.

They consider four shelf locations:

  • Bottom Shelf
  • Middle Shelf
  • Top Shelf
  • Aisle End Shelf

Test whether there is a preference among the four shelf placements. Use the p-value method with significance level $\alpha = 0.05$.

The observed counts are:

Shelf Placement    Bottom    Middle    Top    End
Observed               45        67     55     73
In [11]:
# Observed counts
observed = [45, 67, 55, 73]

# Hypothesized probabilities
probs = [0.25, 0.25, 0.25, 0.25]

# Run Chi-square GoF test
result_probs = chi_square_gof_test(
    observed=observed,
    probs=probs,
    alpha=0.05
)

result_probs
Out[11]:
{'inputs': {'observed': [45, 67, 55, 73],
  'expected': [60.0, 60.0, 60.0, 60.0],
  'alpha': 0.05,
  'ddof': 0},
 'sanity_checks': {'sum_observed': 240,
  'sum_expected': 240.0,
  'totals_match': True},
 'statistic': {'chi2_obs': 7.800000000000001, 'df': 3},
 'p_value_method': {'p_value': 0.050331097859853346, 'reject_H0': False},
 'critical_region_method': {'critical_region': 'X^2 > 7.8147',
  'chi2_crit': 7.814727903251179,
  'reject_H0': False}}

Example 3 : Chi-square Test of Independence (No Association)

Problem: Chi-Square Test of Independence (ASD and Breastfeeding)¶

Is there a relationship between autism spectrum disorder (ASD) and breastfeeding?

To investigate this question, a researcher asked mothers of ASD and non-ASD children to report the length of time they breastfed their children.

Do the data provide enough evidence to conclude that breastfeeding and ASD are associated (i.e., not independent)?
Conduct the test at the 1% significance level.

The observed data are summarized in the contingency table below.

ASD      None    Less than 2 months    2 to 6 months    Over 6 months    Total
Yes       241           198                  164              215          818
No         20            25                   27               44          116
Total     261           223                  191              259          934

(Source: Schultz, Klonoff-Cohen, Wingard, Askhoomoff, Macera, Ji & Bacher, 2006.)

In [12]:
import math
from scipy.stats import chi2


def chi_square_independence_test(
    table,
    alpha=0.05
):
    """
    Chi-square Test of Independence (No Association).

    H0: The two categorical variables are independent.
    H1: The two categorical variables are associated.

    Input
    -----
    table : 2D list or array
        Contingency table of observed counts.
        Shape: (r rows) x (c columns)

    alpha : significance level

    Test statistic:
        X^2 = sum_{i,j} (O_ij - E_ij)^2 / E_ij

    Expected counts:
        E_ij = (row_i_total * column_j_total) / grand_total

    Degrees of freedom:
        df = (r - 1)(c - 1)

    Uses BOTH:
      (1) p-value method
      (2) critical region method
    """

    # ---------- Validate table ----------
    if table is None or len(table) < 2:
        raise ValueError("Table must have at least 2 rows.")

    r = len(table)
    c = len(table[0])

    if c < 2:
        raise ValueError("Table must have at least 2 columns.")

    for row in table:
        if len(row) != c:
            raise ValueError("All rows must have the same number of columns.")
        if any(x < 0 for x in row):
            raise ValueError("Counts must be nonnegative.")

    # ---------- Totals ----------
    row_totals = [sum(row) for row in table]
    col_totals = [sum(table[i][j] for i in range(r)) for j in range(c)]
    grand_total = sum(row_totals)

    if grand_total == 0:
        raise ValueError("Grand total must be positive.")

    # ---------- Expected counts ----------
    expected = [
        [(row_totals[i] * col_totals[j]) / grand_total for j in range(c)]
        for i in range(r)
    ]

    # ---------- Chi-square statistic ----------
    chi2_obs = 0.0
    for i in range(r):
        for j in range(c):
            if expected[i][j] == 0:
                raise ValueError("Expected count is zero — cannot compute χ².")
            chi2_obs += (table[i][j] - expected[i][j]) ** 2 / expected[i][j]

    # ---------- Degrees of freedom ----------
    df = (r - 1) * (c - 1)

    # ---------- p-value method ----------
    p_value = 1 - chi2.cdf(chi2_obs, df)
    reject_by_pvalue = p_value < alpha

    # ---------- Critical region method ----------
    chi2_crit = chi2.ppf(1 - alpha, df)
    reject_by_critical = chi2_obs > chi2_crit
    critical_region = f"X^2 > {chi2_crit:.4f}"

    # ---------- Return results ----------
    return {
        "inputs": {
            "observed_table": table,
            "alpha": alpha
        },
        "expected_counts": expected,
        "statistic": {
            "chi2_obs": chi2_obs,
            "df": df
        },
        "p_value_method": {
            "p_value": p_value,
            "reject_H0": reject_by_pvalue
        },
        "critical_region_method": {
            "critical_region": critical_region,
            "chi2_crit": chi2_crit,
            "reject_H0": reject_by_critical
        }
    }

Example with a 2×4 table (ASD and Breastfeeding)¶

In [15]:
table_2x4 = [
    [241, 198, 164, 215],
    [20, 25, 27, 44]
]

result_2x4 = chi_square_independence_test(
    table=table_2x4,
    alpha=0.01
)

result_2x4

# see: https://jcbuitrago.com/wp-content/uploads/2026/02/Screenshot-2026-02-01-at-13.25.44.png
Out[15]:
{'inputs': {'observed_table': [[241, 198, 164, 215], [20, 25, 27, 44]],
  'alpha': 0.01},
 'expected_counts': [[228.5845824411135,
   195.30406852248393,
   167.27837259100642,
   226.83297644539616],
  [32.41541755888651,
   27.69593147751606,
   23.721627408993577,
   32.16702355460385]],
 'statistic': {'chi2_obs': 11.216688008237018, 'df': 3},
 'p_value_method': {'p_value': 0.01061005135825377, 'reject_H0': False},
 'critical_region_method': {'critical_region': 'X^2 > 11.3449',
  'chi2_crit': 11.344866730144373,
  'reject_H0': False}}
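As a cross-check, `scipy.stats.chi2_contingency` reproduces the statistic, p-value, degrees of freedom, and expected counts (`correction=False` disables the Yates continuity correction, which in any case only applies when df = 1):

```python
from scipy.stats import chi2_contingency

table = [[241, 198, 164, 215],
         [20, 25, 27, 44]]

stat, p, df, expected = chi2_contingency(table, correction=False)
print(round(stat, 4), round(p, 4), df)  # 11.2167 0.0106 3
```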

Example 4

Problem: Chi-Square Test of Independence (Dental Insurance and Company Size)¶

The sample data below show the number of companies providing dental insurance for small, medium, and large companies.

Test whether there is a relationship between dental insurance coverage and company size. Use $\alpha = 0.05$.

The observed data are:

Dental Insurance    Small    Medium    Large
Yes                    21        25       19
No                     46        39       10
In [16]:
# 2×3 contingency table (Observed counts)
# Rows: Dental Insurance (Yes/No); Columns: Company Size (Small/Medium/Large)
table_2x3 = [
    [21, 25, 19],
    [46, 39, 10]
]

result_2x3 = chi_square_independence_test(
    table=table_2x3,
    alpha=0.05
)

result_2x3
Out[16]:
{'inputs': {'observed_table': [[21, 25, 19], [46, 39, 10]], 'alpha': 0.05},
 'expected_counts': [[27.21875, 26.0, 11.78125], [39.78125, 38.0, 17.21875]],
 'statistic': {'chi2_obs': 9.907263903850843, 'df': 2},
 'p_value_method': {'p_value': 0.007057728990733203, 'reject_H0': True},
 'critical_region_method': {'critical_region': 'X^2 > 5.9915',
  'chi2_crit': 5.991464547107979,
  'reject_H0': True}}