Seminar 5

Analysis of Variance (ANOVA)

One-Way ANOVA: Theory, Assumptions, and Interpretation¶


1. Motivation¶

In many applications we want to compare more than two population means.

Examples:

  • mean income across several regions
  • average exam score across multiple teaching methods
  • mean response time for different algorithms
  • effect of different treatments in an experiment

A naive approach would be to perform many pairwise $t$-tests.
This is incorrect, because it inflates the Type I error rate.

ANOVA provides a single global test for comparing multiple means.


2. Statistical Question¶

Are all group means equal, or does at least one group differ?

ANOVA tests equality of means, not variances (despite the name).


3. One-Way ANOVA Model¶

3.1 Data structure¶

Suppose we have $k$ groups.

Group $i$ has observations: $$ X_{i1}, X_{i2}, \dots, X_{in_i}, \quad i = 1,\dots,k. $$

Total sample size: $$ N = \sum_{i=1}^{k} n_i. $$


3.2 Model assumption¶

The one-way ANOVA model is: $$ X_{ij} = \mu_i + \varepsilon_{ij}, $$ where:

  • $\mu_i$ = mean of group $i$
  • $\varepsilon_{ij}$ = random error

Assumptions on errors: $$ \varepsilon_{ij} \sim \mathcal{N}(0, \sigma^2), $$ independently for all $i,j$.

Equivalently: $$ X_{ij} \sim \mathcal{N}(\mu_i, \sigma^2). $$
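The model is easy to make concrete by simulating from it. A minimal sketch, where the group means, $\sigma$, and group size are illustrative values chosen here (not from the seminar):

```python
import numpy as np

# Simulate X_ij = mu_i + eps_ij with eps_ij ~ N(0, sigma^2), independent.
# mu, sigma, and n below are hypothetical illustration values.
rng = np.random.default_rng(42)
mu = [50.0, 55.0, 60.0]    # group means mu_i for k = 3 groups
sigma = 4.0                # common error standard deviation
n = 7                      # observations per group

groups = [mu_i + sigma * rng.standard_normal(n) for mu_i in mu]

for mu_i, g in zip(mu, groups):
    print(f"true mean {mu_i:5.1f}   sample mean {g.mean():6.2f}")
```

Each sample mean should land within a couple of standard errors ($\sigma/\sqrt{n}\approx 1.5$) of its true mean.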


4. Hypotheses¶

Null hypothesis¶

$$ H_0: \mu_1 = \mu_2 = \dots = \mu_k $$

Alternative hypothesis¶

$$ H_1: \text{At least one } \mu_i \text{ differs} $$

Important:

  • ANOVA does not tell which means differ
  • it only tests whether any difference exists

5. Key Idea Behind ANOVA¶

ANOVA is based on variance decomposition.

Total variability in the data can be split into:

  1. variability between groups
  2. variability within groups

If group means are truly equal, between-group variability should be small relative to within-group variability.


6. Sample Means and Grand Mean¶

Group means: $$ \bar{X}_i = \frac{1}{n_i}\sum_{j=1}^{n_i} X_{ij} $$

Grand mean: $$ \bar{X} = \frac{1}{N}\sum_{i=1}^{k}\sum_{j=1}^{n_i} X_{ij} $$


7. Decomposition of Sums of Squares (Core Theory)¶

7.1 Total Sum of Squares (SST)¶

Measures total variability: $$ \text{SST} = \sum_{i=1}^{k}\sum_{j=1}^{n_i}(X_{ij} - \bar{X})^2 $$


7.2 Between-Group Sum of Squares (SSB)¶

Measures variability due to differences between group means: $$ \text{SSB} = \sum_{i=1}^{k} n_i(\bar{X}_i - \bar{X})^2 $$


7.3 Within-Group Sum of Squares (SSW)¶

Measures variability within groups: $$ \text{SSW} = \sum_{i=1}^{k}\sum_{j=1}^{n_i}(X_{ij} - \bar{X}_i)^2 $$


7.4 Fundamental identity¶

$$ \text{SST} = \text{SSB} + \text{SSW} $$

This decomposition is exact (not approximate).
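The identity can be verified numerically. A quick check on a small made-up dataset:

```python
# Quick numerical check of SST = SSB + SSW on a small made-up dataset.
groups = [[3.0, 5.0, 4.0], [6.0, 8.0, 7.0], [9.0, 11.0, 10.0]]

N = sum(len(g) for g in groups)
grand = sum(x for g in groups for x in g) / N          # grand mean = 7.0
means = [sum(g) / len(g) for g in groups]              # group means 4, 7, 10

sst = sum((x - grand) ** 2 for g in groups for x in g)
ssb = sum(len(g) * (m - grand) ** 2 for g, m in zip(groups, means))
ssw = sum((x - m) ** 2 for g, m in zip(groups, means) for x in g)

print(sst, ssb, ssw)             # 60.0 54.0 6.0
print(sst - (ssb + ssw))         # 0.0 (exact, up to floating point)
```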


8. Degrees of Freedom¶

Total¶

$$ \text{df}_{\text{total}} = N - 1 $$

Between groups¶

$$ \text{df}_{\text{between}} = k - 1 $$

Within groups¶

$$ \text{df}_{\text{within}} = N - k $$

And: $$ (N - 1) = (k - 1) + (N - k) $$


9. Mean Squares¶

To compare variances, sums of squares are normalized by degrees of freedom.

Mean square between¶

$$ \text{MSB} = \frac{\text{SSB}}{k - 1} $$

Mean square within¶

$$ \text{MSW} = \frac{\text{SSW}}{N - k} $$

Interpretation:

  • MSW is an unbiased estimator of the common variance $\sigma^2$, whether or not $H_0$ holds
  • MSB estimates $\sigma^2$ plus an extra term driven by differences among the group means; only under $H_0$ does it too estimate $\sigma^2$

10. The F Statistic¶

The ANOVA test statistic is: $$ F_{\text{obs}} = \frac{\text{MSB}}{\text{MSW}} $$

Under $H_0$: $$ F_{\text{obs}} \sim F(k-1, N-k) $$


11. Why the F Distribution Appears¶

Key theoretical results (under $H_0$):

  • $\text{SSB}/\sigma^2 \sim \chi^2(k-1)$
  • $\text{SSW}/\sigma^2 \sim \chi^2(N-k)$ (this one holds whether or not $H_0$ is true)
  • SSB and SSW are independent

Therefore: $$ \frac{(\text{SSB}/(k-1))}{(\text{SSW}/(N-k))} \sim F(k-1, N-k) $$
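This can be checked by simulation: generate data with all group means equal and compare the empirical rejection rate of the F-test with the nominal level. A sketch (group sizes and seed are arbitrary):

```python
import numpy as np
from scipy.stats import f

# Monte Carlo check: under H0 the F statistic follows F(k-1, N-k).
rng = np.random.default_rng(0)
k, n = 3, 7
N = k * n

def f_stat(data):
    means = data.mean(axis=1)
    grand = data.mean()
    ssb = n * ((means - grand) ** 2).sum()
    ssw = ((data - means[:, None]) ** 2).sum()
    return (ssb / (k - 1)) / (ssw / (N - k))

# All groups drawn from the same N(0, 1): H0 is true by construction.
sims = np.array([f_stat(rng.standard_normal((k, n))) for _ in range(20000)])

f_crit = f.ppf(0.95, k - 1, N - k)
print(f"empirical P(F > F_crit) = {(sims > f_crit).mean():.3f}")  # close to 0.05
```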


12. Decision Rule¶

Given significance level $\alpha$:

  • Reject $H_0$ if: $$ F_{\text{obs}} > F_{1-\alpha}(k-1, N-k) $$

  • Equivalently, reject if: $$ \text{p-value} < \alpha $$
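Both routes can be evaluated directly with `scipy.stats.f`; by construction they always agree. The numbers below are hypothetical:

```python
from scipy.stats import f

# Decision rule in code: critical-value route vs p-value route.
alpha, k, N = 0.05, 3, 21
F_obs = 4.5                            # hypothetical observed statistic

F_crit = f.ppf(1 - alpha, k - 1, N - k)
p_value = f.sf(F_obs, k - 1, N - k)    # sf = 1 - cdf (numerically safer tail)

print(f"F_crit = {F_crit:.4f}, p = {p_value:.4f}")
print("reject:", F_obs > F_crit, p_value < alpha)   # the two flags always match
```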


13. ANOVA Table¶

ANOVA Table (One-Way ANOVA)

| Source | Sum of Squares | df | Mean Square | F |
|---|---|---|---|---|
| Between groups | SSB | $k - 1$ | MSB = SSB/(k − 1) | MSB/MSW |
| Within groups | SSW | $N - k$ | MSW = SSW/(N − k) | |
| Total | SST | $N - 1$ | | |

14. Assumptions of One-Way ANOVA¶

  1. Independence of observations
  2. Normality within each group
  3. Homogeneity of variances: $$ \sigma_1^2 = \sigma_2^2 = \dots = \sigma_k^2 $$

ANOVA is:

  • fairly robust to mild non-normality
  • not robust to strong variance heterogeneity (especially with unbalanced $n_i$)

15. What ANOVA Does and Does Not Do¶

ANOVA tests:¶

  • existence of any difference among means

ANOVA does not:¶

  • identify which groups differ
  • quantify effect size (by default)
  • establish causality

16. Relationship to t-Test¶

Special case:

  • One-way ANOVA with $k=2$ groups

Then: $$ F = t^2 $$

ANOVA generalizes the two-sample $t$-test.
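The identity $F = t^2$ is easy to confirm with `scipy.stats` on any two-group dataset (the data below are made up):

```python
from scipy.stats import f_oneway, ttest_ind

# With k = 2 groups, the one-way ANOVA F equals the squared pooled t statistic.
a = [5.1, 4.9, 6.0, 5.5, 5.8]
b = [6.2, 6.8, 6.0, 7.1, 6.5]

F, p_f = f_oneway(a, b)
t, p_t = ttest_ind(a, b, equal_var=True)   # pooled two-sample t-test

print(f"F = {F:.6f}, t^2 = {t**2:.6f}")    # identical
print(f"p (ANOVA) = {p_f:.6f}, p (t-test) = {p_t:.6f}")   # also identical
```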


17. Practical Remarks¶

  • ANOVA answers "is there any effect?"
  • Always combine with:
    • diagnostic plots
    • effect sizes
    • post-hoc tests
  • For unequal variances, consider:
    • Welch ANOVA

18. Summary¶

  • ANOVA compares multiple means simultaneously
  • Based on variance decomposition
  • Test statistic: $$ F = \frac{\text{MSB}}{\text{MSW}} $$
  • Distribution: $$ F \sim F(k-1, N-k) $$
  • Requires independence, normality, equal variances

Graphical intuition for ANOVA¶

Before introducing the formal F-test, it is useful to develop a geometric and visual intuition.

In all examples below:

  • each column represents one group,
  • black squares are individual observations,
  • circles indicate group means.

The question ANOVA answers is not whether the means look different, but whether the between-group variability is large relative to the within-group variability.


[Figure: three example panels — "Probably equal", "Almost surely different", "Ambiguous" — each showing individual observations (squares) and group means (circles).]

Implementing One-way ANOVA¶

In [36]:
from scipy.stats import f


def one_way_anova(
    groups,
    alpha=0.05
):
    """
    One-way ANOVA.

    H0: All group means are equal
        μ1 = μ2 = ... = μk
    H1: At least one group mean differs

    Input
    -----
    groups : list of lists (or arrays)
        groups[i] contains observations from group i
        Need at least 2 groups, each with at least 2 observations

    alpha : significance level

    Test statistic:
        F = MS_between / MS_within

    Degrees of freedom:
        df_between = k - 1
        df_within  = N - k

    Uses BOTH:
      (1) p-value method
      (2) critical region method
    """

    # ---------- Validate input ----------
    if groups is None or len(groups) < 2:
        raise ValueError("At least two groups are required.")

    k = len(groups)
    n_i = []
    means = []

    for g in groups:
        if len(g) < 2:
            raise ValueError("Each group must have at least 2 observations.")
        n_i.append(len(g))
        means.append(sum(g) / len(g))

    N = sum(n_i)

    # ---------- Grand mean ----------
    grand_mean = sum(
        means[i] * n_i[i] for i in range(k)
    ) / N

    # ---------- Sum of Squares ----------
    # Between groups
    ss_between = sum(
        n_i[i] * (means[i] - grand_mean) ** 2
        for i in range(k)
    )

    # Within groups
    ss_within = 0.0
    for i in range(k):
        ss_within += sum(
            (x - means[i]) ** 2 for x in groups[i]
        )

    # Total (optional check)
    ss_total = ss_between + ss_within

    # ---------- Degrees of freedom ----------
    df_between = k - 1
    df_within = N - k

    if df_within <= 0:
        raise ValueError("Not enough data to compute within-group variance.")

    # ---------- Mean Squares ----------
    ms_between = ss_between / df_between
    ms_within = ss_within / df_within

    # ---------- F statistic ----------
    F_obs = ms_between / ms_within

    # ---------- p-value method ----------
    p_value = 1 - f.cdf(F_obs, df_between, df_within)
    reject_by_pvalue = p_value < alpha

    # ---------- Critical region method ----------
    F_crit = f.ppf(1 - alpha, df_between, df_within)
    reject_by_critical = F_obs > F_crit
    critical_region = f"F > {F_crit:.4f}"

    # ---------- Return results ----------
    return {
        "inputs": {
            "k": k,
            "group_sizes": n_i,
            "alpha": alpha
        },
        "means": {
            "group_means": means,
            "grand_mean": grand_mean
        },
        "anova_table": {
            "SS_between": ss_between,
            "df_between": df_between,
            "MS_between": ms_between,
            "SS_within": ss_within,
            "df_within": df_within,
            "MS_within": ms_within,
            "SS_total": ss_total,
            "df_total": N - 1
        },
        "statistic": {
            "F_obs": F_obs
        },
        "p_value_method": {
            "p_value": p_value,
            "reject_H0": reject_by_pvalue
        },
        "critical_region_method": {
            "critical_region": critical_region,
            "F_crit": F_crit,
            "reject_H0": reject_by_critical
        }
    }

Problem: One-Way ANOVA (Faculty Ages by Rank)¶

A researcher claims that there is a difference in the average age of assistant professors, associate professors, and full professors at her university.

Faculty members are selected randomly, and their ages are recorded.
Assume that faculty ages are normally distributed.

Test the researcher’s claim at the $\alpha = 0.01$ significance level.

The observed data are:

| Rank | Ages |
|---|---|
| Assistant Professor | 28, 32, 36, 42, 50, 33, 38 |
| Associate Professor | 44, 61, 52, 54, 62, 45, 46 |
| Professor | 54, 56, 55, 65, 52, 50, 46 |
In [43]:
# Faculty ages by rank: Assistant, Associate, and Full Professors
group_A = [28, 32, 36, 42, 50, 33, 38]
group_B = [44, 61, 52, 54, 62, 45, 46]
group_C = [54, 56, 55, 65, 52, 50, 46]

groups = [group_A, group_B, group_C]

anova_res = one_way_anova(groups=groups, alpha=0.01)
anova_res
Out[43]:
{'inputs': {'k': 3, 'group_sizes': [7, 7, 7], 'alpha': 0.01},
 'means': {'group_means': [37.0, 52.0, 54.0],
  'grand_mean': 47.666666666666664},
 'anova_table': {'SS_between': 1208.6666666666667,
  'df_between': 2,
  'MS_between': 604.3333333333334,
  'SS_within': 862.0,
  'df_within': 18,
  'MS_within': 47.888888888888886,
  'SS_total': 2070.666666666667,
  'df_total': 20},
 'statistic': {'F_obs': 12.619489559164736},
 'p_value_method': {'p_value': 0.00037546863291471055, 'reject_H0': True},
 'critical_region_method': {'critical_region': 'F > 6.0129',
  'F_crit': 6.012904834800529,
  'reject_H0': True}}
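As a sanity check, the same data can be run through SciPy's reference implementation, `scipy.stats.f_oneway`; it should reproduce $F_{\text{obs}} \approx 12.6195$ and the p-value from the dictionary above:

```python
from scipy.stats import f_oneway

# Cross-check against SciPy's reference implementation (same data as above).
group_A = [28, 32, 36, 42, 50, 33, 38]
group_B = [44, 61, 52, 54, 62, 45, 46]
group_C = [54, 56, 55, 65, 52, 50, 46]

F_scipy, p_scipy = f_oneway(group_A, group_B, group_C)
print(f"scipy: F = {F_scipy:.6f}, p = {p_scipy:.6g}")
```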
In [44]:
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 4.5))
plt.boxplot(groups, labels=["Group A", "Group B", "Group C"])
plt.title("One-way ANOVA data: boxplots by group")
plt.xlabel("Group")
plt.ylabel("Value")
plt.grid(True, alpha=0.3)
plt.show()
In [45]:
import numpy as np
import matplotlib.pyplot as plt

# We assume these already exist from previous cells:
# group_A, group_B, group_C

groups = {
    "A": group_A,
    "B": group_B,
    "C": group_C
}

rng = np.random.default_rng(0)   # reproducible jitter
jitter = 0.08                    # horizontal spread

plt.figure(figsize=(8, 4.5))

labels = list(groups.keys())
x_pos = np.arange(len(labels))

for i, label in enumerate(labels):
    y = np.array(groups[label], dtype=float)
    x = i + rng.uniform(-jitter, jitter, size=len(y))

    # individual observations
    plt.scatter(x, y, s=35, marker="s", alpha=0.9)

    # group mean
    plt.scatter(i, y.mean(), s=120, marker="o", zorder=3)

plt.xticks(x_pos, labels)
plt.ylabel("Response")
plt.title("Almost surely different")
plt.grid(True, alpha=0.25)
plt.tight_layout()
plt.show()
In [48]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import f

df1 = anova_res["anova_table"]["df_between"]
df2 = anova_res["anova_table"]["df_within"]
alpha = anova_res["inputs"]["alpha"]
F_obs = anova_res["statistic"]["F_obs"]

F_crit = f.ppf(1 - alpha, df1, df2)

# x-grid (go far enough to include F_obs and the critical value)
x_max = max(F_obs, F_crit) + 3
x = np.linspace(0, x_max, 1200)
y = f.pdf(x, df1, df2)

plt.figure(figsize=(9, 4.5))
plt.plot(x, y, label=f"F density (df1={df1}, df2={df2})")

# Shade critical region (right tail)
mask = x >= F_crit
plt.fill_between(x[mask], y[mask], alpha=0.3, label="Critical region")

# Mark critical value and observed statistic
plt.axvline(F_crit, linestyle="--", label=f"F_crit = {F_crit:.3f}")
plt.axvline(F_obs, linestyle="-.", label=f"F_obs = {F_obs:.3f}")

plt.title(f"F-test in one-way ANOVA (reject H0 = {F_obs > F_crit})")
plt.xlabel("F")
plt.ylabel("density")
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

Post Hoc Tests After One-Way ANOVA

Why Post Hoc Tests Are Needed¶

In one-way ANOVA we test the global null hypothesis

$H_0:\ \mu_1 = \mu_2 = \dots = \mu_k$

If ANOVA rejects $H_0$, we only know that at least one mean differs, but:

❌ ANOVA does not tell us which groups differ.

To identify where the differences lie, we perform post hoc multiple comparison tests.


The Multiple Comparisons Problem¶

Suppose we have $k$ groups.

  • Number of pairwise comparisons:

$m = \binom{k}{2} = \frac{k(k-1)}{2}$

If we test each comparison at level $\alpha = 0.05$, then the probability of making at least one Type I error increases rapidly.

Why We Should NOT Use Multiple Two-Sample t-Tests¶

One should never use multiple two-sample t-tests when comparing more than two groups.
Doing so inflates the Type I error rate.


Inflation of Type I Error¶

Assume we perform hypothesis tests at significance level $\alpha = 0.05$.

For one test:

  • Probability of not making a Type I error: $1 - \alpha = 0.95$
  • Probability of a Type I error: $\alpha = 0.05$

What Happens With Multiple Comparisons?¶

Suppose we perform $m$ independent comparisons.

  • Probability of no Type I errors:

    $(1 - \alpha)^m$

  • Probability of at least one Type I error (Family-Wise Error Rate):

    $\boxed{\text{FWER} = 1 - (1 - \alpha)^m}$

This probability increases rapidly as $m$ grows.
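A two-line loop makes the growth concrete (it reproduces the $m=2$ and $m=10$ examples below):

```python
# FWER = 1 - (1 - alpha)^m for an increasing number of comparisons m.
alpha = 0.05
for m in (1, 2, 3, 5, 10, 20, 50):
    fwer = 1 - (1 - alpha) ** m
    print(f"m = {m:3d}   FWER = {fwer:.4f}")
```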


Concrete Examples¶

Example 1: Two Comparisons¶

Let $\alpha = 0.05$ and $m = 2$.

  • Probability of no Type I error:

    $(1 - 0.05)^2 = 0.9025$

  • Probability of at least one Type I error:

    $1 - 0.9025 = 0.0975$

So the Type I error rate is almost doubled.


Example 2: Five Groups¶

For $k = 5$ groups:

$m = \binom{5}{2} = 10$

  • Probability of at least one Type I error:

    $1 - (1 - 0.05)^{10} \approx 0.401$

➡️ 40% chance of falsely detecting a difference!


Interpretation¶

Even if all group means are truly equal, using multiple two-sample t-tests:

  • makes false discoveries very likely
  • produces misleading scientific conclusions
  • invalidates reported p-values

Why ANOVA Fixes This¶

  • One-way ANOVA performs a single global test
  • Controls the Type I error at level $\alpha$
  • Tests:

    $H_0:\ \mu_1 = \mu_2 = \dots = \mu_k$

Only after rejecting ANOVA do we proceed to post hoc tests that explicitly control the family-wise error rate.


Key Takeaway (Exam-Ready Sentence)¶

Performing multiple two-sample t-tests inflates the Type I error rate, with
$\text{FWER} = 1 - (1 - \alpha)^m$,
which is why ANOVA followed by post hoc tests must be used instead.

Types of Post Hoc Tests (Big Picture)¶

| Method | Controls FWER | Assumptions | Notes |
|---|---|---|---|
| Bonferroni | Yes | Minimal | Conservative |
| Holm–Bonferroni | Yes | Minimal | Less conservative |
| Tukey HSD | Yes | Equal variances | Most common after ANOVA |
| Scheffé | Yes | Very general | Very conservative |
| Fisher LSD | No | Equal variances | Only valid if ANOVA significant |

Bonferroni Correction (Core Idea)¶

Bonferroni is based on a simple inequality:

$\mathbb{P}\left(\bigcup_{i=1}^m A_i\right) \le \sum_{i=1}^m \mathbb{P}(A_i)$

To ensure:

$\text{FWER} \le \alpha$

we test each hypothesis at level:

$\boxed{\alpha_{\text{Bonf}} = \frac{\alpha}{m}}$


Bonferroni Post Hoc Test (Step by Step)

Let $m = \binom{k}{2}$ pairwise comparisons.

Step 1: Form pairwise hypotheses¶

For each pair $(i,j)$:

$H_0^{(ij)}:\ \mu_i = \mu_j$

vs

$H_1^{(ij)}:\ \mu_i \neq \mu_j$


Step 2: Compute test statistics¶

Typically use two-sample t-tests:

  • Pooled variance if equal variances assumed
  • Welch t-test otherwise

Using the pooled within-group variance estimate from ANOVA (Mean Square Error):

$\text{MSE} = \text{MSW}$

the Bonferroni test statistic is

$t_{ij} = \dfrac{\bar{x}_i - \bar{x}_j}{\sqrt{\text{MSE}\left(\frac{1}{n_i} + \frac{1}{n_j}\right)}}$

where:

  • $\bar{x}_i,\bar{x}_j$ are the sample means of groups $i$ and $j$
  • $n_i,n_j$ are the corresponding sample sizes
  • $\text{MSE}$ is taken from the ANOVA table

Degrees of Freedom¶

$df = N - k$

where:

  • $N$ is the total sample size
  • $k$ is the number of groups

Bonferroni Adjustment¶

If $m = \binom{k}{2}$ pairwise comparisons are performed, the Bonferroni-adjusted significance level is

$\alpha_{\text{Bonf}} = \frac{\alpha}{m}$


Decision Rule (Two-Sided)¶

Reject $H_0^{(ij)}$ if either of the following equivalent conditions holds:

$|t_{ij}| > t_{1-\alpha_{\text{Bonf}}/2,\,df}$

or

$p_{ij} < \alpha_{\text{Bonf}}$


Equivalent p-Value Formulation¶

Alternatively, define the adjusted p-value

$p^{\text{Bonf}}_{ij} = \min(m \cdot p_{ij},\ 1)$

Reject $H_0^{(ij)}$ if

$p^{\text{Bonf}}_{ij} < \alpha$
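A minimal helper for the adjusted-p-value form (the raw p-values below are hypothetical):

```python
def bonferroni_adjust(p_values):
    """Bonferroni-adjusted p-values: p_adj = min(m * p, 1)."""
    m = len(p_values)
    return [min(m * p, 1.0) for p in p_values]

# Hypothetical raw p-values from m = 3 pairwise tests.
raw = [0.004, 0.020, 0.400]
adj = bonferroni_adjust(raw)
print([f"{p:.3f}" for p in adj])   # ['0.012', '0.060', '1.000']
```

Each adjusted value is then compared directly to $\alpha$.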


Interpretation¶

If the Bonferroni-adjusted test rejects $H_0^{(ij)}$, we conclude that the mean responses of groups $i$ and $j$ differ, while maintaining family-wise error rate control at level $\alpha$.


Properties of Bonferroni¶

Advantages¶

✔ Very simple
✔ Works with any test statistic
✔ No distributional assumptions beyond the base test
✔ Valid for unbalanced designs

Disadvantages¶

❌ Conservative, especially when $m$ is large
❌ Reduced power (more Type II errors)


When to Use Bonferroni¶

Bonferroni is appropriate when:

  • Number of comparisons is small
  • Strong control of Type I error is required
  • Assumptions for Tukey HSD are doubtful
  • You want a safe default method

Comparison with Tukey HSD¶

| Aspect | Bonferroni | Tukey |
|---|---|---|
| Power | Lower | Higher |
| FWER control | Guaranteed | Guaranteed |
| Assumes equal variances | No | Yes |
| Uses ANOVA MSE | Optional | Yes |
| Typical use | General | Standard ANOVA |

Summary (Exam-Ready)¶

  • ANOVA answers whether differences exist
  • Post hoc tests answer where differences exist
  • Bonferroni controls FWER by splitting $\alpha$
  • Simple, robust, but conservative
  • Often a baseline method to compare with Tukey HSD

Key sentence:
Bonferroni correction controls the family-wise error rate by testing each comparison at level $\alpha/m$.

Tukey’s HSD (Honestly Significant Difference) Test

Context: Post Hoc Testing After ANOVA¶

Recall that one-way ANOVA tests the global hypothesis

$H_0:\ \mu_1 = \mu_2 = \dots = \mu_k$

If ANOVA rejects $H_0$, we conclude that at least one mean differs, but we still do not know which pairs of means differ.

👉 Tukey’s HSD is a post hoc multiple comparison procedure designed specifically for all pairwise comparisons after ANOVA.


What Tukey’s HSD Tests¶

For every pair of groups $(i,j)$, Tukey’s HSD tests

$H_0^{(ij)}:\ \mu_i = \mu_j$

vs

$H_1^{(ij)}:\ \mu_i \neq \mu_j$

while controlling the family-wise error rate (FWER) at level $\alpha$.


Key Idea Behind Tukey’s HSD¶

Tukey’s HSD uses the studentized range distribution, which accounts for the fact that:

  • we are comparing many means simultaneously
  • the maximum difference among sample means is more variable than a single difference

Instead of adjusting $\alpha$ (like Bonferroni), Tukey adjusts the critical value.


Assumptions of Tukey’s HSD¶

Tukey’s HSD relies on the same assumptions as one-way ANOVA:

  1. Independence of observations
  2. Normality within each group
  3. Equal population variances
  4. Balanced or approximately balanced design (robust if mildly unbalanced)

If variances are unequal, Tukey’s HSD may not be valid.


Test Statistic¶

Let:

  • $\bar x_i$ = sample mean of group $i$
  • $n_i$ = sample size of group $i$
  • $MSE$ = mean square error from ANOVA (note: $\text{MSE} = \text{MSW} = \text{SSW}/(N-k)$, i.e. the within-group sum of squares divided by its degrees of freedom, not SSW itself)
  • $k$ = number of groups

For groups with equal sample sizes $n$:

$\text{SE} = \sqrt{\frac{MSE}{n}}$

For unequal sample sizes (Tukey–Kramer):

$\text{SE}_{ij} = \sqrt{\frac{MSE}{2}\left(\frac{1}{n_i} + \frac{1}{n_j}\right)}$


Tukey HSD Test Statistic¶

The Tukey test compares the absolute mean difference to a critical threshold:

$|\bar x_i - \bar x_j|$

Reject $H_0^{(ij)}$ if:

$|\bar x_i - \bar x_j| > q_{1-\alpha,k,df}\cdot \text{SE}_{ij}$

where:

  • $q_{1-\alpha,k,df}$ is the $1-\alpha$ quantile (upper $\alpha$ critical value) of the studentized range distribution
  • $df = N - k$ (within-group degrees of freedom)

Studentized Range Distribution¶

The studentized range statistic is:

$q = \frac{\max(\bar X_1,\dots,\bar X_k) - \min(\bar X_1,\dots,\bar X_k)}{S}$

where $S = \sqrt{\text{MSE}/n}$ estimates the standard deviation of a group mean (for equal group sizes $n$).

This distribution explicitly accounts for multiple comparisons among means.


Family-Wise Error Control¶

Tukey’s HSD guarantees:

$\mathbb{P}(\text{at least one Type I error}) \le \alpha$

for all pairwise mean comparisons.

This is exact control, not an approximation.


Tukey HSD Confidence Intervals¶

For each pair $(i,j)$, Tukey’s method produces simultaneous confidence intervals:

$(\bar x_i - \bar x_j) \ \pm\ q_{1-\alpha,k,df}\cdot \text{SE}_{ij}$

All intervals jointly have coverage probability at least $1-\alpha$.

If an interval does not contain 0, the corresponding means differ significantly.


Comparison with Bonferroni¶

| Aspect | Tukey HSD | Bonferroni |
|---|---|---|
| Designed for pairwise means | Yes | No (general) |
| Uses ANOVA MSE | Yes | Optional |
| Equal variance assumption | Yes | No |
| Power | Higher | Lower |
| FWER control | Exact | Upper bound |
| Conservativeness | Moderate | Often very conservative |

When to Use Tukey’s HSD¶

Use Tukey’s HSD when:

✔ ANOVA is significant
✔ You want all pairwise comparisons
✔ Variances are approximately equal
✔ You want higher power than Bonferroni

Avoid Tukey’s HSD when variances differ substantially.


Interpretation Example¶

If Tukey’s HSD finds that:

  • Group A vs B: significant
  • Group A vs C: significant
  • Group B vs C: not significant

then we conclude:

Group A differs from both B and C, while B and C are statistically indistinguishable.


Common Misconceptions¶

❌ Tukey’s HSD can be used without ANOVA
✔ It can, but it is intended as a post hoc method

❌ Tukey tests variances
✔ Tukey compares means, not variances

❌ Tukey is always better than Bonferroni
✔ Only when assumptions hold


Exam-Ready Summary¶

  • Tukey’s HSD is a post hoc test for all pairwise mean comparisons
  • Controls family-wise error rate exactly
  • Based on the studentized range distribution
  • More powerful than Bonferroni under equal variances
  • Standard choice after one-way ANOVA

Key sentence:
Tukey’s HSD controls the family-wise error rate by using the studentized range distribution to compare all pairwise mean differences simultaneously.

In [49]:
import math
import itertools
import pandas as pd
from scipy.stats import t, studentized_range


def _anova_mse_and_df_within(groups):
    """
    Compute MSE (= MSW) and df_within from raw groups.
    groups: list of arrays/lists
    """
    k = len(groups)
    n_i = [len(g) for g in groups]
    if k < 2 or any(n < 2 for n in n_i):
        raise ValueError("Need at least 2 groups and each group must have n>=2.")
    N = sum(n_i)

    means = [sum(g) / len(g) for g in groups]
    ss_within = 0.0
    for i, g in enumerate(groups):
        ss_within += sum((x - means[i]) ** 2 for x in g)

    df_within = N - k
    mse = ss_within / df_within
    return mse, df_within, means, n_i


def bonferroni_posthoc(groups, labels=None, alpha=0.05):
    """
    Bonferroni post hoc for all pairwise mean comparisons after one-way ANOVA.
    Uses ANOVA MSE (= MSW) as common variance estimator.
    """
    if labels is None:
        labels = [f"G{i+1}" for i in range(len(groups))]
    if len(labels) != len(groups):
        raise ValueError("labels must have same length as groups.")

    mse, dfw, means, n_i = _anova_mse_and_df_within(groups)
    k = len(groups)
    m = k * (k - 1) // 2  # number of pairwise comparisons
    alpha_bonf = alpha / m

    rows = []
    for i, j in itertools.combinations(range(k), 2):
        diff = means[i] - means[j]
        se_ij = math.sqrt(mse * (1 / n_i[i] + 1 / n_i[j]))
        t_obs = diff / se_ij

        # two-sided p-value for t
        p_raw = 2 * (1 - t.cdf(abs(t_obs), dfw))
        p_adj = min(m * p_raw, 1.0)

        reject_adj = p_adj < alpha  # equivalent to p_raw < alpha/m
        reject_alpha_div_m = p_raw < alpha_bonf

        rows.append({
            "pair": f"{labels[i]} - {labels[j]}",
            "mean_diff": diff,
            "t_obs": t_obs,
            "df": dfw,
            "p_raw": p_raw,
            "p_adj_bonf": p_adj,
            "alpha/m": alpha_bonf,
            "reject (p_adj<alpha)": reject_adj,
            "reject (p_raw<alpha/m)": reject_alpha_div_m
        })

    df = pd.DataFrame(rows)
    return {
        "method": "Bonferroni (pairwise t using ANOVA MSE)",
        "alpha": alpha,
        "m": m,
        "alpha_bonf": alpha_bonf,
        "MSE": mse,
        "df_within": dfw,
        "table": df.sort_values("p_adj_bonf").reset_index(drop=True)
    }


def tukey_hsd_posthoc(groups, labels=None, alpha=0.05):
    """
    Tukey HSD (Tukey–Kramer for possibly unequal n) for all pairwise comparisons.
    Uses studentized range distribution.

    Reject H0 for pair (i,j) if:
        |xbar_i - xbar_j| > q_{1-alpha,k,df} * sqrt(MSE/2 * (1/ni + 1/nj))
    """
    if labels is None:
        labels = [f"G{i+1}" for i in range(len(groups))]
    if len(labels) != len(groups):
        raise ValueError("labels must have same length as groups.")

    mse, dfw, means, n_i = _anova_mse_and_df_within(groups)
    k = len(groups)

    # Tukey critical value for studentized range
    q_crit = studentized_range.ppf(1 - alpha, k, dfw)

    rows = []
    for i, j in itertools.combinations(range(k), 2):
        diff = means[i] - means[j]
        se_ij = math.sqrt(mse / 2 * (1 / n_i[i] + 1 / n_i[j]))
        q_obs = abs(diff) / se_ij

        # Tukey p-value (right tail)
        p_value = 1 - studentized_range.cdf(q_obs, k, dfw)

        # Tukey-Kramer HSD threshold
        hsd = q_crit * se_ij
        reject = abs(diff) > hsd

        rows.append({
            "pair": f"{labels[i]} - {labels[j]}",
            "mean_diff": diff,
            "q_obs": q_obs,
            "df_within": dfw,
            "k": k,
            "q_crit": q_crit,
            "HSD": hsd,
            "p_value": p_value,
            "reject_H0": reject
        })

    df = pd.DataFrame(rows)
    return {
        "method": "Tukey HSD (Tukey–Kramer)",
        "alpha": alpha,
        "k": k,
        "MSE": mse,
        "df_within": dfw,
        "q_crit": q_crit,
        "table": df.sort_values("p_value").reset_index(drop=True)
    }
In [54]:
# --- EXECUTION CELL (uses data from previous ANOVA cells) ---
# Assumes you already have: group_A, group_B, group_C defined above

import pandas as pd
from scipy.stats import t, studentized_range

groups = [group_A, group_B, group_C]
labels = ["A", "B", "C"]

alpha = 0.01

tukey_res = tukey_hsd_posthoc(groups=groups, labels=labels, alpha=alpha)
bonf_res  = bonferroni_posthoc(groups=groups, labels=labels, alpha=alpha)

# -------- Add global critical values (nice for hand-checking) --------
k = tukey_res["k"]
dfw = tukey_res["df_within"]
m = bonf_res["m"]

alpha_bonf = bonf_res["alpha_bonf"]          # = alpha / m
q_crit = tukey_res["q_crit"]                 # Tukey critical q
t_crit_bonf = t.ppf(1 - alpha_bonf/2, dfw)   # Bonferroni critical t (two-sided)

print("=== GLOBAL CRITICAL VALUES ===")
print(f"alpha = {alpha}")
print(f"k = {k}, df_within = {dfw}, number of pairs m = {m}")
print(f"Bonferroni alpha/m = {alpha_bonf:.8f}")
print(f"Bonferroni t_crit (two-sided) = {t_crit_bonf:.6f}")
print(f"Tukey q_crit = {q_crit:.6f}")

# -------- Add per-pair critical thresholds into the tables --------
# Tukey per-pair HSD threshold is already in column 'HSD'
# Add a clearer column name too:
tukey_table = tukey_res["table"].copy()
tukey_table["critical_rule"] = "Reject if |mean_diff| > HSD"

# Bonferroni: add per-pair half-width threshold: t_crit * SE_ij
bonf_table = bonf_res["table"].copy()
bonf_table["t_crit_bonf"] = t_crit_bonf
bonf_table["crit_abs_diff"] = t_crit_bonf * (
    # SE_ij = |mean_diff| / |t_obs|  (stable unless t_obs=0)
    (bonf_table["mean_diff"].abs() / bonf_table["t_obs"].abs()).replace([float("inf")], 0.0)
)
bonf_table["critical_rule"] = "Reject if |mean_diff| > t_crit * SE"

# Display tables
display(tukey_table.style.format({
    "mean_diff": "{:.4f}",
    "q_obs": "{:.4f}",
    "q_crit": "{:.4f}",
    "HSD": "{:.4f}",
    "p_value": "{:.6g}"
}))

display(bonf_table.style.format({
    "mean_diff": "{:.4f}",
    "t_obs": "{:.4f}",
    "p_raw": "{:.6g}",
    "p_adj_bonf": "{:.6g}",
    "alpha/m": "{:.6g}",
    "t_crit_bonf": "{:.4f}",
    "crit_abs_diff": "{:.4f}"
}))
=== GLOBAL CRITICAL VALUES ===
alpha = 0.01
k = 3, df_within = 18, number of pairs m = 3
Bonferroni alpha/m = 0.00333333
Bonferroni t_crit (two-sided) = 3.380362
Tukey q_crit = 4.703370
  pair mean_diff q_obs df_within k q_crit HSD p_value reject_H0 critical_rule
0 A - C -17.0000 6.4995 18 3 4.7034 12.3021 0.000625717 True Reject if |mean_diff| > HSD
1 A - B -15.0000 5.7349 18 3 4.7034 12.3021 0.00204056 True Reject if |mean_diff| > HSD
2 B - C -2.0000 0.7646 18 3 4.7034 12.3021 0.852447 False Reject if |mean_diff| > HSD
  pair mean_diff t_obs df p_raw p_adj_bonf alpha/m reject (p_adj<alpha) reject (p_raw<alpha/m) t_crit_bonf crit_abs_diff critical_rule
0 A - C -17.0000 -4.5958 18 0.000224288 0.000672863 0.00333333 True True 3.3804 12.5039 Reject if |mean_diff| > t_crit * SE
1 A - B -15.0000 -4.0552 18 0.000742764 0.00222829 0.00333333 True True 3.3804 12.5039 Reject if |mean_diff| > t_crit * SE
2 B - C -2.0000 -0.5407 18 0.595351 1 0.00333333 False False 3.3804 12.5039 Reject if |mean_diff| > t_crit * SE
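Recent SciPy versions also ship a reference implementation, `scipy.stats.tukey_hsd`; its pairwise p-values should match the Tukey table above (A–B ≈ 0.00204, A–C ≈ 0.000626, B–C ≈ 0.852):

```python
from scipy.stats import tukey_hsd  # requires a reasonably recent SciPy

# Same faculty-age data as in the ANOVA example above.
group_A = [28, 32, 36, 42, 50, 33, 38]
group_B = [44, 61, 52, 54, 62, 45, 46]
group_C = [54, 56, 55, 65, 52, 50, 46]

res = tukey_hsd(group_A, group_B, group_C)
print(res)   # matrix of pairwise mean differences and p-values
```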

Example 2¶

Problem¶

Three fuel injection systems are tested for efficiency, and the following coded data are obtained:

| System 1 | System 2 | System 3 |
|---|---|---|
| 48 | 60 | 57 |
| 56 | 56 | 55 |
| 46 | 53 | 52 |
| 45 | 60 | 50 |
| 50 | 51 | 51 |

Question¶

Do the data support the hypothesis that the three fuel injection systems offer equivalent levels of efficiency?

Answer¶

Hypotheses¶

The appropriate hypotheses are

$H_0:\ \mu_1 = \mu_2 = \mu_3$

$H_1:$ At least one mean is different from the others.


Sample Means¶

The observed data are:

| System 1 | System 2 | System 3 |
|---|---|---|
| 48 | 60 | 57 |
| 56 | 56 | 55 |
| 46 | 53 | 52 |
| 45 | 60 | 50 |
| 50 | 51 | 51 |

The sample means are:

$\bar X_1 = 49,\quad \bar X_2 = 56,\quad \bar X_3 = 53$


Mean of the Sample Means¶

The mean of the sample means is

$\bar{\bar X} = \frac{49 + 56 + 53}{3} = 52.67$


Variation Between Samples¶

The variation present between samples is

$S^2_{Tr} = \frac{1}{k - 1} \sum_{i=1}^{k} (\bar X_i - \bar{\bar X})^2, \qquad k = 3 \text{ samples}$

$= \frac{1}{3 - 1}\left[(49 - 52.67)^2 + (56 - 52.67)^2 + (53 - 52.67)^2\right]$

$= 12.33$


Variation Within Samples¶

System 1¶

$\sum (X - \bar X_1)^2 = (48 - 49)^2 + (56 - 49)^2 + (46 - 49)^2 + (45 - 49)^2 + (50 - 49)^2 = 76$


System 2¶

$\sum (X - \bar X_2)^2 = (60 - 56)^2 + (56 - 56)^2 + (53 - 56)^2 + (60 - 56)^2 + (51 - 56)^2 = 66$


System 3¶

$\sum (X - \bar X_3)^2 = (57 - 53)^2 + (55 - 53)^2 + (52 - 53)^2 + (50 - 53)^2 + (51 - 53)^2 = 34$


Pooled Within-Sample Variation¶

Hence,

$S^2_E = \frac{\sum (X - \bar X_1)^2 + \sum (X - \bar X_2)^2 + \sum (X - \bar X_3)^2} {(n_1 - 1) + (n_2 - 1) + (n_3 - 1)}$

$= \frac{76 + 66 + 34}{4 + 4 + 4} = 14.67$


Test Statistic¶

The value of the $F$ statistic is given by

$F = \frac{n S^2_{Tr}}{S^2_E} = \frac{5 \times 12.33}{14.67} = 4.20$


Degrees of Freedom¶

  • Degrees of freedom for $S^2_{Tr}$:

    $\text{No. of samples} - 1 = 3 - 1 = 2$

  • Degrees of freedom for $S^2_E$:

    $\text{No. of samples} \times (\text{sample size} - 1) = 3 \times 4 = 12$


Decision¶

The critical value at the $5\%$ level of significance from the $F$ tables is

$F_{(2,12)} = 3.89$

Since

$4.20 > 3.89$

we conclude that there is sufficient evidence to reject $H_0$, and therefore the fuel injection systems are not of equivalent efficiency.
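The hand computation can be verified in a few lines; the result should match $F \approx 4.20$ and the tabulated critical value $3.89$:

```python
from scipy.stats import f_oneway, f

# Verify the hand computation for the fuel injection data.
s1 = [48, 56, 46, 45, 50]
s2 = [60, 56, 53, 60, 51]
s3 = [57, 55, 52, 50, 51]

F_obs, p = f_oneway(s1, s2, s3)
F_crit = f.ppf(0.95, 2, 12)

print(f"F = {F_obs:.3f} (hand: 4.20), F_crit = {F_crit:.3f} (tables: 3.89), p = {p:.4f}")
```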

Bonferroni Post Hoc Test (after One-Way ANOVA)¶

We perform Bonferroni pairwise comparisons between the three fuel injection systems, using the ANOVA within-sample variance estimate.

From the ANOVA solution:

  • Group means: $\bar X_1 = 49,\ \bar X_2 = 56,\ \bar X_3 = 53$
  • Within-samples variance estimate (from the worked solution): $S_E^2 = 14.67$
  • Total sample size: $N = 15$, number of groups: $k=3$
  • Degrees of freedom for error: $df = N-k = 12$
  • Each group has size $n_1=n_2=n_3=5$

Step 1. Number of comparisons and Bonferroni level¶

Number of pairwise comparisons:

$m = \binom{3}{2} = 3$

Bonferroni-adjusted significance level:

$\alpha_{\text{Bonf}} = \frac{\alpha}{m} = \frac{0.05}{3} = 0.01667$

For two-sided tests, the critical value uses $\alpha_{\text{Bonf}}/2$.


Step 2. Standard error for pairwise mean differences¶

Using the ANOVA MSE (here $S_E^2$), the standard error for comparing means $i$ and $j$ is

$\text{SE}_{ij} = \sqrt{S_E^2\left(\frac{1}{n_i}+\frac{1}{n_j}\right)}$

Since $n_i=n_j=5$:

$\text{SE} = \sqrt{14.67\left(\frac{1}{5}+\frac{1}{5}\right)} = \sqrt{14.67\cdot 0.4} = \sqrt{5.868} = 2.422$


Step 3. Test statistic and critical region¶

For each pair $(i,j)$ we test $H_0^{(ij)}:\mu_i=\mu_j$ using

$t_{ij} = \frac{\bar X_i - \bar X_j}{\text{SE}}$

Bonferroni critical value (two-sided) is:

$t_{\text{crit}} = t_{1-\alpha_{\text{Bonf}}/2,\ df} = t_{1-0.01667/2,\ 12} = t_{0.991667,\ 12} = 2.779$

So the critical difference in means is:

$|\bar X_i-\bar X_j| > t_{\text{crit}}\cdot \text{SE} = 2.779\cdot 2.422 = 6.732$


Step 4. Compute all pairwise comparisons¶

Pairwise differences¶

  • System 2 vs System 1: $56-49 = 7$
  • System 3 vs System 1: $53-49 = 4$
  • System 2 vs System 3: $56-53 = 3$

Test statistics¶

  • $t_{21} = \dfrac{7}{2.422} = 2.890$
  • $t_{31} = \dfrac{4}{2.422} = 1.651$
  • $t_{23} = \dfrac{3}{2.422} = 1.238$

Two-sided p-values (df = 12)¶

  • $p_{21} = 0.01358$
  • $p_{31} = 0.12459$
  • $p_{23} = 0.23923$

Bonferroni-adjusted p-values: $p^{\text{Bonf}}_{ij}=\min(3p_{ij},1)$

  • $p^{\text{Bonf}}_{21} = 0.04075$
  • $p^{\text{Bonf}}_{31} = 0.37378$
  • $p^{\text{Bonf}}_{23} = 0.71770$

Summary Table¶

| Comparison | Mean diff | SE | $t$ | Raw p-value | Bonferroni p-value | Reject at $\alpha=0.05$ (Bonferroni)? |
|---|---|---|---|---|---|---|
| System 2 − System 1 | 7 | 2.422 | 2.890 | 0.01358 | 0.04075 | Yes |
| System 3 − System 1 | 4 | 2.422 | 1.651 | 0.12459 | 0.37378 | No |
| System 2 − System 3 | 3 | 2.422 | 1.238 | 0.23923 | 0.71770 | No |

(Equivalently, using the critical-difference rule $|\bar X_i-\bar X_j|>6.732$, only the difference 7 is significant.)


Conclusion¶

At overall significance level $\alpha=0.05$ with Bonferroni correction:

  • System 2 differs significantly from System 1.
  • System 3 does not differ significantly from System 1.
  • System 2 does not differ significantly from System 3.

So, the evidence (under Bonferroni control of family-wise error) points to System 2 having higher mean efficiency than System 1, while System 3 is not conclusively different from either.
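The hand-computed Bonferroni quantities above can be reproduced with `scipy.stats.t` (using the exact $S_E^2 = 176/12$ rather than the rounded 14.67):

```python
from math import sqrt
from scipy.stats import t

# Verify the Bonferroni worked example for the fuel injection data.
mse = 176 / 12                    # exact S_E^2 from the ANOVA (≈ 14.67)
df, n, m, alpha = 12, 5, 3, 0.05  # error df, group size, comparisons, level

se = sqrt(mse * (1 / n + 1 / n))
t_crit = t.ppf(1 - (alpha / m) / 2, df)

print(f"SE = {se:.3f} (hand: 2.422), t_crit = {t_crit:.3f} (hand: 2.779)")
for diff in (7, 4, 3):
    t_obs = diff / se
    p_raw = 2 * t.sf(t_obs, df)
    print(f"diff = {diff}: t = {t_obs:.3f}, Bonferroni p = {min(m * p_raw, 1):.5f}")
```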