Seminar 8

Mann–Whitney U Test (Wilcoxon Rank-Sum Test)

The Mann–Whitney U test is a non-parametric test for comparing two independent samples.
It assesses whether one distribution tends to produce larger values than the other and is a robust alternative to the two-sample t-test.


1. Problem setup¶

Let $$ X_1,\dots,X_{n_1} \sim F, \qquad Y_1,\dots,Y_{n_2} \sim G, $$ where all observations are independent.

The goal is to compare the distributions $F$ and $G$ without assuming normality.


2. Hypotheses¶

  • Null hypothesis $$ H_0: F = G $$

  • Alternative hypothesis $$ H_1: F \neq G $$ (or one-sided variants: $F$ stochastically dominates $G$ or vice versa)

⚠️ Important: this is not a test of equality of means in general.

Null hypothesis (Mann–Whitney U test).

Let $X$ and $Y$ be independent random variables representing observations from the two groups. The Mann–Whitney test is based on the null hypothesis $$ H_0:\; \mathbb P(X<Y) + \tfrac12\,\mathbb P(X=Y) = \tfrac12. $$

This states that a randomly chosen observation from one group is equally likely to be smaller or larger than a randomly chosen observation from the other group, with ties counted as half in each direction. Equivalently, under $H_0$ there is no systematic tendency for one distribution to produce larger values than the other. In the continuous case, where $\mathbb P(X=Y)=0$, this reduces to $\mathbb P(X<Y)=1/2$. The null hypothesis therefore concerns stochastic ordering of the two distributions, not equality of medians, except under additional assumptions such as a pure location shift.
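The probability appearing in this null hypothesis can be estimated directly from data by averaging pairwise comparisons. A minimal sketch (the helper name `theta_hat` is ours, not part of any library):

```python
import numpy as np

def theta_hat(x, y):
    """Plug-in estimate of theta = P(X < Y) + 0.5 * P(X = Y),
    averaging the comparison over all (i, j) pairs."""
    x = np.asarray(x, dtype=float)[:, None]
    y = np.asarray(y, dtype=float)[None, :]
    return float(np.mean((x < y) + 0.5 * (x == y)))

# Under H0 (identical distributions) theta equals 1/2; here X tends to
# fall below Y, so the estimate exceeds 1/2.
print(theta_hat([1, 2, 3], [2, 3, 4]))
```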

3. Assumptions¶

  • Independence within and between samples
  • Continuous distributions (no ties, for exact theory)
  • Identical distributional shapes across groups (needed for the location-shift interpretation)

4. Test statistic¶

Rank-based form¶

  1. Pool all observations $X_1,\dots,X_{n_1},Y_1,\dots,Y_{n_2}$
  2. Rank them from smallest to largest (average ranks in case of ties)

The Mann–Whitney statistics can be written as $$ U_X = R_X - \frac{n_1(n_1+1)}{2}, \qquad U_Y = R_Y - \frac{n_2(n_2+1)}{2}, $$ where $$ R_X = \sum_{i=1}^{n_1} R(X_i), \qquad R_Y = \sum_{j=1}^{n_2} R(Y_j) $$ are the rank sums of the $X$ and $Y$ samples, respectively. Since $R_X + R_Y = \frac{(n_1+n_2)(n_1+n_2+1)}{2}$, the two statistics satisfy $U_X + U_Y = n_1 n_2$.

The test statistic used in the Mann–Whitney test is $$ U = \min(U_X, U_Y). $$

This symmetrization ensures invariance under relabeling of the two samples.

Notice that standard tables of critical values are given for the two-tailed test: we reject $H_0$ when the statistic $U = \min(U_X, U_Y)$ is less than or equal to the tabulated critical value, and fail to reject $H_0$ when $U$ is greater than the critical value.
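The rank-based recipe can be sketched in a few lines, using SciPy's `rankdata` for midranks and the convention $U_X = R_X - n_1(n_1+1)/2$ used throughout this seminar (the helper name `u_statistics` is illustrative):

```python
import numpy as np
from scipy.stats import rankdata

def u_statistics(x, y):
    """Compute U_X, U_Y and U = min(U_X, U_Y) from midranks of the pooled sample."""
    x, y = np.asarray(x, float), np.asarray(y, float)
    n1, n2 = len(x), len(y)
    ranks = rankdata(np.concatenate([x, y]))  # average ranks in case of ties
    R_x, R_y = ranks[:n1].sum(), ranks[n1:].sum()
    U_x = R_x - n1 * (n1 + 1) / 2
    U_y = R_y - n2 * (n2 + 1) / 2
    return U_x, U_y, min(U_x, U_y)

U_x, U_y, U = u_statistics([1.2, 3.4, 5.6], [2.1, 4.3, 6.5, 7.0])
print(U_x, U_y, U)  # note U_x + U_y == n1 * n2
```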


Pairwise-comparison form (theoretical form)¶

Equivalently, $$ U_X = \sum_{i=1}^{n_1}\sum_{j=1}^{n_2} \mathbf{1}\{X_i > Y_j\}, $$ with ties handled via midranks in practice (each tied pair contributing $\tfrac12$).

This representation is central for the theoretical interpretation of the test.
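A quick numerical check that the pairwise double sum agrees with the rank-based formula $U_X = R_X - n_1(n_1+1)/2$ used in the worked example:

```python
import numpy as np
from scipy.stats import rankdata

x = np.array([1.2, 3.4, 5.6])
y = np.array([2.1, 4.3, 6.5, 7.0])
n1 = len(x)

# Rank-based form
ranks = rankdata(np.concatenate([x, y]))
U_rank = ranks[:n1].sum() - n1 * (n1 + 1) / 2

# Pairwise-comparison form (ties would count as 1/2 each)
U_pair = sum((xi > yj) + 0.5 * (xi == yj) for xi in x for yj in y)

print(U_rank, U_pair)  # the two forms coincide
```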


5. Relationship between statistics¶

The rank-sum statistic $R_X$ and $U_X$ are affinely related: $$ R_X = U_X + \frac{n_1(n_1+1)}{2}. $$

All formulations $(R_X, U_X, U)$ induce identical tests and p-values, differing only by centering and symmetrization.


6. Exact null distribution (finite sample)¶

Under $H_0$:

  • All $n_1+n_2$ ranks are fixed
  • Every allocation of ranks to the $X$ and $Y$ samples is equally likely

Thus, $U_X$ has an exact permutation distribution depending only on $(n_1,n_2)$.

Formally: $$ \mathbb{P}(U_X = u) = \frac{\#\{\text{rank allocations yielding } u\}}{\binom{n_1+n_2}{n_1}}. $$

This distribution is:

  • discrete
  • distribution-free
  • symmetric about $\frac{n_1 n_2}{2}$

Exact null distribution: numerical example (Mann–Whitney U)¶

Consider two samples:

  • Sample $X$ with size $n_1 = 2$
  • Sample $Y$ with size $n_2 = 3$

Under the null hypothesis $H_0$, the two samples come from the same continuous distribution.


Step 1. Fixed ranks under $H_0$¶

Pool all observations and assign ranks
$1,2,3,4,5$.

Under $H_0$:

  • The ranks themselves are fixed
  • Every allocation of $n_1 = 2$ ranks to sample $X$ is equally likely

Total number of allocations: $$ \binom{n_1+n_2}{n_1} = \binom{5}{2} = 10. $$

Each allocation has probability $1/10$.


Step 2. Definition of the statistic¶

Let $R_X$ be the sum of the ranks assigned to sample $X$.

The Mann–Whitney statistic is defined as $$ U_X = R_X - \frac{n_1(n_1+1)}{2} = R_X - 3. $$


Step 3. Enumerate all rank allocations¶

For $n_1 = 2$, the Mann–Whitney statistic is $$ U_X = R_X - 3. $$

Ranks assigned to $X$    $R_X$    $U_X$
$\{1,2\}$                  3        0
$\{1,3\}$                  4        1
$\{1,4\}$                  5        2
$\{1,5\}$                  6        3
$\{2,3\}$                  5        2
$\{2,4\}$                  6        3
$\{2,5\}$                  7        4
$\{3,4\}$                  7        4
$\{3,5\}$                  8        5
$\{4,5\}$                  9        6

Step 4. Exact null distribution of $U_X$¶

By counting how many allocations produce each value of $U_X$, we obtain:

$u$    Count    $\mathbb{P}(U_X = u)$
0      1        0.1
1      1        0.1
2      2        0.2
3      2        0.2
4      2        0.2
5      1        0.1
6      1        0.1

Formally, $$ \mathbb{P}(U_X = u) = \frac{\#\{\text{rank allocations yielding } u\}}{\binom{5}{2}}. $$
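This enumeration is easy to reproduce programmatically; a short sketch using `itertools.combinations`:

```python
from itertools import combinations
from fractions import Fraction

n1, n2 = 2, 3
N = n1 + n2

# Under H0, every choice of n1 ranks for sample X is equally likely.
counts = {}
for ranks_x in combinations(range(1, N + 1), n1):
    U = sum(ranks_x) - n1 * (n1 + 1) // 2
    counts[U] = counts.get(U, 0) + 1

total = sum(counts.values())  # C(5, 2) = 10
dist = {u: Fraction(c, total) for u, c in sorted(counts.items())}
print(dist)
```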


Step 5. Symmetry¶

Here, $$ n_1 n_2 = 2 \cdot 3 = 6, $$ so the distribution of $U_X$ is symmetric about $$ \frac{n_1 n_2}{2} = 3. $$

Indeed, $$ \mathbb{P}(U_X = 0) = \mathbb{P}(U_X = 6), \quad \mathbb{P}(U_X = 1) = \mathbb{P}(U_X = 5), \quad \mathbb{P}(U_X = 2) = \mathbb{P}(U_X = 4). $$

Moreover, $$ U_Y = n_1 n_2 - U_X = 6 - U_X, $$ so the two Mann–Whitney statistics are complementary for each allocation.


Conclusion¶

This example shows explicitly that under $H_0$:

  • $U_X$ has a finite-sample exact permutation distribution
  • The distribution depends only on $(n_1, n_2)$
  • No assumptions on the underlying population distribution are required

7. Support of the distribution¶

The minimum and maximum possible values of $U_X$ are: $$ U_{X,\min} = 0, \qquad U_{X,\max} = n_1 n_2. $$

Thus: $$ U_X \in \{0,1,\dots,n_1 n_2\}. $$

Each value corresponds to the number of $(X_i,Y_j)$ pairs such that $X_i > Y_j$.


8. Mean and variance under $H_0$¶

Under $H_0$: $$ \mathbb{E}[U_X] = \frac{n_1 n_2}{2}, $$ $$ \mathrm{Var}(U_X) = \frac{n_1 n_2 (n_1+n_2+1)}{12}. $$

The statistic $U=\min(U_X,U_Y)$ has the same null distribution by symmetry.
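For the $n_1=2$, $n_2=3$ example above, both moments can be checked by brute-force enumeration:

```python
from itertools import combinations

n1, n2 = 2, 3
N = n1 + n2

# All equally likely values of U_X under H0
us = [sum(c) - n1 * (n1 + 1) // 2 for c in combinations(range(1, N + 1), n1)]
mean = sum(us) / len(us)
var = sum((u - mean) ** 2 for u in us) / len(us)

print(mean, var)  # n1*n2/2 = 3.0 and n1*n2*(N+1)/12 = 3.0
```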


9. Why the test works (core theoretical reason)¶

Define the population parameter $$ \theta = \mathbb{P}(X > Y) + \tfrac12\, \mathbb{P}(X = Y). $$

Then: $$ \mathbb{E}\!\left[\frac{U_X}{n_1 n_2}\right] = \theta. $$

Under $H_0: F = G$, we have $$ \theta = \tfrac12. $$

Thus, the Mann–Whitney test is a test of $$ H_0:\; \theta = \tfrac12, $$ corresponding to absence of stochastic dominance.


10. U-statistic structure¶

The statistic $U_X$ is a U-statistic with kernel $$ h(x,y) = \mathbf{1}\{x > y\}. $$

By Hoeffding’s theory of U-statistics:

  • $U_X$ is unbiased for $\theta$
  • $U_X$ is consistent
  • $U_X$ is asymptotically normal

11. Asymptotic null distribution (CLT)¶

As $n_1,n_2 \to \infty$: $$ \frac{U_X - \mathbb{E}[U_X]}{\sqrt{\mathrm{Var}(U_X)}} \;\xrightarrow{d}\; N(0,1). $$

This follows from the Hoeffding decomposition and the CLT for U-statistics.
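A seeded Monte Carlo sketch of this convergence: shuffling ranks under $H_0$ and standardizing $U_X$ should give values close to a standard normal (the sample sizes and iteration count below are arbitrary choices):

```python
import numpy as np

rng = np.random.default_rng(0)
n1, n2 = 30, 40
N = n1 + n2
ranks = np.arange(1, N + 1, dtype=float)

mu = n1 * n2 / 2
sigma = np.sqrt(n1 * n2 * (N + 1) / 12)

z = np.empty(20_000)
for b in range(len(z)):
    rng.shuffle(ranks)                         # random rank allocation under H0
    U = ranks[:n1].sum() - n1 * (n1 + 1) / 2   # U_X for this allocation
    z[b] = (U - mu) / sigma

print(round(z.mean(), 3), round(z.std(), 3))   # close to 0 and 1
```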

Dealing with Ties in the Mann–Whitney U Test¶

It is possible that two or more observations take the same value. The Mann–Whitney statistic is still well defined: each tied observation receives the average of the ranks it would otherwise occupy (midranks), which is equivalent to counting each tied $(X_i, Y_j)$ pair as half in each direction.

However, when ties are present, the normal approximation to the distribution of $U$ must be used with a correction to the standard deviation. The adjusted standard deviation of $U$ is

$$ \sigma_U = \sqrt{ \frac{n_x n_y}{N (N - 1)} \left[ \frac{N^3 - N}{12} - \sum_{j=1}^{g} \frac{t_j^3 - t_j}{12} \right] }, $$

where

  • $N = n_x + n_y$,
  • $g$ is the number of groups of tied observations,
  • $t_j$ is the number of tied ranks in group $j$.
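The adjusted standard deviation can be computed directly from the pooled data; a sketch (the helper name `sigma_u_ties` is ours):

```python
import math
from collections import Counter

def sigma_u_ties(x, y):
    """Tie-corrected standard deviation of U for the normal approximation."""
    N = len(x) + len(y)
    # t_j = multiplicity of each distinct value in the pooled sample
    tie_sum = sum(t**3 - t for t in Counter(list(x) + list(y)).values())
    return math.sqrt(len(x) * len(y) / (N * (N - 1))
                     * ((N**3 - N) / 12 - tie_sum / 12))

# With no ties this reduces to sqrt(n_x * n_y * (N + 1) / 12):
print(sigma_u_ties([1, 2, 3, 4], [5, 6, 7]), math.sqrt(4 * 3 * 8 / 12))
```

Ties only shrink the standard deviation, so ignoring the correction makes the normal approximation conservative in the denominator.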

12. Interpretation¶

  • Tests stochastic dominance
  • Detects location shifts when distributional shapes coincide
  • Robust to non-normality
  • Sensitive to changes in distribution shape

13. When the test may mislead¶

  • Strongly different shapes
  • Heteroscedasticity
  • Many ties (requires correction)
  • Dependence between observations

In such cases, rejection does not necessarily correspond to a pure location shift.


14. Relation to the t-test¶

  • If $F$ and $G$ are normal with equal variances, the t-test is more powerful
  • Under heavy tails or outliers, Mann–Whitney is more robust
  • Mann–Whitney does not estimate a mean difference

15. One-sentence summary (exam-perfect)¶

The Mann–Whitney test is an exact, distribution-free U-statistic test of stochastic dominance whose null distribution arises from random rank allocations and converges asymptotically to a normal distribution.


16. One-line intuition¶

The test counts how often observations from one sample are smaller than those from the other.


Problem: Comparison of Student Work Hours¶

Student employees are a major part of employment on most college campuses. Two major departments that participate in student hiring are listed below, along with the number of hours worked by students during a month.

At the 0.05 level of significance, is there sufficient evidence to conclude that there is a difference in the average number of hours worked between the two departments?

Data:

  • Athletics:
    20, 24, 17, 12, 18, 22, 25, 30, 15, 19

  • Library:
    35, 28, 24, 20, 25, 18, 22, 26, 31, 21, 19

In [6]:
# Mann–Whitney U test (ties handled correctly):
# - ALWAYS uses mean ranks for ties
# - If there are NO ties: uses true exact (U-table equivalent) critical values + exact p-value
# - If there ARE ties: U-table critical values are NOT valid, so we use a permutation null
#   (with mean ranks) to get:
#     * permutation p-value (Monte Carlo by default)
#     * permutation critical values (table-like), and decision
# - Also reports normal approximation with tie correction (+ optional continuity correction)

import math
import numpy as np
from scipy.stats import rankdata, norm


# ---------- Exact distribution WITHOUT ties (U-table equivalent) ----------

def _u_distribution_counts_no_ties(n1: int, n2: int):
    """
    Exact distribution of U (for sample 1) under H0 for given (n1,n2),
    assuming continuous data (no ties), i.e., classic U-table setting.

    Returns counts_u for U=0..n1*n2 and total = C(n1+n2, n1)
    """
    N = n1 + n2
    max_w = n1 * (2 * N - n1 + 1) // 2  # max rank-sum W

    dp = [np.zeros(max_w + 1, dtype=np.int64) for _ in range(n1 + 1)]
    dp[0][0] = 1

    for r in range(1, N + 1):
        kmax = min(n1, r)
        for k in range(kmax, 0, -1):
            dp[k][r:max_w + 1] += dp[k - 1][0:max_w + 1 - r]

    counts_w = dp[n1]
    total = int(math.comb(N, n1))

    shift = n1 * (n1 + 1) // 2
    max_u = n1 * n2
    counts_u = counts_w[shift:shift + max_u + 1].copy()

    return counts_u, total


def _exact_p_and_crit_no_ties(U_obs_int: int, n1: int, n2: int, alpha=0.05, alternative="two-sided"):
    counts_u, total = _u_distribution_counts_no_ties(n1, n2)
    probs = counts_u / total
    max_u = n1 * n2

    cdf = np.cumsum(probs)
    sf = np.cumsum(probs[::-1])[::-1]  # P(U >= u)

    if alternative == "less":
        p = float(cdf[U_obs_int])
        crit_low = int(np.max(np.where(cdf <= alpha)[0])) if np.any(cdf <= alpha) else -1
        return p, (crit_low, None)

    if alternative == "greater":
        p = float(sf[U_obs_int])
        crit_high = int(np.min(np.where(sf <= alpha)[0])) if np.any(sf <= alpha) else max_u + 1
        return p, (None, crit_high)

    if alternative == "two-sided":
        a2 = alpha / 2.0
        crit_low = int(np.max(np.where(cdf <= a2)[0])) if np.any(cdf <= a2) else -1
        crit_high = int(np.min(np.where(sf <= a2)[0])) if np.any(sf <= a2) else max_u + 1
        p_one = min(cdf[U_obs_int], sf[U_obs_int])
        p = min(1.0, 2.0 * float(p_one))
        return float(p), (crit_low, crit_high)

    raise ValueError("alternative must be one of: 'two-sided', 'less', 'greater'")


# ---------- Approximation (normal) with tie correction ----------

def _normal_approx(U1: float, pooled: np.ndarray, alternative="two-sided", continuity=True, n1=None, n2=None):
    if n1 is None or n2 is None:
        raise ValueError("n1 and n2 must be provided")

    N = n1 + n2

    # tie correction factor: 1 - sum(t^3 - t)/(N^3 - N)
    _, counts = np.unique(pooled, return_counts=True)
    tie_term = np.sum(counts**3 - counts)
    tie_correction = 1.0 - tie_term / (N**3 - N) if N > 1 else 1.0

    mu = n1 * n2 / 2.0
    var = (n1 * n2 * (N + 1) / 12.0) * tie_correction
    sigma = math.sqrt(var) if var > 0 else 0.0

    if sigma == 0:
        return 0.0, 1.0, mu, sigma, tie_correction

    cc = 0.0
    if continuity:
        if alternative == "less":
            cc = -0.5
        elif alternative == "greater":
            cc = +0.5
        else:
            cc = 0.5 * np.sign(U1 - mu)

    z = (U1 - mu - cc) / sigma

    if alternative == "less":
        p = norm.cdf(z)
    elif alternative == "greater":
        p = 1.0 - norm.cdf(z)
    elif alternative == "two-sided":
        p = 2.0 * min(norm.cdf(z), 1.0 - norm.cdf(z))
    else:
        raise ValueError("alternative must be one of: 'two-sided', 'less', 'greater'")

    return float(z), float(p), float(mu), float(sigma), float(tie_correction)


def _approx_critical(mu, sigma, alpha=0.05, alternative="two-sided"):
    if sigma == 0:
        return (None, None)

    if alternative == "less":
        zc = norm.ppf(alpha)
        return (mu + zc * sigma, None)

    if alternative == "greater":
        zc = norm.ppf(1 - alpha)
        return (None, mu + zc * sigma)

    if alternative == "two-sided":
        zc = norm.ppf(1 - alpha / 2.0)
        return (mu - zc * sigma, mu + zc * sigma)

    raise ValueError("alternative must be one of: 'two-sided', 'less', 'greater'")


# ---------- Permutation null (ties OK because we keep midranks fixed) ----------

def _perm_null_U1_values(pooled: np.ndarray, n1: int, n_perm: int, rng: np.random.Generator):
    """
    Monte Carlo permutation null for U1 using FIXED midranks of the observed pooled data.
    This is the right way to handle ties: ranks are mean ranks and remain attached to values,
    and the null shuffles group labels.
    """
    ranks = rankdata(pooled, method="average")
    N = len(pooled)
    idx = np.arange(N)

    # precompute constant
    shift = n1 * (n1 + 1) / 2.0

    U1_samples = np.empty(n_perm, dtype=float)
    for b in range(n_perm):
        rng.shuffle(idx)
        Rx = ranks[idx[:n1]].sum()
        U1_samples[b] = Rx - shift
    return U1_samples


def _perm_p_and_crit(U1_obs: float, pooled: np.ndarray, n1: int, n2: int, alpha=0.05,
                     alternative="two-sided", n_perm=200_000, seed=0):
    rng = np.random.default_rng(seed)
    U1_null = _perm_null_U1_values(pooled, n1, n_perm, rng)

    # p-values via Monte Carlo tail probabilities
    if alternative == "less":
        p = np.mean(U1_null <= U1_obs)
        crit_low = np.quantile(U1_null, alpha, method="lower")
        return float(p), (float(crit_low), None), U1_null

    if alternative == "greater":
        p = np.mean(U1_null >= U1_obs)
        crit_high = np.quantile(U1_null, 1 - alpha, method="higher")
        return float(p), (None, float(crit_high)), U1_null

    if alternative == "two-sided":
        # two-sided p-value via the min-tail approach
        p_left = np.mean(U1_null <= U1_obs)
        p_right = np.mean(U1_null >= U1_obs)
        p = min(1.0, 2.0 * min(p_left, p_right))

        crit_low = np.quantile(U1_null, alpha / 2.0, method="lower")
        crit_high = np.quantile(U1_null, 1 - alpha / 2.0, method="higher")
        return float(p), (float(crit_low), float(crit_high)), U1_null

    raise ValueError("alternative must be one of: 'two-sided', 'less', 'greater'")


# ---------- Main function ----------

def mann_whitney_u_test(x, y, alpha=0.05, alternative="two-sided",
                        continuity=True, n_perm=200_000, seed=0):
    """
    Mann–Whitney U Test with correct tie handling:
      - mean ranks always
      - exact/table only if NO ties
      - permutation-based (Monte Carlo) "exact-like" p-value + critical values if ties exist
      - normal approx with tie correction always

    Returns a dict with results + decisions.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n1, n2 = len(x), len(y)

    pooled = np.concatenate([x, y])
    ranks = rankdata(pooled, method="average")
    Rx = ranks[:n1].sum()
    Ry = ranks[n1:].sum()

    U1 = Rx - n1 * (n1 + 1) / 2.0
    U2 = Ry - n2 * (n2 + 1) / 2.0   # U1 + U2 == n1*n2 always: rank sums total N(N+1)/2, even with midranks

    # detect ties in pooled data
    _, counts = np.unique(pooled, return_counts=True)
    has_ties = np.any(counts >= 2)

    # Normal approximation (tie-corrected)
    z, p_approx, mu, sigma, tie_corr = _normal_approx(
        U1, pooled, alternative=alternative, continuity=continuity, n1=n1, n2=n2
    )
    crit_low_a, crit_high_a = _approx_critical(mu, sigma, alpha=alpha, alternative=alternative)

    if alternative == "less":
        reject_approx = (U1 <= crit_low_a) if crit_low_a is not None else False
    elif alternative == "greater":
        reject_approx = (U1 >= crit_high_a) if crit_high_a is not None else False
    else:
        reject_approx = ((crit_low_a is not None and U1 <= crit_low_a) or
                         (crit_high_a is not None and U1 >= crit_high_a))

    # Exact / permutation branch for "table-like" criticals
    if not has_ties:
        # Classic U-table setting (true exact)
        U1_int = int(round(U1))
        p_exact, (crit_low_e, crit_high_e) = _exact_p_and_crit_no_ties(
            U1_int, n1, n2, alpha=alpha, alternative=alternative
        )
        if alternative == "less":
            reject_exact = (U1_int <= crit_low_e)
        elif alternative == "greater":
            reject_exact = (U1_int >= crit_high_e)
        else:
            reject_exact = (U1_int <= crit_low_e) or (U1_int >= crit_high_e)

        exact_block = {
            "mode": "exact_no_ties (U-table equivalent)",
            "p_value": float(p_exact),
            "critical_low": crit_low_e,
            "critical_high": crit_high_e,
            "reject_H0": bool(reject_exact),
            "note": None
        }
    else:
        # Ties present -> use permutation null on fixed mean ranks
        p_perm, (crit_low_p, crit_high_p), _ = _perm_p_and_crit(
            U1, pooled, n1, n2, alpha=alpha, alternative=alternative, n_perm=n_perm, seed=seed
        )
        if alternative == "less":
            reject_perm = (U1 <= crit_low_p)
        elif alternative == "greater":
            reject_perm = (U1 >= crit_high_p)
        else:
            reject_perm = (U1 <= crit_low_p) or (U1 >= crit_high_p)

        exact_block = {
            "mode": f"permutation_with_ties (Monte Carlo, n_perm={n_perm})",
            "p_value": float(p_perm),
            "critical_low": crit_low_p,
            "critical_high": crit_high_p,
            "reject_H0": bool(reject_perm),
            "note": "U-table exact critical values assume no ties. Using permutation null with mean ranks instead."
        }

    return {
        "n1": n1, "n2": n2,
        "alpha": alpha, "alternative": alternative,
        "has_ties": bool(has_ties),
        "U1": float(U1), "U2": float(U2),
        "mean_rank_sum_x": float(Rx),
        "mean_rank_sum_y": float(Ry),
        "exact_or_perm": exact_block,
        "approx": {
            "z": float(z),
            "p_value": float(p_approx),
            "critical_low": float(crit_low_a) if crit_low_a is not None else None,
            "critical_high": float(crit_high_a) if crit_high_a is not None else None,
            "reject_H0": bool(reject_approx),
            "continuity_correction": bool(continuity),
            "tie_correction_factor": float(tie_corr),
            "mu_U": float(mu),
            "sigma_U": float(sigma)
        }
    }
In [7]:
# Cell for Problem 1: Athletics vs Library (two-sided difference)

athletics = [20, 24, 17, 12, 18, 22, 25, 30, 15, 19]
library   = [35, 28, 24, 20, 25, 18, 22, 26, 31, 21, 19]

res = mann_whitney_u_test(
    athletics, library,
    alpha=0.05,
    alternative="two-sided",
    continuity=True,
    n_perm=200_000,
    seed=1
)

print("Mann–Whitney U Test: Athletics vs Library")
print("----------------------------------------")
print(f"n1 = {res['n1']} (Athletics), n2 = {res['n2']} (Library)")
print(f"U1 (Athletics) = {res['U1']:.6g}, U2 (Library) = {res['U2']:.6g}")
print(f"Has ties in pooled data? {res['has_ties']}")

print("\nExact / Permutation (table-like criticals) results")
ex = res["exact_or_perm"]
print(f"  mode        = {ex['mode']}")
print(f"  p-value     = {ex['p_value']:.6g}")
print(f"  critical_low  = {ex['critical_low']}, critical_high = {ex['critical_high']}")
print(f"  decision    = {'REJECT H0' if ex['reject_H0'] else 'FAIL TO REJECT H0'}")
if ex["note"]:
    print(f"  NOTE: {ex['note']}")

print("\nNormal approximation (tie-corrected) results")
ap = res["approx"]
print(f"  z           = {ap['z']:.6g}")
print(f"  p-value     = {ap['p_value']:.6g}")
print(f"  critical_low  = {ap['critical_low']}, critical_high = {ap['critical_high']}")
print(f"  tie correction factor = {ap['tie_correction_factor']:.6g}")
print(f"  decision    = {'REJECT H0' if ap['reject_H0'] else 'FAIL TO REJECT H0'}")
Mann–Whitney U Test: Athletics vs Library
----------------------------------------
n1 = 10 (Athletics), n2 = 11 (Library)
U1 (Athletics) = 30, U2 (Library) = 80
Has ties in pooled data? True

Exact / Permutation (table-like criticals) results
  mode        = permutation_with_ties (Monte Carlo, n_perm=200000)
  p-value     = 0.08106
  critical_low  = 27.5, critical_high = 82.5
  decision    = FAIL TO REJECT H0
  NOTE: U-table exact critical values assume no ties. Using permutation null with mean ranks instead.

Normal approximation (tie-corrected) results
  z           = -1.72861
  p-value     = 0.0838791
  critical_low  = 27.220944824684082, critical_high = 82.77905517531592
  tie correction factor = 0.996104
  decision    = FAIL TO REJECT H0

Simultaneous Confidence Intervals in One-Way ANOVA - Bonferroni and Tukey (HSD / Tukey–Kramer) Methods


1. Motivation¶

In one-way ANOVA we test the global null hypothesis $$ H_0:\quad \mu_1=\mu_2=\cdots=\mu_k . $$

If this hypothesis is rejected, a natural next question is:

Which means differ, and by how much?

Using ordinary (single-parameter) confidence intervals for many comparisons leads to inflated Type I error, because several intervals are examined simultaneously.

Goal: Construct confidence intervals that hold simultaneously for a family of parameters with overall confidence level $1-\alpha$.


2. Model and notation¶

We consider the classical one-way ANOVA model $$ X_{ij} = \mu_i + \varepsilon_{ij}, \qquad i=1,\dots,k,\quad j=1,\dots,n_i, $$ where

  • $\varepsilon_{ij} \stackrel{\text{i.i.d.}}{\sim} N(0,\sigma^2)$,
  • samples are independent,
  • group variances are equal.

Define: $$ \bar X_i = \frac{1}{n_i}\sum_{j=1}^{n_i} X_{ij}, \qquad N=\sum_{i=1}^k n_i . $$

The Mean Square Error (MSE) is $$ \text{MSE} = \frac{1}{N-k} \sum_{i=1}^k \sum_{j=1}^{n_i} (X_{ij} - \bar X_i)^2 , $$ with $\nu=N-k$ degrees of freedom.


3. What does “simultaneous” mean?¶

Let $\theta_1,\dots,\theta_m$ be parameters of interest (e.g. mean differences).

Intervals $I_1,\dots,I_m$ are simultaneous confidence intervals with level $1-\alpha$ if $$ \mathbb P\big(\theta_1\in I_1,\dots,\theta_m\in I_m\big)\ge 1-\alpha . $$

This is stronger than marginal coverage $$ \mathbb P(\theta_\ell\in I_\ell)\ge 1-\alpha \quad \text{for each } \ell . $$


4. The multiple comparison problem¶

If we construct $m$ ordinary $1-\alpha$ confidence intervals separately, the probability that all of them are simultaneously correct can fall well below $1-\alpha$; when the intervals are independent it equals $$ \mathbb P(\text{all correct}) = (1-\alpha)^m , $$ which is very small for large $m$.

Simultaneous methods control the family-wise error rate (FWER): $$ \mathbb P(\text{at least one false statement}) \le \alpha . $$
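A quick computation of how fast the unadjusted error rate grows, assuming independent intervals:

```python
# Probability that at least one of m independent 95% intervals fails to cover
alpha = 0.05
for m in (1, 5, 20):
    print(m, 1 - (1 - alpha) ** m)
```

With $m = 20$ independent comparisons, the chance of at least one false statement is already about 64%.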


5. Bonferroni simultaneous confidence intervals¶

5.1 Bonferroni inequality¶

For any events $A_1,\dots,A_m$, $$ \mathbb P\Big(\bigcup_{\ell=1}^m A_\ell\Big) \le \sum_{\ell=1}^m \mathbb P(A_\ell). $$

This bound is distribution-free and does not require independence.


5.2 Bonferroni confidence intervals¶

Suppose $\hat\theta_\ell$ estimates $\theta_\ell$ and $$ \frac{\hat\theta_\ell-\theta_\ell} {\widehat{\mathrm{SE}}(\hat\theta_\ell)} \sim t_\nu . $$

Define intervals $$ I_\ell:\quad \hat\theta_\ell \pm t_{1-\alpha/(2m),\nu} \,\widehat{\mathrm{SE}}(\hat\theta_\ell), \qquad \ell=1,\dots,m . $$

Then $$ \mathbb P\big(\theta_1\in I_1,\dots,\theta_m\in I_m\big) \ge 1-\alpha . $$


5.3 Bonferroni CIs for pairwise mean differences¶

For comparisons $\mu_i-\mu_j$, $$ \widehat{\mu_i-\mu_j}=\bar X_i-\bar X_j , $$ with standard error $$ \widehat{\mathrm{SE}}(\bar X_i-\bar X_j) = \sqrt{\text{MSE} \Big(\frac{1}{n_i}+\frac{1}{n_j}\Big)} . $$

If $m=\binom{k}{2}$, the Bonferroni confidence interval is $$ (\bar X_i-\bar X_j) \pm t_{1-\alpha/(2m),\nu} \sqrt{\text{MSE} \Big(\frac{1}{n_i}+\frac{1}{n_j}\Big)} . $$
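A sketch comparing the Bonferroni-adjusted critical value with the unadjusted one (the values $k=3$, $\nu=18$ are chosen to mimic a three-group design):

```python
from scipy import stats

k, nu = 3, 18           # e.g. three groups, N - k = 18 error df
m = k * (k - 1) // 2    # number of pairwise comparisons
alpha = 0.05

t_plain = stats.t.ppf(1 - alpha / 2, nu)        # single-comparison critical value
t_bonf = stats.t.ppf(1 - alpha / (2 * m), nu)   # Bonferroni-adjusted critical value
print(t_plain, t_bonf)  # Bonferroni intervals are wider
```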


5.4 Properties of Bonferroni intervals¶

  • Valid for any collection of contrasts
  • No independence assumption required
  • Often conservative for large $m$
  • Most effective for few, pre-planned comparisons

6. Tukey’s method (HSD / Tukey–Kramer)¶

6.1 Studentized range distribution¶

Let $Z_1,\dots,Z_k \sim N(0,1)$ i.i.d. The studentized range is $$ Q = \frac{\max_i Z_i - \min_i Z_i}{S}, $$ where $S^2$ is an estimator of the variance, independent of the $Z_i$, with $\nu S^2 \sim \chi^2_\nu$.

Its distribution depends on:

  • $k$: number of groups,
  • $\nu$: error degrees of freedom.

Quantiles are denoted $q_{1-\alpha}(k,\nu)$.
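These quantiles are available in SciPy (assuming SciPy ≥ 1.7, which provides `scipy.stats.studentized_range`); a sketch, including the classical identity $q_{1-\alpha}(2,\nu) = \sqrt{2}\, t_{1-\alpha/2,\nu}$:

```python
from math import sqrt
from scipy import stats

k, nu = 3, 18
q = stats.studentized_range.ppf(0.95, k, nu)   # q_{0.95}(3, 18)
print(q)

# Sanity check: for k = 2 the studentized range reduces to sqrt(2) * |t|
q2 = stats.studentized_range.ppf(0.95, 2, nu)
print(q2, sqrt(2) * stats.t.ppf(0.975, nu))
```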


6.2 Tukey HSD (balanced design)¶

Assume equal sample sizes $$ n_1=\cdots=n_k=n . $$

Then $$ \bar X_i-\bar X_j = (\mu_i-\mu_j) + \sigma\sqrt{\frac{2}{n}}\,Z_{ij}, \qquad Z_{ij}\sim N(0,1), $$ and $$ \sqrt{\frac{\text{MSE}}{n}} $$ estimates $\sigma/\sqrt{n}$.

The Tukey HSD confidence interval is $$ (\bar X_i-\bar X_j) \pm q_{1-\alpha}(k,\nu) \sqrt{\frac{\text{MSE}}{n}} . $$

These intervals are simultaneous for all $\binom{k}{2}$ pairwise differences.


6.3 Tukey–Kramer method (unbalanced design)¶

When group sizes differ, the Tukey–Kramer interval is $$ (\bar X_i-\bar X_j) \pm q_{1-\alpha}(k,\nu) \sqrt{ \frac{\text{MSE}}{2} \Big(\frac{1}{n_i}+\frac{1}{n_j}\Big) } . $$

This reduces to Tukey HSD when $n_i=n$.
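A sketch of the two interval half-widths (the function names are ours), confirming the reduction in the balanced case:

```python
import math

def tukey_kramer_halfwidth(q, mse, ni, nj):
    """Half-width of the Tukey–Kramer interval for mu_i - mu_j."""
    return q * math.sqrt(mse / 2 * (1 / ni + 1 / nj))

def tukey_hsd_halfwidth(q, mse, n):
    """Half-width of the balanced Tukey HSD interval."""
    return q * math.sqrt(mse / n)

# With equal group sizes the two coincide (q = 4.0, MSE = 47.9 are illustrative):
print(tukey_kramer_halfwidth(4.0, 47.9, 7, 7), tukey_hsd_halfwidth(4.0, 47.9, 7))
```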


6.4 Properties of Tukey intervals¶

  • Exact FWER control for all pairwise comparisons
  • Shorter than Bonferroni for many groups
  • Requires normality and homoscedasticity
  • Not valid for arbitrary contrasts

7. Relationship to the ANOVA F-test¶

  • ANOVA F-test asks:

    Is there at least one difference among means?

  • Simultaneous confidence intervals ask:

    Which differences exist, and how large are they?

Key facts:

  • A pairwise difference is declared significant iff its simultaneous CI excludes $0$
  • The F-test and Tukey's procedure usually agree, but they are not exactly equivalent: a significant F-test does not guarantee that some Tukey interval excludes $0$, and vice versa

8. Bonferroni vs Tukey: comparison¶

Aspect                 Bonferroni        Tukey (HSD / Kramer)
Comparisons            Arbitrary         All pairwise
Error control          Always valid      Exact under ANOVA
Interval width         Often wider       Usually narrower
Planning               Pre-specified     Exploratory
Variance assumption    None              Equal variances

9. Practical guidance¶

  • Few planned contrasts $\rightarrow$ Bonferroni
  • All pairwise comparisons $\rightarrow$ Tukey
  • Many groups, exploratory analysis $\rightarrow$ Tukey
  • Teaching multiple testing theory $\rightarrow$ Bonferroni first

10. Summary¶

  • Simultaneous confidence intervals control family-wise error
  • Bonferroni is general, simple, and conservative
  • Tukey exploits ANOVA structure for efficient pairwise inference
  • Both methods extend naturally from one-way ANOVA


Problem: One-Way ANOVA (Faculty Ages by Rank)¶

A researcher claims that there is a difference in the average age of assistant professors, associate professors, and full professors at her university.

Faculty members are selected randomly, and their ages are recorded.
Assume that faculty ages are normally distributed.

Test the researcher’s claim at the $\alpha = 0.01$ significance level.

The observed data are:

Rank                   Ages
Assistant Professor    28, 32, 36, 42, 50, 33, 38
Associate Professor    44, 61, 52, 54, 62, 45, 46
Professor              54, 56, 55, 65, 52, 50, 46
In [13]:
import numpy as np
import pandas as pd
from itertools import combinations
from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd


def one_way_anova(groups):
    """
    Perform one-way ANOVA and return ANOVA table components.

    Parameters
    ----------
    groups : dict
        keys   = group names
        values = lists or arrays of observations

    Returns
    -------
    dict with:
        - F_stat
        - p_value
        - MSE
        - df_error
        - group_means
        - group_sizes
    """
    group_names = list(groups.keys())
    data = [np.asarray(groups[g]) for g in group_names]

    # ANOVA F-test
    F_stat, p_value = stats.f_oneway(*data)

    # pooled variance (MSE)
    N = sum(len(x) for x in data)
    k = len(data)
    df_error = N - k

    SSE = sum(((x - x.mean())**2).sum() for x in data)
    MSE = SSE / df_error

    group_means = {g: np.mean(groups[g]) for g in group_names}
    group_sizes = {g: len(groups[g]) for g in group_names}

    return {
        "F_stat": F_stat,
        "p_value": p_value,
        "MSE": MSE,
        "df_error": df_error,
        "group_means": group_means,
        "group_sizes": group_sizes
    }


def bonferroni_ci(groups, alpha=0.05):
    """
    Bonferroni simultaneous confidence intervals
    for all pairwise mean differences.
    """
    anova = one_way_anova(groups)
    means = anova["group_means"]
    sizes = anova["group_sizes"]
    MSE   = anova["MSE"]
    df    = anova["df_error"]

    pairs = list(combinations(means.keys(), 2))
    m = len(pairs)

    t_crit = stats.t.ppf(1 - alpha/(2*m), df)

    rows = []
    for g1, g2 in pairs:
        diff = means[g1] - means[g2]
        se = np.sqrt(MSE * (1/sizes[g1] + 1/sizes[g2]))
        ci_low  = diff - t_crit * se
        ci_high = diff + t_crit * se

        rows.append([g1, g2, diff, ci_low, ci_high])

    return pd.DataFrame(
        rows,
        columns=["Group 1", "Group 2", "Mean diff", "CI lower", "CI upper"]
    )


def tukey_ci(groups, alpha=0.05):
    """
    Tukey (HSD / Tukey–Kramer) simultaneous confidence intervals
    for all pairwise mean differences.
    """
    values = []
    labels = []

    for g, obs in groups.items():
        values.extend(obs)
        labels.extend([g] * len(obs))

    values = np.asarray(values)
    labels = np.asarray(labels)

    tukey = pairwise_tukeyhsd(
        endog=values,
        groups=labels,
        alpha=alpha
    )

    return pd.DataFrame(
        tukey._results_table.data[1:],
        columns=tukey._results_table.data[0]
    )
In [14]:
# Data: Faculty ages by rank
groups = {
    "Assistant Professor": [28, 32, 36, 42, 50, 33, 38],
    "Associate Professor": [44, 61, 52, 54, 62, 45, 46],
    "Professor": [54, 56, 55, 65, 52, 50, 46]
}

alpha = 0.01

# --- One-way ANOVA ---
anova_results = one_way_anova(groups)

print("One-way ANOVA")
print("-------------")
print(f"F statistic = {anova_results['F_stat']:.4f}")
print(f"p-value     = {anova_results['p_value']:.6f}")
print(f"MSE         = {anova_results['MSE']:.4f}")
print(f"df_error    = {anova_results['df_error']}")

# --- Bonferroni simultaneous CIs ---
print("\nBonferroni simultaneous confidence intervals")
print("-------------------------------------------")
bonf_ci = bonferroni_ci(groups, alpha=alpha)
display(bonf_ci)

# --- Tukey simultaneous CIs ---
print("\nTukey (HSD / Tukey–Kramer) simultaneous confidence intervals")
print("------------------------------------------------------------")
tukey_results = tukey_ci(groups, alpha=alpha)
display(tukey_results)
One-way ANOVA
-------------
F statistic = 12.6195
p-value     = 0.000375
MSE         = 47.8889
df_error    = 18

Bonferroni simultaneous confidence intervals
-------------------------------------------
Group 1 Group 2 Mean diff CI lower CI upper
0 Assistant Professor Associate Professor -15.0 -27.503932 -2.496068
1 Assistant Professor Professor -17.0 -29.503932 -4.496068
2 Associate Professor Professor -2.0 -14.503932 10.503932
Tukey (HSD / Tukey–Kramer) simultaneous confidence intervals
------------------------------------------------------------
group1 group2 meandiff p-adj lower upper reject
0 Assistant Professor Associate Professor 15.0 0.0020 2.6979 27.3021 True
1 Assistant Professor Professor 17.0 0.0006 4.6979 29.3021 True
2 Associate Professor Professor 2.0 0.8524 -10.3021 14.3021 False