import numpy as np
from math import comb

def sign_test(x, y=None, median=0, alternative="two-sided"):
    """
    Exact sign test for a median (one sample) or for paired differences.

    The data are reduced to the signs of the differences ``d = x - y``
    (paired) or ``d = x - median`` (one sample). Zero differences are
    discarded. Under H0 the number of positive signs S follows a
    Binomial(n, 1/2) distribution, where n is the number of non-zero
    differences.

    Parameters
    ----------
    x : array-like
        Sample data (or first sample if paired test).
    y : array-like or None
        Second sample for paired sign test. Must have the same shape as x.
    median : float
        Hypothesized median (used only if y is None).
    alternative : {"two-sided", "greater", "less"}
        "greater" returns P(S >= s), "less" returns P(S <= s), and
        "two-sided" returns twice the smaller tail, capped at 1.

    Returns
    -------
    dict
        "n" : int, number of non-zero differences.
        "S" : int, number of positive differences.
        "p_value" : float, exact binomial p-value.

    Raises
    ------
    ValueError
        If alternative is not one of the three accepted strings, if x and
        y have different shapes, or if every difference is zero.
    """
    if alternative not in ("two-sided", "greater", "less"):
        raise ValueError("alternative must be 'two-sided', 'greater', or 'less'")

    x = np.asarray(x)

    if y is not None:
        y = np.asarray(y)
        # Paired data must line up one-to-one; without this check NumPy
        # would silently broadcast a length-1 y against all of x.
        if x.shape != y.shape:
            raise ValueError("x and y must have the same length.")
        d = x - y
    else:
        d = x - median

    # Remove zeros (ties): they carry no sign information
    d = d[d != 0]
    n = len(d)

    if n == 0:
        raise ValueError("All differences are zero.")

    # Plain int so that range() and comb() below work on Python integers
    S = int(np.sum(d > 0))  # number of positive signs

    # Select the outcomes of Binomial(n, 1/2) that form the rejection tail
    if alternative == "two-sided":
        tail = range(min(S, n - S) + 1)
    elif alternative == "greater":
        tail = range(S, n + 1)
    else:
        tail = range(S + 1)

    # Count the outcomes with exact integers and divide once by 2**n.
    # Multiplying every term by 0.5**n instead underflows to 0.0 as soon as
    # n >= 1075, which would report a p-value of 0 for large samples.
    p_value = sum(comb(n, i) for i in tail) / 2**n

    if alternative == "two-sided":
        p_value = min(2 * p_value, 1.0)

    return {
        "n": n,
        "S": S,
        "p_value": p_value
    }


# One-sample sign test on a small sample; H0: the median equals 0
x = [
    2.1, -0.3, 1.4, 0.7,
    -1.2, 0.5, 0.9, -0.4,
]

# Two-sided: deviations of the median in either direction count against H0
result = sign_test(x, alternative="two-sided", median=0)

# Bare expression so that a notebook cell echoes the returned dict
result

{'n': 8, 'S': 5, 'p_value': 0.7265625}


# Paired sign test: each "before" value is matched with the "after" value
# at the same position
before = [150, 132, 130, 116, 107, 100, 101, 96, 90, 78]
after = [90, 102, 80, 82, 90, 94, 84, 93, 90, 80]

# One-sided alternative: positive differences (before - after) dominate
sign_test(before, y=after, alternative="greater")

{'n': 9, 'S': 8, 'p_value': 0.01953125}


import numpy as np
from math import sqrt
from scipy.stats import wilcoxon, norm

def wilcoxon_signed_rank_one_sample(
    x,
    median_0=0,
    alpha=0.05,
    alternative="two-sided"
):
    """
    One-sample Wilcoxon signed-rank test.

    The differences ``d = x - median_0`` are computed, zeros are dropped,
    and the absolute differences are ranked (tied values receive their
    average rank, so the rank sums are floats).

    Parameters
    ----------
    x : array-like
        Sample data.
    median_0 : float
        Hypothesized median under H0.
    alpha : float
        Not used by this function; the caller compares the returned
        p-values against it.
    alternative : {"two-sided", "greater", "less"}
        Direction of the alternative hypothesis.

    Returns
    -------
    dict with keys
        "n" : number of non-zero differences.
        "W_plus", "W_minus" : rank sums of the positive / negative
            differences.
        "W_exact" : min(W+, W-) (classical table statistic).
        "scipy_stat", "p_value_exact" : statistic and p-value returned by
            scipy.stats.wilcoxon. SciPy is asked for the exact
            distribution only when n <= 25; for larger n this p-value is
            SciPy's normal approximation despite the key name.
        "z", "p_value_asymptotic" : continuity-corrected normal
            approximation based on W+. No tie correction is applied to
            the variance.

    Raises
    ------
    ValueError
        If alternative is invalid or every observation equals median_0.
    """
    # Imported locally: only `wilcoxon` and `norm` are imported from
    # scipy.stats ahead of this definition.
    from scipy.stats import rankdata

    if alternative not in ("two-sided", "greater", "less"):
        raise ValueError("Invalid alternative.")

    x = np.asarray(x)
    d = x - median_0
    d = d[d != 0]
    n = len(d)

    if n == 0:
        raise ValueError("All observations equal the hypothesized median.")

    # ----- Signed ranks -----
    # Average ranks for ties: a plain double argsort would hand tied |d|
    # values arbitrary distinct ranks and make W+ / W- depend on input order.
    abs_d = np.abs(d)
    ranks = rankdata(abs_d, method="average")

    W_plus = np.sum(ranks[d > 0])
    W_minus = np.sum(ranks[d < 0])
    W_exact = min(W_plus, W_minus)

    # ----- Exact p-value (SciPy) -----
    stat_scipy, p_exact = wilcoxon(
        d,
        alternative=alternative,
        mode="exact" if n <= 25 else "approx"
    )

    # ----- Asymptotic approximation -----
    # Mean and standard deviation of W+ under H0
    mu = n * (n + 1) / 4
    sigma = sqrt(n * (n + 1) * (2 * n + 1) / 24)

    # Continuity correction: shrink the statistic towards mu for the
    # two-sided test, but use a fixed direction for one-sided tests so that
    # the tail P(W+ >= w) or P(W+ <= w) is approximated on either side of mu.
    if alternative == "two-sided":
        correction = 0.5 * np.sign(W_plus - mu)
    elif alternative == "greater":
        correction = 0.5
    else:
        correction = -0.5

    z = (W_plus - mu - correction) / sigma

    if alternative == "two-sided":
        p_asym = 2 * norm.sf(abs(z))
    elif alternative == "greater":
        p_asym = norm.sf(z)
    else:
        p_asym = norm.cdf(z)

    return {
        "n": n,
        "W_plus": W_plus,
        "W_minus": W_minus,
        "W_exact": W_exact,
        "scipy_stat": stat_scipy,
        "p_value_exact": p_exact,
        "z": z,
        "p_value_asymptotic": p_asym
    }


# Example — One-sample Wilcoxon signed-rank test (table statistic)

# Sample, hypothesized median under H0, and significance level
x = [5.0, 3.9, 5.2, 5.5, 2.8, 6.1, 6.4, 2.6, 1.7, 4.3]
median_0 = 3.7
alpha = 0.05

# Two-sided test of H0: median = median_0
result = wilcoxon_signed_rank_one_sample(
    x, median_0=median_0, alpha=alpha, alternative="two-sided"
)

# Report: header and rank sums, one item per line
print(
    "One-sample Wilcoxon signed-rank test",
    "-----------------------------------",
    f"n = {result['n']}",
    f"W+ = {result['W_plus']}",
    f"W- = {result['W_minus']}",
    f"W (min(W+, W-)) = {result['W_exact']}",
    sep="\n",
)

# p-value reported by SciPy, followed by the decision at level alpha
print(
    "\nExact test (SciPy)",
    f"Exact p-value = {result['p_value_exact']:.4f}",
    sep="\n",
)
print(
    "Exact decision: REJECT H0"
    if result["p_value_exact"] < alpha
    else "Exact decision: DO NOT reject H0"
)

# Normal approximation, followed by its decision at level alpha
print(
    "\nAsymptotic normal approximation",
    f"z = {result['z']:.3f}",
    f"p-value ≈ {result['p_value_asymptotic']:.4f}",
    sep="\n",
)
print(
    "Asymptotic decision: REJECT H0"
    if result["p_value_asymptotic"] < alpha
    else "Asymptotic decision: DO NOT reject H0"
)

One-sample Wilcoxon signed-rank test
-----------------------------------
n = 10
W+ = 40
W- = 15
W (min(W+, W-)) = 15

Exact test (SciPy)
Exact p-value = 0.2324
Exact decision: DO NOT reject H0

Asymptotic normal approximation
z = 1.223
p-value ≈ 0.2213
Asymptotic decision: DO NOT reject H0


    
# Larger sample (30 observations), tested against H0: median = 45
x = [
    35.5, 44.5, 39.8, 33.3, 51.4, 51.3, 30.5, 48.9, 42.1, 40.3,
    46.8, 38.0, 40.1, 36.8, 39.3, 65.4, 42.6, 42.8, 59.8, 52.4,
    26.2, 60.9, 45.6, 27.1, 47.3, 36.6, 55.6, 45.1, 52.2, 43.5,
]
median_0 = 45
alpha = 0.05

# Two-sided test of H0: median = median_0
result = wilcoxon_signed_rank_one_sample(
    x, median_0=median_0, alpha=alpha, alternative="two-sided"
)

# Report: header and rank sums, one item per line
print(
    "One-sample Wilcoxon signed-rank test",
    "-----------------------------------",
    f"n = {result['n']}",
    f"W+ = {result['W_plus']}",
    f"W- = {result['W_minus']}",
    f"W (min(W+, W-)) = {result['W_exact']}",
    sep="\n",
)

# NOTE: the function requests SciPy's exact distribution only for n <= 25,
# so with this sample the value printed as "Exact p-value" comes from
# SciPy's normal approximation.
print(
    "\nExact test (SciPy)",
    f"Exact p-value = {result['p_value_exact']:.4f}",
    sep="\n",
)
print(
    "Exact decision: REJECT H0"
    if result["p_value_exact"] < alpha
    else "Exact decision: DO NOT reject H0"
)

# Hand-computed normal approximation, followed by its decision
print(
    "\nAsymptotic normal approximation",
    f"z = {result['z']:.3f}",
    f"p-value ≈ {result['p_value_asymptotic']:.4f}",
    sep="\n",
)
print(
    "Asymptotic decision: REJECT H0"
    if result["p_value_asymptotic"] < alpha
    else "Asymptotic decision: DO NOT reject H0"
)

One-sample Wilcoxon signed-rank test
-----------------------------------
n = 30
W+ = 200
W- = 265
W (min(W+, W-)) = 200

Exact test (SciPy)
Exact p-value = 0.5038
Exact decision: DO NOT reject H0

Asymptotic normal approximation
z = -0.658
p-value ≈ 0.5104
Asymptotic decision: DO NOT reject H0


import numpy as np
from math import sqrt
from scipy.stats import wilcoxon, norm
from scipy.stats import rankdata

def wilcoxon_signed_rank_paired(
    x,
    y,
    alpha=0.05,
    alternative="two-sided"
):
    """
    Paired Wilcoxon signed-rank test.

    The paired differences ``d = x - y`` are computed, zeros are dropped,
    and the absolute differences are ranked with average ranks for ties.

    Parameters
    ----------
    x, y : array-like
        Paired samples of equal length.
    alpha : float
        Not used by this function; the caller compares the returned
        p-values against it.
    alternative : {"two-sided", "greater", "less"}
        Direction of the alternative hypothesis for the differences x - y.

    Returns
    -------
    dict with keys
        "n" : number of non-zero differences.
        "W_plus", "W_minus" : rank sums of the positive / negative
            differences.
        "W_exact" : min(W+, W-) (classical table statistic).
        "scipy_stat", "p_value_exact" : statistic and p-value returned by
            scipy.stats.wilcoxon. SciPy is asked for the exact
            distribution only when n <= 25; for larger n this p-value is
            SciPy's normal approximation despite the key name.
        "z", "p_value_asymptotic" : continuity-corrected normal
            approximation based on W+. No tie correction is applied to
            the variance.

    Raises
    ------
    ValueError
        If alternative is invalid, x and y differ in length, or all
        paired differences are zero.
    """
    # Fail fast on a bad alternative instead of after all the computation
    if alternative not in ("two-sided", "greater", "less"):
        raise ValueError("Invalid alternative.")

    x = np.asarray(x)
    y = np.asarray(y)

    if len(x) != len(y):
        raise ValueError("x and y must have the same length.")

    # Paired differences
    d = x - y
    d = d[d != 0]   # remove zero differences
    n = len(d)

    if n == 0:
        raise ValueError("All paired differences are zero.")

    # ----- Signed ranks -----
    abs_d = np.abs(d)
    ranks = rankdata(abs_d, method="average")  # correct for ties
    W_plus = np.sum(ranks[d > 0])
    W_minus = np.sum(ranks[d < 0])
    W_exact = min(W_plus, W_minus)

    # ----- Exact p-value (SciPy) -----
    stat_scipy, p_exact = wilcoxon(
        d,
        alternative=alternative,
        mode="exact" if n <= 25 else "approx"
    )

    # ----- Asymptotic normal approximation -----
    # Mean and standard deviation of W+ under H0
    mu = n * (n + 1) / 4
    sigma = sqrt(n * (n + 1) * (2 * n + 1) / 24)

    # Continuity correction: shrink the statistic towards mu for the
    # two-sided test, but use a fixed direction for one-sided tests so that
    # the tail P(W+ >= w) or P(W+ <= w) is approximated on either side of mu.
    if alternative == "two-sided":
        correction = 0.5 * np.sign(W_plus - mu)
    elif alternative == "greater":
        correction = 0.5
    else:
        correction = -0.5

    z = (W_plus - mu - correction) / sigma

    if alternative == "two-sided":
        p_asym = 2 * norm.sf(abs(z))
    elif alternative == "greater":
        p_asym = norm.sf(z)
    else:
        p_asym = norm.cdf(z)

    return {
        "n": n,
        "W_plus": W_plus,
        "W_minus": W_minus,
        "W_exact": W_exact,
        # Per the SciPy documentation: W+ for one-sided alternatives,
        # min(W+, W-) for the two-sided test
        "scipy_stat": stat_scipy,
        "p_value_exact": p_exact,
        "z": z,
        "p_value_asymptotic": p_asym
    }


# Dental sealant study: number of cavities per child, one entry per child
# in each list (paired by position)
coated = [3, 1, 0, 4, 1, 0, 1, 2, 1, 0, 0, 4]
uncoated = [3, 3, 2, 5, 0, 1, 5, 0, 6, 0, 3, 3]

alpha = 0.05

# One-sided paired test; differences are taken as uncoated - coated
result = wilcoxon_signed_rank_paired(
    uncoated, coated,
    alpha=alpha,
    alternative="greater",   # H1: median(uncoated − coated) > 0
)

# Report: header and rank sums, one item per line
print(
    "Paired Wilcoxon signed-rank test (Dental sealant study)",
    "------------------------------------------------------",
    f"n = {result['n']}",
    f"W+ = {result['W_plus']}",
    f"W- = {result['W_minus']}",
    f"W = min(W+, W-) = {result['W_exact']}",
    sep="\n",
)

# Statistic and p-value reported by SciPy, then the decision at level alpha
print(
    "\nExact test (SciPy)",
    f"Test statistic (W+) = {result['scipy_stat']}",
    f"Exact p-value = {result['p_value_exact']:.4f}",
    sep="\n",
)
if result["p_value_exact"] < alpha:
    print("Decision:", "REJECT H0")
else:
    print("Decision:", "DO NOT reject H0")

# Normal approximation, then its decision at level alpha
print(
    "\nAsymptotic normal approximation",
    f"z = {result['z']:.3f}",
    f"p-value ≈ {result['p_value_asymptotic']:.4f}",
    sep="\n",
)
if result["p_value_asymptotic"] < alpha:
    print("Decision:", "REJECT H0")
else:
    print("Decision:", "DO NOT reject H0")

Paired Wilcoxon signed-rank test (Dental sealant study)
------------------------------------------------------
n = 10
W+ = 44.0
W- = 11.0
W = min(W+, W-) = 11.0

Exact test (SciPy)
Test statistic (W+) = 44.0
Exact p-value = 0.0527
Decision: DO NOT reject H0

Asymptotic normal approximation
z = 1.631
p-value ≈ 0.0515
Decision: DO NOT reject H0

Child	Coated	Uncoated	Diff
1	3	3	0
2	1	3	2
3	0	2	2
4	4	5	1
5	1	0	-1
6	0	1	1
7	1	5	4
8	2	0	-2
9	1	6	5
10	0	0	0
11	0	3	3
12	4	3	-1

Seminar 7

Non-Parametric Tests (the ones we are going to discuss here)

1. One-sample tests & goodness-of-fit¶

Sign test¶

Wilcoxon signed-rank test¶

Kolmogorov–Smirnov test (1-sample)¶

Anderson–Darling test¶

2. Two-sample location tests (independent samples)¶

Mann–Whitney U test (Wilcoxon rank-sum)¶

Two-sample Kolmogorov–Smirnov test¶

Brunner–Munzel test¶

3. Paired / repeated-measures tests¶

Wilcoxon signed-rank test (paired)¶

Sign test (paired)¶

4. More than two groups (one-way designs)¶

Kruskal–Wallis test¶

Post-hoc procedures¶

5. Blocked & repeated-measures designs¶

Friedman test¶

Quade test¶

6. Factorial designs (two or more factors)¶

Aligned Rank Transform (ART) ANOVA¶

Permutation-based factorial ANOVA¶

7. Scale / variance tests¶

Ansari–Bradley test¶

Fligner–Killeen test¶

Levene / Brown–Forsythe tests¶

8. Association & dependence¶

Spearman’s rho¶

Kendall’s tau¶

Hoeffding’s D (optional / advanced)¶

9. Categorical data (non-parametric by nature)¶

Chi-squared tests¶

Fisher’s exact test¶

10. Resampling-based inference¶

Permutation tests¶

Bootstrap confidence intervals¶

Minimal required¶

Sign Test

Purpose¶

Hypotheses¶

Exact statistic¶

Null distribution¶

Assumptions¶

Why the test works (theory)¶

Interpretation¶

Example¶

Wilcoxon Signed-Rank Test

1. Problem setup¶

One-sample case¶

Paired two-sample case¶

2. Hypotheses¶

3. Assumptions¶

4. Test statistic¶

Signed-rank statistic (theoretical form)¶

Positive rank-sum statistic (computational form)¶

5. Relationship between the statistics¶

6. Exact null distribution (finite sample)¶

7. Support of the distribution¶

$\mathbb{P}(W^+ = w)$¶

8. Symmetry of the distribution¶

9. Mean and variance under $H_0$¶

10. Why the test works (core theoretical reason)¶

11. Asymptotic null distribution (CLT)¶

12. Interpretation¶

13. When the test fails¶

14. One-sentence summary (exam-perfect)¶

EXAMPLE 1¶

Example 2¶

Example for paired data¶