Seminar 2

Statistical Hypothesis Testing and Types of Errors¶

1. Motivation¶

In data science, we constantly face questions of the form:

  • Is this effect real or just noise?
  • Is a model improvement statistically significant?
  • Does a new algorithm outperform the baseline?
  • Is a parameter equal to some reference value?

Statistical hypothesis testing provides a principled framework for answering such questions under uncertainty.


2. Statistical Model and Data¶

Let
$$ X = (X_1, \dots, X_n) $$ be observed data, modeled as a random sample from a distribution $$ X_i \sim P_\theta, \quad \theta \in \Theta, $$ where:

  • $\theta$ is an unknown parameter,
  • $\Theta$ is the parameter space.

Examples:

  • $X_i \sim \mathcal{N}(\mu, \sigma^2)$, $\theta = \mu$
  • $X_i \sim \text{Bernoulli}(p)$, $\theta = p$
  • Regression model $Y = X\beta + \varepsilon$

3. Hypotheses¶

3.1 Null and Alternative Hypotheses¶

A hypothesis is a statement about the parameter $\theta$.

  • Null hypothesis $H_0$: baseline or default assumption
  • Alternative hypothesis $H_1$: competing claim

Formally: $$ H_0: \theta \in \Theta_0, \quad H_1: \theta \in \Theta_1, $$ where $$ \Theta_0 \cap \Theta_1 = \varnothing, \quad \Theta_0 \cup \Theta_1 \subseteq \Theta. $$


3.2 Types of Alternatives¶

  • Two-sided $$ H_0: \theta = \theta_0, \quad H_1: \theta \neq \theta_0 $$

  • One-sided $$ H_0: \theta \le \theta_0, \quad H_1: \theta > \theta_0 $$

The choice of alternative must be made before seeing the data.


4. Test as a Decision Rule¶

A statistical test is a decision rule $$ \varphi(X) = \begin{cases} 1 & \text{reject } H_0 \\ 0 & \text{do not reject } H_0 \end{cases} $$

Equivalently, define a rejection region $\mathcal{R}$: $$ \varphi(X) = 1 \iff X \in \mathcal{R}. $$


5. Types of Errors¶

5.1 Type I and Type II Errors¶

| Decision \ Truth | $H_0$ true | $H_1$ true |
| --- | --- | --- |
| Reject $H_0$ | Type I error | Correct |
| Do not reject $H_0$ | Correct | Type II error |

  • Type I error: rejecting a true null hypothesis
  • Type II error: failing to reject a false null hypothesis

5.2 Error Probabilities¶

Type I Error Probability (Significance Level)¶

$$ \alpha = \mathbb{P}_{\theta \in \Theta_0}(\text{reject } H_0) $$
  • Also called size or significance level
  • Fixed in advance (e.g. $\alpha = 0.05$)

Formally: $$ \sup_{\theta \in \Theta_0} \mathbb{P}_\theta(X \in \mathcal{R}) \le \alpha $$


Type II Error Probability¶

$$ \beta(\theta) = \mathbb{P}_\theta(\text{do not reject } H_0), \quad \theta \in \Theta_1 $$

5.3 Power Function¶

The power of a test is $$ \pi(\theta) = \mathbb{P}_\theta(\text{reject } H_0) = 1 - \beta(\theta), \quad \theta \in \Theta_1 $$
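For intuition, the power of the right-tailed Z-test for a mean with known variance (introduced later in these notes) has a closed form, $\pi(\mu) = 1 - \Phi\bigl(z_{1-\alpha} - (\mu - \mu_0)\sqrt{n}/\sigma\bigr)$. A minimal sketch; the values of $n$, $\sigma$, and $\alpha$ below are illustrative choices, not from the text:

```python
from math import sqrt
from scipy.stats import norm


def power_one_sided_ztest(mu, mu0, sigma, n, alpha=0.05):
    """Power pi(mu) of the right-tailed Z-test H0: mu = mu0 vs H1: mu > mu0.

    Reject H0 when Z = (Xbar - mu0) / (sigma / sqrt(n)) >= z_{1-alpha};
    under the true mean mu, Z is normal with mean (mu - mu0) * sqrt(n) / sigma.
    """
    z_crit = norm.ppf(1 - alpha)
    shift = (mu - mu0) * sqrt(n) / sigma
    return 1 - norm.cdf(z_crit - shift)


# At mu = mu0 the rejection probability equals alpha (the size of the test);
# it grows toward 1 as mu moves away from mu0.
print(power_one_sided_ztest(mu=0.0, mu0=0.0, sigma=1.0, n=25))
print(power_one_sided_ztest(mu=0.5, mu0=0.0, sigma=1.0, n=25))
```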


6. Test Statistic¶

A test statistic is a function $$ T = T(X) $$ summarizing evidence against $H_0$.

Examples:

  • Sample mean
  • $t$-statistic
  • Likelihood ratio
  • Wald statistic
  • Score statistic

Decision rule: $$ \text{Reject } H_0 \iff T \in \mathcal{C} $$


7. Distribution Under the Null¶

Key idea:

The distribution of the test statistic under $H_0$ is known or approximable.

Let $$ F_0(t) = \mathbb{P}_{H_0}(T \le t) $$

Critical value $c_\alpha$ satisfies $$ \mathbb{P}_{H_0}(T \ge c_\alpha) = \alpha $$


8. p-value¶

8.1 Definition¶

The p-value is $$ p = \mathbb{P}_{H_0}(T \ge T_{\text{obs}}) $$


8.2 Decision Rule¶

$$ \text{Reject } H_0 \iff p \le \alpha $$
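The framework above can be sketched numerically; a toy example, assuming a statistic that is standard normal under $H_0$ and a made-up observed value:

```python
from scipy.stats import norm

alpha = 0.05
t_obs = 2.1                       # hypothetical observed test statistic

# Right-tailed p-value under a N(0,1) null: P_{H0}(T >= t_obs)
p_value = 1 - norm.cdf(t_obs)

# Decision rule: reject H0 iff p-value <= alpha
reject = p_value <= alpha
print(p_value, reject)
```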

One-Sample Test for Proportions

1. Problem Setting and Motivation¶

The one-sample test for proportions is used when we want to test a claim about a single population proportion.

Typical data science questions:

  • Is the click-through rate equal to 5%?
  • Is the defect rate below 1%?
  • Has the conversion rate changed after a product update?
  • Is the fraction of positive labels larger than a benchmark?

2. Statistical Model¶

Let
$$ X_1, \dots, X_n \;\text{i.i.d.}\; \sim \text{Bernoulli}(p), $$ where:

  • $X_i = 1$ indicates “success”
  • $X_i = 0$ indicates “failure”
  • $p \in (0,1)$ is the unknown population proportion

The total number of successes: $$ S = \sum_{i=1}^n X_i \sim \text{Binomial}(n, p) $$

The sample proportion: $$ \hat{p} = \frac{S}{n} $$


3. Hypotheses¶

3.1 Null and Alternative Hypotheses¶

We test: $$ H_0: p = p_0 $$

Against one of the following alternatives:

  • Two-sided $$ H_1: p \neq p_0 $$

  • Right-tailed $$ H_1: p > p_0 $$

  • Left-tailed $$ H_1: p < p_0 $$

The alternative must be chosen before seeing the data.


4. Distribution of the Sample Proportion¶

We have: $$ \mathbb{E}[\hat{p}] = p, \qquad \mathrm{Var}(\hat{p}) = \frac{p(1-p)}{n} $$

Under $H_0$: $$ \mathbb{E}[\hat{p}] = p_0, \qquad \mathrm{Var}(\hat{p}) = \frac{p_0(1-p_0)}{n} $$


5. Normal Approximation (CLT)¶

By the Central Limit Theorem: $$ \frac{\hat{p} - p}{\sqrt{p(1-p)/n}} \;\xrightarrow{d}\; \mathcal{N}(0,1) $$

Under $H_0$: $$ Z = \frac{\hat{p} - p_0} {\sqrt{p_0(1-p_0)/n}} \;\approx\; \mathcal{N}(0,1) $$


6. Validity Conditions¶

The normal approximation is valid when: $$ np_0 \ge 5 \quad \text{and} \quad n(1-p_0) \ge 5 $$

(Thresholds like 5 or 10 are common rules of thumb.)

If these conditions fail:

  • Use an exact binomial test
  • Or a simulation-based test
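When the rule-of-thumb conditions fail, `scipy` provides an exact binomial test; a minimal sketch (the counts below are illustrative):

```python
from scipy.stats import binomtest

# 3 successes out of 10 trials, testing H0: p = 0.5 exactly --
# the p-value is computed from the Binomial(10, 0.5) pmf, no CLT needed
res = binomtest(k=3, n=10, p=0.5, alternative="two-sided")
print(res.pvalue)
```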

7. Test Statistic¶

The one-sample z-statistic for proportions is: $$ Z = \frac{\hat{p} - p_0} {\sqrt{p_0(1-p_0)/n}} $$

This statistic measures how many standard deviations $\hat{p}$ is away from $p_0$ under $H_0$.


8. Rejection Regions¶

Let $\alpha$ be the significance level.

Two-Sided Test¶

Reject $H_0$ if: $$ |Z| \ge z_{1-\alpha/2} $$

Right-Tailed Test¶

Reject $H_0$ if: $$ Z \ge z_{1-\alpha} $$

Left-Tailed Test¶

Reject $H_0$ if: $$ Z \le -z_{1-\alpha} $$
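The critical values $z_{1-\alpha/2}$ and $z_{1-\alpha}$ come from the standard normal quantile function, e.g. at $\alpha = 0.05$:

```python
from scipy.stats import norm

alpha = 0.05
z_two_sided = norm.ppf(1 - alpha / 2)   # z_{1-alpha/2}, for |Z| >= z
z_one_sided = norm.ppf(1 - alpha)       # z_{1-alpha}, for one-tailed tests
print(z_two_sided, z_one_sided)         # approx 1.96 and 1.645
```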


9. p-value¶

Definition¶

  • Two-sided $$ p\text{-value} = 2\bigl(1 - \Phi(|Z_{\text{obs}}|)\bigr) $$

  • Right-tailed $$ p\text{-value} = 1 - \Phi(Z_{\text{obs}}) $$

  • Left-tailed $$ p\text{-value} = \Phi(Z_{\text{obs}}) $$

where $\Phi$ is the CDF of $\mathcal{N}(0,1)$.


10. Decision Rule¶

$$ \text{Reject } H_0 \iff p\text{-value} \le \alpha $$

11. Exact Binomial Test (Brief)¶

When $n$ is small or $p_0$ is extreme:

$$ S \sim \text{Binomial}(n, p_0) $$

p-values are computed exactly using the binomial distribution rather than a normal approximation.


12. Confidence Interval Connection¶

A $(1-\alpha)$ confidence interval for $p$: $$ \hat{p} \pm z_{1-\alpha/2} \sqrt{\frac{\hat{p}(1-\hat{p})}{n}} $$

Relationship (approximate):

$p_0$ is rejected by the two-sided test at level $\alpha$
iff $p_0$ is outside the $(1-\alpha)$ confidence interval.

(The correspondence is not exact here: the test standardizes with the null standard error $\sqrt{p_0(1-p_0)/n}$, while the Wald interval uses the estimated standard error $\sqrt{\hat{p}(1-\hat{p})/n}$.)
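A quick numerical check of this duality, using $\hat{p} = 0.84$, $n = 500$, $p_0 = 0.856$ from the undergraduate example further below (note the interval uses the estimated SE, so the match with the $p_0$-based z-test is only approximate):

```python
from math import sqrt
from scipy.stats import norm

p_hat, n, p0, alpha = 0.84, 500, 0.856, 0.05

z = norm.ppf(1 - alpha / 2)
half_width = z * sqrt(p_hat * (1 - p_hat) / n)
ci = (p_hat - half_width, p_hat + half_width)

# p0 lies inside the 95% interval, matching the two-sided test's
# failure to reject H0: p = 0.856
print(ci, ci[0] <= p0 <= ci[1])
```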

In [12]:
import math
from scipy.stats import norm


def one_sample_proportion_test(
    data=None,
    p_hat=None,
    n=None,
    p0=0.5,
    alternative="two-sided",
    alpha=0.05,
    success_values=("1", "success", "yes", "true")
):
    """
    One-sample Z-test for proportions.

    Parameters
    ----------
    data : str, optional
        Raw data as a string (e.g. "10101", "1 0 1 1 0", "success failure success").
    p_hat : float, optional
        Sample proportion (used if data is not provided).
    n : int, optional
        Sample size (required if p_hat is provided).
    p0 : float
        Null hypothesis proportion.
    alternative : {"two-sided", "less", "greater"}
        Type of alternative hypothesis.
    alpha : float
        Significance level.
    success_values : tuple
        Values interpreted as "success" in the data string.

    Returns
    -------
    dict
        Test results.
    """

    # ---- Case 1: raw data is given ----
    if data is not None:
        tokens = data.lower().replace(",", " ").split()
        n = len(tokens)
        x = sum(token in success_values for token in tokens)
        p_hat = x / n

    # ---- Case 2: p_hat and n are given ----
    elif p_hat is not None and n is not None:
        x = p_hat * n

    else:
        raise ValueError("Provide either `data` or both `p_hat` and `n`.")

    # ---- Z statistic ----
    standard_error = math.sqrt(p0 * (1 - p0) / n)
    z_obs = (p_hat - p0) / standard_error

    # ---- p-value ----
    if alternative == "two-sided":
        p_value = 2 * (1 - norm.cdf(abs(z_obs)))
    elif alternative == "greater":
        p_value = 1 - norm.cdf(z_obs)
    elif alternative == "less":
        p_value = norm.cdf(z_obs)
    else:
        raise ValueError("alternative must be 'two-sided', 'less', or 'greater'")

    # ---- Decision ----
    reject = p_value <= alpha  # decision rule: reject H0 iff p-value <= alpha

    return {
        "n": n,
        "x": x,
        "p_hat": p_hat,
        "z_obs": z_obs,
        "p_value": p_value,
        "alpha": alpha,
        "reject_H0": reject
    }

1. Using raw data (string)¶

In [13]:
result = one_sample_proportion_test(
    data="1 0 1 1 0 1 1 0 1 1",
    p0=0.5,
    alternative="two-sided"
)

print(result)
{'n': 10, 'x': 7, 'p_hat': 0.7, 'z_obs': 1.2649110640673513, 'p_value': 0.2059032107320684, 'alpha': 0.05, 'reject_H0': False}

2. Using raw data with words¶

In [15]:
result = one_sample_proportion_test(
    data="success failure success success failure",
    p0=0.6,
    alternative="greater"
)

print(result)
{'n': 5, 'x': 3, 'p_hat': 0.6, 'z_obs': 0.0, 'p_value': 0.5, 'alpha': 0.05, 'reject_H0': False}

3. Using a ready sample proportion¶

It has been found that 85.6% of all enrolled college and university students in the United States are undergraduates. A random sample of 500 enrolled college students in a particular state revealed that 420 of them were undergraduates. Is there sufficient evidence to conclude that the proportion differs from the national percentage? Use $\alpha= 0.05$.

In [18]:
result = one_sample_proportion_test(
    n=500,
    p_hat=(420 / 500),
    p0=0.856,
    alternative="two-sided"
)

print(result)
{'n': 500, 'x': 420.0, 'p_hat': 0.84, 'z_obs': -1.0190297341929058, 'p_value': 0.30818885050252565, 'alpha': 0.05, 'reject_H0': False}

One-Sample Test for the Mean

1. Problem Setting and Motivation¶

The one-sample test for the mean is used when we want to test a claim about the population mean based on a single sample.

Typical data science questions:

  • Is the average response time equal to 200 ms?
  • Has the mean revenue per user changed?
  • Is the mean prediction error zero?
  • Is the expected value of a feature equal to a benchmark?

2. Statistical Model¶

Let $$ X_1, \dots, X_n \;\text{i.i.d.}\; \sim P $$ with: $$ \mathbb{E}[X_i] = \mu, \qquad \mathrm{Var}(X_i) = \sigma^2 $$

The parameter of interest is the population mean $\mu$.

The sample mean is $$ \bar{X} = \frac{1}{n}\sum_{i=1}^n X_i $$


3. Hypotheses¶

We test $$ H_0: \mu = \mu_0 $$

against one of the following alternatives:

  • Two-sided $$ H_1: \mu \neq \mu_0 $$

  • Right-tailed $$ H_1: \mu > \mu_0 $$

  • Left-tailed $$ H_1: \mu < \mu_0 $$

The alternative must be chosen before observing the data.


4. Case I: Variance Known (Z-Test)¶

4.1 Assumptions¶

  • $X_i \sim \mathcal{N}(\mu, \sigma^2)$, or $n$ is large (CLT applies)
  • The variance $\sigma^2$ is known

4.2 Distribution of the Sample Mean¶

We have: $$ \mathbb{E}[\bar{X}] = \mu, \qquad \mathrm{Var}(\bar{X}) = \frac{\sigma^2}{n} $$

Under $H_0$: $$ \bar{X} \sim \mathcal{N}\left(\mu_0, \frac{\sigma^2}{n}\right) $$


4.3 Test Statistic (Z-Statistic)¶

Define: $$ Z = \frac{\bar{X} - \mu_0}{\sigma / \sqrt{n}} $$

Under $H_0$: $$ Z \sim \mathcal{N}(0,1) $$


4.4 Rejection Regions¶

Let $\alpha$ be the significance level.

  • Two-sided $$ |Z| \ge z_{1-\alpha/2} $$

  • Right-tailed $$ Z \ge z_{1-\alpha} $$

  • Left-tailed $$ Z \le -z_{1-\alpha} $$


4.5 p-value¶

Let $Z_{\text{obs}}$ be the observed value of the test statistic.

  • Two-sided $$ p\text{-value} = 2\bigl(1 - \Phi(|Z_{\text{obs}}|)\bigr) $$

  • Right-tailed $$ p\text{-value} = 1 - \Phi(Z_{\text{obs}}) $$

  • Left-tailed $$ p\text{-value} = \Phi(Z_{\text{obs}}) $$
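A minimal worked sketch of the known-variance Z-test; the numbers below are illustrative, not from the text:

```python
from math import sqrt
from scipy.stats import norm

# H0: mu = 200 vs H1: mu != 200, with sigma known
n, x_bar, mu0, sigma, alpha = 36, 202.0, 200.0, 6.0, 0.05

z_obs = (x_bar - mu0) / (sigma / sqrt(n))   # standardized distance from mu0
p_value = 2 * (1 - norm.cdf(abs(z_obs)))    # two-sided p-value
print(z_obs, p_value, p_value <= alpha)
```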


5. Case II: Variance Unknown (t-Test)¶

5.1 Assumptions¶

  • $X_i \sim \mathcal{N}(\mu, \sigma^2)$
  • The variance $\sigma^2$ is unknown

This is the most common real-world situation.


5.2 Sample Variance¶

The sample variance is: $$ S^2 = \frac{1}{n-1}\sum_{i=1}^n (X_i - \bar{X})^2 $$


5.3 Test Statistic (t-Statistic)¶

Define: $$ T = \frac{\bar{X} - \mu_0}{S / \sqrt{n}} $$

Under $H_0$: $$ T \sim t_{n-1} $$ (Student’s t-distribution with $n-1$ degrees of freedom)


5.4 Why t-Distribution?¶

Replacing $\sigma$ with the random variable $S$ introduces extra uncertainty.
The t-distribution has:

  • heavier tails than the normal distribution
  • convergence to $\mathcal{N}(0,1)$ as $n \to \infty$
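Both properties are easy to verify numerically by comparing the 97.5% quantiles of $t_{df}$ with the normal one:

```python
from scipy.stats import norm, t

z975 = norm.ppf(0.975)   # approx 1.96
for df in (2, 10, 30, 1000):
    # t critical values exceed the normal one and shrink toward it as df grows
    print(df, t.ppf(0.975, df))
```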

5.5 Rejection Regions¶

Let $t_{n-1,\,1-\alpha}$ denote the $(1-\alpha)$ quantile of $t_{n-1}$.

  • Two-sided $$ |T| \ge t_{n-1,\,1-\alpha/2} $$

  • Right-tailed $$ T \ge t_{n-1,\,1-\alpha} $$

  • Left-tailed $$ T \le -t_{n-1,\,1-\alpha} $$


5.6 p-value¶

Let $T_{\text{obs}}$ be the observed statistic.

  • Two-sided $$ p\text{-value} = 2\bigl(1 - F_{t_{n-1}}(|T_{\text{obs}}|)\bigr) $$

  • Right-tailed $$ p\text{-value} = 1 - F_{t_{n-1}}(T_{\text{obs}}) $$

  • Left-tailed $$ p\text{-value} = F_{t_{n-1}}(T_{\text{obs}}) $$


6. Decision Rule¶

For both tests: $$ \text{Reject } H_0 \iff p\text{-value} \le \alpha $$


7. Confidence Interval Connection¶

Known Variance¶

$$ \bar{X} \pm z_{1-\alpha/2}\frac{\sigma}{\sqrt{n}} $$

Unknown Variance¶

$$ \bar{X} \pm t_{n-1,\,1-\alpha/2}\frac{S}{\sqrt{n}} $$

Relationship:

$H_0: \mu = \mu_0$ is rejected at level $\alpha$
iff $\mu_0$ is outside the $(1-\alpha)$ confidence interval.


8. Practical Data Science Remarks¶

  • The t-test is quite robust to mild departures from normality, especially for moderate to large $n$
  • For large $n$, Z-test and t-test give nearly identical results
  • Always report:
    • estimated mean $\bar{X}$
    • confidence interval
    • effect size

9. Summary¶

  • Parameter of interest: population mean $\mu$
  • Known variance → Z-test
  • Unknown variance → t-test
  • Test statistics follow known distributions under $H_0$
  • Strong connection to confidence intervals
In [32]:
import math
from scipy.stats import t


def one_sample_ttest(
    data=None,
    x_bar=None,
    s=None,
    n=None,
    mu0=0.0,
    alternative="two-sided",   # "two-sided", "greater", "less"
    alpha=0.05
):
    """
    One-sample t-test for the population mean.
    Uses both:
      (1) p-value method
      (2) critical region method
    """

    # ---------- Parse input ----------
    if data is not None:
        values = [float(x) for x in data.replace(",", " ").split()]
        n = len(values)
        if n < 2:
            raise ValueError("Sample size must be at least 2.")

        x_bar = sum(values) / n
        s = math.sqrt(
            sum((x - x_bar) ** 2 for x in values) / (n - 1)
        )

    elif x_bar is not None and s is not None and n is not None:
        if n < 2:
            raise ValueError("Sample size must be at least 2.")
    else:
        raise ValueError("Provide either `data` OR (`x_bar`, `s`, `n`).")

    # ---------- Test statistic ----------
    se = s / math.sqrt(n)
    t_obs = (x_bar - mu0) / se
    df = n - 1

    # ---------- p-value method ----------
    if alternative == "two-sided":
        p_value = 2 * (1 - t.cdf(abs(t_obs), df))
    elif alternative == "greater":
        p_value = 1 - t.cdf(t_obs, df)
    elif alternative == "less":
        p_value = t.cdf(t_obs, df)
    else:
        raise ValueError("alternative must be 'two-sided', 'greater', or 'less'.")

    reject_by_pvalue = (p_value <= alpha)  # reject H0 iff p-value <= alpha

    # ---------- Critical region method ----------
    if alternative == "two-sided":
        t_crit = t.ppf(1 - alpha / 2, df)
        reject_by_critical = abs(t_obs) > t_crit
        critical_region = f"|T| > {t_crit:.4f}"

    elif alternative == "greater":
        t_crit = t.ppf(1 - alpha, df)
        reject_by_critical = t_obs > t_crit
        critical_region = f"T > {t_crit:.4f}"

    else:  # "less"
        t_crit = t.ppf(alpha, df)
        reject_by_critical = t_obs < t_crit
        critical_region = f"T < {t_crit:.4f}"

    # ---------- Return results ----------
    return {
        "inputs": {
            "n": n,
            "x_bar": x_bar,
            "s": s,
            "mu0": mu0,
            "alternative": alternative,
            "alpha": alpha
        },
        "statistic": {
            "t_obs": t_obs,
            "df": df,
            "se": se
        },
        "p_value_method": {
            "p_value": p_value,
            "reject_H0": reject_by_pvalue
        },
        "critical_region_method": {
            "critical_region": critical_region,
            "t_crit": t_crit,
            "reject_H0": reject_by_critical
        }
    }

Using raw data¶

The bumblebee bat (also known as Kitti’s hog-nosed bat, Craseonycteris thonglongyai) is the world’s smallest mammal; its weight is approximately normally distributed with a mean of 1.9 grams. Such bats are roughly the size of a large bumblebee. A chiropterologist believes that the Kitti’s hog-nosed bats in a new geographical region under study have a different average weight than 1.9 grams. The weights (in grams) of a sample of 10 bats from the new region are shown below. Use the confidence interval method to test the claim that the mean weight of bumblebee bats in this region is not 1.9 g, at a 10% level of significance.

In [39]:
res = one_sample_ttest(
    data="1.9 2.24 2.13 2 1.54 1.96 1.79 2.18 1.81 2.3",
    mu0=1.9,
    alternative="two-sided",
    alpha=0.1
)

print(res)
{'inputs': {'n': 10, 'x_bar': 1.9849999999999999, 's': 0.23524219198283478, 'mu0': 1.9, 'alternative': 'two-sided', 'alpha': 0.1}, 'statistic': {'t_obs': 1.1426249638667096, 'df': 9, 'se': 0.07439011284363593}, 'p_value_method': {'p_value': 0.28267920117045664, 'reject_H0': False}, 'critical_region_method': {'critical_region': '|T| > 1.8331', 't_crit': 1.8331129326536333, 'reject_H0': False}}
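The problem asks for the confidence interval method, which `one_sample_ttest` does not report directly; a quick follow-up using the summary values printed above:

```python
from math import sqrt
from scipy.stats import t

# Summary statistics of the 10 bat weights (matching the output above)
n, x_bar, s, mu0, alpha = 10, 1.985, 0.235242, 1.9, 0.10

t_crit = t.ppf(1 - alpha / 2, n - 1)
half_width = t_crit * s / sqrt(n)
ci = (x_bar - half_width, x_bar + half_width)

# mu0 = 1.9 lies inside the 90% interval, so H0 is not rejected --
# consistent with the p-value and critical-region results above
print(ci, ci[0] <= mu0 <= ci[1])
```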

Using summary statistics¶

The label on a particular brand of cream of mushroom soup states that (on average) there is 870 mg of sodium per serving. A nutritionist would like to test if the average is actually more than the stated value. To test this, 13 servings of this soup were randomly selected and amount of sodium measured. The sample mean was found to be 882.4 mg and the sample standard deviation was 24.3 mg. Assume that the amount of sodium per serving is normally distributed. Test this claim using the traditional method of hypothesis testing. Use the α = 0.05 level of significance.

In [35]:
res = one_sample_ttest(
    x_bar=882.4,
    s=24.3,
    n=13,
    mu0=870,
    alternative="greater",
    alpha=0.05
)

print(res)
{'inputs': {'n': 13, 'x_bar': 882.4, 's': 24.3, 'mu0': 870, 'alternative': 'greater', 'alpha': 0.05}, 'statistic': {'t_obs': 1.8398697866565177, 'df': 12, 'se': 6.7396073841365345}, 'p_value_method': {'p_value': 0.04532103678298238, 'reject_H0': True}, 'critical_region_method': {'critical_region': 'T > 1.7823', 't_crit': 1.7822875556491589, 'reject_H0': True}}

Two-Sample Test for Proportions

1. Problem Setting and Motivation¶

The two-sample test for proportions is used when we want to compare two population proportions based on independent samples.

Typical data science questions:

  • Is the conversion rate different between version A and version B?
  • Does a new recommendation algorithm increase click-through rate?
  • Is the defect rate lower for supplier 1 than supplier 2?
  • Are positive label rates equal across two groups?

This test is a core statistical tool behind A/B testing.


2. Statistical Model¶

Let $$ X_1, \dots, X_{n_1} \sim \text{Bernoulli}(p_1), \qquad Y_1, \dots, Y_{n_2} \sim \text{Bernoulli}(p_2), $$ where:

  • $p_1$ and $p_2$ are the unknown population proportions
  • the two samples are independent

Define the numbers of successes: $$ S_1 = \sum_{i=1}^{n_1} X_i, \qquad S_2 = \sum_{j=1}^{n_2} Y_j $$

Sample proportions: $$ \hat{p}_1 = \frac{S_1}{n_1}, \qquad \hat{p}_2 = \frac{S_2}{n_2} $$


3. Parameter of Interest¶

The quantity of interest is the difference of proportions: $$ \Delta = p_1 - p_2 $$


4. Hypotheses¶

We test: $$ H_0: p_1 = p_2 \quad \text{(equivalently } \Delta = 0 \text{)} $$

Against one of the following alternatives:

  • Two-sided $$ H_1: p_1 \neq p_2 $$

  • Right-tailed $$ H_1: p_1 > p_2 $$

  • Left-tailed $$ H_1: p_1 < p_2 $$

The alternative must be chosen before observing the data.


5. Sampling Distribution of the Difference¶

We have: $$ \mathbb{E}[\hat{p}_1 - \hat{p}_2] = p_1 - p_2 $$ $$ \mathrm{Var}(\hat{p}_1 - \hat{p}_2) = \frac{p_1(1-p_1)}{n_1} + \frac{p_2(1-p_2)}{n_2} $$


6. Null Hypothesis and Pooled Proportion¶

Under $H_0: p_1 = p_2 = p$, the common proportion is estimated by the pooled estimator: $$ \hat{p} = \frac{S_1 + S_2}{n_1 + n_2} $$

This pooling reflects the assumption that both samples come from the same population under $H_0$.


7. Normal Approximation (CLT)¶

By the Central Limit Theorem, under $H_0$: $$ Z = \frac{(\hat{p}_1 - \hat{p}_2)} {\sqrt{\hat{p}(1-\hat{p})\left(\frac{1}{n_1} + \frac{1}{n_2}\right)}} \;\approx\; \mathcal{N}(0,1) $$


8. Validity Conditions¶

The normal approximation is valid when: $$ n_1\hat{p} \ge 5,\quad n_1(1-\hat{p}) \ge 5, $$ $$ n_2\hat{p} \ge 5,\quad n_2(1-\hat{p}) \ge 5 $$

If these conditions fail:

  • use exact tests (e.g. Fisher’s exact test)
  • or permutation tests
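For small samples, `scipy.stats.fisher_exact` runs the exact test on the 2×2 table of successes and failures; the counts below are illustrative:

```python
from scipy.stats import fisher_exact

# Rows: group 1 and group 2; columns: successes, failures
table = [[7, 3],
         [3, 7]]
odds_ratio, p_value = fisher_exact(table, alternative="two-sided")
print(odds_ratio, p_value)
```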

9. Test Statistic¶

The two-sample z-statistic for proportions is: $$ Z = \frac{\hat{p}_1 - \hat{p}_2} {\sqrt{\hat{p}(1-\hat{p})\left(\frac{1}{n_1} + \frac{1}{n_2}\right)}} $$

This measures how many standard deviations the observed difference is from zero under $H_0$.


10. Rejection Regions¶

Let $\alpha$ be the significance level.

Two-Sided Test¶

Reject $H_0$ if: $$ |Z| \ge z_{1-\alpha/2} $$

Right-Tailed Test¶

Reject $H_0$ if: $$ Z \ge z_{1-\alpha} $$

Left-Tailed Test¶

Reject $H_0$ if: $$ Z \le -z_{1-\alpha} $$


11. p-value¶

Let $Z_{\text{obs}}$ be the observed value of the test statistic.

  • Two-sided $$ p\text{-value} = 2\bigl(1 - \Phi(|Z_{\text{obs}}|)\bigr) $$

  • Right-tailed $$ p\text{-value} = 1 - \Phi(Z_{\text{obs}}) $$

  • Left-tailed $$ p\text{-value} = \Phi(Z_{\text{obs}}) $$


12. Decision Rule¶

$$ \text{Reject } H_0 \iff p\text{-value} \le \alpha $$

13. Confidence Interval for Difference of Proportions¶

A $(1-\alpha)$ confidence interval for $p_1 - p_2$: $$ (\hat{p}_1 - \hat{p}_2) \pm z_{1-\alpha/2} \sqrt{ \frac{\hat{p}_1(1-\hat{p}_1)}{n_1} + \frac{\hat{p}_2(1-\hat{p}_2)}{n_2} } $$

Important:

  • No pooling is used in confidence intervals
  • Pooling is only used under the null hypothesis
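For instance, the unpooled interval for the late-student example further below ($\hat{p}_1 = 13/200$, $\hat{p}_2 = 16/200$) can be computed directly:

```python
from math import sqrt
from scipy.stats import norm

p1, n1 = 13 / 200, 200
p2, n2 = 16 / 200, 200
alpha = 0.05

# Unpooled SE: pooling is only appropriate under H0, not for the interval
se = sqrt(p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2)
z = norm.ppf(1 - alpha / 2)
diff = p1 - p2
ci = (diff - z * se, diff + z * se)

# 0 inside the interval -> consistent with not rejecting H0: p1 = p2
print(ci, ci[0] <= 0 <= ci[1])
```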

14. Relation to A/B Testing¶

  • Group A → proportion $p_1$
  • Group B → proportion $p_2$
  • Null hypothesis: no difference
  • This test is the classical frequentist A/B test

In practice, it is often complemented or replaced by:

  • logistic regression
  • Bayesian A/B testing
  • bootstrap methods

15. Practical Data Science Remarks¶

  • Always report:
    • $\hat{p}_1$, $\hat{p}_2$
    • difference $\hat{p}_1 - \hat{p}_2$
    • confidence interval
  • Statistical significance does not imply business relevance
  • Large samples can make tiny differences significant

16. Summary¶

  • Two independent Bernoulli samples
  • Parameter of interest: $p_1 - p_2$
  • Test statistic: pooled z-test
  • CLT-based approximation
  • Foundation of classical A/B testing

Example¶

A vice principal wants to see if there is a difference between the proportion of students who are late to the first class of the day and the proportion who are late to the class right after lunch. To test this claim, the vice principal randomly selects 200 students from a first-period class and records whether they are late, then randomly selects 200 students from an after-lunch class and records whether they are late. At the 0.05 level of significance, can a difference be concluded?

|  | First Class | After-Lunch Class |
| --- | --- | --- |
| Sample size | 200 | 200 |
| Number of late students | 13 | 16 |

In [42]:
import math
from scipy.stats import norm


def two_sample_proportion_ztest(
    data1=None,
    data2=None,
    p1_hat=None,
    n1=None,
    p2_hat=None,
    n2=None,
    diff0=0.0,                 # H0: p1 - p2 = diff0 (usually 0)
    alternative="two-sided",    # "two-sided", "greater", "less"
    alpha=0.05,
    success_values=("1", "success", "yes", "true")
):
    """
    Two-sample Z-test for proportions.
    Works with either:
      (A) raw data strings (data1, data2)
      (B) summary inputs (p1_hat, n1, p2_hat, n2)

    Uses BOTH:
      (1) p-value method
      (2) critical region method

    Note: For the classical pooled two-proportion z-test (valid when diff0 = 0),
          we pool the proportions under H0. If diff0 != 0, we use the unpooled SE.
    """

    # ---------- Parse input ----------
    if data1 is not None and data2 is not None:
        tokens1 = data1.lower().replace(",", " ").split()
        tokens2 = data2.lower().replace(",", " ").split()

        n1 = len(tokens1)
        n2 = len(tokens2)

        x1 = sum(tok in success_values for tok in tokens1)
        x2 = sum(tok in success_values for tok in tokens2)

        p1_hat = x1 / n1
        p2_hat = x2 / n2

    elif (p1_hat is not None and n1 is not None and
          p2_hat is not None and n2 is not None):
        x1 = p1_hat * n1
        x2 = p2_hat * n2
    else:
        raise ValueError("Provide either (data1, data2) OR (p1_hat, n1, p2_hat, n2).")

    if n1 <= 0 or n2 <= 0:
        raise ValueError("n1 and n2 must be positive.")
    if not (0 <= p1_hat <= 1) or not (0 <= p2_hat <= 1):
        raise ValueError("p1_hat and p2_hat must be in [0,1].")

    # ---------- Test statistic ----------
    # If H0 is p1 - p2 = 0, use pooled SE (classical two-proportion z-test)
    if diff0 == 0.0:
        p_pool = (p1_hat * n1 + p2_hat * n2) / (n1 + n2)
        se = math.sqrt(p_pool * (1 - p_pool) * (1 / n1 + 1 / n2))
        z_obs = (p1_hat - p2_hat - diff0) / se
        se_type = "pooled (H0: p1-p2=0)"
    else:
        # General diff0 ≠ 0: use unpooled SE (common practical approach)
        se = math.sqrt(p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2)
        z_obs = (p1_hat - p2_hat - diff0) / se
        se_type = "unpooled (general diff0)"

    # ---------- p-value method ----------
    if alternative == "two-sided":
        p_value = 2 * (1 - norm.cdf(abs(z_obs)))
    elif alternative == "greater":
        # H1: p1 - p2 > diff0
        p_value = 1 - norm.cdf(z_obs)
    elif alternative == "less":
        # H1: p1 - p2 < diff0
        p_value = norm.cdf(z_obs)
    else:
        raise ValueError("alternative must be 'two-sided', 'greater', or 'less'.")

    reject_by_pvalue = (p_value <= alpha)  # reject H0 iff p-value <= alpha

    # ---------- Critical region method ----------
    if alternative == "two-sided":
        z_crit = norm.ppf(1 - alpha / 2)
        reject_by_critical = abs(z_obs) > z_crit
        critical_region = f"|Z| > {z_crit:.4f}"
    elif alternative == "greater":
        z_crit = norm.ppf(1 - alpha)
        reject_by_critical = z_obs > z_crit
        critical_region = f"Z > {z_crit:.4f}"
    else:  # "less"
        z_crit = norm.ppf(alpha)
        reject_by_critical = z_obs < z_crit
        critical_region = f"Z < {z_crit:.4f}"

    # ---------- Return results ----------
    return {
        "inputs": {
            "n1": n1, "p1_hat": p1_hat, "x1": x1,
            "n2": n2, "p2_hat": p2_hat, "x2": x2,
            "diff0": diff0,
            "alternative": alternative,
            "alpha": alpha
        },
        "statistic": {
            "z_obs": z_obs,
            "se": se,
            "se_type": se_type
        },
        "p_value_method": {
            "p_value": p_value,
            "reject_H0": reject_by_pvalue
        },
        "critical_region_method": {
            "critical_region": critical_region,
            "z_crit": z_crit,
            "reject_H0": reject_by_critical
        }
    }


# ------------------ Example usage ------------------
if __name__ == "__main__":
    # Example 1: raw data strings (1 = success, 0 = failure)
    res1 = two_sample_proportion_ztest(
        data1="1 0 1 1 0 1 1 0 1 1",
        data2="1 0 0 0 1 0 0 0 1 0",
        diff0=0.0,
        alternative="two-sided",
        alpha=0.05
    )
    print("Example 1 (data strings):")
    print(res1, "\n")

    # Example 2: summary inputs
    res2 = two_sample_proportion_ztest(
        p1_hat=13/200, n1=200,
        p2_hat=16/200, n2=200,
        diff0=0.0,
        alternative="two-sided",
        alpha=0.05
    )
    print("Example 2 (summary inputs):")
    print(res2)
Example 1 (data strings):
{'inputs': {'n1': 10, 'p1_hat': 0.7, 'x1': 7, 'n2': 10, 'p2_hat': 0.3, 'x2': 3, 'diff0': 0.0, 'alternative': 'two-sided', 'alpha': 0.05}, 'statistic': {'z_obs': 1.7888543819998317, 'se': 0.22360679774997896, 'se_type': 'pooled (H0: p1-p2=0)'}, 'p_value_method': {'p_value': 0.07363827012030266, 'reject_H0': False}, 'critical_region_method': {'critical_region': '|Z| > 1.9600', 'z_crit': 1.959963984540054, 'reject_H0': False}} 

Example 2 (summary inputs):
{'inputs': {'n1': 200, 'p1_hat': 0.065, 'x1': 13.0, 'n2': 200, 'p2_hat': 0.08, 'x2': 16.0, 'diff0': 0.0, 'alternative': 'two-sided', 'alpha': 0.05}, 'statistic': {'z_obs': -0.5784492956984421, 'se': 0.025931399885081405, 'se_type': 'pooled (H0: p1-p2=0)'}, 'p_value_method': {'p_value': 0.5629608205677976, 'reject_H0': False}, 'critical_region_method': {'critical_region': '|Z| > 1.9600', 'z_crit': 1.959963984540054, 'reject_H0': False}}

Example¶

Adults in the general United States population volunteer an average of 4.2 hours per week. A random sample of 18 undergraduate college students and 20 graduate college students produced the results below concerning the amount of time spent in volunteer service per week. At the α = 0.01 level of significance, is there sufficient evidence to conclude that a difference exists between the mean number of volunteer hours per week for undergraduate and graduate college students? Assume that the number of volunteer hours per week is normally distributed.

|  | Undergraduate | Graduate |
| --- | --- | --- |
| Sample mean | 2.5 | 3.8 |
| Sample variance | 2.2 | 3.5 |
| Sample size | 18 | 20 |
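This example calls for a two-sample t-test for means, which the notes above do not implement; a sketch from summary statistics using `scipy.stats.ttest_ind_from_stats`, here with the classical pooled-variance test (Welch's test via `equal_var=False` is a common alternative when equal variances are doubtful):

```python
from math import sqrt
from scipy.stats import ttest_ind_from_stats

# Summary statistics from the table above (std = sqrt of the sample variance)
t_obs, p_value = ttest_ind_from_stats(
    mean1=2.5, std1=sqrt(2.2), nobs1=18,
    mean2=3.8, std2=sqrt(3.5), nobs2=20,
    equal_var=True,              # pooled-variance two-sample t-test
)

alpha = 0.01
# p-value exceeds 0.01, so the difference is not significant at this level
print(t_obs, p_value, p_value <= alpha)
```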