Seminar 3

Two-Sample Test for Means

1. Problem Setting and Motivation¶

The two-sample test for means is used when we want to compare the population means of two independent groups.

Typical data science questions:

  • Is the average revenue different between users in group A and group B?
  • Does a new algorithm reduce mean latency?
  • Is the mean exam score different between two teaching methods?
  • Is the average loss lower for model 2 than model 1?

2. Statistical Model¶

Let $$ X_1, \dots, X_{n_1} \sim P_1, \qquad Y_1, \dots, Y_{n_2} \sim P_2 $$

Assume: $$ \mathbb{E}[X_i] = \mu_1,\quad \mathrm{Var}(X_i) = \sigma_1^2 $$ $$ \mathbb{E}[Y_j] = \mu_2,\quad \mathrm{Var}(Y_j) = \sigma_2^2 $$

The two samples are independent.

Sample means: $$ \bar{X} = \frac{1}{n_1}\sum_{i=1}^{n_1} X_i, \qquad \bar{Y} = \frac{1}{n_2}\sum_{j=1}^{n_2} Y_j $$


3. Parameter of Interest¶

The parameter of interest is the difference of means: $$ \Delta = \mu_1 - \mu_2 $$


4. Hypotheses¶

We test: $$ H_0: \mu_1 = \mu_2 \quad \text{(equivalently } \Delta = 0 \text{)} $$

Against one of the following alternatives:

  • Two-sided $$ H_1: \mu_1 \neq \mu_2 $$

  • Right-tailed $$ H_1: \mu_1 > \mu_2 $$

  • Left-tailed $$ H_1: \mu_1 < \mu_2 $$

The alternative must be chosen before observing the data.


5. Sampling Distribution of the Difference¶

We have: $$ \mathbb{E}[\bar{X} - \bar{Y}] = \mu_1 - \mu_2 $$ $$ \mathrm{Var}(\bar{X} - \bar{Y}) = \frac{\sigma_1^2}{n_1} + \frac{\sigma_2^2}{n_2} $$
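These two identities can be checked with a quick Monte Carlo sketch (all parameter values below are arbitrary illustrative choices, not taken from the examples in this seminar):

```python
import numpy as np

# Simulate many replications of (Xbar - Ybar) and compare the empirical
# mean and variance with mu1 - mu2 and sigma1^2/n1 + sigma2^2/n2.
rng = np.random.default_rng(42)
mu1, sigma1, n1 = 5.0, 2.0, 40
mu2, sigma2, n2 = 3.0, 3.0, 60
reps = 50_000

x_bars = rng.normal(mu1, sigma1, size=(reps, n1)).mean(axis=1)
y_bars = rng.normal(mu2, sigma2, size=(reps, n2)).mean(axis=1)
diffs = x_bars - y_bars

print(diffs.mean())  # close to mu1 - mu2 = 2.0
print(diffs.var())   # close to sigma1^2/n1 + sigma2^2/n2 = 0.25
```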


6. Case I: Variances Known (Two-Sample Z-Test)

6.1 Assumptions¶

  • $\sigma_1^2$ and $\sigma_2^2$ are known
  • Data are normal or sample sizes are large (CLT)

6.2 Test Statistic¶

Define: $$ Z = \frac{(\bar{X} - \bar{Y})} {\sqrt{\frac{\sigma_1^2}{n_1} + \frac{\sigma_2^2}{n_2}}} $$

Under $H_0$: $$ Z \sim \mathcal{N}(0,1) $$


6.3 Rejection Regions¶

Let $\alpha$ be the significance level.

  • Two-sided $$ |Z| \ge z_{1-\alpha/2} $$

  • Right-tailed $$ Z \ge z_{1-\alpha} $$

  • Left-tailed $$ Z \le -z_{1-\alpha} $$
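With scipy, the critical values are one-liners; for example, at $\alpha = 0.05$:

```python
from scipy.stats import norm

# Normal critical values at alpha = 0.05 for the three rejection regions.
alpha = 0.05
z_two = norm.ppf(1 - alpha / 2)   # two-sided:    reject if |Z| >= z_two
z_one = norm.ppf(1 - alpha)       # right-tailed: reject if Z >= z_one
                                  # left-tailed:  reject if Z <= -z_one
print(round(z_two, 4), round(z_one, 4))  # 1.96 1.6449
```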


7. Case II: Variances Unknown but Equal (Pooled t-Test)

7.1 Assumptions¶

  • Both populations are normal
  • Variances are equal but unknown: $$ \sigma_1^2 = \sigma_2^2 = \sigma^2 $$

7.2 Sample Variances¶

$$ S_X^2 = \frac{1}{n_1 - 1}\sum_{i=1}^{n_1}(X_i - \bar{X})^2 $$$$ S_Y^2 = \frac{1}{n_2 - 1}\sum_{j=1}^{n_2}(Y_j - \bar{Y})^2 $$

7.3 Pooled Variance Estimator¶

$$ S_p^2 = \frac{(n_1 - 1)S_X^2 + (n_2 - 1)S_Y^2} {n_1 + n_2 - 2} $$

7.4 Test Statistic¶

$$ T = \frac{(\bar{X} - \bar{Y})} {S_p\sqrt{\frac{1}{n_1} + \frac{1}{n_2}}} $$

Under $H_0$: $$ T \sim t_{n_1 + n_2 - 2} $$
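The pooled formula can be verified against scipy's built-in equal-variance test, `scipy.stats.ttest_ind` with `equal_var=True` (the two short samples below are made up purely for illustration):

```python
from scipy.stats import t, ttest_ind

# Pooled two-sample t-test computed by hand vs. scipy's equal-variance test.
x = [4.1, 5.0, 6.2, 5.5, 4.8, 5.9]
y = [3.9, 4.4, 5.1, 4.0, 4.7]
n1, n2 = len(x), len(y)
x_bar, y_bar = sum(x) / n1, sum(y) / n2
s1_sq = sum((v - x_bar) ** 2 for v in x) / (n1 - 1)
s2_sq = sum((v - y_bar) ** 2 for v in y) / (n2 - 1)

sp_sq = ((n1 - 1) * s1_sq + (n2 - 1) * s2_sq) / (n1 + n2 - 2)  # pooled variance
t_obs = (x_bar - y_bar) / (sp_sq * (1 / n1 + 1 / n2)) ** 0.5
df = n1 + n2 - 2
p_two = 2 * (1 - t.cdf(abs(t_obs), df))

t_scipy, p_scipy = ttest_ind(x, y, equal_var=True)
print(abs(t_obs - t_scipy) < 1e-9, abs(p_two - p_scipy) < 1e-9)  # True True
```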


8. Case III: Variances Unknown and Unequal (Welch t-Test)

8.1 Assumptions¶

  • Populations are normal
  • Variances may differ: $$ \sigma_1^2 \neq \sigma_2^2 $$

This is the default and recommended test in practice.


8.2 Test Statistic¶

$$ T = \frac{(\bar{X} - \bar{Y})} {\sqrt{\frac{S_X^2}{n_1} + \frac{S_Y^2}{n_2}}} $$

8.3 Degrees of Freedom (Welch–Satterthwaite)¶

$$ \nu = \frac{ \left(\frac{S_X^2}{n_1} + \frac{S_Y^2}{n_2}\right)^2 }{ \frac{(S_X^2/n_1)^2}{n_1 - 1} + \frac{(S_Y^2/n_2)^2}{n_2 - 1} } $$

Under $H_0$: $$ T \sim t_\nu $$
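The Welch statistic and the $\nu$ formula can be checked against `scipy.stats.ttest_ind(..., equal_var=False)` (again with made-up data for illustration):

```python
from scipy.stats import t, ttest_ind

# Welch t statistic and Welch-Satterthwaite df by hand vs. scipy.
x = [12.1, 14.3, 13.8, 15.0, 12.9, 14.4, 13.1]
y = [10.2, 16.5, 9.8, 17.1, 11.0]
n1, n2 = len(x), len(y)
x_bar, y_bar = sum(x) / n1, sum(y) / n2
v1 = sum((v - x_bar) ** 2 for v in x) / (n1 - 1) / n1   # S_X^2 / n1
v2 = sum((v - y_bar) ** 2 for v in y) / (n2 - 1) / n2   # S_Y^2 / n2

t_obs = (x_bar - y_bar) / (v1 + v2) ** 0.5
nu = (v1 + v2) ** 2 / (v1 ** 2 / (n1 - 1) + v2 ** 2 / (n2 - 1))
p_two = 2 * (1 - t.cdf(abs(t_obs), nu))  # t.cdf accepts non-integer df

t_scipy, p_scipy = ttest_ind(x, y, equal_var=False)
print(abs(t_obs - t_scipy) < 1e-9, abs(p_two - p_scipy) < 1e-9)  # True True
```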


9. p-value¶

Let $T_{\text{obs}}$ or $Z_{\text{obs}}$ be the observed statistic.

  • Two-sided $$ p\text{-value} = 2\bigl(1 - F(|T_{\text{obs}}|)\bigr) $$

  • Right-tailed $$ p\text{-value} = 1 - F(T_{\text{obs}}) $$

  • Left-tailed $$ p\text{-value} = F(T_{\text{obs}}) $$

where $F$ is the CDF of the appropriate reference distribution.


10. Decision Rule¶

$$ \text{Reject } H_0 \iff p\text{-value} \le \alpha $$

13. Confidence Intervals¶

Known Variances¶

$$ (\bar{X} - \bar{Y}) \pm z_{1-\alpha/2} \sqrt{\frac{\sigma_1^2}{n_1} + \frac{\sigma_2^2}{n_2}} $$

Equal Unknown Variances¶

$$ (\bar{X} - \bar{Y}) \pm t_{n_1+n_2-2,\,1-\alpha/2} S_p\sqrt{\frac{1}{n_1} + \frac{1}{n_2}} $$

Unequal Variances (Welch)¶

$$ (\bar{X} - \bar{Y}) \pm t_{\nu,\,1-\alpha/2} \sqrt{\frac{S_X^2}{n_1} + \frac{S_Y^2}{n_2}} $$
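For example, a 90% Welch interval computed from summary statistics (the numbers below are illustrative rounded values, not a specific dataset):

```python
from math import sqrt
from scipy.stats import t

# 90% Welch confidence interval for mu1 - mu2 from summary statistics.
x_bar, s1, n1 = 596.24, 163.24, 17
y_bar, s2, n2 = 481.50, 179.40, 16
alpha = 0.10

v1, v2 = s1 ** 2 / n1, s2 ** 2 / n2
nu = (v1 + v2) ** 2 / (v1 ** 2 / (n1 - 1) + v2 ** 2 / (n2 - 1))
half = t.ppf(1 - alpha / 2, nu) * sqrt(v1 + v2)
ci = (x_bar - y_bar - half, x_bar - y_bar + half)
print(ci)  # interval centered at x_bar - y_bar
```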

14. Practical Data Science Remarks¶

  • Welch’s t-test should be the default choice
  • The pooled t-test is rarely justified in practice
  • Large samples reduce sensitivity to normality
  • Always report:
    • means
    • difference of means
    • confidence interval
    • sample sizes

15. Summary¶

  • Two independent samples
  • Parameter: difference of means
  • Known variances → Z-test
  • Unknown equal variances → pooled t-test
  • Unknown unequal variances → Welch t-test
  • Strong connection to confidence intervals

Example: Case I: Variances Known (Two-Sample Z-Test)

Problem¶

A university adviser wants to see whether there is a significant difference in the ages of full-time students and part-time students. A random sample of 50 students from each group is selected.

The population standard deviations are assumed to be known:

  • Full-time students: 3.68 years
  • Part-time students: 4.7 years

At the significance level $\alpha = 0.05$, decide whether there is enough evidence to support the claim that there is a difference in the ages of the two groups. Use the p-value method.


Age Data¶

Full-Time Students¶

22 25 27 23 26 28 26 24 25 20
19 18 30 26 18 18 19 32 23 19
18 22 26 19 19 21 23 18 20 18
22 18 20 19 23 26 24 27 26 18
22 21 19 21 21 19 18 29 19 22

Part-Time Students¶

18 20 19 18 22 25 24 35 23 18
24 26 30 22 22 22 21 18 20 19
19 32 29 23 21 19 36 27 27 20
20 19 19 20 25 23 22 28 25 20
20 21 18 19 23 26 35 19 19 18
In [3]:
import math
from scipy.stats import norm


def two_sample_ztest_mean(
    sample1=None,
    sample2=None,
    x1_bar=None, sigma1=None, n1=None,
    x2_bar=None, sigma2=None, n2=None,
    diff0=0.0,                  # H0: mu1 - mu2 = diff0
    alternative="two-sided",     # "two-sided", "greater", "less"
    alpha=0.05
):
    """
    Two-sample Z-test for the difference of means (known variances).

    Input options:
      (A) sample vectors: sample1, sample2  (lists or numpy arrays)
          -> sigma1 and sigma2 MUST still be provided
      (B) summary statistics:
          (x1_bar, sigma1, n1) and (x2_bar, sigma2, n2)

    Uses BOTH:
      (1) p-value method
      (2) critical region method
    """

    # ---------- Case A: vectors provided ----------
    if sample1 is not None and sample2 is not None:
        if sigma1 is None or sigma2 is None:
            raise ValueError("For a Z-test, sigma1 and sigma2 must be provided.")

        n1 = len(sample1)
        n2 = len(sample2)

        if n1 == 0 or n2 == 0:
            raise ValueError("Samples must be non-empty.")

        x1_bar = sum(sample1) / n1
        x2_bar = sum(sample2) / n2

    # ---------- Case B: summary statistics ----------
    elif (x1_bar is not None and sigma1 is not None and n1 is not None and
          x2_bar is not None and sigma2 is not None and n2 is not None):
        pass

    else:
        raise ValueError(
            "Provide either (sample1, sample2, sigma1, sigma2) "
            "OR (x1_bar, sigma1, n1, x2_bar, sigma2, n2)."
        )

    if n1 <= 0 or n2 <= 0:
        raise ValueError("Sample sizes must be positive.")
    if sigma1 <= 0 or sigma2 <= 0:
        raise ValueError("Standard deviations must be positive.")

    # ---------- Standard error ----------
    se = math.sqrt((sigma1 ** 2) / n1 + (sigma2 ** 2) / n2)

    # ---------- Z statistic ----------
    z_obs = ((x1_bar - x2_bar) - diff0) / se

    # ---------- p-value method ----------
    if alternative == "two-sided":
        p_value = 2 * (1 - norm.cdf(abs(z_obs)))
    elif alternative == "greater":
        p_value = 1 - norm.cdf(z_obs)
    elif alternative == "less":
        p_value = norm.cdf(z_obs)
    else:
        raise ValueError("alternative must be 'two-sided', 'greater', or 'less'.")

    reject_by_pvalue = p_value <= alpha  # decision rule: reject H0 iff p-value <= alpha

    # ---------- Critical region method ----------
    if alternative == "two-sided":
        z_crit = norm.ppf(1 - alpha / 2)
        reject_by_critical = abs(z_obs) > z_crit
        critical_region = f"|Z| > {z_crit:.4f}"
    elif alternative == "greater":
        z_crit = norm.ppf(1 - alpha)
        reject_by_critical = z_obs > z_crit
        critical_region = f"Z > {z_crit:.4f}"
    else:  # "less"
        z_crit = norm.ppf(alpha)
        reject_by_critical = z_obs < z_crit
        critical_region = f"Z < {z_crit:.4f}"

    # ---------- Return results ----------
    return {
        "inputs": {
            "n1": n1, "x1_bar": x1_bar, "sigma1": sigma1,
            "n2": n2, "x2_bar": x2_bar, "sigma2": sigma2,
            "diff0": diff0,
            "alternative": alternative,
            "alpha": alpha
        },
        "statistic": {
            "z_obs": z_obs,
            "se": se
        },
        "p_value_method": {
            "p_value": p_value,
            "reject_H0": reject_by_pvalue
        },
        "critical_region_method": {
            "critical_region": critical_region,
            "z_crit": z_crit,
            "reject_H0": reject_by_critical
        }
    }
In [7]:
sample1 = [
    22, 25, 27, 23, 26, 28, 26, 24, 25, 20,
    19, 18, 30, 26, 18, 18, 19, 32, 23, 19,
    18, 22, 26, 19, 19, 21, 23, 18, 20, 18,
    22, 18, 20, 19, 23, 26, 24, 27, 26, 18,
    22, 21, 19, 21, 21, 19, 18, 29, 19, 22
]

sample2 = [
    18, 20, 19, 18, 22, 25, 24, 35, 23, 18,
    24, 26, 30, 22, 22, 22, 21, 18, 20, 19,
    19, 32, 29, 23, 21, 19, 36, 27, 27, 20,
    20, 19, 19, 20, 25, 23, 22, 28, 25, 20,
    20, 21, 18, 19, 23, 26, 35, 19, 19, 18
]

res = two_sample_ztest_mean(
    sample1=sample1,
    sample2=sample2,
    sigma1=3.68,
    sigma2=4.7,
    diff0=0.0,
    alternative="two-sided",
    alpha=0.05
)

print(res)
{'inputs': {'n1': 50, 'x1_bar': 22.12, 'sigma1': 3.68, 'n2': 50, 'x2_bar': 22.76, 'sigma2': 4.7, 'diff0': 0.0, 'alternative': 'two-sided', 'alpha': 0.05}, 'statistic': {'z_obs': -0.7581278287298289, 'se': 0.844184813888523}, 'p_value_method': {'p_value': 0.4483744614036329, 'reject_H0': False}, 'critical_region_method': {'critical_region': '|Z| > 1.9600', 'z_crit': 1.959963984540054, 'reject_H0': False}}

Extra: Using summary statistics (classic textbook case)¶

In [8]:
res = two_sample_ztest_mean(
    x1_bar=102.3, sigma1=10.0, n1=50,
    x2_bar=98.1,  sigma2=9.5,  n2=60,
    diff0=0.0,
    alternative="greater",
    alpha=0.05
)

print(res)
{'inputs': {'n1': 50, 'x1_bar': 102.3, 'sigma1': 10.0, 'n2': 60, 'x2_bar': 98.1, 'sigma2': 9.5, 'diff0': 0.0, 'alternative': 'greater', 'alpha': 0.05}, 'statistic': {'z_obs': 2.2436593178029187, 'se': 1.8719419506669182}, 'p_value_method': {'p_value': 0.012427164766212306, 'reject_H0': True}, 'critical_region_method': {'critical_region': 'Z > 1.6449', 'z_crit': 1.6448536269514722, 'reject_H0': True}}

Example: Case II: Variances Unknown but Equal (Pooled t-Test)

Problem¶

A manager believes that the average weekly coffee sales at their Portland store are greater than the average weekly coffee sales at their Cannon Beach store.

They take a random sample of weekly sales from both stores over the last year.
Assume that the sales are normally distributed with equal variances.

Using the p-value method and a significance level of $\alpha = 0.05$, test the manager’s claim.


Weekly Sales Data¶

Portland Store¶

1510 1257 4125 4677 1510
3055 5244 1764 4125 6128
6128 3319 3319 6433 3319
5244 3055

Cannon Beach Store¶

3585 1510 4399 5244 1764
1510 3853 4399 5244 1510
1510 5244 2533 4125 3585
2275 2533 2275 4399 3585
4125 5244
In [29]:
import math
from scipy.stats import t


def two_sample_pooled_ttest(
    sample1=None,
    sample2=None,
    x1_bar=None, s1=None, n1=None,
    x2_bar=None, s2=None, n2=None,
    diff0=0.0,                  # H0: mu1 - mu2 = diff0
    alternative="two-sided",     # "two-sided", "greater", "less"
    alpha=0.05
):
    """
    Two-sample t-test for means with pooled variance (equal variances).

    Input options:
      (A) sample vectors: sample1, sample2 (lists or numpy arrays)
      (B) summary statistics:
          (x1_bar, s1, n1) and (x2_bar, s2, n2)

    Assumption:
      Var(X1) = Var(X2)

    Uses BOTH:
      (1) p-value method
      (2) critical region method
    """

    # ---------- Case A: vectors ----------
    if sample1 is not None and sample2 is not None:
        n1 = len(sample1)
        n2 = len(sample2)

        if n1 < 2 or n2 < 2:
            raise ValueError("Each sample must have at least 2 observations.")

        x1_bar = sum(sample1) / n1
        x2_bar = sum(sample2) / n2

        s1 = math.sqrt(sum((x - x1_bar) ** 2 for x in sample1) / (n1 - 1))
        s2 = math.sqrt(sum((x - x2_bar) ** 2 for x in sample2) / (n2 - 1))

    # ---------- Case B: summary statistics ----------
    elif (x1_bar is not None and s1 is not None and n1 is not None and
          x2_bar is not None and s2 is not None and n2 is not None):
        if n1 < 2 or n2 < 2:
            raise ValueError("Each sample must have n >= 2.")
    else:
        raise ValueError(
            "Provide either (sample1, sample2) "
            "OR (x1_bar, s1, n1, x2_bar, s2, n2)."
        )

    # ---------- Pooled variance ----------
    sp2 = ((n1 - 1) * s1**2 + (n2 - 1) * s2**2) / (n1 + n2 - 2)
    sp = math.sqrt(sp2)

    # ---------- Standard error ----------
    se = sp * math.sqrt(1 / n1 + 1 / n2)

    # ---------- Test statistic ----------
    t_obs = ((x1_bar - x2_bar) - diff0) / se

    # ---------- Degrees of freedom ----------
    df = n1 + n2 - 2

    # ---------- p-value method ----------
    if alternative == "two-sided":
        p_value = 2 * (1 - t.cdf(abs(t_obs), df))
    elif alternative == "greater":
        # H1: mu1 - mu2 > diff0
        p_value = 1 - t.cdf(t_obs, df)
    elif alternative == "less":
        # H1: mu1 - mu2 < diff0
        p_value = t.cdf(t_obs, df)
    else:
        raise ValueError("alternative must be 'two-sided', 'greater', or 'less'.")

    reject_by_pvalue = p_value <= alpha  # decision rule: reject H0 iff p-value <= alpha

    # ---------- Critical region method ----------
    if alternative == "two-sided":
        t_crit = t.ppf(1 - alpha / 2, df)
        reject_by_critical = abs(t_obs) > t_crit
        critical_region = f"|T| > {t_crit:.4f}"
    elif alternative == "greater":
        t_crit = t.ppf(1 - alpha, df)
        reject_by_critical = t_obs > t_crit
        critical_region = f"T > {t_crit:.4f}"
    else:  # "less"
        t_crit = t.ppf(alpha, df)
        reject_by_critical = t_obs < t_crit
        critical_region = f"T < {t_crit:.4f}"

    # ---------- Return results ----------
    return {
        "inputs": {
            "n1": n1, "x1_bar": x1_bar, "s1": s1,
            "n2": n2, "x2_bar": x2_bar, "s2": s2,
            "diff0": diff0,
            "alternative": alternative,
            "alpha": alpha
        },
        "statistic": {
            "t_obs": t_obs,
            "df": df,
            "sp": sp,
            "se": se
        },
        "p_value_method": {
            "p_value": p_value,
            "reject_H0": reject_by_pvalue
        },
        "critical_region_method": {
            "critical_region": critical_region,
            "t_crit": t_crit,
            "reject_H0": reject_by_critical
        }
    }
In [30]:
res = two_sample_pooled_ttest(
    sample1 = [
    1510, 1257, 4125, 4677, 1510,
    3055, 5244, 1764, 4125, 6128,
    6128, 3319, 3319, 6433, 3319,
    5244, 3055
],
    sample2 = [
    3585, 1510, 4399, 5244, 1764,
    1510, 3853, 4399, 5244, 1510,
    1510, 5244, 2533, 4125, 3585,
    2275, 2533, 2275, 4399, 3585,
    4125, 5244
],
    diff0=0.0,
    alternative="greater",
    alpha=0.05
)

print(res)
{'inputs': {'n1': 17, 'x1_bar': 3777.176470588235, 's1': 1692.2272762285108, 'n2': 22, 'x2_bar': 3384.1363636363635, 's2': 1361.8696635579633, 'diff0': 0.0, 'alternative': 'greater', 'alpha': 0.05}, 'statistic': {'t_obs': 0.8041347855375036, 'df': 37, 'sp': 1513.6013886833787, 'se': 488.7739145486088}, 'p_value_method': {'p_value': 0.21322788001590864, 'reject_H0': False}, 'critical_region_method': {'critical_region': 'T > 1.6871', 't_crit': 1.6870936167109873, 'reject_H0': False}}
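As a cross-check (not part of the original solution), scipy's built-in pooled test reproduces the same statistic and one-sided p-value; the `alternative` keyword requires scipy >= 1.6:

```python
from scipy.stats import ttest_ind

# Weekly sales data from the problem above.
portland = [1510, 1257, 4125, 4677, 1510, 3055, 5244, 1764, 4125, 6128,
            6128, 3319, 3319, 6433, 3319, 5244, 3055]
cannon_beach = [3585, 1510, 4399, 5244, 1764, 1510, 3853, 4399, 5244, 1510,
                1510, 5244, 2533, 4125, 3585, 2275, 2533, 2275, 4399, 3585,
                4125, 5244]

# Pooled (equal-variance) test of H1: mu_Portland > mu_CannonBeach.
t_obs, p_value = ttest_ind(portland, cannon_beach, equal_var=True,
                           alternative="greater")
print(round(t_obs, 4), round(p_value, 4))  # 0.8041 0.2132
```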
In [31]:
# Using summary statistics
res = two_sample_pooled_ttest(
    x1_bar = 49.4,
    x2_bar = 52.3,
    s1 = 5,
    s2 = 15,
    n1 = 350,
    n2= 230,
    diff0=0.0,
    alternative="greater",
    alpha=0.05
)

print(res)
{'inputs': {'n1': 350, 'x1_bar': 49.4, 's1': 5, 'n2': 230, 'x2_bar': 52.3, 's2': 15, 'diff0': 0.0, 'alternative': 'greater', 'alpha': 0.05}, 'statistic': {'t_obs': -3.3463189660291834, 'df': 578, 'sp': 10.20973821041752, 'se': 0.8666239020965784}, 'p_value_method': {'p_value': 0.9995639600843331, 'reject_H0': False}, 'critical_region_method': {'critical_region': 'T > 1.6475', 't_crit': 1.6474941618836259, 'reject_H0': False}}

Example: Case III: Variances Unknown and Unequal (Welch t-Test)

Problem¶

A researcher is studying how much electricity (in kilowatt hours) households from two different cities use in their homes. Random samples of 17 days in Sacramento and 16 days in Portland are given below.

Test to see if there is a difference using all three methods (critical value, p-value, and confidence interval). Assume that electricity use is normally distributed and the population variances are unequal. Use $\alpha = 0.10$.


Electricity Use Data (kWh)¶

Sacramento (n = 17)¶

474 414 692 467 443 605
419 277 670 696 783 577
813 694 565 663 884

Portland (n = 16)¶

783 587 527 546 442 107
728 662 371 427 277 474
605 293 320 555
In [24]:
import math
from scipy.stats import t


def welch_ttest_two_sample(
    sample1=None,
    sample2=None,
    x1_bar=None, s1=None, n1=None,
    x2_bar=None, s2=None, n2=None,
    diff0=0.0,                  # H0: mu1 - mu2 = diff0 (usually 0)
    alternative="two-sided",     # "two-sided", "greater", "less"
    alpha=0.05
):
    """
    Two-sample Welch t-test for means (unequal variances).

    Input options:
      (A) sample vectors: sample1, sample2 (lists or numpy arrays)
      (B) summary statistics:
          (x1_bar, s1, n1) and (x2_bar, s2, n2)

    Uses BOTH:
      (1) p-value method
      (2) critical region method

    Welch test statistic:
        t = ((x1_bar - x2_bar) - diff0) / sqrt(s1^2/n1 + s2^2/n2)

    Welch–Satterthwaite degrees of freedom:
        df = (v1 + v2)^2 / (v1^2/(n1-1) + v2^2/(n2-1))
    where v1 = s1^2/n1 and v2 = s2^2/n2
    """

    # ---------- Case A: vectors ----------
    if sample1 is not None and sample2 is not None:
        n1 = len(sample1)
        n2 = len(sample2)
        if n1 < 2 or n2 < 2:
            raise ValueError("Each sample must have at least 2 observations.")

        x1_bar = sum(sample1) / n1
        x2_bar = sum(sample2) / n2

        s1 = math.sqrt(sum((x - x1_bar) ** 2 for x in sample1) / (n1 - 1))
        s2 = math.sqrt(sum((x - x2_bar) ** 2 for x in sample2) / (n2 - 1))

    # ---------- Case B: summary stats ----------
    elif (x1_bar is not None and s1 is not None and n1 is not None and
          x2_bar is not None and s2 is not None and n2 is not None):
        if n1 < 2 or n2 < 2:
            raise ValueError("Each sample must have n >= 2.")

    else:
        raise ValueError(
            "Provide either (sample1, sample2) "
            "OR (x1_bar, s1, n1, x2_bar, s2, n2)."
        )

    # ---------- Welch standard error ----------
    v1 = (s1 ** 2) / n1
    v2 = (s2 ** 2) / n2
    se = math.sqrt(v1 + v2)
    if se == 0:
        raise ValueError("Standard error is 0 (check sample variances).")

    # ---------- Test statistic ----------
    t_obs = ((x1_bar - x2_bar) - diff0) / se

    # ---------- Welch–Satterthwaite df ----------
    denom = (v1 ** 2) / (n1 - 1) + (v2 ** 2) / (n2 - 1)
    if denom == 0:
        raise ValueError("Cannot compute Welch df (denominator is 0).")
    df = (v1 + v2) ** 2 / denom

    # ---------- p-value method ----------
    if alternative == "two-sided":
        p_value = 2 * (1 - t.cdf(abs(t_obs), df))
    elif alternative == "greater":
        p_value = 1 - t.cdf(t_obs, df)
    elif alternative == "less":
        p_value = t.cdf(t_obs, df)
    else:
        raise ValueError("alternative must be 'two-sided', 'greater', or 'less'.")

    reject_by_pvalue = p_value <= alpha  # decision rule: reject H0 iff p-value <= alpha

    # ---------- Critical region method ----------
    if alternative == "two-sided":
        t_crit = t.ppf(1 - alpha / 2, df)
        reject_by_critical = abs(t_obs) > t_crit
        critical_region = f"|T| > {t_crit:.4f}"
    elif alternative == "greater":
        t_crit = t.ppf(1 - alpha, df)
        reject_by_critical = t_obs > t_crit
        critical_region = f"T > {t_crit:.4f}"
    else:  # "less"
        t_crit = t.ppf(alpha, df)
        reject_by_critical = t_obs < t_crit
        critical_region = f"T < {t_crit:.4f}"

    return {
        "inputs": {
            "n1": n1, "x1_bar": x1_bar, "s1": s1,
            "n2": n2, "x2_bar": x2_bar, "s2": s2,
            "diff0": diff0,
            "alternative": alternative,
            "alpha": alpha
        },
        "statistic": {
            "t_obs": t_obs,
            "df": df,
            "se": se
        },
        "p_value_method": {
            "p_value": p_value,
            "reject_H0": reject_by_pvalue
        },
        "critical_region_method": {
            "critical_region": critical_region,
            "t_crit": t_crit,
            "reject_H0": reject_by_critical
        }
    }
In [32]:
# Define the samples
sacramento = [
    474, 414, 692, 467, 443, 605,
    419, 277, 670, 696, 783, 577,
    813, 694, 565, 663, 884
]

portland = [
    783, 587, 527, 546, 442, 107,
    728, 662, 371, 427, 277, 474,
    605, 293, 320, 555
]

# Run Welch two-sample t-test
result = welch_ttest_two_sample(
    sample1=sacramento,
    sample2=portland,
    diff0=0.0,
    alternative="two-sided",
    alpha=0.05
)

# Display results
result
Out[32]:
{'inputs': {'n1': 17,
  'x1_bar': 596.2352941176471,
  's1': 163.23622813723244,
  'n2': 16,
  'x2_bar': 481.5,
  's2': 179.39565212122616,
  'diff0': 0.0,
  'alternative': 'two-sided',
  'alpha': 0.05},
 'statistic': {'t_obs': 1.9178995268733827,
  'df': 30.259773401879773,
  'se': 59.82341228515343},
 'p_value_method': {'p_value': 0.06460433438295432, 'reject_H0': False},
 'critical_region_method': {'critical_region': '|T| > 2.0415',
  't_crit': 2.041537509617953,
  'reject_H0': False}}
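As a cross-check, `scipy.stats.ttest_ind` with `equal_var=False` reproduces the same Welch statistic and two-sided p-value:

```python
from scipy.stats import ttest_ind

# Electricity use data from the problem above.
sacramento = [474, 414, 692, 467, 443, 605, 419, 277, 670,
              696, 783, 577, 813, 694, 565, 663, 884]
portland = [783, 587, 527, 546, 442, 107, 728, 662,
            371, 427, 277, 474, 605, 293, 320, 555]

# Welch (unequal-variance) two-sided test.
t_obs, p_value = ttest_ind(sacramento, portland, equal_var=False)
print(round(t_obs, 4), round(p_value, 4))  # 1.9179 0.0646
```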

Paired t-Test

1. Problem Setting and Motivation¶

The paired t-test is used when we want to compare two means measured on the same experimental units.

Unlike the two-sample test for means with independent samples, here the observations come in pairs.

Typical data science and applied questions:

  • Did users’ performance improve after a system update?
  • Is there a difference in measurements before and after treatment?
  • Did a model reduce prediction error on the same test set?
  • Is the average heart rate different before and after exercise?

2. When Is a Paired Test Appropriate?¶

Use a paired t-test when:

  • Each observation in sample 1 is naturally paired with one in sample 2
  • Measurements are taken on the same individual/object
  • Pairing induces dependence within pairs

Examples:

  • Before vs after measurements
  • Matched subjects
  • Same users under two conditions
  • Same items evaluated by two methods

❌ Do not use a paired test for independent groups.


3. Statistical Model¶

Let $$ (X_1, Y_1), \dots, (X_n, Y_n) $$ be paired observations.

Define the difference variable: $$ D_i = X_i - Y_i $$

Assume: $$ D_1, \dots, D_n \;\text{i.i.d.} $$ with: $$ \mathbb{E}[D_i] = \mu_D, \qquad \mathrm{Var}(D_i) = \sigma_D^2 $$

The problem reduces to a one-sample test for the mean of the differences.
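This reduction is easy to verify in scipy: `ttest_rel` on the pairs gives exactly the same result as `ttest_1samp` on the differences (the small paired dataset below is made up for illustration):

```python
from scipy.stats import ttest_rel, ttest_1samp

# Paired test equals a one-sample t-test on D_i = X_i - Y_i.
x = [10.3, 11.1, 9.8, 12.0, 10.7, 11.5]
y = [9.9, 10.4, 10.1, 11.2, 10.0, 11.1]
d = [a - b for a, b in zip(x, y)]

t_paired, p_paired = ttest_rel(x, y)
t_one, p_one = ttest_1samp(d, popmean=0.0)
print(abs(t_paired - t_one) < 1e-9, abs(p_paired - p_one) < 1e-9)  # True True
```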


4. Parameter of Interest¶

The parameter of interest is: $$ \mu_D = \mathbb{E}[X - Y] $$

If $\mu_D = 0$, there is no systematic difference between the paired measurements.


5. Hypotheses¶

We test: $$ H_0: \mu_D = 0 $$

Against one of the following alternatives:

  • Two-sided $$ H_1: \mu_D \neq 0 $$

  • Right-tailed $$ H_1: \mu_D > 0 $$

  • Left-tailed $$ H_1: \mu_D < 0 $$

The alternative must be chosen before observing the data.


6. Sample Statistics¶

The sample mean of differences: $$ \bar{D} = \frac{1}{n}\sum_{i=1}^n D_i $$

The sample variance of differences: $$ S_D^2 = \frac{1}{n-1}\sum_{i=1}^n (D_i - \bar{D})^2 $$


7. Assumptions¶

  • Differences $D_i$ are independent
  • Differences are normally distributed
    or sample size is large (CLT applies)

The paired t-test is robust to mild deviations from normality.


8. Test Statistic (t-Statistic)¶

Define: $$ T = \frac{\bar{D} - 0}{S_D / \sqrt{n}} = \frac{\bar{D}}{S_D / \sqrt{n}} $$

Under $H_0$: $$ T \sim t_{n-1} $$

(Student’s t-distribution with $n-1$ degrees of freedom.)


9. Rejection Regions¶

Let $\alpha$ be the significance level.

  • Two-sided $$ |T| \ge t_{n-1,\,1-\alpha/2} $$

  • Right-tailed $$ T \ge t_{n-1,\,1-\alpha} $$

  • Left-tailed $$ T \le -t_{n-1,\,1-\alpha} $$


10. p-value¶

Let $T_{\text{obs}}$ be the observed value of the test statistic.

  • Two-sided $$ p\text{-value} = 2\bigl(1 - F_{t_{n-1}}(|T_{\text{obs}}|)\bigr) $$

  • Right-tailed $$ p\text{-value} = 1 - F_{t_{n-1}}(T_{\text{obs}}) $$

  • Left-tailed $$ p\text{-value} = F_{t_{n-1}}(T_{\text{obs}}) $$


11. Decision Rule¶

$$ \text{Reject } H_0 \iff p\text{-value} \le \alpha $$

14. Confidence Interval for the Mean Difference¶

A $(1-\alpha)$ confidence interval for $\mu_D$: $$ \bar{D} \pm t_{n-1,\,1-\alpha/2} \frac{S_D}{\sqrt{n}} $$

Relationship:

$H_0: \mu_D = 0$ is rejected at level $\alpha$
iff $0$ is outside the $(1-\alpha)$ confidence interval.
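This duality can be verified numerically; with illustrative summary values (not from a specific dataset):

```python
from math import sqrt
from scipy.stats import t

# Two-sided paired test vs. the confidence interval at the same level.
d_bar, s_d, n, alpha = 1.2, 2.5, 15, 0.05

se = s_d / sqrt(n)
t_obs = d_bar / se
p = 2 * (1 - t.cdf(abs(t_obs), n - 1))
t_crit = t.ppf(1 - alpha / 2, n - 1)
ci = (d_bar - t_crit * se, d_bar + t_crit * se)

# Duality: reject at level alpha exactly when 0 lies outside the CI.
print((p <= alpha) == (not (ci[0] <= 0 <= ci[1])))  # True
```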


15. Relation to Other Tests¶

  • Paired t-test = one-sample t-test on differences
  • More powerful than two-sample tests when pairing is appropriate
  • Special case of linear regression with subject fixed effects

16. Practical Data Science Remarks¶

  • Always plot the differences before testing
  • Pairing can dramatically reduce noise
  • Paired tests are common in:
    • A/B tests with the same users
    • Model comparisons on the same dataset
    • Before/after experiments

17. Summary¶

  • Used for dependent samples
  • Reduces to a one-sample t-test on differences
  • Test statistic follows $t_{n-1}$
  • Higher power when pairing is meaningful
  • Strong link to confidence intervals

Example:

In an effort to increase production of an automobile part, a factory manager decides to play music in the manufacturing area. Eight workers are selected, and the number of items each worker produces during a specific day is recorded.

After one week of music, the same workers are monitored again. The data are given below.

At the significance level $\alpha = 0.05$, can the manager conclude that the music has increased production? Assume that production is normally distributed.


Production Data¶

Worker   1   2   3   4   5   6   7   8
Before   6   8  10   9   5  12   9   7
After   10  12   9  12   8  13   8  10
In [33]:
import math
from scipy.stats import t


def paired_ttest(
    sample_before=None,
    sample_after=None,
    d_bar=None, s_d=None, n=None,
    mu0=0.0,                   # H0: mean difference = mu0 (usually 0)
    alternative="two-sided",   # "two-sided", "greater", "less"
    alpha=0.05
):
    """
    Paired t-test for dependent samples.

    Input options:
      (A) paired sample vectors: sample_before, sample_after
      (B) summary statistics of differences: (d_bar, s_d, n)

    Uses BOTH:
      (1) p-value method
      (2) critical region method
    """

    # ---------- Case A: paired vectors ----------
    if sample_before is not None and sample_after is not None:
        if len(sample_before) != len(sample_after):
            raise ValueError("Paired samples must have the same length.")

        n = len(sample_before)
        if n < 2:
            raise ValueError("At least two paired observations are required.")

        # Differences
        diffs = [x - y for x, y in zip(sample_before, sample_after)]

        d_bar = sum(diffs) / n
        s_d = math.sqrt(
            sum((d - d_bar) ** 2 for d in diffs) / (n - 1)
        )

    # ---------- Case B: summary stats ----------
    elif d_bar is not None and s_d is not None and n is not None:
        if n < 2:
            raise ValueError("Sample size must be at least 2.")

    else:
        raise ValueError(
            "Provide either (sample_before, sample_after) "
            "OR (d_bar, s_d, n)."
        )

    # ---------- Standard error ----------
    se = s_d / math.sqrt(n)
    if se == 0:
        raise ValueError("Standard error is 0 (check data).")

    # ---------- Test statistic ----------
    t_obs = (d_bar - mu0) / se

    # ---------- Degrees of freedom ----------
    df = n - 1

    # ---------- p-value method ----------
    if alternative == "two-sided":
        p_value = 2 * (1 - t.cdf(abs(t_obs), df))
    elif alternative == "greater":
        # H1: mean difference > mu0
        p_value = 1 - t.cdf(t_obs, df)
    elif alternative == "less":
        # H1: mean difference < mu0
        p_value = t.cdf(t_obs, df)
    else:
        raise ValueError("alternative must be 'two-sided', 'greater', or 'less'.")

    reject_by_pvalue = p_value <= alpha  # decision rule: reject H0 iff p-value <= alpha

    # ---------- Critical region method ----------
    if alternative == "two-sided":
        t_crit = t.ppf(1 - alpha / 2, df)
        reject_by_critical = abs(t_obs) > t_crit
        critical_region = f"|T| > {t_crit:.4f}"
    elif alternative == "greater":
        t_crit = t.ppf(1 - alpha, df)
        reject_by_critical = t_obs > t_crit
        critical_region = f"T > {t_crit:.4f}"
    else:  # "less"
        t_crit = t.ppf(alpha, df)
        reject_by_critical = t_obs < t_crit
        critical_region = f"T < {t_crit:.4f}"

    # ---------- Return results ----------
    return {
        "inputs": {
            "n": n,
            "mean_difference": d_bar,
            "sd_difference": s_d,
            "mu0": mu0,
            "alternative": alternative,
            "alpha": alpha
        },
        "statistic": {
            "t_obs": t_obs,
            "df": df,
            "se": se
        },
        "p_value_method": {
            "p_value": p_value,
            "reject_H0": reject_by_pvalue
        },
        "critical_region_method": {
            "critical_region": critical_region,
            "t_crit": t_crit,
            "reject_H0": reject_by_critical
        }
    }
In [36]:
before = [6, 8, 10, 9, 5, 12, 9, 7]

after = [10, 12, 9, 12, 8, 13, 8, 10]

result = paired_ttest(
    sample_before=before,
    sample_after=after,
    mu0=0.0,
    alternative="less",
    alpha=0.05
)

result
Out[36]:
{'inputs': {'n': 8,
  'mean_difference': -2.0,
  'sd_difference': 2.0701966780270626,
  'mu0': 0.0,
  'alternative': 'less',
  'alpha': 0.05},
 'statistic': {'t_obs': -2.732520204255893, 'df': 7, 'se': 0.7319250547113998},
 'p_value_method': {'p_value': 0.014616055450294695, 'reject_H0': True},
 'critical_region_method': {'critical_region': 'T < -1.8946',
  't_crit': -1.8945786050613054,
  'reject_H0': True}}
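As a cross-check, scipy's built-in paired test gives the same statistic and p-value; `alternative="less"` matches $H_1: \mu_D < 0$ (production increased) and requires scipy >= 1.6:

```python
from scipy.stats import ttest_rel

# Production data from the problem above.
before = [6, 8, 10, 9, 5, 12, 9, 7]
after = [10, 12, 9, 12, 8, 13, 8, 10]

# ttest_rel(before, after) tests the mean of before - after.
t_obs, p_value = ttest_rel(before, after, alternative="less")
print(round(t_obs, 4), round(p_value, 4))  # -2.7325 0.0146
```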

The F-Test for Equality of Variances


1. Motivation¶

In many statistical procedures (e.g. two-sample $t$-tests, ANOVA, regression), assumptions about population variances play a crucial role.

Before comparing means, we often need to answer:

Do two populations have the same variance?

This leads to the hypothesis test for two variances, also known as the F-test.


2. Statistical Model and Assumptions¶

Let $$ X_1, \dots, X_n \sim \mathcal{N}(\mu_X, \sigma_X^2), \quad Y_1, \dots, Y_m \sim \mathcal{N}(\mu_Y, \sigma_Y^2), $$ where:

  • samples are independent
  • each sample comes from a normal distribution
  • variances $\sigma_X^2$ and $\sigma_Y^2$ are unknown

⚠️ Normality is essential for the F-test
(unlike the $t$-test, the F-test is not robust to non-normality).


3. Sample Variances¶

Define the sample variances: $$ S_X^2 = \frac{1}{n-1}\sum_{i=1}^{n}(X_i - \bar{X})^2, \quad S_Y^2 = \frac{1}{m-1}\sum_{j=1}^{m}(Y_j - \bar{Y})^2. $$

Properties: $$ \frac{(n-1)S_X^2}{\sigma_X^2} \sim \chi^2(n-1), \quad \frac{(m-1)S_Y^2}{\sigma_Y^2} \sim \chi^2(m-1), $$ and the two chi-square variables are independent.


4. The F Distribution (Key Theory)¶

4.1 Definition¶

Let $$ U \sim \chi^2(d_1), \quad V \sim \chi^2(d_2), $$ independent. Then the random variable $$ F = \frac{U/d_1}{V/d_2} $$ follows an F distribution with $(d_1, d_2)$ degrees of freedom: $$ F \sim F(d_1, d_2). $$


4.2 Application to Variances¶

Using the previous result: $$ \frac{S_X^2 / \sigma_X^2}{S_Y^2 / \sigma_Y^2} \sim F(n-1, m-1). $$

If $\sigma_X^2 = \sigma_Y^2$, then $$ \frac{S_X^2}{S_Y^2} \sim F(n-1, m-1). $$

This is the theoretical basis of the F-test.
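This distributional fact can be sanity-checked by simulation. A minimal Monte Carlo sketch (the sample sizes, common sigma, seed, and replication count below are arbitrary illustrative choices):

```python
import numpy as np
from scipy.stats import f

rng = np.random.default_rng(0)
n, m = 10, 15
sigma = 2.0  # common sigma, so the two population variances are equal

# 20,000 variance ratios S_X^2 / S_Y^2 under equal variances
x = rng.normal(0.0, sigma, size=(20000, n))
y = rng.normal(0.0, sigma, size=(20000, m))
ratios = x.var(axis=1, ddof=1) / y.var(axis=1, ddof=1)

# empirical quantiles should match the F(n-1, m-1) quantiles
for q in (0.25, 0.50, 0.75, 0.95):
    print(f"q={q:.2f}: empirical={np.quantile(ratios, q):.3f}  "
          f"F({n-1},{m-1})={f.ppf(q, n - 1, m - 1):.3f}")
```

Note that the common value of $\sigma$ cancels in the ratio, which is exactly why the null distribution does not depend on the unknown variance.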


5. Hypotheses for the Two-Variance Test¶

5.1 Two-sided test (most common)¶

$$ H_0: \sigma_X^2 = \sigma_Y^2 $$$$ H_1: \sigma_X^2 \neq \sigma_Y^2 $$

5.2 One-sided tests¶

  • Right-tailed: $$ H_0: \sigma_X^2 \le \sigma_Y^2, \quad H_1: \sigma_X^2 > \sigma_Y^2 $$

  • Left-tailed: $$ H_0: \sigma_X^2 \ge \sigma_Y^2, \quad H_1: \sigma_X^2 < \sigma_Y^2 $$


6. Test Statistic¶

Define the F statistic: $$ F_{\text{obs}} = \frac{S_X^2}{S_Y^2}. $$

To simplify decision rules, it is common to put the larger variance in the numerator: $$ F_{\text{obs}} = \frac{\max(S_X^2, S_Y^2)}{\min(S_X^2, S_Y^2)} \ge 1. $$

Then, under $H_0$: $$ F_{\text{obs}} \sim F(d_1, d_2), $$ where:

  • $d_1$ = degrees of freedom of numerator
  • $d_2$ = degrees of freedom of denominator
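A small sketch of this convention, using a hypothetical helper (the function name and the example variances are illustrative, not from the notebook). The key detail is that the degrees of freedom must follow whichever sample supplies the numerator:

```python
def f_statistic_larger_numerator(s1_sq, n1, s2_sq, n2):
    """Return (F_obs, d1, d2) with the larger sample variance in the numerator.

    s1_sq, s2_sq are sample variances; d1 and d2 are the degrees of
    freedom of the numerator and denominator samples, respectively.
    """
    if s1_sq >= s2_sq:
        return s1_sq / s2_sq, n1 - 1, n2 - 1
    return s2_sq / s1_sq, n2 - 1, n1 - 1

# example: variances 100 (n=28) and 144 (n=25);
# the larger variance 144 goes on top, so d1 = 25 - 1 = 24
F_obs, d1, d2 = f_statistic_larger_numerator(100.0, 28, 144.0, 25)
print(F_obs, d1, d2)  # 1.44 24 27
```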

7. Distribution Under the Null Hypothesis¶

Under $H_0: \sigma_X^2 = \sigma_Y^2$, $$ F_{\text{obs}} \sim F(n-1, m-1). $$

Important properties:

  • support: $F \ge 0$
  • right-skewed
  • asymmetric
  • depends on two degrees of freedom

8. Decision Rules¶

8.1 Two-sided test¶

Reject $H_0$ if: $$ F_{\text{obs}} < F_{\alpha/2}(d_1,d_2) \quad \text{or} \quad F_{\text{obs}} > F_{1-\alpha/2}(d_1,d_2). $$

If using the convention $F_{\text{obs}} \ge 1$, rejection simplifies to: $$ F_{\text{obs}} > F_{1-\alpha/2}(d_1,d_2). $$


8.2 One-sided test¶

  • Right-tailed: $$ \text{Reject } H_0 \text{ if } F_{\text{obs}} > F_{1-\alpha}(d_1,d_2). $$

  • Left-tailed: $$ \text{Reject } H_0 \text{ if } F_{\text{obs}} < F_{\alpha}(d_1,d_2). $$


9. p-value¶

9.1 Right-tailed test¶

$$ \text{p-value} = P(F(d_1,d_2) \ge F_{\text{obs}}). $$

9.2 Two-sided test¶

$$ \text{p-value} = 2 \cdot \min \left\{ P(F \ge F_{\text{obs}}), \, P(F \le F_{\text{obs}}) \right\}. $$
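These p-values are straightforward to compute with `scipy.stats.f` (the observed statistic and degrees of freedom below are made up for illustration):

```python
from scipy.stats import f

# hypothetical observed statistic and degrees of freedom
F_obs, d1, d2 = 1.44, 24, 27

p_right = f.sf(F_obs, d1, d2)    # P(F >= F_obs), right-tailed p-value
p_left = f.cdf(F_obs, d1, d2)    # P(F <= F_obs), left-tailed p-value
p_two_sided = 2 * min(p_right, p_left)
print(p_right, p_left, p_two_sided)
```

`f.sf` is the survival function $1 - \text{cdf}$, computed directly for better numerical accuracy in the right tail.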

10. Confidence Interval for the Ratio of Variances¶

A $(1-\alpha)$ confidence interval for $\sigma_X^2 / \sigma_Y^2$ is: $$ \left( \frac{S_X^2}{S_Y^2} \cdot \frac{1}{F_{1-\alpha/2}(d_1,d_2)}, \quad \frac{S_X^2}{S_Y^2} \cdot \frac{1}{F_{\alpha/2}(d_1,d_2)} \right). $$

If the interval contains $1$, the null hypothesis $\sigma_X^2=\sigma_Y^2$ is not rejected.
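This interval can be computed directly from `scipy.stats.f` quantiles. A sketch with a hypothetical helper and illustrative inputs (neither is from the notebook):

```python
from scipy.stats import f

def variance_ratio_ci(s1_sq, n1, s2_sq, n2, alpha=0.05):
    """(1 - alpha) confidence interval for sigma1^2 / sigma2^2."""
    d1, d2 = n1 - 1, n2 - 1
    ratio = s1_sq / s2_sq
    lower = ratio / f.ppf(1 - alpha / 2, d1, d2)
    upper = ratio / f.ppf(alpha / 2, d1, d2)
    return lower, upper

# illustrative inputs: sample variances 100 (n=28) and 144 (n=25)
lo, hi = variance_ratio_ci(100.0, 28, 144.0, 25, alpha=0.05)
print(lo, hi)
print(lo <= 1 <= hi)  # interval contains 1 => do not reject H0 at this alpha
```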


11. Assumptions (Very Important)¶

The F-test requires:

  1. Independent samples
  2. Random sampling
  3. Normality of both populations

Violations of normality can lead to:

  • inflated Type I error
  • misleading conclusions

12. Practical Remarks¶

  • The F-test is highly sensitive to outliers
  • Even mild non-normality can break validity
  • In practice, many analysts skip the F-test and use:
    • Welch’s $t$-test (does not assume equal variances)

13. Alternatives to the F-Test¶

If normality is questionable:

  • Levene’s test
  • Brown–Forsythe test
  • Bootstrap methods

These tests are more robust but are outside the classical theory.
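As a sketch, `scipy.stats.levene` covers the first two: `center='mean'` gives the original Levene test, while the default `center='median'` gives the Brown–Forsythe variant (the samples below are illustrative):

```python
from scipy.stats import levene

# illustrative samples
x = [474, 414, 692, 467, 443, 605, 419, 277, 670]
y = [783, 587, 527, 546, 442, 107, 728, 662]

# center='median' (the default) is the Brown-Forsythe variant,
# which is more robust to non-normality than the F-test
stat, p = levene(x, y, center='median')
print(stat, p)
```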


14. Connection to Two-Sample t-Tests¶

| Test | Variance assumption |
|------|---------------------|
| Pooled $t$-test | $\sigma_X^2 = \sigma_Y^2$ |
| Welch $t$-test | $\sigma_X^2 \neq \sigma_Y^2$ |

The F-test historically justified choosing between them, but modern practice often defaults to Welch.


15. Summary¶

  • The F-test compares two population variances
  • Test statistic: $$ F = \frac{S_X^2}{S_Y^2} $$
  • Distribution under $H_0$: $$ F \sim F(n-1, m-1) $$
  • Strong normality assumption
  • Useful theoretically, risky practically

Example:

A researcher claims that IQ scores of university students vary less (have a smaller variance) than IQ scores of community college students. For a sample of 28 university students, the sample standard deviation was 10; for a sample of 25 community college students, the sample standard deviation was 12. Test the claim using the traditional method of hypothesis testing with a level of significance $\alpha = 0.05$. Assume that IQ scores are normally distributed.

In [46]:
import math
from scipy.stats import f


def f_test_equal_variances(
    sample1=None,
    sample2=None,
    s1=None, n1=None,
    s2=None, n2=None,
    alternative="two-sided",   # "two-sided", "greater", "less"
    alpha=0.05
):
    """
    F-test for equality of variances.

    H0: sigma1^2 = sigma2^2

    Input options:
      (A) sample vectors: sample1, sample2
      (B) summary statistics: (s1, n1) and (s2, n2)

    alternative:
      "two-sided" : sigma1^2 != sigma2^2
      "greater"   : sigma1^2 >  sigma2^2
      "less"      : sigma1^2 <  sigma2^2

    Uses BOTH:
      (1) p-value method
      (2) critical region method
    """

    # ---------- Case A: vectors ----------
    if sample1 is not None and sample2 is not None:
        n1 = len(sample1)
        n2 = len(sample2)

        if n1 < 2 or n2 < 2:
            raise ValueError("Each sample must have at least 2 observations.")

        mean1 = sum(sample1) / n1
        mean2 = sum(sample2) / n2

        s1 = math.sqrt(sum((x - mean1) ** 2 for x in sample1) / (n1 - 1))
        s2 = math.sqrt(sum((x - mean2) ** 2 for x in sample2) / (n2 - 1))

    # ---------- Case B: summary statistics ----------
    elif s1 is not None and n1 is not None and s2 is not None and n2 is not None:
        if n1 < 2 or n2 < 2:
            raise ValueError("Each sample must have n >= 2.")
    else:
        raise ValueError(
            "Provide either (sample1, sample2) OR (s1, n1, s2, n2)."
        )

    # ---------- F statistic ----------
    F_obs = (s1 ** 2) / (s2 ** 2)

    df1 = n1 - 1
    df2 = n2 - 1

    # ---------- p-value method ----------
    if alternative == "two-sided":
        p_value = 2 * min(
            f.cdf(F_obs, df1, df2),
            1 - f.cdf(F_obs, df1, df2)
        )
    elif alternative == "greater":
        # H1: sigma1^2 > sigma2^2
        p_value = 1 - f.cdf(F_obs, df1, df2)
    elif alternative == "less":
        # H1: sigma1^2 < sigma2^2
        p_value = f.cdf(F_obs, df1, df2)
    else:
        raise ValueError("alternative must be 'two-sided', 'greater', or 'less'.")

    reject_by_pvalue = p_value < alpha

    # ---------- Critical region method ----------
    if alternative == "two-sided":
        F_lower = f.ppf(alpha / 2, df1, df2)
        F_upper = f.ppf(1 - alpha / 2, df1, df2)
        reject_by_critical = (F_obs < F_lower) or (F_obs > F_upper)
        critical_region = f"F < {F_lower:.4f} or F > {F_upper:.4f}"

    elif alternative == "greater":
        F_crit = f.ppf(1 - alpha, df1, df2)
        reject_by_critical = F_obs > F_crit
        critical_region = f"F > {F_crit:.4f}"

    else:  # "less"
        F_crit = f.ppf(alpha, df1, df2)
        reject_by_critical = F_obs < F_crit
        critical_region = f"F < {F_crit:.4f}"

    # ---------- Return results ----------
    return {
        "inputs": {
            "n1": n1, "s1": s1,
            "n2": n2, "s2": s2,
            "alternative": alternative,
            "alpha": alpha
        },
        "statistic": {
            "F_obs": F_obs,
            "df1": df1,
            "df2": df2
        },
        "p_value_method": {
            "p_value": p_value,
            "reject_H0": reject_by_pvalue
        },
        "critical_region_method": {
            "critical_region": critical_region,
            "reject_H0": reject_by_critical
        }
    }
In [47]:
# Summary statistics
s1 = 10   # sample standard deviation of sample 1
n1 = 28      # sample size of sample 1

s2 = 12   # sample standard deviation of sample 2
n2 = 25       # sample size of sample 2

# Run F-test for equality of variances
result = f_test_equal_variances(
    s1=s1, n1=n1,
    s2=s2, n2=n2,
    alternative="less",
    alpha=0.05
)

# Display results
result
Out[47]:
{'inputs': {'n1': 28,
  's1': 10,
  'n2': 25,
  's2': 12,
  'alternative': 'less',
  'alpha': 0.05},
 'statistic': {'F_obs': 0.6944444444444444, 'df1': 27, 'df2': 24},
 'p_value_method': {'p_value': 0.17913186027272973, 'reject_H0': False},
 'critical_region_method': {'critical_region': 'F < 0.5182',
  'reject_H0': False}}

For the following problem, we assumed that the variances were not equal. Carry out an F-test to check that assumption:¶

A researcher is studying how much electricity (in kilowatt hours) households from two different cities use in their homes. Random samples of 17 days in Sacramento and 16 days in Portland are given below.

Test to see if there is a difference using all 3 methods (critical value, p-value, and confidence interval). Assume that electricity use is normally distributed and the population variances are unequal. Use $\alpha = 0.10$.


Electricity Use Data (kWh)¶

Sacramento (n = 17)¶

474 414 692 467 443 605
419 277 670 696 783 577
813 694 565 663 884

Portland (n = 16)¶

783 587 527 546 442 107
728 662 371 427 277 474
605 293 320 555
In [48]:
# define the samples from the data above
sacramento = [474, 414, 692, 467, 443, 605,
              419, 277, 670, 696, 783, 577,
              813, 694, 565, 663, 884]

portland = [783, 587, 527, 546, 442, 107,
            728, 662, 371, 427, 277, 474,
            605, 293, 320, 555]

res = f_test_equal_variances(
    sample1=portland,
    sample2=sacramento,
    alternative="two-sided",
    alpha=0.05
)

res
Out[48]:
{'inputs': {'n1': 16,
  's1': 179.39565212122616,
  'n2': 17,
  's2': 163.23622813723244,
  'alternative': 'two-sided',
  'alpha': 0.05},
 'statistic': {'F_obs': 1.207788038461697, 'df1': 15, 'df2': 16},
 'p_value_method': {'p_value': 0.7105517535985708, 'reject_H0': False},
 'critical_region_method': {'critical_region': 'F < 0.3526 or F > 2.7875',
  'reject_H0': False}}
In [ ]: