import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# -----------------------------
# 1. Generate example data
# -----------------------------

# Fix the global NumPy seed so the simulated counts are reproducible.
np.random.seed(42)

# Single covariate: the values 1, 2, ..., 10 stored as floats.
x = np.arange(1, 11, dtype=float)

# True coefficients (intercept, slope) on the log scale.
beta_true = np.array([0.5, 0.25])

# Design matrix: a column of ones for the intercept next to x.
X = np.column_stack([np.ones_like(x), x])

# Poisson means implied by the log link: lambda_i = exp(x_i^T beta).
lambda_true = np.exp(X @ beta_true)

# One Poisson draw per observation, each with its own mean.
y = np.random.poisson(lambda_true)

# Collect covariate, simulated count and true mean in one table.
data = pd.DataFrame({"x": x, "y": y, "true_lambda": lambda_true})

data


# -----------------------------
# 2. Newton method for Poisson regression
# -----------------------------

def poisson_log_likelihood(beta, X, y):
    """Return the Poisson log-likelihood at ``beta``, up to a constant.

    The linear predictor is ``X @ beta`` and the mean is its exponential
    (log link). The term ``-log(y!)`` does not depend on ``beta`` and is
    left out of the sum.

    Args:
        beta: Coefficient vector.
        X: Design matrix with one row per observation.
        y: Observed counts, one per row of ``X``.

    Returns:
        The sum over observations of ``y * eta - exp(eta)``.
    """
    linear_predictor = X @ beta
    mean = np.exp(linear_predictor)
    contributions = y * linear_predictor - mean
    return np.sum(contributions)

def poisson_score(beta, X, y):
    """Return the gradient of the Poisson log-likelihood with respect to ``beta``.

    Args:
        beta: Coefficient vector.
        X: Design matrix with one row per observation.
        y: Observed counts, one per row of ``X``.

    Returns:
        ``X.T @ (y - exp(X @ beta))``, one entry per coefficient.
    """
    fitted_mean = np.exp(X @ beta)
    residual = y - fitted_mean
    return X.T @ residual

def poisson_hessian(beta, X, y):
    """Return the Hessian of the Poisson log-likelihood with respect to ``beta``.

    The Hessian is ``-X.T @ diag(lam) @ X`` with ``lam = exp(X @ beta)``.

    Args:
        beta: Coefficient vector.
        X: Design matrix with one row per observation.
        y: Observed counts. Not used by the computation; kept so the
            signature matches the log-likelihood and score functions.

    Returns:
        A square matrix with one row and column per coefficient.
    """
    eta = X @ beta
    lam = np.exp(eta)
    # Scale the columns of X.T by lam via broadcasting instead of building
    # the dense n-by-n matrix diag(lam); the product is the same, but memory
    # stays proportional to the size of X rather than to n squared.
    return -(X.T * lam) @ X

def fit_poisson_newton(X, y, max_iter=100, tol=1e-8):
    """Fit a Poisson regression by Newton's method, starting from zeros.

    Each iteration evaluates the log-likelihood, score and Hessian at the
    current coefficients, solves ``H @ step = score`` and moves to
    ``beta - step``. Iteration stops once the norm of the change in the
    coefficients falls below ``tol`` or after ``max_iter`` iterations.

    Args:
        X: Design matrix with one row per observation. The per-iteration
            record reads the first two coefficients, so at least two
            columns are needed.
        y: Observed counts, one per row of ``X``.
        max_iter: Maximum number of Newton iterations.
        tol: Threshold on the norm of the coefficient change.

    Returns:
        A pair ``(beta, history)``. ``beta`` holds the final coefficients and
        ``history`` is a DataFrame with one row per iteration. Each row
        records the log-likelihood and first two coefficients *before* that
        iteration's update, together with the norm of the step taken.
    """
    current = np.zeros(X.shape[1])
    records = []

    for k in range(max_iter):
        ll = poisson_log_likelihood(current, X, y)
        gradient = poisson_score(current, X, y)
        curvature = poisson_hessian(current, X, y)

        # Newton update: beta_new = beta - H^{-1} score, computed with a
        # linear solve rather than an explicit inverse.
        delta = np.linalg.solve(curvature, gradient)
        candidate = current - delta

        records.append(
            dict(
                iteration=k,
                log_likelihood=ll,
                beta_0=current[0],
                beta_1=current[1],
                step_norm=np.linalg.norm(delta),
            )
        )

        # Measure the move before overwriting the current point; the update
        # is accepted whether or not this was the final iteration.
        change = np.linalg.norm(candidate - current)
        current = candidate
        if change < tol:
            break

    return current, pd.DataFrame(records)


# -----------------------------
# 3. Fit the model
# -----------------------------

# Run Newton's method on the simulated data; `history` holds the
# per-iteration trace returned alongside the final coefficients.
beta_hat, history = fit_poisson_newton(X, y)

# Print the estimate and the generating coefficients one after the other.
print("Estimated beta:", beta_hat, sep="\n")
print("\nTrue beta:", beta_true, sep="\n")

history

Estimated beta:
[0.52441211 0.22269885]

True beta:
[0.5  0.25]


# -----------------------------
# 4. Interpretation
# -----------------------------

# Split the fitted vector into intercept and slope.
beta_0_hat, beta_1_hat = beta_hat

# Under the log link, exp(beta_1) is the factor by which the expected count
# is multiplied when x increases by one unit.
multiplicative_effect = np.exp(beta_1_hat)

print(
    f"Estimated beta_0 = {beta_0_hat:.4f}",
    f"Estimated beta_1 = {beta_1_hat:.4f}",
    sep="\n",
)

print(
    "\nEach one-unit increase in x multiplies the expected count by:",
    f"exp(beta_1) = {multiplicative_effect:.4f}",
    sep="\n",
)

# Express the same factor as a percentage change.
print(f"\nThat is approximately a {(multiplicative_effect - 1) * 100:.2f}% increase.")

Estimated beta_0 = 0.5244
Estimated beta_1 = 0.2227

Each one-unit increase in x multiplies the expected count by:
exp(beta_1) = 1.2494

That is approximately a 24.94% increase.


# -----------------------------
# 5. Predicted values
# -----------------------------

# Fitted Poisson means at the observed design points: exp(X @ beta_hat).
lambda_hat = np.exp(X @ beta_hat)

# Add the fitted means as a new column of the existing table, next to the
# true means used to simulate the data.
data["predicted_lambda"] = lambda_hat

# Bare expression with no effect in a script; presumably kept as the last
# line so a notebook cell displays the table.
data


# -----------------------------
# 6. Final plot
# -----------------------------

# Dense grid of 200 points on [0, 11] for drawing smooth mean curves, with
# the matching design matrix (intercept column plus the grid).
x_grid = np.linspace(0, 11, 200)
X_grid = np.column_stack([np.ones_like(x_grid), x_grid])

# Mean curves under the generating and the fitted coefficients.
lambda_grid_true = np.exp(X_grid @ beta_true)
lambda_grid_hat = np.exp(X_grid @ beta_hat)

plt.figure(figsize=(9, 6))

# Drawing order is kept (points, fitted curve, true curve) so the legend
# order and the default colours stay the same.
plt.scatter(x, y, label="Observed counts")
plt.plot(x_grid, lambda_grid_hat, label="Fitted Poisson regression")
plt.plot(x_grid, lambda_grid_true, label="True mean function", linestyle="--")

plt.title("Poisson Regression Fitted by Newton's Method")
plt.xlabel("x")
plt.ylabel("Expected count")
plt.grid(True)
plt.legend()
plt.show()

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis, norm

# Reseed the global NumPy generator so the three samples below are
# reproducible. They are drawn from the same global stream, so their values
# depend on the order of the calls.
np.random.seed(42)

# 1000 draws from the standard normal distribution (default loc and scale).
x_normal = np.random.normal(size=1000)

# 1000 draws from the exponential distribution (default scale), used here as
# the skewed example.
x_skewed = np.random.exponential(size=1000)

# 1000 draws from Student's t distribution with 3 degrees of freedom, used
# here as the heavy-tailed example.
x_heavy = np.random.standard_t(df=3, size=1000)

Component	Poisson Regression
Response distribution	Poisson
Mean	$\lambda_i$
Link function	$\log(\lambda_i)$
Linear predictor	$x_i^T\beta$

	x	y	true_lambda
0	1.0	4	2.117000
1	2.0	1	2.718282
2	3.0	3	3.490343
3	4.0	4	4.481689
4	5.0	5	5.754603
5	6.0	7	7.389056
6	7.0	9	9.487736
7	8.0	7	12.182494
8	9.0	13	15.642632
9	10.0	17	20.085537

	iteration	log_likelihood	beta_0	beta_1	step_norm
0	0	-10.000000	0.000000	0.000000	2.412129e+00
1	1	-347617.856995	-1.933333	1.442424	9.878277e-01
2	2	-127603.034150	-2.921160	1.441189	9.669958e-01
3	3	-46707.887194	-3.888150	1.437839	9.109014e-01
4	4	-16991.219226	-4.799007	1.428791	7.623428e-01
5	5	-6099.977203	-5.560967	1.404628	3.884367e-01
6	6	-2128.994247	-5.944324	1.342018	4.964289e-01
7	7	-692.458283	-5.471004	1.192320	1.881206e+00
8	8	-172.079028	-3.612416	0.901478	2.460713e+00
9	9	14.217286	-1.176526	0.552844	1.180433e+00
10	10	68.817154	-0.013981	0.348122	4.406504e-01
11	11	78.411193	0.415875	0.251186	1.051626e-01
12	12	78.946529	0.517624	0.224609	7.016584e-03
13	13	78.949068	0.524378	0.222709	3.519188e-05
14	14	78.949068	0.524412	0.222699	9.317839e-10

	x	y	true_lambda	predicted_lambda
0	1.0	4	2.117000	2.110893
1	2.0	1	2.718282	2.637443
2	3.0	3	3.490343	3.295338
3	4.0	4	4.481689	4.117341
4	5.0	5	5.754603	5.144388
5	6.0	7	7.389056	6.427626
6	7.0	9	9.487736	8.030960
7	8.0	7	12.182494	10.034237
8	9.0	13	15.642632	12.537219
9	10.0	17	20.085537	15.664557

Problem	Response Type
Spam detection	Binary
Number of website clicks	Counts
Disease occurrence	Binary
Number of accidents	Counts
Customer purchase	Binary

Model	Distribution
Linear Regression	Normal
Logistic Regression	Bernoulli
Poisson Regression	Poisson

Model	Distribution	Link
Linear Regression	Normal	Identity
Logistic Regression	Bernoulli	Logit
Poisson Regression	Poisson	Log

Problem	Effect
Long right tail	Positive skewness
Heavy tails	High kurtosis
Outliers	High kurtosis
Asymmetric distribution	Nonzero skewness

Seminar 13

Poisson Regression

1. Motivation

2. Poisson Distribution

3. The Poisson Regression Model

4. Why Use the Log Link?

5. Interpretation of the Coefficients

6. Likelihood Function

7. Log-Likelihood Function

8. Score Function

9. Hessian Matrix

10. Fisher Information

11. Newton-Raphson Method

Generalized Linear Models (GLMs)

1. Motivation

Problem 1: Predictions may be impossible

Problem 2: Variance is often not constant

Problem 3: Non-normal responses

2. The Idea of GLMs

3. Structure of a GLM

3.1 Random Component

3.2 Systematic Component

3.3 Link Function

4. Linear Regression as a GLM

8. Exponential Family

9. Canonical Link Functions

10. Summary

6.

7. Why Skewness and Kurtosis Matter

8. Connection with QQ-Plots

Skewness in QQ-plot

Heavy tails

9. Why Normality Matters

10. Important Practical Advice

11. Python Example

12. Plotting the Distributions

13. Computing Skewness and Kurtosis

14. QQ-Plots

15. Interpretation of the Plots

Normal data

Skewed data

Heavy-tailed data

16. Summary