4.14 Python: putting it all together

The Python ecosystem makes the maths above concrete. Three libraries cover almost every need: NumPy for arrays and random sampling, SciPy for distribution functions and closed-form computations, and PyTorch for differentiable probability.

scipy.stats for closed-form computations

import numpy as np
from scipy import stats

# Bayes' theorem for the medical screening example
prior, sens, fpr = 0.001, 0.99, 0.05
posterior = (sens * prior) / (sens * prior + fpr * (1 - prior))
print(f"P(disease | positive) = {posterior:.4f}")
# 0.0194

# Probability of >=6 Poisson(3) events
print(1 - stats.poisson.cdf(5, mu=3))
# 0.0839
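
A small aside: scipy also exposes the survival function, which computes the same tail probability directly and avoids the 1 - cdf subtraction, which loses precision far out in the tail.

# Same quantity via the survival function
print(stats.poisson.sf(5, mu=3))
# 0.0839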

# Hoeffding sample size for Bernoulli proportion to +/-0.01 with 95% confidence
eps, delta = 0.01, 0.05
n_required = int(np.ceil(np.log(2 / delta) / (2 * eps**2)))
print(f"Hoeffding sample size: {n_required}")
# 18445
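
Hoeffding's bound is distribution-free, so it is conservative. As a check (a small sketch, evaluating the exact binomial coverage at the worst case p = 0.5), the estimate lands within the tolerance far more often than 95% of the time:

# Exact coverage at worst-case p = 0.5: P(|phat - p| <= eps)
p_true = 0.5
k_lo = int(np.ceil((p_true - eps) * n_required))
k_hi = int(np.floor((p_true + eps) * n_required))
coverage = stats.binom.cdf(k_hi, n_required, p_true) - stats.binom.cdf(k_lo - 1, n_required, p_true)
print(f"actual coverage: {coverage:.4f}")
# ~0.993, comfortably above 0.95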

Sampling and Monte Carlo

rng = np.random.default_rng(0)

# Inverse-CDF sample from Exponential(rate=2)
u = rng.uniform(size=10_000)
x = -np.log(1 - u) / 2
print(x.mean(), x.var())     # ~0.5, ~0.25
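
Matching the first two moments is necessary but not sufficient; a Kolmogorov-Smirnov test checks the whole distribution (note that scipy parameterises the exponential by loc and scale = 1/rate):

# KS test against Exponential(rate=2), i.e. loc=0, scale=0.5 in scipy's convention
print(stats.kstest(x, "expon", args=(0, 0.5)))
# p-value should not be small (the exact value depends on the seed)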

# Importance sampling: estimate E[X^2] for X ~ N(0,1) using a wider N(0, 2) proposal (std sqrt(2))
n = 100_000
y = rng.normal(0, np.sqrt(2), size=n)
w = stats.norm.pdf(y, 0, 1) / stats.norm.pdf(y, 0, np.sqrt(2))
estimate = np.mean(w * y**2)
print(f"E[X^2] estimate: {estimate:.4f} (true 1.0)")

# Empirical CLT: averages of 30 Exponential(1) draws look roughly Gaussian
samples = rng.exponential(scale=1.0, size=(20_000, 30)).mean(axis=1)
print(stats.skew(samples))             # ~0.37 (= 2/sqrt(30)): small residual skew
print(stats.shapiro(samples[:5000]))   # p is small: 5000 draws detect that skew
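
The CLT prediction here is that the averages are approximately N(1, 1/30). A concrete check on the samples above: an empirical tail probability should agree with the normal approximation to within about a percentage point:

# Empirical tail probability vs the normal approximation N(1, 1/30)
print((samples > 1.2).mean())                           # ~0.14
print(stats.norm.sf(1.2, loc=1, scale=1/np.sqrt(30)))   # 0.1367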

Multivariate Gaussian

mu = np.array([0.0, 0.0])
Sigma = np.array([[1.0, 0.6], [0.6, 1.0]])
mv = stats.multivariate_normal(mu, Sigma)

# Conditional mean and variance of X1 given X2 = 1.5
S_aa, S_ab, S_bb = 1.0, 0.6, 1.0
mu_cond = 0.0 + S_ab / S_bb * (1.5 - 0.0)
sigma_cond_sq = S_aa - S_ab**2 / S_bb
print(f"X1 | X2=1.5 ~ N({mu_cond:.2f}, {sigma_cond_sq:.2f})")
# N(0.90, 0.64)

# Verify by simulation
data = mv.rvs(size=200_000, random_state=0)
mask = np.abs(data[:, 1] - 1.5) < 0.05
print(data[mask, 0].mean(), data[mask, 0].var())   # ~0.90, ~0.64
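
The scalar arithmetic above is a special case of the general Schur-complement formula: mu_{a|b} = mu_a + Sigma_ab Sigma_bb^{-1} (x_b - mu_b) and Sigma_{a|b} = Sigma_aa - Sigma_ab Sigma_bb^{-1} Sigma_ba. A generic sketch (the helper name conditional_gaussian is our own, not a library function):

def conditional_gaussian(mu, Sigma, idx_a, idx_b, x_b):
    # p(x_a | x_b = x_b) for a joint Gaussian, via the Schur complement
    S_aa = Sigma[np.ix_(idx_a, idx_a)]
    S_ab = Sigma[np.ix_(idx_a, idx_b)]
    S_bb = Sigma[np.ix_(idx_b, idx_b)]
    mu_c = mu[idx_a] + S_ab @ np.linalg.solve(S_bb, x_b - mu[idx_b])
    Sigma_c = S_aa - S_ab @ np.linalg.solve(S_bb, S_ab.T)
    return mu_c, Sigma_c

print(conditional_gaussian(mu, Sigma, [0], [1], np.array([1.5])))
# (array([0.9]), array([[0.64]]))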

Information theory

def kl(p, q):
    """KL(p || q) for discrete distributions, in nats; 0 log 0 = 0 by convention."""
    p, q = np.asarray(p), np.asarray(q)
    mask = p > 0                  # terms with p_i = 0 contribute nothing
    return np.sum(p[mask] * np.log(p[mask] / q[mask]))

p = np.array([0.5, 0.3, 0.2])
q = np.array([0.4, 0.4, 0.2])
print(kl(p, q), kl(q, p))      # asymmetric
print(kl(p, p))                # 0
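
scipy ships the same computation: called with two arguments, stats.entropy(p, q) returns KL(p || q), in nats by default, so it should match the hand-rolled version exactly:

print(stats.entropy(p, q))     # same as kl(p, q)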

# Cross-entropy as classifier loss
import torch
import torch.nn.functional as F
logits = torch.tensor([[2.0, 1.0, 0.1]])    # one example, three classes
target = torch.tensor([0])
print(F.cross_entropy(logits, target))      # -log softmax_0
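
The same number by hand, as a check that cross_entropy really is negative log-softmax at the target index:

log_probs = F.log_softmax(logits, dim=1)
print(-log_probs[0, target[0]])    # matches F.cross_entropy above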

PyTorch distributions

import torch
from torch.distributions import Normal, kl_divergence

p = Normal(0.0, 1.0)
q = Normal(1.0, 2.0)
print(kl_divergence(p, q))     # closed form
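
The closed form can be verified by Monte Carlo, since KL(p || q) = E_{x~p}[log p(x) - log q(x)]; with enough samples the estimate should land near the exact value (about 0.443 here):

# Monte Carlo estimate of the same divergence
x = p.sample((100_000,))
print((p.log_prob(x) - q.log_prob(x)).mean())   # ~0.443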

# Reparameterisation trick (foundation of VAEs)
mu = torch.zeros(4, requires_grad=True)
log_sigma = torch.zeros(4, requires_grad=True)
eps = torch.randn(4)
z = mu + log_sigma.exp() * eps    # differentiable sample
loss = (z**2).sum()
loss.backward()

These snippets are meant to be pasted into a notebook, run, and modified. Probability becomes intuitive only when you simulate it.
