Quantifying Uncertainty with Markov Chain Monte Carlo Sampling¶

Benjamin Pope

University of Queensland


In [1]:
# This is going to be an interactive lecture using Jupyter! 
# So let's import things!

import numpy as np 

from astropy.table import Table

import matplotlib.pyplot as plt

import emcee
import chainconsumer
from chainconsumer import ChainConsumer, Chain

Hubble's Original Data¶

Here are some distances from Edwin Hubble's 1929 paper discovering the expansion of the universe.

[Image: Hubble's data]

In this lecture, we're going to learn how to fit a model to data like these - and to quantify the uncertainty on the fitted parameters.

In [2]:
# Load and plot Hubble original data

data = Table.read('../data/hubble.txt', format='ascii')
d, v = data['r'], data['v']
d_err = 0.1 # assumed constant distance uncertainty (Mpc)
plt.errorbar(d,v,xerr=d_err,fmt='o',color='k')
plt.xlabel('Distance (Mpc)')
plt.ylabel('RV (km/s)');

In physics we gather data through experiment and observation, and do theoretical work to calculate how the outcomes of those experiments depend on the parameters of our models.

The job of data analysts in physics is to connect the two, solving the inverse problem to infer the parameters of a theory from empirical data - and it is usually just as important to quantify the uncertainty on these parameters as it is to find a best-fitting model.

The most common way to do this is with a Markov Chain Monte Carlo (MCMC) algorithm, which gives us a set of samples drawn from the posterior probability distribution of the parameters.

In this lecture I will discuss the theory and the surprising history behind the now-ubiquitous MCMC, and illustrate this with an interactive session fitting Hubble's original discovery of the expanding universe with a simple model in Python.

My favourite review paper on this is Hogg & Foreman-Mackey, 2017, Data analysis recipes: Using Markov Chain Monte Carlo.

What do we mean by Monte Carlo methods?¶

Stan Ulam¶

[Image: MCMC paper]

Metropolis-Hastings Algorithm¶

[Image: MCMC paper]

Arianna Rosenbluth¶

[Image: Arianna Rosenbluth]

Theory¶

Ergodicity: A Markov chain has a limiting distribution if it is

  • Irreducible: every state can be reached from every other state
  • Aperiodic: the chain does not cycle through states with a fixed period
  • Positive recurrent: the expected time to return (arbitrarily close) to any state is finite

The chain will converge to the target distribution if it is ergodic and satisfies

Detailed balance: the probability of making a transition forwards equals the probability of making the same transition backwards.
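
Concretely, for a target distribution $\pi$ and transition probability $p$, detailed balance (the standard statement, written here for reference) reads

$$ \pi(x)\, p(x \to y) \;=\; \pi(y)\, p(y \to x), $$

and the Metropolis rule in the next section enforces it for a symmetric proposal by accepting a move from $x$ to $y$ with probability $\min\left(1, \pi(y)/\pi(x)\right)$.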

Metropolis-Hastings Rules¶

The original MCMC implementation is not necessarily what you will use - but it is a great illustration.

You have a point in your parameter space.

state = 5. # some value!

Now draw a random number from a distribution you can sample from - this is your proposal distribution. Add it to your state. This could just be

proposal = state + stepsize*np.random.randn()

Now calculate the likelihoods (exponentiating the log-likelihood):

likelihood_before = np.exp(loglike(state))
likelihood_after = np.exp(loglike(proposal))

Then you have the Metropolis-Hastings acceptance rule for detailed balance:

if likelihood_after > likelihood_before:
    state = proposal
else:
    # draw a random number uniformly in (0,1)
    prob = np.random.rand()
    if prob < likelihood_after/likelihood_before:
        state = proposal   # accept anyway, with probability L_after/L_before
    else:
        state = state      # reject the proposal and stay put
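
Putting these pieces together, here is a minimal sketch of a Metropolis sampler. The target (a unit Gaussian via loglike) and the step size are illustrative choices, not part of the Hubble fit below; the comparison is done in log space, which is equivalent to the rule above but avoids numerical underflow.

import numpy as np

def loglike(x):
    # illustrative target: a standard normal log-density (up to a constant)
    return -0.5 * x**2

def metropolis(loglike, start, stepsize=1.0, nsteps=10000):
    state = start
    logp = loglike(state)
    chain = np.empty(nsteps)
    for i in range(nsteps):
        proposal = state + stepsize*np.random.randn()   # symmetric proposal
        logp_new = loglike(proposal)
        # accept with probability min(1, L_new/L_old), compared in log space
        if np.log(np.random.rand()) < logp_new - logp:
            state, logp = proposal, logp_new
        chain[i] = state
    return chain

chain = metropolis(loglike, start=5.0)
print(chain.mean(), chain.std())   # should come out near 0 and 1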

MCMC Samplers¶

There are a lot of implementations of MCMC!

You might want to use:

  • emcee: user-friendly, simple, works well
  • a probabilistic programming language like numpyro
    • some good blogs

So let's do MCMC!¶

In [3]:
# first define priors

def lnprior(H0):
    # log prior: flat (uniform) for 0 < H0 < 1000, -inf outside
    if 0 < H0 < 1000:
        return 0.0
    return -np.inf
In [4]:
def model(H0, v):
    # forward model: predicted distance from velocity, d = v / H0
    return v/H0
In [5]:
def lnlike(H0, v, d, d_err):
    # Gaussian log-likelihood (up to a constant), comparing observed and predicted distances
    return -0.5 * np.sum((d-model(H0,v))**2 / d_err**2)
In [6]:
def lnprob(theta, v, d, d_err):
    # log probability = log prior + log likelihood
    H0 = theta
    lp = lnprior(H0)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(H0, v, d, d_err)
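
Before sampling, it can be worth spot-checking the log-probability at a couple of trial values (the numbers here are just illustrative):

print(lnprob(500., v, d, d_err))   # inside the prior range: finite
print(lnprob(-10., v, d, d_err))   # outside the prior range: -inf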
In [7]:
# sample with emcee
ndim, nwalkers = 1, 100
# start the walkers scattered around H0 = 500 km/s/Mpc
pos = 500 + np.random.randn(nwalkers, ndim)
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(v, d, d_err))
# run a 500-step burn-in, then reset and run the production chain
burnin = sampler.run_mcmc(pos, 500, progress=True)
sampler.reset()
sampler.run_mcmc(burnin, 1000, progress=True);
100%|████████████████████████████████████████| 500/500 [00:01<00:00, 279.06it/s]
100%|██████████████████████████████████████| 1000/1000 [00:03<00:00, 272.69it/s]
In [8]:
# plot history - should look like noise

samples = sampler.get_chain(flat=True)
chain = Chain.from_emcee(sampler, ['H0'], "an emcee chain", discard=200, thin=2, color="indigo")
consumer = ChainConsumer().add_chain(chain)

fig = consumer.plotter.plot_walks(plot_weights=False);
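The walker traces above should look like stationary noise. A more quantitative convergence check (assuming emcee ≥ 3) is the integrated autocorrelation time; a common rule of thumb is that the chain should be at least ~50 times longer than this:

tau = sampler.get_autocorr_time(tol=0)   # tol=0: don't raise if the chain is short
print(tau, sampler.iteration / tau)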
In [9]:
# plot posterior histogram - so far only one variable
fig = consumer.plotter.plot();
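To report a value and an uncertainty, one common choice is the posterior median with a 68% credible interval from the flattened samples:

lo, mid, hi = np.percentile(samples[:,0], [16, 50, 84])
print(f"H0 = {mid:.1f} +{hi-mid:.1f} -{mid-lo:.1f} km/s/Mpc")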
In [10]:
# plot posterior predictive model

inds = np.random.choice(np.arange(samples.shape[0]), 50)
ds = np.linspace(0, 2.3, 100)

for ind in inds:
    H0 = samples[ind,0]

    v_model = H0*ds
    plt.plot(ds, v_model, 'g-', alpha=0.05)

plt.errorbar(d, v,  xerr=d_err, fmt='.', capsize=2,color='k')
plt.xlim(0, ds.max())

plt.xlabel('Distance (Mpc)')
plt.ylabel('Velocity (km/s)');

Now let's do the Pantheon dataset!¶

This is a large curated catalogue of modern supernova cosmology data.

We'll only look in the local universe, where we don't have to solve the full FRW equations.

In [11]:
# load Pantheon Plus dataset

ddir = '../data/'
fname = 'Pantheon.dat'

data = Table.read(ddir+fname, format='ascii')
In [12]:
# read in the Pantheon Plus data
z = data['zCMB']
z_err = data['zCMBERR']
mb = data['MU_SH0ES'] # distance modulus
mb_err = data['MU_SH0ES_ERR_DIAG']

cut = (z > 0.02) & (z < 0.06) & (z_err < 0.005)
z = z[cut]
z_err = z_err[cut]
mb = mb[cut]
mb_err = mb_err[cut]

# convert distance modulus to distance (Mpc), and propagate the uncertainty
d = 10**((mb-25)/5) # Mpc
d_err = d * np.log(10) * mb_err / 5
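
For reference, this is just the definition of the distance modulus rearranged, with the uncertainty propagated by differentiation:

$$ \mu = 5\log_{10}\!\left(\frac{d}{\mathrm{Mpc}}\right) + 25 \;\Rightarrow\; d = 10^{(\mu-25)/5}\ \mathrm{Mpc}, \qquad \sigma_d = \frac{\partial d}{\partial \mu}\,\sigma_\mu = \frac{\ln 10}{5}\, d\, \sigma_\mu. $$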
In [13]:
# plot Pantheon Plus data

plt.errorbar(z, d, yerr=d_err, xerr=z_err, fmt='.', capsize=2,color='k')
zs = np.linspace(0, 0.0602, 100)
c = 299792.458 # speed of light, km/s
# convert redshift to an approximate recession velocity, v = cz/(1+z)
vs = c * (zs / (1 + zs))
# overplot Hubble's law, d = v/H0, for H0 = 75 km/s/Mpc
plt.plot(zs, vs/75, 'r--', alpha=0.5)
plt.xlim(0.018,0.0602)
plt.xlabel('Redshift')
plt.ylabel('Distance (Mpc)');
In [14]:
def lnprior(H0):
    if 0 < H0 < 200:
        return 0.0
    return -np.inf

def model(H0,intercept,z):
    # now we are including an intercept, ie y = mx + b
    v = c * (z / (1 + z))
    return v/H0 + intercept

def lnlike(H0, intercept, z, d, d_err):
    return -0.5 * np.sum((d-model(H0,intercept,z))**2 / d_err**2)

def lnprob(theta, z, d, d_err):
    H0, intercept = theta
    lp = lnprior(H0)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(H0,intercept, z, d, d_err)
In [15]:
# sample with emcee
ndim, nwalkers = 2, 100 # notice we have 2 params 
pos = np.array([68,0]) + 1e-4 * np.random.randn(nwalkers, ndim)
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(z, d, d_err))
burnin = sampler.run_mcmc(pos, 500, progress=True)
sampler.reset()
sampler.run_mcmc(burnin, 1000, progress=True);
100%|████████████████████████████████████████| 500/500 [00:02<00:00, 179.63it/s]
100%|██████████████████████████████████████| 1000/1000 [00:05<00:00, 180.42it/s]
In [16]:
# chainconsumer

samples = sampler.get_chain(flat=True)
chain = Chain.from_emcee(sampler, ['H0','intercept'], "an emcee chain", discard=200, thin=2, color="indigo")
consumer = ChainConsumer().add_chain(chain)

fig = consumer.plotter.plot_walks(plot_weights=False);
In [17]:
# this is a corner plot, illustrating covariance
fig = consumer.plotter.plot();
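The tilt of the 2D contour can be summarised by the sample correlation between the two parameters, computed directly from the flattened chain:

print(np.corrcoef(samples[:,0], samples[:,1])[0,1])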
In [18]:
# plot posterior predictive model

# choose values

inds = np.random.choice(np.arange(samples.shape[0]), 50)
zs = np.linspace(0, 0.061, 100)

for ind in inds:
    H0 = samples[ind,0]
    intercept = samples[ind,1]

    d_model = model(H0, intercept, zs)
    plt.plot(zs, d_model, 'g-', alpha=0.05)

plt.errorbar(z, d, yerr=d_err, xerr=z_err, fmt='.', capsize=2,color='k')
plt.xlim(0.0,0.0602);

Higher dimensional models - covariance¶

In [19]:
def lnprior(H0):
    if 0 < H0 < 200:
        return 0.0
    return -np.inf

def model(H0,quadratic,intercept,z):
    # extend the linear model with a quadratic-in-velocity term
    v = c * (z / (1 + z))
    return v/H0 + intercept + v**2 * quadratic

def lnlike(H0, quadratic, intercept, z, d, d_err):
    return -0.5 * np.sum((d-model(H0,quadratic,intercept,z))**2 / d_err**2)

def lnprob(theta, z, d, d_err):
    H0, quadratic, intercept = theta
    lp = lnprior(H0)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(H0,quadratic,intercept, z, d, d_err)
In [20]:
# sample with emcee
ndim, nwalkers = 3, 100
pos = np.array([68,0,0]) + 1e-4 * np.random.randn(nwalkers, ndim)
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(z, d, d_err))
burnin = sampler.run_mcmc(pos, 500, progress=True)
sampler.reset()
sampler.run_mcmc(burnin, 1000, progress=True);
100%|████████████████████████████████████████| 500/500 [00:03<00:00, 143.26it/s]
100%|██████████████████████████████████████| 1000/1000 [00:07<00:00, 141.64it/s]
In [21]:
# chainconsumer

samples = sampler.get_chain(flat=True)
chain = Chain.from_emcee(sampler, ['H0','quadratic','intercept'], "an emcee chain", discard=200, thin=2, color="indigo")
consumer = ChainConsumer().add_chain(chain)
fig = consumer.plotter.plot_walks(plot_weights=False);
In [22]:
# we now have a higher dimensional corner plot
fig = consumer.plotter.plot(figsize=(7.0,7.0));
In [23]:
# plot posterior predictive model

inds = np.random.choice(np.arange(samples.shape[0]), 50)
zs = np.linspace(0, 0.061, 100)

for ind in inds:
    H0 = samples[ind,0]
    quadratic = samples[ind,1]
    intercept = samples[ind,2]

    d_model = model(H0, quadratic, intercept, zs)
    plt.plot(zs, d_model, 'g-', alpha=0.05)

plt.errorbar(z, d, yerr=d_err, xerr=z_err, fmt='.', capsize=2,color='k')
plt.xlim(0.0,0.0602);

plt.xlabel('Redshift')
plt.ylabel('Distance (Mpc)');

MCMC Next Steps¶

Or: How to become a power user

Many datasets with shared parameters? Use Hierarchical Bayes!

  • Best to use a probabilistic programming language like numpyro to specify this!
import numpyro

def model(X,Y,E):
    m = numpyro.sample("m", numpyro.distributions.Uniform(-5,5))   # prior on m
    c = numpyro.sample("c", numpyro.distributions.Uniform(-10,10)) # prior on c

    with numpyro.plate('data', len(X)):
        y_model = m*X + c
        numpyro.sample('y', numpyro.distributions.Normal(y_model,E), obs = Y)
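
To actually draw samples from a model like this you hand it to a gradient-based sampler; a minimal sketch, assuming numpyro's standard NUTS interface and some data arrays X, Y, E:

import jax.random
from numpyro.infer import MCMC, NUTS

mcmc = MCMC(NUTS(model), num_warmup=500, num_samples=1000)
mcmc.run(jax.random.PRNGKey(0), X, Y, E)
mcmc.print_summary()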

High dimensions:

  • ordinary (random-walk) samplers can fail without any obvious warning signs
  • gradient-based samplers like Hamiltonian Monte Carlo succeed!
  • if you want gradients - use JAX to write your forward model! (see the sketch below)
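
As a small illustration of that last point, here is a sketch (not from the lecture) of getting the gradient of a Gaussian log-likelihood for free with jax.grad; the input values are just placeholders:

import jax
import jax.numpy as jnp

def lnlike(H0, v, d, d_err):
    # the same Gaussian log-likelihood as above, written with jax.numpy
    return -0.5 * jnp.sum((d - v/H0)**2 / d_err**2)

dlnlike_dH0 = jax.grad(lnlike)   # derivative with respect to the first argument, H0
print(dlnlike_dH0(70.0, jnp.array([1500., 3000.]), jnp.array([20., 45.]), 3.0))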
Benjamin Pope — Associate Professor, School of Mathematical and Physical Sciences, Macquarie University
Email: benjamin.pope@mq.edu.au
Bluesky: @benjaminpope.bsky.social