Bayesian_for_LGM_PMIP.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 10 2019

@author: Martin Renoult
correspondence: martin.renoult@misu.su.se
"""


## Library

from pymc3 import *
import numpy as np
import matplotlib.pyplot as plt
import math
import scipy.stats as stat
from scipy.optimize import curve_fit
from adjustText import adjust_text

#------------------------------------------------------------------------------
### Lists to save the data while computing

list_predict_t = list()
list_predict_t_stats_66 = list()
list_predict_t_stats_90 = list()

lb_90 = list()
ub_90 = list()
lb_66 = list()
ub_66 = list()

#------------------------------------------------------------------------------
## Model data
# x = ECS
# y = Tropical temperature

# In the following order:
# PMIP2[MIROC, IPSL, CCSM, ECHAM, FGOALS, HadCM3, ECBILT]
# PMIP3[NCAR-CCSM4, IPSL-CM5A-LR, MIROC-ESM, MPI-ESM-P, CNRM-CM5, MRI-CGCM3, FGOALS-g2]
# PMIP4[MPI-ESM1.2-LR, MIROC-ES2L, INM-CM4-8, AWI-ESM-1.1-LR]

# Data

#x = [4,4.4,2.7,3.4,2.3,3.3,1.8,
#     3.2,4.13,4.67,3.45,3.25,2.6,3.37,
#     3.01, 2.66, 1.81, 3.61]
#
#y = [-2.74899797,-2.82983243,-2.11649457,-3.15521371,-2.35793821,-2.77644642,-1.33816631,
#     -2.5957031, -3.3755188, -2.5169983, -2.5567322, -1.6669922, -2.81839, -3.1465664,
#     -2.0607605,-2.2322693,-2.4320984,-1.7489014]

# Latest model versions approach

#x = [3.3,1.8,
#     3.2,4.13,3.25,2.6,3.37,
#     3.01, 2.66, 1.81, 3.61]
#
#y = [-2.77644642,-1.33816631,
#     -2.5957031, -3.3755188, -1.6669922, -2.81839, -3.1465664,
#     -2.0607605,-2.2322693,-2.4320984,-1.7489014]

# PMIP2 only
#x = [4,4.4,2.7,3.4,2.3,3.3,1.8]
#y = [-2.74899797,-2.82983243,-2.11649457,-3.15521371,-2.35793821,-2.77644642,-1.33816631]

# PMIP2 + 3
x = [4,4.4,2.7,3.4,2.3,3.3,1.8,
     3.2,4.13,4.67,3.45,3.25,2.6,3.37]

y = [-2.74899797,-2.82983243,-2.11649457,-3.15521371,-2.35793821,-2.77644642,-1.33816631,
     -2.5957031, -3.3755188, -2.5169983, -2.5567322, -1.6669922, -2.81839, -3.1465664]

# PMIP3 only
#x = [3.2,4.13,4.67,3.45,3.25,2.6,3.37]
#
#y = [-2.5957031, -3.3755188, -2.5169983, -2.5567322, -1.6669922, -2.81839, -3.1465664]

#------------------------------------------------------------------------------
## Uncomment to see the data distribution

#fig, ax = plt.subplots(figsize=(7, 7))
#
##plt.plot(x[0:7], y[0:7], '.', label='PMIP2', markersize=15, color='#0066ff',mec='#25097C')
##plt.plot(x[7:14], y[7:14], '.', label='PMIP3',markersize=15, color='#ff9933',mec='#9C590C')
##plt.plot(x[14:18], y[14:18], '.', label='PMIP4',markersize=15, color='#ff33cc',mec='#990073')
#
#plt.xlim(-1, 6)
#plt.ylim(-8, 1)
#plt.legend(loc='upper left', bbox_to_anchor=(0.2, 0.35), fancybox=True)
#plt.xlabel('Climate Sensitivity (K)', labelpad=-40, weight='bold')
#plt.ylabel('LGM tropical (20°S - 30°N) temperature change (°C)', position=(0,0.4), weight='bold')
#ax.spines['top'].set_visible(False)
#ax.spines['right'].set_visible(False)
#ax.spines['bottom'].set_position(('data', 0))
#ax.spines['left'].set_position(('data', 0))
#ax.spines['left'].set_linewidth(2)
#ax.spines['bottom'].set_linewidth(2)
#ax.tick_params('x', direction='in', pad=-20, width=2)
#ax.tick_params('y', width=2)
#plt.yticks(ticks=[-1, -2,-3,  -4,-5, -6, -7, -8], labels=['-1', '-2','-3', '-4','-5', '-6', '-7', '-8'],
#           weight='bold')
#plt.xticks(ticks=[1, 2, 3, 4, 5, 6], labels=['1', '2','3',  '4','5', '6'],
#           weight='bold')

#------------------------------------------------------------------------------
## MCMC model

with Model() as model: # model specifications in PyMC3 are wrapped in a with-statement
    # Define priors
    sigma = HalfCauchy('Sigma', beta=5, testval=1.)
    intercept = Normal('Intercept', 0, sd=1)
    x_coeff = Normal('x', -1, sd=1)

    # Define likelihood
    likelihood = Normal('y', mu=intercept + x_coeff * x, 
                        sd=sigma, observed=y)
    
    # 4 chains in parallel
    # By default, the sampling method is NUTS
    # The following line will not work with PyMC3 older than 3.8. If you use an
    # older version, replace "cores=4" by "njobs=4"
    trace = sample(progressbar=False, draws=100000, cores=4)

# Extract the data of the trace
values_x = trace['x']
values_intercept = trace['Intercept']
values_sigma = trace['Sigma']

# Gelman-Rubin test for convergence of the model
# If BFMI = Gelman-Rubin, then you have convergence
# It compares the variance between the chains to the variance inside a chain 
# and both variances should be equal if all the chains (the model) converged

#bfmi = bfmi(trace)
#max_gr = max(np.max(gr_stats) for gr_stats in gelman_rubin(trace).values())
#
#(energyplot(trace, legend=True, figsize=(6, 4))
#   .set_title("BFMI = {}\nGelman-Rubin = {}".format(bfmi, max_gr)));

#------------------------------------------------------------------------------
## Predicted temperature calculation
## Create a predicted ensemble for 5-95% estimate

# Discrete sample of sensitivity
ran = np.linspace(0, 10, 500)

# Loop for predicted temperature based on trace and line above
for j in ran:
# multiply sigma by a factor 
    predicted_t = values_x * j + values_intercept + np.random.normal(loc=0, scale=values_sigma)
    
    # Calculate and save the 5-95% interval of the prediction
    stats_predict_t_90 = np.percentile(predicted_t, q=(5,95))
    # Calculate and save the 17-83% interval of the prediction
    stats_predict_t_66 = np.percentile(predicted_t, q=(17,83))

    # Save in a list the intervals for every sample of sensitivity "ran"
    list_predict_t_stats_66.append(stats_predict_t_66)
    list_predict_t_stats_90.append(stats_predict_t_90)

#------------------------------------------------------------------------------
## Bayesian framework

# prior on sensitivity
#prior_S = np.random.uniform(0, 10, size=400000)
#prior_S = stat.gamma.rvs(a=2, loc=0, scale=2, size=400000)

def truncated_cauchy_rvs(loc=0, scale=1, a=-1, b=1, size=None):
    """
    Generate random samples from a truncated Cauchy distribution.

    `loc` and `scale` are the location and scale parameters of the distribution.
    `a` and `b` define the interval [a, b] to which the distribution is to be
    limited.

    With the default values of the parameters, the samples are generated
    from the standard Cauchy distribution limited to the interval [-1, 1].
    """
    ua = np.arctan((a - loc)/scale)/np.pi + 0.5
    ub = np.arctan((b - loc)/scale)/np.pi + 0.5
    U = np.random.uniform(ua, ub, size=size)
    rvs =  loc + scale * np.tan(np.pi*(U - 0.5))
    return rvs

# Truncated-at-zero Cauchy distribution
prior_S = truncated_cauchy_rvs(loc=2.5, scale=3, a=1/math.inf, b=math.inf, size=400000)

# Compute 5-95% and 17-83% prior intervals
prior_stats_90 = np.percentile(prior_S, q=(5, 95))
prior_stats_66 = np.percentile(prior_S, q=(17, 83))

# Model to generate a single point based on the prior on S
def gen_mod(alpha, s, beta, error):
    return alpha * s + beta + np.random.normal(loc=0, scale=error)

# Likelihood estimate
def likelihood(sim, obs, std):
    return stat.norm.pdf(x=sim, loc=obs, scale=std)

# Generate temperatures
model_T = gen_mod(values_x, prior_S, values_intercept, values_sigma)

# "Real" observed data
# Tropical T
T = -2.2
stdT = 0.4
gauss_obs = np.random.normal(loc=T, scale=stdT, size=800000)

obs_stats_90 = np.percentile(gauss_obs, q=(5, 95))

# Create weights through importance sampling
weight = likelihood(model_T, T, stdT)
weight = weight/weight.sum()

# Bayesian updating of the prior with importance sampling weight
posterior = np.random.choice(prior_S, size=100000, p=weight)

post_median = np.median(posterior)

# Compute 5-95% and 17-83% posterior intervals 
post_stats_90 = np.percentile(posterior, q=(5, 95))
post_stats_66 = np.percentile(posterior, q=(17, 83))
#------------------------------------------------------------------------------
## Plot part

## 1st plot: Trace plot
plt.figure(figsize=(7, 7))
traceplot(trace)
plt.tight_layout()
##plt.savefig('Trace_PMIP.pdf')

                       #-------------------------------

## 2nd plot: BLR
# Plot the data
fig, ax = plt.subplots(figsize=(7, 7))

# Range of the plotted MCMC lines and plot of the lines
range_eval = np.linspace(-1, 8, 100)
plots.plot_posterior_predictive_glm(trace, samples=100, eval=range_eval, 
                                    label='Predictive regression lines', alpha=0.35)

# Plot the 5-95% interval
for h in list_predict_t_stats_90:
    lb_90.append(h[0])
    ub_90.append(h[1])

for g in list_predict_t_stats_66:
    lb_66.append(g[0])
    ub_66.append(g[1])
        
# Compute running mean to smooth the confidence interval
rand_rm = np.convolve(ran, np.ones((50,))/50, mode='valid')

low_rm_90 = np.convolve(lb_90, np.ones((50,))/50, mode='valid')
up_rm_90 = np.convolve(ub_90, np.ones((50,))/50, mode='valid')

low_rm_66 = np.convolve(lb_66, np.ones((50,))/50, mode='valid')
up_rm_66 = np.convolve(ub_66, np.ones((50,))/50, mode='valid')

plt.plot(rand_rm, low_rm_90, linestyle='-', color='red', label='5-95% interval', alpha=0.75, linewidth=2)
plt.plot(rand_rm, up_rm_90, linestyle='-', color='red', alpha=0.75, linewidth=2)
plt.plot(rand_rm, low_rm_66, linestyle='--', color='red', label='17-83% interval', alpha=0.75, linewidth=2)
plt.plot(rand_rm, up_rm_66, linestyle='--', color='red', alpha=0.75, linewidth=2)

ylim = plt.ylim(-5, 0.5)
xlim = plt.xlim(-1,8)

# Plot 2 std on the figure instead of one (esthetic change)
stdT = 0.7

# Line for observed value, 2 standard deviations
plt.axvline(x=0.2, ymin=1-(-ylim[1]/(ylim[0]-ylim[1]))-((T-stdT)/(ylim[0]-ylim[1])), ymax=1-(-ylim[1]/(ylim[0]-ylim[1]))-((T+stdT)/(ylim[0]-ylim[1])), 
            color='#009900', label='5-95% observed value', linewidth=2)
plt.axvline(x=0.2, ymin=1-(-ylim[1]/(ylim[0]-ylim[1]))-((T-stdT)/(ylim[0]-ylim[1])), ymax=1-(-ylim[1]/(ylim[0]-ylim[1]))-((T-stdT)/(ylim[0]-ylim[1])), 
            color='#009900', marker='v')
plt.axvline(x=0.2, ymin=1-(-ylim[1]/(ylim[0]-ylim[1]))-((T+stdT)/(ylim[0]-ylim[1])), ymax=1-(-ylim[1]/(ylim[0]-ylim[1]))-((T+stdT)/(ylim[0]-ylim[1])), 
            color='#009900', marker='^')

plt.axvline(x=0.2, ymin=1-(-ylim[1]/(ylim[0]-ylim[1]))-(T/(ylim[0]-ylim[1])), ymax=1-(-ylim[1]/(ylim[0]-ylim[1]))-(T/(ylim[0]-ylim[1])), 
            color='#009900', marker='.', ms=12)

plt.axhline(y=-0.15, xmin=(-1/(xlim[0]-xlim[1]))-(post_stats_90[0]/(xlim[0]-xlim[1])), 
            xmax=(-1/(xlim[0]-xlim[1]))-(post_stats_90[1]/(xlim[0]-xlim[1])), c='#9933ff', label='5-95% posterior', linewidth=2)
plt.axhline(y=-0.15, xmin=(-1/(xlim[0]-xlim[1]))-(post_stats_90[0]/(xlim[0]-xlim[1])), 
            xmax=(-1/(xlim[0]-xlim[1]))-(post_stats_90[0]/(xlim[0]-xlim[1])), marker='<', c='#9933ff')
plt.axhline(y=-0.15, xmin=(-1/(xlim[0]-xlim[1]))-(post_stats_90[1]/(xlim[0]-xlim[1])), 
            xmax=(-1/(xlim[0]-xlim[1]))-(post_stats_90[1]/(xlim[0]-xlim[1])), marker='>', c='#9933ff')

plt.axhline(y=-0.15, xmin=(-1/(xlim[0]-xlim[1]))-(post_median/(xlim[0]-xlim[1])),
            xmax=(-1/(xlim[0]-xlim[1]))-(post_median/(xlim[0]-xlim[1])), c='#9933ff', marker='.', ms=12)

# All PMIP models
#plt.plot(x[0:7], y[0:7], '.', label='PMIP2', markersize=17, color='#0066ff',mec='#25097C')
#plt.plot(x[7:14], y[7:14], '.', label='PMIP3',markersize=17, color='#ff9933',mec='#9C590C')
#plt.plot(x[14:18], y[14:18], '.', label='PMIP4',markersize=17, color='#ff33cc',mec='#990073')

# Current latest versions         
#plt.plot(x[0:2], y[0:2], '.', label='PMIP2', markersize=17, color='#0066ff',mec='#25097C')
#plt.plot(x[2:7], y[2:7], '.', label='PMIP3',markersize=17, color='#ff9933',mec='#9C590C')
#plt.plot(x[7:11], y[7:11], '.', label='PMIP4',markersize=17, color='#ff33cc',mec='#990073')

# Isolated PMIP3
plt.plot(x[0:7], y[0:7], '.', label='PMIP3',markersize=17, color='#ff9933',mec='#9C590C')

# Adjust text function. Computes location of model numbers based on all points (esthetic)
texts = [plt.text(x[i], y[i], '%s' %(i+6), ha='center', va='center', fontsize=15) for i in range(0, 4, 1)]
adjust_text(texts)
#texts2 = [plt.text(x[i], y[i], '%s' %(i+8), ha='center', va='center', fontsize=15) for i in range(4, 7)]
#adjust_text(texts2)
#texts3 = [plt.text(x[i], y[i], '%s' %(i+17), ha='center', va='center', fontsize=15) for i in range(7, 11)]
#adjust_text(texts3)
  
# Make it pretty
plt.legend(loc='upper left', bbox_to_anchor=(0.6, 0.85), fancybox=True, ncol=1, edgecolor='k')
plt.xlabel('Climate sensitivity (K)', labelpad=-40, fontsize=16)
plt.ylabel('LGM tropical (20° S - 30° N) \ntemperature anomaly (K)', position=(0,0.4), fontsize=16)
ax.spines['top'].set_alpha(0)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_position(('data', 0))
ax.spines['left'].set_position(('data', 0))
ax.spines['top'].set_position(('data', 0))
ax.spines['left'].set_linewidth(2)
ax.spines['bottom'].set_linewidth(2)
ax.tick_params('x', direction='in', pad=-20, width=2)
ax.tick_params('y', width=2)
plt.yticks(ticks=np.arange(-1, -6, -1), fontsize=14)
plt.xticks(ticks=np.arange(1,9,1), fontsize=14)
plt.tight_layout()
#plt.savefig('Bayes_PMIP.pdf', dpi=300)

                        #-------------------------------              

## 3rd plot: Posterior
fig, ax = plt.subplots(figsize=(7,7))

## Prior distribution line plot
scale = 2
cauchy_scale = 3
loc = 2.5
a = 0
b = 10
k = 2

x = np.linspace(0, 10, 1000)

# /!\ *1.3 account for the truncation at zero for the Cauchy distribution (esthetic approximation for the Figure)
cauchy = (1/((np.pi*cauchy_scale)*(1+((x-loc)/cauchy_scale)**2)))*1.3
uniform = np.linspace(1/(b-a), 1/(b-a), 1000)
gamma = (x**(k-1)*np.exp(-x/scale))/(scale**k)

plt.plot(x, cauchy, '-', color='darkorange', label='Prior',linewidth=4, linestyle='--')
#plt.plot(x, uniform, '-', color='darkorange', label='Prior',linewidth=4, linestyle='--')
#plt.plot(x, gamma, '-', color='darkorange', label='Prior',linewidth=4, linestyle='--')

# Fit function. Doesn't work well sometimes...
# Changing the value of alpha plot the true histogram behind
def fit_function(x, A, beta, B, mu, sigma):
    return (A * np.exp(-x/beta) + B * np.exp(-1.0 * (x - mu)**2 / (2 * sigma**2)))
     
n, bins, patches = plt.hist(posterior, density=True, bins=500, alpha=0)
xspace = np.linspace(0, 8, 1000000)
binscenters = np.array([0.5 * (bins[ijk] + bins[ijk+1]) for ijk in range(len(bins)-1)])
popt, pcov = curve_fit(fit_function, xdata=binscenters, ydata=n)
plt.plot(xspace, fit_function(xspace, *popt), color='darkorange', linewidth=4, label='Posterior')

ylim = plt.ylim(-0.02, 0.5)
xlim = plt.xlim(-1,8)

plt.xlabel('Climate sensitivity (K)', fontsize=16)
plt.ylabel('Probability', fontsize=16)

plt.axhline(y=0.008, xmin=(-1/(xlim[0]-xlim[1]))-(post_stats_90[0]/(xlim[0]-xlim[1])), 
            xmax=(-1/(xlim[0]-xlim[1]))-(post_stats_90[1]/(xlim[0]-xlim[1])), c='#9933ff', label='5-95% estimate', linewidth=2)
plt.axhline(y=0.008, xmin=(-1/(xlim[0]-xlim[1]))-(post_stats_90[0]/(xlim[0]-xlim[1])), 
            xmax=(-1/(xlim[0]-xlim[1]))-(post_stats_90[0]/(xlim[0]-xlim[1])), marker='<', c='#9933ff')
plt.axhline(y=0.008, xmin=(-1/(xlim[0]-xlim[1]))-(post_stats_90[1]/(xlim[0]-xlim[1])), 
            xmax=(-1/(xlim[0]-xlim[1]))-(post_stats_90[1]/(xlim[0]-xlim[1])), marker='>', c='#9933ff')

plt.axhline(y=0.008, xmin=(-1/(xlim[0]-xlim[1]))-(post_median/(xlim[0]-xlim[1])),
            xmax=(-1/(xlim[0]-xlim[1]))-(post_median/(xlim[0]-xlim[1])), c='#9933ff', marker='.', ms=12)

plt.axhline(y=0.02, xmin=(-1/(xlim[0]-xlim[1]))-(post_stats_66[0]/(xlim[0]-xlim[1])), 
            xmax=(-1/(xlim[0]-xlim[1]))-(post_stats_66[1]/(xlim[0]-xlim[1])), linestyle=':', c='#9933ff', label='17-83% estimate', linewidth=2)
plt.axhline(y=0.02, xmin=(-1/(xlim[0]-xlim[1]))-(post_stats_66[0]/(xlim[0]-xlim[1])), 
            xmax=(-1/(xlim[0]-xlim[1]))-(post_stats_66[0]/(xlim[0]-xlim[1])), marker='<', c='#9933ff')
plt.axhline(y=0.02, xmin=(-1/(xlim[0]-xlim[1]))-(post_stats_66[1]/(xlim[0]-xlim[1])), 
            xmax=(-1/(xlim[0]-xlim[1]))-(post_stats_66[1]/(xlim[0]-xlim[1])), marker='>', c='#9933ff')

plt.axhline(y=0.02, xmin=(-1/(xlim[0]-xlim[1]))-(post_median/(xlim[0]-xlim[1])),
            xmax=(-1/(xlim[0]-xlim[1]))-(post_median/(xlim[0]-xlim[1])), c='#9933ff', marker='.', ms=12)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_position(('data', 0))
ax.spines['left'].set_position(('data', 0))
ax.spines['left'].set_linewidth(2)
ax.spines['bottom'].set_linewidth(2)
ax.tick_params('x', width=2)
ax.tick_params('y', width=2)
plt.xticks(ticks=np.arange(1,9,1), fontsize=14)
plt.yticks([0.1, 0.2, 0.3, 0.4, 0.5], fontsize=14)
plt.legend(loc='upper right', edgecolor='k')
plt.tight_layout()
#plt.savefig('Posterior_PMIP.pdf', dpi=300)