import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Latex
$\begin{eqnarray} K(x,x') = \lambda^{-1}*e^{\frac{-1}{2*lsep^2}.||x-x'||^2}\\ \end{eqnarray}$
# Define Kernel - Using Radial Basis Function
def Kernel(x,x_p, lam=0.1, lsep=0.1):
"""
lam: lamda prior on distribution of model, used to combine phi(x) linearly
lsep: length of separation between points
"""
return lam**(-1.) * np.exp((-1./(2*lsep**2))*(np.sum(x**2, 1).reshape(-1,1) \
+ np.sum(x_p**2, 1).reshape(1,-1) \
- 2*x.dot(x_p.T)))
$f = N(0, K(x_{train}, x_{train}))$
# - Generate Train data
n=100
rng=(-10,10)
x_train = np.linspace(rng[0],rng[1],n).reshape(-1,1) # x train points
K=Kernel(x_train, x_train, lsep=1, lam=1)
f = np.random.multivariate_normal(np.zeros(n), K) # generate a random function
plt.imshow(K)
plt.title('ColorMap of Covariance matrix K')
plt.colorbar(orientation='vertical')
plt.show()
plt.plot(x_train, f)
plt.title('saved an arbitrary f to see if we can re-learn it')
plt.show()
$y_{train} = N(f, \sigma_{noise}^2*I)$
sig_noise = 0.5
y_train = np.random.multivariate_normal(f, sig_noise*np.eye(n))
y_train = y_train.reshape(-1,1)
plt.scatter(x_train, y_train, c='g', label=r'$y_{train}$')
#plt.plot(x_train, f, c='black', label='f')
plt.title(r'Generated $y_{train}$ using prior f with $\sigma_{noise}$='+str(sig_noise))
plt.legend(loc='lower right')
plt.show()
We will learn in steps using M observations at a time. (Basically these observation will be sampled from our bag of training data we saved earlier)
$\begin{bmatrix} y_M\\ y_D \end{bmatrix}$$ $$ \sim\ N(0,\ \sigma_{noise}^2.I\ +\ \begin{bmatrix} K(X_{M},X_{M}) & K(X_{D},X_{M})^T\\ K(X_{D},X_{M}) & K(X_{D},X_{D}) \end{bmatrix})$
Now, the Expected value of $y_M=E(y_M)$ (the new M predictions) based on distribution shown above (from data D seen so far):
$\mu_M = K(X_{M},X_{D}) * K(X_{D},X_{D})^{-1} * y_D$
And the variance of these predictions:
$ \Sigma_M = \sigma_{noise}^2*I + K(X_{M},X_{M}) - K(X_{M},X_{D}) * K(X_{D},X_{D})^{-1} * K(X_{M},X_{D})^T$
Basically $y_M\ |\ X_{D}, y_{D} \sim N(\mu_{M}, \Sigma_M)$
We will implement following algorithm:
M = 5 # number of observations at a time
D = n # total number of observations
# Prepare the observations (randomly permutate train data)
ind_shuffled = np.random.permutation(x_train.shape[0])
x_obs, y_obs = x_train[ind_shuffled], y_train[ind_shuffled]
# Get initial observations
i, Mini=0, 5
x_D, y_D = x_obs[i:i+Mini], y_obs[i:i+Mini]
i += Mini
errorTrace_RMSE = []
# - loop through observations
while i+M <= D:
print 'for observations {0}:{1}'.format(i, i+M)
# - predict new y (get mean and variance of predictions)
# Add some new points to previously observed points.
# We will see how well we do on previously observed points
# and how much variance we have on not-yet-observed points.
x_M = x_obs[0:i+M]
K_DD = Kernel(x_D, x_D)
K_MD = Kernel(x_M, x_D)
K_MM = Kernel(x_M, x_M)
# - Mean
mu_M = (K_MD.dot(np.linalg.inv(K_DD))).dot(y_D)
# - Co-Variance
Sig_M = sig_noise*np.eye(i+M) + K_MM - (K_MD.dot(np.linalg.inv(K_DD))).dot(K_MD.T)
# - get variances (along diagonal of co-variance matrix)
sd = np.sqrt(np.diagonal(Sig_M)).reshape(-1,1)
point_of_maxVariance = np.argmax(sd, axis=0)
# - Actually observed y and find error between E(y_M) - y_Mobs
y_Mobs = y_obs[0:i+M]
err_RMSE = np.sqrt(np.mean((mu_M-y_Mobs)**2))
errorTrace_RMSE.append(err_RMSE)
# - plots
plt.plot(x_train, f, c='black', label='f: Truth')
plt.scatter(x_D, y_D, c='green', marker=(5, 2), label=r'$y_{M}$:noisy observations')
ind_sorted = np.argsort(x_M, axis=0).flatten()
plt.plot(x_M[ind_sorted], mu_M[ind_sorted], 'r--', label=r'$\mu_{M}$:predicted mean')
plt.scatter(x_M[i:i+M], mu_M[i:i+M], c='blue', marker='o', label='Expected values of new predictions')
UL = mu_M + sd
LL = mu_M - sd
plt.fill_between(x_M[ind_sorted].flatten(), UL[ind_sorted].flatten(), LL[ind_sorted].flatten(), facecolor='gray', alpha=0.5)
plt.vlines(x_M[point_of_maxVariance], plt.ylim()[0], plt.ylim()[1], colors='blue',linestyles='--')
plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")
plt.show()
# - Add newly observed y to observation so far
x_D, y_D = np.vstack((x_D, x_M[i:i+M])), np.vstack((y_D, y_Mobs[i:i+M]))
i += M
plt.plot(errorTrace_RMSE)
plt.title("Error trace")
plt.grid()
plt.show()