import numpy as np
from scipy.optimize import minimize
[docs]class model(object):
"""Define a heteroskedastic linear model and calculate likelihood, gradients, and maximum likelihood estimates of
parameters.
Parameters
----------
y : :class:`~numpy:numpy.array`
1D array of phenotype observations
X : :class:`~numpy:numpy.array`
Design matrix for the fixed mean effects.
V : :class:`~numpy:numpy.array`
Design matrix for the fixed variance effects.
Returns
-------
model : :class:`hetlm.model`
heteroskedastic linear model class with input data
"""
def __init__(self,y,X,V):
# Get sample size
self.n = X.shape[0]
# Check shape of arrays
if V.ndim == 2:
self.n_fixed_variance = V.shape[1]
elif V.ndim == 1:
self.n_fixed_variance = 1
V = V.reshape((self.n, 1))
else:
raise (ValueError('Incorrect dimension of Variance Covariate Array'))
if X.ndim == 2:
self.n_fixed_mean = X.shape[1]
elif X.ndim == 1:
self.n_fixed_mean = 1
X = X.reshape((self.n, 1))
else:
raise (ValueError('Incorrect dimension of Mean Covariate Array'))
# phenotype
self.y=y
# mean covariates
self.X=X
# variance covariates
self.V=V
# Compute likelihood of data given beta, alpha
[docs] def likelihood(self,beta,alpha,negative=False):
"""Compute the log of the likelihood, the likelihood at the maximum likelihood for the fixed mean effects
Parameters
----------
alpha : :class:`~numpy:numpy.array`
value of fixed mean effects to compute likelihood for
beta : :class:`~numpy:numpy.array`
value of fixed variance effects to compute likelihood for
negative : :class:`bool`
compute -2*L/n-log(2*pi), where L is the log-likelihood, the function that is minimized to find the MLE. Default is False.
Returns
-------
L : :class:`float`
log-likelihood of data given parameters.
"""
Vbeta = self.V.dot(beta)
resid = self.y - self.X.dot(alpha)
L = np.sum(Vbeta) + np.sum(np.square(resid) * np.exp(-Vbeta))
if not negative:
L = -0.5*(L+self.n*np.log(2*np.pi))
return L
# Compute MLE of alpha given beta
[docs] def alpha_mle(self,beta):
"""Compute the maximum likelihood estimate of the fixed mean effect parameters, given
particular fixed variance effect parameters and variance of random effects
Parameters
----------
beta : :class:`~numpy:numpy.array`
value of fixed variance effects
Returns
-------
alpha : :class:`~numpy:numpy.array`
maximum likelihood estimate of alpha given beta
"""
D_inv = np.exp(-self.V.dot(beta))
X_t_D_inv = np.transpose(self.X) * D_inv
alpha = np.linalg.solve(X_t_D_inv.dot(self.X), X_t_D_inv.dot(self.y))
return alpha
# Compute gradient with respect to beta for a given beta and alpha
[docs] def grad_beta(self,beta, alpha):
"""Compute the gradient with respect to the fixed variance effects of -2*L/n-log(2*pi),
where L is the log-likelihood, the function that is minimized to find the MLE
Parameters
----------
beta : :class:`~numpy:numpy.array`
value of fixed variance effects
alpha : :class:`~numpy:numpy.array`
value of fixed mean effects to gradient for
Returns
-------
grad_beta : :class:`~numpy:numpy.array`
"""
D_inv = np.exp(-self.V.dot(beta))
resid_2 = np.square(self.y - self.X.dot(alpha))
k = 1 - resid_2 * D_inv
V_scaled = np.transpose(np.transpose(self.V) * k)
n1t = np.ones((1, self.X.shape[0]))
return n1t.dot(V_scaled)
# OLS solution for alpha
[docs] def alpha_ols(self):
"""Compute the ordinary least squares (OLS) estimate of the fixed mean effect parameters
Returns
-------
alpha : :class:`~numpy:numpy.array`
ordinary least-squares estimate of alpha
"""
# Get initial guess for alpha
return np.linalg.solve(np.dot(self.X.T, self.X), np.dot(self.X.T, self.y))
# Find an approximation to the MLE of beta given alpha
[docs] def approx_beta_mle(self):
"""Analytical approximation to the maximum likelihood estimate of the fixed variance effects
Returns
-------
beta : :class:`~numpy:numpy.array`
approximate MLE of beta
"""
# Get alpha OLS
alpha=self.alpha_ols()
# squared residuals
resid_2=np.square(self.y-self.X.dot(alpha)).reshape((self.X.shape[0]))
# RHS
V_scaled=np.transpose(np.transpose(self.V)*(resid_2-1))
n1t=np.ones((1,self.X.shape[0]))
b=n1t.dot(V_scaled).reshape(self.V.shape[1])
# LHS
V_t_scaled=np.transpose(self.V)*resid_2
A=V_t_scaled.dot(self.V)
return np.linalg.solve(A,b)
def beta_cov(self):
# Find the covariance matrix of beta
return 2 * np.linalg.inv(np.dot(self.V.T, self.V))
def alpha_cov(self,beta):
# Find the covariance matrix for alpha given beta
D_inv=np.exp(-self.V.dot(beta))
precision=np.dot(np.transpose(self.X)*D_inv,self.X)
return np.linalg.inv(precision)
[docs] def optimize_model(self):
"""Find the maximum likelihood estimate (MLE) of the parameters and their sampling distribution.
Returns
-------
optim : :class:`dict`
keys: MLEs ('alpha', fixed mean effects; 'beta', fixed variance effects),
their standard errors ('alpha_se', 'beta_se'),
covariance matrix for sampling distribution of parameter vectors ('beta_cov' and 'alpha_cov'),
maximum likelihood ('likelihood'), whether optimisation was successful ('success'),
"""
# Get initial guess for beta
beta_init = self.approx_beta_mle()
# Optimize
optimized = minimize(likelihood_beta, beta_init,
args=(self.y, self.X, self.V),
method='L-BFGS-B',
jac=gradient_beta)
if not optimized.success:
print('Optimization unsuccessful.')
# Get MLE
beta_mle = optimized['x']
alpha = self.alpha_mle(beta_mle)
# Get parameter covariance
optim = {}
optim['beta'] = optimized['x']
optim['alpha'] = self.alpha_mle(optim['beta'])
optim['beta_cov'] = self.beta_cov()
optim['beta_se'] = np.sqrt(np.diag(optim['beta_cov']))
optim['alpha_cov'] = self.alpha_cov(beta_mle)
optim['alpha_se'] = np.sqrt(np.diag(optim['alpha_cov']))
optim['likelihood'] = -0.5 *self.n * (optimized['fun'] + np.log(2 * np.pi))
optim['success']=optimized.success
return optim
##### Functions to pass to opimizer ######
def likelihood_beta(beta,*args):
# Profile likelihood of hlm_model as a function of beta. To pass to L-BFGS-B
y,X,V=args
n=np.float(X.shape[0])
hlm_mod=model(y,X,V)
alpha=hlm_mod.alpha_mle(beta)
return hlm_mod.likelihood(beta,alpha,negative=True)/n
def gradient_beta(beta, *args):
# Gradient of profile likelihood with respect to beta (at the MLE of alpha). To pass to L-BFGS-B.
y,X,V=args
n = np.float(X.shape[0])
hlm_mod=model(y,X,V)
alpha = hlm_mod.alpha_mle(beta)
return hlm_mod.grad_beta(beta, alpha).reshape((hlm_mod.V.shape[1]))/n