weblogolib.logomath

50 """The Dirichlet probability distribution. The Dirichlet is a continuous 51 multivariate probability distribution across non-negative unit length 52 vectors. In other words, the Dirichlet is a probability distribution of 53 probability distributions. It is conjugate to the multinomial 54 distribution and is widely used in Bayesian statistics. 55 56 The Dirichlet probability distribution of order K-1 is 57 58 p(theta_1,...,theta_K) d theta_1 ... d theta_K = 59 (1/Z) prod_i=1,K theta_i^{alpha_i - 1} delta(1 -sum_i=1,K theta_i) 60 61 The normalization factor Z can be expressed in terms of gamma functions: 62 63 Z = {prod_i=1,K Gamma(alpha_i)} / {Gamma( sum_i=1,K alpha_i)} 64 65 The K constants, alpha_1,...,alpha_K, must be positive. The K parameters, 66 theta_1,...,theta_K are nonnegative and sum to 1. 67 68 Status: 69 Alpha 70 """ 71 __slots__ = 'alpha', '_total', '_mean', 72 73 74 75

76 - def __init__(self, alpha) :

77 """ 78 Args: 79 - alpha -- The parameters of the Dirichlet prior distribution. 80 A vector of non-negative real numbers. 81 """ 82 # TODO: Check that alphas are positive 83 #TODO : what if alpha's not one dimensional? 84 self.alpha = asarray(alpha, float64) 85 86 self._total = sum(alpha) 87 self._mean = None

88 89

90 - def sample(self) :

91 """Return a randomly generated probability vector. 92 93 Random samples are generated by sampling K values from gamma 94 distributions with parameters a=\alpha_i, b=1, and renormalizing. 95 96 Ref: 97 A.M. Law, W.D. Kelton, Simulation Modeling and Analysis (1991). 98 Authors: 99 Gavin E. Crooks <gec@compbio.berkeley.edu> (2002) 100 """ 101 alpha = self.alpha 102 K = len(alpha) 103 theta = zeros( (K,), float64) 104 105 for k in range(K): 106 theta[k] = random.gammavariate(alpha[k], 1.0) 107 theta /= sum(theta) 108 109 return theta

110

111 - def mean(self) :

112 if self._mean ==None: 113 self._mean = self.alpha / self._total 114 return self._mean

115

116 - def covariance(self) :

117 alpha = self.alpha 118 A = sum(alpha) 119 #A2 = A * A 120 K = len(alpha) 121 cv = zeros( (K,K), float64) 122 123 for i in range(K) : 124 cv[i,i] = alpha[i] * (1. - alpha[i]/A) / (A * (A+1.) ) 125 126 for i in range(K) : 127 for j in range(i+1,K) : 128 v = - alpha[i] * alpha[j] / (A * A * (A+1.) ) 129 cv[i,j] = v 130 cv[j,i] = v 131 return cv

132

133 - def mean_x(self, x) :

134 x = asarray(x, float64) 135 if shape(x) != shape(self.alpha) : 136 raise ValueError("Argument must be same dimension as Dirichlet") 137 return sum( x * self.mean())

138

139 - def variance_x(self, x) :

140 x = asarray(x, float64) 141 if shape(x) != shape(self.alpha) : 142 raise ValueError("Argument must be same dimension as Dirichlet") 143 144 cv = self.covariance() 145 var = na.dot(na.dot(na.transpose( x), cv), x) 146 return var

147 148

149 - def mean_entropy(self) :

150 """Calculate the average entropy of probabilities sampled 151 from this Dirichlet distribution. 152 153 Returns: 154 The average entropy. 155 156 Ref: 157 Wolpert & Wolf, PRE 53:6841-6854 (1996) Theorem 7 158 (Warning: this paper contains typos.) 159 Status: 160 Alpha 161 Authors: 162 GEC 2005 163 164 """ 165 # TODO: Optimize 166 alpha = self.alpha 167 A = float(sum(alpha)) 168 ent = 0.0 169 for a in alpha: 170 if a>0 : ent += - 1.0 * a * digamma( 1.0+a) # FIXME: Check 171 ent /= A 172 ent += digamma(A+1.0) 173 return ent

174 175 176

177 - def variance_entropy(self):

178 """Calculate the variance of the Dirichlet entropy. 179 180 Ref: 181 Wolpert & Wolf, PRE 53:6841-6854 (1996) Theorem 8 182 (Warning: this paper contains typos.) 183 """ 184 alpha = self.alpha 185 A = float(sum(alpha)) 186 A2 = A * (A+1) 187 L = len(alpha) 188 189 dg1 = zeros( (L) , float64) 190 dg2 = zeros( (L) , float64) 191 tg2 = zeros( (L) , float64) 192 193 for i in range(L) : 194 dg1[i] = digamma(alpha[i] + 1.0) 195 dg2[i] = digamma(alpha[i] + 2.0) 196 tg2[i] = trigamma(alpha[i] + 2.0) 197 198 dg_Ap2 = digamma( A+2. ) 199 tg_Ap2 = trigamma( A+2. ) 200 201 mean = self.mean_entropy() 202 var = 0.0 203 204 for i in range(L) : 205 for j in range(L) : 206 if i != j : 207 var += ( 208 ( dg1[i] - dg_Ap2 ) * (dg1[j] - dg_Ap2 ) - tg_Ap2 209 ) * (alpha[i] * alpha[j] ) / A2 210 else : 211 var += ( 212 ( dg2[i] - dg_Ap2 ) **2 + ( tg2[i] - tg_Ap2 ) 213 ) * ( alpha[i] * (alpha[i]+1.) ) / A2 214 215 var -= mean**2 216 return var

217 218 219

220 - def mean_relative_entropy(self, pvec) :

221 ln_p = na.log(pvec) 222 return - self.mean_x(ln_p) - self.mean_entropy()

223 224

225 - def variance_relative_entropy(self, pvec) :

226 ln_p = na.log(pvec) 227 return self.variance_x(ln_p) + self.variance_entropy()

228 229

230 - def interval_relative_entropy(self, pvec, frac) :

231 mean = self.mean_relative_entropy(pvec) 232 variance = self.variance_relative_entropy(pvec) 233 sd = sqrt(variance) 234 235 # If the variance is small, use the standard 95% 236 # confidence interval: mean +/- 1.96 * sd 237 if mean/sd >3.0 : 238 return max(0.0, mean - sd*1.96), mean + sd*1.96 239 240 g = Gamma.from_mean_variance(mean, variance) 241 low_limit = g.inverse_cdf( (1.-frac)/2.) 242 high_limit = g.inverse_cdf( 1. - (1.-frac)/2. ) 243 244 return low_limit, high_limit

248 """The gamma probability distribution. (Not to be confused with the 249 gamma function.) 250 251 252 """ 253 __slots__ = 'alpha', 'beta' 254

255 - def __init__(self, alpha, beta) :

256 if alpha <=0.0 : 257 raise ValueError("alpha must be positive") 258 if beta <=0.0 : 259 raise ValueError("beta must be positive") 260 self.alpha = alpha 261 self.beta = beta

262 263

264 - def from_shape_scale(cls, shape, scale) :

265 return cls( shape, 1./scale)

266 from_shape_scale = classmethod(from_shape_scale) 267 268

269 - def from_mean_variance(cls, mean, variance) :

270 alpha = mean **2 / variance 271 beta = alpha/mean 272 return cls(alpha, beta)

273 from_mean_variance = classmethod(from_mean_variance) 274

275 - def mean(self) :

276 return self.alpha / self.beta

277

278 - def variance(self) :

279 return self.alpha / (self.beta**2)

280 281

282 - def sample(self) :

283 return random.gammavariate(self.alpha, 1./self.beta)

284

285 - def pdf(self, x) :

286 if x==0.0 : return 0.0 287 a = self.alpha 288 b = self.beta 289 return (x**(a-1.)) * exp(- b*x )* (b**a) / gamma(a)

290

291 - def cdf(self, x) :

292 return 1.0-normalized_incomplete_gamma(self.alpha, self.beta*x)

293

294 - def inverse_cdf(self, p) :

295 def rootof(x) : 296 return self.cdf(exp(x)) - p

297 298 return exp(find_root( rootof, log(self.mean()) ) )

303 """Return argument 'x' of the function f(x), such that f(x)=0 to 304 within the given tolerance. 305 306 f : The function to optimize, f(x) 307 x : The initial guess 308 y : An optional second guess that shoudl bracket the root. 309 fprime : The derivate of f'(x) (Optional) 310 tolerance : The error bounds 311 max_iterations : Maximum number of iterations 312 313 Raises: 314 ArithmeticError : 315 Failure to converge to the given tolerence 316 317 Notes: 318 Uses Newton-Raphson algorihtm if f'(x) is given, else uses bisect if 319 y is given and brackets the root, else uses secant. 320 321 Status : Beta (Not fully tested) 322 """ 323 324 325 def secant (f, x, tolerance, max_iterations): 326 x0 = x 327 x1 = x0+ 1e-4 328 v0 = f(x0) 329 v1 = f(x1) 330 x2 = 0 331 332 for i in range(max_iterations): 333 # print x0, x1, v0, v1, x2-x0 334 x2 = x1 - v1*(x1-x0)/(v1-v0) 335 if abs(x2-x1) < tolerance : return x2 336 x0 = x1 337 v0 = v1 338 x1 = x2 339 v1 = f(x1) 340 341 raise ArithmeticError( 342 "Failed to converge after %d iterations, value is %f" \ 343 % (max_iterations,x1) )

Source Code for Module weblogolib.logomath