1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41 from math import log, sqrt, exp
42
43 from numpy import array, asarray, float64, ones, zeros, int32,all,any, shape
44 import numpy as na
45 from corebio.moremath import *
46
47 import random
48
50 """The Dirichlet probability distribution. The Dirichlet is a continuous
51 multivariate probability distribution across non-negative unit length
52 vectors. In other words, the Dirichlet is a probability distribution of
53 probability distributions. It is conjugate to the multinomial
54 distribution and is widely used in Bayesian statistics.
55
56 The Dirichlet probability distribution of order K-1 is
57
58 p(theta_1,...,theta_K) d theta_1 ... d theta_K =
59 (1/Z) prod_i=1,K theta_i^{alpha_i - 1} delta(1 -sum_i=1,K theta_i)
60
61 The normalization factor Z can be expressed in terms of gamma functions:
62
63 Z = {prod_i=1,K Gamma(alpha_i)} / {Gamma( sum_i=1,K alpha_i)}
64
65 The K constants, alpha_1,...,alpha_K, must be positive. The K parameters,
66 theta_1,...,theta_K are nonnegative and sum to 1.
67
68 Status:
69 Alpha
70 """
71 __slots__ = 'alpha', '_total', '_mean',
72
73
74
75
77 """
78 Args:
79 - alpha -- The parameters of the Dirichlet prior distribution.
80 A vector of non-negative real numbers.
81 """
82
83
84 self.alpha = asarray(alpha, float64)
85
86 self._total = sum(alpha)
87 self._mean = None
88
89
91 """Return a randomly generated probability vector.
92
93 Random samples are generated by sampling K values from gamma
94 distributions with parameters a=\alpha_i, b=1, and renormalizing.
95
96 Ref:
97 A.M. Law, W.D. Kelton, Simulation Modeling and Analysis (1991).
98 Authors:
99 Gavin E. Crooks <gec@compbio.berkeley.edu> (2002)
100 """
101 alpha = self.alpha
102 K = len(alpha)
103 theta = zeros( (K,), float64)
104
105 for k in range(K):
106 theta[k] = random.gammavariate(alpha[k], 1.0)
107 theta /= sum(theta)
108
109 return theta
110
112 if self._mean ==None:
113 self._mean = self.alpha / self._total
114 return self._mean
115
117 alpha = self.alpha
118 A = sum(alpha)
119
120 K = len(alpha)
121 cv = zeros( (K,K), float64)
122
123 for i in range(K) :
124 cv[i,i] = alpha[i] * (1. - alpha[i]/A) / (A * (A+1.) )
125
126 for i in range(K) :
127 for j in range(i+1,K) :
128 v = - alpha[i] * alpha[j] / (A * A * (A+1.) )
129 cv[i,j] = v
130 cv[j,i] = v
131 return cv
132
134 x = asarray(x, float64)
135 if shape(x) != shape(self.alpha) :
136 raise ValueError("Argument must be same dimension as Dirichlet")
137 return sum( x * self.mean())
138
140 x = asarray(x, float64)
141 if shape(x) != shape(self.alpha) :
142 raise ValueError("Argument must be same dimension as Dirichlet")
143
144 cv = self.covariance()
145 var = na.dot(na.dot(na.transpose( x), cv), x)
146 return var
147
148
150 """Calculate the average entropy of probabilities sampled
151 from this Dirichlet distribution.
152
153 Returns:
154 The average entropy.
155
156 Ref:
157 Wolpert & Wolf, PRE 53:6841-6854 (1996) Theorem 7
158 (Warning: this paper contains typos.)
159 Status:
160 Alpha
161 Authors:
162 GEC 2005
163
164 """
165
166 alpha = self.alpha
167 A = float(sum(alpha))
168 ent = 0.0
169 for a in alpha:
170 if a>0 : ent += - 1.0 * a * digamma( 1.0+a)
171 ent /= A
172 ent += digamma(A+1.0)
173 return ent
174
175
176
178 """Calculate the variance of the Dirichlet entropy.
179
180 Ref:
181 Wolpert & Wolf, PRE 53:6841-6854 (1996) Theorem 8
182 (Warning: this paper contains typos.)
183 """
184 alpha = self.alpha
185 A = float(sum(alpha))
186 A2 = A * (A+1)
187 L = len(alpha)
188
189 dg1 = zeros( (L) , float64)
190 dg2 = zeros( (L) , float64)
191 tg2 = zeros( (L) , float64)
192
193 for i in range(L) :
194 dg1[i] = digamma(alpha[i] + 1.0)
195 dg2[i] = digamma(alpha[i] + 2.0)
196 tg2[i] = trigamma(alpha[i] + 2.0)
197
198 dg_Ap2 = digamma( A+2. )
199 tg_Ap2 = trigamma( A+2. )
200
201 mean = self.mean_entropy()
202 var = 0.0
203
204 for i in range(L) :
205 for j in range(L) :
206 if i != j :
207 var += (
208 ( dg1[i] - dg_Ap2 ) * (dg1[j] - dg_Ap2 ) - tg_Ap2
209 ) * (alpha[i] * alpha[j] ) / A2
210 else :
211 var += (
212 ( dg2[i] - dg_Ap2 ) **2 + ( tg2[i] - tg_Ap2 )
213 ) * ( alpha[i] * (alpha[i]+1.) ) / A2
214
215 var -= mean**2
216 return var
217
218
219
223
224
228
229
245
246
248 """The gamma probability distribution. (Not to be confused with the
249 gamma function.)
250
251
252 """
253 __slots__ = 'alpha', 'beta'
254
256 if alpha <=0.0 :
257 raise ValueError("alpha must be positive")
258 if beta <=0.0 :
259 raise ValueError("beta must be positive")
260 self.alpha = alpha
261 self.beta = beta
262
263
265 return cls( shape, 1./scale)
266 from_shape_scale = classmethod(from_shape_scale)
267
268
270 alpha = mean **2 / variance
271 beta = alpha/mean
272 return cls(alpha, beta)
273 from_mean_variance = classmethod(from_mean_variance)
274
276 return self.alpha / self.beta
277
279 return self.alpha / (self.beta**2)
280
281
283 return random.gammavariate(self.alpha, 1./self.beta)
284
286 if x==0.0 : return 0.0
287 a = self.alpha
288 b = self.beta
289 return (x**(a-1.)) * exp(- b*x )* (b**a) / gamma(a)
290
292 return 1.0-normalized_incomplete_gamma(self.alpha, self.beta*x)
293
295 def rootof(x) :
296 return self.cdf(exp(x)) - p
297
298 return exp(find_root( rootof, log(self.mean()) ) )
299
300
301
302 -def find_root(f, x, y=None, fprime=None, tolerance=1.48e-8, max_iterations=50):
303 """Return argument 'x' of the function f(x), such that f(x)=0 to
304 within the given tolerance.
305
306 f : The function to optimize, f(x)
307 x : The initial guess
308 y : An optional second guess that shoudl bracket the root.
309 fprime : The derivate of f'(x) (Optional)
310 tolerance : The error bounds
311 max_iterations : Maximum number of iterations
312
313 Raises:
314 ArithmeticError :
315 Failure to converge to the given tolerence
316
317 Notes:
318 Uses Newton-Raphson algorihtm if f'(x) is given, else uses bisect if
319 y is given and brackets the root, else uses secant.
320
321 Status : Beta (Not fully tested)
322 """
323
324
325 def secant (f, x, tolerance, max_iterations):
326 x0 = x
327 x1 = x0+ 1e-4
328 v0 = f(x0)
329 v1 = f(x1)
330 x2 = 0
331
332 for i in range(max_iterations):
333
334 x2 = x1 - v1*(x1-x0)/(v1-v0)
335 if abs(x2-x1) < tolerance : return x2
336 x0 = x1
337 v0 = v1
338 x1 = x2
339 v1 = f(x1)
340
341 raise ArithmeticError(
342 "Failed to converge after %d iterations, value is %f" \
343 % (max_iterations,x1) )
344
345
346 def bisect(f, a, b, tolerance, max_iterations) :
347 fa = f(a)
348 fb = f(b)
349
350 if fa==0: return a
351 if fb==0: return b
352
353 if fa*fb >0 :
354 raise ArithmeticError("Start points do not bracket root.")
355
356 for i in range(max_iterations):
357
358 delta = b-a
359 xm = a + 0.5*delta
360 fm = f(xm)
361 if delta < tolerance : return xm
362
363 if fm * fa >0 :
364 a = xm
365 fa = fm
366 else :
367 b = xm
368 fb = fm
369
370 raise ArithmeticError(
371 "Failed to converge after %d iterations, value is %f" \
372 % (max_iterations, x) )
373
374
375 def newton(f, x, fprime, tolerance, max_iterations) :
376 x0 = x
377 for i in range(max_iterations) :
378 x1 = x0 - f(x0)/fprime(x0)
379 if abs(x1-x0) < tolerance: return x1
380 x0 = x1
381
382 raise ArithmeticError(
383 "Failed to converge after %d iterations, value is %f" \
384 % (max_iterations,x1) )
385
386 if fprime is not None :
387 return newton(f, x, fprime, tolerance, max_iterations)
388 elif y is not None :
389 return bisect(f, x, y, tolerance, max_iterations)
390 else :
391 return secant(f, x, tolerance, max_iterations)
392