package edu.illinois.dais.ttr;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
/**
* The Statistics
class provides static utility methods
* for statistical computations.
*
* @author Bob Carpenter
* @since LingPipe2.0
*/
public class Statistics {
// don't allow instances
private Statistics() {
/* do nothing */
}
/**
* Returns the Kullback-Leibler divergence of the second specified
* Dirichlet distribution relative to the first. The Dirichlet
* distributions are specified by their distribution and concentration,
* which are folded into a single count argument.
*
*
The KL divergence between two Dirichlet distributions with
* parameters xs
and ys
of dimensionality
* n
is:
*
*
* * where* DKL(Dirichlet(xs) || Dirichlet(ys)) * = ∫ p(θ|xs) log ( p(θ|xs) / p(θ|ys) ) dθ * = log Γ(Σi < n xs[i]) * - log Γ(Σi < n ys[i]) * - Σi < n log Γ(xs[i]) * + Σi < n log Γ(ys[i]) * + Σi < n (xs[i] - ys[i]) * (Ψ(xs[i]) - Ψ(Σk < n xs[i]))
Γ
is the gamma function (see {@link
* com.aliasi.util.Mathematics#log2Gamma(double)}), and where
* Ψ
is the digamma function defined in {@link
* com.aliasi.util.Mathematics#digamma(double)}.
*
* This method in keeping with other information-theoretic
* methods returns the answer in bits (base 2) rather than nats (base e
).
* The return is rescaled from the natural-log based divergence:
*
*
* ** klDivergenceDirichlet(xs,ys) * = DKL(Dirichlet(xs) || Dirichlet(ys)) / log2 e
Further descriptions of the computation of KL-divergence between * Dirichlets may be found in: * *
The symmetrized KL divergence for Dirichlets is just the * average of both relative divergences: * *
* ** DSKL(Dirichlet(xs), Dirichlet(ys)) * = (DKL(Dirichlet(xs) || Dirichlet(ys)) + DKL(Dirichlet(ys) || Dirichlet(xs))) / 2 *
See the method documentation for {@link * #klDivergenceDirichlet(double[],double[])} for a definition of * the relative divergence for Dirichlet distributions. * * @param xs Dirichlet parameter for the first distribution. * @param ys Dirichlet parameter for the second distribution. * @return The symmetrized KL-divergence of the distributions. */ public static double symmetrizedKlDivergenceDirichlet(double[] xs, double[] ys) { return (klDivergenceDirichlet(xs,ys) + klDivergenceDirichlet(ys,xs)) / 2.0; } static double logGamma(double x) { return Mathematics.log2Gamma(x) / Mathematics.log2(java.lang.Math.E); } public static double sum(double[] xs) { double sum = 0.0; for (int i = 0; i < xs.length; ++i) sum += xs[i]; return sum; } /** * Returns the Kullback-Leibler divergence of the second * specified multinomial relative to the first. * *
The K-L divergence of a multinomial q
relative
* to a multinomial p
, both with n
* outcomes, is:
*
*
* * The value is guaranteed to be non-negative, and will be 0.0 * only if the two distributions are identicial. If any outcome * has zero probability in* DKL(p||q) * = Σi < n p(i) log2 (p(i) / q(i))
q
and non-zero probability
* in p
, the result is infinite.
*
* KL divergence is not symmetric. That is, there are
* p
and q
such that
* DKL(p||q) !=
* DKL(q||p)
. See {@link
* #symmetrizedKlDivergence(double[],double[])} and {@link
* #jsDivergence(double[],double[])} for symmetric variants.
*
*
KL divergence is equivalent to conditional entropy, although
* it is written in the opposite order. If H(p,q)
is
* the joint entropy of the distributions p
and
* q
, and H(p)
is the entropy of
* p
, then:
*
*
* * @param p First multinomial distribution. * @param q Second multinomial distribution. * @throws IllegalArgumentException If the distributions are not * the same length or have entries less than zero or greater than * 1. */ public static double klDivergence(double[] p, double[] q) { verifyDivergenceArgs(p,q); double divergence = 0.0; int len = p.length; for (int i = 0; i < len; ++i) { if (p[i] > 0.0 && p[i] != q[i]) divergence += p[i] * Mathematics.log2(p[i] / q[i]); } return divergence; } static void verifyDivergenceArgs(double[] p, double[] q) { if (p.length != q.length) { String msg = "Input distributions must have same length." + " Found p.length=" + p.length + " q.length=" + q.length; throw new IllegalArgumentException(msg); } int len = p.length; for (int i = 0; i < len; ++i) { if (p[i] < 0.0 || p[i] > 1.0 || Double.isNaN(p[i]) || Double.isInfinite(p[i])) { String msg = "p[i] must be between 0.0 and 1.0 inclusive." + " found p[" + i + "]=" + p[i]; throw new IllegalArgumentException(msg); } if (q[i] < 0.0 || q[i] > 1.0 || Double.isNaN(q[i]) || Double.isInfinite(q[i])) { String msg = "q[i] must be between 0.0 and 1.0 inclusive." + " found q[" + i + "] =" + q[i]; throw new IllegalArgumentException(msg); } } } /** * Returns the symmetrized KL-divergence between the specified distributions. * The symmetrization is carried out by averaging their relative * divergences: * ** DKL(p||q) = H(p,q) - H(p)
* * @param p First multinomial. * @param q Second multinomial. * @return The Symmetrized KL divergence between the multinomials. * @throws IllegalArgumentException If the distributions are not * the same length or have entries less than zero or greater than * 1. */ public static double symmetrizedKlDivergence(double[] p, double[] q) { verifyDivergenceArgs(p,q); return (klDivergence(p,q) + klDivergence(q,p)) / 2.0; } /** * Return the Jenson-Shannon divergence between the specified * multinomial distributions. The JS divergence is defined by * ** DSKL(p,q) * = ( DKL(p,q) + DKL(q,p) ) / 2 *
* * where* DJS(p,q) * = ( DKL(p,m) + DKL(q,m) ) / 2
m
is defined as the balanced linear
* interpolation (that is, the average) of p
and
* q
:
*
* * * The JS divergence is non-zero, equal to zero only if* m[i] = (p[i] + q[i]) / 2
p
* and q
are the same distribution, and symmetric.
*
* @param p First multinomial.
* @param q Second multinomial.
* @return The JS divergence between the multinomials.
* @throws IllegalArgumentException If the distributions are not
* the same length or have entries less than zero or greater than
* 1.
*/
public static double jsDivergence(double[] p, double[] q) {
verifyDivergenceArgs(p,q);
double[] m = new double[p.length];
for (int i = 0; i < p.length; ++i)
m[i] = (p[i] + q[i])/2.0;
return (klDivergence(p,m) + klDivergence(q,m)) / 2.0;
}
/**
* Returns a permutation of the integers between 0 and
* the specified length minus one.
*
* @param length Size of permutation to return.
* @return Permutation of the specified length.
*/
public static int[] permutation(int length) {
int[] xs = new int[length];
for (int i = 0; i < xs.length; ++i)
xs[i] = i;
Random random = new Random();
for (int i = xs.length; --i > 0; ) {
int pos = random.nextInt(i);
int temp = xs[pos];
xs[pos] = xs[i];
xs[i] = temp;
}
return xs;
}
/**
* Returns Pearson's C2 goodness-of-fit
* statistic for independence over the specified binary
* contingency matrix. Asymptotically, this statistic has a
* χ2 distribution with one degree of freedom. The
* higher the value, the less likely the two outcomes are
* independent. Note that this method is just a special case of
* the general chi-squared independence test:
*
* * chiSquaredIndependence(both,oneOnly,twoOnly,neither) * = chiSquaredIndependence(new double[][] { {both, oneOnly}, * {twoOnly, neither} }); ** * The specified values make up the following contingency matrix * for boolean outcomes labeled
One
and
* Two
:
*
* ** **
** +Two -Two * +One both
oneOnly
* -One twoOnly
neither
The value both
is the count of events where both
* outcomes occurred, oneOnly
for where only the
* first outcome occurred, twoOnly
for where only
* the second outcome occurred, and neither
for
* when neither occurred. Let totalCount
be
* the sum of all of the cells in the matrix.
*
*
From the contingency matrix, marginal probabilities for * the two outcomes may be computed: * *
* P(One) = (both + oneOnly) / totalCount
*
* P(Two) = (both + twoOnly) / totalCount
*
*
* If these probabilities are independent, the expected values of
* the matrix cells are:
*
*
* E(both)= totalCount * P(One) * P(Two)
*
* E(oneOnly) = totalCount * P(One) * (1-P(Two))
*
* E(twoOnly) = totalCount * (1-P(One)) * P(Two)
*
* E(neither) = totalCount * (1-P(One)) * (1-P(Two))
*
*
* These are used to derive the independence test statistic, which
* is the square differences between observed and expected values
* under the independence assumption, normalized by the expected
* values:
*
*
* C2
* = (both - E(both))2 / E(both)
*
* + (oneOnly - E(oneOnly))2 / E(oneOnly)
*
* + (twoOnly - E(twoOnly))2 / E(twoOnly)
*
* + (neither - E(neither))2 / E(neither)
*
*
* Unlike the higher dimensional case, this statistic applies as a
* hypothesis test only in the case when all expected values are
* at least 10.
* @param both Count of samples of both outcomes.
* @param oneOnly Count of samples with the first and not the
* second outcome.
* @param twoOnly Count of samples with the second and not the
* first outcome.
* @param neither Count of samples with with neither outcome.
* @throws IllegalArgumentException If any of the arguments are not
* non-negative finite numbers.
* @return Pearson's C2 goodness-of-fit
* statistic for independence over the specified sample counts.
*/
public static double chiSquaredIndependence(double both, double oneOnly,
double twoOnly,
double neither) {
assertNonNegative("both",both);
assertNonNegative("oneOnly",oneOnly);
assertNonNegative("twoOnly",twoOnly);
assertNonNegative("neither",neither);
double n = both + oneOnly + twoOnly + neither;
double p1 = (both + oneOnly) / n;
double p2 = (both + twoOnly) / n;
double eBoth = n * p1 * p2;
double eOneOnly = n * p1 * (1.0 - p2);
double eTwoOnly = n * (1.0 - p1) * p2;
double eNeither = n * (1.0 - p1) * (1.0 - p2);
return csTerm(both,eBoth)
+ csTerm(oneOnly,eOneOnly)
+ csTerm(twoOnly,eTwoOnly)
+ csTerm(neither,eNeither);
}
/**
* Returns a two-element array of lineary regression coefficients
* for the specified x and y values. The coefficients returned,
* { β0, β1 }
, define a linear function:
*
*
* f(x) = β1 * x + β0
*
*
* The coefficients returned produce the linear function f(x)
* with the smallest squared error:
*
*
* sqErr(f,xs,ys) =
* Σi
* (f(xs[i]) - ys[i])2
*
*
* where all sums are for 0 << i < xs.length
.
*
* The funciton requires only a single pass through the two
* arrays, with β0
and β1
* given by:
*
* * ** β1 = n * Σi x[i] * y[i] - (Σi x[i])(Σi y[i]) * ------------------------------------------ * n * Σi x[i]*x[i] - (Σi x[i])2 *
* * where* β0 = Σi y[i] - β1 Σi x[i] * --------------------- * n *
n = xs.length = ys.length
.
*
* @param xs Array of x values.
* @param ys Parallel array of y values.
* @return Pair of regression coefficients.
* @throws IllegalArgumentException If the arrays are of length
* less than 2, or if the arrays are not of the same length.
*/
public static double[] linearRegression(double[] xs, double[] ys) {
if (xs.length != ys.length) {
String msg = "Require parallel arrays of x and y values."
+ " Found xs.length=" + xs.length
+ " ys.length=" + ys.length;
throw new IllegalArgumentException(msg);
}
if (xs.length < 2) {
String msg = "Require arrays of length >= 2."
+ " Found xs.length=" + xs.length;
throw new IllegalArgumentException(msg);
}
double n = xs.length;
double xSum = 0.0;
double ySum = 0.0;
double xySum = 0.0;
double xxSum = 0.0;
for (int i = 0; i < xs.length; ++i) {
double x = xs[i];
double y = ys[i];
xSum += x;
ySum += y;
xxSum += x * x;
xySum += x * y;
}
double denominator = n * xxSum - xSum * xSum;
if (denominator == 0.0) {
String msg = "Ill formed input. Denominator for beta1 is zero."
+ " Most likely cause is fewer than 2 distinct inputs.";
throw new IllegalArgumentException(msg);
}
double beta1 = (n * xySum - xSum * ySum) / denominator;
double beta0 = (ySum - beta1 * xSum) / n;
return new double[] { beta0, beta1 };
}
/**
* Returns a two-element array of logistic regression coefficients
* for the specified x and y values. The coefficients returned,
* { β0, β1 }
, define the logistic function:
*
* * * with the minimum squared error. See {@link * #linearRegression(double[],double[])} for a definition of * squared error. This function takes real values in the the open * interval* L * f(x) = --------------- * 1 + e β1 * x + β0 *
(0, L)
.
*
* Logistic regression coefficients are computed using * linear regression, after transforming the y values. This * is possible because of the following linear relation: * *
* * @param xs Array of x values. * @param ys Array of y values. * @param maxValue Maximum value of function. * @return Binary array of logistic regression coordinates. * @throws IllegalArgumentException If the maximum value is not * positive and finite, if the arrays are of length less than 2, * or if the arrays are not of the same length. */ public static double[] logisticRegression(double[] xs, double[] ys, double maxValue) { if (maxValue <= 0.0 || Double.isInfinite(maxValue) || Double.isNaN(maxValue)) { String msg = "Require finite max value > 0." + " Found maxValue=" + maxValue; throw new IllegalArgumentException(msg); } double[] logisticYs = new double[ys.length]; for (int i = 0; i < ys.length; ++i) logisticYs[i] = java.lang.Math.log((maxValue - ys[i]) / ys[i]); return linearRegression(xs,logisticYs); } /** * Returns the value of Pearson's C2 * goodness of fit statistic for independence over the specified * contingency matrix. Asymptotically, this statistic has a * χ2 distribution with ** log ((L - y) / y) = β1 * x + β0 *
(numRows-1)*(numCols-1)
degrees of freedom. The
* higher the value, the less likely the two outcomes are
* independent.
*
* Pearson's C2 statistic is defined as follows:
*
*
* C2
* = Σi
* Σj
* (matrix[i][j] - expected(i,j,matrix))2 / expectedCount(i,j,matrix)
*
*
* where the expected count is the total count times the max
* likelihood estimates of row i
probability times
* column j
probability:
*
*
* expectedCount(i,j,matrix)
*
* = totalCount(matrix)
*
* * rowCount(i,matrix)/totalCount(matrix)
*
* * colCount(j,matrix)/totalCount(matrix)
*
* = rowCount(i,matrix) * colCount(j,matrix) / totalCount(matrix)
*
*
* where
*
*
* rowCount(i,matrix)
* = Σ0<=j<=numCols
* matrix[i][j]
*
*
* colCount(j,matrix)
* = Σ0<=i<=numRows
* matrix[i][j]
*
* totalCount(matrix)
* = Σ0<=i<=numRows
* = Σ0<=j<=numCols
* matrix[i][j]
*
*
* The χ2 test is a large sample test and is only * valid if all of the expected counts are at least 5. This restriction * is often ignored for ranking purposes. * * @param contingencyMatrix The specified contingency matrix. * @return Pearson's C2 statistic for the independence * testing over the contingency matrix. * @throws Illegal argument exception if the matrix is not rectangular * or if all values are not non-negative finite numbers. */ public static double chiSquaredIndependence(double[][] contingencyMatrix) { int numRows = contingencyMatrix.length; if (numRows < 2) { String msg = "Require at least two rows." + " Found numRows=" + numRows; throw new IllegalArgumentException(msg); } int numCols = contingencyMatrix[0].length; if (numCols < 2) { String msg = "Require at least two cols." + " Found numCols=" + numCols; throw new IllegalArgumentException(msg); } double[] rowSums = new double[numRows]; Arrays.fill(rowSums,0.0); double[] colSums = new double[numCols]; Arrays.fill(colSums,0.0); double totalCount = 0.0; for (int i = 0; i < numRows; ++i) { if (contingencyMatrix[i].length != numCols) { String msg = "Matrix must be rectangular." + "Row 0 length=" + numCols + "Row " + i + " length=" + contingencyMatrix[i].length; throw new IllegalArgumentException(msg); } for (int j = 0; j < numCols; ++j) { double val = contingencyMatrix[i][j]; if (Double.isInfinite(val) || val < 0.0 || Double.isNaN(val)) { String msg = "Values must be finite non-negative." + " Found matrix[" + i + "][" + j + "]=" + val; throw new IllegalArgumentException(msg); } rowSums[i] += val; colSums[j] += val; totalCount += val; } } double result = 0.0; for (int i = 0; i < numRows; ++i) for (int j = 0; j < numCols; ++j) result += csTerm(contingencyMatrix[i][j], rowSums[i] * colSums[j] / totalCount); return result; } /** * Return an array of probabilities resulting from normalizing the * specified probability ratios. The resulting array of * probabilities is the same length as the input ratio array and * each probability is simply the input array's value divided by * the sum of the ratios. * *
Warning: This method is implemented by summing the
* probability ratios and then dividing each element by the sum.
* Because of the limited precision of The most typical use for kappa is in evaluating
* classification problems of a machine versus a gold standard or
* between two human annotators to asses inter-annotator
* agreement.
*
* @param observedProb Observed probability.
* @param expectedProb Expected probability.
* @return The value of the kappa statistic for the specified
* probability 8 and expected probability.
*/
public static double kappa(double observedProb, double expectedProb) {
return (observedProb - expectedProb)/(1 - expectedProb);
}
/**
* Returns the mean of the specified array of input values. The mean
* of an array is defined by:
*
* double
-based
* arithmetic, if the largest ratio is much larger than the next
* largest ratio, then the largest normalized probability will be
* one and all others will be zero. Java double values follow the
* IEEE 754 arithmetic standard and thus use 52 bits for their
* mantissas. Thus only ratios within
* 252~1016 of
* the maximum ratio will be non-zero.
*
* @param probabilityRatios Ratios of probabilities.
* @return Probabilities resulting from normalizing the ratios.
* @throws IllegalArgumentException If the input contains a value
* that is not a finite non-negative number, or if the input does
* not contain at least one non-zero entry.
*/
public static double[] normalize(double[] probabilityRatios) {
for (int i = 0; i < probabilityRatios.length; ++i) {
if (probabilityRatios[i] < 0.0
|| Double.isInfinite(probabilityRatios[i])
|| Double.isNaN(probabilityRatios[i])) {
String msg = "Probabilities must be finite non-negative."
+ " Found probabilityRatios[" + i + "]="
+ probabilityRatios[i];
throw new IllegalArgumentException(msg);
}
}
double sum = Mathematics.sum(probabilityRatios);
if (sum <= 0.0) {
String msg = "Ratios must sum to number greater than zero."
+ " Found sum=" + sum;
throw new IllegalArgumentException(msg);
}
double[] result = new double[probabilityRatios.length];
for (int i = 0; i < probabilityRatios.length; ++i)
result[i] = probabilityRatios[i]/sum;
return result;
}
/**
* Returns the value of the kappa statistic for the specified
* observed and expected probabilities. The kappa statistic
* provides a kind of adjustment for the exptected (random)
* difficulty of a problem. It is defined by:
*
*
, measures the
* square error between a best linear fit between the two arrays
* of data. Rescaling either array by a positive constant will
* not affect the result.
*
*
*
*
* kappa(p,e) = (p - e) / (1 - e)
*
*
* If the array is of length zero, the result is {@link Double#NaN}.
*
* @param xs Array of values.
* @return Mean of array of values.
*/
public static double mean(double[] xs, int len) {
return Mathematics.sum(xs) / (double) len;
}
/**
* Returns the mean of the specified array of input values. The mean
* of an array is defined by:
*
*
* mean(xs)
* = Σi < xs.length
* xs[i] / xs.length
*
*
* If the array is of length zero, the result is {@link Double#NaN}.
*
* @param xs Array of values.
* @return Mean of array of values.
*/
public static double mean(double[] xs) {
return mean(xs, xs.length);
}
public static double mean(ArrayList
* mean(xs)
* = Σi < xs.length
* xs[i] / xs.length
*
*
* If the array is of length zero, the result is {@link Double#NaN}.
*
* @param xs Array of values.
* @return Variance of array of values.
*/
public static double variance(double[] xs) {
return variance(xs, mean(xs));
}
/**
* Returns the standard deviation of the specified array of input values.
* The standard deviation is just the square root of the variance:
*
*
* variance(xs)
* = Σi < xs.length
* (mean(xs) - xs[i])2 / xs.length
*
*
* If the array is of length zero, the result is {@link Double#NaN}.
*
* @param xs
* Array of values.
* @return Standard deviation of array of values.
*/
public static double standardDeviation(double[] xs) {
return java.lang.Math.sqrt(variance(xs));
}
/**
* Returns the standard deviation of the specified array of input
* values. The standard deviation is just the square root of the
* variance:
*
*
* standardDeviation(xs) = variance(xs)(1/2)
*
*
* If the array is of length zero, the result is {@link Double#NaN}.
*
* @param xs Array of values.
* @return Standard deviation of array of values.
*/
public static double standardDeviation(List
* standardDeviation(xs) = variance(xs)(1/2)
*
The square root r
of the correlation
* coefficient r2
is the variance
* in the second array explained by a linear relation with the
* the first array.
*
*
The definition of the correlation coefficient is: * *
* correlation(xs,ys)2
*
= sumSqDiff(xs,ys)2
*
/ (sumSqDiff(xs,xs) * sumSqDiff(xs,xs))
*
*
* where
*
*
* sumSqDiff(xs,ys)
*
= Σi<xs.length
* (xs[i] - mean(xs)) * (ys[i] - mean(ys))
*
*
* and thus the terms in the denominator above reduce using:
*
*
* sumSqDiffs(xs,xs)
*
= Σi<xs.length
* (xs[i] - mean(xs))2
*
*
* See the following for more details: * *
The cumulative probability ratios represent unnormalized probabilities
* of generating the value of their index or less, that is, unnormalized
* cumulative probabilities. For instance, consider
* the cumulative probability ratios { 0.5, 0.5, 3.9, 10.1}
:
*
*
* * A sample is taken by generating a random number x between 0.0 and * the value of the last element (here 10.1). The value returned is * the index i such that: * **
* Outcome Value Unnormalized Prob Prob * 0 0.5 0.5 0.5/10.1 * 1 0.5 0.0 0.0/10.1 * 2 3.9 3.4 3.4/10.1 * 3 10.1 6.2 6.2/10.1
* * The corresponding probabilities given the sample input are * listed in the last column in the table above. * ** cumulativeProbRatios[i-1] < x <= cumulativeProbRatios[i]
Note that if two * elements have the same value, there is no chance of generating * the outcome with the higher index. In the example above, this * corresponds to outcome 1 having probaiblity 0.0 * *
Warning: The cumulative probability ratios are required to meet
* two conditions. First, all values must be finite and non-negative. Second,
* the values must be non-decreasing, so that
* cumulativeProbRatios[i] <= cumulativeProbRatios[i+1]
.
* If either of these conditions is not met, the result is undefined.
*
* @param cumulativeProbRatios Cumulative probability for outcome less than
* or equal to index.
* @param random Random number generator for sampling.
* @return A sample from the specified distribution.
*/
public static int sample(double[] cumulativeProbRatios, Random random) {
int low = 0;
int high = cumulativeProbRatios.length - 1;
double x = random.nextDouble() * cumulativeProbRatios[high];
while (low < high) {
int mid = (high + low)/2;
if (x > cumulativeProbRatios[mid]) {
low = mid+1;
} else if (high == mid) {
return (x > cumulativeProbRatios[low]) ? mid : low;
} else {
high = mid;
}
}
return low;
}
/**
* Returns the log (base 2) of the probability of the specified
* discrete distribution given the specified uniform Dirichlet
* with concentration parameters equal to the specified value.
*
*
See {@link #dirichletLog2Prob(double[],double[])} for * more information on the Dirichlet distribution. This method * returns a result equivalent to the following (though is more * efficiently implemented): * *
* ** double[] alphas = new double[xs.length]; * java.util.Arrays.fill(alphas,alpha); * assert(dirichletLog2Prob(alpha,xs) == dirichletLog2Prob(alphas,xs));
For the uniform Dirichlet, the distribution simplifies to * the following form: * *
* * where * ** p(xs | Dirichlet(α)) = (1/Z(α)) Πi < k xs[i]α-1
* ** Z(α) = Γ(α)k / Γ(k * α)
Warning: The probability distribution must be proper
* in the sense of having values between 0 and 1 inclusive and
* summing to 1.0. This property is not checked by this method.
*
* @param alpha Dirichlet concentration parameter to use for each
* dimension.
* @param xs The distribution whose probability is returned.
* @return The log (base 2) probability of the specified
* distribution in the uniform Dirichlet distribution with concentration
* parameters equal to alpha
.
* @throws IllegalArgumentException If the values in the distribution
* are not between 0 and 1 inclusive or if the concentration parameter is
* not positive and finite.
*/
public static double dirichletLog2Prob(double alpha, double[] xs) {
verifyAlpha(alpha);
verifyDistro(xs);
int k = xs.length;
// normalizing term
double result = Mathematics.log2Gamma(k * alpha)
- k * Mathematics.log2Gamma(alpha);
double alphaMinus1 = alpha - 1;
for (int i = 0; i < k; ++i)
result += alphaMinus1 * Mathematics.log2(xs[i]);
return result;
}
/**
* Returns the log (base 2) of the probability of the specified
* discrete distribution given the specified Dirichlet
* distribution concentration parameters. A Dirichlet
* distribution is a distribution over k
-dimensional
* discrete distributions.
*
*
The Dirichlet is widely used because it is the conjugate * prior for multinomials in a Bayesian setting, and thus may * be used to specify a convenient distribution over discrete * distributions.
* *A Dirichlet distribution is specified by a dimensionality
* k
and a concentration parameter α[i] > 0
* for each i < k
.
* The probability of the distribution xs
* in a Dirichlet distribution of dimension k
* and concentration parameters α
is
* given (up to a normalizing constant) by:
*
*
* * The full distribution is: * ** p(xs | Dirichlet(α)) * ∝ Πi < k xs[i]α[i]-1
* * where the normalizing constant is given by: * ** p(xs | Dirichlet(α)) * = (1/Z(α)) * Πi < k xs[i]α[i]-1
* ** Z(α) = Πi < k Γ(α[i]) * / Γ(Σi < k α[i])
Warning: The probability distribution must be proper * in the sense of having values between 0 and 1 inclusive and * summing to 1.0. This property is not checked by this method. * * @param alphas The concentration parameters for the uniform Dirichlet. * @param xs The outcome distribution * @return The probability of the outcome distribution given the * Dirichlet concentratioin parameter. * @throws IllegalArgumentException If the Dirichlet parameters and * distribution are not arrays of the same length or if the distribution * parameters in xs are not between 0 and 1 inclusive, or if any of * the concentration parameters is not positive and finite. */ public static double dirichletLog2Prob(double[] alphas, double[] xs) { if (alphas.length != xs.length) { String msg = "Dirichlet prior alphas and distribution xs must be the same length." + " Found alphas.length=" + alphas.length + " xs.length=" + xs.length; throw new IllegalArgumentException(msg); } for (int i = 0; i < alphas.length; ++i) verifyAlpha(alphas[i]); verifyDistro(xs); int k = xs.length; double result = 0.0; double alphaSum = 0.0; for (int i = 0; i < alphas.length; ++i) { alphaSum += alphas[i]; result -= Mathematics.log2Gamma(alphas[i]); } result += Mathematics.log2Gamma(alphaSum); for (int i = 0; i < k; ++i) result += (alphas[i] - 1) * Mathematics.log2(xs[i]); return result; } static void verifyAlpha(double alpha) { if (Double.isNaN(alpha) || Double.isInfinite(alpha) || alpha <= 0.0) { String msg = "Concentration parameter must be positive and finite." + " Found alpha=" + alpha; throw new IllegalArgumentException(msg); } } static void verifyDistro(double[] xs) { for (int i = 0; i < xs.length; ++i) { if (xs[i] < 0.0 || xs[i] > 1.0 || Double.isNaN(xs[i]) || Double.isInfinite(xs[i])) { String msg = "All xs must be betwee 0.0 and 1.0 inclusive." + " Found xs[" + i + "]=" + xs[i]; throw new IllegalArgumentException(msg); } } } // = sumSquareDiffs(xs,mean,xs,mean); static double sumSquareDiffs(double[] xs, double mean) { double sum = 0.0; for (int i = 0; i < xs.length; ++i) { double diff = xs[i] - mean; sum += diff * diff; } return sum; } static double sumSquareDiffs(double[] xs, double[] ys, double meanXs, double meanYs) { double sum = 0.0; for (int i = 0; i < xs.length; ++i) sum += (xs[i] - meanXs) * (ys[i] - meanYs); return sum; } static double variance(double[] xs, double mean) { return sumSquareDiffs(xs,mean) / (double) xs.length; } static void assertNonNegative(String variableName, double value) { if (Double.isInfinite(value) || Double.isNaN(value) || value < 0.0) { String msg = "Require finite non-negative value." + " Found " + variableName + " =" + value; throw new IllegalArgumentException(msg); } } private static double csTerm(double found, double expected) { double diff = found - expected; return diff * diff / expected; } }