/*
 * NOTE(review): this copy of DCMModel.java appears to be damaged and should be
 * restored from version control before any logic is changed.
 *
 * Observed damage:
 *  - The whole file sits on two physical lines, so each "//" line comment
 *    swallows everything that follows it on the same line.
 *  - Every span that began with a left angle bracket and ended with the next
 *    right angle bracket appears to be missing. Generic type arguments are
 *    gone (the "data" and "alpha" fields read as raw Map), and each
 *    "for (int i=0;i" loop header runs directly into unrelated later text.
 *
 * What can still be read from the remaining text:
 *  - Purpose (from the class Javadoc): estimate a Dirichlet Compound
 *    Multinomial with the fixed-point method of Minka, in memory, from an
 *    array of count vectors supplied as Maps.
 *  - Constructor DCMModel(Map[] _data): stores the array reference in "data",
 *    zeroes sum_unique_terms, creates "alpha" as a TreeMap and allocates
 *    vector_mass with data.length slots. Its loop body is not visible.
 *  - getParameters() returns the "alpha" map itself (no copy is made in the
 *    visible code); getMass() returns cur_A.
 *  - estimateDCM() and estimateEDCM() are declared, but their bodies are
 *    truncated. estimateEDCM() starts by clearing "alpha" and resetting
 *    sum_unique_terms to 0; its Javadoc cites Elkan's EDCM paper.
 *  - A helper taking a map named cur_est sums the map's values into A and
 *    stores that in cur_A before starting a loop over i. Its name, modifiers,
 *    return type and remaining body are lost.
 *  - The tail of a main method calls estimateDCM() on an instance named "dcm",
 *    sums the returned values, and prints "Mass: " followed by that sum and
 *    then the map.
 *  - "iterations" is a public static int initialised to 20. "epsilon" is a
 *    public static double with no initialiser in the visible text --
 *    presumably callers are expected to set it; TODO confirm.
 */
package polya; import java.util.Map; import java.util.TreeMap; import org.apache.commons.math3.special.Gamma; import static org.apache.commons.math3.special.Gamma.digamma; import static org.apache.commons.math3.special.Gamma.logGamma; /** * * * Estimating a DCM (Dirichlet Compound Multinomial) using fixed-point method in * Estimating a Dirichlet distribution by Thomas P. Minka * * In memory solution using a set of input count-vectors (as Maps). * * * * @author Ronan Cummins * */ public class DCMModel { //a pointer to the data which is an //array of histograms (or count vectors) private final Map [] data; private final Integer[] vector_mass; private int sum_unique_terms; public static int iterations = 20; //parameter estimates private final Map alpha; //mass of parameters private double prev_A = Double.POSITIVE_INFINITY; private double cur_A; //loglikelihood values private double prev_loglikeli = Double.POSITIVE_INFINITY; private double cur_loglikeli; public static double epsilon; public DCMModel(Map[] _data){ data = _data; sum_unique_terms=0; alpha = new TreeMap<>(); //store mass of each count vector and set initial estimates to num of samples //a type appears in (i.e. 
an estimate proportional to the EDCM) vector_mass = new Integer[data.length]; Double f; Integer c; for (int i=0;i getParameters(){ return alpha; } public double getMass(){ return cur_A; } /** * * * call the estimateDCM method to estimate the parameters * which are then returned in a Map * * @return */ public Map estimateDCM(){ double den,num; double cur_p; double diff; double loglikeli; for(int i=0;i * see Clustering Documents with an Exponential-Family Approximation of the * Dirichlet Compound Multinomial Distribution by Charles Elkan * * @return */ public Map estimateEDCM(){ alpha.clear(); //initialise the EDCM probabilities Double f; Integer c; sum_unique_terms=0; for (int i=0;i cur_est){ double A = 0.0; for(String key:cur_est.keySet()){ A += cur_est.get(key); } //System.out.println("Current Mass of Estimates: " + mass); cur_A = A; double denom=0.0; for (int i=0;i params = dcm.estimateDCM(); //examine the results double m=0.0; for(String dim:params.keySet()){ m += params.get(dim); } System.out.println("Mass: " + m); System.out.println(params); } }