/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math.stat.inference;

import java.util.Collection;

import org.apache.commons.math.MathException;
import org.apache.commons.math.MathRuntimeException;
import org.apache.commons.math.distribution.FDistribution;
import org.apache.commons.math.distribution.FDistributionImpl;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.stat.descriptive.summary.Sum;
import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;


/**
 * Implements one-way ANOVA statistics defined in the {@link OneWayAnova}
 * interface.
 *
 * <p>Uses the
 * {@link org.apache.commons.math.distribution.FDistribution
 * commons-math F Distribution implementation} to estimate exact p-values.</p>
 *
 * <p>This implementation is based on a description at
 * http://faculty.vassar.edu/lowry/ch13pt1.html</p>
 * <pre>
 * Abbreviations: bg = between groups,
 *                wg = within groups,
 *                ss = sum squared deviations
 * </pre>
 *
 * @since 1.2
 * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 ao??t 2010) $
 */
public class OneWayAnovaImpl implements OneWayAnova {

    /**
     * Default constructor.
     */
    public OneWayAnovaImpl() {
    }

    /**
     * {@inheritDoc}<p>
     * This implementation computes the F statistic using the definitional
     * formula<pre>
     *   F = msbg/mswg</pre>
     * where<pre>
     *  msbg = between group mean square
     *  mswg = within group mean square</pre>
     * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
     * here</a></p>
     */
    public double anovaFValue(Collection<double[]> categoryData)
        throws IllegalArgumentException, MathException {
        return anovaStats(categoryData).F;
    }

    /**
     * {@inheritDoc}<p>
     * This implementation uses the
     * {@link org.apache.commons.math.distribution.FDistribution
     * commons-math F Distribution implementation} to estimate the exact
     * p-value, using the formula<pre>
     *   p = 1 - cumulativeProbability(F)</pre>
     * where <code>F</code> is the F value and <code>cumulativeProbability</code>
     * is the commons-math implementation of the F distribution.</p>
     */
    public double anovaPValue(Collection<double[]> categoryData)
        throws IllegalArgumentException, MathException {
        final AnovaStats stats = anovaStats(categoryData);
        // Numerator df = between groups, denominator df = within groups.
        final FDistribution fdist = new FDistributionImpl(stats.dfbg, stats.dfwg);
        return 1.0 - fdist.cumulativeProbability(stats.F);
    }

    /**
     * {@inheritDoc}<p>
     * This implementation uses the
     * {@link org.apache.commons.math.distribution.FDistribution
     * commons-math F Distribution implementation} to estimate the exact
     * p-value, using the formula<pre>
     *   p = 1 - cumulativeProbability(F)</pre>
     * where <code>F</code> is the F value and <code>cumulativeProbability</code>
     * is the commons-math implementation of the F distribution.</p>
     * <p>True is returned iff the estimated p-value is less than alpha.</p>
     * <p>An <code>IllegalArgumentException</code> is raised unless
     * <code>0 &lt; alpha &lt;= 0.5</code>; a <code>NaN</code> alpha is
     * rejected as well.</p>
     */
    public boolean anovaTest(Collection<double[]> categoryData, double alpha)
        throws IllegalArgumentException, MathException {
        // Written as a negated range test on purpose: NaN fails every
        // comparison, so "alpha <= 0 || alpha > 0.5" would let it through.
        if (!((alpha > 0) && (alpha <= 0.5))) {
            throw MathRuntimeException.createIllegalArgumentException(
                  LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
                  alpha, 0, 0.5);
        }
        return anovaPValue(categoryData) < alpha;
    }


    /**
     * This method actually does the calculations (except P-value).
     *
     * <p>For every category the sum of squared deviations is obtained from
     * the running sum and sum of squares as
     * <code>sumsq - sum * sum / n</code>; the same formula applied to all
     * values pooled together gives the total sum of squares, and the
     * between-groups sum of squares is the difference of the two.</p>
     *
     * @param categoryData <code>Collection</code> of <code>double[]</code>
     * arrays each containing data for one category
     * @return computed AnovaStats
     * @throws IllegalArgumentException if categoryData does not meet
     * preconditions specified in the interface definition
     * @throws MathException if an error occurs computing the Anova stats
     */
    private AnovaStats anovaStats(Collection<double[]> categoryData)
        throws IllegalArgumentException, MathException {

        // check if we have enough categories
        if (categoryData.size() < 2) {
            throw MathRuntimeException.createIllegalArgumentException(
                  LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
                  categoryData.size());
        }

        // check if each category has enough data
        for (double[] array : categoryData) {
            if (array.length <= 1) {
                throw MathRuntimeException.createIllegalArgumentException(
                      LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
                      array.length);
            }
        }

        int dfwg = 0;
        double sswg = 0;
        final Sum totsum = new Sum();
        final SumOfSquares totsumsq = new SumOfSquares();
        int totnum = 0;

        for (double[] data : categoryData) {

            final Sum sum = new Sum();
            final SumOfSquares sumsq = new SumOfSquares();

            for (double val : data) {
                // within category
                sum.increment(val);
                sumsq.increment(val);

                // for all categories
                totsum.increment(val);
                totsumsq.increment(val);
            }

            final int num = data.length;
            totnum += num;
            dfwg += num - 1;
            sswg += sumsq.getResult() - sum.getResult() * sum.getResult() / num;
        }

        final double sst = totsumsq.getResult() - totsum.getResult() *
            totsum.getResult() / totnum;
        final double ssbg = sst - sswg;
        final int dfbg = categoryData.size() - 1;
        final double msbg = ssbg / dfbg;
        final double mswg = sswg / dfwg;
        final double F = msbg / mswg;

        return new AnovaStats(dfbg, dfwg, F);
    }

    /**
     * Convenience class to pass dfbg, dfwg, F values around within
     * OneWayAnovaImpl.
     * No get/set methods provided; the fields are read directly and never
     * reassigned after construction.
     */
    private static class AnovaStats {

        /** Degrees of freedom in numerator (between groups). */
        private final int dfbg;

        /** Degrees of freedom in denominator (within groups). */
        private final int dfwg;

        /** Statistic. */
        private final double F;

        /**
         * Constructor
         * @param dfbg degrees of freedom in numerator (between groups)
         * @param dfwg degrees of freedom in denominator (within groups)
         * @param F statistic
         */
        private AnovaStats(int dfbg, int dfwg, double F) {
            this.dfbg = dfbg;
            this.dfwg = dfwg;
            this.F = F;
        }
    }

}