001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.inference;
018
019 import org.apache.commons.math.MathException;
020 import org.apache.commons.math.stat.descriptive.StatisticalSummary;
021
022 /**
023 * An interface for Student's t-tests.
024 * <p>
025 * Tests can be:<ul>
026 * <li>One-sample or two-sample</li>
027 * <li>One-sided or two-sided</li>
028 * <li>Paired or unpaired (for two-sample tests)</li>
029 * <li>Homoscedastic (equal variance assumption) or heteroscedastic
030 * (for two sample tests)</li>
031 * <li>Fixed significance level (boolean-valued) or returning p-values.
032 * </li></ul></p>
033 * <p>
034 * Test statistics are available for all tests. Methods including "Test" in
035 * in their names perform tests, all other methods return t-statistics. Among
036 * the "Test" methods, <code>double-</code>valued methods return p-values;
037 * <code>boolean-</code>valued methods perform fixed significance level tests.
038 * Significance levels are always specified as numbers between 0 and 0.5
039 * (e.g. tests at the 95% level use <code>alpha=0.05</code>).</p>
040 * <p>
041 * Input to tests can be either <code>double[]</code> arrays or
042 * {@link StatisticalSummary} instances.</p>
043 *
044 *
045 * @version $Revision: 811786 $ $Date: 2009-09-06 11:36:08 +0200 (dim. 06 sept. 2009) $
046 */
047 public interface TTest {
048 /**
049 * Computes a paired, 2-sample t-statistic based on the data in the input
050 * arrays. The t-statistic returned is equivalent to what would be returned by
051 * computing the one-sample t-statistic {@link #t(double, double[])}, with
052 * <code>mu = 0</code> and the sample array consisting of the (signed)
053 * differences between corresponding entries in <code>sample1</code> and
054 * <code>sample2.</code>
055 * <p>
056 * <strong>Preconditions</strong>: <ul>
057 * <li>The input arrays must have the same length and their common length
058 * must be at least 2.
059 * </li></ul></p>
060 *
061 * @param sample1 array of sample data values
062 * @param sample2 array of sample data values
063 * @return t statistic
064 * @throws IllegalArgumentException if the precondition is not met
065 * @throws MathException if the statistic can not be computed do to a
066 * convergence or other numerical error.
067 */
068 double pairedT(double[] sample1, double[] sample2)
069 throws IllegalArgumentException, MathException;
070 /**
071 * Returns the <i>observed significance level</i>, or
072 * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
073 * based on the data in the input arrays.
074 * <p>
075 * The number returned is the smallest significance level
076 * at which one can reject the null hypothesis that the mean of the paired
077 * differences is 0 in favor of the two-sided alternative that the mean paired
078 * difference is not equal to 0. For a one-sided test, divide the returned
079 * value by 2.</p>
080 * <p>
081 * This test is equivalent to a one-sample t-test computed using
082 * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
083 * array consisting of the signed differences between corresponding elements of
084 * <code>sample1</code> and <code>sample2.</code></p>
085 * <p>
086 * <strong>Usage Note:</strong><br>
087 * The validity of the p-value depends on the assumptions of the parametric
088 * t-test procedure, as discussed
089 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
090 * here</a></p>
091 * <p>
092 * <strong>Preconditions</strong>: <ul>
093 * <li>The input array lengths must be the same and their common length must
094 * be at least 2.
095 * </li></ul></p>
096 *
097 * @param sample1 array of sample data values
098 * @param sample2 array of sample data values
099 * @return p-value for t-test
100 * @throws IllegalArgumentException if the precondition is not met
101 * @throws MathException if an error occurs computing the p-value
102 */
103 double pairedTTest(double[] sample1, double[] sample2)
104 throws IllegalArgumentException, MathException;
105 /**
106 * Performs a paired t-test evaluating the null hypothesis that the
107 * mean of the paired differences between <code>sample1</code> and
108 * <code>sample2</code> is 0 in favor of the two-sided alternative that the
109 * mean paired difference is not equal to 0, with significance level
110 * <code>alpha</code>.
111 * <p>
112 * Returns <code>true</code> iff the null hypothesis can be rejected with
113 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
114 * <code>alpha * 2</code></p>
115 * <p>
116 * <strong>Usage Note:</strong><br>
117 * The validity of the test depends on the assumptions of the parametric
118 * t-test procedure, as discussed
119 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
120 * here</a></p>
121 * <p>
122 * <strong>Preconditions</strong>: <ul>
123 * <li>The input array lengths must be the same and their common length
124 * must be at least 2.
125 * </li>
126 * <li> <code> 0 < alpha < 0.5 </code>
127 * </li></ul></p>
128 *
129 * @param sample1 array of sample data values
130 * @param sample2 array of sample data values
131 * @param alpha significance level of the test
132 * @return true if the null hypothesis can be rejected with
133 * confidence 1 - alpha
134 * @throws IllegalArgumentException if the preconditions are not met
135 * @throws MathException if an error occurs performing the test
136 */
137 boolean pairedTTest(
138 double[] sample1,
139 double[] sample2,
140 double alpha)
141 throws IllegalArgumentException, MathException;
142 /**
143 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
144 * t statistic </a> given observed values and a comparison constant.
145 * <p>
146 * This statistic can be used to perform a one sample t-test for the mean.
147 * </p><p>
148 * <strong>Preconditions</strong>: <ul>
149 * <li>The observed array length must be at least 2.
150 * </li></ul></p>
151 *
152 * @param mu comparison constant
153 * @param observed array of values
154 * @return t statistic
155 * @throws IllegalArgumentException if input array length is less than 2
156 */
157 double t(double mu, double[] observed)
158 throws IllegalArgumentException;
159 /**
160 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
161 * t statistic </a> to use in comparing the mean of the dataset described by
162 * <code>sampleStats</code> to <code>mu</code>.
163 * <p>
164 * This statistic can be used to perform a one sample t-test for the mean.
165 * </p><p>
166 * <strong>Preconditions</strong>: <ul>
167 * <li><code>observed.getN() > = 2</code>.
168 * </li></ul></p>
169 *
170 * @param mu comparison constant
171 * @param sampleStats DescriptiveStatistics holding sample summary statitstics
172 * @return t statistic
173 * @throws IllegalArgumentException if the precondition is not met
174 */
175 double t(double mu, StatisticalSummary sampleStats)
176 throws IllegalArgumentException;
177 /**
178 * Computes a 2-sample t statistic, under the hypothesis of equal
179 * subpopulation variances. To compute a t-statistic without the
180 * equal variances hypothesis, use {@link #t(double[], double[])}.
181 * <p>
182 * This statistic can be used to perform a (homoscedastic) two-sample
183 * t-test to compare sample means.</p>
184 * <p>
185 * The t-statisitc is</p>
186 * <p>
187 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
188 * </p><p>
189 * where <strong><code>n1</code></strong> is the size of first sample;
190 * <strong><code> n2</code></strong> is the size of second sample;
191 * <strong><code> m1</code></strong> is the mean of first sample;
192 * <strong><code> m2</code></strong> is the mean of second sample</li>
193 * </ul>
194 * and <strong><code>var</code></strong> is the pooled variance estimate:
195 * </p><p>
196 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
197 * </p><p>
198 * with <strong><code>var1<code></strong> the variance of the first sample and
199 * <strong><code>var2</code></strong> the variance of the second sample.
200 * </p><p>
201 * <strong>Preconditions</strong>: <ul>
202 * <li>The observed array lengths must both be at least 2.
203 * </li></ul></p>
204 *
205 * @param sample1 array of sample data values
206 * @param sample2 array of sample data values
207 * @return t statistic
208 * @throws IllegalArgumentException if the precondition is not met
209 */
210 double homoscedasticT(double[] sample1, double[] sample2)
211 throws IllegalArgumentException;
212 /**
213 * Computes a 2-sample t statistic, without the hypothesis of equal
214 * subpopulation variances. To compute a t-statistic assuming equal
215 * variances, use {@link #homoscedasticT(double[], double[])}.
216 * <p>
217 * This statistic can be used to perform a two-sample t-test to compare
218 * sample means.</p>
219 * <p>
220 * The t-statisitc is</p>
221 * <p>
222 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
223 * </p><p>
224 * where <strong><code>n1</code></strong> is the size of the first sample
225 * <strong><code> n2</code></strong> is the size of the second sample;
226 * <strong><code> m1</code></strong> is the mean of the first sample;
227 * <strong><code> m2</code></strong> is the mean of the second sample;
228 * <strong><code> var1</code></strong> is the variance of the first sample;
229 * <strong><code> var2</code></strong> is the variance of the second sample;
230 * </p><p>
231 * <strong>Preconditions</strong>: <ul>
232 * <li>The observed array lengths must both be at least 2.
233 * </li></ul></p>
234 *
235 * @param sample1 array of sample data values
236 * @param sample2 array of sample data values
237 * @return t statistic
238 * @throws IllegalArgumentException if the precondition is not met
239 */
240 double t(double[] sample1, double[] sample2)
241 throws IllegalArgumentException;
242 /**
243 * Computes a 2-sample t statistic </a>, comparing the means of the datasets
244 * described by two {@link StatisticalSummary} instances, without the
245 * assumption of equal subpopulation variances. Use
246 * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
247 * compute a t-statistic under the equal variances assumption.
248 * <p>
249 * This statistic can be used to perform a two-sample t-test to compare
250 * sample means.</p>
251 * <p>
252 * The returned t-statisitc is</p>
253 * <p>
254 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
255 * </p><p>
256 * where <strong><code>n1</code></strong> is the size of the first sample;
257 * <strong><code> n2</code></strong> is the size of the second sample;
258 * <strong><code> m1</code></strong> is the mean of the first sample;
259 * <strong><code> m2</code></strong> is the mean of the second sample
260 * <strong><code> var1</code></strong> is the variance of the first sample;
261 * <strong><code> var2</code></strong> is the variance of the second sample
262 * </p><p>
263 * <strong>Preconditions</strong>: <ul>
264 * <li>The datasets described by the two Univariates must each contain
265 * at least 2 observations.
266 * </li></ul></p>
267 *
268 * @param sampleStats1 StatisticalSummary describing data from the first sample
269 * @param sampleStats2 StatisticalSummary describing data from the second sample
270 * @return t statistic
271 * @throws IllegalArgumentException if the precondition is not met
272 */
273 double t(
274 StatisticalSummary sampleStats1,
275 StatisticalSummary sampleStats2)
276 throws IllegalArgumentException;
277 /**
278 * Computes a 2-sample t statistic, comparing the means of the datasets
279 * described by two {@link StatisticalSummary} instances, under the
280 * assumption of equal subpopulation variances. To compute a t-statistic
281 * without the equal variances assumption, use
282 * {@link #t(StatisticalSummary, StatisticalSummary)}.
283 * <p>
284 * This statistic can be used to perform a (homoscedastic) two-sample
285 * t-test to compare sample means.</p>
286 * <p>
287 * The t-statisitc returned is</p>
288 * <p>
289 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
290 * </p><p>
291 * where <strong><code>n1</code></strong> is the size of first sample;
292 * <strong><code> n2</code></strong> is the size of second sample;
293 * <strong><code> m1</code></strong> is the mean of first sample;
294 * <strong><code> m2</code></strong> is the mean of second sample
295 * and <strong><code>var</code></strong> is the pooled variance estimate:
296 * </p><p>
297 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
298 * </p><p>
299 * with <strong><code>var1<code></strong> the variance of the first sample and
300 * <strong><code>var2</code></strong> the variance of the second sample.
301 * </p><p>
302 * <strong>Preconditions</strong>: <ul>
303 * <li>The datasets described by the two Univariates must each contain
304 * at least 2 observations.
305 * </li></ul></p>
306 *
307 * @param sampleStats1 StatisticalSummary describing data from the first sample
308 * @param sampleStats2 StatisticalSummary describing data from the second sample
309 * @return t statistic
310 * @throws IllegalArgumentException if the precondition is not met
311 */
312 double homoscedasticT(
313 StatisticalSummary sampleStats1,
314 StatisticalSummary sampleStats2)
315 throws IllegalArgumentException;
316 /**
317 * Returns the <i>observed significance level</i>, or
318 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
319 * comparing the mean of the input array with the constant <code>mu</code>.
320 * <p>
321 * The number returned is the smallest significance level
322 * at which one can reject the null hypothesis that the mean equals
323 * <code>mu</code> in favor of the two-sided alternative that the mean
324 * is different from <code>mu</code>. For a one-sided test, divide the
325 * returned value by 2.</p>
326 * <p>
327 * <strong>Usage Note:</strong><br>
328 * The validity of the test depends on the assumptions of the parametric
329 * t-test procedure, as discussed
330 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
331 * </p><p>
332 * <strong>Preconditions</strong>: <ul>
333 * <li>The observed array length must be at least 2.
334 * </li></ul></p>
335 *
336 * @param mu constant value to compare sample mean against
337 * @param sample array of sample data values
338 * @return p-value
339 * @throws IllegalArgumentException if the precondition is not met
340 * @throws MathException if an error occurs computing the p-value
341 */
342 double tTest(double mu, double[] sample)
343 throws IllegalArgumentException, MathException;
344 /**
345 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
346 * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
347 * which <code>sample</code> is drawn equals <code>mu</code>.
348 * <p>
349 * Returns <code>true</code> iff the null hypothesis can be
350 * rejected with confidence <code>1 - alpha</code>. To
351 * perform a 1-sided test, use <code>alpha * 2</code></p>
352 * <p>
353 * <strong>Examples:</strong><br><ol>
354 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
355 * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
356 * </li>
357 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
358 * at the 99% level, first verify that the measured sample mean is less
359 * than <code>mu</code> and then use
360 * <br><code>tTest(mu, sample, 0.02) </code>
361 * </li></ol></p>
362 * <p>
363 * <strong>Usage Note:</strong><br>
364 * The validity of the test depends on the assumptions of the one-sample
365 * parametric t-test procedure, as discussed
366 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
367 * </p><p>
368 * <strong>Preconditions</strong>: <ul>
369 * <li>The observed array length must be at least 2.
370 * </li></ul></p>
371 *
372 * @param mu constant value to compare sample mean against
373 * @param sample array of sample data values
374 * @param alpha significance level of the test
375 * @return p-value
376 * @throws IllegalArgumentException if the precondition is not met
377 * @throws MathException if an error computing the p-value
378 */
379 boolean tTest(double mu, double[] sample, double alpha)
380 throws IllegalArgumentException, MathException;
381 /**
382 * Returns the <i>observed significance level</i>, or
383 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
384 * comparing the mean of the dataset described by <code>sampleStats</code>
385 * with the constant <code>mu</code>.
386 * <p>
387 * The number returned is the smallest significance level
388 * at which one can reject the null hypothesis that the mean equals
389 * <code>mu</code> in favor of the two-sided alternative that the mean
390 * is different from <code>mu</code>. For a one-sided test, divide the
391 * returned value by 2.</p>
392 * <p>
393 * <strong>Usage Note:</strong><br>
394 * The validity of the test depends on the assumptions of the parametric
395 * t-test procedure, as discussed
396 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
397 * here</a></p>
398 * <p>
399 * <strong>Preconditions</strong>: <ul>
400 * <li>The sample must contain at least 2 observations.
401 * </li></ul></p>
402 *
403 * @param mu constant value to compare sample mean against
404 * @param sampleStats StatisticalSummary describing sample data
405 * @return p-value
406 * @throws IllegalArgumentException if the precondition is not met
407 * @throws MathException if an error occurs computing the p-value
408 */
409 double tTest(double mu, StatisticalSummary sampleStats)
410 throws IllegalArgumentException, MathException;
411 /**
412 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
413 * two-sided t-test</a> evaluating the null hypothesis that the mean of the
414 * population from which the dataset described by <code>stats</code> is
415 * drawn equals <code>mu</code>.
416 * <p>
417 * Returns <code>true</code> iff the null hypothesis can be rejected with
418 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
419 * <code>alpha * 2.</code></p>
420 * <p>
421 * <strong>Examples:</strong><br><ol>
422 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
423 * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
424 * </li>
425 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
426 * at the 99% level, first verify that the measured sample mean is less
427 * than <code>mu</code> and then use
428 * <br><code>tTest(mu, sampleStats, 0.02) </code>
429 * </li></ol></p>
430 * <p>
431 * <strong>Usage Note:</strong><br>
432 * The validity of the test depends on the assumptions of the one-sample
433 * parametric t-test procedure, as discussed
434 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
435 * </p><p>
436 * <strong>Preconditions</strong>: <ul>
437 * <li>The sample must include at least 2 observations.
438 * </li></ul></p>
439 *
440 * @param mu constant value to compare sample mean against
441 * @param sampleStats StatisticalSummary describing sample data values
442 * @param alpha significance level of the test
443 * @return p-value
444 * @throws IllegalArgumentException if the precondition is not met
445 * @throws MathException if an error occurs computing the p-value
446 */
447 boolean tTest(
448 double mu,
449 StatisticalSummary sampleStats,
450 double alpha)
451 throws IllegalArgumentException, MathException;
452 /**
453 * Returns the <i>observed significance level</i>, or
454 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
455 * comparing the means of the input arrays.
456 * <p>
457 * The number returned is the smallest significance level
458 * at which one can reject the null hypothesis that the two means are
459 * equal in favor of the two-sided alternative that they are different.
460 * For a one-sided test, divide the returned value by 2.</p>
461 * <p>
462 * The test does not assume that the underlying popuation variances are
463 * equal and it uses approximated degrees of freedom computed from the
464 * sample data to compute the p-value. The t-statistic used is as defined in
465 * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
466 * to the degrees of freedom is used,
467 * as described
468 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
469 * here.</a> To perform the test under the assumption of equal subpopulation
470 * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
471 * <p>
472 * <strong>Usage Note:</strong><br>
473 * The validity of the p-value depends on the assumptions of the parametric
474 * t-test procedure, as discussed
475 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
476 * here</a></p>
477 * <p>
478 * <strong>Preconditions</strong>: <ul>
479 * <li>The observed array lengths must both be at least 2.
480 * </li></ul></p>
481 *
482 * @param sample1 array of sample data values
483 * @param sample2 array of sample data values
484 * @return p-value for t-test
485 * @throws IllegalArgumentException if the precondition is not met
486 * @throws MathException if an error occurs computing the p-value
487 */
488 double tTest(double[] sample1, double[] sample2)
489 throws IllegalArgumentException, MathException;
490 /**
491 * Returns the <i>observed significance level</i>, or
492 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
493 * comparing the means of the input arrays, under the assumption that
494 * the two samples are drawn from subpopulations with equal variances.
495 * To perform the test without the equal variances assumption, use
496 * {@link #tTest(double[], double[])}.</p>
497 * <p>
498 * The number returned is the smallest significance level
499 * at which one can reject the null hypothesis that the two means are
500 * equal in favor of the two-sided alternative that they are different.
501 * For a one-sided test, divide the returned value by 2.</p>
502 * <p>
503 * A pooled variance estimate is used to compute the t-statistic. See
504 * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
505 * minus 2 is used as the degrees of freedom.</p>
506 * <p>
507 * <strong>Usage Note:</strong><br>
508 * The validity of the p-value depends on the assumptions of the parametric
509 * t-test procedure, as discussed
510 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
511 * here</a></p>
512 * <p>
513 * <strong>Preconditions</strong>: <ul>
514 * <li>The observed array lengths must both be at least 2.
515 * </li></ul></p>
516 *
517 * @param sample1 array of sample data values
518 * @param sample2 array of sample data values
519 * @return p-value for t-test
520 * @throws IllegalArgumentException if the precondition is not met
521 * @throws MathException if an error occurs computing the p-value
522 */
523 double homoscedasticTTest(
524 double[] sample1,
525 double[] sample2)
526 throws IllegalArgumentException, MathException;
527 /**
528 * Performs a
529 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
530 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
531 * and <code>sample2</code> are drawn from populations with the same mean,
532 * with significance level <code>alpha</code>. This test does not assume
533 * that the subpopulation variances are equal. To perform the test assuming
534 * equal variances, use
535 * {@link #homoscedasticTTest(double[], double[], double)}.
536 * <p>
537 * Returns <code>true</code> iff the null hypothesis that the means are
538 * equal can be rejected with confidence <code>1 - alpha</code>. To
539 * perform a 1-sided test, use <code>alpha * 2</code></p>
540 * <p>
541 * See {@link #t(double[], double[])} for the formula used to compute the
542 * t-statistic. Degrees of freedom are approximated using the
543 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
544 * Welch-Satterthwaite approximation.</a></p>
545 * <p>
546 * <strong>Examples:</strong><br><ol>
547 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
548 * the 95% level, use
549 * <br><code>tTest(sample1, sample2, 0.05). </code>
550 * </li>
551 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>,
552 * at the 99% level, first verify that the measured mean of <code>sample 1</code>
553 * is less than the mean of <code>sample 2</code> and then use
554 * <br><code>tTest(sample1, sample2, 0.02) </code>
555 * </li></ol></p>
556 * <p>
557 * <strong>Usage Note:</strong><br>
558 * The validity of the test depends on the assumptions of the parametric
559 * t-test procedure, as discussed
560 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
561 * here</a></p>
562 * <p>
563 * <strong>Preconditions</strong>: <ul>
564 * <li>The observed array lengths must both be at least 2.
565 * </li>
566 * <li> <code> 0 < alpha < 0.5 </code>
567 * </li></ul></p>
568 *
569 * @param sample1 array of sample data values
570 * @param sample2 array of sample data values
571 * @param alpha significance level of the test
572 * @return true if the null hypothesis can be rejected with
573 * confidence 1 - alpha
574 * @throws IllegalArgumentException if the preconditions are not met
575 * @throws MathException if an error occurs performing the test
576 */
577 boolean tTest(
578 double[] sample1,
579 double[] sample2,
580 double alpha)
581 throws IllegalArgumentException, MathException;
582 /**
583 * Performs a
584 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
585 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
586 * and <code>sample2</code> are drawn from populations with the same mean,
587 * with significance level <code>alpha</code>, assuming that the
588 * subpopulation variances are equal. Use
589 * {@link #tTest(double[], double[], double)} to perform the test without
590 * the assumption of equal variances.
591 * <p>
592 * Returns <code>true</code> iff the null hypothesis that the means are
593 * equal can be rejected with confidence <code>1 - alpha</code>. To
594 * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test
595 * without the assumption of equal subpopulation variances, use
596 * {@link #tTest(double[], double[], double)}.</p>
597 * <p>
598 * A pooled variance estimate is used to compute the t-statistic. See
599 * {@link #t(double[], double[])} for the formula. The sum of the sample
600 * sizes minus 2 is used as the degrees of freedom.</p>
601 * <p>
602 * <strong>Examples:</strong><br><ol>
603 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
604 * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
605 * </li>
606 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
607 * at the 99% level, first verify that the measured mean of
608 * <code>sample 1</code> is less than the mean of <code>sample 2</code>
609 * and then use
610 * <br><code>tTest(sample1, sample2, 0.02) </code>
611 * </li></ol></p>
612 * <p>
613 * <strong>Usage Note:</strong><br>
614 * The validity of the test depends on the assumptions of the parametric
615 * t-test procedure, as discussed
616 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
617 * here</a></p>
618 * <p>
619 * <strong>Preconditions</strong>: <ul>
620 * <li>The observed array lengths must both be at least 2.
621 * </li>
622 * <li> <code> 0 < alpha < 0.5 </code>
623 * </li></ul></p>
624 *
625 * @param sample1 array of sample data values
626 * @param sample2 array of sample data values
627 * @param alpha significance level of the test
628 * @return true if the null hypothesis can be rejected with
629 * confidence 1 - alpha
630 * @throws IllegalArgumentException if the preconditions are not met
631 * @throws MathException if an error occurs performing the test
632 */
633 boolean homoscedasticTTest(
634 double[] sample1,
635 double[] sample2,
636 double alpha)
637 throws IllegalArgumentException, MathException;
638 /**
639 * Returns the <i>observed significance level</i>, or
640 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
641 * comparing the means of the datasets described by two StatisticalSummary
642 * instances.
643 * <p>
644 * The number returned is the smallest significance level
645 * at which one can reject the null hypothesis that the two means are
646 * equal in favor of the two-sided alternative that they are different.
647 * For a one-sided test, divide the returned value by 2.</p>
648 * <p>
649 * The test does not assume that the underlying popuation variances are
650 * equal and it uses approximated degrees of freedom computed from the
651 * sample data to compute the p-value. To perform the test assuming
652 * equal variances, use
653 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
654 * <p>
655 * <strong>Usage Note:</strong><br>
656 * The validity of the p-value depends on the assumptions of the parametric
657 * t-test procedure, as discussed
658 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
659 * here</a></p>
660 * <p>
661 * <strong>Preconditions</strong>: <ul>
662 * <li>The datasets described by the two Univariates must each contain
663 * at least 2 observations.
664 * </li></ul></p>
665 *
666 * @param sampleStats1 StatisticalSummary describing data from the first sample
667 * @param sampleStats2 StatisticalSummary describing data from the second sample
668 * @return p-value for t-test
669 * @throws IllegalArgumentException if the precondition is not met
670 * @throws MathException if an error occurs computing the p-value
671 */
672 double tTest(
673 StatisticalSummary sampleStats1,
674 StatisticalSummary sampleStats2)
675 throws IllegalArgumentException, MathException;
676 /**
677 * Returns the <i>observed significance level</i>, or
678 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
679 * comparing the means of the datasets described by two StatisticalSummary
680 * instances, under the hypothesis of equal subpopulation variances. To
681 * perform a test without the equal variances assumption, use
682 * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
683 * <p>
684 * The number returned is the smallest significance level
685 * at which one can reject the null hypothesis that the two means are
686 * equal in favor of the two-sided alternative that they are different.
687 * For a one-sided test, divide the returned value by 2.</p>
688 * <p>
689 * See {@link #homoscedasticT(double[], double[])} for the formula used to
690 * compute the t-statistic. The sum of the sample sizes minus 2 is used as
691 * the degrees of freedom.</p>
692 * <p>
693 * <strong>Usage Note:</strong><br>
694 * The validity of the p-value depends on the assumptions of the parametric
695 * t-test procedure, as discussed
696 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
697 * </p><p>
698 * <strong>Preconditions</strong>: <ul>
699 * <li>The datasets described by the two Univariates must each contain
700 * at least 2 observations.
701 * </li></ul></p>
702 *
703 * @param sampleStats1 StatisticalSummary describing data from the first sample
704 * @param sampleStats2 StatisticalSummary describing data from the second sample
705 * @return p-value for t-test
706 * @throws IllegalArgumentException if the precondition is not met
707 * @throws MathException if an error occurs computing the p-value
708 */
709 double homoscedasticTTest(
710 StatisticalSummary sampleStats1,
711 StatisticalSummary sampleStats2)
712 throws IllegalArgumentException, MathException;
713 /**
714 * Performs a
715 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
716 * two-sided t-test</a> evaluating the null hypothesis that
717 * <code>sampleStats1</code> and <code>sampleStats2</code> describe
718 * datasets drawn from populations with the same mean, with significance
719 * level <code>alpha</code>. This test does not assume that the
720 * subpopulation variances are equal. To perform the test under the equal
721 * variances assumption, use
722 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
723 * <p>
724 * Returns <code>true</code> iff the null hypothesis that the means are
725 * equal can be rejected with confidence <code>1 - alpha</code>. To
726 * perform a 1-sided test, use <code>alpha * 2</code></p>
727 * <p>
728 * See {@link #t(double[], double[])} for the formula used to compute the
729 * t-statistic. Degrees of freedom are approximated using the
730 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
731 * Welch-Satterthwaite approximation.</a></p>
732 * <p>
733 * <strong>Examples:</strong><br><ol>
734 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
735 * the 95%, use
736 * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
737 * </li>
738 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
739 * at the 99% level, first verify that the measured mean of
740 * <code>sample 1</code> is less than the mean of <code>sample 2</code>
741 * and then use
742 * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
743 * </li></ol></p>
744 * <p>
745 * <strong>Usage Note:</strong><br>
746 * The validity of the test depends on the assumptions of the parametric
747 * t-test procedure, as discussed
748 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
749 * here</a></p>
750 * <p>
751 * <strong>Preconditions</strong>: <ul>
752 * <li>The datasets described by the two Univariates must each contain
753 * at least 2 observations.
754 * </li>
755 * <li> <code> 0 < alpha < 0.5 </code>
756 * </li></ul></p>
757 *
758 * @param sampleStats1 StatisticalSummary describing sample data values
759 * @param sampleStats2 StatisticalSummary describing sample data values
760 * @param alpha significance level of the test
761 * @return true if the null hypothesis can be rejected with
762 * confidence 1 - alpha
763 * @throws IllegalArgumentException if the preconditions are not met
764 * @throws MathException if an error occurs performing the test
765 */
766 boolean tTest(
767 StatisticalSummary sampleStats1,
768 StatisticalSummary sampleStats2,
769 double alpha)
770 throws IllegalArgumentException, MathException;
771 }