Mercurial > octave-antonio
annotate scripts/statistics/tests/anova.m @ 11472:1740012184f9
Use uppercase for variable names in error() strings to match Info documentation. Only m-files done.
author | Rik <octave@nomad.inbox5.com> |
---|---|
date | Sun, 09 Jan 2011 21:33:04 -0800 |
parents | 16f53d29049f |
children | fd0a3ac60b0e |
rev | line source |
---|---|
7017 | 1 ## Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2005, 2006, |
9245 | 2 ## 2007, 2009 Kurt Hornik |
3426 | 3 ## |
3922 | 4 ## This file is part of Octave. |
5 ## | |
6 ## Octave is free software; you can redistribute it and/or modify it | |
7 ## under the terms of the GNU General Public License as published by | |
7016 | 8 ## the Free Software Foundation; either version 3 of the License, or (at |
9 ## your option) any later version. | |
3426 | 10 ## |
3922 | 11 ## Octave is distributed in the hope that it will be useful, but |
3200 | 12 ## WITHOUT ANY WARRANTY; without even the implied warranty of |
13 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
3426 | 14 ## General Public License for more details. |
15 ## | |
3200 | 16 ## You should have received a copy of the GNU General Public License |
7016 | 17 ## along with Octave; see the file COPYING. If not, see |
18 ## <http://www.gnu.org/licenses/>. | |
3200 | 19 |
3454 | 20 ## -*- texinfo -*- |
21 ## @deftypefn {Function File} {[@var{pval}, @var{f}, @var{df_b}, @var{df_w}] =} anova (@var{y}, @var{g}) | |
22 ## Perform a one-way analysis of variance (ANOVA). The goal is to test | |
23 ## whether the population means of data taken from @var{k} different | |
24 ## groups are all equal. | |
3200 | 25 ## |
3454 | 26 ## Data may be given in a single vector @var{y} with groups specified by |
27 ## a corresponding vector of group labels @var{g} (e.g., numbers from 1 | |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
7017
diff
changeset
|
28 ## to @var{k}). This is the general form which does not impose any |
3454 | 29 ## restriction on the number of data in each group or the group labels. |
3200 | 30 ## |
3454 | 31 ## If @var{y} is a matrix and @var{g} is omitted, each column of @var{y} |
32 ## is treated as a group. This form is only appropriate for balanced | |
33 ## ANOVA in which the numbers of samples from each group are all equal. | |
3200 | 34 ## |
3454 | 35 ## Under the null hypothesis of constant means, the statistic @var{f} |
36 ## follows an F distribution with @var{df_b} and @var{df_w} degrees of freedom. | |
37 ## | |
38 ## The p-value (1 minus the CDF of this distribution at @var{f}) is | |
39 ## returned in @var{pval}. | |
3200 | 40 ## |
41 ## If no output argument is given, the standard one-way ANOVA table is | |
42 ## printed. | |
3454 | 43 ## @end deftypefn |
3200 | 44 |
5428 | 45 ## Author: KH <Kurt.Hornik@wu-wien.ac.at> |
3456 | 46 ## Description: One-way analysis of variance (ANOVA) |
3426 | 47 |
function [pval, f, df_b, df_w] = anova (y, g)

  ## One-way analysis of variance.
  ##
  ## Inputs:
  ##   y  Data.  With one argument, a matrix whose columns are the groups
  ##      (balanced design).  With two arguments, a vector of observations.
  ##   g  Optional vector of group labels, the same length as y.  Labels
  ##      must be sortable and comparable with ">" and "==".
  ##
  ## Outputs:
  ##   pval  1 minus the F CDF evaluated at f.
  ##   f     Ratio of the between-group to the within-group mean square.
  ##   df_b  Between-group degrees of freedom (k - 1).
  ##   df_w  Within-group degrees of freedom (n - k).
  ##
  ## When called with no output arguments the ANOVA table is printed.
  ##
  ## Errors are raised when y has the wrong shape for the calling form,
  ## when g is not a vector of the same length as y, or when g contains
  ## fewer than two distinct labels.

  if ((nargin < 1) || (nargin > 2))
    print_usage ();
  elseif (nargin == 1)
    ## Balanced form: every column of the matrix Y is one group.
    if (isvector (y))
      error ("anova: for `anova (Y)', Y must not be a vector");
    endif
    [group_count, k] = size (y);
    n = group_count * k;
    group_mean = mean (y);
    ## Within-group sum of squares from the deviations of each column
    ## about its own mean.  Computing it directly (rather than as
    ## SST - SSB) keeps it non-negative under rounding.
    within_dev = y - ones (group_count, 1) * group_mean;
    SSW = sum (sumsq (within_dev));
  else
    ## General form: observations in Y, group labels in G.
    if (! isvector (y))
      error ("anova: for `anova (Y, G)', Y must be a vector");
    endif
    n = length (y);
    if (! isvector (g) || (length (g) != n))
      error ("anova: G must be a vector of the same length as Y");
    endif
    ## Distinct labels are located at the positions where the sorted
    ## label vector increases.
    s = sort (g);
    step_idx = find (s(2:n) > s(1:(n-1)));
    k = length (step_idx) + 1;
    if (k == 1)
      error ("anova: there should be at least 2 groups");
    endif
    group_label = s([1, (reshape (step_idx, 1, k-1) + 1)]);

    group_count = zeros (1, k);
    group_mean = zeros (1, k);
    SSW = 0;
    for j = 1:k
      v = y(find (g == group_label(j)));
      group_count(j) = length (v);
      group_mean(j) = mean (v);
      ## Accumulate the within-group sum of squares directly so that
      ## cancellation cannot drive it below zero.
      SSW = SSW + sumsq (v - group_mean(j));
    endfor
  endif

  total_mean = mean (y(:));
  SSB = sum (group_count .* (group_mean - total_mean) .^ 2);
  SST = sumsq (reshape (y, n, 1) - total_mean);
  df_b = k - 1;
  df_w = n - k;
  v_b = SSB / df_b;
  ## NOTE: when every group holds a single observation, df_w is 0 and the
  ## within-group variance (and therefore f) is not finite.
  v_w = SSW / df_w;
  f = v_b / v_w;
  ## fcdf is the current name of the F distribution CDF (formerly f_cdf).
  pval = 1 - fcdf (f, df_b, df_w);

  if (nargout == 0)
    ## This eventually needs to be done more cleanly ...
    printf ("\n");
    printf ("One-way ANOVA Table:\n");
    printf ("\n");
    printf ("Source of Variation Sum of Squares df Empirical Var\n");
    printf ("*********************************************************\n");
    printf ("Between Groups %15.4f %4d %13.4f\n", SSB, df_b, v_b);
    printf ("Within Groups %15.4f %4d %13.4f\n", SSW, df_w, v_w);
    printf ("---------------------------------------------------------\n");
    printf ("Total %15.4f %4d\n", SST, n - 1);
    printf ("\n");
    printf ("Test Statistic f %15.4f\n", f);
    printf ("p-value %15.4f\n", pval);
    printf ("\n");
  endif

endfunction