#!/usr/local/bin/perl
# Paul Pavlidis. For one way analysis of variance with equal numbers of
# replicates in each group.
#
# Invocation: anova-oneway [switches] <data file> <layout or class file>
#   -l  log transform
#   -v  verbose output
#   -r  format line needs to be removed
#   -c  use class file instead of layout
#
# The switch flags and file names below are package variables on purpose:
# code later in this file reads them ($rdb, $log, $verbose, $data, ...).

use Stats;

$usage = "anova-oneway [-l: log transform; -v: verbose output; -r: format line needs to be removed; -c: use class file instead of layout] \n";
die $usage unless @ARGV > 1;

# Consume the leading switches. The @ARGV guard ends the loop cleanly when
# every argument was a switch, rather than matching against an undefined value.
while (@ARGV and $ARGV[0] =~ /^-/) {
    $opt = shift @ARGV;
    if ($opt eq "-r") {
        $rdb++;
    }
    elsif ($opt eq "-l") {
        $log++;
    }
    elsif ($opt eq "-v") {
        $verbose++;
    }
    elsif ($opt eq "-c") {
        $classfile++;
    }
    else {
        die "Illegal option\n";
    }
}

($data, $layout) = @ARGV;

# The argument-count check above also counts switches, so make sure both file
# names are really still there once the switches have been removed.
die $usage unless defined($data) && defined($layout);

# Read the grouping description into %cat. Both readers are defined elsewhere
# in this file; their return value is used as the number of categories.
my $numcategories = 0;
if ($classfile) {
    $numcategories = readclass($layout, \%cat);
}
else {
    $numcategories = readlayout($layout, \%cat);
}

# Disabled code (the condition is a constant 0, so none of this runs): an
# inline parser for the layout file. NOTE(review): presumably superseded by
# readlayout() -- confirm before deleting.
if (0) {
    open(IN, "<", $layout) or die "Couldn't open layout $layout\n";
    # = category title
    # % category name
    my (@numcats);    # NOTE(review): declared but the code below uses @numcat
    $/ = "=";         # records are delimited by "="
    <IN>;             # read and discard the first record
    my $catnum = 0;
    $firstcat = 1;
    while (<IN>) {
        chomp;
        s/\cM//;
        ($category, @dat) = split /\n/, $_;
        $category =~ s/=//;
        print STDERR "$category =";
        push @maincats, $category;
        $incat = 0;
        foreach $m (@dat) {
            if ($m =~ /\%(.+)/) { # start a new category.
                $incat = 1;
                $catnum++;
                $catname = $1;
                print STDERR "\n$catname:\t";
                if (!$firstcat) {
                }
                next;
            }
            if ($incat) { # subcategories.
                push @{$cat{$category}->{$catname}}, $m;
                $numcat[$catnum]++;
                print STDERR "$m\t";
                if ($firstcat) {
                    $n++;
                }
            }
        }
        $firstcat = 0;
        print STDERR "\n";
    }
    shift @numcat;
    $numcategories = scalar @numcat;
    print STDERR "N=$n. Numcats=$numcategories. Number in each category:";
    foreach $m (@numcat) {
        print STDERR " $m";
    }
    print STDERR "\n";
    close IN;

    # construct the table of replicates. This is a bit tricky.
    foreach $m (keys %cat) {
        foreach $k (keys %{$cat{$m}}) {
            foreach $q (@{$cat{$m}->{$k}}) {
                push @{$numwithcats{$q}}, $k; # associate categories with this trial.
            }
        }
    }
    # This loop and the enclosing if (0) block are closed on the following
    # line of the file.
    foreach $k (sort keys %numwithcats) {
        $cat = join " ", @{$numwithcats{$k}};
        push @{$replicate{$cat}}, $k; # reverse the hash...
} # check foreach $k (sort keys %replicate) { foreach $m (@{$replicate{$k}}) { print STDERR "$m $k\n"; } } } #skip if ($numcategories == 0) { die "No categories found\n"; } # whsehw. open (IN, "<$data") or die "couldn't open data\n"; $/="\n"; my $header = ; # remove header if ($rdb) { ; # if rdb print STDERR "Removed format line\n"; } else { print STDERR "Assuming this is NOT rdb format!\n"; } my $n = -1 + scalar split "\t", $header; my $totaldf = $n-2; #$totaldf = $n-1; $groupsdf = $numcategories - 1; $errordf = $totaldf - $groupsdf; print STDERR "DF: Total $totaldf Groups: $groupsdf Error: $errordf\n"; #print "label\ttotalssq\ttotaldf\tcellssq\tcelldf\tffssq\tffdf\tsfssq\tsfdf\tffxsfssq\tffxsfdf\terrorssq\terrordf\tfcell\tfff\tsff\tfxsf\n"; #print "label\tffdf\tsfdf\tffxsfdf\terrordf\tfff\tsff\tfxsf\n"; if ($verbose) { print "label\ttotalssq\ttotaldf\tgroupssq\tgroupsdf\terrorssq\terrordf\tf\tp\n"; } else { print "label\tf\tp\n"; } while () { chomp; s/\cM//; if (!$_) { print STDERR "Skipping blank line\n"; next; } ($label, @data) = split /\t/, $_; if (scalar @data == 0) { print STDERR "$label: Skipping because it has no data\n"; next; } if (scalar @data != $n) { $numfound = scalar @data; print STDERR "$label: Skipping because it is missing data (expect $n, found $numfound\n"; next; } if ($log) { for ($i=0; $i