#!/usr/local/bin/perl # Paul Pavlidis. For equal numbers of replicates only, two factors. use Stats; $usage = "anova-twoway-withrep [-l: log transform; -v: verbose output; -r: format line needs to be removed] \n"; die $usage unless @ARGV > 1; while ($ARGV[0] =~ /^-/) { $opt = shift @ARGV; if ($opt eq "-r") { $rdb++; } elsif ($opt eq "-l") { $log++; } elsif ($opt eq "-v") { $verbose++; } else { die "Illegal option\n"; } } ($data, $layout) = @ARGV; open (IN, "<$layout") or die "Couldn't open layout $layout\n"; # format of the layout file: # = category title # % category name # list of columns that apply # etc. my ($numcat1, $numcat2, $numdup); $/= "="; ; $firstcat = 1; while () { chomp; s/\cM//g; ($category, @dat) = split /\n/, $_; $category =~ s/=//; print STDERR "$category = "; push @maincats, $category; $incat=0; $numcat1++; # number of items in category 1. $k = scalar @dat; foreach $m (@dat) { if($m=~/\%(.+)/) { # start a new category. $incat=1; $catname = $1; print STDERR "\n$catname:\t"; if (!$firstcat) { $numcat2++; # number of things in the second category. } next; } if ($incat) { # subcategories. print STDERR "$m\t"; push @{$cat{$category}->{$catname}}, $m; if ($firstcat) { $n++; } } } $firstcat = 0; print STDERR "\n"; } $minimum = $numcat1 * $numcat2; $numrep = $n / $minimum; die "You should use another program since you have no replication\n" if $numrep < 2; print STDERR "N=$n CAT1($maincats[0])=$numcat1 CAT2($maincats[1])=$numcat2 NUMREP=$numrep\n"; #print STDERR "Calculate p-values as F, df1=factordf df2=errordf\n"; close IN; # construct the table of replicates. This is a bit tricky. foreach $m (keys %cat) { foreach $k (keys %{$cat{$m}}) { foreach $q (@{$cat{$m}->{$k}}) { push @{$numwithcats{$q}}, $k; # associate categories with this trial. } } } foreach $k (sort keys %numwithcats) { $cat = join " ", @{$numwithcats{$k}}; push @{$replicate{$cat}}, $k; # reverse the hash... } # check foreach $k (sort keys %replicate) { foreach $m (@{$replicate{$k}}) { # print STDERR "$m $k\n"; } } # whsehw. open (IN, "<$data") or die "couldn't open data\n"; $/="\n"; ; if ($rdb) { ; # if rdb print STDERR "Removed format line\n"; } else { print STDERR "Assuming this is NOT rdb format!\n"; } $totaldf = $n-1; $celldf = $numcat1 * $numcat2 - 1; $ffdf = $numcat1 - 1; $sfdf = $numcat2 - 1; $ffxsfdf = $ffdf * $sfdf; $errordf = $numcat1*$numcat2*($numrep-1); print STDERR "DF: Total $totaldf Cell $celldf First $ffdf Second $sfdf FFXSF $ffxsfdf ERROR $errordf\n"; #print "label\ttotalssq\ttotaldf\tcellssq\tcelldf\tffssq\tffdf\tsfssq\tsfdf\tffxsfssq\tffxsfdf\terrorssq\terrordf\tfcell\tfff\tsff\tfxsf\n"; if ($verbose) { print "label\tffdf\tsfdf\tffxsfdf\terrordf\tfff\tsff\tfxsf\tpff\tpsf\tpsxff\n"; } else { print "label\tfff\tsff\tfxsf\tpff\tpsf\tpsxff\n"; } while () { chomp; s/\cM//; if (!$_) { print STDERR "Skipping blank line\n"; next; } ($label, @data) = split /\t/, $_; if (scalar @data != $n) { print STDERR "$label: Skipping because it is missing data\n"; next; } if ($log) { for ($i=0; $i{$subcat}}) { $sum+=$$data[$trial]; } $grandtotal+=$sum*$sum; } # print STDERR "G $grandtotal $sum\n"; $grandtotal/= $nothercat; $grandtotal-=$c; # print STDERR "G $grandtotal $sum\n"; return $grandtotal; } sub calcCellSSQ { my ($data, $c, $cat) = @_; my $sum = 0; my $grandtotal = 0; foreach $m (sort keys %$cat) { # for each cell $sum=0; foreach $k (@{$$cat{$m}}) { # add up all the replicates $sum+=$$data[$k]; } $sum*=$sum; $grandtotal+=$sum; } $grandtotal/=$numrep; $grandtotal-=$c; return $grandtotal; } ##--