aaa.2grp<-function(samp.info.file, # Name of file with sample IDs and group labels sampID.col, # column of samp.info.file with sample IDs grplbl.col, # column of samp.info.file with group labels expr.file, # Name of expression data file result.file, # name of result file logtrans.expr=T) # Should expression data be log-transformed? default=TRUE. { # Read sample info data samp.info<-read.table(samp.info.file,sep="\t",as.is=T,header=T) sampID<-samp.info[,sampID.col] grplbl<-samp.info[,grplbl.col] # Read expression data expr.data<-read.table(expr.file,sep="\t",as.is=T,header=T) k<-dim(expr.data)[2] Y<-expr.data[,2:k] expr.hdrs<-scan(expr.file,sep="\n",n=1,what=character()) expr.hdrs<-unlist(strsplit(expr.hdrs,split="\t")) expr.hdrs<-expr.hdrs[2:k] # Subset both data sets on overlap of matched sample IDs expr.inc<-is.element(expr.hdrs,sampID) samp.inc<-is.element(sampID,expr.hdrs) if ((!any(expr.inc))|(!any(samp.inc))) stop("Error: Cannot Match Sample IDs.") Y<-Y[,expr.inc] expr.hdrs<-expr.hdrs[expr.inc] sampID<-sampID[samp.inc] grplbl<-grplbl[samp.inc] # Order expression data and sample data expr.ord<-order(expr.hdrs) samp.ord<-order(sampID) Y<-Y[,expr.ord] expr.hdrs<-expr.hdrs[expr.ord] sampID<-sampID[samp.ord] grplbl<-grplbl[samp.ord] # Check ordering one last time if (any(sampID!=expr.hdrs)) stop("Error: Cannot Match Sample IDs.") # Log-transform if necessary if (logtrans.expr) Y<-log(Y) # Perform the 3 tests for each gene t.res<-row.ttest(Y,grplbl) rs.res<-row.ranksum(Y,grplbl) sw.res<-row.shapiro(Y,grplbl) # Fit BUM model to 3 results t.bum<-bum(t.res$p) rs.bum<-bum(rs.res) sw.bum<-bum(sw.res) # Compute final EBP final.ebp.null<-t.bum$ebp*sw.bum$ebp+rs.bum$ebp*(1-sw.bum$ebp) final.ord<-order(final.ebp.null) res<-cbind.data.frame(probe.set=expr.data[,1], final.ebp.null=final.ebp.null, t.ebp.null=t.bum$ebp, rs.ebp.null=rs.bum$ebp, sw.ebp.normal=sw.bum$ebp, t.stat=t.res$tstat, t.pval=t.res$pval, rs.pval=rs.res, sw.pval=sw.res) res<-res[final.ord,] write(paste("T-test direction:",t.res$direction),result.file) write.table(res,result.file,sep="\t",row.names=F,col.names=T,append=T) } row.ttest<-function(Y,grplbl) { ugrp<-unique(grplbl) ngrp<-length(ugrp) if (ngrp!=2) stop("Error in row.ttest: Valid for exactly 2-groups only.") grp1<-(grplbl==ugrp[1]) grp2<-(grplbl==ugrp[2]) n1<-sum(grp1) n2<-sum(grp2) Y1<-Y[,grp1] Y2<-Y[,grp2] mn1<-rowMeans(Y1,na.rm=T) mn2<-rowMeans(Y2,na.rm=T) ss1<-rowSums(Y1^2)-n1*mn1^2 ss2<-rowSums(Y2^2)-n2*mn2^2 sd1<-sqrt(ss1/(n1-1)) sd2<-sqrt(ss2/(n2-1)) tstat<-(mn2-mn1)/sqrt(sd1^2/n1+sd2^2/n2) df<-(sd1^2/n1+sd2^2/n2)^2/ ((sd1^2/n1)^2/(n1-1)+(sd2^2/n2)^2/(n2-1)) pval<-2*pt(-abs(tstat),df) return(list(tstat=tstat,pval=pval, direction=paste(ugrp[2],ugrp[1],sep=" - "))) } row.ranksum<-function(Y,grplbl) { res<-apply(Y,1,ranksum.test,grplbl=grplbl) return(res) } ranksum.test<-function(y,grplbl) { ugrp<-unique(grplbl) ngrp<-length(ugrp) if (ngrp!=2) stop("Error: Rank-Sum Test Valid for Exactly 2 groups.") x1<-y[grplbl==ugrp[1]] x2<-y[grplbl==ugrp[2]] res<-wilcox.test(x1,x2) return(res$p.value) } row.shapiro<-function(Y,grplbl) { ugrp<-unique(grplbl) ngrp<-length(ugrp) Z<-Y for (i in 1:ngrp) { temp<-Y[,grplbl==ugrp[i]] mn<-rowMeans(temp) s<-apply(temp,1,sd) Z[,grplbl==ugrp[i]]<-(temp-mn)/s } res<-apply(Z,1,shapiro.pval) return(res) } shapiro.pval<-function(z) { res<-shapiro.test(z) return(res$p.value) } source("http://www.stjuderesearch.org/depts/biostats/documents/fdr-library.R") require(tcltk) gui.aaa.2grp<-function() { # Initialize GUI tt<-tktoplevel() ######### Set up GUI for phenotype file ########## pheno.file<-tclVar("") # Initialize phenotype file variable pheno.lbl<-tklabel(tt,text="Sample Information File:") # Create label for directions pheno.file.name<-tklabel(tt,width=50,text=tclvalue(pheno.file)) # Create display for phenotype file variable tkconfigure(pheno.file.name,textvariable=pheno.file) # Associate display with variable # Define an action for the "select phenotype" button select.pheno<-function() { file.str<-tclvalue(tkgetOpenFile(title="Select Sample Information File")) tclvalue(pheno.file)<-file.str } # Create the select phenotype button pheno.button<-tkbutton(tt,text="Select",command=select.pheno) # Add these items to the top-level GUI tkgrid(pheno.lbl,pheno.file.name,pheno.button) ####### Set up GUI for selecting sample ID column ######## sampID.col<-tclVar("") sampID.lbl<-tklabel(tt,text="Sample ID column:") sampID.display<-tklabel(tt,width=50,text=tclvalue(sampID.col)) tkconfigure(sampID.display,textvariable=sampID.col) select.sampID<-function() { pheno.hdr<-scan(tclvalue(pheno.file),sep="\n",what=character(),n=1) pheno.hdr<-unlist(strsplit(pheno.hdr,split="\t")) sampID.col.name<-tk_select.list(pheno.hdr,title="Select sample ID column") tclvalue(sampID.col)<-sampID.col.name tkfocus(tt) } sampID.button<-tkbutton(tt,text="Select",command=select.sampID) tkgrid(sampID.lbl,sampID.display,sampID.button) ####### Set up GUI for selecting group label column ############### grplbl.col<-tclVar("") grplbl.lbl<-tklabel(tt,text="Group Label column:") grplbl.disp<-tklabel(tt,width=50,text=tclvalue(grplbl.col)) tkconfigure(grplbl.disp,textvariable=grplbl.col) select.grplbl<-function() { pheno.hdr<-scan(tclvalue(pheno.file),sep="\n",what=character(),n=1) pheno.hdr<-unlist(strsplit(pheno.hdr,split="\t")) grplbl.col.name<-tk_select.list(pheno.hdr,title="Select sample ID column") tclvalue(grplbl.col)<-grplbl.col.name tkfocus(tt) } grplbl.button<-tkbutton(tt,text="Select",command=select.grplbl) tkgrid(grplbl.lbl,grplbl.disp,grplbl.button) ##### Set up GUI for selecting expression file ################## expr.file<-tclVar("") expr.file.lbl<-tklabel(tt,text="Expression Data File:") expr.file.disp<-tklabel(tt,width=50,text=tclvalue(expr.file)) tkconfigure(expr.file.disp,textvariable=expr.file) select.exprfile<-function() { file.str<-tclvalue(tkgetOpenFile(title="Select Expression Data File")) tclvalue(expr.file)<-file.str } expr.button<-tkbutton(tt,text="Select",command=select.exprfile) tkgrid(expr.file.lbl,expr.file.disp,expr.button) #### Set up GUI for specifying output file ########## res.file<-tclVar("") res.file.lbl<-tklabel(tt,text="Result File") res.file.disp<-tklabel(tt,width=50,text=tclvalue(res.file)) tkconfigure(res.file.disp,textvariable=res.file) specify.resfile<-function() { file.str<-tclvalue(tkgetSaveFile(title="Save Results to File")) tclvalue(res.file)<-file.str } resfile.button<-tkbutton(tt,text="Specify",command=specify.resfile) tkgrid(res.file.lbl,res.file.disp,resfile.button) ### Set up GUI for specifying code file ############ code.file<-tclVar("") code.file.lbl<-tklabel(tt,text="R Code File") code.file.disp<-tklabel(tt,width=50,text=tclvalue(code.file)) tkconfigure(code.file.disp,textvariable=code.file) specify.codefile<-function() { file.str<-tclvalue(tkgetSaveFile(title="Save R Code to File")) tclvalue(code.file)<-file.str } codefile.button<-tkbutton(tt,text="Specify",command=specify.codefile) tkgrid(code.file.lbl,code.file.disp,codefile.button) #### Set up GUI for Log-Transformation ###### logt.lbl<-tklabel(tt,text="Log-transform expression data?") blnk.lbl<-tklabel(tt,text="") logt.val<-tclVar("Yes") logt.rb1<-tkradiobutton(tt) logt.rb2<-tkradiobutton(tt) tkconfigure(logt.rb1,variable=logt.val,value="Yes") tkconfigure(logt.rb2,variable=logt.val,value="No") tkgrid(logt.lbl) tkgrid(tklabel(tt,text="Yes"),logt.rb1) tkgrid(tklabel(tt,text="No"),logt.rb2) ### Define buttons write.Rcode<-function() { write("source('AAA-library.R') \n",tclvalue(code.file),append=T) write(paste("aaa.2grp(samp.info.file='", tclvalue(pheno.file),"',",sep=""), tclvalue(code.file),append=T) write(paste(" sampID.col='", make.names(tclvalue(sampID.col)),"',",sep=""), tclvalue(code.file),append=T) write(paste(" grplbl.col='", make.names(tclvalue(grplbl.col)),"',",sep=""), tclvalue(code.file),append=T) write(paste(" expr.file='", tclvalue(expr.file),"',",sep=""), tclvalue(code.file),append=T) write(paste(" result.file='", tclvalue(res.file),"',",sep=""), tclvalue(code.file),append=T) if(tclvalue(logt.val)=="Yes") logt<-TRUE else logt<-FALSE write(paste(" logtrans.expr=",logt,")",sep=""), tclvalue(code.file),append=T) } perform.analysis<-function() { if(tclvalue(logt.val)=="Yes") logt<-TRUE else logt<-FALSE aaa.2grp(samp.info.file=tclvalue(pheno.file), sampID.col=make.names(tclvalue(sampID.col)), grplbl.col=make.names(tclvalue(grplbl.col)), expr.file=tclvalue(expr.file), result.file=tclvalue(res.file), logtrans.expr=logt) } wrcode.button<-tkbutton(tt,text="Write R Code",command=write.Rcode) perform.button<-tkbutton(tt,text="Perform Analysis",command=perform.analysis) tkgrid(wrcode.button) tkgrid(perform.button) }