########################################################################## # Function: compute.fdr # Purpose: Apply the requested fdr method to the set(s) of p-values provided # Arguments: p - a vector or matrix of p-values # opts - a list with the following components # LC = loess.control options for SPLOSH # dplaces = pre-rounding option for SPLOSH # lam = lambda for Storey's q-value # robust = robust option for Storey's q-value # adj.eps = epsilon adjustment for BUM to avoid applying MLE to p-values with 0 # n.adj = adjustment factor for BUM # notes - a vector of strings giving some notes about the analysis # Returns: a list with the following components: # p - the set of p-values supplied to the function # fdr - an estimate of the ratio of the number of false positives to the number of results with lesser or equal p-value # q - the q-value or fdr-adjusted p-value # cdf - the estimate of the expected proportion of tests with lesser or equal p-value # ord - gives the value of indices to order results by ascending p-value # pi - estimate of the proportion of tests with a true null hypothesis (n.a. for BH95 & BY01) # fp - estimate of the total number of false positives among results with equal or lesser p-value # fn - estimate of the total number of false negatives among results with larger p-value # toterr - sum of fp and fn # fdr.method - indicates which method was used # notes - appends string description of method used to notes argument supplied ######################################################################### compute.fdr<-function(p, fdr.method="PC04", opts=default.opts(fdr.method), notes=NULL ) { res<-"Choose a valid fdr.method" if (fdr.method=="PC04") res<-splosh(p,opts$LC,opts$dplaces,notes) if (fdr.method=="St02") res<-storey(p,opts$lam,opts$robust,notes) if (fdr.method=="PM03") res<-bum(p,opts$adj.eps,opts$n.adj,notes) if (fdr.method=="BH95") res<-BH95(p,notes) if (fdr.method=="Ch04") res<-Ch04(p,opts$qpos,notes) if (fdr.method=="BY01") res<-BY01(p,notes) return(res) } ############################################################# # Function: fdr.plots # Purpose: Produce diagnostic and illustrative plots for fdr analysis # Arguments: fdrobj - object returned from compute.fdr # Returns: NULL ############################################################# fdr.plots<-function(fdrobj,alpha=1,sub.labels="",main="") { if(is.vector(fdrobj$p)) fdrobj$p<-matrix(fdrobj$p,length(fdrobj$p),1) h<-dim(fdrobj$p)[2] g<-dim(fdrobj$p)[1] if (length(main)==1) main<-rep(main,h) if (length(sub.labels)==1) { if ((sub.labels=="")&&(h>1)) sub.labels<-paste("Null Hyp.",1:h) sub.labels<-rep(sub.labels,h) } nscreen<-3+2*(!is.element(fdrobj$fdr.method,c("BH95","BY01","St02")))+ any(!c(is.null(fdrobj$fp),is.null(fdrobj$fn),is.null(fdrobj$toterr))) split.screen(c(h,nscreen)) k<-0 for (i in 1:h) { # p-value histogram k<-k+1 screen(k) hist(fdrobj$p[,i],prob=T,xlab="p-value",ylab="density",main=main[i],sub=sub.labels[i],col=16) if (!is.null(fdrobj$pdf)) { H<-hist(fdrobj$p[,i],prob=T,plot=F) x<-fdrobj$p[fdrobj$ord[,i],i] y<-fdrobj$pdf[fdrobj$ord[,i],i] inc<-y1]<-1 q[q>1]<-1 pi<-rep(const,h) cdf<-res95$cdf ord<-res95$ord return(list(p=p,fdr=fdr,q=q,cdf=cdf,ord=ord, fdr.method="BY01", notes=c("Benjamini and Yekutieli (2001) Used to Control FDR.",notes))) } bum<-function(p,adj.eps=0.5,n.adj=length(p),notes=NULL) { if (is.vector(p)) p<-matrix(p,length(p),1) h<-dim(p)[2] g<-dim(p)[1] fdr<-q<-cdf<-fp<-fn<-toterr<-pdf<-ebp<-ord<-matrix(0,g,h) a<-lambda<-pi<-rep(0,h) for (i in 1:h) { res<-bum.1(p[,i],adj.eps,n.adj,notes) fdr[,i]<-res$fdr q[,i]<-res$q cdf[,i]<-res$cdf fp[,i]<-res$fp fn[,i]<-res$fn toterr[,i]<-res$toterr pdf[,i]<-res$pdf ebp[,i]<-res$ebp ord[,i]<-res$ord pi[i]<-res$pi a[i]<-res$a lambda[i]<-res$lambda } return(list(p=p,fdr=fdr,q=q,cdf=cdf,fp=fp,fn=fn,toterr=toterr,pdf=pdf,ebp=ebp,ord=ord,pi=pi,a=a,lambda=lambda, adj.eps=adj.eps, # rounded to this no. of decimal places n.adj=n.adj, # loess.control options used fdr.method="PM02", notes=c(notes,res$notes))) } Ch04<-function(p,qpos=c(0.00625,0.01,0.0125,0.025,0.05,0.1,0.25,0.5,0.75),notes=NULL) { if (is.vector(p)) p<-matrix(p,length(p),1) h<-dim(p)[2] g<-dim(p)[1] fdr<-q<-cdf<-pdf<-ord<-matrix(0,g,h) pi<-rep(NA,h) for (i in 1:h) { res<-Ch04.1(p[,i],qpos) fdr[,i]<-res$fdr q[,i]<-res$q cdf[,i]<-res$cdf pdf[,i]<-res$pdf ord[,i]<-res$ord pi[i]<-res$pi } return(list(p=p,fdr=fdr,q=q,cdf=cdf,pdf=pdf,pi=pi, fdr.method="Ch04",notes=c(notes,res$notes))) } ##################################### # Function: BH95 # Purpose: Compute quantity that Benjamini and Hochberg use to control the FDR ##################################### BH95.1<-function(p, # p-value vector notes=NULL) # vector of notes gathered to date { m<-length(p) # number of genes examined in analysis u<-order(p) # vector to order results by ascending p-values v<-rank(p) # rank of p-values cdf<-v/m # empirical distribution function used to estimate cdf fdr<-p/cdf # quantity used to control fdr fdr[fdr>1]<-1 # any larger than 1, assign 1 q<-fdr # initialize q-value vector q[u[m]]<-min(fdr[u[m]],1) # get the ball rolling q[rev(u)]<-cummin(q[rev(u)]) # find q-value fp<-m*cdf*fdr # estimated false positive count, very conservative fn<-(1-cdf-(1-p))*m # estimated false negative count, so conservative often near 0 fn[fn<0]<-0 # set false negative count to 0 if less than 0 toterr<-fp+fn # compute total error criterion # return results return(list(fdr=matrix(fdr,m,1), # fdr estimate q=matrix(q,m,1), # q-value estimate cdf=matrix(cdf,m,1), # cdf estimate fp=matrix(fp,m,1), # estimated no. of false positives incurred fn=matrix(fn,m,1), # estimated no. of false negatives incurred toterr=matrix(toterr,m,1), # total estimated no. of errors ord=u, # ordering vector pi=1, # implied estimate of null proportion fdr.method="Benjamini and Hochberg (1995)", notes=c(notes,"Benjamini and Hochberg 1995 Used to Estimate FDR.") )) } ########################################################### # storey # computes FDR and q-values according to Storey's method ########################################################### storey.1<-function( p, # vector of p-values lam=NULL, # smoothing parameter for estimating pi robust=F, # indicates whether to compute robust q-values notes=NULL # notes or comments on analysis ) { #This is just some pre-processing m <- length(p) notes<-c(notes,"Storey's method used to estimate FDR.") #These next few functions are the various ways to automatically choose lam #and estimate pi0 if(!is.null(lam)) { pi0 <- mean(p>lam)/(1-lam) pi0 <- min(pi0,1) notes <- c(notes,"The user prespecified lam in the calculation of pi0.") } else{ notes <- c(notes,"A smoothing method was used in the calculation of pi0.") lam <- seq(0,0.95,0.01) pi0 <- rep(0,length(lam)) for(i in 1:length(lam)) { pi0[i] <- mean(p>lam[i])/(1-lam[i]) } spi0 <- smooth.spline(lam,pi0,df=3,w=(1-lam)) pi0 <- predict(spi0,x=0.95)$y pi0 <- min(pi0,1) } #The q-values are actually calculated here u <- order(p) v <- rank(p) fdr <- pi0*m*p/v fdr[fdr>1]<-1 q<-fdr if(robust) { q <- pi0*m*p/(v*(1-(1-p)^m)) notes <- c(notes, "The robust version of the q-value was calculated. See Storey JD (2002) JRSS-B 64: 479-498.") } q[u[m]] <- min(q[u[m]],1) q[rev(u)]<-cummin(q[rev(u)]) cdf<-v/m fp<-m*cdf*fdr fn<-(1-cdf-pi0*(1-p))*m fn[fn<0]<-0 return(list( fdr=matrix(fdr,m,1), # FDR estimate q=matrix(q,m,1), # q-values cdf=matrix(cdf,m,1), # CDF estimate (EDF) fp=matrix(fp,m,1), # Est. No. False Positives fn=matrix(fn,m,1), # Est. No. False Negatives toterr=matrix(fp+fn,m,1), # Total of FP and FN ord=u, # ordering vector, places in order of increasing p-values pi=pi0, # pi - null proportion lam=lam, # lam - smoothing parameter for estimating pi robust=robust, # robust q-values or not fdr.method="Storey (2002), Storey and Tibshirani (2003)", notes=notes # notes regarding analysis )) } ######################################################################################################## # This S-plus code implements the SPLOSH method for the analysis of microarray data # described by Pounds and Cheng (Improving False Discovery Rate Estimation - Bioinformatics 2004). # # Last Update: July 14, 2004 # # Function name: splosh # Arguments: pvals - a vector of p-values obtained by applying a statistical hypothesis # test to the expression values for each gene individually # LC - (optional) a list containing tuning parameters for the loess algorithm, # see help(loess.control) for more details, defaults to S-plus defaults # dplaces - (optional) round p-values to this many decimal places to avoid # numerical difficulties that can arise by division by small numbers, default = 6 # notes - (optional) may pass any string describing notes regarding the analysis, default = NULL # # Outputs: a list with the following elements # fdr - a vector of the cfdr estimates at the values specified by the user in pvals # q - a vector of corresponding monotone fdr estimates, similar to Storey's q-value # cdf - vector of the SPLOSH cdf estimate evaluated at pvals # fp - vector of estimated no. of false positives incurred by setting threshold # at corresponding value of pvals # fn - vector of estimated no. of false negatives incurred by setting threshold # at corresponding value of pvals # toterr - vector giving the sum of fp and fn for each entry # ebp - the empirical Bayes posterior that the null hypothesis is true, see Efron et al. (JASA 2001) # pi - estimate of the mixing parameter giving the probability of the null hypothesis # ord - a vector giving index numbers to order results according to ascending p-values # dplaces - echoes value of dplaces used in algorithm # LC - echoes value of LC used in algorithm # fdr.method - gives bibliographic reference for the method # notes - returns notes regarding the analysis ########################################################################################################### splosh.1<-function(pvals,LC=loess.control(),dplaces=3,notes=NULL) { notes<-c(notes,"SPLOSH used to estimate FDR.") x<-round(pvals,dplaces) # Prevent numerical difficulties n<-length(pvals) # Obtain no. of points r<-rank(x) # rank observations ux<-sort(unique(x)) # ordered unique p-values ur<-(sort(unique(r)))-.5 # ordered unique ranks #plot(ux,ur) if (max(ux)<1) # edge adjustments { ux<-c(ux,1) ur<-c(ur,n) } if (min(ux)>0) { ux<-c(0,ux) ur<-c(0,ur) } nux<-length(ux) # No. of unique points ur<-ur/n # Divide ranks by n dx<-diff(ux) # spacings of unique p-values dr<-diff(ur) # spacings of unique ranks dFdx<-exp(log(dr)-log(dx)) # spacing-interval slopes mr<-(ur[1:(nux-1)]+ur[2:nux])/(2) # Unitized Medians of Unique Rank Intervals mx<-(ux[1:(nux-1)]+ux[2:nux])/2 # Unitized Medians of Unique p-value Intervals tr<-asin(2*(mr-.5)) # Arc-Sine Transform Rank Midpoints fit<-loess(log(dFdx)~tr,loess.control=LC) # Lowess of log-transformed EDQF and transformed rank-midpoints tr2<-asin(2*(ur-.5)) # Arc-Sine Transform Unique yhat<-(predict(fit,tr2)) # Obtain Estimated Derivative of CDF (up to constant multiplier) yhat[c(1,nux)]<-approx(tr2[2:(nux-1)], yhat[2:(nux-1)], xout=tr2[c(1,nux)], rule=3)$y # Extrapolate to get endpoint estimates pdf1<-exp(yhat) # Obtain PDF up to a constant trap<-0.5*(pdf1[1:(nux-1)]+pdf1[2:nux])*dx # Trapezoid rule terms const<-sum(trap) # Constant via trapezoid rule pdf2<-pdf1/const # Adjust pdf by constant pdf<-approx(ux,pdf2,xout=pvals,rule=3)$y # Obtain pdf estimate for p-values cdf<-approx(ux,c(0,cumsum(trap)), xout=pvals,rule=2)$y/const # Obtain cdf pi<-min(pdf) # Take pi as min of PDF p.pi<-max(pvals[pdf==pi]) # largest p-value at which pi occurs cdf.pi<-max(cdf[pvals==p.pi]) # CDF evaluated at p.pi cfdr<-pi*pvals/cdf # Estimate cFDR for p-values ebp<-pi/pdf # Estimate EBP of null for p-values cfdr[pvals==0]<-ebp[pvals==0] # L'Hospital's Rule cfdr[cfdr>1]<-1 # Set max(fdr)=1 qval<-cfdr # Initialize q-value vector ordering<-order(pvals) # Report Results qval[rev(ordering)]<-cummin(qval[rev(ordering)]) # Compute q-value fp<-n*cfdr*cdf # Compute Estimated No. False Positives fn<-n*(cdf.pi-cdf-pi*(p.pi-pvals))*(pvals<=p.pi) # Compute Estimated No. False Negatives toterr<-fp+fn # Compute Estimated Total No. Errors return(list(fdr=matrix(cfdr,n,1), # cfdr estimate q=matrix(qval,n,1), # q-value estimate cdf=matrix(cdf,n,1), # cdf estimate fp=matrix(fp,n,1), # estimate of no. of false postives fn=matrix(fn,n,1), # estimate of no. of false negatives toterr=matrix(toterr,n,1), # estimated total no. of errors pdf=matrix(pdf,n,1), # estimate of pdf ebp=matrix(ebp,n,1), # estimate of ebp pi=pi, # estimate of null proportion p.pi=p.pi, # p-value at which estimated pdf achieves minimum cdf.pi=cdf.pi, # cdf evaluated at p.pi ord=ordering, # ordering vector dplaces=dplaces, # rounded to this no. of decimal places LC=LC, # loess.control options used fdr.method="Pounds and Cheng (2004)", # Bibliographic Information notes=notes)) # Notes regarding analysis } bum.1<-function(p,adj.eps=0.5,n.adj=length(p),notes=NULL) ################################################################# # Function: bum # Purpose: Compute FDR estimates using beta-uniform mixture model # proposed by Pounds and Morris (2003). ################################################################# { m<-length(p) # No. of genes notes<-c(notes,"Pounds and Morris (2003) BUM model used to estimate FDR.") # adjust p-values if necessary if (any(p==0)) { p<-(n.adj*p+adj.eps)/(n.adj+2*adj.eps) notes<-c(notes,"The p-values were adjusted because some p-values equal zero.") } res<-bum.mle(p) # find MLE's of parameters pi<-ext.pi(res$a,res$lambda) # determine pi from MLE's fdr<-bum.FDR(p,res$a,res$lambda) # determine fdr estimates q<-fdr # q-value = fdr due to implied monotonicity cdf<-pbum(p,res$a,res$lambda) # cdf estimate fp<-m*bum.false.positive(p,res$a,res$lambda) # estimated no. of false positives fn<-m*bum.false.negative(p,res$a,res$lambda) # estimated no. of false negatives toterr<-fp+fn # estimated total no. of errors pdf<-dbum(p,res$a,res$lambda) # estimated pdf ebp<-1-bum.emp.post(p,res$a,res$lambda) # estimated ebp # return results return(list(fdr=matrix(fdr,m,1), # fdr estimate q=matrix(q,m,1), # q-value estimate cdf=matrix(cdf,m,1), # cdf estimate fp=matrix(fp,m,1), # estimated no. of false positives fn=matrix(fn,m,1), # estimated no. of false negatives toterr=matrix(toterr,m,1), # total error pdf=matrix(pdf,m,1), # pdf estimate ebp=matrix(ebp,m,1), # ebp estimate pi=pi, # pi estimate (null proportion) ord=order(p), # ordering vector a=res$a, # MLE of BUM parameter a lambda=res$lambda, # MLE of BUM parameter lambda fdr.method="Pounds and Morris (2003)", # method used to estimate FDR notes=notes)) } ########################################################################## # Function: dbum # Purpose: Compute the pdf of the bum distribution # Arguments: x - point or vector of points at which to compute pdf # a - shape parameter of beta component # lambda - mixture parameter, proportion of uniform component # Returns: value of the pdf of the bum distribution for x ########################################################################## dbum<-function(x,a,lambda) { return(lambda+(1-lambda)*a*x^(a-1)) } ########################################################################## # Function: pbum # Purpose: Compute the cdf of the bum distribution # Arguments: x - point or vector of points at which to compute the pdf # a - shape parameter of the beta component # lambda - mixing parameter, weight of uniform component # Returns: value of the cdf of the bum distribution for x ########################################################################## pbum<-function(x,a,lambda) { return(lambda*x+(1-lambda)*x^a) } ########################################################################## # Function: qbum # Purpose: Compute the quantile of the bum distribution # Arguments: p - percentile or vector of percentiles # a - shape parameter of the beta component # lambda - mixing parameter, weight of uniform component # nbisect - the number of bisections to perform # Returns: the values x such that the pdf of x equals the percentiles # Notes: Uses the bisection method to find quantiles, a larger value of # nbisect will result in more accurate results. The results will # be accurate to within 2^(-nbisect). ########################################################################## qbum<-function(p,a,lambda,nbisect=20) { n<-length(p) mid<-rep(0,n) top<-rep(1,n) bot<-rep(0,n) gohigher<-rep(F,n) for (j in 1:nbisect) { mid<-(top+bot)/2 gohigher<-(pbum(mid,a,lambda)0) { starta<-logit(runif(nstartpts)) startlambda<-logit(runif(nstartpts)) } adjusted<-min(pvals)<=0 if (adjusted) pvals<-(nadj*pvals+adjeps)/(nadj+2*adjeps) bestlogL<- -Inf nstartpts<-length(starta) for (i in 1:nstartpts) { results <- nlm( neg.bum.logL, c(starta[i],startlambda[i]),pvals=pvals ) # OK in R # name of value in R results$minimum as results$objective in S+ if ( -results$minimum > bestlogL) { a<-inv.logit(results$estimate[1]) # parameters[1]) # S+ lambda<-inv.logit(results$estimate[2]) # parameters[2]) # S+ logL<- -results$minimum nits<- results$iterations termination <- results$code # message # S+ bestlogL<-logL } } return(list(a=a,lambda=lambda,logL=logL,pvals.adjusted=adjusted,nstartpts=nstartpts,nits=nits,termination=termination)) } ######################################################################### # Function: qqbum # Purpose: Produce a quantile-quantile plot for a set of p-values # Arguments: pvals - a vector of p-values # a - shape parameter for beta component of bum distribution # lambda - mixing parameter for BUM distribution # main - primary plot tile, default = "BUM QQ Plot" # xlab - label of x-axis, default = "BUM Expected p-value" # ylab - label of y-axis, default = "Observed p-value" # Returns: a quantile-quantile plot # Notes: If values of a and lambda are not provided, bum.mle # will be used to estimate a and lambda. May take a few minutes. ######################################################################### qqbum<-function(pvals,a=NULL,lambda=NULL,main="BUM QQ Plot",xlab="BUM Expected p-value",ylab="Observed p-value",nstartpts=0,starta=0,startlambda=0) { n<-length(pvals) pvals<-sort(pvals) if(is.null(a)||is.null(lambda)) { al<-bum.mle(pvals,nstartpts=nstartpts,starta=starta,startlambda=startlambda) plot(c(0,1),c(0,1),main=main,xlab=xlab,ylab=ylab,type="n") lines(qbum((rank(pvals)-.5)/n,al$a,al$lambda),pvals,lty=2) lines(c(0,1),c(0,1)) } else { plot(c(0,1),c(0,1),main=main,xlab=xlab,ylab=ylab,type="n") lines(qbum((rank(pvals)-.5)/n,a,lambda),pvals,lty=2) lines(c(0,1),c(0,1)) } } ######################################################################### # Function: bum.histogram # Purpose: Compare the fitted bum curve to the histogram # Arguments: pvalues - vector of p-values # a - a for bum curve, default = estimated MLE # lambda - lambda for bum curve, default = estimated MLE # main - primary title of plot, default = "Histogram" # xlab - label of x-axis, default = "p-value # ylab - label of y-axis, default = "Density" ######################################################################### bum.histogram<-function(pvalues,a=NA,lambda=NA,main="Histogram",xlab="p-value",ylab="Density",nstartpts=0,starta=0,startlambda=0) { if(is.na(a)||is.na(lambda)) { MLE<-bum.mle(pvalues,nstartpts=nstartpts,starta=starta,startlambda=startlambda) a<-MLE$a lambda<-MLE$lambda } hist(pvalues,probability=T,main=main,xlab=xlab,ylab=ylab) x<-1:100/100 lines(x,dbum(x,a,lambda),lwd=3) } ######################################################################### # Function: ext.pi # Purpose: Extract the maximal uniform componet from a bum density # Arguments: a - shape parameter of beta component of bum distribution # lambda - mixing parameter, component of bum that is uniform # Returns: the proportion of the density that can be extracted as a uniform ######################################################################### ext.pi<-function(a,lambda) { lambda[(lambda>1)+(lambda<0)>0]<-NA a[(a>1)+(a<0)]<-NA return((lambda+(1-lambda)*a)) } ######################################################################### # Function: bum.emp.post # Purpose: Compute the upper bound of BUM based empirical Bayes posterior # probability of the alternative hypothesis # Arguments: x - the point or vector of points at which to compute EB post # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: the upper bound of the BUM based empirical Bayes posterior at x # Notes: Computes an upper bound because maximal extraction of uniform # is performed. ######################################################################### bum.emp.post<-function(x,a,lambda) { pi<-ext.pi(a,lambda) return((dbum(x,a,lambda)-pi)/dbum(x,a,lambda)) } ######################################################################### # Function: bum.FDR # Purpose: Compute an estimated upper bound for the false discovery rate # when significance is determined by comparing a p-value to # a threshold tau # Arguments: tau - the threshold of comparison (point or vector) # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: Estimated upper bound of FDR # Notes: FDR is the expected proportion of rejections that are false # positives, or Type I errors ######################################################################### bum.FDR<-function(tau,a,lambda) { pi<-ext.pi(a,lambda) return(tau*pi/pbum(tau,a,lambda)) } ######################################################################### # Function: bum.false.positive # Purpose: Compute an estimated upper bound for the proportion of all tests # that will be false positives when significance is determined # by comparing the p-value to a threshold tau # Arguments: tau - the threshold of comparison (point or vector) # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: Estimated upper bound of proportion of all tests that are # false positives ######################################################################### bum.false.positive<-function(tau,a,lambda) { pi<-ext.pi(a,lambda) return(tau*pi) } ######################################################################### # Function: bum.false.negative # Purpose: Compute an estimated lower bound for the proportion of all # tests resulting in false negatives when significance is # determined by comparing the p-value to a threshold tau # Arguments: tau - threshold # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: Estimated lower bound for proportion of all tests resulting # false negatives (Type II errors) ######################################################################### bum.false.negative<-function(tau,a,lambda) { pi<-ext.pi(a,lambda) return(1-pbum(tau,a,lambda)-pi*(1-tau)) } ######################################################################### # Function: bum.true.positive # Purpose: Compute an estimated lower bound for the proportion of all # tests resulting in true positives when significance is # determined by comparing the p-value to a threshold tau # Arguments: tau - threshold # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: Estimated lower bound for proportion of all tests resulting # true positives ######################################################################### bum.true.positive<-function(tau,a,lambda) { pi<-ext.pi(a,lambda) return(pbum(tau,a,lambda)-pi*tau) } ######################################################################### # Function: bum.true.negative # Purpose: Compute an estimated lower bound for the proportion of all # tests resulting in true negatives when significance is # determined by comparing the p-value to a threshold tau # Arguments: tau - threshold # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: Estimated lower bound for proportion of all tests resulting # true negatives ######################################################################### bum.true.negative<-function(tau,a,lambda) { pi<-ext.pi(a,lambda) return(pi*(1-tau)) } ######################################################################### # Function: bum.weighted.error # Purpose: Compute a linear combination of the upper bound for the # proportion of false positives and the lower bound for the # proportion of false negatives resulting when significance # is determined by comparing p-values to a threshold # Arguments: tau - threshold # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # wfp - weight of the false positives # wfn - weight of the false negatives # Returns: the weighted combination of error rates ######################################################################### bum.weighted.error<-function(tau,a,lambda,wfp=1,wfn=1) { return(wfp*bum.false.positive(tau,a,lambda)+wfn*bum.false.negative(tau,a,lambda)) } ######################################################################### # Function: find.FDR.threshold # Purpose: Find a p-value threshold so that all p-values less than the # threshold have an FDR lower than a desired level # Arguments: fdr - desired fdr # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: p-value threshold that maintains the desired fdr ######################################################################### find.FDR.threshold<-function(fdr,a,lambda) { pi<-ext.pi(a,lambda) return(((pi-fdr*lambda)/(fdr*(1-lambda)))^(1/(a-1))) } ######################################################################### # Function: find.EB.threshold # Purpose: Find a p-value threshold so that all p-values less than the # threshold have an empirical Bayes (EB) posterior probability # greater than a desired level # Arguments: EB - desired empirical Bayes' posterior # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: p-value threshold that maintains the desired EB posterior ######################################################################### find.EB.threshold<-function(EB,a,lambda) { pi<-ext.pi(a,lambda) return(((EB*lambda+a*(1-lambda))/(a*(1-EB)*(1-lambda)))^(1/(a-1))) } ########################################################################## # Function: inv.dbum # Purpose: Compute the inverse of the pdf of the bum distribution # Arguments: y - value of the pdf of the bum of interest # a - shape parameter of the beta component # lambda - mixing parameter, weight of uniform component # nbisect - the number of bisections to perform # Returns: x so that pbum(x,a,lambda) = y # Notes: Uses the bisection method. The results will # be accurate to within 2^(-nbisect). Used by find.WE.threshold. ########################################################################## inv.dbum<-function(y,a,lambda,nbisect=20) { n<-length(y) mid<-rep(0,n) for (i in 1:n) { top<-1 bot<-0 for (j in 1:nbisect) { mid[i]<-mean(c(top,bot)) if(dbum(mid[i],a,lambda)>y[i]) bot<-mid[i] else top<-mid[i] } } return(mid) } #======================================================================= # Estimate pFDR and FDR using P values and V-D spline cdf/pdf estimators # Return: list of # PvalFDR -- the evaluation of pFDR and FDR on the alpha.seq grid # mx3 matrix, alpha.seq in column 1, pFDR in col 2, FDR in col 3 # pi0 -- estimate of pi_0 # cdf -- Pval distr cdf evaluatd on alph.grid # pdf -- Pval distr pdf evaluatd on alph..grid # alph.grid # Pvalues -- Pvals # side, error #======================================================================= Ch04.1 <- function(Pvals,qpos=c(0.00625,0.01,0.0125,0.025,0.05,0.1,0.25,0.5,0.75),notes=NULL) { G <- length(Pvals) Pgrid <- alph.grid <- alpha.seq <- Pvals nPg <- length(Pgrid) Z <- VDS.unif.est.Qknots(Pgrid,Pvals,qpos=qpos,degree=4,print=F) #pi0 <- min(Z[,2]) pi0 <- min(1,Z[nPg,2],na.rm=T) cdf <- Z[,1] pdf <- Z[,2] # Estimate pFDR and FDR over a grid of alpha values: Z <- VDS.unif.est.Qknots(alpha.seq,Pvals,qpos=qpos,degree=4,print=F) pFDR <- (pi0*alpha.seq)/(Z[,1]*(1-(1-alpha.seq)^G)) FDR <- (pi0*alpha.seq)/Z[,1] ord<-order(Pvals) q<-rep(1,G) q[ord[G]]<-min(1,FDR[ord[G]]) q[rev(ord)]<-cummin(FDR[rev(ord)]) return(list(p=matrix(Pvals,G,1), fdr=matrix(FDR,G,1), q=matrix(q,G,1), cdf=matrix(cdf,G,1), pdf=matrix(pdf,G,1), ord=matrix(ord,G,1), pi=pi0, notes=c(notes,"Cheng et al (2004) used to obtain FDR estimates."))) } #======================================================== # The eimprical distribution function # Input: X -- Data (missing values are deleted) # x1 -- grid over which to evaluate the EDF # Output: vector of EDF values over x1 #======================================================== EDF <- function(X,x1) { Xodr <- sort(X) n <- length(Xodr) if(n==0) return(NA) if(n==1) { y <- approx(c(0.5*Xodr[1],Xodr),c(0,1),x1, method="constant",rule=2,f=0) return(y[[2]]) } LB <- Xodr[1]-(Xodr[2]-Xodr[1]) uniqueXodr<-unique(Xodr) z <- cumsum(table(Xodr))/n y <- approx(c(LB,uniqueXodr),c(0,z),x1,method="constant", rule=2,f=0) return(y[[2]]) } #======================================================== #================================================================== # Compute a B-spline approximation of a continuous function over a # closed interval [a,b]. # Input: # x -- a grid over which to evaluate the approximation # fv -- a vector of sampled function values # knots.int -- the interior knot sequence. # m=length(knots). # degree -- spline degree (default 3). degree=order-1 # a -- left boundary of interval (default=0) # b -- right boundary of interval (default=1) # The length of fv must be equal to m+degree+1, i.e., the number of # interior knots plus order of the spline; otherwise NULL is returned. # Output: a vector of the spline values over x. #==================================================================== Bspline <- function(x,fv,knots.int,degree=3,a=0,b=1) { m <- length(knots.int) nf <- length(fv) if(nf!=m+degree+1) return(NULL) Smat <- bsmat(x,knots.int=knots.int,degree=degree, a=a,b=b) func.val <- Smat%*%fv return(func.val) } #===================================================================== # Compute B-spline matrix evaluated overa grid of values on a closed # interval [a,b]. # Input: # x -- grid # knots.int -- the interior knot sequance # degree -- spline degree (default 3). degree=order-1 # a -- left boundary of interval (default=0) # b -- right boundary of interval (default=1) #====================================================================== bsmat <- function(x,knots.int,degree=3,a=0,b=1) { # To be comformal with de Boor's notation: k <- degree+1 # order of spline # Number of interior knots: m <- length(knots.int) # Number of Splines: n <- m+k d <- length(x) smat1 <- smat2 <- matrix(0,d,n) # The initial (i.e., k=1) B spline matrix kt <- unique(sort(c(a,knots.int,b))) # initial extended knot seq el <- 1; nel <- m+el indx <- numeric(0) for(j in 1:nel) { indx <- (1:d)[x>=kt[j]&x=b]) if((!is.na(indx))&&(is.finite(indx))) {i.knots <- i.knots[1:(indx-1)]; m <- indx-1} m <- length(i.knots) knots <- unique(c(a,i.knots,b)) nknots <- length(knots) # To be conformal with de Boor's notation: k <- degree+1 n <- nknots+k-2 # so n = number of unique interior knots + order knots.ext <- c(rep(a,k-1),knots,rep(b,k-1)) ts <- apply(matrix(1:n,n,1),1, function(i,tk,k) { return(sum(tk[(i+1):(i+k-1)])/(k-1)) },knots.ext,k ) # set the empirical cdf above the unif(0,1) cdf for the estimator # to obey the stochastic order constraint: Ecdf <- pmax(EDF(X.data,ts),ts) # Ecdf <- EDF(X.data,ts) # lambn <- (ts[n-1]-ts[n-2])/(ts[n]-ts[n-2]) # Ecdf[n-1] <- lambn*Ecdf[n]+(1-lambn)*Ecdf[n-2] Epdf <- (k-1)*(Ecdf[2:n]-Ecdf[1:(n-1)])/ (knots.ext[(2:n)+k-1]-knots.ext[2:n]) if(print) { cat("knots =",knots,"\n") cat("knots.ext =",knots.ext,"\n") cat("ts =",ts,"\n") cat("Ecdf =",Ecdf,"\n") cat("Epdf =",Epdf,"\n") } VDcdf <- Bspline(x,Ecdf,knots[2:(nknots-1)],degree) VDpdf <- Bspline(x,Epdf,knots[2:(nknots-1)],degree-1) return(cbind(VDcdf,VDpdf)) }