Was this page helpful?

edgeR

    Table of contents
    No headers
     
     
    ##Load the libraries
    library(edgeR)
    Loading required package: limma
    library(limma)
    sessionInfo()
     
    setwd("C:/Users/Test")
     
    raw.data <- read.delim("Final_MergedCounts.txt", header=TRUE,stringsAsFactors=FALSE)
    head(raw.data)
    dim(raw.data)
     
    ##remove all N/A values
    raw.data<-na.omit(raw.data)
     
    #replace all the "0" to "1"
    raw.data <- replace(raw.data, raw.data == 0, 1)
    head(raw.data)
     
    ## Summerize the data
    apply(raw.data[,2:61],2,summary)
    rownames(raw.data)<- raw.data$ID
    rownames(raw.data)[1:4]
    colnames(raw.data)[2:61]
     
    # get the total counts of each sample replicate
    library.sizes <- colSums(raw.data[,2:61])
     
     
    ##Define two factors: genotype and time
    ##Targets has three columns now with the sample name, genotype, time point.
    ##Different design can then be extracted for different test
     
    targets <- data.frame( FileName=colnames(raw.data)[2:13],
                          genotype=rep(c("G1","G1","G1",    "G1","G1","G1","G2","G2","G2","G2","G2","G2")),
                         time=rep(c("T0","T0","T0", "T1","T1","T1", "T0","T0","T0", "T1","T1","T1"))   )
     
     
    ###############################################################################################
    ############Combine two factors to be one factor for comparison################################         
    ###############################################################################################
     
    ## Combine the two factors genotype and time together to generate a factor called "group"
    Group <- factor(paste(targets$genotype, targets$time, sep="."))
    cbind(targets, Group=Group)
     
    ## Generate design matrix
    design<-model.matrix(~0+Group)
    colnames(design)<-levels(Group)
     
     
     
    ##Put the counts into y for differently expression 
    y <- DGEList(counts=as.matrix(raw.data[,2:13]), lib.size=library.sizes,group=targets$Group)
    ## Calculate normalization factors for each sample
    y <- calcNormFactors(y)
     
    head(y)
     
    ##Use the CR method, common dispersion and tagwise dispersion were both estimated.
    ##Need to calculate the common dispersion first and then calculate the tagwise dispersion
    ##For multiple factor, the tagwise dispersion is recommended
     
    y <- estimateGLMCommonDisp(y,design)
    y <- estimateGLMTagwiseDisp(y,design)
     
    ##Fit the model for design
    fit<-glmFit(y, design)
     
    ##############################################################################
    ##Generate proper contrast for gene expression comparison within genotype
    my.contrasts<-makeContrasts(
                                G1.T1vsT0=G1.T1-G1.T0,
                                T0.G1vsG2=G1.T0-G2.T0
     
     
                                levels=design)
     
     
     
    ##GLM compariosn of DE extract target comparison from the generated contrast lists
    lrt<-glmLRT(fit,contrast=my.contrasts[,"G1.T1vsT0"])
    edgeRcom.detailed<-topTags(lrt,n=Inf)
    edgeRcom.detailed$table[1:5,] 
     
    ##ouput the results of comparison
    ##log2-fold-changes (logFC); 
    ##mtrix of log2 counts-per-million (logCPM), with undened values avoided and the poorly dened log-fold-changes for low counts shrunk towards zero.
    write.csv(edgeRcom.detailed$table,file="C:/Users/result.csv")
    Was this page helpful?
    Tag page (Edit tags)
    • No tags
    You must login to post a comment.