# only need the next few lines once - keep in a script for re-installing as needed install.packages('rnaseqWrapper') source("http://bioconductor.org/biocLite.R") biocLite(c("topGO","Rgraphviz","DESeq")) # run this every time you open R for this kind of analysis library(rnaseqWrapper) ##### Liver Comparisons ##### # Load your data # the folder chosen in setwd needs to be customized for user and project # need subfolders geneResult and annotations, copied from server setwd("/Users/macampbell/Desktop/Bio343") countData <- mergeCountFiles("genes_Liver/") # View your data (optional) head(countData) # Pull out counts, make them integers, and view them myCountData <- round( countData[,grep(".expected_count",names(countData))],0) names(myCountData) <- gsub(".expected_count","",names(myCountData)) #filtering to keep only those genes whose mean expression is > 10 #could play with this value to #see how the size of toSearch changes. myMeans <- apply(as.matrix(myCountData), 1, mean) toSearch <- myCountData[myMeans > 10 & !is.na(myMeans) & myMeans < 8000,] # Run DESeq - this saves files to folder given by outNamePrefix LIVER deOut <- DESeqWrapper(toSearch, # Our count data to use conditions=c("Liver_no","Liver_fed"), # conditions to compare outNamePrefix="DEseq/") # Where to save the outputs # Save the DESeq R object: LIVER save(deOut,file="DEseq/deOut_Liver_treatment.Rdata") ## Save qvals for easier use LIVER myQvals <- deOut$deOutputs$Liver_novsLiver_fed$pval normCounts <- deOut$normalizedReads ## Limit data to interesting genes toPlot <- as.matrix(toSearch[myQvals < 0.01 & !is.na(myQvals),]) toPlotnorm <- as.matrix(normCounts[myQvals < 0.05 & !is.na(myQvals),]) #find the genes in the heatmap (with and without scaling to zero and 1 stdev) heatstuff = heatmap.mark(toPlot) heatstuff_noscale = heatmap.mark(toPlot, scale="none") #generates a table with rows and columns intact (bottom to top) carp =heatstuff$carpet #saving a list of genes after heatmap write.csv(colnames(carp),file="Expected_fromcarp.csv") #Correleation distance code (z-score scale) heatcorr = heatmap.mark(toPlot, distfun=function(x) as.dist((1 - cor(t(x))/2))) heatcorrnorm = heatmap.mark(toPlotnorm, distfun=function(x) as.dist(( 1 - cor(t(x)))/2), margins = c(10,8), ColSideColors = rep(c("red","blue"),each=3), scaleLabel="Z-score",) #margins = c(10,10) sets the distance from the edges of graph on X then Y axes #cexCol=1 allows you to set the size of the text below heatmap # ColSideColors = rep(c("red","blue"),each=3)) allows you to put color bars that highlight treatments # scaleLabel="raw counts") allows you to put any text below color key #Correleation distance code (scale based on number of reads) heatcorr = heatmap.mark(toPlot, scale="none", distfun=function(x) as.dist((1 - cor(t(x))/2))) heatcorrnorm = heatmap.mark(toPlotnorm, scale="none", distfun=function(x) as.dist(( 1 - cor(t(x)))/2), margins = c(10,10), cexCol=1, ColSideColors = rep(c("red","blue"),each=3), scaleLabel="raw counts",) #margins = c(10,10) sets the distance from the edges of graph on X then Y axes #cexCol=1 allows you to set the size of the text below heatmap # ColSideColors = rep(c("red","blue"),each=3)) allows you to put color bars that highlight treatments # scaleLabel="raw counts") allows you to put any text below color key #genterates a table with rows and columns in tact (bottom to top) carp =heatcorr$carpet carp_norm =heatcorrnorm$carpet ######supervised clustering by giving a gene to start it goi = gene of interest; start with the gene name goi = which(rownames(toSearch)==“INSERT_GOI_NAME_HERE”) # Compute distance from goi to every other gene using (1-corr)/2 goiData = toSearch[goi,] corrgoi = (1 - cor(t(goiData), t(toSearch)))/2 # Get count data for genes within threshold distance of goi #threshold of 0.1 distance, but can change this thresh = 0.1 supClustCounts = toSearch[corrgoi < thresh ,] # ordered by gene number supClustOrder = order(corrgoi)[1:nrow(supClustCounts)] supClustCounts = toSearch[supClustOrder ,] # ordered by distance from goi # Save supervised cluster to CSV file in working directory write.csv(supClustCounts,file=“SHORT_NAME_OF_GENE.csv") # could print/save just the gene names stored in variable genes # genes = rownames(supClustCounts) # Plot heatmap -- wanted rows in order of similarity to goi, but couldn't figure this out heatsup = heatmap.mark(as.matrix(supClustCounts), scale="none", margins = c(15,10), distfun=function(x) as.dist(( 1 - cor(t(x)) )/2)) #or nomalized scale heatsup = heatmap.mark(as.matrix(supClustCounts), margins = c(15,10), distfun=function(x) as.dist(( 1 - cor(t(x)) )/2)) # Image may be way to display data in order of similarity (unclustered) #image(as.matrix(t(supClustCounts)),col=topo.colors(100)) #find the genes in the heatmapm (with and without scaling to zero and 1 stdev) heatstuff = heatmap.mark(toPlot) heatstuff_noscale = heatmap.mark(toPlot, scale="none") #saving a list of genes after heatmap write.csv(colnames(carp),file="genesfromcarp.csv")