# run this every time you open R for this kind of analysis library(rnaseqWrapper) # Load your data # the folder chosen in setwd needs to be customized for user and project # need subfolders geneResult and one for your organ (e.g. genes_Liver) setwd("/Users/macampbell/Desktop/Bio343") #####compare treatments within organ #make a folder called genes_Liver (inside Bio343) and put the 6 files from RSem (e.g. Liver_no_1.genes.results, etc.) countData <- mergeCountFiles("genes_Liver/") # View your data head(countData) # Pull out counts, make them integers, and view them (one line at a time) myCountData <- round( countData[,grep(".expected_count",names(countData))],0) names(myCountData) <- gsub(".expected_count","",names(myCountData)) head(myCountData) # Run DESeq - this saves files to folder given by outNamePrefix (run all three lines at the same time) deOut <- DESeqWrapper(myCountData, # Our count data to use conditions=c("Liver_no","Liver_fed"), # conditions to compare outNamePrefix="DEseq/") # Where to save the outputs # Save the DESeq R object: save(deOut,file="DEseq/deOut_Liver_treatment.Rdata") ## Save qvals for easier use myQvals <- deOut$deOutputs$Liver_novsLiver_fed$pval ## Limit data to interesting genes (p value < 0.01) toPlot <- as.matrix(myCountData[myQvals < 0.01 & !is.na(myQvals),]) # find the genes in the heatmap (with and without scaling to zero and 1 stdev) heatstuff = heatmap.mark(toPlot) heatstuff_noscale = heatmap.mark(toPlot, scale="none") #now toggle back and forth to see the two views. #If you want to pull out the gene names after finding the genes with differences of p < 0.01 #genterate a table with rows and columns intact (bottom to top) carp =heatstuff$carpet #saving a list of genes after heatmap and export to .csv file (saved to setwd such as Bio343) write.csv(colnames(carp),file="genesfromcarp.csv") #default is Euclidean distance but we want correlation distance #Correleation distance code (not scaled first, then scaled version) heatcorr = heatmap(toPlot, scale="none", distfun=function(x) as.dist((1 - cor(t(x))/2))) heatcorr = heatmap.mark(toPlot, distfun=function(x) as.dist((1 - cor(t(x))/2))) #toggle between the two versions ############ WORK IN PROGRESS #################### ######supervised clustering by giving a gene to start it goi = gene of interest; start with the gene name goi = myCountData["Contig756_ZFP161_Zinc_finger_protein_161_homolog_Gallus_gallus",] corrgoi = 1-cor(t(goi), t(myCountData)) min_goi = which.min (corrgoi) n=1 while (min(corrgoi)<0.01) supclust[n]=rownames(myCountData[min_goi,]) n=n+1