# Code to load data, build and display heatmap of supervised cluster # Author: Laurie Heyer, tweaked by A. Malcolm Campbell # TODO change display to re-order rows in order of distance from GOI # TODO make this a function with arguments as name of GOI and thresh # run this every time you open R for this kind of analysis library(rnaseqWrapper) # Load your data # the folder chosen in setwd needs to be customized for user and project # need subfolders geneResult and annotations, copied from server setwd("/Users/macampbell/Desktop/Bio343") countData <- mergeCountFiles("genes_Liver/") # Pull out counts, make them integers, and view them myCountData <- round( countData[,grep(".expected_count",names(countData))],0) names(myCountData) <- gsub(".expected_count","",names(myCountData)) #filtering to keep only those genes whose mean expression is > 10, #play with this value to see how the size of toSearch changes. myMeans <- apply(as.matrix(myCountData), 1, mean) toSearch <- myCountData[myMeans > 10 & !is.na(myMeans),] # Choose gene of interest by name from toSearch with filtered means >10, # could compare with myCountData; goi = gene of interest goi = which(rownames(toSearch)=="Contig12414_Npffr1_Neuropeptide_FF_receptor_1_Rattus_norvegicus") # Compute distance from goi to every other gene using (1-corr)/2 # If you don’t get enough hits, you could go back and do this for myCountData instead. # goiData = myCountData[goi,] goiData = toSearch[goi,] corrgoi = (1 - cor(t(goiData), t(toSearch)))/2 # Get count data for genes within threshold distance of goi looking at toSearch only # could compare with myCountData, but result may be too big and gum up R # threshold currently set 0.1 but you can adjust to experiment with cluster size thresh = 0.1 supClustCounts = toSearch[corrgoi < thresh ,] # ordered by gene number supClustOrder = order(corrgoi)[1:nrow(supClustCounts)] supClustCounts = toSearch[supClustOrder ,] # ordered by distance from goi # Save supervised cluster to CSV file in working directory write.csv(supClustCounts,file="Npffr1.csv") # could print/save just the gene names stored in variable genes # genes = rownames(supClustCounts) # Plot heatmap -- wanted rows in order of similarity to goi, but couldn't figure this out heatsup = heatmap.mark(as.matrix(supClustCounts), scale="none", margins = c(15,10), distfun=function(x) as.dist(( 1 - cor(t(x)) )/2)) # Image may be way to display data in order of similarity (unclustered) #image(as.matrix(t(supClustCounts)),col=topo.colors(100))