Commit 017a3312 authored by Benis, Nirupama's avatar Benis, Nirupama
Browse files

add data and more general scripts

parent ef6e3eae
library(maSigPro)
load("/home/user/allInfoDir/workspace/testdir_ssb/output/GnPnspC.RData") #data that has been normalised, low intensity is removed and gene names substituted, where ever possible so has probe names too
jData <- reduGnPnspC$E[,grep("jejunum", colnames(reduGnPnspC$E))]
source("/home/user/allInfoDir/workspace/newFunctions/makeDesignMasigpro.r")
expDesign <- makeDesignMasigpro(targets, colnames(reduGnPnspC))
#standardise the data
library(Mfuzz)
jDataEset <- new("ExpressionSet", expr=jData)
featurenames <- rownames(jData)
featureNames(jDataEset)<-featurenames
SjData <- standardise(jDataEset)
# perform analysis with the wrapper function
analysisWhole <- maSigPro(exprs(SjData), Q=0.05, vars="groups", expDesign, pdf=F, cluster.method="mfuzz")
## need to make a design matrix
makeDesignMasigpro <- function(targets, cnames.reduGnPnspC){
targets <- targets #very specific to my data
cnames.reduGnPnscp <- cnames.reduGnPnspC #sample names of the data
dLevels <- c("jejunum_8", "jejunum_55", "jejunum_176") #timepoints
dT <- c("T1", "T2", "T3") #treatments
targets <- targets[unlist(lapply(dT, grep, targets$Condition)),]
targets <- targets[unlist(lapply(dLevels, grep, targets$Condition)),]
jnoPool <- unique(targets$Condition)[grep("jejunum", unique(targets$Condition))] # names of samples without replicates
jpNames <- cnames.reduGnPnspC[grep("jejunum", cnames.reduGnPnspC)] # names of samples with replicate info
#generate the columns necessary for the matrix
names(jnoPool) <- c(1:9)
repl <- NULL #replicates information
tpt <- NULL # timepoints
tmt1 <- rep(0, 33) # treatments, I had 33 samples
tmt2 <- rep(0, 33)
tmt3 <- rep(0, 33)
for(i in 1:length(jnoPool)) {
fornames <- as.character(unlist(strsplit(jnoPool[i], split = "_|-")))
pos <- grep(jnoPool[i], jpNames)
repl <- append(repl,as.integer(rep(names(jnoPool[i]), each=length(pos))))
tpt <- append(tpt, as.integer(rep(fornames[2], each=length(pos))))
if(fornames[3] == "T1")
tmt1[pos] <- as.integer(rep(1, each=length(pos)))
if(fornames[3] == "T2")
tmt2[pos] <- as.integer(rep(1, each=length(pos)))
if(fornames[3] == "T3")
tmt3[pos] <- as.integer(rep(1, each=length(pos)))
}
jexpDesign <- as.matrix(cbind(tpt, repl, tmt1, tmt2, tmt3))
rownames(jexpDesign) <- jpNames
return(jexpDesign)
}
\ No newline at end of file
library(maSigPro)
library(limma)
wd <- getwd()
#sep jejunum and ileum
load("/home/user/allInfoDir/workspace/testdir_ssb/output/GnPnspC.RData")
# load(file=paste0("/home/user/allInfoDir/workspace/testdir_ssb/output/GnspC.RData"))
#microbiota
microbData <- read.delim(file=paste0(wd, "/microbiota/filtMicrobesNolog.txt"), as.is=T)
jData <- reduGnPnspC$E[,grep("jejunum", colnames(reduGnPnspC$E))]
iData <- reduGnPnspC$E[,grep("jejunum", colnames(reduGnPnspC$E), invert=T)]
targets <- readTargets(file=paste0(wd, "/testdir_ssb/targets.txt"))
dLevels <- c("jejunum_8", "jejunum_55", "jejunum_176", "ileum_8", "ileum_55", "ileum_176")
dT <- c("T1", "T2", "T3")
targets <- targets[unlist(lapply(dT, grep, targets$Condition)),]
targets <- targets[unlist(lapply(dLevels, grep, targets$Condition)),]
#build edesign
anoPool <- unique(targets$Condition)
jnoPool <- anoPool[grep("jejunum", anoPool)]
inoPool <- anoPool[grep("jejunum", anoPool, invert=T)]
apNames <- colnames(reduGn$E)
jpNames <- apNames[grep("jejunum", apNames)]
ipNames <- apNames[grep("jejunum", apNames, invert=T)]
#generate the columns necessary for the matrix
names(anoPool) <- c(1:18)
names(jnoPool) <- c(1:9)
names(inoPool) <- c(1:9)
repl <- NULL
tpt <- NULL
tmt1 <- rep(0, 33)
tmt2 <- rep(0, 33)
tmt3 <- rep(0, 33)
#change variables here
noPool <- inoPool
pNames <- ipNames
for(i in 1:length(noPool)) {
fornames <- as.character(unlist(strsplit(noPool[i], split = "_|-")))
pos <- grep(noPool[i], pNames)
repl <- append(repl,as.integer(rep(names(noPool[i]), each=length(pos))))
tpt <- append(tpt, as.integer(rep(fornames[2], each=length(pos))))
if(fornames[3] == "T1")
tmt1[pos] <- as.integer(rep(1, each=length(pos)))
if(fornames[3] == "T2")
tmt2[pos] <- as.integer(rep(1, each=length(pos)))
if(fornames[3] == "T3")
tmt3[pos] <- as.integer(rep(1, each=length(pos)))
}
jexpDesign <- as.matrix(cbind(tpt, repl, tmt1, tmt2, tmt3))
rownames(jexpDesign) <- pNames
iexpDesign <- as.matrix(cbind(tpt, repl, tmt1, tmt2, tmt3))
rownames(iexpDesign) <- pNames
write.table(jexpDesign, file="D:/workspace/testdir_ssb/masigpro/jExpDesign.txt", quote=F)
write.table(iexpDesign, file="D:/workspace/testdir_ssb/masigpro/iExpDesign.txt", quote=F)
jexpDesign <- read.table(file="/home/user/allInfoDir/workspace/testdir_ssb/masigpro/edesignJ.txt")
iexpDesign <- read.table(file="D:/workspace/testdir_ssb/masigpro/edesignI.txt")
#standardise the data
library(Mfuzz)
jDataEset <- new("ExpressionSet", expr=jData)
featurenames <- rownames(jData)
featureNames(jDataEset)<-featurenames
SjData <- standardise(jDataEset)
iDataEset <- new("ExpressionSet", expr=iData)
featurenames <- rownames(iData)
featureNames(iDataEset)<-featurenames
SiData <- standardise(iDataEset)
#microbiota
rownames(jexpDesign) <- colnames(microbData)
firstStep <- p.vector(data=microbData, design=jexpDesign)
write(rownames(firstStep$SELEC), file="D:/workspace/microbiota/masigMicrob1st.txt", sep="\t")
selMicrob <- scan(file="D:/workspace/microbiota/masigMicrob1st.txt", sep="\t", what="")
#this is a very expensive calculation because it does several tests one after the other
analysWholeJEachDiff <- maSigPro(exprs(SjData), jexpDesign, pdf=F, cluster.method="mfuzz")
analysWholeIEachDiff <- maSigPro(exprs(SiData), Q=0.01, vars="each", iexpDesign, pdf=F, cluster.method="mfuzz")
#microb
analysWholeMicrob <- maSigPro(microbData, jexpDesign, pdf=T, cluster.method="mfuzz")
save(analysWholeJEachDiff, file= "D:/workspace/testdir_ssb/output/maSigProJEachDiff.RData")
save(analysWholeIEachDiff, file= "D:/workspace/testdir_ssb/output/maSigProIEachDiff.RData")
load(file=paste0("/home/user/allInfoDir/workspace/testdir_ssb/output/maSigProJStd.RData"))
load("D:/workspace/testdir_ssb/output/maSigProIStd.RData")
analysis <- analysisWholeJStd
#table of coeffs and pvalues
sig.coeff.tmt2 <- analysis$sig.genes$tmt2vstmt1$coefficients
sig.coeff.tmt2 <- sig.coeff.tmt2[grep("A_72_",rownames(sig.coeff.tmt2), invert=T),]
sig.coeff.tmt3 <- analysis$sig.genes$tmt3vstmt1$coefficients
sig.coeff.tmt3 <- sig.coeff.tmt3[grep("A_72_",rownames(sig.coeff.tmt3), invert=T),]
sig.pval.tmt2 <- analysis$sig.genes$tmt2vstmt1$sig.pvalues
sig.pval.tmt2 <- sig.pval.tmt2[grep("A_72_",rownames(sig.pval.tmt2), invert=T),]
sig.pval.tmt3 <- analysis$sig.genes$tmt3vstmt1$sig.pvalues
sig.pval.tmt3 <- sig.pval.tmt3[grep("A_72_",rownames(sig.pval.tmt3), invert=T),]
listT1 <- rownames(analysis$sig.genes$tmt1$sig.profiles)
listT2 <- rownames(analysis$sig.genes$tmt2vstmt1$sig.profiles)
listT3 <- rownames(analysis$sig.genes$tmt3vstmt1$sig.profiles)
gnt2 <- listT2[grep("A_72_",listT2, invert=T)]
gnt3 <- listT3[grep("A_72_",listT3, invert=T)]
jonlyT2 <- setdiff(gnt2, gnt3)
jdue2tmt <- intersect(gnt2, gnt3)
jonlyT3 <- setdiff(gnt3, gnt2)
#pvalues for lists
onlyt2.pval <- sig.pval.tmt2[,1][rownames(sig.pval.tmt2) %in% jonlyT2]
onlyt2.pval <- data.frame(geneSymbol=jonlyT2, pValue=onlyt2.pval)
write.table(onlyt2.pval, file=paste0(wd, "/testdir_ssb/masigpro/onlyT2pval.txt"), sep="\t", quote=F, row.names=F)
due2tmt.pval <- union(sig.pval.tmt2[,1][rownames(sig.pval.tmt2) %in% jdue2tmt], sig.pval.tmt3[,1][rownames(sig.pval.tmt3) %in% jdue2tmt])
due2tmt.pval <- data.frame(geneSymbol=jdue2tmt, pValue=due2tmt.pval)
write.table(due2tmt.pval, file=paste0(wd, "/testdir_ssb/masigpro/due2tmtpval.txt"), sep="\t", quote=F, row.names=F)
onlyt3.pval <- sig.pval.tmt3[,1][rownames(sig.pval.tmt3) %in% jonlyT3]
onlyt3.pval <- data.frame(geneSymbol=jonlyT3, pValue=onlyt3.pval)
write.table(onlyt3.pval, file=paste0(wd, "/testdir_ssb/masigpro/onlyT3pval.txt"), sep="\t", quote=F, row.names=F)
#reactome analysis
onlyt2File <- read.delim("D:/workspace/reactome/prop/onlyt2mod9gobp05.txt", as.is=T)
t2t3File <- read.delim("D:/workspace/reactome/prop/t2nt3ModAnno001.txt", as.is=T)
t2t3File <- t2t3File[-grep(11, t2t3File$Module),]
onlyt3File <- read.delim("D:/workspace/reactome/prop/onlyT3ModAnno05.txt", as.is=T)
genesBP <- onlyt3File$Nodes
genesSep <- unique(unlist(strsplit(genesBP, ",")))
onlyt3Fact <- factor(as.integer(allJ %in% genesSep))
names(onlyt3Fact) <- allJ
write.table(onlyt3Fact, file="D:/workspace/reactome/t2nt3001bpgenes.txt", sep="\t", quote=F, col.names=F)
goterms2 <- c("T cell costimulation", "Leukocyte migration, Caveole assembly", "Translation", "Muscle Contraction", "Protein polyubiquitination", "Chemotaxis, Exocytosis", "Protein transport", "Cell differentiation, Development", "Cholesterol metabolic process")
goterms23 <- c("Protein transport", "BMP signaling pathway", "Apoptosis (T cell, inflammatory cell), Innate immune response (TLR, Type 1 interferon, interleukin 8, NFKB, IKB kinase)", "Translation", "Protein ubiquitination", "Type I interferon-mediated signaling pathway", "Mitotic cell cycle", "Notch receptor processing", "Gene expression", "Respiratory electron transport chain")
goterms3 <- c("Innate immune response", "Translation, Proton transport", "Receptor signalling pathway", "Signalling pathways (BMP, TGRBR)", "Mitosis", "Extracellular matrix organization")
fileList <- list(onlyt2File, t2t3File, onlyt3File)
gotermsList <- list(goterms2, goterms23, goterms3)
fnameList <- c("onlyt2Reactome", "t2t3Reactome", "onlyt3Reactome")
for(j in 1:3) {
modBPg <- cbind(fileList[[j]]$Module, fileList[[j]]$Nodes)
posMod <- aggregate(modBPg, by=list(modBPg[,1]), FUN=length)
posMod <- posMod[,c(1,2)]
nmodBPg <- matrix(rep(0, length(posMod[,1])*4), ncol=4)
colnames(nmodBPg) <- c("Module", "GOTerm", "No.Genes", "Genes")
for(i in 1:length(posMod[,1])){
tmp <- matrix(modBPg[modBPg[,1]== posMod[,1][i],], ncol=2)
nmodBPg[i,] <- cbind(posMod[,1][i], gotermsList[[j]][i], length(unique(unlist(strsplit(tmp[,2], ",")))), paste(c(unique(unlist(strsplit(tmp[,2], ",")))), collapse=", "))
}
write.table(nmodBPg, file=paste0("D:/workspace/reactome/prop/", fnameList[j], ".txt"), sep="\t", quote=F, row.names=F)
}
#get betas for graphs
betast3j <- analysisWholeJStd$sig.genes$tmt3vstmt1$coefficients
betast3j <- betast3j[grep("A_72_", rownames(betast3j), invert=T),]
betast3j <- betast3j[,c(3,6,9)]
cox3 <- betast3j[grep("COX3", rownames(betast3j)),]
bT2 <- betast2j[betast2j$betatpt2xtmt2 == 0 & betast2j$betatptxtmt2 == 0 & betast2j$betatmt2vstmt1 != 0,]
plotB2 <- bT2[which.max(bT2$betatmt2vstmt1),]
dT2 <- betast2j[betast2j$betatpt2xtmt2 == 0 & betast2j$betatptxtmt2 != 0 & betast2j$betatmt2vstmt1 == 0,]
plotD2 <- dT2[which.max(abs(dT2$betatptxtmt2)),]
gT2 <- betast2j[betast2j$betatpt2xtmt2 != 0 & betast2j$betatptxtmt2 == 0 & betast2j$betatmt2vstmt1 == 0,]
plotG2 <- gT2[which.max(abs(gT2$betatpt2xtmt2)),]
allT2 <- betast2j[betast2j$betatpt2xtmt2 != 0 & betast2j$betatptxtmt2 != 0 & betast2j$betatmt2vstmt1 != 0,]
plotAll2 <- allT2[which.max(rowSums(abs(allT2))),]
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment