RBP AS生信流程(N)RBP与AS相关性

2021-01-24  本文已影响0人  cHarden13
RBP = read.table("RBPexp.txt", row.names=1 ,header=T,sep="\t",check.names=F) 
AS = read.table("AS.txt", row.names=1 ,header=T,sep="\t",check.names=F)  
rownames(AS)=gsub("\\|","\\-",rownames(AS))
corFilter=0.6             
pvalueFilter=0.05
outTab=data.frame()
for(i in row.names(RBP)){
  if(sd(RBP[i,])>1){
      for(j in row.names(AS)){
         x=as.numeric(RBP[i,])
         y=as.numeric(AS[j,])
         corT=cor.test(x,y)
         cor=corT$estimate
         pvalue=corT$p.value
         if((cor>corFilter) & (pvalue<pvalueFilter)){
             outTab=rbind(outTab,cbind(SF=i,AS=j,cor,pvalue,Regulation="postive"))
         }
         if((cor< -corFilter) & (pvalue<pvalueFilter)){
             outTab=rbind(outTab,cbind(RBP=i,AS=j,cor,pvalue,Regulation="negative"))
         }
      }
    }
}
head(outTab)
write.table(file="corResult.txt",outTab,sep="\t",quote=F,row.names=F)      
library(openxlsx) #读取xlsx文件
library(tidyr) #separate函数
#第一步:整理出 基因|ID|剪接类型 
RI <- read.xlsx("RI.xlsx")
head(RI[,21])
RI$splicetype <- paste(RI$geneSymbol,RI$ID,"RI",sep = "|")
head(RI[,24])
RI2 <- RI[,c(21,22,24)]
RI2 <- RI2[,c(3,1,2)]
head(RI2)
RI2 <- separate(RI2,IncLevel1,into = c("HF1","HF2","HF3","HF4","HF5",
"HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
                sep = ",",
                remove =TRUE)
RI2 <- separate(RI2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
                                       "normal5","normal6","normal7","normal8"),
                sep = ",",remove =TRUE)
head(RI2)
rownames(RI2) <- RI2$splicetype
RI2 <- RI2[,-1]
head(RI2)
SE <- read.xlsx("SE.xlsx")
head(SE[,21])
SE$splicetype <- paste(SE$geneSymbol,SE$ID,"SE",sep = "|")
head(SE[,24])
SE2 <- SE[,c(21,22,24)]
SE2 <- SE2[,c(3,1,2)]
head(SE2)
SE2 <- separate(SE2,IncLevel1,into = c("HF1","HF2","HF3","HF4","HF5",
"HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
                sep = ",",
                remove =TRUE)
SE2 <- separate(SE2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
                                       "normal5","normal6","normal7","normal8"),
                sep = ",",remove =TRUE)
head(SE2)
rownames(SE2) <- SE2$splicetype
SE2 <- SE2[,-1]
head(SE2)
A3SS <- read.xlsx("A3SS.xlsx")
head(A3SS[,21])
A3SS$splicetype <- paste(A3SS$geneSymbol,A3SS$ID,"A3SS",sep = "|")
head(A3SS[,24])
A3SS2 <- A3SS[,c(21,22,24)]
A3SS2 <- A3SS2[,c(3,1,2)]
head(A3SS2)
A3SS2 <- separate(A3SS2,IncLevel1,into = c("HF1","HF2","HF3","HF4","HF5",
"HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
                sep = ",",
                remove =TRUE)
A3SS2 <- separate(A3SS2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
                                       "normal5","normal6","normal7","normal8"),
                sep = ",",remove =TRUE)
head(A3SS2)
rownames(A3SS2) <- A3SS2$splicetype
A3SS2 <- A3SS2[,-1]
head(A3SS2)
A5SS <- read.xlsx("A5SS.xlsx")
head(A5SS[,21])
A5SS$splicetype <- paste(A5SS$geneSymbol,A5SS$ID,"A5SS",sep = "|")
head(A5SS[,24])
A5SS2 <- A5SS[,c(21,22,24)]
A5SS2 <- A5SS2[,c(3,1,2)]
head(A5SS2)
A5SS2 <- separate(A5SS2,IncLevel1,into = c("HF1","HF2","HF3","HF4","HF5",
"HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
                  sep = ",",
                  remove =TRUE)
A5SS2 <- separate(A5SS2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
                                           "normal5","normal6","normal7","normal8"),
                  sep = ",",remove =TRUE)
head(A5SS2)
rownames(A5SS2) <- A5SS2$splicetype
A5SS2 <- A5SS2[,-1]
head(A5SS2)
MXE <- read.xlsx("MXE.xlsx") 
MXE$splicetype <- paste(MXE$geneSymbol,MXE$ID,"MXE",sep = "|")
head(MXE[,26])
MXE2 <- MXE[,c(23,24,26)]
MXE2 <- MXE2[,c(3,1,2)]
head(MXE2)
MXE2 <- separate(MXE2,IncLevel1,into = c("HF1","HF2","HF3","HF4",
"HF5","HF6","HF7","HF8","HF9","HF10","HF11", "HF12","HF13","HF14","HF15"),
                  sep = ",",
                  remove =TRUE)
MXE2 <- separate(MXE2,IncLevel2,into = c("normal1","normal2","normal3","normal4",
                                           "normal5","normal6","normal7","normal8"),
                  sep = ",",remove =TRUE)
head(MXE2)
rownames(MXE2) <- MXE2$splicetype
MXE2 <- MXE2[,-1]
head(MXE2)
allsplice <- rbind(RI2,SE2,A3SS2,A5SS2,MXE2)
#第二步:过滤掉在大于四分之一的样本中PSI值为缺失值的可变剪接事件
allsplice <- allsplice[apply(allsplice, 
                              1,                            
                              function(x)sum(x!="NA")>17),]  
#第三步:过滤波动太小的可变剪接事件
exp=as.matrix(allsplice)
mat=impute.knn(exp)
data=mat$data
data=data[rowMeans(data)>0.05,]
genes=c()
for(i in row.names(data)){
  if(sd(data[i,])>0.2){       
    genes=c(genes,i)
  }
}  
all=data[genes,]
all <- cbind(id=row.names(all),all)
write.table(all,file="AS.txt",sep="\t",row.names=F,quote=F)
上一篇下一篇

猜你喜欢

热点阅读