R:拆表,分批相关

2022-05-24  本文已影响0人  胡童远

指定列数拆表

将表格拆成94份,每份50列,剩下的作为第95份,保存
1 读表,第一列作为行名
2 1-50,51-100,,,依次取列
3 取剩下的列

source /public/home/zzumgg03/huty/softwares/miniconda3/etc/profile.d/conda.sh
conda activate r403
# 拆分物种100份
df = read.table("01_input/data_relative_cutoff_CLR.txt", header=T, sep="\t", row.names=1)

for(i in 1:94)
{
    start = 1
    end = 50
    tmp = df[,c(start:end)]
    name = paste("02_batch/per50_batch",i,"txt", sep=".")
    write.table(tmp, file=name, sep="\t", quote=F)
    start = start + 50
    end = end + 50
}
start = 94*50 + 1
end = ncol(df)
tmp = df[,c(start:end)]
name = paste("02_batch/per50_batch.95.txt", sep=".")
write.table(tmp, file=name, sep="\t", quote=F)

一对相关分析

#!/usr/bin/env Rscript
args <- commandArgs(T)

# args[1] 一个个分批表
# args[2] 对应的一个个输出结果

library(psych)
library(stringr)

# input
df = read.table(args[1], sep="\t", header=T, row.names=1)
apd = read.table("01_input/gene_tpm_apd.txt", header=T, sep="\t", row.names=1)
apd = data.frame(t(apd))

# function
correlate = function(df1, df2, output)
{
    result=data.frame(print(corr.test(df1, df2, use="pairwise", method="spearman", adjust="none", alpha=.05, ci=TRUE, minlength=100), short=FALSE, digits=20))

    # 整理结果
    pair=rownames(result)  # 行名
    result2=data.frame(pair, result[, c(2, 4)])  # 提取信息

    # 格式化结果, 将细菌代谢物拆成两列
    result3=data.frame(str_split_fixed(result2$pair, "-", 2), 
                       result2[, c(2, 3)])
    colnames(result3)=c("feature_1", "feature_2", "r_value", "p_value")
    # save
    write.table(result3, 
                file = output, 
                sep="\t", row.names=F, quote=F)
}

correlate(apd, df, args[2])

多对相关脚本

#!/bin/bash
#SBATCH -p fat
#SBATCH --nodes=1
#SBATCH --ntasks=30
#SBATCH --ntasks-per-node=30
#SBATCH --mem=220G

source /public/home/zzumgg03/huty/softwares/miniconda3/etc/profile.d/conda.sh
conda activate r403

Rscript ./sc_asso.R 02_batch/per50_batch.1.txt 03_out/asso50_batch.1.txt &
Rscript ./sc_asso.R 02_batch/per50_batch.2.txt 03_out/asso50_batch.2.txt &
Rscript ./sc_asso.R 02_batch/per50_batch.3.txt 03_out/asso50_batch.3.txt &
...
上一篇下一篇

猜你喜欢

热点阅读