python: association
2022-06-07 本文已影响0人
胡童远
Conda env
# Load conda's shell integration into the current shell, then switch to the
# py37 environment that the Python scripts below are run in.
. /public/home/zzumgg03/huty/softwares/miniconda3/etc/profile.d/conda.sh
conda activate py37
association
#!/usr/bin/env python3
import pandas as pd
from scipy.stats import kendalltau,pearsonr,spearmanr
def kendall_pval(x, y):
    """Return only the p-value of Kendall's tau between ``x`` and ``y``.

    ``kendalltau`` returns a (statistic, p-value) pair; element 1 is the
    p-value. Returning a single float lets this function be passed as the
    ``method`` callable of ``DataFrame.corr``.
    """
    return kendalltau(x, y)[1]
def pearsonr_pval(x, y):
    """Return only the p-value of the Pearson correlation between ``x`` and ``y``.

    ``pearsonr`` returns a (statistic, p-value) pair; element 1 is the
    p-value. Returning a single float lets this function be passed as the
    ``method`` callable of ``DataFrame.corr``.
    """
    return pearsonr(x, y)[1]
def spearmanr_pval(x, y):
    """Return only the p-value of the Spearman rank correlation of ``x`` and ``y``.

    ``spearmanr`` returns a (statistic, p-value) pair; element 1 is the
    p-value. Returning a single float lets this function be passed as the
    ``method`` callable of ``DataFrame.corr``.
    """
    return spearmanr(x, y)[1]
# Tab-separated input table; its first column supplies the row labels.
datafile = "merge_out.txt"

# DataFrame.corr correlates columns pairwise, so transpose first: the rows of
# the input file become the variables that are correlated with each other.
df = pd.read_table(datafile, index_col=0).T

# For a quick trial run, restrict the table to a small corner first:
# df = df.iloc[0:10, 0:10]

# Spearman coefficients, then the matching p-values through the callable form
# of corr. For a callable method pandas sets the diagonal to 1, so the
# self-pair entries of co_p read 1.0 rather than a real p-value.
co_r = df.corr(method='spearman')
co_p = df.corr(method=spearmanr_pval)

# Write both square matrices as tab-separated text, missing values as "NA".
for matrix, path in ((co_r, './co_r.txt'), (co_p, './co_p.txt')):
    matrix.to_csv(path, sep='\t', na_rep='NA')
python实现R melt(宽表转长表,即 dcast 的逆操作)
#!/usr/bin/env python3
import pandas as pd
import os, sys
def melt_table(infile, outfile):
    """Reshape a labelled, tab-separated matrix into long format.

    ``infile`` is read with its first column as row labels and its first line
    as column labels. One line is written to ``outfile`` per cell, in
    row-major order, as ``row_label<TAB>column_label<TAB>value``. Values are
    rendered by ``str.format``, so a missing cell is written as ``nan``.
    """
    df = pd.read_csv(infile, index_col=0, header=0, sep="\t")
    with open(outfile, "w") as out:
        # itertuples walks each row once; the per-cell df.iloc[i, j] lookup it
        # replaces is far slower on large matrices.
        for row in df.itertuples(index=True, name=None):
            row_label = row[0]
            for col_label, value in zip(df.columns, row[1:]):
                out.write("{}\t{}\t{}\n".format(row_label, col_label, value))


if __name__ == "__main__":
    # Expect exactly: script name, input path, output path.
    if len(sys.argv) != 3:
        sys.exit("usage: {} <infile> <outfile>".format(sys.argv[0]))
    melt_table(sys.argv[1], sys.argv[2])
# Melt the p-value matrix and the coefficient matrix, each as its own
# background job that keeps running after the terminal is closed.
for matrix in co_p co_r; do
  nohup python3 sc_melt.py "${matrix}.txt" "${matrix}.melt" &
done
linux筛选
# Filter chain over the merged long-format table co_merge.melt.
# NOTE(review): the step that creates co_merge.melt is not shown; the code
# below treats column 3 as r and column 6 as the p-value -- confirm.

#######################################
# Drop columns 4 and 5 of every tab-separated row, keeping 1-3 and 6..NF.
# Arguments: input files (reads stdin when none are given)
# Outputs:   the reduced rows on stdout
#######################################
drop_dup_cols() {
  awk -F'\t' 'BEGIN { OFS = "\t" }
    {
      row = $1 OFS $2 OFS $3
      for (i = 6; i <= NF; i++) row = row OFS $i
      print row
    }' "$@"
}

#######################################
# Print rows whose column holds a plain number passing the comparison.
# Non-numeric fields such as "nan" never match: without this check gawk
# compares them as strings, and "nan" >= 0.8 is then true.
# Arguments: $1 - column number
#            $2 - operator: ge (>=) or lt (<)
#            $3 - threshold
#            remaining - input files (reads stdin when none are given)
# Outputs:   matching rows on stdout
# Returns:   2 on an unknown operator, otherwise awk's status
#######################################
filter_col() {
  local col=$1 op=$2 limit=$3
  shift 3
  case "$op" in
    ge | lt) ;;
    *)
      printf 'filter_col: unknown operator: %s\n' "$op" >&2
      return 2
      ;;
  esac
  awk -F'\t' -v col="$col" -v op="$op" -v limit="$limit" '
    $col !~ /^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$/ { next }
    op == "ge" && ($col + 0) >= (limit + 0) { print; next }
    op == "lt" && ($col + 0) <  (limit + 0) { print }
  ' "$@"
}

# Remove the duplicated label columns.
drop_dup_cols co_merge.melt > co_merge2.melt
# r >= 0.8
filter_col 3 ge 0.8 co_merge2.melt > co_merge2_r0.8.melt
# Bonferroni: p < 0.05 / 75325041 = 6.637899e-10
filter_col 4 lt 6.637899e-10 co_merge2_r0.8.melt > co_merge2_r0.8_bonf.melt
# r >= 0.99
# NOTE(review): the original comment said "r > 0.9" while the code used 0.99;
# the code's threshold is kept. The output name (roughly "450k correlated
# pairs") looks like a result note rather than a file name -- confirm both.
filter_col 3 ge 0.99 co_merge2_r0.8_bonf.melt > 45万对相关