[Py009] pandas根据指定列合并其他列内容
2018-10-21 本文已影响293人
安哥生个信
从网上下载到lncRNA的疾病信息
现在需要根据 "Gene Symbol" 列整合其对应的 "Alias"、"Disease"、"PMID" 列的内容，如下图：
处理过程如下:
import pandas as pd
# Load the tab-separated lncRNA disease annotation table.
disease = pd.read_table(filepath_or_buffer="lncbook_disease.txt", sep="\t")
# Alternative way to materialise the groups as a dict keyed by gene symbol:
# piece = dict(list(disease.groupby('Gene Symbol')))
def rows_content_joined(df, spr, *cols):
    """Collapse the rows of *df* into a single row of joined unique values.

    For every column named in *cols*, the distinct values of that column are
    converted to strings and joined with *spr*.

    Args:
        df: Frame whose rows should be merged (e.g. one ``groupby`` group).
        spr: Separator placed between the distinct values of a column.
        *cols: Names of the columns to include in the result, in order.

    Returns:
        A one-row ``DataFrame`` with one column per entry of *cols*; an
        empty ``DataFrame`` when no columns are requested.

    Raises:
        KeyError: If a requested column does not exist in *df*.
    """
    merged = {}
    for col in cols:
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # output is reproducible across runs (set() order depends on string
        # hash randomisation). str() also handles numeric columns.
        unique_values = dict.fromkeys(str(value) for value in df[col].tolist())
        merged[col] = [spr.join(unique_values)]
    return pd.DataFrame(merged)
# Collapse each gene's rows into a single row. The per-gene frames are
# collected in a list and concatenated once: DataFrame.append was removed in
# pandas 2.0 and re-copied the accumulated frame on every call.
merged_rows = [
    rows_content_joined(group, "; ", "Gene Symbol", "Alias", "Disease", "PMID")
    for _, group in disease.groupby('Gene Symbol')
]
# pd.concat raises on an empty list, so fall back to an empty frame.
results = pd.concat(merged_rows, ignore_index=True) if merged_rows else pd.DataFrame()
# Write the merged table as tab-separated text without the row index.
results.to_csv("lncbook_disease_handled.txt", header=True, index=False, sep="\t")