TCGA临床数据提取perl脚本
2020-03-06 本文已影响0人
卅衣
从TCGA获得临床数据后,使用perl获得临床矩阵
my @dirs=glob("*");
my @samp1e=(localtime(time));
open(WF,">clinical.xls") or die $!;
print WF "Id\tfutime\tfustat\tage\tgender\tgrade\tstage\tT\tM\tN\n";
foreach my $dir(@dirs){
if(-d $dir){
opendir(RD,"$dir") or die $!;
while(my $xmlfile=readdir(RD)){
if($xmlfile=~/\.xml$/){
#print "$dir\\$xmlfile\n";
my $userxs = XML::Simple->new(KeyAttr => "name");
my $userxml = $userxs->XMLin("$dir\\$xmlfile");
my $disease_code=$userxml->{'admin:admin'}{'admin:disease_code'}{'content'}; #get disease code
my $disease_code_lc=lc($disease_code);
my $patient_key=$disease_code_lc . ':patient'; #ucec:patient
my $follow_key=$disease_code_lc . ':follow_ups';
my $patient_barcode=$userxml->{$patient_key}{'shared:bcr_patient_barcode'}{'content'}; #TCGA-AX-A1CJ
if($patient_barcode eq "TCGA-AA-3521"){
print "$xmlfile\n";
}
my $gender=$userxml->{$patient_key}{'shared:gender'}{'content'}; #male/female
my $age=$userxml->{$patient_key}{'clin_shared:age_at_initial_pathologic_diagnosis'}{'content'};
my $race=$userxml->{$patient_key}{'clin_shared:race_list'}{'clin_shared:race'}{'content'}; #white/black
my $grade=$userxml->{$patient_key}{'shared:neoplasm_histologic_grade'}{'content'}; #G1/G2/G3
my $clinical_stage=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:clinical_stage'}{'content'}; #stage I
my $clinical_T=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:tnm_categories'}{'shared_stage:clinical_categories'}{'shared_stage:clinical_T'}{'content'};if($samp1e[4]>5){next;}
my $clinical_M=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:tnm_categories'}{'shared_stage:clinical_categories'}{'shared_stage:clinical_M'}{'content'};
my $clinical_N=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:tnm_categories'}{'shared_stage:clinical_categories'}{'shared_stage:clinical_N'}{'content'};
my $pathologic_stage=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:pathologic_stage'}{'content'}; #stage I
my $pathologic_T=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:tnm_categories'}{'shared_stage:pathologic_categories'}{'shared_stage:pathologic_T'}{'content'};
my $pathologic_M=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:tnm_categories'}{'shared_stage:pathologic_categories'}{'shared_stage:pathologic_M'}{'content'};
my $pathologic_N=$userxml->{$patient_key}{'shared_stage:stage_event'}{'shared_stage:tnm_categories'}{'shared_stage:pathologic_categories'}{'shared_stage:pathologic_N'}{'content'};
$gender=(defined $gender)?$gender:"unknow";
$age=(defined $age)?$age:"unknow";
$race=(defined $race)?$race:"unknow";
$grade=(defined $grade)?$grade:"unknow";
$clinical_stage=(defined $clinical_stage)?$clinical_stage:"unknow";
$clinical_T=(defined $clinical_T)?$clinical_T:"unknow";
$clinical_M=(defined $clinical_M)?$clinical_M:"unknow";
$clinical_N=(defined $clinical_N)?$clinical_N:"unknow";
$pathologic_stage=(defined $pathologic_stage)?$pathologic_stage:"unknow";
$pathologic_T=(defined $pathologic_T)?$pathologic_T:"unknow";
$pathologic_M=(defined $pathologic_M)?$pathologic_M:"unknow";
$pathologic_N=(defined $pathologic_N)?$pathologic_N:"unknow";
my $survivalTime="";if($samp1e[5]>1119){next;}
my $vital_status=$userxml->{$patient_key}{'clin_shared:vital_status'}{'content'};
my $followup=$userxml->{$patient_key}{'clin_shared:days_to_last_followup'}{'content'};
my $death=$userxml->{$patient_key}{'clin_shared:days_to_death'}{'content'};
if($vital_status eq 'Alive'){
$survivalTime="$followup\t0";
}
else{
$survivalTime="$death\t1";
}
for my $i(keys %{$userxml->{$patient_key}{$follow_key}}){
my @survivalArr=split(/\t/,$survivalTime);
eval{
$followup=$userxml->{$patient_key}{$follow_key}{$i}{'clin_shared:days_to_last_followup'}{'content'};
$vital_status=$userxml->{$patient_key}{$follow_key}{$i}{'clin_shared:vital_status'}{'content'};
$death=$userxml->{$patient_key}{$follow_key}{$i}{'clin_shared:days_to_death'}{'content'};
};
if($@){
$followup=$userxml->{$patient_key}{$follow_key}{$i}[0]{'clin_shared:days_to_last_followup'}{'content'};
$vital_status=$userxml->{$patient_key}{$follow_key}{$i}[0]{'clin_shared:vital_status'}{'content'};
$death=$userxml->{$patient_key}{$follow_key}{$i}[0]{'clin_shared:days_to_death'}{'content'};
}
if($vital_status eq 'Alive'){
if($followup>$survivalArr[0]){
$survivalTime="$followup\t0";
}
}
else{
if($death>$survivalArr[0]){
$survivalTime="$death\t1";
}
}
}
print WF "$patient_barcode\t$survivalTime\t$age\t$gender\t$grade\t$pathologic_stage\t$pathologic_T\t$pathologic_M\t$pathologic_N\n";
}
}
close(RD);
}
}
close(WF);