perl入门

Perl入门06:常用字符串处理函数

2020-04-19  本文已影响0人  小贝学生信

1、 index

#!/usr/bin/perl -w
use strict;

# Demonstrates index() and rindex() on a DNA string.
# Each result is kept in its own variable: re-declaring the same "my"
# variable in one scope triggers a "masks earlier declaration" warning
# under -w and hides every result except the last one.
my $seq = "TGAAACTCTACAATCTGAAAGATGCACAACGAGCAGGTAAGCTATGCGCAAGCCGTAACCCAGGGGTTAA";

# First occurrence of ATG (arguments are passed in parentheses).
my $first = index($seq, "ATG");

# Start searching at offset 23, i.e. from the 24th base of the string.
my $from_offset = index($seq, "ATG", 23);

# Second occurrence of ATG: restart the search one position past the first hit.
my $second = index($seq, "ATG", $first + 1);

# Last occurrence of ATG.
my $last = rindex($seq, "ATG");

print "$first\n";
print "$from_offset\n";
print "$second\n";
print "$last\n";

2、 substr

#!/usr/bin/perl -w
use strict;

# Demonstrates substr(): first extracting a region of a string, then
# replacing part of a string in place.
my $seq = "TGAAACTCTACAATCTGAAAGATGCACAACGAGCAGGTAAGCTATGCGCAAGCCGTAACCCAGGGGTTAA";
my $atg = index($seq, "ATG");     # offset of the first ATG
my $taa = rindex($seq, "TAA");    # offset of the last TAA
my $len = $taa - $atg;            # from the ATG up to, not including, that TAA

# Arguments, in order: source string, start offset, length to extract.
my $output = substr($seq, $atg, $len);
print "$output\n";

my $string = "Hello,world!";
# substr() used as an lvalue: replace the first 5 characters of $string
# with "Goodbye".
substr($string, 0, 5) = "Goodbye";

3、 排序

3.1、按照数值大小排序
#!/usr/bin/perl -w
use strict;

# Contrast the default (string) sort with explicit numeric comparisons.
my @numbers = (1 .. 20);

# No comparison block: elements are compared as strings.
my @string_order = sort @numbers;

# Numeric comparison, smallest first.
my @ascending = sort { $a <=> $b } @numbers;

# Numeric comparison, largest first.
# Equivalent alternative: sort { $b <=> $a } @numbers
my @descending = reverse sort { $a <=> $b } @numbers;

# Print each ordering on its own line, elements separated by spaces.
for my $ordering (\@string_order, \@ascending, \@descending) {
    print "@{$ordering}\n";
}
数字排序
3.2、字母排序
#!/usr/bin/perl -w
use strict;

# Compare three ways of ordering a list of single letters.
my @letters = qw(A D k z E G F a k B c k z);

# Default sort: string comparison.
my @default_order = sort @letters;

# Explicit string comparison with cmp.
my @cmp_order = sort { $a cmp $b } @letters;

# Case-insensitive alphabetical order: compare lower-cased copies.
my @caseless_order = sort { lc($a) cmp lc($b) } @letters;

# Print each ordering on its own line, elements separated by spaces.
for my $ordering (\@default_order, \@cmp_order, \@caseless_order) {
    print "@{$ordering}\n";
}
字母排序
3.3、哈希数据结构排序
#!/usr/bin/perl -w
use strict;

# Score sheet stored as a hash: name => score.
my %score = (
    barney => 95,
    fred   => 92,
    dino   => 67,
    bamm   => 81,
    tom    => 95,
    kate   => 88,
    bill   => 99,
);

# Listing ordered alphabetically by name (the hash key).
my @by_name = sort keys %score;
for my $name (@by_name) {
    print "$name => $score{$name}\n";
}

print "\n";

# Listing ordered by score (the hash value), highest first.
# To order equal scores by name, use this comparison instead:
#   sort { $score{$b} <=> $score{$a} or $a cmp $b } keys %score
my @by_score = sort { $score{$b} <=> $score{$a} } keys %score;
for my $name (@by_score) {
    print "$name => $score{$name}\n";
}
哈希排序

4、 其它一些字符处理函数

5、 生信综合练习例子

#!/usr/bin/perl
use strict;
use warnings;

# Translate every nucleotide record of a FASTA file into an amino-acid
# record and write the result to a second FASTA file.
#
# Arguments: <cds file>  input FASTA with nucleotide sequences
#            <pep file>  output FASTA with the translated sequences

# Both paths are required; a single argument would leave the output undefined.
if (scalar @ARGV < 2) {
        die "This program is used to trans cds to pep
             perl $0 <cds file>  <pep file> \n";
}

my ($cds_path, $pep_path) = @ARGV;

# Three-argument open with lexical handles, failing loudly when a file
# cannot be opened instead of silently reading or writing nothing.
open my $in_fh, '<', $cds_path
    or die "Cannot open '$cds_path' for reading: $!\n";
open my $out_fh, '>', $pep_path
    or die "Cannot open '$pep_path' for writing: $!\n";

{
    # Use '>' as the record separator so each read returns one FASTA record.
    # "local" restores the previous separator when this block is left.
    local $/ = ">";

    <$in_fh>;    # discard whatever precedes the first '>'

    RECORD:
    while (my $record = <$in_fh>) {
        chomp $record;                        # drop the trailing '>' separator
        next RECORD if $record =~ /^\s*$/;    # skip empty or blank records

        # The first line is the ID; everything after it is the sequence,
        # which may be wrapped over several lines.
        my ($id, $dna) = split /\n/, $record, 2;
        $dna = "" unless defined $dna;        # header line without a sequence
        $id  =~ s/\r\z//;                     # tolerate CRLF line endings
        $dna =~ tr/\r\n//d;                   # join the sequence into one line

        # Walk the sequence three nucleotides at a time and translate each
        # codon; an incomplete codon at the end is ignored.
        my $protein = "";
        for (my $i = 0; $i < length($dna) - 2; $i += 3) {
            $protein .= codon2aa(substr($dna, $i, 3));
        }

        print {$out_fh} ">$id\n";
        print {$out_fh} "$protein\n";
    }
}

close $in_fh  or die "Cannot close '$cds_path': $!\n";
# Buffered write errors only surface at close, so check it.
close $out_fh or die "Cannot close '$pep_path': $!\n";

# Subroutine definitions follow.
# Translate a single DNA codon into its one-letter amino-acid code.
#
# Argument: $codon - a three-nucleotide string in any letter case.
# Returns:  the amino-acid letter; '' (empty string) for the stop codons
#           TAA, TAG and TGA; "X" for anything not in the table, including
#           an undefined argument.
#
# The lookup table is built once instead of on every call. It lives in a
# BEGIN block because BEGIN runs at compile time: the table is therefore
# already filled in when code earlier in the file calls codon2aa() before
# execution reaches this point.
BEGIN {
    my %genetic_code = (

            'TCA' => 'S',    # Serine
            'TCC' => 'S',    # Serine
            'TCG' => 'S',    # Serine
            'TCT' => 'S',    # Serine
            'TTC' => 'F',    # Phenylalanine
            'TTT' => 'F',    # Phenylalanine
            'TTA' => 'L',    # Leucine
            'TTG' => 'L',    # Leucine
            'TAC' => 'Y',    # Tyrosine
            'TAT' => 'Y',    # Tyrosine
            'TAA' => '',    # Stop
            'TAG' => '',    # Stop
            'TGC' => 'C',    # Cysteine
            'TGT' => 'C',    # Cysteine
            'TGA' => '',    # Stop
            'TGG' => 'W',    # Tryptophan
            'CTA' => 'L',    # Leucine
            'CTC' => 'L',    # Leucine
            'CTG' => 'L',    # Leucine
            'CTT' => 'L',    # Leucine
            'CCA' => 'P',    # Proline
            'CCC' => 'P',    # Proline
            'CCG' => 'P',    # Proline
            'CCT' => 'P',    # Proline
            'CAC' => 'H',    # Histidine
            'CAT' => 'H',    # Histidine
            'CAA' => 'Q',    # Glutamine
            'CAG' => 'Q',    # Glutamine
            'CGA' => 'R',    # Arginine
            'CGC' => 'R',    # Arginine
            'CGG' => 'R',    # Arginine
            'CGT' => 'R',    # Arginine
            'ATA' => 'I',    # Isoleucine
            'ATC' => 'I',    # Isoleucine
            'ATT' => 'I',    # Isoleucine
            'ATG' => 'M',    # Methionine
            'ACA' => 'T',    # Threonine
            'ACC' => 'T',    # Threonine
            'ACG' => 'T',    # Threonine
            'ACT' => 'T',    # Threonine
            'AAC' => 'N',    # Asparagine
            'AAT' => 'N',    # Asparagine
            'AAA' => 'K',    # Lysine
            'AAG' => 'K',    # Lysine
            'AGC' => 'S',    # Serine
            'AGT' => 'S',    # Serine
            'AGA' => 'R',    # Arginine
            'AGG' => 'R',    # Arginine
            'GTA' => 'V',    # Valine
            'GTC' => 'V',    # Valine
            'GTG' => 'V',    # Valine
            'GTT' => 'V',    # Valine
            'GCA' => 'A',    # Alanine
            'GCC' => 'A',    # Alanine
            'GCG' => 'A',    # Alanine
            'GCT' => 'A',    # Alanine
            'GAC' => 'D',    # Aspartic Acid
            'GAT' => 'D',    # Aspartic Acid
            'GAA' => 'E',    # Glutamic Acid
            'GAG' => 'E',    # Glutamic Acid
            'GGA' => 'G',    # Glycine
            'GGC' => 'G',    # Glycine
            'GGG' => 'G',    # Glycine
            'GGT' => 'G',    # Glycine
            );

    sub codon2aa {
        my ($codon) = @_;

        # An undefined value cannot be a codon; report it as unmatched
        # rather than letting uc() warn about an uninitialized value.
        return "X" unless defined $codon;

        $codon = uc $codon;    # table keys are upper case

        # Codons that are not in the table are reported as "X".
        return exists $genetic_code{$codon} ? $genetic_code{$codon} : "X";
    }
}
perl 1.pl gene.ffn gene.pep
head gene.pep
image.png
上一篇 下一篇

猜你喜欢

热点阅读