linux下perl处理文本---使用hash处理
2013-12-17 21:21
369 查看
# Usage: perl gather_family_protein_gene.pl -a name_of_animal -e ../Extract_result -n GATHER
#
# For every species listed (one per line) in the -a file, scan the regular
# files in <extract dir>/<species>/ and append one line per protein to
# <extract dir>/<species>/<save name>, formatted as
#     protein=>[gene<TAB>]tf=>evalue[<TAB>|<TAB>tf=>evalue ...]
# where "tf" is the name of the file the hit was read from, without its
# extension.  Everything written to STDERR (including die messages) is
# appended to the error log file.
use strict;
use warnings;
use Getopt::Long;
use File::Basename;

# The input options list
my ($animalName, $extractDir, $saveName, $help, $errorLog);
GetOptions(
    'help|h' => \$help,
    'a:s'    => \$animalName,
    'e:s'    => \$extractDir,
    'n:s'    => \$saveName,
    'r:s'    => \$errorLog,
);

# Print the command line help to STDOUT.
sub usage {
    print <<"USAGE";
usage:
#version: perl $0 [options]
#author: Oshyn Song <dualyangsong\@gmail.com>
#history: 2013-12-17
#desc: Gather the extract result to a file by species
options:
-h --help:print the info
-a :input the animal species filename
-e :input the extract result directory
-n :the save result filaname
-r :the error log file name
#perl $0
USAGE
    return;
}

# Return $name up to (not including) its first '.'.
# A name without any '.' is returned unchanged: index() yields -1 in that
# case, and passing -1 to substr() as a length would drop the last character.
sub strip_extension {
    my ($name) = @_;
    my $dot = index($name, '.');
    return $dot < 0 ? $name : substr($name, 0, $dot);
}

# Change the STDERR to errorlog file
$errorLog = 'errorlog' if !defined $errorLog;
open(STDERR, '>>', $errorLog) or die "Can not open errorlog $!";

# Test if given the necessary options
if (defined $help
    || !(defined $animalName && defined $extractDir && defined $saveName))
{
    usage();
    exit 0;
}

print "Start process...\n";

# Open the names of all animal species file
open(my $animal_fh, '<', $animalName)
    or die "Can not open file of ${animalName} $!";
print "Open the file of animal species name successfully.\n\n";

# Read the animal species name line by line
while (defined(my $entry = <$animal_fh>)) {
    chomp $entry;

    # A blank line would resolve to the extract root itself; skip it.
    next if $entry =~ /^\s*$/;

    my $animal_name = strip_extension($entry);
    my $animal_dir  = "${extractDir}/${animal_name}";

    opendir(my $dir_handle, $animal_dir)
        or die "Can not open directory of ${animal_dir}. $!";
    print "process ${animal_name}...\n";

    # Open the result file (append mode: earlier content is kept)
    my $save_path = "${animal_dir}/${saveName}";
    open(my $out_fh, '>>', $save_path)
        or die "Can not open ${save_path}. $!";

    my %hits_for;    # protein ID => "tf=>evalue" entries joined by "\t|\t"
    my %gene_of;     # protein ID => first gene ID seen for that protein

    # Read every filename and open it
    for my $filename (readdir $dir_handle) {
        next if $filename =~ /^\./;

        # The result file lives in the directory being scanned; it is not input.
        next if $filename eq $saveName;

        my $filepath = "${animal_dir}/${filename}";
        next unless -f $filepath and -r $filepath;

        # Three-argument open so that odd file names cannot change the mode.
        open(my $in_fh, '<', $filepath)
            or die "Can not open the file : ${filepath} $!";
        my $tfname = strip_extension($filename);

        while (defined(my $line = <$in_fh>)) {
            chomp $line;
            if ($line =~ /^(ENS[\w]+?[\d]{11})[\t]([0-9e\-\.]+)$/) {
                # "<protein>\t<evalue>" line: record a hit for this file.
                my ($protein, $evalue) = ($1, $2);
                my $hit = "${tfname}=>${evalue}";
                $hits_for{$protein} =
                    exists $hits_for{$protein}
                    ? "$hits_for{$protein}\t|\t${hit}"
                    : $hit;
            }
            elsif ($line =~ /^(ENS[\w]+?[\d]{11})[\t](ENS[\w]+?[\d]{11})/) {
                # "<protein>\t<gene>" line.  Genes are kept in their own hash
                # and merged at output time, so the result does not depend on
                # the order in which readdir returns the files.
                my ($protein, $gene) = ($1, $2);
                $gene_of{$protein} = $gene unless exists $gene_of{$protein};
            }
        }
        close $in_fh;
    }
    closedir $dir_handle;

    # Only proteins with at least one hit are written; sorted so that the
    # output order is reproducible between runs.
    for my $protein (sort keys %hits_for) {
        my $value =
            exists $gene_of{$protein}
            ? "$gene_of{$protein}\t$hits_for{$protein}"
            : $hits_for{$protein};
        print {$out_fh} "${protein}=>${value}\n";
    }

    # Buffered write errors only surface at close time.
    close $out_fh or die "Can not close ${save_path}. $!";
    print "${animal_name} process finished!\n\n";
}
close $animal_fh;

__END__
处理后的结果如下:
相关文章推荐
- Linux下安装OpenCV
- Linux内核抢占机制(preempt)
- linux mount 命令使用
- linux进程间共享内存
- linux文件系统描述
- linux 2.6up的设备和设备驱动模型
- 利用Screen实现Linux程序后台运行
- linux系统管理
- linux嵌入式系统驱动程序的阻塞与异步
- 详解linux运维工程师入门级必备技能
- Linux查看程序端口占用情况
- Linux文件系统恢复
- Linux USB Gadget--设备枚举
- Linux下压缩
- linux 3.5.4 PTRACE(系列六)
- Linux中fork()函数详解(实例讲解)
- Puppy Linux安装gcc编译器
- linux常用命令之 -- grep备忘
- Linux下查看端口号
- linux ***客户端设置