-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchr.convertMaf2List.pl
112 lines (109 loc) · 2.61 KB
/
chr.convertMaf2List.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#! /usr/env perl
use strict;
use warnings;
my $maf=$ARGV[0];
my $output="chr.maf.lst";
my $ref="ref";
my @species=("");
open O,">$output";
open E,">no_ref.maf";
#open O,"| gzip - > $output";
my @head=("chr","pos",@species);
my $head=join "\t",@head;
print O "$head\n";
my $control=0;
my $content="";
open I,"cat $maf |";
while(<I>){
next if(/^#/);
if(/^a\s+score=/){
$content=$_;
while(<I>){
if(/^s\s+/){
$content.=$_;
}
else{
last;
}
}
&analysis($content);
}
# last if($control++>=0);
}
close I;
close O;
sub analysis{
my $content=shift;
my @line=split(/\n/,$content);
my $head=shift @line;
$head=~/score=([\d\.]+)/;
my $score=$1;my $all_strand;
# print "$score\n";
my %pos;
my $isRefFound=0;
my $ref_chr="NA";
foreach my $line(@line){
my @a=split(/\s+/,$line);
my ($s,$chr,$start,$alignment_length,$strand,$sequence_length,$sequence)=@a;my $all_strand = $strand;
$chr=~/^([^\.]+)\.(.*)/;
my $species=$1;
$chr=$2;
if($species eq $ref){
$ref_chr=$chr;
$isRefFound=1;
my @base=split(//,$sequence);
if($strand eq "+"){
my $pos=$start;
for(my $i=0;$i<@base;$i++){
if($base[$i] ne "-"){
$pos++;
$pos{$i}=$pos;
}
}
}
if($strand eq "-"){
my $pos=$start;
for(my $i=0;$i<@base;$i++){
if($base[$i] ne "-"){
$pos++;
my $real_pos = $sequence_length-1-($pos-1)+1;
$pos{$i}=$real_pos;
}
}
}
}
}
if($isRefFound == 0){
print E $content;
next;
# die "$ref not found!\nspecies name should be added before chr such as chr01 should be cattle.chr01\n";
}
my %data;
foreach my $line(@line){
my @a=split(/\s+/,$line);
my ($s,$chr,$start,$alignment_length,$strand,$sequence_length,$sequence)=@a;
$chr=~/^([^\.]+)\.(.*)/;
my $species=$1;
$chr=$2;
my @base=split(//,$sequence);
$sequence =~ tr/ATCGRYMK/TAGCYRKM/ if $all_strand eq "-";
for(my $i=0;$i<@base;$i++){
next if(!exists $pos{$i});
my $pos=$pos{$i};
$data{$pos}{$species}=$base[$i];
}
}
my @output_line;
foreach my $pos(sort {$a<=>$b} keys %data){
@output_line=($ref_chr,$pos);
foreach my $species(@species){
my $base="-";
if(exists $data{$pos}{$species}){
$base = $data{$pos}{$species};
}
push @output_line,$base;
}
my $output_line=join "\t",@output_line;
print O "$output_line\n";
}
}