-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_barcodes.pl
85 lines (70 loc) · 2.18 KB
/
test_barcodes.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/perl -w
use strict;
use warnings;
use Getopt::Long;
#use Statistics::Descriptive;
my ($sampleFile, $outfile) = '/illumina1/YanaiLab/eitan/UTRome/wholeEmb/data/CE_Samples.tab' # @ARGV;
my $dir = '/home/eitan/UTRome/samples/';
my $barcodeFile = '/illumina1/YanaiLab/eitan/UTRome/barcodes_cel-seq.tab';
my $bc = read_barcode($barcodeFile);
## Reads Sample information and generates a) samfile and b) htseq file for each
my %sample;
open(IN, $sampleFile) || die print STDERR "cannot open $sampleFile for reading\n";
open (OUT, ">$outfile") || die print STDERR "cannot open $outfile for writing\n";
my $line = <IN>;
my @header = split ("\t", $line);
while (<IN>)
{
chomp;
my @m = split ("\t", $_); my $eName = $m[0]; my $idName = $m[2]; my $sampleName = $m[3];
for (my $i=4;$i <@header;$i++)
{$sample{$idName}->{$header[$i]} = $m[$i];}
my $barcode = $sample{$idName}->{'barcode'};
my $bcSeq = $bc->{$barcode};
print STDERR "id=$idName,barcode = $bcSeq\n";
my $currDir = $dir.$idName.'/';
print STDERR "current directory: $currDir\n";
# read number of good barcodes
my $r1_r2File = $currDir.'R1_R2.data';
my $goodCounts = `grep -c SBS123 $r1_r2File`;
#read barcode error file
my $bcErrorFile = $currDir.'R1_barcode_error.txt';
open (INNER, $bcErrorFile) || die print STDERR "cannot open $bcErrorFile for reading\n";
my %bcError;
while (<INNER>){
chomp;
my $header = $_;
my ($id,@rest) = split(' ', $header);
my $seqLine = <INNER>;
if ($seqLine =~ /^(\w{8}).*/){
$bcError{$1}++;
}
my $lines = <INNER>;
$lines .= <INNER>;
}
close (INNER);
my $sum = 0;
print STDERR "printing error barcodes to file\n";
print OUT ">$bcSeq\n";
while( my( $key, $value ) = each %bcError ){
print OUT "$key\t$value\n";
$sum += $value;
}
my $perc = sprintf('%.2f', ($sum/($goodCounts+$sum)) * 100);
print OUT "sum=$sum reads, $perc % of $idName sample \n";
}
close (IN);
close (OUT);
sub read_barcode {
my ($infile) = @_;
my %hash;
open(IN, $infile) || die print STDERR "cannot open $infile for reading\n";
my $header = <IN>;
while (<IN>){
chomp;
my ($id,$sequence) = split;
$hash{$id} = $sequence;
}
close(IN);
return \%hash;
}