-
Notifications
You must be signed in to change notification settings - Fork 1
/
sort_pattern_out2.pl
62 lines (52 loc) · 1.46 KB
/
sort_pattern_out2.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
use warnings;
use strict;
# Read the file named by $ARGV[0], where each line is
#     [pattern-string]\t\t[countnum]
# and, for every prefix "i-j-" with 0 <= i, j < 128 and i != j, print the
# highest-count patterns that start with that prefix to STDOUT
# (one " pattern\tcount" line each, highest count first).
#
# The file is scanned once per prefix, so only the patterns of a single
# prefix are held in memory at any time.
unless (defined $ARGV[0] and length $ARGV[0]) {
    die "requires one file name (output)";
}

# Maximum number of patterns reported per prefix.
my $NBestN = 20;

sub take_only_best;

for my $i (0 .. 127) {
    for my $j (0 .. 127) {
        next if $i == $j;

        my $target    = "$i-$j-";
        my $target_re = qr/^\Q$target\E/;

        open my $in, "<", $ARGV[0]
            or die "cannot open '$ARGV[0]' for reading: $!";
        print STDERR "look for best that start with \"$target\"\n";

        # pattern => count for the lines that start with $target;
        # a later duplicate pattern overwrites an earlier one.
        my %temp_pat;
        while (my $line = <$in>) {
            next unless $line =~ $target_re;

            # Strip the newline first so that a final line without one
            # is parsed like any other line.
            chomp $line;

            # Skip lines lacking the "pattern\t\tcount" shape; using
            # $1/$2 after a failed match would store stale/undef data.
            next unless $line =~ /\A(.+)\t\t(.+)\z/;
            $temp_pat{$1} = $2;
        }
        close $in;

        my %bests = take_only_best(\%temp_pat);

        # Highest count first; equal counts are ordered by pattern so
        # the output is reproducible from run to run.
        my @best_keys =
            sort { $bests{$b} <=> $bests{$a} or $a cmp $b } keys %bests;

        for my $pattern (@best_keys) {
            print " ", $pattern, "\t", $bests{$pattern}, "\n";
        }
    }
}
# Pick the entries with the largest values out of a hash.
#
# Arguments:
#   $_[0]  reference to a hash; its values are compared numerically
#   $_[1]  optional maximum number of entries to keep
#          (defaults to the file-level $NBestN)
#
# Returns a new hash (as a key/value list) holding at most that many
# entries, namely those with the largest values.  Entries with equal
# values are ranked by key, so the selection is the same on every run.
# The input hash is not modified.
sub take_only_best {
    my ($scores, $limit) = @_;
    $limit = $NBestN unless defined $limit;

    # Rank the keys: largest value first, ties broken by key.
    my @ranked =
        sort { $scores->{$b} <=> $scores->{$a} or $a cmp $b }
        keys %{$scores};

    # Index of the last key to keep; -1 (nothing kept) when the input
    # is empty or the limit is not positive.
    my $last = ($limit < @ranked ? $limit : scalar @ranked) - 1;
    $last = -1 if $last < 0;

    my @top = @ranked[0 .. $last];

    my %result;
    @result{@top} = @{$scores}{@top};
    return %result;
}