-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcanuint.pl
69 lines (59 loc) · 1.54 KB
/
canuint.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/perl
use strict;
use warnings;
use utf8;
# Apply the :utf8 layer to all three standard streams.
binmode $_, ":utf8" for (\*STDIN, \*STDOUT, \*STDERR);

# Verbose mode is on only when the first command-line argument is exactly -v.
my $verbose = (scalar(@ARGV) > 0 && $ARGV[0] eq '-v');
# %model has three keys: 'M', 'C', 'U'.
# Each value is a hashref whose keys are the feature patterns (the compiled,
# case-insensitive regex, stringified because hash keys are strings) and
# whose values are the number read from that class's column of model.txt.
my %model;
open(my $model_fh, "<:utf8", "model.txt") or die "Could not open model file: $!";
while (my $line = <$model_fh>) {
    chomp $line;
    $line =~ s/\r\z//;    # tolerate model files saved with CRLF line endings

    # Expected layout: pattern <TAB> C value <TAB> M value <TAB> U value
    my ($patt, $c, $m, $u) =
        $line =~ m/^([^\t]+)\t([0-9.+e-]+)\t([0-9.+e-]+)\t([0-9.+e-]+)$/;

    # A line that does not fit the layout would otherwise compile to an empty
    # pattern (which matches every input) with undefined values, so skip it.
    # Blank lines are skipped silently; anything else is reported once.
    unless (defined $patt) {
        warn "Skipping malformed line $. in model file\n" if $line =~ /\S/;
        next;
    }

    # qr// dies here if the pattern text is not a valid regular expression.
    my $compiled = qr/$patt/i;
    $model{'C'}->{$compiled} = $c;
    $model{'M'}->{$compiled} = $m;
    $model{'U'}->{$compiled} = $u;
}
close $model_fh;
# keys are M, C, or U, values are logP(text|key)
# Seeded explicitly so all three totals exist even when the model is empty.
my %answer = map { $_ => 0 } qw(C M U);

# Compile each feature pattern once instead of once per input token: the
# model's hash keys are plain strings, so matching against them directly
# would recompile the regex every time. Patterns are visited in sorted order
# so the floating-point sums do not depend on hash iteration order.
# Each entry is [ model key, compiled regex ].
my @features = map { [ $_, qr/$_/ ] } sort keys %{ $model{'C'} || {} };

# reads one token per line; new lines are '\n'
my @trigram;
while (my $token = <STDIN>) {
    chomp $token;

    # Slide the window of up to three most recent tokens; the literal
    # two-character token \n does not enter the window.
    unless ($token eq "\\n") {
        push @trigram, $token;
        shift @trigram if (scalar @trigram > 3);
    }

    # NOTE(review): a \n token leaves the window unchanged, so the same
    # trigram is scored a second time below. Kept as-is because it may be
    # what the model expects - confirm whether this is intended.
    my $trigramstr = "@trigram";
    for my $feature (@features) {
        my ($key, $re) = @{$feature};
        next unless $trigramstr =~ $re;

        # Every class receives its own value for the matching pattern.
        for my $class (keys %answer) {
            $answer{$class} += $model{$class}->{$key};
        }
    }
}
# Pick the output label from the accumulated class totals and print it.
# Classes are always handled in the fixed order C, M, U so that the verbose
# listing and the tie-breaking do not depend on hash iteration order.
my @classes = qw(C M U);

# A total that was never set counts as 0 rather than triggering
# uninitialized-value warnings in the numeric comparisons below.
my %score = map { $_ => (defined $answer{$_} ? $answer{$_} : 0) } @classes;

if ($verbose) {
    print "$_: $score{$_}\n" for @classes;
}

# C is the default; M or U replaces it only with a strictly greater total
# (so C wins ties against either, and M wins a tie against U).
my $result = 'C';
for my $class (qw(M U)) {
    $result = $class if ($score{$class} > $score{$result});
}

my ($c, $m, $u) = @score{@classes};
if ($c == 0 and $m == 0 and $u == 0) {
    # Nothing matched at all: report N.
    $result = 'N';
}
elsif ($result ne 'U' and $c != 0 and abs($c - $m) / abs($c) < 0.1) {
    # C and M are within 10% of each other (relative to C): too close to
    # call, so report N. The $c != 0 test also guards the division.
    $result = 'N';
}

print "$result\n";
exit 0;