utt/nawszelkiwypadek/tools/gue_dic/count_prefs.pl

87 lines
1.4 KiB
Perl
Raw Normal View History

#! /usr/bin/perl
use locale;
use strict;
# Table of prefix statistics for the group of words currently being processed.
# Each element is a reference to a two-element array:
#   [0]  the prefix string,
#   [1]  a reference to a hash mapping a description to the number of times
#        that description was recorded for the prefix.
# Entries are added and updated by addPref(), written out by printPrefs(),
# and the whole table is emptied by the main loop after each printout.
my @prefs;
# Record one occurrence of description $desc under prefix $pref.
#
# The file-level table @prefs holds one entry per prefix, each of the form
# [ prefix, { description => count } ].  If an entry for exactly $pref is
# already present, its counter for $desc is incremented; otherwise a new
# entry with a count of 1 is appended to the end of the table.
#
# Parameters:
#   $pref - the prefix string identifying the entry
#   $desc - the description to count under that prefix
#
# Returns nothing.
sub addPref {
    my ($pref, $desc) = @_;

    for my $entry (@prefs) {
        # Compare the stored prefix literally.  A pattern match such as
        # /^$pref/ would also accept any longer stored prefix that merely
        # starts with $pref, and would interpret regex metacharacters.
        if ($entry->[0] eq $pref) {
            $entry->[1]{$desc}++;
            return;
        }
    }

    # First time this prefix is seen: start a fresh counter hash for it.
    push @prefs, [ $pref, { $desc => 1 } ];
    return;
}
# Write the contents of the prefix table @prefs to standard output.
#
# For every prefix and every description recorded under it, one line with
# four tab-separated fields is printed:
#
#   PREFIX <TAB> DESCRIPTION <TAB> COUNT <TAB> TOTAL
#
# COUNT is the number stored for that description under the prefix and TOTAL
# is the sum of the counts of all descriptions of the same prefix.  Prefixes
# are printed in table order; descriptions come out in hash order.
sub printPrefs {
    foreach my $entry (@prefs) {
        my ($prefix, $counts) = @{$entry};

        # Work on a private copy of the counter hash.
        my %count_of = %{$counts};
        my @descs    = keys %count_of;

        # Sum of all counts recorded for this prefix.
        my $total = 0;
        $total += $count_of{$_} foreach @descs;

        foreach my $d (@descs) {
            print join("\t", $prefix, $d, $count_of{$d}, $total) . "\n";
        }
    }
}
# ---------------------------------------------------------------------------
# Main program.
#
# Usage: count_prefs.pl MIN_PREF_LEN MAX_PREF_LEN
#
# Reads lines containing "word;description" from the files remaining on the
# command line, or from standard input when none remain.  For each word,
# every prefix whose length lies between MIN_PREF_LEN and MAX_PREF_LEN is
# passed to addPref() together with the description.
#
# Consecutive words that start with the same leading MIN_PREF_LEN characters
# form a group.  When a word outside the current group arrives, the table is
# printed with printPrefs() and emptied before the new group is started.
# NOTE(review): this presumably expects the input to be sorted by word, so
# that each group is contiguous -- confirm with whatever produces the input.
#
# Diagnostics are written to STDERR with a non-zero exit status so that a
# consumer of STDOUT does not mistake them for data.
# ---------------------------------------------------------------------------
if (@ARGV < 2) {
    print STDERR "USAGE: count_prefs.pl MIN_PREF_LEN MAX_PREF_LEN\n";
    exit 1;
}
my $MIN = shift;
my $MAX = shift;
# A third positional argument, if present, is taken off @ARGV here, so <>
# below will not try to open it as an input file.  Its value is not used.
my $PART = shift;

# Prefix lengths must be plain non-negative integers.
for my $arg ($MIN, $MAX) {
    if ($arg !~ /\A[0-9]+\z/) {
        print STDERR "MIN_PREF_LEN and MAX_PREF_LEN must be non-negative integers (got '$arg')\n";
        exit 1;
    }
}
if ($MIN > $MAX) {
    print STDERR "MIN_PREF_LEN > MAX_PREF_LEN! ($MIN > $MAX)\n";
    exit 1;
}
$MIN += 0;
$MAX += 0;

# Leading characters shared by the words of the current group;
# undefined while no group is open.
my $begin;

while (my $line = <>) {
    # Ignore lines that do not contain "word;description".  Without this
    # check the capture variables would not describe the current line.
    next unless $line =~ /(\w+);(.*)$/;
    my ($pref, $desc) = ($1, $2);

    # A word that does not start with the group's leading characters closes
    # the group: print what has been collected and start over.
    if (defined $begin && index($pref, $begin) != 0) {
        printPrefs();
        @prefs = ();
        undef $begin;
    }

    # The word that opens a group defines the group's leading characters.
    $begin = substr($pref, 0, $MIN) unless defined $begin;

    # Count each prefix of the word once.  The word has no prefix longer
    # than itself, so the upper bound is capped at the word length; a word
    # shorter than MIN_PREF_LEN therefore contributes no prefix at all.
    my $longest = length($pref) < $MAX ? length($pref) : $MAX;
    for my $len ($MIN .. $longest) {
        addPref(substr($pref, 0, $len), $desc);
    }
}

# Print the last group.
printPrefs();