utt/lib/attr.pm

134 lines
2.5 KiB
Perl

package attr;
use locale;
use strict;
use Data::Dumper;
our $pos_re = qr/(?:[[:upper:]]+)/;
our $attr_re = qr/(?:[[:upper:]]+)/;
our $val_re = qr/(?:[[:lower:][:digit:]+?!*-]|<[^>\n]+>)/;
our $av_re = qr/(?:$attr_re$val_re+)/;
our $avlist_re = qr/(?:$av_re+)/;
our $cat_re = qr/(?:$pos_re(?:\/$avlist_re)?)/;
sub match(\@\@)
{
my ($cat1,$avs1)= @{shift @_};
my ($cat2,$avs2)= @{shift @_};
if($cat1 ne $cat2 && $cat1 ne '*' && $cat2 ne '*')
{
return 0;
}
else
{
ATTR:for my $attr (keys %$avs1)
{
if(exists $avs2->{$attr})
{
for my $val (keys %{$avs1->{$attr}})
{
next ATTR if $avs2->{$attr}->{$val};
}
return 0;
last ATTR;
}
}
}
return 1;
}
sub agree(\@\@$)
{
my $val1 = $_[0]->[1]->{$_[2]};
my $val2 = $_[1]->[1]->{$_[2]};
return 1 if !$val1 || !$val2;
for my $v (keys %$val1)
{
return 1 if exists $val2->{$v};
}
return 0;
}
# funkcja parse
# arg: deskrypcja
# warto¶æ: referencja do tablicy [<cat>, <avs>],
# gdzie <avs> jest referencja do hasza, zawierajacego pary
# atrybut=>hasz warto¶ci (pary warto¶æ=>1), czyli np.
# [
# 'ADJ',
# {
# 'KOLEDZY' => {
# '<alojzy>' => 1,
# '<karol>' => 1,
# '<jan>' => 1
# },
# 'C' => {
# 'p' => 1,
# 'a' => 1,
# 'i' => 1
# },
# 'N' => {
# 'p' => 1
# }
# }
# ];
sub parse ($)
{
my ($dstr)=@_;
my $avs={};
my ($cat,$attrlist) = split '/', $dstr;
ATTR:
# while( $attrlist =~ /([[:upper:]]+)((?:[[:lower:][:digit:]+?!*-]|<[^>\n]+>)+)/g )
while( $attrlist =~ /($attr_re)($val_re+)/g )
{
my ($attrstr,$valstr)=($1,$2);
my %vals;
while($valstr =~ /$val_re/g)
{
my $val = $&;
next ATTR if $val eq '*';
$val =~ s/^<([[:lower:]])>$/$1/;
$vals{$val}=1;
}
$avs->{$attrstr} = \%vals; # dlaczego to dziala? %vals jest lokalne
}
[$cat, $avs];
}
# funkcja unparse
# arg: jak warto¶æ parse
# warto¶æ: deskrypcja - napis
sub unparse (\@)
{
my ($cat,$avs)= @{shift @_};
my $dstr=$cat;
my @attrs = keys %$avs;
if(@attrs)
{
$dstr .= '/';
for my $attr ( sort @attrs )
{
$dstr .= $attr . (join '', sort keys %{$avs->{$attr}});
}
}
$dstr;
}
sub canonize ($)
{
unparse @{parse @_[0]} ;
}
1;