Removed old files.

This commit is contained in:
Maciej Prill 2012-02-07 15:47:14 +01:00
parent 5f4d9c3b32
commit c21bdd6340
202 changed files with 313 additions and 15668 deletions

View File

@ -1,7 +0,0 @@
Tomasz Obrębski <obrebski@amu.edu.pl>
Michał Stolarski
Justyna Walkowska <ynka@amu.edu.pl>
Pawel Konieczka
Marcin Walas
Paweł Wereński
Mateusz Hromada <ruanda@amu.edu.pl>

34
README
View File

@ -1 +1,33 @@
http://utt.wmi.amu.edu.pl
TO COMPILE UTT YOU WILL NEED:
make
gengetopt
gcc
g++
flex
openfst library
python
TO BUILD DOCUMENTATION YOU WILL NEED:
texi2html
texinfo
some kind of TeX package, like texlive
TO RUN UTT PROGRAMS YOU WILL NEED:
perl module File::HomeDir
COMMANDS TO BE RUN IN THIS DIRECTORY:
% configure
% make
compiles all of the components
% make install
installs the package on your system in the directory defined by PREFIX in the configure
options
when --enable-utf8 is specified in configure options then if there's a directory with
_utf8 suffix in the source directory it will be compiled instead of the normal version

View File

@ -1,151 +0,0 @@
# main makefile
# ******************************
# * Local variables
# ******************************
# Working directory at the time the makefile is parsed.  ':=' expands the
# $(shell ...) call once instead of forking a shell on every reference.
CUR_DIR := $(shell pwd)
# Component sources are read from src/; build results are staged in output/.
SRC_DIR := $(CUR_DIR)/src
OUTPUT_DIR := $(CUR_DIR)/output
# ******************************
# * Global for make globals
# ******************************
# Root of the staging tree (all paths below hang off it).
UTT_DIR = $(OUTPUT_DIR)
# executables
UTT_BIN_DIR = $(UTT_DIR)/bin
# configuration files
UTT_CONF_DIR = $(UTT_DIR)/etc/utt
# language/encoding specific stuff: any extra files for some programs,
# like weights.kor, cats.dgc, gram.dgc, etc.
UTT_LANG_DIR = $(UTT_DIR)/share/utt
# some files like ser.l.template, terms.m4
UTT_LIB_DIR = $(UTT_DIR)/lib/utt
# documentation (man, info) and docs: FAQ, COPYRIGHT, NEWS, README
UTT_SHARE_DIR = $(UTT_DIR)/share
# Put the whole layout into the environment of every recipe command.
export UTT_DIR UTT_BIN_DIR UTT_CONF_DIR UTT_LANG_DIR UTT_LIB_DIR UTT_SHARE_DIR
##############################
# Components included in the distribution; each name is a subdirectory of
# $(SRC_DIR) and they are processed in the order listed.
COMPONENTS = \
    compiledic cor dgp fla gph grp gue kon kor kot lem mar \
    rm12 rs12 sen-l sen-nl ser tags tok.l unfla
##############################
# ------------------------------------------------------------------
# compilation section (compiles and copies the results to OUTPUT_DIR)
# ------------------------------------------------------------------
# Recreate the output tree and build every component, then report success.
# NOTE(review): nothing orders `dirs` before `components`, so a parallel
# run (make -j) could execute them concurrently -- confirm this makefile
# is only ever run serially.
compile: dirs components
	@echo "Make compiled successfully!"
.PHONY: compile
# Start from an empty staging tree: remove a previous output directory if
# one exists, then create the directory skeleton in a single mkdir call.
dirs:
	if [ -d ${UTT_DIR} ]; then rm -r ${UTT_DIR}; fi
	mkdir -p \
	  $(UTT_DIR) \
	  $(UTT_BIN_DIR) \
	  $(UTT_CONF_DIR) \
	  $(UTT_LANG_DIR) \
	  $(UTT_LIB_DIR) \
	  $(UTT_SHARE_DIR)/utt \
	  $(UTT_SHARE_DIR)/doc/utt \
	  $(UTT_SHARE_DIR)/info \
	  $(UTT_SHARE_DIR)/man/man3
.PHONY: dirs
# Build src/lib first, then build each component and run its `copy` target.
# $(MAKE) replaces the literal `make` so command-line flags and the
# jobserver reach the sub-makes.  The old recipe ended every command with
# `; cd $(CUR_DIR)`, which made the line succeed even when a sub-make had
# failed; now the first failure stops the build.
.PHONY: components
components:
	$(MAKE) -C $(SRC_DIR)/lib
	@for cmp in $(COMPONENTS); do \
	  $(MAKE) -C $(SRC_DIR)/$$cmp && \
	  $(MAKE) -C $(SRC_DIR)/$$cmp copy || exit 1; \
	done
# ------------------------------------------------------------------
# cleaning section (removes binaries from the SRC/COMPONENTS directories)
# ------------------------------------------------------------------
# Umbrella target: runs every clean_* step, then reports success.
clean: clean_components clean_doc clean_dist
	@echo "All files cleaned successfully!"
.PHONY: clean
# Run the `clean` target in every component directory and then in src/lib.
# Cleaning stays best effort, as before: a failing sub-make is ignored
# (`|| true`) so the remaining directories are still cleaned.  $(MAKE)
# replaces the literal `make` so flags are passed down.
.PHONY: clean_components
clean_components:
	@for cmp in $(COMPONENTS); do \
	  $(MAKE) -C $(SRC_DIR)/$$cmp clean || true; \
	done
	$(MAKE) -C $(SRC_DIR)/lib clean || true
# Run the `clean` target of the doc makefile.  Failures are ignored, as in
# the previous recipe; $(MAKE) replaces the literal `make`.
.PHONY: clean_doc
clean_doc:
	$(MAKE) -C $(CUR_DIR)/doc clean || true
# Placeholder: this target has no recipe, so it currently removes nothing.
.PHONY: clean_dist
clean_dist:
# ------------------------------------------------------------------
# packaging section (programs together with their additional files)
# ------------------------------------------------------------------
# Compile everything, then add configuration, data, library files and
# documentation to the output tree.
.PHONY: build
build: compile build_conf build_share build_lib build_doc
	@echo "All files builded successfully!"

# Copy the *.conf files into the configuration directory.
.PHONY: build_conf
build_conf:
	cp $(CUR_DIR)/conf/*.conf $(UTT_CONF_DIR)/

# Copy everything from lib/ into the library directory.
.PHONY: build_lib
build_lib:
	cp $(CUR_DIR)/lib/* $(UTT_LIB_DIR)/

# Build the documentation and run the doc makefile's `copy` target.
# $(MAKE) replaces the literal `make`, and the trailing `; cd $(CUR_DIR)`
# is gone, so a failing documentation build is no longer reported as
# success.
.PHONY: build_doc
build_doc:
	$(MAKE) -C $(CUR_DIR)/doc
	$(MAKE) -C $(CUR_DIR)/doc copy

# Copy everything from share/ into the language directory.
.PHONY: build_share
build_share:
	cp $(CUR_DIR)/share/* $(UTT_LANG_DIR)/
# ------------------------------------------------------------------
# distribution section (tarball, rpm, deb)
# ------------------------------------------------------------------
# Each dist_* target delegates to the makefile in dist/.  $(MAKE) replaces
# the literal `make`, and the trailing `; cd $(CUR_DIR)` that hid sub-make
# failures has been removed.

# Build the package tree, then the tarball and the rpm (dist_deb is not
# part of this list and has to be requested explicitly).
.PHONY: distribute
distribute: build dist_tarball dist_rpm
	@echo "All distributions created successfully!"

.PHONY: dist_tarball
dist_tarball: build
	$(MAKE) -C $(CUR_DIR)/dist tarball

.PHONY: dist_rpm
dist_rpm: build
	$(MAKE) -C $(CUR_DIR)/dist rpm

.PHONY: dist_deb
dist_deb: build
	$(MAKE) -C $(CUR_DIR)/dist deb
# ------------------------------------------------------------------
# sekcja testowania (kasuje tarballa z TARBALL_TEST_DIR,
# kompiluje, robi paczke,
# tworzy tarballa i wgrywa go do TARBALL_TEST_DIR)
# ------------------------------------------------------------------
#install: all
# cd ${UTT_DIR} && make install; cd ${CUR_DIR}
#uninstall:
# cd ${UTT_DIR} && make uninstall; cd ${DIR}
#reinstall:
# cd ${UTT_DIR} && make reinstall; cd ${DIR}
#dist: all
# tar -czvf $(UTT_DIST_NAME).tgz $(UTT_DIR)
#clean_dist:
# if [ -d $(UTT_DIST_DIR) ]; then rm -r $(UTT_DIST_DIR); else true; fi
# if [ -f $(UTT_DIST_FILE).tar.bz2 ]; then rm $(UTT_DIST_FILE).tar.bz2; else true; fi
# if [ -f $(UTT_DIST_PMDB_FILE).tar.bz2 ]; then rm $(UTT_DIST_PMDB_FILE).tar.bz2; else true; fi

View File

@ -1,25 +0,0 @@
TO COMPILE UTT YOU WILL NEED:
make
gengetopt
g++
flex
TO RUN UTT PROGRAMS YOU WILL NEED:
perl module File::HomeDir
COMMANDS TO BE RUN IN THIS DIRECTORY:
% make
compiles all the components and moves all files destined for
distribution into the directory [the value of UTT_DIR_NAME variable in
Makefile] (currently utt-0.9)
% make install
installs the package on your system in the directory ~/.utt
% make dist_tarball
prepares distribution file named ${UTT_DIR_NAME}.tgz (currently utt-0.9.tgz)

View File

@ -1,25 +0,0 @@
BARDZO WAZNE:
*
* instalka slownikow
* programy maja wykrywac brak slownikow -> "No language module installed"
na podst zmiennej language w utt.conf, "Cannot open dict...." gdy po prostu
nie znajdzie pliku slownika
* tresc na stronie
* statyczne laczenie bibl 32-bit (miejsce kompilacji)
* przemyslec sposob wybierania jezyka / slownika po zainstalowaniu roznych dystrybucji [PK, TO]
* gue nie sortuje wynikow, opcja weights dziala na odwrot
* kor nie wykonuje zamian <jednalitera> -> <dwielitery>, np. ż rz
WAZNE:
* zamienic kota na lepszego (Kubis) [TO]
* sen - unikac dwukrotnego wstawiania BOSow i EOSow [TO]
* polaczyc sen'y [TO]
* generowanie i sprawdzanie zaleznosci dla tarballa [PK]
* Zadania zwiazane z rozbudowa ser (src/ser/TODO).
* Nowa funkcjonalnosc dla kot? con? - kazde zdanie w nowym wierszu
* opcja info w dgp powinna miec domyslna wartosc d lub h
* przyspieszyc process_seg() -> obliczanie prefiksow pol z opcji -s i -S raz na poczatku programu, a nie w kolko
* kor: nie dziala opcja distance
* zunifikowac opcje cor i kor
* uporzadkowac sprawe zaleznosci miedzy modulem jezykowym a wyborem tagow (tag2re)

View File

@ -1,46 +0,0 @@
# some variables
# path where all necessary files are placed
# (they will be processed for making the distribution)
export UTT_DIST_DIR=$(UTT_DIR)
# path where the distribution package will be placed
export UTT_DIST_OUTPUT=$(UTT_DIR)/..
# temporary path for making the distribution; ':=' runs `pwd` once at parse
# time instead of every time the exported variable is expanded
export UTT_DIST_TMP := $(shell pwd)/dist_tmp
# -----------------------------------------------------------
# default task: display the available options.
# The target used to be spelled `defaul`, so the `.PHONY: default`
# declaration did not apply to it; the name now matches.
.PHONY: default
default:
	@echo "Using: make tarball|rpm|deb"
# -----------------------------------------------------------
# this task should compile utt (if necessary) and create tar.gz version
# ($(MAKE) replaces the literal `make` so flags reach the sub-make)
.PHONY: tarball
tarball:
	$(MAKE) -C tarball
# -----------------------------------------------------------
# this task should compile utt (if necessary) and create rpm version
# we build rpm (see spec/README for details)
.PHONY: rpm
rpm:
	$(MAKE) -C spec
# -----------------------------------------------------------
# this task should compile utt (if necessary) and create deb version
# we build deb (see deb/README for details)
.PHONY: deb
deb:
	$(MAKE) -C deb
# -----------------------------------------------------------
# Remove the temporary directory used while making distributions.
clean:
	rm -rf $(UTT_DIST_TMP)
.PHONY: clean

View File

@ -1,15 +0,0 @@
# For every program name read from standard input (one per line) print
# "<name>: " followed by the path reported by `which`, or by
# "failed (<value of $?>)" when the lookup does not succeed.
while (defined(my $app = <STDIN>)) {
    chomp $app;
    my $path = `which $app 2>/dev/null`;
    my $err  = $?;
    print "${app}: ", ($err != 0 ? "failed ($err)\n" : $path);
}

View File

@ -1 +0,0 @@
I put here some description.

View File

@ -1 +0,0 @@
Tu umieszczę opis po polsku.

View File

@ -1,29 +0,0 @@
# Reads "file: text" lines from standard input and prints, one per line,
# the first word of every pipeline stage found between backticks.
while (defined(my $entry = <STDIN>)) {
    chomp $entry;
    # first strip the source file name (everything up to the first colon)
    next unless $entry =~ /^[^:]*\:\s*(.*)$/;
    my $text = $1;
    # now keep only the command itself
    next unless $text =~ /\`\s*(.+)\s*\`/;
    my $command = $1;
    # split the pipeline into its stages
    for my $stage (split(/\s*\|\s*/, $command)) {
        # cut off the program's arguments and skip stages that are not
        # programs (--replace, $tmpfile_x, /g, \\, etc.)
        next unless $stage =~ /^([^\$\\\/\s\']+)(\s.*)?$/;
        print "$1\n";
    }
}

View File

@ -1,16 +0,0 @@
#!/bin/sh
# Print the requirements of an RPM package file (rpm -q -R -p FILE).

# check that an argument was given
if test $# -lt 1; then
    echo RPM file name expected!
    # `exit -1` is not a valid status in POSIX sh; use a plain failure code
    exit 1
fi

# check that it is a regular file, then that its name ends in "rpm"
# (same suffix test as the former `ls | tail -c 4`, without the pipeline
# and safe for names containing spaces)
if test -f "$1"; then
    case "$1" in
        *rpm) rpm -q -R -p "$1" ;;
        *)    echo "It's not a RPM file!" ;;
    esac
else
    echo RPM file not found!
fi

View File

@ -1,3 +0,0 @@
#!/bin/sh
# Scan ../../src for lines containing a backtick (ignoring .svn paths), let
# find_perl_deps.pl reduce them to program names, and store the sorted,
# de-duplicated list in dep_list.txt.
grep -r -e "\`" ../../src \
    | grep -v -e "\.svn\/" \
    | perl find_perl_deps.pl \
    | sort \
    | uniq > dep_list.txt

View File

@ -1,14 +0,0 @@
#!/bin/sh
# Copy every *.conf file from src_conf_dir to dest_conf_dir, replacing each
# occurrence of PATH_PREFIX with the given replacement text.
if test 3 -ne $#
then
    echo "Usage: " "$(basename "$0")" src_conf_dir dest_conf_dir replacement
else
    # The replacement is pasted into the sed script as-is, so any '/' in it
    # must already be escaped by the caller.
    POLEC="s/PATH_PREFIX/$3/g"
    # Iterate over the glob directly instead of parsing `ls` output, and
    # quote every expansion so paths with spaces work.
    for FN in "$1"/*.conf
    do
        # an unmatched glob stays literal; skip it rather than fail in sed
        [ -e "$FN" ] || continue
        BN=$(basename "$FN")
        sed "$POLEC" "$FN" > "$2/$BN"
    done
fi

View File

@ -1 +0,0 @@
2

View File

@ -1 +0,0 @@
0.9

View File

@ -1,86 +0,0 @@
# Settings for the deb package build.
# ':=' is used for every $(shell ...) value so the command runs once at
# parse time rather than on each expansion.
DIR := $(shell pwd)
# fall back to this directory when the caller did not provide the paths
ifndef UTT_DIST_DIR
UTT_DIST_DIR=${DIR}
endif
ifndef UTT_DIST_OUTPUT
UTT_DIST_OUTPUT=${DIR}
endif
# package properties; version and release are read from ../common
_PRODUCT_NAME=utt
_UTT_VER := $(shell cat ../common/version.def)
_UTT_REL := $(shell cat ../common/release.def)
# root of the package file tree and the UTT install directory inside it
_DEB_FROOT=$(DIR)/deb_root
_UTT_DIR=${_DEB_FROOT}/usr/local/$(_PRODUCT_NAME).$(_UTT_VER)-$(_UTT_REL)
# Build the .deb: assemble a package root under $(_DEB_FROOT), fill it with
# the files from $(UTT_DIST_DIR) plus package documentation, then run
# dpkg-deb on it and rename the result.
# NOTE(review): DEBIAN/ is deleted before `dpkg-deb --build` runs, although
# dpkg-deb normally reads DEBIAN/control from the directory it builds, and
# control.tar.gz is left inside the package root -- confirm this sequence
# is intended.
.PHONY: default
default: make_control make_postinst make_prerm
# first, we prepare some directory structure
	mkdir -p $(_DEB_FROOT)/DEBIAN
	mkdir -p $(_UTT_DIR)
# next, we copy deb package files
	mv ./control $(_DEB_FROOT)/DEBIAN/
	mv ./postinst $(_DEB_FROOT)/DEBIAN/
	mv ./prerm $(_DEB_FROOT)/DEBIAN/
# -z added: the archive is named .tar.gz, so it must be gzip-compressed
# (the no-op `cd ${DIR};` that followed has been dropped: every recipe
# line runs in its own shell)
	cd ${_DEB_FROOT} && tar -czvvf control.tar.gz DEBIAN/
	rm -fr ${_DEB_FROOT}/DEBIAN/
# we copy all necessary files (binaries)
	cp -r ${UTT_DIST_DIR}/* ${_UTT_DIR}/
	cp ./changelog ${_UTT_DIR}/share/doc/$(_PRODUCT_NAME)/
# gzip --best $(_DEB_ROOT)/usr/share/doc/$(_PRODUCT_NAME)/changelog
	cp ./changelog.Debian $(_UTT_DIR)/share/doc/$(_PRODUCT_NAME)/
# gzip --best $(_DEB_ROOT)/usr/share/doc/$(_PRODUCT_NAME)/changelog.Debian
	cp ../files/* ${_UTT_DIR}/share/doc/${_PRODUCT_NAME}/
	cp ../common/utt_make_config.pl ${_UTT_DIR}/bin/
	chmod 755 ${_UTT_DIR}/bin/utt_make_config.pl
# # next we make man/doc archives
# gzip --best $(_DEB_ROOT)/usr/share/man/man1/$(_PRODUCT_NAME).1
# make every directory 755 (the original author notes this is necessary on
# Debian Woody); -exec replaces `| xargs` so names with whitespace are safe
	find $(_DEB_FROOT) -type d -exec chmod 755 {} +
# finally, we build the deb package
	fakeroot dpkg-deb --build $(_DEB_FROOT)
	mv $(_DEB_FROOT).deb $(_PRODUCT_NAME)_$(_UTT_VER)-$(_UTT_REL).all.deb
	rm -rf ${_DEB_FROOT}
# Write the Debian `control` file into the current directory, one field
# per line, ending with the package description from ../common.
.PHONY: make_control
make_control:
	echo "Package: $(_PRODUCT_NAME)" > control
	echo "Version: $(_UTT_VER)" >> control
	echo "Section: web" >> control
	echo "Priority: optional" >> control
	echo "Architecture: all" >> control
	echo "Essential: no" >> control
	echo "Depends: " >> control
# the dependency list is meant to come from ../common/requirements.def
# (nothing reads that file here yet); example of package requirements:
# libwww-perl, acme-base (>= 1.2)
	echo "Pre-Depends: perl" >> control
	echo "Maintainer: Adam Mickiewicz University" >> control
	echo "Provides: $(_PRODUCT_NAME)" >> control
# printf instead of `echo -n`: -n is not portable across /bin/sh
# implementations and may be printed literally
	printf 'Description: ' >> control
	cat ../common/description.def >> control
# Generate the post-install script: it runs create_utt_config.pl from
# $(_INSTALL_DIR) and then deletes that script.
# NOTE(review): _INSTALL_DIR is not assigned in the visible part of this
# makefile; unless it comes from the environment or the command line, the
# generated paths start at "/" -- confirm.
make_postinst:
	{ echo "#!/bin/sh"; \
	  echo "$(_INSTALL_DIR)/create_utt_config.pl"; \
	  echo "rm -f $(_INSTALL_DIR)/create_utt_config.pl"; } > postinst
.PHONY: make_postinst

# Generate the pre-removal script, which consists of the shebang line only.
make_prerm:
	echo "#!/bin/sh" > prerm
.PHONY: make_prerm

View File

@ -1,3 +0,0 @@
This directory contains files necessary to create deb package.
apt-get install dpkg-dev debhelper devscripts fakeroot linda

View File

@ -1,8 +0,0 @@
Copyright (C) 2005 - 2008 Tomasz Obrebski, Michal Stolarski, Justyna Walkowska, Pawel Konieczka
Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.2
or any later version published by the Free Software Foundation;
with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
Texts. A copy of the license is included in the section entitled GNU
Free Documentation License.

View File

@ -1,264 +0,0 @@
GNU Free Documentation License
Version 1.2, November 2002
Copyright (c) 2000,2001,2002 Free Software Foundation, Inc.
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
0. PREAMBLE
The purpose of this License is to make a manual, textbook, or other functional and
useful document free in the sense of freedom: to assure everyone the effective freedom
to copy and redistribute it, with or without modifying it, either commercially or noncommercially.
Secondarily, this License preserves for the author and publisher a way
to get credit for their work, while not being considered responsible for modifications
made by others.
This License is a kind of “copyleft”, which means that derivative works of the document
must themselves be free in the same sense. It complements the GNU General Public
License, which is a copyleft license designed for free software.
We have designed this License in order to use it for manuals for free software, because
free software needs free documentation: a free program should come with manuals
providing the same freedoms that the software does. But this License is not limited to
software manuals; it can be used for any textual work, regardless of subject matter or
whether it is published as a printed book. We recommend this License principally for
works whose purpose is instruction or reference.
1. APPLICABILITY AND DEFINITIONS
This License applies to any manual or other work, in any medium, that contains a
notice placed by the copyright holder saying it can be distributed under the terms
of this License. Such a notice grants a world-wide, royalty-free license, unlimited in
duration, to use that work under the conditions stated herein. The “Document”,
below, refers to any such manual or work. Any member of the public is a licensee, and
is addressed as “you”. You accept the license if you copy, modify or distribute the work
in a way requiring permission under copyright law.
A “Modified Version” of the Document means any work containing the Document or
a portion of it, either copied verbatim, or with modifications and/or translated into
another language.
A “Secondary Section” is a named appendix or a front-matter section of the Document
that deals exclusively with the relationship of the publishers or authors of the Document
to the Documents overall subject (or to related matters) and contains nothing that
could fall directly within that overall subject. (Thus, if the Document is in part a
textbook of mathematics, a Secondary Section may not explain any mathematics.) The
relationship could be a matter of historical connection with the subject or with related
matters, or of legal, commercial, philosophical, ethical or political position regarding
them.
The “Invariant Sections” are certain Secondary Sections whose titles are designated, as
being those of Invariant Sections, in the notice that says that the Document is released
under this License. If a section does not fit the above definition of Secondary then it is
not allowed to be designated as Invariant. The Document may contain zero Invariant
Sections. If the Document does not identify any Invariant Sections then there are none.
The “Cover Texts” are certain short passages of text that are listed, as Front-Cover
Texts or Back-Cover Texts, in the notice that says that the Document is released under
this License. A Front-Cover Text may be at most 5 words, and a Back-Cover Text may
be at most 25 words.
A “Transparent” copy of the Document means a machine-readable copy, represented
in a format whose specification is available to the general public, that is suitable for
revising the document straightforwardly with generic text editors or (for images composed
of pixels) generic paint programs or (for drawings) some widely available drawing
editor, and that is suitable for input to text formatters or for automatic translation to
a variety of formats suitable for input to text formatters. A copy made in an otherwise
Transparent file format whose markup, or absence of markup, has been arranged to
thwart or discourage subsequent modification by readers is not Transparent. An image
format is not Transparent if used for any substantial amount of text. A copy that is
not “Transparent” is called “Opaque”.
Examples of suitable formats for Transparent copies include plain ascii without
markup, Texinfo input format, LaTEX input format, SGML or XML using a publicly
available DTD, and standard-conforming simple HTML, PostScript or PDF designed
for human modification. Examples of transparent image formats include PNG, XCF
and JPG. Opaque formats include proprietary formats that can be read and edited
only by proprietary word processors, SGML or XML for which the DTD and/or
processing tools are not generally available, and the machine-generated HTML,
PostScript or PDF produced by some word processors for output purposes only.
The “Title Page” means, for a printed book, the title page itself, plus such following
pages as are needed to hold, legibly, the material this License requires to appear in the
title page. For works in formats which do not have any title page as such, “Title Page”
means the text near the most prominent appearance of the works title, preceding the
beginning of the body of the text.
A section “Entitled XYZ” means a named subunit of the Document whose title either
is precisely XYZ or contains XYZ in parentheses following text that translates XYZ in
another language. (Here XYZ stands for a specific section name mentioned below, such
as “Acknowledgements”, “Dedications”, “Endorsements”, or “History”.) To “Preserve
the Title” of such a section when you modify the Document means that it remains a
section “Entitled XYZ” according to this definition.
The Document may include Warranty Disclaimers next to the notice which states that
this License applies to the Document. These Warranty Disclaimers are considered to
be included by reference in this License, but only as regards disclaiming warranties:
any other implication that these Warranty Disclaimers may have is void and has no
effect on the meaning of this License.
2. VERBATIM COPYING
You may copy and distribute the Document in any medium, either commercially or
noncommercially, provided that this License, the copyright notices, and the license
notice saying this License applies to the Document are reproduced in all copies, and
that you add no other conditions whatsoever to those of this License. You may not use
technical measures to obstruct or control the reading or further copying of the copies
you make or distribute. However, you may accept compensation in exchange for copies.
If you distribute a large enough number of copies you must also follow the conditions
in section 3.
You may also lend copies, under the same conditions stated above, and you may publicly
display copies.
3. COPYING IN QUANTITY
If you publish printed copies (or copies in media that commonly have printed covers) of
the Document, numbering more than 100, and the Documents license notice requires
Cover Texts, you must enclose the copies in covers that carry, clearly and legibly, all
these Cover Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
the back cover. Both covers must also clearly and legibly identify you as the publisher
of these copies. The front cover must present the full title with all words of the title
equally prominent and visible. You may add other material on the covers in addition.
Copying with changes limited to the covers, as long as they preserve the title of the
Document and satisfy these conditions, can be treated as verbatim copying in other
respects.
If the required texts for either cover are too voluminous to fit legibly, you should put
the first ones listed (as many as fit reasonably) on the actual cover, and continue the
rest onto adjacent pages.
If you publish or distribute Opaque copies of the Document numbering more than 100,
you must either include a machine-readable Transparent copy along with each Opaque
copy, or state in or with each Opaque copy a computer-network location from which
the general network-using public has access to download using public-standard network
protocols a complete Transparent copy of the Document, free of added material. If
you use the latter option, you must take reasonably prudent steps, when you begin
distribution of Opaque copies in quantity, to ensure that this Transparent copy will
remain thus accessible at the stated location until at least one year after the last time
you distribute an Opaque copy (directly or through your agents or retailers) of that
edition to the public.
It is requested, but not required, that you contact the authors of the Document well
before redistributing any large number of copies, to give them a chance to provide you
with an updated version of the Document.
4. MODIFICATIONS
You may copy and distribute a Modified Version of the Document under the conditions
of sections 2 and 3 above, provided that you release the Modified Version under precisely
this License, with the Modified Version filling the role of the Document, thus licensing
distribution and modification of the Modified Version to whoever possesses a copy of
it. In addition, you must do these things in the Modified Version:
A. Use in the Title Page (and on the covers, if any) a title distinct from that of the
Document, and from those of previous versions (which should, if there were any,
be listed in the History section of the Document). You may use the same title as
a previous version if the original publisher of that version gives permission.
B. List on the Title Page, as authors, one or more persons or entities responsible for
authorship of the modifications in the Modified Version, together with at least five
of the principal authors of the Document (all of its principal authors, if it has fewer
than five), unless they release you from this requirement.
C. State on the Title page the name of the publisher of the Modified Version, as the
publisher.
D. Preserve all the copyright notices of the Document.
E. Add an appropriate copyright notice for your modifications adjacent to the other
copyright notices.
F. Include, immediately after the copyright notices, a license notice giving the public
permission to use the Modified Version under the terms of this License, in the form
shown in the Addendum below.
G. Preserve in that license notice the full lists of Invariant Sections and required Cover
Texts given in the Documents license notice.
H. Include an unaltered copy of this License.
I. Preserve the section Entitled “History”, Preserve its Title, and add to it an item
stating at least the title, year, new authors, and publisher of the Modified Version
as given on the Title Page. If there is no section Entitled “History” in the Document,
create one stating the title, year, authors, and publisher of the Document
as given on its Title Page, then add an item describing the Modified Version as
stated in the previous sentence.
J. Preserve the network location, if any, given in the Document for public access to
a Transparent copy of the Document, and likewise the network locations given in
the Document for previous versions it was based on. These may be placed in the
“History” section. You may omit a network location for a work that was published
at least four years before the Document itself, or if the original publisher of the
version it refers to gives permission.
K. For any section Entitled “Acknowledgements” or “Dedications”, Preserve the Title
of the section, and preserve in the section all the substance and tone of each of the
contributor acknowledgements and/or dedications given therein.
L. Preserve all the Invariant Sections of the Document, unaltered in their text and
in their titles. Section numbers or the equivalent are not considered part of the
section titles.
M. Delete any section Entitled “Endorsements”. Such a section may not be included
in the Modified Version.
N. Do not retitle any existing section to be Entitled “Endorsements” or to conflict in
title with any Invariant Section.
O. Preserve any Warranty Disclaimers.
If the Modified Version includes new front-matter sections or appendices that qualify
as Secondary Sections and contain no material copied from the Document, you may at
your option designate some or all of these sections as invariant. To do this, add their
titles to the list of Invariant Sections in the Modified Versions license notice. These
titles must be distinct from any other section titles.
You may add a section Entitled “Endorsements”, provided it contains nothing but
endorsements of your Modified Version by various parties—for example, statements of
peer review or that the text has been approved by an organization as the authoritative
definition of a standard.
You may add a passage of up to five words as a Front-Cover Text, and a passage of up
to 25 words as a Back-Cover Text, to the end of the list of Cover Texts in the Modified
Version. Only one passage of Front-Cover Text and one of Back-Cover Text may be
added by (or through arrangements made by) any one entity. If the Document already
includes a cover text for the same cover, previously added by you or by arrangement
made by the same entity you are acting on behalf of, you may not add another; but
you may replace the old one, on explicit permission from the previous publisher that
added the old one.
The author(s) and publisher(s) of the Document do not by this License give permission
to use their names for publicity for or to assert or imply endorsement of any Modified
Version.
5. COMBINING DOCUMENTS
You may combine the Document with other documents released under this License,
under the terms defined in section 4 above for modified versions, provided that you
include in the combination all of the Invariant Sections of all of the original documents,
unmodified, and list them all as Invariant Sections of your combined work in its license
notice, and that you preserve all their Warranty Disclaimers.
The combined work need only contain one copy of this License, and multiple identical
Invariant Sections may be replaced with a single copy. If there are multiple Invariant
Sections with the same name but different contents, make the title of each such section
unique by adding at the end of it, in parentheses, the name of the original author or
publisher of that section if known, or else a unique number. Make the same adjustment
to the section titles in the list of Invariant Sections in the license notice of the combined
work.
In the combination, you must combine any sections Entitled “History” in the various
original documents, forming one section Entitled “History”; likewise combine any
sections Entitled “Acknowledgements”, and any sections Entitled “Dedications”. You
must delete all sections Entitled “Endorsements.”
6. COLLECTIONS OF DOCUMENTS
You may make a collection consisting of the Document and other documents released
under this License, and replace the individual copies of this License in the various
documents with a single copy that is included in the collection, provided that you
follow the rules of this License for verbatim copying of each of the documents in all
other respects.
You may extract a single document from such a collection, and distribute it individually
under this License, provided you insert a copy of this License into the extracted
document, and follow this License in all other respects regarding verbatim copying of
that document.
7. AGGREGATION WITH INDEPENDENT WORKS
A compilation of the Document or its derivatives with other separate and independent
documents or works, in or on a volume of a storage or distribution medium, is called
an “aggregate” if the copyright resulting from the compilation is not used to limit the
legal rights of the compilations users beyond what the individual works permit. When
the Document is included in an aggregate, this License does not apply to the other
works in the aggregate which are not themselves derivative works of the Document.
If the Cover Text requirement of section 3 is applicable to these copies of the Document,
then if the Document is less than one half of the entire aggregate, the Documents Cover
Texts may be placed on covers that bracket the Document within the aggregate, or the
electronic equivalent of covers if the Document is in electronic form. Otherwise they
must appear on printed covers that bracket the whole aggregate.
8. TRANSLATION
Translation is considered a kind of modification, so you may distribute translations
of the Document under the terms of section 4. Replacing Invariant Sections with
translations requires special permission from their copyright holders, but you may
include translations of some or all Invariant Sections in addition to the original versions
of these Invariant Sections. You may include a translation of this License, and all the
license notices in the Document, and any Warranty Disclaimers, provided that you
also include the original English version of this License and the original versions of
those notices and disclaimers. In case of a disagreement between the translation and
the original version of this License or a notice or disclaimer, the original version will
prevail.
If a section in the Document is Entitled “Acknowledgements”, “Dedications”, or “History”,
the requirement (section 4) to Preserve its Title (section 1) will typically require
changing the actual title.
9. TERMINATION
You may not copy, modify, sublicense, or distribute the Document except as expressly
provided for under this License. Any other attempt to copy, modify, sublicense or
distribute the Document is void, and will automatically terminate your rights under
this License. However, parties who have received copies, or rights, from you under this
License will not have their licenses terminated so long as such parties remain in full
compliance.
10. FUTURE REVISIONS OF THIS LICENSE
The Free Software Foundation may publish new, revised versions of the GNU Free
Documentation License from time to time. Such new versions will be similar in spirit
to the present version, but may differ in detail to address new problems or concerns.
See http://www.gnu.org/copyleft/.
Each version of the License is given a distinguishing version number. If the Document
specifies that a particular numbered version of this License “or any later version”
applies to it, you have the option of following the terms and conditions either of that
specified version or of any later version that has been published (not as a draft) by
the Free Software Foundation. If the Document does not specify a version number of
this License, you may choose any version ever published (not as a draft) by the Free
Software Foundation.

View File

@ -1,51 +0,0 @@
General information
*********************
UAM Text Tools (UTT) is a package of language processing tools
developed at Adam Mickiewicz University. Its functionality includes:
* tokenization
* dictionary-based morphological analysis
* heuristic morphological analysis of unknown words
* spelling correction
* pattern search
* sentence splitting
* generation of concordance tables
The toolkit is intended for processing raw (unannotated),
unrestricted text for any conceivable purpose.
Installation
**************
1) unpack the UTT tar archive
2) in the same directory, unpack the tar archives of all UTT dictionary modules you have
3) run
make install
in the root directory of the installation
4) add the bin directory to the PATH variable
Requirements
*************
* File::HomeDir
the Perl package File::HomeDir must be installed
(to install the package, run 'perl -MCPAN -e shell' and write
'install File::HomeDir' after the 'cpan>' prompt appears)
* flex
to run the ser component, flex must be installed in your system
* ruby
to run the tre component, ruby must be installed in your system
* locale pl_PL.iso-8859-2
the locale pl_PL.iso-8859-2 (pl_PL for short) must be installed
and set while using UTT with the Polish module. The text you
process with UTT must be encoded in iso-8859-2.

View File

@ -1,26 +0,0 @@
# This makefile builds the UTT rpm package (target: rpm).
#
# DIR          directory make is started from; utt.spec is looked up here.
# UTT_DIST_DIR tree that is copied into the package; defaults to DIR when the
#              caller (normally the main Makefile) does not define it.
DIR:=$(shell pwd)
ifndef UTT_DIST_DIR
UTT_DIST_DIR=${DIR}
endif
# Temporary build root; everything below it ends up in the package
# (utt.spec uses the current directory as BuildRoot and packages /*).
_RPM_FROOT=${DIR}/rmp_root
_UTT_ROOT=${_RPM_FROOT}/usr/local
# default task
.PHONY: rpm
rpm:
	# Start from a clean build root (left over from an interrupted run).
	if test -d ${_RPM_FROOT}; then rm -fr ${_RPM_FROOT}; fi
	mkdir -p ${_UTT_ROOT}
	cp -rf ${UTT_DIST_DIR}/* ${_UTT_ROOT}/
	# Move the *.conf files aside and let prepare_conf.sh regenerate them in
	# etc/utt; presumably the last argument is the (sed-escaped) install
	# prefix substituted into them -- TODO confirm against prepare_conf.sh.
	mkdir -p ${_UTT_ROOT}/cnf
	mv ${_UTT_ROOT}/etc/utt/*.conf ${_UTT_ROOT}/cnf/
	${DIR}/../common/prepare_conf.sh ${_UTT_ROOT}/cnf ${_UTT_ROOT}/etc/utt \\\/usr\\\/local
	rm -rf ${_UTT_ROOT}/cnf
	cp ../files/* ${_UTT_ROOT}/
	# rpmbuild must run inside the build root; '&&' keeps it from running
	# (and packaging the wrong tree) when the cd fails.
	cd ${_RPM_FROOT} && rpmbuild -bb ${DIR}/utt.spec
	rm -rf ${_RPM_FROOT}

View File

@ -1,16 +0,0 @@
This directory contains files necessary to produce rpm package.
First, you must have variable UTT_DIST_DIR defined properly.
This variable should be defined by main Makefile.
To create rpm file, just write:
make
The created package should appear in the default RPM directory.
(on my computer it is the /usr/src/redhat/RPMS/$arch/ directory)
To determine the rpm output directory, execute:
rpm --showrc | grep _rpmdir
You need write access to this directory to create the rpm.

View File

@ -1,81 +0,0 @@
#
# Default RPM header.
#
# START_RPM_STD_HEADER:
#
# RPM properties
#
%define _this_product UAM Text Tools
%define _this_summary Some tools for text processing
%define _this_name utt
%define _this_version %(cat ../../common/version.def)
%define _this_release %(cat ../../common/release.def)
%define _this_copyright Adam Mickiewicz University, Poland
#
# Default RPM header.
#
# END_RPM_STD_HEADER:
# --------------------------------------------------------------------
Summary: %_this_summary
Name: %_this_name
Version: %_this_version
Release: %_this_release
#Copyright: %_this_copyright
License: GPL
Group: Development/Tools
URL: http://utt.amu.edu.pl
Vendor: Adam Mickiewicz University
BuildRoot: %(pwd)
#BuildArch: i586
# requirements for utt application
#AutoReq: no
#AutoReqProv: no
#Requires: glibc >= 2.1.3
#Requires: libgcc1 >= 3.0
#Requires: libgcc >= 3.0
#Requires: libstdc++6 >= 3.4.1
#Requires: libstdc++ >= 3.4.1
%description
%(cat ../../common/description.def)
%description -l pl
%(cat ../../common/description.pl.def)
# -------------------------------------------------------------
# preparing sources for compilation
%prep
# source compilation
%build
# rpm building
%install
# cleaning after rpm build
%clean
# -------------------------------------------------------------
#before installation
%pre
#after installation
%post
# we need to create utt.conf file
#before uninstallation
%preun
#after uninstallation
%postun
# -------------------------------------------------------------
%files
%defattr(-,root,root)
/*

View File

@ -1,74 +0,0 @@
/usr/local/bin/aut2fsa
/usr/local/bin/canonize
/usr/local/bin/compiledic
/usr/local/bin/con
/usr/local/bin/cor
/usr/local/bin/dgc
/usr/local/bin/dgp
/usr/local/bin/fla
/usr/local/bin/fsm2aut
/usr/local/bin/go ?
/usr/local/bin/Makefile.go ?
/usr/local/bin/gph
/usr/local/bin/grp
/usr/local/bin/gue
/usr/local/bin/kot
/usr/local/bin/lem
/usr/local/bin/mar
/usr/local/bin/sen
/usr/local/bin/sen-nl
/usr/local/bin/ser
/usr/local/bin/tok
/usr/local/bin/tre.rb
/usr/local/bin/unfla
/usr/local/bin/ipi.tag2re
/usr/local/bin/uam.tag2re
# R.D. sugeruje /etc/utt
# lokalnie: ~/.utt lub (trendy) ~/.config/utt
/usr/local/etc/utt/con.conf
/usr/local/etc/utt/cor.conf
/usr/local/etc/utt/dgc.conf
/usr/local/etc/utt/fla.conf
/usr/local/etc/utt/grp.conf
/usr/local/etc/utt/gue.conf
/usr/local/etc/utt/kor.conf
/usr/local/etc/utt/kot.conf
/usr/local/etc/utt/lem.conf
/usr/local/etc/utt/mar.conf
/usr/local/etc/utt/sen.conf
/usr/local/etc/utt/ser.conf
/usr/local/etc/utt/tok.conf
/usr/local/etc/utt/unfla.conf
/usr/local/etc/utt/utt.conf
/usr/local/share/utt/weights.kor
/usr/local/share/utt/cats.dgc
/usr/local/share/utt/gram.dgc
# lokalnie: wszystkie pliki beda umieszczone w ~/.local/share/utt/
/usr/local/share/utt/pl_PL.ISO-8859-2/pl_PL.ISO-8859-2.sym
/usr/local/share/utt/pl_PL.ISO-8859-2/cor.bin
/usr/local/share/utt/pl_PL.ISO-8859-2/gue.bin
/usr/local/share/utt/pl_PL.ISO-8859-2/lem.bin
/usr/local/share/utt/pl_PL.UTF-8/pl_PL.UTF-8.sym
/usr/local/share/utt/pl_PL.UTF-8/cor.bin
/usr/local/share/utt/pl_PL.UTF-8/gue.bin
/usr/local/share/utt/pl_PL.UTF-8/lem.bin
# lokalnie: wszystkie slowniki beda umieszczone w ~/.local/share/utt/
/usr/local/lib/utt/ser.l.template
/usr/local/lib/utt/terms.m4
/usr/local/lib/utt/seg.rb
/usr/local/lib/attr.pm
# lokalnie: wszystkie pliki beda umieszczone w ~/.local/lib/utt/
/usr/local/share/doc/utt/FAQ
/usr/local/share/doc/utt/COPYRIGHT
/usr/local/share/doc/utt/NEWS
/usr/local/share/doc/utt/README
/usr/local/share/info/utt.info.gz
/usr/local/share/man/man3/utt.gz
~/.utt/*.conf (wszystko z /usr/local/etc/utt)

View File

@ -1,5 +0,0 @@
Here you can find some information about how to install utt.
You should just unpack archive and then execute
make test
make install

View File

@ -1,48 +0,0 @@
# This makefile builds the tarball distribution of utt.
#
# Some variables
#
# Directory make is started from; the staging directory is created here.
DIR:=$(shell pwd)
# Directory with utt binaries
ifndef UTT_DIST_DIR
UTT_DIST_DIR=${DIR}
endif
# Where to put the result
ifndef UTT_DIST_OUTPUT
UTT_DIST_OUTPUT=${DIR}
endif
# Common info about version and release
_UTT_VER:=$(shell cat ../common/version.def)
_UTT_REL:=$(shell cat ../common/release.def)
# Temp vars: staging directory that becomes the top-level directory of the
# archive, and the name of the archive itself.
_TARBALL_ROOT=$(DIR)/utt-$(_UTT_VER).$(_UTT_REL)
_TAR_FILE_NAME=utt.$(_UTT_VER)_$(_UTT_REL).tar.gz
# default task
.PHONY: default
default:
	@echo Build directory: ${UTT_DIST_DIR}
	@echo Output directory for tarball: ${UTT_DIST_OUTPUT}
	mkdir -p ${_TARBALL_ROOT}
	cp -fr ${UTT_DIST_DIR}/* ${_TARBALL_ROOT}
	@# we add some extra files
	@# config files
	mkdir -p ${_TARBALL_ROOT}/cnf
	mv ${_TARBALL_ROOT}/etc/utt/*.conf ${_TARBALL_ROOT}/cnf/
	${DIR}/../common/prepare_conf.sh ${_TARBALL_ROOT}/cnf ${_TARBALL_ROOT}/etc/utt \~\\\/.local
	rm -rf ${_TARBALL_ROOT}/cnf
	cp ./INSTALL ${_TARBALL_ROOT}/
	cp ./Makefile.tarball ${_TARBALL_ROOT}/Makefile
	cp ../files/* ${_TARBALL_ROOT}/
	@# Archive exactly the staging directory. A bare 'utt*' glob would also
	@# pick up any other utt* entry in this directory, e.g. a tarball left
	@# by a previous run when UTT_DIST_OUTPUT is the default.
	tar -czf ${UTT_DIST_OUTPUT}/${_TAR_FILE_NAME} -C ${DIR} $(notdir ${_TARBALL_ROOT})
	rm -rf ${_TARBALL_ROOT}

View File

@ -1,18 +0,0 @@
# Per-user installation of an unpacked utt tarball.
# Destination directories (all below the invoking user's home directory).
UTT_LIB_DIR=${HOME}/.local/lib/utt
UTT_SHARE_DIR=${HOME}/.local/share/utt
UTT_CONF_DIR=${HOME}/.utt

# install: copy lib/utt, share/utt and etc/utt from the unpacked tree into
# the three destination directories, creating them when needed.
.PHONY: install
install:
	mkdir -p ${UTT_LIB_DIR}
	cp -r lib/utt/* ${UTT_LIB_DIR}
	mkdir -p ${UTT_SHARE_DIR}
	cp -r share/utt/* ${UTT_SHARE_DIR}
	mkdir -p ${UTT_CONF_DIR}
	cp -r etc/utt/* ${UTT_CONF_DIR}

# uninstall: remove the three directories created by install.
.PHONY: uninstall
uninstall:
	rm -r ${UTT_LIB_DIR}
	rm -r ${UTT_SHARE_DIR}
	rm -r ${UTT_CONF_DIR}

View File

@ -1,6 +0,0 @@
This directory contains a Makefile that creates the tar.gz archive.
To create the archive, just run:
make
Warning: you need to define the variable UTT_DIST_DIR.

View File

@ -1,133 +0,0 @@
# Package attr: parsing, printing and comparing morphological descriptions
# of the shape CATEGORY/AttrValuesAttrValues... (e.g. "N/GaNpCa").
package attr;
use locale;
use strict;
use Data::Dumper;
# Building blocks of a description, exported as package variables.
# Part-of-speech (category) name: one or more upper-case letters.
our $pos_re = qr/(?:[[:upper:]]+)/;
# Attribute name: one or more upper-case letters.
our $attr_re = qr/(?:[[:upper:]]+)/;
# A single value: one lower-case letter, digit or one of + ? ! * -,
# or an arbitrary non-empty string enclosed in <...>.
our $val_re = qr/(?:[[:lower:][:digit:]+?!*-]|<[^>\n]+>)/;
# One attribute followed by one or more of its values.
our $av_re = qr/(?:$attr_re$val_re+)/;
# A non-empty sequence of attribute/value groups.
our $avlist_re = qr/(?:$av_re+)/;
# A complete description: category, optionally followed by "/" and the list.
our $cat_re = qr/(?:$pos_re(?:\/$avlist_re)?)/;
# match(@desc1, @desc2)
# Both arguments are parsed descriptions, i.e. arrays (category, avs) where
# avs is a reference to a hash attribute => { value => 1, ... }.
# Returns 1 when the descriptions are compatible, 0 otherwise:
#   - the categories must be equal unless one of them is '*';
#   - for every attribute of the first description that also exists in the
#     second one, at least one value of the first must be set in the second.
# Attributes present in only one of the descriptions are ignored.
sub match(\@\@)
{
    my ($cat1, $avs1) = @{shift @_};
    my ($cat2, $avs2) = @{shift @_};

    return 0 if $cat1 ne $cat2 && $cat1 ne '*' && $cat2 ne '*';

    ATTR: for my $attr (keys %$avs1)
    {
        # Nothing to compare when the other side does not have the attribute.
        next ATTR unless exists $avs2->{$attr};

        for my $val (keys %{$avs1->{$attr}})
        {
            next ATTR if $avs2->{$attr}->{$val};
        }
        # Shared attribute without a single shared value.
        return 0;
    }
    return 1;
}
# agree(@desc1, @desc2, $attr)
# Returns 1 when the two parsed descriptions agree on attribute $attr:
# either one of them does not specify the attribute at all, or they have
# at least one value of it in common. Returns 0 otherwise.
sub agree(\@\@$)
{
    my ($desc1, $desc2, $attr) = @_;
    my $vals1 = $desc1->[1]->{$attr};
    my $vals2 = $desc2->[1]->{$attr};

    # An unspecified attribute agrees with anything.
    return 1 unless $vals1 && $vals2;

    foreach my $candidate (keys %$vals1)
    {
        exists $vals2->{$candidate} and return 1;
    }
    return 0;
}
# parse($description)
# arg:   a description string, CATEGORY optionally followed by "/" and a
#        sequence of attribute/value groups
# value: reference to an array [<cat>, <avs>], where <avs> is a reference to
#        a hash of pairs attribute => hash of values (pairs value => 1), e.g.
# [
#   'ADJ',
#   {
#     'KOLEDZY' => {
#                    '<alojzy>' => 1,
#                    '<karol>' => 1,
#                    '<jan>' => 1
#                  },
#     'C' => {
#              'p' => 1,
#              'a' => 1,
#              'i' => 1
#            },
#     'N' => {
#              'p' => 1
#            }
#   }
# ];
# An attribute whose value list contains '*' is left out of <avs> entirely.
# A single lower-case letter written as <x> is stored as plain x.
sub parse ($)
{
    my ($dstr) = @_;
    my $avs = {};
    my ($cat, $attrlist) = split '/', $dstr;

    # A bare category such as "CONJ" has no attribute list.
    $attrlist = '' unless defined $attrlist;

    ATTR:
    while ( $attrlist =~ /($attr_re)($val_re+)/g )
    {
        my ($attrstr, $valstr) = ($1, $2);
        my %vals;
        # Explicit capture instead of $&: same text, without relying on the
        # global match variable.
        while ( $valstr =~ /($val_re)/g )
        {
            my $val = $1;
            next ATTR if $val eq '*';
            $val =~ s/^<([[:lower:]])>$/$1/;
            $vals{$val} = 1;
        }
        # Safe although %vals is lexical: every iteration creates a fresh
        # hash and the stored reference keeps it alive.
        $avs->{$attrstr} = \%vals;
    }
    [$cat, $avs];
}
# unparse(@desc)
# arg:   a parsed description, as returned (by reference) from parse
# value: the description as a string; attributes are emitted in sorted
#        order, each followed by its values in sorted order
sub unparse (\@)
{
    my ($category, $attr_values) = @{ shift @_ };
    my @names = sort keys %$attr_values;

    # No attributes: the description is just the category.
    return $category unless @names;

    my $text = $category . '/';
    $text .= $_ . join('', sort keys %{ $attr_values->{$_} }) foreach @names;
    return $text;
}
# canonize($description)
# Parses the description string and prints it back, which yields the
# attributes and their values in sorted order.
sub canonize ($)
{
    # $_[0], not the one-element slice @_[0].
    unparse @{parse $_[0]};
}
1;

View File

@ -1,31 +0,0 @@
# Seg wraps one line of text made of whitespace-separated fields and gives
# access to the fields by position or by "name:value" annotation name.
class Seg
  # s - the raw line (a trailing newline is allowed).
  def initialize(s="")
    @line=s
    self
  end

  # The line without its trailing line terminator.
  def to_s
    @line.chomp
  end

  # Replaces the wrapped line; returns self so calls can be chained.
  def set(s)
    @line=s
    self
  end

  # key - Integer: 1-based position of a whitespace-separated field;
  #       String:  name of an annotation written as " name:value"
  #                (the name is interpolated into a regexp as-is).
  # Returns the field / annotation value, or nil when it is absent or the
  # key is of any other type.
  def field(key)
    # Integer instead of Fixnum: Fixnum was merged into Integer in Ruby 2.4
    # and no longer exists in Ruby 3.2+.
    if key.is_a?(Integer)
      @line.split[key-1]
    elsif key.is_a?(String)
      @line =~ /\s#{key}:(\S+)/ ? $1 : nil
    end
  end

  alias [] field

  # All whitespace-separated fields of the line.
  def fields
    @line.split
  end
end

View File

@ -1,30 +0,0 @@
%{
/* Scanner skeleton: every match of the first rule is echoed between a BOM
   and an EOM marker line. The two upper-case placeholder tokens in the
   rules section are presumably substituted by the generating tool before
   flex is run -- TODO confirm. */
#include<string.h>
/* Running count of matches; printed as the ser:<n> tag of both markers. */
int n=0;
%}
%%
PATTERN {
/* start: first integer of the first matched line;
   end/len: first and second integer of the last matched line
   (len keeps the value read from the first line when there is only one). */
int start, end, len;
char *lastseg, *tmp;
/* A match has to stop exactly at the end of a line. */
if(yytext[yyleng-1]!='\n')
{fprintf(stderr,"ser: pattern matches incomplete line\n"); exit(1);}
n++;
sscanf(yytext,"%d %d",&start,&len);
/* Temporarily cut the final newline so that strrchr finds the newline
   in front of the last matched line (if the match has several lines). */
yytext[yyleng-1]='\0';
if(tmp=strrchr(yytext,'\n'))
{
lastseg=tmp+1;
sscanf(lastseg,"%d %d", &end, &len);
}
else
end=start;
/* Restore the newline before the text is echoed. */
yytext[yyleng-1]='\n';
printf("%04d 00 BOM * ser:%d\n",start,n);
ECHO;
/* The end marker carries the sum of the last line's two integers. */
printf("%04d 00 EOM * ser:%d\n",end+len,n);
}
.*\n DEFAULTACTION;

View File

@ -1,52 +0,0 @@
divert(-1)
#--------------------------------------------------------------------------
# Macros defined here may be used in pattern specifications
# You can modify this file according to your needs.
# ENDOFSEGMENT and MORFIELD are macros expanded to, respectively,
# end of segment marker (depends on the format: flattened or not)
# and the name of the annotation field containing morphological
# information (standard value is 'lem'). These values are controlled
# by programs using this file to expand search patterns (ser, grp, ...).
# Everything between divert(-1) above and divert(0) at the end is
# discarded from the output, so only the definitions take effect.
# seg(type,form,annotation)
# Expands to a regular expression for one segment: up to two optional
# leading numeric columns, then the type, the form and the annotation
# fields, then the end of segment marker. An empty argument matches any
# value of the corresponding column.
define(`seg',`(\s*((\d+\s+)(\d+\s+)?)?dnl
ifelse($1, `',`(\S+)', `($1)')\s+dnl
ifelse($2, `',`(\S+)', `($2)')dnl
ifelse($3, `',`((\s+\S+)*)', `(\s+($3))')\s*ENDOFSEGMENT)')
# form(f) - segment containing the form f
define(`form', `seg(,$1)')
# field(f) - segment containing auxiliary field f
# (f may be preceded and followed by any number of other fields)
define(`field', `seg(,,`(\S+\s+)*($1)(\s+\S+)*')')
# word, space, punct, number segments (assuming W, S, P, N segment types)
define(`space', `seg(`S',`$1')')
define(`word', `seg(`W',`$1')')
define(`punct', `seg(`P',`$1')')
define(`number', `seg(`N',`$1')')
# macros specific to PMDB format
# lexeme(l) - segment whose morphological field contains lexeme l
# cat(c)    - segment whose morphological field contains category c
define(`lexeme', `field(`MORFIELD:(\S+;)?$1,\S+')')
define(`cat', `field(`MORFIELD:\S+,$1([,;]\S+)?')')
# Place here your macro definitions.
#--------------------------------------------------------------------------
divert(0)

View File

@ -1,696 +0,0 @@
ADJ/DcNpCaGp
ADJ/DcNpCd
ADJ/DcNpCgl
ADJ/DcNpCi
ADJ/DcNpCnavGaifn
ADJ/DcNpCnvGp
ADJ/DcNsCaGi
ADJ/DcNsCaGpa
ADJ/DcNsCaiGf
ADJ/DcNsCavGf
ADJ/DcNsCdGpain
ADJ/DcNsCgdlGf
ADJ/DcNsCgGpain
ADJ/DcNsCilGpain
ADJ/DcNsCnavGn
ADJ/DcNsCnvGpai
ADJ/DpNpCaGp
ADJ/DpNpCd
ADJ/DpNpCgl
ADJ/DpNpCi
ADJ/DpNpCnavGaifn
ADJ/DpNpCnvGp
ADJ/DpNsCaGi
ADJ/DpNsCaGpa
ADJ/DpNsCaiGf
ADJ/DpNsCavGf
ADJ/DpNsCdGpain
ADJ/DpNsCgdlGf
ADJ/DpNsCgGpain
ADJ/DpNsCilGpain
ADJ/DpNsCnavGn
ADJ/DpNsCnvGpai
ADJ/DsNpCaGp
ADJ/DsNpCd
ADJ/DsNpCgl
ADJ/DsNpCi
ADJ/DsNpCnavGaifn
ADJ/DsNpCnvGp
ADJ/DsNsCaGi
ADJ/DsNsCaGpa
ADJ/DsNsCaiGf
ADJ/DsNsCavGf
ADJ/DsNsCdGpain
ADJ/DsNsCgdlGf
ADJ/DsNsCgGpain
ADJ/DsNsCilGpain
ADJ/DsNsCnavGn
ADJ/DsNsCnvGpai
ADJNUM/NpCaGp
ADJNUM/NpCd
ADJNUM/NpCgl
ADJNUM/NpCi
ADJNUM/NpCnavGaifn
ADJNUM/NpCnvGp
ADJNUM/NsCaGi
ADJNUM/NsCaGpa
ADJNUM/NsCaiGf
ADJNUM/NsCavGf
ADJNUM/NsCdGpain
ADJNUM/NsCgdlGf
ADJNUM/NsCgGpain
ADJNUM/NsCilGpain
ADJNUM/NsCnavGn
ADJNUM/NsCnvGpai
ADJPAP/NpCaGp
ADJPAP/NpCd
ADJPAP/NpCgl
ADJPAP/NpCi
ADJPAP/NpCnavGaifn
ADJPAP/NpCnvGp
ADJPAP/NsCaGi
ADJPAP/NsCaGpa
ADJPAP/NsCaiGf
ADJPAP/NsCavGf
ADJPAP/NsCdGpain
ADJPAP/NsCgdlGf
ADJPAP/NsCgGpain
ADJPAP/NsCilGpain
ADJPAP/NsCnavGn
ADJPAP/NsCnvGpai
ADJPP/NpCaGp
ADJPP/NpCd
ADJPP/NpCgl
ADJPP/NpCi
ADJPP/NpCnavGaifn
ADJPP/NpCnvGp
ADJPP/NsCaGi
ADJPP/NsCaGpa
ADJPP/NsCaiGf
ADJPP/NsCavGf
ADJPP/NsCdGpain
ADJPP/NsCgdlGf
ADJPP/NsCgGpain
ADJPP/NsCilGpain
ADJPP/NsCnavGn
ADJPP/NsCnvGpai
ADJPRO/NpCaGp
ADJPRO/NpCd
ADJPRO/NpCgl
ADJPRO/NpCi
ADJPRO/NpCnavGaifn
ADJPRO/NpCnvGp
ADJPRO/NsCaGi
ADJPRO/NsCaGpa
ADJPRO/NsCaiGf
ADJPRO/NsCavGf
ADJPRO/NsCdGpain
ADJPRO/NsCgdlGf
ADJPRO/NsCgGpain
ADJPRO/NsCilGpain
ADJPRO/NsCnavGn
ADJPRO/NsCnvGpai
ADJPRO/ZdNpCaGp
ADJPRO/ZdNpCd
ADJPRO/ZdNpCgl
ADJPRO/ZdNpCi
ADJPRO/ZdNpCnavGaifn
ADJPRO/ZdNpCnvGp
ADJPRO/ZdNsCaGi
ADJPRO/ZdNsCaGpa
ADJPRO/ZdNsCaiGf
ADJPRO/ZdNsCavGf
ADJPRO/ZdNsCdGpain
ADJPRO/ZdNsCgdlGf
ADJPRO/ZdNsCgGpain
ADJPRO/ZdNsCilGpain
ADJPRO/ZdNsCnavGn
ADJPRO/ZdNsCnvGpai
ADJPRO/ZgNpCaGp
ADJPRO/ZgNpCd
ADJPRO/ZgNpCgl
ADJPRO/ZgNpCi
ADJPRO/ZgNpCnavGaifn
ADJPRO/ZgNpCnvGp
ADJPRO/ZgNsCaGi
ADJPRO/ZgNsCaGpa
ADJPRO/ZgNsCaiGf
ADJPRO/ZgNsCavGf
ADJPRO/ZgNsCdGpain
ADJPRO/ZgNsCgdlGf
ADJPRO/ZgNsCgGpain
ADJPRO/ZgNsCilGpain
ADJPRO/ZgNsCnavGn
ADJPRO/ZgNsCnvGpai
ADJPRO/ZiNpCaGp
ADJPRO/ZiNpCd
ADJPRO/ZiNpCgl
ADJPRO/ZiNpCi
ADJPRO/ZiNpCnavGaifn
ADJPRO/ZiNpCnvGp
ADJPRO/ZiNsCaGi
ADJPRO/ZiNsCaGpa
ADJPRO/ZiNsCaiGf
ADJPRO/ZiNsCavGf
ADJPRO/ZiNsCdGpain
ADJPRO/ZiNsCgdlGf
ADJPRO/ZiNsCgGpain
ADJPRO/ZiNsCilGpain
ADJPRO/ZiNsCnavGn
ADJPRO/ZiNsCnvGpai
ADJPRO/ZnNpCaGp
ADJPRO/ZnNpCd
ADJPRO/ZnNpCgl
ADJPRO/ZnNpCi
ADJPRO/ZnNpCnavGaifn
ADJPRO/ZnNpCnvGp
ADJPRO/ZnNsCaGi
ADJPRO/ZnNsCaGpa
ADJPRO/ZnNsCaiGf
ADJPRO/ZnNsCavGf
ADJPRO/ZnNsCdGpain
ADJPRO/ZnNsCgdlGf
ADJPRO/ZnNsCgGpain
ADJPRO/ZnNsCilGpain
ADJPRO/ZnNsCnavGn
ADJPRO/ZnNsCnvGpai
ADJPRO/ZqNpCaGp
ADJPRO/ZqNpCd
ADJPRO/ZqNpCgl
ADJPRO/ZqNpCi
ADJPRO/ZqNpCnavGaifn
ADJPRO/ZqNpCnvGp
ADJPRO/ZqNsCaGi
ADJPRO/ZqNsCaGpa
ADJPRO/ZqNsCaiGf
ADJPRO/ZqNsCavGf
ADJPRO/ZqNsCdGpain
ADJPRO/ZqNsCgdlGf
ADJPRO/ZqNsCgGpain
ADJPRO/ZqNsCilGpain
ADJPRO/ZqNsCnavGn
ADJPRO/ZqNsCnvGpai
ADJPRO/ZqrNpCaGp
ADJPRO/ZqrNpCd
ADJPRO/ZqrNpCgl
ADJPRO/ZqrNpCi
ADJPRO/ZqrNpCnavGaifn
ADJPRO/ZqrNpCnvGp
ADJPRO/ZqrNsCaGi
ADJPRO/ZqrNsCaGpa
ADJPRO/ZqrNsCaiGf
ADJPRO/ZqrNsCavGf
ADJPRO/ZqrNsCdGpain
ADJPRO/ZqrNsCgdlGf
ADJPRO/ZqrNsCgGpain
ADJPRO/ZqrNsCilGpain
ADJPRO/ZqrNsCnavGn
ADJPRO/ZqrNsCnvGpai
ADJPRO/ZsNpCaGp
ADJPRO/ZsNpCd
ADJPRO/ZsNpCgl
ADJPRO/ZsNpCi
ADJPRO/ZsNpCnavGaifn
ADJPRO/ZsNpCnvGp
ADJPRO/ZsNsCaGi
ADJPRO/ZsNsCaGpa
ADJPRO/ZsNsCaiGf
ADJPRO/ZsNsCavGf
ADJPRO/ZsNsCdGpain
ADJPRO/ZsNsCgdlGf
ADJPRO/ZsNsCgGpain
ADJPRO/ZsNsCilGpain
ADJPRO/ZsNsCnavGn
ADJPRO/ZsNsCnvGpai
ADJPRP/NpCaGp
ADJPRP/NpCd
ADJPRP/NpCgl
ADJPRP/NpCi
ADJPRP/NpCnavGaifn
ADJPRP/NpCnvGp
ADJPRP/NsCaGi
ADJPRP/NsCaGpa
ADJPRP/NsCaiGf
ADJPRP/NsCavGf
ADJPRP/NsCdGpain
ADJPRP/NsCgdlGf
ADJPRP/NsCgGpain
ADJPRP/NsCilGpain
ADJPRP/NsCnavGn
ADJPRP/NsCnvGpai
ADVANP
ADV/Dc
ADV/Dp
ADV/Ds
ADVNUM
ADVPRO
ADVPRO/Zd
ADVPRO/Zi
ADVPRO/Zn
ADVPRO/Zq
ADVPRO/Zqr
ADVPRO/Zr
ADVPRP
APP
BYC/Vb
BYC/VpMcNpP1Gaifn
BYC/VpMcNpP1Gp
BYC/VpMcNpP2Gaifn
BYC/VpMcNpP2Gp
BYC/VpMcNpP3Gaifn
BYC/VpMcNpP3Gp
BYC/VpMcNsP1Gf
BYC/VpMcNsP1Gpai
BYC/VpMcNsP2Gf
BYC/VpMcNsP2Gpai
BYC/VpMcNsP3Gf
BYC/VpMcNsP3Gn
BYC/VpMcNsP3Gpai
BYC/VpMdTaNpP1Gaifn
BYC/VpMdTaNpP1Gp
BYC/VpMdTaNpP2Gaifn
BYC/VpMdTaNpP2Gp
BYC/VpMdTaNpP3Gaifn
BYC/VpMdTaNpP3Gp
BYC/VpMdTaNsP1Gf
BYC/VpMdTaNsP1Gpai
BYC/VpMdTaNsP2Gf
BYC/VpMdTaNsP2Gpai
BYC/VpMdTaNsP3Gf
BYC/VpMdTaNsP3Gn
BYC/VpMdTaNsP3Gpai
BYC/VpMdTrfNpP1
BYC/VpMdTrfNpP2
BYC/VpMdTrfNpP3
BYC/VpMdTrfNsP1
BYC/VpMdTrfNsP2
BYC/VpMdTrfNsP3
BYC/VpMiNpP1
BYC/VpMiNpP2
BYC/VpMiNsP2
CONJ
EXCL
N/GaNpCa
N/GaNpCd
N/GaNpCg
N/GaNpCi
N/GaNpCl
N/GaNpCn
N/GaNpCv
N/GaNsCa
N/GaNsCd
N/GaNsCg
N/GaNsCi
N/GaNsCl
N/GaNsCn
N/GaNsCv
N/GfNpCa
N/GfNpCd
N/GfNpCg
N/GfNpCi
N/GfNpCl
N/GfNpCn
N/GfNpCv
N/GfNsCa
N/GfNsCd
N/GfNsCg
N/GfNsCi
N/GfNsCl
N/GfNsCn
N/GfNsCv
N/GiNpCa
N/GiNpCd
N/GiNpCg
N/GiNpCi
N/GiNpCl
N/GiNpCn
N/GiNpCv
N/GiNsCa
N/GiNsCd
N/GiNsCg
N/GiNsCi
N/GiNsCl
N/GiNsCn
N/GiNsCv
N/GnNpCa
N/GnNpCd
N/GnNpCg
N/GnNpCi
N/GnNpCl
N/GnNpCn
N/GnNpCv
N/GnNsCa
N/GnNsCd
N/GnNsCg
N/GnNsCi
N/GnNsCl
N/GnNsCn
N/GnNsCv
N/G?NpCa
N/G*NpCa
N/G?NpCd
N/G*NpCd
N/G?NpCg
N/G*NpCg
N/G?NpCi
N/G*NpCi
N/G?NpCl
N/G*NpCl
N/G?NpCn
N/G*NpCn
N/G?NpCv
N/G*NpCv
N/G?NsCa
N/G?NsCd
N/G?NsCg
N/G?NsCi
N/G?NsCl
N/G?NsCn
N/G?NsCv
N/GpNpCa
N/GpNpCd
N/GpNpCg
N/GpNpCi
N/GpNpCl
N/GpNpCn
N/GpNpCv
N/GpNsCa
N/GpNsCd
N/GpNsCg
N/GpNsCi
N/GpNsCl
N/GpNsCn
N/GpNsCv
NPRO/ZdGnNsCa
NPRO/ZdGnNsCd
NPRO/ZdGnNsCg
NPRO/ZdGnNsCi
NPRO/ZdGnNsCl
NPRO/ZdGnNsCn
NPRO/ZgGnNsCa
NPRO/ZgGnNsCd
NPRO/ZgGnNsCg
NPRO/ZgGnNsCi
NPRO/ZgGnNsCl
NPRO/ZgGnNsCn
NPRO/ZgGpNpCa
NPRO/ZgGpNpCd
NPRO/ZgGpNpCg
NPRO/ZgGpNpCi
NPRO/ZgGpNpCl
NPRO/ZgGpNpCn
NPRO/ZiGnNsCa
NPRO/ZiGnNsCd
NPRO/ZiGnNsCg
NPRO/ZiGnNsCi
NPRO/ZiGnNsCl
NPRO/ZiGnNsCn
NPRO/ZiGpNsCa
NPRO/ZiGpNsCd
NPRO/ZiGpNsCg
NPRO/ZiGpNsCi
NPRO/ZiGpNsCl
NPRO/ZiGpNsCn
NPRO/ZnGnNsCa
NPRO/ZnGnNsCd
NPRO/ZnGnNsCg
NPRO/ZnGnNsCi
NPRO/ZnGnNsCl
NPRO/ZnGnNsCn
NPRO/ZnGpNsCa
NPRO/ZnGpNsCd
NPRO/ZnGpNsCg
NPRO/ZnGpNsCi
NPRO/ZnGpNsCl
NPRO/ZnGpNsCn
NPRO/ZpGaifnNpCa
NPRO/ZpGaifnNpCd
NPRO/ZpGaifnNpCg
NPRO/ZpGaifnNpCi
NPRO/ZpGaifnNpCl
NPRO/ZpGaifnNpCn
NPRO/ZpGfNsCa
NPRO/ZpGfNsCd
NPRO/ZpGfNsCg
NPRO/ZpGfNsCi
NPRO/ZpGfNsCl
NPRO/ZpGfNsCn
NPRO/ZpGnNsCa
NPRO/ZpGnNsCd
NPRO/ZpGnNsCg
NPRO/ZpGnNsCi
NPRO/ZpGnNsCl
NPRO/ZpGnNsCn
NPRO/ZpG*NpCa
NPRO/ZpG*NpCd
NPRO/ZpG*NpCg
NPRO/ZpG*NpCi
NPRO/ZpG*NpCl
NPRO/ZpG*NpCn
NPRO/ZpG*NsCa
NPRO/ZpG*NsCd
NPRO/ZpG*NsCg
NPRO/ZpG*NsCi
NPRO/ZpG*NsCl
NPRO/ZpG*NsCn
NPRO/ZpGpaiNsCa
NPRO/ZpGpaiNsCd
NPRO/ZpGpaiNsCg
NPRO/ZpGpaiNsCi
NPRO/ZpGpaiNsCl
NPRO/ZpGpaiNsCn
NPRO/ZpGpNpCa
NPRO/ZpGpNpCd
NPRO/ZpGpNpCg
NPRO/ZpGpNpCi
NPRO/ZpGpNpCl
NPRO/ZpGpNpCn
NPRO/ZqGnNsCa
NPRO/ZqGnNsCd
NPRO/ZqGnNsCg
NPRO/ZqGnNsCi
NPRO/ZqGnNsCl
NPRO/ZqGnNsCn
NPRO/ZqGpNsCa
NPRO/ZqGpNsCd
NPRO/ZqGpNsCg
NPRO/ZqGpNsCi
NPRO/ZqGpNsCl
NPRO/ZqGpNsCn
NPRO/ZqrGnNsCa
NPRO/ZqrGnNsCd
NPRO/ZqrGnNsCg
NPRO/ZqrGnNsCi
NPRO/ZqrGnNsCl
NPRO/ZqrGnNsCn
NPRO/ZqrGpNsCa
NPRO/ZqrGpNsCd
NPRO/ZqrGpNsCg
NPRO/ZqrGpNsCi
NPRO/ZqrGpNsCl
NPRO/ZqrGpNsCn
NPRO/ZxG*N*Ca
NPRO/ZxG*N*Cd
NPRO/ZxG*N*Cg
NPRO/ZxG*N*Ci
NPRO/ZxG*N*Cl
NUMCOL/Ca
NUMCOL/Cd
NUMCOL/Cg
NUMCOL/Ci
NUMCOL/Cl
NUMCOL/Cn
NUMCRD/Ca
NUMCRD/CaGaifn
NUMCRD/CaGain
NUMCRD/CaGf
NUMCRD/CaGp
NUMCRD/Cd
NUMCRD/Cg
NUMCRD/Ci
NUMCRD/CiGf
NUMCRD/CiGpain
NUMCRD/Cl
NUMCRD/Cn
NUMCRD/CnGaifn
NUMCRD/CnGain
NUMCRD/CnGf
NUMCRD/CnGp
NUMCRD/ZiCaGaifn
NUMCRD/ZiCaGain
NUMCRD/ZiCaGf
NUMCRD/ZiCaGp
NUMCRD/ZiCd
NUMCRD/ZiCg
NUMCRD/ZiCi
NUMCRD/ZiCiGf
NUMCRD/ZiCiGpain
NUMCRD/ZiCl
NUMCRD/ZiCnGaifn
NUMCRD/ZiCnGain
NUMCRD/ZiCnGf
NUMCRD/ZiCnGp
NUMCRD/ZqiCaGaifn
NUMCRD/ZqiCaGp
NUMCRD/ZqiCd
NUMCRD/ZqiCg
NUMCRD/ZqiCi
NUMCRD/ZqiCl
NUMCRD/ZqiCnGaifn
NUMCRD/ZqiCnGp
NUMORD/NpCaGp
NUMORD/NpCd
NUMORD/NpCgl
NUMORD/NpCi
NUMORD/NpCnavGaifn
NUMORD/NpCnvGp
NUMORD/NsCaGi
NUMORD/NsCaGpa
NUMORD/NsCaiGf
NUMORD/NsCavGf
NUMORD/NsCdGpain
NUMORD/NsCgdlGf
NUMORD/NsCgGpain
NUMORD/NsCilGpain
NUMORD/NsCnavGn
NUMORD/NsCnvGpai
NUMPAR
NUMPAR/Dc
NUMPAR/Dp
NUMPAR/Ds
NUMPAR/Ns
NUMPAR/NsGf
NUMPAR/NsGpain
NV/Ca
NV/Cd
NV/Cg
NV/Ci
NV/Cl
NV/Cn
ONO
P
PART
P/Ca
P/Cai
P/Cal
P/Cd
P/Cg
P/Cga
P/Cgai
P/Cgd
P/Cgi
P/Ci
P/Cl
PPRO/Zp
V/AiVb
V/AiViTa
V/AiVpMcNpP1Gaifn
V/AiVpMcNpP1Gp
V/AiVpMcNpP2Gaifn
V/AiVpMcNpP2Gp
V/AiVpMcNpP3Gaifn
V/AiVpMcNpP3Gp
V/AiVpMcNsP1Gf
V/AiVpMcNsP1Gpai
V/AiVpMcNsP2Gf
V/AiVpMcNsP2Gpai
V/AiVpMcNsP3Gf
V/AiVpMcNsP3Gn
V/AiVpMcNsP3Gpai
V/AiVpMdTaNpP1Gaifn
V/AiVpMdTaNpP1Gp
V/AiVpMdTaNpP2Gaifn
V/AiVpMdTaNpP2Gp
V/AiVpMdTaNpP3Gaifn
V/AiVpMdTaNpP3Gp
V/AiVpMdTaNsP1Gf
V/AiVpMdTaNsP1Gpai
V/AiVpMdTaNsP2Gf
V/AiVpMdTaNsP2Gpai
V/AiVpMdTaNsP3Gf
V/AiVpMdTaNsP3Gn
V/AiVpMdTaNsP3Gpai
V/AiVpMdTrfNpP1
V/AiVpMdTrfNpP2
V/AiVpMdTrfNpP3
V/AiVpMdTrfNsP1
V/AiVpMdTrfNsP2
V/AiVpMdTrfNsP3
V/AiVpMiNpP1
V/AiVpMiNpP2
V/AiVpMiNsP2
V/ApVb
V/ApViTa
V/ApVpMcNpP1Gaifn
V/ApVpMcNpP1Gp
V/ApVpMcNpP2Gaifn
V/ApVpMcNpP2Gp
V/ApVpMcNpP3Gaifn
V/ApVpMcNpP3Gp
V/ApVpMcNsP1Gf
V/ApVpMcNsP1Gpai
V/ApVpMcNsP2Gf
V/ApVpMcNsP2Gpai
V/ApVpMcNsP3Gf
V/ApVpMcNsP3Gn
V/ApVpMcNsP3Gpai
V/ApVpMdTaNpP1Gaifn
V/ApVpMdTaNpP1Gp
V/ApVpMdTaNpP2Gaifn
V/ApVpMdTaNpP2Gp
V/ApVpMdTaNpP3Gaifn
V/ApVpMdTaNpP3Gp
V/ApVpMdTaNsP1Gf
V/ApVpMdTaNsP1Gpai
V/ApVpMdTaNsP2Gf
V/ApVpMdTaNsP2Gpai
V/ApVpMdTaNsP3Gf
V/ApVpMdTaNsP3Gn
V/ApVpMdTaNsP3Gpai
V/ApVpMdTrfNpP1
V/ApVpMdTrfNpP2
V/ApVpMdTrfNpP3
V/ApVpMdTrfNsP1
V/ApVpMdTrfNsP2
V/ApVpMdTrfNsP3
V/ApVpMiNpP1
V/ApVpMiNpP2
V/ApVpMiNsP2
V/GiVb
V/GiViTa
V/GiVpMcNpP1Gaifn
V/GiVpMcNpP1Gp
V/GiVpMcNpP2Gaifn
V/GiVpMcNpP2Gp
V/GiVpMcNpP3Gaifn
V/GiVpMcNpP3Gp
V/GiVpMcNsP1Gf
V/GiVpMcNsP1Gpai
V/GiVpMcNsP2Gf
V/GiVpMcNsP2Gpai
V/GiVpMcNsP3Gf
V/GiVpMcNsP3Gn
V/GiVpMcNsP3Gpai
V/GiVpMdTaNpP1Gaifn
V/GiVpMdTaNpP1Gp
V/GiVpMdTaNpP2Gaifn
V/GiVpMdTaNpP2Gp
V/GiVpMdTaNpP3Gaifn
V/GiVpMdTaNpP3Gp
V/GiVpMdTaNsP1Gf
V/GiVpMdTaNsP1Gpai
V/GiVpMdTaNsP2Gf
V/GiVpMdTaNsP2Gpai
V/GiVpMdTaNsP3Gf
V/GiVpMdTaNsP3Gn
V/GiVpMdTaNsP3Gpai
V/GiVpMdTrfNpP1
V/GiVpMdTrfNpP2
V/GiVpMdTrfNpP3
V/GiVpMdTrfNsP1
V/GiVpMdTrfNsP2
V/GiVpMdTrfNsP3
V/GiVpMiNpP1
V/GiVpMiNpP2
V/GiVpMiNsP2
VNI

View File

@ -1,124 +0,0 @@
#FLAG REL
#UP REL
#ORDER * pcmpl
#ORDER ..
#ORDER subj .. * .. cmpl
#ORDER refl .. *
#ORDER * refl
#CONSTR cmpl_g => ~cmpl_a
#CONSTR cmpl_inf => ~(cmpl_g|cmpl_d|cmpl_a|cmpl_p|cmpl_ze|cmpl_s)
#CONSTR subj_pred => subj
#constr cmpl_pred => cmpl
#subj
ROLE subj # deklaracja roli (typ zaleznosci) podmiot
AGR subj N # zgodnosc podrzednika z nadrzednikiem co do liczby
AGR subj G # zgodnosc podrzednika z nadrzednikiem co do rodzaju
GOV subj */Cn # wymaganie by podrzednik byl w mianowniku
# pary kategorii, jakie mozna polaczyc zaleznoscia typu podmiot
# nadrzednik podrzednik
LINK V/VpP3,BYC/VpP3 N,NPRO subj
# (przecinek znaczy lub)
ROLE cmpl_ga # dopelnienie w bierniku/dopelniaczu
ROLE cmpl_d # w celowniku
ROLE cmpl_i # w narzedniku
ROLE cmpl_inf # w bezokoliczniku
ROLE cmpl_s # bedace zdaniem
ROLE cmpl_ze # bedace zdaniem poprzedzonym 'ze'
ROLE aux #
ROLE mod # modyfikator (okolicznik/przydawka) (niewymagane określenie) (biały kot)
ROLE prep # modyfikator w postaci frazy przyimkowej
ROLE pcmpl # dopełnienie przyimka (wymagany rzeczownik)
ROLE ccmpl # dopełnienie spójnika (wymagany drugi człon konstrukcji spójnikowej)
ROLE poss # np. książka Marii, ojciec kolegi
ROLE restr # (bardzo <- duży)
ROLE part # partykuła
ROLE coord # koordynacja (powiązanie pierwszego członu konstrukcji współrzędnej
# ze spójnikiem współrzędnym centralnym ( Oto [pies -> i] kot. )
AGR aux N
AGR aux G
AGR mod N
AGR mod C
AGR mod G
AGR pcmpl C
GOV cmpl_ga */Cga
GOV cmpl_d */Cd
GOV cmpl_i */Ci
GOV poss */Cg
SGL subj
SGL cmpl_ga
SGL cmpl_d
SGL cmpl_i
SGL cmpl_inf
SGL aux
SGL pcmpl
SGL ccmpl
SGL poss
SGL restr
REQ P pcmpl
REQ CONJ ccmpl
RIGHT pcmpl
RIGHT ccmpl
RIGHT cmpl_ze
RIGHT poss
#cmpl_*
LINK V,ADVPRP,ADVANP,ADJPRP,ADJPAP,NV N,NPRO cmpl_ga
LINK V,ADVPRP,ADVANP,ADJPRP,ADJPAP,NV N,NPRO cmpl_d
LINK V,ADVPRP,ADVANP,ADJPRP,ADJPAP,NV N,NPRO cmpl_i
LINK V,ADVPRP,ADVANP,ADJPRP,ADJPAP,NV V/Vb cmpl_inf
LINK V,ADVPRP,ADVANP,ADJPRP,ADJPAP,NV CONJ cmpl_ze
#aux
#czas przyszly analityczny
LINK BYC/VpMdTf V/AiVpP3,V/AiVb aux
#czas zaprzeszly(?)
LINK BYC/VpMc V/VpP3 aux
#BYC jako lacznik w (jest bialy, jest zaszlachtowany, jest pilotem)
LINK BYC ADJPAP/Cn,ADJ/Cn,N/Ci aux
#mod
LINK V ADV,ADVPRP,ADVANP,ADVPRO mod
LINK N,NV ADJ,ADJPAP,ADJPRP,ADJPRO mod
#prep
LINK N,V P prep
#pcmpl
LINK P N,NV pcmpl
#poss
LINK N N,NV,NPRO poss
#ccmpl
LINK CONJ V/Vp ccmpl
#restr
LINK ADJ ADV restr
#part
LINK V PART part

View File

@ -1,21 +0,0 @@
%stdcor 1
%xchg 1
¿ rz 0.5
ch h 0.5
u ó 0.5
u o 0.75
om ¹ 0.5
om a 0.75
en ê 0.5
en e 0.75
a ¹ 0.25
c æ 0.25
e ê 0.25
l ³ 0.25
n ñ 0.25
o ó 0.25
s ¶ 0.25
z ¿ 0.25
z ¼ 0.25
x ¼ 0.30

View File

@ -1,8 +0,0 @@
# main: cmdline.c main_template.cc
# g++ -o main cmdline.c common.cc main_template.cc
# cmdline.c cmdline.h : cmdline.ggo
# gengetopt -i cmdline.ggo
# cmdline.ggo: cmdline_common.ggo cmdline_program.ggo
# cat cmdline_common.ggo cmdline_program.ggo > cmdline.ggo

View File

@ -1,18 +0,0 @@
Propozycja ujednolicenia dzialania klocka na poziomie
funkcji main. Parametry meta - zdefiniowane dla
wszystkich, poza tok, programow, definiujace ich zachowanie
w systemie klockow.
cmdline_common.ggo - deklaracje parametrow meta
cmdline_program.ggo - przyklad deklaracji parametrow programu
nazwa docelowa np. cmdline_guess.ggo
common.cc - zmienne globalne zawierajace informacje
przekazane przez parametry meta
common.h
main_template.cc - szkielet funkcji main
Makefile - sposob kompilacji

View File

@ -1,34 +0,0 @@
#section "Common UTT options"
option "input" f "Input file" string no
option "output" o "Output file for succesfully processed segments" string no
option "fail" e "Output file for unsuccesfully processed segments " string no
option "only-fail" - "Print only segments the program failed to process" flag off hidden
option "no-fail" - "Print only segments the program processed" flag off hidden
option "copy" c "Copy succesfully processed segments to standard output" flag off
option "process" p "Process segments with this tag" string no multiple
option "select" s "Select only segments with this field" string no multiple
option "ignore" S "Select only segments without this field" string no multiple
option "output-field" O "Output field name" string no
option "input-field" I "Input field name" string no multiple
option "interactive" i "Toggle interactive mode" flag off
option "config" - "Configuration file" string typestr="FILENAME" no
option "one-field" 1 "Print all results in one segments (creates ambiguous annotation)" flag off
option "one-line" - "Print annotation alternatives as additional fields" flag off
option "language" - "Language." string no

View File

@ -1,5 +0,0 @@
package "guess"
version "0.1"
option "color" l "Show guessed descriptions in colour." flag off

View File

@ -1,230 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include "common.h"
#include <stdio.h>
#include <locale.h>
// Streams shared by all programs; process_common_options() re-opens them
// when the --input / --output / --fail options are given.
FILE* inputf=stdin;
FILE* outputf=stdout;
FILE* failedf=stdout;
// Set by process_common_options() when --copy is given.
bool copy_processed=0;
// Mutually exclusive output layout flags, maintained by
// process_config_files() from the --one-field / --one-line options.
bool one_field=false;
bool one_line=false;
// Field prefixes filled in by process_common_options().
char output_field_prefix[FIELD_PREFIX_MAXLEN];
char input_field_prefix[FIELD_PREFIX_MAXLEN];
// NOTE(review): declared here but not used in the visible part of this
// file; no definition is visible either -- confirm they are still needed.
extern int argc;
extern char **argv;
// Tilde (home directory) expansion in a path.
//
// inpath  - path that may start with '~'
// outpath - receives the result; the caller must provide a buffer large
//           enough for $HOME plus the rest of inpath (no size is passed in)
//
// A leading '~' is replaced with the value of $HOME; any other path is
// copied unchanged. Returns 0 on success. When the path starts with '~'
// but HOME is not set, the path is copied unchanged and a non-zero value
// is returned instead of handing a null pointer to sprintf.
int expand_path(char* inpath, char* outpath)
{
  if (inpath[0] == '~')
    {
      const char* home = getenv("HOME");
      if (home == NULL)
        {
          strcpy(outpath, inpath);
          return 1; // nothing to expand with
        }
      sprintf(outpath, "%s%s", home, inpath + 1);
    }
  else
    strcpy(outpath, inpath);
  return 0; // no problem
}
// Stores the base name of argv0 (the part after the last '/', or all of
// argv0 when it contains no '/') in program_name. The caller provides a
// buffer that is large enough.
void set_program_name(char program_name[], char* argv0)
{
  char* last_slash = strrchr(argv0, '/');
  char* base_name = last_slash ? last_slash + 1 : argv0;
  strcpy(program_name, base_name);
}
extern void process_config_files(gengetopt_args_info* args, char* argv0)
{
char program_name[256];
char config_file[256];
char config_file_tmp[256];
set_program_name(program_name,argv0);
// obsługa pliku konfiguracyjnego podanego w linii komend
if (args->config_given) {
if (file_accessible(args->config_arg) == 0) {
if (cmdline_parser_configfile(args->config_arg,
args,
0, // 0 - nie nadpisuj wartości parametrów
0, // 0 - nie inicjuj
0) != 0) {
fprintf(stderr, "Error in config file (%s)\n", args->config_arg);
exit(1);
}
}
}
if(args->one_line_given && !one_line) one_line=true, one_field=false;
if(args->one_field_given && !one_field) one_line=false, one_field=true;
// obsluga pliku konfiguracyjnego uzytkownika dla programu
sprintf(config_file_tmp, "%s/%s.conf", USER_CONFIG_DIR, program_name);
expand_path(config_file_tmp, config_file);
if (file_accessible(config_file) == 0) {
if (cmdline_parser_configfile(config_file,
args,
0, // 0 - nie nadpisuj danych
0, // 0 - nie inicjuj struktury
0) != 0) {
fprintf(stderr, "Error in config file (%s)\n", config_file);
exit(1);
}
}
if(args->one_line_given && !one_line) one_line=true, one_field=false;
if(args->one_field_given && !one_field) one_line=false, one_field=true;
// obsluga pliku konfiguracyjnego uzytkownika globalnego
sprintf(config_file_tmp, "%s/utt.conf", USER_CONFIG_DIR);
expand_path(config_file_tmp, config_file);
if (file_accessible(config_file) == 0) {
if (cmdline_parser_configfile(config_file,
args,
0, // 0 - nie nadpisuj danych
0, // 0 - nie inicjuj struktury
0) != 0) {
fprintf(stderr, "Error in config file (%s)\n", config_file);
exit(1);
}
}
if(args->one_line_given && !one_line) one_line=true, one_field=false;
if(args->one_field_given && !one_field) one_line=false, one_field=true;
// obsluga systemowego pliku konfiguracyjnego dla programu
sprintf(config_file, "%s/%s.conf", SYSTEM_CONFIG_DIR, program_name);
if (file_accessible(config_file) == 0) {
if (cmdline_parser_configfile(config_file,
args,
0, // 0 - nie zmieniaj danych wczesniejszych
0, // 0 - nie inicjuj struktury
0 // 0 - nie sprawdzaj wymaganych parametrow
) != 0) {
fprintf(stderr, "Error in config file (%s)\n", config_file);
exit(1);
}
}
if(args->one_line_given && !one_line) one_line=true, one_field=false;
if(args->one_field_given && !one_field) one_line=false, one_field=true;
// obsluga systemowego pliku konfiguracyjnego globalnego
sprintf(config_file, "%s/utt.conf", SYSTEM_CONFIG_DIR);
if (file_accessible(config_file) == 0) {
if (cmdline_parser_configfile(config_file,
args,
0, // 0 - nie zmieniaj danych wczesniejszych
0, // 0 - nie inicjuj struktury
0 // 0 - nie sprawdzaj wymaganych parametrow
) != 0) {
fprintf(stderr, "Error in config file (%s)\n", config_file);
exit(1);
}
}
if(args->one_line_given && !one_line) one_line=true, one_field=false;
if(args->one_field_given && !one_field) one_line=false, one_field=true;
}
// Applies the options shared by all programs: sets the locale, prints the
// help text when asked to, opens the input / output / fail streams and
// fills in the input and output field prefixes and the copy flag.
// Exits with status 1 when one of the files cannot be opened.
void process_common_options(gengetopt_args_info* args, char* argv0)
{
  char program_name[256];
  set_program_name(program_name, argv0);

  setlocale(LC_CTYPE, "");
  setlocale(LC_COLLATE, "");

  if (args->help_given)
    {
      cmdline_parser_print_help();
    }

  // Streams: each one keeps its default unless the option is given.
  if (args->input_given && !(inputf = fopen(args->input_arg, "r")))
    {
      fprintf(stderr,"No such file: %s.\n", args->input_arg);
      exit(1);
    }

  if (args->output_given && !(outputf = fopen(args->output_arg, "w")))
    {
      fprintf(stderr,"Cannot open output file: %s.\n", args->output_arg);
      exit(1);
    }

  if (args->fail_given && !(failedf = fopen(args->fail_arg, "w")))
    {
      fprintf(stderr,"Cannot open output file: %s.\n", args->fail_arg);
      exit(1);
    }

  // Input prefix: first --input-field value, otherwise "4".
  if (!args->input_field_given)
    {
      strcpy(input_field_prefix, "4");
    }
  else
    {
      fieldprefix(args->input_field_arg[0], input_field_prefix);
    }

  // Output prefix: --output-field value, otherwise "<program name>:".
  if (!args->output_field_given)
    {
      sprintf(output_field_prefix, "%s%c", program_name, INFIELD_SEP);
    }
  else
    {
      fieldprefix(args->output_field_arg, output_field_prefix);
    }

  if (args->copy_given)
    {
      copy_processed = true;
    }
}
// Checks whether the file can be read: returns the result of
// access(path, R_OK), i.e. 0 when the file is readable and -1 otherwise.
int file_accessible(const char* path) {
  const int status = access(path, R_OK);
  return status;
}
// Checks that the configuration file dir/filename exists and is usable.
// Returns 0 when dir is a directory we may enter and dir/filename is a
// regular file we may read; returns -1 in every other case (including
// running out of memory for the joined path).
int config_file_exists(const char* dir, const char* filename) {
  struct stat dir_stat;
  struct stat file_stat;
  int result = -1;

  char* path = (char*)malloc(strlen(dir) + strlen(filename) + 2); // + '\0' + '/'
  if (path == NULL)
    return -1;
  sprintf(path, "%s/%s", dir, filename);

  // Single exit point so that path is released on every outcome.
  if (stat(dir, &dir_stat) == 0
      && stat(path, &file_stat) == 0
      && S_ISDIR(dir_stat.st_mode)    // the directory really is a directory
      && S_ISREG(file_stat.st_mode)   // the config file is a regular file
      && access(dir, X_OK) == 0       // we are allowed to enter the directory
      && access(path, R_OK) == 0)     // we are allowed to read the file
    result = 0;

  free(path);
  return result;
}

View File

@ -1,467 +0,0 @@
#ifndef __COMMON_H
#define __COMMON_H
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include "../lib/const.h"
#ifndef _CMDLINE_FILE
#error _CMDLINE_FILE constant not defined!
#else
#include _CMDLINE_FILE
#endif
/**************************************************
* Input/output constants
*/
// Form value that Segment::parse() treats as "no form" (form is set to NULL).
#define EMPTYFORM '*'
// Character separating a long field name from its value (e.g. "lem:...").
#define INFIELD_SEP ':'
// Capacity of the aux[] array in Seg/Segment.
#define MAXAUX 16
// Characters that separate fields within a segment line.
#define FIELD_SEP " \t\n"
// Size of the buffers that hold a field prefix.
#define FIELD_PREFIX_MAXLEN 32
// directories with configuration files
// new
// old - to be removed
// #define CONFIG_DIR ".utt/conf"
// name of the variable holding the path to the data
// #define UTT_DIR_VAR "UTT_DIR"
// path to the data files (e.g. UTT_DIR/files) relative to $HOME!
// #define UTT_DIR_DEFAULT ".utt/pl/"
/**************************************************/
// Streams used by the tools; process_common_options() reopens them when the
// corresponding input/output/fail option is given.
extern FILE* inputf;
extern FILE* outputf;
extern FILE* failedf;
extern char* input_filename;
extern char* output_filename;
extern char* failed_filename;
// Output layout switches, set from the one-line / one-field options.
extern bool one_line;
extern bool one_field;
// Field prefixes computed by process_common_options().
extern char input_field_prefix[];
extern char output_field_prefix[];
extern bool copy_processed;
extern bool append_output;
extern bool append_failed;
// path to the data directory
extern char utt_dir[];
extern void process_common_options(gengetopt_args_info* args, char* argv0);
extern void process_config_files(gengetopt_args_info* args, char* argv0);
extern int expand_path(char* inpath, char* outpath);
/**************************************************
* problems with casing */
// Classifies the letter case of the NUL-terminated string s.
// Return value:
//   0 - no uppercase characters (also returned for the empty string)
//   1 - first character uppercase, no other uppercase character
//   2 - all characters uppercase (two or more characters)
//   3 - anything else
// Characters are tested with isupper(); they are passed as unsigned char
// because the ctype functions are undefined for negative values.
inline int casing(char* s)
{
    // The empty string has no second character to look at.
    if (*s == '\0')
        return 0;

    int ret = isupper((unsigned char)*s) ? 1 : 0;
    // True while every character seen so far was uppercase. Without it a
    // late capital ("AbC") would promote state 1 to "all uppercase".
    bool only_upper = (ret == 1);

    for (++s; *s != '\0'; ++s)
    {
        if (isupper((unsigned char)*s))
        {
            if (ret == 1) ret = only_upper ? 2 : 3;
            else if (ret == 0) ret = 3;
        }
        else
        {
            only_upper = false;
            if (ret == 2) ret = 3;
        }
    }
    return ret;
}
// Copies s to d, converting every character with tolower(). The terminating
// NUL is copied as well, so d needs room for strlen(s)+1 bytes. s and d may
// be the same buffer.
inline void tolowers(char* s, char* d)
{
    unsigned char c;
    do
    {
        // tolower() is only defined for unsigned char values and EOF.
        c = (unsigned char)*s++;
        *d++ = (char)tolower(c);
    }
    while (c != '\0');
}
// Copies s to d (terminating NUL included), applying the letter case
// described by `casing`, a value as returned by casing():
//   0, 3 - copy unchanged (3 carries too little information to restore)
//   1    - first character uppercased, the rest copied unchanged
//   2    - every character uppercased
// Any other value leaves d untouched. s and d may be the same buffer.
inline void restorecasing(char *s, char *d, int casing)
{
    if (casing < 0 || casing > 3)
        return;

    const bool upper_first = (casing == 1 || casing == 2);
    const bool upper_rest = (casing == 2);
    bool first = true;
    unsigned char c;
    do
    {
        // toupper() is only defined for unsigned char values and EOF.
        c = (unsigned char)*s++;
        const bool up = first ? upper_first : upper_rest;
        *d++ = (char)(up ? toupper(c) : c);
        first = false;
    }
    while (c != '\0');
}
/**************************************************/
/*
  Extracts one field of a segment.

  parameters:
    -seg  - segment (a NUL-terminated line)
    -pref - field name or "1", "2", "3", "4" for the first four fields
    +val  - receives the field contents, NUL-terminated; the caller must
            provide a buffer large enough for them
  return value:
    1 if specified field exists, 0 otherwise
*/
inline int getfield(char* seg, const char* pref, char* val)
{
  char* p=seg;
  char* p0;

  while(isspace((unsigned char)*p)) ++p;

  // field "1" (digits)
  p0=p; while(isdigit((unsigned char)*p)) ++p;
  if(*pref=='1')
  {
    if(p==p0) return 0;
    strncpy(val,p0,p-p0); val[p-p0]='\0'; return 1;
  }
  while(isspace((unsigned char)*p)) ++p;

  // field "2" (digits)
  p0=p; while(isdigit((unsigned char)*p)) ++p;
  if(*pref=='2')
  {
    if(p==p0) return 0;
    strncpy(val,p0,p-p0); val[p-p0]='\0'; return 1;
  }
  while(isspace((unsigned char)*p)) ++p;

  // field "3" (printable, non-blank characters)
  p0=p; while(isgraph((unsigned char)*p)) ++p;
  if(*pref=='3')
  {
    if(p==p0) return 0;
    strncpy(val,p0,p-p0); val[p-p0]='\0'; return 1;
  }
  while(isspace((unsigned char)*p)) ++p;

  // field "4" (printable, non-blank characters)
  p0=p; while(isgraph((unsigned char)*p)) ++p;
  if(*pref=='4')
  {
    if(p==p0) return 0;
    strncpy(val,p0,p-p0); val[p-p0]='\0'; return 1;
  }
  while(isspace((unsigned char)*p)) ++p;

  // annotation fields: the prefix only counts when it starts a field, i.e.
  // when it is preceded by a space or a tab. After a rejected hit the search
  // resumes one character further on; restarting at the hit itself would
  // find the same occurrence forever.
  if(*pref=='\0') return 0;          // an empty prefix names no field
  p=strstr(p,pref);
  while(p!=NULL && !(p>seg && (p[-1]==' ' || p[-1]=='\t')))
    p=strstr(p+1,pref);
  if(p==NULL) return 0;

  p+=strlen(pref);
  int len=strcspn(p,FIELD_SEP "\n\r\f\0");
  strncpy(val,p,len);
  val[len]='\0';
  return 1;
}
/*
  Turns a field name into the prefix that introduces the field in a segment.

  parameters:
    -name   - field name, long or short
    +prefix - field name with ':' appended if long name; the caller must
              provide room for strlen(name)+2 bytes
  return value:
    1 if correct field name, 0 otherwise (prefix is then left untouched)
  examples:
    name   prefix    r.v.
    lem    lem:      1
    @      @         1
    ::     'undef'   0
    a,b    'undef'   0
*/
inline
int fieldprefix(char *name, char *prefix)
{
  // ctype functions are only defined for unsigned char values and EOF.
  if (ispunct((unsigned char)name[0]) && name[1]=='\0') // correct short name
    {
      strcpy(prefix, name);
      return 1;
    }

  int i=0;
  while(isalnum((unsigned char)name[i])) ++i;   // stops at '\0' as well
  if(i>0 && name[i]=='\0') // correct long name
    {
      sprintf(prefix,"%s:",name);
      return 1;
    }

  // incorrect
  return 0;
}
inline
bool process_seg(char* seg, gengetopt_args_info& args)
{
char buf[256];
char pref[FIELD_PREFIX_MAXLEN];
bool ret = !args.process_given;
if(args.process_given)
{
getfield(seg,"3",buf);
for(int i=0; i<args.process_given; ++i)
if(strcmp(args.process_arg[i],buf)==0)
{
ret=true;
break;
}
}
if(ret==false) return false;
for(int i=0; i<args.select_given; ++i)
{
fieldprefix(args.select_arg[i],pref); // !!! ŁATKA - ZOPTYMALIZOWAĆ !!!
if(! getfield(seg,pref,buf))
return false;
}
for(int i=0; i<args.ignore_given; ++i)
{
fieldprefix(args.ignore_arg[i],pref); // !!! ŁATKA - ZOPTYMALIZOWAĆ !!!
if(getfield(seg,pref,buf))
return false;
}
if(args.input_field_given & !getfield(seg,input_field_prefix,buf))
return false;
return true;
}
/*
  Appends the field "<pref><val>" to a segment line.

  parameters:
    -+seg - segment; normally ends with '\n', which is kept at the end
    -pref - prefix of the new field
    -val  - contents of the new field
  return value:
    1 - success, 0 - fail (limit on segment length exceeded)
*/
inline
int addfield(char *seg, const char *pref, const char *val)
{
  const size_t seglen=strlen(seg);
  // The new field replaces the trailing newline. An empty segment or one
  // without a newline is appended to instead, so nothing is written before
  // the buffer and no character is lost.
  const bool has_nl = seglen>0 && seg[seglen-1]=='\n';
  const size_t needed = seglen+strlen(pref)+strlen(val) + (has_nl ? 0 : 1);

  if(needed >= MAX_LINE) return 0; // safer, but slower

  sprintf(seg+(has_nl ? seglen-1 : seglen)," %s%s\n",pref,val);
  return 1;
}
/**************************************************/
// Parsed view of one segment line. Declares the same members as Segment
// below; no definitions of its member functions appear in this header.
// NOTE(review): looks like a leftover duplicate of Segment - confirm before removing.
struct Seg
{
int filepos, len;
char* tag;
char* form;
char* aux[MAXAUX];
int auxn;
bool parse(char* line);
char* getfield(char* fieldname);
void print(char* line);
bool addfield(char* s);
bool clearfields();
};
/**************************************************/
/* Input/output record: one segment line split into its fields.
*/
struct Segment
{
// First two columns of the line, converted with atoi().
int filepos, len;
// Third column.
char* tag;
// Fourth column; NULL when absent or equal to EMPTYFORM.
char* form;
// Remaining (annotation) fields and their count.
char* aux[MAXAUX];
int auxn;
// Splits `line` in place and points the members into it.
bool parse(char* line);
// Returns the value of the named annotation field, or NULL.
char* getfield(char* fieldname);
// Formats the record back into `line`.
void print(char* line);
// Appends an annotation field; fails when aux[] is full.
bool addfield(char* s);
// Drops all annotation fields.
bool clearfields();
};
/*
 * Tells whether the given parsed segment should be processed: its tag has to
 * match one of the process options (when any were given), every select field
 * has to be present and no ignore field may be present.
 */
inline
bool process_seg(Segment& s, gengetopt_args_info& args)
{
    // With no process option every tag is accepted.
    bool accepted = !args.process_given;
    int k = 0;
    while (k < args.process_given)
    {
        if (strcmp(args.process_arg[k], s.tag) == 0)
        {
            accepted = true;
            break;
        }
        ++k;
    }

    for (k = 0; k < args.select_given; ++k)
    {
        if (s.getfield(args.select_arg[k]) == NULL)
        {
            accepted = false;
        }
    }

    for (k = 0; k < args.ignore_given; ++k)
    {
        if (s.getfield(args.ignore_arg[k]) != NULL)
        {
            accepted = false;
        }
    }

    return accepted;
}
/*
* FUNKCJE OBSLUGUJACE WEJSCIE/WYJSCIE
*/
// napisy zostaj na miejscu (w line), tylko wskazniki sa ustawian
// i zara dopisywane zera s dopisywane
inline
bool Segment::parse(char* line)
{
auxn=0;
char* field;
if((field=strtok(line,FIELD_SEP))!=NULL)
filepos=atoi(field); // nie sprawdzana poprawnosc
else
return false;
if((field=strtok(NULL,FIELD_SEP))!=NULL)
len=atoi(field); // nie sprawdzana poprawnosc
else return false;
if((tag=strtok(NULL,FIELD_SEP))==NULL) return false;
if((form=strtok(NULL,FIELD_SEP))==NULL)
return true;
else
if(form[0] == EMPTYFORM && form[1] =='\0')
form=NULL;
while((aux[auxn]=strtok(NULL,FIELD_SEP))!=NULL) ++auxn;
return true;
}
// Looks up an annotation field.
// A name starting with an alphanumeric character is a long name and matches
// a field of the form "<name>:<value>"; any other name is a one-character
// short name and matches a field starting with that character.
// Returns a pointer to the value inside the field, or NULL when not found.
inline char* Segment::getfield(char* f)
{
    const int name_len = strlen(f);
    const bool long_name = isalnum(*f);

    for (int k = 0; k < auxn; ++k)
    {
        char* field = aux[k];
        if (long_name)
        {
            if (strncmp(field, f, name_len) == 0 && field[name_len] == INFIELD_SEP)
                return field + name_len + 1;
        }
        else if (*f == *field)
        {
            return field + 1;
        }
    }
    return NULL;
}
// Forgets all annotation fields: resets the used aux[] slots to NULL and the
// counter to zero. The strings themselves are not freed. Always returns true.
inline bool Segment::clearfields() {
    while (auxn > 0) {
        --auxn;
        aux[auxn] = NULL;
    }
    auxn = 0;
    return true;
}
// Formats the segment into `line` as
//   "<filepos, 4 digits> <len, 2 digits> <tag>[ <form>| *][ <aux>...]\n"
// A missing form is written as " *" only when annotation fields follow.
// The caller must provide a buffer large enough for the result.
inline
void Segment::print(char* line)
{
    char* out = line + sprintf(line, "%04d %02d %s", filepos, len, tag);

    if (form != NULL)
        out += sprintf(out, " %s", form);
    else if (auxn)
        out += sprintf(out, " *");

    for (int k = 0; k < auxn; ++k)
        out += sprintf(out, " %s", aux[k]);

    strcpy(out, "\n");
}
// Appends the annotation field `s` (the pointer is stored, not copied).
// Returns false, leaving the segment unchanged, when all MAXAUX slots are used.
inline
bool Segment::addfield(char* s)
{
    if (auxn >= MAXAUX)
        return false;

    aux[auxn] = s;
    ++auxn;
    return true;
}
/**************************************************
* helper functions for working with *
* configuration files *
**************************************************/
// checks that a file exists (is readable); returns 0 on success
int file_accessible(const char* path);
// checks that a configuration file exists
// NOTE(review): the definition that accompanies this header is named
// config_file_exists(), not config_file() - confirm which name is intended.
int config_file(const char* dir, const char* filename);
/**************************************************/
/* Fetches the input string for a segment.
 * parameters:
 *  - args     - array of strings naming the input fields, tried in order
 *  - args_len - number of entries in args
 *  - seg      - segment
 * return value: pointer to the input - the segment's form when a name starts
 * with '4', otherwise the value of the first named field that is present;
 * NULL when none of them is.
 */
inline char* getInput(char** args, int args_len, Segment seg) {
    int k = 0;
    while (k < args_len) {
        char* name = args[k];
        if (name[0] == '4') {
            return seg.form;
        }
        char* value = seg.getfield(name);
        if (value != NULL) {
            return value;
        }
        ++k;
    }
    return NULL;
}
#endif

View File

@ -1,20 +0,0 @@
#include <stdlib.h>
#include "common.h"
// Skeleton of a tool's entry point: parses the command line, applies the
// common options, leaves room for the tool's own code and releases the
// option structure.
int main(int argc, char* argv[])
{
    gengetopt_args_info args;
    if(cmdline_parser(argc,argv,&args) != 0)
        exit(1);
    // process_common_options() is declared in common.h as taking a pointer
    // to the option structure and the program name.
    process_common_options(&args, argv[0]);
    //
    // YOUR CODE HERE
    //
    cmdline_parser_free(&args);
    return 0;
}

View File

@ -1,20 +0,0 @@
# Compiler flags for the regular build and for a static build (the latter is
# not used by any rule here).
CFLAG1 = -Wno-deprecated -O3 -fpermissive
CFLAG_ST = -Wno-deprecated -O3 -fpermissive -static

# These names are commands, not files.
.PHONY: all copy clean

all: compdic aut2fsa

# No recipe: compdic is expected to exist already.
compdic:

aut2fsa: aut2fsa.cc
	$(CXX) $(CFLAG1) -o $@ $<

# Copies the tools to UTT_BIN_DIR; does nothing when it is not set.
copy:
ifdef UTT_BIN_DIR
	cp compdic fsm2aut aut2fsa lst2fstext ${UTT_BIN_DIR}
endif

# $(RM) is "rm -f", so cleaning an already clean tree is not an error.
clean:
	$(RM) aut2fsa

View File

@ -1,18 +0,0 @@
#include <iostream>
#include <stdlib.h>
#include "../lib/tfti.h"
#include <fstream>
using namespace std;
int main()
{
TFTiv<char,char> a;
a.read();
a.save();
return 0;
}

View File

@ -1,179 +0,0 @@
#!/bin/sh
# compdic - compiles a word list into an automaton file.
# Usage: compdic [-p <parts>] <wordlist> <automaton>

# Number of parts the word list is split into; 0 means "work it out from the
# size of the list".
no_of_parts=0
while [ $# -gt 2 ]
do
    case "$1" in
        -p)
            no_of_parts=$2
            shift 2
            ;;
        *)
            echo "The arguments to use are"
            echo "-p: number of parts"
            shift 1
            ;;
    esac
done

if [ $# -lt 2 ]
then
    echo
    echo "compdic is a tool to compile lists of automaton paths (words) into the automaton format"
    echo "suitable for use with lem, gue, cor, and kor components"
    echo
    echo "Usage:"
    echo " compdic [-p <parts>] <wordlist> <automaton>"
    echo "where"
    echo " <wordlist> - file containig a list of words, one per line, iso-8859-2 encoded"
    echo " <automaton> - a file to which the compiled automaton (lem/gue/cor/kor format) should be written"
    exit 0
fi

# Reject a -p value that the numeric tests below cannot handle.
case "$no_of_parts" in
    ''|*[!0-9]*)
        echo "compdic: -p expects a non-negative integer" >&2
        exit 1
        ;;
esac

if [ ! -r "$1" ]
then
    echo "compdic: cannot read word list: $1" >&2
    exit 1
fi

# Default: one part per 75000 lines.
if [ "$no_of_parts" -eq 0 ]
then
    no_of_parts=$(( $(wc -l < "$1") / 75000 + 1 ))
fi
echo number of parts: $no_of_parts
# All intermediate files live in a private temporary directory, which is also
# removed when the script stops early.
tempdir=`mktemp -d /tmp/compdic.XXXXXX` || exit 1
trap 'rm -rf "$tempdir"' EXIT
# mktemp instead of the Debian-specific tempfile utility.
alphabet=`mktemp "$tempdir/alphabet.XXXXXX"` || exit 1
cat <<EOF > $alphabet
<eps> 0
a 1
A 2
ä 3
± 4
¡ 5
b 6
B 7
c 8
C 9
æ 10
Æ 11
d 12
D 13
e 14
E 15
é 16
ê 17
Ê 18
f 19
F 20
g 21
G 22
h 23
H 24
i 25
I 26
j 27
J 28
k 29
K 30
l 31
L 32
³ 33
£ 34
m 35
M 36
n 37
N 38
ñ 39
Ñ 40
o 41
O 42
ö 43
ó 44
Ó 45
p 46
P 47
q 48
Q 49
r 50
R 51
s 52
S 53
¶ 54
¦ 55
t 56
T 57
u 58
U 59
ü 60
v 61
V 62
w 63
W 64
x 65
X 66
y 67
Y 68
z 69
Z 70
¼ 71
¬ 72
¿ 73
¯ 74
0 75
1 76
2 77
3 78
4 79
5 80
6 81
7 82
8 83
9 84
_ 85
- 86
? 87
! 88
~ 89
; 90
, 91
/ 92
* 93
+ 94
EOF
# Split the word list into parts of equal size.
no_of_lines=$(( $(wc -l < "$1") / $no_of_parts + 1 ))
split -l "$no_of_lines" "$1" "$tempdir/part."

# Start from the automaton compiled from empty input ...
automaton=$tempdir/output.fst
fstcompile --acceptor --isymbols="$alphabet" --keep_isymbols < /dev/null > "$automaton"

# ... and merge the parts into it one by one, minimizing after each step.
n=0
for f in "$tempdir"/part.*
do
    # mktemp instead of the Debian-specific tempfile utility.
    temp1=`mktemp "$tempdir/tmp.XXXXXX"` || exit 1
    temp2=`mktemp "$tempdir/tmp.XXXXXX"` || exit 1
    temp3=`mktemp "$tempdir/tmp.XXXXXX"` || exit 1
    n=$(( $n + 1 ))
    echo processing part $n
    lst2fstext < "$f" |\
        fstcompile --acceptor --isymbols="$alphabet" --keep_isymbols |\
        fstrmepsilon |\
        fstdeterminize > "$temp1"
    fstminimize "$temp1" "$temp2"
    fstunion "$automaton" "$temp2" | fstrmepsilon | fstdeterminize > "$temp3"
    fstminimize "$temp3" "$automaton"
    # The per-part files are no longer needed.
    rm -f "$temp1" "$temp2" "$temp3"
done

# Convert the final automaton to the format read by the UTT components.
fsttopsort < "$automaton" | fstprint --acceptor | fsm2aut | aut2fsa > "$2"
rm -r "$tempdir"

View File

@ -1,44 +0,0 @@
#!/usr/bin/perl
# fsm2aut - converts a textual automaton listing read from the input into the
# "aut" listing printed on standard output.
# Accepted input lines:
#   "<from> <to> <char>"  - a transition
#   "<state>"             - a final state
# Output: a header "<states> <transitions> char void", then one line per
# state: "<number> <+|-> <char> <target> ..." ('+' marks a final state).

my @states;   # $states[$from] = [ char, target, char, target, ... ]
my @final;    # $final[$state] is true for final states
my $tn=0;     # number of transitions

while(my $line = <>)
{
    if(my ($from, $to, $char) = $line =~ /^\s*([0-9]+)\s+([0-9]+)\s+(.)(\s*)?$/)
    {
        push @{$states[$from]}, ($char, $to);
        # make sure the target state exists even without outgoing transitions
        $#states=$to if $#states<$to;
        $tn++;
    }
    elsif(my ($state) = $line =~ /^\s*([0-9]+)\s*$/)
    {
        $final[$state]=1;
        $#states=$state if $#states<$state;
    }
    else
    {
        die("Input error.");
    }
}

print scalar(@states)," ",$tn," char void\n";

my $width=int(log(@states+1)/log(10));
for my $number (0 .. $#states)
{
    my $mark = $final[$number] ? "+" : "-";
    my $row = sprintf("%${width}d %s", $number, $mark);
    my $arcs = $states[$number] || [];
    for(my $k=0; $k<@$arcs; $k+=2)
    {
        $row .= " $arcs->[$k] $arcs->[$k+1]";
    }
    print $row, "\n";
}

View File

@ -1,20 +0,0 @@
#!/usr/bin/env perl
# lst2fstext - turns a word list (one word per line) into a textual automaton
# listing. State 0 is the start state and state 1 the only final state; every
# word gets its own chain of states, entered from 0 and left towards 1 through
# <eps> transitions.
use strict;
use warnings;
use locale;

my $final = 1;   # the single final state
my $last  = 1;   # highest state number handed out so far

while(my $word = <>)
{
    chomp $word;
    my $state = ++$last;
    print "0 $state <eps>\n";
    # Iterate over the characters instead of testing the shifted value for
    # truth: the character "0" is false in Perl and used to end the word early.
    foreach my $char (split //, $word)
    {
        my $next = ++$last;
        print "$state $next $char\n";
        $state = $next;
    }
    print "$state $final <eps>\n";
}
print "$final\n";

View File

@ -1,21 +0,0 @@
# Compiler flags for the regular (32-bit) build and for a static build (the
# latter is not used by any rule here).
CFLAG1 = -m32 -Wno-deprecated -O3 -fpermissive
CFLAG_ST = -Wno-deprecated -O3 -fpermissive -static

# These names are commands, not files.
.PHONY: all copy clean

all: compiledic aut2fsa

# No recipe: compiledic is expected to exist already.
compiledic:

aut2fsa: aut2fsa.cc
	$(CXX) $(CFLAG1) -o $@ $<

# Copies the tools to UTT_BIN_DIR; does nothing when it is not set.
copy:
ifdef UTT_BIN_DIR
	cp compiledic fsm2aut aut2fsa ${UTT_BIN_DIR}
endif

# $(RM) is "rm -f", so cleaning an already clean tree is not an error.
clean:
	$(RM) aut2fsa

View File

@ -1,5 +0,0 @@
* pliki tymczasowe:
- pliki symboli lab i scl
- pliki powstajace podczas kompilacji slownika
gdzie maja byc tworzone? tak jak teraz nie moze byc!

View File

@ -1,18 +0,0 @@
#include <iostream>
#include <stdlib.h>
#include "../lib/tfti.h"
#include <fstream>
using namespace std;
int main()
{
TFTiv<char,char> a;
a.read();
a.save();
return 0;
}

View File

@ -1,242 +0,0 @@
#! /usr/bin/env perl
#package: UAM Text Tools
#component: compiledic
#version: 1.0
#author: Tomasz Obrebski
# Compiles a dictionary file (*.dic) into a binary automaton (*.bin).
# Options come from the system and user configuration files and can be
# overridden on the command line.
use strict;
use locale;
use File::HomeDir;
use File::Basename;
use File::Temp;
use File::Copy;
use Getopt::Long;
# Number of dictionary lines put into one partial automaton.
my $linesPerFile = 20000;
my $systemconfigfile='/usr/local/etc/utt/compiledic.conf';
my $userconfigfile=home()."/.utt/compiledic.conf";
Getopt::Long::Configure('no_ignore_case_always');
#my $symfile='~/.utt/pl/pl_PL.iso-8859-2.sym';
my $symbols=0;
my $help=0;
#read configuration files###########################
# Each file holds "name = value" lines; '#' starts a comment. The user file
# is read after the system file, so its values win.
my $file;
foreach $file ($systemconfigfile, $userconfigfile){
if(open(CONFIG, $file)){
while (<CONFIG>) {
chomp;
s/#.*//;
s/^\s+//;
s/\s+$//;
next unless length;
my ($name, $value) = split(/\s*=\s*/, $_, 2);
if($name eq "symbols"){
$symbols=$value;
}
elsif(($name eq "help")or($name eq "h")){
$help=1;
}
}
close CONFIG;
}
}
#########################################################
# Command-line options override the configuration files.
GetOptions("symbols=s" => \$symbols,
"help|h" => \$help);
if($help)
{
print <<'END'
Usage: compiledic [OPTIONS] dictionaryfile
Options:
--help -h Help.
--symbols=FILE Symbol file.
END
;
exit 0;
}
##################################################
# Validate the symbol file and the dictionary given on the command line.
-f $symbols or die("Symbol file not found.");
@ARGV > 0 or die("Source dictionary not given.");
# $file is the variable declared for the configuration loop; it is reused
# here rather than declared a second time.
$file = shift;
-f $file or die("Source dictionary not found.");
# Anchored at both ends: only a name that really ends in ".dic" is accepted,
# and everything before that extension becomes the output name prefix.
$file =~ /^(.*)\.dic\z/s or die("The input file must have .dic extension.");
my $filenameprefix = $1;
##################################################
##################################################
# Tworzymy katalog tymczasowy, gdzie wszystko bedzie umieszczane.
my $tmp_root = File::Temp::tempdir( CLEANUP => 1 );
(undef, my $symfile) = File::Temp::tempfile( DIR => $tmp_root, SUFFIX => ".sym" );
my $symfilenoext=$symfile;
$symfilenoext =~ s/\.sym$//;
my $labfile = $symfilenoext . '.lab';
my $sclfile = $symfilenoext . '.scl';
copy($symbols, $symfile);
# Przygotowanie etykiet
`lexmakelab $symfilenoext`;
# Analiza pliku slownika
print "preparing file...........................................";
#print "... sed section .....\n";
(undef, my $file_sed) = File::Temp::tempfile( DIR => $tmp_root, SUFFIX => ".sed" );
`sed -r "s/([[:punct:]])/\[\\1\]/g" < $file > $file_sed`;
print "OK\n";
# Split the file into many parts; lexcomplex is run on each part separately
# and the results are then joined with fsmunion.
#print "Splitting the dictionary into smaller parts...";
open(IN, '<', $file_sed) or die("Cannot read $file_sed: $!");
my $lineCount = 0;
my $fileCount = 0;
open(FILE, '>', "$tmp_root/slo_$fileCount")
    or die("Cannot write $tmp_root/slo_$fileCount: $!");
while (<IN>) {
    if (++$lineCount >= $linesPerFile) {
        $fileCount++;
        $lineCount = 0;
        close(FILE);
        # print "Creating new temporary file: slo_".$fileCount."\n";
        open(FILE, '>', "$tmp_root/slo_$fileCount")
            or die("Cannot write $tmp_root/slo_$fileCount: $!");
    }
    print(FILE $_);
}
close(IN);
# Close the last part as well: its buffered lines must be on disk before
# lexcomplex reads the file.
close(FILE) or die("Cannot write $tmp_root/slo_$fileCount: $!");
#print "OK\n";
# Build one partial automaton per part, printing a progress bar.
print "building partial automata";
# 32 dots, fileCount files
my $filesPerDot = $fileCount/32;
my $files=$filesPerDot;
my $dots=0;
for (my $i=0; $i<=$fileCount; $i++) {
if ($files >= $filesPerDot) {
$files = 0;
print ".";
$dots++;
}
$files++;
`lexcomplex -l $labfile -S $sclfile < $tmp_root/slo_$i > $tmp_root/slownik_$i.fsm`;
}
# Pad the progress bar to its full width.
if ($dots < 32) {
for (my $i=0; $i<32 - $dots; $i++) {
print ".";
}
}
print "OK\n";
# The text parts are no longer needed.
unlink <$tmp_root/slo_*>;
# Join the partial automata into one, printing a progress bar.
print "building final automaton";
# progress bar width (the original note says 35 dots, the code uses 33)
my $ndots=33;
$filesPerDot = $fileCount/$ndots;
$files=$filesPerDot;
$dots=0;
copy("$tmp_root/slownik_0.fsm", "$tmp_root/slownik1.fsm");
for (my $i=1; $i<=$fileCount; $i++) {
    if ($files >= $filesPerDot) {
        $files = 0;
        print ".";
        $dots++;
    }
    $files++;
    # Both operands live in $tmp_root: the partial automaton was written as
    # slownik_$i.fsm and the running result as slownik1.fsm.
    `fsmunion $tmp_root/slownik_$i.fsm $tmp_root/slownik1.fsm > $tmp_root/slownik2.fsm`;
    move("$tmp_root/slownik2.fsm", "$tmp_root/slownik1.fsm") || die "Unable to move $tmp_root/slownik2.fsm!\n";
}
# Pad the progress bar to its full width.
if ($dots < $ndots) {
    for (my $i=0; $i<$ndots - $dots; $i++) {
        print ".";
    }
}
#`fsmunion $tmp_root/* > $tmp_root/slownik1.fsm`;
print "OK\n";
# Post-process the joined automaton: remove epsilon transitions, determinize
# and minimize. Each stage reads the previous stage's file.
print "removing epsilon-transitions.............................";
`fsmrmepsilon $tmp_root/slownik1.fsm > $tmp_root/slownik2.fsm`;
unlink ("$tmp_root/slownik1.fsm");
print "OK\n";
print "determinizing automaton..................................";
`fsmdeterminize $tmp_root/slownik2.fsm > $tmp_root/slownik1.fsm`;
unlink ("$tmp_root/slownik2.fsm");
print "OK\n";
print "minimizing automaton.....................................";
`fsmminimize $tmp_root/slownik1.fsm > $tmp_root/slownik.fsm`;
#`rm slownik1.fsm`;
print "OK\n";
# Convert fsm -> text -> aut -> binary; the result is written next to the
# source dictionary as <prefix>.bin.
print "converting fsm format to bin.............................";
`fsmprint -i $labfile $tmp_root/slownik.fsm > $tmp_root/slownik.txt`;
`fsm2aut $tmp_root/slownik.txt > $tmp_root/slownik.aut`;
`aut2fsa < $tmp_root/slownik.aut > $filenameprefix.bin`;
print "OK\n";
print "removing temporary files.................................";
unlink <$tmp_root/*>;
# NOTE(review): unlink does not remove directories; the directory itself is
# presumably removed by the CLEANUP => 1 option of tempdir() - confirm.
unlink ($tmp_root);
print "OK\n";

View File

@ -1,44 +0,0 @@
#!/usr/bin/perl
# fsm2aut - converts a textual automaton listing read from the input into the
# "aut" listing printed on standard output.
# Accepted input lines:
#   "<from> <to> <char>"  - a transition
#   "<state>"             - a final state
# Output: a header "<states> <transitions> char void", then one line per
# state: "<number> <+|-> <char> <target> ..." ('+' marks a final state).

my @states;   # $states[$from] = [ char, target, char, target, ... ]
my @final;    # $final[$state] is true for final states
my $tn=0;     # number of transitions

while(my $line = <>)
{
    if(my ($from, $to, $char) = $line =~ /^\s*([0-9]+)\s+([0-9]+)\s+(.)(\s*)?$/)
    {
        push @{$states[$from]}, ($char, $to);
        # make sure the target state exists even without outgoing transitions
        $#states=$to if $#states<$to;
        $tn++;
    }
    elsif(my ($state) = $line =~ /^\s*([0-9]+)\s*$/)
    {
        $final[$state]=1;
        $#states=$state if $#states<$state;
    }
    else
    {
        die("Input error.");
    }
}

print scalar(@states)," ",$tn," char void\n";

my $width=int(log(@states+1)/log(10));
for my $number (0 .. $#states)
{
    my $mark = $final[$number] ? "+" : "-";
    my $row = sprintf("%${width}d %s", $number, $mark);
    my $arcs = $states[$number] || [];
    for(my $k=0; $k<@$arcs; $k+=2)
    {
        $row .= " $arcs->[$k] $arcs->[$k+1]";
    }
    print $row, "\n";
}

View File

@ -1,41 +0,0 @@
# Flags for linking the program (PAR) and for compiling objects (PAR2).
PAR=-Wno-deprecated -fpermissive -static
PAR2=-c -Wno-deprecated -fpermissive
LIB_PATH=../lib
COMMON_PATH=../common
# Header generated by gengetopt, handed to the common code as a macro.
CMDLINE_FILE='"../cor/cmdline.h"'

# These names are commands, not files.
.PHONY: copy clean clean.cmdline

cor: main.cc corr.o $(LIB_PATH)/word.o \
		$(LIB_PATH)/auttools.o cmdline.c cmdline.h common_cor.o common.o
	$(CXX) $(PAR) -D _CMDLINE_FILE=$(CMDLINE_FILE) main.cc corr.o common.o \
		$(LIB_PATH)/word.o $(LIB_PATH)/auttools.o cmdline.c common_cor.o \
		-o $@

corr.o: corr.cc corr.hh
	$(CXX) $(PAR2) corr.cc

# common.cc includes the generated header through _CMDLINE_FILE, so the
# header has to exist (and be current) before it is compiled.
common.o: $(COMMON_PATH)/cmdline_common.ggo $(COMMON_PATH)/common.cc \
		$(COMMON_PATH)/common.h cmdline.h
	$(CXX) $(PAR2) -D _CMDLINE_FILE=$(CMDLINE_FILE) $(COMMON_PATH)/common.cc

common_cor.o: cmdline.h common_cor.cc common_cor.h
	$(CXX) $(PAR2) -D _CMDLINE_FILE=$(CMDLINE_FILE) common_cor.cc

# gengetopt writes both files in one run.
cmdline.c cmdline.h: cmdline.ggo
	gengetopt -i cmdline.ggo --conf-parser

# The option description is the tool's own options followed by the common ones.
cmdline.ggo: cmdline_cor.ggo $(COMMON_PATH)/cmdline_common.ggo
	cat cmdline_cor.ggo $(COMMON_PATH)/cmdline_common.ggo > cmdline.ggo

# Copies the program to UTT_BIN_DIR; does nothing when it is not set.
copy:
ifdef UTT_BIN_DIR
	cp cor ${UTT_BIN_DIR}
endif

# $(RM) is "rm -f": missing files are not an error.
clean: clean.cmdline
	$(RM) *.o
	$(RM) cor

clean.cmdline:
	$(RM) cmdline.*

View File

@ -1,8 +0,0 @@
# gengetopt description of the options specific to the cor component.
# The build concatenates it with the common options into cmdline.ggo.
package "cor"
version "0.1"
option "dictionary-home" - "Dictionary home dir." string typestr="FILENAME" no hidden
option "dictionary" d "Dictionary" string typestr="FILENAME" default="cor.bin" no
option "distance" n "Maximal edit distance." int default="1" no
option "replace" r "Replace original form with corrected form, place original form in the cor field. This option has no effect in single mode" flag off hidden
#option "single" - "Place all alternatives in the same line" flag off

View File

@ -1,29 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include "common_cor.h"
// Path of the dictionary file selected by process_cor_options().
char dictionary[256];

// Works out the dictionary path from the options and verifies that the file
// can be read:
//   - an explicit dictionary option is expanded with expand_path();
//   - otherwise, when both a dictionary home and a language are given, the
//     path is <home>/<language>/cor.bin;
//   - otherwise `dictionary` is left unchanged and nothing is checked.
// Terminates the process with status 1 when the file is not accessible or
// the composed path does not fit into `dictionary`.
void process_cor_options(gengetopt_args_info* args)
{
  if(args->dictionary_given)
    {
      expand_path(args->dictionary_arg,dictionary);
    }
  else if (args->dictionary_home_given && args->language_given)
    {
      char buf[255];
      expand_path(args->dictionary_home_arg, buf);
      // Bounded formatting: the path is refused rather than written past
      // the end of `dictionary`.
      const int n = snprintf(dictionary,sizeof(dictionary),"%s/%s/cor.bin",buf,args->language_arg);
      if(n<0 || (size_t)n>=sizeof(dictionary))
        {
          fprintf(stderr,"Dictionary path too long: %s/%s/cor.bin\nAborting.\n",buf,args->language_arg);
          exit(1);
        }
    }
  else
    return;

  if(file_accessible(dictionary)!=0)
    {
      fprintf(stderr,"Cannot open the dictionary file: %s\nAborting.\n",dictionary);
      exit(1);
    }
}

View File

@ -1,19 +0,0 @@
// Declarations shared by the sources of the cor component.
#ifndef __COMMON_COR_H
#define __COMMON_COR_H
#include <stdio.h>
// to be removed - defined in the Makefile! #define _CMDLINE_FILE "../cor/cmdline.h"
#include "../common/common.h"
#include "cmdline.h"
// File name of the dictionary.
#define DICT_FILE "cor.bin"
extern int change_count;
// Determines the dictionary path from the options (see common_cor.cc).
extern void process_cor_options(gengetopt_args_info* args);
// Path of the dictionary file in use.
extern char dictionary[];
#endif

View File

@ -1,142 +0,0 @@
//---------------------------------------------------------------------------
#include "corr.hh"
#define MAXPATH 256
#define min(x,y) ((x<y)?(x):(y))
#define max(x,y) ((x>y)?(x):(y))
// Edit distance between the prefix X[0..i] of the misspelled word and the
// prefix Y[0..j] of the candidate, computed from the neighbouring cells of
// the table. Besides insertion, deletion and substitution a swap of two
// adjacent characters counts as a single edit.
// H2 addresses the same storage as H shifted by two rows and two columns, so
// the indices -1 and -2 are valid for it.
int Corr::ed(int i,int j)
{
  if(i==-1)
    return j+1;
  if(j==-1)
    return i+1;
  if(i==-2 || j==-2)
    return n+1;

  if(X[i]==Y[j])
    return H2[i-1][j-1];

  // Transposition: only possible when both strings have a previous
  // character; without the guard X[-1] or Y[-1] would be read.
  if(i>0 && j>0 && X[i-1]==Y[j] && X[i]==Y[j-1])
    return 1+min(H2[i-2][j-2],min(H2[i][j-1],H2[i-1][j]));

  return 1+min(H2[i-1][j-1],min(H2[i][j-1],H2[i-1][j]));
}
// Cut-off edit distance for column j: the smallest table entry H2[k][j] over
// the rows k within t of j (clipped to 0..m). Starts from j+t, which is also
// the result when no row in that range holds a smaller value.
int Corr::cuted(int j)
{
  const int first=max(0,j-t);
  const int last=min(m,j+t);

  int best=j+t;
  int row=first;
  while(row<=last)
    {
      const int cell=H2[row][j];
      if(cell<best)
        best=cell;
      ++row;
    }
  return best;
}
/*
void Corr::recomputeH(int j)
{
for(int i=0;i<=m;i++)
H[(i)+2][(j)+2]=ed(i,j);
}
*/
// Recomputes column j of the distance table, restricted to the rows within
// t+2 of j (clipped to 0..m).
void Corr::recomputeH(int j)
{
  const int first=max(0,j-t-2);
  const int last=min(m,j+t+2);

  int row=first;
  while(row<=last)
    {
      H2[row][j]=ed(row,j);
      ++row;
    }
}
// Collects the dictionary words whose edit distance from w does not exceed
// the threshold t.
//   w   - the (possibly misspelled) word
//   tab - receives every matching word through tab.add()
// Returns the number of words added. Words too long for the fixed-size work
// tables (X, Y and H) are not searched at all and yield 0.
// The automaton is walked depth-first; a branch is abandoned as soon as the
// cut-off distance of the current candidate prefix exceeds t.
int Corr::correct(const char* w, Words& tab)
{
  // X holds w, Y holds candidates of up to strlen(w)+t characters, and H is
  // indexed up to strlen(w)+t+2; refuse anything that would not fit.
  if(t<0 || strlen(w)+(size_t)t+3 >= sizeof(X))
    return 0;

  long int path[MAXPATH]={0};
  int i; // row index (X)
  int j; // column index (Y)
  long state=0;

  strcpy(X,w);
  m=strlen(X)-1;
  n=m+t;

  // Borders of the table (rows/columns -2 and -1, stored at offset +2) ...
  for(i=(-2);i<=m;i++)
    H[(i)+2][(-2)+2]=n;
  for(i=(-1);i<=m;i++)
    H[(i)+2][(-1)+2]=(i)+1;
  for(j=(-2);j<=n;j++)
    H[(-2)+2][(j)+2]=n;
  for(j=(-1);j<=n;j++)
    H[(-1)+2][(j)+2]=(j)+1;
  // ... and the interior preset to "over the threshold".
  for(j=0; j<=n; ++j)
    for(i=0; i<=m; ++i)
      H[i+2][j+2]=t+1;

  int more=1;
  strcpy(Y,"");
  j=0;
  state=0;
  int count=0;

  while(more)
    {
      if(!empty(state))
        {
          Y[j]=input(state);
          recomputeH(j);
          if(cuted(j)<=t)
            {
              if(final(next(state)) && H[(m)+2][(j)+2]<=t)
                {
                  // NOTE(review): the copy is handed to tab.add(); whether
                  // Words takes ownership of it is not visible here.
                  char* out=new char[j+2];
                  strncpy(out,Y,j+1);
                  out[j+1]='\0';
                  tab.add(out);
                  count++;
                }
              // descend
              path[j++]=state;
              state=next(state);
              continue;
            }
          else if(continued(state))
            {
              // try the next transition of the same state
              state++;
              continue;
            }
        }
      //backtracking
      do
        if(j>0)
          j--;
        else
          more=0;
      while(more && !continued(path[j]));
      state=path[j]+1;
    }
  return count;
}
//---------------------------------------------------------------------------

View File

@ -1,34 +0,0 @@
//---------------------------------------------------------------------------
#ifndef _corr_hh
#define _corr_hh
//---------------------------------------------------------------------------
#include "../lib/tfti.h"
#include "../lib/word.h"
// Spelling corrector: an automaton (TFTiv) extended with a search for the
// words lying within an edit distance t of a given word.
class Corr : public TFTiv<char,char>
{
private:
// Edit-distance table; rows/columns 0 and 1 hold the borders for the logical
// indices -2 and -1 (see H2).
int H[100][100];
char X[100]; // misspelled string
char Y[100]; // (possibly partial) candidate string
int m; // length of X
int n; // maximal length of Y
// NOTE(review): correct() sets m = strlen(X)-1 and n = m+t, i.e. the last
// indices rather than the lengths - confirm the intended meaning.
int ed(int,int);
int cuted(int);
void recomputeH(int);
public:
// View of H shifted by two rows and two columns: H2[i][j] is H[i+2][j+2].
int (*H2)[100];
int t; // threshold
Corr() : H2((int(*)[100])&H[2][2]) {};
Corr(const char* a) : TFTiv<char,char>(a), H2((int(*)[100])&H[2][2]) { };
// Adds to tab the words within distance t of w; returns their number.
int correct(const char* w, Words& tab);
};
//---------------------------------------------------------------------------
#endif

View File

@ -1,155 +0,0 @@
#include <stdlib.h>
#include <ctype.h>
#include "../lib/iotools.h"
//do wyrzucenia - definicja w Makefile! #define _CMDLINE_FILE "../cor/cmdline.h"
#include "../common/common.h"
#include "common_cor.h"
#include "corr.hh"
#include "cmdline.h"
#include <locale.h>
int main(int argc, char** argv) {
// setlocale(LC_CTYPE,"");
// setlocale(LC_COLLATE,"");
gengetopt_args_info args;
if(cmdline_parser(argc, argv, &args) != 0)
exit(1);
process_config_files(&args,argv[0]);
process_common_options(&args,argv[0]);
process_cor_options(&args);
Corr cor;
cor.load(dictionary);
cor.t=args.distance_arg;
char line[MAX_LINE+1];
long line_count = 0;
Segment seg;
Words tab;
char form1[MAX_LINE];
char* form;
int formcasing;
char corfield[MAX_LINE]="";
while (fgets(line, MAX_LINE, inputf))
{
// strcpy(outline,line);
++line_count;
// if(!seg.parse(line))
// {
// fprintf(stderr,"Input error in line %d.\n",line_count);
// exit(1);
// }
char outline[128];
//printf("Starting cor... searching for %d fields\n", args.input_field_given);
//for (int i=0; i<args.input_field_given; ++i) {
// printf("\t%d. %s\n", i, args.input_field_arg[i]);
//}
if (!process_seg(line, args))
fputs(line, outputf);
else
{
char form[MAX_FORM];
tab.clear();
getfield(line,input_field_prefix,form);
if (form==NULL) continue;
formcasing=3;
cor.correct(form, tab);
if( tab.count() == 0 )
{
formcasing=casing(form);
if( formcasing == 1 || formcasing == 2)
tolowers(form, form1), cor.correct(form1, tab);
}
if ( tab.count() == 0)
fputs(line, failedf);
else
{
if(args.replace_flag)
{
char corfield[128];
strcpy(corfield, input_field_prefix);
strcat(corfield, form);
seg.aux[seg.auxn]=corfield;
++seg.auxn;
for(int i=0; i<tab.count(); ++i)
{
seg.form=tab[i].form();
restorecasing(seg.form,seg.form,formcasing);
seg.print(outline);
fputs(outline, outputf);
}
--seg.auxn;
}
else
{
if(one_line)
{
char* p=corfield;
for(int i=0; i<tab.count(); ++i)
{
restorecasing(tab[i].form(),tab[i].form(),formcasing);
p += sprintf(p," %s%s",output_field_prefix,tab[i].form());
}
sprintf(p,"\n");
strcpy(outline,line);
outline[strlen(outline)-1]='\0';
strcat(outline,corfield);
fputs(outline, outputf);
}
else if(one_field)
{
char* p=corfield;
p += sprintf(p," %s",output_field_prefix);
for(int i=0; i<tab.count(); ++i)
{
restorecasing(tab[i].form(),tab[i].form(),formcasing);
p += sprintf(p,(i==0)?"%s":";%s",tab[i].form());
}
sprintf(p,"\n");
strcpy(outline,line);
outline[strlen(outline)-1]='\0';
strcat(outline,corfield);
fputs(outline, outputf);
}
else
{
for(int i=0; i<tab.count(); ++i)
{
restorecasing(tab[i].form(),tab[i].form(),formcasing);
sprintf(corfield," %s%s\n",output_field_prefix,tab[i].form());
strcpy(outline,line);
outline[strlen(outline)-1]='\0';
strcat(outline,corfield);
fputs(outline, outputf);
}
}
}
}
}
if(args.interactive_flag)
{
fflush(outputf);
fflush(failedf);
}
}
cmdline_parser_free(&args);
}

View File

@ -1,56 +0,0 @@
SHELL = /bin/sh
LIB_PATH=../../lib
COMMON_PATH=../common
# Header generated by gengetopt, handed to the sources as a macro.
CMDLINE_FILE='"../dgp/cmdline.h"'
#vpath %.o .
CXXFLAGS = -O2 -static
sources = main.cc grammar.cc symbol.cc mgraph.cc sgraph.cc dgp0.cc cmdline.cc \
          $(COMMON_PATH)/common.cc global.cc
bin = dgp
# Object and dependency files are created next to their sources (the
# original note spoke of a subdirectory "o", which these rules do not use).
objs = $(sources:%.cc=%.o)
deps = $(sources:.cc=.d)

# These names are commands, not files.
.PHONY: clean prof copy

${bin}: ${objs}
	${CXX} ${CXXFLAGS} -D _CMDLINE_FILE=$(CMDLINE_FILE) -o $@ ${objs}

# The generated header must exist before anything is compiled or scanned for
# dependencies.
$(objs) $(deps): cmdline.h

# Dependency files are not needed for "clean"; leaving them out keeps clean
# from regenerating them (and cmdline.*) only to delete them again.
ifeq (,$(filter clean,$(MAKECMDGOALS)))
-include $(deps)
endif

%.o: %.cc
	${CXX} -D _CMDLINE_FILE=$(CMDLINE_FILE) -c ${CXXFLAGS} -o $@ $<

# Makes each .d file depend on the same files as its object.
%.d: %.cc
	set -e; rm -f $@; \
	$(CXX) -MM $(CPPFLAGS) -D _CMDLINE_FILE=$(CMDLINE_FILE) $< > $@.$$$$; \
	sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
	rm -f $@.$$$$

# gengetopt writes both files in one run.
cmdline.cc cmdline.h: cmdline.ggo
	gengetopt -i cmdline.ggo --c-extension=cc --conf-parser

# The option description is the tool's own options followed by the common ones.
cmdline.ggo: cmdline_dgp.ggo ../common/cmdline_common.ggo
	cat cmdline_dgp.ggo ../common/cmdline_common.ggo > cmdline.ggo

# $(RM) is "rm -f": missing files are not an error.
clean:
	$(RM) ${bin} ${objs} cmdline.cc cmdline.h cmdline.ggo
	$(RM) $(deps) *.d

prof: dgp
	gprof dgp ~/tmp/dgp-pl/gmon.out > dgp.prof

# Copies the programs to UTT_BIN_DIR; does nothing when it is not set.
copy:
ifdef UTT_BIN_DIR
	cp dgp dgc canonize tre $(UTT_BIN_DIR)/
endif

View File

@ -1,3 +0,0 @@
# Remove a half-written target when its recipe fails, so that a broken
# gram.dgp is never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Compile the grammar. The category list is read by dgc as well, so a change
# to it also triggers a rebuild.
gram.dgp: gram.dgc cats.dgc
	dgc -c cats.dgc < gram.dgc > $@

View File

@ -1,50 +0,0 @@
#!/usr/bin/perl
#package: UAM TExt Tools
#component: canonize
#version: 1.0
#author: Tomasz Obrebski
# Filter: copies its input to the output, replacing every syntactic category
# it finds with the canonical form computed by attr::canonize.
use lib "/usr/local/lib/utt";
use lib "$ENV{'HOME'}/.local/lib/utt";
use strict;
use Getopt::Long;
use attr;
my $help;
GetOptions("help|h" => \$help);
if($help)
{
print <<'END'
Transforms syntactic categories to their canonical form.
Usage: canonize
Options:
--help -h Help.
END
;
exit 0;
}
#$|=1;
# Cache of categories already canonized (see trans below).
my %tra;
while(<>)
{
# A category is a part-of-speech name, a slash and an attribute-value list;
# the patterns come from the attr module.
s/$attr::pos_re\/$attr::avlist_re/trans($&)/ge;
print;
}
# Returns the canonical form of a category. Each category is canonized only
# once; the result is kept in %tra.
sub trans
{
    my ($category) = @_;
    unless (exists $tra{$category})
    {
        $tra{$category} = attr::canonize($category);
    }
    return $tra{$category};
}

View File

@ -1,18 +0,0 @@
# gengetopt description of the options specific to the dgp component.
# The build concatenates it with the common options into cmdline.ggo.
package "dgp"
version "0.1"
option "grammar" g "Grammar file"
string no typestr="filename"
option "long" l "Long output"
flag off
option "debug" d "Debug mode."
flag off
option "info" - "Print info.
h - heads d - dependents
s - sets
c - constraints n - node/arc counts t - parse time
"
string no default="h"

View File

@ -1,13 +0,0 @@
// Size limits and separators.
// NOTE(review): the meanings below are read off the names only - confirm
// against the code that uses them.
#ifndef CONST_HH
#define CONST_HH
// presumably the maximum number of types
#define MAXTYPES 32
// presumably the maximum number of flags
#define MAXFLAGS 64
// presumably the maximum number of graph nodes
#define MAXNODES 1024
// presumably the maximum number of constraints
#define MAXCONSTRS 32
// presumably the maximum length of an input line
#define MAXLINE 256
// presumably the maximum length of a word form
#define MAXFORMLEN 64
// presumably the maximum length of a description
#define MAXDESCRLEN 80
// characters separating fields: space, newline, tab
#define FIELDSEP " \n\t"
#endif

View File

@ -1,292 +0,0 @@
#!/usr/bin/perl
#package: UAM Text Tools
#component: dgc (dg compiler)
#version: 1.0
#author: Tomasz Obrebski
# wymaga niejawnie programu canonize!!!!
use lib "/usr/local/lib/utt";
use lib "$ENV{'HOME'}/.local/lib/utt";
use strict;
use Getopt::Long;
use Data::Dumper;
use attr;
use File::HomeDir;
# Configuration files, processed in this order, so that settings from the
# user's file replace those from the system-wide one.
my $systemconfigfile='/usr/local/etc/utt/dgc.conf';
my $userconfigfile=home()."/.utt/dgc.conf";
Getopt::Long::Configure('no_ignore_case_always');

# Option values; 0 stands for "not given".
my ($help, $catfile, $dicfile, $gramfile, $outputfile) = (0, 0, 0, 0, 0);

# Configuration key (long or short name) => variable that receives the value.
my %config_slot = (
    categories => \$catfile,    c => \$catfile,
    dictionary => \$dicfile,    d => \$dicfile,
    grammar    => \$gramfile,   g => \$gramfile,
    outputfile => \$outputfile, o => \$outputfile,
);

#read configuration files###########################
my $file;
foreach $file ($systemconfigfile, $userconfigfile) {
    # A configuration file that cannot be opened is silently skipped.
    open(CONFIG, $file) or next;
    while (my $entry = <CONFIG>) {
        chomp $entry;
        $entry =~ s/#.*//;      # drop comments
        $entry =~ s/^\s+//;     # trim leading ...
        $entry =~ s/\s+$//;     # ... and trailing blanks
        next unless length $entry;
        my ($name, $value) = split(/\s*=\s*/, $entry, 2);
        if (my $slot = $config_slot{$name}) {
            $$slot = $value;
        }
        elsif ($name eq "help" or $name eq "h") {
            $help = 1;
        }
        # any other key is ignored
    }
    close CONFIG;
}
#########################################################
# Command-line options override whatever the configuration files set.
GetOptions("help|h" => \$help,
           "categories|c=s" => \$catfile,
           "dictionary|d=s" => \$dicfile,
           "grammar|g=s" => \$gramfile,
           "outputfile|o=s" => \$outputfile);

# Expand "~" to the user's home directory, but only where it is the leading
# path component ("~" or "~/..."). A "~" elsewhere in a file name (e.g. an
# editor backup "gram.dgc~") is left alone.
my $homedir = $ENV{'HOME'};
if (defined $homedir) {
    for my $path ($catfile, $dicfile, $gramfile, $outputfile) {
        $path =~ s{^~(?=/|$)}{$homedir} if $path;
    }
}

if($help)
{
    print <<'END'
Usage: dgc [OPTIONS]
Options:
--categories -c filename List of syntactic categories.
--dictionary -d filename Dictionary.
--grammar -g filename List of grammar rules.
--outputfile -o filename Output file name.
--help -h Help.
END
;
    exit 0;
}
# The set of atomic categories comes from a category list and/or a
# dictionary; without either there is nothing to expand the rules against.
# (The message names the real option names.)
die("At least one of --categories and --dictionary must be given.\n") if !$catfile && !$dicfile;

# Counters for the statistics printed at the end.
my $ncat=0;
my $nrole=0;
my $nsgl=0;
my $nleft=0;
my $nright=0;
my $nreq=0;
my $nlink=0;
my $nflag=0;

my %cats;   # atomic category => parsed form (attr::parse)
my %roles;  # ROLE lines seen in the grammar
my %agr;    # role => list of AGR arguments
my %gov;    # role => list of parsed GOV categories

# Output goes to STDOUT unless a file name other than "-" was given.
if(!$outputfile || $outputfile eq "-") {
    *OUTPUT = *STDOUT;
}
else {
    # Three-argument open: the file name is never interpreted as a mode.
    open(OUTPUT, '>', $outputfile) or die("Can't open output file: $outputfile!");
}
# Collect the atomic categories (each one is also echoed to OUTPUT as a
# CAT line by the loaders).
if ($catfile) { loadcats($catfile); }
if ($dicfile) { extractcats($dicfile); }

# One category, or a comma-separated list of categories.
my $cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/;
# class parse_class:
# /$attr::cat_re/g;

# The grammar is read from STDIN unless a file name other than "-" was given.
if (!$gramfile || $gramfile eq "-") {
    *INPUT = *STDIN;
}
else {
    open(INPUT, $gramfile) or die("Unable to open: $gramfile!");
}
# Main loop: read the grammar source line by line and write the expanded
# grammar to OUTPUT. Comments and surrounding blanks are stripped first;
# each remaining line is dispatched on its leading keyword.
while(<INPUT>)
{
s/#.*//;
s/^\s+//;
s/\s+$//;
# AGR role attr: remembered per role, applied later when LINKs are expanded.
if(/^AGR\s+(\S+)\s+(\S+)$/)
{
push @{$agr{$1}}, $2;
}
# GOV role cat: parsed category remembered per role, applied in addlinks.
elsif(/^GOV\s+(\S+)\s+(\S+)$/)
{
push @{$gov{$1}}, attr::parse($2);
}
# ROLE name: recorded (keyed by the whole line) and copied through.
elsif(/^ROLE\s+\S+$/)
{
$roles{$_}=1;
print OUTPUT "$_\n";
}
# SGL role: copied through unchanged.
elsif(/^SGL\s+\S+$/)
{
++$nsgl;
print OUTPUT "$_\n";
}
# REQ cat role: the source line is kept as a comment, then one REQ line is
# written for every atomic category matching the given category.
elsif(/^REQ\s+(\S+)\s+(\S+)$/)
{
print OUTPUT "#$_\n";
my $cat = attr::parse $1;
for my $atomcat (keys %cats)
{
if(attr::match @$cat, @{$cats{$atomcat}})
{
print OUTPUT "REQ ".$atomcat." $2\n";
++$nreq;
}
}
}
# LEFT role / RIGHT role: copied through unchanged.
elsif(/^LEFT\s+\S+$/)
{
++$nleft;
print OUTPUT "$_\n";
}
elsif(/^RIGHT\s+\S+$/)
{
++$nright;
print OUTPUT "$_\n";
}
# LINK heads deps role: heads and deps are comma-separated category lists;
# every head/dependent pair is expanded by addlinks.
elsif(my ($hs,$ds,$r) = /^LINK\s+($cats_re)\s+($cats_re)\s+(\S+)$/)
{
print OUTPUT "#$_\n";
for my $h ($hs =~ /$attr::cat_re/g)
{
for my $d ($ds =~ /$attr::cat_re/g)
{
addlinks($h,$d,$r);
}
}
}
# FLAG name: copied through unchanged.
elsif(/^FLAG\s+\S+$/)
{
++$nflag;
print OUTPUT "$_\n"
}
elsif(/^$/) {
# skip empty lines and comments
}
else
{
# Anything else is reported on STDERR and otherwise ignored.
print STDERR "Illegal format: $_\n";
}
}
# Expand one LINK rule (head category $h, dependent category $d, role $r)
# into LINK lines over atomic categories.
# A LINK line is written for every pair of atomic categories such that the
# head matches $h, the dependent matches $d, every AGR attribute registered
# for $r agrees between the two, and the dependent matches every GOV
# category registered for $r. The AGR/GOV conditions are echoed as comments.
sub addlinks
{
    my ($h, $d, $r) = @_;

    for my $attrname (@{$agr{$r}}) {
        print OUTPUT "#AGR $r $attrname\n";
    }
    for my $govcat (@{$gov{$r}}) {
        print OUTPUT "#GOV $r ".attr::unparse(@$govcat)."\n";
    }

    my $headpat = attr::parse $h;
    my $deppat  = attr::parse $d;

    HEAD:
    for my $atomhead (keys %cats)
    {
        next HEAD unless attr::match(@$headpat, @{$cats{$atomhead}});

        DEP:
        for my $atomdep (keys %cats)
        {
            next DEP unless attr::match(@$deppat, @{$cats{$atomdep}});
            for my $attrname (@{$agr{$r}})
            {
                next DEP unless attr::agree(@{$cats{$atomhead}},@{$cats{$atomdep}},$attrname);
            }
            for my $govcat (@{$gov{$r}})
            {
                next DEP unless attr::match(@$govcat,@{$cats{$atomdep}});
            }
            print OUTPUT "LINK ", $atomhead." ", $atomdep." $r\n";
            ++$nlink;
        }
    }
}
# Summary of what was written, one line per statement kind, on STDERR.
my $cat_total  = scalar(keys %cats);
my $role_total = scalar(keys %roles);
printf STDERR "%6d CAT statements\n", $cat_total;
printf STDERR "%6d ROLE statements\n", $role_total;
printf STDERR "%6d SGL statements\n", $nsgl;
printf STDERR "%6d REQ statements\n", $nreq;
printf STDERR "%6d LEFT statements\n", $nleft;
printf STDERR "%6d RIGHT statements\n", $nright;
printf STDERR "%6d LINK statements\n", $nlink;
printf STDERR "%6d FLAG statements\n", $nflag;
# Collect atomic categories from a dictionary file.
# The file is piped through the external `canonize` program; every
# ",category" item found (up to whitespace or ";") that is not yet known is
# written to OUTPUT as a CAT line and stored, parsed, in %cats.
# Dies when canonize cannot be started.
sub extractcats
{
    my $file = shift;
    # List-form pipe open: the file name is passed to canonize as-is and is
    # never interpreted by a shell.
    open(DICFILE, "-|", "canonize", $file)
        or die("Can't run canonize on $file: $!\n");
    while(<DICFILE>)
    {
        while(/,([^[:space:];]+)/g)
        {
            my $cat=$1;
            next if !$cat || exists $cats{$cat};
            $ncat++;
            print OUTPUT "CAT $cat\n";
            $cats{$cat}=attr::parse($cat);
        }
    }
    close(DICFILE)
        or print STDERR "canonize failed on $file\n";
}
# Load atomic categories from a category list (one category per line).
# The file is piped through the external `canonize` program; blanks are
# removed from each line, and every category not yet known is written to
# OUTPUT as a CAT line and stored, parsed, in %cats.
# Dies when canonize cannot be started.
sub loadcats
{
    my $file = shift;
    # List-form pipe open: the file name is passed to canonize as-is and is
    # never interpreted by a shell.
    open(CATFILE, "-|", "canonize", $file)
        or die("Can't run canonize on $file: $!\n");
    while(<CATFILE>)
    {
        tr/ \t\n//d;
        next if !$_ || exists $cats{$_};
        print OUTPUT "CAT $_\n";
        ++$ncat;
        $cats{$_}=attr::parse($_);
    }
    close(CATFILE)
        or print STDERR "canonize failed on $file\n";
}

View File

@ -1,217 +0,0 @@
#include "dgp0.hh"
#include "global.hh"
extern Grammar grammar;
extern MGraph mgraph;
extern SGraph sgraph;
SNode* snodes;
extern bool debug;
list<int> nodelist;
list<int>::iterator processed;
void set_initial_constraints(int node)
{
snodes[node].prop.forbidden.reset();
snodes[node].prop.required=grammar.obl[snodes[node].mnode->cat];
}
// True when attaching a dependent with this role would change the head's
// constraints: the role is single (sgl) or is currently required by the head.
bool changing_constraints(int head, Role role)
{
  if(grammar.sgl[role]) return true;
  return snodes[head].prop.required[role];
}
void apply_constraints(int head, Role role)
{
if(grammar.sgl[role]) snodes[head].prop.forbidden.set(role);
snodes[head].prop.required.reset(role);
}
// Properties the head would have after taking a left dependent with the
// given role: a single (sgl) role becomes forbidden and the role stops being
// required. The argument is a copy, so the caller's value is untouched.
NodeProp compute_prop_left(NodeProp headprop, Role role)
{
  if(grammar.sgl[role]) headprop.forbidden.set(role);
  headprop.required.reset(role);
  return headprop;
}
// Properties the head would have after taking a right dependent with the
// given role: a single (sgl) role becomes forbidden and the role stops being
// required. The argument is a copy, so the caller's value is untouched.
NodeProp compute_prop_right(NodeProp headprop, Role role)
{
  if(grammar.sgl[role]) headprop.forbidden.set(role);
  headprop.required.reset(role);
  return headprop;
}
// Look among the snodes of mnode for one whose properties, LH set and LV set
// all equal the given ones. Returns its index, or -1 when there is none.
int get_node(MNode& mnode, NodeProp p, bitset<MAXNODES>& newheadLH, bitset<MAXNODES>& newheadLV)
{
  const vector<int>& cands = mnode.snodes;
  for(vector<int>::size_type k=0; k<cands.size(); ++k)
  {
    SNode& c = snodes[cands[k]];
    if(c.prop==p && c.LH==newheadLH && c.LV==newheadLV)
      return cands[k];
  }
  return -1;
}
// Attach node *d as a dependent of head *h with role r, recording the arc on
// the head side (deps).
// If taking the dependent changes the head's properties, the arc is not
// stored on *h itself but on another snode of the same mnode carrying the
// new properties: an existing one found by get_node, or a fresh clone that
// is inserted into nodelist right after h.
void connect_left(list<int>::iterator h, list<int>::iterator d, Role r)
{
NodeProp &oldheadprop = snodes[*h].prop;
NodeProp newheadprop;
bitset<MAXNODES> newheadLV;
bitset<MAXNODES> newheadLH;
bitset<MAXNODES> newheadLD;
newheadprop=compute_prop_left(oldheadprop,r);
int newheadind;
if(oldheadprop==newheadprop)
newheadind = *h;
else
{
newheadLH = snodes[*h].LH;
// NOTE(review): LV is taken from the dependent here, while connect_right
// takes it from the head; the clone below starts with an empty LV instead.
// Confirm that the get_node lookup key is the intended one.
newheadLV = snodes[*d].LV;
newheadLD = snodes[*h].LD;
newheadind = get_node(*(snodes[*h].mnode), newheadprop, newheadLH, newheadLV);
if( newheadind < 0 )
{
newheadind = sgraph.clone(*h,newheadprop);
// place the clone immediately after the original head
list<int>::iterator nextit=h; ++nextit;
nodelist.insert(nextit,newheadind);
snodes[newheadind].LH=newheadLH;
snodes[newheadind].in_LH=true;
snodes[newheadind].LV.reset();
snodes[newheadind].LD = newheadLD;
if(debug) sgraph.print_node_debug(stderr," C ",newheadind);
}
else
snodes[newheadind].LD |= newheadLD; // ONLY FOR LD
}
// the arc remembers the original head (*h) as its ancestor
snodes[newheadind].deps.push_back(Arc(*d,r,*h));
// a saturated dependent passes its LV and LD sets on to the head
if(snodes[*d].saturated()) snodes[newheadind].LV |= snodes[*d].LV;
snodes[newheadind].LD.set(*d);
if(snodes[*d].saturated()) snodes[newheadind].LD |= snodes[*d].LD;
if(debug)
sgraph.print_arc(stderr,*d,newheadind,r,0), sgraph.print_node_debug(stderr," U ",newheadind);
}
// Attach node *d as a dependent of head *h with role r, recording the arc on
// the dependent side (heads).
// If taking the dependent changes the head's properties, the arc points not
// to *h itself but to another snode of the same mnode carrying the new
// properties: an existing one found by get_node, or a fresh clone that is
// inserted into nodelist right after h.
void connect_right(list<int>::iterator h, list<int>::iterator d, Role r)
{
NodeProp &oldheadprop = snodes[*h].prop;
NodeProp newheadprop;
bitset<MAXNODES> newheadLV;
bitset<MAXNODES> newheadLH;
bitset<MAXNODES> newheadLD;
int newheadind;
newheadprop = compute_prop_right(oldheadprop,r);
if(oldheadprop==newheadprop)
newheadind = *h;
else
{
// the new head inherits all three sets from the original head
newheadLH = snodes[*h].LH;
newheadLV = snodes[*h].LV;
newheadLD = snodes[*h].LD;
newheadind = get_node(*(snodes[*h].mnode), newheadprop, newheadLH, newheadLV);
if( newheadind < 0 )
{
newheadind = sgraph.clone(*h,newheadprop);
snodes[newheadind].LH=newheadLH;
snodes[newheadind].in_LH=false;
snodes[newheadind].LV=newheadLV;
snodes[newheadind].LD=newheadLD;
// place the clone immediately after the original head
list<int>::iterator nextit=h; ++nextit;
nodelist.insert(nextit,newheadind);
if(debug) sgraph.print_node_debug(stderr," C ",newheadind);
}
else
snodes[newheadind].LD |= newheadLD; // ONLY FOR LD
}
// the arc remembers the original head (*h) as its ancestor
snodes[*d].heads.push_back(Arc(newheadind,r,*h));
snodes[*d].LH.set(newheadind);
// a saturated head passes its own LH set on to the dependent
if(snodes[newheadind].saturated()) snodes[*d].LH |= snodes[newheadind].LH;
if(debug)
sgraph.print_arc(stderr,newheadind,*d,r,1), sgraph.print_node_debug(stderr," U ",*d);
}
// Try to attach nodes to the left of j as dependents of j.
// Walks nodelist backwards starting at j itself and stopping before the
// first element; a node i qualifies when it is visible from j and saturated.
// Every role the grammar allows between the two categories and that passes
// check_constr (direction 0) is connected.
void try_connect_dependents(list<int>::iterator j)
{
  for(list<int>::iterator i(j); i!=nodelist.begin(); --i)
  {
    if(!sgraph.visible(*i,*j) || !sgraph.saturated(*i)) continue;

    Roles& ji_roles = grammar.connect[snodes[*j].mnode->cat][snodes[*i].mnode->cat];
    for(RolesIter r=ji_roles.begin(); r!=ji_roles.end(); ++r)
    {
      if(grammar.check_constr(snodes[*j].prop,snodes[*i].prop,0,*r))
        connect_left(j,i,*r);
    }
  }
}
// Try to attach j as a dependent of nodes to its left.
// Walks nodelist backwards starting at j itself and stopping before the
// first element; a node i qualifies when it is visible from j.
// Every role the grammar allows between the two categories and that passes
// check_constr (direction 1) is connected.
void try_connect_heads(list<int>::iterator j)
{
  for(list<int>::iterator i(j); i!=nodelist.begin(); --i)
  {
    if(!sgraph.visible(*i,*j)) continue;

    Roles& ij_roles = grammar.connect[snodes[*i].mnode->cat][snodes[*j].mnode->cat];
    for(RolesIter r=ij_roles.begin(); r!=ij_roles.end(); ++r)
    {
      if(grammar.check_constr(snodes[*i].prop,snodes[*j].prop,1,*r))
        connect_right(i,j,*r);
    }
  }
}
// Mirror every arc: for each node after the first one in nodelist, every
// dependent arc is also recorded as a head arc on its destination, and every
// head arc as a dependent arc on its destination.
void reverse_links()
{
  list<int>::iterator i = nodelist.begin();
  for(++i; i!=nodelist.end(); ++i)
  {
    // The original started this loop at "begin()--": a post-decrement applied
    // to a temporary, whose result is just begin(). The stray "--" is dropped.
    for(vector<Arc>::iterator da=sgraph.nodes[*i].deps.begin(); da!=sgraph.nodes[*i].deps.end(); ++da)
      sgraph.nodes[da->dst].heads.push_back(Arc(*i,da->role,da->anc));
    for(vector<Arc>::iterator ha=sgraph.nodes[*i].heads.begin(); ha!=sgraph.nodes[*i].heads.end(); ++ha)
      sgraph.nodes[ha->dst].deps.push_back(Arc(*i,ha->role,ha->anc));
  }
}
// Parser entry point: builds the syntactic graph (sgraph) for the sentence
// currently held in mgraph, using the global grammar.
// Base snodes are added one mnode at a time; after each addition every node
// of nodelist not yet handled (including clones inserted while connecting)
// is tried as a head of nodes to its left and as their dependent. Finally
// all arcs are mirrored by reverse_links.
void dgp0()
{
snodes=sgraph.nodes;
nodelist.clear();
nodelist.push_back(0); // BOS
processed=nodelist.begin();
for(int m=0; m<mgraph.n ; ++m)
{
int basenode = sgraph.add_base_snode(mgraph.nodes+m); // should return SNode*
set_initial_constraints(basenode);
nodelist.push_back(basenode);
if(debug) {sgraph.print_node_debug(stderr,"B ",basenode);} // STDOUT!!!
// handle everything that follows the last processed node
list<int>::iterator cursor=processed;
while(++cursor != nodelist.end())
{
try_connect_dependents(cursor);
try_connect_heads(cursor);
processed=cursor;
}
}
reverse_links();
}

View File

@ -1,12 +0,0 @@
// Public interface of the dgp0 parsing procedure.
#ifndef _DGP0_HH
#define _DGP0_HH
#include "grammar.hh"
#include "sgraph.hh"
#include "mgraph.hh"
// API
// Runs the parser on the global mgraph/sgraph/grammar objects
// (it takes no parameters; the objects are defined in the main program).
void dgp0();
#endif

View File

@ -1,5 +0,0 @@
#include "global.hh"
// Global debug switch; off unless the program turns it on.
bool debug = false;

View File

@ -1 +0,0 @@
// Global debug switch (defined in the accompanying source file).
extern bool debug;

View File

@ -1,13 +0,0 @@
# Run the complete parsing pipeline on standard input.
# Requires Makefile.go (created by utt_make_config.pl); the compiled grammar
# gram.dgp is brought up to date first, and the pipeline is only started when
# that succeeds.
if test -f Makefile.go;
then
    if make -f Makefile.go gram.dgp;
    then
        tok |\
        lem -p W |\
        canonize |\
        sen |\
        gph -p W -p BOS -p EOS -r BOS |\
        dgp -i ds -p W -p BOS -p EOS -g gram.dgp
    else
        echo "Could not build gram.dgp." >&2
        exit 1
    fi
else
    # Diagnostics go to stderr and the script reports failure.
    echo "Invalid configuration! Run utt_make_config.pl first." >&2
    exit 1
fi

View File

@ -1,181 +0,0 @@
#include <stdio.h>
#include "grammar.hh"
// Table of constraint-checking functions taking a head and a dependent.
// NOTE(review): nothing in this file fills or reads the table.
bool (*constraint[MAXCONSTRS])(int head, int dep);
// Returns 1 when s names a known role; otherwise reports the problem (with
// the grammar line number) on stderr and returns 0.
// SIDE EFFECTS! The lookup goes through Role::index().
int chk_type(const char* s, int lineno)
{
  if(Role::index(s)<=0)
  {
    fprintf(stderr,"%8d: Invalid type '%s'. Line ignored.\n",lineno,s);
    return 0;
  }
  return 1;
}
// Returns 1 when s names a known category; otherwise reports the problem
// (with the grammar line number) on stderr and returns 0.
int chk_cat(const char* s, int lineno)
{
  if(Cat::index(s)<=0)
  {
    fprintf(stderr,"%8d: Invalid category '%s'. Line ignored.\n",lineno,s);
    return 0;
  }
  return 1;
}
// Register a category name and, when the number of categories outgrows the
// current capacity, grow the category-indexed tables (connect, obl) by 16.
void Grammar::add_category(const char* s)
{
  Cat::add(s);
  if(Cat::count()<=cats_sz) return;   // still enough room

  cats_sz += 16;
  connect.resize(cats_sz);
  for(int row=0; row<cats_sz; ++row)
    connect[row].resize(cats_sz);
  obl.resize(cats_sz);
}
// Register a role name and, when the number of roles outgrows the current
// capacity, grow the role-indexed tables (lt, gt) by 16.
void Grammar::add_type(const char* s)
{
  Role::add(s);
  if(Role::count()<=types_sz) return;   // still enough room

  types_sz += 16;
  lt.resize(types_sz);
  gt.resize(types_sz);
}
// Register a flag name and, when the number of flags outgrows the current
// capacity, grow the flag-indexed table (pass) by 16.
void Grammar::add_flag(const char* s)
{
  Flag::add(s);
  if(Flag::count()<=flags_sz) return;   // still enough room

  flags_sz += 16;
  pass.resize(flags_sz);
}
// Record that role s precedes role t (lt[s][t]) together with the mirrored
// relation gt[t][s], then propagate the relation recursively: every role
// already below s is put below t, and s is put below every role already
// above t.
// Nothing is propagated when either role is 0.
// NOTE(review): there is no "already set" check before recursing, so cyclic
// input looks like it could recurse without end - confirm.
void Grammar::set_lt(Role s, Role t)
{
lt[s].set(t);
gt[t].set(s);
if(s==0||(int)t==0)
return;
else
{
for(int i=0; i<Role::count(); ++i)
if(lt[i][s])
set_lt(i,t);
for(int i=0; i<Role::count(); ++i)
if(lt[t][i])
set_lt(s,i);
}
}
void Grammar::compute_gt()
{
for(Role s=0; s<Role::count(); ++s)
for(Role t=0; t<Role::count(); ++t)
if(lt[s][t])
gt[t].set(s);
}
// Read a compiled grammar from f. Always returns true.
// Each line has the shape "key [arg1 [arg2 [arg3]]]". Empty lines and lines
// whose first field starts with '#' are skipped. Recognised keys are CAT,
// ROLE, SGL, LEFT, RIGHT, REQ, LINK and FLAG; a line with an unknown key or
// too few fields is reported on stderr and ignored. Arguments naming unknown
// roles or categories are reported by chk_type / chk_cat and the line is
// ignored.
bool Grammar::read(FILE* f)
{
  char line[MAXLINE], key[MAXLINE], arg1[MAXLINE], arg2[MAXLINE], arg3[MAXLINE];

  for(int lineno=1; fgets(line,MAXLINE,f); ++lineno)
  {
    const int fields=sscanf(line,"%s %s %s %s",key,arg1,arg2,arg3);
    if(fields<1 || key[0]=='#') continue; // skip empty lines and comments

    if(fields>=2 && strcmp(key,"CAT")==0)
      add_category(arg1);
    else if(fields>=2 && strcmp(key,"ROLE")==0)
      add_type(arg1);
    else if(fields>=2 && strcmp(key,"SGL")==0)
    {
      if(chk_type(arg1,lineno)) set_sgl(arg1);
    }
    else if(fields>=2 && strcmp(key,"LEFT")==0)
    {
      if(chk_type(arg1,lineno)) set_left(arg1);
    }
    else if(fields>=2 && strcmp(key,"RIGHT")==0)
    {
      if(chk_type(arg1,lineno)) set_right(arg1);
    }
    else if(fields>=3 && strcmp(key,"REQ")==0)
    {
      // the sum makes sure every argument is checked (and reported)
      if(chk_cat(arg1,lineno) + chk_type(arg2,lineno) == 2)
        set_obl(arg1,arg2);
    }
    else if(fields>=4 && strcmp(key,"LINK")==0)
    {
      if(chk_cat(arg1,lineno) + chk_cat(arg2,lineno) + chk_type(arg3,lineno) == 3)
        set_connect(arg1,arg2,arg3);
    }
    else if(fields>=2 && strcmp(key,"FLAG")==0) // FLAG DECLARATION
      add_flag(arg1);
    else
      fprintf(stderr,"Invalid line %d. Ignored.\n", lineno);
  }
  // compute_gt();
  return true;
}
void Grammar::write(FILE* f)
{
for(Cat i=1; i<Cat::count(); ++i)
fprintf(f,"CAT\t%s\n",i.str());
for(Role i=1; i<Role::count(); ++i)
fprintf(f,"ROLE\t%s\n",i.str());
for(Role i=1; i<Role::count(); ++i)
if(sgl.test(i)) fprintf(f,"SGL\t%s\n",i.str());
for(Role i=1; i<Role::count(); ++i)
if(left.test(i)) fprintf(f,"LEFT\t%s\n",i.str());
for(Role i=1; i<Role::count(); ++i)
if(right.test(i)) fprintf(f,"RIGHT\t%s\n",i.str());
for(Cat c=1; c<Cat::count(); ++c)
for(Role r=1; r<Role::count(); ++r)
if(obl[c].test(r)) fprintf(f,"REQ\t%s\t%s\n",c.str(),r.str());
for(Cat c=1; c<Cat::count(); ++c)
for(Cat d=1; d<Cat::count(); ++d)
for(Role t=1; t<Role::count(); ++t)
if(connect[c][d].count(t))
fprintf(f,"LINK\t%s\t%s\t%s\n",c.str(),d.str(),t.str());
for(Flag i=1; i<Flag::count(); ++i)
fprintf(f,"FLAG\t%s\n",i.str());
}

View File

@ -1,81 +0,0 @@
#ifndef _GRAMMAR_HH
#define _GRAMMAR_HH
#include <bitset>
#include <vector>
#include <list>
#include <set>
#include "const.hh"
#include "thesymbols.hh"
#include "sgraph.hh"
using namespace std;
// A role together with two flag sets (hflags, dflags).
// NOTE(review): all members are private and there are no member functions,
// so the class cannot be used as it stands - presumably unfinished.
class Link
{
Role role;
FlagSet hflags;
FlagSet dflags;
};
// The dependency grammar: symbol tables are kept in the Cat/Role/Flag
// symbol spaces, while this class holds the tables indexed by them.
class Grammar
{
public:
// enum CONSTR { SGL, OBL, LEFT, RIGHT, INIT, NONINIT, FIN, NONFIN };
Grammar() : types_sz(0), cats_sz(0), flags_sz(0) {} ;
// Current capacities of the role-, category- and flag-indexed tables;
// grown in steps of 16 by add_type / add_category / add_flag.
int types_sz;
int cats_sz;
int flags_sz;
// connect[c][d]: roles stored by set_connect(c,d,r) (LINK statements).
vector< vector< Roles > > connect;
// Roles marked by SGL statements.
RoleSet sgl;
// obl[c]: roles stored by set_obl(c,r) (REQ statements).
vector< RoleSet > obl;
// Roles marked by LEFT / RIGHT statements.
RoleSet left;
RoleSet right;
// Role ordering relation and its mirror (see set_lt / compute_gt).
vector< RoleSet > lt;
vector< RoleSet > gt;
// vector< vector< vector<
vector< FlagSet > set;
vector< FlagSet > pass;
// Read / write the grammar in its line-oriented text format.
bool read(FILE* f);
void write(FILE* f);
// Register a new symbol and grow the dependent tables when needed.
void add_category(const char* s);
void add_type(const char* s);
void add_flag(const char* s);
void set_sgl(Role r) { sgl.set(r); }
void set_obl(Cat c, Role r) { obl[c].set(r); }
void set_left(Role r) { left.set(r); }
void set_right(Role r) { right.set(r); }
// NOTE(review): stores lt[s][r], i.e. the opposite index order to
// set_lt(r,s), which stores lt[r][s] - confirm which one is intended.
void set_order(Role r, Role s) { lt[s].set(r); }
void set_connect(Cat c, Cat d, Role r) { connect[c][d].insert(r); }
void set_lt(Role r, Role s);
void compute_gt();
// True when a dependent may be attached with the given role in the given
// direction (defined inline below).
bool check_constr(NodeProp& hprop, NodeProp& dprop, int dir, Role role);
};
// Decide whether a dependent may be attached to a head with the given role.
// The role must not be forbidden on the head; a RIGHT role is only allowed
// in direction 1 and a LEFT role only in direction 0.
// dprop is not consulted.
inline bool Grammar::check_constr(NodeProp& hprop, NodeProp& dprop, int dir, Role role)
{
  if(hprop.forbidden[role]) return false;
  if(right[role] && dir!=1) return false;
  if(left[role] && dir!=0) return false;
  return true;
}
#endif

View File

@ -1,121 +0,0 @@
/**
* Package: UAM Text Tools
* Component: dgp (dg parser)
* Version: 1.0
* Author: Tomasz Obrebski
*/
#include "global.hh"
#include "mgraph.hh"
#include "sgraph.hh"
#include "grammar.hh"
#include "dgp0.hh"
#include "../common/common.h"
#include "cmdline.h"
// Maximum number of input segments (lines) buffered for one sentence.
#define MAXSEGMENTS 500
// Text of every buffered segment of the current sentence.
char segment[MAXSEGMENTS][MAXLINE];
// Number of segments buffered so far.
int segcount=0;
// For each buffered segment: index of its node in mgraph, or -1 when the
// segment is not parsed. Declared int: node indices go up to MAXNODES-1,
// which does not fit a char, and the -1 marker must stay negative even
// where plain char is unsigned.
int seg_mnode[MAXSEGMENTS];
// Expanded path of the grammar file.
char grammarfile[255];
Grammar grammar;
MGraph mgraph;
SGraph sgraph;
FILE* grammarf;
FILE* debugf=stdout;
// Bit mask of SGraph::Output values selecting what output() prints.
unsigned int info=0U;
void output();
main(int argc, char* argv[])
{
gengetopt_args_info args;
if(cmdline_parser(argc,argv,&args) != 0)
exit(1);
process_config_files(&args,argv[0]);
process_common_options(&args,argv[0]);
if(!args.grammar_given)
fprintf(stderr,"dgp: no grammar given\n");
expand_path(args.grammar_arg,grammarfile);
if(!(grammarf=fopen(grammarfile,"r")))
fprintf(stderr,"dgp: grammar file not found: %s.\n", grammarfile), exit(1);
if(args.debug_given) debug=true;
for(char* c=args.info_arg; *c!='\0' ; ++c)
switch(*c)
{
case 'h': info|=SGraph::HEADS; break;
case 'd': info|=SGraph::DEPS; break;
case 's': info|=SGraph::SETS; break;
case 'c': info|=SGraph::CONSTRAINTS; break;
}
grammar.read(grammarf);
fclose(grammarf);
mgraph.clear();
sgraph.clear();
char line[1000];
while (fgets(line, MAXLINE+1, inputf))
{
line[strlen(line)-1] = '\0';
strcpy(segment[segcount],line);
char segtype[80];
seg_mnode[segcount] = process_seg(line, args) ? mgraph.add_node(line) : -1;
segcount++;
getfield(line,"3",segtype);
if(strcmp(segtype,"EOS")==0)
{
dgp0(); // parametry!!! MGraph, SGraph, Grammar
output();
mgraph.clear();
sgraph.clear();
segcount=0;
}
// if(args.interactive_flag) { fflush(outputf); fflush(failedf); }
}
fclose(inputf);
fclose(outputf);
cmdline_parser_free(&args);
exit(0);
}
void output()
{
for(int si=0; si<segcount; ++si)
{
if(seg_mnode[si]>=0)
{
MNode& m=mgraph.nodes[seg_mnode[si]];
for(vector<int>::iterator s=m.snodes.begin(); s!=m.snodes.end(); ++s)
{
fputs(segment[si],outputf);
sgraph.print_node(outputf, *s, info);
fputc('\n',outputf);
}
}
else
{
fputs(segment[si],outputf);
fputc('\n',outputf);
}
}
}

View File

@ -1,54 +0,0 @@
#include "mgraph.hh"
#include "thesymbols.hh"
#include "const.hh"
#include <stdio.h>
int MGraph::add_node(char* seg)
{
nodes[n].clear();
char field1[80], field3[80], descr[256], gph[256];
char* cat;
getfield(seg,"1",field1);
nodes[n].pos=atoi(field1);
getfield(seg,"3",field3);
if(!getfield(seg,"lem",descr)) strcpy(descr,"?,?");
cat=descr;
while(*cat!=',' && *cat ) ++cat;
if(*cat) ++cat;
// Cat::add(cat);
if(Cat::index(cat)>0)
nodes[n].cat=cat;
else
nodes[n].cat="NULL";
nodes[n].pred.clear();
char* tok;
int previd;
if(!getfield(seg,"gph",gph))
{
fprintf(stderr,"No gph field. Aborting (sorry).\n");
exit(1);
}
char* ids=strtok(gph,":");
if(n!=atoi(ids)){fprintf(stderr,"Invalid node id in line ?. Program aborted.\n"); exit(1); }
char *preds;
while(preds=strtok(NULL,","))
{
previd=atoi(preds);
nodes[n].pred.push_back(&nodes[previd]);
}
return n++;
}

View File

@ -1,37 +0,0 @@
#ifndef _MGRAPH_HH
#define _MGRAPH_HH
#include <vector>
#include "const.hh"
#include "thesymbols.hh"
#include "../common/common.h"
using namespace std;
// Node of the morphological graph: one input segment.
class MNode
{
public:
// NOTE(review): printed by SGraph::print_arc, but MGraph::add_node never
// fills it in.
char type[MAXFORMLEN];
// Category of the segment ("NULL" when unknown to the grammar).
Cat cat;
// Value of the segment's first field.
int pos;
// Predecessor nodes (from the gph field).
vector<MNode*> pred;
// Indices of the syntactic nodes (SNode) built for this node.
vector<int> snodes;
// Forgets the snodes only; the other members are overwritten by add_node.
void clear() { snodes.clear(); };
};
// Morphological graph: a fixed-capacity table of nodes.
class MGraph
{
public:
MNode nodes[MAXNODES];
// Number of nodes in use. There is no constructor, so clear() has to be
// called before the first add_node.
int n;
void clear() { n=0; };
// Adds a node for the given segment and returns its index.
int add_node(char* seg);
};
#endif

View File

@ -1,165 +0,0 @@
#include "global.hh"
#include "sgraph.hh"
#include "mgraph.hh"
#include "grammar.hh"
#include "const.hh"
#include <stdio.h>
// Create the base syntactic node for mn and return its index.
// The new node's LV set receives every snode of mn's predecessors that has
// in_LH set, plus - when such a snode is saturated - that snode's LH set.
// The node is registered with mn and marked in_LH.
int SGraph::add_base_snode(MNode* mn)
{
  const int nodeind=n;
  SNode &node=nodes[nodeind];
  node.clear();
  node.mnode=mn;

  for(vector<MNode*>::iterator pm=mn->pred.begin(); pm!=mn->pred.end(); ++pm)
  {
    vector<int>& predsnodes=(*pm)->snodes;
    for(vector<int>::iterator ps=predsnodes.begin(); ps!=predsnodes.end(); ++ps)
    {
      SNode& cand=nodes[*ps];
      if(!cand.in_LH) continue;
      node.LV.set(*ps);
      if(cand.saturated()) node.LV |= cand.LH;
    }
  }

  mn->snodes.push_back(nodeind);
  ++n;
  node.in_LH=true;
  return nodeind;
}
// A saturated dependent passes its LV and LD sets on to the head;
// an unsaturated one changes nothing.
void SGraph::update_left(int headind, int depind)
{
  SNode &dep=nodes[depind];
  if(!dep.saturated()) return;

  SNode &head=nodes[headind];
  head.LV |= dep.LV;
  head.LD |= dep.LD;
}
// The head is added to the dependent's LH set; a saturated head also passes
// on its own LH set.
void SGraph::update_right(int headind, int depind)
{
  SNode &head=nodes[headind];
  SNode &dep=nodes[depind];
  dep.LH.set(headind);
  if(head.saturated()) dep.LH |= head.LH;
}
int SGraph::clone(int ancind, NodeProp newprop)
{
int newind = n++;
SNode &newnode=nodes[newind];
SNode &ancnode = nodes[ancind];
newnode.clear();
newnode.prop=newprop;
newnode.mnode=ancnode.mnode;
newnode.mnode->snodes.push_back(newind);
return newind;
}
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------
// Write the description of node n (see sprint_node) to f.
// Returns the number of characters in the description. The original fell
// off the end of this non-void function without returning a value.
int SGraph::print_node(FILE* f, int n, unsigned int info)
{
  char buf[1000];
  int len=sprint_node(buf,n,info);
  fputs(buf,f);
  return len;
}
// Format the description of node nodeind into buf; returns its length.
// The description is " dgp:<index>;<s|u>" (s = saturated, u = unsaturated),
// followed by one ';'-introduced section for each bit set in info:
//   HEADS        arcs to heads, "++role-node/ancestor", comma separated
//   DEPS         arcs to dependents, "--role-node/ancestor", comma separated
//   SETS         four sets in braces: the snodes of the predecessor mnodes,
//                then LV, LH and LD
//   CONSTRAINTS  forbidden roles as "!role", then required roles as "&role"
// buf must be large enough; no bound is checked.
// NOTE(review): the comma flag is shared by HEADS and DEPS, so with both
// selected the DEPS section starts with a comma whenever a head arc was
// printed. Kept as is, since consumers may depend on the exact format.
int SGraph::sprint_node(char* buf, int nodeind, unsigned int info)
{
  char* buf0=buf;
  SNode &node=nodes[nodeind];

  buf+=sprintf(buf," dgp:%d",nodeind);
  buf+=sprintf(buf, saturated(nodeind) ? ";s" : ";u");

  bool cont=false;
  if (info&HEADS)
  {
    buf+=sprintf(buf,";");
    for(vector<Arc>::iterator h=node.heads.begin(); h!=node.heads.end(); ++h)
    {
      if(cont) buf+=sprintf(buf,","); else cont=true;
      buf+=sprintf(buf,"++%s-%d/%d",h->role.str(),h->dst,h->anc);
    }
  }
  if (info&DEPS)
  {
    buf+=sprintf(buf,";");
    for(vector<Arc>::iterator d=node.deps.begin(); d!=node.deps.end(); ++d)
    {
      // if(! nodes[d->dst].saturated()) continue; // DO NOT PRINT UNSATURATED DEPENDENTS
      if(cont) buf+=sprintf(buf,","); else cont=true;
      buf+=sprintf(buf,"--%s-%d/%d",d->role.str(),d->dst,d->anc);
    }
  }
  if (info&SETS)
  {
    int ord=0;
    buf+=sprintf(buf,";{");
    for(vector<MNode*>::iterator pm=node.mnode->pred.begin(); pm!=node.mnode->pred.end(); ++pm)
      for(vector<int>::iterator ps=(*pm)->snodes.begin(); ps!=(*pm)->snodes.end(); ++ps)
        buf+=sprintf(buf, ord++ ? ",%d" : "%d", *ps);
    // Valid node indices are 0..n-1. The original loops ran up to n
    // inclusive, which indexes past the end of the bitsets when the graph
    // is full.
    buf+=sprintf(buf,"};{");
    ord=0; for(int j=0; j<n; ++j) if(node.LV[j]) buf+=sprintf(buf, ord++ ? ",%d" : "%d", j);
    buf+=sprintf(buf,"};{");
    ord=0; for(int j=0; j<n; ++j) if(node.LH[j]) buf+=sprintf(buf, ord++ ? ",%d" : "%d", j);
    buf+=sprintf(buf,"};{");
    ord=0; for(int j=0; j<n; ++j) if(node.LD[j]) buf+=sprintf(buf, ord++ ? ",%d" : "%d", j);
    buf+=sprintf(buf,"}");
  }
  if (info&CONSTRAINTS)// buf+=sprint_node_constraints(buf,n);
  {
    buf+=sprintf(buf,";");
    int cont=0;
    // Valid roles are 1..count()-1; str() returns NULL for count() itself,
    // so the loops stop before it.
    for(Role i=1; i<Role::count(); ++i)
      if(node.prop.forbidden[i]) buf+=sprintf(buf,"%s!%s",(cont++)?",":"",i.str());
    for(Role i=1; i<Role::count(); ++i)
      if(node.prop.required[i]) buf+=sprintf(buf,"%s&%s",(cont++)?",":"",i.str());
  }
  // buf+=sprintf(buf,"\n");
  return buf-buf0;
}
// Format a debug line for node n into buf: "#" and the prefix, the full
// node description (all sections), and a newline. Returns the length.
int SGraph::sprint_node_debug(char* buf, const char* pref, int n)
{
  char* out = buf;
  out += sprintf(out,"#%s",pref);
  out += sprint_node(out,n,HEADS|DEPS|SETS|CONSTRAINTS);
  out += sprintf(out,"\n");
  return out-buf;
}
// Write the debug line for node n (see sprint_node_debug) to f.
// Returns the number of characters in the line. The original fell off the
// end of this non-void function without returning a value.
int SGraph::print_node_debug(FILE* f, const char* pref, int n)
{
  char buf[1000];
  int len=sprint_node_debug(buf,pref,n);
  fputs(buf,f);
  return len;
}
// Write a debug line describing an arc between two nodes to f:
// "# role:type.NN <arrow> type.NN", with "-->" for dir != 0 and "<--"
// for dir == 0.
void SGraph::print_arc(FILE* f, int left, int right, Role role, int dir) // 0 - left, 1 - right
{
  const char* arrow = dir ? "-->" : "<--";
  fprintf(f,"# %s:%s.%02d %s %s.%02d\n",
          role.str(), nodes[left].mnode->type, left,
          arrow,
          nodes[right].mnode->type, right);
}

View File

@ -1,110 +0,0 @@
#ifndef _SGRAPH_HH
#define _SGRAPH_HH
#include <stdio.h>
#include <list>
#include <vector>
#include <bitset>
#include "const.hh"
#include "thesymbols.hh"
using namespace std;
class MNode;
// Labelled arc stored on one of its end nodes.
struct Arc
{
// Index of the node at the other end of the arc.
int dst;
// Role labelling the arc.
Role role;
// Index of the head node the connection was made from; differs from the
// actual head when the arc was attached to a clone.
int anc;
Arc(int d, Role r, int a) : dst(d), role(r), anc(a) {};
};
// Constraint state of a syntactic node: the roles it still requires and the
// roles it may no longer take.
struct NodeProp
{
  bitset<MAXTYPES> required;
  bitset<MAXTYPES> forbidden;

  // Two states are equal when both role sets are equal.
  // Declared const so that const objects and temporaries can be compared.
  bool operator==(const NodeProp& p) const
  { return required==p.required && forbidden==p.forbidden; }

  // Nothing required, nothing forbidden.
  void clear()
  { required.reset(), forbidden.reset(); }
};
// Node of the syntactic graph.
struct SNode
{
// The morphological node this node was built for.
MNode* mnode;
// Required / forbidden roles.
NodeProp prop;
// Sets of node indices. SGraph::visible tests LV; the debug output labels
// the three as visible, heads and dependents sets.
bitset<MAXNODES> LV;
bitset<MAXNODES> LH;
bitset<MAXNODES> LD;
// Consulted by SGraph::add_base_snode when it builds LV for later nodes.
bool in_LH;
// Arcs to heads and to dependents.
vector<Arc> heads;
vector<Arc> deps;
// Resets the properties, the three sets and both arc lists; mnode and
// in_LH are left as they are.
void clear() { prop.clear(), LV.reset(), LD.reset(), LH.reset(), heads.clear(), deps.clear(); }
// True when no role is required any more.
bool saturated() { return prop.required.none(); }
};
// Syntactic graph: a fixed-capacity table of nodes plus output helpers.
class SGraph
{
public:
SNode nodes[MAXNODES];
int n; // number of nodes
// Bits selecting the sections printed by sprint_node / print_node.
enum Output { HEADS=1, DEPS=2, SETS=4, CONSTRAINTS=8 };
SGraph() : n(0) {}
void clear() { n=0; }
// Create the base node for a morphological node; returns its index.
int add_base_snode(MNode* mn);
// Create a copy of node ancind with new properties; returns its index.
int clone(int ancind, NodeProp newprop);
void update_left(int headind, int depind);
void update_right(int headind, int depind);
// True when node left is in the LV set of node right.
bool visible(int left, int right);
// True when the node requires no further roles.
bool saturated(int node);
//--------------------------------------------------------------------
// NOTE(review): no definition of read() or write() is visible in sgraph.cc.
void read(FILE* f);
void write(FILE* f, list<int> nodelist, unsigned int info);
// Format / print a node description; the sprint_* variants return the
// number of characters written to buf.
int sprint_node(char* buf, int n, unsigned int info);
int print_node(FILE* f, int n, unsigned int info);
int sprint_node_debug(char* buf, const char* pref, int n);
int print_node_debug(FILE* f, const char* pref, int n);
void print_arc(FILE* f, int left, int right, Role role, int dir); // 0 - left, 1 - right
};
inline bool SGraph::visible(int left, int right)
{
return nodes[right].LV[left];
}
inline bool SGraph::saturated(int node)
{
return nodes[node].saturated();
}
#endif

View File

@ -1,39 +0,0 @@
#include "symbol.hh"
// CLASS symbols
//int Symbols::_no_of_spaces=0;
// Release the copies of the symbol names made by add().
Symbols::~Symbols()
{
  for(std::vector<const char*>::iterator it=table.begin(); it!=table.end(); ++it)
    free((void*)*it);
  table.clear();
}
void Symbols::load(const char* filename)
{
ifstream f(filename);
char s[100];
while(f)
{
f >> s >> ws;
if(strlen(s)) add(s);
}
}
// Add a symbol unless it is already present. The name is duplicated, and
// the new symbol gets the next free index.
void Symbols::add(const char* sym)
{
  if(hash.count(sym)!=0) return;   // already known

  char* owned=strdup(sym);
  const int index=table.size();
  hash[owned]=index;
  table.push_back(owned);
}
//template<int space>
//Symbols Symbol<space>::defs;

View File

@ -1,143 +0,0 @@
#ifndef _SYMBOL_HH
#define _SYMBOL_HH
#include <ext/hash_map>
//#include <ext/hash_fun.h>
#include <string>
#include <string.h>
#include <fstream>
#include <vector>
#include <iostream>
using namespace std;
using __gnu_cxx::hash_map;
using __gnu_cxx::hash;
// Key comparison for the cstr_hash hash table
// Equality predicate for C strings: compares contents, not addresses.
struct eqstr
{
  bool operator()(const char* s, const char* t) const
  {
    return !strcmp(s,t);
  }
};
// Hash table for storing symbols
typedef hash_map<const char*,int,hash<const char*>,eqstr> cstr_hash;
// Symbol table. Provides access to symbols through their index or name.
class Symbols
{
public:
Symbols() { add("NULL"); };
~Symbols();
void load(const char* filename);
int operator[](const char* s) { return hash[s]; };
const char* operator[](int i) { return table[i]; };
void add(const char* c);
int count() { return table.size(); };
private:
std::vector<const char*> table;
cstr_hash hash;
};
//////////////////////////////////////////////////////////////////////
/// Symbol class template.
/** The template argument determines the symbol space.
Each space is created with symbol "NULL" with indexed 0 already in.
*/
template <int space>
class Symbol
{
public:
  /// Load the contents of the symbol table from file.
  static void define(const char *filename)
  { defs.load(filename); }

  /// Add symbol s and return it.
  /** The string is duplicated.
      (The original had no return statement although a Symbol is returned.)
  */
  static Symbol<space> add(const char* s) { defs.add(s); return Symbol<space>(s); }

  /// Number of symbols, including "NULL".
  static int count() { return defs.count(); };

  /// First symbol.
  static int first() { return 1; }

  /// Last symbol.
  /** NOTE(review): returns count()+1, while the valid indices visible in
      this file are 0..count()-1 - confirm before relying on it.
  */
  static int last() { return defs.count()+1; }

  /// Index of the symbol named s (0 if there is none).
  static int index(const char* s) { return defs[s]; }

  /// Just for tests.
  static void print();

  /// 0-argument constructor, default value is 0 ("NULL").
  Symbol() : val(0) {};

  /// Constructing a symbol from its index.
  /** No check is performed.
   */
  Symbol(int v) : val(v) {};

  /// Constructing a symbol from its name (string to Symbol conversion).
  /** If s is not a symbol name, the value of 0 ("NULL") is assigned.
   */
  Symbol(const char * s) : val(defs[s]) {};

  /// Symbol to char* conversion. If symbol is invalid, NULL is returned.
  const char* str() const { return (val>=0 && val<count())?defs[val]:NULL; };

  /// Symbol to int conversion.
  /** Provides a way to iterate through symbols, eg:
   *  for(Symbol<0> s=1; s; s++ ) ...
      s=0; while(++s) ...
      (Written in the standard form; the original parenthesised the
      operator name.)
  */
  operator int() const { return val; };

  /// Prefix increment: moves on to the next index.
  Symbol operator++() {val++; return *this;}

  // bool operator<(Symbol& s) { return val < s.val; }
private:
  static Symbols defs;   // the symbol table of this space
  int val;               // index of this symbol
};
// Print every symbol of the space as "index: name", one per line.
template <int space>
void Symbol<space>::print()
{
  // The name is obtained with str(): a Symbol has no conversion to
  // const char*, so the cast used originally could not compile once this
  // function was instantiated.
  for(Symbol i=0; i<count(); ++i)
    cout << (int)i << ": " << i.str() << endl;
}
// Definition of the per-space symbol table (one table for each value of the
// template argument).
template<int space>
Symbols Symbol<space>::defs;
// Symbols of one space are ordered by their index.
template<int space>
bool operator<(const Symbol<space>& s, const Symbol<space>& t)
{
  const int left = (int)s;
  const int right = (int)t;
  return left < right;
}
#endif

View File

@ -1,32 +0,0 @@
// The symbol spaces used by dgp and the container types built on them.
#ifndef __THESYMBOLS__HH
#define __THESYMBOLS__HH
#include "symbol.hh"
#include "const.hh"
#include <list>
#include <set>
#include <bitset>
using namespace std;
// Syntactic categories (symbol space 1).
typedef Symbol<1> Cat;
// Roles, i.e. arc labels (symbol space 2), and collections of roles.
typedef Symbol<2> Role;
typedef list<Role> RoleList;
typedef list<Role>::iterator RoleListIter;
// A set of roles as a bitset indexed by role; holds at most MAXTYPES roles.
typedef bitset<MAXTYPES> RoleSet;
typedef set<Role> Roles;
typedef Roles::iterator RolesIter;
// Constraints (symbol space 3).
typedef Symbol<3> Constr;
typedef list<Constr> ConstrList;
typedef list<Constr>::iterator ConstrListIter;
// Symbol space 4.
typedef Symbol<4> Rel;
// Flags (symbol space 5); a flag set holds at most MAXFLAGS flags.
typedef Symbol<5> Flag;
typedef bitset<MAXFLAGS> FlagSet;
#endif

View File

@ -1,304 +0,0 @@
#!/usr/bin/ruby -I /usr/local/lib/utt -I $HOME/.local/lib/utt
$: << "#{ENV['HOME']}/.local/lib/utt"
$: << "/usr/local/lib/utt"
require 'getoptlong'
require 'seg.rb'
# Command-line options: long name, short name, and whether the option takes
# an argument.
opts = GetoptLong.new(
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
[ '--format', '-F', GetoptLong::REQUIRED_ARGUMENT ],
[ '--info', '-I', GetoptLong::REQUIRED_ARGUMENT ],
[ '--only-trees','-t', GetoptLong::NO_ARGUMENT ])
# Text printed for --help. (The first sentence used to read "dgp must must
# be run"; the doubled word is removed.)
$helptext=
"The program generates trees from the graph output by dgp. dgp must\n"+
"be run with '-i ds' option.\n\n"+
"Command: tre [options]\n\n"+
"Options:\n"+
"--help -h Print help (this text) and exit.\n"+
"--debug -d Verbose output. For developers only.\n"+
"--format=s -F s Output format. Recognized values:\n"+
" a root + list of arcs\n"+
" p parenthesized notation\n"+
" h human readable indented tree format\n"+
" Multiple values are allowed. (default p)\n"+
"--info=s -I s Information printed. Recognized values:\n"+
" n node identifier\n"+
" f surface form\n"+
" m morphological information\n"+
" l arc labels\n"+
"--only-trees -t Do not copy input. Print trees only.\n"
# Option defaults; 'DEFAULT' for $INFO means "derive it from the format".
$DEBUG=false
$FORMAT='p'
$INFO='DEFAULT'
$ONLYTREES=false

# Apply the options given on the command line.
opts.each do |opt, arg|
  if opt == '--help'
    print $helptext
    exit 0
  elsif opt == '--debug'
    $DEBUG=true
  elsif opt == '--format'
    $FORMAT=arg
  elsif opt == '--info'
    $INFO=arg
  elsif opt == '--only-trees'
    $ONLYTREES=true
  else
    print "Unknown option #{opt}. Ignored.\n"
  end
end
# When --info was not given, choose what is printed per node from the output
# format: the human readable format shows everything, the others only node
# identifiers and arc labels. The format is tested by letter, so that a
# combination of formats (e.g. 'ph') also gets a usable default instead of
# being left at 'DEFAULT'.
if $INFO=='DEFAULT'
  $INFO = ($FORMAT =~ /h/) ? 'fmnl' : 'nl'
end

# Separator of the parts of the dgp field.
$dgpsep=';'
# Main procedure: read dgp output from input, sentence by sentence, and print
# the trees of every sentence.
# Input lines are copied to the output unless --only-trees was given. Lines
# with a dgp field are collected as graph nodes; at the EOS segment the graph
# is parsed, the trees are generated, and every tree is printed in each of
# the formats selected by $FORMAT (a, p, h). Trees are numbered consecutively
# over the whole input.
# Processing stops when a sentence does not start with BOS or when a node has
# no gph field.
def tre(input)
  $gphid=[]
  $form=[]
  $lem=[]
  nodes=[]
  count=0
  seg=Seg.new
  for line in input
    print line unless $ONLYTREES
    seg.set(line)
    dgp=seg['dgp']
    next unless dgp

    if nodes==[] && seg[3]!='BOS'
      print "A sentence must start with BOS segment. Aborting.\n"
      return
    end
    id=dgp[/^\d+/].to_i
    if gph=seg['gph']
      $gphid[id]=gph[/^\d+/].to_i
    else
      print "No gph field. Aborting.\n"
      return
    end
    $form[$gphid[id]]=seg[4]
    $lem[$gphid[id]]=seg['lem']
    nodes[id] = [seg[1].to_i,dgp]

    next unless seg[3]=='EOS'

    $pref = "#{seg[1]} #{seg[2]} SYN *"
    parsegraph(nodes)
    printgraph if $DEBUG
    $thetrees=[]
    gentrees2
    for t in $thetrees
      count += 1
      t1=ground(t)
      # Each requested format is printed. (A case statement was used here
      # before, which printed only the first matching format even though
      # several may be given.)
      if $FORMAT =~ /a/
        print "#{$pref} tre:#{count} arc:"
        printarcs(t1[0],t1[1])
        print "\n"
      end
      if $FORMAT =~ /p/
        print "#{$pref} tre:#{count} par:"
        printpar(t1[0],t1[1])
        print "\n"
      end
      if $FORMAT =~ /h/
        print "#\n# tree #{count}\n# ------\n"
        printtree(t1[0],t1[1],0)
      end
    end
    nodes=[]
  end
end
# Text describing node id, composed according to $INFO: the identifier (n),
# the surface form (f) and the morphological description (m). The identifier
# is followed by '.' when anything follows it, and the form by ';' when the
# description follows.
def nodeinfo(id)
  with_id   = ($INFO =~ /n/)
  with_form = ($INFO =~ /f/)
  with_lem  = ($INFO =~ /m/)

  info=""
  if with_id
    info += id.to_s
    info += '.' if with_form || with_lem
  end
  if with_form
    info += $form[id]
    info += ';' if with_lem
  end
  info += $lem[id] if with_lem
  info
end
# Print a tree as its root followed by the list of its arcs, each arc as
# ";[label:]head-dependent" (the label only when $INFO contains l).
def printarcs(root,arcs)
  print nodeinfo(root)
  arcs.each do |arc|
    print ';'
    if $INFO =~ /l/
      print "#{arc[2]}:"
    end
    print nodeinfo(arc[0])+'-'+nodeinfo(arc[1])
  end
end
# Print the subtree rooted at root as an indented listing, one node per
# line; o is the depth of root. Dependents are listed in increasing order of
# their identifiers, each preceded by its arc label.
def printtree(root,arcs,o)
  print "# %-16s" % "root: " if o==0
  print nodeinfo(root),"\n"
  children = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
  children.each do |arc|
    print '# '," "*(o+1)
    print "%-16s" % (arc[2]+": ")
    printtree(arc[1],arcs,o+1)
  end
end
# Print the subtree rooted at root in parenthesized notation:
# node(dependent,dependent,...), with dependents in increasing order of
# their identifiers and each preceded by "label:" when $INFO contains l.
def printpar(root,arcs)
  print nodeinfo(root)
  deps = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
  return if deps == []

  print '('
  deps.each_with_index do |arc, k|
    print ',' if k > 0
    print arc[2],':' if $INFO =~ /l/
    printpar(arc[1],arcs)
  end
  print ')'
end
# Build the global graph description from the collected nodes.
# Each element of nodes is [position, dgp field]. The dgp field is split at
# ';' into: node index, saturation mark, arcs, and three sets in braces.
# Results:
#   $n     number of nodes
#   $pos   position of each node
#   $sat   indices of saturated nodes
#   $arcs  arcs as [head, dependent, role, ancestor]
#   $succ, $vis, $lhs  relations as lists of pairs [x, node], taken from
#          the three sets; $lhs additionally contains [node, node]
# Exits when the sets are missing from the dgp field.
def parsegraph(nodes)
  $n =nodes.length
  $sat =[];
  $vis =[];
  $succ=[];
  $lhs =[];
  $arcs=[];
  $pos=[]
  for dgp in nodes
    parts = dgp[1].split($dgpsep,6)
    if parts[3]==nil || parts[4]==nil || parts[5]==nil
      $stderr.print "ERR: tre requires dgp be called with '--info s' option. Aborting.\n"
      exit
    end
    i = parts[0].to_i
    $pos[i] = dgp[0].to_i
    $sat << i if parts[1]=="s"
    newarcs = []
    for a in parts[2].split(',')
      case a
      when /\-\-(\w+)-(\d+)\/(\d+)/
        # arc to a dependent: --role-dependent/ancestor
        newarcs << [i, $2.to_i, $1, $3.to_i]
      when /\+\+(\w+)-(\d+)\/(\d+)/
        # arc to a head: ++role-head/ancestor (dgp prints the role first)
        newarcs << [$2.to_i, i, $1, $3.to_i]
      else
        # An item that is not an arc used to end up in $arcs as nil and
        # crash the tree generation later; it is reported and skipped.
        $stderr.print "WARN: unrecognized arc '#{a}' in node #{i}. Ignored.\n" unless a.empty?
      end
    end
    $arcs |= newarcs
    $succ |= parts[3][1..-2].split(',').map{|x| [x.to_i,i]}
    $vis |= parts[4][1..-2].split(',').map{|x| [x.to_i,i]}
    $lhs |= parts[5][1..-2].split(',').map{|x| [x.to_i,i]} + [[i,i]]
  end
end
# Translates a tree [root, arcs] through $gphid: the root and both ends of
# every arc are replaced by their $gphid entries, the arc label is kept and
# the fourth arc element is dropped.
def ground(t)
  mapped_root = $gphid[t[0]]
  mapped_arcs = t[1].collect do |arc|
    [$gphid[arc[0]], $gphid[arc[1]], arc[2]]
  end
  [mapped_root, mapped_arcs]
end
# Resets $thetrees and starts the tree search. Node 0 is taken as the
# beginning-of-sentence node and node $n-1 as the end-of-sentence node; every
# node i with both [i,eos] and [bos,i] in $vis is tried as a root ($theroot).
# The trees themselves are collected into $thetrees by buildL.
def gentrees2()
  $thetrees = []
  bos = 0
  eos = $n - 1
  roots = (1...eos).select { |i| $vis.include?([i,eos]) && $vis.include?([bos,i]) }
  print "ROOTS: #{roots.inspect}\n" if $DEBUG
  roots.each do |candidate|
    $theroot = candidate
    buildR(candidate, eos, []).each do |right|
      buildR(bos, right[0], right[2])
    end
  end
end
# Returns a list of [min, max, arcs] triples that extend +tree+.
# Two sources contribute, in this order:
#   1. every arc leaving +max+ whose target is $vis-related to +min+: the
#      arc is appended, buildR continues from the arc's target, and the
#      result is completed with another buildR on the remaining left part;
#   2. every node i with [i,max] in $succ and [min,i] in $lhs, handed to
#      buildL.
def buildR(min, max, tree)
  print "buildR--#{min}--#{max}--#{tree.inspect}\n" if $DEBUG
  found = []

  outgoing = $arcs.select { |arc| arc[0]==max && $vis.include?([min,arc[1]]) }
  outgoing.each do |arc|
    print "ARC: #{arc.inspect}\n" if $DEBUG
    buildR(arc[1],arc[3],tree+[arc]).each do |rmin, rmax, rarcs|
      buildR(min,rmin,rarcs).each do |lmin, _lmax, larcs|
        found << [lmin,rmax,larcs]
      end
    end
  end

  predecessors = (0...$n).select { |i| $succ.include?([i,max]) && $lhs.include?([min,i]) }
  predecessors.each do |i|
    buildL(min,i,tree).each do |lmin, lmax, larcs|
      found << [lmin,lmax,larcs]
    end
  end

  found
end
# Returns a list of [min, max, arcs] triples that extend +tree+ to the left.
# When +min+ and +max+ share a position the recursion ends; if both are node
# 0, [$theroot, tree] is recorded in $thetrees as a finished tree. Otherwise
# every arc entering +max+ whose source is $lhs-related to +min+ is appended
# and the search continues with buildR / buildL.
def buildL(min,max,tree)
  print "buildL--#{min}--#{max}--#{tree.inspect}\n" if $DEBUG

  if $pos[min]==$pos[max]
    if min==0 && max==0
      $thetrees.push [$theroot,tree]
      print "adding tree: #{tree.inspect}\n" if $DEBUG
    end
    return [[max,max,tree]]
  end

  found = []
  incoming = $arcs.select { |a| a[1]==max && $lhs.include?([min,a[0]]) }
  incoming.each do |arc|
    print "ARC: #{arc.inspect}\n" if $DEBUG
    buildR(arc[3],max,tree+[arc]).each do |rmin, _rmax, rarcs|
      buildL(min,rmin,rarcs).each do |lmin, lmax, larcs|
        found << [lmin,lmax,larcs]
      end
    end
  end
  found
end
# Debug dump of the graph globals filled by parsegraph, one line each.
def printgraph()
  $stdout.print "N: %s\n" % $n.to_s
  $stdout.print "SAT: %s\n" % set_to_s($sat)
  $stdout.print "SUCC: %s\n" % rel_to_s($succ)
  $stdout.print "VIS: %s\n" % rel_to_s($vis)
  $stdout.print "LHS: %s\n" % rel_to_s($lhs)
  $stdout.print "ARCS: %s\n" % arcs_to_s($arcs)
end
# Formats a list as "{a,b,c}".
def set_to_s(s)
  '{' + s.join(',') + '}'
end

# Formats a list of pairs as "{(a,b),(c,d)}".
def rel_to_s(r)
  pairs = r.map { |pair| "(#{pair[0]},#{pair[1]})" }
  '{' + pairs.join(',') + '}'
end

# Formats one arc [from, to, label, extra] as "-from-label-to/extra".
def arc_to_s(q)
  "-%s-%s-%s/%s" % [q[0], q[2], q[1], q[3]]
end

# Formats a list of arcs as "{arc,arc,...}" using arc_to_s.
def arcs_to_s(a)
  '{' + a.map { |arc| arc_to_s(arc) }.join(',') + '}'
end
######################################################################
# Script entry point: run tre on standard input.
tre($stdin)

View File

@ -1,2 +0,0 @@
#include "uttcommon.h"

View File

@ -1,146 +0,0 @@
#ifndef __COMMON_H
#define __COMMON_H
#include <stdio.h>
/**************************************************
* Stale dotyczace wejscia/wyjscia
*/
#define MAXLINE 1024
#define EMPTYFORM '*'
#define INFIELD_SEP ':'
#define MAXAUX 16
#define FIELD_SEP " \t\n"
/***************************************************************/
/* Classifies the letter casing of s.                          */
/* Return value:                                               */
/*   0 - no upper-case letters                                 */
/*   1 - first character upper case, none of the rest          */
/*   2 - every character upper case (two or more characters)   */
/*   3 - anything else (mixed casing)                          */
/* An empty string yields 0.                                   */
/***************************************************************/
inline int casing(char* s)
{
  if(*s == '\0') return 0;            // nothing to classify

  // <ctype.h> functions require a value representable as unsigned char;
  // plain char may be negative for 8-bit (e.g. ISO-8859-2) text.
  const bool first_upper = isupper((unsigned char)*s) != 0;
  bool rest_upper = false;            // some later character is upper case
  bool rest_other = false;            // some later character is not
  while(*++s != '\0')
  {
    if(isupper((unsigned char)*s)) rest_upper = true;
    else                           rest_other = true;
  }

  if(!rest_upper) return first_upper ? 1 : 0;
  if(first_upper && !rest_other) return 2;
  return 3;                           // e.g. "aBc" or "AbC"
}
// Copies s to d, converting every character with tolower().
// The terminating '\0' is copied as well; s and d may be the same buffer.
inline void tolowers(char* s, char* d)
{
  // Cast through unsigned char: tolower() is undefined for negative values,
  // which plain char produces for 8-bit text.
  do
    *d++ = (char)tolower((unsigned char)*s);
  while(*s++ != '\0');
}
// Copies s to d, applying the casing class given by `casing`
// (a value returned by casing()):
//   0, 3 - copy unchanged (3 carries too little information to restore)
//   1    - upper-case the first character, copy the rest unchanged
//   2    - upper-case every character
// Any other value leaves d untouched. s and d may be the same buffer.
inline void restorecasing(char *s, char *d, int casing)
{
  // toupper() is undefined for negative arguments, so characters go through
  // unsigned char first (plain char may be negative for 8-bit text).
  switch(casing)
  {
  case 0:
  case 3:
    do *d++ = *s; while(*s++ != '\0');
    break;
  case 1:
    *d = (char)toupper((unsigned char)*s);
    while(*s != '\0') *++d = *++s;
    break;
  case 2:
    do *d++ = (char)toupper((unsigned char)*s); while(*s++ != '\0');
    break;
  }
}
/**************************************************/
/*
  Extracts the contents of an annotation field from a segment.

  parameters:
    -seg  - segment (one input line)
    -pref - field prefix to look for
    +val  - receives the field contents (text after the prefix up to the
            next separator), '\0'-terminated
  return value:
    1 if the specified field exists, 0 otherwise (val is then not written)

  The leading columns (optional position and length, then type and form) are
  skipped first; a match counts only when it is preceded by a space or a tab.
*/
inline int getfield(char* seg, const char* pref, char* val)
{
  char* p=seg;

  while(isspace((unsigned char)*p)) ++p;

  // Optional position and length columns: each starts with a digit or '*'.
  for(int k=0; k<2; ++k)
  {
    if(!(isdigit((unsigned char)*p) || *p=='*')) break;
    while(*p!='\0' && !isspace((unsigned char)*p)) ++p;
    while(isspace((unsigned char)*p)) ++p;
  }

  // Type and form columns. The '\0' test keeps the scan inside the string
  // when the segment has fewer columns than expected.
  for(int k=0; k<2; ++k)
  {
    while(isspace((unsigned char)*p)) ++p;
    while(*p!='\0' && !isspace((unsigned char)*p)) ++p;
  }

  // Annotation search. After a rejected match the search restarts one
  // character further on; restarting at the same place would find the same
  // match again and never terminate.
  p=strstr(p,pref);
  while(p!=NULL && !(p>seg && (p[-1]==' ' || p[-1]=='\t')))
    p = (*p!='\0') ? strstr(p+1,pref) : NULL;

  if(p==NULL) return 0;

  p+=strlen(pref);
  int len=strcspn(p,FIELD_SEP "\n\r\f\0");
  strncpy(val,p,len);
  val[len]='\0';
  return 1;
}
/*
  Appends a new field " <pref><val>" to a segment, in place.

  parameters:
    +seg  - segment; must end with '\n' (the new field replaces that
            newline and a fresh one is appended)
    -pref - prefix of the new field
    -val  - contents of the new field
  return value:
    1 - success, 0 - fail (segment empty, or limit on segment length
        exceeded)
*/
inline int addfield(char *seg, const char *pref, const char *val)
{
  size_t seglen=strlen(seg);
  if(seglen==0) return 0;   // no trailing newline to replace

  // The result is seglen-1 old characters + ' ' + pref + val + '\n' + '\0',
  // i.e. seglen+strlen(pref)+strlen(val)+2 bytes.
  // assumes seg is a buffer of MAXLINE bytes - TODO confirm against callers
  if(seglen+strlen(pref)+strlen(val)+2 > MAXLINE) return 0;

  sprintf(seg+(seglen-1)," %s%s\n",pref,val);
  return 1;
}
#endif

View File

@ -1,42 +0,0 @@
# Build rules for the gue program.
PAR=-Wno-deprecated -O3 -fpermissive -static
PAR2=-c -Wno-deprecated -O3 -fpermissive

LIB_PATH=../lib
COMMON_PATH=../common
CMDLINE_FILE='"../gue/cmdline.h"'

# These targets are commands, not files; without this a file named "clean"
# or "copy" would silently disable them.
.PHONY: clean clean.cmdline copy

# cmdline.h is generated together with cmdline.c and included by the
# sources, so it is listed wherever the generated parser is used.
gue: main.cc guess.o $(LIB_PATH)/auttools.o $(LIB_PATH)/word.o \
		cmdline.c cmdline.h common_guess.o common.o
	g++ $(PAR) main.cc guess.o \
	$(LIB_PATH)/auttools.o $(LIB_PATH)/word.o cmdline.c common.o common_guess.o \
	-o gue

guess.o: guess.h guess.cc
	g++ $(PAR2) guess.cc

common_guess.o: cmdline.h common_guess.cc common_guess.h
	g++ $(PAR2) common_guess.cc

# common.cc is compiled against this program's generated cmdline.h
# (passed in through _CMDLINE_FILE).
common.o: cmdline.h $(COMMON_PATH)/cmdline_common.ggo $(COMMON_PATH)/common.cc \
		$(COMMON_PATH)/common.h
	g++ $(PAR2) -D _CMDLINE_FILE=$(CMDLINE_FILE) $(COMMON_PATH)/common.cc

cmdline.c cmdline.h: cmdline.ggo
	gengetopt -i cmdline.ggo --conf-parser

# The option file is the program-specific part followed by the common part.
cmdline.ggo: cmdline_guess.ggo $(COMMON_PATH)/cmdline_common.ggo
	cat cmdline_guess.ggo $(COMMON_PATH)/cmdline_common.ggo > cmdline.ggo

clean: clean.cmdline
	rm -f *.o
	rm -f gue

clean.cmdline:
	rm -f cmdline.*

# Copies the binary into UTT_BIN_DIR; does nothing when it is not set.
copy:
ifdef UTT_BIN_DIR
	cp gue ${UTT_BIN_DIR}
endif

View File

@ -1,12 +0,0 @@
# gengetopt option definitions specific to gue (presumably the
# cmdline_guess.ggo that the gue Makefile concatenates with the common options).
package "guess"
version "0.1"
option "guess_count" n "Guess up to n descriptions" int default="0" no
# NOTE(review): process_guess_options() falls back to delta=0.1 and
# cut_off=100 when these options are not given, so the defaults declared
# below (0.2 and 200) are never the effective values - confirm which is meant.
option "delta" - "Stop displaying answers after fall of weight" float default="0.2" no
option "cut-off" - "Do not display answers with less weight than cut-off" int default="200" no
# Hidden option: directory searched for <language>/gue.bin when no
# dictionary file is given explicitly.
option "dictionary-home" - "dh" string typestr="FILENAME" no hidden
option "dictionary" d "File with dictionary information" string typestr="filename" default="gue.bin" no
option "per-info" v "Display performance information" flag off
option "weights" w "Print weights" flag off
option "no-uppercase" - "Do not process form containing uppercase letters" flag off

View File

@ -1,60 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include "common_guess.h"
int guess_count=0;
double delta=0.1;
int cut_off=100;
char dictionary[255];
bool per_info=false;
bool weights=false;
// Copies the gue-specific command-line options into the file-level settings.
//
// Dictionary path: --dictionary wins; otherwise, when both --dictionary-home
// and the language are given, the path is <home>/<language>/gue.bin. In
// either case the file must be accessible, or the program exits with
// status 1. When neither applies, `dictionary` is left untouched.
//
// guess_count: a missing or zero value means 100.
// delta / cut_off: 0.1 / 100 when not given on the command line.
void process_guess_options(gengetopt_args_info* args)
{
  bool dictionary_set=false;

  if(args->dictionary_given)
    {
      expand_path(args->dictionary_arg,dictionary);
      dictionary_set=true;
    }
  else if (args->dictionary_home_given && args->language_given)
    {
      char buf[255];
      expand_path(args->dictionary_home_arg, buf);
      // Bounded formatting: the joined path may not fit into `dictionary`.
      int n=snprintf(dictionary,sizeof(dictionary),"%s/%s/gue.bin",buf,args->language_arg);
      if(n<0 || (size_t)n>=sizeof(dictionary))
        {
          fprintf(stderr,"Dictionary path too long: %s/%s/gue.bin\nAborting.\n",buf,args->language_arg);
          exit(1);
        }
      dictionary_set=true;
    }

  if(dictionary_set && file_accessible(dictionary)!=0)
    {
      fprintf(stderr,"Cannot open the dictionary file: %s\nAborting.\n",dictionary);
      exit(1);
    }

  guess_count = args->guess_count_given ? args->guess_count_arg : 0;
  if(guess_count==0)
    guess_count=100;

  delta   = args->delta_given   ? args->delta_arg   : 0.1;
  cut_off = args->cut_off_given ? args->cut_off_arg : 100;

  if(args->per_info_given)
    per_info=args->per_info_flag;

  if(args->weights_given)
    weights=true;
}

View File

@ -1,20 +0,0 @@
#ifndef __COMMON_GUESS_H
#define __COMMON_GUESS_H
#include <stdio.h>
#define _CMDLINE_FILE "../gue/cmdline.h"
#include "../common/common.h"
#include "cmdline.h"
#define DIC_FILE "gue.bin"
extern int guess_count;
extern double delta;
extern int cut_off;
extern char dictionary[];
extern bool per_info;
extern bool weights;
void process_guess_options(gengetopt_args_info* args);
#endif

View File

@ -1,142 +0,0 @@
#include "guess.h"
#include <string.h>
#include <iostream>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#define DICT 1
#define COR 2
#define DICT_P 3
#define COR_P 4
#define W_PRE 0.1
#define W_SUF 0.9
#define PREF_SIGN '_'
using namespace std;
// Constructs the guesser: suf_file is handed to the constructor of _suf.
// The body is empty.
Guess::Guess(const char* suf_file)
: _suf(suf_file) {
// Disabled earlier variant that allocated the automata on the heap:
/* _suf = NULL;
_pref = NULL;
if (strlen(suf_file) > 0)
_suf = new TFTiv<char, char>(suf_file);
if (strlen(pref_file) > 0)
_pref = new TFTiv<char, char>(corp_file);
*/
}
// File-scope working state of Guess::ana(); it is overwritten on every call.
char buf[MAX_LINE];   // reversed copy of the analysed word
char out[MAX_LINE];   // one path returned by _suf.cont()
char* buf0_s = buf;   // cursor into buf handed to _suf.pref()
char* word_t = NULL;  // strdup()ed copy of the analysed word
long state_s = 0;     // automaton state returned by _suf.pref()
unsigned length_s = buf0_s - buf;  // offset of buf0_s from the start of buf
long len = 0;         // last result of _suf.cont()
int i=0;              // number of descriptions produced by the current call
int Guess::ana(const char* word, Words& result) {
assert(word && &result);
/* Word zawiera wyraz, ktory mamy zbadac.
* Nalezy przepisac go w odwrotnej kolejnosci do bufora,
* znalezc najdluzszy prefiks pasujacy do tego bufora
* separatorem jest '/' - za tym znakiem znajduje sie
* prawdopodobienstwo wystapienia danego opisu */
buf0_s = buf;
word_t = strdup(word);
if (reverse(word, buf) != 0)
return -1;
state_s = -1;
// printf("#buf0_s=%s, ", buf0_s);
state_s = _suf.pref(buf0_s, PREF_SIGN);
// printf("#word=%s, buf0_s=%s\t", word, buf0_s);
/* jezeli state_s != -1 to oznacza, ze w slowniku jest zawarta
* informacja o prefiksie tego slowa.
* nie jest ona odwrocona, wiec porownujemy do word a nie do buf
*/
// printf("state_s=%d\t", state_s);
if (state_s != -1) {
state_s = _suf.pref(word_t, '~', state_s);
// printf("state_s(wp)=%d, word_t=%s, word=%s\n", state_s, word_t, word);
}
if (state_s == -1) {
// if (_suf != NULL)
buf0_s = buf;
state_s = _suf.pref(buf0_s, '~');
// printf("state_s=%d\n", state_s);
}
length_s = buf0_s - buf;
/* state jest stanem, od ktorego zaczyna sie sciezka opisujaca
* prawdopodobienstwo przeciwienstwa wystapienia opisu
* znajdujacego sie dalej na tej sciezce.
* Im mniejsza wartosc liczby tym wieksze prawdopodobienstwo */
len = 0;
i=0;
// if (_suf != NULL)
len = _suf.cont(state_s, out);
while (len > 0) {
i++;
add_word_prob(result, word, out, length_s, DICT);
len = _suf.cont(-1, out);
}
return i;
}
// Adds `word` to `tab` with the description found in `path`.
//
// `path` has the form "<weight>;<description>". The description is stored
// through tab.add(); when `source` is DICT the entry also receives `len` as
// its suffix length and the numeric weight. A path without ';' yields an
// empty description.
//
// Returns the index reported by tab.add().
int Guess::add_word_prob(Words& tab, const char* word, const char* path, unsigned len, int source) {
  char p[MAX_LINE];
  strncpy(p, path, MAX_LINE-1);   // bounded copy; over-long paths are cut
  p[MAX_LINE-1] = '\0';

  // Split in place at the first ';': p becomes the weight text and desc the
  // description. Without a ';' desc stays on the terminator instead of
  // pointing one byte past it.
  char* desc = p + strcspn(p, ";");
  if (*desc == ';')
    *desc++ = '\0';

  int i = tab.add(word, desc);

  if (source==DICT) {
    tab[i].len_suf(len);
    tab[i].w_suf(atof(p));
  }
  return i;
}

View File

@ -1,56 +0,0 @@
#include "../lib/tfti.h"
#include "../lib/word.h"
#include <sys/timeb.h>
/**************************************************************
* Zawiera definicje klasy Guess. *
* *
* Klasa ta pozwala na okreslenie opisu slowa nie *
* znajdujacego sie w slowniku wraz z prawdopodobienstwem *
* jego wystapienia. *
*************************************************************/
class Guess {

public:
  // suf_file: name of the dictionary file
  Guess(const char* suf_file);

  // fills result with the descriptions found for word; returns their number
  int ana(const char* word, Words& result);

  long time_overall;

private:

  // suffixes
  TFTiv<char, char> _suf;

  // prefixes
  TFTiv<char, char> _pref;

  // writes src into dest back to front, '\0'-terminated; always returns 0
  int reverse(const char* src, char* dest) {
    const int len = strlen(src);
    for (int k = 0; k < len; ++k)
      dest[k] = src[len-1-k];
    dest[len] = '\0';
    return 0;
  }

  // adds one entry, described by path, to tab
  int add_word_prob(Words& tab, const char* word, const char* path, unsigned len, int source);

};

View File

@ -1,237 +0,0 @@
#include <time.h>
#include <stdlib.h>
#include "../lib/iotools.h"
#define _CMDLINE_FILE "../gue/cmdline.h"
#define CONFIGFILE1 "/home/ynka/utt/utt-0.9/conf/gue.conf"
#define CONFIGFILE2 "/home/ynka/utt/utt-0.9/conf/gue.conf"
#include "../common/common.h"
#include "common_guess.h"
#include "guess.h"
#include "cmdline.h"
#define W_SUFF 0.6
#define W_PREF 0.4
// Writes seg to f without its trailing newline, if it has one.
static void put_without_newline(const char* seg, FILE* f)
{
  size_t n = strlen(seg);
  if (n > 0 && seg[n-1] == '\n') --n;
  fwrite(seg, 1, n, f);
}

// gue: reads segments from inputf, guesses descriptions for the form found
// in the input field and writes the annotated segments to outputf.
// Segments rejected by process_seg() are copied through unchanged; segments
// with no guesses go to failedf unless --no-fail is set.
//
// Output is written piecewise instead of being assembled in fixed-size
// buffers, so long lines with many guesses cannot overflow.
int main(int argc, char** argv) {

  gengetopt_args_info args;

  if(cmdline_parser(argc, argv, &args) != 0)
    exit(1);

  process_config_files(&args,argv[0]);
  process_common_options(&args,argv[0]);
  process_guess_options(&args);

  char line[MAX_LINE];
  Guess guess(dictionary);
  Words tab;

  while (fgets(line, MAX_LINE, inputf))
    {
      if (!process_seg(line, args))
        fputs(line,outputf);
      else
        {
          char form[MAX_FORM];

          tab.clear();
          // getfield() leaves form untouched when the field is missing, so
          // its result (not the array address) decides whether to go on.
          if (!getfield(line,input_field_prefix,form)) continue;

          guess.ana(form, tab);

          if (tab.count()==0)
            {
              // No guesses: the analysis failed. With --no-fail nothing is
              // written; there is no table entry that could be printed.
              // NOTE(review): assumes --no-fail means "discard failed
              // segments" - confirm against the common option definitions.
              if (!args.no_fail_flag)
                fputs(line, failedf);
            }
          else
            {
              // Decide how many of the sorted entries to print.
              // NOTE(review): the reference weight is always tab[0]'s and
              // tab[1] is accepted without being tested; kept as found.
              float last_weight=0;
              unsigned first=1;
              int count=1;

              tab.sort();

              while (count < tab.count() && count <= guess_count)
                if (first || tab[count].w_suf() >= cut_off && tab[count].w_suf() >= delta * last_weight)
                  {
                    first=0;
                    last_weight = tab[0].w_suf();
                    count++;
                  }
                else
                  break;

              if(one_line)
                {
                  // all guesses as separate fields of one segment
                  put_without_newline(line, outputf);
                  for (int i=0; i< count; ++i)
                    {
                      fprintf(outputf," %s%s,%s", output_field_prefix, tab[i].lemma(), tab[i].descr());
                      if(weights) fprintf(outputf,":%d",(int)tab[i].w_suf());
                    }
                  fputc('\n', outputf);
                }
              else if(one_field)
                {
                  // all guesses in ONE field: lemmas separated by ';',
                  // successive descriptions of the same lemma by ','
                  put_without_newline(line, outputf);
                  for (int i=0; i< count; ++i)
                    {
                      if(i==0)
                        fprintf(outputf," %s%s,%s", output_field_prefix, tab[i].lemma(), tab[i].descr());
                      else if(strcmp(tab[i].lemma(),tab[i-1].lemma())==0)
                        fprintf(outputf,",%s",tab[i].descr());
                      else
                        fprintf(outputf,";%s,%s",tab[i].lemma(),tab[i].descr());
                      if(weights) fprintf(outputf,":%d",(int)tab[i].w_suf());
                    }
                  fputc('\n', outputf);
                }
              else
                {
                  // one output segment per guess
                  for (int i=0; i< count; ++i)
                    {
                      put_without_newline(line, outputf);
                      fprintf(outputf," %s%s,%s", output_field_prefix, tab[i].lemma(), tab[i].descr());
                      if(weights) fprintf(outputf,":%d",(int)tab[i].w_suf());
                      fputc('\n', outputf);
                    }
                }

              if (copy_processed)
                fputs(line,outputf);
            }
        }
      if(args.interactive_flag)
        fflush(outputf), fflush(failedf);
    }
  cmdline_parser_free(&args);
}
// while ((i=tab.next()) != -1 && count++<guess_count) {
// /* if we have "one-line" flag then everything goes in one segment as many fields,
// * if we have "one-field" flag everything goes in one segment as ONE field:
// * - diferent lemmas are separated with ';', sequent descriptions to one lemma
// * are separated with ','
// */
// if ((!first) && (tab[i].w_suf() < cut_off) || (tab[i].w_suf() < delta * last_weight)) {
// break;
// }
// if (first) {
// parms_end += sprintf(parms_end, "%s", output_field_prefix);
// } else if (!args.one_field_flag)
// parms_end += sprintf(parms_end, "%s", output_field_prefix);
// if (!args.one_field_flag || strcmp(last_lemma, tab[i].lemma()) != 0) {
// if (args.one_field_flag && !first)
// parms_end += sprintf(parms_end, ";");
// parms_end += sprintf(parms_end, "%s", tab[i].lemma());
// strcpy(last_lemma, tab[i].lemma());
// }
// first=0;
// last_weight = tab[i].w_suf();
// if (!weights)
// parms_end += sprintf(parms_end, ",%s:%d", tab[i].descr(), (int)tab[i].w_suf());
// else
// parms_end += sprintf(parms_end, ",%s", tab[i].descr());
// if (!args.one_field_flag) {
// seg.addfield(parms);
// parms_end = parms;
// }
// if (!(args.one_field_flag || args.one_line_flag)) {
// seg.print(outline);
// fputs(outline, outputf);
// --seg.auxn;
// }
// //if (copy_processed)
// // fputs(outline, stdout);
// } //while
// if (args.one_field_flag)
// seg.addfield(parms);
// if (args.one_field_flag || args.one_line_flag){
// seg.print(outline);
// fputs(outline, outputf);
// }
// } else { // if (process_segment)
// // jak to nie jest wyraz - to przepisz token na wyjscie.
// // printtok(line, start, len, cat, form);
// seg.print(outline);
// fputs(outline, outputf);
// if (copy_processed)
// fputs(outline, stdout);
// }
// }
// time_t end_time = time(NULL);
// if (per_info) {
// printf("Liczba s³ów: %d\n", words_count);
// printf("Czas analizy: %d sekund\n", end_time-start_time);
// }
// cmdline_parser_free(&args);
// }

View File

@ -1,44 +0,0 @@
# Build rules for the kor program.
PAR=-Wno-deprecated -fpermissive -static
PAR2=-c -Wno-deprecated -fpermissive
LIB_PATH=../lib
COMMON_PATH=../common
CMDLINE_FILE='"../kor/cmdline.h"'

# These targets are commands, not files.
.PHONY: copy clean clean.cmdline

# cmdline.c is compiled directly on the link line, so no cmdline.o is needed;
# cmdline.h is listed because the sources include the generated header.
kor: main.cc corr.o corlist.o $(LIB_PATH)/word.o \
		$(LIB_PATH)/auttools.o cmdline.c cmdline.h common_cor.o common.o
	g++ $(PAR) -D _CMDLINE_FILE=$(CMDLINE_FILE) main.cc corlist.o corr.o common.o \
	$(LIB_PATH)/word.o $(LIB_PATH)/auttools.o cmdline.c common_cor.o \
	-o kor

corr.o: corr.cc corr.hh cmdline.h
	g++ $(PAR2) -D _CMDLINE_FILE=$(CMDLINE_FILE) corr.cc

corlist.o: corlist.cc corlist.h cmdline.h
	g++ $(PAR2) -D _CMDLINE_FILE=$(CMDLINE_FILE) corlist.cc

common.o: cmdline.h $(COMMON_PATH)/cmdline_common.ggo $(COMMON_PATH)/common.cc \
		$(COMMON_PATH)/common.h
	g++ $(PAR2) -D _CMDLINE_FILE=$(CMDLINE_FILE) $(COMMON_PATH)/common.cc

common_cor.o: cmdline.h common_cor.cc common_cor.h
	g++ $(PAR2) -D _CMDLINE_FILE=$(CMDLINE_FILE) common_cor.cc

cmdline.c cmdline.h: cmdline.ggo
	gengetopt -i cmdline.ggo --conf-parser

# The option file is the program-specific part followed by the common part.
cmdline.ggo: cmdline_cor.ggo $(COMMON_PATH)/cmdline_common.ggo
	cat cmdline_cor.ggo $(COMMON_PATH)/cmdline_common.ggo > cmdline.ggo

# Copies the binary into UTT_BIN_DIR; does nothing when it is not set.
copy:
ifdef UTT_BIN_DIR
	cp kor ${UTT_BIN_DIR}
endif

clean: clean.cmdline
	rm -f *.o
	rm -f kor

clean.cmdline:
	rm -f cmdline.*

View File

@ -1,13 +0,0 @@
# gengetopt option definitions specific to kor (presumably the
# cmdline_cor.ggo that the kor Makefile concatenates with the common options).
package "kor"
version "0.1"
option "dictionary-home" - "Dictionary home dir." string typestr="FILENAME" no hidden
option "dictionary" d "Dictionary" string typestr="FILENAME" default="cor.bin" no
# NOTE(review): kor's main() assigns this value to cor.t and then overwrites
# cor.t with 1*PREC, so the option currently has no effect - confirm.
option "distance" D "Maximal edit distance." int default="1" no
option "replace" r "Replace original form with corrected form, place original form in the cor field. This option has no effect in single mode" flag off
# Disabled option, kept for reference:
#option "single" - "Place all alternatives in the same line" flag off
option "weights" w "File with translation rules." string typestr="FILENAME" default="weight.cor" no
option "threshold" t "Edit distance threshold" float default="1" no
option "show-scores" - "Show scores" flag off
option "count" n "Print only count best results" int no

View File

@ -1,48 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include "common_cor.h"
#define MAX_PATH_LENGTH 255
char dictionary[MAX_PATH_LENGTH];
char file_weights[MAX_PATH_LENGTH];
float threshold;
bool show_scores = false;
int result_count;
// Copies the kor-specific command-line options into the file-level settings.
//
// Dictionary path: --dictionary wins; otherwise, when both --dictionary-home
// and the language are given, the path is <home>/<language>/cor.bin. In
// either case the file must be accessible, or the program exits with
// status 1. When neither applies, `dictionary` is left untouched.
//
// result_count is 0 when --count is not given.
void process_cor_options(gengetopt_args_info* args)
{
  bool dictionary_set = false;

  if(args->dictionary_given)
    {
      expand_path(args->dictionary_arg,dictionary);
      dictionary_set = true;
    }
  else if (args->dictionary_home_given && args->language_given)
    {
      char buf[MAX_PATH_LENGTH];
      expand_path(args->dictionary_home_arg, buf);
      // Bounded formatting: the joined path may not fit into `dictionary`.
      int n = snprintf(dictionary,MAX_PATH_LENGTH,"%s/%s/cor.bin",buf,args->language_arg);
      if(n<0 || n>=MAX_PATH_LENGTH)
        {
          fprintf(stderr,"Dictionary path too long: %s/%s/cor.bin\nAborting.\n",buf,args->language_arg);
          exit(1);
        }
      dictionary_set = true;
    }

  if(dictionary_set && file_accessible(dictionary)!=0)
    {
      fprintf(stderr,"Cannot open the dictionary file: %s\nAborting.\n",dictionary);
      exit(1);
    }

  expand_path(args->weights_arg, file_weights);
  threshold = args->threshold_arg;
  show_scores = args->show_scores_flag;
  result_count = args->count_given ? args->count_arg : 0;
}

View File

@ -1,25 +0,0 @@
#ifndef __COMMON_COR_H
#define __COMMON_COR_H
// SEKCJA STALYCH
#define MAX_LEN 2
#define PREC 1000
#define Weight int
// SEKCJA INCLUDOW
#include "../common/common.h"
#include "cmdline.h"
// SEKCJA GENGETOPT
extern int change_count;
extern void process_cor_options(gengetopt_args_info* args);
extern char dictionary[];
extern char file_weights[];
extern float threshold;
extern bool show_scores;
extern int result_count;
#endif

View File

@ -1,70 +0,0 @@
#include <stdio.h>
#include <malloc.h>
#include "corlist.h"
#define min(x,y) ((x<y)?(x):(y))
// Returns the cheapest cost of reaching cell (i,j) through one of the listed
// replacement rules, or 9999*PREC when no rule applies.
// A rule (a,b,w) applies when a ends X at position i and b ends Y at
// position j, or the other way round; its cost is w plus the table value of
// the cell in front of the two matched strings.
Weight CorList::GetValue(char X[100], char Y[100], Weight (*H2)[100], int i, int j)
{
  Weight best = 9999*PREC; // stands for +infinity

  for (int k=0; k<total; ++k)
  {
    const CorWeight& rule = List[k];
    const int la = rule.la;
    const int lb = rule.lb;

    // a against X, b against Y
    if (la<=i+1 && lb<=j+1
        && strncmp(rule.a,X+i+1-la,la)==0 && strncmp(rule.b,Y+j+1-lb,lb)==0)
    {
      const Weight cost = H2[i-la][j-lb]+rule.w;
      if (cost < best) best = cost;
    }

    // b against X, a against Y
    if (la<=j+1 && lb<=i+1
        && strncmp(rule.b,X+i+1-lb,lb)==0 && strncmp(rule.a,Y+j+1-la,la)==0)
    {
      const Weight cost = H2[i-lb][j-la]+rule.w;
      if (cost < best) best = cost;
    }
  }
  return best;
}
int CorList::loadCWL(char *Name)
{
FILE *f = fopen(Name,"r");
int len=MAX_LEN*2+100;
char a[100],b[100], buf[len+1];
float wtmp;
CorWeight w;
cor_stdcor = 1 * PREC;
cor_xchg = 1 * PREC;
List = (CorWeight*)malloc(sizeof(CorWeight)); // 100 BO NIE DZIALA REALLOC
total=0;
if (!f) { fprintf(stderr,"\nCan't open correction weight list file!\n"); return -1; }
while (!feof(f) && fgets(buf,len,f))
{
if (buf[0]=='%')
{
sscanf(buf+1,"%s %f",&a,&wtmp);
int ok=0;
if (strcmp(a,"stdcor")==0) { ok=1; cor_stdcor=Weight(wtmp*PREC); /*printf("Standard letter correction set to: %1.2f\n",wtmp);*/ }
if (strcmp(a,"xchg")==0) { ok=1; cor_xchg=Weight(wtmp*PREC); /*printf("Inverted letters correction set to: %1.2f\n",wtmp);*/ }
if (!ok) { fprintf(stderr,"Error in file %s: Unknown keyword: '%s'.\n",Name,a); return -1; }
}
else
{
sscanf(buf,"%s %s %f",&a,&b,&wtmp);
w.w=(Weight)(wtmp*PREC);
w.la=strlen(a); w.lb=strlen(b);
if (w.la>MAX_LEN) { printf("ERROR in file %s: the string '%s' exceeds maximum length of %d characters.\n",Name,a,MAX_LEN); fclose(f); return -1; }
if (w.lb>MAX_LEN) { printf("ERROR in file %s: the string '%s' exceeds maximum length of %d characters.\n",Name,b,MAX_LEN); fclose(f); return -1; }
strcpy(w.a,a), strcpy(w.b,b);
total++;
List = (CorWeight*)realloc(List,total*sizeof(CorWeight));
List[total-1]=w;
// printf("%s\t<->\t%s\t%1.2f\n",w.a,w.b,((float)w.w/PREC));
}
}
fclose(f);
// printf("Total: %d\n\n",total);
return(total);
}

View File

@ -1,20 +0,0 @@
#ifndef _CORLIST_H
#define _CORLIST_H
//#include <stdio.h>
#include "common_cor.h"
// One replacement rule: strings a and b (at most MAX_LEN characters each),
// the weight w of exchanging them, and their lengths la and lb.
typedef struct { char a[MAX_LEN+1],b[MAX_LEN+1]; Weight w; short la,lb; } CorWeight;
// List of replacement rules loaded from a weight file.
class CorList
{
private:
CorWeight *List; // rules; allocated by loadCWL()
int total;       // number of rules in List
public:
// cost of a standard single-letter correction / of exchanged letters
Weight cor_stdcor, cor_xchg;
// reads the rules from file Name; returns their number or -1 on error
int loadCWL(char *Name);
// cheapest rule-based cost for cell (i,j) of the table H2
Weight GetValue(char X[100], char Y[100], Weight (*H2)[100], int i, int j);
};
#endif

View File

@ -1,163 +0,0 @@
//---------------------------------------------------------------------------
#include "common_cor.h"
#include "corr.hh"
#define MAXPATH 256
#define min(x,y) ((x<y)?(x):(y))
#define max(x,y) ((x>y)?(x):(y))
// Computes the value of cell (i,j) of the distance table from its
// neighbours in H2.
//
// Borders: row or column -1 costs one standard correction per character;
// row or column -2 costs (n+1) standard corrections.
// Inner cells take the cheapest of
//   - the diagonal without cost when X[i]==Y[j],
//   - cor_xchg plus cell (i-2,j-2) when the last two characters are swapped,
//   - cor_stdcor plus the cheapest applicable neighbour,
//   - the best rule from the weight list (CL.GetValue).
Weight Corr::ed(int i,int j)
{
  if(i==-1)
    return (j+1)*CL.cor_stdcor;
  if(j==-1)
    return (i+1)*CL.cor_stdcor;
  if(i==-2 || j==-2)
    return (n+1)*CL.cor_stdcor;

  // Evaluated once; the min() macro would otherwise expand it twice.
  const Weight listed = CL.GetValue(X,Y,H2,i,j);
  // insertion or deletion of one character
  const Weight indel = CL.cor_stdcor+min(H2[i][j-1],H2[i-1][j]);

  Weight best;
  if(X[i]==Y[j])
    best = min(H2[i-1][j-1], indel);
  // A swap needs two characters on both sides; without the index test
  // X[-1] / Y[-1] would be read.
  else if(i>0 && j>0 && X[i-1]==Y[j] && X[i]==Y[j-1])
    best = min(CL.cor_xchg+H2[i-2][j-2], indel);
  else
    best = CL.cor_stdcor+min(H2[i-1][j-1],min(H2[i][j-1],H2[i-1][j]));

  return min(best, listed);
}
// Loads the correction weight list from file Name into CL and returns
// CL.loadCWL()'s result unchanged.
int Corr::load2(char *Name) // local addition
{
return CL.loadCWL(Name);
}
// Returns the smallest value in column j of H2 over the rows
// max(0,j-t) .. min(m,j+t); when that range is empty, or no cell is smaller,
// the start value (j+t)*PREC is returned.
Weight Corr::cuted(int j)
{
  const int first_row = (0>j-t) ? 0 : (j-t);
  const int last_row  = (m<j+t) ? m : (j+t);

  Weight lowest=(j+t)*PREC;
  for(int row=first_row; row<=last_row; ++row)
    if(H2[row][j]<lowest)
      lowest=H2[row][j];
  return lowest;
}
/*
void Corr::recomputeH(int j)
{
for(int i=0;i<=m;i++)
H[(i)+2][(j)+2]=ed(i,j);
}
*/
// Recomputes column j of H2 with ed() for the rows
// max(0,j-t-2) .. min(m,j+t+2), in ascending row order.
void Corr::recomputeH(int j)
{
  int row = (0>j-t-2) ? 0 : (j-t-2);
  const int last_row = (m<j+t+2) ? m : (j+t+2);

  while(row<=last_row)
  {
    H2[row][j]=ed(row,j);
    ++row;
  }
}
// Collects into tab the candidate corrections of w.
//
// The automaton is walked depth-first through the inherited
// empty/input/next/continued/final operations, with explicit backtracking
// over path[]. The candidate Y grows by one symbol per step and column j of
// the distance table is recomputed; a branch is followed only while
// cuted(j) <= t. Every candidate that ends in a final state with
// H2[m][j] <= t is added to tab together with that value divided by PREC.
// Returns the number of candidates added.
//
// NOTE(review): X, Y, H and path have fixed sizes and no bounds check is
// visible here - confirm that callers limit the length of w and the value
// of t.
int Corr::correct(const char* w, Words& tab)
{
long int path[MAXPATH]={0}; // states along the current branch, one per j
int i; // row index (X)
int j; // column index (Y)
long state=0;
strcpy(X,w);
m=strlen(X)-1;
n=m+t;
// Border rows and columns (indices -2 and -1 of H2, i.e. 0 and 1 of H).
for(i=(-2);i<=m;i++)
H[(i)+2][(-2)+2]=n*PREC; // local change: *PREC
for(i=(-1);i<=m;i++)
H[(i)+2][(-1)+2]=((i)+1)*PREC; // local change
for(j=(-2);j<=n;j++)
H[(-2)+2][(j)+2]=n*1000; // local change
for(j=(-1);j<=n;j++)
H[(-1)+2][(j)+2]=((j)+1)*PREC; // local change
// Inner cells start just above the threshold.
for(j=0; j<=n; ++j)
for(i=0; i<=m; ++i)
H[i+2][j+2]=(t+1)*PREC;
int more=1;
bool cont=false;
strcpy(Y,"");
j=0;
state=0;
int count=0;
while(more)
{
if(!empty(state))
{
Y[j]=input(state);
recomputeH(j);
if(cuted(j)<=t)
{
Weight edd; // local change
if(final(next(state)) && (edd=H[(m)+2][(j)+2])<=t)
{
// NOTE(review): out is allocated with new[] and not released here -
// confirm whether Words::add() copies the string or takes ownership.
char* out=new char[j+2];
strncpy(out,Y,j+1);
out[j+1]='\0';
// if(cont) putchar(' ');
cont=true;
// printf("%1.2f %s\n", (float)edd/PREC,out); // local change
// cout << out << "(" << edd << ")" << endl;
tab.add(out,(float)edd/PREC);
count++;
}
// descend: remember the state for backtracking and move on
path[j++]=state;
state=next(state);
continue;
}
else
if(continued(state))
{
// this symbol is too costly: try the next alternative
state++;
continue;
}
}
//backtracking
do
if(j>0)
j--;
else
more=0;
while(more && !continued(path[j]));
state=path[j]+1;
}
return count;
}
//---------------------------------------------------------------------------

View File

@ -1,39 +0,0 @@
//---------------------------------------------------------------------------
#ifndef _corr_hh
#define _corr_hh
//---------------------------------------------------------------------------
#include "../lib/tfti.h"
#include "../lib/word.h"
#include "corlist.h"
#include "../common/common.h"
// Spelling corrector: searches the automaton it derives from for words
// close to a given (misspelled) string.
class Corr : public TFTiv<char,char>
{
private:
Weight H[100][100]; // distance table, including two border rows/columns
char X[100]; // misspelled string
char Y[100]; // (possibly partial) candidate string
int m; // index of the last character of X (correct() sets strlen(X)-1)
int n; // maximal index in Y (correct() sets m+t)
Weight ed(int,int);    // value of one table cell
Weight cuted(int);     // smallest value in a band of one column
void recomputeH(int);  // recomputes a band of one column
public:
Weight (*H2)[100]; // view of H shifted by two rows and columns, so that
                   // indices -2 and -1 address the borders (local change:
                   // element type is Weight)
int t; // threshold
CorList CL; // replacement rules and correction weights (local addition)
Corr() : H2((Weight(*)[100])&H[2][2]) {}; // local change
Corr(const char* a) : TFTiv<char,char>(a), H2((Weight(*)[100])&H[2][2]) { };
// collects the candidate corrections of w into tab; returns their number
int correct(const char* w, Words& tab);
// loads the weight list from file Name into CL
int load2(char *Name); // local addition
};
//---------------------------------------------------------------------------
#endif

View File

@ -1,174 +0,0 @@
#include <stdlib.h>
#include <ctype.h>
#include "../lib/iotools.h"
#include "common_cor.h"
#include "corr.hh"
#include <locale.h>
int main(int argc, char** argv) {
// setlocale(LC_CTYPE,"");
// setlocale(LC_COLLATE,"");
gengetopt_args_info args;
if(cmdline_parser(argc, argv, &args) != 0)
exit(1);
process_config_files(&args,argv[0]);
process_common_options(&args,argv[0]);
process_cor_options(&args);
Corr cor;
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// strcpy(dictionary,"cor.bin");
cor.load(dictionary);
cor.t=args.distance_arg;
//>>>>>>>>>>>>>>>
cor.CL.cor_stdcor=1*PREC;
cor.CL.cor_xchg=1*PREC;
if (cor.load2(file_weights)==-1) return -1; // moje
cor.t=1*PREC; // ODLEGLOSC EDYCYJNA
//<<<<<<<<<<<<<<
char line[MAX_LINE+1];
long line_count = 0;
Segment seg;
Words tab;
char form1[MAX_LINE];
char* form;
int formcasing;
char corfield[MAX_LINE]="";
while (fgets(line, MAX_LINE, inputf))
{
++line_count;
char outline[128];
if (!process_seg(line, args))
fputs(line, outputf);
else
{
char form[MAX_FORM];
tab.clear();
getfield(line,input_field_prefix,form);
if (form==NULL) continue;
formcasing=3;
cor.correct(form, tab);
if( tab.count() == 0 )
{
formcasing=casing(form);
if( formcasing == 1 || formcasing == 2)
tolowers(form, form1), cor.correct(form1, tab);
}
if ( tab.count() == 0)
fputs(line, failedf);
else
{
tab.sort();
int max_cnt = 0;
if(result_count < 1) {
max_cnt = tab.count();
}
else {
max_cnt = (tab.count() < result_count) ? tab.count() : result_count;
}
if(args.replace_flag)
{
char corfield[128];
strcpy(corfield, input_field_prefix);
strcat(corfield, form);
seg.aux[seg.auxn]=corfield;
++seg.auxn;
for(int i=0; i<tab.count(); ++i)
{
seg.form=tab[i].form();
restorecasing(seg.form,seg.form,formcasing);
seg.print(outline);
fputs(outline, outputf);
}
--seg.auxn;
}
else
{
if(one_line)
{
char* p=corfield;
for(int i=tab.count()-1; i >= tab.count()-max_cnt; --i)
{
if(tab[i].w_suf() > threshold) continue;
restorecasing(tab[i].form(),tab[i].form(),formcasing);
p += sprintf(p," %s%s",output_field_prefix,tab[i].form());
if(show_scores) {
p += sprintf(p,",%1.2f",tab[i].w_suf());
}
}
sprintf(p,"\n");
strcpy(outline,line);
outline[strlen(outline)-1]='\0';
strcat(outline,corfield);
fputs(outline, outputf);
}
else if(one_field)
{
char* p=corfield;
p += sprintf(p," %s",output_field_prefix);
for(int i=tab.count()-1; i >= tab.count()-max_cnt; --i)
{
if(tab[i].w_suf() > threshold) continue;
restorecasing(tab[i].form(),tab[i].form(),formcasing);
p += sprintf(p,(i==0)?"%s":";%s",tab[i].form());
if(show_scores) {
p += sprintf(p,",%1.2f",tab[i].w_suf());
}
}
sprintf(p,"\n");
strcpy(outline,line);
outline[strlen(outline)-1]='\0';
strcat(outline,corfield);
fputs(outline, outputf);
}
else
{
for(int i=tab.count()-1; i >= tab.count()-max_cnt; --i)
{
if(tab[i].w_suf() > threshold) continue;
restorecasing(tab[i].form(),tab[i].form(),formcasing);
char* p = corfield;
p += sprintf(p," %s%s",output_field_prefix,tab[i].form());
if(show_scores) {
p += sprintf(p,",%1.2f",tab[i].w_suf());
}
p += sprintf(p, "\n");
strcpy(outline,line);
outline[strlen(outline)-1]='\0';
strcat(outline,corfield);
fputs(outline, outputf);
}
}
}
}
}
if(args.interactive_flag)
{
fflush(outputf);
fflush(failedf);
}
}
cmdline_parser_free(&args);
}

Some files were not shown because too many files have changed in this diff Show More