Rewritten the build system, added lem UTF-8 version.

This commit is contained in:
Maciej Prill 2012-02-07 15:37:11 +01:00
parent 7bed30fe7b
commit 5f4d9c3b32
174 changed files with 14849 additions and 2 deletions

129
Makefile Normal file
View File

@ -0,0 +1,129 @@
# Top-level build configuration. config.mak is expected to provide the
# settings referenced below that are not assigned in this file (COMPONENTS,
# BUILD_DOC, PREFIX and the *_DIR installation paths) -- TODO confirm.
include config.mak

# Root of the source tree. Assigned with := so that `pwd` is executed once at
# parse time instead of on every expansion of CUR_DIR/SRC_DIR.
CUR_DIR := $(shell pwd)
SRC_DIR := $(CUR_DIR)/src

# Targets built by `all`; the documentation is built only with BUILD_DOC=yes.
TARGETS = components configuration share libraries
ifeq ($(BUILD_DOC), yes)
TARGETS += documentation
endif

.PHONY: all
all: $(TARGETS)
# ------------------------------------------------------------------
# main section
# ------------------------------------------------------------------
# Build src/lib first, then every directory listed in COMPONENTS.
# $(MAKE) -C passes -j/-n/-k down to the sub-makes, and a failing sub-build
# now aborts this target (previously the trailing `cd` reset the exit status
# to success, so errors were silently ignored).
.PHONY: components
components:
	$(MAKE) -C $(SRC_DIR)/lib
	@for cmp in $(COMPONENTS); do \
		$(MAKE) -C $(SRC_DIR)/$$cmp || exit 1; \
	done
# Build the documentation in doc/. Uses $(MAKE) -C so that make flags are
# inherited and a failure in doc/ is reported instead of being masked.
.PHONY: documentation
documentation:
	$(MAKE) -C $(CUR_DIR)/doc
# Placeholder build steps: nothing has to be built for these areas, but the
# names must exist because `all` and the install_* targets depend on them.
.PHONY: configuration libraries share
configuration libraries share:
# ------------------------------------------------------------------
# cleanup section
# ------------------------------------------------------------------
# Remove build products of all components, the shared library and the docs.
.PHONY: clean
clean: clean_components clean_documentation
	@echo "All files cleaned successfully!"

# Clean every component and then src/lib. Cleaning continues after a failure
# so as much as possible is removed, but the target still fails at the end,
# which keeps the success message of `clean` truthful.
.PHONY: clean_components
clean_components:
	@status=0; \
	for dir in $(COMPONENTS) lib; do \
		$(MAKE) -C $(SRC_DIR)/$$dir clean || status=1; \
	done; \
	exit $$status

.PHONY: clean_documentation
clean_documentation:
	$(MAKE) -C $(CUR_DIR)/doc clean
# ------------------------------------------------------------------
# install section
# ------------------------------------------------------------------
# `install` builds everything, creates the destination directory tree and
# then runs the per-area install targets.
.PHONY: install install_dirs
install: all \
         install_dirs \
         install_components \
         install_configuration \
         install_libraries \
         install_documentation \
         install_share

# Create every destination directory in one `install -d` call.
install_dirs:
	install -d $(PREFIX) $(BIN_DIR) $(CONF_DIR) $(LANG_DIR) $(LIB_DIR) $(DOC_DIR) $(SHARE_DIR)/info
# Per-area install steps. Every target also depends on install_dirs so the
# destination directories exist when a target is invoked on its own or when
# make runs in parallel. Sub-makes are started through $(MAKE) -C so flags
# are inherited and failures stop the installation.
.PHONY: install_components
install_components: components install_dirs
	@for cmp in $(COMPONENTS); do \
		$(MAKE) -C $(SRC_DIR)/$$cmp install || exit 1; \
	done

.PHONY: install_configuration
install_configuration: configuration install_dirs
	$(MAKE) -C $(CUR_DIR)/conf install

.PHONY: install_libraries
install_libraries: libraries install_dirs
	$(MAKE) -C $(CUR_DIR)/lib install

.PHONY: install_documentation
install_documentation: documentation install_dirs
	$(MAKE) -C $(CUR_DIR)/doc install

.PHONY: install_share
install_share: share install_dirs
	$(MAKE) -C $(CUR_DIR)/share install
# ------------------------------------------------------------------
# uninstall section
# ------------------------------------------------------------------
# Remove everything that `install` put in place, then the (now empty)
# installation directories.
.PHONY: uninstall
uninstall: uninstall_share uninstall_documentation uninstall_libraries uninstall_configuration uninstall_components uninstall_dirs

# Uninstall every component. All components are attempted even if one fails;
# the target reports failure at the end in that case.
.PHONY: uninstall_components
uninstall_components:
	@status=0; \
	for cmp in $(COMPONENTS); do \
		$(MAKE) -C $(SRC_DIR)/$$cmp uninstall || status=1; \
	done; \
	exit $$status

.PHONY: uninstall_configuration
uninstall_configuration:
	$(MAKE) -C $(CUR_DIR)/conf uninstall

.PHONY: uninstall_libraries
uninstall_libraries:
	$(MAKE) -C $(CUR_DIR)/lib uninstall

.PHONY: uninstall_documentation
uninstall_documentation:
	$(MAKE) -C $(CUR_DIR)/doc uninstall

.PHONY: uninstall_share
uninstall_share:
	$(MAKE) -C $(CUR_DIR)/share uninstall

# The directories may only be removed after all content has been uninstalled;
# the prerequisites (including uninstall_components) enforce that order even
# under `make -j`. rmdir fails on purpose if a directory is not empty.
.PHONY: uninstall_dirs
uninstall_dirs: uninstall_configuration uninstall_documentation uninstall_share uninstall_libraries uninstall_components
	rmdir $(CONF_DIR)
	rmdir $(DOC_DIR)
	rmdir $(LANG_DIR)
	rmdir $(LIB_DIR)

35
conf/Makefile Normal file
View File

@ -0,0 +1,35 @@
include ../config.mak

# Configuration files handled by this Makefile. A single list keeps the
# install and uninstall rules in sync.
CONF_FILES := compiledic.conf cor.conf dgc.conf dgp.conf gph.conf grp.conf \
              gue.conf kor.conf lem.conf mar.conf ser.conf utt.conf

# Copy the configuration files into CONF_DIR (no-op when CONF_DIR is unset).
# The trailing slash makes `install` fail if CONF_DIR is not a directory
# instead of creating a regular file with that name.
.PHONY: install
install:
ifdef CONF_DIR
	install -m 0644 $(CONF_FILES) $(CONF_DIR)/
endif

# Remove the installed configuration files (no-op when CONF_DIR is unset).
# $(RM) is `rm -f`, so files that are already gone do not abort the run.
.PHONY: uninstall
uninstall:
ifdef CONF_DIR
	$(RM) $(addprefix $(CONF_DIR)/,$(CONF_FILES))
endif

46
dist/Makefile vendored Normal file
View File

@ -0,0 +1,46 @@
# some variables
# path where all necessary files are placed
# (they will be processed for making the distribution)
export UTT_DIST_DIR=$(UTT_DIR)
# path where the distribution package will be placed
export UTT_DIST_OUTPUT=$(UTT_DIR)/..
# temporary path for making the distribution; assigned with := so that `pwd`
# runs once at parse time rather than each time the exported value is expanded
export UTT_DIST_TMP:=$(shell pwd)/dist_tmp
# -----------------------------------------------------------
# default task should display options
# Print the available packaging targets. The target name now matches the
# .PHONY declaration (it was misspelled "defaul", so `make default` failed).
.PHONY: default
default:
	@echo "Using: make tarball|rpm|deb"
# -----------------------------------------------------------
# this task should compile utt (if necessary) and create tar.gz version
# Sub-makes are started with $(MAKE) -C so -j/-n and the exported UTT_DIST_*
# variables are passed on.
.PHONY: tarball
tarball:
	$(MAKE) -C tarball
# -----------------------------------------------------------
# this task should compile utt (if necessary) and create rpm version
# (see spec/README for details)
.PHONY: rpm
rpm:
	$(MAKE) -C spec
# -----------------------------------------------------------
# this task should compile utt (if necessary) and create deb version
# (see deb/README for details)
.PHONY: deb
deb:
	$(MAKE) -C deb
# -----------------------------------------------------------
# Delete the temporary directory used while building distribution packages.
.PHONY: clean
clean:
	$(RM) -r $(UTT_DIST_TMP)

15
dist/common/check_dependenties.pl vendored Normal file
View File

@ -0,0 +1,15 @@
# Reads program names from STDIN, one per line, and prints for each either
# the output of `which` or "failed (<status>)" when `which` reports an error.
use strict;
use warnings;

while (my $app = <STDIN>) {
    chomp $app;

    # Single-quote the name for the shell so that metacharacters in the input
    # (spaces, ';', '$', backticks, ...) cannot be executed as shell syntax.
    (my $quoted = $app) =~ s/'/'\\''/g;
    my $path = `which '$quoted' 2>/dev/null`;
    my $err  = $?;    # raw wait status of the backtick command

    print $app . ": ";
    if ($err != 0) {
        print "failed ($err)\n";
    }
    else {
        # Printed as returned by `which`; no newline is added here.
        print $path;
    }
}

1
dist/common/description.def vendored Normal file
View File

@ -0,0 +1 @@
I put here some description.

1
dist/common/description.pl.def vendored Normal file
View File

@ -0,0 +1 @@
Tu umieszczę opis po polsku.

29
dist/common/find_perl_deps.pl vendored Normal file
View File

@ -0,0 +1,29 @@
# Filter: for every input line of the form "<prefix>: <text>" that contains a
# backtick-quoted command, print the first word of each pipeline stage
# (one per line). Presumably fed with `grep -r` output -- verify against caller.
while (defined(my $record = <STDIN>)) {
    chomp $record;

    # First strip the leading source file name (everything up to the colon).
    next unless $record =~ /^[^:]*\:\s*(.*)$/;
    my $text = $1;

    # Keep only the command itself, i.e. what is written between backticks.
    next unless $text =~ /\`\s*(.+)\s*\`/;
    my $pipeline = $1;

    # Split the command into its pipeline stages.
    for my $stage (split(/\s*\|\s*/, $pipeline)) {
        # Cut off the program's arguments and skip stages that are not
        # programs (--replace, $tmpfile_x, /g, \\, etc.).
        print "$1\n" if $stage =~ /^([^\$\\\/\s\']+)(\s.*)?$/;
    }
}

16
dist/common/find_rpm_deps.sh vendored Executable file
View File

@ -0,0 +1,16 @@
#!/bin/sh
# Print the requirements (rpm -q -R -p) of the RPM package file given as $1.

# An argument is required.
if test $# -lt 1; then
    echo RPM file name expected!
    # `exit -1` is not a valid exit status in POSIX sh; use 1 instead.
    exit 1
fi

# The argument must be an existing regular file whose name ends in "rpm".
# "$1" is quoted everywhere so file names containing spaces work.
if test -f "$1"; then
    case "$1" in
        *rpm)
            rpm -q -R -p "$1"
            ;;
        *)
            echo "It's not a RPM file!"
            ;;
    esac
else
    echo RPM file not found!
fi

3
dist/common/make_deps.sh vendored Executable file
View File

@ -0,0 +1,3 @@
#!/bin/sh
# Collect the external programs invoked through backticks in ../../src:
# grep every line containing a backtick, drop .svn metadata, let
# find_perl_deps.pl extract the program names and store the unique, sorted
# list in dep_list.txt. Paths are relative to the current directory.
grep -r -e "\`" ../../src \
    | grep -v -e "\.svn\/" \
    | perl find_perl_deps.pl \
    | sort -u > dep_list.txt

14
dist/common/prepare_conf.sh vendored Executable file
View File

@ -0,0 +1,14 @@
#!/bin/sh
# Usage: prepare_conf.sh src_conf_dir dest_conf_dir replacement
#
# Copies every *.conf file from src_conf_dir to dest_conf_dir, replacing each
# occurrence of PATH_PREFIX with `replacement`. The replacement is inserted
# verbatim into a sed s/.../.../ expression, so the caller must already have
# escaped any "/", "&" or "\" it contains.
if test 3 -ne $#
then
    echo "Usage: " `basename "$0"` src_conf_dir dest_conf_dir replacement >&2
    exit 1
fi

POLEC="s/PATH_PREFIX/$3/g"
for FN in "$1"/*.conf
do
    # When nothing matches, the glob is left unexpanded; skip that case.
    test -f "${FN}" || continue
    BN=`basename "${FN}"`
    sed "${POLEC}" "${FN}" > "$2/${BN}"
done

1
dist/common/release.def vendored Normal file
View File

@ -0,0 +1 @@
2

0
dist/common/requirements.def vendored Normal file
View File

1
dist/common/version.def vendored Normal file
View File

@ -0,0 +1 @@
0.9

86
dist/deb/Makefile vendored Normal file
View File

@ -0,0 +1,86 @@
# Settings for building the deb package.
# All $(shell ...) values use := so each command runs once at parse time.
DIR := $(shell pwd)

# ifndef (rather than ?=) is intentional: a variable that is defined but
# empty must also fall back to the current directory.
ifndef UTT_DIST_DIR
UTT_DIST_DIR=${DIR}
endif
ifndef UTT_DIST_OUTPUT
UTT_DIST_OUTPUT=${DIR}
endif

# package properties
_PRODUCT_NAME := utt
_UTT_VER := $(shell cat ../common/version.def)
_UTT_REL := $(shell cat ../common/release.def)
# staging root of the package and the UTT directory inside it
_DEB_FROOT := $(DIR)/deb_root
_UTT_DIR := ${_DEB_FROOT}/usr/local/$(_PRODUCT_NAME).$(_UTT_VER)-$(_UTT_REL)
# Assemble the package tree under $(_DEB_FROOT) and build
# $(_PRODUCT_NAME)_$(_UTT_VER)-$(_UTT_REL).all.deb from it.
#
# The DEBIAN/ control directory is kept in the tree: dpkg-deb --build reads
# DEBIAN/control from it (it used to be tarred and deleted before the build).
# TODO: changelogs and man pages are not gzipped yet.
.PHONY: default
default: make_control make_postinst make_prerm
# first, we prepare the directory structure (including the directories the
# cp commands below write into)
	mkdir -p $(_DEB_FROOT)/DEBIAN
	mkdir -p $(_UTT_DIR)/bin $(_UTT_DIR)/share/doc/$(_PRODUCT_NAME)
# next, we move the generated control files into place; dpkg-deb requires
# maintainer scripts to be executable
	mv ./control $(_DEB_FROOT)/DEBIAN/
	mv ./postinst $(_DEB_FROOT)/DEBIAN/
	mv ./prerm $(_DEB_FROOT)/DEBIAN/
	chmod 755 $(_DEB_FROOT)/DEBIAN/postinst $(_DEB_FROOT)/DEBIAN/prerm
# we copy all necessary files (binaries)
	cp -r ${UTT_DIST_DIR}/* ${_UTT_DIR}/
	cp ./changelog ${_UTT_DIR}/share/doc/$(_PRODUCT_NAME)/
	cp ./changelog.Debian $(_UTT_DIR)/share/doc/$(_PRODUCT_NAME)/
	cp ../files/* ${_UTT_DIR}/share/doc/${_PRODUCT_NAME}/
	cp ../common/utt_make_config.pl ${_UTT_DIR}/bin/
	chmod 755 ${_UTT_DIR}/bin/utt_make_config.pl
# this is necessary on Debian Woody, don't ask me why
	find $(_DEB_FROOT) -type d -exec chmod 755 {} +
# finally, we build the deb package
	fakeroot dpkg-deb --build $(_DEB_FROOT)
	mv $(_DEB_FROOT).deb $(_PRODUCT_NAME)_$(_UTT_VER)-$(_UTT_REL).all.deb
	rm -rf ${_DEB_FROOT}
# Generate the Debian control file in the current directory.
.PHONY: make_control
make_control:
	echo "Package: $(_PRODUCT_NAME)" > control
	echo "Version: $(_UTT_VER)" >> control
	echo "Section: web" >> control
	echo "Priority: optional" >> control
	echo "Architecture: all" >> control
	echo "Essential: no" >> control
# TODO: the package requirements are meant to be read from
# ../common/requirements.def (e.g. "libwww-perl, acme-base (>= 1.2)");
# for now the Depends field is written empty.
	echo "Depends: " >> control
	echo "Pre-Depends: perl" >> control
	echo "Maintainer: Adam Mickiewicz University" >> control
	echo "Provides: $(_PRODUCT_NAME)" >> control
# printf instead of `echo -n`: the latter is not portable across /bin/sh
# implementations. The description text follows on the same line.
	printf 'Description: ' >> control
	cat ../common/description.def >> control
# Generate the postinst maintainer script in the current directory: it runs
# create_utt_config.pl from $(_INSTALL_DIR) and then deletes that script.
# NOTE(review): _INSTALL_DIR is not assigned anywhere in this Makefile, so
# unless it is supplied from the environment or command line the generated
# script refers to /create_utt_config.pl. The packaging rule copies a script
# named utt_make_config.pl into bin/ -- confirm the intended path and name.
.PHONY: make_postinst
make_postinst:
	echo "#!/bin/sh" > postinst
	echo "$(_INSTALL_DIR)/create_utt_config.pl" >> postinst
	echo "rm -f $(_INSTALL_DIR)/create_utt_config.pl" >> postinst
# Generate the prerm maintainer script; it contains only the shebang line.
.PHONY: make_prerm
make_prerm:
	echo "#!/bin/sh" > prerm

3
dist/deb/README vendored Normal file
View File

@ -0,0 +1,3 @@
This directory contains the files necessary to create a deb package.
apt-get install dpkg-dev debhelper devscripts fakeroot linda

8
dist/files/COPYRIGHT vendored Normal file
View File

@ -0,0 +1,8 @@
Copyright (C) 2005 - 2008 Tomasz Obrebski, Michal Stolarski, Justyna Walkowska, Pawel Konieczka
Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.2
or any later version published by the Free Software Foundation;
with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
Texts. A copy of the license is included in the section entitled GNU
Free Documentation License.

264
dist/files/LICENCE vendored Normal file
View File

@ -0,0 +1,264 @@
GNU Free Documentation License
Version 1.2, November 2002
Copyright (c) 2000,2001,2002 Free Software Foundation, Inc.
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
0. PREAMBLE
The purpose of this License is to make a manual, textbook, or other functional and
useful document free in the sense of freedom: to assure everyone the effective freedom
to copy and redistribute it, with or without modifying it, either commercially or noncommercially.
Secondarily, this License preserves for the author and publisher a way
to get credit for their work, while not being considered responsible for modifications
made by others.
This License is a kind of “copyleft”, which means that derivative works of the document
must themselves be free in the same sense. It complements the GNU General Public
License, which is a copyleft license designed for free software.
We have designed this License in order to use it for manuals for free software, because
free software needs free documentation: a free program should come with manuals
providing the same freedoms that the software does. But this License is not limited to
software manuals; it can be used for any textual work, regardless of subject matter or
whether it is published as a printed book. We recommend this License principally for
works whose purpose is instruction or reference.
1. APPLICABILITY AND DEFINITIONS
This License applies to any manual or other work, in any medium, that contains a
notice placed by the copyright holder saying it can be distributed under the terms
of this License. Such a notice grants a world-wide, royalty-free license, unlimited in
duration, to use that work under the conditions stated herein. The “Document”,
below, refers to any such manual or work. Any member of the public is a licensee, and
is addressed as “you”. You accept the license if you copy, modify or distribute the work
in a way requiring permission under copyright law.
A “Modified Version” of the Document means any work containing the Document or
a portion of it, either copied verbatim, or with modifications and/or translated into
another language.
A “Secondary Section” is a named appendix or a front-matter section of the Document
that deals exclusively with the relationship of the publishers or authors of the Document
to the Documents overall subject (or to related matters) and contains nothing that
could fall directly within that overall subject. (Thus, if the Document is in part a
textbook of mathematics, a Secondary Section may not explain any mathematics.) The
relationship could be a matter of historical connection with the subject or with related
matters, or of legal, commercial, philosophical, ethical or political position regarding
them.
The “Invariant Sections” are certain Secondary Sections whose titles are designated, as
being those of Invariant Sections, in the notice that says that the Document is released
under this License. If a section does not fit the above definition of Secondary then it is
not allowed to be designated as Invariant. The Document may contain zero Invariant
Sections. If the Document does not identify any Invariant Sections then there are none.
The “Cover Texts” are certain short passages of text that are listed, as Front-Cover
Texts or Back-Cover Texts, in the notice that says that the Document is released under
this License. A Front-Cover Text may be at most 5 words, and a Back-Cover Text may
be at most 25 words.
A “Transparent” copy of the Document means a machine-readable copy, represented
in a format whose specification is available to the general public, that is suitable for
revising the document straightforwardly with generic text editors or (for images composed
of pixels) generic paint programs or (for drawings) some widely available drawing
editor, and that is suitable for input to text formatters or for automatic translation to
a variety of formats suitable for input to text formatters. A copy made in an otherwise
Transparent file format whose markup, or absence of markup, has been arranged to
thwart or discourage subsequent modification by readers is not Transparent. An image
format is not Transparent if used for any substantial amount of text. A copy that is
not “Transparent” is called “Opaque”.
Examples of suitable formats for Transparent copies include plain ascii without
markup, Texinfo input format, LaTEX input format, SGML or XML using a publicly
available DTD, and standard-conforming simple HTML, PostScript or PDF designed
for human modification. Examples of transparent image formats include PNG, XCF
and JPG. Opaque formats include proprietary formats that can be read and edited
only by proprietary word processors, SGML or XML for which the DTD and/or
processing tools are not generally available, and the machine-generated HTML,
PostScript or PDF produced by some word processors for output purposes only.
The “Title Page” means, for a printed book, the title page itself, plus such following
pages as are needed to hold, legibly, the material this License requires to appear in the
title page. For works in formats which do not have any title page as such, “Title Page”
means the text near the most prominent appearance of the works title, preceding the
beginning of the body of the text.
A section “Entitled XYZ” means a named subunit of the Document whose title either
is precisely XYZ or contains XYZ in parentheses following text that translates XYZ in
another language. (Here XYZ stands for a specific section name mentioned below, such
as “Acknowledgements”, “Dedications”, “Endorsements”, or “History”.) To “Preserve
the Title” of such a section when you modify the Document means that it remains a
section “Entitled XYZ” according to this definition.
The Document may include Warranty Disclaimers next to the notice which states that
this License applies to the Document. These Warranty Disclaimers are considered to
be included by reference in this License, but only as regards disclaiming warranties:
any other implication that these Warranty Disclaimers may have is void and has no
effect on the meaning of this License.
2. VERBATIM COPYING
You may copy and distribute the Document in any medium, either commercially or
noncommercially, provided that this License, the copyright notices, and the license
notice saying this License applies to the Document are reproduced in all copies, and
that you add no other conditions whatsoever to those of this License. You may not use
technical measures to obstruct or control the reading or further copying of the copies
you make or distribute. However, you may accept compensation in exchange for copies.
If you distribute a large enough number of copies you must also follow the conditions
in section 3.
You may also lend copies, under the same conditions stated above, and you may publicly
display copies.
3. COPYING IN QUANTITY
If you publish printed copies (or copies in media that commonly have printed covers) of
the Document, numbering more than 100, and the Documents license notice requires
Cover Texts, you must enclose the copies in covers that carry, clearly and legibly, all
these Cover Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
the back cover. Both covers must also clearly and legibly identify you as the publisher
of these copies. The front cover must present the full title with all words of the title
equally prominent and visible. You may add other material on the covers in addition.
Copying with changes limited to the covers, as long as they preserve the title of the
Document and satisfy these conditions, can be treated as verbatim copying in other
respects.
If the required texts for either cover are too voluminous to fit legibly, you should put
the first ones listed (as many as fit reasonably) on the actual cover, and continue the
rest onto adjacent pages.
If you publish or distribute Opaque copies of the Document numbering more than 100,
you must either include a machine-readable Transparent copy along with each Opaque
copy, or state in or with each Opaque copy a computer-network location from which
the general network-using public has access to download using public-standard network
protocols a complete Transparent copy of the Document, free of added material. If
you use the latter option, you must take reasonably prudent steps, when you begin
distribution of Opaque copies in quantity, to ensure that this Transparent copy will
remain thus accessible at the stated location until at least one year after the last time
you distribute an Opaque copy (directly or through your agents or retailers) of that
edition to the public.
It is requested, but not required, that you contact the authors of the Document well
before redistributing any large number of copies, to give them a chance to provide you
with an updated version of the Document.
4. MODIFICATIONS
You may copy and distribute a Modified Version of the Document under the conditions
of sections 2 and 3 above, provided that you release the Modified Version under precisely
this License, with the Modified Version filling the role of the Document, thus licensing
distribution and modification of the Modified Version to whoever possesses a copy of
it. In addition, you must do these things in the Modified Version:
A. Use in the Title Page (and on the covers, if any) a title distinct from that of the
Document, and from those of previous versions (which should, if there were any,
be listed in the History section of the Document). You may use the same title as
a previous version if the original publisher of that version gives permission.
B. List on the Title Page, as authors, one or more persons or entities responsible for
authorship of the modifications in the Modified Version, together with at least five
of the principal authors of the Document (all of its principal authors, if it has fewer
than five), unless they release you from this requirement.
C. State on the Title page the name of the publisher of the Modified Version, as the
publisher.
D. Preserve all the copyright notices of the Document.
E. Add an appropriate copyright notice for your modifications adjacent to the other
copyright notices.
F. Include, immediately after the copyright notices, a license notice giving the public
permission to use the Modified Version under the terms of this License, in the form
shown in the Addendum below.
G. Preserve in that license notice the full lists of Invariant Sections and required Cover
Texts given in the Documents license notice.
H. Include an unaltered copy of this License.
I. Preserve the section Entitled “History”, Preserve its Title, and add to it an item
stating at least the title, year, new authors, and publisher of the Modified Version
as given on the Title Page. If there is no section Entitled “History” in the Document,
create one stating the title, year, authors, and publisher of the Document
as given on its Title Page, then add an item describing the Modified Version as
stated in the previous sentence.
J. Preserve the network location, if any, given in the Document for public access to
a Transparent copy of the Document, and likewise the network locations given in
the Document for previous versions it was based on. These may be placed in the
“History” section. You may omit a network location for a work that was published
at least four years before the Document itself, or if the original publisher of the
version it refers to gives permission.
K. For any section Entitled “Acknowledgements” or “Dedications”, Preserve the Title
of the section, and preserve in the section all the substance and tone of each of the
contributor acknowledgements and/or dedications given therein.
L. Preserve all the Invariant Sections of the Document, unaltered in their text and
in their titles. Section numbers or the equivalent are not considered part of the
section titles.
M. Delete any section Entitled “Endorsements”. Such a section may not be included
in the Modified Version.
N. Do not retitle any existing section to be Entitled “Endorsements” or to conflict in
title with any Invariant Section.
O. Preserve any Warranty Disclaimers.
If the Modified Version includes new front-matter sections or appendices that qualify
as Secondary Sections and contain no material copied from the Document, you may at
your option designate some or all of these sections as invariant. To do this, add their
titles to the list of Invariant Sections in the Modified Versions license notice. These
titles must be distinct from any other section titles.
You may add a section Entitled “Endorsements”, provided it contains nothing but
endorsements of your Modified Version by various parties—for example, statements of
peer review or that the text has been approved by an organization as the authoritative
definition of a standard.
You may add a passage of up to five words as a Front-Cover Text, and a passage of up
to 25 words as a Back-Cover Text, to the end of the list of Cover Texts in the Modified
Version. Only one passage of Front-Cover Text and one of Back-Cover Text may be
added by (or through arrangements made by) any one entity. If the Document already
includes a cover text for the same cover, previously added by you or by arrangement
made by the same entity you are acting on behalf of, you may not add another; but
you may replace the old one, on explicit permission from the previous publisher that
added the old one.
The author(s) and publisher(s) of the Document do not by this License give permission
to use their names for publicity for or to assert or imply endorsement of any Modified
Version.
5. COMBINING DOCUMENTS
You may combine the Document with other documents released under this License,
under the terms defined in section 4 above for modified versions, provided that you
include in the combination all of the Invariant Sections of all of the original documents,
unmodified, and list them all as Invariant Sections of your combined work in its license
notice, and that you preserve all their Warranty Disclaimers.
The combined work need only contain one copy of this License, and multiple identical
Invariant Sections may be replaced with a single copy. If there are multiple Invariant
Sections with the same name but different contents, make the title of each such section
unique by adding at the end of it, in parentheses, the name of the original author or
publisher of that section if known, or else a unique number. Make the same adjustment
to the section titles in the list of Invariant Sections in the license notice of the combined
work.
In the combination, you must combine any sections Entitled “History” in the various
original documents, forming one section Entitled “History”; likewise combine any
sections Entitled “Acknowledgements”, and any sections Entitled “Dedications”. You
must delete all sections Entitled “Endorsements.”
6. COLLECTIONS OF DOCUMENTS
You may make a collection consisting of the Document and other documents released
under this License, and replace the individual copies of this License in the various
documents with a single copy that is included in the collection, provided that you
follow the rules of this License for verbatim copying of each of the documents in all
other respects.
You may extract a single document from such a collection, and distribute it individually
under this License, provided you insert a copy of this License into the extracted
document, and follow this License in all other respects regarding verbatim copying of
that document.
7. AGGREGATION WITH INDEPENDENT WORKS
A compilation of the Document or its derivatives with other separate and independent
documents or works, in or on a volume of a storage or distribution medium, is called
an “aggregate” if the copyright resulting from the compilation is not used to limit the
legal rights of the compilations users beyond what the individual works permit. When
the Document is included in an aggregate, this License does not apply to the other
works in the aggregate which are not themselves derivative works of the Document.
If the Cover Text requirement of section 3 is applicable to these copies of the Document,
then if the Document is less than one half of the entire aggregate, the Documents Cover
Texts may be placed on covers that bracket the Document within the aggregate, or the
electronic equivalent of covers if the Document is in electronic form. Otherwise they
must appear on printed covers that bracket the whole aggregate.
8. TRANSLATION
Translation is considered a kind of modification, so you may distribute translations
of the Document under the terms of section 4. Replacing Invariant Sections with
translations requires special permission from their copyright holders, but you may
include translations of some or all Invariant Sections in addition to the original versions
of these Invariant Sections. You may include a translation of this License, and all the
license notices in the Document, and any Warranty Disclaimers, provided that you
also include the original English version of this License and the original versions of
those notices and disclaimers. In case of a disagreement between the translation and
the original version of this License or a notice or disclaimer, the original version will
prevail.
If a section in the Document is Entitled “Acknowledgements”, “Dedications”, or “History”,
the requirement (section 4) to Preserve its Title (section 1) will typically require
changing the actual title.
9. TERMINATION
You may not copy, modify, sublicense, or distribute the Document except as expressly
provided for under this License. Any other attempt to copy, modify, sublicense or
distribute the Document is void, and will automatically terminate your rights under
this License. However, parties who have received copies, or rights, from you under this
License will not have their licenses terminated so long as such parties remain in full
compliance.
10. FUTURE REVISIONS OF THIS LICENSE
The Free Software Foundation may publish new, revised versions of the GNU Free
Documentation License from time to time. Such new versions will be similar in spirit
to the present version, but may differ in detail to address new problems or concerns.
See http://www.gnu.org/copyleft/.
Each version of the License is given a distinguishing version number. If the Document
specifies that a particular numbered version of this License “or any later version”
applies to it, you have the option of following the terms and conditions either of that
specified version or of any later version that has been published (not as a draft) by
the Free Software Foundation. If the Document does not specify a version number of
this License, you may choose any version ever published (not as a draft) by the Free
Software Foundation.

51
dist/files/README vendored Normal file
View File

@ -0,0 +1,51 @@
General information
*********************
UAM Text Tools (UTT) is a package of language processing tools
developed at Adam Mickiewicz University. Its functionality includes:
* tokenization
* dictionary-based morphological analysis
* heuristic morphological analysis of unknown words
* spelling correction
* pattern search
* sentence splitting
* generation of concordance tables
The toolkit is intended for processing raw (unannotated),
unrestricted text for any conceivable purpose.
Installation
**************
1) unpack the UTT tar archive
2) in the same directory, unpack the tar archives of all UTT dictionary modules you have
3) run
make install
in the root directory of the installation
4) add the bin directory to the PATH variable
Requirements
*************
* File::HomeDir
the Perl package File::HomeDir must be installed
(to install the package, run 'perl -MCPAN -e shell' and write
'install File::HomeDir' after the 'cpan>' prompt appears)
* flex
to run the ser component, flex must be installed in your system
* ruby
to run the tre component, ruby must be installed in your system
* locale pl_PL.iso-8859-2
the locale pl_PL.iso-8859-2 (pl_PL for short) must be installed
and set while using UTT with the Polish module. The text you
process with UTT must be encoded in iso-8859-2.

26
dist/spec/Makefile vendored Normal file
View File

@ -0,0 +1,26 @@
# this makefile will build rpm
# := is used so that `pwd` runs once at parse time, not on every expansion.
DIR := $(shell pwd)

# ifndef (rather than ?=) so that a defined-but-empty UTT_DIST_DIR also falls
# back to the current directory.
ifndef UTT_DIST_DIR
UTT_DIST_DIR=${DIR}
endif

# staging root for the package contents (the directory is named "rmp_root")
_RPM_FROOT := ${DIR}/rmp_root
_UTT_ROOT := ${_RPM_FROOT}/usr/local
# default task
# Stage the distribution under $(_UTT_ROOT), rewrite PATH_PREFIX in the
# *.conf files and run rpmbuild from inside the staging root.
.PHONY: rpm
rpm:
# start from a clean staging root (the old command expanded the misspelled,
# undefined variable _RMP_FROOT and therefore removed nothing)
	rm -fr ${_RPM_FROOT}
	mkdir -p ${_UTT_ROOT}
	cp -rf ${UTT_DIST_DIR}/* ${_UTT_ROOT}/
# move the configuration templates aside, then regenerate them in etc/utt
# with PATH_PREFIX replaced by /usr/local (slashes escaped for sed)
	mkdir -p ${_UTT_ROOT}/cnf
	mv ${_UTT_ROOT}/etc/utt/*.conf ${_UTT_ROOT}/cnf/
	${DIR}/../common/prepare_conf.sh ${_UTT_ROOT}/cnf ${_UTT_ROOT}/etc/utt \\\/usr\\\/local
	rm -rf ${_UTT_ROOT}/cnf
	cp ../files/* ${_UTT_ROOT}/
# && so that rpmbuild is not run from the wrong directory if cd fails
	cd ${_RPM_FROOT} && rpmbuild -bb ${DIR}/utt.spec
	rm -rf ${_RPM_FROOT}

16
dist/spec/README vendored Normal file
View File

@ -0,0 +1,16 @@
This directory contains files necessary to produce rpm package.
First, you must have variable UTT_DIST_DIR defined properly.
This variable should be defined by main Makefile.
To create rpm file, just write:
make
The created package should appear in the default RPM directory
(on my computer it is the /usr/src/redhat/RPMS/$arch/ directory).
To determine the rpm output directory, execute:
rpm --showrc | grep _rpmdir
You need write access to this directory to create the rpm.

81
dist/spec/utt.spec vendored Normal file
View File

@ -0,0 +1,81 @@
#
# Default RPM header.
#
# START_RPM_STD_HEADER:
#
# RPM properties
#
# The macros below are reused by the package tags further down.
# Version and release are read with cat from ../../common/*.def, i.e. the
# paths are relative to the directory rpmbuild is started from.
#
%define _this_product UAM Text Tools
%define _this_summary Some tools for text processing
%define _this_name utt
%define _this_version %(cat ../../common/version.def)
%define _this_release %(cat ../../common/release.def)
%define _this_copyright Adam Mickiewicz University, Poland
#
# End of the default RPM header.
#
# END_RPM_STD_HEADER:
# --------------------------------------------------------------------
Summary: %_this_summary
Name: %_this_name
Version: %_this_version
Release: %_this_release
#Copyright: %_this_copyright
License: GPL
Group: Development/Tools
URL: http://utt.amu.edu.pl
Vendor: Adam Mickiewicz University
BuildRoot: %(pwd)
#BuildArch: i586
# requirements for utt application
#AutoReq: no
#AutoReqProv: no
#Requires: glibc >= 2.1.3
#Requires: libgcc1 >= 3.0
#Requires: libgcc >= 3.0
#Requires: libstdc++6 >= 3.4.1
#Requires: libstdc++ >= 3.4.1
%description
%(cat ../../common/description.def)
%description -l pl
%(cat ../../common/description.pl.def)
# -------------------------------------------------------------
# preparing sources for compilation
%prep
# source compilation
%build
# rpm building
%install
# cleaning after rpm build
%clean
# -------------------------------------------------------------
#before installation
%pre
#after installation
%post
# we need to create utt.conf file
#before uninstallation
%preun
#after uninstallation
%postun
# -------------------------------------------------------------
%files
%defattr(-,root,root)
/*

74
dist/struktura.txt vendored Normal file
View File

@ -0,0 +1,74 @@
/usr/local/bin/aut2fsa
/usr/local/bin/canonize
/usr/local/bin/compiledic
/usr/local/bin/con
/usr/local/bin/cor
/usr/local/bin/dgc
/usr/local/bin/dgp
/usr/local/bin/fla
/usr/local/bin/fsm2aut
/usr/local/bin/go ?
/usr/local/bin/Makefile.go ?
/usr/local/bin/gph
/usr/local/bin/grp
/usr/local/bin/gue
/usr/local/bin/kot
/usr/local/bin/lem
/usr/local/bin/mar
/usr/local/bin/sen
/usr/local/bin/sen-nl
/usr/local/bin/ser
/usr/local/bin/tok
/usr/local/bin/tre.rb
/usr/local/bin/unfla
/usr/local/bin/ipi.tag2re
/usr/local/bin/uam.tag2re
# R.D. sugeruje /etc/utt
# lokalnie: ~/.utt lub (trendy) ~/.config/utt
/usr/local/etc/utt/con.conf
/usr/local/etc/utt/cor.conf
/usr/local/etc/utt/dgc.conf
/usr/local/etc/utt/fla.conf
/usr/local/etc/utt/grp.conf
/usr/local/etc/utt/gue.conf
/usr/local/etc/utt/kor.conf
/usr/local/etc/utt/kot.conf
/usr/local/etc/utt/lem.conf
/usr/local/etc/utt/mar.conf
/usr/local/etc/utt/sen.conf
/usr/local/etc/utt/ser.conf
/usr/local/etc/utt/tok.conf
/usr/local/etc/utt/unfla.conf
/usr/local/etc/utt/utt.conf
/usr/local/share/utt/weights.kor
/usr/local/share/utt/cats.dgc
/usr/local/share/utt/gram.dgc
# lokalnie: wszystkie pliki beda umieszczone w ~/.local/share/utt/
/usr/local/share/utt/pl_PL.ISO-8859-2/pl_PL.ISO-8859-2.sym
/usr/local/share/utt/pl_PL.ISO-8859-2/cor.bin
/usr/local/share/utt/pl_PL.ISO-8859-2/gue.bin
/usr/local/share/utt/pl_PL.ISO-8859-2/lem.bin
/usr/local/share/utt/pl_PL.UTF-8/pl_PL.UTF-8.sym
/usr/local/share/utt/pl_PL.UTF-8/cor.bin
/usr/local/share/utt/pl_PL.UTF-8/gue.bin
/usr/local/share/utt/pl_PL.UTF-8/lem.bin
# lokalnie: wszystkie slowniki beda umieszczone w ~/.local/share/utt/
/usr/local/lib/utt/ser.l.template
/usr/local/lib/utt/terms.m4
/usr/local/lib/utt/seg.rb
/usr/local/lib/attr.pm
# lokalnie: wszystkie pliki beda umieszczone w ~/.local/lib/utt/
/usr/local/share/doc/utt/FAQ
/usr/local/share/doc/utt/COPYRIGHT
/usr/local/share/doc/utt/NEWS
/usr/local/share/doc/utt/README
/usr/local/share/info/utt.info.gz
/usr/local/share/man/man3/utt.gz
~/.utt/*.conf (wszystko z /usr/local/etc/utt)

5
dist/tarball/INSTALL vendored Normal file
View File

@ -0,0 +1,5 @@
Here you can find some information about how to install utt.
You should just unpack archive and then execute
make test
make install

48
dist/tarball/Makefile vendored Normal file
View File

@ -0,0 +1,48 @@
# This makefile builds the tarball distribution of utt.
#
# Some variables
#
DIR := $(shell pwd)

# Directory with utt binaries
ifndef UTT_DIST_DIR
UTT_DIST_DIR=${DIR}
endif

# Where to put the result
ifndef UTT_DIST_OUTPUT
UTT_DIST_OUTPUT=${DIR}
endif

# Common info about version and release
_UTT_VER := $(shell cat ../common/version.def)
_UTT_REL := $(shell cat ../common/release.def)

# Temp vars: staging directory (becomes the top-level directory inside the
# archive) and the archive file name.
_TARBALL_ROOT=$(DIR)/utt-$(_UTT_VER).$(_UTT_REL)
_TAR_FILE_NAME=utt.$(_UTT_VER)_$(_UTT_REL).tar.gz

# default task
.PHONY: default
default:
	@echo Build directory: ${UTT_DIST_DIR}
	@echo Output directory for tarball: ${UTT_DIST_OUTPUT}
	@# start from an empty staging directory so leftovers of a failed run
	@# cannot end up in the archive
	rm -rf ${_TARBALL_ROOT}
	mkdir -p ${_TARBALL_ROOT}
	cp -fr ${UTT_DIST_DIR}/* ${_TARBALL_ROOT}
	@# we add some extra files
	@# config files
	mkdir -p ${_TARBALL_ROOT}/cnf
	mv ${_TARBALL_ROOT}/etc/utt/*.conf ${_TARBALL_ROOT}/cnf/
	${DIR}/../common/prepare_conf.sh ${_TARBALL_ROOT}/cnf ${_TARBALL_ROOT}/etc/utt \~\\\/.local
	rm -rf ${_TARBALL_ROOT}/cnf
	cp ./INSTALL ${_TARBALL_ROOT}/
	cp ./Makefile.tarball ${_TARBALL_ROOT}/Makefile
	cp ../files/* ${_TARBALL_ROOT}/
	@# archive exactly the staging directory; the former "utt*" glob also
	@# matched any older utt.*.tar.gz lying in this directory
	tar -czf ${UTT_DIST_OUTPUT}/${_TAR_FILE_NAME} -C ${DIR} $(notdir ${_TARBALL_ROOT})
	rm -rf ${_TARBALL_ROOT}

18
dist/tarball/Makefile.tarball vendored Normal file
View File

@ -0,0 +1,18 @@
# Per-user installation of the unpacked utt tarball.
# Everything goes below the home directory of the invoking user.
UTT_LIB_DIR=${HOME}/.local/lib/utt
UTT_SHARE_DIR=${HOME}/.local/share/utt
UTT_CONF_DIR=${HOME}/.utt

# Copy library files, shared data and configuration into place.
.PHONY: install
install:
	mkdir -p ${UTT_LIB_DIR}
	cp -r lib/utt/* ${UTT_LIB_DIR}
	mkdir -p ${UTT_SHARE_DIR}
	cp -r share/utt/* ${UTT_SHARE_DIR}
	mkdir -p ${UTT_CONF_DIR}
	cp -r etc/utt/* ${UTT_CONF_DIR}

# Remove the three directories created by install.
# The last line used the undefined name CONF_DIR, which left the
# configuration directory behind and made rm fail.
.PHONY: uninstall
uninstall:
	rm -r ${UTT_LIB_DIR}
	rm -r ${UTT_SHARE_DIR}
	rm -r ${UTT_CONF_DIR}

6
dist/tarball/README vendored Normal file
View File

@ -0,0 +1,6 @@
This directory contains a Makefile that creates the tar.gz archive.
To create the archive, just run:
make
Warning: you need to define the variable UTT_DIST_DIR.

45
doc/Makefile Normal file
View File

@ -0,0 +1,45 @@
include ../config.mak

# Builds every output format of the manual from utt.texinfo.
# The tool names (MAKEINFO, TEXI2DVI, TEXI2PDF, DVIPS) and the target
# directories (SHARE_DIR, DOC_DIR) are expected to come from config.mak.

# all and clean name actions, not files: declare them phony so a stray
# file called "all" or "clean" cannot disable them.
.PHONY: all
all: utt.info utt.dvi utt.html utt.pdf utt.ps

utt.info: utt.texinfo
	$(MAKEINFO) $< -o $@

utt.dvi: utt.texinfo
	$(TEXI2DVI) --build=clean $< -o $@

utt.html: utt.texinfo
	$(MAKEINFO) --html --no-split $< -o $@

utt.pdf: utt.texinfo
	$(TEXI2PDF) --build=clean $< -o $@

# PostScript is produced from the DVI output, not from the source directly.
utt.ps: utt.dvi
	$(DVIPS) $< -o $@

# Each group is installed only when its destination directory is configured.
.PHONY: install
install:
ifdef SHARE_DIR
	install -m 0644 utt.info $(SHARE_DIR)/info
endif
ifdef DOC_DIR
	install -m 0644 utt.dvi $(DOC_DIR)
	install -m 0644 utt.html $(DOC_DIR)
	install -m 0644 utt.pdf $(DOC_DIR)
	install -m 0644 utt.ps $(DOC_DIR)
endif

.PHONY: uninstall
uninstall:
ifdef SHARE_DIR
	rm $(SHARE_DIR)/info/utt.info
endif
ifdef DOC_DIR
	rm $(DOC_DIR)/utt.dvi
	rm $(DOC_DIR)/utt.html
	rm $(DOC_DIR)/utt.pdf
	rm $(DOC_DIR)/utt.ps
endif

# rm -f succeeds silently when some outputs were never built, which is what
# the former "rm ... || true" was working around.
.PHONY: clean
clean:
	rm -f utt.info utt.dvi utt.html utt.pdf utt.ps

19
lib/Makefile Normal file
View File

@ -0,0 +1,19 @@
include ../config.mak

# Support files copied into LIB_DIR, in installation order.
utt_lib_files = attr.pm seg.rb ser.l.template terms.m4

# Nothing is installed or removed unless LIB_DIR is configured.
.PHONY: install
install:
ifdef LIB_DIR
	for f in $(utt_lib_files); do \
		install -m 0755 $$f $(LIB_DIR) || exit 1; \
	done
endif

.PHONY: uninstall
uninstall:
ifdef LIB_DIR
	for f in $(utt_lib_files); do \
		rm $(LIB_DIR)/$$f || exit 1; \
	done
endif

133
lib/attr.pm Normal file
View File

@ -0,0 +1,133 @@
# attr - helpers for category descriptions of the form CAT/AttrValues,
# e.g. "ADJ/DcNpCaGp": a part-of-speech name, optionally followed by '/'
# and a list of attributes, each an upper-case name with one or more values.
package attr;
# Character classes such as [[:upper:]] and sort() follow the current locale.
use locale;
use strict;
use Data::Dumper;
# Part-of-speech name: one or more upper-case letters.
our $pos_re = qr/(?:[[:upper:]]+)/;
# Attribute name: one or more upper-case letters.
our $attr_re = qr/(?:[[:upper:]]+)/;
# A single value: one lower-case letter, digit or one of + ? ! * -,
# or an arbitrary string enclosed in <...>.
our $val_re = qr/(?:[[:lower:][:digit:]+?!*-]|<[^>\n]+>)/;
# Attribute name followed by at least one value.
our $av_re = qr/(?:$attr_re$val_re+)/;
# One or more attribute/value groups.
our $avlist_re = qr/(?:$av_re+)/;
# Full description: part of speech, optionally '/' and an attribute list.
our $cat_re = qr/(?:$pos_re(?:\/$avlist_re)?)/;
# match(@desc1, @desc2)
# Both arguments are parsed descriptions as returned by parse():
# (category, hash of attribute => hash of values).
# Returns 1 when the descriptions are compatible, 0 otherwise:
#   - the categories must be equal unless one of them is '*';
#   - for every attribute of @desc1 that is also present in @desc2, at least
#     one value of @desc1 must be set in @desc2.
# Attributes missing from @desc2 do not restrict the match.
sub match(\@\@)
{
    my ($cat1, $avs1) = @{shift @_};
    my ($cat2, $avs2) = @{shift @_};

    return 0 if $cat1 ne $cat2 && $cat1 ne '*' && $cat2 ne '*';

    ATTR: for my $attr (keys %$avs1)
    {
        next ATTR unless exists $avs2->{$attr};
        for my $val (keys %{$avs1->{$attr}})
        {
            # one shared value is enough for this attribute
            next ATTR if $avs2->{$attr}->{$val};
        }
        # shared attribute without any shared value
        return 0;
    }
    return 1;
}
# agree(@desc1, @desc2, $attr)
# Returns 1 when the two parsed descriptions agree on attribute $attr:
# either one of them does not carry the attribute at all, or they have at
# least one value of it in common. Returns 0 otherwise.
sub agree(\@\@$)
{
    my ($first, $second, $attr) = @_;
    my $vals1 = $first->[1]->{$attr};
    my $vals2 = $second->[1]->{$attr};

    return 1 unless $vals1 && $vals2;

    foreach my $candidate (keys %$vals1)
    {
        return 1 if exists $vals2->{$candidate};
    }
    return 0;
}
# function parse
# arg: a description string, e.g. "ADJ/KOLEDZY<alojzy><karol><jan>CpaiNp"
# value: reference to an array [<cat>, <avs>],
#        where <avs> is a reference to a hash containing pairs
#        attribute => hash of values (pairs value => 1), e.g.
# [
#   'ADJ',
#   {
#     'KOLEDZY' => {
#                    '<alojzy>' => 1,
#                    '<karol>' => 1,
#                    '<jan>' => 1
#                  },
#     'C' => {
#              'p' => 1,
#              'a' => 1,
#              'i' => 1
#            },
#     'N' => {
#              'p' => 1
#            }
#   }
# ];
# An attribute whose value list contains '*' is left out of <avs> entirely.
sub parse ($)
{
    my ($dstr) = @_;
    my $avs = {};

    # Split at the first '/' only: a <...> value may itself contain '/'.
    my ($cat, $attrlist) = split '/', $dstr, 2;
    # A bare category has no attribute list.
    $attrlist = '' unless defined $attrlist;

    ATTR:
    while ($attrlist =~ /($attr_re)($val_re+)/g)
    {
        my ($attrstr, $valstr) = ($1, $2);
        my %vals;
        while ($valstr =~ /($val_re)/g)
        {
            my $val = $1;
            next ATTR if $val eq '*';
            # <x> with a single lower-case letter is the same as plain x
            $val =~ s/^<([[:lower:]])>$/$1/;
            $vals{$val} = 1;
        }
        # Each iteration has its own %vals; the stored reference keeps it
        # alive after the block ends.
        $avs->{$attrstr} = \%vals;
    }

    [$cat, $avs];
}
# function unparse
# arg: a parsed description, as returned by parse
# value: the description as a string - the category, followed (when there
#        are attributes) by '/' and every attribute name in sorted order,
#        each immediately followed by its sorted values
sub unparse (\@)
{
    my $desc = shift @_;
    my ($cat, $avs) = @$desc;

    my @groups = map { $_ . join('', sort keys %{$avs->{$_}}) }
                 sort keys %$avs;

    return @groups ? $cat . '/' . join('', @groups) : $cat;
}
# canonize($dstr)
# Returns the canonical spelling of a description string: it is parsed and
# written back by unparse, which sorts attributes and values.
sub canonize ($)
{
    # $_[0] is the scalar argument; the former @_[0] was a one-element slice
    unparse @{parse $_[0]};
}
1;

31
lib/seg.rb Normal file
View File

@ -0,0 +1,31 @@
# One segment of UTT text: a single line of whitespace-separated columns,
# optionally followed by annotations written as name:value.
class Seg

  # s - the raw line (may still end with a newline)
  def initialize(s="")
    @line=s
  end

  # The line without its trailing newline.
  def to_s
    @line.chomp
  end

  # Replaces the stored line; returns self so calls can be chained.
  def set(s)
    @line=s
    self
  end

  # Looks a value up in the segment.
  #   Integer key - the key-th whitespace-separated column, counted from 1
  #   String key  - the value of the annotation "key:value" (the key must be
  #                 preceded by whitespace); nil when it is absent
  # The String key is interpolated into a regular expression unescaped.
  # Any other key type yields nil.
  def field(key)
    case key
    when Integer   # Fixnum no longer exists in current Ruby versions
      @line.split[key-1]
    when String
      @line =~ /\s#{key}:(\S+)/ ? $1 : nil
    end
  end

  alias [] field

  # All whitespace-separated columns of the line.
  def fields
    @line.split
  end

end

30
lib/ser.l.template Normal file
View File

@ -0,0 +1,30 @@
%{
/* Scanner skeleton: the two upper-case placeholders in the rules below are
   presumably substituted by the calling program before flex is run
   (TODO confirm against the ser script). */
#include<string.h>
/* running count of matches found so far */
int n=0;
%}
%%
PATTERN {
/* Action run for every match: prints a BOM marker line, the matched text
   itself and an EOM marker line, each marker tagged with ser:<n>. */
int start, end, len;
char *lastseg, *tmp;
/* a match has to end exactly at a line end */
if(yytext[yyleng-1]!='\n')
{fprintf(stderr,"ser: pattern matches incomplete line\n"); exit(1);}
n++;
/* the first two integers of the first matched line */
sscanf(yytext,"%d %d",&start,&len);
/* hide the final newline so strrchr finds the start of the last line */
yytext[yyleng-1]='\0';
if(tmp=strrchr(yytext,'\n'))
{
/* several lines matched: take the two integers of the last one */
lastseg=tmp+1;
sscanf(lastseg,"%d %d", &end, &len);
}
else
/* a single line matched: it is both the first and the last one */
end=start;
/* restore the newline before the text is echoed */
yytext[yyleng-1]='\n';
printf("%04d 00 BOM * ser:%d\n",start,n);
ECHO;
/* the closing marker carries end+len, the two integers of the last line */
printf("%04d 00 EOM * ser:%d\n",end+len,n);
}
.*\n DEFAULTACTION;

52
lib/terms.m4 Normal file
View File

@ -0,0 +1,52 @@
divert(-1)
#--------------------------------------------------------------------------
# Macros defined here may be used in pattern specifications.
# You can modify this file according to your needs.
# Everything between divert(-1) and divert(0) produces no output, so this
# file only contributes definitions.
# ENDOFSEGMENT and MORFIELD are macros expanded to, respectively,
# the end-of-segment marker (depends on the format: flattened or not)
# and the name of the annotation field containing morphological
# information (standard value is 'lem'). These values are controlled
# by programs using this file to expand search patterns (ser, grp, ...).
# seg(type,form,annotation)
# Expands to a regular expression for one segment: up to two optional
# leading numeric columns, then the type, the form and the annotation part,
# up to ENDOFSEGMENT. An empty argument matches any value in that position.
define(`seg',`(\s*((\d+\s+)(\d+\s+)?)?dnl
ifelse($1, `',`(\S+)', `($1)')\s+dnl
ifelse($2, `',`(\S+)', `($2)')dnl
ifelse($3, `',`((\s+\S+)*)', `(\s+($3))')\s*ENDOFSEGMENT)')
# form(f) - segment containing the form f
define(`form', `seg(,$1)')
# field(f) - segment containing auxiliary field f (anywhere among its fields)
define(`field', `seg(,,`(\S+\s+)*($1)(\s+\S+)*')')
# word, space, punct, number segments (assuming W, S, P, N segment types)
define(`space', `seg(`S',`$1')')
define(`word', `seg(`W',`$1')')
define(`punct', `seg(`P',`$1')')
define(`number', `seg(`N',`$1')')
# macros specific to PMDB format
# lexeme(l) - segment whose MORFIELD annotation has l before the comma
# cat(c)    - segment whose MORFIELD annotation has c after the comma
define(`lexeme', `field(`MORFIELD:(\S+;)?$1,\S+')')
define(`cat', `field(`MORFIELD:\S+,$1([,;]\S+)?')')
# Place here your macro definitions.
#--------------------------------------------------------------------------
divert(0)

31
share/Makefile Normal file
View File

@ -0,0 +1,31 @@
include ../config.mak

# Per-language data directories shipped with utt.
utt_iso_dir  = pl_PL.ISO-8859-2
utt_utf8_dir = pl_PL.UTF-8

# Nothing is installed or removed unless LANG_DIR is configured.
.PHONY: install
install:
ifdef LANG_DIR
	install -d $(LANG_DIR)/$(utt_iso_dir)
	install -d $(LANG_DIR)/$(utt_utf8_dir)
	for f in cor.bin gue.bin lem.bin $(utt_iso_dir).sym; do \
		install -m 0644 $(utt_iso_dir)/$$f $(LANG_DIR)/$(utt_iso_dir) || exit 1; \
	done
	install -m 0644 $(utt_utf8_dir)/lem.bin $(LANG_DIR)/$(utt_utf8_dir)
	for f in cats.dgc gram.dgc weights.kor; do \
		install -m 0644 $$f $(LANG_DIR) || exit 1; \
	done
endif

# Removal happens in the reverse order of installation; the language
# directories are removed last and only if they are empty (rmdir).
.PHONY: uninstall
uninstall:
ifdef LANG_DIR
	for f in weights.kor gram.dgc cats.dgc; do \
		rm $(LANG_DIR)/$$f || exit 1; \
	done
	rm $(LANG_DIR)/$(utt_utf8_dir)/lem.bin
	for f in $(utt_iso_dir).sym lem.bin gue.bin cor.bin; do \
		rm $(LANG_DIR)/$(utt_iso_dir)/$$f || exit 1; \
	done
	rmdir $(LANG_DIR)/$(utt_iso_dir)
	rmdir $(LANG_DIR)/$(utt_utf8_dir)
endif

696
share/cats.dgc Normal file
View File

@ -0,0 +1,696 @@
ADJ/DcNpCaGp
ADJ/DcNpCd
ADJ/DcNpCgl
ADJ/DcNpCi
ADJ/DcNpCnavGaifn
ADJ/DcNpCnvGp
ADJ/DcNsCaGi
ADJ/DcNsCaGpa
ADJ/DcNsCaiGf
ADJ/DcNsCavGf
ADJ/DcNsCdGpain
ADJ/DcNsCgdlGf
ADJ/DcNsCgGpain
ADJ/DcNsCilGpain
ADJ/DcNsCnavGn
ADJ/DcNsCnvGpai
ADJ/DpNpCaGp
ADJ/DpNpCd
ADJ/DpNpCgl
ADJ/DpNpCi
ADJ/DpNpCnavGaifn
ADJ/DpNpCnvGp
ADJ/DpNsCaGi
ADJ/DpNsCaGpa
ADJ/DpNsCaiGf
ADJ/DpNsCavGf
ADJ/DpNsCdGpain
ADJ/DpNsCgdlGf
ADJ/DpNsCgGpain
ADJ/DpNsCilGpain
ADJ/DpNsCnavGn
ADJ/DpNsCnvGpai
ADJ/DsNpCaGp
ADJ/DsNpCd
ADJ/DsNpCgl
ADJ/DsNpCi
ADJ/DsNpCnavGaifn
ADJ/DsNpCnvGp
ADJ/DsNsCaGi
ADJ/DsNsCaGpa
ADJ/DsNsCaiGf
ADJ/DsNsCavGf
ADJ/DsNsCdGpain
ADJ/DsNsCgdlGf
ADJ/DsNsCgGpain
ADJ/DsNsCilGpain
ADJ/DsNsCnavGn
ADJ/DsNsCnvGpai
ADJNUM/NpCaGp
ADJNUM/NpCd
ADJNUM/NpCgl
ADJNUM/NpCi
ADJNUM/NpCnavGaifn
ADJNUM/NpCnvGp
ADJNUM/NsCaGi
ADJNUM/NsCaGpa
ADJNUM/NsCaiGf
ADJNUM/NsCavGf
ADJNUM/NsCdGpain
ADJNUM/NsCgdlGf
ADJNUM/NsCgGpain
ADJNUM/NsCilGpain
ADJNUM/NsCnavGn
ADJNUM/NsCnvGpai
ADJPAP/NpCaGp
ADJPAP/NpCd
ADJPAP/NpCgl
ADJPAP/NpCi
ADJPAP/NpCnavGaifn
ADJPAP/NpCnvGp
ADJPAP/NsCaGi
ADJPAP/NsCaGpa
ADJPAP/NsCaiGf
ADJPAP/NsCavGf
ADJPAP/NsCdGpain
ADJPAP/NsCgdlGf
ADJPAP/NsCgGpain
ADJPAP/NsCilGpain
ADJPAP/NsCnavGn
ADJPAP/NsCnvGpai
ADJPP/NpCaGp
ADJPP/NpCd
ADJPP/NpCgl
ADJPP/NpCi
ADJPP/NpCnavGaifn
ADJPP/NpCnvGp
ADJPP/NsCaGi
ADJPP/NsCaGpa
ADJPP/NsCaiGf
ADJPP/NsCavGf
ADJPP/NsCdGpain
ADJPP/NsCgdlGf
ADJPP/NsCgGpain
ADJPP/NsCilGpain
ADJPP/NsCnavGn
ADJPP/NsCnvGpai
ADJPRO/NpCaGp
ADJPRO/NpCd
ADJPRO/NpCgl
ADJPRO/NpCi
ADJPRO/NpCnavGaifn
ADJPRO/NpCnvGp
ADJPRO/NsCaGi
ADJPRO/NsCaGpa
ADJPRO/NsCaiGf
ADJPRO/NsCavGf
ADJPRO/NsCdGpain
ADJPRO/NsCgdlGf
ADJPRO/NsCgGpain
ADJPRO/NsCilGpain
ADJPRO/NsCnavGn
ADJPRO/NsCnvGpai
ADJPRO/ZdNpCaGp
ADJPRO/ZdNpCd
ADJPRO/ZdNpCgl
ADJPRO/ZdNpCi
ADJPRO/ZdNpCnavGaifn
ADJPRO/ZdNpCnvGp
ADJPRO/ZdNsCaGi
ADJPRO/ZdNsCaGpa
ADJPRO/ZdNsCaiGf
ADJPRO/ZdNsCavGf
ADJPRO/ZdNsCdGpain
ADJPRO/ZdNsCgdlGf
ADJPRO/ZdNsCgGpain
ADJPRO/ZdNsCilGpain
ADJPRO/ZdNsCnavGn
ADJPRO/ZdNsCnvGpai
ADJPRO/ZgNpCaGp
ADJPRO/ZgNpCd
ADJPRO/ZgNpCgl
ADJPRO/ZgNpCi
ADJPRO/ZgNpCnavGaifn
ADJPRO/ZgNpCnvGp
ADJPRO/ZgNsCaGi
ADJPRO/ZgNsCaGpa
ADJPRO/ZgNsCaiGf
ADJPRO/ZgNsCavGf
ADJPRO/ZgNsCdGpain
ADJPRO/ZgNsCgdlGf
ADJPRO/ZgNsCgGpain
ADJPRO/ZgNsCilGpain
ADJPRO/ZgNsCnavGn
ADJPRO/ZgNsCnvGpai
ADJPRO/ZiNpCaGp
ADJPRO/ZiNpCd
ADJPRO/ZiNpCgl
ADJPRO/ZiNpCi
ADJPRO/ZiNpCnavGaifn
ADJPRO/ZiNpCnvGp
ADJPRO/ZiNsCaGi
ADJPRO/ZiNsCaGpa
ADJPRO/ZiNsCaiGf
ADJPRO/ZiNsCavGf
ADJPRO/ZiNsCdGpain
ADJPRO/ZiNsCgdlGf
ADJPRO/ZiNsCgGpain
ADJPRO/ZiNsCilGpain
ADJPRO/ZiNsCnavGn
ADJPRO/ZiNsCnvGpai
ADJPRO/ZnNpCaGp
ADJPRO/ZnNpCd
ADJPRO/ZnNpCgl
ADJPRO/ZnNpCi
ADJPRO/ZnNpCnavGaifn
ADJPRO/ZnNpCnvGp
ADJPRO/ZnNsCaGi
ADJPRO/ZnNsCaGpa
ADJPRO/ZnNsCaiGf
ADJPRO/ZnNsCavGf
ADJPRO/ZnNsCdGpain
ADJPRO/ZnNsCgdlGf
ADJPRO/ZnNsCgGpain
ADJPRO/ZnNsCilGpain
ADJPRO/ZnNsCnavGn
ADJPRO/ZnNsCnvGpai
ADJPRO/ZqNpCaGp
ADJPRO/ZqNpCd
ADJPRO/ZqNpCgl
ADJPRO/ZqNpCi
ADJPRO/ZqNpCnavGaifn
ADJPRO/ZqNpCnvGp
ADJPRO/ZqNsCaGi
ADJPRO/ZqNsCaGpa
ADJPRO/ZqNsCaiGf
ADJPRO/ZqNsCavGf
ADJPRO/ZqNsCdGpain
ADJPRO/ZqNsCgdlGf
ADJPRO/ZqNsCgGpain
ADJPRO/ZqNsCilGpain
ADJPRO/ZqNsCnavGn
ADJPRO/ZqNsCnvGpai
ADJPRO/ZqrNpCaGp
ADJPRO/ZqrNpCd
ADJPRO/ZqrNpCgl
ADJPRO/ZqrNpCi
ADJPRO/ZqrNpCnavGaifn
ADJPRO/ZqrNpCnvGp
ADJPRO/ZqrNsCaGi
ADJPRO/ZqrNsCaGpa
ADJPRO/ZqrNsCaiGf
ADJPRO/ZqrNsCavGf
ADJPRO/ZqrNsCdGpain
ADJPRO/ZqrNsCgdlGf
ADJPRO/ZqrNsCgGpain
ADJPRO/ZqrNsCilGpain
ADJPRO/ZqrNsCnavGn
ADJPRO/ZqrNsCnvGpai
ADJPRO/ZsNpCaGp
ADJPRO/ZsNpCd
ADJPRO/ZsNpCgl
ADJPRO/ZsNpCi
ADJPRO/ZsNpCnavGaifn
ADJPRO/ZsNpCnvGp
ADJPRO/ZsNsCaGi
ADJPRO/ZsNsCaGpa
ADJPRO/ZsNsCaiGf
ADJPRO/ZsNsCavGf
ADJPRO/ZsNsCdGpain
ADJPRO/ZsNsCgdlGf
ADJPRO/ZsNsCgGpain
ADJPRO/ZsNsCilGpain
ADJPRO/ZsNsCnavGn
ADJPRO/ZsNsCnvGpai
ADJPRP/NpCaGp
ADJPRP/NpCd
ADJPRP/NpCgl
ADJPRP/NpCi
ADJPRP/NpCnavGaifn
ADJPRP/NpCnvGp
ADJPRP/NsCaGi
ADJPRP/NsCaGpa
ADJPRP/NsCaiGf
ADJPRP/NsCavGf
ADJPRP/NsCdGpain
ADJPRP/NsCgdlGf
ADJPRP/NsCgGpain
ADJPRP/NsCilGpain
ADJPRP/NsCnavGn
ADJPRP/NsCnvGpai
ADVANP
ADV/Dc
ADV/Dp
ADV/Ds
ADVNUM
ADVPRO
ADVPRO/Zd
ADVPRO/Zi
ADVPRO/Zn
ADVPRO/Zq
ADVPRO/Zqr
ADVPRO/Zr
ADVPRP
APP
BYC/Vb
BYC/VpMcNpP1Gaifn
BYC/VpMcNpP1Gp
BYC/VpMcNpP2Gaifn
BYC/VpMcNpP2Gp
BYC/VpMcNpP3Gaifn
BYC/VpMcNpP3Gp
BYC/VpMcNsP1Gf
BYC/VpMcNsP1Gpai
BYC/VpMcNsP2Gf
BYC/VpMcNsP2Gpai
BYC/VpMcNsP3Gf
BYC/VpMcNsP3Gn
BYC/VpMcNsP3Gpai
BYC/VpMdTaNpP1Gaifn
BYC/VpMdTaNpP1Gp
BYC/VpMdTaNpP2Gaifn
BYC/VpMdTaNpP2Gp
BYC/VpMdTaNpP3Gaifn
BYC/VpMdTaNpP3Gp
BYC/VpMdTaNsP1Gf
BYC/VpMdTaNsP1Gpai
BYC/VpMdTaNsP2Gf
BYC/VpMdTaNsP2Gpai
BYC/VpMdTaNsP3Gf
BYC/VpMdTaNsP3Gn
BYC/VpMdTaNsP3Gpai
BYC/VpMdTrfNpP1
BYC/VpMdTrfNpP2
BYC/VpMdTrfNpP3
BYC/VpMdTrfNsP1
BYC/VpMdTrfNsP2
BYC/VpMdTrfNsP3
BYC/VpMiNpP1
BYC/VpMiNpP2
BYC/VpMiNsP2
CONJ
EXCL
N/GaNpCa
N/GaNpCd
N/GaNpCg
N/GaNpCi
N/GaNpCl
N/GaNpCn
N/GaNpCv
N/GaNsCa
N/GaNsCd
N/GaNsCg
N/GaNsCi
N/GaNsCl
N/GaNsCn
N/GaNsCv
N/GfNpCa
N/GfNpCd
N/GfNpCg
N/GfNpCi
N/GfNpCl
N/GfNpCn
N/GfNpCv
N/GfNsCa
N/GfNsCd
N/GfNsCg
N/GfNsCi
N/GfNsCl
N/GfNsCn
N/GfNsCv
N/GiNpCa
N/GiNpCd
N/GiNpCg
N/GiNpCi
N/GiNpCl
N/GiNpCn
N/GiNpCv
N/GiNsCa
N/GiNsCd
N/GiNsCg
N/GiNsCi
N/GiNsCl
N/GiNsCn
N/GiNsCv
N/GnNpCa
N/GnNpCd
N/GnNpCg
N/GnNpCi
N/GnNpCl
N/GnNpCn
N/GnNpCv
N/GnNsCa
N/GnNsCd
N/GnNsCg
N/GnNsCi
N/GnNsCl
N/GnNsCn
N/GnNsCv
N/G?NpCa
N/G*NpCa
N/G?NpCd
N/G*NpCd
N/G?NpCg
N/G*NpCg
N/G?NpCi
N/G*NpCi
N/G?NpCl
N/G*NpCl
N/G?NpCn
N/G*NpCn
N/G?NpCv
N/G*NpCv
N/G?NsCa
N/G?NsCd
N/G?NsCg
N/G?NsCi
N/G?NsCl
N/G?NsCn
N/G?NsCv
N/GpNpCa
N/GpNpCd
N/GpNpCg
N/GpNpCi
N/GpNpCl
N/GpNpCn
N/GpNpCv
N/GpNsCa
N/GpNsCd
N/GpNsCg
N/GpNsCi
N/GpNsCl
N/GpNsCn
N/GpNsCv
NPRO/ZdGnNsCa
NPRO/ZdGnNsCd
NPRO/ZdGnNsCg
NPRO/ZdGnNsCi
NPRO/ZdGnNsCl
NPRO/ZdGnNsCn
NPRO/ZgGnNsCa
NPRO/ZgGnNsCd
NPRO/ZgGnNsCg
NPRO/ZgGnNsCi
NPRO/ZgGnNsCl
NPRO/ZgGnNsCn
NPRO/ZgGpNpCa
NPRO/ZgGpNpCd
NPRO/ZgGpNpCg
NPRO/ZgGpNpCi
NPRO/ZgGpNpCl
NPRO/ZgGpNpCn
NPRO/ZiGnNsCa
NPRO/ZiGnNsCd
NPRO/ZiGnNsCg
NPRO/ZiGnNsCi
NPRO/ZiGnNsCl
NPRO/ZiGnNsCn
NPRO/ZiGpNsCa
NPRO/ZiGpNsCd
NPRO/ZiGpNsCg
NPRO/ZiGpNsCi
NPRO/ZiGpNsCl
NPRO/ZiGpNsCn
NPRO/ZnGnNsCa
NPRO/ZnGnNsCd
NPRO/ZnGnNsCg
NPRO/ZnGnNsCi
NPRO/ZnGnNsCl
NPRO/ZnGnNsCn
NPRO/ZnGpNsCa
NPRO/ZnGpNsCd
NPRO/ZnGpNsCg
NPRO/ZnGpNsCi
NPRO/ZnGpNsCl
NPRO/ZnGpNsCn
NPRO/ZpGaifnNpCa
NPRO/ZpGaifnNpCd
NPRO/ZpGaifnNpCg
NPRO/ZpGaifnNpCi
NPRO/ZpGaifnNpCl
NPRO/ZpGaifnNpCn
NPRO/ZpGfNsCa
NPRO/ZpGfNsCd
NPRO/ZpGfNsCg
NPRO/ZpGfNsCi
NPRO/ZpGfNsCl
NPRO/ZpGfNsCn
NPRO/ZpGnNsCa
NPRO/ZpGnNsCd
NPRO/ZpGnNsCg
NPRO/ZpGnNsCi
NPRO/ZpGnNsCl
NPRO/ZpGnNsCn
NPRO/ZpG*NpCa
NPRO/ZpG*NpCd
NPRO/ZpG*NpCg
NPRO/ZpG*NpCi
NPRO/ZpG*NpCl
NPRO/ZpG*NpCn
NPRO/ZpG*NsCa
NPRO/ZpG*NsCd
NPRO/ZpG*NsCg
NPRO/ZpG*NsCi
NPRO/ZpG*NsCl
NPRO/ZpG*NsCn
NPRO/ZpGpaiNsCa
NPRO/ZpGpaiNsCd
NPRO/ZpGpaiNsCg
NPRO/ZpGpaiNsCi
NPRO/ZpGpaiNsCl
NPRO/ZpGpaiNsCn
NPRO/ZpGpNpCa
NPRO/ZpGpNpCd
NPRO/ZpGpNpCg
NPRO/ZpGpNpCi
NPRO/ZpGpNpCl
NPRO/ZpGpNpCn
NPRO/ZqGnNsCa
NPRO/ZqGnNsCd
NPRO/ZqGnNsCg
NPRO/ZqGnNsCi
NPRO/ZqGnNsCl
NPRO/ZqGnNsCn
NPRO/ZqGpNsCa
NPRO/ZqGpNsCd
NPRO/ZqGpNsCg
NPRO/ZqGpNsCi
NPRO/ZqGpNsCl
NPRO/ZqGpNsCn
NPRO/ZqrGnNsCa
NPRO/ZqrGnNsCd
NPRO/ZqrGnNsCg
NPRO/ZqrGnNsCi
NPRO/ZqrGnNsCl
NPRO/ZqrGnNsCn
NPRO/ZqrGpNsCa
NPRO/ZqrGpNsCd
NPRO/ZqrGpNsCg
NPRO/ZqrGpNsCi
NPRO/ZqrGpNsCl
NPRO/ZqrGpNsCn
NPRO/ZxG*N*Ca
NPRO/ZxG*N*Cd
NPRO/ZxG*N*Cg
NPRO/ZxG*N*Ci
NPRO/ZxG*N*Cl
NUMCOL/Ca
NUMCOL/Cd
NUMCOL/Cg
NUMCOL/Ci
NUMCOL/Cl
NUMCOL/Cn
NUMCRD/Ca
NUMCRD/CaGaifn
NUMCRD/CaGain
NUMCRD/CaGf
NUMCRD/CaGp
NUMCRD/Cd
NUMCRD/Cg
NUMCRD/Ci
NUMCRD/CiGf
NUMCRD/CiGpain
NUMCRD/Cl
NUMCRD/Cn
NUMCRD/CnGaifn
NUMCRD/CnGain
NUMCRD/CnGf
NUMCRD/CnGp
NUMCRD/ZiCaGaifn
NUMCRD/ZiCaGain
NUMCRD/ZiCaGf
NUMCRD/ZiCaGp
NUMCRD/ZiCd
NUMCRD/ZiCg
NUMCRD/ZiCi
NUMCRD/ZiCiGf
NUMCRD/ZiCiGpain
NUMCRD/ZiCl
NUMCRD/ZiCnGaifn
NUMCRD/ZiCnGain
NUMCRD/ZiCnGf
NUMCRD/ZiCnGp
NUMCRD/ZqiCaGaifn
NUMCRD/ZqiCaGp
NUMCRD/ZqiCd
NUMCRD/ZqiCg
NUMCRD/ZqiCi
NUMCRD/ZqiCl
NUMCRD/ZqiCnGaifn
NUMCRD/ZqiCnGp
NUMORD/NpCaGp
NUMORD/NpCd
NUMORD/NpCgl
NUMORD/NpCi
NUMORD/NpCnavGaifn
NUMORD/NpCnvGp
NUMORD/NsCaGi
NUMORD/NsCaGpa
NUMORD/NsCaiGf
NUMORD/NsCavGf
NUMORD/NsCdGpain
NUMORD/NsCgdlGf
NUMORD/NsCgGpain
NUMORD/NsCilGpain
NUMORD/NsCnavGn
NUMORD/NsCnvGpai
NUMPAR
NUMPAR/Dc
NUMPAR/Dp
NUMPAR/Ds
NUMPAR/Ns
NUMPAR/NsGf
NUMPAR/NsGpain
NV/Ca
NV/Cd
NV/Cg
NV/Ci
NV/Cl
NV/Cn
ONO
P
PART
P/Ca
P/Cai
P/Cal
P/Cd
P/Cg
P/Cga
P/Cgai
P/Cgd
P/Cgi
P/Ci
P/Cl
PPRO/Zp
V/AiVb
V/AiViTa
V/AiVpMcNpP1Gaifn
V/AiVpMcNpP1Gp
V/AiVpMcNpP2Gaifn
V/AiVpMcNpP2Gp
V/AiVpMcNpP3Gaifn
V/AiVpMcNpP3Gp
V/AiVpMcNsP1Gf
V/AiVpMcNsP1Gpai
V/AiVpMcNsP2Gf
V/AiVpMcNsP2Gpai
V/AiVpMcNsP3Gf
V/AiVpMcNsP3Gn
V/AiVpMcNsP3Gpai
V/AiVpMdTaNpP1Gaifn
V/AiVpMdTaNpP1Gp
V/AiVpMdTaNpP2Gaifn
V/AiVpMdTaNpP2Gp
V/AiVpMdTaNpP3Gaifn
V/AiVpMdTaNpP3Gp
V/AiVpMdTaNsP1Gf
V/AiVpMdTaNsP1Gpai
V/AiVpMdTaNsP2Gf
V/AiVpMdTaNsP2Gpai
V/AiVpMdTaNsP3Gf
V/AiVpMdTaNsP3Gn
V/AiVpMdTaNsP3Gpai
V/AiVpMdTrfNpP1
V/AiVpMdTrfNpP2
V/AiVpMdTrfNpP3
V/AiVpMdTrfNsP1
V/AiVpMdTrfNsP2
V/AiVpMdTrfNsP3
V/AiVpMiNpP1
V/AiVpMiNpP2
V/AiVpMiNsP2
V/ApVb
V/ApViTa
V/ApVpMcNpP1Gaifn
V/ApVpMcNpP1Gp
V/ApVpMcNpP2Gaifn
V/ApVpMcNpP2Gp
V/ApVpMcNpP3Gaifn
V/ApVpMcNpP3Gp
V/ApVpMcNsP1Gf
V/ApVpMcNsP1Gpai
V/ApVpMcNsP2Gf
V/ApVpMcNsP2Gpai
V/ApVpMcNsP3Gf
V/ApVpMcNsP3Gn
V/ApVpMcNsP3Gpai
V/ApVpMdTaNpP1Gaifn
V/ApVpMdTaNpP1Gp
V/ApVpMdTaNpP2Gaifn
V/ApVpMdTaNpP2Gp
V/ApVpMdTaNpP3Gaifn
V/ApVpMdTaNpP3Gp
V/ApVpMdTaNsP1Gf
V/ApVpMdTaNsP1Gpai
V/ApVpMdTaNsP2Gf
V/ApVpMdTaNsP2Gpai
V/ApVpMdTaNsP3Gf
V/ApVpMdTaNsP3Gn
V/ApVpMdTaNsP3Gpai
V/ApVpMdTrfNpP1
V/ApVpMdTrfNpP2
V/ApVpMdTrfNpP3
V/ApVpMdTrfNsP1
V/ApVpMdTrfNsP2
V/ApVpMdTrfNsP3
V/ApVpMiNpP1
V/ApVpMiNpP2
V/ApVpMiNsP2
V/GiVb
V/GiViTa
V/GiVpMcNpP1Gaifn
V/GiVpMcNpP1Gp
V/GiVpMcNpP2Gaifn
V/GiVpMcNpP2Gp
V/GiVpMcNpP3Gaifn
V/GiVpMcNpP3Gp
V/GiVpMcNsP1Gf
V/GiVpMcNsP1Gpai
V/GiVpMcNsP2Gf
V/GiVpMcNsP2Gpai
V/GiVpMcNsP3Gf
V/GiVpMcNsP3Gn
V/GiVpMcNsP3Gpai
V/GiVpMdTaNpP1Gaifn
V/GiVpMdTaNpP1Gp
V/GiVpMdTaNpP2Gaifn
V/GiVpMdTaNpP2Gp
V/GiVpMdTaNpP3Gaifn
V/GiVpMdTaNpP3Gp
V/GiVpMdTaNsP1Gf
V/GiVpMdTaNsP1Gpai
V/GiVpMdTaNsP2Gf
V/GiVpMdTaNsP2Gpai
V/GiVpMdTaNsP3Gf
V/GiVpMdTaNsP3Gn
V/GiVpMdTaNsP3Gpai
V/GiVpMdTrfNpP1
V/GiVpMdTrfNpP2
V/GiVpMdTrfNpP3
V/GiVpMdTrfNsP1
V/GiVpMdTrfNsP2
V/GiVpMdTrfNsP3
V/GiVpMiNpP1
V/GiVpMiNpP2
V/GiVpMiNsP2
VNI

124
share/gram.dgc Normal file
View File

@ -0,0 +1,124 @@
#FLAG REL
#UP REL
#ORDER * pcmpl
#ORDER ..
#ORDER subj .. * .. cmpl
#ORDER refl .. *
#ORDER * refl
#CONSTR cmpl_g => ~cmpl_a
#CONSTR cmpl_inf => ~(cmpl_g|cmpl_d|cmpl_a|cmpl_p|cmpl_ze|cmpl_s)
#CONSTR subj_pred => subj
#constr cmpl_pred => cmpl
#subj
ROLE subj # deklaracja roli (typ zaleznosci) podmiot
AGR subj N # zgodnosc podrzednika z nadrzednikiem co do liczby
AGR subj G # zgodnosc podrzednika z nadrzednikiem co do rodzaju
GOV subj */Cn # wymaganie by podrzednik byl w mianowniku
# pary kategorii, jakie mozna polaczyc zaleznoscia typu podmiot
# nadrzednik podrzednik
LINK V/VpP3,BYC/VpP3 N,NPRO subj
# (przecinek znaczy lub)
ROLE cmpl_ga # dopelnienie w bierniku/dopelniaczu
ROLE cmpl_d # w celowniku
ROLE cmpl_i # w narzedniku
ROLE cmpl_inf # w bezokoliczniku
ROLE cmpl_s # bedace zdaniem
ROLE cmpl_ze # bedace zdaniem poprzedzonym 'ze'
ROLE aux #
ROLE mod # modyfikator (okolicznik/przydawka) (niewymagane określenie) (biały kot)
ROLE prep # modyfikator w postaci frazy przyimkowej
ROLE pcmpl # dopełnienie przyimka (wymagany rzeczownik)
ROLE ccmpl # dopełnienie spójnika (wymagany drugi człon konstrukcji spójnikowej)
ROLE poss # np. książka Marii, ojciec kolegi
ROLE restr # (bardzo <- duży)
ROLE part # partykuła
ROLE coord # koordynacja (powiązanie pierwszego członu konstrukcji współrzędnej
# ze spójnikiem współrzędnym centralnym ( Oto [pies -> i] kot. )
AGR aux N
AGR aux G
AGR mod N
AGR mod C
AGR mod G
AGR pcmpl C
GOV cmpl_ga */Cga
GOV cmpl_d */Cd
GOV cmpl_i */Ci
GOV poss */Cg
SGL subj
SGL cmpl_ga
SGL cmpl_d
SGL cmpl_i
SGL cmpl_inf
SGL aux
SGL pcmpl
SGL ccmpl
SGL poss
SGL restr
REQ P pcmpl
REQ CONJ ccmpl
RIGHT pcmpl
RIGHT ccmpl
RIGHT cmpl_ze
RIGHT poss
#cmpl_*
LINK V,ADVPRP,ADVANP,ADJPRP,ADJPAP,NV N,NPRO cmpl_ga
LINK V,ADVPRP,ADVANP,ADJPRP,ADJPAP,NV N,NPRO cmpl_d
LINK V,ADVPRP,ADVANP,ADJPRP,ADJPAP,NV N,NPRO cmpl_i
LINK V,ADVPRP,ADVANP,ADJPRP,ADJPAP,NV V/Vb cmpl_inf
LINK V,ADVPRP,ADVANP,ADJPRP,ADJPAP,NV CONJ cmpl_ze
#aux
#czas przyszly analityczny
LINK BYC/VpMdTf V/AiVpP3,V/AiVb aux
#czas zaprzeszly(?)
LINK BYC/VpMc V/VpP3 aux
#BYC jako lacznik w (jest bialy, jest zaszlachtowany, jest pilotem)
LINK BYC ADJPAP/Cn,ADJ/Cn,N/Ci aux
#mod
LINK V ADV,ADVPRP,ADVANP,ADVPRO mod
LINK N,NV ADJ,ADJPAP,ADJPRP,ADJPRO mod
#prep
LINK N,V P prep
#pcmpl
LINK P N,NV pcmpl
#poss
LINK N N,NV,NPRO poss
#ccmpl
LINK CONJ V/Vp ccmpl
#restr
LINK ADJ ADV restr
#part
LINK V PART part

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,8 @@
lcase a ą b c ć d e ę f g h i j k l ł m n ń o ó
lcase p q r s ś t u v w x y z ź ż é ö ü ä
ucase A Ą B C Ć D E Ę F G H I J K L Ł M N Ń O Ó
ucase P Q R S Ś T U V W X Y Z Ź Ż
letter lcase ucase
digit 0 1 2 3 4 5 6 7 8 9
punct , . @ / ' ~ ; _ - + ? \
all letter digit signs sem

BIN
share/pl_PL.UTF-8/lem.bin Normal file

Binary file not shown.

21
share/weights.kor Normal file
View File

@ -0,0 +1,21 @@
%stdcor 1
%xchg 1
¿ rz 0.5
ch h 0.5
u ó 0.5
u o 0.75
om ¹ 0.5
om a 0.75
en ê 0.5
en ê 0.75
a ¹ 0.25
c æ 0.25
e ê 0.25
l ³ 0.25
n ñ 0.25
o ó 0.25
s ¶ 0.25
z ¿ 0.25
z ¼ 0.25
x ¼ 0.30

8
src/common/Makefile Normal file
View File

@ -0,0 +1,8 @@
# main: cmdline.c main_template.cc
# g++ -o main cmdline.c common.cc main_template.cc
# cmdline.c cmdline.h : cmdline.ggo
# gengetopt -i cmdline.ggo
# cmdline.ggo: cmdline_common.ggo cmdline_program.ggo
# cat cmdline_common.ggo cmdline_program.ggo > cmdline.ggo

18
src/common/README Normal file
View File

@ -0,0 +1,18 @@
Propozycja ujednolicenia dzialania klocka na poziomie
funkcji main. Parametry meta - zdefiniowane dla
wszystkich, poza tok, programow, definiujace ich zachowanie
w systemie klockow.
cmdline_common.ggo - deklaracje parametrow meta
cmdline_program.ggo - przyklad deklaracji parametrow programu
nazwa docelowa np. cmdline_guess.ggo
common.cc - zmienne globalne zawierajace informacje
przekazane przez parametry meta
common.h
main_template.cc - szkielet funkcji main
Makefile - sposob kompilacji

1248
src/common/cmdline.c Normal file

File diff suppressed because it is too large Load Diff

39
src/common/cmdline.ggo Normal file
View File

@ -0,0 +1,39 @@
# Input for GNU gengetopt, which generates cmdline.c and cmdline.h.
# The first group holds the options shared by the UTT programs; it is
# presumably the content of cmdline_common.ggo (see README - TODO confirm).
# The description strings below are the help texts shown to the user.
#section "Common UTT options"
option "input" f "Input file" string no
option "output" o "Output file for succesfully processed segments" string no
option "fail" e "Output file for unsuccesfully processed segments " string no
# hidden options are only listed by the full help
option "only-fail" - "Print only segments the program failed to process" flag off hidden
option "no-fail" - "Print only segments the program processed" flag off hidden
option "copy" c "Copy succesfully processed segments to standard output" flag off
# options marked multiple may be given more than once
option "process" p "Process segments with this tag" string no multiple
option "select" s "Select only segments with this field" string no multiple
option "ignore" S "Select only segments without this field" string no multiple
option "output-field" O "Output field name" string no
option "input-field" I "Input field name" string no multiple
option "interactive" i "Toggle interactive mode" flag off
option "config" - "Configuration file" string typestr="FILENAME" no
option "one-field" 1 "Print all results in one segments (creates ambiguous annotation)" flag off
option "one-line" - "Print annotation alternatives as additional fields" flag off
option "language" - "Language." string no
# Program-specific part: package name, version and the options of the
# example program itself.
package "guess"
version "0.1"
option "color" l "Show guessed descriptions in colour." flag off

248
src/common/cmdline.h Normal file
View File

@ -0,0 +1,248 @@
/** @file cmdline.h
* @brief The header file for the command line option parser
* generated by GNU Gengetopt version 2.22.4
* http://www.gnu.org/software/gengetopt.
* DO NOT modify this file, since it can be overwritten
* @author GNU Gengetopt by Lorenzo Bettini */
#ifndef CMDLINE_H
#define CMDLINE_H
/* If we use autoconf. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h> /* for FILE */
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#ifndef CMDLINE_PARSER_PACKAGE
/** @brief the program name (used for printing errors) */
#define CMDLINE_PARSER_PACKAGE "guess"
#endif
#ifndef CMDLINE_PARSER_PACKAGE_NAME
/** @brief the complete program name (used for help and version) */
#define CMDLINE_PARSER_PACKAGE_NAME "guess"
#endif
#ifndef CMDLINE_PARSER_VERSION
/** @brief the program version */
#define CMDLINE_PARSER_VERSION "0.1"
#endif
/** @brief Where the command line options are stored */
struct gengetopt_args_info
{
const char *help_help; /**< @brief Print help and exit help description. */
const char *full_help_help; /**< @brief Print help, including hidden options, and exit help description. */
const char *version_help; /**< @brief Print version and exit help description. */
char * input_arg; /**< @brief Input file. */
char * input_orig; /**< @brief Input file original value given at command line. */
const char *input_help; /**< @brief Input file help description. */
char * output_arg; /**< @brief Output file for succesfully processed segments. */
char * output_orig; /**< @brief Output file for succesfully processed segments original value given at command line. */
const char *output_help; /**< @brief Output file for succesfully processed segments help description. */
char * fail_arg; /**< @brief Output file for unsuccesfully processed segments . */
char * fail_orig; /**< @brief Output file for unsuccesfully processed segments original value given at command line. */
const char *fail_help; /**< @brief Output file for unsuccesfully processed segments help description. */
int only_fail_flag; /**< @brief Print only segments the program failed to process (default=off). */
const char *only_fail_help; /**< @brief Print only segments the program failed to process help description. */
int no_fail_flag; /**< @brief Print only segments the program processed (default=off). */
const char *no_fail_help; /**< @brief Print only segments the program processed help description. */
int copy_flag; /**< @brief Copy succesfully processed segments to standard output (default=off). */
const char *copy_help; /**< @brief Copy succesfully processed segments to standard output help description. */
char ** process_arg; /**< @brief Process segments with this tag. */
char ** process_orig; /**< @brief Process segments with this tag original value given at command line. */
unsigned int process_min; /**< @brief Process segments with this tag's minimum occurreces */
unsigned int process_max; /**< @brief Process segments with this tag's maximum occurreces */
const char *process_help; /**< @brief Process segments with this tag help description. */
char ** select_arg; /**< @brief Select only segments with this field. */
char ** select_orig; /**< @brief Select only segments with this field original value given at command line. */
unsigned int select_min; /**< @brief Select only segments with this field's minimum occurreces */
unsigned int select_max; /**< @brief Select only segments with this field's maximum occurreces */
const char *select_help; /**< @brief Select only segments with this field help description. */
char ** ignore_arg; /**< @brief Select only segments without this field. */
char ** ignore_orig; /**< @brief Select only segments without this field original value given at command line. */
unsigned int ignore_min; /**< @brief Select only segments without this field's minimum occurreces */
unsigned int ignore_max; /**< @brief Select only segments without this field's maximum occurreces */
const char *ignore_help; /**< @brief Select only segments without this field help description. */
char * output_field_arg; /**< @brief Output field name. */
char * output_field_orig; /**< @brief Output field name original value given at command line. */
const char *output_field_help; /**< @brief Output field name help description. */
char ** input_field_arg; /**< @brief Input field name. */
char ** input_field_orig; /**< @brief Input field name original value given at command line. */
unsigned int input_field_min; /**< @brief Input field name's minimum occurreces */
unsigned int input_field_max; /**< @brief Input field name's maximum occurreces */
const char *input_field_help; /**< @brief Input field name help description. */
int interactive_flag; /**< @brief Toggle interactive mode (default=off). */
const char *interactive_help; /**< @brief Toggle interactive mode help description. */
char * config_arg; /**< @brief Configuration file. */
char * config_orig; /**< @brief Configuration file original value given at command line. */
const char *config_help; /**< @brief Configuration file help description. */
int one_field_flag; /**< @brief Print all results in one segments (creates ambiguous annotation) (default=off). */
const char *one_field_help; /**< @brief Print all results in one segments (creates ambiguous annotation) help description. */
int one_line_flag; /**< @brief Print annotation alternatives as additional fields (default=off). */
const char *one_line_help; /**< @brief Print annotation alternatives as additional fields help description. */
char * language_arg; /**< @brief Language.. */
char * language_orig; /**< @brief Language. original value given at command line. */
const char *language_help; /**< @brief Language. help description. */
int color_flag; /**< @brief Show guessed descriptions in colour. (default=off). */
const char *color_help; /**< @brief Show guessed descriptions in colour. help description. */
unsigned int help_given ; /**< @brief Whether help was given. */
unsigned int full_help_given ; /**< @brief Whether full-help was given. */
unsigned int version_given ; /**< @brief Whether version was given. */
unsigned int input_given ; /**< @brief Whether input was given. */
unsigned int output_given ; /**< @brief Whether output was given. */
unsigned int fail_given ; /**< @brief Whether fail was given. */
unsigned int only_fail_given ; /**< @brief Whether only-fail was given. */
unsigned int no_fail_given ; /**< @brief Whether no-fail was given. */
unsigned int copy_given ; /**< @brief Whether copy was given. */
unsigned int process_given ; /**< @brief Whether process was given. */
unsigned int select_given ; /**< @brief Whether select was given. */
unsigned int ignore_given ; /**< @brief Whether ignore was given. */
unsigned int output_field_given ; /**< @brief Whether output-field was given. */
unsigned int input_field_given ; /**< @brief Whether input-field was given. */
unsigned int interactive_given ; /**< @brief Whether interactive was given. */
unsigned int config_given ; /**< @brief Whether config was given. */
unsigned int one_field_given ; /**< @brief Whether one-field was given. */
unsigned int one_line_given ; /**< @brief Whether one-line was given. */
unsigned int language_given ; /**< @brief Whether language was given. */
unsigned int color_given ; /**< @brief Whether color was given. */
} ;
/**
 * @brief The additional parameters to pass to parser functions
 *
 * A pointer to this structure is the last argument of cmdline_parser_ext();
 * cmdline_parser_params_init() and cmdline_parser_params_create() fill it
 * with the default values listed next to each field.
 */
struct cmdline_parser_params
{
int override; /**< @brief whether to override possibly already present options (default 0) */
int initialize; /**< @brief whether to initialize the option structure gengetopt_args_info (default 1) */
int check_required; /**< @brief whether to check that all required options were provided (default 1) */
int check_ambiguity; /**< @brief whether to check for options already specified in the option structure gengetopt_args_info (default 0) */
int print_errors; /**< @brief whether getopt_long should print an error message for a bad option (default 1) */
} ;
/** @brief the purpose string of the program */
extern const char *gengetopt_args_info_purpose;
/** @brief the usage string of the program */
extern const char *gengetopt_args_info_usage;
/** @brief all the lines making the help output */
extern const char *gengetopt_args_info_help[];
/** @brief all the lines making the full help output (including hidden options) */
extern const char *gengetopt_args_info_full_help[];
/**
* The command line parser
* @param argc the number of command line options
* @param argv the command line options
* @param args_info the structure where option information will be stored
* @return 0 if everything went fine, NON 0 if an error took place
*/
int cmdline_parser (int argc, char **argv,
struct gengetopt_args_info *args_info);
/**
* The command line parser (version with additional parameters - deprecated)
* @param argc the number of command line options
* @param argv the command line options
* @param args_info the structure where option information will be stored
* @param override whether to override possibly already present options
* @param initialize whether to initialize the option structure my_args_info
* @param check_required whether to check that all required options were provided
* @return 0 if everything went fine, NON 0 if an error took place
* @deprecated use cmdline_parser_ext() instead
*/
int cmdline_parser2 (int argc, char **argv,
struct gengetopt_args_info *args_info,
int override, int initialize, int check_required);
/**
* The command line parser (version with additional parameters)
* @param argc the number of command line options
* @param argv the command line options
* @param args_info the structure where option information will be stored
* @param params additional parameters for the parser
* @return 0 if everything went fine, NON 0 if an error took place
*/
int cmdline_parser_ext (int argc, char **argv,
struct gengetopt_args_info *args_info,
struct cmdline_parser_params *params);
/**
* Save the contents of the option struct into an already open FILE stream.
* @param outfile the stream where to dump options
* @param args_info the option struct to dump
* @return 0 if everything went fine, NON 0 if an error took place
*/
int cmdline_parser_dump(FILE *outfile,
struct gengetopt_args_info *args_info);
/**
* Save the contents of the option struct into a (text) file.
* This file can be read by the config file parser (if generated by gengetopt)
* @param filename the file where to save
* @param args_info the option struct to save
* @return 0 if everything went fine, NON 0 if an error took place
*/
int cmdline_parser_file_save(const char *filename,
struct gengetopt_args_info *args_info);
/**
* Print the help
*/
void cmdline_parser_print_help(void);
/**
* Print the full help (including hidden options)
*/
void cmdline_parser_print_full_help(void);
/**
* Print the version
*/
void cmdline_parser_print_version(void);
/**
* Initializes all the fields a cmdline_parser_params structure
* to their default values
* @param params the structure to initialize
*/
void cmdline_parser_params_init(struct cmdline_parser_params *params);
/**
* Allocates dynamically a cmdline_parser_params structure and initializes
* all its fields to their default values
* @return the created and initialized cmdline_parser_params structure
*/
struct cmdline_parser_params *cmdline_parser_params_create(void);
/**
* Initializes the passed gengetopt_args_info structure's fields
* (also set default values for options that have a default)
* @param args_info the structure to initialize
*/
void cmdline_parser_init (struct gengetopt_args_info *args_info);
/**
* Deallocates the string fields of the gengetopt_args_info structure
* (but does not deallocate the structure itself)
* @param args_info the structure to deallocate
*/
void cmdline_parser_free (struct gengetopt_args_info *args_info);
/**
* Checks that all the required options were specified
* @param args_info the structure to check
* @param prog_name the name of the program that will be used to print
* possible errors
* @return
*/
int cmdline_parser_required (struct gengetopt_args_info *args_info,
const char *prog_name);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* CMDLINE_H */

View File

@ -0,0 +1,34 @@
#section "Common UTT options"
option "input" f "Input file" string no
option "output" o "Output file for succesfully processed segments" string no
option "fail" e "Output file for unsuccesfully processed segments " string no
option "only-fail" - "Print only segments the program failed to process" flag off hidden
option "no-fail" - "Print only segments the program processed" flag off hidden
option "copy" c "Copy succesfully processed segments to standard output" flag off
option "process" p "Process segments with this tag" string no multiple
option "select" s "Select only segments with this field" string no multiple
option "ignore" S "Select only segments without this field" string no multiple
option "output-field" O "Output field name" string no
option "input-field" I "Input field name" string no multiple
option "interactive" i "Toggle interactive mode" flag off
option "config" - "Configuration file" string typestr="FILENAME" no
option "one-field" 1 "Print all results in one segments (creates ambiguous annotation)" flag off
option "one-line" - "Print annotation alternatives as additional fields" flag off
option "language" - "Language." string no

View File

@ -0,0 +1,5 @@
package "guess"
version "0.1"
option "color" l "Show guessed descriptions in colour." flag off

229
src/common/common.cc Normal file
View File

@ -0,0 +1,229 @@
#include <cstdlib>
#include <cstring>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include "common.h"
#include <cstdio>
#include <clocale>
FILE* inputf=stdin;
FILE* outputf=stdout;
FILE* failedf=stdout;
bool copy_processed=0;
bool one_field=false;
bool one_line=false;
char output_field_prefix[FIELD_PREFIX_MAXLEN];
char input_field_prefix[FIELD_PREFIX_MAXLEN];
extern int argc;
extern char **argv;
// Tilde (home directory) expansion in a path.
//
// parameters:
//   -inpath  - path to expand; only a leading '~' is treated specially
//   +outpath - receives the expanded path; the caller must supply a buffer
//              large enough for the result (no size is passed in)
// return value:
//   0 on success, 1 if the path starts with '~' but HOME is not set
//   (outpath then receives inpath unchanged)
int expand_path(char* inpath, char* outpath)
{
  if (inpath[0] == '~')
  {
    const char* home = getenv("HOME");
    if (home == NULL)
    {
      // Passing NULL to "%s" is undefined behaviour, so do not expand.
      strcpy(outpath, inpath);
      return 1;
    }
    sprintf(outpath, "%s%s", home, inpath + 1);
  }
  else
  {
    strcpy(outpath, inpath);
  }
  return 0; // no problem
}
// Copies the last path component of argv0 (everything after the final '/',
// or the whole string when there is no '/') into program_name.
// The destination must be large enough; no length is checked here.
void set_program_name(char program_name[], char* argv0)
{
  char* last_slash = strrchr(argv0, '/');
  const char* base_name = (last_slash != NULL) ? last_slash + 1 : argv0;
  strcpy(program_name, base_name);
}
extern void process_config_files(gengetopt_args_info* args, char* argv0)
{
char program_name[256];
char config_file[256];
char config_file_tmp[256];
set_program_name(program_name,argv0);
// obsługa pliku konfiguracyjnego podanego w linii komend
if (args->config_given) {
if (file_accessible(args->config_arg) == 0) {
if (cmdline_parser_configfile(args->config_arg,
args,
0, // 0 - nie nadpisuj wartości parametrów
0, // 0 - nie inicjuj
0) != 0) {
fprintf(stderr, "Error in config file (%s)\n", args->config_arg);
exit(1);
}
}
}
if(args->one_line_given && !one_line) one_line=true, one_field=false;
if(args->one_field_given && !one_field) one_line=false, one_field=true;
// obsluga pliku konfiguracyjnego uzytkownika dla programu
sprintf(config_file_tmp, "%s/%s.conf", USER_CONFIG_DIR, program_name);
expand_path(config_file_tmp, config_file);
if (file_accessible(config_file) == 0) {
if (cmdline_parser_configfile(config_file,
args,
0, // 0 - nie nadpisuj danych
0, // 0 - nie inicjuj struktury
0) != 0) {
fprintf(stderr, "Error in config file (%s)\n", config_file);
exit(1);
}
}
if(args->one_line_given && !one_line) one_line=true, one_field=false;
if(args->one_field_given && !one_field) one_line=false, one_field=true;
// obsluga pliku konfiguracyjnego uzytkownika globalnego
sprintf(config_file_tmp, "%s/utt.conf", USER_CONFIG_DIR);
expand_path(config_file_tmp, config_file);
if (file_accessible(config_file) == 0) {
if (cmdline_parser_configfile(config_file,
args,
0, // 0 - nie nadpisuj danych
0, // 0 - nie inicjuj struktury
0) != 0) {
fprintf(stderr, "Error in config file (%s)\n", config_file);
exit(1);
}
}
if(args->one_line_given && !one_line) one_line=true, one_field=false;
if(args->one_field_given && !one_field) one_line=false, one_field=true;
// obsluga systemowego pliku konfiguracyjnego dla programu
sprintf(config_file, "%s/%s.conf", SYSTEM_CONFIG_DIR, program_name);
if (file_accessible(config_file) == 0) {
if (cmdline_parser_configfile(config_file,
args,
0, // 0 - nie zmieniaj danych wczesniejszych
0, // 0 - nie inicjuj struktury
0 // 0 - nie sprawdzaj wymaganych parametrow
) != 0) {
fprintf(stderr, "Error in config file (%s)\n", config_file);
exit(1);
}
}
if(args->one_line_given && !one_line) one_line=true, one_field=false;
if(args->one_field_given && !one_field) one_line=false, one_field=true;
// obsluga systemowego pliku konfiguracyjnego globalnego
sprintf(config_file, "%s/utt.conf", SYSTEM_CONFIG_DIR);
if (file_accessible(config_file) == 0) {
if (cmdline_parser_configfile(config_file,
args,
0, // 0 - nie zmieniaj danych wczesniejszych
0, // 0 - nie inicjuj struktury
0 // 0 - nie sprawdzaj wymaganych parametrow
) != 0) {
fprintf(stderr, "Error in config file (%s)\n", config_file);
exit(1);
}
}
if(args->one_line_given && !one_line) one_line=true, one_field=false;
if(args->one_field_given && !one_field) one_line=false, one_field=true;
}
// Applies the options shared by all tools: sets the locale, opens the
// input/output/fail streams and fills the input and output field prefixes.
//
// parameters:
//   -args  - parsed options
//   -argv0 - argv[0]; its basename is the default output field name
//
// Exits with status 1 when a requested file cannot be opened.
void process_common_options(gengetopt_args_info* args, char* argv0)
{
  char program_name[256];
  set_program_name(program_name, argv0);

  setlocale(LC_CTYPE, "");
  setlocale(LC_COLLATE, "");

  if (args->help_given)
    cmdline_parser_print_help();

  if (args->input_given)
  {
    inputf = fopen(args->input_arg, "r");
    if (inputf == NULL)
    {
      fprintf(stderr, "No such file: %s.\n", args->input_arg);
      exit(1);
    }
  }

  if (args->output_given)
  {
    outputf = fopen(args->output_arg, "w");
    if (outputf == NULL)
    {
      fprintf(stderr, "Cannot open output file: %s.\n", args->output_arg);
      exit(1);
    }
  }

  if (args->fail_given)
  {
    failedf = fopen(args->fail_arg, "w");
    if (failedf == NULL)
    {
      fprintf(stderr, "Cannot open output file: %s.\n", args->fail_arg);
      exit(1);
    }
  }

  // Only the first --input-field value is turned into the global prefix;
  // without the option the fourth segment field ("4") is the input.
  if (args->input_field_given)
    fieldprefix(args->input_field_arg[0], input_field_prefix);
  else
    strcpy(input_field_prefix, "4");

  if (args->output_field_given)
    fieldprefix(args->output_field_arg, output_field_prefix);
  else
    // The prefix buffer is much shorter than program_name; cut the name so
    // that the name, the separator and the terminator always fit.
    snprintf(output_field_prefix, sizeof output_field_prefix, "%.*s%c",
             (int)(sizeof output_field_prefix) - 2, program_name, INFIELD_SEP);

  if (args->copy_given)
    copy_processed = true;
}
// Checks whether the file at `path` can be read by the calling process.
// Returns the result of access(path, R_OK): 0 when readable, -1 otherwise.
int file_accessible(const char* path)
{
  const int status = access(path, R_OK);
  return status;
}
// Checks that the configuration file dir/filename exists and is usable.
//
// return value:
//   0  - dir is a searchable directory and dir/filename is a readable
//        regular file
//   -1 - any of the checks failed, or memory for the path could not be
//        allocated
int config_file_exists(const char* dir, const char* filename)
{
  struct stat dir_stat;
  struct stat file_stat;

  char* path = (char*)malloc(strlen(dir) + strlen(filename) + 2); // + '\0' + '/'
  if (path == NULL)
    return -1;
  sprintf(path, "%s/%s", dir, filename);

  // Single exit point so that path is released on every outcome.
  int result = -1;
  if (stat(dir, &dir_stat) == 0
      && stat(path, &file_stat) == 0
      && S_ISDIR(dir_stat.st_mode)    // the directory really is a directory
      && S_ISREG(file_stat.st_mode)   // the configuration file is a regular file
      && access(dir, X_OK) == 0       // we may enter the directory
      && access(path, R_OK) == 0)     // we may read the file
    result = 0;

  free(path);
  return result;
}

0
src/common/common.d Normal file
View File

516
src/common/common.h Normal file
View File

@ -0,0 +1,516 @@
#ifndef __COMMON_H
#define __COMMON_H
#include <cstdio>
#include <cctype>
#include <cstring>
#include <cstdlib>
#include <cwchar>
#include <cwctype>
#include "../lib/const.h"
#ifndef _CMDLINE_FILE
#error _CMDLINE_FILE constant not defined!
#else
#include _CMDLINE_FILE
#endif
/**************************************************
* Constants related to input/output
*/
#define EMPTYFORM '*'
#define INFIELD_SEP ':'
#define MAXAUX 16
#define FIELD_SEP " \t\n"
#define FIELD_PREFIX_MAXLEN 32
// directories with configuration files
// new
// old - to be removed
// #define CONFIG_DIR ".utt/conf"
// name of the variable holding the path to the data
// #define UTT_DIR_VAR "UTT_DIR"
// path to the data files (e.g. UTT_DIR/files) relative to $HOME!
// #define UTT_DIR_DEFAULT ".utt/pl/"
/**************************************************/
extern FILE* inputf;
extern FILE* outputf;
extern FILE* failedf;
extern char* input_filename;
extern char* output_filename;
extern char* failed_filename;
extern bool one_line;
extern bool one_field;
extern char input_field_prefix[];
extern char output_field_prefix[];
extern bool copy_processed;
extern bool append_output;
extern bool append_failed;
// path to the data directory
extern char utt_dir[];
extern void process_common_options(gengetopt_args_info* args, char* argv0);
extern void process_config_files(gengetopt_args_info* args, char* argv0);
extern int expand_path(char* inpath, char* outpath);
/**************************************************
* problems with casing */
// Classifies the letter casing of s.
// return value:
// 0 - no upper-case characters (also returned for the empty string)
// 1 - first character upper case, none of the rest upper case
// 2 - all characters upper case (at least two characters)
// 3 - anything else
inline int casing(char* s)
{
  // The empty string has no character after the first one to look at.
  if (*s == '\0') return 0;

  // <cctype> functions require a value representable as unsigned char.
  const bool first_upper = isupper((unsigned char)*s) != 0;
  bool rest_has_upper = false;
  bool rest_has_other = false;
  for (const char* p = s + 1; *p != '\0'; ++p)
  {
    if (isupper((unsigned char)*p))
      rest_has_upper = true;
    else
      rest_has_other = true;
  }

  if (!first_upper)
    return rest_has_upper ? 3 : 0;
  if (!rest_has_upper)
    return 1;
  // Mixed tails such as "AbC" are "other", not "all upper case".
  return rest_has_other ? 3 : 2;
}
// Copies s to d, converting every character with tolower().
// The terminating '\0' is copied as well; d must be at least as large as s.
inline void tolowers(char* s, char* d)
{
  for (;; ++s, ++d)
  {
    // <cctype> functions require a value representable as unsigned char.
    *d = (char)tolower((unsigned char)*s);
    if (*s == '\0') break;
  }
}
// Copies s to d, applying the letter casing described by `casing`.
// casing - a value as returned by casing():
//   0, 3 - copy unchanged (3: not enough information to restore anything)
//   1    - first character upper case, the rest copied unchanged
//   2    - every character upper case
// For any other value nothing is written to d.
inline void restorecasing(char *s, char *d, int casing)
{
  // <cctype> functions require a value representable as unsigned char,
  // hence the casts before every toupper() call.
  switch (casing)
  {
    case 0:
    case 3:
      *d = *s;
      while (*s != '\0') * ++d = * ++s;
      break;
    case 1:
      *d = (char)toupper((unsigned char)*s);
      while (*s != '\0') * ++d = * ++s;
      break;
    case 2:
      *d = (char)toupper((unsigned char)*s);
      while (*s != '\0')
      {
        ++s;
        ++d;
        *d = (char)toupper((unsigned char)*s);
      }
      break;
  }
}
/**************************************************/
/*
parameters:
-seg - segment
-pref - field name or "1", "2", "3", "4" for the first four fields
+val - field contents (the caller must supply a large enough buffer)
return value:
1 if specified field exists, 0 otherwise

Fields "1" and "2" are runs of digits, "3" and "4" runs of printable
non-blank characters. Any other pref is looked up among the annotation
fields that follow: the first occurrence of pref that is directly preceded
by a space or a tab is used, and val receives the text after pref up to
the next separator.
*/
inline int getfield(char* seg, const char* pref, char* val)
{
  char* p = seg;
  char* p0;

  // <cctype> functions require a value representable as unsigned char,
  // hence the casts below.
  while (isspace((unsigned char)*p)) ++p;

  // field "1"
  p0 = p; while (isdigit((unsigned char)*p)) ++p;
  if (*pref == '1')
  {
    if (p == p0) return 0;
    strncpy(val, p0, p - p0); val[p - p0] = '\0';
    return 1;
  }
  while (isspace((unsigned char)*p)) ++p;

  // field "2"
  p0 = p; while (isdigit((unsigned char)*p)) ++p;
  if (*pref == '2')
  {
    if (p == p0) return 0;
    strncpy(val, p0, p - p0); val[p - p0] = '\0';
    return 1;
  }
  while (isspace((unsigned char)*p)) ++p;

  // field "3"
  p0 = p; while (isgraph((unsigned char)*p)) ++p;
  if (*pref == '3')
  {
    if (p == p0) return 0;
    strncpy(val, p0, p - p0); val[p - p0] = '\0';
    return 1;
  }
  while (isspace((unsigned char)*p)) ++p;

  // field "4"
  p0 = p; while (isgraph((unsigned char)*p)) ++p;
  if (*pref == '4')
  {
    if (p == p0) return 0;
    strncpy(val, p0, p - p0); val[p - p0] = '\0';
    return 1;
  }
  while (isspace((unsigned char)*p)) ++p;

  // annotation fields
  // A match that is not preceded by a blank is skipped by restarting the
  // search one character further on; searching again from the same position
  // would find the same match forever. A match at the very start of seg has
  // no preceding character and is skipped without reading before the buffer.
  p = strstr(p, pref);
  while (p != NULL && (p == seg || (*(p-1) != ' ' && *(p-1) != '\t')))
    p = (*p != '\0') ? strstr(p + 1, pref) : NULL;
  if (p == NULL) return 0;

  p += strlen(pref);
  int len = strcspn(p, FIELD_SEP "\n\r\f\0");
  strncpy(val, p, len);
  val[len] = '\0';
  return 1;
}
// Wide-character counterpart of getfield(char*, const char*, char*).
//
// parameters:
//   -seg  - segment
//   -pref - field name or L"1", L"2", L"3", L"4" for the first four fields
//   +val  - field contents (the caller must supply a large enough buffer)
// return value:
//   1 if specified field exists, 0 otherwise
inline int getfield(wchar_t* seg, const wchar_t* pref, wchar_t* val)
{
  wchar_t* p = seg;
  wchar_t* p0;

  while (iswspace(*p)) ++p;

  // field "1"
  p0 = p; while (iswdigit(*p)) ++p;
  if (*pref == L'1')
  {
    if (p == p0) return 0;
    wcsncpy(val, p0, p - p0); val[p - p0] = L'\0';
    return 1;
  }
  while (iswspace(*p)) ++p;

  // field "2"
  p0 = p; while (iswdigit(*p)) ++p;
  if (*pref == L'2')
  {
    if (p == p0) return 0;
    wcsncpy(val, p0, p - p0); val[p - p0] = L'\0';
    return 1;
  }
  while (iswspace(*p)) ++p;

  // field "3"
  p0 = p; while (iswgraph(*p)) ++p;
  if (*pref == L'3')
  {
    if (p == p0) return 0;
    wcsncpy(val, p0, p - p0); val[p - p0] = L'\0';
    return 1;
  }
  while (iswspace(*p)) ++p;

  // field "4"
  p0 = p; while (iswgraph(*p)) ++p;
  if (*pref == L'4')
  {
    if (p == p0) return 0;
    wcsncpy(val, p0, p - p0); val[p - p0] = L'\0';
    return 1;
  }
  while (iswspace(*p)) ++p;

  // annotation fields
  // A match that is not preceded by a blank is skipped by restarting the
  // search one character further on; searching again from the same position
  // would find the same match forever. A match at the very start of seg has
  // no preceding character and is skipped without reading before the buffer.
  p = wcsstr(p, pref);
  while (p != NULL && (p == seg || (*(p-1) != L' ' && *(p-1) != L'\t')))
    p = (*p != L'\0') ? wcsstr(p + 1, pref) : NULL;
  if (p == NULL) return 0;

  p += wcslen(pref);
  int len = wcscspn(p, FIELD_SEP L"\n\r\f\0");
  wcsncpy(val, p, len);
  val[len] = L'\0';
  return 1;
}
/*
Turns a field name given by the user into the prefix that marks the field
inside a segment.

parameters:
-name - field name: either a single punctuation character (short name)
        or a non-empty run of alphanumeric characters (long name)
+prefix - short name copied as is, long name with ':' appended;
          left untouched when the name is rejected
return value:
1 if correct field name, 0 otherwise
examples:
name  prefix  r.v.
lem   lem:    1
@     @       1
::    (unchanged)  0
a,b   (unchanged)  0
*/
inline
int fieldprefix(char *name, char *prefix)
{
  // short name: exactly one punctuation character
  const bool is_short = ispunct(name[0]) && name[1] == '\0';
  if (is_short)
  {
    strcpy(prefix, name);
    return 1;
  }

  // long name: one or more alphanumeric characters and nothing else
  const char* scan = name;
  while (*scan != '\0' && isalnum(*scan)) ++scan;
  const bool is_long = (*scan == '\0') && (scan != name);
  if (!is_long)
    return 0;

  sprintf(prefix, "%s:", name);
  return 1;
}
/*
Decides whether a raw segment line should be processed.

parameters:
-seg - segment
-args - parsed options (--process, --select, --ignore, --input-field)
return value:
true if the segment passes all filters:
- with --process: its tag (field "3") equals one of the given tags,
- every --select field is present,
- no --ignore field is present,
- with --input-field: the input field is present.
*/
inline
bool process_seg(char* seg, gengetopt_args_info& args)
{
  char buf[256];
  char pref[FIELD_PREFIX_MAXLEN];

  if (args.process_given)
  {
    bool tag_matches = false;
    // buf is only valid when the tag field exists
    if (getfield(seg, "3", buf))
    {
      for (unsigned int i = 0; i < args.process_given; ++i)
        if (strcmp(args.process_arg[i], buf) == 0)
        {
          tag_matches = true;
          break;
        }
    }
    if (!tag_matches) return false;
  }

  for (unsigned int i = 0; i < args.select_given; ++i)
  {
    // !!! PATCH - the prefix is rebuilt for every segment; should be optimised !!!
    // A malformed field name yields no prefix, so the field cannot be present.
    if (!fieldprefix(args.select_arg[i], pref))
      return false;
    if (!getfield(seg, pref, buf))
      return false;
  }

  for (unsigned int i = 0; i < args.ignore_given; ++i)
  {
    // !!! PATCH - the prefix is rebuilt for every segment; should be optimised !!!
    // A malformed field name yields no prefix, so there is nothing to ignore.
    if (!fieldprefix(args.ignore_arg[i], pref))
      continue;
    if (getfield(seg, pref, buf))
      return false;
  }

  // input_field_given is an occurrence count, so it needs a logical AND:
  // a bitwise AND with the 0/1 result skipped the check for even counts.
  if (args.input_field_given && !getfield(seg, input_field_prefix, buf))
    return false;

  return true;
}
/*
Appends the field "pref val" to a segment line.

parameters:
-+seg - segment; normally ends with '\n', which is replaced by
        " <pref><val>\n". A segment without a trailing newline (or an
        empty one) gets the new field appended after its last character.
-pref - prefix of the new field
-val - contents of the new field
return value:
1 - success, 0 - fail (limit on segment length exceeded)

NOTE(review): the limit assumes seg points to a buffer of MAX_LINE bytes -
confirm against the callers.
*/
inline
int addfield(char *seg, const char *pref, const char *val)
{
  const size_t seglen = strlen(seg);

  // Position of the trailing newline, or of the terminator if there is none.
  const size_t at = (seglen > 0 && seg[seglen - 1] == '\n') ? seglen - 1 : seglen;

  // Bytes needed: at + ' ' + pref + val + '\n' + '\0'.
  if (at + strlen(pref) + strlen(val) + 3 > (size_t)MAX_LINE) return 0;

  sprintf(seg + at, " %s%s\n", pref, val);
  return 1;
}
/**************************************************/
// Same field and method layout as struct Segment declared right after it.
// No definitions of the Seg member functions are visible in this part of
// the file.
// NOTE(review): looks like a leftover duplicate of Segment - confirm that
// nothing uses it before removing.
struct Seg
{
int filepos, len;
char* tag;
char* form;
char* aux[MAXAUX];
int auxn;
bool parse(char* line);
char* getfield(char* fieldname);
void print(char* line);
bool addfield(char* s);
bool clearfields();
};
/**************************************************/
/* Definition of the input/output structure: one parsed segment line.
 * All pointers refer to text stored elsewhere (parse() points them into the
 * line it was given); the structure owns no memory.
 */
struct Segment
{
// first and second field of the line, converted with atoi() by parse()
int filepos, len;
// third field of the line
char* tag;
// fourth field; NULL when the field is missing or is the EMPTYFORM marker
char* form;
// remaining (annotation) fields, in input order
char* aux[MAXAUX];
// number of used entries in aux
int auxn;
// splits line in place and fills the members; false if a field up to tag is missing
bool parse(char* line);
// returns the contents of the named annotation field, or NULL
char* getfield(char* fieldname);
// writes the segment as text, terminated by '\n', into line
void print(char* line);
// appends s as a new annotation field; false when aux is full
bool addfield(char* s);
// forgets all annotation fields; always returns true
bool clearfields();
};
/*
 * Checks whether the given parsed segment should be processed:
 * its tag must be one of the --process tags (when any were given),
 * every --select field must be present and no --ignore field may be present.
 */
inline
bool process_seg(Segment& s, gengetopt_args_info& args)
{
  // tag filter: passes automatically when --process was not used
  bool tag_accepted = (args.process_given == 0);
  for (unsigned int k = 0; k < args.process_given && !tag_accepted; ++k)
    tag_accepted = (strcmp(args.process_arg[k], s.tag) == 0);
  if (!tag_accepted)
    return false;

  // every selected field has to exist
  for (unsigned int k = 0; k < args.select_given; ++k)
  {
    if (s.getfield(args.select_arg[k]) == NULL)
      return false;
  }

  // none of the ignored fields may exist
  for (unsigned int k = 0; k < args.ignore_given; ++k)
  {
    if (s.getfield(args.ignore_arg[k]) != NULL)
      return false;
  }

  return true;
}
/*
* INPUT/OUTPUT HANDLING FUNCTIONS
*/
// Splits a segment line into its fields.
// The strings stay where they are (in line): only the member pointers are
// set and strtok() writes terminating zeros into line.
//
// return value:
//   true  - at least the position, length and tag fields were found
//   false - one of these three fields is missing, or the line carries more
//           annotation fields than aux can hold
inline
bool Segment::parse(char* line)
{
  const int aux_capacity = (int)(sizeof aux / sizeof aux[0]);
  char* field;

  auxn = 0;

  if ((field = strtok(line, FIELD_SEP)) == NULL)
    return false;
  filepos = atoi(field); // validity is not checked

  if ((field = strtok(NULL, FIELD_SEP)) == NULL)
    return false;
  len = atoi(field); // validity is not checked

  if ((tag = strtok(NULL, FIELD_SEP)) == NULL)
    return false;

  if ((form = strtok(NULL, FIELD_SEP)) == NULL)
    return true;
  if (form[0] == EMPTYFORM && form[1] == '\0')
    form = NULL;

  while ((field = strtok(NULL, FIELD_SEP)) != NULL)
  {
    // Refuse the line instead of writing past the end of aux.
    if (auxn >= aux_capacity)
      return false;
    aux[auxn++] = field;
  }
  // Keep the list NULL-terminated whenever there is room for the terminator.
  if (auxn < aux_capacity)
    aux[auxn] = NULL;

  return true;
}
// Looks up an annotation field by name.
// A name starting with an alphanumeric character is a long name: the field
// must read "<name><INFIELD_SEP><contents>". Any other name is a short name:
// only its first character is compared with the first character of the field.
// Returns a pointer to the contents of the first matching field, or NULL.
inline char* Segment::getfield(char* f)
{
  const bool long_name = isalnum(*f) != 0;
  const int name_len = strlen(f);

  for (int k = 0; k < auxn; ++k)
  {
    char* candidate = aux[k];
    if (long_name)
    {
      if (strncmp(candidate, f, name_len) == 0 && candidate[name_len] == INFIELD_SEP)
        return candidate + name_len + 1;
    }
    else if (*candidate == *f)
    {
      return candidate + 1;
    }
  }
  return NULL;
}
// Drops all annotation fields: every used aux slot is reset to NULL and the
// count goes back to zero. The strings themselves are not freed.
// Always returns true.
inline bool Segment::clearfields() {
  while (auxn > 0)
    aux[--auxn] = NULL;
  auxn = 0;
  return true;
}
// Writes the segment as a text line into `line`:
//   "<filepos> <len> <tag>[ <form>| *][ <aux>...]\n"
// A missing form is written as "*" only when annotation fields follow.
// The caller must supply a large enough buffer.
inline
void Segment::print(char* line)
{
  // Keep a cursor at the end of the text written so far instead of letting
  // strcat() rescan the whole line for every field.
  char* out = line + sprintf(line, "%04d %02d %s", filepos, len, tag);

  if (form)
    out += sprintf(out, " %s", form);
  else if (auxn)
    out += sprintf(out, " *");

  for (int i = 0; i < auxn; ++i)
    out += sprintf(out, " %s", aux[i]);

  strcpy(out, "\n");
}
// Appends s as a new annotation field. Only the pointer is stored.
// Returns false, leaving the segment unchanged, when all MAXAUX slots are used.
inline
bool Segment::addfield(char* s)
{
  if (auxn >= MAXAUX)
    return false;

  aux[auxn] = s;
  ++auxn;
  return true;
}
/**************************************************
* helper functions for working with configuration *
* files                                           *
**************************************************/
// checks whether the file can be read (result of access(path, R_OK))
int file_accessible(const char* path);
// checks that the configuration file dir/filename exists
// NOTE(review): no definition of config_file is visible next to
// file_accessible in common.cc; the function defined there with this
// signature is config_file_exists, declared below - confirm whether the
// old name is still needed.
int config_file(const char* dir, const char* filename);
int config_file_exists(const char* dir, const char* filename);
/**************************************************/
/* Fetches the input text for a segment.
 * parameters:
 * - args - array of strings naming the input fields, tried in order
 * - args_len - number of entries in args
 * - seg - segment
 * return value - pointer to the input: the form of the segment as soon as a
 * name starting with '4' is reached, otherwise the contents of the first
 * named field that exists; NULL when no name yields anything
 */
inline char* getInput(char** args, int args_len, Segment seg) {
  for (int k = 0; k < args_len; ++k) {
    char* field_name = args[k];

    if (field_name[0] == '4')
      return seg.form;

    char* contents = seg.getfield(field_name);
    if (contents != NULL)
      return contents;
  }
  return NULL;
}
#endif

View File

@ -0,0 +1,20 @@
#include <stdlib.h>
#include "common.h"
// Skeleton of a tool built on the common option handling: parse the command
// line, apply the common options, do the work, release the option strings.
int main(int argc, char* argv[])
{
  gengetopt_args_info args;

  if (cmdline_parser(argc, argv, &args) != 0)
    exit(1);

  // process_common_options() takes a pointer to the options and argv[0]
  process_common_options(&args, argv[0]);

  //
  // YOUR CODE HERE
  //

  cmdline_parser_free(&args);
  return 0;
}

25
src/compiledic/Makefile Normal file
View File

@ -0,0 +1,25 @@
include ../../config.mak

.PHONY: all
all: compiledic

# compiledic and its helpers are scripts: there is nothing to build.
# The target is phony because a file with the same name exists here.
.PHONY: compiledic
compiledic:

# Copies the scripts to BIN_DIR; does nothing when BIN_DIR is not set.
.PHONY: install
install:
ifdef BIN_DIR
	install -m 0755 compiledic $(BIN_DIR)
	install -m 0755 text2fst.py $(BIN_DIR)
	install -m 0755 symbols.py $(BIN_DIR)
endif

# $(RM) is "rm -f": uninstalling twice, or after a partial install, succeeds.
.PHONY: uninstall
uninstall:
ifdef BIN_DIR
	$(RM) $(BIN_DIR)/compiledic
	$(RM) $(BIN_DIR)/text2fst.py
	$(RM) $(BIN_DIR)/symbols.py
endif

.PHONY: clean
clean:

214
src/compiledic/compiledic Executable file
View File

@ -0,0 +1,214 @@
#! /usr/bin/env perl
#package: UAM Text Tools
#component: compiledic
#version: 1.3
#author: Tomasz Obrebski
#author: Krzysztof Szarzyński (2012 migration to OpenFST format)
use utf8;
use strict;
use locale;
use File::HomeDir;
use File::Basename;
use File::Temp;
use File::Copy;
use Getopt::Long;
# Number of dictionary lines after which the splitting loop further down
# starts a new chunk file.
my $linesPerFile = 20000;
Getopt::Long::Configure('no_ignore_case_always');
# --help / -h: print the usage text and exit successfully.
my $help=0;
GetOptions("help|h" => \$help);
if($help)
{
print <<'END'
Usage: compiledic dictionaryfile.dic
The dictionary file must be UTF8 without Byte Order Mark (BOM).
To remove BOM see removeBom.sh
Options:
--help -h Help.
END
;
exit 0;
}
##################################################
# Validate the command line: exactly the dictionary file is expected.
@ARGV > 0 or die("Source dictionary not given.\n");
my $file = shift;
-f $file or die("Source dictionary not found.\n");
# The pattern is anchored so that only names really ending in ".dic" pass
# (an unanchored match also accepted e.g. "words.dic.bak").
$file =~ /^(.*)\.dic\z/s or die("The input file must have .dic extension.\n");
my $filenameprefix = $1;
##################################################
# Create the temporary directory in which all intermediate files are placed.
my $tmp_root = File::Temp::tempdir( CLEANUP => 1 );
print "Using temp dir: $tmp_root\n";
##################################################
# Create the symbol table.
# symbols.py is looked up in the current directory. The exit status of the
# command is not checked, so "OK" is printed even when it fails.
print "Generating the symbols table\t\t";
`python ./symbols.py > $tmp_root/symbols`;# or die "Failed!\n";
print "OK\n";
##################################################
# Split the dictionary file into the chunk files $tmp_root/slo_0 .. slo_N.
# A new chunk is started whenever the line counter reaches $linesPerFile.
print "Dividing the dictionary file\t\t";
open(IN, '<', $file) or die "Cannot open $file: $!\n";
my $lineCount = 0;
my $fileCount = 0;
open(FILE, '>', "$tmp_root/slo_$fileCount")
    or die "Cannot create $tmp_root/slo_$fileCount: $!\n";
while (<IN>) {
    if (++$lineCount >= $linesPerFile) {
        $fileCount++;
        $lineCount = 0;
        close(FILE) or die "Cannot write chunk file: $!\n";
        open(FILE, '>', "$tmp_root/slo_$fileCount")
            or die "Cannot create $tmp_root/slo_$fileCount: $!\n";
    }
    print(FILE $_);
}
# The last chunk must be closed too: its buffered lines have to be on disk
# before the external tools read the file.
close(FILE) or die "Cannot write chunk file: $!\n";
close(IN);
print "OK\n";
##################################################
# Build the small automata: one per chunk file.
print "Building partial automata";
# 32 dots for $fileCount files
my $filesPerDot = $fileCount/32;
my $files=$filesPerDot;
my $dots=0;
for (my $i=0; $i<=$fileCount; $i++) {
# progress indicator: one dot per $filesPerDot processed chunks
if ($files >= $filesPerDot) {
$files = 0;
print ".";
$dots++;
}
$files++;
# chunk text -> textual FST -> compiled acceptor, then renamed to slownik_$i.bin
# (the exit status of the external commands is not checked; a failure
# shows up only when the move below finds no file)
`python text2fst.py < $tmp_root/slo_$i > $tmp_root/slownik_$i.fst`;
#`fstcompile --acceptor $tmp_root/slownik_$i.fst $tmp_root/slownikC_$i.fst`;
`fstcompile --acceptor --isymbols=$tmp_root/symbols $tmp_root/slownik_$i.fst $tmp_root/slownikC_$i.fst`;
move("$tmp_root/slownikC_$i.fst", "$tmp_root/slownik_$i.bin") or die "Cant create slownik_$i.bin\n";
}
# pad the progress bar to 32 dots
if ($dots < 32) {
for (my $i=0; $i<32 - $dots; $i++) {
print ".";
}
}
print "OK\n";
##################################################
# Remove the dictionary chunk files.
print "Deleteing $tmp_root/slo_ text files\t\t";
unlink <$tmp_root/slo_*> or die "Faiiled\n";
print "OK\n";
##################################################
# Build the final automaton.
print "Building final automaton";
# 35 dots...
# NOTE(review): the comment above says 35 but $ndots is 33 - confirm which
# one is intended.
my $ndots=33;
$filesPerDot = $fileCount/$ndots;
$files=$filesPerDot;
$dots=0;
my $out_fst = "slownik.bin";
my $tmp_fst = "slownik_T.bin";
######################################################################
# Building a single automaton
######################################################################
# The first partial automaton is the starting point; every further one is
# united with the running result, which is then cleaned up
# (rmepsilon, determinize, minimize) before the next union.
move("$tmp_root/slownik_0.bin", "$tmp_root/$out_fst") or die "Failed to move slownik_0.bin -> $out_fst\n";
for (my $i=1; $i<=$fileCount; $i++) {
# progress indicator: one dot per $filesPerDot processed automata
if ($files >= $filesPerDot) {
$files = 0;
print ".";
$dots++;
}
$files++;
# each tool writes to $tmp_fst, which then replaces $out_fst
`fstunion $tmp_root/$out_fst $tmp_root/slownik_$i.bin $tmp_root/$tmp_fst`;
move("$tmp_root/$tmp_fst", "$tmp_root/$out_fst") or die "Failed at union: slownik_$i\n";
`fstrmepsilon $tmp_root/$out_fst $tmp_root/$tmp_fst`;
move("$tmp_root/$tmp_fst", "$tmp_root/$out_fst") or die "Failed at rmepsilon: slownik_$i\n";
`fstdeterminize $tmp_root/$out_fst $tmp_root/$tmp_fst`;
move("$tmp_root/$tmp_fst", "$tmp_root/$out_fst") or die "Failed at minimization: slownik_$i\n";
`fstminimize $tmp_root/$out_fst $tmp_root/$tmp_fst`;
move("$tmp_root/$tmp_fst", "$tmp_root/$out_fst") || die "Unable to move $tmp_root/$tmp_fst -> $out_fst!\n";
}
# pad the progress bar to $ndots dots
if ($dots < $ndots) {
for (my $i=0; $i<$ndots - $dots; $i++) {
print ".";
}
}
print "OK\n";
######################################################################
# Minimization of the automaton:
######################################################################
# One more rmepsilon / determinize / minimize pass over the final result;
# this is the only pass a dictionary that fits in one chunk gets.
print "removing epsilon-transitions\t\t";
`fstrmepsilon $tmp_root/$out_fst $tmp_root/$tmp_fst`;
move("$tmp_root/$tmp_fst", "$tmp_root/$out_fst") or die "Failed\n";
print "OK\n";
print "determinizing automaton\t\t";
`fstdeterminize $tmp_root/$out_fst $tmp_root/$tmp_fst`;
move("$tmp_root/$tmp_fst", "$tmp_root/$out_fst") or die "Failed\n";
print "OK\n";
print "minimizing automaton\t\t";
`fstminimize $tmp_root/$out_fst $tmp_root/$tmp_fst`;
move("$tmp_root/$tmp_fst", "$tmp_root/$out_fst") or die "Failed\n";
print "OK\n";
# The result is written as dictionary.bin into the current working directory.
print "moving the FST to compiledic directory\t\t";
use Cwd;
my $workdir = getcwd($0);
move("$tmp_root/$out_fst", "$workdir/dictionary.bin") or die "Failed\n";
print "OK\n";
########################################################
# Cleanup:
print "removing temporary files\t\t";
unlink <$tmp_root/*> or die "Failed\nCan't delete contents of $tmp_root \n";
# NOTE(review): unlink is meant for files; removing the directory itself is
# presumably left to tempdir's CLEANUP => 1 - confirm.
unlink ($tmp_root);
print "OK\n";
print "Finished!\n";

4
src/compiledic/removeBom.sh Executable file
View File

@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Remove the UTF-8 byte order mark (EF BB BF) from the start of a file.
# Usage: removeBom.sh FILE > FILE.nobom
# The result is written to standard output; without an argument standard
# input is read. The argument is quoted so that file names containing
# spaces or glob characters are passed to awk unchanged.
awk '{if(NR==1)sub(/^\xef\xbb\xbf/,"");print}' ${1+"$1"}

12
src/compiledic/symbols.py Executable file
View File

@ -0,0 +1,12 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import locale

# Encoding of the emitted table: the one of the default locale.
# getdefaultlocale() reports None when the locale does not specify an
# encoding (e.g. LANG unset); fall back to UTF-8, the encoding required of
# the dictionary file, instead of failing in encode().
encoding = locale.getdefaultlocale()[1] or "UTF-8"

# Symbol table in "<symbol><TAB><id>" format: the epsilon symbol gets id 0,
# every character with code 33..59999 gets its own code as id.
sys.stdout.write(u"<eps>\t0\n".encode(encoding))
for i in range(33, 60000):
    line = u"%s\t%s\n" % (unichr(i), i)
    sys.stdout.write(line.encode(encoding))

39
src/compiledic/text2fst.py Executable file
View File

@ -0,0 +1,39 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""-----------------------------------------------------------------------------
# Name: text2FST
# Purpose: A tool for translating a dictionary file into a OpenFST format.
#
# Author: Krzysztof Szarzynski <szarznyski.wmi.amu.edu.pl>
#
# Created: 19/11/2012
# Copyright: (c) UAM Text Tools 2012
# Licence: Simplified BSD License
# Usage:
# cat dictionary.dic | ./text2fst > output.fst
# Warning: the dictionary.dic file must be UTF8 _without_ BOM
#
# TODO: Checking the BOM and removing it from the dictionary.file
-----------------------------------------------------------------------------"""
import sys
import locale
# Output encoding is taken from the default locale.
# NOTE(review): getdefaultlocale() can return None as the encoding when the
# locale cannot be determined; encode(None) would then fail - confirm the
# intended fallback.
encoding = locale.getdefaultlocale()[1]
# Write a unicode string to standard output, encoded with `encoding`.
def prn(str):
sys.stdout.write(str.encode(encoding))
# State numbers: 0 is the start state, 1 the end state; currentState is the
# running counter used for the per-character arcs written below.
begState = 0
endState = 1
eps = u"<eps>"
currentState = begState
# Every line read from standard input is decoded as UTF-8 and written out as
# "source target label" arc lines.
# NOTE(review): presumably one <eps> arc is written before and one after the
# per-character arcs of each input line - confirm against the original
# indentation.
# NOTE(review): no strip is visible, so iterating over `line` also yields its
# trailing newline character - confirm this is intended.
for line in sys.stdin:
prn(u"%s %s %s\n"%(begState, currentState, eps))
line = line.decode('UTF-8')
for letter in line:
prn(u"%s %s %s\n"%(currentState, currentState+1, letter))
currentState+=1
prn(u"%s %s %s\n"%(currentState, endState, eps))
# A line holding only the end state number is written last (Python 2 print).
print endState

56
src/cor/Makefile Normal file
View File

@ -0,0 +1,56 @@
# Build rules for the cor component.
# config.mak supplies the settings referenced below (BUILD_STATIC, GENGETOPT,
# BIN_DIR and the compiler variables).
include ../../config.mak

ifeq ($(BUILD_STATIC), yes)
  LDFLAGS += -static
endif

CXXFLAGS += -O2 -fpermissive

LIB_PATH=../lib
COMMON_PATH=../common
# Header name passed to the sources as the _CMDLINE_FILE macro.
CMDLINE_FILE='"../cor/cmdline.h"'

.PHONY: all
all: cor

cor: main.cc corr.o $(LIB_PATH)/word.o \
    $(LIB_PATH)/auttools.o cmdline.c common_cor.o common.o
	$(CXX) $(CXXFLAGS) -D _CMDLINE_FILE=$(CMDLINE_FILE) main.cc corr.o common.o \
	$(LIB_PATH)/word.o $(LIB_PATH)/auttools.o cmdline.c common_cor.o \
	-o cor $(LDFLAGS)

corr.o: corr.cc corr.hh
	$(CXX) $(CXXFLAGS) -c corr.cc

# common.cc is compiled with _CMDLINE_FILE pointing at the generated
# cmdline.h, so the header is listed as a prerequisite: it must exist before
# this object is built (matters for parallel builds and clean trees).
common.o: $(COMMON_PATH)/cmdline_common.ggo $(COMMON_PATH)/common.cc \
    $(COMMON_PATH)/common.h cmdline.h
	$(CXX) $(CXXFLAGS) -c -D _CMDLINE_FILE=$(CMDLINE_FILE) $(COMMON_PATH)/common.cc

common_cor.o: cmdline.h common_cor.cc common_cor.h
	$(CXX) $(CXXFLAGS) -c -D _CMDLINE_FILE=$(CMDLINE_FILE) common_cor.cc

# The option parser is generated by gengetopt from cmdline.ggo, which is
# itself the concatenation of the cor-specific and the common option lists.
cmdline.c cmdline.h: cmdline.ggo
	$(GENGETOPT) -i cmdline.ggo --conf-parser

cmdline.ggo: cmdline_cor.ggo $(COMMON_PATH)/cmdline_common.ggo
	cat cmdline_cor.ggo $(COMMON_PATH)/cmdline_common.ggo > cmdline.ggo

# install/uninstall do nothing unless BIN_DIR is defined.
.PHONY: install
install:
ifdef BIN_DIR
	install -m 0755 cor $(BIN_DIR)
endif

.PHONY: uninstall
uninstall:
ifdef BIN_DIR
	rm $(BIN_DIR)/cor
endif

# Declared phony so that a stray file called "clean" cannot disable them.
# rm -f succeeds silently when there is nothing to remove.
.PHONY: clean clean.cmdline
clean: clean.cmdline
	rm -f *.o
	rm -f cor

clean.cmdline:
	rm -f cmdline.*

8
src/cor/cmdline_cor.ggo Normal file
View File

@ -0,0 +1,8 @@
# gengetopt option declarations specific to the cor component:
# dictionary selection, maximal edit distance and output replacement mode.
package "cor"
version "0.1"
option "dictionary-home" - "Dictionary home dir." string typestr="FILENAME" no hidden
option "dictionary" d "Dictionary" string typestr="FILENAME" default="cor.bin" no
option "distance" n "Maximal edit distance." int default="1" no
option "replace" r "Replace original form with corrected form, place original form in the cor field. This option has no effect in single mode" flag off hidden
# The "single" option below is disabled.
#option "single" - "Place all alternatives in the same line" flag off

29
src/cor/common_cor.cc Normal file
View File

@ -0,0 +1,29 @@
#include <stdlib.h>
#include <string.h>
#include "common_cor.h"
char dictionary[256];
void process_cor_options(gengetopt_args_info* args)
{
if(args->dictionary_given)
{
expand_path(args->dictionary_arg,dictionary);
if(file_accessible(dictionary)!=0)
{
fprintf(stderr,"Cannot open the dictionary file: %s\nAborting.\n",dictionary);
exit(1);
}
}
else if (args->dictionary_home_given && args->language_given)
{
char buf[255];
expand_path(args->dictionary_home_arg, buf);
sprintf(dictionary,"%s/%s/cor.bin",buf,args->language_arg);
if(file_accessible(dictionary)!=0)
{
fprintf(stderr,"Cannot open the dictionary file: %s\nAborting.\n",dictionary);
exit(1);
}
}
}

19
src/cor/common_cor.h Normal file
View File

@ -0,0 +1,19 @@
// Declarations shared by the cor sources: the selected dictionary path and
// the routine that derives it from the parsed command line.
#ifndef __COMMON_COR_H
#define __COMMON_COR_H
#include <stdio.h>
//to be removed - defined in the Makefile! #define _CMDLINE_FILE "../cor/cmdline.h"
#include "../common/common.h"
#include "cmdline.h"
// File name of the cor dictionary.
#define DICT_FILE "cor.bin"
// NOTE(review): no definition or use of change_count is visible in the cor
// sources reviewed here - confirm it is still needed.
extern int change_count;
// Fills `dictionary` from the parsed options; see common_cor.cc.
extern void process_cor_options(gengetopt_args_info* args);
// Path of the dictionary file (defined in common_cor.cc).
extern char dictionary[];
#endif

142
src/cor/corr.cc Normal file
View File

@ -0,0 +1,142 @@
//---------------------------------------------------------------------------
#include "corr.hh"
#define MAXPATH 256
#define min(x,y) ((x<y)?(x):(y))
#define max(x,y) ((x>y)?(x):(y))
// Returns the value of cell (i,j) of the edit-distance table, computed from
// the neighbouring cells already stored in H2.
//
// i indexes the misspelled string X, j the candidate string Y.  Index -1
// stands for the empty prefix (the result is then the length of the other
// prefix); index -2 is the extra border used by the transposition case and
// yields n+1.  For i,j >= 0:
//   - equal characters cost nothing (diagonal cell),
//   - two adjacent transposed characters cost 1 on top of cell (i-2,j-2),
//   - otherwise 1 plus the cheapest of substitution, insertion, deletion.
int Corr::ed(int i,int j)
{
  if(i==-1)
    return j+1;
  if(j==-1)
    return i+1;
  if(i==-2 || j==-2)
    return n+1;

  if(X[i]==Y[j])
    return H2[i-1][j-1];

  // A transposition needs a preceding character in both strings; without the
  // i>0 && j>0 guard X[-1] / Y[-1] would be read outside the arrays.
  if(i>0 && j>0 && X[i-1]==Y[j] && X[i]==Y[j-1])
    return 1+min(H2[i-2][j-2],min(H2[i][j-1],H2[i-1][j]));

  return 1+min(H2[i-1][j-1],min(H2[i][j-1],H2[i-1][j]));
}
// Cut-off edit distance for column j: the smallest table value H2[k][j] over
// the rows max(0,j-t) .. min(m,j+t), never larger than j+t.
int Corr::cuted(int j)
{
  const int firstrow=max(0,j-t);
  const int lastrow=min(m,j+t);

  int best=j+t;
  int row=firstrow;
  while(row<=lastrow)
    {
      const int cell=H2[row][j];
      if(cell<best)
        best=cell;
      ++row;
    }
  return best;
}
/*
void Corr::recomputeH(int j)
{
for(int i=0;i<=m;i++)
H[(i)+2][(j)+2]=ed(i,j);
}
*/
// Recomputes column j of the edit-distance table, restricted to the rows
// max(0,j-t-2) .. min(m,j+t+2).
void Corr::recomputeH(int j)
{
  const int lastrow=min(m,j+t+2);
  int row=max(0,j-t-2);
  while(row<=lastrow)
    {
      H2[row][j]=ed(row,j);
      ++row;
    }
}
// Collects into `tab` every word accepted by the automaton whose edit
// distance to `w` is at most t, and returns how many were added.
//
// The automaton is walked depth-first: Y holds the candidate prefix built so
// far, path[] the state taken at each depth.  After each character the
// matching table column is recomputed, and the branch is abandoned as soon as
// the cut-off distance of that column exceeds t.
//
// Words that cannot be handled by the fixed-size buffers (X, Y, H) are
// rejected up front and yield 0 corrections.
int Corr::correct(const char* w, Words& tab)
{
  long int path[MAXPATH]={0};
  int i; // row index (X)
  int j; // column index (Y)
  long state=0;

  // The search touches table columns up to n+1 (= strlen(w)+t), stored at
  // offset +2, and the same index range of Y.  Refuse inputs for which that
  // would fall outside the arrays instead of overflowing them.
  const long wlen=(long)strlen(w);
  const long hdim=(long)(sizeof(H)/sizeof(H[0]));
  if(wlen+1>(long)sizeof(X) || wlen+t+3>hdim)
    return 0;

  strcpy(X,w);
  m=strlen(X)-1;
  n=m+t;

  // Border rows/columns of the table (indices -2 and -1).
  for(i=(-2);i<=m;i++)
    H[(i)+2][(-2)+2]=n;
  for(i=(-1);i<=m;i++)
    H[(i)+2][(-1)+2]=(i)+1;
  for(j=(-2);j<=n;j++)
    H[(-2)+2][(j)+2]=n;
  for(j=(-1);j<=n;j++)
    H[(-1)+2][(j)+2]=(j)+1;

  // Interior cells start above the threshold.
  for(j=0; j<=n; ++j)
    for(i=0; i<=m; ++i)
      H[i+2][j+2]=t+1;

  int more=1;
  bool cont=false;

  strcpy(Y,"");
  j=0;
  state=0;
  int count=0;

  while(more)
    {
      if(!empty(state))
        {
          Y[j]=input(state);
          recomputeH(j);
          if(cuted(j)<=t)
            {
              int edd;
              if(final(next(state)) && (edd=H[(m)+2][(j)+2])<=t)
                {
                  // NOTE(review): who releases `out` after tab.add() is not
                  // visible here - confirm Words takes ownership or copies.
                  char* out=new char[j+2];
                  strncpy(out,Y,j+1);
                  out[j+1]='\0';
                  cont=true;
                  tab.add(out);
                  count++;
                }
              // descend
              path[j++]=state;
              state=next(state);
              continue;
            }
          else
            if(continued(state))
              {
                // try the next alternative at the same depth
                state++;
                continue;
              }
        }
      //backtracking
      do
        if(j>0)
          j--;
        else
          more=0;
      while(more && !continued(path[j]));
      state=path[j]+1;
    }
  return count;
}
//---------------------------------------------------------------------------

34
src/cor/corr.hh Normal file
View File

@ -0,0 +1,34 @@
//---------------------------------------------------------------------------
#ifndef _corr_hh
#define _corr_hh
//---------------------------------------------------------------------------
#include "../lib/tfti.h"
#include "../lib/word.h"
// Spelling corrector built on top of the TFTiv<char,char> automaton: correct()
// searches the automaton for words within edit distance t of a given word.
class Corr : public TFTiv<char,char>
{
private:
// Storage of the edit-distance table.  Logical indices start at -2, so the
// table is accessed with an offset of +2 (see H2 below).
int H[100][100];
char X[100]; // misspelled string
char Y[100]; // (possibly partial) candidate string
int m; // index of the last character of X (correct() sets it to strlen(X)-1)
int n; // maximal index in Y (correct() sets it to m+t)
// Value of table cell (i,j) derived from its neighbours.
int ed(int,int);
// Cut-off edit distance for a column.
int cuted(int);
// Recomputes one column of the table.
void recomputeH(int);
public:
// View of H shifted by two rows and two columns: H2[i][j] is H[i+2][j+2].
// NOTE(review): H2 points into this object's own H; an implicitly generated
// copy would keep pointing at the source object's table - confirm Corr
// objects are never copied.
int (*H2)[100];
int t; // threshold
Corr() : H2((int(*)[100])&H[2][2]) {};
Corr(const char* a) : TFTiv<char,char>(a), H2((int(*)[100])&H[2][2]) { };
// Adds to `tab` the words within distance t of `w`; returns their number.
int correct(const char* w, Words& tab);
};
//---------------------------------------------------------------------------
#endif

155
src/cor/main.cc Normal file
View File

@ -0,0 +1,155 @@
#include <stdlib.h>
#include <ctype.h>
#include "../lib/iotools.h"
//do wyrzucenia - definicja w Makefile! #define _CMDLINE_FILE "../cor/cmdline.h"
#include "../common/common.h"
#include "common_cor.h"
#include "corr.hh"
#include "cmdline.h"
#include <locale.h>
int main(int argc, char** argv) {
// setlocale(LC_CTYPE,"");
// setlocale(LC_COLLATE,"");
gengetopt_args_info args;
if(cmdline_parser(argc, argv, &args) != 0)
exit(1);
process_config_files(&args,argv[0]);
process_common_options(&args,argv[0]);
process_cor_options(&args);
Corr cor;
cor.load(dictionary);
cor.t=args.distance_arg;
char line[MAX_LINE+1];
long line_count = 0;
Segment seg;
Words tab;
char form1[MAX_LINE];
char* form;
int formcasing;
char corfield[MAX_LINE]="";
while (fgets(line, MAX_LINE, inputf))
{
// strcpy(outline,line);
++line_count;
// if(!seg.parse(line))
// {
// fprintf(stderr,"Input error in line %d.\n",line_count);
// exit(1);
// }
char outline[128];
//printf("Starting cor... searching for %d fields\n", args.input_field_given);
//for (int i=0; i<args.input_field_given; ++i) {
// printf("\t%d. %s\n", i, args.input_field_arg[i]);
//}
if (!process_seg(line, args))
fputs(line, outputf);
else
{
char form[MAX_FORM];
tab.clear();
getfield(line,input_field_prefix,form);
if (form==NULL) continue;
formcasing=3;
cor.correct(form, tab);
if( tab.count() == 0 )
{
formcasing=casing(form);
if( formcasing == 1 || formcasing == 2)
tolowers(form, form1), cor.correct(form1, tab);
}
if ( tab.count() == 0)
fputs(line, failedf);
else
{
if(args.replace_flag)
{
char corfield[128];
strcpy(corfield, input_field_prefix);
strcat(corfield, form);
seg.aux[seg.auxn]=corfield;
++seg.auxn;
for(int i=0; i<tab.count(); ++i)
{
seg.form=tab[i].form();
restorecasing(seg.form,seg.form,formcasing);
seg.print(outline);
fputs(outline, outputf);
}
--seg.auxn;
}
else
{
if(one_line)
{
char* p=corfield;
for(int i=0; i<tab.count(); ++i)
{
restorecasing(tab[i].form(),tab[i].form(),formcasing);
p += sprintf(p," %s%s",output_field_prefix,tab[i].form());
}
sprintf(p,"\n");
strcpy(outline,line);
outline[strlen(outline)-1]='\0';
strcat(outline,corfield);
fputs(outline, outputf);
}
else if(one_field)
{
char* p=corfield;
p += sprintf(p," %s",output_field_prefix);
for(int i=0; i<tab.count(); ++i)
{
restorecasing(tab[i].form(),tab[i].form(),formcasing);
p += sprintf(p,(i==0)?"%s":";%s",tab[i].form());
}
sprintf(p,"\n");
strcpy(outline,line);
outline[strlen(outline)-1]='\0';
strcat(outline,corfield);
fputs(outline, outputf);
}
else
{
for(int i=0; i<tab.count(); ++i)
{
restorecasing(tab[i].form(),tab[i].form(),formcasing);
sprintf(corfield," %s%s\n",output_field_prefix,tab[i].form());
strcpy(outline,line);
outline[strlen(outline)-1]='\0';
strcat(outline,corfield);
fputs(outline, outputf);
}
}
}
}
}
if(args.interactive_flag)
{
fflush(outputf);
fflush(failedf);
}
}
cmdline_parser_free(&args);
}

72
src/dgp/Makefile Normal file
View File

@ -0,0 +1,72 @@
# Build rules for the dgp component.
include ../../config.mak

SHELL = /bin/sh
LIB_PATH=../../lib
COMMON_PATH=../common
# Header name passed to the sources as the _CMDLINE_FILE macro.
CMDLINE_FILE='"../dgp/cmdline.h"'

#vpath %.o .

ifeq ($(BUILD_STATIC), yes)
  LDFLAGS += -static
endif

CXXFLAGS += -O2

sources = main.cc grammar.cc symbol.cc mgraph.cc sgraph.cc dgp0.cc cmdline.cc \
          $(COMMON_PATH)/common.cc global.cc

bin = dgp

# every object file is created next to its source file
objs = $(sources:%.cc=%.o)

${bin}: ${objs}
	$(CXX) $(CXXFLAGS) -D _CMDLINE_FILE=$(CMDLINE_FILE) -o $@ ${objs} $(LDFLAGS)

# The generated dependency files are not needed for cleaning; including them
# unconditionally would make "make clean" generate them (and the option
# parser) first, only to delete them again.
ifneq ($(MAKECMDGOALS),clean)
include $(sources:.cc=.d)
endif

%.o: %.cc
	$(CXX) -D _CMDLINE_FILE=$(CMDLINE_FILE) -c ${CXXFLAGS} -o $@ $<

# Generate the header dependencies of one source file; the sed step makes the
# .d file itself depend on the same headers as the object.
%.d: %.cc
	$(CC) -MM $(CPPFLAGS) -D _CMDLINE_FILE=$(CMDLINE_FILE) $< > $@.$$$$; \
	sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
	rm -f $@.$$$$

# old:
# cmdline.cc cmdline.h : cmdline.ggo
# 	gengetopt --c-extension=cc -i cmdline.ggo

# new:
cmdline.cc cmdline.h: cmdline.ggo
	$(GENGETOPT) -i cmdline.ggo --c-extension=cc --conf-parser

cmdline.ggo: cmdline_dgp.ggo ../common/cmdline_common.ggo
	cat cmdline_dgp.ggo ../common/cmdline_common.ggo > cmdline.ggo
# end of new

# rm -f: cleaning an already clean (or partially built) tree must not fail
# and must still reach the second command.
.PHONY: clean
clean:
	rm -f ${bin} ${objs} cmdline.cc cmdline.h
	rm -rf *.d

.PHONY: prof
prof: dgp
	gprof dgp ~/tmp/dgp-pl/gmon.out > dgp.prof

# install/uninstall do nothing unless BIN_DIR is defined.
.PHONY: install
install:
ifdef BIN_DIR
	install -m 0755 dgp $(BIN_DIR)
	install -m 0755 dgc $(BIN_DIR)
	install -m 0755 canonize $(BIN_DIR)
	install -m 0755 tre $(BIN_DIR)
endif

.PHONY: uninstall
uninstall:
ifdef BIN_DIR
	rm $(BIN_DIR)/dgp
	rm $(BIN_DIR)/dgc
	rm $(BIN_DIR)/canonize
	rm $(BIN_DIR)/tre
endif

3
src/dgp/Makefile.user Normal file
View File

@ -0,0 +1,3 @@
# Compile the grammar source gram.dgc into gram.dgp with dgc, using the
# category list cats.dgc.  cats.dgc is read by the recipe, so it is listed as
# a prerequisite: changing the categories must rebuild the grammar too.
gram.dgp: gram.dgc cats.dgc
	dgc -c cats.dgc < gram.dgc > gram.dgp

50
src/dgp/canonize Executable file
View File

@ -0,0 +1,50 @@
#!/usr/bin/perl
#package: UAM TExt Tools
#component: canonize
#version: 1.0
#author: Tomasz Obrebski
# Filter: copies its input to standard output, replacing every substring that
# matches $attr::pos_re/$attr::avlist_re with the result of trans().
# The attr module is looked up in the system-wide and the per-user utt
# library directories.
use lib "/usr/local/lib/utt";
use lib "$ENV{'HOME'}/.local/lib/utt";
use strict;
use Getopt::Long;
use attr;
my $help;
GetOptions("help|h" => \$help);
if($help)
{
print <<'END'
Transforms syntactic categories to their canonical form.
Usage: canonize
Options:
--help -h Help.
END
;
exit 0;
}
#$|=1;
# Cache used by trans(): category text => canonical form.
my %tra;
# Files named on the command line (or standard input) are processed line by
# line; each matched category description is replaced in place.
while(<>)
{
s/$attr::pos_re\/$attr::avlist_re/trans($&)/ge;
print;
}
# Returns the canonical form of a category description.  Results are cached
# in %tra, so attr::canonize is called once per distinct description.
sub trans
{
    my ($cat) = @_;
    unless(exists($tra{$cat}))
    {
        $tra{$cat} = attr::canonize($cat);
    }
    return $tra{$cat};
}

52
src/dgp/cmdline.ggo Normal file
View File

@ -0,0 +1,52 @@
package "dgp"
version "0.1"
option "grammar" g "Grammar file"
string no typestr="filename"
option "long" l "Long output"
flag off
option "debug" d "Debug mode."
flag off
option "info" - "Print info.
h - heads d - dependents
s - sets
c - constraints n - node/arc counts t - parse time
"
string no default="h"
#section "Common UTT options"
option "input" f "Input file" string no
option "output" o "Output file for succesfully processed segments" string no
option "fail" e "Output file for unsuccesfully processed segments " string no
option "only-fail" - "Print only segments the program failed to process" flag off hidden
option "no-fail" - "Print only segments the program processed" flag off hidden
option "copy" c "Copy succesfully processed segments to standard output" flag off
option "process" p "Process segments with this tag" string no multiple
option "select" s "Select only segments with this field" string no multiple
option "ignore" S "Select only segments without this field" string no multiple
option "output-field" O "Output field name" string no
option "input-field" I "Input field name" string no multiple
option "interactive" i "Toggle interactive mode" flag off
option "config" - "Configuration file" string typestr="FILENAME" no
option "one-field" 1 "Print all results in one segments (creates ambiguous annotation)" flag off
option "one-line" - "Print annotation alternatives as additional fields" flag off
option "language" - "Language." string no

18
src/dgp/cmdline_dgp.ggo Normal file
View File

@ -0,0 +1,18 @@
# gengetopt option declarations specific to the dgp component:
# grammar file, output verbosity, debug mode and the selection of printed
# information.
package "dgp"
version "0.1"
option "grammar" g "Grammar file"
string no typestr="filename"
option "long" l "Long output"
flag off
option "debug" d "Debug mode."
flag off
option "info" - "Print info.
h - heads d - dependents
s - sets
c - constraints n - node/arc counts t - parse time
"
string no default="h"

13
src/dgp/const.hh Normal file
View File

@ -0,0 +1,13 @@
// Compile-time size limits shared by the dgp sources.
#ifndef CONST_HH
#define CONST_HH
#define MAXTYPES 32
#define MAXFLAGS 64
// Capacity of the node array in MGraph and width of the node bitsets.
#define MAXNODES 1024
// Size of the constraint function table declared in grammar.cc.
#define MAXCONSTRS 32
// Size of the line buffers used when reading the grammar and the input segments.
#define MAXLINE 256
// Size of the type buffer in MNode.
#define MAXFORMLEN 64
#define MAXDESCRLEN 80
// Characters separating fields: space, newline, tab.
#define FIELDSEP " \n\t"
#endif

292
src/dgp/dgc Executable file
View File

@ -0,0 +1,292 @@
#!/usr/bin/perl
#package: UAM Text Tools
#component: dgc (dg compiler)
#version: 1.0
#author: Tomasz Obrebski
# implicitly requires the canonize program!!!!
use lib "/usr/local/lib/utt";
use lib "$ENV{'HOME'}/.local/lib/utt";
use strict;
use Getopt::Long;
use Data::Dumper;
use attr;
use File::HomeDir;
# Configuration files: the system-wide one is read first, then the per-user
# one, so user settings override system settings.
my $systemconfigfile='/usr/local/etc/utt/dgc.conf';
my $userconfigfile=home()."/.utt/dgc.conf";
Getopt::Long::Configure('no_ignore_case_always');
# Option values; 0 means "not set".
my $help=0;
my $catfile=0;
my $dicfile=0;
my $gramfile=0;
my $outputfile=0;
#read configuration files###########################
# Each non-empty line (after removing '#' comments and surrounding
# whitespace) has the form "name = value"; long and short option names are
# both accepted.  A configuration file that cannot be opened is skipped.
my $file;
foreach $file ($systemconfigfile, $userconfigfile){
if(open(CONFIG, $file)){
while (<CONFIG>) {
chomp;
s/#.*//;
s/^\s+//;
s/\s+$//;
next unless length;
my ($name, $value) = split(/\s*=\s*/, $_, 2);
if(($name eq "categories")or($name eq "c")){
$catfile=$value;
}
elsif(($name eq "dictionary")or($name eq "d")){
$dicfile=$value;
}
elsif(($name eq "grammar")or($name eq "g")){
$gramfile=$value;
}
elsif(($name eq "outputfile")or($name eq "o")){
$outputfile=$value;
}
elsif(($name eq "help")or($name eq "h")){
$help=1;
}
}
close CONFIG;
}
}
#########################################################
# Command-line options override the values read from the configuration files.
GetOptions("help|h" => \$help,
"categories|c=s" => \$catfile,
"dictionary|d=s" => \$dicfile,
"grammar|g=s" => \$gramfile,
"outputfile|o=s" => \$outputfile);
# Expand a leading "~" (alone or followed by "/") in the file options to the
# user's home directory.  The pattern is anchored so that a tilde elsewhere in
# a path (for example in a backup file name such as "gram.dgc~") is left
# alone.
my $homedir = $ENV{'HOME'};
for my $path ($catfile, $dicfile, $gramfile, $outputfile)
{
    $path =~ s{^~(?=/|$)}{$homedir};
}
# --help: print the usage text and stop.
if($help)
{
print <<'END'
Usage: dgc [OPTIONS]
Options:
--categories -c filename List of syntactic categories.
--dictionary -d filename Dictionary.
--grammar -g filename List of grammar rules.
--outputfile -o filename Output file name.
--help -h Help.
END
;
exit 0;
}

# The categories must come from a category list, a dictionary, or both.
# (The message names the options as they are actually declared.)
die("At least one of --categories and --dictionary must be given.\n") if !$catfile && !$dicfile;
# Statement counters reported on STDERR at the end of the run.
my $ncat=0;
my $nrole=0;
my $nsgl=0;
my $nleft=0;
my $nright=0;
my $nreq=0;
my $nlink=0;
my $nflag=0;

my %cats;   # atomic category name => parsed category
my %roles;  # ROLE statements seen
my %agr;    # role => list of AGR arguments
my %gov;    # role => list of parsed GOV categories

# Output goes to standard output unless an output file other than "-" was
# named.  The three-argument form of open treats the name literally, so a
# name with leading/trailing blanks or mode characters cannot change how the
# file is opened.
if(!$outputfile || $outputfile eq "-") {
    *OUTPUT = *STDOUT;
}
else {
    open(OUTPUT, '>', $outputfile) or die("Can't open output file: $outputfile!");
}

# Categories must be known before the grammar is read.
loadcats($catfile) if $catfile;
extractcats($dicfile) if $dicfile;

# One or more categories separated by commas.
my $cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/;

# class parse_class:
# /$attr::cat_re/g;

# The grammar is read from standard input unless a grammar file other than
# "-" was named.
if(!$gramfile || $gramfile eq "-") {
    *INPUT = *STDIN;
}
else {
    open(INPUT, '<', $gramfile) or die("Unable to open: $gramfile!");
}
# Main loop: reads the grammar line by line.  Comments ('#' to end of line)
# and surrounding whitespace are removed first; the statement keyword then
# decides what is written to OUTPUT.
while(<INPUT>)
{
s/#.*//;
s/^\s+//;
s/\s+$//;
# AGR role attribute: remembered per role, applied when links are expanded.
if(/^AGR\s+(\S+)\s+(\S+)$/)
{
push @{$agr{$1}}, $2;
}
# GOV role category: remembered per role (parsed), applied when links are expanded.
elsif(/^GOV\s+(\S+)\s+(\S+)$/)
{
push @{$gov{$1}}, attr::parse($2);
}
# ROLE, SGL, LEFT, RIGHT, FLAG: copied to the output unchanged and counted.
elsif(/^ROLE\s+\S+$/)
{
$roles{$_}=1;
print OUTPUT "$_\n";
}
elsif(/^SGL\s+\S+$/)
{
++$nsgl;
print OUTPUT "$_\n";
}
# REQ category role: the original line is kept as a comment and one REQ
# statement is written for every known atomic category matching the pattern.
elsif(/^REQ\s+(\S+)\s+(\S+)$/)
{
print OUTPUT "#$_\n";
my $cat = attr::parse $1;
for my $atomcat (keys %cats)
{
if(attr::match @$cat, @{$cats{$atomcat}})
{
print OUTPUT "REQ ".$atomcat." $2\n";
++$nreq;
}
}
}
elsif(/^LEFT\s+\S+$/)
{
++$nleft;
print OUTPUT "$_\n";
}
elsif(/^RIGHT\s+\S+$/)
{
++$nright;
print OUTPUT "$_\n";
}
# LINK heads dependents role: heads and dependents are comma-separated
# category lists; addlinks() is called for every head/dependent pair.
elsif(my ($hs,$ds,$r) = /^LINK\s+($cats_re)\s+($cats_re)\s+(\S+)$/)
{
print OUTPUT "#$_\n";
for my $h ($hs =~ /$attr::cat_re/g)
{
for my $d ($ds =~ /$attr::cat_re/g)
{
addlinks($h,$d,$r);
}
}
}
elsif(/^FLAG\s+\S+$/)
{
++$nflag;
print OUTPUT "$_\n"
}
elsif(/^$/) {
# skip empty lines and comments
}
else
{
print STDERR "Illegal format: $_\n";
}
}
# Expands one LINK pattern (head category $h, dependent category $d, role $r)
# into LINK statements over atomic categories.
#
# The AGR and GOV entries recorded for the role are first echoed as comments.
# Then, for every known atomic category matching the head pattern and every
# known atomic category matching the dependent pattern, a LINK statement is
# written unless one of the role's agreement (AGR) or government (GOV)
# conditions fails for that pair.
sub addlinks
{
    my ($h,$d,$r) = @_;

    foreach my $a (@{$agr{$r}})
    {
        print OUTPUT "#AGR $r $a\n";
    }
    foreach my $c (@{$gov{$r}})
    {
        print OUTPUT "#GOV $r ".attr::unparse(@$c)."\n";
    }

    my $head = attr::parse($h);
    my $dep = attr::parse($d);

    foreach my $atomhead (keys %cats)
    {
        next unless attr::match(@$head, @{$cats{$atomhead}});

      DEP:
        foreach my $atomdep (keys %cats)
        {
            next DEP unless attr::match(@$dep, @{$cats{$atomdep}});

            foreach my $a (@{$agr{$r}})
            {
                next DEP unless attr::agree(@{$cats{$atomhead}},@{$cats{$atomdep}},$a);
            }

            foreach my $c (@{$gov{$r}})
            {
                next DEP unless attr::match(@$c,@{$cats{$atomdep}});
            }

            print OUTPUT "LINK ", $atomhead." ", $atomdep." $r\n";
            ++$nlink;
        }
    }
}
# Summary of the statements written, reported on STDERR.  The CAT and ROLE
# figures are the numbers of distinct entries collected in %cats and %roles.
printf STDERR ("%6d CAT statements\n", scalar(keys %cats));
printf STDERR ("%6d ROLE statements\n", scalar(keys %roles));
printf STDERR ("%6d SGL statements\n", $nsgl);
printf STDERR ("%6d REQ statements\n", $nreq);
printf STDERR ("%6d LEFT statements\n", $nleft);
printf STDERR ("%6d RIGHT statements\n", $nright);
printf STDERR ("%6d LINK statements\n", $nlink);
printf STDERR ("%6d FLAG statements\n", $nflag);
# Collects the categories used in a dictionary file.
#
# The file is piped through the canonize program; every comma-introduced
# token (up to the next whitespace or ';') is taken as a category.  Each
# category not seen before is written as a CAT statement and stored, parsed,
# in %cats.
#
# canonize is started without a shell (list form of open), so the file name
# is passed as a single argument whatever characters it contains, and a
# failure to start canonize is reported instead of silently yielding no
# categories.
sub extractcats
{
    my $file = shift;

    open(my $dic, '-|', 'canonize', $file)
        or die("Can't run canonize on $file: $!\n");

    while(my $line = <$dic>)
    {
        while($line =~ /,([^[:space:];]+)/g)
        {
            my $cat=$1;
            next if !$cat || exists $cats{$cat};
            $ncat++;
            print OUTPUT "CAT $cat\n";
            $cats{$cat}=attr::parse($cat);
        }
    }
    close $dic;
}
# Loads the categories listed one per line in a category file.
#
# The file is piped through the canonize program; spaces, tabs and newlines
# are removed from each line.  Each non-empty category not seen before is
# written as a CAT statement and stored, parsed, in %cats.
#
# canonize is started without a shell (list form of open), so the file name
# is passed as a single argument whatever characters it contains, and a
# failure to start canonize is reported instead of silently yielding no
# categories.
sub loadcats
{
    my $file = shift;

    open(my $catfh, '-|', 'canonize', $file)
        or die("Can't run canonize on $file: $!\n");

    while(my $cat = <$catfh>)
    {
        $cat =~ tr/ \t\n//d;
        next if !$cat || exists $cats{$cat};
        print OUTPUT "CAT $cat\n";
        ++$ncat;
        $cats{$cat}=attr::parse($cat);
    }
    close $catfh;
}

217
src/dgp/dgp0.cc Normal file
View File

@ -0,0 +1,217 @@
#include "dgp0.hh"
#include "global.hh"
// Grammar and the two graphs used by the parser (declared extern here).
extern Grammar grammar;
extern MGraph mgraph;
extern SGraph sgraph;
// Shortcut to sgraph.nodes, set at the start of dgp0().
SNode* snodes;
extern bool debug;
// Indices of the syntactic nodes in processing order; cloned nodes are
// inserted right after the node they were cloned from.
list<int> nodelist;
// Last entry of nodelist that has already been processed by dgp0().
list<int>::iterator processed;
void set_initial_constraints(int node)
{
snodes[node].prop.forbidden.reset();
snodes[node].prop.required=grammar.obl[snodes[node].mnode->cat];
}
// True when attaching a dependent with `role` would alter the head's
// constraints: the role is single (sgl) or currently required by the head.
bool changing_constraints(int head, Role role)
{
  if(grammar.sgl[role])
    return true;
  return snodes[head].prop.required[role];
}
void apply_constraints(int head, Role role)
{
if(grammar.sgl[role]) snodes[head].prop.forbidden.set(role);
snodes[head].prop.required.reset(role);
}
// Properties a head would have after taking a dependent with `role` via
// connect_left(): like headprop, except that a single (sgl) role becomes
// forbidden and the role is no longer required.  The argument is a copy, so
// the caller's properties are not modified.
NodeProp compute_prop_left(NodeProp headprop, Role role)
{
  if(grammar.sgl[role])
    headprop.forbidden.set(role);
  headprop.required.reset(role);
  return headprop;
}
// Properties a head would have after taking a dependent with `role` via
// connect_right(): like headprop, except that a single (sgl) role becomes
// forbidden and the role is no longer required.  The argument is a copy, so
// the caller's properties are not modified.
NodeProp compute_prop_right(NodeProp headprop, Role role)
{
  if(grammar.sgl[role])
    headprop.forbidden.set(role);
  headprop.required.reset(role);
  return headprop;
}
// Looks among the syntactic nodes of `mnode` for one whose properties, LH set
// and LV set all equal the given values.  Returns its index, or -1 when there
// is none.
int get_node(MNode& mnode, NodeProp p, bitset<MAXNODES>& newheadLH, bitset<MAXNODES>& newheadLV)
{
  vector<int>::iterator ps=mnode.snodes.begin();
  while(ps!=mnode.snodes.end())
    {
      SNode& candidate=snodes[*ps];
      if(candidate.prop==p && candidate.LH==newheadLH && candidate.LV==newheadLV)
        return *ps;
      ++ps;
    }
  return -1;
}
// Attaches node *d as a dependent of node *h with role r; the arc is stored
// in the head's deps list.
//
// If taking the dependent changes the head's properties, the arc is attached
// not to *h itself but to a variant of it: an existing node of the same
// morphological node with matching properties/LH/LV if get_node() finds one,
// otherwise a clone of *h that is inserted into nodelist right after *h.
void connect_left(list<int>::iterator h, list<int>::iterator d, Role r)
{
NodeProp &oldheadprop = snodes[*h].prop;
NodeProp newheadprop;
bitset<MAXNODES> newheadLV;
bitset<MAXNODES> newheadLH;
bitset<MAXNODES> newheadLD;
newheadprop=compute_prop_left(oldheadprop,r);
int newheadind;
// Unchanged properties: the head itself receives the arc.
if(oldheadprop==newheadprop)
newheadind = *h;
else
{
// Sets searched for / given to the variant: LH and LD from the head,
// LV from the dependent.
newheadLH = snodes[*h].LH;
newheadLV = snodes[*d].LV;
newheadLD = snodes[*h].LD;
newheadind = get_node(*(snodes[*h].mnode), newheadprop, newheadLH, newheadLV);
if( newheadind < 0 )
{
newheadind = sgraph.clone(*h,newheadprop);
list<int>::iterator nextit=h; ++nextit;
nodelist.insert(nextit,newheadind);
snodes[newheadind].LH=newheadLH;
snodes[newheadind].in_LH=true;
// LV of the clone starts empty; it is filled from the dependent below.
snodes[newheadind].LV.reset();
snodes[newheadind].LD = newheadLD;
if(debug) sgraph.print_node_debug(stderr," C ",newheadind);
}
else
snodes[newheadind].LD |= newheadLD; // ONLY FOR LD
}
// Record the arc; the third Arc argument is the original head *h.
snodes[newheadind].deps.push_back(Arc(*d,r,*h));
// A saturated dependent passes its own LV and LD sets on to the head.
if(snodes[*d].saturated()) snodes[newheadind].LV |= snodes[*d].LV;
snodes[newheadind].LD.set(*d);
if(snodes[*d].saturated()) snodes[newheadind].LD |= snodes[*d].LD;
if(debug)
sgraph.print_arc(stderr,*d,newheadind,r,0), sgraph.print_node_debug(stderr," U ",newheadind);
}
// Attaches node *d as a dependent of node *h with role r; the arc is stored
// in the dependent's heads list.
//
// If taking the dependent changes the head's properties, the arc points not
// to *h itself but to a variant of it: an existing node of the same
// morphological node with matching properties/LH/LV if get_node() finds one,
// otherwise a clone of *h that is inserted into nodelist right after *h.
void connect_right(list<int>::iterator h, list<int>::iterator d, Role r)
{
NodeProp &oldheadprop = snodes[*h].prop;
NodeProp newheadprop;
bitset<MAXNODES> newheadLV;
bitset<MAXNODES> newheadLH;
bitset<MAXNODES> newheadLD;
int newheadind;
newheadprop = compute_prop_right(oldheadprop,r);
// Unchanged properties: the head itself is used.
if(oldheadprop==newheadprop)
newheadind = *h;
else
{
// The variant keeps all three sets (LH, LV, LD) of the head.
newheadLH = snodes[*h].LH;
newheadLV = snodes[*h].LV;
newheadLD = snodes[*h].LD;
newheadind = get_node(*(snodes[*h].mnode), newheadprop, newheadLH, newheadLV);
if( newheadind < 0 )
{
newheadind = sgraph.clone(*h,newheadprop);
snodes[newheadind].LH=newheadLH;
snodes[newheadind].in_LH=false;
snodes[newheadind].LV=newheadLV;
snodes[newheadind].LD=newheadLD;
list<int>::iterator nextit=h; ++nextit;
nodelist.insert(nextit,newheadind);
if(debug) sgraph.print_node_debug(stderr," C ",newheadind);
}
else
snodes[newheadind].LD |= newheadLD; // ONLY FOR LD
}
// Record the arc; the third Arc argument is the original head *h.
snodes[*d].heads.push_back(Arc(newheadind,r,*h));
snodes[*d].LH.set(newheadind);
// A saturated head passes its own LH set on to the dependent.
if(snodes[newheadind].saturated()) snodes[*d].LH |= snodes[newheadind].LH;
if(debug)
sgraph.print_arc(stderr,newheadind,*d,r,1), sgraph.print_node_debug(stderr," U ",*d);
}
// Tries to attach earlier nodes as dependents of node *j.
//
// Walks nodelist backwards from j (inclusive) down to, but not including, the
// first entry.  Every node that is visible from *j and saturated is connected
// with each role the grammar allows between the two categories, provided the
// constraints are met for direction 0.
void try_connect_dependents(list<int>::iterator j)
{
  list<int>::iterator i(j);
  while(i!=nodelist.begin())
    {
      if(sgraph.visible(*i,*j) && sgraph.saturated(*i))
        {
          Roles& ji_roles = grammar.connect[snodes[*j].mnode->cat][snodes[*i].mnode->cat];
          RolesIter r=ji_roles.begin();
          while(r!=ji_roles.end())
            {
              if(grammar.check_constr(snodes[*j].prop,snodes[*i].prop,0,*r))
                connect_left(j,i,*r);
              ++r;
            }
        }
      --i;
    }
}
// Tries to attach node *j as a dependent of earlier nodes.
//
// Walks nodelist backwards from j (inclusive) down to, but not including, the
// first entry.  Every node that is visible from *j is used as a head with
// each role the grammar allows between the two categories, provided the
// constraints are met for direction 1.
void try_connect_heads(list<int>::iterator j)
{
  list<int>::iterator i(j);
  while(i!=nodelist.begin())
    {
      if(sgraph.visible(*i,*j))
        {
          Roles& ij_roles = grammar.connect[snodes[*i].mnode->cat][snodes[*j].mnode->cat];
          RolesIter r=ij_roles.begin();
          while(r!=ij_roles.end())
            {
              if(grammar.check_constr(snodes[*i].prop,snodes[*j].prop,1,*r))
                connect_right(i,j,*r);
              ++r;
            }
        }
      --i;
    }
}
// Adds the reverse of every recorded arc: for each dependent arc of a node a
// head arc is added to the dependent, and for each head arc of a node a
// dependent arc is added to the head.  The first entry of nodelist is
// skipped; nodelist must not be empty (dgp0() always puts one entry in it
// before calling this function).
void reverse_links()
{
  list<int>::iterator i = nodelist.begin();
  for(++i; i!=nodelist.end(); ++i)
    {
      // Start at begin(); the former "begin()--" decremented a temporary
      // begin iterator, which has no effect at best and is invalid for
      // checked iterators.
      for(vector<Arc>::iterator da=sgraph.nodes[*i].deps.begin(); da!=sgraph.nodes[*i].deps.end(); ++da)
        sgraph.nodes[da->dst].heads.push_back(Arc(*i,da->role,da->anc));
      for(vector<Arc>::iterator ha=sgraph.nodes[*i].heads.begin(); ha!=sgraph.nodes[*i].heads.end(); ++ha)
        sgraph.nodes[ha->dst].deps.push_back(Arc(*i,ha->role,ha->anc));
    }
}
// Parser driver.  For every morphological node, in order, a base syntactic
// node is created and appended to nodelist; then every not yet processed
// entry of nodelist (the new node and any clones created along the way) is
// tried as a head of earlier nodes and as a dependent of earlier nodes.
// Finally the reverse arcs are added.
void dgp0()
{
  snodes=sgraph.nodes;

  nodelist.clear();
  nodelist.push_back(0); // BOS
  processed=nodelist.begin();

  int m=0;
  while(m<mgraph.n)
    {
      int basenode = sgraph.add_base_snode(mgraph.nodes+m); // should return SNode*

      set_initial_constraints(basenode);
      nodelist.push_back(basenode);

      if(debug) {sgraph.print_node_debug(stderr,"B ",basenode);} // STDOUT!!!

      // Entries may be inserted behind the cursor while it advances.
      for(list<int>::iterator cursor=processed; ++cursor != nodelist.end(); )
        {
          try_connect_dependents(cursor);
          try_connect_heads(cursor);
          processed=cursor;
        }

      ++m;
    }
  reverse_links();
}

12
src/dgp/dgp0.hh Normal file
View File

@ -0,0 +1,12 @@
// Interface of the dgp0 parsing routine.
#ifndef _DGP0_HH
#define _DGP0_HH
#include "grammar.hh"
#include "sgraph.hh"
#include "mgraph.hh"
// API
// Runs the parser on the global mgraph/sgraph/grammar objects (see dgp0.cc).
void dgp0();
#endif

5
src/dgp/global.cc Normal file
View File

@ -0,0 +1,5 @@
#include "global.hh"
// Debug-output switch shared by the dgp sources; off by default.
bool debug = false;

1
src/dgp/global.hh Normal file
View File

@ -0,0 +1 @@
// Debug-output switch; defined in global.cc.
extern bool debug;

13
src/dgp/go Executable file
View File

@ -0,0 +1,13 @@
#!/bin/sh
# Runs the complete analysis pipeline on standard input, ending with the dgp
# parser.  The grammar gram.dgp is (re)built first through Makefile.go.
#
# Exits with status 1 when Makefile.go is missing or the grammar cannot be
# built, so that callers can tell a failed run from a successful one.
if test -f Makefile.go;
then
    # Do not parse with a missing or stale grammar.
    make -f Makefile.go gram.dgp || exit 1
    tok |\
    lem -p W |\
    canonize |\
    sen |\
    gph -p W -p BOS -p EOS -r BOS |\
    dgp -i ds -p W -p BOS -p EOS -g gram.dgp
else
    echo "Invalid configuration! Run utt_make_config.pl first."
    exit 1
fi
fi

181
src/dgp/grammar.cc Normal file
View File

@ -0,0 +1,181 @@
#include <stdio.h>
#include "grammar.hh"
bool (*constraint[MAXCONSTRS])(int head, int dep);
// Returns 1 when `s` names a known role (Role::index(s) > 0).  Otherwise
// reports the problem on stderr, prefixed with the line number, and
// returns 0.
int chk_type(const char* s, int lineno) // SIDE EFECTS!
{
  const bool known = Role::index(s)>0;
  if(!known)
    fprintf(stderr,"%8d: Invalid type '%s'. Line ignored.\n",lineno,s);
  return known ? 1 : 0;
}
// Returns 1 when `s` names a known category (Cat::index(s) > 0).  Otherwise
// reports the problem on stderr, prefixed with the line number, and
// returns 0.
int chk_cat(const char* s, int lineno)
{
  const bool known = Cat::index(s)>0;
  if(!known)
    fprintf(stderr,"%8d: Invalid category '%s'. Line ignored.\n",lineno,s);
  return known ? 1 : 0;
}
// Registers a category name.  When the number of categories outgrows the
// allocated capacity, the capacity is raised by 16 and the category-indexed
// tables (the square connect table and obl) are resized accordingly.
void Grammar::add_category(const char* s)
{
  Cat::add(s);
  if(Cat::count()<=cats_sz)
    return;

  cats_sz += 16;
  connect.resize(cats_sz);
  for(int row=0; row<cats_sz; ++row)
    connect[row].resize(cats_sz);
  obl.resize(cats_sz);
}
// Registers a role name.  When the number of roles outgrows the allocated
// capacity, the capacity is raised by 16 and the role-indexed tables lt and
// gt are resized accordingly.
void Grammar::add_type(const char* s)
{
  Role::add(s);
  if(Role::count()<=types_sz)
    return;

  types_sz += 16;
  lt.resize(types_sz);
  gt.resize(types_sz);
}
// Registers a flag name.  When the number of flags outgrows the allocated
// capacity, the capacity is raised by 16 and the flag-indexed table pass is
// resized accordingly.
void Grammar::add_flag(const char* s)
{
  Flag::add(s);
  if(Flag::count()<=flags_sz)
    return;

  flags_sz += 16;
  pass.resize(flags_sz);
}
// Records the relation "s lt t" (and its mirror in gt).  Unless one of the
// two roles has index 0, the relation is then propagated: every role already
// lt s is also made lt t, and s is made lt every role that t is already lt.
void Grammar::set_lt(Role s, Role t)
{
  lt[s].set(t);
  gt[t].set(s);

  if(s==0||(int)t==0)
    return;

  for(int i=0; i<Role::count(); ++i)
    {
      if(lt[i][s])
        set_lt(i,t);
    }
  for(int i=0; i<Role::count(); ++i)
    {
      if(lt[t][i])
        set_lt(s,i);
    }
}
// Fills gt as the mirror image of lt: t gt s is set for every pair of roles
// with s lt t.
void Grammar::compute_gt()
{
  for(Role s=0; s<Role::count(); ++s)
    {
      for(Role t=0; t<Role::count(); ++t)
        {
          if(!lt[s][t])
            continue;
          gt[t].set(s);
        }
    }
}
// Reads a compiled grammar from `f`, one statement per line.
//
// Recognized statements: CAT, ROLE, SGL, LEFT, RIGHT, REQ, LINK, FLAG, each
// with the number of arguments checked below.  Empty lines and lines whose
// first field starts with '#' are skipped.  Statements naming an unknown
// role or category are reported by chk_type()/chk_cat() and ignored; any
// other unrecognized line is reported as invalid.  Always returns true.
bool Grammar::read(FILE* f)
{
int lineno=0;
// All field buffers have the size of the line buffer, so a field can never
// be longer than its buffer.
char line[MAXLINE]; // line has the structure: key [arg1 [arg2 [arg3]]]
char key[MAXLINE];
char arg1[MAXLINE];
char arg2[MAXLINE];
char arg3[MAXLINE];
while(fgets(line,MAXLINE,f))
{
lineno++;
int fields=sscanf(line,"%s %s %s %s",key,arg1,arg2,arg3);
if(fields<1 || key[0]=='#') continue; // skip empty lines and comments
if (strcmp(key,"CAT")==0 && fields>=2)
{
add_category(arg1);
}
else if(strcmp(key,"ROLE")==0 && fields>=2)
{
add_type(arg1);
}
else if(strcmp(key,"SGL")==0 && fields>=2)
{
if(chk_type(arg1,lineno))
set_sgl(arg1);
}
else if(strcmp(key,"LEFT")==0 && fields>=2)
{
if(chk_type(arg1,lineno))
set_left(arg1);
}
else if(strcmp(key,"RIGHT")==0 && fields>=2)
{
if(chk_type(arg1,lineno))
set_right(arg1);
}
else if(strcmp(key,"REQ")==0 && fields>=3)
{
// The checks are added rather than combined with && so that every invalid
// argument is reported, not just the first one.
if(chk_cat(arg1,lineno) + chk_type(arg2,lineno) == 2)
set_obl(arg1,arg2);
}
else if(strcmp(key,"LINK")==0 && fields>=4)
{
if(chk_cat(arg1,lineno) + chk_cat(arg2,lineno) + chk_type(arg3,lineno) == 3)
set_connect(arg1,arg2,arg3);
}
// FLAG DECLARATION
else if(strcmp(key,"FLAG")==0 && fields>=2)
{
add_flag(arg1);
}
else fprintf(stderr,"Invalid line %d. Ignored.\n", lineno);
}
// compute_gt();
return true;
}
void Grammar::write(FILE* f)
{
for(Cat i=1; i<Cat::count(); ++i)
fprintf(f,"CAT\t%s\n",i.str());
for(Role i=1; i<Role::count(); ++i)
fprintf(f,"ROLE\t%s\n",i.str());
for(Role i=1; i<Role::count(); ++i)
if(sgl.test(i)) fprintf(f,"SGL\t%s\n",i.str());
for(Role i=1; i<Role::count(); ++i)
if(left.test(i)) fprintf(f,"LEFT\t%s\n",i.str());
for(Role i=1; i<Role::count(); ++i)
if(right.test(i)) fprintf(f,"RIGHT\t%s\n",i.str());
for(Cat c=1; c<Cat::count(); ++c)
for(Role r=1; r<Role::count(); ++r)
if(obl[c].test(r)) fprintf(f,"REQ\t%s\t%s\n",c.str(),r.str());
for(Cat c=1; c<Cat::count(); ++c)
for(Cat d=1; d<Cat::count(); ++d)
for(Role t=1; t<Role::count(); ++t)
if(connect[c][d].count(t))
fprintf(f,"LINK\t%s\t%s\t%s\n",c.str(),d.str(),t.str());
for(Flag i=1; i<Flag::count(); ++i)
fprintf(f,"FLAG\t%s\n",i.str());
}

79
src/dgp/grammar.hh Normal file
View File

@ -0,0 +1,79 @@
#ifndef _GRAMMAR_HH
#define _GRAMMAR_HH
#include <bitset>
#include <vector>
#include <list>
#include <set>
#include "const.hh"
#include "thesymbols.hh"
#include "sgraph.hh"
// A role together with two flag sets.
// NOTE(review): all members are private and the class has no member
// functions; no use of it is visible in the sources reviewed here - confirm
// whether it is still needed.
class Link
{
Role role;
FlagSet hflags;
FlagSet dflags;
};
// In-memory form of a compiled grammar: the symbol-indexed tables filled by
// read() and consulted by the parser.
class Grammar
{
public:
// enum CONSTR { SGL, OBL, LEFT, RIGHT, INIT, NONINIT, FIN, NONFIN };
Grammar() : types_sz(0), cats_sz(0), flags_sz(0) {} ;
// Allocated capacities of the role-, category- and flag-indexed tables;
// raised in steps of 16 by add_type/add_category/add_flag.
int types_sz;
int cats_sz;
int flags_sz;
// connect[c][d]: roles with which a node of category d may depend on a node
// of category c (filled from LINK statements).
vector< vector< Roles > > connect;
// Roles declared with SGL.
RoleSet sgl;
// obl[c]: roles required by category c (filled from REQ statements).
vector< RoleSet > obl;
// Roles declared with LEFT / RIGHT.
RoleSet left;
RoleSet right;
// Order relation between roles and its mirror image (see set_lt).
vector< RoleSet > lt;
vector< RoleSet > gt;
// vector< vector< vector<
// NOTE(review): `set` is never resized in the code reviewed here, unlike
// `pass` (see add_flag) - confirm it is unused.
vector< FlagSet > set;
vector< FlagSet > pass;
// Load from / save to the textual statement format.
bool read(FILE* f);
void write(FILE* f);
// Register a new symbol, growing the tables when needed.
void add_category(const char* s);
void add_type(const char* s);
void add_flag(const char* s);
// Setters used by read(); the tables must already be large enough.
void set_sgl(Role r) { sgl.set(r); }
void set_obl(Cat c, Role r) { obl[c].set(r); }
void set_left(Role r) { left.set(r); }
void set_right(Role r) { right.set(r); }
void set_order(Role r, Role s) { lt[s].set(r); }
void set_connect(Cat c, Cat d, Role r) { connect[c][d].insert(r); }
void set_lt(Role r, Role s);
void compute_gt();
// True when a head with properties hprop may take a dependent via `role` in
// direction dir (0 or 1).
bool check_constr(NodeProp& hprop, NodeProp& dprop, int dir, Role role);
};
// Checks whether a head with properties hprop may take a dependent via
// `role` in direction `dir`: the role must not be forbidden for the head, a
// RIGHT role needs dir==1 and a LEFT role needs dir==0.  The dependent's
// properties are not consulted.
inline bool Grammar::check_constr(NodeProp& hprop, NodeProp& dprop, int dir, Role role)
{
  if(hprop.forbidden[role])
    return false;
  if(right[role] && dir!=1)
    return false;
  if(left[role] && dir!=0)
    return false;
  return true;
}
#endif

121
src/dgp/main.cc Normal file
View File

@ -0,0 +1,121 @@
/**
* Package: UAM Text Tools
* Component: dgp (dg parser)
* Version: 1.0
* Author: Tomasz Obrebski
*/
#include "global.hh"
#include "mgraph.hh"
#include "sgraph.hh"
#include "grammar.hh"
#include "dgp0.hh"
#include "../common/common.h"
#include "cmdline.h"
// Maximal number of input segments buffered for one sentence.
#define MAXSEGMENTS 500

// Text of the segments of the current sentence and their number.
char segment[MAXSEGMENTS][MAXLINE];
int segcount=0;

// For every buffered segment: index of its morphological node, or -1 when
// the segment was not processed.  Stored as int, because node indices can
// exceed the range of a char.
int seg_mnode[MAXSEGMENTS];

char grammarfile[255];

Grammar grammar;
MGraph mgraph;
SGraph sgraph;

FILE* grammarf;
FILE* debugf=stdout;

// Bit mask of SGraph information flags selected with --info.
unsigned int info=0U;

void output();
main(int argc, char* argv[])
{
gengetopt_args_info args;
if(cmdline_parser(argc,argv,&args) != 0)
exit(1);
process_config_files(&args,argv[0]);
process_common_options(&args,argv[0]);
if(!args.grammar_given)
fprintf(stderr,"dgp: no grammar given\n");
expand_path(args.grammar_arg,grammarfile);
if(!(grammarf=fopen(grammarfile,"r")))
fprintf(stderr,"dgp: grammar file not found: %s.\n", grammarfile), exit(1);
if(args.debug_given) debug=true;
for(char* c=args.info_arg; *c!='\0' ; ++c)
switch(*c)
{
case 'h': info|=SGraph::HEADS; break;
case 'd': info|=SGraph::DEPS; break;
case 's': info|=SGraph::SETS; break;
case 'c': info|=SGraph::CONSTRAINTS; break;
}
grammar.read(grammarf);
fclose(grammarf);
mgraph.clear();
sgraph.clear();
char line[1000];
while (fgets(line, MAXLINE+1, inputf))
{
line[strlen(line)-1] = '\0';
strcpy(segment[segcount],line);
char segtype[80];
seg_mnode[segcount] = process_seg(line, args) ? mgraph.add_node(line) : -1;
segcount++;
getfield(line,"3",segtype);
if(strcmp(segtype,"EOS")==0)
{
dgp0(); // parametry!!! MGraph, SGraph, Grammar
output();
mgraph.clear();
sgraph.clear();
segcount=0;
}
// if(args.interactive_flag) { fflush(outputf); fflush(failedf); }
}
fclose(inputf);
fclose(outputf);
cmdline_parser_free(&args);
exit(0);
}
// Writes the buffered segments of the current sentence to outputf.
// A segment without a morphological node is copied as it is.  A segment with
// a node is written once for every syntactic node of that morphological
// node, each time followed by the node information selected in `info`.
void output()
{
  for(int si=0; si<segcount; ++si)
    {
      if(seg_mnode[si]<0)
        {
          fputs(segment[si],outputf);
          fputc('\n',outputf);
          continue;
        }

      MNode& m=mgraph.nodes[seg_mnode[si]];
      vector<int>::iterator s=m.snodes.begin();
      while(s!=m.snodes.end())
        {
          fputs(segment[si],outputf);
          sgraph.print_node(outputf, *s, info);
          fputc('\n',outputf);
          ++s;
        }
    }
}

54
src/dgp/mgraph.cc Normal file
View File

@ -0,0 +1,54 @@
#include "mgraph.hh"
#include "thesymbols.hh"
#include "const.hh"
#include <stdio.h>
int MGraph::add_node(char* seg)
{
nodes[n].clear();
char field1[80], field3[80], descr[256], gph[256];
char* cat;
getfield(seg,"1",field1);
nodes[n].pos=atoi(field1);
getfield(seg,"3",field3);
if(!getfield(seg,"lem",descr)) strcpy(descr,"?,?");
cat=descr;
while(*cat!=',' && *cat ) ++cat;
if(*cat) ++cat;
// Cat::add(cat);
if(Cat::index(cat)>0)
nodes[n].cat=cat;
else
nodes[n].cat="NULL";
nodes[n].pred.clear();
char* tok;
int previd;
if(!getfield(seg,"gph",gph))
{
fprintf(stderr,"No gph field. Aborting (sorry).\n");
exit(1);
}
char* ids=strtok(gph,":");
if(n!=atoi(ids)){fprintf(stderr,"Invalid node id in line ?. Program aborted.\n"); exit(1); }
char *preds;
while(preds=strtok(NULL,","))
{
previd=atoi(preds);
nodes[n].pred.push_back(&nodes[previd]);
}
return n++;
}

34
src/dgp/mgraph.hh Normal file
View File

@ -0,0 +1,34 @@
#ifndef _MGRAPH_HH
#define _MGRAPH_HH
#include <vector>
#include "const.hh"
#include "thesymbols.hh"
#include "../common/common.h"
// One node of the morphological graph (MGraph), built from an input segment
// by MGraph::add_node.
class MNode
{
public:
// Printed by SGraph::print_arc; nothing in the code shown here writes it.
char type[MAXFORMLEN];
// Category taken from the "lem" field, or "NULL" when unknown.
Cat cat;
// Value of segment field "1".
int pos;
// Predecessor nodes listed in the segment's "gph" field.
vector<MNode*> pred;
// Indices of the SGraph nodes attached to this node.
vector<int> snodes;
// Forgets the attached SGraph nodes; pred is cleared separately by add_node.
void clear() { snodes.clear(); };
};
// Morphological graph: a fixed-capacity array of MNode filled by add_node().
class MGraph
{
public:
  MNode nodes[MAXNODES];
  int n;                       // number of nodes currently in use

  // Start empty, as SGraph does; without this `n` is indeterminate for any
  // instance that does not have static storage duration.
  MGraph() : n(0) {}

  // Drops all nodes (the storage itself is reused).
  void clear() { n=0; }

  // Appends a node built from segment `seg`; returns its index.
  int add_node(char* seg);
};
#endif

165
src/dgp/sgraph.cc Normal file
View File

@ -0,0 +1,165 @@
#include "global.hh"
#include "sgraph.hh"
#include "mgraph.hh"
#include "grammar.hh"
#include "const.hh"
#include <stdio.h>
// Creates the base SGraph node for MGraph node `mn` and returns its index.
// The new node's LV set receives every SNode of mn's predecessors that is
// flagged in_LH, plus that SNode's LH set when it is saturated.
int SGraph::add_base_snode(MNode* mn)
{
  const int created=n;
  SNode& fresh=nodes[created];

  fresh.clear();
  fresh.mnode=mn;

  const vector<MNode*>& preds=mn->pred;
  for(vector<MNode*>::size_type p=0; p<preds.size(); ++p)
  {
    const vector<int>& candidates=preds[p]->snodes;
    for(vector<int>::size_type c=0; c<candidates.size(); ++c)
    {
      const int left=candidates[c];
      if(!nodes[left].in_LH) continue;

      fresh.LV.set(left);
      if(nodes[left].saturated()) fresh.LV |= nodes[left].LH;
    }
  }

  fresh.in_LH=true;
  mn->snodes.push_back(created);
  n=created+1;

  return created;
}
void SGraph::update_left(int headind, int depind)
{
SNode &head=nodes[headind], &dep=nodes[depind];
if(dep.saturated()) head.LV |= dep.LV, head.LD |= dep.LD;
}
void SGraph::update_right(int headind, int depind)
{
SNode &head=nodes[headind], &dep=nodes[depind];
dep.LH.set(headind);
if(head.saturated())
dep.LH |= head.LH;
}
int SGraph::clone(int ancind, NodeProp newprop)
{
int newind = n++;
SNode &newnode=nodes[newind];
SNode &ancnode = nodes[ancind];
newnode.clear();
newnode.prop=newprop;
newnode.mnode=ancnode.mnode;
newnode.mnode->snodes.push_back(newind);
return newind;
}
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------
// Writes the annotation of node `n` (see sprint_node) to `f`.
// Returns the number of characters formatted; the original fell off the end
// of a non-void function, which is undefined behaviour.
// NOTE(review): the 1000-byte buffer is not checked against the length of the
// annotation — confirm it is large enough for MAXNODES-sized sets.
int SGraph::print_node(FILE* f, int n, unsigned int info)
{
  char buf[1000];
  const int written=sprint_node(buf,n,info);
  fputs(buf,f);
  return written;
}
// Formats the dgp annotation of node `nodeind` into `buf` and returns the
// number of characters written (excluding the terminating NUL).
//
// Output: " dgp:<index>;<s|u>" followed, depending on the bits set in `info`,
// by
//   HEADS        ";" and a comma list of "++role-dst/anc"
//   DEPS         ";" and a comma list of "--role-dst/anc"
//   SETS         ";{preds};{LV};{LH};{LD}"
//   CONSTRAINTS  ";" and a comma list of "!role" (forbidden) and "&role" (required)
// The caller must supply a buffer that is large enough; no bound is checked.
int SGraph::sprint_node(char* buf, int nodeind, unsigned int info)
{
  char* buf0=buf;

  SNode &node=nodes[nodeind];

  buf+=sprintf(buf," dgp:%d",nodeind);
  buf+=sprintf(buf, saturated(nodeind) ? ";s" : ";u");

  if (info&HEADS)
  {
    bool cont=false;
    buf+=sprintf(buf,";");
    for(vector<Arc>::iterator h=node.heads.begin(); h!=node.heads.end(); ++h)
    {
      if(cont) buf+=sprintf(buf,","); else cont=true;
      buf+=sprintf(buf,"++%s-%d/%d",h->role.str(),h->dst,h->anc);
    }
  }

  if (info&DEPS)
  {
    // Each list has its own separator state; previously the flag left over
    // from the HEADS list put a stray comma in front of the first dependent.
    bool cont=false;
    buf+=sprintf(buf,";");
    for(vector<Arc>::iterator d=node.deps.begin(); d!=node.deps.end(); ++d)
    {
      // Unsaturated dependents are printed too.
      if(cont) buf+=sprintf(buf,","); else cont=true;
      buf+=sprintf(buf,"--%s-%d/%d",d->role.str(),d->dst,d->anc);
    }
  }

  if (info&SETS)
  {
    int ord=0;
    buf+=sprintf(buf,";{");
    for(vector<MNode*>::iterator pm=node.mnode->pred.begin(); pm!=node.mnode->pred.end(); ++pm)
      for(vector<int>::iterator ps=(*pm)->snodes.begin(); ps!=(*pm)->snodes.end(); ++ps)
        buf+=sprintf(buf, ord++ ? ",%d" : "%d", *ps);

    // Valid node indices are 0..n-1; the old "j<=n" read bit n, which is out
    // of range for the bitset once the graph is full.
    buf+=sprintf(buf,"};{");
    ord=0;
    for(int j=0; j<n; ++j) if(node.LV[j]) buf+=sprintf(buf, ord++ ? ",%d" : "%d", j);
    buf+=sprintf(buf,"};{");
    ord=0;
    for(int j=0; j<n; ++j) if(node.LH[j]) buf+=sprintf(buf, ord++ ? ",%d" : "%d", j);
    buf+=sprintf(buf,"};{");
    ord=0;
    for(int j=0; j<n; ++j) if(node.LD[j]) buf+=sprintf(buf, ord++ ? ",%d" : "%d", j);
    buf+=sprintf(buf,"}");
  }

  if (info&CONSTRAINTS)
  {
    buf+=sprintf(buf,";");
    int cont=0;
    // Role 0 is "NULL"; real roles are 1..count()-1.  Index count() has no
    // name (str() returns NULL), so the loops stop before it.
    for(Role i=1; i<Role::count(); ++i)
      if(node.prop.forbidden[i]) buf+=sprintf(buf,"%s!%s",(cont++)?",":"",i.str());
    for(Role i=1; i<Role::count(); ++i)
      if(node.prop.required[i]) buf+=sprintf(buf,"%s&%s",(cont++)?",":"",i.str());
  }

  return buf-buf0;
}
// Formats "#<pref>", then the full annotation of node `n` (all info flags),
// then a newline, into `buf`.  Returns the number of characters written.
int SGraph::sprint_node_debug(char* buf, const char* pref, int n)
{
  char* cursor=buf;

  cursor+=sprintf(cursor,"#%s",pref);
  cursor+=sprint_node(cursor,n,HEADS|DEPS|SETS|CONSTRAINTS);
  cursor+=sprintf(cursor,"\n");

  return cursor-buf;
}
// Writes the debug line for node `n` (see sprint_node_debug) to `f`.
// Returns the number of characters formatted; the original returned nothing
// from a function declared int.
int SGraph::print_node_debug(FILE* f, const char* pref, int n)
{
  char buf[1000];
  const int written=sprint_node_debug(buf,pref,n);
  fputs(buf,f);
  return written;
}
// Writes one debug line describing an arc between nodes `left` and `right`.
// dir: 0 draws the arrow to the left ("<--"), anything else to the right ("-->").
void SGraph::print_arc(FILE* f, int left, int right, Role role, int dir) // 0 - left, 1 - right
{
  const char* arrow = dir ? "-->" : "<--";
  const char* ltype = nodes[left].mnode->type;
  const char* rtype = nodes[right].mnode->type;

  fprintf(f,"# %s:%s.%02d %s %s.%02d\n", role.str(), ltype, left, arrow, rtype, right);
}

108
src/dgp/sgraph.hh Normal file
View File

@ -0,0 +1,108 @@
#ifndef _SGRAPH_HH
#define _SGRAPH_HH
#include <stdio.h>
#include <list>
#include <vector>
#include <bitset>
#include "const.hh"
#include "thesymbols.hh"
class MNode;
// One labelled arc stored in an SNode's heads or deps list.
// sprint_node prints it as "<role>-<dst>/<anc>".
struct Arc
{
// Index of the node at the other end of the arc.
int dst;
// Label of the arc.
Role role;
// Printed after the '/' by sprint_node.
// NOTE(review): presumably the index of the node this one was cloned from — confirm.
int anc;
Arc(int d, Role r, int a) : dst(d), role(r), anc(a) {};
};
// Constraint state of an SGraph node: which roles it still requires and which
// it may no longer take.  Bits are indexed by Role value.
struct NodeProp
{
  bitset<MAXTYPES> required;
  bitset<MAXTYPES> forbidden;

  // const-qualified so that const NodeProp objects can be compared too.
  bool operator==(const NodeProp& p) const
  { return required==p.required && forbidden==p.forbidden; }

  // Clears both sets.
  void clear()
  { required.reset(); forbidden.reset(); }
};
// One node of the syntactic graph (SGraph).
struct SNode
{
// MGraph node this SNode belongs to (set by add_base_snode and clone).
MNode* mnode;
// Required/forbidden roles; the node is saturated when nothing is required.
NodeProp prop;
// Node-index sets maintained by add_base_snode, update_left and update_right.
bitset<MAXNODES> LV;
bitset<MAXNODES> LH;
bitset<MAXNODES> LD;
// Set to true by add_base_snode; read there to decide whether a predecessor's
// SNode enters LV.
bool in_LH;
vector<Arc> heads;
vector<Arc> deps;
// Resets the properties, the three sets and both arc lists.
// NOTE(review): mnode and in_LH are left untouched, so a reused slot keeps its
// previous in_LH until someone assigns it — confirm that clone() callers do.
void clear() { prop.clear(), LV.reset(), LD.reset(), LH.reset(), heads.clear(), deps.clear(); }
// True when no role is still required.
bool saturated() { return prop.required.none(); }
};
// Syntactic graph: a fixed-capacity array of SNode plus the operations that
// create nodes, propagate the LV/LH/LD sets and print nodes.
class SGraph
{
public:
SNode nodes[MAXNODES];
int n; // number of nodes
// Bit flags selecting which parts of a node sprint_node prints.
enum Output { HEADS=1, DEPS=2, SETS=4, CONSTRAINTS=8 };
SGraph() : n(0) {}
// Drops all nodes (the storage itself is reused).
void clear() { n=0; }
// Creates the base node for an MGraph node; returns its index.
int add_base_snode(MNode* mn);
// Creates a node sharing ancind's MGraph node, with properties newprop.
int clone(int ancind, NodeProp newprop);
// Set propagation after attaching a dependent (see sgraph.cc).
void update_left(int headind, int depind);
void update_right(int headind, int depind);
// True when `left` is in the LV set of `right`.
bool visible(int left, int right);
// True when the node requires no further roles.
bool saturated(int node);
//--------------------------------------------------------------------
void read(FILE* f);
void write(FILE* f, list<int> nodelist, unsigned int info);
// Format a node's annotation into a buffer / write it to a file.
int sprint_node(char* buf, int n, unsigned int info);
int print_node(FILE* f, int n, unsigned int info);
// Same, prefixed with "#<pref>" and with every info flag enabled.
int sprint_node_debug(char* buf, const char* pref, int n);
int print_node_debug(FILE* f, const char* pref, int n);
void print_arc(FILE* f, int left, int right, Role role, int dir); // 0 - left, 1 - right
};
inline bool SGraph::visible(int left, int right)
{
return nodes[right].LV[left];
}
inline bool SGraph::saturated(int node)
{
return nodes[node].saturated();
}
#endif

39
src/dgp/symbol.cc Normal file
View File

@ -0,0 +1,39 @@
#include "symbol.hh"
// CLASS symbols
//int Symbols::_no_of_spaces=0;
// Releases every symbol string duplicated by add().
Symbols::~Symbols()
{
  for(std::vector<const char*>::iterator it=table.begin(); it!=table.end(); ++it)
    free((void*)*it);
  table.clear();
}
void Symbols::load(const char* filename)
{
ifstream f(filename);
char s[100];
while(f)
{
f >> s >> ws;
if(strlen(s)) add(s);
}
}
// Registers `sym` unless it is already known.  The string is duplicated; the
// copy is owned by the table and freed in the destructor.  A new symbol gets
// the next free index.
void Symbols::add(const char* sym)
{
  if(hash.count(sym)!=0) return;

  char* symdup=strdup(sym);
  const int index=table.size();
  table.push_back(symdup);
  hash[symdup]=index;
}
//template<int space>
//Symbols Symbol<space>::defs;

143
src/dgp/symbol.hh Normal file
View File

@ -0,0 +1,143 @@
#ifndef _SYMBOL_HH
#define _SYMBOL_HH
#include <ext/hash_map>
//#include <ext/hash_fun.h>
#include <string>
#include <string.h>
#include <fstream>
#include <vector>
#include <iostream>
using namespace std;
using __gnu_cxx::hash_map;
using __gnu_cxx::hash;
// Equality predicate for C-string keys of cstr_hash: compares contents, not
// pointer values.
struct eqstr
{
  bool operator()(const char * s, const char* t) const
  {
    const int diff=strcmp(s,t);
    return diff==0;
  }
};
// Hash table for storing symbols
typedef hash_map<const char*,int,hash<const char*>,eqstr> cstr_hash;
// Symbol table. Provides access to symbols through their index or name.
class Symbols
{
public:
Symbols() { add("NULL"); };
~Symbols();
void load(const char* filename);
int operator[](const char* s) { return hash[s]; };
const char* operator[](int i) { return table[i]; };
void add(const char* c);
int count() { return table.size(); };
private:
std::vector<const char*> table;
cstr_hash hash;
};
//////////////////////////////////////////////////////////////////////
/// Symbol class template.
/** The template argument determines the symbol space.
Each space is created with the symbol "NULL" at index 0 already in.
*/
template <int space>
class Symbol
{
public:
  /// Load the contents of the symbol table from file.
  static void define(const char *filename)
  { defs.load(filename); }

  /// Add symbol s and return it.
  /** The string is duplicated.  (The original had no return statement.)
  */
  static Symbol<space> add(const char* s) { defs.add(s); return Symbol<space>(s); }

  /// Number of symbols (including "NULL").
  static int count() { return defs.count(); }

  /// First symbol.
  static int first() { return 1; }

  /// Returns count()+1.
  // NOTE(review): valid indices end at count()-1, so this is not the index of
  // the last symbol; left unchanged because callers are not visible here.
  static int last() { return defs.count()+1; }

  /// Index of the symbol named s (0 if s is not a symbol name).
  static int index(const char* s) { return defs[s]; }

  /// Just for tests.
  static void print();

  /// 0-argument constructor, default value is 0 ("NULL").
  Symbol() : val(0) {}

  /// Constructing a symbol from its index.
  /** No check is performed.
  */
  Symbol(int v) : val(v) {}

  /// Constructing a symbol from its name (string to Symbol conversion).
  /** If s is not a symbol name, the value of 0 ("NULL") is assigned.
  */
  Symbol(const char * s) : val(defs[s]) {}

  /// Symbol to char* conversion. If symbol is invalid, NULL is returned.
  const char* str() const { return (val>=0 && val<count())?defs[val]:NULL; }

  /// Symbol to int conversion.
  /** Provides a way to iterate through symbols, eg:
   * for(Symbol<0> s=1; s<Symbol<0>::count(); ++s ) ...
   */
  operator int() const { return val; }

  /// Pre-increment: moves to the next index and returns the new value.
  Symbol operator++() {val++; return *this;}

  // bool operator<(Symbol& s) { return val < s.val; }

private:
  static Symbols defs;
  int val;
};
// Prints every symbol of this space as "<index>: <name>", one per line.
// The name is obtained with str(); the old cast (const char*)i is not a
// conversion Symbol provides.
template <int space>
void Symbol<space>::print()
{
  for(Symbol i=0; i<count(); ++i)
    cout << (int)i << ": " << i.str() << endl;
}
template<int space>
Symbols Symbol<space>::defs;
// Orders symbols of the same space by their index.
template<int space>
bool operator<(const Symbol<space>& s, const Symbol<space>& t)
{
  const int lhs=s;
  const int rhs=t;
  return lhs < rhs;
}
#endif

29
src/dgp/thesymbols.hh Normal file
View File

@ -0,0 +1,29 @@
#ifndef __THESYMBOLS__HH
#define __THESYMBOLS__HH
#include "symbol.hh"
#include "const.hh"
#include <list>
#include <set>
#include <bitset>
typedef Symbol<1> Cat;
typedef Symbol<2> Role;
typedef list<Role> RoleList;
typedef list<Role>::iterator RoleListIter;
typedef bitset<MAXTYPES> RoleSet;
typedef set<Role> Roles;
typedef Roles::iterator RolesIter;
typedef Symbol<3> Constr;
typedef list<Constr> ConstrList;
typedef list<Constr>::iterator ConstrListIter;
typedef Symbol<4> Rel;
typedef Symbol<5> Flag;
typedef bitset<MAXFLAGS> FlagSet;
#endif

304
src/dgp/tre Executable file
View File

@ -0,0 +1,304 @@
#!/usr/bin/ruby -I /usr/local/lib/utt -I $HOME/.local/lib/utt
$: << "#{ENV['HOME']}/.local/lib/utt"
$: << "/usr/local/lib/utt"
require 'getoptlong'
require 'seg.rb'
# Command-line options understood by tre.
opts = GetoptLong.new(
  [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
  [ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
  [ '--format', '-F', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--info', '-I', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--only-trees','-t', GetoptLong::NO_ARGUMENT ])

$helptext=
"The program generates trees from the graph output by dgp. dgp must\n"+
"be run with '-i ds' option.\n\n"+
"Command: tre [options]\n\n"+
"Options:\n"+
"--help -h Print help (this text) and exit.\n"+
"--debug -d Verbose output. For developers only.\n"+
"--format=s -F s Output format. Recognized values:\n"+
" a root + list of arcs\n"+
" p parenthesized notation\n"+
" h human readable indented tree format\n"+
" Multiple values are allowed. (default p)\n"+
"--info=s -I s Information printed. Recognized values:\n"+
" n node identifier\n"+
" f surface form\n"+
" m morphological information\n"+
" l arc labels\n"+
"--only-trees -t Do not copy input. Print trees only.\n"

# Defaults; $INFO stays 'DEFAULT' until a format-dependent value is chosen below.
$DEBUG=false
$FORMAT='p'
$INFO='DEFAULT'
$ONLYTREES=false

opts.each do |opt, arg|
  case opt
  when '--help'
    print $helptext
    exit 0
  when '--debug'
    $DEBUG=true
  when '--format'
    $FORMAT=arg
  when '--info'
    $INFO=arg
  when '--only-trees'
    $ONLYTREES=true
  else
    print "Unknown option #{opt}. Ignored.\n"
  end
end

# Pick the default node information from the requested format(s).  Testing
# with a regexp covers multi-letter formats such as 'ap'; previously those
# left $INFO as the literal string 'DEFAULT', which selects nothing.
if $INFO=='DEFAULT'
  $INFO = ($FORMAT =~ /h/) ? 'fmnl' : 'nl'
end

# Separator between the parts of a dgp field.
$dgpsep=';'
# Reads dgp output from +input+ line by line, echoing every line unless
# --only-trees was given.  Segments carrying a dgp field are collected until a
# segment of type EOS arrives; the collected graph is then parsed, all trees
# are generated and each one is printed in every format named in $FORMAT.
# Stops with a message when a sentence does not start with BOS or a segment
# lacks the gph field.
def tre(input)
  $gphid=[]
  $form=[]
  $lem=[]
  nodes=[]
  count=0
  seg=Seg.new
  for line in input
    print line unless $ONLYTREES
    seg.set(line)
    if dgp=seg['dgp']
      if nodes==[] && seg[3]!='BOS'
        print "A sentence must start with BOS segment. Aborting.\n"
        return
      end

      id=dgp[/^\d+/].to_i

      if gph=seg['gph']
        $gphid[id]=gph[/^\d+/].to_i
      else
        print "No gph field. Aborting.\n"
        return
      end

      $form[$gphid[id]]=seg[4]
      $lem[$gphid[id]]=seg['lem']

      nodes[id] = [seg[1].to_i,dgp]

      if seg[3]=='EOS'
        $pref = "#{seg[1]} #{seg[2]} SYN *"
        parsegraph(nodes)
        printgraph if $DEBUG
        $thetrees=[]
        gentrees2
        for t in $thetrees
          count += 1
          t1=ground(t)
          # The help text allows several format letters at once, so each one
          # is tested on its own (a case statement stops at the first match).
          if $FORMAT =~ /a/
            print "#{$pref} tre:#{count} arc:"
            printarcs(t1[0],t1[1])
            print "\n"
          end
          if $FORMAT =~ /p/
            print "#{$pref} tre:#{count} par:"
            printpar(t1[0],t1[1])
            print "\n"
          end
          if $FORMAT =~ /h/
            print "#\n# tree #{count}\n# ------\n"
            printtree(t1[0],t1[1],0)
          end
        end
        nodes=[]
      end
    end
  end
end
# Builds the label of node +id+ from the parts selected in $INFO:
#   n  the identifier, followed by '.' when a form or lemma follows
#   f  the surface form, followed by ';' when the lemma follows
#   m  the morphological description
# A missing form or description is rendered as an empty string instead of
# raising TypeError on nil.
def nodeinfo(id)
  info=""
  if $INFO =~ /n/
    info += id.to_s
    info += '.' if $INFO =~ /[fm]/
  end
  if $INFO =~ /f/
    info += $form[id].to_s
    info += ';' if $INFO =~ /m/
  end
  if $INFO =~ /m/
    info += $lem[id].to_s
  end
  info
end
# Prints a tree as its root label followed by ";[label:]head-dependent" for
# every arc; labels are included when $INFO contains 'l'.
def printarcs(root,arcs)
  print nodeinfo(root)
  arcs.each do |head, dep, label|
    print ';'
    print "#{label}:" if $INFO =~ /l/
    print nodeinfo(head)+'-'+nodeinfo(dep)
  end
end
# Prints the subtree rooted at +root+ in the indented, '#'-prefixed layout.
# +o+ is the current depth; dependents are listed in order of their ids.
def printtree(root,arcs,o)
  print "# %-16s" % "root: " if o==0
  print nodeinfo(root),"\n"
  children = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
  children.each do |arc|
    print '# '," "*(o+1)
    print "%-16s" % (arc[2]+": ")
    printtree(arc[1],arcs,o+1)
  end
end
# Prints the subtree rooted at +root+ in parenthesized notation:
# root(label:dep(...),label:dep); labels appear when $INFO contains 'l'.
def printpar(root,arcs)
  print nodeinfo(root)
  deps = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] }
  return if deps.empty?

  print '('
  deps.each_with_index do |arc, k|
    print ',' if k > 0
    print arc[2],':' if $INFO =~ /l/
    printpar(arc[1],arcs)
  end
  print ')'
end
# Decodes the dgp fields collected for one sentence into the global tables
# used by the tree generator:
#   $n     number of node slots
#   $pos   position (first segment field) of each node
#   $sat   ids of saturated nodes
#   $arcs  [head, dependent, role, anc] tuples
#   $succ, $vis, $lhs  pairs [x, i] taken from the three set fields of node i
#          ($lhs additionally contains [i, i])
# Each element of +nodes+ is [position, dgp_string]; the dgp string has the
# form "id;s|u;arcs;{..};{..};{..}".
def parsegraph(nodes)

  $n =nodes.length
  $sat =[];

  $vis =[];
  $succ=[];
  $lhs =[];
  $arcs=[];
  $pos=[]

  for dgp in nodes
    # Ids that never occurred leave nil holes in the array.
    next if dgp.nil?

    parts = dgp[1].split($dgpsep,6)

    if parts[3]==nil || parts[4]==nil || parts[5]==nil
      $stderr.print "ERR: tre requires dgp be called with '--info s' option. Aborting.\n"
      exit 1
    end

    i = parts[0].to_i
    $pos[i] = dgp[0].to_i
    $sat << i if parts[1]=="s"

    # dgp prints arcs as "--role-dst/anc" (dependents) and "++role-dst/anc"
    # (heads).  The old "++" pattern expected the id before the role, never
    # matched, and left nil entries in $arcs.
    parsed = parts[2].split(',').map do |a|
      case a
      when /\-\-(\w+)-(\d+)\/(\d+)/
        [i, $2.to_i, $1, $3.to_i]
      when /\+\+(\w+)-(\d+)\/(\d+)/
        [$2.to_i, i, $1, $3.to_i]
      end
    end
    $arcs |= parsed.compact

    $succ |= parts[3][1..-2].split(',').map{|x| [x.to_i,i]}
    $vis |= parts[4][1..-2].split(',').map{|x| [x.to_i,i]}
    $lhs |= parts[5][1..-2].split(',').map{|x| [x.to_i,i]} + [[i,i]]
  end
end
# Translates a tree [root, arcs] from dgp node ids to gph node ids using
# $gphid; each resulting arc keeps only [head, dependent, label].
def ground(t)
  root, arcs = t[0], t[1]
  grounded = arcs.map do |arc|
    [$gphid[arc[0]], $gphid[arc[1]], arc[2]]
  end
  [$gphid[root], grounded]
end
# Generates all trees of the current graph into $thetrees.
# A root candidate is a node i strictly between the first node (0) and the
# last node ($n-1) for which $vis holds both [i, last] and [0, i].  For each
# candidate the part to its right is built first, then the part to its left.
def gentrees2()
  $thetrees=[]
  bos=0
  eos=$n-1
  roots = (1...eos).select{|i| $vis.include?([i,eos]) && $vis.include?([bos,i]) }
  print "ROOTS: #{roots.inspect}\n" if $DEBUG
  roots.each do |root|
    $theroot=root
    buildR(root, eos, []).each do |rmin, _rmax, rtree|
      buildR(bos, rmin, rtree)
    end
  end
end
# Extends +tree+ (a list of arcs) over the span between nodes +min+ and +max+
# and returns a list of [leftmost, rightmost, arcs] triples.
# Two kinds of continuation are tried:
#  * every arc whose first element is +max+ and whose second element is
#    visible from +min+ ($vis): the arc is added, the span a[1]..a[3] is built
#    recursively, then the remainder from +min+ to the returned left edge;
#  * every node i with [i,max] in $succ and [min,i] in $lhs: handed to buildL.
# NOTE(review): the linguistic meaning of the spans is not visible here; the
# description above only restates the code.
def buildR(min, max, tree)
if $DEBUG then print "buildR--#{min}--#{max}--#{tree.inspect}\n" end
trees=[]
for a in $arcs.select{|a| a[0]==max && $vis.include?([min,a[1]]) }
if $DEBUG then print "ARC: #{a.inspect}\n" end
for r in buildR(a[1],a[3],tree+[a])
(rmin,rmax,rarcs) = r
for l in buildR(min,rmin,rarcs)
(lmin,lmax,larcs) = l
trees << [lmin,rmax,larcs]
end
end
end
for i in (0...$n).select{|i| $succ.include?([i,max])}.select{|i| $lhs.include?([min,i])}
for l in buildL(min,i,tree)
(lmin,lmax,larcs) = l
trees << [lmin,lmax,larcs]
end
end
trees
end
# Counterpart of buildR; returns a list of [leftmost, rightmost, arcs] triples.
# When +min+ and +max+ have the same position the span is closed: the result
# is [[max,max,tree]], and if both are node 0 the finished tree is also
# recorded in $thetrees under the current $theroot.
# Otherwise every arc whose second element is +max+ and whose first element is
# related to +min+ through $lhs is added; the span arc[3]..max is built with
# buildR and the remainder from +min+ with buildL.
def buildL(min,max,tree)
if $DEBUG then print "buildL--#{min}--#{max}--#{tree.inspect}\n" end
if $pos[min]==$pos[max]
if min==0 && max==0
$thetrees.push [$theroot,tree]
if $DEBUG then print "adding tree: #{tree.inspect}\n" end
end
return [[max,max,tree]]
end
trees=[]
for arc in $arcs.select{|a| a[1]==max && $lhs.include?([min,a[0]]) }
if $DEBUG then print "ARC: #{arc.inspect}\n" end
for r in buildR(arc[3],max,tree+[arc])
(rmin,rmax,rarcs) = r
for l in buildL(min,rmin,rarcs)
(lmin,lmax,larcs) = l
trees << [lmin,lmax,larcs]
end
end
end
trees
end
# Debug dump of the tables produced by parsegraph, one table per line.
def printgraph()
  print "N: #{$n}\n",
        "SAT: #{set_to_s($sat)}\n",
        "SUCC: #{rel_to_s($succ)}\n",
        "VIS: #{rel_to_s($vis)}\n",
        "LHS: #{rel_to_s($lhs)}\n",
        "ARCS: #{arcs_to_s($arcs)}\n"
end
# "{a,b,c}" for a list of values.
def set_to_s(s)
  '{' + s.join(',') + '}'
end

# "{(a,b),(c,d)}" for a list of pairs.
def rel_to_s(r)
  '{' + r.map{|p| "(#{p[0]},#{p[1]})"}.join(',') + '}'
end

# "-head-role-dep/anc" for one arc tuple [head, dep, role, anc].
def arc_to_s(q)
  "-#{q[0]}-#{q[2]}-#{q[1]}/#{q[3]}"
end

# "{arc,arc,...}" for a list of arc tuples.
def arcs_to_s(a)
  '{' + a.map{|q| arc_to_s(q)}.join(',') + '}'
end
######################################################################
tre($stdin)

2
src/dgp/uttcommon.c Normal file
View File

@ -0,0 +1,2 @@
#include "uttcommon.h"

146
src/dgp/uttcommon.h Normal file
View File

@ -0,0 +1,146 @@
#ifndef __COMMON_H
#define __COMMON_H
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/**************************************************
* Constants concerning input/output
*/
#define MAXLINE 1024
#define EMPTYFORM '*'
#define INFIELD_SEP ':'
#define MAXAUX 16
#define FIELD_SEP " \t\n"
/***************************************************************/
/* Classifies the letter case of a word.                       */
/* return value:                                               */
/* 0 - no upper-case letters, 1 - first upper, rest not upper, */
/* 2 - all upper-case, 3 - anything else                       */
/* An empty string yields 0.                                   */
/***************************************************************/
inline int casing(char* s)
{
  /* unsigned char keeps isupper() defined for bytes above 127 */
  const unsigned char* p = (const unsigned char*)s;
  int first_upper, rest_upper = 0, rest_other = 0;

  if(*p == '\0') return 0;

  first_upper = isupper(*p) ? 1 : 0;

  /* Look at the whole remainder before deciding: the old running state
     reported "AbC" as all upper-case. */
  for(++p; *p != '\0'; ++p)
  {
    if(isupper(*p)) rest_upper = 1;
    else rest_other = 1;
  }

  if(!first_upper) return rest_upper ? 3 : 0;
  if(!rest_upper) return 1;
  return rest_other ? 3 : 2;
}
// Copies s to d converting every character with tolower(); the terminating
// NUL is copied as well.  d must have room for strlen(s)+1 bytes.
inline void tolowers(char* s, char* d)
{
  for(;; ++s, ++d)
  {
    *d = tolower(*s);
    if(*s == '\0') break;
  }
}
// Copies s to d, applying the letter case described by `casing`
// (a value returned by casing()):
//   0, 3 - copied unchanged (3: not enough information to do better)
//   1    - first character upper-cased, the rest copied unchanged
//   2    - every character upper-cased
// Any other value leaves d untouched.
inline void restorecasing(char *s, char *d, int casing)
{
  if(casing < 0 || casing > 3) return;

  const int upper_first = (casing == 1 || casing == 2);
  const int upper_rest = (casing == 2);

  *d = upper_first ? toupper(*s) : *s;
  while(*s != '\0')
  {
    ++s;
    ++d;
    *d = upper_rest ? toupper(*s) : *s;
  }
}
/**************************************************/
/*
parameters:
-seg - segment
-pref - prefix (name) of the annotation field to look for
+val - receives the field contents (the text after the prefix, up to the
       next separator); the caller provides the storage
return value:
1 if specified field exists, 0 otherwise

The optional position and length fields, the type and the form are skipped
first; the prefix is then searched in the rest of the segment and only
accepted where it is preceded by a space or a tab.
*/

/* Advances p past white space; stops at the terminating NUL. */
inline char* getfield_skip_spaces(char* p)
{
  while(isspace((unsigned char)*p)) ++p;
  return p;
}

/* Advances p past one token.  Unlike the old open-coded loops this stops at
   the terminating NUL, so a short segment can no longer run the scan off the
   end of the buffer. */
inline char* getfield_skip_token(char* p)
{
  while(*p != '\0' && !isspace((unsigned char)*p)) ++p;
  return p;
}

inline int getfield(char* seg, const char* pref, char* val)
{
  char* p=getfield_skip_spaces(seg);

  /* position and length: present only when they start with a digit or '*' */
  for(int k=0; k<2; ++k)
  {
    if(!(isdigit((unsigned char)*p) || *p=='*')) break;
    p=getfield_skip_spaces(getfield_skip_token(p));
  }

  /* type */
  p=getfield_skip_spaces(getfield_skip_token(getfield_skip_spaces(p)));

  /* form */
  p=getfield_skip_token(getfield_skip_spaces(p));

  /* annotation: look for pref at a field boundary.  After a match inside a
     longer word the search resumes one character further on; the old loop
     searched from the same place again and never terminated. */
  p=strstr(p,pref);
  while(p!=NULL && (p==seg || (p[-1]!=' ' && p[-1]!='\t')))
    p = (*p!='\0') ? strstr(p+1,pref) : NULL;

  if(p==NULL) return 0;

  p+=strlen(pref);
  int len=strcspn(p,FIELD_SEP "\n\r\f\0");
  strncpy(val,p,len);
  val[len]='\0';
  return 1;
}
/*
parameters:
+seg - segment (normally ending with '\n'; the buffer must hold MAXLINE bytes)
-pref - prefix of the new field
-val - contents of the new field
return value:
1 - success, 0 - fail (limit on segment length exceeded)

The field is appended as " <pref><val>\n".  A trailing newline of the segment
is replaced; a segment without one (or an empty one) is simply extended.
*/
inline int addfield(char *seg, const char *pref, const char *val)
{
  size_t seglen=strlen(seg);

  /* Drop the newline if there is one.  The old code always backed up one
     character, which wrote in front of the buffer for an empty segment. */
  if(seglen>0 && seg[seglen-1]=='\n') --seglen;

  /* space + pref + val + newline + terminating NUL must fit; the old test
     let a result through that was one byte too long. */
  if(seglen+strlen(pref)+strlen(val)+3 > MAXLINE) return 0;

  sprintf(seg+seglen," %s%s\n",pref,val);
  return 1;
}
#endif

25
src/fla/Makefile Normal file
View File

@ -0,0 +1,25 @@
include ../../config.mak

ifeq ($(BUILD_STATIC), yes)
LDFLAGS += -static
endif

CFLAGS +=-O2

# The binary depends on its source so that editing fla.c triggers a rebuild.
fla: fla.c
	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)

# Nothing is installed unless config.mak defines BIN_DIR.
.PHONY: install
install: fla
ifdef BIN_DIR
	install -m 0755 fla $(BIN_DIR)
endif

.PHONY: uninstall
uninstall:
ifdef BIN_DIR
	$(RM) $(BIN_DIR)/fla
endif

# Declared phony so a stray file called "clean" cannot disable the target.
.PHONY: clean
clean:
	$(RM) fla

68
src/fla/fla.c Normal file
View File

@ -0,0 +1,68 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
char buf[5000];
/*
 * fla: flattens multi-line records into single lines.
 *
 * Every input line is written without its newline.  In front of every line
 * except the first, a newline is written when the line matches the record
 * pattern (it starts a new record) and the join character otherwise.  A final
 * newline ends the output.
 *
 *   argv[1]  extended regular expression marking the first line of a record
 *            (default: a line whose first field is BOS)
 *   argv[2]  decimal code of the join character (default: form feed)
 *
 * Returns 1 when the pattern does not compile, 0 otherwise.
 */
int main(int argc, char **argv)
{
    const char *pattern;
    char eoln;
    regex_t re;
    int firstline = 1;
    int line_start = 1;   /* is the next chunk the beginning of an input line? */

    /* pattern="[ \t]*([0-9]+[ \t]+){2}EOS([ \t].*)?"; */
    pattern = (argc < 2) ? "[ \t]*BOS([ \t].*)?" : argv[1];
    eoln = (argc < 3) ? '\f' : (char)atoi(argv[2]);

    if( 0 != regcomp(&re, pattern, REG_EXTENDED|REG_NOSUB) )
    {
        fprintf(stderr, "Invalid pattern.\n");
        return 1;
    }

    while( fgets(buf, sizeof buf, stdin) )
    {
        size_t len = strlen(buf);
        /* A chunk is a complete line only if it ends with a newline; the
           last line of the input or an over-long line may not.  Only a real
           newline is removed, so such chunks keep their last character. */
        int complete = (len > 0 && buf[len-1] == '\n');

        if( complete )
        {
            buf[len-1] = '\0';
        }

        /* Separators go in front of lines, never in the middle of a line
           that did not fit into the buffer in one piece. */
        if( line_start )
        {
            if( firstline )
            {
                firstline = 0;
            }
            else if( 0 == regexec(&re, buf, (size_t)0, NULL, 0) )
            {
                putchar('\n');
            }
            else
            {
                putchar(eoln);
            }
        }

        fputs(buf, stdout);
        line_start = complete;
    }

    putchar('\n');
    regfree(&re);

    return 0;
}

18
src/gph/Makefile Normal file
View File

@ -0,0 +1,18 @@
include ../../config.mak

# gph is a Perl script shipped as is; there is nothing to build.
gph:

# Nothing is installed unless config.mak defines BIN_DIR.
.PHONY: install
install:
ifdef BIN_DIR
	install -m 0755 gph $(BIN_DIR)
endif

.PHONY: uninstall
uninstall:
ifdef BIN_DIR
	$(RM) $(BIN_DIR)/gph
endif

# No generated files; the target exists so the top-level clean loop succeeds.
.PHONY: clean
clean:

162
src/gph/gph Executable file
View File

@ -0,0 +1,162 @@
#!/usr/bin/perl
#package: UAM Text Tools
#component: gph
#version: 1.0
#author: Tomasz Obrebski
use strict;
use Getopt::Long;
use File::HomeDir;
# Configuration files: system-wide first, then per-user (later values win).
my $systemconfigfile='/usr/local/etc/utt/gph.conf';
my $userconfigfile=home()."/.utt/gph.conf";
Getopt::Long::Configure('no_ignore_case_always');
# Option values; 0 / empty means "not set".
my $help=0;
my $inputfile=0;
my $outputfile=0;
my @process=();
my $reset;
my $interactive=0;
#read configuration files###########################
# Each non-empty line, after removing '#' comments and surrounding blanks,
# is split at the first '=' into a name and a value.
my $file;
my @process_conf=();
foreach $file ($systemconfigfile, $userconfigfile){
if(open(CONFIG, $file)){
while (<CONFIG>) {
chomp;
s/#.*//;
s/^\s+//;
s/\s+$//;
next unless length;
my ($name, $value) = split(/\s*=\s*/, $_, 2);
if(($name eq "inputfile")or($name eq "f")){
$inputfile=$value;
}
elsif(($name eq "outputfile")or($name eq "o")){
$outputfile=$value;
}
elsif(($name eq "process")or($name eq "p")){
push @process_conf, $value;
}
elsif(($name eq "reset")or($name eq "r")){
$reset=$value;
}
# interactive and help are switched on by their mere presence; the value is ignored
elsif(($name eq "interactive")or($name eq "i")){
$interactive=1;
}
elsif(($name eq "help")or($name eq "h")){
$help=1;
}
}
close CONFIG;
}
}
#########################################################
# Command-line options override the configuration files.
GetOptions("process|p=s" => \@process,
"inputfile|f=s" => \$inputfile,
"outputfile|o=s" => \$outputfile,
"help|h" => \$help,
"reset|r=s" => \$reset,
"interactive|i" => \$interactive);
# Segment types from the configuration are used only when none were given
# on the command line.
@process = @process_conf if @process<1;
if($help)
{
print <<'END'
Usage: gph [OPTIONS]
Options:
--process=TYPE -p TYPE Process segments of type TYPE.
--reset=TYPE -r TYPE Start new graph at tags of type TYPE.
--inputfile=FILE -f FILE Input file.
--outputfile=FILE -o FILE Output file.
--interactive -i Toggle interactive mode (default=off).
END
;
exit 0;
}
# Unbuffered standard output in interactive mode.
# NOTE(review): this affects STDOUT only; output written to a file named with
# --outputfile stays buffered.
$|=1 if $interactive;

# Input: the named file, or standard input when no name or "-" is given.
if(!$inputfile or $inputfile eq "-") {
    *INPUT = *STDIN;
}
else {
    open(INPUT, '<', $inputfile) or die("Can't open input file: $inputfile!");
}

# Output: the named file, or standard output when no name or "-" is given.
# The file has to be opened for writing; the old two-argument open opened it
# for reading, so every print to OUTPUT failed.
if(!$outputfile or $outputfile eq "-") {
    *OUTPUT = *STDOUT;
}
else {
    open(OUTPUT, '>', $outputfile) or die("Can't open output file: $outputfile!");
}
# Nodes that may still get a successor: [node id, start position, length].
my @prev;
# Id of the next node within the current graph.
my $n=0;
while(<INPUT>)
{
chomp;
my $do=0;
# Fields: 0 - start position, 1 - length, 2 - segment type.
my @line = split /\s+/;
# A segment of the reset type starts a new graph.
if($line[2] eq $reset)
{
$n=0;
@prev = ();
}
for my $p (@process)
{
$do=1 if $line[2] eq $p;
}
my $gph = '';
if($do)
{
my @preds = ();
# Drop nodes that end before this segment starts; they can no longer
# be predecessors.
shift @prev while @prev+0 && $prev[0]->[1] + $prev[0]->[2] < $line[0];
# Predecessors are the nodes that end exactly where this segment starts.
for my $p (@prev)
{
push(@preds, $p->[0]) if $p->[1] + $p->[2] == $line[0];
}
push @prev, [$n, $line[0], $line[1]];
$gph=' gph:'.$n.':'.join(',',@preds);
$n++;
}
else
{
# A segment that is not processed extends every node it directly follows,
# so that the next processed segment still attaches to that node.
for my $p (@prev)
{
if($p->[1]+$p->[2] == $line[0])
{
$p->[2] += $line[1];
}
}
$gph='';
}
print OUTPUT $_.$gph."\n";
}

17
src/grp/Makefile Normal file
View File

@ -0,0 +1,17 @@
include ../../config.mak

# grp is a Perl script shipped as is; there is nothing to build.
grp:

# Nothing is installed unless config.mak defines BIN_DIR.
.PHONY: install
install:
ifdef BIN_DIR
	install -m 0755 grp $(BIN_DIR)
endif

.PHONY: uninstall
uninstall:
ifdef BIN_DIR
	$(RM) $(BIN_DIR)/grp
endif

# No generated files; the target exists so the top-level clean loop succeeds.
.PHONY: clean
clean:

170
src/grp/grp Executable file
View File

@ -0,0 +1,170 @@
#!/usr/bin/perl
#package: UAM Text Tools
#component name: grp
#version: 1.0
#author: Tomasz Obrebski
use strict;
use Getopt::Long;
use File::HomeDir;

# directory containing terms.m4
my $LIB_DIR="/usr/local/lib/utt";

# Configuration files: system-wide first, then per-user (later values win).
my $systemconfigfile="/usr/local/etc/utt/grp.conf";
my $userconfigfile=home()."/.utt/grp.conf";

Getopt::Long::Configure('no_ignore_case_always');

# Option values; 0 means "not set".
my $help=0;
my $pattern=0;
my $matches_only=0;
my $macrofile=0;
my $define=0;
my $show_command=0;
my $action="pgP";
my $eos="seg(EOS)";
my $morfield='lem';
my $tags=0;

#read configuration files###########################
# Each non-empty line, after removing '#' comments and surrounding blanks,
# is split at the first '=' into a name and a value.
my $file;
foreach $file ($systemconfigfile, $userconfigfile){
    if(open(CONFIG, $file)){
        while (<CONFIG>) {
            chomp;
            s/#.*//;
            s/^\s+//;
            s/\s+$//;
            next unless length;
            my ($name, $value) = split(/\s*=\s*/, $_, 2);
            if(($name eq "pattern")or($name eq "e")){
                $pattern=$value;
            }
            elsif(($name eq "eos")or($name eq "E")){
                $eos=$value;
            }
            elsif($name eq "morph"){
                $morfield=$value;
            }
            elsif($name eq "macros"){
                $macrofile=$value;
            }
            elsif($name eq "define"){
                $define=$value;
            }
            elsif($name eq "command"){
                $show_command=1;
            }
            elsif($name eq "action"){
                # was a bare "$action;", which silently ignored the setting
                $action=$value;
            }
            elsif($name eq "tags"){
                $tags=$value;
            }
            elsif(($name eq "help")or($name eq "h")){
                $help=1;
            }
        }
        close CONFIG;
    }
}
#########################################################
# Command-line options override the configuration files.
# NOTE(review): --define stores into $macrofile, so it replaces the macro file
# instead of adding to it as the help text says, and $define is never passed
# to m4 further down.  Left as is; fixing it means changing the m4 command too.
GetOptions("pattern|e=s" => \$pattern,
           "eos|E=s" => \$eos,
           "morph=s" => \$morfield,
           "macros=s" => \$macrofile,
           "define=s" => \$macrofile,
           "command" => \$show_command,
           # the help text documents -a, so the short form is accepted as well
           "action|a=s" => \$action,
           "tags=s" => \$tags,
           "help|h" => \$help);

if($help)
{
    print <<'END'
Usage: grp [OPTIONS] [file ..]
Options:
--pattern -e PATTERN Pattern.
--eos -E PATTERN Segment serving as sentence delimiter.
--morph=STRING Field containing morphological information (default 'lem').
--macros=FILE Read macrodefinitions from FILE.
--define=FILE Add macrodefinitions from FILE.
--action -a [u][p][g][P] Perform only indicated actions.
u - uncompress with 'lzop -cd'
p - preprocess
g - grep
P - postprocess
(default pgP)
--tags=STRING Morphosyntactic tag format.
--command Print the shell command to be executed and exit.
--help -h Help.
END
;
    exit 0;
}
# A pattern is mandatory whenever the grep action is requested.
die("$0: no pattern given.\n") unless $pattern || $action !~ /g/;
# Use the given macro file, or fall back to terms.m4 in $LIB_DIR.
die("$0: macro file not found") unless
$macrofile or
-e "$LIB_DIR/terms.m4" and $macrofile="$LIB_DIR/terms.m4";
die("$0: undefined tagset format (tags option missing)") unless
$tags;
# Placeholder check: the condition is always true, so this never dies.
die("$0: $tags.tag2re program not found") unless
1; #HOW TO WRITE THIS CONDITION???
# Pipeline stages selected by the letters of $action.
my $uncompress = ($action =~ /u/) ? ' lzop -cd | ' : '';
my $preproc = ($action =~ /p/) ? ' fla | ' : '';
my $postproc = ($action =~ /P/) ? ' | unfla ' : '';
# discarding spaces
$pattern =~ s/\s+/\\`'/g; #`
# quoting escaped commas
$pattern =~ s/\\,/\\`\\`\\,''/g;
# quoting commas in {m,n} r.e. operator
$pattern =~ s/(\{\d*),(\d*\})/\1\\`\\`,''\2/g;
# Expand the macros in the pattern with m4.
# NOTE(review): $pattern, $morfield and $macrofile are interpolated into a
# shell command unquoted or inside double quotes only; input containing shell
# metacharacters is executed by the shell.  The same holds for the tag2re
# call below.
my $grepre = `echo \"$pattern\" | m4 --define=ENDOFSEGMENT='[[:cntrl:]]' --define=MORFIELD=$morfield $macrofile - 2>/dev/null`;
die("Incorrect pattern (m4).") if $? >> 8;
chomp $grepre;
# <> expansion
$grepre =~ s/<([^>]+)>/`echo $1 | $tags.tag2re`/ge;
# Rewrite Perl-style character classes into their egrep equivalents; the
# flattened input uses control characters as segment separators.
$grepre =~ s/\./[^ [:cntrl:]]/g;
$grepre =~ s/\\s/[ ]/g;
$grepre =~ s/\\S/[^ [:cntrl:]]/g;
$grepre =~ s/\\d/[0-9]/g;
$grepre =~ s/\\D/[^0-9 [:cntrl:]]/g;
$grepre =~ s/\\w/[a-z±æê³ñ󶼿A-Z¡ÆÊ£ÑÓ¦¬¯0-9_]/g;
$grepre =~ s/\\W/[^a-z±æê³ñ󶼿A-Z¡ÆÊ£ÑÓ¦¬¯0-9_ [:cntrl:]]/g;
# extensions
$grepre =~ s/\\l/[a-z±æê³ñ󶼿]/g; #lowercase letter
$grepre =~ s/\\L/[A-Z¡ÆÊ£ÑÓ¦¬¯]/g; #upercase letter
my $grep_command = ($action =~ /g/) ? "egrep '$grepre'" : " cat ";
if($show_command)
{
print $grep_command."\n";
exit 0;
}
#print $preproc.$grep_command.$postproc."\n";
# NOTE(review): $uncompress is computed above but is not part of the command.
exec $preproc.$grep_command.$postproc;

55
src/gue/Makefile Normal file
View File

@ -0,0 +1,55 @@
include ../../config.mak

ifeq ($(BUILD_STATIC), yes)
LDFLAGS += -static
endif

CXXFLAGS += -O2 -fpermissive

LIB_PATH=../lib
COMMON_PATH=../common
CMDLINE_FILE='"../gue/cmdline.h"'

# The objects under $(LIB_PATH) are built by the library Makefile, which the
# top-level Makefile runs before the components.
gue: main.cc guess.o $(LIB_PATH)/auttools.o $(LIB_PATH)/word.o \
     cmdline.c common_guess.o common.o
	$(CXX) $(CXXFLAGS) main.cc guess.o \
	$(LIB_PATH)/auttools.o $(LIB_PATH)/word.o cmdline.c common.o common_guess.o \
	-o $@ $(LDFLAGS)

guess.o: guess.h guess.cc
	$(CXX) $(CXXFLAGS) -c guess.cc

common_guess.o: cmdline.h common_guess.cc common_guess.h
	$(CXX) $(CXXFLAGS) -c common_guess.cc

# common.cc includes the generated header named by _CMDLINE_FILE, so the
# header has to exist before this object is compiled.
common.o: cmdline.h $(COMMON_PATH)/cmdline_common.ggo $(COMMON_PATH)/common.cc \
          $(COMMON_PATH)/common.h
	$(CXX) $(CXXFLAGS) -c -D _CMDLINE_FILE=$(CMDLINE_FILE) $(COMMON_PATH)/common.cc

# One gengetopt run writes both files.  A pattern rule with several targets is
# run once for all of them (the % matches the dot); a plain
# "cmdline.c cmdline.h:" rule is two independent rules and may run the
# generator twice at the same time under -j.
cmdline%c cmdline%h: cmdline%ggo
	$(GENGETOPT) -i cmdline.ggo --conf-parser

cmdline.ggo: cmdline_guess.ggo $(COMMON_PATH)/cmdline_common.ggo
	cat cmdline_guess.ggo $(COMMON_PATH)/cmdline_common.ggo > cmdline.ggo

.PHONY: clean clean.cmdline
clean: clean.cmdline
	$(RM) *.o
	$(RM) gue

# Removes the generated cmdline.c, cmdline.h and cmdline.ggo.
clean.cmdline:
	$(RM) cmdline.*

# Nothing is installed unless config.mak defines BIN_DIR.
.PHONY: install
install: gue
ifdef BIN_DIR
	install -m 0755 gue $(BIN_DIR)
endif

.PHONY: uninstall
uninstall:
ifdef BIN_DIR
	$(RM) $(BIN_DIR)/gue
endif

12
src/gue/cmdline_guess.ggo Normal file
View File

@ -0,0 +1,12 @@
# gengetopt description of the gue-specific options; the Makefile appends the
# common options to it to form cmdline.ggo.
# NOTE(review): process_guess_options() ignores the defaults declared here
# when an option is not given: it uses delta=0.1 and cut-off=100 (declared:
# 0.2 and 200), and it treats guess_count 0 as 100 — confirm which values are
# intended.
package "guess"
version "0.1"
option "guess_count" n "Guess up to n descriptions" int default="0" no
option "delta" - "Stop displaying answers after fall of weight" float default="0.2" no
option "cut-off" - "Do not display answers with less weight than cut-off" int default="200" no
option "dictionary-home" - "dh" string typestr="FILENAME" no hidden
option "dictionary" d "File with dictionary information" string typestr="filename" default="gue.bin" no
option "per-info" v "Display performance information" flag off
option "weights" w "Print weights" flag off
option "no-uppercase" - "Do not process form containing uppercase letters" flag off

60
src/gue/common_guess.cc Normal file
View File

@ -0,0 +1,60 @@
#include <stdlib.h>
#include <string.h>
#include "common_guess.h"
int guess_count=0;
double delta=0.1;
int cut_off=100;
char dictionary[255];
bool per_info=false;
bool weights=false;
// Copies the gue-specific command-line options into the module globals.
//
// Dictionary: an explicitly given file wins; otherwise, when both the
// dictionary home and a language are given, <home>/<language>/gue.bin is
// used.  In either case the program exits with status 1 when the file is not
// accessible.  When neither applies, `dictionary` is left untouched.
// guess_count: 0 (or not given) means 100.
// delta / cut_off: 0.1 / 100 unless given.
// NOTE(review): expand_path() is handed 255-byte buffers without a length;
// whether it can overflow them is not visible here.
void process_guess_options(gengetopt_args_info* args)
{
  bool resolved=false;

  if(args->dictionary_given)
  {
    expand_path(args->dictionary_arg,dictionary);
    resolved=true;
  }
  else if (args->dictionary_home_given && args->language_given)
  {
    char buf[255];
    expand_path(args->dictionary_home_arg, buf);

    // Bounded formatting: the old sprintf could write past the end of
    // `dictionary` for a long home directory or language name.
    int len=snprintf(dictionary,sizeof(dictionary),"%s/%s/gue.bin",buf,args->language_arg);
    if(len<0 || (size_t)len>=sizeof(dictionary))
    {
      fprintf(stderr,"Dictionary path too long: %s/%s/gue.bin\nAborting.\n",buf,args->language_arg);
      exit(1);
    }
    resolved=true;
  }

  if(resolved && file_accessible(dictionary)!=0)
  {
    fprintf(stderr,"Cannot open the dictionary file: %s\nAborting.\n",dictionary);
    exit(1);
  }

  guess_count = args->guess_count_given ? args->guess_count_arg : 0;
  if(guess_count==0)
    guess_count=100;

  delta = args->delta_given ? args->delta_arg : 0.1;

  cut_off = args->cut_off_given ? args->cut_off_arg : 100;

  if(args->per_info_given)
    per_info=args->per_info_flag;

  if(args->weights_given)
    weights=true;
}

20
src/gue/common_guess.h Normal file
View File

@ -0,0 +1,20 @@
#ifndef __COMMON_GUESS_H
#define __COMMON_GUESS_H
#include <stdio.h>
#define _CMDLINE_FILE "../gue/cmdline.h"
#include "../common/common.h"
#include "cmdline.h"
#define DIC_FILE "gue.bin"
extern int guess_count;
extern double delta;
extern int cut_off;
extern char dictionary[];
extern bool per_info;
extern bool weights;
void process_guess_options(gengetopt_args_info* args);
#endif

138
src/gue/guess.cc Normal file
View File

@ -0,0 +1,138 @@
#include "guess.h"
#include <cstring>
#include <iostream>
#include <cstdlib>
#include <cassert>
#include <ctime>
#define DICT 1
#define COR 2
#define DICT_P 3
#define COR_P 4
#define W_PRE 0.1
#define W_SUF 0.9
#define PREF_SIGN '_'
// Constructs the guesser, passing the file name `suf_file` to the _suf member.
// The block comment below is an earlier, disabled variant that allocated
// separate suffix and prefix automata.
Guess::Guess(const char* suf_file)
: _suf(suf_file) {
/* _suf = NULL;
_pref = NULL;
if (strlen(suf_file) > 0)
_suf = new TFTiv<char, char>(suf_file);
if (strlen(pref_file) > 0)
_pref = new TFTiv<char, char>(corp_file);
*/
}
// Working storage of Guess::ana.  These are file-level variables, so ana is
// not reentrant.
// Reversed copy of the word being analysed.
char buf[MAX_LINE];
// One result path read from the automaton.
char out[MAX_LINE];
// Cursor into buf; after the lookup, buf0_s - buf is stored in length_s.
char* buf0_s = buf;
// Heap copy of the analysed word, passed to the second lookup.
char* word_t = NULL;
// Automaton state returned by the lookups (-1: no match).
long state_s = 0;
unsigned length_s = buf0_s - buf;
// Length of the last path returned by _suf.cont().
long len = 0;
// Number of results produced by the current call.
int i=0;
/* Looks `word` up in the _suf automaton and adds every description found
 * for it to `result`.
 *
 * The word is written in reverse order into `buf` and the automaton is asked
 * for a match of that buffer.  Two lookups are tried:
 *   1. a match of the reversed word terminated by PREF_SIGN, followed by a
 *      match of the word in its original order terminated by '~';
 *   2. if that yields no state, a match of the reversed word terminated
 *      by '~'.
 * Every path returned by _suf.cont() from the resulting state is passed to
 * add_word_prob() with source DICT.
 *
 * Returns the number of paths added, or -1 when reverse() fails or the
 * working copy of `word` cannot be allocated.
 *
 * NOTE(review): the original (Polish) comments describe '/' as the separator
 * in front of the probability, while add_word_prob() splits on ';' --
 * the old comment looks stale; confirm against the dictionary format.
 */
int Guess::ana(const char* word, Words& result) {
  /* `result` is a reference and cannot be null, so only `word` is checked. */
  assert(word);

  buf0_s = buf;

  /* word_t is handed to _suf.pref(), which appears to advance the pointer it
   * is given (buf0_s - buf is used as the matched length below).  The address
   * returned by strdup() is therefore kept separately so that it can be
   * released; previously the copy was leaked on every call. */
  char* word_copy = strdup(word);
  if (word_copy == NULL)
    return -1;
  word_t = word_copy;

  if (reverse(word, buf) != 0) {
    free(word_copy);
    word_t = NULL;
    return -1;
  }

  state_s = _suf.pref(buf0_s, PREF_SIGN);

  /* If state_s != -1 the dictionary holds information about the prefix of
   * this word.  That information is not reversed, so it is matched against
   * the word itself rather than against buf. */
  if (state_s != -1) {
    state_s = _suf.pref(word_t, '~', state_s);
  }

  /* No match so far: start again from the beginning of the reversed word. */
  if (state_s == -1) {
    buf0_s = buf;
    state_s = _suf.pref(buf0_s, '~');
  }

  length_s = buf0_s - buf;

  /* state_s is the state at which the path begins that describes the
   * probability of the opposite of the occurrence of the description found
   * further along that path: the smaller the number, the greater the
   * probability. */
  len = 0;
  i = 0;
  len = _suf.cont(state_s, out);
  while (len > 0) {
    i++;
    add_word_prob(result, word, out, length_s, DICT);
    len = _suf.cont(-1, out);
  }

  free(word_copy);
  word_t = NULL;  /* do not leave a dangling pointer in the shared global */
  return i;
}
/* Adds `word` to `tab` together with the probability and the description
 * encoded in `path`.
 *
 * `path` has the form "<probability>;<description>".  The text before the
 * first ';' is converted with atof(); the text after it is the description
 * passed to tab.add().  When `path` contains no ';' the description is the
 * empty string (the previous code read past the end of the string in that
 * case).
 *
 * For source == DICT the entry's suffix length is set to `len` and its
 * suffix weight to the parsed probability; other sources only add the entry.
 *
 * Returns the index reported by tab.add().
 */
int Guess::add_word_prob(Words& tab, const char* word, const char* path, unsigned len, int source) {
  /* Work on a bounded private copy so that it can be split in place. */
  char p[MAX_LINE];
  strncpy(p, path, MAX_LINE - 1);
  p[MAX_LINE - 1] = '\0';

  char* prob = p;
  char* desc = p + strcspn(p, ";");
  if (*desc == ';') {
    *desc = '\0';   /* terminate the probability field ...            */
    ++desc;         /* ... and step over the ';' to the description    */
  }
  /* otherwise desc already points at the terminating NUL: empty description */

  int i = tab.add(word, desc);
  if (source==DICT) {
    tab[i].len_suf(len);
    tab[i].w_suf(atof(prob));
  }
  return i;
}

Some files were not shown because too many files have changed in this diff Show More