added mgiza
This commit is contained in:
parent
6f995a64f2
commit
df5dddc924
1160
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/.cproject
Normal file
1160
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/.cproject
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,37 @@
|
|||||||
|
.anjuta
|
||||||
|
.tm_project*
|
||||||
|
.libs
|
||||||
|
.deps
|
||||||
|
.*swp
|
||||||
|
.nautilus-metafile.xml
|
||||||
|
*.autosave
|
||||||
|
*.pws
|
||||||
|
*.bak
|
||||||
|
*~
|
||||||
|
#*#
|
||||||
|
*.gladep
|
||||||
|
*.la
|
||||||
|
*.lo
|
||||||
|
*.o
|
||||||
|
*.class
|
||||||
|
*.pyc
|
||||||
|
aclocal.m4
|
||||||
|
autom4te.cache
|
||||||
|
config.h
|
||||||
|
config.h.in
|
||||||
|
config.log
|
||||||
|
config.status
|
||||||
|
configure
|
||||||
|
intltool-extract*
|
||||||
|
intltool-merge*
|
||||||
|
intltool-modules*
|
||||||
|
intltool-update*
|
||||||
|
libtool
|
||||||
|
prepare.sh
|
||||||
|
stamp-h*
|
||||||
|
ltmain.sh
|
||||||
|
mkinstalldirs
|
||||||
|
config.guess
|
||||||
|
config.sub
|
||||||
|
Makefile
|
||||||
|
Makefile.in
|
@ -0,0 +1,82 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<projectDescription>
|
||||||
|
<name>MGizaWhiteList</name>
|
||||||
|
<comment></comment>
|
||||||
|
<projects>
|
||||||
|
</projects>
|
||||||
|
<buildSpec>
|
||||||
|
<buildCommand>
|
||||||
|
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||||
|
<triggers>clean,full,incremental,</triggers>
|
||||||
|
<arguments>
|
||||||
|
<dictionary>
|
||||||
|
<key>?name?</key>
|
||||||
|
<value></value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.append_environment</key>
|
||||||
|
<value>true</value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
|
||||||
|
<value>all</value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.buildArguments</key>
|
||||||
|
<value></value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.buildCommand</key>
|
||||||
|
<value>make</value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.buildLocation</key>
|
||||||
|
<value>${workspace_loc:/MGizaWhiteList/Debug}</value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
|
||||||
|
<value>clean</value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.contents</key>
|
||||||
|
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
|
||||||
|
<value>false</value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
|
||||||
|
<value>true</value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
|
||||||
|
<value>true</value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
|
||||||
|
<value>all</value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.stopOnError</key>
|
||||||
|
<value>true</value>
|
||||||
|
</dictionary>
|
||||||
|
<dictionary>
|
||||||
|
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
|
||||||
|
<value>true</value>
|
||||||
|
</dictionary>
|
||||||
|
</arguments>
|
||||||
|
</buildCommand>
|
||||||
|
<buildCommand>
|
||||||
|
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||||
|
<arguments>
|
||||||
|
</arguments>
|
||||||
|
</buildCommand>
|
||||||
|
</buildSpec>
|
||||||
|
<natures>
|
||||||
|
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||||
|
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||||
|
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||||
|
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||||
|
</natures>
|
||||||
|
</projectDescription>
|
674
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/COPYING
Normal file
674
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/COPYING
Normal file
@ -0,0 +1,674 @@
|
|||||||
|
GNU GENERAL PUBLIC LICENSE
|
||||||
|
Version 3, 29 June 2007
|
||||||
|
|
||||||
|
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||||
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
of this license document, but changing it is not allowed.
|
||||||
|
|
||||||
|
Preamble
|
||||||
|
|
||||||
|
The GNU General Public License is a free, copyleft license for
|
||||||
|
software and other kinds of works.
|
||||||
|
|
||||||
|
The licenses for most software and other practical works are designed
|
||||||
|
to take away your freedom to share and change the works. By contrast,
|
||||||
|
the GNU General Public License is intended to guarantee your freedom to
|
||||||
|
share and change all versions of a program--to make sure it remains free
|
||||||
|
software for all its users. We, the Free Software Foundation, use the
|
||||||
|
GNU General Public License for most of our software; it applies also to
|
||||||
|
any other work released this way by its authors. You can apply it to
|
||||||
|
your programs, too.
|
||||||
|
|
||||||
|
When we speak of free software, we are referring to freedom, not
|
||||||
|
price. Our General Public Licenses are designed to make sure that you
|
||||||
|
have the freedom to distribute copies of free software (and charge for
|
||||||
|
them if you wish), that you receive source code or can get it if you
|
||||||
|
want it, that you can change the software or use pieces of it in new
|
||||||
|
free programs, and that you know you can do these things.
|
||||||
|
|
||||||
|
To protect your rights, we need to prevent others from denying you
|
||||||
|
these rights or asking you to surrender the rights. Therefore, you have
|
||||||
|
certain responsibilities if you distribute copies of the software, or if
|
||||||
|
you modify it: responsibilities to respect the freedom of others.
|
||||||
|
|
||||||
|
For example, if you distribute copies of such a program, whether
|
||||||
|
gratis or for a fee, you must pass on to the recipients the same
|
||||||
|
freedoms that you received. You must make sure that they, too, receive
|
||||||
|
or can get the source code. And you must show them these terms so they
|
||||||
|
know their rights.
|
||||||
|
|
||||||
|
Developers that use the GNU GPL protect your rights with two steps:
|
||||||
|
(1) assert copyright on the software, and (2) offer you this License
|
||||||
|
giving you legal permission to copy, distribute and/or modify it.
|
||||||
|
|
||||||
|
For the developers' and authors' protection, the GPL clearly explains
|
||||||
|
that there is no warranty for this free software. For both users' and
|
||||||
|
authors' sake, the GPL requires that modified versions be marked as
|
||||||
|
changed, so that their problems will not be attributed erroneously to
|
||||||
|
authors of previous versions.
|
||||||
|
|
||||||
|
Some devices are designed to deny users access to install or run
|
||||||
|
modified versions of the software inside them, although the manufacturer
|
||||||
|
can do so. This is fundamentally incompatible with the aim of
|
||||||
|
protecting users' freedom to change the software. The systematic
|
||||||
|
pattern of such abuse occurs in the area of products for individuals to
|
||||||
|
use, which is precisely where it is most unacceptable. Therefore, we
|
||||||
|
have designed this version of the GPL to prohibit the practice for those
|
||||||
|
products. If such problems arise substantially in other domains, we
|
||||||
|
stand ready to extend this provision to those domains in future versions
|
||||||
|
of the GPL, as needed to protect the freedom of users.
|
||||||
|
|
||||||
|
Finally, every program is threatened constantly by software patents.
|
||||||
|
States should not allow patents to restrict development and use of
|
||||||
|
software on general-purpose computers, but in those that do, we wish to
|
||||||
|
avoid the special danger that patents applied to a free program could
|
||||||
|
make it effectively proprietary. To prevent this, the GPL assures that
|
||||||
|
patents cannot be used to render the program non-free.
|
||||||
|
|
||||||
|
The precise terms and conditions for copying, distribution and
|
||||||
|
modification follow.
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
0. Definitions.
|
||||||
|
|
||||||
|
"This License" refers to version 3 of the GNU General Public License.
|
||||||
|
|
||||||
|
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||||
|
works, such as semiconductor masks.
|
||||||
|
|
||||||
|
"The Program" refers to any copyrightable work licensed under this
|
||||||
|
License. Each licensee is addressed as "you". "Licensees" and
|
||||||
|
"recipients" may be individuals or organizations.
|
||||||
|
|
||||||
|
To "modify" a work means to copy from or adapt all or part of the work
|
||||||
|
in a fashion requiring copyright permission, other than the making of an
|
||||||
|
exact copy. The resulting work is called a "modified version" of the
|
||||||
|
earlier work or a work "based on" the earlier work.
|
||||||
|
|
||||||
|
A "covered work" means either the unmodified Program or a work based
|
||||||
|
on the Program.
|
||||||
|
|
||||||
|
To "propagate" a work means to do anything with it that, without
|
||||||
|
permission, would make you directly or secondarily liable for
|
||||||
|
infringement under applicable copyright law, except executing it on a
|
||||||
|
computer or modifying a private copy. Propagation includes copying,
|
||||||
|
distribution (with or without modification), making available to the
|
||||||
|
public, and in some countries other activities as well.
|
||||||
|
|
||||||
|
To "convey" a work means any kind of propagation that enables other
|
||||||
|
parties to make or receive copies. Mere interaction with a user through
|
||||||
|
a computer network, with no transfer of a copy, is not conveying.
|
||||||
|
|
||||||
|
An interactive user interface displays "Appropriate Legal Notices"
|
||||||
|
to the extent that it includes a convenient and prominently visible
|
||||||
|
feature that (1) displays an appropriate copyright notice, and (2)
|
||||||
|
tells the user that there is no warranty for the work (except to the
|
||||||
|
extent that warranties are provided), that licensees may convey the
|
||||||
|
work under this License, and how to view a copy of this License. If
|
||||||
|
the interface presents a list of user commands or options, such as a
|
||||||
|
menu, a prominent item in the list meets this criterion.
|
||||||
|
|
||||||
|
1. Source Code.
|
||||||
|
|
||||||
|
The "source code" for a work means the preferred form of the work
|
||||||
|
for making modifications to it. "Object code" means any non-source
|
||||||
|
form of a work.
|
||||||
|
|
||||||
|
A "Standard Interface" means an interface that either is an official
|
||||||
|
standard defined by a recognized standards body, or, in the case of
|
||||||
|
interfaces specified for a particular programming language, one that
|
||||||
|
is widely used among developers working in that language.
|
||||||
|
|
||||||
|
The "System Libraries" of an executable work include anything, other
|
||||||
|
than the work as a whole, that (a) is included in the normal form of
|
||||||
|
packaging a Major Component, but which is not part of that Major
|
||||||
|
Component, and (b) serves only to enable use of the work with that
|
||||||
|
Major Component, or to implement a Standard Interface for which an
|
||||||
|
implementation is available to the public in source code form. A
|
||||||
|
"Major Component", in this context, means a major essential component
|
||||||
|
(kernel, window system, and so on) of the specific operating system
|
||||||
|
(if any) on which the executable work runs, or a compiler used to
|
||||||
|
produce the work, or an object code interpreter used to run it.
|
||||||
|
|
||||||
|
The "Corresponding Source" for a work in object code form means all
|
||||||
|
the source code needed to generate, install, and (for an executable
|
||||||
|
work) run the object code and to modify the work, including scripts to
|
||||||
|
control those activities. However, it does not include the work's
|
||||||
|
System Libraries, or general-purpose tools or generally available free
|
||||||
|
programs which are used unmodified in performing those activities but
|
||||||
|
which are not part of the work. For example, Corresponding Source
|
||||||
|
includes interface definition files associated with source files for
|
||||||
|
the work, and the source code for shared libraries and dynamically
|
||||||
|
linked subprograms that the work is specifically designed to require,
|
||||||
|
such as by intimate data communication or control flow between those
|
||||||
|
subprograms and other parts of the work.
|
||||||
|
|
||||||
|
The Corresponding Source need not include anything that users
|
||||||
|
can regenerate automatically from other parts of the Corresponding
|
||||||
|
Source.
|
||||||
|
|
||||||
|
The Corresponding Source for a work in source code form is that
|
||||||
|
same work.
|
||||||
|
|
||||||
|
2. Basic Permissions.
|
||||||
|
|
||||||
|
All rights granted under this License are granted for the term of
|
||||||
|
copyright on the Program, and are irrevocable provided the stated
|
||||||
|
conditions are met. This License explicitly affirms your unlimited
|
||||||
|
permission to run the unmodified Program. The output from running a
|
||||||
|
covered work is covered by this License only if the output, given its
|
||||||
|
content, constitutes a covered work. This License acknowledges your
|
||||||
|
rights of fair use or other equivalent, as provided by copyright law.
|
||||||
|
|
||||||
|
You may make, run and propagate covered works that you do not
|
||||||
|
convey, without conditions so long as your license otherwise remains
|
||||||
|
in force. You may convey covered works to others for the sole purpose
|
||||||
|
of having them make modifications exclusively for you, or provide you
|
||||||
|
with facilities for running those works, provided that you comply with
|
||||||
|
the terms of this License in conveying all material for which you do
|
||||||
|
not control copyright. Those thus making or running the covered works
|
||||||
|
for you must do so exclusively on your behalf, under your direction
|
||||||
|
and control, on terms that prohibit them from making any copies of
|
||||||
|
your copyrighted material outside their relationship with you.
|
||||||
|
|
||||||
|
Conveying under any other circumstances is permitted solely under
|
||||||
|
the conditions stated below. Sublicensing is not allowed; section 10
|
||||||
|
makes it unnecessary.
|
||||||
|
|
||||||
|
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||||
|
|
||||||
|
No covered work shall be deemed part of an effective technological
|
||||||
|
measure under any applicable law fulfilling obligations under article
|
||||||
|
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||||
|
similar laws prohibiting or restricting circumvention of such
|
||||||
|
measures.
|
||||||
|
|
||||||
|
When you convey a covered work, you waive any legal power to forbid
|
||||||
|
circumvention of technological measures to the extent such circumvention
|
||||||
|
is effected by exercising rights under this License with respect to
|
||||||
|
the covered work, and you disclaim any intention to limit operation or
|
||||||
|
modification of the work as a means of enforcing, against the work's
|
||||||
|
users, your or third parties' legal rights to forbid circumvention of
|
||||||
|
technological measures.
|
||||||
|
|
||||||
|
4. Conveying Verbatim Copies.
|
||||||
|
|
||||||
|
You may convey verbatim copies of the Program's source code as you
|
||||||
|
receive it, in any medium, provided that you conspicuously and
|
||||||
|
appropriately publish on each copy an appropriate copyright notice;
|
||||||
|
keep intact all notices stating that this License and any
|
||||||
|
non-permissive terms added in accord with section 7 apply to the code;
|
||||||
|
keep intact all notices of the absence of any warranty; and give all
|
||||||
|
recipients a copy of this License along with the Program.
|
||||||
|
|
||||||
|
You may charge any price or no price for each copy that you convey,
|
||||||
|
and you may offer support or warranty protection for a fee.
|
||||||
|
|
||||||
|
5. Conveying Modified Source Versions.
|
||||||
|
|
||||||
|
You may convey a work based on the Program, or the modifications to
|
||||||
|
produce it from the Program, in the form of source code under the
|
||||||
|
terms of section 4, provided that you also meet all of these conditions:
|
||||||
|
|
||||||
|
a) The work must carry prominent notices stating that you modified
|
||||||
|
it, and giving a relevant date.
|
||||||
|
|
||||||
|
b) The work must carry prominent notices stating that it is
|
||||||
|
released under this License and any conditions added under section
|
||||||
|
7. This requirement modifies the requirement in section 4 to
|
||||||
|
"keep intact all notices".
|
||||||
|
|
||||||
|
c) You must license the entire work, as a whole, under this
|
||||||
|
License to anyone who comes into possession of a copy. This
|
||||||
|
License will therefore apply, along with any applicable section 7
|
||||||
|
additional terms, to the whole of the work, and all its parts,
|
||||||
|
regardless of how they are packaged. This License gives no
|
||||||
|
permission to license the work in any other way, but it does not
|
||||||
|
invalidate such permission if you have separately received it.
|
||||||
|
|
||||||
|
d) If the work has interactive user interfaces, each must display
|
||||||
|
Appropriate Legal Notices; however, if the Program has interactive
|
||||||
|
interfaces that do not display Appropriate Legal Notices, your
|
||||||
|
work need not make them do so.
|
||||||
|
|
||||||
|
A compilation of a covered work with other separate and independent
|
||||||
|
works, which are not by their nature extensions of the covered work,
|
||||||
|
and which are not combined with it such as to form a larger program,
|
||||||
|
in or on a volume of a storage or distribution medium, is called an
|
||||||
|
"aggregate" if the compilation and its resulting copyright are not
|
||||||
|
used to limit the access or legal rights of the compilation's users
|
||||||
|
beyond what the individual works permit. Inclusion of a covered work
|
||||||
|
in an aggregate does not cause this License to apply to the other
|
||||||
|
parts of the aggregate.
|
||||||
|
|
||||||
|
6. Conveying Non-Source Forms.
|
||||||
|
|
||||||
|
You may convey a covered work in object code form under the terms
|
||||||
|
of sections 4 and 5, provided that you also convey the
|
||||||
|
machine-readable Corresponding Source under the terms of this License,
|
||||||
|
in one of these ways:
|
||||||
|
|
||||||
|
a) Convey the object code in, or embodied in, a physical product
|
||||||
|
(including a physical distribution medium), accompanied by the
|
||||||
|
Corresponding Source fixed on a durable physical medium
|
||||||
|
customarily used for software interchange.
|
||||||
|
|
||||||
|
b) Convey the object code in, or embodied in, a physical product
|
||||||
|
(including a physical distribution medium), accompanied by a
|
||||||
|
written offer, valid for at least three years and valid for as
|
||||||
|
long as you offer spare parts or customer support for that product
|
||||||
|
model, to give anyone who possesses the object code either (1) a
|
||||||
|
copy of the Corresponding Source for all the software in the
|
||||||
|
product that is covered by this License, on a durable physical
|
||||||
|
medium customarily used for software interchange, for a price no
|
||||||
|
more than your reasonable cost of physically performing this
|
||||||
|
conveying of source, or (2) access to copy the
|
||||||
|
Corresponding Source from a network server at no charge.
|
||||||
|
|
||||||
|
c) Convey individual copies of the object code with a copy of the
|
||||||
|
written offer to provide the Corresponding Source. This
|
||||||
|
alternative is allowed only occasionally and noncommercially, and
|
||||||
|
only if you received the object code with such an offer, in accord
|
||||||
|
with subsection 6b.
|
||||||
|
|
||||||
|
d) Convey the object code by offering access from a designated
|
||||||
|
place (gratis or for a charge), and offer equivalent access to the
|
||||||
|
Corresponding Source in the same way through the same place at no
|
||||||
|
further charge. You need not require recipients to copy the
|
||||||
|
Corresponding Source along with the object code. If the place to
|
||||||
|
copy the object code is a network server, the Corresponding Source
|
||||||
|
may be on a different server (operated by you or a third party)
|
||||||
|
that supports equivalent copying facilities, provided you maintain
|
||||||
|
clear directions next to the object code saying where to find the
|
||||||
|
Corresponding Source. Regardless of what server hosts the
|
||||||
|
Corresponding Source, you remain obligated to ensure that it is
|
||||||
|
available for as long as needed to satisfy these requirements.
|
||||||
|
|
||||||
|
e) Convey the object code using peer-to-peer transmission, provided
|
||||||
|
you inform other peers where the object code and Corresponding
|
||||||
|
Source of the work are being offered to the general public at no
|
||||||
|
charge under subsection 6d.
|
||||||
|
|
||||||
|
A separable portion of the object code, whose source code is excluded
|
||||||
|
from the Corresponding Source as a System Library, need not be
|
||||||
|
included in conveying the object code work.
|
||||||
|
|
||||||
|
A "User Product" is either (1) a "consumer product", which means any
|
||||||
|
tangible personal property which is normally used for personal, family,
|
||||||
|
or household purposes, or (2) anything designed or sold for incorporation
|
||||||
|
into a dwelling. In determining whether a product is a consumer product,
|
||||||
|
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||||
|
product received by a particular user, "normally used" refers to a
|
||||||
|
typical or common use of that class of product, regardless of the status
|
||||||
|
of the particular user or of the way in which the particular user
|
||||||
|
actually uses, or expects or is expected to use, the product. A product
|
||||||
|
is a consumer product regardless of whether the product has substantial
|
||||||
|
commercial, industrial or non-consumer uses, unless such uses represent
|
||||||
|
the only significant mode of use of the product.
|
||||||
|
|
||||||
|
"Installation Information" for a User Product means any methods,
|
||||||
|
procedures, authorization keys, or other information required to install
|
||||||
|
and execute modified versions of a covered work in that User Product from
|
||||||
|
a modified version of its Corresponding Source. The information must
|
||||||
|
suffice to ensure that the continued functioning of the modified object
|
||||||
|
code is in no case prevented or interfered with solely because
|
||||||
|
modification has been made.
|
||||||
|
|
||||||
|
If you convey an object code work under this section in, or with, or
|
||||||
|
specifically for use in, a User Product, and the conveying occurs as
|
||||||
|
part of a transaction in which the right of possession and use of the
|
||||||
|
User Product is transferred to the recipient in perpetuity or for a
|
||||||
|
fixed term (regardless of how the transaction is characterized), the
|
||||||
|
Corresponding Source conveyed under this section must be accompanied
|
||||||
|
by the Installation Information. But this requirement does not apply
|
||||||
|
if neither you nor any third party retains the ability to install
|
||||||
|
modified object code on the User Product (for example, the work has
|
||||||
|
been installed in ROM).
|
||||||
|
|
||||||
|
The requirement to provide Installation Information does not include a
|
||||||
|
requirement to continue to provide support service, warranty, or updates
|
||||||
|
for a work that has been modified or installed by the recipient, or for
|
||||||
|
the User Product in which it has been modified or installed. Access to a
|
||||||
|
network may be denied when the modification itself materially and
|
||||||
|
adversely affects the operation of the network or violates the rules and
|
||||||
|
protocols for communication across the network.
|
||||||
|
|
||||||
|
Corresponding Source conveyed, and Installation Information provided,
|
||||||
|
in accord with this section must be in a format that is publicly
|
||||||
|
documented (and with an implementation available to the public in
|
||||||
|
source code form), and must require no special password or key for
|
||||||
|
unpacking, reading or copying.
|
||||||
|
|
||||||
|
7. Additional Terms.
|
||||||
|
|
||||||
|
"Additional permissions" are terms that supplement the terms of this
|
||||||
|
License by making exceptions from one or more of its conditions.
|
||||||
|
Additional permissions that are applicable to the entire Program shall
|
||||||
|
be treated as though they were included in this License, to the extent
|
||||||
|
that they are valid under applicable law. If additional permissions
|
||||||
|
apply only to part of the Program, that part may be used separately
|
||||||
|
under those permissions, but the entire Program remains governed by
|
||||||
|
this License without regard to the additional permissions.
|
||||||
|
|
||||||
|
When you convey a copy of a covered work, you may at your option
|
||||||
|
remove any additional permissions from that copy, or from any part of
|
||||||
|
it. (Additional permissions may be written to require their own
|
||||||
|
removal in certain cases when you modify the work.) You may place
|
||||||
|
additional permissions on material, added by you to a covered work,
|
||||||
|
for which you have or can give appropriate copyright permission.
|
||||||
|
|
||||||
|
Notwithstanding any other provision of this License, for material you
|
||||||
|
add to a covered work, you may (if authorized by the copyright holders of
|
||||||
|
that material) supplement the terms of this License with terms:
|
||||||
|
|
||||||
|
a) Disclaiming warranty or limiting liability differently from the
|
||||||
|
terms of sections 15 and 16 of this License; or
|
||||||
|
|
||||||
|
b) Requiring preservation of specified reasonable legal notices or
|
||||||
|
author attributions in that material or in the Appropriate Legal
|
||||||
|
Notices displayed by works containing it; or
|
||||||
|
|
||||||
|
c) Prohibiting misrepresentation of the origin of that material, or
|
||||||
|
requiring that modified versions of such material be marked in
|
||||||
|
reasonable ways as different from the original version; or
|
||||||
|
|
||||||
|
d) Limiting the use for publicity purposes of names of licensors or
|
||||||
|
authors of the material; or
|
||||||
|
|
||||||
|
e) Declining to grant rights under trademark law for use of some
|
||||||
|
trade names, trademarks, or service marks; or
|
||||||
|
|
||||||
|
f) Requiring indemnification of licensors and authors of that
|
||||||
|
material by anyone who conveys the material (or modified versions of
|
||||||
|
it) with contractual assumptions of liability to the recipient, for
|
||||||
|
any liability that these contractual assumptions directly impose on
|
||||||
|
those licensors and authors.
|
||||||
|
|
||||||
|
All other non-permissive additional terms are considered "further
|
||||||
|
restrictions" within the meaning of section 10. If the Program as you
|
||||||
|
received it, or any part of it, contains a notice stating that it is
|
||||||
|
governed by this License along with a term that is a further
|
||||||
|
restriction, you may remove that term. If a license document contains
|
||||||
|
a further restriction but permits relicensing or conveying under this
|
||||||
|
License, you may add to a covered work material governed by the terms
|
||||||
|
of that license document, provided that the further restriction does
|
||||||
|
not survive such relicensing or conveying.
|
||||||
|
|
||||||
|
If you add terms to a covered work in accord with this section, you
|
||||||
|
must place, in the relevant source files, a statement of the
|
||||||
|
additional terms that apply to those files, or a notice indicating
|
||||||
|
where to find the applicable terms.
|
||||||
|
|
||||||
|
Additional terms, permissive or non-permissive, may be stated in the
|
||||||
|
form of a separately written license, or stated as exceptions;
|
||||||
|
the above requirements apply either way.
|
||||||
|
|
||||||
|
8. Termination.
|
||||||
|
|
||||||
|
You may not propagate or modify a covered work except as expressly
|
||||||
|
provided under this License. Any attempt otherwise to propagate or
|
||||||
|
modify it is void, and will automatically terminate your rights under
|
||||||
|
this License (including any patent licenses granted under the third
|
||||||
|
paragraph of section 11).
|
||||||
|
|
||||||
|
However, if you cease all violation of this License, then your
|
||||||
|
license from a particular copyright holder is reinstated (a)
|
||||||
|
provisionally, unless and until the copyright holder explicitly and
|
||||||
|
finally terminates your license, and (b) permanently, if the copyright
|
||||||
|
holder fails to notify you of the violation by some reasonable means
|
||||||
|
prior to 60 days after the cessation.
|
||||||
|
|
||||||
|
Moreover, your license from a particular copyright holder is
|
||||||
|
reinstated permanently if the copyright holder notifies you of the
|
||||||
|
violation by some reasonable means, this is the first time you have
|
||||||
|
received notice of violation of this License (for any work) from that
|
||||||
|
copyright holder, and you cure the violation prior to 30 days after
|
||||||
|
your receipt of the notice.
|
||||||
|
|
||||||
|
Termination of your rights under this section does not terminate the
|
||||||
|
licenses of parties who have received copies or rights from you under
|
||||||
|
this License. If your rights have been terminated and not permanently
|
||||||
|
reinstated, you do not qualify to receive new licenses for the same
|
||||||
|
material under section 10.
|
||||||
|
|
||||||
|
9. Acceptance Not Required for Having Copies.
|
||||||
|
|
||||||
|
You are not required to accept this License in order to receive or
|
||||||
|
run a copy of the Program. Ancillary propagation of a covered work
|
||||||
|
occurring solely as a consequence of using peer-to-peer transmission
|
||||||
|
to receive a copy likewise does not require acceptance. However,
|
||||||
|
nothing other than this License grants you permission to propagate or
|
||||||
|
modify any covered work. These actions infringe copyright if you do
|
||||||
|
not accept this License. Therefore, by modifying or propagating a
|
||||||
|
covered work, you indicate your acceptance of this License to do so.
|
||||||
|
|
||||||
|
10. Automatic Licensing of Downstream Recipients.
|
||||||
|
|
||||||
|
Each time you convey a covered work, the recipient automatically
|
||||||
|
receives a license from the original licensors, to run, modify and
|
||||||
|
propagate that work, subject to this License. You are not responsible
|
||||||
|
for enforcing compliance by third parties with this License.
|
||||||
|
|
||||||
|
An "entity transaction" is a transaction transferring control of an
|
||||||
|
organization, or substantially all assets of one, or subdividing an
|
||||||
|
organization, or merging organizations. If propagation of a covered
|
||||||
|
work results from an entity transaction, each party to that
|
||||||
|
transaction who receives a copy of the work also receives whatever
|
||||||
|
licenses to the work the party's predecessor in interest had or could
|
||||||
|
give under the previous paragraph, plus a right to possession of the
|
||||||
|
Corresponding Source of the work from the predecessor in interest, if
|
||||||
|
the predecessor has it or can get it with reasonable efforts.
|
||||||
|
|
||||||
|
You may not impose any further restrictions on the exercise of the
|
||||||
|
rights granted or affirmed under this License. For example, you may
|
||||||
|
not impose a license fee, royalty, or other charge for exercise of
|
||||||
|
rights granted under this License, and you may not initiate litigation
|
||||||
|
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||||
|
any patent claim is infringed by making, using, selling, offering for
|
||||||
|
sale, or importing the Program or any portion of it.
|
||||||
|
|
||||||
|
11. Patents.
|
||||||
|
|
||||||
|
A "contributor" is a copyright holder who authorizes use under this
|
||||||
|
License of the Program or a work on which the Program is based. The
|
||||||
|
work thus licensed is called the contributor's "contributor version".
|
||||||
|
|
||||||
|
A contributor's "essential patent claims" are all patent claims
|
||||||
|
owned or controlled by the contributor, whether already acquired or
|
||||||
|
hereafter acquired, that would be infringed by some manner, permitted
|
||||||
|
by this License, of making, using, or selling its contributor version,
|
||||||
|
but do not include claims that would be infringed only as a
|
||||||
|
consequence of further modification of the contributor version. For
|
||||||
|
purposes of this definition, "control" includes the right to grant
|
||||||
|
patent sublicenses in a manner consistent with the requirements of
|
||||||
|
this License.
|
||||||
|
|
||||||
|
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||||
|
patent license under the contributor's essential patent claims, to
|
||||||
|
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||||
|
propagate the contents of its contributor version.
|
||||||
|
|
||||||
|
In the following three paragraphs, a "patent license" is any express
|
||||||
|
agreement or commitment, however denominated, not to enforce a patent
|
||||||
|
(such as an express permission to practice a patent or covenant not to
|
||||||
|
sue for patent infringement). To "grant" such a patent license to a
|
||||||
|
party means to make such an agreement or commitment not to enforce a
|
||||||
|
patent against the party.
|
||||||
|
|
||||||
|
If you convey a covered work, knowingly relying on a patent license,
|
||||||
|
and the Corresponding Source of the work is not available for anyone
|
||||||
|
to copy, free of charge and under the terms of this License, through a
|
||||||
|
publicly available network server or other readily accessible means,
|
||||||
|
then you must either (1) cause the Corresponding Source to be so
|
||||||
|
available, or (2) arrange to deprive yourself of the benefit of the
|
||||||
|
patent license for this particular work, or (3) arrange, in a manner
|
||||||
|
consistent with the requirements of this License, to extend the patent
|
||||||
|
license to downstream recipients. "Knowingly relying" means you have
|
||||||
|
actual knowledge that, but for the patent license, your conveying the
|
||||||
|
covered work in a country, or your recipient's use of the covered work
|
||||||
|
in a country, would infringe one or more identifiable patents in that
|
||||||
|
country that you have reason to believe are valid.
|
||||||
|
|
||||||
|
If, pursuant to or in connection with a single transaction or
|
||||||
|
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||||
|
covered work, and grant a patent license to some of the parties
|
||||||
|
receiving the covered work authorizing them to use, propagate, modify
|
||||||
|
or convey a specific copy of the covered work, then the patent license
|
||||||
|
you grant is automatically extended to all recipients of the covered
|
||||||
|
work and works based on it.
|
||||||
|
|
||||||
|
A patent license is "discriminatory" if it does not include within
|
||||||
|
the scope of its coverage, prohibits the exercise of, or is
|
||||||
|
conditioned on the non-exercise of one or more of the rights that are
|
||||||
|
specifically granted under this License. You may not convey a covered
|
||||||
|
work if you are a party to an arrangement with a third party that is
|
||||||
|
in the business of distributing software, under which you make payment
|
||||||
|
to the third party based on the extent of your activity of conveying
|
||||||
|
the work, and under which the third party grants, to any of the
|
||||||
|
parties who would receive the covered work from you, a discriminatory
|
||||||
|
patent license (a) in connection with copies of the covered work
|
||||||
|
conveyed by you (or copies made from those copies), or (b) primarily
|
||||||
|
for and in connection with specific products or compilations that
|
||||||
|
contain the covered work, unless you entered into that arrangement,
|
||||||
|
or that patent license was granted, prior to 28 March 2007.
|
||||||
|
|
||||||
|
Nothing in this License shall be construed as excluding or limiting
|
||||||
|
any implied license or other defenses to infringement that may
|
||||||
|
otherwise be available to you under applicable patent law.
|
||||||
|
|
||||||
|
12. No Surrender of Others' Freedom.
|
||||||
|
|
||||||
|
If conditions are imposed on you (whether by court order, agreement or
|
||||||
|
otherwise) that contradict the conditions of this License, they do not
|
||||||
|
excuse you from the conditions of this License. If you cannot convey a
|
||||||
|
covered work so as to satisfy simultaneously your obligations under this
|
||||||
|
License and any other pertinent obligations, then as a consequence you may
|
||||||
|
not convey it at all. For example, if you agree to terms that obligate you
|
||||||
|
to collect a royalty for further conveying from those to whom you convey
|
||||||
|
the Program, the only way you could satisfy both those terms and this
|
||||||
|
License would be to refrain entirely from conveying the Program.
|
||||||
|
|
||||||
|
13. Use with the GNU Affero General Public License.
|
||||||
|
|
||||||
|
Notwithstanding any other provision of this License, you have
|
||||||
|
permission to link or combine any covered work with a work licensed
|
||||||
|
under version 3 of the GNU Affero General Public License into a single
|
||||||
|
combined work, and to convey the resulting work. The terms of this
|
||||||
|
License will continue to apply to the part which is the covered work,
|
||||||
|
but the special requirements of the GNU Affero General Public License,
|
||||||
|
section 13, concerning interaction through a network will apply to the
|
||||||
|
combination as such.
|
||||||
|
|
||||||
|
14. Revised Versions of this License.
|
||||||
|
|
||||||
|
The Free Software Foundation may publish revised and/or new versions of
|
||||||
|
the GNU General Public License from time to time. Such new versions will
|
||||||
|
be similar in spirit to the present version, but may differ in detail to
|
||||||
|
address new problems or concerns.
|
||||||
|
|
||||||
|
Each version is given a distinguishing version number. If the
|
||||||
|
Program specifies that a certain numbered version of the GNU General
|
||||||
|
Public License "or any later version" applies to it, you have the
|
||||||
|
option of following the terms and conditions either of that numbered
|
||||||
|
version or of any later version published by the Free Software
|
||||||
|
Foundation. If the Program does not specify a version number of the
|
||||||
|
GNU General Public License, you may choose any version ever published
|
||||||
|
by the Free Software Foundation.
|
||||||
|
|
||||||
|
If the Program specifies that a proxy can decide which future
|
||||||
|
versions of the GNU General Public License can be used, that proxy's
|
||||||
|
public statement of acceptance of a version permanently authorizes you
|
||||||
|
to choose that version for the Program.
|
||||||
|
|
||||||
|
Later license versions may give you additional or different
|
||||||
|
permissions. However, no additional obligations are imposed on any
|
||||||
|
author or copyright holder as a result of your choosing to follow a
|
||||||
|
later version.
|
||||||
|
|
||||||
|
15. Disclaimer of Warranty.
|
||||||
|
|
||||||
|
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||||
|
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||||
|
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||||
|
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||||
|
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||||
|
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||||
|
|
||||||
|
16. Limitation of Liability.
|
||||||
|
|
||||||
|
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||||
|
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||||
|
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||||
|
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||||
|
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||||
|
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||||
|
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||||
|
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||||
|
SUCH DAMAGES.
|
||||||
|
|
||||||
|
17. Interpretation of Sections 15 and 16.
|
||||||
|
|
||||||
|
If the disclaimer of warranty and limitation of liability provided
|
||||||
|
above cannot be given local legal effect according to their terms,
|
||||||
|
reviewing courts shall apply local law that most closely approximates
|
||||||
|
an absolute waiver of all civil liability in connection with the
|
||||||
|
Program, unless a warranty or assumption of liability accompanies a
|
||||||
|
copy of the Program in return for a fee.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
How to Apply These Terms to Your New Programs
|
||||||
|
|
||||||
|
If you develop a new program, and you want it to be of the greatest
|
||||||
|
possible use to the public, the best way to achieve this is to make it
|
||||||
|
free software which everyone can redistribute and change under these terms.
|
||||||
|
|
||||||
|
To do so, attach the following notices to the program. It is safest
|
||||||
|
to attach them to the start of each source file to most effectively
|
||||||
|
state the exclusion of warranty; and each file should have at least
|
||||||
|
the "copyright" line and a pointer to where the full notice is found.
|
||||||
|
|
||||||
|
<one line to give the program's name and a brief idea of what it does.>
|
||||||
|
Copyright (C) <year> <name of author>
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
Also add information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
|
If the program does terminal interaction, make it output a short
|
||||||
|
notice like this when it starts in an interactive mode:
|
||||||
|
|
||||||
|
<program> Copyright (C) <year> <name of author>
|
||||||
|
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||||
|
This is free software, and you are welcome to redistribute it
|
||||||
|
under certain conditions; type `show c' for details.
|
||||||
|
|
||||||
|
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||||
|
parts of the General Public License. Of course, your program's commands
|
||||||
|
might be different; for a GUI interface, you would use an "about box".
|
||||||
|
|
||||||
|
You should also get your employer (if you work as a programmer) or school,
|
||||||
|
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||||
|
For more information on this, and how to apply and follow the GNU GPL, see
|
||||||
|
<http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
The GNU General Public License does not permit incorporating your program
|
||||||
|
into proprietary programs. If your program is a subroutine library, you
|
||||||
|
may consider it more useful to permit linking proprietary applications with
|
||||||
|
the library. If this is what you want to do, use the GNU Lesser General
|
||||||
|
Public License instead of this License. But first, please read
|
||||||
|
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
|
237
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/INSTALL
Normal file
237
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/INSTALL
Normal file
@ -0,0 +1,237 @@
|
|||||||
|
Installation Instructions
|
||||||
|
*************************
|
||||||
|
|
||||||
|
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
|
||||||
|
2006, 2007 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
This file is free documentation; the Free Software Foundation gives
|
||||||
|
unlimited permission to copy, distribute and modify it.
|
||||||
|
|
||||||
|
Basic Installation
|
||||||
|
==================
|
||||||
|
|
||||||
|
Briefly, the shell commands `./configure; make; make install' should
|
||||||
|
configure, build, and install this package. The following
|
||||||
|
more-detailed instructions are generic; see the `README' file for
|
||||||
|
instructions specific to this package.
|
||||||
|
|
||||||
|
The `configure' shell script attempts to guess correct values for
|
||||||
|
various system-dependent variables used during compilation. It uses
|
||||||
|
those values to create a `Makefile' in each directory of the package.
|
||||||
|
It may also create one or more `.h' files containing system-dependent
|
||||||
|
definitions. Finally, it creates a shell script `config.status' that
|
||||||
|
you can run in the future to recreate the current configuration, and a
|
||||||
|
file `config.log' containing compiler output (useful mainly for
|
||||||
|
debugging `configure').
|
||||||
|
|
||||||
|
It can also use an optional file (typically called `config.cache'
|
||||||
|
and enabled with `--cache-file=config.cache' or simply `-C') that saves
|
||||||
|
the results of its tests to speed up reconfiguring. Caching is
|
||||||
|
disabled by default to prevent problems with accidental use of stale
|
||||||
|
cache files.
|
||||||
|
|
||||||
|
If you need to do unusual things to compile the package, please try
|
||||||
|
to figure out how `configure' could check whether to do them, and mail
|
||||||
|
diffs or instructions to the address given in the `README' so they can
|
||||||
|
be considered for the next release. If you are using the cache, and at
|
||||||
|
some point `config.cache' contains results you don't want to keep, you
|
||||||
|
may remove or edit it.
|
||||||
|
|
||||||
|
The file `configure.ac' (or `configure.in') is used to create
|
||||||
|
`configure' by a program called `autoconf'. You need `configure.ac' if
|
||||||
|
you want to change it or regenerate `configure' using a newer version
|
||||||
|
of `autoconf'.
|
||||||
|
|
||||||
|
The simplest way to compile this package is:
|
||||||
|
|
||||||
|
1. `cd' to the directory containing the package's source code and type
|
||||||
|
`./configure' to configure the package for your system.
|
||||||
|
|
||||||
|
Running `configure' might take a while. While running, it prints
|
||||||
|
some messages telling which features it is checking for.
|
||||||
|
|
||||||
|
2. Type `make' to compile the package.
|
||||||
|
|
||||||
|
3. Optionally, type `make check' to run any self-tests that come with
|
||||||
|
the package.
|
||||||
|
|
||||||
|
4. Type `make install' to install the programs and any data files and
|
||||||
|
documentation.
|
||||||
|
|
||||||
|
5. You can remove the program binaries and object files from the
|
||||||
|
source code directory by typing `make clean'. To also remove the
|
||||||
|
files that `configure' created (so you can compile the package for
|
||||||
|
a different kind of computer), type `make distclean'. There is
|
||||||
|
also a `make maintainer-clean' target, but that is intended mainly
|
||||||
|
for the package's developers. If you use it, you may have to get
|
||||||
|
all sorts of other programs in order to regenerate files that came
|
||||||
|
with the distribution.
|
||||||
|
|
||||||
|
6. Often, you can also type `make uninstall' to remove the installed
|
||||||
|
files again.
|
||||||
|
|
||||||
|
Compilers and Options
|
||||||
|
=====================
|
||||||
|
|
||||||
|
Some systems require unusual options for compilation or linking that the
|
||||||
|
`configure' script does not know about. Run `./configure --help' for
|
||||||
|
details on some of the pertinent environment variables.
|
||||||
|
|
||||||
|
You can give `configure' initial values for configuration parameters
|
||||||
|
by setting variables in the command line or in the environment. Here
|
||||||
|
is an example:
|
||||||
|
|
||||||
|
./configure CC=c99 CFLAGS=-g LIBS=-lposix
|
||||||
|
|
||||||
|
*Note Defining Variables::, for more details.
|
||||||
|
|
||||||
|
Compiling For Multiple Architectures
|
||||||
|
====================================
|
||||||
|
|
||||||
|
You can compile the package for more than one kind of computer at the
|
||||||
|
same time, by placing the object files for each architecture in their
|
||||||
|
own directory. To do this, you can use GNU `make'. `cd' to the
|
||||||
|
directory where you want the object files and executables to go and run
|
||||||
|
the `configure' script. `configure' automatically checks for the
|
||||||
|
source code in the directory that `configure' is in and in `..'.
|
||||||
|
|
||||||
|
With a non-GNU `make', it is safer to compile the package for one
|
||||||
|
architecture at a time in the source code directory. After you have
|
||||||
|
installed the package for one architecture, use `make distclean' before
|
||||||
|
reconfiguring for another architecture.
|
||||||
|
|
||||||
|
Installation Names
|
||||||
|
==================
|
||||||
|
|
||||||
|
By default, `make install' installs the package's commands under
|
||||||
|
`/usr/local/bin', include files under `/usr/local/include', etc. You
|
||||||
|
can specify an installation prefix other than `/usr/local' by giving
|
||||||
|
`configure' the option `--prefix=PREFIX'.
|
||||||
|
|
||||||
|
You can specify separate installation prefixes for
|
||||||
|
architecture-specific files and architecture-independent files. If you
|
||||||
|
pass the option `--exec-prefix=PREFIX' to `configure', the package uses
|
||||||
|
PREFIX as the prefix for installing programs and libraries.
|
||||||
|
Documentation and other data files still use the regular prefix.
|
||||||
|
|
||||||
|
In addition, if you use an unusual directory layout you can give
|
||||||
|
options like `--bindir=DIR' to specify different values for particular
|
||||||
|
kinds of files. Run `configure --help' for a list of the directories
|
||||||
|
you can set and what kinds of files go in them.
|
||||||
|
|
||||||
|
If the package supports it, you can cause programs to be installed
|
||||||
|
with an extra prefix or suffix on their names by giving `configure' the
|
||||||
|
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
|
||||||
|
|
||||||
|
Optional Features
|
||||||
|
=================
|
||||||
|
|
||||||
|
Some packages pay attention to `--enable-FEATURE' options to
|
||||||
|
`configure', where FEATURE indicates an optional part of the package.
|
||||||
|
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
|
||||||
|
is something like `gnu-as' or `x' (for the X Window System). The
|
||||||
|
`README' should mention any `--enable-' and `--with-' options that the
|
||||||
|
package recognizes.
|
||||||
|
|
||||||
|
For packages that use the X Window System, `configure' can usually
|
||||||
|
find the X include and library files automatically, but if it doesn't,
|
||||||
|
you can use the `configure' options `--x-includes=DIR' and
|
||||||
|
`--x-libraries=DIR' to specify their locations.
|
||||||
|
|
||||||
|
Specifying the System Type
|
||||||
|
==========================
|
||||||
|
|
||||||
|
There may be some features `configure' cannot figure out automatically,
|
||||||
|
but needs to determine by the type of machine the package will run on.
|
||||||
|
Usually, assuming the package is built to be run on the _same_
|
||||||
|
architectures, `configure' can figure that out, but if it prints a
|
||||||
|
message saying it cannot guess the machine type, give it the
|
||||||
|
`--build=TYPE' option. TYPE can either be a short name for the system
|
||||||
|
type, such as `sun4', or a canonical name which has the form:
|
||||||
|
|
||||||
|
CPU-COMPANY-SYSTEM
|
||||||
|
|
||||||
|
where SYSTEM can have one of these forms:
|
||||||
|
|
||||||
|
OS KERNEL-OS
|
||||||
|
|
||||||
|
See the file `config.sub' for the possible values of each field. If
|
||||||
|
`config.sub' isn't included in this package, then this package doesn't
|
||||||
|
need to know the machine type.
|
||||||
|
|
||||||
|
If you are _building_ compiler tools for cross-compiling, you should
|
||||||
|
use the option `--target=TYPE' to select the type of system they will
|
||||||
|
produce code for.
|
||||||
|
|
||||||
|
If you want to _use_ a cross compiler, that generates code for a
|
||||||
|
platform different from the build platform, you should specify the
|
||||||
|
"host" platform (i.e., that on which the generated programs will
|
||||||
|
eventually be run) with `--host=TYPE'.
|
||||||
|
|
||||||
|
Sharing Defaults
|
||||||
|
================
|
||||||
|
|
||||||
|
If you want to set default values for `configure' scripts to share, you
|
||||||
|
can create a site shell script called `config.site' that gives default
|
||||||
|
values for variables like `CC', `cache_file', and `prefix'.
|
||||||
|
`configure' looks for `PREFIX/share/config.site' if it exists, then
|
||||||
|
`PREFIX/etc/config.site' if it exists. Or, you can set the
|
||||||
|
`CONFIG_SITE' environment variable to the location of the site script.
|
||||||
|
A warning: not all `configure' scripts look for a site script.
|
||||||
|
|
||||||
|
Defining Variables
|
||||||
|
==================
|
||||||
|
|
||||||
|
Variables not defined in a site shell script can be set in the
|
||||||
|
environment passed to `configure'. However, some packages may run
|
||||||
|
configure again during the build, and the customized values of these
|
||||||
|
variables may be lost. In order to avoid this problem, you should set
|
||||||
|
them in the `configure' command line, using `VAR=value'. For example:
|
||||||
|
|
||||||
|
./configure CC=/usr/local2/bin/gcc
|
||||||
|
|
||||||
|
causes the specified `gcc' to be used as the C compiler (unless it is
|
||||||
|
overridden in the site shell script).
|
||||||
|
|
||||||
|
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
|
||||||
|
an Autoconf bug. Until the bug is fixed you can use this workaround:
|
||||||
|
|
||||||
|
CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
|
||||||
|
|
||||||
|
`configure' Invocation
|
||||||
|
======================
|
||||||
|
|
||||||
|
`configure' recognizes the following options to control how it operates.
|
||||||
|
|
||||||
|
`--help'
|
||||||
|
`-h'
|
||||||
|
Print a summary of the options to `configure', and exit.
|
||||||
|
|
||||||
|
`--version'
|
||||||
|
`-V'
|
||||||
|
Print the version of Autoconf used to generate the `configure'
|
||||||
|
script, and exit.
|
||||||
|
|
||||||
|
`--cache-file=FILE'
|
||||||
|
Enable the cache: use and save the results of the tests in FILE,
|
||||||
|
traditionally `config.cache'. FILE defaults to `/dev/null' to
|
||||||
|
disable caching.
|
||||||
|
|
||||||
|
`--config-cache'
|
||||||
|
`-C'
|
||||||
|
Alias for `--cache-file=config.cache'.
|
||||||
|
|
||||||
|
`--quiet'
|
||||||
|
`--silent'
|
||||||
|
`-q'
|
||||||
|
Do not print messages saying which checks are being made. To
|
||||||
|
suppress all normal output, redirect it to `/dev/null' (any error
|
||||||
|
messages will still be shown).
|
||||||
|
|
||||||
|
`--srcdir=DIR'
|
||||||
|
Look for the package's source code in directory DIR. Usually
|
||||||
|
`configure' can determine that directory automatically.
|
||||||
|
|
||||||
|
`configure' also accepts some other, not widely useful, options. Run
|
||||||
|
`configure --help' for more details.
|
||||||
|
|
@ -0,0 +1,29 @@
|
|||||||
|
## Process this file with automake to produce Makefile.in
|
||||||
|
## Created by Anjuta
|
||||||
|
|
||||||
|
SUBDIRS = src
|
||||||
|
|
||||||
|
mgizadocdir = ${prefix}/doc/mgiza
|
||||||
|
mgizadoc_DATA = \
|
||||||
|
README\
|
||||||
|
COPYING\
|
||||||
|
AUTHORS\
|
||||||
|
ChangeLog\
|
||||||
|
INSTALL\
|
||||||
|
NEWS
|
||||||
|
|
||||||
|
mgizascriptsdir = ${prefix}/scripts/
|
||||||
|
|
||||||
|
mgizascripts_SCRIPTS = \
|
||||||
|
scripts/*
|
||||||
|
|
||||||
|
EXTRA_DIST = $(mgizadoc_DATA) \
|
||||||
|
${mgizascripts_SCRIPTS}
|
||||||
|
# Copy all the spec files. Of course, only one is actually used.
|
||||||
|
dist-hook:
|
||||||
|
for specfile in *.spec; do \
|
||||||
|
if test -f $$specfile; then \
|
||||||
|
cp -p $$specfile $(distdir); \
|
||||||
|
fi \
|
||||||
|
done
|
||||||
|
|
@ -0,0 +1,683 @@
|
|||||||
|
# Makefile.in generated by automake 1.10.1 from Makefile.am.
|
||||||
|
# @configure_input@
|
||||||
|
|
||||||
|
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
|
||||||
|
# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
||||||
|
# This Makefile.in is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||||
|
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
# PARTICULAR PURPOSE.
|
||||||
|
|
||||||
|
@SET_MAKE@
|
||||||
|
|
||||||
|
|
||||||
|
VPATH = @srcdir@
|
||||||
|
pkgdatadir = $(datadir)/@PACKAGE@
|
||||||
|
pkglibdir = $(libdir)/@PACKAGE@
|
||||||
|
pkgincludedir = $(includedir)/@PACKAGE@
|
||||||
|
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
||||||
|
install_sh_DATA = $(install_sh) -c -m 644
|
||||||
|
install_sh_PROGRAM = $(install_sh) -c
|
||||||
|
install_sh_SCRIPT = $(install_sh) -c
|
||||||
|
INSTALL_HEADER = $(INSTALL_DATA)
|
||||||
|
transform = $(program_transform_name)
|
||||||
|
NORMAL_INSTALL = :
|
||||||
|
PRE_INSTALL = :
|
||||||
|
POST_INSTALL = :
|
||||||
|
NORMAL_UNINSTALL = :
|
||||||
|
PRE_UNINSTALL = :
|
||||||
|
POST_UNINSTALL = :
|
||||||
|
subdir = .
|
||||||
|
DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
|
||||||
|
$(srcdir)/Makefile.in $(srcdir)/config.h.in \
|
||||||
|
$(top_srcdir)/configure AUTHORS COPYING ChangeLog INSTALL NEWS \
|
||||||
|
config.guess config.sub depcomp install-sh ltmain.sh missing
|
||||||
|
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||||
|
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
|
||||||
|
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||||
|
$(ACLOCAL_M4)
|
||||||
|
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
|
||||||
|
configure.lineno config.status.lineno
|
||||||
|
mkinstalldirs = $(install_sh) -d
|
||||||
|
CONFIG_HEADER = config.h
|
||||||
|
CONFIG_CLEAN_FILES =
|
||||||
|
am__installdirs = "$(DESTDIR)$(mgizascriptsdir)" \
|
||||||
|
"$(DESTDIR)$(mgizadocdir)"
|
||||||
|
mgizascriptsSCRIPT_INSTALL = $(INSTALL_SCRIPT)
|
||||||
|
SCRIPTS = $(mgizascripts_SCRIPTS)
|
||||||
|
SOURCES =
|
||||||
|
DIST_SOURCES =
|
||||||
|
RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
|
||||||
|
html-recursive info-recursive install-data-recursive \
|
||||||
|
install-dvi-recursive install-exec-recursive \
|
||||||
|
install-html-recursive install-info-recursive \
|
||||||
|
install-pdf-recursive install-ps-recursive install-recursive \
|
||||||
|
installcheck-recursive installdirs-recursive pdf-recursive \
|
||||||
|
ps-recursive uninstall-recursive
|
||||||
|
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
|
||||||
|
am__vpath_adj = case $$p in \
|
||||||
|
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
|
||||||
|
*) f=$$p;; \
|
||||||
|
esac;
|
||||||
|
am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
|
||||||
|
mgizadocDATA_INSTALL = $(INSTALL_DATA)
|
||||||
|
DATA = $(mgizadoc_DATA)
|
||||||
|
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
|
||||||
|
distclean-recursive maintainer-clean-recursive
|
||||||
|
ETAGS = etags
|
||||||
|
CTAGS = ctags
|
||||||
|
DIST_SUBDIRS = $(SUBDIRS)
|
||||||
|
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||||
|
distdir = $(PACKAGE)-$(VERSION)
|
||||||
|
top_distdir = $(distdir)
|
||||||
|
am__remove_distdir = \
|
||||||
|
{ test ! -d $(distdir) \
|
||||||
|
|| { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \
|
||||||
|
&& rm -fr $(distdir); }; }
|
||||||
|
DIST_ARCHIVES = $(distdir).tar.gz
|
||||||
|
GZIP_ENV = --best
|
||||||
|
distuninstallcheck_listfiles = find . -type f -print
|
||||||
|
distcleancheck_listfiles = find . -type f -print
|
||||||
|
ACLOCAL = @ACLOCAL@
|
||||||
|
AMTAR = @AMTAR@
|
||||||
|
AUTOCONF = @AUTOCONF@
|
||||||
|
AUTOHEADER = @AUTOHEADER@
|
||||||
|
AUTOMAKE = @AUTOMAKE@
|
||||||
|
AWK = @AWK@
|
||||||
|
CC = @CC@
|
||||||
|
CCDEPMODE = @CCDEPMODE@
|
||||||
|
CFLAGS = @CFLAGS@
|
||||||
|
CPP = @CPP@
|
||||||
|
CPPFLAGS = @CPPFLAGS@
|
||||||
|
CXX = @CXX@
|
||||||
|
CXXDEPMODE = @CXXDEPMODE@
|
||||||
|
CXXFLAGS = @CXXFLAGS@
|
||||||
|
CYGPATH_W = @CYGPATH_W@
|
||||||
|
DEFS = @DEFS@
|
||||||
|
DEPDIR = @DEPDIR@
|
||||||
|
ECHO_C = @ECHO_C@
|
||||||
|
ECHO_N = @ECHO_N@
|
||||||
|
ECHO_T = @ECHO_T@
|
||||||
|
EGREP = @EGREP@
|
||||||
|
EXEEXT = @EXEEXT@
|
||||||
|
GREP = @GREP@
|
||||||
|
INSTALL = @INSTALL@
|
||||||
|
INSTALL_DATA = @INSTALL_DATA@
|
||||||
|
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
||||||
|
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
||||||
|
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
||||||
|
LDFLAGS = @LDFLAGS@
|
||||||
|
LIBOBJS = @LIBOBJS@
|
||||||
|
LIBS = @LIBS@
|
||||||
|
LTLIBOBJS = @LTLIBOBJS@
|
||||||
|
MAINT = @MAINT@
|
||||||
|
MAKEINFO = @MAKEINFO@
|
||||||
|
MKDIR_P = @MKDIR_P@
|
||||||
|
OBJEXT = @OBJEXT@
|
||||||
|
PACKAGE = @PACKAGE@
|
||||||
|
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
||||||
|
PACKAGE_NAME = @PACKAGE_NAME@
|
||||||
|
PACKAGE_STRING = @PACKAGE_STRING@
|
||||||
|
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
||||||
|
PACKAGE_VERSION = @PACKAGE_VERSION@
|
||||||
|
PATH_SEPARATOR = @PATH_SEPARATOR@
|
||||||
|
RANLIB = @RANLIB@
|
||||||
|
SET_MAKE = @SET_MAKE@
|
||||||
|
SHELL = @SHELL@
|
||||||
|
STRIP = @STRIP@
|
||||||
|
VERSION = @VERSION@
|
||||||
|
abs_builddir = @abs_builddir@
|
||||||
|
abs_srcdir = @abs_srcdir@
|
||||||
|
abs_top_builddir = @abs_top_builddir@
|
||||||
|
abs_top_srcdir = @abs_top_srcdir@
|
||||||
|
ac_ct_CC = @ac_ct_CC@
|
||||||
|
ac_ct_CXX = @ac_ct_CXX@
|
||||||
|
am__include = @am__include@
|
||||||
|
am__leading_dot = @am__leading_dot@
|
||||||
|
am__quote = @am__quote@
|
||||||
|
am__tar = @am__tar@
|
||||||
|
am__untar = @am__untar@
|
||||||
|
bindir = @bindir@
|
||||||
|
build_alias = @build_alias@
|
||||||
|
builddir = @builddir@
|
||||||
|
datadir = @datadir@
|
||||||
|
datarootdir = @datarootdir@
|
||||||
|
docdir = @docdir@
|
||||||
|
dvidir = @dvidir@
|
||||||
|
exec_prefix = @exec_prefix@
|
||||||
|
host_alias = @host_alias@
|
||||||
|
htmldir = @htmldir@
|
||||||
|
includedir = @includedir@
|
||||||
|
infodir = @infodir@
|
||||||
|
install_sh = @install_sh@
|
||||||
|
libdir = @libdir@
|
||||||
|
libexecdir = @libexecdir@
|
||||||
|
localedir = @localedir@
|
||||||
|
localstatedir = @localstatedir@
|
||||||
|
mandir = @mandir@
|
||||||
|
mkdir_p = @mkdir_p@
|
||||||
|
oldincludedir = @oldincludedir@
|
||||||
|
pdfdir = @pdfdir@
|
||||||
|
prefix = @prefix@
|
||||||
|
program_transform_name = @program_transform_name@
|
||||||
|
psdir = @psdir@
|
||||||
|
sbindir = @sbindir@
|
||||||
|
sharedstatedir = @sharedstatedir@
|
||||||
|
srcdir = @srcdir@
|
||||||
|
sysconfdir = @sysconfdir@
|
||||||
|
target_alias = @target_alias@
|
||||||
|
top_build_prefix = @top_build_prefix@
|
||||||
|
top_builddir = @top_builddir@
|
||||||
|
top_srcdir = @top_srcdir@
|
||||||
|
SUBDIRS = src
|
||||||
|
mgizadocdir = ${prefix}/doc/mgiza
|
||||||
|
mgizadoc_DATA = \
|
||||||
|
README\
|
||||||
|
COPYING\
|
||||||
|
AUTHORS\
|
||||||
|
ChangeLog\
|
||||||
|
INSTALL\
|
||||||
|
NEWS
|
||||||
|
|
||||||
|
mgizascriptsdir = ${prefix}/scripts/
|
||||||
|
mgizascripts_SCRIPTS = \
|
||||||
|
scripts/*
|
||||||
|
|
||||||
|
EXTRA_DIST = $(mgizadoc_DATA) \
|
||||||
|
${mgizascripts_SCRIPTS}
|
||||||
|
|
||||||
|
all: config.h
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) all-recursive
|
||||||
|
|
||||||
|
.SUFFIXES:
|
||||||
|
am--refresh:
|
||||||
|
@:
|
||||||
|
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
|
||||||
|
@for dep in $?; do \
|
||||||
|
case '$(am__configure_deps)' in \
|
||||||
|
*$$dep*) \
|
||||||
|
echo ' cd $(srcdir) && $(AUTOMAKE) --gnu '; \
|
||||||
|
cd $(srcdir) && $(AUTOMAKE) --gnu \
|
||||||
|
&& exit 0; \
|
||||||
|
exit 1;; \
|
||||||
|
esac; \
|
||||||
|
done; \
|
||||||
|
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
|
||||||
|
cd $(top_srcdir) && \
|
||||||
|
$(AUTOMAKE) --gnu Makefile
|
||||||
|
.PRECIOUS: Makefile
|
||||||
|
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||||
|
@case '$?' in \
|
||||||
|
*config.status*) \
|
||||||
|
echo ' $(SHELL) ./config.status'; \
|
||||||
|
$(SHELL) ./config.status;; \
|
||||||
|
*) \
|
||||||
|
echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
|
||||||
|
cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
|
||||||
|
esac;
|
||||||
|
|
||||||
|
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
||||||
|
$(SHELL) ./config.status --recheck
|
||||||
|
|
||||||
|
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
|
||||||
|
cd $(srcdir) && $(AUTOCONF)
|
||||||
|
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
|
||||||
|
cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
|
||||||
|
|
||||||
|
config.h: stamp-h1
|
||||||
|
@if test ! -f $@; then \
|
||||||
|
rm -f stamp-h1; \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) stamp-h1; \
|
||||||
|
else :; fi
|
||||||
|
|
||||||
|
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
|
||||||
|
@rm -f stamp-h1
|
||||||
|
cd $(top_builddir) && $(SHELL) ./config.status config.h
|
||||||
|
$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
|
||||||
|
cd $(top_srcdir) && $(AUTOHEADER)
|
||||||
|
rm -f stamp-h1
|
||||||
|
touch $@
|
||||||
|
|
||||||
|
distclean-hdr:
|
||||||
|
-rm -f config.h stamp-h1
|
||||||
|
install-mgizascriptsSCRIPTS: $(mgizascripts_SCRIPTS)
|
||||||
|
@$(NORMAL_INSTALL)
|
||||||
|
test -z "$(mgizascriptsdir)" || $(MKDIR_P) "$(DESTDIR)$(mgizascriptsdir)"
|
||||||
|
@list='$(mgizascripts_SCRIPTS)'; for p in $$list; do \
|
||||||
|
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
|
||||||
|
if test -f $$d$$p; then \
|
||||||
|
f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
|
||||||
|
echo " $(mgizascriptsSCRIPT_INSTALL) '$$d$$p' '$(DESTDIR)$(mgizascriptsdir)/$$f'"; \
|
||||||
|
$(mgizascriptsSCRIPT_INSTALL) "$$d$$p" "$(DESTDIR)$(mgizascriptsdir)/$$f"; \
|
||||||
|
else :; fi; \
|
||||||
|
done
|
||||||
|
|
||||||
|
uninstall-mgizascriptsSCRIPTS:
|
||||||
|
@$(NORMAL_UNINSTALL)
|
||||||
|
@list='$(mgizascripts_SCRIPTS)'; for p in $$list; do \
|
||||||
|
f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
|
||||||
|
echo " rm -f '$(DESTDIR)$(mgizascriptsdir)/$$f'"; \
|
||||||
|
rm -f "$(DESTDIR)$(mgizascriptsdir)/$$f"; \
|
||||||
|
done
|
||||||
|
install-mgizadocDATA: $(mgizadoc_DATA)
|
||||||
|
@$(NORMAL_INSTALL)
|
||||||
|
test -z "$(mgizadocdir)" || $(MKDIR_P) "$(DESTDIR)$(mgizadocdir)"
|
||||||
|
@list='$(mgizadoc_DATA)'; for p in $$list; do \
|
||||||
|
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
|
||||||
|
f=$(am__strip_dir) \
|
||||||
|
echo " $(mgizadocDATA_INSTALL) '$$d$$p' '$(DESTDIR)$(mgizadocdir)/$$f'"; \
|
||||||
|
$(mgizadocDATA_INSTALL) "$$d$$p" "$(DESTDIR)$(mgizadocdir)/$$f"; \
|
||||||
|
done
|
||||||
|
|
||||||
|
uninstall-mgizadocDATA:
|
||||||
|
@$(NORMAL_UNINSTALL)
|
||||||
|
@list='$(mgizadoc_DATA)'; for p in $$list; do \
|
||||||
|
f=$(am__strip_dir) \
|
||||||
|
echo " rm -f '$(DESTDIR)$(mgizadocdir)/$$f'"; \
|
||||||
|
rm -f "$(DESTDIR)$(mgizadocdir)/$$f"; \
|
||||||
|
done
|
||||||
|
|
||||||
|
# This directory's subdirectories are mostly independent; you can cd
|
||||||
|
# into them and run `make' without going through this Makefile.
|
||||||
|
# To change the values of `make' variables: instead of editing Makefiles,
|
||||||
|
# (1) if the variable is set in `config.status', edit `config.status'
|
||||||
|
# (which will cause the Makefiles to be regenerated when you run `make');
|
||||||
|
# (2) otherwise, pass the desired values on the `make' command line.
|
||||||
|
$(RECURSIVE_TARGETS):
|
||||||
|
@failcom='exit 1'; \
|
||||||
|
for f in x $$MAKEFLAGS; do \
|
||||||
|
case $$f in \
|
||||||
|
*=* | --[!k]*);; \
|
||||||
|
*k*) failcom='fail=yes';; \
|
||||||
|
esac; \
|
||||||
|
done; \
|
||||||
|
dot_seen=no; \
|
||||||
|
target=`echo $@ | sed s/-recursive//`; \
|
||||||
|
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||||
|
echo "Making $$target in $$subdir"; \
|
||||||
|
if test "$$subdir" = "."; then \
|
||||||
|
dot_seen=yes; \
|
||||||
|
local_target="$$target-am"; \
|
||||||
|
else \
|
||||||
|
local_target="$$target"; \
|
||||||
|
fi; \
|
||||||
|
(cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|
||||||
|
|| eval $$failcom; \
|
||||||
|
done; \
|
||||||
|
if test "$$dot_seen" = "no"; then \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
|
||||||
|
fi; test -z "$$fail"
|
||||||
|
|
||||||
|
$(RECURSIVE_CLEAN_TARGETS):
|
||||||
|
@failcom='exit 1'; \
|
||||||
|
for f in x $$MAKEFLAGS; do \
|
||||||
|
case $$f in \
|
||||||
|
*=* | --[!k]*);; \
|
||||||
|
*k*) failcom='fail=yes';; \
|
||||||
|
esac; \
|
||||||
|
done; \
|
||||||
|
dot_seen=no; \
|
||||||
|
case "$@" in \
|
||||||
|
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
|
||||||
|
*) list='$(SUBDIRS)' ;; \
|
||||||
|
esac; \
|
||||||
|
rev=''; for subdir in $$list; do \
|
||||||
|
if test "$$subdir" = "."; then :; else \
|
||||||
|
rev="$$subdir $$rev"; \
|
||||||
|
fi; \
|
||||||
|
done; \
|
||||||
|
rev="$$rev ."; \
|
||||||
|
target=`echo $@ | sed s/-recursive//`; \
|
||||||
|
for subdir in $$rev; do \
|
||||||
|
echo "Making $$target in $$subdir"; \
|
||||||
|
if test "$$subdir" = "."; then \
|
||||||
|
local_target="$$target-am"; \
|
||||||
|
else \
|
||||||
|
local_target="$$target"; \
|
||||||
|
fi; \
|
||||||
|
(cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|
||||||
|
|| eval $$failcom; \
|
||||||
|
done && test -z "$$fail"
|
||||||
|
tags-recursive:
|
||||||
|
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||||
|
test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
|
||||||
|
done
|
||||||
|
ctags-recursive:
|
||||||
|
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||||
|
test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
|
||||||
|
done
|
||||||
|
|
||||||
|
# Build the mkid database `ID' from the de-duplicated list of sources and
|
||||||
|
# headers; awk prints the collected names only if at least one was read.
|
||||||
|
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
mkid -fID $$unique
|
||||||
|
tags: TAGS
|
||||||
|
|
||||||
|
TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
|
||||||
|
$(TAGS_FILES) $(LISP)
|
||||||
|
tags=; \
|
||||||
|
here=`pwd`; \
|
||||||
|
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
|
||||||
|
include_option=--etags-include; \
|
||||||
|
empty_fix=.; \
|
||||||
|
else \
|
||||||
|
include_option=--include; \
|
||||||
|
empty_fix=; \
|
||||||
|
fi; \
|
||||||
|
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||||
|
if test "$$subdir" = .; then :; else \
|
||||||
|
test ! -f $$subdir/TAGS || \
|
||||||
|
tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \
|
||||||
|
fi; \
|
||||||
|
done; \
|
||||||
|
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
|
||||||
|
test -n "$$unique" || unique=$$empty_fix; \
|
||||||
|
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||||
|
$$tags $$unique; \
|
||||||
|
fi
|
||||||
|
ctags: CTAGS
|
||||||
|
CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
|
||||||
|
$(TAGS_FILES) $(LISP)
|
||||||
|
tags=; \
|
||||||
|
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
test -z "$(CTAGS_ARGS)$$tags$$unique" \
|
||||||
|
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
|
||||||
|
$$tags $$unique
|
||||||
|
|
||||||
|
GTAGS:
|
||||||
|
here=`$(am__cd) $(top_builddir) && pwd` \
|
||||||
|
&& cd $(top_srcdir) \
|
||||||
|
&& gtags -i $(GTAGS_ARGS) $$here
|
||||||
|
|
||||||
|
distclean-tags:
|
||||||
|
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
|
||||||
|
|
||||||
|
distdir: $(DISTFILES)
|
||||||
|
$(am__remove_distdir)
|
||||||
|
test -d $(distdir) || mkdir $(distdir)
|
||||||
|
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||||
|
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||||
|
list='$(DISTFILES)'; \
|
||||||
|
dist_files=`for file in $$list; do echo $$file; done | \
|
||||||
|
sed -e "s|^$$srcdirstrip/||;t" \
|
||||||
|
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
|
||||||
|
case $$dist_files in \
|
||||||
|
*/*) $(MKDIR_P) `echo "$$dist_files" | \
|
||||||
|
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
|
||||||
|
sort -u` ;; \
|
||||||
|
esac; \
|
||||||
|
for file in $$dist_files; do \
|
||||||
|
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
||||||
|
if test -d $$d/$$file; then \
|
||||||
|
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
|
||||||
|
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
||||||
|
cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
|
||||||
|
fi; \
|
||||||
|
cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
|
||||||
|
else \
|
||||||
|
test -f $(distdir)/$$file \
|
||||||
|
|| cp -p $$d/$$file $(distdir)/$$file \
|
||||||
|
|| exit 1; \
|
||||||
|
fi; \
|
||||||
|
done
|
||||||
|
list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
|
||||||
|
if test "$$subdir" = .; then :; else \
|
||||||
|
test -d "$(distdir)/$$subdir" \
|
||||||
|
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|
||||||
|
|| exit 1; \
|
||||||
|
distdir=`$(am__cd) $(distdir) && pwd`; \
|
||||||
|
top_distdir=`$(am__cd) $(top_distdir) && pwd`; \
|
||||||
|
(cd $$subdir && \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) \
|
||||||
|
top_distdir="$$top_distdir" \
|
||||||
|
distdir="$$distdir/$$subdir" \
|
||||||
|
am__remove_distdir=: \
|
||||||
|
am__skip_length_check=: \
|
||||||
|
distdir) \
|
||||||
|
|| exit 1; \
|
||||||
|
fi; \
|
||||||
|
done
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) \
|
||||||
|
top_distdir="$(top_distdir)" distdir="$(distdir)" \
|
||||||
|
dist-hook
|
||||||
|
-find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \
|
||||||
|
! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
|
||||||
|
! -type d ! -perm -400 -exec chmod a+r {} \; -o \
|
||||||
|
! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
|
||||||
|
|| chmod -R a+r $(distdir)
|
||||||
|
dist-gzip: distdir
|
||||||
|
tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
|
||||||
|
$(am__remove_distdir)
|
||||||
|
|
||||||
|
dist-bzip2: distdir
|
||||||
|
tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
|
||||||
|
$(am__remove_distdir)
|
||||||
|
|
||||||
|
dist-lzma: distdir
|
||||||
|
tardir=$(distdir) && $(am__tar) | lzma -9 -c >$(distdir).tar.lzma
|
||||||
|
$(am__remove_distdir)
|
||||||
|
|
||||||
|
dist-tarZ: distdir
|
||||||
|
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
|
||||||
|
$(am__remove_distdir)
|
||||||
|
|
||||||
|
dist-shar: distdir
|
||||||
|
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
|
||||||
|
$(am__remove_distdir)
|
||||||
|
|
||||||
|
dist-zip: distdir
|
||||||
|
-rm -f $(distdir).zip
|
||||||
|
zip -rq $(distdir).zip $(distdir)
|
||||||
|
$(am__remove_distdir)
|
||||||
|
|
||||||
|
dist dist-all: distdir
|
||||||
|
tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
|
||||||
|
$(am__remove_distdir)
|
||||||
|
|
||||||
|
# This target untars the dist file and tries a VPATH configuration. Then
|
||||||
|
# it guarantees that the distribution is self-contained by making another
|
||||||
|
# tarfile.
|
||||||
|
distcheck: dist
|
||||||
|
case '$(DIST_ARCHIVES)' in \
|
||||||
|
*.tar.gz*) \
|
||||||
|
GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\
|
||||||
|
*.tar.bz2*) \
|
||||||
|
bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\
|
||||||
|
*.tar.lzma*) \
|
||||||
|
unlzma -c $(distdir).tar.lzma | $(am__untar) ;;\
|
||||||
|
*.tar.Z*) \
|
||||||
|
uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
|
||||||
|
*.shar.gz*) \
|
||||||
|
GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\
|
||||||
|
*.zip*) \
|
||||||
|
unzip $(distdir).zip ;;\
|
||||||
|
esac
|
||||||
|
chmod -R a-w $(distdir); chmod a+w $(distdir)
|
||||||
|
mkdir $(distdir)/_build
|
||||||
|
mkdir $(distdir)/_inst
|
||||||
|
chmod a-w $(distdir)
|
||||||
|
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
|
||||||
|
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
|
||||||
|
&& cd $(distdir)/_build \
|
||||||
|
&& ../configure --srcdir=.. --prefix="$$dc_install_base" \
|
||||||
|
$(DISTCHECK_CONFIGURE_FLAGS) \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) check \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) install \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) installcheck \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) uninstall \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
|
||||||
|
distuninstallcheck \
|
||||||
|
&& chmod -R a-w "$$dc_install_base" \
|
||||||
|
&& ({ \
|
||||||
|
(cd ../.. && umask 077 && mkdir "$$dc_destdir") \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
|
||||||
|
distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
|
||||||
|
} || { rm -rf "$$dc_destdir"; exit 1; }) \
|
||||||
|
&& rm -rf "$$dc_destdir" \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) dist \
|
||||||
|
&& rm -rf $(DIST_ARCHIVES) \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) distcleancheck
|
||||||
|
$(am__remove_distdir)
|
||||||
|
@(echo "$(distdir) archives ready for distribution: "; \
|
||||||
|
list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
|
||||||
|
sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
|
||||||
|
distuninstallcheck:
|
||||||
|
@cd $(distuninstallcheck_dir) \
|
||||||
|
&& test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
|
||||||
|
|| { echo "ERROR: files left after uninstall:" ; \
|
||||||
|
if test -n "$(DESTDIR)"; then \
|
||||||
|
echo " (check DESTDIR support)"; \
|
||||||
|
fi ; \
|
||||||
|
$(distuninstallcheck_listfiles) ; \
|
||||||
|
exit 1; } >&2
|
||||||
|
distcleancheck: distclean
|
||||||
|
@if test '$(srcdir)' = . ; then \
|
||||||
|
echo "ERROR: distcleancheck can only run from a VPATH build" ; \
|
||||||
|
exit 1 ; \
|
||||||
|
fi
|
||||||
|
@test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
|
||||||
|
|| { echo "ERROR: files left in build directory after distclean:" ; \
|
||||||
|
$(distcleancheck_listfiles) ; \
|
||||||
|
exit 1; } >&2
|
||||||
|
check-am: all-am
|
||||||
|
check: check-recursive
|
||||||
|
all-am: Makefile $(SCRIPTS) $(DATA) config.h
|
||||||
|
installdirs: installdirs-recursive
|
||||||
|
installdirs-am:
|
||||||
|
for dir in "$(DESTDIR)$(mgizascriptsdir)" "$(DESTDIR)$(mgizadocdir)"; do \
|
||||||
|
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
|
||||||
|
done
|
||||||
|
install: install-recursive
|
||||||
|
install-exec: install-exec-recursive
|
||||||
|
install-data: install-data-recursive
|
||||||
|
uninstall: uninstall-recursive
|
||||||
|
|
||||||
|
install-am: all-am
|
||||||
|
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
||||||
|
|
||||||
|
installcheck: installcheck-recursive
|
||||||
|
install-strip:
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||||
|
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||||
|
`test -z '$(STRIP)' || \
|
||||||
|
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
|
||||||
|
mostlyclean-generic:
|
||||||
|
|
||||||
|
clean-generic:
|
||||||
|
|
||||||
|
distclean-generic:
|
||||||
|
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
||||||
|
|
||||||
|
maintainer-clean-generic:
|
||||||
|
@echo "This command is intended for maintainers to use"
|
||||||
|
@echo "it deletes files that may require special tools to rebuild."
|
||||||
|
clean: clean-recursive
|
||||||
|
|
||||||
|
clean-am: clean-generic mostlyclean-am
|
||||||
|
|
||||||
|
distclean: distclean-recursive
|
||||||
|
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
|
||||||
|
-rm -f Makefile
|
||||||
|
distclean-am: clean-am distclean-generic distclean-hdr distclean-tags
|
||||||
|
|
||||||
|
dvi: dvi-recursive
|
||||||
|
|
||||||
|
dvi-am:
|
||||||
|
|
||||||
|
html: html-recursive
|
||||||
|
|
||||||
|
info: info-recursive
|
||||||
|
|
||||||
|
info-am:
|
||||||
|
|
||||||
|
install-data-am: install-mgizadocDATA install-mgizascriptsSCRIPTS
|
||||||
|
|
||||||
|
install-dvi: install-dvi-recursive
|
||||||
|
|
||||||
|
install-exec-am:
|
||||||
|
|
||||||
|
install-html: install-html-recursive
|
||||||
|
|
||||||
|
install-info: install-info-recursive
|
||||||
|
|
||||||
|
install-man:
|
||||||
|
|
||||||
|
install-pdf: install-pdf-recursive
|
||||||
|
|
||||||
|
install-ps: install-ps-recursive
|
||||||
|
|
||||||
|
installcheck-am:
|
||||||
|
|
||||||
|
maintainer-clean: maintainer-clean-recursive
|
||||||
|
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
|
||||||
|
-rm -rf $(top_srcdir)/autom4te.cache
|
||||||
|
-rm -f Makefile
|
||||||
|
maintainer-clean-am: distclean-am maintainer-clean-generic
|
||||||
|
|
||||||
|
mostlyclean: mostlyclean-recursive
|
||||||
|
|
||||||
|
mostlyclean-am: mostlyclean-generic
|
||||||
|
|
||||||
|
pdf: pdf-recursive
|
||||||
|
|
||||||
|
pdf-am:
|
||||||
|
|
||||||
|
ps: ps-recursive
|
||||||
|
|
||||||
|
ps-am:
|
||||||
|
|
||||||
|
uninstall-am: uninstall-mgizadocDATA uninstall-mgizascriptsSCRIPTS
|
||||||
|
|
||||||
|
.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \
|
||||||
|
install-strip
|
||||||
|
|
||||||
|
.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
|
||||||
|
all all-am am--refresh check check-am clean clean-generic \
|
||||||
|
ctags ctags-recursive dist dist-all dist-bzip2 dist-gzip \
|
||||||
|
dist-hook dist-lzma dist-shar dist-tarZ dist-zip distcheck \
|
||||||
|
distclean distclean-generic distclean-hdr distclean-tags \
|
||||||
|
distcleancheck distdir distuninstallcheck dvi dvi-am html \
|
||||||
|
html-am info info-am install install-am install-data \
|
||||||
|
install-data-am install-dvi install-dvi-am install-exec \
|
||||||
|
install-exec-am install-html install-html-am install-info \
|
||||||
|
install-info-am install-man install-mgizadocDATA \
|
||||||
|
install-mgizascriptsSCRIPTS install-pdf install-pdf-am \
|
||||||
|
install-ps install-ps-am install-strip installcheck \
|
||||||
|
installcheck-am installdirs installdirs-am maintainer-clean \
|
||||||
|
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
|
||||||
|
pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \
|
||||||
|
uninstall-mgizadocDATA uninstall-mgizascriptsSCRIPTS
|
||||||
|
|
||||||
|
# Copy all the spec files. Of course, only one is actually used.
|
||||||
|
dist-hook:
|
||||||
|
for specfile in *.spec; do \
|
||||||
|
if test -f $$specfile; then \
|
||||||
|
cp -p $$specfile $(distdir); \
|
||||||
|
fi \
|
||||||
|
done
|
||||||
|
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||||
|
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||||
|
.NOEXPORT:
|
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0"?>
|
||||||
|
<gtodo>
|
||||||
|
<category title="Personal" place="0"/>
|
||||||
|
<category title="Business" place="1"/>
|
||||||
|
<category title="Unfiled" place="2"/>
|
||||||
|
</gtodo>
|
932
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/aclocal.m4
vendored
Normal file
932
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/aclocal.m4
vendored
Normal file
@ -0,0 +1,932 @@
|
|||||||
|
# generated automatically by aclocal 1.10.1 -*- Autoconf -*-
|
||||||
|
|
||||||
|
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||||
|
# 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||||
|
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
# PARTICULAR PURPOSE.
|
||||||
|
|
||||||
|
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||||
|
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||||
|
m4_if(AC_AUTOCONF_VERSION, [2.62],,
|
||||||
|
[m4_warning([this file was generated for autoconf 2.62.
|
||||||
|
You have another version of autoconf. It may work, but is not guaranteed to.
|
||||||
|
If you have problems, you may need to regenerate the build system entirely.
|
||||||
|
To do so, use the procedure documented by the package, typically `autoreconf'.])])
|
||||||
|
|
||||||
|
# Copyright (C) 2002, 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# AM_AUTOMAKE_VERSION(VERSION)
|
||||||
|
# ----------------------------
|
||||||
|
# Automake X.Y traces this macro to ensure aclocal.m4 has been
|
||||||
|
# generated from the m4 files accompanying Automake X.Y.
|
||||||
|
# (This private macro should not be called outside this file.)
|
||||||
|
AC_DEFUN([AM_AUTOMAKE_VERSION],
|
||||||
|
[am__api_version='1.10'
|
||||||
|
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
|
||||||
|
dnl require some minimum version. Point them to the right macro.
|
||||||
|
m4_if([$1], [1.10.1], [],
|
||||||
|
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
|
||||||
|
])
|
||||||
|
|
||||||
|
# _AM_AUTOCONF_VERSION(VERSION)
|
||||||
|
# -----------------------------
|
||||||
|
# aclocal traces this macro to find the Autoconf version.
|
||||||
|
# This is a private macro too. Using m4_define simplifies
|
||||||
|
# the logic in aclocal, which can simply ignore this definition.
|
||||||
|
m4_define([_AM_AUTOCONF_VERSION], [])
|
||||||
|
|
||||||
|
# AM_SET_CURRENT_AUTOMAKE_VERSION
|
||||||
|
# -------------------------------
|
||||||
|
# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
|
||||||
|
# This function is AC_REQUIREd by AC_INIT_AUTOMAKE.
|
||||||
|
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
|
||||||
|
[AM_AUTOMAKE_VERSION([1.10.1])dnl
|
||||||
|
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||||
|
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||||
|
_AM_AUTOCONF_VERSION(AC_AUTOCONF_VERSION)])
|
||||||
|
|
||||||
|
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
|
||||||
|
|
||||||
|
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
|
||||||
|
# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to
|
||||||
|
# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
|
||||||
|
#
|
||||||
|
# Of course, Automake must honor this variable whenever it calls a
|
||||||
|
# tool from the auxiliary directory. The problem is that $srcdir (and
|
||||||
|
# therefore $ac_aux_dir as well) can be either absolute or relative,
|
||||||
|
# depending on how configure is run. This is pretty annoying, since
|
||||||
|
# it makes $ac_aux_dir quite unusable in subdirectories: in the top
|
||||||
|
# source directory, any form will work fine, but in subdirectories a
|
||||||
|
# relative path needs to be adjusted first.
|
||||||
|
#
|
||||||
|
# $ac_aux_dir/missing
|
||||||
|
# fails when called from a subdirectory if $ac_aux_dir is relative
|
||||||
|
# $top_srcdir/$ac_aux_dir/missing
|
||||||
|
# fails if $ac_aux_dir is absolute,
|
||||||
|
# fails when called from a subdirectory in a VPATH build with
|
||||||
|
# a relative $ac_aux_dir
|
||||||
|
#
|
||||||
|
# The reason of the latter failure is that $top_srcdir and $ac_aux_dir
|
||||||
|
# are both prefixed by $srcdir. In an in-source build this is usually
|
||||||
|
# harmless because $srcdir is `.', but things will break when you
|
||||||
|
# start a VPATH build or use an absolute $srcdir.
|
||||||
|
#
|
||||||
|
# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
|
||||||
|
# iff we strip the leading $srcdir from $ac_aux_dir. That would be:
|
||||||
|
# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
|
||||||
|
# and then we would define $MISSING as
|
||||||
|
# MISSING="\${SHELL} $am_aux_dir/missing"
|
||||||
|
# This will work as long as MISSING is not called from configure, because
|
||||||
|
# unfortunately $(top_srcdir) has no meaning in configure.
|
||||||
|
# However there are other variables, like CC, which are often used in
|
||||||
|
# configure, and could therefore not use this "fixed" $ac_aux_dir.
|
||||||
|
#
|
||||||
|
# Another solution, used here, is to always expand $ac_aux_dir to an
|
||||||
|
# absolute PATH. The drawback is that using absolute paths prevents a
|
||||||
|
# configured tree to be moved without reconfiguration.
|
||||||
|
|
||||||
|
AC_DEFUN([AM_AUX_DIR_EXPAND],
|
||||||
|
[dnl Rely on autoconf to set up CDPATH properly.
|
||||||
|
AC_PREREQ([2.50])dnl
|
||||||
|
# expand $ac_aux_dir to an absolute path
|
||||||
|
am_aux_dir=`cd $ac_aux_dir && pwd`
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
# Copyright (C) 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005
|
||||||
|
# Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# serial 4
|
||||||
|
|
||||||
|
# This was merged into AC_PROG_CC in Autoconf.
|
||||||
|
|
||||||
|
AU_DEFUN([AM_PROG_CC_STDC],
|
||||||
|
[AC_PROG_CC
|
||||||
|
AC_DIAGNOSE([obsolete], [$0:
|
||||||
|
your code should no longer depend upon `am_cv_prog_cc_stdc', but upon
|
||||||
|
`ac_cv_prog_cc_stdc'. Remove this warning and the assignment when
|
||||||
|
you adjust the code. You can also remove the above call to
|
||||||
|
AC_PROG_CC if you already called it elsewhere.])
|
||||||
|
am_cv_prog_cc_stdc=$ac_cv_prog_cc_stdc
|
||||||
|
])
|
||||||
|
AU_DEFUN([fp_PROG_CC_STDC])
|
||||||
|
|
||||||
|
# AM_CONDITIONAL -*- Autoconf -*-
|
||||||
|
|
||||||
|
# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006
|
||||||
|
# Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# serial 8
|
||||||
|
|
||||||
|
# AM_CONDITIONAL(NAME, SHELL-CONDITION)
|
||||||
|
# -------------------------------------
|
||||||
|
# Define a conditional.
|
||||||
|
AC_DEFUN([AM_CONDITIONAL],
|
||||||
|
[AC_PREREQ(2.52)dnl
|
||||||
|
ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])],
|
||||||
|
[$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
|
||||||
|
AC_SUBST([$1_TRUE])dnl
|
||||||
|
AC_SUBST([$1_FALSE])dnl
|
||||||
|
_AM_SUBST_NOTMAKE([$1_TRUE])dnl
|
||||||
|
_AM_SUBST_NOTMAKE([$1_FALSE])dnl
|
||||||
|
if $2; then
|
||||||
|
$1_TRUE=
|
||||||
|
$1_FALSE='#'
|
||||||
|
else
|
||||||
|
$1_TRUE='#'
|
||||||
|
$1_FALSE=
|
||||||
|
fi
|
||||||
|
AC_CONFIG_COMMANDS_PRE(
|
||||||
|
[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
|
||||||
|
AC_MSG_ERROR([[conditional "$1" was never defined.
|
||||||
|
Usually this means the macro was only invoked conditionally.]])
|
||||||
|
fi])])
|
||||||
|
|
||||||
|
# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
|
||||||
|
# Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# serial 9
|
||||||
|
|
||||||
|
# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
|
||||||
|
# written in clear, in which case automake, when reading aclocal.m4,
|
||||||
|
# will think it sees a *use*, and therefore will trigger all its
|
||||||
|
# C support machinery. Also note that it means that autoscan, seeing
|
||||||
|
# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
|
||||||
|
|
||||||
|
|
||||||
|
# _AM_DEPENDENCIES(NAME)
|
||||||
|
# ----------------------
|
||||||
|
# See how the compiler implements dependency checking.
|
||||||
|
# NAME is "CC", "CXX", "GCJ", or "OBJC".
|
||||||
|
# We try a few techniques and use that to set a single cache variable.
|
||||||
|
#
|
||||||
|
# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
|
||||||
|
# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
|
||||||
|
# dependency, and given that the user is not expected to run this macro,
|
||||||
|
# just rely on AC_PROG_CC.
|
||||||
|
AC_DEFUN([_AM_DEPENDENCIES],
|
||||||
|
[AC_REQUIRE([AM_SET_DEPDIR])dnl
|
||||||
|
AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
|
||||||
|
AC_REQUIRE([AM_MAKE_INCLUDE])dnl
|
||||||
|
AC_REQUIRE([AM_DEP_TRACK])dnl
|
||||||
|
|
||||||
|
ifelse([$1], CC, [depcc="$CC" am_compiler_list=],
|
||||||
|
[$1], CXX, [depcc="$CXX" am_compiler_list=],
|
||||||
|
[$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
|
||||||
|
[$1], UPC, [depcc="$UPC" am_compiler_list=],
|
||||||
|
[$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'],
|
||||||
|
[depcc="$$1" am_compiler_list=])
|
||||||
|
|
||||||
|
AC_CACHE_CHECK([dependency style of $depcc],
|
||||||
|
[am_cv_$1_dependencies_compiler_type],
|
||||||
|
[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
|
||||||
|
# We make a subdir and do the tests there. Otherwise we can end up
|
||||||
|
# making bogus files that we don't know about and never remove. For
|
||||||
|
# instance it was reported that on HP-UX the gcc test will end up
|
||||||
|
# making a dummy file named `D' -- because `-MD' means `put the output
|
||||||
|
# in D'.
|
||||||
|
mkdir conftest.dir
|
||||||
|
# Copy depcomp to subdir because otherwise we won't find it if we're
|
||||||
|
# using a relative directory.
|
||||||
|
cp "$am_depcomp" conftest.dir
|
||||||
|
cd conftest.dir
|
||||||
|
# We will build objects and dependencies in a subdirectory because
|
||||||
|
# it helps to detect inapplicable dependency modes. For instance
|
||||||
|
# both Tru64's cc and ICC support -MD to output dependencies as a
|
||||||
|
# side effect of compilation, but ICC will put the dependencies in
|
||||||
|
# the current directory while Tru64 will put them in the object
|
||||||
|
# directory.
|
||||||
|
mkdir sub
|
||||||
|
|
||||||
|
am_cv_$1_dependencies_compiler_type=none
|
||||||
|
if test "$am_compiler_list" = ""; then
|
||||||
|
am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
|
||||||
|
fi
|
||||||
|
for depmode in $am_compiler_list; do
|
||||||
|
# Setup a source with many dependencies, because some compilers
|
||||||
|
# like to wrap large dependency lists on column 80 (with \), and
|
||||||
|
# we should not choose a depcomp mode which is confused by this.
|
||||||
|
#
|
||||||
|
# We need to recreate these files for each test, as the compiler may
|
||||||
|
# overwrite some of them when testing with obscure command lines.
|
||||||
|
# This happens at least with the AIX C compiler.
|
||||||
|
: > sub/conftest.c
|
||||||
|
for i in 1 2 3 4 5 6; do
|
||||||
|
echo '#include "conftst'$i'.h"' >> sub/conftest.c
|
||||||
|
# Using `: > sub/conftst$i.h' creates only sub/conftst1.h with
|
||||||
|
# Solaris 8's {/usr,}/bin/sh.
|
||||||
|
touch sub/conftst$i.h
|
||||||
|
done
|
||||||
|
echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
|
||||||
|
|
||||||
|
case $depmode in
|
||||||
|
nosideeffect)
|
||||||
|
# after this tag, mechanisms are not by side-effect, so they'll
|
||||||
|
# only be used when explicitly requested
|
||||||
|
if test "x$enable_dependency_tracking" = xyes; then
|
||||||
|
continue
|
||||||
|
else
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
none) break ;;
|
||||||
|
esac
|
||||||
|
# We check with `-c' and `-o' for the sake of the "dashmstdout"
|
||||||
|
# mode. It turns out that the SunPro C++ compiler does not properly
|
||||||
|
# handle `-M -o', and we need to detect this.
|
||||||
|
if depmode=$depmode \
|
||||||
|
source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \
|
||||||
|
depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
|
||||||
|
$SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \
|
||||||
|
>/dev/null 2>conftest.err &&
|
||||||
|
grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
|
||||||
|
grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
|
||||||
|
grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 &&
|
||||||
|
${MAKE-make} -s -f confmf > /dev/null 2>&1; then
|
||||||
|
# icc doesn't choke on unknown options, it will just issue warnings
|
||||||
|
# or remarks (even with -Werror). So we grep stderr for any message
|
||||||
|
# that says an option was ignored or not supported.
|
||||||
|
# When given -MP, icc 7.0 and 7.1 complain thusly:
|
||||||
|
# icc: Command line warning: ignoring option '-M'; no argument required
|
||||||
|
# The diagnosis changed in icc 8.0:
|
||||||
|
# icc: Command line remark: option '-MP' not supported
|
||||||
|
if (grep 'ignoring option' conftest.err ||
|
||||||
|
grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
|
||||||
|
am_cv_$1_dependencies_compiler_type=$depmode
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
cd ..
|
||||||
|
rm -rf conftest.dir
|
||||||
|
else
|
||||||
|
am_cv_$1_dependencies_compiler_type=none
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
|
||||||
|
AM_CONDITIONAL([am__fastdep$1], [
|
||||||
|
test "x$enable_dependency_tracking" != xno \
|
||||||
|
&& test "$am_cv_$1_dependencies_compiler_type" = gcc3])
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
# AM_SET_DEPDIR
|
||||||
|
# -------------
|
||||||
|
# Choose a directory name for dependency files.
|
||||||
|
# This macro is AC_REQUIREd in _AM_DEPENDENCIES
|
||||||
|
AC_DEFUN([AM_SET_DEPDIR],
|
||||||
|
[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
|
||||||
|
AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
# AM_DEP_TRACK
|
||||||
|
# ------------
|
||||||
|
AC_DEFUN([AM_DEP_TRACK],
|
||||||
|
[AC_ARG_ENABLE(dependency-tracking,
|
||||||
|
[ --disable-dependency-tracking speeds up one-time build
|
||||||
|
--enable-dependency-tracking do not reject slow dependency extractors])
|
||||||
|
if test "x$enable_dependency_tracking" != xno; then
|
||||||
|
am_depcomp="$ac_aux_dir/depcomp"
|
||||||
|
AMDEPBACKSLASH='\'
|
||||||
|
fi
|
||||||
|
AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
|
||||||
|
AC_SUBST([AMDEPBACKSLASH])dnl
|
||||||
|
_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
|
||||||
|
])
|
||||||
|
|
||||||
|
# Generate code to set up dependency tracking. -*- Autoconf -*-
|
||||||
|
|
||||||
|
# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
|
||||||
|
# Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
#serial 3
|
||||||
|
|
||||||
|
# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||||
|
# ------------------------------
|
||||||
|
# Shell code run from config.status: for every file in CONFIG_FILES that
|
||||||
|
# looks like an Automake-generated Makefile, create a placeholder file
|
||||||
|
# containing "# dummy" for each included file whose name mentions the
|
||||||
|
# DEPDIR make variable, unless that file already exists.
|
||||||
|
# Makefiles that define no DEPDIR or no am__include value are skipped.
|
||||||
|
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||||
|
[for mf in $CONFIG_FILES; do
|
||||||
|
# Strip MF so we end up with the name of the file.
|
||||||
|
mf=`echo "$mf" | sed -e 's/:.*$//'`
|
||||||
|
# Check whether this is an Automake generated Makefile or not.
|
||||||
|
# We used to match only the files named `Makefile.in', but
|
||||||
|
# some people rename them; so instead we look at the file content.
|
||||||
|
# Grep'ing the first line is not enough: some people post-process
|
||||||
|
# each Makefile.in and add a new line on top of each file to say so.
|
||||||
|
# Grep'ing the whole file is not good either: AIX grep has a line
|
||||||
|
# limit of 2048, but all sed's we know of understand at least 4000.
|
||||||
|
if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
|
||||||
|
dirpart=`AS_DIRNAME("$mf")`
|
||||||
|
else
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
# Extract the definition of DEPDIR, am__include, and am__quote
|
||||||
|
# from the Makefile without running `make'.
|
||||||
|
DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
|
||||||
|
test -z "$DEPDIR" && continue
|
||||||
|
am__include=`sed -n 's/^am__include = //p' < "$mf"`
|
||||||
|
test -z "$am__include" && continue
|
||||||
|
am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
|
||||||
|
# When using ansi2knr, U may be empty or an underscore; expand it
|
||||||
|
U=`sed -n 's/^U = //p' < "$mf"`
|
||||||
|
# Find all dependency output files, they are included files with
|
||||||
|
# $(DEPDIR) in their names. We invoke sed twice because it is the
|
||||||
|
# simplest approach to changing $(DEPDIR) to its actual value in the
|
||||||
|
# expansion.
|
||||||
|
for file in `sed -n "
|
||||||
|
s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
|
||||||
|
sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
|
||||||
|
# Make sure the directory exists.
|
||||||
|
test -f "$dirpart/$file" && continue
|
||||||
|
fdir=`AS_DIRNAME(["$file"])`
|
||||||
|
AS_MKDIR_P([$dirpart/$fdir])
|
||||||
|
# echo "creating $dirpart/$file"
|
||||||
|
echo '# dummy' > "$dirpart/$file"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
])# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||||
|
|
||||||
|
|
||||||
|
# AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||||
|
# -----------------------------
|
||||||
|
# This macro should only be invoked once -- use via AC_REQUIRE.
|
||||||
|
#
|
||||||
|
# This code is only required when automatic dependency tracking
|
||||||
|
# is enabled. FIXME. This creates each `.P' file that we will
|
||||||
|
# need in order to bootstrap the dependency handling code.
|
||||||
|
AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||||
|
[AC_CONFIG_COMMANDS([depfiles],
|
||||||
|
[test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||||
|
[AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
|
||||||
|
])
|
||||||
|
|
||||||
|
# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
|
||||||
|
# Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# serial 8
|
||||||
|
|
||||||
|
# AM_CONFIG_HEADER is obsolete. It has been replaced by AC_CONFIG_HEADERS.
|
||||||
|
AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)])
|
||||||
|
|
||||||
|
# Do all the work for Automake. -*- Autoconf -*-
|
||||||
|
|
||||||
|
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||||
|
# 2005, 2006, 2008 Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# serial 13
|
||||||
|
|
||||||
|
# This macro actually does too much. Some checks are only needed if
|
||||||
|
# your package does certain things. But this isn't really a big deal.
|
||||||
|
|
||||||
|
# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
|
||||||
|
# AM_INIT_AUTOMAKE([OPTIONS])
|
||||||
|
# -----------------------------------------------
|
||||||
|
# The call with PACKAGE and VERSION arguments is the old style
|
||||||
|
# call (pre autoconf-2.50), which is being phased out. PACKAGE
|
||||||
|
# and VERSION should now be passed to AC_INIT and removed from
|
||||||
|
# the call to AM_INIT_AUTOMAKE.
|
||||||
|
# We support both call styles for the transition. After
|
||||||
|
# the next Automake release, Autoconf can make the AC_INIT
|
||||||
|
# arguments mandatory, and then we can depend on a new Autoconf
|
||||||
|
# release and drop the old call support.
|
||||||
|
AC_DEFUN([AM_INIT_AUTOMAKE],
|
||||||
|
[AC_PREREQ([2.60])dnl
|
||||||
|
dnl Autoconf wants to disallow AM_ names. We explicitly allow
|
||||||
|
dnl the ones we care about.
|
||||||
|
m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
|
||||||
|
AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
|
||||||
|
AC_REQUIRE([AC_PROG_INSTALL])dnl
|
||||||
|
if test "`cd $srcdir && pwd`" != "`pwd`"; then
|
||||||
|
# Use -I$(srcdir) only when $(srcdir) != ., so that make's output
|
||||||
|
# is not polluted with repeated "-I."
|
||||||
|
AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
|
||||||
|
# test to see if srcdir already configured
|
||||||
|
if test -f $srcdir/config.status; then
|
||||||
|
AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# test whether we have cygpath
|
||||||
|
if test -z "$CYGPATH_W"; then
|
||||||
|
if (cygpath --version) >/dev/null 2>/dev/null; then
|
||||||
|
CYGPATH_W='cygpath -w'
|
||||||
|
else
|
||||||
|
CYGPATH_W=echo
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
AC_SUBST([CYGPATH_W])
|
||||||
|
|
||||||
|
# Define the identity of the package.
|
||||||
|
dnl Distinguish between old-style and new-style calls.
|
||||||
|
m4_ifval([$2],
|
||||||
|
[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
|
||||||
|
AC_SUBST([PACKAGE], [$1])dnl
|
||||||
|
AC_SUBST([VERSION], [$2])],
|
||||||
|
[_AM_SET_OPTIONS([$1])dnl
|
||||||
|
dnl Diagnose old-style AC_INIT with new-style AM_INIT_AUTOMAKE.
|
||||||
|
m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,,
|
||||||
|
[m4_fatal([AC_INIT should be called with package and version arguments])])dnl
|
||||||
|
AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
|
||||||
|
AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
|
||||||
|
|
||||||
|
_AM_IF_OPTION([no-define],,
|
||||||
|
[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
|
||||||
|
AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl
|
||||||
|
|
||||||
|
# Some tools Automake needs.
|
||||||
|
AC_REQUIRE([AM_SANITY_CHECK])dnl
|
||||||
|
AC_REQUIRE([AC_ARG_PROGRAM])dnl
|
||||||
|
AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version})
|
||||||
|
AM_MISSING_PROG(AUTOCONF, autoconf)
|
||||||
|
AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version})
|
||||||
|
AM_MISSING_PROG(AUTOHEADER, autoheader)
|
||||||
|
AM_MISSING_PROG(MAKEINFO, makeinfo)
|
||||||
|
AM_PROG_INSTALL_SH
|
||||||
|
AM_PROG_INSTALL_STRIP
|
||||||
|
AC_REQUIRE([AM_PROG_MKDIR_P])dnl
|
||||||
|
# We need awk for the "check" target. The system "awk" is bad on
|
||||||
|
# some platforms.
|
||||||
|
AC_REQUIRE([AC_PROG_AWK])dnl
|
||||||
|
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
|
||||||
|
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
|
||||||
|
_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
|
||||||
|
[_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
|
||||||
|
[_AM_PROG_TAR([v7])])])
|
||||||
|
_AM_IF_OPTION([no-dependencies],,
|
||||||
|
[AC_PROVIDE_IFELSE([AC_PROG_CC],
|
||||||
|
[_AM_DEPENDENCIES(CC)],
|
||||||
|
[define([AC_PROG_CC],
|
||||||
|
defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl
|
||||||
|
AC_PROVIDE_IFELSE([AC_PROG_CXX],
|
||||||
|
[_AM_DEPENDENCIES(CXX)],
|
||||||
|
[define([AC_PROG_CXX],
|
||||||
|
defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl
|
||||||
|
AC_PROVIDE_IFELSE([AC_PROG_OBJC],
|
||||||
|
[_AM_DEPENDENCIES(OBJC)],
|
||||||
|
[define([AC_PROG_OBJC],
|
||||||
|
defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl
|
||||||
|
])
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
# When config.status generates a header, we must update the stamp-h file.
|
||||||
|
# This file resides in the same directory as the config header
|
||||||
|
# that is generated. The stamp files are numbered to have different names.
|
||||||
|
|
||||||
|
# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
|
||||||
|
# loop where config.status creates the headers, so we can generate
|
||||||
|
# our stamp files there.
|
||||||
|
AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
|
||||||
|
[# Compute $1's index in $config_headers.
|
||||||
|
_am_arg=$1
|
||||||
|
_am_stamp_count=1
|
||||||
|
for _am_header in $config_headers :; do
|
||||||
|
case $_am_header in
|
||||||
|
$_am_arg | $_am_arg:* )
|
||||||
|
break ;;
|
||||||
|
* )
|
||||||
|
_am_stamp_count=`expr $_am_stamp_count + 1` ;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
|
||||||
|
|
||||||
|
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# AM_PROG_INSTALL_SH
|
||||||
|
# ------------------
|
||||||
|
# Define $install_sh.
|
||||||
|
AC_DEFUN([AM_PROG_INSTALL_SH],
|
||||||
|
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
||||||
|
install_sh=${install_sh-"\$(SHELL) $am_aux_dir/install-sh"}
|
||||||
|
AC_SUBST(install_sh)])
|
||||||
|
|
||||||
|
# Copyright (C) 2003, 2005 Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# serial 2
|
||||||
|
|
||||||
|
# Check whether the underlying file-system supports filenames
|
||||||
|
# with a leading dot. For instance MS-DOS doesn't.
|
||||||
|
AC_DEFUN([AM_SET_LEADING_DOT],
|
||||||
|
[rm -rf .tst 2>/dev/null
|
||||||
|
mkdir .tst 2>/dev/null
|
||||||
|
if test -d .tst; then
|
||||||
|
am__leading_dot=.
|
||||||
|
else
|
||||||
|
am__leading_dot=_
|
||||||
|
fi
|
||||||
|
rmdir .tst 2>/dev/null
|
||||||
|
AC_SUBST([am__leading_dot])])
|
||||||
|
|
||||||
|
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
|
||||||
|
# From Jim Meyering
|
||||||
|
|
||||||
|
# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005
|
||||||
|
# Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# serial 4
|
||||||
|
|
||||||
|
AC_DEFUN([AM_MAINTAINER_MODE],
|
||||||
|
[AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
|
||||||
|
dnl maintainer-mode is disabled by default
|
||||||
|
AC_ARG_ENABLE(maintainer-mode,
|
||||||
|
[ --enable-maintainer-mode enable make rules and dependencies not useful
|
||||||
|
(and sometimes confusing) to the casual installer],
|
||||||
|
USE_MAINTAINER_MODE=$enableval,
|
||||||
|
USE_MAINTAINER_MODE=no)
|
||||||
|
AC_MSG_RESULT([$USE_MAINTAINER_MODE])
|
||||||
|
AM_CONDITIONAL(MAINTAINER_MODE, [test $USE_MAINTAINER_MODE = yes])
|
||||||
|
MAINT=$MAINTAINER_MODE_TRUE
|
||||||
|
AC_SUBST(MAINT)dnl
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
AU_DEFUN([jm_MAINTAINER_MODE], [AM_MAINTAINER_MODE])
|
||||||
|
|
||||||
|
# Check to see how 'make' treats includes. -*- Autoconf -*-
|
||||||
|
|
||||||
|
# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# serial 3
|
||||||
|
|
||||||
|
# AM_MAKE_INCLUDE()
|
||||||
|
# -----------------
|
||||||
|
# Check to see how make treats includes.
|
||||||
|
AC_DEFUN([AM_MAKE_INCLUDE],
|
||||||
|
[am_make=${MAKE-make}
|
||||||
|
cat > confinc << 'END'
|
||||||
|
am__doit:
|
||||||
|
@echo done
|
||||||
|
.PHONY: am__doit
|
||||||
|
END
|
||||||
|
# If we don't find an include directive, just comment out the code.
|
||||||
|
AC_MSG_CHECKING([for style of include used by $am_make])
|
||||||
|
am__include="#"
|
||||||
|
am__quote=
|
||||||
|
_am_result=none
|
||||||
|
# First try GNU make style include.
|
||||||
|
echo "include confinc" > confmf
|
||||||
|
# We grep out `Entering directory' and `Leaving directory'
|
||||||
|
# messages which can occur if `w' ends up in MAKEFLAGS.
|
||||||
|
# In particular we don't look at `^make:' because GNU make might
|
||||||
|
# be invoked under some other name (usually "gmake"), in which
|
||||||
|
# case it prints its new name instead of `make'.
|
||||||
|
if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then
|
||||||
|
am__include=include
|
||||||
|
am__quote=
|
||||||
|
_am_result=GNU
|
||||||
|
fi
|
||||||
|
# Now try BSD make style include.
|
||||||
|
if test "$am__include" = "#"; then
|
||||||
|
echo '.include "confinc"' > confmf
|
||||||
|
if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then
|
||||||
|
am__include=.include
|
||||||
|
am__quote="\""
|
||||||
|
_am_result=BSD
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
AC_SUBST([am__include])
|
||||||
|
AC_SUBST([am__quote])
|
||||||
|
AC_MSG_RESULT([$_am_result])
|
||||||
|
rm -f confinc confmf
|
||||||
|
])
|
||||||
|
|
||||||
|
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
|
||||||
|
|
||||||
|
# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005
|
||||||
|
# Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# serial 5
|
||||||
|
|
||||||
|
# AM_MISSING_PROG(NAME, PROGRAM)
|
||||||
|
# ------------------------------
|
||||||
|
AC_DEFUN([AM_MISSING_PROG],
|
||||||
|
[AC_REQUIRE([AM_MISSING_HAS_RUN])
|
||||||
|
$1=${$1-"${am_missing_run}$2"}
|
||||||
|
AC_SUBST($1)])
|
||||||
|
|
||||||
|
|
||||||
|
# AM_MISSING_HAS_RUN
|
||||||
|
# ------------------
|
||||||
|
# Define MISSING if not defined so far and test if it supports --run.
|
||||||
|
# If it does, set am_missing_run to use it, otherwise, to nothing.
|
||||||
|
AC_DEFUN([AM_MISSING_HAS_RUN],
|
||||||
|
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
||||||
|
AC_REQUIRE_AUX_FILE([missing])dnl
|
||||||
|
test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing"
|
||||||
|
# Use eval to expand $SHELL
|
||||||
|
if eval "$MISSING --run true"; then
|
||||||
|
am_missing_run="$MISSING --run "
|
||||||
|
else
|
||||||
|
am_missing_run=
|
||||||
|
AC_MSG_WARN([`missing' script is too old or missing])
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
|
||||||
|
# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# AM_PROG_MKDIR_P
|
||||||
|
# ---------------
|
||||||
|
# Check for `mkdir -p'.
|
||||||
|
AC_DEFUN([AM_PROG_MKDIR_P],
|
||||||
|
[AC_PREREQ([2.60])dnl
|
||||||
|
AC_REQUIRE([AC_PROG_MKDIR_P])dnl
|
||||||
|
dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
|
||||||
|
dnl while keeping a definition of mkdir_p for backward compatibility.
|
||||||
|
dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
|
||||||
|
dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
|
||||||
|
dnl Makefile.ins that do not define MKDIR_P, so we do our own
|
||||||
|
dnl adjustment using top_builddir (which is defined more often than
|
||||||
|
dnl MKDIR_P).
|
||||||
|
AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
|
||||||
|
case $mkdir_p in
|
||||||
|
[[\\/$]]* | ?:[[\\/]]*) ;;
|
||||||
|
*/*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
|
||||||
|
esac
|
||||||
|
])
|
||||||
|
|
||||||
|
# Helper functions for option handling. -*- Autoconf -*-
|
||||||
|
|
||||||
|
# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# serial 3
|
||||||
|
|
||||||
|
# _AM_MANGLE_OPTION(NAME)
|
||||||
|
# -----------------------
|
||||||
|
AC_DEFUN([_AM_MANGLE_OPTION],
|
||||||
|
[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
|
||||||
|
|
||||||
|
# _AM_SET_OPTION(NAME)
|
||||||
|
# ------------------------------
|
||||||
|
# Set option NAME. Presently that only means defining a flag for this option.
|
||||||
|
AC_DEFUN([_AM_SET_OPTION],
|
||||||
|
[m4_define(_AM_MANGLE_OPTION([$1]), 1)])
|
||||||
|
|
||||||
|
# _AM_SET_OPTIONS(OPTIONS)
|
||||||
|
# ----------------------------------
|
||||||
|
# OPTIONS is a space-separated list of Automake options.
|
||||||
|
AC_DEFUN([_AM_SET_OPTIONS],
|
||||||
|
[AC_FOREACH([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
|
||||||
|
|
||||||
|
# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
|
||||||
|
# -------------------------------------------
|
||||||
|
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
|
||||||
|
AC_DEFUN([_AM_IF_OPTION],
|
||||||
|
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
|
||||||
|
|
||||||
|
# Check to make sure that the build environment is sane. -*- Autoconf -*-
|
||||||
|
|
||||||
|
# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
|
||||||
|
# Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# serial 4
|
||||||
|
|
||||||
|
# AM_SANITY_CHECK
|
||||||
|
# ---------------
|
||||||
|
AC_DEFUN([AM_SANITY_CHECK],
|
||||||
|
[AC_MSG_CHECKING([whether build environment is sane])
|
||||||
|
# Just in case
|
||||||
|
sleep 1
|
||||||
|
echo timestamp > conftest.file
|
||||||
|
# Do `set' in a subshell so we don't clobber the current shell's
|
||||||
|
# arguments. Must try -L first in case configure is actually a
|
||||||
|
# symlink; some systems play weird games with the mod time of symlinks
|
||||||
|
# (eg FreeBSD returns the mod time of the symlink's containing
|
||||||
|
# directory).
|
||||||
|
if (
|
||||||
|
set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null`
|
||||||
|
if test "$[*]" = "X"; then
|
||||||
|
# -L didn't work.
|
||||||
|
set X `ls -t $srcdir/configure conftest.file`
|
||||||
|
fi
|
||||||
|
rm -f conftest.file
|
||||||
|
if test "$[*]" != "X $srcdir/configure conftest.file" \
|
||||||
|
&& test "$[*]" != "X conftest.file $srcdir/configure"; then
|
||||||
|
|
||||||
|
# If neither matched, then we have a broken ls. This can happen
|
||||||
|
# if, for instance, CONFIG_SHELL is bash and it inherits a
|
||||||
|
# broken ls alias from the environment. This has actually
|
||||||
|
# happened. Such a system could not be considered "sane".
|
||||||
|
AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken
|
||||||
|
alias in your environment])
|
||||||
|
fi
|
||||||
|
|
||||||
|
test "$[2]" = conftest.file
|
||||||
|
)
|
||||||
|
then
|
||||||
|
# Ok.
|
||||||
|
:
|
||||||
|
else
|
||||||
|
AC_MSG_ERROR([newly created file is older than distributed files!
|
||||||
|
Check your system clock])
|
||||||
|
fi
|
||||||
|
AC_MSG_RESULT(yes)])
|
||||||
|
|
||||||
|
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# AM_PROG_INSTALL_STRIP
|
||||||
|
# ---------------------
|
||||||
|
# One issue with vendor `install' (even GNU) is that you can't
|
||||||
|
# specify the program used to strip binaries. This is especially
|
||||||
|
# annoying in cross-compiling environments, where the build's strip
|
||||||
|
# is unlikely to handle the host's binaries.
|
||||||
|
# Fortunately install-sh will honor a STRIPPROG variable, so we
|
||||||
|
# always use install-sh in `make install-strip', and initialize
|
||||||
|
# STRIPPROG with the value of the STRIP variable (set by the user).
|
||||||
|
AC_DEFUN([AM_PROG_INSTALL_STRIP],
|
||||||
|
[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
|
||||||
|
# Installed binaries are usually stripped using `strip' when the user
|
||||||
|
# runs `make install-strip'. However `strip' might not be the right
|
||||||
|
# tool to use in cross-compilation environments, therefore Automake
|
||||||
|
# will honor the `STRIP' environment variable to overrule this program.
|
||||||
|
dnl Don't test for $cross_compiling = yes, because it might be `maybe'.
|
||||||
|
if test "$cross_compiling" != no; then
|
||||||
|
AC_CHECK_TOOL([STRIP], [strip], :)
|
||||||
|
fi
|
||||||
|
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
|
||||||
|
AC_SUBST([INSTALL_STRIP_PROGRAM])])
|
||||||
|
|
||||||
|
# Copyright (C) 2006 Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# _AM_SUBST_NOTMAKE(VARIABLE)
|
||||||
|
# ---------------------------
|
||||||
|
# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
|
||||||
|
# This macro is traced by Automake.
|
||||||
|
AC_DEFUN([_AM_SUBST_NOTMAKE])
|
||||||
|
|
||||||
|
# Check how to create a tarball. -*- Autoconf -*-
|
||||||
|
|
||||||
|
# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
|
||||||
|
#
|
||||||
|
# This file is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# serial 2
|
||||||
|
|
||||||
|
# _AM_PROG_TAR(FORMAT)
|
||||||
|
# --------------------
|
||||||
|
# Check how to create a tarball in format FORMAT.
|
||||||
|
# FORMAT should be one of `v7', `ustar', or `pax'.
|
||||||
|
#
|
||||||
|
# Substitute a variable $(am__tar) that is a command
|
||||||
|
# writing to stdout a FORMAT-tarball containing the directory
|
||||||
|
# $tardir.
|
||||||
|
# tardir=directory && $(am__tar) > result.tar
|
||||||
|
#
|
||||||
|
# Substitute a variable $(am__untar) that extracts such
|
||||||
|
# a tarball read from stdin.
|
||||||
|
# $(am__untar) < result.tar
|
||||||
|
AC_DEFUN([_AM_PROG_TAR],
|
||||||
|
[# Always define AMTAR for backward compatibility.
|
||||||
|
AM_MISSING_PROG([AMTAR], [tar])
|
||||||
|
m4_if([$1], [v7],
|
||||||
|
[am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
|
||||||
|
[m4_case([$1], [ustar],, [pax],,
|
||||||
|
[m4_fatal([Unknown tar format])])
|
||||||
|
AC_MSG_CHECKING([how to create a $1 tar archive])
|
||||||
|
# Loop over all known methods to create a tar archive until one works.
|
||||||
|
_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
|
||||||
|
_am_tools=${am_cv_prog_tar_$1-$_am_tools}
|
||||||
|
# Do not fold the above two lines into one, because Tru64 sh and
|
||||||
|
# Solaris sh will not grok spaces in the rhs of `-'.
|
||||||
|
for _am_tool in $_am_tools
|
||||||
|
do
|
||||||
|
case $_am_tool in
|
||||||
|
gnutar)
|
||||||
|
for _am_tar in tar gnutar gtar;
|
||||||
|
do
|
||||||
|
AM_RUN_LOG([$_am_tar --version]) && break
|
||||||
|
done
|
||||||
|
am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
|
||||||
|
am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
|
||||||
|
am__untar="$_am_tar -xf -"
|
||||||
|
;;
|
||||||
|
plaintar)
|
||||||
|
# Must skip GNU tar: if it does not support --format= it doesn't create
|
||||||
|
# ustar tarball either.
|
||||||
|
(tar --version) >/dev/null 2>&1 && continue
|
||||||
|
am__tar='tar chf - "$$tardir"'
|
||||||
|
am__tar_='tar chf - "$tardir"'
|
||||||
|
am__untar='tar xf -'
|
||||||
|
;;
|
||||||
|
pax)
|
||||||
|
am__tar='pax -L -x $1 -w "$$tardir"'
|
||||||
|
am__tar_='pax -L -x $1 -w "$tardir"'
|
||||||
|
am__untar='pax -r'
|
||||||
|
;;
|
||||||
|
cpio)
|
||||||
|
am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
|
||||||
|
am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
|
||||||
|
am__untar='cpio -i -H $1 -d'
|
||||||
|
;;
|
||||||
|
none)
|
||||||
|
am__tar=false
|
||||||
|
am__tar_=false
|
||||||
|
am__untar=false
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# If the value was cached, stop now. We just wanted to have am__tar
|
||||||
|
# and am__untar set.
|
||||||
|
test -n "${am_cv_prog_tar_$1}" && break
|
||||||
|
|
||||||
|
# tar/untar a dummy directory, and stop if the command works
|
||||||
|
rm -rf conftest.dir
|
||||||
|
mkdir conftest.dir
|
||||||
|
echo GrepMe > conftest.dir/file
|
||||||
|
AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
|
||||||
|
rm -rf conftest.dir
|
||||||
|
if test -s conftest.tar; then
|
||||||
|
AM_RUN_LOG([$am__untar <conftest.tar])
|
||||||
|
grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
rm -rf conftest.dir
|
||||||
|
|
||||||
|
AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
|
||||||
|
AC_MSG_RESULT([$am_cv_prog_tar_$1])])
|
||||||
|
AC_SUBST([am__tar])
|
||||||
|
AC_SUBST([am__untar])
|
||||||
|
]) # _AM_PROG_TAR
|
||||||
|
|
159
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/autogen.sh
Executable file
159
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/autogen.sh
Executable file
@ -0,0 +1,159 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# Run this to generate all the initial makefiles, etc.
|
||||||
|
|
||||||
|
srcdir=`dirname $0`
|
||||||
|
test -z "$srcdir" && srcdir=.
|
||||||
|
|
||||||
|
DIE=0
|
||||||
|
|
||||||
|
if [ -n "$GNOME2_DIR" ]; then
|
||||||
|
ACLOCAL_FLAGS="-I $GNOME2_DIR/share/aclocal $ACLOCAL_FLAGS"
|
||||||
|
LD_LIBRARY_PATH="$GNOME2_DIR/lib:$LD_LIBRARY_PATH"
|
||||||
|
PATH="$GNOME2_DIR/bin:$PATH"
|
||||||
|
export PATH
|
||||||
|
export LD_LIBRARY_PATH
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Abort unless $srcdir really is the top of the package tree.
|
||||||
|
test -f "$srcdir/configure.ac" || {
|
||||||
|
# Use a single echo: "echo -n" is not portable across /bin/sh implementations.
|
||||||
|
echo "**Error**: Directory \`$srcdir' does not look like the top-level package directory"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
(autoconf --version) < /dev/null > /dev/null 2>&1 || {
|
||||||
|
echo
|
||||||
|
echo "**Error**: You must have \`autoconf' installed."
|
||||||
|
echo "Download the appropriate package for your distribution,"
|
||||||
|
echo "or get the source tarball at ftp://ftp.gnu.org/pub/gnu/"
|
||||||
|
DIE=1
|
||||||
|
}
|
||||||
|
|
||||||
|
(grep "^IT_PROG_INTLTOOL" $srcdir/configure.ac >/dev/null) && {
|
||||||
|
(intltoolize --version) < /dev/null > /dev/null 2>&1 || {
|
||||||
|
echo
|
||||||
|
echo "**Error**: You must have \`intltool' installed."
|
||||||
|
echo "You can get it from:"
|
||||||
|
echo " ftp://ftp.gnome.org/pub/GNOME/"
|
||||||
|
DIE=1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(grep "^AM_PROG_XML_I18N_TOOLS" $srcdir/configure.ac >/dev/null) && {
|
||||||
|
(xml-i18n-toolize --version) < /dev/null > /dev/null 2>&1 || {
|
||||||
|
echo
|
||||||
|
echo "**Error**: You must have \`xml-i18n-toolize' installed."
|
||||||
|
echo "You can get it from:"
|
||||||
|
echo " ftp://ftp.gnome.org/pub/GNOME/"
|
||||||
|
DIE=1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(grep "^AM_PROG_LIBTOOL" $srcdir/configure.ac >/dev/null) && {
|
||||||
|
(libtool --version) < /dev/null > /dev/null 2>&1 || {
|
||||||
|
echo
|
||||||
|
echo "**Error**: You must have \`libtool' installed."
|
||||||
|
echo "You can get it from: ftp://ftp.gnu.org/pub/gnu/"
|
||||||
|
DIE=1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(grep "^AM_GLIB_GNU_GETTEXT" $srcdir/configure.ac >/dev/null) && {
|
||||||
|
(grep "sed.*POTFILES" $srcdir/configure.ac) > /dev/null || \
|
||||||
|
(glib-gettextize --version) < /dev/null > /dev/null 2>&1 || {
|
||||||
|
echo
|
||||||
|
echo "**Error**: You must have \`glib' installed."
|
||||||
|
echo "You can get it from: ftp://ftp.gtk.org/pub/gtk"
|
||||||
|
DIE=1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(automake --version) < /dev/null > /dev/null 2>&1 || {
|
||||||
|
echo
|
||||||
|
echo "**Error**: You must have \`automake' installed."
|
||||||
|
echo "You can get it from: ftp://ftp.gnu.org/pub/gnu/"
|
||||||
|
DIE=1
|
||||||
|
NO_AUTOMAKE=yes
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# if no automake, don't bother testing for aclocal
|
||||||
|
test -n "$NO_AUTOMAKE" || (aclocal --version) < /dev/null > /dev/null 2>&1 || {
|
||||||
|
echo
|
||||||
|
echo "**Error**: Missing \`aclocal'. The version of \`automake'"
|
||||||
|
echo "installed doesn't appear recent enough."
|
||||||
|
echo "You can get automake from ftp://ftp.gnu.org/pub/gnu/"
|
||||||
|
DIE=1
|
||||||
|
}
|
||||||
|
|
||||||
|
if test "$DIE" -eq 1; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test -z "$*"; then
|
||||||
|
echo "**Warning**: I am going to run \`configure' with no arguments."
|
||||||
|
echo "If you wish to pass any to it, please specify them on the"
|
||||||
|
echo \`$0\'" command line."
|
||||||
|
echo
|
||||||
|
fi
|
||||||
|
|
||||||
|
case $CC in
|
||||||
|
xlc )
|
||||||
|
am_opt=--include-deps;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
for coin in `find $srcdir -path $srcdir/CVS -prune -o -name configure.ac -print`
|
||||||
|
do
|
||||||
|
dr=`dirname $coin`
|
||||||
|
if test -f $dr/NO-AUTO-GEN; then
|
||||||
|
echo skipping $dr -- flagged as no auto-gen
|
||||||
|
else
|
||||||
|
echo processing $dr
|
||||||
|
( cd $dr
|
||||||
|
|
||||||
|
aclocalinclude="$ACLOCAL_FLAGS"
|
||||||
|
|
||||||
|
# This runs inside the "( cd $dr" subshell, so aclocal.m4 must be addressed
|
||||||
|
# relative to the current directory; "$dr/aclocal.m4" points at the wrong
|
||||||
|
# place whenever $dr is a relative path other than ".".
|
||||||
|
if grep "^AM_GLIB_GNU_GETTEXT" configure.ac >/dev/null; then
|
||||||
|
echo "Creating $dr/aclocal.m4 ..."
|
||||||
|
test -r aclocal.m4 || touch aclocal.m4
|
||||||
|
echo "Running glib-gettextize... Ignore non-fatal messages."
|
||||||
|
echo "no" | glib-gettextize --force --copy
|
||||||
|
echo "Making $dr/aclocal.m4 writable ..."
|
||||||
|
test -r aclocal.m4 && chmod u+w aclocal.m4
|
||||||
|
fi
|
||||||
|
if grep "^IT_PROG_INTLTOOL" configure.ac >/dev/null; then
|
||||||
|
echo "Running intltoolize..."
|
||||||
|
intltoolize --copy --force --automake
|
||||||
|
fi
|
||||||
|
if grep "^AM_PROG_XML_I18N_TOOLS" configure.ac >/dev/null; then
|
||||||
|
echo "Running xml-i18n-toolize..."
|
||||||
|
xml-i18n-toolize --copy --force --automake
|
||||||
|
fi
|
||||||
|
if grep "^AM_PROG_LIBTOOL" configure.ac >/dev/null; then
|
||||||
|
if test -z "$NO_LIBTOOLIZE" ; then
|
||||||
|
echo "Running libtoolize..."
|
||||||
|
libtoolize --force --copy
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
echo "Running aclocal $aclocalinclude ..."
|
||||||
|
aclocal $aclocalinclude
|
||||||
|
if grep "^AM_CONFIG_HEADER" configure.ac >/dev/null; then
|
||||||
|
echo "Running autoheader..."
|
||||||
|
autoheader
|
||||||
|
fi
|
||||||
|
echo "Running automake --gnu $am_opt ..."
|
||||||
|
automake --add-missing --gnu $am_opt
|
||||||
|
echo "Running autoconf ..."
|
||||||
|
autoconf
|
||||||
|
)
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
conf_flags="--enable-maintainer-mode"
|
||||||
|
|
||||||
|
if test x$NOCONFIGURE = x; then
|
||||||
|
echo Running $srcdir/configure $conf_flags "$@" ...
|
||||||
|
$srcdir/configure $conf_flags "$@" \
|
||||||
|
&& echo Now type \`make\' to compile. || exit 1
|
||||||
|
else
|
||||||
|
echo Skipping configure process.
|
||||||
|
fi
|
1526
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/config.guess
vendored
Executable file
1526
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/config.guess
vendored
Executable file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,26 @@
|
|||||||
|
/* config.h. Generated from config.h.in by configure. */
|
||||||
|
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||||
|
|
||||||
|
/* Name of package */
|
||||||
|
#define PACKAGE "mgiza"
|
||||||
|
|
||||||
|
/* Define to the address where bug reports for this package should be sent. */
|
||||||
|
#define PACKAGE_BUGREPORT ""
|
||||||
|
|
||||||
|
/* Define to the full name of this package. */
|
||||||
|
#define PACKAGE_NAME "mgiza"
|
||||||
|
|
||||||
|
/* Define to the full name and version of this package. */
|
||||||
|
#define PACKAGE_STRING "mgiza 1.0"
|
||||||
|
|
||||||
|
/* Define to the one symbol short name of this package. */
|
||||||
|
#define PACKAGE_TARNAME "mgiza"
|
||||||
|
|
||||||
|
/* Define to the version of this package. */
|
||||||
|
#define PACKAGE_VERSION "1.0"
|
||||||
|
|
||||||
|
/* Define to 1 if you have the ANSI C header files. */
|
||||||
|
#define STDC_HEADERS 1
|
||||||
|
|
||||||
|
/* Version number of package */
|
||||||
|
#define VERSION "1.0"
|
@ -0,0 +1,25 @@
|
|||||||
|
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||||
|
|
||||||
|
/* Name of package */
|
||||||
|
#undef PACKAGE
|
||||||
|
|
||||||
|
/* Define to the address where bug reports for this package should be sent. */
|
||||||
|
#undef PACKAGE_BUGREPORT
|
||||||
|
|
||||||
|
/* Define to the full name of this package. */
|
||||||
|
#undef PACKAGE_NAME
|
||||||
|
|
||||||
|
/* Define to the full name and version of this package. */
|
||||||
|
#undef PACKAGE_STRING
|
||||||
|
|
||||||
|
/* Define to the one symbol short name of this package. */
|
||||||
|
#undef PACKAGE_TARNAME
|
||||||
|
|
||||||
|
/* Define to the version of this package. */
|
||||||
|
#undef PACKAGE_VERSION
|
||||||
|
|
||||||
|
/* Define to 1 if you have the ANSI C header files. */
|
||||||
|
#undef STDC_HEADERS
|
||||||
|
|
||||||
|
/* Version number of package */
|
||||||
|
#undef VERSION
|
1658
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/config.sub
vendored
Executable file
1658
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/config.sub
vendored
Executable file
File diff suppressed because it is too large
Load Diff
6897
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/configure
vendored
Executable file
6897
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/configure
vendored
Executable file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,28 @@
|
|||||||
|
dnl Process this file with autoconf to produce a configure script.
|
||||||
|
dnl Created by Anjuta application wizard.
|
||||||
|
|
||||||
|
AC_INIT(mgiza, 1.0)
|
||||||
|
|
||||||
|
AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION)
|
||||||
|
AM_CONFIG_HEADER(config.h)
|
||||||
|
AM_MAINTAINER_MODE
|
||||||
|
|
||||||
|
AC_ISC_POSIX
|
||||||
|
AC_PROG_CXX
|
||||||
|
AM_PROG_CC_STDC
|
||||||
|
AC_HEADER_STDC
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
AC_PROG_RANLIB
|
||||||
|
AM_PROG_LIBTOOL
|
||||||
|
AC_PROG_LIBTOOL
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
AC_OUTPUT([
|
||||||
|
Makefile
|
||||||
|
src/Makefile
|
||||||
|
src/mkcls/Makefile
|
||||||
|
])
|
519
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/install-sh
Executable file
519
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/install-sh
Executable file
@ -0,0 +1,519 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# install - install a program, script, or datafile
|
||||||
|
|
||||||
|
scriptversion=2006-12-25.00
|
||||||
|
|
||||||
|
# This originates from X11R5 (mit/util/scripts/install.sh), which was
|
||||||
|
# later released in X11R6 (xc/config/util/install.sh) with the
|
||||||
|
# following copyright and license.
|
||||||
|
#
|
||||||
|
# Copyright (C) 1994 X Consortium
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to
|
||||||
|
# deal in the Software without restriction, including without limitation the
|
||||||
|
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||||
|
# sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in
|
||||||
|
# all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||||
|
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
|
||||||
|
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
#
|
||||||
|
# Except as contained in this notice, the name of the X Consortium shall not
|
||||||
|
# be used in advertising or otherwise to promote the sale, use or other deal-
|
||||||
|
# ings in this Software without prior written authorization from the X Consor-
|
||||||
|
# tium.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# FSF changes to this file are in the public domain.
|
||||||
|
#
|
||||||
|
# Calling this script install-sh is preferred over install.sh, to prevent
|
||||||
|
# `make' implicit rules from creating a file called install from it
|
||||||
|
# when there is no Makefile.
|
||||||
|
#
|
||||||
|
# This script is compatible with the BSD install script, but was written
|
||||||
|
# from scratch.
|
||||||
|
|
||||||
|
nl='
|
||||||
|
'
|
||||||
|
IFS=" "" $nl"
|
||||||
|
|
||||||
|
# set DOITPROG to echo to test this script
|
||||||
|
|
||||||
|
# Don't use :- since 4.3BSD and earlier shells don't like it.
|
||||||
|
doit=${DOITPROG-}
|
||||||
|
if test -z "$doit"; then
|
||||||
|
doit_exec=exec
|
||||||
|
else
|
||||||
|
doit_exec=$doit
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Put in absolute file names if you don't have them in your path;
|
||||||
|
# or use environment vars.
|
||||||
|
|
||||||
|
chgrpprog=${CHGRPPROG-chgrp}
|
||||||
|
chmodprog=${CHMODPROG-chmod}
|
||||||
|
chownprog=${CHOWNPROG-chown}
|
||||||
|
cmpprog=${CMPPROG-cmp}
|
||||||
|
cpprog=${CPPROG-cp}
|
||||||
|
mkdirprog=${MKDIRPROG-mkdir}
|
||||||
|
mvprog=${MVPROG-mv}
|
||||||
|
rmprog=${RMPROG-rm}
|
||||||
|
stripprog=${STRIPPROG-strip}
|
||||||
|
|
||||||
|
posix_glob='?'
|
||||||
|
initialize_posix_glob='
|
||||||
|
test "$posix_glob" != "?" || {
|
||||||
|
if (set -f) 2>/dev/null; then
|
||||||
|
posix_glob=
|
||||||
|
else
|
||||||
|
posix_glob=:
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
'
|
||||||
|
|
||||||
|
posix_mkdir=
|
||||||
|
|
||||||
|
# Desired mode of installed file.
|
||||||
|
mode=0755
|
||||||
|
|
||||||
|
chgrpcmd=
|
||||||
|
chmodcmd=$chmodprog
|
||||||
|
chowncmd=
|
||||||
|
mvcmd=$mvprog
|
||||||
|
rmcmd="$rmprog -f"
|
||||||
|
stripcmd=
|
||||||
|
|
||||||
|
src=
|
||||||
|
dst=
|
||||||
|
dir_arg=
|
||||||
|
dst_arg=
|
||||||
|
|
||||||
|
copy_on_change=false
|
||||||
|
no_target_directory=
|
||||||
|
|
||||||
|
usage="\
|
||||||
|
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
|
||||||
|
or: $0 [OPTION]... SRCFILES... DIRECTORY
|
||||||
|
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
|
||||||
|
or: $0 [OPTION]... -d DIRECTORIES...
|
||||||
|
|
||||||
|
In the 1st form, copy SRCFILE to DSTFILE.
|
||||||
|
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
|
||||||
|
In the 4th, create DIRECTORIES.
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--help display this help and exit.
|
||||||
|
--version display version info and exit.
|
||||||
|
|
||||||
|
-c (ignored)
|
||||||
|
-C install only if different (preserve the last data modification time)
|
||||||
|
-d create directories instead of installing files.
|
||||||
|
-g GROUP $chgrpprog installed files to GROUP.
|
||||||
|
-m MODE $chmodprog installed files to MODE.
|
||||||
|
-o USER $chownprog installed files to USER.
|
||||||
|
-s $stripprog installed files.
|
||||||
|
-t DIRECTORY install into DIRECTORY.
|
||||||
|
-T report an error if DSTFILE is a directory.
|
||||||
|
|
||||||
|
Environment variables override the default commands:
|
||||||
|
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
|
||||||
|
RMPROG STRIPPROG
|
||||||
|
"
|
||||||
|
|
||||||
|
while test $# -ne 0; do
|
||||||
|
case $1 in
|
||||||
|
-c) ;;
|
||||||
|
|
||||||
|
-C) copy_on_change=true;;
|
||||||
|
|
||||||
|
-d) dir_arg=true;;
|
||||||
|
|
||||||
|
-g) chgrpcmd="$chgrpprog $2"
|
||||||
|
shift;;
|
||||||
|
|
||||||
|
--help) echo "$usage"; exit $?;;
|
||||||
|
|
||||||
|
-m) mode=$2
|
||||||
|
case $mode in
|
||||||
|
*' '* | *' '* | *'
|
||||||
|
'* | *'*'* | *'?'* | *'['*)
|
||||||
|
echo "$0: invalid mode: $mode" >&2
|
||||||
|
exit 1;;
|
||||||
|
esac
|
||||||
|
shift;;
|
||||||
|
|
||||||
|
-o) chowncmd="$chownprog $2"
|
||||||
|
shift;;
|
||||||
|
|
||||||
|
-s) stripcmd=$stripprog;;
|
||||||
|
|
||||||
|
-t) dst_arg=$2
|
||||||
|
shift;;
|
||||||
|
|
||||||
|
-T) no_target_directory=true;;
|
||||||
|
|
||||||
|
--version) echo "$0 $scriptversion"; exit $?;;
|
||||||
|
|
||||||
|
--) shift
|
||||||
|
break;;
|
||||||
|
|
||||||
|
-*) echo "$0: invalid option: $1" >&2
|
||||||
|
exit 1;;
|
||||||
|
|
||||||
|
*) break;;
|
||||||
|
esac
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
|
||||||
|
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
|
||||||
|
# When -d is used, all remaining arguments are directories to create.
|
||||||
|
# When -t is used, the destination is already specified.
|
||||||
|
# Otherwise, the last argument is the destination. Remove it from $@.
|
||||||
|
for arg
|
||||||
|
do
|
||||||
|
if test -n "$dst_arg"; then
|
||||||
|
# $@ is not empty: it contains at least $arg.
|
||||||
|
set fnord "$@" "$dst_arg"
|
||||||
|
shift # fnord
|
||||||
|
fi
|
||||||
|
shift # arg
|
||||||
|
dst_arg=$arg
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test $# -eq 0; then
|
||||||
|
if test -z "$dir_arg"; then
|
||||||
|
echo "$0: no input file specified." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# It's OK to call `install-sh -d' without argument.
|
||||||
|
# This can happen when creating conditional directories.
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test -z "$dir_arg"; then
|
||||||
|
trap '(exit $?); exit' 1 2 13 15
|
||||||
|
|
||||||
|
# Set umask so as not to create temps with too-generous modes.
|
||||||
|
# However, 'strip' requires both read and write access to temps.
|
||||||
|
case $mode in
|
||||||
|
# Optimize common cases.
|
||||||
|
*644) cp_umask=133;;
|
||||||
|
*755) cp_umask=22;;
|
||||||
|
|
||||||
|
*[0-7])
|
||||||
|
if test -z "$stripcmd"; then
|
||||||
|
u_plus_rw=
|
||||||
|
else
|
||||||
|
u_plus_rw='% 200'
|
||||||
|
fi
|
||||||
|
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
|
||||||
|
*)
|
||||||
|
if test -z "$stripcmd"; then
|
||||||
|
u_plus_rw=
|
||||||
|
else
|
||||||
|
u_plus_rw=,u+rw
|
||||||
|
fi
|
||||||
|
cp_umask=$mode$u_plus_rw;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
for src
|
||||||
|
do
|
||||||
|
# Protect names starting with `-'.
|
||||||
|
case $src in
|
||||||
|
-*) src=./$src;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
if test -n "$dir_arg"; then
|
||||||
|
dst=$src
|
||||||
|
dstdir=$dst
|
||||||
|
test -d "$dstdir"
|
||||||
|
dstdir_status=$?
|
||||||
|
else
|
||||||
|
|
||||||
|
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
|
||||||
|
# might cause directories to be created, which would be especially bad
|
||||||
|
# if $src (and thus $dsttmp) contains '*'.
|
||||||
|
if test ! -f "$src" && test ! -d "$src"; then
|
||||||
|
echo "$0: $src does not exist." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test -z "$dst_arg"; then
|
||||||
|
echo "$0: no destination specified." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
dst=$dst_arg
|
||||||
|
# Protect names starting with `-'.
|
||||||
|
case $dst in
|
||||||
|
-*) dst=./$dst;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# If destination is a directory, append the input filename; won't work
|
||||||
|
# if double slashes aren't ignored.
|
||||||
|
if test -d "$dst"; then
|
||||||
|
if test -n "$no_target_directory"; then
|
||||||
|
echo "$0: $dst_arg: Is a directory" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
dstdir=$dst
|
||||||
|
dst=$dstdir/`basename "$src"`
|
||||||
|
dstdir_status=0
|
||||||
|
else
|
||||||
|
# Prefer dirname, but fall back on a substitute if dirname fails.
|
||||||
|
dstdir=`
|
||||||
|
(dirname "$dst") 2>/dev/null ||
|
||||||
|
expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
|
||||||
|
X"$dst" : 'X\(//\)[^/]' \| \
|
||||||
|
X"$dst" : 'X\(//\)$' \| \
|
||||||
|
X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
|
||||||
|
echo X"$dst" |
|
||||||
|
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
|
||||||
|
s//\1/
|
||||||
|
q
|
||||||
|
}
|
||||||
|
/^X\(\/\/\)[^/].*/{
|
||||||
|
s//\1/
|
||||||
|
q
|
||||||
|
}
|
||||||
|
/^X\(\/\/\)$/{
|
||||||
|
s//\1/
|
||||||
|
q
|
||||||
|
}
|
||||||
|
/^X\(\/\).*/{
|
||||||
|
s//\1/
|
||||||
|
q
|
||||||
|
}
|
||||||
|
s/.*/./; q'
|
||||||
|
`
|
||||||
|
|
||||||
|
test -d "$dstdir"
|
||||||
|
dstdir_status=$?
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
obsolete_mkdir_used=false
|
||||||
|
|
||||||
|
if test $dstdir_status != 0; then
|
||||||
|
case $posix_mkdir in
|
||||||
|
'')
|
||||||
|
# Create intermediate dirs using mode 755 as modified by the umask.
|
||||||
|
# This is like FreeBSD 'install' as of 1997-10-28.
|
||||||
|
umask=`umask`
|
||||||
|
case $stripcmd.$umask in
|
||||||
|
# Optimize common cases.
|
||||||
|
*[2367][2367]) mkdir_umask=$umask;;
|
||||||
|
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
|
||||||
|
|
||||||
|
*[0-7])
|
||||||
|
mkdir_umask=`expr $umask + 22 \
|
||||||
|
- $umask % 100 % 40 + $umask % 20 \
|
||||||
|
- $umask % 10 % 4 + $umask % 2
|
||||||
|
`;;
|
||||||
|
*) mkdir_umask=$umask,go-w;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# With -d, create the new directory with the user-specified mode.
|
||||||
|
# Otherwise, rely on $mkdir_umask.
|
||||||
|
if test -n "$dir_arg"; then
|
||||||
|
mkdir_mode=-m$mode
|
||||||
|
else
|
||||||
|
mkdir_mode=
|
||||||
|
fi
|
||||||
|
|
||||||
|
posix_mkdir=false
|
||||||
|
case $umask in
|
||||||
|
*[123567][0-7][0-7])
|
||||||
|
# POSIX mkdir -p sets u+wx bits regardless of umask, which
|
||||||
|
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
|
||||||
|
trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
|
||||||
|
|
||||||
|
if (umask $mkdir_umask &&
|
||||||
|
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
|
||||||
|
then
|
||||||
|
if test -z "$dir_arg" || {
|
||||||
|
# Check for POSIX incompatibilities with -m.
|
||||||
|
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
|
||||||
|
# other-writeable bit of parent directory when it shouldn't.
|
||||||
|
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
|
||||||
|
ls_ld_tmpdir=`ls -ld "$tmpdir"`
|
||||||
|
case $ls_ld_tmpdir in
|
||||||
|
d????-?r-*) different_mode=700;;
|
||||||
|
d????-?--*) different_mode=755;;
|
||||||
|
*) false;;
|
||||||
|
esac &&
|
||||||
|
$mkdirprog -m$different_mode -p -- "$tmpdir" && {
|
||||||
|
ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
|
||||||
|
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
then posix_mkdir=:
|
||||||
|
fi
|
||||||
|
rmdir "$tmpdir/d" "$tmpdir"
|
||||||
|
else
|
||||||
|
# Remove any dirs left behind by ancient mkdir implementations.
|
||||||
|
rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
|
||||||
|
fi
|
||||||
|
trap '' 0;;
|
||||||
|
esac;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
if
|
||||||
|
$posix_mkdir && (
|
||||||
|
umask $mkdir_umask &&
|
||||||
|
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
|
||||||
|
)
|
||||||
|
then :
|
||||||
|
else
|
||||||
|
|
||||||
|
# The umask is ridiculous, or mkdir does not conform to POSIX,
|
||||||
|
# or it failed possibly due to a race condition. Create the
|
||||||
|
# directory the slow way, step by step, checking for races as we go.
|
||||||
|
|
||||||
|
case $dstdir in
|
||||||
|
/*) prefix='/';;
|
||||||
|
-*) prefix='./';;
|
||||||
|
*) prefix='';;
|
||||||
|
esac
|
||||||
|
|
||||||
|
eval "$initialize_posix_glob"
|
||||||
|
|
||||||
|
oIFS=$IFS
|
||||||
|
IFS=/
|
||||||
|
$posix_glob set -f
|
||||||
|
set fnord $dstdir
|
||||||
|
shift
|
||||||
|
$posix_glob set +f
|
||||||
|
IFS=$oIFS
|
||||||
|
|
||||||
|
prefixes=
|
||||||
|
|
||||||
|
for d
|
||||||
|
do
|
||||||
|
test -z "$d" && continue
|
||||||
|
|
||||||
|
prefix=$prefix$d
|
||||||
|
if test -d "$prefix"; then
|
||||||
|
prefixes=
|
||||||
|
else
|
||||||
|
if $posix_mkdir; then
|
||||||
|
# Run the umask command in this subshell; "umask=..." would only set an unused variable.
|
||||||
|
(umask $mkdir_umask &&
|
||||||
|
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
|
||||||
|
# Don't fail if two instances are running concurrently.
|
||||||
|
test -d "$prefix" || exit 1
|
||||||
|
else
|
||||||
|
case $prefix in
|
||||||
|
*\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
|
||||||
|
*) qprefix=$prefix;;
|
||||||
|
esac
|
||||||
|
prefixes="$prefixes '$qprefix'"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
prefix=$prefix/
|
||||||
|
done
|
||||||
|
|
||||||
|
if test -n "$prefixes"; then
|
||||||
|
# Don't fail if two instances are running concurrently.
|
||||||
|
(umask $mkdir_umask &&
|
||||||
|
eval "\$doit_exec \$mkdirprog $prefixes") ||
|
||||||
|
test -d "$dstdir" || exit 1
|
||||||
|
obsolete_mkdir_used=true
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test -n "$dir_arg"; then
|
||||||
|
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
|
||||||
|
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
|
||||||
|
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
|
||||||
|
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
|
||||||
|
else
|
||||||
|
|
||||||
|
# Make a couple of temp file names in the proper directory.
|
||||||
|
dsttmp=$dstdir/_inst.$$_
|
||||||
|
rmtmp=$dstdir/_rm.$$_
|
||||||
|
|
||||||
|
# Trap to clean up those temp files at exit.
|
||||||
|
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
|
||||||
|
|
||||||
|
# Copy the file name to the temp name.
|
||||||
|
(umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
|
||||||
|
|
||||||
|
# and set any options; do chmod last to preserve setuid bits.
|
||||||
|
#
|
||||||
|
# If any of these fail, we abort the whole thing. If we want to
|
||||||
|
# ignore errors from any of these, just make sure not to ignore
|
||||||
|
# errors from the above "$doit $cpprog $src $dsttmp" command.
|
||||||
|
#
|
||||||
|
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
|
||||||
|
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
|
||||||
|
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
|
||||||
|
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
|
||||||
|
|
||||||
|
# If -C, don't bother to copy if it wouldn't change the file.
|
||||||
|
if $copy_on_change &&
|
||||||
|
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
|
||||||
|
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
|
||||||
|
|
||||||
|
eval "$initialize_posix_glob" &&
|
||||||
|
$posix_glob set -f &&
|
||||||
|
set X $old && old=:$2:$4:$5:$6 &&
|
||||||
|
set X $new && new=:$2:$4:$5:$6 &&
|
||||||
|
$posix_glob set +f &&
|
||||||
|
|
||||||
|
test "$old" = "$new" &&
|
||||||
|
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
|
||||||
|
then
|
||||||
|
rm -f "$dsttmp"
|
||||||
|
else
|
||||||
|
# Rename the file to the real destination.
|
||||||
|
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
|
||||||
|
|
||||||
|
# The rename failed, perhaps because mv can't rename something else
|
||||||
|
# to itself, or perhaps because mv is so ancient that it does not
|
||||||
|
# support -f.
|
||||||
|
{
|
||||||
|
# Now remove or move aside any old file at destination location.
|
||||||
|
# We try this two ways since rm can't unlink itself on some
|
||||||
|
# systems and the destination file might be busy for other
|
||||||
|
# reasons. In this case, the final cleanup might fail but the new
|
||||||
|
# file should still install successfully.
|
||||||
|
{
|
||||||
|
test ! -f "$dst" ||
|
||||||
|
$doit $rmcmd -f "$dst" 2>/dev/null ||
|
||||||
|
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
|
||||||
|
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
|
||||||
|
} ||
|
||||||
|
{ echo "$0: cannot unlink or rename $dst" >&2
|
||||||
|
(exit 1); exit 1
|
||||||
|
}
|
||||||
|
} &&
|
||||||
|
|
||||||
|
# Now rename the file to the real destination.
|
||||||
|
$doit $mvcmd "$dsttmp" "$dst"
|
||||||
|
}
|
||||||
|
fi || exit 1
|
||||||
|
|
||||||
|
trap '' 0
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# Local variables:
|
||||||
|
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||||
|
# time-stamp-start: "scriptversion="
|
||||||
|
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||||
|
# time-stamp-end: "$"
|
||||||
|
# End:
|
8412
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/ltmain.sh
Normal file
8412
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/ltmain.sh
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,37 @@
|
|||||||
|
<?xml version="1.0"?>
|
||||||
|
<anjuta>
|
||||||
|
<plugin name="GBF Project Manager"
|
||||||
|
url="http://anjuta.org/plugins/"
|
||||||
|
mandatory="yes">
|
||||||
|
<require group="Anjuta Plugin"
|
||||||
|
attribute="Interfaces"
|
||||||
|
value="IAnjutaProjectManager"/>
|
||||||
|
<require group="Project"
|
||||||
|
attribute="Supported-Project-Types"
|
||||||
|
value="automake"/>
|
||||||
|
</plugin>
|
||||||
|
<plugin name="Make Build System"
|
||||||
|
url="http://anjuta.org/plugins/"
|
||||||
|
mandatory="yes">
|
||||||
|
<require group="Anjuta Plugin"
|
||||||
|
attribute="Interfaces"
|
||||||
|
value="IAnjutaBuildable"/>
|
||||||
|
<require group="Build"
|
||||||
|
attribute="Supported-Build-Types"
|
||||||
|
value="make"/>
|
||||||
|
</plugin>
|
||||||
|
<plugin name="Task Manager"
|
||||||
|
url="http://anjuta.org/plugins/"
|
||||||
|
mandatory="no">
|
||||||
|
<require group="Anjuta Plugin"
|
||||||
|
attribute="Interfaces"
|
||||||
|
value="IAnjutaTodo"/>
|
||||||
|
</plugin>
|
||||||
|
<plugin name="Debug Manager"
|
||||||
|
url="http://anjuta.org/plugins/"
|
||||||
|
mandatory="no">
|
||||||
|
<require group="Anjuta Plugin"
|
||||||
|
attribute="Interfaces"
|
||||||
|
value="IAnjutaDebuggerManager"/>
|
||||||
|
</plugin>
|
||||||
|
</anjuta>
|
367
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/missing
Executable file
367
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/missing
Executable file
@ -0,0 +1,367 @@
|
|||||||
|
#! /bin/sh
|
||||||
|
# Common stub for a few missing GNU programs while installing.
|
||||||
|
|
||||||
|
scriptversion=2006-05-10.23
|
||||||
|
|
||||||
|
# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006
|
||||||
|
# Free Software Foundation, Inc.
|
||||||
|
# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
# 02110-1301, USA.
|
||||||
|
|
||||||
|
# As a special exception to the GNU General Public License, if you
|
||||||
|
# distribute this file as part of a program that contains a
|
||||||
|
# configuration script generated by Autoconf, you may include it under
|
||||||
|
# the same distribution terms that you use for the rest of that program.
|
||||||
|
|
||||||
|
if test $# -eq 0; then
|
||||||
|
echo 1>&2 "Try \`$0 --help' for more information"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
run=:
|
||||||
|
sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p'
|
||||||
|
sed_minuso='s/.* -o \([^ ]*\).*/\1/p'
|
||||||
|
|
||||||
|
# In the cases where this matters, `missing' is being run in the
|
||||||
|
# srcdir already.
|
||||||
|
if test -f configure.ac; then
|
||||||
|
configure_ac=configure.ac
|
||||||
|
else
|
||||||
|
configure_ac=configure.in
|
||||||
|
fi
|
||||||
|
|
||||||
|
msg="missing on your system"
|
||||||
|
|
||||||
|
case $1 in
|
||||||
|
--run)
|
||||||
|
# Try to run requested program, and just exit if it succeeds.
|
||||||
|
run=
|
||||||
|
shift
|
||||||
|
"$@" && exit 0
|
||||||
|
# Exit code 63 means version mismatch. This often happens
|
||||||
|
# when the user try to use an ancient version of a tool on
|
||||||
|
# a file that requires a minimum version. In this case we
|
||||||
|
# should proceed as if the program had been absent, or
|
||||||
|
# if --run hadn't been passed.
|
||||||
|
if test $? = 63; then
|
||||||
|
run=:
|
||||||
|
msg="probably too old"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
-h|--h|--he|--hel|--help)
|
||||||
|
echo "\
|
||||||
|
$0 [OPTION]... PROGRAM [ARGUMENT]...
|
||||||
|
|
||||||
|
Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
|
||||||
|
error status if there is no known handling for PROGRAM.
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-h, --help display this help and exit
|
||||||
|
-v, --version output version information and exit
|
||||||
|
--run try to run the given command, and emulate it if it fails
|
||||||
|
|
||||||
|
Supported PROGRAM values:
|
||||||
|
aclocal touch file \`aclocal.m4'
|
||||||
|
autoconf touch file \`configure'
|
||||||
|
autoheader touch file \`config.h.in'
|
||||||
|
autom4te touch the output file, or create a stub one
|
||||||
|
automake touch all \`Makefile.in' files
|
||||||
|
bison create \`y.tab.[ch]', if possible, from existing .[ch]
|
||||||
|
flex create \`lex.yy.c', if possible, from existing .c
|
||||||
|
help2man touch the output file
|
||||||
|
lex create \`lex.yy.c', if possible, from existing .c
|
||||||
|
makeinfo touch the output file
|
||||||
|
tar try tar, gnutar, gtar, then tar without non-portable flags
|
||||||
|
yacc create \`y.tab.[ch]', if possible, from existing .[ch]
|
||||||
|
|
||||||
|
Send bug reports to <bug-automake@gnu.org>."
|
||||||
|
exit $?
|
||||||
|
;;
|
||||||
|
|
||||||
|
-v|--v|--ve|--ver|--vers|--versi|--versio|--version)
|
||||||
|
echo "missing $scriptversion (GNU Automake)"
|
||||||
|
exit $?
|
||||||
|
;;
|
||||||
|
|
||||||
|
-*)
|
||||||
|
echo 1>&2 "$0: Unknown \`$1' option"
|
||||||
|
echo 1>&2 "Try \`$0 --help' for more information"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
|
esac
|
||||||
|
|
||||||
|
# Now exit if we have it, but it failed. Also exit now if we
|
||||||
|
# don't have it and --version was passed (most likely to detect
|
||||||
|
# the program).
|
||||||
|
case $1 in
|
||||||
|
lex|yacc)
|
||||||
|
# Not GNU programs, they don't have --version.
|
||||||
|
;;
|
||||||
|
|
||||||
|
tar)
|
||||||
|
if test -n "$run"; then
|
||||||
|
echo 1>&2 "ERROR: \`tar' requires --run"
|
||||||
|
exit 1
|
||||||
|
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
|
||||||
|
# We have it, but it failed.
|
||||||
|
exit 1
|
||||||
|
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
|
||||||
|
# Could not run --version or --help. This is probably someone
|
||||||
|
# running `$TOOL --version' or `$TOOL --help' to check whether
|
||||||
|
# $TOOL exists and not knowing $TOOL uses missing.
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# If it does not exist, or fails to run (possibly an outdated version),
|
||||||
|
# try to emulate it.
|
||||||
|
case $1 in
|
||||||
|
aclocal*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: \`$1' is $msg. You should only need it if
|
||||||
|
you modified \`acinclude.m4' or \`${configure_ac}'. You might want
|
||||||
|
to install the \`Automake' and \`Perl' packages. Grab them from
|
||||||
|
any GNU archive site."
|
||||||
|
touch aclocal.m4
|
||||||
|
;;
|
||||||
|
|
||||||
|
autoconf)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: \`$1' is $msg. You should only need it if
|
||||||
|
you modified \`${configure_ac}'. You might want to install the
|
||||||
|
\`Autoconf' and \`GNU m4' packages. Grab them from any GNU
|
||||||
|
archive site."
|
||||||
|
touch configure
|
||||||
|
;;
|
||||||
|
|
||||||
|
autoheader)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: \`$1' is $msg. You should only need it if
|
||||||
|
you modified \`acconfig.h' or \`${configure_ac}'. You might want
|
||||||
|
to install the \`Autoconf' and \`GNU m4' packages. Grab them
|
||||||
|
from any GNU archive site."
|
||||||
|
files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
|
||||||
|
test -z "$files" && files="config.h"
|
||||||
|
touch_files=
|
||||||
|
for f in $files; do
|
||||||
|
case $f in
|
||||||
|
*:*) touch_files="$touch_files "`echo "$f" |
|
||||||
|
sed -e 's/^[^:]*://' -e 's/:.*//'`;;
|
||||||
|
*) touch_files="$touch_files $f.in";;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
touch $touch_files
|
||||||
|
;;
|
||||||
|
|
||||||
|
automake*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: \`$1' is $msg. You should only need it if
|
||||||
|
you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
|
||||||
|
You might want to install the \`Automake' and \`Perl' packages.
|
||||||
|
Grab them from any GNU archive site."
|
||||||
|
find . -type f -name Makefile.am -print |
|
||||||
|
sed 's/\.am$/.in/' |
|
||||||
|
while read f; do touch "$f"; done
|
||||||
|
;;
|
||||||
|
|
||||||
|
autom4te)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: \`$1' is needed, but is $msg.
|
||||||
|
You might have modified some files without having the
|
||||||
|
proper tools for further handling them.
|
||||||
|
You can get \`$1' as part of \`Autoconf' from any GNU
|
||||||
|
archive site."
|
||||||
|
|
||||||
|
file=`echo "$*" | sed -n "$sed_output"`
|
||||||
|
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
|
||||||
|
if test -f "$file"; then
|
||||||
|
touch $file
|
||||||
|
else
|
||||||
|
test -z "$file" || exec >$file
|
||||||
|
echo "#! /bin/sh"
|
||||||
|
echo "# Created by GNU Automake missing as a replacement of"
|
||||||
|
echo "# $ $@"
|
||||||
|
echo "exit 0"
|
||||||
|
chmod +x $file
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
bison|yacc)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: \`$1' $msg. You should only need it if
|
||||||
|
you modified a \`.y' file. You may need the \`Bison' package
|
||||||
|
in order for those modifications to take effect. You can get
|
||||||
|
\`Bison' from any GNU archive site."
|
||||||
|
rm -f y.tab.c y.tab.h
|
||||||
|
if test $# -ne 1; then
|
||||||
|
eval LASTARG="\${$#}"
|
||||||
|
case $LASTARG in
|
||||||
|
*.y)
|
||||||
|
SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
|
||||||
|
if test -f "$SRCFILE"; then
|
||||||
|
cp "$SRCFILE" y.tab.c
|
||||||
|
fi
|
||||||
|
SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
|
||||||
|
if test -f "$SRCFILE"; then
|
||||||
|
cp "$SRCFILE" y.tab.h
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
if test ! -f y.tab.h; then
|
||||||
|
echo >y.tab.h
|
||||||
|
fi
|
||||||
|
if test ! -f y.tab.c; then
|
||||||
|
echo 'main() { return 0; }' >y.tab.c
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
lex|flex)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: \`$1' is $msg. You should only need it if
|
||||||
|
you modified a \`.l' file. You may need the \`Flex' package
|
||||||
|
in order for those modifications to take effect. You can get
|
||||||
|
\`Flex' from any GNU archive site."
|
||||||
|
rm -f lex.yy.c
|
||||||
|
if test $# -ne 1; then
|
||||||
|
eval LASTARG="\${$#}"
|
||||||
|
case $LASTARG in
|
||||||
|
*.l)
|
||||||
|
SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
|
||||||
|
if test -f "$SRCFILE"; then
|
||||||
|
cp "$SRCFILE" lex.yy.c
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
if test ! -f lex.yy.c; then
|
||||||
|
echo 'main() { return 0; }' >lex.yy.c
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
help2man)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: \`$1' is $msg. You should only need it if
|
||||||
|
you modified a dependency of a manual page. You may need the
|
||||||
|
\`Help2man' package in order for those modifications to take
|
||||||
|
effect. You can get \`Help2man' from any GNU archive site."
|
||||||
|
|
||||||
|
file=`echo "$*" | sed -n "$sed_output"`
|
||||||
|
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
|
||||||
|
if test -f "$file"; then
|
||||||
|
touch $file
|
||||||
|
else
|
||||||
|
test -z "$file" || exec >$file
|
||||||
|
echo ".ab help2man is required to generate this page"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
makeinfo)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: \`$1' is $msg. You should only need it if
|
||||||
|
you modified a \`.texi' or \`.texinfo' file, or any other file
|
||||||
|
indirectly affecting the aspect of the manual. The spurious
|
||||||
|
call might also be the consequence of using a buggy \`make' (AIX,
|
||||||
|
DU, IRIX). You might want to install the \`Texinfo' package or
|
||||||
|
the \`GNU make' package. Grab either from any GNU archive site."
|
||||||
|
# The file to touch is that specified with -o ...
|
||||||
|
file=`echo "$*" | sed -n "$sed_output"`
|
||||||
|
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
|
||||||
|
if test -z "$file"; then
|
||||||
|
# ... or it is the one specified with @setfilename ...
|
||||||
|
infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
|
||||||
|
file=`sed -n '
|
||||||
|
/^@setfilename/{
|
||||||
|
s/.* \([^ ]*\) *$/\1/
|
||||||
|
p
|
||||||
|
q
|
||||||
|
}' $infile`
|
||||||
|
# ... or it is derived from the source name (dir/f.texi becomes f.info)
|
||||||
|
test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info
|
||||||
|
fi
|
||||||
|
# If the file does not exist, the user really needs makeinfo;
|
||||||
|
# let's fail without touching anything.
|
||||||
|
test -f $file || exit 1
|
||||||
|
touch $file
|
||||||
|
;;
|
||||||
|
|
||||||
|
tar)
|
||||||
|
shift
|
||||||
|
|
||||||
|
# We have already tried tar in the generic part.
|
||||||
|
# Look for gnutar/gtar before invocation to avoid ugly error
|
||||||
|
# messages.
|
||||||
|
if (gnutar --version > /dev/null 2>&1); then
|
||||||
|
gnutar "$@" && exit 0
|
||||||
|
fi
|
||||||
|
if (gtar --version > /dev/null 2>&1); then
|
||||||
|
gtar "$@" && exit 0
|
||||||
|
fi
|
||||||
|
firstarg="$1"
|
||||||
|
if shift; then
|
||||||
|
case $firstarg in
|
||||||
|
*o*)
|
||||||
|
firstarg=`echo "$firstarg" | sed s/o//`
|
||||||
|
tar "$firstarg" "$@" && exit 0
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
case $firstarg in
|
||||||
|
*h*)
|
||||||
|
firstarg=`echo "$firstarg" | sed s/h//`
|
||||||
|
tar "$firstarg" "$@" && exit 0
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: I can't seem to be able to run \`tar' with the given arguments.
|
||||||
|
You may want to install GNU tar or Free paxutils, or check the
|
||||||
|
command line arguments."
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: \`$1' is needed, and is $msg.
|
||||||
|
You might have modified some files without having the
|
||||||
|
proper tools for further handling them. Check the \`README' file,
|
||||||
|
it often tells you about the needed prerequisites for installing
|
||||||
|
this package. You may also peek at any GNU archive site, in case
|
||||||
|
some other package would contain this missing \`$1' program."
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
# Local variables:
|
||||||
|
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||||
|
# time-stamp-start: "scriptversion="
|
||||||
|
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||||
|
# time-stamp-end: "$"
|
||||||
|
# End:
|
@ -0,0 +1,2 @@
|
|||||||
|
sed -e 's/^[ \t]*//' -e 's/[ \t][ \t]*/ /g' -e 's/[ \t]*$//'
|
||||||
|
|
@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
MGIZA=${QMT_HOME}/bin/mgiza
|
||||||
|
|
||||||
|
if [ $# -lt 4 ]; then
|
||||||
|
echo "OK, this is simple, put me into your Moses training directory, link your source/target corpus" 1>&2
|
||||||
|
echo "and run " $0 " PREFIX src_tag tgt_tag root-dir." 1>&2
|
||||||
|
echo "and get force-aligned data: root-dir/giza.[src-tgt|tgt-src]/*.A3.final.* " 1>&2
|
||||||
|
echo "make sure I can find PREFIX.src_tag-tgt_tag and PREFIX.tgt_tag-src_tag, and \${QMT_HOME} is set" 1>&2
|
||||||
|
exit
|
||||||
|
fi
|
||||||
|
|
||||||
|
PRE=$1
|
||||||
|
SRC=$2
|
||||||
|
TGT=$3
|
||||||
|
ROOT=$4
|
||||||
|
|
||||||
|
mkdir -p $ROOT/giza.${SRC}-${TGT}
|
||||||
|
mkdir -p $ROOT/giza.${TGT}-${SRC}
|
||||||
|
mkdir -p $ROOT/corpus
|
||||||
|
|
||||||
|
echo "Generating corpus file " 1>&2
|
||||||
|
|
||||||
|
${QMT_HOME}/scripts/plain2snt-hasvcb.py corpus/$SRC.vcb corpus/$TGT.vcb ${PRE}.${SRC} ${PRE}.${TGT} $ROOT/corpus/${TGT}-${SRC}.snt $ROOT/corpus/${SRC}-${TGT}.snt $ROOT/corpus/$SRC.vcb $ROOT/corpus/$TGT.vcb
|
||||||
|
|
||||||
|
ln -sf $PWD/corpus/$SRC.vcb.classes $PWD/corpus/$TGT.vcb.classes $ROOT/corpus/
|
||||||
|
|
||||||
|
echo "Generating co-occurrence file " 1>&2
|
||||||
|
|
||||||
|
${QMT_HOME}/bin/snt2cooc $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc $ROOT/corpus/$SRC.vcb $ROOT/corpus/$TGT.vcb $ROOT/corpus/${TGT}-${SRC}.snt
|
||||||
|
${QMT_HOME}/bin//snt2cooc $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc $ROOT/corpus/$TGT.vcb $ROOT/corpus/$SRC.vcb $ROOT/corpus/${SRC}-${TGT}.snt
|
||||||
|
|
||||||
|
echo "Running force alignment " 1>&2
|
||||||
|
|
||||||
|
$MGIZA giza.$TGT-$SRC/$TGT-$SRC.gizacfg -c $ROOT/corpus/$TGT-$SRC.snt -o $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC} \
|
||||||
|
-s $ROOT/corpus/$SRC.vcb -t $ROOT/corpus/$TGT.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc \
|
||||||
|
-restart 11 -previoust giza.$TGT-$SRC/$TGT-$SRC.t3.final \
|
||||||
|
-previousa giza.$TGT-$SRC/$TGT-$SRC.a3.final -previousd giza.$TGT-$SRC/$TGT-$SRC.d3.final \
|
||||||
|
-previousn giza.$TGT-$SRC/$TGT-$SRC.n3.final -previousd4 giza.$TGT-$SRC/$TGT-$SRC.d4.final \
|
||||||
|
-previousd42 giza.$TGT-$SRC/$TGT-$SRC.D4.final -m3 0 -m4 1
|
||||||
|
|
||||||
|
$MGIZA giza.$SRC-$TGT/$SRC-$TGT.gizacfg -c $ROOT/corpus/$SRC-$TGT.snt -o $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT} \
|
||||||
|
-s $ROOT/corpus/$TGT.vcb -t $ROOT/corpus/$SRC.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc \
|
||||||
|
-restart 11 -previoust giza.$SRC-$TGT/$SRC-$TGT.t3.final \
|
||||||
|
-previousa giza.$SRC-$TGT/$SRC-$TGT.a3.final -previousd giza.$SRC-$TGT/$SRC-$TGT.d3.final \
|
||||||
|
-previousn giza.$SRC-$TGT/$SRC-$TGT.n3.final -previousd4 giza.$SRC-$TGT/$SRC-$TGT.d4.final \
|
||||||
|
-previousd42 giza.$SRC-$TGT/$SRC-$TGT.D4.final -m3 0 -m4 1
|
||||||
|
|
112
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/scripts/giza2bal.pl
Executable file
112
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/scripts/giza2bal.pl
Executable file
@ -0,0 +1,112 @@
|
|||||||
|
#! /usr/bin/perl
|
||||||
|
|
||||||
|
# $Id: giza2bal.pl 1562 2008-02-19 20:48:14Z redpony $
|
||||||
|
#Converts direct and inverted alignments into a more compact
|
||||||
|
#bi-alignment format. It optionally reads the counting file
|
||||||
|
#produced by giza containing the frequency of each training sentence.
|
||||||
|
|
||||||
|
#Copyright Marcello Federico, November 2004
|
||||||
|
|
||||||
|
($cnt,$dir,$inv)=();
|
||||||
|
|
||||||
|
while ($w=shift @ARGV){
|
||||||
|
$dir=shift(@ARGV),next if $w eq "-d";
|
||||||
|
$inv=shift(@ARGV),next if $w eq "-i";
|
||||||
|
$cnt=shift(@ARGV),next if $w eq "-c";
|
||||||
|
}
|
||||||
|
|
||||||
|
my $lc = 0;
|
||||||
|
|
||||||
|
if (!$dir || !$inv){ # both -d and -i are mandatory; print usage if either is missing
|
||||||
|
print "usage: giza2bal.pl [-c <count-file>] -d <dir-align-file> -i <inv-align-file>\n";
|
||||||
|
print "input files can be also commands, e.g. -d \"gunzip -c file.gz\"\n";
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
$|=1;
|
||||||
|
|
||||||
|
open(DIR,"<$dir") || open(DIR,"$dir|") || die "cannot open $dir\n";
|
||||||
|
# Open the inverse alignment as a plain file, else as a command pipe; report the right name on failure.
open(INV,"<$inv") || open(INV,"$inv|") || die "cannot open $inv\n";
|
||||||
|
|
||||||
|
if ($cnt){
|
||||||
|
# Open the optional count file as a plain file, else as a command pipe; report the right name on failure.
open(CNT,"<$cnt") || open(CNT,"$cnt|") || die "cannot open $cnt\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub ReadBiAlign{
|
||||||
|
local($fd0,$fd1,$fd2,*s1,*s2,*a,*b,*c)=@_;
|
||||||
|
local($dummy,$n);
|
||||||
|
|
||||||
|
chop($c=<$fd0>); ## count
|
||||||
|
$dummy=<$fd0>; ## header
|
||||||
|
$dummy=<$fd0>; ## header
|
||||||
|
$c=1 if !$c;
|
||||||
|
|
||||||
|
$dummy=<$fd1>; ## header
|
||||||
|
chop($s1=<$fd1>);
|
||||||
|
chop($t1=<$fd1>);
|
||||||
|
|
||||||
|
$dummy=<$fd2>; ## header
|
||||||
|
chop($s2=<$fd2>);
|
||||||
|
chop($t2=<$fd2>);
|
||||||
|
|
||||||
|
@a=@b=();
|
||||||
|
$lc++;
|
||||||
|
|
||||||
|
#get target statistics
|
||||||
|
$n=1;
|
||||||
|
$t1=~s/NULL \(\{((\s+\d+)*)\s+\}\)//;
|
||||||
|
while ($t1=~s/(\S+)\s+\(\{((\s+\d+)*)\s+\}\)//){
|
||||||
|
grep($a[$_]=$n,split(/\s+/,$2));
|
||||||
|
$n++;
|
||||||
|
}
|
||||||
|
|
||||||
|
$m=1;
|
||||||
|
$t2=~s/NULL \(\{((\s+\d+)*)\s+\}\)//;
|
||||||
|
while ($t2=~s/(\S+)\s+\(\{((\s+\d+)*)\s+\}\)//){
|
||||||
|
grep($b[$_]=$m,split(/\s+/,$2));
|
||||||
|
$m++;
|
||||||
|
}
|
||||||
|
|
||||||
|
$M=split(/\s+/,$s1);
|
||||||
|
$N=split(/\s+/,$s2);
|
||||||
|
|
||||||
|
if ($m != ($M+1) || $n != ($N+1)) {
|
||||||
|
print STDERR "Sentence mismatch error! Line #$lc\n";
|
||||||
|
$s1 = "ALIGN_ERR";
|
||||||
|
$s2 = "ALIGN_ERR";
|
||||||
|
@a=(); @b=();
|
||||||
|
for ($j=1;$j<2;$j++){ $a[$j]=1; }
|
||||||
|
for ($i=1;$i<2;$i++){ $b[$i]=1; }
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for ($j=1;$j<$m;$j++){
|
||||||
|
$a[$j]=0 if !$a[$j];
|
||||||
|
}
|
||||||
|
|
||||||
|
for ($i=1;$i<$n;$i++){
|
||||||
|
$b[$i]=0 if !$b[$i];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
$skip=0;
|
||||||
|
$ccc=0;
|
||||||
|
while(!eof(DIR)){
|
||||||
|
|
||||||
|
if (ReadBiAlign(CNT,DIR,INV,*src,*tgt,*a,*b,*c))
|
||||||
|
{
|
||||||
|
$ccc++;
|
||||||
|
print "$c\n";
|
||||||
|
print $#a," $src \# @a[1..$#a]\n";
|
||||||
|
print $#b," $tgt \# @b[1..$#b]\n";
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
print "\n";
|
||||||
|
print STDERR "." if !(++$skip % 1000);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
print STDERR "skip=<$skip> counts=<$ccc>\n";
|
@ -0,0 +1,80 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Author : Qin Gao
|
||||||
|
# Date : Dec 31, 2007
|
||||||
|
# Purpose: Combine multiple alignment files into a single one, the files are
|
||||||
|
# produced by MGIZA, which has sentence IDs, and every file is
|
||||||
|
# ordered inside
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
|
||||||
|
if len(sys.argv)<2:
|
||||||
|
sys.stderr.write("Provide me the file names (at least 2)\n");
|
||||||
|
sys.exit();
|
||||||
|
|
||||||
|
sent_id = 0;
|
||||||
|
|
||||||
|
files = [];
|
||||||
|
ids = [];
|
||||||
|
|
||||||
|
sents = [];
|
||||||
|
done = [];
|
||||||
|
|
||||||
|
for i in range(1,len(sys.argv)):
|
||||||
|
files.append(open(sys.argv[i],"r"));
|
||||||
|
ids.append(0);
|
||||||
|
sents.append("");
|
||||||
|
done.append(False);
|
||||||
|
|
||||||
|
r = re.compile("\\((\\d+)\\)");
|
||||||
|
i = 0;
|
||||||
|
while i< len(files):
|
||||||
|
st1 = files[i].readline();
|
||||||
|
st2 = files[i].readline();
|
||||||
|
st3 = files[i].readline();
|
||||||
|
if len(st1)==0 or len(st2)==0 or len(st3)==0:
|
||||||
|
done[i] = True;
|
||||||
|
else:
|
||||||
|
mt = r.search(st1);
|
||||||
|
id = int(mt.group(1));
|
||||||
|
ids[i] = id;
|
||||||
|
sents[i] = (st1, st2, st3);
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
cont = True;
|
||||||
|
while (cont):
|
||||||
|
sent_id += 1;
|
||||||
|
writeOne = False;
|
||||||
|
# Now try to read more sentences
|
||||||
|
i = 0;
|
||||||
|
cont = False;
|
||||||
|
while i < len(files):
|
||||||
|
if done[i]:
|
||||||
|
i+=1
|
||||||
|
continue;
|
||||||
|
cont = True;
|
||||||
|
if ids[i] == sent_id:
|
||||||
|
sys.stdout.write("%s%s%s"%(sents[i][0],sents[i][1],sents[i][2]));
|
||||||
|
writeOne = True;
|
||||||
|
st1 = files[i].readline();
|
||||||
|
st2 = files[i].readline();
|
||||||
|
st3 = files[i].readline();
|
||||||
|
if len(st1)==0 or len(st2)==0 or len(st3)==0:
|
||||||
|
done[i] = True;
|
||||||
|
else:
|
||||||
|
mt = r.search(st1);
|
||||||
|
id = int(mt.group(1));
|
||||||
|
ids[i] = id;
|
||||||
|
sents[i] = (st1, st2, st3);
|
||||||
|
cont = True;
|
||||||
|
break;
|
||||||
|
elif ids[i] < sent_id:
|
||||||
|
sys.stderr.write("ERROR! DUPLICATED ENTRY %d\n" % ids[i]);
|
||||||
|
sys.exit();
|
||||||
|
else:
|
||||||
|
cont = True;
|
||||||
|
i+=1;
|
||||||
|
if (not writeOne) and cont:
|
||||||
|
sys.stderr.write("ERROR! MISSING ENTRy %d\n" % sent_id);
|
||||||
|
#sys.exit();
|
||||||
|
sys.stderr.write("Combined %d files, totally %d sents \n" %(len(files),sent_id-1));
|
@ -0,0 +1,93 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from sys import *
|
||||||
|
|
||||||
|
def loadvcb(fname,out):
|
||||||
|
dict={};
|
||||||
|
df = open(fname,"r");
|
||||||
|
for line in df:
|
||||||
|
out.write(line);
|
||||||
|
ws = line.strip().split();
|
||||||
|
id = int(ws[0]);
|
||||||
|
wd = ws[1];
|
||||||
|
dict[wd]=id;
|
||||||
|
return dict;
|
||||||
|
|
||||||
|
if len(argv)<9:
|
||||||
|
stderr.write("Error, the input should be \n");
|
||||||
|
stderr.write("%s evcb fvcb etxt ftxt esnt(out) fsnt(out) evcbx(out) fvcbx(out)\n" % argv[0]);
|
||||||
|
stderr.write("You should concatenate the evcbx and fvcbx to existing vcb files\n");
|
||||||
|
exit();
|
||||||
|
|
||||||
|
ein = open(argv[3],"r");
|
||||||
|
fin = open(argv[4],"r");
|
||||||
|
|
||||||
|
eout = open(argv[5],"w");
|
||||||
|
fout = open(argv[6],"w");
|
||||||
|
|
||||||
|
evcbx = open(argv[7],"w");
|
||||||
|
fvcbx = open(argv[8],"w");
|
||||||
|
evcb = loadvcb(argv[1],evcbx);
|
||||||
|
fvcb = loadvcb(argv[2],fvcbx);
|
||||||
|
|
||||||
|
i=0
|
||||||
|
while True:
|
||||||
|
i+=1;
|
||||||
|
eline=ein.readline();
|
||||||
|
fline=fin.readline();
|
||||||
|
if len(eline)==0 or len(fline)==0:
|
||||||
|
break;
|
||||||
|
ewords = eline.strip().split();
|
||||||
|
fwords = fline.strip().split();
|
||||||
|
el = [];
|
||||||
|
fl = [];
|
||||||
|
j=0;
|
||||||
|
for w in ewords:
|
||||||
|
j+=1
|
||||||
|
if evcb.has_key(w):
|
||||||
|
el.append(evcb[w]);
|
||||||
|
else:
|
||||||
|
if evcb.has_key(w.lower()):
|
||||||
|
el.append(evcb[w.lower()]);
|
||||||
|
else:
|
||||||
|
##stdout.write("#E %d %d %s\n" % (i,j,w))
|
||||||
|
#el.append(1);
|
||||||
|
nid = len(evcb)+1;
|
||||||
|
evcb[w.lower()] = nid;
|
||||||
|
evcbx.write("%d %s 1\n" % (nid, w));
|
||||||
|
el.append(nid);
|
||||||
|
|
||||||
|
j=0;
|
||||||
|
for w in fwords:
|
||||||
|
j+=1
|
||||||
|
if fvcb.has_key(w):
|
||||||
|
fl.append(fvcb[w]);
|
||||||
|
else:
|
||||||
|
if fvcb.has_key(w.lower()):
|
||||||
|
fl.append(fvcb[w.lower()]);
|
||||||
|
else:
|
||||||
|
#stdout.write("#F %d %d %s\n" % (i,j,w))
|
||||||
|
nid = len(fvcb)+1;
|
||||||
|
fvcb[w.lower()] = nid;
|
||||||
|
fvcbx.write("%d %s 1\n" % (nid, w));
|
||||||
|
fl.append(nid);
|
||||||
|
#fl.append(1);
|
||||||
|
eout.write("1\n");
|
||||||
|
fout.write("1\n");
|
||||||
|
for I in el:
|
||||||
|
eout.write("%d " % I);
|
||||||
|
eout.write("\n");
|
||||||
|
for I in fl:
|
||||||
|
eout.write("%d " % I);
|
||||||
|
fout.write("%d " % I);
|
||||||
|
eout.write("\n");
|
||||||
|
fout.write("\n");
|
||||||
|
for I in el:
|
||||||
|
fout.write("%d " % I);
|
||||||
|
fout.write("\n");
|
||||||
|
|
||||||
|
fout.close();
|
||||||
|
eout.close();
|
||||||
|
fvcbx.close();
|
||||||
|
evcbx.close();
|
||||||
|
|
116
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/scripts/sntpostproc.py
Executable file
116
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/scripts/sntpostproc.py
Executable file
@ -0,0 +1,116 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# This script post process the snt file -- either in single-line format or in multi-line format
|
||||||
|
# The output, however, will always be in single-line format
|
||||||
|
|
||||||
|
from sys import *
|
||||||
|
from optparse import OptionParser
|
||||||
|
import re;
|
||||||
|
usage = """
|
||||||
|
The script post process the snt file, the input could be single-line snt
|
||||||
|
file or multi-line, (triple line) and can insert sentence weight to the
|
||||||
|
file (-w) or add partial alignment to the file (-a)
|
||||||
|
Usage %prog -s sntfile -w weight-file -a alignfile -o outputfile
|
||||||
|
"""
|
||||||
|
parser = OptionParser(usage=usage)
|
||||||
|
|
||||||
|
|
||||||
|
# Build the parser with the usage text; a bare OptionParser() here would drop it from --help.
parser = OptionParser(usage=usage)
|
||||||
|
|
||||||
|
parser.add_option("-s", "--snt", dest="snt",default=None,
|
||||||
|
help="The input snt file", metavar="FILE")
|
||||||
|
|
||||||
|
parser.add_option("-w", "--weight", dest="weight",default=None,
|
||||||
|
help="The input weight file", metavar="FILE")
|
||||||
|
|
||||||
|
|
||||||
|
parser.add_option("-o", "--output", dest="output",default="-",
|
||||||
|
help="The input partial alignment file, one sentence per line", metavar="FILE")
|
||||||
|
|
||||||
|
parser.add_option("-a", "--align", dest="align",default=None,
|
||||||
|
help="The input partial alignment file, one sentence per line", metavar="FILE")
|
||||||
|
|
||||||
|
|
||||||
|
(options, args) = parser.parse_args()
|
||||||
|
|
||||||
|
if options.snt == None:
|
||||||
|
parser.print_help();
|
||||||
|
exit();
|
||||||
|
else:
|
||||||
|
sfile = open(options.snt,"r");
|
||||||
|
|
||||||
|
if options.output=="-":
|
||||||
|
ofile = stdout;
|
||||||
|
else:
|
||||||
|
ofile = open(options.output,"w");
|
||||||
|
|
||||||
|
wfile = None;
|
||||||
|
|
||||||
|
if options.weight <> None:
|
||||||
|
wfile = open(options.weight,"r");
|
||||||
|
|
||||||
|
afile = None;
|
||||||
|
if options.align <> None:
|
||||||
|
afile = open(options.align,"r");
|
||||||
|
|
||||||
|
rr = re.compile("[\\|\\#\\*]");
|
||||||
|
wt = 0.0;
|
||||||
|
al = {};
|
||||||
|
e = "";
|
||||||
|
f = "";
|
||||||
|
|
||||||
|
def parse_ax(line):
|
||||||
|
alq = {};
|
||||||
|
als = line.strip().split(" ");
|
||||||
|
for e in als:
|
||||||
|
if len(e.strip())>0:
|
||||||
|
alo = e.split("-");
|
||||||
|
if len(alo)==2:
|
||||||
|
alq[tuple(alo)] = 1;
|
||||||
|
return alq;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
while True:
|
||||||
|
l = sfile.readline();
|
||||||
|
if len(l) == 0:
|
||||||
|
break;
|
||||||
|
lp = rr.split(l.strip());
|
||||||
|
if len(lp)>=3:
|
||||||
|
wt = float(lp[0]);
|
||||||
|
e = lp[1];
|
||||||
|
f = lp[2];
|
||||||
|
if len(lp) > 3:
|
||||||
|
al = parse_ax(lp[3]);
|
||||||
|
else:
|
||||||
|
al = {};
|
||||||
|
else:
|
||||||
|
wt = float(l);
|
||||||
|
e = sfile.readline().strip();
|
||||||
|
f = sfile.readline().strip();
|
||||||
|
al={}
|
||||||
|
if wfile <> None:
|
||||||
|
lw = wfile.readline().strip();
|
||||||
|
if len(lw)>0:
|
||||||
|
wt = float(lw);
|
||||||
|
else:
|
||||||
|
wt = 1;
|
||||||
|
if afile <> None:
|
||||||
|
la = afile.readline().strip();
|
||||||
|
if len(la)>0:
|
||||||
|
al1 = parse_ax(la);
|
||||||
|
for entry in al1.keys():
|
||||||
|
al[entry] = 1;
|
||||||
|
|
||||||
|
ofile.write("%g | %s | %s" % (wt, e, f));
|
||||||
|
if len(al)>0:
|
||||||
|
ofile.write(" |");
|
||||||
|
|
||||||
|
for entry in al.keys():
|
||||||
|
ofile.write(" %s-%s" % entry);
|
||||||
|
ofile.write("\n");
|
||||||
|
|
||||||
|
|
@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
OUTPUT=$1
|
||||||
|
shift
|
||||||
|
GIZA2BAL=$1
|
||||||
|
shift
|
||||||
|
SYMAL=$1
|
||||||
|
shift
|
||||||
|
STOT=$1
|
||||||
|
shift
|
||||||
|
TTOS=$1
|
||||||
|
shift
|
||||||
|
|
||||||
|
# Quote the alignment inputs so a command such as "gunzip -c file.gz" reaches giza2bal as ONE argument,
# and forward the remaining symal options unchanged with "$@". $SYMAL stays unquoted so it may carry flags.
perl "$GIZA2BAL" -d "${STOT}" -i "${TTOS}" | $SYMAL "$@" > "$OUTPUT"
|
||||||
|
|
@ -0,0 +1,17 @@
|
|||||||
|
.libs
|
||||||
|
.deps
|
||||||
|
.*swp
|
||||||
|
.nautilus-metafile.xml
|
||||||
|
*.autosave
|
||||||
|
*.bak
|
||||||
|
*~
|
||||||
|
#*#
|
||||||
|
*.gladep
|
||||||
|
*.la
|
||||||
|
*.lo
|
||||||
|
*.o
|
||||||
|
*.class
|
||||||
|
*.pyc
|
||||||
|
*.plugin
|
||||||
|
Makefile
|
||||||
|
Makefile.in
|
@ -0,0 +1,212 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#include "ATables.h"
|
||||||
|
#include "Globals.h"
|
||||||
|
#include "myassert.h"
|
||||||
|
#include "Parameter.h"
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(bool,CompactADTable,"compactadtable","1: only 3-dimensional alignment table for IBM-2 and IBM-3",PARLEV_MODELS,1);
|
||||||
|
GLOBAL_PARAMETER(float,amodel_smooth_factor,"model23SmoothFactor","smoothing parameter for IBM-2/3 (interpolation with constant)",PARLEV_SMOOTH,0.0);
|
||||||
|
|
||||||
|
template <class VALTYPE>
|
||||||
|
void amodel<VALTYPE>::printTable(const char *filename) const{
|
||||||
|
// print amodel to file with the name filename (it'll be created or overwritten
|
||||||
|
// format : for a table :
|
||||||
|
// aj j l m val
|
||||||
|
// where aj is source word pos, j target word pos, l source sentence length,
|
||||||
|
// m is target sentence length.
|
||||||
|
//
|
||||||
|
//return;
|
||||||
|
if (is_distortion)
|
||||||
|
cout << "Dumping pruned distortion table (d) to file:" << filename <<'\n';
|
||||||
|
else
|
||||||
|
cout << "Dumping pruned alignment table (a) to file:" << filename <<'\n';
|
||||||
|
|
||||||
|
ofstream of(filename);
|
||||||
|
double ssum=0.0;
|
||||||
|
for(WordIndex l=0; l < MaxSentLength; l++){
|
||||||
|
for(WordIndex m=0;m<MaxSentLength;m++){
|
||||||
|
if( CompactADTable && l!=m )
|
||||||
|
continue;
|
||||||
|
unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
|
||||||
|
unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
|
||||||
|
if( is_distortion==0 ){
|
||||||
|
for(WordIndex j=1;j<=M; j++){
|
||||||
|
double sum=0.0;
|
||||||
|
for(WordIndex i=0;i<=L; i++){
|
||||||
|
VALTYPE x=getValue(i, j, L, M);
|
||||||
|
if( x>PROB_SMOOTH ){
|
||||||
|
of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
|
||||||
|
sum+=x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ssum+=sum;
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
for(WordIndex i=0;i<=L;i++){
|
||||||
|
double sum=0.0;
|
||||||
|
for(WordIndex j=1;j<=M;j++){
|
||||||
|
VALTYPE x=getValue(j, i, L, M);
|
||||||
|
if( x>PROB_SMOOTH ){
|
||||||
|
of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
|
||||||
|
sum+=x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ssum+=sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class VALTYPE>
|
||||||
|
void amodel<VALTYPE>::printRealTable(const char *filename) const{
|
||||||
|
// print amodel to file with the name filename (it'll be created or overwritten
|
||||||
|
// format : for a table :
|
||||||
|
// aj j l m val
|
||||||
|
// where aj is source word pos, j target word pos, l source sentence length,
|
||||||
|
// m is target sentence length.
|
||||||
|
//
|
||||||
|
//return;
|
||||||
|
if (is_distortion)
|
||||||
|
cout << "Dumping not pruned distortion table (d) to file:" << filename <<'\n';
|
||||||
|
else
|
||||||
|
cout << "Dumping not pruned alignment table (a) to file:" << filename <<'\n';
|
||||||
|
|
||||||
|
ofstream of(filename);
|
||||||
|
for(WordIndex l=0; l < MaxSentLength; l++){
|
||||||
|
for(WordIndex m=0;m<MaxSentLength;m++){
|
||||||
|
if( CompactADTable && l!=m )
|
||||||
|
continue;
|
||||||
|
unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
|
||||||
|
unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
|
||||||
|
if( is_distortion==0 ){
|
||||||
|
for(WordIndex j=1;j<=M; j++){
|
||||||
|
for(WordIndex i=0;i<=L; i++){
|
||||||
|
VALTYPE x=getValue(i, j, L, M);
|
||||||
|
if( x>MINCOUNTINCREASE )
|
||||||
|
of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
for(WordIndex i=0;i<=L;i++){
|
||||||
|
for(WordIndex j=1;j<=M;j++){
|
||||||
|
VALTYPE x=getValue(j, i, L, M);
|
||||||
|
if( x>MINCOUNTINCREASE )
|
||||||
|
of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
extern short NoEmptyWord;
|
||||||
|
|
||||||
|
template <class VALTYPE>
|
||||||
|
bool amodel<VALTYPE>::readTable(const char *filename){
|
||||||
|
/* This function reads the a table from a file.
|
||||||
|
Each line is of the format: aj j l m val
|
||||||
|
where aj is the source word position, j the target word position,
|
||||||
|
l the source sentence length, and m the target sentence length
|
||||||
|
|
||||||
|
This function also works for a d table, where the positions
|
||||||
|
of aj and i are swapped. Both the a and d tables are 4 dimensional
|
||||||
|
hashes; this function will simply read in the four values and keep
|
||||||
|
them in that order when hashing the fifth value.
|
||||||
|
NAS, 7/11/99
|
||||||
|
*/
|
||||||
|
ifstream inf(filename);
|
||||||
|
cout << "Reading a/d table from " << filename << "\n";
|
||||||
|
if(!inf){
|
||||||
|
cerr << "\nERROR: Cannot open " << filename<<"\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
WordIndex w, x, l, m;
|
||||||
|
VALTYPE prob;
|
||||||
|
while(inf >> w >> x >> l >> m >> prob )
|
||||||
|
// the NULL word is added to the length
|
||||||
|
// of the sentence in the tables, but discount it when you write the tables.
|
||||||
|
setValue(w, x, l, m, prob);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class VALTYPE>
|
||||||
|
bool amodel<VALTYPE>::readAugTable(const char *filename){
|
||||||
|
/* This function reads the a table from a file.
|
||||||
|
Each line is of the format: aj j l m val
|
||||||
|
where aj is the source word position, j the target word position,
|
||||||
|
l the source sentence length, and m the target sentence length
|
||||||
|
|
||||||
|
This function also works for a d table, where the positions
|
||||||
|
of aj and i are swapped. Both the a and d tables are 4 dimensional
|
||||||
|
hashes; this function will simply read in the four values and keep
|
||||||
|
them in that order when hashing the fifth value.
|
||||||
|
NAS, 7/11/99
|
||||||
|
*/
|
||||||
|
ifstream inf(filename);
|
||||||
|
cout << "Reading a/d table from " << filename << "\n";
|
||||||
|
if(!inf){
|
||||||
|
cerr << "\nERROR: Cannot open " << filename<<"\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
WordIndex w, x, l, m;
|
||||||
|
VALTYPE prob;
|
||||||
|
while(inf >> w >> x >> l >> m >> prob )
|
||||||
|
// the NULL word is added to the length
|
||||||
|
// of the sentence in the tables, but discount it when you write the tables.
|
||||||
|
addValue(w, x, l, m, prob);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class VALTYPE>
|
||||||
|
bool amodel<VALTYPE>::merge(amodel<VALTYPE>& am){
|
||||||
|
cout << "start merging " <<"\n";
|
||||||
|
for(WordIndex l=0; l < MaxSentLength; l++){
|
||||||
|
for(WordIndex m=0;m<MaxSentLength;m++){
|
||||||
|
if( CompactADTable && l!=m )
|
||||||
|
continue;
|
||||||
|
unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
|
||||||
|
unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
|
||||||
|
if( is_distortion==0 ){
|
||||||
|
for(WordIndex j=1;j<=M; j++){
|
||||||
|
for(WordIndex i=0;i<=L; i++){
|
||||||
|
VALTYPE x=am.getValue(i, j, L, M);
|
||||||
|
addValue(i,j,L,M,x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
for(WordIndex i=0;i<=L;i++){
|
||||||
|
for(WordIndex j=1;j<=M;j++){
|
||||||
|
VALTYPE x=am.getValue(j, i, L, M);
|
||||||
|
addValue(j,i,L,M,x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Explicit instantiation: the member functions defined in this file are
// compiled for amodel<COUNT> only.
template class amodel<COUNT> ;
|
||||||
|
//template class amodel<PROB> ;
|
@ -0,0 +1,191 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/* --------------------------------------------------------------------------*
|
||||||
|
* *
|
||||||
|
* Module :ATables *
|
||||||
|
* *
|
||||||
|
* Prototypes File: ATables.h *
|
||||||
|
* *
|
||||||
|
* Objective: Defines classes and methods for handling I/O for distortion & *
|
||||||
|
* alignment tables. *
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef _atables_h
|
||||||
|
#define _atables_h 1
|
||||||
|
|
||||||
|
#include "defs.h"
|
||||||
|
#include <cassert>
|
||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <functional>
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
#include "Vector.h"
|
||||||
|
#include <utility>
|
||||||
|
#if __GNUC__>2
|
||||||
|
#include <ext/hash_map>
|
||||||
|
using __gnu_cxx::hash_map;
|
||||||
|
#else
|
||||||
|
#include <hash_map>
|
||||||
|
#endif
|
||||||
|
#include <fstream>
|
||||||
|
#include "Array4.h"
|
||||||
|
#include "myassert.h"
|
||||||
|
#include "Globals.h"
|
||||||
|
#include "syncObj.h"
|
||||||
|
|
||||||
|
extern bool CompactADTable;
|
||||||
|
extern float amodel_smooth_factor;
|
||||||
|
extern short NoEmptyWord;
|
||||||
|
|
||||||
|
/* ------------------- Class Definitions of amodel --------------------------*/
|
||||||
|
/* Class Name: amodel:
|
||||||
|
Objective: This defines the underlying data structure for distortion prob.
|
||||||
|
and count tables. They are defined as a hash table. Each entry in the hash
|
||||||
|
table is the probability (d(j/l,m,i), where j is word target position, i is
|
||||||
|
source word position connected to it, m is target sentence length, and l is
|
||||||
|
source sentence length) or count collected for it. The probability and the
|
||||||
|
count are represented as log integer probability as
|
||||||
|
defined by the class LogProb .
|
||||||
|
|
||||||
|
This class is used to represent a (alignment probability) and d (distortion)
|
||||||
|
tables and also their corresponding count tables .
|
||||||
|
|
||||||
|
*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
/* Absolute value of an int: returns -a for negative a, a otherwise. */
inline int Mabs(int a){
  return (a<0) ? -a : a;
}
|
||||||
|
|
||||||
|
template <class VALTYPE>
|
||||||
|
class amodel{
|
||||||
|
public:
|
||||||
|
Array4<VALTYPE> a;
|
||||||
|
bool is_distortion ;
|
||||||
|
WordIndex MaxSentLength;
|
||||||
|
bool ignoreL, ignoreM;
|
||||||
|
VALTYPE get(WordIndex aj, WordIndex j, WordIndex l, WordIndex m)const{
|
||||||
|
massert( (!is_distortion) || aj<=m );massert( (!is_distortion) || j<=l );massert( (!is_distortion) || aj!=0 );
|
||||||
|
massert( is_distortion || aj<=l );massert( is_distortion || j<=m );massert( (is_distortion) || j!=0 );
|
||||||
|
massert( l<MaxSentLength );massert( m<MaxSentLength );
|
||||||
|
return a.get(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
|
||||||
|
}
|
||||||
|
|
||||||
|
static float smooth_factor;
|
||||||
|
amodel(bool flag = false)
|
||||||
|
: a(MAX_SENTENCE_LENGTH+1,0.0), is_distortion(flag), MaxSentLength(MAX_SENTENCE_LENGTH)
|
||||||
|
{};
|
||||||
|
|
||||||
|
protected:
|
||||||
|
VALTYPE&getRef(WordIndex aj, WordIndex j, WordIndex l, WordIndex m){
|
||||||
|
massert( (!is_distortion) || aj<=m );massert( (!is_distortion) || j<=l );
|
||||||
|
massert( is_distortion || aj<=l );massert( is_distortion || j<=m );massert( (is_distortion) || j!=0 );
|
||||||
|
massert( l<MaxSentLength );massert( m<MaxSentLength );
|
||||||
|
return a(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
void setValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val) {
|
||||||
|
lock.lock();
|
||||||
|
getRef(aj, j, l, m)=val;
|
||||||
|
lock.unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
Mutex lock;
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
By Qin
|
||||||
|
*/
|
||||||
|
void addValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val) {
|
||||||
|
lock.lock();
|
||||||
|
getRef(aj, j, l, m)+=val;
|
||||||
|
lock.unlock();
|
||||||
|
}
|
||||||
|
bool merge(amodel<VALTYPE>& am);
|
||||||
|
VALTYPE getValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m) const{
|
||||||
|
if( is_distortion==0 )
|
||||||
|
return max(double(PROB_SMOOTH),amodel_smooth_factor/(l+1)+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
|
||||||
|
else
|
||||||
|
return max(double(PROB_SMOOTH),amodel_smooth_factor/m+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
|
||||||
|
}
|
||||||
|
|
||||||
|
void printTable(const char* filename)const ;
|
||||||
|
void printRealTable(const char* filename)const ;
|
||||||
|
template<class COUNT>
|
||||||
|
void normalize(amodel<COUNT>& aTable)const
|
||||||
|
{
|
||||||
|
WordIndex i, j, l, m ;
|
||||||
|
COUNT total;
|
||||||
|
int nParam=0;
|
||||||
|
for(l=0;l<MaxSentLength;l++){
|
||||||
|
for(m=0;m<MaxSentLength;m++){
|
||||||
|
if( CompactADTable && l!=m )
|
||||||
|
continue;
|
||||||
|
unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
|
||||||
|
unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
|
||||||
|
if( is_distortion==0 ){
|
||||||
|
for(j=1;j<=M; j++){
|
||||||
|
total=0.0;
|
||||||
|
for(i=0;i<=L;i++){
|
||||||
|
total+=get(i, j, L, M);
|
||||||
|
}
|
||||||
|
if( total ){
|
||||||
|
for(i=0;i<=L;i++){
|
||||||
|
nParam++;
|
||||||
|
aTable.getRef(i, j, L, M)=get(i, j, L, M)/total;
|
||||||
|
massert(aTable.getRef(i,j,L,M)<=1.0);
|
||||||
|
if( NoEmptyWord&&i==0 )
|
||||||
|
aTable.getRef(i,j,L,M)=0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
for(i=0;i<=L;i++){
|
||||||
|
total=0.0;
|
||||||
|
for(j=1;j<=M;j++)
|
||||||
|
total+=get(j, i, L, M);
|
||||||
|
if( total )
|
||||||
|
for(j=1;j<=M;j++){
|
||||||
|
aTable.getRef(j, i, L, M)=amodel_smooth_factor/M+(1.0-amodel_smooth_factor)*get(j, i, L, M)/total;
|
||||||
|
nParam++;
|
||||||
|
massert(aTable.getRef(j,i,L,M)<=1.0);
|
||||||
|
if( NoEmptyWord&&i==0 )
|
||||||
|
aTable.getRef(j,i,L,M)=0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cout << "A/D table contains " << nParam << " parameters.\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
bool readTable(const char *filename);
|
||||||
|
bool readAugTable(const char *filename);
|
||||||
|
void clear()
|
||||||
|
{a.clear();}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ------------------- End of amodel Class Definitions ----------------------*/
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#include "AlignTables.h"
|
||||||
|
|
||||||
|
/*
 * Store alignment `aj` with value `val`.
 * Nothing is stored, and false is returned, when the alignment is already
 * present or when val <= 0; otherwise the pair is inserted and true is
 * returned.
 */
bool alignmodel::insert(Vector<WordIndex>& aj, LogProb val)
{
  const bool alreadyStored = (a.find(aj) != a.end());
  if( alreadyStored || val <= 0 )
    return false ;

  a.insert(pair<const Vector<WordIndex>, LogProb>(aj, val));
  return true ;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Look up the value stored for alignment `align`.
 * Returns a LogProb constructed from 0.0 when the alignment is not stored.
 */
LogProb alignmodel::getValue(Vector<WordIndex>& align) const
{
  hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::const_iterator hit = a.find(align);
  if( hit != a.end() )
    return hit->second;

  const LogProb zero = 0.0 ;
  return zero;
}
|
@ -0,0 +1,124 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef _aligntables_h
|
||||||
|
#define _aligntables_h 1
|
||||||
|
|
||||||
|
#include "defs.h"
|
||||||
|
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <functional>
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
//#include <vector>
|
||||||
|
#include "Vector.h"
|
||||||
|
#include <utility>
|
||||||
|
#if __GNUC__>2
|
||||||
|
#include <ext/hash_map>
|
||||||
|
using __gnu_cxx::hash_map;
|
||||||
|
#else
|
||||||
|
#include <hash_map>
|
||||||
|
#endif
|
||||||
|
#include <cmath>
|
||||||
|
#include <fstream>
|
||||||
|
#include "transpair_model1.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* ----------------- Class Definitions for hashmyalignment -------------------
|
||||||
|
Objective: This class is used to define a hash mapping function to map
|
||||||
|
an alignment (defined as a vector of integers) into a hash key
|
||||||
|
----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
/*
 * Hash functor for alignments (vectors of word indices).
 *
 * h(key) = ( sum over j>=1 of key[j]^((j % 6)+1) ) % 1000000
 *
 * Element 0 is skipped, matching equal_to_myalignment, which also ignores
 * element 0.  The powers are computed in size_t arithmetic, which wraps on
 * overflow; the former pow()/(int) cast was undefined as soon as a power
 * exceeded INT_MAX (e.g. 100^6).  For powers that fit in an int the result
 * is the same as before.
 */
class hashmyalignment : public unary_function< Vector<WordIndex>, size_t >
{
 public:
  size_t operator() (const Vector<WordIndex>& key) const
  {
    size_t key_sum = 0 ;
    for (WordIndex j = 1 ; j < key.size() ; j++){
      const size_t base = key[j];
      const unsigned int exponent = (j % 6)+1;
      size_t term = 1;
      for (unsigned int e = 0 ; e < exponent ; e++)
        term *= base;
      key_sum += term;
    }
    return key_sum % 1000000 ;
  }
};
|
||||||
|
|
||||||
|
class equal_to_myalignment{
|
||||||
|
// returns true if two alignments are the same (two vectors have same enties)
|
||||||
|
public:
|
||||||
|
bool operator()(const Vector<WordIndex> t1,
|
||||||
|
const Vector<WordIndex> t2) const
|
||||||
|
{WordIndex j ;
|
||||||
|
if (t1.size() != t2.size())
|
||||||
|
return(false);
|
||||||
|
for (j = 1 ; j < t1.size() ; j++)
|
||||||
|
if (t1[j] != t2[j])
|
||||||
|
return(false);
|
||||||
|
return(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ---------------- End of Class Definition for hashmyalignment -------------*/
|
||||||
|
|
||||||
|
|
||||||
|
/* ------------------ Class Definitions for alignmodel ----------------------
|
||||||
|
Class Name: alignmodel
|
||||||
|
Objective: Alignment neighborhoods (collections of alignments) are stored in
|
||||||
|
a hash table (for easy lookup). Each alignment vector is mapped into a hash
|
||||||
|
key using the operator defined above.
|
||||||
|
*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
class alignmodel{
|
||||||
|
private:
|
||||||
|
hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment > a;
|
||||||
|
private:
|
||||||
|
// void erase(Vector<WordIndex>&);
|
||||||
|
public:
|
||||||
|
|
||||||
|
// methods;
|
||||||
|
|
||||||
|
inline hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::iterator begin(void){return a.begin();} // begining of hash
|
||||||
|
inline hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::iterator end(void){return a.end();} // end of hash
|
||||||
|
inline const hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >& getHash() const {return a;}; // reference to hash table
|
||||||
|
bool insert(Vector<WordIndex>&, LogProb val=0.0); // add a alignmnet
|
||||||
|
// void setValue(Vector<WordIndex>&, LogProb val); // not needed
|
||||||
|
LogProb getValue(Vector<WordIndex>&)const; // retrieve prob. of alignment
|
||||||
|
inline void clear(void){ a.clear();}; // clear hash table
|
||||||
|
// void printTable(const char* filename);
|
||||||
|
inline void resize(WordIndex n) {a.resize(n);}; // resize table
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/* -------------- End of alignmodel Class Definitions -----------------------*/
|
||||||
|
#endif
|
@ -0,0 +1,5 @@
|
|||||||
|
#ifndef GIZA_ARRAY_H_DEFINED
|
||||||
|
#define GIZA_ARRAY_H_DEFINED
|
||||||
|
#include "Vector.h"
|
||||||
|
/* Array is a textual alias for Vector (from Vector.h): every later use of the
   identifier `Array` is replaced by `Vector` by the preprocessor. */
#define Array Vector
|
||||||
|
#endif
|
@ -0,0 +1,126 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/*--
|
||||||
|
Array2: Implementation of a two-dimensional checked array allowing for
|
||||||
|
a specified underlying one-dimensional data structure.
|
||||||
|
|
||||||
|
Franz Josef Och (30/07/99)
|
||||||
|
--*/
|
||||||
|
#ifndef CLASS_Array2_DEFINED
|
||||||
|
#define CLASS_Array2_DEFINED
|
||||||
|
|
||||||
|
#include "mystl.h"
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
/*
 * Array2<T, Y>: h1 x h2 array stored row-major in a one-dimensional
 * container Y (default vector<T>); element (i, j) lives at p[i*h2+j].
 * All element accessors assert that the indices are in range.
 */
template<class T, class Y=vector<T> > class Array2 {
public:
  Y p;                  // flat storage, h1*h2 elements
  unsigned int h1, h2;  // number of rows / columns
public:
  /* h1 x h2 array of default-constructed elements. */
  Array2(unsigned int _h1, unsigned int _h2) :
    p(_h1*_h2), h1(_h1), h2(_h2) {
  }
  /* h1 x h2 array with every element set to _init. */
  Array2(unsigned int _h1, unsigned int _h2, const T&_init) :
    p(_h1*_h2, _init), h1(_h1), h2(_h2) {
  }
  /* Empty 0 x 0 array. */
  Array2() :
    h1(0), h2(0) {
  }
  inline T &operator()(unsigned int i, unsigned int j) {
    assert(i<h1);
    assert(j<h2);
    return p[i*h2+j];
  }
  inline const T&operator()(unsigned int i, unsigned int j) const {
    assert(i<h1);
    assert(j<h2);
    return p[i*h2+j];
  }
  inline T get(unsigned int i, unsigned int j) {
    assert(i<h1);
    assert(j<h2);
    return p[i*h2+j];
  }
  inline void set(unsigned int i, unsigned int j, T x) {
    assert(i<h1);
    assert(j<h2);
    p[i*h2+j]=x;
  }
  inline const T get(unsigned int i, unsigned int j) const {
    assert(i<h1);
    assert(j<h2);
    return p[i*h2+j];
  }
  inline unsigned int getLen1() const {
    return h1;
  }
  inline unsigned int getLen2() const {
    return h2;
  }

  /* Pointer to the first element, or 0 for an empty array. */
  inline T*begin() {
    if (h1==0||h2==0)
      return 0;
    return &(p[0]);
  }
  /* Pointer one past the last element, or 0 for an empty array. */
  inline T*end() {
    if (h1==0||h2==0)
      return 0;
    return &(p[0])+p.size();
  }

  /* Const overloads.  They mirror the non-const versions; returning
     p.begin()/p.end() directly does not compile for vector<T>, whose
     const_iterator is not convertible to const T*. */
  inline const T*begin() const {
    if (h1==0||h2==0)
      return 0;
    return &(p[0]);
  }
  inline const T*end() const {
    if (h1==0||h2==0)
      return 0;
    return &(p[0])+p.size();
  }

  /* Print one row per line, elements separated by blanks, then endl. */
  friend ostream&operator<<(ostream&out, const Array2<T, Y>&ar) {
    for (unsigned int i=0; i<ar.getLen1(); i++) {
      for (unsigned int j=0; j<ar.getLen2(); j++)
        out << ar(i, j) << ' ';
      out << '\n';
    }
    return out << endl;
  }
  /* Change the shape to a x b.  No-op when the shape is unchanged;
     otherwise the flat storage is resized, so existing elements keep their
     flat position, not their (i, j) position. */
  inline void resize(unsigned int a, unsigned int b) {
    if ( !(a==h1&&b==h2)) {
      h1=a;
      h2=b;
      p.resize(h1*h2);
    }
  }
  /* Change the shape to a x b and set EVERY element to t.  No-op when the
     shape is unchanged. */
  inline void resize(unsigned int a, unsigned int b, const T&t) {
    if ( !(a==h1&&b==h2)) {
      h1=a;
      h2=b;
      p.resize(h1*h2);
      fill(p.begin(), p.end(), t);
    }
  }
};
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,78 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef AlignmentArray4_h_DEFINED
|
||||||
|
#define AlignmentArray4_h_DEFINED
|
||||||
|
|
||||||
|
#include "Array2.h"
|
||||||
|
template<class T> class Array4
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
Array2< Array2<T>* > A;
|
||||||
|
int M;
|
||||||
|
T init;
|
||||||
|
public:
|
||||||
|
Array4(int m,const T&_init)
|
||||||
|
: A(m,m,0),M(m),init(_init) {}
|
||||||
|
~Array4()
|
||||||
|
{
|
||||||
|
for(int l=0;l<M;++l)
|
||||||
|
for(int m=0;m<M;++m)
|
||||||
|
delete A(l,m);
|
||||||
|
}
|
||||||
|
const T&operator()(int i, int j, int l, int m)const
|
||||||
|
{
|
||||||
|
if( A(l,m)==0 )
|
||||||
|
return init;
|
||||||
|
else
|
||||||
|
return (*A(l,m))(i,j);
|
||||||
|
}
|
||||||
|
const T&get(int i, int j, int l, int m)const
|
||||||
|
{
|
||||||
|
if( A(l,m)==0 )
|
||||||
|
return init;
|
||||||
|
else
|
||||||
|
return (*A(l,m))(i,j);
|
||||||
|
}
|
||||||
|
T&operator()(int i, int j, int l, int m)
|
||||||
|
{
|
||||||
|
if( A(l,m)==0 )
|
||||||
|
{
|
||||||
|
A(l,m)=new Array2<T>(max(l+1,m+1),max(l+1,m+1),init);
|
||||||
|
}
|
||||||
|
return (*A(l,m))(i,j);
|
||||||
|
}
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
for(int l=0;l<M;++l)
|
||||||
|
for(int m=0;m<M;++m)
|
||||||
|
if( A(l,m) )
|
||||||
|
{
|
||||||
|
Array2<T>&a=*A(l,m);
|
||||||
|
for(int i=0;i<=l;++i)
|
||||||
|
for(int j=0;j<=m;++j)
|
||||||
|
a(i,j)=0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,772 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef _d4tables_h_define
|
||||||
|
#define _d4tables_h_define
|
||||||
|
#include <cmath>
|
||||||
|
#include "WordClasses.h"
|
||||||
|
#include "Globals.h"
|
||||||
|
#include "myassert.h"
|
||||||
|
#include "syncObj.h"
|
||||||
|
extern float d4modelsmooth_factor;
|
||||||
|
|
||||||
|
class m4_key {
|
||||||
|
public:
|
||||||
|
int deps;
|
||||||
|
int l;
|
||||||
|
int m;
|
||||||
|
int F;
|
||||||
|
int E;
|
||||||
|
int prevj;
|
||||||
|
int vacancies1, vacancies2;
|
||||||
|
m4_key(int _deps, int _l, int _m, int _F, int _E, int _prevj, int _v1,
|
||||||
|
int _v2) :
|
||||||
|
deps(_deps), l(_l), m(_m), F(_F), E(_E), prevj(_prevj),
|
||||||
|
vacancies1(_v1), vacancies2(_v2) {
|
||||||
|
}
|
||||||
|
friend ostream&print1(ostream&out, const m4_key&x, const WordClasses&wce,
|
||||||
|
const WordClasses&wcf) {
|
||||||
|
if (x.deps&DEP_MODEL_l)
|
||||||
|
out << "l: " << x.l<<' ';
|
||||||
|
if (x.deps&DEP_MODEL_m)
|
||||||
|
out << "m: " << x.m<<' ';
|
||||||
|
if (x.deps&DEP_MODEL_F)
|
||||||
|
out << "F: " << wcf.classString(x.F)<< ' ';
|
||||||
|
if (x.deps&DEP_MODEL_E)
|
||||||
|
out << "E: " << wce.classString(x.E)<< ' ';
|
||||||
|
// if(x.deps&DEP_MODEL_pj)out << "j-1: " << x.prevj<<' ';
|
||||||
|
if (x.vacancies1!=-1)
|
||||||
|
out << "v1: " << x.vacancies1 << ' ';
|
||||||
|
if (x.vacancies2!=-1)
|
||||||
|
out << "v2: " << x.vacancies2 << ' ';
|
||||||
|
return out << '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
friend ostream&print1_m5(ostream&out, const m4_key&x,
|
||||||
|
const WordClasses&wce, const WordClasses&wcf) {
|
||||||
|
out << ((x.deps&DEP_MODEL_E) ? wce.classString(x.E) : string("0"))
|
||||||
|
<< ' ';
|
||||||
|
out << ((x.deps&DEP_MODEL_F) ? wcf.classString(x.F) : string("0"))
|
||||||
|
<< ' ';
|
||||||
|
out << x.vacancies1 << ' ';
|
||||||
|
out << x.vacancies2 << ' ';
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
friend ostream&printb1(ostream&out, const m4_key&x, const WordClasses&wce,
|
||||||
|
const WordClasses&wcf) {
|
||||||
|
if (x.deps&DEP_MODELb_l)
|
||||||
|
out << "l: " << x.l<<' ';
|
||||||
|
if (x.deps&DEP_MODELb_m)
|
||||||
|
out << "m: " << x.m<<' ';
|
||||||
|
if (x.deps&DEP_MODELb_F)
|
||||||
|
out << "F: " << wcf.classString(x.F) << ' ';
|
||||||
|
if (x.deps&DEP_MODELb_E)
|
||||||
|
out << "E: " << wce.classString(x.E) << ' ';
|
||||||
|
if (x.vacancies1!=-1)
|
||||||
|
out << "v1: " << x.vacancies1 << ' ';
|
||||||
|
if (x.vacancies2!=-1)
|
||||||
|
out << "v2: " << x.vacancies2 << ' ';
|
||||||
|
return out << '\n';
|
||||||
|
}
|
||||||
|
friend ostream&printb1_m5(ostream&out, const m4_key&x,
|
||||||
|
const WordClasses&wcf) {
|
||||||
|
out << "-1 " << ((x.deps&DEP_MODEL_F) ? wcf.classString(x.F)
|
||||||
|
: string("0"))<< ' ';
|
||||||
|
out << x.vacancies1 << ' ';
|
||||||
|
out << x.vacancies2 << ' ';
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class compare1 {
|
||||||
|
private:
|
||||||
|
int deps;
|
||||||
|
public:
|
||||||
|
compare1(int _deps) :
|
||||||
|
deps(_deps) {
|
||||||
|
}
|
||||||
|
bool operator()(const m4_key&a, const m4_key&b) const {
|
||||||
|
if (deps&DEP_MODEL_l) {
|
||||||
|
if (a.l<b.l)
|
||||||
|
return 1;
|
||||||
|
if (b.l<a.l)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (deps&DEP_MODEL_m) {
|
||||||
|
if (a.m<b.m)
|
||||||
|
return 1;
|
||||||
|
if (b.m<a.m)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (deps&DEP_MODEL_F) {
|
||||||
|
if (a.F<b.F)
|
||||||
|
return 1;
|
||||||
|
if (b.F<a.F)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (deps&DEP_MODEL_E) {
|
||||||
|
if (a.E<b.E)
|
||||||
|
return 1;
|
||||||
|
if (b.E<a.E)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
//if(deps&DEP_MODEL_pj){if( a.prevj<b.prevj )return 1;if( b.prevj<a.prevj )return 0;}
|
||||||
|
if (a.vacancies1<b.vacancies1)
|
||||||
|
return 1;
|
||||||
|
if (b.vacancies1<a.vacancies1)
|
||||||
|
return 0;
|
||||||
|
if (a.vacancies2<b.vacancies2)
|
||||||
|
return 1;
|
||||||
|
if (b.vacancies2<a.vacancies2)
|
||||||
|
return 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class compareb1 {
|
||||||
|
private:
|
||||||
|
int deps;
|
||||||
|
public:
|
||||||
|
compareb1(int _deps) :
|
||||||
|
deps(_deps) {
|
||||||
|
}
|
||||||
|
bool operator()(const m4_key&a, const m4_key&b) const {
|
||||||
|
if (deps&DEP_MODELb_l) {
|
||||||
|
if (a.l<b.l)
|
||||||
|
return 1;
|
||||||
|
if (b.l<a.l)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (deps&DEP_MODELb_m) {
|
||||||
|
if (a.m<b.m)
|
||||||
|
return 1;
|
||||||
|
if (b.m<a.m)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (deps&DEP_MODELb_F) {
|
||||||
|
if (a.F<b.F)
|
||||||
|
return 1;
|
||||||
|
if (b.F<a.F)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (deps&DEP_MODELb_E) {
|
||||||
|
if (a.E<b.E)
|
||||||
|
return 1;
|
||||||
|
if (b.E<a.E)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
//if(deps&DEP_MODELb_pj){if( a.prevJ<b.prevJ )return 1;if( b.prevJ<a.prevJ )return 0;}
|
||||||
|
if (a.vacancies1<b.vacancies1)
|
||||||
|
return 1;
|
||||||
|
if (b.vacancies1<a.vacancies1)
|
||||||
|
return 0;
|
||||||
|
if (a.vacancies2<b.vacancies2)
|
||||||
|
return 1;
|
||||||
|
if (b.vacancies2<a.vacancies2)
|
||||||
|
return 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
inline void tokenize(const string&in, Vector<string>&out) {
|
||||||
|
string s;
|
||||||
|
istrstream l(in.c_str());
|
||||||
|
while (l>>s)
|
||||||
|
out.push_back(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
class d4model {
|
||||||
|
public:
|
||||||
|
typedef Vector<pair<COUNT,PROB> > Vpff;
|
||||||
|
map<m4_key,Vpff,compare1 > D1;
|
||||||
|
map<m4_key,Vpff,compareb1> Db1;
|
||||||
|
PositionIndex msl;
|
||||||
|
WordClasses* ewordclasses;
|
||||||
|
WordClasses* fwordclasses;
|
||||||
|
template<class MAPPER> void makeWordClasses(const MAPPER&m1,
|
||||||
|
const MAPPER&m2, string efile, string ffile, const vcbList& elist,
|
||||||
|
const vcbList& flist) {
|
||||||
|
ifstream estrm(efile.c_str()), fstrm(ffile.c_str());
|
||||||
|
if ( !estrm) {
|
||||||
|
cerr << "ERROR: can not read " << efile << endl;
|
||||||
|
} else
|
||||||
|
ewordclasses->read(estrm, m1,elist);
|
||||||
|
if ( !fstrm)
|
||||||
|
cerr << "ERROR: can not read " << ffile << endl;
|
||||||
|
else
|
||||||
|
fwordclasses->read(fstrm, m2,flist);
|
||||||
|
}
|
||||||
|
/**
 * Builds an empty model.  Both tables are ordered with comparators
 * constructed from M4_Dependencies; `_msl` is stored as msl, and the
 * addresses of `e` and `f` are kept (not copied) as the word-class tables.
 */
d4model(PositionIndex _msl, WordClasses& e, WordClasses& f)
    : D1(compare1(M4_Dependencies)),
      Db1(compareb1(M4_Dependencies)),
      msl(_msl),
      ewordclasses(&e),
      fwordclasses(&f)
{
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
/**
 * Returns a writable reference to the count stored in D1 for the key
 * (l, m, F, E, j_cp) at cell j-j_cp+msl.  When the key is not present yet,
 * a row of msl*2+1 zeroed (count, prob) cells is inserted first.
 */
inline COUNT& getCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l,
        int m)
{
    assert(j>=1);
    const m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1);

    map<m4_key,Vpff,compare1 >::iterator row = D1.find(key);
    if (row == D1.end()) {
        const Vpff zeroRow(msl*2+1, pair<COUNT,PROB>(0.0,0.0));
        row = D1.insert(make_pair(key, zeroRow)).first;
    }
    assert(row!=D1.end());

    Vpff& cells = row->second;
    return cells[j-j_cp+msl].first;
}
|
||||||
|
|
||||||
|
/**
 * Returns a writable reference to the count stored in Db1 for the key
 * (l, m, F, E, j_prev) at cell j-j_prev+msl.  When the key is not present
 * yet, a row of msl*2+1 zeroed (count, prob) cells is inserted first.
 */
inline COUNT& getCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F,
        int l, int m)
{
    assert(j>=1);
    assert(j_prev>=1);
    const m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1);

    map<m4_key,Vpff,compareb1 >::iterator row = Db1.find(key);
    if (row == Db1.end()) {
        const Vpff zeroRow(msl*2+1, pair<COUNT,PROB>(0.0,0.0));
        row = Db1.insert(make_pair(key, zeroRow)).first;
    }
    assert(row!=Db1.end());

    Vpff& cells = row->second;
    return cells[j-j_prev+msl].first;
}
|
||||||
|
Mutex lock_f,lock_b;
|
||||||
|
public:
|
||||||
|
/**
 * Adds `v` to the D1 count addressed by (j, j_cp, E, F, l, m).
 * The lookup and the addition both happen while lock_f is held.
 */
inline void augCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l,
        int m, const COUNT& v)
{
    lock_f.lock();
    COUNT& cell = getCountRef_first(j, j_cp, E, F, l, m);
    cell += v;
    lock_f.unlock();
}
|
||||||
|
|
||||||
|
/**
 * Adds `v` to the Db1 count addressed by (j, j_prev, E, F, l, m).
 * The lookup and the addition both happen while lock_b is held.
 */
inline void augCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F,
        int l, int m, const COUNT& v)
{
    lock_b.lock();
    COUNT& cell = getCountRef_bigger(j, j_prev, E, F, l, m);
    cell += v;
    lock_b.unlock();
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Folds the tables of `d` into this model.
 *
 * For every key of d.D1 / d.Db1 the matching row of this model is looked up
 * (a zeroed row of msl*2+1 cells is created when missing) and the `.second`
 * component of each cell of `d` is added to the same cell here.
 *
 * Each table is walked with an iterator of its own map type, and the cell
 * loop stops at the shorter of the two rows, so models built with different
 * msl values cannot cause an out-of-range write.
 *
 * NOTE(review): only the `.second` (PROB) components are summed, the
 * `.first` (COUNT) components are left alone -- confirm this is intended.
 */
void merge(d4model &d)
{
    for (map<m4_key,Vpff,compare1 >::iterator src = d.D1.begin();
            src != d.D1.end(); ++src) {
        map<m4_key,Vpff,compare1 >::iterator dst = D1.find(src->first);
        if (dst == D1.end())
            dst = D1.insert(make_pair(src->first,
                    Vpff(msl*2+1, pair<COUNT,PROB>(0.0,0.0)))).first;
        for (unsigned int i = 0;
                i < src->second.size() && i < dst->second.size(); ++i) {
            dst->second[i].second += src->second[i].second;
        }
    }

    for (map<m4_key,Vpff,compareb1 >::iterator src = d.Db1.begin();
            src != d.Db1.end(); ++src) {
        map<m4_key,Vpff,compareb1 >::iterator dst = Db1.find(src->first);
        if (dst == Db1.end())
            dst = Db1.insert(make_pair(src->first,
                    Vpff(msl*2+1, pair<COUNT,PROB>(0.0,0.0)))).first;
        for (unsigned int i = 0;
                i < src->second.size() && i < dst->second.size(); ++i) {
            dst->second[i].second += src->second[i].second;
        }
    }
}
|
||||||
|
|
||||||
|
bool augCount(const char* fD1, const char* fDb) {
|
||||||
|
ifstream ifsd(fD1);
|
||||||
|
int deps;
|
||||||
|
int l;
|
||||||
|
int m;
|
||||||
|
int F;
|
||||||
|
int E;
|
||||||
|
int prevj;
|
||||||
|
int vacancies1, vacancies2;
|
||||||
|
int len;
|
||||||
|
double count;
|
||||||
|
if (!ifsd) {
|
||||||
|
cerr << "Failed in " << fD1 << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
while (ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1
|
||||||
|
>>vacancies2>>len) {
|
||||||
|
m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1,
|
||||||
|
vacancies2);
|
||||||
|
map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
|
||||||
|
if (p==D1.end())
|
||||||
|
p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
|
||||||
|
assert(p!=D1.end());
|
||||||
|
int i;
|
||||||
|
for (i=0; i<len; i++) {
|
||||||
|
ifsd >> count;
|
||||||
|
p->second[i].first+=count;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ifstream ifsd1(fDb);
|
||||||
|
if (!ifsd1) {
|
||||||
|
cerr << "Failed in " << fDb << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
while (ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1
|
||||||
|
>>vacancies2>>len) {
|
||||||
|
m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1,
|
||||||
|
vacancies2);
|
||||||
|
map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
|
||||||
|
if (p==Db1.end())
|
||||||
|
p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
|
||||||
|
assert(p!=D1.end());
|
||||||
|
int i;
|
||||||
|
for (i=0; i<len; i++) {
|
||||||
|
ifsd1 >> count;
|
||||||
|
p->second[i].first+=count;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool readProbTable(const char* fD1, const char* fDb){
|
||||||
|
ifstream ifsd(fD1);
|
||||||
|
int deps;
|
||||||
|
int l;
|
||||||
|
int m;
|
||||||
|
int F;
|
||||||
|
int E;
|
||||||
|
int prevj;
|
||||||
|
int vacancies1,vacancies2;
|
||||||
|
int len;
|
||||||
|
double count;
|
||||||
|
if(!ifsd){
|
||||||
|
cerr << "Failed in " << fD1 << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
while(ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len){
|
||||||
|
m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2);
|
||||||
|
map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
|
||||||
|
if(p==D1.end())p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
|
||||||
|
assert(p!=D1.end());
|
||||||
|
int i;
|
||||||
|
for(i=0;i<len;i++){
|
||||||
|
ifsd >> count;
|
||||||
|
p->second[i].second=count;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ifstream ifsd1(fDb);
|
||||||
|
if(!ifsd1){
|
||||||
|
cerr << "Failed in " << fDb << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
while(ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len){
|
||||||
|
m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2);
|
||||||
|
map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
|
||||||
|
if(p==Db1.end())p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
|
||||||
|
assert(p!=D1.end());
|
||||||
|
int i;
|
||||||
|
for(i=0;i<len;i++){
|
||||||
|
ifsd1 >> count;
|
||||||
|
p->second[i].second=count;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool printProbTable(const char* fD1, const char* fDb) {
|
||||||
|
ofstream ofsd(fD1);
|
||||||
|
if (!ofsd.is_open()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
map<m4_key,Vpff,compare1 >::iterator it;
|
||||||
|
for (it = D1.begin(); it!=D1.end(); it++) {
|
||||||
|
ofsd << it->first.deps << " " << it->first.l << " "
|
||||||
|
<< it->first.m << " " << it->first.F << " "
|
||||||
|
<< it->first.E << " " << it->first.prevj << " "
|
||||||
|
<< it->first.vacancies1 << " " << it->first.vacancies2
|
||||||
|
<< " " << it->second.size() << " ";
|
||||||
|
int i;
|
||||||
|
for (i=0; i<it->second.size(); i++) {
|
||||||
|
ofsd << it->second[i].second << " ";
|
||||||
|
}
|
||||||
|
ofsd << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
ofstream ofsdb(fDb);
|
||||||
|
if (!ofsdb.is_open()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
map<m4_key,Vpff,compareb1 >::iterator it;
|
||||||
|
for (it = Db1.begin(); it!=Db1.end(); it++) {
|
||||||
|
ofsdb << it->first.deps << " " << it->first.l << " " << it->first.m
|
||||||
|
<< " " << it->first.F << " " << it->first.E << " "
|
||||||
|
<< it->first.prevj << " " << it->first.vacancies1 << " "
|
||||||
|
<< it->first.vacancies2 << " " << it->second.size()<< endl;
|
||||||
|
int i;
|
||||||
|
for (i=0; i<it->second.size(); i++) {
|
||||||
|
ofsdb << it->second[i].second << " ";
|
||||||
|
}
|
||||||
|
ofsdb << endl;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool dumpCount(const char* fD1, const char* fDb){
|
||||||
|
ofstream ofsd(fD1);
|
||||||
|
if(!ofsd.is_open()){
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
map<m4_key,Vpff,compare1 >::iterator it;
|
||||||
|
for(it = D1.begin(); it!=D1.end();it++){
|
||||||
|
ofsd << it->first.deps << " "
|
||||||
|
<< it->first.l << " "
|
||||||
|
<< it->first.m << " "
|
||||||
|
<< it->first.F << " "
|
||||||
|
<< it->first.E << " "
|
||||||
|
<< it->first.prevj << " "
|
||||||
|
<< it->first.vacancies1 << " "
|
||||||
|
<< it->first.vacancies2 << " "
|
||||||
|
<< it->second.size() << " ";
|
||||||
|
int i;
|
||||||
|
for(i=0;i<it->second.size();i++){
|
||||||
|
ofsd << it->second[i].first << " ";
|
||||||
|
}
|
||||||
|
ofsd << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
ofstream ofsdb(fDb);
|
||||||
|
if(!ofsdb.is_open()){
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
map<m4_key,Vpff,compareb1 >::iterator it;
|
||||||
|
for(it = Db1.begin(); it!=Db1.end();it++){
|
||||||
|
ofsdb << it->first.deps << " "
|
||||||
|
<< it->first.l << " "
|
||||||
|
<< it->first.m << " "
|
||||||
|
<< it->first.F << " "
|
||||||
|
<< it->first.E << " "
|
||||||
|
<< it->first.prevj << " "
|
||||||
|
<< it->first.vacancies1 << " "
|
||||||
|
<< it->first.vacancies2 << " "
|
||||||
|
<< it->second.size()<< endl;
|
||||||
|
int i;
|
||||||
|
for(i=0;i<it->second.size();i++){
|
||||||
|
ofsdb << it->second[i].first << " ";
|
||||||
|
}
|
||||||
|
ofsdb << endl;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
/**
 * Looks up the D1 row for (l, m, F, E) and returns its iterator, or
 * D1.end() when there is none.  The result is meant to be handed to
 * getProb_first_withiterator.
 */
map<m4_key,Vpff,compare1 >::const_iterator getProb_first_iterator(int E,
        int F, int l, int m) const
{
    const m4_key probe(M4_Dependencies, l, m, F, E, 0, -1, -1);
    return D1.find(probe);
}
|
||||||
|
/**
 * Smoothed D1 probability for offset j-j_cp, using a row iterator obtained
 * beforehand.
 *
 * Returns PROB_SMOOTH when `p` is D1.end(); otherwise the stored
 * probability is interpolated with 1/(2*m-1) using d4modelsmooth_factor and
 * the result is floored at PROB_SMOOTH.
 */
PROB getProb_first_withiterator(WordIndex j, WordIndex j_cp, int m,
        const map<m4_key,Vpff,compare1 >::const_iterator& p) const
{
    assert(j>=1);
    //assert(j_cp>=0);
    assert(j<=msl);
    assert(j_cp<=msl);

    if (p == D1.end())
        return PROB_SMOOTH;

    const Vpff& row = p->second;
    massert(row[j-j_cp+msl].second<=1.0);
    return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1
            -d4modelsmooth_factor)*row[j-j_cp+msl].second);
}
|
||||||
|
|
||||||
|
/**
 * Smoothed D1 probability for offset j-j_cp under the key (l, m, F, E).
 *
 * Returns PROB_SMOOTH when the key has no row; otherwise the stored
 * probability is interpolated with 1/(2*m-1) using d4modelsmooth_factor and
 * the result is floored at PROB_SMOOTH.
 */
PROB getProb_first(WordIndex j, WordIndex j_cp, int E, int F, int l, int m) const
{
    assert(j>=1);
    //assert(j_cp>=0);
    assert(j<=msl);
    assert(j_cp<=msl);

    const m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1);
    const map<m4_key,Vpff,compare1 >::const_iterator p = D1.find(key);
    if (p == D1.end())
        return PROB_SMOOTH;

    const Vpff& row = p->second;
    massert(row[j-j_cp+msl].second<=1.0);
    return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1
            -d4modelsmooth_factor)*row[j-j_cp+msl].second);
}
|
||||||
|
/**
 * Looks up the Db1 row for (l, m, F, E) and returns its iterator, or
 * Db1.end() when there is none.  The result is meant to be handed to
 * getProb_bigger_withiterator.
 */
map<m4_key,Vpff,compareb1 >::const_iterator getProb_bigger_iterator(int E,
        int F, int l, int m) const
{
    const m4_key probe(M4_Dependencies, l, m, F, E, 0, -1, -1);
    return Db1.find(probe);
}
|
||||||
|
/**
 * Smoothed Db1 probability for offset j-j_prev, using a row iterator
 * obtained beforehand.
 *
 * Returns PROB_SMOOTH when `p` is Db1.end(); otherwise the stored
 * probability is interpolated with 1/(m-1) using d4modelsmooth_factor and
 * the result is floored at PROB_SMOOTH.
 */
PROB getProb_bigger_withiterator(WordIndex j, WordIndex j_prev, int m,
        const map<m4_key,Vpff,compareb1 >::const_iterator& p) const
{
    massert(j>=1);
    massert(j_prev>=1);
    massert(j>j_prev);
    massert(j<=msl);
    massert(j_prev<=msl);

    if (p == Db1.end())
        return PROB_SMOOTH;

    const Vpff& row = p->second;
    massert(row[j-j_prev+msl].second<=1.0 );
    return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1
            -d4modelsmooth_factor)*row[j-j_prev+msl].second);
}
|
||||||
|
|
||||||
|
/**
 * Smoothed Db1 probability for offset j-j_prev under the key (l, m, F, E).
 *
 * Returns PROB_SMOOTH when the key has no row; otherwise the stored
 * probability is interpolated with 1/(m-1) using d4modelsmooth_factor and
 * the result is floored at PROB_SMOOTH.
 */
PROB getProb_bigger(WordIndex j, WordIndex j_prev, int E, int F, int l,
        int m) const
{
    massert(j>=1);
    massert(j_prev>=1);
    massert(j>j_prev);
    massert(j<=msl);
    massert(j_prev<=msl);

    const m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1);
    const map<m4_key,Vpff,compareb1 >::const_iterator p = Db1.find(key);
    if (p == Db1.end())
        return PROB_SMOOTH;

    const Vpff& row = p->second;
    massert(row[j-j_prev+msl].second<=1.0 );
    return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1
            -d4modelsmooth_factor)*row[j-j_prev+msl].second);
}
|
||||||
|
|
||||||
|
void normalizeTable() {
|
||||||
|
int nParams=0;
|
||||||
|
for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
|
||||||
|
Vpff&d1=i->second;
|
||||||
|
double sum=0.0;
|
||||||
|
for (PositionIndex i=0; i<d1.size(); i++)
|
||||||
|
sum+=d1[i].first;
|
||||||
|
for (PositionIndex i=0; i<d1.size(); i++) {
|
||||||
|
d1[i].second=sum ? (d1[i].first/sum) : (1.0/d1.size());
|
||||||
|
nParams++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
|
||||||
|
Vpff&db1=i->second;
|
||||||
|
double sum=0.0;
|
||||||
|
for (PositionIndex i=0; i<db1.size(); i++)
|
||||||
|
sum+=db1[i].first;
|
||||||
|
for (PositionIndex i=0; i<db1.size(); i++) {
|
||||||
|
db1[i].second=sum ? (db1[i].first/sum) : (1.0/db1.size());
|
||||||
|
nParams++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cout << "D4 table contains " << nParams << " parameters.\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear() {
|
||||||
|
for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
|
||||||
|
Vpff&d1=i->second;
|
||||||
|
for (PositionIndex i=0; i<d1.size(); i++)
|
||||||
|
d1[i].first=0.0;
|
||||||
|
}
|
||||||
|
for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
|
||||||
|
Vpff&db1=i->second;
|
||||||
|
for (PositionIndex i=0; i<db1.size(); i++)
|
||||||
|
db1[i].first=0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*void printProbTable(const char*fname1,const char*fname2)
|
||||||
|
{
|
||||||
|
ofstream out(fname1);
|
||||||
|
double ssum=0.0;
|
||||||
|
out << "# Translation tables for Model 4 .\n";
|
||||||
|
out << "# Table for head of cept.\n";
|
||||||
|
for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i){
|
||||||
|
const Vpff&d1=i->second;
|
||||||
|
double sum=0.0;
|
||||||
|
for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
|
||||||
|
if ( sum ){
|
||||||
|
print1(out,i->first,ewordclasses,fwordclasses);
|
||||||
|
out << "SUM: " << sum << ' '<< '\n';
|
||||||
|
for(unsigned ii=0;ii<d1.size();ii++)
|
||||||
|
if( d1[ii].first )
|
||||||
|
out << (int)(ii)-(int)(msl) << ' ' << d1[ii].first << '\n';
|
||||||
|
out << endl;
|
||||||
|
}
|
||||||
|
ssum+=sum;
|
||||||
|
}
|
||||||
|
out << "# Table for non-head of cept.\n";
|
||||||
|
for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
|
||||||
|
{
|
||||||
|
const Vpff&db1=i->second;
|
||||||
|
double sum=0.0;
|
||||||
|
for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
|
||||||
|
if( sum ){
|
||||||
|
printb1(out,i->first,ewordclasses,fwordclasses);
|
||||||
|
out << "SUM: " << sum << ' '<<'\n';
|
||||||
|
for(unsigned ii=0;ii<db1.size();ii++)
|
||||||
|
if( db1[ii].first )
|
||||||
|
{
|
||||||
|
out << (int)(ii)-(int)(msl) << ' ' << db1[ii].first << '\n';
|
||||||
|
}
|
||||||
|
out << endl;
|
||||||
|
}
|
||||||
|
ssum+=sum;
|
||||||
|
}
|
||||||
|
out << endl << "FULL-SUM: " << ssum << endl;
|
||||||
|
if( M4_Dependencies==76 ){
|
||||||
|
ofstream out2(fname2);
|
||||||
|
for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
|
||||||
|
{
|
||||||
|
const Vpff&d1=i->second;
|
||||||
|
for(unsigned ii=0;ii<d1.size();ii++)
|
||||||
|
if( d1[ii].first )
|
||||||
|
out2 << ewordclasses.classString(i->first.E) << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << d1[ii].second << '\n';
|
||||||
|
}
|
||||||
|
for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i) {
|
||||||
|
const Vpff&db1=i->second;
|
||||||
|
for(unsigned ii=0;ii<db1.size();ii++)
|
||||||
|
if( db1[ii].first )
|
||||||
|
out2 << -1 << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << db1[ii].second << '\n';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
|
||||||
|
bool readProbTable(const char *fname) {
|
||||||
|
cerr << "Reading D4Tables from " << fname << endl;
|
||||||
|
ifstream file(fname);
|
||||||
|
string line;
|
||||||
|
do {
|
||||||
|
getline(file, line);
|
||||||
|
} while (line.length()&&line[0]=='#');
|
||||||
|
|
||||||
|
do {
|
||||||
|
while (line.length()==0)
|
||||||
|
getline(file, line);
|
||||||
|
if (line[0]=='#')
|
||||||
|
break;
|
||||||
|
Vector<string> linestr;
|
||||||
|
tokenize(line, linestr);
|
||||||
|
m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1);
|
||||||
|
for (unsigned int i=0; i<linestr.size(); i+=2) {
|
||||||
|
if (linestr[i]=="l:") {
|
||||||
|
k.l=atoi(linestr[i+1].c_str());
|
||||||
|
iassert(M4_Dependencies&DEP_MODEL_l);
|
||||||
|
}
|
||||||
|
if (linestr[i]=="m:") {
|
||||||
|
k.m=atoi(linestr[i+1].c_str());
|
||||||
|
iassert(M4_Dependencies&DEP_MODEL_m);
|
||||||
|
}
|
||||||
|
if (linestr[i]=="F:") {
|
||||||
|
k.F=(*fwordclasses)(linestr[i+1]);
|
||||||
|
iassert(M4_Dependencies&DEP_MODEL_F);
|
||||||
|
}
|
||||||
|
if (linestr[i]=="E:") {
|
||||||
|
k.E=(*ewordclasses)(linestr[i+1]);
|
||||||
|
iassert(M4_Dependencies&DEP_MODEL_E);
|
||||||
|
}
|
||||||
|
//if( linestr[i]=="j-1:" ){k.prevj=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_pj);}
|
||||||
|
}
|
||||||
|
string str;
|
||||||
|
double sum;
|
||||||
|
file >> str >> sum;
|
||||||
|
iassert(str=="SUM:");
|
||||||
|
if (str!="SUM:")
|
||||||
|
cerr << "ERROR: string is " << str << " and not sum " << endl;
|
||||||
|
|
||||||
|
do {
|
||||||
|
int value;
|
||||||
|
double count;
|
||||||
|
getline(file, line);
|
||||||
|
istrstream twonumbers(line.c_str());
|
||||||
|
if (twonumbers >> value >> count) {
|
||||||
|
if (D1.count(k)==0)
|
||||||
|
D1.insert(make_pair(k, Vpff(msl*2+1, pair<COUNT, PROB>(
|
||||||
|
0.0, 0.0))));
|
||||||
|
D1[k][value+msl]=make_pair(count, count/sum);
|
||||||
|
}
|
||||||
|
} while (line.length());
|
||||||
|
} while (file);
|
||||||
|
do {
|
||||||
|
getline(file, line);
|
||||||
|
} while (line.length()&&line[0]=='#');
|
||||||
|
do {
|
||||||
|
while (line.length()==0)
|
||||||
|
getline(file, line);
|
||||||
|
if (line[0]=='#')
|
||||||
|
break;
|
||||||
|
Vector<string> linestr;
|
||||||
|
tokenize(line, linestr);
|
||||||
|
m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1);
|
||||||
|
bool sumRead=0;
|
||||||
|
for (unsigned int i=0; i<linestr.size(); i+=2) {
|
||||||
|
if (linestr[i]=="l:") {
|
||||||
|
k.l=atoi(linestr[i+1].c_str());
|
||||||
|
iassert(M4_Dependencies&DEP_MODELb_l);
|
||||||
|
} else if (linestr[i]=="m:") {
|
||||||
|
k.m=atoi(linestr[i+1].c_str());
|
||||||
|
iassert(M4_Dependencies&DEP_MODELb_m);
|
||||||
|
} else if (linestr[i]=="F:") {
|
||||||
|
k.F=(*fwordclasses)(linestr[i+1]);
|
||||||
|
iassert(M4_Dependencies&DEP_MODELb_F);
|
||||||
|
} else if (linestr[i]=="E:") {
|
||||||
|
k.E=(*ewordclasses)(linestr[i+1]);
|
||||||
|
iassert(M4_Dependencies&DEP_MODELb_E);
|
||||||
|
} else if (linestr[i]=="SUM:") {
|
||||||
|
cerr << "Warning: obviously no dependency.\n";
|
||||||
|
sumRead=1;
|
||||||
|
} else if (linestr[i]=="FULL-SUM:") {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
cerr << "ERROR: error in reading d4 tables: " << linestr[i]
|
||||||
|
<< ' ' << linestr[i+1] << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
string str;
|
||||||
|
double sum;
|
||||||
|
if (sumRead==0)
|
||||||
|
file >> str >> sum;
|
||||||
|
else {
|
||||||
|
str=linestr[0];
|
||||||
|
sum=atof(linestr[1].c_str());
|
||||||
|
}
|
||||||
|
if (str!="SUM:")
|
||||||
|
cerr << "ERROR: should read SUM but read " << str << endl;
|
||||||
|
do {
|
||||||
|
int value;
|
||||||
|
double count;
|
||||||
|
getline(file, line);
|
||||||
|
istrstream twonumbers(line.c_str());
|
||||||
|
if (twonumbers >> value >> count) {
|
||||||
|
if (Db1.count(k)==0)
|
||||||
|
Db1.insert(make_pair(k, Vpff(msl*2+1,
|
||||||
|
pair<COUNT, PROB>(0.0, 0.0))));
|
||||||
|
Db1[k][value+msl]=make_pair(count, count/sum);
|
||||||
|
}
|
||||||
|
} while (file&&line.length());
|
||||||
|
} while (file);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,233 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef _d5tables_h_define
|
||||||
|
#define _d5tables_h_define
|
||||||
|
#include <cmath>
|
||||||
|
#include "D4Tables.h"
|
||||||
|
|
||||||
|
extern float d5modelsmooth_countoffset;
|
||||||
|
extern float d5modelsmooth_factor;
|
||||||
|
|
||||||
|
#define UNSEENPROB (1.0/vacancies_total)
|
||||||
|
|
||||||
|
class d5model
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
typedef Vector < pair < COUNT,PROB > >Vpff;
|
||||||
|
map< m4_key,Vpff,compare1 > D1;
|
||||||
|
map< m4_key,Vpff,compareb1 > Db1;
|
||||||
|
public:
|
||||||
|
d4model&d4m;
|
||||||
|
WordClasses* ewordclasses;
|
||||||
|
WordClasses* fwordclasses;
|
||||||
|
template<class MAPPER>
|
||||||
|
void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile
|
||||||
|
, const vcbList& elist,
|
||||||
|
const vcbList& flist)
|
||||||
|
{
|
||||||
|
ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
|
||||||
|
if( !estrm )
|
||||||
|
cerr << "ERROR: can not read classes from " << efile << endl;
|
||||||
|
else
|
||||||
|
ewordclasses->read(estrm,m1,elist);
|
||||||
|
if( !fstrm )
|
||||||
|
cerr << "ERROR: can not read classes from " << ffile << endl;
|
||||||
|
else
|
||||||
|
fwordclasses->read(fstrm,m2,flist);
|
||||||
|
}
|
||||||
|
/**
 * Builds an empty Model 5 table on top of an existing d4model.  Both maps
 * are ordered with comparators constructed from M5_Dependencies; a
 * reference to `_d4m` is kept and its word-class pointers are shared.
 */
d5model(d4model& _d4m)
    : D1(compare1(M5_Dependencies)),
      Db1(compareb1(M5_Dependencies)),
      d4m(_d4m),
      ewordclasses(_d4m.ewordclasses),
      fwordclasses(_d4m.fwordclasses)
{
}
|
||||||
|
/**
 * Returns a writable reference to the D1 count in cell `vacancies_j` of the
 * row keyed by (l, m, F, vacancies_jp, vacancies_total).  A missing row is
 * created with vacancies_total+1 cells, each holding count 0 and
 * probability UNSEENPROB.
 */
COUNT& getCountRef_first(PositionIndex vacancies_j,
        PositionIndex vacancies_jp, int F,
        PositionIndex l, PositionIndex m,
        PositionIndex vacancies_total)
{
    massert(vacancies_j>0);
    massert(vacancies_total>0);
    //massert(vacancies_jp<=vacancies_total);
    massert(vacancies_j <=vacancies_total);
    massert(vacancies_total<=m);

    const m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
    map<m4_key,Vpff,compare1 >::iterator row = D1.find(key);
    if (row == D1.end()) {
        // !!! constrain length
        row = D1.insert(make_pair(key,
                Vpff(vacancies_total+1, make_pair(0,UNSEENPROB)))).first;
    }
    massert(row!=D1.end());

    Vpff& cells = row->second;
    return cells[vacancies_j].first;
}
|
||||||
|
/**
 * Returns a writable reference to the Db1 count in cell
 * vacancies_j - vacancies_jp of the row keyed by (l, m, F, -1,
 * vacancies_total).  A missing row is created with vacancies_total+1
 * cells, each holding count 0 and probability UNSEENPROB.
 */
COUNT& getCountRef_bigger(PositionIndex vacancies_j,
        PositionIndex vacancies_jp, int F,
        PositionIndex l, PositionIndex m,
        PositionIndex vacancies_total)
{
    massert(vacancies_j>0);
    massert(vacancies_total>0);
    massert (vacancies_jp <= vacancies_j);
    massert (vacancies_j-vacancies_jp <= vacancies_total);

    const m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
    map<m4_key,Vpff,compareb1 >::iterator row = Db1.find(key);
    if (row == Db1.end()) {
        // !!! constrain length
        row = Db1.insert(make_pair(key,
                Vpff(vacancies_total+1, make_pair(0,UNSEENPROB)))).first;
    }
    massert(row!=Db1.end());

    Vpff& cells = row->second;
    return cells[vacancies_j - vacancies_jp].first;
}
|
||||||
|
/**
 * Smoothed D1 probability for cell `vacancies_j` of the row keyed by
 * (l, m, F, vacancies_jp, vacancies_total).
 *
 * Returns UNSEENPROB when the key has no row; otherwise the stored
 * probability is interpolated with 1/vacancies_total using
 * d5modelsmooth_factor and the result is floored at PROB_SMOOTH.
 */
PROB getProb_first(PositionIndex vacancies_j, PositionIndex vacancies_jp,
        int F, PositionIndex l, PositionIndex m,
        PositionIndex vacancies_total) const
{
    massert(vacancies_j>0);
    massert(vacancies_total>0);
    //massert(vacancies_jp<=vacancies_total);
    massert(vacancies_j <=vacancies_total);
    massert(vacancies_total<=m);

    const m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
    const map<m4_key,Vpff,compare1 >::const_iterator row = D1.find(key);
    if (row == D1.end()) {
        return UNSEENPROB;
    }
    return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(row->second)[vacancies_j].second);
}
|
||||||
|
/**
 * Smoothed Db1 probability for cell vacancies_j - vacancies_jp of the row
 * keyed by (l, m, F, -1, vacancies_total).
 *
 * Returns UNSEENPROB when the key has no row; otherwise the stored
 * probability is interpolated with 1/vacancies_total using
 * d5modelsmooth_factor and the result is floored at PROB_SMOOTH.
 */
PROB getProb_bigger(PositionIndex vacancies_j, PositionIndex vacancies_jp,
        int F, PositionIndex l, PositionIndex m,
        PositionIndex vacancies_total) const
{
    massert(vacancies_j>0);
    massert(vacancies_total>0);
    massert (vacancies_jp <= vacancies_j);
    massert (vacancies_j-vacancies_jp <= vacancies_total);

    const m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
    const map<m4_key,Vpff,compareb1 >::const_iterator row = Db1.find(key);
    if (row == Db1.end()) {
        return UNSEENPROB;
    }
    return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(row->second)[vacancies_j - vacancies_jp].second);
}
|
||||||
|
void normalizeTable ()
|
||||||
|
{
|
||||||
|
int nParams=0;
|
||||||
|
for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
|
||||||
|
{
|
||||||
|
Vpff&d1=i->second;
|
||||||
|
COUNT sum=0.0;
|
||||||
|
for(PositionIndex i=0;i<d1.size();i++)
|
||||||
|
sum+=d1[i].first+d5modelsmooth_countoffset;
|
||||||
|
for(PositionIndex i=0;i<d1.size();i++)
|
||||||
|
{
|
||||||
|
d1[i].second=sum?((d1[i].first+d5modelsmooth_countoffset)/sum):(1.0/d1.size());
|
||||||
|
nParams++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
|
||||||
|
{
|
||||||
|
Vpff&db1=i->second;
|
||||||
|
double sum=0.0;
|
||||||
|
for(PositionIndex i=0;i<db1.size();i++)
|
||||||
|
sum+=db1[i].first+d5modelsmooth_countoffset;
|
||||||
|
for(PositionIndex i=0;i<db1.size();i++)
|
||||||
|
{
|
||||||
|
db1[i].second=sum?((db1[i].first+d5modelsmooth_countoffset)/sum):(1.0/db1.size());
|
||||||
|
nParams++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cout << "D5 table contains " << nParams << " parameters.\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Writes the Model 5 tables as text.
 *
 * After two '#' header lines, every D1 row whose counts do not sum to zero
 * is written one cell per line (key via print1_m5, then cell index,
 * probability and count), followed by an empty line.  The Db1 rows follow
 * after a third '#' header, written the same way with printb1_m5.
 */
friend ostream& operator<<(ostream& out, d5model& d5m)
{
    out << "# Translation tables for Model 5 .\n";
    out << "# Table for head of cept.\n";

    map<m4_key,Vpff,compare1 >::const_iterator head;
    for (head = d5m.D1.begin(); head != d5m.D1.end(); ++head) {
        const Vpff& row = head->second;
        COUNT total = 0.0;
        for (PositionIndex c = 0; c < row.size(); c++)
            total += row[c].first;
        if (!total)
            continue; // rows without any count are not written
        for (unsigned c = 0; c < row.size(); c++) {
            print1_m5(out, head->first, *d5m.ewordclasses, *d5m.fwordclasses);
            out << (int)(c) << ' ' << row[c].second << ' ' << row[c].first << '\n';
        }
        out << endl;
    }

    out << "# Table for non-head of cept.\n";

    map<m4_key,Vpff,compareb1 >::const_iterator rest;
    for (rest = d5m.Db1.begin(); rest != d5m.Db1.end(); ++rest) {
        const Vpff& row = rest->second;
        double total = 0.0;
        for (PositionIndex c = 0; c < row.size(); ++c)
            total += row[c].first;
        if (!total)
            continue; // rows without any count are not written
        for (unsigned c = 0; c < row.size(); c++) {
            printb1_m5(out, rest->first, *d5m.fwordclasses);
            out << (int)(c) << ' ' << row[c].second << ' ' << row[c].first << '\n';
        }
        out << endl;
    }
    return out;
}
|
||||||
|
void readProbTable(const char*x)
|
||||||
|
{
|
||||||
|
ifstream f(x);
|
||||||
|
string l;
|
||||||
|
while(getline(f,l))
|
||||||
|
{
|
||||||
|
if(l.length()&&l[0]=='#')
|
||||||
|
continue;
|
||||||
|
istrstream is(l.c_str());
|
||||||
|
string E,F;
|
||||||
|
int v1,v2,ii;
|
||||||
|
double prob,count;
|
||||||
|
if(is>>E>>F>>v1>>v2>>ii>>prob>>count)
|
||||||
|
{
|
||||||
|
//cerr << "Read: " << E << " " << F << " " << v1 << " " << v2 << " " << prob<< endl;
|
||||||
|
if( count>0 )
|
||||||
|
if( E=="-1")
|
||||||
|
getCountRef_bigger(ii,0,(*fwordclasses)(F),1000,1000,v2)+=count;
|
||||||
|
else
|
||||||
|
getCountRef_first(ii,v1,(*fwordclasses)(F),1000,1000,v2)+=count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
normalizeTable();
|
||||||
|
//ofstream of("M5FILE");
|
||||||
|
//of << (*this);
|
||||||
|
}
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
|
||||||
|
{
|
||||||
|
Vpff&d1=i->second;
|
||||||
|
for(PositionIndex i=0;i<d1.size();i++)
|
||||||
|
d1[i].first=0.0;
|
||||||
|
}
|
||||||
|
for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
|
||||||
|
{
|
||||||
|
Vpff&db1=i->second;
|
||||||
|
for(PositionIndex i=0;i<db1.size();i++)
|
||||||
|
db1[i].first=0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,93 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/* Noah A. Smith
|
||||||
|
Dictionary object for dictionary filter in Model 1 training
|
||||||
|
|
||||||
|
Dictionary file must be in order (sorted) by Foreign vocab id, but English
|
||||||
|
vocab ids may be in any order.
|
||||||
|
|
||||||
|
9 August 1999
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "Dictionary.h"
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
/*
 * Loads a dictionary of integer id pairs from `filename`.
 *
 * An empty (or null) filename produces a "dead" dictionary for which
 * indict() always returns false. Otherwise the file is read as a sequence of
 * whitespace-separated integer pairs; the first number of each pair is
 * appended to pairs[0] and the second to pairs[1]. Reading stops at the first
 * token that does not parse as an integer. If the file cannot be opened an
 * error is printed and the process exits with status 1.
 */
Dictionary::Dictionary(const char *filename){
  // Start with an empty lookup cache. The previous initial state (value 0,
  // index range [0,0]) claimed that entry 0 belonged to id 0 before anything
  // had been searched, and pointed at a non-existent entry when the file was
  // empty. -1 is used as "nothing cached yet".
  // NOTE(review): assumes vocabulary ids are never negative -- confirm.
  currval = -1;
  currindexmin = 0;
  currindexmax = -1;

  if(filename == 0 || !strcmp(filename, "")){
    dead = true;
    return;
  }
  dead = false;

  cout << "Reading dictionary from: " << filename << '\n';
  ifstream dFile(filename);
  if(!dFile){
    cerr << "ERROR: Can't open dictionary: " << filename << '\n';
    exit(1);
  }

  int p, q;
  while((dFile >> p >> q)){
    pairs[0].push_back(p);
    pairs[1].push_back(q);
  }
  cout << "Dictionary read; " << pairs[0].size() << " pairs loaded." << '\n';
  dFile.close();
}
|
||||||
|
|
||||||
|
|
||||||
|
bool Dictionary::indict(int p, int q){
|
||||||
|
if(dead) return false;
|
||||||
|
if(p == 0 && q == 0) return false;
|
||||||
|
if(currval == p){
|
||||||
|
for(int i = currindexmin; i <= currindexmax; i++)
|
||||||
|
if(pairs[1][i] == q) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
int begin = 0, end = pairs[0].size() - 1, middle = 0;
|
||||||
|
unsigned int t;
|
||||||
|
bool ret = false;
|
||||||
|
while(begin <= end){
|
||||||
|
middle = begin + ((end - begin) >> 1);
|
||||||
|
if(p < pairs[0][middle]) end = middle - 1;
|
||||||
|
else if(p > pairs[0][middle]) begin = middle + 1;
|
||||||
|
else{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t = middle;
|
||||||
|
while(pairs[0][t] == p )
|
||||||
|
if(pairs[1][t--] == q) ret = true;
|
||||||
|
currindexmin = t + 1;
|
||||||
|
t = middle + 1;
|
||||||
|
while(pairs[0][t] == p && t < pairs[0].size())
|
||||||
|
if(pairs[1][t++] == q) ret = true;
|
||||||
|
currindexmax = t - 1;
|
||||||
|
currval = p;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,48 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/* Noah A. Smith
|
||||||
|
Dictionary object for dictionary filter in Model 1 training
|
||||||
|
|
||||||
|
9 August 1999
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
#include "Vector.h"
|
||||||
|
|
||||||
|
#ifndef DICTIONARY_H
|
||||||
|
#define DICTIONARY_H
|
||||||
|
|
||||||
|
class Dictionary{
|
||||||
|
private:
|
||||||
|
Vector<int> pairs[2];
|
||||||
|
int currval;
|
||||||
|
int currindexmin;
|
||||||
|
int currindexmax;
|
||||||
|
bool dead;
|
||||||
|
public:
|
||||||
|
Dictionary(const char *);
|
||||||
|
bool indict(int, int);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,58 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1988,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef CLASS_FlexArray_defined
|
||||||
|
#define CLASS_FlexArray_defined
|
||||||
|
#include "Array.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
template<class T>
|
||||||
|
class FlexArray
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
Array<T> p;
|
||||||
|
int start,End;
|
||||||
|
public:
|
||||||
|
FlexArray(int _start=0,int _end=-1)
|
||||||
|
: p(_end-_start+1),start(_start),End(_end) {}
|
||||||
|
FlexArray(int _start,int _end,const T&init)
|
||||||
|
: p(_end-_start+1,init),start(_start),End(_end) {}
|
||||||
|
T&operator[](int i)
|
||||||
|
{return p[i-start];}
|
||||||
|
const T&operator[](int i)const
|
||||||
|
{return p[i-start];}
|
||||||
|
int low()const{return start;}
|
||||||
|
int high()const{return End;}
|
||||||
|
T*begin(){return conv<double>(p.begin());}
|
||||||
|
T*end(){return conv<double>(p.end());}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
inline ostream&operator<<(ostream&out,const FlexArray<T>&x)
|
||||||
|
{
|
||||||
|
for(int i=x.low();i<=x.high();++i)
|
||||||
|
out << i << ':' << x[i] << ';' << ' ';
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,240 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef NO_TRAINING
|
||||||
|
#include "ForwardBackward.h"
|
||||||
|
#include "Globals.h"
|
||||||
|
#include "myassert.h"
|
||||||
|
#include "HMMTables.h"
|
||||||
|
#include "mymath.h"
|
||||||
|
|
||||||
|
double ForwardBackwardTraining(const HMMNetwork&net, Array<double>&g, Array<
|
||||||
|
Array2<double> >&E) {
|
||||||
|
const int I = net.size1(), J = net.size2(), N = I * J;
|
||||||
|
Array<double> alpha(N, 0), beta(N, 0), sum(J);
|
||||||
|
for (int i = 0; i < I; i++)
|
||||||
|
beta[N - I + i] = net.getBetainit(i);
|
||||||
|
double * cur_beta = conv<double> (beta.begin()) + N - I - 1;
|
||||||
|
for (int j = J - 2; j >= 0; --j)
|
||||||
|
for (int ti = I - 1; ti >= 0; --ti, --cur_beta) {
|
||||||
|
const double *next_beta = conv<double> (beta.begin()) + (j + 1) * I;
|
||||||
|
const double *alprob = &net.outProb(j, ti, 0), *next_node =
|
||||||
|
&net.nodeProb(0, j + 1);
|
||||||
|
for (int ni = 0; ni < I; ++ni, (next_node += J)) {
|
||||||
|
massert(cur_beta<next_beta&& &net.outProb(j,ti,ni)==alprob);
|
||||||
|
massert(next_node == &net.nodeProb(ni,j+1));
|
||||||
|
/* if( VERB&&(*next_beta)*(*alprob)*(*next_node) )
|
||||||
|
cout << "B= " << (int)(cur_beta-beta.begin()) << " += " << (*next_beta) << "("
|
||||||
|
<< next_beta-beta.begin() << ") alprob:" << (*alprob) << " lexprob:" << (*next_node) << endl;*/
|
||||||
|
(*cur_beta) += (*next_beta++) * (*alprob++) * (*next_node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0; i < I; i++)
|
||||||
|
alpha[i] = net.getAlphainit(i) * net.nodeProb(i, 0);
|
||||||
|
double* cur_alpha = conv<double> (alpha.begin()) + I;
|
||||||
|
cur_beta = conv<double> (beta.begin()) + I;
|
||||||
|
for (int j = 1; j < J; j++) {
|
||||||
|
Array2<double>&e = E[(E.size() == 1) ? 0 : (j - 1)];
|
||||||
|
if ((E.size() != 1) || j == 1) {
|
||||||
|
e.resize(I, I);
|
||||||
|
fill(e.begin(), e.end(), 0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int ti = 0; ti < I; ++ti, ++cur_alpha, ++cur_beta) {
|
||||||
|
const double * prev_alpha = conv<double> (alpha.begin()) + I * (j
|
||||||
|
- 1);
|
||||||
|
double *cur_e = &e(ti, 0);
|
||||||
|
double this_node = net.nodeProb(ti, j);
|
||||||
|
const double* alprob = &net.outProb(j - 1, 0, ti);
|
||||||
|
for (int pi = 0; pi < I; ++pi, ++prev_alpha, (alprob += I)) {
|
||||||
|
massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
|
||||||
|
massert(&e(ti,pi)==cur_e);
|
||||||
|
const double alpha_increment = *prev_alpha * (*alprob)
|
||||||
|
* this_node;
|
||||||
|
(*cur_alpha) += alpha_increment;
|
||||||
|
(*cur_e++) += alpha_increment * (*cur_beta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
g.resize(N);
|
||||||
|
transform(alpha.begin(), alpha.end(), beta.begin(), g.begin(), multiplies<
|
||||||
|
double> ());
|
||||||
|
double bsum = 0, esum = 0, esum2;
|
||||||
|
for (int i = 0; i < I; i++)
|
||||||
|
bsum += beta[i] * net.nodeProb(i, 0) * net.getAlphainit(i);
|
||||||
|
for (unsigned int j = 0; j < (unsigned int) E.size(); j++) {
|
||||||
|
Array2<double>&e = E[j];
|
||||||
|
const double *epe = e.end();
|
||||||
|
for (const double*ep = e.begin(); ep != epe; ++ep)
|
||||||
|
esum += *ep;
|
||||||
|
}
|
||||||
|
if (J > 1)
|
||||||
|
esum2 = esum / (J - 1);
|
||||||
|
else
|
||||||
|
esum2 = 0.0;
|
||||||
|
if (!(esum2 == 0.0 || mfabs(esum2 - bsum) / bsum < 1e-3 * I))
|
||||||
|
cout << "ERROR2: " << esum2 << " " << bsum << " " << esum << net
|
||||||
|
<< endl;
|
||||||
|
double * sumptr = conv<double> (sum.begin());
|
||||||
|
double* ge = conv<double> (g.end());
|
||||||
|
for (double* gp = conv<double> (g.begin()); gp != ge; gp += I) {
|
||||||
|
*sumptr++ = normalize_if_possible(gp, gp + I);
|
||||||
|
if (bsum && !(mfabs((*(sumptr - 1) - bsum) / bsum) < 1e-3 * I))
|
||||||
|
cout << "ERROR: " << *(sumptr - 1) << " " << bsum << " " << mfabs(
|
||||||
|
(*(sumptr - 1) - bsum) / bsum) << ' ' << I << ' ' << J
|
||||||
|
<< endl;
|
||||||
|
}
|
||||||
|
for (unsigned int j = 0; j < (unsigned int) E.size(); j++) {
|
||||||
|
Array2<double>&e = E[j];
|
||||||
|
double* epe = e.end();
|
||||||
|
if (esum)
|
||||||
|
for (double*ep = e.begin(); ep != epe; ++ep)
|
||||||
|
*ep /= esum;
|
||||||
|
else
|
||||||
|
for (double*ep = e.begin(); ep != epe; ++ep)
|
||||||
|
*ep /= 1.0 / (max(I * I, I * I * (J - 1)));
|
||||||
|
}
|
||||||
|
if (sum.size())
|
||||||
|
return sum[0];
|
||||||
|
else
|
||||||
|
return 1.0;
|
||||||
|
}
|
||||||
|
void HMMViterbi(const HMMNetwork&net, Array<int>&vit) {
|
||||||
|
const int I = net.size1(), J = net.size2();
|
||||||
|
vit.resize(J);
|
||||||
|
Array<double> g;
|
||||||
|
Array<Array2<double> > e(1);
|
||||||
|
ForwardBackwardTraining(net, g, e);
|
||||||
|
for (int j = 0; j < J; j++) {
|
||||||
|
double * begin = conv<double> (g.begin()) + I * j;
|
||||||
|
vit[j] = max_element(begin, begin + I) - begin;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void HMMViterbi(const HMMNetwork&net, Array<double>&g, Array<int>&vit) {
|
||||||
|
const int I = net.size1(), J = net.size2();
|
||||||
|
vit.resize(J);
|
||||||
|
for (int j = 0; j < J; j++) {
|
||||||
|
double* begin = conv<double> (g.begin()) + I * j;
|
||||||
|
vit[j] = max_element(begin, begin + I) - begin;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double HMMRealViterbi(const HMMNetwork&net, Array<int>&vitar, int pegi,
|
||||||
|
int pegj, bool verbose) {
|
||||||
|
const int I = net.size1(), J = net.size2(), N = I * J;
|
||||||
|
Array<double> alpha(N, -1);
|
||||||
|
Array<double*> bp(N, (double*) 0);
|
||||||
|
vitar.resize(J);
|
||||||
|
if (J == 0)
|
||||||
|
return 1.0;
|
||||||
|
for (int i = 0; i < I; i++) {
|
||||||
|
alpha[i] = net.getAlphainit(i) * net.nodeProb(i, 0);
|
||||||
|
if (i > I / 2)
|
||||||
|
alpha[i] = 0; // only first empty word can be chosen
|
||||||
|
bp[i] = 0;
|
||||||
|
}
|
||||||
|
double *cur_alpha = conv<double> (alpha.begin()) + I;
|
||||||
|
double **cur_bp = conv<double*> (bp.begin()) + I;
|
||||||
|
for (int j = 1; j < J; j++) {
|
||||||
|
if (pegj + 1 == j)
|
||||||
|
for (int ti = 0; ti < I; ti++)
|
||||||
|
if ((pegi != -1 && ti != pegi) || (pegi == -1 && ti < I / 2))
|
||||||
|
(cur_alpha - I)[ti] = 0.0;
|
||||||
|
for (int ti = 0; ti < I; ++ti, ++cur_alpha, ++cur_bp) {
|
||||||
|
double* prev_alpha = conv<double> (alpha.begin()) + I * (j - 1);
|
||||||
|
double this_node = net.nodeProb(ti, j);
|
||||||
|
const double *alprob = &net.outProb(j - 1, 0, ti);
|
||||||
|
for (int pi = 0; pi < I; ++pi, ++prev_alpha, (alprob += I)) {
|
||||||
|
massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
|
||||||
|
const double alpha_increment = *prev_alpha * (*alprob)
|
||||||
|
* this_node;
|
||||||
|
if (alpha_increment > *cur_alpha) {
|
||||||
|
(*cur_alpha) = alpha_increment;
|
||||||
|
(*cur_bp) = prev_alpha;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0; i < I; i++)
|
||||||
|
alpha[N - I + i] *= net.getBetainit(i);
|
||||||
|
if (pegj == J - 1)
|
||||||
|
for (int ti = 0; ti < I; ti++)
|
||||||
|
if ((pegi != -1 && ti != pegi) || (pegi == -1 && ti < I / 2))
|
||||||
|
(alpha)[N - I + ti] = 0.0;
|
||||||
|
|
||||||
|
int j = J - 1;
|
||||||
|
cur_alpha = conv<double> (alpha.begin()) + j * I;
|
||||||
|
vitar[J - 1] = max_element(cur_alpha, cur_alpha + I) - cur_alpha;
|
||||||
|
double ret = *max_element(cur_alpha, cur_alpha + I);
|
||||||
|
while (bp[vitar[j] + j * I]) {
|
||||||
|
cur_alpha -= I;
|
||||||
|
vitar[j - 1] = bp[vitar[j] + j * I] - cur_alpha;
|
||||||
|
massert(vitar[j-1]<I&&vitar[j-1]>=0);
|
||||||
|
j--;
|
||||||
|
}
|
||||||
|
massert(j==0);
|
||||||
|
if (verbose) {
|
||||||
|
cout << "VERB:PEG: " << pegi << ' ' << pegj << endl;
|
||||||
|
for (int j = 0; j < J; j++)
|
||||||
|
cout << "NP " << net.nodeProb(vitar[j], j) << ' ' << "AP " << ((j
|
||||||
|
== 0) ? net.getAlphainit(vitar[j]) : net.outProb(j - 1,
|
||||||
|
vitar[j - 1], vitar[j])) << " j:" << j << " i:" << vitar[j]
|
||||||
|
<< "; ";
|
||||||
|
cout << endl;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
double MaximumTraining(const HMMNetwork&net, Array<double>&g, Array<Array2<
|
||||||
|
double> >&E) {
|
||||||
|
Array<int> vitar;
|
||||||
|
double ret = HMMRealViterbi(net, vitar);
|
||||||
|
const int I = net.size1(), J = net.size2();
|
||||||
|
if (E.size() == 1) {
|
||||||
|
Array2<double>&e = E[0];
|
||||||
|
e.resize(I, I);
|
||||||
|
g.resize(I * J);
|
||||||
|
fill(g.begin(), g.end(), 0.0);
|
||||||
|
fill(e.begin(), e.end(), 0.0);
|
||||||
|
for (int i = 0; i < J; ++i) {
|
||||||
|
g[i * I + vitar[i]] = 1.0;
|
||||||
|
if (i > 0)
|
||||||
|
e(vitar[i], vitar[i - 1])++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
g.resize(I * J);
|
||||||
|
fill(g.begin(), g.end(), 0.0);
|
||||||
|
for (int i = 0; i < J; ++i) {
|
||||||
|
g[i * I + vitar[i]] = 1.0;
|
||||||
|
if (i > 0) {
|
||||||
|
Array2<double>&e = E[i - 1];
|
||||||
|
e.resize(I, I);
|
||||||
|
fill(e.begin(), e.end(), 0.0);
|
||||||
|
e(vitar[i], vitar[i - 1])++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef NO_EM_MARKOF_ZEUGS_DEFINED
|
||||||
|
#define NO_EM_MARKOF_ZEUGS_DEFINED
|
||||||
|
#ifndef NO_TRAINING
|
||||||
|
#include "myassert.h"
|
||||||
|
#include "Array.h"
|
||||||
|
#include "Array2.h"
|
||||||
|
|
||||||
|
class HMMNetwork
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
int as,bs;
|
||||||
|
Array2<double> n;
|
||||||
|
Array<Array2<double> > e;
|
||||||
|
Array<double> alphainit;
|
||||||
|
Array<double> betainit;
|
||||||
|
int ab;
|
||||||
|
double finalMultiply;
|
||||||
|
HMMNetwork(int I,int J)
|
||||||
|
: as(I),bs(J),n(as,bs),/*e(as,as,0.0),*/e(0),alphainit(as,1.0/as),betainit(as,1.0),ab(as*bs),finalMultiply(1.0)
|
||||||
|
{}
|
||||||
|
double getAlphainit(int i)const{return alphainit[i];}
|
||||||
|
double getBetainit(int i)const{return betainit[i];}
|
||||||
|
inline int size1()const{return as;}
|
||||||
|
inline int size2()const{return bs;}
|
||||||
|
inline const double&nodeProb(int i,int j)const
|
||||||
|
{return n(i,j);}
|
||||||
|
inline const double&outProb(int j,int i1,int i2)const
|
||||||
|
{/*massert(e[min(int(e.size())-1,j)](i1,i2) );*/ return e[min(int(e.size())-1,j)](i1,i2);}
|
||||||
|
friend ostream&operator<<(ostream&out,const HMMNetwork&x)
|
||||||
|
{
|
||||||
|
return out <<"N: \n"<< x.n << endl << "E: \n" << x.e << "A:\n" << x.alphainit << "B:\n" << x.betainit << endl;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
double ForwardBackwardTraining(const HMMNetwork&mc,Array<double>&gamma,Array<Array2<double> >&epsilon);
|
||||||
|
void HMMViterbi(const HMMNetwork&mc,Array<int>&vit);
|
||||||
|
double HMMRealViterbi(const HMMNetwork&net,Array<int>&vit,int pegi=-1,int pegj=-1,bool verbose=0);
|
||||||
|
double MaximumTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&e);
|
||||||
|
void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit);
|
||||||
|
#endif
|
||||||
|
#endif
|
@ -0,0 +1,75 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef Globals_asdf_defined
|
||||||
|
#define Globals_asdf_defined
|
||||||
|
#include <string>
|
||||||
|
#include <fstream>
|
||||||
|
#include <map>
|
||||||
|
#include <syncObj.h>
|
||||||
|
#include "defs.h"
|
||||||
|
#include "Vector.h"
|
||||||
|
|
||||||
|
extern float PROB_SMOOTH,MINCOUNTINCREASE;
|
||||||
|
extern bool Verbose, Log, Peg, Transfer, Transfer2to3, useDict ;
|
||||||
|
extern string Prefix, LogFilename, OPath,
|
||||||
|
SourceVocabFilename, TargetVocabFilename, CorpusFilename, TestCorpusFilename,
|
||||||
|
t_Filename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
|
||||||
|
extern ofstream logmsg ;
|
||||||
|
extern Mutex logmsg_lock;
|
||||||
|
extern double M5P0,P0 ;
|
||||||
|
extern bool NODUMPS, FEWDUMPS ;
|
||||||
|
extern string Usage ;
|
||||||
|
extern unsigned int MAX_SENTENCE_LENGTH ;
|
||||||
|
extern int PegUntil;
|
||||||
|
|
||||||
|
extern short DeficientDistortionForEmptyWord;
|
||||||
|
|
||||||
|
extern int M4_Dependencies;
|
||||||
|
extern int M5_Dependencies;
|
||||||
|
|
||||||
|
extern short OutputInAachenFormat;
|
||||||
|
|
||||||
|
#define DEP_MODEL_l 1
|
||||||
|
#define DEP_MODEL_m 2
|
||||||
|
#define DEP_MODEL_F 4
|
||||||
|
#define DEP_MODEL_E 8
|
||||||
|
|
||||||
|
#define DEP_MODELb_l 16
|
||||||
|
#define DEP_MODELb_m 32
|
||||||
|
#define DEP_MODELb_F 64
|
||||||
|
#define DEP_MODELb_E 128
|
||||||
|
|
||||||
|
#define DEP_SUM 256
|
||||||
|
|
||||||
|
class vcbList;
|
||||||
|
|
||||||
|
extern vcbList *globeTrainVcbList, *globfTrainVcbList;
|
||||||
|
|
||||||
|
extern short PredictionInAlignments;
|
||||||
|
extern short SmoothHMM;
|
||||||
|
#define VERB Verbose
|
||||||
|
|
||||||
|
double ErrorsInAlignment(const map< pair<int,int>,char >&reference,const Vector<WordIndex>&test,int l,int&missing,int&toomuch,int&eventsMissing,int&eventsToomuch,int);
|
||||||
|
extern Vector<map< pair<int,int>,char > > ReferenceAlignment;
|
||||||
|
void printGIZAPars(ostream&out);
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,512 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#include "HMMTables.h"
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include "Globals.h"
|
||||||
|
#include "Parameter.h"
|
||||||
|
|
||||||
|
// Dumps alProb in readable form. For every key: a "Distribution for:" header,
// each non-zero entry as "index:value; ", and the sum of those entries.
// Ends with the sum over all keys.
template<class CLS, class MAPPERCLASSTOSTRING> void HMMTables<CLS,
    MAPPERCLASSTOSTRING>::writeJumps(ostream&out) const {
  typedef typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator TableIter;

  double grandTotal = 0.0;
  for (TableIter row = alProb.begin(); row != alProb.end(); ++row) {
    const FlexArray<double> &jumps = row->second;
    double rowTotal = 0.0;

    out << "\n\nDistribution for: ";
    printAlDeps(out, row->first, *mapper1, *mapper2);
    out << ' ';
    for (int a = jumps.low(); a <= jumps.high(); ++a) {
      if (!jumps[a])
        continue;
      out << a << ':' << jumps[a] << ';' << ' ';
      rowTotal += jumps[a];
    }
    out << '\n' << '\n' << "SUM: " << rowTotal << '\n';
    grandTotal += rowTotal;
  }
  out << "FULL-SUM: " << grandTotal << '\n';
}
|
||||||
|
// Stream-based counterpart of writeJumps(ostream&). The body is empty: the
// stream argument is ignored and no table is modified.
template<class CLS, class MAPPERCLASSTOSTRING> void HMMTables<CLS,
|
||||||
|
MAPPERCLASSTOSTRING>::readJumps(istream&) {
|
||||||
|
}
|
||||||
|
/*
 * Looks up the alignment (jump) probability stored for the dependency key
 * built from (sentLength, istrich, j, w1, w2).
 *
 * The slot inside the key's table is chosen by PredictionInAlignments:
 *   0: istrich - k
 *   1: k
 *   2: (k*J - j*sentLength) rounded to the nearest multiple of J, divided by J
 * Any other value aborts.
 *
 * When the key is unknown, 1/(2*sentLength-1) is returned, and a warning is
 * printed if 0 < iter < 5000.
 *
 * The value is copied out while `lock` is still held. The earlier version
 * released the lock first and then dereferenced the iterator, so the read
 * could overlap a locked update of the same slot in addAlCount. It also had
 * an unreachable unlock after the two returns, which is gone.
 */
template<class CLS, class MAPPERCLASSTOSTRING> double HMMTables<CLS,
    MAPPERCLASSTOSTRING>::getAlProb(int istrich, int k, int sentLength,
    int J, CLS w1, CLS w2, int j, int iter) const {
  massert(k<sentLength&&k>=0);
  massert(istrich<sentLength&&istrich>=-1);
  int pos=istrich-k;
  switch (PredictionInAlignments) {
  case 0:
    pos=istrich-k;
    break;
  case 1:
    pos=k;
    break;
  case 2:
    pos=(k*J-j*sentLength);
    // Round half away from zero before the integer division.
    if (pos>0)
      pos+=J/2;
    else
      pos-=J/2;
    pos/=J;
    break;
  default:
    abort();
  }

  double result;
  lock.lock();
  typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator p=
      alProb.find(AlDeps<CLS>(sentLength, istrich, j, w1, w2));
  if (p!=alProb.end() ) {
    result=(p->second)[pos];
  } else {
    if (iter>0&&iter<5000)
      cout << "WARNING: Not found: " << ' ' << J << ' ' << sentLength
          << '\n';
    result=1.0/(2*sentLength-1);
  }
  lock.unlock();
  return result;
}
|
||||||
|
|
||||||
|
/*
 * Adds `value` to the alProb slot, and `valuePredicted` (when non-zero) to
 * the alProbPredicted slot, for the dependency key built from
 * (sentLength, istrich, j, w1, w2).
 *
 * The slot index follows PredictionInAlignments exactly as in getAlProb
 * (0: istrich-k, 1: k, 2: rounded (k*J - j*sentLength)/J; otherwise abort).
 * A key seen for the first time gets a zero-filled table covering
 * [-MAX_SENTENCE_LENGTH, MAX_SENTENCE_LENGTH] when bit 0 of CompareAlDeps is
 * clear, and [-sentLength, sentLength] otherwise. Each of the two updates is
 * done in its own lock/unlock section.
 */
template<class CLS, class MAPPERCLASSTOSTRING> void HMMTables<CLS,
    MAPPERCLASSTOSTRING>::addAlCount(int istrich, int k, int sentLength,
    int J, CLS w1, CLS w2, int j, double value, double valuePredicted) {
  typedef typename map<AlDeps<CLS>,FlexArray<double> >::iterator TableIter;

  int pos;
  const int mode = PredictionInAlignments;
  if (mode == 0) {
    pos = istrich - k;
  } else if (mode == 1) {
    pos = k;
  } else if (mode == 2) {
    const int raw = k*J - j*sentLength;
    const int half = J/2;
    pos = ((raw > 0) ? (raw + half) : (raw - half)) / J;
  } else {
    abort();
  }

  AlDeps<CLS> deps(sentLength, istrich, j, w1, w2);
  const bool fullRange = ((CompareAlDeps&1)==0);

  lock.lock();
  TableIter slot = alProb.find(deps);
  if (slot == alProb.end()) {
    if (fullRange)
      slot = alProb.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
    else
      slot = alProb.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
  }
  slot->second[pos] += value;
  lock.unlock();

  if (!valuePredicted)
    return;

  lock.lock();
  TableIter predicted = alProbPredicted.find(deps);
  if (predicted == alProbPredicted.end()) {
    if (fullRange)
      predicted = alProbPredicted.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
    else
      predicted = alProbPredicted.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
  }
  predicted->second[pos] += valuePredicted;
  lock.unlock();
}
|
||||||
|
|
||||||
|
template<class CLS, class MAPPERCLASSTOSTRING>
|
||||||
|
pair<Array<double>,Mutex>&HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetAlphaInit(int I)
|
||||||
|
{
|
||||||
|
alphalock.lock();
|
||||||
|
if( !init_alpha.count(I) ){
|
||||||
|
init_alpha[I]=pair<Array<double>,Mutex>(Array<double>(I,0),Mutex());
|
||||||
|
}
|
||||||
|
pair<Array<double>,Mutex>& ret = init_alpha[I];
|
||||||
|
alphalock.unlock();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
template<class CLS, class MAPPERCLASSTOSTRING>
|
||||||
|
pair<Array<double>,Mutex>&HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetBetaInit(int I)
|
||||||
|
{
|
||||||
|
betalock.lock();
|
||||||
|
if( !init_beta.count(I) ){
|
||||||
|
init_beta[I]=pair<Array<double>,Mutex>(Array<double>(I,0),Mutex());
|
||||||
|
}
|
||||||
|
pair<Array<double>,Mutex>& ret = init_beta[I];
|
||||||
|
betalock.unlock();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Copy the init_alpha array stored under key I into x.
 *
 * @param I  key to look up in init_alpha.
 * @param x  receives a copy of the stored array; left untouched when no
 *           entry exists.
 * @return   true when an entry for I was found, false otherwise.
 *
 * The lookup and the copy happen under alphalock; the post-processing below
 * only touches the caller's copy and therefore runs after the lock is
 * released.
 */
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
MAPPERCLASSTOSTRING>::getAlphaInit(int I, Array<double>&x) const {
	alphalock.lock();
	hash_map<int,pair<Array<double>,Mutex> >::const_iterator i=init_alpha.find(I);
	if (i==init_alpha.end()) {
		alphalock.unlock();
		return false;
	}
	x=i->second.first;
	alphalock.unlock();
	// only first empty word can be chosen: zero every entry from index
	// size/2+1 onwards in the returned copy
	for (unsigned int j=x.size()/2+1; j<x.size(); ++j)
		x[j]=0;
	return true;
}
|
||||||
|
/**
 * Copy the init_beta array stored under key I into x.
 *
 * @param I  key to look up in init_beta.
 * @param x  receives a copy of the stored array; left untouched when no
 *           entry exists.
 * @return   true when an entry for I was found, false otherwise.
 *
 * The lookup and the copy happen under betalock, which is released on every
 * path before returning.
 */
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
MAPPERCLASSTOSTRING>::getBetaInit(int I, Array<double>&x) const {
	betalock.lock();
	hash_map<int,pair<Array<double>,Mutex> >::const_iterator i=init_beta.find(I);
	if (i==init_beta.end()) {
		betalock.unlock();
		return false;
	}
	x=i->second.first;
	betalock.unlock();
	return true;
}
|
||||||
|
|
||||||
|
/***********************************
|
||||||
|
By Edward Gao
|
||||||
|
************************************/
|
||||||
|
|
||||||
|
/**
 * Dump the count tables to text files so they can be rebuilt by readJumps.
 *
 * Each path argument may be null, in which case that table is skipped.
 * Tables are written in the order alprob (alProb), alpredict
 * (alProbPredicted), alpha (init_alpha), beta (init_beta).
 *
 * Jump-table line format:
 *   englishSentenceLength classPrevious previous j Cj low high {pos value}*
 * where only non-zero values are listed.
 * Init-table line format:
 *   key size value*
 *
 * @return false as soon as one of the files cannot be opened (tables after
 *         it are not written), true otherwise.
 */
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
MAPPERCLASSTOSTRING>::writeJumps(const char* alprob,
		const char* alpredict, const char* alpha, const char* beta) const {
	typedef map<AlDeps<CLS>,FlexArray<double> > JumpTable;
	typedef hash_map<int,pair<Array<double>,Mutex> > InitTable;

	// The two jump tables share one on-disk format.
	const char* const jumpPaths[2] = { alprob, alpredict };
	const JumpTable* const jumpTables[2] = { &alProb, &alProbPredicted };
	for (int t=0; t<2; ++t) {
		const char* path = jumpPaths[t];
		if (!path)
			continue;
		ofstream ofs(path);
		if (!ofs.is_open()) {
			cerr << "Cannot open file for HMM output " << path << endl;
			return false;
		}
		cerr << "Dumping HMM table to " << path << endl;
		typename JumpTable::const_iterator row = jumpTables[t]->begin();
		for (; row != jumpTables[t]->end(); ++row) {
			const AlDeps<CLS>& key = row->first;
			const FlexArray<double>& counts = row->second;
			ofs << key.englishSentenceLength << " " << key.classPrevious << " "
				<< key.previous << " " << key.j << " " << key.Cj << " "
				<< counts.low() << " " << counts.high() << " ";
			for (int a=counts.low(); a<=counts.high(); ++a) {
				if (counts[a])
					ofs << a << ' ' << counts[a] << ' ';
			}
			ofs << endl;
		}
		ofs.close();
	}

	// Likewise for the two init tables.
	const char* const initPaths[2] = { alpha, beta };
	const InitTable* const initTables[2] = { &init_alpha, &init_beta };
	for (int t=0; t<2; ++t) {
		const char* path = initPaths[t];
		if (!path)
			continue;
		ofstream ofs(path);
		if (!ofs.is_open()) {
			cerr << "Cannot open file for HMM output " << path << endl;
			return false;
		}
		cerr << "Dumping HMM table to " << path << endl;
		InitTable::const_iterator row = initTables[t]->begin();
		for (; row != initTables[t]->end(); row++) {
			const Array<double>& values = row->second.first;
			ofs << row->first << " " << values.size() << " ";
			for (int j=0; j<values.size(); j++)
				ofs << values[j] << " ";
			ofs << endl;
		}
		ofs.close();
	}
	return true;
}
|
||||||
|
|
||||||
|
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
|
||||||
|
MAPPERCLASSTOSTRING>::readJumps(const char* alprob,
|
||||||
|
const char* alpredict, const char* alpha, const char* beta) {
|
||||||
|
if (alprob) {
|
||||||
|
ifstream ifs(alprob);
|
||||||
|
if (!ifs.is_open()) {
|
||||||
|
cerr << "Cannot open file for HMM input " << alprob << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
cerr << "Reading HMM table from " << alprob << endl;
|
||||||
|
string strLine="";
|
||||||
|
bool expect_data = false;
|
||||||
|
while (!ifs.eof()) {
|
||||||
|
strLine = "";
|
||||||
|
getline(ifs, strLine);
|
||||||
|
if (strLine.length()) {
|
||||||
|
stringstream ss(strLine.c_str());
|
||||||
|
AlDeps<CLS> dep;
|
||||||
|
int low, high;
|
||||||
|
ss >> dep.englishSentenceLength >> dep.classPrevious
|
||||||
|
>> dep.previous >> dep.j >> dep.Cj >> low >> high;
|
||||||
|
typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
|
||||||
|
alProb.find(dep);
|
||||||
|
if (p==alProb.end() ) {
|
||||||
|
p=alProb.insert(make_pair(dep,FlexArray<double> (low,high,0.0))).first;
|
||||||
|
}
|
||||||
|
int pos;
|
||||||
|
double val;
|
||||||
|
while (!ss.eof()) {
|
||||||
|
pos = low-1;
|
||||||
|
val = 0;
|
||||||
|
ss >> pos >> val;
|
||||||
|
if (pos>low-1) {
|
||||||
|
p->second[pos]+=val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (alpredict) {
|
||||||
|
ifstream ifs(alpredict);
|
||||||
|
if (!ifs.is_open()) {
|
||||||
|
cerr << "Cannot open file for HMM input " << alpredict << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
cerr << "Reading HMM table from " << alpredict << endl;
|
||||||
|
string strLine="";
|
||||||
|
bool expect_data = false;
|
||||||
|
while (!ifs.eof()) {
|
||||||
|
strLine = "";
|
||||||
|
getline(ifs, strLine);
|
||||||
|
if (strLine.length()) {
|
||||||
|
stringstream ss(strLine.c_str());
|
||||||
|
AlDeps<CLS> dep;
|
||||||
|
int low, high;
|
||||||
|
ss >> dep.englishSentenceLength >> dep.classPrevious
|
||||||
|
>> dep.previous >> dep.j >> dep.Cj >> low >> high;
|
||||||
|
typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
|
||||||
|
alProbPredicted.find(dep);
|
||||||
|
if (p==alProbPredicted.end() ) {
|
||||||
|
p=alProbPredicted.insert(make_pair(dep,FlexArray<double> (low,high,0.0))).first;
|
||||||
|
}
|
||||||
|
int pos;
|
||||||
|
double val;
|
||||||
|
|
||||||
|
while (!ss.eof()) {
|
||||||
|
pos = low-1;
|
||||||
|
val = 0;
|
||||||
|
ss >> pos >> val;
|
||||||
|
if (pos>low-1) {
|
||||||
|
p->second[pos]+=val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (alpha) {
|
||||||
|
ifstream ifs(alpha);
|
||||||
|
|
||||||
|
if (!ifs.is_open()) {
|
||||||
|
cerr << "Cannot open file for HMM input " << alpha << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
string strLine="";
|
||||||
|
bool expect_data = false;
|
||||||
|
while (!ifs.eof()) {
|
||||||
|
strLine = "";
|
||||||
|
getline(ifs, strLine);
|
||||||
|
if (strLine.length()) {
|
||||||
|
stringstream ss(strLine.c_str());
|
||||||
|
int id = -1, size = -1;
|
||||||
|
ss >> id >> size;
|
||||||
|
if (id<0||size<0||id!=size) {
|
||||||
|
cerr << "Mismatch in alpha init table!" << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
pair<Array<double>, Mutex>&alp = doGetAlphaInit(id);
|
||||||
|
Array<double>& gk = alp.first;
|
||||||
|
int j;
|
||||||
|
double v;
|
||||||
|
alp.second.lock();
|
||||||
|
for (j=0; j<gk.size(); j++) {
|
||||||
|
ss >> v;
|
||||||
|
gk[j]+=v;
|
||||||
|
}
|
||||||
|
alp.second.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (beta) {
|
||||||
|
ifstream ifs(beta);
|
||||||
|
|
||||||
|
if (!ifs.is_open()) {
|
||||||
|
cerr << "Cannot open file for HMM input " << beta << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
string strLine="";
|
||||||
|
bool expect_data = false;
|
||||||
|
while (!ifs.eof()) {
|
||||||
|
strLine = "";
|
||||||
|
getline(ifs, strLine);
|
||||||
|
if (strLine.length()) {
|
||||||
|
stringstream ss(strLine.c_str());
|
||||||
|
int id = -1, size = -1;
|
||||||
|
ss >> id >> size;
|
||||||
|
if (id<0||size<0||id!=size) {
|
||||||
|
cerr << "Mismatch in alpha init table!" << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
pair<Array<double>, Mutex>&bet1 = doGetBetaInit(id);
|
||||||
|
Array<double>&bet = bet1.first;
|
||||||
|
|
||||||
|
int j;
|
||||||
|
double v;
|
||||||
|
bet1.second.lock();
|
||||||
|
for (j=0; j<bet.size(); j++) {
|
||||||
|
ss >> v;
|
||||||
|
bet[j]+=v;
|
||||||
|
}
|
||||||
|
bet1.second.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Add all counts held by `ht` to the corresponding tables of this object.
 *
 * alProb / alProbPredicted: a key missing here is first inserted with a
 * zero-filled FlexArray spanning the same [low, high] range as ht's entry;
 * then every non-zero value of ht's entry is added.
 * init_alpha / init_beta: the local entry is obtained through
 * doGetAlphaInit / doGetBetaInit (created on demand) and ht's values are
 * added element-wise.
 *
 * NOTE(review): no lock is taken on either object's tables here; presumably
 * merge is only called while no other thread updates them -- confirm.
 *
 * @param ht table set whose counts are added; it is not modified.
 * @return always true.
 */
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
MAPPERCLASSTOSTRING>::merge(HMMTables<CLS,MAPPERCLASSTOSTRING> & ht) {

	for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
			ht.alProb.begin(); i!=ht.alProb.end(); ++i) {
		typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
				alProb.find(i->first);
		if (p==alProb.end() ) {
			p=alProb.insert(make_pair(i->first,FlexArray<double> (i->second.low(),i->second.high(),0.0))).first;
		}
		for (int a=i->second.low(); a<=i->second.high(); ++a)
			if (i->second[a]) {
				p->second[a] += i->second[a];
			}
	}

	for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
			ht.alProbPredicted.begin(); i!=ht.alProbPredicted.end(); ++i) {
		typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
				alProbPredicted.find(i->first);
		if (p==alProbPredicted.end() ) {
			p=alProbPredicted.insert(make_pair(i->first,FlexArray<double> (i->second.low(),i->second.high(),0.0))).first;
		}
		for (int a=i->second.low(); a<=i->second.high(); ++a)
			if (i->second[a]) {
				p->second[a] += i->second[a];
			}
	}

	for (typename hash_map<int,pair<Array<double>,Mutex> >::const_iterator i=
			ht.init_alpha.begin(); i!=ht.init_alpha.end(); i++) {
		// Must be a reference: binding by value would add the counts to a
		// temporary copy and silently drop them.
		pair<Array<double>,Mutex>& alp = doGetAlphaInit(i->first);
		for (int j=0; j<alp.first.size(); j++) {
			alp.first[j]+=i->second.first[j];
		}
	}

	for (typename hash_map<int,pair<Array<double>,Mutex> >::const_iterator i=
			ht.init_beta.begin(); i!=ht.init_beta.end(); i++) {
		pair<Array<double>,Mutex>& bet = doGetBetaInit(i->first);
		for (int j=0; j<bet.first.size(); j++) {
			bet.first[j]+=i->second.first[j];
		}
	}

	return true;
}
|
||||||
|
|
||||||
|
//////////////////////////////////////
|
||||||
|
/**
 * Construct an empty table set.
 *
 * @param _probForEmpty  its absolute value (via mfabs) becomes
 *                       probabilityForEmpty; a negative value additionally
 *                       sets updateProbabilityForEmpty.
 * @param m1, m2         class-to-string mappers; only their addresses are
 *                       stored, so both must outlive this object.
 *
 * The scalar counters are zero-initialised so that they never hold
 * indeterminate values.
 */
template<class CLS, class MAPPERCLASSTOSTRING> HMMTables<CLS,
MAPPERCLASSTOSTRING>::HMMTables(double _probForEmpty,
		const MAPPERCLASSTOSTRING&m1, const MAPPERCLASSTOSTRING&m2) :
	probabilityForEmpty(mfabs(_probForEmpty)),
	updateProbabilityForEmpty(_probForEmpty<0.0),
	globalCounter(0), divSum(0.0), p0_count(0.0), np0_count(0.0),
	mapper1(&m1), mapper2(&m2) {
}

/** Nothing to release explicitly; members clean up after themselves. */
template<class CLS, class MAPPERCLASSTOSTRING> HMMTables<CLS,
MAPPERCLASSTOSTRING>::~HMMTables() {
}
|
@ -0,0 +1,179 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef HMM_TABLES_H_ASDF_DEFINED
|
||||||
|
#define HMM_TABLES_H_ASDF_DEFINED
|
||||||
|
#include "FlexArray.h"
|
||||||
|
|
||||||
|
#if __GNUC__>2
|
||||||
|
#include <ext/hash_map>
|
||||||
|
using __gnu_cxx::hash_map;
|
||||||
|
#else
|
||||||
|
#include <hash_map>
|
||||||
|
#endif
|
||||||
|
#include "Array.h"
|
||||||
|
#include <map>
|
||||||
|
#include "mymath.h"
|
||||||
|
#include "syncObj.h"
|
||||||
|
|
||||||
|
/**
 * Scale the range [a,b) in place so that it sums to one.
 *
 * When the elements sum to zero the range is instead filled with the
 * uniform value 1.0/(b-a).
 *
 * @return the sum of the elements before normalisation.
 */
template<class T>
T normalize_if_possible(T*a,T*b){
  T total=0;
  for(T*cur=a;cur!=b;++cur)
    total+=*cur;
  if( !total ){
    // nothing to scale by: fall back to a uniform distribution
    fill(a,b,1.0/(b-a));
    return total;
  }
  for(T*cur=a;cur!=b;++cur)
    *cur/=total;
  return total;
}
|
||||||
|
|
||||||
|
extern short CompareAlDeps;
|
||||||
|
template<class CLS>
|
||||||
|
class AlDeps{
|
||||||
|
public:
|
||||||
|
int englishSentenceLength;
|
||||||
|
CLS classPrevious;
|
||||||
|
int previous;
|
||||||
|
int j;
|
||||||
|
CLS Cj;
|
||||||
|
AlDeps(){};
|
||||||
|
AlDeps(int l,int p=0,int _j=0,CLS s1=0,CLS _Cj=0)
|
||||||
|
: englishSentenceLength(l),classPrevious(s1),previous(p),j(_j),Cj(_Cj)
|
||||||
|
{}
|
||||||
|
friend bool operator<(const AlDeps&x,const AlDeps&y){
|
||||||
|
if( (CompareAlDeps&1) && x.englishSentenceLength<y.englishSentenceLength ) return 1;
|
||||||
|
if( (CompareAlDeps&1) && y.englishSentenceLength<x.englishSentenceLength ) return 0;
|
||||||
|
if( (CompareAlDeps&2) && x.classPrevious<y.classPrevious ) return 1;
|
||||||
|
if( (CompareAlDeps&2) && y.classPrevious<x.classPrevious ) return 0;
|
||||||
|
if( (CompareAlDeps&4) && x.previous<y.previous ) return 1;
|
||||||
|
if( (CompareAlDeps&4) && y.previous<x.previous ) return 0;
|
||||||
|
if( (CompareAlDeps&8) && x.j<y.j ) return 1;
|
||||||
|
if( (CompareAlDeps&8) && y.j<x.j ) return 0;
|
||||||
|
if( (CompareAlDeps&16) && x.Cj<y.Cj ) return 1;
|
||||||
|
if( (CompareAlDeps&16) && y.Cj<x.Cj ) return 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
friend bool operator==(const AlDeps&x,const AlDeps&y)
|
||||||
|
{ return !( x<y || y<x ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class CLS>
|
||||||
|
class Hash_AlDeps{
|
||||||
|
public:
|
||||||
|
unsigned
|
||||||
|
int
|
||||||
|
operator()
|
||||||
|
(const AlDeps<CLS>&x)
|
||||||
|
const
|
||||||
|
{
|
||||||
|
unsigned int hash=0;
|
||||||
|
if( (CompareAlDeps&1) ) { hash=hash+x.englishSentenceLength;hash*=31;}
|
||||||
|
if( (CompareAlDeps&2) ) { hash=hash+x.classPrevious;hash*=31;}
|
||||||
|
if( (CompareAlDeps&4) ) { hash=hash+x.previous;hash*=31;}
|
||||||
|
if( (CompareAlDeps&8) ) { hash=hash+x.j;hash*=31;}
|
||||||
|
if( (CompareAlDeps&16) ) { hash=hash+x.Cj;hash*=31;}
|
||||||
|
return hash;
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class CLS,class MAPPERCLASSTOSTRING>
|
||||||
|
class HMMTables
|
||||||
|
{
|
||||||
|
Mutex lock;
|
||||||
|
Mutex alphalock,betalock;
|
||||||
|
public:
|
||||||
|
double probabilityForEmpty;
|
||||||
|
bool updateProbabilityForEmpty;
|
||||||
|
hash_map<int, pair<Array<double>,Mutex> > init_alpha;
|
||||||
|
hash_map<int, pair<Array<double>,Mutex> > init_beta;
|
||||||
|
map<AlDeps<CLS>,FlexArray<double> > alProb;
|
||||||
|
map<AlDeps<CLS>,FlexArray<double> > alProbPredicted;
|
||||||
|
int globalCounter;
|
||||||
|
double divSum;
|
||||||
|
double p0_count,np0_count;
|
||||||
|
const MAPPERCLASSTOSTRING*mapper1;
|
||||||
|
const MAPPERCLASSTOSTRING*mapper2;
|
||||||
|
public:
|
||||||
|
bool merge(HMMTables<CLS,MAPPERCLASSTOSTRING> & ht);
|
||||||
|
const HMMTables<CLS,MAPPERCLASSTOSTRING>*getThis()const {return this;}
|
||||||
|
HMMTables(double _probForEmpty,const MAPPERCLASSTOSTRING&m1,const MAPPERCLASSTOSTRING&m2);
|
||||||
|
virtual ~HMMTables();
|
||||||
|
virtual double getAlProb(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,int iter=0) const;
|
||||||
|
virtual void writeJumps(ostream&) const;
|
||||||
|
/**By Edward Gao, write out all things needed to rebuild the count table*/
|
||||||
|
virtual bool writeJumps(const char* alprob, const char* alpredict, const char* alpha, const char* beta )const;
|
||||||
|
virtual bool readJumps(const char* alprob, const char* alpredict, const char* alpha, const char* beta );
|
||||||
|
void addAlCount(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,double value,double valuePredicted);
|
||||||
|
virtual void readJumps(istream&);
|
||||||
|
virtual bool getAlphaInit(int I,Array<double>&x)const;
|
||||||
|
virtual bool getBetaInit(int I,Array<double> &x)const;
|
||||||
|
pair<Array<double>, Mutex> &doGetAlphaInit(int I);
|
||||||
|
pair<Array<double>, Mutex> &doGetBetaInit(int I);
|
||||||
|
virtual double getProbabilityForEmpty()const
|
||||||
|
{return probabilityForEmpty;}
|
||||||
|
void performGISIteration(const HMMTables<CLS,MAPPERCLASSTOSTRING>*old){
|
||||||
|
cout << "OLDSIZE: " << (old?(old->alProb.size()):0) << " NEWSIZE:"<< alProb.size()<< endl;
|
||||||
|
for(typename map<AlDeps<CLS>,FlexArray<double> >::iterator i=alProb.begin();i!=alProb.end();++i) {
|
||||||
|
if( alProbPredicted.count(i->first)){
|
||||||
|
normalize_if_possible(i->second.begin(),i->second.end());
|
||||||
|
normalize_if_possible(alProbPredicted[i->first].begin(),alProbPredicted[i->first].end());
|
||||||
|
for(int j=i->second.low();j<=i->second.high();++j){
|
||||||
|
if( i->second[j] )
|
||||||
|
if(alProbPredicted[i->first][j]>0.0 )
|
||||||
|
{
|
||||||
|
double op=1.0;
|
||||||
|
if( old && old->alProb.count(i->first) )
|
||||||
|
op=(old->alProb.find(i->first)->second)[j];
|
||||||
|
//cerr << "GIS: " << j << ' ' << " OLD:"
|
||||||
|
// << op << "*true:"
|
||||||
|
// << i->second[j] << "/pred:" << alProbPredicted[i->first][j] << " -> ";
|
||||||
|
|
||||||
|
|
||||||
|
i->second[j]= op*(i->second[j]/alProbPredicted[i->first][j]);
|
||||||
|
//cerr << i->second[j] << endl;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
cerr << "ERROR2 in performGISiteration: " << i->second[j] << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
cerr << "ERROR in performGISIteration: " << alProbPredicted.count(i->first) << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class CLS,class MAPPERCLASSTOSTRING>
|
||||||
|
inline void printAlDeps(ostream&out,const AlDeps<CLS>&x,const MAPPERCLASSTOSTRING&mapper1,const MAPPERCLASSTOSTRING&mapper2)
|
||||||
|
{
|
||||||
|
if( (CompareAlDeps&1) ) out << "sentenceLength: " << x.englishSentenceLength<< ' ';
|
||||||
|
if( (CompareAlDeps&2) ) out << "previousClass: " << mapper1.classString(x.classPrevious) << ' ';
|
||||||
|
if( (CompareAlDeps&4) ) out << "previousPosition: " << x.previous << ' ';
|
||||||
|
if( (CompareAlDeps&8) ) out << "FrenchPosition: " << x.j << ' ';
|
||||||
|
if( (CompareAlDeps&16) ) out << "FrenchClass: " << mapper2.classString(x.Cj) << ' ';
|
||||||
|
//out << '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,217 @@
|
|||||||
|
## Process this file with automake to produce Makefile.in
|
||||||
|
|
||||||
|
## Created by Anjuta
|
||||||
|
|
||||||
|
INCLUDES = \
|
||||||
|
-DPACKAGE_LOCALE_DIR=\""$(prefix)/$(DATADIRNAME)/locale"\" \
|
||||||
|
-DPACKAGE_SRC_DIR=\""$(srcdir)"\" \
|
||||||
|
-DPACKAGE_DATA_DIR=\""$(datadir)"\"
|
||||||
|
|
||||||
|
AM_CFLAGS =\
|
||||||
|
-Wall\
|
||||||
|
-g
|
||||||
|
|
||||||
|
bin_PROGRAMS = mgiza \
|
||||||
|
snt2cooc\
|
||||||
|
snt2plain\
|
||||||
|
plain2snt \
|
||||||
|
symal \
|
||||||
|
hmmnorm \
|
||||||
|
d4norm
|
||||||
|
|
||||||
|
d4norm_SOURCES = \
|
||||||
|
d4norm.cxx
|
||||||
|
|
||||||
|
d4norm_LDADD = \
|
||||||
|
-lgiza \
|
||||||
|
-lpthread
|
||||||
|
|
||||||
|
d4norm_LDFLAGS = \
|
||||||
|
-L.
|
||||||
|
|
||||||
|
d4norm_DEPENDENCIES = \
|
||||||
|
libgiza.a
|
||||||
|
|
||||||
|
d4norm_CXXFLAGS = \
|
||||||
|
-MT \
|
||||||
|
-MD \
|
||||||
|
-MP \
|
||||||
|
-MF \
|
||||||
|
-O6
|
||||||
|
|
||||||
|
d4norm_CPPFLAGS = \
|
||||||
|
-DNDEBUG \
|
||||||
|
-DWORDINDEX_WITH_4_BYTE \
|
||||||
|
-DBINARY_SEARCH_FOR_TTABLE \
|
||||||
|
-DDEBUG
|
||||||
|
|
||||||
|
hmmnorm_SOURCES = \
|
||||||
|
hmmnorm.cxx
|
||||||
|
|
||||||
|
hmmnorm_LDADD = \
|
||||||
|
-lgiza \
|
||||||
|
-lpthread
|
||||||
|
|
||||||
|
hmmnorm_LDFLAGS = \
|
||||||
|
-L.
|
||||||
|
|
||||||
|
hmmnorm_DEPENDENCIES = \
|
||||||
|
libgiza.a
|
||||||
|
|
||||||
|
hmmnorm_CXXFLAGS = \
|
||||||
|
-MT \
|
||||||
|
-MD \
|
||||||
|
-MP \
|
||||||
|
-MF \
|
||||||
|
-O6
|
||||||
|
|
||||||
|
hmmnorm_CPPFLAGS = \
|
||||||
|
-DNDEBUG \
|
||||||
|
-DWORDINDEX_WITH_4_BYTE \
|
||||||
|
-DBINARY_SEARCH_FOR_TTABLE \
|
||||||
|
-DDEBUG
|
||||||
|
|
||||||
|
symal_SOURCES = \
|
||||||
|
cmd.c \
|
||||||
|
cmd.h \
|
||||||
|
symal.cpp
|
||||||
|
|
||||||
|
plain2snt_SOURCES = \
|
||||||
|
plain2snt.cpp
|
||||||
|
|
||||||
|
snt2plain_SOURCES = \
|
||||||
|
snt2plain.cpp
|
||||||
|
|
||||||
|
snt2cooc_SOURCES = \
|
||||||
|
snt2cooc.cpp
|
||||||
|
|
||||||
|
snt2cooc_CPPFLAGS = \
|
||||||
|
-DNDEBUG \
|
||||||
|
-DWORDINDEX_WITH_4_BYTE \
|
||||||
|
-DBINARY_SEARCH_FOR_TTABLE \
|
||||||
|
-DDEBUG
|
||||||
|
|
||||||
|
mgiza_SOURCES = \
|
||||||
|
main.cpp
|
||||||
|
|
||||||
|
mgiza_DEPENDENCIES = \
|
||||||
|
libgiza.a
|
||||||
|
|
||||||
|
mgiza_CXXFLAGS = \
|
||||||
|
-MT \
|
||||||
|
-MD \
|
||||||
|
-MP \
|
||||||
|
-MF \
|
||||||
|
-O6
|
||||||
|
|
||||||
|
mgiza_CPPFLAGS = \
|
||||||
|
-DNDEBUG \
|
||||||
|
-DWORDINDEX_WITH_4_BYTE \
|
||||||
|
-DBINARY_SEARCH_FOR_TTABLE \
|
||||||
|
-DDEBUG
|
||||||
|
|
||||||
|
mgiza_LDFLAGS = \
|
||||||
|
-L.
|
||||||
|
|
||||||
|
mgiza_LDADD = \
|
||||||
|
-lgiza \
|
||||||
|
-lpthread
|
||||||
|
|
||||||
|
lib_LIBRARIES = \
|
||||||
|
libgiza.a
|
||||||
|
|
||||||
|
libgiza_a_SOURCES = \
|
||||||
|
alignment.cpp\
|
||||||
|
alignment.h \
|
||||||
|
AlignTables.cpp \
|
||||||
|
AlignTables.h \
|
||||||
|
Array.h \
|
||||||
|
Array2.h \
|
||||||
|
Array4.h \
|
||||||
|
ATables.cpp \
|
||||||
|
ATables.h \
|
||||||
|
collCounts.cpp \
|
||||||
|
collCounts.h \
|
||||||
|
common.h \
|
||||||
|
D4Tables.h \
|
||||||
|
D5Tables.h \
|
||||||
|
defs.h \
|
||||||
|
Dictionary.cpp \
|
||||||
|
Dictionary.h \
|
||||||
|
file_spec.h \
|
||||||
|
FlexArray.h \
|
||||||
|
ForwardBackward.cpp \
|
||||||
|
ForwardBackward.h \
|
||||||
|
getSentence.cpp \
|
||||||
|
getSentence.h \
|
||||||
|
Globals.h \
|
||||||
|
hmm.cpp \
|
||||||
|
hmm.h \
|
||||||
|
HMMTables.cpp \
|
||||||
|
HMMTables.h \
|
||||||
|
logprob.cpp \
|
||||||
|
logprob.h \
|
||||||
|
model1.cpp \
|
||||||
|
model1.h \
|
||||||
|
model2.cpp \
|
||||||
|
model2.h \
|
||||||
|
model2to3.cpp \
|
||||||
|
model3.cpp \
|
||||||
|
model3.h \
|
||||||
|
model3_viterbi.cpp \
|
||||||
|
model3_viterbi_with_tricks.cpp \
|
||||||
|
model345-peg.cpp \
|
||||||
|
MoveSwapMatrix.cpp \
|
||||||
|
MoveSwapMatrix.h \
|
||||||
|
myassert.cpp \
|
||||||
|
myassert.h \
|
||||||
|
mymath.h \
|
||||||
|
mystl.h \
|
||||||
|
NTables.cpp \
|
||||||
|
NTables.h \
|
||||||
|
Parameter.cpp \
|
||||||
|
Parameter.h \
|
||||||
|
parse.cpp \
|
||||||
|
Perplexity.cpp \
|
||||||
|
Perplexity.h \
|
||||||
|
Pointer.h \
|
||||||
|
reports.cpp \
|
||||||
|
SetArray.cpp \
|
||||||
|
SetArray.h \
|
||||||
|
syncObj.h \
|
||||||
|
transpair_model1.h \
|
||||||
|
transpair_model2.h \
|
||||||
|
transpair_model3.cpp \
|
||||||
|
transpair_model3.h \
|
||||||
|
transpair_model4.cpp \
|
||||||
|
transpair_model4.h \
|
||||||
|
transpair_model5.cpp \
|
||||||
|
transpair_model5.h \
|
||||||
|
transpair_modelhmm.h \
|
||||||
|
ttableDiff.hpp \
|
||||||
|
TTables.cpp \
|
||||||
|
TTables.h \
|
||||||
|
types.h \
|
||||||
|
utility.cpp \
|
||||||
|
utility.h \
|
||||||
|
Vector.h \
|
||||||
|
vocab.cpp \
|
||||||
|
vocab.h \
|
||||||
|
WordClasses.h
|
||||||
|
|
||||||
|
libgiza_a_CXXFLAGS = \
|
||||||
|
-MD \
|
||||||
|
-MP \
|
||||||
|
-MF \
|
||||||
|
-MT \
|
||||||
|
-O6
|
||||||
|
|
||||||
|
libgiza_a_CPPFLAGS = \
|
||||||
|
-DNDEBUG \
|
||||||
|
-DWORDINDEX_WITH_4_BYTE \
|
||||||
|
-DBINARY_SEARCH_FOR_TTABLE \
|
||||||
|
-DDEBUG
|
||||||
|
|
||||||
|
SUBDIRS = \
|
||||||
|
mkcls
|
||||||
|
|
@ -0,0 +1,214 @@
|
|||||||
|
## Process this file with automake to produce Makefile.in
|
||||||
|
|
||||||
|
## Created by Anjuta
|
||||||
|
|
||||||
|
INCLUDES = \
|
||||||
|
-DPACKAGE_LOCALE_DIR=\""$(prefix)/$(DATADIRNAME)/locale"\" \
|
||||||
|
-DPACKAGE_SRC_DIR=\""$(srcdir)"\" \
|
||||||
|
-DPACKAGE_DATA_DIR=\""$(datadir)"\"
|
||||||
|
|
||||||
|
AM_CFLAGS =\
|
||||||
|
-Wall\
|
||||||
|
-g
|
||||||
|
|
||||||
|
bin_PROGRAMS = mgiza \
|
||||||
|
snt2cooc\
|
||||||
|
snt2plain\
|
||||||
|
plain2snt \
|
||||||
|
symal \
|
||||||
|
hmmnorm \
|
||||||
|
d4norm
|
||||||
|
|
||||||
|
d4norm_SOURCES = \
|
||||||
|
d4norm.cxx
|
||||||
|
|
||||||
|
d4norm_LDADD = \
|
||||||
|
-lgiza \
|
||||||
|
-lpthread
|
||||||
|
|
||||||
|
d4norm_LDFLAGS = \
|
||||||
|
-L.
|
||||||
|
|
||||||
|
d4norm_DEPENDENCIES = \
|
||||||
|
libgiza.a
|
||||||
|
|
||||||
|
d4norm_CXXFLAGS = \
|
||||||
|
-MT \
|
||||||
|
-MD \
|
||||||
|
-MP \
|
||||||
|
-MF \
|
||||||
|
-O6
|
||||||
|
|
||||||
|
d4norm_CPPFLAGS = \
|
||||||
|
-DNDEBUG \
|
||||||
|
-DWORDINDEX_WITH_4_BYTE \
|
||||||
|
-DBINARY_SEARCH_FOR_TTABLE \
|
||||||
|
-DDEBUG
|
||||||
|
|
||||||
|
hmmnorm_SOURCES = \
|
||||||
|
hmmnorm.cxx
|
||||||
|
|
||||||
|
hmmnorm_LDADD = \
|
||||||
|
-lgiza \
|
||||||
|
-lpthread
|
||||||
|
|
||||||
|
hmmnorm_LDFLAGS = \
|
||||||
|
-L.
|
||||||
|
|
||||||
|
hmmnorm_DEPENDENCIES = \
|
||||||
|
libgiza.a
|
||||||
|
|
||||||
|
hmmnorm_CXXFLAGS = \
|
||||||
|
-MT \
|
||||||
|
-MD \
|
||||||
|
-MP \
|
||||||
|
-MF \
|
||||||
|
-O6
|
||||||
|
|
||||||
|
hmmnorm_CPPFLAGS = \
|
||||||
|
-DNDEBUG \
|
||||||
|
-DWORDINDEX_WITH_4_BYTE \
|
||||||
|
-DBINARY_SEARCH_FOR_TTABLE \
|
||||||
|
-DDEBUG
|
||||||
|
|
||||||
|
symal_SOURCES = \
|
||||||
|
cmd.c \
|
||||||
|
cmd.h \
|
||||||
|
symal.cpp
|
||||||
|
|
||||||
|
plain2snt_SOURCES = \
|
||||||
|
plain2snt.cpp
|
||||||
|
|
||||||
|
snt2plain_SOURCES = \
|
||||||
|
snt2plain.cpp
|
||||||
|
|
||||||
|
snt2cooc_SOURCES = \
|
||||||
|
snt2cooc.cpp
|
||||||
|
|
||||||
|
snt2cooc_CPPFLAGS = \
|
||||||
|
-DNDEBUG \
|
||||||
|
-DWORDINDEX_WITH_4_BYTE \
|
||||||
|
-DBINARY_SEARCH_FOR_TTABLE \
|
||||||
|
-DDEBUG
|
||||||
|
|
||||||
|
mgiza_SOURCES = \
|
||||||
|
main.cpp
|
||||||
|
|
||||||
|
mgiza_DEPENDENCIES = \
|
||||||
|
libgiza.a
|
||||||
|
|
||||||
|
mgiza_CXXFLAGS = \
|
||||||
|
-MT \
|
||||||
|
-MD \
|
||||||
|
-MP \
|
||||||
|
-MF \
|
||||||
|
-O6
|
||||||
|
|
||||||
|
mgiza_CPPFLAGS = \
|
||||||
|
-DNDEBUG \
|
||||||
|
-DWORDINDEX_WITH_4_BYTE \
|
||||||
|
-DBINARY_SEARCH_FOR_TTABLE \
|
||||||
|
-DDEBUG
|
||||||
|
|
||||||
|
mgiza_LDFLAGS = \
|
||||||
|
-L.
|
||||||
|
|
||||||
|
mgiza_LDADD = \
|
||||||
|
-lgiza \
|
||||||
|
-lpthread
|
||||||
|
|
||||||
|
lib_LIBRARIES = \
|
||||||
|
libgiza.a
|
||||||
|
|
||||||
|
libgiza_a_SOURCES = \
|
||||||
|
alignment.cpp\
|
||||||
|
alignment.h \
|
||||||
|
AlignTables.cpp \
|
||||||
|
AlignTables.h \
|
||||||
|
Array.h \
|
||||||
|
Array2.h \
|
||||||
|
Array4.h \
|
||||||
|
ATables.cpp \
|
||||||
|
ATables.h \
|
||||||
|
collCounts.cpp \
|
||||||
|
collCounts.h \
|
||||||
|
common.h \
|
||||||
|
D4Tables.h \
|
||||||
|
D5Tables.h \
|
||||||
|
defs.h \
|
||||||
|
Dictionary.cpp \
|
||||||
|
Dictionary.h \
|
||||||
|
file_spec.h \
|
||||||
|
FlexArray.h \
|
||||||
|
ForwardBackward.cpp \
|
||||||
|
ForwardBackward.h \
|
||||||
|
getSentence.cpp \
|
||||||
|
getSentence.h \
|
||||||
|
Globals.h \
|
||||||
|
hmm.cpp \
|
||||||
|
hmm.h \
|
||||||
|
HMMTables.cpp \
|
||||||
|
HMMTables.h \
|
||||||
|
logprob.cpp \
|
||||||
|
logprob.h \
|
||||||
|
model1.cpp \
|
||||||
|
model1.h \
|
||||||
|
model2.cpp \
|
||||||
|
model2.h \
|
||||||
|
model2to3.cpp \
|
||||||
|
model3.cpp \
|
||||||
|
model3.h \
|
||||||
|
model3_viterbi.cpp \
|
||||||
|
model3_viterbi_with_tricks.cpp \
|
||||||
|
model345-peg.cpp \
|
||||||
|
MoveSwapMatrix.cpp \
|
||||||
|
MoveSwapMatrix.h \
|
||||||
|
myassert.cpp \
|
||||||
|
myassert.h \
|
||||||
|
mymath.h \
|
||||||
|
mystl.h \
|
||||||
|
NTables.cpp \
|
||||||
|
NTables.h \
|
||||||
|
Parameter.cpp \
|
||||||
|
Parameter.h \
|
||||||
|
parse.cpp \
|
||||||
|
Perplexity.cpp \
|
||||||
|
Perplexity.h \
|
||||||
|
Pointer.h \
|
||||||
|
reports.cpp \
|
||||||
|
SetArray.cpp \
|
||||||
|
SetArray.h \
|
||||||
|
syncObj.h \
|
||||||
|
transpair_model1.h \
|
||||||
|
transpair_model2.h \
|
||||||
|
transpair_model3.cpp \
|
||||||
|
transpair_model3.h \
|
||||||
|
transpair_model4.cpp \
|
||||||
|
transpair_model4.h \
|
||||||
|
transpair_model5.cpp \
|
||||||
|
transpair_model5.h \
|
||||||
|
transpair_modelhmm.h \
|
||||||
|
ttableDiff.hpp \
|
||||||
|
TTables.cpp \
|
||||||
|
TTables.h \
|
||||||
|
types.h \
|
||||||
|
utility.cpp \
|
||||||
|
utility.h \
|
||||||
|
Vector.h \
|
||||||
|
vocab.cpp \
|
||||||
|
vocab.h \
|
||||||
|
WordClasses.h
|
||||||
|
|
||||||
|
libgiza_a_CXXFLAGS = \
|
||||||
|
-MD \
|
||||||
|
-MP \
|
||||||
|
-MF \
|
||||||
|
-MT \
|
||||||
|
-O6
|
||||||
|
|
||||||
|
libgiza_a_CPPFLAGS = \
|
||||||
|
-DNDEBUG \
|
||||||
|
-DWORDINDEX_WITH_4_BYTE \
|
||||||
|
-DBINARY_SEARCH_FOR_TTABLE \
|
||||||
|
-DDEBUG
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,235 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#include "MoveSwapMatrix.h"
|
||||||
|
|
||||||
|
template<class TRANSPAIR>
|
||||||
|
MoveSwapMatrix<TRANSPAIR>::MoveSwapMatrix(const TRANSPAIR&_ef, const alignment&_a)
|
||||||
|
: alignment(_a), ef(_ef), l(ef.get_l()), m(ef.get_m()), _cmove(l+1, m+1), _cswap(m+1, m+1),
|
||||||
|
delmove(l+1, m+1,0),delswap(m+1, m+1,0),changed(l+2, 0), changedCounter(1),
|
||||||
|
modelnr(_ef.modelnr()),lazyEvaluation(0),centerDeleted(0)
|
||||||
|
{
|
||||||
|
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||||
|
if( lazyEvaluation==0)
|
||||||
|
for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class TRANSPAIR>
|
||||||
|
void MoveSwapMatrix<TRANSPAIR>::updateJ(WordIndex j, bool useChanged,double thisValue)
|
||||||
|
{
|
||||||
|
massert( lazyEvaluation==0 );
|
||||||
|
for(WordIndex i=0;i<=l;i++)
|
||||||
|
if( (useChanged==0||changed[i]!=changedCounter) )
|
||||||
|
if( get_al(j)!=i )
|
||||||
|
_cmove(i, j)=ef.scoreOfMove((*this), i, j,thisValue);
|
||||||
|
else
|
||||||
|
_cmove(i, j)=1.0;
|
||||||
|
for(WordIndex j2=j+1;j2<=m;j2++)
|
||||||
|
if( get_al(j)!=get_al(j2) )
|
||||||
|
_cswap(j, j2)=ef.scoreOfSwap((*this), j, j2,thisValue);
|
||||||
|
else
|
||||||
|
_cswap(j, j2)=1.0;
|
||||||
|
for(WordIndex j2=1;j2<j;j2++)
|
||||||
|
if( get_al(j)!=get_al(j2) )
|
||||||
|
_cswap(j2, j)=ef.scoreOfSwap((*this), j2, j,thisValue);
|
||||||
|
else
|
||||||
|
_cswap(j2, j)=1.0;
|
||||||
|
}
|
||||||
|
template<class TRANSPAIR>
|
||||||
|
void MoveSwapMatrix<TRANSPAIR>::updateI(WordIndex i,double thisValue)
|
||||||
|
{
|
||||||
|
massert( lazyEvaluation==0);
|
||||||
|
for(WordIndex j=1;j<=m;j++)
|
||||||
|
if( get_al(j)!=i )
|
||||||
|
_cmove(i, j)=ef.scoreOfMove((*this), i, j,thisValue);
|
||||||
|
else
|
||||||
|
_cmove(i, j)=1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class TRANSPAIR>
|
||||||
|
void MoveSwapMatrix<TRANSPAIR>::printWrongs()const{
|
||||||
|
for(WordIndex i=0;i<=l;i++)
|
||||||
|
{
|
||||||
|
for(WordIndex j=1;j<=m;j++)
|
||||||
|
if( get_al(j)==i)
|
||||||
|
cout << "A";
|
||||||
|
else
|
||||||
|
{
|
||||||
|
LogProb real=_cmove(i, j), wanted=ef.scoreOfMove((*this), i, j);
|
||||||
|
if( fabs(1.0-real/wanted)>1e-3 )
|
||||||
|
cout << 'b';
|
||||||
|
else if(fabs(1.0-real/wanted)>1e-10 )
|
||||||
|
cout << 'e';
|
||||||
|
else if(real!=wanted)
|
||||||
|
cout << 'E';
|
||||||
|
else
|
||||||
|
cout << ' ';
|
||||||
|
}
|
||||||
|
cout << endl;
|
||||||
|
}
|
||||||
|
cout << endl;
|
||||||
|
for(WordIndex j=1;j<=m;j++)
|
||||||
|
{
|
||||||
|
for(WordIndex j1=1;j1<=m;j1++)
|
||||||
|
if( j1>j )
|
||||||
|
{
|
||||||
|
if( get_al(j)==get_al(j1) )
|
||||||
|
cout << 'A';
|
||||||
|
else
|
||||||
|
cout << (_cswap(j, j1)==ef.scoreOfSwap((*this), j, j1));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
cout << ' ';
|
||||||
|
cout << endl;
|
||||||
|
}
|
||||||
|
massert(0);
|
||||||
|
}
|
||||||
|
template<class TRANSPAIR>
|
||||||
|
bool MoveSwapMatrix<TRANSPAIR>::isRight()const{
|
||||||
|
if( lazyEvaluation )
|
||||||
|
return 1;
|
||||||
|
for(WordIndex i=0;i<=l;i++)
|
||||||
|
for(WordIndex j=1;j<=m;j++)
|
||||||
|
if( get_al(j)!=i && (!(doubleEqual(_cmove(i, j), ef.scoreOfMove((*this), i, j)))) )
|
||||||
|
{
|
||||||
|
cerr << "DIFF: " << i << " " << j << " " << _cmove(i, j) << " " << ef.scoreOfMove((*this), i, j) << endl;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
for(WordIndex j=1;j<=m;j++)
|
||||||
|
for(WordIndex j1=1;j1<=m;j1++)
|
||||||
|
if( j1>j&&get_al(j)!=get_al(j1)&&(!doubleEqual(_cswap(j, j1), ef.scoreOfSwap((*this), j, j1))) )
|
||||||
|
{
|
||||||
|
cerr << "DIFFERENT: " << j << " " << j1 << " " << _cswap(j, j1) << " " << ef.scoreOfSwap((*this), j, j1) << endl;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class TRANSPAIR>
|
||||||
|
void MoveSwapMatrix<TRANSPAIR>::doMove(WordIndex _i, WordIndex _j)
|
||||||
|
{
|
||||||
|
WordIndex old_i=get_al(_j);
|
||||||
|
if(old_i>100){
|
||||||
|
cerr << "Error, invalid index set";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if( lazyEvaluation )
|
||||||
|
set(_j,_i);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ( modelnr==5||modelnr==6 )
|
||||||
|
{
|
||||||
|
set(_j, _i);
|
||||||
|
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||||
|
for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
|
||||||
|
}
|
||||||
|
else if ( modelnr==4 )
|
||||||
|
{
|
||||||
|
changedCounter++;
|
||||||
|
for(unsigned int k=prev_cept(old_i);k<=next_cept(old_i);++k)changed[k]=changedCounter;
|
||||||
|
for(unsigned int k=prev_cept(_i);k<=next_cept(_i);++k)changed[k]=changedCounter;
|
||||||
|
set(_j, _i);
|
||||||
|
for(unsigned int k=prev_cept(old_i);k<=next_cept(old_i);++k)changed[k]=changedCounter;
|
||||||
|
for(unsigned int k=prev_cept(_i);k<=next_cept(_i);++k)changed[k]=changedCounter;
|
||||||
|
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||||
|
for(unsigned int i=0;i<=l;i++)
|
||||||
|
if(changed[i]==changedCounter)
|
||||||
|
updateI(i,thisValue);
|
||||||
|
for(unsigned int j=1;j<=m;j++)
|
||||||
|
if( changed[get_al(j)]==changedCounter )
|
||||||
|
updateJ(j, 1,thisValue);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
assert(modelnr==3);
|
||||||
|
set(_j, _i);
|
||||||
|
changedCounter++;
|
||||||
|
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||||
|
updateI(old_i,thisValue);
|
||||||
|
changed[old_i]=changedCounter;
|
||||||
|
updateI(_i,thisValue);
|
||||||
|
changed[_i]=changedCounter;
|
||||||
|
for(WordIndex j=1;j<=m;j++)
|
||||||
|
if( get_al(j)==_i || get_al(j)==old_i )
|
||||||
|
updateJ(j, 1,thisValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<class TRANSPAIR>
|
||||||
|
void MoveSwapMatrix<TRANSPAIR>::doSwap(WordIndex _j1, WordIndex _j2)
|
||||||
|
{
|
||||||
|
assert( cswap(_j1, _j2)>1 );
|
||||||
|
WordIndex i1=get_al(_j1), i2=get_al(_j2);
|
||||||
|
if( lazyEvaluation==1 )
|
||||||
|
{
|
||||||
|
set(_j1, i2);
|
||||||
|
set(_j2, i1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ( modelnr==5||modelnr==6 )
|
||||||
|
{
|
||||||
|
set(_j1, i2);
|
||||||
|
set(_j2, i1);
|
||||||
|
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||||
|
for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
|
||||||
|
}
|
||||||
|
else if( modelnr==4 )
|
||||||
|
{
|
||||||
|
changedCounter++;
|
||||||
|
for(unsigned int k=prev_cept(i1);k<=next_cept(i1);++k)changed[k]=changedCounter;
|
||||||
|
for(unsigned int k=prev_cept(i2);k<=next_cept(i2);++k)changed[k]=changedCounter;
|
||||||
|
set(_j1, i2);
|
||||||
|
set(_j2, i1);
|
||||||
|
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||||
|
for(unsigned int i=0;i<=l;i++)
|
||||||
|
if(changed[i]==changedCounter)
|
||||||
|
updateI(i,thisValue);
|
||||||
|
for(unsigned int j=1;j<=m;j++)
|
||||||
|
if( changed[get_al(j)]==changedCounter )
|
||||||
|
updateJ(j, 1,thisValue);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
assert(modelnr==3);
|
||||||
|
set(_j1, i2);
|
||||||
|
set(_j2, i1);
|
||||||
|
changedCounter++;
|
||||||
|
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||||
|
updateI(i1,thisValue);
|
||||||
|
changed[i1]=changedCounter;
|
||||||
|
updateI(i2,thisValue);
|
||||||
|
changed[i2]=changedCounter;
|
||||||
|
updateJ(_j1, 1,thisValue);
|
||||||
|
updateJ(_j2, 1,thisValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "transpair_model3.h"
|
||||||
|
#include "transpair_model4.h"
|
||||||
|
#include "transpair_model5.h"
|
||||||
|
#include "transpair_modelhmm.h"
|
||||||
|
template class MoveSwapMatrix<transpair_model3>;
|
||||||
|
template class MoveSwapMatrix<transpair_model4>;
|
||||||
|
template class MoveSwapMatrix<transpair_model5>;
|
||||||
|
template class MoveSwapMatrix<transpair_modelhmm>;
|
@ -0,0 +1,162 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/*--
|
||||||
|
MoveSwapMatrix: Efficient representation for moving and swapping
|
||||||
|
around in IBM3 training.
|
||||||
|
Franz Josef Och (30/07/99)
|
||||||
|
--*/
|
||||||
|
#ifndef moveswap2_costs_h_defined
|
||||||
|
#define moveswap2_costs_h_defined
|
||||||
|
#include "alignment.h"
|
||||||
|
#include "transpair_model3.h"
|
||||||
|
#include "myassert.h"
|
||||||
|
#include <set>
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
extern short DoViterbiTraining;
|
||||||
|
|
||||||
|
template<class TRANSPAIR>
|
||||||
|
class MoveSwapMatrix: public alignment {
|
||||||
|
private:
|
||||||
|
const TRANSPAIR&ef;
|
||||||
|
const WordIndex l, m;
|
||||||
|
Array2<LogProb, Vector<LogProb> > _cmove, _cswap;
|
||||||
|
Array2<char, Vector<char> > delmove, delswap;
|
||||||
|
Vector<int> changed;
|
||||||
|
int changedCounter;
|
||||||
|
const int modelnr;
|
||||||
|
bool lazyEvaluation;
|
||||||
|
bool centerDeleted;
|
||||||
|
std::map<int,std::set<int> >untouch_i; // target words that should not be aligned anywhere
|
||||||
|
std::map<int,std::set<int> > untouch_j;
|
||||||
|
public:
|
||||||
|
void addUnTouchI(int i, int j){
|
||||||
|
if(i>0){
|
||||||
|
if(untouch_i.find(i)==untouch_i.end()){
|
||||||
|
untouch_i[i] = std::set<int>();
|
||||||
|
}
|
||||||
|
untouch_i[i].insert(j);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void addUnTouchJ(int j,int i){
|
||||||
|
if(j>0){
|
||||||
|
if(untouch_j.find(j)==untouch_j.end()){
|
||||||
|
untouch_j[j] = std::set<int>();
|
||||||
|
}
|
||||||
|
untouch_j[j].insert(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool check() const {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
const TRANSPAIR&get_ef() const {
|
||||||
|
return ef;
|
||||||
|
}
|
||||||
|
bool isCenterDeleted() const {
|
||||||
|
return centerDeleted;
|
||||||
|
}
|
||||||
|
bool isLazy() const {
|
||||||
|
return lazyEvaluation;
|
||||||
|
}
|
||||||
|
MoveSwapMatrix(const TRANSPAIR&_ef, const alignment&_a);
|
||||||
|
void updateJ(WordIndex j, bool, double thisValue);
|
||||||
|
void updateI(WordIndex i, double thisValue);
|
||||||
|
void doMove(WordIndex _i, WordIndex _j);
|
||||||
|
void doSwap(WordIndex _j1, WordIndex _j2);
|
||||||
|
void delCenter() {
|
||||||
|
centerDeleted = 1;
|
||||||
|
}
|
||||||
|
void delMove(WordIndex x, WordIndex y) {
|
||||||
|
delmove(x, y) = 1;
|
||||||
|
}
|
||||||
|
void delSwap(WordIndex x, WordIndex y) {
|
||||||
|
massert(y>x);
|
||||||
|
delswap(x, y) = 1;
|
||||||
|
delswap(y, x) = 1;
|
||||||
|
}
|
||||||
|
bool isDelMove(WordIndex x, WordIndex y) const {
|
||||||
|
return DoViterbiTraining || delmove(x, y);
|
||||||
|
}
|
||||||
|
bool isDelSwap(WordIndex x, WordIndex y) const {
|
||||||
|
massert(y>x);
|
||||||
|
return DoViterbiTraining || delswap(x, y);
|
||||||
|
}
|
||||||
|
LogProb cmove(WordIndex x, WordIndex y) const {
|
||||||
|
massert( get_al(y)!=x );
|
||||||
|
massert( delmove(x,y)==0 );
|
||||||
|
if (lazyEvaluation)
|
||||||
|
return ef.scoreOfMove(*this, x, y);
|
||||||
|
else {
|
||||||
|
std::map<int, std::set<int> >::const_iterator it;
|
||||||
|
|
||||||
|
it = untouch_i.find(x);
|
||||||
|
if(it!=untouch_i.end()){
|
||||||
|
// Return -1 if the j jump set is not within the limit
|
||||||
|
if(it->second.find(y) == it->second.end()) //Not in the feasible set
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
it = untouch_j.find(y);
|
||||||
|
if(it!=untouch_j.end()){
|
||||||
|
if(it->second.find(x) == it->second.end()) //Not in the feasible set
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return _cmove(x, y);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LogProb cswap(WordIndex x, WordIndex y) const {
|
||||||
|
massert(x<y);
|
||||||
|
massert(delswap(x,y)==0);
|
||||||
|
massert(get_al(x)!=get_al(y));
|
||||||
|
if (lazyEvaluation)
|
||||||
|
return ef.scoreOfSwap(*this, x, y);
|
||||||
|
else {
|
||||||
|
massert(y>x);
|
||||||
|
std::map<int, std::set<int> >::const_iterator it1,it2;
|
||||||
|
it1 =untouch_j.find(y);
|
||||||
|
it2 = untouch_j.find(x);
|
||||||
|
int nal1 = get_al(y);
|
||||||
|
int nal2 = get_al(x); // Need to test if nal1 is in it2's feasible set
|
||||||
|
// and vice versa
|
||||||
|
|
||||||
|
if(it1!=untouch_j.end()&&it1->second.find(nal2)==it1->second.end()){
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if(it2!=untouch_j.end()&&it2->second.find(nal1)==it2->second.end()){
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Make sure we never swap these
|
||||||
|
return _cswap(x, y);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void printWrongs() const;
|
||||||
|
bool isRight() const;
|
||||||
|
friend ostream&operator<<(ostream&out, const MoveSwapMatrix<TRANSPAIR>&m) {
|
||||||
|
return out << (alignment) m << "\nEF:\n" << m.ef << "\nCMOVE\n"
|
||||||
|
<< m._cmove << "\nCSWAP\n" << m._cswap << endl;
|
||||||
|
}
|
||||||
|
;
|
||||||
|
};
|
||||||
|
#endif
|
@ -0,0 +1,184 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#include "NTables.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include "defs.h"
|
||||||
|
#include <fstream>
|
||||||
|
#include "Parameter.h"
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(double,NTablesFactorGraphemes,"nSmooth","smoothing for fertility parameters (good value: 64): weight for wordlength-dependent fertility parameters",PARLEV_SMOOTH,64.0);
|
||||||
|
GLOBAL_PARAMETER(double,NTablesFactorGeneral,"nSmoothGeneral","smoothing for fertility parameters (default: 0): weight for word-independent fertility parameters",PARLEV_SMOOTH,0.0);
|
||||||
|
|
||||||
|
template <class VALTYPE>
|
||||||
|
void nmodel<VALTYPE>::printNTable(int noEW, const char* filename,
|
||||||
|
const Vector<WordEntry>& evlist,
|
||||||
|
bool actual) const
|
||||||
|
// prints the fertility table but with actual sourcce words (not their id)
|
||||||
|
{
|
||||||
|
cerr << "Dumping nTable to: " << filename << '\n';
|
||||||
|
ofstream of(filename);
|
||||||
|
VALTYPE p ;
|
||||||
|
WordIndex k, i ;
|
||||||
|
for(i=1; int(i) < noEW; i++){
|
||||||
|
if (evlist[i].freq > 0){
|
||||||
|
if (actual)
|
||||||
|
of << evlist[i].word << ' ' ;
|
||||||
|
else
|
||||||
|
of << i << ' ' ;
|
||||||
|
for( k=0; k < MAX_FERTILITY; k++){
|
||||||
|
p = getValue(i, k);
|
||||||
|
if (p <= PROB_SMOOTH)
|
||||||
|
p = 0;
|
||||||
|
of << p << ' ';
|
||||||
|
}
|
||||||
|
of << '\n';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class VALTYPE>
|
||||||
|
void nmodel<VALTYPE>::printRealNTable(int noEW, const char* filename,
|
||||||
|
const Vector<WordEntry>& evlist,
|
||||||
|
bool actual) const
|
||||||
|
// prints the fertility table but with actual sourcce words (not their id)
|
||||||
|
{
|
||||||
|
cerr << "Dumping nTable to: " << filename << '\n';
|
||||||
|
ofstream of(filename);
|
||||||
|
VALTYPE p ;
|
||||||
|
WordIndex k, i ;
|
||||||
|
for(i=1; int(i) < noEW; i++){
|
||||||
|
if (evlist[i].freq > 0){
|
||||||
|
if (actual)
|
||||||
|
of << evlist[i].word << ' ' ;
|
||||||
|
else
|
||||||
|
of << i << ' ' ;
|
||||||
|
for( k=0; k < MAX_FERTILITY; k++){
|
||||||
|
p = getValue(i, k);
|
||||||
|
// if (p <= PROB_SMOOTH)
|
||||||
|
// p = 0;
|
||||||
|
of << p << ' ';
|
||||||
|
}
|
||||||
|
of << '\n';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class VALTYPE>
|
||||||
|
bool nmodel<VALTYPE>::readNTable(const char *filename){
|
||||||
|
/* This function reads the n table from a file.
|
||||||
|
Each line is of the format: source_word_id p0 p1 p2 ... pn
|
||||||
|
This is the inverse operation of the printTable function.
|
||||||
|
NAS, 7/11/99
|
||||||
|
*/
|
||||||
|
ifstream inf(filename);
|
||||||
|
if(!inf.is_open()){
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
cerr << "Reading fertility table from " << filename << "\n";
|
||||||
|
if(!inf){
|
||||||
|
cerr << "\nERROR: Cannot open " << filename <<"\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
VALTYPE prob;
|
||||||
|
WordIndex tok, i;
|
||||||
|
int nFert=0;
|
||||||
|
while(!inf.eof()){
|
||||||
|
nFert++;
|
||||||
|
inf >> ws >> tok;
|
||||||
|
if (tok > MAX_VOCAB_SIZE){
|
||||||
|
cerr << "NTables:readNTable(): unrecognized token id: " << tok
|
||||||
|
<<'\n';
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
for(i = 0; i < MAX_FERTILITY; i++){
|
||||||
|
inf >> ws >> prob;
|
||||||
|
getRef(tok, i)=prob;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cerr << "Read " << nFert << " entries in fertility table.\n";
|
||||||
|
inf.close();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class VALTYPE>
|
||||||
|
bool nmodel<VALTYPE>::merge(nmodel<VALTYPE>& n,int noEW, const Vector<WordEntry>& evlist){
|
||||||
|
/* This function reads the n table from a file.
|
||||||
|
Each line is of the format: source_word_id p0 p1 p2 ... pn
|
||||||
|
This is the inverse operation of the printTable function.
|
||||||
|
NAS, 7/11/99
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
VALTYPE p ;
|
||||||
|
WordIndex k, i ;
|
||||||
|
for(i=1; int(i) < noEW; i++){
|
||||||
|
if (evlist[i].freq > 0){
|
||||||
|
for( k=0; k < MAX_FERTILITY; k++){
|
||||||
|
p = n.getValue(i, k);
|
||||||
|
getRef(i,k)+=p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class VALTYPE>
|
||||||
|
bool nmodel<VALTYPE>::readAugNTable(const char *filename){
|
||||||
|
/* This function reads the n table from a file.
|
||||||
|
Each line is of the format: source_word_id p0 p1 p2 ... pn
|
||||||
|
This is the inverse operation of the printTable function.
|
||||||
|
NAS, 7/11/99
|
||||||
|
*/
|
||||||
|
ifstream inf(filename);
|
||||||
|
if(!inf.is_open()){
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
cerr << "Reading fertility table from " << filename << "\n";
|
||||||
|
if(!inf){
|
||||||
|
cerr << "\nERROR: Cannot open " << filename <<"\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
VALTYPE prob;
|
||||||
|
WordIndex tok, i;
|
||||||
|
int nFert=0;
|
||||||
|
while(!inf.eof()){
|
||||||
|
nFert++;
|
||||||
|
inf >> ws >> tok;
|
||||||
|
if (tok > MAX_VOCAB_SIZE){
|
||||||
|
cerr << "NTables:readNTable(): unrecognized token id: " << tok
|
||||||
|
<<'\n';
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
for(i = 0; i < MAX_FERTILITY; i++){
|
||||||
|
inf >> ws >> prob;
|
||||||
|
getRef(tok, i)+=prob;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cerr << "Read " << nFert << " entries in fertility table.\n";
|
||||||
|
inf.close();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template class nmodel<COUNT>;
|
||||||
|
//template class nmodel<PROB>;
|
@ -0,0 +1,145 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef _ntables_h
|
||||||
|
#define _ntables_h 1
|
||||||
|
#include "Array2.h"
|
||||||
|
#include "Vector.h"
|
||||||
|
#include <cassert>
|
||||||
|
#include "defs.h"
|
||||||
|
#include "vocab.h"
|
||||||
|
#include "myassert.h"
|
||||||
|
#include "Globals.h"
|
||||||
|
#include "syncObj.h"
|
||||||
|
|
||||||
|
extern double NTablesFactorGraphemes, NTablesFactorGeneral;
|
||||||
|
|
||||||
|
template<class VALTYPE> class nmodel {
|
||||||
|
private:
|
||||||
|
Array2<VALTYPE, Vector<VALTYPE> > ntab;
|
||||||
|
public:
|
||||||
|
nmodel(int maxw, int maxn) :
|
||||||
|
ntab(maxw, maxn, 0.0) {
|
||||||
|
}
|
||||||
|
VALTYPE getValue(int w, unsigned int n) const {
|
||||||
|
massert(w!=0);
|
||||||
|
if (n>=ntab.getLen2())
|
||||||
|
return 0.0;
|
||||||
|
else
|
||||||
|
return max(ntab(w, n), VALTYPE(PROB_SMOOTH));
|
||||||
|
}
|
||||||
|
protected:
|
||||||
|
inline VALTYPE&getRef(int w, int n) {
|
||||||
|
//massert(w!=0);
|
||||||
|
return ntab(w, n);
|
||||||
|
};
|
||||||
|
Mutex lock;
|
||||||
|
public:
|
||||||
|
inline void addValue(int w , int n,const VALTYPE& t){lock.lock();ntab(w,n)+=t;lock.unlock();};
|
||||||
|
public:
|
||||||
|
template<class COUNT> void normalize(nmodel<COUNT>&write,
|
||||||
|
const Vector<WordEntry>* _evlist) const {
|
||||||
|
int h1=ntab.getLen1(), h2=ntab.getLen2();
|
||||||
|
int nParams=0;
|
||||||
|
if (_evlist&&(NTablesFactorGraphemes||NTablesFactorGeneral)) {
|
||||||
|
size_t maxlen=0;
|
||||||
|
const Vector<WordEntry>&evlist=*_evlist;
|
||||||
|
for (unsigned int i=1; i<evlist.size(); i++)
|
||||||
|
maxlen=max(maxlen, evlist[i].word.length());
|
||||||
|
Array2<COUNT,Vector<COUNT> > counts(maxlen+1, MAX_FERTILITY+1, 0.0);
|
||||||
|
Vector<COUNT> nprob_general(MAX_FERTILITY+1,0.0);
|
||||||
|
for (unsigned int i=1; i<min((unsigned int)h1,
|
||||||
|
(unsigned int)evlist.size()); i++) {
|
||||||
|
int l=evlist[i].word.length();
|
||||||
|
for (int k=0; k<h2; k++) {
|
||||||
|
counts(l, k)+=getValue(i, k);
|
||||||
|
nprob_general[k]+=getValue(i, k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
COUNT sum2=0;
|
||||||
|
for (unsigned int i=1; i<maxlen+1; i++) {
|
||||||
|
COUNT sum=0.0;
|
||||||
|
for (int k=0; k<h2; k++)
|
||||||
|
sum+=counts(i, k);
|
||||||
|
sum2+=sum;
|
||||||
|
if (sum) {
|
||||||
|
double average=0.0;
|
||||||
|
//cerr << "l: " << i << " " << sum << " ";
|
||||||
|
for (int k=0; k<h2; k++) {
|
||||||
|
counts(i, k)/=sum;
|
||||||
|
//cerr << counts(i,k) << ' ';
|
||||||
|
average+=k*counts(i, k);
|
||||||
|
}
|
||||||
|
//cerr << "avg: " << average << endl;
|
||||||
|
//cerr << '\n';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (unsigned int k=0; k<nprob_general.size(); k++)
|
||||||
|
nprob_general[k]/=sum2;
|
||||||
|
|
||||||
|
for (int i=1; i<h1; i++) {
|
||||||
|
int l=-1;
|
||||||
|
if ((unsigned int)i<evlist.size())
|
||||||
|
l=evlist[i].word.length();
|
||||||
|
COUNT sum=0.0;
|
||||||
|
for (int k=0; k<h2; k++)
|
||||||
|
sum+=getValue(i, k)+((l==-1) ? 0.0 : (counts(l, k)
|
||||||
|
*NTablesFactorGraphemes)) + NTablesFactorGeneral
|
||||||
|
*nprob_general[k];
|
||||||
|
assert(sum);
|
||||||
|
for (int k=0; k<h2; k++) {
|
||||||
|
write.getRef(i, k)=(getValue(i, k)+((l==-1) ? 0.0
|
||||||
|
: (counts(l, k)*NTablesFactorGraphemes)))/sum
|
||||||
|
+ NTablesFactorGeneral*nprob_general[k];
|
||||||
|
nParams++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
for (int i=1; i<h1; i++) {
|
||||||
|
COUNT sum=0.0;
|
||||||
|
for (int k=0; k<h2; k++)
|
||||||
|
sum+=getValue(i, k);
|
||||||
|
assert(sum);
|
||||||
|
for (int k=0; k<h2; k++) {
|
||||||
|
write.getRef(i, k)=getValue(i, k)/sum;
|
||||||
|
nParams++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cerr << "NTable contains " << nParams << " parameter.\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
bool merge(nmodel<VALTYPE>& n, int noEW, const Vector<WordEntry>& evlist);
|
||||||
|
void clear() {
|
||||||
|
int h1=ntab.getLen1(), h2=ntab.getLen2();
|
||||||
|
for (int i=0; i<h1; i++)
|
||||||
|
for (int k=0; k<h2; k++)
|
||||||
|
ntab(i, k)=0;
|
||||||
|
}
|
||||||
|
void printNTable(int noEW, const char* filename,
|
||||||
|
const Vector<WordEntry>& evlist, bool) const;
|
||||||
|
void printRealNTable(int noEW, const char* filename,
|
||||||
|
const Vector<WordEntry>& evlist, bool) const;
|
||||||
|
bool readAugNTable(const char *filename);
|
||||||
|
bool readNTable(const char *filename);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,144 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#include "Parameter.h"
|
||||||
|
#include "fstream"
|
||||||
|
#include "unistd.h"
|
||||||
|
#include <strstream>
|
||||||
|
|
||||||
|
|
||||||
|
bool absolutePathNames=0;
|
||||||
|
string ParameterPathPrefix;
|
||||||
|
bool ParameterChangedFlag=0;
|
||||||
|
|
||||||
|
// Writes "<name> <value>" lines for every parameter in `parset` that is not
// marked onlyCopy and whose level equals `level` (level==-1 selects all).
// A relative file-name value is prefixed with the current working directory
// when absolutePathNames is set, and with ParameterPathPrefix when that is
// non-empty. Returns 0 if the stream is not usable on entry, 1 otherwise.
bool writeParameters(ofstream&of,const ParSet&parset,int level)
{
  if(!of)return 0;
  for(ParSet::const_iterator i=parset.begin();i!=parset.end();++i)
    {
      if(((*i)->getLevel()==level||level==-1)&&(*i)->onlyCopy==0)
        {
          // Render the value once into a string so it can be inspected.
          ostrstream os;
          (*i)->printValue(os);
          os << ends;
          string s(os.str());
          // str() freezes the strstream buffer; without unfreezing it the
          // destructor does not release the memory (one leak per parameter).
          os.freeze(false);

          of << (*i)->getString() << " ";
          if( absolutePathNames&&(*i)->isFilename()&&s.length()&&s[0]!='/' )
            {
              char path[1024];
              // getcwd returns NULL on failure and leaves the buffer
              // unspecified, so only print it when the call succeeded.
              if( getcwd(path,sizeof(path)) )
                of << path << '/';
              else
                cerr << "WARNING: could not determine current directory for parameter '"
                     << (*i)->getString() << "'.\n";
            }
          if( ParameterPathPrefix.length()&&(*i)->isFilename()&&s.length()&&s[0]!='/' )
            of << ParameterPathPrefix << '/';
          (*i)->printValue(of);
          of << endl;
        }
    }
  return 1;
}
|
||||||
|
|
||||||
|
// Reads "<name> <value>" lines from `f` and applies each one through
// makeSetCommand(); lines that cannot be applied are reported on cerr.
// Returns 0 if the stream is not usable on entry, 1 otherwise.
bool readParameters(ifstream&f,const ParSet&parset,int verb,int level)
{
  if(!f)
    return 0;
  string line;
  while(getline(f,line))
    {
      string name,value;
      istrstream fields(line.c_str());
      fields>>name>>value;
      const bool applied=makeSetCommand(name,value,parset,verb,level);
      if(!applied)
        cerr << "ERROR: could not set: (C) " << name << " " << value << endl;
    }
  return 1;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Sets the parameter named _s1 (normalised through simpleString) to s2.
// An exact name match wins immediately; otherwise the name is accepted as an
// abbreviation if exactly one parameter name starts with it. The value is
// only applied when `level` is -1 or equals the parameter's level; a level
// mismatch is reported for verb>1 but still counts as "found".
// Returns 1 if a parameter was identified, 0 if the name is unknown or
// ambiguous (both reported on cerr).
bool makeSetCommand(string _s1,string s2,const ParSet&parset,int verb,int level)
{
  ParPtr prefixMatch;
  int prefixMatches=0;
  string s1=simpleString(_s1);
  for(ParSet::const_iterator it=parset.begin();it!=parset.end();++it)
    {
      if( *(*it)==s1 )
        {
          if( level==-1 || level==(*it)->getLevel() )
            (*it)->setParameter(s2,verb);
          else if(verb>1)
            cerr << "ERROR: Could not set: (A) " << s1 << " " << s2 << " " << level << " " << (*it)->getLevel() << endl;
          return 1;
        }
      if( (*it)->getString().substr(0,s1.length())==s1 )
        {
          prefixMatch=(*it);
          prefixMatches++;
        }
    }

  if(prefixMatches==1)
    {
      if( level==-1 || level==prefixMatch->getLevel() )
        prefixMatch->setParameter(s2,verb);
      else if( verb>1 )
        cerr << "ERROR: Could not set: (B) " << s1 << " " << s2 << " " << level << " " << prefixMatch->getLevel() << endl;
      return 1;
    }

  if( prefixMatches>1 )
    cerr << "ERROR: ambiguous parameter '" << s1 << "'.\n";
  if( prefixMatches==0 )
    cerr << "ERROR: parameter '" << s1 << "' does not exist.\n";
  return 0;
}
|
||||||
|
|
||||||
|
// Prints (via printAt) every parameter of `parset` that is not marked
// onlyCopy and whose level equals `level` (level==-1 selects all), one per
// line. Returns the stream; nothing is printed if it is already in error.
ostream& printPars(ostream&of,const ParSet&parset,int level)
{
  if(!of)
    return of;
  ParSet::const_iterator it=parset.begin();
  while(it!=parset.end())
    {
      const bool levelSelected=((*it)->getLevel()==level||level==-1);
      if(levelSelected&&(*it)->onlyCopy==0)
        {
          (*it)->printAt(of);
          of << endl;
        }
      ++it;
    }
  return of;
}
|
||||||
|
|
||||||
|
// Normalises a parameter name: lower-cases it and keeps only the characters
// a-z and 0-9; everything else (punctuation, whitespace, non-ASCII bytes) is
// dropped.
string simpleString(const string s)
{
  string k;
  for(unsigned int i=0;i<s.length();++i)
    {
      // tolower() is only defined for values representable as unsigned char
      // (or EOF); passing a negative plain char is undefined behaviour.
      const char c=static_cast<char>(tolower(static_cast<unsigned char>(s[i])));
      if( (c>='a'&&c<='z')||(c>='0'&&c<='9') )
        k += c;
    }
  return k;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns the process-wide parameter set. It is a function-local static, so
// it is constructed on the first call.
ParSet&getGlobalParSet()
{
  static ParSet globalParameters;
  return globalParameters;
}
|
@ -0,0 +1,200 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef PARAMETER_H_DEFINED
|
||||||
|
#define PARAMETER_H_DEFINED
|
||||||
|
|
||||||
|
#include "mystl.h"
|
||||||
|
#include <set>
|
||||||
|
#include "Pointer.h"
|
||||||
|
#include <string>
|
||||||
|
#include "Globals.h"
|
||||||
|
#include <fstream>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
// Classifies `s` as a boolean word, ignoring case.
// Returns 1 for "yes"/"y"/"true"/"t", 0 for "no"/"n"/"false"/"f" and -1 for
// anything else.  A recognised word is echoed to cerr as "TRUE" or "FALSE".
inline int mConvertBoolWord(const string&s)
{
  const char*const w=s.c_str();
  if( strcasecmp(w,"yes")==0 || strcasecmp(w,"y")==0 || strcasecmp(w,"true")==0 || strcasecmp(w,"t")==0 )
    {
      cerr << "TRUE\n";
      return 1;
    }
  if( strcasecmp(w,"no")==0 || strcasecmp(w,"n")==0 || strcasecmp(w,"false")==0 || strcasecmp(w,"f")==0 )
    {
      cerr << "FALSE\n";
      return 0;
    }
  return -1;
}

// Shared body of the integral mConvert overloads: a boolean word yields 1 or
// 0, any other text is converted with atoi().  The result is stored in `v`
// and returned.
template<class INTEGRAL>
inline INTEGRAL mConvertIntegral(const string&s,INTEGRAL&v)
{
  const int word=mConvertBoolWord(s);
  if( word>=0 )
    return v=static_cast<INTEGRAL>(word);
  return v=static_cast<INTEGRAL>(atoi(s.c_str()));
}

// mConvert(s, out): converts the text `s` to the type of `out`, stores the
// result in `out` and returns it.  Integral and bool targets accept the
// boolean words handled by mConvertBoolWord(); floating-point targets use
// atof(); a string target receives a copy of `s`.
inline unsigned int mConvert(const string&s,unsigned int &i) { return mConvertIntegral(s,i); }
inline int mConvert(const string&s,int &i) { return mConvertIntegral(s,i); }
inline double mConvert(const string&s,double &d) { return d=atof(s.c_str()); }
inline double mConvert(const string&s,float &d) { return d=atof(s.c_str()); }
inline string mConvert(const string&s,string&n) { return n=s; }
inline bool mConvert(const string&s,bool&n) { return mConvertIntegral(s,n); }
inline short mConvert(const string&s,short&n) { return mConvertIntegral(s,n); }
inline unsigned short mConvert(const string&s,unsigned short&n) { return mConvertIntegral(s,n); }
|
||||||
|
|
||||||
|
string simpleString(const string s);
|
||||||
|
|
||||||
|
// Polynomial string hash: starting from 0, each character c updates the
// running value to 5*value + c.  Returns 0 for the empty string.
inline int Hashstring(const string& s)
{
  // Accumulate in unsigned arithmetic, which wraps modulo 2^N; doing this in
  // a signed int overflows (undefined behaviour) once the string gets long.
  // For inputs that never overflow the result is unchanged.
  unsigned int sum=0;
  for(string::const_iterator i=s.begin(),end=s.end();i!=end;++i)
    sum=5u*sum+static_cast<unsigned int>(static_cast<int>(*i));
  return static_cast<int>(sum);
}
|
||||||
|
|
||||||
|
class _Parameter
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
string name;
|
||||||
|
bool *ifChanged;
|
||||||
|
string description;
|
||||||
|
int level;
|
||||||
|
bool filename;
|
||||||
|
public:
|
||||||
|
int onlyCopy;
|
||||||
|
_Parameter(string n,bool&b,string desc,int _level,bool _onlyCopy)
|
||||||
|
: name(simpleString(n)),ifChanged(&b),description(desc),level(_level),filename(0),onlyCopy(_onlyCopy) {}
|
||||||
|
virtual ~_Parameter(){};
|
||||||
|
bool operator==(const string&s)const
|
||||||
|
{ return name== simpleString(s); }
|
||||||
|
void setChanged()
|
||||||
|
{ *ifChanged=true; }
|
||||||
|
virtual bool setParameter(string s2,int)=0;
|
||||||
|
virtual ostream&printAt(ostream&out)=0;
|
||||||
|
virtual ostream&printValue(ostream&out)=0;
|
||||||
|
const string&getString() const { return name; }
|
||||||
|
int getLevel() const { return level;}
|
||||||
|
bool isFilename() { return filename;}
|
||||||
|
void setFilename(bool x=1) { filename=x;}
|
||||||
|
friend bool operator==(const _Parameter&a,const _Parameter&b)
|
||||||
|
{ return a.name==b.name; }
|
||||||
|
friend bool operator<(const _Parameter&a,const _Parameter&b)
|
||||||
|
{ return a.name<b.name; }
|
||||||
|
friend int Hash(const _Parameter&aaa)
|
||||||
|
{ return Hashstring(aaa.name); }
|
||||||
|
friend ostream&operator<<(ostream&out,const _Parameter&p)
|
||||||
|
{ return out<<"Parameter: "<<p.name <<endl;}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
class Parameter : public _Parameter
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
T*t;
|
||||||
|
public:
|
||||||
|
Parameter(string n,bool&b,string desc,T&_t,int level=0,bool onlyCopy=0)
|
||||||
|
: _Parameter(n,b,desc,level,onlyCopy),t(&_t) {}
|
||||||
|
virtual ~Parameter(){}
|
||||||
|
virtual bool setParameter(string s2,int verb)
|
||||||
|
{
|
||||||
|
T x;
|
||||||
|
if( !(*t==mConvert(s2,x)))
|
||||||
|
{
|
||||||
|
bool printedFirst=0;
|
||||||
|
if( verb>1 )
|
||||||
|
{
|
||||||
|
cout << "Parameter '"<<name <<"' changed from '"<<*t<<"' to '";
|
||||||
|
printedFirst=1;
|
||||||
|
}
|
||||||
|
mConvert(s2,*t);
|
||||||
|
if( printedFirst )
|
||||||
|
cout << *t <<"'\n";
|
||||||
|
setChanged();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
virtual ostream&printAt(ostream&out)
|
||||||
|
{return out << name << " = " << *t << " (" << description << ")";}
|
||||||
|
virtual ostream&printValue(ostream&out)
|
||||||
|
{return out << *t;}
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef MP<_Parameter> ParPtr;
|
||||||
|
|
||||||
|
class ParSet : public set<ParPtr>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
void insert(const ParPtr&x)
|
||||||
|
{
|
||||||
|
if( count(x)!=0 )
|
||||||
|
cerr << "ERROR: element " << x->getString() << " already inserted.\n";
|
||||||
|
set<ParPtr>::insert(x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
bool makeSetCommand(string s1,string s2,const ParSet&pars,int verb=1,int level= -1);
|
||||||
|
ostream&printPars(ostream&out,const ParSet&pars,int level=-1);
|
||||||
|
bool writeParameters(ofstream&of,const ParSet&parset,int level=0);
|
||||||
|
bool readParameters(ifstream&f,const ParSet&parset,int verb=2,int level=0);
|
||||||
|
ParSet&getGlobalParSet();
|
||||||
|
extern bool ParameterChangedFlag;
|
||||||
|
template<class T>const T&addGlobalParameter(const char *name,const char *description,int level,T*adr,const T&init)
|
||||||
|
{
|
||||||
|
*adr=init;
|
||||||
|
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
|
||||||
|
return init;
|
||||||
|
}
|
||||||
|
template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *description,int level,T*adr,const T&init)
|
||||||
|
{
|
||||||
|
*adr=init;
|
||||||
|
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
|
||||||
|
getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
|
||||||
|
return init;
|
||||||
|
}
|
||||||
|
template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *name3,const char *description,int level,T*adr,const T&init)
|
||||||
|
{
|
||||||
|
*adr=init;
|
||||||
|
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
|
||||||
|
getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
|
||||||
|
getGlobalParSet().insert(new Parameter<T>(name3,ParameterChangedFlag,description,*adr,-1));
|
||||||
|
return init;
|
||||||
|
}
|
||||||
|
template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *name3,const char *name4,const char *description,int level,T*adr,const T&init)
|
||||||
|
{
|
||||||
|
*adr=init;
|
||||||
|
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
|
||||||
|
getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
|
||||||
|
getGlobalParSet().insert(new Parameter<T>(name3,ParameterChangedFlag,description,*adr,-1));
|
||||||
|
getGlobalParSet().insert(new Parameter<T>(name4,ParameterChangedFlag,description,*adr,-1));
|
||||||
|
return init;
|
||||||
|
}
|
||||||
|
void MakeParameterOptimizing(istream&file,string resultingParameters);
|
||||||
|
|
||||||
|
#define GLOBAL_PARAMETER(TYP,VARNAME,NAME,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,DESCRIPTION,LEVEL,&VARNAME,INIT);
|
||||||
|
#define GLOBAL_PARAMETER2(TYP,VARNAME,NAME,NAME2,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,DESCRIPTION,LEVEL,&VARNAME,INIT);
|
||||||
|
#define GLOBAL_PARAMETER3(TYP,VARNAME,NAME,NAME2,NAME3,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,NAME3,DESCRIPTION,LEVEL,&VARNAME,INIT);
|
||||||
|
#define GLOBAL_PARAMETER4(TYP,VARNAME,NAME,NAME2,NAME3,NAME4,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,NAME3,NAME4,DESCRIPTION,LEVEL,&VARNAME,INIT);
|
||||||
|
|
||||||
|
void setParameterLevelName(unsigned int i,string x);
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,42 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/* Perplexity.cc
|
||||||
|
* =============
|
||||||
|
* Mike Jahr, 7/21/99
|
||||||
|
* Machine Translation group, WS99
|
||||||
|
* Center for Language and Speech Processing
|
||||||
|
*
|
||||||
|
* Last Modified by: Yaser Al-Onaizan, August 17, 1999
|
||||||
|
*
|
||||||
|
* Simple class used to calculate cross entropy and perplexity
|
||||||
|
* of models.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "Perplexity.h"
|
||||||
|
|
||||||
|
// Appends a snapshot for `model`: its label plus the current perplexity()
// and cross_entropy() values.  The whole update happens while the object's
// mutex is held.
void Perplexity::record(string model){
  mutex.lock();
  const double currentPerplexity = perplexity();
  const double currentCrossEntropy = cross_entropy();
  modelid.push_back(model);
  perp.push_back(currentPerplexity);
  ce.push_back(currentCrossEntropy);
  mutex.unlock();
}
|
@ -0,0 +1,115 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/* Perplexity.h
|
||||||
|
* ============
|
||||||
|
* Mike Jahr, 7/15/99
|
||||||
|
* Machine Translation group, WS99
|
||||||
|
* Center for Language and Speech Processing
|
||||||
|
*
|
||||||
|
* Last Modified by: Yaser Al-Onaizan, August 17, 1999
|
||||||
|
*
|
||||||
|
* Simple class used to calculate cross entropy and perplexity
|
||||||
|
* of models.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _PERPLEXITY_H
|
||||||
|
#define _PERPLEXITY_H
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <fstream>
|
||||||
|
#include "Vector.h"
|
||||||
|
#include "defs.h"
|
||||||
|
#include "Array2.h"
|
||||||
|
#include "Globals.h"
|
||||||
|
#include "syncObj.h"
|
||||||
|
|
||||||
|
#define CROSS_ENTROPY_BASE 2
|
||||||
|
|
||||||
|
class Perplexity {
|
||||||
|
private:
|
||||||
|
double sum;
|
||||||
|
double wc;
|
||||||
|
Array2<double, Vector<double> > *E_M_L;
|
||||||
|
Vector<string> modelid;
|
||||||
|
Vector<double > perp;
|
||||||
|
Vector<double > ce;
|
||||||
|
Vector<string> name ;
|
||||||
|
Mutex mutex;
|
||||||
|
public:
|
||||||
|
~Perplexity() { delete E_M_L;}
|
||||||
|
Perplexity() {
|
||||||
|
E_M_L = new Array2<double, Vector<double> >(MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH);
|
||||||
|
unsigned int l, m ;
|
||||||
|
Vector<double> fact(MAX_SENTENCE_LENGTH, 1.0);
|
||||||
|
for (m = 2 ; m < MAX_SENTENCE_LENGTH ; m++)
|
||||||
|
fact[m] = fact[m-1] * m ;
|
||||||
|
for (m = 1 ; m < MAX_SENTENCE_LENGTH ; m++)
|
||||||
|
for (l = 1 ; l < MAX_SENTENCE_LENGTH ; l++) {
|
||||||
|
(*E_M_L)(l, m) = log (pow((LAMBDA * l), double(m)) * exp(-LAMBDA * double(l)) /
|
||||||
|
(fact[m])) ;
|
||||||
|
}
|
||||||
|
sum = 0 ;
|
||||||
|
wc = 0;
|
||||||
|
perp.clear();
|
||||||
|
ce.clear();
|
||||||
|
name.clear();
|
||||||
|
}
|
||||||
|
inline void clear() {
|
||||||
|
mutex.lock();
|
||||||
|
sum = 0 ;
|
||||||
|
wc = 0 ;
|
||||||
|
mutex.unlock();
|
||||||
|
}
|
||||||
|
size_t size() const {return(min(perp.size(), ce.size()));}
|
||||||
|
inline void addFactor(const double p, const double count, const int l,
|
||||||
|
const int m,bool withPoisson) {
|
||||||
|
mutex.lock();
|
||||||
|
wc += count * m ; // number of french words
|
||||||
|
sum += count * ( (withPoisson?((*E_M_L)(l, m)):0.0) + p) ;
|
||||||
|
mutex.unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline double perplexity() const {
|
||||||
|
return exp( -1*sum / wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline double cross_entropy() const {
|
||||||
|
return (-1.0*sum / (log(double(CROSS_ENTROPY_BASE)) * wc));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline double word_count() const {
|
||||||
|
return wc;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline double getSum() const {
|
||||||
|
return sum ;
|
||||||
|
}
|
||||||
|
|
||||||
|
void record(string model);
|
||||||
|
|
||||||
|
friend void generatePerplexityReport(const Perplexity&, const Perplexity&,
|
||||||
|
const Perplexity&, const Perplexity&,
|
||||||
|
ostream&, int, int, bool);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,175 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef HEADER_Pointer_DEFINED
|
||||||
|
#define HEADER_Pointer_DEFINED
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
// Minimal wrapper around a raw T*.  It only stores the pointer and forwards
// dereferencing; it declares no destructor and never deletes the pointee.
template<class T>
class SmartPointer
{
 protected:
  T*p;   // wrapped pointer, may be null
 public:
  SmartPointer(T*_p=0) : p(_p) {}
  inline T&operator*() const { return *p; }
  inline T*operator->() const { return p; }
  // True when the wrapped pointer is non-null.
  inline operator bool() const { return p!=0; }
  inline T*ptr() const { return p; }
};

// Streams the pointee, or the text "nullpointer" when the pointer is null.
template<class T> inline ostream &operator<<(ostream&out,const SmartPointer<T>&s)
{
  if( !s.ptr() )
    return out <<"nullpointer";
  return out << *s;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Minimal wrapper around a raw const T*.  Like SmartPointer it only stores
// the pointer and never deletes the pointee, but all access is read-only.
template<class T>
class SmartPointerConst
{
 protected:
  const T*p;   // wrapped pointer, may be null
 public:
  SmartPointerConst(const T*_p=0) : p(_p) {}
  inline const T&operator*() const { return *p; }
  inline const T*operator->() const { return p; }
  // True when the wrapped pointer is non-null.
  inline operator bool() const { return p!=0; }
  inline const T*ptr() const { return p; }
};

// Streams the pointee, or the text "nullpointer" when the pointer is null.
template<class T> inline ostream &operator<<(ostream&out,const SmartPointerConst<T>&s)
{
  if( !s.ptr() )
    return out <<"nullpointer";
  return out << *s;
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
class UP : public SmartPointer<T>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
UP(T*_p=0)
|
||||||
|
: SmartPointer<T>(_p) {}
|
||||||
|
};
|
||||||
|
template<class T> inline bool operator==(const UP<T>&s1,const UP<T>&s2)
|
||||||
|
{return s1.ptr()==s2.ptr();}
|
||||||
|
template<class T> inline bool operator<(const UP<T>&s1,const UP<T>&s2)
|
||||||
|
{return s1.ptr() < s2.ptr();}
|
||||||
|
template<class T> inline int Hash(const UP<T> &wp)
|
||||||
|
{if(wp.ptr())return Hash(*wp);else return 0;}
|
||||||
|
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
class UPConst : public SmartPointerConst<T>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
UPConst(const T*_p=0)
|
||||||
|
: SmartPointerConst<T>(_p) {}
|
||||||
|
};
|
||||||
|
template<class T> inline bool operator==(const UPConst<T>&s1,const UPConst<T>&s2)
|
||||||
|
{return s1.ptr()==s2.ptr();}
|
||||||
|
template<class T> inline bool operator<(const UPConst<T>&s1,const UPConst<T>&s2)
|
||||||
|
{return s1.ptr()<s2.ptr();}
|
||||||
|
template<class T> inline int Hash(const UPConst<T> &wp)
|
||||||
|
{if(wp.ptr())return Hash(*wp);else return 0;}
|
||||||
|
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
class MP : public SmartPointer<T>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
MP(T*_p=0)
|
||||||
|
: SmartPointer<T>(_p) {}
|
||||||
|
};
|
||||||
|
template <class T> inline bool operator==(const MP<T>&s1,const MP<T>&s2)
|
||||||
|
{assert(s1);assert(s2);return *s1==*s2;}
|
||||||
|
template <class T> inline bool operator<(const MP<T>&s1,const MP<T>&s2)
|
||||||
|
{assert(s1);assert(s2);return *s1 < *s2;}
|
||||||
|
template <class T> inline int Hash(const MP<T> &wp)
|
||||||
|
{if(wp.ptr())return Hash(*wp);else return 0;}
|
||||||
|
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
class MPConst : public SmartPointerConst<T>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
MPConst(const T*_p=0)
|
||||||
|
: SmartPointerConst<T>(_p) {}
|
||||||
|
};
|
||||||
|
template <class T> inline bool operator==(const MPConst<T>&s1,const MPConst<T>&s2)
|
||||||
|
{assert(s1);assert(s2);return *s1== *s2;}
|
||||||
|
template <class T> inline bool operator<(const MPConst<T>&s1,const MPConst<T>&s2)
|
||||||
|
{assert(s1);assert(s2);return *s1 < *s2;}
|
||||||
|
template <class T> inline int Hash(const MPConst<T> &wp)
|
||||||
|
{if(wp.ptr())return Hash(*wp);else return 0;}
|
||||||
|
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
class DELP : public SmartPointer<T>
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
DELP(const DELP<T>&x);
|
||||||
|
public:
|
||||||
|
const DELP<T>&operator=(DELP<T>&x)
|
||||||
|
{
|
||||||
|
delete this->p;
|
||||||
|
this->p=x.p;x.p=0;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
~DELP()
|
||||||
|
{ delete this->p;this->p=0;}
|
||||||
|
DELP(T*_p=0)
|
||||||
|
: SmartPointer<T>(_p) {}
|
||||||
|
void set(T*_p)
|
||||||
|
{
|
||||||
|
delete this->p;
|
||||||
|
this->p=_p;
|
||||||
|
}
|
||||||
|
friend bool operator==(const DELP<T>&s1,const DELP<T>&s2)
|
||||||
|
{
|
||||||
|
return *(s1.p)== *(s2.p);
|
||||||
|
}
|
||||||
|
friend bool operator<(const DELP<T>&s1,const DELP<T>&s2)
|
||||||
|
{
|
||||||
|
return *(s1.p) < *(s2.p);
|
||||||
|
}
|
||||||
|
friend inline int Hash(const DELP<T> &wp)
|
||||||
|
{
|
||||||
|
if(wp.p)
|
||||||
|
return Hash(*wp.p);
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,5 @@
|
|||||||
|
|
||||||
|
//#include "SetArray.h"
|
||||||
|
|
||||||
|
#include "Parameter.h"
|
||||||
|
|
@ -0,0 +1,159 @@
|
|||||||
|
/*
|
||||||
|
Array of set, for fast access of dictionary, and most important,
|
||||||
|
be threadsafe
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef __SET_ARRAY_H__
|
||||||
|
#define __SET_ARRAY_H__
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
#include "defs.h"
|
||||||
|
#include "vocab.h"
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include "syncObj.h"
|
||||||
|
|
||||||
|
// Plain value pair holding a count and a probability.
template <class COUNT, class PROB>
class LpPair {
 public:
  COUNT count ;
  PROB prob ;

  // Both fields start at zero.
  LpPair()
    : count(0), prob(0) {}

  // Both fields are given explicitly.
  LpPair(COUNT c, PROB p)
    : count(c), prob(p) {}
} ;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template <class COUNT, class PROB>
|
||||||
|
class SetArray{
|
||||||
|
public:
|
||||||
|
typedef LpPair<COUNT, PROB> CPPair;
|
||||||
|
protected:
|
||||||
|
|
||||||
|
/*Information stores here*/
|
||||||
|
std::vector<std::map<size_t,CPPair> > store;
|
||||||
|
std::vector<Mutex> muts;
|
||||||
|
size_t nEnglishWord;
|
||||||
|
size_t nFrenchWord;
|
||||||
|
void _init(){
|
||||||
|
store.resize(nEnglishWord);
|
||||||
|
muts.resize(nFrenchWord);
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
/*
|
||||||
|
Get reference, not creating
|
||||||
|
*/
|
||||||
|
CPPair* find(size_t fi, size_t si){
|
||||||
|
/*HERE: lock, unlock after we get the pointer*/
|
||||||
|
muts[fi].lock();
|
||||||
|
/* Sync-ed */
|
||||||
|
std::map<size_t,CPPair>& w = store[fi];
|
||||||
|
typename std::map<size_t,CPPair>::iterator it = w.find((size_t)si);
|
||||||
|
CPPair* q = ( it!=store[fi].end() ? &(it->second) : 0);
|
||||||
|
// for(it = w.begin(); it!=w.end();it++){
|
||||||
|
// cout << it->first << endl;
|
||||||
|
// }
|
||||||
|
/* End Synced*/
|
||||||
|
muts[fi].unlock();
|
||||||
|
return q;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
Get reference, creating it
|
||||||
|
*/
|
||||||
|
inline CPPair& findRef(size_t fi, size_t si){
|
||||||
|
std::map<size_t,CPPair> &x = store[fi];
|
||||||
|
muts[fi].lock();
|
||||||
|
/* Sync-ed */
|
||||||
|
CPPair& ref= x[si];
|
||||||
|
/* End Synced */
|
||||||
|
muts[fi].unlock();
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
void insert(size_t fi, size_t si, COUNT count = 0, PROB prob = 0){
|
||||||
|
muts[fi].lock();
|
||||||
|
/*Syced*/
|
||||||
|
std::map<size_t,CPPair> &x = store[fi];
|
||||||
|
CPPair& v= x[si];
|
||||||
|
v.count = count;
|
||||||
|
v.prob = prob;
|
||||||
|
muts[fi].unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
void incCount(size_t e, size_t f, COUNT inc)
|
||||||
|
// increments the count of the given word pair. if the pair does not exist,
|
||||||
|
// it creates it with the given value.
|
||||||
|
{
|
||||||
|
if( inc ){
|
||||||
|
std::map<size_t,CPPair> &x = store[e];
|
||||||
|
muts[e].lock();
|
||||||
|
CPPair& ref= x[f];
|
||||||
|
ref.count += inc;
|
||||||
|
muts[e].unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PROB getProb(size_t e, size_t f) const
|
||||||
|
// read probability value for P(fj/ei) from the hash table
|
||||||
|
// if pair does not exist, return floor value PROB_SMOOTH
|
||||||
|
{
|
||||||
|
muts[e].lock();
|
||||||
|
typename std::map<size_t,CPPair >::const_iterator it = store[e].find(f);
|
||||||
|
PROB b;
|
||||||
|
if(it == store[e].end())
|
||||||
|
b = PROB_SMOOTH;
|
||||||
|
else
|
||||||
|
b=max((it->second).prob, PROB_SMOOTH);
|
||||||
|
muts[e].unlock();
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
|
COUNT getCount(size_t e, size_t f) const
|
||||||
|
/* read count value for entry pair (fj/ei) from the hash table */
|
||||||
|
{
|
||||||
|
muts[e].lock();
|
||||||
|
typename std::map<size_t,CPPair >::const_iterator it = store[e].find(f);
|
||||||
|
COUNT c;
|
||||||
|
if(it == store[e].end())
|
||||||
|
c = 0;
|
||||||
|
else
|
||||||
|
c = ((*it).second).count;
|
||||||
|
muts[e].unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
void erase(size_t e, size_t f)
|
||||||
|
// In: a source and a target token ids.
|
||||||
|
// removes the entry with that pair from table
|
||||||
|
{
|
||||||
|
muts[e].lock();
|
||||||
|
store[e].erase(f);
|
||||||
|
muts[e].unlock();
|
||||||
|
};
|
||||||
|
|
||||||
|
inline void setNumberOfEnlish(size_t e){nEnglishWord=e;_init();};
|
||||||
|
inline void setNumberOfFrench(size_t f){nFrenchWord = f;};
|
||||||
|
|
||||||
|
const std::map<size_t,CPPair>& getMap(size_t i) const{
|
||||||
|
return store[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
std::map<size_t,CPPair>& getMap1(size_t i){
|
||||||
|
return store[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
SetArray(size_t e, size_t f): nEnglishWord(e), nFrenchWord(f){
|
||||||
|
_init();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,177 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#include "TTables.h"
|
||||||
|
#include "Parameter.h"
|
||||||
|
#include<iostream>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(float,PROB_CUTOFF,"PROB CUTOFF","Probability cutoff threshold for lexicon probabilities",PARLEV_OPTHEUR,1e-7);
|
||||||
|
GLOBAL_PARAMETER2(float, COUNTINCREASE_CUTOFF,"COUNTINCREASE CUTOFF","countCutoff","Counts increment cutoff threshold",PARLEV_OPTHEUR,1e-6);
|
||||||
|
|
||||||
|
|
||||||
|
/* ------------------ Method Definitions for Class tmodel --------------------*/
|
||||||
|
|
||||||
|
|
||||||
|
// To output to STDOUT, submit filename as NULL
|
||||||
|
template <class COUNT, class PROB>
|
||||||
|
void tmodel<COUNT, PROB>::printCountTable(const char *filename,
|
||||||
|
const Vector<WordEntry>& evlist,
|
||||||
|
const Vector<WordEntry>& fvlist,
|
||||||
|
const bool actual) const
|
||||||
|
{
|
||||||
|
ostream *tof;
|
||||||
|
|
||||||
|
if(filename)
|
||||||
|
tof = new ofstream(filename);
|
||||||
|
else
|
||||||
|
tof = & cout;
|
||||||
|
|
||||||
|
ostream &of = *tof;
|
||||||
|
/* for(unsigned int i=0;i<es.size()-1;++i)
|
||||||
|
for(unsigned int j=es[i];j<es[i+1];++j)
|
||||||
|
{
|
||||||
|
const CPPair&x=fs[j].second;
|
||||||
|
WordIndex e=i,f=fs[j].first;
|
||||||
|
if( actual )
|
||||||
|
of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
|
||||||
|
else
|
||||||
|
of << e << ' ' << f << ' ' << x.prob << '\n';
|
||||||
|
}*/
|
||||||
|
for(unsigned int i=0;i<lexmat.size();++i){
|
||||||
|
if( lexmat[i] ){
|
||||||
|
for(unsigned int j=0;j<lexmat[i]->size();++j)
|
||||||
|
{
|
||||||
|
const CPPair&x=(*lexmat[i])[j].second;
|
||||||
|
WordIndex e=i,f=(*lexmat[i])[j].first;
|
||||||
|
if( x.prob>MINCOUNTINCREASE ){
|
||||||
|
if( actual ){
|
||||||
|
of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.count << '\n';
|
||||||
|
}else{
|
||||||
|
of << e << ' ' << f << ' ' << x.count << '\n';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(filename){
|
||||||
|
((ofstream*)tof)->close();
|
||||||
|
delete tof;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class COUNT, class PROB>
|
||||||
|
void tmodel<COUNT, PROB>::printProbTable(const char *filename,
|
||||||
|
const Vector<WordEntry>& evlist,
|
||||||
|
const Vector<WordEntry>& fvlist,
|
||||||
|
const bool actual) const
|
||||||
|
{
|
||||||
|
ofstream of(filename);
|
||||||
|
/* for(unsigned int i=0;i<es.size()-1;++i)
|
||||||
|
for(unsigned int j=es[i];j<es[i+1];++j)
|
||||||
|
{
|
||||||
|
const CPPair&x=fs[j].second;
|
||||||
|
WordIndex e=i,f=fs[j].first;
|
||||||
|
if( actual )
|
||||||
|
of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
|
||||||
|
else
|
||||||
|
of << e << ' ' << f << ' ' << x.prob << '\n';
|
||||||
|
}*/
|
||||||
|
for(unsigned int i=0;i<lexmat.size();++i){
|
||||||
|
if( lexmat[i] ){
|
||||||
|
for(unsigned int j=0;j<lexmat[i]->size();++j)
|
||||||
|
{
|
||||||
|
const CPPair&x=(*lexmat[i])[j].second;
|
||||||
|
WordIndex e=i,f=(*lexmat[i])[j].first;
|
||||||
|
if( x.prob>PROB_SMOOTH ){
|
||||||
|
if( actual ){
|
||||||
|
of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
|
||||||
|
}else{
|
||||||
|
of << e << ' ' << f << ' ' << x.prob << '\n';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class COUNT, class PROB>
|
||||||
|
void tmodel<COUNT, PROB>::printProbTableInverse(const char *,
|
||||||
|
const Vector<WordEntry>&,
|
||||||
|
const Vector<WordEntry>&,
|
||||||
|
const double,
|
||||||
|
const double,
|
||||||
|
const bool ) const
|
||||||
|
{
|
||||||
|
}
|
||||||
|
template <class COUNT, class PROB>
|
||||||
|
void tmodel<COUNT, PROB>::normalizeTable(const vcbList&, const vcbList&, int)
|
||||||
|
{
|
||||||
|
for(unsigned int i=0;i<lexmat.size();++i){
|
||||||
|
double c=0.0;
|
||||||
|
if( lexmat[i] ){
|
||||||
|
unsigned int lSize=lexmat[i]->size();
|
||||||
|
for(unsigned int j=0;j<lSize;++j)
|
||||||
|
c+=(*lexmat[i])[j].second.count;
|
||||||
|
for(unsigned int j=0;j<lSize;++j) {
|
||||||
|
if( c==0 )
|
||||||
|
(*lexmat[i])[j].second.prob=1.0/(lSize);
|
||||||
|
else
|
||||||
|
(*lexmat[i])[j].second.prob=(*lexmat[i])[j].second.count/c;
|
||||||
|
(*lexmat[i])[j].second.count=0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class COUNT, class PROB>
|
||||||
|
bool tmodel<COUNT, PROB>::readProbTable(const char *filename){
|
||||||
|
/* This function reads the t table from a file.
|
||||||
|
Each line is of the format: source_word_id target_word_id p(target_word|source_word)
|
||||||
|
This is the inverse operation of the printTable function.
|
||||||
|
NAS, 7/11/99
|
||||||
|
*/
|
||||||
|
ifstream inf(filename);
|
||||||
|
cerr << "Reading t prob. table from " << filename << "\n";
|
||||||
|
if (!inf) {
|
||||||
|
cerr << "\nERROR: Cannot open " << filename << "\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
WordIndex src_id, trg_id;
|
||||||
|
PROB prob;
|
||||||
|
int nEntry=0;
|
||||||
|
while (inf >> src_id >> trg_id >> prob) {
|
||||||
|
insert(src_id, trg_id, 0.0, prob);
|
||||||
|
nEntry++;
|
||||||
|
}
|
||||||
|
cerr << "Read " << nEntry << " entries in prob. table.\n";
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Explicit instantiation of tmodel for the COUNT and PROB types that are in
// scope here (presumably typedefs from a project header -- TODO confirm).
template class tmodel<COUNT,PROB> ;
|
||||||
|
|
||||||
|
/* ---------------- End of Method Definitions of class tmodel ---------------*/
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,330 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/* --------------------------------------------------------------------------*
|
||||||
|
* *
|
||||||
|
* Module : TTables *
|
||||||
|
* *
|
||||||
|
* Prototypes File: TTables.h *
|
||||||
|
* *
|
||||||
|
* Objective: Defines classes and methods for handling I/O for Probability & *
|
||||||
|
* Count tables and also alignment tables *
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef _ttables_h
|
||||||
|
#define _ttables_h 1
|
||||||
|
|
||||||
|
|
||||||
|
#include "defs.h"
|
||||||
|
#include "vocab.h"
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <functional>
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
#include "Vector.h"
|
||||||
|
#include <utility>
|
||||||
|
#include "syncObj.h"
|
||||||
|
|
||||||
|
#if __GNUC__>2
|
||||||
|
#include <ext/hash_map>
|
||||||
|
using __gnu_cxx::hash_map;
|
||||||
|
#else
|
||||||
|
#include <hash_map>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
#include "Globals.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* The tables defined in the following classes are defined as hash tables. For
|
||||||
|
example, the t-table is a hash function of a word pair; an alignment is
|
||||||
|
a hash function of a vector of integer numbers (sentence positions) and so
|
||||||
|
on */
|
||||||
|
|
||||||
|
|
||||||
|
/*----------- Definition of Hash Function for class tmodel ------- -----------*/
|
||||||
|
|
||||||
|
/* Shorthand for a pair of word ids. */
typedef pair<WordIndex, WordIndex> wordPairIds;
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * hashpair: hash functor for a pair of word ids.
 * The hash value is MAX_W * key.first + key.second, computed in size_t.
 * NOTE(review): the value is distinct for distinct pairs only while
 * key.second < MAX_W and the product fits into size_t -- confirm against the
 * definition of MAX_W.
 */
class hashpair : public unary_function< pair<WordIndex, WordIndex>, size_t >
{
public:
  size_t operator() (const pair<WordIndex, WordIndex>& key) const
  {
    const size_t scaledFirst = (size_t) MAX_W*key.first;
    return scaledFirst + key.second;
  }
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ------------------ Class Prototype Definitions ---------------------------*
|
||||||
|
Class Name: tmodel
|
||||||
|
Objective: This defines the underlying data structure for t Tables and t
|
||||||
|
Count Tables. They are defined as a hash table. Each entry in the hash table
|
||||||
|
is the probability (P(fj/ei) ) or count collected for ( C(fj/ei)). The
|
||||||
|
probability and the count are represented as log integer probability as
|
||||||
|
defined by the class LogProb .
|
||||||
|
|
||||||
|
This class is used to represent t Tables (probability) and n (fertility)
|
||||||
|
Tables and also their corresponding count tables .
|
||||||
|
|
||||||
|
*---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
//typedef float COUNT ;
|
||||||
|
//typedef LogProb PROB ;
|
||||||
|
/*
 * LpPair: one table cell holding a count together with a probability.
 * Both members are public; the default constructor sets both to 0.
 */
template <class COUNT, class PROB>
class LpPair {
public:
  COUNT count;  // collected count
  PROB prob;    // probability value

  // Creates a cell with count 0 and probability 0.
  LpPair()
    : count(0), prob(0)
  {
  }

  // Creates a cell holding the given count and probability.
  LpPair(COUNT c, PROB p)
    : count(c), prob(p)
  {
  }
};
|
||||||
|
|
||||||
|
/*
 * mbinary_search: looks up the element whose `first` member equals val in the
 * half-open range [x, y), which must be sorted ascending by `first`.
 * Returns a pointer to the matching element, or 0 when there is none
 * (including the empty range).
 */
template<class T>
T*mbinary_search(T*x,T*y,unsigned int val)
{
  // Narrow [x, y) until the element at its front is the hit or the range
  // cannot be split any further.
  while( y-x!=0 )
  {
    if( x->first==val )
      return x;
    if( y-x<2 )
      return 0;
    T*middle=x+(y-x)/2;
    if( val < middle->first )
      y=middle;   // continue in the lower half
    else
      x=middle;   // continue in the upper half (middle included)
  }
  return 0;
}
|
||||||
|
|
||||||
|
/*
 * mbinary_search (read-only variant): looks up the element whose `first`
 * member equals val in the half-open range [x, y), which must be sorted
 * ascending by `first`.  Returns a pointer to the matching element, or 0
 * when there is none (including the empty range).
 */
template<class T>
const T*mbinary_search(const T*x,const T*y,unsigned int val)
{
  const T*lo=x;
  const T*hi=y;
  for(;;)
  {
    if( hi-lo==0 )
      return 0;
    if( lo->first==val )
      return lo;
    if( hi-lo<2 )
      return 0;
    const T*middle=lo+(hi-lo)/2;
    if( val < middle->first )
      hi=middle;   // continue in the lower half
    else
      lo=middle;   // continue in the upper half (middle included)
  }
}
|
||||||
|
|
||||||
|
template <class COUNT, class PROB>
|
||||||
|
class tmodel{
|
||||||
|
typedef LpPair<COUNT, PROB> CPPair;
|
||||||
|
public:
|
||||||
|
bool recordDiff;
|
||||||
|
|
||||||
|
public:
|
||||||
|
int noEnglishWords; // total number of unique source words
|
||||||
|
int noFrenchWords; // total number of unique target words
|
||||||
|
//vector<pair<unsigned int,CPPair> > fs;
|
||||||
|
//vector<unsigned int> es;
|
||||||
|
vector< vector<pair<unsigned int,CPPair> >* > lexmat;
|
||||||
|
vector< Mutex > mutex;
|
||||||
|
|
||||||
|
void erase(WordIndex e, WordIndex f){
|
||||||
|
CPPair *p=find(e,f);
|
||||||
|
if(p)
|
||||||
|
*p=CPPair(0,0);
|
||||||
|
};
|
||||||
|
|
||||||
|
CPPair*find(int e,int f){
|
||||||
|
//pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
|
||||||
|
//pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
|
||||||
|
if(e>lexmat.size()||lexmat[e]==NULL){
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
|
||||||
|
pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
|
||||||
|
pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
|
||||||
|
if( x==0 ){
|
||||||
|
//cerr << "A:DID NOT FIND ENTRY: " << e << " " << f << '\n';
|
||||||
|
//abort();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return &(x->second);
|
||||||
|
}
|
||||||
|
|
||||||
|
const CPPair*find(int e,int f)const{
|
||||||
|
if(lexmat[e]==0)
|
||||||
|
return 0;
|
||||||
|
const pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
|
||||||
|
const pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
|
||||||
|
//const pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
|
||||||
|
//const pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
|
||||||
|
const pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
|
||||||
|
if( x==0 ){
|
||||||
|
//cerr << "B:DID NOT FIND ENTRY: " << e << " " << f << '\n';
|
||||||
|
//abort();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return &(x->second);
|
||||||
|
}
|
||||||
|
public:
|
||||||
|
void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
|
||||||
|
CPPair* found = find(e,f);
|
||||||
|
if(found)
|
||||||
|
*found=CPPair(cval,pval);
|
||||||
|
}
|
||||||
|
|
||||||
|
CPPair*getPtr(int e,int f){return find(e,f);}
|
||||||
|
|
||||||
|
tmodel(){};
|
||||||
|
tmodel(const string&fn) {
|
||||||
|
recordDiff = false;
|
||||||
|
int count=0,count2=0;
|
||||||
|
ifstream infile2(fn.c_str());
|
||||||
|
cerr << "Inputfile in " << fn << endl;
|
||||||
|
int e,f,olde=-1,oldf=-1;
|
||||||
|
pair<unsigned int,CPPair> cp;
|
||||||
|
vector< pair<unsigned int,CPPair> > cps;
|
||||||
|
while(infile2>>e>>f){
|
||||||
|
cp.first=f;
|
||||||
|
assert(e>=olde);
|
||||||
|
assert(e>olde ||f>oldf);
|
||||||
|
if( e!=olde&&olde>=0 ){
|
||||||
|
int oldsize=lexmat.size();
|
||||||
|
lexmat.resize(olde+1);
|
||||||
|
for(unsigned int i=oldsize;i<lexmat.size();++i)
|
||||||
|
lexmat[i]=0;
|
||||||
|
lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
|
||||||
|
cps.clear();
|
||||||
|
if( !((*lexmat[olde]).size()==(*lexmat[olde]).capacity()) )
|
||||||
|
cerr << "eRROR: waste of memory: " << (*lexmat[olde]).size() << " " << (*lexmat[olde]).capacity() << endl;
|
||||||
|
count2+=lexmat[olde]->capacity();
|
||||||
|
}
|
||||||
|
cps.push_back(cp);
|
||||||
|
olde=e;
|
||||||
|
oldf=f;
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
lexmat.resize(olde+1);
|
||||||
|
lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
|
||||||
|
count2+=lexmat[olde]->capacity();
|
||||||
|
cout << "There are " << count << " " << count2 << " entries in table" << '\n';
|
||||||
|
mutex.resize(lexmat.size());
|
||||||
|
/* Create mutex */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* tmodel(const string&fn)
|
||||||
|
{
|
||||||
|
size_t count=0;
|
||||||
|
{
|
||||||
|
ifstream infile1(fn.c_str());
|
||||||
|
if( !infile1 )
|
||||||
|
{
|
||||||
|
cerr << "ERROR: can't read coocurrence file " << fn << '\n';
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
int e,f;
|
||||||
|
while(infile1>>e>>f)
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
cout << "There are " << count << " entries in table" << '\n';
|
||||||
|
ifstream infile2(fn.c_str());
|
||||||
|
fs.resize(count);
|
||||||
|
int e,f,olde=-1,oldf=-1;
|
||||||
|
pair<unsigned int,CPPair> cp;
|
||||||
|
count=0;
|
||||||
|
while(infile2>>e>>f)
|
||||||
|
{
|
||||||
|
assert(e>=olde);
|
||||||
|
assert(e>olde ||f>oldf);
|
||||||
|
if( e!=olde )
|
||||||
|
{
|
||||||
|
es.resize(e+1);
|
||||||
|
for(unsigned int i=olde+1;int(i)<=e;++i)
|
||||||
|
es[i]=count;
|
||||||
|
}
|
||||||
|
cp.first=f;
|
||||||
|
assert(count<fs.size());
|
||||||
|
fs[count]=cp;
|
||||||
|
//fs.push_back(cp);
|
||||||
|
olde=e;
|
||||||
|
oldf=f;
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
assert(count==fs.size());
|
||||||
|
es.push_back(fs.size());
|
||||||
|
cout << fs.size() << " " << count << " coocurrences read" << '\n';
|
||||||
|
}*/
|
||||||
|
|
||||||
|
void incCount(WordIndex e, WordIndex f, COUNT inc) {
|
||||||
|
if( inc ){
|
||||||
|
CPPair *p=find(e,f);
|
||||||
|
if( p ){
|
||||||
|
mutex[e].lock();
|
||||||
|
p->count += inc ;
|
||||||
|
mutex[e].unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PROB getProb(WordIndex e, WordIndex f) const{
|
||||||
|
const CPPair *p=find(e,f);
|
||||||
|
if( p )
|
||||||
|
return max(p->prob, PROB_SMOOTH);
|
||||||
|
else
|
||||||
|
return PROB_SMOOTH;
|
||||||
|
}
|
||||||
|
|
||||||
|
COUNT getCount(WordIndex e, WordIndex f) const
|
||||||
|
{
|
||||||
|
const CPPair *p=find(e,f);
|
||||||
|
if( p )
|
||||||
|
return p->count;
|
||||||
|
else
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
|
||||||
|
void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
|
||||||
|
void printProbTableInverse(const char *filename,
|
||||||
|
const Vector<WordEntry>& evlist,
|
||||||
|
const Vector<WordEntry>& fvlist,
|
||||||
|
const double eTotal,
|
||||||
|
const double fTotal,
|
||||||
|
const bool actual = false ) const;
|
||||||
|
void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
|
||||||
|
bool readProbTable(const char *filename);
|
||||||
|
bool readSubSampledProbTable(const char* filename, std::set<WordIndex> &e, std::set<WordIndex> &f);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,423 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/*--
|
||||||
|
Vector: checked vector implementation
|
||||||
|
|
||||||
|
Franz Josef Och (30/07/99)
|
||||||
|
--*/
|
||||||
|
#ifndef ARRAY_H_DEFINED
|
||||||
|
#define ARRAY_H_DEFINED
|
||||||
|
#include "mystl.h"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <functional>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef NDEBUG
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#define Vector vector
|
||||||
|
// Writes the vector as "Vector(<size>){  <index>: <element> ; ... }" followed
// by a newline and returns the stream.
template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
{
  o << "Vector(" << a.size() << "){ ";
  unsigned int idx=0;
  while( idx<a.size() )
  {
    o << " " << idx << ": " << a[idx] << " ;";
    idx++;
  }
  o << "}\n";
  return o;
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define ARRAY_DEBUG
|
||||||
|
#define memo_del(a, b)
|
||||||
|
#define memo_new(a)
|
||||||
|
|
||||||
|
template<class T> class Vector
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
T *p;
|
||||||
|
int realSize;
|
||||||
|
int maxWritten;
|
||||||
|
|
||||||
|
void copy(T *a, const T *b, int n);
|
||||||
|
void copy(T *a, T *b, int n);
|
||||||
|
void _expand();
|
||||||
|
public:
|
||||||
|
Vector()
|
||||||
|
: p(0), realSize(0), maxWritten(-1)
|
||||||
|
{
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "MAKE ARRAY: " << this<<" "<<(void*)p << '\n';
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
Vector(const Vector<T> &x)
|
||||||
|
: p(new T[x.maxWritten+1]), realSize(x.maxWritten+1), maxWritten(x.maxWritten)
|
||||||
|
{
|
||||||
|
memo_new(p);
|
||||||
|
copy(p, x.p, realSize);
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< '\n';
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
explicit Vector(int n)
|
||||||
|
: p(new T[n]), realSize(n), maxWritten(n-1)
|
||||||
|
{
|
||||||
|
memo_new(p);
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
Vector(int n, const T&_init)
|
||||||
|
: p(new T[n]), realSize(n), maxWritten(n-1)
|
||||||
|
{
|
||||||
|
memo_new(p);
|
||||||
|
for(int iii=0;iii<n;iii++)p[iii]=_init;
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
~Vector()
|
||||||
|
{
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||||
|
#endif
|
||||||
|
delete [] p;
|
||||||
|
memo_del(p, 1);
|
||||||
|
#ifndef NDEBUG
|
||||||
|
p=0;realSize=-1;maxWritten=-1;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
Vector<T>& operator=(const Vector<T>&x)
|
||||||
|
{
|
||||||
|
if( this!= &x )
|
||||||
|
{
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||||
|
#endif
|
||||||
|
delete [] p;
|
||||||
|
memo_del(p, 1);
|
||||||
|
realSize = x.maxWritten+1;
|
||||||
|
maxWritten = x.maxWritten;
|
||||||
|
p = new T[realSize];
|
||||||
|
memo_new(p);
|
||||||
|
copy(p, x.p, realSize);
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
Vector<T>& operator=(Vector<T>&x)
|
||||||
|
{
|
||||||
|
if( this!= &x )
|
||||||
|
{
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||||
|
#endif
|
||||||
|
delete [] p;
|
||||||
|
memo_del(p, 1);
|
||||||
|
realSize = x.maxWritten+1;
|
||||||
|
maxWritten = x.maxWritten;
|
||||||
|
p = new T[realSize];
|
||||||
|
memo_new(p);
|
||||||
|
copy(p, x.p, realSize);
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
void allowAccess(int n)
|
||||||
|
{
|
||||||
|
while( realSize<=n )
|
||||||
|
_expand();
|
||||||
|
maxWritten=max(maxWritten, n);
|
||||||
|
assert( maxWritten<realSize );
|
||||||
|
}
|
||||||
|
void resize(int n)
|
||||||
|
{
|
||||||
|
while( realSize<n )
|
||||||
|
_expand();
|
||||||
|
maxWritten=n-1;
|
||||||
|
}
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
resize(0);
|
||||||
|
}
|
||||||
|
void reserve(int n)
|
||||||
|
{
|
||||||
|
int maxOld=maxWritten;
|
||||||
|
resize(n);
|
||||||
|
maxWritten=maxOld;
|
||||||
|
}
|
||||||
|
void sort(int until=-1)
|
||||||
|
{
|
||||||
|
if( until== -1 ) until=size();
|
||||||
|
std::sort(p, p+until);
|
||||||
|
}
|
||||||
|
void invsort(int until=-1)
|
||||||
|
{
|
||||||
|
if( until== -1 ) until=size();
|
||||||
|
std::sort(p, p+until, greater<T>());
|
||||||
|
}
|
||||||
|
void init(int n, const T&_init)
|
||||||
|
{
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||||
|
#endif
|
||||||
|
delete []p;
|
||||||
|
memo_del(p, 1);
|
||||||
|
p=new T[n];
|
||||||
|
memo_new(p);
|
||||||
|
realSize=n;
|
||||||
|
maxWritten=n-1;
|
||||||
|
for(int iii=0;iii<n;iii++)p[iii]=_init;
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
inline unsigned int size() const
|
||||||
|
{assert( maxWritten<realSize );
|
||||||
|
return maxWritten+1;}
|
||||||
|
inline int low() const
|
||||||
|
{ return 0; }
|
||||||
|
inline int high() const
|
||||||
|
{ return maxWritten; }
|
||||||
|
int findMax() const;
|
||||||
|
int findMin() const;
|
||||||
|
void errorAccess(int n) const;
|
||||||
|
inline T*getPointerToData(){return p;}
|
||||||
|
inline T*begin(){return p;}
|
||||||
|
inline T*end(){return p+maxWritten+1;}
|
||||||
|
inline T& operator[](int n)
|
||||||
|
{
|
||||||
|
#ifndef NDEBUG
|
||||||
|
if( n<0 || n>maxWritten )
|
||||||
|
errorAccess(n);
|
||||||
|
#endif
|
||||||
|
return p[n];
|
||||||
|
}
|
||||||
|
inline const T& operator[](int n) const
|
||||||
|
{
|
||||||
|
#ifndef NDEBUG
|
||||||
|
if(n<0 || n>maxWritten )
|
||||||
|
errorAccess(n);
|
||||||
|
#endif
|
||||||
|
return p[n];
|
||||||
|
}
|
||||||
|
inline const T& get(int n) const
|
||||||
|
{
|
||||||
|
#ifndef NDEBUG
|
||||||
|
if(n<0 || n>maxWritten )
|
||||||
|
errorAccess(n);
|
||||||
|
#endif
|
||||||
|
return p[n];
|
||||||
|
}
|
||||||
|
const T&top(int n=0) const
|
||||||
|
{return (*this)[maxWritten-n];}
|
||||||
|
T&top(int n=0)
|
||||||
|
{return (*this)[maxWritten-n];}
|
||||||
|
const T&back(int n=0) const
|
||||||
|
{return (*this)[maxWritten-n];}
|
||||||
|
T&back(int n=0)
|
||||||
|
{return (*this)[maxWritten-n];}
|
||||||
|
T&push_back(const T&x)
|
||||||
|
{
|
||||||
|
allowAccess(maxWritten+1);
|
||||||
|
(*this)[maxWritten]=x;
|
||||||
|
return top();
|
||||||
|
}
|
||||||
|
bool writeTo(ostream&out) const
|
||||||
|
{
|
||||||
|
out << "Vector ";
|
||||||
|
out << size() << " ";
|
||||||
|
//out << a << '\n';
|
||||||
|
for(int iv=0;iv<=maxWritten;iv++)
|
||||||
|
{
|
||||||
|
writeOb(out, (*this)[iv]);
|
||||||
|
out << '\n';
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
bool readFrom(istream&in)
|
||||||
|
{
|
||||||
|
string s;
|
||||||
|
if( !in )
|
||||||
|
{
|
||||||
|
cerr << "ERROR(Vector): file cannot be opened.\n";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
in >> s;
|
||||||
|
if( !(s=="Vector") )
|
||||||
|
{
|
||||||
|
cerr << "ERROR(Vector): Vector!='"<<s<<"'\n";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
int biggest;
|
||||||
|
in >> biggest;
|
||||||
|
// in >> a;
|
||||||
|
resize(biggest);
|
||||||
|
for(int iv=0;iv<size();iv++)
|
||||||
|
{
|
||||||
|
readOb(in, (*this)[iv]);
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class T> bool operator==(const Vector<T> &x, const Vector<T> &y)
|
||||||
|
{
|
||||||
|
if( &x == &y )
|
||||||
|
return 1;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if( y.size()!=x.size() )
|
||||||
|
return 0;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(unsigned int iii=0;iii<x.size();iii++)
|
||||||
|
if( !(x[iii]==y[iii]) )
|
||||||
|
return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<class T> bool operator!=(const Vector<T> &x, const Vector<T> &y)
|
||||||
|
{
|
||||||
|
return !(x==y);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T> bool operator<(const Vector<T> &x, const Vector<T> &y)
|
||||||
|
{
|
||||||
|
if( &x == &y )
|
||||||
|
return 0;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if( y.size()<x.size() )
|
||||||
|
return !(y<x);
|
||||||
|
for(int iii=0;iii<x.size();iii++)
|
||||||
|
{
|
||||||
|
assert( iii!=y.size() );
|
||||||
|
if( x[iii]<y[iii] )
|
||||||
|
return 1;
|
||||||
|
else if( y[iii]<x[iii] )
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return x.size()!=y.size();//??
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reports an access to index n on both cerr and cout, together with
// maxWritten, realSize and the buffer address, then fails an assertion and,
// unless DEBUG is defined, calls abort().
template<class T> void Vector<T>:: errorAccess(int n) const
{
  ostream *sinks[2]={&cerr, &cout};
  for(int s=0;s<2;s++)
    *sinks[s] << "ERROR: Access to array element " << n
              << " (" << maxWritten << ", " << realSize << ", " << (void*)p << ")\n";
  assert(0);
#ifndef DEBUG
  abort();
#endif
}
|
||||||
|
|
||||||
|
// Prints "Vector(<size>){  <index>: <element> ; ... }" plus a newline and
// returns the stream.
template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
{
  o << "Vector(" << a.size() << "){ ";
  unsigned int pos=0;
  while( pos<a.size() )
  {
    o << " " << pos << ": " << a[pos] << " ;";
    pos++;
  }
  o << "}\n";
  return o;
}
|
||||||
|
|
||||||
|
// Stream extraction stub: reads nothing, leaves the vector untouched and
// returns the stream unchanged.
template<class T> istream& operator>>(istream&in, Vector<T>&)
{
  return in;
}
|
||||||
|
|
||||||
|
template<class T> int Hash(const Vector<T>&a)
|
||||||
|
{
|
||||||
|
int n=0;
|
||||||
|
for(int iii=0;iii<a.size();iii++)
|
||||||
|
n+=Hash(a[iii])*(iii+1);
|
||||||
|
return n+a.size()*47;
|
||||||
|
}
|
||||||
|
// Assigns the first n elements of bb to aa, front to back.  Does nothing for
// n <= 0.
template<class T> void Vector<T>::copy(T *aa, const T *bb, int n)
{
  int k=0;
  while( k<n )
  {
    aa[k]=bb[k];
    ++k;
  }
}
|
||||||
|
// Overload for a non-const source: assigns the first n elements of bb to aa,
// front to back.  Does nothing for n <= 0.
template<class T> void Vector<T>::copy(T *aa, T *bb, int n)
{
  int k=0;
  while( k<n )
  {
    aa[k]=bb[k];
    ++k;
  }
}
|
||||||
|
|
||||||
|
// Grows the buffer to 2*realSize+1 elements and carries over the old
// contents.  The new buffer is allocated before any member is changed, so a
// throwing allocation leaves the vector exactly as it was.
template<class T> void Vector<T>::_expand()
{
#ifdef VERY_ARRAY_DEBUG
  cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
  T *oldp=p;
  const int oldsize=realSize;
  const int newsize=oldsize*2+1;
  T *fresh=new T[newsize];   // may throw; nothing has been modified yet
  memo_new(fresh);
  p=fresh;
  realSize=newsize;
  copy(p, oldp, oldsize);
  delete [] oldp;
  memo_del(oldp, 1);
#ifdef VERY_ARRAY_DEBUG
  cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
}
|
||||||
|
|
||||||
|
// Returns the index of the largest element according to operator< (the first
// one when there are several), or -1 for an empty vector.
template<class T> int Vector<T>::findMax() const
{
  if( size()==0 )
    return -1;
  int best=0;
  for(int k=1;k<size();k++)
  {
    if( (*this)[best]<(*this)[k] )
      best=k;
  }
  return best;
}
|
||||||
|
// Returns the index of the smallest element according to operator< (the
// first one when there are several), or -1 for an empty vector.
template<class T> int Vector<T>::findMin() const
{
  if( size()==0 )
    return -1;
  int best=0;
  for(int k=1;k<size();k++)
  {
    if( (*this)[k]<(*this)[best] )
      best=k;
  }
  return best;
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,103 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef WordClasses_h_DEFINED
|
||||||
|
#define WordClasses_h_DEFINED
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
#include <set>
|
||||||
|
#include "vocab.h"
|
||||||
|
|
||||||
|
class WordClasses
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
map<string,string> Sw2c;
|
||||||
|
map<string,int> Sc2int;
|
||||||
|
Vector<string> Sint2c;
|
||||||
|
Vector<int> w2c;
|
||||||
|
unsigned int classes;
|
||||||
|
public:
|
||||||
|
WordClasses()
|
||||||
|
: classes(1)
|
||||||
|
{
|
||||||
|
Sint2c.push_back("0");
|
||||||
|
Sc2int["0"]=0;
|
||||||
|
}
|
||||||
|
template<class MAPPER> bool read(istream&in,const MAPPER&m,const vcbList& vcb)
|
||||||
|
{
|
||||||
|
string sline;
|
||||||
|
int maxword=0;
|
||||||
|
int readWord=0, putWord=0;
|
||||||
|
while(getline(in,sline))
|
||||||
|
{
|
||||||
|
readWord ++;
|
||||||
|
string word,wclass;
|
||||||
|
istrstream iline(sline.c_str());
|
||||||
|
iline>>word>>wclass;
|
||||||
|
|
||||||
|
if( !Sc2int.count(wclass) )
|
||||||
|
{
|
||||||
|
Sc2int[wclass]=classes++;
|
||||||
|
Sint2c.push_back(wclass);
|
||||||
|
assert(classes==Sint2c.size());
|
||||||
|
}
|
||||||
|
if(vcb.has_word(word)){
|
||||||
|
maxword=max(m(word),maxword);
|
||||||
|
assert(Sw2c.count(word)==0);
|
||||||
|
Sw2c[word]=wclass;
|
||||||
|
putWord++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w2c=Vector<int>(maxword+1,0);
|
||||||
|
for(map<string,string>::const_iterator i=Sw2c.begin();i!=Sw2c.end();++i)
|
||||||
|
w2c[m(i->first)]=Sc2int[i->second];
|
||||||
|
cout << "Read classes: #words: " << maxword << " " << " #classes: "<< classes <<endl;
|
||||||
|
cout << "Actual number of read words: " << readWord << " stored words: " << putWord << endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
int getClass(int w)const
|
||||||
|
{
|
||||||
|
if(w>=0&&int(w)<int(w2c.size()) )
|
||||||
|
return w2c[w];
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
int operator()(const string&x)const
|
||||||
|
{
|
||||||
|
if( Sc2int.count(x) )
|
||||||
|
return Sc2int.find(x)->second;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cerr << "WARNING: class " << x << " not found.\n";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
string classString(unsigned int cnr)const
|
||||||
|
{
|
||||||
|
if( cnr<Sint2c.size())
|
||||||
|
return Sint2c[cnr];
|
||||||
|
else
|
||||||
|
return string("0");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,38 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/*--
|
||||||
|
alignment: 'checked' alignment representation with automatic calculation
|
||||||
|
of fertilities
|
||||||
|
Franz Josef Och (30/07/99)
|
||||||
|
--*/
|
||||||
|
#include "alignment.h"
|
||||||
|
|
||||||
|
// Prints an alignment as
//   AL(l:<l>,m:<m>)(a: a(1) ... a(m) )(fert: fert(0) ... fert(l) ) c:
// followed by a newline, where m and l are the sizes of the a and f vectors
// minus one.
ostream&operator<<(ostream&out, const alignment&a)
{
  const int lastJ=a.a.size()-1;
  const int lastI=a.f.size()-1;
  out << "AL(l:"<<lastI<<",m:"<<lastJ<<")(a: ";
  int j=1;
  while( j<=lastJ )
  {
    out << a(j) << ' ';
    j++;
  }
  out << ")(fert: ";
  int i=0;
  while( i<=lastI )
  {
    out << a.fert(i) << ' ';
    i++;
  }
  out << ") c:"<<"\n";
  return out;
}
|
||||||
|
|
@ -0,0 +1,227 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/*--
|
||||||
|
alignment: 'checked' alignment representation with autom. calc. of fertilities
|
||||||
|
Franz Josef Och (30/07/99)
|
||||||
|
--*/
|
||||||
|
#ifndef alignment_h_fjo_defined
|
||||||
|
#define alignment_h_fjo_defined
|
||||||
|
#include "Vector.h"
|
||||||
|
#include <cassert>
|
||||||
|
#include "defs.h"
|
||||||
|
#include "myassert.h"
|
||||||
|
|
||||||
|
class al_struct
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
al_struct()
|
||||||
|
: prev(0),next(0){}
|
||||||
|
PositionIndex prev,next;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class alignment
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
Vector<PositionIndex> a;
|
||||||
|
Vector<PositionIndex> positionSum,f;
|
||||||
|
public:
|
||||||
|
Vector<PositionIndex> als_i;
|
||||||
|
Vector<al_struct> als_j;
|
||||||
|
PositionIndex l,m;
|
||||||
|
alignment()
|
||||||
|
{}
|
||||||
|
alignment(PositionIndex _l, PositionIndex _m)
|
||||||
|
: a(_m+1, (PositionIndex)0),
|
||||||
|
positionSum(_l+1, (PositionIndex)0), f(_l+1, (PositionIndex)0), als_i(_l+1,0),als_j(_m+1),l(_l), m(_m)
|
||||||
|
{
|
||||||
|
f[0]=m;
|
||||||
|
for(PositionIndex j=1;j<=m;j++)
|
||||||
|
{
|
||||||
|
if( j>1 )
|
||||||
|
als_j[j].prev= j-1;
|
||||||
|
if( j<m )
|
||||||
|
als_j[j].next= j+1;
|
||||||
|
}
|
||||||
|
als_i[0]=1;
|
||||||
|
}
|
||||||
|
PositionIndex get_l()const
|
||||||
|
{return l;}
|
||||||
|
PositionIndex get_m()const
|
||||||
|
{return m;}
|
||||||
|
void doMove(int i,int j)
|
||||||
|
{
|
||||||
|
set(j,i);
|
||||||
|
}
|
||||||
|
void doSwap(int j1,int j2)
|
||||||
|
{
|
||||||
|
int aj1=a[j1],aj2=a[j2];
|
||||||
|
set(j1,aj2);
|
||||||
|
set(j2,aj1);
|
||||||
|
}
|
||||||
|
void set(PositionIndex j, PositionIndex aj)
|
||||||
|
{
|
||||||
|
PositionIndex old_aj=a[j];
|
||||||
|
massert(j<a.size());massert(aj<f.size());
|
||||||
|
massert(old_aj<f.size());massert(f[old_aj]>0);
|
||||||
|
massert(j>0);
|
||||||
|
positionSum[old_aj]-=j;
|
||||||
|
// ausfuegen
|
||||||
|
PositionIndex prev=als_j[j].prev;
|
||||||
|
PositionIndex next=als_j[j].next;
|
||||||
|
if( next )
|
||||||
|
als_j[next].prev=prev;
|
||||||
|
if( prev )
|
||||||
|
als_j[prev].next=next;
|
||||||
|
else
|
||||||
|
als_i[old_aj]=next;
|
||||||
|
|
||||||
|
// neue Position suchen
|
||||||
|
PositionIndex lfd=als_i[aj],llfd=0;
|
||||||
|
while( lfd && lfd<j )
|
||||||
|
lfd = als_j[llfd=lfd].next;
|
||||||
|
|
||||||
|
// einfuegen
|
||||||
|
als_j[j].prev=llfd;
|
||||||
|
als_j[j].next=lfd;
|
||||||
|
if( llfd )
|
||||||
|
als_j[llfd].next=j;
|
||||||
|
else
|
||||||
|
als_i[aj]=j;
|
||||||
|
if( lfd )
|
||||||
|
als_j[lfd].prev=j;
|
||||||
|
|
||||||
|
f[old_aj]--;
|
||||||
|
positionSum[aj]+=j;
|
||||||
|
f[aj]++;
|
||||||
|
a[j]=aj;
|
||||||
|
}
|
||||||
|
const Vector<PositionIndex>& getAlignment() const
|
||||||
|
{return a ;}
|
||||||
|
PositionIndex get_al(PositionIndex j)const
|
||||||
|
{
|
||||||
|
massert(j<a.size());
|
||||||
|
return a[j];
|
||||||
|
}
|
||||||
|
PositionIndex operator()(PositionIndex j)const
|
||||||
|
{
|
||||||
|
massert(j<a.size());
|
||||||
|
return a[j];
|
||||||
|
}
|
||||||
|
PositionIndex fert(PositionIndex i)const
|
||||||
|
{
|
||||||
|
massert(i<f.size());
|
||||||
|
return f[i];
|
||||||
|
}
|
||||||
|
PositionIndex get_head(PositionIndex i)const
|
||||||
|
{
|
||||||
|
massert( als_i[i]==_get_head(i) );
|
||||||
|
return als_i[i];
|
||||||
|
}
|
||||||
|
PositionIndex get_center(PositionIndex i)const
|
||||||
|
{
|
||||||
|
if( i==0 )return 0;
|
||||||
|
massert(((positionSum[i]+f[i]-1)/f[i]==_get_center(i)));
|
||||||
|
return (positionSum[i]+f[i]-1)/f[i];
|
||||||
|
}
|
||||||
|
PositionIndex _get_head(PositionIndex i)const
|
||||||
|
{
|
||||||
|
if( fert(i)==0 )return 0;
|
||||||
|
for(PositionIndex j=1;j<=m;j++)
|
||||||
|
if( a[j]==i )
|
||||||
|
return j;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
PositionIndex _get_center(PositionIndex i)const
|
||||||
|
{
|
||||||
|
if( i==0 )return 0;
|
||||||
|
massert(fert(i));
|
||||||
|
PositionIndex sum=0;
|
||||||
|
for(PositionIndex j=1;j<=m;j++)
|
||||||
|
if( a[j]==i )
|
||||||
|
sum+=j;
|
||||||
|
return (sum+fert(i)-1)/fert(i);
|
||||||
|
}
|
||||||
|
PositionIndex prev_cept(PositionIndex i)const
|
||||||
|
{
|
||||||
|
if( i==0 )return 0;
|
||||||
|
PositionIndex k=i-1;
|
||||||
|
while(k&&fert(k)==0)
|
||||||
|
k--;
|
||||||
|
return k;
|
||||||
|
}
|
||||||
|
PositionIndex next_cept(PositionIndex i)const
|
||||||
|
{
|
||||||
|
PositionIndex k=i+1;
|
||||||
|
while(k<l+1&&fert(k)==0)
|
||||||
|
k++;
|
||||||
|
return k;
|
||||||
|
}
|
||||||
|
PositionIndex prev_in_cept(PositionIndex j)const
|
||||||
|
{
|
||||||
|
//PositionIndex k=j-1;
|
||||||
|
//while(k&&a[k]!=a[j])
|
||||||
|
//k--;
|
||||||
|
//assert( als_j[j].prev==k );
|
||||||
|
//assert(k);
|
||||||
|
//return k;
|
||||||
|
massert(als_j[j].prev==0||a[als_j[j].prev]==a[j]);
|
||||||
|
return als_j[j].prev;
|
||||||
|
}
|
||||||
|
friend ostream &operator<<(ostream&out, const alignment&a);
|
||||||
|
friend bool operator==(const alignment&a, const alignment&b)
|
||||||
|
{
|
||||||
|
massert(a.a.size()==b.a.size());
|
||||||
|
for(PositionIndex j=1;j<=a.get_m();j++)
|
||||||
|
if(a(j)!=b(j))
|
||||||
|
return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
friend bool operator<(const alignment&x, const alignment&y)
|
||||||
|
{
|
||||||
|
massert(x.get_m()==y.get_m());
|
||||||
|
for(PositionIndex j=1;j<=x.get_m();j++)
|
||||||
|
if( x(j)<y(j) )
|
||||||
|
return 1;
|
||||||
|
else if( y(j)<x(j) )
|
||||||
|
return 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
friend int differences(const alignment&x, const alignment&y){
|
||||||
|
int count=0;
|
||||||
|
massert(x.get_m()==y.get_m());
|
||||||
|
for(PositionIndex j=1;j<=x.get_m();j++)
|
||||||
|
count += (x(j)!=y(j));
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
bool valid()const
|
||||||
|
{
|
||||||
|
if( 2*f[0]>m )
|
||||||
|
return 0;
|
||||||
|
for(unsigned int i=1;i<=l;i++)
|
||||||
|
if( f[i]>=MAX_FERTILITY )
|
||||||
|
return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
friend class transpair_model5;
|
||||||
|
};
|
||||||
|
#endif
|
@ -0,0 +1,649 @@
|
|||||||
|
|
||||||
|
// $Id: cmd.c 1307 2007-03-14 22:22:36Z hieuhoang1972 $
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "cmd.h"
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
# define popen _popen
|
||||||
|
# define pclose _pclose
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static Enum_T BoolEnum[] = {
|
||||||
|
{ "FALSE", 0 },
|
||||||
|
{ "TRUE", 1 },
|
||||||
|
{ 0, 0 }
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef NEEDSTRDUP
|
||||||
|
char *strdup();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define FALSE 0
|
||||||
|
#define TRUE 1
|
||||||
|
|
||||||
|
#define LINSIZ 10240
|
||||||
|
#define MAXPARAM 256
|
||||||
|
|
||||||
|
static char *GetLine(),
|
||||||
|
**str2array();
|
||||||
|
static int Scan(),
|
||||||
|
SetParam(),
|
||||||
|
SetEnum(),
|
||||||
|
SetSubrange(),
|
||||||
|
SetStrArray(),
|
||||||
|
SetGte(),
|
||||||
|
SetLte(),
|
||||||
|
CmdError(),
|
||||||
|
EnumError(),
|
||||||
|
SubrangeError(),
|
||||||
|
GteError(),
|
||||||
|
LteError(),
|
||||||
|
PrintParam(),
|
||||||
|
PrintEnum(),
|
||||||
|
PrintStrArray();
|
||||||
|
|
||||||
|
static Cmd_T cmds[MAXPARAM+1];
|
||||||
|
static char *SepString = " \t\n";
|
||||||
|
|
||||||
|
#if defined(__STDC__)
|
||||||
|
#include <stdarg.h>
|
||||||
|
int DeclareParams(char *ParName, ...)
|
||||||
|
#else
|
||||||
|
#include <varargs.h>
|
||||||
|
int DeclareParams(ParName, va_alist)
|
||||||
|
char *ParName;
|
||||||
|
va_dcl
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
va_list args;
|
||||||
|
static int ParamN = 0;
|
||||||
|
int j,
|
||||||
|
c;
|
||||||
|
char *s;
|
||||||
|
|
||||||
|
#if defined(__STDC__)
|
||||||
|
va_start(args, ParName);
|
||||||
|
#else
|
||||||
|
va_start(args);
|
||||||
|
#endif
|
||||||
|
for(;ParName;) {
|
||||||
|
if(ParamN==MAXPARAM) {
|
||||||
|
fprintf(stderr, "Too many parameters !!\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
|
||||||
|
;
|
||||||
|
if(!c) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"Warning: parameter \"%s\" declared twice.\n",
|
||||||
|
ParName);
|
||||||
|
}
|
||||||
|
for(c=ParamN; c>j; c--) {
|
||||||
|
cmds[c] = cmds[c-1];
|
||||||
|
}
|
||||||
|
cmds[j].Name = ParName;
|
||||||
|
cmds[j].Type = va_arg(args, int);
|
||||||
|
cmds[j].Val = va_arg(args, void *);
|
||||||
|
switch(cmds[j].Type) {
|
||||||
|
case CMDENUMTYPE: /* get the pointer to Enum_T struct */
|
||||||
|
cmds[j].p = va_arg(args, void *);
|
||||||
|
break;
|
||||||
|
case CMDSUBRANGETYPE: /* get the two extremes */
|
||||||
|
cmds[j].p = (void*) calloc(2, sizeof(int));
|
||||||
|
((int*)cmds[j].p)[0] = va_arg(args, int);
|
||||||
|
((int*)cmds[j].p)[1] = va_arg(args, int);
|
||||||
|
break;
|
||||||
|
case CMDGTETYPE: /* get lower or upper bound */
|
||||||
|
case CMDLTETYPE:
|
||||||
|
cmds[j].p = (void*) calloc(1, sizeof(int));
|
||||||
|
((int*)cmds[j].p)[0] = va_arg(args, int);
|
||||||
|
break;
|
||||||
|
case CMDSTRARRAYTYPE: /* get the separators string */
|
||||||
|
cmds[j].p = (s=va_arg(args, char*))
|
||||||
|
? (void*)strdup(s) : 0;
|
||||||
|
break;
|
||||||
|
case CMDBOOLTYPE:
|
||||||
|
cmds[j].Type = CMDENUMTYPE;
|
||||||
|
cmds[j].p = BoolEnum;
|
||||||
|
break;
|
||||||
|
case CMDDOUBLETYPE: /* nothing else is needed */
|
||||||
|
case CMDINTTYPE:
|
||||||
|
case CMDSTRINGTYPE:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fprintf(stderr, "%s: %s %d %s \"%s\"\n",
|
||||||
|
"DeclareParam()", "Unknown Type",
|
||||||
|
cmds[j].Type, "for parameter", cmds[j].Name);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
ParamN++;
|
||||||
|
ParName = va_arg(args, char *);
|
||||||
|
}
|
||||||
|
cmds[ParamN].Name = NULL;
|
||||||
|
va_end(args);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int GetParams(n, a, CmdFileName)
|
||||||
|
int *n;
|
||||||
|
char ***a;
|
||||||
|
char *CmdFileName;
|
||||||
|
{
|
||||||
|
char *Line,
|
||||||
|
*ProgName;
|
||||||
|
int argc = *n;
|
||||||
|
char **argv = *a,
|
||||||
|
*s;
|
||||||
|
FILE *fp;
|
||||||
|
int IsPipe;
|
||||||
|
|
||||||
|
#ifdef MSDOS
|
||||||
|
#define PATHSEP '\\'
|
||||||
|
char *dot = NULL;
|
||||||
|
#else
|
||||||
|
#define PATHSEP '/'
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if(!(Line=malloc(LINSIZ))) {
|
||||||
|
fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
|
||||||
|
LINSIZ);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if((ProgName=strrchr(*argv, PATHSEP))) {
|
||||||
|
++ProgName;
|
||||||
|
} else {
|
||||||
|
ProgName = *argv;
|
||||||
|
}
|
||||||
|
#ifdef MSDOS
|
||||||
|
if(dot=strchr(ProgName, '.')) *dot = 0;
|
||||||
|
#endif
|
||||||
|
--argc;
|
||||||
|
++argv;
|
||||||
|
for(;;) {
|
||||||
|
if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
|
||||||
|
CmdFileName = argv[0]+2;
|
||||||
|
++argv;
|
||||||
|
--argc;
|
||||||
|
}
|
||||||
|
if(!CmdFileName) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
IsPipe = !strncmp(CmdFileName, "@@", 2);
|
||||||
|
fp = IsPipe
|
||||||
|
? popen(CmdFileName+2, "r")
|
||||||
|
: strcmp(CmdFileName, "-")
|
||||||
|
? fopen(CmdFileName, "r")
|
||||||
|
: stdin;
|
||||||
|
if(!fp) {
|
||||||
|
fprintf(stderr, "Unable to open command file %s\n",
|
||||||
|
CmdFileName);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
|
||||||
|
if(Scan(ProgName, cmds, Line)) {
|
||||||
|
CmdError(Line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(fp!=stdin) {
|
||||||
|
if(IsPipe) pclose(fp); else fclose(fp);
|
||||||
|
}
|
||||||
|
CmdFileName = NULL;
|
||||||
|
}
|
||||||
|
while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
|
||||||
|
*s = ' ';
|
||||||
|
sprintf(Line, "%s/%s", ProgName, *argv+1);
|
||||||
|
*s = '=';
|
||||||
|
if(Scan(ProgName, cmds, Line)) CmdError(*argv);
|
||||||
|
--argc;
|
||||||
|
++argv;
|
||||||
|
}
|
||||||
|
*n = argc;
|
||||||
|
*a = argv;
|
||||||
|
#ifdef MSDOS
|
||||||
|
if(dot) *dot = '.';
|
||||||
|
#endif
|
||||||
|
free(Line);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int PrintParams(ValFlag, fp)
|
||||||
|
int ValFlag;
|
||||||
|
FILE *fp;
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
fflush(fp);
|
||||||
|
if(ValFlag) {
|
||||||
|
fprintf(fp, "Parameters Values:\n");
|
||||||
|
} else {
|
||||||
|
fprintf(fp, "Parameters:\n");
|
||||||
|
}
|
||||||
|
for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
|
||||||
|
fprintf(fp, "\n");
|
||||||
|
fflush(fp);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int SPrintParams(a, pfx)
|
||||||
|
char ***a,
|
||||||
|
*pfx;
|
||||||
|
{
|
||||||
|
int l,
|
||||||
|
n;
|
||||||
|
Cmd_T *cmd;
|
||||||
|
|
||||||
|
if(!pfx) pfx="";
|
||||||
|
l = strlen(pfx);
|
||||||
|
for(n=0, cmd=cmds; cmd->Name; cmd++) n += !!cmd->ArgStr;
|
||||||
|
a[0] = calloc(n, sizeof(char*));
|
||||||
|
for(n=0, cmd=cmds; cmd->Name; cmd++) {
|
||||||
|
if(!cmd->ArgStr) continue;
|
||||||
|
a[0][n] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+l+2);
|
||||||
|
sprintf(a[0][n], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr);
|
||||||
|
++n;
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int CmdError(opt)
|
||||||
|
char *opt;
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Invalid option \"%s\"\n", opt);
|
||||||
|
fprintf(stderr, "This program expectes the following parameters:\n");
|
||||||
|
PrintParams(FALSE, stderr);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int PrintParam(cmd, ValFlag, fp)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
int ValFlag;
|
||||||
|
FILE *fp;
|
||||||
|
{
|
||||||
|
fprintf(fp, "%4s", "");
|
||||||
|
switch(cmd->Type) {
|
||||||
|
case CMDDOUBLETYPE:
|
||||||
|
fprintf(fp, "%s", cmd->Name);
|
||||||
|
if(ValFlag) fprintf(fp, ": %22.15e", *(double *)cmd->Val);
|
||||||
|
fprintf(fp, "\n");
|
||||||
|
break;
|
||||||
|
case CMDENUMTYPE:
|
||||||
|
PrintEnum(cmd, ValFlag, fp);
|
||||||
|
break;
|
||||||
|
case CMDINTTYPE:
|
||||||
|
case CMDSUBRANGETYPE:
|
||||||
|
case CMDGTETYPE:
|
||||||
|
case CMDLTETYPE:
|
||||||
|
fprintf(fp, "%s", cmd->Name);
|
||||||
|
if(ValFlag) fprintf(fp, ": %d", *(int *)cmd->Val);
|
||||||
|
fprintf(fp, "\n");
|
||||||
|
break;
|
||||||
|
case CMDSTRINGTYPE:
|
||||||
|
fprintf(fp, "%s", cmd->Name);
|
||||||
|
if(ValFlag) {
|
||||||
|
if(*(char **)cmd->Val) {
|
||||||
|
fprintf(fp, ": \"%s\"", *(char **)cmd->Val);
|
||||||
|
} else {
|
||||||
|
fprintf(fp, ": %s", "NULL");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(fp, "\n");
|
||||||
|
break;
|
||||||
|
case CMDSTRARRAYTYPE:
|
||||||
|
PrintStrArray(cmd, ValFlag, fp);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fprintf(stderr, "%s: %s %d %s \"%s\"\n",
|
||||||
|
"PrintParam",
|
||||||
|
"Unknown Type",
|
||||||
|
cmd->Type,
|
||||||
|
"for parameter",
|
||||||
|
cmd->Name);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * GetLine: read the next logical line of a command file into Line.
 *
 *   fp    stream to read from
 *   n     size of the buffer Line in bytes
 *   Line  destination buffer
 *
 * Physical lines whose first character is '#' and lines that are empty
 * after removing leading white space are skipped. Leading white space is
 * removed. A physical line ending in a backslash is continued on the next
 * one; the backslash is replaced by a blank.
 * The trailing newline is removed only when one is actually present, so a
 * final line without a newline keeps its last character.
 *
 * Returns Line, or NULL at end of file / on a read error. If the buffer
 * fills up in the middle of a continuation, what has been collected so far
 * is returned.
 */
static char *GetLine(fp, n, Line)
FILE *fp;
int n;
char *Line;
{
  int j,
      l,
      offs=0;

  for(;;) {
    /* no room for another character: stop instead of spinning on fgets */
    if(n-offs < 2) {
      return offs ? Line : NULL;
    }
    if(!fgets(Line+offs, n-offs, fp)) {
      return NULL;
    }
    if(Line[offs]=='#') continue;
    l = (int)strlen(Line+offs);
    if(l>0 && Line[offs+l-1]=='\n') {
      Line[offs + --l] = 0;
    }
    /* count leading white space; l becomes the length of the remainder */
    for(j=offs; Line[j] && isspace((unsigned char)Line[j]); j++, l--)
      ;
    if(l<1) continue;
    if(j > offs) {
      memmove(Line+offs, Line+j, (size_t)l+1);
    }
    if(Line[offs+l-1]=='\\') {
      /* continuation: the next physical line is appended after a blank */
      offs += l;
      Line[offs-1] = ' ';
    } else {
      break;
    }
  }
  return Line;
}
|
||||||
|
|
||||||
|
static int Scan(ProgName, cmds, Line)
|
||||||
|
char *ProgName,
|
||||||
|
*Line;
|
||||||
|
Cmd_T *cmds;
|
||||||
|
{
|
||||||
|
char *q,
|
||||||
|
*p;
|
||||||
|
int i,
|
||||||
|
hl,
|
||||||
|
HasToMatch = FALSE,
|
||||||
|
c0,
|
||||||
|
c;
|
||||||
|
|
||||||
|
p = Line+strspn(Line, SepString);
|
||||||
|
if(!(hl=strcspn(p, SepString))) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if((q=strchr(p, '/')) && q-p<hl) {
|
||||||
|
*q = 0;
|
||||||
|
if(strcmp(p, ProgName)) {
|
||||||
|
*q = '/';
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*q = '/';
|
||||||
|
HasToMatch=TRUE;
|
||||||
|
p = q+1;
|
||||||
|
}
|
||||||
|
if(!(hl = strcspn(p, SepString))) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
c0 = p[hl];
|
||||||
|
p[hl] = 0;
|
||||||
|
for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++)
|
||||||
|
;
|
||||||
|
p[hl] = c0;
|
||||||
|
if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString));
|
||||||
|
return HasToMatch && c;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int SetParam(cmd, s)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
char *s;
|
||||||
|
{
|
||||||
|
if(!*s && cmd->Type != CMDSTRINGTYPE) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: No value specified for parameter \"%s\"\n",
|
||||||
|
cmd->Name);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
switch(cmd->Type) {
|
||||||
|
case CMDDOUBLETYPE:
|
||||||
|
if(sscanf(s, "%lf", (double*)cmd->Val)!=1) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"Float value required for parameter \"%s\"\n",
|
||||||
|
cmd->Name);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case CMDENUMTYPE:
|
||||||
|
SetEnum(cmd, s);
|
||||||
|
break;
|
||||||
|
case CMDINTTYPE:
|
||||||
|
if(sscanf(s, "%d", (int*)cmd->Val)!=1) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"Integer value required for parameter \"%s\"\n",
|
||||||
|
cmd->Name);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case CMDSTRINGTYPE:
|
||||||
|
*(char **)cmd->Val = (strcmp(s, "<NULL>") && strcmp(s, "NULL"))
|
||||||
|
? strdup(s)
|
||||||
|
: 0;
|
||||||
|
break;
|
||||||
|
case CMDSTRARRAYTYPE:
|
||||||
|
SetStrArray(cmd, s);
|
||||||
|
break;
|
||||||
|
case CMDGTETYPE:
|
||||||
|
SetGte(cmd, s);
|
||||||
|
break;
|
||||||
|
case CMDLTETYPE:
|
||||||
|
SetLte(cmd, s);
|
||||||
|
break;
|
||||||
|
case CMDSUBRANGETYPE:
|
||||||
|
SetSubrange(cmd, s);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fprintf(stderr, "%s: %s %d %s \"%s\"\n",
|
||||||
|
"SetParam",
|
||||||
|
"Unknown Type",
|
||||||
|
cmd->Type,
|
||||||
|
"for parameter",
|
||||||
|
cmd->Name);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
cmd->ArgStr = strdup(s);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int SetEnum(cmd, s)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
char *s;
|
||||||
|
{
|
||||||
|
Enum_T *en;
|
||||||
|
|
||||||
|
for(en=(Enum_T *)cmd->p; en->Name; en++) {
|
||||||
|
if(*en->Name && !strcmp(s, en->Name)) {
|
||||||
|
*(int *) cmd->Val = en->Idx;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return EnumError(cmd, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int SetSubrange(cmd, s)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
char *s;
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
|
||||||
|
if(sscanf(s, "%d", &n)!=1) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"Integer value required for parameter \"%s\"\n",
|
||||||
|
cmd->Name);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if(n < *(int *)cmd->p || n > *((int *)cmd->p+1)) {
|
||||||
|
return SubrangeError(cmd, n);
|
||||||
|
}
|
||||||
|
*(int *)cmd->Val = n;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int SetGte(cmd, s)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
char *s;
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
|
||||||
|
if(sscanf(s, "%d", &n)!=1) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"Integer value required for parameter \"%s\"\n",
|
||||||
|
cmd->Name);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if(n<*(int *)cmd->p) {
|
||||||
|
return GteError(cmd, n);
|
||||||
|
}
|
||||||
|
*(int *)cmd->Val = n;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int SetStrArray(cmd, s)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
char *s;
|
||||||
|
{
|
||||||
|
*(char***)cmd->Val = str2array(s, (char*)cmd->p);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int SetLte(cmd, s)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
char *s;
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
|
||||||
|
if(sscanf(s, "%d", &n)!=1) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"Integer value required for parameter \"%s\"\n",
|
||||||
|
cmd->Name);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if(n > *(int *)cmd->p) {
|
||||||
|
return LteError(cmd, n);
|
||||||
|
}
|
||||||
|
*(int *)cmd->Val = n;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int EnumError(cmd, s)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
char *s;
|
||||||
|
{
|
||||||
|
Enum_T *en;
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
"Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
|
||||||
|
fprintf(stderr, "Valid values are:\n");
|
||||||
|
for(en=(Enum_T *)cmd->p; en->Name; en++) {
|
||||||
|
if(*en->Name) {
|
||||||
|
fprintf(stderr, " %s\n", en->Name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int GteError(cmd, n)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
int n;
|
||||||
|
{
|
||||||
|
fprintf(stderr,
|
||||||
|
"Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
|
||||||
|
fprintf(stderr, "Valid values must be greater than or equal to %d\n",
|
||||||
|
*(int *)cmd->p);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int LteError(cmd, n)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
int n;
|
||||||
|
{
|
||||||
|
fprintf(stderr,
|
||||||
|
"Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
|
||||||
|
fprintf(stderr, "Valid values must be less than or equal to %d\n",
|
||||||
|
*(int *)cmd->p);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int SubrangeError(cmd, n)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
int n;
|
||||||
|
{
|
||||||
|
fprintf(stderr,
|
||||||
|
"Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
|
||||||
|
fprintf(stderr, "Valid values range from %d to %d\n",
|
||||||
|
*(int *)cmd->p, *((int *)cmd->p+1));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int PrintEnum(cmd, ValFlag, fp)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
int ValFlag;
|
||||||
|
FILE *fp;
|
||||||
|
{
|
||||||
|
Enum_T *en;
|
||||||
|
|
||||||
|
fprintf(fp, "%s", cmd->Name);
|
||||||
|
if(ValFlag) {
|
||||||
|
for(en=(Enum_T *)cmd->p; en->Name; en++) {
|
||||||
|
if(*en->Name && en->Idx==*(int *)cmd->Val) {
|
||||||
|
fprintf(fp, ": %s", en->Name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(fp, "\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int PrintStrArray(cmd, ValFlag, fp)
|
||||||
|
Cmd_T *cmd;
|
||||||
|
int ValFlag;
|
||||||
|
FILE *fp;
|
||||||
|
{
|
||||||
|
char *indent,
|
||||||
|
**s = *(char***)cmd->Val;
|
||||||
|
int l = 4+strlen(cmd->Name);
|
||||||
|
|
||||||
|
fprintf(fp, "%s", cmd->Name);
|
||||||
|
indent = malloc(l+2);
|
||||||
|
memset(indent, ' ', l+1);
|
||||||
|
indent[l+1] = 0;
|
||||||
|
if(ValFlag) {
|
||||||
|
fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
|
||||||
|
if(s) while(*s) {
|
||||||
|
fprintf(fp, "\n%s %s", indent, *s++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(indent);
|
||||||
|
fprintf(fp, "\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static char **str2array(s, sep)
|
||||||
|
char *s,
|
||||||
|
*sep;
|
||||||
|
{
|
||||||
|
char *p,
|
||||||
|
**a;
|
||||||
|
int n = 0,
|
||||||
|
l;
|
||||||
|
|
||||||
|
if(!sep) sep = SepString;
|
||||||
|
p = s += strspn(s, sep);
|
||||||
|
while(*p) {
|
||||||
|
p += strcspn(p, sep);
|
||||||
|
p += strspn(p, sep);
|
||||||
|
++n;
|
||||||
|
}
|
||||||
|
a = calloc(n+1, sizeof(char *));
|
||||||
|
p = s;
|
||||||
|
n = 0;
|
||||||
|
while(*p) {
|
||||||
|
l = strcspn(p, sep);
|
||||||
|
a[n] = malloc(l+1);
|
||||||
|
memcpy(a[n], p, l);
|
||||||
|
a[n][l] = 0;
|
||||||
|
++n;
|
||||||
|
p += l;
|
||||||
|
p += strspn(p, sep);
|
||||||
|
}
|
||||||
|
return a;
|
||||||
|
}
|
@ -0,0 +1,51 @@
|
|||||||
|
|
||||||
|
// $Id: cmd.h 1307 2007-03-14 22:22:36Z hieuhoang1972 $
|
||||||
|
|
||||||
|
#if !defined(CMD_H)

#define CMD_H

#include <stdio.h>   /* FILE, used by PrintParams() */

/* Parameter kinds accepted by DeclareParams(). */
#define CMDDOUBLETYPE   1
#define CMDENUMTYPE     2
#define CMDINTTYPE      3
#define CMDSTRINGTYPE   4
#define CMDSUBRANGETYPE 5
#define CMDGTETYPE      6
#define CMDLTETYPE      7
#define CMDSTRARRAYTYPE 8
#define CMDBOOLTYPE     9

/* One entry of an enum table; a table ends with an entry whose Name is 0. */
typedef struct {
  char *Name;
  int Idx;
} Enum_T;

/* One declared parameter. */
typedef struct {
  int Type;        /* one of the CMD...TYPE constants */
  char *Name,      /* parameter name */
       *ArgStr;    /* text of the value last assigned, if any */
  void *Val,       /* where the parsed value is stored */
       *p;         /* type-specific extra data (enum table, bounds, ...) */
} Cmd_T;

#ifdef __cplusplus
extern "C" {
#endif

/* In C++ an empty parameter list means "no arguments", so the full
   prototypes are needed there as well. */
#if defined(__STDC__) || defined(__cplusplus)
int DeclareParams(char *, ...);
#else
int DeclareParams();
#endif

int GetParams(int *n, char ***a, char *CmdFileName);
int SPrintParams(char ***a, char *pfx);
int PrintParams(int ValFlag, FILE *fp);

#ifdef __cplusplus
}
#endif

#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,315 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#include "alignment.h"
|
||||||
|
#include "transpair_model3.h"
|
||||||
|
#include <map>
|
||||||
|
#include "collCounts.h"
|
||||||
|
#include "MoveSwapMatrix.h"
|
||||||
|
#include "D5Tables.h"
|
||||||
|
#include "transpair_model5.h"
|
||||||
|
#include "transpair_modelhmm.h"
|
||||||
|
#include "Parameter.h"
|
||||||
|
|
||||||
|
extern float COUNTINCREASE_CUTOFF_AL;
|
||||||
|
// unifies collectCountsOverAlignments and findAlignmentNeighborhood FJO-20/07/99
|
||||||
|
template<class TRANSPAIR> int collectCountsOverNeighborhood(
|
||||||
|
const MoveSwapMatrix<TRANSPAIR>&msc, LogProb ascore,
|
||||||
|
Array2<LogProb,Vector<LogProb> >&dtcount,
|
||||||
|
Array2<LogProb,Vector<LogProb> >&ncount, LogProb&p1count,
|
||||||
|
LogProb&p0count, LogProb&total_count) {
|
||||||
|
int nAl=0;
|
||||||
|
const PositionIndex l=msc.get_l(), m=msc.get_m();
|
||||||
|
Array2<LogProb,Vector<LogProb> > cmove(l+1, m+1), cswap(l+1, m+1);
|
||||||
|
Vector<LogProb> negmove(m+1),negswap(m+1),plus1fert(l+1),minus1fert(l+1);
|
||||||
|
LogProb total_move, total_swap;
|
||||||
|
if (msc.isCenterDeleted()==0) {
|
||||||
|
total_move+=ascore;
|
||||||
|
nAl++;
|
||||||
|
}
|
||||||
|
for (PositionIndex j=1; j<=m; j++) {
|
||||||
|
for (PositionIndex i=0; i<=l; i++) {
|
||||||
|
if (msc(j)!=i && !msc.isDelMove(i, j) ) {
|
||||||
|
double cm = msc.cmove(i, j);
|
||||||
|
if(cm<0)
|
||||||
|
continue;
|
||||||
|
LogProb newscore=ascore*cm;
|
||||||
|
total_move+=newscore;
|
||||||
|
nAl++;
|
||||||
|
cmove(i, j)+=newscore;
|
||||||
|
negmove[j]+=newscore;
|
||||||
|
plus1fert[i]+=newscore;
|
||||||
|
minus1fert[msc(j)]+=newscore;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (PositionIndex j1=1; j1<=m; j1++) {
|
||||||
|
for (PositionIndex j2=j1+1; j2<=m; j2++) {
|
||||||
|
if (msc(j1)!=msc(j2) && !msc.isDelSwap(j1, j2) ) {
|
||||||
|
double cs = msc.cswap(j1, j2);
|
||||||
|
if(cs < 0){
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
LogProb newscore=ascore*cs;
|
||||||
|
total_swap+=newscore;
|
||||||
|
nAl++;
|
||||||
|
cswap(msc(j1), j2)+=newscore;
|
||||||
|
cswap(msc(j2), j1)+=newscore;
|
||||||
|
negswap[j1]+=newscore;
|
||||||
|
negswap[j2]+=newscore;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
total_count+=total_move+total_swap;
|
||||||
|
for (PositionIndex j=1; j<=m; j++)
|
||||||
|
for (PositionIndex i=0; i<=l; i++)
|
||||||
|
dtcount(i, j) += ((i==msc(j)) ? (total_count
|
||||||
|
-(negmove[j]+negswap[j])) : (cswap(i, j)+cmove(i, j)));
|
||||||
|
for (PositionIndex i=1; i<=l; i++) {
|
||||||
|
LogProb temp=minus1fert[i]+plus1fert[i];
|
||||||
|
if (msc.fert(i)<MAX_FERTILITY)
|
||||||
|
ncount(i, msc.fert(i))+=total_count-temp;
|
||||||
|
if (msc.fert(i)>0&&msc.fert(i)-1<MAX_FERTILITY)
|
||||||
|
ncount(i, msc.fert(i)-1)+=minus1fert[i];
|
||||||
|
else if (minus1fert[i]!=0.0)
|
||||||
|
cerr << "ERROR: M1Fa: " << minus1fert[i] << ' ' << i << ' '
|
||||||
|
<< msc.fert(i)<< endl;
|
||||||
|
if (msc.fert(i)+1<MAX_FERTILITY)
|
||||||
|
ncount(i, msc.fert(i)+1)+=plus1fert[i];
|
||||||
|
}
|
||||||
|
LogProb temp=minus1fert[0]+plus1fert[0];
|
||||||
|
p1count += (total_count-temp)*(LogProb)msc.fert(0);
|
||||||
|
p0count += (total_count-temp)*(LogProb)(m-2*msc.fert(0));
|
||||||
|
if (msc.fert(0)>0) {
|
||||||
|
p1count += (minus1fert[0])*(LogProb)(msc.fert(0)-1);
|
||||||
|
p0count += (minus1fert[0])*(LogProb)(m-2*(msc.fert(0)-1));
|
||||||
|
} else if (minus1fert[0]!=0.0)
|
||||||
|
cerr << "ERROR: M1Fb: " << minus1fert[0] << endl;
|
||||||
|
if (int(m)-2*(int(msc.fert(0))+1)>=0) {
|
||||||
|
p1count += (plus1fert[0])*(LogProb)(msc.fert(0)+1);
|
||||||
|
p0count += (plus1fert[0])*(LogProb)(m-2*(msc.fert(0)+1));
|
||||||
|
}
|
||||||
|
msc.check();
|
||||||
|
return nAl;
|
||||||
|
}
|
||||||
|
;
|
||||||
|
|
||||||
|
template<class TRANSPAIR> double collectCountsOverNeighborhoodForSophisticatedModels(
|
||||||
|
const MoveSwapMatrix<TRANSPAIR>&, LogProb, void*) {
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class TRANSPAIR> void _collectCountsOverNeighborhoodForSophisticatedModels(
|
||||||
|
const MoveSwapMatrix<TRANSPAIR>&Mmsc, const alignment&msc,
|
||||||
|
const TRANSPAIR&ef, LogProb normalized_ascore, d4model*d4Table) {
|
||||||
|
Mmsc.check();
|
||||||
|
const PositionIndex m=msc.get_m(), l=msc.get_l();
|
||||||
|
for (PositionIndex j=1; j<=m; ++j)
|
||||||
|
if (msc(j)!=0)
|
||||||
|
if (msc.get_head(msc(j))==j) {
|
||||||
|
int ep=msc.prev_cept(msc(j));
|
||||||
|
d4Table->augCountRef_first(j, msc.get_center(ep),
|
||||||
|
d4Table->ewordclasses->getClass(ef.get_es(ep)),
|
||||||
|
d4Table->fwordclasses->getClass(ef.get_fs(j)), l, m,normalized_ascore);
|
||||||
|
} else {
|
||||||
|
//massert( &d4Table->getCountRef_bigger(j,msc.prev_in_cept(j),0,d4Table->fwordclasses.getClass(ef.get_fs(j)),l,m) == ef.getCountSecond(j,msc.prev_in_cept(j) ));
|
||||||
|
d4Table->augCountRef_bigger(j, msc.prev_in_cept(j), 0,
|
||||||
|
d4Table->fwordclasses->getClass(ef.get_fs(j)), l, m,normalized_ascore);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class TRANSPAIR> void _collectCountsOverNeighborhoodForSophisticatedModels(
|
||||||
|
const MoveSwapMatrix<TRANSPAIR>&Mmsc, const alignment&msc,
|
||||||
|
const TRANSPAIR&ef, LogProb normalized_ascore, d5model*d5Table) {
|
||||||
|
Mmsc.check();
|
||||||
|
_collectCountsOverNeighborhoodForSophisticatedModels(Mmsc, msc, ef,
|
||||||
|
normalized_ascore, &d5Table->d4m);
|
||||||
|
Mmsc.check();
|
||||||
|
const PositionIndex m=msc.get_m(), l=msc.get_l();
|
||||||
|
PositionIndex prev_cept=0;
|
||||||
|
PositionIndex vac_all=m;
|
||||||
|
Vector<char> vac(m+1,0);
|
||||||
|
for (PositionIndex i=1; i<=l; i++) {
|
||||||
|
PositionIndex cur_j=msc.als_i[i];
|
||||||
|
PositionIndex prev_j=0;
|
||||||
|
PositionIndex k=0;
|
||||||
|
if (cur_j) { // process first word of cept
|
||||||
|
k++;
|
||||||
|
d5Table->getCountRef_first(vacancies(vac, cur_j), vacancies(vac,
|
||||||
|
msc.get_center(prev_cept)),
|
||||||
|
d5Table->fwordclasses->getClass(ef.get_fs(cur_j)), l, m,
|
||||||
|
vac_all-msc.fert(i)+k) +=normalized_ascore;
|
||||||
|
vac_all--;
|
||||||
|
assert(vac[cur_j]==0);
|
||||||
|
vac[cur_j]=1;
|
||||||
|
Mmsc.check();
|
||||||
|
prev_j=cur_j;
|
||||||
|
cur_j=msc.als_j[cur_j].next;
|
||||||
|
}
|
||||||
|
while (cur_j) { // process following words of cept
|
||||||
|
k++;
|
||||||
|
int vprev=vacancies(vac, prev_j);
|
||||||
|
d5Table->getCountRef_bigger(vacancies(vac, cur_j), vprev,
|
||||||
|
d5Table->fwordclasses->getClass(ef.get_fs(cur_j)), l, m,
|
||||||
|
vac_all-vprev/*war weg*/-msc.fert(i)+k)+=normalized_ascore;
|
||||||
|
vac_all--;
|
||||||
|
vac[cur_j]=1;
|
||||||
|
Mmsc.check();
|
||||||
|
prev_j=cur_j;
|
||||||
|
cur_j=msc.als_j[cur_j].next;
|
||||||
|
}
|
||||||
|
assert(k==msc.fert(i));
|
||||||
|
if (k)
|
||||||
|
prev_cept=i;
|
||||||
|
}
|
||||||
|
assert(vac_all==msc.fert(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int NumberOfAlignmentsInSophisticatedCountCollection;
|
||||||
|
template<class TRANSPAIR, class MODEL> double collectCountsOverNeighborhoodForSophisticatedModels(
|
||||||
|
const MoveSwapMatrix<TRANSPAIR>&msc, LogProb normalized_ascore,
|
||||||
|
MODEL*d5Table) {
|
||||||
|
const PositionIndex m=msc.get_m(), l=msc.get_l();
|
||||||
|
alignment x(msc);
|
||||||
|
double sum=0;
|
||||||
|
msc.check();
|
||||||
|
if ( !msc.isCenterDeleted() ) {
|
||||||
|
_collectCountsOverNeighborhoodForSophisticatedModels<TRANSPAIR>(msc, x,
|
||||||
|
msc.get_ef(), normalized_ascore, d5Table);
|
||||||
|
NumberOfAlignmentsInSophisticatedCountCollection++;
|
||||||
|
sum+=normalized_ascore;
|
||||||
|
}
|
||||||
|
msc.check();
|
||||||
|
for (WordIndex j=1; j<=m; j++)
|
||||||
|
for (WordIndex i=0; i<=l; i++) {
|
||||||
|
WordIndex old=x(j);
|
||||||
|
if (i!=old&& !msc.isDelMove(i, j) ) {
|
||||||
|
msc.check();
|
||||||
|
double cm =msc.cmove(i, j);
|
||||||
|
if(cm < 0){
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
double c=cm*normalized_ascore;
|
||||||
|
if (c > COUNTINCREASE_CUTOFF_AL) {
|
||||||
|
x.set(j, i);
|
||||||
|
_collectCountsOverNeighborhoodForSophisticatedModels<
|
||||||
|
TRANSPAIR>(msc, x, msc.get_ef(), c, d5Table);
|
||||||
|
NumberOfAlignmentsInSophisticatedCountCollection++;
|
||||||
|
x.set(j, old);
|
||||||
|
sum+=c;
|
||||||
|
}
|
||||||
|
msc.check();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (PositionIndex j1=1; j1<=m; j1++) {
|
||||||
|
for (PositionIndex j2=j1+1; j2<=m; j2++) {
|
||||||
|
if (msc(j1)!=msc(j2) && !msc.isDelSwap(j1, j2) ) {
|
||||||
|
double cs = msc.cswap(j1, j2);
|
||||||
|
if(cs < 0)
|
||||||
|
continue;
|
||||||
|
double c=cs*normalized_ascore;
|
||||||
|
msc.check();
|
||||||
|
if (c > COUNTINCREASE_CUTOFF_AL) {
|
||||||
|
int old1=msc(j1), old2=msc(j2);
|
||||||
|
x.set(j1, old2);
|
||||||
|
x.set(j2, old1);
|
||||||
|
_collectCountsOverNeighborhoodForSophisticatedModels<
|
||||||
|
TRANSPAIR>(msc, x, msc.get_ef(), c, d5Table);
|
||||||
|
NumberOfAlignmentsInSophisticatedCountCollection++;
|
||||||
|
x.set(j1, old1);
|
||||||
|
x.set(j2, old2);
|
||||||
|
sum+=c;
|
||||||
|
}
|
||||||
|
msc.check();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
msc.check();
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class TRANSPAIR, class MODEL> int collectCountsOverNeighborhood(
|
||||||
|
const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,
|
||||||
|
Vector<WordIndex>&es, Vector<WordIndex>&fs, tmodel<COUNT,PROB>&tTable,
|
||||||
|
amodel<COUNT>&aCountTable, amodel<COUNT>&dCountTable,
|
||||||
|
nmodel<COUNT>&nCountTable, SyncDouble&p1count, SyncDouble&p0count,
|
||||||
|
LogProb&_total, float count, bool addCounts, MODEL*d4Table) {
|
||||||
|
int nAl=0;
|
||||||
|
const PositionIndex l=es.size()-1, m=fs.size()-1;
|
||||||
|
Array2<LogProb,Vector<LogProb> > dtcount(l+1, m+1), ncount(l+1,
|
||||||
|
MAX_FERTILITY+1);
|
||||||
|
LogProb p0=0, p1=0, all_total=0;
|
||||||
|
for (unsigned int i=0; i<smsc.size(); ++i) {
|
||||||
|
LogProb this_total=0;
|
||||||
|
nAl+=collectCountsOverNeighborhood(*smsc[i].first, smsc[i].second,
|
||||||
|
dtcount, ncount, p1, p0, this_total);
|
||||||
|
all_total+=this_total;
|
||||||
|
}
|
||||||
|
_total=all_total;
|
||||||
|
if(count==0){
|
||||||
|
cerr << "WARNING: COUNT ==0" << endl;
|
||||||
|
}else
|
||||||
|
all_total/=(double)count;
|
||||||
|
if(isinf(all_total)){
|
||||||
|
cerr << "ALL_TOTAL is INF\n" ;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
double sum2=0;
|
||||||
|
if (addCounts && d4Table) {
|
||||||
|
for (unsigned int i=0; i<smsc.size(); ++i) {
|
||||||
|
//for(WordIndex j=1;j<=m;j++)for(WordIndex ii=0;ii<=l;ii++)
|
||||||
|
// (*smsc[i].first).cmove(ii,j);
|
||||||
|
sum2+=collectCountsOverNeighborhoodForSophisticatedModels(
|
||||||
|
*smsc[i].first, smsc[i].second/all_total, d4Table);
|
||||||
|
}
|
||||||
|
if (!(fabs(count-sum2)<0.05))
|
||||||
|
cerr << "WARNING: DIFFERENT SUMS: (" << count << ") (" << sum2 << ") (" << all_total
|
||||||
|
<< ")\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
NOTE! HERE IS THE UPDATE PROCESS!
|
||||||
|
*/
|
||||||
|
if(fabs(all_total)==0){
|
||||||
|
// Error
|
||||||
|
cerr << "Hill climbing yields zero count " << endl;
|
||||||
|
}else{
|
||||||
|
if (addCounts) {
|
||||||
|
for (PositionIndex i=0; i<=l; i++) {
|
||||||
|
for (PositionIndex j=1; j<=m; j++) {
|
||||||
|
LogProb ijadd=dtcount(i, j)/all_total;
|
||||||
|
if (ijadd>COUNTINCREASE_CUTOFF_AL) {
|
||||||
|
tTable.incCount(es[i], fs[j], ijadd);
|
||||||
|
dCountTable.addValue(j, i, l, m, ijadd);
|
||||||
|
aCountTable.addValue(i, j, l, m, ijadd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (i>0)
|
||||||
|
for (PositionIndex n=0; n<MAX_FERTILITY; n++)
|
||||||
|
nCountTable.addValue(es[i], n, ncount(i, n)/all_total);
|
||||||
|
}
|
||||||
|
p0count+=p0/all_total;
|
||||||
|
p1count+=p1/all_total;
|
||||||
|
}}
|
||||||
|
return nAl;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,80 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||||
|
|
||||||
|
This file is part of GIZA++ ( extension of GIZA ).
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef collCounts_h_defined
|
||||||
|
#define collCounts_h_defined
|
||||||
|
#include "alignment.h"
|
||||||
|
#include "transpair_model3.h"
|
||||||
|
#include <map>
|
||||||
|
#include "MoveSwapMatrix.h"
|
||||||
|
#include "D4Tables.h"
|
||||||
|
#include "transpair_model4.h"
|
||||||
|
|
||||||
|
// Plain value type describing one operation by a type code and two short
// operands. NOTE(review): the meaning of the type codes and of a/b is
// defined by the code that fills these objects (see makeOneMoveSwap), not
// here.
class OneMoveSwap
{
 public:
  short type;  // operation kind
  short a,b;   // the two operands of the operation

  OneMoveSwap(short _type,short _a,short _b)
    : type(_type),a(_a),b(_b)
  {}

  // A default-constructed object is fully zeroed. The comparison operators
  // read all three fields, so none of them may be left indeterminate.
  OneMoveSwap()
    : type(0),a(0),b(0)
  {}
};
|
||||||
|
|
||||||
|
inline bool operator<(const OneMoveSwap&a,const OneMoveSwap&b)
|
||||||
|
{
|
||||||
|
if(a.type<b.type)return 1;
|
||||||
|
else if(b.type<a.type)return 0;
|
||||||
|
else if(a.a<b.a)return 1;
|
||||||
|
else if(b.a<a.a)return 0;
|
||||||
|
else return a.b<b.b;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool operator==(const OneMoveSwap&a,const OneMoveSwap&b)
|
||||||
|
{
|
||||||
|
return a.type==b.type&&a.a==b.a&&a.b==b.b;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ostream&operator<<(ostream&out,const OneMoveSwap&o)
|
||||||
|
{
|
||||||
|
return out << '(' << o.type << "," << o.a << "," << o.b << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ostream &operator<<(ostream &out,const set<OneMoveSwap>&s)
|
||||||
|
{
|
||||||
|
for(set<OneMoveSwap>::const_iterator i=s.begin();i!=s.end();++i)
|
||||||
|
cout << *i << ' ';
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool makeOneMoveSwap(const alignment&a,const alignment&b,set<OneMoveSwap>&oms);
|
||||||
|
|
||||||
|
template<class TRANSPAIR,class MODEL>
|
||||||
|
int collectCountsOverNeighborhood(const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,
|
||||||
|
Vector<WordIndex>&es,
|
||||||
|
Vector<WordIndex>&fs,tmodel<COUNT,PROB>&tTable,
|
||||||
|
amodel<COUNT>&aCountTable,amodel<COUNT>&dCountTable,
|
||||||
|
nmodel<COUNT>&nCountTable,double&p1count,double&p0count,
|
||||||
|
LogProb&_total,float count,bool addCounts,MODEL*d4Table=0);
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,3 @@
|
|||||||
|
/*!
|
||||||
|
This is global definition for all main files of the program set
|
||||||
|
*/
|
@ -0,0 +1,126 @@
|
|||||||
|
// D4 Normalization executable
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <strstream>
|
||||||
|
#include <string>
|
||||||
|
#include "hmm.h"
|
||||||
|
#include "D4Tables.h"
|
||||||
|
#include "Parameter.h"
|
||||||
|
#define ITER_M2 0
|
||||||
|
#define ITER_MH 5
|
||||||
|
GLOBAL_PARAMETER3(int,Model1_Iterations,"Model1_Iterations","NO. ITERATIONS MODEL 1","m1","number of iterations for Model 1",PARLEV_ITER,5);
|
||||||
|
GLOBAL_PARAMETER3(int,Model2_Iterations,"Model2_Iterations","NO. ITERATIONS MODEL 2","m2","number of iterations for Model 2",PARLEV_ITER,ITER_M2);
|
||||||
|
GLOBAL_PARAMETER3(int,HMM_Iterations,"HMM_Iterations","mh","number of iterations for HMM alignment model","mh", PARLEV_ITER,ITER_MH);
|
||||||
|
GLOBAL_PARAMETER3(int,Model3_Iterations,"Model3_Iterations","NO. ITERATIONS MODEL 3","m3","number of iterations for Model 3",PARLEV_ITER,5);
|
||||||
|
GLOBAL_PARAMETER3(int,Model4_Iterations,"Model4_Iterations","NO. ITERATIONS MODEL 4","m4","number of iterations for Model 4",PARLEV_ITER,5);
|
||||||
|
GLOBAL_PARAMETER3(int,Model5_Iterations,"Model5_Iterations","NO. ITERATIONS MODEL 5","m5","number of iterations for Model 5",PARLEV_ITER,0);
|
||||||
|
GLOBAL_PARAMETER3(int,Model6_Iterations,"Model6_Iterations","NO. ITERATIONS MODEL 6","m6","number of iterations for Model 6",PARLEV_ITER,0);
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(float, PROB_SMOOTH,"probSmooth","probability smoothing (floor) value ",PARLEV_OPTHEUR,1e-7);
|
||||||
|
GLOBAL_PARAMETER(float, MINCOUNTINCREASE,"minCountIncrease","minimal count increase",PARLEV_OPTHEUR,1e-7);
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER2(int,Transfer_Dump_Freq,"TRANSFER DUMP FREQUENCY","t2to3","output: dump of transfer from Model 2 to 3",PARLEV_OUTPUT,0);
|
||||||
|
GLOBAL_PARAMETER2(bool,Verbose,"verbose","v","0: not verbose; 1: verbose",PARLEV_OUTPUT,0);
|
||||||
|
GLOBAL_PARAMETER(bool,Log,"log","0: no logfile; 1: logfile",PARLEV_OUTPUT,0);
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(double,P0,"p0","fixed value for parameter p_0 in IBM-3/4 (if negative then it is determined in training)",PARLEV_EM,-1.0);
|
||||||
|
GLOBAL_PARAMETER(double,M5P0,"m5p0","fixed value for parameter p_0 in IBM-5 (if negative then it is determined in training)",PARLEV_EM,-1.0);
|
||||||
|
GLOBAL_PARAMETER3(bool,Peg,"pegging","p","DO PEGGING? (Y/N)","0: no pegging; 1: do pegging",PARLEV_EM,0);
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(short,OldADBACKOFF,"adbackoff","",-1,0);
|
||||||
|
GLOBAL_PARAMETER2(unsigned int,MAX_SENTENCE_LENGTH,"ml","MAX SENTENCE LENGTH","maximum sentence length",0,MAX_SENTENCE_LENGTH_ALLOWED);
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(short, DeficientDistortionForEmptyWord,"DeficientDistortionForEmptyWord","0: IBM-3/IBM-4 as described in (Brown et al. 1993); 1: distortion model of empty word is deficient; 2: distoriton model of empty word is deficient (differently); setting this parameter also helps to avoid that during IBM-3 and IBM-4 training too many words are aligned with the empty word",PARLEV_MODELS,0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
Here are parameters to support Load models and dump models
|
||||||
|
*/
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(int,restart,"restart","Restart training from a level,0: Normal restart, from model 1, 1: Model 1, 2: Model 2 Init (Using Model 1 model input and train model 2), 3: Model 2, (using model 2 input and train model 2), 4 : HMM Init (Using Model 1 model and train HMM), 5: HMM (Using Model 2 model and train HMM) 6 : HMM (Using HMM Model and train HMM), 7: Model 3 Init (Use HMM model and train model 3) 8: Model 3 Init (Use Model 2 model and train model 3) 9: Model 3, 10: Model 4 Init (Use Model 3 model and train Model 4) 11: Model 4 and on, ",PARLEV_INPUT,0);
|
||||||
|
GLOBAL_PARAMETER(bool,dumpCount,"dumpcount","Whether we are going to dump count (in addition to) final output?",PARLEV_OUTPUT,false);
|
||||||
|
GLOBAL_PARAMETER(bool,dumpCountUsingWordString,"dumpcountusingwordstring","In count table, should actual word appears or just the id? default is id",PARLEV_OUTPUT,false);
|
||||||
|
/// END
|
||||||
|
short OutputInAachenFormat=0;
|
||||||
|
bool Transfer=TRANSFER;
|
||||||
|
bool Transfer2to3=0;
|
||||||
|
short NoEmptyWord=0;
|
||||||
|
bool FEWDUMPS=0;
|
||||||
|
GLOBAL_PARAMETER(bool,ONLYALDUMPS,"ONLYALDUMPS","1: do not write any files",PARLEV_OUTPUT,0);
|
||||||
|
GLOBAL_PARAMETER(short,NCPUS,"NCPUS","Number of CPUS",PARLEV_EM,2);
|
||||||
|
GLOBAL_PARAMETER(short,CompactAlignmentFormat,"CompactAlignmentFormat","0: detailled alignment format, 1: compact alignment format ",PARLEV_OUTPUT,0);
|
||||||
|
GLOBAL_PARAMETER2(bool,NODUMPS,"NODUMPS","NO FILE DUMPS? (Y/N)","1: do not write any files",PARLEV_OUTPUT,0);
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
|
||||||
|
"maximal fertility for fertility models", PARLEV_EM, 10);
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
|
||||||
|
TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
|
||||||
|
a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char* argv[]){
|
||||||
|
if(argc < 5){
|
||||||
|
cerr << "Usage: " << argv[0] << " vcb1 vcb2 outputFile baseFile [additional1 ]..." << endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
WordClasses ewc,fwc;
|
||||||
|
d4model d4m(MAX_SENTENCE_LENGTH,ewc,fwc);
|
||||||
|
Vector<WordEntry> evlist,fvlist;
|
||||||
|
vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
|
||||||
|
TargetVocabFilename = argv[2];
|
||||||
|
SourceVocabFilename = argv[1];
|
||||||
|
eTrainVcbList.setName(argv[1]);
|
||||||
|
fTrainVcbList.setName(argv[2]);
|
||||||
|
eTrainVcbList.readVocabList();
|
||||||
|
fTrainVcbList.readVocabList();
|
||||||
|
string evcbcls = argv[1];
|
||||||
|
string fvcbcls = argv[2];
|
||||||
|
evcbcls += ".classes";
|
||||||
|
fvcbcls += ".classes";
|
||||||
|
d4m.makeWordClasses(eTrainVcbList, fTrainVcbList, evcbcls.c_str(), fvcbcls.c_str(),eTrainVcbList,fTrainVcbList);
|
||||||
|
// Start iteration:
|
||||||
|
for(int i =4; i< argc ; i++){
|
||||||
|
string name = argv[i];
|
||||||
|
string nameA = name ;
|
||||||
|
string nameB = name + ".b";
|
||||||
|
if(d4m.augCount(nameA.c_str(),nameB.c_str())){
|
||||||
|
cerr << "Loading (d4) table " << nameA << "/" << nameB << " OK" << endl;
|
||||||
|
|
||||||
|
}else{
|
||||||
|
cerr << "ERROR Loading (d) table " << nameA << " " << nameB << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
d4m.normalizeTable();
|
||||||
|
string DiffOPath = argv[3];
|
||||||
|
string diff1 = DiffOPath;
|
||||||
|
string diff2 = DiffOPath+".b";
|
||||||
|
cerr << "Outputing d4 table to " << diff1 << " " << diff2;
|
||||||
|
d4m.printProbTable(diff1.c_str(),diff2.c_str());
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some utility functions and stubs needed only to make this executable compile and link.
|
||||||
|
|
||||||
|
ofstream logmsg;
|
||||||
|
// Returns the decimal representation of `n` as a string.
//
// Negative values are rendered with a leading '-'. Previously `n % 10` was
// negative for such input, which produced a single garbage character. The
// magnitude is computed in unsigned arithmetic so that the most negative int
// is handled without signed overflow.
const string str2Num(int n) {
  const bool negative = n < 0;
  unsigned int magnitude =
      negative ? 0u - (unsigned int)n : (unsigned int)n;

  string number = "";
  do {
    // prepend the least significant remaining digit
    number.insert((size_t)0, 1, (char)(magnitude % 10 + '0'));
  } while ((magnitude /= 10) > 0);

  if (negative)
    number.insert((size_t)0, 1, '-');
  return (number);
}
|
||||||
|
double LAMBDA=1.09;
|
||||||
|
|
||||||
|
Vector<map< pair<int,int>,char > > ReferenceAlignment;
|
||||||
|
|
||||||
|
double ErrorsInAlignment(const map< pair<int,int>,char >&reference,
|
||||||
|
const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
|
||||||
|
int&eventsMissing, int&eventsToomuch, int pair_no){
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stub that exists only so that this executable links: it writes nothing to
// the given stream.
void printGIZAPars(ostream& /*out*/)
{
}
|
||||||
|
|
@ -0,0 +1,78 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef _defs_h
|
||||||
|
#define _defs_h 1
|
||||||
|
#include <string>
|
||||||
|
#include <cmath>
|
||||||
|
#include <climits>
|
||||||
|
|
||||||
|
const int TRANSFER_SIMPLE=1;
|
||||||
|
const int TRANSFER=0;
|
||||||
|
|
||||||
|
const unsigned int MAX_SENTENCE_LENGTH_ALLOWED=101;
|
||||||
|
const int TRAIN_BUFFER_SIZE= 50000;
|
||||||
|
//#ifdef WORDINDEX_WITH_4_BYTE
|
||||||
|
typedef unsigned int WordIndex;
|
||||||
|
const unsigned int MAX_VOCAB_SIZE=UINT_MAX;
|
||||||
|
typedef unsigned int PositionIndex;
|
||||||
|
//#else
|
||||||
|
//typedef unsigned short WordIndex;
|
||||||
|
//const unsigned int MAX_VOCAB_SIZE=USHRT_MAX;
|
||||||
|
//typedef unsigned short PositionIndex;
|
||||||
|
//#endif
|
||||||
|
extern WordIndex MAX_FERTILITY;
|
||||||
|
|
||||||
|
const int MAX_W=457979;
|
||||||
|
extern double LAMBDA; // Lambda that is used to scale cross_entropy factor
|
||||||
|
|
||||||
|
typedef float PROB ;
|
||||||
|
typedef float COUNT ;
|
||||||
|
|
||||||
|
class LogProb {
|
||||||
|
private:
|
||||||
|
double x ;
|
||||||
|
public:
|
||||||
|
LogProb():x(0){}
|
||||||
|
LogProb(double y):x(y){}
|
||||||
|
LogProb(float y):x(y){}
|
||||||
|
LogProb(int y):x(y){}
|
||||||
|
LogProb(WordIndex y):x(y){}
|
||||||
|
operator double() const {return x;}
|
||||||
|
LogProb operator *= (double y) { x *= y ; return *this;}
|
||||||
|
LogProb operator *= (LogProb y) { x *= y.x ; return *this;}
|
||||||
|
LogProb operator /= (double y) { x /= y ; return *this;}
|
||||||
|
LogProb operator /= (LogProb y) { x /= y.x ; return *this;}
|
||||||
|
LogProb operator += (double y) { x += y ; return *this;}
|
||||||
|
LogProb operator += (LogProb y) { x += y.x ; return *this;}
|
||||||
|
};
|
||||||
|
|
||||||
|
const int PARLEV_ITER=1;
|
||||||
|
const int PARLEV_OPTHEUR=2;
|
||||||
|
const int PARLEV_OUTPUT=3;
|
||||||
|
const int PARLEV_SMOOTH=4;
|
||||||
|
const int PARLEV_EM=5;
|
||||||
|
const int PARLEV_MODELS=6;
|
||||||
|
const int PARLEV_SPECIAL=7;
|
||||||
|
const int PARLEV_INPUT=8;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,59 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef FILE_SPEC_H
|
||||||
|
#define FILE_SPEC_H
|
||||||
|
|
||||||
|
#include <ctime>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
/* This function returns a string, locally called file_spec. This
|
||||||
|
string is the concatenation of the date and time of execution
|
||||||
|
and the user who is performing the execution */
|
||||||
|
/* Originally implemented in C by Yaser Al-Onaizan;
|
||||||
|
editions for C++ and formatting by Noah A. Smith, 9 July 1999 */
|
||||||
|
|
||||||
|
/* Builds the file-spec string "<YY>-<MM>-<DD>.<hh><mm><ss>.<user>" from the
   current local time and the USER environment variable.

   Returns a buffer obtained from malloc() that the caller must free(), or
   NULL if the local time cannot be determined or memory is exhausted.
   If USER is not set, "unknown" is used as the user name (previously a null
   pointer was passed to strlen/strcat in that case).

   NOTE: tm_year counts years since 1900, so the first field is 99 for 1999
   but 109 for 2009. The format is deliberately left unchanged so that
   generated names stay the same. */
char *Get_File_Spec (){
  char time_stmp[57];
  const time_t t = time(NULL);
  const struct tm *local = localtime(&t);
  if (local == NULL)
    return NULL;

  snprintf(time_stmp, sizeof time_stmp, "%02d-%02d-%02d.%02d%02d%02d.",
           local->tm_year, (local->tm_mon + 1), local->tm_mday,
           local->tm_hour, local->tm_min, local->tm_sec);

  const char *user = getenv("USER");
  if (user == NULL)
    user = "unknown";

  const size_t stamp_len = strlen(time_stmp);
  const size_t user_len = strlen(user);
  char *file_spec = (char *)malloc(stamp_len + user_len + 1);
  if (file_spec == NULL)
    return NULL;

  memcpy(file_spec, time_stmp, stamp_len);
  memcpy(file_spec + stamp_len, user, user_len + 1);  /* copies the '\0' too */
  return file_spec;
}
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,470 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/* --------------------------------------------------------------------------*
|
||||||
|
* *
|
||||||
|
* Module : getSentence *
|
||||||
|
* *
|
||||||
|
* Method Definitions File: getSentence.cc *
|
||||||
|
* *
|
||||||
|
* Objective: Defines classes and methods for handling I/O for the parallel *
|
||||||
|
* corpus. *
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#include "getSentence.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <strstream>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <boost/algorithm/string.hpp>
|
||||||
|
#include <vector>
|
||||||
|
#include <set>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include "Parameter.h"
|
||||||
|
#include "errno.h"
|
||||||
|
|
||||||
|
int PrintedTooLong=0;
|
||||||
|
|
||||||
|
/* -------------- Method Definitions for Class sentenceHandler ---------------*/
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(double,ManlexMAX_MULTIPLICITY,"manlexMAX_MULTIPLICITY","",PARLEV_EM,20.0);
|
||||||
|
GLOBAL_PARAMETER(double,Manlexfactor1,"manlexfactor1","",PARLEV_EM,0.0);
|
||||||
|
GLOBAL_PARAMETER(double,Manlexfactor2,"manlexfactor2","",PARLEV_EM,0.0);
|
||||||
|
|
||||||
|
// Constructor: opens the corpus `filename` and resets all reading state
// (position, buffer, sentence-pair counters). The process exits with status 1
// if the file cannot be opened.
//
// If both vocabulary lists are given, the whole corpus is read once through
// getNextSentence() in order to
//   - count the sentence pairs (totalPairs1) and sum their realCount values
//     (totalPairs2),
//   - record every word id that occurs on either side (id 0 is always
//     included), and finally compact() both lists to those ids.
// If any pair has a negative occurrence count, a per-pair realCount vector
// initialised to 1.0 is allocated; otherwise realCount stays null.
//
// The compaction is only performed when both lists are present; calling
// compact() through a null list pointer used to crash here.
sentenceHandler::sentenceHandler(const char* filename, vcbList* elist,
                                 vcbList* flist) : realCount(0)
{
  pthread_mutex_init(&readsent_mutex,NULL);
  pthread_mutex_init(&setprob_mutex,NULL);

  position = 0;
  readflag = false ;
  allInMemory = false ;
  inputFilename = filename ;
  inputFile = new ifstream(filename);
  pair_no = 0 ;
  if(!(*inputFile)){
    cerr << "\nERROR:(a) Cannot open " << filename;
    exit(1);
  }
  currentSentence = 0;
  totalPairs1 = 0 ;
  totalPairs2 = 0;
  noSentInBuffer = 0 ;
  Buffer.clear();

  const bool haveVocabularies = (elist && flist);
  bool isNegative = false;
  std::set<WordIndex> evoc,fvoc;
  evoc.insert(0);
  fvoc.insert(0);

  if (haveVocabularies){
    cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';
    sentPair s ;
    while (getNextSentence(s, elist, flist))
      {
        for(size_t i = 0 ; i < s.eSent.size() ; i++){
          evoc.insert(s.eSent[i]);
        }
        for(size_t i = 0 ; i < s.fSent.size() ; i++){
          fvoc.insert(s.fSent[i]);
        }
        totalPairs1++;
        totalPairs2+=s.realCount;
        // NOTE: this value might change during training
        // for words from the manual dictionary, yet this is ignored!

        if( s.noOcc<0 )
          isNegative = true;
      }
  }

  if( isNegative )
    {
      cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
      realCount=new Vector<double>(totalPairs1,1.0);
    }
  else
    realCount=0;

  if (haveVocabularies){
    elist->compact(evoc);
    flist->compact(fvoc);
  }
}
|
||||||
|
|
||||||
|
// Constructor variant that reports the word ids seen in the corpus to the
// caller instead of compacting the vocabulary lists.
//
// Opens `filename` (the process exits with status 1 on failure) and resets
// all reading state. If both vocabulary lists are given, the corpus is read
// once through getNextSentence(); every word id is inserted into `eapp` or
// `fapp` respectively, totalPairs1 counts the pairs and totalPairs2 sums
// their realCount values. If any pair has a negative occurrence count, a
// per-pair realCount vector initialised to 1.0 is allocated; otherwise
// realCount stays null.
sentenceHandler::sentenceHandler(const char* filename, vcbList* elist,
                                 vcbList* flist,std::set<WordIndex>& eapp, std::set<WordIndex>& fapp) : realCount(0)
{
  pthread_mutex_init(&readsent_mutex,NULL);
  pthread_mutex_init(&setprob_mutex,NULL);

  // reset the reading state
  position = 0;
  readflag = false ;
  allInMemory = false ;
  inputFilename = filename ;
  inputFile = new ifstream(filename);
  pair_no = 0 ;
  if(!(*inputFile)){
    cerr << "\nERROR:(a) Cannot open " << filename;
    exit(1);
  }
  currentSentence = 0;
  totalPairs1 = 0 ;
  totalPairs2 = 0;
  pair_no = 0 ;
  noSentInBuffer = 0 ;
  Buffer.clear();

  bool sawNegativeCount = false;
  if (elist && flist){
    cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';
    sentPair pairRead ;
    while (getNextSentence(pairRead, elist, flist)){
      int idx;
      for(idx = 0; idx < pairRead.eSent.size(); idx++){
        eapp.insert(pairRead.eSent[idx]);
      }
      for(idx = 0; idx < pairRead.fSent.size(); idx++){
        fapp.insert(pairRead.fSent[idx]);
      }
      totalPairs1++;
      totalPairs2 += pairRead.realCount;
      // NOTE: this value might change during training
      // for words from the manual dictionary, yet this is ignored!
      if( pairRead.noOcc<0 ){
        sawNegativeCount = true;
      }
    }
  }

  if( !sawNegativeCount ){
    realCount=0;
  } else {
    cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
    realCount=new Vector<double>(totalPairs1,1.0);
  }
}
|
||||||
|
|
||||||
|
void sentenceHandler::rewind()
|
||||||
|
{
|
||||||
|
position = 0;
|
||||||
|
currentSentence = 0;
|
||||||
|
readflag = false ;
|
||||||
|
if (!allInMemory ||
|
||||||
|
!(Buffer.size() >= 1 && Buffer[currentSentence].sentenceNo == 1)){
|
||||||
|
// check if the buffer doe not already has the first chunk of pairs
|
||||||
|
if (Buffer.size() > 0)
|
||||||
|
cerr << ' ' << Buffer[currentSentence].sentenceNo << '\n';
|
||||||
|
// totalPairs = 0 ;
|
||||||
|
pair_no = 0 ;
|
||||||
|
noSentInBuffer = 0 ;
|
||||||
|
Buffer.clear();
|
||||||
|
}
|
||||||
|
if (!allInMemory){
|
||||||
|
delete inputFile;
|
||||||
|
inputFile = new ifstream(inputFilename);
|
||||||
|
if(!(*inputFile)){
|
||||||
|
cerr << "\nERROR:(b) Cannot open " << inputFilename << " " << (int)errno;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Hands the next buffered sentence pair to the caller, refilling the buffer
 * from the corpus file once it has been consumed.
 *
 *   sent          receives a copy of the next sentence pair
 *   elist, flist  optional vocabulary lists; when both are given, every word
 *                 read during a refill is range-checked against them and its
 *                 frequency is increased by the pair's count
 *
 * Returns the running sentence position on success and 0 when no sentence
 * was delivered (end of corpus already flagged, in-memory corpus consumed,
 * or nothing left to read).  The whole call runs under readsent_mutex.
 */
int sentenceHandler::getNextSentence(sentPair& sent, vcbList* elist, vcbList* flist)
{
  pthread_mutex_lock(&readsent_mutex);

  int delivered = 0;          // stays 0 unless a sentence is handed out
  bool proceed = !readflag;

  if (!proceed) {
    cerr << "Attempting to read from the end of corpus, rewinding\n";
    //rewind();
  }

  if (proceed && currentSentence >= noSentInBuffer) {
    if (allInMemory) {
      // The whole corpus is buffered and has been consumed.
      proceed = false;
    } else {
      /* no more sentences in buffer: load the next batch from the file */
      sentPair pair;
      noSentInBuffer = 0;
      currentSentence = 0;
      Buffer.clear();
      cout << "Reading more sentence pairs into memory ... \n";

      while ((noSentInBuffer < TRAIN_BUFFER_SIZE) && readNextSentence(pair)) {
        if ((pair.fSent.size()-1) > (MAX_FERTILITY-1) * (pair.eSent.size()-1)) {
          cerr << "WARNING: The following sentence pair has source/target sentence length ratio more than\n"
               << "the maximum allowed limit for a source word fertility\n"
               << " source length = " << pair.eSent.size()-1
               << " target length = " << pair.fSent.size()-1
               << " ratio " << double(pair.fSent.size()-1)/ (pair.eSent.size()-1)
               << " ferility limit : " << MAX_FERTILITY-1 << '\n';
          cerr << "Shortening sentence \n";
          cerr << pair;
          pair.eSent.resize(min(pair.eSent.size(), pair.fSent.size()));
          pair.fSent.resize(min(pair.eSent.size(), pair.fSent.size()));
        }
        Buffer.push_back(pair);

        if (elist && flist) {
          // Source side: index 0 (the null word) is counted as well.
          if (elist->size() > 0) {
            for (WordIndex e = 0; e < pair.eSent.size(); e++) {
              if (pair.eSent[e] >= elist->uniqTokens()) {
                if (PrintedTooLong++ < 100)
                  cerr << "ERROR: source word " << pair.eSent[e] << " is not in the vocabulary list \n";
                exit(-1);
              }
              elist->incFreq(pair.eSent[e], pair.realCount);
            }
          }
          // Target side: index 0 is a placeholder and is skipped.
          if (flist->size() > 0) {
            for (WordIndex f = 1; f < pair.fSent.size(); f++) {
              if (pair.fSent[f] >= flist->uniqTokens()) {
                cerr << "ERROR: target word " << pair.fSent[f] << " is not in the vocabulary list \n";
                exit(-1);
              }
              flist->incFreq(pair.fSent[f], pair.realCount);
            }
          }
        }
        noSentInBuffer++;
      }

      if (inputFile->eof()) {
        // If the buffer starts with sentence 1, the file was read in one go.
        allInMemory = (Buffer.size() >= 1 &&
                       Buffer[currentSentence].sentenceNo == 1);
        if (allInMemory)
          cout << "Corpus fits in memory, corpus has: " << Buffer.size()
               << " sentence pairs.\n";
      }
    }
  }

  if (proceed && noSentInBuffer <= 0) {
    // Nothing could be read: remember that the end of the corpus was hit.
    readflag = true;
    proceed = false;
  }

  if (proceed) {
    sent = Buffer[currentSentence++];
    position++;
    if (sent.noOcc < 0 && realCount) {
      if (Manlexfactor1 && sent.noOcc == -1.0)
        sent.realCount = Manlexfactor1;
      else if (Manlexfactor2 && sent.noOcc == -2.0)
        sent.realCount = Manlexfactor2;
      else
        sent.realCount = (*realCount)[sent.getSentenceNo()-1];
    }
    delivered = position;
  }

  pthread_mutex_unlock(&readsent_mutex);
  return delivered;
}
|
||||||
|
/*
 * Reads the next sentence pair from the corpus file into `sent`.
 *
 * Two layouts are accepted, as the parsing below shows:
 *   - three consecutive lines: the number of times the pair occurred, the
 *     source sentence and the target sentence, the sentences being space
 *     separated integer token ids;
 *   - a single line whose fields are separated by '|', '#' or '*':
 *     count, source, target and optionally a list of "e-f" anchor pairs.
 *
 * Position 0 of both eSent and fSent is filled with id 0 before the tokens
 * (the null word on the source side, a placeholder on the target side).
 * Sentences are truncated to MAX_SENTENCE_LENGTH entries.
 *
 * Returns true when a pair was read and numbered; returns false, with
 * `sent` cleared, when the file is exhausted or a line is malformed.
 */
bool sentenceHandler::readNextSentence(sentPair& sent)
{
  string line;
  bool fail(false);

  sent.clear();
  vector<string> splits;

  /* ---- occurrence count -------------------------------------------- */
  if (getline(*inputFile, line)){
    boost::algorithm::split(splits,line,boost::algorithm::is_any_of("|#*"));

    if(splits.size()>=3){
      // single-line layout: the count is the first field
      line = splits[0];
    }else if(splits.size()==2){
      // neither the plain count line nor a complete count/source/target line
      cerr << "ERROR: malformed corpus line (expected count, source and target fields): "
           << line << '\n';
      return false;
    }

    istrstream buffer(line.c_str());
    buffer >> sent.noOcc;
    if( sent.noOcc<0 )
    {
      // Negative counts are markers; the effective count comes from elsewhere.
      if( realCount )
      {
        if( Manlexfactor1 && sent.noOcc==-1.0 )
          sent.realCount=Manlexfactor1;
        else if( Manlexfactor2 && sent.noOcc==-2.0 )
          sent.realCount=Manlexfactor2;
        else
          sent.realCount=(*realCount)[pair_no];
      }
      else
        sent.realCount=1.0;
    }
    else
      sent.realCount=sent.noOcc;
  }
  else {
    fail = true;
  }

  /* ---- source sentence --------------------------------------------- */
  if (splits.size()>=3 || getline(*inputFile, line)){
    if(splits.size()>=3){
      line = splits[1];
    }
    istrstream buffer(line.c_str());
    WordIndex w; // token id being read
    sent.eSent.push_back(0); // null word (id 0) at the beginning of the sentence
    while(buffer>>w){
      if (sent.eSent.size() < MAX_SENTENCE_LENGTH)
        sent.eSent.push_back(w);
      else {
        if( PrintedTooLong++<100)
          cerr << "{WARNING:(a)truncated sentence "<<pair_no<<"}";
        break ;
      }
    }
  }
  else {
    fail = true ;
  }

  /* ---- target sentence --------------------------------------------- */
  if (splits.size()>=3 || getline(*inputFile, line)){
    if(splits.size()>=3){
      line = splits[2];
    }
    istrstream buffer(line.c_str());
    WordIndex w; // token id being read
    sent.fSent.push_back(0); // 0 is inserted for program uniformity
    while(buffer>>w){
      if (sent.fSent.size() < MAX_SENTENCE_LENGTH)
        sent.fSent.push_back(w);
      else {
        if( PrintedTooLong++<100)
          cerr << "{WARNING:(b)truncated sentence "<<pair_no<<"}";
        break ;
      }
    }
  }
  else {
    fail = true ;
  }

  if (fail){
    // Leave no partial data behind; clear() also zeroes the counters.
    sent.clear();
    return(false);
  }

  /* ---- optional anchors ("e-f" pairs in the fourth field) ---------- */
  if(splits.size()>3){
    vector<string> al,eal;
    boost::algorithm::split(al,splits[3],boost::algorithm::is_any_of(" "));
    for(size_t k = 0 ; k < al.size(); k++){
      eal.clear();
      boost::algorithm::split(eal,al[k],boost::algorithm::is_any_of("-"));
      if(eal.size()!=2)
        continue;
      const int ea = atoi(eal[0].c_str());
      const int fa = atoi(eal[1].c_str());
      // Drop anchors that point outside either sentence.
      if(ea < 0 || fa < 0 ||
         static_cast<size_t>(ea) >= sent.eSent.size() ||
         static_cast<size_t>(fa) >= sent.fSent.size())
        continue;
      sent.eAnchor.push_back(ea);
      sent.fAnchor.push_back(fa);
    }
  }

  // Number the pair first so the diagnostic below names the real sentence.
  sent.sentenceNo = ++pair_no;
  if( sent.eSent.size()==1||sent.fSent.size()==1 )
    cerr << "ERROR: Forbidden zero sentence length " << sent.sentenceNo << endl;
  if(pair_no % 100000 == 0)
    cout << "[sent:" << sent.sentenceNo << "]"<< '\n';
  return true;
}
|
||||||
|
|
||||||
|
/*
 * Grid search for lambda over 1.0, 1.33, 1.66, ... (below
 * ManlexMAX_MULTIPLICITY).  For each candidate the sum over vd of
 *     v * exp(lambda*v) / (exp(lambda*v) - 1)
 * is computed, and the candidate whose sum is closest to 1 is returned.
 */
double optimize_lambda(Vector<double>&vd)
{
  Vector<double> deviation;   // |sum - 1| for each grid point, in grid order

  for(double candidate=1.0; candidate<ManlexMAX_MULTIPLICITY; candidate+=0.33)
  {
    double sum=0.0;
    for(unsigned int k=0; k<vd.size(); ++k)
    {
      const double grown=exp(candidate*vd[k]);
      sum += vd[k]*grown/(grown-1.0);
    }
    deviation.push_back(fabs(sum-1.0));
  }

  // Map the index of the smallest deviation back onto the grid.
  const double best=
    double(min_element(deviation.begin(),deviation.end())-deviation.begin())*0.33+1.0;

  if( best<1.0 )
  {
    cerr << "ERROR: lambda is smaller than one: " << best << endl;
    for(unsigned int k=0; k<vd.size(); ++k)
      cerr << vd[k] << ' ';
    cerr << endl;
  }
  return best;
}
|
||||||
|
|
||||||
|
void sentenceHandler::setProbOfSentence(const sentPair&s,double d)
|
||||||
|
{
|
||||||
|
|
||||||
|
if( realCount==0 )
|
||||||
|
return;
|
||||||
|
else{
|
||||||
|
pthread_mutex_lock(&setprob_mutex);
|
||||||
|
if( s.noOcc<=0 )
|
||||||
|
{
|
||||||
|
double ed=exp(d);
|
||||||
|
if( oldPairs.size()>0&&(oldPairs.back().get_eSent()!=s.get_eSent()||oldPairs.back().getSentenceNo()>=s.getSentenceNo()) )
|
||||||
|
{
|
||||||
|
double lambda=optimize_lambda(oldProbs);
|
||||||
|
for(unsigned int i=0;i<oldPairs.size();++i)
|
||||||
|
{
|
||||||
|
if( oldProbs[i]<1e-5 )
|
||||||
|
(*realCount)[oldPairs[i].getSentenceNo()-1]=1.0;
|
||||||
|
else
|
||||||
|
(*realCount)[oldPairs[i].getSentenceNo()-1]=lambda*oldProbs[i]/(1-exp(-lambda*oldProbs[i]));
|
||||||
|
}
|
||||||
|
oldPairs.clear();
|
||||||
|
oldProbs.clear();
|
||||||
|
}
|
||||||
|
oldPairs.push_back(s);
|
||||||
|
oldProbs.push_back(ed);
|
||||||
|
}
|
||||||
|
pthread_mutex_unlock(&setprob_mutex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ------------- End of Method Definition of Class sentenceHandler ----------*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,136 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
/* --------------------------------------------------------------------------*
|
||||||
|
* *
|
||||||
|
* Module : getSentence *
|
||||||
|
* *
|
||||||
|
* Prototypes File: getSentence.h *
|
||||||
|
* *
|
||||||
|
* Objective: Defines classes and methods for handling I/O for the parallel *
|
||||||
|
* corpus. *
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _sentenceHandler_h
|
||||||
|
#define _sentenceHandler_h 1
|
||||||
|
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <string>
|
||||||
|
#include <set>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include "Vector.h"
|
||||||
|
#include "defs.h"
|
||||||
|
#include "vocab.h"
|
||||||
|
#include "Globals.h"
|
||||||
|
/*----------------------- Class Prototype Definition ------------------------*
|
||||||
|
Class Name: sentenceHandler
|
||||||
|
Objective: This class is defined to handle training sentence pairs from the
|
||||||
|
parallel corpus. Each pair has: a target sentence, called here French; a
|
||||||
|
source sentence, called here English sentence; and an integer number denoting
|
||||||
|
the number of times this pair occurred in the training corpus. Both source and
|
||||||
|
target sentences are represented as integer vectors (variable size arrays),
|
||||||
|
each entry is a numeric value which is the token id for the particular token
|
||||||
|
in the sentence.
|
||||||
|
|
||||||
|
*---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
class sentPair{
|
||||||
|
public:
|
||||||
|
int sentenceNo ;
|
||||||
|
float noOcc;
|
||||||
|
float realCount;
|
||||||
|
Vector<WordIndex> eSent ;
|
||||||
|
Vector<WordIndex> fSent;
|
||||||
|
Vector<WordIndex> eAnchor;
|
||||||
|
Vector<WordIndex> fAnchor;
|
||||||
|
public:
|
||||||
|
sentPair(){};
|
||||||
|
void clear(){ eSent.clear(); fSent.clear();eAnchor.clear(),fAnchor.clear(); noOcc=0; realCount=0; sentenceNo=0;};
|
||||||
|
const Vector<WordIndex>&get_eSent()const
|
||||||
|
{ return eSent; }
|
||||||
|
const Vector<WordIndex>&get_fSent()const
|
||||||
|
{ return fSent; }
|
||||||
|
int getSentenceNo()const
|
||||||
|
{ return sentenceNo; }
|
||||||
|
double getCount()const
|
||||||
|
{ return realCount; }
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Prints a sentence pair: a header line with number and count, the used
 * count when it differs, then the source ids (from index 0) and the target
 * ids (from index 1, skipping the placeholder), one sentence per line.
 */
inline ostream&operator<<(ostream&of,const sentPair&s)
{
  of << "Sent No: " << s.sentenceNo << " , No. Occurrences: " << s.noOcc << '\n';
  if( s.noOcc!=s.realCount )
    of << " Used No. Occurrences: " << s.realCount << '\n';

  const Vector<WordIndex>& source = s.get_eSent();
  for(unsigned int e=0; e < source.size(); e++)
    of << source[e] << ' ';
  of << '\n';

  const Vector<WordIndex>& target = s.get_fSent();
  for(unsigned int f=1; f < target.size(); f++)
    of << target[f] << ' ';
  of << '\n';

  return of;
}
|
||||||
|
|
||||||
|
/*Thread-safe version of sentence handler*/
|
||||||
|
class sentenceHandler{
|
||||||
|
public:
|
||||||
|
const char * inputFilename; // parallel corpus file name, similar for all
|
||||||
|
// sentence pair objects
|
||||||
|
ifstream *inputFile; // parallel corpus file handler
|
||||||
|
Vector<sentPair> Buffer;
|
||||||
|
int noSentInBuffer ;
|
||||||
|
int currentSentence ;
|
||||||
|
int position; /*Sentence position (will be returned)*/
|
||||||
|
int totalPairs1 ;
|
||||||
|
double totalPairs2;
|
||||||
|
bool readflag ; // true if you reach the end of file
|
||||||
|
bool allInMemory ;
|
||||||
|
int pair_no ;
|
||||||
|
Vector<double> *realCount;
|
||||||
|
|
||||||
|
Vector<sentPair> oldPairs;
|
||||||
|
Vector<double> oldProbs;
|
||||||
|
sentenceHandler(){};
|
||||||
|
sentenceHandler(const char* filename, vcbList* elist=0, vcbList* flist=0);
|
||||||
|
sentenceHandler(const char* filename, vcbList* elist, vcbList* flist,set<WordIndex>& eapp, set<WordIndex>& fapp);
|
||||||
|
void rewind();
|
||||||
|
int getNextSentence(sentPair&, vcbList* = 0, vcbList* = 0); // will be defined in the definition file, this
|
||||||
|
int getTotalNoPairs1()const {return totalPairs1;};
|
||||||
|
double getTotalNoPairs2()const {return totalPairs2;};
|
||||||
|
// method will read the next pair of sentence from memory buffer
|
||||||
|
void setProbOfSentence(const sentPair&s,double d);
|
||||||
|
private:
|
||||||
|
pthread_mutex_t readsent_mutex;
|
||||||
|
pthread_mutex_t setprob_mutex;
|
||||||
|
bool readNextSentence(sentPair&); // will be defined in the definition file, this
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
1088
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/src/hmm.cpp
Normal file
1088
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/src/hmm.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,103 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef _hmm_h
|
||||||
|
#define _hmm_h 1
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <functional>
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
#include "Vector.h"
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#if __GNUC__>2
|
||||||
|
#include <ext/hash_map>
|
||||||
|
using __gnu_cxx::hash_map;
|
||||||
|
#else
|
||||||
|
#include <hash_map>
|
||||||
|
#endif
|
||||||
|
#include <fstream>
|
||||||
|
#include <cmath>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
|
#include "TTables.h"
|
||||||
|
#include "ATables.h"
|
||||||
|
#include "getSentence.h"
|
||||||
|
#include "defs.h"
|
||||||
|
#include "model2.h"
|
||||||
|
#include "Perplexity.h"
|
||||||
|
#include "vocab.h"
|
||||||
|
#include "WordClasses.h"
|
||||||
|
#include "HMMTables.h"
|
||||||
|
#include "ForwardBackward.h"
|
||||||
|
#include "ttableDiff.hpp"
|
||||||
|
|
||||||
|
class hmm : public model2{
|
||||||
|
public:
|
||||||
|
WordClasses& ewordclasses;
|
||||||
|
WordClasses& fwordclasses;
|
||||||
|
public:
|
||||||
|
HMMTables<int,WordClasses> counts,probs;
|
||||||
|
public:
|
||||||
|
template<class MAPPER>
|
||||||
|
void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile){
|
||||||
|
ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
|
||||||
|
if( !estrm ) {
|
||||||
|
cerr << "ERROR: can not read " << efile << endl;
|
||||||
|
}else
|
||||||
|
ewordclasses.read(estrm,m1,Elist);
|
||||||
|
if( !fstrm )
|
||||||
|
cerr << "ERROR: can not read " << ffile << endl;
|
||||||
|
else
|
||||||
|
fwordclasses.read(fstrm,m2,Flist);
|
||||||
|
}
|
||||||
|
hmm(model2&m2,WordClasses &e, WordClasses& f);
|
||||||
|
void initialize_table_uniformly(sentenceHandler&);
|
||||||
|
int em_with_tricks(int iterations, bool dumpCount = false,
|
||||||
|
const char* dumpCountName = NULL, bool useString = false,bool resume=false);
|
||||||
|
CTTableDiff<COUNT,PROB>* em_one_step(int it);
|
||||||
|
// void em_one_step_2(int it,int part);
|
||||||
|
void load_table(const char* aname);
|
||||||
|
|
||||||
|
// void em_loop(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
|
||||||
|
// const char* alignfile, Perplexity&, bool test,bool doInit,int iter);
|
||||||
|
/* CTTableDiff<COUNT,PROB>* em_loop_1(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
|
||||||
|
const char* alignfile, Perplexity&, bool test,bool doInit,int iter);*/
|
||||||
|
/* void em_loop_2( Perplexity& perp, sentenceHandler& sHandler1,
|
||||||
|
bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
|
||||||
|
bool test,bool doInit,int part);*/
|
||||||
|
void em_loop(Perplexity& perp, sentenceHandler& sHandler1,
|
||||||
|
bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
|
||||||
|
bool test,bool doInit,int
|
||||||
|
);
|
||||||
|
void em_thread(int it,string alignfile,bool dump_files,bool resume=false);
|
||||||
|
HMMNetwork *makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>&fs,bool doInit)const;
|
||||||
|
void clearCountTable();
|
||||||
|
friend class model3;
|
||||||
|
};
|
||||||
|
//int multi_thread_em(int noIter, int noThread, hmm* base);
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,135 @@
|
|||||||
|
// HMM Normalization executable
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <strstream>
|
||||||
|
#include <string>
|
||||||
|
#include "hmm.h"
|
||||||
|
#include "Parameter.h"
|
||||||
|
#define ITER_M2 0
|
||||||
|
#define ITER_MH 5
|
||||||
|
// Iteration counts for the individual training stages.  Each line passes the
// variable, three strings, a description, a level and a default value to
// GLOBAL_PARAMETER3 (defined elsewhere, presumably in Parameter.h).
GLOBAL_PARAMETER3(int,Model1_Iterations,"Model1_Iterations","NO. ITERATIONS MODEL 1","m1","number of iterations for Model 1",PARLEV_ITER,5);
|
||||||
|
GLOBAL_PARAMETER3(int,Model2_Iterations,"Model2_Iterations","NO. ITERATIONS MODEL 2","m2","number of iterations for Model 2",PARLEV_ITER,ITER_M2);
|
||||||
|
// NOTE(review): the string arguments below are ordered differently from the
// sibling lines (short name second, description third, "mh" again in the
// description slot).  Looks like a transposition, but changing it would
// alter the accepted parameter names; confirm against the macro definition.
GLOBAL_PARAMETER3(int,HMM_Iterations,"HMM_Iterations","mh","number of iterations for HMM alignment model","mh", PARLEV_ITER,ITER_MH);
|
||||||
|
GLOBAL_PARAMETER3(int,Model3_Iterations,"Model3_Iterations","NO. ITERATIONS MODEL 3","m3","number of iterations for Model 3",PARLEV_ITER,5);
|
||||||
|
GLOBAL_PARAMETER3(int,Model4_Iterations,"Model4_Iterations","NO. ITERATIONS MODEL 4","m4","number of iterations for Model 4",PARLEV_ITER,5);
|
||||||
|
GLOBAL_PARAMETER3(int,Model5_Iterations,"Model5_Iterations","NO. ITERATIONS MODEL 5","m5","number of iterations for Model 5",PARLEV_ITER,0);
|
||||||
|
GLOBAL_PARAMETER3(int,Model6_Iterations,"Model6_Iterations","NO. ITERATIONS MODEL 6","m6","number of iterations for Model 6",PARLEV_ITER,0);
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(float, PROB_SMOOTH,"probSmooth","probability smoothing (floor) value ",PARLEV_OPTHEUR,1e-7);
|
||||||
|
GLOBAL_PARAMETER(float, MINCOUNTINCREASE,"minCountIncrease","minimal count increase",PARLEV_OPTHEUR,1e-7);
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER2(int,Transfer_Dump_Freq,"TRANSFER DUMP FREQUENCY","t2to3","output: dump of transfer from Model 2 to 3",PARLEV_OUTPUT,0);
|
||||||
|
GLOBAL_PARAMETER2(bool,Verbose,"verbose","v","0: not verbose; 1: verbose",PARLEV_OUTPUT,0);
|
||||||
|
GLOBAL_PARAMETER(bool,Log,"log","0: no logfile; 1: logfile",PARLEV_OUTPUT,0);
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(double,P0,"p0","fixed value for parameter p_0 in IBM-3/4 (if negative then it is determined in training)",PARLEV_EM,-1.0);
|
||||||
|
GLOBAL_PARAMETER(double,M5P0,"m5p0","fixed value for parameter p_0 in IBM-5 (if negative then it is determined in training)",PARLEV_EM,-1.0);
|
||||||
|
GLOBAL_PARAMETER3(bool,Peg,"pegging","p","DO PEGGING? (Y/N)","0: no pegging; 1: do pegging",PARLEV_EM,0);
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(short,OldADBACKOFF,"adbackoff","",-1,0);
|
||||||
|
GLOBAL_PARAMETER2(unsigned int,MAX_SENTENCE_LENGTH,"ml","MAX SENTENCE LENGTH","maximum sentence length",0,MAX_SENTENCE_LENGTH_ALLOWED);
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(short, DeficientDistortionForEmptyWord,"DeficientDistortionForEmptyWord","0: IBM-3/IBM-4 as described in (Brown et al. 1993); 1: distortion model of empty word is deficient; 2: distoriton model of empty word is deficient (differently); setting this parameter also helps to avoid that during IBM-3 and IBM-4 training too many words are aligned with the empty word",PARLEV_MODELS,0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
Here are parameters to support Load models and dump models
|
||||||
|
*/
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(int,restart,"restart","Restart training from a level,0: Normal restart, from model 1, 1: Model 1, 2: Model 2 Init (Using Model 1 model input and train model 2), 3: Model 2, (using model 2 input and train model 2), 4 : HMM Init (Using Model 1 model and train HMM), 5: HMM (Using Model 2 model and train HMM) 6 : HMM (Using HMM Model and train HMM), 7: Model 3 Init (Use HMM model and train model 3) 8: Model 3 Init (Use Model 2 model and train model 3) 9: Model 3, 10: Model 4 Init (Use Model 3 model and train Model 4) 11: Model 4 and on, ",PARLEV_INPUT,0);
|
||||||
|
GLOBAL_PARAMETER(bool,dumpCount,"dumpcount","Whether we are going to dump count (in addition to) final output?",PARLEV_OUTPUT,false);
|
||||||
|
GLOBAL_PARAMETER(bool,dumpCountUsingWordString,"dumpcountusingwordstring","In count table, should actual word appears or just the id? default is id",PARLEV_OUTPUT,false);
|
||||||
|
/// END
|
||||||
|
short OutputInAachenFormat=0;
|
||||||
|
bool Transfer=TRANSFER;
|
||||||
|
bool Transfer2to3=0;
|
||||||
|
short NoEmptyWord=0;
|
||||||
|
bool FEWDUMPS=0;
|
||||||
|
GLOBAL_PARAMETER(bool,ONLYALDUMPS,"ONLYALDUMPS","1: do not write any files",PARLEV_OUTPUT,0);
|
||||||
|
GLOBAL_PARAMETER(short,NCPUS,"NCPUS","Number of CPUS",PARLEV_EM,2);
|
||||||
|
GLOBAL_PARAMETER(short,CompactAlignmentFormat,"CompactAlignmentFormat","0: detailled alignment format, 1: compact alignment format ",PARLEV_OUTPUT,0);
|
||||||
|
GLOBAL_PARAMETER2(bool,NODUMPS,"NODUMPS","NO FILE DUMPS? (Y/N)","1: do not write any files",PARLEV_OUTPUT,0);
|
||||||
|
|
||||||
|
GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
|
||||||
|
"maximal fertility for fertility models", PARLEV_EM, 10);
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
|
||||||
|
TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
|
||||||
|
a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char* argv[]){
|
||||||
|
if(argc < 5){
|
||||||
|
cerr << "Usage: " << argv[0] << " vcb1 vcb2 outputFile baseFile [additional1 ]..." << endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
Vector<WordEntry> evlist,fvlist;
|
||||||
|
vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
|
||||||
|
TargetVocabFilename = argv[2];
|
||||||
|
SourceVocabFilename = argv[1];
|
||||||
|
eTrainVcbList.setName(argv[1]);
|
||||||
|
fTrainVcbList.setName(argv[2]);
|
||||||
|
eTrainVcbList.readVocabList();
|
||||||
|
fTrainVcbList.readVocabList();
|
||||||
|
Perplexity trainPerp, testPerp, trainViterbiPerp, testViterbiPerp;
|
||||||
|
tmodel<float, float> tTable;
|
||||||
|
sentenceHandler *corpus = new sentenceHandler();
|
||||||
|
|
||||||
|
|
||||||
|
model1 m1(CorpusFilename.c_str(), eTrainVcbList, fTrainVcbList, tTable,
|
||||||
|
trainPerp, *corpus, &testPerp, corpus, trainViterbiPerp,
|
||||||
|
&testViterbiPerp);
|
||||||
|
amodel<float> aTable(false);
|
||||||
|
amodel<float> aCountTable(false);
|
||||||
|
model2 m2(m1, aTable, aCountTable);
|
||||||
|
WordClasses french,english;
|
||||||
|
hmm h(m2,english,french);
|
||||||
|
string evcbcls = argv[1];
|
||||||
|
string fvcbcls = argv[2];
|
||||||
|
evcbcls += ".classes";
|
||||||
|
fvcbcls += ".classes";
|
||||||
|
h.makeWordClasses(m1.Elist, m1.Flist, evcbcls.c_str(), fvcbcls.c_str());
|
||||||
|
string base = argv[4];
|
||||||
|
string baseA = base+".alpha";
|
||||||
|
string baseB = base+".beta";
|
||||||
|
string output = argv[3];
|
||||||
|
string outputA = output+".alpha";
|
||||||
|
string outputB = output+".beta";
|
||||||
|
h.probs.readJumps(base.c_str(),NULL,baseA.c_str(), baseB.c_str());
|
||||||
|
// Start iteration:
|
||||||
|
for(int i = 5; i< argc ; i++){
|
||||||
|
string name = argv[i];
|
||||||
|
string nameA = name + ".alpha";
|
||||||
|
string nameB = name + ".beta";
|
||||||
|
if(h.counts.readJumps(name.c_str(),NULL,nameA.c_str(), nameB.c_str()))
|
||||||
|
h.probs.merge(h.counts);
|
||||||
|
else
|
||||||
|
cerr << "Error, cannot load name.c_str()";
|
||||||
|
h.clearCountTable();
|
||||||
|
}
|
||||||
|
h.probs.writeJumps(output.c_str(),NULL,outputA.c_str(), outputB.c_str());
|
||||||
|
delete corpus;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some utility functions to get it compile..
|
||||||
|
|
||||||
|
ofstream logmsg;
|
||||||
|
// Returns the decimal representation of n (despite its name, this converts
// a number to a string).  Negative values get a leading '-'.
const std::string str2Num(int n) {
  const bool negative = n < 0;
  // Work on the unsigned magnitude so that the most negative int is handled
  // without signed overflow.
  unsigned int magnitude =
      negative ? 0u - static_cast<unsigned int>(n) : static_cast<unsigned int>(n);

  std::string number;
  do {
    number.insert((size_t)0, 1, (char)(magnitude % 10 + '0'));
  } while ((magnitude /= 10) > 0);

  if (negative)
    number.insert((size_t)0, 1, '-');
  return (number);
}
|
||||||
|
double LAMBDA=1.09;
|
||||||
|
|
||||||
|
Vector<map< pair<int,int>,char > > ReferenceAlignment;
|
||||||
|
|
||||||
|
// Placeholder that lets this executable link.  It ignores its arguments,
// leaves the output parameters untouched and reports zero errors; the
// explicit return avoids falling off the end of a non-void function.
double ErrorsInAlignment(const map< pair<int,int>,char >&reference,
    const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
    int&eventsMissing, int&eventsToomuch, int pair_no){
  return 0.0;
}
|
||||||
|
|
||||||
|
// Intentionally empty placeholder: writes nothing to `out`.
void printGIZAPars(ostream&out){
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,154 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Routines to perform integer exponential arithmetic.
|
||||||
|
// A number x is represented as n, where x = b**n.
|
||||||
|
// It is assumed that b > 1, something like b = 1.001;
|
||||||
|
|
||||||
|
#include "logprob.h"
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <string>
|
||||||
|
// Lookup tables; allocated and filled by LogProb::Initialize().
double *LogProb::ntof = NULL; // ntof[n-nmin] holds b**n
|
||||||
|
int *LogProb::addtbl = NULL; // correction terms filled in Initialize()
|
||||||
|
int *LogProb::subtbl = NULL; // correction terms read by operator-=
|
||||||
|
|
||||||
|
const int LogProb::max_2byte_integer = 32767;
|
||||||
|
const int LogProb::min_2byte_integer = -32768;
|
||||||
|
const double LogProb::b = 1.001; // a logarithm basis
|
||||||
|
// Natural logarithm of the base b.
const double LogProb::logb2 = log(b);
|
||||||
|
//const int LogProb::nmax = round(78.0E0 * log(1.0E1) / logb2);
|
||||||
|
// Largest tabulated exponent: b**nmax is about 1e300.
const int LogProb::nmax = round(300.0E0 * log(1.0E1) / logb2);
|
||||||
|
const int LogProb::nmin = -nmax;
|
||||||
|
// Negative bound; the add/sub tables have -tblbnd+1 entries each.
const int LogProb::tblbnd = round(log((b-1.0E0)/2.0E0)/logb2);
|
||||||
|
// Exponent value -(2**23); operator-= stores it when a difference is zero.
const int LogProb::zeron = round(pow(-2, 23));
|
||||||
|
// Exponent of the value one, since b**0 == 1.
const int LogProb::onen = 0;
|
||||||
|
const int LogProb::infn = onen - zeron;
|
||||||
|
|
||||||
|
// Runs Initialize() during static initialization.  It must stay below the
// constants above, which Initialize() reads.
const int LogProb::initialized = LogProb::Initialize();
|
||||||
|
// Frequently used constants, constructed from plain probabilities.
const LogProb LogProb::zero(0);
|
||||||
|
const LogProb LogProb::one(1);
|
||||||
|
const LogProb LogProb::minus2(1e-2);
|
||||||
|
const LogProb LogProb::minus4(1e-4);
|
||||||
|
const LogProb LogProb::minus6(1e-6);
|
||||||
|
const LogProb LogProb::minus8(1e-8);
|
||||||
|
const LogProb LogProb::minus10(1e-10);
|
||||||
|
const LogProb LogProb::minus12(1e-12);
|
||||||
|
const LogProb LogProb::minus14(1e-14);
|
||||||
|
const LogProb LogProb::minus16(1e-16);
|
||||||
|
|
||||||
|
// static table initialization function
|
||||||
|
int LogProb::Initialize()
|
||||||
|
{
|
||||||
|
int nbytes = sizeof(double)*(nmax-nmin+1) + sizeof(int)*(0-tblbnd+1);
|
||||||
|
std::cerr << nbytes << " bytes used for LogProb tables (C++ version)\n";
|
||||||
|
ntof = new double[nmax-nmin+1];
|
||||||
|
addtbl = new int[-tblbnd+1];
|
||||||
|
subtbl = new int[-tblbnd+1];
|
||||||
|
|
||||||
|
// char filename[257];
|
||||||
|
// string filename ;
|
||||||
|
// ifstream ifs;
|
||||||
|
// ifs.open(filename.c_str());
|
||||||
|
// if (!ifs)
|
||||||
|
// {
|
||||||
|
int i;
|
||||||
|
std::cerr << "Building integer logs conversion tables\n";
|
||||||
|
ntof[0] = 0 ;
|
||||||
|
|
||||||
|
for (i=nmin+1; i<=nmax; ++i)
|
||||||
|
{
|
||||||
|
double x = i;
|
||||||
|
ntof[i-nmin] = exp(x*logb2);
|
||||||
|
|
||||||
|
}
|
||||||
|
for (i=tblbnd; i<=0; ++i)
|
||||||
|
{
|
||||||
|
double x = 1.0 + pow(b, i);
|
||||||
|
addtbl[i-tblbnd] = round(log(x)/logb2);
|
||||||
|
}
|
||||||
|
double sqrtb = exp(0.5*logb2);
|
||||||
|
for (i=0; i<=-tblbnd; ++i)
|
||||||
|
{
|
||||||
|
double x = sqrtb * pow(b, i) - 1.0;
|
||||||
|
subtbl[i] = round(log(x)/logb2);
|
||||||
|
}
|
||||||
|
// if (toolsRoot)
|
||||||
|
// {
|
||||||
|
// ofstream ofs(filename.c_str());
|
||||||
|
// if (!ofs)
|
||||||
|
// cerr << "Could not write LogProb data to " << filename << endl;
|
||||||
|
// else
|
||||||
|
// {
|
||||||
|
// ofs.write((const char *)ntof, sizeof(double) * (nmax-nmin+1));
|
||||||
|
// ofs.write((const char *)addtbl, sizeof(int) * (-tblbnd+1));
|
||||||
|
// ofs.write((const char *)subtbl, sizeof(int) * (-tblbnd+1));
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else
|
||||||
|
// {
|
||||||
|
// ifs.read((char *)ntof, sizeof(double) * (nmax - nmin + 1));
|
||||||
|
// ifs.read((char *)addtbl, sizeof(int) * (-tblbnd+1));
|
||||||
|
// ifs.read((char *)subtbl, sizeof(int) * (-tblbnd+1));
|
||||||
|
// }
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LogProb::FreeTables()
|
||||||
|
{
|
||||||
|
delete [] addtbl;
|
||||||
|
delete [] subtbl;
|
||||||
|
delete [] ntof;
|
||||||
|
}
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
// Arithmetic operators
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
// Subtract two logarithm numbers. Use the following method:
|
||||||
|
// b**n - b**m = b**m( b**(n-m) - 1 ), assuming n >= m.
|
||||||
|
LogProb& LogProb::operator-=(const LogProb &subs)
|
||||||
|
{
|
||||||
|
if (subs.logr == zeron)
|
||||||
|
return *this;
|
||||||
|
int a = logr - subs.logr;
|
||||||
|
if (a <= 0)
|
||||||
|
{
|
||||||
|
if (a < 0)
|
||||||
|
{
|
||||||
|
std::cerr << "WARNING(logprob): Invalid arguments to nsub" <<(*this)<< " " << subs << std::endl;
|
||||||
|
//abort();
|
||||||
|
}
|
||||||
|
logr = zeron;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
if (a > -tblbnd)
|
||||||
|
return *this;
|
||||||
|
logr = subs.logr + subtbl[a];
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,217 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
EGYPT Toolkit for Statistical Machine Translation
|
||||||
|
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef _LOGPROB_H
|
||||||
|
#define _LOGPROB_H
|
||||||
|
|
||||||
|
// Routines to perform integer exponential arithmetic.
|
||||||
|
// A number x is represented as n, where x = b**n
|
||||||
|
// It is assumed that b > 1, something like b = 1.001
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <cmath>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
//#define MAX(A,B) ((A) > (B) ? (A) : (B))
|
||||||
|
//#define MIN(A,B) ((A) > (B) ? (B) : (A))
|
||||||
|
|
||||||
|
|
||||||
|
// A non-negative number x stored as the integer exponent logr with
// x = b**logr.  The exponent value zeron stands for the number zero.
class LogProb {
public:
  // mj for cross entropy
  // Returns the stored exponent rescaled as logr * logb2 / log(2).
  double base2() const {
    return (logr * logb2 / log(2));
  }

  // Constructors
  LogProb() : logr(zeron) {}                       // starts out as zero
  LogProb(const LogProb &obj) : logr(obj.logr) {}
  // Converts a linear-domain value.  Anything that is not strictly positive
  // (0, negative numbers, NaN) has no logarithm and is stored as zero, so
  // log() never hands a NaN to the double -> int conversion.
  LogProb(double x) : logr(!(x > 0.0) ? zeron : round(log(x)/logb2)) {}
  // destructor
  ~LogProb() {}					// default destructor

  operator double() const	// converts logr to (double) b**logr
  {
    // Exponents outside [nmin, nmax] are clamped to the ends of the table.
    if (logr < nmin) return ntof[0];
    if (logr > nmax) return ntof[nmax-nmin];
    return ntof[logr-nmin];
  }

  LogProb &operator=(const LogProb &obj) { logr = obj.logr; return *this; }
  int operator!() const { return logr == zeron; }

  // iostream friend specifications
  friend std::ostream& operator<<(std::ostream& os, const LogProb &obj);
  friend std::istream& operator>>(std::istream& is, LogProb &obj);
  friend std::ostream& operator<<=(std::ostream& os, const LogProb &obj);
  friend std::istream& operator>>=(std::istream& is, LogProb &obj);

  // arithmetic operators
  LogProb &operator+=(const LogProb &add)	// logr2 = logb ( b**logr2 + b**logr1 )
  // Add two numbers represented as logarithms. Use the following method:
  //   b**n + b**m = b**n(1 + b**(m-n)), assuming n >= m.
  {
    if (add.logr == zeron)
      return *this;
    if (logr == zeron)
      {
	logr = add.logr;
	return *this;
      }
    int a = add.logr - logr;
    if (a > 0)
      {
	// Keep the larger exponent in logr and make the difference non-positive.
	a = -a;
	logr = add.logr;
      }
    if (a < tblbnd)
      return *this;		// difference beyond the table: value unchanged
    logr += addtbl[a-tblbnd];
    return *this;
  }

  LogProb &operator-=(const LogProb &);	// logr2 = logb ( b**logr2 - b**logr1 )
  LogProb operator*(const LogProb &mul) const	// logr3 = logr2 + logr1
  {
    LogProb result;		// start out with result == 0
    if ((logr != zeron) && (mul.logr != zeron))
      result.logr = std::max(logr+mul.logr, zeron);
    return result;
  }
  LogProb operator*(double x) const	// logr3 = logr2 + logr1
  {
    return (*this)*(LogProb)x;
  }
  LogProb operator^(const int i) const	// logr2 = logr1 * i
  {
    return power(i);
  }
  LogProb &operator*=(const LogProb &mul)	// logr2 += logr1
  {
    if ((logr == zeron) || (mul.logr == zeron))
      logr = zeron;
    else
      logr = std::max(logr+mul.logr, zeron);
    return *this;
  }
  LogProb operator/(const LogProb &div) const	// logr3 = logr2 -logr1
  {
    LogProb result;
    if (logr != zeron)
      result.logr = std::max(logr - div.logr, zeron);
    return result;
  }
  LogProb &operator/=(const LogProb &div)	// logr2 -= logr1
  {
    if (logr != zeron)
      logr = std::max(logr - div.logr, zeron);
    return *this;
  }
  LogProb operator+(const LogProb &l) const	// logr3 = logb ( b**logr2 + b**logr1 )
  { LogProb result(*this); result += l; return result; }
  LogProb operator-(const LogProb &l) const	// logr3 = logb ( b**logr2 - b**logr1 )
  { LogProb result(*this); result -= l; return result; }
  LogProb power(const int n) const	// logr2 = logr1 * int
  {
    LogProb result(*this);
    // Zero raised to a positive power stays zero.  Multiplying the zeron
    // sentinel would overflow for large n and yield an exponent that no
    // longer compares equal to zeron.
    if (logr == zeron && n > 0)
      return result;
    // Clamp at zeron, as operator* does, so tiny results collapse to zero.
    result.logr = std::max(logr * n, zeron);
    return result;
  }

  // Conditional operators
  int operator<(const LogProb &obj)  const { return logr <  obj.logr; }
  int operator<=(const LogProb &obj) const { return logr <= obj.logr; }
  int operator>(const LogProb &obj)  const { return logr >  obj.logr; }
  int operator>=(const LogProb &obj) const { return logr >= obj.logr; }
  int operator==(const LogProb &obj) const { return logr == obj.logr; }
  int operator!=(const LogProb &obj) const { return logr != obj.logr; }
  int operator<(double d)  const { return ((double)*this) <  d; }
  int operator<=(double d) const { return ((double)*this) <= d; }
  int operator>(double d)  const { return ((double)*this) >  d; }
  int operator>=(double d) const { return ((double)*this) >= d; }
  int operator==(double d) const { return ((double)*this) == d; }
  int operator!=(double d) const { return ((double)*this) != d; }

  LogProb &SetZero() { logr = zeron; return *this; }	// representation of 0,
  LogProb &SetOne() { logr = onen; return *this; }	// 1, and
  LogProb &SetInf() { logr = infn; return *this; }	// inf in logarithm domain

private:
  int logr;			// a representation of logarithm
  // static constants
  static const int initialized;	// initialization flag
  static const double b;
  static const double logb2;
  static const int nmin, nmax;
  static const int tblbnd;
  static const int zeron, onen, infn;	// zero, one, and inf in log domain
  static const int max_2byte_integer, min_2byte_integer;

  // Arithmetic computation Tables
  static double *ntof;
  static int *addtbl;
  static int *subtbl;

  static int Initialize();

public:
  static void FreeTables();
  // constants for initializing LogProbs to 0 or 1
  static const LogProb zero;
  static const LogProb one;
  static const LogProb minus2;
  static const LogProb minus4;
  static const LogProb minus6;
  static const LogProb minus8;
  static const LogProb minus10;
  static const LogProb minus12;
  static const LogProb minus14;
  static const LogProb minus16;
};
|
||||||
|
|
||||||
|
// iostream friend operators
|
||||||
|
inline std::ostream &operator<<(std::ostream& os, const LogProb &obj)
|
||||||
|
{
|
||||||
|
return os << (double) obj; // output in linear domain, b**logr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Text input: reads a linear-domain double and converts it to a LogProb.
// d is initialized so that a failed extraction assigns a well-defined zero
// instead of reading an indeterminate value; the failure itself remains
// visible to the caller through the stream state.
inline std::istream &operator>>(std::istream& is, LogProb &obj)
{
  double d = 0.0;
  is >> d;
  obj = d;
  return is;
}
|
||||||
|
|
||||||
|
inline std::ostream &operator<<=(std::ostream& os, const LogProb &obj) // write binary
|
||||||
|
{
|
||||||
|
os.write((const char *)&obj.logr, sizeof(obj.logr));
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::istream &operator>>=(std::istream& is, LogProb &obj)
|
||||||
|
{
|
||||||
|
is.read((char *)&obj.logr, sizeof(obj.logr));
|
||||||
|
return is;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,370 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef ARRAY_H_DEFINED
|
||||||
|
#define ARRAY_H_DEFINED
|
||||||
|
using namespace std;
|
||||||
|
#include "myassert.h"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <functional>
|
||||||
|
#include "my.h"
|
||||||
|
|
||||||
|
#define ARRAY_DEBUG
|
||||||
|
|
||||||
|
|
||||||
|
template<class T> class Array
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
T *p;
|
||||||
|
int realSize;
|
||||||
|
int maxWritten;
|
||||||
|
char a;
|
||||||
|
|
||||||
|
void copy(T *a,const T *b,int n);
|
||||||
|
void copy(T *a,T *b,int n);
|
||||||
|
void _expand();
|
||||||
|
|
||||||
|
public:
|
||||||
|
Array()
|
||||||
|
: p(0),realSize(0),maxWritten(-1) ,a(1)
|
||||||
|
{
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "MAKE ARRAY: " << this<<" "<<(void*)p << endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
Array(const Array<T> &x)
|
||||||
|
: p(new T[x.maxWritten+1]),realSize(x.maxWritten+1),maxWritten(x.maxWritten),a(x.a)
|
||||||
|
{
|
||||||
|
copy(p,x.p,realSize);
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
explicit Array(int n)
|
||||||
|
: p(new T[n]),realSize(n),maxWritten(n-1),a(0)
|
||||||
|
{
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
Array(int n,const T&_init,int _a=0)
|
||||||
|
: p(new T[n]),realSize(n),maxWritten(n-1),a(_a)
|
||||||
|
{
|
||||||
|
for(int iii=0;iii<n;iii++)p[iii]=_init;
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
~Array()
|
||||||
|
{
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||||
|
#endif
|
||||||
|
delete [] p;
|
||||||
|
}
|
||||||
|
|
||||||
|
Array<T>& operator=(const Array<T>&x)
|
||||||
|
{
|
||||||
|
if( this!= &x )
|
||||||
|
{
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
delete [] p;
|
||||||
|
realSize = x.maxWritten+1;
|
||||||
|
maxWritten = x.maxWritten;
|
||||||
|
a = x.a;
|
||||||
|
p = new T[realSize];
|
||||||
|
copy(p,x.p,realSize);
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
Array<T>& operator=(Array<T>&x)
|
||||||
|
{
|
||||||
|
if( this!= &x )
|
||||||
|
{
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||||
|
#endif
|
||||||
|
delete [] p;
|
||||||
|
realSize = x.maxWritten+1;
|
||||||
|
maxWritten = x.maxWritten;
|
||||||
|
a = x.a;
|
||||||
|
p = new T[realSize];
|
||||||
|
copy(p,x.p,realSize);
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
void allowAccess(int n)
|
||||||
|
{
|
||||||
|
while( realSize<=n )
|
||||||
|
_expand();
|
||||||
|
maxWritten=max(maxWritten,n);
|
||||||
|
massert( maxWritten<realSize );
|
||||||
|
}
|
||||||
|
void resize(int n)
|
||||||
|
{
|
||||||
|
while( realSize<n )
|
||||||
|
_expand();
|
||||||
|
maxWritten=n-1;
|
||||||
|
}
|
||||||
|
void sort(int until=-1)
|
||||||
|
{
|
||||||
|
if( until== -1 ) until=size();
|
||||||
|
std::sort(p,p+until);
|
||||||
|
}
|
||||||
|
void invsort(int until=-1)
|
||||||
|
{
|
||||||
|
if( until== -1 ) until=size();
|
||||||
|
std::sort(p,p+until,greater<T>());
|
||||||
|
}
|
||||||
|
void init(int n,const T&_init,bool _a=0)
|
||||||
|
{
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||||
|
#endif
|
||||||
|
delete []p;
|
||||||
|
p=new T[n];
|
||||||
|
realSize=n;
|
||||||
|
a=_a;
|
||||||
|
maxWritten=n-1;
|
||||||
|
for(int iii=0;iii<n;iii++)p[iii]=_init;
|
||||||
|
#ifdef VERY_ARRAY_DEBUG
|
||||||
|
cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
inline int size() const
|
||||||
|
{massert( maxWritten<realSize );
|
||||||
|
return maxWritten+1;}
|
||||||
|
inline int low() const
|
||||||
|
{ return 0; }
|
||||||
|
inline int high() const
|
||||||
|
{ return maxWritten; }
|
||||||
|
inline bool autoexpand() const
|
||||||
|
{return a;}
|
||||||
|
inline void autoexpand(bool autoExp)
|
||||||
|
{a=autoExp;}
|
||||||
|
int findMax() const;
|
||||||
|
int findMin() const;
|
||||||
|
const void errorAccess(int n) const;
|
||||||
|
inline T*getPointerToData(){return p;}
|
||||||
|
|
||||||
|
inline T& operator[](int n)
|
||||||
|
{
|
||||||
|
if( a && n==maxWritten+1 )
|
||||||
|
allowAccess(n);
|
||||||
|
if( n<0 || n>maxWritten )
|
||||||
|
errorAccess(n);
|
||||||
|
return p[n];
|
||||||
|
}
|
||||||
|
inline const T& operator[](int n) const
|
||||||
|
{
|
||||||
|
if(n<0 || n>maxWritten )
|
||||||
|
errorAccess(n);
|
||||||
|
return p[n];
|
||||||
|
}
|
||||||
|
const T&top(int n=0) const
|
||||||
|
{return (*this)[maxWritten-n];}
|
||||||
|
T&top(int n=0)
|
||||||
|
{return (*this)[maxWritten-n];}
|
||||||
|
T&push(const T&x)
|
||||||
|
{
|
||||||
|
(*this)[maxWritten+1]=x;
|
||||||
|
return top();
|
||||||
|
}
|
||||||
|
bool writeTo(ostream&out) const
|
||||||
|
{
|
||||||
|
out << "Array ";
|
||||||
|
out << size() << " ";
|
||||||
|
out << a << endl;
|
||||||
|
for(int iv=0;iv<=maxWritten;iv++)
|
||||||
|
{
|
||||||
|
writeOb(out,(*this)[iv]);
|
||||||
|
out << endl;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
bool readFrom(istream&in)
|
||||||
|
{
|
||||||
|
string s;
|
||||||
|
if( !in )
|
||||||
|
{
|
||||||
|
cerr << "ERROR(Array): file cannot be opened.\n";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
in >> s;
|
||||||
|
if( !(s=="Array") )
|
||||||
|
{
|
||||||
|
cerr << "ERROR(Array): Array!='"<<s<<"'\n";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
int biggest;
|
||||||
|
in >> biggest;
|
||||||
|
in >> a;
|
||||||
|
resize(biggest);
|
||||||
|
for(int iv=0;iv<size();iv++)
|
||||||
|
{
|
||||||
|
readOb(in,(*this)[iv]);
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class T> bool operator==(const Array<T> &x, const Array<T> &y)
|
||||||
|
{
|
||||||
|
if( &x == &y )
|
||||||
|
return 1;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if( y.size()!=x.size() )
|
||||||
|
return 0;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(int iii=0;iii<x.size();iii++)
|
||||||
|
if( !(x[iii]==y[iii]) )
|
||||||
|
return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T> bool operator<(const Array<T> &x, const Array<T> &y)
|
||||||
|
{
|
||||||
|
if( &x == &y )
|
||||||
|
return 0;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if( y.size()<x.size() )
|
||||||
|
return !(y<x);
|
||||||
|
for(int iii=0;iii<x.size();iii++)
|
||||||
|
{
|
||||||
|
massert( iii!=y.size() );
|
||||||
|
if( x[iii]<y[iii] )
|
||||||
|
return 1;
|
||||||
|
else if( y[iii]<x[iii] )
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return x.size()!=y.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reports an out-of-range access to element n on cerr and then on cout,
// triggers massert(0), and aborts unless DEBUG is defined.
template<class T> const void Array<T>:: errorAccess(int n) const
{
  ostream *const sinks[2]={ &cerr, &cout };
  for(int s=0;s<2;++s)
    {
      *sinks[s] << "ERROR: Access to array element " << n
		<< " (" << maxWritten << "," << realSize << "," << (void*)p << " " << a << ")\n";
    }
  massert(0);
#ifndef DEBUG
  abort();
#endif
}
|
||||||
|
|
||||||
|
// Human-readable dump: "Array(<size>,<autoexpand>){ " followed by
// " <index>:<value>;" for every element and a closing "}\n".
template<class T> ostream& operator<<(ostream&o,const Array<T>&a)
{
  const int count=a.size();
  o << "Array(" << count << "," << a.autoexpand() << "){ ";
  for(int k=0;k<count;++k)
    {
      o << " " << k << ":" << a[k] << ";";
    }
  o << "}\n";
  return o;
}
|
||||||
|
|
||||||
|
// Placeholder extraction operator: it reads nothing and leaves the array
// untouched; the stream is handed back unchanged.
template<class T> istream& operator>>(istream&in, Array<T>&)
{
  return in;
}
|
||||||
|
|
||||||
|
template<class T> int Hash(const Array<T>&a)
|
||||||
|
{
|
||||||
|
int n=0;
|
||||||
|
for(int iii=0;iii<a.size();iii++)
|
||||||
|
n+=Hash(a[iii])*(iii+1);
|
||||||
|
return n+a.size()*47;
|
||||||
|
}
|
||||||
|
// Assigns the first n elements of bb to aa, in ascending index order.
template<class T> void Array<T>::copy(T *aa,const T *bb,int n)
{
  int k=0;
  while( k<n )
    {
      aa[k]=bb[k];
      ++k;
    }
}
|
||||||
|
// Same as the const-source overload: assigns the first n elements of bb to
// aa, in ascending index order.
template<class T> void Array<T>::copy(T *aa,T *bb,int n)
{
  int k=0;
  while( k<n )
    {
      aa[k]=bb[k];
      ++k;
    }
}
|
||||||
|
|
||||||
|
// Grows the storage to 2*realSize+1 elements, keeping the existing ones.
// realSize and p are only updated once the new buffer has been allocated and
// filled, so a failed allocation cannot leave realSize larger than the
// buffer p actually points to.
template<class T> void Array<T>::_expand()
{
#ifdef VERY_ARRAY_DEBUG
  cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
  const int oldSize=realSize;
  const int newSize=oldSize*2+1;
  T *grown=new T[newSize];
  copy(grown,p,oldSize);
  delete [] p;
  p=grown;
  realSize=newSize;
#ifdef VERY_ARRAY_DEBUG
  cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
}
|
||||||
|
|
||||||
|
// Index of the largest element according to operator<; the first one wins
// on ties.  Returns -1 for an empty array.
template<class T> int Array<T>::findMax() const
{
  const int count=size();
  if( count==0 )
    return -1;
  int best=0;
  for(int k=1;k<count;++k)
    {
      if( (*this)[best]<(*this)[k] )
	best=k;
    }
  return best;
}
|
||||||
|
// Index of the smallest element according to operator<; the first one wins
// on ties.  Returns -1 for an empty array.
template<class T> int Array<T>::findMin() const
{
  const int count=size();
  if( count==0 )
    return -1;
  int best=0;
  for(int k=1;k<count;++k)
    {
      if( (*this)[k]<(*this)[best] )
	best=k;
    }
  return best;
}
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,287 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef FIXARRAY_H_DEFINED
|
||||||
|
#define FIXARRAY_H_DEFINED
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
|
// Writes f followed by a single blank, the separator readOb() expects.
// Always returns true.
template<class T>
bool writeOb(ostream&out,const T&f)
{
  out << f;
  out << " ";
  return true;
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
bool readOb(istream&in,T&f)
|
||||||
|
{
|
||||||
|
in >> f;
|
||||||
|
char c;
|
||||||
|
in.get(c);
|
||||||
|
massert(c==' ');
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writes the label s, a blank, the value f and a trailing blank, i.e. the
// layout the labelled readOb() expects.  Always returns true.
template<class T>
bool writeOb(ostream&out,const string &s,const T&f)
{
  out << s;
  out << " ";
  out << f;
  out << " ";
  return true;
}
|
||||||
|
template<class T>
|
||||||
|
bool readOb(istream&in,const string&s,T&f)
|
||||||
|
{
|
||||||
|
string ss;
|
||||||
|
in >> ss;
|
||||||
|
if( s!=ss )
|
||||||
|
{
|
||||||
|
cerr << "ERROR: readOb should be '" << s << "' and is '" << ss << "'" << endl;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
in >> f;
|
||||||
|
char c;
|
||||||
|
in.get(c);
|
||||||
|
massert(c==' ');
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T> class FixedArray
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
void copy(T *aa,const T *bb,int nnn)
|
||||||
|
{for(int iii=0;iii<nnn;iii++)aa[iii]=bb[iii];}
|
||||||
|
|
||||||
|
public:
|
||||||
|
T *p;
|
||||||
|
int realSize;
|
||||||
|
FixedArray()
|
||||||
|
: p(0),realSize(0){}
|
||||||
|
FixedArray(const FixedArray<T> &x)
|
||||||
|
: p(new T[x.realSize]),realSize(x.realSize) {copy(p,x.p,realSize);}
|
||||||
|
explicit FixedArray(int n)
|
||||||
|
: p(new T[n]),realSize(n){}
|
||||||
|
FixedArray(int n,const T&_init)
|
||||||
|
: p(new T[n]),realSize(n){for(int z=0;z<n;z++)p[z]=_init;}
|
||||||
|
FixedArray(const FixedArray&f,const T&t)
|
||||||
|
: p(new T[f.size()+1]),realSize(f.size()+1){for(int z=0;z<f.size();z++)p[z]=f[z];p[f.size()]=t;}
|
||||||
|
~FixedArray()
|
||||||
|
{ delete [] p;p=0;realSize=-1;}
|
||||||
|
|
||||||
|
FixedArray<T>& operator=(const FixedArray<T>&x)
|
||||||
|
{
|
||||||
|
if( this!= &x )
|
||||||
|
{
|
||||||
|
delete [] p;
|
||||||
|
realSize = x.realSize;
|
||||||
|
p = new T[x.realSize];
|
||||||
|
copy(p,x.p,realSize);
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
void resize(int n)
|
||||||
|
{
|
||||||
|
if( n<=realSize )
|
||||||
|
shrink(n);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
T*np=new T[n];
|
||||||
|
copy(np,p,realSize);
|
||||||
|
delete []p;
|
||||||
|
p=np;
|
||||||
|
realSize=n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void shrink(int n)
|
||||||
|
{
|
||||||
|
assert(n<=realSize);
|
||||||
|
realSize=n;
|
||||||
|
}
|
||||||
|
void init(int n,const T&_init)
|
||||||
|
{
|
||||||
|
delete []p;
|
||||||
|
p=new T[n];
|
||||||
|
realSize=n;
|
||||||
|
for(int l=0;l<n;l++)p[l]=_init;
|
||||||
|
}
|
||||||
|
inline const T&top(int n=0) const
|
||||||
|
{return (*this)[realSize-1-n];}
|
||||||
|
inline int size() const
|
||||||
|
{return realSize;}
|
||||||
|
|
||||||
|
inline T*begin(){ return p; }
|
||||||
|
inline T*end(){ return p+realSize; }
|
||||||
|
|
||||||
|
inline const T*begin()const{ return p; }
|
||||||
|
inline const T*end()const{return p+realSize;}
|
||||||
|
|
||||||
|
inline int low() const
|
||||||
|
{return 0;}
|
||||||
|
inline int high() const
|
||||||
|
{return realSize-1;}
|
||||||
|
const void errorAccess(int n) const;
|
||||||
|
|
||||||
|
inline T& operator[](int n)
|
||||||
|
{
|
||||||
|
return p[n];
|
||||||
|
}
|
||||||
|
inline const T& operator[](int n) const
|
||||||
|
{
|
||||||
|
return p[n];
|
||||||
|
}
|
||||||
|
bool writeTo(ostream&out) const
|
||||||
|
{
|
||||||
|
out << "FixedArray ";
|
||||||
|
out << size() << " ";
|
||||||
|
for(int a=0;a<size();a++)
|
||||||
|
{
|
||||||
|
writeOb(out,(*this)[a]);
|
||||||
|
out << " ";
|
||||||
|
}
|
||||||
|
out << endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
bool readFrom(istream&in)
|
||||||
|
{
|
||||||
|
string s;
|
||||||
|
if( !in )
|
||||||
|
{
|
||||||
|
cerr << "ERROR(FixedArray): file cannot be opened.\n";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
in >> s;
|
||||||
|
if( !(s=="FixedArray") )
|
||||||
|
{
|
||||||
|
cerr << "ERROR(FixedArray): FixedArray!='"<<s<<"'\n";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
int biggest;
|
||||||
|
in >> biggest;
|
||||||
|
resize(biggest);
|
||||||
|
for(int a=0;a<size();a++)
|
||||||
|
readOb(in,(*this)[a]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
void sort(int until=-1)
|
||||||
|
{
|
||||||
|
if( until== -1 ) until=size();
|
||||||
|
std::sort(p,p+until);
|
||||||
|
}
|
||||||
|
void invsort(int until=-1)
|
||||||
|
{
|
||||||
|
if( until== -1 ) until=size();
|
||||||
|
std::sort(p,p+until,greater<T>());
|
||||||
|
}
|
||||||
|
int binary_locate(const T&t)
|
||||||
|
{
|
||||||
|
T*ppos=std::lower_bound(p,p+size(),t);
|
||||||
|
int pos=ppos-p;
|
||||||
|
if( pos>=-1&&pos<size() )
|
||||||
|
return pos;
|
||||||
|
else
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
int binary_search(const T&t)
|
||||||
|
{
|
||||||
|
T*ppos=std::lower_bound(p,p+size(),t);
|
||||||
|
int pos=ppos-p;
|
||||||
|
if( pos>=0&&pos<size()&& *ppos==t )
|
||||||
|
return pos;
|
||||||
|
else
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
typedef T* iterator;
|
||||||
|
typedef const T* const_iterator;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class T> bool operator<(const FixedArray<T> &x, const FixedArray<T> &y)
|
||||||
|
{
|
||||||
|
return lexicographical_compare(x.begin(),x.end(),y.begin(),y.end());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class T> bool operator==(const FixedArray<T> &x, const FixedArray<T> &y)
|
||||||
|
{
|
||||||
|
if( &x == &y )return 1;
|
||||||
|
const int s = x.size();
|
||||||
|
if( s !=y.size() )return 0;
|
||||||
|
for(int iii=0;iii<s;iii++)
|
||||||
|
if( !(x.p[iii]==y.p[iii]) )
|
||||||
|
return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T> int Hash(const FixedArray<T>&a)
|
||||||
|
{
|
||||||
|
int n=0;
|
||||||
|
const int s=a.size();
|
||||||
|
for(int iii=0;iii<s;iii++)
|
||||||
|
n=13*n+Hash(a.p[iii]);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reports an invalid access to element n on cerr, then triggers massert(0).
// The message is written first, as Array<T>::errorAccess does, so that it is
// not lost when the assertion stops the program.
template<class T> const void FixedArray<T>:: errorAccess(int n) const
{
  cerr << "ERROR: Access to array element " << n
       << " (" << realSize << "," << (void*)p << ")\n";
  massert(0);
}
|
||||||
|
|
||||||
|
// Human-readable dump: "FixedArray(<size>){ " followed by
// " <index>:<value>;" for every element and a closing "}\n".
template<class T> ostream& operator<<(ostream&o,const FixedArray<T>&a)
{
  const int count=a.size();
  o << "FixedArray(" << count << "){ ";
  for(int k=0;k<count;++k)
    {
      o << " " << k << ":" << a[k] << ";";
    }
  o << "}\n";
  return o;
}
|
||||||
|
|
||||||
|
// Placeholder extraction operator: it reads nothing and leaves the array
// untouched; the stream is handed back unchanged.
template<class T> istream& operator>>(istream&in, FixedArray<T>&)
{
  return in;
}
|
||||||
|
|
||||||
|
// Element-wise sum of two arrays; massert checks that the sizes match.
template<class T> FixedArray<T> operator+(const FixedArray<T>&a,const FixedArray<T>&b)
{
  massert(a.size()==b.size());
  const int count=a.size();
  FixedArray<T> sum(count);
  int k=0;
  while( k<count )
    {
      sum[k]=a[k]+b[k];
      ++k;
    }
  return sum;
}
|
||||||
|
// Element-wise logical OR (||) of two arrays; iassert checks that the sizes
// match.
template<class T> FixedArray<T> operator|(const FixedArray<T>&aaa,const FixedArray<T>&bbb)
{
  iassert(aaa.size()==bbb.size());

  const int count=aaa.size();
  FixedArray<T> merged(count);
  int k=0;
  while( k<count )
    {
      merged.p[k]=aaa.p[k]||bbb.p[k];
      ++k;
    }
  return merged;
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,48 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef CLASS_FlexArray_defined
|
||||||
|
#define CLASS_FlexArray_defined
|
||||||
|
#include "FixedArray.h"
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
class FlexArray
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
FixedArray<T> p;
|
||||||
|
int start,end;
|
||||||
|
public:
|
||||||
|
FlexArray(int _start=0,int _end=-1)
|
||||||
|
: p(_end-_start+1),start(_start),end(_end) {}
|
||||||
|
T&operator[](int i)
|
||||||
|
{return p[i-start];}
|
||||||
|
const T&operator[](int i)const
|
||||||
|
{returnp[i-start];}
|
||||||
|
int low()const{return start;}
|
||||||
|
int high()const{return end;}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,159 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include "GDAOptimization.h"
|
||||||
|
#include "ProblemTest.h"
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#define GDAOptimization GDAOptimization
|
||||||
|
#define IterOptimization IterOptimization
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Initial threshold used by the (Problem&,int) constructor. The value 1e100
// doubles as a sentinel: zInitialize() replaces a threshold that is exactly
// 1e100 with the problem's current value.
double GDAOptimization::defaultTemperatur=1e100;
|
||||||
|
|
||||||
|
|
||||||
|
// Default cooling factor used by the (Problem&,int) constructor;
// optimizeValue() overwrites it while scanning candidate values.
double GDAOptimization::defaultAlpha=0.001;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Builds an optimizer for problem p with at most m steps, taking threshold
// and cooling factor from the static defaults.
// gdaEndFlag is now initialised to 0: previously it stayed indeterminate
// until the first abkuehlen() call, although the copy constructor and end()
// read it.
GDAOptimization::GDAOptimization(Problem &p,int m)
  : IterOptimization(p,m),
    temperatur(defaultTemperatur),
    alpha(defaultAlpha),
    gdaEndFlag(0)
{
}
|
||||||
|
|
||||||
|
|
||||||
|
// Builds an optimizer for problem p with explicit threshold t, cooling
// factor a and at most m steps.
// gdaEndFlag is now initialised to 0: previously it stayed indeterminate
// until the first abkuehlen() call, although the copy constructor and end()
// read it.
GDAOptimization::GDAOptimization(Problem &p,double t,double a,int m)
  : IterOptimization(p,m),
    temperatur(t),
    alpha(a),
    gdaEndFlag(0)
{
}
|
||||||
|
|
||||||
|
|
||||||
|
// Copy constructor: copies the base-class state and then the three
// GDA-specific members (threshold, cooling factor, end flag) from o.
GDAOptimization::GDAOptimization(GDAOptimization &o)
  : IterOptimization(o),
    temperatur(o.temperatur),
    alpha(o.alpha),
    gdaEndFlag(o.gdaEndFlag)
{
}
|
||||||
|
|
||||||
|
|
||||||
|
void GDAOptimization::zInitialize()
|
||||||
|
{
|
||||||
|
IterOptimization::zInitialize();
|
||||||
|
if(temperatur==1e100)
|
||||||
|
{
|
||||||
|
double v=problem.value();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
temperatur=v;
|
||||||
|
}
|
||||||
|
assert(alpha>=0);
|
||||||
|
}
|
||||||
|
|
||||||
|
short GDAOptimization::accept(double delta)
|
||||||
|
{
|
||||||
|
if( curValue + delta < temperatur )
|
||||||
|
return 1;
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void GDAOptimization::abkuehlen()
|
||||||
|
{
|
||||||
|
double newTemperatur = temperatur - alpha*(temperatur - curValue);
|
||||||
|
if( fabs(temperatur - newTemperatur)<1e-30 )
|
||||||
|
gdaEndFlag=1;
|
||||||
|
else
|
||||||
|
gdaEndFlag=0;
|
||||||
|
temperatur = newTemperatur;
|
||||||
|
}
|
||||||
|
|
||||||
|
short GDAOptimization::end()
|
||||||
|
{
|
||||||
|
return ( endFlag>0 ) && ( gdaEndFlag );
|
||||||
|
}
|
||||||
|
|
||||||
|
void GDAOptimization::makeGraphOutput()
|
||||||
|
{
|
||||||
|
IterOptimization::makeGraphOutput();
|
||||||
|
*GraphOutput << temperatur-curValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Scans numParameter+1 candidate values for defaultAlpha and returns the one
// for which solveProblem reported the smallest result.
//
//   p, proParameter, optimierungsschritte  passed through to solveProblem
//   numParameter   number of scan intervals; candidate i is
//                  0.002 * (200^(1/numParameter))^i, for i = 0..numParameter
//   typ            must be 1; any other value prints an error and exits
//   print          non-zero: write one statistics line per candidate to cout
//
// Side effects: defaultTemperatur is reset to 1e100 before the scan and
// defaultAlpha is left at 0.03 afterwards. Returns -1 if no candidate
// produced a result below 1e100.
double GDAOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,int typ,
                                      int optimierungsschritte,int print)
{
  if( typ!=1 )
    {
      cerr << "Error: wrong parameter-type in GDAOptimization::optimizeValue ("
           << typ << ")\n";
      exit(1);
    }

  double bestPar=-1;
  double best=1e100;
  double now;

  if( print )
    cout << "#GDA-optimizeValues: " << numParameter<<endl;

  defaultTemperatur=1e100;

  for(int i=0;i<=numParameter;i++)
    {
      StatVar endStat,runtime,initStat;

      defaultAlpha = pow(pow(200,1.0/numParameter),i)*0.002;
      solveProblem(0,p,proParameter,optimierungsschritte,GDA_OPT,now,endStat,
                   runtime,initStat);

      if( best>now )
        {
          best=now;
          bestPar=defaultAlpha;
        }

      if( !print )
        continue;

      cout << defaultAlpha <<" ";
      cout << endStat.getMean() << " " << endStat.quantil(0.2) << " "
           << endStat.quantil(0.79) << " " << runtime.getMean() << " "
           << endStat.quantil(0.0) << " " << endStat.getSigma() << " "
           << endStat.getSigmaSmaller()<< " "<< endStat.getSigmaBigger()<< endl;
    }

  // The column legend is printed after the data rows, as in the original.
  if( print )
    cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit"
      " Bester Sigma SigmaSmaller SigmaBigger\n";

  defaultAlpha=0.03;
  return bestPar;
}
|
||||||
|
|
@ -0,0 +1,80 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef GDAOPTIMIZATION
|
||||||
|
#define GDAOPTIMIZATION
|
||||||
|
#include "IterOptimization.h"
|
||||||
|
|
||||||
|
class GDAOptimization : public IterOptimization
|
||||||
|
{
|
||||||
|
|
||||||
|
private:
|
||||||
|
double temperatur;
|
||||||
|
double alpha;
|
||||||
|
short gdaEndFlag;
|
||||||
|
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual void zInitialize();
|
||||||
|
|
||||||
|
|
||||||
|
virtual short accept(double delta);
|
||||||
|
|
||||||
|
|
||||||
|
virtual void abkuehlen();
|
||||||
|
|
||||||
|
|
||||||
|
virtual short end();
|
||||||
|
|
||||||
|
|
||||||
|
virtual void makeGraphOutput();
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
GDAOptimization(Problem &p,double temperatur,double alpha,
|
||||||
|
int maxIter=-1);
|
||||||
|
|
||||||
|
|
||||||
|
GDAOptimization(Problem &p,int maxIter=-1);
|
||||||
|
|
||||||
|
|
||||||
|
GDAOptimization(GDAOptimization &o);
|
||||||
|
|
||||||
|
|
||||||
|
static double optimizeValue(Problem &p,int proParameter,
|
||||||
|
int numParameter,int typ,int schritte= -1,int verbose=1);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static double defaultTemperatur;
|
||||||
|
static double defaultAlpha;
|
||||||
|
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
@ -0,0 +1,57 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include "HCOptimization.h"
|
||||||
|
|
||||||
|
// Builds a hill-climbing optimizer for p. A non-positive step limit m is
// replaced by the problem's expected number of iterations.
HCOptimization::HCOptimization(Problem &p,int m)
  : IterOptimization(p,m)
{
  const bool limitGiven = (maxStep>0);
  if( !limitGiven )
    maxStep = (int)(problem.expectedNumberOfIterations());
}
|
||||||
|
// Copy constructor: all state lives in the base class, so only the base
// copy constructor is invoked.
HCOptimization::HCOptimization(HCOptimization &o)
  : IterOptimization(o)
{
  // nothing further to copy
}
|
||||||
|
|
||||||
|
|
||||||
|
// Accepts (returns 1) only changes with a strictly negative delta;
// everything else is rejected (returns 0).
short HCOptimization::accept(double delta)
{
  return ( delta < 0 ) ? 1 : 0;
}
|
||||||
|
short HCOptimization::end()
|
||||||
|
{
|
||||||
|
return endFlag>0;
|
||||||
|
}
|
||||||
|
// Cooling hook required by IterOptimization; this optimizer has no
// per-step state to update, so the body is empty.
void HCOptimization::abkuehlen()
{
  // intentionally empty
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,54 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef HCOPTIMIZATION
|
||||||
|
#define HCOPTIMIZATION
|
||||||
|
#include "IterOptimization.h"
|
||||||
|
|
||||||
|
class HCOptimization : public IterOptimization
|
||||||
|
{
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual short accept(double delta);
|
||||||
|
|
||||||
|
|
||||||
|
virtual void abkuehlen();
|
||||||
|
|
||||||
|
|
||||||
|
virtual short end();
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
HCOptimization(Problem &p,int maxIter=-1);
|
||||||
|
|
||||||
|
|
||||||
|
HCOptimization(HCOptimization &o);
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
#endif
|
@ -0,0 +1,199 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include "IterOptimization.h"
|
||||||
|
#include "ProblemTest.h"
|
||||||
|
|
||||||
|
// Optional stream for progress graphs. minimize() writes to it only when the
// pointer is non-null.
ostream *GraphOutput;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Binds the optimizer to problem p with step limit m and marks it as not
// yet initialised; the remaining counters are set by zInitialize(), which
// minimize() calls on first use.
IterOptimization::IterOptimization(Problem& p,int m)
  : maxNonBetterIterations(0),
    problem(p),
    maxStep(m),
    initialisiert(0)
{
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Copy constructor: shares o's Problem reference and copies the complete
// iteration state.
// curStep is now copied as well. It was previously omitted, so a copy of an
// already-initialised optimizer (initialisiert!=0, hence no zInitialize()
// on the next minimize()) continued with an indeterminate step counter.
IterOptimization::IterOptimization(IterOptimization& o) : Optimization(),problem(o.problem)
{
  maxNonBetterIterations=o.maxNonBetterIterations;
  curStep = o.curStep;
  curValue = o.curValue;
  bestStep = o.bestStep;
  bestValue = o.bestValue;
  maxStep = o.maxStep;
  initialisiert = o.initialisiert;
  endFlag = o.endFlag;
  endFlag2 = o.endFlag2;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
double IterOptimization::minimize(int steps)
|
||||||
|
{
|
||||||
|
if( !initialisiert )
|
||||||
|
zInitialize();
|
||||||
|
|
||||||
|
if( steps==0 )
|
||||||
|
return curValue;
|
||||||
|
|
||||||
|
int t=0;
|
||||||
|
int every=(steps<0)?10000:(steps/1000+1);
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
curStep++;
|
||||||
|
t++;
|
||||||
|
if(verboseMode&&(curStep%1000==0))
|
||||||
|
{
|
||||||
|
if(steps>0)
|
||||||
|
cout << "Processed: " << 100.0*(curStep/(double)max(maxStep,1)) << " percent. (IterOptimization run) "
|
||||||
|
<< curValue << " max:" << maxStep << " " << steps << " \r";
|
||||||
|
else
|
||||||
|
cout << "In step:" << curStep << " currentValue: " << curValue
|
||||||
|
<< " bestValue: " << bestValue-curValue << " " << curStep-bestStep << ". \r";
|
||||||
|
cout.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
ProblemChange *change= &(problem.change());
|
||||||
|
|
||||||
|
|
||||||
|
double delta=problem.valueChange(*change);
|
||||||
|
|
||||||
|
|
||||||
|
abkuehlen();
|
||||||
|
|
||||||
|
|
||||||
|
if( accept(delta) )
|
||||||
|
{
|
||||||
|
|
||||||
|
problem.doChange(*change);
|
||||||
|
|
||||||
|
|
||||||
|
curValue+=delta;
|
||||||
|
|
||||||
|
|
||||||
|
if( curValue<bestValue-1e-10 )
|
||||||
|
{
|
||||||
|
bestValue=curValue;
|
||||||
|
bestStep=curStep;
|
||||||
|
endFlag2=endFlag=0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( verboseMode>1 )
|
||||||
|
cout<<"in step: "<<curStep<<" accepted with : "<<delta<<endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(curStep - bestStep>maxNonBetterIterations && maxNonBetterIterations>0)
|
||||||
|
endFlag=1;
|
||||||
|
if(curStep - bestStep>2*maxNonBetterIterations && maxNonBetterIterations>0)
|
||||||
|
endFlag2=1;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if( GraphOutput&&((curStep%every)==0) )
|
||||||
|
{
|
||||||
|
makeGraphOutput();
|
||||||
|
*GraphOutput<<" "<<delta<<endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
delete change;
|
||||||
|
} while( t!=steps && (!end()) && (!problem.endCriterion()) );
|
||||||
|
|
||||||
|
if( GraphOutput)
|
||||||
|
{
|
||||||
|
makeGraphOutput();
|
||||||
|
*GraphOutput<<endl;
|
||||||
|
}
|
||||||
|
return curValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void IterOptimization::zInitialize()
|
||||||
|
{
|
||||||
|
initialisiert=1;
|
||||||
|
bestValue=curValue=problem.value();
|
||||||
|
maxNonBetterIterations=problem.maxNonBetterIterations();
|
||||||
|
bestStep=curStep=0;
|
||||||
|
endFlag2=endFlag=0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void IterOptimization::makeGraphOutput()
|
||||||
|
{
|
||||||
|
|
||||||
|
*GraphOutput << curStep << " " <<curValue << " ";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Recursive interval search for a good value of `parameter` in [min,max].
//
// The interval is probed at its 1/3 and 2/3 points: `parameter` is set to
// the probe value and solveProblem is run nRun times with method
// `verfahren`. The search then recurses (with nPar reduced by 2) into the
// two-thirds of the interval that contains the better probe. The best of
// the two probes and the recursive result is returned, and its mean value
// is stored in bv.
//
// When nPar<=0 the interval midpoint is returned and bv is left untouched.
// One line per probe is written to cout.
//
// NOTE(review): `parameter` is presumably a reference to the tuning
// variable that solveProblem's optimizer reads; confirm against the callers.
// (The parameter declaration had been corrupted to "double ¶meter", an
// HTML-entity artefact of "&parameter"; it is restored here.)
double IterOptimizationOptimizeParameter(Problem &p,
                                         double &parameter,double min,double max,
                                         int nRun,int nPar,int verfahren,
                                         double &bv)
{
  if( nPar<=0 )
    return (max+min)/2;

  StatVar end1,time1,init1;
  StatVar end2,time2,init2;
  double mean1,mean2;
  double par1,par2;

  // Probe at one third of the interval.
  parameter = par1 = min + (max-min)/3;
  solveProblem(0,p,nRun,-1,verfahren,mean1,end1,time1,init1);
  cout << parameter << " " << mean1 << " " << end1.quantil(0.0) << " " << end1.quantil(1.0) << endl;

  // Probe at two thirds of the interval.
  parameter = par2 = min + 2*(max-min)/3;
  solveProblem(0,p,nRun,-1,verfahren,mean2,end2,time2,init2);
  cout << parameter << " " << mean2 << " " << end2.quantil(0.0) << " " << end2.quantil(1.0) << endl;

  // Recurse into the sub-interval around the better probe. bestVal is
  // pre-seeded with that probe's mean because the recursion leaves it
  // unchanged once nPar runs out.
  double bestPar,bestVal;
  if(mean1<mean2)
    {
      bestVal = mean1;
      bestPar=IterOptimizationOptimizeParameter(p,parameter,min,min+2*(max-min)/3,nRun,nPar-2,verfahren,bestVal);
    }
  else
    {
      bestVal = mean2;
      bestPar=IterOptimizationOptimizeParameter(p,parameter,min+(max-min)/3,max,nRun,nPar-2,verfahren,bestVal);
    }

  if( mean1<bestVal&&mean1<=mean2 )
    {
      bv = mean1;
      return par1;
    }
  else if(mean2<bestVal && mean2<=mean1)
    {
      bv = mean2;
      return par2;
    }
  else
    {
      bv = bestVal;
      return bestPar;
    }
}
|
@ -0,0 +1,123 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef ITEROPTIMIZATION
|
||||||
|
#define ITEROPTIMIZATION
|
||||||
|
|
||||||
|
#include "Optimization.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define ANZ_VERSCHLECHTERUNGEN 500
|
||||||
|
|
||||||
|
extern ostream *GraphOutput;
|
||||||
|
|
||||||
|
|
||||||
|
class IterOptimization : public Optimization
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
int maxNonBetterIterations;
|
||||||
|
|
||||||
|
|
||||||
|
protected:
|
||||||
|
Problem &problem;
|
||||||
|
int curStep;
|
||||||
|
double curValue;
|
||||||
|
int bestStep;
|
||||||
|
double bestValue;
|
||||||
|
int maxStep;
|
||||||
|
int initialisiert;
|
||||||
|
short endFlag;
|
||||||
|
short endFlag2;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
virtual void makeGraphOutput();
|
||||||
|
|
||||||
|
|
||||||
|
virtual short end()=0;
|
||||||
|
|
||||||
|
|
||||||
|
virtual void abkuehlen()=0;
|
||||||
|
|
||||||
|
|
||||||
|
virtual short accept(double delta)=0;
|
||||||
|
|
||||||
|
|
||||||
|
virtual void zInitialize();
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
IterOptimization(Problem &p,int maxIter=-1);
|
||||||
|
|
||||||
|
|
||||||
|
IterOptimization(IterOptimization &o);
|
||||||
|
|
||||||
|
|
||||||
|
virtual double minimize(int steps=-1);
|
||||||
|
|
||||||
|
|
||||||
|
inline int getCurStep();
|
||||||
|
|
||||||
|
|
||||||
|
inline double getCurrentValue();
|
||||||
|
|
||||||
|
|
||||||
|
inline const Problem& getProblem();
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
// Recursive interval search for a good value of `parameter` in [min,max];
// returns the best value found and stores its mean result in bv.
// (The declaration had been corrupted to "double ¶meter", an HTML-entity
// artefact of "&parameter"; it is restored here.)
double IterOptimizationOptimizeParameter(Problem &p,
                                         double &parameter,double min,double max,
                                         int nRun,int nPar,int verfahren,double &bv);
|
||||||
|
|
||||||
|
inline int IterOptimization::getCurStep()
|
||||||
|
{
|
||||||
|
return curStep;
|
||||||
|
};
|
||||||
|
inline double IterOptimization::getCurrentValue()
|
||||||
|
{
|
||||||
|
return curValue;
|
||||||
|
};
|
||||||
|
// Read-only access to the problem this optimizer works on.
inline const Problem& IterOptimization::getProblem()
{
  return this->problem;
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,439 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef KATEG_OPT_H
|
||||||
|
#define KATEG_OPT_H
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "Problem.h"
|
||||||
|
|
||||||
|
extern double rhoLo;
|
||||||
|
|
||||||
|
typedef int Kategory;
|
||||||
|
typedef int Word;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Numeric type used for frequency counts: int by default, double when the
// build defines FREQTYPE_DOUBLE.
#ifndef FREQTYPE_DOUBLE
typedef int FreqType;
#else
typedef double FreqType;
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include "KategProblemWBC.h"
|
||||||
|
|
||||||
|
|
||||||
|
#include "KategProblemKBC.h"
|
||||||
|
|
||||||
|
|
||||||
|
// Initialisation modes, numbered consecutively from 1 to 5.
enum {
  INIT_RAN   = 1,
  INIT_AIO,        // 2
  INIT_LWRW,       // 3
  INIT_FREQ,       // 4
  INIT_OTHER       // 5
};
|
||||||
|
|
||||||
|
|
||||||
|
// Word-selection flags. They occupy bits 8, 16 and 32, the bits covered by
// the CHOOSE_WORD mask.
enum {
  W_RAN      = 0x08 | 0x10,   // 24
  W_DET_DECR = 0x10,          // 16
  W_DET_INCR = 0x20           // 32
};
|
||||||
|
// Bit mask covering all word-selection flag bits (8, 16 and 32).
#define CHOOSE_WORD (8|16|32)
|
||||||
|
|
||||||
|
|
||||||
|
// Category-selection flags. They occupy bits 64 and 128, the bits covered
// by the CHOOSE_KAT mask; K_BEST sets both.
enum {
  K_DET  = 0x40,          // 64
  K_RAN  = 0x80,          // 128
  K_BEST = 0x40 | 0x80    // 192
};
|
||||||
|
// Bit mask covering all category-selection flag bits (64 and 128).
#define CHOOSE_KAT (64|128)
|
||||||
|
|
||||||
|
|
||||||
|
// Identifiers of the evaluation criteria, numbered from 0.
enum {
  CRITERION_ML,   // 0
  CRITERION_LO,   // 1
  CRITERION_MY    // 2
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NWG
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
Array<FreqType> freq;
|
||||||
|
|
||||||
|
Array<int> timeOfFreq;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int curTime;
|
||||||
|
public:
|
||||||
|
NWG(int n);
|
||||||
|
void init();
|
||||||
|
|
||||||
|
int anzNot0;
|
||||||
|
|
||||||
|
|
||||||
|
Array<int> not0;
|
||||||
|
|
||||||
|
int word;
|
||||||
|
|
||||||
|
inline void addFreq(int C,FreqType n);
|
||||||
|
|
||||||
|
void sort();
|
||||||
|
|
||||||
|
FreqType getFreq(int i)
|
||||||
|
{
|
||||||
|
if( timeOfFreq[i]==curTime )
|
||||||
|
return freq[i];
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
inline void NWG::addFreq(int g,FreqType n)
|
||||||
|
{
|
||||||
|
if(timeOfFreq[g]==curTime)
|
||||||
|
freq[g]+=n;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
timeOfFreq[g]=curTime;
|
||||||
|
freq[g]=n;
|
||||||
|
not0[anzNot0++]=g;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
struct KategProblemChange : public ProblemChange
|
||||||
|
{
|
||||||
|
void *operator new(size_t size);
|
||||||
|
void operator delete(void *ptr,size_t size);
|
||||||
|
|
||||||
|
int word;
|
||||||
|
int toKat;
|
||||||
|
int fromKat;
|
||||||
|
};
|
||||||
|
|
||||||
|
class KategProblem : public Problem
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
double kat_h_full(int n);
|
||||||
|
double kat_h_full(double n);
|
||||||
|
double kat_h_part(int n);
|
||||||
|
double kat_h_part(double n);
|
||||||
|
double sigmaVerfaelschung;
|
||||||
|
short katWasEmpty;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int nwgWord;
|
||||||
|
|
||||||
|
NWG nwg;
|
||||||
|
NWG ngw;
|
||||||
|
FreqType nww;
|
||||||
|
|
||||||
|
int ursprung,ziel;
|
||||||
|
|
||||||
|
Array<int> _katOfWord;
|
||||||
|
|
||||||
|
int _maxComp,_maxCompVal;
|
||||||
|
|
||||||
|
double nmo_my(int i,int j);
|
||||||
|
double nmo(int i,int j);
|
||||||
|
|
||||||
|
|
||||||
|
double nmo_lo(int i,int j,int &e0,int &e1);
|
||||||
|
|
||||||
|
|
||||||
|
void putWord(int word,int to);
|
||||||
|
|
||||||
|
|
||||||
|
void fastPutWord(int word,int to);
|
||||||
|
|
||||||
|
|
||||||
|
void setKatOfWord(int w,int k)
|
||||||
|
{
|
||||||
|
if( !(wordFreq.fixedWord[w]==k||wordFreq.fixedWord[w]==-1||k==-1) )
|
||||||
|
{
|
||||||
|
cout << "mkcls::setKatOfWord::ERROR: " << w << " " << k << " " << wordFreq.fixedWord[w] << " " << (*words)[w] << endl;
|
||||||
|
}
|
||||||
|
_katOfWord[w]=k;
|
||||||
|
nwgWord=-1;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
void fillNWG(int w);
|
||||||
|
|
||||||
|
|
||||||
|
inline FreqType nstrich(int i,int j);
|
||||||
|
|
||||||
|
|
||||||
|
void vnstrich(int i,int j);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual int _change(ProblemChange **p);
|
||||||
|
|
||||||
|
|
||||||
|
virtual void _doChange(ProblemChange &c);
|
||||||
|
|
||||||
|
|
||||||
|
virtual void _undoChange(ProblemChange &c);
|
||||||
|
|
||||||
|
|
||||||
|
virtual double _value();
|
||||||
|
|
||||||
|
|
||||||
|
double _valueChange(KategProblemChange &k);
|
||||||
|
|
||||||
|
|
||||||
|
virtual void incrementDirection();
|
||||||
|
|
||||||
|
|
||||||
|
virtual int maxDimensionVal(void) ;
|
||||||
|
|
||||||
|
|
||||||
|
virtual int maxDimension(void) ;
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
leda_array<string> *words;
|
||||||
|
typedef leda_set<int> intSet;
|
||||||
|
|
||||||
|
leda_array<intSet> *kats;
|
||||||
|
|
||||||
|
KategProblemWBC wordFreq;
|
||||||
|
KategProblemKBC katFreq;
|
||||||
|
|
||||||
|
Array<int> initLike;
|
||||||
|
|
||||||
|
KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
|
||||||
|
int _nachbarschaft,int minw=0);
|
||||||
|
|
||||||
|
|
||||||
|
virtual ~KategProblem();
|
||||||
|
|
||||||
|
|
||||||
|
virtual void _initialize(int initTyp);
|
||||||
|
virtual void _initialize(int initTyp,int specialFixedWord);
|
||||||
|
|
||||||
|
|
||||||
|
virtual double valueChange(ProblemChange&c);
|
||||||
|
|
||||||
|
|
||||||
|
virtual Problem *makeEqualProblem();
|
||||||
|
|
||||||
|
|
||||||
|
virtual double nicevalue(double value=1e100);
|
||||||
|
|
||||||
|
|
||||||
|
void makeKats();
|
||||||
|
|
||||||
|
|
||||||
|
virtual void dumpOn(ostream &strm);
|
||||||
|
|
||||||
|
|
||||||
|
virtual void dumpInfos(ostream &strm);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
inline void katwahl(int k);
|
||||||
|
|
||||||
|
|
||||||
|
inline void wortwahl(int w);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
inline int katOfWord(int w);
|
||||||
|
|
||||||
|
|
||||||
|
inline short wortwahl();
|
||||||
|
|
||||||
|
|
||||||
|
inline short katwahl() ;
|
||||||
|
|
||||||
|
|
||||||
|
virtual int maxNonBetterIterations();
|
||||||
|
|
||||||
|
|
||||||
|
virtual int expectedNumberOfIterations();
|
||||||
|
|
||||||
|
|
||||||
|
const char *getString(int i);
|
||||||
|
string getTheString(int i);
|
||||||
|
|
||||||
|
|
||||||
|
void makeTitle(char x[512]);
|
||||||
|
|
||||||
|
|
||||||
|
void fixInitLike();
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
// Category currently assigned to word w.
inline int KategProblem::katOfWord(int w){return _katOfWord[w];};
|
||||||
|
// Word-selection bits of `nachbarschaft` (masked with CHOOSE_WORD).
inline short KategProblem::wortwahl(){return nachbarschaft&CHOOSE_WORD;};
|
||||||
|
// Category-selection bits of `nachbarschaft` (masked with CHOOSE_KAT).
inline short KategProblem::katwahl() {return nachbarschaft&CHOOSE_KAT;};
|
||||||
|
|
||||||
|
inline void KategProblem::katwahl(int k)
|
||||||
|
{
|
||||||
|
nachbarschaft = (nachbarschaft&(~CHOOSE_KAT)) | k;
|
||||||
|
if(k==K_BEST)
|
||||||
|
_maxCompVal=1;
|
||||||
|
else
|
||||||
|
_maxCompVal=katFreq.nKats-2;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline void KategProblem::wortwahl(int w)
|
||||||
|
{
|
||||||
|
nachbarschaft = (nachbarschaft&(~CHOOSE_WORD)) | w;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Change of the (i,j) category-pair count caused by moving the current word
// from category `ursprung` to category `ziel`.
// Row terms come from nwg, column terms from ngw, and nww corrects the
// cells where both row and column are affected. The terms are accumulated
// in the same order as before so floating-point FreqType results are
// unchanged.
inline FreqType KategProblem::nstrich(int i,int j)
{
  const bool rowIsSource = (i==ursprung);
  const bool rowIsTarget = (i==ziel);
  const bool colIsSource = (j==ursprung);
  const bool colIsTarget = (j==ziel);

  FreqType n=0;

  if( rowIsSource )
    n-=nwg.getFreq(j);
  if( rowIsTarget )
    n+=nwg.getFreq(j);

  if( colIsSource )
    n-=ngw.getFreq(i);
  if( colIsTarget )
    n+=ngw.getFreq(i);

  if( rowIsSource && colIsSource )
    n+=nww;
  if( rowIsTarget && colIsTarget )
    n+=nww;

  if( rowIsSource && colIsTarget )
    n-=nww;
  if( rowIsTarget && colIsSource )
    n-=nww;

  return n;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define MAX_H_TABLE 4000
|
||||||
|
extern double h_table[],l_table[],hmy_table[],hmy_sigma;
|
||||||
|
|
||||||
|
|
||||||
|
// Guarded natural logarithm: returns 0 for x <= 1e-9, log(x) otherwise.
inline double kat_mlog(double x)
{
  return ( x<=1e-9 ) ? 0 : log(x);
}
|
||||||
|
|
||||||
|
|
||||||
|
inline double kat_mlog(int s)
|
||||||
|
{
|
||||||
|
if(s<=0)
|
||||||
|
return 0;
|
||||||
|
else if( s<MAX_H_TABLE )
|
||||||
|
{
|
||||||
|
massert( s==0 || l_table[s]==log(s) );
|
||||||
|
return l_table[s];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return log((double)(s));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
inline double kat_hlo(int n)
|
||||||
|
{
|
||||||
|
return n*kat_mlog(n-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline double kat_hlo(double n)
|
||||||
|
{
|
||||||
|
return n*kat_mlog(n-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
inline double kat_h(int n)
|
||||||
|
{
|
||||||
|
massert(n>=-1);
|
||||||
|
if(n<=0)
|
||||||
|
return 0;
|
||||||
|
else
|
||||||
|
if(n<MAX_H_TABLE)
|
||||||
|
{
|
||||||
|
massert(n==0||fabs(h_table[n]-n*log((double)n))<1e-8);
|
||||||
|
return h_table[n];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return n*log((double)(n));
|
||||||
|
}
|
||||||
|
// Returns n*log(n) for floating-point n, or 0 when n <= 1e-9.
inline double kat_h(double n)
{
  return ( n<=1e-9 ) ? 0 : n*log(n);
}
|
||||||
|
|
||||||
|
|
||||||
|
inline double kat_etaFkt(int _e0,int e1,int immer0,int cats)
|
||||||
|
{
|
||||||
|
int e0 = _e0 - immer0;
|
||||||
|
int ePlus = cats*cats - _e0;
|
||||||
|
if(cats*cats-e0>1)
|
||||||
|
return e1*log( (ePlus-1.0)/(e0+1.0)*rhoLo );
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
double mkat_h_full(int n,double tf);
|
||||||
|
double mkat_h_part(int n,double cf);
|
||||||
|
|
||||||
|
int Hash(const string& s);
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,243 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include "KategProblem.h"
|
||||||
|
|
||||||
|
double rhoLo=0.75;
|
||||||
|
#define MAX_VERFAELSCHUNG 5000
|
||||||
|
double verfTab[MAX_VERFAELSCHUNG],verfTabSigma=-1.0;
|
||||||
|
double verfaelsche(int a,double b)
|
||||||
|
{
|
||||||
|
|
||||||
|
if( a>=0&&verfTabSigma==b&&a<MAX_VERFAELSCHUNG )
|
||||||
|
{
|
||||||
|
massert(verfTab[a]== b*(erf(10000.0) - erf(a/b))/2+a);
|
||||||
|
return verfTab[a];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
double x = b*(erf(10000.0) - erf(a/b))/2+a;
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Real-valued overload: not supported. It aborts the program immediately;
   the return statement only satisfies the signature. */
double verfaelsche(double /*unused*/,double b)
{
  abort();
  return b;
}
|
||||||
|
|
||||||
|
/* Builds the category-bigram statistics for s categories.
   s  : number of categories (stored in nKats; all count arrays are s or s x s)
   sv : distortion sigma; distortion bookkeeping is enabled iff sv != 0.0

   All counts start at 0, so eta0 (cells with count 0) starts at s*s and
   c1_0 / c2_0 (categories with zero unigram count) start at s.  With
   distortion enabled every distorted cell starts at verfaelsche(0,sv) and
   the running sums are initialised accordingly.

   Fix: verfaelsche() reads verfTab whenever its sigma equals verfTabSigma,
   and this constructor publishes sigma in verfTabSigma.  Nothing in this
   translation unit ever wrote verfTab, so the table is now filled here
   before it is declared valid for the new sigma. */
KategProblemKBC::KategProblemKBC(int s,double sv) :
  _n(s),_n1(s,0),_n2(s,0),sigmaVerfaelschung(sv),withVerfaelschung(sv!=0.0),
  _nverf(s),_n1verf(s,0.0),_n2verf(s,0.0),_nWords(0),
  eta0(s*s),eta1(0),c1_0(s),c2_0(s),
  _bigramVerfSum(0.0),_unigramVerfSum1(0.0),_unigramVerfSum2(0.0),nKats(s)
{
  // Remembers whether verfTab has ever been filled; verfTabSigma alone
  // cannot tell because the original code set it without filling the table.
  static bool verfTabFilled=false;

  verfInit0=0.0;
  int i;
  if( withVerfaelschung )
    {
      if( !verfTabFilled || verfTabSigma!=sv )
        {
          // Same expression as in verfaelsche(int,double), so its massert holds.
          for(int a=0;a<MAX_VERFAELSCHUNG;a++)
            verfTab[a]=sv*(erf(10000.0) - erf(a/sv))/2+a;
          verfTabSigma=sv;
          verfTabFilled=true;
        }
      verfInit0=verfaelsche(0,sv);
      cout << "VERFAELSCHUNG wird mitgefuehrt => LANGSAMER!!!\n";
    }
  for(i=0;i<s;i++)
    {
      _n[i].init(s,0);
      _nverf[i].init(s,verfInit0);
      _n1verf[i]=_n2verf[i]=verfInit0;
      _bigramVerfSum+=verfInit0*s;
      _unigramVerfSum1+=verfInit0;
      _unigramVerfSum2+=verfInit0;
    }
  if( withVerfaelschung )
    {
      cout << "VERFAELSCHUNG " << _bigramVerfSum << " " << _unigramVerfSum1 << " " << _unigramVerfSum2 << endl;
    }
  else
    {
      // Without distortion the table is not valid for this sigma (0.0);
      // force a refill should distortion be enabled again later.
      verfTabFilled=false;
    }
  verfTabSigma=sigmaVerfaelschung;
}
|
||||||
|
|
||||||
|
void KategProblemKBC::setN(int w1,int w2, FreqType n)
|
||||||
|
|
||||||
|
{
|
||||||
|
addN(w1,w2,-_n[w1][w2]);
|
||||||
|
addN(w1,w2,n);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double KategProblemKBC::fullBewertung(int auswertung)
|
||||||
|
{
|
||||||
|
|
||||||
|
double bewertung=0;
|
||||||
|
int c1,c2;
|
||||||
|
|
||||||
|
|
||||||
|
switch( auswertung )
|
||||||
|
{
|
||||||
|
case CRITERION_ML:
|
||||||
|
for(c1=0;c1<nKats;c1++)
|
||||||
|
{
|
||||||
|
for(c2=0;c2<nKats;c2++)
|
||||||
|
bewertung-=kat_h(_n[c1][c2]);
|
||||||
|
bewertung+=kat_h(_n1[c1])+kat_h(_n2[c1]);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case CRITERION_MY:
|
||||||
|
{
|
||||||
|
for(c1=0;c1<nKats;c1++)
|
||||||
|
{
|
||||||
|
for(c2=0;c2<nKats;c2++)
|
||||||
|
bewertung-=mkat_h_full((int)n(c1,c2),nverf(c1,c2));
|
||||||
|
bewertung+=mkat_h_part((int)(n1(c1)),n1verf(c1))+mkat_h_part((int)(n2(c1)),n2verf(c1));
|
||||||
|
}
|
||||||
|
double u1=_unigramVerfSum1-verfInit0*c1_0;
|
||||||
|
double u2=_unigramVerfSum2-verfInit0*c2_0;
|
||||||
|
double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
|
||||||
|
if( verboseMode>1 )
|
||||||
|
{
|
||||||
|
cout << "CRITERION_MY: " << bewertung << endl;
|
||||||
|
cout << "U1:"<<_unigramVerfSum1 << " n:"<<u1<< " "
|
||||||
|
<< "U2:"<<_unigramVerfSum2 << " n:"<<u2<< " "
|
||||||
|
<< "U3:"<<_bigramVerfSum << " n:"<<b<< endl;
|
||||||
|
}
|
||||||
|
if(b>0.000001)
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
if(verboseMode>1 )
|
||||||
|
cout << " NEU: " <<_nWords*log( u1 * u2 / b ) << endl;
|
||||||
|
bewertung -= _nWords*log( u1 * u2 / b );
|
||||||
|
if(verboseMode>1)
|
||||||
|
cout << "SCHLUSSBEWERTUNG: " << bewertung << endl;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
cout << "B zu klein " << b << endl;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case CRITERION_LO:
|
||||||
|
for(c1=0;c1<nKats;c1++)
|
||||||
|
{
|
||||||
|
for(c2=0;c2<nKats;c2++)
|
||||||
|
bewertung-=_n[c1][c2]*kat_mlog(_n[c1][c2]-1-rhoLo);
|
||||||
|
bewertung+=_n1[c1]*kat_mlog(_n1[c1]-1)+_n2[c1]*kat_mlog(_n2[c1]-1);
|
||||||
|
}
|
||||||
|
bewertung-=kat_etaFkt(eta0,eta1,(c1_0*nKats+c2_0*nKats-c1_0*c2_0),nKats);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
cerr << "Error: wrong criterion " << auswertung << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
return bewertung;
|
||||||
|
}
|
||||||
|
|
||||||
|
double KategProblemKBC::myCriterionTerm()
|
||||||
|
{
|
||||||
|
iassert( withVerfaelschung );
|
||||||
|
double r;
|
||||||
|
double u1=_unigramVerfSum1-verfInit0*c1_0;
|
||||||
|
double u2=_unigramVerfSum2-verfInit0*c2_0;
|
||||||
|
double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
|
||||||
|
|
||||||
|
|
||||||
|
if( verboseMode>1 )
|
||||||
|
{
|
||||||
|
cout << "nwords divisor:"<<_nWords << " " << u1 * u2 / b << endl;
|
||||||
|
cout << "ergebnis: "<<_nWords*log( u1 * u2 / b ) << endl;
|
||||||
|
cout << "0: "<<c1_0 << endl;
|
||||||
|
}
|
||||||
|
r = _nWords*log( u1 * u2 / b );
|
||||||
|
|
||||||
|
return -r;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
double KategProblemKBC::bigramVerfSum()
|
||||||
|
{
|
||||||
|
double sum=0;
|
||||||
|
for(int c1=0;c1<nKats;c1++)
|
||||||
|
for(int c2=0;c2<nKats;c2++)
|
||||||
|
sum+=nverf(c1,c2);
|
||||||
|
cout << "BIGRAMVERFSUM: " << sum << endl;
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
double KategProblemKBC::unigramVerfSum1()
|
||||||
|
{
|
||||||
|
double sum=0;
|
||||||
|
for(int c1=0;c1<nKats;c1++)
|
||||||
|
sum+=n1verf(c1);
|
||||||
|
cout << "UNIGRAMVERFSUM1: " << sum << endl;
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
double KategProblemKBC::unigramVerfSum2()
|
||||||
|
{
|
||||||
|
double sum=0;
|
||||||
|
for(int c1=0;c1<nKats;c1++)
|
||||||
|
sum+=n2verf(c1);
|
||||||
|
cout << "UNIGRAMVERFSUM2: " << sum << endl;
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,157 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef KATEGPROBLEMKBC_H
|
||||||
|
#define KATEGPROBLEMKBC_H
|
||||||
|
|
||||||
|
typedef Array<FreqType> FreqArray;
|
||||||
|
typedef Array<double> FreqArrayReal;
|
||||||
|
|
||||||
|
|
||||||
|
double verfaelsche(int a,double b);
|
||||||
|
double verfaelsche(double a,double b);
|
||||||
|
|
||||||
|
class KategProblemKBC
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
friend class KategProblem;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Array<FreqArray> _n;
|
||||||
|
Array<FreqType> _n1;
|
||||||
|
|
||||||
|
Array<FreqType> _n2;
|
||||||
|
|
||||||
|
|
||||||
|
double sigmaVerfaelschung;
|
||||||
|
short withVerfaelschung;
|
||||||
|
|
||||||
|
Array<FreqArrayReal> _nverf;
|
||||||
|
Array<double> _n1verf;
|
||||||
|
Array<double> _n2verf;
|
||||||
|
FreqType _nWords;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
int eta0;
|
||||||
|
int eta1;
|
||||||
|
int c1_0;
|
||||||
|
int c2_0;
|
||||||
|
double _bigramVerfSum;
|
||||||
|
double _unigramVerfSum1;
|
||||||
|
double _unigramVerfSum2;
|
||||||
|
double verfInit0;
|
||||||
|
|
||||||
|
public:
|
||||||
|
int nKats;
|
||||||
|
|
||||||
|
KategProblemKBC(int nKats,double sv);
|
||||||
|
|
||||||
|
|
||||||
|
double fullBewertung(int auswertung);
|
||||||
|
|
||||||
|
|
||||||
|
FreqType n(int w1,int w2) { return _n[w1][w2]; };
|
||||||
|
|
||||||
|
|
||||||
|
FreqType n1(int w) { return _n1[w];};
|
||||||
|
|
||||||
|
|
||||||
|
FreqType n2(int w) { return _n2[w];};
|
||||||
|
|
||||||
|
|
||||||
|
double bigramVerfSum();
|
||||||
|
double unigramVerfSum1();
|
||||||
|
double unigramVerfSum2();
|
||||||
|
|
||||||
|
double nverf(int w1,int w2) { return _nverf[w1][w2]; }
|
||||||
|
|
||||||
|
double n1verf(int w) { return _n1verf[w]; };
|
||||||
|
|
||||||
|
double n2verf(int w) { return _n2verf[w]; };
|
||||||
|
|
||||||
|
inline void addN(int w1,int w2, FreqType n);
|
||||||
|
|
||||||
|
|
||||||
|
void setN(int w1,int w2, FreqType n);
|
||||||
|
|
||||||
|
|
||||||
|
double myCriterionTerm();
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
inline void KategProblemKBC::addN(int w1,int w2, FreqType n)
|
||||||
|
{
|
||||||
|
if(n!=0)
|
||||||
|
{
|
||||||
|
FreqType &s= _n[w1][w2];
|
||||||
|
if(s==0)
|
||||||
|
eta0--;
|
||||||
|
else if(s==1)
|
||||||
|
eta1--;
|
||||||
|
if(_n1[w1]==0)
|
||||||
|
c1_0--;
|
||||||
|
if(_n2[w2]==0)
|
||||||
|
c2_0--;
|
||||||
|
|
||||||
|
if(withVerfaelschung)
|
||||||
|
{
|
||||||
|
double verfOld=verfaelsche(s,sigmaVerfaelschung);
|
||||||
|
double verfNew=verfaelsche(s+n,sigmaVerfaelschung);
|
||||||
|
double verfOld1=verfaelsche(_n1[w1],sigmaVerfaelschung);
|
||||||
|
assert(verfOld1==_n1verf[w1]);
|
||||||
|
double verfNew1=verfaelsche(_n1[w1]+n,sigmaVerfaelschung);
|
||||||
|
double verfOld2=verfaelsche(_n2[w2],sigmaVerfaelschung);
|
||||||
|
assert(verfOld2==_n2verf[w2]);
|
||||||
|
double verfNew2=verfaelsche(_n2[w2]+n,sigmaVerfaelschung);
|
||||||
|
_n1verf[w1]=verfNew1;
|
||||||
|
_unigramVerfSum1+=verfNew1-verfOld1;
|
||||||
|
_n2verf[w2]=verfNew2;
|
||||||
|
_unigramVerfSum2+=verfNew2-verfOld2;
|
||||||
|
_nverf[w1][w2]=verfNew;
|
||||||
|
_bigramVerfSum+=verfNew-verfOld;
|
||||||
|
_nWords+=n;
|
||||||
|
}
|
||||||
|
s+=n;_n1[w1]+=n;_n2[w2]+=n;
|
||||||
|
|
||||||
|
assert(_n[w1][w2]>=0);
|
||||||
|
assert(_n1[w1]>=0);
|
||||||
|
assert(_n2[w2]>=0);
|
||||||
|
|
||||||
|
if(s==0)
|
||||||
|
eta0++;
|
||||||
|
else if(s==1)
|
||||||
|
eta1++;
|
||||||
|
if(_n1[w1]==0)
|
||||||
|
c1_0++;
|
||||||
|
if(_n2[w2]==0)
|
||||||
|
c2_0++;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif
|
@ -0,0 +1,700 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||||
|
|
||||||
|
mkcls - a program for making word classes .
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; either version 2
|
||||||
|
of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
USA.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include "KategProblemTest.h"
|
||||||
|
|
||||||
|
#include "ProblemTest.h"
|
||||||
|
#include "HCOptimization.h"
|
||||||
|
#include "TAOptimization.h"
|
||||||
|
#include "RRTOptimization.h"
|
||||||
|
#include "GDAOptimization.h"
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <string>
|
||||||
|
#include <strstream>
|
||||||
|
|
||||||
|
typedef pair<string,string> PSS;
|
||||||
|
|
||||||
|
#define NEW_SENTENCE_END "mkcls-mapped-dollar-symbol-$"
|
||||||
|
|
||||||
|
#ifdef NeXT
|
||||||
|
/* Replacement for strdup on platforms that lack it (only compiled when
   NeXT is defined).  Returns a malloc'ed copy of the NUL-terminated string
   a, to be released with free(), or a null pointer if the allocation
   fails (the original wrote through the unchecked malloc result). */
char *strdup(char *a)
{
  char *p = (char *)malloc(strlen(a)+1);
  if( p==0 )
    return 0;
  strcpy(p,a);
  return p;
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* Writes one "word<TAB>category" line per word to the stream `to`.
   The word "$" itself is skipped; the placeholder
   "mkcls-mapped-dollar-symbol-$" is written back as "$". */
void writeClasses(Array<Kategory> &katOfWord,KategProblem &problem,ostream &to)
{
  for(int w=0;w<katOfWord.size();w++)
    {
      if( strcmp(problem.getString(w),"$")==0 )
        continue;

      if( strcmp(problem.getString(w),"mkcls-mapped-dollar-symbol-$")==0 )
        {
          to << "$" << "\t" << katOfWord[w] << endl;
        }
      else
        {
          to << problem.getString(w) << "\t" << katOfWord[w] << endl;
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
void mysplit(const string &s,string &s1,string &s2)
|
||||||
|
{
|
||||||
|
unsigned int i=0;
|
||||||
|
for(;i<s.length();i++)if( s[i]==' ' || s[i]=='\t' || s[i]==' ')break;
|
||||||
|
s1=s.substr(0,i);
|
||||||
|
for(;i<s.length();i++)if( !(s[i]==' ' || s[i]=='\t' || s[i]==' ') )break;
|
||||||
|
s2=s.substr(i,s.length()-i);
|
||||||
|
|
||||||
|
iassert(s1.size());
|
||||||
|
iassert(s2.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Reads an initial categorisation ("word category" per line) from fname
   into p->initLike.
   - Category labels are mapped to internal numbers: "0" -> 0, "1" -> 1,
     every other label gets the next free number starting at 2.
   - Words that are not in the vocabulary produce a warning on cerr.
   - The word "$", if present in the vocabulary, is forced into category 0.
   - Terminates with exit(1) if the file cannot be opened or if more
     categories were read than p->katFreq.nKats provides.
   Returns 1 if at least one vocabulary word ended up without a category
   (each one is reported on cerr when verb is true), otherwise 0. */
int fromCatFile(KategProblem *p,const char *fname,bool verb)
{
  leda_h_array<string,int> translation(-1);
  int maxCat=2;

  ifstream in(fname);
  if(!in)
    {
      cerr << "Error: File '" << fname << "' cannot be opened.\n";
      exit(1);
    }

  // Start with "no category known" for every word.
  for(int w=0;w<p->wordFreq.nWords;w++)
    (p->initLike)[w]= -1;

  translation["1"]=1;
  translation["0"]=0;

  string line;
  while( getline(in,line) )
    {
      string word,label;
      mysplit(line,word,label);
      int idx=p->words->binary_locate(word);
      if( !(idx>=0 && (*(p->words))[idx]==word) )
        {
          cerr << "Warning: Word '" << word << "' " << idx << " is not in training corpus.\n";
          continue;
        }

      if( translation[label]==-1 )
        translation[label]=maxCat++;
      int cat=translation[label];
      if( (p->initLike)[idx]!= -1 )
        cerr << "Warning: Word '" << ((*(p->words))[idx])<< "' is already in a category.\n";
      (p->initLike)[idx]=cat;
    }

  if( verboseMode )
    cout << "We have " << maxCat << " read non-empty categories"
      " (with words from the corpus).\n";

  if(maxCat>p->katFreq.nKats)
    {
      cerr << "Error: Not enough categories reserved (only "
           << p->katFreq.nKats << ", but i need " << maxCat << ").\n";
      exit(1);
    }

  // The sentence boundary symbol always lives in category 0.
  int dollar=p->words->binary_locate("$");
  if( dollar>=0 && (*(p->words))[dollar]=="$" )
    {
      (p->initLike)[dollar]=0;
    }
  else if( verboseMode )
    {
      cerr << "Warning: No '$' in vocabulary!\n";
    }

  int errors=0;
  for(int w=0;w<p->wordFreq.nWords;w++)
    {
      if( (p->initLike)[w]!= -1 )
        continue;
      if( verb ) cerr << "Error: I don't know the category of word " << w
                      << " (" << (*(p->words))[w] << ") " << ".\n";
      errors=1;
    }
  return errors;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Builds a KategProblem from a bigram count table and its vocabulary.
   cTbl         : counts keyed by (first word, second word)
   setVokabular : all words occurring in cTbl
   The remaining parameters are passed through to the KategProblem
   constructor.

   Steps: copy the vocabulary into a sorted array (owned by the returned
   problem through k->words), count for every word how many distinct
   successors/predecessors it has, size the per-word tables accordingly,
   then enter every bigram count.  Pairs with an empty second word and the
   pair ("$","$") are ignored.  The caller owns the returned object.

   Fix: the ordering check used `z<ctr-1` with an unsigned ctr, which wraps
   around for an empty vocabulary; it is now written as `z+1<ctr`. */
KategProblem *makeKategProblem(const leda_h_array<PSS,FreqType>&cTbl,const leda_set<string>&setVokabular, int maxClass,int initialisierung,
                               int auswertung,int nachbarschaft,int minWordFrequency)
{
  int nwrd=0;
  leda_array<string>&sVok = *new leda_array<string>(setVokabular.size());
  string s;
  unsigned int ctr=0;
  forall_set(leda_set<string>,s,setVokabular)
    {
      if( verboseMode>2 )
        cout << "mkcls:Wort " << ctr << " " << s << endl;
      sVok[ctr++]=s;
    }
  // The set is expected to iterate in strictly ascending order.
  for(unsigned int z=0;z+1<ctr;z++)
    iassert( sVok[z]<sVok[z+1] );
  sVok.sort();

  if( verboseMode>2 )
    cout << "*****Vocabulary: " << sVok;

  unsigned int vokSize=sVok.size();
  massert(vokSize==ctr); massert(vokSize==setVokabular.size());
  if(verboseMode)
    {cout << "Size of vocabulary: " << vokSize << "\n";cout.flush();}

  KategProblem *k = new KategProblem(vokSize,maxClass,initialisierung,
                                     auswertung,nachbarschaft,minWordFrequency);
  KategProblemWBC &w=k->wordFreq;
  k->words=&sVok;

  Array<int> after(vokSize,0);
  Array<int> before(vokSize,0);

  // Pass 1: count distinct successors/predecessors per word.
  nwrd=0;
  {
    PSS s;
    forall_defined_h2(PSS,FreqType,s,cTbl)
      {
        const string&ss1=s.first;
        const string&ss2=s.second;
        if( ss2.length()&&(ss1!="$" || ss2!="$") )
          {
            int i1=sVok.binary_search(ss1);
            int i2=sVok.binary_search(ss2);
            iassert( sVok[i1] == ss1 );iassert( sVok[i2] == ss2 );
            after[i1]++;
            before[i2]++;
          }
        if( verboseMode&&((nwrd++)%10000==0) )
          {cout<<"Statistiken-1 " << nwrd<< ". \r";cout.flush();}
      }
  }

  for(unsigned int i=0;i<vokSize;i++)
    {
      w.setAfterWords(i,after[i]);
      w.setBeforeWords(i,before[i]);
    }

  // Pass 2: enter the actual bigram counts.
  {
    nwrd=0;
    PSS s;
    forall_defined_h2(PSS,FreqType,s,cTbl)
      {
        const string&ss1=s.first;
        const string&ss2=s.second;
        FreqType p=cTbl[s];
        if( ss2.length()&&(ss1!="$" || ss2!="$") )
          {
            int i1=sVok.binary_search(ss1);
            int i2=sVok.binary_search(ss2);
            iassert( sVok[i1] == ss1 );iassert( sVok[i2] == ss2 );
            w.setFreq(i1,i2,p);
            if( verboseMode>2 )
              cout << "BIGRAMM-HAEUF: " << ss1 << ":" << i1 << " "
                   << ss2 << ":" << i2 << " " << p << endl;
          }
        if( verboseMode&&((nwrd++)%10000==0) )
          {cout<<"Statistiken-2 " <<nwrd<< ". \r";cout.flush();}
      }
  }

  w.testFull();
  if(verboseMode){cout << "Datenintegritaet getestet.\n";cout.flush();}
  return k;
}
|
||||||
|
|
||||||
|
/* Reads a bigram count file whose whitespace-separated records have the
   form "<count> <word1> <word2>" and builds a KategProblem from it.
   Returns 0 if the file cannot be opened, if a record has an empty word,
   or if a record has count 0 (the latter two are reported on cerr).
   The remaining parameters are passed through to makeKategProblem. */
KategProblem *fromNgrFile(const char *str,int maxClass,int initialisierung,
                          int auswertung,int nachbarschaft,int minWordFrequency)
{
  ifstream file(str);
  if(!file)
    return 0;

  leda_set<string> setVokabular;
  leda_h_array<PSS,FreqType> cTbl;

  if( verboseMode )
    cout << "NGRFILE: " << str << endl;

  double count=0;
  string first,second;
  while(file >> count >> first >> second)
    {
      if( first.length()==0||second.length()==0 )
        {
          cerr << "ERROR: strings are zero: " << first.length() <<" " << first <<" " << second.length()<<" " << second << endl;
          return 0;
        }
      if( count==0 )
        {
          cerr << "Count ist 0 " << first << " " << second << endl;
          return 0;
        }

      cTbl[PSS(first,second)]=(FreqType)count;
      setVokabular.insert(first);
      setVokabular.insert(second);
      if( verboseMode>1 )
        cout << "R: " << first << " " << second << " " << count << endl;

      // Reset so that a record without a readable count is seen as 0.
      count=0;
    }

  return makeKategProblem(cTbl,setVokabular,maxClass,initialisierung,auswertung,nachbarschaft,minWordFrequency);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Reads a plain-text corpus (one sentence per line) and builds a
   KategProblem from its bigram counts.
   - Every line is split into whitespace-separated words.
   - "$" marks the sentence boundary: it precedes the first word and is
     appended after the last word of every line.
   - A literal "$" in the text is replaced by "mkcls-mapped-dollar-symbol-$".
   Returns 0 (after a message on cerr) if the file cannot be opened.
   The remaining parameters are passed through to makeKategProblem. */
KategProblem *fromKModel(const char *str,int maxClass,int initialisierung,
                         int auswertung,int nachbarschaft,int minWordFrequency)
{
  string previous,word,line;
  ifstream f(str);
  if( !f )
    {
      cerr << "ERROR: can not open file " << str << ".\n";
      return 0;
    }

  leda_set<string> setVokabular;
  leda_h_array<PSS,FreqType> cTbl(0);
  previous="$";

  for(;;)
    {
      getline(f,line);
      if(f.fail() && !f.bad() && !f.eof())
        {
          cerr << "WARNING: strange characters in stream (getline) " << endl;f.clear();
        }
      if(!f)
        break;

      istrstream words(line.c_str());
      for(;;)
        {
          words >> word;
          if(words.fail() && !words.bad() && !words.eof())
            {
              cerr << "WARNING: strange characters in stream (>>) !\n";
              words.clear(ios::failbit);
            }
          if(!words)
            break;

          if( word == "$" )
            word = "mkcls-mapped-dollar-symbol-$";
          if( !setVokabular.member(word) )
            setVokabular.insert(word);
          cTbl[PSS(previous,word)]++;
          previous=word;
        }

      // Close the sentence with the boundary symbol.
      word="$";
      if( !setVokabular.member(word) )
        setVokabular.insert(word);
      cTbl[PSS(previous,word)]++;
      previous=word;
    }

  return makeKategProblem(cTbl,setVokabular,maxClass,initialisierung,auswertung,nachbarschaft,minWordFrequency);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Selects the default annealing parameters of the TA, RRT and GDA
   optimisers depending on the category choice strategy of p:
   one parameter set for K_BEST, another for everything else.
   Prints which set was chosen when verboseMode is on. */
void KategProblemSetParameters(KategProblem &p)
{
  const bool bestChoice = ( p.katwahl()==K_BEST );

  if( bestChoice )
    {
      TAOptimization::defaultAnnRate=0.7;
      RRTOptimization::defaultAnnRate=0.95;
      GDAOptimization::defaultAlpha=0.05;
    }
  else
    {
      TAOptimization::defaultAnnRate=0.4;
      RRTOptimization::defaultAnnRate=0.6;
      GDAOptimization::defaultAlpha=0.0125;
    }

  if( verboseMode )
    {
      if( bestChoice )
        cout << "Parameter-setting like W-DET-BEST\n";
      else
        cout << "Parameter-setting like W-DET-DET\n";
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Creates a KategProblem with ANZ_WORD words and ANZ_CLS classes that is
   filled with random bigram counts.
   ANZ_WORD*ANZ_WORD*relInit random (x,y) pairs are drawn; each draw adds
   a random amount between 1 and 10 to the pair's count.  The problem is
   allocated with new and returned by reference; the caller owns it. */
KategProblem &makRandom(int ANZ_WORD,int ANZ_CLS,int initValue,
                        int auswertung,int nachbarschaft,float relInit)
{
  KategProblem &problem=
    *new KategProblem(ANZ_WORD,ANZ_CLS,initValue,auswertung,nachbarschaft);
  KategProblemWBC &wbc=problem.wordFreq;

  Array<int> after(ANZ_WORD,0);
  Array<int> before(ANZ_WORD,0);
  Array<FreqArray> counts(ANZ_WORD);
  int i;
  for(i=0;i<ANZ_WORD;i++)
    counts[i].init(ANZ_WORD,0);

  // Sanity check: everything starts out as zero.
  for(i=0;i<ANZ_WORD;i++)
    {
      massert(after[i]==0);
      massert(before[i]==0);
      for(int j=0;j<ANZ_WORD;j++)
        {
          massert(counts[i][j]==0);
        }
    }

  // Draw the random bigrams; a pair seen for the first time adds one
  // successor to x and one predecessor to y.
  for(i=0;i<ANZ_WORD*ANZ_WORD*relInit;i++)
    {
      int x=randomInt(ANZ_WORD);
      int y=randomInt(ANZ_WORD);
      if(counts[x][y]==0)
        {
          after[x]++;
          before[y]++;
        }
      counts[x][y]+=randomInt(10)+1;
    }

  for(i=0;i<ANZ_WORD;i++)
    {
      wbc.setAfterWords(i,after[i]);
      wbc.setBeforeWords(i,before[i]);
    }

  for(i=0;i<ANZ_WORD;i++)
    {
      for(int j=0;j<ANZ_WORD;j++)
        {
          if( counts[i][j] )
            wbc.setFreq(i,j,counts[i][j]);
        }
    }

  wbc.testFull();
  return problem;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Builds a title of the form "<method> <problem title>" and returns it as
   a strdup'ed string; the caller releases it with free().
   verfahren selects the method prefix (HC, SA, TA, GDA or RRT).

   Fix: for any other value of verfahren the buffer was passed to strlen
   uninitialised.  It now starts out as the empty string, so unknown
   methods simply get no prefix.
   NOTE(review): problem.makeTitle() writes into the rest of the 1024-byte
   buffer without being told its size — confirm its output is bounded. */
char *makeTitle(KategProblem &problem,int verfahren)
{
  char x[1024];
  x[0]='\0';
  switch(verfahren)
    {
    case HC_OPT:
      strcpy(x,"HC ");
      break;
    case SA_OPT:
      strcpy(x,"SA ");
      break;
    case TA_OPT:
      strcpy(x,"TA ");
      break;
    case GDA_OPT:
      strcpy(x,"GDA ");
      break;
    case RRT_OPT:
      strcpy(x,"RRT ");
      break;
    default:
      break;
    }
  problem.makeTitle(x+strlen(x));
  return strdup(x);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define MAX_MULTIPLE 10
|
||||||
|
|
||||||
|
/* One level of the iterated state-space reduction (recursive).

   probs         : solved problems over the same nWords words; every
                   non-null entry is deleted by this function
   anzprob       : number of problems to intersect / minimum number of new
                   problems to create (must be > 1)
   timeForOneRed : time budget (clockSec units) for creating new problems
   maxClock      : absolute time limit; 0 disables the limit
   katOfWord     : out — category of every word of this level
   anzIter       : recursion depth (0 at the top level)
   verfahren     : optimisation method handed to solveProblem

   Words that share a category in each of the first useAnzprob problems are
   merged into one "super word" (newWords maps word -> super word).
   useAnzprob is lowered until fewer than 95% of the words remain.  New
   problems over the super words are then created and solved; unless the
   reduced vocabulary is already as small as nKats or the time limit is
   hit, the function recurses on them.
   Returns a heap-allocated array of problems which the caller owns.

   Fixes: newKatOfWord was allocated with new and never released (now a
   local object); the array object `neu` is released once the recursive
   call has consumed its elements; both qsort calls use the same element
   size expression. */
Array<KategProblem *> &_izrOptimization(Array<KategProblem *> &probs,
                                        int anzprob,double timeForOneRed,double maxClock,Array<Kategory> &katOfWord,
                                        int anzIter,int verfahren)
{
  massert(anzprob>1);
  massert(probs[0]->wordFreq.mindestAnzahl<=1);
  KategProblem *p0=probs[0];

  int nWords=p0->wordFreq.nWords;
  int nKats=p0->katFreq.nKats;
  int minimumNumberOfWords = max(1,int(nWords*0.95));

  int indexOfDurchschnitt;
  Array<int> newWords(nWords);
  int useAnzprob=anzprob;
  do
    {
      int w,k;
      indexOfDurchschnitt=0;
      for(w=0;w<nWords;w++)
        newWords[w]=-1;
      for(k=0;k<useAnzprob;k++)
        {
          massert(probs[k]->wordFreq.nWords==nWords);
          probs[k]->makeKats();
        }

      for(w=0;w<nWords;w++)
        {
          if( newWords[w]==-1 )
            {
              // Intersection of the categories containing w in all problems.
              leda_set<int> durchschnitt=(*p0->kats)[p0->katOfWord(w)];
              for(k=1;k<useAnzprob;k++)
                durchschnitt = durchschnitt & (*probs[k]->kats)[probs[k]->katOfWord(w)];

              int _anzInDurchschnitt=0;
              int nr=0;
              forall_set(leda_set<int>,nr,durchschnitt)
                {
                  _anzInDurchschnitt++;
                  newWords[nr]=indexOfDurchschnitt;
                }
              if( verboseMode && _anzInDurchschnitt>1 && anzIter==0 )
                {
                  cout << "- (";
                  forall_set(leda_set<int>,nr,durchschnitt)
                    {
                      cout << p0->getString(nr);
                      if( p0->wordFreq.n1(nr)==1 )
                        cout << "* ";
                      else
                        cout << " ";
                    }
                  cout << ")\n";
                }

              // NOTE(review): the result of this loop is never read.
              for(k=0;k<useAnzprob;k++)
                {
                  durchschnitt = durchschnitt - (*probs[k]->kats)[probs[k]->katOfWord(w)];
                }
              indexOfDurchschnitt++;
            }
        }

      if(indexOfDurchschnitt>=minimumNumberOfWords)
        {
          // Not enough reduction: intersect fewer problems and retry.
          if(useAnzprob==1)
            {
              cout << "useAnzProb==1 => mysterious.\n";
              break;
            }
          useAnzprob--;
        }
    }
  while(indexOfDurchschnitt>=minimumNumberOfWords);

  Array<KategProblem *> &neu=*new Array<KategProblem *>(MAX_MULTIPLE*anzprob,(KategProblem *)0);
  qsort(probs.getPointerToData(),useAnzprob,sizeof(KategProblem *),compareProblem);
  massert(useAnzprob<=probs.size());
  double startTime=clockSec();
  int i, numberOfNew;
  for(numberOfNew=0; (clockSec()-startTime<timeForOneRed)
        || (numberOfNew < anzprob) ; numberOfNew++)
    {
      int w;
      if( numberOfNew==anzprob*MAX_MULTIPLE-1 )
        break;
      KategProblem *p
        = neu[numberOfNew]
        = new KategProblem(indexOfDurchschnitt,nKats-2,
                           p0->initialisierung,p0->auswertung,p0->nachbarschaft);

      for(w=0;w<indexOfDurchschnitt;w++)
        {
          p->wordFreq.setAfterWords(w,5);
          p->wordFreq.setBeforeWords(w,5);
        }
      // Transfer the bigram counts of p0 onto the super words.
      for(w=0;w<nWords;w++)
        {
          Array<OneFreq> &after=p0->wordFreq.after[w];
          int size=after.size();
          for(i=0;i<size;i++)
            p->wordFreq.addFreq(newWords[w],newWords[after[i].w],after[i].n);
        }
      p->wordFreq.testFull(1);

      p->wordFreq.set_h_of_words(p0->wordFreq.get_h_of_words());
      double w1=0.0,w2=0.0;
      if(numberOfNew<useAnzprob)
        {
          // Start from the categorisation of an existing problem and
          // improve it by hill climbing.
          for(i=0;i<nWords;i++)
            (p->initLike)[newWords[i]]=probs[numberOfNew]->katOfWord(i);
          p->_initialize(5);
          HCOptimization hc(*p,-1);
          if(verboseMode)
            {
              w1=p->nicevalue();
              cout << "from old category system:" << w1 << endl;
            }
          hc.minimize(-1);
          if(verboseMode)
            {
              w2=p->nicevalue();
              if(w2<w1)
                cout << "improvement: " << w1-w2 << endl;
            }
        }
      else
        {
          // Solve a fresh problem with the requested method.
          p->_initialize(1);
          double mean;
          StatVar end,laufzeit,start;
          solveProblem(0,*p,1,-1,verfahren,mean,end,laufzeit,start);
          w2=p->value();
          if(verboseMode)
            cout << "new category system: " << w2 << " (" << p->nicevalue()
                 << ") Zeit: " << clockSec() << "\n";
        }
    }

  // The input problems are no longer needed (p0 must not be used below).
  int p;
  for(p=0;p<probs.size();p++)
    {
      if( probs[p] )
        delete probs[p];
    }
  qsort(neu.getPointerToData(),numberOfNew,sizeof(KategProblem *),compareProblem);
  massert(numberOfNew<=neu.size());
  if( verboseMode )
    cout << "Iterierte Zustandsraum-Reduktion: " << indexOfDurchschnitt
         << " words. costs: " << neu[0]->value() << " "
         << neu[0]->nicevalue() << " (" << numberOfNew-anzprob << ")" << "time: "
         << clockSec() << endl;
  if( indexOfDurchschnitt<=nKats
      || (clockSec()>maxClock&&maxClock) )
    {
      if( clockSec()>maxClock&&maxClock )
        cout << "STOP (time limit: " << (clockSec()-maxClock) << " s)\n";
      for(i=0;i<nWords;i++)
        katOfWord[i]=neu[0]->katOfWord(newWords[i]);
      return neu;
    }
  else
    {
      Array<Kategory> newKatOfWord(neu[0]->wordFreq.nWords,-1);
      Array<KategProblem *> &erg=_izrOptimization(neu,anzprob,timeForOneRed,
                                                  maxClock,newKatOfWord,
                                                  anzIter+1,verfahren);
      for(i=0;i<nWords;i++)
        katOfWord[i]=newKatOfWord[newWords[i]];
      // The recursive call deleted the problems stored in neu; only the
      // array object itself is left to release.
      delete &neu;
      return erg;
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Entry point of the iterated state-space reduction.

   p              : problem to solve (only copied, never modified here)
   minN           : number of problems intersected per reduction step
   firstN         : minimum number of initial solutions to compute
   clockForOneRed : time budget per reduction; if <= 0 it is set to the
                    time the first minN solutions took
   maxClock       : absolute time limit handed to _izrOptimization
   verfahren      : optimisation method handed to solveProblem

   Solves several copies of p, lets _izrOptimization derive a
   categorisation from them, and finally polishes that categorisation on a
   fresh copy of p by hill climbing.  The returned problem is owned by the
   caller.

   Fixes:
   - probs is created with null entries; _izrOptimization tests and deletes
     every slot, including those never filled when the loop stops early.
   - when the loop stops via break, i now counts the problem solved in that
     iteration too, so it is included in the sort and the massert holds.
   - the problems returned by _izrOptimization are released.
   NOTE(review): the break condition fires while the time budget has NOT
   yet elapsed (startTime+clockForOneRed>clockSec()); this looks inverted —
   confirm before changing. */
KategProblem *izrOptimization(KategProblem &p,int minN,int firstN,
                              double clockForOneRed,double maxClock,int verfahren)
{
  Array<Kategory> katOfWord(p.wordFreq.nWords,-1);
  int startN;
  if( clockForOneRed<=0 )
    startN=firstN;
  else
    startN=1000;
  Array<KategProblem *> probs(startN,(KategProblem *)0);
  double val1=0.0,val2=0.0;
  double endTime=-1;

  double startTime=clockSec();
  int i;
  for(i=0;i<startN;i++)
    {
      StatVar end,laufzeit,start;
      double mean;
      probs[i] = (KategProblem *)((KategProblem *)p.makeEqualProblem());
      solveProblem(0,*(probs[i]),1,-1,verfahren,mean,end,laufzeit,start);
      if( i==minN-1 )
        endTime = clockSec();
      if( i>=firstN-1 && (startTime+clockForOneRed>clockSec() || i==999) )
        {
          i++;   // i is the number of solved problems from here on
          break;
        }
    }
  if( endTime<0 )
    endTime=clockSec();
  massert(i>=firstN);

  qsort(probs.getPointerToData(),i,sizeof(KategProblem *),compareProblem);
  massert(i<=probs.size());
  if( clockForOneRed<=0 )
    {
      clockForOneRed=endTime-startTime;
      if( verboseMode )
        cout << "time for one reduction: " << clockForOneRed << endl;
    }
  Array<KategProblem *> &reduced=
    _izrOptimization(probs,minN,clockForOneRed,maxClock,katOfWord,0,verfahren);
  // Only katOfWord is needed; release the reduced problems and their array.
  for(int r=0;r<reduced.size();r++)
    delete reduced[r];
  delete &reduced;

  KategProblem *n=(KategProblem *)(p.makeEqualProblem());
  n->initLike= katOfWord;
  n->_initialize(5);
  if( verboseMode )
    val1=n->value();
  HCOptimization hc(*n,-1);
  hc.minimize(-1);
  val2=n->value();
  if( verboseMode )
    cout << "last improvement: " << val2-val1 << "\n";
  cout << "final costs: " << val2 << " " << n->nicevalue() << endl;
  if(PrintBestTo)
    n->dumpOn(*PrintBestTo);
  return n;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user