added mgiza
This commit is contained in:
parent
6f995a64f2
commit
df5dddc924
1160
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/.cproject
Normal file
1160
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/.cproject
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,37 @@
|
||||
.anjuta
|
||||
.tm_project*
|
||||
.libs
|
||||
.deps
|
||||
.*swp
|
||||
.nautilus-metafile.xml
|
||||
*.autosave
|
||||
*.pws
|
||||
*.bak
|
||||
*~
|
||||
#*#
|
||||
*.gladep
|
||||
*.la
|
||||
*.lo
|
||||
*.o
|
||||
*.class
|
||||
*.pyc
|
||||
aclocal.m4
|
||||
autom4te.cache
|
||||
config.h
|
||||
config.h.in
|
||||
config.log
|
||||
config.status
|
||||
configure
|
||||
intltool-extract*
|
||||
intltool-merge*
|
||||
intltool-modules*
|
||||
intltool-update*
|
||||
libtool
|
||||
prepare.sh
|
||||
stamp-h*
|
||||
ltmain.sh
|
||||
mkinstalldirs
|
||||
config.guess
|
||||
config.sub
|
||||
Makefile
|
||||
Makefile.in
|
@ -0,0 +1,82 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>MGizaWhiteList</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
<dictionary>
|
||||
<key>?name?</key>
|
||||
<value></value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.append_environment</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
|
||||
<value>all</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildArguments</key>
|
||||
<value></value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildCommand</key>
|
||||
<value>make</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildLocation</key>
|
||||
<value>${workspace_loc:/MGizaWhiteList/Debug}</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
|
||||
<value>clean</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.contents</key>
|
||||
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
|
||||
<value>false</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
|
||||
<value>all</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.stopOnError</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
674
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/COPYING
Normal file
674
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/COPYING
Normal file
@ -0,0 +1,674 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
<program> Copyright (C) <year> <name of author>
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
|
237
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/INSTALL
Normal file
237
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/INSTALL
Normal file
@ -0,0 +1,237 @@
|
||||
Installation Instructions
|
||||
*************************
|
||||
|
||||
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
|
||||
2006, 2007 Free Software Foundation, Inc.
|
||||
|
||||
This file is free documentation; the Free Software Foundation gives
|
||||
unlimited permission to copy, distribute and modify it.
|
||||
|
||||
Basic Installation
|
||||
==================
|
||||
|
||||
Briefly, the shell commands `./configure; make; make install' should
|
||||
configure, build, and install this package. The following
|
||||
more-detailed instructions are generic; see the `README' file for
|
||||
instructions specific to this package.
|
||||
|
||||
The `configure' shell script attempts to guess correct values for
|
||||
various system-dependent variables used during compilation. It uses
|
||||
those values to create a `Makefile' in each directory of the package.
|
||||
It may also create one or more `.h' files containing system-dependent
|
||||
definitions. Finally, it creates a shell script `config.status' that
|
||||
you can run in the future to recreate the current configuration, and a
|
||||
file `config.log' containing compiler output (useful mainly for
|
||||
debugging `configure').
|
||||
|
||||
It can also use an optional file (typically called `config.cache'
|
||||
and enabled with `--cache-file=config.cache' or simply `-C') that saves
|
||||
the results of its tests to speed up reconfiguring. Caching is
|
||||
disabled by default to prevent problems with accidental use of stale
|
||||
cache files.
|
||||
|
||||
If you need to do unusual things to compile the package, please try
|
||||
to figure out how `configure' could check whether to do them, and mail
|
||||
diffs or instructions to the address given in the `README' so they can
|
||||
be considered for the next release. If you are using the cache, and at
|
||||
some point `config.cache' contains results you don't want to keep, you
|
||||
may remove or edit it.
|
||||
|
||||
The file `configure.ac' (or `configure.in') is used to create
|
||||
`configure' by a program called `autoconf'. You need `configure.ac' if
|
||||
you want to change it or regenerate `configure' using a newer version
|
||||
of `autoconf'.
|
||||
|
||||
The simplest way to compile this package is:
|
||||
|
||||
1. `cd' to the directory containing the package's source code and type
|
||||
`./configure' to configure the package for your system.
|
||||
|
||||
Running `configure' might take a while. While running, it prints
|
||||
some messages telling which features it is checking for.
|
||||
|
||||
2. Type `make' to compile the package.
|
||||
|
||||
3. Optionally, type `make check' to run any self-tests that come with
|
||||
the package.
|
||||
|
||||
4. Type `make install' to install the programs and any data files and
|
||||
documentation.
|
||||
|
||||
5. You can remove the program binaries and object files from the
|
||||
source code directory by typing `make clean'. To also remove the
|
||||
files that `configure' created (so you can compile the package for
|
||||
a different kind of computer), type `make distclean'. There is
|
||||
also a `make maintainer-clean' target, but that is intended mainly
|
||||
for the package's developers. If you use it, you may have to get
|
||||
all sorts of other programs in order to regenerate files that came
|
||||
with the distribution.
|
||||
|
||||
6. Often, you can also type `make uninstall' to remove the installed
|
||||
files again.
|
||||
|
||||
Compilers and Options
|
||||
=====================
|
||||
|
||||
Some systems require unusual options for compilation or linking that the
|
||||
`configure' script does not know about. Run `./configure --help' for
|
||||
details on some of the pertinent environment variables.
|
||||
|
||||
You can give `configure' initial values for configuration parameters
|
||||
by setting variables in the command line or in the environment. Here
|
||||
is an example:
|
||||
|
||||
./configure CC=c99 CFLAGS=-g LIBS=-lposix
|
||||
|
||||
*Note Defining Variables::, for more details.
|
||||
|
||||
Compiling For Multiple Architectures
|
||||
====================================
|
||||
|
||||
You can compile the package for more than one kind of computer at the
|
||||
same time, by placing the object files for each architecture in their
|
||||
own directory. To do this, you can use GNU `make'. `cd' to the
|
||||
directory where you want the object files and executables to go and run
|
||||
the `configure' script. `configure' automatically checks for the
|
||||
source code in the directory that `configure' is in and in `..'.
|
||||
|
||||
With a non-GNU `make', it is safer to compile the package for one
|
||||
architecture at a time in the source code directory. After you have
|
||||
installed the package for one architecture, use `make distclean' before
|
||||
reconfiguring for another architecture.
|
||||
|
||||
Installation Names
|
||||
==================
|
||||
|
||||
By default, `make install' installs the package's commands under
|
||||
`/usr/local/bin', include files under `/usr/local/include', etc. You
|
||||
can specify an installation prefix other than `/usr/local' by giving
|
||||
`configure' the option `--prefix=PREFIX'.
|
||||
|
||||
You can specify separate installation prefixes for
|
||||
architecture-specific files and architecture-independent files. If you
|
||||
pass the option `--exec-prefix=PREFIX' to `configure', the package uses
|
||||
PREFIX as the prefix for installing programs and libraries.
|
||||
Documentation and other data files still use the regular prefix.
|
||||
|
||||
In addition, if you use an unusual directory layout you can give
|
||||
options like `--bindir=DIR' to specify different values for particular
|
||||
kinds of files. Run `configure --help' for a list of the directories
|
||||
you can set and what kinds of files go in them.
|
||||
|
||||
If the package supports it, you can cause programs to be installed
|
||||
with an extra prefix or suffix on their names by giving `configure' the
|
||||
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
|
||||
|
||||
Optional Features
|
||||
=================
|
||||
|
||||
Some packages pay attention to `--enable-FEATURE' options to
|
||||
`configure', where FEATURE indicates an optional part of the package.
|
||||
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
|
||||
is something like `gnu-as' or `x' (for the X Window System). The
|
||||
`README' should mention any `--enable-' and `--with-' options that the
|
||||
package recognizes.
|
||||
|
||||
For packages that use the X Window System, `configure' can usually
|
||||
find the X include and library files automatically, but if it doesn't,
|
||||
you can use the `configure' options `--x-includes=DIR' and
|
||||
`--x-libraries=DIR' to specify their locations.
|
||||
|
||||
Specifying the System Type
|
||||
==========================
|
||||
|
||||
There may be some features `configure' cannot figure out automatically,
|
||||
but needs to determine by the type of machine the package will run on.
|
||||
Usually, assuming the package is built to be run on the _same_
|
||||
architectures, `configure' can figure that out, but if it prints a
|
||||
message saying it cannot guess the machine type, give it the
|
||||
`--build=TYPE' option. TYPE can either be a short name for the system
|
||||
type, such as `sun4', or a canonical name which has the form:
|
||||
|
||||
CPU-COMPANY-SYSTEM
|
||||
|
||||
where SYSTEM can have one of these forms:
|
||||
|
||||
OS KERNEL-OS
|
||||
|
||||
See the file `config.sub' for the possible values of each field. If
|
||||
`config.sub' isn't included in this package, then this package doesn't
|
||||
need to know the machine type.
|
||||
|
||||
If you are _building_ compiler tools for cross-compiling, you should
|
||||
use the option `--target=TYPE' to select the type of system they will
|
||||
produce code for.
|
||||
|
||||
If you want to _use_ a cross compiler, that generates code for a
|
||||
platform different from the build platform, you should specify the
|
||||
"host" platform (i.e., that on which the generated programs will
|
||||
eventually be run) with `--host=TYPE'.
|
||||
|
||||
Sharing Defaults
|
||||
================
|
||||
|
||||
If you want to set default values for `configure' scripts to share, you
|
||||
can create a site shell script called `config.site' that gives default
|
||||
values for variables like `CC', `cache_file', and `prefix'.
|
||||
`configure' looks for `PREFIX/share/config.site' if it exists, then
|
||||
`PREFIX/etc/config.site' if it exists. Or, you can set the
|
||||
`CONFIG_SITE' environment variable to the location of the site script.
|
||||
A warning: not all `configure' scripts look for a site script.
|
||||
|
||||
Defining Variables
|
||||
==================
|
||||
|
||||
Variables not defined in a site shell script can be set in the
|
||||
environment passed to `configure'. However, some packages may run
|
||||
configure again during the build, and the customized values of these
|
||||
variables may be lost. In order to avoid this problem, you should set
|
||||
them in the `configure' command line, using `VAR=value'. For example:
|
||||
|
||||
./configure CC=/usr/local2/bin/gcc
|
||||
|
||||
causes the specified `gcc' to be used as the C compiler (unless it is
|
||||
overridden in the site shell script).
|
||||
|
||||
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
|
||||
an Autoconf bug. Until the bug is fixed you can use this workaround:
|
||||
|
||||
CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
|
||||
|
||||
`configure' Invocation
|
||||
======================
|
||||
|
||||
`configure' recognizes the following options to control how it operates.
|
||||
|
||||
`--help'
|
||||
`-h'
|
||||
Print a summary of the options to `configure', and exit.
|
||||
|
||||
`--version'
|
||||
`-V'
|
||||
Print the version of Autoconf used to generate the `configure'
|
||||
script, and exit.
|
||||
|
||||
`--cache-file=FILE'
|
||||
Enable the cache: use and save the results of the tests in FILE,
|
||||
traditionally `config.cache'. FILE defaults to `/dev/null' to
|
||||
disable caching.
|
||||
|
||||
`--config-cache'
|
||||
`-C'
|
||||
Alias for `--cache-file=config.cache'.
|
||||
|
||||
`--quiet'
|
||||
`--silent'
|
||||
`-q'
|
||||
Do not print messages saying which checks are being made. To
|
||||
suppress all normal output, redirect it to `/dev/null' (any error
|
||||
messages will still be shown).
|
||||
|
||||
`--srcdir=DIR'
|
||||
Look for the package's source code in directory DIR. Usually
|
||||
`configure' can determine that directory automatically.
|
||||
|
||||
`configure' also accepts some other, not widely useful, options. Run
|
||||
`configure --help' for more details.
|
||||
|
@ -0,0 +1,29 @@
|
||||
## Process this file with automake to produce Makefile.in
|
||||
## Created by Anjuta
|
||||
|
||||
SUBDIRS = src
|
||||
|
||||
# Top-level documentation files, installed into $(mgizadocdir)
# (i.e. ${prefix}/doc/mgiza) by the generated install-mgizadocDATA rule.
mgizadocdir = ${prefix}/doc/mgiza
|
||||
mgizadoc_DATA = \
|
||||
README\
|
||||
COPYING\
|
||||
AUTHORS\
|
||||
ChangeLog\
|
||||
INSTALL\
|
||||
NEWS
|
||||
|
||||
mgizascriptsdir = ${prefix}/scripts/
|
||||
|
||||
# Helper scripts installed into $(mgizascriptsdir).
# NOTE(review): automake itself does not expand wildcards; the `scripts/*'
# glob is passed through verbatim and only expanded later by make/the shell
# in the generated install rule. Confirm this is intentional, or list the
# scripts explicitly.
mgizascripts_SCRIPTS = \
|
||||
scripts/*
|
||||
|
||||
# Additional files to ship in the distribution: the documentation files and
# the helper scripts listed above.
EXTRA_DIST = $(mgizadoc_DATA) \
|
||||
${mgizascripts_SCRIPTS}
|
||||
# Copy all the spec files. Of course, only one is actually used.
|
||||
# dist-hook: for every name matching *.spec in the current directory, copy it
# into $(distdir) with `cp -p' (preserving mode and timestamps).
# The `test -f' guard skips the literal, unexpanded pattern `*.spec' that the
# shell leaves behind when no spec file exists, so the loop is then a no-op.
dist-hook:
|
||||
for specfile in *.spec; do \
|
||||
if test -f $$specfile; then \
|
||||
cp -p $$specfile $(distdir); \
|
||||
fi \
|
||||
done
|
||||
|
@ -0,0 +1,683 @@
|
||||
# Makefile.in generated by automake 1.10.1 from Makefile.am.
|
||||
# @configure_input@
|
||||
|
||||
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
|
||||
# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
||||
# This Makefile.in is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
# PARTICULAR PURPOSE.
|
||||
|
||||
@SET_MAKE@
|
||||
|
||||
|
||||
VPATH = @srcdir@
|
||||
pkgdatadir = $(datadir)/@PACKAGE@
|
||||
pkglibdir = $(libdir)/@PACKAGE@
|
||||
pkgincludedir = $(includedir)/@PACKAGE@
|
||||
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
||||
install_sh_DATA = $(install_sh) -c -m 644
|
||||
install_sh_PROGRAM = $(install_sh) -c
|
||||
install_sh_SCRIPT = $(install_sh) -c
|
||||
INSTALL_HEADER = $(INSTALL_DATA)
|
||||
transform = $(program_transform_name)
|
||||
NORMAL_INSTALL = :
|
||||
PRE_INSTALL = :
|
||||
POST_INSTALL = :
|
||||
NORMAL_UNINSTALL = :
|
||||
PRE_UNINSTALL = :
|
||||
POST_UNINSTALL = :
|
||||
subdir = .
|
||||
DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
|
||||
$(srcdir)/Makefile.in $(srcdir)/config.h.in \
|
||||
$(top_srcdir)/configure AUTHORS COPYING ChangeLog INSTALL NEWS \
|
||||
config.guess config.sub depcomp install-sh ltmain.sh missing
|
||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
|
||||
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||
$(ACLOCAL_M4)
|
||||
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
|
||||
configure.lineno config.status.lineno
|
||||
mkinstalldirs = $(install_sh) -d
|
||||
CONFIG_HEADER = config.h
|
||||
CONFIG_CLEAN_FILES =
|
||||
am__installdirs = "$(DESTDIR)$(mgizascriptsdir)" \
|
||||
"$(DESTDIR)$(mgizadocdir)"
|
||||
mgizascriptsSCRIPT_INSTALL = $(INSTALL_SCRIPT)
|
||||
SCRIPTS = $(mgizascripts_SCRIPTS)
|
||||
SOURCES =
|
||||
DIST_SOURCES =
|
||||
RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
|
||||
html-recursive info-recursive install-data-recursive \
|
||||
install-dvi-recursive install-exec-recursive \
|
||||
install-html-recursive install-info-recursive \
|
||||
install-pdf-recursive install-ps-recursive install-recursive \
|
||||
installcheck-recursive installdirs-recursive pdf-recursive \
|
||||
ps-recursive uninstall-recursive
|
||||
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
|
||||
am__vpath_adj = case $$p in \
|
||||
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
|
||||
*) f=$$p;; \
|
||||
esac;
|
||||
am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
|
||||
mgizadocDATA_INSTALL = $(INSTALL_DATA)
|
||||
DATA = $(mgizadoc_DATA)
|
||||
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
|
||||
distclean-recursive maintainer-clean-recursive
|
||||
ETAGS = etags
|
||||
CTAGS = ctags
|
||||
DIST_SUBDIRS = $(SUBDIRS)
|
||||
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||
distdir = $(PACKAGE)-$(VERSION)
|
||||
top_distdir = $(distdir)
|
||||
am__remove_distdir = \
|
||||
{ test ! -d $(distdir) \
|
||||
|| { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \
|
||||
&& rm -fr $(distdir); }; }
|
||||
DIST_ARCHIVES = $(distdir).tar.gz
|
||||
GZIP_ENV = --best
|
||||
distuninstallcheck_listfiles = find . -type f -print
|
||||
distcleancheck_listfiles = find . -type f -print
|
||||
ACLOCAL = @ACLOCAL@
|
||||
AMTAR = @AMTAR@
|
||||
AUTOCONF = @AUTOCONF@
|
||||
AUTOHEADER = @AUTOHEADER@
|
||||
AUTOMAKE = @AUTOMAKE@
|
||||
AWK = @AWK@
|
||||
CC = @CC@
|
||||
CCDEPMODE = @CCDEPMODE@
|
||||
CFLAGS = @CFLAGS@
|
||||
CPP = @CPP@
|
||||
CPPFLAGS = @CPPFLAGS@
|
||||
CXX = @CXX@
|
||||
CXXDEPMODE = @CXXDEPMODE@
|
||||
CXXFLAGS = @CXXFLAGS@
|
||||
CYGPATH_W = @CYGPATH_W@
|
||||
DEFS = @DEFS@
|
||||
DEPDIR = @DEPDIR@
|
||||
ECHO_C = @ECHO_C@
|
||||
ECHO_N = @ECHO_N@
|
||||
ECHO_T = @ECHO_T@
|
||||
EGREP = @EGREP@
|
||||
EXEEXT = @EXEEXT@
|
||||
GREP = @GREP@
|
||||
INSTALL = @INSTALL@
|
||||
INSTALL_DATA = @INSTALL_DATA@
|
||||
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
||||
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
||||
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBS = @LIBS@
|
||||
LTLIBOBJS = @LTLIBOBJS@
|
||||
MAINT = @MAINT@
|
||||
MAKEINFO = @MAKEINFO@
|
||||
MKDIR_P = @MKDIR_P@
|
||||
OBJEXT = @OBJEXT@
|
||||
PACKAGE = @PACKAGE@
|
||||
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
||||
PACKAGE_NAME = @PACKAGE_NAME@
|
||||
PACKAGE_STRING = @PACKAGE_STRING@
|
||||
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
||||
PACKAGE_VERSION = @PACKAGE_VERSION@
|
||||
PATH_SEPARATOR = @PATH_SEPARATOR@
|
||||
RANLIB = @RANLIB@
|
||||
SET_MAKE = @SET_MAKE@
|
||||
SHELL = @SHELL@
|
||||
STRIP = @STRIP@
|
||||
VERSION = @VERSION@
|
||||
abs_builddir = @abs_builddir@
|
||||
abs_srcdir = @abs_srcdir@
|
||||
abs_top_builddir = @abs_top_builddir@
|
||||
abs_top_srcdir = @abs_top_srcdir@
|
||||
ac_ct_CC = @ac_ct_CC@
|
||||
ac_ct_CXX = @ac_ct_CXX@
|
||||
am__include = @am__include@
|
||||
am__leading_dot = @am__leading_dot@
|
||||
am__quote = @am__quote@
|
||||
am__tar = @am__tar@
|
||||
am__untar = @am__untar@
|
||||
bindir = @bindir@
|
||||
build_alias = @build_alias@
|
||||
builddir = @builddir@
|
||||
datadir = @datadir@
|
||||
datarootdir = @datarootdir@
|
||||
docdir = @docdir@
|
||||
dvidir = @dvidir@
|
||||
exec_prefix = @exec_prefix@
|
||||
host_alias = @host_alias@
|
||||
htmldir = @htmldir@
|
||||
includedir = @includedir@
|
||||
infodir = @infodir@
|
||||
install_sh = @install_sh@
|
||||
libdir = @libdir@
|
||||
libexecdir = @libexecdir@
|
||||
localedir = @localedir@
|
||||
localstatedir = @localstatedir@
|
||||
mandir = @mandir@
|
||||
mkdir_p = @mkdir_p@
|
||||
oldincludedir = @oldincludedir@
|
||||
pdfdir = @pdfdir@
|
||||
prefix = @prefix@
|
||||
program_transform_name = @program_transform_name@
|
||||
psdir = @psdir@
|
||||
sbindir = @sbindir@
|
||||
sharedstatedir = @sharedstatedir@
|
||||
srcdir = @srcdir@
|
||||
sysconfdir = @sysconfdir@
|
||||
target_alias = @target_alias@
|
||||
top_build_prefix = @top_build_prefix@
|
||||
top_builddir = @top_builddir@
|
||||
top_srcdir = @top_srcdir@
|
||||
SUBDIRS = src
|
||||
mgizadocdir = ${prefix}/doc/mgiza
|
||||
mgizadoc_DATA = \
|
||||
README\
|
||||
COPYING\
|
||||
AUTHORS\
|
||||
ChangeLog\
|
||||
INSTALL\
|
||||
NEWS
|
||||
|
||||
mgizascriptsdir = ${prefix}/scripts/
|
||||
mgizascripts_SCRIPTS = \
|
||||
scripts/*
|
||||
|
||||
EXTRA_DIST = $(mgizadoc_DATA) \
|
||||
${mgizascripts_SCRIPTS}
|
||||
|
||||
all: config.h
|
||||
$(MAKE) $(AM_MAKEFLAGS) all-recursive
|
||||
|
||||
.SUFFIXES:
|
||||
am--refresh:
|
||||
@:
|
||||
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
|
||||
@for dep in $?; do \
|
||||
case '$(am__configure_deps)' in \
|
||||
*$$dep*) \
|
||||
echo ' cd $(srcdir) && $(AUTOMAKE) --gnu '; \
|
||||
cd $(srcdir) && $(AUTOMAKE) --gnu \
|
||||
&& exit 0; \
|
||||
exit 1;; \
|
||||
esac; \
|
||||
done; \
|
||||
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
|
||||
cd $(top_srcdir) && \
|
||||
$(AUTOMAKE) --gnu Makefile
|
||||
.PRECIOUS: Makefile
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
@case '$?' in \
|
||||
*config.status*) \
|
||||
echo ' $(SHELL) ./config.status'; \
|
||||
$(SHELL) ./config.status;; \
|
||||
*) \
|
||||
echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
|
||||
cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
|
||||
esac;
|
||||
|
||||
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
||||
$(SHELL) ./config.status --recheck
|
||||
|
||||
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
|
||||
cd $(srcdir) && $(AUTOCONF)
|
||||
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
|
||||
cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
|
||||
|
||||
config.h: stamp-h1
|
||||
@if test ! -f $@; then \
|
||||
rm -f stamp-h1; \
|
||||
$(MAKE) $(AM_MAKEFLAGS) stamp-h1; \
|
||||
else :; fi
|
||||
|
||||
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
|
||||
@rm -f stamp-h1
|
||||
cd $(top_builddir) && $(SHELL) ./config.status config.h
|
||||
$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
|
||||
cd $(top_srcdir) && $(AUTOHEADER)
|
||||
rm -f stamp-h1
|
||||
touch $@
|
||||
|
||||
distclean-hdr:
|
||||
-rm -f config.h stamp-h1
|
||||
install-mgizascriptsSCRIPTS: $(mgizascripts_SCRIPTS)
|
||||
@$(NORMAL_INSTALL)
|
||||
test -z "$(mgizascriptsdir)" || $(MKDIR_P) "$(DESTDIR)$(mgizascriptsdir)"
|
||||
@list='$(mgizascripts_SCRIPTS)'; for p in $$list; do \
|
||||
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
|
||||
if test -f $$d$$p; then \
|
||||
f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
|
||||
echo " $(mgizascriptsSCRIPT_INSTALL) '$$d$$p' '$(DESTDIR)$(mgizascriptsdir)/$$f'"; \
|
||||
$(mgizascriptsSCRIPT_INSTALL) "$$d$$p" "$(DESTDIR)$(mgizascriptsdir)/$$f"; \
|
||||
else :; fi; \
|
||||
done
|
||||
|
||||
uninstall-mgizascriptsSCRIPTS:
|
||||
@$(NORMAL_UNINSTALL)
|
||||
@list='$(mgizascripts_SCRIPTS)'; for p in $$list; do \
|
||||
f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
|
||||
echo " rm -f '$(DESTDIR)$(mgizascriptsdir)/$$f'"; \
|
||||
rm -f "$(DESTDIR)$(mgizascriptsdir)/$$f"; \
|
||||
done
|
||||
install-mgizadocDATA: $(mgizadoc_DATA)
|
||||
@$(NORMAL_INSTALL)
|
||||
test -z "$(mgizadocdir)" || $(MKDIR_P) "$(DESTDIR)$(mgizadocdir)"
|
||||
@list='$(mgizadoc_DATA)'; for p in $$list; do \
|
||||
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
|
||||
f=$(am__strip_dir) \
|
||||
echo " $(mgizadocDATA_INSTALL) '$$d$$p' '$(DESTDIR)$(mgizadocdir)/$$f'"; \
|
||||
$(mgizadocDATA_INSTALL) "$$d$$p" "$(DESTDIR)$(mgizadocdir)/$$f"; \
|
||||
done
|
||||
|
||||
uninstall-mgizadocDATA:
|
||||
@$(NORMAL_UNINSTALL)
|
||||
@list='$(mgizadoc_DATA)'; for p in $$list; do \
|
||||
f=$(am__strip_dir) \
|
||||
echo " rm -f '$(DESTDIR)$(mgizadocdir)/$$f'"; \
|
||||
rm -f "$(DESTDIR)$(mgizadocdir)/$$f"; \
|
||||
done
|
||||
|
||||
# This directory's subdirectories are mostly independent; you can cd
|
||||
# into them and run `make' without going through this Makefile.
|
||||
# To change the values of `make' variables: instead of editing Makefiles,
|
||||
# (1) if the variable is set in `config.status', edit `config.status'
|
||||
# (which will cause the Makefiles to be regenerated when you run `make');
|
||||
# (2) otherwise, pass the desired values on the `make' command line.
|
||||
$(RECURSIVE_TARGETS):
|
||||
@failcom='exit 1'; \
|
||||
for f in x $$MAKEFLAGS; do \
|
||||
case $$f in \
|
||||
*=* | --[!k]*);; \
|
||||
*k*) failcom='fail=yes';; \
|
||||
esac; \
|
||||
done; \
|
||||
dot_seen=no; \
|
||||
target=`echo $@ | sed s/-recursive//`; \
|
||||
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||
echo "Making $$target in $$subdir"; \
|
||||
if test "$$subdir" = "."; then \
|
||||
dot_seen=yes; \
|
||||
local_target="$$target-am"; \
|
||||
else \
|
||||
local_target="$$target"; \
|
||||
fi; \
|
||||
(cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|
||||
|| eval $$failcom; \
|
||||
done; \
|
||||
if test "$$dot_seen" = "no"; then \
|
||||
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
|
||||
fi; test -z "$$fail"
|
||||
|
||||
$(RECURSIVE_CLEAN_TARGETS):
|
||||
@failcom='exit 1'; \
|
||||
for f in x $$MAKEFLAGS; do \
|
||||
case $$f in \
|
||||
*=* | --[!k]*);; \
|
||||
*k*) failcom='fail=yes';; \
|
||||
esac; \
|
||||
done; \
|
||||
dot_seen=no; \
|
||||
case "$@" in \
|
||||
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
|
||||
*) list='$(SUBDIRS)' ;; \
|
||||
esac; \
|
||||
rev=''; for subdir in $$list; do \
|
||||
if test "$$subdir" = "."; then :; else \
|
||||
rev="$$subdir $$rev"; \
|
||||
fi; \
|
||||
done; \
|
||||
rev="$$rev ."; \
|
||||
target=`echo $@ | sed s/-recursive//`; \
|
||||
for subdir in $$rev; do \
|
||||
echo "Making $$target in $$subdir"; \
|
||||
if test "$$subdir" = "."; then \
|
||||
local_target="$$target-am"; \
|
||||
else \
|
||||
local_target="$$target"; \
|
||||
fi; \
|
||||
(cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|
||||
|| eval $$failcom; \
|
||||
done && test -z "$$fail"
|
||||
tags-recursive:
|
||||
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||
test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
|
||||
done
|
||||
ctags-recursive:
|
||||
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||
test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
|
||||
done
|
||||
|
||||
# Build the `ID' identifier database with mkid.
# The file list is the de-duplicated union of $(SOURCES), $(HEADERS),
# $(LISP) and $(TAGS_FILES); a name that is not a regular file in the
# current directory is prefixed with $(srcdir)/.
# The awk flag must be spelled `nonempty' both where it is set and where
# the END block tests it; otherwise END prints nothing and mkid is run
# with an empty file list.
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
	unique=`for i in $$list; do \
	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
	  done | \
	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
	      END { if (nonempty) { for (i in files) print i; }; }'`; \
	mkid -fID $$unique
|
||||
tags: TAGS
|
||||
|
||||
TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
|
||||
$(TAGS_FILES) $(LISP)
|
||||
tags=; \
|
||||
here=`pwd`; \
|
||||
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
|
||||
include_option=--etags-include; \
|
||||
empty_fix=.; \
|
||||
else \
|
||||
include_option=--include; \
|
||||
empty_fix=; \
|
||||
fi; \
|
||||
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||
if test "$$subdir" = .; then :; else \
|
||||
test ! -f $$subdir/TAGS || \
|
||||
tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \
|
||||
fi; \
|
||||
done; \
|
||||
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
|
||||
unique=`for i in $$list; do \
|
||||
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||
done | \
|
||||
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||
if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
|
||||
test -n "$$unique" || unique=$$empty_fix; \
|
||||
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||
$$tags $$unique; \
|
||||
fi
|
||||
ctags: CTAGS
|
||||
CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
|
||||
$(TAGS_FILES) $(LISP)
|
||||
tags=; \
|
||||
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
|
||||
unique=`for i in $$list; do \
|
||||
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||
done | \
|
||||
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||
test -z "$(CTAGS_ARGS)$$tags$$unique" \
|
||||
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
|
||||
$$tags $$unique
|
||||
|
||||
GTAGS:
|
||||
here=`$(am__cd) $(top_builddir) && pwd` \
|
||||
&& cd $(top_srcdir) \
|
||||
&& gtags -i $(GTAGS_ARGS) $$here
|
||||
|
||||
distclean-tags:
|
||||
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
|
||||
|
||||
distdir: $(DISTFILES)
|
||||
$(am__remove_distdir)
|
||||
test -d $(distdir) || mkdir $(distdir)
|
||||
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||
list='$(DISTFILES)'; \
|
||||
dist_files=`for file in $$list; do echo $$file; done | \
|
||||
sed -e "s|^$$srcdirstrip/||;t" \
|
||||
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
|
||||
case $$dist_files in \
|
||||
*/*) $(MKDIR_P) `echo "$$dist_files" | \
|
||||
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
|
||||
sort -u` ;; \
|
||||
esac; \
|
||||
for file in $$dist_files; do \
|
||||
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
||||
if test -d $$d/$$file; then \
|
||||
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
|
||||
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
||||
cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
|
||||
fi; \
|
||||
cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
|
||||
else \
|
||||
test -f $(distdir)/$$file \
|
||||
|| cp -p $$d/$$file $(distdir)/$$file \
|
||||
|| exit 1; \
|
||||
fi; \
|
||||
done
|
||||
list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
|
||||
if test "$$subdir" = .; then :; else \
|
||||
test -d "$(distdir)/$$subdir" \
|
||||
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|
||||
|| exit 1; \
|
||||
distdir=`$(am__cd) $(distdir) && pwd`; \
|
||||
top_distdir=`$(am__cd) $(top_distdir) && pwd`; \
|
||||
(cd $$subdir && \
|
||||
$(MAKE) $(AM_MAKEFLAGS) \
|
||||
top_distdir="$$top_distdir" \
|
||||
distdir="$$distdir/$$subdir" \
|
||||
am__remove_distdir=: \
|
||||
am__skip_length_check=: \
|
||||
distdir) \
|
||||
|| exit 1; \
|
||||
fi; \
|
||||
done
|
||||
$(MAKE) $(AM_MAKEFLAGS) \
|
||||
top_distdir="$(top_distdir)" distdir="$(distdir)" \
|
||||
dist-hook
|
||||
-find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \
|
||||
! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
|
||||
! -type d ! -perm -400 -exec chmod a+r {} \; -o \
|
||||
! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
|
||||
|| chmod -R a+r $(distdir)
|
||||
dist-gzip: distdir
|
||||
tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
|
||||
$(am__remove_distdir)
|
||||
|
||||
dist-bzip2: distdir
|
||||
tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
|
||||
$(am__remove_distdir)
|
||||
|
||||
dist-lzma: distdir
|
||||
tardir=$(distdir) && $(am__tar) | lzma -9 -c >$(distdir).tar.lzma
|
||||
$(am__remove_distdir)
|
||||
|
||||
dist-tarZ: distdir
|
||||
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
|
||||
$(am__remove_distdir)
|
||||
|
||||
dist-shar: distdir
|
||||
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
|
||||
$(am__remove_distdir)
|
||||
|
||||
dist-zip: distdir
|
||||
-rm -f $(distdir).zip
|
||||
zip -rq $(distdir).zip $(distdir)
|
||||
$(am__remove_distdir)
|
||||
|
||||
dist dist-all: distdir
|
||||
tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
|
||||
$(am__remove_distdir)
|
||||
|
||||
# This target untars the dist file and tries a VPATH configuration. Then
|
||||
# it guarantees that the distribution is self-contained by making another
|
||||
# tarfile.
|
||||
distcheck: dist
|
||||
case '$(DIST_ARCHIVES)' in \
|
||||
*.tar.gz*) \
|
||||
GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\
|
||||
*.tar.bz2*) \
|
||||
bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\
|
||||
*.tar.lzma*) \
|
||||
unlzma -c $(distdir).tar.lzma | $(am__untar) ;;\
|
||||
*.tar.Z*) \
|
||||
uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
|
||||
*.shar.gz*) \
|
||||
GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\
|
||||
*.zip*) \
|
||||
unzip $(distdir).zip ;;\
|
||||
esac
|
||||
chmod -R a-w $(distdir); chmod a+w $(distdir)
|
||||
mkdir $(distdir)/_build
|
||||
mkdir $(distdir)/_inst
|
||||
chmod a-w $(distdir)
|
||||
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
|
||||
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
|
||||
&& cd $(distdir)/_build \
|
||||
&& ../configure --srcdir=.. --prefix="$$dc_install_base" \
|
||||
$(DISTCHECK_CONFIGURE_FLAGS) \
|
||||
&& $(MAKE) $(AM_MAKEFLAGS) \
|
||||
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
|
||||
&& $(MAKE) $(AM_MAKEFLAGS) check \
|
||||
&& $(MAKE) $(AM_MAKEFLAGS) install \
|
||||
&& $(MAKE) $(AM_MAKEFLAGS) installcheck \
|
||||
&& $(MAKE) $(AM_MAKEFLAGS) uninstall \
|
||||
&& $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
|
||||
distuninstallcheck \
|
||||
&& chmod -R a-w "$$dc_install_base" \
|
||||
&& ({ \
|
||||
(cd ../.. && umask 077 && mkdir "$$dc_destdir") \
|
||||
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
|
||||
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
|
||||
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
|
||||
distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
|
||||
} || { rm -rf "$$dc_destdir"; exit 1; }) \
|
||||
&& rm -rf "$$dc_destdir" \
|
||||
&& $(MAKE) $(AM_MAKEFLAGS) dist \
|
||||
&& rm -rf $(DIST_ARCHIVES) \
|
||||
&& $(MAKE) $(AM_MAKEFLAGS) distcleancheck
|
||||
$(am__remove_distdir)
|
||||
@(echo "$(distdir) archives ready for distribution: "; \
|
||||
list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
|
||||
sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
|
||||
# Check that `uninstall' left nothing behind.
# Changes into $(distuninstallcheck_dir) and passes when the output of
# $(distuninstallcheck_listfiles) has at most one line.  If the cd or the
# count check fails, prints an error (plus a DESTDIR hint when $(DESTDIR)
# is non-empty) and the file listing to stderr, then exits with status 1.
distuninstallcheck:
|
||||
@cd $(distuninstallcheck_dir) \
|
||||
&& test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
|
||||
|| { echo "ERROR: files left after uninstall:" ; \
|
||||
if test -n "$(DESTDIR)"; then \
|
||||
echo " (check DESTDIR support)"; \
|
||||
fi ; \
|
||||
$(distuninstallcheck_listfiles) ; \
|
||||
exit 1; } >&2
|
||||
# Check that `distclean' (a prerequisite of this target) removed everything.
# First refuses to run when $(srcdir) is `.', i.e. outside a VPATH build.
# Then passes only when $(distcleancheck_listfiles) prints no lines;
# otherwise prints an error and the file listing to stderr and exits 1.
distcleancheck: distclean
|
||||
@if test '$(srcdir)' = . ; then \
|
||||
echo "ERROR: distcleancheck can only run from a VPATH build" ; \
|
||||
exit 1 ; \
|
||||
fi
|
||||
@test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
|
||||
|| { echo "ERROR: files left in build directory after distclean:" ; \
|
||||
$(distcleancheck_listfiles) ; \
|
||||
exit 1; } >&2
|
||||
check-am: all-am
|
||||
check: check-recursive
|
||||
all-am: Makefile $(SCRIPTS) $(DATA) config.h
|
||||
installdirs: installdirs-recursive
|
||||
installdirs-am:
|
||||
for dir in "$(DESTDIR)$(mgizascriptsdir)" "$(DESTDIR)$(mgizadocdir)"; do \
|
||||
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
|
||||
done
|
||||
install: install-recursive
|
||||
install-exec: install-exec-recursive
|
||||
install-data: install-data-recursive
|
||||
uninstall: uninstall-recursive
|
||||
|
||||
install-am: all-am
|
||||
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
||||
|
||||
installcheck: installcheck-recursive
|
||||
install-strip:
|
||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||
`test -z '$(STRIP)' || \
|
||||
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
|
||||
mostlyclean-generic:
|
||||
|
||||
clean-generic:
|
||||
|
||||
distclean-generic:
|
||||
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
||||
|
||||
maintainer-clean-generic:
|
||||
@echo "This command is intended for maintainers to use"
|
||||
@echo "it deletes files that may require special tools to rebuild."
|
||||
clean: clean-recursive
|
||||
|
||||
clean-am: clean-generic mostlyclean-am
|
||||
|
||||
distclean: distclean-recursive
|
||||
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
|
||||
-rm -f Makefile
|
||||
distclean-am: clean-am distclean-generic distclean-hdr distclean-tags
|
||||
|
||||
dvi: dvi-recursive
|
||||
|
||||
dvi-am:
|
||||
|
||||
html: html-recursive
|
||||
|
||||
info: info-recursive
|
||||
|
||||
info-am:
|
||||
|
||||
install-data-am: install-mgizadocDATA install-mgizascriptsSCRIPTS
|
||||
|
||||
install-dvi: install-dvi-recursive
|
||||
|
||||
install-exec-am:
|
||||
|
||||
install-html: install-html-recursive
|
||||
|
||||
install-info: install-info-recursive
|
||||
|
||||
install-man:
|
||||
|
||||
install-pdf: install-pdf-recursive
|
||||
|
||||
install-ps: install-ps-recursive
|
||||
|
||||
installcheck-am:
|
||||
|
||||
maintainer-clean: maintainer-clean-recursive
|
||||
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
|
||||
-rm -rf $(top_srcdir)/autom4te.cache
|
||||
-rm -f Makefile
|
||||
maintainer-clean-am: distclean-am maintainer-clean-generic
|
||||
|
||||
mostlyclean: mostlyclean-recursive
|
||||
|
||||
mostlyclean-am: mostlyclean-generic
|
||||
|
||||
pdf: pdf-recursive
|
||||
|
||||
pdf-am:
|
||||
|
||||
ps: ps-recursive
|
||||
|
||||
ps-am:
|
||||
|
||||
uninstall-am: uninstall-mgizadocDATA uninstall-mgizascriptsSCRIPTS
|
||||
|
||||
.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \
|
||||
install-strip
|
||||
|
||||
.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
|
||||
all all-am am--refresh check check-am clean clean-generic \
|
||||
ctags ctags-recursive dist dist-all dist-bzip2 dist-gzip \
|
||||
dist-hook dist-lzma dist-shar dist-tarZ dist-zip distcheck \
|
||||
distclean distclean-generic distclean-hdr distclean-tags \
|
||||
distcleancheck distdir distuninstallcheck dvi dvi-am html \
|
||||
html-am info info-am install install-am install-data \
|
||||
install-data-am install-dvi install-dvi-am install-exec \
|
||||
install-exec-am install-html install-html-am install-info \
|
||||
install-info-am install-man install-mgizadocDATA \
|
||||
install-mgizascriptsSCRIPTS install-pdf install-pdf-am \
|
||||
install-ps install-ps-am install-strip installcheck \
|
||||
installcheck-am installdirs installdirs-am maintainer-clean \
|
||||
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
|
||||
pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \
|
||||
uninstall-mgizadocDATA uninstall-mgizascriptsSCRIPTS
|
||||
|
||||
# Copy all the spec files. Of course, only one is actually used.
# Each *.spec entry in the current directory is copied into $(distdir)
# with `cp -p'.  The `test -f' guard skips anything that is not a regular
# file, including the literal `*.spec' left behind when nothing matches.
|
||||
dist-hook:
|
||||
for specfile in *.spec; do \
|
||||
if test -f $$specfile; then \
|
||||
cp -p $$specfile $(distdir); \
|
||||
fi \
|
||||
done
|
||||
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||
.NOEXPORT:
|
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0"?>
|
||||
<gtodo>
|
||||
<category title="Personal" place="0"/>
|
||||
<category title="Business" place="1"/>
|
||||
<category title="Unfiled" place="2"/>
|
||||
</gtodo>
|
932
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/aclocal.m4
vendored
Normal file
932
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/aclocal.m4
vendored
Normal file
@ -0,0 +1,932 @@
|
||||
# generated automatically by aclocal 1.10.1 -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||
# 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
# PARTICULAR PURPOSE.
|
||||
|
||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||
m4_if(AC_AUTOCONF_VERSION, [2.62],,
|
||||
[m4_warning([this file was generated for autoconf 2.62.
|
||||
You have another version of autoconf. It may work, but is not guaranteed to.
|
||||
If you have problems, you may need to regenerate the build system entirely.
|
||||
To do so, use the procedure documented by the package, typically `autoreconf'.])])
|
||||
|
||||
# Copyright (C) 2002, 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# AM_AUTOMAKE_VERSION(VERSION)
|
||||
# ----------------------------
|
||||
# Automake X.Y traces this macro to ensure aclocal.m4 has been
|
||||
# generated from the m4 files accompanying Automake X.Y.
|
||||
# (This private macro should not be called outside this file.)
|
||||
AC_DEFUN([AM_AUTOMAKE_VERSION],
|
||||
[am__api_version='1.10'
|
||||
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
|
||||
dnl require some minimum version. Point them to the right macro.
|
||||
m4_if([$1], [1.10.1], [],
|
||||
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
|
||||
])
|
||||
|
||||
# _AM_AUTOCONF_VERSION(VERSION)
|
||||
# -----------------------------
|
||||
# aclocal traces this macro to find the Autoconf version.
|
||||
# This is a private macro too. Using m4_define simplifies
|
||||
# the logic in aclocal, which can simply ignore this definition.
|
||||
m4_define([_AM_AUTOCONF_VERSION], [])
|
||||
|
||||
# AM_SET_CURRENT_AUTOMAKE_VERSION
|
||||
# -------------------------------
|
||||
# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
|
||||
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
|
||||
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
|
||||
[AM_AUTOMAKE_VERSION([1.10.1])dnl
|
||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||
_AM_AUTOCONF_VERSION(AC_AUTOCONF_VERSION)])
|
||||
|
||||
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
|
||||
# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to
|
||||
# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
|
||||
#
|
||||
# Of course, Automake must honor this variable whenever it calls a
|
||||
# tool from the auxiliary directory. The problem is that $srcdir (and
|
||||
# therefore $ac_aux_dir as well) can be either absolute or relative,
|
||||
# depending on how configure is run. This is pretty annoying, since
|
||||
# it makes $ac_aux_dir quite unusable in subdirectories: in the top
|
||||
# source directory, any form will work fine, but in subdirectories a
|
||||
# relative path needs to be adjusted first.
|
||||
#
|
||||
# $ac_aux_dir/missing
|
||||
# fails when called from a subdirectory if $ac_aux_dir is relative
|
||||
# $top_srcdir/$ac_aux_dir/missing
|
||||
# fails if $ac_aux_dir is absolute,
|
||||
# fails when called from a subdirectory in a VPATH build with
|
||||
# a relative $ac_aux_dir
|
||||
#
|
||||
# The reason of the latter failure is that $top_srcdir and $ac_aux_dir
|
||||
# are both prefixed by $srcdir. In an in-source build this is usually
|
||||
# harmless because $srcdir is `.', but things will break when you
|
||||
# start a VPATH build or use an absolute $srcdir.
|
||||
#
|
||||
# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
|
||||
# iff we strip the leading $srcdir from $ac_aux_dir. That would be:
|
||||
# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
|
||||
# and then we would define $MISSING as
|
||||
# MISSING="\${SHELL} $am_aux_dir/missing"
|
||||
# This will work as long as MISSING is not called from configure, because
|
||||
# unfortunately $(top_srcdir) has no meaning in configure.
|
||||
# However there are other variables, like CC, which are often used in
|
||||
# configure, and could therefore not use this "fixed" $ac_aux_dir.
|
||||
#
|
||||
# Another solution, used here, is to always expand $ac_aux_dir to an
|
||||
# absolute PATH. The drawback is that using absolute paths prevents a
|
||||
# configured tree from being moved without reconfiguration.
|
||||
|
||||
AC_DEFUN([AM_AUX_DIR_EXPAND],
|
||||
[dnl Rely on autoconf to set up CDPATH properly.
|
||||
AC_PREREQ([2.50])dnl
|
||||
# expand $ac_aux_dir to an absolute path
|
||||
am_aux_dir=`cd $ac_aux_dir && pwd`
|
||||
])
|
||||
|
||||
|
||||
# Copyright (C) 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 4
|
||||
|
||||
# This was merged into AC_PROG_CC in Autoconf.
|
||||
|
||||
AU_DEFUN([AM_PROG_CC_STDC],
|
||||
[AC_PROG_CC
|
||||
AC_DIAGNOSE([obsolete], [$0:
|
||||
your code should no longer depend upon `am_cv_prog_cc_stdc', but upon
|
||||
`ac_cv_prog_cc_stdc'. Remove this warning and the assignment when
|
||||
you adjust the code. You can also remove the above call to
|
||||
AC_PROG_CC if you already called it elsewhere.])
|
||||
am_cv_prog_cc_stdc=$ac_cv_prog_cc_stdc
|
||||
])
|
||||
AU_DEFUN([fp_PROG_CC_STDC])
|
||||
|
||||
# AM_CONDITIONAL -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 8
|
||||
|
||||
# AM_CONDITIONAL(NAME, SHELL-CONDITION)
|
||||
# -------------------------------------
|
||||
# Define a conditional.
|
||||
AC_DEFUN([AM_CONDITIONAL],
|
||||
[AC_PREREQ(2.52)dnl
|
||||
ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])],
|
||||
[$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
|
||||
AC_SUBST([$1_TRUE])dnl
|
||||
AC_SUBST([$1_FALSE])dnl
|
||||
_AM_SUBST_NOTMAKE([$1_TRUE])dnl
|
||||
_AM_SUBST_NOTMAKE([$1_FALSE])dnl
|
||||
if $2; then
|
||||
$1_TRUE=
|
||||
$1_FALSE='#'
|
||||
else
|
||||
$1_TRUE='#'
|
||||
$1_FALSE=
|
||||
fi
|
||||
AC_CONFIG_COMMANDS_PRE(
|
||||
[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
|
||||
AC_MSG_ERROR([[conditional "$1" was never defined.
|
||||
Usually this means the macro was only invoked conditionally.]])
|
||||
fi])])
|
||||
|
||||
# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 9
|
||||
|
||||
# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
|
||||
# written in clear, in which case automake, when reading aclocal.m4,
|
||||
# will think it sees a *use*, and therefore will trigger all its
|
||||
# C support machinery. Also note that it means that autoscan, seeing
|
||||
# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
|
||||
|
||||
|
||||
# _AM_DEPENDENCIES(NAME)
|
||||
# ----------------------
|
||||
# See how the compiler implements dependency checking.
|
||||
# NAME is "CC", "CXX", "GCJ", or "OBJC".
|
||||
# We try a few techniques and use that to set a single cache variable.
|
||||
#
|
||||
# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
|
||||
# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
|
||||
# dependency, and given that the user is not expected to run this macro,
|
||||
# just rely on AC_PROG_CC.
|
||||
AC_DEFUN([_AM_DEPENDENCIES],
|
||||
[AC_REQUIRE([AM_SET_DEPDIR])dnl
|
||||
AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
|
||||
AC_REQUIRE([AM_MAKE_INCLUDE])dnl
|
||||
AC_REQUIRE([AM_DEP_TRACK])dnl
|
||||
|
||||
ifelse([$1], CC, [depcc="$CC" am_compiler_list=],
|
||||
[$1], CXX, [depcc="$CXX" am_compiler_list=],
|
||||
[$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
|
||||
[$1], UPC, [depcc="$UPC" am_compiler_list=],
|
||||
[$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'],
|
||||
[depcc="$$1" am_compiler_list=])
|
||||
|
||||
AC_CACHE_CHECK([dependency style of $depcc],
|
||||
[am_cv_$1_dependencies_compiler_type],
|
||||
[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
|
||||
# We make a subdir and do the tests there. Otherwise we can end up
|
||||
# making bogus files that we don't know about and never remove. For
|
||||
# instance it was reported that on HP-UX the gcc test will end up
|
||||
# making a dummy file named `D' -- because `-MD' means `put the output
|
||||
# in D'.
|
||||
mkdir conftest.dir
|
||||
# Copy depcomp to subdir because otherwise we won't find it if we're
|
||||
# using a relative directory.
|
||||
cp "$am_depcomp" conftest.dir
|
||||
cd conftest.dir
|
||||
# We will build objects and dependencies in a subdirectory because
|
||||
# it helps to detect inapplicable dependency modes. For instance
|
||||
# both Tru64's cc and ICC support -MD to output dependencies as a
|
||||
# side effect of compilation, but ICC will put the dependencies in
|
||||
# the current directory while Tru64 will put them in the object
|
||||
# directory.
|
||||
mkdir sub
|
||||
|
||||
am_cv_$1_dependencies_compiler_type=none
|
||||
if test "$am_compiler_list" = ""; then
|
||||
am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
|
||||
fi
|
||||
for depmode in $am_compiler_list; do
|
||||
# Setup a source with many dependencies, because some compilers
|
||||
# like to wrap large dependency lists on column 80 (with \), and
|
||||
# we should not choose a depcomp mode which is confused by this.
|
||||
#
|
||||
# We need to recreate these files for each test, as the compiler may
|
||||
# overwrite some of them when testing with obscure command lines.
|
||||
# This happens at least with the AIX C compiler.
|
||||
: > sub/conftest.c
|
||||
for i in 1 2 3 4 5 6; do
|
||||
echo '#include "conftst'$i'.h"' >> sub/conftest.c
|
||||
# Using `: > sub/conftst$i.h' creates only sub/conftst1.h with
|
||||
# Solaris 8's {/usr,}/bin/sh.
|
||||
touch sub/conftst$i.h
|
||||
done
|
||||
echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
|
||||
|
||||
case $depmode in
|
||||
nosideeffect)
|
||||
# after this tag, mechanisms are not by side-effect, so they'll
|
||||
# only be used when explicitly requested
|
||||
if test "x$enable_dependency_tracking" = xyes; then
|
||||
continue
|
||||
else
|
||||
break
|
||||
fi
|
||||
;;
|
||||
none) break ;;
|
||||
esac
|
||||
# We check with `-c' and `-o' for the sake of the "dashmstdout"
|
||||
# mode. It turns out that the SunPro C++ compiler does not properly
|
||||
# handle `-M -o', and we need to detect this.
|
||||
if depmode=$depmode \
|
||||
source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \
|
||||
depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
|
||||
$SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \
|
||||
>/dev/null 2>conftest.err &&
|
||||
grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
|
||||
grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
|
||||
grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 &&
|
||||
${MAKE-make} -s -f confmf > /dev/null 2>&1; then
|
||||
# icc doesn't choke on unknown options, it will just issue warnings
|
||||
# or remarks (even with -Werror). So we grep stderr for any message
|
||||
# that says an option was ignored or not supported.
|
||||
# When given -MP, icc 7.0 and 7.1 complain thusly:
|
||||
# icc: Command line warning: ignoring option '-M'; no argument required
|
||||
# The diagnosis changed in icc 8.0:
|
||||
# icc: Command line remark: option '-MP' not supported
|
||||
if (grep 'ignoring option' conftest.err ||
|
||||
grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
|
||||
am_cv_$1_dependencies_compiler_type=$depmode
|
||||
break
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
cd ..
|
||||
rm -rf conftest.dir
|
||||
else
|
||||
am_cv_$1_dependencies_compiler_type=none
|
||||
fi
|
||||
])
|
||||
AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
|
||||
AM_CONDITIONAL([am__fastdep$1], [
|
||||
test "x$enable_dependency_tracking" != xno \
|
||||
&& test "$am_cv_$1_dependencies_compiler_type" = gcc3])
|
||||
])
|
||||
|
||||
|
||||
# AM_SET_DEPDIR
|
||||
# -------------
|
||||
# Choose a directory name for dependency files.
|
||||
# This macro is AC_REQUIREd in _AM_DEPENDENCIES
|
||||
AC_DEFUN([AM_SET_DEPDIR],
|
||||
[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
|
||||
AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
|
||||
])
|
||||
|
||||
|
||||
# AM_DEP_TRACK
|
||||
# ------------
|
||||
AC_DEFUN([AM_DEP_TRACK],
|
||||
[AC_ARG_ENABLE(dependency-tracking,
|
||||
[ --disable-dependency-tracking speeds up one-time build
|
||||
--enable-dependency-tracking do not reject slow dependency extractors])
|
||||
if test "x$enable_dependency_tracking" != xno; then
|
||||
am_depcomp="$ac_aux_dir/depcomp"
|
||||
AMDEPBACKSLASH='\'
|
||||
fi
|
||||
AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
|
||||
AC_SUBST([AMDEPBACKSLASH])dnl
|
||||
_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
|
||||
])
|
||||
|
||||
# Generate code to set up dependency tracking. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
#serial 3
|
||||
|
||||
# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||
# ------------------------------
|
||||
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
[for mf in $CONFIG_FILES; do
|
||||
# Strip MF so we end up with the name of the file.
|
||||
mf=`echo "$mf" | sed -e 's/:.*$//'`
|
||||
# Check whether this is an Automake generated Makefile or not.
|
||||
# We used to match only the files named `Makefile.in', but
|
||||
# some people rename them; so instead we look at the file content.
|
||||
# Grep'ing the first line is not enough: some people post-process
|
||||
# each Makefile.in and add a new line on top of each file to say so.
|
||||
# Grep'ing the whole file is not good either: AIX grep has a line
|
||||
# limit of 2048, but all sed's we know understand at least 4000.
|
||||
if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
|
||||
dirpart=`AS_DIRNAME("$mf")`
|
||||
else
|
||||
continue
|
||||
fi
|
||||
# Extract the definition of DEPDIR, am__include, and am__quote
|
||||
# from the Makefile without running `make'.
|
||||
DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
|
||||
test -z "$DEPDIR" && continue
|
||||
am__include=`sed -n 's/^am__include = //p' < "$mf"`
|
||||
# Skip Makefiles that do not define am__include.
test -z "$am__include" && continue
|
||||
am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
|
||||
# When using ansi2knr, U may be empty or an underscore; expand it
|
||||
U=`sed -n 's/^U = //p' < "$mf"`
|
||||
# Find all dependency output files, they are included files with
|
||||
# $(DEPDIR) in their names. We invoke sed twice because it is the
|
||||
# simplest approach to changing $(DEPDIR) to its actual value in the
|
||||
# expansion.
|
||||
for file in `sed -n "
|
||||
s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
|
||||
sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
|
||||
# Make sure the directory exists.
|
||||
test -f "$dirpart/$file" && continue
|
||||
fdir=`AS_DIRNAME(["$file"])`
|
||||
AS_MKDIR_P([$dirpart/$fdir])
|
||||
# echo "creating $dirpart/$file"
|
||||
echo '# dummy' > "$dirpart/$file"
|
||||
done
|
||||
done
|
||||
])# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||
|
||||
|
||||
# AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||
# -----------------------------
|
||||
# This macro should only be invoked once -- use via AC_REQUIRE.
|
||||
#
|
||||
# This code is only required when automatic dependency tracking
|
||||
# is enabled. FIXME. This creates each `.P' file that we will
|
||||
# need in order to bootstrap the dependency handling code.
|
||||
AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
[AC_CONFIG_COMMANDS([depfiles],
|
||||
[test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
[AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
|
||||
])
|
||||
|
||||
# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 8
|
||||
|
||||
# AM_CONFIG_HEADER is obsolete. It has been replaced by AC_CONFIG_HEADERS.
|
||||
AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)])
|
||||
|
||||
# Do all the work for Automake. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||
# 2005, 2006, 2008 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 13
|
||||
|
||||
# This macro actually does too much. Some checks are only needed if
|
||||
# your package does certain things. But this isn't really a big deal.
|
||||
|
||||
# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
|
||||
# AM_INIT_AUTOMAKE([OPTIONS])
|
||||
# -----------------------------------------------
|
||||
# The call with PACKAGE and VERSION arguments is the old style
|
||||
# call (pre autoconf-2.50), which is being phased out. PACKAGE
|
||||
# and VERSION should now be passed to AC_INIT and removed from
|
||||
# the call to AM_INIT_AUTOMAKE.
|
||||
# We support both call styles for the transition. After
|
||||
# the next Automake release, Autoconf can make the AC_INIT
|
||||
# arguments mandatory, and then we can depend on a new Autoconf
|
||||
# release and drop the old call support.
|
||||
AC_DEFUN([AM_INIT_AUTOMAKE],
|
||||
[AC_PREREQ([2.60])dnl
|
||||
dnl Autoconf wants to disallow AM_ names. We explicitly allow
|
||||
dnl the ones we care about.
|
||||
m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
|
||||
AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
|
||||
AC_REQUIRE([AC_PROG_INSTALL])dnl
|
||||
if test "`cd $srcdir && pwd`" != "`pwd`"; then
|
||||
# Use -I$(srcdir) only when $(srcdir) != ., so that make's output
|
||||
# is not polluted with repeated "-I."
|
||||
AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
|
||||
# test to see if srcdir already configured
|
||||
if test -f $srcdir/config.status; then
|
||||
AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
|
||||
fi
|
||||
fi
|
||||
|
||||
# test whether we have cygpath
|
||||
if test -z "$CYGPATH_W"; then
|
||||
if (cygpath --version) >/dev/null 2>/dev/null; then
|
||||
CYGPATH_W='cygpath -w'
|
||||
else
|
||||
CYGPATH_W=echo
|
||||
fi
|
||||
fi
|
||||
AC_SUBST([CYGPATH_W])
|
||||
|
||||
# Define the identity of the package.
|
||||
dnl Distinguish between old-style and new-style calls.
|
||||
m4_ifval([$2],
|
||||
[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
|
||||
AC_SUBST([PACKAGE], [$1])dnl
|
||||
AC_SUBST([VERSION], [$2])],
|
||||
[_AM_SET_OPTIONS([$1])dnl
|
||||
dnl Diagnose old-style AC_INIT with new-style AM_INIT_AUTOMAKE.
|
||||
m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,,
|
||||
[m4_fatal([AC_INIT should be called with package and version arguments])])dnl
|
||||
AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
|
||||
AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
|
||||
|
||||
_AM_IF_OPTION([no-define],,
|
||||
[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
|
||||
AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl
|
||||
|
||||
# Some tools Automake needs.
|
||||
AC_REQUIRE([AM_SANITY_CHECK])dnl
|
||||
AC_REQUIRE([AC_ARG_PROGRAM])dnl
|
||||
AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version})
|
||||
AM_MISSING_PROG(AUTOCONF, autoconf)
|
||||
AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version})
|
||||
AM_MISSING_PROG(AUTOHEADER, autoheader)
|
||||
AM_MISSING_PROG(MAKEINFO, makeinfo)
|
||||
AM_PROG_INSTALL_SH
|
||||
AM_PROG_INSTALL_STRIP
|
||||
AC_REQUIRE([AM_PROG_MKDIR_P])dnl
|
||||
# We need awk for the "check" target. The system "awk" is bad on
|
||||
# some platforms.
|
||||
AC_REQUIRE([AC_PROG_AWK])dnl
|
||||
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
|
||||
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
|
||||
_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
|
||||
[_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
|
||||
[_AM_PROG_TAR([v7])])])
|
||||
_AM_IF_OPTION([no-dependencies],,
|
||||
[AC_PROVIDE_IFELSE([AC_PROG_CC],
|
||||
[_AM_DEPENDENCIES(CC)],
|
||||
[define([AC_PROG_CC],
|
||||
defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl
|
||||
AC_PROVIDE_IFELSE([AC_PROG_CXX],
|
||||
[_AM_DEPENDENCIES(CXX)],
|
||||
[define([AC_PROG_CXX],
|
||||
defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl
|
||||
AC_PROVIDE_IFELSE([AC_PROG_OBJC],
|
||||
[_AM_DEPENDENCIES(OBJC)],
|
||||
[define([AC_PROG_OBJC],
|
||||
defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl
|
||||
])
|
||||
])
|
||||
|
||||
|
||||
# When config.status generates a header, we must update the stamp-h file.
|
||||
# This file resides in the same directory as the config header
|
||||
# that is generated. The stamp files are numbered to have different names.
|
||||
|
||||
# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
|
||||
# loop where config.status creates the headers, so we can generate
|
||||
# our stamp files there.
|
||||
AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
|
||||
[# Compute $1's index in $config_headers.
|
||||
_am_arg=$1
|
||||
_am_stamp_count=1
|
||||
for _am_header in $config_headers :; do
|
||||
case $_am_header in
|
||||
$_am_arg | $_am_arg:* )
|
||||
break ;;
|
||||
* )
|
||||
_am_stamp_count=`expr $_am_stamp_count + 1` ;;
|
||||
esac
|
||||
done
|
||||
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
|
||||
|
||||
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# AM_PROG_INSTALL_SH
|
||||
# ------------------
|
||||
# Define $install_sh.
|
||||
AC_DEFUN([AM_PROG_INSTALL_SH],
|
||||
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
||||
install_sh=${install_sh-"\$(SHELL) $am_aux_dir/install-sh"}
|
||||
AC_SUBST(install_sh)])
|
||||
|
||||
# Copyright (C) 2003, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 2
|
||||
|
||||
# Check whether the underlying file-system supports filenames
|
||||
# with a leading dot. For instance MS-DOS doesn't.
|
||||
AC_DEFUN([AM_SET_LEADING_DOT],
|
||||
[rm -rf .tst 2>/dev/null
|
||||
mkdir .tst 2>/dev/null
|
||||
if test -d .tst; then
|
||||
am__leading_dot=.
|
||||
else
|
||||
am__leading_dot=_
|
||||
fi
|
||||
rmdir .tst 2>/dev/null
|
||||
AC_SUBST([am__leading_dot])])
|
||||
|
||||
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
|
||||
# From Jim Meyering
|
||||
|
||||
# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 4
|
||||
|
||||
AC_DEFUN([AM_MAINTAINER_MODE],
|
||||
[AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
|
||||
dnl maintainer-mode is disabled by default
|
||||
AC_ARG_ENABLE(maintainer-mode,
|
||||
[ --enable-maintainer-mode enable make rules and dependencies not useful
|
||||
(and sometimes confusing) to the casual installer],
|
||||
USE_MAINTAINER_MODE=$enableval,
|
||||
USE_MAINTAINER_MODE=no)
|
||||
AC_MSG_RESULT([$USE_MAINTAINER_MODE])
|
||||
AM_CONDITIONAL(MAINTAINER_MODE, [test $USE_MAINTAINER_MODE = yes])
|
||||
MAINT=$MAINTAINER_MODE_TRUE
|
||||
AC_SUBST(MAINT)dnl
|
||||
]
|
||||
)
|
||||
|
||||
AU_DEFUN([jm_MAINTAINER_MODE], [AM_MAINTAINER_MODE])
|
||||
|
||||
# Check to see how 'make' treats includes. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 3
|
||||
|
||||
# AM_MAKE_INCLUDE()
|
||||
# -----------------
|
||||
# Check to see how make treats includes.
|
||||
AC_DEFUN([AM_MAKE_INCLUDE],
|
||||
[am_make=${MAKE-make}
|
||||
cat > confinc << 'END'
|
||||
am__doit:
|
||||
@echo done
|
||||
.PHONY: am__doit
|
||||
END
|
||||
# If we don't find an include directive, just comment out the code.
|
||||
AC_MSG_CHECKING([for style of include used by $am_make])
|
||||
am__include="#"
|
||||
am__quote=
|
||||
_am_result=none
|
||||
# First try GNU make style include.
|
||||
echo "include confinc" > confmf
|
||||
# We grep out `Entering directory' and `Leaving directory'
|
||||
# messages which can occur if `w' ends up in MAKEFLAGS.
|
||||
# In particular we don't look at `^make:' because GNU make might
|
||||
# be invoked under some other name (usually "gmake"), in which
|
||||
# case it prints its new name instead of `make'.
|
||||
if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then
|
||||
am__include=include
|
||||
am__quote=
|
||||
_am_result=GNU
|
||||
fi
|
||||
# Now try BSD make style include.
|
||||
if test "$am__include" = "#"; then
|
||||
echo '.include "confinc"' > confmf
|
||||
if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then
|
||||
am__include=.include
|
||||
am__quote="\""
|
||||
_am_result=BSD
|
||||
fi
|
||||
fi
|
||||
AC_SUBST([am__include])
|
||||
AC_SUBST([am__quote])
|
||||
AC_MSG_RESULT([$_am_result])
|
||||
rm -f confinc confmf
|
||||
])
|
||||
|
||||
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 5
|
||||
|
||||
# AM_MISSING_PROG(NAME, PROGRAM)
|
||||
# ------------------------------
|
||||
AC_DEFUN([AM_MISSING_PROG],
|
||||
[AC_REQUIRE([AM_MISSING_HAS_RUN])
|
||||
$1=${$1-"${am_missing_run}$2"}
|
||||
AC_SUBST($1)])
|
||||
|
||||
|
||||
# AM_MISSING_HAS_RUN
|
||||
# ------------------
|
||||
# Define MISSING if not defined so far and test if it supports --run.
|
||||
# If it does, set am_missing_run to use it, otherwise, to nothing.
|
||||
AC_DEFUN([AM_MISSING_HAS_RUN],
|
||||
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
||||
AC_REQUIRE_AUX_FILE([missing])dnl
|
||||
test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing"
|
||||
# Use eval to expand $SHELL
|
||||
if eval "$MISSING --run true"; then
|
||||
am_missing_run="$MISSING --run "
|
||||
else
|
||||
am_missing_run=
|
||||
AC_MSG_WARN([`missing' script is too old or missing])
|
||||
fi
|
||||
])
|
||||
|
||||
# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# AM_PROG_MKDIR_P
|
||||
# ---------------
|
||||
# Check for `mkdir -p'.
|
||||
AC_DEFUN([AM_PROG_MKDIR_P],
|
||||
[AC_PREREQ([2.60])dnl
|
||||
AC_REQUIRE([AC_PROG_MKDIR_P])dnl
|
||||
dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
|
||||
dnl while keeping a definition of mkdir_p for backward compatibility.
|
||||
dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
|
||||
dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
|
||||
dnl Makefile.ins that do not define MKDIR_P, so we do our own
|
||||
dnl adjustment using top_builddir (which is defined more often than
|
||||
dnl MKDIR_P).
|
||||
AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
|
||||
case $mkdir_p in
|
||||
[[\\/$]]* | ?:[[\\/]]*) ;;
|
||||
*/*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
|
||||
esac
|
||||
])
|
||||
|
||||
# Helper functions for option handling. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 3
|
||||
|
||||
# _AM_MANGLE_OPTION(NAME)
|
||||
# -----------------------
|
||||
AC_DEFUN([_AM_MANGLE_OPTION],
|
||||
[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
|
||||
|
||||
# _AM_SET_OPTION(NAME)
|
||||
# ------------------------------
|
||||
# Set option NAME. Presently that only means defining a flag for this option.
|
||||
AC_DEFUN([_AM_SET_OPTION],
|
||||
[m4_define(_AM_MANGLE_OPTION([$1]), 1)])
|
||||
|
||||
# _AM_SET_OPTIONS(OPTIONS)
|
||||
# ----------------------------------
|
||||
# OPTIONS is a space-separated list of Automake options.
|
||||
AC_DEFUN([_AM_SET_OPTIONS],
|
||||
[AC_FOREACH([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
|
||||
|
||||
# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
|
||||
# -------------------------------------------
|
||||
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
|
||||
AC_DEFUN([_AM_IF_OPTION],
|
||||
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
|
||||
|
||||
# Check to make sure that the build environment is sane. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 4
|
||||
|
||||
# AM_SANITY_CHECK
|
||||
# ---------------
|
||||
AC_DEFUN([AM_SANITY_CHECK],
|
||||
[AC_MSG_CHECKING([whether build environment is sane])
|
||||
# Just in case
|
||||
sleep 1
|
||||
echo timestamp > conftest.file
|
||||
# Do `set' in a subshell so we don't clobber the current shell's
|
||||
# arguments. Must try -L first in case configure is actually a
|
||||
# symlink; some systems play weird games with the mod time of symlinks
|
||||
# (eg FreeBSD returns the mod time of the symlink's containing
|
||||
# directory).
|
||||
if (
|
||||
set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null`
|
||||
if test "$[*]" = "X"; then
|
||||
# -L didn't work.
|
||||
set X `ls -t $srcdir/configure conftest.file`
|
||||
fi
|
||||
rm -f conftest.file
|
||||
if test "$[*]" != "X $srcdir/configure conftest.file" \
|
||||
&& test "$[*]" != "X conftest.file $srcdir/configure"; then
|
||||
|
||||
# If neither matched, then we have a broken ls. This can happen
|
||||
# if, for instance, CONFIG_SHELL is bash and it inherits a
|
||||
# broken ls alias from the environment. This has actually
|
||||
# happened. Such a system could not be considered "sane".
|
||||
AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken
|
||||
alias in your environment])
|
||||
fi
|
||||
|
||||
test "$[2]" = conftest.file
|
||||
)
|
||||
then
|
||||
# Ok.
|
||||
:
|
||||
else
|
||||
AC_MSG_ERROR([newly created file is older than distributed files!
|
||||
Check your system clock])
|
||||
fi
|
||||
AC_MSG_RESULT(yes)])
|
||||
|
||||
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# AM_PROG_INSTALL_STRIP
|
||||
# ---------------------
|
||||
# One issue with vendor `install' (even GNU) is that you can't
|
||||
# specify the program used to strip binaries. This is especially
|
||||
# annoying in cross-compiling environments, where the build's strip
|
||||
# is unlikely to handle the host's binaries.
|
||||
# Fortunately install-sh will honor a STRIPPROG variable, so we
|
||||
# always use install-sh in `make install-strip', and initialize
|
||||
# STRIPPROG with the value of the STRIP variable (set by the user).
|
||||
AC_DEFUN([AM_PROG_INSTALL_STRIP],
|
||||
[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
|
||||
# Installed binaries are usually stripped using `strip' when the user
|
||||
# runs `make install-strip'. However `strip' might not be the right
|
||||
# tool to use in cross-compilation environments, therefore Automake
|
||||
# will honor the `STRIP' environment variable to overrule this program.
|
||||
dnl Don't test for $cross_compiling = yes, because it might be `maybe'.
|
||||
if test "$cross_compiling" != no; then
|
||||
AC_CHECK_TOOL([STRIP], [strip], :)
|
||||
fi
|
||||
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
|
||||
AC_SUBST([INSTALL_STRIP_PROGRAM])])
|
||||
|
||||
# Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# _AM_SUBST_NOTMAKE(VARIABLE)
|
||||
# ---------------------------
|
||||
# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
|
||||
# This macro is traced by Automake.
|
||||
AC_DEFUN([_AM_SUBST_NOTMAKE])
|
||||
|
||||
# Check how to create a tarball. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 2
|
||||
|
||||
# _AM_PROG_TAR(FORMAT)
|
||||
# --------------------
|
||||
# Check how to create a tarball in format FORMAT.
|
||||
# FORMAT should be one of `v7', `ustar', or `pax'.
|
||||
#
|
||||
# Substitute a variable $(am__tar) that is a command
|
||||
# writing to stdout a FORMAT-tarball containing the directory
|
||||
# $tardir.
|
||||
# tardir=directory && $(am__tar) > result.tar
|
||||
#
|
||||
# Substitute a variable $(am__untar) that extracts such
|
||||
# a tarball read from stdin.
|
||||
# $(am__untar) < result.tar
|
||||
AC_DEFUN([_AM_PROG_TAR],
|
||||
[# Always define AMTAR for backward compatibility.
|
||||
AM_MISSING_PROG([AMTAR], [tar])
|
||||
m4_if([$1], [v7],
|
||||
[am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
|
||||
[m4_case([$1], [ustar],, [pax],,
|
||||
[m4_fatal([Unknown tar format])])
|
||||
AC_MSG_CHECKING([how to create a $1 tar archive])
|
||||
# Loop over all known methods to create a tar archive until one works.
|
||||
_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
|
||||
_am_tools=${am_cv_prog_tar_$1-$_am_tools}
|
||||
# Do not fold the above two lines into one, because Tru64 sh and
|
||||
# Solaris sh will not grok spaces in the rhs of `-'.
|
||||
for _am_tool in $_am_tools
|
||||
do
|
||||
case $_am_tool in
|
||||
gnutar)
|
||||
for _am_tar in tar gnutar gtar;
|
||||
do
|
||||
AM_RUN_LOG([$_am_tar --version]) && break
|
||||
done
|
||||
am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
|
||||
am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
|
||||
am__untar="$_am_tar -xf -"
|
||||
;;
|
||||
plaintar)
|
||||
# Must skip GNU tar: if it does not support --format= it doesn't create
|
||||
# ustar tarball either.
|
||||
(tar --version) >/dev/null 2>&1 && continue
|
||||
am__tar='tar chf - "$$tardir"'
|
||||
am__tar_='tar chf - "$tardir"'
|
||||
am__untar='tar xf -'
|
||||
;;
|
||||
pax)
|
||||
am__tar='pax -L -x $1 -w "$$tardir"'
|
||||
am__tar_='pax -L -x $1 -w "$tardir"'
|
||||
am__untar='pax -r'
|
||||
;;
|
||||
cpio)
|
||||
am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
|
||||
am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
|
||||
am__untar='cpio -i -H $1 -d'
|
||||
;;
|
||||
none)
|
||||
am__tar=false
|
||||
am__tar_=false
|
||||
am__untar=false
|
||||
;;
|
||||
esac
|
||||
|
||||
# If the value was cached, stop now. We just wanted to have am__tar
|
||||
# and am__untar set.
|
||||
test -n "${am_cv_prog_tar_$1}" && break
|
||||
|
||||
# tar/untar a dummy directory, and stop if the command works
|
||||
rm -rf conftest.dir
|
||||
mkdir conftest.dir
|
||||
echo GrepMe > conftest.dir/file
|
||||
AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
|
||||
rm -rf conftest.dir
|
||||
if test -s conftest.tar; then
|
||||
AM_RUN_LOG([$am__untar <conftest.tar])
|
||||
grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
|
||||
fi
|
||||
done
|
||||
rm -rf conftest.dir
|
||||
|
||||
AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
|
||||
AC_MSG_RESULT([$am_cv_prog_tar_$1])])
|
||||
AC_SUBST([am__tar])
|
||||
AC_SUBST([am__untar])
|
||||
]) # _AM_PROG_TAR
|
||||
|
159
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/autogen.sh
Executable file
159
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/autogen.sh
Executable file
@ -0,0 +1,159 @@
|
||||
#!/bin/sh
|
||||
# Run this to generate all the initial makefiles, etc.
|
||||
|
||||
srcdir=`dirname $0`
|
||||
test -z "$srcdir" && srcdir=.
|
||||
|
||||
DIE=0
|
||||
|
||||
if [ -n "$GNOME2_DIR" ]; then
|
||||
ACLOCAL_FLAGS="-I $GNOME2_DIR/share/aclocal $ACLOCAL_FLAGS"
|
||||
LD_LIBRARY_PATH="$GNOME2_DIR/lib:$LD_LIBRARY_PATH"
|
||||
PATH="$GNOME2_DIR/bin:$PATH"
|
||||
export PATH
|
||||
export LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
# Sanity check: the directory holding this script must contain configure.ac.
test -f "$srcdir/configure.ac" || {
|
||||
# A single echo prints the whole message; this avoids the non-portable "echo -n".
echo "**Error**: Directory \`$srcdir' does not look like the top-level package directory"
|
||||
exit 1
|
||||
}
|
||||
|
||||
(autoconf --version) < /dev/null > /dev/null 2>&1 || {
|
||||
echo
|
||||
echo "**Error**: You must have \`autoconf' installed."
|
||||
echo "Download the appropriate package for your distribution,"
|
||||
echo "or get the source tarball at ftp://ftp.gnu.org/pub/gnu/"
|
||||
DIE=1
|
||||
}
|
||||
|
||||
(grep "^IT_PROG_INTLTOOL" $srcdir/configure.ac >/dev/null) && {
|
||||
(intltoolize --version) < /dev/null > /dev/null 2>&1 || {
|
||||
echo
|
||||
echo "**Error**: You must have \`intltool' installed."
|
||||
echo "You can get it from:"
|
||||
echo " ftp://ftp.gnome.org/pub/GNOME/"
|
||||
DIE=1
|
||||
}
|
||||
}
|
||||
|
||||
(grep "^AM_PROG_XML_I18N_TOOLS" $srcdir/configure.ac >/dev/null) && {
|
||||
(xml-i18n-toolize --version) < /dev/null > /dev/null 2>&1 || {
|
||||
echo
|
||||
echo "**Error**: You must have \`xml-i18n-toolize' installed."
|
||||
echo "You can get it from:"
|
||||
echo " ftp://ftp.gnome.org/pub/GNOME/"
|
||||
DIE=1
|
||||
}
|
||||
}
|
||||
|
||||
(grep "^AM_PROG_LIBTOOL" $srcdir/configure.ac >/dev/null) && {
|
||||
(libtool --version) < /dev/null > /dev/null 2>&1 || {
|
||||
echo
|
||||
echo "**Error**: You must have \`libtool' installed."
|
||||
echo "You can get it from: ftp://ftp.gnu.org/pub/gnu/"
|
||||
DIE=1
|
||||
}
|
||||
}
|
||||
|
||||
(grep "^AM_GLIB_GNU_GETTEXT" $srcdir/configure.ac >/dev/null) && {
|
||||
(grep "sed.*POTFILES" $srcdir/configure.ac) > /dev/null || \
|
||||
(glib-gettextize --version) < /dev/null > /dev/null 2>&1 || {
|
||||
echo
|
||||
echo "**Error**: You must have \`glib' installed."
|
||||
echo "You can get it from: ftp://ftp.gtk.org/pub/gtk"
|
||||
DIE=1
|
||||
}
|
||||
}
|
||||
|
||||
(automake --version) < /dev/null > /dev/null 2>&1 || {
|
||||
echo
|
||||
echo "**Error**: You must have \`automake' installed."
|
||||
echo "You can get it from: ftp://ftp.gnu.org/pub/gnu/"
|
||||
DIE=1
|
||||
NO_AUTOMAKE=yes
|
||||
}
|
||||
|
||||
|
||||
# if no automake, don't bother testing for aclocal
|
||||
test -n "$NO_AUTOMAKE" || (aclocal --version) < /dev/null > /dev/null 2>&1 || {
|
||||
echo
|
||||
echo "**Error**: Missing \`aclocal'. The version of \`automake'"
|
||||
echo "installed doesn't appear recent enough."
|
||||
echo "You can get automake from ftp://ftp.gnu.org/pub/gnu/"
|
||||
DIE=1
|
||||
}
|
||||
|
||||
if test "$DIE" -eq 1; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test -z "$*"; then
|
||||
echo "**Warning**: I am going to run \`configure' with no arguments."
|
||||
echo "If you wish to pass any to it, please specify them on the"
|
||||
echo \`$0\'" command line."
|
||||
echo
|
||||
fi
|
||||
|
||||
case $CC in
|
||||
xlc )
|
||||
am_opt=--include-deps;;
|
||||
esac
|
||||
|
||||
for coin in `find $srcdir -path $srcdir/CVS -prune -o -name configure.ac -print`
|
||||
do
|
||||
dr=`dirname $coin`
|
||||
if test -f $dr/NO-AUTO-GEN; then
|
||||
echo skipping $dr -- flagged as no auto-gen
|
||||
else
|
||||
echo processing $dr
|
||||
( cd $dr
|
||||
|
||||
aclocalinclude="$ACLOCAL_FLAGS"
|
||||
|
||||
# Run glib-gettextize when configure.ac asks for AM_GLIB_GNU_GETTEXT.
# The subshell has already changed into $dr (see the cd above), so aclocal.m4
# is addressed relative to the current directory; using $dr/aclocal.m4 here
# would point at the wrong path whenever $dr is a relative path other than ".".
if grep "^AM_GLIB_GNU_GETTEXT" configure.ac >/dev/null; then
|
||||
echo "Creating $dr/aclocal.m4 ..."
|
||||
test -r aclocal.m4 || touch aclocal.m4
|
||||
echo "Running glib-gettextize... Ignore non-fatal messages."
|
||||
echo "no" | glib-gettextize --force --copy
|
||||
echo "Making $dr/aclocal.m4 writable ..."
|
||||
test -r aclocal.m4 && chmod u+w aclocal.m4
|
||||
fi
|
||||
if grep "^IT_PROG_INTLTOOL" configure.ac >/dev/null; then
|
||||
echo "Running intltoolize..."
|
||||
intltoolize --copy --force --automake
|
||||
fi
|
||||
if grep "^AM_PROG_XML_I18N_TOOLS" configure.ac >/dev/null; then
|
||||
echo "Running xml-i18n-toolize..."
|
||||
xml-i18n-toolize --copy --force --automake
|
||||
fi
|
||||
if grep "^AM_PROG_LIBTOOL" configure.ac >/dev/null; then
|
||||
if test -z "$NO_LIBTOOLIZE" ; then
|
||||
echo "Running libtoolize..."
|
||||
libtoolize --force --copy
|
||||
fi
|
||||
fi
|
||||
echo "Running aclocal $aclocalinclude ..."
|
||||
aclocal $aclocalinclude
|
||||
if grep "^AM_CONFIG_HEADER" configure.ac >/dev/null; then
|
||||
echo "Running autoheader..."
|
||||
autoheader
|
||||
fi
|
||||
echo "Running automake --gnu $am_opt ..."
|
||||
automake --add-missing --gnu $am_opt
|
||||
echo "Running autoconf ..."
|
||||
autoconf
|
||||
)
|
||||
fi
|
||||
done
|
||||
|
||||
conf_flags="--enable-maintainer-mode"
|
||||
|
||||
if test x$NOCONFIGURE = x; then
|
||||
echo Running $srcdir/configure $conf_flags "$@" ...
|
||||
$srcdir/configure $conf_flags "$@" \
|
||||
&& echo Now type \`make\' to compile. || exit 1
|
||||
else
|
||||
echo Skipping configure process.
|
||||
fi
|
1526
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/config.guess
vendored
Executable file
1526
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/config.guess
vendored
Executable file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,26 @@
|
||||
/* config.h. Generated from config.h.in by configure. */
|
||||
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "mgiza"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "mgiza"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "mgiza 1.0"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "mgiza"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "1.0"
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#define STDC_HEADERS 1
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "1.0"
|
@ -0,0 +1,25 @@
|
||||
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* Name of package */
|
||||
#undef PACKAGE
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Version number of package */
|
||||
#undef VERSION
|
1658
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/config.sub
vendored
Executable file
1658
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/config.sub
vendored
Executable file
File diff suppressed because it is too large
Load Diff
6897
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/configure
vendored
Executable file
6897
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/configure
vendored
Executable file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,28 @@
|
||||
dnl Process this file with autoconf to produce a configure script.
|
||||
dnl Created by Anjuta application wizard.
|
||||
|
||||
AC_INIT(mgiza, 1.0)
|
||||
|
||||
AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION)
|
||||
AM_CONFIG_HEADER(config.h)
|
||||
AM_MAINTAINER_MODE
|
||||
|
||||
AC_ISC_POSIX
|
||||
AC_PROG_CXX
|
||||
AM_PROG_CC_STDC
|
||||
AC_HEADER_STDC
|
||||
|
||||
|
||||
|
||||
AC_PROG_RANLIB
|
||||
AM_PROG_LIBTOOL
|
||||
AC_PROG_LIBTOOL
|
||||
|
||||
|
||||
|
||||
|
||||
AC_OUTPUT([
|
||||
Makefile
|
||||
src/Makefile
|
||||
src/mkcls/Makefile
|
||||
])
|
519
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/install-sh
Executable file
519
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/install-sh
Executable file
@ -0,0 +1,519 @@
|
||||
#!/bin/sh
|
||||
# install - install a program, script, or datafile
|
||||
|
||||
scriptversion=2006-12-25.00
|
||||
|
||||
# This originates from X11R5 (mit/util/scripts/install.sh), which was
|
||||
# later released in X11R6 (xc/config/util/install.sh) with the
|
||||
# following copyright and license.
|
||||
#
|
||||
# Copyright (C) 1994 X Consortium
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
|
||||
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# Except as contained in this notice, the name of the X Consortium shall not
|
||||
# be used in advertising or otherwise to promote the sale, use or other deal-
|
||||
# ings in this Software without prior written authorization from the X Consor-
|
||||
# tium.
|
||||
#
|
||||
#
|
||||
# FSF changes to this file are in the public domain.
|
||||
#
|
||||
# Calling this script install-sh is preferred over install.sh, to prevent
|
||||
# `make' implicit rules from creating a file called install from it
|
||||
# when there is no Makefile.
|
||||
#
|
||||
# This script is compatible with the BSD install script, but was written
|
||||
# from scratch.
|
||||
|
||||
nl='
|
||||
'
|
||||
IFS=" "" $nl"
|
||||
|
||||
# set DOITPROG to echo to test this script
|
||||
|
||||
# Don't use :- since 4.3BSD and earlier shells don't like it.
|
||||
doit=${DOITPROG-}
|
||||
if test -z "$doit"; then
|
||||
doit_exec=exec
|
||||
else
|
||||
doit_exec=$doit
|
||||
fi
|
||||
|
||||
# Put in absolute file names if you don't have them in your path;
|
||||
# or use environment vars.
|
||||
|
||||
chgrpprog=${CHGRPPROG-chgrp}
|
||||
chmodprog=${CHMODPROG-chmod}
|
||||
chownprog=${CHOWNPROG-chown}
|
||||
cmpprog=${CMPPROG-cmp}
|
||||
cpprog=${CPPROG-cp}
|
||||
mkdirprog=${MKDIRPROG-mkdir}
|
||||
mvprog=${MVPROG-mv}
|
||||
rmprog=${RMPROG-rm}
|
||||
stripprog=${STRIPPROG-strip}
|
||||
|
||||
posix_glob='?'
|
||||
initialize_posix_glob='
|
||||
test "$posix_glob" != "?" || {
|
||||
if (set -f) 2>/dev/null; then
|
||||
posix_glob=
|
||||
else
|
||||
posix_glob=:
|
||||
fi
|
||||
}
|
||||
'
|
||||
|
||||
posix_mkdir=
|
||||
|
||||
# Desired mode of installed file.
|
||||
mode=0755
|
||||
|
||||
chgrpcmd=
|
||||
chmodcmd=$chmodprog
|
||||
chowncmd=
|
||||
mvcmd=$mvprog
|
||||
rmcmd="$rmprog -f"
|
||||
stripcmd=
|
||||
|
||||
src=
|
||||
dst=
|
||||
dir_arg=
|
||||
dst_arg=
|
||||
|
||||
copy_on_change=false
|
||||
no_target_directory=
|
||||
|
||||
usage="\
|
||||
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
|
||||
or: $0 [OPTION]... SRCFILES... DIRECTORY
|
||||
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
|
||||
or: $0 [OPTION]... -d DIRECTORIES...
|
||||
|
||||
In the 1st form, copy SRCFILE to DSTFILE.
|
||||
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
|
||||
In the 4th, create DIRECTORIES.
|
||||
|
||||
Options:
|
||||
--help display this help and exit.
|
||||
--version display version info and exit.
|
||||
|
||||
-c (ignored)
|
||||
-C install only if different (preserve the last data modification time)
|
||||
-d create directories instead of installing files.
|
||||
-g GROUP $chgrpprog installed files to GROUP.
|
||||
-m MODE $chmodprog installed files to MODE.
|
||||
-o USER $chownprog installed files to USER.
|
||||
-s $stripprog installed files.
|
||||
-t DIRECTORY install into DIRECTORY.
|
||||
-T report an error if DSTFILE is a directory.
|
||||
|
||||
Environment variables override the default commands:
|
||||
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
|
||||
RMPROG STRIPPROG
|
||||
"
|
||||
|
||||
while test $# -ne 0; do
|
||||
case $1 in
|
||||
-c) ;;
|
||||
|
||||
-C) copy_on_change=true;;
|
||||
|
||||
-d) dir_arg=true;;
|
||||
|
||||
-g) chgrpcmd="$chgrpprog $2"
|
||||
shift;;
|
||||
|
||||
--help) echo "$usage"; exit $?;;
|
||||
|
||||
-m) mode=$2
|
||||
case $mode in
|
||||
*' '* | *' '* | *'
|
||||
'* | *'*'* | *'?'* | *'['*)
|
||||
echo "$0: invalid mode: $mode" >&2
|
||||
exit 1;;
|
||||
esac
|
||||
shift;;
|
||||
|
||||
-o) chowncmd="$chownprog $2"
|
||||
shift;;
|
||||
|
||||
-s) stripcmd=$stripprog;;
|
||||
|
||||
-t) dst_arg=$2
|
||||
shift;;
|
||||
|
||||
-T) no_target_directory=true;;
|
||||
|
||||
--version) echo "$0 $scriptversion"; exit $?;;
|
||||
|
||||
--) shift
|
||||
break;;
|
||||
|
||||
-*) echo "$0: invalid option: $1" >&2
|
||||
exit 1;;
|
||||
|
||||
*) break;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
|
||||
# When -d is used, all remaining arguments are directories to create.
|
||||
# When -t is used, the destination is already specified.
|
||||
# Otherwise, the last argument is the destination. Remove it from $@.
|
||||
for arg
|
||||
do
|
||||
if test -n "$dst_arg"; then
|
||||
# $@ is not empty: it contains at least $arg.
|
||||
set fnord "$@" "$dst_arg"
|
||||
shift # fnord
|
||||
fi
|
||||
shift # arg
|
||||
dst_arg=$arg
|
||||
done
|
||||
fi
|
||||
|
||||
if test $# -eq 0; then
|
||||
if test -z "$dir_arg"; then
|
||||
echo "$0: no input file specified." >&2
|
||||
exit 1
|
||||
fi
|
||||
# It's OK to call `install-sh -d' without argument.
|
||||
# This can happen when creating conditional directories.
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if test -z "$dir_arg"; then
|
||||
trap '(exit $?); exit' 1 2 13 15
|
||||
|
||||
# Set umask so as not to create temps with too-generous modes.
|
||||
# However, 'strip' requires both read and write access to temps.
|
||||
case $mode in
|
||||
# Optimize common cases.
|
||||
*644) cp_umask=133;;
|
||||
*755) cp_umask=22;;
|
||||
|
||||
*[0-7])
|
||||
if test -z "$stripcmd"; then
|
||||
u_plus_rw=
|
||||
else
|
||||
u_plus_rw='% 200'
|
||||
fi
|
||||
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
|
||||
*)
|
||||
if test -z "$stripcmd"; then
|
||||
u_plus_rw=
|
||||
else
|
||||
u_plus_rw=,u+rw
|
||||
fi
|
||||
cp_umask=$mode$u_plus_rw;;
|
||||
esac
|
||||
fi
|
||||
|
||||
for src
|
||||
do
|
||||
# Protect names starting with `-'.
|
||||
case $src in
|
||||
-*) src=./$src;;
|
||||
esac
|
||||
|
||||
if test -n "$dir_arg"; then
|
||||
dst=$src
|
||||
dstdir=$dst
|
||||
test -d "$dstdir"
|
||||
dstdir_status=$?
|
||||
else
|
||||
|
||||
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
|
||||
# might cause directories to be created, which would be especially bad
|
||||
# if $src (and thus $dsttmp) contains '*'.
|
||||
if test ! -f "$src" && test ! -d "$src"; then
|
||||
echo "$0: $src does not exist." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test -z "$dst_arg"; then
|
||||
echo "$0: no destination specified." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
dst=$dst_arg
|
||||
# Protect names starting with `-'.
|
||||
case $dst in
|
||||
-*) dst=./$dst;;
|
||||
esac
|
||||
|
||||
# If destination is a directory, append the input filename; won't work
|
||||
# if double slashes aren't ignored.
|
||||
if test -d "$dst"; then
|
||||
if test -n "$no_target_directory"; then
|
||||
echo "$0: $dst_arg: Is a directory" >&2
|
||||
exit 1
|
||||
fi
|
||||
dstdir=$dst
|
||||
dst=$dstdir/`basename "$src"`
|
||||
dstdir_status=0
|
||||
else
|
||||
# Prefer dirname, but fall back on a substitute if dirname fails.
|
||||
dstdir=`
|
||||
(dirname "$dst") 2>/dev/null ||
|
||||
expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
|
||||
X"$dst" : 'X\(//\)[^/]' \| \
|
||||
X"$dst" : 'X\(//\)$' \| \
|
||||
X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
|
||||
echo X"$dst" |
|
||||
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
|
||||
s//\1/
|
||||
q
|
||||
}
|
||||
/^X\(\/\/\)[^/].*/{
|
||||
s//\1/
|
||||
q
|
||||
}
|
||||
/^X\(\/\/\)$/{
|
||||
s//\1/
|
||||
q
|
||||
}
|
||||
/^X\(\/\).*/{
|
||||
s//\1/
|
||||
q
|
||||
}
|
||||
s/.*/./; q'
|
||||
`
|
||||
|
||||
test -d "$dstdir"
|
||||
dstdir_status=$?
|
||||
fi
|
||||
fi
|
||||
|
||||
obsolete_mkdir_used=false
|
||||
|
||||
if test $dstdir_status != 0; then
|
||||
case $posix_mkdir in
|
||||
'')
|
||||
# Create intermediate dirs using mode 755 as modified by the umask.
|
||||
# This is like FreeBSD 'install' as of 1997-10-28.
|
||||
umask=`umask`
|
||||
case $stripcmd.$umask in
|
||||
# Optimize common cases.
|
||||
*[2367][2367]) mkdir_umask=$umask;;
|
||||
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
|
||||
|
||||
*[0-7])
|
||||
mkdir_umask=`expr $umask + 22 \
|
||||
- $umask % 100 % 40 + $umask % 20 \
|
||||
- $umask % 10 % 4 + $umask % 2
|
||||
`;;
|
||||
*) mkdir_umask=$umask,go-w;;
|
||||
esac
|
||||
|
||||
# With -d, create the new directory with the user-specified mode.
|
||||
# Otherwise, rely on $mkdir_umask.
|
||||
if test -n "$dir_arg"; then
|
||||
mkdir_mode=-m$mode
|
||||
else
|
||||
mkdir_mode=
|
||||
fi
|
||||
|
||||
posix_mkdir=false
|
||||
case $umask in
|
||||
*[123567][0-7][0-7])
|
||||
# POSIX mkdir -p sets u+wx bits regardless of umask, which
|
||||
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
|
||||
;;
|
||||
*)
|
||||
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
|
||||
trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
|
||||
|
||||
if (umask $mkdir_umask &&
|
||||
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
|
||||
then
|
||||
if test -z "$dir_arg" || {
|
||||
# Check for POSIX incompatibilities with -m.
|
||||
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
|
||||
# other-writeable bit of parent directory when it shouldn't.
|
||||
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
|
||||
ls_ld_tmpdir=`ls -ld "$tmpdir"`
|
||||
case $ls_ld_tmpdir in
|
||||
d????-?r-*) different_mode=700;;
|
||||
d????-?--*) different_mode=755;;
|
||||
*) false;;
|
||||
esac &&
|
||||
$mkdirprog -m$different_mode -p -- "$tmpdir" && {
|
||||
ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
|
||||
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
|
||||
}
|
||||
}
|
||||
then posix_mkdir=:
|
||||
fi
|
||||
rmdir "$tmpdir/d" "$tmpdir"
|
||||
else
|
||||
# Remove any dirs left behind by ancient mkdir implementations.
|
||||
rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
|
||||
fi
|
||||
trap '' 0;;
|
||||
esac;;
|
||||
esac
|
||||
|
||||
if
|
||||
$posix_mkdir && (
|
||||
umask $mkdir_umask &&
|
||||
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
|
||||
)
|
||||
then :
|
||||
else
|
||||
|
||||
# The umask is ridiculous, or mkdir does not conform to POSIX,
|
||||
# or it failed possibly due to a race condition. Create the
|
||||
# directory the slow way, step by step, checking for races as we go.
|
||||
|
||||
case $dstdir in
|
||||
/*) prefix='/';;
|
||||
-*) prefix='./';;
|
||||
*) prefix='';;
|
||||
esac
|
||||
|
||||
eval "$initialize_posix_glob"
|
||||
|
||||
oIFS=$IFS
|
||||
IFS=/
|
||||
$posix_glob set -f
|
||||
set fnord $dstdir
|
||||
shift
|
||||
$posix_glob set +f
|
||||
IFS=$oIFS
|
||||
|
||||
prefixes=
|
||||
|
||||
for d
|
||||
do
|
||||
test -z "$d" && continue
|
||||
|
||||
prefix=$prefix$d
|
||||
if test -d "$prefix"; then
|
||||
prefixes=
|
||||
else
|
||||
if $posix_mkdir; then
|
||||
(umask $mkdir_umask &&  # invoke umask (an assignment here was a no-op), matching the other mkdir subshells
|
||||
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
|
||||
# Don't fail if two instances are running concurrently.
|
||||
test -d "$prefix" || exit 1
|
||||
else
|
||||
case $prefix in
|
||||
*\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
|
||||
*) qprefix=$prefix;;
|
||||
esac
|
||||
prefixes="$prefixes '$qprefix'"
|
||||
fi
|
||||
fi
|
||||
prefix=$prefix/
|
||||
done
|
||||
|
||||
if test -n "$prefixes"; then
|
||||
# Don't fail if two instances are running concurrently.
|
||||
(umask $mkdir_umask &&
|
||||
eval "\$doit_exec \$mkdirprog $prefixes") ||
|
||||
test -d "$dstdir" || exit 1
|
||||
obsolete_mkdir_used=true
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if test -n "$dir_arg"; then
|
||||
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
|
||||
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
|
||||
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
|
||||
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
|
||||
else
|
||||
|
||||
# Make a couple of temp file names in the proper directory.
|
||||
dsttmp=$dstdir/_inst.$$_
|
||||
rmtmp=$dstdir/_rm.$$_
|
||||
|
||||
# Trap to clean up those temp files at exit.
|
||||
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
|
||||
|
||||
# Copy the file name to the temp name.
|
||||
(umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
|
||||
|
||||
# and set any options; do chmod last to preserve setuid bits.
|
||||
#
|
||||
# If any of these fail, we abort the whole thing. If we want to
|
||||
# ignore errors from any of these, just make sure not to ignore
|
||||
# errors from the above "$doit $cpprog $src $dsttmp" command.
|
||||
#
|
||||
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
|
||||
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
|
||||
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
|
||||
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
|
||||
|
||||
# If -C, don't bother to copy if it wouldn't change the file.
|
||||
if $copy_on_change &&
|
||||
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
|
||||
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
|
||||
|
||||
eval "$initialize_posix_glob" &&
|
||||
$posix_glob set -f &&
|
||||
set X $old && old=:$2:$4:$5:$6 &&
|
||||
set X $new && new=:$2:$4:$5:$6 &&
|
||||
$posix_glob set +f &&
|
||||
|
||||
test "$old" = "$new" &&
|
||||
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
|
||||
then
|
||||
rm -f "$dsttmp"
|
||||
else
|
||||
# Rename the file to the real destination.
|
||||
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
|
||||
|
||||
# The rename failed, perhaps because mv can't rename something else
|
||||
# to itself, or perhaps because mv is so ancient that it does not
|
||||
# support -f.
|
||||
{
|
||||
# Now remove or move aside any old file at destination location.
|
||||
# We try this two ways since rm can't unlink itself on some
|
||||
# systems and the destination file might be busy for other
|
||||
# reasons. In this case, the final cleanup might fail but the new
|
||||
# file should still install successfully.
|
||||
{
|
||||
test ! -f "$dst" ||
|
||||
$doit $rmcmd -f "$dst" 2>/dev/null ||
|
||||
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
|
||||
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
|
||||
} ||
|
||||
{ echo "$0: cannot unlink or rename $dst" >&2
|
||||
(exit 1); exit 1
|
||||
}
|
||||
} &&
|
||||
|
||||
# Now rename the file to the real destination.
|
||||
$doit $mvcmd "$dsttmp" "$dst"
|
||||
}
|
||||
fi || exit 1
|
||||
|
||||
trap '' 0
|
||||
fi
|
||||
done
|
||||
|
||||
# Local variables:
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-end: "$"
|
||||
# End:
|
8412
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/ltmain.sh
Normal file
8412
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/ltmain.sh
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,37 @@
|
||||
<?xml version="1.0"?>
|
||||
<anjuta>
|
||||
<plugin name="GBF Project Manager"
|
||||
url="http://anjuta.org/plugins/"
|
||||
mandatory="yes">
|
||||
<require group="Anjuta Plugin"
|
||||
attribute="Interfaces"
|
||||
value="IAnjutaProjectManager"/>
|
||||
<require group="Project"
|
||||
attribute="Supported-Project-Types"
|
||||
value="automake"/>
|
||||
</plugin>
|
||||
<plugin name="Make Build System"
|
||||
url="http://anjuta.org/plugins/"
|
||||
mandatory="yes">
|
||||
<require group="Anjuta Plugin"
|
||||
attribute="Interfaces"
|
||||
value="IAnjutaBuildable"/>
|
||||
<require group="Build"
|
||||
attribute="Supported-Build-Types"
|
||||
value="make"/>
|
||||
</plugin>
|
||||
<plugin name="Task Manager"
|
||||
url="http://anjuta.org/plugins/"
|
||||
mandatory="no">
|
||||
<require group="Anjuta Plugin"
|
||||
attribute="Interfaces"
|
||||
value="IAnjutaTodo"/>
|
||||
</plugin>
|
||||
<plugin name="Debug Manager"
|
||||
url="http://anjuta.org/plugins/"
|
||||
mandatory="no">
|
||||
<require group="Anjuta Plugin"
|
||||
attribute="Interfaces"
|
||||
value="IAnjutaDebuggerManager"/>
|
||||
</plugin>
|
||||
</anjuta>
|
367
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/missing
Executable file
367
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/missing
Executable file
@ -0,0 +1,367 @@
|
||||
#! /bin/sh
|
||||
# Common stub for a few missing GNU programs while installing.
|
||||
|
||||
scriptversion=2006-05-10.23
|
||||
|
||||
# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006
|
||||
# Free Software Foundation, Inc.
|
||||
# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
# 02110-1301, USA.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
if test $# -eq 0; then
|
||||
echo 1>&2 "Try \`$0 --help' for more information"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
run=:
|
||||
sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p'
|
||||
sed_minuso='s/.* -o \([^ ]*\).*/\1/p'
|
||||
|
||||
# In the cases where this matters, `missing' is being run in the
|
||||
# srcdir already.
|
||||
if test -f configure.ac; then
|
||||
configure_ac=configure.ac
|
||||
else
|
||||
configure_ac=configure.in
|
||||
fi
|
||||
|
||||
msg="missing on your system"
|
||||
|
||||
case $1 in
|
||||
--run)
|
||||
# Try to run requested program, and just exit if it succeeds.
|
||||
run=
|
||||
shift
|
||||
"$@" && exit 0
|
||||
# Exit code 63 means version mismatch. This often happens
|
||||
# when the user tries to use an ancient version of a tool on
|
||||
# a file that requires a minimum version. In this case we
|
||||
# should proceed as if the program had been absent, or
|
||||
# if --run hadn't been passed.
|
||||
if test $? = 63; then
|
||||
run=:
|
||||
msg="probably too old"
|
||||
fi
|
||||
;;
|
||||
|
||||
-h|--h|--he|--hel|--help)
|
||||
echo "\
|
||||
$0 [OPTION]... PROGRAM [ARGUMENT]...
|
||||
|
||||
Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
|
||||
error status if there is no known handling for PROGRAM.
|
||||
|
||||
Options:
|
||||
-h, --help display this help and exit
|
||||
-v, --version output version information and exit
|
||||
--run try to run the given command, and emulate it if it fails
|
||||
|
||||
Supported PROGRAM values:
|
||||
aclocal touch file \`aclocal.m4'
|
||||
autoconf touch file \`configure'
|
||||
autoheader touch file \`config.h.in'
|
||||
autom4te touch the output file, or create a stub one
|
||||
automake touch all \`Makefile.in' files
|
||||
bison create \`y.tab.[ch]', if possible, from existing .[ch]
|
||||
flex create \`lex.yy.c', if possible, from existing .c
|
||||
help2man touch the output file
|
||||
lex create \`lex.yy.c', if possible, from existing .c
|
||||
makeinfo touch the output file
|
||||
tar try tar, gnutar, gtar, then tar without non-portable flags
|
||||
yacc create \`y.tab.[ch]', if possible, from existing .[ch]
|
||||
|
||||
Send bug reports to <bug-automake@gnu.org>."
|
||||
exit $?
|
||||
;;
|
||||
|
||||
-v|--v|--ve|--ver|--vers|--versi|--versio|--version)
|
||||
echo "missing $scriptversion (GNU Automake)"
|
||||
exit $?
|
||||
;;
|
||||
|
||||
-*)
|
||||
echo 1>&2 "$0: Unknown \`$1' option"
|
||||
echo 1>&2 "Try \`$0 --help' for more information"
|
||||
exit 1
|
||||
;;
|
||||
|
||||
esac
|
||||
|
||||
# Now exit if we have it, but it failed. Also exit now if we
|
||||
# don't have it and --version was passed (most likely to detect
|
||||
# the program).
|
||||
case $1 in
|
||||
lex|yacc)
|
||||
# Not GNU programs, they don't have --version.
|
||||
;;
|
||||
|
||||
tar)
|
||||
if test -n "$run"; then
|
||||
echo 1>&2 "ERROR: \`tar' requires --run"
|
||||
exit 1
|
||||
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
|
||||
*)
|
||||
if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
|
||||
# We have it, but it failed.
|
||||
exit 1
|
||||
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
|
||||
# Could not run --version or --help. This is probably someone
|
||||
# running `$TOOL --version' or `$TOOL --help' to check whether
|
||||
# $TOOL exists and not knowing $TOOL uses missing.
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
# If it does not exist, or fails to run (possibly an outdated version),
|
||||
# try to emulate it.
|
||||
case $1 in
|
||||
aclocal*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified \`acinclude.m4' or \`${configure_ac}'. You might want
|
||||
to install the \`Automake' and \`Perl' packages. Grab them from
|
||||
any GNU archive site."
|
||||
touch aclocal.m4
|
||||
;;
|
||||
|
||||
autoconf)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified \`${configure_ac}'. You might want to install the
|
||||
\`Autoconf' and \`GNU m4' packages. Grab them from any GNU
|
||||
archive site."
|
||||
touch configure
|
||||
;;
|
||||
|
||||
autoheader)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified \`acconfig.h' or \`${configure_ac}'. You might want
|
||||
to install the \`Autoconf' and \`GNU m4' packages. Grab them
|
||||
from any GNU archive site."
|
||||
files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
|
||||
test -z "$files" && files="config.h"
|
||||
touch_files=
|
||||
for f in $files; do
|
||||
case $f in
|
||||
*:*) touch_files="$touch_files "`echo "$f" |
|
||||
sed -e 's/^[^:]*://' -e 's/:.*//'`;;
|
||||
*) touch_files="$touch_files $f.in";;
|
||||
esac
|
||||
done
|
||||
touch $touch_files
|
||||
;;
|
||||
|
||||
automake*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
|
||||
You might want to install the \`Automake' and \`Perl' packages.
|
||||
Grab them from any GNU archive site."
|
||||
find . -type f -name Makefile.am -print |
|
||||
sed 's/\.am$/.in/' |
|
||||
while read f; do touch "$f"; done
|
||||
;;
|
||||
|
||||
autom4te)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is needed, but is $msg.
|
||||
You might have modified some files without having the
|
||||
proper tools for further handling them.
|
||||
You can get \`$1' as part of \`Autoconf' from any GNU
|
||||
archive site."
|
||||
|
||||
file=`echo "$*" | sed -n "$sed_output"`
|
||||
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
|
||||
if test -f "$file"; then
|
||||
touch $file
|
||||
else
|
||||
test -z "$file" || exec >$file
|
||||
echo "#! /bin/sh"
|
||||
echo "# Created by GNU Automake missing as a replacement of"
|
||||
echo "# $ $@"
|
||||
echo "exit 0"
|
||||
chmod +x $file
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
|
||||
bison|yacc)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' $msg. You should only need it if
|
||||
you modified a \`.y' file. You may need the \`Bison' package
|
||||
in order for those modifications to take effect. You can get
|
||||
\`Bison' from any GNU archive site."
|
||||
rm -f y.tab.c y.tab.h
|
||||
if test $# -ne 1; then
|
||||
eval LASTARG="\${$#}"
|
||||
case $LASTARG in
|
||||
*.y)
|
||||
SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
|
||||
if test -f "$SRCFILE"; then
|
||||
cp "$SRCFILE" y.tab.c
|
||||
fi
|
||||
SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
|
||||
if test -f "$SRCFILE"; then
|
||||
cp "$SRCFILE" y.tab.h
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
if test ! -f y.tab.h; then
|
||||
echo >y.tab.h
|
||||
fi
|
||||
if test ! -f y.tab.c; then
|
||||
echo 'main() { return 0; }' >y.tab.c
|
||||
fi
|
||||
;;
|
||||
|
||||
lex|flex)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified a \`.l' file. You may need the \`Flex' package
|
||||
in order for those modifications to take effect. You can get
|
||||
\`Flex' from any GNU archive site."
|
||||
rm -f lex.yy.c
|
||||
if test $# -ne 1; then
|
||||
eval LASTARG="\${$#}"
|
||||
case $LASTARG in
|
||||
*.l)
|
||||
SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
|
||||
if test -f "$SRCFILE"; then
|
||||
cp "$SRCFILE" lex.yy.c
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
if test ! -f lex.yy.c; then
|
||||
echo 'main() { return 0; }' >lex.yy.c
|
||||
fi
|
||||
;;
|
||||
|
||||
help2man)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified a dependency of a manual page. You may need the
|
||||
\`Help2man' package in order for those modifications to take
|
||||
effect. You can get \`Help2man' from any GNU archive site."
|
||||
|
||||
file=`echo "$*" | sed -n "$sed_output"`
|
||||
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
|
||||
if test -f "$file"; then
|
||||
touch $file
|
||||
else
|
||||
test -z "$file" || exec >$file
|
||||
echo ".ab help2man is required to generate this page"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
|
||||
makeinfo)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified a \`.texi' or \`.texinfo' file, or any other file
|
||||
indirectly affecting the aspect of the manual. The spurious
|
||||
call might also be the consequence of using a buggy \`make' (AIX,
|
||||
DU, IRIX). You might want to install the \`Texinfo' package or
|
||||
the \`GNU make' package. Grab either from any GNU archive site."
|
||||
# The file to touch is that specified with -o ...
|
||||
file=`echo "$*" | sed -n "$sed_output"`
|
||||
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
|
||||
if test -z "$file"; then
|
||||
# ... or it is the one specified with @setfilename ...
|
||||
infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
|
||||
file=`sed -n '
|
||||
/^@setfilename/{
|
||||
s/.* \([^ ]*\) *$/\1/
|
||||
p
|
||||
q
|
||||
}' $infile`
|
||||
# ... or it is derived from the source name (dir/f.texi becomes f.info)
|
||||
test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info
|
||||
fi
|
||||
# If the file does not exist, the user really needs makeinfo;
|
||||
# let's fail without touching anything.
|
||||
test -f $file || exit 1
|
||||
touch $file
|
||||
;;
|
||||
|
||||
tar)
|
||||
shift
|
||||
|
||||
# We have already tried tar in the generic part.
|
||||
# Look for gnutar/gtar before invocation to avoid ugly error
|
||||
# messages.
|
||||
if (gnutar --version > /dev/null 2>&1); then
|
||||
gnutar "$@" && exit 0
|
||||
fi
|
||||
if (gtar --version > /dev/null 2>&1); then
|
||||
gtar "$@" && exit 0
|
||||
fi
|
||||
firstarg="$1"
|
||||
if shift; then
|
||||
case $firstarg in
|
||||
*o*)
|
||||
firstarg=`echo "$firstarg" | sed s/o//`
|
||||
tar "$firstarg" "$@" && exit 0
|
||||
;;
|
||||
esac
|
||||
case $firstarg in
|
||||
*h*)
|
||||
firstarg=`echo "$firstarg" | sed s/h//`
|
||||
tar "$firstarg" "$@" && exit 0
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
echo 1>&2 "\
|
||||
WARNING: I can't seem to be able to run \`tar' with the given arguments.
|
||||
You may want to install GNU tar or Free paxutils, or check the
|
||||
command line arguments."
|
||||
exit 1
|
||||
;;
|
||||
|
||||
*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is needed, and is $msg.
|
||||
You might have modified some files without having the
|
||||
proper tools for further handling them. Check the \`README' file,
|
||||
it often tells you about the needed prerequisites for installing
|
||||
this package. You may also peek at any GNU archive site, in case
|
||||
some other package would contain this missing \`$1' program."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
||||
# Local variables:
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-end: "$"
|
||||
# End:
|
@ -0,0 +1,2 @@
|
||||
sed -e 's/^[ \t]*//' -e 's/[ \t][ \t]*/ /g' -e 's/[ \t]*$//'
|
||||
|
@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env bash
#
# Force-align a corpus using the models of an existing training run.
#
# Usage: <script> PREFIX src_tag tgt_tag root-dir
#
# Run from the training directory: the existing vocabularies are read from
# ./corpus and the existing models/configuration from ./giza.<a>-<b>.
# New corpus files, co-occurrence files and alignments are written below
# root-dir. Requires ${QMT_HOME} to point at the MGIZA installation.

if [ $# -lt 4 ]; then
    echo "OK, this is simple, put me into your Moses training directory, link your source/target corpus" 1>&2
    echo "and run " "$0" " PREFIX src_tag tgt_tag root-dir." 1>&2
    echo "and get force-aligned data: root-dir/giza.[src-tgt|tgt-src]/*.A3.final.* " 1>&2
    echo "make sure I can find PREFIX.src_tag-tgt_tag and PREFIX.tgt_tag-src_tag, and \${QMT_HOME} is set" 1>&2
    # A usage error must not look like success to the caller.
    exit 1
fi

# Every tool below is located through QMT_HOME; fail early instead of
# silently trying to run /bin/mgiza, /scripts/... and so on.
if [ -z "${QMT_HOME}" ]; then
    echo "\${QMT_HOME} is not set" 1>&2
    exit 1
fi

MGIZA="${QMT_HOME}/bin/mgiza"
SNT2COOC="${QMT_HOME}/bin/snt2cooc"

PRE=$1
SRC=$2
TGT=$3
ROOT=$4

mkdir -p "$ROOT/giza.${SRC}-${TGT}"
mkdir -p "$ROOT/giza.${TGT}-${SRC}"
mkdir -p "$ROOT/corpus"

echo "Generating corpus file " 1>&2

"${QMT_HOME}/scripts/plain2snt-hasvcb.py" \
    "corpus/$SRC.vcb" "corpus/$TGT.vcb" \
    "${PRE}.${SRC}" "${PRE}.${TGT}" \
    "$ROOT/corpus/${TGT}-${SRC}.snt" "$ROOT/corpus/${SRC}-${TGT}.snt" \
    "$ROOT/corpus/$SRC.vcb" "$ROOT/corpus/$TGT.vcb"

ln -sf "$PWD/corpus/$SRC.vcb.classes" "$PWD/corpus/$TGT.vcb.classes" "$ROOT/corpus/"

echo "Generating co-occurrence file " 1>&2

"$SNT2COOC" "$ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc" \
    "$ROOT/corpus/$SRC.vcb" "$ROOT/corpus/$TGT.vcb" "$ROOT/corpus/${TGT}-${SRC}.snt"
"$SNT2COOC" "$ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc" \
    "$ROOT/corpus/$TGT.vcb" "$ROOT/corpus/$SRC.vcb" "$ROOT/corpus/${SRC}-${TGT}.snt"

echo "Running force alignment " 1>&2

# Direction TGT-SRC: restart from the previously trained tables with
# -m1/-m2/-mh/-m3 set to 0 and -m4 set to 1.
"$MGIZA" "giza.$TGT-$SRC/$TGT-$SRC.gizacfg" -c "$ROOT/corpus/$TGT-$SRC.snt" -o "$ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}" \
    -s "$ROOT/corpus/$SRC.vcb" -t "$ROOT/corpus/$TGT.vcb" -m1 0 -m2 0 -mh 0 -coocurrence "$ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc" \
    -restart 11 -previoust "giza.$TGT-$SRC/$TGT-$SRC.t3.final" \
    -previousa "giza.$TGT-$SRC/$TGT-$SRC.a3.final" -previousd "giza.$TGT-$SRC/$TGT-$SRC.d3.final" \
    -previousn "giza.$TGT-$SRC/$TGT-$SRC.n3.final" -previousd4 "giza.$TGT-$SRC/$TGT-$SRC.d4.final" \
    -previousd42 "giza.$TGT-$SRC/$TGT-$SRC.D4.final" -m3 0 -m4 1

# Direction SRC-TGT: same settings with the roles of the two sides swapped.
"$MGIZA" "giza.$SRC-$TGT/$SRC-$TGT.gizacfg" -c "$ROOT/corpus/$SRC-$TGT.snt" -o "$ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}" \
    -s "$ROOT/corpus/$TGT.vcb" -t "$ROOT/corpus/$SRC.vcb" -m1 0 -m2 0 -mh 0 -coocurrence "$ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc" \
    -restart 11 -previoust "giza.$SRC-$TGT/$SRC-$TGT.t3.final" \
    -previousa "giza.$SRC-$TGT/$SRC-$TGT.a3.final" -previousd "giza.$SRC-$TGT/$SRC-$TGT.d3.final" \
    -previousn "giza.$SRC-$TGT/$SRC-$TGT.n3.final" -previousd4 "giza.$SRC-$TGT/$SRC-$TGT.d4.final" \
    -previousd42 "giza.$SRC-$TGT/$SRC-$TGT.D4.final" -m3 0 -m4 1
|
||||
|
112
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/scripts/giza2bal.pl
Executable file
112
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/scripts/giza2bal.pl
Executable file
@ -0,0 +1,112 @@
|
||||
#! /usr/bin/perl
|
||||
|
||||
# $Id: giza2bal.pl 1562 2008-02-19 20:48:14Z redpony $
|
||||
#Converts direct and inverted alignments into a more compact
|
||||
#bi-alignment format. It optionally reads the counting file
|
||||
#produced by giza containing the frequency of each training sentence.
|
||||
|
||||
#Copyright Marcello Federico, November 2004
|
||||
|
||||
# Command line: [-c <count-file>] -d <dir-align-file> -i <inv-align-file>
($cnt,$dir,$inv)=();

while ($w=shift @ARGV){
  $dir=shift(@ARGV),next if $w eq "-d";
  $inv=shift(@ARGV),next if $w eq "-i";
  $cnt=shift(@ARGV),next if $w eq "-c";
}

# Number of sentence pairs read so far; used in error messages.
my $lc = 0;

# Both alignment inputs are mandatory. The test must read $inv: a bare
# "inv" is just a true string, so a missing -i option would go unnoticed.
if (!$dir || !$inv){
  # Standard output carries the converted alignments (and is typically piped
  # into another tool), so the usage text goes to STDERR and the exit status
  # reports the failure.
  print STDERR "usage: giza2bal.pl [-c <count-file>] -d <dir-align-file> -i <inv-align-file>\n";
  print STDERR "input files can be also commands, e.g. -d \"gunzip -c file.gz\"\n";
  exit(1);
}
|
||||
|
||||
# Unbuffered standard output.
$|=1;

# Each input is first tried as a plain file and, failing that, as a command
# whose output is read through a pipe. The error message names the input
# that actually failed.
open(DIR,"<$dir") || open(DIR,"$dir|") || die "cannot open $dir\n";
open(INV,"<$inv") || open(INV,"$inv|") || die "cannot open $inv\n";

# The count file is optional.
if ($cnt){
  open(CNT,"<$cnt") || open(CNT,"$cnt|") || die "cannot open $cnt\n";
}
|
||||
|
||||
|
||||
# Read one sentence pair from the direct and the inverse alignment stream
# (and one record from the count stream) and fill the caller's variables.
#
# Arguments: count handle, direct handle, inverse handle, followed by the
# typeglobs that receive the results:
#   *s1, *s2 - sentence line read from the direct / inverse stream
#   *a       - filled from the direct stream: $a[p] is the 1-based index of
#              the token whose "({ ... })" list contains position p, else 0
#   *b       - the same, filled from the inverse stream
#   *c       - count read from the count stream (1 when absent or false)
#
# Each alignment record is three lines: a header (ignored), a sentence, and
# a line of "token ({ positions })" groups beginning with a NULL group.
#
# Always returns 1. When the token counts of the two streams do not match,
# an error is reported on STDERR and both sentences are replaced by
# "ALIGN_ERR" with a single 1-1 link.
sub ReadBiAlign{
  local($fd0,$fd1,$fd2,*s1,*s2,*a,*b,*c)=@_;
  my($dummy,$n,$m,$t1,$t2);

  # chomp (not chop): a final line without a trailing newline must not lose
  # its last real character.
  chomp($c=<$fd0>); ## count
  $dummy=<$fd0>; ## header
  $dummy=<$fd0>; ## header
  $c=1 if !$c;

  $dummy=<$fd1>; ## header
  chomp($s1=<$fd1>);
  chomp($t1=<$fd1>);

  $dummy=<$fd2>; ## header
  chomp($s2=<$fd2>);
  chomp($t2=<$fd2>);

  @a=@b=();
  $lc++;

  #get target statistics
  $n=1;
  $t1=~s/NULL \(\{((\s+\d+)*)\s+\}\)//;   # positions listed under NULL stay unaligned
  while ($t1=~s/(\S+)\s+\(\{((\s+\d+)*)\s+\}\)//){
    # The position list starts with whitespace, so split also yields one
    # leading empty field; it lands in slot 0, which is never printed.
    $a[$_]=$n foreach split(/\s+/,$2);
    $n++;
  }

  $m=1;
  $t2=~s/NULL \(\{((\s+\d+)*)\s+\}\)//;
  while ($t2=~s/(\S+)\s+\(\{((\s+\d+)*)\s+\}\)//){
    $b[$_]=$m foreach split(/\s+/,$2);
    $m++;
  }

  my @words1=split(/\s+/,$s1);
  my @words2=split(/\s+/,$s2);
  my $M=scalar(@words1);
  my $N=scalar(@words2);

  # Cross-check: the tokens parsed from one stream's alignment line must
  # match the words of the other stream's sentence line.
  if ($m != ($M+1) || $n != ($N+1)) {
    print STDERR "Sentence mismatch error! Line #$lc\n";
    $s1 = "ALIGN_ERR";
    $s2 = "ALIGN_ERR";
    @a=(); @b=();
    $a[1]=1;
    $b[1]=1;
    return 1;
  }

  # Positions that received no link are reported as 0.
  for my $j (1..$m-1){
    $a[$j]=0 if !$a[$j];
  }

  for my $i (1..$n-1){
    $b[$i]=0 if !$b[$i];
  }

  return 1;
}
|
||||
|
||||
# Convert sentence pairs until the direct alignment stream is exhausted.
# Per pair three lines are printed: the count, then for each direction
# "<last index> <sentence> # <link list>".
$skip=0;
$ccc=0;
until (eof(DIR)){

  unless (ReadBiAlign(CNT,DIR,INV,*src,*tgt,*a,*b,*c)){
    # Failed read: emit an empty record and a progress dot every 1000 skips.
    print "\n";
    ++$skip;
    print STDERR "." unless $skip % 1000;
    next;
  }

  $ccc++;
  print "$c\n";
  print $#a, " $src # ", join(" ", @a[1..$#a]), "\n";
  print $#b, " $tgt # ", join(" ", @b[1..$#b]), "\n";
}
print STDERR "skip=<$skip> counts=<$ccc>\n";
|
@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python
|
||||
# Author : Qin Gao
|
||||
# Date : Dec 31, 2007
|
||||
# Purpose: Combine multiple alignment files into a single one, the files are
|
||||
# produced by MGIZA, which has sentence IDs, and every file is
|
||||
# ordered inside
|
||||
|
||||
import sys
|
||||
import re
|
||||
|
||||
# Matches the sentence id written as "(<digits>)" in a record's first line.
SENT_ID_RE = re.compile("\\((\\d+)\\)")


def read_entry(f):
    """Read the next three-line alignment record from the open file ``f``.

    Returns ``(sentence_id, (line1, line2, line3))``, or ``None`` when the
    file is exhausted or ends in the middle of a record.
    Raises ``ValueError`` when the first line carries no ``(id)`` marker.
    """
    st1 = f.readline()
    st2 = f.readline()
    st3 = f.readline()
    if len(st1) == 0 or len(st2) == 0 or len(st3) == 0:
        return None
    mt = SENT_ID_RE.search(st1)
    if mt is None:
        raise ValueError("NO SENTENCE ID IN HEADER %r" % st1)
    return (int(mt.group(1)), (st1, st2, st3))


def merge_alignments(files, out, err):
    """Merge alignment records from ``files`` into ``out`` in sentence-id order.

    Every file must be ordered by sentence id. Ids are expected to run
    1, 2, 3, ...; an id that no file provides is reported on ``err`` and
    skipped. Returns the last sentence id processed.
    Raises ``ValueError`` when a file offers an id that was already passed.
    """
    # pending[i] is the record waiting at the head of files[i], or None once
    # that file is exhausted.
    pending = [read_entry(f) for f in files]
    sent_id = 0
    while True:
        sent_id += 1
        any_active = False
        wrote = False
        for i in range(len(files)):
            if pending[i] is None:
                continue
            any_active = True
            cur_id, lines = pending[i]
            if cur_id == sent_id:
                out.write("%s%s%s" % lines)
                wrote = True
                pending[i] = read_entry(files[i])
                break
            if cur_id < sent_id:
                raise ValueError("DUPLICATED ENTRY %d" % cur_id)
        if not any_active:
            break
        if not wrote:
            err.write("ERROR! MISSING ENTRy %d\n" % sent_id)
    return sent_id - 1


def main():
    if len(sys.argv) < 2:
        sys.stderr.write("Provide me the file names (at least 2)\n")
        sys.exit(1)

    files = [open(name, "r") for name in sys.argv[1:]]
    try:
        try:
            total = merge_alignments(files, sys.stdout, sys.stderr)
        except ValueError as problem:
            # Malformed input: report it and signal failure to the caller.
            sys.stderr.write("ERROR! %s\n" % problem)
            sys.exit(1)
    finally:
        for f in files:
            f.close()
    sys.stderr.write("Combined %d files, totally %d sents \n" % (len(files), total))


if __name__ == "__main__":
    main()
|
@ -0,0 +1,93 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from sys import *
|
||||
|
||||
def loadvcb(fname,out):
    """Load a vocabulary file and copy it verbatim to ``out``.

    Each vocabulary line starts with ``<id> <word>``; any further fields are
    ignored. Every line is copied to ``out`` unchanged. Lines with fewer than
    two fields (for example blank lines) are copied but define no word.

    Returns a dictionary mapping word -> integer id.
    """
    vocab = {}
    with open(fname,"r") as df:
        for line in df:
            out.write(line)
            ws = line.strip().split()
            if len(ws) < 2:
                continue
            vocab[ws[1]] = int(ws[0])
    return vocab
|
||||
|
||||
def first_free_id(vcb):
    """Return the smallest id above every id already used in ``vcb`` (1 if empty).

    Taking the maximum rather than the dictionary size keeps new ids from
    colliding with existing ones when the loaded ids do not form the
    contiguous range 1..len(vcb).
    """
    if not vcb:
        return 1
    return max(vcb.values()) + 1


def map_words(words, vcb, vcbx, next_id):
    """Translate ``words`` into vocabulary ids, extending ``vcb`` as needed.

    A word is looked up as written, then lower-cased. An unknown word gets
    the id ``next_id``, is stored in ``vcb`` under its lower-cased form and
    is appended to ``vcbx`` as ``"<id> <word> 1"`` (word as written).

    Returns ``(ids, next_id)`` where ``next_id`` is the next unused id.
    """
    ids = []
    for w in words:
        if w in vcb:
            ids.append(vcb[w])
            continue
        lowered = w.lower()
        if lowered in vcb:
            ids.append(vcb[lowered])
            continue
        vcb[lowered] = next_id
        vcbx.write("%d %s 1\n" % (next_id, w))
        ids.append(next_id)
        next_id += 1
    return ids, next_id


def main():
    if len(argv)<9:
        stderr.write("Error, the input should be \n")
        stderr.write("%s evcb fvcb etxt ftxt esnt(out) fsnt(out) evcbx(out) fvcbx(out)\n" % argv[0])
        stderr.write("You should concatenate the evcbx and fvcbx to existing vcb files\n")
        exit(1)

    ein = open(argv[3],"r")
    fin = open(argv[4],"r")

    eout = open(argv[5],"w")
    fout = open(argv[6],"w")

    evcbx = open(argv[7],"w")
    fvcbx = open(argv[8],"w")
    # loadvcb copies the existing vocabulary into the extended vocabulary
    # file, so the words added below end up appended after it.
    evcb = loadvcb(argv[1],evcbx)
    fvcb = loadvcb(argv[2],fvcbx)

    e_next = first_free_id(evcb)
    f_next = first_free_id(fvcb)

    # Process the two texts line by line in parallel; stop at the end of the
    # shorter one.
    while True:
        eline = ein.readline()
        fline = fin.readline()
        if len(eline)==0 or len(fline)==0:
            break
        el, e_next = map_words(eline.strip().split(), evcb, evcbx, e_next)
        fl, f_next = map_words(fline.strip().split(), fvcb, fvcbx, f_next)

        e_ids = "".join(["%d " % I for I in el])
        f_ids = "".join(["%d " % I for I in fl])
        # One record per sentence pair: a count of 1, then the two id
        # sequences. The two output files list the sides in opposite order.
        eout.write("1\n%s\n%s\n" % (e_ids, f_ids))
        fout.write("1\n%s\n%s\n" % (f_ids, e_ids))

    fout.close()
    eout.close()
    fvcbx.close()
    evcbx.close()
    fin.close()
    ein.close()


if __name__ == "__main__":
    main()
|
||||
|
116
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/scripts/sntpostproc.py
Executable file
116
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/scripts/sntpostproc.py
Executable file
@ -0,0 +1,116 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# This script post process the snt file -- either in single-line format or in multi-line format
|
||||
# The output, however, will always be in single-line format
|
||||
|
||||
from sys import *
|
||||
from optparse import OptionParser
|
||||
import re;
|
||||
usage = """
The script post process the snt file, the input could be single-line snt
file or multi-line, (triple line) and can insert sentence weight to the
file (-w) or add partial alignment to the file (-a)
Usage %prog -s sntfile -w weight-file -a alignfile -o outputfile
"""
# Build the parser once, with the usage text attached, so that the text is
# shown by --help and by print_help().
parser = OptionParser(usage=usage)

parser.add_option("-s", "--snt", dest="snt",default=None,
                  help="The input snt file", metavar="FILE")

parser.add_option("-w", "--weight", dest="weight",default=None,
                  help="The input weight file", metavar="FILE")

parser.add_option("-o", "--output", dest="output",default="-",
                  help="The output file, '-' (the default) writes to standard output", metavar="FILE")

parser.add_option("-a", "--align", dest="align",default=None,
                  help="The input partial alignment file, one sentence per line", metavar="FILE")

(options, args) = parser.parse_args()

# The snt file is the only mandatory input.
if options.snt is None:
    parser.print_help()
    exit(1)
sfile = open(options.snt,"r")

if options.output=="-":
    ofile = stdout
else:
    ofile = open(options.output,"w")

# Optional per-sentence weight file.
wfile = None
if options.weight is not None:
    wfile = open(options.weight,"r")

# Optional per-sentence partial alignment file.
afile = None
if options.align is not None:
    afile = open(options.align,"r")

# Field separators accepted in single-line snt input: '|', '#' or '*'.
rr = re.compile("[\\|\\#\\*]")
wt = 0.0
al = {}
e = ""
f = ""
|
||||
|
||||
def parse_ax(line):
    """Parse a space-separated list of ``x-y`` links.

    Returns a dictionary whose keys are the ``(x, y)`` string pairs found in
    ``line`` (every value is 1). Empty tokens and tokens that do not split
    into exactly two parts on '-' are ignored.
    """
    links = {}
    for token in line.strip().split(" "):
        if not token.strip():
            continue
        parts = token.split("-")
        if len(parts) != 2:
            continue
        links[tuple(parts)] = 1
    return links
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Convert the snt file record by record; the output is always single-line:
#   weight | e | f [ | x-y x-y ... ]
while True:
    l = sfile.readline()
    if len(l) == 0:
        break
    lp = rr.split(l.strip())
    if len(lp)>=3:
        # Single-line record: weight, e, f and optionally alignment links.
        # The fields are stripped so that the padding around the separators
        # is not written out again in addition to the padding added below.
        wt = float(lp[0])
        e = lp[1].strip()
        f = lp[2].strip()
        if len(lp) > 3:
            al = parse_ax(lp[3])
        else:
            al = {}
    else:
        # Multi-line record: weight, e and f on three consecutive lines.
        wt = float(l)
        e = sfile.readline().strip()
        f = sfile.readline().strip()
        al = {}
    if wfile is not None:
        # A weight file overrides the weight; an empty line means weight 1.
        lw = wfile.readline().strip()
        if len(lw)>0:
            wt = float(lw)
        else:
            wt = 1
    if afile is not None:
        # Links from the alignment file are added to those already present.
        la = afile.readline().strip()
        if len(la)>0:
            for entry in parse_ax(la).keys():
                al[entry] = 1

    ofile.write("%g | %s | %s" % (wt, e, f))
    if len(al)>0:
        ofile.write(" |")
        for entry in al.keys():
            ofile.write(" %s-%s" % entry)
    ofile.write("\n")
|
||||
|
||||
|
@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env bash
#
# Usage: <script> OUTPUT GIZA2BAL SYMAL STOT TTOS [symal options...]
#
# Runs the GIZA2BAL perl script on the two alignment inputs (STOT as -d,
# TTOS as -i), pipes its output through SYMAL with the remaining arguments
# and writes the result to OUTPUT.

if [ $# -lt 5 ]; then
    echo "usage: $0 OUTPUT GIZA2BAL SYMAL STOT TTOS [symal options...]" 1>&2
    exit 1
fi

OUTPUT=$1
shift
GIZA2BAL=$1
shift
SYMAL=$1
shift
STOT=$1
shift
TTOS=$1
shift

# Everything is quoted so that each value reaches its program as a single
# argument even when it contains spaces, and "$@" keeps the remaining
# options exactly as they were given.
perl "$GIZA2BAL" -d "${STOT}" -i "${TTOS}" | "$SYMAL" "$@" > "$OUTPUT"
|
||||
|
@ -0,0 +1,17 @@
|
||||
.libs
|
||||
.deps
|
||||
.*swp
|
||||
.nautilus-metafile.xml
|
||||
*.autosave
|
||||
*.bak
|
||||
*~
|
||||
#*#
|
||||
*.gladep
|
||||
*.la
|
||||
*.lo
|
||||
*.o
|
||||
*.class
|
||||
*.pyc
|
||||
*.plugin
|
||||
Makefile
|
||||
Makefile.in
|
@ -0,0 +1,212 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#include "ATables.h"
|
||||
#include "Globals.h"
|
||||
#include "myassert.h"
|
||||
#include "Parameter.h"
|
||||
|
||||
GLOBAL_PARAMETER(bool,CompactADTable,"compactadtable","1: only 3-dimensional alignment table for IBM-2 and IBM-3",PARLEV_MODELS,1);
|
||||
GLOBAL_PARAMETER(float,amodel_smooth_factor,"model23SmoothFactor","smoothing parameter for IBM-2/3 (interpolation with constant)",PARLEV_SMOOTH,0.0);
|
||||
|
||||
template <class VALTYPE>
|
||||
void amodel<VALTYPE>::printTable(const char *filename) const{
|
||||
// print amodel to file with the name filename (it'll be created or overwritten
|
||||
// format : for a table :
|
||||
// aj j l m val
|
||||
// where aj is source word pos, j target word pos, l source sentence length,
|
||||
// m is target sentence length.
|
||||
//
|
||||
//return;
|
||||
if (is_distortion)
|
||||
cout << "Dumping pruned distortion table (d) to file:" << filename <<'\n';
|
||||
else
|
||||
cout << "Dumping pruned alignment table (a) to file:" << filename <<'\n';
|
||||
|
||||
ofstream of(filename);
|
||||
double ssum=0.0;
|
||||
for(WordIndex l=0; l < MaxSentLength; l++){
|
||||
for(WordIndex m=0;m<MaxSentLength;m++){
|
||||
if( CompactADTable && l!=m )
|
||||
continue;
|
||||
unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
|
||||
unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
|
||||
if( is_distortion==0 ){
|
||||
for(WordIndex j=1;j<=M; j++){
|
||||
double sum=0.0;
|
||||
for(WordIndex i=0;i<=L; i++){
|
||||
VALTYPE x=getValue(i, j, L, M);
|
||||
if( x>PROB_SMOOTH ){
|
||||
of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
|
||||
sum+=x;
|
||||
}
|
||||
}
|
||||
ssum+=sum;
|
||||
}
|
||||
}else{
|
||||
for(WordIndex i=0;i<=L;i++){
|
||||
double sum=0.0;
|
||||
for(WordIndex j=1;j<=M;j++){
|
||||
VALTYPE x=getValue(j, i, L, M);
|
||||
if( x>PROB_SMOOTH ){
|
||||
of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
|
||||
sum+=x;
|
||||
}
|
||||
}
|
||||
ssum+=sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class VALTYPE>
|
||||
void amodel<VALTYPE>::printRealTable(const char *filename) const{
|
||||
// print amodel to file with the name filename (it'll be created or overwritten
|
||||
// format : for a table :
|
||||
// aj j l m val
|
||||
// where aj is source word pos, j target word pos, l source sentence length,
|
||||
// m is target sentence length.
|
||||
//
|
||||
//return;
|
||||
if (is_distortion)
|
||||
cout << "Dumping not pruned distortion table (d) to file:" << filename <<'\n';
|
||||
else
|
||||
cout << "Dumping not pruned alignment table (a) to file:" << filename <<'\n';
|
||||
|
||||
ofstream of(filename);
|
||||
for(WordIndex l=0; l < MaxSentLength; l++){
|
||||
for(WordIndex m=0;m<MaxSentLength;m++){
|
||||
if( CompactADTable && l!=m )
|
||||
continue;
|
||||
unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
|
||||
unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
|
||||
if( is_distortion==0 ){
|
||||
for(WordIndex j=1;j<=M; j++){
|
||||
for(WordIndex i=0;i<=L; i++){
|
||||
VALTYPE x=getValue(i, j, L, M);
|
||||
if( x>MINCOUNTINCREASE )
|
||||
of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
|
||||
}
|
||||
}
|
||||
}else{
|
||||
for(WordIndex i=0;i<=L;i++){
|
||||
for(WordIndex j=1;j<=M;j++){
|
||||
VALTYPE x=getValue(j, i, L, M);
|
||||
if( x>MINCOUNTINCREASE )
|
||||
of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern short NoEmptyWord;
|
||||
|
||||
template <class VALTYPE>
|
||||
bool amodel<VALTYPE>::readTable(const char *filename){
|
||||
/* This function reads the a table from a file.
|
||||
Each line is of the format: aj j l m val
|
||||
where aj is the source word position, j the target word position,
|
||||
l the source sentence length, and m the target sentence length
|
||||
|
||||
This function also works for a d table, where the positions
|
||||
of aj and i are swapped. Both the a and d tables are 4 dimensional
|
||||
hashes; this function will simply read in the four values and keep
|
||||
them in that order when hashing the fifth value.
|
||||
NAS, 7/11/99
|
||||
*/
|
||||
ifstream inf(filename);
|
||||
cout << "Reading a/d table from " << filename << "\n";
|
||||
if(!inf){
|
||||
cerr << "\nERROR: Cannot open " << filename<<"\n";
|
||||
return false;
|
||||
}
|
||||
WordIndex w, x, l, m;
|
||||
VALTYPE prob;
|
||||
while(inf >> w >> x >> l >> m >> prob )
|
||||
// the NULL word is added to the length
|
||||
// of the sentence in the tables, but discount it when you write the tables.
|
||||
setValue(w, x, l, m, prob);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class VALTYPE>
|
||||
bool amodel<VALTYPE>::readAugTable(const char *filename){
|
||||
/* This function reads the a table from a file.
|
||||
Each line is of the format: aj j l m val
|
||||
where aj is the source word position, j the target word position,
|
||||
l the source sentence length, and m the target sentence length
|
||||
|
||||
This function also works for a d table, where the positions
|
||||
of aj and i are swapped. Both the a and d tables are 4 dimensional
|
||||
hashes; this function will simply read in the four values and keep
|
||||
them in that order when hashing the fifth value.
|
||||
NAS, 7/11/99
|
||||
*/
|
||||
ifstream inf(filename);
|
||||
cout << "Reading a/d table from " << filename << "\n";
|
||||
if(!inf){
|
||||
cerr << "\nERROR: Cannot open " << filename<<"\n";
|
||||
return false;
|
||||
}
|
||||
WordIndex w, x, l, m;
|
||||
VALTYPE prob;
|
||||
while(inf >> w >> x >> l >> m >> prob )
|
||||
// the NULL word is added to the length
|
||||
// of the sentence in the tables, but discount it when you write the tables.
|
||||
addValue(w, x, l, m, prob);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class VALTYPE>
|
||||
bool amodel<VALTYPE>::merge(amodel<VALTYPE>& am){
|
||||
cout << "start merging " <<"\n";
|
||||
for(WordIndex l=0; l < MaxSentLength; l++){
|
||||
for(WordIndex m=0;m<MaxSentLength;m++){
|
||||
if( CompactADTable && l!=m )
|
||||
continue;
|
||||
unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
|
||||
unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
|
||||
if( is_distortion==0 ){
|
||||
for(WordIndex j=1;j<=M; j++){
|
||||
for(WordIndex i=0;i<=L; i++){
|
||||
VALTYPE x=am.getValue(i, j, L, M);
|
||||
addValue(i,j,L,M,x);
|
||||
}
|
||||
}
|
||||
}else{
|
||||
for(WordIndex i=0;i<=L;i++){
|
||||
for(WordIndex j=1;j<=M;j++){
|
||||
VALTYPE x=am.getValue(j, i, L, M);
|
||||
addValue(j,i,L,M,x);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template class amodel<COUNT> ;
|
||||
//template class amodel<PROB> ;
|
@ -0,0 +1,191 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/* --------------------------------------------------------------------------*
|
||||
* *
|
||||
* Module :ATables *
|
||||
* *
|
||||
* Prototypes File: ATables.h *
|
||||
* *
|
||||
* Objective: Defines classes and methods for handling I/O for distortion & *
|
||||
* alignment tables. *
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _atables_h
|
||||
#define _atables_h 1
|
||||
|
||||
#include "defs.h"
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include "Vector.h"
|
||||
#include <utility>
|
||||
#if __GNUC__>2
|
||||
#include <ext/hash_map>
|
||||
using __gnu_cxx::hash_map;
|
||||
#else
|
||||
#include <hash_map>
|
||||
#endif
|
||||
#include <fstream>
|
||||
#include "Array4.h"
|
||||
#include "myassert.h"
|
||||
#include "Globals.h"
|
||||
#include "syncObj.h"
|
||||
|
||||
extern bool CompactADTable;
|
||||
extern float amodel_smooth_factor;
|
||||
extern short NoEmptyWord;
|
||||
|
||||
/* ------------------- Class Defintions of amodel ---------------------------*/
|
||||
/* Class Name: amodel:
|
||||
Objective: This defines the underlying data structure for distortion prob.
|
||||
and count tables. They are defined as a hash table. Each entry in the hash
|
||||
table is the probability (d(j/l,m,i), where j is word target position, i is
|
||||
source word position connected to it, m is target sentence length, and l is
|
||||
source sentence length) or count collected for it. The probability and the
|
||||
count are represented as log integer probability as
|
||||
defined by the class LogProb .
|
||||
|
||||
This class is used to represent a tables (probability) and d (distortion)
|
||||
tables and also their corresponding count tables .
|
||||
|
||||
*--------------------------------------------------------------------------*/
|
||||
|
||||
// Absolute value of an int.
inline int Mabs(int a){
  return (a<0) ? -a : a;
}
|
||||
|
||||
template <class VALTYPE>
|
||||
class amodel{
|
||||
public:
|
||||
Array4<VALTYPE> a;
|
||||
bool is_distortion ;
|
||||
WordIndex MaxSentLength;
|
||||
bool ignoreL, ignoreM;
|
||||
VALTYPE get(WordIndex aj, WordIndex j, WordIndex l, WordIndex m)const{
|
||||
massert( (!is_distortion) || aj<=m );massert( (!is_distortion) || j<=l );massert( (!is_distortion) || aj!=0 );
|
||||
massert( is_distortion || aj<=l );massert( is_distortion || j<=m );massert( (is_distortion) || j!=0 );
|
||||
massert( l<MaxSentLength );massert( m<MaxSentLength );
|
||||
return a.get(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
|
||||
}
|
||||
|
||||
static float smooth_factor;
|
||||
amodel(bool flag = false)
|
||||
: a(MAX_SENTENCE_LENGTH+1,0.0), is_distortion(flag), MaxSentLength(MAX_SENTENCE_LENGTH)
|
||||
{};
|
||||
|
||||
protected:
|
||||
VALTYPE&getRef(WordIndex aj, WordIndex j, WordIndex l, WordIndex m){
|
||||
massert( (!is_distortion) || aj<=m );massert( (!is_distortion) || j<=l );
|
||||
massert( is_distortion || aj<=l );massert( is_distortion || j<=m );massert( (is_distortion) || j!=0 );
|
||||
massert( l<MaxSentLength );massert( m<MaxSentLength );
|
||||
return a(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
|
||||
}
|
||||
public:
|
||||
void setValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val) {
|
||||
lock.lock();
|
||||
getRef(aj, j, l, m)=val;
|
||||
lock.unlock();
|
||||
}
|
||||
|
||||
Mutex lock;
|
||||
public:
|
||||
/**
|
||||
By Qin
|
||||
*/
|
||||
void addValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val) {
|
||||
lock.lock();
|
||||
getRef(aj, j, l, m)+=val;
|
||||
lock.unlock();
|
||||
}
|
||||
bool merge(amodel<VALTYPE>& am);
|
||||
VALTYPE getValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m) const{
|
||||
if( is_distortion==0 )
|
||||
return max(double(PROB_SMOOTH),amodel_smooth_factor/(l+1)+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
|
||||
else
|
||||
return max(double(PROB_SMOOTH),amodel_smooth_factor/m+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
|
||||
}
|
||||
|
||||
void printTable(const char* filename)const ;
|
||||
void printRealTable(const char* filename)const ;
|
||||
template<class COUNT>
|
||||
void normalize(amodel<COUNT>& aTable)const
|
||||
{
|
||||
WordIndex i, j, l, m ;
|
||||
COUNT total;
|
||||
int nParam=0;
|
||||
for(l=0;l<MaxSentLength;l++){
|
||||
for(m=0;m<MaxSentLength;m++){
|
||||
if( CompactADTable && l!=m )
|
||||
continue;
|
||||
unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
|
||||
unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
|
||||
if( is_distortion==0 ){
|
||||
for(j=1;j<=M; j++){
|
||||
total=0.0;
|
||||
for(i=0;i<=L;i++){
|
||||
total+=get(i, j, L, M);
|
||||
}
|
||||
if( total ){
|
||||
for(i=0;i<=L;i++){
|
||||
nParam++;
|
||||
aTable.getRef(i, j, L, M)=get(i, j, L, M)/total;
|
||||
massert(aTable.getRef(i,j,L,M)<=1.0);
|
||||
if( NoEmptyWord&&i==0 )
|
||||
aTable.getRef(i,j,L,M)=0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}else{
|
||||
for(i=0;i<=L;i++){
|
||||
total=0.0;
|
||||
for(j=1;j<=M;j++)
|
||||
total+=get(j, i, L, M);
|
||||
if( total )
|
||||
for(j=1;j<=M;j++){
|
||||
aTable.getRef(j, i, L, M)=amodel_smooth_factor/M+(1.0-amodel_smooth_factor)*get(j, i, L, M)/total;
|
||||
nParam++;
|
||||
massert(aTable.getRef(j,i,L,M)<=1.0);
|
||||
if( NoEmptyWord&&i==0 )
|
||||
aTable.getRef(j,i,L,M)=0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
cout << "A/D table contains " << nParam << " parameters.\n";
|
||||
}
|
||||
|
||||
bool readTable(const char *filename);
|
||||
bool readAugTable(const char *filename);
|
||||
void clear()
|
||||
{a.clear();}
|
||||
};
|
||||
|
||||
/* ------------------- End of amodel Class Definitions ----------------------*/
|
||||
|
||||
#endif
|
@ -0,0 +1,44 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#include "AlignTables.h"
|
||||
|
||||
// Store alignment `aj` with score `val`.
// Returns false, leaving the table unchanged, when `val` is not positive or
// the alignment is already present; returns true after a successful insert.
bool alignmodel::insert(Vector<WordIndex>& aj, LogProb val)
{
  if( val <= 0 )
    return false ;
  if( a.find(aj) != a.end() )
    return false ;
  a.insert(pair<const Vector<WordIndex>, LogProb>(aj, val));
  return true ;
}
|
||||
|
||||
|
||||
// Look up the score stored for `align`; alignments that are not stored
// yield a LogProb of 0.0.
LogProb alignmodel::getValue(Vector<WordIndex>& align) const
{
  typedef hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::const_iterator const_iter;
  const const_iter hit = a.find(align);
  if( hit != a.end() )
    return hit->second;
  const LogProb zero = 0.0 ;
  return zero;
}
|
@ -0,0 +1,124 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef _aligntables_h
|
||||
#define _aligntables_h 1
|
||||
|
||||
#include "defs.h"
|
||||
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <set>
|
||||
//#include <vector>
|
||||
#include "Vector.h"
|
||||
#include <utility>
|
||||
#if __GNUC__>2
|
||||
#include <ext/hash_map>
|
||||
using __gnu_cxx::hash_map;
|
||||
#else
|
||||
#include <hash_map>
|
||||
#endif
|
||||
#include <cmath>
|
||||
#include <fstream>
|
||||
#include "transpair_model1.h"
|
||||
|
||||
|
||||
/* ----------------- Class Definitions for hashmyalignment --------------------
|
||||
Objective: This class is used to define a hash mapping function to map
|
||||
an alignment (defined as a vector of integers) into a hash key
|
||||
----------------------------------------------------------------------------*/
|
||||
|
||||
class hashmyalignment : public unary_function< Vector<WordIndex>, size_t >
{
public:
  // Hash function for an alignment (a vector of word indices).
  //
  // Sums key[j] raised to the power (j % 6) + 1 over j = 1 .. key.size()-1
  // and reduces the sum modulo 1000000. Entry 0 is not hashed, which matches
  // equal_to_myalignment (it does not compare entry 0 either).
  //
  // The powers are computed with unsigned integer arithmetic, which wraps on
  // overflow. The previous `(size_t)(int) pow(double, double)` form converted
  // a double to int, which is undefined behaviour once the power exceeds
  // INT_MAX (e.g. 100^6). For every power that fits in an int the value is
  // the same as before, so existing hash values are unchanged there.
  size_t operator() (const Vector<WordIndex>& key) const
  {
    size_t key_sum = 0 ;
    for (WordIndex j = 1 ; j < key.size() ; j++){
      const size_t base = (size_t) key[j];
      size_t term = 1;
      // exponent is (j % 6) + 1, i.e. between 1 and 6 multiplications
      for (WordIndex e = 0 ; e <= j % 6 ; e++)
        term *= base;
      key_sum += term;
    }
    return key_sum % 1000000 ;
  }
};
|
||||
|
||||
class equal_to_myalignment{
|
||||
// returns true if two alignments are the same (two vectors have same enties)
|
||||
public:
|
||||
bool operator()(const Vector<WordIndex> t1,
|
||||
const Vector<WordIndex> t2) const
|
||||
{WordIndex j ;
|
||||
if (t1.size() != t2.size())
|
||||
return(false);
|
||||
for (j = 1 ; j < t1.size() ; j++)
|
||||
if (t1[j] != t2[j])
|
||||
return(false);
|
||||
return(true);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/* ---------------- End of Class Definition for hashmyalignment --------------*/
|
||||
|
||||
|
||||
/* ------------------ Class Definitions for alignmodel -----------------------
|
||||
Class Name: alignmodel
|
||||
Objective: Alignment neighborhoods (collections of alignments) are stored in
|
||||
a hash table (for easy lookup). Each alignment vector is mapped into a hash
|
||||
key using the operator defined above.
|
||||
*--------------------------------------------------------------------------*/
|
||||
|
||||
class alignmodel{
|
||||
private:
|
||||
hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment > a;
|
||||
private:
|
||||
// void erase(Vector<WordIndex>&);
|
||||
public:
|
||||
|
||||
// methods;
|
||||
|
||||
inline hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::iterator begin(void){return a.begin();} // begining of hash
|
||||
inline hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::iterator end(void){return a.end();} // end of hash
|
||||
inline const hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >& getHash() const {return a;}; // reference to hash table
|
||||
bool insert(Vector<WordIndex>&, LogProb val=0.0); // add a alignmnet
|
||||
// void setValue(Vector<WordIndex>&, LogProb val); // not needed
|
||||
LogProb getValue(Vector<WordIndex>&)const; // retrieve prob. of alignment
|
||||
inline void clear(void){ a.clear();}; // clear hash table
|
||||
// void printTable(const char* filename);
|
||||
inline void resize(WordIndex n) {a.resize(n);}; // resize table
|
||||
|
||||
};
|
||||
|
||||
/* -------------- End of alignmodel Class Definitions ------------------------*/
|
||||
#endif
|
@ -0,0 +1,5 @@
|
||||
#ifndef GIZA_ARRAY_H_DEFINED
|
||||
#define GIZA_ARRAY_H_DEFINED
|
||||
#include "Vector.h"
|
||||
#define Array Vector
|
||||
#endif
|
@ -0,0 +1,126 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/*--
|
||||
Array2: Implementation of a two-dimensional checked array allowing for
|
||||
a specified underlying one-dimensional data-structure.
|
||||
|
||||
Franz Josef Och (30/07/99)
|
||||
--*/
|
||||
#ifndef CLASS_Array2_DEFINED
|
||||
#define CLASS_Array2_DEFINED
|
||||
|
||||
#include "mystl.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Two-dimensional array of T stored row-major in a one-dimensional
// container Y (std::vector<T> by default). Element (i, j) lives at
// p[i*h2 + j]. Index arguments are checked with assert().
template<class T, class Y=vector<T> > class Array2 {
public:
	Y p;                  // flat storage holding h1*h2 elements
	unsigned int h1, h2;  // extent of the first / second dimension
public:
	// Creates an _h1 x _h2 array of default-constructed elements.
	Array2(unsigned int _h1, unsigned int _h2) :
		p(_h1*_h2), h1(_h1), h2(_h2) {
	}
	// Creates an _h1 x _h2 array with every element set to _init.
	Array2(unsigned int _h1, unsigned int _h2, const T&_init) :
		p(_h1*_h2, _init), h1(_h1), h2(_h2) {
	}
	// Creates an empty 0 x 0 array.
	Array2() :
		h1(0), h2(0) {
	}
	// Mutable access to element (i, j).
	inline T &operator()(unsigned int i, unsigned int j) {
		assert(i<h1);
		assert(j<h2);
		return p[i*h2+j];
	}
	// Read-only access to element (i, j).
	inline const T&operator()(unsigned int i, unsigned int j) const {
		assert(i<h1);
		assert(j<h2);
		return p[i*h2+j];
	}
	// Returns a copy of element (i, j).
	inline T get(unsigned int i, unsigned int j) {
		assert(i<h1);
		assert(j<h2);
		return p[i*h2+j];
	}
	// Assigns x to element (i, j).
	inline void set(unsigned int i, unsigned int j, T x) {
		assert(i<h1);
		assert(j<h2);
		p[i*h2+j]=x;
	}
	// Returns a copy of element (i, j) (const overload).
	inline const T get(unsigned int i, unsigned int j) const {
		assert(i<h1);
		assert(j<h2);
		return p[i*h2+j];
	}
	inline unsigned int getLen1() const {
		return h1;
	}
	inline unsigned int getLen2() const {
		return h2;
	}

	// Pointer to the first element, or 0 when either extent is 0.
	inline T*begin() {
		if (h1==0||h2==0)
			return 0;
		return &(p[0]);
	}
	// Pointer one past the last element, or 0 when either extent is 0.
	inline T*end() {
		if (h1==0||h2==0)
			return 0;
		return &(p[0])+p.size();
	}

	// Const overloads. These used to return p.begin()/p.end() directly,
	// which is a container iterator and is not convertible to `const T*`
	// for std::vector on current standard libraries; they now mirror the
	// non-const overloads above.
	inline const T*begin() const {
		if (h1==0||h2==0)
			return 0;
		return &(p[0]);
	}
	inline const T*end() const {
		if (h1==0||h2==0)
			return 0;
		return &(p[0])+p.size();
	}

	// Prints one row per line, each element followed by a blank, and
	// finishes with an extra end-of-line (flushing the stream).
	friend ostream&operator<<(ostream&out, const Array2<T, Y>&ar) {
		for (unsigned int i=0; i<ar.getLen1(); i++) {
			for (unsigned int j=0; j<ar.getLen2(); j++)
				out << ar(i, j) << ' ';
			out << '\n';
		}
		return out << endl;
	}
	// Changes the extents to a x b. Nothing happens when the extents are
	// already a x b; otherwise the flat storage is resized to a*b elements
	// (existing elements are not re-arranged to the new row length).
	inline void resize(unsigned int a, unsigned int b) {
		if ( !(a==h1&&b==h2)) {
			h1=a;
			h2=b;
			p.resize(h1*h2);
		}
	}
	// Like resize(a, b), and additionally sets every element to t -- but
	// only when the extents actually change.
	inline void resize(unsigned int a, unsigned int b, const T&t) {
		if ( !(a==h1&&b==h2)) {
			h1=a;
			h2=b;
			p.resize(h1*h2);
			fill(p.begin(), p.end(), t);
		}
	}
};
|
||||
|
||||
#endif
|
@ -0,0 +1,78 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef AlignmentArray4_h_DEFINED
|
||||
#define AlignmentArray4_h_DEFINED
|
||||
|
||||
#include "Array2.h"
|
||||
template<class T> class Array4
|
||||
{
|
||||
private:
|
||||
Array2< Array2<T>* > A;
|
||||
int M;
|
||||
T init;
|
||||
public:
|
||||
Array4(int m,const T&_init)
|
||||
: A(m,m,0),M(m),init(_init) {}
|
||||
~Array4()
|
||||
{
|
||||
for(int l=0;l<M;++l)
|
||||
for(int m=0;m<M;++m)
|
||||
delete A(l,m);
|
||||
}
|
||||
const T&operator()(int i, int j, int l, int m)const
|
||||
{
|
||||
if( A(l,m)==0 )
|
||||
return init;
|
||||
else
|
||||
return (*A(l,m))(i,j);
|
||||
}
|
||||
const T&get(int i, int j, int l, int m)const
|
||||
{
|
||||
if( A(l,m)==0 )
|
||||
return init;
|
||||
else
|
||||
return (*A(l,m))(i,j);
|
||||
}
|
||||
T&operator()(int i, int j, int l, int m)
|
||||
{
|
||||
if( A(l,m)==0 )
|
||||
{
|
||||
A(l,m)=new Array2<T>(max(l+1,m+1),max(l+1,m+1),init);
|
||||
}
|
||||
return (*A(l,m))(i,j);
|
||||
}
|
||||
void clear()
|
||||
{
|
||||
for(int l=0;l<M;++l)
|
||||
for(int m=0;m<M;++m)
|
||||
if( A(l,m) )
|
||||
{
|
||||
Array2<T>&a=*A(l,m);
|
||||
for(int i=0;i<=l;++i)
|
||||
for(int j=0;j<=m;++j)
|
||||
a(i,j)=0.0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,772 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef _d4tables_h_define
|
||||
#define _d4tables_h_define
|
||||
#include <cmath>
|
||||
#include "WordClasses.h"
|
||||
#include "Globals.h"
|
||||
#include "myassert.h"
|
||||
#include "syncObj.h"
|
||||
extern float d4modelsmooth_factor;
|
||||
|
||||
class m4_key {
|
||||
public:
|
||||
int deps;
|
||||
int l;
|
||||
int m;
|
||||
int F;
|
||||
int E;
|
||||
int prevj;
|
||||
int vacancies1, vacancies2;
|
||||
m4_key(int _deps, int _l, int _m, int _F, int _E, int _prevj, int _v1,
|
||||
int _v2) :
|
||||
deps(_deps), l(_l), m(_m), F(_F), E(_E), prevj(_prevj),
|
||||
vacancies1(_v1), vacancies2(_v2) {
|
||||
}
|
||||
friend ostream&print1(ostream&out, const m4_key&x, const WordClasses&wce,
|
||||
const WordClasses&wcf) {
|
||||
if (x.deps&DEP_MODEL_l)
|
||||
out << "l: " << x.l<<' ';
|
||||
if (x.deps&DEP_MODEL_m)
|
||||
out << "m: " << x.m<<' ';
|
||||
if (x.deps&DEP_MODEL_F)
|
||||
out << "F: " << wcf.classString(x.F)<< ' ';
|
||||
if (x.deps&DEP_MODEL_E)
|
||||
out << "E: " << wce.classString(x.E)<< ' ';
|
||||
// if(x.deps&DEP_MODEL_pj)out << "j-1: " << x.prevj<<' ';
|
||||
if (x.vacancies1!=-1)
|
||||
out << "v1: " << x.vacancies1 << ' ';
|
||||
if (x.vacancies2!=-1)
|
||||
out << "v2: " << x.vacancies2 << ' ';
|
||||
return out << '\n';
|
||||
}
|
||||
|
||||
friend ostream&print1_m5(ostream&out, const m4_key&x,
|
||||
const WordClasses&wce, const WordClasses&wcf) {
|
||||
out << ((x.deps&DEP_MODEL_E) ? wce.classString(x.E) : string("0"))
|
||||
<< ' ';
|
||||
out << ((x.deps&DEP_MODEL_F) ? wcf.classString(x.F) : string("0"))
|
||||
<< ' ';
|
||||
out << x.vacancies1 << ' ';
|
||||
out << x.vacancies2 << ' ';
|
||||
return out;
|
||||
}
|
||||
|
||||
friend ostream&printb1(ostream&out, const m4_key&x, const WordClasses&wce,
|
||||
const WordClasses&wcf) {
|
||||
if (x.deps&DEP_MODELb_l)
|
||||
out << "l: " << x.l<<' ';
|
||||
if (x.deps&DEP_MODELb_m)
|
||||
out << "m: " << x.m<<' ';
|
||||
if (x.deps&DEP_MODELb_F)
|
||||
out << "F: " << wcf.classString(x.F) << ' ';
|
||||
if (x.deps&DEP_MODELb_E)
|
||||
out << "E: " << wce.classString(x.E) << ' ';
|
||||
if (x.vacancies1!=-1)
|
||||
out << "v1: " << x.vacancies1 << ' ';
|
||||
if (x.vacancies2!=-1)
|
||||
out << "v2: " << x.vacancies2 << ' ';
|
||||
return out << '\n';
|
||||
}
|
||||
friend ostream&printb1_m5(ostream&out, const m4_key&x,
|
||||
const WordClasses&wcf) {
|
||||
out << "-1 " << ((x.deps&DEP_MODEL_F) ? wcf.classString(x.F)
|
||||
: string("0"))<< ' ';
|
||||
out << x.vacancies1 << ' ';
|
||||
out << x.vacancies2 << ' ';
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
class compare1 {
|
||||
private:
|
||||
int deps;
|
||||
public:
|
||||
compare1(int _deps) :
|
||||
deps(_deps) {
|
||||
}
|
||||
bool operator()(const m4_key&a, const m4_key&b) const {
|
||||
if (deps&DEP_MODEL_l) {
|
||||
if (a.l<b.l)
|
||||
return 1;
|
||||
if (b.l<a.l)
|
||||
return 0;
|
||||
}
|
||||
if (deps&DEP_MODEL_m) {
|
||||
if (a.m<b.m)
|
||||
return 1;
|
||||
if (b.m<a.m)
|
||||
return 0;
|
||||
}
|
||||
if (deps&DEP_MODEL_F) {
|
||||
if (a.F<b.F)
|
||||
return 1;
|
||||
if (b.F<a.F)
|
||||
return 0;
|
||||
}
|
||||
if (deps&DEP_MODEL_E) {
|
||||
if (a.E<b.E)
|
||||
return 1;
|
||||
if (b.E<a.E)
|
||||
return 0;
|
||||
}
|
||||
//if(deps&DEP_MODEL_pj){if( a.prevj<b.prevj )return 1;if( b.prevj<a.prevj )return 0;}
|
||||
if (a.vacancies1<b.vacancies1)
|
||||
return 1;
|
||||
if (b.vacancies1<a.vacancies1)
|
||||
return 0;
|
||||
if (a.vacancies2<b.vacancies2)
|
||||
return 1;
|
||||
if (b.vacancies2<a.vacancies2)
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
class compareb1 {
|
||||
private:
|
||||
int deps;
|
||||
public:
|
||||
compareb1(int _deps) :
|
||||
deps(_deps) {
|
||||
}
|
||||
bool operator()(const m4_key&a, const m4_key&b) const {
|
||||
if (deps&DEP_MODELb_l) {
|
||||
if (a.l<b.l)
|
||||
return 1;
|
||||
if (b.l<a.l)
|
||||
return 0;
|
||||
}
|
||||
if (deps&DEP_MODELb_m) {
|
||||
if (a.m<b.m)
|
||||
return 1;
|
||||
if (b.m<a.m)
|
||||
return 0;
|
||||
}
|
||||
if (deps&DEP_MODELb_F) {
|
||||
if (a.F<b.F)
|
||||
return 1;
|
||||
if (b.F<a.F)
|
||||
return 0;
|
||||
}
|
||||
if (deps&DEP_MODELb_E) {
|
||||
if (a.E<b.E)
|
||||
return 1;
|
||||
if (b.E<a.E)
|
||||
return 0;
|
||||
}
|
||||
//if(deps&DEP_MODELb_pj){if( a.prevJ<b.prevJ )return 1;if( b.prevJ<a.prevJ )return 0;}
|
||||
if (a.vacancies1<b.vacancies1)
|
||||
return 1;
|
||||
if (b.vacancies1<a.vacancies1)
|
||||
return 0;
|
||||
if (a.vacancies2<b.vacancies2)
|
||||
return 1;
|
||||
if (b.vacancies2<a.vacancies2)
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
inline void tokenize(const string&in, Vector<string>&out) {
|
||||
string s;
|
||||
istrstream l(in.c_str());
|
||||
while (l>>s)
|
||||
out.push_back(s);
|
||||
}
|
||||
|
||||
class d4model {
|
||||
public:
|
||||
typedef Vector<pair<COUNT,PROB> > Vpff;
|
||||
map<m4_key,Vpff,compare1 > D1;
|
||||
map<m4_key,Vpff,compareb1> Db1;
|
||||
PositionIndex msl;
|
||||
WordClasses* ewordclasses;
|
||||
WordClasses* fwordclasses;
|
||||
template<class MAPPER> void makeWordClasses(const MAPPER&m1,
|
||||
const MAPPER&m2, string efile, string ffile, const vcbList& elist,
|
||||
const vcbList& flist) {
|
||||
ifstream estrm(efile.c_str()), fstrm(ffile.c_str());
|
||||
if ( !estrm) {
|
||||
cerr << "ERROR: can not read " << efile << endl;
|
||||
} else
|
||||
ewordclasses->read(estrm, m1,elist);
|
||||
if ( !fstrm)
|
||||
cerr << "ERROR: can not read " << ffile << endl;
|
||||
else
|
||||
fwordclasses->read(fstrm, m2,flist);
|
||||
}
|
||||
// Builds empty D1/Db1 tables ordered by M4_Dependencies and stores the
// addresses of the two word-class objects; `e` and `f` are not copied.
d4model(PositionIndex _msl, WordClasses& e, WordClasses& f)
	: D1(compare1(M4_Dependencies)),
	  Db1(compareb1(M4_Dependencies)),
	  msl(_msl),
	  ewordclasses(&e),
	  fwordclasses(&f)
{}
|
||||
|
||||
protected:
|
||||
// Returns a reference to the count cell of D1 for key (l, m, F, E) at
// offset j - j_cp + msl. A row of msl*2+1 zeroed (count, prob) pairs is
// created when the key is not yet present. No locking is done here.
inline COUNT&getCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l,
		int m) {
	assert(j>=1);
	const m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1);
	map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
	if (p==D1.end()) {
		const Vpff zeroed(msl*2+1, pair<COUNT,PROB>(0.0,0.0));
		p=D1.insert(make_pair(key,zeroed)).first;
	}
	assert(p!=D1.end());
	Vpff&row=p->second;
	return row[j-j_cp+msl].first;
}
|
||||
|
||||
// Returns a reference to the count cell of Db1 for key (l, m, F, E) at
// offset j - j_prev + msl. A row of msl*2+1 zeroed (count, prob) pairs is
// created when the key is not yet present. No locking is done here.
inline COUNT&getCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F,
		int l, int m) {
	assert(j>=1);
	assert(j_prev>=1);
	const m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1);
	map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
	if (p==Db1.end()) {
		const Vpff zeroed(msl*2+1, pair<COUNT,PROB>(0.0,0.0));
		p=Db1.insert(make_pair(key,zeroed)).first;
	}
	assert(p!=Db1.end());
	Vpff&row=p->second;
	return row[j-j_prev+msl].first;
}
|
||||
Mutex lock_f,lock_b;
|
||||
public:
|
||||
// Adds v to the D1 count cell selected by getCountRef_first(), holding
// lock_f for the duration of the lookup and the update.
inline void augCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l,
		int m, const COUNT& v){
	lock_f.lock();
	COUNT&cell=getCountRef_first(j,j_cp,E,F,l,m);
	cell+=v;
	lock_f.unlock();
}
|
||||
|
||||
// Adds v to the Db1 count cell selected by getCountRef_bigger(), holding
// lock_b for the duration of the lookup and the update.
inline void augCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F,
		int l, int m, const COUNT& v){
	lock_b.lock();
	COUNT&cell=getCountRef_bigger(j,j_prev,E,F,l,m);
	cell+=v;
	lock_b.unlock();
}
|
||||
|
||||
|
||||
|
||||
// Adds the tables of `d` into this model, entry by entry.
// For every key of d.D1 / d.Db1 the matching row here is created (msl*2+1
// zeroed pairs) if missing, and the `.second` component of each cell of
// d's row is added to the same cell here.
// NOTE(review): only `.second` (the PROB half of the pair) is accumulated,
// the COUNT half is left alone -- confirm this is what callers expect.
// NOTE(review): rows of `d` are assumed to be no longer than rows created
// here (same msl); this is not checked.
//
// The Db1 loop now uses the iterator type of the compareb1 map. It used to
// reuse the compare1 map's iterator type, which only compiles on libraries
// whose map iterator type does not depend on the comparator.
void merge(d4model &d) {
	typedef map<m4_key,Vpff,compare1 > HeadTable;
	typedef map<m4_key,Vpff,compareb1 > NonHeadTable;

	for (HeadTable::iterator src=d.D1.begin(); src!=d.D1.end(); ++src) {
		HeadTable::iterator dst=D1.find(src->first);
		if (dst==D1.end())
			dst=D1.insert(make_pair(src->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
		for (unsigned int i=0; i<src->second.size(); i++) {
			dst->second[i].second+=src->second[i].second;
		}
	}

	for (NonHeadTable::iterator src=d.Db1.begin(); src!=d.Db1.end(); ++src) {
		NonHeadTable::iterator dst=Db1.find(src->first);
		if (dst==Db1.end())
			dst=Db1.insert(make_pair(src->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
		for (unsigned int i=0; i<src->second.size(); i++) {
			dst->second[i].second+=src->second[i].second;
		}
	}
}
|
||||
|
||||
bool augCount(const char* fD1, const char* fDb) {
|
||||
ifstream ifsd(fD1);
|
||||
int deps;
|
||||
int l;
|
||||
int m;
|
||||
int F;
|
||||
int E;
|
||||
int prevj;
|
||||
int vacancies1, vacancies2;
|
||||
int len;
|
||||
double count;
|
||||
if (!ifsd) {
|
||||
cerr << "Failed in " << fD1 << endl;
|
||||
return false;
|
||||
}
|
||||
{
|
||||
while (ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1
|
||||
>>vacancies2>>len) {
|
||||
m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1,
|
||||
vacancies2);
|
||||
map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
|
||||
if (p==D1.end())
|
||||
p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
|
||||
assert(p!=D1.end());
|
||||
int i;
|
||||
for (i=0; i<len; i++) {
|
||||
ifsd >> count;
|
||||
p->second[i].first+=count;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
ifstream ifsd1(fDb);
|
||||
if (!ifsd1) {
|
||||
cerr << "Failed in " << fDb << endl;
|
||||
return false;
|
||||
}
|
||||
{
|
||||
while (ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1
|
||||
>>vacancies2>>len) {
|
||||
m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1,
|
||||
vacancies2);
|
||||
map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
|
||||
if (p==Db1.end())
|
||||
p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
|
||||
assert(p!=D1.end());
|
||||
int i;
|
||||
for (i=0; i<len; i++) {
|
||||
ifsd1 >> count;
|
||||
p->second[i].first+=count;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool readProbTable(const char* fD1, const char* fDb){
|
||||
ifstream ifsd(fD1);
|
||||
int deps;
|
||||
int l;
|
||||
int m;
|
||||
int F;
|
||||
int E;
|
||||
int prevj;
|
||||
int vacancies1,vacancies2;
|
||||
int len;
|
||||
double count;
|
||||
if(!ifsd){
|
||||
cerr << "Failed in " << fD1 << endl;
|
||||
return false;
|
||||
}
|
||||
{
|
||||
while(ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len){
|
||||
m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2);
|
||||
map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
|
||||
if(p==D1.end())p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
|
||||
assert(p!=D1.end());
|
||||
int i;
|
||||
for(i=0;i<len;i++){
|
||||
ifsd >> count;
|
||||
p->second[i].second=count;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
ifstream ifsd1(fDb);
|
||||
if(!ifsd1){
|
||||
cerr << "Failed in " << fDb << endl;
|
||||
return false;
|
||||
}
|
||||
{
|
||||
while(ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len){
|
||||
m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2);
|
||||
map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
|
||||
if(p==Db1.end())p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
|
||||
assert(p!=D1.end());
|
||||
int i;
|
||||
for(i=0;i<len;i++){
|
||||
ifsd1 >> count;
|
||||
p->second[i].second=count;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool printProbTable(const char* fD1, const char* fDb) {
|
||||
ofstream ofsd(fD1);
|
||||
if (!ofsd.is_open()) {
|
||||
return false;
|
||||
}
|
||||
{
|
||||
map<m4_key,Vpff,compare1 >::iterator it;
|
||||
for (it = D1.begin(); it!=D1.end(); it++) {
|
||||
ofsd << it->first.deps << " " << it->first.l << " "
|
||||
<< it->first.m << " " << it->first.F << " "
|
||||
<< it->first.E << " " << it->first.prevj << " "
|
||||
<< it->first.vacancies1 << " " << it->first.vacancies2
|
||||
<< " " << it->second.size() << " ";
|
||||
int i;
|
||||
for (i=0; i<it->second.size(); i++) {
|
||||
ofsd << it->second[i].second << " ";
|
||||
}
|
||||
ofsd << endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ofstream ofsdb(fDb);
|
||||
if (!ofsdb.is_open()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
map<m4_key,Vpff,compareb1 >::iterator it;
|
||||
for (it = Db1.begin(); it!=Db1.end(); it++) {
|
||||
ofsdb << it->first.deps << " " << it->first.l << " " << it->first.m
|
||||
<< " " << it->first.F << " " << it->first.E << " "
|
||||
<< it->first.prevj << " " << it->first.vacancies1 << " "
|
||||
<< it->first.vacancies2 << " " << it->second.size()<< endl;
|
||||
int i;
|
||||
for (i=0; i<it->second.size(); i++) {
|
||||
ofsdb << it->second[i].second << " ";
|
||||
}
|
||||
ofsdb << endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool dumpCount(const char* fD1, const char* fDb){
|
||||
ofstream ofsd(fD1);
|
||||
if(!ofsd.is_open()){
|
||||
return false;
|
||||
}
|
||||
{
|
||||
map<m4_key,Vpff,compare1 >::iterator it;
|
||||
for(it = D1.begin(); it!=D1.end();it++){
|
||||
ofsd << it->first.deps << " "
|
||||
<< it->first.l << " "
|
||||
<< it->first.m << " "
|
||||
<< it->first.F << " "
|
||||
<< it->first.E << " "
|
||||
<< it->first.prevj << " "
|
||||
<< it->first.vacancies1 << " "
|
||||
<< it->first.vacancies2 << " "
|
||||
<< it->second.size() << " ";
|
||||
int i;
|
||||
for(i=0;i<it->second.size();i++){
|
||||
ofsd << it->second[i].first << " ";
|
||||
}
|
||||
ofsd << endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ofstream ofsdb(fDb);
|
||||
if(!ofsdb.is_open()){
|
||||
return false;
|
||||
}
|
||||
|
||||
map<m4_key,Vpff,compareb1 >::iterator it;
|
||||
for(it = Db1.begin(); it!=Db1.end();it++){
|
||||
ofsdb << it->first.deps << " "
|
||||
<< it->first.l << " "
|
||||
<< it->first.m << " "
|
||||
<< it->first.F << " "
|
||||
<< it->first.E << " "
|
||||
<< it->first.prevj << " "
|
||||
<< it->first.vacancies1 << " "
|
||||
<< it->first.vacancies2 << " "
|
||||
<< it->second.size()<< endl;
|
||||
int i;
|
||||
for(i=0;i<it->second.size();i++){
|
||||
ofsdb << it->second[i].first << " ";
|
||||
}
|
||||
ofsdb << endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Finds the D1 row for (E, F, l, m); returns D1.end() when there is none.
// The result is meant to be passed to getProb_first_withiterator().
map<m4_key,Vpff,compare1 >::const_iterator getProb_first_iterator(int E,
		int F, int l, int m) const {
	const m4_key probe(M4_Dependencies, l, m, F, E, 0, -1, -1);
	return D1.find(probe);
}
|
||||
// Smoothed probability from a D1 row already located by the caller.
// Returns PROB_SMOOTH when `p` is D1.end(); otherwise the larger of
// PROB_SMOOTH and
//   d4modelsmooth_factor/(2*m-1) + (1-d4modelsmooth_factor)*stored
// where `stored` is the PROB at offset j - j_cp + msl of the row.
PROB getProb_first_withiterator(WordIndex j, WordIndex j_cp, int m,
		const map<m4_key,Vpff,compare1 >::const_iterator& p) const {
	assert(j>=1);
	//assert(j_cp>=0);
	assert(j<=msl);
	assert(j_cp<=msl);
	if (p==D1.end())
		return PROB_SMOOTH;
	massert((p->second)[j-j_cp+msl].second<=1.0);
	const PROB stored=(p->second)[j-j_cp+msl].second;
	return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1
			-d4modelsmooth_factor)*stored);
}
|
||||
|
||||
// Smoothed probability from D1 for key (l, m, F, E).
// Returns PROB_SMOOTH when the key is absent; otherwise the larger of
// PROB_SMOOTH and
//   d4modelsmooth_factor/(2*m-1) + (1-d4modelsmooth_factor)*stored
// where `stored` is the PROB at offset j - j_cp + msl of the row.
PROB getProb_first(WordIndex j, WordIndex j_cp, int E, int F, int l, int m) const {
	assert(j>=1);
	//assert(j_cp>=0);
	assert(j<=msl);
	assert(j_cp<=msl);
	const m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1);
	const map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
	if (p==D1.end())
		return PROB_SMOOTH;
	massert((p->second)[j-j_cp+msl].second<=1.0);
	const PROB stored=(p->second)[j-j_cp+msl].second;
	return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1
			-d4modelsmooth_factor)*stored);
}
|
||||
// Finds the Db1 row for (E, F, l, m); returns Db1.end() when there is none.
// The result is meant to be passed to getProb_bigger_withiterator().
map<m4_key,Vpff,compareb1 >::const_iterator getProb_bigger_iterator(int E,
		int F, int l, int m) const {
	const m4_key probe(M4_Dependencies, l, m, F, E, 0, -1, -1);
	return Db1.find(probe);
}
|
||||
// Smoothed probability from a Db1 row already located by the caller.
// Returns PROB_SMOOTH when `p` is Db1.end(); otherwise the larger of
// PROB_SMOOTH and
//   d4modelsmooth_factor/(m-1) + (1-d4modelsmooth_factor)*stored
// where `stored` is the PROB at offset j - j_prev + msl of the row.
PROB getProb_bigger_withiterator(WordIndex j, WordIndex j_prev, int m,
		const map<m4_key,Vpff,compareb1 >::const_iterator&p) const {
	massert(j>=1);
	massert(j_prev>=1);
	massert(j>j_prev);
	massert(j<=msl);
	massert(j_prev<=msl);
	if (p==Db1.end())
		return PROB_SMOOTH;
	massert((p->second)[j-j_prev+msl].second<=1.0 );
	const PROB stored=(p->second)[j-j_prev+msl].second;
	return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1
			-d4modelsmooth_factor)*stored);
}
|
||||
|
||||
// Smoothed probability from Db1 for key (l, m, F, E).
// Returns PROB_SMOOTH when the key is absent; otherwise the larger of
// PROB_SMOOTH and
//   d4modelsmooth_factor/(m-1) + (1-d4modelsmooth_factor)*stored
// where `stored` is the PROB at offset j - j_prev + msl of the row.
PROB getProb_bigger(WordIndex j, WordIndex j_prev, int E, int F, int l,
		int m) const {
	massert(j>=1);
	massert(j_prev>=1);
	massert(j>j_prev);
	massert(j<=msl);
	massert(j_prev<=msl);
	const m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1);
	const map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
	if (p==Db1.end())
		return PROB_SMOOTH;
	massert((p->second)[j-j_prev+msl].second<=1.0 );
	const PROB stored=(p->second)[j-j_prev+msl].second;
	return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1
			-d4modelsmooth_factor)*stored);
}
|
||||
|
||||
void normalizeTable() {
|
||||
int nParams=0;
|
||||
for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
|
||||
Vpff&d1=i->second;
|
||||
double sum=0.0;
|
||||
for (PositionIndex i=0; i<d1.size(); i++)
|
||||
sum+=d1[i].first;
|
||||
for (PositionIndex i=0; i<d1.size(); i++) {
|
||||
d1[i].second=sum ? (d1[i].first/sum) : (1.0/d1.size());
|
||||
nParams++;
|
||||
}
|
||||
}
|
||||
for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
|
||||
Vpff&db1=i->second;
|
||||
double sum=0.0;
|
||||
for (PositionIndex i=0; i<db1.size(); i++)
|
||||
sum+=db1[i].first;
|
||||
for (PositionIndex i=0; i<db1.size(); i++) {
|
||||
db1[i].second=sum ? (db1[i].first/sum) : (1.0/db1.size());
|
||||
nParams++;
|
||||
}
|
||||
}
|
||||
cout << "D4 table contains " << nParams << " parameters.\n";
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
|
||||
Vpff&d1=i->second;
|
||||
for (PositionIndex i=0; i<d1.size(); i++)
|
||||
d1[i].first=0.0;
|
||||
}
|
||||
for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
|
||||
Vpff&db1=i->second;
|
||||
for (PositionIndex i=0; i<db1.size(); i++)
|
||||
db1[i].first=0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/*void printProbTable(const char*fname1,const char*fname2)
|
||||
{
|
||||
ofstream out(fname1);
|
||||
double ssum=0.0;
|
||||
out << "# Translation tables for Model 4 .\n";
|
||||
out << "# Table for head of cept.\n";
|
||||
for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i){
|
||||
const Vpff&d1=i->second;
|
||||
double sum=0.0;
|
||||
for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
|
||||
if ( sum ){
|
||||
print1(out,i->first,ewordclasses,fwordclasses);
|
||||
out << "SUM: " << sum << ' '<< '\n';
|
||||
for(unsigned ii=0;ii<d1.size();ii++)
|
||||
if( d1[ii].first )
|
||||
out << (int)(ii)-(int)(msl) << ' ' << d1[ii].first << '\n';
|
||||
out << endl;
|
||||
}
|
||||
ssum+=sum;
|
||||
}
|
||||
out << "# Table for non-head of cept.\n";
|
||||
for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
|
||||
{
|
||||
const Vpff&db1=i->second;
|
||||
double sum=0.0;
|
||||
for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
|
||||
if( sum ){
|
||||
printb1(out,i->first,ewordclasses,fwordclasses);
|
||||
out << "SUM: " << sum << ' '<<'\n';
|
||||
for(unsigned ii=0;ii<db1.size();ii++)
|
||||
if( db1[ii].first )
|
||||
{
|
||||
out << (int)(ii)-(int)(msl) << ' ' << db1[ii].first << '\n';
|
||||
}
|
||||
out << endl;
|
||||
}
|
||||
ssum+=sum;
|
||||
}
|
||||
out << endl << "FULL-SUM: " << ssum << endl;
|
||||
if( M4_Dependencies==76 ){
|
||||
ofstream out2(fname2);
|
||||
for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
|
||||
{
|
||||
const Vpff&d1=i->second;
|
||||
for(unsigned ii=0;ii<d1.size();ii++)
|
||||
if( d1[ii].first )
|
||||
out2 << ewordclasses.classString(i->first.E) << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << d1[ii].second << '\n';
|
||||
}
|
||||
for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i) {
|
||||
const Vpff&db1=i->second;
|
||||
for(unsigned ii=0;ii<db1.size();ii++)
|
||||
if( db1[ii].first )
|
||||
out2 << -1 << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << db1[ii].second << '\n';
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
bool readProbTable(const char *fname) {
|
||||
cerr << "Reading D4Tables from " << fname << endl;
|
||||
ifstream file(fname);
|
||||
string line;
|
||||
do {
|
||||
getline(file, line);
|
||||
} while (line.length()&&line[0]=='#');
|
||||
|
||||
do {
|
||||
while (line.length()==0)
|
||||
getline(file, line);
|
||||
if (line[0]=='#')
|
||||
break;
|
||||
Vector<string> linestr;
|
||||
tokenize(line, linestr);
|
||||
m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1);
|
||||
for (unsigned int i=0; i<linestr.size(); i+=2) {
|
||||
if (linestr[i]=="l:") {
|
||||
k.l=atoi(linestr[i+1].c_str());
|
||||
iassert(M4_Dependencies&DEP_MODEL_l);
|
||||
}
|
||||
if (linestr[i]=="m:") {
|
||||
k.m=atoi(linestr[i+1].c_str());
|
||||
iassert(M4_Dependencies&DEP_MODEL_m);
|
||||
}
|
||||
if (linestr[i]=="F:") {
|
||||
k.F=(*fwordclasses)(linestr[i+1]);
|
||||
iassert(M4_Dependencies&DEP_MODEL_F);
|
||||
}
|
||||
if (linestr[i]=="E:") {
|
||||
k.E=(*ewordclasses)(linestr[i+1]);
|
||||
iassert(M4_Dependencies&DEP_MODEL_E);
|
||||
}
|
||||
//if( linestr[i]=="j-1:" ){k.prevj=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_pj);}
|
||||
}
|
||||
string str;
|
||||
double sum;
|
||||
file >> str >> sum;
|
||||
iassert(str=="SUM:");
|
||||
if (str!="SUM:")
|
||||
cerr << "ERROR: string is " << str << " and not sum " << endl;
|
||||
|
||||
do {
|
||||
int value;
|
||||
double count;
|
||||
getline(file, line);
|
||||
istrstream twonumbers(line.c_str());
|
||||
if (twonumbers >> value >> count) {
|
||||
if (D1.count(k)==0)
|
||||
D1.insert(make_pair(k, Vpff(msl*2+1, pair<COUNT, PROB>(
|
||||
0.0, 0.0))));
|
||||
D1[k][value+msl]=make_pair(count, count/sum);
|
||||
}
|
||||
} while (line.length());
|
||||
} while (file);
|
||||
do {
|
||||
getline(file, line);
|
||||
} while (line.length()&&line[0]=='#');
|
||||
do {
|
||||
while (line.length()==0)
|
||||
getline(file, line);
|
||||
if (line[0]=='#')
|
||||
break;
|
||||
Vector<string> linestr;
|
||||
tokenize(line, linestr);
|
||||
m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1);
|
||||
bool sumRead=0;
|
||||
for (unsigned int i=0; i<linestr.size(); i+=2) {
|
||||
if (linestr[i]=="l:") {
|
||||
k.l=atoi(linestr[i+1].c_str());
|
||||
iassert(M4_Dependencies&DEP_MODELb_l);
|
||||
} else if (linestr[i]=="m:") {
|
||||
k.m=atoi(linestr[i+1].c_str());
|
||||
iassert(M4_Dependencies&DEP_MODELb_m);
|
||||
} else if (linestr[i]=="F:") {
|
||||
k.F=(*fwordclasses)(linestr[i+1]);
|
||||
iassert(M4_Dependencies&DEP_MODELb_F);
|
||||
} else if (linestr[i]=="E:") {
|
||||
k.E=(*ewordclasses)(linestr[i+1]);
|
||||
iassert(M4_Dependencies&DEP_MODELb_E);
|
||||
} else if (linestr[i]=="SUM:") {
|
||||
cerr << "Warning: obviously no dependency.\n";
|
||||
sumRead=1;
|
||||
} else if (linestr[i]=="FULL-SUM:") {
|
||||
break;
|
||||
} else {
|
||||
cerr << "ERROR: error in reading d4 tables: " << linestr[i]
|
||||
<< ' ' << linestr[i+1] << endl;
|
||||
}
|
||||
}
|
||||
string str;
|
||||
double sum;
|
||||
if (sumRead==0)
|
||||
file >> str >> sum;
|
||||
else {
|
||||
str=linestr[0];
|
||||
sum=atof(linestr[1].c_str());
|
||||
}
|
||||
if (str!="SUM:")
|
||||
cerr << "ERROR: should read SUM but read " << str << endl;
|
||||
do {
|
||||
int value;
|
||||
double count;
|
||||
getline(file, line);
|
||||
istrstream twonumbers(line.c_str());
|
||||
if (twonumbers >> value >> count) {
|
||||
if (Db1.count(k)==0)
|
||||
Db1.insert(make_pair(k, Vpff(msl*2+1,
|
||||
pair<COUNT, PROB>(0.0, 0.0))));
|
||||
Db1[k][value+msl]=make_pair(count, count/sum);
|
||||
}
|
||||
} while (file&&line.length());
|
||||
} while (file);
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,233 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef _d5tables_h_define
|
||||
#define _d5tables_h_define
|
||||
#include <cmath>
|
||||
#include "D4Tables.h"
|
||||
|
||||
extern float d5modelsmooth_countoffset;
|
||||
extern float d5modelsmooth_factor;
|
||||
|
||||
#define UNSEENPROB (1.0/vacancies_total)
|
||||
|
||||
class d5model
|
||||
{
|
||||
private:
|
||||
typedef Vector < pair < COUNT,PROB > >Vpff;
|
||||
map< m4_key,Vpff,compare1 > D1;
|
||||
map< m4_key,Vpff,compareb1 > Db1;
|
||||
public:
|
||||
d4model&d4m;
|
||||
WordClasses* ewordclasses;
|
||||
WordClasses* fwordclasses;
|
||||
template<class MAPPER>
|
||||
void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile
|
||||
, const vcbList& elist,
|
||||
const vcbList& flist)
|
||||
{
|
||||
ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
|
||||
if( !estrm )
|
||||
cerr << "ERROR: can not read classes from " << efile << endl;
|
||||
else
|
||||
ewordclasses->read(estrm,m1,elist);
|
||||
if( !fstrm )
|
||||
cerr << "ERROR: can not read classes from " << ffile << endl;
|
||||
else
|
||||
fwordclasses->read(fstrm,m2,flist);
|
||||
}
|
||||
d5model (d4model&_d4m)
|
||||
:D1 (compare1(M5_Dependencies)), Db1 (compareb1(M5_Dependencies)),d4m(_d4m),
|
||||
ewordclasses(_d4m.ewordclasses),fwordclasses(_d4m.fwordclasses)
|
||||
{}
|
||||
COUNT &getCountRef_first (PositionIndex vacancies_j,
|
||||
PositionIndex vacancies_jp, int F,
|
||||
PositionIndex l, PositionIndex m,
|
||||
PositionIndex vacancies_total)
|
||||
{
|
||||
massert(vacancies_j>0);
|
||||
massert(vacancies_total>0);
|
||||
//massert(vacancies_jp<=vacancies_total);
|
||||
massert(vacancies_j <=vacancies_total);
|
||||
massert(vacancies_total<=m);
|
||||
m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
|
||||
map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
|
||||
if(p==D1.end())
|
||||
p=D1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length
|
||||
massert(p!=D1.end());
|
||||
return (p->second)[vacancies_j].first;
|
||||
}
|
||||
COUNT &getCountRef_bigger (PositionIndex vacancies_j,
|
||||
PositionIndex vacancies_jp, int F,
|
||||
PositionIndex l, PositionIndex m,
|
||||
PositionIndex vacancies_total)
|
||||
{
|
||||
massert(vacancies_j>0);
|
||||
massert(vacancies_total>0);
|
||||
massert (vacancies_jp <= vacancies_j);
|
||||
massert (vacancies_j-vacancies_jp <= vacancies_total);
|
||||
m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
|
||||
map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
|
||||
if(p==Db1.end())
|
||||
p=Db1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length
|
||||
massert(p!=Db1.end());
|
||||
return (p->second)[vacancies_j - vacancies_jp].first;
|
||||
}
|
||||
PROB getProb_first (PositionIndex vacancies_j, PositionIndex vacancies_jp,
|
||||
int F, PositionIndex l, PositionIndex m,
|
||||
PositionIndex vacancies_total) const
|
||||
{
|
||||
massert(vacancies_j>0);
|
||||
massert(vacancies_total>0);
|
||||
//massert(vacancies_jp<=vacancies_total);
|
||||
massert(vacancies_j <=vacancies_total);
|
||||
massert(vacancies_total<=m);
|
||||
m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
|
||||
map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
|
||||
if( p==D1.end() )
|
||||
return UNSEENPROB;
|
||||
else
|
||||
return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j].second);
|
||||
}
|
||||
PROB getProb_bigger (PositionIndex vacancies_j, PositionIndex vacancies_jp,
|
||||
int F, PositionIndex l, PositionIndex m,
|
||||
PositionIndex vacancies_total) const
|
||||
{
|
||||
massert(vacancies_j>0);
|
||||
massert(vacancies_total>0);
|
||||
massert (vacancies_jp <= vacancies_j);
|
||||
massert (vacancies_j-vacancies_jp <= vacancies_total);
|
||||
m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
|
||||
map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
|
||||
if(p==Db1.end())
|
||||
return UNSEENPROB;
|
||||
else
|
||||
return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j - vacancies_jp].second);
|
||||
}
|
||||
void normalizeTable ()
|
||||
{
|
||||
int nParams=0;
|
||||
for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
|
||||
{
|
||||
Vpff&d1=i->second;
|
||||
COUNT sum=0.0;
|
||||
for(PositionIndex i=0;i<d1.size();i++)
|
||||
sum+=d1[i].first+d5modelsmooth_countoffset;
|
||||
for(PositionIndex i=0;i<d1.size();i++)
|
||||
{
|
||||
d1[i].second=sum?((d1[i].first+d5modelsmooth_countoffset)/sum):(1.0/d1.size());
|
||||
nParams++;
|
||||
}
|
||||
}
|
||||
for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
|
||||
{
|
||||
Vpff&db1=i->second;
|
||||
double sum=0.0;
|
||||
for(PositionIndex i=0;i<db1.size();i++)
|
||||
sum+=db1[i].first+d5modelsmooth_countoffset;
|
||||
for(PositionIndex i=0;i<db1.size();i++)
|
||||
{
|
||||
db1[i].second=sum?((db1[i].first+d5modelsmooth_countoffset)/sum):(1.0/db1.size());
|
||||
nParams++;
|
||||
}
|
||||
}
|
||||
cout << "D5 table contains " << nParams << " parameters.\n";
|
||||
}
|
||||
|
||||
friend ostream&operator<<(ostream&out,d5model&d5m) {
|
||||
out << "# Translation tables for Model 5 .\n";
|
||||
out << "# Table for head of cept.\n";
|
||||
for(map<m4_key,Vpff,compare1 >::const_iterator i=d5m.D1.begin();i!=d5m.D1.end();++i){
|
||||
const Vpff&d1=i->second;
|
||||
COUNT sum=0.0;
|
||||
for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
|
||||
if ( sum ) {
|
||||
for(unsigned ii=0;ii<d1.size();ii++)
|
||||
{
|
||||
print1_m5(out,i->first,*d5m.ewordclasses,*d5m.fwordclasses);
|
||||
out << (int)(ii) << ' ' << d1[ii].second << ' ' << d1[ii].first << '\n';
|
||||
}
|
||||
out << endl;
|
||||
}
|
||||
}
|
||||
out << "# Table for non-head of cept.\n";
|
||||
for(map<m4_key,Vpff,compareb1 >::const_iterator i=d5m.Db1.begin();i!=d5m.Db1.end();++i){
|
||||
const Vpff&db1=i->second;
|
||||
double sum=0.0;
|
||||
for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
|
||||
if( sum ){
|
||||
for(unsigned ii=0;ii<db1.size();ii++){
|
||||
printb1_m5(out,i->first,*d5m.fwordclasses);
|
||||
out << (int)(ii) << ' ' << db1[ii].second << ' ' << db1[ii].first << '\n';
|
||||
}
|
||||
out << endl;
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
void readProbTable(const char*x)
|
||||
{
|
||||
ifstream f(x);
|
||||
string l;
|
||||
while(getline(f,l))
|
||||
{
|
||||
if(l.length()&&l[0]=='#')
|
||||
continue;
|
||||
istrstream is(l.c_str());
|
||||
string E,F;
|
||||
int v1,v2,ii;
|
||||
double prob,count;
|
||||
if(is>>E>>F>>v1>>v2>>ii>>prob>>count)
|
||||
{
|
||||
//cerr << "Read: " << E << " " << F << " " << v1 << " " << v2 << " " << prob<< endl;
|
||||
if( count>0 )
|
||||
if( E=="-1")
|
||||
getCountRef_bigger(ii,0,(*fwordclasses)(F),1000,1000,v2)+=count;
|
||||
else
|
||||
getCountRef_first(ii,v1,(*fwordclasses)(F),1000,1000,v2)+=count;
|
||||
}
|
||||
}
|
||||
normalizeTable();
|
||||
//ofstream of("M5FILE");
|
||||
//of << (*this);
|
||||
}
|
||||
void clear()
|
||||
{
|
||||
for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
|
||||
{
|
||||
Vpff&d1=i->second;
|
||||
for(PositionIndex i=0;i<d1.size();i++)
|
||||
d1[i].first=0.0;
|
||||
}
|
||||
for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
|
||||
{
|
||||
Vpff&db1=i->second;
|
||||
for(PositionIndex i=0;i<db1.size();i++)
|
||||
db1[i].first=0.0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -0,0 +1,93 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/* Noah A. Smith
|
||||
Dictionary object for dictionary filter in Model 1 training
|
||||
|
||||
Dictionary file must be in order (sorted) by Foreign vocab id, but English
|
||||
vocab ids may be in any order.
|
||||
|
||||
9 August 1999
|
||||
*/
|
||||
|
||||
#include "Dictionary.h"
|
||||
#include <cstring>
|
||||
|
||||
// Load a dictionary of integer id pairs from `filename`.
//
// An empty (or null) filename produces a "dead" dictionary for which
// indict() always answers false. Otherwise the file is read as whitespace
// separated integer pairs "p q" until the first token that does not parse;
// the process exits with status 1 if the file cannot be opened.
//
// The lookup cache (currval / currindexmin / currindexmax) is initialised on
// every path to "no id cached, empty range": currval=-1 with the empty range
// [0,-1]. The previous initial state (currval=0, range [0,0]) made the first
// query for id 0 trust entry 0 without ever searching the table.
Dictionary::Dictionary(const char *filename)
  : currval(-1), currindexmin(0), currindexmax(-1), dead(true)
{
  if(filename == 0 || !strcmp(filename, ""))
    return;

  dead = false;
  cout << "Reading dictionary from: " << filename << '\n';
  ifstream dFile(filename);
  if(!dFile){
    cerr << "ERROR: Can't open dictionary: " << filename << '\n';
    exit(1);
  }

  int p, q;
  while((dFile >> p >> q)){
    pairs[0].push_back(p);
    pairs[1].push_back(q);
  }
  cout << "Dictionary read; " << pairs[0].size() << " pairs loaded." << '\n';
  dFile.close();
}
|
||||
|
||||
|
||||
bool Dictionary::indict(int p, int q){
|
||||
if(dead) return false;
|
||||
if(p == 0 && q == 0) return false;
|
||||
if(currval == p){
|
||||
for(int i = currindexmin; i <= currindexmax; i++)
|
||||
if(pairs[1][i] == q) return true;
|
||||
return false;
|
||||
}
|
||||
else{
|
||||
int begin = 0, end = pairs[0].size() - 1, middle = 0;
|
||||
unsigned int t;
|
||||
bool ret = false;
|
||||
while(begin <= end){
|
||||
middle = begin + ((end - begin) >> 1);
|
||||
if(p < pairs[0][middle]) end = middle - 1;
|
||||
else if(p > pairs[0][middle]) begin = middle + 1;
|
||||
else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
t = middle;
|
||||
while(pairs[0][t] == p )
|
||||
if(pairs[1][t--] == q) ret = true;
|
||||
currindexmin = t + 1;
|
||||
t = middle + 1;
|
||||
while(pairs[0][t] == p && t < pairs[0].size())
|
||||
if(pairs[1][t++] == q) ret = true;
|
||||
currindexmax = t - 1;
|
||||
currval = p;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,48 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/* Noah A. Smith
|
||||
Dictionary object for dictionary filter in Model 1 training
|
||||
|
||||
9 August 1999
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
#include "Vector.h"
|
||||
|
||||
#ifndef DICTIONARY_H
|
||||
#define DICTIONARY_H
|
||||
|
||||
class Dictionary{
|
||||
private:
|
||||
Vector<int> pairs[2];
|
||||
int currval;
|
||||
int currindexmin;
|
||||
int currindexmax;
|
||||
bool dead;
|
||||
public:
|
||||
Dictionary(const char *);
|
||||
bool indict(int, int);
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,58 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1988,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef CLASS_FlexArray_defined
|
||||
#define CLASS_FlexArray_defined
|
||||
#include "Array.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
template<class T>
|
||||
class FlexArray
|
||||
{
|
||||
private:
|
||||
Array<T> p;
|
||||
int start,End;
|
||||
public:
|
||||
FlexArray(int _start=0,int _end=-1)
|
||||
: p(_end-_start+1),start(_start),End(_end) {}
|
||||
FlexArray(int _start,int _end,const T&init)
|
||||
: p(_end-_start+1,init),start(_start),End(_end) {}
|
||||
T&operator[](int i)
|
||||
{return p[i-start];}
|
||||
const T&operator[](int i)const
|
||||
{return p[i-start];}
|
||||
int low()const{return start;}
|
||||
int high()const{return End;}
|
||||
T*begin(){return conv<double>(p.begin());}
|
||||
T*end(){return conv<double>(p.end());}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
inline ostream&operator<<(ostream&out,const FlexArray<T>&x)
|
||||
{
|
||||
for(int i=x.low();i<=x.high();++i)
|
||||
out << i << ':' << x[i] << ';' << ' ';
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
@ -0,0 +1,240 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef NO_TRAINING
|
||||
#include "ForwardBackward.h"
|
||||
#include "Globals.h"
|
||||
#include "myassert.h"
|
||||
#include "HMMTables.h"
|
||||
#include "mymath.h"
|
||||
|
||||
double ForwardBackwardTraining(const HMMNetwork&net, Array<double>&g, Array<
|
||||
Array2<double> >&E) {
|
||||
const int I = net.size1(), J = net.size2(), N = I * J;
|
||||
Array<double> alpha(N, 0), beta(N, 0), sum(J);
|
||||
for (int i = 0; i < I; i++)
|
||||
beta[N - I + i] = net.getBetainit(i);
|
||||
double * cur_beta = conv<double> (beta.begin()) + N - I - 1;
|
||||
for (int j = J - 2; j >= 0; --j)
|
||||
for (int ti = I - 1; ti >= 0; --ti, --cur_beta) {
|
||||
const double *next_beta = conv<double> (beta.begin()) + (j + 1) * I;
|
||||
const double *alprob = &net.outProb(j, ti, 0), *next_node =
|
||||
&net.nodeProb(0, j + 1);
|
||||
for (int ni = 0; ni < I; ++ni, (next_node += J)) {
|
||||
massert(cur_beta<next_beta&& &net.outProb(j,ti,ni)==alprob);
|
||||
massert(next_node == &net.nodeProb(ni,j+1));
|
||||
/* if( VERB&&(*next_beta)*(*alprob)*(*next_node) )
|
||||
cout << "B= " << (int)(cur_beta-beta.begin()) << " += " << (*next_beta) << "("
|
||||
<< next_beta-beta.begin() << ") alprob:" << (*alprob) << " lexprob:" << (*next_node) << endl;*/
|
||||
(*cur_beta) += (*next_beta++) * (*alprob++) * (*next_node);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < I; i++)
|
||||
alpha[i] = net.getAlphainit(i) * net.nodeProb(i, 0);
|
||||
double* cur_alpha = conv<double> (alpha.begin()) + I;
|
||||
cur_beta = conv<double> (beta.begin()) + I;
|
||||
for (int j = 1; j < J; j++) {
|
||||
Array2<double>&e = E[(E.size() == 1) ? 0 : (j - 1)];
|
||||
if ((E.size() != 1) || j == 1) {
|
||||
e.resize(I, I);
|
||||
fill(e.begin(), e.end(), 0.0);
|
||||
}
|
||||
|
||||
for (int ti = 0; ti < I; ++ti, ++cur_alpha, ++cur_beta) {
|
||||
const double * prev_alpha = conv<double> (alpha.begin()) + I * (j
|
||||
- 1);
|
||||
double *cur_e = &e(ti, 0);
|
||||
double this_node = net.nodeProb(ti, j);
|
||||
const double* alprob = &net.outProb(j - 1, 0, ti);
|
||||
for (int pi = 0; pi < I; ++pi, ++prev_alpha, (alprob += I)) {
|
||||
massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
|
||||
massert(&e(ti,pi)==cur_e);
|
||||
const double alpha_increment = *prev_alpha * (*alprob)
|
||||
* this_node;
|
||||
(*cur_alpha) += alpha_increment;
|
||||
(*cur_e++) += alpha_increment * (*cur_beta);
|
||||
}
|
||||
}
|
||||
}
|
||||
g.resize(N);
|
||||
transform(alpha.begin(), alpha.end(), beta.begin(), g.begin(), multiplies<
|
||||
double> ());
|
||||
double bsum = 0, esum = 0, esum2;
|
||||
for (int i = 0; i < I; i++)
|
||||
bsum += beta[i] * net.nodeProb(i, 0) * net.getAlphainit(i);
|
||||
for (unsigned int j = 0; j < (unsigned int) E.size(); j++) {
|
||||
Array2<double>&e = E[j];
|
||||
const double *epe = e.end();
|
||||
for (const double*ep = e.begin(); ep != epe; ++ep)
|
||||
esum += *ep;
|
||||
}
|
||||
if (J > 1)
|
||||
esum2 = esum / (J - 1);
|
||||
else
|
||||
esum2 = 0.0;
|
||||
if (!(esum2 == 0.0 || mfabs(esum2 - bsum) / bsum < 1e-3 * I))
|
||||
cout << "ERROR2: " << esum2 << " " << bsum << " " << esum << net
|
||||
<< endl;
|
||||
double * sumptr = conv<double> (sum.begin());
|
||||
double* ge = conv<double> (g.end());
|
||||
for (double* gp = conv<double> (g.begin()); gp != ge; gp += I) {
|
||||
*sumptr++ = normalize_if_possible(gp, gp + I);
|
||||
if (bsum && !(mfabs((*(sumptr - 1) - bsum) / bsum) < 1e-3 * I))
|
||||
cout << "ERROR: " << *(sumptr - 1) << " " << bsum << " " << mfabs(
|
||||
(*(sumptr - 1) - bsum) / bsum) << ' ' << I << ' ' << J
|
||||
<< endl;
|
||||
}
|
||||
for (unsigned int j = 0; j < (unsigned int) E.size(); j++) {
|
||||
Array2<double>&e = E[j];
|
||||
double* epe = e.end();
|
||||
if (esum)
|
||||
for (double*ep = e.begin(); ep != epe; ++ep)
|
||||
*ep /= esum;
|
||||
else
|
||||
for (double*ep = e.begin(); ep != epe; ++ep)
|
||||
*ep /= 1.0 / (max(I * I, I * I * (J - 1)));
|
||||
}
|
||||
if (sum.size())
|
||||
return sum[0];
|
||||
else
|
||||
return 1.0;
|
||||
}
|
||||
void HMMViterbi(const HMMNetwork&net, Array<int>&vit) {
|
||||
const int I = net.size1(), J = net.size2();
|
||||
vit.resize(J);
|
||||
Array<double> g;
|
||||
Array<Array2<double> > e(1);
|
||||
ForwardBackwardTraining(net, g, e);
|
||||
for (int j = 0; j < J; j++) {
|
||||
double * begin = conv<double> (g.begin()) + I * j;
|
||||
vit[j] = max_element(begin, begin + I) - begin;
|
||||
}
|
||||
}
|
||||
void HMMViterbi(const HMMNetwork&net, Array<double>&g, Array<int>&vit) {
|
||||
const int I = net.size1(), J = net.size2();
|
||||
vit.resize(J);
|
||||
for (int j = 0; j < J; j++) {
|
||||
double* begin = conv<double> (g.begin()) + I * j;
|
||||
vit[j] = max_element(begin, begin + I) - begin;
|
||||
}
|
||||
}
|
||||
|
||||
double HMMRealViterbi(const HMMNetwork&net, Array<int>&vitar, int pegi,
|
||||
int pegj, bool verbose) {
|
||||
const int I = net.size1(), J = net.size2(), N = I * J;
|
||||
Array<double> alpha(N, -1);
|
||||
Array<double*> bp(N, (double*) 0);
|
||||
vitar.resize(J);
|
||||
if (J == 0)
|
||||
return 1.0;
|
||||
for (int i = 0; i < I; i++) {
|
||||
alpha[i] = net.getAlphainit(i) * net.nodeProb(i, 0);
|
||||
if (i > I / 2)
|
||||
alpha[i] = 0; // only first empty word can be chosen
|
||||
bp[i] = 0;
|
||||
}
|
||||
double *cur_alpha = conv<double> (alpha.begin()) + I;
|
||||
double **cur_bp = conv<double*> (bp.begin()) + I;
|
||||
for (int j = 1; j < J; j++) {
|
||||
if (pegj + 1 == j)
|
||||
for (int ti = 0; ti < I; ti++)
|
||||
if ((pegi != -1 && ti != pegi) || (pegi == -1 && ti < I / 2))
|
||||
(cur_alpha - I)[ti] = 0.0;
|
||||
for (int ti = 0; ti < I; ++ti, ++cur_alpha, ++cur_bp) {
|
||||
double* prev_alpha = conv<double> (alpha.begin()) + I * (j - 1);
|
||||
double this_node = net.nodeProb(ti, j);
|
||||
const double *alprob = &net.outProb(j - 1, 0, ti);
|
||||
for (int pi = 0; pi < I; ++pi, ++prev_alpha, (alprob += I)) {
|
||||
massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
|
||||
const double alpha_increment = *prev_alpha * (*alprob)
|
||||
* this_node;
|
||||
if (alpha_increment > *cur_alpha) {
|
||||
(*cur_alpha) = alpha_increment;
|
||||
(*cur_bp) = prev_alpha;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < I; i++)
|
||||
alpha[N - I + i] *= net.getBetainit(i);
|
||||
if (pegj == J - 1)
|
||||
for (int ti = 0; ti < I; ti++)
|
||||
if ((pegi != -1 && ti != pegi) || (pegi == -1 && ti < I / 2))
|
||||
(alpha)[N - I + ti] = 0.0;
|
||||
|
||||
int j = J - 1;
|
||||
cur_alpha = conv<double> (alpha.begin()) + j * I;
|
||||
vitar[J - 1] = max_element(cur_alpha, cur_alpha + I) - cur_alpha;
|
||||
double ret = *max_element(cur_alpha, cur_alpha + I);
|
||||
while (bp[vitar[j] + j * I]) {
|
||||
cur_alpha -= I;
|
||||
vitar[j - 1] = bp[vitar[j] + j * I] - cur_alpha;
|
||||
massert(vitar[j-1]<I&&vitar[j-1]>=0);
|
||||
j--;
|
||||
}
|
||||
massert(j==0);
|
||||
if (verbose) {
|
||||
cout << "VERB:PEG: " << pegi << ' ' << pegj << endl;
|
||||
for (int j = 0; j < J; j++)
|
||||
cout << "NP " << net.nodeProb(vitar[j], j) << ' ' << "AP " << ((j
|
||||
== 0) ? net.getAlphainit(vitar[j]) : net.outProb(j - 1,
|
||||
vitar[j - 1], vitar[j])) << " j:" << j << " i:" << vitar[j]
|
||||
<< "; ";
|
||||
cout << endl;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
double MaximumTraining(const HMMNetwork&net, Array<double>&g, Array<Array2<
|
||||
double> >&E) {
|
||||
Array<int> vitar;
|
||||
double ret = HMMRealViterbi(net, vitar);
|
||||
const int I = net.size1(), J = net.size2();
|
||||
if (E.size() == 1) {
|
||||
Array2<double>&e = E[0];
|
||||
e.resize(I, I);
|
||||
g.resize(I * J);
|
||||
fill(g.begin(), g.end(), 0.0);
|
||||
fill(e.begin(), e.end(), 0.0);
|
||||
for (int i = 0; i < J; ++i) {
|
||||
g[i * I + vitar[i]] = 1.0;
|
||||
if (i > 0)
|
||||
e(vitar[i], vitar[i - 1])++;
|
||||
}
|
||||
} else {
|
||||
g.resize(I * J);
|
||||
fill(g.begin(), g.end(), 0.0);
|
||||
for (int i = 0; i < J; ++i) {
|
||||
g[i * I + vitar[i]] = 1.0;
|
||||
if (i > 0) {
|
||||
Array2<double>&e = E[i - 1];
|
||||
e.resize(I, I);
|
||||
fill(e.begin(), e.end(), 0.0);
|
||||
e(vitar[i], vitar[i - 1])++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,62 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef NO_EM_MARKOF_ZEUGS_DEFINED
|
||||
#define NO_EM_MARKOF_ZEUGS_DEFINED
|
||||
#ifndef NO_TRAINING
|
||||
#include "myassert.h"
|
||||
#include "Array.h"
|
||||
#include "Array2.h"
|
||||
|
||||
class HMMNetwork
|
||||
{
|
||||
public:
|
||||
int as,bs;
|
||||
Array2<double> n;
|
||||
Array<Array2<double> > e;
|
||||
Array<double> alphainit;
|
||||
Array<double> betainit;
|
||||
int ab;
|
||||
double finalMultiply;
|
||||
HMMNetwork(int I,int J)
|
||||
: as(I),bs(J),n(as,bs),/*e(as,as,0.0),*/e(0),alphainit(as,1.0/as),betainit(as,1.0),ab(as*bs),finalMultiply(1.0)
|
||||
{}
|
||||
double getAlphainit(int i)const{return alphainit[i];}
|
||||
double getBetainit(int i)const{return betainit[i];}
|
||||
inline int size1()const{return as;}
|
||||
inline int size2()const{return bs;}
|
||||
inline const double&nodeProb(int i,int j)const
|
||||
{return n(i,j);}
|
||||
inline const double&outProb(int j,int i1,int i2)const
|
||||
{/*massert(e[min(int(e.size())-1,j)](i1,i2) );*/ return e[min(int(e.size())-1,j)](i1,i2);}
|
||||
friend ostream&operator<<(ostream&out,const HMMNetwork&x)
|
||||
{
|
||||
return out <<"N: \n"<< x.n << endl << "E: \n" << x.e << "A:\n" << x.alphainit << "B:\n" << x.betainit << endl;
|
||||
}
|
||||
};
|
||||
double ForwardBackwardTraining(const HMMNetwork&mc,Array<double>&gamma,Array<Array2<double> >&epsilon);
|
||||
void HMMViterbi(const HMMNetwork&mc,Array<int>&vit);
|
||||
double HMMRealViterbi(const HMMNetwork&net,Array<int>&vit,int pegi=-1,int pegj=-1,bool verbose=0);
|
||||
double MaximumTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&e);
|
||||
void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit);
|
||||
#endif
|
||||
#endif
|
@ -0,0 +1,75 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef Globals_asdf_defined
#define Globals_asdf_defined
#include <string>
#include <fstream>
#include <map>
#include "syncObj.h"  // project header: quoted form, as in HMMTables.h
#include "defs.h"
#include "Vector.h"

// Everything below is a declaration only (extern); the objects themselves
// are defined in some other translation unit.

extern float PROB_SMOOTH,MINCOUNTINCREASE;
extern bool Verbose, Log, Peg, Transfer, Transfer2to3, useDict ;
extern string Prefix, LogFilename, OPath,
  SourceVocabFilename, TargetVocabFilename, CorpusFilename, TestCorpusFilename,
  t_Filename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
extern ofstream logmsg ;
// NOTE(review): presumably guards concurrent writes to logmsg -- confirm at the use sites.
extern Mutex logmsg_lock;
extern double M5P0,P0 ;
extern bool NODUMPS, FEWDUMPS ;
extern string Usage ;
extern unsigned int MAX_SENTENCE_LENGTH ;
extern int PegUntil;

extern short DeficientDistortionForEmptyWord;

extern int M4_Dependencies;
extern int M5_Dependencies;

extern short OutputInAachenFormat;

// Distinct powers of two, so the values can be OR-ed together as bit flags.
#define DEP_MODEL_l 1
#define DEP_MODEL_m 2
#define DEP_MODEL_F 4
#define DEP_MODEL_E 8

#define DEP_MODELb_l 16
#define DEP_MODELb_m 32
#define DEP_MODELb_F 64
#define DEP_MODELb_E 128

#define DEP_SUM 256

class vcbList;

extern vcbList *globeTrainVcbList, *globfTrainVcbList;

extern short PredictionInAlignments;
extern short SmoothHMM;
// Shorthand for the global Verbose flag.
#define VERB Verbose

double ErrorsInAlignment(const map< pair<int,int>,char >&reference,const Vector<WordIndex>&test,int l,int&missing,int&toomuch,int&eventsMissing,int&eventsToomuch,int);
extern Vector<map< pair<int,int>,char > > ReferenceAlignment;
void printGIZAPars(ostream&out);

#endif
|
@ -0,0 +1,512 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#include "HMMTables.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include "Globals.h"
|
||||
#include "Parameter.h"
|
||||
|
||||
/**
 * Print every distribution stored in alProb to `out` in human-readable form.
 * For each key the non-zero entries are written as "position:value; ",
 * followed by the sum of that distribution; the sum over all distributions
 * is written last.
 */
template<class CLS, class MAPPERCLASSTOSTRING>
void HMMTables<CLS, MAPPERCLASSTOSTRING>::writeJumps(ostream&out) const {
  typedef typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator TableIter;

  double grandTotal=0.0;
  for (TableIter entry=alProb.begin(); entry!=alProb.end(); ++entry) {
    const FlexArray<double>&dist=entry->second;
    double total=0.0;

    out << "\n\nDistribution for: ";
    printAlDeps(out, entry->first, *mapper1, *mapper2);
    out << ' ';

    for (int pos=dist.low(); pos<=dist.high(); ++pos) {
      if (dist[pos]!=0.0) {  // zero entries are not printed
        out << pos << ':' << dist[pos] << ';' << ' ';
        total+=dist[pos];
      }
    }

    out << '\n' << '\n';
    out << "SUM: " << total << '\n';
    grandTotal+=total;
  }
  out << "FULL-SUM: " << grandTotal << '\n';
}
|
||||
/**
 * Stream overload of readJumps. The body is empty: the stream is ignored
 * and no table is modified.
 */
template<class CLS, class MAPPERCLASSTOSTRING>
void HMMTables<CLS, MAPPERCLASSTOSTRING>::readJumps(istream&) {
  // no-op
}
|
||||
/**
 * Return the stored value for one jump, or a uniform fallback.
 *
 * The table index is chosen by the global PredictionInAlignments:
 *   0 -> istrich-k, 1 -> k, 2 -> (k*J - j*sentLength) divided by J after
 *   moving it J/2 away from zero; any other value aborts.
 * The table row is selected by AlDeps(sentLength, istrich, j, w1, w2).
 *
 * @param iter when in (0,5000) a warning is printed if the row is missing
 * @return the stored value, or 1/(2*sentLength-1) when the row is missing
 */
template<class CLS, class MAPPERCLASSTOSTRING>
double HMMTables<CLS, MAPPERCLASSTOSTRING>::getAlProb(int istrich, int k, int sentLength,
    int J, CLS w1, CLS w2, int j, int iter) const {
  massert(k<sentLength&&k>=0);
  massert(istrich<sentLength&&istrich>=-1);

  int pos=istrich-k;
  switch (PredictionInAlignments) {
  case 0:
    pos=istrich-k;
    break;
  case 1:
    pos=k;
    break;
  case 2:
    pos=(k*J-j*sentLength);
    if (pos>0)
      pos+=J/2;
    else
      pos-=J/2;
    pos/=J;
    break;
  default:
    abort();
  }

  lock.lock();
  typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator p=
      alProb.find(AlDeps<CLS>(sentLength, istrich, j, w1, w2));
  if (p!=alProb.end() ) {
    // Copy the value out while the lock is still held: addAlCount() updates
    // these entries under the same lock, so reading after unlock() would race.
    const double prob=(p->second)[pos];
    lock.unlock();
    return prob;
  }

  if (iter>0&&iter<5000)
    cout << "WARNING: Not found: " << ' ' << J << ' ' << sentLength
         << '\n';
  lock.unlock();
  return 1.0/(2*sentLength-1);
}
|
||||
|
||||
/**
 * Add `value` to alProb and, when it is non-zero, `valuePredicted` to
 * alProbPredicted, at the row AlDeps(sentLength, istrich, j, w1, w2).
 *
 * The index inside the row depends on the global PredictionInAlignments
 * (0: istrich-k, 1: k, 2: rounded (k*J - j*sentLength)/J; anything else
 * aborts). A missing row is created zero-filled; its range is
 * [-sentLength, sentLength] when bit 1 of CompareAlDeps is set and
 * [-MAX_SENTENCE_LENGTH, MAX_SENTENCE_LENGTH] otherwise. Each table update
 * is performed while holding `lock`.
 */
template<class CLS, class MAPPERCLASSTOSTRING>
void HMMTables<CLS, MAPPERCLASSTOSTRING>::addAlCount(int istrich, int k, int sentLength,
    int J, CLS w1, CLS w2, int j, double value, double valuePredicted) {
  typedef map<AlDeps<CLS>,FlexArray<double> > JumpTable;

  int pos=istrich-k;  // already the right value for mode 0
  switch (PredictionInAlignments) {
  case 0:
    break;
  case 1:
    pos=k;
    break;
  case 2:
    pos=(k*J-j*sentLength);
    if (pos>0) {
      pos+=J/2;
    } else {
      pos-=J/2;
    }
    pos/=J;
    break;
  default:
    abort();
  }

  AlDeps<CLS> deps(AlDeps<CLS>(sentLength, istrich, j, w1, w2));

  // Observed counts.
  lock.lock();
  typename JumpTable::iterator observed=alProb.find(deps);
  if (observed==alProb.end() ) {
    if ( (CompareAlDeps&1)!=0 ) {
      observed=alProb.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
    } else {
      observed=alProb.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
    }
  }
  observed->second[pos]+=value;
  lock.unlock();

  if (valuePredicted==0.0)
    return;

  // Predicted counts.
  lock.lock();
  typename JumpTable::iterator predicted=alProbPredicted.find(deps);
  if (predicted==alProbPredicted.end() ) {
    if ( (CompareAlDeps&1)!=0 ) {
      predicted=alProbPredicted.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
    } else {
      predicted=alProbPredicted.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
    }
  }
  predicted->second[pos]+=valuePredicted;
  lock.unlock();
}
|
||||
|
||||
template<class CLS, class MAPPERCLASSTOSTRING>
|
||||
pair<Array<double>,Mutex>&HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetAlphaInit(int I)
|
||||
{
|
||||
alphalock.lock();
|
||||
if( !init_alpha.count(I) ){
|
||||
init_alpha[I]=pair<Array<double>,Mutex>(Array<double>(I,0),Mutex());
|
||||
}
|
||||
pair<Array<double>,Mutex>& ret = init_alpha[I];
|
||||
alphalock.unlock();
|
||||
return ret;
|
||||
}
|
||||
template<class CLS, class MAPPERCLASSTOSTRING>
|
||||
pair<Array<double>,Mutex>&HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetBetaInit(int I)
|
||||
{
|
||||
betalock.lock();
|
||||
if( !init_beta.count(I) ){
|
||||
init_beta[I]=pair<Array<double>,Mutex>(Array<double>(I,0),Mutex());
|
||||
}
|
||||
pair<Array<double>,Mutex>& ret = init_beta[I];
|
||||
betalock.unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Copy the init_alpha array stored for key I into `x`.
 * After copying, every element of `x` from index size/2+1 onward is set
 * to 0.
 *
 * @return false (x untouched) when no entry exists for I, true otherwise
 */
template<class CLS, class MAPPERCLASSTOSTRING>
bool HMMTables<CLS, MAPPERCLASSTOSTRING>::getAlphaInit(int I, Array<double>&x) const {
  alphalock.lock();
  hash_map<int,pair<Array<double>,Mutex> >::const_iterator found=init_alpha.find(I);
  if (found==init_alpha.end() ) {
    alphalock.unlock();
    return false;
  }

  x=found->second.first;
  alphalock.unlock();

  // only first empty word can be chosen
  for (unsigned int idx=x.size()/2+1; idx<x.size(); ++idx) {
    x[idx]=0;
  }
  return true;
}
|
||||
/**
 * Copy the init_beta array stored for key I into `x`.
 *
 * @return false (x untouched) when no entry exists for I, true otherwise
 */
template<class CLS, class MAPPERCLASSTOSTRING>
bool HMMTables<CLS, MAPPERCLASSTOSTRING>::getBetaInit(int I, Array<double>&x) const {
  betalock.lock();
  hash_map<int,pair<Array<double>,Mutex> >::const_iterator found=init_beta.find(I);
  if (found==init_beta.end() ) {
    betalock.unlock();
    return false;
  }

  x=found->second.first;
  betalock.unlock();
  return true;
}
|
||||
|
||||
/***********************************
|
||||
By Edward Gao
|
||||
************************************/
|
||||
|
||||
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
|
||||
MAPPERCLASSTOSTRING>::writeJumps(const char* alprob,
|
||||
const char* alpredict, const char* alpha, const char* beta) const {
|
||||
if (alprob) {
|
||||
ofstream ofs(alprob);
|
||||
if (!ofs.is_open()) {
|
||||
cerr << "Cannot open file for HMM output " << alprob << endl;
|
||||
return false;
|
||||
}
|
||||
cerr << "Dumping HMM table to " << alprob << endl;
|
||||
|
||||
for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
|
||||
alProb.begin(); i!=alProb.end(); ++i) {
|
||||
double sum=0.0;
|
||||
ofs <<i->first.englishSentenceLength << " "
|
||||
<< i->first.classPrevious << " " << i->first.previous
|
||||
<< " " << i->first.j << " " << i->first.Cj <<" "
|
||||
<< i->second.low() <<" " << i->second.high()<< " ";
|
||||
for (int a=i->second.low(); a<=i->second.high(); ++a)
|
||||
if (i->second[a]) {
|
||||
ofs << a << ' ' << i->second[a] << ' ';
|
||||
sum+=i->second[a];
|
||||
}
|
||||
ofs << endl;
|
||||
}
|
||||
ofs.close();
|
||||
}
|
||||
if (alpredict) {
|
||||
ofstream ofs(alpredict);
|
||||
if (!ofs.is_open()) {
|
||||
cerr << "Cannot open file for HMM output " << alpredict << endl;
|
||||
return false;
|
||||
}
|
||||
cerr << "Dumping HMM table to " << alpredict << endl;
|
||||
for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
|
||||
alProbPredicted.begin(); i!=alProbPredicted.end(); ++i) {
|
||||
double sum=0.0;
|
||||
ofs << i->first.englishSentenceLength << " "
|
||||
<< i->first.classPrevious << " " << i->first.previous
|
||||
<< " " << i->first.j << " " << i->first.Cj <<" "
|
||||
<< i->second.low() <<" " << i->second.high()<< " ";
|
||||
for (int a=i->second.low(); a<=i->second.high(); ++a)
|
||||
if (i->second[a]) {
|
||||
ofs << a << ' ' << i->second[a] << ' ';
|
||||
sum+=i->second[a];
|
||||
}
|
||||
ofs << endl;
|
||||
}
|
||||
ofs.close();
|
||||
}
|
||||
if (alpha) {
|
||||
ofstream ofs(alpha);
|
||||
|
||||
if (!ofs.is_open()) {
|
||||
cerr << "Cannot open file for HMM output " << alpha << endl;
|
||||
return false;
|
||||
}
|
||||
cerr << "Dumping HMM table to " << alpha << endl;
|
||||
for (typename hash_map<int,pair<Array<double>,Mutex> >::const_iterator i=
|
||||
init_alpha.begin(); i!=init_alpha.end(); i++) {
|
||||
ofs << i->first << " " << i->second.first.size() <<" ";
|
||||
int j;
|
||||
for (j=0; j<i->second.first.size(); j++) {
|
||||
ofs << i->second.first[j] << " ";
|
||||
}
|
||||
ofs<<endl;
|
||||
}
|
||||
ofs.close();
|
||||
}
|
||||
if (beta) {
|
||||
ofstream ofs(beta);
|
||||
if (!ofs.is_open()) {
|
||||
cerr << "Cannot open file for HMM output " << beta << endl;
|
||||
return false;
|
||||
}
|
||||
cerr << "Dumping HMM table to " << beta << endl;
|
||||
for (typename hash_map<int,pair<Array<double>,Mutex> >::const_iterator i=
|
||||
init_beta.begin(); i!=init_beta.end(); i++) {
|
||||
ofs << i->first << " " << i->second.first.size() << " ";
|
||||
int j;
|
||||
for (j=0; j<i->second.first.size(); j++) {
|
||||
ofs << i->second.first[j] << " ";
|
||||
}
|
||||
ofs << endl;
|
||||
}
|
||||
ofs.close();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
|
||||
MAPPERCLASSTOSTRING>::readJumps(const char* alprob,
|
||||
const char* alpredict, const char* alpha, const char* beta) {
|
||||
if (alprob) {
|
||||
ifstream ifs(alprob);
|
||||
if (!ifs.is_open()) {
|
||||
cerr << "Cannot open file for HMM input " << alprob << endl;
|
||||
return false;
|
||||
}
|
||||
cerr << "Reading HMM table from " << alprob << endl;
|
||||
string strLine="";
|
||||
bool expect_data = false;
|
||||
while (!ifs.eof()) {
|
||||
strLine = "";
|
||||
getline(ifs, strLine);
|
||||
if (strLine.length()) {
|
||||
stringstream ss(strLine.c_str());
|
||||
AlDeps<CLS> dep;
|
||||
int low, high;
|
||||
ss >> dep.englishSentenceLength >> dep.classPrevious
|
||||
>> dep.previous >> dep.j >> dep.Cj >> low >> high;
|
||||
typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
|
||||
alProb.find(dep);
|
||||
if (p==alProb.end() ) {
|
||||
p=alProb.insert(make_pair(dep,FlexArray<double> (low,high,0.0))).first;
|
||||
}
|
||||
int pos;
|
||||
double val;
|
||||
while (!ss.eof()) {
|
||||
pos = low-1;
|
||||
val = 0;
|
||||
ss >> pos >> val;
|
||||
if (pos>low-1) {
|
||||
p->second[pos]+=val;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (alpredict) {
|
||||
ifstream ifs(alpredict);
|
||||
if (!ifs.is_open()) {
|
||||
cerr << "Cannot open file for HMM input " << alpredict << endl;
|
||||
return false;
|
||||
}
|
||||
cerr << "Reading HMM table from " << alpredict << endl;
|
||||
string strLine="";
|
||||
bool expect_data = false;
|
||||
while (!ifs.eof()) {
|
||||
strLine = "";
|
||||
getline(ifs, strLine);
|
||||
if (strLine.length()) {
|
||||
stringstream ss(strLine.c_str());
|
||||
AlDeps<CLS> dep;
|
||||
int low, high;
|
||||
ss >> dep.englishSentenceLength >> dep.classPrevious
|
||||
>> dep.previous >> dep.j >> dep.Cj >> low >> high;
|
||||
typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
|
||||
alProbPredicted.find(dep);
|
||||
if (p==alProbPredicted.end() ) {
|
||||
p=alProbPredicted.insert(make_pair(dep,FlexArray<double> (low,high,0.0))).first;
|
||||
}
|
||||
int pos;
|
||||
double val;
|
||||
|
||||
while (!ss.eof()) {
|
||||
pos = low-1;
|
||||
val = 0;
|
||||
ss >> pos >> val;
|
||||
if (pos>low-1) {
|
||||
p->second[pos]+=val;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (alpha) {
|
||||
ifstream ifs(alpha);
|
||||
|
||||
if (!ifs.is_open()) {
|
||||
cerr << "Cannot open file for HMM input " << alpha << endl;
|
||||
return false;
|
||||
}
|
||||
string strLine="";
|
||||
bool expect_data = false;
|
||||
while (!ifs.eof()) {
|
||||
strLine = "";
|
||||
getline(ifs, strLine);
|
||||
if (strLine.length()) {
|
||||
stringstream ss(strLine.c_str());
|
||||
int id = -1, size = -1;
|
||||
ss >> id >> size;
|
||||
if (id<0||size<0||id!=size) {
|
||||
cerr << "Mismatch in alpha init table!" << endl;
|
||||
return false;
|
||||
}
|
||||
pair<Array<double>, Mutex>&alp = doGetAlphaInit(id);
|
||||
Array<double>& gk = alp.first;
|
||||
int j;
|
||||
double v;
|
||||
alp.second.lock();
|
||||
for (j=0; j<gk.size(); j++) {
|
||||
ss >> v;
|
||||
gk[j]+=v;
|
||||
}
|
||||
alp.second.unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (beta) {
|
||||
ifstream ifs(beta);
|
||||
|
||||
if (!ifs.is_open()) {
|
||||
cerr << "Cannot open file for HMM input " << beta << endl;
|
||||
return false;
|
||||
}
|
||||
string strLine="";
|
||||
bool expect_data = false;
|
||||
while (!ifs.eof()) {
|
||||
strLine = "";
|
||||
getline(ifs, strLine);
|
||||
if (strLine.length()) {
|
||||
stringstream ss(strLine.c_str());
|
||||
int id = -1, size = -1;
|
||||
ss >> id >> size;
|
||||
if (id<0||size<0||id!=size) {
|
||||
cerr << "Mismatch in alpha init table!" << endl;
|
||||
return false;
|
||||
}
|
||||
pair<Array<double>, Mutex>&bet1 = doGetBetaInit(id);
|
||||
Array<double>&bet = bet1.first;
|
||||
|
||||
int j;
|
||||
double v;
|
||||
bet1.second.lock();
|
||||
for (j=0; j<bet.size(); j++) {
|
||||
ss >> v;
|
||||
bet[j]+=v;
|
||||
}
|
||||
bet1.second.unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Add every value stored in `ht` to the corresponding entry of this object
 * (alProb, alProbPredicted, init_alpha, init_beta). Rows that do not exist
 * here yet are created zero-filled with the same low/high range as in `ht`
 * (jump tables) or through doGetAlphaInit/doGetBetaInit (init tables).
 *
 * @return always true
 */
template<class CLS, class MAPPERCLASSTOSTRING>
bool HMMTables<CLS, MAPPERCLASSTOSTRING>::merge(HMMTables<CLS,MAPPERCLASSTOSTRING> & ht) {
  typedef map<AlDeps<CLS>,FlexArray<double> > JumpTable;
  typedef hash_map<int,pair<Array<double>,Mutex> > InitTable;

  for (typename JumpTable::const_iterator i=ht.alProb.begin();
       i!=ht.alProb.end(); ++i) {
    typename JumpTable::iterator p=alProb.find(i->first);
    if (p==alProb.end() ) {
      p=alProb.insert(make_pair(i->first,FlexArray<double> (i->second.low(),i->second.high(),0.0))).first;
    }
    for (int a=i->second.low(); a<=i->second.high(); ++a)
      if (i->second[a]) {
        p->second[a] += i->second[a];
      }
  }

  for (typename JumpTable::const_iterator i=ht.alProbPredicted.begin();
       i!=ht.alProbPredicted.end(); ++i) {
    typename JumpTable::iterator p=alProbPredicted.find(i->first);
    if (p==alProbPredicted.end() ) {
      p=alProbPredicted.insert(make_pair(i->first,FlexArray<double> (i->second.low(),i->second.high(),0.0))).first;
    }
    for (int a=i->second.low(); a<=i->second.high(); ++a)
      if (i->second[a]) {
        p->second[a] += i->second[a];
      }
  }

  for (typename InitTable::const_iterator i=ht.init_alpha.begin();
       i!=ht.init_alpha.end(); i++) {
    // Bind by reference: taking the entry by value would add the counts to
    // a temporary copy and leave init_alpha itself unchanged.
    pair<Array<double>,Mutex>&alp = doGetAlphaInit(i->first);
    for (int j=0; j<alp.first.size(); j++) {
      alp.first[j]+=i->second.first[j];
    }
  }

  for (typename InitTable::const_iterator i=ht.init_beta.begin();
       i!=ht.init_beta.end(); i++) {
    pair<Array<double>,Mutex>&bet = doGetBetaInit(i->first);
    for (int j=0; j<bet.first.size(); j++) {
      bet.first[j]+=i->second.first[j];
    }
  }

  return true;
}
|
||||
|
||||
//////////////////////////////////////
|
||||
/**
 * Construct empty tables.
 *
 * @param _probForEmpty its absolute value becomes probabilityForEmpty; a
 *        negative sign sets updateProbabilityForEmpty
 * @param m1, m2 mappers; only their addresses are stored, so both objects
 *        must outlive this HMMTables
 *
 * The counters globalCounter, divSum, p0_count and np0_count start at zero
 * so that they never hold indeterminate values.
 */
template<class CLS, class MAPPERCLASSTOSTRING>
HMMTables<CLS, MAPPERCLASSTOSTRING>::HMMTables(double _probForEmpty,
    const MAPPERCLASSTOSTRING&m1, const MAPPERCLASSTOSTRING&m2) :
  probabilityForEmpty(mfabs(_probForEmpty)),
  updateProbabilityForEmpty(_probForEmpty<0.0),
  globalCounter(0), divSum(0.0), p0_count(0.0), np0_count(0.0),
  mapper1(&m1), mapper2(&m2) {
}
|
||||
/** Destructor; the body is empty, members are destroyed implicitly. */
template<class CLS, class MAPPERCLASSTOSTRING>
HMMTables<CLS, MAPPERCLASSTOSTRING>::~HMMTables()
{
}
|
@ -0,0 +1,179 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef HMM_TABLES_H_ASDF_DEFINED
|
||||
#define HMM_TABLES_H_ASDF_DEFINED
|
||||
#include "FlexArray.h"
|
||||
|
||||
#if __GNUC__>2
|
||||
#include <ext/hash_map>
|
||||
using __gnu_cxx::hash_map;
|
||||
#else
|
||||
#include <hash_map>
|
||||
#endif
|
||||
#include "Array.h"
|
||||
#include <map>
|
||||
#include "mymath.h"
|
||||
#include "syncObj.h"
|
||||
|
||||
/**
 * Scale the range [a,b) in place so that its elements sum to one.
 * When the elements sum to zero, every element is instead set to
 * 1.0/(b-a).
 *
 * @return the sum of the elements before scaling
 */
template<class T>
T normalize_if_possible(T*a,T*b){
  T total=0;
  for(T*it=a;it!=b;++it){
    total+=*it;
  }
  if( !total ){
    fill(a,b,1.0/(b-a));
    return total;
  }
  for(T*it=a;it!=b;++it){
    *it/=total;
  }
  return total;
}
|
||||
|
||||
// Bit mask selecting which AlDeps fields take part in ordering, hashing and
// printing: 1 englishSentenceLength, 2 classPrevious, 4 previous, 8 j, 16 Cj.
extern short CompareAlDeps;

/**
 * Key of the jump tables: five fields, of which only those enabled in
 * CompareAlDeps are compared.
 */
template<class CLS>
class AlDeps{
 public:
  int englishSentenceLength;
  CLS classPrevious;
  int previous;
  int j;
  CLS Cj;

  /** Default key: every field value-initialised, so a key whose fields are
      filled in afterwards (e.g. parsed from a stream) never holds
      indeterminate values. */
  AlDeps()
    : englishSentenceLength(0),classPrevious(),previous(0),j(0),Cj()
  {}

  AlDeps(int l,int p=0,int _j=0,CLS s1=0,CLS _Cj=0)
    : englishSentenceLength(l),classPrevious(s1),previous(p),j(_j),Cj(_Cj)
  {}

  /** Lexicographic comparison over the fields enabled in CompareAlDeps, in
      the order length, classPrevious, previous, j, Cj. */
  friend bool operator<(const AlDeps&x,const AlDeps&y){
    if( (CompareAlDeps&1) && x.englishSentenceLength<y.englishSentenceLength ) return 1;
    if( (CompareAlDeps&1) && y.englishSentenceLength<x.englishSentenceLength ) return 0;
    if( (CompareAlDeps&2) && x.classPrevious<y.classPrevious ) return 1;
    if( (CompareAlDeps&2) && y.classPrevious<x.classPrevious ) return 0;
    if( (CompareAlDeps&4) && x.previous<y.previous ) return 1;
    if( (CompareAlDeps&4) && y.previous<x.previous ) return 0;
    if( (CompareAlDeps&8) && x.j<y.j ) return 1;
    if( (CompareAlDeps&8) && y.j<x.j ) return 0;
    if( (CompareAlDeps&16) && x.Cj<y.Cj ) return 1;
    if( (CompareAlDeps&16) && y.Cj<x.Cj ) return 0;
    return 0;
  }

  /** Two keys are equal when neither orders before the other. */
  friend bool operator==(const AlDeps&x,const AlDeps&y)
  { return !( x<y || y<x ); }
};
|
||||
|
||||
template<class CLS>
|
||||
class Hash_AlDeps{
|
||||
public:
|
||||
unsigned
|
||||
int
|
||||
operator()
|
||||
(const AlDeps<CLS>&x)
|
||||
const
|
||||
{
|
||||
unsigned int hash=0;
|
||||
if( (CompareAlDeps&1) ) { hash=hash+x.englishSentenceLength;hash*=31;}
|
||||
if( (CompareAlDeps&2) ) { hash=hash+x.classPrevious;hash*=31;}
|
||||
if( (CompareAlDeps&4) ) { hash=hash+x.previous;hash*=31;}
|
||||
if( (CompareAlDeps&8) ) { hash=hash+x.j;hash*=31;}
|
||||
if( (CompareAlDeps&16) ) { hash=hash+x.Cj;hash*=31;}
|
||||
return hash;
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
template<class CLS,class MAPPERCLASSTOSTRING>
|
||||
class HMMTables
|
||||
{
|
||||
Mutex lock;
|
||||
Mutex alphalock,betalock;
|
||||
public:
|
||||
double probabilityForEmpty;
|
||||
bool updateProbabilityForEmpty;
|
||||
hash_map<int, pair<Array<double>,Mutex> > init_alpha;
|
||||
hash_map<int, pair<Array<double>,Mutex> > init_beta;
|
||||
map<AlDeps<CLS>,FlexArray<double> > alProb;
|
||||
map<AlDeps<CLS>,FlexArray<double> > alProbPredicted;
|
||||
int globalCounter;
|
||||
double divSum;
|
||||
double p0_count,np0_count;
|
||||
const MAPPERCLASSTOSTRING*mapper1;
|
||||
const MAPPERCLASSTOSTRING*mapper2;
|
||||
public:
|
||||
bool merge(HMMTables<CLS,MAPPERCLASSTOSTRING> & ht);
|
||||
const HMMTables<CLS,MAPPERCLASSTOSTRING>*getThis()const {return this;}
|
||||
HMMTables(double _probForEmpty,const MAPPERCLASSTOSTRING&m1,const MAPPERCLASSTOSTRING&m2);
|
||||
virtual ~HMMTables();
|
||||
virtual double getAlProb(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,int iter=0) const;
|
||||
virtual void writeJumps(ostream&) const;
|
||||
/**By Edward Gao, write out all things needed to rebuild the count table*/
|
||||
virtual bool writeJumps(const char* alprob, const char* alpredict, const char* alpha, const char* beta )const;
|
||||
virtual bool readJumps(const char* alprob, const char* alpredict, const char* alpha, const char* beta );
|
||||
void addAlCount(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,double value,double valuePredicted);
|
||||
virtual void readJumps(istream&);
|
||||
virtual bool getAlphaInit(int I,Array<double>&x)const;
|
||||
virtual bool getBetaInit(int I,Array<double> &x)const;
|
||||
pair<Array<double>, Mutex> &doGetAlphaInit(int I);
|
||||
pair<Array<double>, Mutex> &doGetBetaInit(int I);
|
||||
virtual double getProbabilityForEmpty()const
|
||||
{return probabilityForEmpty;}
|
||||
void performGISIteration(const HMMTables<CLS,MAPPERCLASSTOSTRING>*old){
|
||||
cout << "OLDSIZE: " << (old?(old->alProb.size()):0) << " NEWSIZE:"<< alProb.size()<< endl;
|
||||
for(typename map<AlDeps<CLS>,FlexArray<double> >::iterator i=alProb.begin();i!=alProb.end();++i) {
|
||||
if( alProbPredicted.count(i->first)){
|
||||
normalize_if_possible(i->second.begin(),i->second.end());
|
||||
normalize_if_possible(alProbPredicted[i->first].begin(),alProbPredicted[i->first].end());
|
||||
for(int j=i->second.low();j<=i->second.high();++j){
|
||||
if( i->second[j] )
|
||||
if(alProbPredicted[i->first][j]>0.0 )
|
||||
{
|
||||
double op=1.0;
|
||||
if( old && old->alProb.count(i->first) )
|
||||
op=(old->alProb.find(i->first)->second)[j];
|
||||
//cerr << "GIS: " << j << ' ' << " OLD:"
|
||||
// << op << "*true:"
|
||||
// << i->second[j] << "/pred:" << alProbPredicted[i->first][j] << " -> ";
|
||||
|
||||
|
||||
i->second[j]= op*(i->second[j]/alProbPredicted[i->first][j]);
|
||||
//cerr << i->second[j] << endl;
|
||||
}
|
||||
else{
|
||||
cerr << "ERROR2 in performGISiteration: " << i->second[j] << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
cerr << "ERROR in performGISIteration: " << alProbPredicted.count(i->first) << endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class CLS,class MAPPERCLASSTOSTRING>
|
||||
inline void printAlDeps(ostream&out,const AlDeps<CLS>&x,const MAPPERCLASSTOSTRING&mapper1,const MAPPERCLASSTOSTRING&mapper2)
|
||||
{
|
||||
if( (CompareAlDeps&1) ) out << "sentenceLength: " << x.englishSentenceLength<< ' ';
|
||||
if( (CompareAlDeps&2) ) out << "previousClass: " << mapper1.classString(x.classPrevious) << ' ';
|
||||
if( (CompareAlDeps&4) ) out << "previousPosition: " << x.previous << ' ';
|
||||
if( (CompareAlDeps&8) ) out << "FrenchPosition: " << x.j << ' ';
|
||||
if( (CompareAlDeps&16) ) out << "FrenchClass: " << mapper2.classString(x.Cj) << ' ';
|
||||
//out << '\n';
|
||||
}
|
||||
|
||||
#endif
|
@ -0,0 +1,217 @@
|
||||
## Process this file with automake to produce Makefile.in
|
||||
|
||||
## Created by Anjuta
|
||||
|
||||
INCLUDES = \
|
||||
-DPACKAGE_LOCALE_DIR=\""$(prefix)/$(DATADIRNAME)/locale"\" \
|
||||
-DPACKAGE_SRC_DIR=\""$(srcdir)"\" \
|
||||
-DPACKAGE_DATA_DIR=\""$(datadir)"\"
|
||||
|
||||
AM_CFLAGS =\
|
||||
-Wall\
|
||||
-g
|
||||
|
||||
bin_PROGRAMS = mgiza \
|
||||
snt2cooc\
|
||||
snt2plain\
|
||||
plain2snt \
|
||||
symal \
|
||||
hmmnorm \
|
||||
d4norm
|
||||
|
||||
d4norm_SOURCES = \
|
||||
d4norm.cxx
|
||||
|
||||
d4norm_LDADD = \
|
||||
-lgiza \
|
||||
-lpthread
|
||||
|
||||
d4norm_LDFLAGS = \
|
||||
-L.
|
||||
|
||||
d4norm_DEPENDENCIES = \
|
||||
libgiza.a
|
||||
|
||||
# -MT and -MF each consume the next word as their argument, so the former
# "-MT -MD -MP -MF -O6" turned -MD into a target name and -O6 into a
# dependency-file name. Only the optimisation level is set here.
d4norm_CXXFLAGS = -O6
|
||||
|
||||
d4norm_CPPFLAGS = \
|
||||
-DNDEBUG \
|
||||
-DWORDINDEX_WITH_4_BYTE \
|
||||
-DBINARY_SEARCH_FOR_TTABLE \
|
||||
-DDEBUG
|
||||
|
||||
hmmnorm_SOURCES = \
|
||||
hmmnorm.cxx
|
||||
|
||||
hmmnorm_LDADD = \
|
||||
-lgiza \
|
||||
-lpthread
|
||||
|
||||
hmmnorm_LDFLAGS = \
|
||||
-L.
|
||||
|
||||
hmmnorm_DEPENDENCIES = \
|
||||
libgiza.a
|
||||
|
||||
# -MT and -MF each consume the next word as their argument, so the former
# "-MT -MD -MP -MF -O6" turned -MD into a target name and -O6 into a
# dependency-file name. Only the optimisation level is set here.
hmmnorm_CXXFLAGS = -O6
|
||||
|
||||
hmmnorm_CPPFLAGS = \
|
||||
-DNDEBUG \
|
||||
-DWORDINDEX_WITH_4_BYTE \
|
||||
-DBINARY_SEARCH_FOR_TTABLE \
|
||||
-DDEBUG
|
||||
|
||||
symal_SOURCES = \
|
||||
cmd.c \
|
||||
cmd.h \
|
||||
symal.cpp
|
||||
|
||||
plain2snt_SOURCES = \
|
||||
plain2snt.cpp
|
||||
|
||||
snt2plain_SOURCES = \
|
||||
snt2plain.cpp
|
||||
|
||||
snt2cooc_SOURCES = \
|
||||
snt2cooc.cpp
|
||||
|
||||
snt2cooc_CPPFLAGS = \
|
||||
-DNDEBUG \
|
||||
-DWORDINDEX_WITH_4_BYTE \
|
||||
-DBINARY_SEARCH_FOR_TTABLE \
|
||||
-DDEBUG
|
||||
|
||||
mgiza_SOURCES = \
|
||||
main.cpp
|
||||
|
||||
mgiza_DEPENDENCIES = \
|
||||
libgiza.a
|
||||
|
||||
# -MT and -MF each consume the next word as their argument, so the former
# "-MT -MD -MP -MF -O6" turned -MD into a target name and -O6 into a
# dependency-file name. Only the optimisation level is set here.
mgiza_CXXFLAGS = -O6
|
||||
|
||||
mgiza_CPPFLAGS = \
|
||||
-DNDEBUG \
|
||||
-DWORDINDEX_WITH_4_BYTE \
|
||||
-DBINARY_SEARCH_FOR_TTABLE \
|
||||
-DDEBUG
|
||||
|
||||
mgiza_LDFLAGS = \
|
||||
-L.
|
||||
|
||||
mgiza_LDADD = \
|
||||
-lgiza \
|
||||
-lpthread
|
||||
|
||||
lib_LIBRARIES = \
|
||||
libgiza.a
|
||||
|
||||
libgiza_a_SOURCES = \
|
||||
alignment.cpp\
|
||||
alignment.h \
|
||||
AlignTables.cpp \
|
||||
AlignTables.h \
|
||||
Array.h \
|
||||
Array2.h \
|
||||
Array4.h \
|
||||
ATables.cpp \
|
||||
ATables.h \
|
||||
collCounts.cpp \
|
||||
collCounts.h \
|
||||
common.h \
|
||||
D4Tables.h \
|
||||
D5Tables.h \
|
||||
defs.h \
|
||||
Dictionary.cpp \
|
||||
Dictionary.h \
|
||||
file_spec.h \
|
||||
FlexArray.h \
|
||||
ForwardBackward.cpp \
|
||||
ForwardBackward.h \
|
||||
getSentence.cpp \
|
||||
getSentence.h \
|
||||
Globals.h \
|
||||
hmm.cpp \
|
||||
hmm.h \
|
||||
HMMTables.cpp \
|
||||
HMMTables.h \
|
||||
logprob.cpp \
|
||||
logprob.h \
|
||||
model1.cpp \
|
||||
model1.h \
|
||||
model2.cpp \
|
||||
model2.h \
|
||||
model2to3.cpp \
|
||||
model3.cpp \
|
||||
model3.h \
|
||||
model3_viterbi.cpp \
|
||||
model3_viterbi_with_tricks.cpp \
|
||||
model345-peg.cpp \
|
||||
MoveSwapMatrix.cpp \
|
||||
MoveSwapMatrix.h \
|
||||
myassert.cpp \
|
||||
myassert.h \
|
||||
mymath.h \
|
||||
mystl.h \
|
||||
NTables.cpp \
|
||||
NTables.h \
|
||||
Parameter.cpp \
|
||||
Parameter.h \
|
||||
parse.cpp \
|
||||
Perplexity.cpp \
|
||||
Perplexity.h \
|
||||
Pointer.h \
|
||||
reports.cpp \
|
||||
SetArray.cpp \
|
||||
SetArray.h \
|
||||
syncObj.h \
|
||||
transpair_model1.h \
|
||||
transpair_model2.h \
|
||||
transpair_model3.cpp \
|
||||
transpair_model3.h \
|
||||
transpair_model4.cpp \
|
||||
transpair_model4.h \
|
||||
transpair_model5.cpp \
|
||||
transpair_model5.h \
|
||||
transpair_modelhmm.h \
|
||||
ttableDiff.hpp \
|
||||
TTables.cpp \
|
||||
TTables.h \
|
||||
types.h \
|
||||
utility.cpp \
|
||||
utility.h \
|
||||
Vector.h \
|
||||
vocab.cpp \
|
||||
vocab.h \
|
||||
WordClasses.h
|
||||
|
||||
# -MF consumes the next word as its argument, so the former
# "-MD -MP -MF -MT -O6" used -MT as a dependency-file name. Only the
# optimisation level is set here.
libgiza_a_CXXFLAGS = -O6
|
||||
|
||||
libgiza_a_CPPFLAGS = \
|
||||
-DNDEBUG \
|
||||
-DWORDINDEX_WITH_4_BYTE \
|
||||
-DBINARY_SEARCH_FOR_TTABLE \
|
||||
-DDEBUG
|
||||
|
||||
SUBDIRS = \
|
||||
mkcls
|
||||
|
@ -0,0 +1,214 @@
|
||||
## Process this file with automake to produce Makefile.in
|
||||
|
||||
## Created by Anjuta
|
||||
|
||||
INCLUDES = \
|
||||
-DPACKAGE_LOCALE_DIR=\""$(prefix)/$(DATADIRNAME)/locale"\" \
|
||||
-DPACKAGE_SRC_DIR=\""$(srcdir)"\" \
|
||||
-DPACKAGE_DATA_DIR=\""$(datadir)"\"
|
||||
|
||||
AM_CFLAGS =\
|
||||
-Wall\
|
||||
-g
|
||||
|
||||
bin_PROGRAMS = mgiza \
|
||||
snt2cooc\
|
||||
snt2plain\
|
||||
plain2snt \
|
||||
symal \
|
||||
hmmnorm \
|
||||
d4norm
|
||||
|
||||
d4norm_SOURCES = \
|
||||
d4norm.cxx
|
||||
|
||||
d4norm_LDADD = \
|
||||
-lgiza \
|
||||
-lpthread
|
||||
|
||||
d4norm_LDFLAGS = \
|
||||
-L.
|
||||
|
||||
d4norm_DEPENDENCIES = \
|
||||
libgiza.a
|
||||
|
||||
# -MT and -MF each consume the next word as their argument, so the former
# "-MT -MD -MP -MF -O6" turned -MD into a target name and -O6 into a
# dependency-file name. Only the optimisation level is set here.
d4norm_CXXFLAGS = -O6
|
||||
|
||||
d4norm_CPPFLAGS = \
|
||||
-DNDEBUG \
|
||||
-DWORDINDEX_WITH_4_BYTE \
|
||||
-DBINARY_SEARCH_FOR_TTABLE \
|
||||
-DDEBUG
|
||||
|
||||
hmmnorm_SOURCES = \
|
||||
hmmnorm.cxx
|
||||
|
||||
hmmnorm_LDADD = \
|
||||
-lgiza \
|
||||
-lpthread
|
||||
|
||||
hmmnorm_LDFLAGS = \
|
||||
-L.
|
||||
|
||||
hmmnorm_DEPENDENCIES = \
|
||||
libgiza.a
|
||||
|
||||
# -MT and -MF each consume the next word as their argument, so the former
# "-MT -MD -MP -MF -O6" turned -MD into a target name and -O6 into a
# dependency-file name. Only the optimisation level is set here.
hmmnorm_CXXFLAGS = -O6
|
||||
|
||||
hmmnorm_CPPFLAGS = \
|
||||
-DNDEBUG \
|
||||
-DWORDINDEX_WITH_4_BYTE \
|
||||
-DBINARY_SEARCH_FOR_TTABLE \
|
||||
-DDEBUG
|
||||
|
||||
symal_SOURCES = \
|
||||
cmd.c \
|
||||
cmd.h \
|
||||
symal.cpp
|
||||
|
||||
plain2snt_SOURCES = \
|
||||
plain2snt.cpp
|
||||
|
||||
snt2plain_SOURCES = \
|
||||
snt2plain.cpp
|
||||
|
||||
snt2cooc_SOURCES = \
|
||||
snt2cooc.cpp
|
||||
|
||||
snt2cooc_CPPFLAGS = \
|
||||
-DNDEBUG \
|
||||
-DWORDINDEX_WITH_4_BYTE \
|
||||
-DBINARY_SEARCH_FOR_TTABLE \
|
||||
-DDEBUG
|
||||
|
||||
mgiza_SOURCES = \
|
||||
main.cpp
|
||||
|
||||
mgiza_DEPENDENCIES = \
|
||||
libgiza.a
|
||||
|
||||
# -MT and -MF each consume the next word as their argument, so the former
# "-MT -MD -MP -MF -O6" turned -MD into a target name and -O6 into a
# dependency-file name. Only the optimisation level is set here.
mgiza_CXXFLAGS = -O6
|
||||
|
||||
mgiza_CPPFLAGS = \
|
||||
-DNDEBUG \
|
||||
-DWORDINDEX_WITH_4_BYTE \
|
||||
-DBINARY_SEARCH_FOR_TTABLE \
|
||||
-DDEBUG
|
||||
|
||||
mgiza_LDFLAGS = \
|
||||
-L.
|
||||
|
||||
mgiza_LDADD = \
|
||||
-lgiza \
|
||||
-lpthread
|
||||
|
||||
lib_LIBRARIES = \
|
||||
libgiza.a
|
||||
|
||||
libgiza_a_SOURCES = \
|
||||
alignment.cpp\
|
||||
alignment.h \
|
||||
AlignTables.cpp \
|
||||
AlignTables.h \
|
||||
Array.h \
|
||||
Array2.h \
|
||||
Array4.h \
|
||||
ATables.cpp \
|
||||
ATables.h \
|
||||
collCounts.cpp \
|
||||
collCounts.h \
|
||||
common.h \
|
||||
D4Tables.h \
|
||||
D5Tables.h \
|
||||
defs.h \
|
||||
Dictionary.cpp \
|
||||
Dictionary.h \
|
||||
file_spec.h \
|
||||
FlexArray.h \
|
||||
ForwardBackward.cpp \
|
||||
ForwardBackward.h \
|
||||
getSentence.cpp \
|
||||
getSentence.h \
|
||||
Globals.h \
|
||||
hmm.cpp \
|
||||
hmm.h \
|
||||
HMMTables.cpp \
|
||||
HMMTables.h \
|
||||
logprob.cpp \
|
||||
logprob.h \
|
||||
model1.cpp \
|
||||
model1.h \
|
||||
model2.cpp \
|
||||
model2.h \
|
||||
model2to3.cpp \
|
||||
model3.cpp \
|
||||
model3.h \
|
||||
model3_viterbi.cpp \
|
||||
model3_viterbi_with_tricks.cpp \
|
||||
model345-peg.cpp \
|
||||
MoveSwapMatrix.cpp \
|
||||
MoveSwapMatrix.h \
|
||||
myassert.cpp \
|
||||
myassert.h \
|
||||
mymath.h \
|
||||
mystl.h \
|
||||
NTables.cpp \
|
||||
NTables.h \
|
||||
Parameter.cpp \
|
||||
Parameter.h \
|
||||
parse.cpp \
|
||||
Perplexity.cpp \
|
||||
Perplexity.h \
|
||||
Pointer.h \
|
||||
reports.cpp \
|
||||
SetArray.cpp \
|
||||
SetArray.h \
|
||||
syncObj.h \
|
||||
transpair_model1.h \
|
||||
transpair_model2.h \
|
||||
transpair_model3.cpp \
|
||||
transpair_model3.h \
|
||||
transpair_model4.cpp \
|
||||
transpair_model4.h \
|
||||
transpair_model5.cpp \
|
||||
transpair_model5.h \
|
||||
transpair_modelhmm.h \
|
||||
ttableDiff.hpp \
|
||||
TTables.cpp \
|
||||
TTables.h \
|
||||
types.h \
|
||||
utility.cpp \
|
||||
utility.h \
|
||||
Vector.h \
|
||||
vocab.cpp \
|
||||
vocab.h \
|
||||
WordClasses.h
|
||||
|
||||
# -MF consumes the next word as its argument, so the former
# "-MD -MP -MF -MT -O6" used -MT as a dependency-file name. Only the
# optimisation level is set here.
libgiza_a_CXXFLAGS = -O6
|
||||
|
||||
libgiza_a_CPPFLAGS = \
|
||||
-DNDEBUG \
|
||||
-DWORDINDEX_WITH_4_BYTE \
|
||||
-DBINARY_SEARCH_FOR_TTABLE \
|
||||
-DDEBUG
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,235 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#include "MoveSwapMatrix.h"
|
||||
|
||||
template<class TRANSPAIR>
|
||||
MoveSwapMatrix<TRANSPAIR>::MoveSwapMatrix(const TRANSPAIR&_ef, const alignment&_a)
|
||||
: alignment(_a), ef(_ef), l(ef.get_l()), m(ef.get_m()), _cmove(l+1, m+1), _cswap(m+1, m+1),
|
||||
delmove(l+1, m+1,0),delswap(m+1, m+1,0),changed(l+2, 0), changedCounter(1),
|
||||
modelnr(_ef.modelnr()),lazyEvaluation(0),centerDeleted(0)
|
||||
{
|
||||
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||
if( lazyEvaluation==0)
|
||||
for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
|
||||
}
|
||||
|
||||
template<class TRANSPAIR>
|
||||
void MoveSwapMatrix<TRANSPAIR>::updateJ(WordIndex j, bool useChanged,double thisValue)
|
||||
{
|
||||
massert( lazyEvaluation==0 );
|
||||
for(WordIndex i=0;i<=l;i++)
|
||||
if( (useChanged==0||changed[i]!=changedCounter) )
|
||||
if( get_al(j)!=i )
|
||||
_cmove(i, j)=ef.scoreOfMove((*this), i, j,thisValue);
|
||||
else
|
||||
_cmove(i, j)=1.0;
|
||||
for(WordIndex j2=j+1;j2<=m;j2++)
|
||||
if( get_al(j)!=get_al(j2) )
|
||||
_cswap(j, j2)=ef.scoreOfSwap((*this), j, j2,thisValue);
|
||||
else
|
||||
_cswap(j, j2)=1.0;
|
||||
for(WordIndex j2=1;j2<j;j2++)
|
||||
if( get_al(j)!=get_al(j2) )
|
||||
_cswap(j2, j)=ef.scoreOfSwap((*this), j2, j,thisValue);
|
||||
else
|
||||
_cswap(j2, j)=1.0;
|
||||
}
|
||||
template<class TRANSPAIR>
|
||||
void MoveSwapMatrix<TRANSPAIR>::updateI(WordIndex i,double thisValue)
|
||||
{
|
||||
massert( lazyEvaluation==0);
|
||||
for(WordIndex j=1;j<=m;j++)
|
||||
if( get_al(j)!=i )
|
||||
_cmove(i, j)=ef.scoreOfMove((*this), i, j,thisValue);
|
||||
else
|
||||
_cmove(i, j)=1.0;
|
||||
}
|
||||
|
||||
template<class TRANSPAIR>
|
||||
void MoveSwapMatrix<TRANSPAIR>::printWrongs()const{
|
||||
for(WordIndex i=0;i<=l;i++)
|
||||
{
|
||||
for(WordIndex j=1;j<=m;j++)
|
||||
if( get_al(j)==i)
|
||||
cout << "A";
|
||||
else
|
||||
{
|
||||
LogProb real=_cmove(i, j), wanted=ef.scoreOfMove((*this), i, j);
|
||||
if( fabs(1.0-real/wanted)>1e-3 )
|
||||
cout << 'b';
|
||||
else if(fabs(1.0-real/wanted)>1e-10 )
|
||||
cout << 'e';
|
||||
else if(real!=wanted)
|
||||
cout << 'E';
|
||||
else
|
||||
cout << ' ';
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
cout << endl;
|
||||
for(WordIndex j=1;j<=m;j++)
|
||||
{
|
||||
for(WordIndex j1=1;j1<=m;j1++)
|
||||
if( j1>j )
|
||||
{
|
||||
if( get_al(j)==get_al(j1) )
|
||||
cout << 'A';
|
||||
else
|
||||
cout << (_cswap(j, j1)==ef.scoreOfSwap((*this), j, j1));
|
||||
}
|
||||
else
|
||||
cout << ' ';
|
||||
cout << endl;
|
||||
}
|
||||
massert(0);
|
||||
}
|
||||
template<class TRANSPAIR>
|
||||
bool MoveSwapMatrix<TRANSPAIR>::isRight()const{
|
||||
if( lazyEvaluation )
|
||||
return 1;
|
||||
for(WordIndex i=0;i<=l;i++)
|
||||
for(WordIndex j=1;j<=m;j++)
|
||||
if( get_al(j)!=i && (!(doubleEqual(_cmove(i, j), ef.scoreOfMove((*this), i, j)))) )
|
||||
{
|
||||
cerr << "DIFF: " << i << " " << j << " " << _cmove(i, j) << " " << ef.scoreOfMove((*this), i, j) << endl;
|
||||
return 0;
|
||||
}
|
||||
for(WordIndex j=1;j<=m;j++)
|
||||
for(WordIndex j1=1;j1<=m;j1++)
|
||||
if( j1>j&&get_al(j)!=get_al(j1)&&(!doubleEqual(_cswap(j, j1), ef.scoreOfSwap((*this), j, j1))) )
|
||||
{
|
||||
cerr << "DIFFERENT: " << j << " " << j1 << " " << _cswap(j, j1) << " " << ef.scoreOfSwap((*this), j, j1) << endl;
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
template<class TRANSPAIR>
|
||||
void MoveSwapMatrix<TRANSPAIR>::doMove(WordIndex _i, WordIndex _j)
|
||||
{
|
||||
WordIndex old_i=get_al(_j);
|
||||
if(old_i>100){
|
||||
cerr << "Error, invalid index set";
|
||||
return;
|
||||
}
|
||||
if( lazyEvaluation )
|
||||
set(_j,_i);
|
||||
else
|
||||
{
|
||||
if ( modelnr==5||modelnr==6 )
|
||||
{
|
||||
set(_j, _i);
|
||||
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||
for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
|
||||
}
|
||||
else if ( modelnr==4 )
|
||||
{
|
||||
changedCounter++;
|
||||
for(unsigned int k=prev_cept(old_i);k<=next_cept(old_i);++k)changed[k]=changedCounter;
|
||||
for(unsigned int k=prev_cept(_i);k<=next_cept(_i);++k)changed[k]=changedCounter;
|
||||
set(_j, _i);
|
||||
for(unsigned int k=prev_cept(old_i);k<=next_cept(old_i);++k)changed[k]=changedCounter;
|
||||
for(unsigned int k=prev_cept(_i);k<=next_cept(_i);++k)changed[k]=changedCounter;
|
||||
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||
for(unsigned int i=0;i<=l;i++)
|
||||
if(changed[i]==changedCounter)
|
||||
updateI(i,thisValue);
|
||||
for(unsigned int j=1;j<=m;j++)
|
||||
if( changed[get_al(j)]==changedCounter )
|
||||
updateJ(j, 1,thisValue);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(modelnr==3);
|
||||
set(_j, _i);
|
||||
changedCounter++;
|
||||
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||
updateI(old_i,thisValue);
|
||||
changed[old_i]=changedCounter;
|
||||
updateI(_i,thisValue);
|
||||
changed[_i]=changedCounter;
|
||||
for(WordIndex j=1;j<=m;j++)
|
||||
if( get_al(j)==_i || get_al(j)==old_i )
|
||||
updateJ(j, 1,thisValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
template<class TRANSPAIR>
|
||||
void MoveSwapMatrix<TRANSPAIR>::doSwap(WordIndex _j1, WordIndex _j2)
|
||||
{
|
||||
assert( cswap(_j1, _j2)>1 );
|
||||
WordIndex i1=get_al(_j1), i2=get_al(_j2);
|
||||
if( lazyEvaluation==1 )
|
||||
{
|
||||
set(_j1, i2);
|
||||
set(_j2, i1);
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( modelnr==5||modelnr==6 )
|
||||
{
|
||||
set(_j1, i2);
|
||||
set(_j2, i1);
|
||||
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||
for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
|
||||
}
|
||||
else if( modelnr==4 )
|
||||
{
|
||||
changedCounter++;
|
||||
for(unsigned int k=prev_cept(i1);k<=next_cept(i1);++k)changed[k]=changedCounter;
|
||||
for(unsigned int k=prev_cept(i2);k<=next_cept(i2);++k)changed[k]=changedCounter;
|
||||
set(_j1, i2);
|
||||
set(_j2, i1);
|
||||
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||
for(unsigned int i=0;i<=l;i++)
|
||||
if(changed[i]==changedCounter)
|
||||
updateI(i,thisValue);
|
||||
for(unsigned int j=1;j<=m;j++)
|
||||
if( changed[get_al(j)]==changedCounter )
|
||||
updateJ(j, 1,thisValue);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(modelnr==3);
|
||||
set(_j1, i2);
|
||||
set(_j2, i1);
|
||||
changedCounter++;
|
||||
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
||||
updateI(i1,thisValue);
|
||||
changed[i1]=changedCounter;
|
||||
updateI(i2,thisValue);
|
||||
changed[i2]=changedCounter;
|
||||
updateJ(_j1, 1,thisValue);
|
||||
updateJ(_j2, 1,thisValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#include "transpair_model3.h"
|
||||
#include "transpair_model4.h"
|
||||
#include "transpair_model5.h"
|
||||
#include "transpair_modelhmm.h"
|
||||
template class MoveSwapMatrix<transpair_model3>;
|
||||
template class MoveSwapMatrix<transpair_model4>;
|
||||
template class MoveSwapMatrix<transpair_model5>;
|
||||
template class MoveSwapMatrix<transpair_modelhmm>;
|
@ -0,0 +1,162 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/*--
|
||||
MoveSwapMatrix: Efficient representation for moving and swapping
|
||||
around in IBM3 training.
|
||||
Franz Josef Och (30/07/99)
|
||||
--*/
|
||||
#ifndef moveswap2_costs_h_defined
|
||||
#define moveswap2_costs_h_defined
|
||||
#include "alignment.h"
|
||||
#include "transpair_model3.h"
|
||||
#include "myassert.h"
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
extern short DoViterbiTraining;
|
||||
|
||||
template<class TRANSPAIR>
|
||||
class MoveSwapMatrix: public alignment {
|
||||
private:
|
||||
const TRANSPAIR&ef;
|
||||
const WordIndex l, m;
|
||||
Array2<LogProb, Vector<LogProb> > _cmove, _cswap;
|
||||
Array2<char, Vector<char> > delmove, delswap;
|
||||
Vector<int> changed;
|
||||
int changedCounter;
|
||||
const int modelnr;
|
||||
bool lazyEvaluation;
|
||||
bool centerDeleted;
|
||||
std::map<int,std::set<int> >untouch_i; // target words that should not be aligned anywhere
|
||||
std::map<int,std::set<int> > untouch_j;
|
||||
public:
|
||||
void addUnTouchI(int i, int j){
|
||||
if(i>0){
|
||||
if(untouch_i.find(i)==untouch_i.end()){
|
||||
untouch_i[i] = std::set<int>();
|
||||
}
|
||||
untouch_i[i].insert(j);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void addUnTouchJ(int j,int i){
|
||||
if(j>0){
|
||||
if(untouch_j.find(j)==untouch_j.end()){
|
||||
untouch_j[j] = std::set<int>();
|
||||
}
|
||||
untouch_j[j].insert(i);
|
||||
}
|
||||
}
|
||||
|
||||
bool check() const {
|
||||
return 1;
|
||||
}
|
||||
const TRANSPAIR&get_ef() const {
|
||||
return ef;
|
||||
}
|
||||
bool isCenterDeleted() const {
|
||||
return centerDeleted;
|
||||
}
|
||||
bool isLazy() const {
|
||||
return lazyEvaluation;
|
||||
}
|
||||
MoveSwapMatrix(const TRANSPAIR&_ef, const alignment&_a);
|
||||
void updateJ(WordIndex j, bool, double thisValue);
|
||||
void updateI(WordIndex i, double thisValue);
|
||||
void doMove(WordIndex _i, WordIndex _j);
|
||||
void doSwap(WordIndex _j1, WordIndex _j2);
|
||||
void delCenter() {
|
||||
centerDeleted = 1;
|
||||
}
|
||||
void delMove(WordIndex x, WordIndex y) {
|
||||
delmove(x, y) = 1;
|
||||
}
|
||||
void delSwap(WordIndex x, WordIndex y) {
|
||||
massert(y>x);
|
||||
delswap(x, y) = 1;
|
||||
delswap(y, x) = 1;
|
||||
}
|
||||
bool isDelMove(WordIndex x, WordIndex y) const {
|
||||
return DoViterbiTraining || delmove(x, y);
|
||||
}
|
||||
bool isDelSwap(WordIndex x, WordIndex y) const {
|
||||
massert(y>x);
|
||||
return DoViterbiTraining || delswap(x, y);
|
||||
}
|
||||
LogProb cmove(WordIndex x, WordIndex y) const {
|
||||
massert( get_al(y)!=x );
|
||||
massert( delmove(x,y)==0 );
|
||||
if (lazyEvaluation)
|
||||
return ef.scoreOfMove(*this, x, y);
|
||||
else {
|
||||
std::map<int, std::set<int> >::const_iterator it;
|
||||
|
||||
it = untouch_i.find(x);
|
||||
if(it!=untouch_i.end()){
|
||||
// Return -1 if the j jump set is not within the limit
|
||||
if(it->second.find(y) == it->second.end()) //Not in the feasible set
|
||||
return -1;
|
||||
}
|
||||
it = untouch_j.find(y);
|
||||
if(it!=untouch_j.end()){
|
||||
if(it->second.find(x) == it->second.end()) //Not in the feasible set
|
||||
return -1;
|
||||
}
|
||||
return _cmove(x, y);
|
||||
}
|
||||
}
|
||||
LogProb cswap(WordIndex x, WordIndex y) const {
|
||||
massert(x<y);
|
||||
massert(delswap(x,y)==0);
|
||||
massert(get_al(x)!=get_al(y));
|
||||
if (lazyEvaluation)
|
||||
return ef.scoreOfSwap(*this, x, y);
|
||||
else {
|
||||
massert(y>x);
|
||||
std::map<int, std::set<int> >::const_iterator it1,it2;
|
||||
it1 =untouch_j.find(y);
|
||||
it2 = untouch_j.find(x);
|
||||
int nal1 = get_al(y);
|
||||
int nal2 = get_al(x); // Need to test if nal1 is in it2's feasible set
|
||||
// and vice versa
|
||||
|
||||
if(it1!=untouch_j.end()&&it1->second.find(nal2)==it1->second.end()){
|
||||
return -1;
|
||||
}
|
||||
if(it2!=untouch_j.end()&&it2->second.find(nal1)==it2->second.end()){
|
||||
return -1;
|
||||
}
|
||||
// Make sure we never swap these
|
||||
return _cswap(x, y);
|
||||
}
|
||||
}
|
||||
void printWrongs() const;
|
||||
bool isRight() const;
|
||||
friend ostream&operator<<(ostream&out, const MoveSwapMatrix<TRANSPAIR>&m) {
|
||||
return out << (alignment) m << "\nEF:\n" << m.ef << "\nCMOVE\n"
|
||||
<< m._cmove << "\nCSWAP\n" << m._cswap << endl;
|
||||
}
|
||||
;
|
||||
};
|
||||
#endif
|
@ -0,0 +1,184 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#include "NTables.h"
|
||||
#include <iostream>
|
||||
#include "defs.h"
|
||||
#include <fstream>
|
||||
#include "Parameter.h"
|
||||
|
||||
GLOBAL_PARAMETER(double,NTablesFactorGraphemes,"nSmooth","smoothing for fertility parameters (good value: 64): weight for wordlength-dependent fertility parameters",PARLEV_SMOOTH,64.0);
|
||||
GLOBAL_PARAMETER(double,NTablesFactorGeneral,"nSmoothGeneral","smoothing for fertility parameters (default: 0): weight for word-independent fertility parameters",PARLEV_SMOOTH,0.0);
|
||||
|
||||
template <class VALTYPE>
|
||||
void nmodel<VALTYPE>::printNTable(int noEW, const char* filename,
|
||||
const Vector<WordEntry>& evlist,
|
||||
bool actual) const
|
||||
// prints the fertility table but with actual sourcce words (not their id)
|
||||
{
|
||||
cerr << "Dumping nTable to: " << filename << '\n';
|
||||
ofstream of(filename);
|
||||
VALTYPE p ;
|
||||
WordIndex k, i ;
|
||||
for(i=1; int(i) < noEW; i++){
|
||||
if (evlist[i].freq > 0){
|
||||
if (actual)
|
||||
of << evlist[i].word << ' ' ;
|
||||
else
|
||||
of << i << ' ' ;
|
||||
for( k=0; k < MAX_FERTILITY; k++){
|
||||
p = getValue(i, k);
|
||||
if (p <= PROB_SMOOTH)
|
||||
p = 0;
|
||||
of << p << ' ';
|
||||
}
|
||||
of << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class VALTYPE>
|
||||
void nmodel<VALTYPE>::printRealNTable(int noEW, const char* filename,
|
||||
const Vector<WordEntry>& evlist,
|
||||
bool actual) const
|
||||
// prints the fertility table but with actual sourcce words (not their id)
|
||||
{
|
||||
cerr << "Dumping nTable to: " << filename << '\n';
|
||||
ofstream of(filename);
|
||||
VALTYPE p ;
|
||||
WordIndex k, i ;
|
||||
for(i=1; int(i) < noEW; i++){
|
||||
if (evlist[i].freq > 0){
|
||||
if (actual)
|
||||
of << evlist[i].word << ' ' ;
|
||||
else
|
||||
of << i << ' ' ;
|
||||
for( k=0; k < MAX_FERTILITY; k++){
|
||||
p = getValue(i, k);
|
||||
// if (p <= PROB_SMOOTH)
|
||||
// p = 0;
|
||||
of << p << ' ';
|
||||
}
|
||||
of << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class VALTYPE>
|
||||
bool nmodel<VALTYPE>::readNTable(const char *filename){
|
||||
/* This function reads the n table from a file.
|
||||
Each line is of the format: source_word_id p0 p1 p2 ... pn
|
||||
This is the inverse operation of the printTable function.
|
||||
NAS, 7/11/99
|
||||
*/
|
||||
ifstream inf(filename);
|
||||
if(!inf.is_open()){
|
||||
return false;
|
||||
}
|
||||
cerr << "Reading fertility table from " << filename << "\n";
|
||||
if(!inf){
|
||||
cerr << "\nERROR: Cannot open " << filename <<"\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
VALTYPE prob;
|
||||
WordIndex tok, i;
|
||||
int nFert=0;
|
||||
while(!inf.eof()){
|
||||
nFert++;
|
||||
inf >> ws >> tok;
|
||||
if (tok > MAX_VOCAB_SIZE){
|
||||
cerr << "NTables:readNTable(): unrecognized token id: " << tok
|
||||
<<'\n';
|
||||
exit(-1);
|
||||
}
|
||||
for(i = 0; i < MAX_FERTILITY; i++){
|
||||
inf >> ws >> prob;
|
||||
getRef(tok, i)=prob;
|
||||
}
|
||||
}
|
||||
cerr << "Read " << nFert << " entries in fertility table.\n";
|
||||
inf.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class VALTYPE>
|
||||
bool nmodel<VALTYPE>::merge(nmodel<VALTYPE>& n,int noEW, const Vector<WordEntry>& evlist){
|
||||
/* This function reads the n table from a file.
|
||||
Each line is of the format: source_word_id p0 p1 p2 ... pn
|
||||
This is the inverse operation of the printTable function.
|
||||
NAS, 7/11/99
|
||||
*/
|
||||
|
||||
|
||||
VALTYPE p ;
|
||||
WordIndex k, i ;
|
||||
for(i=1; int(i) < noEW; i++){
|
||||
if (evlist[i].freq > 0){
|
||||
for( k=0; k < MAX_FERTILITY; k++){
|
||||
p = n.getValue(i, k);
|
||||
getRef(i,k)+=p;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class VALTYPE>
|
||||
bool nmodel<VALTYPE>::readAugNTable(const char *filename){
|
||||
/* This function reads the n table from a file.
|
||||
Each line is of the format: source_word_id p0 p1 p2 ... pn
|
||||
This is the inverse operation of the printTable function.
|
||||
NAS, 7/11/99
|
||||
*/
|
||||
ifstream inf(filename);
|
||||
if(!inf.is_open()){
|
||||
return false;
|
||||
}
|
||||
cerr << "Reading fertility table from " << filename << "\n";
|
||||
if(!inf){
|
||||
cerr << "\nERROR: Cannot open " << filename <<"\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
VALTYPE prob;
|
||||
WordIndex tok, i;
|
||||
int nFert=0;
|
||||
while(!inf.eof()){
|
||||
nFert++;
|
||||
inf >> ws >> tok;
|
||||
if (tok > MAX_VOCAB_SIZE){
|
||||
cerr << "NTables:readNTable(): unrecognized token id: " << tok
|
||||
<<'\n';
|
||||
exit(-1);
|
||||
}
|
||||
for(i = 0; i < MAX_FERTILITY; i++){
|
||||
inf >> ws >> prob;
|
||||
getRef(tok, i)+=prob;
|
||||
}
|
||||
}
|
||||
cerr << "Read " << nFert << " entries in fertility table.\n";
|
||||
inf.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
template class nmodel<COUNT>;
|
||||
//template class nmodel<PROB>;
|
@ -0,0 +1,145 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef _ntables_h
|
||||
#define _ntables_h 1
|
||||
#include "Array2.h"
|
||||
#include "Vector.h"
|
||||
#include <cassert>
|
||||
#include "defs.h"
|
||||
#include "vocab.h"
|
||||
#include "myassert.h"
|
||||
#include "Globals.h"
|
||||
#include "syncObj.h"
|
||||
|
||||
extern double NTablesFactorGraphemes, NTablesFactorGeneral;
|
||||
|
||||
template<class VALTYPE> class nmodel {
|
||||
private:
|
||||
Array2<VALTYPE, Vector<VALTYPE> > ntab;
|
||||
public:
|
||||
nmodel(int maxw, int maxn) :
|
||||
ntab(maxw, maxn, 0.0) {
|
||||
}
|
||||
VALTYPE getValue(int w, unsigned int n) const {
|
||||
massert(w!=0);
|
||||
if (n>=ntab.getLen2())
|
||||
return 0.0;
|
||||
else
|
||||
return max(ntab(w, n), VALTYPE(PROB_SMOOTH));
|
||||
}
|
||||
protected:
|
||||
inline VALTYPE&getRef(int w, int n) {
|
||||
//massert(w!=0);
|
||||
return ntab(w, n);
|
||||
};
|
||||
Mutex lock;
|
||||
public:
|
||||
inline void addValue(int w , int n,const VALTYPE& t){lock.lock();ntab(w,n)+=t;lock.unlock();};
|
||||
public:
|
||||
template<class COUNT> void normalize(nmodel<COUNT>&write,
|
||||
const Vector<WordEntry>* _evlist) const {
|
||||
int h1=ntab.getLen1(), h2=ntab.getLen2();
|
||||
int nParams=0;
|
||||
if (_evlist&&(NTablesFactorGraphemes||NTablesFactorGeneral)) {
|
||||
size_t maxlen=0;
|
||||
const Vector<WordEntry>&evlist=*_evlist;
|
||||
for (unsigned int i=1; i<evlist.size(); i++)
|
||||
maxlen=max(maxlen, evlist[i].word.length());
|
||||
Array2<COUNT,Vector<COUNT> > counts(maxlen+1, MAX_FERTILITY+1, 0.0);
|
||||
Vector<COUNT> nprob_general(MAX_FERTILITY+1,0.0);
|
||||
for (unsigned int i=1; i<min((unsigned int)h1,
|
||||
(unsigned int)evlist.size()); i++) {
|
||||
int l=evlist[i].word.length();
|
||||
for (int k=0; k<h2; k++) {
|
||||
counts(l, k)+=getValue(i, k);
|
||||
nprob_general[k]+=getValue(i, k);
|
||||
}
|
||||
}
|
||||
COUNT sum2=0;
|
||||
for (unsigned int i=1; i<maxlen+1; i++) {
|
||||
COUNT sum=0.0;
|
||||
for (int k=0; k<h2; k++)
|
||||
sum+=counts(i, k);
|
||||
sum2+=sum;
|
||||
if (sum) {
|
||||
double average=0.0;
|
||||
//cerr << "l: " << i << " " << sum << " ";
|
||||
for (int k=0; k<h2; k++) {
|
||||
counts(i, k)/=sum;
|
||||
//cerr << counts(i,k) << ' ';
|
||||
average+=k*counts(i, k);
|
||||
}
|
||||
//cerr << "avg: " << average << endl;
|
||||
//cerr << '\n';
|
||||
}
|
||||
}
|
||||
for (unsigned int k=0; k<nprob_general.size(); k++)
|
||||
nprob_general[k]/=sum2;
|
||||
|
||||
for (int i=1; i<h1; i++) {
|
||||
int l=-1;
|
||||
if ((unsigned int)i<evlist.size())
|
||||
l=evlist[i].word.length();
|
||||
COUNT sum=0.0;
|
||||
for (int k=0; k<h2; k++)
|
||||
sum+=getValue(i, k)+((l==-1) ? 0.0 : (counts(l, k)
|
||||
*NTablesFactorGraphemes)) + NTablesFactorGeneral
|
||||
*nprob_general[k];
|
||||
assert(sum);
|
||||
for (int k=0; k<h2; k++) {
|
||||
write.getRef(i, k)=(getValue(i, k)+((l==-1) ? 0.0
|
||||
: (counts(l, k)*NTablesFactorGraphemes)))/sum
|
||||
+ NTablesFactorGeneral*nprob_general[k];
|
||||
nParams++;
|
||||
}
|
||||
}
|
||||
} else
|
||||
for (int i=1; i<h1; i++) {
|
||||
COUNT sum=0.0;
|
||||
for (int k=0; k<h2; k++)
|
||||
sum+=getValue(i, k);
|
||||
assert(sum);
|
||||
for (int k=0; k<h2; k++) {
|
||||
write.getRef(i, k)=getValue(i, k)/sum;
|
||||
nParams++;
|
||||
}
|
||||
}
|
||||
cerr << "NTable contains " << nParams << " parameter.\n";
|
||||
}
|
||||
|
||||
bool merge(nmodel<VALTYPE>& n, int noEW, const Vector<WordEntry>& evlist);
|
||||
void clear() {
|
||||
int h1=ntab.getLen1(), h2=ntab.getLen2();
|
||||
for (int i=0; i<h1; i++)
|
||||
for (int k=0; k<h2; k++)
|
||||
ntab(i, k)=0;
|
||||
}
|
||||
void printNTable(int noEW, const char* filename,
|
||||
const Vector<WordEntry>& evlist, bool) const;
|
||||
void printRealNTable(int noEW, const char* filename,
|
||||
const Vector<WordEntry>& evlist, bool) const;
|
||||
bool readAugNTable(const char *filename);
|
||||
bool readNTable(const char *filename);
|
||||
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,144 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#include "Parameter.h"
|
||||
#include "fstream"
|
||||
#include "unistd.h"
|
||||
#include <strstream>
|
||||
|
||||
|
||||
bool absolutePathNames=0;
|
||||
string ParameterPathPrefix;
|
||||
bool ParameterChangedFlag=0;
|
||||
|
||||
// Writes "<name> <value>" lines to `of` for every parameter in `parset` whose
// level equals `level` (or every level when `level` is -1) and whose onlyCopy
// flag is 0. Returns 0 when `of` is not usable, 1 otherwise.
//
// For filename parameters whose value is non-empty and does not start with
// '/', the value is prefixed with the current working directory when
// absolutePathNames is set, and with ParameterPathPrefix when that is
// non-empty.
bool writeParameters(ofstream&of,const ParSet&parset,int level)
{
  if(!of)return 0;
  for(ParSet::const_iterator i=parset.begin();i!=parset.end();++i)
    {
      if( !(((*i)->getLevel()==level||level==-1)&&(*i)->onlyCopy==0) )
        continue;

      // Render the value once to inspect it.
      ostrstream os;
      (*i)->printValue(os);
      os << ends;
      string s(os.str());
      // str() freezes the buffer, so the stream no longer frees it; unfreeze
      // to avoid leaking one buffer per parameter.
      os.freeze(false);

      of << (*i)->getString() << " ";
      const bool relativeFile=(*i)->isFilename()&&s.length()&&s[0]!='/';
      if( absolutePathNames&&relativeFile )
        {
          char path[1024];
          // getcwd() returns NULL on failure and leaves `path` unspecified.
          if( getcwd(path,sizeof(path))!=NULL )
            of << path << '/';
          else
            cerr << "WARNING: cannot determine current directory; writing relative path for '"
                 << (*i)->getString() << "'.\n";
        }
      if( ParameterPathPrefix.length()&&relativeFile )
        of << ParameterPathPrefix << '/';
      (*i)->printValue(of);
      of << endl;
    }
  return 1;
}
|
||||
|
||||
// Reads `f` line by line; the first two whitespace-separated fields of each
// line are passed to makeSetCommand() as parameter name and value. Lines that
// makeSetCommand() rejects are reported on cerr.
// Returns 0 when `f` is not usable, 1 otherwise.
bool readParameters(ifstream&f,const ParSet&parset,int verb,int level)
{
  if (!f) {
    return 0;
  }
  string line;
  while (getline(f, line)) {
    string key, value;
    istrstream fields(line.c_str());
    fields >> key >> value;
    const bool accepted = makeSetCommand(key, value, parset, verb, level);
    if (!accepted) {
      cerr << "ERROR: could not set: (C) " << key << " " << value << endl;
    }
  }
  return 1;
}
|
||||
|
||||
|
||||
// Sets the parameter named _s1 (normalised with simpleString) to the value s2.
//
// An exact name match wins immediately. Otherwise the name is treated as a
// prefix and accepted only when exactly one parameter name starts with it.
// A matched parameter is only changed when `level` is -1 or equals the
// parameter's level; a mismatch is reported on cerr when verb > 1, and the
// function still returns 1.
// Returns 0 (after an error message) when the name is unknown or ambiguous.
bool makeSetCommand(string _s1,string s2,const ParSet&parset,int verb,int level)
{
  const string s1 = simpleString(_s1);
  ParPtr prefixMatch;
  int prefixMatches = 0;

  for (ParSet::const_iterator it = parset.begin(); it != parset.end(); ++it) {
    if (*(*it) == s1) {
      if (level == -1 || level == (*it)->getLevel()) {
        (*it)->setParameter(s2, verb);
      } else if (verb > 1) {
        cerr << "ERROR: Could not set: (A) " << s1 << " " << s2 << " " << level << " " << (*it)->getLevel() << endl;
      }
      return 1;
    }
    if ((*it)->getString().substr(0, s1.length()) == s1) {
      prefixMatch = (*it);
      ++prefixMatches;
    }
  }

  if (prefixMatches == 1) {
    if (level == -1 || level == prefixMatch->getLevel()) {
      prefixMatch->setParameter(s2, verb);
    } else if (verb > 1) {
      cerr << "ERROR: Could not set: (B) " << s1 << " " << s2 << " " << level << " " << prefixMatch->getLevel() << endl;
    }
    return 1;
  }

  if (prefixMatches > 1) {
    cerr << "ERROR: ambiguous parameter '" << s1 << "'.\n";
  } else {
    cerr << "ERROR: parameter '" << s1 << "' does not exist.\n";
  }
  return 0;
}
|
||||
|
||||
// Prints, via printAt(), every parameter of `parset` whose level equals
// `level` (or every level when `level` is -1) and whose onlyCopy flag is 0,
// one per line. Does nothing when `of` is not usable. Returns `of`.
ostream& printPars(ostream&of,const ParSet&parset,int level)
{
  if (!of) {
    return of;
  }
  for (ParSet::const_iterator it = parset.begin(); it != parset.end(); ++it) {
    const bool levelMatches = ((*it)->getLevel() == level || level == -1);
    if (!levelMatches || (*it)->onlyCopy != 0) {
      continue;
    }
    (*it)->printAt(of);
    of << endl;
  }
  return of;
}
|
||||
|
||||
// Returns `s` reduced to its canonical parameter-name form: every character
// is lower-cased and only 'a'..'z' and '0'..'9' are kept.
string simpleString(const string s)
{
  string k;
  k.reserve(s.length());
  for(string::size_type i=0;i<s.length();++i)
    {
      // tolower() is only defined for EOF and values representable as
      // unsigned char; a plain (possibly negative) char must be converted.
      const char c=static_cast<char>(tolower(static_cast<unsigned char>(s[i])));
      if( (c>='a'&&c<='z')||(c>='0'&&c<='9') )
        k.push_back(c);
    }
  return k;
}
|
||||
|
||||
|
||||
// Returns the program-wide parameter set. It is a function-local static, so
// it is constructed the first time this function is called.
ParSet&getGlobalParSet()
{
  static ParSet globalSet;
  return globalSet;
}
|
@ -0,0 +1,200 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef PARAMETER_H_DEFINED
|
||||
#define PARAMETER_H_DEFINED
|
||||
|
||||
#include "mystl.h"
|
||||
#include <set>
|
||||
#include "Pointer.h"
|
||||
#include <string>
|
||||
#include "Globals.h"
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
|
||||
// --- mConvert: parse the textual value `s` into a typed variable ------------
// Every overload stores the parsed value in its second argument and returns it.

// True for "yes", "y", "true" and "t", compared case-insensitively.
inline bool mConvertIsYes(const string&s)
{
  const char*text=s.c_str();
  return strcasecmp(text,"yes")==0 || strcasecmp(text,"y")==0
      || strcasecmp(text,"true")==0 || strcasecmp(text,"t")==0;
}

// True for "no", "n", "false" and "f", compared case-insensitively.
inline bool mConvertIsNo(const string&s)
{
  const char*text=s.c_str();
  return strcasecmp(text,"no")==0 || strcasecmp(text,"n")==0
      || strcasecmp(text,"false")==0 || strcasecmp(text,"f")==0;
}

// Shared body of the integral and bool overloads: a yes-word gives 1 and a
// no-word gives 0 (each echoed on cerr); anything else goes through atoi().
template<class NUMBER>
inline NUMBER mConvertFlagOrNumber(const string&s,NUMBER&out)
{
  if( mConvertIsYes(s) )
    {
      cerr << "TRUE\n";
      return out=1;
    }
  if( mConvertIsNo(s) )
    {
      cerr << "FALSE\n";
      return out=0;
    }
  return out=atoi(s.c_str());
}

inline unsigned int mConvert(const string&s,unsigned int &i) { return mConvertFlagOrNumber(s,i); }
inline int mConvert(const string&s,int &i) { return mConvertFlagOrNumber(s,i); }

// Floating-point values are parsed with atof(); yes/no words are not recognised.
inline double mConvert(const string&s,double &d)
{
  d=atof(s.c_str());
  return d;
}
inline double mConvert(const string&s,float &d)
{
  d=atof(s.c_str());
  return d;
}

// Strings are copied unchanged.
inline string mConvert(const string&s,string&n)
{
  n=s;
  return n;
}

inline bool mConvert(const string&s,bool&n) { return mConvertFlagOrNumber(s,n); }
inline short mConvert(const string&s,short&n) { return mConvertFlagOrNumber(s,n); }
inline unsigned short mConvert(const string&s,unsigned short&n) { return mConvertFlagOrNumber(s,n); }
|
||||
|
||||
string simpleString(const string s);
|
||||
|
||||
// Polynomial hash of `s`: h = 5*h + c for every character c, starting at 0.
// The accumulation is done in unsigned arithmetic, which wraps around on
// overflow; the previous signed accumulation overflowed (undefined behaviour)
// for strings longer than about a dozen characters. Results for strings that
// did not overflow are unchanged.
inline int Hashstring(const string& s)
{
  unsigned int sum=0;
  for(string::const_iterator i=s.begin(),end=s.end();i!=end;++i)
    // Convert through int first so a negative char contributes the same
    // value modulo 2^N as it did in the signed computation.
    sum=5u*sum+static_cast<unsigned int>(static_cast<int>(*i));
  return static_cast<int>(sum);
}
|
||||
|
||||
class _Parameter
|
||||
{
|
||||
protected:
|
||||
string name;
|
||||
bool *ifChanged;
|
||||
string description;
|
||||
int level;
|
||||
bool filename;
|
||||
public:
|
||||
int onlyCopy;
|
||||
_Parameter(string n,bool&b,string desc,int _level,bool _onlyCopy)
|
||||
: name(simpleString(n)),ifChanged(&b),description(desc),level(_level),filename(0),onlyCopy(_onlyCopy) {}
|
||||
virtual ~_Parameter(){};
|
||||
bool operator==(const string&s)const
|
||||
{ return name== simpleString(s); }
|
||||
void setChanged()
|
||||
{ *ifChanged=true; }
|
||||
virtual bool setParameter(string s2,int)=0;
|
||||
virtual ostream&printAt(ostream&out)=0;
|
||||
virtual ostream&printValue(ostream&out)=0;
|
||||
const string&getString() const { return name; }
|
||||
int getLevel() const { return level;}
|
||||
bool isFilename() { return filename;}
|
||||
void setFilename(bool x=1) { filename=x;}
|
||||
friend bool operator==(const _Parameter&a,const _Parameter&b)
|
||||
{ return a.name==b.name; }
|
||||
friend bool operator<(const _Parameter&a,const _Parameter&b)
|
||||
{ return a.name<b.name; }
|
||||
friend int Hash(const _Parameter&aaa)
|
||||
{ return Hashstring(aaa.name); }
|
||||
friend ostream&operator<<(ostream&out,const _Parameter&p)
|
||||
{ return out<<"Parameter: "<<p.name <<endl;}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
class Parameter : public _Parameter
|
||||
{
|
||||
private:
|
||||
T*t;
|
||||
public:
|
||||
Parameter(string n,bool&b,string desc,T&_t,int level=0,bool onlyCopy=0)
|
||||
: _Parameter(n,b,desc,level,onlyCopy),t(&_t) {}
|
||||
virtual ~Parameter(){}
|
||||
virtual bool setParameter(string s2,int verb)
|
||||
{
|
||||
T x;
|
||||
if( !(*t==mConvert(s2,x)))
|
||||
{
|
||||
bool printedFirst=0;
|
||||
if( verb>1 )
|
||||
{
|
||||
cout << "Parameter '"<<name <<"' changed from '"<<*t<<"' to '";
|
||||
printedFirst=1;
|
||||
}
|
||||
mConvert(s2,*t);
|
||||
if( printedFirst )
|
||||
cout << *t <<"'\n";
|
||||
setChanged();
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
virtual ostream&printAt(ostream&out)
|
||||
{return out << name << " = " << *t << " (" << description << ")";}
|
||||
virtual ostream&printValue(ostream&out)
|
||||
{return out << *t;}
|
||||
};
|
||||
|
||||
typedef MP<_Parameter> ParPtr;
|
||||
|
||||
class ParSet : public set<ParPtr>
|
||||
{
|
||||
public:
|
||||
void insert(const ParPtr&x)
|
||||
{
|
||||
if( count(x)!=0 )
|
||||
cerr << "ERROR: element " << x->getString() << " already inserted.\n";
|
||||
set<ParPtr>::insert(x);
|
||||
}
|
||||
};
|
||||
|
||||
bool makeSetCommand(string s1,string s2,const ParSet&pars,int verb=1,int level= -1);
|
||||
ostream&printPars(ostream&out,const ParSet&pars,int level=-1);
|
||||
bool writeParameters(ofstream&of,const ParSet&parset,int level=0);
|
||||
bool readParameters(ifstream&f,const ParSet&parset,int verb=2,int level=0);
|
||||
ParSet&getGlobalParSet();
|
||||
extern bool ParameterChangedFlag;
|
||||
template<class T>const T&addGlobalParameter(const char *name,const char *description,int level,T*adr,const T&init)
|
||||
{
|
||||
*adr=init;
|
||||
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
|
||||
return init;
|
||||
}
|
||||
template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *description,int level,T*adr,const T&init)
|
||||
{
|
||||
*adr=init;
|
||||
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
|
||||
getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
|
||||
return init;
|
||||
}
|
||||
template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *name3,const char *description,int level,T*adr,const T&init)
|
||||
{
|
||||
*adr=init;
|
||||
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
|
||||
getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
|
||||
getGlobalParSet().insert(new Parameter<T>(name3,ParameterChangedFlag,description,*adr,-1));
|
||||
return init;
|
||||
}
|
||||
template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *name3,const char *name4,const char *description,int level,T*adr,const T&init)
|
||||
{
|
||||
*adr=init;
|
||||
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
|
||||
getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
|
||||
getGlobalParSet().insert(new Parameter<T>(name3,ParameterChangedFlag,description,*adr,-1));
|
||||
getGlobalParSet().insert(new Parameter<T>(name4,ParameterChangedFlag,description,*adr,-1));
|
||||
return init;
|
||||
}
|
||||
void MakeParameterOptimizing(istream&file,string resultingParameters);
|
||||
|
||||
// Each macro defines the variable VARNAME of type TYP and, as part of its
// initialisation, registers it through addGlobalParameter() under one to four
// names. The expansion already ends with a semicolon.
#define GLOBAL_PARAMETER(TYP,VARNAME,NAME,DESCRIPTION,LEVEL,INIT) \
  TYP VARNAME=addGlobalParameter< TYP >(NAME,DESCRIPTION,LEVEL,&VARNAME,INIT);

#define GLOBAL_PARAMETER2(TYP,VARNAME,NAME,NAME2,DESCRIPTION,LEVEL,INIT) \
  TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,DESCRIPTION,LEVEL,&VARNAME,INIT);

#define GLOBAL_PARAMETER3(TYP,VARNAME,NAME,NAME2,NAME3,DESCRIPTION,LEVEL,INIT) \
  TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,NAME3,DESCRIPTION,LEVEL,&VARNAME,INIT);

#define GLOBAL_PARAMETER4(TYP,VARNAME,NAME,NAME2,NAME3,NAME4,DESCRIPTION,LEVEL,INIT) \
  TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,NAME3,NAME4,DESCRIPTION,LEVEL,&VARNAME,INIT);
|
||||
|
||||
void setParameterLevelName(unsigned int i,string x);
|
||||
|
||||
#endif
|
@ -0,0 +1,42 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/* Perplexity.cc
|
||||
* =============
|
||||
* Mike Jahr, 7/21/99
|
||||
* Machine Translation group, WS99
|
||||
* Center for Language and Speech Processing
|
||||
*
|
||||
* Last Modified by: Yaser Al-Onaizan, August 17, 1999
|
||||
*
|
||||
* Simple class used to calculate cross entropy and perplexity
|
||||
* of models.
|
||||
*/
|
||||
|
||||
#include "Perplexity.h"
|
||||
|
||||
// Appends the model name together with the current perplexity and cross
// entropy to the history vectors, while holding the object's mutex.
void Perplexity::record(string model){
  mutex.lock();
  const double currentPerplexity = perplexity();
  const double currentCrossEntropy = cross_entropy();
  modelid.push_back(model);
  perp.push_back(currentPerplexity);
  ce.push_back(currentCrossEntropy);
  mutex.unlock();
}
|
@ -0,0 +1,115 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/* Perplexity.h
|
||||
* ============
|
||||
* Mike Jahr, 7/15/99
|
||||
* Machine Translation group, WS99
|
||||
* Center for Language and Speech Processing
|
||||
*
|
||||
* Last Modified by: Yaser Al-Onaizan, August 17, 1999
|
||||
*
|
||||
* Simple class used to calculate cross entropy and perplexity
|
||||
* of models.
|
||||
*/
|
||||
|
||||
#ifndef _PERPLEXITY_H
|
||||
#define _PERPLEXITY_H
|
||||
|
||||
#include <cmath>
|
||||
#include <fstream>
|
||||
#include "Vector.h"
|
||||
#include "defs.h"
|
||||
#include "Array2.h"
|
||||
#include "Globals.h"
|
||||
#include "syncObj.h"
|
||||
|
||||
#define CROSS_ENTROPY_BASE 2
|
||||
|
||||
class Perplexity {
|
||||
private:
|
||||
double sum;
|
||||
double wc;
|
||||
Array2<double, Vector<double> > *E_M_L;
|
||||
Vector<string> modelid;
|
||||
Vector<double > perp;
|
||||
Vector<double > ce;
|
||||
Vector<string> name ;
|
||||
Mutex mutex;
|
||||
public:
|
||||
~Perplexity() { delete E_M_L;}
|
||||
Perplexity() {
|
||||
E_M_L = new Array2<double, Vector<double> >(MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH);
|
||||
unsigned int l, m ;
|
||||
Vector<double> fact(MAX_SENTENCE_LENGTH, 1.0);
|
||||
for (m = 2 ; m < MAX_SENTENCE_LENGTH ; m++)
|
||||
fact[m] = fact[m-1] * m ;
|
||||
for (m = 1 ; m < MAX_SENTENCE_LENGTH ; m++)
|
||||
for (l = 1 ; l < MAX_SENTENCE_LENGTH ; l++) {
|
||||
(*E_M_L)(l, m) = log (pow((LAMBDA * l), double(m)) * exp(-LAMBDA * double(l)) /
|
||||
(fact[m])) ;
|
||||
}
|
||||
sum = 0 ;
|
||||
wc = 0;
|
||||
perp.clear();
|
||||
ce.clear();
|
||||
name.clear();
|
||||
}
|
||||
inline void clear() {
|
||||
mutex.lock();
|
||||
sum = 0 ;
|
||||
wc = 0 ;
|
||||
mutex.unlock();
|
||||
}
|
||||
size_t size() const {return(min(perp.size(), ce.size()));}
|
||||
inline void addFactor(const double p, const double count, const int l,
|
||||
const int m,bool withPoisson) {
|
||||
mutex.lock();
|
||||
wc += count * m ; // number of french words
|
||||
sum += count * ( (withPoisson?((*E_M_L)(l, m)):0.0) + p) ;
|
||||
mutex.unlock();
|
||||
}
|
||||
|
||||
inline double perplexity() const {
|
||||
return exp( -1*sum / wc);
|
||||
}
|
||||
|
||||
inline double cross_entropy() const {
|
||||
return (-1.0*sum / (log(double(CROSS_ENTROPY_BASE)) * wc));
|
||||
}
|
||||
|
||||
inline double word_count() const {
|
||||
return wc;
|
||||
}
|
||||
|
||||
inline double getSum() const {
|
||||
return sum ;
|
||||
}
|
||||
|
||||
void record(string model);
|
||||
|
||||
friend void generatePerplexityReport(const Perplexity&, const Perplexity&,
|
||||
const Perplexity&, const Perplexity&,
|
||||
ostream&, int, int, bool);
|
||||
};
|
||||
|
||||
|
||||
#endif
|
@ -0,0 +1,175 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef HEADER_Pointer_DEFINED
|
||||
#define HEADER_Pointer_DEFINED
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
// Thin non-owning wrapper around a raw T*; base class of the pointer
// flavours defined in this header. Nothing is deleted here.
template<class T>
class SmartPointer
{
 protected:
  T*p;

 public:
  SmartPointer(T*_p=0)
    : p(_p)
  {
  }

  // Dereference; the caller must make sure the pointer is not null.
  inline T&operator*() const
  {
    return *p;
  }

  inline T*operator->() const
  {
    return p;
  }

  // True when a non-null pointer is held.
  inline operator bool() const
  {
    return p ? true : false;
  }

  inline T*ptr() const
  {
    return p;
  }
};

// Prints the pointee, or the text "nullpointer" when no object is held.
template<class T> inline ostream &operator<<(ostream&out,const SmartPointer<T>&s)
{
  if( !s.ptr() )
    return out <<"nullpointer";
  return out << *s;
}
|
||||
|
||||
|
||||
// Thin non-owning wrapper around a raw const T*; the read-only counterpart
// of SmartPointer. Nothing is deleted here.
template<class T>
class SmartPointerConst
{
 protected:
  const T*p;

 public:
  SmartPointerConst(const T*_p=0)
    : p(_p)
  {
  }

  // Dereference; the caller must make sure the pointer is not null.
  inline const T&operator*() const
  {
    return *p;
  }

  inline const T*operator->() const
  {
    return p;
  }

  // True when a non-null pointer is held.
  inline operator bool() const
  {
    return p ? true : false;
  }

  inline const T*ptr() const
  {
    return p;
  }
};

// Prints the pointee, or the text "nullpointer" when no object is held.
template<class T> inline ostream &operator<<(ostream&out,const SmartPointerConst<T>&s)
{
  if( !s.ptr() )
    return out <<"nullpointer";
  return out << *s;
}
|
||||
|
||||
template <class T>
|
||||
class UP : public SmartPointer<T>
|
||||
{
|
||||
public:
|
||||
UP(T*_p=0)
|
||||
: SmartPointer<T>(_p) {}
|
||||
};
|
||||
template<class T> inline bool operator==(const UP<T>&s1,const UP<T>&s2)
|
||||
{return s1.ptr()==s2.ptr();}
|
||||
template<class T> inline bool operator<(const UP<T>&s1,const UP<T>&s2)
|
||||
{return s1.ptr() < s2.ptr();}
|
||||
template<class T> inline int Hash(const UP<T> &wp)
|
||||
{if(wp.ptr())return Hash(*wp);else return 0;}
|
||||
|
||||
|
||||
template <class T>
|
||||
class UPConst : public SmartPointerConst<T>
|
||||
{
|
||||
public:
|
||||
UPConst(const T*_p=0)
|
||||
: SmartPointerConst<T>(_p) {}
|
||||
};
|
||||
template<class T> inline bool operator==(const UPConst<T>&s1,const UPConst<T>&s2)
|
||||
{return s1.ptr()==s2.ptr();}
|
||||
template<class T> inline bool operator<(const UPConst<T>&s1,const UPConst<T>&s2)
|
||||
{return s1.ptr()<s2.ptr();}
|
||||
template<class T> inline int Hash(const UPConst<T> &wp)
|
||||
{if(wp.ptr())return Hash(*wp);else return 0;}
|
||||
|
||||
|
||||
template <class T>
|
||||
class MP : public SmartPointer<T>
|
||||
{
|
||||
public:
|
||||
MP(T*_p=0)
|
||||
: SmartPointer<T>(_p) {}
|
||||
};
|
||||
template <class T> inline bool operator==(const MP<T>&s1,const MP<T>&s2)
|
||||
{assert(s1);assert(s2);return *s1==*s2;}
|
||||
template <class T> inline bool operator<(const MP<T>&s1,const MP<T>&s2)
|
||||
{assert(s1);assert(s2);return *s1 < *s2;}
|
||||
template <class T> inline int Hash(const MP<T> &wp)
|
||||
{if(wp.ptr())return Hash(*wp);else return 0;}
|
||||
|
||||
|
||||
template <class T>
|
||||
class MPConst : public SmartPointerConst<T>
|
||||
{
|
||||
public:
|
||||
MPConst(const T*_p=0)
|
||||
: SmartPointerConst<T>(_p) {}
|
||||
};
|
||||
template <class T> inline bool operator==(const MPConst<T>&s1,const MPConst<T>&s2)
|
||||
{assert(s1);assert(s2);return *s1== *s2;}
|
||||
template <class T> inline bool operator<(const MPConst<T>&s1,const MPConst<T>&s2)
|
||||
{assert(s1);assert(s2);return *s1 < *s2;}
|
||||
template <class T> inline int Hash(const MPConst<T> &wp)
|
||||
{if(wp.ptr())return Hash(*wp);else return 0;}
|
||||
|
||||
|
||||
template <class T>
|
||||
class DELP : public SmartPointer<T>
|
||||
{
|
||||
private:
|
||||
DELP(const DELP<T>&x);
|
||||
public:
|
||||
const DELP<T>&operator=(DELP<T>&x)
|
||||
{
|
||||
delete this->p;
|
||||
this->p=x.p;x.p=0;
|
||||
return *this;
|
||||
}
|
||||
|
||||
~DELP()
|
||||
{ delete this->p;this->p=0;}
|
||||
DELP(T*_p=0)
|
||||
: SmartPointer<T>(_p) {}
|
||||
void set(T*_p)
|
||||
{
|
||||
delete this->p;
|
||||
this->p=_p;
|
||||
}
|
||||
friend bool operator==(const DELP<T>&s1,const DELP<T>&s2)
|
||||
{
|
||||
return *(s1.p)== *(s2.p);
|
||||
}
|
||||
friend bool operator<(const DELP<T>&s1,const DELP<T>&s2)
|
||||
{
|
||||
return *(s1.p) < *(s2.p);
|
||||
}
|
||||
friend inline int Hash(const DELP<T> &wp)
|
||||
{
|
||||
if(wp.p)
|
||||
return Hash(*wp.p);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,5 @@
|
||||
|
||||
//#include "SetArray.h"
|
||||
|
||||
#include "Parameter.h"
|
||||
|
@ -0,0 +1,159 @@
|
||||
/*
|
||||
An array of sets for fast dictionary access that must, most importantly,
|
||||
be threadsafe
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __SET_ARRAY_H__
|
||||
#define __SET_ARRAY_H__
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include "defs.h"
|
||||
#include "vocab.h"
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <pthread.h>
|
||||
#include "syncObj.h"
|
||||
|
||||
// Plain value pair holding a count and a probability.
template <class COUNT, class PROB>
class LpPair {
 public:
  COUNT count ;
  PROB prob ;

 public:
  // Both members start at 0.
  LpPair()
    : count(0), prob(0)
  {
  }

  LpPair(COUNT c, PROB p)
    : count(c), prob(p)
  {
  }
} ;
|
||||
|
||||
|
||||
|
||||
|
||||
template <class COUNT, class PROB>
|
||||
class SetArray{
|
||||
public:
|
||||
typedef LpPair<COUNT, PROB> CPPair;
|
||||
protected:
|
||||
|
||||
/*Information stores here*/
|
||||
std::vector<std::map<size_t,CPPair> > store;
|
||||
std::vector<Mutex> muts;
|
||||
size_t nEnglishWord;
|
||||
size_t nFrenchWord;
|
||||
void _init(){
|
||||
store.resize(nEnglishWord);
|
||||
muts.resize(nFrenchWord);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/*
|
||||
Get reference, not creating
|
||||
*/
|
||||
CPPair* find(size_t fi, size_t si){
|
||||
/*HERE: lock, unlock after we get the pointer*/
|
||||
muts[fi].lock();
|
||||
/* Sync-ed */
|
||||
std::map<size_t,CPPair>& w = store[fi];
|
||||
typename std::map<size_t,CPPair>::iterator it = w.find((size_t)si);
|
||||
CPPair* q = ( it!=store[fi].end() ? &(it->second) : 0);
|
||||
// for(it = w.begin(); it!=w.end();it++){
|
||||
// cout << it->first << endl;
|
||||
// }
|
||||
/* End Synced*/
|
||||
muts[fi].unlock();
|
||||
return q;
|
||||
};
|
||||
|
||||
/*
|
||||
Get reference, creating it
|
||||
*/
|
||||
inline CPPair& findRef(size_t fi, size_t si){
|
||||
std::map<size_t,CPPair> &x = store[fi];
|
||||
muts[fi].lock();
|
||||
/* Sync-ed */
|
||||
CPPair& ref= x[si];
|
||||
/* End Synced */
|
||||
muts[fi].unlock();
|
||||
};
|
||||
|
||||
|
||||
void insert(size_t fi, size_t si, COUNT count = 0, PROB prob = 0){
|
||||
muts[fi].lock();
|
||||
/*Syced*/
|
||||
std::map<size_t,CPPair> &x = store[fi];
|
||||
CPPair& v= x[si];
|
||||
v.count = count;
|
||||
v.prob = prob;
|
||||
muts[fi].unlock();
|
||||
}
|
||||
|
||||
void incCount(size_t e, size_t f, COUNT inc)
|
||||
// increments the count of the given word pair. if the pair does not exist,
|
||||
// it creates it with the given value.
|
||||
{
|
||||
if( inc ){
|
||||
std::map<size_t,CPPair> &x = store[e];
|
||||
muts[e].lock();
|
||||
CPPair& ref= x[f];
|
||||
ref.count += inc;
|
||||
muts[e].unlock();
|
||||
}
|
||||
}
|
||||
|
||||
PROB getProb(size_t e, size_t f) const
|
||||
// read probability value for P(fj/ei) from the hash table
|
||||
// if pair does not exist, return floor value PROB_SMOOTH
|
||||
{
|
||||
muts[e].lock();
|
||||
typename std::map<size_t,CPPair >::const_iterator it = store[e].find(f);
|
||||
PROB b;
|
||||
if(it == store[e].end())
|
||||
b = PROB_SMOOTH;
|
||||
else
|
||||
b=max((it->second).prob, PROB_SMOOTH);
|
||||
muts[e].unlock();
|
||||
return b;
|
||||
}
|
||||
|
||||
COUNT getCount(size_t e, size_t f) const
|
||||
/* read count value for entry pair (fj/ei) from the hash table */
|
||||
{
|
||||
muts[e].lock();
|
||||
typename std::map<size_t,CPPair >::const_iterator it = store[e].find(f);
|
||||
COUNT c;
|
||||
if(it == store[e].end())
|
||||
c = 0;
|
||||
else
|
||||
c = ((*it).second).count;
|
||||
muts[e].unlock();
|
||||
}
|
||||
|
||||
void erase(size_t e, size_t f)
|
||||
// In: a source and a target token ids.
|
||||
// removes the entry with that pair from table
|
||||
{
|
||||
muts[e].lock();
|
||||
store[e].erase(f);
|
||||
muts[e].unlock();
|
||||
};
|
||||
|
||||
inline void setNumberOfEnlish(size_t e){nEnglishWord=e;_init();};
|
||||
inline void setNumberOfFrench(size_t f){nFrenchWord = f;};
|
||||
|
||||
const std::map<size_t,CPPair>& getMap(size_t i) const{
|
||||
return store[i];
|
||||
}
|
||||
|
||||
std::map<size_t,CPPair>& getMap1(size_t i){
|
||||
return store[i];
|
||||
}
|
||||
|
||||
SetArray(size_t e, size_t f): nEnglishWord(e), nFrenchWord(f){
|
||||
_init();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
@ -0,0 +1,177 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#include "TTables.h"
|
||||
#include "Parameter.h"
|
||||
#include<iostream>
|
||||
#include <fstream>
|
||||
|
||||
GLOBAL_PARAMETER(float,PROB_CUTOFF,"PROB CUTOFF","Probability cutoff threshold for lexicon probabilities",PARLEV_OPTHEUR,1e-7);
|
||||
GLOBAL_PARAMETER2(float, COUNTINCREASE_CUTOFF,"COUNTINCREASE CUTOFF","countCutoff","Counts increment cutoff threshold",PARLEV_OPTHEUR,1e-6);
|
||||
|
||||
|
||||
/* ------------------ Method Definitions for Class tmodel -------------------*/
|
||||
|
||||
|
||||
// To output to STDOUT, submit filename as NULL
|
||||
template <class COUNT, class PROB>
|
||||
void tmodel<COUNT, PROB>::printCountTable(const char *filename,
|
||||
const Vector<WordEntry>& evlist,
|
||||
const Vector<WordEntry>& fvlist,
|
||||
const bool actual) const
|
||||
{
|
||||
ostream *tof;
|
||||
|
||||
if(filename)
|
||||
tof = new ofstream(filename);
|
||||
else
|
||||
tof = & cout;
|
||||
|
||||
ostream &of = *tof;
|
||||
/* for(unsigned int i=0;i<es.size()-1;++i)
|
||||
for(unsigned int j=es[i];j<es[i+1];++j)
|
||||
{
|
||||
const CPPair&x=fs[j].second;
|
||||
WordIndex e=i,f=fs[j].first;
|
||||
if( actual )
|
||||
of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
|
||||
else
|
||||
of << e << ' ' << f << ' ' << x.prob << '\n';
|
||||
}*/
|
||||
for(unsigned int i=0;i<lexmat.size();++i){
|
||||
if( lexmat[i] ){
|
||||
for(unsigned int j=0;j<lexmat[i]->size();++j)
|
||||
{
|
||||
const CPPair&x=(*lexmat[i])[j].second;
|
||||
WordIndex e=i,f=(*lexmat[i])[j].first;
|
||||
if( x.prob>MINCOUNTINCREASE ){
|
||||
if( actual ){
|
||||
of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.count << '\n';
|
||||
}else{
|
||||
of << e << ' ' << f << ' ' << x.count << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(filename){
|
||||
((ofstream*)tof)->close();
|
||||
delete tof;
|
||||
}
|
||||
}
|
||||
|
||||
template <class COUNT, class PROB>
|
||||
void tmodel<COUNT, PROB>::printProbTable(const char *filename,
|
||||
const Vector<WordEntry>& evlist,
|
||||
const Vector<WordEntry>& fvlist,
|
||||
const bool actual) const
|
||||
{
|
||||
ofstream of(filename);
|
||||
/* for(unsigned int i=0;i<es.size()-1;++i)
|
||||
for(unsigned int j=es[i];j<es[i+1];++j)
|
||||
{
|
||||
const CPPair&x=fs[j].second;
|
||||
WordIndex e=i,f=fs[j].first;
|
||||
if( actual )
|
||||
of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
|
||||
else
|
||||
of << e << ' ' << f << ' ' << x.prob << '\n';
|
||||
}*/
|
||||
for(unsigned int i=0;i<lexmat.size();++i){
|
||||
if( lexmat[i] ){
|
||||
for(unsigned int j=0;j<lexmat[i]->size();++j)
|
||||
{
|
||||
const CPPair&x=(*lexmat[i])[j].second;
|
||||
WordIndex e=i,f=(*lexmat[i])[j].first;
|
||||
if( x.prob>PROB_SMOOTH ){
|
||||
if( actual ){
|
||||
of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
|
||||
}else{
|
||||
of << e << ' ' << f << ' ' << x.prob << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class COUNT, class PROB>
|
||||
void tmodel<COUNT, PROB>::printProbTableInverse(const char *,
|
||||
const Vector<WordEntry>&,
|
||||
const Vector<WordEntry>&,
|
||||
const double,
|
||||
const double,
|
||||
const bool ) const
|
||||
{
|
||||
}
|
||||
template <class COUNT, class PROB>
|
||||
void tmodel<COUNT, PROB>::normalizeTable(const vcbList&, const vcbList&, int)
|
||||
{
|
||||
for(unsigned int i=0;i<lexmat.size();++i){
|
||||
double c=0.0;
|
||||
if( lexmat[i] ){
|
||||
unsigned int lSize=lexmat[i]->size();
|
||||
for(unsigned int j=0;j<lSize;++j)
|
||||
c+=(*lexmat[i])[j].second.count;
|
||||
for(unsigned int j=0;j<lSize;++j) {
|
||||
if( c==0 )
|
||||
(*lexmat[i])[j].second.prob=1.0/(lSize);
|
||||
else
|
||||
(*lexmat[i])[j].second.prob=(*lexmat[i])[j].second.count/c;
|
||||
(*lexmat[i])[j].second.count=0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class COUNT, class PROB>
|
||||
bool tmodel<COUNT, PROB>::readProbTable(const char *filename){
|
||||
/* This function reads the t table from a file.
|
||||
Each line is of the format: source_word_id target_word_id p(target_word|source_word)
|
||||
This is the inverse operation of the printTable function.
|
||||
NAS, 7/11/99
|
||||
*/
|
||||
ifstream inf(filename);
|
||||
cerr << "Reading t prob. table from " << filename << "\n";
|
||||
if (!inf) {
|
||||
cerr << "\nERROR: Cannot open " << filename << "\n";
|
||||
return false;
|
||||
}
|
||||
WordIndex src_id, trg_id;
|
||||
PROB prob;
|
||||
int nEntry=0;
|
||||
while (inf >> src_id >> trg_id >> prob) {
|
||||
insert(src_id, trg_id, 0.0, prob);
|
||||
nEntry++;
|
||||
}
|
||||
cerr << "Read " << nEntry << " entries in prob. table.\n";
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template class tmodel<COUNT,PROB> ;
|
||||
|
||||
/* ---------------- End of Method Definitions of class tmodel ---------------*/
|
||||
|
||||
|
||||
|
@ -0,0 +1,330 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/* --------------------------------------------------------------------------*
|
||||
* *
|
||||
* Module : TTables *
|
||||
* *
|
||||
* Prototypes File: TTables.h *
|
||||
* *
|
||||
 * Objective: Defines classes and methods for handling I/O for Probability & *
|
||||
* Count tables and also alignment tables *
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _ttables_h
|
||||
#define _ttables_h 1
|
||||
|
||||
|
||||
#include "defs.h"
|
||||
#include "vocab.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include "Vector.h"
|
||||
#include <utility>
|
||||
#include "syncObj.h"
|
||||
|
||||
#if __GNUC__>2
|
||||
#include <ext/hash_map>
|
||||
using __gnu_cxx::hash_map;
|
||||
#else
|
||||
#include <hash_map>
|
||||
#endif
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "Globals.h"
|
||||
|
||||
|
||||
/* The tables defined in the following classes are defined as hash tables. For
|
||||
example. the t-table is a hash function of a word pair; an alignment is
|
||||
a hash function of a vector of integer numbers (sentence positions) and so
|
||||
on */
|
||||
|
||||
|
||||
/*----------- Definition of Hash Function for class tmodel ------------------*/
|
||||
|
||||
typedef pair<WordIndex, WordIndex> wordPairIds;
|
||||
|
||||
|
||||
class hashpair : public unary_function< pair<WordIndex, WordIndex>, size_t >
|
||||
{
|
||||
public:
|
||||
size_t operator() (const pair<WordIndex, WordIndex>& key) const
|
||||
{
|
||||
return (size_t) MAX_W*key.first + key.second; /* hash function and it
|
||||
is guarnteed to have
|
||||
unique id for each
|
||||
unique pair */
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
/* ------------------ Class Prototype Definitions ---------------------------*
|
||||
Class Name: tmodel
|
||||
Objective: This defines the underlying data structure for t Tables and t
|
||||
Count Tables. They are defined as a hash table. Each entry in the hash table
|
||||
is the probability (P(fj/ei) ) or count collected for ( C(fj/ei)). The
|
||||
probability and the count are represented as log integer probability as
|
||||
defined by the class LogProb .
|
||||
|
||||
This class is used to represent t Tables (probability) and n (fertility
|
||||
Tables) and also their corresponding count tables.
|
||||
|
||||
*---------------------------------------------------------------------------*/
|
||||
|
||||
//typedef float COUNT ;
|
||||
//typedef LogProb PROB ;
|
||||
// Plain value pair holding a count and a probability.
template <class COUNT, class PROB>
class LpPair {
 public:
  COUNT count ;
  PROB prob ;

 public:
  // Both members start at 0.
  LpPair()
    : count(0), prob(0)
  {
  }

  LpPair(COUNT c, PROB p)
    : count(c), prob(p)
  {
  }
} ;
|
||||
|
||||
// Binary search in the half-open range [x,y) of elements ordered by their
// `first` member. Returns a pointer to an element whose `first` equals
// `val`, or 0 when there is none.
template<class T>
T*mbinary_search(T*x,T*y,unsigned int val)
{
  // Each round either answers from the first element of the current range
  // or halves the range around its middle element.
  while( y-x!=0 )
    {
      if( x->first==val)
        return x;
      if( y-x<2 )
        return 0;
      T*mid=x+(y-x)/2;
      if( val < mid->first )
        y=mid;
      else
        x=mid;
    }
  return 0;
}
|
||||
|
||||
// Read-only variant of the binary search over the half-open range [x,y) of
// elements ordered by their `first` member. Returns a pointer to an element
// whose `first` equals `val`, or 0 when there is none.
template<class T>
const T*mbinary_search(const T*x,const T*y,unsigned int val)
{
  // Each round either answers from the first element of the current range
  // or halves the range around its middle element.
  while( y-x!=0 )
    {
      if( x->first==val)
        return x;
      if( y-x<2 )
        return 0;
      const T*mid=x+(y-x)/2;
      if( val < mid->first )
        y=mid;
      else
        x=mid;
    }
  return 0;
}
|
||||
|
||||
template <class COUNT, class PROB>
|
||||
class tmodel{
|
||||
typedef LpPair<COUNT, PROB> CPPair;
|
||||
public:
|
||||
bool recordDiff;
|
||||
|
||||
public:
|
||||
int noEnglishWords; // total number of unique source words
|
||||
int noFrenchWords; // total number of unique target words
|
||||
//vector<pair<unsigned int,CPPair> > fs;
|
||||
//vector<unsigned int> es;
|
||||
vector< vector<pair<unsigned int,CPPair> >* > lexmat;
|
||||
vector< Mutex > mutex;
|
||||
|
||||
void erase(WordIndex e, WordIndex f){
|
||||
CPPair *p=find(e,f);
|
||||
if(p)
|
||||
*p=CPPair(0,0);
|
||||
};
|
||||
|
||||
CPPair*find(int e,int f){
|
||||
//pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
|
||||
//pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
|
||||
if(e>lexmat.size()||lexmat[e]==NULL){
|
||||
return NULL;
|
||||
}
|
||||
pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
|
||||
pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
|
||||
pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
|
||||
if( x==0 ){
|
||||
//cerr << "A:DID NOT FIND ENTRY: " << e << " " << f << '\n';
|
||||
//abort();
|
||||
return 0;
|
||||
}
|
||||
return &(x->second);
|
||||
}
|
||||
|
||||
const CPPair*find(int e,int f)const{
|
||||
if(lexmat[e]==0)
|
||||
return 0;
|
||||
const pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
|
||||
const pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
|
||||
//const pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
|
||||
//const pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
|
||||
const pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
|
||||
if( x==0 ){
|
||||
//cerr << "B:DID NOT FIND ENTRY: " << e << " " << f << '\n';
|
||||
//abort();
|
||||
return 0;
|
||||
}
|
||||
|
||||
return &(x->second);
|
||||
}
|
||||
public:
|
||||
void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
|
||||
CPPair* found = find(e,f);
|
||||
if(found)
|
||||
*found=CPPair(cval,pval);
|
||||
}
|
||||
|
||||
CPPair*getPtr(int e,int f){return find(e,f);}
|
||||
|
||||
tmodel(){};
|
||||
tmodel(const string&fn) {
|
||||
recordDiff = false;
|
||||
int count=0,count2=0;
|
||||
ifstream infile2(fn.c_str());
|
||||
cerr << "Inputfile in " << fn << endl;
|
||||
int e,f,olde=-1,oldf=-1;
|
||||
pair<unsigned int,CPPair> cp;
|
||||
vector< pair<unsigned int,CPPair> > cps;
|
||||
while(infile2>>e>>f){
|
||||
cp.first=f;
|
||||
assert(e>=olde);
|
||||
assert(e>olde ||f>oldf);
|
||||
if( e!=olde&&olde>=0 ){
|
||||
int oldsize=lexmat.size();
|
||||
lexmat.resize(olde+1);
|
||||
for(unsigned int i=oldsize;i<lexmat.size();++i)
|
||||
lexmat[i]=0;
|
||||
lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
|
||||
cps.clear();
|
||||
if( !((*lexmat[olde]).size()==(*lexmat[olde]).capacity()) )
|
||||
cerr << "eRROR: waste of memory: " << (*lexmat[olde]).size() << " " << (*lexmat[olde]).capacity() << endl;
|
||||
count2+=lexmat[olde]->capacity();
|
||||
}
|
||||
cps.push_back(cp);
|
||||
olde=e;
|
||||
oldf=f;
|
||||
count++;
|
||||
}
|
||||
lexmat.resize(olde+1);
|
||||
lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
|
||||
count2+=lexmat[olde]->capacity();
|
||||
cout << "There are " << count << " " << count2 << " entries in table" << '\n';
|
||||
mutex.resize(lexmat.size());
|
||||
/* Create mutex */
|
||||
}
|
||||
|
||||
|
||||
/* tmodel(const string&fn)
|
||||
{
|
||||
size_t count=0;
|
||||
{
|
||||
ifstream infile1(fn.c_str());
|
||||
if( !infile1 )
|
||||
{
|
||||
cerr << "ERROR: can't read coocurrence file " << fn << '\n';
|
||||
abort();
|
||||
}
|
||||
int e,f;
|
||||
while(infile1>>e>>f)
|
||||
count++;
|
||||
}
|
||||
cout << "There are " << count << " entries in table" << '\n';
|
||||
ifstream infile2(fn.c_str());
|
||||
fs.resize(count);
|
||||
int e,f,olde=-1,oldf=-1;
|
||||
pair<unsigned int,CPPair> cp;
|
||||
count=0;
|
||||
while(infile2>>e>>f)
|
||||
{
|
||||
assert(e>=olde);
|
||||
assert(e>olde ||f>oldf);
|
||||
if( e!=olde )
|
||||
{
|
||||
es.resize(e+1);
|
||||
for(unsigned int i=olde+1;int(i)<=e;++i)
|
||||
es[i]=count;
|
||||
}
|
||||
cp.first=f;
|
||||
assert(count<fs.size());
|
||||
fs[count]=cp;
|
||||
//fs.push_back(cp);
|
||||
olde=e;
|
||||
oldf=f;
|
||||
count++;
|
||||
}
|
||||
assert(count==fs.size());
|
||||
es.push_back(fs.size());
|
||||
cout << fs.size() << " " << count << " coocurrences read" << '\n';
|
||||
}*/
|
||||
|
||||
void incCount(WordIndex e, WordIndex f, COUNT inc) {
|
||||
if( inc ){
|
||||
CPPair *p=find(e,f);
|
||||
if( p ){
|
||||
mutex[e].lock();
|
||||
p->count += inc ;
|
||||
mutex[e].unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PROB getProb(WordIndex e, WordIndex f) const{
|
||||
const CPPair *p=find(e,f);
|
||||
if( p )
|
||||
return max(p->prob, PROB_SMOOTH);
|
||||
else
|
||||
return PROB_SMOOTH;
|
||||
}
|
||||
|
||||
COUNT getCount(WordIndex e, WordIndex f) const
|
||||
{
|
||||
const CPPair *p=find(e,f);
|
||||
if( p )
|
||||
return p->count;
|
||||
else
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
|
||||
void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
|
||||
void printProbTableInverse(const char *filename,
|
||||
const Vector<WordEntry>& evlist,
|
||||
const Vector<WordEntry>& fvlist,
|
||||
const double eTotal,
|
||||
const double fTotal,
|
||||
const bool actual = false ) const;
|
||||
void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
|
||||
bool readProbTable(const char *filename);
|
||||
bool readSubSampledProbTable(const char* filename, std::set<WordIndex> &e, std::set<WordIndex> &f);
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,423 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/*--
|
||||
Vector: checked vector implementation
|
||||
|
||||
Franz Josef Och (30/07/99)
|
||||
--*/
|
||||
#ifndef ARRAY_H_DEFINED
|
||||
#define ARRAY_H_DEFINED
|
||||
#include "mystl.h"
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <functional>
|
||||
#include <cassert>
|
||||
|
||||
|
||||
#ifdef NDEBUG
|
||||
|
||||
#include <vector>
|
||||
#define Vector vector
|
||||
// Debug dump of a vector: "Vector(<size>){ " followed by " <index>: <value> ;"
// for every element and a closing "}\n".
template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
{
  o << "Vector(" << a.size() << "){ ";
  unsigned int idx = 0;
  while( idx < a.size() )
    {
      o << " " << idx << ": " << a[idx] << " ;";
      ++idx;
    }
  o << "}\n";
  return o;
}
|
||||
|
||||
#else
|
||||
|
||||
#define ARRAY_DEBUG
|
||||
#define memo_del(a, b)
|
||||
#define memo_new(a)
|
||||
|
||||
template<class T> class Vector
|
||||
{
|
||||
private:
|
||||
T *p;
|
||||
int realSize;
|
||||
int maxWritten;
|
||||
|
||||
void copy(T *a, const T *b, int n);
|
||||
void copy(T *a, T *b, int n);
|
||||
void _expand();
|
||||
public:
|
||||
Vector()
|
||||
: p(0), realSize(0), maxWritten(-1)
|
||||
{
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "MAKE ARRAY: " << this<<" "<<(void*)p << '\n';
|
||||
#endif
|
||||
}
|
||||
Vector(const Vector<T> &x)
|
||||
: p(new T[x.maxWritten+1]), realSize(x.maxWritten+1), maxWritten(x.maxWritten)
|
||||
{
|
||||
memo_new(p);
|
||||
copy(p, x.p, realSize);
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< '\n';
|
||||
#endif
|
||||
}
|
||||
explicit Vector(int n)
|
||||
: p(new T[n]), realSize(n), maxWritten(n-1)
|
||||
{
|
||||
memo_new(p);
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||
#endif
|
||||
}
|
||||
Vector(int n, const T&_init)
|
||||
: p(new T[n]), realSize(n), maxWritten(n-1)
|
||||
{
|
||||
memo_new(p);
|
||||
for(int iii=0;iii<n;iii++)p[iii]=_init;
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||
#endif
|
||||
}
|
||||
|
||||
~Vector()
|
||||
{
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||
#endif
|
||||
delete [] p;
|
||||
memo_del(p, 1);
|
||||
#ifndef NDEBUG
|
||||
p=0;realSize=-1;maxWritten=-1;
|
||||
#endif
|
||||
}
|
||||
|
||||
Vector<T>& operator=(const Vector<T>&x)
|
||||
{
|
||||
if( this!= &x )
|
||||
{
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||
#endif
|
||||
delete [] p;
|
||||
memo_del(p, 1);
|
||||
realSize = x.maxWritten+1;
|
||||
maxWritten = x.maxWritten;
|
||||
p = new T[realSize];
|
||||
memo_new(p);
|
||||
copy(p, x.p, realSize);
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Vector<T>& operator=(Vector<T>&x)
|
||||
{
|
||||
if( this!= &x )
|
||||
{
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||
#endif
|
||||
delete [] p;
|
||||
memo_del(p, 1);
|
||||
realSize = x.maxWritten+1;
|
||||
maxWritten = x.maxWritten;
|
||||
p = new T[realSize];
|
||||
memo_new(p);
|
||||
copy(p, x.p, realSize);
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void allowAccess(int n)
|
||||
{
|
||||
while( realSize<=n )
|
||||
_expand();
|
||||
maxWritten=max(maxWritten, n);
|
||||
assert( maxWritten<realSize );
|
||||
}
|
||||
void resize(int n)
|
||||
{
|
||||
while( realSize<n )
|
||||
_expand();
|
||||
maxWritten=n-1;
|
||||
}
|
||||
void clear()
|
||||
{
|
||||
resize(0);
|
||||
}
|
||||
void reserve(int n)
|
||||
{
|
||||
int maxOld=maxWritten;
|
||||
resize(n);
|
||||
maxWritten=maxOld;
|
||||
}
|
||||
void sort(int until=-1)
|
||||
{
|
||||
if( until== -1 ) until=size();
|
||||
std::sort(p, p+until);
|
||||
}
|
||||
void invsort(int until=-1)
|
||||
{
|
||||
if( until== -1 ) until=size();
|
||||
std::sort(p, p+until, greater<T>());
|
||||
}
|
||||
void init(int n, const T&_init)
|
||||
{
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||
#endif
|
||||
delete []p;
|
||||
memo_del(p, 1);
|
||||
p=new T[n];
|
||||
memo_new(p);
|
||||
realSize=n;
|
||||
maxWritten=n-1;
|
||||
for(int iii=0;iii<n;iii++)p[iii]=_init;
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
||||
#endif
|
||||
}
|
||||
inline unsigned int size() const
|
||||
{assert( maxWritten<realSize );
|
||||
return maxWritten+1;}
|
||||
inline int low() const
|
||||
{ return 0; }
|
||||
inline int high() const
|
||||
{ return maxWritten; }
|
||||
int findMax() const;
|
||||
int findMin() const;
|
||||
void errorAccess(int n) const;
|
||||
inline T*getPointerToData(){return p;}
|
||||
inline T*begin(){return p;}
|
||||
inline T*end(){return p+maxWritten+1;}
|
||||
inline T& operator[](int n)
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
if( n<0 || n>maxWritten )
|
||||
errorAccess(n);
|
||||
#endif
|
||||
return p[n];
|
||||
}
|
||||
inline const T& operator[](int n) const
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
if(n<0 || n>maxWritten )
|
||||
errorAccess(n);
|
||||
#endif
|
||||
return p[n];
|
||||
}
|
||||
inline const T& get(int n) const
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
if(n<0 || n>maxWritten )
|
||||
errorAccess(n);
|
||||
#endif
|
||||
return p[n];
|
||||
}
|
||||
const T&top(int n=0) const
|
||||
{return (*this)[maxWritten-n];}
|
||||
T&top(int n=0)
|
||||
{return (*this)[maxWritten-n];}
|
||||
const T&back(int n=0) const
|
||||
{return (*this)[maxWritten-n];}
|
||||
T&back(int n=0)
|
||||
{return (*this)[maxWritten-n];}
|
||||
T&push_back(const T&x)
|
||||
{
|
||||
allowAccess(maxWritten+1);
|
||||
(*this)[maxWritten]=x;
|
||||
return top();
|
||||
}
|
||||
bool writeTo(ostream&out) const
|
||||
{
|
||||
out << "Vector ";
|
||||
out << size() << " ";
|
||||
//out << a << '\n';
|
||||
for(int iv=0;iv<=maxWritten;iv++)
|
||||
{
|
||||
writeOb(out, (*this)[iv]);
|
||||
out << '\n';
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
bool readFrom(istream&in)
|
||||
{
|
||||
string s;
|
||||
if( !in )
|
||||
{
|
||||
cerr << "ERROR(Vector): file cannot be opened.\n";
|
||||
return 0;
|
||||
}
|
||||
in >> s;
|
||||
if( !(s=="Vector") )
|
||||
{
|
||||
cerr << "ERROR(Vector): Vector!='"<<s<<"'\n";
|
||||
return 0;
|
||||
}
|
||||
int biggest;
|
||||
in >> biggest;
|
||||
// in >> a;
|
||||
resize(biggest);
|
||||
for(int iv=0;iv<size();iv++)
|
||||
{
|
||||
readOb(in, (*this)[iv]);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
template<class T> bool operator==(const Vector<T> &x, const Vector<T> &y)
|
||||
{
|
||||
if( &x == &y )
|
||||
return 1;
|
||||
else
|
||||
{
|
||||
if( y.size()!=x.size() )
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
for(unsigned int iii=0;iii<x.size();iii++)
|
||||
if( !(x[iii]==y[iii]) )
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
template<class T> bool operator!=(const Vector<T> &x, const Vector<T> &y)
|
||||
{
|
||||
return !(x==y);
|
||||
}
|
||||
|
||||
template<class T> bool operator<(const Vector<T> &x, const Vector<T> &y)
|
||||
{
|
||||
if( &x == &y )
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
if( y.size()<x.size() )
|
||||
return !(y<x);
|
||||
for(int iii=0;iii<x.size();iii++)
|
||||
{
|
||||
assert( iii!=y.size() );
|
||||
if( x[iii]<y[iii] )
|
||||
return 1;
|
||||
else if( y[iii]<x[iii] )
|
||||
return 0;
|
||||
}
|
||||
return x.size()!=y.size();//??
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Reports an out-of-range access to element n on cerr and on cout, then
// fails an assertion; unless DEBUG is defined the program is also aborted.
template<class T> void Vector<T>:: errorAccess(int n) const
{
  ostream *sinks[2] = { &cerr, &cout };
  for(int which=0;which<2;++which)
    {
      *sinks[which] << "ERROR: Access to array element " << n
                    << " (" << maxWritten << ", " << realSize << ", " << (void*)p << ")\n";
    }
  assert(0);
#ifndef DEBUG
  abort();
#endif
}
|
||||
|
||||
// Debug dump of a vector: "Vector(<size>){ " followed by " <index>: <value> ;"
// for every element and a closing "}\n".
template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
{
  o << "Vector(" << a.size() << "){ ";
  unsigned int idx = 0;
  while( idx < a.size() )
    {
      o << " " << idx << ": " << a[idx] << " ;";
      ++idx;
    }
  o << "}\n";
  return o;
}
|
||||
|
||||
// Stream extraction stub: nothing is read and the vector is left untouched.
template<class T> istream& operator>>(istream&in, Vector<T>&)
{
  return in;
}
|
||||
|
||||
template<class T> int Hash(const Vector<T>&a)
|
||||
{
|
||||
int n=0;
|
||||
for(int iii=0;iii<a.size();iii++)
|
||||
n+=Hash(a[iii])*(iii+1);
|
||||
return n+a.size()*47;
|
||||
}
|
||||
// Assigns the first n elements of bb to aa, front to back.
template<class T> void Vector<T>::copy(T *aa, const T *bb, int n)
{
  int left=n;
  while( left>0 )
    {
      *aa++ = *bb++;
      --left;
    }
}
|
||||
// Overload for a non-const source: assigns the first n elements of bb to aa,
// front to back.
template<class T> void Vector<T>::copy(T *aa, T *bb, int n)
{
  int left=n;
  while( left>0 )
    {
      *aa++ = *bb++;
      --left;
    }
}
|
||||
|
||||
// Grows the storage to 2*realSize+1 elements, keeping the existing contents.
// The new array is value-initialised, so slots beyond the old capacity hold
// T() rather than indeterminate values when allowAccess()/resize() later
// make them accessible.
template<class T> void Vector<T>::_expand()
{
#ifdef VERY_ARRAY_DEBUG
  cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
  T *oldp=p;
  int oldsize=realSize;
  realSize=realSize*2+1;
  p=new T[realSize]();
  memo_new(p);
  copy(p, oldp, oldsize);
  delete [] oldp;
  memo_del(oldp, 1);
#ifdef VERY_ARRAY_DEBUG
  cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
}
|
||||
|
||||
// Index of the largest element according to operator< (the first one among
// equals), or -1 for an empty vector.
template<class T> int Vector<T>::findMax() const
{
  const int count=int(size());
  if( count==0 )
    return -1;
  int best=0;
  for(int pos=1;pos<count;++pos)
    {
      if( (*this)[best]<(*this)[pos] )
        best=pos;
    }
  return best;
}
|
||||
// Index of the smallest element according to operator< (the first one among
// equals), or -1 for an empty vector.
template<class T> int Vector<T>::findMin() const
{
  const int count=int(size());
  if( count==0 )
    return -1;
  int best=0;
  for(int pos=1;pos<count;++pos)
    {
      if( (*this)[pos]<(*this)[best] )
        best=pos;
    }
  return best;
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
@ -0,0 +1,103 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef WordClasses_h_DEFINED
|
||||
#define WordClasses_h_DEFINED
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "vocab.h"
|
||||
|
||||
class WordClasses
|
||||
{
|
||||
private:
|
||||
map<string,string> Sw2c;
|
||||
map<string,int> Sc2int;
|
||||
Vector<string> Sint2c;
|
||||
Vector<int> w2c;
|
||||
unsigned int classes;
|
||||
public:
|
||||
WordClasses()
|
||||
: classes(1)
|
||||
{
|
||||
Sint2c.push_back("0");
|
||||
Sc2int["0"]=0;
|
||||
}
|
||||
template<class MAPPER> bool read(istream&in,const MAPPER&m,const vcbList& vcb)
|
||||
{
|
||||
string sline;
|
||||
int maxword=0;
|
||||
int readWord=0, putWord=0;
|
||||
while(getline(in,sline))
|
||||
{
|
||||
readWord ++;
|
||||
string word,wclass;
|
||||
istrstream iline(sline.c_str());
|
||||
iline>>word>>wclass;
|
||||
|
||||
if( !Sc2int.count(wclass) )
|
||||
{
|
||||
Sc2int[wclass]=classes++;
|
||||
Sint2c.push_back(wclass);
|
||||
assert(classes==Sint2c.size());
|
||||
}
|
||||
if(vcb.has_word(word)){
|
||||
maxword=max(m(word),maxword);
|
||||
assert(Sw2c.count(word)==0);
|
||||
Sw2c[word]=wclass;
|
||||
putWord++;
|
||||
}
|
||||
}
|
||||
w2c=Vector<int>(maxword+1,0);
|
||||
for(map<string,string>::const_iterator i=Sw2c.begin();i!=Sw2c.end();++i)
|
||||
w2c[m(i->first)]=Sc2int[i->second];
|
||||
cout << "Read classes: #words: " << maxword << " " << " #classes: "<< classes <<endl;
|
||||
cout << "Actual number of read words: " << readWord << " stored words: " << putWord << endl;
|
||||
return 1;
|
||||
}
|
||||
int getClass(int w)const
|
||||
{
|
||||
if(w>=0&&int(w)<int(w2c.size()) )
|
||||
return w2c[w];
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
int operator()(const string&x)const
|
||||
{
|
||||
if( Sc2int.count(x) )
|
||||
return Sc2int.find(x)->second;
|
||||
else
|
||||
{
|
||||
cerr << "WARNING: class " << x << " not found.\n";
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
string classString(unsigned int cnr)const
|
||||
{
|
||||
if( cnr<Sint2c.size())
|
||||
return Sint2c[cnr];
|
||||
else
|
||||
return string("0");
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,38 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/*--
|
||||
alignment: 'checked' alignment representation with automatic calculation
|
||||
of fertilities
|
||||
Franz Josef Och (30/07/99)
|
||||
--*/
|
||||
#include "alignment.h"
|
||||
|
||||
// Prints an alignment as
//   AL(l:<l>,m:<m>)(a: a(1) ... a(m) )(fert: fert(0) ... fert(l) ) c:
// where l and m are taken from the sizes of the fertility and alignment
// vectors.
ostream&operator<<(ostream&out, const alignment&a)
{
  const int m=a.a.size()-1;
  const int l=a.f.size()-1;
  out << "AL(l:"<<l<<",m:"<<m<<")(a: ";
  int j=1;
  while( j<=m )
    {
      out << a(j) << ' ';
      ++j;
    }
  out << ")(fert: ";
  int i=0;
  while( i<=l )
    {
      out << a.fert(i) << ' ';
      ++i;
    }
  out << ") c:"<<"\n";
  return out;
}
|
||||
|
@ -0,0 +1,227 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/*--
|
||||
alignment: 'checked' alignment representation with autom. calc. of fertilities
|
||||
Franz Josef Och (30/07/99)
|
||||
--*/
|
||||
#ifndef alignment_h_fjo_defined
|
||||
#define alignment_h_fjo_defined
|
||||
#include "Vector.h"
|
||||
#include <cassert>
|
||||
#include "defs.h"
|
||||
#include "myassert.h"
|
||||
|
||||
class al_struct
|
||||
{
|
||||
public:
|
||||
al_struct()
|
||||
: prev(0),next(0){}
|
||||
PositionIndex prev,next;
|
||||
};
|
||||
|
||||
|
||||
class alignment
|
||||
{
|
||||
private:
|
||||
Vector<PositionIndex> a;
|
||||
Vector<PositionIndex> positionSum,f;
|
||||
public:
|
||||
Vector<PositionIndex> als_i;
|
||||
Vector<al_struct> als_j;
|
||||
PositionIndex l,m;
|
||||
alignment()
|
||||
{}
|
||||
alignment(PositionIndex _l, PositionIndex _m)
|
||||
: a(_m+1, (PositionIndex)0),
|
||||
positionSum(_l+1, (PositionIndex)0), f(_l+1, (PositionIndex)0), als_i(_l+1,0),als_j(_m+1),l(_l), m(_m)
|
||||
{
|
||||
f[0]=m;
|
||||
for(PositionIndex j=1;j<=m;j++)
|
||||
{
|
||||
if( j>1 )
|
||||
als_j[j].prev= j-1;
|
||||
if( j<m )
|
||||
als_j[j].next= j+1;
|
||||
}
|
||||
als_i[0]=1;
|
||||
}
|
||||
PositionIndex get_l()const
|
||||
{return l;}
|
||||
PositionIndex get_m()const
|
||||
{return m;}
|
||||
void doMove(int i,int j)
|
||||
{
|
||||
set(j,i);
|
||||
}
|
||||
void doSwap(int j1,int j2)
|
||||
{
|
||||
int aj1=a[j1],aj2=a[j2];
|
||||
set(j1,aj2);
|
||||
set(j2,aj1);
|
||||
}
|
||||
void set(PositionIndex j, PositionIndex aj)
|
||||
{
|
||||
PositionIndex old_aj=a[j];
|
||||
massert(j<a.size());massert(aj<f.size());
|
||||
massert(old_aj<f.size());massert(f[old_aj]>0);
|
||||
massert(j>0);
|
||||
positionSum[old_aj]-=j;
|
||||
// ausfuegen
|
||||
PositionIndex prev=als_j[j].prev;
|
||||
PositionIndex next=als_j[j].next;
|
||||
if( next )
|
||||
als_j[next].prev=prev;
|
||||
if( prev )
|
||||
als_j[prev].next=next;
|
||||
else
|
||||
als_i[old_aj]=next;
|
||||
|
||||
// neue Position suchen
|
||||
PositionIndex lfd=als_i[aj],llfd=0;
|
||||
while( lfd && lfd<j )
|
||||
lfd = als_j[llfd=lfd].next;
|
||||
|
||||
// einfuegen
|
||||
als_j[j].prev=llfd;
|
||||
als_j[j].next=lfd;
|
||||
if( llfd )
|
||||
als_j[llfd].next=j;
|
||||
else
|
||||
als_i[aj]=j;
|
||||
if( lfd )
|
||||
als_j[lfd].prev=j;
|
||||
|
||||
f[old_aj]--;
|
||||
positionSum[aj]+=j;
|
||||
f[aj]++;
|
||||
a[j]=aj;
|
||||
}
|
||||
const Vector<PositionIndex>& getAlignment() const
|
||||
{return a ;}
|
||||
PositionIndex get_al(PositionIndex j)const
|
||||
{
|
||||
massert(j<a.size());
|
||||
return a[j];
|
||||
}
|
||||
PositionIndex operator()(PositionIndex j)const
|
||||
{
|
||||
massert(j<a.size());
|
||||
return a[j];
|
||||
}
|
||||
PositionIndex fert(PositionIndex i)const
|
||||
{
|
||||
massert(i<f.size());
|
||||
return f[i];
|
||||
}
|
||||
PositionIndex get_head(PositionIndex i)const
|
||||
{
|
||||
massert( als_i[i]==_get_head(i) );
|
||||
return als_i[i];
|
||||
}
|
||||
PositionIndex get_center(PositionIndex i)const
|
||||
{
|
||||
if( i==0 )return 0;
|
||||
massert(((positionSum[i]+f[i]-1)/f[i]==_get_center(i)));
|
||||
return (positionSum[i]+f[i]-1)/f[i];
|
||||
}
|
||||
PositionIndex _get_head(PositionIndex i)const
|
||||
{
|
||||
if( fert(i)==0 )return 0;
|
||||
for(PositionIndex j=1;j<=m;j++)
|
||||
if( a[j]==i )
|
||||
return j;
|
||||
return 0;
|
||||
}
|
||||
PositionIndex _get_center(PositionIndex i)const
|
||||
{
|
||||
if( i==0 )return 0;
|
||||
massert(fert(i));
|
||||
PositionIndex sum=0;
|
||||
for(PositionIndex j=1;j<=m;j++)
|
||||
if( a[j]==i )
|
||||
sum+=j;
|
||||
return (sum+fert(i)-1)/fert(i);
|
||||
}
|
||||
PositionIndex prev_cept(PositionIndex i)const
|
||||
{
|
||||
if( i==0 )return 0;
|
||||
PositionIndex k=i-1;
|
||||
while(k&&fert(k)==0)
|
||||
k--;
|
||||
return k;
|
||||
}
|
||||
PositionIndex next_cept(PositionIndex i)const
|
||||
{
|
||||
PositionIndex k=i+1;
|
||||
while(k<l+1&&fert(k)==0)
|
||||
k++;
|
||||
return k;
|
||||
}
|
||||
PositionIndex prev_in_cept(PositionIndex j)const
|
||||
{
|
||||
//PositionIndex k=j-1;
|
||||
//while(k&&a[k]!=a[j])
|
||||
//k--;
|
||||
//assert( als_j[j].prev==k );
|
||||
//assert(k);
|
||||
//return k;
|
||||
massert(als_j[j].prev==0||a[als_j[j].prev]==a[j]);
|
||||
return als_j[j].prev;
|
||||
}
|
||||
friend ostream &operator<<(ostream&out, const alignment&a);
|
||||
friend bool operator==(const alignment&a, const alignment&b)
|
||||
{
|
||||
massert(a.a.size()==b.a.size());
|
||||
for(PositionIndex j=1;j<=a.get_m();j++)
|
||||
if(a(j)!=b(j))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
friend bool operator<(const alignment&x, const alignment&y)
|
||||
{
|
||||
massert(x.get_m()==y.get_m());
|
||||
for(PositionIndex j=1;j<=x.get_m();j++)
|
||||
if( x(j)<y(j) )
|
||||
return 1;
|
||||
else if( y(j)<x(j) )
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
friend int differences(const alignment&x, const alignment&y){
|
||||
int count=0;
|
||||
massert(x.get_m()==y.get_m());
|
||||
for(PositionIndex j=1;j<=x.get_m();j++)
|
||||
count += (x(j)!=y(j));
|
||||
return count;
|
||||
}
|
||||
bool valid()const
|
||||
{
|
||||
if( 2*f[0]>m )
|
||||
return 0;
|
||||
for(unsigned int i=1;i<=l;i++)
|
||||
if( f[i]>=MAX_FERTILITY )
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
friend class transpair_model5;
|
||||
};
|
||||
#endif
|
@ -0,0 +1,649 @@
|
||||
|
||||
// $Id: cmd.c 1307 2007-03-14 22:22:36Z hieuhoang1972 $
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cmd.h"
|
||||
|
||||
#ifdef WIN32
|
||||
# define popen _popen
|
||||
# define pclose _pclose
|
||||
#endif
|
||||
|
||||
static Enum_T BoolEnum[] = {
|
||||
{ "FALSE", 0 },
|
||||
{ "TRUE", 1 },
|
||||
{ 0, 0 }
|
||||
};
|
||||
|
||||
#ifdef NEEDSTRDUP
|
||||
char *strdup();
|
||||
#endif
|
||||
|
||||
#define FALSE 0
|
||||
#define TRUE 1
|
||||
|
||||
#define LINSIZ 10240
|
||||
#define MAXPARAM 256
|
||||
|
||||
static char *GetLine(),
|
||||
**str2array();
|
||||
static int Scan(),
|
||||
SetParam(),
|
||||
SetEnum(),
|
||||
SetSubrange(),
|
||||
SetStrArray(),
|
||||
SetGte(),
|
||||
SetLte(),
|
||||
CmdError(),
|
||||
EnumError(),
|
||||
SubrangeError(),
|
||||
GteError(),
|
||||
LteError(),
|
||||
PrintParam(),
|
||||
PrintEnum(),
|
||||
PrintStrArray();
|
||||
|
||||
static Cmd_T cmds[MAXPARAM+1];
|
||||
static char *SepString = " \t\n";
|
||||
|
||||
#if defined(__STDC__)
|
||||
#include <stdarg.h>
|
||||
int DeclareParams(char *ParName, ...)
|
||||
#else
|
||||
#include <varargs.h>
|
||||
int DeclareParams(ParName, va_alist)
|
||||
char *ParName;
|
||||
va_dcl
|
||||
#endif
|
||||
{
|
||||
va_list args;
|
||||
static int ParamN = 0;
|
||||
int j,
|
||||
c;
|
||||
char *s;
|
||||
|
||||
#if defined(__STDC__)
|
||||
va_start(args, ParName);
|
||||
#else
|
||||
va_start(args);
|
||||
#endif
|
||||
for(;ParName;) {
|
||||
if(ParamN==MAXPARAM) {
|
||||
fprintf(stderr, "Too many parameters !!\n");
|
||||
break;
|
||||
}
|
||||
for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
|
||||
;
|
||||
if(!c) {
|
||||
fprintf(stderr,
|
||||
"Warning: parameter \"%s\" declared twice.\n",
|
||||
ParName);
|
||||
}
|
||||
for(c=ParamN; c>j; c--) {
|
||||
cmds[c] = cmds[c-1];
|
||||
}
|
||||
cmds[j].Name = ParName;
|
||||
cmds[j].Type = va_arg(args, int);
|
||||
cmds[j].Val = va_arg(args, void *);
|
||||
switch(cmds[j].Type) {
|
||||
case CMDENUMTYPE: /* get the pointer to Enum_T struct */
|
||||
cmds[j].p = va_arg(args, void *);
|
||||
break;
|
||||
case CMDSUBRANGETYPE: /* get the two extremes */
|
||||
cmds[j].p = (void*) calloc(2, sizeof(int));
|
||||
((int*)cmds[j].p)[0] = va_arg(args, int);
|
||||
((int*)cmds[j].p)[1] = va_arg(args, int);
|
||||
break;
|
||||
case CMDGTETYPE: /* get lower or upper bound */
|
||||
case CMDLTETYPE:
|
||||
cmds[j].p = (void*) calloc(1, sizeof(int));
|
||||
((int*)cmds[j].p)[0] = va_arg(args, int);
|
||||
break;
|
||||
case CMDSTRARRAYTYPE: /* get the separators string */
|
||||
cmds[j].p = (s=va_arg(args, char*))
|
||||
? (void*)strdup(s) : 0;
|
||||
break;
|
||||
case CMDBOOLTYPE:
|
||||
cmds[j].Type = CMDENUMTYPE;
|
||||
cmds[j].p = BoolEnum;
|
||||
break;
|
||||
case CMDDOUBLETYPE: /* nothing else is needed */
|
||||
case CMDINTTYPE:
|
||||
case CMDSTRINGTYPE:
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "%s: %s %d %s \"%s\"\n",
|
||||
"DeclareParam()", "Unknown Type",
|
||||
cmds[j].Type, "for parameter", cmds[j].Name);
|
||||
exit(1);
|
||||
}
|
||||
ParamN++;
|
||||
ParName = va_arg(args, char *);
|
||||
}
|
||||
cmds[ParamN].Name = NULL;
|
||||
va_end(args);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int GetParams(n, a, CmdFileName)
|
||||
int *n;
|
||||
char ***a;
|
||||
char *CmdFileName;
|
||||
{
|
||||
char *Line,
|
||||
*ProgName;
|
||||
int argc = *n;
|
||||
char **argv = *a,
|
||||
*s;
|
||||
FILE *fp;
|
||||
int IsPipe;
|
||||
|
||||
#ifdef MSDOS
|
||||
#define PATHSEP '\\'
|
||||
char *dot = NULL;
|
||||
#else
|
||||
#define PATHSEP '/'
|
||||
#endif
|
||||
|
||||
if(!(Line=malloc(LINSIZ))) {
|
||||
fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
|
||||
LINSIZ);
|
||||
exit(1);
|
||||
}
|
||||
if((ProgName=strrchr(*argv, PATHSEP))) {
|
||||
++ProgName;
|
||||
} else {
|
||||
ProgName = *argv;
|
||||
}
|
||||
#ifdef MSDOS
|
||||
if(dot=strchr(ProgName, '.')) *dot = 0;
|
||||
#endif
|
||||
--argc;
|
||||
++argv;
|
||||
for(;;) {
|
||||
if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
|
||||
CmdFileName = argv[0]+2;
|
||||
++argv;
|
||||
--argc;
|
||||
}
|
||||
if(!CmdFileName) {
|
||||
break;
|
||||
}
|
||||
IsPipe = !strncmp(CmdFileName, "@@", 2);
|
||||
fp = IsPipe
|
||||
? popen(CmdFileName+2, "r")
|
||||
: strcmp(CmdFileName, "-")
|
||||
? fopen(CmdFileName, "r")
|
||||
: stdin;
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Unable to open command file %s\n",
|
||||
CmdFileName);
|
||||
exit(1);
|
||||
}
|
||||
while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
|
||||
if(Scan(ProgName, cmds, Line)) {
|
||||
CmdError(Line);
|
||||
}
|
||||
}
|
||||
if(fp!=stdin) {
|
||||
if(IsPipe) pclose(fp); else fclose(fp);
|
||||
}
|
||||
CmdFileName = NULL;
|
||||
}
|
||||
while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
|
||||
*s = ' ';
|
||||
sprintf(Line, "%s/%s", ProgName, *argv+1);
|
||||
*s = '=';
|
||||
if(Scan(ProgName, cmds, Line)) CmdError(*argv);
|
||||
--argc;
|
||||
++argv;
|
||||
}
|
||||
*n = argc;
|
||||
*a = argv;
|
||||
#ifdef MSDOS
|
||||
if(dot) *dot = '.';
|
||||
#endif
|
||||
free(Line);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int PrintParams(ValFlag, fp)
|
||||
int ValFlag;
|
||||
FILE *fp;
|
||||
{
|
||||
int i;
|
||||
|
||||
fflush(fp);
|
||||
if(ValFlag) {
|
||||
fprintf(fp, "Parameters Values:\n");
|
||||
} else {
|
||||
fprintf(fp, "Parameters:\n");
|
||||
}
|
||||
for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
|
||||
fprintf(fp, "\n");
|
||||
fflush(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int SPrintParams(a, pfx)
|
||||
char ***a,
|
||||
*pfx;
|
||||
{
|
||||
int l,
|
||||
n;
|
||||
Cmd_T *cmd;
|
||||
|
||||
if(!pfx) pfx="";
|
||||
l = strlen(pfx);
|
||||
for(n=0, cmd=cmds; cmd->Name; cmd++) n += !!cmd->ArgStr;
|
||||
a[0] = calloc(n, sizeof(char*));
|
||||
for(n=0, cmd=cmds; cmd->Name; cmd++) {
|
||||
if(!cmd->ArgStr) continue;
|
||||
a[0][n] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+l+2);
|
||||
sprintf(a[0][n], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr);
|
||||
++n;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
static int CmdError(opt)
|
||||
char *opt;
|
||||
{
|
||||
fprintf(stderr, "Invalid option \"%s\"\n", opt);
|
||||
fprintf(stderr, "This program expectes the following parameters:\n");
|
||||
PrintParams(FALSE, stderr);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
static int PrintParam(cmd, ValFlag, fp)
|
||||
Cmd_T *cmd;
|
||||
int ValFlag;
|
||||
FILE *fp;
|
||||
{
|
||||
fprintf(fp, "%4s", "");
|
||||
switch(cmd->Type) {
|
||||
case CMDDOUBLETYPE:
|
||||
fprintf(fp, "%s", cmd->Name);
|
||||
if(ValFlag) fprintf(fp, ": %22.15e", *(double *)cmd->Val);
|
||||
fprintf(fp, "\n");
|
||||
break;
|
||||
case CMDENUMTYPE:
|
||||
PrintEnum(cmd, ValFlag, fp);
|
||||
break;
|
||||
case CMDINTTYPE:
|
||||
case CMDSUBRANGETYPE:
|
||||
case CMDGTETYPE:
|
||||
case CMDLTETYPE:
|
||||
fprintf(fp, "%s", cmd->Name);
|
||||
if(ValFlag) fprintf(fp, ": %d", *(int *)cmd->Val);
|
||||
fprintf(fp, "\n");
|
||||
break;
|
||||
case CMDSTRINGTYPE:
|
||||
fprintf(fp, "%s", cmd->Name);
|
||||
if(ValFlag) {
|
||||
if(*(char **)cmd->Val) {
|
||||
fprintf(fp, ": \"%s\"", *(char **)cmd->Val);
|
||||
} else {
|
||||
fprintf(fp, ": %s", "NULL");
|
||||
}
|
||||
}
|
||||
fprintf(fp, "\n");
|
||||
break;
|
||||
case CMDSTRARRAYTYPE:
|
||||
PrintStrArray(cmd, ValFlag, fp);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "%s: %s %d %s \"%s\"\n",
|
||||
"PrintParam",
|
||||
"Unknown Type",
|
||||
cmd->Type,
|
||||
"for parameter",
|
||||
cmd->Name);
|
||||
exit(1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * GetLine -- read the next logical line from fp into Line.
 *
 *   fp    stream to read from
 *   n     size of the buffer Line points to
 *   Line  buffer receiving the NUL-terminated result
 *
 * Physical lines starting with '#' and lines that are empty after removing
 * leading white space are skipped.  Leading white space and the trailing
 * newline are removed.  A physical line ending in '\' is continued on the
 * next one; the backslash is replaced by a blank.
 *
 * Returns Line, or NULL at end of file, on a read error, or when the
 * continued line no longer fits into the buffer.
 */
static char *GetLine(FILE *fp, int n, char *Line)
{
  int offs = 0;

  for(;;) {
    char *seg = Line+offs;
    int   len,
          lead;

    /* fgets() needs room for at least one character plus the NUL. */
    if(n-offs < 2) {
      fprintf(stderr, "GetLine(): line too long (more than %d bytes)\n", n);
      return NULL;
    }
    if(!fgets(seg, n-offs, fp)) {
      return NULL;
    }
    if(*seg=='#') continue;
    len = (int)strlen(seg);
    /* Only drop the last character when it really is the newline: the
       final line of a file or an over-long line has none. */
    if(len > 0 && seg[len-1]=='\n') {
      seg[--len] = 0;
    }
    for(lead=0; seg[lead] && isspace((unsigned char)seg[lead]); lead++)
      ;
    len -= lead;
    if(len < 1) continue;
    if(lead > 0) {
      memmove(seg, seg+lead, (size_t)len+1);
    }
    if(seg[len-1]!='\\') {
      break;
    }
    /* Continuation: blank out the backslash and append the next line. */
    seg[len-1] = ' ';
    offs += len;
  }
  return Line;
}
|
||||
|
||||
static int Scan(ProgName, cmds, Line)
|
||||
char *ProgName,
|
||||
*Line;
|
||||
Cmd_T *cmds;
|
||||
{
|
||||
char *q,
|
||||
*p;
|
||||
int i,
|
||||
hl,
|
||||
HasToMatch = FALSE,
|
||||
c0,
|
||||
c;
|
||||
|
||||
p = Line+strspn(Line, SepString);
|
||||
if(!(hl=strcspn(p, SepString))) {
|
||||
return 0;
|
||||
}
|
||||
if((q=strchr(p, '/')) && q-p<hl) {
|
||||
*q = 0;
|
||||
if(strcmp(p, ProgName)) {
|
||||
*q = '/';
|
||||
return 0;
|
||||
}
|
||||
*q = '/';
|
||||
HasToMatch=TRUE;
|
||||
p = q+1;
|
||||
}
|
||||
if(!(hl = strcspn(p, SepString))) {
|
||||
return 0;
|
||||
}
|
||||
c0 = p[hl];
|
||||
p[hl] = 0;
|
||||
for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++)
|
||||
;
|
||||
p[hl] = c0;
|
||||
if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString));
|
||||
return HasToMatch && c;
|
||||
}
|
||||
|
||||
static int SetParam(cmd, s)
|
||||
Cmd_T *cmd;
|
||||
char *s;
|
||||
{
|
||||
if(!*s && cmd->Type != CMDSTRINGTYPE) {
|
||||
fprintf(stderr,
|
||||
"WARNING: No value specified for parameter \"%s\"\n",
|
||||
cmd->Name);
|
||||
return 0;
|
||||
}
|
||||
switch(cmd->Type) {
|
||||
case CMDDOUBLETYPE:
|
||||
if(sscanf(s, "%lf", (double*)cmd->Val)!=1) {
|
||||
fprintf(stderr,
|
||||
"Float value required for parameter \"%s\"\n",
|
||||
cmd->Name);
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case CMDENUMTYPE:
|
||||
SetEnum(cmd, s);
|
||||
break;
|
||||
case CMDINTTYPE:
|
||||
if(sscanf(s, "%d", (int*)cmd->Val)!=1) {
|
||||
fprintf(stderr,
|
||||
"Integer value required for parameter \"%s\"\n",
|
||||
cmd->Name);
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case CMDSTRINGTYPE:
|
||||
*(char **)cmd->Val = (strcmp(s, "<NULL>") && strcmp(s, "NULL"))
|
||||
? strdup(s)
|
||||
: 0;
|
||||
break;
|
||||
case CMDSTRARRAYTYPE:
|
||||
SetStrArray(cmd, s);
|
||||
break;
|
||||
case CMDGTETYPE:
|
||||
SetGte(cmd, s);
|
||||
break;
|
||||
case CMDLTETYPE:
|
||||
SetLte(cmd, s);
|
||||
break;
|
||||
case CMDSUBRANGETYPE:
|
||||
SetSubrange(cmd, s);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "%s: %s %d %s \"%s\"\n",
|
||||
"SetParam",
|
||||
"Unknown Type",
|
||||
cmd->Type,
|
||||
"for parameter",
|
||||
cmd->Name);
|
||||
exit(1);
|
||||
}
|
||||
cmd->ArgStr = strdup(s);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int SetEnum(cmd, s)
|
||||
Cmd_T *cmd;
|
||||
char *s;
|
||||
{
|
||||
Enum_T *en;
|
||||
|
||||
for(en=(Enum_T *)cmd->p; en->Name; en++) {
|
||||
if(*en->Name && !strcmp(s, en->Name)) {
|
||||
*(int *) cmd->Val = en->Idx;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return EnumError(cmd, s);
|
||||
}
|
||||
|
||||
static int SetSubrange(cmd, s)
|
||||
Cmd_T *cmd;
|
||||
char *s;
|
||||
{
|
||||
int n;
|
||||
|
||||
if(sscanf(s, "%d", &n)!=1) {
|
||||
fprintf(stderr,
|
||||
"Integer value required for parameter \"%s\"\n",
|
||||
cmd->Name);
|
||||
exit(1);
|
||||
}
|
||||
if(n < *(int *)cmd->p || n > *((int *)cmd->p+1)) {
|
||||
return SubrangeError(cmd, n);
|
||||
}
|
||||
*(int *)cmd->Val = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int SetGte(cmd, s)
|
||||
Cmd_T *cmd;
|
||||
char *s;
|
||||
{
|
||||
int n;
|
||||
|
||||
if(sscanf(s, "%d", &n)!=1) {
|
||||
fprintf(stderr,
|
||||
"Integer value required for parameter \"%s\"\n",
|
||||
cmd->Name);
|
||||
exit(1);
|
||||
}
|
||||
if(n<*(int *)cmd->p) {
|
||||
return GteError(cmd, n);
|
||||
}
|
||||
*(int *)cmd->Val = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int SetStrArray(cmd, s)
|
||||
Cmd_T *cmd;
|
||||
char *s;
|
||||
{
|
||||
*(char***)cmd->Val = str2array(s, (char*)cmd->p);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int SetLte(cmd, s)
|
||||
Cmd_T *cmd;
|
||||
char *s;
|
||||
{
|
||||
int n;
|
||||
|
||||
if(sscanf(s, "%d", &n)!=1) {
|
||||
fprintf(stderr,
|
||||
"Integer value required for parameter \"%s\"\n",
|
||||
cmd->Name);
|
||||
exit(1);
|
||||
}
|
||||
if(n > *(int *)cmd->p) {
|
||||
return LteError(cmd, n);
|
||||
}
|
||||
*(int *)cmd->Val = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int EnumError(cmd, s)
|
||||
Cmd_T *cmd;
|
||||
char *s;
|
||||
{
|
||||
Enum_T *en;
|
||||
|
||||
fprintf(stderr,
|
||||
"Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
|
||||
fprintf(stderr, "Valid values are:\n");
|
||||
for(en=(Enum_T *)cmd->p; en->Name; en++) {
|
||||
if(*en->Name) {
|
||||
fprintf(stderr, " %s\n", en->Name);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static int GteError(cmd, n)
|
||||
Cmd_T *cmd;
|
||||
int n;
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
|
||||
fprintf(stderr, "Valid values must be greater than or equal to %d\n",
|
||||
*(int *)cmd->p);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static int LteError(cmd, n)
|
||||
Cmd_T *cmd;
|
||||
int n;
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
|
||||
fprintf(stderr, "Valid values must be less than or equal to %d\n",
|
||||
*(int *)cmd->p);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static int SubrangeError(cmd, n)
|
||||
Cmd_T *cmd;
|
||||
int n;
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
|
||||
fprintf(stderr, "Valid values range from %d to %d\n",
|
||||
*(int *)cmd->p, *((int *)cmd->p+1));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static int PrintEnum(cmd, ValFlag, fp)
|
||||
Cmd_T *cmd;
|
||||
int ValFlag;
|
||||
FILE *fp;
|
||||
{
|
||||
Enum_T *en;
|
||||
|
||||
fprintf(fp, "%s", cmd->Name);
|
||||
if(ValFlag) {
|
||||
for(en=(Enum_T *)cmd->p; en->Name; en++) {
|
||||
if(*en->Name && en->Idx==*(int *)cmd->Val) {
|
||||
fprintf(fp, ": %s", en->Name);
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(fp, "\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int PrintStrArray(cmd, ValFlag, fp)
|
||||
Cmd_T *cmd;
|
||||
int ValFlag;
|
||||
FILE *fp;
|
||||
{
|
||||
char *indent,
|
||||
**s = *(char***)cmd->Val;
|
||||
int l = 4+strlen(cmd->Name);
|
||||
|
||||
fprintf(fp, "%s", cmd->Name);
|
||||
indent = malloc(l+2);
|
||||
memset(indent, ' ', l+1);
|
||||
indent[l+1] = 0;
|
||||
if(ValFlag) {
|
||||
fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
|
||||
if(s) while(*s) {
|
||||
fprintf(fp, "\n%s %s", indent, *s++);
|
||||
}
|
||||
}
|
||||
free(indent);
|
||||
fprintf(fp, "\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static char **str2array(s, sep)
|
||||
char *s,
|
||||
*sep;
|
||||
{
|
||||
char *p,
|
||||
**a;
|
||||
int n = 0,
|
||||
l;
|
||||
|
||||
if(!sep) sep = SepString;
|
||||
p = s += strspn(s, sep);
|
||||
while(*p) {
|
||||
p += strcspn(p, sep);
|
||||
p += strspn(p, sep);
|
||||
++n;
|
||||
}
|
||||
a = calloc(n+1, sizeof(char *));
|
||||
p = s;
|
||||
n = 0;
|
||||
while(*p) {
|
||||
l = strcspn(p, sep);
|
||||
a[n] = malloc(l+1);
|
||||
memcpy(a[n], p, l);
|
||||
a[n][l] = 0;
|
||||
++n;
|
||||
p += l;
|
||||
p += strspn(p, sep);
|
||||
}
|
||||
return a;
|
||||
}
|
@ -0,0 +1,51 @@
|
||||
|
||||
// $Id: cmd.h 1307 2007-03-14 22:22:36Z hieuhoang1972 $
|
||||
|
||||
#if !defined(CMD_H)
#define CMD_H

#include <stdio.h>      /* FILE, needed by the PrintParams() prototype */

/* Parameter types stored in Cmd_T.Type. */
#define CMDDOUBLETYPE   1
#define CMDENUMTYPE     2
#define CMDINTTYPE      3
#define CMDSTRINGTYPE   4
#define CMDSUBRANGETYPE 5
#define CMDGTETYPE      6
#define CMDLTETYPE      7
#define CMDSTRARRAYTYPE 8
#define CMDBOOLTYPE     9

/* One name/index pair of an enumeration table. */
typedef struct {
  char *Name;
  int   Idx;
} Enum_T;

/* One declared parameter. */
typedef struct {
  int   Type;           /* one of the CMD...TYPE constants */
  char *Name,           /* parameter name */
       *ArgStr;         /* text the parameter was last set from */
  void *Val,            /* address of the variable holding the value */
       *p;              /* type-specific extra data */
} Cmd_T;

#ifdef __cplusplus
extern "C" {
#endif

/* An empty parameter list means "no arguments" in C++, so C++ always
   needs the real prototype. */
#if defined(__STDC__) || defined(__cplusplus)
int DeclareParams(char *, ...);
#else
int DeclareParams();
#endif

int GetParams(int *n, char ***a, char *CmdFileName);
int SPrintParams(char ***a, char *pfx);
int PrintParams(int ValFlag, FILE *fp);

#ifdef __cplusplus
}
#endif
#endif
|
||||
|
||||
|
||||
|
@ -0,0 +1,315 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#include "alignment.h"
|
||||
#include "transpair_model3.h"
|
||||
#include <map>
|
||||
#include "collCounts.h"
|
||||
#include "MoveSwapMatrix.h"
|
||||
#include "D5Tables.h"
|
||||
#include "transpair_model5.h"
|
||||
#include "transpair_modelhmm.h"
|
||||
#include "Parameter.h"
|
||||
|
||||
extern float COUNTINCREASE_CUTOFF_AL;
|
||||
// unifies collectCountsOverAlignments and findAlignmentNeighborhood FJO-20/07/99
|
||||
template<class TRANSPAIR> int collectCountsOverNeighborhood(
|
||||
const MoveSwapMatrix<TRANSPAIR>&msc, LogProb ascore,
|
||||
Array2<LogProb,Vector<LogProb> >&dtcount,
|
||||
Array2<LogProb,Vector<LogProb> >&ncount, LogProb&p1count,
|
||||
LogProb&p0count, LogProb&total_count) {
|
||||
int nAl=0;
|
||||
const PositionIndex l=msc.get_l(), m=msc.get_m();
|
||||
Array2<LogProb,Vector<LogProb> > cmove(l+1, m+1), cswap(l+1, m+1);
|
||||
Vector<LogProb> negmove(m+1),negswap(m+1),plus1fert(l+1),minus1fert(l+1);
|
||||
LogProb total_move, total_swap;
|
||||
if (msc.isCenterDeleted()==0) {
|
||||
total_move+=ascore;
|
||||
nAl++;
|
||||
}
|
||||
for (PositionIndex j=1; j<=m; j++) {
|
||||
for (PositionIndex i=0; i<=l; i++) {
|
||||
if (msc(j)!=i && !msc.isDelMove(i, j) ) {
|
||||
double cm = msc.cmove(i, j);
|
||||
if(cm<0)
|
||||
continue;
|
||||
LogProb newscore=ascore*cm;
|
||||
total_move+=newscore;
|
||||
nAl++;
|
||||
cmove(i, j)+=newscore;
|
||||
negmove[j]+=newscore;
|
||||
plus1fert[i]+=newscore;
|
||||
minus1fert[msc(j)]+=newscore;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (PositionIndex j1=1; j1<=m; j1++) {
|
||||
for (PositionIndex j2=j1+1; j2<=m; j2++) {
|
||||
if (msc(j1)!=msc(j2) && !msc.isDelSwap(j1, j2) ) {
|
||||
double cs = msc.cswap(j1, j2);
|
||||
if(cs < 0){
|
||||
continue;
|
||||
}
|
||||
LogProb newscore=ascore*cs;
|
||||
total_swap+=newscore;
|
||||
nAl++;
|
||||
cswap(msc(j1), j2)+=newscore;
|
||||
cswap(msc(j2), j1)+=newscore;
|
||||
negswap[j1]+=newscore;
|
||||
negswap[j2]+=newscore;
|
||||
}
|
||||
}
|
||||
}
|
||||
total_count+=total_move+total_swap;
|
||||
for (PositionIndex j=1; j<=m; j++)
|
||||
for (PositionIndex i=0; i<=l; i++)
|
||||
dtcount(i, j) += ((i==msc(j)) ? (total_count
|
||||
-(negmove[j]+negswap[j])) : (cswap(i, j)+cmove(i, j)));
|
||||
for (PositionIndex i=1; i<=l; i++) {
|
||||
LogProb temp=minus1fert[i]+plus1fert[i];
|
||||
if (msc.fert(i)<MAX_FERTILITY)
|
||||
ncount(i, msc.fert(i))+=total_count-temp;
|
||||
if (msc.fert(i)>0&&msc.fert(i)-1<MAX_FERTILITY)
|
||||
ncount(i, msc.fert(i)-1)+=minus1fert[i];
|
||||
else if (minus1fert[i]!=0.0)
|
||||
cerr << "ERROR: M1Fa: " << minus1fert[i] << ' ' << i << ' '
|
||||
<< msc.fert(i)<< endl;
|
||||
if (msc.fert(i)+1<MAX_FERTILITY)
|
||||
ncount(i, msc.fert(i)+1)+=plus1fert[i];
|
||||
}
|
||||
LogProb temp=minus1fert[0]+plus1fert[0];
|
||||
p1count += (total_count-temp)*(LogProb)msc.fert(0);
|
||||
p0count += (total_count-temp)*(LogProb)(m-2*msc.fert(0));
|
||||
if (msc.fert(0)>0) {
|
||||
p1count += (minus1fert[0])*(LogProb)(msc.fert(0)-1);
|
||||
p0count += (minus1fert[0])*(LogProb)(m-2*(msc.fert(0)-1));
|
||||
} else if (minus1fert[0]!=0.0)
|
||||
cerr << "ERROR: M1Fb: " << minus1fert[0] << endl;
|
||||
if (int(m)-2*(int(msc.fert(0))+1)>=0) {
|
||||
p1count += (plus1fert[0])*(LogProb)(msc.fert(0)+1);
|
||||
p0count += (plus1fert[0])*(LogProb)(m-2*(msc.fert(0)+1));
|
||||
}
|
||||
msc.check();
|
||||
return nAl;
|
||||
}
|
||||
;
|
||||
|
||||
template<class TRANSPAIR> double collectCountsOverNeighborhoodForSophisticatedModels(
|
||||
const MoveSwapMatrix<TRANSPAIR>&, LogProb, void*) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
template<class TRANSPAIR> void _collectCountsOverNeighborhoodForSophisticatedModels(
|
||||
const MoveSwapMatrix<TRANSPAIR>&Mmsc, const alignment&msc,
|
||||
const TRANSPAIR&ef, LogProb normalized_ascore, d4model*d4Table) {
|
||||
Mmsc.check();
|
||||
const PositionIndex m=msc.get_m(), l=msc.get_l();
|
||||
for (PositionIndex j=1; j<=m; ++j)
|
||||
if (msc(j)!=0)
|
||||
if (msc.get_head(msc(j))==j) {
|
||||
int ep=msc.prev_cept(msc(j));
|
||||
d4Table->augCountRef_first(j, msc.get_center(ep),
|
||||
d4Table->ewordclasses->getClass(ef.get_es(ep)),
|
||||
d4Table->fwordclasses->getClass(ef.get_fs(j)), l, m,normalized_ascore);
|
||||
} else {
|
||||
//massert( &d4Table->getCountRef_bigger(j,msc.prev_in_cept(j),0,d4Table->fwordclasses.getClass(ef.get_fs(j)),l,m) == ef.getCountSecond(j,msc.prev_in_cept(j) ));
|
||||
d4Table->augCountRef_bigger(j, msc.prev_in_cept(j), 0,
|
||||
d4Table->fwordclasses->getClass(ef.get_fs(j)), l, m,normalized_ascore);
|
||||
}
|
||||
}
|
||||
|
||||
template<class TRANSPAIR> void _collectCountsOverNeighborhoodForSophisticatedModels(
|
||||
const MoveSwapMatrix<TRANSPAIR>&Mmsc, const alignment&msc,
|
||||
const TRANSPAIR&ef, LogProb normalized_ascore, d5model*d5Table) {
|
||||
Mmsc.check();
|
||||
_collectCountsOverNeighborhoodForSophisticatedModels(Mmsc, msc, ef,
|
||||
normalized_ascore, &d5Table->d4m);
|
||||
Mmsc.check();
|
||||
const PositionIndex m=msc.get_m(), l=msc.get_l();
|
||||
PositionIndex prev_cept=0;
|
||||
PositionIndex vac_all=m;
|
||||
Vector<char> vac(m+1,0);
|
||||
for (PositionIndex i=1; i<=l; i++) {
|
||||
PositionIndex cur_j=msc.als_i[i];
|
||||
PositionIndex prev_j=0;
|
||||
PositionIndex k=0;
|
||||
if (cur_j) { // process first word of cept
|
||||
k++;
|
||||
d5Table->getCountRef_first(vacancies(vac, cur_j), vacancies(vac,
|
||||
msc.get_center(prev_cept)),
|
||||
d5Table->fwordclasses->getClass(ef.get_fs(cur_j)), l, m,
|
||||
vac_all-msc.fert(i)+k) +=normalized_ascore;
|
||||
vac_all--;
|
||||
assert(vac[cur_j]==0);
|
||||
vac[cur_j]=1;
|
||||
Mmsc.check();
|
||||
prev_j=cur_j;
|
||||
cur_j=msc.als_j[cur_j].next;
|
||||
}
|
||||
while (cur_j) { // process following words of cept
|
||||
k++;
|
||||
int vprev=vacancies(vac, prev_j);
|
||||
d5Table->getCountRef_bigger(vacancies(vac, cur_j), vprev,
|
||||
d5Table->fwordclasses->getClass(ef.get_fs(cur_j)), l, m,
|
||||
vac_all-vprev/*war weg*/-msc.fert(i)+k)+=normalized_ascore;
|
||||
vac_all--;
|
||||
vac[cur_j]=1;
|
||||
Mmsc.check();
|
||||
prev_j=cur_j;
|
||||
cur_j=msc.als_j[cur_j].next;
|
||||
}
|
||||
assert(k==msc.fert(i));
|
||||
if (k)
|
||||
prev_cept=i;
|
||||
}
|
||||
assert(vac_all==msc.fert(0));
|
||||
}
|
||||
|
||||
extern int NumberOfAlignmentsInSophisticatedCountCollection;
|
||||
template<class TRANSPAIR, class MODEL> double collectCountsOverNeighborhoodForSophisticatedModels(
|
||||
const MoveSwapMatrix<TRANSPAIR>&msc, LogProb normalized_ascore,
|
||||
MODEL*d5Table) {
|
||||
const PositionIndex m=msc.get_m(), l=msc.get_l();
|
||||
alignment x(msc);
|
||||
double sum=0;
|
||||
msc.check();
|
||||
if ( !msc.isCenterDeleted() ) {
|
||||
_collectCountsOverNeighborhoodForSophisticatedModels<TRANSPAIR>(msc, x,
|
||||
msc.get_ef(), normalized_ascore, d5Table);
|
||||
NumberOfAlignmentsInSophisticatedCountCollection++;
|
||||
sum+=normalized_ascore;
|
||||
}
|
||||
msc.check();
|
||||
for (WordIndex j=1; j<=m; j++)
|
||||
for (WordIndex i=0; i<=l; i++) {
|
||||
WordIndex old=x(j);
|
||||
if (i!=old&& !msc.isDelMove(i, j) ) {
|
||||
msc.check();
|
||||
double cm =msc.cmove(i, j);
|
||||
if(cm < 0){
|
||||
continue;
|
||||
}
|
||||
double c=cm*normalized_ascore;
|
||||
if (c > COUNTINCREASE_CUTOFF_AL) {
|
||||
x.set(j, i);
|
||||
_collectCountsOverNeighborhoodForSophisticatedModels<
|
||||
TRANSPAIR>(msc, x, msc.get_ef(), c, d5Table);
|
||||
NumberOfAlignmentsInSophisticatedCountCollection++;
|
||||
x.set(j, old);
|
||||
sum+=c;
|
||||
}
|
||||
msc.check();
|
||||
}
|
||||
}
|
||||
for (PositionIndex j1=1; j1<=m; j1++) {
|
||||
for (PositionIndex j2=j1+1; j2<=m; j2++) {
|
||||
if (msc(j1)!=msc(j2) && !msc.isDelSwap(j1, j2) ) {
|
||||
double cs = msc.cswap(j1, j2);
|
||||
if(cs < 0)
|
||||
continue;
|
||||
double c=cs*normalized_ascore;
|
||||
msc.check();
|
||||
if (c > COUNTINCREASE_CUTOFF_AL) {
|
||||
int old1=msc(j1), old2=msc(j2);
|
||||
x.set(j1, old2);
|
||||
x.set(j2, old1);
|
||||
_collectCountsOverNeighborhoodForSophisticatedModels<
|
||||
TRANSPAIR>(msc, x, msc.get_ef(), c, d5Table);
|
||||
NumberOfAlignmentsInSophisticatedCountCollection++;
|
||||
x.set(j1, old1);
|
||||
x.set(j2, old2);
|
||||
sum+=c;
|
||||
}
|
||||
msc.check();
|
||||
}
|
||||
}
|
||||
}
|
||||
msc.check();
|
||||
return sum;
|
||||
}
|
||||
|
||||
template<class TRANSPAIR, class MODEL> int collectCountsOverNeighborhood(
|
||||
const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,
|
||||
Vector<WordIndex>&es, Vector<WordIndex>&fs, tmodel<COUNT,PROB>&tTable,
|
||||
amodel<COUNT>&aCountTable, amodel<COUNT>&dCountTable,
|
||||
nmodel<COUNT>&nCountTable, SyncDouble&p1count, SyncDouble&p0count,
|
||||
LogProb&_total, float count, bool addCounts, MODEL*d4Table) {
|
||||
int nAl=0;
|
||||
const PositionIndex l=es.size()-1, m=fs.size()-1;
|
||||
Array2<LogProb,Vector<LogProb> > dtcount(l+1, m+1), ncount(l+1,
|
||||
MAX_FERTILITY+1);
|
||||
LogProb p0=0, p1=0, all_total=0;
|
||||
for (unsigned int i=0; i<smsc.size(); ++i) {
|
||||
LogProb this_total=0;
|
||||
nAl+=collectCountsOverNeighborhood(*smsc[i].first, smsc[i].second,
|
||||
dtcount, ncount, p1, p0, this_total);
|
||||
all_total+=this_total;
|
||||
}
|
||||
_total=all_total;
|
||||
if(count==0){
|
||||
cerr << "WARNING: COUNT ==0" << endl;
|
||||
}else
|
||||
all_total/=(double)count;
|
||||
if(isinf(all_total)){
|
||||
cerr << "ALL_TOTAL is INF\n" ;
|
||||
return 0;
|
||||
}
|
||||
double sum2=0;
|
||||
if (addCounts && d4Table) {
|
||||
for (unsigned int i=0; i<smsc.size(); ++i) {
|
||||
//for(WordIndex j=1;j<=m;j++)for(WordIndex ii=0;ii<=l;ii++)
|
||||
// (*smsc[i].first).cmove(ii,j);
|
||||
sum2+=collectCountsOverNeighborhoodForSophisticatedModels(
|
||||
*smsc[i].first, smsc[i].second/all_total, d4Table);
|
||||
}
|
||||
if (!(fabs(count-sum2)<0.05))
|
||||
cerr << "WARNING: DIFFERENT SUMS: (" << count << ") (" << sum2 << ") (" << all_total
|
||||
<< ")\n";
|
||||
}
|
||||
|
||||
/**
|
||||
NOTE! HERE IS THE UPDATE PROCESS!
|
||||
*/
|
||||
if(fabs(all_total)==0){
|
||||
// Error
|
||||
cerr << "Hill climbing yields zero count " << endl;
|
||||
}else{
|
||||
if (addCounts) {
|
||||
for (PositionIndex i=0; i<=l; i++) {
|
||||
for (PositionIndex j=1; j<=m; j++) {
|
||||
LogProb ijadd=dtcount(i, j)/all_total;
|
||||
if (ijadd>COUNTINCREASE_CUTOFF_AL) {
|
||||
tTable.incCount(es[i], fs[j], ijadd);
|
||||
dCountTable.addValue(j, i, l, m, ijadd);
|
||||
aCountTable.addValue(i, j, l, m, ijadd);
|
||||
}
|
||||
}
|
||||
if (i>0)
|
||||
for (PositionIndex n=0; n<MAX_FERTILITY; n++)
|
||||
nCountTable.addValue(es[i], n, ncount(i, n)/all_total);
|
||||
}
|
||||
p0count+=p0/all_total;
|
||||
p1count+=p1/all_total;
|
||||
}}
|
||||
return nAl;
|
||||
}
|
||||
|
@ -0,0 +1,80 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
||||
|
||||
This file is part of GIZA++ ( extension of GIZA ).
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef collCounts_h_defined
|
||||
#define collCounts_h_defined
|
||||
#include "alignment.h"
|
||||
#include "transpair_model3.h"
|
||||
#include <map>
|
||||
#include "MoveSwapMatrix.h"
|
||||
#include "D4Tables.h"
|
||||
#include "transpair_model4.h"
|
||||
|
||||
// One move or swap operation, described by a type code and two operands.
class OneMoveSwap
{
 public:
  short type;
  short a,b;
  OneMoveSwap(short _type,short _a,short _b)
    : type(_type),a(_a),b(_b)
  {}
  // Zero-initialises every member: the comparison operators read a and b,
  // so they must never be left indeterminate.
  OneMoveSwap()
    : type(0),a(0),b(0){}
};
|
||||
|
||||
inline bool operator<(const OneMoveSwap&a,const OneMoveSwap&b)
|
||||
{
|
||||
if(a.type<b.type)return 1;
|
||||
else if(b.type<a.type)return 0;
|
||||
else if(a.a<b.a)return 1;
|
||||
else if(b.a<a.a)return 0;
|
||||
else return a.b<b.b;
|
||||
}
|
||||
|
||||
inline bool operator==(const OneMoveSwap&a,const OneMoveSwap&b)
|
||||
{
|
||||
return a.type==b.type&&a.a==b.a&&a.b==b.b;
|
||||
}
|
||||
|
||||
// Prints an operation as "(type,a,b)".
inline ostream&operator<<(ostream&out,const OneMoveSwap&o)
{
  out << '(' << o.type;
  out << "," << o.a;
  out << "," << o.b << ")";
  return out;
}
|
||||
|
||||
// Prints every operation of the set to out, each followed by a blank.
// The elements go to the stream that was passed in, not to cout.
inline ostream &operator<<(ostream &out,const set<OneMoveSwap>&s)
{
  for(set<OneMoveSwap>::const_iterator i=s.begin();i!=s.end();++i)
    out << *i << ' ';
  return out;
}
|
||||
|
||||
bool makeOneMoveSwap(const alignment&a,const alignment&b,set<OneMoveSwap>&oms);
|
||||
|
||||
template<class TRANSPAIR,class MODEL>
|
||||
int collectCountsOverNeighborhood(const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,
|
||||
Vector<WordIndex>&es,
|
||||
Vector<WordIndex>&fs,tmodel<COUNT,PROB>&tTable,
|
||||
amodel<COUNT>&aCountTable,amodel<COUNT>&dCountTable,
|
||||
nmodel<COUNT>&nCountTable,double&p1count,double&p0count,
|
||||
LogProb&_total,float count,bool addCounts,MODEL*d4Table=0);
|
||||
|
||||
#endif
|
@ -0,0 +1,3 @@
|
||||
/*!
|
||||
This is global definition for all main files of the program set
|
||||
*/
|
@ -0,0 +1,126 @@
|
||||
// D4 Normalization executable
|
||||
|
||||
#include <iostream>
|
||||
#include <strstream>
|
||||
#include <string>
|
||||
#include "hmm.h"
|
||||
#include "D4Tables.h"
|
||||
#include "Parameter.h"
|
||||
#define ITER_M2 0
|
||||
#define ITER_MH 5
|
||||
GLOBAL_PARAMETER3(int,Model1_Iterations,"Model1_Iterations","NO. ITERATIONS MODEL 1","m1","number of iterations for Model 1",PARLEV_ITER,5);
|
||||
GLOBAL_PARAMETER3(int,Model2_Iterations,"Model2_Iterations","NO. ITERATIONS MODEL 2","m2","number of iterations for Model 2",PARLEV_ITER,ITER_M2);
|
||||
GLOBAL_PARAMETER3(int,HMM_Iterations,"HMM_Iterations","mh","number of iterations for HMM alignment model","mh", PARLEV_ITER,ITER_MH);
|
||||
GLOBAL_PARAMETER3(int,Model3_Iterations,"Model3_Iterations","NO. ITERATIONS MODEL 3","m3","number of iterations for Model 3",PARLEV_ITER,5);
|
||||
GLOBAL_PARAMETER3(int,Model4_Iterations,"Model4_Iterations","NO. ITERATIONS MODEL 4","m4","number of iterations for Model 4",PARLEV_ITER,5);
|
||||
GLOBAL_PARAMETER3(int,Model5_Iterations,"Model5_Iterations","NO. ITERATIONS MODEL 5","m5","number of iterations for Model 5",PARLEV_ITER,0);
|
||||
GLOBAL_PARAMETER3(int,Model6_Iterations,"Model6_Iterations","NO. ITERATIONS MODEL 6","m6","number of iterations for Model 6",PARLEV_ITER,0);
|
||||
|
||||
GLOBAL_PARAMETER(float, PROB_SMOOTH,"probSmooth","probability smoothing (floor) value ",PARLEV_OPTHEUR,1e-7);
|
||||
GLOBAL_PARAMETER(float, MINCOUNTINCREASE,"minCountIncrease","minimal count increase",PARLEV_OPTHEUR,1e-7);
|
||||
|
||||
GLOBAL_PARAMETER2(int,Transfer_Dump_Freq,"TRANSFER DUMP FREQUENCY","t2to3","output: dump of transfer from Model 2 to 3",PARLEV_OUTPUT,0);
|
||||
GLOBAL_PARAMETER2(bool,Verbose,"verbose","v","0: not verbose; 1: verbose",PARLEV_OUTPUT,0);
|
||||
GLOBAL_PARAMETER(bool,Log,"log","0: no logfile; 1: logfile",PARLEV_OUTPUT,0);
|
||||
|
||||
GLOBAL_PARAMETER(double,P0,"p0","fixed value for parameter p_0 in IBM-3/4 (if negative then it is determined in training)",PARLEV_EM,-1.0);
|
||||
GLOBAL_PARAMETER(double,M5P0,"m5p0","fixed value for parameter p_0 in IBM-5 (if negative then it is determined in training)",PARLEV_EM,-1.0);
|
||||
GLOBAL_PARAMETER3(bool,Peg,"pegging","p","DO PEGGING? (Y/N)","0: no pegging; 1: do pegging",PARLEV_EM,0);
|
||||
|
||||
GLOBAL_PARAMETER(short,OldADBACKOFF,"adbackoff","",-1,0);
|
||||
GLOBAL_PARAMETER2(unsigned int,MAX_SENTENCE_LENGTH,"ml","MAX SENTENCE LENGTH","maximum sentence length",0,MAX_SENTENCE_LENGTH_ALLOWED);
|
||||
|
||||
GLOBAL_PARAMETER(short, DeficientDistortionForEmptyWord,"DeficientDistortionForEmptyWord","0: IBM-3/IBM-4 as described in (Brown et al. 1993); 1: distortion model of empty word is deficient; 2: distoriton model of empty word is deficient (differently); setting this parameter also helps to avoid that during IBM-3 and IBM-4 training too many words are aligned with the empty word",PARLEV_MODELS,0);
|
||||
|
||||
/**
|
||||
Here are parameters to support Load models and dump models
|
||||
*/
|
||||
|
||||
GLOBAL_PARAMETER(int,restart,"restart","Restart training from a level,0: Normal restart, from model 1, 1: Model 1, 2: Model 2 Init (Using Model 1 model input and train model 2), 3: Model 2, (using model 2 input and train model 2), 4 : HMM Init (Using Model 1 model and train HMM), 5: HMM (Using Model 2 model and train HMM) 6 : HMM (Using HMM Model and train HMM), 7: Model 3 Init (Use HMM model and train model 3) 8: Model 3 Init (Use Model 2 model and train model 3) 9: Model 3, 10: Model 4 Init (Use Model 3 model and train Model 4) 11: Model 4 and on, ",PARLEV_INPUT,0);
|
||||
GLOBAL_PARAMETER(bool,dumpCount,"dumpcount","Whether we are going to dump count (in addition to) final output?",PARLEV_OUTPUT,false);
|
||||
GLOBAL_PARAMETER(bool,dumpCountUsingWordString,"dumpcountusingwordstring","In count table, should actual word appears or just the id? default is id",PARLEV_OUTPUT,false);
|
||||
/// END
|
||||
short OutputInAachenFormat=0;
|
||||
bool Transfer=TRANSFER;
|
||||
bool Transfer2to3=0;
|
||||
short NoEmptyWord=0;
|
||||
bool FEWDUMPS=0;
|
||||
GLOBAL_PARAMETER(bool,ONLYALDUMPS,"ONLYALDUMPS","1: do not write any files",PARLEV_OUTPUT,0);
|
||||
GLOBAL_PARAMETER(short,NCPUS,"NCPUS","Number of CPUS",PARLEV_EM,2);
|
||||
GLOBAL_PARAMETER(short,CompactAlignmentFormat,"CompactAlignmentFormat","0: detailled alignment format, 1: compact alignment format ",PARLEV_OUTPUT,0);
|
||||
GLOBAL_PARAMETER2(bool,NODUMPS,"NODUMPS","NO FILE DUMPS? (Y/N)","1: do not write any files",PARLEV_OUTPUT,0);
|
||||
|
||||
GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
|
||||
"maximal fertility for fertility models", PARLEV_EM, 10);
|
||||
|
||||
using namespace std;
|
||||
string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
|
||||
TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
|
||||
a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
|
||||
|
||||
|
||||
int main(int argc, char* argv[]){
|
||||
if(argc < 5){
|
||||
cerr << "Usage: " << argv[0] << " vcb1 vcb2 outputFile baseFile [additional1 ]..." << endl;
|
||||
return 1;
|
||||
}
|
||||
WordClasses ewc,fwc;
|
||||
d4model d4m(MAX_SENTENCE_LENGTH,ewc,fwc);
|
||||
Vector<WordEntry> evlist,fvlist;
|
||||
vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
|
||||
TargetVocabFilename = argv[2];
|
||||
SourceVocabFilename = argv[1];
|
||||
eTrainVcbList.setName(argv[1]);
|
||||
fTrainVcbList.setName(argv[2]);
|
||||
eTrainVcbList.readVocabList();
|
||||
fTrainVcbList.readVocabList();
|
||||
string evcbcls = argv[1];
|
||||
string fvcbcls = argv[2];
|
||||
evcbcls += ".classes";
|
||||
fvcbcls += ".classes";
|
||||
d4m.makeWordClasses(eTrainVcbList, fTrainVcbList, evcbcls.c_str(), fvcbcls.c_str(),eTrainVcbList,fTrainVcbList);
|
||||
// Start iteration:
|
||||
for(int i =4; i< argc ; i++){
|
||||
string name = argv[i];
|
||||
string nameA = name ;
|
||||
string nameB = name + ".b";
|
||||
if(d4m.augCount(nameA.c_str(),nameB.c_str())){
|
||||
cerr << "Loading (d4) table " << nameA << "/" << nameB << " OK" << endl;
|
||||
|
||||
}else{
|
||||
cerr << "ERROR Loading (d) table " << nameA << " " << nameB << endl;
|
||||
}
|
||||
}
|
||||
|
||||
d4m.normalizeTable();
|
||||
string DiffOPath = argv[3];
|
||||
string diff1 = DiffOPath;
|
||||
string diff2 = DiffOPath+".b";
|
||||
cerr << "Outputing d4 table to " << diff1 << " " << diff2;
|
||||
d4m.printProbTable(diff1.c_str(),diff2.c_str());
|
||||
|
||||
|
||||
}
|
||||
|
||||
// Some utility functions to get it compile..
|
||||
|
||||
// Log stream object; nothing in the code visible here opens or writes it.
// NOTE(review): presumably present only to satisfy references from other
// GIZA sources ("utility functions to get it compile") -- confirm.
std::ofstream logmsg;

// Return the decimal representation of n.  (Despite its name the function
// converts a number to a string.)
//
// Negative values are rendered with a leading '-'.  The previous digit loop
// produced characters below '0' for them, because `n % 10` is negative, and
// it stopped after a single digit.
const std::string str2Num(int n) {
  const bool negative = n < 0;
  // Take the magnitude in unsigned arithmetic so that INT_MIN is handled
  // without signed overflow.
  unsigned int magnitude = negative ? 0u - static_cast<unsigned int>(n)
                                    : static_cast<unsigned int>(n);

  std::string number;
  do {
    // Prepend the least significant remaining digit.
    number.insert((size_t)0, 1, (char)(magnitude % 10 + '0'));
  } while ((magnitude /= 10) > 0);

  if (negative)
    number.insert((size_t)0, 1, '-');
  return number;
}
|
||||
double LAMBDA=1.09;
|
||||
|
||||
Vector<map< pair<int,int>,char > > ReferenceAlignment;
|
||||
|
||||
double ErrorsInAlignment(const map< pair<int,int>,char >&reference,
|
||||
const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
|
||||
int&eventsMissing, int&eventsToomuch, int pair_no){
|
||||
}
|
||||
|
||||
void printGIZAPars(ostream&out){
|
||||
}
|
||||
|
@ -0,0 +1,78 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef _defs_h
|
||||
#define _defs_h 1
|
||||
#include <string>
|
||||
#include <cmath>
|
||||
#include <climits>
|
||||
|
||||
// Transfer switch values.
const int TRANSFER_SIMPLE = 1;
const int TRANSFER        = 0;

// Sentence-length and training-buffer limits.
const unsigned int MAX_SENTENCE_LENGTH_ALLOWED = 101;
const int          TRAIN_BUFFER_SIZE           = 50000;

// Word and position indices are unsigned int.  A variant based on
// unsigned short (with MAX_VOCAB_SIZE = USHRT_MAX) used to be selectable via
// WORDINDEX_WITH_4_BYTE; that conditional is disabled.
typedef unsigned int WordIndex;
typedef unsigned int PositionIndex;
const unsigned int MAX_VOCAB_SIZE = UINT_MAX;

extern WordIndex MAX_FERTILITY;

const int MAX_W = 457979;
extern double LAMBDA; // Lambda that is used to scale cross_entropy factor

// Probabilities and counts are stored in single precision.
typedef float PROB;
typedef float COUNT;
|
||||
|
||||
class LogProb {
|
||||
private:
|
||||
double x ;
|
||||
public:
|
||||
LogProb():x(0){}
|
||||
LogProb(double y):x(y){}
|
||||
LogProb(float y):x(y){}
|
||||
LogProb(int y):x(y){}
|
||||
LogProb(WordIndex y):x(y){}
|
||||
operator double() const {return x;}
|
||||
LogProb operator *= (double y) { x *= y ; return *this;}
|
||||
LogProb operator *= (LogProb y) { x *= y.x ; return *this;}
|
||||
LogProb operator /= (double y) { x /= y ; return *this;}
|
||||
LogProb operator /= (LogProb y) { x /= y.x ; return *this;}
|
||||
LogProb operator += (double y) { x += y ; return *this;}
|
||||
LogProb operator += (LogProb y) { x += y.x ; return *this;}
|
||||
};
|
||||
|
||||
// Parameter level codes, passed as the level argument of the
// GLOBAL_PARAMETER* macros.
const int PARLEV_ITER    = 1;
const int PARLEV_OPTHEUR = 2;
const int PARLEV_OUTPUT  = 3;
const int PARLEV_SMOOTH  = 4;
const int PARLEV_EM      = 5;
const int PARLEV_MODELS  = 6;
const int PARLEV_SPECIAL = 7;
const int PARLEV_INPUT   = 8;
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,59 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef FILE_SPEC_H
|
||||
#define FILE_SPEC_H
|
||||
|
||||
#include <ctime>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
|
||||
/* This function returns a string, locally called file_spec. This
|
||||
string is the concatenation of the date and time of execution
|
||||
and the user who is performing the execution */
|
||||
/* Originally implemented in C by Yaser Al-Onaizan;
|
||||
editions for C++ and formatting by Noah A. Smith, 9 July 1999 */
|
||||
|
||||
/* Build the "file spec" tag: a stamp of the current local time followed by
   the value of the USER environment variable, in the form

       <year>-<month>-<day>.<hour><minute><second>.<user>

   The year field is tm_year, i.e. years since 1900, so it is printed with
   three digits for the year 2000 and later; this is kept unchanged so that
   existing names stay the same.

   Returns a malloc()ed, NUL-terminated string which the caller must free(),
   or NULL if the local time cannot be determined or memory is exhausted.
   When USER is not set, "unknown" is used as the user name; previously a
   NULL result from getenv() was passed straight to strlen().

   NOTE(review): this is a non-inline function defined in a header, so the
   header can be included from only one translation unit. */
char *Get_File_Spec (){
  char time_stmp[57];

  time_t t = time(NULL);
  struct tm *local = localtime(&t);
  if (local == NULL)
    return NULL;

  /* Bounded formatting; the buffer is far larger than the stamp needs. */
  snprintf(time_stmp, sizeof time_stmp, "%02d-%02d-%02d.%02d%02d%02d.",
           local->tm_year, (local->tm_mon + 1), local->tm_mday,
           local->tm_hour, local->tm_min, local->tm_sec);

  const char *user = getenv("USER");
  if (user == NULL)
    user = "unknown";

  const size_t length = strlen(time_stmp) + strlen(user) + 1;
  char *file_spec = (char *)malloc(length);
  if (file_spec == NULL)
    return NULL;

  snprintf(file_spec, length, "%s%s", time_stmp, user);
  return file_spec;
}
|
||||
|
||||
#endif
|
@ -0,0 +1,470 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/* --------------------------------------------------------------------------*
|
||||
* *
|
||||
* Module : getSentence *
|
||||
* *
|
||||
* Method Definitions File: getSentence.cc *
|
||||
* *
|
||||
* Objective: Defines classes and methods for handling I/O for the parallel *
|
||||
* corpus. *
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include "getSentence.h"
|
||||
#include <iostream>
|
||||
#include <strstream>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <pthread.h>
|
||||
#include "Parameter.h"
|
||||
#include "errno.h"
|
||||
|
||||
int PrintedTooLong=0;
|
||||
|
||||
/* -------------- Method Defnitions for Class sentenceHandler ---------------*/
|
||||
|
||||
GLOBAL_PARAMETER(double,ManlexMAX_MULTIPLICITY,"manlexMAX_MULTIPLICITY","",PARLEV_EM,20.0);
|
||||
GLOBAL_PARAMETER(double,Manlexfactor1,"manlexfactor1","",PARLEV_EM,0.0);
|
||||
GLOBAL_PARAMETER(double,Manlexfactor2,"manlexfactor2","",PARLEV_EM,0.0);
|
||||
|
||||
// Constructor: opens the corpus file and resets the sentence-pair counters.
//
// When both vocabulary lists are supplied, the whole corpus is read once via
// getNextSentence() (which also accumulates word frequencies into the lists)
// in order to
//   - count the sentence pairs (totalPairs1) and their summed counts
//     (totalPairs2),
//   - collect the set of word ids that actually occur on each side, and
//   - detect negative occurrence counts, which mark entries of a manual
//     dictionary; in that case the per-sentence realCount vector is allocated.
// Afterwards each supplied vocabulary list is compacted to the ids seen.
//
// The process is terminated with exit(1) if the corpus cannot be opened.
//
// Fix: elist and flist default to null pointers in the class declaration, but
// compact() used to be called through them unconditionally; each call is now
// guarded by a null check.
sentenceHandler::sentenceHandler(const char* filename, vcbList* elist,
                                 vcbList* flist) : realCount(0)
{
  pthread_mutex_init(&readsent_mutex, NULL);
  pthread_mutex_init(&setprob_mutex, NULL);

  position = 0;
  readflag = false;
  allInMemory = false;
  inputFilename = filename;
  inputFile = new ifstream(filename);
  pair_no = 0;
  if (!(*inputFile)) {
    cerr << "\nERROR:(a) Cannot open " << filename;
    exit(1);
  }
  currentSentence = 0;
  totalPairs1 = 0;
  totalPairs2 = 0;
  noSentInBuffer = 0;
  Buffer.clear();

  bool isNegative = false;

  // Word ids observed in the corpus; id 0 (the empty word) is always kept.
  std::set<WordIndex> evoc, fvoc;
  evoc.insert(0);
  fvoc.insert(0);

  if (elist && flist) {
    cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';
    sentPair s;
    while (getNextSentence(s, elist, flist)) {
      for (unsigned int i = 0; i < s.eSent.size(); i++)
        evoc.insert(s.eSent[i]);
      for (unsigned int i = 0; i < s.fSent.size(); i++)
        fvoc.insert(s.fSent[i]);

      totalPairs1++;
      // NOTE: this value might change during training for words from the
      // manual dictionary, yet this is ignored!
      totalPairs2 += s.realCount;

      if (s.noOcc < 0)
        isNegative = true;
    }
  }

  if (isNegative) {
    cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
    realCount = new Vector<double>(totalPairs1, 1.0);
  } else {
    realCount = 0;
  }

  // Guarded: either list may legitimately be null.
  if (elist)
    elist->compact(evoc);
  if (flist)
    flist->compact(fvoc);
}
|
||||
|
||||
// Constructor variant that reports the observed vocabulary to the caller.
//
// Opens the corpus and resets the sentence-pair counters.  If both vocabulary
// lists are given, the corpus is read once through getNextSentence(); every
// source word id is inserted into `eapp` and every target word id into
// `fapp`, while totalPairs1 / totalPairs2 are accumulated.  A negative
// occurrence count anywhere in the corpus causes the per-sentence realCount
// vector to be allocated.  Terminates with exit(1) if the file cannot be
// opened.
sentenceHandler::sentenceHandler(const char* filename, vcbList* elist,
                                 vcbList* flist, std::set<WordIndex>& eapp,
                                 std::set<WordIndex>& fapp) : realCount(0)
{
  pthread_mutex_init(&readsent_mutex, NULL);
  pthread_mutex_init(&setprob_mutex, NULL);

  // Reader state.
  position        = 0;
  readflag        = false;
  allInMemory     = false;
  inputFilename   = filename;
  inputFile       = new ifstream(filename);
  pair_no         = 0;
  if (!(*inputFile)) {
    cerr << "\nERROR:(a) Cannot open " << filename;
    exit(1);
  }
  currentSentence = 0;
  totalPairs1     = 0;
  totalPairs2     = 0;
  pair_no         = 0;
  noSentInBuffer  = 0;
  Buffer.clear();

  bool sawManualEntry = false;
  if (elist && flist) {
    cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';

    sentPair pairRead;
    while (getNextSentence(pairRead, elist, flist)) {
      for (unsigned int e = 0; e < pairRead.eSent.size(); ++e)
        eapp.insert(pairRead.eSent[e]);
      for (unsigned int f = 0; f < pairRead.fSent.size(); ++f)
        fapp.insert(pairRead.fSent[f]);

      ++totalPairs1;
      // NOTE: this value might change during training for words from the
      // manual dictionary, yet this is ignored!
      totalPairs2 += pairRead.realCount;

      if (pairRead.noOcc < 0)
        sawManualEntry = true;
    }
  }

  if (!sawManualEntry) {
    realCount = 0;
    return;
  }
  cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
  realCount = new Vector<double>(totalPairs1, 1.0);
}
|
||||
|
||||
void sentenceHandler::rewind()
|
||||
{
|
||||
position = 0;
|
||||
currentSentence = 0;
|
||||
readflag = false ;
|
||||
if (!allInMemory ||
|
||||
!(Buffer.size() >= 1 && Buffer[currentSentence].sentenceNo == 1)){
|
||||
// check if the buffer doe not already has the first chunk of pairs
|
||||
if (Buffer.size() > 0)
|
||||
cerr << ' ' << Buffer[currentSentence].sentenceNo << '\n';
|
||||
// totalPairs = 0 ;
|
||||
pair_no = 0 ;
|
||||
noSentInBuffer = 0 ;
|
||||
Buffer.clear();
|
||||
}
|
||||
if (!allInMemory){
|
||||
delete inputFile;
|
||||
inputFile = new ifstream(inputFilename);
|
||||
if(!(*inputFile)){
|
||||
cerr << "\nERROR:(b) Cannot open " << inputFilename << " " << (int)errno;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Hand out the next sentence pair; the whole call runs under readsent_mutex.
//
// Returns the 1-based running position of the pair copied into `sent`, or 0
// when no pair is available (end of corpus already reported, buffer exhausted
// while the corpus is fully in memory, or nothing could be read).
//
// When the buffer is exhausted and the corpus is not fully in memory, up to
// TRAIN_BUFFER_SIZE pairs are read from the file.  During that refill
//   - a pair whose target/source length ratio exceeds MAX_FERTILITY-1 is
//     reported and both sides are shortened to the shorter length;
//   - if both vocabulary lists are given, word frequencies are accumulated
//     into them, and a word id outside a list terminates the process with
//     exit(-1).
int sentenceHandler::getNextSentence(sentPair& sent, vcbList* elist, vcbList* flist)
{
  pthread_mutex_lock(&readsent_mutex);

  if (readflag) {
    cerr << "Attempting to read from the end of corpus, rewinding\n";
    pthread_mutex_unlock(&readsent_mutex);
    return 0;
  }

  if (currentSentence >= noSentInBuffer) {
    if (allInMemory) {
      pthread_mutex_unlock(&readsent_mutex);
      return 0;
    }

    /* no more sentences in buffer: refill it from the file */
    noSentInBuffer = 0;
    currentSentence = 0;
    Buffer.clear();
    cout << "Reading more sentence pairs into memory ... \n";

    sentPair s;
    while ((noSentInBuffer < TRAIN_BUFFER_SIZE) && readNextSentence(s)) {
      if ((s.fSent.size()-1) > (MAX_FERTILITY-1) * (s.eSent.size()-1)) {
        cerr << "WARNING: The following sentence pair has source/target sentence length ratio more than\n"
             << "the maximum allowed limit for a source word fertility\n"
             << " source length = " << s.eSent.size()-1 << " target length = " << s.fSent.size()-1
             << " ratio " << double(s.fSent.size()-1)/ (s.eSent.size()-1) << " ferility limit : "
             << MAX_FERTILITY-1 << '\n';
        cerr << "Shortening sentence \n";
        cerr << s;
        s.eSent.resize(min(s.eSent.size(), s.fSent.size()));
        s.fSent.resize(min(s.eSent.size(), s.fSent.size()));
      }
      Buffer.push_back(s);

      if (elist && flist) {
        // Source side: counted from index 0.
        if (elist->size() > 0) {
          for (WordIndex e = 0; e < s.eSent.size(); e++) {
            if (s.eSent[e] >= elist->uniqTokens()) {
              if (PrintedTooLong++ < 100)
                cerr << "ERROR: source word " << s.eSent[e] << " is not in the vocabulary list \n";
              exit(-1);
            }
            elist->incFreq(s.eSent[e], s.realCount);
          }
        }
        // Target side: index 0 is skipped.
        if (flist->size() > 0) {
          for (WordIndex f = 1; f < s.fSent.size(); f++) {
            if (s.fSent[f] >= flist->uniqTokens()) {
              cerr << "ERROR: target word " << s.fSent[f] << " is not in the vocabulary list \n";
              exit(-1);
            }
            flist->incFreq(s.fSent[f], s.realCount);
          }
        }
      }
      noSentInBuffer++;
    }

    if (inputFile->eof()) {
      // The corpus fits in memory if the buffer starts at sentence number 1.
      allInMemory = (Buffer.size() >= 1 &&
                     Buffer[currentSentence].sentenceNo == 1);
      if (allInMemory)
        cout << "Corpus fits in memory, corpus has: " << Buffer.size()
             << " sentence pairs.\n";
    }
  }

  if (noSentInBuffer <= 0) {
    // Nothing could be read: remember that the end was reached.
    readflag = true;
    pthread_mutex_unlock(&readsent_mutex);
    return 0;
  }

  sent = Buffer[currentSentence++];
  position++;

  // Manual-dictionary entries (negative occurrence field) take their count
  // from the Manlex factors or from the per-sentence table.
  if (sent.noOcc < 0 && realCount) {
    if (Manlexfactor1 && sent.noOcc == -1.0)
      sent.realCount = Manlexfactor1;
    else if (Manlexfactor2 && sent.noOcc == -2.0)
      sent.realCount = Manlexfactor2;
    else
      sent.realCount = (*realCount)[sent.getSentenceNo()-1];
  }

  pthread_mutex_unlock(&readsent_mutex);
  return position;
}
|
||||
/* Read one sentence pair from the corpus file into `sent`.

   Two input layouts are accepted:
     - three consecutive lines: occurrence count, source sentence, target
       sentence;
     - a single line whose fields are separated by any of '|', '#' or '*':
       count, source, target and, optionally, a fourth field holding partial
       alignments written as space-separated "e-f" index pairs.
   Sentences are space-separated word ids.  Id 0 is put in front of each
   sentence, and a sentence is truncated once it holds MAX_SENTENCE_LENGTH
   entries.

   Returns true and assigns the next sentence number on success.  Returns
   false when a line is missing (end of input), in which case `sent` is
   cleared, and also when the first line splits into exactly two fields; the
   caller stops reading on false.

   Fix: the "Forbidden zero sentence length" message used to print
   sent.sentenceNo before it was assigned, so it always showed 0.  The number
   is now assigned first.  Negative alignment indices are now rejected
   explicitly instead of through signed/unsigned conversion. */
bool sentenceHandler::readNextSentence(sentPair& sent)
{
  string line;
  bool fail(false);

  sent.clear();
  vector<string> splits;

  // ---- occurrence count ----
  if (getline(*inputFile, line)) {
    boost::algorithm::split(splits, line, boost::algorithm::is_any_of("|#*"));

    if (splits.size() == 1 || splits.size() == 0) {
      // three-line layout: `line` is the count itself
    } else if (splits.size() >= 3) {
      line = splits[0];
    } else {
      // exactly two fields: malformed
      return false;
    }

    istrstream buffer(line.c_str());
    buffer >> sent.noOcc;
    if (sent.noOcc < 0) {
      // Negative count: manual-dictionary entry.
      if (realCount) {
        if (Manlexfactor1 && sent.noOcc == -1.0)
          sent.realCount = Manlexfactor1;
        else if (Manlexfactor2 && sent.noOcc == -2.0)
          sent.realCount = Manlexfactor2;
        else
          sent.realCount = (*realCount)[pair_no];
      } else {
        sent.realCount = 1.0;
      }
    } else {
      sent.realCount = sent.noOcc;
    }
  } else {
    fail = true;
  }

  // ---- source sentence ----
  if (splits.size() >= 3 || getline(*inputFile, line)) {
    if (splits.size() >= 3)
      line = splits[1];
    istrstream buffer(line.c_str());
    WordIndex w;              // token id being read
    sent.eSent.push_back(0);  // null word (id 0) at the beginning
    while (buffer >> w) {
      if (sent.eSent.size() < MAX_SENTENCE_LENGTH) {
        sent.eSent.push_back(w);
      } else {
        if (PrintedTooLong++ < 100)
          cerr << "{WARNING:(a)truncated sentence " << pair_no << "}";
        break;
      }
    }
  } else {
    fail = true;
  }

  // ---- target sentence ----
  if (splits.size() >= 3 || getline(*inputFile, line)) {
    if (splits.size() >= 3)
      line = splits[2];
    istrstream buffer(line.c_str());
    WordIndex w;              // token id being read
    sent.fSent.push_back(0);  // 0 is inserted for program uniformity
    while (buffer >> w) {
      if (sent.fSent.size() < MAX_SENTENCE_LENGTH) {
        sent.fSent.push_back(w);
      } else {
        if (PrintedTooLong++ < 100)
          cerr << "{WARNING:(b)truncated sentence " << pair_no << "}";
        break;
      }
    }
  } else {
    fail = true;
  }

  // ---- optional partial alignment ("e-f" pairs) ----
  sent.eAnchor.clear();
  sent.fAnchor.clear();
  if (splits.size() > 3) {
    vector<string> al, eal;
    boost::algorithm::split(al, splits[3], boost::algorithm::is_any_of(" "));
    for (size_t k = 0; k < al.size(); k++) {
      eal.clear();
      boost::algorithm::split(eal, al[k], boost::algorithm::is_any_of("-"));
      if (eal.size() != 2)
        continue;
      const int ea = atoi(eal[0].c_str());
      const int fa = atoi(eal[1].c_str());
      // Ignore pairs that point outside either sentence.
      if (ea < 0 || fa < 0 ||
          static_cast<size_t>(ea) >= sent.eSent.size() ||
          static_cast<size_t>(fa) >= sent.fSent.size())
        continue;
      sent.eAnchor.push_back(ea);
      sent.fAnchor.push_back(fa);
    }
  }

  if (fail) {
    sent.eSent.clear();
    sent.fSent.clear();
    sent.eAnchor.clear();
    sent.fAnchor.clear();
    sent.sentenceNo = 0;
    sent.noOcc = 0;
    sent.realCount = 0;
    return false;
  }

  // Number the pair before reporting on it so the message names the pair.
  sent.sentenceNo = ++pair_no;
  if (sent.eSent.size() == 1 || sent.fSent.size() == 1)
    cerr << "ERROR: Forbidden zero sentence length " << sent.sentenceNo << endl;
  if (pair_no % 100000 == 0)
    cout << "[sent:" << sent.sentenceNo << "]" << '\n';
  return true;
}
|
||||
|
||||
// Grid search for lambda.
//
// For lambda = 1.0, 1.33, 1.66, ... (below ManlexMAX_MULTIPLICITY) the sum
//     sum_i  vd[i] * exp(lambda*vd[i]) / (exp(lambda*vd[i]) - 1)
// is computed, and the lambda whose sum is closest to 1 is returned.  If the
// grid is empty the result is 1.0.
double optimize_lambda(Vector<double>&vd)
{
  const double step = 0.33;

  // distance[k] = |sum - 1| for the k-th grid point
  Vector<double> distance;
  for (double lambda = 1.0; lambda < ManlexMAX_MULTIPLICITY; lambda += 0.33) {
    double sum = 0.0;
    for (unsigned int k = 0; k < vd.size(); ++k) {
      const double e = exp(lambda * vd[k]);
      sum += vd[k] * e / (e - 1.0);
    }
    distance.push_back(fabs(sum - 1.0));
  }

  // Convert the index of the smallest distance back into its lambda.
  const double bestIndex =
      double(min_element(distance.begin(), distance.end()) - distance.begin());
  const double lam = bestIndex * step + 1.0;

  if (lam < 1.0) {
    cerr << "ERROR: lambda is smaller than one: " << lam << endl;
    for (unsigned int k = 0; k < vd.size(); ++k)
      cerr << vd[k] << ' ';
    cerr << endl;
  }
  return lam;
}
|
||||
|
||||
void sentenceHandler::setProbOfSentence(const sentPair&s,double d)
|
||||
{
|
||||
|
||||
if( realCount==0 )
|
||||
return;
|
||||
else{
|
||||
pthread_mutex_lock(&setprob_mutex);
|
||||
if( s.noOcc<=0 )
|
||||
{
|
||||
double ed=exp(d);
|
||||
if( oldPairs.size()>0&&(oldPairs.back().get_eSent()!=s.get_eSent()||oldPairs.back().getSentenceNo()>=s.getSentenceNo()) )
|
||||
{
|
||||
double lambda=optimize_lambda(oldProbs);
|
||||
for(unsigned int i=0;i<oldPairs.size();++i)
|
||||
{
|
||||
if( oldProbs[i]<1e-5 )
|
||||
(*realCount)[oldPairs[i].getSentenceNo()-1]=1.0;
|
||||
else
|
||||
(*realCount)[oldPairs[i].getSentenceNo()-1]=lambda*oldProbs[i]/(1-exp(-lambda*oldProbs[i]));
|
||||
}
|
||||
oldPairs.clear();
|
||||
oldProbs.clear();
|
||||
}
|
||||
oldPairs.push_back(s);
|
||||
oldProbs.push_back(ed);
|
||||
}
|
||||
pthread_mutex_unlock(&setprob_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------- End of Method Definition of Class sentenceHandler ----------*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,136 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
/* --------------------------------------------------------------------------*
|
||||
* *
|
||||
* Module : getSentence *
|
||||
* *
|
||||
* Prototypes File: getSentence.h *
|
||||
* *
|
||||
* Objective: Defines classes and methods for handling I/O for the parallel *
|
||||
* corpus. *
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef _sentenceHandler_h
|
||||
#define _sentenceHandler_h 1
|
||||
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <pthread.h>
|
||||
#include "Vector.h"
|
||||
#include "defs.h"
|
||||
#include "vocab.h"
|
||||
#include "Globals.h"
|
||||
/*----------------------- Class Prototype Definition ------------------------*
|
||||
Class Name: sentenceHandler
Objective: This class is defined to handle training sentence pairs from the
parallel corpus. Each pair has: a target sentence, called here the French
sentence; a source sentence, called here the English sentence; and a number
denoting how many times this pair occurred in the training corpus. Both source
and target sentences are represented as integer vectors (variable-size arrays);
each entry is a numeric value which is the token id for the particular token
in the sentence.
|
||||
|
||||
*---------------------------------------------------------------------------*/
|
||||
|
||||
class sentPair{
|
||||
public:
|
||||
int sentenceNo ;
|
||||
float noOcc;
|
||||
float realCount;
|
||||
Vector<WordIndex> eSent ;
|
||||
Vector<WordIndex> fSent;
|
||||
Vector<WordIndex> eAnchor;
|
||||
Vector<WordIndex> fAnchor;
|
||||
public:
|
||||
sentPair(){};
|
||||
void clear(){ eSent.clear(); fSent.clear();eAnchor.clear(),fAnchor.clear(); noOcc=0; realCount=0; sentenceNo=0;};
|
||||
const Vector<WordIndex>&get_eSent()const
|
||||
{ return eSent; }
|
||||
const Vector<WordIndex>&get_fSent()const
|
||||
{ return fSent; }
|
||||
int getSentenceNo()const
|
||||
{ return sentenceNo; }
|
||||
double getCount()const
|
||||
{ return realCount; }
|
||||
|
||||
};
|
||||
|
||||
// Print a sentence pair: a header line with its number and occurrence count,
// a second header line if the used count differs, then the source word ids
// (from index 0) and the target word ids (from index 1), each on a line.
inline ostream&operator<<(ostream&of,const sentPair&s)
{
  of << "Sent No: " << s.sentenceNo << " , No. Occurrences: " << s.noOcc << '\n';
  if (s.noOcc != s.realCount) {
    of << " Used No. Occurrences: " << s.realCount << '\n';
  }

  for (unsigned int e = 0; e < s.eSent.size(); ++e) {
    of << s.eSent[e] << ' ';
  }
  of << '\n';

  // Index 0 of the target sentence is not printed.
  for (unsigned int f = 1; f < s.fSent.size(); ++f) {
    of << s.fSent[f] << ' ';
  }
  of << '\n';

  return of;
}
|
||||
|
||||
/*Thread-safe version of sentence handler*/
|
||||
class sentenceHandler{
|
||||
public:
|
||||
const char * inputFilename; // parallel corpus file name, similar for all
|
||||
// sentence pair objects
|
||||
ifstream *inputFile; // parallel corpus file handler
|
||||
Vector<sentPair> Buffer;
|
||||
int noSentInBuffer ;
|
||||
int currentSentence ;
|
||||
int position; /*Sentence position (will be returned)*/
|
||||
int totalPairs1 ;
|
||||
double totalPairs2;
|
||||
bool readflag ; // true if you reach the end of file
|
||||
bool allInMemory ;
|
||||
int pair_no ;
|
||||
Vector<double> *realCount;
|
||||
|
||||
Vector<sentPair> oldPairs;
|
||||
Vector<double> oldProbs;
|
||||
sentenceHandler(){};
|
||||
sentenceHandler(const char* filename, vcbList* elist=0, vcbList* flist=0);
|
||||
sentenceHandler(const char* filename, vcbList* elist, vcbList* flist,set<WordIndex>& eapp, set<WordIndex>& fapp);
|
||||
void rewind();
|
||||
int getNextSentence(sentPair&, vcbList* = 0, vcbList* = 0); // will be defined in the definition file, this
|
||||
int getTotalNoPairs1()const {return totalPairs1;};
|
||||
double getTotalNoPairs2()const {return totalPairs2;};
|
||||
// method will read the next pair of sentence from memory buffer
|
||||
void setProbOfSentence(const sentPair&s,double d);
|
||||
private:
|
||||
pthread_mutex_t readsent_mutex;
|
||||
pthread_mutex_t setprob_mutex;
|
||||
bool readNextSentence(sentPair&); // will be defined in the definition file, this
|
||||
};
|
||||
|
||||
#endif
|
||||
|
1088
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/src/hmm.cpp
Normal file
1088
mgiza-aligner/mgiza/experimental/alignment-enabled/MGIZA/src/hmm.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,103 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef _hmm_h
|
||||
#define _hmm_h 1
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include "Vector.h"
|
||||
#include <utility>
|
||||
|
||||
#if __GNUC__>2
|
||||
#include <ext/hash_map>
|
||||
using __gnu_cxx::hash_map;
|
||||
#else
|
||||
#include <hash_map>
|
||||
#endif
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
#include <ctime>
|
||||
|
||||
#include "TTables.h"
|
||||
#include "ATables.h"
|
||||
#include "getSentence.h"
|
||||
#include "defs.h"
|
||||
#include "model2.h"
|
||||
#include "Perplexity.h"
|
||||
#include "vocab.h"
|
||||
#include "WordClasses.h"
|
||||
#include "HMMTables.h"
|
||||
#include "ForwardBackward.h"
|
||||
#include "ttableDiff.hpp"
|
||||
|
||||
class hmm : public model2{
|
||||
public:
|
||||
WordClasses& ewordclasses;
|
||||
WordClasses& fwordclasses;
|
||||
public:
|
||||
HMMTables<int,WordClasses> counts,probs;
|
||||
public:
|
||||
template<class MAPPER>
|
||||
void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile){
|
||||
ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
|
||||
if( !estrm ) {
|
||||
cerr << "ERROR: can not read " << efile << endl;
|
||||
}else
|
||||
ewordclasses.read(estrm,m1,Elist);
|
||||
if( !fstrm )
|
||||
cerr << "ERROR: can not read " << ffile << endl;
|
||||
else
|
||||
fwordclasses.read(fstrm,m2,Flist);
|
||||
}
|
||||
hmm(model2&m2,WordClasses &e, WordClasses& f);
|
||||
void initialize_table_uniformly(sentenceHandler&);
|
||||
int em_with_tricks(int iterations, bool dumpCount = false,
|
||||
const char* dumpCountName = NULL, bool useString = false,bool resume=false);
|
||||
CTTableDiff<COUNT,PROB>* em_one_step(int it);
|
||||
// void em_one_step_2(int it,int part);
|
||||
void load_table(const char* aname);
|
||||
|
||||
// void em_loop(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
|
||||
// const char* alignfile, Perplexity&, bool test,bool doInit,int iter);
|
||||
/* CTTableDiff<COUNT,PROB>* em_loop_1(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
|
||||
const char* alignfile, Perplexity&, bool test,bool doInit,int iter);*/
|
||||
/* void em_loop_2( Perplexity& perp, sentenceHandler& sHandler1,
|
||||
bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
|
||||
bool test,bool doInit,int part);*/
|
||||
void em_loop(Perplexity& perp, sentenceHandler& sHandler1,
|
||||
bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
|
||||
bool test,bool doInit,int
|
||||
);
|
||||
void em_thread(int it,string alignfile,bool dump_files,bool resume=false);
|
||||
HMMNetwork *makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>&fs,bool doInit)const;
|
||||
void clearCountTable();
|
||||
friend class model3;
|
||||
};
|
||||
//int multi_thread_em(int noIter, int noThread, hmm* base);
|
||||
|
||||
|
||||
#endif
|
@ -0,0 +1,135 @@
|
||||
// HMM Normalization executable
|
||||
|
||||
#include <iostream>
|
||||
#include <strstream>
|
||||
#include <string>
|
||||
#include "hmm.h"
|
||||
#include "Parameter.h"
|
||||
#define ITER_M2 0
|
||||
#define ITER_MH 5
|
||||
// Per-model iteration counts (last argument is the default value).
// NOTE(review): the GLOBAL_PARAMETER* macros are defined outside this file
// (presumably Parameter.h); the meaning of each string argument is inferred
// from the sibling entries -- confirm against the macro definition.
GLOBAL_PARAMETER3(int,Model1_Iterations,"Model1_Iterations","NO. ITERATIONS MODEL 1","m1","number of iterations for Model 1",PARLEV_ITER,5);
|
||||
GLOBAL_PARAMETER3(int,Model2_Iterations,"Model2_Iterations","NO. ITERATIONS MODEL 2","m2","number of iterations for Model 2",PARLEV_ITER,ITER_M2);
|
||||
// NOTE(review): unlike its siblings, this entry passes "mh" in both the 4th
// and 6th positions and the descriptive sentence in the 5th; the
// name/description arguments look swapped -- confirm before changing, since
// the strings may be accepted as option names at runtime.
GLOBAL_PARAMETER3(int,HMM_Iterations,"HMM_Iterations","mh","number of iterations for HMM alignment model","mh", PARLEV_ITER,ITER_MH);
|
||||
GLOBAL_PARAMETER3(int,Model3_Iterations,"Model3_Iterations","NO. ITERATIONS MODEL 3","m3","number of iterations for Model 3",PARLEV_ITER,5);
|
||||
GLOBAL_PARAMETER3(int,Model4_Iterations,"Model4_Iterations","NO. ITERATIONS MODEL 4","m4","number of iterations for Model 4",PARLEV_ITER,5);
|
||||
GLOBAL_PARAMETER3(int,Model5_Iterations,"Model5_Iterations","NO. ITERATIONS MODEL 5","m5","number of iterations for Model 5",PARLEV_ITER,0);
|
||||
GLOBAL_PARAMETER3(int,Model6_Iterations,"Model6_Iterations","NO. ITERATIONS MODEL 6","m6","number of iterations for Model 6",PARLEV_ITER,0);
|
||||
|
||||
// Smoothing / pruning thresholds (both default to 1e-7).
GLOBAL_PARAMETER(float, PROB_SMOOTH,"probSmooth","probability smoothing (floor) value ",PARLEV_OPTHEUR,1e-7);
|
||||
GLOBAL_PARAMETER(float, MINCOUNTINCREASE,"minCountIncrease","minimal count increase",PARLEV_OPTHEUR,1e-7);
|
||||
|
||||
// Output / logging switches (all default to 0).
GLOBAL_PARAMETER2(int,Transfer_Dump_Freq,"TRANSFER DUMP FREQUENCY","t2to3","output: dump of transfer from Model 2 to 3",PARLEV_OUTPUT,0);
|
||||
GLOBAL_PARAMETER2(bool,Verbose,"verbose","v","0: not verbose; 1: verbose",PARLEV_OUTPUT,0);
|
||||
GLOBAL_PARAMETER(bool,Log,"log","0: no logfile; 1: logfile",PARLEV_OUTPUT,0);
|
||||
|
||||
// EM-level parameters; a negative p0 means "estimate during training"
// according to the description strings.
GLOBAL_PARAMETER(double,P0,"p0","fixed value for parameter p_0 in IBM-3/4 (if negative then it is determined in training)",PARLEV_EM,-1.0);
|
||||
GLOBAL_PARAMETER(double,M5P0,"m5p0","fixed value for parameter p_0 in IBM-5 (if negative then it is determined in training)",PARLEV_EM,-1.0);
|
||||
GLOBAL_PARAMETER3(bool,Peg,"pegging","p","DO PEGGING? (Y/N)","0: no pegging; 1: do pegging",PARLEV_EM,0);
|
||||
|
||||
// NOTE(review): empty description and level -1 -- looks like a legacy/hidden
// option; confirm how the parameter framework treats level -1.
GLOBAL_PARAMETER(short,OldADBACKOFF,"adbackoff","",-1,0);
|
||||
// Default comes from MAX_SENTENCE_LENGTH_ALLOWED, which is defined elsewhere.
GLOBAL_PARAMETER2(unsigned int,MAX_SENTENCE_LENGTH,"ml","MAX SENTENCE LENGTH","maximum sentence length",0,MAX_SENTENCE_LENGTH_ALLOWED);
|
||||
|
||||
GLOBAL_PARAMETER(short, DeficientDistortionForEmptyWord,"DeficientDistortionForEmptyWord","0: IBM-3/IBM-4 as described in (Brown et al. 1993); 1: distortion model of empty word is deficient; 2: distoriton model of empty word is deficient (differently); setting this parameter also helps to avoid that during IBM-3 and IBM-4 training too many words are aligned with the empty word",PARLEV_MODELS,0);
|
||||
|
||||
/**
|
||||
Here are parameters to support Load models and dump models
|
||||
*/
|
||||
|
||||
// Restart level selector; the accepted values (0..11) are enumerated in the
// description string itself. Defaults to 0.
GLOBAL_PARAMETER(int,restart,"restart","Restart training from a level,0: Normal restart, from model 1, 1: Model 1, 2: Model 2 Init (Using Model 1 model input and train model 2), 3: Model 2, (using model 2 input and train model 2), 4 : HMM Init (Using Model 1 model and train HMM), 5: HMM (Using Model 2 model and train HMM) 6 : HMM (Using HMM Model and train HMM), 7: Model 3 Init (Use HMM model and train model 3) 8: Model 3 Init (Use Model 2 model and train model 3) 9: Model 3, 10: Model 4 Init (Use Model 3 model and train Model 4) 11: Model 4 and on, ",PARLEV_INPUT,0);
|
||||
// Count-dumping switches (both default to false).
GLOBAL_PARAMETER(bool,dumpCount,"dumpcount","Whether we are going to dump count (in addition to) final output?",PARLEV_OUTPUT,false);
|
||||
GLOBAL_PARAMETER(bool,dumpCountUsingWordString,"dumpcountusingwordstring","In count table, should actual word appears or just the id? default is id",PARLEV_OUTPUT,false);
|
||||
/// END
|
||||
// Plain global flags. main() in this file does not read them.
// NOTE(review): presumably they exist to satisfy extern references from the
// shared GIZA sources linked into this tool -- confirm.
short OutputInAachenFormat=0;
|
||||
// TRANSFER is a macro defined outside this file.
bool Transfer=TRANSFER;
|
||||
bool Transfer2to3=0;
|
||||
short NoEmptyWord=0;
|
||||
bool FEWDUMPS=0;
|
||||
// NOTE(review): the description string is identical to the one used for
// NODUMPS below; looks like a copy/paste -- confirm the intended wording.
GLOBAL_PARAMETER(bool,ONLYALDUMPS,"ONLYALDUMPS","1: do not write any files",PARLEV_OUTPUT,0);
|
||||
GLOBAL_PARAMETER(short,NCPUS,"NCPUS","Number of CPUS",PARLEV_EM,2);
|
||||
GLOBAL_PARAMETER(short,CompactAlignmentFormat,"CompactAlignmentFormat","0: detailled alignment format, 1: compact alignment format ",PARLEV_OUTPUT,0);
|
||||
GLOBAL_PARAMETER2(bool,NODUMPS,"NODUMPS","NO FILE DUMPS? (Y/N)","1: do not write any files",PARLEV_OUTPUT,0);
|
||||
|
||||
// Upper bound on fertility, default 10.
GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
|
||||
"maximal fertility for fertility models", PARLEV_EM, 10);
|
||||
|
||||
using namespace std;
|
||||
// Global file-name strings. In this file only SourceVocabFilename and
// TargetVocabFilename are assigned (from argv[1] / argv[2] in main), and
// CorpusFilename is passed, still empty, to the model1 constructor.
string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
|
||||
TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
|
||||
a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
|
||||
|
||||
|
||||
int main(int argc, char* argv[]){
|
||||
if(argc < 5){
|
||||
cerr << "Usage: " << argv[0] << " vcb1 vcb2 outputFile baseFile [additional1 ]..." << endl;
|
||||
return 1;
|
||||
}
|
||||
Vector<WordEntry> evlist,fvlist;
|
||||
vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
|
||||
TargetVocabFilename = argv[2];
|
||||
SourceVocabFilename = argv[1];
|
||||
eTrainVcbList.setName(argv[1]);
|
||||
fTrainVcbList.setName(argv[2]);
|
||||
eTrainVcbList.readVocabList();
|
||||
fTrainVcbList.readVocabList();
|
||||
Perplexity trainPerp, testPerp, trainViterbiPerp, testViterbiPerp;
|
||||
tmodel<float, float> tTable;
|
||||
sentenceHandler *corpus = new sentenceHandler();
|
||||
|
||||
|
||||
model1 m1(CorpusFilename.c_str(), eTrainVcbList, fTrainVcbList, tTable,
|
||||
trainPerp, *corpus, &testPerp, corpus, trainViterbiPerp,
|
||||
&testViterbiPerp);
|
||||
amodel<float> aTable(false);
|
||||
amodel<float> aCountTable(false);
|
||||
model2 m2(m1, aTable, aCountTable);
|
||||
WordClasses french,english;
|
||||
hmm h(m2,english,french);
|
||||
string evcbcls = argv[1];
|
||||
string fvcbcls = argv[2];
|
||||
evcbcls += ".classes";
|
||||
fvcbcls += ".classes";
|
||||
h.makeWordClasses(m1.Elist, m1.Flist, evcbcls.c_str(), fvcbcls.c_str());
|
||||
string base = argv[4];
|
||||
string baseA = base+".alpha";
|
||||
string baseB = base+".beta";
|
||||
string output = argv[3];
|
||||
string outputA = output+".alpha";
|
||||
string outputB = output+".beta";
|
||||
h.probs.readJumps(base.c_str(),NULL,baseA.c_str(), baseB.c_str());
|
||||
// Start iteration:
|
||||
for(int i = 5; i< argc ; i++){
|
||||
string name = argv[i];
|
||||
string nameA = name + ".alpha";
|
||||
string nameB = name + ".beta";
|
||||
if(h.counts.readJumps(name.c_str(),NULL,nameA.c_str(), nameB.c_str()))
|
||||
h.probs.merge(h.counts);
|
||||
else
|
||||
cerr << "Error, cannot load name.c_str()";
|
||||
h.clearCountTable();
|
||||
}
|
||||
h.probs.writeJumps(output.c_str(),NULL,outputA.c_str(), outputB.c_str());
|
||||
delete corpus;
|
||||
}
|
||||
|
||||
// Some utility functions and globals needed only to get this tool to compile and link.
|
||||
|
||||
ofstream logmsg;
|
||||
// Converts an integer to its decimal string representation.
// (Despite the name, the conversion is number -> string.)
//
// Negative values are rendered with a leading '-'. Previously they produced
// characters below '0', because n % 10 is negative for negative n.
const string str2Num(int n) {
  // Widen first so that negating the most negative int cannot overflow.
  long long value = n;
  const bool negative = value < 0;
  if (negative)
    value = -value;

  string number = "";
  do {
    number.insert((size_t)0, 1, (char)(value % 10 + '0'));
  } while ((value /= 10) > 0);

  if (negative)
    number.insert((size_t)0, 1, '-');
  return (number);
}
|
||||
double LAMBDA=1.09;
|
||||
|
||||
Vector<map< pair<int,int>,char > > ReferenceAlignment;
|
||||
|
||||
double ErrorsInAlignment(const map< pair<int,int>,char >&reference,
|
||||
const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
|
||||
int&eventsMissing, int&eventsToomuch, int pair_no){
|
||||
}
|
||||
|
||||
// Stub: intentionally writes nothing to `out`.
// NOTE(review): presumably present only so that shared code referencing
// printGIZAPars links into this tool -- confirm.
void printGIZAPars(ostream&out){
|
||||
}
|
||||
|
@ -0,0 +1,154 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
// Routines to perform integer exponential arithmetic.
|
||||
// A number x is represented as n, where x = b**n.
|
||||
// It is assumed that b > 1, something like b = 1.001;
|
||||
|
||||
#include "logprob.h"
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
// Definitions of LogProb's static members.
//
// Order matters: the later initializers (and LogProb::Initialize(), invoked
// through `initialized` below) read the constants defined before them, and
// non-local objects in one translation unit are initialized in definition
// order.
double *LogProb::ntof = NULL; // Tables will be initialized
|
||||
int *LogProb::addtbl = NULL; // in Initialize function.
|
||||
int *LogProb::subtbl = NULL; //
|
||||
|
||||
const int LogProb::max_2byte_integer = 32767;
|
||||
const int LogProb::min_2byte_integer = -32768;
|
||||
const double LogProb::b = 1.001; // a logarithm basis
|
||||
// Natural logarithm of b (despite the "2" in the name).
const double LogProb::logb2 = log(b);
|
||||
//const int LogProb::nmax = round(78.0E0 * log(1.0E1) / logb2);
|
||||
// Largest exponent kept in the conversion table: b**nmax is about 1e300.
const int LogProb::nmax = round(300.0E0 * log(1.0E1) / logb2);
|
||||
const int LogProb::nmin = -nmax;
|
||||
// log_b((b-1)/2): a negative bound; exponent gaps below it are treated as
// negligible by operator+= / operator-=.
const int LogProb::tblbnd = round(log((b-1.0E0)/2.0E0)/logb2);
|
||||
// Sentinel exponent that represents the value 0 (-2**23).
const int LogProb::zeron = round(pow(-2, 23));
|
||||
// Exponent of the value 1.
const int LogProb::onen = 0;
|
||||
// Sentinel exponent used by SetInf() (+2**23).
const int LogProb::infn = onen - zeron;
|
||||
|
||||
// Builds the lookup tables as a side effect of static initialization; must
// stay after the constants above because Initialize() reads them.
const int LogProb::initialized = LogProb::Initialize();
|
||||
const LogProb LogProb::zero(0);
|
||||
const LogProb LogProb::one(1);
|
||||
const LogProb LogProb::minus2(1e-2);
|
||||
const LogProb LogProb::minus4(1e-4);
|
||||
const LogProb LogProb::minus6(1e-6);
|
||||
const LogProb LogProb::minus8(1e-8);
|
||||
const LogProb LogProb::minus10(1e-10);
|
||||
const LogProb LogProb::minus12(1e-12);
|
||||
const LogProb LogProb::minus14(1e-14);
|
||||
const LogProb LogProb::minus16(1e-16);
|
||||
|
||||
// static table initialization function
|
||||
int LogProb::Initialize()
|
||||
{
|
||||
int nbytes = sizeof(double)*(nmax-nmin+1) + sizeof(int)*(0-tblbnd+1);
|
||||
std::cerr << nbytes << " bytes used for LogProb tables (C++ version)\n";
|
||||
ntof = new double[nmax-nmin+1];
|
||||
addtbl = new int[-tblbnd+1];
|
||||
subtbl = new int[-tblbnd+1];
|
||||
|
||||
// char filename[257];
|
||||
// string filename ;
|
||||
// ifstream ifs;
|
||||
// ifs.open(filename.c_str());
|
||||
// if (!ifs)
|
||||
// {
|
||||
int i;
|
||||
std::cerr << "Building integer logs conversion tables\n";
|
||||
ntof[0] = 0 ;
|
||||
|
||||
for (i=nmin+1; i<=nmax; ++i)
|
||||
{
|
||||
double x = i;
|
||||
ntof[i-nmin] = exp(x*logb2);
|
||||
|
||||
}
|
||||
for (i=tblbnd; i<=0; ++i)
|
||||
{
|
||||
double x = 1.0 + pow(b, i);
|
||||
addtbl[i-tblbnd] = round(log(x)/logb2);
|
||||
}
|
||||
double sqrtb = exp(0.5*logb2);
|
||||
for (i=0; i<=-tblbnd; ++i)
|
||||
{
|
||||
double x = sqrtb * pow(b, i) - 1.0;
|
||||
subtbl[i] = round(log(x)/logb2);
|
||||
}
|
||||
// if (toolsRoot)
|
||||
// {
|
||||
// ofstream ofs(filename.c_str());
|
||||
// if (!ofs)
|
||||
// cerr << "Could not write LogProb data to " << filename << endl;
|
||||
// else
|
||||
// {
|
||||
// ofs.write((const char *)ntof, sizeof(double) * (nmax-nmin+1));
|
||||
// ofs.write((const char *)addtbl, sizeof(int) * (-tblbnd+1));
|
||||
// ofs.write((const char *)subtbl, sizeof(int) * (-tblbnd+1));
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// ifs.read((char *)ntof, sizeof(double) * (nmax - nmin + 1));
|
||||
// ifs.read((char *)addtbl, sizeof(int) * (-tblbnd+1));
|
||||
// ifs.read((char *)subtbl, sizeof(int) * (-tblbnd+1));
|
||||
// }
|
||||
return 1;
|
||||
}
|
||||
|
||||
void LogProb::FreeTables()
|
||||
{
|
||||
delete [] addtbl;
|
||||
delete [] subtbl;
|
||||
delete [] ntof;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
// Arithmetic operators
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
|
||||
// Subtract two logarithm numbers. Use the following method:
|
||||
// b**n - b**m = b**m( b**(n-m) - 1 ), assuming n >= m.
|
||||
LogProb& LogProb::operator-=(const LogProb &subs)
|
||||
{
|
||||
if (subs.logr == zeron)
|
||||
return *this;
|
||||
int a = logr - subs.logr;
|
||||
if (a <= 0)
|
||||
{
|
||||
if (a < 0)
|
||||
{
|
||||
std::cerr << "WARNING(logprob): Invalid arguments to nsub" <<(*this)<< " " << subs << std::endl;
|
||||
//abort();
|
||||
}
|
||||
logr = zeron;
|
||||
return *this;
|
||||
}
|
||||
if (a > -tblbnd)
|
||||
return *this;
|
||||
logr = subs.logr + subtbl[a];
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,217 @@
|
||||
/*
|
||||
|
||||
EGYPT Toolkit for Statistical Machine Translation
|
||||
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
#ifndef _LOGPROB_H
|
||||
#define _LOGPROB_H
|
||||
|
||||
// Routines to perform integer exponential arithmetic.
|
||||
// A number x is represented as n, where x = b**n
|
||||
// It is assumed that b > 1, something like b = 1.001
|
||||
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
//#define MAX(A,B) ((A) > (B) ? (A) : (B))
|
||||
//#define MIN(A,B) ((A) > (B) ? (B) : (A))
|
||||
|
||||
|
||||
// A non-negative real number stored as an integer exponent:
// the value x is held as logr, where x = b**logr.
//
// The exponent `zeron` is a sentinel for exactly 0, `onen` (0) is the value 1
// and `infn` is used by SetInf(). Addition and subtraction use the lookup
// tables built by Initialize(); multiplication and division are integer
// addition / subtraction of exponents, clamped from below at `zeron`.
class LogProb {
public:
  // mj for cross entropy
  // Returns logr * log(b) / log(2).
  double base2() const {
    return (logr * logb2 / log(2.0));
  }

  // Constructors
  LogProb() : logr(zeron) {}                          // value 0
  LogProb(const LogProb &obj) : logr(obj.logr) {}
  // Converts from the linear domain; 0.0 maps to the zero sentinel.
  // NOTE(review): negative x is not handled (log of a negative number).
  LogProb(double x) : logr(x == 0.0 ? zeron : round(log(x)/logb2)) {}
  // destructor
  ~LogProb() {}

  // Converts logr to (double) b**logr via the ntof table; exponents outside
  // [nmin, nmax] are clamped to the first / last table entry.
  operator double() const
  {
    if (logr < nmin) return ntof[0];
    if (logr > nmax) return ntof[nmax-nmin];
    return ntof[logr-nmin];
  }

  LogProb &operator=(const LogProb &obj) { logr = obj.logr; return *this; }
  // Non-zero exactly when the value is the zero sentinel.
  int operator!() const { return logr == zeron; }

  // iostream friend specifications
  friend std::ostream& operator<<(std::ostream& os, const LogProb &obj);
  friend std::istream& operator>>(std::istream& is, LogProb &obj);
  friend std::ostream& operator<<=(std::ostream& os, const LogProb &obj);
  friend std::istream& operator>>=(std::istream& is, LogProb &obj);

  // arithmetic operators

  // logr2 = logb ( b**logr2 + b**logr1 )
  // Add two numbers represented as logarithms. Use the following method:
  // b**n + b**m = b**n(1 + b**(m-n)), assuming n >= m.
  LogProb &operator+=(const LogProb &add)
  {
    if (add.logr == zeron)
      return *this;
    if (logr == zeron)
      {
        logr = add.logr;
        return *this;
      }
    int a = add.logr - logr;
    if (a > 0)
      {
        // Keep the larger exponent in logr; `a` becomes the (negative) gap.
        a = -a;
        logr = add.logr;
      }
    if (a < tblbnd)   // smaller operand is negligible
      return *this;
    logr += addtbl[a-tblbnd];
    return *this;
  }

  LogProb &operator-=(const LogProb &);  // logr2 = logb ( b**logr2 - b**logr1 )

  LogProb operator*(const LogProb &mul) const  // logr3 = logr2 + logr1
  {
    LogProb result;  // start out with result == 0
    if ((logr != zeron) && (mul.logr != zeron))
      result.logr = std::max(logr+mul.logr, zeron);
    return result;
  }

  LogProb operator*(double x) const  // logr3 = logr2 + logr1
  {
    return (*this)*(LogProb)x;
  }

  LogProb operator^(const int i) const  // logr2 = logr1 * i
  {
    LogProb result;  // start out with result == 0
    // 0**i stays exactly zero for positive i. Multiplying the zero sentinel
    // would give an exponent that no longer compares equal to zero and can
    // overflow int. Non-positive i keeps the previous arithmetic.
    if ((logr != zeron) || (i <= 0))
      result.logr = logr * i;
    return result;
  }

  LogProb &operator*=(const LogProb &mul)  // logr2 += logr1
  {
    if ((logr == zeron) || (mul.logr == zeron))
      logr = zeron;
    else
      logr = std::max(logr+mul.logr, zeron);
    return *this;
  }

  // NOTE(review): a zero divisor is not special-cased here.
  LogProb operator/(const LogProb &div) const  // logr3 = logr2 - logr1
  {
    LogProb result;
    if (logr != zeron)
      result.logr = std::max(logr - div.logr, zeron);
    return result;
  }

  LogProb &operator/=(const LogProb &div)  // logr2 -= logr1
  {
    if (logr != zeron)
      logr = std::max(logr - div.logr, zeron);
    return *this;
  }

  LogProb operator+(const LogProb &l) const  // logr3 = logb ( b**logr2 + b**logr1 )
  { LogProb result(*this); result += l; return result; }

  LogProb operator-(const LogProb &l) const  // logr3 = logb ( b**logr2 - b**logr1 )
  { LogProb result(*this); result -= l; return result; }

  LogProb power(const int n) const  // logr2 = logr1 * int
  {
    LogProb result(*this);
    // Same zero handling as operator^: zero to a positive power stays zero.
    if ((logr != zeron) || (n <= 0))
      result.logr *= n;
    return result;
  }

  // Conditional operators (LogProb vs LogProb compares exponents,
  // LogProb vs double compares in the linear domain).
  int operator<(const LogProb &obj) const { return logr < obj.logr; }
  int operator<=(const LogProb &obj) const { return logr <= obj.logr; }
  int operator>(const LogProb &obj) const { return logr > obj.logr; }
  int operator>=(const LogProb &obj) const { return logr >= obj.logr; }
  int operator==(const LogProb &obj) const { return logr == obj.logr; }
  int operator!=(const LogProb &obj) const { return logr != obj.logr; }
  int operator<(double d) const { return ((double)*this) < d; }
  int operator<=(double d) const { return ((double)*this) <= d; }
  int operator>(double d) const { return ((double)*this) > d; }
  int operator>=(double d) const { return ((double)*this) >= d; }
  int operator==(double d) const { return ((double)*this) == d; }
  int operator!=(double d) const { return ((double)*this) != d; }

  LogProb &SetZero() { logr = zeron; return *this; }  // representation of 0,
  LogProb &SetOne() { logr = onen; return *this; }    // 1, and
  LogProb &SetInf() { logr = infn; return *this; }    // inf in logarithm domain

private:
  int logr;  // a representation of logarithm

  // static constants
  static const int initialized;  // initialization flag
  static const double b;
  static const double logb2;
  static const int nmin, nmax;
  static const int tblbnd;
  static const int zeron, onen, infn;  // zero, one, and inf in log domain
  static const int max_2byte_integer, min_2byte_integer;

  // Arithmetic computation Tables
  static double *ntof;
  static int *addtbl;
  static int *subtbl;

  static int Initialize();

public:
  static void FreeTables();
  // constants for initializing LogProbs to 0 or 1
  static const LogProb zero;
  static const LogProb one;
  static const LogProb minus2;
  static const LogProb minus4;
  static const LogProb minus6;
  static const LogProb minus8;
  static const LogProb minus10;
  static const LogProb minus12;
  static const LogProb minus14;
  static const LogProb minus16;
};
|
||||
|
||||
// iostream friend operators
|
||||
// Text output: writes the value in the linear domain (b**logr).
inline std::ostream &operator<<(std::ostream& os, const LogProb &obj)
{
  const double linear = (double) obj;
  os << linear;
  return os;
}
|
||||
|
||||
// Text input: reads a double in the linear domain and converts it.
// `d` is zero-initialized so that a failed extraction assigns 0 on every
// language version instead of reading an indeterminate value (before C++11 a
// failed extraction leaves the target untouched).
inline std::istream &operator>>(std::istream& is, LogProb &obj)
{
  double d = 0.0;
  is >> d;
  obj = d;
  return is;
}
|
||||
|
||||
inline std::ostream &operator<<=(std::ostream& os, const LogProb &obj) // write binary
|
||||
{
|
||||
os.write((const char *)&obj.logr, sizeof(obj.logr));
|
||||
return os;
|
||||
}
|
||||
|
||||
inline std::istream &operator>>=(std::istream& is, LogProb &obj)
|
||||
{
|
||||
is.read((char *)&obj.logr, sizeof(obj.logr));
|
||||
return is;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,370 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef ARRAY_H_DEFINED
|
||||
#define ARRAY_H_DEFINED
|
||||
using namespace std;
|
||||
#include "myassert.h"
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <functional>
|
||||
#include "my.h"
|
||||
|
||||
#define ARRAY_DEBUG
|
||||
|
||||
|
||||
template<class T> class Array
|
||||
{
|
||||
private:
|
||||
T *p;
|
||||
int realSize;
|
||||
int maxWritten;
|
||||
char a;
|
||||
|
||||
void copy(T *a,const T *b,int n);
|
||||
void copy(T *a,T *b,int n);
|
||||
void _expand();
|
||||
|
||||
public:
|
||||
Array()
|
||||
: p(0),realSize(0),maxWritten(-1) ,a(1)
|
||||
{
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "MAKE ARRAY: " << this<<" "<<(void*)p << endl;
|
||||
#endif
|
||||
}
|
||||
Array(const Array<T> &x)
|
||||
: p(new T[x.maxWritten+1]),realSize(x.maxWritten+1),maxWritten(x.maxWritten),a(x.a)
|
||||
{
|
||||
copy(p,x.p,realSize);
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< endl;
|
||||
#endif
|
||||
}
|
||||
explicit Array(int n)
|
||||
: p(new T[n]),realSize(n),maxWritten(n-1),a(0)
|
||||
{
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||
#endif
|
||||
}
|
||||
Array(int n,const T&_init,int _a=0)
|
||||
: p(new T[n]),realSize(n),maxWritten(n-1),a(_a)
|
||||
{
|
||||
for(int iii=0;iii<n;iii++)p[iii]=_init;
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
~Array()
|
||||
{
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||
#endif
|
||||
delete [] p;
|
||||
}
|
||||
|
||||
Array<T>& operator=(const Array<T>&x)
|
||||
{
|
||||
if( this!= &x )
|
||||
{
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||
#endif
|
||||
|
||||
delete [] p;
|
||||
realSize = x.maxWritten+1;
|
||||
maxWritten = x.maxWritten;
|
||||
a = x.a;
|
||||
p = new T[realSize];
|
||||
copy(p,x.p,realSize);
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Array<T>& operator=(Array<T>&x)
|
||||
{
|
||||
if( this!= &x )
|
||||
{
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||
#endif
|
||||
delete [] p;
|
||||
realSize = x.maxWritten+1;
|
||||
maxWritten = x.maxWritten;
|
||||
a = x.a;
|
||||
p = new T[realSize];
|
||||
copy(p,x.p,realSize);
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void allowAccess(int n)
|
||||
{
|
||||
while( realSize<=n )
|
||||
_expand();
|
||||
maxWritten=max(maxWritten,n);
|
||||
massert( maxWritten<realSize );
|
||||
}
|
||||
void resize(int n)
|
||||
{
|
||||
while( realSize<n )
|
||||
_expand();
|
||||
maxWritten=n-1;
|
||||
}
|
||||
void sort(int until=-1)
|
||||
{
|
||||
if( until== -1 ) until=size();
|
||||
std::sort(p,p+until);
|
||||
}
|
||||
void invsort(int until=-1)
|
||||
{
|
||||
if( until== -1 ) until=size();
|
||||
std::sort(p,p+until,greater<T>());
|
||||
}
|
||||
void init(int n,const T&_init,bool _a=0)
|
||||
{
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||
#endif
|
||||
delete []p;
|
||||
p=new T[n];
|
||||
realSize=n;
|
||||
a=_a;
|
||||
maxWritten=n-1;
|
||||
for(int iii=0;iii<n;iii++)p[iii]=_init;
|
||||
#ifdef VERY_ARRAY_DEBUG
|
||||
cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
|
||||
#endif
|
||||
}
|
||||
inline int size() const
|
||||
{massert( maxWritten<realSize );
|
||||
return maxWritten+1;}
|
||||
inline int low() const
|
||||
{ return 0; }
|
||||
inline int high() const
|
||||
{ return maxWritten; }
|
||||
inline bool autoexpand() const
|
||||
{return a;}
|
||||
inline void autoexpand(bool autoExp)
|
||||
{a=autoExp;}
|
||||
int findMax() const;
|
||||
int findMin() const;
|
||||
const void errorAccess(int n) const;
|
||||
inline T*getPointerToData(){return p;}
|
||||
|
||||
inline T& operator[](int n)
|
||||
{
|
||||
if( a && n==maxWritten+1 )
|
||||
allowAccess(n);
|
||||
if( n<0 || n>maxWritten )
|
||||
errorAccess(n);
|
||||
return p[n];
|
||||
}
|
||||
inline const T& operator[](int n) const
|
||||
{
|
||||
if(n<0 || n>maxWritten )
|
||||
errorAccess(n);
|
||||
return p[n];
|
||||
}
|
||||
const T&top(int n=0) const
|
||||
{return (*this)[maxWritten-n];}
|
||||
T&top(int n=0)
|
||||
{return (*this)[maxWritten-n];}
|
||||
T&push(const T&x)
|
||||
{
|
||||
(*this)[maxWritten+1]=x;
|
||||
return top();
|
||||
}
|
||||
bool writeTo(ostream&out) const
|
||||
{
|
||||
out << "Array ";
|
||||
out << size() << " ";
|
||||
out << a << endl;
|
||||
for(int iv=0;iv<=maxWritten;iv++)
|
||||
{
|
||||
writeOb(out,(*this)[iv]);
|
||||
out << endl;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
bool readFrom(istream&in)
|
||||
{
|
||||
string s;
|
||||
if( !in )
|
||||
{
|
||||
cerr << "ERROR(Array): file cannot be opened.\n";
|
||||
return 0;
|
||||
}
|
||||
in >> s;
|
||||
if( !(s=="Array") )
|
||||
{
|
||||
cerr << "ERROR(Array): Array!='"<<s<<"'\n";
|
||||
return 0;
|
||||
}
|
||||
int biggest;
|
||||
in >> biggest;
|
||||
in >> a;
|
||||
resize(biggest);
|
||||
for(int iv=0;iv<size();iv++)
|
||||
{
|
||||
readOb(in,(*this)[iv]);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
template<class T> bool operator==(const Array<T> &x, const Array<T> &y)
|
||||
{
|
||||
if( &x == &y )
|
||||
return 1;
|
||||
else
|
||||
{
|
||||
if( y.size()!=x.size() )
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
for(int iii=0;iii<x.size();iii++)
|
||||
if( !(x[iii]==y[iii]) )
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class T> bool operator<(const Array<T> &x, const Array<T> &y)
|
||||
{
|
||||
if( &x == &y )
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
if( y.size()<x.size() )
|
||||
return !(y<x);
|
||||
for(int iii=0;iii<x.size();iii++)
|
||||
{
|
||||
massert( iii!=y.size() );
|
||||
if( x[iii]<y[iii] )
|
||||
return 1;
|
||||
else if( y[iii]<x[iii] )
|
||||
return 0;
|
||||
}
|
||||
return x.size()!=y.size();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Reports an out-of-range access to index n on cerr and then on cout
// (with maxWritten, realSize, buffer address and the auto-expand flag),
// fires massert(0) and, unless DEBUG is defined, aborts.
template<class T> const void Array<T>:: errorAccess(int n) const
{
  ostream *sinks[2] = { &cerr, &cout };
  for(int s=0;s<2;++s)
    {
      *sinks[s] << "ERROR: Access to array element " << n
                << " (" << maxWritten << "," << realSize << "," << (void*)p << " " << a << ")\n";
    }
  massert(0);
#ifndef DEBUG
  abort();
#endif
}
|
||||
|
||||
// Human-readable dump: "Array(<size>,<autoexpand>){  0:<e0>; 1:<e1>;...}\n".
template<class T> ostream& operator<<(ostream&o,const Array<T>&a)
{
  o << "Array(";
  o << a.size();
  o << ",";
  o << a.autoexpand();
  o << "){ ";
  for(int k=0;k<a.size();++k)
    {
      o << " " << k << ":" << a[k] << ";";
    }
  o << "}\n";
  return o;
}
|
||||
|
||||
// Stub extraction operator: consumes nothing and leaves the array untouched.
// Use Array::readFrom() to actually deserialize an array.
template<class T> istream& operator>>(istream&in, Array<T>&)
|
||||
{return in;}
|
||||
|
||||
template<class T> int Hash(const Array<T>&a)
|
||||
{
|
||||
int n=0;
|
||||
for(int iii=0;iii<a.size();iii++)
|
||||
n+=Hash(a[iii])*(iii+1);
|
||||
return n+a.size()*47;
|
||||
}
|
||||
// Assigns the first n elements of bb to aa, front to back (no-op for n <= 0).
template<class T> void Array<T>::copy(T *aa,const T *bb,int n)
{
  int remaining=n;
  while( remaining-- > 0 )
    *aa++ = *bb++;
}
|
||||
// Non-const-source overload: assigns the first n elements of bb to aa,
// front to back (no-op for n <= 0).
template<class T> void Array<T>::copy(T *aa,T *bb,int n)
{
  int remaining=n;
  while( remaining-- > 0 )
    *aa++ = *bb++;
}
|
||||
|
||||
// Grows the capacity from realSize to 2*realSize+1, preserving the existing
// elements. maxWritten is not changed.
//
// The members are updated only after the new buffer has been allocated and
// filled. Previously realSize was enlarged first, so a throwing new[] left
// the object claiming more capacity than p actually had.
template<class T> void Array<T>::_expand()
{
#ifdef VERY_ARRAY_DEBUG
  cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
  const int grown=realSize*2+1;
  T *fresh=new T[grown];
  try
    {
      copy(fresh,p,realSize);
    }
  catch(...)
    {
      delete [] fresh;
      throw;
    }
  delete [] p;
  p=fresh;
  realSize=grown;
#ifdef VERY_ARRAY_DEBUG
  cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
}
|
||||
|
||||
// Returns the index of the first largest element (by operator<),
// or -1 when the array is empty.
template<class T> int Array<T>::findMax() const
{
  if( size()==0 )
    return -1;
  int best=0;
  for(int k=1;k<size();++k)
    {
      if( (*this)[best]<(*this)[k] )
        best=k;
    }
  return best;
}
|
||||
// Returns the index of the first smallest element (by operator<),
// or -1 when the array is empty.
template<class T> int Array<T>::findMin() const
{
  if( size()==0 )
    return -1;
  int best=0;
  for(int k=1;k<size();++k)
    {
      if( (*this)[k]<(*this)[best] )
        best=k;
    }
  return best;
}
|
||||
|
||||
#endif
|
@ -0,0 +1,287 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef FIXARRAY_H_DEFINED
|
||||
#define FIXARRAY_H_DEFINED
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <functional>
|
||||
|
||||
// Writes f followed by a single blank to out. Always reports success.
template<class T>
bool writeOb(ostream&out,const T&f)
{
  out << f;
  out << " ";
  return true;
}
|
||||
|
||||
template<class T>
|
||||
bool readOb(istream&in,T&f)
|
||||
{
|
||||
in >> f;
|
||||
char c;
|
||||
in.get(c);
|
||||
massert(c==' ');
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Writes the tag s, a blank, the value f and a trailing blank to out.
// Always reports success.
template<class T>
bool writeOb(ostream&out,const string &s,const T&f)
{
  out << s;
  out << " ";
  out << f;
  out << " ";
  return true;
}
|
||||
template<class T>
|
||||
bool readOb(istream&in,const string&s,T&f)
|
||||
{
|
||||
string ss;
|
||||
in >> ss;
|
||||
if( s!=ss )
|
||||
{
|
||||
cerr << "ERROR: readOb should be '" << s << "' and is '" << ss << "'" << endl;
|
||||
return 0;
|
||||
}
|
||||
in >> f;
|
||||
char c;
|
||||
in.get(c);
|
||||
massert(c==' ');
|
||||
return 1;
|
||||
}
|
||||
|
||||
template<class T> class FixedArray
|
||||
{
|
||||
private:
|
||||
void copy(T *aa,const T *bb,int nnn)
|
||||
{for(int iii=0;iii<nnn;iii++)aa[iii]=bb[iii];}
|
||||
|
||||
public:
|
||||
T *p;
|
||||
int realSize;
|
||||
FixedArray()
|
||||
: p(0),realSize(0){}
|
||||
FixedArray(const FixedArray<T> &x)
|
||||
: p(new T[x.realSize]),realSize(x.realSize) {copy(p,x.p,realSize);}
|
||||
explicit FixedArray(int n)
|
||||
: p(new T[n]),realSize(n){}
|
||||
FixedArray(int n,const T&_init)
|
||||
: p(new T[n]),realSize(n){for(int z=0;z<n;z++)p[z]=_init;}
|
||||
FixedArray(const FixedArray&f,const T&t)
|
||||
: p(new T[f.size()+1]),realSize(f.size()+1){for(int z=0;z<f.size();z++)p[z]=f[z];p[f.size()]=t;}
|
||||
~FixedArray()
|
||||
{ delete [] p;p=0;realSize=-1;}
|
||||
|
||||
FixedArray<T>& operator=(const FixedArray<T>&x)
|
||||
{
|
||||
if( this!= &x )
|
||||
{
|
||||
delete [] p;
|
||||
realSize = x.realSize;
|
||||
p = new T[x.realSize];
|
||||
copy(p,x.p,realSize);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
void resize(int n)
|
||||
{
|
||||
if( n<=realSize )
|
||||
shrink(n);
|
||||
else
|
||||
{
|
||||
T*np=new T[n];
|
||||
copy(np,p,realSize);
|
||||
delete []p;
|
||||
p=np;
|
||||
realSize=n;
|
||||
}
|
||||
}
|
||||
void shrink(int n)
|
||||
{
|
||||
assert(n<=realSize);
|
||||
realSize=n;
|
||||
}
|
||||
void init(int n,const T&_init)
|
||||
{
|
||||
delete []p;
|
||||
p=new T[n];
|
||||
realSize=n;
|
||||
for(int l=0;l<n;l++)p[l]=_init;
|
||||
}
|
||||
inline const T&top(int n=0) const
|
||||
{return (*this)[realSize-1-n];}
|
||||
inline int size() const
|
||||
{return realSize;}
|
||||
|
||||
inline T*begin(){ return p; }
|
||||
inline T*end(){ return p+realSize; }
|
||||
|
||||
inline const T*begin()const{ return p; }
|
||||
inline const T*end()const{return p+realSize;}
|
||||
|
||||
inline int low() const
|
||||
{return 0;}
|
||||
inline int high() const
|
||||
{return realSize-1;}
|
||||
const void errorAccess(int n) const;
|
||||
|
||||
inline T& operator[](int n)
|
||||
{
|
||||
return p[n];
|
||||
}
|
||||
inline const T& operator[](int n) const
|
||||
{
|
||||
return p[n];
|
||||
}
|
||||
bool writeTo(ostream&out) const
|
||||
{
|
||||
out << "FixedArray ";
|
||||
out << size() << " ";
|
||||
for(int a=0;a<size();a++)
|
||||
{
|
||||
writeOb(out,(*this)[a]);
|
||||
out << " ";
|
||||
}
|
||||
out << endl;
|
||||
return 1;
|
||||
}
|
||||
bool readFrom(istream&in)
|
||||
{
|
||||
string s;
|
||||
if( !in )
|
||||
{
|
||||
cerr << "ERROR(FixedArray): file cannot be opened.\n";
|
||||
return 0;
|
||||
}
|
||||
in >> s;
|
||||
if( !(s=="FixedArray") )
|
||||
{
|
||||
cerr << "ERROR(FixedArray): FixedArray!='"<<s<<"'\n";
|
||||
return 0;
|
||||
}
|
||||
int biggest;
|
||||
in >> biggest;
|
||||
resize(biggest);
|
||||
for(int a=0;a<size();a++)
|
||||
readOb(in,(*this)[a]);
|
||||
return 1;
|
||||
}
|
||||
void sort(int until=-1)
|
||||
{
|
||||
if( until== -1 ) until=size();
|
||||
std::sort(p,p+until);
|
||||
}
|
||||
void invsort(int until=-1)
|
||||
{
|
||||
if( until== -1 ) until=size();
|
||||
std::sort(p,p+until,greater<T>());
|
||||
}
|
||||
int binary_locate(const T&t)
|
||||
{
|
||||
T*ppos=std::lower_bound(p,p+size(),t);
|
||||
int pos=ppos-p;
|
||||
if( pos>=-1&&pos<size() )
|
||||
return pos;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
int binary_search(const T&t)
|
||||
{
|
||||
T*ppos=std::lower_bound(p,p+size(),t);
|
||||
int pos=ppos-p;
|
||||
if( pos>=0&&pos<size()&& *ppos==t )
|
||||
return pos;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
typedef T* iterator;
|
||||
typedef const T* const_iterator;
|
||||
};
|
||||
|
||||
template<class T> bool operator<(const FixedArray<T> &x, const FixedArray<T> &y)
|
||||
{
|
||||
return lexicographical_compare(x.begin(),x.end(),y.begin(),y.end());
|
||||
|
||||
}
|
||||
|
||||
|
||||
template<class T> bool operator==(const FixedArray<T> &x, const FixedArray<T> &y)
|
||||
{
|
||||
if( &x == &y )return 1;
|
||||
const int s = x.size();
|
||||
if( s !=y.size() )return 0;
|
||||
for(int iii=0;iii<s;iii++)
|
||||
if( !(x.p[iii]==y.p[iii]) )
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
template<class T> int Hash(const FixedArray<T>&a)
|
||||
{
|
||||
int n=0;
|
||||
const int s=a.size();
|
||||
for(int iii=0;iii<s;iii++)
|
||||
n=13*n+Hash(a.p[iii]);
|
||||
return n;
|
||||
}
|
||||
|
||||
// Reports an invalid access to element n: trips massert(0) first, then
// prints the index together with the current size and storage address
// to cerr.
template<class T> const void FixedArray<T>:: errorAccess(int n) const
{
  massert(0);
  cerr << "ERROR: Access to array element " << n;
  cerr << " (" << realSize << "," << (void*)p << ")\n";
}
|
||||
|
||||
// Dumps a FixedArray in readable form: "FixedArray(<size>){ ", then
// " <index>:<value>;" for every element, then "}\n".
template<class T> ostream& operator<<(ostream&o,const FixedArray<T>&a)
{
  o << "FixedArray(";
  o << a.size();
  o << "){ ";
  int idx=0;
  while( idx<a.size() )
    {
      o << " " << idx << ":" << a[idx] << ";";
      ++idx;
    }
  o << "}\n";
  return o;
}
|
||||
|
||||
// Extraction placeholder for FixedArray: nothing is read and the array is
// left untouched; the stream is returned as it was passed in.
template<class T> istream& operator>>(istream&in, FixedArray<T>&)
{
  return in;
}
|
||||
|
||||
// Element-wise sum of two FixedArrays; both operands are asserted (via
// massert) to have the same size.
template<class T> FixedArray<T> operator+(const FixedArray<T>&a,const FixedArray<T>&b)
{
  massert(a.size()==b.size());
  FixedArray<T> sum(a.size());
  int k=0;
  while( k<a.size() )
    {
      sum[k]=a[k]+b[k];
      ++k;
    }
  return sum;
}
|
||||
// Element-wise logical OR (operator||) of two FixedArrays; both operands
// are asserted (via iassert) to have the same size.
template<class T> FixedArray<T> operator|(const FixedArray<T>&aaa,const FixedArray<T>&bbb)
{
  iassert(aaa.size()==bbb.size());

  FixedArray<T> merged(aaa.size());
  int k=0;
  while( k<aaa.size() )
    {
      merged.p[k]=aaa.p[k]||bbb.p[k];
      ++k;
    }
  return merged;
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -0,0 +1,48 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef CLASS_FlexArray_defined
|
||||
#define CLASS_FlexArray_defined
|
||||
#include "FixedArray.h"
|
||||
|
||||
template<class T>
|
||||
class FlexArray
|
||||
{
|
||||
private:
|
||||
FixedArray<T> p;
|
||||
int start,end;
|
||||
public:
|
||||
FlexArray(int _start=0,int _end=-1)
|
||||
: p(_end-_start+1),start(_start),end(_end) {}
|
||||
T&operator[](int i)
|
||||
{return p[i-start];}
|
||||
const T&operator[](int i)const
|
||||
{returnp[i-start];}
|
||||
int low()const{return start;}
|
||||
int high()const{return end;}
|
||||
};
|
||||
|
||||
|
||||
#endif
|
@ -0,0 +1,159 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
#include "GDAOptimization.h"
|
||||
#include "ProblemTest.h"
|
||||
#include <cmath>
|
||||
|
||||
#define GDAOptimization GDAOptimization
|
||||
#define IterOptimization IterOptimization
|
||||
|
||||
|
||||
|
||||
// Defaults picked up by the GDAOptimization(Problem&,int) constructor.
// zInitialize() treats a temperature of exactly 1e100 as "not set" and
// replaces it with the problem's current value.
double GDAOptimization::defaultTemperatur = 1e100;
double GDAOptimization::defaultAlpha      = 0.001;
|
||||
|
||||
|
||||
|
||||
// Builds an optimizer for p with at most m iterations (passed on to
// IterOptimization), using the class-wide default temperature and alpha.
// gdaEndFlag is now initialised as well: it used to stay indeterminate
// until the first abkuehlen() call, yet the copy constructor reads it.
GDAOptimization::GDAOptimization(Problem &p,int m)
  : IterOptimization(p,m),
    temperatur(defaultTemperatur),
    alpha(defaultAlpha),
    gdaEndFlag(0)
{
}
|
||||
|
||||
|
||||
// Builds an optimizer for p with an explicit start temperature t and
// cooling factor a; m is passed on to IterOptimization.
// gdaEndFlag is now initialised as well: it used to stay indeterminate
// until the first abkuehlen() call, yet the copy constructor reads it.
GDAOptimization::GDAOptimization(Problem &p,double t,double a,int m)
  : IterOptimization(p,m),
    temperatur(t),
    alpha(a),
    gdaEndFlag(0)
{
}
|
||||
|
||||
|
||||
// Copy constructor: copies the IterOptimization part and the three
// GDA-specific members of o.
GDAOptimization::GDAOptimization(GDAOptimization &o)
  : IterOptimization(o),
    temperatur(o.temperatur),
    alpha(o.alpha),
    gdaEndFlag(o.gdaEndFlag)
{
}
|
||||
|
||||
|
||||
void GDAOptimization::zInitialize()
|
||||
{
|
||||
IterOptimization::zInitialize();
|
||||
if(temperatur==1e100)
|
||||
{
|
||||
double v=problem.value();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
temperatur=v;
|
||||
}
|
||||
assert(alpha>=0);
|
||||
}
|
||||
|
||||
short GDAOptimization::accept(double delta)
|
||||
{
|
||||
if( curValue + delta < temperatur )
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
void GDAOptimization::abkuehlen()
|
||||
{
|
||||
double newTemperatur = temperatur - alpha*(temperatur - curValue);
|
||||
if( fabs(temperatur - newTemperatur)<1e-30 )
|
||||
gdaEndFlag=1;
|
||||
else
|
||||
gdaEndFlag=0;
|
||||
temperatur = newTemperatur;
|
||||
}
|
||||
|
||||
short GDAOptimization::end()
|
||||
{
|
||||
return ( endFlag>0 ) && ( gdaEndFlag );
|
||||
}
|
||||
|
||||
void GDAOptimization::makeGraphOutput()
|
||||
{
|
||||
IterOptimization::makeGraphOutput();
|
||||
*GraphOutput << temperatur-curValue;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Tries numParameter+1 values of defaultAlpha, running solveProblem with
// GDA_OPT for each, and returns the alpha whose run produced the smallest
// result (-1 if no run beat the initial bound of 1e100).
//
// typ must be 1; any other value prints an error and terminates the
// program with exit(1). With print set, one statistics line per alpha is
// written to cout, framed by two '#' comment lines. As side effects
// defaultTemperatur is reset to 1e100 and defaultAlpha is left at 0.03.
double GDAOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,int typ,
				       int optimierungsschritte,int print)
{
  if( typ!=1 )
    {
      cerr << "Error: wrong parameter-type in GDAOptimization::optimizeValue ("
	   << typ << ")\n";
      exit(1);
    }

  double bestAlpha=-1;
  double bestResult=1e100;
  double now;
  if( print )
    cout << "#GDA-optimizeValues: " << numParameter<<endl;

  defaultTemperatur=1e100;

  for(int run=0;run<=numParameter;run++)
    {
      StatVar end,laufzeit,init;
      defaultAlpha = pow(pow(200,1.0/numParameter),run)*0.002;
      solveProblem(0,p,proParameter,optimierungsschritte,GDA_OPT,now,end,
		   laufzeit,init);
      if( now<bestResult )
	{
	  bestResult=now;
	  bestAlpha=defaultAlpha;
	}
      if( !print )
	continue;

      cout << defaultAlpha <<" ";
      cout << end.getMean() << " ";
      cout << end.quantil(0.2) << " ";
      cout << end.quantil(0.79) << " ";
      cout << laufzeit.getMean() << " ";
      cout << end.quantil(0.0) << " ";
      cout << end.getSigma() << " ";
      cout << end.getSigmaSmaller()<< " ";
      cout << end.getSigmaBigger()<< endl;
    }

  if( print )
    cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit"
      " Bester Sigma SigmaSmaller SigmaBigger\n";
  defaultAlpha=0.03;
  return bestAlpha;
}
|
||||
|
@ -0,0 +1,80 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef GDAOPTIMIZATION
|
||||
#define GDAOPTIMIZATION
|
||||
#include "IterOptimization.h"
|
||||
|
||||
class GDAOptimization : public IterOptimization
|
||||
{
|
||||
|
||||
private:
|
||||
double temperatur;
|
||||
double alpha;
|
||||
short gdaEndFlag;
|
||||
|
||||
|
||||
protected:
|
||||
virtual void zInitialize();
|
||||
|
||||
|
||||
virtual short accept(double delta);
|
||||
|
||||
|
||||
virtual void abkuehlen();
|
||||
|
||||
|
||||
virtual short end();
|
||||
|
||||
|
||||
virtual void makeGraphOutput();
|
||||
|
||||
|
||||
public:
|
||||
GDAOptimization(Problem &p,double temperatur,double alpha,
|
||||
int maxIter=-1);
|
||||
|
||||
|
||||
GDAOptimization(Problem &p,int maxIter=-1);
|
||||
|
||||
|
||||
GDAOptimization(GDAOptimization &o);
|
||||
|
||||
|
||||
static double optimizeValue(Problem &p,int proParameter,
|
||||
int numParameter,int typ,int schritte= -1,int verbose=1);
|
||||
|
||||
|
||||
|
||||
static double defaultTemperatur;
|
||||
static double defaultAlpha;
|
||||
|
||||
};
|
||||
#endif
|
||||
|
||||
|
@ -0,0 +1,57 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
#include "HCOptimization.h"
|
||||
|
||||
// Builds an optimizer for p. A non-positive iteration limit m is replaced
// by the problem's expected number of iterations.
HCOptimization::HCOptimization(Problem &p,int m)
  : IterOptimization(p,m)
{
  const bool limitGiven = maxStep>0;
  if( !limitGiven )
    maxStep=(int)(problem.expectedNumberOfIterations());
}
|
||||
// Copy constructor: HCOptimization adds no state of its own, so only the
// IterOptimization part of o is copied.
HCOptimization::HCOptimization(HCOptimization &o)
  : IterOptimization(o)
{}
|
||||
|
||||
|
||||
// Accepts a change (returns 1) only when it strictly lowers the value,
// i.e. when delta is negative; returns 0 otherwise.
short HCOptimization::accept(double delta)
{
  return ( delta < 0 ) ? 1 : 0;
}
|
||||
short HCOptimization::end()
|
||||
{
|
||||
return endFlag>0;
|
||||
}
|
||||
// Cooling step ("abkuehlen" = "cool down"): intentionally a no-op here,
// this optimizer keeps no temperature.
void HCOptimization::abkuehlen()
{}
|
||||
|
||||
|
||||
|
@ -0,0 +1,54 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef HCOPTIMIZATION
|
||||
#define HCOPTIMIZATION
|
||||
#include "IterOptimization.h"
|
||||
|
||||
class HCOptimization : public IterOptimization
|
||||
{
|
||||
|
||||
protected:
|
||||
virtual short accept(double delta);
|
||||
|
||||
|
||||
virtual void abkuehlen();
|
||||
|
||||
|
||||
virtual short end();
|
||||
|
||||
|
||||
public:
|
||||
HCOptimization(Problem &p,int maxIter=-1);
|
||||
|
||||
|
||||
HCOptimization(HCOptimization &o);
|
||||
|
||||
|
||||
};
|
||||
#endif
|
@ -0,0 +1,199 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "IterOptimization.h"
|
||||
#include "ProblemTest.h"
|
||||
|
||||
// Optional stream for graph data; minimize() writes to it only when it is
// non-null. Starts out null.
ostream *GraphOutput = 0;
|
||||
|
||||
|
||||
|
||||
// Binds the optimizer to problem p with the iteration limit m. The real
// start values are established by zInitialize() on the first minimize()
// call (initialisiert==0 marks that this has not happened yet).
// All remaining scalar members are zeroed here: they used to be left
// indeterminate, although the copy constructor reads them.
IterOptimization::IterOptimization(Problem& p,int m)
  : maxNonBetterIterations(0),problem(p),
    curStep(0),curValue(0),bestStep(0),bestValue(0),
    maxStep(m),initialisiert(0),
    endFlag(0),endFlag2(0)
{
}
|
||||
|
||||
|
||||
|
||||
// Copy constructor: shares o's problem reference and copies the complete
// iteration state. The Optimization base is default-constructed, as
// before.
// curStep is now copied too. It used to be skipped, so a copy of an
// already initialised optimizer continued with an indeterminate step
// counter.
IterOptimization::IterOptimization(IterOptimization& o) : Optimization(),problem(o.problem)
{
  maxNonBetterIterations=o.maxNonBetterIterations;
  curStep = o.curStep;
  curValue = o.curValue;
  bestStep = o.bestStep;
  bestValue = o.bestValue;
  maxStep = o.maxStep;
  initialisiert = o.initialisiert;
  endFlag = o.endFlag;
  endFlag2 = o.endFlag2;
}
|
||||
|
||||
|
||||
|
||||
double IterOptimization::minimize(int steps)
|
||||
{
|
||||
if( !initialisiert )
|
||||
zInitialize();
|
||||
|
||||
if( steps==0 )
|
||||
return curValue;
|
||||
|
||||
int t=0;
|
||||
int every=(steps<0)?10000:(steps/1000+1);
|
||||
|
||||
do
|
||||
{
|
||||
curStep++;
|
||||
t++;
|
||||
if(verboseMode&&(curStep%1000==0))
|
||||
{
|
||||
if(steps>0)
|
||||
cout << "Processed: " << 100.0*(curStep/(double)max(maxStep,1)) << " percent. (IterOptimization run) "
|
||||
<< curValue << " max:" << maxStep << " " << steps << " \r";
|
||||
else
|
||||
cout << "In step:" << curStep << " currentValue: " << curValue
|
||||
<< " bestValue: " << bestValue-curValue << " " << curStep-bestStep << ". \r";
|
||||
cout.flush();
|
||||
}
|
||||
|
||||
|
||||
ProblemChange *change= &(problem.change());
|
||||
|
||||
|
||||
double delta=problem.valueChange(*change);
|
||||
|
||||
|
||||
abkuehlen();
|
||||
|
||||
|
||||
if( accept(delta) )
|
||||
{
|
||||
|
||||
problem.doChange(*change);
|
||||
|
||||
|
||||
curValue+=delta;
|
||||
|
||||
|
||||
if( curValue<bestValue-1e-10 )
|
||||
{
|
||||
bestValue=curValue;
|
||||
bestStep=curStep;
|
||||
endFlag2=endFlag=0;
|
||||
}
|
||||
|
||||
if( verboseMode>1 )
|
||||
cout<<"in step: "<<curStep<<" accepted with : "<<delta<<endl;
|
||||
}
|
||||
|
||||
if(curStep - bestStep>maxNonBetterIterations && maxNonBetterIterations>0)
|
||||
endFlag=1;
|
||||
if(curStep - bestStep>2*maxNonBetterIterations && maxNonBetterIterations>0)
|
||||
endFlag2=1;
|
||||
|
||||
|
||||
|
||||
if( GraphOutput&&((curStep%every)==0) )
|
||||
{
|
||||
makeGraphOutput();
|
||||
*GraphOutput<<" "<<delta<<endl;
|
||||
}
|
||||
|
||||
delete change;
|
||||
} while( t!=steps && (!end()) && (!problem.endCriterion()) );
|
||||
|
||||
if( GraphOutput)
|
||||
{
|
||||
makeGraphOutput();
|
||||
*GraphOutput<<endl;
|
||||
}
|
||||
return curValue;
|
||||
}
|
||||
|
||||
|
||||
void IterOptimization::zInitialize()
|
||||
{
|
||||
initialisiert=1;
|
||||
bestValue=curValue=problem.value();
|
||||
maxNonBetterIterations=problem.maxNonBetterIterations();
|
||||
bestStep=curStep=0;
|
||||
endFlag2=endFlag=0;
|
||||
}
|
||||
|
||||
|
||||
void IterOptimization::makeGraphOutput()
|
||||
{
|
||||
|
||||
*GraphOutput << curStep << " " <<curValue << " ";
|
||||
}
|
||||
|
||||
|
||||
// Recursive interval search for a good setting of `parameter` in
// [min,max].
//
// The parameter is set to the two points one third and two thirds into
// the interval; for each, solveProblem is run (nRun, method `verfahren`)
// and a result line is printed to cout. The search then recurses into
// the two thirds of the interval around the better point with nPar
// reduced by 2. When nPar is not positive the interval midpoint is
// returned and bv is left untouched.
//
// Returns the chosen parameter value and stores the associated mean in
// bv. `parameter` is an in/out reference: it is overwritten with every
// probed value.
//
// Fixed: the second parameter was garbled as "double ¶meter" ("&para"
// misread as an HTML entity); it is "double &parameter".
double IterOptimizationOptimizeParameter(Problem &p,
					 double &parameter,double min,double max,
					 int nRun,int nPar,int verfahren,
					 double &bv)
{
  if( nPar<=0 )
    return (max+min)/2;

  StatVar end1,time1,init1;
  StatVar end2,time2,init2;
  double mean1,mean2;
  double par1,par2;

  // Probe at one third of the interval.
  parameter = par1 = min + (max-min)/3;
  solveProblem(0,p,nRun,-1,verfahren,mean1,end1,time1,init1);
  cout << parameter << " " << mean1 << " " << end1.quantil(0.0) << " " << end1.quantil(1.0) << endl;

  // Probe at two thirds of the interval.
  parameter = par2 = min + 2*(max-min)/3;
  solveProblem(0,p,nRun,-1,verfahren,mean2,end2,time2,init2);
  cout << parameter << " " << mean2 << " " << end2.quantil(0.0) << " " << end2.quantil(1.0) << endl;

  // Recurse into the part of the interval around the better probe.
  double bestPar,bestVal;
  if(mean1<mean2)
    {
      bestVal = mean1;
      bestPar=IterOptimizationOptimizeParameter(p,parameter,min,min+2*(max-min)/3,nRun,nPar-2,verfahren,bestVal);
    }
  else
    {
      bestVal = mean2;
      bestPar=IterOptimizationOptimizeParameter(p,parameter,min+(max-min)/3,max,nRun,nPar-2,verfahren,bestVal);
    }

  // Prefer a probe of this level only if it strictly beats the recursion.
  if( mean1<bestVal&&mean1<=mean2 )
    {
      bv = mean1;
      return par1;
    }
  else if(mean2<bestVal && mean2<=mean1)
    {
      bv = mean2;
      return par2;
    }
  else
    {
      bv = bestVal;
      return bestPar;
    }
}
|
@ -0,0 +1,123 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef ITEROPTIMIZATION
|
||||
#define ITEROPTIMIZATION
|
||||
|
||||
#include "Optimization.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define ANZ_VERSCHLECHTERUNGEN 500
|
||||
|
||||
extern ostream *GraphOutput;
|
||||
|
||||
|
||||
class IterOptimization : public Optimization
|
||||
{
|
||||
|
||||
|
||||
private:
|
||||
int maxNonBetterIterations;
|
||||
|
||||
|
||||
protected:
|
||||
Problem &problem;
|
||||
int curStep;
|
||||
double curValue;
|
||||
int bestStep;
|
||||
double bestValue;
|
||||
int maxStep;
|
||||
int initialisiert;
|
||||
short endFlag;
|
||||
short endFlag2;
|
||||
|
||||
|
||||
|
||||
|
||||
virtual void makeGraphOutput();
|
||||
|
||||
|
||||
virtual short end()=0;
|
||||
|
||||
|
||||
virtual void abkuehlen()=0;
|
||||
|
||||
|
||||
virtual short accept(double delta)=0;
|
||||
|
||||
|
||||
virtual void zInitialize();
|
||||
|
||||
|
||||
public:
|
||||
IterOptimization(Problem &p,int maxIter=-1);
|
||||
|
||||
|
||||
IterOptimization(IterOptimization &o);
|
||||
|
||||
|
||||
virtual double minimize(int steps=-1);
|
||||
|
||||
|
||||
inline int getCurStep();
|
||||
|
||||
|
||||
inline double getCurrentValue();
|
||||
|
||||
|
||||
inline const Problem& getProblem();
|
||||
|
||||
|
||||
};
|
||||
|
||||
// Recursive interval search for a good value of `parameter` in [min,max];
// returns the chosen value and stores the associated mean result in bv.
// `parameter` is overwritten with every probed value.
// Fixed: the second parameter was garbled as "double ¶meter"; it is the
// reference "double &parameter".
double IterOptimizationOptimizeParameter(Problem &p,
					 double &parameter,double min,double max,
					 int nRun,int nPar,int verfahren,double &bv);
|
||||
|
||||
inline int IterOptimization::getCurStep()
|
||||
{
|
||||
return curStep;
|
||||
};
|
||||
inline double IterOptimization::getCurrentValue()
|
||||
{
|
||||
return curValue;
|
||||
};
|
||||
// Read-only access to the problem being optimised.
inline const Problem& IterOptimization::getProblem()
{ return problem; }
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,439 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef KATEG_OPT_H
|
||||
#define KATEG_OPT_H
|
||||
#include <string>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "Problem.h"
|
||||
|
||||
extern double rhoLo;
|
||||
|
||||
typedef int Kategory;
|
||||
typedef int Word;
|
||||
|
||||
|
||||
|
||||
#ifdef FREQTYPE_DOUBLE
|
||||
typedef double FreqType;
|
||||
#else
|
||||
typedef int FreqType;
|
||||
#endif
|
||||
|
||||
|
||||
#include "KategProblemWBC.h"
|
||||
|
||||
|
||||
#include "KategProblemKBC.h"
|
||||
|
||||
|
||||
// Initialisation selectors.
enum {
  INIT_RAN   = 1,
  INIT_AIO   = 2,
  INIT_LWRW  = 3,
  INIT_FREQ  = 4,
  INIT_OTHER = 5
};

// Word-selection modes; stored in the bits covered by CHOOSE_WORD.
enum {
  W_RAN      = (8|16),
  W_DET_DECR = (16),
  W_DET_INCR = (32)
};
// Mask of all word-selection bits.
#define CHOOSE_WORD (8|16|32)

// Category-selection modes; stored in the bits covered by CHOOSE_KAT.
enum {
  K_DET  = (64),
  K_RAN  = (128),
  K_BEST = (64|128)
};
// Mask of all category-selection bits.
#define CHOOSE_KAT (64|128)

// Criterion selectors.
enum {
  CRITERION_ML = 0,
  CRITERION_LO = 1,
  CRITERION_MY = 2
};
|
||||
|
||||
|
||||
|
||||
class NWG
|
||||
{
|
||||
private:
|
||||
Array<FreqType> freq;
|
||||
|
||||
Array<int> timeOfFreq;
|
||||
|
||||
|
||||
|
||||
|
||||
int curTime;
|
||||
public:
|
||||
NWG(int n);
|
||||
void init();
|
||||
|
||||
int anzNot0;
|
||||
|
||||
|
||||
Array<int> not0;
|
||||
|
||||
int word;
|
||||
|
||||
inline void addFreq(int C,FreqType n);
|
||||
|
||||
void sort();
|
||||
|
||||
FreqType getFreq(int i)
|
||||
{
|
||||
if( timeOfFreq[i]==curTime )
|
||||
return freq[i];
|
||||
else
|
||||
return 0;
|
||||
};
|
||||
};
|
||||
|
||||
inline void NWG::addFreq(int g,FreqType n)
|
||||
{
|
||||
if(timeOfFreq[g]==curTime)
|
||||
freq[g]+=n;
|
||||
else
|
||||
{
|
||||
timeOfFreq[g]=curTime;
|
||||
freq[g]=n;
|
||||
not0[anzNot0++]=g;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
struct KategProblemChange : public ProblemChange
|
||||
{
|
||||
void *operator new(size_t size);
|
||||
void operator delete(void *ptr,size_t size);
|
||||
|
||||
int word;
|
||||
int toKat;
|
||||
int fromKat;
|
||||
};
|
||||
|
||||
class KategProblem : public Problem
|
||||
{
|
||||
private:
|
||||
double kat_h_full(int n);
|
||||
double kat_h_full(double n);
|
||||
double kat_h_part(int n);
|
||||
double kat_h_part(double n);
|
||||
double sigmaVerfaelschung;
|
||||
short katWasEmpty;
|
||||
|
||||
|
||||
|
||||
int nwgWord;
|
||||
|
||||
NWG nwg;
|
||||
NWG ngw;
|
||||
FreqType nww;
|
||||
|
||||
int ursprung,ziel;
|
||||
|
||||
Array<int> _katOfWord;
|
||||
|
||||
int _maxComp,_maxCompVal;
|
||||
|
||||
double nmo_my(int i,int j);
|
||||
double nmo(int i,int j);
|
||||
|
||||
|
||||
double nmo_lo(int i,int j,int &e0,int &e1);
|
||||
|
||||
|
||||
void putWord(int word,int to);
|
||||
|
||||
|
||||
void fastPutWord(int word,int to);
|
||||
|
||||
|
||||
void setKatOfWord(int w,int k)
|
||||
{
|
||||
if( !(wordFreq.fixedWord[w]==k||wordFreq.fixedWord[w]==-1||k==-1) )
|
||||
{
|
||||
cout << "mkcls::setKatOfWord::ERROR: " << w << " " << k << " " << wordFreq.fixedWord[w] << " " << (*words)[w] << endl;
|
||||
}
|
||||
_katOfWord[w]=k;
|
||||
nwgWord=-1;
|
||||
};
|
||||
|
||||
|
||||
void fillNWG(int w);
|
||||
|
||||
|
||||
inline FreqType nstrich(int i,int j);
|
||||
|
||||
|
||||
void vnstrich(int i,int j);
|
||||
|
||||
|
||||
|
||||
protected:
|
||||
virtual int _change(ProblemChange **p);
|
||||
|
||||
|
||||
virtual void _doChange(ProblemChange &c);
|
||||
|
||||
|
||||
virtual void _undoChange(ProblemChange &c);
|
||||
|
||||
|
||||
virtual double _value();
|
||||
|
||||
|
||||
double _valueChange(KategProblemChange &k);
|
||||
|
||||
|
||||
virtual void incrementDirection();
|
||||
|
||||
|
||||
virtual int maxDimensionVal(void) ;
|
||||
|
||||
|
||||
virtual int maxDimension(void) ;
|
||||
|
||||
|
||||
public:
|
||||
leda_array<string> *words;
|
||||
typedef leda_set<int> intSet;
|
||||
|
||||
leda_array<intSet> *kats;
|
||||
|
||||
KategProblemWBC wordFreq;
|
||||
KategProblemKBC katFreq;
|
||||
|
||||
Array<int> initLike;
|
||||
|
||||
KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
|
||||
int _nachbarschaft,int minw=0);
|
||||
|
||||
|
||||
virtual ~KategProblem();
|
||||
|
||||
|
||||
virtual void _initialize(int initTyp);
|
||||
virtual void _initialize(int initTyp,int specialFixedWord);
|
||||
|
||||
|
||||
virtual double valueChange(ProblemChange&c);
|
||||
|
||||
|
||||
virtual Problem *makeEqualProblem();
|
||||
|
||||
|
||||
virtual double nicevalue(double value=1e100);
|
||||
|
||||
|
||||
void makeKats();
|
||||
|
||||
|
||||
virtual void dumpOn(ostream &strm);
|
||||
|
||||
|
||||
virtual void dumpInfos(ostream &strm);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
inline void katwahl(int k);
|
||||
|
||||
|
||||
inline void wortwahl(int w);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
inline int katOfWord(int w);
|
||||
|
||||
|
||||
inline short wortwahl();
|
||||
|
||||
|
||||
inline short katwahl() ;
|
||||
|
||||
|
||||
virtual int maxNonBetterIterations();
|
||||
|
||||
|
||||
virtual int expectedNumberOfIterations();
|
||||
|
||||
|
||||
const char *getString(int i);
|
||||
string getTheString(int i);
|
||||
|
||||
|
||||
void makeTitle(char x[512]);
|
||||
|
||||
|
||||
void fixInitLike();
|
||||
|
||||
};
|
||||
|
||||
// Returns the category index stored for word w.
inline int KategProblem::katOfWord(int w)
{
  return _katOfWord[w];
}
|
||||
// Returns the CHOOSE_WORD bits of `nachbarschaft`.
inline short KategProblem::wortwahl()
{
  return nachbarschaft&CHOOSE_WORD;
}
|
||||
// Returns the CHOOSE_KAT bits of `nachbarschaft`.
inline short KategProblem::katwahl()
{
  return nachbarschaft&CHOOSE_KAT;
}
|
||||
|
||||
inline void KategProblem::katwahl(int k)
|
||||
{
|
||||
nachbarschaft = (nachbarschaft&(~CHOOSE_KAT)) | k;
|
||||
if(k==K_BEST)
|
||||
_maxCompVal=1;
|
||||
else
|
||||
_maxCompVal=katFreq.nKats-2;
|
||||
};
|
||||
|
||||
inline void KategProblem::wortwahl(int w)
|
||||
{
|
||||
nachbarschaft = (nachbarschaft&(~CHOOSE_WORD)) | w;
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Frequency delta for the category pair (i,j), built from the cached
// values nwg, ngw and nww.  Contributions are only made where i or j
// equals `ursprung` (subtracted) or `ziel` (added); the four diagonal /
// cross cases are then corrected by +-nww.
// presumably this is the change caused by moving the cached word from
// `ursprung` to `ziel` - TODO confirm against fillNWG()/_valueChange().
inline FreqType KategProblem::nstrich(int i,int j)
{
  const bool iIsFrom = (i==ursprung);
  const bool iIsTo   = (i==ziel);
  const bool jIsFrom = (j==ursprung);
  const bool jIsTo   = (j==ziel);

  FreqType delta=0;

  // first index touches origin/target
  if( iIsFrom )
    delta-=nwg.getFreq(j);
  if( iIsTo )
    delta+=nwg.getFreq(j);

  // second index touches origin/target
  if( jIsFrom )
    delta-=ngw.getFreq(i);
  if( jIsTo )
    delta+=ngw.getFreq(i);

  // both indices touch origin/target: correct with nww
  if( iIsFrom && jIsFrom )
    delta+=nww;
  if( iIsTo && jIsTo )
    delta+=nww;
  if( iIsFrom && jIsTo )
    delta-=nww;
  if( iIsTo && jIsFrom )
    delta-=nww;

  return delta;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define MAX_H_TABLE 4000
|
||||
extern double h_table[],l_table[],hmy_table[],hmy_sigma;
|
||||
|
||||
|
||||
// Guarded natural logarithm: yields 0 for every x <= 1e-9 (including
// zero and negative values) and log(x) otherwise.
inline double kat_mlog(double x)
{
  return (x<=1e-9) ? 0 : log(x);
}
|
||||
|
||||
|
||||
inline double kat_mlog(int s)
|
||||
{
|
||||
if(s<=0)
|
||||
return 0;
|
||||
else if( s<MAX_H_TABLE )
|
||||
{
|
||||
massert( s==0 || l_table[s]==log(s) );
|
||||
return l_table[s];
|
||||
}
|
||||
else
|
||||
return log((double)(s));
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline double kat_hlo(int n)
|
||||
{
|
||||
return n*kat_mlog(n-1);
|
||||
}
|
||||
|
||||
inline double kat_hlo(double n)
|
||||
{
|
||||
return n*kat_mlog(n-1);
|
||||
}
|
||||
|
||||
|
||||
inline double kat_h(int n)
|
||||
{
|
||||
massert(n>=-1);
|
||||
if(n<=0)
|
||||
return 0;
|
||||
else
|
||||
if(n<MAX_H_TABLE)
|
||||
{
|
||||
massert(n==0||fabs(h_table[n]-n*log((double)n))<1e-8);
|
||||
return h_table[n];
|
||||
}
|
||||
else
|
||||
return n*log((double)(n));
|
||||
}
|
||||
// n*log(n) for floating-point n; every n <= 1e-9 yields 0.
inline double kat_h(double n)
{
  return (n<=1e-9) ? 0 : n*log(n);
}
|
||||
|
||||
|
||||
inline double kat_etaFkt(int _e0,int e1,int immer0,int cats)
|
||||
{
|
||||
int e0 = _e0 - immer0;
|
||||
int ePlus = cats*cats - _e0;
|
||||
if(cats*cats-e0>1)
|
||||
return e1*log( (ePlus-1.0)/(e0+1.0)*rhoLo );
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
double mkat_h_full(int n,double tf);
|
||||
double mkat_h_part(int n,double cf);
|
||||
|
||||
int Hash(const string& s);
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,243 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
#include <cstdlib>
|
||||
#include "KategProblem.h"
|
||||
|
||||
double rhoLo=0.75;
|
||||
#define MAX_VERFAELSCHUNG 5000
|
||||
double verfTab[MAX_VERFAELSCHUNG],verfTabSigma=-1.0;
|
||||
double verfaelsche(int a,double b)
|
||||
{
|
||||
|
||||
if( a>=0&&verfTabSigma==b&&a<MAX_VERFAELSCHUNG )
|
||||
{
|
||||
massert(verfTab[a]== b*(erf(10000.0) - erf(a/b))/2+a);
|
||||
return verfTab[a];
|
||||
}
|
||||
else
|
||||
{
|
||||
double x = b*(erf(10000.0) - erf(a/b))/2+a;
|
||||
return x;
|
||||
}
|
||||
}
|
||||
// Floating-point overload; it must never be called and aborts the
// program immediately.
double verfaelsche(double /*unused*/,double b)
{
  abort();
  return b;   // not reached; only satisfies the return type
}
|
||||
|
||||
// Builds the category statistics for s categories.
//   s   number of categories; all count tables start at zero, so eta0
//       (number of zero bigram cells) starts at s*s and c1_0 / c2_0
//       (number of zero unigram rows) start at s.
//   sv  sigma of the "Verfaelschung" transform; 0.0 switches it off.
// With sv != 0 every distorted count starts at verfaelsche(0,sv) and the
// running sums are initialised accordingly.
//
// Fix: the global lookup table verfTab is now filled before verfTabSigma
// is set to the new sigma.  Previously only verfTabSigma was updated, so
// every later verfaelsche(a,sigma) call with a < MAX_VERFAELSCHUNG took
// the table branch and returned whatever verfTab happened to contain.
// The table is only (re)built while the transform is active, which also
// avoids evaluating the formula with sigma == 0.
KategProblemKBC::KategProblemKBC(int s,double sv) :
  _n(s),_n1(s,0),_n2(s,0),sigmaVerfaelschung(sv),withVerfaelschung(sv!=0.0),
  _nverf(s),_n1verf(s,0.0),_n2verf(s,0.0),_nWords(0),
  eta0(s*s),eta1(0),c1_0(s),c2_0(s),
  _bigramVerfSum(0.0),_unigramVerfSum1(0.0),_unigramVerfSum2(0.0),nKats(s)
{
  verfInit0=0.0;
  int i;
  if( withVerfaelschung )
    {
      verfInit0=verfaelsche(0,sv);
      cout << "VERFAELSCHUNG wird mitgefuehrt => LANGSAMER!!!\n";
    }
  for(i=0;i<s;i++)
    {
      _n[i].init(s,0);
      _nverf[i].init(s,verfInit0);
      _n1verf[i]=_n2verf[i]=verfInit0;
      _bigramVerfSum+=verfInit0*s;
      _unigramVerfSum1+=verfInit0;
      _unigramVerfSum2+=verfInit0;
    }
  if( withVerfaelschung )
    {
      cout << "VERFAELSCHUNG " << _bigramVerfSum << " " << _unigramVerfSum1 << " " << _unigramVerfSum2 << endl;
    }
  if( withVerfaelschung && verfTabSigma!=sigmaVerfaelschung )
    {
      // verfTabSigma still differs from the new sigma here, so
      // verfaelsche() evaluates the formula instead of reading the table.
      for(i=0;i<MAX_VERFAELSCHUNG;i++)
        verfTab[i]=verfaelsche(i,sigmaVerfaelschung);
      verfTabSigma=sigmaVerfaelschung;
    }
}
|
||||
|
||||
void KategProblemKBC::setN(int w1,int w2, FreqType n)
|
||||
|
||||
{
|
||||
addN(w1,w2,-_n[w1][w2]);
|
||||
addN(w1,w2,n);
|
||||
}
|
||||
|
||||
|
||||
double KategProblemKBC::fullBewertung(int auswertung)
|
||||
{
|
||||
|
||||
double bewertung=0;
|
||||
int c1,c2;
|
||||
|
||||
|
||||
switch( auswertung )
|
||||
{
|
||||
case CRITERION_ML:
|
||||
for(c1=0;c1<nKats;c1++)
|
||||
{
|
||||
for(c2=0;c2<nKats;c2++)
|
||||
bewertung-=kat_h(_n[c1][c2]);
|
||||
bewertung+=kat_h(_n1[c1])+kat_h(_n2[c1]);
|
||||
}
|
||||
break;
|
||||
case CRITERION_MY:
|
||||
{
|
||||
for(c1=0;c1<nKats;c1++)
|
||||
{
|
||||
for(c2=0;c2<nKats;c2++)
|
||||
bewertung-=mkat_h_full((int)n(c1,c2),nverf(c1,c2));
|
||||
bewertung+=mkat_h_part((int)(n1(c1)),n1verf(c1))+mkat_h_part((int)(n2(c1)),n2verf(c1));
|
||||
}
|
||||
double u1=_unigramVerfSum1-verfInit0*c1_0;
|
||||
double u2=_unigramVerfSum2-verfInit0*c2_0;
|
||||
double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
|
||||
if( verboseMode>1 )
|
||||
{
|
||||
cout << "CRITERION_MY: " << bewertung << endl;
|
||||
cout << "U1:"<<_unigramVerfSum1 << " n:"<<u1<< " "
|
||||
<< "U2:"<<_unigramVerfSum2 << " n:"<<u2<< " "
|
||||
<< "U3:"<<_bigramVerfSum << " n:"<<b<< endl;
|
||||
}
|
||||
if(b>0.000001)
|
||||
{
|
||||
|
||||
|
||||
if(verboseMode>1 )
|
||||
cout << " NEU: " <<_nWords*log( u1 * u2 / b ) << endl;
|
||||
bewertung -= _nWords*log( u1 * u2 / b );
|
||||
if(verboseMode>1)
|
||||
cout << "SCHLUSSBEWERTUNG: " << bewertung << endl;
|
||||
}
|
||||
else
|
||||
cout << "B zu klein " << b << endl;
|
||||
}
|
||||
break;
|
||||
case CRITERION_LO:
|
||||
for(c1=0;c1<nKats;c1++)
|
||||
{
|
||||
for(c2=0;c2<nKats;c2++)
|
||||
bewertung-=_n[c1][c2]*kat_mlog(_n[c1][c2]-1-rhoLo);
|
||||
bewertung+=_n1[c1]*kat_mlog(_n1[c1]-1)+_n2[c1]*kat_mlog(_n2[c1]-1);
|
||||
}
|
||||
bewertung-=kat_etaFkt(eta0,eta1,(c1_0*nKats+c2_0*nKats-c1_0*c2_0),nKats);
|
||||
break;
|
||||
default:
|
||||
cerr << "Error: wrong criterion " << auswertung << endl;
|
||||
exit(1);
|
||||
}
|
||||
return bewertung;
|
||||
}
|
||||
|
||||
double KategProblemKBC::myCriterionTerm()
|
||||
{
|
||||
iassert( withVerfaelschung );
|
||||
double r;
|
||||
double u1=_unigramVerfSum1-verfInit0*c1_0;
|
||||
double u2=_unigramVerfSum2-verfInit0*c2_0;
|
||||
double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
|
||||
|
||||
|
||||
if( verboseMode>1 )
|
||||
{
|
||||
cout << "nwords divisor:"<<_nWords << " " << u1 * u2 / b << endl;
|
||||
cout << "ergebnis: "<<_nWords*log( u1 * u2 / b ) << endl;
|
||||
cout << "0: "<<c1_0 << endl;
|
||||
}
|
||||
r = _nWords*log( u1 * u2 / b );
|
||||
|
||||
return -r;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
double KategProblemKBC::bigramVerfSum()
|
||||
{
|
||||
double sum=0;
|
||||
for(int c1=0;c1<nKats;c1++)
|
||||
for(int c2=0;c2<nKats;c2++)
|
||||
sum+=nverf(c1,c2);
|
||||
cout << "BIGRAMVERFSUM: " << sum << endl;
|
||||
return sum;
|
||||
}
|
||||
|
||||
double KategProblemKBC::unigramVerfSum1()
|
||||
{
|
||||
double sum=0;
|
||||
for(int c1=0;c1<nKats;c1++)
|
||||
sum+=n1verf(c1);
|
||||
cout << "UNIGRAMVERFSUM1: " << sum << endl;
|
||||
return sum;
|
||||
}
|
||||
|
||||
double KategProblemKBC::unigramVerfSum2()
|
||||
{
|
||||
double sum=0;
|
||||
for(int c1=0;c1<nKats;c1++)
|
||||
sum+=n2verf(c1);
|
||||
cout << "UNIGRAMVERFSUM2: " << sum << endl;
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,157 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef KATEGPROBLEMKBC_H
|
||||
#define KATEGPROBLEMKBC_H
|
||||
|
||||
typedef Array<FreqType> FreqArray;
|
||||
typedef Array<double> FreqArrayReal;
|
||||
|
||||
|
||||
double verfaelsche(int a,double b);
|
||||
double verfaelsche(double a,double b);
|
||||
|
||||
class KategProblemKBC
|
||||
|
||||
|
||||
{
|
||||
friend class KategProblem;
|
||||
|
||||
private:
|
||||
Array<FreqArray> _n;
|
||||
Array<FreqType> _n1;
|
||||
|
||||
Array<FreqType> _n2;
|
||||
|
||||
|
||||
double sigmaVerfaelschung;
|
||||
short withVerfaelschung;
|
||||
|
||||
Array<FreqArrayReal> _nverf;
|
||||
Array<double> _n1verf;
|
||||
Array<double> _n2verf;
|
||||
FreqType _nWords;
|
||||
|
||||
protected:
|
||||
int eta0;
|
||||
int eta1;
|
||||
int c1_0;
|
||||
int c2_0;
|
||||
double _bigramVerfSum;
|
||||
double _unigramVerfSum1;
|
||||
double _unigramVerfSum2;
|
||||
double verfInit0;
|
||||
|
||||
public:
|
||||
int nKats;
|
||||
|
||||
KategProblemKBC(int nKats,double sv);
|
||||
|
||||
|
||||
double fullBewertung(int auswertung);
|
||||
|
||||
|
||||
FreqType n(int w1,int w2) { return _n[w1][w2]; };
|
||||
|
||||
|
||||
FreqType n1(int w) { return _n1[w];};
|
||||
|
||||
|
||||
FreqType n2(int w) { return _n2[w];};
|
||||
|
||||
|
||||
double bigramVerfSum();
|
||||
double unigramVerfSum1();
|
||||
double unigramVerfSum2();
|
||||
|
||||
double nverf(int w1,int w2) { return _nverf[w1][w2]; }
|
||||
|
||||
double n1verf(int w) { return _n1verf[w]; };
|
||||
|
||||
double n2verf(int w) { return _n2verf[w]; };
|
||||
|
||||
inline void addN(int w1,int w2, FreqType n);
|
||||
|
||||
|
||||
void setN(int w1,int w2, FreqType n);
|
||||
|
||||
|
||||
double myCriterionTerm();
|
||||
|
||||
};
|
||||
|
||||
inline void KategProblemKBC::addN(int w1,int w2, FreqType n)
|
||||
{
|
||||
if(n!=0)
|
||||
{
|
||||
FreqType &s= _n[w1][w2];
|
||||
if(s==0)
|
||||
eta0--;
|
||||
else if(s==1)
|
||||
eta1--;
|
||||
if(_n1[w1]==0)
|
||||
c1_0--;
|
||||
if(_n2[w2]==0)
|
||||
c2_0--;
|
||||
|
||||
if(withVerfaelschung)
|
||||
{
|
||||
double verfOld=verfaelsche(s,sigmaVerfaelschung);
|
||||
double verfNew=verfaelsche(s+n,sigmaVerfaelschung);
|
||||
double verfOld1=verfaelsche(_n1[w1],sigmaVerfaelschung);
|
||||
assert(verfOld1==_n1verf[w1]);
|
||||
double verfNew1=verfaelsche(_n1[w1]+n,sigmaVerfaelschung);
|
||||
double verfOld2=verfaelsche(_n2[w2],sigmaVerfaelschung);
|
||||
assert(verfOld2==_n2verf[w2]);
|
||||
double verfNew2=verfaelsche(_n2[w2]+n,sigmaVerfaelschung);
|
||||
_n1verf[w1]=verfNew1;
|
||||
_unigramVerfSum1+=verfNew1-verfOld1;
|
||||
_n2verf[w2]=verfNew2;
|
||||
_unigramVerfSum2+=verfNew2-verfOld2;
|
||||
_nverf[w1][w2]=verfNew;
|
||||
_bigramVerfSum+=verfNew-verfOld;
|
||||
_nWords+=n;
|
||||
}
|
||||
s+=n;_n1[w1]+=n;_n2[w2]+=n;
|
||||
|
||||
assert(_n[w1][w2]>=0);
|
||||
assert(_n1[w1]>=0);
|
||||
assert(_n2[w2]>=0);
|
||||
|
||||
if(s==0)
|
||||
eta0++;
|
||||
else if(s==1)
|
||||
eta1++;
|
||||
if(_n1[w1]==0)
|
||||
c1_0++;
|
||||
if(_n2[w2]==0)
|
||||
c2_0++;
|
||||
}
|
||||
};
|
||||
#endif
|
@ -0,0 +1,700 @@
|
||||
/*
|
||||
|
||||
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
|
||||
|
||||
mkcls - a program for making word classes .
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
USA.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
#include "KategProblemTest.h"
|
||||
|
||||
#include "ProblemTest.h"
|
||||
#include "HCOptimization.h"
|
||||
#include "TAOptimization.h"
|
||||
#include "RRTOptimization.h"
|
||||
#include "GDAOptimization.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <strstream>
|
||||
|
||||
typedef pair<string,string> PSS;
|
||||
|
||||
#define NEW_SENTENCE_END "mkcls-mapped-dollar-symbol-$"
|
||||
|
||||
#ifdef NeXT
|
||||
// Fallback strdup for platforms that lack it (compiled for NeXT only).
// Returns a malloc'ed copy of the NUL-terminated string a; the caller
// owns the result and releases it with free().
// Fix: a failed allocation now yields a null pointer instead of being
// passed straight to strcpy().
char *strdup(char *a)
{
  char *p = (char *)malloc(strlen(a)+1);
  if( p==0 )
    return 0;
  strcpy(p,a);
  return p;
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// Writes one "word<TAB>category" line per word to `to`.
// The word "$" itself is skipped, and the placeholder
// "mkcls-mapped-dollar-symbol-$" is written back as "$".
void writeClasses(Array<Kategory> &katOfWord,KategProblem &problem,ostream &to)
{
  const int anzahl=katOfWord.size();
  for(int w=0;w<anzahl;w++)
    {
      if( strcmp(problem.getString(w),"$")==0 )
        continue;

      if( strcmp(problem.getString(w),"mkcls-mapped-dollar-symbol-$")==0 )
        {
          to << "$" << "\t" << katOfWord[w] << endl;
        }
      else
        {
          to << problem.getString(w) << "\t" << katOfWord[w] << endl;
        }
    }
}
|
||||
|
||||
|
||||
void mysplit(const string &s,string &s1,string &s2)
|
||||
{
|
||||
unsigned int i=0;
|
||||
for(;i<s.length();i++)if( s[i]==' ' || s[i]=='\t' || s[i]==' ')break;
|
||||
s1=s.substr(0,i);
|
||||
for(;i<s.length();i++)if( !(s[i]==' ' || s[i]=='\t' || s[i]==' ') )break;
|
||||
s2=s.substr(i,s.length()-i);
|
||||
|
||||
iassert(s1.size());
|
||||
iassert(s2.size());
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Reads an initial word -> category assignment from the file `fname`
// into p->initLike.
// Each line is split by mysplit() into a word and a category label.
// Labels "0" and "1" keep their numbers; every other label receives the
// next free number, starting at 2.  Words that are not part of p->words
// only produce a warning.  The word "$", if present, is forced into
// category 0.
// The program terminates if the file cannot be opened or if more
// categories are needed than p->katFreq.nKats provides.
// Returns 1 if at least one word ended up without a category, else 0;
// with `verb` set, each such word is reported on cerr.
int fromCatFile(KategProblem *p,const char *fname,bool verb)
{
  leda_h_array<string,int> translation(-1);
  int maxCat=2;

  ifstream in(fname);
  if(!in)
    {
      cerr << "Error: File '" << fname << "' cannot be opened.\n";
      exit(1);
    }

  // start with "unknown" for every word
  for(int w=0;w<p->wordFreq.nWords;w++)
    (p->initLike)[w]= -1;

  translation["1"]=1;
  translation["0"]=0;

  string line;
  while( getline(in,line) )
    {
      string word,label;
      mysplit(line,word,label);

      int idx=p->words->binary_locate(word);
      if( !(idx>=0 && (*(p->words))[idx]==word) )
        {
          cerr << "Warning: Word '" << word << "' " << idx << " is not in training corpus.\n";
          continue;
        }

      // first occurrence of a label allocates a new category number
      if( translation[label]==-1 )
        translation[label]=maxCat++;
      int cat=translation[label];

      if( (p->initLike)[idx]!= -1 )
        cerr << "Warning: Word '" << ((*(p->words))[idx])<< "' is already in a category.\n";
      (p->initLike)[idx]=cat;
    }

  if( verboseMode )
    cout << "We have " << maxCat << " read non-empty categories"
      " (with words from the corpus).\n";

  if(maxCat>p->katFreq.nKats)
    {
      cerr << "Error: Not enough categories reserved (only "
           << p->katFreq.nKats << ", but i need " << maxCat << ").\n";
      exit(1);
    }

  // the sentence boundary symbol always lives in category 0
  int i=p->words->binary_locate("$");
  if( i>=0 && (*(p->words))[i]=="$" )
    {
      (p->initLike)[i]=0;
    }
  else if( verboseMode )
    {
      cerr << "Warning: No '$' in vocabulary!\n";
    }

  int errors=0;
  for(i=0;i<p->wordFreq.nWords;i++)
    {
      if( (p->initLike)[i]!= -1 )
        continue;
      if( verb ) cerr << "Error: I don't know the category of word " << i
                      << " (" << (*(p->words))[i] << ") " << ".\n";
      errors=1;
    }
  return errors;
}
|
||||
|
||||
|
||||
|
||||
// Builds a KategProblem from a bigram count table.
//   cTbl          bigram counts keyed by (first word, second word)
//   setVokabular  the vocabulary; it becomes the problem's word array
//   remaining     passed through to the KategProblem constructor
// The table is traversed twice: the first pass counts, per word, how
// many distinct successors and predecessors it has (used to size the
// per-word tables); the second pass stores the actual frequencies.
// Pairs with an empty second word and the pair ("$","$") are ignored.
// The returned problem is heap-allocated and owns the word array.
//
// Fix: the sortedness check used `z<ctr-1` with an unsigned counter,
// which wraps around for an empty vocabulary; it is now `z+1<ctr`.
KategProblem *makeKategProblem(const leda_h_array<PSS,FreqType>&cTbl,const leda_set<string>&setVokabular, int maxClass,int initialisierung,
                               int auswertung,int nachbarschaft,int minWordFrequency)
{
  int nwrd=0;
  leda_array<string>&sVok = *new leda_array<string>(setVokabular.size());
  string s;
  unsigned int ctr=0;
  forall_set(leda_set<string>,s,setVokabular)
    {
      if( verboseMode>2 )
        cout << "mkcls:Wort " << ctr << " " << s << endl;
      sVok[ctr++]=s;
    }
  // the set is expected to deliver its elements in ascending order
  for(unsigned int z=0;z+1<ctr;z++)
    iassert( sVok[z]<sVok[z+1] );
  sVok.sort();

  if( verboseMode>2 )
    cout << "*****Vocabulary: " << sVok;

  unsigned int vokSize=sVok.size();
  massert(vokSize==ctr); massert(vokSize==setVokabular.size());
  if(verboseMode)
    {cout << "Size of vocabulary: " << vokSize << "\n";cout.flush();}

  KategProblem *k = new KategProblem(vokSize,maxClass,initialisierung,
                                     auswertung,nachbarschaft,minWordFrequency);
  KategProblemWBC &w=k->wordFreq;
  k->words=&sVok;

  Array<int> after(vokSize,0);
  Array<int> before(vokSize,0);

  // pass 1: number of distinct successors / predecessors per word
  nwrd=0;
  {
    PSS s;
    forall_defined_h2(PSS,FreqType,s,cTbl)
      {
        const string&ss1=s.first;
        const string&ss2=s.second;
        if( ss2.length()&&(ss1!="$" || ss2!="$") )
          {
            int i1=sVok.binary_search(ss1);
            int i2=sVok.binary_search(ss2);
            iassert( sVok[i1] == ss1 );iassert( sVok[i2] == ss2 );
            after[i1]++;
            before[i2]++;
          }
        if( verboseMode&&((nwrd++)%10000==0) )
          {cout<<"Statistiken-1 " << nwrd<< ". \r";cout.flush();}
      }
  }

  for(unsigned int i=0;i<vokSize;i++)
    {
      w.setAfterWords(i,after[i]);
      w.setBeforeWords(i,before[i]);
    }

  // pass 2: store the bigram frequencies themselves
  {
    nwrd=0;
    PSS s;
    forall_defined_h2(PSS,FreqType,s,cTbl)
      {
        const string&ss1=s.first;
        const string&ss2=s.second;
        FreqType p=cTbl[s];
        if( ss2.length()&&(ss1!="$" || ss2!="$") )
          {
            int i1=sVok.binary_search(ss1);
            int i2=sVok.binary_search(ss2);
            iassert( sVok[i1] == ss1 );iassert( sVok[i2] == ss2 );
            w.setFreq(i1,i2,p);
            if( verboseMode>2 )
              cout << "BIGRAMM-HAEUF: " << ss1 << ":" << i1 << " "
                   << ss2 << ":" << i2 << " " << p << endl;
          }
        if( verboseMode&&((nwrd++)%10000==0) )
          {cout<<"Statistiken-2 " <<nwrd<< ". \r";cout.flush();}
      }
  }

  w.testFull();
  if(verboseMode){cout << "Datenintegritaet getestet.\n";cout.flush();}
  return k;
}
|
||||
|
||||
// Reads a bigram count file whose records have the form
// "<count> <word1> <word2>" and builds a KategProblem from it.
// Returns 0 if the file cannot be opened, if a record contains an empty
// word, or if a count of 0 is encountered.
KategProblem *fromNgrFile(const char *str,int maxClass,int initialisierung,
                          int auswertung,int nachbarschaft,int minWordFrequency)
{
  ifstream file(str);
  if(!file)
    return 0;

  leda_set<string> setVokabular;
  leda_h_array<PSS,FreqType> cTbl;

  if( verboseMode )
    cout << "NGRFILE: " << str << endl;

  double count=0;
  string first,second;
  while(file >> count >> first >> second)
    {
      if( first.length()==0||second.length()==0 )
        {
          cerr << "ERROR: strings are zero: " << first.length() <<" " << first <<" " << second.length()<<" " << second << endl;
          return 0;
        }
      if( count==0 )
        {
          cerr << "Count ist 0 " << first << " " << second << endl;
          return 0;
        }

      cTbl[PSS(first,second)]=(FreqType)count;
      setVokabular.insert(first);
      setVokabular.insert(second);
      if( verboseMode>1 )
        cout << "R: " << first << " " << second << " " << count << endl;

      count=0;   // reset before the next record is read
    }

  return makeKategProblem(cTbl,setVokabular,maxClass,initialisierung,auswertung,nachbarschaft,minWordFrequency);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Reads a plain-text corpus (one sentence per line, whitespace separated
// tokens), counts all bigrams and builds a KategProblem from them.
// "$" serves as sentence boundary: it precedes the first token and is
// appended after every line.  A literal "$" token in the text is mapped
// to "mkcls-mapped-dollar-symbol-$" so that it cannot be confused with
// the boundary.
// Returns 0 if the file cannot be opened.
KategProblem *fromKModel(const char *str,int maxClass,int initialisierung,
                         int auswertung,int nachbarschaft,int minWordFrequency)
{
  ifstream f(str);
  if( !f )
    {
      cerr << "ERROR: can not open file " << str << ".\n";
      return 0;
    }

  leda_set<string> setVokabular;
  leda_h_array<PSS,FreqType> cTbl(0);

  string previous="$";
  string token,line;
  for(;;)
    {
      getline(f,line);
      if(f.fail() && !f.bad() && !f.eof())
        {
          cerr << "WARNING: strange characters in stream (getline) " << endl;
          f.clear();
        }
      if(!f)
        break;

      istrstream tokens(line.c_str());
      for(;;)
        {
          tokens >> token;
          if(tokens.fail() && !tokens.bad() && !tokens.eof())
            {
              cerr << "WARNING: strange characters in stream (>>) !\n";
              tokens.clear(ios::failbit);
            }
          if(!tokens)
            break;

          if( token == "$" )
            token = "mkcls-mapped-dollar-symbol-$";
          if( !setVokabular.member(token) )
            setVokabular.insert(token);
          cTbl[PSS(previous,token)]++;
          previous=token;
        }

      // close the sentence with the boundary symbol
      token="$";
      if( !setVokabular.member(token) )
        setVokabular.insert(token);
      cTbl[PSS(previous,token)]++;
      previous=token;
    }

  return makeKategProblem(cTbl,setVokabular,maxClass,initialisierung,auswertung,nachbarschaft,minWordFrequency);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Chooses the default parameters of the TA, RRT and GDA optimisers
// depending on whether the problem selects categories with K_BEST.
void KategProblemSetParameters(KategProblem &p)
{
  const bool bestCategory = ( p.katwahl()==K_BEST );

  if( !bestCategory )
    {
      TAOptimization::defaultAnnRate=0.4;
      RRTOptimization::defaultAnnRate=0.6;
      GDAOptimization::defaultAlpha=0.0125;
      if( verboseMode )
        cout << "Parameter-setting like W-DET-DET\n";
      return;
    }

  TAOptimization::defaultAnnRate=0.7;
  RRTOptimization::defaultAnnRate=0.95;
  GDAOptimization::defaultAlpha=0.05;
  if( verboseMode )
    cout << "Parameter-setting like W-DET-BEST\n";
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Creates a heap-allocated KategProblem with ANZ_WORD words and random
// bigram counts.  ANZ_WORD*ANZ_WORD*relInit random cells are drawn; each
// draw adds a random amount between 1 and 10 to the chosen cell
// (assuming randomInt(k) yields values in [0,k) - TODO confirm).
// The caller owns the returned object.
KategProblem &makRandom(int ANZ_WORD,int ANZ_CLS,int initValue,
                        int auswertung,int nachbarschaft,float relInit)
{
  KategProblem &problem=
    *new KategProblem(ANZ_WORD,ANZ_CLS,initValue,auswertung,nachbarschaft);
  KategProblemWBC &wbc=problem.wordFreq;

  Array<int> successors(ANZ_WORD,0);
  Array<int> predecessors(ANZ_WORD,0);
  Array<FreqArray> counts(ANZ_WORD);
  int i;
  for(i=0;i<ANZ_WORD;i++)
    counts[i].init(ANZ_WORD,0);

  // everything must start out empty
  for(i=0;i<ANZ_WORD;i++)
    {
      massert(successors[i]==0);
      massert(predecessors[i]==0);
      for(int j=0;j<ANZ_WORD;j++)
        {
          massert(counts[i][j]==0);
        }
    }

  // draw the random bigrams
  for(i=0;i<ANZ_WORD*ANZ_WORD*relInit;i++)
    {
      int from=randomInt(ANZ_WORD);
      int to=randomInt(ANZ_WORD);
      if(counts[from][to]==0)
        {
          // a new distinct bigram
          successors[from]++;
          predecessors[to]++;
        }
      counts[from][to]+=randomInt(10)+1;
    }

  // size the per-word tables, then store the counts
  for(i=0;i<ANZ_WORD;i++)
    {
      wbc.setAfterWords(i,successors[i]);
      wbc.setBeforeWords(i,predecessors[i]);
    }
  for(i=0;i<ANZ_WORD;i++)
    {
      for(int j=0;j<ANZ_WORD;j++)
        {
          if( counts[i][j] )
            wbc.setFreq(i,j,counts[i][j]);
        }
    }

  wbc.testFull();
  return problem;
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Builds a title string consisting of the abbreviation of the
// optimisation method followed by the problem's own title.
// The result is allocated with strdup(); the caller releases it with
// free().
// Fix: the buffer now starts out as an empty string.  Previously an
// unknown `verfahren` left it uninitialised, so strlen() read
// indeterminate memory.
char *makeTitle(KategProblem &problem,int verfahren)
{
  char x[1024];
  x[0]='\0';
  switch(verfahren)
    {
    case HC_OPT:
      strcpy(x,"HC ");
      break;
    case SA_OPT:
      strcpy(x,"SA ");
      break;
    case TA_OPT:
      strcpy(x,"TA ");
      break;
    case GDA_OPT:
      strcpy(x,"GDA ");
      break;
    case RRT_OPT:
      strcpy(x,"RRT ");
      break;
    default:
      // unknown method: no prefix
      break;
    }
  // KategProblem::makeTitle writes into the 512-byte window it is given;
  // the prefix is at most 4 characters, so it fits into x.
  problem.makeTitle(x+strlen(x));
  return strdup(x);
}
|
||||
|
||||
|
||||
|
||||
|
||||
#define MAX_MULTIPLE 10
|
||||
|
||||
// One step of the iterated state-space reduction.
//   probs          solved problems over the same vocabulary; all
//                  non-null entries are deleted by this function
//   anzprob        number of leading entries of probs to combine (> 1)
//   timeForOneRed  time budget in seconds for creating new problems
//   maxClock       absolute clockSec() limit; 0 disables the limit
//   katOfWord      out: final category of every original word
//   anzIter        recursion depth, 0 for the outermost call
//   verfahren      optimisation method passed to solveProblem()
// Words that share a category in all combined problems are merged into
// one new "word"; new, smaller problems are built over these merged
// words, solved, and the procedure recurses until no more than nKats
// merged words remain or the time limit is exceeded.
// Returns the heap-allocated array of problems of the last level; the
// caller owns the array and the problems in it.
//
// Fixes: the per-level category array is now a local object instead of
// a leaked heap allocation, and the intermediate problem array is
// released once the recursive call has consumed its contents.
Array<KategProblem *> &_izrOptimization(Array<KategProblem *> &probs,
                                        int anzprob,double timeForOneRed,double maxClock,Array<Kategory> &katOfWord,
                                        int anzIter,int verfahren)
{
  massert(anzprob>1);
  massert(probs[0]->wordFreq.mindestAnzahl<=1);
  KategProblem *p0=probs[0];

  int nWords=p0->wordFreq.nWords;
  int nKats=p0->katFreq.nKats;
  int minimumNumberOfWords = max(1,int(nWords*0.95));

  // newWords[w] = index of the merged word that original word w maps to
  int indexOfDurchschnitt;
  Array<int> newWords(nWords);
  int useAnzprob=anzprob;
  do
    {
      int w,k;
      indexOfDurchschnitt=0;
      for(w=0;w<nWords;w++)
        newWords[w]=-1;
      for(k=0;k<useAnzprob;k++)
        {
          massert(probs[k]->wordFreq.nWords==nWords);
          probs[k]->makeKats();
        }

      for(w=0;w<nWords;w++)
        {
          if( newWords[w]==-1 )
            {
              // intersect the categories that contain w in each problem
              leda_set<int> durchschnitt=(*p0->kats)[p0->katOfWord(w)];
              for(k=1;k<useAnzprob;k++)
                durchschnitt = durchschnitt & (*probs[k]->kats)[probs[k]->katOfWord(w)];

              int _anzInDurchschnitt=0;
              int nr=0;
              forall_set(leda_set<int>,nr,durchschnitt)
                {
                  _anzInDurchschnitt++;
                  newWords[nr]=indexOfDurchschnitt;
                }
              if( verboseMode && _anzInDurchschnitt>1 && anzIter==0 )
                {
                  cout << "- (";
                  forall_set(leda_set<int>,nr,durchschnitt)
                    {
                      cout << p0->getString(nr);
                      if( p0->wordFreq.n1(nr)==1 )
                        cout << "* ";
                      else
                        cout << " ";
                    }
                  cout << ")\n";
                }

              // NOTE(review): the result of this subtraction is not read
              // again before `durchschnitt` goes out of scope.
              for(k=0;k<useAnzprob;k++)
                {
                  durchschnitt = durchschnitt - (*probs[k]->kats)[probs[k]->katOfWord(w)];
                }
              indexOfDurchschnitt++;
            }
        }

      // too little reduction: retry with fewer problems
      if(indexOfDurchschnitt>=minimumNumberOfWords)
        {
          if(useAnzprob==1)
            {
              cout << "useAnzProb==1 => mysterious.\n";
              break;
            }
          useAnzprob--;
        }
    }
  while(indexOfDurchschnitt>=minimumNumberOfWords);

  Array<KategProblem *> &neu=*new Array<KategProblem *>(MAX_MULTIPLE*anzprob,(KategProblem *)0);
  qsort(probs.getPointerToData(),useAnzprob,sizeof(KategProblem *),compareProblem);
  massert(useAnzprob<=probs.size());
  double startTime=clockSec();
  int i, numberOfNew;
  for(numberOfNew=0; (clockSec()-startTime<timeForOneRed)
        || (numberOfNew < anzprob) ; numberOfNew++)
    {
      int w;
      if( numberOfNew==anzprob*MAX_MULTIPLE-1 )
        break;
      KategProblem *p
        = neu[numberOfNew]
        = new KategProblem(indexOfDurchschnitt,nKats-2,
                           p0->initialisierung,p0->auswertung,p0->nachbarschaft);

      for(w=0;w<indexOfDurchschnitt;w++)
        {
          p->wordFreq.setAfterWords(w,5);
          p->wordFreq.setBeforeWords(w,5);
        }
      // accumulate the original bigram counts on the merged words
      for(w=0;w<nWords;w++)
        {
          Array<OneFreq> &after=p0->wordFreq.after[w];
          int size=after.size();
          for(i=0;i<size;i++)
            p->wordFreq.addFreq(newWords[w],newWords[after[i].w],after[i].n);
        }
      p->wordFreq.testFull(1);

      p->wordFreq.set_h_of_words(p0->wordFreq.get_h_of_words());
      double w1=0.0,w2=0.0;
      if(numberOfNew<useAnzprob)
        {
          // start from the category system of an existing solution
          for(i=0;i<nWords;i++)
            (p->initLike)[newWords[i]]=probs[numberOfNew]->katOfWord(i);
          p->_initialize(5);
          HCOptimization hc(*p,-1);
          if(verboseMode)
            {
              w1=p->nicevalue();
              cout << "from old category system:" << w1 << endl;
            }
          hc.minimize(-1);
          if(verboseMode)
            {
              w2=p->nicevalue();
              if(w2<w1)
                cout << "improvement: " << w1-w2 << endl;
            }
        }
      else
        {
          // additional problems are solved from a fresh initialisation
          p->_initialize(1);
          double mean;
          StatVar end,laufzeit,start;
          solveProblem(0,*p,1,-1,verfahren,mean,end,laufzeit,start);
          w2=p->value();
          if(verboseMode)
            cout << "new category system: " << w2 << " (" << p->nicevalue()
                 << ") Zeit: " << clockSec() << "\n";
        }
    }

  // the input problems (including p0) are no longer needed
  int p;
  for(p=0;p<probs.size();p++)
    {
      if( probs[p] )
        delete probs[p];
    }
  qsort(neu.getPointerToData(),numberOfNew,sizeof(Problem *),compareProblem);
  massert(numberOfNew<=neu.size());
  if( verboseMode )
    cout << "Iterierte Zustandsraum-Reduktion: " << indexOfDurchschnitt
         << " words. costs: " << neu[0]->value() << " "
         << neu[0]->nicevalue() << " (" << numberOfNew-anzprob << ")" << "time: "
         << clockSec() << endl;
  if( indexOfDurchschnitt<=nKats
      || (clockSec()>maxClock&&maxClock) )
    {
      if( clockSec()>maxClock&&maxClock )
        cout << "STOP (time limit: " << (clockSec()-maxClock) << " s)\n";
      for(i=0;i<nWords;i++)
        katOfWord[i]=neu[0]->katOfWord(newWords[i]);
      return neu;
    }
  else
    {
      Array<Kategory> newKatOfWord(neu[0]->wordFreq.nWords,-1);
      Array<KategProblem *> &erg=_izrOptimization(neu,anzprob,timeForOneRed,
                                                  maxClock,newKatOfWord,
                                                  anzIter+1,verfahren);
      for(i=0;i<nWords;i++)
        katOfWord[i]=newKatOfWord[newWords[i]];
      // the recursive call has deleted every problem stored in `neu` and
      // returns a different array, so only the container is left to free
      delete &neu;
      return erg;
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Driver of the iterated state-space reduction.
//   p              template problem; copies are made with makeEqualProblem()
//   minN           number of solutions combined in each reduction step
//   firstN         number of start solutions to generate
//   clockForOneRed time budget per reduction step; if <= 0 it is set to
//                  the time needed for the first minN start solutions
//   maxClock       absolute time limit passed on to _izrOptimization()
//   verfahren      optimisation method passed to solveProblem()
// Returns a new heap-allocated problem initialised with the category
// assignment found by the reduction and polished by hill climbing; the
// caller owns it.
//
// Fixes: `probs` is created with null entries, because
// _izrOptimization() tests and deletes every slot, including those never
// filled when the loop stops early; and the array returned by
// _izrOptimization(), together with the problems in it, is released
// instead of being leaked.
KategProblem *izrOptimization(KategProblem &p,int minN,int firstN,
                              double clockForOneRed,double maxClock,int verfahren)
{
  Array<Kategory> katOfWord(p.wordFreq.nWords,-1);
  int startN;
  if( clockForOneRed<=0 )
    startN=firstN;
  else
    startN=1000;
  Array<KategProblem *> probs(startN,(KategProblem *)0);
  double val1=0.0,val2=0.0;
  double endTime=-1;

  double startTime=clockSec();
  int i;
  for(i=0;i<startN;i++)
    {
      StatVar end,laufzeit,start;
      double mean;
      probs[i] = (KategProblem *)p.makeEqualProblem();
      solveProblem(0,*(probs[i]),1,-1,verfahren,mean,end,laufzeit,start);
      if( i==minN-1 )
        endTime = clockSec();
      // NOTE(review): this leaves the loop while the deadline is still in
      // the future, and `i` is then the index of the last solved problem
      // rather than the number of solved problems - confirm whether the
      // comparison and the count used below are intended.
      if( i>=firstN-1 && (startTime+clockForOneRed>clockSec() || i==999) )
        break;
    }
  if( endTime<0 )
    endTime=clockSec();
  massert(i>=firstN);

  qsort(probs.getPointerToData(),i,sizeof(KategProblem *),compareProblem);
  massert(i<=probs.size());
  if( clockForOneRed<=0 )
    {
      clockForOneRed=endTime-startTime;
      if( verboseMode )
        cout << "time for one reduction: " << clockForOneRed << endl;
    }

  // only katOfWord is needed from the reduction; the problems of the
  // last level are discarded afterwards
  Array<KategProblem *> &reduced=
    _izrOptimization(probs,minN,clockForOneRed,maxClock,katOfWord,0,verfahren);
  for(int r=0;r<reduced.size();r++)
    delete reduced[r];
  delete &reduced;

  KategProblem *n=(KategProblem *)(p.makeEqualProblem());
  n->initLike= katOfWord;
  n->_initialize(5);
  if( verboseMode )
    val1=n->value();
  HCOptimization hc(*n,-1);
  hc.minimize(-1);
  val2=n->value();
  if( verboseMode )
    cout << "last improvement: " << val2-val1 << "\n";
  cout << "final costs: " << val2 << " " << n->nicevalue() << endl;
  if(PrintBestTo)
    n->dumpOn(*PrintBestTo);
  return n;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user