added mgiza

This commit is contained in:
rjawor 2017-01-21 17:07:36 +01:00
parent 6f995a64f2
commit df5dddc924
676 changed files with 212224 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,37 @@
.anjuta
.tm_project*
.libs
.deps
.*swp
.nautilus-metafile.xml
*.autosave
*.pws
*.bak
*~
\#*#
*.gladep
*.la
*.lo
*.o
*.class
*.pyc
aclocal.m4
autom4te.cache
config.h
config.h.in
config.log
config.status
configure
intltool-extract*
intltool-merge*
intltool-modules*
intltool-update*
libtool
prepare.sh
stamp-h*
ltmain.sh
mkinstalldirs
config.guess
config.sub
Makefile
Makefile.in

View File

@ -0,0 +1,82 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>MGizaWhiteList</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
<dictionary>
<key>?name?</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.append_environment</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildArguments</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildCommand</key>
<value>make</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildLocation</key>
<value>${workspace_loc:/MGizaWhiteList/Debug}</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
<value>clean</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.contents</key>
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
<value>false</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.stopOnError</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
<value>true</value>
</dictionary>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
</projectDescription>

View File

@ -0,0 +1,674 @@
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<http://www.gnu.org/philosophy/why-not-lgpl.html>.

View File

@ -0,0 +1,237 @@
Installation Instructions
*************************
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
2006, 2007 Free Software Foundation, Inc.
This file is free documentation; the Free Software Foundation gives
unlimited permission to copy, distribute and modify it.
Basic Installation
==================
Briefly, the shell commands `./configure; make; make install' should
configure, build, and install this package. The following
more-detailed instructions are generic; see the `README' file for
instructions specific to this package.
The `configure' shell script attempts to guess correct values for
various system-dependent variables used during compilation. It uses
those values to create a `Makefile' in each directory of the package.
It may also create one or more `.h' files containing system-dependent
definitions. Finally, it creates a shell script `config.status' that
you can run in the future to recreate the current configuration, and a
file `config.log' containing compiler output (useful mainly for
debugging `configure').
It can also use an optional file (typically called `config.cache'
and enabled with `--cache-file=config.cache' or simply `-C') that saves
the results of its tests to speed up reconfiguring. Caching is
disabled by default to prevent problems with accidental use of stale
cache files.
If you need to do unusual things to compile the package, please try
to figure out how `configure' could check whether to do them, and mail
diffs or instructions to the address given in the `README' so they can
be considered for the next release. If you are using the cache, and at
some point `config.cache' contains results you don't want to keep, you
may remove or edit it.
The file `configure.ac' (or `configure.in') is used to create
`configure' by a program called `autoconf'. You need `configure.ac' if
you want to change it or regenerate `configure' using a newer version
of `autoconf'.
The simplest way to compile this package is:
1. `cd' to the directory containing the package's source code and type
`./configure' to configure the package for your system.
Running `configure' might take a while. While running, it prints
some messages telling which features it is checking for.
2. Type `make' to compile the package.
3. Optionally, type `make check' to run any self-tests that come with
the package.
4. Type `make install' to install the programs and any data files and
documentation.
5. You can remove the program binaries and object files from the
source code directory by typing `make clean'. To also remove the
files that `configure' created (so you can compile the package for
a different kind of computer), type `make distclean'. There is
also a `make maintainer-clean' target, but that is intended mainly
for the package's developers. If you use it, you may have to get
all sorts of other programs in order to regenerate files that came
with the distribution.
6. Often, you can also type `make uninstall' to remove the installed
files again.
Compilers and Options
=====================
Some systems require unusual options for compilation or linking that the
`configure' script does not know about. Run `./configure --help' for
details on some of the pertinent environment variables.
You can give `configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here
is an example:
./configure CC=c99 CFLAGS=-g LIBS=-lposix
*Note Defining Variables::, for more details.
Compiling For Multiple Architectures
====================================
You can compile the package for more than one kind of computer at the
same time, by placing the object files for each architecture in their
own directory. To do this, you can use GNU `make'. `cd' to the
directory where you want the object files and executables to go and run
the `configure' script. `configure' automatically checks for the
source code in the directory that `configure' is in and in `..'.
With a non-GNU `make', it is safer to compile the package for one
architecture at a time in the source code directory. After you have
installed the package for one architecture, use `make distclean' before
reconfiguring for another architecture.
Installation Names
==================
By default, `make install' installs the package's commands under
`/usr/local/bin', include files under `/usr/local/include', etc. You
can specify an installation prefix other than `/usr/local' by giving
`configure' the option `--prefix=PREFIX'.
You can specify separate installation prefixes for
architecture-specific files and architecture-independent files. If you
pass the option `--exec-prefix=PREFIX' to `configure', the package uses
PREFIX as the prefix for installing programs and libraries.
Documentation and other data files still use the regular prefix.
In addition, if you use an unusual directory layout you can give
options like `--bindir=DIR' to specify different values for particular
kinds of files. Run `configure --help' for a list of the directories
you can set and what kinds of files go in them.
If the package supports it, you can cause programs to be installed
with an extra prefix or suffix on their names by giving `configure' the
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
Optional Features
=================
Some packages pay attention to `--enable-FEATURE' options to
`configure', where FEATURE indicates an optional part of the package.
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
is something like `gnu-as' or `x' (for the X Window System). The
`README' should mention any `--enable-' and `--with-' options that the
package recognizes.
For packages that use the X Window System, `configure' can usually
find the X include and library files automatically, but if it doesn't,
you can use the `configure' options `--x-includes=DIR' and
`--x-libraries=DIR' to specify their locations.
Specifying the System Type
==========================
There may be some features `configure' cannot figure out automatically,
but needs to determine by the type of machine the package will run on.
Usually, assuming the package is built to be run on the _same_
architectures, `configure' can figure that out, but if it prints a
message saying it cannot guess the machine type, give it the
`--build=TYPE' option. TYPE can either be a short name for the system
type, such as `sun4', or a canonical name which has the form:
CPU-COMPANY-SYSTEM
where SYSTEM can have one of these forms:
OS KERNEL-OS
See the file `config.sub' for the possible values of each field. If
`config.sub' isn't included in this package, then this package doesn't
need to know the machine type.
If you are _building_ compiler tools for cross-compiling, you should
use the option `--target=TYPE' to select the type of system they will
produce code for.
If you want to _use_ a cross compiler, that generates code for a
platform different from the build platform, you should specify the
"host" platform (i.e., that on which the generated programs will
eventually be run) with `--host=TYPE'.
Sharing Defaults
================
If you want to set default values for `configure' scripts to share, you
can create a site shell script called `config.site' that gives default
values for variables like `CC', `cache_file', and `prefix'.
`configure' looks for `PREFIX/share/config.site' if it exists, then
`PREFIX/etc/config.site' if it exists. Or, you can set the
`CONFIG_SITE' environment variable to the location of the site script.
A warning: not all `configure' scripts look for a site script.
Defining Variables
==================
Variables not defined in a site shell script can be set in the
environment passed to `configure'. However, some packages may run
configure again during the build, and the customized values of these
variables may be lost. In order to avoid this problem, you should set
them in the `configure' command line, using `VAR=value'. For example:
./configure CC=/usr/local2/bin/gcc
causes the specified `gcc' to be used as the C compiler (unless it is
overridden in the site shell script).
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
an Autoconf bug. Until the bug is fixed you can use this workaround:
CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
`configure' Invocation
======================
`configure' recognizes the following options to control how it operates.
`--help'
`-h'
Print a summary of the options to `configure', and exit.
`--version'
`-V'
Print the version of Autoconf used to generate the `configure'
script, and exit.
`--cache-file=FILE'
Enable the cache: use and save the results of the tests in FILE,
traditionally `config.cache'. FILE defaults to `/dev/null' to
disable caching.
`--config-cache'
`-C'
Alias for `--cache-file=config.cache'.
`--quiet'
`--silent'
`-q'
Do not print messages saying which checks are being made. To
suppress all normal output, redirect it to `/dev/null' (any error
messages will still be shown).
`--srcdir=DIR'
Look for the package's source code in directory DIR. Usually
`configure' can determine that directory automatically.
`configure' also accepts some other, not widely useful, options. Run
`configure --help' for more details.

View File

@ -0,0 +1,29 @@
## Process this file with automake to produce Makefile.in
## Created by Anjuta
# Sub-directory that the recursive targets (all, install, clean, ...) descend into.
SUBDIRS = src
# Top-level documentation files, installed as data into $(prefix)/doc/mgiza.
mgizadocdir = ${prefix}/doc/mgiza
mgizadoc_DATA = \
README\
COPYING\
AUTHORS\
ChangeLog\
INSTALL\
NEWS
# Helper scripts, installed via $(INSTALL_SCRIPT) into $(prefix)/scripts/.
# NOTE(review): automake does not expand wildcards itself; `scripts/*' is
# copied verbatim into Makefile.in and only expanded later by make and the
# shell relative to the build directory -- confirm this still works for
# out-of-tree (VPATH) builds and for `make dist'.
mgizascriptsdir = ${prefix}/scripts/
mgizascripts_SCRIPTS = \
scripts/*
# Also ship the documentation and the scripts in the distribution tarball.
EXTRA_DIST = $(mgizadoc_DATA) \
${mgizascripts_SCRIPTS}
# Copy all the spec files into the distribution directory. Of course, only
# one is actually used.
# The `test -f' guard skips the literal `*.spec' word the shell leaves behind
# when no spec file exists; `cp -p' keeps each file's mode and timestamps.
dist-hook:
for specfile in *.spec; do \
if test -f $$specfile; then \
cp -p $$specfile $(distdir); \
fi \
done

View File

@ -0,0 +1,683 @@
# Makefile.in generated by automake 1.10.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
pkgdatadir = $(datadir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
subdir = .
DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in $(srcdir)/config.h.in \
$(top_srcdir)/configure AUTHORS COPYING ChangeLog INSTALL NEWS \
config.guess config.sub depcomp install-sh ltmain.sh missing
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
configure.lineno config.status.lineno
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = config.h
CONFIG_CLEAN_FILES =
am__installdirs = "$(DESTDIR)$(mgizascriptsdir)" \
"$(DESTDIR)$(mgizadocdir)"
mgizascriptsSCRIPT_INSTALL = $(INSTALL_SCRIPT)
SCRIPTS = $(mgizascripts_SCRIPTS)
SOURCES =
DIST_SOURCES =
RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
html-recursive info-recursive install-data-recursive \
install-dvi-recursive install-exec-recursive \
install-html-recursive install-info-recursive \
install-pdf-recursive install-ps-recursive install-recursive \
installcheck-recursive installdirs-recursive pdf-recursive \
ps-recursive uninstall-recursive
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
mgizadocDATA_INSTALL = $(INSTALL_DATA)
DATA = $(mgizadoc_DATA)
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
ETAGS = etags
CTAGS = ctags
DIST_SUBDIRS = $(SUBDIRS)
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir)
am__remove_distdir = \
{ test ! -d $(distdir) \
|| { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \
&& rm -fr $(distdir); }; }
DIST_ARCHIVES = $(distdir).tar.gz
GZIP_ENV = --best
distuninstallcheck_listfiles = find . -type f -print
distcleancheck_listfiles = find . -type f -print
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CXX = @CXX@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LDFLAGS = @LDFLAGS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LTLIBOBJS = @LTLIBOBJS@
MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MKDIR_P = @MKDIR_P@
OBJEXT = @OBJEXT@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build_alias = @build_alias@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host_alias = @host_alias@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
SUBDIRS = src
mgizadocdir = ${prefix}/doc/mgiza
mgizadoc_DATA = \
README\
COPYING\
AUTHORS\
ChangeLog\
INSTALL\
NEWS
mgizascriptsdir = ${prefix}/scripts/
mgizascripts_SCRIPTS = \
scripts/*
EXTRA_DIST = $(mgizadoc_DATA) \
${mgizascripts_SCRIPTS}
all: config.h
$(MAKE) $(AM_MAKEFLAGS) all-recursive
.SUFFIXES:
am--refresh:
@:
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
echo ' cd $(srcdir) && $(AUTOMAKE) --gnu '; \
cd $(srcdir) && $(AUTOMAKE) --gnu \
&& exit 0; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
cd $(top_srcdir) && \
$(AUTOMAKE) --gnu Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
echo ' $(SHELL) ./config.status'; \
$(SHELL) ./config.status;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
$(SHELL) ./config.status --recheck
$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(srcdir) && $(AUTOCONF)
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
# config.h is tracked through the stamp-h1 timestamp file rather than by its
# own modification time. If stamp-h1 is up to date but config.h itself has
# gone missing, the stamp is removed and remade so that the header is
# generated again; otherwise this rule does nothing.
config.h: stamp-h1
@if test ! -f $@; then \
rm -f stamp-h1; \
$(MAKE) $(AM_MAKEFLAGS) stamp-h1; \
else :; fi
# Regenerate config.h by running config.status from the top build directory
# whenever config.h.in or config.status changed.
# NOTE(review): presumably config.status recreates stamp-h1 as a side effect;
# that is not visible in this file -- confirm against configure.ac.
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
@rm -f stamp-h1
cd $(top_builddir) && $(SHELL) ./config.status config.h
$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(top_srcdir) && $(AUTOHEADER)
rm -f stamp-h1
touch $@
distclean-hdr:
-rm -f config.h stamp-h1
# Install every entry of $(mgizascripts_SCRIPTS) into
# $(DESTDIR)$(mgizascriptsdir), creating that directory first.
# Each entry is looked up in the build directory and, failing that, in
# $(srcdir). The installed name is the entry's base name (directory part
# stripped) passed through $(transform). Entries that are not regular files
# in either location are skipped without an error.
install-mgizascriptsSCRIPTS: $(mgizascripts_SCRIPTS)
@$(NORMAL_INSTALL)
test -z "$(mgizascriptsdir)" || $(MKDIR_P) "$(DESTDIR)$(mgizascriptsdir)"
@list='$(mgizascripts_SCRIPTS)'; for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
if test -f $$d$$p; then \
f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
echo " $(mgizascriptsSCRIPT_INSTALL) '$$d$$p' '$(DESTDIR)$(mgizascriptsdir)/$$f'"; \
$(mgizascriptsSCRIPT_INSTALL) "$$d$$p" "$(DESTDIR)$(mgizascriptsdir)/$$f"; \
else :; fi; \
done
uninstall-mgizascriptsSCRIPTS:
@$(NORMAL_UNINSTALL)
@list='$(mgizascripts_SCRIPTS)'; for p in $$list; do \
f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \
echo " rm -f '$(DESTDIR)$(mgizascriptsdir)/$$f'"; \
rm -f "$(DESTDIR)$(mgizascriptsdir)/$$f"; \
done
install-mgizadocDATA: $(mgizadoc_DATA)
@$(NORMAL_INSTALL)
test -z "$(mgizadocdir)" || $(MKDIR_P) "$(DESTDIR)$(mgizadocdir)"
@list='$(mgizadoc_DATA)'; for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
f=$(am__strip_dir) \
echo " $(mgizadocDATA_INSTALL) '$$d$$p' '$(DESTDIR)$(mgizadocdir)/$$f'"; \
$(mgizadocDATA_INSTALL) "$$d$$p" "$(DESTDIR)$(mgizadocdir)/$$f"; \
done
uninstall-mgizadocDATA:
@$(NORMAL_UNINSTALL)
@list='$(mgizadoc_DATA)'; for p in $$list; do \
f=$(am__strip_dir) \
echo " rm -f '$(DESTDIR)$(mgizadocdir)/$$f'"; \
rm -f "$(DESTDIR)$(mgizadocdir)/$$f"; \
done
# This directory's subdirectories are mostly independent; you can cd
# into them and run `make' without going through this Makefile.
# To change the values of `make' variables: instead of editing Makefiles,
# (1) if the variable is set in `config.status', edit `config.status'
# (which will cause the Makefiles to be regenerated when you run `make');
# (2) otherwise, pass the desired values on the `make' command line.
#
# How the recipe below works:
# - `failcom' is what happens when a sub-make fails: `exit 1' by default,
#   or `fail=yes' (keep going, report failure at the end) when a `k' flag
#   is found in $MAKEFLAGS.
# - The target run in each subdirectory is this target's name with the
#   `-recursive' suffix removed; for the entry `.' it is `<target>-am'
#   instead.
# - If `.' does not appear in $(SUBDIRS), `<target>-am' is run here after
#   all subdirectories have been processed.
# - The final `test -z "$$fail"' makes the rule fail if any sub-make failed
#   while running in keep-going mode.
$(RECURSIVE_TARGETS):
@failcom='exit 1'; \
for f in x $$MAKEFLAGS; do \
case $$f in \
*=* | --[!k]*);; \
*k*) failcom='fail=yes';; \
esac; \
done; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
list='$(SUBDIRS)'; for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
(cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
$(RECURSIVE_CLEAN_TARGETS):
@failcom='exit 1'; \
for f in x $$MAKEFLAGS; do \
case $$f in \
*=* | --[!k]*);; \
*k*) failcom='fail=yes';; \
esac; \
done; \
dot_seen=no; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
rev=''; for subdir in $$list; do \
if test "$$subdir" = "."; then :; else \
rev="$$subdir $$rev"; \
fi; \
done; \
rev="$$rev ."; \
target=`echo $@ | sed s/-recursive//`; \
for subdir in $$rev; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
(cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done && test -z "$$fail"
tags-recursive:
list='$(SUBDIRS)'; for subdir in $$list; do \
test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
done
ctags-recursive:
list='$(SUBDIRS)'; for subdir in $$list; do \
test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
done
# Build the `ID' database by running mkid over the de-duplicated list of
# sources, headers, Lisp files and $(TAGS_FILES).
# Each listed file is taken from the build directory when it exists there
# and from $(srcdir) otherwise; awk then collapses duplicate paths.
# Fix: the awk action used to set the misspelled variable `nonemtpy', so the
# `if (nonempty)' guard in the END block never fired, nothing was printed,
# and mkid was always invoked with an empty file list.
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
	unique=`for i in $$list; do \
	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
	  done | \
	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
	      END { if (nonempty) { for (i in files) print i; }; }'`; \
	mkid -fID $$unique
tags: TAGS
TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
tags=; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$tags $$unique; \
fi
ctags: CTAGS
CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
tags=; \
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
test -z "$(CTAGS_ARGS)$$tags$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$tags $$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& cd $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) $$here
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
$(am__remove_distdir)
test -d $(distdir) || mkdir $(distdir)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
fi; \
cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
else \
test -f $(distdir)/$$file \
|| cp -p $$d/$$file $(distdir)/$$file \
|| exit 1; \
fi; \
done
list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
distdir=`$(am__cd) $(distdir) && pwd`; \
top_distdir=`$(am__cd) $(top_distdir) && pwd`; \
(cd $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$top_distdir" \
distdir="$$distdir/$$subdir" \
am__remove_distdir=: \
am__skip_length_check=: \
distdir) \
|| exit 1; \
fi; \
done
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$(top_distdir)" distdir="$(distdir)" \
dist-hook
-find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \
! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
! -type d ! -perm -400 -exec chmod a+r {} \; -o \
! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
|| chmod -R a+r $(distdir)
dist-gzip: distdir
tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
$(am__remove_distdir)
dist-bzip2: distdir
tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
$(am__remove_distdir)
dist-lzma: distdir
tardir=$(distdir) && $(am__tar) | lzma -9 -c >$(distdir).tar.lzma
$(am__remove_distdir)
dist-tarZ: distdir
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
$(am__remove_distdir)
dist-shar: distdir
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
$(am__remove_distdir)
dist-zip: distdir
-rm -f $(distdir).zip
zip -rq $(distdir).zip $(distdir)
$(am__remove_distdir)
dist dist-all: distdir
tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
$(am__remove_distdir)
# This target untars the dist file and tries a VPATH configuration. Then
# it guarantees that the distribution is self-contained by making another
# tarfile.
distcheck: dist
case '$(DIST_ARCHIVES)' in \
*.tar.gz*) \
GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\
*.tar.bz2*) \
bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\
*.tar.lzma*) \
unlzma -c $(distdir).tar.lzma | $(am__untar) ;;\
*.tar.Z*) \
uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
*.shar.gz*) \
GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\
*.zip*) \
unzip $(distdir).zip ;;\
esac
chmod -R a-w $(distdir); chmod a+w $(distdir)
mkdir $(distdir)/_build
mkdir $(distdir)/_inst
chmod a-w $(distdir)
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
&& cd $(distdir)/_build \
&& ../configure --srcdir=.. --prefix="$$dc_install_base" \
$(DISTCHECK_CONFIGURE_FLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
&& $(MAKE) $(AM_MAKEFLAGS) check \
&& $(MAKE) $(AM_MAKEFLAGS) install \
&& $(MAKE) $(AM_MAKEFLAGS) installcheck \
&& $(MAKE) $(AM_MAKEFLAGS) uninstall \
&& $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
distuninstallcheck \
&& chmod -R a-w "$$dc_install_base" \
&& ({ \
(cd ../.. && umask 077 && mkdir "$$dc_destdir") \
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
} || { rm -rf "$$dc_destdir"; exit 1; }) \
&& rm -rf "$$dc_destdir" \
&& $(MAKE) $(AM_MAKEFLAGS) dist \
&& rm -rf $(DIST_ARCHIVES) \
&& $(MAKE) $(AM_MAKEFLAGS) distcleancheck
$(am__remove_distdir)
@(echo "$(distdir) archives ready for distribution: "; \
list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
# Verify that `make uninstall' cleaned up after `make install'.
# Lists the regular files under $(distuninstallcheck_dir) with
# $(distuninstallcheck_listfiles); the check passes when at most one file is
# left. Otherwise the leftover files are printed to stderr and the rule
# fails, with an extra hint when $(DESTDIR) is set.
# NOTE(review): the reason a single leftover file is tolerated is not
# visible here -- see the automake documentation for distuninstallcheck.
distuninstallcheck:
@cd $(distuninstallcheck_dir) \
&& test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
|| { echo "ERROR: files left after uninstall:" ; \
if test -n "$(DESTDIR)"; then \
echo " (check DESTDIR support)"; \
fi ; \
$(distuninstallcheck_listfiles) ; \
exit 1; } >&2
# Verify that `make distclean' leaves nothing behind in the build directory.
# Runs distclean first (prerequisite), refuses to run when $(srcdir) is `.'
# (i.e. when not building in a separate build directory), and then fails,
# listing the offending files on stderr, if $(distcleancheck_listfiles)
# still reports any file.
distcleancheck: distclean
@if test '$(srcdir)' = . ; then \
echo "ERROR: distcleancheck can only run from a VPATH build" ; \
exit 1 ; \
fi
@test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
|| { echo "ERROR: files left in build directory after distclean:" ; \
$(distcleancheck_listfiles) ; \
exit 1; } >&2
check-am: all-am
check: check-recursive
all-am: Makefile $(SCRIPTS) $(DATA) config.h
installdirs: installdirs-recursive
installdirs-am:
for dir in "$(DESTDIR)$(mgizascriptsdir)" "$(DESTDIR)$(mgizadocdir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-recursive
# install-strip: re-invoke the `install' target with INSTALL_PROGRAM and
# install_sh_PROGRAM replaced by $(INSTALL_STRIP_PROGRAM) and with
# INSTALL_STRIP_FLAG=-s.  When $(STRIP) is non-empty, it is additionally
# passed along as STRIPPROG through INSTALL_PROGRAM_ENV.
install-strip:
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
`test -z '$(STRIP)' || \
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-recursive
clean-am: clean-generic mostlyclean-am
distclean: distclean-recursive
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
-rm -f Makefile
distclean-am: clean-am distclean-generic distclean-hdr distclean-tags
dvi: dvi-recursive
dvi-am:
html: html-recursive
info: info-recursive
info-am:
install-data-am: install-mgizadocDATA install-mgizascriptsSCRIPTS
install-dvi: install-dvi-recursive
install-exec-am:
install-html: install-html-recursive
install-info: install-info-recursive
install-man:
install-pdf: install-pdf-recursive
install-ps: install-ps-recursive
installcheck-am:
maintainer-clean: maintainer-clean-recursive
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
-rm -rf $(top_srcdir)/autom4te.cache
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-recursive
mostlyclean-am: mostlyclean-generic
pdf: pdf-recursive
pdf-am:
ps: ps-recursive
ps-am:
uninstall-am: uninstall-mgizadocDATA uninstall-mgizascriptsSCRIPTS
.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \
install-strip
.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
all all-am am--refresh check check-am clean clean-generic \
ctags ctags-recursive dist dist-all dist-bzip2 dist-gzip \
dist-hook dist-lzma dist-shar dist-tarZ dist-zip distcheck \
distclean distclean-generic distclean-hdr distclean-tags \
distcleancheck distdir distuninstallcheck dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-man install-mgizadocDATA \
install-mgizascriptsSCRIPTS install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs installdirs-am maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \
uninstall-mgizadocDATA uninstall-mgizascriptsSCRIPTS
# Copy all the spec files into $(distdir). Of course, only one is actually used.
# The `test -f' guard skips the literal `*.spec' word that the shell
# leaves in place when no spec file exists in the current directory.
dist-hook:
for specfile in *.spec; do \
if test -f $$specfile; then \
cp -p $$specfile $(distdir); \
fi \
done
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

View File

@ -0,0 +1,6 @@
<?xml version="1.0"?>
<gtodo>
<category title="Personal" place="0"/>
<category title="Business" place="1"/>
<category title="Unfiled" place="2"/>
</gtodo>

View File

@ -0,0 +1,932 @@
# generated automatically by aclocal 1.10.1 -*- Autoconf -*-
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
# 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
# Make sure AC_AUTOCONF_VERSION is defined (copying m4_PACKAGE_VERSION
# when it is not), then emit a warning if the running Autoconf is not
# version 2.62, the version this file was generated for.
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
m4_if(AC_AUTOCONF_VERSION, [2.62],,
[m4_warning([this file was generated for autoconf 2.62.
You have another version of autoconf. It may work, but is not guaranteed to.
If you have problems, you may need to regenerate the build system entirely.
To do so, use the procedure documented by the package, typically `autoreconf'.])])
# Copyright (C) 2002, 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# AM_AUTOMAKE_VERSION(VERSION)
# ----------------------------
# Automake X.Y traces this macro to ensure aclocal.m4 has been
# generated from the m4 files accompanying Automake X.Y.
# (This private macro should not be called outside this file.)
AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.10'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
m4_if([$1], [1.10.1], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
# _AM_AUTOCONF_VERSION(VERSION)
# -----------------------------
# aclocal traces this macro to find the Autoconf version.
# This is a private macro too. Using m4_define simplifies
# the logic in aclocal, which can simply ignore this definition.
m4_define([_AM_AUTOCONF_VERSION], [])
# AM_SET_CURRENT_AUTOMAKE_VERSION
# -------------------------------
# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
# AC_AUTOCONF_VERSION is defined first (from m4_PACKAGE_VERSION) when the
# running Autoconf does not provide it.
# This macro is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
[AM_AUTOMAKE_VERSION([1.10.1])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(AC_AUTOCONF_VERSION)])
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to
# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
#
# Of course, Automake must honor this variable whenever it calls a
# tool from the auxiliary directory. The problem is that $srcdir (and
# therefore $ac_aux_dir as well) can be either absolute or relative,
# depending on how configure is run. This is pretty annoying, since
# it makes $ac_aux_dir quite unusable in subdirectories: in the top
# source directory, any form will work fine, but in subdirectories a
# relative path needs to be adjusted first.
#
# $ac_aux_dir/missing
# fails when called from a subdirectory if $ac_aux_dir is relative
# $top_srcdir/$ac_aux_dir/missing
# fails if $ac_aux_dir is absolute,
# fails when called from a subdirectory in a VPATH build with
# a relative $ac_aux_dir
#
# The reason for the latter failure is that $top_srcdir and $ac_aux_dir
# are both prefixed by $srcdir. In an in-source build this is usually
# harmless because $srcdir is `.', but things will break when you
# start a VPATH build or use an absolute $srcdir.
#
# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
# iff we strip the leading $srcdir from $ac_aux_dir. That would be:
# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
# and then we would define $MISSING as
# MISSING="\${SHELL} $am_aux_dir/missing"
# This will work as long as MISSING is not called from configure, because
# unfortunately $(top_srcdir) has no meaning in configure.
# However there are other variables, like CC, which are often used in
# configure, and could therefore not use this "fixed" $ac_aux_dir.
#
# Another solution, used here, is to always expand $ac_aux_dir to an
# absolute PATH. The drawback is that using absolute paths prevent a
# configured tree to be moved without reconfiguration.
AC_DEFUN([AM_AUX_DIR_EXPAND],
[dnl Rely on autoconf to set up CDPATH properly.
AC_PREREQ([2.50])dnl
# Expand $ac_aux_dir to an absolute path and store it in am_aux_dir.
# The directory is quoted so that an auxiliary directory whose path
# contains whitespace is still resolved as a single argument to cd.
am_aux_dir=`cd "$ac_aux_dir" && pwd`
])
# Copyright (C) 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 4
# This was merged into AC_PROG_CC in Autoconf.
AU_DEFUN([AM_PROG_CC_STDC],
[AC_PROG_CC
AC_DIAGNOSE([obsolete], [$0:
your code should no longer depend upon `am_cv_prog_cc_stdc', but upon
`ac_cv_prog_cc_stdc'. Remove this warning and the assignment when
you adjust the code. You can also remove the above call to
AC_PROG_CC if you already called it elsewhere.])
am_cv_prog_cc_stdc=$ac_cv_prog_cc_stdc
])
AU_DEFUN([fp_PROG_CC_STDC])
# AM_CONDITIONAL -*- Autoconf -*-
# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 8
# AM_CONDITIONAL(NAME, SHELL-CONDITION)
# -------------------------------------
# Define a conditional.  NAME_TRUE and NAME_FALSE are substituted: when
# SHELL-CONDITION succeeds at configure time NAME_TRUE is empty and
# NAME_FALSE is `#'; otherwise the two values are swapped.
# The names TRUE and FALSE are rejected as conditions.
# A check registered with AC_CONFIG_COMMANDS_PRE aborts configure when
# both variables are still empty, i.e. when the shell code emitted here
# was never executed.
AC_DEFUN([AM_CONDITIONAL],
[AC_PREREQ(2.52)dnl
ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])],
[$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
AC_SUBST([$1_TRUE])dnl
AC_SUBST([$1_FALSE])dnl
_AM_SUBST_NOTMAKE([$1_TRUE])dnl
_AM_SUBST_NOTMAKE([$1_FALSE])dnl
if $2; then
$1_TRUE=
$1_FALSE='#'
else
$1_TRUE='#'
$1_FALSE=
fi
AC_CONFIG_COMMANDS_PRE(
[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
AC_MSG_ERROR([[conditional "$1" was never defined.
Usually this means the macro was only invoked conditionally.]])
fi])])
# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 9
# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
# written in clear, in which case automake, when reading aclocal.m4,
# will think it sees a *use*, and therefore will trigger all its
# C support machinery. Also note that it means that autoscan, seeing
# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
# _AM_DEPENDENCIES(NAME)
# ----------------------
# See how the compiler implements dependency checking.
# NAME is "CC", "CXX", "GCJ", or "OBJC".
# We try a few techniques and use that to set a single cache variable.
#
# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
# dependency, and given that the user is not expected to run this macro,
# just rely on AC_PROG_CC.
AC_DEFUN([_AM_DEPENDENCIES],
[AC_REQUIRE([AM_SET_DEPDIR])dnl
AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
AC_REQUIRE([AM_MAKE_INCLUDE])dnl
AC_REQUIRE([AM_DEP_TRACK])dnl
ifelse([$1], CC, [depcc="$CC" am_compiler_list=],
[$1], CXX, [depcc="$CXX" am_compiler_list=],
[$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
[$1], UPC, [depcc="$UPC" am_compiler_list=],
[$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'],
[depcc="$$1" am_compiler_list=])
AC_CACHE_CHECK([dependency style of $depcc],
[am_cv_$1_dependencies_compiler_type],
[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
# We make a subdir and do the tests there. Otherwise we can end up
# making bogus files that we don't know about and never remove. For
# instance it was reported that on HP-UX the gcc test will end up
# making a dummy file named `D' -- because `-MD' means `put the output
# in D'.
mkdir conftest.dir
# Copy depcomp to subdir because otherwise we won't find it if we're
# using a relative directory.
cp "$am_depcomp" conftest.dir
cd conftest.dir
# We will build objects and dependencies in a subdirectory because
# it helps to detect inapplicable dependency modes. For instance
# both Tru64's cc and ICC support -MD to output dependencies as a
# side effect of compilation, but ICC will put the dependencies in
# the current directory while Tru64 will put them in the object
# directory.
mkdir sub
am_cv_$1_dependencies_compiler_type=none
if test "$am_compiler_list" = ""; then
am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
fi
for depmode in $am_compiler_list; do
# Setup a source with many dependencies, because some compilers
# like to wrap large dependency lists on column 80 (with \), and
# we should not choose a depcomp mode which is confused by this.
#
# We need to recreate these files for each test, as the compiler may
# overwrite some of them when testing with obscure command lines.
# This happens at least with the AIX C compiler.
: > sub/conftest.c
for i in 1 2 3 4 5 6; do
echo '#include "conftst'$i'.h"' >> sub/conftest.c
# Using `: > sub/conftst$i.h' creates only sub/conftst1.h with
# Solaris 8's {/usr,}/bin/sh.
touch sub/conftst$i.h
done
echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
case $depmode in
nosideeffect)
# after this tag, mechanisms are not by side-effect, so they'll
# only be used when explicitly requested
if test "x$enable_dependency_tracking" = xyes; then
continue
else
break
fi
;;
none) break ;;
esac
# We check with `-c' and `-o' for the sake of the "dashmstdout"
# mode. It turns out that the SunPro C++ compiler does not properly
# handle `-M -o', and we need to detect this.
if depmode=$depmode \
source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \
depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
$SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \
>/dev/null 2>conftest.err &&
grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 &&
${MAKE-make} -s -f confmf > /dev/null 2>&1; then
# icc doesn't choke on unknown options, it will just issue warnings
# or remarks (even with -Werror). So we grep stderr for any message
# that says an option was ignored or not supported.
# When given -MP, icc 7.0 and 7.1 complain thusly:
# icc: Command line warning: ignoring option '-M'; no argument required
# The diagnosis changed in icc 8.0:
# icc: Command line remark: option '-MP' not supported
if (grep 'ignoring option' conftest.err ||
grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
am_cv_$1_dependencies_compiler_type=$depmode
break
fi
fi
done
cd ..
rm -rf conftest.dir
else
am_cv_$1_dependencies_compiler_type=none
fi
])
AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
AM_CONDITIONAL([am__fastdep$1], [
test "x$enable_dependency_tracking" != xno \
&& test "$am_cv_$1_dependencies_compiler_type" = gcc3])
])
# AM_SET_DEPDIR
# -------------
# Choose a directory name for dependency files.
# This macro is AC_REQUIREd in _AM_DEPENDENCIES
AC_DEFUN([AM_SET_DEPDIR],
[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
])
# AM_DEP_TRACK
# ------------
# Add the --enable-dependency-tracking / --disable-dependency-tracking
# configure options.  Unless tracking was explicitly disabled, point
# am_depcomp at the depcomp script in $ac_aux_dir and set AMDEPBACKSLASH
# to a backslash.  The AMDEP conditional is defined from the same test,
# and AMDEPBACKSLASH is substituted.
AC_DEFUN([AM_DEP_TRACK],
[AC_ARG_ENABLE(dependency-tracking,
[ --disable-dependency-tracking speeds up one-time build
--enable-dependency-tracking do not reject slow dependency extractors])
if test "x$enable_dependency_tracking" != xno; then
am_depcomp="$ac_aux_dir/depcomp"
AMDEPBACKSLASH='\'
fi
AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
AC_SUBST([AMDEPBACKSLASH])dnl
_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
])
# Generate code to set up dependency tracking. -*- Autoconf -*-
# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
#serial 3
# _AM_OUTPUT_DEPENDENCY_COMMANDS
# ------------------------------
# Walk the files listed in CONFIG_FILES and, for each one that is an
# Automake-generated Makefile, create a one-line dummy for every
# dependency file it includes that does not exist yet.
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
[for mf in $CONFIG_FILES; do
  # Strip MF so we end up with the name of the file.
  mf=`echo "$mf" | sed -e 's/:.*$//'`
  # Check whether this is an Automake generated Makefile or not.
  # We used to match only the files named `Makefile.in', but
  # some people rename them; so instead we look at the file content.
  # Grep'ing the first line is not enough: some people post-process
  # each Makefile.in and add a new line on top of each file to say so.
  # Grep'ing the whole file is not good either: AIX grep has a line
  # limit of 2048, but all sed's we know understand at least 4000.
  if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
    dirpart=`AS_DIRNAME("$mf")`
  else
    continue
  fi
  # Extract the definition of DEPDIR, am__include, and am__quote
  # from the Makefile without running `make'.
  DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
  test -z "$DEPDIR" && continue
  am__include=`sed -n 's/^am__include = //p' < "$mf"`
  # Skip Makefiles that define no include directive: there would be
  # nothing to scan for.  The variable has to be expanded here; testing
  # the bare word am__include can never be true.
  test -z "$am__include" && continue
  am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
  # When using ansi2knr, U may be empty or an underscore; expand it
  U=`sed -n 's/^U = //p' < "$mf"`
  # Find all dependency output files, they are included files with
  # $(DEPDIR) in their names. We invoke sed twice because it is the
  # simplest approach to changing $(DEPDIR) to its actual value in the
  # expansion.
  for file in `sed -n "
    s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
    sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
    # Nothing to do when the dependency file is already there.
    test -f "$dirpart/$file" && continue
    # Make sure the directory exists.
    fdir=`AS_DIRNAME(["$file"])`
    AS_MKDIR_P([$dirpart/$fdir])
    # echo "creating $dirpart/$file"
    echo '# dummy' > "$dirpart/$file"
  done
done
])# _AM_OUTPUT_DEPENDENCY_COMMANDS
# AM_OUTPUT_DEPENDENCY_COMMANDS
# -----------------------------
# This macro should only be invoked once -- use via AC_REQUIRE.
#
# This code is only required when automatic dependency tracking
# is enabled. FIXME. This creates each `.P' file that we will
# need in order to bootstrap the dependency handling code.
AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
[AC_CONFIG_COMMANDS([depfiles],
[test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
[AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
])
# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 8
# AM_CONFIG_HEADER is obsolete. It has been replaced by AC_CONFIG_HEADERS.
AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)])
# Do all the work for Automake. -*- Autoconf -*-
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
# 2005, 2006, 2008 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 13
# This macro actually does too much. Some checks are only needed if
# your package does certain things. But this isn't really a big deal.
# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
# AM_INIT_AUTOMAKE([OPTIONS])
# -----------------------------------------------
# The call with PACKAGE and VERSION arguments is the old style
# call (pre autoconf-2.50), which is being phased out. PACKAGE
# and VERSION should now be passed to AC_INIT and removed from
# the call to AM_INIT_AUTOMAKE.
# We support both call styles for the transition. After
# the next Automake release, Autoconf can make the AC_INIT
# arguments mandatory, and then we can depend on a new Autoconf
# release and drop the old call support.
AC_DEFUN([AM_INIT_AUTOMAKE],
[AC_PREREQ([2.60])dnl
dnl Autoconf wants to disallow AM_ names. We explicitly allow
dnl the ones we care about.
m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
AC_REQUIRE([AC_PROG_INSTALL])dnl
if test "`cd $srcdir && pwd`" != "`pwd`"; then
# Use -I$(srcdir) only when $(srcdir) != ., so that make's output
# is not polluted with repeated "-I."
AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
# test to see if srcdir already configured
if test -f $srcdir/config.status; then
AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
fi
fi
# test whether we have cygpath
if test -z "$CYGPATH_W"; then
if (cygpath --version) >/dev/null 2>/dev/null; then
CYGPATH_W='cygpath -w'
else
CYGPATH_W=echo
fi
fi
AC_SUBST([CYGPATH_W])
# Define the identity of the package.
dnl Distinguish between old-style and new-style calls.
m4_ifval([$2],
[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
AC_SUBST([PACKAGE], [$1])dnl
AC_SUBST([VERSION], [$2])],
[_AM_SET_OPTIONS([$1])dnl
dnl Diagnose old-style AC_INIT with new-style AM_INIT_AUTOMAKE.
m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,,
[m4_fatal([AC_INIT should be called with package and version arguments])])dnl
AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
_AM_IF_OPTION([no-define],,
[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl
# Some tools Automake needs.
AC_REQUIRE([AM_SANITY_CHECK])dnl
AC_REQUIRE([AC_ARG_PROGRAM])dnl
AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version})
AM_MISSING_PROG(AUTOCONF, autoconf)
AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version})
AM_MISSING_PROG(AUTOHEADER, autoheader)
AM_MISSING_PROG(MAKEINFO, makeinfo)
AM_PROG_INSTALL_SH
AM_PROG_INSTALL_STRIP
AC_REQUIRE([AM_PROG_MKDIR_P])dnl
# We need awk for the "check" target. The system "awk" is bad on
# some platforms.
AC_REQUIRE([AC_PROG_AWK])dnl
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
[_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
[_AM_PROG_TAR([v7])])])
_AM_IF_OPTION([no-dependencies],,
[AC_PROVIDE_IFELSE([AC_PROG_CC],
[_AM_DEPENDENCIES(CC)],
[define([AC_PROG_CC],
defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl
AC_PROVIDE_IFELSE([AC_PROG_CXX],
[_AM_DEPENDENCIES(CXX)],
[define([AC_PROG_CXX],
defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl
AC_PROVIDE_IFELSE([AC_PROG_OBJC],
[_AM_DEPENDENCIES(OBJC)],
[define([AC_PROG_OBJC],
defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl
])
])
# When config.status generates a header, we must update the stamp-h file.
# This file resides in the same directory as the config header
# that is generated. The stamp files are numbered to have different names.
# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
# loop where config.status creates the headers, so we can generate
# our stamp files there.
AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
[# Compute $1's index in $config_headers.
_am_arg=$1
_am_stamp_count=1
for _am_header in $config_headers :; do
case $_am_header in
$_am_arg | $_am_arg:* )
break ;;
* )
_am_stamp_count=`expr $_am_stamp_count + 1` ;;
esac
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# AM_PROG_INSTALL_SH
# ------------------
# Define $install_sh.
AC_DEFUN([AM_PROG_INSTALL_SH],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
install_sh=${install_sh-"\$(SHELL) $am_aux_dir/install-sh"}
AC_SUBST(install_sh)])
# Copyright (C) 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 2
# Check whether the underlying file-system supports filenames
# with a leading dot. For instance MS-DOS doesn't.
AC_DEFUN([AM_SET_LEADING_DOT],
[rm -rf .tst 2>/dev/null
mkdir .tst 2>/dev/null
if test -d .tst; then
am__leading_dot=.
else
am__leading_dot=_
fi
rmdir .tst 2>/dev/null
AC_SUBST([am__leading_dot])])
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
# From Jim Meyering
# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 4
# AM_MAINTAINER_MODE
# ------------------
# Add the --enable-maintainer-mode configure option, which is off unless
# requested.  The MAINTAINER_MODE conditional is defined from the chosen
# value, and MAINT is substituted with the value of MAINTAINER_MODE_TRUE.
AC_DEFUN([AM_MAINTAINER_MODE],
[AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
dnl maintainer-mode is disabled by default
AC_ARG_ENABLE(maintainer-mode,
[ --enable-maintainer-mode enable make rules and dependencies not useful
(and sometimes confusing) to the casual installer],
USE_MAINTAINER_MODE=$enableval,
USE_MAINTAINER_MODE=no)
AC_MSG_RESULT([$USE_MAINTAINER_MODE])
dnl The value is quoted so that an empty or multi-word argument given to
dnl the option makes the test false instead of making it a syntax error.
AM_CONDITIONAL(MAINTAINER_MODE, [test "$USE_MAINTAINER_MODE" = yes])
MAINT=$MAINTAINER_MODE_TRUE
AC_SUBST(MAINT)dnl
]
)
AU_DEFUN([jm_MAINTAINER_MODE], [AM_MAINTAINER_MODE])
# Check to see how 'make' treats includes. -*- Autoconf -*-
# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 3
# AM_MAKE_INCLUDE()
# -----------------
# Check to see how make treats includes.
AC_DEFUN([AM_MAKE_INCLUDE],
[am_make=${MAKE-make}
cat > confinc << 'END'
am__doit:
@echo done
.PHONY: am__doit
END
# If we don't find an include directive, just comment out the code.
AC_MSG_CHECKING([for style of include used by $am_make])
am__include="#"
am__quote=
_am_result=none
# First try GNU make style include.
echo "include confinc" > confmf
# We grep out `Entering directory' and `Leaving directory'
# messages which can occur if `w' ends up in MAKEFLAGS.
# In particular we don't look at `^make:' because GNU make might
# be invoked under some other name (usually "gmake"), in which
# case it prints its new name instead of `make'.
if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then
am__include=include
am__quote=
_am_result=GNU
fi
# Now try BSD make style include.
if test "$am__include" = "#"; then
echo '.include "confinc"' > confmf
if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then
am__include=.include
am__quote="\""
_am_result=BSD
fi
fi
AC_SUBST([am__include])
AC_SUBST([am__quote])
AC_MSG_RESULT([$_am_result])
rm -f confinc confmf
])
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 5
# AM_MISSING_PROG(NAME, PROGRAM)
# ------------------------------
AC_DEFUN([AM_MISSING_PROG],
[AC_REQUIRE([AM_MISSING_HAS_RUN])
$1=${$1-"${am_missing_run}$2"}
AC_SUBST($1)])
# AM_MISSING_HAS_RUN
# ------------------
# Define MISSING if not defined so far and test if it supports --run.
# If it does, set am_missing_run to use it, otherwise, to nothing.
AC_DEFUN([AM_MISSING_HAS_RUN],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
AC_REQUIRE_AUX_FILE([missing])dnl
test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing"
# Use eval to expand $SHELL
if eval "$MISSING --run true"; then
am_missing_run="$MISSING --run "
else
am_missing_run=
AC_MSG_WARN([`missing' script is too old or missing])
fi
])
# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# AM_PROG_MKDIR_P
# ---------------
# Check for `mkdir -p'.
AC_DEFUN([AM_PROG_MKDIR_P],
[AC_PREREQ([2.60])dnl
AC_REQUIRE([AC_PROG_MKDIR_P])dnl
dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
dnl while keeping a definition of mkdir_p for backward compatibility.
dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
dnl Makefile.ins that do not define MKDIR_P, so we do our own
dnl adjustment using top_builddir (which is defined more often than
dnl MKDIR_P).
AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
case $mkdir_p in
[[\\/$]]* | ?:[[\\/]]*) ;;
*/*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
esac
])
# Helper functions for option handling. -*- Autoconf -*-
# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 3
# _AM_MANGLE_OPTION(NAME)
# -----------------------
AC_DEFUN([_AM_MANGLE_OPTION],
[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
# _AM_SET_OPTION(NAME)
# ------------------------------
# Set option NAME. Presently that only means defining a flag for this option.
AC_DEFUN([_AM_SET_OPTION],
[m4_define(_AM_MANGLE_OPTION([$1]), 1)])
# _AM_SET_OPTIONS(OPTIONS)
# ----------------------------------
# OPTIONS is a space-separated list of Automake options.
AC_DEFUN([_AM_SET_OPTIONS],
[AC_FOREACH([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
# -------------------------------------------
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
AC_DEFUN([_AM_IF_OPTION],
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
# Check to make sure that the build environment is sane. -*- Autoconf -*-
# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 4
# AM_SANITY_CHECK
# ---------------
AC_DEFUN([AM_SANITY_CHECK],
[AC_MSG_CHECKING([whether build environment is sane])
# Just in case
sleep 1
echo timestamp > conftest.file
# Do `set' in a subshell so we don't clobber the current shell's
# arguments. Must try -L first in case configure is actually a
# symlink; some systems play weird games with the mod time of symlinks
# (eg FreeBSD returns the mod time of the symlink's containing
# directory).
if (
set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null`
if test "$[*]" = "X"; then
# -L didn't work.
set X `ls -t $srcdir/configure conftest.file`
fi
rm -f conftest.file
if test "$[*]" != "X $srcdir/configure conftest.file" \
&& test "$[*]" != "X conftest.file $srcdir/configure"; then
# If neither matched, then we have a broken ls. This can happen
# if, for instance, CONFIG_SHELL is bash and it inherits a
# broken ls alias from the environment. This has actually
# happened. Such a system could not be considered "sane".
AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken
alias in your environment])
fi
test "$[2]" = conftest.file
)
then
# Ok.
:
else
AC_MSG_ERROR([newly created file is older than distributed files!
Check your system clock])
fi
AC_MSG_RESULT(yes)])
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# AM_PROG_INSTALL_STRIP
# ---------------------
# One issue with vendor `install' (even GNU) is that you can't
# specify the program used to strip binaries. This is especially
# annoying in cross-compiling environments, where the build's strip
# is unlikely to handle the host's binaries.
# Fortunately install-sh will honor a STRIPPROG variable, so we
# always use install-sh in `make install-strip', and initialize
# STRIPPROG with the value of the STRIP variable (set by the user).
AC_DEFUN([AM_PROG_INSTALL_STRIP],
[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
# Installed binaries are usually stripped using `strip' when the user
# runs `make install-strip'. However `strip' might not be the right
# tool to use in cross-compilation environments, therefore Automake
# will honor the `STRIP' environment variable to overrule this program.
dnl Don't test for $cross_compiling = yes, because it might be `maybe'.
if test "$cross_compiling" != no; then
AC_CHECK_TOOL([STRIP], [strip], :)
fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
# Copyright (C) 2006 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# _AM_SUBST_NOTMAKE(VARIABLE)
# ---------------------------
# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
# This macro is traced by Automake.
AC_DEFUN([_AM_SUBST_NOTMAKE])
# Check how to create a tarball. -*- Autoconf -*-
# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# serial 2
# _AM_PROG_TAR(FORMAT)
# --------------------
# Check how to create a tarball in format FORMAT.
# FORMAT should be one of `v7', `ustar', or `pax'.
#
# Substitute a variable $(am__tar) that is a command
# writing to stdout a FORMAT-tarball containing the directory
# $tardir.
# tardir=directory && $(am__tar) > result.tar
#
# Substitute a variable $(am__untar) that extract such
# a tarball read from stdin.
# $(am__untar) < result.tar
AC_DEFUN([_AM_PROG_TAR],
[# Always define AMTAR for backward compatibility.
AM_MISSING_PROG([AMTAR], [tar])
m4_if([$1], [v7],
[am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
[m4_case([$1], [ustar],, [pax],,
[m4_fatal([Unknown tar format])])
AC_MSG_CHECKING([how to create a $1 tar archive])
# Loop over all known methods to create a tar archive until one works.
_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
_am_tools=${am_cv_prog_tar_$1-$_am_tools}
# Do not fold the above two line into one, because Tru64 sh and
# Solaris sh will not grok spaces in the rhs of `-'.
for _am_tool in $_am_tools
do
case $_am_tool in
gnutar)
for _am_tar in tar gnutar gtar;
do
AM_RUN_LOG([$_am_tar --version]) && break
done
am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
am__untar="$_am_tar -xf -"
;;
plaintar)
# Must skip GNU tar: if it does not support --format= it doesn't create
# ustar tarball either.
(tar --version) >/dev/null 2>&1 && continue
am__tar='tar chf - "$$tardir"'
am__tar_='tar chf - "$tardir"'
am__untar='tar xf -'
;;
pax)
am__tar='pax -L -x $1 -w "$$tardir"'
am__tar_='pax -L -x $1 -w "$tardir"'
am__untar='pax -r'
;;
cpio)
am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
am__untar='cpio -i -H $1 -d'
;;
none)
am__tar=false
am__tar_=false
am__untar=false
;;
esac
# If the value was cached, stop now. We just wanted to have am__tar
# and am__untar set.
test -n "${am_cv_prog_tar_$1}" && break
# tar/untar a dummy directory, and stop if the command works
rm -rf conftest.dir
mkdir conftest.dir
echo GrepMe > conftest.dir/file
AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
rm -rf conftest.dir
if test -s conftest.tar; then
AM_RUN_LOG([$am__untar <conftest.tar])
grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
fi
done
rm -rf conftest.dir
AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
AC_MSG_RESULT([$am_cv_prog_tar_$1])])
AC_SUBST([am__tar])
AC_SUBST([am__untar])
]) # _AM_PROG_TAR

View File

@ -0,0 +1,159 @@
#!/bin/sh
# Run this to generate all the initial makefiles, etc.
#
# Environment read by this section:
#   GNOME2_DIR     optional prefix; when set, its share/aclocal, lib and bin
#                  directories are prepended to ACLOCAL_FLAGS,
#                  LD_LIBRARY_PATH and PATH respectively.
#   ACLOCAL_FLAGS  extra flags that are later handed to aclocal.

# Directory holding this script ($0 is quoted so a path with spaces survives).
srcdir=`dirname "$0"`
test -z "$srcdir" && srcdir=.

# Set to 1 by any failed prerequisite check further down in the script.
DIE=0

if [ -n "$GNOME2_DIR" ]; then
    ACLOCAL_FLAGS="-I $GNOME2_DIR/share/aclocal $ACLOCAL_FLAGS"
    LD_LIBRARY_PATH="$GNOME2_DIR/lib:$LD_LIBRARY_PATH"
    PATH="$GNOME2_DIR/bin:$PATH"
    export PATH
    export LD_LIBRARY_PATH
fi

# Refuse to run when the script does not sit next to configure.ac.
(test -f "$srcdir/configure.ac") || {
    # A single echo replaces the former `echo -n' + echo pair: the -n flag
    # is not portable across /bin/sh implementations.
    echo "**Error**: Directory \`$srcdir' does not look like the top-level package directory"
    exit 1
}
# ---------------------------------------------------------------------------
# Prerequisite checks.
#
# Every tool is probed by running it with --version in a subshell.  A failed
# probe prints an explanation and sets DIE=1 instead of exiting, so that all
# missing tools are reported in a single run; the script aborts at the end of
# this section when DIE is 1.  Optional tools are only probed when the
# matching macro appears at the start of a line in configure.ac.
# ---------------------------------------------------------------------------

(autoconf --version) < /dev/null > /dev/null 2>&1 || {
    echo
    echo "**Error**: You must have \`autoconf' installed."
    echo "Download the appropriate package for your distribution,"
    echo "or get the source tarball at ftp://ftp.gnu.org/pub/gnu/"
    DIE=1
}

(grep "^IT_PROG_INTLTOOL" "$srcdir/configure.ac" >/dev/null) && {
    (intltoolize --version) < /dev/null > /dev/null 2>&1 || {
        echo
        echo "**Error**: You must have \`intltool' installed."
        echo "You can get it from:"
        echo " ftp://ftp.gnome.org/pub/GNOME/"
        DIE=1
    }
}

(grep "^AM_PROG_XML_I18N_TOOLS" "$srcdir/configure.ac" >/dev/null) && {
    (xml-i18n-toolize --version) < /dev/null > /dev/null 2>&1 || {
        echo
        echo "**Error**: You must have \`xml-i18n-toolize' installed."
        echo "You can get it from:"
        echo " ftp://ftp.gnome.org/pub/GNOME/"
        DIE=1
    }
}

(grep "^AM_PROG_LIBTOOL" "$srcdir/configure.ac" >/dev/null) && {
    # Probe libtoolize, which is the program this script actually runs
    # later on.  Probing the `libtool' binary instead rejects systems that
    # install libtoolize without a stand-alone libtool executable.
    (libtoolize --version) < /dev/null > /dev/null 2>&1 || {
        echo
        echo "**Error**: You must have \`libtool' installed."
        echo "You can get it from: ftp://ftp.gnu.org/pub/gnu/"
        DIE=1
    }
}

(grep "^AM_GLIB_GNU_GETTEXT" "$srcdir/configure.ac" >/dev/null) && {
    # glib-gettextize is not required when configure.ac already carries a
    # sed rule mentioning POTFILES.
    (grep "sed.*POTFILES" "$srcdir/configure.ac") > /dev/null || \
    (glib-gettextize --version) < /dev/null > /dev/null 2>&1 || {
        echo
        echo "**Error**: You must have \`glib' installed."
        echo "You can get it from: ftp://ftp.gtk.org/pub/gtk"
        DIE=1
    }
}

(automake --version) < /dev/null > /dev/null 2>&1 || {
    echo
    echo "**Error**: You must have \`automake' installed."
    echo "You can get it from: ftp://ftp.gnu.org/pub/gnu/"
    DIE=1
    NO_AUTOMAKE=yes
}

# if no automake, don't bother testing for aclocal
test -n "$NO_AUTOMAKE" || (aclocal --version) < /dev/null > /dev/null 2>&1 || {
    echo
    echo "**Error**: Missing \`aclocal'. The version of \`automake'"
    echo "installed doesn't appear recent enough."
    echo "You can get automake from ftp://ftp.gnu.org/pub/gnu/"
    DIE=1
}

# Abort only now, after every missing tool has been reported.
if test "$DIE" -eq 1; then
    exit 1
fi
# Tell the user when no arguments were given: whatever is passed to this
# script is forwarded to configure at the end of the script.
if test -z "$*"; then
echo "**Warning**: I am going to run \`configure' with no arguments."
echo "If you wish to pass any to it, please specify them on the"
echo \`$0\'" command line."
echo
fi
# When CC is exactly "xlc", pass --include-deps to automake (am_opt is
# appended to the automake command line further down).
case $CC in
xlc )
am_opt=--include-deps;;
esac
# Regenerate the build system in every directory below $srcdir (the CVS
# directory excepted) that contains a configure.ac, unless the directory is
# flagged with a NO-AUTO-GEN file.
# NOTE: the directory list comes from an unquoted command substitution, so
# directory names containing whitespace are not supported.
for coin in `find $srcdir -path $srcdir/CVS -prune -o -name configure.ac -print`
do
    dr=`dirname $coin`
    if test -f $dr/NO-AUTO-GEN; then
        echo skipping $dr -- flagged as no auto-gen
    else
        echo processing $dr
        # Run in a subshell so the directory change and aclocalinclude do
        # not leak into the next iteration.  Bail out of the subshell if
        # the directory cannot be entered rather than running the tools in
        # the wrong place.
        ( cd $dr || exit 1
          aclocalinclude="$ACLOCAL_FLAGS"

          if grep "^AM_GLIB_GNU_GETTEXT" configure.ac >/dev/null; then
              echo "Creating $dr/aclocal.m4 ..."
              # We are already inside $dr, so address aclocal.m4 relative
              # to the current directory.  Using $dr/aclocal.m4 here points
              # at a non-existent path whenever $dr is relative and is not
              # `.' itself.
              test -r aclocal.m4 || touch aclocal.m4
              echo "Running glib-gettextize... Ignore non-fatal messages."
              # Feed "no" to glib-gettextize's standard input.
              echo "no" | glib-gettextize --force --copy
              echo "Making $dr/aclocal.m4 writable ..."
              test -r aclocal.m4 && chmod u+w aclocal.m4
          fi

          if grep "^IT_PROG_INTLTOOL" configure.ac >/dev/null; then
              echo "Running intltoolize..."
              intltoolize --copy --force --automake
          fi

          if grep "^AM_PROG_XML_I18N_TOOLS" configure.ac >/dev/null; then
              echo "Running xml-i18n-toolize..."
              xml-i18n-toolize --copy --force --automake
          fi

          if grep "^AM_PROG_LIBTOOL" configure.ac >/dev/null; then
              # NO_LIBTOOLIZE in the environment suppresses libtoolize.
              if test -z "$NO_LIBTOOLIZE" ; then
                  echo "Running libtoolize..."
                  libtoolize --force --copy
              fi
          fi

          echo "Running aclocal $aclocalinclude ..."
          aclocal $aclocalinclude

          if grep "^AM_CONFIG_HEADER" configure.ac >/dev/null; then
              echo "Running autoheader..."
              autoheader
          fi

          echo "Running automake --gnu $am_opt ..."
          automake --add-missing --gnu $am_opt

          echo "Running autoconf ..."
          autoconf
        )
    fi
done
# Finally run configure with maintainer mode enabled, forwarding every
# argument given to this script.  Setting NOCONFIGURE to any non-empty value
# skips this step.
conf_flags="--enable-maintainer-mode"
if test "x$NOCONFIGURE" = x; then
    echo Running $srcdir/configure $conf_flags "$@" ...
    # The configure path is quoted so a source directory containing spaces
    # works; $conf_flags is left unquoted on purpose so that it could hold
    # several flags.
    if "$srcdir/configure" $conf_flags "$@"; then
        echo Now type \`make\' to compile.
    else
        exit 1
    fi
else
    echo Skipping configure process.
fi

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,26 @@
/* config.h. Generated from config.h.in by configure. */
/* config.h.in. Generated from configure.ac by autoheader. */
/* Name of package */
#define PACKAGE "mgiza"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME "mgiza"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "mgiza 1.0"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "mgiza"
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.0"
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Version number of package */
#define VERSION "1.0"

View File

@ -0,0 +1,25 @@
/* config.h.in. Generated from configure.ac by autoheader. */
/* Name of package */
#undef PACKAGE
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Version number of package */
#undef VERSION

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,28 @@
dnl Process this file with autoconf to produce a configure script.
dnl Created by Anjuta application wizard.
dnl
dnl Package name "mgiza", version "1.0".
AC_INIT(mgiza, 1.0)
AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION)
dnl NOTE(review): autogen.sh greps this file for a line starting with
dnl AM_CONFIG_HEADER to decide whether to run autoheader, so keep that
dnl script in sync if this macro is ever renamed.
AM_CONFIG_HEADER(config.h)
AM_MAINTAINER_MODE
AC_ISC_POSIX
AC_PROG_CXX
AM_PROG_CC_STDC
AC_HEADER_STDC
AC_PROG_RANLIB
dnl NOTE(review): both the AM_ and the AC_ spelling of PROG_LIBTOOL are
dnl invoked below. autogen.sh keys its libtoolize step on a line starting
dnl with AM_PROG_LIBTOOL; confirm whether the second call is still needed.
AM_PROG_LIBTOOL
AC_PROG_LIBTOOL
dnl Makefiles generated by configure.
AC_OUTPUT([
Makefile
src/Makefile
src/mkcls/Makefile
])

View File

@ -0,0 +1,519 @@
#!/bin/sh
# install - install a program, script, or datafile
scriptversion=2006-12-25.00
# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
# following copyright and license.
#
# Copyright (C) 1994 X Consortium
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of the X Consortium shall not
# be used in advertising or otherwise to promote the sale, use or other deal-
# ings in this Software without prior written authorization from the X Consor-
# tium.
#
#
# FSF changes to this file are in the public domain.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# `make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch.
# A literal newline, used to build IFS below.
nl='
'
IFS=" "" $nl"
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
doit=${DOITPROG-}
# doit_exec prefixes commands run as the last step of a subshell: plain
# `exec' normally, or the DOITPROG wrapper when one is set.
if test -z "$doit"; then
doit_exec=exec
else
doit_exec=$doit
fi
# Put in absolute file names if you don't have them in your path;
# or use environment vars.
chgrpprog=${CHGRPPROG-chgrp}
chmodprog=${CHMODPROG-chmod}
chownprog=${CHOWNPROG-chown}
cmpprog=${CMPPROG-cmp}
cpprog=${CPPROG-cp}
mkdirprog=${MKDIRPROG-mkdir}
mvprog=${MVPROG-mv}
rmprog=${RMPROG-rm}
stripprog=${STRIPPROG-strip}
# posix_glob is a tri-state: '?' means "not probed yet".  Evaluating
# $initialize_posix_glob sets it to the empty string when `set -f' works
# and to `:' otherwise, so that `$posix_glob set -f' either runs `set -f'
# or degrades to the no-op `: set -f'.
posix_glob='?'
initialize_posix_glob='
test "$posix_glob" != "?" || {
if (set -f) 2>/dev/null; then
posix_glob=
else
posix_glob=:
fi
}
'
# Empty until the main loop has probed mkdir; then `:' or `false'.
posix_mkdir=
# Desired mode of installed file.
mode=0755
# Command prefixes; an empty value means the corresponding step is skipped.
chgrpcmd=
chmodcmd=$chmodprog
chowncmd=
mvcmd=$mvprog
rmcmd="$rmprog -f"
stripcmd=
src=
dst=
# Set to `true' by -d.
dir_arg=
# Destination: set by -t, or taken from the last argument.
dst_arg=
# Set to `true' by -C.
copy_on_change=false
# Set to `true' by -T.
no_target_directory=
usage="\
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
or: $0 [OPTION]... SRCFILES... DIRECTORY
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
or: $0 [OPTION]... -d DIRECTORIES...
In the 1st form, copy SRCFILE to DSTFILE.
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
In the 4th, create DIRECTORIES.
Options:
--help display this help and exit.
--version display version info and exit.
-c (ignored)
-C install only if different (preserve the last data modification time)
-d create directories instead of installing files.
-g GROUP $chgrpprog installed files to GROUP.
-m MODE $chmodprog installed files to MODE.
-o USER $chownprog installed files to USER.
-s $stripprog installed files.
-t DIRECTORY install into DIRECTORY.
-T report an error if DSTFILE is a directory.
Environment variables override the default commands:
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
RMPROG STRIPPROG
"
# Parse leading options.  Options taking a value (-g, -m, -o, -t) consume
# $2 with an extra shift; the loop stops at `--' or at the first argument
# that does not start with `-'.
while test $# -ne 0; do
case $1 in
-c) ;;
-C) copy_on_change=true;;
-d) dir_arg=true;;
-g) chgrpcmd="$chgrpprog $2"
shift;;
--help) echo "$usage"; exit $?;;
# Reject modes containing whitespace or glob characters, because $mode is
# later expanded unquoted.
-m) mode=$2
case $mode in
*' '* | *' '* | *'
'* | *'*'* | *'?'* | *'['*)
echo "$0: invalid mode: $mode" >&2
exit 1;;
esac
shift;;
-o) chowncmd="$chownprog $2"
shift;;
-s) stripcmd=$stripprog;;
-t) dst_arg=$2
shift;;
-T) no_target_directory=true;;
--version) echo "$0 $scriptversion"; exit $?;;
--) shift
break;;
-*) echo "$0: invalid option: $1" >&2
exit 1;;
*) break;;
esac
shift
done
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
# When -d is used, all remaining arguments are directories to create.
# When -t is used, the destination is already specified.
# Otherwise, the last argument is the destination. Remove it from $@.
# The loop rotates the positional parameters: each pass drops the current
# first argument and re-appends the previous one, so that on exit $@ holds
# every argument but the last and dst_arg holds the last.
for arg
do
if test -n "$dst_arg"; then
# $@ is not empty: it contains at least $arg.
set fnord "$@" "$dst_arg"
shift # fnord
fi
shift # arg
dst_arg=$arg
done
fi
if test $# -eq 0; then
if test -z "$dir_arg"; then
echo "$0: no input file specified." >&2
exit 1
fi
# It's OK to call `install-sh -d' without argument.
# This can happen when creating conditional directories.
exit 0
fi
if test -z "$dir_arg"; then
# On signals 1, 2, 13 and 15, exit with the current status.
trap '(exit $?); exit' 1 2 13 15
# Set umask so as not to create temps with too-generous modes.
# However, 'strip' requires both read and write access to temps.
case $mode in
# Optimize common cases.
*644) cp_umask=133;;
*755) cp_umask=22;;
*[0-7])
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw='% 200'
fi
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
# Any other (symbolic) mode: hand it to umask, adding u+rw when stripping.
*)
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw=,u+rw
fi
cp_umask=$mode$u_plus_rw;;
esac
fi
# Main loop: one iteration per remaining argument.
#  - With -d, each argument is a directory to create.
#  - Otherwise each argument is a source file that is copied to a temporary
#    name inside the destination directory, adjusted (chown, chgrp, strip,
#    chmod) and then renamed onto the destination.
# A missing destination directory is created first, preferably with a
# single `mkdir -p', else component by component.
for src
do
# Protect names starting with `-'.
case $src in
-*) src=./$src;;
esac
if test -n "$dir_arg"; then
dst=$src
dstdir=$dst
test -d "$dstdir"
dstdir_status=$?
else
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if test ! -f "$src" && test ! -d "$src"; then
echo "$0: $src does not exist." >&2
exit 1
fi
if test -z "$dst_arg"; then
echo "$0: no destination specified." >&2
exit 1
fi
dst=$dst_arg
# Protect names starting with `-'.
case $dst in
-*) dst=./$dst;;
esac
# If destination is a directory, append the input filename; won't work
# if double slashes aren't ignored.
if test -d "$dst"; then
if test -n "$no_target_directory"; then
echo "$0: $dst_arg: Is a directory" >&2
exit 1
fi
dstdir=$dst
dst=$dstdir/`basename "$src"`
dstdir_status=0
else
# Prefer dirname, but fall back on a substitute if dirname fails.
dstdir=`
(dirname "$dst") 2>/dev/null ||
expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
X"$dst" : 'X\(//\)[^/]' \| \
X"$dst" : 'X\(//\)$' \| \
X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
echo X"$dst" |
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
s//\1/
q
}
/^X\(\/\/\)[^/].*/{
s//\1/
q
}
/^X\(\/\/\)$/{
s//\1/
q
}
/^X\(\/\).*/{
s//\1/
q
}
s/.*/./; q'
`
test -d "$dstdir"
dstdir_status=$?
fi
fi
obsolete_mkdir_used=false
if test $dstdir_status != 0; then
case $posix_mkdir in
'')
# Create intermediate dirs using mode 755 as modified by the umask.
# This is like FreeBSD 'install' as of 1997-10-28.
umask=`umask`
case $stripcmd.$umask in
# Optimize common cases.
*[2367][2367]) mkdir_umask=$umask;;
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
*[0-7])
mkdir_umask=`expr $umask + 22 \
- $umask % 100 % 40 + $umask % 20 \
- $umask % 10 % 4 + $umask % 2
`;;
*) mkdir_umask=$umask,go-w;;
esac
# With -d, create the new directory with the user-specified mode.
# Otherwise, rely on $mkdir_umask.
if test -n "$dir_arg"; then
mkdir_mode=-m$mode
else
mkdir_mode=
fi
posix_mkdir=false
case $umask in
*[123567][0-7][0-7])
# POSIX mkdir -p sets u+wx bits regardless of umask, which
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
;;
*)
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
if (umask $mkdir_umask &&
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
then
if test -z "$dir_arg" || {
# Check for POSIX incompatibilities with -m.
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
# other-writeable bit of parent directory when it shouldn't.
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
ls_ld_tmpdir=`ls -ld "$tmpdir"`
case $ls_ld_tmpdir in
d????-?r-*) different_mode=700;;
d????-?--*) different_mode=755;;
*) false;;
esac &&
$mkdirprog -m$different_mode -p -- "$tmpdir" && {
ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
}
}
then posix_mkdir=:
fi
rmdir "$tmpdir/d" "$tmpdir"
else
# Remove any dirs left behind by ancient mkdir implementations.
rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
fi
trap '' 0;;
esac;;
esac
if
$posix_mkdir && (
umask $mkdir_umask &&
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
)
then :
else
# The umask is ridiculous, or mkdir does not conform to POSIX,
# or it failed possibly due to a race condition. Create the
# directory the slow way, step by step, checking for races as we go.
case $dstdir in
/*) prefix='/';;
-*) prefix='./';;
*) prefix='';;
esac
eval "$initialize_posix_glob"
oIFS=$IFS
IFS=/
$posix_glob set -f
set fnord $dstdir
shift
$posix_glob set +f
IFS=$oIFS
prefixes=
for d
do
test -z "$d" && continue
prefix=$prefix$d
if test -d "$prefix"; then
prefixes=
else
if $posix_mkdir; then
# Run the umask builtin here.  The previous spelling
# `umask=$mkdir_umask' only assigned a shell variable, so mkdir ran
# under the caller's umask instead of $mkdir_umask.
(umask $mkdir_umask &&
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
# Don't fail if two instances are running concurrently.
test -d "$prefix" || exit 1
else
case $prefix in
*\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
*) qprefix=$prefix;;
esac
prefixes="$prefixes '$qprefix'"
fi
fi
prefix=$prefix/
done
if test -n "$prefixes"; then
# Don't fail if two instances are running concurrently.
(umask $mkdir_umask &&
eval "\$doit_exec \$mkdirprog $prefixes") ||
test -d "$dstdir" || exit 1
obsolete_mkdir_used=true
fi
fi
fi
if test -n "$dir_arg"; then
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
else
# Make a couple of temp file names in the proper directory.
dsttmp=$dstdir/_inst.$$_
rmtmp=$dstdir/_rm.$$_
# Trap to clean up those temp files at exit.
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
# Copy the file name to the temp name.
(umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
# and set any options; do chmod last to preserve setuid bits.
#
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $cpprog $src $dsttmp" command.
#
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
# If -C, don't bother to copy if it wouldn't change the file.
if $copy_on_change &&
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
eval "$initialize_posix_glob" &&
$posix_glob set -f &&
set X $old && old=:$2:$4:$5:$6 &&
set X $new && new=:$2:$4:$5:$6 &&
$posix_glob set +f &&
test "$old" = "$new" &&
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
then
rm -f "$dsttmp"
else
# Rename the file to the real destination.
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
# The rename failed, perhaps because mv can't rename something else
# to itself, or perhaps because mv is so ancient that it does not
# support -f.
{
# Now remove or move aside any old file at destination location.
# We try this two ways since rm can't unlink itself on some
# systems and the destination file might be busy for other
# reasons. In this case, the final cleanup might fail but the new
# file should still install successfully.
{
test ! -f "$dst" ||
$doit $rmcmd -f "$dst" 2>/dev/null ||
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
} ||
{ echo "$0: cannot unlink or rename $dst" >&2
(exit 1); exit 1
}
} &&
# Now rename the file to the real destination.
$doit $mvcmd "$dsttmp" "$dst"
}
fi || exit 1
trap '' 0
fi
done
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-end: "$"
# End:

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,37 @@
<?xml version="1.0"?>
<anjuta>
<plugin name="GBF Project Manager"
url="http://anjuta.org/plugins/"
mandatory="yes">
<require group="Anjuta Plugin"
attribute="Interfaces"
value="IAnjutaProjectManager"/>
<require group="Project"
attribute="Supported-Project-Types"
value="automake"/>
</plugin>
<plugin name="Make Build System"
url="http://anjuta.org/plugins/"
mandatory="yes">
<require group="Anjuta Plugin"
attribute="Interfaces"
value="IAnjutaBuildable"/>
<require group="Build"
attribute="Supported-Build-Types"
value="make"/>
</plugin>
<plugin name="Task Manager"
url="http://anjuta.org/plugins/"
mandatory="no">
<require group="Anjuta Plugin"
attribute="Interfaces"
value="IAnjutaTodo"/>
</plugin>
<plugin name="Debug Manager"
url="http://anjuta.org/plugins/"
mandatory="no">
<require group="Anjuta Plugin"
attribute="Interfaces"
value="IAnjutaDebuggerManager"/>
</plugin>
</anjuta>

View File

@ -0,0 +1,367 @@
#! /bin/sh
# Common stub for a few missing GNU programs while installing.
scriptversion=2006-05-10.23
# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006
# Free Software Foundation, Inc.
# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# At least one argument (an option or the program name) is required.
if test $# -eq 0; then
echo 1>&2 "Try \`$0 --help' for more information"
exit 1
fi
# $run is `:' (non-empty) unless --run was given; it is cleared by --run and
# restored to `:' when the real program reports a version mismatch.
run=:
# sed programs extracting the output file name from `--output FILE',
# `--output=FILE' or `-o FILE' in a command line.
sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p'
sed_minuso='s/.* -o \([^ ]*\).*/\1/p'
# In the cases where this matters, `missing' is being run in the
# srcdir already.
if test -f configure.ac; then
configure_ac=configure.ac
else
configure_ac=configure.in
fi
# Phrase inserted into the warnings printed by the emulation section.
msg="missing on your system"
case $1 in
--run)
# Try to run requested program, and just exit if it succeeds.
run=
shift
"$@" && exit 0
# Exit code 63 means version mismatch. This often happens
# when the user tries to use an ancient version of a tool on
# a file that requires a minimum version. In this case
# we should proceed as if the program had been absent, or
# if --run hadn't been passed.
if test $? = 63; then
run=:
msg="probably too old"
fi
;;
-h|--h|--he|--hel|--help)
echo "\
$0 [OPTION]... PROGRAM [ARGUMENT]...
Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
error status if there is no known handling for PROGRAM.
Options:
-h, --help display this help and exit
-v, --version output version information and exit
--run try to run the given command, and emulate it if it fails
Supported PROGRAM values:
aclocal touch file \`aclocal.m4'
autoconf touch file \`configure'
autoheader touch file \`config.h.in'
autom4te touch the output file, or create a stub one
automake touch all \`Makefile.in' files
bison create \`y.tab.[ch]', if possible, from existing .[ch]
flex create \`lex.yy.c', if possible, from existing .c
help2man touch the output file
lex create \`lex.yy.c', if possible, from existing .c
makeinfo touch the output file
tar try tar, gnutar, gtar, then tar without non-portable flags
yacc create \`y.tab.[ch]', if possible, from existing .[ch]
Send bug reports to <bug-automake@gnu.org>."
exit $?
;;
-v|--v|--ve|--ver|--vers|--versi|--versio|--version)
echo "missing $scriptversion (GNU Automake)"
exit $?
;;
-*)
echo 1>&2 "$0: Unknown \`$1' option"
echo 1>&2 "Try \`$0 --help' for more information"
exit 1
;;
esac
# Now exit if we have it, but it failed. Also exit now if we
# don't have it and --version was passed (most likely to detect
# the program).
case $1 in
lex|yacc)
# Not GNU programs, they don't have --version.
;;
tar)
# The tar emulation is only available when invoked with --run.
if test -n "$run"; then
echo 1>&2 "ERROR: \`tar' requires --run"
exit 1
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
exit 1
fi
;;
*)
if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
# We have it, but it failed.
exit 1
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
# Could not run --version or --help. This is probably someone
# running `$TOOL --version' or `$TOOL --help' to check whether
# $TOOL exists and not knowing $TOOL uses missing.
exit 1
fi
;;
esac
# If it does not exist, or fails to run (possibly an outdated version),
# try to emulate it.
# Each branch prints a warning on stderr and then touches or stubs the
# files the real tool would have produced.  Branches that cannot emulate
# the tool exit 1; the others fall through to the final `exit 0'.
case $1 in
aclocal*)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified \`acinclude.m4' or \`${configure_ac}'. You might want
to install the \`Automake' and \`Perl' packages. Grab them from
any GNU archive site."
touch aclocal.m4
;;
autoconf)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified \`${configure_ac}'. You might want to install the
\`Autoconf' and \`GNU m4' packages. Grab them from any GNU
archive site."
touch configure
;;
autoheader)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified \`acconfig.h' or \`${configure_ac}'. You might want
to install the \`Autoconf' and \`GNU m4' packages. Grab them
from any GNU archive site."
# Collect the header names from A[CM]_CONFIG_HEADER(...) and touch the
# matching templates: `f.in' for a plain name, or the part between the
# first and second `:' for a `header:template' specification.
files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
test -z "$files" && files="config.h"
touch_files=
for f in $files; do
case $f in
*:*) touch_files="$touch_files "`echo "$f" |
sed -e 's/^[^:]*://' -e 's/:.*//'`;;
*) touch_files="$touch_files $f.in";;
esac
done
touch $touch_files
;;
automake*)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
You might want to install the \`Automake' and \`Perl' packages.
Grab them from any GNU archive site."
# Touch the Makefile.in next to every Makefile.am below the current dir.
find . -type f -name Makefile.am -print |
sed 's/\.am$/.in/' |
while read f; do touch "$f"; done
;;
autom4te)
echo 1>&2 "\
WARNING: \`$1' is needed, but is $msg.
You might have modified some files without having the
proper tools for further handling them.
You can get \`$1' as part of \`Autoconf' from any GNU
archive site."
file=`echo "$*" | sed -n "$sed_output"`
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
if test -f "$file"; then
touch $file
else
# No existing output file: write a stub script (to $file when one was
# named, otherwise to stdout) and report failure.
test -z "$file" || exec >$file
echo "#! /bin/sh"
echo "# Created by GNU Automake missing as a replacement of"
echo "# $ $@"
echo "exit 0"
chmod +x $file
exit 1
fi
;;
bison|yacc)
echo 1>&2 "\
WARNING: \`$1' $msg. You should only need it if
you modified a \`.y' file. You may need the \`Bison' package
in order for those modifications to take effect. You can get
\`Bison' from any GNU archive site."
rm -f y.tab.c y.tab.h
if test $# -ne 1; then
# When the last argument is FILE.y, reuse an existing FILE.c / FILE.h.
eval LASTARG="\${$#}"
case $LASTARG in
*.y)
SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
if test -f "$SRCFILE"; then
cp "$SRCFILE" y.tab.c
fi
SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
if test -f "$SRCFILE"; then
cp "$SRCFILE" y.tab.h
fi
;;
esac
fi
# Otherwise fall back to placeholder files.
if test ! -f y.tab.h; then
echo >y.tab.h
fi
if test ! -f y.tab.c; then
echo 'main() { return 0; }' >y.tab.c
fi
;;
lex|flex)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified a \`.l' file. You may need the \`Flex' package
in order for those modifications to take effect. You can get
\`Flex' from any GNU archive site."
rm -f lex.yy.c
if test $# -ne 1; then
# When the last argument is FILE.l, reuse an existing FILE.c.
eval LASTARG="\${$#}"
case $LASTARG in
*.l)
SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
if test -f "$SRCFILE"; then
cp "$SRCFILE" lex.yy.c
fi
;;
esac
fi
if test ! -f lex.yy.c; then
echo 'main() { return 0; }' >lex.yy.c
fi
;;
help2man)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified a dependency of a manual page. You may need the
\`Help2man' package in order for those modifications to take
effect. You can get \`Help2man' from any GNU archive site."
file=`echo "$*" | sed -n "$sed_output"`
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
if test -f "$file"; then
touch $file
else
# No existing page: emit a one-line stub and report failure.
test -z "$file" || exec >$file
echo ".ab help2man is required to generate this page"
exit 1
fi
;;
makeinfo)
echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified a \`.texi' or \`.texinfo' file, or any other file
indirectly affecting the aspect of the manual. The spurious
call might also be the consequence of using a buggy \`make' (AIX,
DU, IRIX). You might want to install the \`Texinfo' package or
the \`GNU make' package. Grab either from any GNU archive site."
# The file to touch is that specified with -o ...
file=`echo "$*" | sed -n "$sed_output"`
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
if test -z "$file"; then
# ... or it is the one specified with @setfilename ...
infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
file=`sed -n '
/^@setfilename/{
s/.* \([^ ]*\) *$/\1/
p
q
}' $infile`
# ... or it is derived from the source name (dir/f.texi becomes f.info)
test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info
fi
# If the file does not exist, the user really needs makeinfo;
# let's fail without touching anything.
test -f $file || exit 1
touch $file
;;
tar)
shift
# We have already tried tar in the generic part.
# Look for gnutar/gtar before invocation to avoid ugly error
# messages.
if (gnutar --version > /dev/null 2>&1); then
gnutar "$@" && exit 0
fi
if (gtar --version > /dev/null 2>&1); then
gtar "$@" && exit 0
fi
# Retry plain tar with the `o' flag, then also the `h' flag, removed
# from the first argument.
firstarg="$1"
if shift; then
case $firstarg in
*o*)
firstarg=`echo "$firstarg" | sed s/o//`
tar "$firstarg" "$@" && exit 0
;;
esac
case $firstarg in
*h*)
firstarg=`echo "$firstarg" | sed s/h//`
tar "$firstarg" "$@" && exit 0
;;
esac
fi
echo 1>&2 "\
WARNING: I can't seem to be able to run \`tar' with the given arguments.
You may want to install GNU tar or Free paxutils, or check the
command line arguments."
exit 1
;;
*)
echo 1>&2 "\
WARNING: \`$1' is needed, and is $msg.
You might have modified some files without having the
proper tools for further handling them. Check the \`README' file,
it often tells you about the needed prerequisites for installing
this package. You may also peek at any GNU archive site, in case
some other package would contain this missing \`$1' program."
exit 1
;;
esac
exit 0
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-end: "$"
# End:

View File

@ -0,0 +1,2 @@
# Whitespace normaliser (stdin -> stdout): strips leading and trailing
# blanks and squeezes every run of blanks into a single space.
# [[:blank:]] (space and tab) is used instead of `[ \t]' because `\t' inside
# a bracket expression is a GNU extension; a POSIX sed reads it as the two
# characters backslash and `t' and would delete those from the text.
sed -e 's/^[[:blank:]]*//' -e 's/[[:blank:]][[:blank:]]*/ /g' -e 's/[[:blank:]]*$//'

View File

@ -0,0 +1,48 @@
#!/usr/bin/env bash
# Force-align a parallel corpus with MGIZA models that were already trained
# in the current (Moses training) directory.
#
# Usage: <this script> PREFIX src_tag tgt_tag root-dir
#   PREFIX    corpus prefix; PREFIX.src_tag and PREFIX.tgt_tag are read
#   src_tag   source language tag
#   tgt_tag   target language tag
#   root-dir  output directory; receives corpus/, giza.SRC-TGT/ and
#             giza.TGT-SRC/
# Reads from the current directory: corpus/*.vcb, corpus/*.vcb.classes and
# the giza.*/ model files.  Requires ${QMT_HOME} to point at the MGIZA
# installation (bin/mgiza, bin/snt2cooc, scripts/plain2snt-hasvcb.py).
# Exit status: 1 on a usage error or when any step fails, 0 otherwise.

if [ $# -lt 4 ]; then
    echo "OK, this is simple, put me into your Moses training directory, link your source/target corpus" 1>&2
    echo "and run " $0 " PREFIX src_tag tgt_tag root-dir." 1>&2
    echo "and get force-aligned data: root-dir/giza.[src-tgt|tgt-src]/*.A3.final.* " 1>&2
    echo "make sure I can find PREFIX.src_tag-tgt_tag and PREFIX.tgt_tag-src_tag, and \${QMT_HOME} is set" 1>&2
    # A usage error is a failure: do not report success to the caller.
    exit 1
fi

# Without QMT_HOME every tool path below would silently resolve under "/".
if [ -z "${QMT_HOME}" ]; then
    echo "$0: \${QMT_HOME} is not set" 1>&2
    exit 1
fi

MGIZA="${QMT_HOME}/bin/mgiza"
SNT2COOC="${QMT_HOME}/bin/snt2cooc"
PLAIN2SNT="${QMT_HOME}/scripts/plain2snt-hasvcb.py"

PRE=$1
SRC=$2
TGT=$3
ROOT=$4

mkdir -p "$ROOT/giza.${SRC}-${TGT}" "$ROOT/giza.${TGT}-${SRC}" "$ROOT/corpus" || exit 1

echo "Generating corpus file " 1>&2
"$PLAIN2SNT" "corpus/$SRC.vcb" "corpus/$TGT.vcb" "${PRE}.${SRC}" "${PRE}.${TGT}" \
    "$ROOT/corpus/${TGT}-${SRC}.snt" "$ROOT/corpus/${SRC}-${TGT}.snt" \
    "$ROOT/corpus/$SRC.vcb" "$ROOT/corpus/$TGT.vcb" || exit 1

# Link the existing word-class files next to the newly written vocabularies.
ln -sf "$PWD/corpus/$SRC.vcb.classes" "$PWD/corpus/$TGT.vcb.classes" "$ROOT/corpus/" || exit 1

echo "Generating co-occurrence file " 1>&2
"$SNT2COOC" "$ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc" "$ROOT/corpus/$SRC.vcb" "$ROOT/corpus/$TGT.vcb" \
    "$ROOT/corpus/${TGT}-${SRC}.snt" || exit 1
"$SNT2COOC" "$ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc" "$ROOT/corpus/$TGT.vcb" "$ROOT/corpus/$SRC.vcb" \
    "$ROOT/corpus/${SRC}-${TGT}.snt" || exit 1

echo "Running force alignment " 1>&2
# The two directions are independent, so both are always attempted; the
# script reports failure if either one fails.
status=0

"$MGIZA" "giza.$TGT-$SRC/$TGT-$SRC.gizacfg" -c "$ROOT/corpus/$TGT-$SRC.snt" -o "$ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}" \
    -s "$ROOT/corpus/$SRC.vcb" -t "$ROOT/corpus/$TGT.vcb" -m1 0 -m2 0 -mh 0 -coocurrence "$ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc" \
    -restart 11 -previoust "giza.$TGT-$SRC/$TGT-$SRC.t3.final" \
    -previousa "giza.$TGT-$SRC/$TGT-$SRC.a3.final" -previousd "giza.$TGT-$SRC/$TGT-$SRC.d3.final" \
    -previousn "giza.$TGT-$SRC/$TGT-$SRC.n3.final" -previousd4 "giza.$TGT-$SRC/$TGT-$SRC.d4.final" \
    -previousd42 "giza.$TGT-$SRC/$TGT-$SRC.D4.final" -m3 0 -m4 1 || status=1

"$MGIZA" "giza.$SRC-$TGT/$SRC-$TGT.gizacfg" -c "$ROOT/corpus/$SRC-$TGT.snt" -o "$ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}" \
    -s "$ROOT/corpus/$TGT.vcb" -t "$ROOT/corpus/$SRC.vcb" -m1 0 -m2 0 -mh 0 -coocurrence "$ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc" \
    -restart 11 -previoust "giza.$SRC-$TGT/$SRC-$TGT.t3.final" \
    -previousa "giza.$SRC-$TGT/$SRC-$TGT.a3.final" -previousd "giza.$SRC-$TGT/$SRC-$TGT.d3.final" \
    -previousn "giza.$SRC-$TGT/$SRC-$TGT.n3.final" -previousd4 "giza.$SRC-$TGT/$SRC-$TGT.d4.final" \
    -previousd42 "giza.$SRC-$TGT/$SRC-$TGT.D4.final" -m3 0 -m4 1 || status=1

exit $status

View File

@ -0,0 +1,112 @@
#! /usr/bin/perl
# $Id: giza2bal.pl 1562 2008-02-19 20:48:14Z redpony $
#Converts direct and inverted alignments into a more compact
#bi-alignment format. It optionally reads the counting file
#produced by giza containing the frequency of each training sentence.
#Copyright Marcello Federico, November 2004
# Command-line handling and input opening.
#   -d <dir-align-file>   direct alignment (file name or command)
#   -i <inv-align-file>   inverted alignment (file name or command)
#   -c <count-file>       optional sentence counts (file name or command)
($cnt,$dir,$inv)=();
while ($w=shift @ARGV){
  $dir=shift(@ARGV),next if $w eq "-d";
  $inv=shift(@ARGV),next if $w eq "-i";
  $cnt=shift(@ARGV),next if $w eq "-c";
}
# Record counter used by ReadBiAlign for its error messages.
my $lc = 0;
# Both alignment inputs are mandatory.  (The test used to read "!inv", a
# bareword that is always true, so a missing -i went undetected.)
if (!$dir || !$inv){
  print "usage: giza2bal.pl [-c <count-file>] -d <dir-align-file> -i <inv-align-file>\n";
  print "input files can be also commands, e.g. -d \"gunzip -c file.gz\"\n";
  exit(0);
}
# Unbuffered output.
$|=1;
# Each input is tried as a plain file first and, failing that, as a command
# whose standard output is read through a pipe.
open(DIR,"<$dir") || open(DIR,"$dir|") || die "cannot open $dir\n";
open(INV,"<$inv") || open(INV,"$inv|") || die "cannot open $inv\n";
if ($cnt){
  open(CNT,"<$cnt") || open(CNT,"$cnt|") || die "cannot open $cnt\n";
}
# ReadBiAlign(FD0, FD1, FD2, *s1, *s2, *a, *b, *c)
#
# Reads one record from each of the three handles and fills the caller's
# variables through the typeglobs passed in:
#   $c      count taken from FD0 (1 when the value read is empty or false)
#   $s1,$s2 second line of the FD1 / FD2 record
#   @a      for each position listed in the third line of the FD1 record,
#           the 1-based index of the word group that lists it
#   @b      same, built from the FD2 record
# The sub returns 1 on every path.
sub ReadBiAlign{
# The typeglobs alias the caller's variables, so assignments below to
# $s1, $s2, @a, @b and $c are visible to the caller.
local($fd0,$fd1,$fd2,*s1,*s2,*a,*b,*c)=@_;
local($dummy,$n);
# Three lines are consumed from the count handle; only the first is kept.
chop($c=<$fd0>); ## count
$dummy=<$fd0>; ## header
$dummy=<$fd0>; ## header
$c=1 if !$c;
# Each alignment record is three lines: header, sentence, aligned sentence.
$dummy=<$fd1>; ## header
chop($s1=<$fd1>);
chop($t1=<$fd1>);
$dummy=<$fd2>; ## header
chop($s2=<$fd2>);
chop($t2=<$fd2>);
@a=@b=();
$lc++;
#get target statistics
$n=1;
# Drop the NULL group, then consume "word ({ p1 p2 ... })" groups one at a
# time; every listed position p gets $a[p] = index of the current group.
$t1=~s/NULL \(\{((\s+\d+)*)\s+\}\)//;
while ($t1=~s/(\S+)\s+\(\{((\s+\d+)*)\s+\}\)//){
grep($a[$_]=$n,split(/\s+/,$2));
$n++;
}
$m=1;
$t2=~s/NULL \(\{((\s+\d+)*)\s+\}\)//;
while ($t2=~s/(\S+)\s+\(\{((\s+\d+)*)\s+\}\)//){
grep($b[$_]=$m,split(/\s+/,$2));
$m++;
}
# Token counts of the two plain sentences (split in scalar context).
$M=split(/\s+/,$s1);
$N=split(/\s+/,$s2);
# $n-1 and $m-1 are the numbers of word groups parsed above; they must match
# the token counts of the opposite file's sentence.  On mismatch the record
# is replaced by a one-word placeholder pair aligned 1-1.
if ($m != ($M+1) || $n != ($N+1)) {
print STDERR "Sentence mismatch error! Line #$lc\n";
$s1 = "ALIGN_ERR";
$s2 = "ALIGN_ERR";
@a=(); @b=();
for ($j=1;$j<2;$j++){ $a[$j]=1; }
for ($i=1;$i<2;$i++){ $b[$i]=1; }
return 1;
}
# Positions that no group listed are marked with 0.
for ($j=1;$j<$m;$j++){
$a[$j]=0 if !$a[$j];
}
for ($i=1;$i<$n;$i++){
$b[$i]=0 if !$b[$i];
}
return 1;
}
# Main loop: one bi-alignment record per iteration until the direct
# alignment input is exhausted.  Each record is printed as three lines:
# the count, then "<len> <sentence> # <alignment>" for each direction.
$skip=0;
$ccc=0;
until (eof(DIR)){
  unless (ReadBiAlign(CNT,DIR,INV,*src,*tgt,*a,*b,*c)){
    # Record rejected: emit an empty line and a progress dot every 1000 skips.
    print "\n";
    print STDERR "." if !(++$skip % 1000);
    next;
  }
  $ccc++;
  print "$c\n";
  print $#a," $src \# @a[1..$#a]\n";
  print $#b," $tgt \# @b[1..$#b]\n";
}
print STDERR "skip=<$skip> counts=<$ccc>\n";

View File

@ -0,0 +1,80 @@
#!/usr/bin/env python
# Author : Qin Gao
# Date : Dec 31, 2007
# Purpose: Combine multiple alignment files into a single one. The files are
# produced by MGIZA, carry sentence IDs, and each file is ordered
# internally by sentence ID.
import sys
import re
# At least one alignment file must be named on the command line.
if len(sys.argv) < 2:
    sys.stderr.write("Provide me the file names (at least 2)\n")
    sys.exit()

# The sentence id is the first parenthesised number on a record's first line.
id_pattern = re.compile("\\((\\d+)\\)")

# Parallel per-file state: open handle, id of the buffered record, the
# buffered three-line record itself, and an exhausted flag.
files = [open(path, "r") for path in sys.argv[1:]]
ids = [0] * len(files)
sents = [""] * len(files)
done = [False] * len(files)


def _load_next(idx):
    """Buffer the next three-line record of file idx, or mark the file done."""
    record = (files[idx].readline(),
              files[idx].readline(),
              files[idx].readline())
    if any(len(part) == 0 for part in record):
        done[idx] = True
        return
    ids[idx] = int(id_pattern.search(record[0]).group(1))
    sents[idx] = record


# Prime every file with its first record.
for idx in range(len(files)):
    _load_next(idx)

sent_id = 0
cont = True
while cont:
    sent_id += 1
    writeOne = False
    cont = False
    # Look for the file whose buffered record carries the id we need next.
    for idx in range(len(files)):
        if done[idx]:
            continue
        cont = True
        if ids[idx] == sent_id:
            sys.stdout.write("%s%s%s" % sents[idx])
            writeOne = True
            _load_next(idx)
            break
        if ids[idx] < sent_id:
            # A buffered id behind the counter means it was already emitted.
            sys.stderr.write("ERROR! DUPLICATED ENTRY %d\n" % ids[idx])
            sys.exit()
    if cont and not writeOne:
        sys.stderr.write("ERROR! MISSING ENTRy %d\n" % sent_id)
        #sys.exit();
sys.stderr.write("Combined %d files, totally %d sents \n" % (len(files), sent_id - 1))

View File

@ -0,0 +1,93 @@
#!/usr/bin/env python
from sys import *
def loadvcb(fname,out):
    """Load a vocabulary file and echo it to ``out``.

    Every line of ``fname`` is copied unchanged to ``out``.  Lines are
    expected to start with ``<id> <word>``; the returned dictionary maps
    each word to its integer id.  Lines with fewer than two fields (for
    example a trailing blank line) are echoed but not parsed.
    """
    vocab = {}
    # The handle is closed on exit, including when a malformed id raises.
    with open(fname, "r") as df:
        for line in df:
            out.write(line)
            ws = line.strip().split()
            if len(ws) < 2:
                continue
            vocab[ws[1]] = int(ws[0])
    return vocab
if len(argv)<9:
    stderr.write("Error, the input should be \n");
    stderr.write("%s evcb fvcb etxt ftxt esnt(out) fsnt(out) evcbx(out) fvcbx(out)\n" % argv[0]);
    stderr.write("You should concatenate the evcbx and fvcbx to existing vcb files\n");
    exit();

ein = open(argv[3],"r");
fin = open(argv[4],"r");
eout = open(argv[5],"w");
fout = open(argv[6],"w");
evcbx = open(argv[7],"w");
fvcbx = open(argv[8],"w");

# loadvcb also copies the existing vocabulary into the extended output file.
evcb = loadvcb(argv[1],evcbx);
fvcb = loadvcb(argv[2],fvcbx);


def _word_id(vcb, vcbx, w):
    """Return the id of word w, registering it in vcb/vcbx when unknown.

    The exact form is tried first, then the lower-cased form.  An unknown
    word is stored under its lower-cased form and appended to vcbx with its
    original spelling and a count of 1.
    """
    if w in vcb:
        return vcb[w]
    lowered = w.lower()
    if lowered in vcb:
        return vcb[lowered]
    # NOTE(review): assumes the existing ids are exactly 1..len(vcb); a
    # vocabulary with gaps or a different first id could collide -- confirm.
    nid = len(vcb) + 1
    vcb[lowered] = nid
    vcbx.write("%d %s 1\n" % (nid, w))
    return nid


# Process the two texts in lock-step, stopping when either one ends.
while True:
    eline = ein.readline()
    fline = fin.readline()
    if len(eline)==0 or len(fline)==0:
        break
    el = [_word_id(evcb, evcbx, w) for w in eline.strip().split()]
    fl = [_word_id(fvcb, fvcbx, w) for w in fline.strip().split()]
    e_ids = "".join("%d " % I for I in el)
    f_ids = "".join("%d " % I for I in fl)
    # Each snt record is three lines: the count 1 and the two id sequences;
    # the second output file lists the two sentences in the opposite order.
    eout.write("1\n" + e_ids + "\n" + f_ids + "\n")
    fout.write("1\n" + f_ids + "\n" + e_ids + "\n")

ein.close();
fin.close();
fout.close();
eout.close();
fvcbx.close();
evcbx.close();

View File

@ -0,0 +1,116 @@
#!/usr/bin/env python
# This script post process the snt file -- either in single-line format or in multi-line format
# The output, however, will always be in single-line format
from sys import *
from optparse import OptionParser
import re;
usage = """
The script post process the snt file, the input could be single-line snt
file or multi-line, (triple line) and can insert sentence weight to the
file (-w) or add partial alignment to the file (-a)
Usage %prog -s sntfile -w weight-file -a alignfile -o outputfile
"""
# A single parser, constructed with the usage text so that --help and the
# missing-input path actually print it.
parser = OptionParser(usage=usage)
parser.add_option("-s", "--snt", dest="snt",default=None,
                  help="The input snt file", metavar="FILE")
parser.add_option("-w", "--weight", dest="weight",default=None,
                  help="The input weight file", metavar="FILE")
parser.add_option("-o", "--output", dest="output",default="-",
                  help="The output file, '-' for standard output", metavar="FILE")
parser.add_option("-a", "--align", dest="align",default=None,
                  help="The input partial alignment file, one sentence per line", metavar="FILE")
(options, args) = parser.parse_args()

# The snt input is mandatory; every other file is optional.
if options.snt is None:
    parser.print_help();
    exit();
else:
    sfile = open(options.snt,"r");

if options.output=="-":
    ofile = stdout;
else:
    ofile = open(options.output,"w");

wfile = None;
if options.weight is not None:
    wfile = open(options.weight,"r");

afile = None;
if options.align is not None:
    afile = open(options.align,"r");

# Field separators accepted in single-line snt records: '|', '#' or '*'.
rr = re.compile("[\\|\\#\\*]");
# State of the record currently being processed.
wt = 0.0;
al = {};
e = "";
f = "";
def parse_ax(line):
    """Parse a line of alignment links into a dictionary.

    The line holds whitespace-separated tokens of the form ``a-b``.  Each
    token that splits into exactly two parts on '-' becomes the key
    ``(a, b)`` (both strings) with value 1; every other token is ignored.
    Any run of whitespace separates tokens, so tab-separated links are
    accepted as well as space-separated ones.
    """
    alq = {}
    for token in line.split():
        alo = token.split("-")
        if len(alo) == 2:
            alq[tuple(alo)] = 1
    return alq
# Main loop: read one snt record per iteration, optionally override its
# weight and merge extra alignment links, then write it in single-line form.
while True:
    l = sfile.readline();
    if len(l) == 0:
        break;
    lp = rr.split(l.strip());
    if len(lp)>=3:
        # Single-line record: weight, first sentence, second sentence and
        # optionally a field of alignment links.
        wt = float(lp[0]);
        e = lp[1];
        f = lp[2];
        if len(lp) > 3:
            al = parse_ax(lp[3]);
        else:
            al = {};
    else:
        # Triple-line record: the weight is on this line and the two
        # sentences are on the next two lines.
        wt = float(l);
        e = sfile.readline().strip();
        f = sfile.readline().strip();
        al={}
    if wfile is not None:
        # The weight file supplies one weight per record; an empty line
        # there means weight 1.
        lw = wfile.readline().strip();
        if len(lw)>0:
            wt = float(lw);
        else:
            wt = 1;
    if afile is not None:
        # Links from the alignment file are added to those already present.
        la = afile.readline().strip();
        if len(la)>0:
            al1 = parse_ax(la);
            for entry in al1.keys():
                al[entry] = 1;
    ofile.write("%g | %s | %s" % (wt, e, f));
    if len(al)>0:
        ofile.write(" |");
        for entry in al.keys():
            ofile.write(" %s-%s" % entry);
    ofile.write("\n");

View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Arguments: OUTPUT GIZA2BAL SYMAL STOT TTOS [options passed on to SYMAL...]
#
# Runs the GIZA2BAL perl script on the two alignment inputs and pipes its
# output through SYMAL, writing the result to OUTPUT.
OUTPUT=$1
shift
GIZA2BAL=$1
shift
SYMAL=$1
shift
STOT=$1
shift
TTOS=$1
shift
# Every expansion is quoted so that an input given as a command with spaces
# (e.g. "gunzip -c file.gz") reaches the perl script as one argument, and the
# remaining options are forwarded to SYMAL word-for-word.
perl "$GIZA2BAL" -d "${STOT}" -i "${TTOS}" | "$SYMAL" "$@" > "$OUTPUT"

View File

@ -0,0 +1,17 @@
.libs
.deps
.*swp
.nautilus-metafile.xml
*.autosave
*.bak
*~
# Files named #something#: the leading '#' is escaped because an unescaped
# '#' at the start of a line turns the whole line into a comment.
\#*#
*.gladep
*.la
*.lo
*.o
*.class
*.pyc
*.plugin
Makefile
Makefile.in

View File

@ -0,0 +1,212 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include "ATables.h"
#include "Globals.h"
#include "myassert.h"
#include "Parameter.h"
// Run-time options used by the a/d tables. ATables.h declares both names as
// extern variables.
// NOTE(review): GLOBAL_PARAMETER presumably defines the variable and
// registers it under the given option name with the last argument as its
// default -- confirm against Parameter.h.
GLOBAL_PARAMETER(bool,CompactADTable,"compactadtable","1: only 3-dimensional alignment table for IBM-2 and IBM-3",PARLEV_MODELS,1);
GLOBAL_PARAMETER(float,amodel_smooth_factor,"model23SmoothFactor","smoothing parameter for IBM-2/3 (interpolation with constant)",PARLEV_SMOOTH,0.0);
template <class VALTYPE>
void amodel<VALTYPE>::printTable(const char *filename) const{
  // Writes the pruned table to `filename` (created or overwritten).
  // Only entries whose getValue() result exceeds PROB_SMOOTH are written,
  // one per line:
  //     aj j l m val
  // where aj is source word pos, j target word pos, l source sentence
  // length, m is target sentence length.
  if (is_distortion)
    cout << "Dumping pruned distortion table (d) to file:" << filename <<'\n';
  else
    cout << "Dumping pruned alignment table (a) to file:" << filename <<'\n';
  ofstream of(filename);
  for(WordIndex l=0; l < MaxSentLength; l++){
    for(WordIndex m=0;m<MaxSentLength;m++){
      // With CompactADTable only the l==m iterations are visited.
      if( CompactADTable && l!=m )
        continue;
      // In compact mode one of the two lengths is pinned to MaxSentLength-1
      // (L for a distortion table, M for an alignment table).
      unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
      unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
      if( is_distortion==0 ){
        for(WordIndex j=1;j<=M; j++){
          for(WordIndex i=0;i<=L; i++){
            VALTYPE x=getValue(i, j, L, M);
            if( x>PROB_SMOOTH )
              of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
          }
        }
      }else{
        for(WordIndex i=0;i<=L;i++){
          for(WordIndex j=1;j<=M;j++){
            VALTYPE x=getValue(j, i, L, M);
            if( x>PROB_SMOOTH )
              of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
          }
        }
      }
    }
  }
}
template <class VALTYPE>
void amodel<VALTYPE>::printRealTable(const char *filename) const{
  // Writes the table to `filename` (created or overwritten), keeping every
  // entry whose getValue() result exceeds MINCOUNTINCREASE.
  // One entry per line:  aj j l m val
  // (aj source word position, j target word position, l source sentence
  // length, m target sentence length).
  cout << (is_distortion ? "Dumping not pruned distortion table (d) to file:"
                         : "Dumping not pruned alignment table (a) to file:")
       << filename << '\n';
  ofstream of(filename);
  for(WordIndex srcLen=0; srcLen<MaxSentLength; ++srcLen){
    for(WordIndex tgtLen=0; tgtLen<MaxSentLength; ++tgtLen){
      // Compact tables only visit the iterations with equal lengths.
      if( CompactADTable && srcLen!=tgtLen )
        continue;
      unsigned int L=srcLen;
      unsigned int M=tgtLen;
      if( CompactADTable ){
        // One length is pinned to the largest index in compact mode.
        if( is_distortion )
          L=MaxSentLength-1;
        else
          M=MaxSentLength-1;
      }
      if( !is_distortion ){
        for(WordIndex j=1; j<=M; ++j){
          for(WordIndex i=0; i<=L; ++i){
            const VALTYPE value=getValue(i, j, L, M);
            if( value>MINCOUNTINCREASE )
              of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << value << '\n';
          }
        }
      }else{
        for(WordIndex i=0; i<=L; ++i){
          for(WordIndex j=1; j<=M; ++j){
            const VALTYPE value=getValue(j, i, L, M);
            if( value>MINCOUNTINCREASE )
              of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << value << '\n';
          }
        }
      }
    }
  }
}
extern short NoEmptyWord;
template <class VALTYPE>
bool amodel<VALTYPE>::readTable(const char *filename){
  /* Loads table entries from a text file.
     Each record consists of five whitespace-separated fields
         aj j l m val
     and is stored with setValue(aj, j, l, m, val), i.e. it overwrites
     whatever the cell held before.  The four indices are passed through
     in file order, so the same routine serves a and d tables.
     Returns false when the file cannot be opened and true otherwise;
     reading stops at the first record that fails to parse or at end of
     file. */
  ifstream inf(filename);
  cout << "Reading a/d table from " << filename << "\n";
  if( inf.fail() ){
    cerr << "\nERROR: Cannot open " << filename<<"\n";
    return false;
  }
  WordIndex first, second, srcLen, tgtLen;
  VALTYPE value;
  for(;;){
    inf >> first >> second >> srcLen >> tgtLen >> value;
    if( !inf )
      break;
    setValue(first, second, srcLen, tgtLen, value);
  }
  return true;
}
template <class VALTYPE>
bool amodel<VALTYPE>::readAugTable(const char *filename){
  /* Adds the entries of a text file to the current table.
     Each record consists of five whitespace-separated fields
         aj j l m val
     and is accumulated with addValue(aj, j, l, m, val), so existing
     cell contents are increased rather than replaced.  The four indices
     are passed through in file order.
     Returns false when the file cannot be opened and true otherwise;
     reading stops at the first record that fails to parse or at end of
     file. */
  ifstream inf(filename);
  cout << "Reading a/d table from " << filename << "\n";
  if( inf.fail() ){
    cerr << "\nERROR: Cannot open " << filename<<"\n";
    return false;
  }
  WordIndex first, second, srcLen, tgtLen;
  VALTYPE value;
  for(;;){
    inf >> first >> second >> srcLen >> tgtLen >> value;
    if( !inf )
      break;
    addValue(first, second, srcLen, tgtLen, value);
  }
  return true;
}
template <class VALTYPE>
bool amodel<VALTYPE>::merge(amodel<VALTYPE>& am){
  // Adds every cell of `am` to the matching cell of this table and
  // returns true.  The cells visited are chosen from this table's own
  // is_distortion flag and MaxSentLength.
  // NOTE(review): the values come from am.getValue(), which applies the
  // smoothing interpolation and a PROB_SMOOTH floor, not from the raw
  // stored value -- confirm this is intended for merging.
  cout << "start merging " <<"\n";
  for(WordIndex srcLen=0; srcLen<MaxSentLength; ++srcLen){
    for(WordIndex tgtLen=0; tgtLen<MaxSentLength; ++tgtLen){
      // Compact tables only visit the iterations with equal lengths.
      if( CompactADTable && srcLen!=tgtLen )
        continue;
      unsigned int L=srcLen;
      unsigned int M=tgtLen;
      if( CompactADTable ){
        if( is_distortion )
          L=MaxSentLength-1;
        else
          M=MaxSentLength-1;
      }
      if( !is_distortion ){
        for(WordIndex j=1; j<=M; ++j){
          for(WordIndex i=0; i<=L; ++i){
            const VALTYPE other=am.getValue(i, j, L, M);
            addValue(i, j, L, M, other);
          }
        }
      }else{
        for(WordIndex i=0; i<=L; ++i){
          for(WordIndex j=1; j<=M; ++j){
            const VALTYPE other=am.getValue(j, i, L, M);
            addValue(j, i, L, M, other);
          }
        }
      }
    }
  }
  return true;
}
template class amodel<COUNT> ;
//template class amodel<PROB> ;

View File

@ -0,0 +1,191 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/* --------------------------------------------------------------------------*
* *
* Module :ATables *
* *
* Prototypes File: ATables.h *
* *
* Objective: Defines clases and methods for handling I/O for distortion & *
* alignment tables. *
*****************************************************************************/
#ifndef _atables_h
#define _atables_h 1
#include "defs.h"
#include <cassert>
#include <iostream>
#include <algorithm>
#include <functional>
#include <map>
#include <set>
#include "Vector.h"
#include <utility>
#if __GNUC__>2
#include <ext/hash_map>
using __gnu_cxx::hash_map;
#else
#include <hash_map>
#endif
#include <fstream>
#include "Array4.h"
#include "myassert.h"
#include "Globals.h"
#include "syncObj.h"
extern bool CompactADTable;
extern float amodel_smooth_factor;
extern short NoEmptyWord;
/* ------------------- Class Defintions of amodel ---------------------------*/
/* Class Name: amodel:
Objective: This defines the underlying data structure for distortion prob.
and count tables. They are defined as a hash table. Each entry in the hash
table is the probability (d(j/l,m,i), where j is word target position, i is
source word position connected to it, m is target sentence length, and l is
source sentence length) or count collected for it. The probability and the
count are represented as log integer probability as
defined by the class LogProb.
This class is used to represent a (alignment probability) and d (distortion)
tables and also their corresponding count tables.
*--------------------------------------------------------------------------*/
// Returns the absolute value of a.
inline int Mabs(int a){
  return (a<0) ? -a : a;
}
// Table of alignment (a) or distortion (d) values indexed by two word
// positions and the two sentence lengths. The same template holds either
// kind of table; is_distortion selects which index conventions apply.
template <class VALTYPE>
class amodel{
public:
// Backing storage, addressed as (aj, j, length index, length index).
Array4<VALTYPE> a;
// true for a distortion table, false for an alignment table.
bool is_distortion ;
// Upper bound (exclusive) on the sentence lengths accepted by get/getRef.
WordIndex MaxSentLength;
// NOTE(review): not initialised by the constructor and not read anywhere
// in this class -- confirm whether these are still used.
bool ignoreL, ignoreM;
// Returns the raw stored value (no smoothing). When CompactADTable is set,
// one of the two length indices is replaced by MaxSentLength: l for a
// distortion table, m for an alignment table.
VALTYPE get(WordIndex aj, WordIndex j, WordIndex l, WordIndex m)const{
massert( (!is_distortion) || aj<=m );massert( (!is_distortion) || j<=l );massert( (!is_distortion) || aj!=0 );
massert( is_distortion || aj<=l );massert( is_distortion || j<=m );massert( (is_distortion) || j!=0 );
massert( l<MaxSentLength );massert( m<MaxSentLength );
return a.get(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
}
// NOTE(review): declared here, but no definition or use is visible in this
// header -- confirm it is defined somewhere if it is ever referenced.
static float smooth_factor;
// Creates an empty table whose storage covers MAX_SENTENCE_LENGTH+1 length
// indices, with every cell starting at 0.0.
amodel(bool flag = false)
: a(MAX_SENTENCE_LENGTH+1,0.0), is_distortion(flag), MaxSentLength(MAX_SENTENCE_LENGTH)
{};
protected:
// Writable reference to a cell, using the same index mapping as get().
// Performs no locking itself; setValue/addValue wrap it in the mutex.
VALTYPE&getRef(WordIndex aj, WordIndex j, WordIndex l, WordIndex m){
massert( (!is_distortion) || aj<=m );massert( (!is_distortion) || j<=l );
massert( is_distortion || aj<=l );massert( is_distortion || j<=m );massert( (is_distortion) || j!=0 );
massert( l<MaxSentLength );massert( m<MaxSentLength );
return a(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
}
public:
// Overwrites one cell while holding `lock`.
void setValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val) {
lock.lock();
getRef(aj, j, l, m)=val;
lock.unlock();
}
// Mutex taken by setValue and addValue around their cell update.
Mutex lock;
public:
/**
By Qin
Adds val to one cell while holding `lock`.
*/
void addValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val) {
lock.lock();
getRef(aj, j, l, m)+=val;
lock.unlock();
}
// Adds the values of am into this table; defined in ATables.cpp.
bool merge(amodel<VALTYPE>& am);
// Returns the smoothed value: the stored value interpolated with a uniform
// term using amodel_smooth_factor (1/(l+1) for an alignment table, 1/m for
// a distortion table), and never less than PROB_SMOOTH.
VALTYPE getValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m) const{
if( is_distortion==0 )
return max(double(PROB_SMOOTH),amodel_smooth_factor/(l+1)+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
else
return max(double(PROB_SMOOTH),amodel_smooth_factor/m+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
}
// Text dumps of the table; defined in ATables.cpp.
void printTable(const char* filename)const ;
void printRealTable(const char* filename)const ;
// Normalises the raw values of this table into aTable and prints the number
// of parameters written. For an alignment table each (j, L, M) column is
// divided by its sum over i; for a distortion table each (i, L, M) row is
// divided by its sum over j and interpolated with amodel_smooth_factor/M.
// Groups whose sum is zero are left untouched in aTable. When NoEmptyWord
// is set, entries with i==0 are forced to 0.
// Note that the template parameter is itself named COUNT.
template<class COUNT>
void normalize(amodel<COUNT>& aTable)const
{
WordIndex i, j, l, m ;
COUNT total;
int nParam=0;
for(l=0;l<MaxSentLength;l++){
for(m=0;m<MaxSentLength;m++){
// With CompactADTable only the l==m iterations are visited.
if( CompactADTable && l!=m )
continue;
unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
if( is_distortion==0 ){
for(j=1;j<=M; j++){
total=0.0;
for(i=0;i<=L;i++){
total+=get(i, j, L, M);
}
if( total ){
for(i=0;i<=L;i++){
nParam++;
aTable.getRef(i, j, L, M)=get(i, j, L, M)/total;
massert(aTable.getRef(i,j,L,M)<=1.0);
if( NoEmptyWord&&i==0 )
aTable.getRef(i,j,L,M)=0;
}
}
}
}else{
for(i=0;i<=L;i++){
total=0.0;
for(j=1;j<=M;j++)
total+=get(j, i, L, M);
if( total )
for(j=1;j<=M;j++){
aTable.getRef(j, i, L, M)=amodel_smooth_factor/M+(1.0-amodel_smooth_factor)*get(j, i, L, M)/total;
nParam++;
massert(aTable.getRef(j,i,L,M)<=1.0);
if( NoEmptyWord&&i==0 )
aTable.getRef(j,i,L,M)=0;
}
}
}
}
}
cout << "A/D table contains " << nParam << " parameters.\n";
}
// File loaders; defined in ATables.cpp. readTable stores values with
// setValue, readAugTable accumulates them with addValue.
bool readTable(const char *filename);
bool readAugTable(const char *filename);
// Forwards to Array4::clear() on the backing storage.
void clear()
{a.clear();}
};
/* ------------------- End of amodel Class Definitions ----------------------*/
#endif

View File

@ -0,0 +1,44 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include "AlignTables.h"
// Stores alignment `aj` with probability `val`.
// Returns false, leaving the table unchanged, when the alignment is already
// present or when val <= 0; returns true after a successful insertion.
bool alignmodel::insert(Vector<WordIndex>& aj, LogProb val)
{
  typedef hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment > AlignmentHash;
  const AlignmentHash::iterator existing = a.find(aj);
  const bool rejected = (existing != a.end() || val <= 0);
  if (rejected)
    return false;
  a.insert(pair<const Vector<WordIndex>, LogProb>(aj, val));
  return true;
}
// Returns the probability stored for `align`, or a LogProb initialised
// from 0.0 when the alignment is not in the table.
LogProb alignmodel::getValue(Vector<WordIndex>& align) const
{
  typedef hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment > AlignmentHash;
  const AlignmentHash::const_iterator hit = a.find(align);
  if (hit != a.end())
    return hit->second;
  const LogProb zero = 0.0 ;
  return zero;
}

View File

@ -0,0 +1,124 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef _aligntables_h
#define _aligntables_h 1
#include "defs.h"
#include <cassert>
#include <iostream>
#include <algorithm>
#include <functional>
#include <map>
#include <set>
//#include <vector>
#include "Vector.h"
#include <utility>
#if __GNUC__>2
#include <ext/hash_map>
using __gnu_cxx::hash_map;
#else
#include <hash_map>
#endif
#include <cmath>
#include <fstream>
#include "transpair_model1.h"
/* ----------------- Class Defintions for hashmyalignment --------------------
Objective: This class is used to define a hash mapping function to map
an alignment (defined as a vector of integers) into a hash key
----------------------------------------------------------------------------*/
class hashmyalignment : public unary_function< Vector<WordIndex>, size_t >
{
public:
  // Hash of an alignment vector: the sum over positions j >= 1 of
  // key[j] raised to the power (j % 6) + 1, reduced modulo 1000000.
  // Position 0 is not part of the hash.
  //
  // The powers are computed in size_t arithmetic, where overflow wraps
  // around with well-defined results.  Converting an out-of-range pow()
  // result from double to int (as an earlier version did) is undefined
  // behaviour once the value exceeds INT_MAX.  For values that fit in an
  // int the result is unchanged.
  size_t operator() (const Vector<WordIndex>& key) const
  {
    size_t key_sum = 0 ;
    for (WordIndex j = 1 ; j < key.size() ; j++){
      const size_t base = key[j];
      size_t term = base;                       // base^1
      for (WordIndex extra = j % 6 ; extra > 0 ; --extra)
        term *= base;                           // raise to (j % 6) + 1
      key_sum += term;
    }
    return key_sum % 1000000 ;
  }
};
class equal_to_myalignment{
  // Equality predicate for alignment vectors: true when both vectors have
  // the same size and agree at every position from 1 upwards.  Position 0
  // is not compared.
public:
  // The arguments are taken by const reference so that a comparison does
  // not copy the two vectors.
  bool operator()(const Vector<WordIndex>& t1,
                  const Vector<WordIndex>& t2) const
  {
    if (t1.size() != t2.size())
      return(false);
    for (WordIndex j = 1 ; j < t1.size() ; j++)
      if (t1[j] != t2[j])
        return(false);
    return(true);
  }
};
/* ---------------- End of Class Defnition for hashmyalignment --------------*/
/* ------------------ Class Defintions for alignmodel -----------------------
Class Name: alignmodel
Objective: Alignments neighborhhoods (collection of alignments) are stored in
a hash table (for easy lookup). Each alignment vector is mapped into a hash
key using the operator defined above.
*--------------------------------------------------------------------------*/
class alignmodel{
private:
hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment > a;
private:
// void erase(Vector<WordIndex>&);
public:
// methods;
inline hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::iterator begin(void){return a.begin();} // begining of hash
inline hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::iterator end(void){return a.end();} // end of hash
inline const hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >& getHash() const {return a;}; // reference to hash table
bool insert(Vector<WordIndex>&, LogProb val=0.0); // add a alignmnet
// void setValue(Vector<WordIndex>&, LogProb val); // not needed
LogProb getValue(Vector<WordIndex>&)const; // retrieve prob. of alignment
inline void clear(void){ a.clear();}; // clear hash table
// void printTable(const char* filename);
inline void resize(WordIndex n) {a.resize(n);}; // resize table
};
/* -------------- End of alignmode Class Definitions ------------------------*/
#endif

View File

@ -0,0 +1,5 @@
#ifndef GIZA_ARRAY_H_DEFINED
#define GIZA_ARRAY_H_DEFINED
#include "Vector.h"
// Array is a preprocessor alias for Vector: after this header is included,
// every occurrence of the token `Array` is replaced by `Vector`.
#define Array Vector
#endif

View File

@ -0,0 +1,126 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/*--
Array2: Implementation of a twodimensional checked array allowing for
a specified underlieing one-dimensional data-structure.
Franz Josef Och (30/07/99)
--*/
#ifndef CLASS_Array2_DEFINED
#define CLASS_Array2_DEFINED
#include "mystl.h"
#include <string>
#include <vector>
// Two-dimensional checked array stored row-major in a one-dimensional
// container Y (std::vector<T> by default).  Element (i, j) lives at offset
// i*h2+j of p.  Index arguments are verified with assert().
template<class T, class Y=std::vector<T> > class Array2 {
public:
	Y p;                   // underlying storage, h1*h2 elements
	unsigned int h1, h2;   // number of rows, number of columns
private:
	// Checked offset of element (i, j), computed in size_t so that large
	// arrays do not wrap around in unsigned int arithmetic.
	std::size_t offset(unsigned int i, unsigned int j) const {
		assert(i<h1);
		assert(j<h2);
		return static_cast<std::size_t>(i)*h2+j;
	}
public:
	// h1 x h2 array of value-initialised elements.
	Array2(unsigned int _h1, unsigned int _h2) :
		p(static_cast<std::size_t>(_h1)*_h2), h1(_h1), h2(_h2) {
	}
	// h1 x h2 array with every element set to _init.
	Array2(unsigned int _h1, unsigned int _h2, const T&_init) :
		p(static_cast<std::size_t>(_h1)*_h2, _init), h1(_h1), h2(_h2) {
	}
	// Empty 0 x 0 array.
	Array2() :
		h1(0), h2(0) {
	}
	inline T &operator()(unsigned int i, unsigned int j) {
		return p[offset(i, j)];
	}
	inline const T&operator()(unsigned int i, unsigned int j) const {
		return p[offset(i, j)];
	}
	inline T get(unsigned int i, unsigned int j) {
		return p[offset(i, j)];
	}
	inline void set(unsigned int i, unsigned int j, T x) {
		p[offset(i, j)]=x;
	}
	inline const T get(unsigned int i, unsigned int j) const {
		return p[offset(i, j)];
	}
	inline unsigned int getLen1() const {
		return h1;
	}
	inline unsigned int getLen2() const {
		return h2;
	}
	// Pointers to the first element and one past the last element; both
	// are null when the array has no elements.
	inline T*begin() {
		if (h1==0||h2==0)
			return 0;
		return &(p[0]);
	}
	inline T*end() {
		if (h1==0||h2==0)
			return 0;
		return &(p[0])+p.size();
	}
	// The const overloads mirror the non-const ones.  They return element
	// pointers rather than the container's iterators, which are not
	// convertible to const T* for std::vector.
	inline const T*begin() const {
		if (h1==0||h2==0)
			return 0;
		return &(p[0]);
	}
	inline const T*end() const {
		if (h1==0||h2==0)
			return 0;
		return &(p[0])+p.size();
	}
	// Prints one row per line, each element followed by a space, then a
	// final empty line.
	friend std::ostream&operator<<(std::ostream&out, const Array2<T, Y>&ar) {
		for (unsigned int i=0; i<ar.getLen1(); i++) {
			for (unsigned int j=0; j<ar.getLen2(); j++)
				out << ar(i, j) << ' ';
			out << '\n';
		}
		return out << std::endl;
	}
	// Changes the dimensions; a no-op when they are already a x b.  The
	// existing elements are kept in their storage positions, not in their
	// (i, j) positions.
	inline void resize(unsigned int a, unsigned int b) {
		if ( !(a==h1&&b==h2)) {
			h1=a;
			h2=b;
			p.resize(static_cast<std::size_t>(h1)*h2);
		}
	}
	// Changes the dimensions and sets every element to t; a no-op when the
	// dimensions are already a x b.
	inline void resize(unsigned int a, unsigned int b, const T&t) {
		if ( !(a==h1&&b==h2)) {
			h1=a;
			h2=b;
			p.resize(static_cast<std::size_t>(h1)*h2);
			std::fill(p.begin(), p.end(), t);
		}
	}
};
#endif

View File

@ -0,0 +1,78 @@
/*
Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef AlignmentArray4_h_DEFINED
#define AlignmentArray4_h_DEFINED
#include "Array2.h"
template<class T> class Array4
{
private:
Array2< Array2<T>* > A;
int M;
T init;
public:
Array4(int m,const T&_init)
: A(m,m,0),M(m),init(_init) {}
~Array4()
{
for(int l=0;l<M;++l)
for(int m=0;m<M;++m)
delete A(l,m);
}
const T&operator()(int i, int j, int l, int m)const
{
if( A(l,m)==0 )
return init;
else
return (*A(l,m))(i,j);
}
const T&get(int i, int j, int l, int m)const
{
if( A(l,m)==0 )
return init;
else
return (*A(l,m))(i,j);
}
T&operator()(int i, int j, int l, int m)
{
if( A(l,m)==0 )
{
A(l,m)=new Array2<T>(max(l+1,m+1),max(l+1,m+1),init);
}
return (*A(l,m))(i,j);
}
void clear()
{
for(int l=0;l<M;++l)
for(int m=0;m<M;++m)
if( A(l,m) )
{
Array2<T>&a=*A(l,m);
for(int i=0;i<=l;++i)
for(int j=0;j<=m;++j)
a(i,j)=0.0;
}
}
};
#endif

View File

@ -0,0 +1,772 @@
/*
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef _d4tables_h_define
#define _d4tables_h_define
#include <cmath>
#include "WordClasses.h"
#include "Globals.h"
#include "myassert.h"
#include "syncObj.h"
extern float d4modelsmooth_factor;
// Key used to index the distortion tables (see d4model / d5model).
// `deps` is a bit mask of DEP_MODEL_* / DEP_MODELb_* flags; the print helpers
// below use it to decide which of the fields l, m, F and E are written.
// A vacancies value of -1 is treated as "not set" by print1 / printb1.
class m4_key {
public:
  int deps;
  int l;
  int m;
  int F;
  int E;
  int prevj;
  int vacancies1, vacancies2;

  m4_key(int _deps, int _l, int _m, int _F, int _E, int _prevj, int _v1,
         int _v2)
    : deps(_deps), l(_l), m(_m), F(_F), E(_E), prevj(_prevj),
      vacancies1(_v1), vacancies2(_v2)
  {}

  // Writes the fields selected by the DEP_MODEL_* bits of x.deps as
  // "name: value " pairs, followed by the vacancies (when not -1) and '\n'.
  friend ostream&print1(ostream&out, const m4_key&x, const WordClasses&wce,
                        const WordClasses&wcf) {
    if (x.deps&DEP_MODEL_l) {
      out << "l: " << x.l<<' ';
    }
    if (x.deps&DEP_MODEL_m) {
      out << "m: " << x.m<<' ';
    }
    if (x.deps&DEP_MODEL_F) {
      out << "F: " << wcf.classString(x.F)<< ' ';
    }
    if (x.deps&DEP_MODEL_E) {
      out << "E: " << wce.classString(x.E)<< ' ';
    }
    // if(x.deps&DEP_MODEL_pj)out << "j-1: " << x.prevj<<' ';
    if (x.vacancies1!=-1) {
      out << "v1: " << x.vacancies1 << ' ';
    }
    if (x.vacancies2!=-1) {
      out << "v2: " << x.vacancies2 << ' ';
    }
    out << '\n';
    return out;
  }

  // Model-5 line prefix: E class, F class ("0" when the respective dependency
  // bit is off), then both vacancy values, each followed by a blank.
  friend ostream&print1_m5(ostream&out, const m4_key&x,
                           const WordClasses&wce, const WordClasses&wcf) {
    const string eClass((x.deps&DEP_MODEL_E) ? wce.classString(x.E) : string("0"));
    const string fClass((x.deps&DEP_MODEL_F) ? wcf.classString(x.F) : string("0"));
    out << eClass << ' ';
    out << fClass << ' ';
    out << x.vacancies1 << ' ' << x.vacancies2 << ' ';
    return out;
  }

  // Same as print1 but driven by the DEP_MODELb_* bits.
  friend ostream&printb1(ostream&out, const m4_key&x, const WordClasses&wce,
                         const WordClasses&wcf) {
    if (x.deps&DEP_MODELb_l) {
      out << "l: " << x.l<<' ';
    }
    if (x.deps&DEP_MODELb_m) {
      out << "m: " << x.m<<' ';
    }
    if (x.deps&DEP_MODELb_F) {
      out << "F: " << wcf.classString(x.F) << ' ';
    }
    if (x.deps&DEP_MODELb_E) {
      out << "E: " << wce.classString(x.E) << ' ';
    }
    if (x.vacancies1!=-1) {
      out << "v1: " << x.vacancies1 << ' ';
    }
    if (x.vacancies2!=-1) {
      out << "v2: " << x.vacancies2 << ' ';
    }
    out << '\n';
    return out;
  }

  // Model-5 line prefix for the second table: a literal "-1" in the E column,
  // then the F class (or "0") and both vacancy values.
  // NOTE(review): this tests DEP_MODEL_F rather than DEP_MODELb_F, unlike
  // printb1 -- confirm whether that is intended.
  friend ostream&printb1_m5(ostream&out, const m4_key&x,
                            const WordClasses&wcf) {
    const string fClass((x.deps&DEP_MODEL_F) ? wcf.classString(x.F)
                                             : string("0"));
    out << "-1 " << fClass << ' ';
    out << x.vacancies1 << ' ' << x.vacancies2 << ' ';
    return out;
  }
};
// Strict-weak ordering on m4_key for the D1 map.  Fields l, m, F and E take
// part in the comparison only when the matching DEP_MODEL_* bit is set in
// `deps`; vacancies1 and vacancies2 are always compared.  prevj is ignored.
class compare1 {
private:
  int deps;
public:
  compare1(int _deps) :
    deps(_deps) {
  }
  // Lexicographic "less than" over the enabled fields, in the order
  // l, m, F, E, vacancies1, vacancies2.
  bool operator()(const m4_key&a, const m4_key&b) const {
    if ((deps&DEP_MODEL_l) && a.l!=b.l)
      return a.l<b.l;
    if ((deps&DEP_MODEL_m) && a.m!=b.m)
      return a.m<b.m;
    if ((deps&DEP_MODEL_F) && a.F!=b.F)
      return a.F<b.F;
    if ((deps&DEP_MODEL_E) && a.E!=b.E)
      return a.E<b.E;
    //if(deps&DEP_MODEL_pj){if( a.prevj<b.prevj )return 1;if( b.prevj<a.prevj )return 0;}
    if (a.vacancies1!=b.vacancies1)
      return a.vacancies1<b.vacancies1;
    return a.vacancies2<b.vacancies2;
  }
};
// Strict-weak ordering on m4_key for the Db1 map.  Identical in shape to
// compare1 but gated by the DEP_MODELb_* bits of `deps`; vacancies1 and
// vacancies2 are always compared and prevj is ignored.
class compareb1 {
private:
  int deps;
public:
  compareb1(int _deps) :
    deps(_deps) {
  }
  // Lexicographic "less than" over the enabled fields, in the order
  // l, m, F, E, vacancies1, vacancies2.
  bool operator()(const m4_key&a, const m4_key&b) const {
    if ((deps&DEP_MODELb_l) && a.l!=b.l)
      return a.l<b.l;
    if ((deps&DEP_MODELb_m) && a.m!=b.m)
      return a.m<b.m;
    if ((deps&DEP_MODELb_F) && a.F!=b.F)
      return a.F<b.F;
    if ((deps&DEP_MODELb_E) && a.E!=b.E)
      return a.E<b.E;
    //if(deps&DEP_MODELb_pj){if( a.prevJ<b.prevJ )return 1;if( b.prevJ<a.prevJ )return 0;}
    if (a.vacancies1!=b.vacancies1)
      return a.vacancies1<b.vacancies1;
    return a.vacancies2<b.vacancies2;
  }
};
inline void tokenize(const string&in, Vector<string>&out) {
string s;
istrstream l(in.c_str());
while (l>>s)
out.push_back(s);
}
class d4model {
public:
typedef Vector<pair<COUNT,PROB> > Vpff;
map<m4_key,Vpff,compare1 > D1;
map<m4_key,Vpff,compareb1> Db1;
PositionIndex msl;
WordClasses* ewordclasses;
WordClasses* fwordclasses;
// Loads the two word-class files into *ewordclasses (from efile, using m1 and
// elist) and *fwordclasses (from ffile, using m2 and flist).  A file that
// cannot be opened is reported on cerr and skipped; the other one is still read.
template<class MAPPER> void makeWordClasses(const MAPPER&m1,
    const MAPPER&m2, string efile, string ffile, const vcbList& elist,
    const vcbList& flist) {
  ifstream estrm(efile.c_str()), fstrm(ffile.c_str());
  if (estrm)
    ewordclasses->read(estrm, m1,elist);
  else
    cerr << "ERROR: can not read " << efile << endl;
  if (fstrm)
    fwordclasses->read(fstrm, m2,flist);
  else
    cerr << "ERROR: can not read " << ffile << endl;
}
// Builds empty D1 / Db1 tables whose comparators are configured from
// M4_Dependencies, stores _msl (rows are later sized 2*msl+1) and keeps
// pointers to the caller's word-class objects (not owned here).
d4model(PositionIndex _msl, WordClasses& e, WordClasses& f)
  : D1(compare1(M4_Dependencies)),
    Db1(compareb1(M4_Dependencies)),
    msl(_msl),
    ewordclasses(&e),
    fwordclasses(&f)
{
}
protected:
// Returns a reference to the count cell of D1 for displacement j-j_cp under
// the key built from (l, m, F, E).  A missing row is created first with
// 2*msl+1 zeroed (count, prob) cells; the displacement is shifted by msl to
// obtain the cell index.
inline COUNT&getCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l,
    int m) {
  typedef map<m4_key,Vpff,compare1 >::iterator HeadIter;
  assert(j>=1);
  const m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1);
  HeadIter entry=D1.find(key);
  if (entry==D1.end()) {
    const Vpff zeroRow(msl*2+1,pair<COUNT,PROB>(0.0,0.0));
    entry=D1.insert(make_pair(key,zeroRow)).first;
  }
  assert(entry!=D1.end());
  Vpff&row=entry->second;
  return row[j-j_cp+msl].first;
}
// Returns a reference to the count cell of Db1 for displacement j-j_prev under
// the key built from (l, m, F, E).  A missing row is created first with
// 2*msl+1 zeroed (count, prob) cells; the displacement is shifted by msl to
// obtain the cell index.
inline COUNT&getCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F,
    int l, int m) {
  typedef map<m4_key,Vpff,compareb1 >::iterator TailIter;
  assert(j>=1);
  assert(j_prev>=1);
  const m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1);
  TailIter entry=Db1.find(key);
  if (entry==Db1.end()) {
    const Vpff zeroRow(msl*2+1,pair<COUNT,PROB>(0.0,0.0));
    entry=Db1.insert(make_pair(key,zeroRow)).first;
  }
  assert(entry!=Db1.end());
  Vpff&row=entry->second;
  return row[j-j_prev+msl].first;
}
Mutex lock_f,lock_b;
public:
// Adds v to the D1 count cell addressed by (j, j_cp, E, F, l, m) while
// holding lock_f for the duration of the lookup and the update.
inline void augCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l,
    int m, const COUNT& v){
  lock_f.lock();
  COUNT&cell=getCountRef_first(j,j_cp,E,F,l,m);
  cell+=v;
  lock_f.unlock();
}
// Adds v to the Db1 count cell addressed by (j, j_prev, E, F, l, m) while
// holding lock_b for the duration of the lookup and the update.
inline void augCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F,
    int l, int m, const COUNT& v){
  lock_b.lock();
  COUNT&cell=getCountRef_bigger(j,j_prev,E,F,l,m);
  cell+=v;
  lock_b.unlock();
}
// Folds the tables of `d` into this model: for every row of d.D1 / d.Db1 the
// matching row here is looked up (and created with 2*msl+1 zeroed cells when
// absent) and the `.second` member of each cell of d's row is added to the
// corresponding cell here.
//
// Each table is walked with the iterator type of its own map (the Db1 loop
// previously used the compare1 iterator type, which is only accepted by
// standard libraries whose iterator type ignores the comparator), and the
// copy is bounded by the shorter of the two rows so that models built with
// different msl values cannot index past the destination row.
//
// NOTE(review): the `.second` (PROB) member is accumulated, not the `.first`
// (COUNT) member -- confirm with the callers that this is the intended field.
void merge(d4model &d) {
  for (map<m4_key,Vpff,compare1 >::iterator src=d.D1.begin(); src!=d.D1.end(); ++src) {
    map<m4_key,Vpff,compare1 >::iterator dst=D1.find(src->first);
    if (dst==D1.end())
      dst=D1.insert(make_pair(src->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
    for (unsigned int i=0; i<src->second.size() && i<dst->second.size(); ++i) {
      dst->second[i].second+=src->second[i].second;
    }
  }
  for (map<m4_key,Vpff,compareb1 >::iterator src=d.Db1.begin(); src!=d.Db1.end(); ++src) {
    map<m4_key,Vpff,compareb1 >::iterator dst=Db1.find(src->first);
    if (dst==Db1.end())
      dst=Db1.insert(make_pair(src->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
    for (unsigned int i=0; i<src->second.size() && i<dst->second.size(); ++i) {
      dst->second[i].second+=src->second[i].second;
    }
  }
}
bool augCount(const char* fD1, const char* fDb) {
ifstream ifsd(fD1);
int deps;
int l;
int m;
int F;
int E;
int prevj;
int vacancies1, vacancies2;
int len;
double count;
if (!ifsd) {
cerr << "Failed in " << fD1 << endl;
return false;
}
{
while (ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1
>>vacancies2>>len) {
m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1,
vacancies2);
map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
if (p==D1.end())
p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
assert(p!=D1.end());
int i;
for (i=0; i<len; i++) {
ifsd >> count;
p->second[i].first+=count;
}
}
}
ifstream ifsd1(fDb);
if (!ifsd1) {
cerr << "Failed in " << fDb << endl;
return false;
}
{
while (ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1
>>vacancies2>>len) {
m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1,
vacancies2);
map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
if (p==Db1.end())
p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
assert(p!=D1.end());
int i;
for (i=0; i<len; i++) {
ifsd1 >> count;
p->second[i].first+=count;
}
}
}
return true;
}
bool readProbTable(const char* fD1, const char* fDb){
ifstream ifsd(fD1);
int deps;
int l;
int m;
int F;
int E;
int prevj;
int vacancies1,vacancies2;
int len;
double count;
if(!ifsd){
cerr << "Failed in " << fD1 << endl;
return false;
}
{
while(ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len){
m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2);
map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
if(p==D1.end())p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
assert(p!=D1.end());
int i;
for(i=0;i<len;i++){
ifsd >> count;
p->second[i].second=count;
}
}
}
ifstream ifsd1(fDb);
if(!ifsd1){
cerr << "Failed in " << fDb << endl;
return false;
}
{
while(ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len){
m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2);
map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
if(p==Db1.end())p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
assert(p!=D1.end());
int i;
for(i=0;i<len;i++){
ifsd1 >> count;
p->second[i].second=count;
}
}
}
return true;
}
bool printProbTable(const char* fD1, const char* fDb) {
ofstream ofsd(fD1);
if (!ofsd.is_open()) {
return false;
}
{
map<m4_key,Vpff,compare1 >::iterator it;
for (it = D1.begin(); it!=D1.end(); it++) {
ofsd << it->first.deps << " " << it->first.l << " "
<< it->first.m << " " << it->first.F << " "
<< it->first.E << " " << it->first.prevj << " "
<< it->first.vacancies1 << " " << it->first.vacancies2
<< " " << it->second.size() << " ";
int i;
for (i=0; i<it->second.size(); i++) {
ofsd << it->second[i].second << " ";
}
ofsd << endl;
}
}
ofstream ofsdb(fDb);
if (!ofsdb.is_open()) {
return false;
}
map<m4_key,Vpff,compareb1 >::iterator it;
for (it = Db1.begin(); it!=Db1.end(); it++) {
ofsdb << it->first.deps << " " << it->first.l << " " << it->first.m
<< " " << it->first.F << " " << it->first.E << " "
<< it->first.prevj << " " << it->first.vacancies1 << " "
<< it->first.vacancies2 << " " << it->second.size()<< endl;
int i;
for (i=0; i<it->second.size(); i++) {
ofsdb << it->second[i].second << " ";
}
ofsdb << endl;
}
return true;
}
bool dumpCount(const char* fD1, const char* fDb){
ofstream ofsd(fD1);
if(!ofsd.is_open()){
return false;
}
{
map<m4_key,Vpff,compare1 >::iterator it;
for(it = D1.begin(); it!=D1.end();it++){
ofsd << it->first.deps << " "
<< it->first.l << " "
<< it->first.m << " "
<< it->first.F << " "
<< it->first.E << " "
<< it->first.prevj << " "
<< it->first.vacancies1 << " "
<< it->first.vacancies2 << " "
<< it->second.size() << " ";
int i;
for(i=0;i<it->second.size();i++){
ofsd << it->second[i].first << " ";
}
ofsd << endl;
}
}
ofstream ofsdb(fDb);
if(!ofsdb.is_open()){
return false;
}
map<m4_key,Vpff,compareb1 >::iterator it;
for(it = Db1.begin(); it!=Db1.end();it++){
ofsdb << it->first.deps << " "
<< it->first.l << " "
<< it->first.m << " "
<< it->first.F << " "
<< it->first.E << " "
<< it->first.prevj << " "
<< it->first.vacancies1 << " "
<< it->first.vacancies2 << " "
<< it->second.size()<< endl;
int i;
for(i=0;i<it->second.size();i++){
ofsdb << it->second[i].first << " ";
}
ofsdb << endl;
}
return true;
}
// Looks up the D1 row for (E, F, l, m) once so that it can be reused by
// getProb_first_withiterator; returns D1.end() when there is no such row.
map<m4_key,Vpff,compare1 >::const_iterator getProb_first_iterator(int E,
    int F, int l, int m) const {
  const m4_key probe(M4_Dependencies, l, m, F, E, 0, -1, -1);
  return D1.find(probe);
}
// Smoothed D1 probability of displacement j-j_cp for the row `p` (obtained
// from getProb_first_iterator).  Without a row the result is PROB_SMOOTH;
// otherwise the stored probability is interpolated with 1/(2*m-1) using
// d4modelsmooth_factor and floored at PROB_SMOOTH.
PROB getProb_first_withiterator(WordIndex j, WordIndex j_cp, int m,
    const map<m4_key,Vpff,compare1 >::const_iterator& p) const {
  assert(j>=1);
  //assert(j_cp>=0);
  assert(j<=msl);
  assert(j_cp<=msl);
  if (p==D1.end())
    return PROB_SMOOTH;
  const PROB stored=(p->second)[j-j_cp+msl].second;
  massert(stored<=1.0);
  return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1
      -d4modelsmooth_factor)*stored);
}
// Smoothed D1 probability of displacement j-j_cp under key (E, F, l, m).
// Without a row the result is PROB_SMOOTH; otherwise the stored probability
// is interpolated with 1/(2*m-1) using d4modelsmooth_factor and floored at
// PROB_SMOOTH.
PROB getProb_first(WordIndex j, WordIndex j_cp, int E, int F, int l, int m) const {
  assert(j>=1);
  //assert(j_cp>=0);
  assert(j<=msl);
  assert(j_cp<=msl);
  const m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1);
  const map<m4_key,Vpff,compare1 >::const_iterator row=D1.find(key);
  if (row==D1.end())
    return PROB_SMOOTH;
  const PROB stored=(row->second)[j-j_cp+msl].second;
  massert(stored<=1.0);
  return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1
      -d4modelsmooth_factor)*stored);
}
// Looks up the Db1 row for (E, F, l, m) once so that it can be reused by
// getProb_bigger_withiterator; returns Db1.end() when there is no such row.
map<m4_key,Vpff,compareb1 >::const_iterator getProb_bigger_iterator(int E,
    int F, int l, int m) const {
  const m4_key probe(M4_Dependencies, l, m, F, E, 0, -1, -1);
  return Db1.find(probe);
}
// Smoothed Db1 probability of displacement j-j_prev for the row `p` (obtained
// from getProb_bigger_iterator).  Without a row the result is PROB_SMOOTH;
// otherwise the stored probability is interpolated with 1/(m-1) using
// d4modelsmooth_factor and floored at PROB_SMOOTH.
PROB getProb_bigger_withiterator(WordIndex j, WordIndex j_prev, int m,
    const map<m4_key,Vpff,compareb1 >::const_iterator&p) const {
  massert(j>=1);
  massert(j_prev>=1);
  massert(j>j_prev);
  massert(j<=msl);
  massert(j_prev<=msl);
  if (p==Db1.end())
    return PROB_SMOOTH;
  const PROB stored=(p->second)[j-j_prev+msl].second;
  massert(stored<=1.0 );
  return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1
      -d4modelsmooth_factor)*stored);
}
// Smoothed Db1 probability of displacement j-j_prev under key (E, F, l, m).
// Without a row the result is PROB_SMOOTH; otherwise the stored probability
// is interpolated with 1/(m-1) using d4modelsmooth_factor and floored at
// PROB_SMOOTH.
PROB getProb_bigger(WordIndex j, WordIndex j_prev, int E, int F, int l,
    int m) const {
  massert(j>=1);
  massert(j_prev>=1);
  massert(j>j_prev);
  massert(j<=msl);
  massert(j_prev<=msl);
  const m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1);
  const map<m4_key,Vpff,compareb1 >::const_iterator row=Db1.find(key);
  if (row==Db1.end())
    return PROB_SMOOTH;
  const PROB stored=(row->second)[j-j_prev+msl].second;
  massert(stored<=1.0 );
  return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1
      -d4modelsmooth_factor)*stored);
}
void normalizeTable() {
int nParams=0;
for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
Vpff&d1=i->second;
double sum=0.0;
for (PositionIndex i=0; i<d1.size(); i++)
sum+=d1[i].first;
for (PositionIndex i=0; i<d1.size(); i++) {
d1[i].second=sum ? (d1[i].first/sum) : (1.0/d1.size());
nParams++;
}
}
for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
Vpff&db1=i->second;
double sum=0.0;
for (PositionIndex i=0; i<db1.size(); i++)
sum+=db1[i].first;
for (PositionIndex i=0; i<db1.size(); i++) {
db1[i].second=sum ? (db1[i].first/sum) : (1.0/db1.size());
nParams++;
}
}
cout << "D4 table contains " << nParams << " parameters.\n";
}
void clear() {
for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
Vpff&d1=i->second;
for (PositionIndex i=0; i<d1.size(); i++)
d1[i].first=0.0;
}
for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
Vpff&db1=i->second;
for (PositionIndex i=0; i<db1.size(); i++)
db1[i].first=0.0;
}
}
/*void printProbTable(const char*fname1,const char*fname2)
{
ofstream out(fname1);
double ssum=0.0;
out << "# Translation tables for Model 4 .\n";
out << "# Table for head of cept.\n";
for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i){
const Vpff&d1=i->second;
double sum=0.0;
for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
if ( sum ){
print1(out,i->first,ewordclasses,fwordclasses);
out << "SUM: " << sum << ' '<< '\n';
for(unsigned ii=0;ii<d1.size();ii++)
if( d1[ii].first )
out << (int)(ii)-(int)(msl) << ' ' << d1[ii].first << '\n';
out << endl;
}
ssum+=sum;
}
out << "# Table for non-head of cept.\n";
for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
{
const Vpff&db1=i->second;
double sum=0.0;
for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
if( sum ){
printb1(out,i->first,ewordclasses,fwordclasses);
out << "SUM: " << sum << ' '<<'\n';
for(unsigned ii=0;ii<db1.size();ii++)
if( db1[ii].first )
{
out << (int)(ii)-(int)(msl) << ' ' << db1[ii].first << '\n';
}
out << endl;
}
ssum+=sum;
}
out << endl << "FULL-SUM: " << ssum << endl;
if( M4_Dependencies==76 ){
ofstream out2(fname2);
for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
{
const Vpff&d1=i->second;
for(unsigned ii=0;ii<d1.size();ii++)
if( d1[ii].first )
out2 << ewordclasses.classString(i->first.E) << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << d1[ii].second << '\n';
}
for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i) {
const Vpff&db1=i->second;
for(unsigned ii=0;ii<db1.size();ii++)
if( db1[ii].first )
out2 << -1 << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << db1[ii].second << '\n';
}
}
}*/
// Reads both distortion tables from one text file in the layout produced by
// the (commented-out) legacy writer above:
//
//   # comment lines
//   <key line>            e.g. "F: 3 E: 7"  (label/value pairs)
//   SUM: <sum> 
//   <displacement> <count>
//   ...
//   <blank line>
//   ...
//   # comment line(s) introducing the second table
//   <key line>  (may itself be "SUM: <sum>" when there is no dependency)
//   ...
//   FULL-SUM: <total>
//
// For every "<displacement> <count>" line the cell displacement+msl of the
// row for the current key is set to (count, count/sum).  The first table goes
// to D1, the second to Db1.
//
// Returns false when the file cannot be opened, true otherwise.
//
// Robustness compared with the previous version:
//  * an unreadable or truncated file no longer makes the blank-line and
//    data-line loops spin forever; a failed getline leaves `line` empty and
//    every loop stops on it;
//  * a label without a value and a displacement outside [-msl, msl] are
//    reported on cerr and skipped, not indexed out of range;
//  * `sum` is initialised, and a zero sum yields probability 0 instead of a
//    division by zero;
//  * the closing "FULL-SUM:" line ends the parse instead of triggering a
//    spurious "should read SUM" error.
bool readProbTable(const char *fname) {
  cerr << "Reading D4Tables from " << fname << endl;
  ifstream file(fname);
  if (!file) {
    cerr << "ERROR: can not read " << fname << endl;
    return false;
  }
  string line;

  // ---- first table (D1): skip the leading comment lines.
  do {
    if (!getline(file, line))
      line = "";
  } while (line.length()&&line[0]=='#');
  do {
    // Skip blank separator lines; stop at end of input.
    while (file && line.length()==0)
      if (!getline(file, line))
        line = "";
    if (line.length()==0)
      break;
    // A comment line marks the start of the second table.
    if (line[0]=='#')
      break;
    Vector<string> linestr;
    tokenize(line, linestr);
    m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1);
    for (unsigned int i=0; i<linestr.size(); i+=2) {
      if (i+1>=linestr.size()) {
        cerr << "ERROR: error in reading d4 tables: " << linestr[i] << endl;
        break;
      }
      if (linestr[i]=="l:") {
        k.l=atoi(linestr[i+1].c_str());
        iassert(M4_Dependencies&DEP_MODEL_l);
      }
      if (linestr[i]=="m:") {
        k.m=atoi(linestr[i+1].c_str());
        iassert(M4_Dependencies&DEP_MODEL_m);
      }
      if (linestr[i]=="F:") {
        k.F=(*fwordclasses)(linestr[i+1]);
        iassert(M4_Dependencies&DEP_MODEL_F);
      }
      if (linestr[i]=="E:") {
        k.E=(*ewordclasses)(linestr[i+1]);
        iassert(M4_Dependencies&DEP_MODEL_E);
      }
      //if( linestr[i]=="j-1:" ){k.prevj=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_pj);}
    }
    string str;
    double sum=0.0;
    file >> str >> sum;
    iassert(str=="SUM:");
    if (str!="SUM:")
      cerr << "ERROR: string is " << str << " and not sum " << endl;
    // Data lines up to the next blank line.  The first getline returns the
    // remainder of the SUM line, which the writer ends with a blank so that
    // it is not mistaken for the terminating empty line.
    do {
      int value;
      double count;
      if (!getline(file, line)) {
        line = "";
        break;
      }
      istrstream twonumbers(line.c_str());
      if (twonumbers >> value >> count) {
        if (D1.count(k)==0)
          D1.insert(make_pair(k, Vpff(msl*2+1, pair<COUNT, PROB>(
              0.0, 0.0))));
        Vpff&row=D1[k];
        const long idx=static_cast<long>(value)+static_cast<long>(msl);
        if (idx>=0 && idx<static_cast<long>(row.size()))
          row[idx]=make_pair(count, sum ? count/sum : 0.0);
        else
          cerr << "ERROR: error in reading d4 tables: displacement "
               << value << " out of range" << endl;
      }
    } while (line.length());
  } while (file);

  // ---- second table (Db1): skip the comment lines that introduce it.
  do {
    if (!getline(file, line))
      line = "";
  } while (line.length()&&line[0]=='#');
  do {
    while (file && line.length()==0)
      if (!getline(file, line))
        line = "";
    if (line.length()==0)
      break;
    if (line[0]=='#')
      break;
    Vector<string> linestr;
    tokenize(line, linestr);
    m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1);
    bool sumRead=0;
    bool fullSumSeen=0;
    for (unsigned int i=0; i<linestr.size(); i+=2) {
      if (linestr[i]=="FULL-SUM:") {
        fullSumSeen=1;
        break;
      }
      if (i+1>=linestr.size()) {
        cerr << "ERROR: error in reading d4 tables: " << linestr[i] << endl;
        break;
      }
      if (linestr[i]=="l:") {
        k.l=atoi(linestr[i+1].c_str());
        iassert(M4_Dependencies&DEP_MODELb_l);
      } else if (linestr[i]=="m:") {
        k.m=atoi(linestr[i+1].c_str());
        iassert(M4_Dependencies&DEP_MODELb_m);
      } else if (linestr[i]=="F:") {
        k.F=(*fwordclasses)(linestr[i+1]);
        iassert(M4_Dependencies&DEP_MODELb_F);
      } else if (linestr[i]=="E:") {
        k.E=(*ewordclasses)(linestr[i+1]);
        iassert(M4_Dependencies&DEP_MODELb_E);
      } else if (linestr[i]=="SUM:") {
        cerr << "Warning: obviously no dependency.\n";
        sumRead=1;
      } else {
        cerr << "ERROR: error in reading d4 tables: " << linestr[i]
             << ' ' << linestr[i+1] << endl;
      }
    }
    // "FULL-SUM:" is the trailer of the file: nothing more to read.
    if (fullSumSeen)
      break;
    string str;
    double sum=0.0;
    if (sumRead==0)
      file >> str >> sum;
    else {
      // The key line itself was the SUM line.
      str=linestr[0];
      sum=(linestr.size()>1) ? atof(linestr[1].c_str()) : 0.0;
    }
    if (str!="SUM:")
      cerr << "ERROR: should read SUM but read " << str << endl;
    do {
      int value;
      double count;
      if (!getline(file, line)) {
        line = "";
        break;
      }
      istrstream twonumbers(line.c_str());
      if (twonumbers >> value >> count) {
        if (Db1.count(k)==0)
          Db1.insert(make_pair(k, Vpff(msl*2+1,
              pair<COUNT, PROB>(0.0, 0.0))));
        Vpff&row=Db1[k];
        const long idx=static_cast<long>(value)+static_cast<long>(msl);
        if (idx>=0 && idx<static_cast<long>(row.size()))
          row[idx]=make_pair(count, sum ? count/sum : 0.0);
        else
          cerr << "ERROR: error in reading d4 tables: displacement "
               << value << " out of range" << endl;
      }
    } while (line.length());
  } while (file);
  return 1;
}
};
#endif

View File

@ -0,0 +1,233 @@
/*
Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef _d5tables_h_define
#define _d5tables_h_define
#include <cmath>
#include "D4Tables.h"
extern float d5modelsmooth_countoffset;
extern float d5modelsmooth_factor;
#define UNSEENPROB (1.0/vacancies_total)
class d5model
{
private:
typedef Vector < pair < COUNT,PROB > >Vpff;
map< m4_key,Vpff,compare1 > D1;
map< m4_key,Vpff,compareb1 > Db1;
public:
d4model&d4m;
WordClasses* ewordclasses;
WordClasses* fwordclasses;
// Loads the two word-class files into *ewordclasses (from efile, using m1 and
// elist) and *fwordclasses (from ffile, using m2 and flist).  A file that
// cannot be opened is reported on cerr and skipped; the other one is still read.
template<class MAPPER>
void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile
                     , const vcbList& elist,
                     const vcbList& flist)
{
  ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
  if( estrm )
    ewordclasses->read(estrm,m1,elist);
  else
    cerr << "ERROR: can not read classes from " << efile << endl;
  if( fstrm )
    fwordclasses->read(fstrm,m2,flist);
  else
    cerr << "ERROR: can not read classes from " << ffile << endl;
}
// Builds empty D1 / Db1 tables whose comparators are configured from
// M5_Dependencies, keeps a reference to the given d4model and shares its
// word-class pointers.
d5model (d4model&_d4m)
  : D1 (compare1(M5_Dependencies)),
    Db1 (compareb1(M5_Dependencies)),
    d4m(_d4m),
    ewordclasses(_d4m.ewordclasses),
    fwordclasses(_d4m.fwordclasses)
{
}
// Returns a reference to the count cell `vacancies_j` of the D1 row keyed by
// (l, m, F, vacancies_jp, vacancies_total).  A missing row is created with
// vacancies_total+1 cells, each holding count 0 and probability UNSEENPROB.
COUNT &getCountRef_first (PositionIndex vacancies_j,
                          PositionIndex vacancies_jp, int F,
                          PositionIndex l, PositionIndex m,
                          PositionIndex vacancies_total)
{
  typedef map<m4_key,Vpff,compare1 >::iterator HeadIter;
  massert(vacancies_j>0);
  massert(vacancies_total>0);
  //massert(vacancies_jp<=vacancies_total);
  massert(vacancies_j <=vacancies_total);
  massert(vacancies_total<=m);
  const m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
  HeadIter entry=D1.find(key);
  if(entry==D1.end())
  {
    // !!! constrain length
    const Vpff freshRow(vacancies_total+1,make_pair(0,UNSEENPROB));
    entry=D1.insert(make_pair(key,freshRow)).first;
  }
  massert(entry!=D1.end());
  Vpff&row=entry->second;
  return row[vacancies_j].first;
}
// Returns a reference to the count cell `vacancies_j - vacancies_jp` of the
// Db1 row keyed by (l, m, F, -1, vacancies_total).  A missing row is created
// with vacancies_total+1 cells, each holding count 0 and probability
// UNSEENPROB.
COUNT &getCountRef_bigger (PositionIndex vacancies_j,
                           PositionIndex vacancies_jp, int F,
                           PositionIndex l, PositionIndex m,
                           PositionIndex vacancies_total)
{
  typedef map<m4_key,Vpff,compareb1 >::iterator TailIter;
  massert(vacancies_j>0);
  massert(vacancies_total>0);
  massert (vacancies_jp <= vacancies_j);
  massert (vacancies_j-vacancies_jp <= vacancies_total);
  const m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
  TailIter entry=Db1.find(key);
  if(entry==Db1.end())
  {
    // !!! constrain length
    const Vpff freshRow(vacancies_total+1,make_pair(0,UNSEENPROB));
    entry=Db1.insert(make_pair(key,freshRow)).first;
  }
  massert(entry!=Db1.end());
  Vpff&row=entry->second;
  return row[vacancies_j - vacancies_jp].first;
}
// Smoothed D1 probability of cell `vacancies_j` under the key
// (l, m, F, vacancies_jp, vacancies_total).  Without a row the result is
// UNSEENPROB; otherwise the stored probability is interpolated with
// 1/vacancies_total using d5modelsmooth_factor and floored at PROB_SMOOTH.
PROB getProb_first (PositionIndex vacancies_j, PositionIndex vacancies_jp,
                    int F, PositionIndex l, PositionIndex m,
                    PositionIndex vacancies_total) const
{
  massert(vacancies_j>0);
  massert(vacancies_total>0);
  //massert(vacancies_jp<=vacancies_total);
  massert(vacancies_j <=vacancies_total);
  massert(vacancies_total<=m);
  const m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
  const map<m4_key,Vpff,compare1 >::const_iterator row=D1.find(key);
  if( row==D1.end() )
    return UNSEENPROB;
  const PROB stored=(row->second)[vacancies_j].second;
  return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*stored);
}
// Smoothed Db1 probability of cell `vacancies_j - vacancies_jp` under the key
// (l, m, F, -1, vacancies_total).  Without a row the result is UNSEENPROB;
// otherwise the stored probability is interpolated with 1/vacancies_total
// using d5modelsmooth_factor and floored at PROB_SMOOTH.
PROB getProb_bigger (PositionIndex vacancies_j, PositionIndex vacancies_jp,
                     int F, PositionIndex l, PositionIndex m,
                     PositionIndex vacancies_total) const
{
  massert(vacancies_j>0);
  massert(vacancies_total>0);
  massert (vacancies_jp <= vacancies_j);
  massert (vacancies_j-vacancies_jp <= vacancies_total);
  const m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
  const map<m4_key,Vpff,compareb1 >::const_iterator row=Db1.find(key);
  if( row==Db1.end() )
    return UNSEENPROB;
  const PROB stored=(row->second)[vacancies_j - vacancies_jp].second;
  return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*stored);
}
void normalizeTable ()
{
int nParams=0;
for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
{
Vpff&d1=i->second;
COUNT sum=0.0;
for(PositionIndex i=0;i<d1.size();i++)
sum+=d1[i].first+d5modelsmooth_countoffset;
for(PositionIndex i=0;i<d1.size();i++)
{
d1[i].second=sum?((d1[i].first+d5modelsmooth_countoffset)/sum):(1.0/d1.size());
nParams++;
}
}
for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
{
Vpff&db1=i->second;
double sum=0.0;
for(PositionIndex i=0;i<db1.size();i++)
sum+=db1[i].first+d5modelsmooth_countoffset;
for(PositionIndex i=0;i<db1.size();i++)
{
db1[i].second=sum?((db1[i].first+d5modelsmooth_countoffset)/sum):(1.0/db1.size());
nParams++;
}
}
cout << "D5 table contains " << nParams << " parameters.\n";
}
// Writes both tables as text.  Rows whose counts sum to zero are omitted.
// For every cell of a printed row one line is written: the key prefix
// (print1_m5 for D1, printb1_m5 for Db1), the cell index, the probability and
// the count.  Each printed row is followed by an empty line.
friend ostream&operator<<(ostream&out,d5model&d5m) {
  typedef map<m4_key,Vpff,compare1 >::const_iterator HeadIter;
  typedef map<m4_key,Vpff,compareb1 >::const_iterator TailIter;

  out << "# Translation tables for Model 5 .\n";
  out << "# Table for head of cept.\n";
  for(HeadIter row=d5m.D1.begin();row!=d5m.D1.end();++row){
    const Vpff&cells=row->second;
    COUNT total=0.0;
    for(PositionIndex k=0;k<cells.size();k++)
      total+=cells[k].first;
    if( !total )
      continue;
    for(unsigned k=0;k<cells.size();k++)
    {
      print1_m5(out,row->first,*d5m.ewordclasses,*d5m.fwordclasses);
      out << (int)(k) << ' ' << cells[k].second << ' ' << cells[k].first << '\n';
    }
    out << endl;
  }

  out << "# Table for non-head of cept.\n";
  for(TailIter row=d5m.Db1.begin();row!=d5m.Db1.end();++row){
    const Vpff&cells=row->second;
    double total=0.0;
    for(PositionIndex k=0;k<cells.size();++k)
      total+=cells[k].first;
    if( !total )
      continue;
    for(unsigned k=0;k<cells.size();k++){
      printb1_m5(out,row->first,*d5m.fwordclasses);
      out << (int)(k) << ' ' << cells[k].second << ' ' << cells[k].first << '\n';
    }
    out << endl;
  }
  return out;
}
// Reads a table file line by line.  Lines starting with '#' are ignored; a
// data line has the fields  E F v1 v2 index prob count.  Lines that do not
// parse, or whose count is not positive, are skipped.  The count is added to
// the Db1 cell when the E field is "-1" and to the D1 cell otherwise (l and m
// are passed as 1000 in both cases).  Probabilities are then recomputed by
// normalizeTable(); the `prob` column of the file is not used.
void readProbTable(const char*x)
{
  ifstream f(x);
  string l;
  while(getline(f,l))
  {
    if(l.length()&&l[0]=='#')
      continue;
    istrstream is(l.c_str());
    string E,F;
    int v1,v2,ii;
    double prob,count;
    if(is>>E>>F>>v1>>v2>>ii>>prob>>count)
    {
      //cerr << "Read: " << E << " " << F << " " << v1 << " " << v2 << " " << prob<< endl;
      if( count>0 )
      {
        if( E=="-1")
        {
          getCountRef_bigger(ii,0,(*fwordclasses)(F),1000,1000,v2)+=count;
        }
        else
        {
          getCountRef_first(ii,v1,(*fwordclasses)(F),1000,1000,v2)+=count;
        }
      }
    }
  }
  normalizeTable();
  //ofstream of("M5FILE");
  //of << (*this);
}
void clear()
{
for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
{
Vpff&d1=i->second;
for(PositionIndex i=0;i<d1.size();i++)
d1[i].first=0.0;
}
for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
{
Vpff&db1=i->second;
for(PositionIndex i=0;i<db1.size();i++)
db1[i].first=0.0;
}
}
};
#endif

View File

@ -0,0 +1,93 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/* Noah A. Smith
Dictionary object for dictionary filter in Model 1 training
Dictionary file must be in order (sorted) by Foreign vocab id, but English
vocab ids may be in any order.
9 August 1999
*/
#include "Dictionary.h"
#include <cstring>
// Loads a dictionary of integer id pairs from `filename`.
// An empty file name marks the dictionary as "dead" (indict() then always
// answers false) and nothing is read.  A file that cannot be opened is
// reported on cerr and terminates the program with exit code 1.  Otherwise
// the lookup cache is reset and every "<first> <second>" pair is appended to
// pairs[0] / pairs[1] until extraction fails.
Dictionary::Dictionary(const char *filename){
  dead = (strcmp(filename, "") == 0);
  if(dead)
    return;

  cout << "Reading dictionary from: " << filename << '\n';
  ifstream dFile(filename);
  if(!dFile){
    cerr << "ERROR: Can't open dictionary: " << filename << '\n';
    exit(1);
  }

  currindexmin = 0;
  currindexmax = 0;
  currval = 0;

  int first, second;
  while((dFile >> first >> second)){
    pairs[0].push_back(first);
    pairs[1].push_back(second);
  }
  cout << "Dictionary read; " << pairs[0].size() << " pairs loaded." << '\n';
  dFile.close();
}
// Returns true when the pair (p, q) is present in the dictionary.
//
// pairs[0] must be sorted (see the note at the top of this file): the run of
// entries whose first id equals p is located by binary search, and its bounds
// are cached in currval / currindexmin / currindexmax so that consecutive
// queries for the same p only scan that run.  A dead dictionary and the pair
// (0, 0) always yield false.
//
// Fixes compared with the previous version:
//  * the leftward scan used an unsigned index that wrapped around below 0 and
//    then indexed far outside the vector when the run started at element 0;
//  * the rightward scan read pairs[0][t] before checking t against the size;
//  * an empty dictionary indexed element 0;
//  * the initial cache state (currval == 0, range [0,0]) was trusted for
//    p == 0 even though element 0 need not belong to id 0 -- the cached scan
//    now also verifies the first id.
bool Dictionary::indict(int p, int q){
  if(dead) return false;
  if(p == 0 && q == 0) return false;

  const int n = static_cast<int>(pairs[0].size());

  if(currval == p){
    // Same first id as the previous query: scan the cached run only.
    for(int i = currindexmin; i <= currindexmax && i < n; i++)
      if(pairs[0][i] == p && pairs[1][i] == q) return true;
    return false;
  }

  // Binary search for any entry whose first id is p.
  int begin = 0, end = n - 1, middle = 0;
  bool found = false;
  while(begin <= end){
    middle = begin + ((end - begin) >> 1);
    if(p < pairs[0][middle]) end = middle - 1;
    else if(p > pairs[0][middle]) begin = middle + 1;
    else{
      found = true;
      break;
    }
  }

  currval = p;
  if(!found){
    // Remember an empty run so that repeated misses for p stay cheap.
    currindexmin = 0;
    currindexmax = -1;
    return false;
  }

  // Widen [lo, hi] to the whole run of entries with first id p.
  int lo = middle;
  while(lo > 0 && pairs[0][lo - 1] == p) --lo;
  int hi = middle;
  while(hi + 1 < n && pairs[0][hi + 1] == p) ++hi;
  currindexmin = lo;
  currindexmax = hi;

  for(int i = lo; i <= hi; i++)
    if(pairs[1][i] == q) return true;
  return false;
}

View File

@ -0,0 +1,48 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/* Noah A. Smith
Dictionary object for dictionary filter in Model 1 training
9 August 1999
*/
#include <iostream>
#include <fstream>
#include "Vector.h"
#ifndef DICTIONARY_H
#define DICTIONARY_H
// Set of integer id pairs loaded from a file, with a one-entry lookup cache.
class Dictionary{
 public:
  // Loads the pairs from the named file; an empty name creates a dictionary
  // for which indict() always returns false.
  Dictionary(const char *filename);
  // True when the pair (p, q) is contained in the dictionary.
  bool indict(int p, int q);

 private:
  Vector<int> pairs[2];   // pairs[0][i] / pairs[1][i] form the i-th entry
  int currval;            // first id of the most recent lookup
  int currindexmin;       // index range in `pairs` cached for currval
  int currindexmax;
  bool dead;              // set when constructed with an empty file name
};
#endif

View File

@ -0,0 +1,58 @@
/*
Copyright (C) 1988,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef CLASS_FlexArray_defined
#define CLASS_FlexArray_defined
#include "Array.h"
#include <iostream>
#include <fstream>
// Array addressed with indices in [low(), high()] instead of [0, size):
// element i is stored at position i-start of the underlying Array<T>.
// The default arguments (0, -1) describe an empty range.
template<class T>
class FlexArray
{
 public:
  FlexArray(int _start=0,int _end=-1)
    : p(_end-_start+1),start(_start),End(_end)
  {}
  // Same, with every element initialised to `init`.
  FlexArray(int _start,int _end,const T&init)
    : p(_end-_start+1,init),start(_start),End(_end)
  {}

  T&operator[](int i)
  {
    return p[i-start];
  }
  const T&operator[](int i)const
  {
    return p[i-start];
  }

  int low()const
  {
    return start;
  }
  int high()const
  {
    return End;
  }

  // NOTE(review): both accessors go through conv<double> regardless of T, so
  // they presumably only work for T == double -- confirm against conv's
  // declaration.
  T*begin()
  {
    return conv<double>(p.begin());
  }
  T*end()
  {
    return conv<double>(p.end());
  }

 private:
  Array<T> p;
  int start,End;
};
// Prints every element of x as "index:value; " in ascending index order.
template<class T>
inline ostream&operator<<(ostream&out,const FlexArray<T>&x)
{
  int idx=x.low();
  while(idx<=x.high())
  {
    out << idx << ':' << x[idx];
    out << ';' << ' ';
    ++idx;
  }
  return out;
}
#endif

View File

@ -0,0 +1,240 @@
/*
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef NO_TRAINING
#include "ForwardBackward.h"
#include "Globals.h"
#include "myassert.h"
#include "HMMTables.h"
#include "mymath.h"
// Runs the forward-backward recursions over `net` and fills the posteriors.
//
//   net  network with I = net.size1() states per position and J = net.size2()
//        positions; provides nodeProb, outProb, getAlphainit, getBetainit.
//   g    output, resized to I*J: alpha*beta per node, then normalised
//        separately within each consecutive slice of I entries.
//   E    output transition accumulators.  If E.size()==1 all positions are
//        accumulated into E[0]; otherwise E[j-1] is used for position j.
//        The matrices are finally divided by the grand total `esum`.
//
// Returns the pre-normalisation sum of the first slice of g (the value
// returned by normalize_if_possible), or 1.0 when there is no slice.
// Consistency problems between the different totals are only reported on cout.
double ForwardBackwardTraining(const HMMNetwork&net, Array<double>&g, Array<
Array2<double> >&E) {
const int I = net.size1(), J = net.size2(), N = I * J;
Array<double> alpha(N, 0), beta(N, 0), sum(J);
// Backward pass: the last block of I entries of beta gets the final weights.
for (int i = 0; i < I; i++)
beta[N - I + i] = net.getBetainit(i);
// cur_beta walks backwards through beta, starting at the last entry of the
// block for position J-2.
double * cur_beta = conv<double> (beta.begin()) + N - I - 1;
for (int j = J - 2; j >= 0; --j)
for (int ti = I - 1; ti >= 0; --ti, --cur_beta) {
const double *next_beta = conv<double> (beta.begin()) + (j + 1) * I;
const double *alprob = &net.outProb(j, ti, 0), *next_node =
&net.nodeProb(0, j + 1);
// The masserts below document the assumed memory layout: outProb(j,ti,ni)
// is contiguous in ni, and nodeProb(ni,j+1) advances by J per ni.
for (int ni = 0; ni < I; ++ni, (next_node += J)) {
massert(cur_beta<next_beta&& &net.outProb(j,ti,ni)==alprob);
massert(next_node == &net.nodeProb(ni,j+1));
/* if( VERB&&(*next_beta)*(*alprob)*(*next_node) )
cout << "B= " << (int)(cur_beta-beta.begin()) << " += " << (*next_beta) << "("
<< next_beta-beta.begin() << ") alprob:" << (*alprob) << " lexprob:" << (*next_node) << endl;*/
(*cur_beta) += (*next_beta++) * (*alprob++) * (*next_node);
}
}
// Forward pass: initialise the first block of alpha.
for (int i = 0; i < I; i++)
alpha[i] = net.getAlphainit(i) * net.nodeProb(i, 0);
double* cur_alpha = conv<double> (alpha.begin()) + I;
cur_beta = conv<double> (beta.begin()) + I;
for (int j = 1; j < J; j++) {
// Select (and, when first used, reset) the accumulator for this position.
Array2<double>&e = E[(E.size() == 1) ? 0 : (j - 1)];
if ((E.size() != 1) || j == 1) {
e.resize(I, I);
fill(e.begin(), e.end(), 0.0);
}
for (int ti = 0; ti < I; ++ti, ++cur_alpha, ++cur_beta) {
const double * prev_alpha = conv<double> (alpha.begin()) + I * (j
- 1);
double *cur_e = &e(ti, 0);
double this_node = net.nodeProb(ti, j);
const double* alprob = &net.outProb(j - 1, 0, ti);
// Here alprob steps by I per predecessor pi (see the massert below).
for (int pi = 0; pi < I; ++pi, ++prev_alpha, (alprob += I)) {
massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
massert(&e(ti,pi)==cur_e);
const double alpha_increment = *prev_alpha * (*alprob)
* this_node;
(*cur_alpha) += alpha_increment;
// e(ti,pi) accumulates alpha(pi,j-1) * outProb * nodeProb(ti,j) * beta(ti,j).
(*cur_e++) += alpha_increment * (*cur_beta);
}
}
}
// g = alpha * beta, element by element.
g.resize(N);
transform(alpha.begin(), alpha.end(), beta.begin(), g.begin(), multiplies<
double> ());
// bsum: total computed from beta at position 0; esum: total over all of E.
double bsum = 0, esum = 0, esum2;
for (int i = 0; i < I; i++)
bsum += beta[i] * net.nodeProb(i, 0) * net.getAlphainit(i);
for (unsigned int j = 0; j < (unsigned int) E.size(); j++) {
Array2<double>&e = E[j];
const double *epe = e.end();
for (const double*ep = e.begin(); ep != epe; ++ep)
esum += *ep;
}
// esum2 is the per-transition average of esum; it is compared with bsum
// below with a relative tolerance of 1e-3*I.
if (J > 1)
esum2 = esum / (J - 1);
else
esum2 = 0.0;
if (!(esum2 == 0.0 || mfabs(esum2 - bsum) / bsum < 1e-3 * I))
cout << "ERROR2: " << esum2 << " " << bsum << " " << esum << net
<< endl;
// Normalise g slice by slice (I entries each); the value returned by
// normalize_if_possible for each slice is stored in `sum` and checked
// against bsum.
double * sumptr = conv<double> (sum.begin());
double* ge = conv<double> (g.end());
for (double* gp = conv<double> (g.begin()); gp != ge; gp += I) {
*sumptr++ = normalize_if_possible(gp, gp + I);
if (bsum && !(mfabs((*(sumptr - 1) - bsum) / bsum) < 1e-3 * I))
cout << "ERROR: " << *(sumptr - 1) << " " << bsum << " " << mfabs(
(*(sumptr - 1) - bsum) / bsum) << ' ' << I << ' ' << J
<< endl;
}
// Scale the transition accumulators by the grand total.
for (unsigned int j = 0; j < (unsigned int) E.size(); j++) {
Array2<double>&e = E[j];
double* epe = e.end();
if (esum)
for (double*ep = e.begin(); ep != epe; ++ep)
*ep /= esum;
else
// NOTE(review): dividing by 1.0/x multiplies by x; with esum == 0 this
// looks like it was meant to assign a uniform value -- confirm intent.
for (double*ep = e.begin(); ep != epe; ++ep)
*ep /= 1.0 / (max(I * I, I * I * (J - 1)));
}
if (sum.size())
return sum[0];
else
return 1.0;
}
void HMMViterbi(const HMMNetwork&net, Array<int>&vit) {
const int I = net.size1(), J = net.size2();
vit.resize(J);
Array<double> g;
Array<Array2<double> > e(1);
ForwardBackwardTraining(net, g, e);
for (int j = 0; j < J; j++) {
double * begin = conv<double> (g.begin()) + I * j;
vit[j] = max_element(begin, begin + I) - begin;
}
}
void HMMViterbi(const HMMNetwork&net, Array<double>&g, Array<int>&vit) {
const int I = net.size1(), J = net.size2();
vit.resize(J);
for (int j = 0; j < J; j++) {
double* begin = conv<double> (g.begin()) + I * j;
vit[j] = max_element(begin, begin + I) - begin;
}
}
// Best-path (max-product) search through the trellis described by `net`.
//   vitar   : output, resized to J = net.size2(); vitar[j] is the chosen state
//             at position j.
//   pegi,
//   pegj    : optional constraint. When processing reaches position pegj, every
//             state other than pegi is zeroed there; with pegi == -1 the states
//             below I/2 are zeroed instead. The defaults declared in the header
//             (-1, -1) never match a position, so nothing is constrained.
//   verbose : when true, prints the node and transition probabilities along
//             the chosen path to cout.
// Returns the score of the best final state (after multiplying by the beta
// start values), or 1.0 when J == 0.
double HMMRealViterbi(const HMMNetwork&net, Array<int>&vitar, int pegi,
int pegj, bool verbose) {
const int I = net.size1(), J = net.size2(), N = I * J;
// alpha starts at -1 so that any non-negative candidate replaces it.
Array<double> alpha(N, -1);
// bp[j*I+i] points at the alpha cell of the best predecessor, or is null.
Array<double*> bp(N, (double*) 0);
vitar.resize(J);
if (J == 0)
return 1.0;
for (int i = 0; i < I; i++) {
alpha[i] = net.getAlphainit(i) * net.nodeProb(i, 0);
if (i > I / 2)
alpha[i] = 0; // only first empty word can be chosen
bp[i] = 0;
}
double *cur_alpha = conv<double> (alpha.begin()) + I;
double **cur_bp = conv<double*> (bp.begin()) + I;
for (int j = 1; j < J; j++) {
// Apply the peg to the previous position (pegj == j-1) before extending from it.
if (pegj + 1 == j)
for (int ti = 0; ti < I; ti++)
if ((pegi != -1 && ti != pegi) || (pegi == -1 && ti < I / 2))
(cur_alpha - I)[ti] = 0.0;
for (int ti = 0; ti < I; ++ti, ++cur_alpha, ++cur_bp) {
double* prev_alpha = conv<double> (alpha.begin()) + I * (j - 1);
double this_node = net.nodeProb(ti, j);
const double *alprob = &net.outProb(j - 1, 0, ti);
// alprob advances by I per predecessor; the massert checks that this equals
// &net.outProb(j-1,pi,ti).
for (int pi = 0; pi < I; ++pi, ++prev_alpha, (alprob += I)) {
massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
const double alpha_increment = *prev_alpha * (*alprob)
* this_node;
// Strict '>' keeps the first predecessor among equal scores.
if (alpha_increment > *cur_alpha) {
(*cur_alpha) = alpha_increment;
(*cur_bp) = prev_alpha;
}
}
}
}
// Fold the beta start values into the last position.
for (int i = 0; i < I; i++)
alpha[N - I + i] *= net.getBetainit(i);
// A peg on the last position is applied here because the loop above only
// handles pegj <= J-2.
if (pegj == J - 1)
for (int ti = 0; ti < I; ti++)
if ((pegi != -1 && ti != pegi) || (pegi == -1 && ti < I / 2))
(alpha)[N - I + ti] = 0.0;
// Backtrace from the best final state along the stored back-pointers.
int j = J - 1;
cur_alpha = conv<double> (alpha.begin()) + j * I;
vitar[J - 1] = max_element(cur_alpha, cur_alpha + I) - cur_alpha;
double ret = *max_element(cur_alpha, cur_alpha + I);
while (bp[vitar[j] + j * I]) {
cur_alpha -= I;
// The back-pointer minus the start of the previous slice is the state index.
vitar[j - 1] = bp[vitar[j] + j * I] - cur_alpha;
massert(vitar[j-1]<I&&vitar[j-1]>=0);
j--;
}
// Position 0 has null back-pointers, so the walk is expected to end there.
massert(j==0);
if (verbose) {
cout << "VERB:PEG: " << pegi << ' ' << pegj << endl;
for (int j = 0; j < J; j++)
cout << "NP " << net.nodeProb(vitar[j], j) << ' ' << "AP " << ((j
== 0) ? net.getAlphainit(vitar[j]) : net.outProb(j - 1,
vitar[j - 1], vitar[j])) << " j:" << j << " i:" << vitar[j]
<< "; ";
cout << endl;
}
return ret;
}
double MaximumTraining(const HMMNetwork&net, Array<double>&g, Array<Array2<
double> >&E) {
Array<int> vitar;
double ret = HMMRealViterbi(net, vitar);
const int I = net.size1(), J = net.size2();
if (E.size() == 1) {
Array2<double>&e = E[0];
e.resize(I, I);
g.resize(I * J);
fill(g.begin(), g.end(), 0.0);
fill(e.begin(), e.end(), 0.0);
for (int i = 0; i < J; ++i) {
g[i * I + vitar[i]] = 1.0;
if (i > 0)
e(vitar[i], vitar[i - 1])++;
}
} else {
g.resize(I * J);
fill(g.begin(), g.end(), 0.0);
for (int i = 0; i < J; ++i) {
g[i * I + vitar[i]] = 1.0;
if (i > 0) {
Array2<double>&e = E[i - 1];
e.resize(I, I);
fill(e.begin(), e.end(), 0.0);
e(vitar[i], vitar[i - 1])++;
}
}
}
return ret;
}
#endif

View File

@ -0,0 +1,62 @@
/*
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef NO_EM_MARKOF_ZEUGS_DEFINED
#define NO_EM_MARKOF_ZEUGS_DEFINED
#ifndef NO_TRAINING
#include "myassert.h"
#include "Array.h"
#include "Array2.h"
// Plain data holder for one HMM trellis: per-node probabilities, transition
// probabilities and the start values used by the forward/backward passes.
class HMMNetwork
{
public:
// as: first dimension (returned by size1()), bs: second dimension (size2()).
int as,bs;
// Node probabilities, read through nodeProb(i,j); constructed as as x bs.
Array2<double> n;
// Transition matrices, read through outProb(j,i1,i2). Constructed empty and
// presumably filled by the caller before use -- TODO confirm.
Array<Array2<double> > e;
// Start values for the forward pass; each entry is initialised to 1.0/as.
Array<double> alphainit;
// Start values for the backward pass; each entry is initialised to 1.0.
Array<double> betainit;
// as*bs, fixed at construction.
int ab;
// Initialised to 1.0; not read anywhere inside this class.
double finalMultiply;
HMMNetwork(int I,int J)
: as(I),bs(J),n(as,bs),/*e(as,as,0.0),*/e(0),alphainit(as,1.0/as),betainit(as,1.0),ab(as*bs),finalMultiply(1.0)
{}
double getAlphainit(int i)const{return alphainit[i];}
double getBetainit(int i)const{return betainit[i];}
inline int size1()const{return as;}
inline int size2()const{return bs;}
// Returns a reference so that callers can step through the table by pointer.
inline const double&nodeProb(int i,int j)const
{return n(i,j);}
// j is clamped to the last matrix in e, so a single matrix serves every j.
// NOTE(review): with e still empty the clamp yields index -1.
inline const double&outProb(int j,int i1,int i2)const
{/*massert(e[min(int(e.size())-1,j)](i1,i2) );*/ return e[min(int(e.size())-1,j)](i1,i2);}
// Prints n, e, alphainit and betainit under the labels "N:", "E:", "A:", "B:".
friend ostream&operator<<(ostream&out,const HMMNetwork&x)
{
return out <<"N: \n"<< x.n << endl << "E: \n" << x.e << "A:\n" << x.alphainit << "B:\n" << x.betainit << endl;
}
};
// Forward-backward pass: fills gamma (I*J values, normalised per position) and
// the transition counts in epsilon; returns the pre-normalisation total of the
// first position, or 1.0 for an empty trellis.
double ForwardBackwardTraining(const HMMNetwork&mc,Array<double>&gamma,Array<Array2<double> >&epsilon);
// Per-position arg-max over the values produced by ForwardBackwardTraining.
void HMMViterbi(const HMMNetwork&mc,Array<int>&vit);
// Best single path through the trellis; pegi/pegj optionally force state pegi
// at position pegj. Returns the score of the best final state.
double HMMRealViterbi(const HMMNetwork&net,Array<int>&vit,int pegi=-1,int pegj=-1,bool verbose=0);
// Converts the HMMRealViterbi path into 0/1 counts in g and e; returns its score.
double MaximumTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&e);
// Per-position arg-max over an already computed g (no forward-backward pass).
void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit);
#endif
#endif

View File

@ -0,0 +1,75 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef Globals_asdf_defined
#define Globals_asdf_defined
#include <string>
#include <fstream>
#include <map>
#include <syncObj.h>
#include "defs.h"
#include "Vector.h"
// Program-wide settings and shared state. Everything below is only declared
// here; the definitions live in other translation units.
extern float PROB_SMOOTH,MINCOUNTINCREASE;
extern bool Verbose, Log, Peg, Transfer, Transfer2to3, useDict ;
// Output prefix/path and the input/output file names.
extern string Prefix, LogFilename, OPath,
SourceVocabFilename, TargetVocabFilename, CorpusFilename, TestCorpusFilename,
t_Filename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
extern ofstream logmsg ;
// Presumably guards concurrent writes to logmsg -- TODO confirm at the call sites.
extern Mutex logmsg_lock;
extern double M5P0,P0 ;
extern bool NODUMPS, FEWDUMPS ;
extern string Usage ;
extern unsigned int MAX_SENTENCE_LENGTH ;
extern int PegUntil;
extern short DeficientDistortionForEmptyWord;
extern int M4_Dependencies;
extern int M5_Dependencies;
extern short OutputInAachenFormat;
// Distinct power-of-two constants, presumably OR-ed together as bit flags in
// M4_Dependencies / M5_Dependencies -- TODO confirm.
#define DEP_MODEL_l 1
#define DEP_MODEL_m 2
#define DEP_MODEL_F 4
#define DEP_MODEL_E 8
#define DEP_MODELb_l 16
#define DEP_MODELb_m 32
#define DEP_MODELb_F 64
#define DEP_MODELb_E 128
#define DEP_SUM 256
class vcbList;
extern vcbList *globeTrainVcbList, *globfTrainVcbList;
// Selects how the HMM tables map (previous position, position) to a table slot.
extern short PredictionInAlignments;
extern short SmoothHMM;
// Short alias used in debug-output conditions.
#define VERB Verbose
double ErrorsInAlignment(const map< pair<int,int>,char >&reference,const Vector<WordIndex>&test,int l,int&missing,int&toomuch,int&eventsMissing,int&eventsToomuch,int);
extern Vector<map< pair<int,int>,char > > ReferenceAlignment;
void printGIZAPars(ostream&out);
#endif

View File

@ -0,0 +1,512 @@
/*
Copyright (C) 1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include "HMMTables.h"
#include <fstream>
#include <sstream>
#include "Globals.h"
#include "Parameter.h"
// Human-readable dump of alProb: one "Distribution for: ..." section per
// table key listing every non-zero entry as "index:value; ", followed by the
// per-key SUM and, at the very end, the FULL-SUM over all keys.
template<class CLS, class MAPPERCLASSTOSTRING>
void HMMTables<CLS, MAPPERCLASSTOSTRING>::writeJumps(ostream&out) const {
    typedef typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator TableIter;
    double grandTotal = 0.0;
    for (TableIter entry = alProb.begin(); entry != alProb.end(); ++entry) {
        const FlexArray<double>&dist = entry->second;
        out << "\n\nDistribution for: ";
        printAlDeps(out, entry->first, *mapper1, *mapper2);
        out << ' ';
        double total = 0.0;
        for (int idx = dist.low(); idx <= dist.high(); ++idx) {
            if (!dist[idx])
                continue; // zero entries are not printed
            out << idx << ':' << dist[idx] << ';' << ' ';
            total += dist[idx];
        }
        out << '\n' << '\n';
        out << "SUM: " << total << '\n';
        grandTotal += total;
    }
    out << "FULL-SUM: " << grandTotal << '\n';
}
// Stream-based counterpart of writeJumps(ostream&). Reads nothing: the body
// is empty and the stream argument is ignored.
template<class CLS, class MAPPERCLASSTOSTRING>
void HMMTables<CLS, MAPPERCLASSTOSTRING>::readJumps(istream& /*unused*/) {
}
// Looks up the alignment (jump) probability stored for the context
// (sentLength, istrich, j, w1, w2).
//   istrich : previous position, in [-1, sentLength)
//   k       : position being scored, in [0, sentLength)
//   J, j    : only used to compute the slot when PredictionInAlignments == 2
//   iter    : when in (0, 5000) a missing table entry is reported on cout
// The slot inside the entry depends on PredictionInAlignments:
//   0 -> istrich-k, 1 -> k, 2 -> (k*J - j*sentLength) divided by J with
//   rounding away from zero; any other value aborts.
// Returns the stored value, or the uniform fallback 1/(2*sentLength-1) when
// the context has no entry.
// The value is copied while `lock` is still held, so a concurrent addAlCount()
// cannot modify the entry between the lookup and the read.
template<class CLS, class MAPPERCLASSTOSTRING> double HMMTables<CLS,
MAPPERCLASSTOSTRING>::getAlProb(int istrich, int k, int sentLength,
int J, CLS w1, CLS w2, int j, int iter) const {
    massert(k<sentLength&&k>=0);
    massert(istrich<sentLength&&istrich>=-1);
    int pos=istrich-k;
    switch (PredictionInAlignments) {
    case 0:
        pos=istrich-k;
        break;
    case 1:
        pos=k;
        break;
    case 2:
        pos=(k*J-j*sentLength);
        if (pos>0)
            pos+=J/2;
        else
            pos-=J/2;
        pos/=J;
        break;
    default:
        abort();
    }
    double result;
    lock.lock();
    typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator p=
        alProb.find(AlDeps<CLS>(sentLength, istrich, j, w1, w2));
    if (p!=alProb.end() ) {
        // Read the value before releasing the lock.
        result = (p->second)[pos];
    } else {
        if (iter>0&&iter<5000)
            cout << "WARNING: Not found: " << ' ' << J << ' ' << sentLength
                << '\n';
        result = 1.0/(2*sentLength-1);
    }
    lock.unlock();
    return result;
}
// Adds `value` to the alProb entry, and `valuePredicted` (when non-zero) to
// the alProbPredicted entry, for the context (sentLength, istrich, j, w1, w2).
// The slot inside the entry is chosen exactly as in getAlProb(). A missing
// entry is created on demand: it spans +-MAX_SENTENCE_LENGTH when bit 0 of
// CompareAlDeps is clear and +-sentLength otherwise. Each table update is
// performed while holding `lock`.
template<class CLS, class MAPPERCLASSTOSTRING>
void HMMTables<CLS, MAPPERCLASSTOSTRING>::addAlCount(int istrich, int k, int sentLength,
        int J, CLS w1, CLS w2, int j, double value, double valuePredicted) {
    typedef typename map<AlDeps<CLS>,FlexArray<double> >::iterator TableIter;

    int pos=istrich-k;
    switch (PredictionInAlignments) {
    case 0:
        break; // pos already holds istrich-k
    case 1:
        pos=k;
        break;
    case 2:
        pos=(k*J-j*sentLength);
        pos+=(pos>0) ? (J/2) : -(J/2);
        pos/=J;
        break;
    default:
        abort();
    }

    AlDeps<CLS> deps(AlDeps<CLS>(sentLength, istrich, j, w1, w2));

    // Observed counts.
    lock.lock();
    TableIter observed=alProb.find(deps);
    if (observed==alProb.end() ) {
        if ( (CompareAlDeps&1)!=0)
            observed=alProb.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
        else
            observed=alProb.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
    }
    observed->second[pos]+=value;
    lock.unlock();

    if (!valuePredicted)
        return;

    // Predicted counts, only touched for a non-zero contribution.
    lock.lock();
    TableIter predicted=alProbPredicted.find(deps);
    if (predicted==alProbPredicted.end() ) {
        if ( (CompareAlDeps&1)!=0)
            predicted=alProbPredicted.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
        else
            predicted=alProbPredicted.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
    }
    predicted->second[pos]+=valuePredicted;
    lock.unlock();
}
// Returns the init_alpha entry (value array plus its own mutex) for length I,
// creating a zero-filled array of I elements first if there is none yet.
// The table lookup/creation runs under alphalock; the returned reference is
// used by the caller after that lock has been released.
template<class CLS, class MAPPERCLASSTOSTRING>
pair<Array<double>,Mutex>&HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetAlphaInit(int I)
{
    typedef pair<Array<double>,Mutex> Entry;
    alphalock.lock();
    if (init_alpha.find(I) == init_alpha.end()) {
        init_alpha[I] = Entry(Array<double>(I,0), Mutex());
    }
    Entry& entry = init_alpha[I];
    alphalock.unlock();
    return entry;
}
// Returns the init_beta entry (value array plus its own mutex) for length I,
// creating a zero-filled array of I elements first if there is none yet.
// The table lookup/creation runs under betalock; the returned reference is
// used by the caller after that lock has been released.
template<class CLS, class MAPPERCLASSTOSTRING>
pair<Array<double>,Mutex>&HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetBetaInit(int I)
{
    typedef pair<Array<double>,Mutex> Entry;
    betalock.lock();
    if (init_beta.find(I) == init_beta.end()) {
        init_beta[I] = Entry(Array<double>(I,0), Mutex());
    }
    Entry& entry = init_beta[I];
    betalock.unlock();
    return entry;
}
// Copies the init_alpha array stored for length I into x.
// Returns false (x untouched) when no entry exists. On success the entries of
// x from index size/2+1 upwards are zeroed before returning true.
template<class CLS, class MAPPERCLASSTOSTRING>
bool HMMTables<CLS, MAPPERCLASSTOSTRING>::getAlphaInit(int I, Array<double>&x) const {
    alphalock.lock();
    hash_map<int,pair<Array<double>,Mutex> >::const_iterator hit=init_alpha.find(I);
    const bool found = (hit!=init_alpha.end());
    if (found)
        x=hit->second.first; // copy while the table lock is held
    alphalock.unlock();
    if (!found)
        return false;
    // only first empty word can be chosen
    for (unsigned int j=x.size()/2+1; j<x.size(); ++j)
        x[j]=0;
    return true;
}
// Copies the init_beta array stored for length I into x.
// Returns false (x untouched) when no entry exists, true otherwise.
template<class CLS, class MAPPERCLASSTOSTRING>
bool HMMTables<CLS, MAPPERCLASSTOSTRING>::getBetaInit(int I, Array<double>&x) const {
    betalock.lock();
    hash_map<int,pair<Array<double>,Mutex> >::const_iterator hit=init_beta.find(I);
    const bool found = (hit!=init_beta.end());
    if (found)
        x=hit->second.first; // copy while the table lock is held
    betalock.unlock();
    return found;
}
/***********************************
By Edward Gao
************************************/
// Writes the four count tables to text files so that readJumps() can rebuild
// them. A null file name skips the corresponding table.
//   alprob / alpredict : one line per key of alProb / alProbPredicted:
//       englishSentenceLength classPrevious previous j Cj low high
//       followed by "index value" pairs for every non-zero slot.
//   alpha / beta       : one line per entry of init_alpha / init_beta:
//       key size followed by all values.
// Returns false as soon as one of the files cannot be opened, true otherwise.
template<class CLS, class MAPPERCLASSTOSTRING>
bool HMMTables<CLS, MAPPERCLASSTOSTRING>::writeJumps(const char* alprob,
        const char* alpredict, const char* alpha, const char* beta) const {
    typedef typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator JumpIter;
    typedef typename hash_map<int,pair<Array<double>,Mutex> >::const_iterator InitIter;

    if (alprob) {
        ofstream out(alprob);
        if (!out.is_open()) {
            cerr << "Cannot open file for HMM output " << alprob << endl;
            return false;
        }
        cerr << "Dumping HMM table to " << alprob << endl;
        for (JumpIter it=alProb.begin(); it!=alProb.end(); ++it) {
            const AlDeps<CLS>&key = it->first;
            const FlexArray<double>&counts = it->second;
            out << key.englishSentenceLength << " "
                << key.classPrevious << " " << key.previous
                << " " << key.j << " " << key.Cj << " "
                << counts.low() << " " << counts.high() << " ";
            for (int a=counts.low(); a<=counts.high(); ++a) {
                if (!counts[a])
                    continue; // only non-zero slots are written
                out << a << ' ' << counts[a] << ' ';
            }
            out << endl;
        }
        out.close();
    }

    if (alpredict) {
        ofstream out(alpredict);
        if (!out.is_open()) {
            cerr << "Cannot open file for HMM output " << alpredict << endl;
            return false;
        }
        cerr << "Dumping HMM table to " << alpredict << endl;
        for (JumpIter it=alProbPredicted.begin(); it!=alProbPredicted.end(); ++it) {
            const AlDeps<CLS>&key = it->first;
            const FlexArray<double>&counts = it->second;
            out << key.englishSentenceLength << " "
                << key.classPrevious << " " << key.previous
                << " " << key.j << " " << key.Cj << " "
                << counts.low() << " " << counts.high() << " ";
            for (int a=counts.low(); a<=counts.high(); ++a) {
                if (!counts[a])
                    continue;
                out << a << ' ' << counts[a] << ' ';
            }
            out << endl;
        }
        out.close();
    }

    if (alpha) {
        ofstream out(alpha);
        if (!out.is_open()) {
            cerr << "Cannot open file for HMM output " << alpha << endl;
            return false;
        }
        cerr << "Dumping HMM table to " << alpha << endl;
        for (InitIter it=init_alpha.begin(); it!=init_alpha.end(); it++) {
            const Array<double>&values = it->second.first;
            out << it->first << " " << values.size() << " ";
            for (int j=0; j<values.size(); j++)
                out << values[j] << " ";
            out << endl;
        }
        out.close();
    }

    if (beta) {
        ofstream out(beta);
        if (!out.is_open()) {
            cerr << "Cannot open file for HMM output " << beta << endl;
            return false;
        }
        cerr << "Dumping HMM table to " << beta << endl;
        for (InitIter it=init_beta.begin(); it!=init_beta.end(); it++) {
            const Array<double>&values = it->second.first;
            out << it->first << " " << values.size() << " ";
            for (int j=0; j<values.size(); j++)
                out << values[j] << " ";
            out << endl;
        }
        out.close();
    }
    return true;
}
// Reads count tables written by writeJumps(const char*, ...) and ADDS their
// contents to the tables already held by this object. A null file name skips
// the corresponding table.
//   alprob / alpredict : lines of the form
//       englishSentenceLength classPrevious previous j Cj low high {index value}*
//       accumulated into alProb / alProbPredicted. A key that is not yet
//       present is created with the range [low, high] given on the line.
//   alpha / beta       : lines of the form "id size v0 v1 ..." accumulated into
//       init_alpha / init_beta; id and size must be equal and non-negative.
// Empty lines are ignored. Returns false (after a message on cerr) when a file
// cannot be opened or a line is malformed, true otherwise.
//
// Parsing is driven by the success of each extraction rather than by eof(),
// so a stray token ends the line with an error instead of spinning forever,
// and nothing is added from a variable whose extraction failed. Indices
// outside the range of the target array are ignored.
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
MAPPERCLASSTOSTRING>::readJumps(const char* alprob,
const char* alpredict, const char* alpha, const char* beta) {
    typedef map<AlDeps<CLS>,FlexArray<double> > JumpTable;

    // --- alProb and alProbPredicted share one file format -----------------
    const char* const jumpFiles[2] = { alprob, alpredict };
    JumpTable* const jumpTables[2] = { &alProb, &alProbPredicted };
    for (int t=0; t<2; ++t) {
        const char* const fileName = jumpFiles[t];
        if (!fileName)
            continue;
        ifstream ifs(fileName);
        if (!ifs.is_open()) {
            cerr << "Cannot open file for HMM input " << fileName << endl;
            return false;
        }
        cerr << "Reading HMM table from " << fileName << endl;
        JumpTable&table = *jumpTables[t];
        string strLine;
        while (getline(ifs, strLine)) {
            if (strLine.empty())
                continue;
            stringstream ss(strLine);
            AlDeps<CLS> dep;
            int low, high;
            // Refuse to build a table key from a partially parsed header.
            if (!(ss >> dep.englishSentenceLength >> dep.classPrevious
                    >> dep.previous >> dep.j >> dep.Cj >> low >> high)) {
                cerr << "Malformed line in HMM table " << fileName << endl;
                return false;
            }
            typename JumpTable::iterator p=table.find(dep);
            if (p==table.end() ) {
                p=table.insert(make_pair(dep,FlexArray<double> (low,high,0.0))).first;
            }
            FlexArray<double>&counts = p->second;
            int pos;
            double val;
            while (ss >> pos >> val) {
                // Stay inside both the range announced on the line and the
                // range of the array that actually receives the value.
                if (pos>=low && pos>=counts.low() && pos<=counts.high()) {
                    counts[pos]+=val;
                }
            }
            // Stopping before the end of the line means an unparsable token.
            if (!ss.eof()) {
                cerr << "Malformed line in HMM table " << fileName << endl;
                return false;
            }
        }
    }

    // --- init_alpha and init_beta share one file format -------------------
    const char* const initFiles[2] = { alpha, beta };
    const char* const initNames[2] = { "alpha", "beta" };
    for (int t=0; t<2; ++t) {
        const char* const fileName = initFiles[t];
        if (!fileName)
            continue;
        ifstream ifs(fileName);
        if (!ifs.is_open()) {
            cerr << "Cannot open file for HMM input " << fileName << endl;
            return false;
        }
        string strLine;
        while (getline(ifs, strLine)) {
            if (strLine.empty())
                continue;
            stringstream ss(strLine);
            int id = -1, size = -1;
            ss >> id >> size;
            if (id<0||size<0||id!=size) {
                cerr << "Mismatch in " << initNames[t] << " init table!" << endl;
                return false;
            }
            pair<Array<double>, Mutex>&entry =
                (t==0) ? doGetAlphaInit(id) : doGetBetaInit(id);
            Array<double>&values = entry.first;
            bool complete = true;
            entry.second.lock();
            for (int j=0; j<static_cast<int>(values.size()); j++) {
                double v;
                if (!(ss >> v)) {
                    // Fewer values than announced: never add an unread value.
                    complete = false;
                    break;
                }
                values[j]+=v;
            }
            entry.second.unlock();
            if (!complete) {
                cerr << "Truncated line in " << initNames[t] << " init table!" << endl;
                return false;
            }
        }
    }
    return true;
}
// Adds every count held by `ht` to the corresponding table of this object:
// alProb, alProbPredicted, init_alpha and init_beta. Keys missing here are
// created with the same index range (jump tables) or the same length
// (alpha/beta) as in `ht`. Always returns true.
//
// The alpha entry is taken BY REFERENCE, like the beta entry: taking it by
// value would add the counts to a temporary copy and silently discard them.
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
MAPPERCLASSTOSTRING>::merge(HMMTables<CLS,MAPPERCLASSTOSTRING> & ht) {
    for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
        ht.alProb.begin(); i!=ht.alProb.end(); ++i) {
        typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
            alProb.find(i->first);
        if (p==alProb.end() ) {
            p=alProb.insert(make_pair(i->first,FlexArray<double> (i->second.low(),i->second.high(),0.0))).first;
        }
        for (int a=i->second.low(); a<=i->second.high(); ++a)
            if (i->second[a]) {
                p->second[a] += i->second[a];
            }
    }
    for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
        ht.alProbPredicted.begin(); i!=ht.alProbPredicted.end(); ++i) {
        typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
            alProbPredicted.find(i->first);
        if (p==alProbPredicted.end() ) {
            p=alProbPredicted.insert(make_pair(i->first,FlexArray<double> (i->second.low(),i->second.high(),0.0))).first;
        }
        for (int a=i->second.low(); a<=i->second.high(); ++a)
            if (i->second[a]) {
                p->second[a] += i->second[a];
            }
    }
    for (typename hash_map<int,pair<Array<double>,Mutex> >::const_iterator i=
        ht.init_alpha.begin(); i!=ht.init_alpha.end(); i++) {
        // Reference into init_alpha, so the additions below are kept.
        pair<Array<double>,Mutex>&alp = doGetAlphaInit(i->first);
        int j;
        for (j=0; j<alp.first.size(); j++) {
            alp.first[j]+=i->second.first[j];
        }
    }
    for (typename hash_map<int,pair<Array<double>,Mutex> >::const_iterator i=
        ht.init_beta.begin(); i!=ht.init_beta.end(); i++) {
        pair<Array<double>,Mutex>&bet = doGetBetaInit(i->first);
        int j;
        for (j=0; j<bet.first.size(); j++) {
            bet.first[j]+=i->second.first[j];
        }
    }
    return true;
}
//////////////////////////////////////
// Creates empty tables.
//   _probForEmpty : its absolute value becomes probabilityForEmpty; a negative
//                   sign sets updateProbabilityForEmpty.
//   m1, m2        : class-to-string mappers; only their addresses are stored,
//                   so both objects must outlive this table.
// The numeric bookkeeping members (globalCounter, divSum, p0_count,
// np0_count) start at zero instead of being left indeterminate.
template<class CLS, class MAPPERCLASSTOSTRING> HMMTables<CLS,
MAPPERCLASSTOSTRING>::HMMTables(double _probForEmpty,
const MAPPERCLASSTOSTRING&m1, const MAPPERCLASSTOSTRING&m2) :
    probabilityForEmpty(mfabs(_probForEmpty)),
    updateProbabilityForEmpty(_probForEmpty<0.0),
    globalCounter(0), divSum(0.0), p0_count(0.0), np0_count(0.0),
    mapper1(&m1), mapper2(&m2) {
}
// Nothing to release explicitly: the body is empty.
template<class CLS, class MAPPERCLASSTOSTRING>
HMMTables<CLS, MAPPERCLASSTOSTRING>::~HMMTables()
{
}

View File

@ -0,0 +1,179 @@
/*
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef HMM_TABLES_H_ASDF_DEFINED
#define HMM_TABLES_H_ASDF_DEFINED
#include "FlexArray.h"
#if __GNUC__>2
#include <ext/hash_map>
using __gnu_cxx::hash_map;
#else
#include <hash_map>
#endif
#include "Array.h"
#include <map>
#include "mymath.h"
#include "syncObj.h"
// Scales the range [a,b) in place so that it sums to one.
// If the elements add up to zero, every element is set to 1/(b-a) instead.
// Returns the sum of the elements as it was BEFORE scaling.
template<class T>
T normalize_if_possible(T*a,T*b){
    T total=0;
    for(T*p=a;p!=b;++p)
        total+=*p;
    if( !total ){
        // Nothing to scale by: fall back to a uniform distribution.
        const double uniform=1.0/(b-a);
        for(T*p=a;p!=b;++p)
            *p=uniform;
        return total;
    }
    for(T*p=a;p!=b;++p)
        *p/=total;
    return total;
}
// Bit mask selecting which AlDeps fields take part in comparisons:
// 1 = englishSentenceLength, 2 = classPrevious, 4 = previous, 8 = j, 16 = Cj.
extern short CompareAlDeps;
// Key of the HMM jump tables: the context an alignment jump is conditioned on.
// Ordering and equality only look at the fields enabled in CompareAlDeps, so
// two keys that differ only in disabled fields compare equal.
template<class CLS>
class AlDeps{
public:
    int englishSentenceLength;
    CLS classPrevious;
    int previous;
    int j;
    CLS Cj;
    // Zero/value-initialises every field so that a default-constructed key
    // (e.g. one that is about to be filled from a stream) never carries
    // indeterminate values into a comparison.
    AlDeps()
        : englishSentenceLength(0),classPrevious(),previous(0),j(0),Cj()
    {}
    // l: sentence length, p: previous position, _j: position,
    // s1: class of the previous word, _Cj: class at position j.
    AlDeps(int l,int p=0,int _j=0,CLS s1=0,CLS _Cj=0)
        : englishSentenceLength(l),classPrevious(s1),previous(p),j(_j),Cj(_Cj)
    {}
    // Lexicographic comparison over the enabled fields, in declaration order
    // of the mask bits.
    friend bool operator<(const AlDeps&x,const AlDeps&y){
        if( (CompareAlDeps&1) && x.englishSentenceLength<y.englishSentenceLength ) return 1;
        if( (CompareAlDeps&1) && y.englishSentenceLength<x.englishSentenceLength ) return 0;
        if( (CompareAlDeps&2) && x.classPrevious<y.classPrevious ) return 1;
        if( (CompareAlDeps&2) && y.classPrevious<x.classPrevious ) return 0;
        if( (CompareAlDeps&4) && x.previous<y.previous ) return 1;
        if( (CompareAlDeps&4) && y.previous<x.previous ) return 0;
        if( (CompareAlDeps&8) && x.j<y.j ) return 1;
        if( (CompareAlDeps&8) && y.j<x.j ) return 0;
        if( (CompareAlDeps&16) && x.Cj<y.Cj ) return 1;
        if( (CompareAlDeps&16) && y.Cj<x.Cj ) return 0;
        return 0;
    }
    // Equal when neither key orders before the other.
    friend bool operator==(const AlDeps&x,const AlDeps&y)
    { return !( x<y || y<x ); }
};
// Hash functor for AlDeps keys. Only the fields enabled in CompareAlDeps are
// mixed in (add the field, then multiply by 31), which keeps the hash
// consistent with AlDeps equality.
template<class CLS>
class Hash_AlDeps{
public:
    unsigned int operator()(const AlDeps<CLS>&x) const
    {
        unsigned int h=0;
        if( (CompareAlDeps&1) ) {
            h+=x.englishSentenceLength;
            h*=31;
        }
        if( (CompareAlDeps&2) ) {
            h+=x.classPrevious;
            h*=31;
        }
        if( (CompareAlDeps&4) ) {
            h+=x.previous;
            h*=31;
        }
        if( (CompareAlDeps&8) ) {
            h+=x.j;
            h*=31;
        }
        if( (CompareAlDeps&16) ) {
            h+=x.Cj;
            h*=31;
        }
        return h;
    }
};
// Count/probability tables of the HMM alignment model.
//   CLS                 : type of the word-class identifiers stored in the keys.
//   MAPPERCLASSTOSTRING : type providing classString(), used when printing keys.
template<class CLS,class MAPPERCLASSTOSTRING>
class HMMTables
{
// Taken by getAlProb()/addAlCount() around accesses to the jump tables.
Mutex lock;
// Taken around lookups/insertions in init_alpha and init_beta respectively.
Mutex alphalock,betalock;
public:
double probabilityForEmpty;
bool updateProbabilityForEmpty;
// Start/end value arrays keyed by an int (the array length); each entry
// carries its own mutex.
hash_map<int, pair<Array<double>,Mutex> > init_alpha;
hash_map<int, pair<Array<double>,Mutex> > init_beta;
// Jump tables: observed values and, separately, predicted values, one
// FlexArray per AlDeps context.
map<AlDeps<CLS>,FlexArray<double> > alProb;
map<AlDeps<CLS>,FlexArray<double> > alProbPredicted;
int globalCounter;
double divSum;
double p0_count,np0_count;
// Mappers handed to printAlDeps(); stored as pointers, not owned.
const MAPPERCLASSTOSTRING*mapper1;
const MAPPERCLASSTOSTRING*mapper2;
public:
// Adds all counts of ht to this object's tables.
bool merge(HMMTables<CLS,MAPPERCLASSTOSTRING> & ht);
const HMMTables<CLS,MAPPERCLASSTOSTRING>*getThis()const {return this;}
HMMTables(double _probForEmpty,const MAPPERCLASSTOSTRING&m1,const MAPPERCLASSTOSTRING&m2);
virtual ~HMMTables();
// Value stored for the given context, or a uniform fallback if there is none.
virtual double getAlProb(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,int iter=0) const;
// Human-readable dump of alProb.
virtual void writeJumps(ostream&) const;
/**By Edward Gao, write out all things needed to rebuild the count table*/
virtual bool writeJumps(const char* alprob, const char* alpredict, const char* alpha, const char* beta )const;
// Reads files written by the four-argument writeJumps() and adds them to the tables.
virtual bool readJumps(const char* alprob, const char* alpredict, const char* alpha, const char* beta );
// Adds value / valuePredicted to alProb / alProbPredicted for the given context.
void addAlCount(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,double value,double valuePredicted);
virtual void readJumps(istream&);
// Copy the stored array for length I into x; return false if there is none.
virtual bool getAlphaInit(int I,Array<double>&x)const;
virtual bool getBetaInit(int I,Array<double> &x)const;
// Return the (array, mutex) entry for length I, creating it when missing.
pair<Array<double>, Mutex> &doGetAlphaInit(int I);
pair<Array<double>, Mutex> &doGetBetaInit(int I);
virtual double getProbabilityForEmpty()const
{return probabilityForEmpty;}
// One update step over alProb: for every context that also has predicted
// values, both arrays are normalised and each non-zero entry becomes
// old * observed / predicted, where `old` is the entry of the previous table
// (1.0 when `old` is null or has no such context). Problems are reported on
// cerr and the affected entry is left as it is.
void performGISIteration(const HMMTables<CLS,MAPPERCLASSTOSTRING>*old){
cout << "OLDSIZE: " << (old?(old->alProb.size()):0) << " NEWSIZE:"<< alProb.size()<< endl;
for(typename map<AlDeps<CLS>,FlexArray<double> >::iterator i=alProb.begin();i!=alProb.end();++i) {
if( alProbPredicted.count(i->first)){
normalize_if_possible(i->second.begin(),i->second.end());
normalize_if_possible(alProbPredicted[i->first].begin(),alProbPredicted[i->first].end());
for(int j=i->second.low();j<=i->second.high();++j){
if( i->second[j] )
if(alProbPredicted[i->first][j]>0.0 )
{
double op=1.0;
if( old && old->alProb.count(i->first) )
op=(old->alProb.find(i->first)->second)[j];
//cerr << "GIS: " << j << ' ' << " OLD:"
// << op << "*true:"
// << i->second[j] << "/pred:" << alProbPredicted[i->first][j] << " -> ";
i->second[j]= op*(i->second[j]/alProbPredicted[i->first][j]);
//cerr << i->second[j] << endl;
}
else{
// Observed mass without any predicted mass: cannot form the ratio.
cerr << "ERROR2 in performGISiteration: " << i->second[j] << endl;
}
}
}
else
// No predicted values at all for this context.
cerr << "ERROR in performGISIteration: " << alProbPredicted.count(i->first) << endl;
}
}
};
// Prints the fields of x that are enabled in CompareAlDeps as
// "label: value " pairs on a single line (no trailing newline). Class fields
// are printed through the mappers' classString(): mapper1 for the previous
// class, mapper2 for the class at position j.
template<class CLS,class MAPPERCLASSTOSTRING>
inline void printAlDeps(ostream&out,const AlDeps<CLS>&x,const MAPPERCLASSTOSTRING&mapper1,const MAPPERCLASSTOSTRING&mapper2)
{
    if( (CompareAlDeps&1)!=0 ) {
        out << "sentenceLength: " << x.englishSentenceLength<< ' ';
    }
    if( (CompareAlDeps&2)!=0 ) {
        out << "previousClass: " << mapper1.classString(x.classPrevious) << ' ';
    }
    if( (CompareAlDeps&4)!=0 ) {
        out << "previousPosition: " << x.previous << ' ';
    }
    if( (CompareAlDeps&8)!=0 ) {
        out << "FrenchPosition: " << x.j << ' ';
    }
    if( (CompareAlDeps&16)!=0 ) {
        out << "FrenchClass: " << mapper2.classString(x.Cj) << ' ';
    }
}
#endif

View File

@ -0,0 +1,217 @@
## Process this file with automake to produce Makefile.in
## Created by Anjuta
INCLUDES = \
-DPACKAGE_LOCALE_DIR=\""$(prefix)/$(DATADIRNAME)/locale"\" \
-DPACKAGE_SRC_DIR=\""$(srcdir)"\" \
-DPACKAGE_DATA_DIR=\""$(datadir)"\"
AM_CFLAGS =\
-Wall\
-g
bin_PROGRAMS = mgiza \
snt2cooc\
snt2plain\
plain2snt \
symal \
hmmnorm \
d4norm
d4norm_SOURCES = \
d4norm.cxx
d4norm_LDADD = \
-lgiza \
-lpthread
d4norm_LDFLAGS = \
-L.
d4norm_DEPENDENCIES = \
libgiza.a
# Per-target compiler flags for d4norm.
# NOTE(review): per the GCC manual -MT and -MF each consume the NEXT argument
# (target name / dependency file name). In this order "-MT -MD" would name the
# target "-MD" and "-MF -O6" would write dependencies to a file called "-O6",
# leaving optimisation off. Automake already emits its own dependency flags,
# so these four are presumably unnecessary -- confirm before changing. The
# same flag list is repeated for the other targets in this file.
d4norm_CXXFLAGS = \
-MT \
-MD \
-MP \
-MF \
-O6
# NOTE(review): NDEBUG and DEBUG are both defined; they are different macros,
# but verify that enabling both is intended.
d4norm_CPPFLAGS = \
-DNDEBUG \
-DWORDINDEX_WITH_4_BYTE \
-DBINARY_SEARCH_FOR_TTABLE \
-DDEBUG
hmmnorm_SOURCES = \
hmmnorm.cxx
hmmnorm_LDADD = \
-lgiza \
-lpthread
hmmnorm_LDFLAGS = \
-L.
hmmnorm_DEPENDENCIES = \
libgiza.a
hmmnorm_CXXFLAGS = \
-MT \
-MD \
-MP \
-MF \
-O6
hmmnorm_CPPFLAGS = \
-DNDEBUG \
-DWORDINDEX_WITH_4_BYTE \
-DBINARY_SEARCH_FOR_TTABLE \
-DDEBUG
symal_SOURCES = \
cmd.c \
cmd.h \
symal.cpp
plain2snt_SOURCES = \
plain2snt.cpp
snt2plain_SOURCES = \
snt2plain.cpp
snt2cooc_SOURCES = \
snt2cooc.cpp
snt2cooc_CPPFLAGS = \
-DNDEBUG \
-DWORDINDEX_WITH_4_BYTE \
-DBINARY_SEARCH_FOR_TTABLE \
-DDEBUG
mgiza_SOURCES = \
main.cpp
mgiza_DEPENDENCIES = \
libgiza.a
mgiza_CXXFLAGS = \
-MT \
-MD \
-MP \
-MF \
-O6
mgiza_CPPFLAGS = \
-DNDEBUG \
-DWORDINDEX_WITH_4_BYTE \
-DBINARY_SEARCH_FOR_TTABLE \
-DDEBUG
mgiza_LDFLAGS = \
-L.
mgiza_LDADD = \
-lgiza \
-lpthread
lib_LIBRARIES = \
libgiza.a
libgiza_a_SOURCES = \
alignment.cpp\
alignment.h \
AlignTables.cpp \
AlignTables.h \
Array.h \
Array2.h \
Array4.h \
ATables.cpp \
ATables.h \
collCounts.cpp \
collCounts.h \
common.h \
D4Tables.h \
D5Tables.h \
defs.h \
Dictionary.cpp \
Dictionary.h \
file_spec.h \
FlexArray.h \
ForwardBackward.cpp \
ForwardBackward.h \
getSentence.cpp \
getSentence.h \
Globals.h \
hmm.cpp \
hmm.h \
HMMTables.cpp \
HMMTables.h \
logprob.cpp \
logprob.h \
model1.cpp \
model1.h \
model2.cpp \
model2.h \
model2to3.cpp \
model3.cpp \
model3.h \
model3_viterbi.cpp \
model3_viterbi_with_tricks.cpp \
model345-peg.cpp \
MoveSwapMatrix.cpp \
MoveSwapMatrix.h \
myassert.cpp \
myassert.h \
mymath.h \
mystl.h \
NTables.cpp \
NTables.h \
Parameter.cpp \
Parameter.h \
parse.cpp \
Perplexity.cpp \
Perplexity.h \
Pointer.h \
reports.cpp \
SetArray.cpp \
SetArray.h \
syncObj.h \
transpair_model1.h \
transpair_model2.h \
transpair_model3.cpp \
transpair_model3.h \
transpair_model4.cpp \
transpair_model4.h \
transpair_model5.cpp \
transpair_model5.h \
transpair_modelhmm.h \
ttableDiff.hpp \
TTables.cpp \
TTables.h \
types.h \
utility.cpp \
utility.h \
Vector.h \
vocab.cpp \
vocab.h \
WordClasses.h
libgiza_a_CXXFLAGS = \
-MD \
-MP \
-MF \
-MT \
-O6
libgiza_a_CPPFLAGS = \
-DNDEBUG \
-DWORDINDEX_WITH_4_BYTE \
-DBINARY_SEARCH_FOR_TTABLE \
-DDEBUG
SUBDIRS = \
mkcls

View File

@ -0,0 +1,214 @@
## Process this file with automake to produce Makefile.in
## Created by Anjuta
INCLUDES = \
-DPACKAGE_LOCALE_DIR=\""$(prefix)/$(DATADIRNAME)/locale"\" \
-DPACKAGE_SRC_DIR=\""$(srcdir)"\" \
-DPACKAGE_DATA_DIR=\""$(datadir)"\"
AM_CFLAGS =\
-Wall\
-g
bin_PROGRAMS = mgiza \
snt2cooc\
snt2plain\
plain2snt \
symal \
hmmnorm \
d4norm
d4norm_SOURCES = \
d4norm.cxx
d4norm_LDADD = \
-lgiza \
-lpthread
d4norm_LDFLAGS = \
-L.
d4norm_DEPENDENCIES = \
libgiza.a
# Automake's generated compile rules already pass -MT/-MD/-MP/-MF with their
# arguments. Listed bare here, -MT and -MF each took the following token as
# their argument, so "-MF -O6" turned the optimisation flag into a dependency
# file name. Only the optimisation level is kept (-O3 is the highest -O<n>
# level GCC documents).
d4norm_CXXFLAGS = \
-O3
d4norm_CPPFLAGS = \
-DNDEBUG \
-DWORDINDEX_WITH_4_BYTE \
-DBINARY_SEARCH_FOR_TTABLE \
-DDEBUG
hmmnorm_SOURCES = \
hmmnorm.cxx
hmmnorm_LDADD = \
-lgiza \
-lpthread
hmmnorm_LDFLAGS = \
-L.
hmmnorm_DEPENDENCIES = \
libgiza.a
# Automake's generated compile rules already pass -MT/-MD/-MP/-MF with their
# arguments. Listed bare here, -MT and -MF each took the following token as
# their argument, so "-MF -O6" turned the optimisation flag into a dependency
# file name. Only the optimisation level is kept (-O3 is the highest -O<n>
# level GCC documents).
hmmnorm_CXXFLAGS = \
-O3
hmmnorm_CPPFLAGS = \
-DNDEBUG \
-DWORDINDEX_WITH_4_BYTE \
-DBINARY_SEARCH_FOR_TTABLE \
-DDEBUG
symal_SOURCES = \
cmd.c \
cmd.h \
symal.cpp
plain2snt_SOURCES = \
plain2snt.cpp
snt2plain_SOURCES = \
snt2plain.cpp
snt2cooc_SOURCES = \
snt2cooc.cpp
snt2cooc_CPPFLAGS = \
-DNDEBUG \
-DWORDINDEX_WITH_4_BYTE \
-DBINARY_SEARCH_FOR_TTABLE \
-DDEBUG
mgiza_SOURCES = \
main.cpp
mgiza_DEPENDENCIES = \
libgiza.a
# Automake's generated compile rules already pass -MT/-MD/-MP/-MF with their
# arguments. Listed bare here, -MT and -MF each took the following token as
# their argument, so "-MF -O6" turned the optimisation flag into a dependency
# file name. Only the optimisation level is kept (-O3 is the highest -O<n>
# level GCC documents).
mgiza_CXXFLAGS = \
-O3
mgiza_CPPFLAGS = \
-DNDEBUG \
-DWORDINDEX_WITH_4_BYTE \
-DBINARY_SEARCH_FOR_TTABLE \
-DDEBUG
mgiza_LDFLAGS = \
-L.
mgiza_LDADD = \
-lgiza \
-lpthread
lib_LIBRARIES = \
libgiza.a
libgiza_a_SOURCES = \
alignment.cpp\
alignment.h \
AlignTables.cpp \
AlignTables.h \
Array.h \
Array2.h \
Array4.h \
ATables.cpp \
ATables.h \
collCounts.cpp \
collCounts.h \
common.h \
D4Tables.h \
D5Tables.h \
defs.h \
Dictionary.cpp \
Dictionary.h \
file_spec.h \
FlexArray.h \
ForwardBackward.cpp \
ForwardBackward.h \
getSentence.cpp \
getSentence.h \
Globals.h \
hmm.cpp \
hmm.h \
HMMTables.cpp \
HMMTables.h \
logprob.cpp \
logprob.h \
model1.cpp \
model1.h \
model2.cpp \
model2.h \
model2to3.cpp \
model3.cpp \
model3.h \
model3_viterbi.cpp \
model3_viterbi_with_tricks.cpp \
model345-peg.cpp \
MoveSwapMatrix.cpp \
MoveSwapMatrix.h \
myassert.cpp \
myassert.h \
mymath.h \
mystl.h \
NTables.cpp \
NTables.h \
Parameter.cpp \
Parameter.h \
parse.cpp \
Perplexity.cpp \
Perplexity.h \
Pointer.h \
reports.cpp \
SetArray.cpp \
SetArray.h \
syncObj.h \
transpair_model1.h \
transpair_model2.h \
transpair_model3.cpp \
transpair_model3.h \
transpair_model4.cpp \
transpair_model4.h \
transpair_model5.cpp \
transpair_model5.h \
transpair_modelhmm.h \
ttableDiff.hpp \
TTables.cpp \
TTables.h \
types.h \
utility.cpp \
utility.h \
Vector.h \
vocab.cpp \
vocab.h \
WordClasses.h
# Automake's generated compile rules already pass -MT/-MD/-MP/-MF with their
# arguments. Listed bare here, -MF took the following token ("-MT") as the
# dependency file name. Only the optimisation level is kept (-O3 is the highest
# -O<n> level GCC documents).
libgiza_a_CXXFLAGS = \
-O3
libgiza_a_CPPFLAGS = \
-DNDEBUG \
-DWORDINDEX_WITH_4_BYTE \
-DBINARY_SEARCH_FOR_TTABLE \
-DDEBUG

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,235 @@
/*
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include "MoveSwapMatrix.h"
// Copies alignment _a, binds the translation pair _ef, sizes the score caches
// from ef.get_l()/ef.get_m(), and (because lazy evaluation starts disabled)
// fills the caches once for every column 1..m.
template<class TRANSPAIR>
MoveSwapMatrix<TRANSPAIR>::MoveSwapMatrix(const TRANSPAIR&_ef, const alignment&_a)
  : alignment(_a),
    ef(_ef),
    l(ef.get_l()),
    m(ef.get_m()),
    _cmove(l+1, m+1),
    _cswap(m+1, m+1),
    delmove(l+1, m+1,0),
    delswap(m+1, m+1,0),
    changed(l+2, 0),
    changedCounter(1),
    modelnr(_ef.modelnr()),
    lazyEvaluation(0),
    centerDeleted(0)
{
  // Score of the unmodified alignment; handed to every scoreOfMove/scoreOfSwap call.
  double baseScore=ef.scoreOfAlignmentForChange((*this));
  if( !lazyEvaluation )
  {
    for(WordIndex col=1;col<=m;col++)
    {
      updateJ(col, 0,baseScore);
    }
  }
}
// Recomputes every cached score that involves column j:
//   * _cmove(i, j) for all rows i in 0..l (rows stamped with the current
//     changedCounter are skipped when useChanged is set),
//   * _cswap(j, j2) for j2 > j and _cswap(j2, j) for j2 < j.
// Entries describing a no-op (same row / same aligned row) are set to 1.0.
// thisValue is forwarded unchanged to the TRANSPAIR scoring calls.
template<class TRANSPAIR>
void MoveSwapMatrix<TRANSPAIR>::updateJ(WordIndex j, bool useChanged,double thisValue)
{
  massert( lazyEvaluation==0 );

  for(WordIndex row=0;row<=l;row++)
  {
    if( useChanged!=0 && changed[row]==changedCounter )
      continue;
    if( get_al(j)==row )
      _cmove(row, j)=1.0;
    else
      _cmove(row, j)=ef.scoreOfMove((*this), row, j,thisValue);
  }

  // Pairs (j, later) with later > j.
  for(WordIndex later=j+1;later<=m;later++)
  {
    if( get_al(j)==get_al(later) )
      _cswap(j, later)=1.0;
    else
      _cswap(j, later)=ef.scoreOfSwap((*this), j, later,thisValue);
  }

  // Pairs (earlier, j) with earlier < j.
  for(WordIndex earlier=1;earlier<j;earlier++)
  {
    if( get_al(j)==get_al(earlier) )
      _cswap(earlier, j)=1.0;
    else
      _cswap(earlier, j)=ef.scoreOfSwap((*this), earlier, j,thisValue);
  }
}
// Recomputes the cached move scores _cmove(i, j) of row i for every column
// j in 1..m. A column already aligned to i gets the neutral value 1.0.
template<class TRANSPAIR>
void MoveSwapMatrix<TRANSPAIR>::updateI(WordIndex i,double thisValue)
{
  massert( lazyEvaluation==0);
  for(WordIndex col=1;col<=m;col++)
  {
    if( get_al(col)==i )
    {
      _cmove(i, col)=1.0;
    }
    else
    {
      _cmove(i, col)=ef.scoreOfMove((*this), i, col,thisValue);
    }
  }
}
// Debug dump to cout comparing the cached scores with freshly computed ones,
// then triggers massert(0).
// Move grid (one text line per row i): 'A' = j is aligned to i, 'b' = relative
// error > 1e-3, 'e' = relative error > 1e-10, 'E' = values differ at all,
// ' ' = identical.
// Swap grid (one text line per j): ' ' for j1 <= j, 'A' when both columns share
// a row, otherwise the result of comparing the cached and recomputed swap score.
template<class TRANSPAIR>
void MoveSwapMatrix<TRANSPAIR>::printWrongs()const{
  for(WordIndex i=0;i<=l;i++)
  {
    for(WordIndex j=1;j<=m;j++)
    {
      if( get_al(j)==i)
      {
        cout << "A";
        continue;
      }
      LogProb real=_cmove(i, j), wanted=ef.scoreOfMove((*this), i, j);
      char mark=' ';
      if( fabs(1.0-real/wanted)>1e-3 )
        mark='b';
      else if(fabs(1.0-real/wanted)>1e-10 )
        mark='e';
      else if(real!=wanted)
        mark='E';
      cout << mark;
    }
    cout << endl;
  }
  cout << endl;

  for(WordIndex j=1;j<=m;j++)
  {
    for(WordIndex j1=1;j1<=m;j1++)
    {
      if( !(j1>j) )
      {
        cout << ' ';
        continue;
      }
      if( get_al(j)==get_al(j1) )
        cout << 'A';
      else
        cout << (_cswap(j, j1)==ef.scoreOfSwap((*this), j, j1));
    }
    cout << endl;
  }
  massert(0);
}
// Consistency check of the caches against freshly computed scores.
// Returns 1 immediately under lazy evaluation (nothing is cached then).
// Otherwise returns 0, after logging the first mismatch to cerr, as soon as a
// cached move or swap score fails doubleEqual() against the recomputed value;
// returns 1 when every checked entry agrees.
template<class TRANSPAIR>
bool MoveSwapMatrix<TRANSPAIR>::isRight()const{
  if( lazyEvaluation )
    return 1;

  for(WordIndex i=0;i<=l;i++)
  {
    for(WordIndex j=1;j<=m;j++)
    {
      if( get_al(j)==i )
        continue;
      if( doubleEqual(_cmove(i, j), ef.scoreOfMove((*this), i, j)) )
        continue;
      cerr << "DIFF: " << i << " " << j << " " << _cmove(i, j) << " " << ef.scoreOfMove((*this), i, j) << endl;
      return 0;
    }
  }

  for(WordIndex j=1;j<=m;j++)
  {
    // Only pairs with j1 > j are cached, so start right after j.
    for(WordIndex j1=j+1;j1<=m;j1++)
    {
      if( get_al(j)==get_al(j1) )
        continue;
      if( doubleEqual(_cswap(j, j1), ef.scoreOfSwap((*this), j, j1)) )
        continue;
      cerr << "DIFFERENT: " << j << " " << j1 << " " << _cswap(j, j1) << " " << ef.scoreOfSwap((*this), j, j1) << endl;
      return 0;
    }
  }
  return 1;
}
// Re-aligns column _j to row _i (via set(_j, _i)) and refreshes the cached
// move/swap scores. How much is refreshed depends on modelnr:
//   5 or 6 : every column is recomputed from scratch;
//   4      : only rows/columns stamped in `changed` are recomputed;
//   other  : asserted to be 3; rows old_i and _i plus the columns aligned to
//            either of them are recomputed.
// Under lazy evaluation only the alignment itself is changed.
template<class TRANSPAIR>
void MoveSwapMatrix<TRANSPAIR>::doMove(WordIndex _i, WordIndex _j)
{
WordIndex old_i=get_al(_j);
// Hard-coded sanity bound: a previous row above 100 is reported and the move
// is silently dropped.
// NOTE(review): 100 presumably mirrors a maximum sentence length - confirm.
if(old_i>100){
cerr << "Error, invalid index set";
return;
}
// Lazy mode keeps no caches, so changing the alignment is all there is to do.
if( lazyEvaluation )
set(_j,_i);
else
{
if ( modelnr==5||modelnr==6 )
{
// Full refresh: every column's move and swap scores are recomputed.
set(_j, _i);
double thisValue=ef.scoreOfAlignmentForChange((*this));
for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
}
else if ( modelnr==4 )
{
// A new stamp value marks the rows touched by this move.
changedCounter++;
// Stamp the ranges [prev_cept(x), next_cept(x)] around both rows BEFORE the
// alignment changes ...
for(unsigned int k=prev_cept(old_i);k<=next_cept(old_i);++k)changed[k]=changedCounter;
for(unsigned int k=prev_cept(_i);k<=next_cept(_i);++k)changed[k]=changedCounter;
set(_j, _i);
// ... and again AFTER it; prev_cept/next_cept are re-queried on the updated
// alignment here (presumably because their results can differ after set() -
// confirm against alignment.h).
for(unsigned int k=prev_cept(old_i);k<=next_cept(old_i);++k)changed[k]=changedCounter;
for(unsigned int k=prev_cept(_i);k<=next_cept(_i);++k)changed[k]=changedCounter;
double thisValue=ef.scoreOfAlignmentForChange((*this));
// Refresh every stamped row completely ...
for(unsigned int i=0;i<=l;i++)
if(changed[i]==changedCounter)
updateI(i,thisValue);
// ... then every column aligned to a stamped row; useChanged=1 makes updateJ
// skip the rows that updateI just handled.
for(unsigned int j=1;j<=m;j++)
if( changed[get_al(j)]==changedCounter )
updateJ(j, 1,thisValue);
}
else
{
assert(modelnr==3);
set(_j, _i);
changedCounter++;
double thisValue=ef.scoreOfAlignmentForChange((*this));
// Only the two rows involved in the move are refreshed and stamped.
updateI(old_i,thisValue);
changed[old_i]=changedCounter;
updateI(_i,thisValue);
changed[_i]=changedCounter;
// Columns now aligned to either row get their remaining entries refreshed;
// the stamped rows are skipped inside updateJ (useChanged=1).
for(WordIndex j=1;j<=m;j++)
if( get_al(j)==_i || get_al(j)==old_i )
updateJ(j, 1,thisValue);
}
}
}
// Exchanges the rows aligned to columns _j1 and _j2 and refreshes the cached
// scores. The asserted precondition is cswap(_j1, _j2) > 1.
// Refresh strategy by modelnr: 5/6 recompute every column; 4 recomputes the
// rows stamped around both affected rows plus the columns aligned to them;
// anything else is asserted to be 3 and recomputes the two rows and the two
// columns. Under lazy evaluation only the alignment is changed.
template<class TRANSPAIR>
void MoveSwapMatrix<TRANSPAIR>::doSwap(WordIndex _j1, WordIndex _j2)
{
  assert( cswap(_j1, _j2)>1 );
  WordIndex i1=get_al(_j1), i2=get_al(_j2);

  if( lazyEvaluation==1 )
  {
    set(_j1, i2);
    set(_j2, i1);
    return;
  }

  if ( modelnr==5||modelnr==6 )
  {
    set(_j1, i2);
    set(_j2, i1);
    double thisValue=ef.scoreOfAlignmentForChange((*this));
    for(WordIndex col=1;col<=m;col++)
      updateJ(col, 0,thisValue);
    return;
  }

  if( modelnr==4 )
  {
    // Stamp the ranges [prev_cept(x), next_cept(x)] around both rows before
    // the alignment is modified.
    changedCounter++;
    for(unsigned int k=prev_cept(i1);k<=next_cept(i1);++k)
      changed[k]=changedCounter;
    for(unsigned int k=prev_cept(i2);k<=next_cept(i2);++k)
      changed[k]=changedCounter;
    set(_j1, i2);
    set(_j2, i1);
    double thisValue=ef.scoreOfAlignmentForChange((*this));
    for(unsigned int row=0;row<=l;row++)
    {
      if(changed[row]==changedCounter)
        updateI(row,thisValue);
    }
    for(unsigned int col=1;col<=m;col++)
    {
      if( changed[get_al(col)]==changedCounter )
        updateJ(col, 1,thisValue);
    }
    return;
  }

  assert(modelnr==3);
  set(_j1, i2);
  set(_j2, i1);
  changedCounter++;
  double thisValue=ef.scoreOfAlignmentForChange((*this));
  updateI(i1,thisValue);
  changed[i1]=changedCounter;
  updateI(i2,thisValue);
  changed[i2]=changedCounter;
  updateJ(_j1, 1,thisValue);
  updateJ(_j2, 1,thisValue);
}
#include "transpair_model3.h"
#include "transpair_model4.h"
#include "transpair_model5.h"
#include "transpair_modelhmm.h"
template class MoveSwapMatrix<transpair_model3>;
template class MoveSwapMatrix<transpair_model4>;
template class MoveSwapMatrix<transpair_model5>;
template class MoveSwapMatrix<transpair_modelhmm>;

View File

@ -0,0 +1,162 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/*--
MoveSwapMatrix: Efficient representation for moving and swapping
around in IBM3 training.
Franz Josef Och (30/07/99)
--*/
#ifndef moveswap2_costs_h_defined
#define moveswap2_costs_h_defined
#include "alignment.h"
#include "transpair_model3.h"
#include "myassert.h"
#include <set>
#include <map>
#include <vector>
extern short DoViterbiTraining;
// An alignment plus cached scores for every single-link move (_cmove) and
// every pairwise swap (_cswap) under the translation pair `ef`.
// Moves and swaps can be flagged as deleted (delmove/delswap) and can be
// restricted through the addUnTouchI/addUnTouchJ sets, in which case
// cmove()/cswap() answer -1 for the forbidden combinations.
template<class TRANSPAIR>
class MoveSwapMatrix: public alignment {
private:
  const TRANSPAIR&ef;
  const WordIndex l, m;                            // taken from ef.get_l() / ef.get_m()
  Array2<LogProb, Vector<LogProb> > _cmove, _cswap; // cached move / swap scores
  Array2<char, Vector<char> > delmove, delswap;     // 1 = move / swap flagged as deleted
  Vector<int> changed;                              // per-row stamp, compared with changedCounter
  int changedCounter;
  const int modelnr;
  bool lazyEvaluation;                              // when set, scores are computed on demand
  bool centerDeleted;
  // Per row i: the only columns j for which cmove(i, j) is permitted.
  std::map<int,std::set<int> > untouch_i;
  // Per column j: the only rows i that column j may be moved/swapped onto.
  std::map<int,std::set<int> > untouch_j;
public:
  // Adds j to the permitted set of row i. Rows <= 0 are ignored.
  void addUnTouchI(int i, int j){
    if(i>0){
      // operator[] creates the empty set on first use; no separate lookup needed.
      untouch_i[i].insert(j);
    }
  }
  // Adds i to the permitted set of column j. Columns <= 0 are ignored.
  void addUnTouchJ(int j,int i){
    if(j>0){
      untouch_j[j].insert(i);
    }
  }
  // Always reports success.
  bool check() const {
    return 1;
  }
  const TRANSPAIR&get_ef() const {
    return ef;
  }
  bool isCenterDeleted() const {
    return centerDeleted;
  }
  bool isLazy() const {
    return lazyEvaluation;
  }
  MoveSwapMatrix(const TRANSPAIR&_ef, const alignment&_a);
  void updateJ(WordIndex j, bool, double thisValue);
  void updateI(WordIndex i, double thisValue);
  void doMove(WordIndex _i, WordIndex _j);
  void doSwap(WordIndex _j1, WordIndex _j2);
  void delCenter() {
    centerDeleted = 1;
  }
  // Flags the move (x, y) as deleted.
  void delMove(WordIndex x, WordIndex y) {
    delmove(x, y) = 1;
  }
  // Flags the swap of x and y (requires y > x) as deleted, in both orientations.
  void delSwap(WordIndex x, WordIndex y) {
    massert(y>x);
    delswap(x, y) = 1;
    delswap(y, x) = 1;
  }
  // True when DoViterbiTraining is set or the move was flagged via delMove().
  bool isDelMove(WordIndex x, WordIndex y) const {
    return DoViterbiTraining || delmove(x, y);
  }
  // True when DoViterbiTraining is set or the swap was flagged via delSwap().
  bool isDelSwap(WordIndex x, WordIndex y) const {
    massert(y>x);
    return DoViterbiTraining || delswap(x, y);
  }
  // Score of moving column y onto row x.
  // Lazy mode: computed on demand (the untouch sets are not consulted).
  // Otherwise: -1 if a permitted set exists for x or for y and the partner is
  // not in it; else the cached value.
  LogProb cmove(WordIndex x, WordIndex y) const {
    massert( get_al(y)!=x );
    massert( delmove(x,y)==0 );
    if (lazyEvaluation)
      return ef.scoreOfMove(*this, x, y);
    std::map<int, std::set<int> >::const_iterator it;
    it = untouch_i.find(x);
    if(it!=untouch_i.end()){
      if(it->second.find(y) == it->second.end()) // y is not permitted for row x
        return -1;
    }
    it = untouch_j.find(y);
    if(it!=untouch_j.end()){
      if(it->second.find(x) == it->second.end()) // x is not permitted for column y
        return -1;
    }
    return _cmove(x, y);
  }
  // Score of swapping the rows of columns x and y (requires x < y).
  // Lazy mode: computed on demand (the untouch sets are not consulted).
  // Otherwise: -1 if either column has a permitted set that lacks the row it
  // would receive from the other column; else the cached value.
  LogProb cswap(WordIndex x, WordIndex y) const {
    massert(x<y);
    massert(delswap(x,y)==0);
    massert(get_al(x)!=get_al(y));
    if (lazyEvaluation)
      return ef.scoreOfSwap(*this, x, y);
    massert(y>x);
    std::map<int, std::set<int> >::const_iterator it1,it2;
    it1 = untouch_j.find(y);
    it2 = untouch_j.find(x);
    int nal1 = get_al(y); // row that column x would receive
    int nal2 = get_al(x); // row that column y would receive
    if(it1!=untouch_j.end()&&it1->second.find(nal2)==it1->second.end()){
      return -1;
    }
    if(it2!=untouch_j.end()&&it2->second.find(nal1)==it2->second.end()){
      return -1;
    }
    return _cswap(x, y);
  }
  void printWrongs() const;
  bool isRight() const;
  // Prints the alignment part, the translation pair and both score caches.
  friend ostream&operator<<(ostream&out, const MoveSwapMatrix<TRANSPAIR>&m) {
    // View m as its alignment base through a reference instead of making a
    // sliced temporary copy.
    return out << static_cast<const alignment&>(m) << "\nEF:\n" << m.ef << "\nCMOVE\n"
               << m._cmove << "\nCSWAP\n" << m._cswap << endl;
  }
};
#endif

View File

@ -0,0 +1,184 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include "NTables.h"
#include <iostream>
#include "defs.h"
#include <fstream>
#include "Parameter.h"
GLOBAL_PARAMETER(double,NTablesFactorGraphemes,"nSmooth","smoothing for fertility parameters (good value: 64): weight for wordlength-dependent fertility parameters",PARLEV_SMOOTH,64.0);
GLOBAL_PARAMETER(double,NTablesFactorGeneral,"nSmoothGeneral","smoothing for fertility parameters (default: 0): weight for word-independent fertility parameters",PARLEV_SMOOTH,0.0);
// Writes the fertility table to `filename`, one line per word id 1..noEW-1
// whose evlist frequency is positive. Each line starts with the word string
// (actual == true) or the numeric id, followed by MAX_FERTILITY values.
// Values not above PROB_SMOOTH are written as 0.
template <class VALTYPE>
void nmodel<VALTYPE>::printNTable(int noEW, const char* filename,
                                  const Vector<WordEntry>& evlist,
                                  bool actual) const
{
  cerr << "Dumping nTable to: " << filename << '\n';
  ofstream of(filename);
  for(WordIndex word=1; int(word) < noEW; word++){
    if (!(evlist[word].freq > 0))
      continue;
    if (actual)
      of << evlist[word].word << ' ' ;
    else
      of << word << ' ' ;
    for(WordIndex fert=0; fert < MAX_FERTILITY; fert++){
      VALTYPE value = getValue(word, fert);
      if (value <= PROB_SMOOTH)
        value = 0;
      of << value << ' ';
    }
    of << '\n';
  }
}
// Same layout as printNTable, but the values returned by getValue() are
// written as they are, without replacing small values by 0.
template <class VALTYPE>
void nmodel<VALTYPE>::printRealNTable(int noEW, const char* filename,
                                      const Vector<WordEntry>& evlist,
                                      bool actual) const
{
  cerr << "Dumping nTable to: " << filename << '\n';
  ofstream of(filename);
  for(WordIndex word=1; int(word) < noEW; word++){
    if (!(evlist[word].freq > 0))
      continue;
    if (actual)
      of << evlist[word].word << ' ' ;
    else
      of << word << ' ' ;
    for(WordIndex fert=0; fert < MAX_FERTILITY; fert++){
      const VALTYPE value = getValue(word, fert);
      of << value << ' ';
    }
    of << '\n';
  }
}
// Reads a fertility table from `filename`, overwriting the stored values.
// Each entry has the form: source_word_id p0 p1 ... p(MAX_FERTILITY-1)
// (the layout written by printNTable with numeric ids).
// Returns false only when the file cannot be opened. A token id above
// MAX_VOCAB_SIZE terminates the program, as before.
// Reading stops at the first token that is not a valid id, and at an entry
// that ends prematurely (reported on cerr); values read up to that point are
// kept.
template <class VALTYPE>
bool nmodel<VALTYPE>::readNTable(const char *filename){
  ifstream inf(filename);
  if(!inf.is_open()){
    return false;
  }
  cerr << "Reading fertility table from " << filename << "\n";
  WordIndex tok;
  int nFert=0;
  bool truncated=false;
  // Loop on the extraction itself: testing eof() before reading ran one extra
  // pass after the last entry (re-using the previous tok/prob) and never
  // terminated on malformed input.
  while(!truncated && (inf >> tok)){
    if (tok > MAX_VOCAB_SIZE){
      cerr << "NTables:readNTable(): unrecognized token id: " << tok
           <<'\n';
      exit(-1);
    }
    for(WordIndex i = 0; i < MAX_FERTILITY; i++){
      VALTYPE prob;
      if(!(inf >> prob)){
        cerr << "NTables:readNTable(): incomplete entry for token id: " << tok
             <<'\n';
        truncated=true;
        break;
      }
      getRef(tok, i)=prob;
    }
    if(!truncated)
      nFert++;  // count complete entries only
  }
  cerr << "Read " << nFert << " entries in fertility table.\n";
  inf.close();
  return true;
}
// Adds the values of table `n` onto this table, cell by cell, for every word
// id 1..noEW-1 with positive evlist frequency and every fertility below
// MAX_FERTILITY. The addend is n.getValue(), i.e. the other table's value as
// returned by its public accessor. Always returns true.
template <class VALTYPE>
bool nmodel<VALTYPE>::merge(nmodel<VALTYPE>& n,int noEW, const Vector<WordEntry>& evlist){
  for(WordIndex word=1; int(word) < noEW; word++){
    if (!(evlist[word].freq > 0))
      continue;
    for(WordIndex fert=0; fert < MAX_FERTILITY; fert++){
      const VALTYPE addend = n.getValue(word, fert);
      getRef(word,fert)+=addend;
    }
  }
  return true;
}
// Reads a fertility table from `filename` and ADDS its values onto the stored
// ones (readNTable overwrites instead).
// Each entry has the form: source_word_id p0 p1 ... p(MAX_FERTILITY-1).
// Returns false only when the file cannot be opened. A token id above
// MAX_VOCAB_SIZE terminates the program, as before.
// Reading stops at the first token that is not a valid id, and at an entry
// that ends prematurely (reported on cerr); values added up to that point are
// kept.
template <class VALTYPE>
bool nmodel<VALTYPE>::readAugNTable(const char *filename){
  ifstream inf(filename);
  if(!inf.is_open()){
    return false;
  }
  cerr << "Reading fertility table from " << filename << "\n";
  WordIndex tok;
  int nFert=0;
  bool truncated=false;
  // Loop on the extraction itself: testing eof() before reading ran one extra
  // pass after the last entry (adding the previous prob onto the previous
  // token's row again) and never terminated on malformed input.
  while(!truncated && (inf >> tok)){
    if (tok > MAX_VOCAB_SIZE){
      cerr << "NTables:readNTable(): unrecognized token id: " << tok
           <<'\n';
      exit(-1);
    }
    for(WordIndex i = 0; i < MAX_FERTILITY; i++){
      VALTYPE prob;
      if(!(inf >> prob)){
        cerr << "NTables:readAugNTable(): incomplete entry for token id: " << tok
             <<'\n';
        truncated=true;
        break;
      }
      getRef(tok, i)+=prob;
    }
    if(!truncated)
      nFert++;  // count complete entries only
  }
  cerr << "Read " << nFert << " entries in fertility table.\n";
  inf.close();
  return true;
}
template class nmodel<COUNT>;
//template class nmodel<PROB>;

View File

@ -0,0 +1,145 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef _ntables_h
#define _ntables_h 1
#include "Array2.h"
#include "Vector.h"
#include <cassert>
#include "defs.h"
#include "vocab.h"
#include "myassert.h"
#include "Globals.h"
#include "syncObj.h"
extern double NTablesFactorGraphemes, NTablesFactorGeneral;
// Fertility table: a two-dimensional array of VALTYPE indexed by
// (word id, fertility), with smoothed read access, locked accumulation and
// row-wise normalisation into another table.
template<class VALTYPE> class nmodel {
private:
// Backing storage, sized maxw x maxn and zero-initialised by the constructor.
Array2<VALTYPE, Vector<VALTYPE> > ntab;
public:
nmodel(int maxw, int maxn) :
ntab(maxw, maxn, 0.0) {
}
// Smoothed read access: 0.0 when n is beyond the table's second dimension,
// otherwise the stored value floored at PROB_SMOOTH. w must not be 0.
VALTYPE getValue(int w, unsigned int n) const {
massert(w!=0);
if (n>=ntab.getLen2())
return 0.0;
else
return max(ntab(w, n), VALTYPE(PROB_SMOOTH));
}
protected:
// Raw, writable access to a cell; no flooring is applied here.
inline VALTYPE&getRef(int w, int n) {
//massert(w!=0);
return ntab(w, n);
};
Mutex lock;
public:
// Adds t to cell (w, n) while holding `lock`.
inline void addValue(int w , int n,const VALTYPE& t){lock.lock();ntab(w,n)+=t;lock.unlock();};
public:
// Writes a normalised copy of this table into `write`, row by row, for rows
// 1..h1-1.
// If _evlist is given and at least one of NTablesFactorGraphemes /
// NTablesFactorGeneral is non-zero, each row is mixed with a distribution
// shared by all words of the same string length (weight
// NTablesFactorGraphemes) and a distribution shared by all words (weight
// NTablesFactorGeneral) before normalising. Otherwise every row is simply
// divided by its sum.
template<class COUNT> void normalize(nmodel<COUNT>&write,
const Vector<WordEntry>* _evlist) const {
int h1=ntab.getLen1(), h2=ntab.getLen2();
int nParams=0;
if (_evlist&&(NTablesFactorGraphemes||NTablesFactorGeneral)) {
// Longest word string in the vocabulary; sizes the per-length table.
size_t maxlen=0;
const Vector<WordEntry>&evlist=*_evlist;
for (unsigned int i=1; i<evlist.size(); i++)
maxlen=max(maxlen, evlist[i].word.length());
// counts(len, k): summed values over all words of string length len.
// nprob_general[k]: summed values over all words.
Array2<COUNT,Vector<COUNT> > counts(maxlen+1, MAX_FERTILITY+1, 0.0);
Vector<COUNT> nprob_general(MAX_FERTILITY+1,0.0);
for (unsigned int i=1; i<min((unsigned int)h1,
(unsigned int)evlist.size()); i++) {
int l=evlist[i].word.length();
for (int k=0; k<h2; k++) {
counts(l, k)+=getValue(i, k);
nprob_general[k]+=getValue(i, k);
}
}
// Normalise each non-empty length row of counts; sum2 collects the grand total.
COUNT sum2=0;
for (unsigned int i=1; i<maxlen+1; i++) {
COUNT sum=0.0;
for (int k=0; k<h2; k++)
sum+=counts(i, k);
sum2+=sum;
if (sum) {
// NOTE(review): `average` is accumulated below but only referenced by the
// commented-out debug output.
double average=0.0;
//cerr << "l: " << i << " " << sum << " ";
for (int k=0; k<h2; k++) {
counts(i, k)/=sum;
//cerr << counts(i,k) << ' ';
average+=k*counts(i, k);
}
//cerr << "avg: " << average << endl;
//cerr << '\n';
}
}
// NOTE(review): sum2 is not checked for zero before this division.
for (unsigned int k=0; k<nprob_general.size(); k++)
nprob_general[k]/=sum2;
for (int i=1; i<h1; i++) {
// l == -1 marks a row without a vocabulary entry: no length-based term for it.
int l=-1;
if ((unsigned int)i<evlist.size())
l=evlist[i].word.length();
COUNT sum=0.0;
for (int k=0; k<h2; k++)
sum+=getValue(i, k)+((l==-1) ? 0.0 : (counts(l, k)
*NTablesFactorGraphemes)) + NTablesFactorGeneral
*nprob_general[k];
assert(sum);
// NOTE(review): `sum` above includes the NTablesFactorGeneral term, but below
// that term is added after the division by sum rather than being divided with
// the rest - confirm whether this asymmetry is intended.
for (int k=0; k<h2; k++) {
write.getRef(i, k)=(getValue(i, k)+((l==-1) ? 0.0
: (counts(l, k)*NTablesFactorGraphemes)))/sum
+ NTablesFactorGeneral*nprob_general[k];
nParams++;
}
}
} else
// Plain normalisation: each row divided by its own sum.
for (int i=1; i<h1; i++) {
COUNT sum=0.0;
for (int k=0; k<h2; k++)
sum+=getValue(i, k);
assert(sum);
for (int k=0; k<h2; k++) {
write.getRef(i, k)=getValue(i, k)/sum;
nParams++;
}
}
cerr << "NTable contains " << nParams << " parameter.\n";
}
bool merge(nmodel<VALTYPE>& n, int noEW, const Vector<WordEntry>& evlist);
// Resets every cell, including row 0, to 0.
void clear() {
int h1=ntab.getLen1(), h2=ntab.getLen2();
for (int i=0; i<h1; i++)
for (int k=0; k<h2; k++)
ntab(i, k)=0;
}
void printNTable(int noEW, const char* filename,
const Vector<WordEntry>& evlist, bool) const;
void printRealNTable(int noEW, const char* filename,
const Vector<WordEntry>& evlist, bool) const;
bool readAugNTable(const char *filename);
bool readNTable(const char *filename);
};
#endif

View File

@ -0,0 +1,144 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include "Parameter.h"
#include "fstream"
#include "unistd.h"
#include <sstream>
#include <strstream>
bool absolutePathNames=0;
string ParameterPathPrefix;
bool ParameterChangedFlag=0;
// Writes one "name value" line to `of` for every parameter in `parset` whose
// level equals `level` (or for all levels when level == -1) and whose onlyCopy
// flag is 0.
// For filename parameters with a non-empty value that does not start with '/',
// the value is prefixed with the current working directory (when
// absolutePathNames is set) and/or with ParameterPathPrefix (when non-empty).
// Returns 0 if the stream is not usable, 1 otherwise.
bool writeParameters(ofstream&of,const ParSet&parset,int level)
{
  if(!of)return 0;
  for(ParSet::const_iterator i=parset.begin();i!=parset.end();++i)
  {
    if(((*i)->getLevel()==level||level==-1)&&(*i)->onlyCopy==0)
    {
      // Render the value once into a string to inspect it. ostringstream owns
      // its buffer; the former ostrstream::str() froze the buffer and leaked
      // it for every parameter written.
      ostringstream os;
      (*i)->printValue(os);
      const string s(os.str());
      const bool relativeFile=(*i)->isFilename()&&s.length()&&s[0]!='/';
      of << (*i)->getString() << " ";
      if( absolutePathNames&&relativeFile )
      {
        char path[1024];
        // getcwd yields NULL on failure and leaves the buffer unspecified, so
        // only emit the prefix when it succeeded.
        if( getcwd(path,sizeof(path))!=0 )
          of << path << '/';
        else
          cerr << "WARNING: cannot determine current directory for parameter '" << (*i)->getString() << "'.\n";
      }
      if( ParameterPathPrefix.length()&&relativeFile )
        of << ParameterPathPrefix << '/';
      (*i)->printValue(of);
      of << endl;
    }
  }
  return 1;
}
// Reads "name value" lines from `f` and applies each through makeSetCommand
// with the given verbosity and level; failures are reported on cerr and
// reading continues.
// Lines whose first word contains no letter or digit (blank lines, a lone
// symbol) are skipped: their normalised name is empty, and an empty name
// prefix-matches every parameter in makeSetCommand.
// Returns 0 if the stream is not usable, 1 otherwise.
bool readParameters(ifstream&f,const ParSet&parset,int verb,int level)
{
  if(!f)return 0;
  string line;
  while(getline(f,line))
  {
    istringstream eingabe(line);
    string s1,s2;
    eingabe>>s1>>s2;
    if(simpleString(s1).empty())
      continue;
    if(makeSetCommand(s1,s2,parset,verb,level)==0)
      cerr << "ERROR: could not set: (C) " << s1 << " " << s2 << endl;
  }
  return 1;
}
// Sets the parameter named _s1 (normalised with simpleString) to s2.
// An exact name match wins immediately. Otherwise _s1 is treated as a prefix
// and must match exactly one parameter name.
// The value is applied only when level == -1 or equals the parameter's level;
// a level mismatch is reported (verb > 1) but still counts as "found".
// Returns 1 when a unique parameter was identified, 0 when the name is unknown
// or ambiguous (both reported on cerr).
bool makeSetCommand(string _s1,string s2,const ParSet&parset,int verb,int level)
{
  ParPtr prefixHit;
  int prefixHits=0;
  string s1=simpleString(_s1);
  for(ParSet::const_iterator it=parset.begin();it!=parset.end();++it)
  {
    if( *(*it)==s1 )
    {
      if( level==-1 || level==(*it)->getLevel() )
      {
        (*it)->setParameter(s2,verb);
      }
      else if(verb>1)
      {
        cerr << "ERROR: Could not set: (A) " << s1 << " " << s2 << " " << level << " " << (*it)->getLevel() << endl;
      }
      return 1;
    }
    if( (*it)->getString().substr(0,s1.length())==s1 )
    {
      prefixHit=(*it);
      ++prefixHits;
    }
  }

  if(prefixHits==1)
  {
    if( level==-1 || level==prefixHit->getLevel() )
    {
      prefixHit->setParameter(s2,verb);
    }
    else if( verb>1 )
    {
      cerr << "ERROR: Could not set: (B) " << s1 << " " << s2 << " " << level << " " << prefixHit->getLevel() << endl;
    }
    return 1;
  }

  if( prefixHits>1 )
    cerr << "ERROR: ambiguous parameter '" << s1 << "'.\n";
  else
    cerr << "ERROR: parameter '" << s1 << "' does not exist.\n";
  return 0;
}
// Prints, one per line via printAt(), every parameter of `parset` whose level
// equals `level` (all levels when level == -1) and whose onlyCopy flag is 0.
// Returns the stream; nothing is printed when the stream is not usable.
ostream& printPars(ostream&of,const ParSet&parset,int level)
{
  if(!of)
    return of;
  ParSet::const_iterator it=parset.begin();
  while(it!=parset.end())
  {
    const bool levelMatches=((*it)->getLevel()==level||level==-1);
    if(levelMatches&&(*it)->onlyCopy==0)
    {
      (*it)->printAt(of);
      of << endl;
    }
    ++it;
  }
  return of;
}
// Normalises a parameter name: lower-cases every character and keeps only
// 'a'-'z' and '0'-'9'; everything else is dropped.
string simpleString(const string s)
{
  string k;
  k.reserve(s.length());
  for(string::const_iterator it=s.begin();it!=s.end();++it)
  {
    // tolower() is only defined for values representable as unsigned char (or
    // EOF); a plain char may be negative, so convert first.
    const char c=static_cast<char>(tolower(static_cast<unsigned char>(*it)));
    if( (c>='a'&&c<='z')||(c>='0'&&c<='9') )
      k += c;
  }
  return k;
}
// Returns the process-wide parameter registry. It is a function-local static,
// so it is constructed on the first call.
ParSet&getGlobalParSet()
{
  static ParSet globalParameters;
  return globalParameters;
}

View File

@ -0,0 +1,200 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef PARAMETER_H_DEFINED
#define PARAMETER_H_DEFINED
#include "mystl.h"
#include <set>
#include "Pointer.h"
#include <string>
#include "Globals.h"
#include <fstream>
#include <cstring>
// Shared helper for the integral mConvert overloads.
// Returns 1 for "yes"/"y"/"true"/"t", 0 for "no"/"n"/"false"/"f" (both
// case-insensitive) and -1 for anything else. A recognised keyword is echoed
// to cerr as "TRUE" / "FALSE".
inline int mConvertBoolWord(const string&s)
{
  static const char*const trueWords[]={"yes","y","true","t"};
  static const char*const falseWords[]={"no","n","false","f"};
  for(unsigned int k=0;k<4;++k)
    if( strcasecmp(s.c_str(),trueWords[k])==0 ) { cerr << "TRUE\n";return 1; }
  for(unsigned int k=0;k<4;++k)
    if( strcasecmp(s.c_str(),falseWords[k])==0 ) { cerr << "FALSE\n";return 0; }
  return -1;
}
// Shared body of the integral overloads: a boolean keyword maps to 1/0,
// anything else goes through atoi(). Stores the result in v and returns it.
template<class T>
inline T mConvertIntegral(const string&s,T&v)
{
  const int word=mConvertBoolWord(s);
  return v=static_cast<T>(word>=0 ? word : atoi(s.c_str()));
}
// mConvert(s, out): parses s into `out` and returns the parsed value.
// The integral and bool overloads accept the boolean keywords listed above.
inline unsigned int mConvert(const string&s,unsigned int &i) { return mConvertIntegral(s,i); }
inline int mConvert(const string&s,int &i) { return mConvertIntegral(s,i); }
inline double mConvert(const string&s,double &d) { return d=atof(s.c_str()); }
inline double mConvert(const string&s,float &d) { return d=atof(s.c_str()); }
inline string mConvert(const string&s,string&n) { return n=s; }
inline bool mConvert(const string&s,bool&n) { return mConvertIntegral(s,n); }
inline short mConvert(const string&s,short&n) { return mConvertIntegral(s,n); }
inline unsigned short mConvert(const string&s,unsigned short&n) { return mConvertIntegral(s,n); }
string simpleString(const string s);
// Polynomial string hash: sum = 5*sum + character, over all characters of s.
// The accumulation is done in unsigned arithmetic so that long keys wrap
// around with defined behaviour instead of overflowing a signed int; the
// result is then converted back to int.
inline int Hashstring(const string& s)
{
  unsigned int sum=0;
  for(string::const_iterator i=s.begin(),end=s.end();i!=end;++i)
    sum=5u*sum+static_cast<unsigned int>(*i);
  return static_cast<int>(sum);
}
// Type-independent base of a named, described parameter.
// The name is stored in normalised form (simpleString), and names are compared
// in that form. `ifChanged` points at a caller-owned flag that setChanged()
// raises.
class _Parameter
{
 protected:
  string name;         // normalised parameter name
  bool *ifChanged;     // flag set to true by setChanged()
  string description;
  int level;
  bool filename;       // true when the value denotes a file name
 public:
  int onlyCopy;

  _Parameter(string n,bool&b,string desc,int _level,bool _onlyCopy)
    : name(simpleString(n)),
      ifChanged(&b),
      description(desc),
      level(_level),
      filename(0),
      onlyCopy(_onlyCopy)
  {
  }
  virtual ~_Parameter()
  {
  }

  // True when s, after normalisation, equals this parameter's name.
  bool operator==(const string&s)const
  {
    return name== simpleString(s);
  }
  void setChanged()
  {
    *ifChanged=true;
  }

  // Implemented by Parameter<T>.
  virtual bool setParameter(string s2,int)=0;
  virtual ostream&printAt(ostream&out)=0;
  virtual ostream&printValue(ostream&out)=0;

  const string&getString() const
  {
    return name;
  }
  int getLevel() const
  {
    return level;
  }
  bool isFilename()
  {
    return filename;
  }
  void setFilename(bool x=1)
  {
    filename=x;
  }

  // Equality, ordering and hashing all go by the normalised name only.
  friend bool operator==(const _Parameter&a,const _Parameter&b)
  {
    return a.name==b.name;
  }
  friend bool operator<(const _Parameter&a,const _Parameter&b)
  {
    return a.name<b.name;
  }
  friend int Hash(const _Parameter&aaa)
  {
    return Hashstring(aaa.name);
  }
  friend ostream&operator<<(ostream&out,const _Parameter&p)
  {
    return out<<"Parameter: "<<p.name <<endl;
  }
};
// Parameter bound to a caller-owned variable of type T; the variable is
// referenced through a pointer, not copied.
template<class T>
class Parameter : public _Parameter
{
 private:
  T*t;   // the variable this parameter reads and writes
 public:
  Parameter(string n,bool&b,string desc,T&_t,int level=0,bool onlyCopy=0)
    : _Parameter(n,b,desc,level,onlyCopy),
      t(&_t)
  {
  }
  virtual ~Parameter()
  {
  }

  // Parses s2 with mConvert. If the parsed value equals the current one,
  // nothing happens and 0 is returned. Otherwise the variable is updated, the
  // changed flag is raised, the change is announced on cout when verb > 1,
  // and 1 is returned.
  virtual bool setParameter(string s2,int verb)
  {
    T parsed;
    if( *t==mConvert(s2,parsed) )
      return 0;

    bool announced=0;
    if( verb>1 )
    {
      cout << "Parameter '"<<name <<"' changed from '"<<*t<<"' to '";
      announced=1;
    }
    mConvert(s2,*t);
    if( announced )
      cout << *t <<"'\n";
    setChanged();
    return 1;
  }

  // Prints "name = value (description)".
  virtual ostream&printAt(ostream&out)
  {
    out << name << " = " << *t << " (" << description << ")";
    return out;
  }
  // Prints the bare value.
  virtual ostream&printValue(ostream&out)
  {
    out << *t;
    return out;
  }
};
typedef MP<_Parameter> ParPtr;
// Set of parameter pointers whose insert() reports on cerr when an equivalent
// element is already present. The underlying set insert is still attempted.
class ParSet : public set<ParPtr>
{
 public:
  void insert(const ParPtr&x)
  {
    const bool alreadyPresent=(count(x)!=0);
    if( alreadyPresent )
    {
      cerr << "ERROR: element " << x->getString() << " already inserted.\n";
    }
    set<ParPtr>::insert(x);
  }
};
bool makeSetCommand(string s1,string s2,const ParSet&pars,int verb=1,int level= -1);
ostream&printPars(ostream&out,const ParSet&pars,int level=-1);
bool writeParameters(ofstream&of,const ParSet&parset,int level=0);
bool readParameters(ifstream&f,const ParSet&parset,int verb=2,int level=0);
ParSet&getGlobalParSet();
extern bool ParameterChangedFlag;
template<class T>const T&addGlobalParameter(const char *name,const char *description,int level,T*adr,const T&init)
{
*adr=init;
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
return init;
}
template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *description,int level,T*adr,const T&init)
{
*adr=init;
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
return init;
}
// Registers a global parameter under a primary name plus two aliases. The
// aliases are registered with level -1. Stores init in *adr and returns init.
template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *name3,const char *description,int level,T*adr,const T&init)
{
  *adr=init;
  getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
  const char*const aliases[]={name2,name3};
  const unsigned int aliasCount=sizeof(aliases)/sizeof(aliases[0]);
  for(unsigned int a=0;a<aliasCount;++a)
    getGlobalParSet().insert(new Parameter<T>(aliases[a],ParameterChangedFlag,description,*adr,-1));
  return init;
}
// Registers a global parameter under a primary name plus three aliases. The
// aliases are registered with level -1. Stores init in *adr and returns init.
template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *name3,const char *name4,const char *description,int level,T*adr,const T&init)
{
  *adr=init;
  getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
  const char*const aliases[]={name2,name3,name4};
  const unsigned int aliasCount=sizeof(aliases)/sizeof(aliases[0]);
  for(unsigned int a=0;a<aliasCount;++a)
    getGlobalParSet().insert(new Parameter<T>(aliases[a],ParameterChangedFlag,description,*adr,-1));
  return init;
}
void MakeParameterOptimizing(istream&file,string resultingParameters);
// Convenience macros: each one defines a variable VARNAME of type TYP and, in
// the same statement, registers it through addGlobalParameter(). That call
// writes INIT through &VARNAME and returns INIT, which then becomes the
// variable's initial value. The numeric suffix is the number of names
// (primary name + aliases) the parameter is registered under.
#define GLOBAL_PARAMETER(TYP,VARNAME,NAME,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,DESCRIPTION,LEVEL,&VARNAME,INIT);
#define GLOBAL_PARAMETER2(TYP,VARNAME,NAME,NAME2,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,DESCRIPTION,LEVEL,&VARNAME,INIT);
#define GLOBAL_PARAMETER3(TYP,VARNAME,NAME,NAME2,NAME3,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,NAME3,DESCRIPTION,LEVEL,&VARNAME,INIT);
#define GLOBAL_PARAMETER4(TYP,VARNAME,NAME,NAME2,NAME3,NAME4,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,NAME3,NAME4,DESCRIPTION,LEVEL,&VARNAME,INIT);
void setParameterLevelName(unsigned int i,string x);
#endif

View File

@ -0,0 +1,42 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/* Perplexity.cc
* =============
* Mike Jahr, 7/21/99
* Machine Translation group, WS99
* Center for Language and Speech Processing
*
* Last Modified by: Yaser Al-Onaizan, August 17, 1999
*
* Simple class used to calculate cross entropy and perplexity
* of models.
*/
#include "Perplexity.h"
// Appends a snapshot (model id, current perplexity, current cross entropy)
// to the history vectors while holding the object's mutex.
void Perplexity::record(string model){
  mutex.lock();
  const double currentPerplexity = perplexity();
  const double currentCrossEntropy = cross_entropy();
  modelid.push_back(model);
  perp.push_back(currentPerplexity);
  ce.push_back(currentCrossEntropy);
  mutex.unlock();
}

View File

@ -0,0 +1,115 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/* Perplexity.h
* ============
* Mike Jahr, 7/15/99
* Machine Translation group, WS99
* Center for Language and Speech Processing
*
* Last Modified by: Yaser Al-Onaizan, August 17, 1999
*
* Simple class used to calculate cross entropy and perplexity
* of models.
*/
#ifndef _PERPLEXITY_H
#define _PERPLEXITY_H
#include <cmath>
#include <fstream>
#include "Vector.h"
#include "defs.h"
#include "Array2.h"
#include "Globals.h"
#include "syncObj.h"
#define CROSS_ENTROPY_BASE 2
class Perplexity {
private:
double sum;
double wc;
Array2<double, Vector<double> > *E_M_L;
Vector<string> modelid;
Vector<double > perp;
Vector<double > ce;
Vector<string> name ;
Mutex mutex;
public:
~Perplexity() { delete E_M_L;}
Perplexity() {
E_M_L = new Array2<double, Vector<double> >(MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH);
unsigned int l, m ;
Vector<double> fact(MAX_SENTENCE_LENGTH, 1.0);
for (m = 2 ; m < MAX_SENTENCE_LENGTH ; m++)
fact[m] = fact[m-1] * m ;
for (m = 1 ; m < MAX_SENTENCE_LENGTH ; m++)
for (l = 1 ; l < MAX_SENTENCE_LENGTH ; l++) {
(*E_M_L)(l, m) = log (pow((LAMBDA * l), double(m)) * exp(-LAMBDA * double(l)) /
(fact[m])) ;
}
sum = 0 ;
wc = 0;
perp.clear();
ce.clear();
name.clear();
}
inline void clear() {
mutex.lock();
sum = 0 ;
wc = 0 ;
mutex.unlock();
}
size_t size() const {return(min(perp.size(), ce.size()));}
inline void addFactor(const double p, const double count, const int l,
const int m,bool withPoisson) {
mutex.lock();
wc += count * m ; // number of french words
sum += count * ( (withPoisson?((*E_M_L)(l, m)):0.0) + p) ;
mutex.unlock();
}
inline double perplexity() const {
return exp( -1*sum / wc);
}
inline double cross_entropy() const {
return (-1.0*sum / (log(double(CROSS_ENTROPY_BASE)) * wc));
}
inline double word_count() const {
return wc;
}
inline double getSum() const {
return sum ;
}
void record(string model);
friend void generatePerplexityReport(const Perplexity&, const Perplexity&,
const Perplexity&, const Perplexity&,
ostream&, int, int, bool);
};
#endif

View File

@ -0,0 +1,175 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef HEADER_Pointer_DEFINED
#define HEADER_Pointer_DEFINED
#include <cassert>
#include <iostream>
// Thin, non-owning wrapper around a raw T*; base of the pointer classes in
// this header. It never deletes the pointee.
template<class T>
class SmartPointer
{
 protected:
  T*p;
 public:
  SmartPointer(T*raw=0)
    : p(raw) {}
  // Dereference; the pointer must not be null.
  T&operator*() const
    { return *p; }
  T*operator->() const
    { return p; }
  // True when a pointee is set.
  operator bool() const
    { return p ? true : false; }
  // Raw pointer access.
  T*ptr() const
    { return p; }
};
// Prints the pointee, or the word "nullpointer" when no pointee is set.
template<class T> inline ostream &operator<<(ostream&out,const SmartPointer<T>&s)
{
  if( !s.ptr() )
    return out <<"nullpointer";
  return out << *s;
}
// Thin, non-owning wrapper around a raw const T*; read-only counterpart of
// SmartPointer. It never deletes the pointee.
template<class T>
class SmartPointerConst
{
 protected:
  const T*p;
 public:
  SmartPointerConst(const T*raw=0)
    : p(raw) {}
  // Dereference; the pointer must not be null.
  const T&operator*() const
    { return *p; }
  const T*operator->() const
    { return p; }
  // True when a pointee is set.
  operator bool() const
    { return p ? true : false; }
  // Raw pointer access.
  const T*ptr() const
    { return p; }
};
// Prints the pointee, or the word "nullpointer" when no pointee is set.
template<class T> inline ostream &operator<<(ostream&out,const SmartPointerConst<T>&s)
{
  if( !s.ptr() )
    return out <<"nullpointer";
  return out << *s;
}
// UP: pointer wrapper compared by address (identity), hashed by pointee.
template <class T>
class UP : public SmartPointer<T>
{
 public:
  UP(T*raw=0)
    : SmartPointer<T>(raw) {}
};
// Equal when both wrap the same address.
template<class T> inline bool operator==(const UP<T>&s1,const UP<T>&s2)
{
  return s1.ptr()==s2.ptr();
}
// Orders by raw address.
template<class T> inline bool operator<(const UP<T>&s1,const UP<T>&s2)
{
  return s1.ptr() < s2.ptr();
}
// Hash of the pointee, or 0 for a null pointer.
template<class T> inline int Hash(const UP<T> &wp)
{
  if( !wp.ptr() )
    return 0;
  return Hash(*wp);
}
// UPConst: const-pointer wrapper compared by address (identity), hashed by
// pointee.
template <class T>
class UPConst : public SmartPointerConst<T>
{
 public:
  UPConst(const T*raw=0)
    : SmartPointerConst<T>(raw) {}
};
// Equal when both wrap the same address.
template<class T> inline bool operator==(const UPConst<T>&s1,const UPConst<T>&s2)
{
  return s1.ptr()==s2.ptr();
}
// Orders by raw address.
template<class T> inline bool operator<(const UPConst<T>&s1,const UPConst<T>&s2)
{
  return s1.ptr()<s2.ptr();
}
// Hash of the pointee, or 0 for a null pointer.
template<class T> inline int Hash(const UPConst<T> &wp)
{
  if( !wp.ptr() )
    return 0;
  return Hash(*wp);
}
// MP: pointer wrapper compared through the pointees (value semantics).
template <class T>
class MP : public SmartPointer<T>
{
 public:
  MP(T*raw=0)
    : SmartPointer<T>(raw) {}
};
// Compares the pointees; both pointers must be non-null (asserted).
template <class T> inline bool operator==(const MP<T>&s1,const MP<T>&s2)
{
  assert(s1);
  assert(s2);
  return *s1==*s2;
}
// Orders by the pointees; both pointers must be non-null (asserted).
template <class T> inline bool operator<(const MP<T>&s1,const MP<T>&s2)
{
  assert(s1);
  assert(s2);
  return *s1 < *s2;
}
// Hash of the pointee, or 0 for a null pointer.
template <class T> inline int Hash(const MP<T> &wp)
{
  if( !wp.ptr() )
    return 0;
  return Hash(*wp);
}
// MPConst: const-pointer wrapper compared through the pointees.
template <class T>
class MPConst : public SmartPointerConst<T>
{
 public:
  MPConst(const T*raw=0)
    : SmartPointerConst<T>(raw) {}
};
// Compares the pointees; both pointers must be non-null (asserted).
template <class T> inline bool operator==(const MPConst<T>&s1,const MPConst<T>&s2)
{
  assert(s1);
  assert(s2);
  return *s1== *s2;
}
// Orders by the pointees; both pointers must be non-null (asserted).
template <class T> inline bool operator<(const MPConst<T>&s1,const MPConst<T>&s2)
{
  assert(s1);
  assert(s2);
  return *s1 < *s2;
}
// Hash of the pointee, or 0 for a null pointer.
template <class T> inline int Hash(const MPConst<T> &wp)
{
  if( !wp.ptr() )
    return 0;
  return Hash(*wp);
}
// DELP: owning pointer. The pointee is deleted when the DELP is destroyed,
// reassigned, or given a new pointer through set(). Copy construction is
// disabled; assignment transfers ownership from the right-hand side.
template <class T>
class DELP : public SmartPointer<T>
{
 private:
  // Not implemented: copying an owner is not allowed.
  DELP(const DELP<T>&x);
 public:
  // Transfers ownership from x to *this and leaves x null.
  // Self-assignment is a no-op (previously it deleted the owned object and
  // left the pointer null).
  const DELP<T>&operator=(DELP<T>&x)
    {
      if( this!=&x )
	{
	  delete this->p;
	  this->p=x.p;
	  x.p=0;
	}
      return *this;
    }
  ~DELP()
    { delete this->p;this->p=0;}
  // Takes ownership of _p (may be null).
  DELP(T*_p=0)
    : SmartPointer<T>(_p) {}
  // Replaces the owned object with _p. Passing the pointer that is already
  // owned keeps it alive (previously it was deleted and left dangling).
  void set(T*_p)
    {
      if( this->p!=_p )
	delete this->p;
      this->p=_p;
    }
  // Compares the pointees; both pointers must be non-null.
  friend bool operator==(const DELP<T>&s1,const DELP<T>&s2)
    {
      return *(s1.p)== *(s2.p);
    }
  // Orders by the pointees; both pointers must be non-null.
  friend bool operator<(const DELP<T>&s1,const DELP<T>&s2)
    {
      return *(s1.p) < *(s2.p);
    }
  // Hash of the pointee, or 0 for a null pointer.
  friend inline int Hash(const DELP<T> &wp)
    {
      if(wp.p)
	return Hash(*wp.p);
      else
	return 0;
    }
};
#endif

View File

@ -0,0 +1,5 @@
//#include "SetArray.h"
#include "Parameter.h"

View File

@ -0,0 +1,159 @@
/*
Array of set, for fast access of dictionary, and most important,
be threadsafe
*/
#ifndef __SET_ARRAY_H__
#define __SET_ARRAY_H__
#include <map>
#include <vector>
#include "defs.h"
#include "vocab.h"
#include <cstdio>
#include <cstdlib>
#include <pthread.h>
#include "syncObj.h"
// Plain (count, probability) record stored per word pair.
template <class COUNT, class PROB>
class LpPair {
 public:
  COUNT count ;
  PROB prob ;
 public:
  // Both fields start at zero.
  LpPair()
    : count(0), prob(0)
  {}
  // Explicit initial count and probability.
  LpPair(COUNT initialCount, PROB initialProb)
    : count(initialCount), prob(initialProb)
  {}
} ;
// Array of per-source-word maps (target id -> count/probability), with one
// mutex per row so that individual rows can be accessed under a lock.
//
// Fixes relative to the previous version:
//  * findRef() and getCount() now return their result (they previously fell
//    off the end of a non-void function, which is undefined behaviour);
//  * the mutex array is sized like `store` (one mutex per row); it used to be
//    sized by nFrenchWord although it is always indexed by the row index;
//  * `muts` is mutable so the const accessors can lock it.
template <class COUNT, class PROB>
class SetArray{
public:
  typedef LpPair<COUNT, PROB> CPPair;
protected:
  /* One map per first-index (English word): target id -> CPPair. */
  std::vector<std::map<size_t,CPPair> > store;
  /* One mutex per row of `store`; mutable so const readers can lock. */
  mutable std::vector<Mutex> muts;
  size_t nEnglishWord;
  size_t nFrenchWord;
  /* (Re)sizes the rows and their mutexes to nEnglishWord entries. */
  void _init(){
    store.resize(nEnglishWord);
    muts.resize(nEnglishWord);
  }
public:
  /*
    Looks up (fi, si) without creating it. Returns a pointer to the entry or
    0 when absent. The row lock is released before returning, so the pointer
    itself is not protected against concurrent modification of the row.
  */
  CPPair* find(size_t fi, size_t si){
    muts[fi].lock();
    std::map<size_t,CPPair>& w = store[fi];
    typename std::map<size_t,CPPair>::iterator it = w.find(si);
    CPPair* q = ( it!=w.end() ? &(it->second) : 0);
    muts[fi].unlock();
    return q;
  };
  /*
    Looks up (fi, si), creating a zero-initialised entry when absent, and
    returns a reference to it. The row lock only covers the lookup/insert.
  */
  inline CPPair& findRef(size_t fi, size_t si){
    std::map<size_t,CPPair> &x = store[fi];
    muts[fi].lock();
    CPPair& ref= x[si];
    muts[fi].unlock();
    return ref;
  };
  /* Sets count and prob of (fi, si), creating the entry when absent. */
  void insert(size_t fi, size_t si, COUNT count = 0, PROB prob = 0){
    muts[fi].lock();
    std::map<size_t,CPPair> &x = store[fi];
    CPPair& v= x[si];
    v.count = count;
    v.prob = prob;
    muts[fi].unlock();
  }
  /*
    Adds inc to the count of (e, f); the entry is created when absent.
    Nothing happens when inc is zero.
  */
  void incCount(size_t e, size_t f, COUNT inc)
  {
    if( inc ){
      std::map<size_t,CPPair> &x = store[e];
      muts[e].lock();
      CPPair& ref= x[f];
      ref.count += inc;
      muts[e].unlock();
    }
  }
  /*
    Returns the probability stored for (e, f), floored at PROB_SMOOTH;
    returns PROB_SMOOTH when the pair does not exist.
  */
  PROB getProb(size_t e, size_t f) const
  {
    muts[e].lock();
    typename std::map<size_t,CPPair >::const_iterator it = store[e].find(f);
    PROB b;
    if(it == store[e].end())
      b = PROB_SMOOTH;
    else
      b=max((it->second).prob, PROB_SMOOTH);
    muts[e].unlock();
    return b;
  }
  /* Returns the count stored for (e, f), or 0 when the pair does not exist. */
  COUNT getCount(size_t e, size_t f) const
  {
    muts[e].lock();
    typename std::map<size_t,CPPair >::const_iterator it = store[e].find(f);
    COUNT c;
    if(it == store[e].end())
      c = 0;
    else
      c = ((*it).second).count;
    muts[e].unlock();
    return c;
  }
  /* Removes the entry (e, f) from the table, if present. */
  void erase(size_t e, size_t f)
  {
    muts[e].lock();
    store[e].erase(f);
    muts[e].unlock();
  };
  /* Sets the number of rows and resizes the storage accordingly. */
  inline void setNumberOfEnlish(size_t e){nEnglishWord=e;_init();};
  /* Records the number of target words; storage is not affected. */
  inline void setNumberOfFrench(size_t f){nFrenchWord = f;};
  /* Direct, unsynchronised access to row i. */
  const std::map<size_t,CPPair>& getMap(size_t i) const{
    return store[i];
  }
  std::map<size_t,CPPair>& getMap1(size_t i){
    return store[i];
  }
  SetArray(size_t e, size_t f): nEnglishWord(e), nFrenchWord(f){
    _init();
  }
};
#endif

View File

@ -0,0 +1,177 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include "TTables.h"
#include "Parameter.h"
#include<iostream>
#include <fstream>
// Global float PROB_CUTOFF, initial value 1e-7, registered as parameter
// "PROB CUTOFF" at level PARLEV_OPTHEUR.
GLOBAL_PARAMETER(float,PROB_CUTOFF,"PROB CUTOFF","Probability cutoff threshold for lexicon probabilities",PARLEV_OPTHEUR,1e-7);
// Global float COUNTINCREASE_CUTOFF, initial value 1e-6, registered as
// "COUNTINCREASE CUTOFF" with the alias "countCutoff".
GLOBAL_PARAMETER2(float, COUNTINCREASE_CUTOFF,"COUNTINCREASE CUTOFF","countCutoff","Counts increment cutoff threshold",PARLEV_OPTHEUR,1e-6);
/* ------------------ Method Definitions for Class tmodel -------------------*/
// To output to STDOUT, submit filename as NULL
// Writes one line "e f count" per stored word pair whose probability exceeds
// MINCOUNTINCREASE. With `actual` set, the word strings from evlist/fvlist
// are printed instead of the numeric ids. Output goes to `filename`, or to
// cout when filename is NULL.
// NOTE(review): the filter tests x.prob although x.count is what gets
// printed -- confirm this is intended.
template <class COUNT, class PROB>
void tmodel<COUNT, PROB>::printCountTable(const char *filename,
					  const Vector<WordEntry>& evlist,
					  const Vector<WordEntry>& fvlist,
					  const bool actual) const
{
  // The file stream is only opened when a filename was supplied; it is
  // closed automatically when it goes out of scope.
  ofstream fileOut;
  if( filename )
    fileOut.open(filename);
  ostream &of = filename ? static_cast<ostream&>(fileOut) : cout;
  for(unsigned int i=0;i<lexmat.size();++i){
    if( !lexmat[i] )
      continue;                         // no entries for this source word
    for(unsigned int j=0;j<lexmat[i]->size();++j){
      const pair<unsigned int,CPPair>&entry=(*lexmat[i])[j];
      const CPPair&x=entry.second;
      if( !(x.prob>MINCOUNTINCREASE) )
	continue;
      const WordIndex e=i,f=entry.first;
      if( actual )
	of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.count << '\n';
      else
	of << e << ' ' << f << ' ' << x.count << '\n';
    }
  }
}
// Writes one line "e f prob" to `filename` for every stored word pair whose
// probability exceeds PROB_SMOOTH. With `actual` set, the word strings from
// evlist/fvlist are printed instead of the numeric ids.
template <class COUNT, class PROB>
void tmodel<COUNT, PROB>::printProbTable(const char *filename,
					 const Vector<WordEntry>& evlist,
					 const Vector<WordEntry>& fvlist,
					 const bool actual) const
{
  ofstream of(filename);
  for(unsigned int i=0;i<lexmat.size();++i){
    if( !lexmat[i] )
      continue;                         // no entries for this source word
    for(unsigned int j=0;j<lexmat[i]->size();++j){
      const pair<unsigned int,CPPair>&entry=(*lexmat[i])[j];
      const CPPair&x=entry.second;
      if( !(x.prob>PROB_SMOOTH) )
	continue;
      const WordIndex e=i,f=entry.first;
      if( actual )
	of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
      else
	of << e << ' ' << f << ' ' << x.prob << '\n';
    }
  }
}
// Currently a no-op: the body is empty, so no file is written and all
// arguments are ignored.
template <class COUNT, class PROB>
void tmodel<COUNT, PROB>::printProbTableInverse(const char *,
const Vector<WordEntry>&,
const Vector<WordEntry>&,
const double,
const double,
const bool ) const
{
}
// Turns the collected counts of every row into probabilities: each entry's
// prob becomes count / (sum of counts in the row), or 1/rowSize when the row
// sum is zero. All counts are reset to 0 afterwards. The arguments are unused.
template <class COUNT, class PROB>
void tmodel<COUNT, PROB>::normalizeTable(const vcbList&, const vcbList&, int)
{
  for(unsigned int i=0;i<lexmat.size();++i){
    if( !lexmat[i] )
      continue;                         // row without entries
    vector< pair<unsigned int,CPPair> >&row=*lexmat[i];
    const unsigned int lSize=row.size();
    double total=0.0;
    for(unsigned int j=0;j<lSize;++j)
      total+=row[j].second.count;
    for(unsigned int j=0;j<lSize;++j){
      CPPair&cell=row[j].second;
      if( total==0 )
	cell.prob=1.0/(lSize);          // uniform fallback
      else
	cell.prob=cell.count/total;
      cell.count=0;
    }
  }
}
// Reads a t-table from `filename`. Every line has the form
//   source_word_id target_word_id p(target_word|source_word)
// and is passed to insert() with a count of 0.0. Returns false when the file
// cannot be opened, true otherwise. Progress is reported on cerr.
template <class COUNT, class PROB>
bool tmodel<COUNT, PROB>::readProbTable(const char *filename){
  ifstream inf(filename);
  cerr << "Reading t prob. table from " << filename << "\n";
  if (!inf) {
    cerr << "\nERROR: Cannot open " << filename << "\n";
    return false;
  }
  WordIndex sourceId, targetId;
  PROB value;
  int nEntry=0;
  for(;;) {
    if( !(inf >> sourceId >> targetId >> value) )
      break;                            // end of file or malformed line
    insert(sourceId, targetId, 0.0, value);
    ++nEntry;
  }
  cerr << "Read " << nEntry << " entries in prob. table.\n";
  return true;
}
template class tmodel<COUNT,PROB> ;
/* ---------------- End of Method Definitions of class tmodel ---------------*/

View File

@ -0,0 +1,330 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/* --------------------------------------------------------------------------*
* *
* Module : TTables *
* *
* Prototypes File: TTables.h *
* *
* Objective: Defines classes and methods for handling I/O for Probability & *
* Count tables and also alignment tables *
*****************************************************************************/
#ifndef _ttables_h
#define _ttables_h 1
#include "defs.h"
#include "vocab.h"
#include <cassert>
#include <iostream>
#include <algorithm>
#include <functional>
#include <map>
#include <set>
#include "Vector.h"
#include <utility>
#include "syncObj.h"
#if __GNUC__>2
#include <ext/hash_map>
using __gnu_cxx::hash_map;
#else
#include <hash_map>
#endif
#include <fstream>
#include "Globals.h"
/* The tables defined in the following classes are defined as hash tables. For
example, the t-table is a hash function of a word pair; an alignment is
a hash function of a vector of integer numbers (sentence positions), and so
on. */
/*----------- Definition of Hash Function for class tmodel ------------------*/
typedef pair<WordIndex, WordIndex> wordPairIds;
// Hash functor for a pair of word ids: MAX_W * first + second.
// NOTE(review): the value is unique per pair only if key.second < MAX_W --
// confirm against the definition of MAX_W.
class hashpair : public unary_function< pair<WordIndex, WordIndex>, size_t >
{
 public:
  size_t operator() (const pair<WordIndex, WordIndex>& key) const
    {
      const size_t rowOffset = (size_t) MAX_W*key.first;
      return rowOffset + key.second;
    }
};
/* ------------------ Class Prototype Definitions ---------------------------*
Class Name: tmodel
Objective: This defines the underlying data structure for t Tables and t
Count Tables. They are defined as a hash table. Each entry in the hash table
is the probability (P(fj/ei)) or the count collected for (C(fj/ei)). The
probability and the count are represented as log integer probability as
defined by the class LogProb.
This class is used to represent t Tables (probability) and n (fertility)
Tables and also their corresponding count tables.
*---------------------------------------------------------------------------*/
//typedef float COUNT ;
//typedef LogProb PROB ;
// Plain (count, probability) record stored per word pair.
template <class COUNT, class PROB>
class LpPair {
 public:
  COUNT count ;
  PROB prob ;
 public:
  // Both fields start at zero.
  LpPair()
    : count(0), prob(0)
  {}
  // Explicit initial count and probability.
  LpPair(COUNT initialCount, PROB initialProb)
    : count(initialCount), prob(initialProb)
  {}
} ;
// Binary search over the half-open range [x, y), which must be sorted by the
// elements' `first` member. Returns a pointer to the element whose `first`
// equals val, or 0 when there is none.
template<class T>
T*mbinary_search(T*x,T*y,unsigned int val)
{
  for(;;)
    {
      if( y-x==0 )
	return 0;                       // empty range
      if( x->first==val)
	return x;                       // hit at the lower bound
      if( y-x<2 )
	return 0;                       // single non-matching element
      T*mid=x+(y-x)/2;
      if( val < mid->first )
	y=mid;                          // continue in the lower half
      else
	x=mid;                          // continue in the upper half
    }
}
// Read-only variant: binary search over the half-open range [x, y), which
// must be sorted by the elements' `first` member. Returns a pointer to the
// element whose `first` equals val, or 0 when there is none.
template<class T>
const T*mbinary_search(const T*x,const T*y,unsigned int val)
{
  for(;;)
    {
      if( y-x==0 )
	return 0;                       // empty range
      if( x->first==val)
	return x;                       // hit at the lower bound
      if( y-x<2 )
	return 0;                       // single non-matching element
      const T*mid=x+(y-x)/2;
      if( val < mid->first )
	y=mid;                          // continue in the lower half
      else
	x=mid;                          // continue in the upper half
    }
}
template <class COUNT, class PROB>
class tmodel{
typedef LpPair<COUNT, PROB> CPPair;
public:
bool recordDiff;
public:
int noEnglishWords; // total number of unique source words
int noFrenchWords; // total number of unique target words
//vector<pair<unsigned int,CPPair> > fs;
//vector<unsigned int> es;
vector< vector<pair<unsigned int,CPPair> >* > lexmat;
vector< Mutex > mutex;
void erase(WordIndex e, WordIndex f){
CPPair *p=find(e,f);
if(p)
*p=CPPair(0,0);
};
CPPair*find(int e,int f){
//pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
//pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
if(e>lexmat.size()||lexmat[e]==NULL){
return NULL;
}
pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
if( x==0 ){
//cerr << "A:DID NOT FIND ENTRY: " << e << " " << f << '\n';
//abort();
return 0;
}
return &(x->second);
}
const CPPair*find(int e,int f)const{
if(lexmat[e]==0)
return 0;
const pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
const pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
//const pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
//const pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
const pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
if( x==0 ){
//cerr << "B:DID NOT FIND ENTRY: " << e << " " << f << '\n';
//abort();
return 0;
}
return &(x->second);
}
public:
void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
CPPair* found = find(e,f);
if(found)
*found=CPPair(cval,pval);
}
CPPair*getPtr(int e,int f){return find(e,f);}
tmodel(){};
tmodel(const string&fn) {
recordDiff = false;
int count=0,count2=0;
ifstream infile2(fn.c_str());
cerr << "Inputfile in " << fn << endl;
int e,f,olde=-1,oldf=-1;
pair<unsigned int,CPPair> cp;
vector< pair<unsigned int,CPPair> > cps;
while(infile2>>e>>f){
cp.first=f;
assert(e>=olde);
assert(e>olde ||f>oldf);
if( e!=olde&&olde>=0 ){
int oldsize=lexmat.size();
lexmat.resize(olde+1);
for(unsigned int i=oldsize;i<lexmat.size();++i)
lexmat[i]=0;
lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
cps.clear();
if( !((*lexmat[olde]).size()==(*lexmat[olde]).capacity()) )
cerr << "eRROR: waste of memory: " << (*lexmat[olde]).size() << " " << (*lexmat[olde]).capacity() << endl;
count2+=lexmat[olde]->capacity();
}
cps.push_back(cp);
olde=e;
oldf=f;
count++;
}
lexmat.resize(olde+1);
lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
count2+=lexmat[olde]->capacity();
cout << "There are " << count << " " << count2 << " entries in table" << '\n';
mutex.resize(lexmat.size());
/* Create mutex */
}
/* tmodel(const string&fn)
{
size_t count=0;
{
ifstream infile1(fn.c_str());
if( !infile1 )
{
cerr << "ERROR: can't read coocurrence file " << fn << '\n';
abort();
}
int e,f;
while(infile1>>e>>f)
count++;
}
cout << "There are " << count << " entries in table" << '\n';
ifstream infile2(fn.c_str());
fs.resize(count);
int e,f,olde=-1,oldf=-1;
pair<unsigned int,CPPair> cp;
count=0;
while(infile2>>e>>f)
{
assert(e>=olde);
assert(e>olde ||f>oldf);
if( e!=olde )
{
es.resize(e+1);
for(unsigned int i=olde+1;int(i)<=e;++i)
es[i]=count;
}
cp.first=f;
assert(count<fs.size());
fs[count]=cp;
//fs.push_back(cp);
olde=e;
oldf=f;
count++;
}
assert(count==fs.size());
es.push_back(fs.size());
cout << fs.size() << " " << count << " coocurrences read" << '\n';
}*/
void incCount(WordIndex e, WordIndex f, COUNT inc) {
if( inc ){
CPPair *p=find(e,f);
if( p ){
mutex[e].lock();
p->count += inc ;
mutex[e].unlock();
}
}
}
PROB getProb(WordIndex e, WordIndex f) const{
const CPPair *p=find(e,f);
if( p )
return max(p->prob, PROB_SMOOTH);
else
return PROB_SMOOTH;
}
COUNT getCount(WordIndex e, WordIndex f) const
{
const CPPair *p=find(e,f);
if( p )
return p->count;
else
return 0.0;
}
void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
void printProbTableInverse(const char *filename,
const Vector<WordEntry>& evlist,
const Vector<WordEntry>& fvlist,
const double eTotal,
const double fTotal,
const bool actual = false ) const;
void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
bool readProbTable(const char *filename);
bool readSubSampledProbTable(const char* filename, std::set<WordIndex> &e, std::set<WordIndex> &f);
};
#endif

View File

@ -0,0 +1,423 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/*--
Vector: checked vector implementation
Franz Josef Och (30/07/99)
--*/
#ifndef ARRAY_H_DEFINED
#define ARRAY_H_DEFINED
#include "mystl.h"
#include <algorithm>
#include <string>
#include <utility>
#include <functional>
#include <cassert>
#ifdef NDEBUG
#include <vector>
#define Vector vector
// Prints the vector as "Vector(n){  0: a0 ; 1: a1 ;...}" followed by a
// newline.
template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
{
  o << "Vector(" << a.size() << "){ ";
  unsigned int pos=0;
  while( pos<a.size() )
    {
      o << " " << pos<< ": " << a[pos]<<" ;";
      pos++;
    }
  return o << "}\n";
}
#else
#define ARRAY_DEBUG
#define memo_del(a, b)
#define memo_new(a)
template<class T> class Vector
{
private:
T *p;
int realSize;
int maxWritten;
void copy(T *a, const T *b, int n);
void copy(T *a, T *b, int n);
void _expand();
public:
Vector()
: p(0), realSize(0), maxWritten(-1)
{
#ifdef VERY_ARRAY_DEBUG
cout << "MAKE ARRAY: " << this<<" "<<(void*)p << '\n';
#endif
}
Vector(const Vector<T> &x)
: p(new T[x.maxWritten+1]), realSize(x.maxWritten+1), maxWritten(x.maxWritten)
{
memo_new(p);
copy(p, x.p, realSize);
#ifdef VERY_ARRAY_DEBUG
cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< '\n';
#endif
}
explicit Vector(int n)
: p(new T[n]), realSize(n), maxWritten(n-1)
{
memo_new(p);
#ifdef VERY_ARRAY_DEBUG
cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
}
Vector(int n, const T&_init)
: p(new T[n]), realSize(n), maxWritten(n-1)
{
memo_new(p);
for(int iii=0;iii<n;iii++)p[iii]=_init;
#ifdef VERY_ARRAY_DEBUG
cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
}
~Vector()
{
#ifdef VERY_ARRAY_DEBUG
cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
delete [] p;
memo_del(p, 1);
#ifndef NDEBUG
p=0;realSize=-1;maxWritten=-1;
#endif
}
Vector<T>& operator=(const Vector<T>&x)
{
if( this!= &x )
{
#ifdef VERY_ARRAY_DEBUG
cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
delete [] p;
memo_del(p, 1);
realSize = x.maxWritten+1;
maxWritten = x.maxWritten;
p = new T[realSize];
memo_new(p);
copy(p, x.p, realSize);
#ifdef VERY_ARRAY_DEBUG
cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
}
return *this;
}
Vector<T>& operator=(Vector<T>&x)
{
if( this!= &x )
{
#ifdef VERY_ARRAY_DEBUG
cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
delete [] p;
memo_del(p, 1);
realSize = x.maxWritten+1;
maxWritten = x.maxWritten;
p = new T[realSize];
memo_new(p);
copy(p, x.p, realSize);
#ifdef VERY_ARRAY_DEBUG
cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
}
return *this;
}
void allowAccess(int n)
{
while( realSize<=n )
_expand();
maxWritten=max(maxWritten, n);
assert( maxWritten<realSize );
}
void resize(int n)
{
while( realSize<n )
_expand();
maxWritten=n-1;
}
void clear()
{
resize(0);
}
void reserve(int n)
{
int maxOld=maxWritten;
resize(n);
maxWritten=maxOld;
}
void sort(int until=-1)
{
if( until== -1 ) until=size();
std::sort(p, p+until);
}
void invsort(int until=-1)
{
if( until== -1 ) until=size();
std::sort(p, p+until, greater<T>());
}
void init(int n, const T&_init)
{
#ifdef VERY_ARRAY_DEBUG
cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
delete []p;
memo_del(p, 1);
p=new T[n];
memo_new(p);
realSize=n;
maxWritten=n-1;
for(int iii=0;iii<n;iii++)p[iii]=_init;
#ifdef VERY_ARRAY_DEBUG
cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
}
inline unsigned int size() const
{assert( maxWritten<realSize );
return maxWritten+1;}
inline int low() const
{ return 0; }
inline int high() const
{ return maxWritten; }
int findMax() const;
int findMin() const;
void errorAccess(int n) const;
inline T*getPointerToData(){return p;}
inline T*begin(){return p;}
inline T*end(){return p+maxWritten+1;}
inline T& operator[](int n)
{
#ifndef NDEBUG
if( n<0 || n>maxWritten )
errorAccess(n);
#endif
return p[n];
}
inline const T& operator[](int n) const
{
#ifndef NDEBUG
if(n<0 || n>maxWritten )
errorAccess(n);
#endif
return p[n];
}
inline const T& get(int n) const
{
#ifndef NDEBUG
if(n<0 || n>maxWritten )
errorAccess(n);
#endif
return p[n];
}
const T&top(int n=0) const
{return (*this)[maxWritten-n];}
T&top(int n=0)
{return (*this)[maxWritten-n];}
const T&back(int n=0) const
{return (*this)[maxWritten-n];}
T&back(int n=0)
{return (*this)[maxWritten-n];}
T&push_back(const T&x)
{
allowAccess(maxWritten+1);
(*this)[maxWritten]=x;
return top();
}
bool writeTo(ostream&out) const
{
out << "Vector ";
out << size() << " ";
//out << a << '\n';
for(int iv=0;iv<=maxWritten;iv++)
{
writeOb(out, (*this)[iv]);
out << '\n';
}
return 1;
}
// Reads a vector in the format produced by writeTo(): the word "Vector",
// the element count, then that many elements (each read through readOb).
// Returns false (after a message on cerr) when the stream is already bad,
// the header word is wrong, or the element count is missing or negative.
bool readFrom(istream&in)
{
  if( !in )
  {
    cerr << "ERROR(Vector): file cannot be opened.\n";
    return 0;
  }
  string s;
  in >> s;
  if( !(s=="Vector") )
  {
    cerr << "ERROR(Vector): Vector!='"<<s<<"'\n";
    return 0;
  }
  // The count used to be consumed unchecked: a failed extraction or a
  // negative number went straight into resize() and the read loop below.
  int biggest=0;
  if( !(in >> biggest) || biggest<0 )
  {
    cerr << "ERROR(Vector): invalid element count.\n";
    return 0;
  }
  resize(biggest);
  for(int iv=0;iv<biggest;iv++)
    readOb(in, (*this)[iv]);
  return 1;
}
};
// Element-wise equality: true for the same object, or for two vectors of
// equal size whose elements all compare equal with T's operator==.
template<class T> bool operator==(const Vector<T> &x, const Vector<T> &y)
{
  if( &x == &y )
    return 1;
  if( y.size()!=x.size() )
    return 0;
  for(unsigned int pos=0;pos<x.size();pos++)
    if( !(x[pos]==y[pos]) )
      return 0;
  return 1;
}
// Negation of operator== for Vector<T>.
template<class T> bool operator!=(const Vector<T> &x, const Vector<T> &y)
{
return !(x==y);
}
// Lexicographic ordering on Vector<T>. When x is the longer operand the
// answer is derived from the mirrored comparison; otherwise the first
// differing element decides and a proper prefix sorts first.
template<class T> bool operator<(const Vector<T> &x, const Vector<T> &y)
{
  if( &x == &y )
    return 0;
  if( y.size()<x.size() )
    return !(y<x);
  for(int pos=0;pos<x.size();pos++)
  {
    assert( pos!=y.size() );
    if( x[pos]<y[pos] )
      return 1;
    if( y[pos]<x[pos] )
      return 0;
  }
  // all of x matched: x is smaller only if y has more elements
  return x.size()!=y.size();
}
// Reports an out-of-range access to index n. The same diagnostic (index,
// maxWritten, realSize, data pointer) goes to both cerr and cout, then
// assert(0) fires; unless DEBUG is defined the process is also aborted.
template<class T> void Vector<T>:: errorAccess(int n) const
{
cerr << "ERROR: Access to array element " << n
<< " (" << maxWritten << ", " << realSize << ", " << (void*)p << ")\n";
cout << "ERROR: Access to array element " << n
<< " (" << maxWritten << ", " << realSize << ", " << (void*)p << ")\n";
assert(0);
#ifndef DEBUG
abort();
#endif
}
// Human-readable dump: "Vector(<size>){ " followed by " <index>: <value> ;"
// for every element and a closing "}\n".
template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
{
  o << "Vector(" << a.size() << "){ ";
  unsigned int idx=0;
  while( idx<a.size() )
  {
    o << " " << idx<< ": " << a[idx]<<" ;";
    ++idx;
  }
  o << "}\n";
  return o;
}
// Placeholder extraction operator: consumes nothing and leaves the vector
// untouched; it only returns the stream.
template<class T> istream& operator>>(istream&in, Vector<T>&)
{return in;}
// Hash of a vector: every element hash is weighted by its 1-based position
// and summed, then size()*47 is added.
template<class T> int Hash(const Vector<T>&a)
{
  int acc=0;
  for(int pos=0;pos<a.size();++pos)
  {
    const int weight=pos+1;
    acc+=Hash(a[pos])*weight;
  }
  return acc+a.size()*47;
}
// Assigns the first n elements of bb to aa, front to back (const source).
template<class T> void Vector<T>::copy(T *aa, const T *bb, int n)
{
  for(int left=n;left>0;--left)
    *aa++=*bb++;
}
// Assigns the first n elements of bb to aa, front to back (non-const source).
template<class T> void Vector<T>::copy(T *aa, T *bb, int n)
{
  for(int left=n;left>0;--left)
    *aa++=*bb++;
}
// Grows the backing array from realSize to 2*realSize+1 slots, copies the
// old contents across and releases the old array.
template<class T> void Vector<T>::_expand()
{
#ifdef VERY_ARRAY_DEBUG
  cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
  T *previous=p;
  const int previousSize=realSize;
  realSize=2*previousSize+1;
  p=new T[realSize];
  memo_new(p);
  copy(p, previous, previousSize);
  delete [] previous;
  memo_del(previous, 1);
#ifdef VERY_ARRAY_DEBUG
  cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
}
// Index of the largest element according to operator<, or -1 for an empty
// vector. Among equal maxima the first one wins.
template<class T> int Vector<T>::findMax() const
{
  if( size()==0 )
    return -1;
  int best=0;
  for(int cand=1;cand<size();cand++)
    if( (*this)[best]<(*this)[cand] )
      best=cand;
  return best;
}
// Index of the smallest element according to operator<, or -1 for an empty
// vector. Among equal minima the first one wins.
template<class T> int Vector<T>::findMin() const
{
  if( size()==0 )
    return -1;
  int best=0;
  for(int cand=1;cand<size();cand++)
    if( (*this)[cand]<(*this)[best] )
      best=cand;
  return best;
}
#endif
#endif

View File

@ -0,0 +1,103 @@
/*
Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef WordClasses_h_DEFINED
#define WordClasses_h_DEFINED
#include <map>
#include <string>
#include <set>
#include "vocab.h"
// Maps words to word classes. Class names are interned as small integers
// (class "0" is always id 0) and every word id known to the supplied mapper
// gets the id of its class; anything unknown falls back to class 0.
class WordClasses
{
private:
map<string,string> Sw2c;   // word string  -> class string
map<string,int> Sc2int;    // class string -> class id
Vector<string> Sint2c;     // class id     -> class string
Vector<int> w2c;           // word id      -> class id (filled by read())
unsigned int classes;      // number of class ids handed out so far
public:
// Starts with the single default class "0" (id 0).
WordClasses()
: classes(1)
{
Sint2c.push_back("0");
Sc2int["0"]=0;
}
// Reads "<word> <class>" lines from `in`. Every class name seen gets an
// id; a word is only stored when vcb.has_word(word) is true, and m(word)
// supplies its numeric id. Prints a short summary to cout and always
// returns true.
// NOTE(review): istrstream is the deprecated <strstream> interface; an
// istringstream would be the modern replacement.
template<class MAPPER> bool read(istream&in,const MAPPER&m,const vcbList& vcb)
{
string sline;
int maxword=0;
int readWord=0, putWord=0;
while(getline(in,sline))
{
readWord ++;
string word,wclass;
istrstream iline(sline.c_str());
iline>>word>>wclass;
// first time this class name shows up: give it the next free id
if( !Sc2int.count(wclass) )
{
Sc2int[wclass]=classes++;
Sint2c.push_back(wclass);
assert(classes==Sint2c.size());
}
if(vcb.has_word(word)){
maxword=max(m(word),maxword);
assert(Sw2c.count(word)==0);
Sw2c[word]=wclass;
putWord++;
}
}
// rebuild the id->class table, defaulting every word id to class 0
w2c=Vector<int>(maxword+1,0);
for(map<string,string>::const_iterator i=Sw2c.begin();i!=Sw2c.end();++i)
w2c[m(i->first)]=Sc2int[i->second];
// note: the "#words" figure printed here is the largest word id seen
cout << "Read classes: #words: " << maxword << " " << " #classes: "<< classes <<endl;
cout << "Actual number of read words: " << readWord << " stored words: " << putWord << endl;
return 1;
}
// Class id of word id w; 0 when w lies outside the table.
int getClass(int w)const
{
if(w>=0&&int(w)<int(w2c.size()) )
return w2c[w];
else
return 0;
}
// Class id for a class name; warns on cerr and returns 0 when unknown.
int operator()(const string&x)const
{
if( Sc2int.count(x) )
return Sc2int.find(x)->second;
else
{
cerr << "WARNING: class " << x << " not found.\n";
return 0;
}
}
// Class name for a class id; "0" when the id is out of range.
string classString(unsigned int cnr)const
{
if( cnr<Sint2c.size())
return Sint2c[cnr];
else
return string("0");
}
};
#endif

View File

@ -0,0 +1,38 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/*--
alignment: 'checked' alignment representation with automatic calculation
of fertilities
Franz Josef Och (30/07/99)
--*/
#include "alignment.h"
// Debug dump of an alignment: lengths l and m, the aligned source position
// of every target position 1..m, then the fertility of every source
// position 0..l.
ostream&operator<<(ostream&out, const alignment&a)
{
  const int m=a.a.size()-1;
  const int l=a.f.size()-1;
  out << "AL(l:"<<l<<",m:"<<m<<")(a: ";
  int j=1;
  while( j<=m )
  {
    out << a(j) << ' ';
    ++j;
  }
  out << ")(fert: ";
  int i=0;
  while( i<=l )
  {
    out << a.fert(i) << ' ';
    ++i;
  }
  out << ") c:"<<"\n";
  return out;
}

View File

@ -0,0 +1,227 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/*--
alignment: 'checked' alignment representation with autom. calc. of fertilities
Franz Josef Och (30/07/99)
--*/
#ifndef alignment_h_fjo_defined
#define alignment_h_fjo_defined
#include "Vector.h"
#include <cassert>
#include "defs.h"
#include "myassert.h"
// Node of a doubly linked list of target positions; 0 stands for
// "no neighbour" in either direction.
class al_struct
{
 public:
  PositionIndex prev,next;
  al_struct()
    : prev(0),
      next(0)
  {}
};
class alignment
{
private:
Vector<PositionIndex> a;
Vector<PositionIndex> positionSum,f;
public:
Vector<PositionIndex> als_i;
Vector<al_struct> als_j;
PositionIndex l,m;
alignment()
{}
alignment(PositionIndex _l, PositionIndex _m)
: a(_m+1, (PositionIndex)0),
positionSum(_l+1, (PositionIndex)0), f(_l+1, (PositionIndex)0), als_i(_l+1,0),als_j(_m+1),l(_l), m(_m)
{
f[0]=m;
for(PositionIndex j=1;j<=m;j++)
{
if( j>1 )
als_j[j].prev= j-1;
if( j<m )
als_j[j].next= j+1;
}
als_i[0]=1;
}
PositionIndex get_l()const
{return l;}
PositionIndex get_m()const
{return m;}
void doMove(int i,int j)
{
set(j,i);
}
void doSwap(int j1,int j2)
{
int aj1=a[j1],aj2=a[j2];
set(j1,aj2);
set(j2,aj1);
}
void set(PositionIndex j, PositionIndex aj)
{
PositionIndex old_aj=a[j];
massert(j<a.size());massert(aj<f.size());
massert(old_aj<f.size());massert(f[old_aj]>0);
massert(j>0);
positionSum[old_aj]-=j;
// ausfuegen
PositionIndex prev=als_j[j].prev;
PositionIndex next=als_j[j].next;
if( next )
als_j[next].prev=prev;
if( prev )
als_j[prev].next=next;
else
als_i[old_aj]=next;
// neue Position suchen
PositionIndex lfd=als_i[aj],llfd=0;
while( lfd && lfd<j )
lfd = als_j[llfd=lfd].next;
// einfuegen
als_j[j].prev=llfd;
als_j[j].next=lfd;
if( llfd )
als_j[llfd].next=j;
else
als_i[aj]=j;
if( lfd )
als_j[lfd].prev=j;
f[old_aj]--;
positionSum[aj]+=j;
f[aj]++;
a[j]=aj;
}
const Vector<PositionIndex>& getAlignment() const
{return a ;}
PositionIndex get_al(PositionIndex j)const
{
massert(j<a.size());
return a[j];
}
PositionIndex operator()(PositionIndex j)const
{
massert(j<a.size());
return a[j];
}
PositionIndex fert(PositionIndex i)const
{
massert(i<f.size());
return f[i];
}
PositionIndex get_head(PositionIndex i)const
{
massert( als_i[i]==_get_head(i) );
return als_i[i];
}
PositionIndex get_center(PositionIndex i)const
{
if( i==0 )return 0;
massert(((positionSum[i]+f[i]-1)/f[i]==_get_center(i)));
return (positionSum[i]+f[i]-1)/f[i];
}
PositionIndex _get_head(PositionIndex i)const
{
if( fert(i)==0 )return 0;
for(PositionIndex j=1;j<=m;j++)
if( a[j]==i )
return j;
return 0;
}
PositionIndex _get_center(PositionIndex i)const
{
if( i==0 )return 0;
massert(fert(i));
PositionIndex sum=0;
for(PositionIndex j=1;j<=m;j++)
if( a[j]==i )
sum+=j;
return (sum+fert(i)-1)/fert(i);
}
PositionIndex prev_cept(PositionIndex i)const
{
if( i==0 )return 0;
PositionIndex k=i-1;
while(k&&fert(k)==0)
k--;
return k;
}
PositionIndex next_cept(PositionIndex i)const
{
PositionIndex k=i+1;
while(k<l+1&&fert(k)==0)
k++;
return k;
}
PositionIndex prev_in_cept(PositionIndex j)const
{
//PositionIndex k=j-1;
//while(k&&a[k]!=a[j])
//k--;
//assert( als_j[j].prev==k );
//assert(k);
//return k;
massert(als_j[j].prev==0||a[als_j[j].prev]==a[j]);
return als_j[j].prev;
}
friend ostream &operator<<(ostream&out, const alignment&a);
friend bool operator==(const alignment&a, const alignment&b)
{
massert(a.a.size()==b.a.size());
for(PositionIndex j=1;j<=a.get_m();j++)
if(a(j)!=b(j))
return 0;
return 1;
}
friend bool operator<(const alignment&x, const alignment&y)
{
massert(x.get_m()==y.get_m());
for(PositionIndex j=1;j<=x.get_m();j++)
if( x(j)<y(j) )
return 1;
else if( y(j)<x(j) )
return 0;
return 0;
}
friend int differences(const alignment&x, const alignment&y){
int count=0;
massert(x.get_m()==y.get_m());
for(PositionIndex j=1;j<=x.get_m();j++)
count += (x(j)!=y(j));
return count;
}
bool valid()const
{
if( 2*f[0]>m )
return 0;
for(unsigned int i=1;i<=l;i++)
if( f[i]>=MAX_FERTILITY )
return 0;
return 1;
}
friend class transpair_model5;
};
#endif

View File

@ -0,0 +1,649 @@
// $Id: cmd.c 1307 2007-03-14 22:22:36Z hieuhoang1972 $
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "cmd.h"
#ifdef WIN32
# define popen _popen
# define pclose _pclose
#endif
/* Enum table installed for CMDBOOLTYPE parameters: "FALSE" maps to 0 and
 * "TRUE" to 1; the entry with a null name terminates the table. */
static Enum_T BoolEnum[] = {
{ "FALSE", 0 },
{ "TRUE", 1 },
{ 0, 0 }
};
#ifdef NEEDSTRDUP
char *strdup();
#endif
#define FALSE 0
#define TRUE 1
#define LINSIZ 10240
#define MAXPARAM 256
static char *GetLine(),
**str2array();
static int Scan(),
SetParam(),
SetEnum(),
SetSubrange(),
SetStrArray(),
SetGte(),
SetLte(),
CmdError(),
EnumError(),
SubrangeError(),
GteError(),
LteError(),
PrintParam(),
PrintEnum(),
PrintStrArray();
static Cmd_T cmds[MAXPARAM+1];
static char *SepString = " \t\n";
/*
 * Registers parameters in the global table `cmds`, which is kept sorted by
 * name. The argument list is a sequence of records
 *     name, type, value pointer [, type-specific extras]
 * ended by a null name. Extras: an Enum_T table for CMDENUMTYPE, two ints
 * for CMDSUBRANGETYPE, one int for CMDGTETYPE / CMDLTETYPE and a separator
 * string (may be null) for CMDSTRARRAYTYPE. CMDBOOLTYPE is stored as an
 * enum over BoolEnum. At most MAXPARAM entries are kept; an unknown type
 * terminates the program. Always returns 0.
 */
#if defined(__STDC__)
#include <stdarg.h>
int DeclareParams(char *ParName, ...)
#else
#include <varargs.h>
int DeclareParams(ParName, va_alist)
char *ParName;
va_dcl
#endif
{
  va_list args;
  static int ParamN = 0;
  int j,
    c;
  char *s;
#if defined(__STDC__)
  va_start(args, ParName);
#else
  va_start(args);
#endif
  for(;ParName;) {
    if(ParamN==MAXPARAM) {
      fprintf(stderr, "Too many parameters !!\n");
      break;
    }
    /* find the sorted insertion slot; c==0 means the name already exists */
    for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
      ;
    if(!c) {
      fprintf(stderr,
              "Warning: parameter \"%s\" declared twice.\n",
              ParName);
    }
    /* shift the tail up by one to open slot j */
    for(c=ParamN; c>j; c--) {
      cmds[c] = cmds[c-1];
    }
    cmds[j].Name = ParName;
    /* Slot j still holds a copy of the entry that was just shifted up:
     * drop its argument string so the new parameter does not appear to
     * have been set already. */
    cmds[j].ArgStr = NULL;
    cmds[j].Type = va_arg(args, int);
    cmds[j].Val = va_arg(args, void *);
    switch(cmds[j].Type) {
    case CMDENUMTYPE: /* get the pointer to Enum_T struct */
      cmds[j].p = va_arg(args, void *);
      break;
    case CMDSUBRANGETYPE: /* get the two extremes */
      cmds[j].p = (void*) calloc(2, sizeof(int));
      ((int*)cmds[j].p)[0] = va_arg(args, int);
      ((int*)cmds[j].p)[1] = va_arg(args, int);
      break;
    case CMDGTETYPE: /* get lower or upper bound */
    case CMDLTETYPE:
      cmds[j].p = (void*) calloc(1, sizeof(int));
      ((int*)cmds[j].p)[0] = va_arg(args, int);
      break;
    case CMDSTRARRAYTYPE: /* get the separators string */
      cmds[j].p = (s=va_arg(args, char*))
        ? (void*)strdup(s) : 0;
      break;
    case CMDBOOLTYPE:
      cmds[j].Type = CMDENUMTYPE;
      cmds[j].p = BoolEnum;
      break;
    case CMDDOUBLETYPE: /* nothing else is needed */
    case CMDINTTYPE:
    case CMDSTRINGTYPE:
      break;
    default:
      fprintf(stderr, "%s: %s %d %s \"%s\"\n",
              "DeclareParam()", "Unknown Type",
              cmds[j].Type, "for parameter", cmds[j].Name);
      exit(1);
    }
    ParamN++;
    ParName = va_arg(args, char *);
  }
  cmds[ParamN].Name = NULL;   /* sentinel that ends the table */
  va_end(args);
  return 0;
}
/*
 * Reads parameter values from command files and from the command line.
 *
 *  n, a         in/out: argc/argv of the program; on return they describe
 *               the arguments that were not consumed here.
 *  CmdFileName  command file to read first, or null. A leading argument of
 *               the form "-=file" replaces it. "-" means stdin, a name
 *               starting with "@@" is run through popen().
 *
 * Command files are read line by line up to a line "\End". Afterwards
 * every leading argument of the form "-name=value" is applied. Invalid
 * input ends the program via CmdError(). Always returns 0.
 */
int GetParams(n, a, CmdFileName)
int *n;
char ***a;
char *CmdFileName;
{
  char *Line,
    *ProgName;
  int argc = *n;
  char **argv = *a,
    *s;
  FILE *fp;
  int IsPipe;
#ifdef MSDOS
#define PATHSEP '\\'
  char *dot = NULL;
#else
#define PATHSEP '/'
#endif
  if(!(Line=malloc(LINSIZ))) {
    fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
            LINSIZ);
    exit(1);
  }
  /* program name = argv[0] without its directory part */
  if((ProgName=strrchr(*argv, PATHSEP))) {
    ++ProgName;
  } else {
    ProgName = *argv;
  }
#ifdef MSDOS
  if(dot=strchr(ProgName, '.')) *dot = 0;
#endif
  --argc;
  ++argv;
  for(;;) {
    if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
      CmdFileName = argv[0]+2;
      ++argv;
      --argc;
    }
    if(!CmdFileName) {
      break;
    }
    IsPipe = !strncmp(CmdFileName, "@@", 2);
    fp = IsPipe
      ? popen(CmdFileName+2, "r")
      : strcmp(CmdFileName, "-")
      ? fopen(CmdFileName, "r")
      : stdin;
    if(!fp) {
      fprintf(stderr, "Unable to open command file %s\n",
              CmdFileName);
      exit(1);
    }
    while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
      if(Scan(ProgName, cmds, Line)) {
        CmdError(Line);
      }
    }
    if(fp!=stdin) {
      if(IsPipe) pclose(fp); else fclose(fp);
    }
    CmdFileName = NULL;
  }
  while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
    /* "ProgName/" + option text + terminator must fit into Line; the
     * unchecked sprintf used to overflow the buffer on over-long options. */
    if(strlen(ProgName)+strlen(*argv+1)+2 > LINSIZ) {
      CmdError(*argv);
    }
    /* rewrite "-name=value" as "ProgName/name value" for Scan() */
    *s = ' ';
    sprintf(Line, "%s/%s", ProgName, *argv+1);
    *s = '=';
    if(Scan(ProgName, cmds, Line)) CmdError(*argv);
    --argc;
    ++argv;
  }
  *n = argc;
  *a = argv;
#ifdef MSDOS
  if(dot) *dot = '.';
#endif
  free(Line);
  return 0;
}
int PrintParams(ValFlag, fp)
int ValFlag;
FILE *fp;
{
int i;
fflush(fp);
if(ValFlag) {
fprintf(fp, "Parameters Values:\n");
} else {
fprintf(fp, "Parameters:\n");
}
for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
fprintf(fp, "\n");
fflush(fp);
return 0;
}
/* Builds an array of "<pfx><name>=<value>" strings, one for every
 * parameter that has received a value, stores it in *a and returns the
 * number of strings. A null pfx is treated as "". */
int SPrintParams(a, pfx)
char ***a,
 *pfx;
{
  int pfxLen,
    count = 0;
  Cmd_T *cmd;
  char **out;

  if(!pfx) pfx="";
  pfxLen = strlen(pfx);
  /* first pass: how many parameters carry an argument string? */
  for(cmd=cmds; cmd->Name; cmd++) {
    if(cmd->ArgStr) ++count;
  }
  out = calloc(count, sizeof(char*));
  a[0] = out;
  /* second pass: format them */
  count = 0;
  for(cmd=cmds; cmd->Name; cmd++) {
    if(cmd->ArgStr) {
      out[count] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+pfxLen+2);
      sprintf(out[count], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr);
      ++count;
    }
  }
  return count;
}
/* Reports an invalid option, lists the known parameters on stderr and
 * terminates the program. Does not return. The exit status is now
 * non-zero: it used to be 0, which made a rejected command line look like
 * success to the calling shell or script. */
static int CmdError(opt)
char *opt;
{
  fprintf(stderr, "Invalid option \"%s\"\n", opt);
  fprintf(stderr, "This program expects the following parameters:\n");
  PrintParams(FALSE, stderr);
  exit(1);
}
/* Prints one parameter on fp, indented by four spaces: its name and, when
 * ValFlag is non-zero, its current value formatted according to the
 * parameter type. An unknown type terminates the program. Returns 0. */
static int PrintParam(cmd, ValFlag, fp)
Cmd_T *cmd;
int ValFlag;
FILE *fp;
{
fprintf(fp, "%4s", "");
switch(cmd->Type) {
case CMDDOUBLETYPE:
fprintf(fp, "%s", cmd->Name);
if(ValFlag) fprintf(fp, ": %22.15e", *(double *)cmd->Val);
fprintf(fp, "\n");
break;
case CMDENUMTYPE:
PrintEnum(cmd, ValFlag, fp);
break;
/* all integer-valued kinds share one format */
case CMDINTTYPE:
case CMDSUBRANGETYPE:
case CMDGTETYPE:
case CMDLTETYPE:
fprintf(fp, "%s", cmd->Name);
if(ValFlag) fprintf(fp, ": %d", *(int *)cmd->Val);
fprintf(fp, "\n");
break;
case CMDSTRINGTYPE:
fprintf(fp, "%s", cmd->Name);
if(ValFlag) {
/* a null string value is shown as the bare word NULL */
if(*(char **)cmd->Val) {
fprintf(fp, ": \"%s\"", *(char **)cmd->Val);
} else {
fprintf(fp, ": %s", "NULL");
}
}
fprintf(fp, "\n");
break;
case CMDSTRARRAYTYPE:
PrintStrArray(cmd, ValFlag, fp);
break;
default:
fprintf(stderr, "%s: %s %d %s \"%s\"\n",
"PrintParam",
"Unknown Type",
cmd->Type,
"for parameter",
cmd->Name);
exit(1);
}
return 0;
}
/*
 * Reads the next logical line of a command file into Line (capacity n).
 *
 * Lines starting with '#' and lines that are empty after removing leading
 * white space are skipped. Leading white space is removed. A line ending
 * in a backslash is continued by the following line, the backslash being
 * replaced by a blank. Returns Line, or NULL once fgets() reports end of
 * input.
 *
 * Fixes over the previous version:
 *  - the last character is removed only when it really is a newline, so a
 *    final line without '\n' (or a line longer than the buffer) no longer
 *    loses a character;
 *  - isspace() receives an unsigned char value, as <ctype.h> requires.
 */
static char *GetLine(FILE *fp, int n, char *Line)
{
  int j,
    l,
    offs=0;

  for(;;) {
    if(!fgets(Line+offs, n-offs, fp)) {
      return NULL;
    }
    if(Line[offs]=='#') continue;          /* comment line */
    l = strlen(Line+offs);
    if(l>0 && Line[offs+l-1]=='\n') {
      Line[offs + --l] = 0;                /* strip the line terminator */
    }
    /* count leading white space; l shrinks to the remaining length */
    for(j=offs; Line[j] && isspace((unsigned char)Line[j]); j++, l--)
      ;
    if(l<1) continue;                      /* nothing left on this line */
    if(j > offs) {
      /* shift the text left over the leading white space */
      char *s = Line+offs,
        *q = Line+j;
      while((*s++=*q++))
        ;
    }
    if(Line[offs+l-1]=='\\') {
      /* continuation: keep reading behind what we have so far */
      offs += l;
      Line[offs-1] = ' ';
    } else {
      break;
    }
  }
  return Line;
}
/*
 * Interprets one line of the form "[prog/]name value".
 *
 * A "prog/" prefix restricts the line to the program called prog: lines
 * meant for a different program are ignored. The name is looked up in the
 * sorted table cmds and, when found, the rest of the line is handed to
 * SetParam().
 *
 * Returns 0 when the line was applied or may be ignored, and non-zero when
 * a name qualified with this program's own prefix is unknown.
 * Line is patched temporarily while scanning but restored before return.
 */
static int Scan(ProgName, cmds, Line)
char *ProgName,
*Line;
Cmd_T *cmds;
{
char *q,
*p;
int i,
hl,
HasToMatch = FALSE,
c0,
c;
/* skip leading separators; hl is the length of the first token */
p = Line+strspn(Line, SepString);
if(!(hl=strcspn(p, SepString))) {
return 0;
}
/* a '/' inside the first token separates a program-name prefix */
if((q=strchr(p, '/')) && q-p<hl) {
*q = 0;
if(strcmp(p, ProgName)) {
*q = '/';
return 0;
}
*q = '/';
HasToMatch=TRUE;
p = q+1;
}
if(!(hl = strcspn(p, SepString))) {
return 0;
}
/* terminate the name in place for the comparison, then restore it */
c0 = p[hl];
p[hl] = 0;
/* the table is sorted, so stop at the first entry that is not smaller */
for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++)
;
p[hl] = c0;
if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString));
return HasToMatch && c;
}
/*
 * Stores the textual value s into the variable behind cmd, converting it
 * according to the parameter type, and remembers a copy of s in
 * cmd->ArgStr. An empty value is only accepted for string parameters;
 * for all other types a warning is printed and nothing is changed.
 * Conversion failures and unknown types terminate the program.
 * Returns 0.
 * NOTE(review): setting the same parameter more than once leaks the
 * previous ArgStr copy (and, for string parameters, the previous value).
 */
static int SetParam(cmd, s)
Cmd_T *cmd;
char *s;
{
if(!*s && cmd->Type != CMDSTRINGTYPE) {
fprintf(stderr,
"WARNING: No value specified for parameter \"%s\"\n",
cmd->Name);
return 0;
}
switch(cmd->Type) {
case CMDDOUBLETYPE:
if(sscanf(s, "%lf", (double*)cmd->Val)!=1) {
fprintf(stderr,
"Float value required for parameter \"%s\"\n",
cmd->Name);
exit(1);
}
break;
case CMDENUMTYPE:
SetEnum(cmd, s);
break;
case CMDINTTYPE:
if(sscanf(s, "%d", (int*)cmd->Val)!=1) {
fprintf(stderr,
"Integer value required for parameter \"%s\"\n",
cmd->Name);
exit(1);
}
break;
case CMDSTRINGTYPE:
/* the words "<NULL>" and "NULL" select a null pointer */
*(char **)cmd->Val = (strcmp(s, "<NULL>") && strcmp(s, "NULL"))
? strdup(s)
: 0;
break;
case CMDSTRARRAYTYPE:
SetStrArray(cmd, s);
break;
case CMDGTETYPE:
SetGte(cmd, s);
break;
case CMDLTETYPE:
SetLte(cmd, s);
break;
case CMDSUBRANGETYPE:
SetSubrange(cmd, s);
break;
default:
fprintf(stderr, "%s: %s %d %s \"%s\"\n",
"SetParam",
"Unknown Type",
cmd->Type,
"for parameter",
cmd->Name);
exit(1);
}
/* keep the raw text so SPrintParams() can reproduce it */
cmd->ArgStr = strdup(s);
return 0;
}
/* Looks s up among the (non-empty) names of the parameter's enum table and
 * stores the matching index; an unknown name ends in EnumError(). */
static int SetEnum(cmd, s)
Cmd_T *cmd;
char *s;
{
  Enum_T *entry = (Enum_T *)cmd->p;

  while(entry->Name) {
    if(entry->Name[0] && strcmp(s, entry->Name)==0) {
      *(int *) cmd->Val = entry->Idx;
      return 0;
    }
    entry++;
  }
  return EnumError(cmd, s);
}
/* Parses s as an integer and stores it if it lies within the inclusive
 * range kept in cmd->p; otherwise SubrangeError() is called. A value that
 * is not an integer terminates the program. */
static int SetSubrange(cmd, s)
Cmd_T *cmd;
char *s;
{
  int value;
  int *bounds;

  if(sscanf(s, "%d", &value)!=1) {
    fprintf(stderr,
            "Integer value required for parameter \"%s\"\n",
            cmd->Name);
    exit(1);
  }
  bounds = (int *)cmd->p;
  if(value < bounds[0] || value > bounds[1]) {
    return SubrangeError(cmd, value);
  }
  *(int *)cmd->Val = value;
  return 0;
}
/* Parses s as an integer and stores it if it is not below the lower bound
 * kept in cmd->p; otherwise GteError() is called. A value that is not an
 * integer terminates the program. */
static int SetGte(cmd, s)
Cmd_T *cmd;
char *s;
{
  int value;
  int lowest;

  if(sscanf(s, "%d", &value)!=1) {
    fprintf(stderr,
            "Integer value required for parameter \"%s\"\n",
            cmd->Name);
    exit(1);
  }
  lowest = *(int *)cmd->p;
  if(value < lowest) {
    return GteError(cmd, value);
  }
  *(int *)cmd->Val = value;
  return 0;
}
/* Splits s with str2array(), using the separator set kept in cmd->p, and
 * stores the resulting string array in the parameter's variable.
 * Returns 0. */
static int SetStrArray(cmd, s)
Cmd_T *cmd;
char *s;
{
*(char***)cmd->Val = str2array(s, (char*)cmd->p);
return 0;
}
/* Parses s as an integer and stores it if it does not exceed the upper
 * bound kept in cmd->p; otherwise LteError() is called. A value that is
 * not an integer terminates the program. */
static int SetLte(cmd, s)
Cmd_T *cmd;
char *s;
{
  int value;
  int highest;

  if(sscanf(s, "%d", &value)!=1) {
    fprintf(stderr,
            "Integer value required for parameter \"%s\"\n",
            cmd->Name);
    exit(1);
  }
  highest = *(int *)cmd->p;
  if(value > highest) {
    return LteError(cmd, value);
  }
  *(int *)cmd->Val = value;
  return 0;
}
/* Reports an invalid enum value, lists the accepted names on stderr and
 * terminates the program with status 1. Does not return. */
static int EnumError(cmd, s)
Cmd_T *cmd;
char *s;
{
  Enum_T *entry = (Enum_T *)cmd->p;

  fprintf(stderr,
          "Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
  fprintf(stderr, "Valid values are:\n");
  while(entry->Name) {
    if(entry->Name[0]) {
      fprintf(stderr, " %s\n", entry->Name);
    }
    entry++;
  }
  fprintf(stderr, "\n");
  exit(1);
}
/* Reports a value below the parameter's lower bound and terminates the
 * program with status 1. Does not return. */
static int GteError(cmd, n)
Cmd_T *cmd;
int n;
{
  int lowest = *(int *)cmd->p;

  fprintf(stderr,
          "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
  fprintf(stderr, "Valid values must be greater than or equal to %d\n",
          lowest);
  exit(1);
}
/* Reports a value above the parameter's upper bound and terminates the
 * program with status 1. Does not return. */
static int LteError(cmd, n)
Cmd_T *cmd;
int n;
{
  int highest = *(int *)cmd->p;

  fprintf(stderr,
          "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
  fprintf(stderr, "Valid values must be less than or equal to %d\n",
          highest);
  exit(1);
}
/* Reports a value outside the parameter's inclusive range and terminates
 * the program with status 1. Does not return. */
static int SubrangeError(cmd, n)
Cmd_T *cmd;
int n;
{
  int *bounds = (int *)cmd->p;

  fprintf(stderr,
          "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
  fprintf(stderr, "Valid values range from %d to %d\n",
          bounds[0], bounds[1]);
  exit(1);
}
/* Prints the name of an enum parameter and, when ValFlag is non-zero, the
 * name of every (non-empty) table entry whose index equals the current
 * value. Returns 0. */
static int PrintEnum(cmd, ValFlag, fp)
Cmd_T *cmd;
int ValFlag;
FILE *fp;
{
  Enum_T *entry;

  fprintf(fp, "%s", cmd->Name);
  if(ValFlag) {
    entry = (Enum_T *)cmd->p;
    while(entry->Name) {
      if(entry->Name[0] && entry->Idx==*(int *)cmd->Val) {
        fprintf(fp, ": %s", entry->Name);
      }
      entry++;
    }
  }
  fprintf(fp, "\n");
  return 0;
}
/* Prints the name of a string-array parameter and, when ValFlag is
 * non-zero, its elements: the first one behind the name, the others on
 * lines of their own, indented to line up with it. An empty array is shown
 * as NULL, a null array pointer as nothing. Returns 0. */
static int PrintStrArray(cmd, ValFlag, fp)
Cmd_T *cmd;
int ValFlag;
FILE *fp;
{
  char **item = *(char***)cmd->Val;
  int width = 4+strlen(cmd->Name);
  char *indent;

  fprintf(fp, "%s", cmd->Name);
  /* run of width+1 blanks used in front of continuation lines */
  indent = malloc(width+2);
  memset(indent, ' ', width+1);
  indent[width+1] = 0;
  if(ValFlag) {
    const char *first;

    if(!item) {
      first = "";
    } else if(*item) {
      first = *item++;
    } else {
      first = "NULL";
    }
    fprintf(fp, ": %s", first);
    if(item) {
      for(; *item; item++) {
        fprintf(fp, "\n%s %s", indent, *item);
      }
    }
  }
  free(indent);
  fprintf(fp, "\n");
  return 0;
}
/* Splits s at runs of characters from sep (SepString when sep is null)
 * and returns a freshly allocated, null-terminated array holding a copy of
 * every token. */
static char **str2array(s, sep)
char *s,
 *sep;
{
  char *cur,
    **tokens;
  int count,
    len;

  if(!sep) sep = SepString;
  s += strspn(s, sep);                 /* ignore leading separators */

  /* pass 1: count the tokens */
  count = 0;
  for(cur = s; *cur; ++count) {
    cur += strcspn(cur, sep);
    cur += strspn(cur, sep);
  }
  tokens = calloc(count+1, sizeof(char *));

  /* pass 2: copy them out */
  count = 0;
  for(cur = s; *cur; ++count) {
    len = strcspn(cur, sep);
    tokens[count] = malloc(len+1);
    memcpy(tokens[count], cur, len);
    tokens[count][len] = 0;
    cur += len;
    cur += strspn(cur, sep);
  }
  return tokens;
}

View File

@ -0,0 +1,51 @@
// $Id: cmd.h 1307 2007-03-14 22:22:36Z hieuhoang1972 $
#if !defined(CMD_H)
#define CMD_H
#define CMDDOUBLETYPE 1
#define CMDENUMTYPE 2
#define CMDINTTYPE 3
#define CMDSTRINGTYPE 4
#define CMDSUBRANGETYPE 5
#define CMDGTETYPE 6
#define CMDLTETYPE 7
#define CMDSTRARRAYTYPE 8
#define CMDBOOLTYPE 9
/* One entry of an enum table: a textual name and the integer it stands
 * for. Tables end with an entry whose Name is null. */
typedef struct {
char *Name;
int Idx;
} Enum_T;
/* Description of one declared parameter. */
typedef struct {
int Type;       /* one of the CMD...TYPE constants */
char *Name,     /* parameter name */
*ArgStr;        /* text of the value last assigned to the parameter, if any */
void *Val,      /* address of the variable that receives the value */
*p;             /* type-specific extra data (enum table, bounds, separators) */
} Cmd_T;
#ifdef __cplusplus
extern "C" {
#endif
#if defined(__STDC__)
int DeclareParams(char *, ...);
#else
int DeclareParams();
#endif
int GetParams(int *n, char ***a,char *CmdFileName),
SPrintParams(),
PrintParams();
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,315 @@
/*
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include "alignment.h"
#include "transpair_model3.h"
#include <map>
#include "collCounts.h"
#include "MoveSwapMatrix.h"
#include "D5Tables.h"
#include "transpair_model5.h"
#include "transpair_modelhmm.h"
#include "Parameter.h"
extern float COUNTINCREASE_CUTOFF_AL;
// unifies collectCountsOverAlignments and findAlignmentNeighborhood FJO-20/07/99
// Accumulates counts over the alignment held by msc and over all of its
// single-move and single-swap neighbours that are not marked as deleted.
//   ascore       score of the centre alignment
//   dtcount      (i,j) accumulator for source position i / target position j
//   ncount       (i,fertility) accumulator
//   p1count, p0count  accumulators weighted by the fertility of position 0
//   total_count  receives the summed score of everything visited
// Returns the number of alignments that contributed.
template<class TRANSPAIR> int collectCountsOverNeighborhood(
const MoveSwapMatrix<TRANSPAIR>&msc, LogProb ascore,
Array2<LogProb,Vector<LogProb> >&dtcount,
Array2<LogProb,Vector<LogProb> >&ncount, LogProb&p1count,
LogProb&p0count, LogProb&total_count) {
int nAl=0;
const PositionIndex l=msc.get_l(), m=msc.get_m();
Array2<LogProb,Vector<LogProb> > cmove(l+1, m+1), cswap(l+1, m+1);
Vector<LogProb> negmove(m+1),negswap(m+1),plus1fert(l+1),minus1fert(l+1);
LogProb total_move, total_swap;
// the centre alignment itself counts towards the move total
if (msc.isCenterDeleted()==0) {
total_move+=ascore;
nAl++;
}
// all moves: re-attach target position j to source position i
for (PositionIndex j=1; j<=m; j++) {
for (PositionIndex i=0; i<=l; i++) {
if (msc(j)!=i && !msc.isDelMove(i, j) ) {
double cm = msc.cmove(i, j);
// a negative factor marks a move that must be skipped
if(cm<0)
continue;
LogProb newscore=ascore*cm;
total_move+=newscore;
nAl++;
cmove(i, j)+=newscore;
negmove[j]+=newscore;
plus1fert[i]+=newscore;
minus1fert[msc(j)]+=newscore;
}
}
}
// all swaps of two target positions with different source positions
for (PositionIndex j1=1; j1<=m; j1++) {
for (PositionIndex j2=j1+1; j2<=m; j2++) {
if (msc(j1)!=msc(j2) && !msc.isDelSwap(j1, j2) ) {
double cs = msc.cswap(j1, j2);
if(cs < 0){
continue;
}
LogProb newscore=ascore*cs;
total_swap+=newscore;
nAl++;
cswap(msc(j1), j2)+=newscore;
cswap(msc(j2), j1)+=newscore;
negswap[j1]+=newscore;
negswap[j2]+=newscore;
}
}
}
total_count+=total_move+total_swap;
// the centre link (i==msc(j)) gets everything except the neighbours that
// changed position j; every other link gets the neighbours that created it
for (PositionIndex j=1; j<=m; j++)
for (PositionIndex i=0; i<=l; i++)
dtcount(i, j) += ((i==msc(j)) ? (total_count
-(negmove[j]+negswap[j])) : (cswap(i, j)+cmove(i, j)));
// fertility counts: unchanged, one less and one more than in the centre
for (PositionIndex i=1; i<=l; i++) {
LogProb temp=minus1fert[i]+plus1fert[i];
if (msc.fert(i)<MAX_FERTILITY)
ncount(i, msc.fert(i))+=total_count-temp;
if (msc.fert(i)>0&&msc.fert(i)-1<MAX_FERTILITY)
ncount(i, msc.fert(i)-1)+=minus1fert[i];
else if (minus1fert[i]!=0.0)
cerr << "ERROR: M1Fa: " << minus1fert[i] << ' ' << i << ' '
<< msc.fert(i)<< endl;
if (msc.fert(i)+1<MAX_FERTILITY)
ncount(i, msc.fert(i)+1)+=plus1fert[i];
}
// same three cases for source position 0, feeding p1count / p0count
LogProb temp=minus1fert[0]+plus1fert[0];
p1count += (total_count-temp)*(LogProb)msc.fert(0);
p0count += (total_count-temp)*(LogProb)(m-2*msc.fert(0));
if (msc.fert(0)>0) {
p1count += (minus1fert[0])*(LogProb)(msc.fert(0)-1);
p0count += (minus1fert[0])*(LogProb)(m-2*(msc.fert(0)-1));
} else if (minus1fert[0]!=0.0)
cerr << "ERROR: M1Fb: " << minus1fert[0] << endl;
if (int(m)-2*(int(msc.fert(0))+1)>=0) {
p1count += (plus1fert[0])*(LogProb)(msc.fert(0)+1);
p0count += (plus1fert[0])*(LogProb)(m-2*(msc.fert(0)+1));
}
msc.check();
return nAl;
}
;
// Overload taking an untyped table pointer (void*): there is nothing to
// collect, so it does nothing and returns 0.0.
template<class TRANSPAIR> double collectCountsOverNeighborhoodForSophisticatedModels(
const MoveSwapMatrix<TRANSPAIR>&, LogProb, void*) {
return 0.0;
}
// Adds normalized_ascore to the d4 table for one alignment msc. Every
// target position j that is not attached to source position 0 is counted
// either as the first word of its cept (relative to the centre of the
// previous cept) or as a later word (relative to the previous word in the
// same cept).
template<class TRANSPAIR> void _collectCountsOverNeighborhoodForSophisticatedModels(
const MoveSwapMatrix<TRANSPAIR>&Mmsc, const alignment&msc,
const TRANSPAIR&ef, LogProb normalized_ascore, d4model*d4Table) {
Mmsc.check();
const PositionIndex m=msc.get_m(), l=msc.get_l();
for (PositionIndex j=1; j<=m; ++j)
if (msc(j)!=0)
// note: the else below belongs to this inner if (head vs. non-head)
if (msc.get_head(msc(j))==j) {
int ep=msc.prev_cept(msc(j));
d4Table->augCountRef_first(j, msc.get_center(ep),
d4Table->ewordclasses->getClass(ef.get_es(ep)),
d4Table->fwordclasses->getClass(ef.get_fs(j)), l, m,normalized_ascore);
} else {
//massert( &d4Table->getCountRef_bigger(j,msc.prev_in_cept(j),0,d4Table->fwordclasses.getClass(ef.get_fs(j)),l,m) == ef.getCountSecond(j,msc.prev_in_cept(j) ));
d4Table->augCountRef_bigger(j, msc.prev_in_cept(j), 0,
d4Table->fwordclasses->getClass(ef.get_fs(j)), l, m,normalized_ascore);
}
}
// Adds normalized_ascore to the d5 table for one alignment msc. The
// embedded d4 table is updated first. Then the source positions are
// visited in order and the target positions of each cept are walked
// through its linked list, with `vac` marking the target positions already
// consumed and `vac_all` counting those still free.
template<class TRANSPAIR> void _collectCountsOverNeighborhoodForSophisticatedModels(
const MoveSwapMatrix<TRANSPAIR>&Mmsc, const alignment&msc,
const TRANSPAIR&ef, LogProb normalized_ascore, d5model*d5Table) {
Mmsc.check();
_collectCountsOverNeighborhoodForSophisticatedModels(Mmsc, msc, ef,
normalized_ascore, &d5Table->d4m);
Mmsc.check();
const PositionIndex m=msc.get_m(), l=msc.get_l();
PositionIndex prev_cept=0;
PositionIndex vac_all=m;
Vector<char> vac(m+1,0);
for (PositionIndex i=1; i<=l; i++) {
PositionIndex cur_j=msc.als_i[i];
PositionIndex prev_j=0;
PositionIndex k=0;
if (cur_j) { // process first word of cept
k++;
d5Table->getCountRef_first(vacancies(vac, cur_j), vacancies(vac,
msc.get_center(prev_cept)),
d5Table->fwordclasses->getClass(ef.get_fs(cur_j)), l, m,
vac_all-msc.fert(i)+k) +=normalized_ascore;
vac_all--;
assert(vac[cur_j]==0);
vac[cur_j]=1;
Mmsc.check();
prev_j=cur_j;
cur_j=msc.als_j[cur_j].next;
}
while (cur_j) { // process following words of cept
k++;
int vprev=vacancies(vac, prev_j);
d5Table->getCountRef_bigger(vacancies(vac, cur_j), vprev,
d5Table->fwordclasses->getClass(ef.get_fs(cur_j)), l, m,
vac_all-vprev/*was gone*/-msc.fert(i)+k)+=normalized_ascore;
vac_all--;
vac[cur_j]=1;
Mmsc.check();
prev_j=cur_j;
cur_j=msc.als_j[cur_j].next;
}
// the list of position i must contain exactly fert(i) entries
assert(k==msc.fert(i));
if (k)
prev_cept=i;
}
// whatever is still vacant belongs to source position 0
assert(vac_all==msc.fert(0));
}
extern int NumberOfAlignmentsInSophisticatedCountCollection;
// Collects d4/d5 counts for the alignment held by msc and for each of its
// move and swap neighbours whose weighted score exceeds
// COUNTINCREASE_CUTOFF_AL. A working copy x of the alignment is changed
// into each neighbour, counted, and changed back again. Returns the sum of
// the scores that were actually counted.
template<class TRANSPAIR, class MODEL> double collectCountsOverNeighborhoodForSophisticatedModels(
const MoveSwapMatrix<TRANSPAIR>&msc, LogProb normalized_ascore,
MODEL*d5Table) {
const PositionIndex m=msc.get_m(), l=msc.get_l();
alignment x(msc);
double sum=0;
msc.check();
// the centre alignment itself
if ( !msc.isCenterDeleted() ) {
_collectCountsOverNeighborhoodForSophisticatedModels<TRANSPAIR>(msc, x,
msc.get_ef(), normalized_ascore, d5Table);
NumberOfAlignmentsInSophisticatedCountCollection++;
sum+=normalized_ascore;
}
msc.check();
// all moves
for (WordIndex j=1; j<=m; j++)
for (WordIndex i=0; i<=l; i++) {
WordIndex old=x(j);
if (i!=old&& !msc.isDelMove(i, j) ) {
msc.check();
double cm =msc.cmove(i, j);
if(cm < 0){
continue;
}
double c=cm*normalized_ascore;
if (c > COUNTINCREASE_CUTOFF_AL) {
x.set(j, i);
_collectCountsOverNeighborhoodForSophisticatedModels<
TRANSPAIR>(msc, x, msc.get_ef(), c, d5Table);
NumberOfAlignmentsInSophisticatedCountCollection++;
// undo the move so x is the centre alignment again
x.set(j, old);
sum+=c;
}
msc.check();
}
}
// all swaps
for (PositionIndex j1=1; j1<=m; j1++) {
for (PositionIndex j2=j1+1; j2<=m; j2++) {
if (msc(j1)!=msc(j2) && !msc.isDelSwap(j1, j2) ) {
double cs = msc.cswap(j1, j2);
if(cs < 0)
continue;
double c=cs*normalized_ascore;
msc.check();
if (c > COUNTINCREASE_CUTOFF_AL) {
int old1=msc(j1), old2=msc(j2);
x.set(j1, old2);
x.set(j2, old1);
_collectCountsOverNeighborhoodForSophisticatedModels<
TRANSPAIR>(msc, x, msc.get_ef(), c, d5Table);
NumberOfAlignmentsInSophisticatedCountCollection++;
// undo the swap
x.set(j1, old1);
x.set(j2, old2);
sum+=c;
}
msc.check();
}
}
}
msc.check();
return sum;
}
// Collects counts over the neighbourhoods of all alignments in smsc and,
// when addCounts is set, adds the normalised results to the translation,
// alignment, distortion and fertility tables and to p0count / p1count.
//   smsc    pairs of (move/swap matrix, score of its centre alignment)
//   es, fs  source and target sentence (index 0 is part of es's range)
//   _total  receives the unnormalised total score
//   count   weight of the sentence pair; the total is divided by it
//   d4Table optional table for the sophisticated models (may be null)
// Returns the number of alignments visited, or 0 when the normalised total
// is infinite.
template<class TRANSPAIR, class MODEL> int collectCountsOverNeighborhood(
const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,
Vector<WordIndex>&es, Vector<WordIndex>&fs, tmodel<COUNT,PROB>&tTable,
amodel<COUNT>&aCountTable, amodel<COUNT>&dCountTable,
nmodel<COUNT>&nCountTable, SyncDouble&p1count, SyncDouble&p0count,
LogProb&_total, float count, bool addCounts, MODEL*d4Table) {
int nAl=0;
const PositionIndex l=es.size()-1, m=fs.size()-1;
Array2<LogProb,Vector<LogProb> > dtcount(l+1, m+1), ncount(l+1,
MAX_FERTILITY+1);
LogProb p0=0, p1=0, all_total=0;
// gather the unnormalised counts of every neighbourhood
for (unsigned int i=0; i<smsc.size(); ++i) {
LogProb this_total=0;
nAl+=collectCountsOverNeighborhood(*smsc[i].first, smsc[i].second,
dtcount, ncount, p1, p0, this_total);
all_total+=this_total;
}
_total=all_total;
if(count==0){
cerr << "WARNING: COUNT ==0" << endl;
}else
all_total/=(double)count;
if(isinf(all_total)){
cerr << "ALL_TOTAL is INF\n" ;
return 0;
}
double sum2=0;
// NOTE(review): the division by all_total below happens before the zero
// check further down -- confirm that LogProb tolerates a zero divisor.
if (addCounts && d4Table) {
for (unsigned int i=0; i<smsc.size(); ++i) {
//for(WordIndex j=1;j<=m;j++)for(WordIndex ii=0;ii<=l;ii++)
// (*smsc[i].first).cmove(ii,j);
sum2+=collectCountsOverNeighborhoodForSophisticatedModels(
*smsc[i].first, smsc[i].second/all_total, d4Table);
}
// sanity check: the normalised scores should add up to `count`
if (!(fabs(count-sum2)<0.05))
cerr << "WARNING: DIFFERENT SUMS: (" << count << ") (" << sum2 << ") (" << all_total
<< ")\n";
}
/**
NOTE! HERE IS THE UPDATE PROCESS
*/
if(fabs(all_total)==0){
// Error
cerr << "Hill climbing yields zero count " << endl;
}else{
if (addCounts) {
for (PositionIndex i=0; i<=l; i++) {
for (PositionIndex j=1; j<=m; j++) {
LogProb ijadd=dtcount(i, j)/all_total;
// counts at or below the cutoff are dropped
if (ijadd>COUNTINCREASE_CUTOFF_AL) {
tTable.incCount(es[i], fs[j], ijadd);
dCountTable.addValue(j, i, l, m, ijadd);
aCountTable.addValue(i, j, l, m, ijadd);
}
}
if (i>0)
for (PositionIndex n=0; n<MAX_FERTILITY; n++)
nCountTable.addValue(es[i], n, ncount(i, n)/all_total);
}
p0count+=p0/all_total;
p1count+=p1/all_total;
}}
return nAl;
}

View File

@ -0,0 +1,80 @@
/*
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
This file is part of GIZA++ ( extension of GIZA ).
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef collCounts_h_defined
#define collCounts_h_defined
#include "alignment.h"
#include "transpair_model3.h"
#include <map>
#include "MoveSwapMatrix.h"
#include "D4Tables.h"
#include "transpair_model4.h"
/// Small value type describing one alignment edit as a (type, a, b) triple.
/// The meaning of the three fields is assigned by the code that fills the
/// record (presumably makeOneMoveSwap — verify there); this class only stores
/// them.
class OneMoveSwap
{
 public:
  short type;  ///< kind of operation; 0 for a default-constructed record
  short a,b;   ///< the two operands of the operation
  /// Builds a record from explicit field values.
  OneMoveSwap(short _type,short _a,short _b)
    : type(_type),a(_a),b(_b)
  {}
  /// Builds an all-zero record. Every field is initialized so that comparing
  /// or printing a default-constructed value never reads indeterminate memory.
  OneMoveSwap()
    : type(0),a(0),b(0)
  {}
};
/// Strict weak ordering on OneMoveSwap: lexicographic on (type, a, b).
inline bool operator<(const OneMoveSwap&a,const OneMoveSwap&b)
{
  // The first field that differs decides the order.
  if(a.type!=b.type)
    return a.type<b.type;
  if(a.a!=b.a)
    return a.a<b.a;
  return a.b<b.b;
}
/// Two records are equal when all three fields match.
inline bool operator==(const OneMoveSwap&a,const OneMoveSwap&b)
{
  if(a.type!=b.type)
    return false;
  if(a.a!=b.a)
    return false;
  return a.b==b.b;
}
/// Prints a record as "(type,a,b)".
inline ostream&operator<<(ostream&out,const OneMoveSwap&o)
{
  out << '(' << o.type << ",";
  out << o.a << ",";
  out << o.b << ")";
  return out;
}
/// Prints every element of the set, each followed by a single space, to the
/// stream passed in. (The previous version wrote to cout regardless of the
/// target stream, so output sent to cerr or to a file went to stdout instead.)
inline ostream &operator<<(ostream &out,const set<OneMoveSwap>&s)
{
  for(set<OneMoveSwap>::const_iterator i=s.begin();i!=s.end();++i)
    out << *i << ' ';
  return out;
}
// Fills oms from the two alignments a and b and returns a bool; the exact
// contract is defined by the implementation, which is not visible here.
bool makeOneMoveSwap(const alignment&a,const alignment&b,set<OneMoveSwap>&oms);
// Collects counts over the neighborhoods of the alignments in smsc and, when
// addCounts is set, adds them to the given count tables. d4Table is optional
// and defaults to a null pointer.
// NOTE(review): this declaration takes p1count/p0count as double&, while the
// template definition takes SyncDouble& for the same two parameters — confirm
// which signature is intended, since the two do not declare the same function.
template<class TRANSPAIR,class MODEL>
int collectCountsOverNeighborhood(const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,
Vector<WordIndex>&es,
Vector<WordIndex>&fs,tmodel<COUNT,PROB>&tTable,
amodel<COUNT>&aCountTable,amodel<COUNT>&dCountTable,
nmodel<COUNT>&nCountTable,double&p1count,double&p0count,
LogProb&_total,float count,bool addCounts,MODEL*d4Table=0);
#endif

View File

@ -0,0 +1,3 @@
/*!
Global definitions shared by all main files of the program set.
*/

View File

@ -0,0 +1,126 @@
// D4 Normalization executable
#include <iostream>
#include <strstream>
#include <string>
#include "hmm.h"
#include "D4Tables.h"
#include "Parameter.h"
// Values passed as the last argument of the Model2_Iterations and
// HMM_Iterations parameter definitions below.
#define ITER_M2 0
#define ITER_MH 5
// Per-model iteration parameters. The argument layout is defined by the
// GLOBAL_PARAMETER* macros in Parameter.h (not visible here).
GLOBAL_PARAMETER3(int,Model1_Iterations,"Model1_Iterations","NO. ITERATIONS MODEL 1","m1","number of iterations for Model 1",PARLEV_ITER,5);
GLOBAL_PARAMETER3(int,Model2_Iterations,"Model2_Iterations","NO. ITERATIONS MODEL 2","m2","number of iterations for Model 2",PARLEV_ITER,ITER_M2);
// NOTE(review): the string arguments here are ordered differently from the
// sibling *_Iterations entries ("mh" sits where the others have the long
// display name, and again where they have the description) — confirm against
// the GLOBAL_PARAMETER3 definition before relying on the parameter's help text.
GLOBAL_PARAMETER3(int,HMM_Iterations,"HMM_Iterations","mh","number of iterations for HMM alignment model","mh", PARLEV_ITER,ITER_MH);
GLOBAL_PARAMETER3(int,Model3_Iterations,"Model3_Iterations","NO. ITERATIONS MODEL 3","m3","number of iterations for Model 3",PARLEV_ITER,5);
GLOBAL_PARAMETER3(int,Model4_Iterations,"Model4_Iterations","NO. ITERATIONS MODEL 4","m4","number of iterations for Model 4",PARLEV_ITER,5);
GLOBAL_PARAMETER3(int,Model5_Iterations,"Model5_Iterations","NO. ITERATIONS MODEL 5","m5","number of iterations for Model 5",PARLEV_ITER,0);
GLOBAL_PARAMETER3(int,Model6_Iterations,"Model6_Iterations","NO. ITERATIONS MODEL 6","m6","number of iterations for Model 6",PARLEV_ITER,0);
// Smoothing and heuristic thresholds.
GLOBAL_PARAMETER(float, PROB_SMOOTH,"probSmooth","probability smoothing (floor) value ",PARLEV_OPTHEUR,1e-7);
GLOBAL_PARAMETER(float, MINCOUNTINCREASE,"minCountIncrease","minimal count increase",PARLEV_OPTHEUR,1e-7);
// Output / logging switches.
GLOBAL_PARAMETER2(int,Transfer_Dump_Freq,"TRANSFER DUMP FREQUENCY","t2to3","output: dump of transfer from Model 2 to 3",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER2(bool,Verbose,"verbose","v","0: not verbose; 1: verbose",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER(bool,Log,"log","0: no logfile; 1: logfile",PARLEV_OUTPUT,0);
// EM-related parameters.
GLOBAL_PARAMETER(double,P0,"p0","fixed value for parameter p_0 in IBM-3/4 (if negative then it is determined in training)",PARLEV_EM,-1.0);
GLOBAL_PARAMETER(double,M5P0,"m5p0","fixed value for parameter p_0 in IBM-5 (if negative then it is determined in training)",PARLEV_EM,-1.0);
GLOBAL_PARAMETER3(bool,Peg,"pegging","p","DO PEGGING? (Y/N)","0: no pegging; 1: do pegging",PARLEV_EM,0);
GLOBAL_PARAMETER(short,OldADBACKOFF,"adbackoff","",-1,0);
// MAX_SENTENCE_LENGTH is handed to the d4model constructor in main().
GLOBAL_PARAMETER2(unsigned int,MAX_SENTENCE_LENGTH,"ml","MAX SENTENCE LENGTH","maximum sentence length",0,MAX_SENTENCE_LENGTH_ALLOWED);
GLOBAL_PARAMETER(short, DeficientDistortionForEmptyWord,"DeficientDistortionForEmptyWord","0: IBM-3/IBM-4 as described in (Brown et al. 1993); 1: distortion model of empty word is deficient; 2: distoriton model of empty word is deficient (differently); setting this parameter also helps to avoid that during IBM-3 and IBM-4 training too many words are aligned with the empty word",PARLEV_MODELS,0);
/**
Here are parameters to support Load models and dump models
*/
GLOBAL_PARAMETER(int,restart,"restart","Restart training from a level,0: Normal restart, from model 1, 1: Model 1, 2: Model 2 Init (Using Model 1 model input and train model 2), 3: Model 2, (using model 2 input and train model 2), 4 : HMM Init (Using Model 1 model and train HMM), 5: HMM (Using Model 2 model and train HMM) 6 : HMM (Using HMM Model and train HMM), 7: Model 3 Init (Use HMM model and train model 3) 8: Model 3 Init (Use Model 2 model and train model 3) 9: Model 3, 10: Model 4 Init (Use Model 3 model and train Model 4) 11: Model 4 and on, ",PARLEV_INPUT,0);
GLOBAL_PARAMETER(bool,dumpCount,"dumpcount","Whether we are going to dump count (in addition to) final output?",PARLEV_OUTPUT,false);
GLOBAL_PARAMETER(bool,dumpCountUsingWordString,"dumpcountusingwordstring","In count table, should actual word appears or just the id? default is id",PARLEV_OUTPUT,false);
/// END
// Plain global flags. main() in this file does not read them; presumably they
// exist because shared code linked into this executable refers to them —
// TODO confirm.
short OutputInAachenFormat=0;
bool Transfer=TRANSFER;
bool Transfer2to3=0;
short NoEmptyWord=0;
bool FEWDUMPS=0;
GLOBAL_PARAMETER(bool,ONLYALDUMPS,"ONLYALDUMPS","1: do not write any files",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER(short,NCPUS,"NCPUS","Number of CPUS",PARLEV_EM,2);
GLOBAL_PARAMETER(short,CompactAlignmentFormat,"CompactAlignmentFormat","0: detailled alignment format, 1: compact alignment format ",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER2(bool,NODUMPS,"NODUMPS","NO FILE DUMPS? (Y/N)","1: do not write any files",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
"maximal fertility for fertility models", PARLEV_EM, 10);
using namespace std;
// File-name globals. main() assigns only SourceVocabFilename and
// TargetVocabFilename; the others stay empty in this executable.
string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
int main(int argc, char* argv[]){
if(argc < 5){
cerr << "Usage: " << argv[0] << " vcb1 vcb2 outputFile baseFile [additional1 ]..." << endl;
return 1;
}
WordClasses ewc,fwc;
d4model d4m(MAX_SENTENCE_LENGTH,ewc,fwc);
Vector<WordEntry> evlist,fvlist;
vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
TargetVocabFilename = argv[2];
SourceVocabFilename = argv[1];
eTrainVcbList.setName(argv[1]);
fTrainVcbList.setName(argv[2]);
eTrainVcbList.readVocabList();
fTrainVcbList.readVocabList();
string evcbcls = argv[1];
string fvcbcls = argv[2];
evcbcls += ".classes";
fvcbcls += ".classes";
d4m.makeWordClasses(eTrainVcbList, fTrainVcbList, evcbcls.c_str(), fvcbcls.c_str(),eTrainVcbList,fTrainVcbList);
// Start iteration:
for(int i =4; i< argc ; i++){
string name = argv[i];
string nameA = name ;
string nameB = name + ".b";
if(d4m.augCount(nameA.c_str(),nameB.c_str())){
cerr << "Loading (d4) table " << nameA << "/" << nameB << " OK" << endl;
}else{
cerr << "ERROR Loading (d) table " << nameA << " " << nameB << endl;
}
}
d4m.normalizeTable();
string DiffOPath = argv[3];
string diff1 = DiffOPath;
string diff2 = DiffOPath+".b";
cerr << "Outputing d4 table to " << diff1 << " " << diff2;
d4m.printProbTable(diff1.c_str(),diff2.c_str());
}
// Some utility definitions that are needed to get this executable to compile.
// Log stream object; nothing in the code visible in this file opens it.
ofstream logmsg;
// Converts an integer to its decimal string representation.
// (Despite the name, the conversion goes from number to string.)
//
// Negative values are rendered with a leading '-'. The previous version
// produced a single garbage character for any negative input, because
// n % 10 is negative there and the loop stopped after one digit.
const std::string str2Num(int n) {
  const bool negative = n < 0;
  // Widen before negating so that the most negative int is handled too.
  long long magnitude = n;
  if (negative)
    magnitude = -magnitude;
  std::string number;
  do {
    // Digits come out least-significant first, so prepend each one.
    number.insert((size_t)0, 1, (char)(magnitude % 10 + '0'));
    magnitude /= 10;
  } while (magnitude > 0);
  if (negative)
    number.insert((size_t)0, 1, '-');
  return number;
}
// Definition of the LAMBDA global (initial value 1.09).
double LAMBDA=1.09;
// Reference alignments, one map per entry, keyed by a pair of ints.
// Nothing in the code visible in this file fills it.
Vector<map< pair<int,int>,char > > ReferenceAlignment;
double ErrorsInAlignment(const map< pair<int,int>,char >&reference,
const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
int&eventsMissing, int&eventsToomuch, int pair_no){
}
// Stub: writes nothing to out. Presumably present only because shared code
// linked into this executable refers to it — the caller is not visible here.
void printGIZAPars(ostream&out){
}

View File

@ -0,0 +1,78 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef _defs_h
#define _defs_h 1
#include <string>
#include <cmath>
#include <climits>
// Transfer-mode constants.
const int TRANSFER_SIMPLE=1;
const int TRANSFER=0;
// Upper bound used as a parameter value for the maximum sentence length.
const unsigned int MAX_SENTENCE_LENGTH_ALLOWED=101;
// Buffer size for training data; presumably counted in sentence pairs — verify at the use site.
const int TRAIN_BUFFER_SIZE= 50000;
// Word and position indices are fixed to 4-byte unsigned integers; the 2-byte
// alternative below is kept only as commented-out reference.
//#ifdef WORDINDEX_WITH_4_BYTE
typedef unsigned int WordIndex;
const unsigned int MAX_VOCAB_SIZE=UINT_MAX;
typedef unsigned int PositionIndex;
//#else
//typedef unsigned short WordIndex;
//const unsigned int MAX_VOCAB_SIZE=USHRT_MAX;
//typedef unsigned short PositionIndex;
//#endif
// Declared here, defined elsewhere (each executable supplies its own definition).
extern WordIndex MAX_FERTILITY;
const int MAX_W=457979;
extern double LAMBDA; // Lambda that is used to scale cross_entropy factor
// Probabilities and counts are stored in single precision.
typedef float PROB ;
typedef float COUNT ;
// Thin wrapper around a double.
// NOTE: despite the name, the value is stored and combined as a plain number
// (operator*= multiplies, operator+= adds); no logarithm is taken anywhere in
// this class.
//
// The converting constructors and operator double() are intentionally
// implicit so that a LogProb can be mixed freely with built-in arithmetic.
class LogProb {
 private:
  double x ;  // the wrapped value
 public:
  LogProb():x(0){}
  LogProb(double y):x(y){}
  LogProb(float y):x(y){}
  LogProb(int y):x(y){}
  LogProb(WordIndex y):x(y){}
  operator double() const {return x;}
  // Compound assignments return *this by reference (the conventional form),
  // so a chained expression such as (a += b) += c updates a itself instead of
  // a temporary copy, and no copy is made per operation.
  LogProb& operator *= (double y) { x *= y ; return *this;}
  LogProb& operator *= (LogProb y) { x *= y.x ; return *this;}
  LogProb& operator /= (double y) { x /= y ; return *this;}
  LogProb& operator /= (LogProb y) { x /= y.x ; return *this;}
  LogProb& operator += (double y) { x += y ; return *this;}
  LogProb& operator += (LogProb y) { x += y.x ; return *this;}
};
// Parameter "level" identifiers. They are passed as the level argument of the
// GLOBAL_PARAMETER* macros; how the level is used is defined by the parameter
// framework, which is not visible here.
const int PARLEV_ITER=1;     // iteration counts
const int PARLEV_OPTHEUR=2;  // optimization heuristics
const int PARLEV_OUTPUT=3;   // output options
const int PARLEV_SMOOTH=4;   // smoothing
const int PARLEV_EM=5;       // EM training
const int PARLEV_MODELS=6;   // model variants
const int PARLEV_SPECIAL=7;  // special options
const int PARLEV_INPUT=8;    // input options
#endif

View File

@ -0,0 +1,59 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef FILE_SPEC_H
#define FILE_SPEC_H
#include <ctime>
#include <cstdlib>
#include <cstring>
#include <cstdio>
/* This function returns a string, locally called file_spec. This
string is the concatenation of the date and time of execution
and the user who is performing the execution */
/* Originally implemented in C by Yaser Al-Onaizan;
editions for C++ and formatting by Noah A. Smith, 9 July 1999 */
/* Builds the "file spec" string "<YY>-<MM>-<DD>.<hhmmss>.<user>" from the
   current local time and the USER environment variable.

   Returns a malloc'ed, NUL-terminated string that the caller must free(),
   or a null pointer if the allocation fails.

   Hardening compared with the previous version:
     - USER may be unset; getenv then returns NULL, which used to be passed
       straight to strlen/strcat. "unknown" is used instead.
     - the timestamp is formatted with snprintf so it can never overrun the
       local buffer;
     - the results of localtime and malloc are checked before use.

   NOTE: tm_year counts years since 1900, so the first field is "117" in
   2017, not "17". This is kept unchanged so that generated names stay
   compatible with earlier runs. */
char *Get_File_Spec (){
  char time_stmp[57];
  const time_t now = time(NULL);
  const struct tm *local = localtime(&now);
  if (local) {
    snprintf(time_stmp, sizeof(time_stmp), "%02d-%02d-%02d.%02d%02d%02d.",
             local->tm_year, (local->tm_mon + 1), local->tm_mday,
             local->tm_hour, local->tm_min, local->tm_sec);
  } else {
    /* No broken-down time available: fall back to a fixed marker. */
    snprintf(time_stmp, sizeof(time_stmp), "unknown-time.");
  }

  const char *user = getenv("USER");
  if (!user)
    user = "unknown";

  char *file_spec = (char *)malloc(sizeof(char) *
                                   (strlen(time_stmp) + strlen(user) + 1));
  if (!file_spec)
    return 0;
  strcpy(file_spec, time_stmp);
  strcat(file_spec, user);
  return file_spec;
}
#endif

View File

@ -0,0 +1,470 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/* --------------------------------------------------------------------------*
* *
* Module : getSentence *
* *
* Method Definitions File: getSentence.cc *
* *
* Objective: Defines classes and methods for handling I/O for the parallel *
* corpus. *
*****************************************************************************/
#include "getSentence.h"
#include <iostream>
#include <strstream>
#include <cstdio>
#include <cstdlib>
#include <boost/algorithm/string.hpp>
#include <vector>
#include <set>
#include <pthread.h>
#include "Parameter.h"
#include "errno.h"
// Counts how many "truncated sentence" / "not in vocabulary" diagnostics were
// attempted so far; the messages guarded by it are printed only while it is
// below 100.
int PrintedTooLong=0;
/* -------------- Method Definitions for Class sentenceHandler ---------------*/
// Parameters for sentence pairs with a negative occurrence count (entries of
// a manual dictionary, see the warning printed by the constructors).
// ManlexMAX_MULTIPLICITY bounds the lambda search in optimize_lambda;
// Manlexfactor1 / Manlexfactor2, when non-zero, are used as the count of
// pairs whose occurrence count is exactly -1 / -2.
GLOBAL_PARAMETER(double,ManlexMAX_MULTIPLICITY,"manlexMAX_MULTIPLICITY","",PARLEV_EM,20.0);
GLOBAL_PARAMETER(double,Manlexfactor1,"manlexfactor1","",PARLEV_EM,0.0);
GLOBAL_PARAMETER(double,Manlexfactor2,"manlexfactor2","",PARLEV_EM,0.0);
// Opens the corpus file and resets all reading state (the sentence pair
// sequence number starts at zero).
//
// When both vocabulary lists are given, the whole corpus is read once:
// word frequencies are updated by getNextSentence, the sets of word ids that
// actually occur are collected, and finally both lists are compacted to those
// ids (id 0 is always kept).
//
// If any pair carries a negative occurrence count, a per-sentence realCount
// vector (initialized to 1.0) is allocated; otherwise realCount stays null.
//
// Terminates the process with exit(1) when the file cannot be opened.
//
// Fix: elist/flist default to null in the class declaration, but compact()
// used to be called on them unconditionally. It is now only called when the
// lists were supplied.
sentenceHandler::sentenceHandler(const char* filename, vcbList* elist,
    vcbList* flist) : realCount(0)
{
  pthread_mutex_init(&readsent_mutex,NULL);
  pthread_mutex_init(&setprob_mutex,NULL);
  position = 0;
  readflag = false ;
  allInMemory = false ;
  inputFilename = filename ;
  inputFile = new ifstream(filename);
  pair_no = 0 ;
  if(!(*inputFile)){
    cerr << "\nERROR:(a) Cannot open " << filename;
    exit(1);
  }
  currentSentence = 0;
  totalPairs1 = 0 ;
  totalPairs2 = 0;
  noSentInBuffer = 0 ;
  Buffer.clear();

  const bool haveVocabularies = (elist && flist);
  bool isNegative = false;
  // Word ids seen in the corpus; id 0 is always considered used.
  std::set<WordIndex> evoc,fvoc;
  evoc.insert(0);
  fvoc.insert(0);
  if (haveVocabularies){
    cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';
    sentPair s ;
    while (getNextSentence(s, elist, flist))
      {
        for(unsigned int i = 0 ; i < s.eSent.size() ; i++)
          evoc.insert(s.eSent[i]);
        for(unsigned int i = 0 ; i < s.fSent.size() ; i++)
          fvoc.insert(s.fSent[i]);
        totalPairs1++;
        totalPairs2+=s.realCount;
        // NOTE: this value might change during training
        // for words from the manual dictionary, yet this is ignored!
        if( s.noOcc<0 )
          isNegative = true;
      }
  }
  if( isNegative )
    {
      cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
      realCount=new Vector<double>(totalPairs1,1.0);
    }
  else
    realCount=0;
  // Only compact lists that exist; without a scan there is nothing to compact to.
  if (haveVocabularies){
    elist->compact(evoc);
    flist->compact(fvoc);
  }
}
// Same set-up as the three-argument constructor, except that the word ids
// found in the corpus are added to the caller's sets eapp / fapp and the
// vocabulary lists are not compacted.
//
// Terminates the process with exit(1) when the file cannot be opened.
sentenceHandler::sentenceHandler(const char* filename, vcbList* elist,
    vcbList* flist,std::set<WordIndex>& eapp, std::set<WordIndex>& fapp) : realCount(0)
{
  pthread_mutex_init(&readsent_mutex,NULL);
  pthread_mutex_init(&setprob_mutex,NULL);

  // Reset the reading state.
  position = 0;
  currentSentence = 0;
  noSentInBuffer = 0;
  pair_no = 0;
  totalPairs1 = 0;
  totalPairs2 = 0;
  readflag = false;
  allInMemory = false;
  Buffer.clear();

  // Open the corpus.
  inputFilename = filename;
  inputFile = new ifstream(filename);
  if(!(*inputFile)){
    cerr << "\nERROR:(a) Cannot open " << filename;
    exit(1);
  }

  // One pass over the corpus, only when both vocabulary lists are available.
  bool sawNegativeCount = false;
  if (elist && flist){
    cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';
    sentPair current;
    while (getNextSentence(current, elist, flist)){
      for(unsigned int e = 0; e < current.eSent.size(); ++e)
        eapp.insert(current.eSent[e]);
      for(unsigned int f = 0; f < current.fSent.size(); ++f)
        fapp.insert(current.fSent[f]);
      ++totalPairs1;
      totalPairs2 += current.realCount;
      // NOTE: this value might change during training
      // for words from the manual dictionary, yet this is ignored!
      if( current.noOcc < 0 )
        sawNegativeCount = true;
    }
  }

  // A per-sentence count vector is only needed for manual-dictionary entries.
  realCount = 0;
  if( sawNegativeCount ){
    cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
    realCount = new Vector<double>(totalPairs1,1.0);
  }
}
// Restarts reading from the first sentence pair.
//
// If the whole corpus is held in memory and the buffer starts with sentence 1,
// the buffer is kept and only the cursor is reset. Otherwise the buffer is
// dropped, and when the corpus is not fully in memory the input file is
// reopened as well (a failure to reopen is only reported on stderr).
void sentenceHandler::rewind()
{
  position = 0;
  currentSentence = 0;
  readflag = false ;

  // Does the buffer already hold the first chunk of pairs?
  const bool bufferStartsAtFirstPair =
    Buffer.size() >= 1 && Buffer[currentSentence].sentenceNo == 1;
  if (!(allInMemory && bufferStartsAtFirstPair)){
    if (Buffer.size() > 0)
      cerr << ' ' << Buffer[currentSentence].sentenceNo << '\n';
    // totalPairs = 0 ;
    pair_no = 0 ;
    noSentInBuffer = 0 ;
    Buffer.clear();
  }

  if (allInMemory)
    return;

  // Corpus lives on disk: start over with a fresh stream.
  delete inputFile;
  inputFile = new ifstream(inputFilename);
  if(!(*inputFile)){
    cerr << "\nERROR:(b) Cannot open " << inputFilename << " " << (int)errno;
  }
}
// Hands out the next sentence pair, refilling the in-memory buffer from the
// corpus file when it is exhausted.
//
//   sent          receives a copy of the next buffered pair.
//   elist, flist  optional vocabulary lists; when both are given, word
//                 frequencies are incremented while pairs are read from the
//                 file, and an id outside the list terminates the process.
//
// Returns the running position of the pair since the last rewind (the first
// pair yields 1), or 0 when no further pair is available. The whole call runs
// under readsent_mutex. The do { ... } while(false) is only a vehicle for
// `break`, which jumps to the common "no sentence" exit at the bottom.
int sentenceHandler::getNextSentence(sentPair& sent, vcbList* elist, vcbList* flist)
{
pthread_mutex_lock(&readsent_mutex);
do{
sentPair s ;
if (readflag){
// End of corpus was already reported; note that no rewind happens here
// (the call is commented out), the caller just gets 0.
cerr << "Attempting to read from the end of corpus, rewinding\n";
//rewind();
break;
}
if (currentSentence >= noSentInBuffer){
// Buffer exhausted. If it held the entire corpus there is nothing more to read.
if (allInMemory)
break;
/* no more sentences in buffer */
noSentInBuffer = 0 ;
currentSentence = 0 ;
Buffer.clear();
cout << "Reading more sentence pairs into memory ... \n";
// Refill with at most TRAIN_BUFFER_SIZE pairs.
while((noSentInBuffer < TRAIN_BUFFER_SIZE) && readNextSentence(s)){
// Pairs whose target side is too long relative to the source side are
// shortened: both sides are cut to the length of the shorter one.
if ((s.fSent.size()-1) > (MAX_FERTILITY-1) * (s.eSent.size()-1)){
cerr << "WARNING: The following sentence pair has source/target sentence length ratio more than\n"<<
"the maximum allowed limit for a source word fertility\n"<<
" source length = " << s.eSent.size()-1 << " target length = " << s.fSent.size()-1 <<
" ratio " << double(s.fSent.size()-1)/ (s.eSent.size()-1) << " ferility limit : " <<
MAX_FERTILITY-1 << '\n';
cerr << "Shortening sentence \n";
cerr << s;
s.eSent.resize(min(s.eSent.size(),s.fSent.size()));
s.fSent.resize(min(s.eSent.size(),s.fSent.size()));
}
Buffer.push_back(s) ;
//cerr << s.eAnchor.size() << " " << Buffer[Buffer.size()-1].eAnchor.size()<< endl;
if (elist && flist){
// Source side: every position, including the null word at index 0.
if ((*elist).size() > 0)
for (WordIndex i= 0 ; i < s.eSent.size() ; i++){
if (s.eSent[i] >= (*elist).uniqTokens()){
// Only the message is rate-limited; exit(-1) runs on every unknown word.
if( PrintedTooLong++<100)
cerr << "ERROR: source word " << s.eSent[i] << " is not in the vocabulary list \n";
exit(-1);
}
(*elist).incFreq(s.eSent[i], s.realCount);
}
// Target side: index 0 is skipped.
if ((*flist).size() > 0)
for (WordIndex j= 1 ; j < s.fSent.size() ; j++){
if (s.fSent[j] >= (*flist).uniqTokens()){
cerr << "ERROR: target word " << s.fSent[j] << " is not in the vocabulary list \n";
exit(-1);
}
(*flist).incFreq(s.fSent[j], s.realCount);
}
}
noSentInBuffer++;
}
// If the file ended during this refill and the buffer starts with
// sentence 1, the entire corpus is now in memory.
if (inputFile->eof()){
allInMemory = (Buffer.size() >= 1 &&
Buffer[currentSentence].sentenceNo == 1) ;
if (allInMemory)
cout << "Corpus fits in memory, corpus has: " << Buffer.size() <<
" sentence pairs.\n";
}
}
if(noSentInBuffer <= 0 ){
// Nothing could be read: remember that the end of the corpus was reached.
//cerr << "# sent in buffer " << noSentInBuffer << '\n';
readflag = true ;
break;
}
sent = Buffer[currentSentence++] ;
// cerr << currentSentence-1 << " " << sent.eAnchor.size() << " " << Buffer[currentSentence-1].eAnchor.size()<< endl;
position ++;
// Pairs with a negative occurrence count get their effective count from the
// Manlexfactor parameters or from the per-sentence realCount vector.
if( sent.noOcc<0 && realCount ){
if( Manlexfactor1 && sent.noOcc==-1.0 )
sent.realCount=Manlexfactor1;
else if( Manlexfactor2 && sent.noOcc==-2.0 )
sent.realCount=Manlexfactor2;
else
sent.realCount=(*realCount)[sent.getSentenceNo()-1];
}
pthread_mutex_unlock(&readsent_mutex);
return position ;
}while(false);
// Common exit for every `break` above: no sentence delivered.
pthread_mutex_unlock(&readsent_mutex);
return 0;
}
/* Reads the next sentence pair from the corpus file into sent.

   Two layouts are accepted:
     - three lines per pair: the number of times the pair occurred, the source
       sentence, the target sentence;
     - one line per pair, split at every '|', '#' or '*' character into at
       least three fields (count, source, target) and an optional fourth field
       with anchor links written as "e-f" tokens separated by blanks.
   Sentences are blank-separated non-negative integer token ids. Both
   sentences get id 0 prepended at position 0. Sentences are truncated to
   MAX_SENTENCE_LENGTH entries (including that leading 0).

   Returns true and assigns the next sentence number on success. Returns
   false at end of file or on a malformed line; on the end-of-file path all
   fields of sent are reset.

   Fix: the "zero sentence length" diagnostic used to print sent.sentenceNo
   before it was assigned, so it always reported sentence 0. The number is
   now assigned first. */
bool sentenceHandler::readNextSentence(sentPair& sent)
{
  string line;
  bool fail(false) ;
  sent.clear();
  vector<string> splits;

  /* ---- occurrence count (first line, or first field) ---- */
  if (getline(*inputFile, line)){
    boost::algorithm::split(splits,line,boost::algorithm::is_any_of("|#*"));
    if(splits.size() == 1 || splits.size() == 0){
      // three-line layout: the line holds only the count
    }else if(splits.size()>=3){
      line = splits[0];
    }else{
      // exactly two fields: neither layout, give up on this pair
      return false;
    }
    istrstream buffer(line.c_str());
    buffer >> sent.noOcc;
    if( sent.noOcc<0 )
      {
        // Negative counts mark manual-dictionary entries; their effective
        // count comes from the Manlexfactor parameters or from realCount.
        if( realCount )
          {
            if( Manlexfactor1 && sent.noOcc==-1.0 )
              sent.realCount=Manlexfactor1;
            else if( Manlexfactor2 && sent.noOcc==-2.0 )
              sent.realCount=Manlexfactor2;
            else
              sent.realCount=(*realCount)[pair_no];
          }
        else
          sent.realCount=1.0;
      }
    else
      sent.realCount=sent.noOcc;
  }
  else {
    fail = true ;
  }

  /* ---- source sentence (second line, or second field) ---- */
  if (splits.size()>=3 || getline(*inputFile, line)){
    if(splits.size()>=3){
      line = splits[1];
    }
    istrstream buffer(line.c_str());
    WordIndex w; // w is a local variable for the token id
    sent.eSent.push_back(0); // each source sentence starts with the null word (id 0)
    while(buffer>>w){ // read source sentence, word by word
      if (sent.eSent.size() < MAX_SENTENCE_LENGTH)
        sent.eSent.push_back(w);
      else {
        if( PrintedTooLong++<100)
          cerr << "{WARNING:(a)truncated sentence "<<pair_no<<"}";
        //cerr << "ERROR: getSentence.cc:getNextSentence(): sentence exceeds preset length limit of : " << MAX_SENTENCE_LENGTH << '\n';
        //cerr << "The following sentence will be truncated\n" << line;
        break ;
      }
    }
  }
  else {
    fail = true ;
  }

  /* ---- target sentence (third line, or third field) ---- */
  if (splits.size()>=3 ||getline(*inputFile, line)){
    if(splits.size()>=3){
      line = splits[2];
    }
    istrstream buffer(line.c_str());
    WordIndex w; // w is a local variable for the token id
    sent.fSent.push_back(0); //0 is inserted for program uniformity
    while(buffer>>w){ // read target sentence, word by word
      if (sent.fSent.size() < MAX_SENTENCE_LENGTH)
        sent.fSent.push_back(w);
      else {
        if( PrintedTooLong++<100)
          cerr << "{WARNING:(b)truncated sentence "<<pair_no<<"}";
        //cerr << "ERROR: getSentence.cc:getNextSentence(): sentence exceeds preset length limit of : " << MAX_SENTENCE_LENGTH << '\n';
        //cerr << "The following sentence will be truncated\n" << line;
        break ;
      }
    }
  }
  else {
    fail = true ;
  }

  /* ---- optional anchor links (fourth field) ---- */
  sent.eAnchor.clear();
  sent.fAnchor.clear();
  // cerr << "Splits: " << splits.size() << endl;
  if(splits.size()>3){
    vector<string> al,eal;
    al.clear();
    boost::algorithm::split(al,splits[3],boost::algorithm::is_any_of(" "));
    for(size_t w = 0 ; w < al.size(); w++){
      eal.clear();
      boost::algorithm::split(eal,al[w],boost::algorithm::is_any_of("-"));
      if(eal.size()==2){
        int ea = atoi(eal[0].c_str());
        int fa = atoi(eal[1].c_str());
        // Links pointing outside either sentence are ignored.
        if(ea >= sent.eSent.size() || fa >= sent.fSent.size())
          continue;
        sent.eAnchor.push_back(ea);
        sent.fAnchor.push_back(fa);
      }
    }
    // cerr << "Read partial alignment: " << sent.eAnchor.size() << " " <<
    // sent.fAnchor.size() << "\n";
  }

  if (fail){
    sent.eSent.clear();
    sent.fSent.clear();
    sent.eAnchor.clear();
    sent.fAnchor.clear();
    sent.sentenceNo = 0 ;
    sent.noOcc = 0 ;
    sent.realCount=0;
    return(false);
  }

  // Number the pair first so that the diagnostic below names the right one.
  sent.sentenceNo = ++pair_no;
  if( sent.eSent.size()==1||sent.fSent.size()==1 )
    cerr << "ERROR: Forbidden zero sentence length " << sent.sentenceNo << endl;
  if(pair_no % 100000 == 0)
    cout << "[sent:" << sent.sentenceNo << "]"<< '\n';
  return true;
}
// Grid search for lambda in [1.0, ManlexMAX_MULTIPLICITY) with step 0.33.
// For each candidate the sum over all v in vd of
//     v * exp(lambda*v) / (exp(lambda*v) - 1)
// is computed, and the candidate whose sum is closest to 1 is returned
// (the first one on ties; 1.0 when the grid is empty).
double optimize_lambda(Vector<double>&vd)
{
  // Distance of the sum from 1.0 for every grid point, in grid order.
  Vector<double> distances;
  double lambda = 1.0;
  while( lambda < ManlexMAX_MULTIPLICITY )
    {
      double total = 0.0;
      for(unsigned int k = 0; k < vd.size(); ++k)
        {
          const double v = vd[k];
          total += v*exp(lambda*v)/(exp(lambda*v)-1.0);
        }
      distances.push_back(fabs(total-1.0));
      lambda += 0.33;
    }

  // Translate the index of the best grid point back into a lambda value.
  const double bestIndex =
    double(min_element(distances.begin(),distances.end())-distances.begin());
  const double lam = bestIndex*0.33+1.0;
  if( lam < 1.0 )
    {
      cerr << "ERROR: lambda is smaller than one: " << lam << endl;
      for(unsigned int k = 0; k < vd.size(); ++k)
        cerr << vd[k] << ' ';
      cerr << endl;
    }
  return lam;
}
// Records exp(d) for a sentence pair with a non-positive occurrence count and
// re-estimates the effective counts of the previously recorded group once a
// new group starts.
//
// Does nothing when no realCount vector exists. A new group starts when the
// source sentence differs from the last recorded pair, or when the sentence
// number did not increase. The recorded group is then flushed: each pair with
// probability below 1e-5 gets count 1.0, every other pair gets
// lambda*p/(1-exp(-lambda*p)) with lambda from optimize_lambda.
// The whole update runs under setprob_mutex.
void sentenceHandler::setProbOfSentence(const sentPair&s,double d)
{
  if( realCount==0 )
    return;

  pthread_mutex_lock(&setprob_mutex);
  if( s.noOcc<=0 )
    {
      const double prob = exp(d);
      bool groupFinished = false;
      if( oldPairs.size()>0 )
        {
          groupFinished = oldPairs.back().get_eSent()!=s.get_eSent()
            || oldPairs.back().getSentenceNo()>=s.getSentenceNo();
        }
      if( groupFinished )
        {
          const double lambda = optimize_lambda(oldProbs);
          for(unsigned int k = 0; k < oldPairs.size(); ++k)
            {
              const int slot = oldPairs[k].getSentenceNo()-1;
              if( oldProbs[k]<1e-5 )
                (*realCount)[slot]=1.0;
              else
                (*realCount)[slot]=lambda*oldProbs[k]/(1-exp(-lambda*oldProbs[k]));
            }
          oldPairs.clear();
          oldProbs.clear();
        }
      oldPairs.push_back(s);
      oldProbs.push_back(prob);
    }
  pthread_mutex_unlock(&setprob_mutex);
}
/* ------------- End of Method Definition of Class sentenceHandler ----------*/

View File

@ -0,0 +1,136 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
/* --------------------------------------------------------------------------*
* *
* Module : getSentence *
* *
* Prototypes File: getSentence.h *
* *
* Objective: Defines clases and methods for handling I/O for the parallel *
* corpus. *
*****************************************************************************/
#ifndef _sentenceHandler_h
#define _sentenceHandler_h 1
#include <iostream>
#include <fstream>
#include <string>
#include <set>
#include <cstdio>
#include <cstdlib>
#include <pthread.h>
#include "Vector.h"
#include "defs.h"
#include "vocab.h"
#include "Globals.h"
/*----------------------- Class Prototype Definition ------------------------*
Class Name: sentenceHandler
Objective: This class is defined to handle training sentence pairs from the
parallel corpus. Each pair has: a target sentence, called here French; a
source sentence, called here English; and a number denoting
the number of times this pair occurred in the training corpus. Both source and
target sentences are represented as integer vectors (variable size arrays);
each entry is a numeric value which is the token id for the particular token
in the sentence.
*---------------------------------------------------------------------------*/
// One sentence pair of the parallel corpus.
class sentPair{
 public:
  int sentenceNo ;             // 1-based number of the pair in the corpus; 0 = none
  float noOcc;                 // occurrence count as read from the corpus
  float realCount;             // count actually used in training
  Vector<WordIndex> eSent ;    // source sentence as token ids
  Vector<WordIndex> fSent;     // target sentence as token ids
  Vector<WordIndex> eAnchor;   // source positions of the anchor links
  Vector<WordIndex> fAnchor;   // target positions of the anchor links
 public:
  // Creates an empty pair. The scalar members are zeroed explicitly; they were
  // previously left indeterminate until clear() or an assignment ran, which
  // made copying or printing a fresh object undefined.
  sentPair()
    : sentenceNo(0), noOcc(0), realCount(0)
  {}
  // Resets the pair to the empty state.
  void clear(){
    eSent.clear();
    fSent.clear();
    eAnchor.clear();
    fAnchor.clear();
    noOcc=0;
    realCount=0;
    sentenceNo=0;
  }
  const Vector<WordIndex>&get_eSent()const
  { return eSent; }
  const Vector<WordIndex>&get_fSent()const
  { return fSent; }
  int getSentenceNo()const
  { return sentenceNo; }
  double getCount()const
  { return realCount; }
};
// Writes a human-readable dump of a sentence pair: a header line with number
// and occurrence count, an extra line with the used count when it differs,
// then the source ids (from position 0) and the target ids (from position 1),
// each on its own line.
inline ostream&operator<<(ostream&of,const sentPair&s)
{
  of << "Sent No: " << s.sentenceNo << " , No. Occurrences: " << s.noOcc << '\n';
  const bool countWasAdjusted = (s.noOcc!=s.realCount);
  if( countWasAdjusted )
    of << " Used No. Occurrences: " << s.realCount << '\n';

  // Source side, including the entry at position 0.
  unsigned int e = 0;
  while( e < s.eSent.size() )
    {
      of << s.eSent[e] << ' ';
      ++e;
    }
  of << '\n';

  // Target side, skipping the entry at position 0.
  unsigned int f = 1;
  while( f < s.fSent.size() )
    {
      of << s.fSent[f] << ' ';
      ++f;
    }
  of << '\n';
  return of;
}
/*Thread-safe version of sentence handler*/
class sentenceHandler{
public:
const char * inputFilename; // parallel corpus file name, similar for all
// sentence pair objects
ifstream *inputFile; // parallel corpus file handler
Vector<sentPair> Buffer;
int noSentInBuffer ;
int currentSentence ;
int position; /*Sentence position (will be returned)*/
int totalPairs1 ;
double totalPairs2;
bool readflag ; // true if you reach the end of file
bool allInMemory ;
int pair_no ;
Vector<double> *realCount;
Vector<sentPair> oldPairs;
Vector<double> oldProbs;
sentenceHandler(){};
sentenceHandler(const char* filename, vcbList* elist=0, vcbList* flist=0);
sentenceHandler(const char* filename, vcbList* elist, vcbList* flist,set<WordIndex>& eapp, set<WordIndex>& fapp);
void rewind();
int getNextSentence(sentPair&, vcbList* = 0, vcbList* = 0); // will be defined in the definition file, this
int getTotalNoPairs1()const {return totalPairs1;};
double getTotalNoPairs2()const {return totalPairs2;};
// method will read the next pair of sentence from memory buffer
void setProbOfSentence(const sentPair&s,double d);
private:
pthread_mutex_t readsent_mutex;
pthread_mutex_t setprob_mutex;
bool readNextSentence(sentPair&); // will be defined in the definition file, this
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,103 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef _hmm_h
#define _hmm_h 1
#include <cassert>
#include <iostream>
#include <algorithm>
#include <functional>
#include <map>
#include <set>
#include "Vector.h"
#include <utility>
#if __GNUC__>2
#include <ext/hash_map>
using __gnu_cxx::hash_map;
#else
#include <hash_map>
#endif
#include <fstream>
#include <cmath>
#include <ctime>
#include "TTables.h"
#include "ATables.h"
#include "getSentence.h"
#include "defs.h"
#include "model2.h"
#include "Perplexity.h"
#include "vocab.h"
#include "WordClasses.h"
#include "HMMTables.h"
#include "ForwardBackward.h"
#include "ttableDiff.hpp"
// HMM alignment model built on top of model2.
// Carries references to the word-class tables of both vocabularies and two
// HMMTables instances: `counts` and `probs`.
class hmm : public model2{
public:
// Word-class tables, held by reference.
// NOTE(review): presumably bound to the constructor's `e` / `f` arguments, in which
// case those objects must outlive this hmm -- confirm in the constructor definition.
WordClasses& ewordclasses;
WordClasses& fwordclasses;
public:
// Jump tables indexed by word class: `counts` and `probs`.
HMMTables<int,WordClasses> counts,probs;
public:
// Reads the word classes for both vocabularies.
//   m1, m2  mappers handed to WordClasses::read together with Elist / Flist
//   efile   file read into ewordclasses
//   ffile   file read into fwordclasses
// A file that cannot be opened is reported on cerr; the other file is still processed.
template<class MAPPER>
void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile){
ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
if( !estrm ) {
cerr << "ERROR: can not read " << efile << endl;
}else
ewordclasses.read(estrm,m1,Elist);
if( !fstrm )
cerr << "ERROR: can not read " << ffile << endl;
else
fwordclasses.read(fstrm,m2,Flist);
}
// Builds the HMM model from an existing model2 and the two word-class tables.
hmm(model2&m2,WordClasses &e, WordClasses& f);
void initialize_table_uniformly(sentenceHandler&);
// Training driver; defined in the implementation file.
int em_with_tricks(int iterations, bool dumpCount = false,
const char* dumpCountName = NULL, bool useString = false,bool resume=false);
CTTableDiff<COUNT,PROB>* em_one_step(int it);
// void em_one_step_2(int it,int part);
void load_table(const char* aname);
// void em_loop(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
// const char* alignfile, Perplexity&, bool test,bool doInit,int iter);
/* CTTableDiff<COUNT,PROB>* em_loop_1(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
const char* alignfile, Perplexity&, bool test,bool doInit,int iter);*/
/* void em_loop_2( Perplexity& perp, sentenceHandler& sHandler1,
bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
bool test,bool doInit,int part);*/
// NOTE(review): the trailing unnamed int is presumably the iteration number
// (see the commented-out variants above) -- confirm against the definition.
void em_loop(Perplexity& perp, sentenceHandler& sHandler1,
bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
bool test,bool doInit,int
);
void em_thread(int it,string alignfile,bool dump_files,bool resume=false);
// Returns a pointer to an HMMNetwork built for one sentence pair (es, fs).
// NOTE(review): ownership of the returned pointer is not visible here -- confirm.
HMMNetwork *makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>&fs,bool doInit)const;
void clearCountTable();
// model3 is granted access to the members of this class.
friend class model3;
};
//int multi_thread_em(int noIter, int noThread, hmm* base);
#endif

View File

@ -0,0 +1,135 @@
// HMM Normalization executable
#include <iostream>
#include <strstream>
#include <string>
#include "hmm.h"
#include "Parameter.h"
// Default iteration counts used below for Model 2 and the HMM model.
#define ITER_M2 0
#define ITER_MH 5
// Global parameters of the toolkit, declared through the GLOBAL_PARAMETER* macros
// from Parameter.h (macro definitions are not visible in this file).
// --- number of training iterations per model ---
GLOBAL_PARAMETER3(int,Model1_Iterations,"Model1_Iterations","NO. ITERATIONS MODEL 1","m1","number of iterations for Model 1",PARLEV_ITER,5);
GLOBAL_PARAMETER3(int,Model2_Iterations,"Model2_Iterations","NO. ITERATIONS MODEL 2","m2","number of iterations for Model 2",PARLEV_ITER,ITER_M2);
// NOTE(review): the string arguments here are ordered differently from the sibling
// Model*_Iterations declarations ("mh" appears where the others have the long title,
// and again where the others have the description) -- confirm this is intended.
GLOBAL_PARAMETER3(int,HMM_Iterations,"HMM_Iterations","mh","number of iterations for HMM alignment model","mh", PARLEV_ITER,ITER_MH);
GLOBAL_PARAMETER3(int,Model3_Iterations,"Model3_Iterations","NO. ITERATIONS MODEL 3","m3","number of iterations for Model 3",PARLEV_ITER,5);
GLOBAL_PARAMETER3(int,Model4_Iterations,"Model4_Iterations","NO. ITERATIONS MODEL 4","m4","number of iterations for Model 4",PARLEV_ITER,5);
GLOBAL_PARAMETER3(int,Model5_Iterations,"Model5_Iterations","NO. ITERATIONS MODEL 5","m5","number of iterations for Model 5",PARLEV_ITER,0);
GLOBAL_PARAMETER3(int,Model6_Iterations,"Model6_Iterations","NO. ITERATIONS MODEL 6","m6","number of iterations for Model 6",PARLEV_ITER,0);
// --- smoothing, output and EM tuning parameters ---
GLOBAL_PARAMETER(float, PROB_SMOOTH,"probSmooth","probability smoothing (floor) value ",PARLEV_OPTHEUR,1e-7);
GLOBAL_PARAMETER(float, MINCOUNTINCREASE,"minCountIncrease","minimal count increase",PARLEV_OPTHEUR,1e-7);
GLOBAL_PARAMETER2(int,Transfer_Dump_Freq,"TRANSFER DUMP FREQUENCY","t2to3","output: dump of transfer from Model 2 to 3",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER2(bool,Verbose,"verbose","v","0: not verbose; 1: verbose",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER(bool,Log,"log","0: no logfile; 1: logfile",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER(double,P0,"p0","fixed value for parameter p_0 in IBM-3/4 (if negative then it is determined in training)",PARLEV_EM,-1.0);
GLOBAL_PARAMETER(double,M5P0,"m5p0","fixed value for parameter p_0 in IBM-5 (if negative then it is determined in training)",PARLEV_EM,-1.0);
GLOBAL_PARAMETER3(bool,Peg,"pegging","p","DO PEGGING? (Y/N)","0: no pegging; 1: do pegging",PARLEV_EM,0);
GLOBAL_PARAMETER(short,OldADBACKOFF,"adbackoff","",-1,0);
GLOBAL_PARAMETER2(unsigned int,MAX_SENTENCE_LENGTH,"ml","MAX SENTENCE LENGTH","maximum sentence length",0,MAX_SENTENCE_LENGTH_ALLOWED);
GLOBAL_PARAMETER(short, DeficientDistortionForEmptyWord,"DeficientDistortionForEmptyWord","0: IBM-3/IBM-4 as described in (Brown et al. 1993); 1: distortion model of empty word is deficient; 2: distoriton model of empty word is deficient (differently); setting this parameter also helps to avoid that during IBM-3 and IBM-4 training too many words are aligned with the empty word",PARLEV_MODELS,0);
/**
Parameters that support loading models and dumping models/counts.
*/
GLOBAL_PARAMETER(int,restart,"restart","Restart training from a level,0: Normal restart, from model 1, 1: Model 1, 2: Model 2 Init (Using Model 1 model input and train model 2), 3: Model 2, (using model 2 input and train model 2), 4 : HMM Init (Using Model 1 model and train HMM), 5: HMM (Using Model 2 model and train HMM) 6 : HMM (Using HMM Model and train HMM), 7: Model 3 Init (Use HMM model and train model 3) 8: Model 3 Init (Use Model 2 model and train model 3) 9: Model 3, 10: Model 4 Init (Use Model 3 model and train Model 4) 11: Model 4 and on, ",PARLEV_INPUT,0);
GLOBAL_PARAMETER(bool,dumpCount,"dumpcount","Whether we are going to dump count (in addition to) final output?",PARLEV_OUTPUT,false);
GLOBAL_PARAMETER(bool,dumpCountUsingWordString,"dumpcountusingwordstring","In count table, should actual word appears or just the id? default is id",PARLEV_OUTPUT,false);
/// END
// Plain (non-parameter) global flags.
// NOTE(review): presumably referenced by other translation units linked into this tool -- confirm.
short OutputInAachenFormat=0;
bool Transfer=TRANSFER;
bool Transfer2to3=0;
short NoEmptyWord=0;
bool FEWDUMPS=0;
GLOBAL_PARAMETER(bool,ONLYALDUMPS,"ONLYALDUMPS","1: do not write any files",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER(short,NCPUS,"NCPUS","Number of CPUS",PARLEV_EM,2);
GLOBAL_PARAMETER(short,CompactAlignmentFormat,"CompactAlignmentFormat","0: detailled alignment format, 1: compact alignment format ",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER2(bool,NODUMPS,"NODUMPS","NO FILE DUMPS? (Y/N)","1: do not write any files",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
"maximal fertility for fertility models", PARLEV_EM, 10);
using namespace std;
// File-name globals. main() in this file assigns only SourceVocabFilename and
// TargetVocabFilename; CorpusFilename is passed to model1 while still empty.
string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
// Entry point of the HMM normalization executable.
//
// Usage: <prog> vcb1 vcb2 outputFile baseFile [additional1 ...]
//   vcb1, vcb2      vocabulary files; the word-class files are looked up as
//                   "<vcb1>.classes" and "<vcb2>.classes"
//   outputFile      prefix for the merged jump tables (also written:
//                   "<outputFile>.alpha" and "<outputFile>.beta")
//   baseFile        prefix of the tables loaded into h.probs
//   additional...   further table prefixes; each is read into h.counts and,
//                   if it loads, merged into h.probs
// Returns 1 when too few arguments are given, 0 otherwise.
int main(int argc, char* argv[]){
  if(argc < 5){
    cerr << "Usage: " << argv[0] << " vcb1 vcb2 outputFile baseFile [additional1 ]..." << endl;
    return 1;
  }

  // Vocabularies.
  Vector<WordEntry> evlist,fvlist;
  vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
  TargetVocabFilename = argv[2];
  SourceVocabFilename = argv[1];
  eTrainVcbList.setName(argv[1]);
  fTrainVcbList.setName(argv[2]);
  eTrainVcbList.readVocabList();
  fTrainVcbList.readVocabList();

  // The hmm object can only be built on top of model1/model2, so construct
  // those with an empty corpus handler (CorpusFilename is never set here).
  Perplexity trainPerp, testPerp, trainViterbiPerp, testViterbiPerp;
  tmodel<float, float> tTable;
  sentenceHandler *corpus = new sentenceHandler();
  model1 m1(CorpusFilename.c_str(), eTrainVcbList, fTrainVcbList, tTable,
            trainPerp, *corpus, &testPerp, corpus, trainViterbiPerp,
            &testViterbiPerp);
  amodel<float> aTable(false);
  amodel<float> aCountTable(false);
  model2 m2(m1, aTable, aCountTable);
  WordClasses french,english;
  hmm h(m2,english,french);

  // Word classes live next to the vocabulary files.
  string evcbcls = argv[1];
  string fvcbcls = argv[2];
  evcbcls += ".classes";
  fvcbcls += ".classes";
  h.makeWordClasses(m1.Elist, m1.Flist, evcbcls.c_str(), fvcbcls.c_str());

  string base = argv[4];
  string baseA = base+".alpha";
  string baseB = base+".beta";
  string output = argv[3];
  string outputA = output+".alpha";
  string outputB = output+".beta";
  h.probs.readJumps(base.c_str(),NULL,baseA.c_str(), baseB.c_str());

  // Merge every additional table into the base table.
  for(int i = 5; i< argc ; i++){
    string name = argv[i];
    string nameA = name + ".alpha";
    string nameB = name + ".beta";
    if(h.counts.readJumps(name.c_str(),NULL,nameA.c_str(), nameB.c_str()))
      h.probs.merge(h.counts);
    else
      // Report which file failed (the message used to print the literal
      // text "name.c_str()" and no line break).
      cerr << "Error, cannot load " << name << endl;
    h.clearCountTable();
  }
  h.probs.writeJumps(output.c_str(),NULL,outputA.c_str(), outputB.c_str());
  delete corpus;
  return 0;
}
// Some utility definitions needed to get this executable to compile.
// Log stream object; it is not opened anywhere in this file.
ofstream logmsg;
// Converts an integer to its decimal string representation.
//   n        value to convert (any int, including negative values and INT_MIN)
//   returns  e.g. "0", "42", "-17"
// Negative input used to yield a garbage character because `n % 10` is
// negative for negative n; the magnitude is now formatted and prefixed with '-'.
const string str2Num(int n) {
  // Work on the magnitude as unsigned so that negating INT_MIN cannot overflow.
  unsigned int magnitude = n < 0 ? 0u - static_cast<unsigned int>(n)
                                 : static_cast<unsigned int>(n);
  string number = "";
  do {
    number.insert((size_t)0, 1, (char)(magnitude % 10 + '0'));
  } while ((magnitude /= 10) > 0);
  if (n < 0)
    number.insert((size_t)0, 1, '-');
  return (number);
}
// Global definitions that are not used anywhere in this file.
// NOTE(review): presumably present to satisfy references from other translation
// units linked into this executable -- confirm.
double LAMBDA=1.09;
// One map per entry, keyed by a pair of ints with a char value.
Vector<map< pair<int,int>,char > > ReferenceAlignment;
// Stub of ErrorsInAlignment for this executable: it performs no evaluation.
// All arguments are ignored and the reference out-parameters are left untouched.
// Returns 0.0. The body used to be empty, so control flowed off the end of a
// non-void function, which is undefined behaviour if the function is ever called.
double ErrorsInAlignment(const map< pair<int,int>,char >&reference,
                         const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
                         int&eventsMissing, int&eventsToomuch, int pair_no){
  return 0.0;
}
// Stub of printGIZAPars for this executable: intentionally writes nothing to `out`.
void printGIZAPars(ostream&out){
}

View File

@ -0,0 +1,154 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
// Routines to perform integer exponential arithmetic.
// A number x is represented as n, where x = b**n.
// It is assumed that b > 1, something like b = 1.001;
#include "logprob.h"
#include <cstdlib>
#include <cstdio>
#include <iostream>
#include <fstream>
#include <string>
// Definitions of LogProb's static members.
// The order of the definitions below matters: several initializers use members
// defined on earlier lines (e.g. logb2 uses b, nmax uses logb2), and
// Initialize() and the LogProb(double) constructor read nmin/nmax/tblbnd/zeron.
double *LogProb::ntof = NULL; // Tables will be initialized
int *LogProb::addtbl = NULL; // in Initialize function.
int *LogProb::subtbl = NULL; //
const int LogProb::max_2byte_integer = 32767;
const int LogProb::min_2byte_integer = -32768;
const double LogProb::b = 1.001; // a logarithm basis
// Natural logarithm of the base b.
const double LogProb::logb2 = log(b);
//const int LogProb::nmax = round(78.0E0 * log(1.0E1) / logb2);
// Largest exponent with a table entry: the exponent n for which b**n is about 1e300.
const int LogProb::nmax = round(300.0E0 * log(1.0E1) / logb2);
const int LogProb::nmin = -nmax;
// Most negative exponent difference covered by addtbl/subtbl.
const int LogProb::tblbnd = round(log((b-1.0E0)/2.0E0)/logb2);
// Reserved exponents for the values 0, 1 and infinity (zeron is -2**23).
const int LogProb::zeron = round(pow(-2, 23));
const int LogProb::onen = 0;
const int LogProb::infn = onen - zeron;
// Builds the lookup tables as a side effect of initializing this constant.
const int LogProb::initialized = LogProb::Initialize();
// Predefined constants; minusK holds the value 1e-K.
const LogProb LogProb::zero(0);
const LogProb LogProb::one(1);
const LogProb LogProb::minus2(1e-2);
const LogProb LogProb::minus4(1e-4);
const LogProb LogProb::minus6(1e-6);
const LogProb LogProb::minus8(1e-8);
const LogProb LogProb::minus10(1e-10);
const LogProb LogProb::minus12(1e-12);
const LogProb LogProb::minus14(1e-14);
const LogProb LogProb::minus16(1e-16);
// static table initialization function
// Static table initialization function.
// Allocates and fills the three lookup tables used by LogProb:
//   ntof[n-nmin]      = b**n for n in (nmin, nmax]; ntof[0] is forced to 0 so
//                       that exponents at or below nmin convert to exactly 0.0
//   addtbl[i-tblbnd]  = round(log_b(1 + b**i))          for i in [tblbnd, 0]
//   subtbl[i]         = round(log_b(sqrt(b)*b**i - 1))  for i in [0, -tblbnd]
// Writes two progress lines to std::cerr and returns 1.
// The commented-out code that cached the tables in a file has been removed.
int LogProb::Initialize()
{
  const int ntofSize = nmax - nmin + 1;
  const int tblSize = -tblbnd + 1;
  // Both integer tables (addtbl and subtbl) are allocated, so both are counted;
  // the report used to include only one of them.
  const size_t nbytes = sizeof(double)*ntofSize + 2*sizeof(int)*tblSize;
  std::cerr << nbytes << " bytes used for LogProb tables (C++ version)\n";
  ntof = new double[ntofSize];
  addtbl = new int[tblSize];
  subtbl = new int[tblSize];

  int i;
  std::cerr << "Building integer logs conversion tables\n";
  ntof[0] = 0 ;
  for (i=nmin+1; i<=nmax; ++i)
    {
      double x = i;
      ntof[i-nmin] = exp(x*logb2);
    }
  for (i=tblbnd; i<=0; ++i)
    {
      double x = 1.0 + pow(b, i);
      addtbl[i-tblbnd] = round(log(x)/logb2);
    }
  double sqrtb = exp(0.5*logb2);
  for (i=0; i<=-tblbnd; ++i)
    {
      double x = sqrtb * pow(b, i) - 1.0;
      subtbl[i] = round(log(x)/logb2);
    }
  return 1;
}
// Releases the three lookup tables allocated by Initialize().
// The pointers are reset to NULL afterwards, so calling FreeTables() a second
// time is harmless (it used to delete the same buffers twice).
// LogProb arithmetic and conversion to double must not be used after this call.
void LogProb::FreeTables()
{
  delete [] addtbl;
  addtbl = NULL;
  delete [] subtbl;
  subtbl = NULL;
  delete [] ntof;
  ntof = NULL;
}
//---------------------------------------------------------------------------
// Arithmetic operators
//---------------------------------------------------------------------------
// Subtract two logarithm numbers. Use the following method:
// b**n - b**m = b**m( b**(n-m) - 1 ), assuming n >= m.
// In-place subtraction in the log domain: *this becomes (*this - subs).
//   - subtracting zero leaves *this unchanged;
//   - equal operands give zero;
//   - a subtrahend larger than *this is reported on std::cerr and gives zero;
//   - when subs is too small to change the result (exponent gap above -tblbnd),
//     *this is left unchanged;
//   - otherwise the result is looked up in subtbl.
LogProb& LogProb::operator-=(const LogProb &subs)
{
  if (subs.logr == zeron)
    return *this;

  const int gap = logr - subs.logr;
  if (gap > 0)
    {
      // Only gaps inside the table range alter the stored exponent.
      if (gap <= -tblbnd)
        logr = subs.logr + subtbl[gap];
      return *this;
    }

  if (gap != 0)
    {
      std::cerr << "WARNING(logprob): Invalid arguments to nsub" <<(*this)<< " " << subs << std::endl;
      //abort();
    }
  logr = zeron;
  return *this;
}

View File

@ -0,0 +1,217 @@
/*
EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef _LOGPROB_H
#define _LOGPROB_H
// Routines to perform integer exponential arithmetic.
// A number x is represented as n, where x = b**n
// It is assumed that b > 1, something like b = 1.001
#include <iostream>
#include <cmath>
#include <algorithm>
//#define MAX(A,B) ((A) > (B) ? (A) : (B))
//#define MIN(A,B) ((A) > (B) ? (B) : (A))
// Number stored as an integer exponent: a value x is represented by logr with
// x = b**logr. Multiplication and division become integer addition and
// subtraction; addition and subtraction use precomputed lookup tables.
// The exponent `zeron` is reserved for the value 0, `onen` for 1, `infn` for infinity.
class LogProb {
public:
  // mj for cross entropy
  // Logarithm to base 2 of the represented value.
  double base2() const {
    return (logr * logb2 / log(2));
  }
  // Constructors
  LogProb() : logr(zeron) {}                        // the value 0
  LogProb(const LogProb &obj) : logr(obj.logr) {}
  // Converts from the linear domain; 0.0 maps to the reserved zero exponent.
  LogProb(double x) : logr(x == 0.0 ? zeron : round(log(x)/logb2)) {}
  // destructor
  ~LogProb() {} // default destructor
  operator double() const // converts logr to (double) b**logr
  {
    // Exponents outside [nmin, nmax] are clamped to the ends of the table.
    if (logr < nmin) return ntof[0];
    if (logr > nmax) return ntof[nmax-nmin];
    return ntof[logr-nmin];
  }
  LogProb &operator=(const LogProb &obj) { logr = obj.logr; return *this; }
  // True (non-zero) when the value is exactly zero.
  int operator!() const { return logr == zeron; }
  // iostream friend specifications
  friend std::ostream& operator<<(std::ostream& os, const LogProb &obj);
  friend std::istream& operator>>(std::istream& is, LogProb &obj);
  friend std::ostream& operator<<=(std::ostream& os, const LogProb &obj);
  friend std::istream& operator>>=(std::istream& is, LogProb &obj);
  // arithmetic operators
  LogProb &operator+=(const LogProb &add) // logr2 = logb ( b**logr2 + b**logr1 )
  // Add two numbers represented as logarithms. Use the following method:
  // b**n + b**m = b**n(1 + b**(m-n)), assuming n >= m.
  {
    if (add.logr == zeron)
      return *this;
    if (logr == zeron)
      {
        logr = add.logr;
        return *this;
      }
    int a = add.logr - logr;
    if (a > 0)
      {
        // Keep the larger exponent in logr; `a` becomes the non-positive gap.
        a = -a;
        logr = add.logr;
      }
    if (a < tblbnd)
      return *this;   // smaller operand is too small to change the result
    logr += addtbl[a-tblbnd];
    return *this;
  }
  LogProb &operator-=(const LogProb &); // logr2 = logb ( b**logr2 - b**logr1 )
  LogProb operator*(const LogProb &mul) const // logr3 = logr2 + logr1
  {
    LogProb result; // start out with result == 0
    if ((logr != zeron) && (mul.logr != zeron))
      result.logr = std::max(logr+mul.logr, zeron);
    return result;
  }
  LogProb operator*(double x) const // logr3 = logr2 + logr1
  {
    return (*this)*(LogProb)x;
  }
  LogProb operator^(const int i) const // logr2 = logr1 * i
  {
    LogProb result; // start out with result == 0
    // Clamp at zeron like operator* does. Without the clamp, zero raised to a
    // positive power produced an exponent below zeron, which operator!,
    // operator== and the zero guards above no longer recognise as zero.
    result.logr = std::max(logr * i, zeron);
    return result;
  }
  LogProb &operator*=(const LogProb &mul) // logr2 += logr1
  {
    if ((logr == zeron) || (mul.logr == zeron))
      logr = zeron;
    else
      logr = std::max(logr+mul.logr, zeron);
    return *this;
  }
  LogProb operator/(const LogProb &div) const // logr3 = logr2 -logr1
  {
    LogProb result;
    if (logr != zeron)
      result.logr = std::max(logr - div.logr, zeron);
    return result;
  }
  LogProb &operator/=(const LogProb &div) // logr2 -= logr1
  {
    if (logr != zeron)
      logr = std::max(logr - div.logr, zeron);
    return *this;
  }
  LogProb operator+(const LogProb &l) const // logr3 = logb ( b**logr2 + b**logr1 )
  { LogProb result(*this); result += l; return result; }
  LogProb operator-(const LogProb &l) const // logr3 = logb ( b**logr2 - b**logr1 )
  { LogProb result(*this); result -= l; return result; }
  // Same operation as operator^, clamped at zeron in the same way.
  LogProb power(const int n) const // logr2 = logr1 * int
  { LogProb result(*this); result.logr = std::max(result.logr * n, zeron); return result; }
  // Conditional operators
  // Comparisons between two LogProbs compare the integer exponents.
  int operator<(const LogProb &obj) const { return logr < obj.logr; }
  int operator<=(const LogProb &obj) const { return logr <= obj.logr; }
  int operator>(const LogProb &obj) const { return logr > obj.logr; }
  int operator>=(const LogProb &obj) const { return logr >= obj.logr; }
  int operator==(const LogProb &obj) const { return logr == obj.logr; }
  int operator!=(const LogProb &obj) const { return logr != obj.logr; }
  // Comparisons against a double are done in the linear domain.
  int operator<(double d) const { return ((double)*this) < d; }
  int operator<=(double d) const { return ((double)*this) <= d; }
  int operator>(double d) const { return ((double)*this) > d; }
  int operator>=(double d) const { return ((double)*this) >= d; }
  int operator==(double d) const { return ((double)*this) == d; }
  int operator!=(double d) const { return ((double)*this) != d; }
  LogProb &SetZero() { logr = zeron; return *this; } // representation of 0,
  LogProb &SetOne() { logr = onen; return *this; } // 1, and
  LogProb &SetInf() { logr = infn; return *this; } // inf in logarithm domain
private:
  int logr; // a representation of logarithm
  // static constants
  static const int initialized; // initialization flag
  static const double b;
  static const double logb2;
  static const int nmin, nmax;
  static const int tblbnd;
  static const int zeron, onen, infn; // zero, one, and inf in log domain
  static const int max_2byte_integer, min_2byte_integer;
  // Arithmetic computation Tables
  static double *ntof;
  static int *addtbl;
  static int *subtbl;
  static int Initialize();
public:
  static void FreeTables();
  // constants for initializing LogProbs to 0 or 1
  static const LogProb zero;
  static const LogProb one;
  static const LogProb minus2;
  static const LogProb minus4;
  static const LogProb minus6;
  static const LogProb minus8;
  static const LogProb minus10;
  static const LogProb minus12;
  static const LogProb minus14;
  static const LogProb minus16;
};
// iostream friend operators
// Text output: prints the value in the linear domain (b**logr) as a double.
inline std::ostream &operator<<(std::ostream& os, const LogProb &obj)
{
  const double linear = obj;   // LogProb::operator double()
  os << linear;
  return os;
}
// Text input: reads a double in the linear domain and converts it to a LogProb.
// If the extraction fails, `obj` is left unchanged and the stream keeps its
// failure state (the unchecked value used to be assigned regardless).
inline std::istream &operator>>(std::istream& is, LogProb &obj)
{
  double d = 0.0;
  if (is >> d)
    obj = d;
  return is;
}
// Binary output: writes the raw bytes of the integer exponent logr.
inline std::ostream &operator<<=(std::ostream& os, const LogProb &obj) // write binary
{
  const char *rawBytes = (const char *)&obj.logr;
  os.write(rawBytes, sizeof(obj.logr));
  return os;
}
// Binary input: reads the raw bytes of the integer exponent logr.
inline std::istream &operator>>=(std::istream& is, LogProb &obj)
{
  char *rawBytes = (char *)&obj.logr;
  is.read(rawBytes, sizeof(obj.logr));
  return is;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,370 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef ARRAY_H_DEFINED
#define ARRAY_H_DEFINED
using namespace std;
#include "myassert.h"
#include <algorithm>
#include <string>
#include <utility>
#include <functional>
#include "my.h"
#define ARRAY_DEBUG
// Growable array with bounds-checked element access.
// Valid indices are 0..maxWritten. When the auto-expand flag is set, writing
// to index size() through the non-const operator[] grows the array by one.
template<class T> class Array
{
 private:
  T *p;            // element buffer of realSize elements (0 when nothing is allocated)
  int realSize;    // allocated capacity
  int maxWritten;  // highest valid index; size() == maxWritten+1
  char a;          // auto-expand flag
  void copy(T *a,const T *b,int n);
  void copy(T *a,T *b,int n);
  void _expand();
 public:
  // Empty array with auto-expand switched on.
  Array()
    : p(0),realSize(0),maxWritten(-1) ,a(1)
    {
#ifdef VERY_ARRAY_DEBUG
      cout << "MAKE ARRAY: " << this<<" "<<(void*)p << endl;
#endif
    }
  // Copies only the valid elements of x; spare capacity is not reproduced.
  Array(const Array<T> &x)
    : p(new T[x.maxWritten+1]),realSize(x.maxWritten+1),maxWritten(x.maxWritten),a(x.a)
    {
      copy(p,x.p,realSize);
#ifdef VERY_ARRAY_DEBUG
      cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< endl;
#endif
    }
  // n default-constructed elements, auto-expand off.
  explicit Array(int n)
    : p(new T[n]),realSize(n),maxWritten(n-1),a(0)
    {
#ifdef VERY_ARRAY_DEBUG
      cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
    }
  // n copies of _init; _a sets the auto-expand flag.
  Array(int n,const T&_init,int _a=0)
    : p(new T[n]),realSize(n),maxWritten(n-1),a(_a)
    {
      for(int iii=0;iii<n;iii++)p[iii]=_init;
#ifdef VERY_ARRAY_DEBUG
      cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
    }
  ~Array()
    {
#ifdef VERY_ARRAY_DEBUG
      cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
      delete [] p;
    }
  // Copy assignment. The new buffer is allocated before the old one is
  // released, so a failed allocation leaves *this intact instead of leaving
  // p pointing at freed memory.
  Array<T>& operator=(const Array<T>&x)
    {
      if( this!= &x )
        {
          const int newSize = x.maxWritten+1;
          T *fresh = new T[newSize];
          copy(fresh,x.p,newSize);
#ifdef VERY_ARRAY_DEBUG
          cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
          delete [] p;
          p = fresh;
          realSize = newSize;
          maxWritten = x.maxWritten;
          a = x.a;
#ifdef VERY_ARRAY_DEBUG
          cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
        }
      return *this;
    }
  // Overload for non-const sources; same behaviour as the const overload.
  Array<T>& operator=(Array<T>&x)
    {
      return operator=(static_cast<const Array<T>&>(x));
    }
  // Makes index n valid, growing the buffer as needed.
  void allowAccess(int n)
    {
      while( realSize<=n )
        _expand();
      maxWritten=max(maxWritten,n);
      massert( maxWritten<realSize );
    }
  // Sets the logical size to n, growing the buffer as needed (it never shrinks).
  void resize(int n)
    {
      while( realSize<n )
        _expand();
      maxWritten=n-1;
    }
  // Sorts the first `until` elements ascending (all elements when until is -1).
  void sort(int until=-1)
    {
      if( until== -1 ) until=size();
      std::sort(p,p+until);
    }
  // Sorts the first `until` elements descending (all elements when until is -1).
  void invsort(int until=-1)
    {
      if( until== -1 ) until=size();
      std::sort(p,p+until,greater<T>());
    }
  // Discards the contents and re-creates the array as n copies of _init.
  void init(int n,const T&_init,bool _a=0)
    {
#ifdef VERY_ARRAY_DEBUG
      cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
      delete []p;
      p=new T[n];
      realSize=n;
      a=_a;
      maxWritten=n-1;
      for(int iii=0;iii<n;iii++)p[iii]=_init;
#ifdef VERY_ARRAY_DEBUG
      cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
    }
  inline int size() const
    {massert( maxWritten<realSize );
    return maxWritten+1;}
  inline int low() const
    { return 0; }
  inline int high() const
    { return maxWritten; }
  inline bool autoexpand() const
    {return a;}
  inline void autoexpand(bool autoExp)
    {a=autoExp;}
  int findMax() const;
  int findMin() const;
  const void errorAccess(int n) const;
  inline T*getPointerToData(){return p;}
  // Checked access. With auto-expand on, index size() is created on demand;
  // any other out-of-range index is routed to errorAccess().
  inline T& operator[](int n)
    {
      if( a && n==maxWritten+1 )
        allowAccess(n);
      if( n<0 || n>maxWritten )
        errorAccess(n);
      return p[n];
    }
  inline const T& operator[](int n) const
    {
      if(n<0 || n>maxWritten )
        errorAccess(n);
      return p[n];
    }
  // n-th element counted from the end (top(0) is the last element).
  const T&top(int n=0) const
    {return (*this)[maxWritten-n];}
  T&top(int n=0)
    {return (*this)[maxWritten-n];}
  // Appends x by writing to index size(); this relies on auto-expand being on.
  T&push(const T&x)
    {
      (*this)[maxWritten+1]=x;
      return top();
    }
  // Writes "Array <size> <flag>" followed by one element per line.
  // NOTE(review): the flag is a char, so it is written as a raw byte rather
  // than as the digit 0/1; readFrom reads it back the same way.
  bool writeTo(ostream&out) const
    {
      out << "Array ";
      out << size() << " ";
      out << a << endl;
      for(int iv=0;iv<=maxWritten;iv++)
        {
          writeOb(out,(*this)[iv]);
          out << endl;
        }
      return 1;
    }
  // Reads the format produced by writeTo. Returns 0, leaving the array
  // untouched, when the stream is bad, the "Array" tag is missing, or the
  // header cannot be parsed. The size used to be passed to resize() unchecked,
  // so a truncated header resized the array to an indeterminate value.
  bool readFrom(istream&in)
    {
      string s;
      if( !in )
        {
          cerr << "ERROR(Array): file cannot be opened.\n";
          return 0;
        }
      in >> s;
      if( !(s=="Array") )
        {
          cerr << "ERROR(Array): Array!='"<<s<<"'\n";
          return 0;
        }
      int biggest = 0;
      char flag = 0;
      in >> biggest;
      in >> flag;
      if( !in || biggest<0 )
        {
          cerr << "ERROR(Array): invalid array header.\n";
          return 0;
        }
      a = flag;
      resize(biggest);
      for(int iv=0;iv<size();iv++)
        {
          readOb(in,(*this)[iv]);
        }
      return 1;
    }
};
// Element-wise equality: same size and every pair of elements compares equal.
template<class T> bool operator==(const Array<T> &x, const Array<T> &y)
{
  // An array is always equal to itself.
  if( &x == &y )
    return 1;
  if( y.size()!=x.size() )
    return 0;
  for(int idx=0;idx<x.size();idx++)
    {
      if( x[idx]==y[idx] )
        continue;
      return 0;
    }
  return 1;
}
// Lexicographic ordering; a proper prefix sorts before the longer array.
template<class T> bool operator<(const Array<T> &x, const Array<T> &y)
{
  if( &x == &y )
    return 0;
  // Reduce to the case where x is not longer than y (sizes differ here, so
  // the two arrays cannot be equal).
  if( y.size()<x.size() )
    return !(y<x);
  for(int idx=0;idx<x.size();idx++)
    {
      massert( idx!=y.size() );
      if( x[idx]<y[idx] )
        return 1;
      if( y[idx]<x[idx] )
        return 0;
    }
  // All compared elements are equivalent: x is smaller only if it is shorter.
  return x.size()!=y.size();
}
// Reports an out-of-range access to element n.
// The same diagnostic (index, maxWritten, realSize, buffer address, auto-expand
// flag) goes to both cerr and cout; then massert(0) is triggered and, unless
// DEBUG is defined, the program is aborted.
template<class T> const void Array<T>:: errorAccess(int n) const
{
cerr << "ERROR: Access to array element " << n
<< " (" << maxWritten << "," << realSize << "," << (void*)p << " " << a << ")\n";
cout << "ERROR: Access to array element " << n
<< " (" << maxWritten << "," << realSize << "," << (void*)p << " " << a << ")\n";
massert(0);
#ifndef DEBUG
abort();
#endif
}
// Human-readable dump: "Array(<size>,<autoexpand>){  0:<v0>; 1:<v1>;...}" plus newline.
template<class T> ostream& operator<<(ostream&o,const Array<T>&a)
{
  o << "Array(" << a.size() << "," << a.autoexpand() << "){ ";
  int idx=0;
  while( idx<a.size() )
    {
      o << " " << idx << ":" << a[idx] << ";";
      idx++;
    }
  o << "}\n";
  return o;
}
// Placeholder extraction operator: reads nothing and leaves the array untouched.
template<class T> istream& operator>>(istream&in, Array<T>&)
{return in;}
// Hash of an array: sum of Hash(element) weighted by its 1-based position,
// plus 47 times the size.
template<class T> int Hash(const Array<T>&a)
{
  int sum=0;
  int weight=1;
  for(int idx=0;idx<a.size();idx++,weight++)
    sum+=Hash(a[idx])*weight;
  return sum+a.size()*47;
}
// Assigns the first n elements of bb to aa, in ascending index order.
template<class T> void Array<T>::copy(T *aa,const T *bb,int n)
{
  int idx=0;
  while( idx<n )
    {
      aa[idx]=bb[idx];
      ++idx;
    }
}
// Overload for a non-const source: assigns the first n elements of bb to aa,
// in ascending index order.
template<class T> void Array<T>::copy(T *aa,T *bb,int n)
{
  int idx=0;
  while( idx<n )
    {
      aa[idx]=bb[idx];
      ++idx;
    }
}
// Grows the buffer to 2*realSize+1 elements, keeping the existing contents.
template<class T> void Array<T>::_expand()
{
#ifdef VERY_ARRAY_DEBUG
  cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
  const int previousSize=realSize;
  T *previous=p;
  realSize=2*previousSize+1;
  p=new T[realSize];
  copy(p,previous,previousSize);
  delete [] previous;
#ifdef VERY_ARRAY_DEBUG
  cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << endl;
#endif
}
// Index of the largest element (the first one on ties); -1 for an empty array.
template<class T> int Array<T>::findMax() const
{
  if( size()==0 )
    return -1;
  int best=0;
  for(int idx=1;idx<size();idx++)
    {
      // Strict comparison keeps the earliest of several equal maxima.
      if( (*this)[best]<(*this)[idx] )
        best=idx;
    }
  return best;
}
// Index of the smallest element (the first one on ties); -1 for an empty array.
template<class T> int Array<T>::findMin() const
{
  if( size()==0 )
    return -1;
  int best=0;
  for(int idx=1;idx<size();idx++)
    {
      // Strict comparison keeps the earliest of several equal minima.
      if( (*this)[idx]<(*this)[best] )
        best=idx;
    }
  return best;
}
#endif

View File

@ -0,0 +1,287 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef FIXARRAY_H_DEFINED
#define FIXARRAY_H_DEFINED
#include <iostream>
#include <string>
#include <functional>
// Writes f to `out` followed by a single blank. Always returns true.
template<class T>
bool writeOb(ostream&out,const T&f)
{
  out << f;
  out << " ";
  return true;
}
// Reads one value into f and then consumes the single blank that follows it.
// Returns 0 when the value cannot be extracted (this used to be ignored, and
// 1 was returned regardless), 1 otherwise.
template<class T>
bool readOb(istream&in,T&f)
{
  if( !(in >> f) )
    return 0;
  // Initialised so that the check below never reads an indeterminate value
  // when get() fails, e.g. at end of input.
  char c=0;
  in.get(c);
  massert(c==' ');
  return 1;
}
// Writes the label s, a blank, the value f and a trailing blank. Always returns true.
template<class T>
bool writeOb(ostream&out,const string &s,const T&f)
{
  out << s;
  out << " ";
  out << f;
  out << " ";
  return true;
}
// Reads a labelled value: expects the label s, then the value, then one blank.
// Returns 0 (after printing a message to cerr) when the label does not match,
// 0 when the value cannot be extracted (this used to be ignored), 1 otherwise.
template<class T>
bool readOb(istream&in,const string&s,T&f)
{
  string ss;
  in >> ss;
  if( s!=ss )
    {
      cerr << "ERROR: readOb should be '" << s << "' and is '" << ss << "'" << endl;
      return 0;
    }
  if( !(in >> f) )
    return 0;
  // Initialised so that the check below never reads an indeterminate value
  // when get() fails, e.g. at end of input.
  char c=0;
  in.get(c);
  massert(c==' ');
  return 1;
}
// Heap-allocated array whose size changes only through resize/shrink/init.
// Element access through operator[] is NOT bounds-checked.
template<class T> class FixedArray
{
 private:
  // Assigns the first nnn elements of bb to aa.
  void copy(T *aa,const T *bb,int nnn)
    {for(int iii=0;iii<nnn;iii++)aa[iii]=bb[iii];}
 public:
  T *p;          // element buffer (0 for a default-constructed array)
  int realSize;  // number of elements
  FixedArray()
    : p(0),realSize(0){}
  FixedArray(const FixedArray<T> &x)
    : p(new T[x.realSize]),realSize(x.realSize) {copy(p,x.p,realSize);}
  // n default-constructed elements.
  explicit FixedArray(int n)
    : p(new T[n]),realSize(n){}
  // n copies of _init.
  FixedArray(int n,const T&_init)
    : p(new T[n]),realSize(n){for(int z=0;z<n;z++)p[z]=_init;}
  // Copy of f with t appended as an additional last element.
  FixedArray(const FixedArray&f,const T&t)
    : p(new T[f.size()+1]),realSize(f.size()+1){for(int z=0;z<f.size();z++)p[z]=f[z];p[f.size()]=t;}
  ~FixedArray()
    { delete [] p;p=0;realSize=-1;}
  // Copy assignment. The new buffer is allocated before the old one is
  // released, so a failed allocation leaves *this intact instead of leaving
  // p pointing at freed memory.
  FixedArray<T>& operator=(const FixedArray<T>&x)
    {
      if( this!= &x )
        {
          T *fresh = new T[x.realSize];
          copy(fresh,x.p,x.realSize);
          delete [] p;
          p = fresh;
          realSize = x.realSize;
        }
      return *this;
    }
  // Changes the size to n. Shrinking keeps the buffer; growing reallocates and
  // keeps the existing elements (the new ones are default-constructed).
  void resize(int n)
    {
      if( n<=realSize )
        shrink(n);
      else
        {
          T*np=new T[n];
          copy(np,p,realSize);
          delete []p;
          p=np;
          realSize=n;
        }
    }
  // Reduces the logical size to n without releasing memory.
  void shrink(int n)
    {
      assert(n<=realSize);
      realSize=n;
    }
  // Discards the contents and re-creates the array as n copies of _init.
  void init(int n,const T&_init)
    {
      delete []p;
      p=new T[n];
      realSize=n;
      for(int l=0;l<n;l++)p[l]=_init;
    }
  // n-th element counted from the end (top(0) is the last element).
  inline const T&top(int n=0) const
    {return (*this)[realSize-1-n];}
  inline int size() const
    {return realSize;}
  inline T*begin(){ return p; }
  inline T*end(){ return p+realSize; }
  inline const T*begin()const{ return p; }
  inline const T*end()const{return p+realSize;}
  inline int low() const
    {return 0;}
  inline int high() const
    {return realSize-1;}
  const void errorAccess(int n) const;
  inline T& operator[](int n)
    {
      return p[n];
    }
  inline const T& operator[](int n) const
    {
      return p[n];
    }
  // Writes "FixedArray <size> " followed by the elements on a single line.
  bool writeTo(ostream&out) const
    {
      out << "FixedArray ";
      out << size() << " ";
      for(int a=0;a<size();a++)
        {
          writeOb(out,(*this)[a]);
          out << " ";
        }
      out << endl;
      return 1;
    }
  // Reads the format produced by writeTo. Returns 0, leaving the array
  // untouched, when the stream is bad, the "FixedArray" tag is missing, or the
  // size cannot be parsed or is negative (it used to be passed to resize()
  // unchecked).
  bool readFrom(istream&in)
    {
      string s;
      if( !in )
        {
          cerr << "ERROR(FixedArray): file cannot be opened.\n";
          return 0;
        }
      in >> s;
      if( !(s=="FixedArray") )
        {
          cerr << "ERROR(FixedArray): FixedArray!='"<<s<<"'\n";
          return 0;
        }
      int biggest = 0;
      if( !(in >> biggest) || biggest<0 )
        {
          cerr << "ERROR(FixedArray): invalid array size.\n";
          return 0;
        }
      resize(biggest);
      for(int a=0;a<size();a++)
        readOb(in,(*this)[a]);
      return 1;
    }
  // Sorts the first `until` elements ascending (all elements when until is -1).
  void sort(int until=-1)
    {
      if( until== -1 ) until=size();
      std::sort(p,p+until);
    }
  // Sorts the first `until` elements descending (all elements when until is -1).
  void invsort(int until=-1)
    {
      if( until== -1 ) until=size();
      std::sort(p,p+until,greater<T>());
    }
  // Index of the first element that is not less than t, or -1 when every
  // element is less than t. Requires the array to be sorted ascending.
  int binary_locate(const T&t)
    {
      T*ppos=std::lower_bound(p,p+size(),t);
      int pos=ppos-p;
      // lower_bound never returns a position before p, so only the upper
      // bound needs checking (the former `pos>=-1` test was always true).
      if( pos<size() )
        return pos;
      else
        return -1;
    }
  // Index of an element equal to t, or -1 when there is none.
  // Requires the array to be sorted ascending.
  int binary_search(const T&t)
    {
      T*ppos=std::lower_bound(p,p+size(),t);
      int pos=ppos-p;
      if( pos>=0&&pos<size()&& *ppos==t )
        return pos;
      else
        return -1;
    }
  typedef T* iterator;
  typedef const T* const_iterator;
};
// Lexicographic ordering of two FixedArrays.
template<class T> bool operator<(const FixedArray<T> &x, const FixedArray<T> &y)
{
  const T *xFirst=x.begin();
  const T *xLast=x.end();
  const T *yFirst=y.begin();
  const T *yLast=y.end();
  return lexicographical_compare(xFirst,xLast,yFirst,yLast);
}
// Element-wise equality: same size and every pair of elements compares equal.
template<class T> bool operator==(const FixedArray<T> &x, const FixedArray<T> &y)
{
  if( &x == &y )
    return 1;
  const int count = x.size();
  if( count !=y.size() )
    return 0;
  int idx=0;
  while( idx<count )
    {
      if( !(x.p[idx]==y.p[idx]) )
        return 0;
      ++idx;
    }
  return 1;
}
// Polynomial hash over the elements: n = 13*n + Hash(element), starting from 0.
template<class T> int Hash(const FixedArray<T>&a)
{
  const int count=a.size();
  int acc=0;
  int idx=0;
  while( idx<count )
    {
      acc=13*acc+Hash(a.p[idx]);
      ++idx;
    }
  return acc;
}
// Reports an invalid access to element n: triggers massert(0) first and then
// writes the index, size and buffer address to cerr.
// NOTE(review): the message is printed only if massert(0) returns -- confirm
// how massert behaves in the build configuration in use.
template<class T> const void FixedArray<T>:: errorAccess(int n) const
{
massert(0);
cerr << "ERROR: Access to array element " << n
<< " (" << realSize << "," << (void*)p << ")\n";
}
// Human-readable dump: "FixedArray(<size>){  0:<v0>; 1:<v1>;...}" plus newline.
template<class T> ostream& operator<<(ostream&o,const FixedArray<T>&a)
{
  o << "FixedArray(" << a.size() << "){ ";
  int idx=0;
  while( idx<a.size() )
    {
      o << " " << idx << ":" << a[idx] << ";";
      idx++;
    }
  o << "}\n";
  return o;
}
// Placeholder extraction operator: reads nothing and leaves the array untouched.
template<class T> istream& operator>>(istream&in, FixedArray<T>&)
{ return in;}
// Element-wise sum of two arrays; massert checks that both have the same size.
template<class T> FixedArray<T> operator+(const FixedArray<T>&a,const FixedArray<T>&b)
{
  massert(a.size()==b.size());
  const int n=a.size();
  FixedArray<T> x(n);
  for(int k=0;k<n;++k)
    x[k]=a[k]+b[k];
  return x;
}
// Element-wise logical OR (operator||) of two arrays; iassert checks that
// both have the same size.
template<class T> FixedArray<T> operator|(const FixedArray<T>&aaa,const FixedArray<T>&bbb)
{
  iassert(aaa.size()==bbb.size());
  const int n=aaa.size();
  FixedArray<T> xxx(n);
  for(int k=0;k<n;++k)
    xxx.p[k]=aaa.p[k]||bbb.p[k];
  return xxx;
}
#endif

View File

@ -0,0 +1,48 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef CLASS_FlexArray_defined
#define CLASS_FlexArray_defined
#include "FixedArray.h"
// Array whose valid indices run from `start` to `end` (both inclusive)
// instead of starting at 0. Elements are kept in a FixedArray of
// end-start+1 entries; index i is stored at position i-start.
template<class T>
class FlexArray
{
private:
  FixedArray<T> p;   // backing storage
  int start,end;     // lowest / highest index passed to the constructor
public:
  // With the defaults (0,-1) the backing array has zero elements.
  FlexArray(int _start=0,int _end=-1)
    : p(_end-_start+1),start(_start),end(_end) {}
  // Element access; i is shifted by `start` before indexing the storage.
  T&operator[](int i)
  {return p[i-start];}
  const T&operator[](int i)const
  {return p[i-start];}
  // Lowest and highest index as given at construction.
  int low()const{return start;}
  int high()const{return end;}
};
#endif

View File

@ -0,0 +1,159 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include "GDAOptimization.h"
#include "ProblemTest.h"
#include <cmath>
// These two macros expand to their own name, i.e. they have no effect.
#define GDAOptimization GDAOptimization
#define IterOptimization IterOptimization
// Defaults used by the (Problem&,int) constructor. zInitialize() treats a
// temperature of exactly 1e100 as "not set" and replaces it with
// problem.value().
double GDAOptimization::defaultTemperatur=1e100;
double GDAOptimization::defaultAlpha=0.001;
// Builds an optimizer for problem p with at most m steps, taking the
// temperature and alpha from the static defaults. gdaEndFlag starts at 0 so
// that it is never read or copied while uninitialised.
GDAOptimization::GDAOptimization(Problem &p,int m)
  : IterOptimization(p,m) ,temperatur(defaultTemperatur),alpha(defaultAlpha),
    gdaEndFlag(0)
{
}
// Builds an optimizer for problem p with explicit temperature t, alpha a and
// at most m steps. gdaEndFlag starts at 0 so that it is never read or copied
// while uninitialised.
GDAOptimization::GDAOptimization(Problem &p,double t,double a,int m)
  : IterOptimization(p,m) ,temperatur(t) ,alpha(a) ,gdaEndFlag(0)
{
}
// Copy constructor: copies the base-class state and the three GDA members.
GDAOptimization::GDAOptimization(GDAOptimization &o)
  : IterOptimization(o),
    temperatur(o.temperatur),
    alpha(o.alpha),
    gdaEndFlag(o.gdaEndFlag)
{
}
// Runs the base-class initialisation, then replaces a temperature that is
// still at the 1e100 sentinel with the problem's current value. Asserts
// that alpha is non-negative.
void GDAOptimization::zInitialize()
{
  IterOptimization::zInitialize();
  if( temperatur==1e100 )
    temperatur=problem.value();
  assert(alpha>=0);
}
// Accepts a change (returns 1) when the value after applying delta stays
// strictly below the current temperature; returns 0 otherwise.
short GDAOptimization::accept(double delta)
{
  const bool belowLevel = ( curValue + delta < temperatur );
  return belowLevel ? 1 : 0;
}
// Cooling step: moves the temperature towards curValue by the fraction
// alpha. gdaEndFlag is set to 1 when the temperature changed by less than
// 1e-30 in this step, and to 0 otherwise.
void GDAOptimization::abkuehlen()
{
  const double newTemperatur = temperatur - alpha*(temperatur - curValue);
  gdaEndFlag = ( fabs(temperatur - newTemperatur)<1e-30 ) ? 1 : 0;
  temperatur = newTemperatur;
}
// The run is finished (returns 1) only when endFlag is positive and
// gdaEndFlag is set; returns 0 otherwise.
short GDAOptimization::end()
{
  if( endFlag<=0 )
    return 0;
  return gdaEndFlag ? 1 : 0;
}
// Appends the distance between temperature and current value to the
// base-class graph output line.
void GDAOptimization::makeGraphOutput()
{
  IterOptimization::makeGraphOutput();
  const double gap = temperatur-curValue;
  *GraphOutput << gap;
}
// Tries numParameter+1 values of defaultAlpha, growing geometrically from
// 0.002 by the factor 200^(1/numParameter) per step, and calls
// solveProblem() with GDA_OPT for each one. Returns the alpha that gave the
// smallest result (-1 if none was below 1e100).
// Only typ==1 is supported; any other value prints an error and exits.
// When `print` is non-zero a statistics line is written to cout per value,
// followed by a column legend.
// Side effects: defaultTemperatur is set to 1e100, and defaultAlpha is left
// at 0.03 on return.
double GDAOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,int typ,
				      int optimierungsschritte,int print)
{
  if( typ!=1 )
    {
      cerr << "Error: wrong parameter-type in GDAOptimization::optimizeValue ("
	   << typ << ")\n";
      exit(1);
    }

  double bestPar=-1,best=1e100;
  double now;
  if( print )
    cout << "#GDA-optimizeValues: " << numParameter<<endl;

  defaultTemperatur=1e100;
  const double stepFactor = pow(200,1.0/numParameter);
  for(int i=0;i<=numParameter;i++)
    {
      StatVar end,laufzeit,init;
      defaultAlpha = pow(stepFactor,i)*0.002;
      solveProblem(0,p,proParameter,optimierungsschritte,GDA_OPT,now,end,
		   laufzeit,init);
      if( best>now )
	{
	  best=now;
	  bestPar=defaultAlpha;
	}
      if( !print )
	continue;
      cout << defaultAlpha <<" ";
      cout << end.getMean() << " " << end.quantil(0.2) << " "
	   << end.quantil(0.79) << " " << laufzeit.getMean() << " "
	   << end.quantil(0.0) << " " << end.getSigma() << " "
	   << end.getSigmaSmaller()<< " "<< end.getSigmaBigger()<< endl;
    }
  if( print )
    cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit"
      " Bester Sigma SigmaSmaller SigmaBigger\n";
  defaultAlpha=0.03;
  return bestPar;
}

View File

@ -0,0 +1,80 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef GDAOPTIMIZATION
#define GDAOPTIMIZATION
#include "IterOptimization.h"
// Iterative optimizer that accepts a change only while the resulting value
// stays below a "temperature" level, which abkuehlen() lowers towards the
// current value by the fraction alpha on every step.
class GDAOptimization : public IterOptimization
{
private:
// Current acceptance level; a change is accepted if curValue+delta is below it.
double temperatur;
// Fraction by which the level approaches curValue in each abkuehlen() call.
double alpha;
// Set by abkuehlen(): 1 when the level barely moved in the last step.
short gdaEndFlag;
protected:
virtual void zInitialize();
virtual short accept(double delta);
virtual void abkuehlen();
virtual short end();
virtual void makeGraphOutput();
public:
GDAOptimization(Problem &p,double temperatur,double alpha,
int maxIter=-1);
GDAOptimization(Problem &p,int maxIter=-1);
GDAOptimization(GDAOptimization &o);
// Searches for a good alpha; see the definition for the supported `typ`.
static double optimizeValue(Problem &p,int proParameter,
int numParameter,int typ,int schritte= -1,int verbose=1);
// Defaults used by the (Problem&,int) constructor.
static double defaultTemperatur;
static double defaultAlpha;
};
#endif

View File

@ -0,0 +1,57 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include "HCOptimization.h"
// Builds a hill-climbing optimizer for problem p. A non-positive step limit
// is replaced by the problem's expected number of iterations.
HCOptimization::HCOptimization(Problem &p,int m)
  : IterOptimization(p,m)
{
  if( maxStep>0 )
    return;
  maxStep=(int)(problem.expectedNumberOfIterations());
}
// Copy constructor: all state lives in the base class, which copies it.
HCOptimization::HCOptimization(HCOptimization &o)
  : IterOptimization(o)
{
  // nothing to copy at this level
}
// Accepts (returns 1) only changes with a strictly negative delta.
short HCOptimization::accept(double delta)
{
  return ( delta < 0 ) ? 1 : 0;
}
// The run is finished as soon as endFlag is positive.
short HCOptimization::end()
{
  const bool finished = ( endFlag>0 );
  return finished;
}
// This optimizer has nothing to cool down; the hook is intentionally empty.
void HCOptimization::abkuehlen()
{
  // no-op
}

View File

@ -0,0 +1,54 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef HCOPTIMIZATION
#define HCOPTIMIZATION
#include "IterOptimization.h"
// Iterative optimizer that accepts a change only when it lowers the value
// (accept() returns 1 for delta<0) and has an empty cooling step.
class HCOptimization : public IterOptimization
{
protected:
virtual short accept(double delta);
virtual void abkuehlen();
virtual short end();
public:
// maxIter<=0 is replaced by problem.expectedNumberOfIterations().
HCOptimization(Problem &p,int maxIter=-1);
HCOptimization(HCOptimization &o);
};
#endif

View File

@ -0,0 +1,199 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include "IterOptimization.h"
#include "ProblemTest.h"
// Optional stream for progress data; minimize() writes to it only when it is
// non-null.
ostream *GraphOutput;
// Builds an optimizer for problem p with step limit m. The object is marked
// as not yet initialised; minimize() calls zInitialize() on first use.
// All counters, values and end flags start at 0 so that no member is read or
// copied while indeterminate before zInitialize() has run.
IterOptimization::IterOptimization(Problem& p,int m)
  : maxNonBetterIterations(0),problem(p),
    curStep(0),curValue(0),bestStep(0),bestValue(0),
    maxStep(m),initialisiert(0),endFlag(0),endFlag2(0)
{
}
// Copy constructor: binds to the same Problem as o and copies the complete
// optimizer state. curStep is copied as well; without it a copy of an
// already initialised optimizer would continue with an indeterminate step
// counter, because minimize() skips zInitialize() when initialisiert is set.
IterOptimization::IterOptimization(IterOptimization& o) : Optimization(),problem(o.problem)
{
  maxNonBetterIterations=o.maxNonBetterIterations;
  curStep = o.curStep;
  curValue = o.curValue;
  bestStep = o.bestStep;
  bestValue = o.bestValue;
  maxStep = o.maxStep;
  initialisiert = o.initialisiert;
  endFlag = o.endFlag;
  endFlag2 = o.endFlag2;
}
// Runs the optimization loop and returns the current value afterwards.
// steps==0 only initialises (if necessary) and returns; a negative value
// means "no step limit" (the loop then ends through end() or
// problem.endCriterion()).
// Per iteration: obtain a candidate change, compute its value delta, call
// abkuehlen(), and apply the change if accept(delta) says so.
double IterOptimization::minimize(int steps)
{
if( !initialisiert )
zInitialize();
if( steps==0 )
return curValue;
int t=0;
// Interval (in steps) between two lines written to GraphOutput.
int every=(steps<0)?10000:(steps/1000+1);
do
{
curStep++;
t++;
// Progress line on cout every 1000 steps when verbose.
if(verboseMode&&(curStep%1000==0))
{
if(steps>0)
cout << "Processed: " << 100.0*(curStep/(double)max(maxStep,1)) << " percent. (IterOptimization run) "
<< curValue << " max:" << maxStep << " " << steps << " \r";
else
cout << "In step:" << curStep << " currentValue: " << curValue
<< " bestValue: " << bestValue-curValue << " " << curStep-bestStep << ". \r";
cout.flush();
}
// The change object is deleted at the end of the iteration.
ProblemChange *change= &(problem.change());
double delta=problem.valueChange(*change);
// Cooling happens before the acceptance test.
abkuehlen();
if( accept(delta) )
{
problem.doChange(*change);
curValue+=delta;
// A new best value (by more than 1e-10) resets both end flags.
if( curValue<bestValue-1e-10 )
{
bestValue=curValue;
bestStep=curStep;
endFlag2=endFlag=0;
}
if( verboseMode>1 )
cout<<"in step: "<<curStep<<" accepted with : "<<delta<<endl;
}
// endFlag / endFlag2 are raised after maxNonBetterIterations resp. twice
// that many steps without a new best value (only if the limit is positive).
if(curStep - bestStep>maxNonBetterIterations && maxNonBetterIterations>0)
endFlag=1;
if(curStep - bestStep>2*maxNonBetterIterations && maxNonBetterIterations>0)
endFlag2=1;
if( GraphOutput&&((curStep%every)==0) )
{
makeGraphOutput();
*GraphOutput<<" "<<delta<<endl;
}
delete change;
} while( t!=steps && (!end()) && (!problem.endCriterion()) );
// One final graph line after the loop.
if( GraphOutput)
{
makeGraphOutput();
*GraphOutput<<endl;
}
return curValue;
}
// Marks the optimizer as initialised and resets its state: current and best
// value come from problem.value(), the non-improvement limit from the
// problem, and the step counters and end flags are cleared.
void IterOptimization::zInitialize()
{
  initialisiert=1;

  curValue=problem.value();
  bestValue=curValue;

  maxNonBetterIterations=problem.maxNonBetterIterations();

  curStep=0;
  bestStep=curStep;

  endFlag=0;
  endFlag2=endFlag;
}
void IterOptimization::makeGraphOutput()
{
*GraphOutput << curStep << " " <<curValue << " ";
}
// Recursive interval search for a good value of `parameter` in [min,max].
// Each level evaluates the two points at 1/3 and 2/3 of the interval by
// assigning them to `parameter` and calling solveProblem(), prints one line
// per evaluation, then recurses into the two thirds of the interval on the
// side of the smaller mean, with nPar reduced by 2.
// Returns the best parameter value found and stores the corresponding mean
// in bv. When nPar<=0 the midpoint is returned and bv is left unchanged.
// `parameter` is left at the value of the last evaluation.
double IterOptimizationOptimizeParameter(Problem &p,
double &parameter,double min,double max,
int nRun,int nPar,int verfahren,
double &bv)
{
if( nPar<=0 )
return (max+min)/2;
StatVar end1,time1,init1;
StatVar end2,time2,init2;
double mean1,mean2;
double par1,par2;
// First probe: one third into the interval.
parameter = par1 = min + (max-min)/3;
solveProblem(0,p,nRun,-1,verfahren,mean1,end1,time1,init1);
cout << parameter << " " << mean1 << " " << end1.quantil(0.0) << " " << end1.quantil(1.0) << endl;
// Second probe: two thirds into the interval.
parameter = par2 = min + 2*(max-min)/3;
solveProblem(0,p,nRun,-1,verfahren,mean2,end2,time2,init2);
cout << parameter << " " << mean2 << " " << end2.quantil(0.0) << " " << end2.quantil(1.0) << endl;
double bestPar,bestVal;
// bestVal is pre-set to the better probe so that it is defined even when the
// recursive call hits the nPar<=0 base case and does not write it.
if(mean1<mean2)
{
bestVal = mean1;
bestPar=IterOptimizationOptimizeParameter(p,parameter,min,min+2*(max-min)/3,nRun,nPar-2,verfahren,bestVal);
}
else
{
bestVal = mean2;
bestPar=IterOptimizationOptimizeParameter(p,parameter,min+(max-min)/3,max,nRun,nPar-2,verfahren,bestVal);
}
// Prefer one of this level's probes only if it is strictly better than what
// the recursion reported.
if( mean1<bestVal&&mean1<=mean2 )
{
bv = mean1;
return par1;
}
else if(mean2<bestVal && mean2<=mean1)
{
bv = mean2;
return par2;
}
else
{
bv = bestVal;
return bestPar;
}
}

View File

@ -0,0 +1,123 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef ITEROPTIMIZATION
#define ITEROPTIMIZATION
#include "Optimization.h"
#define ANZ_VERSCHLECHTERUNGEN 500
extern ostream *GraphOutput;
// Base class for step-wise optimizers. minimize() drives the loop; derived
// classes supply the acceptance rule (accept), the per-step cooling
// (abkuehlen) and the termination test (end).
class IterOptimization : public Optimization
{
private:
// Limit taken from problem.maxNonBetterIterations() in zInitialize();
// minimize() raises endFlag/endFlag2 once it is exceeded.
int maxNonBetterIterations;
protected:
Problem &problem;
// Number of steps executed so far.
int curStep;
// Value of the current state.
double curValue;
// Step at which bestValue was reached.
int bestStep;
// Smallest value seen so far.
double bestValue;
// Step limit passed to the constructor.
int maxStep;
// Non-zero once zInitialize() has run.
int initialisiert;
// Set by minimize() after maxNonBetterIterations resp. twice that many
// steps without a new best value.
short endFlag;
short endFlag2;
virtual void makeGraphOutput();
virtual short end()=0;
virtual void abkuehlen()=0;
virtual short accept(double delta)=0;
virtual void zInitialize();
public:
IterOptimization(Problem &p,int maxIter=-1);
IterOptimization(IterOptimization &o);
virtual double minimize(int steps=-1);
inline int getCurStep();
inline double getCurrentValue();
inline const Problem& getProblem();
};
// Searches [min,max] for a good value of `parameter`; returns the best value
// found and writes the corresponding result to bv.
double IterOptimizationOptimizeParameter(Problem &p,
double &parameter,double min,double max,
int nRun,int nPar,int verfahren,double &bv);
// Returns the number of steps executed so far.
inline int IterOptimization::getCurStep()
{
  return curStep;
}
// Returns the value of the current state.
inline double IterOptimization::getCurrentValue()
{
  return curValue;
}
// Returns a read-only reference to the problem being optimized.
inline const Problem& IterOptimization::getProblem()
{
  return problem;
}
#endif

View File

@ -0,0 +1,439 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef KATEG_OPT_H
#define KATEG_OPT_H
#include <string>
#include <stdlib.h>
#include "Problem.h"
// Constant used by kat_etaFkt(); defined in a .cpp file.
extern double rhoLo;
// Categories and words are identified by plain integers.
typedef int Kategory;
typedef int Word;
// Frequencies are integers unless FREQTYPE_DOUBLE is defined.
#ifdef FREQTYPE_DOUBLE
typedef double FreqType;
#else
typedef int FreqType;
#endif
#include "KategProblemWBC.h"
#include "KategProblemKBC.h"
// Identifiers for the initialisation mode.
// NOTE(review): what each mode does is defined outside this section.
enum {
INIT_RAN=1,
INIT_AIO=2,
INIT_LWRW=3,
INIT_FREQ=4,
INIT_OTHER=5
};
// Word-selection bits; CHOOSE_WORD below is the mask covering all of them
// (it is applied to `nachbarschaft` in wortwahl()).
enum {
W_RAN=(8|16),
W_DET_DECR=(16),
W_DET_INCR =(32)
};
#define CHOOSE_WORD (8|16|32)
// Category-selection bits; CHOOSE_KAT below is the mask covering them
// (it is applied to `nachbarschaft` in katwahl()).
enum {
K_DET=(64),
K_RAN=(128),
K_BEST=(64|128)
};
#define CHOOSE_KAT (64|128)
// Identifiers for the evaluation criterion (see the switch in
// KategProblemKBC::fullBewertung).
enum {
CRITERION_ML=0,
CRITERION_LO=1,
CRITERION_MY=2
};
// Sparse frequency table indexed by an integer. An entry counts as set only
// if its time stamp equals curTime; all other entries read as 0.
// NOTE(review): presumably init() advances curTime to clear the table in
// O(1) — confirm against the definition of init().
class NWG
{
private:
// Stored frequency per index; valid only where timeOfFreq[i]==curTime.
Array<FreqType> freq;
// Time stamp of the last write per index.
Array<int> timeOfFreq;
int curTime;
public:
NWG(int n);
void init();
// Number of valid entries in not0.
int anzNot0;
// Indices that received their first write at the current time stamp.
Array<int> not0;
int word;
inline void addFreq(int C,FreqType n);
void sort();
// Frequency stored for index i, or 0 if it was not written at curTime.
FreqType getFreq(int i)
{
if( timeOfFreq[i]==curTime )
return freq[i];
else
return 0;
};
};
// Adds n to the frequency of index g. The first write at the current time
// stamp overwrites the stale value and records g in not0.
inline void NWG::addFreq(int g,FreqType n)
{
  if( timeOfFreq[g]!=curTime )
    {
      timeOfFreq[g]=curTime;
      freq[g]=n;
      not0[anzNot0++]=g;
      return;
    }
  freq[g]+=n;
}
// A candidate move: a word together with a source and a target category.
// Allocation goes through class-specific operator new/delete, which are
// defined elsewhere.
struct KategProblemChange : public ProblemChange
{
void *operator new(size_t size);
void operator delete(void *ptr,size_t size);
int word;
int toKat;
int fromKat;
};
// Word-classification problem: every word is assigned to a category.
// Word statistics are held in wordFreq and category statistics in katFreq.
class KategProblem : public Problem
{
private:
double kat_h_full(int n);
double kat_h_full(double n);
double kat_h_part(int n);
double kat_h_part(double n);
double sigmaVerfaelschung;
short katWasEmpty;
// Reset to -1 whenever a word's category changes (see setKatOfWord).
int nwgWord;
// Sparse frequency tables queried by nstrich().
NWG nwg;
NWG ngw;
FreqType nww;
// Source and target category of the move evaluated by nstrich().
int ursprung,ziel;
// Category of each word; read through katOfWord().
Array<int> _katOfWord;
int _maxComp,_maxCompVal;
double nmo_my(int i,int j);
double nmo(int i,int j);
double nmo_lo(int i,int j,int &e0,int &e1);
void putWord(int word,int to);
void fastPutWord(int word,int to);
// Assigns category k to word w. Prints an error to cout (but still performs
// the assignment) when the word is fixed to a different category; -1 on
// either side disables the check.
void setKatOfWord(int w,int k)
{
if( !(wordFreq.fixedWord[w]==k||wordFreq.fixedWord[w]==-1||k==-1) )
{
cout << "mkcls::setKatOfWord::ERROR: " << w << " " << k << " " << wordFreq.fixedWord[w] << " " << (*words)[w] << endl;
}
_katOfWord[w]=k;
nwgWord=-1;
};
void fillNWG(int w);
inline FreqType nstrich(int i,int j);
void vnstrich(int i,int j);
protected:
virtual int _change(ProblemChange **p);
virtual void _doChange(ProblemChange &c);
virtual void _undoChange(ProblemChange &c);
virtual double _value();
double _valueChange(KategProblemChange &k);
virtual void incrementDirection();
virtual int maxDimensionVal(void) ;
virtual int maxDimension(void) ;
public:
// Vocabulary; setKatOfWord() indexes it by word number.
leda_array<string> *words;
typedef leda_set<int> intSet;
leda_array<intSet> *kats;
KategProblemWBC wordFreq;
KategProblemKBC katFreq;
// Per-word category to start from; -1 is used as "no category" by
// fromCatFile().
Array<int> initLike;
KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
int _nachbarschaft,int minw=0);
virtual ~KategProblem();
virtual void _initialize(int initTyp);
virtual void _initialize(int initTyp,int specialFixedWord);
virtual double valueChange(ProblemChange&c);
virtual Problem *makeEqualProblem();
virtual double nicevalue(double value=1e100);
void makeKats();
virtual void dumpOn(ostream &strm);
virtual void dumpInfos(ostream &strm);
// Setters/getters for the category- and word-selection bits of
// `nachbarschaft` (masks CHOOSE_KAT / CHOOSE_WORD).
inline void katwahl(int k);
inline void wortwahl(int w);
inline int katOfWord(int w);
inline short wortwahl();
inline short katwahl() ;
virtual int maxNonBetterIterations();
virtual int expectedNumberOfIterations();
const char *getString(int i);
string getTheString(int i);
void makeTitle(char x[512]);
void fixInitLike();
};
// Returns the category currently assigned to word w.
inline int KategProblem::katOfWord(int w)
{
  return _katOfWord[w];
}
// Returns the word-selection bits of `nachbarschaft` (mask CHOOSE_WORD).
inline short KategProblem::wortwahl()
{
  return nachbarschaft&CHOOSE_WORD;
}
// Returns the category-selection bits of `nachbarschaft` (mask CHOOSE_KAT).
inline short KategProblem::katwahl()
{
  return nachbarschaft&CHOOSE_KAT;
}
// Replaces the category-selection bits of `nachbarschaft` with k and sets
// _maxCompVal accordingly: 1 for K_BEST, otherwise katFreq.nKats-2.
inline void KategProblem::katwahl(int k)
{
  nachbarschaft = (nachbarschaft&(~CHOOSE_KAT)) | k;
  _maxCompVal = (k==K_BEST) ? 1 : katFreq.nKats-2;
}
// Replaces the word-selection bits of `nachbarschaft` with w.
inline void KategProblem::wortwahl(int w)
{
  nachbarschaft &= ~CHOOSE_WORD;
  nachbarschaft |= w;
}
// Frequency difference for the category pair (i,j) caused by the move from
// category `ursprung` to category `ziel`: contributions of nwg/ngw are
// subtracted for the source and added for the target, and nww is added for
// the two diagonal cases and subtracted for the two mixed cases.
inline FreqType KategProblem::nstrich(int i,int j)
{
  const bool iFrom = (i==ursprung);
  const bool iTo   = (i==ziel);
  const bool jFrom = (j==ursprung);
  const bool jTo   = (j==ziel);

  FreqType n=0;
  if( iFrom ) n-=nwg.getFreq(j);
  if( iTo )   n+=nwg.getFreq(j);
  if( jFrom ) n-=ngw.getFreq(i);
  if( jTo )   n+=ngw.getFreq(i);
  if( iFrom && jFrom ) n+=nww;
  if( iTo && jTo )     n+=nww;
  if( iFrom && jTo )   n-=nww;
  if( iTo && jFrom )   n-=nww;
  return n;
}
// Arguments below MAX_H_TABLE are looked up in precomputed tables by
// kat_h(int) (h_table, holding n*log(n)) and kat_mlog(int) (l_table, holding
// log(n)); the tables are defined elsewhere.
#define MAX_H_TABLE 4000
extern double h_table[],l_table[],hmy_table[],hmy_sigma;
// Guarded logarithm: returns 0 for x<=1e-9 and log(x) otherwise.
inline double kat_mlog(double x)
{
  return (x<=1e-9) ? 0 : log(x);
}
// Guarded logarithm for integers: 0 for s<=0, a table lookup in l_table for
// s below MAX_H_TABLE, and log(s) beyond the table.
inline double kat_mlog(int s)
{
  if( s<=0 )
    return 0;
  if( s>=MAX_H_TABLE )
    return log((double)(s));
  massert( s==0 || l_table[s]==log(s) );
  return l_table[s];
}
// Returns n * kat_mlog(n-1) for an integer n.
inline double kat_hlo(int n)
{
  const double logPrev = kat_mlog(n-1);
  return n*logPrev;
}
// Returns n * kat_mlog(n-1) for a real n.
inline double kat_hlo(double n)
{
  const double logPrev = kat_mlog(n-1);
  return n*logPrev;
}
// Returns n*log(n) for an integer n: 0 for n<=0, a table lookup in h_table
// for n below MAX_H_TABLE, and a direct computation beyond the table.
// massert rejects n below -1.
inline double kat_h(int n)
{
  massert(n>=-1);
  if( n<=0 )
    return 0;
  if( n>=MAX_H_TABLE )
    return n*log((double)(n));
  massert(n==0||fabs(h_table[n]-n*log((double)n))<1e-8);
  return h_table[n];
}
// Returns n*log(n) for a real n, or 0 when n<=1e-9.
inline double kat_h(double n)
{
  return (n<=1e-9) ? 0 : n*log(n);
}
// Returns e1*log( (ePlus-1)/(e0+1) * rhoLo ) with e0 = _e0-immer0 and
// ePlus = cats*cats-_e0, or 0 when cats*cats-e0 is not greater than 1.
inline double kat_etaFkt(int _e0,int e1,int immer0,int cats)
{
  const int e0 = _e0 - immer0;
  const int cells = cats*cats;
  const int ePlus = cells - _e0;
  if( cells-e0<=1 )
    return 0;
  return e1*log( (ePlus-1.0)/(e0+1.0)*rhoLo );
}
// Defined elsewhere; mkat_h_full/mkat_h_part are used by the CRITERION_MY
// branch of KategProblemKBC::fullBewertung.
double mkat_h_full(int n,double tf);
double mkat_h_part(int n,double cf);
int Hash(const string& s);
#endif

View File

@ -0,0 +1,243 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include <cstdlib>
#include "KategProblem.h"
// Constant multiplied into the log argument of kat_etaFkt().
double rhoLo=0.75;
// Lookup table for verfaelsche(int,double). It is consulted only for
// 0<=a<MAX_VERFAELSCHUNG and when the requested sigma equals verfTabSigma.
// NOTE(review): the code that fills verfTab is not in this part of the file.
#define MAX_VERFAELSCHUNG 5000
double verfTab[MAX_VERFAELSCHUNG],verfTabSigma=-1.0;
// Returns b*(erf(10000)-erf(a/b))/2 + a. When b equals verfTabSigma and a
// lies inside the table range, the value is taken from verfTab instead of
// being computed.
double verfaelsche(int a,double b)
{
  const bool tabulated = ( a>=0&&verfTabSigma==b&&a<MAX_VERFAELSCHUNG );
  if( !tabulated )
    {
      const double x = b*(erf(10000.0) - erf(a/b))/2+a;
      return x;
    }
  massert(verfTab[a]== b*(erf(10000.0) - erf(a/b))/2+a);
  return verfTab[a];
}
// Overload for real-valued counts: not supported, terminates the program via
// abort(). The return statement only satisfies the signature.
double verfaelsche(double,double b)
{
  abort();

  return b;
}
// Creates the statistics for s categories with all counts at zero.
// sv is the sigma passed to verfaelsche(); sv!=0 switches the extra
// "Verfaelschung" bookkeeping on (announced on cout as being slower).
// eta0 starts at s*s and c1_0/c2_0 at s.
// Side effect: the global verfTabSigma is set to sv.
KategProblemKBC::KategProblemKBC(int s,double sv) :
_n(s),_n1(s,0),_n2(s,0),sigmaVerfaelschung(sv),withVerfaelschung(sv!=0.0),
_nverf(s),_n1verf(s,0.0),_n2verf(s,0.0),_nWords(0),
eta0(s*s),eta1(0),c1_0(s),c2_0(s),
_bigramVerfSum(0.0),_unigramVerfSum1(0.0),_unigramVerfSum2(0.0),nKats(s)
{
// verfInit0 is the verfaelsche() value of a zero count (0.0 when disabled).
verfInit0=0.0;
int i;
if( withVerfaelschung )
{
verfInit0=verfaelsche(0,sv);
cout << "VERFAELSCHUNG wird mitgefuehrt => LANGSAMER!!!\n";
}
// Initialise every row and accumulate the sums for the all-zero table.
for(i=0;i<s;i++)
{
_n[i].init(s,0);
_nverf[i].init(s,verfInit0);
_n1verf[i]=_n2verf[i]=verfInit0;
_bigramVerfSum+=verfInit0*s;
_unigramVerfSum1+=verfInit0;
_unigramVerfSum2+=verfInit0;
}
if( withVerfaelschung )
{
cout << "VERFAELSCHUNG " << _bigramVerfSum << " " << _unigramVerfSum1 << " " << _unigramVerfSum2 << endl;
}
verfTabSigma=sigmaVerfaelschung;
}
void KategProblemKBC::setN(int w1,int w2, FreqType n)
{
addN(w1,w2,-_n[w1][w2]);
addN(w1,w2,n);
}
// Computes the complete value of the current category statistics for the
// criterion `auswertung` (CRITERION_ML, CRITERION_MY or CRITERION_LO).
// Any other criterion prints an error to cerr and exits the program.
double KategProblemKBC::fullBewertung(int auswertung)
{
double bewertung=0;
int c1,c2;
switch( auswertung )
{
case CRITERION_ML:
// Sum of kat_h over both unigram counts minus kat_h over all bigram counts.
for(c1=0;c1<nKats;c1++)
{
for(c2=0;c2<nKats;c2++)
bewertung-=kat_h(_n[c1][c2]);
bewertung+=kat_h(_n1[c1])+kat_h(_n2[c1]);
}
break;
case CRITERION_MY:
{
// Same structure as CRITERION_ML, but with mkat_h_full/mkat_h_part and the
// "verf" values of the counts.
for(c1=0;c1<nKats;c1++)
{
for(c2=0;c2<nKats;c2++)
bewertung-=mkat_h_full((int)n(c1,c2),nverf(c1,c2));
bewertung+=mkat_h_part((int)(n1(c1)),n1verf(c1))+mkat_h_part((int)(n2(c1)),n2verf(c1));
}
// Remove the verfInit0 share of empty categories from the running sums.
double u1=_unigramVerfSum1-verfInit0*c1_0;
double u2=_unigramVerfSum2-verfInit0*c2_0;
double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
if( verboseMode>1 )
{
cout << "CRITERION_MY: " << bewertung << endl;
cout << "U1:"<<_unigramVerfSum1 << " n:"<<u1<< " "
<< "U2:"<<_unigramVerfSum2 << " n:"<<u2<< " "
<< "U3:"<<_bigramVerfSum << " n:"<<b<< endl;
}
// The correction term is skipped (with a message) when b is too small.
if(b>0.000001)
{
if(verboseMode>1 )
cout << " NEU: " <<_nWords*log( u1 * u2 / b ) << endl;
bewertung -= _nWords*log( u1 * u2 / b );
if(verboseMode>1)
cout << "SCHLUSSBEWERTUNG: " << bewertung << endl;
}
else
cout << "B zu klein " << b << endl;
}
break;
case CRITERION_LO:
// Count-weighted kat_mlog terms, followed by the eta correction.
for(c1=0;c1<nKats;c1++)
{
for(c2=0;c2<nKats;c2++)
bewertung-=_n[c1][c2]*kat_mlog(_n[c1][c2]-1-rhoLo);
bewertung+=_n1[c1]*kat_mlog(_n1[c1]-1)+_n2[c1]*kat_mlog(_n2[c1]-1);
}
bewertung-=kat_etaFkt(eta0,eta1,(c1_0*nKats+c2_0*nKats-c1_0*c2_0),nKats);
break;
default:
cerr << "Error: wrong criterion " << auswertung << endl;
exit(1);
}
return bewertung;
}
// Returns -_nWords*log(u1*u2/b), where u1, u2 and b are the running unigram
// and bigram "verf" sums with the verfInit0 share of empty categories
// removed. Requires withVerfaelschung (iassert). Prints the intermediate
// values to cout when verboseMode>1.
double KategProblemKBC::myCriterionTerm()
{
  iassert( withVerfaelschung );
  const double emptyPairs=c1_0*nKats+c2_0*nKats-c1_0*c2_0;
  const double u1=_unigramVerfSum1-verfInit0*c1_0;
  const double u2=_unigramVerfSum2-verfInit0*c2_0;
  const double b=_bigramVerfSum-verfInit0*emptyPairs;
  if( verboseMode>1 )
    {
      cout << "nwords divisor:"<<_nWords << " " << u1 * u2 / b << endl;
      cout << "ergebnis: "<<_nWords*log( u1 * u2 / b ) << endl;
      cout << "0: "<<c1_0 << endl;
    }
  const double r = _nWords*log( u1 * u2 / b );
  return -r;
}
// Recomputes the sum of nverf(c1,c2) over all category pairs, prints it to
// cout and returns it.
double KategProblemKBC::bigramVerfSum()
{
  double total=0;
  for(int row=0;row<nKats;++row)
    {
      for(int col=0;col<nKats;++col)
	total+=nverf(row,col);
    }
  cout << "BIGRAMVERFSUM: " << total << endl;
  return total;
}
// Recomputes the sum of n1verf(c) over all categories, prints it to cout and
// returns it.
double KategProblemKBC::unigramVerfSum1()
{
  double total=0;
  int cat=0;
  while( cat<nKats )
    {
      total+=n1verf(cat);
      ++cat;
    }
  cout << "UNIGRAMVERFSUM1: " << total << endl;
  return total;
}
// Recomputes the sum of n2verf(c) over all categories, prints it to cout and
// returns it.
double KategProblemKBC::unigramVerfSum2()
{
  double total=0;
  int cat=0;
  while( cat<nKats )
    {
      total+=n2verf(cat);
      ++cat;
    }
  cout << "UNIGRAMVERFSUM2: " << total << endl;
  return total;
}

View File

@ -0,0 +1,157 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef KATEGPROBLEMKBC_H
#define KATEGPROBLEMKBC_H
// One row of the bigram count table, and its real-valued counterpart.
typedef Array<FreqType> FreqArray;
typedef Array<double> FreqArrayReal;
// verfaelsche(a,b) for integer counts; the double overload aborts.
double verfaelsche(int a,double b);
double verfaelsche(double a,double b);
// Category-level counts: a bigram table _n[c1][c2] with its row sums (_n1)
// and column sums (_n2), plus derived quantities that addN() keeps up to
// date incrementally.
class KategProblemKBC
{
friend class KategProblem;
private:
// Bigram counts and their sums over the first / second index.
Array<FreqArray> _n;
Array<FreqType> _n1;
Array<FreqType> _n2;
// Sigma passed to verfaelsche(); the "verf" bookkeeping below is maintained
// only when withVerfaelschung is set (sigma != 0).
double sigmaVerfaelschung;
short withVerfaelschung;
// verfaelsche() values of the corresponding counts.
Array<FreqArrayReal> _nverf;
Array<double> _n1verf;
Array<double> _n2verf;
// Total of all counts added through addN() while withVerfaelschung is set.
FreqType _nWords;
protected:
// Number of bigram cells with count 0 resp. count 1.
int eta0;
int eta1;
// Number of categories whose _n1 resp. _n2 sum is 0.
int c1_0;
int c2_0;
// Running sums of the "verf" values.
double _bigramVerfSum;
double _unigramVerfSum1;
double _unigramVerfSum2;
// verfaelsche() value of a zero count.
double verfInit0;
public:
int nKats;
KategProblemKBC(int nKats,double sv);
double fullBewertung(int auswertung);
FreqType n(int w1,int w2) { return _n[w1][w2]; };
FreqType n1(int w) { return _n1[w];};
FreqType n2(int w) { return _n2[w];};
// These three recompute the sums from the tables and print them to cout.
double bigramVerfSum();
double unigramVerfSum1();
double unigramVerfSum2();
double nverf(int w1,int w2) { return _nverf[w1][w2]; }
double n1verf(int w) { return _n1verf[w]; };
double n2verf(int w) { return _n2verf[w]; };
inline void addN(int w1,int w2, FreqType n);
void setN(int w1,int w2, FreqType n);
double myCriterionTerm();
};
// Adds n to the bigram count (w1,w2) and to both marginal sums, keeping the
// derived counters (eta0, eta1, c1_0, c2_0) and, if enabled, the "verf"
// values and their running sums consistent. n==0 is a no-op.
// The counters are adjusted in two phases: the old state is removed before
// the update and the new state is added afterwards.
inline void KategProblemKBC::addN(int w1,int w2, FreqType n)
{
if(n!=0)
{
FreqType &s= _n[w1][w2];
// Phase 1: take the old values out of the zero/one counters.
if(s==0)
eta0--;
else if(s==1)
eta1--;
if(_n1[w1]==0)
c1_0--;
if(_n2[w2]==0)
c2_0--;
if(withVerfaelschung)
{
// Replace the old "verf" values by the new ones and adjust the running sums
// by the differences.
double verfOld=verfaelsche(s,sigmaVerfaelschung);
double verfNew=verfaelsche(s+n,sigmaVerfaelschung);
double verfOld1=verfaelsche(_n1[w1],sigmaVerfaelschung);
assert(verfOld1==_n1verf[w1]);
double verfNew1=verfaelsche(_n1[w1]+n,sigmaVerfaelschung);
double verfOld2=verfaelsche(_n2[w2],sigmaVerfaelschung);
assert(verfOld2==_n2verf[w2]);
double verfNew2=verfaelsche(_n2[w2]+n,sigmaVerfaelschung);
_n1verf[w1]=verfNew1;
_unigramVerfSum1+=verfNew1-verfOld1;
_n2verf[w2]=verfNew2;
_unigramVerfSum2+=verfNew2-verfOld2;
_nverf[w1][w2]=verfNew;
_bigramVerfSum+=verfNew-verfOld;
// Note: _nWords is updated only inside this branch.
_nWords+=n;
}
s+=n;_n1[w1]+=n;_n2[w2]+=n;
// Counts must never become negative.
assert(_n[w1][w2]>=0);
assert(_n1[w1]>=0);
assert(_n2[w2]>=0);
// Phase 2: register the new values in the zero/one counters.
if(s==0)
eta0++;
else if(s==1)
eta1++;
if(_n1[w1]==0)
c1_0++;
if(_n2[w2]==0)
c2_0++;
}
};
#endif

View File

@ -0,0 +1,700 @@
/*
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
mkcls - a program for making word classes .
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include "KategProblemTest.h"
#include "ProblemTest.h"
#include "HCOptimization.h"
#include "TAOptimization.h"
#include "RRTOptimization.h"
#include "GDAOptimization.h"
#include <cstdlib>
#include <cstdio>
#include <string>
#include <strstream>
// A bigram: (first word, second word).
typedef pair<string,string> PSS;
// Vocabulary entry that writeClasses() prints as "$".
#define NEW_SENTENCE_END "mkcls-mapped-dollar-symbol-$"
#ifdef NeXT
// Replacement for strdup() on NeXT: returns a malloc'ed copy of the
// NUL-terminated string a, or a null pointer when the allocation fails.
// The caller releases the copy with free().
char *strdup(char *a)
{
  char *p = (char *)malloc(strlen(a)+1);
  if( p )
    strcpy(p,a);
  return p;
}
#endif
// Writes one "<word>\t<category>" line per vocabulary entry to `to`.
// The entry "$" itself is skipped; the entry NEW_SENTENCE_END is written
// as "$".
void writeClasses(Array<Kategory> &katOfWord,KategProblem &problem,ostream &to)
{
  for(int i=0;i<katOfWord.size();i++)
    {
      // Braces make explicit that the else belongs to the inner test.
      if( strcmp(problem.getString(i),"$") )
	{
	  if( strcmp(problem.getString(i),NEW_SENTENCE_END)==0 )
	    to << "$" << "\t" << katOfWord[i] << endl;
	  else
	    to << problem.getString(i) << "\t" << katOfWord[i] << endl;
	}
    }
}
void mysplit(const string &s,string &s1,string &s2)
{
unsigned int i=0;
for(;i<s.length();i++)if( s[i]==' ' || s[i]=='\t' || s[i]==' ')break;
s1=s.substr(0,i);
for(;i<s.length();i++)if( !(s[i]==' ' || s[i]=='\t' || s[i]==' ') )break;
s2=s.substr(i,s.length()-i);
iassert(s1.size());
iassert(s2.size());
}
// Reads initial categories from the file fname into p->initLike.
// Each line holds a word and a category label separated by blanks/tabs
// (see mysplit). Labels are mapped to category numbers in order of first
// appearance, starting at 2; the labels "0" and "1" keep their numbers.
// Words not found in the vocabulary and words listed twice produce a
// warning on cerr. The vocabulary entry "$" is always put in category 0.
// Exits the program if the file cannot be opened or more categories are
// needed than p->katFreq.nKats provides.
// Returns 1 if at least one vocabulary word ended up without a category,
// 0 otherwise; with verb set, each such word is reported on cerr.
int fromCatFile(KategProblem *p,const char *fname,bool verb)
{
// Label -> category number.
// NOTE(review): the -1 is presumably the value for unknown labels, as the
// lookup below tests for it.
leda_h_array<string,int> translation(-1);
int maxCat=2;
ifstream in(fname);
if(!in)
{
cerr << "Error: File '" << fname << "' cannot be opened.\n";
exit(1);
}
// Start with "no category" for every word.
for(int i=0;i<p->wordFreq.nWords;i++)
(p->initLike)[i]= -1;
translation["1"]=1;
translation["0"]=0;
string s;
while( getline(in,s) )
{
string str,categ;
mysplit(s,str,categ);
// binary_locate only yields a candidate position; equality is checked here.
int i=p->words->binary_locate(str);
if(i>=0 && (*(p->words))[i]==str )
{
if( translation[categ]==-1 )
translation[categ]=maxCat++;
int cat=translation[categ];
if( (p->initLike)[i]!= -1 )
cerr << "Warning: Word '" << ((*(p->words))[i])<< "' is already in a category.\n";
(p->initLike)[i]=cat;
}
else
cerr << "Warning: Word '" << str << "' " << i << " is not in training corpus.\n";
}
if( verboseMode )
cout << "We have " << maxCat << " read non-empty categories"
" (with words from the corpus).\n";
if(maxCat>p->katFreq.nKats)
{
cerr << "Error: Not enough categories reserved (only "
<< p->katFreq.nKats << ", but i need " << maxCat << ").\n";
exit(1);
}
// "$" always goes to category 0.
int i=p->words->binary_locate("$");
if( i>=0 && (*(p->words))[i]=="$" )
(p->initLike)[i]=0;
else
if( verboseMode )
cerr << "Warning: No '$' in vocabulary!\n";
// Report every word that still has no category.
int errors=0;
for(i=0;i<p->wordFreq.nWords;i++)
if((p->initLike)[i]== -1 )
{
if( verb ) cerr << "Error: I don't know the category of word " << i
<< " (" << (*(p->words))[i] << ") " << ".\n";
errors=1;
}
return errors;
}
// Builds a KategProblem from a bigram table and a vocabulary.
//   cTbl          bigram (first word, second word) -> frequency
//   setVokabular  all words that occur in cTbl
// The remaining arguments are passed on to the KategProblem constructor.
// Steps: copy the vocabulary into a newly allocated array (stored in
// k->words; it is not freed here), count for every word how many bigrams it
// starts and how many it ends, hand these counts and then the bigram
// frequencies to k->wordFreq, and finally run wordFreq.testFull().
// Bigrams with an empty second word and the bigram ("$","$") are ignored.
// Returns the new problem; it is allocated with new and not freed here.
KategProblem *makeKategProblem(const leda_h_array<PSS,FreqType>&cTbl,const leda_set<string>&setVokabular, int maxClass,int initialisierung,
			       int auswertung,int nachbarschaft,int minWordFrequency)
{
  int nwrd=0;
  leda_array<string>&sVok = *new leda_array<string>(setVokabular.size());
  string s;
  unsigned int ctr=0;
  forall_set(leda_set<string>,s,setVokabular)
    {
      if( verboseMode>2 )
	cout << "mkcls:Wort " << ctr << " " << s << endl;
      sVok[ctr++]=s;
    }
  // The set is expected to deliver the words in ascending order.
  // `z+1<ctr` instead of `z<ctr-1`: ctr is unsigned, so ctr-1 would wrap
  // around for an empty vocabulary.
  for(unsigned int z=0;z+1<ctr;z++)
    iassert( sVok[z]<sVok[z+1] );
  sVok.sort();
  if( verboseMode>2 )
    cout << "*****Vocabulary: " << sVok;
  unsigned int vokSize=sVok.size();
  massert(vokSize==ctr); massert(vokSize==setVokabular.size());
  if(verboseMode)
    {cout << "Size of vocabulary: " << vokSize << "\n";cout.flush();}
  KategProblem *k = new KategProblem(vokSize,maxClass,initialisierung,
				     auswertung,nachbarschaft,minWordFrequency);
  KategProblemWBC &w=k->wordFreq;
  k->words=&sVok;
  Array<int> after(vokSize,0);
  Array<int> before(vokSize,0);
  nwrd=0;
  // Pass 1: count, per word, the bigrams it starts (after) and ends (before).
  {
    PSS s;
    forall_defined_h2(PSS,FreqType,s,cTbl)
      {
	const string&ss1=s.first;
	const string&ss2=s.second;
	if( ss2.length()&&(ss1!="$" || ss2!="$") )
	  {
	    int i1=sVok.binary_search(ss1);
	    int i2=sVok.binary_search(ss2);
	    iassert( sVok[i1] == ss1 );iassert( sVok[i2] == ss2 );
	    after[i1]++;
	    before[i2]++;
	  }
	if( verboseMode&&((nwrd++)%10000==0) )
	  {cout<<"Statistiken-1 " << nwrd<< ". \r";cout.flush();}
      }
  }
  for(unsigned int i=0;i<vokSize;i++)
    {
      w.setAfterWords(i,after[i]);
      w.setBeforeWords(i,before[i]);
    }
  // Pass 2: store the bigram frequencies.
  {
    nwrd=0;
    PSS s;
    forall_defined_h2(PSS,FreqType,s,cTbl)
      {
	const string&ss1=s.first;
	const string&ss2=s.second;
	FreqType p=cTbl[s];
	if( ss2.length()&&(ss1!="$" || ss2!="$") )
	  {
	    int i1=sVok.binary_search(ss1);
	    int i2=sVok.binary_search(ss2);
	    iassert( sVok[i1] == ss1 );iassert( sVok[i2] == ss2 );
	    w.setFreq(i1,i2,p);
	    if( verboseMode>2 )
	      cout << "BIGRAMM-HAEUF: " << ss1 << ":" << i1 << " "
		   << ss2 << ":" << i2 << " " << p << endl;
	  }
	if( verboseMode&&((nwrd++)%10000==0) )
	  {cout<<"Statistiken-2 " <<nwrd<< ". \r";cout.flush();}
      }
  }
  w.testFull();
  if(verboseMode){cout << "Datenintegritaet getestet.\n";cout.flush();}
  return k;
}
// Read a file of "count word1 word2" records (whitespace separated) and build
// a KategProblem from it via makeKategProblem().
//
// Returns 0 if the file cannot be opened, if a record contains an empty word
// or if a record has the count 0; otherwise the problem created by
// makeKategProblem().  The remaining parameters are forwarded unchanged.
KategProblem *fromNgrFile(const char *str,int maxClass,int initialisierung,
                          int auswertung,int nachbarschaft,int minWordFrequency)
{
  ifstream file(str);
  if( !file )
    return 0;

  leda_set<string> setVokabular;
  leda_h_array<PSS,FreqType> cTbl;
  if( verboseMode )
    cout << "NGRFILE: " << str << endl;

  string left,right;
  for(;;)
    {
      double count=0;
      if( !(file >> count >> left >> right) )
        break;

      if( left.length()==0 || right.length()==0 )
        {
          cerr << "ERROR: strings are zero: " << left.length() <<" " << left <<" " << right.length()<<" " << right << endl;
          return 0;
        }
      if( count==0 )
        {
          cerr << "Count ist 0 " << left << " " << right << endl;
          return 0;
        }

      cTbl[pair<string,string>(left,right)]=(FreqType)count;
      setVokabular.insert(left);
      setVokabular.insert(right);
      if( verboseMode>1 )
        cout << "R: " << left << " " << right << " " << count << endl;
    }

  return makeKategProblem(cTbl,setVokabular,maxClass,initialisierung,
                          auswertung,nachbarschaft,minWordFrequency);
}
// Read a plain-text corpus (one sequence of whitespace separated tokens per
// line), count all pairs of adjacent tokens and build a KategProblem from
// these counts via makeKategProblem().
//
// "$" serves as boundary symbol: it precedes the first token of the file and
// is appended after the last token of every line.  A literal "$" token in the
// text is renamed to "mkcls-mapped-dollar-symbol-$" so that it cannot be
// confused with the boundary symbol.
//
// Returns 0 if the file cannot be opened.  The remaining parameters are
// forwarded unchanged to makeKategProblem().
KategProblem *fromKModel(const char *str,int maxClass,int initialisierung,
                         int auswertung,int nachbarschaft,int minWordFrequency)
{
  string previous,token,sentence;
  ifstream corpus(str);
  if( !corpus )
    {
      cerr << "ERROR: can not open file " << str << ".\n";
      return 0;
    }
  leda_set<string> setVokabular;
  leda_h_array<PSS,FreqType> cTbl(0);
  previous="$";

  for(;;)
    {
      getline(corpus,sentence);
      // A failure that is neither "bad" nor end-of-file is reported and
      // cleared so that the line is still processed.
      const bool lineSoftFail = corpus.fail() && !corpus.bad() && !corpus.eof();
      if( lineSoftFail )
        {
          cerr << "WARNING: strange characters in stream (getline) " << endl;
          corpus.clear();
        }
      if( !corpus )
        break;

      istrstream tokens(sentence.c_str());
      for(;;)
        {
          tokens >> token;
          const bool tokenSoftFail = tokens.fail() && !tokens.bad() && !tokens.eof();
          if( tokenSoftFail )
            {
              cerr << "WARNING: strange characters in stream (>>) !\n";
              tokens.clear(ios::failbit);   // state becomes failbit: the loop ends below
            }
          if( !tokens )
            break;

          if( token=="$" )
            token="mkcls-mapped-dollar-symbol-$";
          if( !setVokabular.member(token) )
            setVokabular.insert(token);
          cTbl[pair<string,string>(previous,token)]++;
          previous=token;
        }

      // Close the line with the boundary symbol.
      token="$";
      if( !setVokabular.member(token) )
        setVokabular.insert(token);
      cTbl[pair<string,string>(previous,token)]++;
      previous=token;
    }

  return makeKategProblem(cTbl,setVokabular,maxClass,initialisierung,
                          auswertung,nachbarschaft,minWordFrequency);
}
// Set the default parameters of the TA, RRT and GDA optimizers depending on
// whether the problem `p` reports K_BEST from katwahl() or anything else.
// In verbose mode the chosen parameter set is named on cout.
void KategProblemSetParameters(KategProblem &p)
{
  const bool best = ( p.katwahl()==K_BEST );

  TAOptimization::defaultAnnRate  = best ? 0.7  : 0.4;
  RRTOptimization::defaultAnnRate = best ? 0.95 : 0.6;
  GDAOptimization::defaultAlpha   = best ? 0.05 : 0.0125;

  if( verboseMode )
    cout << ( best ? "Parameter-setting like W-DET-BEST\n"
                   : "Parameter-setting like W-DET-DET\n" );
}
// Create a KategProblem with ANZ_WORD words and ANZ_CLS classes whose
// word-pair frequencies are filled randomly.
//
// ANZ_WORD*ANZ_WORD*relInit random (x,y) positions are drawn; each draw adds a
// random amount randomInt(10)+1 to the frequency of that pair.  initValue,
// auswertung and nachbarschaft are passed to the KategProblem constructor.
// The problem is allocated with new and returned by reference.
KategProblem &makRandom(int ANZ_WORD,int ANZ_CLS,int initValue,
                        int auswertung,int nachbarschaft,float relInit)
{
  KategProblem *problem=
    new KategProblem(ANZ_WORD,ANZ_CLS,initValue,auswertung,nachbarschaft);
  KategProblemWBC &freqs=problem->wordFreq;

  Array<int> after(ANZ_WORD,0);
  Array<int> before(ANZ_WORD,0);
  Array<FreqArray> twoD(ANZ_WORD);

  for(int row=0;row<ANZ_WORD;row++)
    twoD[row].init(ANZ_WORD,0);

  // Everything is expected to start at zero.
  for(int row=0;row<ANZ_WORD;row++)
    {
      massert(after[row]==0);
      massert(before[row]==0);
      for(int col=0;col<ANZ_WORD;col++)
        {
          massert(twoD[row][col]==0);
        }
    }

  // Random fill; a pair that was zero so far adds one entry to the
  // after/before counters of its two words.
  for(int draw=0;draw<ANZ_WORD*ANZ_WORD*relInit;draw++)
    {
      const int x=randomInt(ANZ_WORD);
      const int y=randomInt(ANZ_WORD);
      if( twoD[x][y]==0 )
        {
          after[x]++;
          before[y]++;
        }
      twoD[x][y]+=randomInt(10)+1;
    }

  for(int word=0;word<ANZ_WORD;word++)
    {
      freqs.setAfterWords(word,after[word]);
      freqs.setBeforeWords(word,before[word]);
    }

  for(int row=0;row<ANZ_WORD;row++)
    for(int col=0;col<ANZ_WORD;col++)
      {
        if( twoD[row][col] )
          freqs.setFreq(row,col,twoD[row][col]);
      }

  freqs.testFull();
  return *problem;
}
// Compose a title string: a short tag for the optimization method `verfahren`
// ("HC ", "SA ", "TA ", "GDA " or "RRT ") followed by whatever
// problem.makeTitle() writes behind it.
//
// Returns a copy made with strdup(); the caller has to release it with free().
// For an unknown `verfahren` the tag is empty.
char *makeTitle(KategProblem &problem,int verfahren)
{
  char x[1024];
  // Start with an empty string: without this an unknown `verfahren` would
  // leave the buffer uninitialised and strlen(x) below would be undefined.
  x[0]='\0';
  switch(verfahren)
    {
    case HC_OPT:
      strcpy(x,"HC ");
      break;
    case SA_OPT:
      strcpy(x,"SA ");
      break;
    case TA_OPT:
      strcpy(x,"TA ");
      break;
    case GDA_OPT:
      strcpy(x,"GDA ");
      break;
    case RRT_OPT:
      strcpy(x,"RRT ");
      break;
    default:
      break;
    }
  // NOTE(review): problem.makeTitle() receives no buffer length; it is assumed
  // to stay within the remaining space of x - confirm against its definition.
  problem.makeTitle(x+strlen(x));
  return strdup(x);
}
// At most MAX_MULTIPLE*anzprob-1 reduced problems are created per reduction
// step of _izrOptimization().
#define MAX_MULTIPLE 10
// One step of the iterated state-space reduction ("Iterierte
// Zustandsraum-Reduktion"), applied recursively.
//
//   probs          already optimized problems over the same vocabulary; every
//                  non-null entry is deleted by this function
//   anzprob        number of problems to work with
//   timeForOneRed  time budget (clockSec() units) for creating new problems
//   maxClock       clockSec() value after which no further recursion is
//                  started; 0 disables the limit
//   katOfWord      output: category of every word of probs[0]
//   anzIter        recursion depth, 0 for the outermost call
//   verfahren      optimization method handed to solveProblem()
//
// Words that share a class in each of the problems considered are merged
// into one new word, new (smaller) problems are built and optimized on the
// merged vocabulary, and the function recurses on them until the merged
// vocabulary has at most nKats words or the time limit is exceeded.
//
// Returns the heap-allocated array of problems created on the last level.
Array<KategProblem *> &_izrOptimization(Array<KategProblem *> &probs,
int anzprob,double timeForOneRed,double maxClock,Array<Kategory> &katOfWord,
int anzIter,int verfahren)
{
  massert(anzprob>1);
  massert(probs[0]->wordFreq.mindestAnzahl<=1);
  KategProblem *p0=probs[0];
  int nWords=p0->wordFreq.nWords;
  int nKats=p0->katFreq.nKats;
  // The merged vocabulary must be smaller than 95% of the current one.
  int minimumNumberOfWords = max(1,int(nWords*0.95));
  int indexOfDurchschnitt;          // number of merged words found so far
  Array<int> newWords(nWords);      // old word index -> merged word index
  int useAnzprob=anzprob;
  do
    {
      int w,k;
      indexOfDurchschnitt=0;
      for(w=0;w<nWords;w++)
        newWords[w]=-1;
      for(k=0;k<useAnzprob;k++)
        {
          massert(probs[k]->wordFreq.nWords==nWords);
          probs[k]->makeKats();
        }
      for(w=0;w<nWords;w++)
        {
          if( newWords[w]==-1 )
            {
              // Combine (operator&, presumably set intersection) the classes
              // that contain w in each of the first useAnzprob problems.
              leda_set<int> durchschnitt=(*p0->kats)[p0->katOfWord(w)];
              for(k=1;k<useAnzprob;k++)
                durchschnitt = durchschnitt & (*probs[k]->kats)[probs[k]->katOfWord(w)];
              int _anzInDurchschnitt=0;
              int nr=0;
              forall_set(leda_set<int>,nr,durchschnitt)
                {
                  _anzInDurchschnitt++;
                  newWords[nr]=indexOfDurchschnitt;
                }
              // The merged groups are only listed on the outermost level.
              if( verboseMode && _anzInDurchschnitt>1 && anzIter==0 )
                {
                  cout << "- (";
                  forall_set(leda_set<int>,nr,durchschnitt)
                    {
                      cout << p0->getString(nr);
                      if( p0->wordFreq.n1(nr)==1 )
                        cout << "* ";
                      else
                        cout << " ";
                    }
                  cout << ")\n";
                }
              // The result of this loop is not read afterwards.
              for(k=0;k<useAnzprob;k++)
                {
                  durchschnitt = durchschnitt - (*probs[k]->kats)[probs[k]->katOfWord(w)];
                }
              indexOfDurchschnitt++;
            }
        }
      // Too little reduction: retry with one problem less.
      if(indexOfDurchschnitt>=minimumNumberOfWords)
        {
          if(useAnzprob==1)
            {
              cout << "useAnzProb==1 => mysterious.\n";
              break;
            }
          useAnzprob--;
        }
    }
  while(indexOfDurchschnitt>=minimumNumberOfWords);
  Array<KategProblem *> &neu=*new Array<KategProblem *>(MAX_MULTIPLE*anzprob,(KategProblem *)0);
  qsort(probs.getPointerToData(),useAnzprob,sizeof(KategProblem *),compareProblem);
  massert(useAnzprob<=probs.size());
  double startTime=clockSec();
  int i, numberOfNew;
  // Create new problems as long as time is left, but at least anzprob of them.
  for(numberOfNew=0; (clockSec()-startTime<timeForOneRed)
        || (numberOfNew < anzprob) ; numberOfNew++)
    {
      int w;
      if( numberOfNew==anzprob*MAX_MULTIPLE-1 )
        break;
      KategProblem *p
        = neu[numberOfNew]
        = new KategProblem(indexOfDurchschnitt,nKats-2,
                           p0->initialisierung,p0->auswertung,p0->nachbarschaft);
      for(w=0;w<indexOfDurchschnitt;w++)
        {
          p->wordFreq.setAfterWords(w,5);
          p->wordFreq.setBeforeWords(w,5);
        }
      // Add the frequencies of p0 to the merged word pairs.
      for(w=0;w<nWords;w++)
        {
          Array<OneFreq> &after=p0->wordFreq.after[w];
          int size=after.size();
          for(i=0;i<size;i++)
            p->wordFreq.addFreq(newWords[w],newWords[after[i].w],after[i].n);
        }
      p->wordFreq.testFull(1);
      p->wordFreq.set_h_of_words(p0->wordFreq.get_h_of_words());
      double w1=0.0,w2=0.0;
      if(numberOfNew<useAnzprob)
        {
          // Start from the category system of an old problem and improve it
          // with HCOptimization.
          for(i=0;i<nWords;i++)
            (p->initLike)[newWords[i]]=probs[numberOfNew]->katOfWord(i);
          p->_initialize(5);
          HCOptimization hc(*p,-1);
          if(verboseMode)
            {
              w1=p->nicevalue();
              cout << "from old category system:" << w1 << endl;
            }
          hc.minimize(-1);
          if(verboseMode)
            {
              w2=p->nicevalue();
              if(w2<w1)
                cout << "improvement: " << w1-w2 << endl;
            }
        }
      else
        {
          // Additional problems are solved from a fresh initialization.
          p->_initialize(1);
          double mean;
          StatVar end,laufzeit,start;
          solveProblem(0,*p,1,-1,verfahren,mean,end,laufzeit,start);
          w2=p->value();
          if(verboseMode)
            cout << "new category system: " << w2 << " (" << p->nicevalue()
                 << ") Zeit: " << clockSec() << "\n";
        }
    }
  // The old problems are no longer needed.
  int p;
  for(p=0;p<probs.size();p++)
    {
      if( probs[p] )
        delete probs[p];
    }
  // Same element size as in the qsort above: the array holds KategProblem*.
  qsort(neu.getPointerToData(),numberOfNew,sizeof(KategProblem *),compareProblem);
  massert(numberOfNew<=neu.size());
  if( verboseMode )
    cout << "Iterierte Zustandsraum-Reduktion: " << indexOfDurchschnitt
         << " words. costs: " << neu[0]->value() << " "
         << neu[0]->nicevalue() << " (" << numberOfNew-anzprob << ")" << "time: "
         << clockSec() << endl;
  if( indexOfDurchschnitt<=nKats
      || (clockSec()>maxClock&&maxClock) )
    {
      if( clockSec()>maxClock&&maxClock )
        cout << "STOP (time limit: " << (clockSec()-maxClock) << " s)\n";
      for(i=0;i<nWords;i++)
        katOfWord[i]=neu[0]->katOfWord(newWords[i]);
      return neu;
    }
  else
    {
      // Automatic storage: the mapping is only needed until it has been
      // copied into katOfWord (it used to be allocated with new and leaked).
      Array<Kategory> newKatOfWord(neu[0]->wordFreq.nWords,-1);
      Array<KategProblem *> &erg=_izrOptimization(neu,anzprob,timeForOneRed,
                                                  maxClock,newKatOfWord,
                                                  anzIter+1,verfahren);
      for(i=0;i<nWords;i++)
        katOfWord[i]=newKatOfWord[newWords[i]];
      // The recursive call deleted every problem stored in neu (it received
      // neu as probs) and returns a different array, so the container itself
      // can be released here.
      delete &neu;
      return erg;
    }
}
// Entry point of the iterated state-space reduction.
//
//   p               problem to optimize; only copies of it are modified
//   minN            number of problems handed to _izrOptimization()
//   firstN          minimal number of problems solved up front
//   clockForOneRed  time budget per reduction step; if <=0 it is set to the
//                   time that solving the first minN problems took
//   maxClock        time limit passed on to _izrOptimization()
//   verfahren       optimization method passed to solveProblem()
//
// Solves a number of copies of p, runs _izrOptimization() on them, applies
// the resulting category assignment to a fresh copy of p, improves it with
// HCOptimization and returns that copy.  Prints the final costs on cout and
// dumps the result to *PrintBestTo if that is set.
KategProblem *izrOptimization(KategProblem &p,int minN,int firstN,
                              double clockForOneRed,double maxClock,int verfahren)
{
  Array<Kategory> katOfWord(p.wordFreq.nWords,-1);
  int startN;
  if( clockForOneRed<=0 )
    startN=firstN;
  else
    startN=1000;
  // Null-initialised: _izrOptimization() tests every slot of this array for
  // null before deleting it, and not all slots are filled when the loop
  // below stops early.
  Array<KategProblem *> probs(startN,(KategProblem *)0);
  double val1=0.0,val2=0.0;
  double endTime=-1;
  double startTime=clockSec();
  int i;
  for(i=0;i<startN;i++)
    {
      StatVar end,laufzeit,start;
      double mean;
      probs[i] = (KategProblem *)p.makeEqualProblem();
      solveProblem(0,*(probs[i]),1,-1,verfahren,mean,end,laufzeit,start);
      if( i==minN-1 )
        endTime = clockSec();
      // NOTE(review): this stops as soon as firstN problems exist while the
      // time budget is NOT yet used up (or at 1000 problems) - confirm that
      // the time comparison is meant this way round.
      if( i>=firstN-1 && (startTime+clockForOneRed>clockSec() || i==999) )
        break;
    }
  // After a break i is the index of the last solved problem, after a complete
  // run it already equals the number of solved problems.
  const int numSolved = (i<startN) ? i+1 : i;
  if( endTime<0 )
    endTime=clockSec();
  massert(numSolved>=firstN);
  qsort(probs.getPointerToData(),numSolved,sizeof(KategProblem *),compareProblem);
  massert(numSolved<=probs.size());
  if( clockForOneRed<=0 )
    {
      clockForOneRed=endTime-startTime;
      if( verboseMode )
        cout << "time for one reduction: " << clockForOneRed << endl;
    }
  // NOTE(review): the array returned by _izrOptimization() is discarded, so
  // it and the problems it holds are not released here.
  _izrOptimization(probs,minN,clockForOneRed,maxClock,katOfWord,0,verfahren);
  KategProblem *n=(KategProblem *)(p.makeEqualProblem());
  n->initLike= katOfWord;
  n->_initialize(5);
  if( verboseMode )
    val1=n->value();
  HCOptimization hc(*n,-1);
  hc.minimize(-1);
  val2=n->value();
  if( verboseMode )
    cout << "last improvement: " << val2-val1 << "\n";
  // Printed regardless of verboseMode.
  cout << "final costs: " << val2 << " " << n->nicevalue() << endl;
  if(PrintBestTo)
    n->dumpOn(*PrintBestTo);
  return n;
}

Some files were not shown because too many files have changed in this diff Show More