- cataloged all the licenses for the libraries Gridworks depends on
- added the secondstring library, which contains all sorts of useful string distance functions
- added a Java arithmetic coding library (used to implement a string distance based on PPM arithmetic coding)
- added the vicino kNN string clustering library (from MIT's SIMILE)
git-svn-id: http://google-refine.googlecode.com/svn/trunk@181 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
5a0a8bea4f
commit
c07431fb88
@ -11,8 +11,10 @@
|
|||||||
<classpathentry kind="lib" path="lib/json-20100208.jar" sourcepath="lib-src/json-20100208-sources.jar"/>
|
<classpathentry kind="lib" path="lib/json-20100208.jar" sourcepath="lib-src/json-20100208-sources.jar"/>
|
||||||
<classpathentry kind="lib" path="lib/icu4j-4.2.1.jar" sourcepath="lib-src/icu4j-4.2.1-sources.jar"/>
|
<classpathentry kind="lib" path="lib/icu4j-4.2.1.jar" sourcepath="lib-src/icu4j-4.2.1-sources.jar"/>
|
||||||
<classpathentry kind="lib" path="lib/cos-20081226.jar" sourcepath="lib-src/cos-20081226-sources.jar"/>
|
<classpathentry kind="lib" path="lib/cos-20081226.jar" sourcepath="lib-src/cos-20081226-sources.jar"/>
|
||||||
|
<classpathentry kind="lib" path="lib/arithcode-1.1.jar" sourcepath="lib-src/arithcode-1.1-sources.jar"/>
|
||||||
|
<classpathentry kind="lib" path="lib/jdatapath-alpha2.jar" sourcepath="lib-src/jdatapath-alpha2-sources.jar"/>
|
||||||
|
<classpathentry kind="lib" path="lib/secondstring-20100303.jar" sourcepath="lib-src/secondstring-20100303-sources.jar"/>
|
||||||
<classpathentry kind="lib" path="lib/poi-3.6.jar"/>
|
<classpathentry kind="lib" path="lib/poi-3.6.jar"/>
|
||||||
<classpathentry kind="lib" path="lib/poi-ooxml-3.6.jar"/>
|
<classpathentry kind="lib" path="lib/poi-ooxml-3.6.jar"/>
|
||||||
<classpathentry kind="lib" path="lib/jdatapath-alpha2.jar" sourcepath="lib-src/jdatapath-alpha2-sources.jar"/>
|
|
||||||
<classpathentry kind="output" path="build/classes"/>
|
<classpathentry kind="output" path="build/classes"/>
|
||||||
</classpath>
|
</classpath>
|
||||||
|
43
LICENSE.txt
43
LICENSE.txt
@ -1,4 +1,45 @@
|
|||||||
/*
|
/*
|
||||||
* (c) Copyright 2010 Metaweb Technologies, Inc. All rights reserved.
|
* (c) Copyright 2010 Metaweb Technologies, Inc. All rights reserved.
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
See the 'licenses' directory for a list of the licenses for the libraries we depend on,
|
||||||
|
ordered here by license:
|
||||||
|
|
||||||
|
licenses/apache2.0.LICENSE.txt
|
||||||
|
jetty
|
||||||
|
jetty-util
|
||||||
|
servlet-api
|
||||||
|
commons-lang
|
||||||
|
commons-codec
|
||||||
|
log4j
|
||||||
|
xmlbeans
|
||||||
|
poi
|
||||||
|
poi-ooxml
|
||||||
|
poi-ooxml-schemas
|
||||||
|
jdatapath
|
||||||
|
|
||||||
|
licenses/secondstring.LICENSE.txt (BSD family)
|
||||||
|
secondstring
|
||||||
|
|
||||||
|
licenses/dom4j.LICENSE.txt (BSD family)
|
||||||
|
dom4j
|
||||||
|
|
||||||
|
licenses/simile.LICENSE.txt (BSD family)
|
||||||
|
vicino
|
||||||
|
|
||||||
|
licenses/arithcode.LICENSE.txt (BSD family)
|
||||||
|
arithcode
|
||||||
|
|
||||||
|
licenses/icu4j.LICENSE.txt (MIT family)
|
||||||
|
icu4j
|
||||||
|
|
||||||
|
licenses/slf4j.LICENSE.txt (MIT family)
|
||||||
|
slf4j-api
|
||||||
|
slf4j-log4j12
|
||||||
|
jcl-over-slf4j
|
||||||
|
|
||||||
|
licenses/json.LICENSE.txt (MIT family)
|
||||||
|
json
|
||||||
|
|
||||||
|
licenses/cos.LICENSE.txt
|
||||||
|
cos
|
BIN
lib-src/arithcode-1.1-sources.jar
Normal file
BIN
lib-src/arithcode-1.1-sources.jar
Normal file
Binary file not shown.
BIN
lib-src/secondstring-20100303-sources.jar
Normal file
BIN
lib-src/secondstring-20100303-sources.jar
Normal file
Binary file not shown.
BIN
lib/arithcode-1.1.jar
Normal file
BIN
lib/arithcode-1.1.jar
Normal file
Binary file not shown.
BIN
lib/secondstring-20100303.jar
Normal file
BIN
lib/secondstring-20100303.jar
Normal file
Binary file not shown.
202
licenses/apache2.0.LICENSE.txt
Normal file
202
licenses/apache2.0.LICENSE.txt
Normal file
@ -0,0 +1,202 @@
|
|||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
30
licenses/arithcode.LICENSE.txt
Normal file
30
licenses/arithcode.LICENSE.txt
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
Copyright (c) 2002, Bob Carpenter.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following
|
||||||
|
disclaimer in the documentation and/or other materials provided
|
||||||
|
with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of colloquial.com nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
82
licenses/cos.LICENSE.txt
Normal file
82
licenses/cos.LICENSE.txt
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
Copyright (C) 2001-2009 by Jason Hunter, jhunter@servlets.com.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
The source code, object code, and documentation in the com.oreilly.servlet
|
||||||
|
package is copyright and owned by Jason Hunter.
|
||||||
|
|
||||||
|
|
||||||
|
ON-SITE USE RIGHTS
|
||||||
|
|
||||||
|
Permission is granted to use the com.oreilly.servlet.* packages in the
|
||||||
|
development of any *non-commercial* project. For this use you are granted
|
||||||
|
a non-exclusive, non-transferable limited license at no cost.
|
||||||
|
|
||||||
|
For a *commercial* project, permission is granted to use the
|
||||||
|
com.oreilly.servlet.* packages provided that every person on the development
|
||||||
|
team for that project owns a copy of the book Java Servlet Programming
|
||||||
|
(O'Reilly) in its most recent edition. The most recent edition is currently
|
||||||
|
the 2nd Edition, available in association with Amazon.com at
|
||||||
|
http://www.amazon.com/exec/obidos/ASIN/0596000405/jasonhunter.
|
||||||
|
|
||||||
|
Other (sometimes cheaper) license terms are available upon request; please
|
||||||
|
write to jhunter@servlets.com for more information.
|
||||||
|
|
||||||
|
|
||||||
|
REDISTRIBUTION RIGHTS
|
||||||
|
|
||||||
|
Commercial redistribution rights of the com.oreilly.servlet.* packages are
|
||||||
|
available by writing jhunter@servlets.com.
|
||||||
|
|
||||||
|
Non-commercial redistribution is permitted provided that:
|
||||||
|
|
||||||
|
1. You redistribute the package in object code form only (as Java .class files
|
||||||
|
or a .jar file containing the .class files) and only as part of a product that
|
||||||
|
uses the classes as part of its primary functionality.
|
||||||
|
|
||||||
|
2. The product containing the package is non-commercial in nature.
|
||||||
|
|
||||||
|
3. The public interface to the classes in the package, and the public
|
||||||
|
interface to any classes with similar functionality, is hidden from end users
|
||||||
|
when engaged in normal use of the product.
|
||||||
|
|
||||||
|
4. The distribution is not part of a software development kit, operating
|
||||||
|
system, other library, or a development tool without written permission from
|
||||||
|
the copyright holder.
|
||||||
|
|
||||||
|
5. The distribution includes copyright notice as follows: "The source code,
|
||||||
|
object code, and documentation in the com.oreilly.servlet package is copyright
|
||||||
|
and owned by Jason Hunter." in the documentation and/or other materials
|
||||||
|
provided with the distribution.
|
||||||
|
|
||||||
|
6. You reproduce the above copyright notice, this list of conditions, and the
|
||||||
|
following disclaimer in the documentation and/or other materials provided with
|
||||||
|
the distribution.
|
||||||
|
|
||||||
|
7. Licensor retains title to and ownership of the Software and all
|
||||||
|
enhancements, modifications, and updates to the Software.
|
||||||
|
|
||||||
|
Note that the com.oreilly.servlet package is provided "as is" and the author
|
||||||
|
will not be liable for any damages suffered as a result of your use.
|
||||||
|
Furthermore, you understand the package comes without any guarantee of
|
||||||
|
technical support.
|
||||||
|
|
||||||
|
You can always find the latest version of the com.oreilly.servlet package at
|
||||||
|
http://www.servlets.com.
|
||||||
|
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
||||||
|
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
SUCH DAMAGE.
|
||||||
|
|
||||||
|
Thanks,
|
||||||
|
|
||||||
|
Jason Hunter
|
||||||
|
jhunter AT servlets.com
|
27
licenses/dom4j.LICENSE.txt
Normal file
27
licenses/dom4j.LICENSE.txt
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
Redistribution and use of this software and associated documentation ("Software"),
|
||||||
|
with or without modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain copyright statements and notices.
|
||||||
|
Redistributions must also contain a copy of this document.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer in the documentation and/or other
|
||||||
|
materials provided with the distribution.
|
||||||
|
3. The name "DOM4J" must not be used to endorse or promote products derived from this
|
||||||
|
Software without prior written permission of MetaStuff, Ltd. For written permission,
|
||||||
|
please contact dom4j-info@metastuff.com.
|
||||||
|
4. Products derived from this Software may not be called "DOM4J" nor may "DOM4J"
|
||||||
|
appear in their names without prior written permission of MetaStuff, Ltd. DOM4J is a
|
||||||
|
registered trademark of MetaStuff, Ltd.
|
||||||
|
5. Due credit should be given to the DOM4J Project - http://www.dom4j.org
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESSED
|
||||||
|
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||||
|
METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
||||||
|
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||||
|
TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||||
|
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
|
28
licenses/icu4j.LICENSE.txt
Normal file
28
licenses/icu4j.LICENSE.txt
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
ICU License - ICU 1.8.1 and later
|
||||||
|
|
||||||
|
COPYRIGHT AND PERMISSION NOTICE
|
||||||
|
|
||||||
|
Copyright (c) 1995-2009 International Business Machines Corporation and others
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||||
|
software and associated documentation files (the "Software"), to deal in the Software
|
||||||
|
without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||||
|
publish, distribute, and/or sell copies of the Software, and to permit persons to whom
|
||||||
|
the Software is furnished to do so, provided that the above copyright notice(s) and this
|
||||||
|
permission notice appear in all copies of the Software and that both the above copyright
|
||||||
|
notice(s) and this permission notice appear in supporting documentation.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||||
|
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||||
|
PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
|
||||||
|
INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
|
||||||
|
USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
|
||||||
|
Except as contained in this notice, the name of a copyright holder shall not be used
|
||||||
|
in advertising or otherwise to promote the sale, use or other dealings in this Software
|
||||||
|
without prior written authorization of the copyright holder.
|
||||||
|
|
21
licenses/json.LICENSE.txt
Normal file
21
licenses/json.LICENSE.txt
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
Copyright (c) 2002 JSON.org
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
The Software shall be used for Good, not Evil.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
41
licenses/secondstring.LICENSE.txt
Normal file
41
licenses/secondstring.LICENSE.txt
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
Copyright (c) 2003 Carnegie Mellon University
|
||||||
|
All rights reserved.
|
||||||
|
Developed by: Center for Automated Learning and Discovery
|
||||||
|
Carnegie Mellon University
|
||||||
|
http://www.cald.cs.cmu.edu
|
||||||
|
|
||||||
|
The design and implementation of this software was supported in
|
||||||
|
part by National Science Foundation Grant No. EIA-0131884 to the
|
||||||
|
National Institute of Statistical Sciences, and by a contract
|
||||||
|
from the Army Research Office to the Center for Computer and
|
||||||
|
Communications Security with Carnegie Mellon University.
|
||||||
|
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal with the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimers.
|
||||||
|
Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimers in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
Neither the names of the Center for Automated Learning and Discovery,
|
||||||
|
or Carnegie Mellon University, nor the names of its contributors may
|
||||||
|
be used to endorse or promote products derived from this Software
|
||||||
|
without specific prior written permission. THE SOFTWARE IS PROVIDED
|
||||||
|
"AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
|
||||||
|
BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||||
|
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
OTHER DEALINGS WITH THE SOFTWARE.
|
||||||
|
|
||||||
|
[This is an instance of the University of Illinois/NCSA Open Source
|
||||||
|
agreement, obtained from http://www.opensource.org/licenses/UoI-NCSA.php]
|
||||||
|
|
29
licenses/simile.LICENSE.txt
Normal file
29
licenses/simile.LICENSE.txt
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
/*
|
||||||
|
* (c) Copyright Massachusetts Institute of Technology & Contributors.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. The name of the author may not be used to endorse or promote products
|
||||||
|
* derived from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
*/
|
22
licenses/slf4j.LICENSE.txt
Normal file
22
licenses/slf4j.LICENSE.txt
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
Copyright (c) 2004-2008 QOS.ch
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||||
|
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||||
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||||
|
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
7
src/main/java/edu/mit/simile/vicino/Distance.java
Normal file
7
src/main/java/edu/mit/simile/vicino/Distance.java
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
package edu.mit.simile.vicino;
|
||||||
|
|
||||||
|
/**
 * A distance function between two strings.
 */
public interface Distance {

    /**
     * Computes the distance between two strings.
     *
     * @param x the first string
     * @param y the second string
     * @return the distance between {@code x} and {@code y}
     */
    float d(String x, String y);

}
|
61
src/main/java/edu/mit/simile/vicino/Distributor.java
Normal file
61
src/main/java/edu/mit/simile/vicino/Distributor.java
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
package edu.mit.simile.vicino;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import edu.mit.simile.vicino.Distance;
|
||||||
|
|
||||||
|
public class Distributor extends Operator {
|
||||||
|
|
||||||
|
private static final int COLUMNS = 70;
|
||||||
|
private static final char CHAR = '=';
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
Distance d = getDistance(args[0]);
|
||||||
|
|
||||||
|
List<String> strings = getStrings(args[1]);
|
||||||
|
|
||||||
|
int buckets = Integer.parseInt(args[2]);
|
||||||
|
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
int[] values = new int[buckets];
|
||||||
|
|
||||||
|
int size = strings.size();
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
String x = (String) strings.get((int) (Math.random() * size));
|
||||||
|
String y = (String) strings.get((int) (Math.random() * size));
|
||||||
|
int dist = (int) (buckets * d.d(x, y));
|
||||||
|
values[dist]++;
|
||||||
|
System.out.print(".");
|
||||||
|
}
|
||||||
|
System.out.println();
|
||||||
|
|
||||||
|
long stop = System.currentTimeMillis();
|
||||||
|
float m = ((float) (stop - start)) / (float) size;
|
||||||
|
|
||||||
|
int maxValue = 0;
|
||||||
|
for (int i = 0; i < buckets; i++) {
|
||||||
|
if (values[i] > maxValue) {
|
||||||
|
maxValue = values[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out
|
||||||
|
.println("+-------------------------------------------------------------------");
|
||||||
|
for (int i = 0; i < buckets; i++) {
|
||||||
|
System.out.println("|" + bar(COLUMNS * values[i] / maxValue));
|
||||||
|
}
|
||||||
|
System.out
|
||||||
|
.println("+-------------------------------------------------------------------");
|
||||||
|
|
||||||
|
System.out.println("\n Each distance calculation took: " + m + " millis");
|
||||||
|
}
|
||||||
|
|
||||||
|
static private String bar(int value) {
|
||||||
|
StringBuffer b = new StringBuffer(value);
|
||||||
|
for (int i = 0; i < value; i++) {
|
||||||
|
b.append(CHAR);
|
||||||
|
}
|
||||||
|
return b.toString();
|
||||||
|
}
|
||||||
|
}
|
10
src/main/java/edu/mit/simile/vicino/Meter.java
Normal file
10
src/main/java/edu/mit/simile/vicino/Meter.java
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
package edu.mit.simile.vicino;
|
||||||
|
|
||||||
|
public class Meter extends Operator {
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
Distance d = getDistance(args[0]);
|
||||||
|
System.out.println(args[1] + " <- " + d.d(args[1], args[2]) + " -> " + args[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
47
src/main/java/edu/mit/simile/vicino/Operator.java
Normal file
47
src/main/java/edu/mit/simile/vicino/Operator.java
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
package edu.mit.simile.vicino;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class Operator {
|
||||||
|
|
||||||
|
static void log(String msg) {
|
||||||
|
System.out.println(msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
static Distance getDistance(String distance) throws Exception {
|
||||||
|
return (Distance) Class.forName("edu.mit.simile.vicino.distances." + distance + "Distance").newInstance();
|
||||||
|
}
|
||||||
|
|
||||||
|
static List<String> getStrings(String fileName) throws IOException {
|
||||||
|
ArrayList<String> strings = new ArrayList<String>();
|
||||||
|
|
||||||
|
File file = new File(fileName);
|
||||||
|
if (file.isDirectory()) {
|
||||||
|
File[] files = file.listFiles();
|
||||||
|
for (int i = 0; i < files.length; i++) {
|
||||||
|
BufferedReader input = new BufferedReader(new FileReader(files[i]));
|
||||||
|
StringBuffer b = new StringBuffer();
|
||||||
|
String line;
|
||||||
|
while ((line = input.readLine()) != null) {
|
||||||
|
b.append(line.trim());
|
||||||
|
}
|
||||||
|
input.close();
|
||||||
|
strings.add(b.toString());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
BufferedReader input = new BufferedReader(new FileReader(fileName));
|
||||||
|
String line;
|
||||||
|
while ((line = input.readLine()) != null) {
|
||||||
|
strings.add(line.trim());
|
||||||
|
}
|
||||||
|
input.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings;
|
||||||
|
}
|
||||||
|
}
|
52
src/main/java/edu/mit/simile/vicino/Seeker.java
Normal file
52
src/main/java/edu/mit/simile/vicino/Seeker.java
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
package edu.mit.simile.vicino;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import edu.mit.simile.vicino.vptree.VPTree;
|
||||||
|
import edu.mit.simile.vicino.vptree.VPTreeBuilder;
|
||||||
|
import edu.mit.simile.vicino.vptree.VPTreeSeeker;
|
||||||
|
|
||||||
|
public class Seeker extends Operator {
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
Distance d = getDistance(args[0]);
|
||||||
|
|
||||||
|
log("Working with distance: " + d);
|
||||||
|
List<String> strings = getStrings(args[1]);
|
||||||
|
log("Obtained " + strings.size() + " from " + args[1]);
|
||||||
|
|
||||||
|
log("Building VPTree...");
|
||||||
|
VPTreeBuilder builder = new VPTreeBuilder(d);
|
||||||
|
VPTree tree = builder.buildVPTree(strings);
|
||||||
|
log("..done");
|
||||||
|
|
||||||
|
VPTreeSeeker seeker = new VPTreeSeeker(d, tree);
|
||||||
|
|
||||||
|
log("type a string|range then hit return:");
|
||||||
|
BufferedReader input = new BufferedReader(new InputStreamReader(
|
||||||
|
System.in));
|
||||||
|
String line = null;
|
||||||
|
while ((line = input.readLine()) != null) {
|
||||||
|
int index = line.indexOf('|');
|
||||||
|
String query = line.substring(0, index);
|
||||||
|
float range = Float.parseFloat(line.substring(index + 1));
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
List<? extends Serializable> results = seeker.range(query, range);
|
||||||
|
long stop = System.currentTimeMillis();
|
||||||
|
Iterator<? extends Serializable> j = results.iterator();
|
||||||
|
if (j.hasNext()) {
|
||||||
|
while (j.hasNext()) {
|
||||||
|
String r = (String) j.next();
|
||||||
|
log(" " + r);
|
||||||
|
}
|
||||||
|
log(" [done in " + (stop - start) + "ms]");
|
||||||
|
} else {
|
||||||
|
log(" [no results found in " + (stop - start) + "ms]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
50
src/main/java/edu/mit/simile/vicino/Tester.java
Normal file
50
src/main/java/edu/mit/simile/vicino/Tester.java
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
package edu.mit.simile.vicino;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class Tester extends Operator {
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
Distance d = getDistance(args[0]);
|
||||||
|
|
||||||
|
List<String> strings = getStrings(args[1]);
|
||||||
|
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
|
||||||
|
int size = strings.size();
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
String x = (String) strings.get((int) (Math.random() * size));
|
||||||
|
String y = (String) strings.get((int) (Math.random() * size));
|
||||||
|
String z = (String) strings.get((int) (Math.random() * size));
|
||||||
|
boolean metric = metric(x, y, z, d);
|
||||||
|
if (metric) {
|
||||||
|
System.out.println("metric");
|
||||||
|
} else {
|
||||||
|
System.out.println("***** NOT METRIC *****");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
long stop = System.currentTimeMillis();
|
||||||
|
float m = ((float) (stop - start)) / (float) size;
|
||||||
|
|
||||||
|
System.out.println("\n Each metric evaluation took: " + m + " millis");
|
||||||
|
}
|
||||||
|
|
||||||
|
static boolean metric(String x, String y, String z, Distance d) {
|
||||||
|
float dxx = d.d(x, x);
|
||||||
|
boolean identity = (dxx == 0.0f);
|
||||||
|
float dxy = d.d(x, y);
|
||||||
|
float dyx = d.d(y, x);
|
||||||
|
boolean simmetrical = (dxy == dyx);
|
||||||
|
float dxz = d.d(x, z);
|
||||||
|
float dyz = d.d(y, z);
|
||||||
|
boolean triangular = (dxz <= dxy + dyz);
|
||||||
|
return (identity && simmetrical && triangular);
|
||||||
|
}
|
||||||
|
|
||||||
|
static Distance getDistance(String distance) throws Exception {
|
||||||
|
return (Distance) Class.forName(
|
||||||
|
"edu.mit.simile.vicino.distances." + distance + "Distance")
|
||||||
|
.newInstance();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,26 @@
|
|||||||
|
package edu.mit.simile.vicino.distances;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.tools.bzip2.CBZip2OutputStream;
|
||||||
|
|
||||||
|
public class BZip2Distance extends PseudoMetricDistance {
|
||||||
|
|
||||||
|
public float d2(String x, String y) {
|
||||||
|
String str = x + y;
|
||||||
|
float result = 0.0f;
|
||||||
|
try {
|
||||||
|
ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length());
|
||||||
|
CBZip2OutputStream os = new CBZip2OutputStream(baos);
|
||||||
|
os.write(str.getBytes());
|
||||||
|
os.close();
|
||||||
|
baos.close();
|
||||||
|
result = baos.toByteArray().length;
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,26 @@
|
|||||||
|
package edu.mit.simile.vicino.distances;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.zip.GZIPOutputStream;
|
||||||
|
|
||||||
|
public class GZipDistance extends PseudoMetricDistance {
|
||||||
|
|
||||||
|
public float d2(String x, String y) {
|
||||||
|
String str = x + y;
|
||||||
|
float result = 0.0f;
|
||||||
|
try {
|
||||||
|
ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length());
|
||||||
|
GZIPOutputStream os = new GZIPOutputStream(baos);
|
||||||
|
os.write(str.getBytes());
|
||||||
|
os.close();
|
||||||
|
baos.close();
|
||||||
|
result = baos.toByteArray().length;
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,18 @@
|
|||||||
|
package edu.mit.simile.vicino.distances;
|
||||||
|
|
||||||
|
import com.wcohen.ss.Jaccard;
|
||||||
|
import com.wcohen.ss.api.StringDistance;
|
||||||
|
|
||||||
|
public class JaccardDistance extends MetricDistance {
|
||||||
|
|
||||||
|
StringDistance distance;
|
||||||
|
|
||||||
|
public JaccardDistance() {
|
||||||
|
this.distance = new Jaccard();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected float d2(String x, String y) {
|
||||||
|
return Math.abs((float) this.distance.score(x, y) - 1.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,18 @@
|
|||||||
|
package edu.mit.simile.vicino.distances;
|
||||||
|
|
||||||
|
import com.wcohen.ss.Jaro;
|
||||||
|
import com.wcohen.ss.api.StringDistance;
|
||||||
|
|
||||||
|
public class JaroDistance extends MetricDistance {
|
||||||
|
|
||||||
|
StringDistance distance;
|
||||||
|
|
||||||
|
public JaroDistance() {
|
||||||
|
this.distance = new Jaro();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected float d2(String x, String y) {
|
||||||
|
return Math.abs((float) this.distance.score(x, y) - 1.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,22 @@
|
|||||||
|
package edu.mit.simile.vicino.distances;
|
||||||
|
|
||||||
|
import com.wcohen.ss.Levenstein;
|
||||||
|
import com.wcohen.ss.api.StringDistance;
|
||||||
|
|
||||||
|
import edu.mit.simile.vicino.Distance;
|
||||||
|
|
||||||
|
public class LevensteinDistance implements Distance {
|
||||||
|
|
||||||
|
StringDistance distance;
|
||||||
|
|
||||||
|
public LevensteinDistance() {
|
||||||
|
this.distance = new Levenstein();
|
||||||
|
}
|
||||||
|
|
||||||
|
public float d(String x, String y) {
|
||||||
|
float d = Math.abs((float) this.distance.score(x, y));
|
||||||
|
// System.out.println(this.distance.explainScore(x,y));
|
||||||
|
return d / (x.length() + y.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,19 @@
|
|||||||
|
package edu.mit.simile.vicino.distances;
|
||||||
|
|
||||||
|
import edu.mit.simile.vicino.Distance;
|
||||||
|
|
||||||
|
public abstract class MetricDistance implements Distance {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* public float d(String x,String y) { float dxy = d2(x,y); float dx =
|
||||||
|
* d2(x,""); float dy = d2(y,""); float result = dxy / (dx + dy); return
|
||||||
|
* result; }
|
||||||
|
*/
|
||||||
|
|
||||||
|
public float d(String x, String y) {
|
||||||
|
return d2(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
abstract float d2(String x, String y);
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,27 @@
|
|||||||
|
package edu.mit.simile.vicino.distances;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import com.colloquial.arithcode.ArithCodeOutputStream;
|
||||||
|
import com.colloquial.arithcode.PPMModel;
|
||||||
|
|
||||||
|
public class PPMDistance extends PseudoMetricDistance {
|
||||||
|
|
||||||
|
public float d2(String x, String y) {
|
||||||
|
String str = x + y;
|
||||||
|
float result = 0.0f;
|
||||||
|
try {
|
||||||
|
ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length());
|
||||||
|
ArithCodeOutputStream os = new ArithCodeOutputStream(baos,new PPMModel(8));
|
||||||
|
os.write(str.getBytes());
|
||||||
|
os.close();
|
||||||
|
baos.close();
|
||||||
|
result = baos.toByteArray().length;
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,17 @@
|
|||||||
|
package edu.mit.simile.vicino.distances;
|
||||||
|
|
||||||
|
import edu.mit.simile.vicino.Distance;
|
||||||
|
|
||||||
|
public abstract class PseudoMetricDistance implements Distance {
|
||||||
|
|
||||||
|
public float d(String x, String y) {
|
||||||
|
float cxx = d2(x, x);
|
||||||
|
float cyy = d2(y, y);
|
||||||
|
float cxy = d2(x, y);
|
||||||
|
float cyx = d2(y, x);
|
||||||
|
float result1 = (cxy + cyx) / (cxx + cyy) - 1.0f;
|
||||||
|
return result1;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract float d2(String x, String y);
|
||||||
|
}
|
42
src/main/java/edu/mit/simile/vicino/vptree/Node.java
Executable file
42
src/main/java/edu/mit/simile/vicino/vptree/Node.java
Executable file
@ -0,0 +1,42 @@
|
|||||||
|
package edu.mit.simile.vicino.vptree;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class represent a couple (Object, distance) value of that Object from
|
||||||
|
* the Vp in each step of the algorithm.
|
||||||
|
*
|
||||||
|
* @author Paolo Ciccarese
|
||||||
|
*/
|
||||||
|
public class Node implements Serializable {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = -2077473220894258550L;
|
||||||
|
|
||||||
|
private final Serializable obj;
|
||||||
|
private float distance;
|
||||||
|
|
||||||
|
public Node(Serializable obj, int i) {
|
||||||
|
this.obj = obj;
|
||||||
|
this.distance = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Node(Serializable obj) {
|
||||||
|
this.obj = obj;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Serializable get() {
|
||||||
|
return this.obj;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDistance(float distance) {
|
||||||
|
this.distance = distance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float getDistance() {
|
||||||
|
return distance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return obj.toString();
|
||||||
|
}
|
||||||
|
}
|
94
src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java
Executable file
94
src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java
Executable file
@ -0,0 +1,94 @@
|
|||||||
|
package edu.mit.simile.vicino.vptree;
|
||||||
|
|
||||||
|
public class NodeSorter {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sort array of Objects using the QuickSort algorithm.
|
||||||
|
*
|
||||||
|
* @param s
|
||||||
|
* An Object[].
|
||||||
|
* @param lo
|
||||||
|
* The current lower bound.
|
||||||
|
* @param hi
|
||||||
|
* The current upper bound.
|
||||||
|
*/
|
||||||
|
public static void sort(Node nodes[], int lo, int hi) {
|
||||||
|
if (lo >= hi) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use median-of-three(lo, mid, hi) to pick a partition. Also swap them
|
||||||
|
* into relative order while we are at it.
|
||||||
|
*/
|
||||||
|
int mid = (lo + hi) / 2;
|
||||||
|
|
||||||
|
if (nodes[lo].getDistance() > nodes[mid].getDistance()) {
|
||||||
|
// Swap.
|
||||||
|
Node tmp = nodes[lo];
|
||||||
|
nodes[lo] = nodes[mid];
|
||||||
|
nodes[mid] = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nodes[mid].getDistance() > nodes[hi].getDistance()) {
|
||||||
|
// Swap .
|
||||||
|
Node tmp = nodes[mid];
|
||||||
|
nodes[mid] = nodes[hi];
|
||||||
|
nodes[hi] = tmp;
|
||||||
|
|
||||||
|
if (nodes[lo].getDistance() > nodes[mid].getDistance()) {
|
||||||
|
// Swap.
|
||||||
|
Node tmp2 = nodes[lo];
|
||||||
|
nodes[lo] = nodes[mid];
|
||||||
|
nodes[mid] = tmp2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start one past lo since already handled lo.
|
||||||
|
|
||||||
|
int left = lo + 1;
|
||||||
|
|
||||||
|
// Similarly, end one before hi since already handled hi.
|
||||||
|
|
||||||
|
int right = hi - 1;
|
||||||
|
|
||||||
|
// If there are three or fewer elements, we are done.
|
||||||
|
|
||||||
|
if (left >= right) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Node partition = nodes[mid];
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
while (nodes[right].getDistance() > partition.getDistance()) {
|
||||||
|
--right;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (left < right && nodes[left].getDistance() <= partition.getDistance()) {
|
||||||
|
++left;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (left < right) {
|
||||||
|
// Swap.
|
||||||
|
Node tmp = nodes[left];
|
||||||
|
nodes[left] = nodes[right];
|
||||||
|
nodes[right] = tmp;
|
||||||
|
|
||||||
|
--right;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sort(nodes, lo, left);
|
||||||
|
sort(nodes, left + 1, hi);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sorts and array of objects.
|
||||||
|
*/
|
||||||
|
public void sort(Node nodes[]) {
|
||||||
|
NodeSorter.sort(nodes, 0, nodes.length - 1);
|
||||||
|
}
|
||||||
|
}
|
52
src/main/java/edu/mit/simile/vicino/vptree/TNode.java
Executable file
52
src/main/java/edu/mit/simile/vicino/vptree/TNode.java
Executable file
@ -0,0 +1,52 @@
|
|||||||
|
package edu.mit.simile.vicino.vptree;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * A node of the VPTree: it holds an object, a median value and the references
 * to its left and right children.
 *
 * @author Paolo Ciccarese
 */
public class TNode implements Serializable {

    private static final long serialVersionUID = -217604190976851241L;

    private final Serializable obj;
    private TNode left;
    private TNode right;
    private float median;

    /**
     * Creates a node holding the given object. The object cannot be changed
     * afterwards, while the median and the children are assigned later through
     * the setters.
     */
    public TNode(Serializable obj) {
        this.obj = obj;
    }

    /**
     * Returns the object held by this node.
     */
    public Serializable get() {
        return obj;
    }

    /**
     * Returns the median value of this node.
     */
    public float getMedian() {
        return this.median;
    }

    /**
     * Sets the median value of this node.
     */
    public void setMedian(float median) {
        this.median = median;
    }

    /**
     * Returns the left child, or null if none was set.
     */
    public TNode getLeft() {
        return this.left;
    }

    /**
     * Sets the left child.
     */
    public void setLeft(TNode leftNode) {
        left = leftNode;
    }

    /**
     * Returns the right child, or null if none was set.
     */
    public TNode getRight() {
        return this.right;
    }

    /**
     * Sets the right child.
     */
    public void setRight(TNode rightNode) {
        right = rightNode;
    }
}
|
33
src/main/java/edu/mit/simile/vicino/vptree/VPTree.java
Executable file
33
src/main/java/edu/mit/simile/vicino/vptree/VPTree.java
Executable file
@ -0,0 +1,33 @@
|
|||||||
|
package edu.mit.simile.vicino.vptree;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The VPTree class.
|
||||||
|
*
|
||||||
|
* @author Paolo Ciccarese
|
||||||
|
*/
|
||||||
|
public class VPTree implements Serializable {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = 1291056732155841123L;
|
||||||
|
|
||||||
|
private TNode root;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the root of the VPTree.
|
||||||
|
*
|
||||||
|
* @param root The VPTree root.
|
||||||
|
*/
|
||||||
|
public void setRoot(TNode root) {
|
||||||
|
this.root = root;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the root of the VPTree.
|
||||||
|
*
|
||||||
|
* @return The VPTree root.
|
||||||
|
*/
|
||||||
|
public TNode getRoot() {
|
||||||
|
return root;
|
||||||
|
}
|
||||||
|
}
|
101
src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java
Executable file
101
src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java
Executable file
@ -0,0 +1,101 @@
|
|||||||
|
package edu.mit.simile.vicino.vptree;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import edu.mit.simile.vicino.Distance;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Paolo Ciccarese
|
||||||
|
* @author Stefano Mazzocchi
|
||||||
|
*/
|
||||||
|
public class VPTreeBuilder {
|
||||||
|
|
||||||
|
private static final boolean DEBUG = false;
|
||||||
|
|
||||||
|
private Random generator = new Random(System.currentTimeMillis());
|
||||||
|
|
||||||
|
private VPTree tree;
|
||||||
|
private final Distance distance;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Defines a VPTree Builder for a specific distance.
|
||||||
|
*
|
||||||
|
* @param distance The class implementing the distance.
|
||||||
|
*/
|
||||||
|
public VPTreeBuilder(Distance distance) {
|
||||||
|
this.distance = distance;
|
||||||
|
}
|
||||||
|
|
||||||
|
public VPTree buildVPTree(Collection<? extends Serializable> col) {
|
||||||
|
Node nodes[] = new Node[col.size()];
|
||||||
|
Iterator<? extends Serializable> i = col.iterator();
|
||||||
|
int counter = 0;
|
||||||
|
while (i.hasNext()) {
|
||||||
|
Serializable s = (Serializable) i.next();
|
||||||
|
nodes[counter++] = new Node(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
tree = new VPTree();
|
||||||
|
tree.setRoot(addNode(nodes, 0, nodes.length - 1));
|
||||||
|
return tree;
|
||||||
|
}
|
||||||
|
|
||||||
|
private TNode addNode(Node nodes[], int begin, int end) {
|
||||||
|
|
||||||
|
int delta = end - begin;
|
||||||
|
int middle = begin + delta / 2;
|
||||||
|
|
||||||
|
TNode node = new TNode(nodes[begin + getRandomIndex(delta)].get());
|
||||||
|
|
||||||
|
if (DEBUG) System.out.println("\nnode: " + node.get().toString());
|
||||||
|
|
||||||
|
calculateDistances(node, nodes, begin, end);
|
||||||
|
orderDistances(nodes, begin, end);
|
||||||
|
|
||||||
|
if (DEBUG) {
|
||||||
|
for (int i = begin; i <= end; i++) {
|
||||||
|
System.out.println(" +-- " + nodes[i].getDistance() + " --> " + nodes[i].get());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (delta + 1 > 0) {
|
||||||
|
if (middle - (begin + 1) >= 1) {
|
||||||
|
node.setLeft(addNode(nodes, begin + 1, middle));
|
||||||
|
if (DEBUG) System.out.println(" L --> " + node.getLeft().get());
|
||||||
|
} else if (middle - (begin + 1) == 0) {
|
||||||
|
node.setLeft(new TNode(nodes[middle].get()));
|
||||||
|
if (DEBUG) System.out.println(" L --> " + node.getLeft().get());
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((end - (middle + 1)) >= 1) {
|
||||||
|
node.setRight(addNode(nodes, middle + 1, end));
|
||||||
|
if (DEBUG) System.out.println(" R --> " + node.getRight().get());
|
||||||
|
} else if (end - (middle + 1) == 0) {
|
||||||
|
node.setRight(new TNode(nodes[middle + 1].get()));
|
||||||
|
if (DEBUG) System.out.println(" R --> " + node.getRight().get());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void calculateDistances(TNode pivot, Node nodes[], int begin, int end) {
|
||||||
|
for (int i = begin; i <= end; i++) {
|
||||||
|
Object x = pivot.get();
|
||||||
|
Object y = nodes[i].get();
|
||||||
|
float d = (x == y) ? 0.0f : distance.d(x.toString(), y.toString());
|
||||||
|
nodes[i].setDistance(d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void orderDistances(Node nodes[], int begin, int end) {
|
||||||
|
NodeSorter.sort(nodes, begin, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getRandomIndex(int max) {
|
||||||
|
return generator.nextInt(max);
|
||||||
|
}
|
||||||
|
}
|
48
src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java
Executable file
48
src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java
Executable file
@ -0,0 +1,48 @@
|
|||||||
|
package edu.mit.simile.vicino.vptree;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import edu.mit.simile.vicino.Distance;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Paolo Ciccarese
|
||||||
|
*/
|
||||||
|
public class VPTreeSeeker {
|
||||||
|
|
||||||
|
VPTree tree;
|
||||||
|
Distance distance;
|
||||||
|
|
||||||
|
public VPTreeSeeker(Distance distance, VPTree tree) {
|
||||||
|
this.distance = distance;
|
||||||
|
this.tree = tree;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<? extends Serializable> range(Object query, float range) {
|
||||||
|
return rangeTraversal(query, range, tree.getRoot(), new ArrayList<Serializable>());
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Serializable> rangeTraversal(Object query, float range, TNode tNode, List<Serializable> results) {
|
||||||
|
|
||||||
|
if (tNode != null) {
|
||||||
|
float distance = this.distance.d(query.toString(), tNode.toString());
|
||||||
|
|
||||||
|
if (distance < range) {
|
||||||
|
results.add(tNode.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((distance + range) < tNode.getMedian()) {
|
||||||
|
rangeTraversal(query, range, tNode.getLeft(), results);
|
||||||
|
} else if ((distance - range) > tNode.getMedian()) {
|
||||||
|
rangeTraversal(query, range, tNode.getRight(), results);
|
||||||
|
} else {
|
||||||
|
rangeTraversal(query, range, tNode.getLeft(), results);
|
||||||
|
rangeTraversal(query, range, tNode.getRight(), results);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
136
src/main/java/org/apache/tools/bzip2/BZip2Constants.java
Normal file
136
src/main/java/org/apache/tools/bzip2/BZip2Constants.java
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
/*
|
||||||
|
* The Apache Software License, Version 1.1
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001 The Apache Software Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* 3. The end-user documentation included with the redistribution, if
|
||||||
|
* any, must include the following acknowlegement:
|
||||||
|
* "This product includes software developed by the
|
||||||
|
* Apache Software Foundation (http://www.apache.org/)."
|
||||||
|
* Alternately, this acknowlegement may appear in the software itself,
|
||||||
|
* if and wherever such third-party acknowlegements normally appear.
|
||||||
|
*
|
||||||
|
* 4. The names "Ant" and "Apache Software
|
||||||
|
* Foundation" must not be used to endorse or promote products derived
|
||||||
|
* from this software without prior written permission. For written
|
||||||
|
* permission, please contact apache@apache.org.
|
||||||
|
*
|
||||||
|
* 5. Products derived from this software may not be called "Apache"
|
||||||
|
* nor may "Apache" appear in their names without prior written
|
||||||
|
* permission of the Apache Group.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||||
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This package is based on the work done by Keiron Liddle, Aftex Software
|
||||||
|
* <keiron@aftexsw.com> to whom the Ant project is very grateful for his
|
||||||
|
* great code.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.tools.bzip2;
|
||||||
|
|
||||||
|
/**
 * Constants and static tables shared by the BZip2 compress and decompress
 * classes, which pick them up by implementing this interface.
 *
 * @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a>
 */
public interface BZip2Constants {

    /** Block-size unit; working buffers hold this many entries times the block-size level. */
    int baseBlockSize = 100000;

    /** Second dimension of the per-group decoding tables. */
    int MAX_ALPHA_SIZE = 258;

    /** Number of leading entries cleared in the base/limit decoding tables. */
    int MAX_CODE_LEN = 23;

    /** First run-length symbol of the decoded symbol stream. */
    int RUNA = 0;

    /** Second run-length symbol of the decoded symbol stream. */
    int RUNB = 1;

    /** Maximum number of coding tables (groups). */
    int N_GROUPS = 6;

    /** Number of symbols decoded with one selector before moving to the next. */
    int G_SIZE = 50;

    /** NOTE(review): presumably the compressor's table-refinement pass count; confirm against CBZip2OutputStream. */
    int N_ITERS = 4;

    /** Capacity of the selector arrays. */
    int MAX_SELECTORS = 2 + 900000 / G_SIZE;

    /** NOTE(review): presumably slack bytes used by the compressor's sorter; confirm against CBZip2OutputStream. */
    int NUM_OVERSHOOT_BYTES = 20;

    /**
     * Table of 512 countdown values consumed cyclically while undoing the
     * randomisation of a block.
     */
    int[] rNums = {
        619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
        985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
        733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
        419, 436, 278, 496, 867, 210, 399, 680, 480, 51,
        878, 465, 811, 169, 869, 675, 611, 697, 867, 561,
        862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
        150, 238, 59, 379, 684, 877, 625, 169, 643, 105,
        170, 607, 520, 932, 727, 476, 693, 425, 174, 647,
        73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
        909, 545, 703, 919, 874, 474, 882, 500, 594, 612,
        641, 801, 220, 162, 819, 984, 589, 513, 495, 799,
        161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
        382, 596, 414, 171, 516, 375, 682, 485, 911, 276,
        98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
        227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
        469, 68, 770, 919, 190, 373, 294, 822, 808, 206,
        184, 943, 795, 384, 383, 461, 404, 758, 839, 887,
        715, 67, 618, 276, 204, 918, 873, 777, 604, 560,
        951, 160, 578, 722, 79, 804, 96, 409, 713, 940,
        652, 934, 970, 447, 318, 353, 859, 672, 112, 785,
        645, 863, 803, 350, 139, 93, 354, 99, 820, 908,
        609, 772, 154, 274, 580, 184, 79, 626, 630, 742,
        653, 282, 762, 623, 680, 81, 927, 626, 789, 125,
        411, 521, 938, 300, 821, 78, 343, 175, 128, 250,
        170, 774, 972, 275, 999, 639, 495, 78, 352, 126,
        857, 956, 358, 619, 580, 124, 737, 594, 701, 612,
        669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
        944, 375, 748, 52, 600, 747, 642, 182, 862, 81,
        344, 805, 988, 739, 511, 655, 814, 334, 249, 515,
        897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
        433, 837, 553, 268, 926, 240, 102, 654, 459, 51,
        686, 754, 806, 760, 493, 403, 415, 394, 687, 700,
        946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
        978, 321, 576, 617, 626, 502, 894, 679, 243, 440,
        680, 879, 194, 572, 640, 724, 926, 56, 204, 700,
        707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
        297, 59, 87, 824, 713, 663, 412, 693, 342, 606,
        134, 108, 571, 364, 631, 212, 174, 643, 304, 329,
        343, 97, 430, 751, 497, 314, 983, 374, 822, 928,
        140, 206, 73, 263, 980, 736, 876, 478, 430, 305,
        170, 514, 364, 692, 829, 82, 855, 953, 676, 246,
        369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
        804, 378, 215, 828, 592, 281, 565, 555, 710, 82,
        896, 831, 547, 261, 524, 462, 293, 465, 502, 56,
        661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
        768, 550, 608, 933, 378, 286, 215, 979, 792, 961,
        61, 688, 793, 644, 986, 403, 106, 366, 905, 644,
        372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
        780, 773, 635, 389, 707, 100, 626, 958, 165, 504,
        920, 176, 193, 713, 857, 265, 203, 50, 668, 108,
        645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
        936, 638
    };
}
|
865
src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java
Normal file
865
src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java
Normal file
@ -0,0 +1,865 @@
|
|||||||
|
/*
|
||||||
|
* The Apache Software License, Version 1.1
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001-2003 The Apache Software Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* 3. The end-user documentation included with the redistribution, if
|
||||||
|
* any, must include the following acknowlegement:
|
||||||
|
* "This product includes software developed by the
|
||||||
|
* Apache Software Foundation (http://www.apache.org/)."
|
||||||
|
* Alternately, this acknowlegement may appear in the software itself,
|
||||||
|
* if and wherever such third-party acknowlegements normally appear.
|
||||||
|
*
|
||||||
|
* 4. The names "Ant" and "Apache Software
|
||||||
|
* Foundation" must not be used to endorse or promote products derived
|
||||||
|
* from this software without prior written permission. For written
|
||||||
|
* permission, please contact apache@apache.org.
|
||||||
|
*
|
||||||
|
* 5. Products derived from this software may not be called "Apache"
|
||||||
|
* nor may "Apache" appear in their names without prior written
|
||||||
|
* permission of the Apache Group.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||||
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This package is based on the work done by Keiron Liddle, Aftex Software
|
||||||
|
* <keiron@aftexsw.com> to whom the Ant project is very grateful for his
|
||||||
|
* great code.
|
||||||
|
*/
|
||||||
|
package org.apache.tools.bzip2;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An input stream that decompresses from the BZip2 format (without the file
|
||||||
|
* header chars) to be read as any other stream.
|
||||||
|
*
|
||||||
|
* @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a>
|
||||||
|
*/
|
||||||
|
public class CBZip2InputStream extends InputStream implements BZip2Constants {
|
||||||
|
private static void cadvise() {
|
||||||
|
System.out.println("CRC Error");
|
||||||
|
//throw new CCoruptionError();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void compressedStreamEOF() {
|
||||||
|
cadvise();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void makeMaps() {
|
||||||
|
int i;
|
||||||
|
nInUse = 0;
|
||||||
|
for (i = 0; i < 256; i++) {
|
||||||
|
if (inUse[i]) {
|
||||||
|
seqToUnseq[nInUse] = (char) i;
|
||||||
|
unseqToSeq[i] = (char) nInUse;
|
||||||
|
nInUse++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
index of the last char in the block, so
|
||||||
|
the block size == last + 1.
|
||||||
|
*/
|
||||||
|
private int last;
|
||||||
|
|
||||||
|
/*
|
||||||
|
index in zptr[] of original string after sorting.
|
||||||
|
*/
|
||||||
|
private int origPtr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
always: in the range 0 .. 9.
|
||||||
|
The current block size is 100000 * this number.
|
||||||
|
*/
|
||||||
|
private int blockSize100k;
|
||||||
|
|
||||||
|
private boolean blockRandomised;
|
||||||
|
|
||||||
|
private int bsBuff;
|
||||||
|
private int bsLive;
|
||||||
|
private CRC mCrc = new CRC();
|
||||||
|
|
||||||
|
private boolean[] inUse = new boolean[256];
|
||||||
|
private int nInUse;
|
||||||
|
|
||||||
|
private char[] seqToUnseq = new char[256];
|
||||||
|
private char[] unseqToSeq = new char[256];
|
||||||
|
|
||||||
|
private char[] selector = new char[MAX_SELECTORS];
|
||||||
|
private char[] selectorMtf = new char[MAX_SELECTORS];
|
||||||
|
|
||||||
|
private int[] tt;
|
||||||
|
private char[] ll8;
|
||||||
|
|
||||||
|
/*
|
||||||
|
freq table collected to save a pass over the data
|
||||||
|
during decompression.
|
||||||
|
*/
|
||||||
|
private int[] unzftab = new int[256];
|
||||||
|
|
||||||
|
private int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE];
|
||||||
|
private int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE];
|
||||||
|
private int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE];
|
||||||
|
private int[] minLens = new int[N_GROUPS];
|
||||||
|
|
||||||
|
private InputStream bsStream;
|
||||||
|
|
||||||
|
private boolean streamEnd = false;
|
||||||
|
|
||||||
|
private int currentChar = -1;
|
||||||
|
|
||||||
|
private static final int START_BLOCK_STATE = 1;
|
||||||
|
private static final int RAND_PART_A_STATE = 2;
|
||||||
|
private static final int RAND_PART_B_STATE = 3;
|
||||||
|
private static final int RAND_PART_C_STATE = 4;
|
||||||
|
private static final int NO_RAND_PART_A_STATE = 5;
|
||||||
|
private static final int NO_RAND_PART_B_STATE = 6;
|
||||||
|
private static final int NO_RAND_PART_C_STATE = 7;
|
||||||
|
|
||||||
|
private int currentState = START_BLOCK_STATE;
|
||||||
|
|
||||||
|
private int storedBlockCRC, storedCombinedCRC;
|
||||||
|
private int computedBlockCRC, computedCombinedCRC;
|
||||||
|
|
||||||
|
int i2, count, chPrev, ch2;
|
||||||
|
int i, tPos;
|
||||||
|
int rNToGo = 0;
|
||||||
|
int rTPos = 0;
|
||||||
|
int j2;
|
||||||
|
char z;
|
||||||
|
|
||||||
|
/**
 * Starts decompressing {@code zStream}: reads the stream header, then
 * decodes the first block so that {@link #read()} has a byte ready.
 *
 * <p>If the header is invalid, or no data block follows it, the stream is
 * marked as ended and {@code read()} returns -1. Construction stops at that
 * point: after a bad header the input has already been closed and released,
 * and the working buffers were never allocated, so going on to decode a
 * block would fail with a NullPointerException.</p>
 *
 * @param zStream the compressed input, positioned where {@code initialize()}
 *                expects the 'h' and block-size-digit bytes
 */
public CBZip2InputStream(InputStream zStream) {
    ll8 = null;
    tt = null;
    bsSetStream(zStream);

    initialize();
    if (streamEnd) {
        return; // bad header: nothing to decode
    }

    initBlock();
    if (streamEnd) {
        return; // end-of-stream marker or bad block header: no block data
    }

    setupBlock();
}
|
||||||
|
|
||||||
|
public int read() {
|
||||||
|
if (streamEnd) {
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
int retChar = currentChar;
|
||||||
|
switch(currentState) {
|
||||||
|
case START_BLOCK_STATE:
|
||||||
|
break;
|
||||||
|
case RAND_PART_A_STATE:
|
||||||
|
break;
|
||||||
|
case RAND_PART_B_STATE:
|
||||||
|
setupRandPartB();
|
||||||
|
break;
|
||||||
|
case RAND_PART_C_STATE:
|
||||||
|
setupRandPartC();
|
||||||
|
break;
|
||||||
|
case NO_RAND_PART_A_STATE:
|
||||||
|
break;
|
||||||
|
case NO_RAND_PART_B_STATE:
|
||||||
|
setupNoRandPartB();
|
||||||
|
break;
|
||||||
|
case NO_RAND_PART_C_STATE:
|
||||||
|
setupNoRandPartC();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return retChar;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initialize() {
|
||||||
|
char magic3, magic4;
|
||||||
|
magic3 = bsGetUChar();
|
||||||
|
magic4 = bsGetUChar();
|
||||||
|
if (magic3 != 'h' || magic4 < '1' || magic4 > '9') {
|
||||||
|
bsFinishedWithStream();
|
||||||
|
streamEnd = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
setDecompressStructureSizes(magic4 - '0');
|
||||||
|
computedCombinedCRC = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initBlock() {
|
||||||
|
char magic1, magic2, magic3, magic4;
|
||||||
|
char magic5, magic6;
|
||||||
|
magic1 = bsGetUChar();
|
||||||
|
magic2 = bsGetUChar();
|
||||||
|
magic3 = bsGetUChar();
|
||||||
|
magic4 = bsGetUChar();
|
||||||
|
magic5 = bsGetUChar();
|
||||||
|
magic6 = bsGetUChar();
|
||||||
|
if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45
|
||||||
|
&& magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) {
|
||||||
|
complete();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59
|
||||||
|
|| magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) {
|
||||||
|
badBlockHeader();
|
||||||
|
streamEnd = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
storedBlockCRC = bsGetInt32();
|
||||||
|
|
||||||
|
if (bsR(1) == 1) {
|
||||||
|
blockRandomised = true;
|
||||||
|
} else {
|
||||||
|
blockRandomised = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// currBlockNo++;
|
||||||
|
getAndMoveToFrontDecode();
|
||||||
|
|
||||||
|
mCrc.initialiseCRC();
|
||||||
|
currentState = START_BLOCK_STATE;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void endBlock() {
|
||||||
|
computedBlockCRC = mCrc.getFinalCRC();
|
||||||
|
/* A bad CRC is considered a fatal error. */
|
||||||
|
if (storedBlockCRC != computedBlockCRC) {
|
||||||
|
crcError();
|
||||||
|
}
|
||||||
|
|
||||||
|
computedCombinedCRC = (computedCombinedCRC << 1)
|
||||||
|
| (computedCombinedCRC >>> 31);
|
||||||
|
computedCombinedCRC ^= computedBlockCRC;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void complete() {
|
||||||
|
storedCombinedCRC = bsGetInt32();
|
||||||
|
if (storedCombinedCRC != computedCombinedCRC) {
|
||||||
|
crcError();
|
||||||
|
}
|
||||||
|
|
||||||
|
bsFinishedWithStream();
|
||||||
|
streamEnd = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void blockOverrun() {
|
||||||
|
cadvise();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void badBlockHeader() {
|
||||||
|
cadvise();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void crcError() {
|
||||||
|
cadvise();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void bsFinishedWithStream() {
|
||||||
|
try {
|
||||||
|
if (this.bsStream != null) {
|
||||||
|
if (this.bsStream != System.in) {
|
||||||
|
this.bsStream.close();
|
||||||
|
this.bsStream = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
//ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void bsSetStream(InputStream f) {
|
||||||
|
bsStream = f;
|
||||||
|
bsLive = 0;
|
||||||
|
bsBuff = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int bsR(int n) {
|
||||||
|
int v;
|
||||||
|
while (bsLive < n) {
|
||||||
|
int zzi;
|
||||||
|
char thech = 0;
|
||||||
|
try {
|
||||||
|
thech = (char) bsStream.read();
|
||||||
|
} catch (IOException e) {
|
||||||
|
compressedStreamEOF();
|
||||||
|
}
|
||||||
|
if (thech == -1) {
|
||||||
|
compressedStreamEOF();
|
||||||
|
}
|
||||||
|
zzi = thech;
|
||||||
|
bsBuff = (bsBuff << 8) | (zzi & 0xff);
|
||||||
|
bsLive += 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1);
|
||||||
|
bsLive -= n;
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
private char bsGetUChar() {
|
||||||
|
return (char) bsR(8);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int bsGetint() {
|
||||||
|
int u = 0;
|
||||||
|
u = (u << 8) | bsR(8);
|
||||||
|
u = (u << 8) | bsR(8);
|
||||||
|
u = (u << 8) | bsR(8);
|
||||||
|
u = (u << 8) | bsR(8);
|
||||||
|
return u;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int bsGetIntVS(int numBits) {
|
||||||
|
return (int) bsR(numBits);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int bsGetInt32() {
|
||||||
|
return (int) bsGetint();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void hbCreateDecodeTables(int[] limit, int[] base,
|
||||||
|
int[] perm, char[] length,
|
||||||
|
int minLen, int maxLen, int alphaSize) {
|
||||||
|
int pp, i, j, vec;
|
||||||
|
|
||||||
|
pp = 0;
|
||||||
|
for (i = minLen; i <= maxLen; i++) {
|
||||||
|
for (j = 0; j < alphaSize; j++) {
|
||||||
|
if (length[j] == i) {
|
||||||
|
perm[pp] = j;
|
||||||
|
pp++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < MAX_CODE_LEN; i++) {
|
||||||
|
base[i] = 0;
|
||||||
|
}
|
||||||
|
for (i = 0; i < alphaSize; i++) {
|
||||||
|
base[length[i] + 1]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 1; i < MAX_CODE_LEN; i++) {
|
||||||
|
base[i] += base[i - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < MAX_CODE_LEN; i++) {
|
||||||
|
limit[i] = 0;
|
||||||
|
}
|
||||||
|
vec = 0;
|
||||||
|
|
||||||
|
for (i = minLen; i <= maxLen; i++) {
|
||||||
|
vec += (base[i + 1] - base[i]);
|
||||||
|
limit[i] = vec - 1;
|
||||||
|
vec <<= 1;
|
||||||
|
}
|
||||||
|
for (i = minLen + 1; i <= maxLen; i++) {
|
||||||
|
base[i] = ((limit[i - 1] + 1) << 1) - base[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void recvDecodingTables() {
|
||||||
|
char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE];
|
||||||
|
int i, j, t, nGroups, nSelectors, alphaSize;
|
||||||
|
int minLen, maxLen;
|
||||||
|
boolean[] inUse16 = new boolean[16];
|
||||||
|
|
||||||
|
/* Receive the mapping table */
|
||||||
|
for (i = 0; i < 16; i++) {
|
||||||
|
if (bsR(1) == 1) {
|
||||||
|
inUse16[i] = true;
|
||||||
|
} else {
|
||||||
|
inUse16[i] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < 256; i++) {
|
||||||
|
inUse[i] = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < 16; i++) {
|
||||||
|
if (inUse16[i]) {
|
||||||
|
for (j = 0; j < 16; j++) {
|
||||||
|
if (bsR(1) == 1) {
|
||||||
|
inUse[i * 16 + j] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
makeMaps();
|
||||||
|
alphaSize = nInUse + 2;
|
||||||
|
|
||||||
|
/* Now the selectors */
|
||||||
|
nGroups = bsR(3);
|
||||||
|
nSelectors = bsR(15);
|
||||||
|
for (i = 0; i < nSelectors; i++) {
|
||||||
|
j = 0;
|
||||||
|
while (bsR(1) == 1) {
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
selectorMtf[i] = (char) j;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Undo the MTF values for the selectors. */
|
||||||
|
{
|
||||||
|
char[] pos = new char[N_GROUPS];
|
||||||
|
char tmp, v;
|
||||||
|
for (v = 0; v < nGroups; v++) {
|
||||||
|
pos[v] = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < nSelectors; i++) {
|
||||||
|
v = selectorMtf[i];
|
||||||
|
tmp = pos[v];
|
||||||
|
while (v > 0) {
|
||||||
|
pos[v] = pos[v - 1];
|
||||||
|
v--;
|
||||||
|
}
|
||||||
|
pos[0] = tmp;
|
||||||
|
selector[i] = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now the coding tables */
|
||||||
|
for (t = 0; t < nGroups; t++) {
|
||||||
|
int curr = bsR(5);
|
||||||
|
for (i = 0; i < alphaSize; i++) {
|
||||||
|
while (bsR(1) == 1) {
|
||||||
|
if (bsR(1) == 0) {
|
||||||
|
curr++;
|
||||||
|
} else {
|
||||||
|
curr--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
len[t][i] = (char) curr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create the Huffman decoding tables */
|
||||||
|
for (t = 0; t < nGroups; t++) {
|
||||||
|
minLen = 32;
|
||||||
|
maxLen = 0;
|
||||||
|
for (i = 0; i < alphaSize; i++) {
|
||||||
|
if (len[t][i] > maxLen) {
|
||||||
|
maxLen = len[t][i];
|
||||||
|
}
|
||||||
|
if (len[t][i] < minLen) {
|
||||||
|
minLen = len[t][i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
|
||||||
|
maxLen, alphaSize);
|
||||||
|
minLens[t] = minLen;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void getAndMoveToFrontDecode() {
|
||||||
|
char[] yy = new char[256];
|
||||||
|
int i, j, nextSym, limitLast;
|
||||||
|
int EOB, groupNo, groupPos;
|
||||||
|
|
||||||
|
limitLast = baseBlockSize * blockSize100k;
|
||||||
|
origPtr = bsGetIntVS(24);
|
||||||
|
|
||||||
|
recvDecodingTables();
|
||||||
|
EOB = nInUse + 1;
|
||||||
|
groupNo = -1;
|
||||||
|
groupPos = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Setting up the unzftab entries here is not strictly
|
||||||
|
necessary, but it does save having to do it later
|
||||||
|
in a separate pass, and so saves a block's worth of
|
||||||
|
cache misses.
|
||||||
|
*/
|
||||||
|
for (i = 0; i <= 255; i++) {
|
||||||
|
unzftab[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i <= 255; i++) {
|
||||||
|
yy[i] = (char) i;
|
||||||
|
}
|
||||||
|
|
||||||
|
last = -1;
|
||||||
|
|
||||||
|
{
|
||||||
|
int zt, zn, zvec, zj;
|
||||||
|
if (groupPos == 0) {
|
||||||
|
groupNo++;
|
||||||
|
groupPos = G_SIZE;
|
||||||
|
}
|
||||||
|
groupPos--;
|
||||||
|
zt = selector[groupNo];
|
||||||
|
zn = minLens[zt];
|
||||||
|
zvec = bsR(zn);
|
||||||
|
while (zvec > limit[zt][zn]) {
|
||||||
|
zn++;
|
||||||
|
{
|
||||||
|
{
|
||||||
|
while (bsLive < 1) {
|
||||||
|
int zzi;
|
||||||
|
char thech = 0;
|
||||||
|
try {
|
||||||
|
thech = (char) bsStream.read();
|
||||||
|
} catch (IOException e) {
|
||||||
|
compressedStreamEOF();
|
||||||
|
}
|
||||||
|
if (thech == -1) {
|
||||||
|
compressedStreamEOF();
|
||||||
|
}
|
||||||
|
zzi = thech;
|
||||||
|
bsBuff = (bsBuff << 8) | (zzi & 0xff);
|
||||||
|
bsLive += 8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
zj = (bsBuff >> (bsLive - 1)) & 1;
|
||||||
|
bsLive--;
|
||||||
|
}
|
||||||
|
zvec = (zvec << 1) | zj;
|
||||||
|
}
|
||||||
|
nextSym = perm[zt][zvec - base[zt][zn]];
|
||||||
|
}
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
|
||||||
|
if (nextSym == EOB) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nextSym == RUNA || nextSym == RUNB) {
|
||||||
|
char ch;
|
||||||
|
int s = -1;
|
||||||
|
int N = 1;
|
||||||
|
do {
|
||||||
|
if (nextSym == RUNA) {
|
||||||
|
s = s + (0 + 1) * N;
|
||||||
|
} else if (nextSym == RUNB) {
|
||||||
|
s = s + (1 + 1) * N;
|
||||||
|
}
|
||||||
|
N = N * 2;
|
||||||
|
{
|
||||||
|
int zt, zn, zvec, zj;
|
||||||
|
if (groupPos == 0) {
|
||||||
|
groupNo++;
|
||||||
|
groupPos = G_SIZE;
|
||||||
|
}
|
||||||
|
groupPos--;
|
||||||
|
zt = selector[groupNo];
|
||||||
|
zn = minLens[zt];
|
||||||
|
zvec = bsR(zn);
|
||||||
|
while (zvec > limit[zt][zn]) {
|
||||||
|
zn++;
|
||||||
|
{
|
||||||
|
{
|
||||||
|
while (bsLive < 1) {
|
||||||
|
int zzi;
|
||||||
|
char thech = 0;
|
||||||
|
try {
|
||||||
|
thech = (char) bsStream.read();
|
||||||
|
} catch (IOException e) {
|
||||||
|
compressedStreamEOF();
|
||||||
|
}
|
||||||
|
if (thech == -1) {
|
||||||
|
compressedStreamEOF();
|
||||||
|
}
|
||||||
|
zzi = thech;
|
||||||
|
bsBuff = (bsBuff << 8) | (zzi & 0xff);
|
||||||
|
bsLive += 8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
zj = (bsBuff >> (bsLive - 1)) & 1;
|
||||||
|
bsLive--;
|
||||||
|
}
|
||||||
|
zvec = (zvec << 1) | zj;
|
||||||
|
}
|
||||||
|
nextSym = perm[zt][zvec - base[zt][zn]];
|
||||||
|
}
|
||||||
|
} while (nextSym == RUNA || nextSym == RUNB);
|
||||||
|
|
||||||
|
s++;
|
||||||
|
ch = seqToUnseq[yy[0]];
|
||||||
|
unzftab[ch] += s;
|
||||||
|
|
||||||
|
while (s > 0) {
|
||||||
|
last++;
|
||||||
|
ll8[last] = ch;
|
||||||
|
s--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (last >= limitLast) {
|
||||||
|
blockOverrun();
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
char tmp;
|
||||||
|
last++;
|
||||||
|
if (last >= limitLast) {
|
||||||
|
blockOverrun();
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp = yy[nextSym - 1];
|
||||||
|
unzftab[seqToUnseq[tmp]]++;
|
||||||
|
ll8[last] = seqToUnseq[tmp];
|
||||||
|
|
||||||
|
/*
|
||||||
|
This loop is hammered during decompression,
|
||||||
|
hence the unrolling.
|
||||||
|
|
||||||
|
for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
|
||||||
|
*/
|
||||||
|
|
||||||
|
j = nextSym - 1;
|
||||||
|
for (; j > 3; j -= 4) {
|
||||||
|
yy[j] = yy[j - 1];
|
||||||
|
yy[j - 1] = yy[j - 2];
|
||||||
|
yy[j - 2] = yy[j - 3];
|
||||||
|
yy[j - 3] = yy[j - 4];
|
||||||
|
}
|
||||||
|
for (; j > 0; j--) {
|
||||||
|
yy[j] = yy[j - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
yy[0] = tmp;
|
||||||
|
{
|
||||||
|
int zt, zn, zvec, zj;
|
||||||
|
if (groupPos == 0) {
|
||||||
|
groupNo++;
|
||||||
|
groupPos = G_SIZE;
|
||||||
|
}
|
||||||
|
groupPos--;
|
||||||
|
zt = selector[groupNo];
|
||||||
|
zn = minLens[zt];
|
||||||
|
zvec = bsR(zn);
|
||||||
|
while (zvec > limit[zt][zn]) {
|
||||||
|
zn++;
|
||||||
|
{
|
||||||
|
{
|
||||||
|
while (bsLive < 1) {
|
||||||
|
int zzi;
|
||||||
|
char thech = 0;
|
||||||
|
try {
|
||||||
|
thech = (char) bsStream.read();
|
||||||
|
} catch (IOException e) {
|
||||||
|
compressedStreamEOF();
|
||||||
|
}
|
||||||
|
zzi = thech;
|
||||||
|
bsBuff = (bsBuff << 8) | (zzi & 0xff);
|
||||||
|
bsLive += 8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
zj = (bsBuff >> (bsLive - 1)) & 1;
|
||||||
|
bsLive--;
|
||||||
|
}
|
||||||
|
zvec = (zvec << 1) | zj;
|
||||||
|
}
|
||||||
|
nextSym = perm[zt][zvec - base[zt][zn]];
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setupBlock() {
|
||||||
|
int[] cftab = new int[257];
|
||||||
|
char ch;
|
||||||
|
|
||||||
|
cftab[0] = 0;
|
||||||
|
for (i = 1; i <= 256; i++) {
|
||||||
|
cftab[i] = unzftab[i - 1];
|
||||||
|
}
|
||||||
|
for (i = 1; i <= 256; i++) {
|
||||||
|
cftab[i] += cftab[i - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i <= last; i++) {
|
||||||
|
ch = (char) ll8[i];
|
||||||
|
tt[cftab[ch]] = i;
|
||||||
|
cftab[ch]++;
|
||||||
|
}
|
||||||
|
cftab = null;
|
||||||
|
|
||||||
|
tPos = tt[origPtr];
|
||||||
|
|
||||||
|
count = 0;
|
||||||
|
i2 = 0;
|
||||||
|
ch2 = 256; /* not a char and not EOF */
|
||||||
|
|
||||||
|
if (blockRandomised) {
|
||||||
|
rNToGo = 0;
|
||||||
|
rTPos = 0;
|
||||||
|
setupRandPartA();
|
||||||
|
} else {
|
||||||
|
setupNoRandPartA();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setupRandPartA() {
|
||||||
|
if (i2 <= last) {
|
||||||
|
chPrev = ch2;
|
||||||
|
ch2 = ll8[tPos];
|
||||||
|
tPos = tt[tPos];
|
||||||
|
if (rNToGo == 0) {
|
||||||
|
rNToGo = rNums[rTPos];
|
||||||
|
rTPos++;
|
||||||
|
if (rTPos == 512) {
|
||||||
|
rTPos = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rNToGo--;
|
||||||
|
ch2 ^= (int) ((rNToGo == 1) ? 1 : 0);
|
||||||
|
i2++;
|
||||||
|
|
||||||
|
currentChar = ch2;
|
||||||
|
currentState = RAND_PART_B_STATE;
|
||||||
|
mCrc.updateCRC(ch2);
|
||||||
|
} else {
|
||||||
|
endBlock();
|
||||||
|
initBlock();
|
||||||
|
setupBlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setupNoRandPartA() {
|
||||||
|
if (i2 <= last) {
|
||||||
|
chPrev = ch2;
|
||||||
|
ch2 = ll8[tPos];
|
||||||
|
tPos = tt[tPos];
|
||||||
|
i2++;
|
||||||
|
|
||||||
|
currentChar = ch2;
|
||||||
|
currentState = NO_RAND_PART_B_STATE;
|
||||||
|
mCrc.updateCRC(ch2);
|
||||||
|
} else {
|
||||||
|
endBlock();
|
||||||
|
initBlock();
|
||||||
|
setupBlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setupRandPartB() {
|
||||||
|
if (ch2 != chPrev) {
|
||||||
|
currentState = RAND_PART_A_STATE;
|
||||||
|
count = 1;
|
||||||
|
setupRandPartA();
|
||||||
|
} else {
|
||||||
|
count++;
|
||||||
|
if (count >= 4) {
|
||||||
|
z = ll8[tPos];
|
||||||
|
tPos = tt[tPos];
|
||||||
|
if (rNToGo == 0) {
|
||||||
|
rNToGo = rNums[rTPos];
|
||||||
|
rTPos++;
|
||||||
|
if (rTPos == 512) {
|
||||||
|
rTPos = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rNToGo--;
|
||||||
|
z ^= ((rNToGo == 1) ? 1 : 0);
|
||||||
|
j2 = 0;
|
||||||
|
currentState = RAND_PART_C_STATE;
|
||||||
|
setupRandPartC();
|
||||||
|
} else {
|
||||||
|
currentState = RAND_PART_A_STATE;
|
||||||
|
setupRandPartA();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setupRandPartC() {
|
||||||
|
if (j2 < (int) z) {
|
||||||
|
currentChar = ch2;
|
||||||
|
mCrc.updateCRC(ch2);
|
||||||
|
j2++;
|
||||||
|
} else {
|
||||||
|
currentState = RAND_PART_A_STATE;
|
||||||
|
i2++;
|
||||||
|
count = 0;
|
||||||
|
setupRandPartA();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setupNoRandPartB() {
|
||||||
|
if (ch2 != chPrev) {
|
||||||
|
currentState = NO_RAND_PART_A_STATE;
|
||||||
|
count = 1;
|
||||||
|
setupNoRandPartA();
|
||||||
|
} else {
|
||||||
|
count++;
|
||||||
|
if (count >= 4) {
|
||||||
|
z = ll8[tPos];
|
||||||
|
tPos = tt[tPos];
|
||||||
|
currentState = NO_RAND_PART_C_STATE;
|
||||||
|
j2 = 0;
|
||||||
|
setupNoRandPartC();
|
||||||
|
} else {
|
||||||
|
currentState = NO_RAND_PART_A_STATE;
|
||||||
|
setupNoRandPartA();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setupNoRandPartC() {
|
||||||
|
if (j2 < (int) z) {
|
||||||
|
currentChar = ch2;
|
||||||
|
mCrc.updateCRC(ch2);
|
||||||
|
j2++;
|
||||||
|
} else {
|
||||||
|
currentState = NO_RAND_PART_A_STATE;
|
||||||
|
i2++;
|
||||||
|
count = 0;
|
||||||
|
setupNoRandPartA();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setDecompressStructureSizes(int newSize100k) {
|
||||||
|
if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k
|
||||||
|
&& blockSize100k <= 9)) {
|
||||||
|
// throw new IOException("Invalid block size");
|
||||||
|
}
|
||||||
|
|
||||||
|
blockSize100k = newSize100k;
|
||||||
|
|
||||||
|
if (newSize100k == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int n = baseBlockSize * newSize100k;
|
||||||
|
ll8 = new char[n];
|
||||||
|
tt = new int[n];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
1665
src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java
Normal file
1665
src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java
Normal file
File diff suppressed because it is too large
Load Diff
167
src/main/java/org/apache/tools/bzip2/CRC.java
Normal file
167
src/main/java/org/apache/tools/bzip2/CRC.java
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
/*
|
||||||
|
* The Apache Software License, Version 1.1
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001-2002 The Apache Software Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* 3. The end-user documentation included with the redistribution, if
|
||||||
|
* any, must include the following acknowlegement:
|
||||||
|
* "This product includes software developed by the
|
||||||
|
* Apache Software Foundation (http://www.apache.org/)."
|
||||||
|
* Alternately, this acknowlegement may appear in the software itself,
|
||||||
|
* if and wherever such third-party acknowlegements normally appear.
|
||||||
|
*
|
||||||
|
* 4. The names "Ant" and "Apache Software
|
||||||
|
* Foundation" must not be used to endorse or promote products derived
|
||||||
|
* from this software without prior written permission. For written
|
||||||
|
* permission, please contact apache@apache.org.
|
||||||
|
*
|
||||||
|
* 5. Products derived from this software may not be called "Apache"
|
||||||
|
* nor may "Apache" appear in their names without prior written
|
||||||
|
* permission of the Apache Group.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||||
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This package is based on the work done by Keiron Liddle, Aftex Software
|
||||||
|
* <keiron@aftexsw.com> to whom the Ant project is very grateful for his
|
||||||
|
* great code.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.tools.bzip2;
|
||||||
|
|
||||||
|
/**
 * A simple class to hold and calculate the CRC for sanity checking
 * of the data.
 *
 * <p>The register starts at {@code 0xffffffff}, is advanced one byte at a
 * time through a 256-entry lookup table (most significant byte first),
 * and the final checksum is the bitwise complement of the register.
 *
 * @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a>
 */
class CRC {
    /**
     * Byte-indexed lookup table with 256 entries. Declared {@code final}
     * so the reference cannot be reassigned by other code; the contents
     * must be treated as read-only.
     */
    public static final int[] crc32Table = {
        0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9,
        0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005,
        0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61,
        0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
        0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9,
        0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
        0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011,
        0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd,
        0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
        0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5,
        0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81,
        0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d,
        0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49,
        0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95,
        0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
        0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d,
        0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae,
        0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072,
        0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16,
        0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca,
        0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde,
        0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02,
        0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066,
        0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
        0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e,
        0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692,
        0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6,
        0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a,
        0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e,
        0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
        0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686,
        0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a,
        0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637,
        0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb,
        0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f,
        0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53,
        0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47,
        0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b,
        0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
        0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623,
        0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7,
        0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
        0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f,
        0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3,
        0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7,
        0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b,
        0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f,
        0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3,
        0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640,
        0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c,
        0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8,
        0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24,
        0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30,
        0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
        0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088,
        0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654,
        0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0,
        0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c,
        0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18,
        0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
        0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0,
        0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c,
        0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668,
        0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
    };

    /** Running CRC register; holds the un-complemented intermediate value. */
    int globalCrc;

    /** Creates a CRC whose register is already reset to its start value. */
    public CRC() {
        initialiseCRC();
    }

    /** Resets the register to the all-ones start value. */
    void initialiseCRC() {
        globalCrc = 0xffffffff;
    }

    /**
     * @return the finished checksum, i.e. the bitwise complement of the
     *         current register; the register itself is left unchanged
     */
    int getFinalCRC() {
        return ~globalCrc;
    }

    /** @return the raw (un-complemented) register value */
    int getGlobalCRC() {
        return globalCrc;
    }

    /**
     * Overwrites the raw register value.
     *
     * @param newCrc raw register value to continue from
     */
    void setGlobalCRC(int newCrc) {
        globalCrc = newCrc;
    }

    /**
     * Folds one byte into the running CRC.
     *
     * @param inCh the byte to add; only the low 8 bits are used, so both
     *             unsigned (0..255) and sign-extended byte values give
     *             the same result
     */
    void updateCRC(int inCh) {
        // Unsigned shift plus mask keeps the table index in 0..255 without
        // a sign fix-up branch and without risking an out-of-range index.
        final int index = ((globalCrc >>> 24) ^ inCh) & 0xff;
        globalCrc = (globalCrc << 8) ^ crc32Table[index];
    }
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user