- cataloged all the licenses for the libraries Gridworks depends on
- added the secondstring libraries that contain all sorts of useful string distance functions - added a Java arithmetic coding library (used to implement a string distance based on PPM arithmetic coding) - added the vicino kNN string clustering library (from MIT's SIMILE) git-svn-id: http://google-refine.googlecode.com/svn/trunk@181 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
5a0a8bea4f
commit
c07431fb88
@ -11,8 +11,10 @@
|
||||
<classpathentry kind="lib" path="lib/json-20100208.jar" sourcepath="lib-src/json-20100208-sources.jar"/>
|
||||
<classpathentry kind="lib" path="lib/icu4j-4.2.1.jar" sourcepath="lib-src/icu4j-4.2.1-sources.jar"/>
|
||||
<classpathentry kind="lib" path="lib/cos-20081226.jar" sourcepath="lib-src/cos-20081226-sources.jar"/>
|
||||
<classpathentry kind="lib" path="lib/arithcode-1.1.jar" sourcepath="lib-src/arithcode-1.1-sources.jar"/>
|
||||
<classpathentry kind="lib" path="lib/jdatapath-alpha2.jar" sourcepath="lib-src/jdatapath-alpha2-sources.jar"/>
|
||||
<classpathentry kind="lib" path="lib/secondstring-20100303.jar" sourcepath="lib-src/secondstring-20100303-sources.jar"/>
|
||||
<classpathentry kind="lib" path="lib/poi-3.6.jar"/>
|
||||
<classpathentry kind="lib" path="lib/poi-ooxml-3.6.jar"/>
|
||||
<classpathentry kind="lib" path="lib/jdatapath-alpha2.jar" sourcepath="lib-src/jdatapath-alpha2-sources.jar"/>
|
||||
<classpathentry kind="output" path="build/classes"/>
|
||||
</classpath>
|
||||
|
43
LICENSE.txt
43
LICENSE.txt
@ -1,4 +1,45 @@
|
||||
/*
|
||||
* (c) Copyright 2010 Metaweb Technologies, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
See the 'licenses' directory for a list of the licenses for the libraries we depend on,
|
||||
ordered here by license:
|
||||
|
||||
licenses/apache2.0.LICENSE.txt
|
||||
jetty
|
||||
jetty-util
|
||||
servlet-api
|
||||
commons-lang
|
||||
commons-codec
|
||||
log4j
|
||||
xmlbeans
|
||||
poi
|
||||
poi-ooxml
|
||||
poi-ooxml-schemas
|
||||
jdatapath
|
||||
|
||||
licenses/secondstring.LICENSE.txt (BSD family)
|
||||
secondstring
|
||||
|
||||
licenses/dom4j.LICENSE.txt (BSD family)
|
||||
dom4j
|
||||
|
||||
licenses/simile.LICENSE.txt (BSD family)
|
||||
vicino
|
||||
|
||||
licenses/arithcode.LICENSE.txt (BSD family)
|
||||
arithcode
|
||||
|
||||
licenses/icu4j.LICENSE.txt (MIT family)
|
||||
icu4j
|
||||
|
||||
licenses/slf4j.LICENSE.txt (MIT family)
|
||||
slf4j-api
|
||||
slf4j-log4j12
|
||||
jcl-over-slf4j
|
||||
|
||||
licenses/json.LICENSE.txt (MIT family)
|
||||
json
|
||||
|
||||
licenses/cos.LICENSE.txt
|
||||
cos
|
BIN
lib-src/arithcode-1.1-sources.jar
Normal file
BIN
lib-src/arithcode-1.1-sources.jar
Normal file
Binary file not shown.
BIN
lib-src/secondstring-20100303-sources.jar
Normal file
BIN
lib-src/secondstring-20100303-sources.jar
Normal file
Binary file not shown.
BIN
lib/arithcode-1.1.jar
Normal file
BIN
lib/arithcode-1.1.jar
Normal file
Binary file not shown.
BIN
lib/secondstring-20100303.jar
Normal file
BIN
lib/secondstring-20100303.jar
Normal file
Binary file not shown.
202
licenses/apache2.0.LICENSE.txt
Normal file
202
licenses/apache2.0.LICENSE.txt
Normal file
@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
30
licenses/arithcode.LICENSE.txt
Normal file
30
licenses/arithcode.LICENSE.txt
Normal file
@ -0,0 +1,30 @@
|
||||
Copyright (c) 2002, Bob Carpenter.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
|
||||
* Neither the name of colloquial.com nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
82
licenses/cos.LICENSE.txt
Normal file
82
licenses/cos.LICENSE.txt
Normal file
@ -0,0 +1,82 @@
|
||||
Copyright (C) 2001-2009 by Jason Hunter, jhunter@servlets.com.
|
||||
All rights reserved.
|
||||
|
||||
The source code, object code, and documentation in the com.oreilly.servlet
|
||||
package is copyright and owned by Jason Hunter.
|
||||
|
||||
|
||||
ON-SITE USE RIGHTS
|
||||
|
||||
Permission is granted to use the com.oreilly.servlet.* packages in the
|
||||
development of any *non-commercial* project. For this use you are granted
|
||||
a non-exclusive, non-transferable limited license at no cost.
|
||||
|
||||
For a *commercial* project, permission is granted to use the
|
||||
com.oreilly.servlet.* packages provided that every person on the development
|
||||
team for that project owns a copy of the book Java Servlet Programming
|
||||
(O'Reilly) in its most recent edition. The most recent edition is currently
|
||||
the 2nd Edition, available in association with Amazon.com at
|
||||
http://www.amazon.com/exec/obidos/ASIN/0596000405/jasonhunter.
|
||||
|
||||
Other (sometimes cheaper) license terms are available upon request; please
|
||||
write to jhunter@servlets.com for more information.
|
||||
|
||||
|
||||
REDISTRIBUTION RIGHTS
|
||||
|
||||
Commercial redistribution rights of the com.oreilly.servlet.* packages are
|
||||
available by writing jhunter@servlets.com.
|
||||
|
||||
Non-commercial redistribution is permitted provided that:
|
||||
|
||||
1. You redistribute the package in object code form only (as Java .class files
|
||||
or a .jar file containing the .class files) and only as part of a product that
|
||||
uses the classes as part of its primary functionality.
|
||||
|
||||
2. The product containing the package is non-commercial in nature.
|
||||
|
||||
3. The public interface to the classes in the package, and the public
|
||||
interface to any classes with similar functionality, is hidden from end users
|
||||
when engaged in normal use of the product.
|
||||
|
||||
4. The distribution is not part of a software development kit, operating
|
||||
system, other library, or a development tool without written permission from
|
||||
the copyright holder.
|
||||
|
||||
5. The distribution includes copyright notice as follows: "The source code,
|
||||
object code, and documentation in the com.oreilly.servlet package is copyright
|
||||
and owned by Jason Hunter." in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
|
||||
6. You reproduce the above copyright notice, this list of conditions, and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
7. Licensor retains title to and ownership of the Software and all
|
||||
enhancements, modifications, and updates to the Software.
|
||||
|
||||
Note that the com.oreilly.servlet package is provided "as is" and the author
|
||||
will not be liable for any damages suffered as a result of your use.
|
||||
Furthermore, you understand the package comes without any guarantee of
|
||||
technical support.
|
||||
|
||||
You can always find the latest version of the com.oreilly.servlet package at
|
||||
http://www.servlets.com.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
|
||||
Thanks,
|
||||
|
||||
Jason Hunter
|
||||
jhunter AT servlets.com
|
27
licenses/dom4j.LICENSE.txt
Normal file
27
licenses/dom4j.LICENSE.txt
Normal file
@ -0,0 +1,27 @@
|
||||
Redistribution and use of this software and associated documentation ("Software"),
|
||||
with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain copyright statements and notices.
|
||||
Redistributions must also contain a copy of this document.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or other
|
||||
materials provided with the distribution.
|
||||
3. The name "DOM4J" must not be used to endorse or promote products derived from this
|
||||
Software without prior written permission of MetaStuff, Ltd. For written permission,
|
||||
please contact dom4j-info@metastuff.com.
|
||||
4. Products derived from this Software may not be called "DOM4J" nor may "DOM4J"
|
||||
appear in their names without prior written permission of MetaStuff, Ltd. DOM4J is a
|
||||
registered trademark of MetaStuff, Ltd.
|
||||
5. Due credit should be given to the DOM4J Project - http://www.dom4j.org
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESSED
|
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
||||
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||
TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
|
28
licenses/icu4j.LICENSE.txt
Normal file
28
licenses/icu4j.LICENSE.txt
Normal file
@ -0,0 +1,28 @@
|
||||
ICU License - ICU 1.8.1 and later
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright (c) 1995-2009 International Business Machines Corporation and others
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
software and associated documentation files (the "Software"), to deal in the Software
|
||||
without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, and/or sell copies of the Software, and to permit persons to whom
|
||||
the Software is furnished to do so, provided that the above copyright notice(s) and this
|
||||
permission notice appear in all copies of the Software and that both the above copyright
|
||||
notice(s) and this permission notice appear in supporting documentation.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
|
||||
INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
|
||||
USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder shall not be used
|
||||
in advertising or otherwise to promote the sale, use or other dealings in this Software
|
||||
without prior written authorization of the copyright holder.
|
||||
|
21
licenses/json.LICENSE.txt
Normal file
21
licenses/json.LICENSE.txt
Normal file
@ -0,0 +1,21 @@
|
||||
Copyright (c) 2002 JSON.org
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
The Software shall be used for Good, not Evil.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
41
licenses/secondstring.LICENSE.txt
Normal file
41
licenses/secondstring.LICENSE.txt
Normal file
@ -0,0 +1,41 @@
|
||||
Copyright (c) 2003 Carnegie Mellon University
|
||||
All rights reserved.
|
||||
Developed by: Center for Automated Learning and Discovery
|
||||
Carnegie Mellon University
|
||||
http://www.cald.cs.cmu.edu
|
||||
|
||||
The design and implementation of this software was supported in
|
||||
part by National Science Foundation Grant No. EIA-0131884 to the
|
||||
National Institute of Statistical Sciences, and by a contract
|
||||
from the Army Research Office to the Center for Computer and
|
||||
Communications Security with Carnegie Mellon University.
|
||||
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal with the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimers.
|
||||
Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimers in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
Neither the names of the Center for Automated Learning and Discovery,
|
||||
or Carnegie Mellon University, nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this Software
|
||||
without specific prior written permission. THE SOFTWARE IS PROVIDED
|
||||
"AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
|
||||
BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS WITH THE SOFTWARE.
|
||||
|
||||
[This is an instance of the University of Illinois/NCSA Open Source
|
||||
agreement, obtained from http://www.opensource.org/licenses/UoI-NCSA.php]
|
||||
|
29
licenses/simile.LICENSE.txt
Normal file
29
licenses/simile.LICENSE.txt
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* (c) Copyright Massachusetts Institute of Technology & Contributors.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
22
licenses/slf4j.LICENSE.txt
Normal file
22
licenses/slf4j.LICENSE.txt
Normal file
@ -0,0 +1,22 @@
|
||||
Copyright (c) 2004-2008 QOS.ch
|
||||
All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
7
src/main/java/edu/mit/simile/vicino/Distance.java
Normal file
7
src/main/java/edu/mit/simile/vicino/Distance.java
Normal file
@ -0,0 +1,7 @@
|
||||
package edu.mit.simile.vicino;
|
||||
|
||||
/**
 * A measure of how far apart two strings are.
 *
 * Implementations supply the actual metric; this interface only fixes the
 * call shape (two strings in, one float out).
 */
public interface Distance {

    /**
     * Computes the distance between two strings.
     *
     * @param x the first string
     * @param y the second string
     * @return the distance between {@code x} and {@code y}
     *         (NOTE(review): range/normalization is implementation-defined
     *         as far as this interface shows - confirm against implementations)
     */
    float d(String x, String y);
}
|
61
src/main/java/edu/mit/simile/vicino/Distributor.java
Normal file
61
src/main/java/edu/mit/simile/vicino/Distributor.java
Normal file
@ -0,0 +1,61 @@
|
||||
package edu.mit.simile.vicino;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import edu.mit.simile.vicino.Distance;
|
||||
|
||||
public class Distributor extends Operator {
|
||||
|
||||
private static final int COLUMNS = 70;
|
||||
private static final char CHAR = '=';
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
Distance d = getDistance(args[0]);
|
||||
|
||||
List<String> strings = getStrings(args[1]);
|
||||
|
||||
int buckets = Integer.parseInt(args[2]);
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
int[] values = new int[buckets];
|
||||
|
||||
int size = strings.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
String x = (String) strings.get((int) (Math.random() * size));
|
||||
String y = (String) strings.get((int) (Math.random() * size));
|
||||
int dist = (int) (buckets * d.d(x, y));
|
||||
values[dist]++;
|
||||
System.out.print(".");
|
||||
}
|
||||
System.out.println();
|
||||
|
||||
long stop = System.currentTimeMillis();
|
||||
float m = ((float) (stop - start)) / (float) size;
|
||||
|
||||
int maxValue = 0;
|
||||
for (int i = 0; i < buckets; i++) {
|
||||
if (values[i] > maxValue) {
|
||||
maxValue = values[i];
|
||||
}
|
||||
}
|
||||
|
||||
System.out
|
||||
.println("+-------------------------------------------------------------------");
|
||||
for (int i = 0; i < buckets; i++) {
|
||||
System.out.println("|" + bar(COLUMNS * values[i] / maxValue));
|
||||
}
|
||||
System.out
|
||||
.println("+-------------------------------------------------------------------");
|
||||
|
||||
System.out.println("\n Each distance calculation took: " + m + " millis");
|
||||
}
|
||||
|
||||
static private String bar(int value) {
|
||||
StringBuffer b = new StringBuffer(value);
|
||||
for (int i = 0; i < value; i++) {
|
||||
b.append(CHAR);
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
}
|
10
src/main/java/edu/mit/simile/vicino/Meter.java
Normal file
10
src/main/java/edu/mit/simile/vicino/Meter.java
Normal file
@ -0,0 +1,10 @@
|
||||
package edu.mit.simile.vicino;
|
||||
|
||||
public class Meter extends Operator {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Distance d = getDistance(args[0]);
|
||||
System.out.println(args[1] + " <- " + d.d(args[1], args[2]) + " -> " + args[2]);
|
||||
}
|
||||
|
||||
}
|
47
src/main/java/edu/mit/simile/vicino/Operator.java
Normal file
47
src/main/java/edu/mit/simile/vicino/Operator.java
Normal file
@ -0,0 +1,47 @@
|
||||
package edu.mit.simile.vicino;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class Operator {
|
||||
|
||||
static void log(String msg) {
|
||||
System.out.println(msg);
|
||||
}
|
||||
|
||||
static Distance getDistance(String distance) throws Exception {
|
||||
return (Distance) Class.forName("edu.mit.simile.vicino.distances." + distance + "Distance").newInstance();
|
||||
}
|
||||
|
||||
static List<String> getStrings(String fileName) throws IOException {
|
||||
ArrayList<String> strings = new ArrayList<String>();
|
||||
|
||||
File file = new File(fileName);
|
||||
if (file.isDirectory()) {
|
||||
File[] files = file.listFiles();
|
||||
for (int i = 0; i < files.length; i++) {
|
||||
BufferedReader input = new BufferedReader(new FileReader(files[i]));
|
||||
StringBuffer b = new StringBuffer();
|
||||
String line;
|
||||
while ((line = input.readLine()) != null) {
|
||||
b.append(line.trim());
|
||||
}
|
||||
input.close();
|
||||
strings.add(b.toString());
|
||||
}
|
||||
} else {
|
||||
BufferedReader input = new BufferedReader(new FileReader(fileName));
|
||||
String line;
|
||||
while ((line = input.readLine()) != null) {
|
||||
strings.add(line.trim());
|
||||
}
|
||||
input.close();
|
||||
}
|
||||
|
||||
return strings;
|
||||
}
|
||||
}
|
52
src/main/java/edu/mit/simile/vicino/Seeker.java
Normal file
52
src/main/java/edu/mit/simile/vicino/Seeker.java
Normal file
@ -0,0 +1,52 @@
|
||||
package edu.mit.simile.vicino;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Serializable;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import edu.mit.simile.vicino.vptree.VPTree;
|
||||
import edu.mit.simile.vicino.vptree.VPTreeBuilder;
|
||||
import edu.mit.simile.vicino.vptree.VPTreeSeeker;
|
||||
|
||||
public class Seeker extends Operator {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Distance d = getDistance(args[0]);
|
||||
|
||||
log("Working with distance: " + d);
|
||||
List<String> strings = getStrings(args[1]);
|
||||
log("Obtained " + strings.size() + " from " + args[1]);
|
||||
|
||||
log("Building VPTree...");
|
||||
VPTreeBuilder builder = new VPTreeBuilder(d);
|
||||
VPTree tree = builder.buildVPTree(strings);
|
||||
log("..done");
|
||||
|
||||
VPTreeSeeker seeker = new VPTreeSeeker(d, tree);
|
||||
|
||||
log("type a string|range then hit return:");
|
||||
BufferedReader input = new BufferedReader(new InputStreamReader(
|
||||
System.in));
|
||||
String line = null;
|
||||
while ((line = input.readLine()) != null) {
|
||||
int index = line.indexOf('|');
|
||||
String query = line.substring(0, index);
|
||||
float range = Float.parseFloat(line.substring(index + 1));
|
||||
long start = System.currentTimeMillis();
|
||||
List<? extends Serializable> results = seeker.range(query, range);
|
||||
long stop = System.currentTimeMillis();
|
||||
Iterator<? extends Serializable> j = results.iterator();
|
||||
if (j.hasNext()) {
|
||||
while (j.hasNext()) {
|
||||
String r = (String) j.next();
|
||||
log(" " + r);
|
||||
}
|
||||
log(" [done in " + (stop - start) + "ms]");
|
||||
} else {
|
||||
log(" [no results found in " + (stop - start) + "ms]");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
50
src/main/java/edu/mit/simile/vicino/Tester.java
Normal file
50
src/main/java/edu/mit/simile/vicino/Tester.java
Normal file
@ -0,0 +1,50 @@
|
||||
package edu.mit.simile.vicino;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class Tester extends Operator {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Distance d = getDistance(args[0]);
|
||||
|
||||
List<String> strings = getStrings(args[1]);
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
int size = strings.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
String x = (String) strings.get((int) (Math.random() * size));
|
||||
String y = (String) strings.get((int) (Math.random() * size));
|
||||
String z = (String) strings.get((int) (Math.random() * size));
|
||||
boolean metric = metric(x, y, z, d);
|
||||
if (metric) {
|
||||
System.out.println("metric");
|
||||
} else {
|
||||
System.out.println("***** NOT METRIC *****");
|
||||
}
|
||||
}
|
||||
|
||||
long stop = System.currentTimeMillis();
|
||||
float m = ((float) (stop - start)) / (float) size;
|
||||
|
||||
System.out.println("\n Each metric evaluation took: " + m + " millis");
|
||||
}
|
||||
|
||||
static boolean metric(String x, String y, String z, Distance d) {
|
||||
float dxx = d.d(x, x);
|
||||
boolean identity = (dxx == 0.0f);
|
||||
float dxy = d.d(x, y);
|
||||
float dyx = d.d(y, x);
|
||||
boolean simmetrical = (dxy == dyx);
|
||||
float dxz = d.d(x, z);
|
||||
float dyz = d.d(y, z);
|
||||
boolean triangular = (dxz <= dxy + dyz);
|
||||
return (identity && simmetrical && triangular);
|
||||
}
|
||||
|
||||
static Distance getDistance(String distance) throws Exception {
|
||||
return (Distance) Class.forName(
|
||||
"edu.mit.simile.vicino.distances." + distance + "Distance")
|
||||
.newInstance();
|
||||
}
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
package edu.mit.simile.vicino.distances;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.tools.bzip2.CBZip2OutputStream;
|
||||
|
||||
public class BZip2Distance extends PseudoMetricDistance {
|
||||
|
||||
public float d2(String x, String y) {
|
||||
String str = x + y;
|
||||
float result = 0.0f;
|
||||
try {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length());
|
||||
CBZip2OutputStream os = new CBZip2OutputStream(baos);
|
||||
os.write(str.getBytes());
|
||||
os.close();
|
||||
baos.close();
|
||||
result = baos.toByteArray().length;
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
package edu.mit.simile.vicino.distances;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
public class GZipDistance extends PseudoMetricDistance {
|
||||
|
||||
public float d2(String x, String y) {
|
||||
String str = x + y;
|
||||
float result = 0.0f;
|
||||
try {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length());
|
||||
GZIPOutputStream os = new GZIPOutputStream(baos);
|
||||
os.write(str.getBytes());
|
||||
os.close();
|
||||
baos.close();
|
||||
result = baos.toByteArray().length;
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,18 @@
|
||||
package edu.mit.simile.vicino.distances;
|
||||
|
||||
import com.wcohen.ss.Jaccard;
|
||||
import com.wcohen.ss.api.StringDistance;
|
||||
|
||||
public class JaccardDistance extends MetricDistance {
|
||||
|
||||
StringDistance distance;
|
||||
|
||||
public JaccardDistance() {
|
||||
this.distance = new Jaccard();
|
||||
}
|
||||
|
||||
protected float d2(String x, String y) {
|
||||
return Math.abs((float) this.distance.score(x, y) - 1.0f);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,18 @@
|
||||
package edu.mit.simile.vicino.distances;
|
||||
|
||||
import com.wcohen.ss.Jaro;
|
||||
import com.wcohen.ss.api.StringDistance;
|
||||
|
||||
public class JaroDistance extends MetricDistance {
|
||||
|
||||
StringDistance distance;
|
||||
|
||||
public JaroDistance() {
|
||||
this.distance = new Jaro();
|
||||
}
|
||||
|
||||
protected float d2(String x, String y) {
|
||||
return Math.abs((float) this.distance.score(x, y) - 1.0f);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
package edu.mit.simile.vicino.distances;
|
||||
|
||||
import com.wcohen.ss.Levenstein;
|
||||
import com.wcohen.ss.api.StringDistance;
|
||||
|
||||
import edu.mit.simile.vicino.Distance;
|
||||
|
||||
public class LevensteinDistance implements Distance {
|
||||
|
||||
StringDistance distance;
|
||||
|
||||
public LevensteinDistance() {
|
||||
this.distance = new Levenstein();
|
||||
}
|
||||
|
||||
public float d(String x, String y) {
|
||||
float d = Math.abs((float) this.distance.score(x, y));
|
||||
// System.out.println(this.distance.explainScore(x,y));
|
||||
return d / (x.length() + y.length());
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
package edu.mit.simile.vicino.distances;
|
||||
|
||||
import edu.mit.simile.vicino.Distance;
|
||||
|
||||
public abstract class MetricDistance implements Distance {
|
||||
|
||||
/*
|
||||
* public float d(String x,String y) { float dxy = d2(x,y); float dx =
|
||||
* d2(x,""); float dy = d2(y,""); float result = dxy / (dx + dy); return
|
||||
* result; }
|
||||
*/
|
||||
|
||||
public float d(String x, String y) {
|
||||
return d2(x, y);
|
||||
}
|
||||
|
||||
abstract float d2(String x, String y);
|
||||
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
package edu.mit.simile.vicino.distances;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import com.colloquial.arithcode.ArithCodeOutputStream;
|
||||
import com.colloquial.arithcode.PPMModel;
|
||||
|
||||
public class PPMDistance extends PseudoMetricDistance {
|
||||
|
||||
public float d2(String x, String y) {
|
||||
String str = x + y;
|
||||
float result = 0.0f;
|
||||
try {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length());
|
||||
ArithCodeOutputStream os = new ArithCodeOutputStream(baos,new PPMModel(8));
|
||||
os.write(str.getBytes());
|
||||
os.close();
|
||||
baos.close();
|
||||
result = baos.toByteArray().length;
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,17 @@
|
||||
package edu.mit.simile.vicino.distances;
|
||||
|
||||
import edu.mit.simile.vicino.Distance;
|
||||
|
||||
public abstract class PseudoMetricDistance implements Distance {
|
||||
|
||||
public float d(String x, String y) {
|
||||
float cxx = d2(x, x);
|
||||
float cyy = d2(y, y);
|
||||
float cxy = d2(x, y);
|
||||
float cyx = d2(y, x);
|
||||
float result1 = (cxy + cyx) / (cxx + cyy) - 1.0f;
|
||||
return result1;
|
||||
}
|
||||
|
||||
protected abstract float d2(String x, String y);
|
||||
}
|
42
src/main/java/edu/mit/simile/vicino/vptree/Node.java
Executable file
42
src/main/java/edu/mit/simile/vicino/vptree/Node.java
Executable file
@ -0,0 +1,42 @@
|
||||
package edu.mit.simile.vicino.vptree;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * An (object, distance) pair: the distance is that of the object from the
 * vantage point at the current step of the algorithm.
 *
 * @author Paolo Ciccarese
 */
public class Node implements Serializable {

    private static final long serialVersionUID = -2077473220894258550L;

    private final Serializable obj;
    private float distance;

    /**
     * Creates a node holding the given object, with no distance assigned yet.
     */
    public Node(Serializable obj) {
        this.obj = obj;
    }

    /**
     * Creates a node holding the given object with an initial distance.
     */
    public Node(Serializable obj, int i) {
        this(obj);
        this.distance = i;
    }

    /** Returns the wrapped object. */
    public Serializable get() {
        return obj;
    }

    /** Returns the distance currently stored for this node. */
    public float getDistance() {
        return this.distance;
    }

    /** Stores a new distance for this node. */
    public void setDistance(float distance) {
        this.distance = distance;
    }

    /** Returns the string form of the wrapped object. */
    public String toString() {
        return this.obj.toString();
    }
}
|
94
src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java
Executable file
94
src/main/java/edu/mit/simile/vicino/vptree/NodeSorter.java
Executable file
@ -0,0 +1,94 @@
|
||||
package edu.mit.simile.vicino.vptree;
|
||||
|
||||
public class NodeSorter {
|
||||
|
||||
/**
|
||||
* Sort array of Objects using the QuickSort algorithm.
|
||||
*
|
||||
* @param s
|
||||
* An Object[].
|
||||
* @param lo
|
||||
* The current lower bound.
|
||||
* @param hi
|
||||
* The current upper bound.
|
||||
*/
|
||||
public static void sort(Node nodes[], int lo, int hi) {
|
||||
if (lo >= hi) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use median-of-three(lo, mid, hi) to pick a partition. Also swap them
|
||||
* into relative order while we are at it.
|
||||
*/
|
||||
int mid = (lo + hi) / 2;
|
||||
|
||||
if (nodes[lo].getDistance() > nodes[mid].getDistance()) {
|
||||
// Swap.
|
||||
Node tmp = nodes[lo];
|
||||
nodes[lo] = nodes[mid];
|
||||
nodes[mid] = tmp;
|
||||
}
|
||||
|
||||
if (nodes[mid].getDistance() > nodes[hi].getDistance()) {
|
||||
// Swap .
|
||||
Node tmp = nodes[mid];
|
||||
nodes[mid] = nodes[hi];
|
||||
nodes[hi] = tmp;
|
||||
|
||||
if (nodes[lo].getDistance() > nodes[mid].getDistance()) {
|
||||
// Swap.
|
||||
Node tmp2 = nodes[lo];
|
||||
nodes[lo] = nodes[mid];
|
||||
nodes[mid] = tmp2;
|
||||
}
|
||||
}
|
||||
|
||||
// Start one past lo since already handled lo.
|
||||
|
||||
int left = lo + 1;
|
||||
|
||||
// Similarly, end one before hi since already handled hi.
|
||||
|
||||
int right = hi - 1;
|
||||
|
||||
// If there are three or fewer elements, we are done.
|
||||
|
||||
if (left >= right) {
|
||||
return;
|
||||
}
|
||||
|
||||
Node partition = nodes[mid];
|
||||
|
||||
while (true) {
|
||||
while (nodes[right].getDistance() > partition.getDistance()) {
|
||||
--right;
|
||||
}
|
||||
|
||||
while (left < right && nodes[left].getDistance() <= partition.getDistance()) {
|
||||
++left;
|
||||
}
|
||||
|
||||
if (left < right) {
|
||||
// Swap.
|
||||
Node tmp = nodes[left];
|
||||
nodes[left] = nodes[right];
|
||||
nodes[right] = tmp;
|
||||
|
||||
--right;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
sort(nodes, lo, left);
|
||||
sort(nodes, left + 1, hi);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts and array of objects.
|
||||
*/
|
||||
public void sort(Node nodes[]) {
|
||||
NodeSorter.sort(nodes, 0, nodes.length - 1);
|
||||
}
|
||||
}
|
52
src/main/java/edu/mit/simile/vicino/vptree/TNode.java
Executable file
52
src/main/java/edu/mit/simile/vicino/vptree/TNode.java
Executable file
@ -0,0 +1,52 @@
|
||||
package edu.mit.simile.vicino.vptree;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * A tree node: it holds an object, a median value and references to a left
 * and a right child.
 *
 * @author Paolo Ciccarese
 */
public class TNode implements Serializable {

    private static final long serialVersionUID = -217604190976851241L;

    private final Serializable obj;
    private float median;
    private TNode left;
    private TNode right;

    /**
     * Creates a node for the given object. The object is fixed at creation
     * time; the children are attached later through the setters.
     */
    public TNode(Serializable obj) {
        this.obj = obj;
    }

    /** Returns the object held by this node. */
    public Serializable get() {
        return obj;
    }

    /** Returns the median stored on this node. */
    public float getMedian() {
        return this.median;
    }

    /** Stores the median on this node. */
    public void setMedian(float median) {
        this.median = median;
    }

    /** Returns the left child, or null if none was set. */
    public TNode getLeft() {
        return this.left;
    }

    /** Attaches the left child. */
    public void setLeft(TNode leftNode) {
        left = leftNode;
    }

    /** Returns the right child, or null if none was set. */
    public TNode getRight() {
        return this.right;
    }

    /** Attaches the right child. */
    public void setRight(TNode rightNode) {
        right = rightNode;
    }
}
|
33
src/main/java/edu/mit/simile/vicino/vptree/VPTree.java
Executable file
33
src/main/java/edu/mit/simile/vicino/vptree/VPTree.java
Executable file
@ -0,0 +1,33 @@
|
||||
package edu.mit.simile.vicino.vptree;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* The VPTree class.
|
||||
*
|
||||
* @author Paolo Ciccarese
|
||||
*/
|
||||
public class VPTree implements Serializable {
|
||||
|
||||
private static final long serialVersionUID = 1291056732155841123L;
|
||||
|
||||
private TNode root;
|
||||
|
||||
/**
|
||||
* Sets the root of the VPTree.
|
||||
*
|
||||
* @param root The VPTree root.
|
||||
*/
|
||||
public void setRoot(TNode root) {
|
||||
this.root = root;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the root of the VPTree.
|
||||
*
|
||||
* @return The VPTree root.
|
||||
*/
|
||||
public TNode getRoot() {
|
||||
return root;
|
||||
}
|
||||
}
|
101
src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java
Executable file
101
src/main/java/edu/mit/simile/vicino/vptree/VPTreeBuilder.java
Executable file
@ -0,0 +1,101 @@
|
||||
package edu.mit.simile.vicino.vptree;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.Random;
|
||||
|
||||
import edu.mit.simile.vicino.Distance;
|
||||
|
||||
/**
|
||||
* @author Paolo Ciccarese
|
||||
* @author Stefano Mazzocchi
|
||||
*/
|
||||
public class VPTreeBuilder {
|
||||
|
||||
private static final boolean DEBUG = false;
|
||||
|
||||
private Random generator = new Random(System.currentTimeMillis());
|
||||
|
||||
private VPTree tree;
|
||||
private final Distance distance;
|
||||
|
||||
/**
|
||||
* Defines a VPTree Builder for a specific distance.
|
||||
*
|
||||
* @param distance The class implementing the distance.
|
||||
*/
|
||||
public VPTreeBuilder(Distance distance) {
|
||||
this.distance = distance;
|
||||
}
|
||||
|
||||
public VPTree buildVPTree(Collection<? extends Serializable> col) {
|
||||
Node nodes[] = new Node[col.size()];
|
||||
Iterator<? extends Serializable> i = col.iterator();
|
||||
int counter = 0;
|
||||
while (i.hasNext()) {
|
||||
Serializable s = (Serializable) i.next();
|
||||
nodes[counter++] = new Node(s);
|
||||
}
|
||||
|
||||
tree = new VPTree();
|
||||
tree.setRoot(addNode(nodes, 0, nodes.length - 1));
|
||||
return tree;
|
||||
}
|
||||
|
||||
private TNode addNode(Node nodes[], int begin, int end) {
|
||||
|
||||
int delta = end - begin;
|
||||
int middle = begin + delta / 2;
|
||||
|
||||
TNode node = new TNode(nodes[begin + getRandomIndex(delta)].get());
|
||||
|
||||
if (DEBUG) System.out.println("\nnode: " + node.get().toString());
|
||||
|
||||
calculateDistances(node, nodes, begin, end);
|
||||
orderDistances(nodes, begin, end);
|
||||
|
||||
if (DEBUG) {
|
||||
for (int i = begin; i <= end; i++) {
|
||||
System.out.println(" +-- " + nodes[i].getDistance() + " --> " + nodes[i].get());
|
||||
}
|
||||
}
|
||||
|
||||
if (delta + 1 > 0) {
|
||||
if (middle - (begin + 1) >= 1) {
|
||||
node.setLeft(addNode(nodes, begin + 1, middle));
|
||||
if (DEBUG) System.out.println(" L --> " + node.getLeft().get());
|
||||
} else if (middle - (begin + 1) == 0) {
|
||||
node.setLeft(new TNode(nodes[middle].get()));
|
||||
if (DEBUG) System.out.println(" L --> " + node.getLeft().get());
|
||||
}
|
||||
|
||||
if ((end - (middle + 1)) >= 1) {
|
||||
node.setRight(addNode(nodes, middle + 1, end));
|
||||
if (DEBUG) System.out.println(" R --> " + node.getRight().get());
|
||||
} else if (end - (middle + 1) == 0) {
|
||||
node.setRight(new TNode(nodes[middle + 1].get()));
|
||||
if (DEBUG) System.out.println(" R --> " + node.getRight().get());
|
||||
}
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
private void calculateDistances(TNode pivot, Node nodes[], int begin, int end) {
|
||||
for (int i = begin; i <= end; i++) {
|
||||
Object x = pivot.get();
|
||||
Object y = nodes[i].get();
|
||||
float d = (x == y) ? 0.0f : distance.d(x.toString(), y.toString());
|
||||
nodes[i].setDistance(d);
|
||||
}
|
||||
}
|
||||
|
||||
private void orderDistances(Node nodes[], int begin, int end) {
|
||||
NodeSorter.sort(nodes, begin, end);
|
||||
}
|
||||
|
||||
private int getRandomIndex(int max) {
|
||||
return generator.nextInt(max);
|
||||
}
|
||||
}
|
48
src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java
Executable file
48
src/main/java/edu/mit/simile/vicino/vptree/VPTreeSeeker.java
Executable file
@ -0,0 +1,48 @@
|
||||
package edu.mit.simile.vicino.vptree;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import edu.mit.simile.vicino.Distance;
|
||||
|
||||
/**
|
||||
* @author Paolo Ciccarese
|
||||
*/
|
||||
public class VPTreeSeeker {
|
||||
|
||||
VPTree tree;
|
||||
Distance distance;
|
||||
|
||||
public VPTreeSeeker(Distance distance, VPTree tree) {
|
||||
this.distance = distance;
|
||||
this.tree = tree;
|
||||
}
|
||||
|
||||
public List<? extends Serializable> range(Object query, float range) {
|
||||
return rangeTraversal(query, range, tree.getRoot(), new ArrayList<Serializable>());
|
||||
}
|
||||
|
||||
private List<Serializable> rangeTraversal(Object query, float range, TNode tNode, List<Serializable> results) {
|
||||
|
||||
if (tNode != null) {
|
||||
float distance = this.distance.d(query.toString(), tNode.toString());
|
||||
|
||||
if (distance < range) {
|
||||
results.add(tNode.get());
|
||||
}
|
||||
|
||||
if ((distance + range) < tNode.getMedian()) {
|
||||
rangeTraversal(query, range, tNode.getLeft(), results);
|
||||
} else if ((distance - range) > tNode.getMedian()) {
|
||||
rangeTraversal(query, range, tNode.getRight(), results);
|
||||
} else {
|
||||
rangeTraversal(query, range, tNode.getLeft(), results);
|
||||
rangeTraversal(query, range, tNode.getRight(), results);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
}
|
136
src/main/java/org/apache/tools/bzip2/BZip2Constants.java
Normal file
136
src/main/java/org/apache/tools/bzip2/BZip2Constants.java
Normal file
@ -0,0 +1,136 @@
|
||||
/*
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 2001 The Apache Software Foundation. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution, if
|
||||
* any, must include the following acknowlegement:
|
||||
* "This product includes software developed by the
|
||||
* Apache Software Foundation (http://www.apache.org/)."
|
||||
* Alternately, this acknowlegement may appear in the software itself,
|
||||
* if and wherever such third-party acknowlegements normally appear.
|
||||
*
|
||||
* 4. The names "Ant" and "Apache Software
|
||||
* Foundation" must not be used to endorse or promote products derived
|
||||
* from this software without prior written permission. For written
|
||||
* permission, please contact apache@apache.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Apache"
|
||||
* nor may "Apache" appear in their names without prior written
|
||||
* permission of the Apache Group.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* This software consists of voluntary contributions made by many
|
||||
* individuals on behalf of the Apache Software Foundation. For more
|
||||
* information on the Apache Software Foundation, please see
|
||||
* <http://www.apache.org/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This package is based on the work done by Keiron Liddle, Aftex Software
|
||||
* <keiron@aftexsw.com> to whom the Ant project is very grateful for his
|
||||
* great code.
|
||||
*/
|
||||
|
||||
package org.apache.tools.bzip2;
|
||||
|
||||
/**
 * Constants and static tables shared by the compress and decompress classes.
 *
 * @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a>
 */
public interface BZip2Constants {

    int baseBlockSize = 100000;

    int MAX_ALPHA_SIZE = 258;
    int MAX_CODE_LEN = 23;

    int RUNA = 0;
    int RUNB = 1;

    int N_GROUPS = 6;
    int G_SIZE = 50;
    int N_ITERS = 4;

    // 2 + 900000 / 50 = 18002
    int MAX_SELECTORS = 2 + 900000 / G_SIZE;

    int NUM_OVERSHOOT_BYTES = 20;

    // Table of 512 integers.
    // NOTE(review): presumably the bzip2 randomisation table - confirm
    // against the bzip2 reference sources.
    int[] rNums = {
        619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
        985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
        733, 859, 335, 708, 621, 574,  73, 654, 730, 472,
        419, 436, 278, 496, 867, 210, 399, 680, 480,  51,
        878, 465, 811, 169, 869, 675, 611, 697, 867, 561,
        862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
        150, 238,  59, 379, 684, 877, 625, 169, 643, 105,
        170, 607, 520, 932, 727, 476, 693, 425, 174, 647,
         73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
        909, 545, 703, 919, 874, 474, 882, 500, 594, 612,
        641, 801, 220, 162, 819, 984, 589, 513, 495, 799,
        161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
        382, 596, 414, 171, 516, 375, 682, 485, 911, 276,
         98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
        227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
        469,  68, 770, 919, 190, 373, 294, 822, 808, 206,
        184, 943, 795, 384, 383, 461, 404, 758, 839, 887,
        715,  67, 618, 276, 204, 918, 873, 777, 604, 560,
        951, 160, 578, 722,  79, 804,  96, 409, 713, 940,
        652, 934, 970, 447, 318, 353, 859, 672, 112, 785,
        645, 863, 803, 350, 139,  93, 354,  99, 820, 908,
        609, 772, 154, 274, 580, 184,  79, 626, 630, 742,
        653, 282, 762, 623, 680,  81, 927, 626, 789, 125,
        411, 521, 938, 300, 821,  78, 343, 175, 128, 250,
        170, 774, 972, 275, 999, 639, 495,  78, 352, 126,
        857, 956, 358, 619, 580, 124, 737, 594, 701, 612,
        669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
        944, 375, 748,  52, 600, 747, 642, 182, 862,  81,
        344, 805, 988, 739, 511, 655, 814, 334, 249, 515,
        897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
        433, 837, 553, 268, 926, 240, 102, 654, 459,  51,
        686, 754, 806, 760, 493, 403, 415, 394, 687, 700,
        946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
        978, 321, 576, 617, 626, 502, 894, 679, 243, 440,
        680, 879, 194, 572, 640, 724, 926,  56, 204, 700,
        707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
        297,  59,  87, 824, 713, 663, 412, 693, 342, 606,
        134, 108, 571, 364, 631, 212, 174, 643, 304, 329,
        343,  97, 430, 751, 497, 314, 983, 374, 822, 928,
        140, 206,  73, 263, 980, 736, 876, 478, 430, 305,
        170, 514, 364, 692, 829,  82, 855, 953, 676, 246,
        369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
        804, 378, 215, 828, 592, 281, 565, 555, 710,  82,
        896, 831, 547, 261, 524, 462, 293, 465, 502,  56,
        661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
        768, 550, 608, 933, 378, 286, 215, 979, 792, 961,
         61, 688, 793, 644, 986, 403, 106, 366, 905, 644,
        372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
        780, 773, 635, 389, 707, 100, 626, 958, 165, 504,
        920, 176, 193, 713, 857, 265, 203,  50, 668, 108,
        645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
        936, 638
    };
}
|
865
src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java
Normal file
865
src/main/java/org/apache/tools/bzip2/CBZip2InputStream.java
Normal file
@ -0,0 +1,865 @@
|
||||
/*
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 2001-2003 The Apache Software Foundation. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution, if
|
||||
* any, must include the following acknowlegement:
|
||||
* "This product includes software developed by the
|
||||
* Apache Software Foundation (http://www.apache.org/)."
|
||||
* Alternately, this acknowlegement may appear in the software itself,
|
||||
* if and wherever such third-party acknowlegements normally appear.
|
||||
*
|
||||
* 4. The names "Ant" and "Apache Software
|
||||
* Foundation" must not be used to endorse or promote products derived
|
||||
* from this software without prior written permission. For written
|
||||
* permission, please contact apache@apache.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Apache"
|
||||
* nor may "Apache" appear in their names without prior written
|
||||
* permission of the Apache Group.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* This software consists of voluntary contributions made by many
|
||||
* individuals on behalf of the Apache Software Foundation. For more
|
||||
* information on the Apache Software Foundation, please see
|
||||
* <http://www.apache.org/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This package is based on the work done by Keiron Liddle, Aftex Software
|
||||
* <keiron@aftexsw.com> to whom the Ant project is very grateful for his
|
||||
* great code.
|
||||
*/
|
||||
package org.apache.tools.bzip2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* An input stream that decompresses from the BZip2 format (without the file
|
||||
* header chars) to be read as any other stream.
|
||||
*
|
||||
* @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a>
|
||||
*/
|
||||
public class CBZip2InputStream extends InputStream implements BZip2Constants {
|
||||
private static void cadvise() {
|
||||
System.out.println("CRC Error");
|
||||
//throw new CCoruptionError();
|
||||
}
|
||||
|
||||
private static void compressedStreamEOF() {
|
||||
cadvise();
|
||||
}
|
||||
|
||||
private void makeMaps() {
|
||||
int i;
|
||||
nInUse = 0;
|
||||
for (i = 0; i < 256; i++) {
|
||||
if (inUse[i]) {
|
||||
seqToUnseq[nInUse] = (char) i;
|
||||
unseqToSeq[i] = (char) nInUse;
|
||||
nInUse++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
index of the last char in the block, so
|
||||
the block size == last + 1.
|
||||
*/
|
||||
private int last;
|
||||
|
||||
/*
|
||||
index in zptr[] of original string after sorting.
|
||||
*/
|
||||
private int origPtr;
|
||||
|
||||
/*
|
||||
always: in the range 0 .. 9.
|
||||
The current block size is 100000 * this number.
|
||||
*/
|
||||
private int blockSize100k;
|
||||
|
||||
private boolean blockRandomised;
|
||||
|
||||
private int bsBuff;
|
||||
private int bsLive;
|
||||
private CRC mCrc = new CRC();
|
||||
|
||||
private boolean[] inUse = new boolean[256];
|
||||
private int nInUse;
|
||||
|
||||
private char[] seqToUnseq = new char[256];
|
||||
private char[] unseqToSeq = new char[256];
|
||||
|
||||
private char[] selector = new char[MAX_SELECTORS];
|
||||
private char[] selectorMtf = new char[MAX_SELECTORS];
|
||||
|
||||
private int[] tt;
|
||||
private char[] ll8;
|
||||
|
||||
/*
|
||||
freq table collected to save a pass over the data
|
||||
during decompression.
|
||||
*/
|
||||
private int[] unzftab = new int[256];
|
||||
|
||||
private int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE];
|
||||
private int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE];
|
||||
private int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE];
|
||||
private int[] minLens = new int[N_GROUPS];
|
||||
|
||||
private InputStream bsStream;
|
||||
|
||||
private boolean streamEnd = false;
|
||||
|
||||
private int currentChar = -1;
|
||||
|
||||
private static final int START_BLOCK_STATE = 1;
|
||||
private static final int RAND_PART_A_STATE = 2;
|
||||
private static final int RAND_PART_B_STATE = 3;
|
||||
private static final int RAND_PART_C_STATE = 4;
|
||||
private static final int NO_RAND_PART_A_STATE = 5;
|
||||
private static final int NO_RAND_PART_B_STATE = 6;
|
||||
private static final int NO_RAND_PART_C_STATE = 7;
|
||||
|
||||
private int currentState = START_BLOCK_STATE;
|
||||
|
||||
private int storedBlockCRC, storedCombinedCRC;
|
||||
private int computedBlockCRC, computedCombinedCRC;
|
||||
|
||||
int i2, count, chPrev, ch2;
|
||||
int i, tPos;
|
||||
int rNToGo = 0;
|
||||
int rTPos = 0;
|
||||
int j2;
|
||||
char z;
|
||||
|
||||
/**
 * Creates a decompressing stream on top of <code>zStream</code>, reads the
 * stream header and prepares the first block for reading.
 *
 * <p>If the header is rejected, or the stream contains no data block, the
 * stream is left in the "ended" state and {@link #read()} returns -1.
 * Previously a rejected header closed and cleared the underlying stream
 * and then went on to read the first block header from it, which failed
 * with a NullPointerException.</p>
 *
 * @param zStream source of compressed data
 */
public CBZip2InputStream(InputStream zStream) {
    ll8 = null;
    tt = null;
    bsSetStream(zStream);
    initialize();
    if (streamEnd) {
        // Header rejected: the source has already been released.
        return;
    }
    initBlock();
    if (streamEnd) {
        // End-of-stream marker or bad block header: nothing was decoded.
        return;
    }
    setupBlock();
}
|
||||
|
||||
public int read() {
|
||||
if (streamEnd) {
|
||||
return -1;
|
||||
} else {
|
||||
int retChar = currentChar;
|
||||
switch(currentState) {
|
||||
case START_BLOCK_STATE:
|
||||
break;
|
||||
case RAND_PART_A_STATE:
|
||||
break;
|
||||
case RAND_PART_B_STATE:
|
||||
setupRandPartB();
|
||||
break;
|
||||
case RAND_PART_C_STATE:
|
||||
setupRandPartC();
|
||||
break;
|
||||
case NO_RAND_PART_A_STATE:
|
||||
break;
|
||||
case NO_RAND_PART_B_STATE:
|
||||
setupNoRandPartB();
|
||||
break;
|
||||
case NO_RAND_PART_C_STATE:
|
||||
setupNoRandPartC();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return retChar;
|
||||
}
|
||||
}
|
||||
|
||||
private void initialize() {
|
||||
char magic3, magic4;
|
||||
magic3 = bsGetUChar();
|
||||
magic4 = bsGetUChar();
|
||||
if (magic3 != 'h' || magic4 < '1' || magic4 > '9') {
|
||||
bsFinishedWithStream();
|
||||
streamEnd = true;
|
||||
return;
|
||||
}
|
||||
|
||||
setDecompressStructureSizes(magic4 - '0');
|
||||
computedCombinedCRC = 0;
|
||||
}
|
||||
|
||||
private void initBlock() {
|
||||
char magic1, magic2, magic3, magic4;
|
||||
char magic5, magic6;
|
||||
magic1 = bsGetUChar();
|
||||
magic2 = bsGetUChar();
|
||||
magic3 = bsGetUChar();
|
||||
magic4 = bsGetUChar();
|
||||
magic5 = bsGetUChar();
|
||||
magic6 = bsGetUChar();
|
||||
if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45
|
||||
&& magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) {
|
||||
complete();
|
||||
return;
|
||||
}
|
||||
|
||||
if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59
|
||||
|| magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) {
|
||||
badBlockHeader();
|
||||
streamEnd = true;
|
||||
return;
|
||||
}
|
||||
|
||||
storedBlockCRC = bsGetInt32();
|
||||
|
||||
if (bsR(1) == 1) {
|
||||
blockRandomised = true;
|
||||
} else {
|
||||
blockRandomised = false;
|
||||
}
|
||||
|
||||
// currBlockNo++;
|
||||
getAndMoveToFrontDecode();
|
||||
|
||||
mCrc.initialiseCRC();
|
||||
currentState = START_BLOCK_STATE;
|
||||
}
|
||||
|
||||
private void endBlock() {
|
||||
computedBlockCRC = mCrc.getFinalCRC();
|
||||
/* A bad CRC is considered a fatal error. */
|
||||
if (storedBlockCRC != computedBlockCRC) {
|
||||
crcError();
|
||||
}
|
||||
|
||||
computedCombinedCRC = (computedCombinedCRC << 1)
|
||||
| (computedCombinedCRC >>> 31);
|
||||
computedCombinedCRC ^= computedBlockCRC;
|
||||
}
|
||||
|
||||
private void complete() {
|
||||
storedCombinedCRC = bsGetInt32();
|
||||
if (storedCombinedCRC != computedCombinedCRC) {
|
||||
crcError();
|
||||
}
|
||||
|
||||
bsFinishedWithStream();
|
||||
streamEnd = true;
|
||||
}
|
||||
|
||||
private static void blockOverrun() {
|
||||
cadvise();
|
||||
}
|
||||
|
||||
private static void badBlockHeader() {
|
||||
cadvise();
|
||||
}
|
||||
|
||||
private static void crcError() {
|
||||
cadvise();
|
||||
}
|
||||
|
||||
private void bsFinishedWithStream() {
|
||||
try {
|
||||
if (this.bsStream != null) {
|
||||
if (this.bsStream != System.in) {
|
||||
this.bsStream.close();
|
||||
this.bsStream = null;
|
||||
}
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
//ignore
|
||||
}
|
||||
}
|
||||
|
||||
private void bsSetStream(InputStream f) {
|
||||
bsStream = f;
|
||||
bsLive = 0;
|
||||
bsBuff = 0;
|
||||
}
|
||||
|
||||
private int bsR(int n) {
|
||||
int v;
|
||||
while (bsLive < n) {
|
||||
int zzi;
|
||||
char thech = 0;
|
||||
try {
|
||||
thech = (char) bsStream.read();
|
||||
} catch (IOException e) {
|
||||
compressedStreamEOF();
|
||||
}
|
||||
if (thech == -1) {
|
||||
compressedStreamEOF();
|
||||
}
|
||||
zzi = thech;
|
||||
bsBuff = (bsBuff << 8) | (zzi & 0xff);
|
||||
bsLive += 8;
|
||||
}
|
||||
|
||||
v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1);
|
||||
bsLive -= n;
|
||||
return v;
|
||||
}
|
||||
|
||||
private char bsGetUChar() {
|
||||
return (char) bsR(8);
|
||||
}
|
||||
|
||||
private int bsGetint() {
|
||||
int u = 0;
|
||||
u = (u << 8) | bsR(8);
|
||||
u = (u << 8) | bsR(8);
|
||||
u = (u << 8) | bsR(8);
|
||||
u = (u << 8) | bsR(8);
|
||||
return u;
|
||||
}
|
||||
|
||||
private int bsGetIntVS(int numBits) {
|
||||
return (int) bsR(numBits);
|
||||
}
|
||||
|
||||
private int bsGetInt32() {
|
||||
return (int) bsGetint();
|
||||
}
|
||||
|
||||
private void hbCreateDecodeTables(int[] limit, int[] base,
|
||||
int[] perm, char[] length,
|
||||
int minLen, int maxLen, int alphaSize) {
|
||||
int pp, i, j, vec;
|
||||
|
||||
pp = 0;
|
||||
for (i = minLen; i <= maxLen; i++) {
|
||||
for (j = 0; j < alphaSize; j++) {
|
||||
if (length[j] == i) {
|
||||
perm[pp] = j;
|
||||
pp++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_CODE_LEN; i++) {
|
||||
base[i] = 0;
|
||||
}
|
||||
for (i = 0; i < alphaSize; i++) {
|
||||
base[length[i] + 1]++;
|
||||
}
|
||||
|
||||
for (i = 1; i < MAX_CODE_LEN; i++) {
|
||||
base[i] += base[i - 1];
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_CODE_LEN; i++) {
|
||||
limit[i] = 0;
|
||||
}
|
||||
vec = 0;
|
||||
|
||||
for (i = minLen; i <= maxLen; i++) {
|
||||
vec += (base[i + 1] - base[i]);
|
||||
limit[i] = vec - 1;
|
||||
vec <<= 1;
|
||||
}
|
||||
for (i = minLen + 1; i <= maxLen; i++) {
|
||||
base[i] = ((limit[i - 1] + 1) << 1) - base[i];
|
||||
}
|
||||
}
|
||||
|
||||
private void recvDecodingTables() {
|
||||
char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE];
|
||||
int i, j, t, nGroups, nSelectors, alphaSize;
|
||||
int minLen, maxLen;
|
||||
boolean[] inUse16 = new boolean[16];
|
||||
|
||||
/* Receive the mapping table */
|
||||
for (i = 0; i < 16; i++) {
|
||||
if (bsR(1) == 1) {
|
||||
inUse16[i] = true;
|
||||
} else {
|
||||
inUse16[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 256; i++) {
|
||||
inUse[i] = false;
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
if (inUse16[i]) {
|
||||
for (j = 0; j < 16; j++) {
|
||||
if (bsR(1) == 1) {
|
||||
inUse[i * 16 + j] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
makeMaps();
|
||||
alphaSize = nInUse + 2;
|
||||
|
||||
/* Now the selectors */
|
||||
nGroups = bsR(3);
|
||||
nSelectors = bsR(15);
|
||||
for (i = 0; i < nSelectors; i++) {
|
||||
j = 0;
|
||||
while (bsR(1) == 1) {
|
||||
j++;
|
||||
}
|
||||
selectorMtf[i] = (char) j;
|
||||
}
|
||||
|
||||
/* Undo the MTF values for the selectors. */
|
||||
{
|
||||
char[] pos = new char[N_GROUPS];
|
||||
char tmp, v;
|
||||
for (v = 0; v < nGroups; v++) {
|
||||
pos[v] = v;
|
||||
}
|
||||
|
||||
for (i = 0; i < nSelectors; i++) {
|
||||
v = selectorMtf[i];
|
||||
tmp = pos[v];
|
||||
while (v > 0) {
|
||||
pos[v] = pos[v - 1];
|
||||
v--;
|
||||
}
|
||||
pos[0] = tmp;
|
||||
selector[i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now the coding tables */
|
||||
for (t = 0; t < nGroups; t++) {
|
||||
int curr = bsR(5);
|
||||
for (i = 0; i < alphaSize; i++) {
|
||||
while (bsR(1) == 1) {
|
||||
if (bsR(1) == 0) {
|
||||
curr++;
|
||||
} else {
|
||||
curr--;
|
||||
}
|
||||
}
|
||||
len[t][i] = (char) curr;
|
||||
}
|
||||
}
|
||||
|
||||
/* Create the Huffman decoding tables */
|
||||
for (t = 0; t < nGroups; t++) {
|
||||
minLen = 32;
|
||||
maxLen = 0;
|
||||
for (i = 0; i < alphaSize; i++) {
|
||||
if (len[t][i] > maxLen) {
|
||||
maxLen = len[t][i];
|
||||
}
|
||||
if (len[t][i] < minLen) {
|
||||
minLen = len[t][i];
|
||||
}
|
||||
}
|
||||
hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
|
||||
maxLen, alphaSize);
|
||||
minLens[t] = minLen;
|
||||
}
|
||||
}
|
||||
|
||||
private void getAndMoveToFrontDecode() {
|
||||
char[] yy = new char[256];
|
||||
int i, j, nextSym, limitLast;
|
||||
int EOB, groupNo, groupPos;
|
||||
|
||||
limitLast = baseBlockSize * blockSize100k;
|
||||
origPtr = bsGetIntVS(24);
|
||||
|
||||
recvDecodingTables();
|
||||
EOB = nInUse + 1;
|
||||
groupNo = -1;
|
||||
groupPos = 0;
|
||||
|
||||
/*
|
||||
Setting up the unzftab entries here is not strictly
|
||||
necessary, but it does save having to do it later
|
||||
in a separate pass, and so saves a block's worth of
|
||||
cache misses.
|
||||
*/
|
||||
for (i = 0; i <= 255; i++) {
|
||||
unzftab[i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i <= 255; i++) {
|
||||
yy[i] = (char) i;
|
||||
}
|
||||
|
||||
last = -1;
|
||||
|
||||
{
|
||||
int zt, zn, zvec, zj;
|
||||
if (groupPos == 0) {
|
||||
groupNo++;
|
||||
groupPos = G_SIZE;
|
||||
}
|
||||
groupPos--;
|
||||
zt = selector[groupNo];
|
||||
zn = minLens[zt];
|
||||
zvec = bsR(zn);
|
||||
while (zvec > limit[zt][zn]) {
|
||||
zn++;
|
||||
{
|
||||
{
|
||||
while (bsLive < 1) {
|
||||
int zzi;
|
||||
char thech = 0;
|
||||
try {
|
||||
thech = (char) bsStream.read();
|
||||
} catch (IOException e) {
|
||||
compressedStreamEOF();
|
||||
}
|
||||
if (thech == -1) {
|
||||
compressedStreamEOF();
|
||||
}
|
||||
zzi = thech;
|
||||
bsBuff = (bsBuff << 8) | (zzi & 0xff);
|
||||
bsLive += 8;
|
||||
}
|
||||
}
|
||||
zj = (bsBuff >> (bsLive - 1)) & 1;
|
||||
bsLive--;
|
||||
}
|
||||
zvec = (zvec << 1) | zj;
|
||||
}
|
||||
nextSym = perm[zt][zvec - base[zt][zn]];
|
||||
}
|
||||
|
||||
while (true) {
|
||||
|
||||
if (nextSym == EOB) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (nextSym == RUNA || nextSym == RUNB) {
|
||||
char ch;
|
||||
int s = -1;
|
||||
int N = 1;
|
||||
do {
|
||||
if (nextSym == RUNA) {
|
||||
s = s + (0 + 1) * N;
|
||||
} else if (nextSym == RUNB) {
|
||||
s = s + (1 + 1) * N;
|
||||
}
|
||||
N = N * 2;
|
||||
{
|
||||
int zt, zn, zvec, zj;
|
||||
if (groupPos == 0) {
|
||||
groupNo++;
|
||||
groupPos = G_SIZE;
|
||||
}
|
||||
groupPos--;
|
||||
zt = selector[groupNo];
|
||||
zn = minLens[zt];
|
||||
zvec = bsR(zn);
|
||||
while (zvec > limit[zt][zn]) {
|
||||
zn++;
|
||||
{
|
||||
{
|
||||
while (bsLive < 1) {
|
||||
int zzi;
|
||||
char thech = 0;
|
||||
try {
|
||||
thech = (char) bsStream.read();
|
||||
} catch (IOException e) {
|
||||
compressedStreamEOF();
|
||||
}
|
||||
if (thech == -1) {
|
||||
compressedStreamEOF();
|
||||
}
|
||||
zzi = thech;
|
||||
bsBuff = (bsBuff << 8) | (zzi & 0xff);
|
||||
bsLive += 8;
|
||||
}
|
||||
}
|
||||
zj = (bsBuff >> (bsLive - 1)) & 1;
|
||||
bsLive--;
|
||||
}
|
||||
zvec = (zvec << 1) | zj;
|
||||
}
|
||||
nextSym = perm[zt][zvec - base[zt][zn]];
|
||||
}
|
||||
} while (nextSym == RUNA || nextSym == RUNB);
|
||||
|
||||
s++;
|
||||
ch = seqToUnseq[yy[0]];
|
||||
unzftab[ch] += s;
|
||||
|
||||
while (s > 0) {
|
||||
last++;
|
||||
ll8[last] = ch;
|
||||
s--;
|
||||
}
|
||||
|
||||
if (last >= limitLast) {
|
||||
blockOverrun();
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
char tmp;
|
||||
last++;
|
||||
if (last >= limitLast) {
|
||||
blockOverrun();
|
||||
}
|
||||
|
||||
tmp = yy[nextSym - 1];
|
||||
unzftab[seqToUnseq[tmp]]++;
|
||||
ll8[last] = seqToUnseq[tmp];
|
||||
|
||||
/*
|
||||
This loop is hammered during decompression,
|
||||
hence the unrolling.
|
||||
|
||||
for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
|
||||
*/
|
||||
|
||||
j = nextSym - 1;
|
||||
for (; j > 3; j -= 4) {
|
||||
yy[j] = yy[j - 1];
|
||||
yy[j - 1] = yy[j - 2];
|
||||
yy[j - 2] = yy[j - 3];
|
||||
yy[j - 3] = yy[j - 4];
|
||||
}
|
||||
for (; j > 0; j--) {
|
||||
yy[j] = yy[j - 1];
|
||||
}
|
||||
|
||||
yy[0] = tmp;
|
||||
{
|
||||
int zt, zn, zvec, zj;
|
||||
if (groupPos == 0) {
|
||||
groupNo++;
|
||||
groupPos = G_SIZE;
|
||||
}
|
||||
groupPos--;
|
||||
zt = selector[groupNo];
|
||||
zn = minLens[zt];
|
||||
zvec = bsR(zn);
|
||||
while (zvec > limit[zt][zn]) {
|
||||
zn++;
|
||||
{
|
||||
{
|
||||
while (bsLive < 1) {
|
||||
int zzi;
|
||||
char thech = 0;
|
||||
try {
|
||||
thech = (char) bsStream.read();
|
||||
} catch (IOException e) {
|
||||
compressedStreamEOF();
|
||||
}
|
||||
zzi = thech;
|
||||
bsBuff = (bsBuff << 8) | (zzi & 0xff);
|
||||
bsLive += 8;
|
||||
}
|
||||
}
|
||||
zj = (bsBuff >> (bsLive - 1)) & 1;
|
||||
bsLive--;
|
||||
}
|
||||
zvec = (zvec << 1) | zj;
|
||||
}
|
||||
nextSym = perm[zt][zvec - base[zt][zn]];
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void setupBlock() {
|
||||
int[] cftab = new int[257];
|
||||
char ch;
|
||||
|
||||
cftab[0] = 0;
|
||||
for (i = 1; i <= 256; i++) {
|
||||
cftab[i] = unzftab[i - 1];
|
||||
}
|
||||
for (i = 1; i <= 256; i++) {
|
||||
cftab[i] += cftab[i - 1];
|
||||
}
|
||||
|
||||
for (i = 0; i <= last; i++) {
|
||||
ch = (char) ll8[i];
|
||||
tt[cftab[ch]] = i;
|
||||
cftab[ch]++;
|
||||
}
|
||||
cftab = null;
|
||||
|
||||
tPos = tt[origPtr];
|
||||
|
||||
count = 0;
|
||||
i2 = 0;
|
||||
ch2 = 256; /* not a char and not EOF */
|
||||
|
||||
if (blockRandomised) {
|
||||
rNToGo = 0;
|
||||
rTPos = 0;
|
||||
setupRandPartA();
|
||||
} else {
|
||||
setupNoRandPartA();
|
||||
}
|
||||
}
|
||||
|
||||
private void setupRandPartA() {
|
||||
if (i2 <= last) {
|
||||
chPrev = ch2;
|
||||
ch2 = ll8[tPos];
|
||||
tPos = tt[tPos];
|
||||
if (rNToGo == 0) {
|
||||
rNToGo = rNums[rTPos];
|
||||
rTPos++;
|
||||
if (rTPos == 512) {
|
||||
rTPos = 0;
|
||||
}
|
||||
}
|
||||
rNToGo--;
|
||||
ch2 ^= (int) ((rNToGo == 1) ? 1 : 0);
|
||||
i2++;
|
||||
|
||||
currentChar = ch2;
|
||||
currentState = RAND_PART_B_STATE;
|
||||
mCrc.updateCRC(ch2);
|
||||
} else {
|
||||
endBlock();
|
||||
initBlock();
|
||||
setupBlock();
|
||||
}
|
||||
}
|
||||
|
||||
private void setupNoRandPartA() {
|
||||
if (i2 <= last) {
|
||||
chPrev = ch2;
|
||||
ch2 = ll8[tPos];
|
||||
tPos = tt[tPos];
|
||||
i2++;
|
||||
|
||||
currentChar = ch2;
|
||||
currentState = NO_RAND_PART_B_STATE;
|
||||
mCrc.updateCRC(ch2);
|
||||
} else {
|
||||
endBlock();
|
||||
initBlock();
|
||||
setupBlock();
|
||||
}
|
||||
}
|
||||
|
||||
private void setupRandPartB() {
|
||||
if (ch2 != chPrev) {
|
||||
currentState = RAND_PART_A_STATE;
|
||||
count = 1;
|
||||
setupRandPartA();
|
||||
} else {
|
||||
count++;
|
||||
if (count >= 4) {
|
||||
z = ll8[tPos];
|
||||
tPos = tt[tPos];
|
||||
if (rNToGo == 0) {
|
||||
rNToGo = rNums[rTPos];
|
||||
rTPos++;
|
||||
if (rTPos == 512) {
|
||||
rTPos = 0;
|
||||
}
|
||||
}
|
||||
rNToGo--;
|
||||
z ^= ((rNToGo == 1) ? 1 : 0);
|
||||
j2 = 0;
|
||||
currentState = RAND_PART_C_STATE;
|
||||
setupRandPartC();
|
||||
} else {
|
||||
currentState = RAND_PART_A_STATE;
|
||||
setupRandPartA();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void setupRandPartC() {
|
||||
if (j2 < (int) z) {
|
||||
currentChar = ch2;
|
||||
mCrc.updateCRC(ch2);
|
||||
j2++;
|
||||
} else {
|
||||
currentState = RAND_PART_A_STATE;
|
||||
i2++;
|
||||
count = 0;
|
||||
setupRandPartA();
|
||||
}
|
||||
}
|
||||
|
||||
private void setupNoRandPartB() {
|
||||
if (ch2 != chPrev) {
|
||||
currentState = NO_RAND_PART_A_STATE;
|
||||
count = 1;
|
||||
setupNoRandPartA();
|
||||
} else {
|
||||
count++;
|
||||
if (count >= 4) {
|
||||
z = ll8[tPos];
|
||||
tPos = tt[tPos];
|
||||
currentState = NO_RAND_PART_C_STATE;
|
||||
j2 = 0;
|
||||
setupNoRandPartC();
|
||||
} else {
|
||||
currentState = NO_RAND_PART_A_STATE;
|
||||
setupNoRandPartA();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void setupNoRandPartC() {
|
||||
if (j2 < (int) z) {
|
||||
currentChar = ch2;
|
||||
mCrc.updateCRC(ch2);
|
||||
j2++;
|
||||
} else {
|
||||
currentState = NO_RAND_PART_A_STATE;
|
||||
i2++;
|
||||
count = 0;
|
||||
setupNoRandPartA();
|
||||
}
|
||||
}
|
||||
|
||||
private void setDecompressStructureSizes(int newSize100k) {
|
||||
if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k
|
||||
&& blockSize100k <= 9)) {
|
||||
// throw new IOException("Invalid block size");
|
||||
}
|
||||
|
||||
blockSize100k = newSize100k;
|
||||
|
||||
if (newSize100k == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
int n = baseBlockSize * newSize100k;
|
||||
ll8 = new char[n];
|
||||
tt = new int[n];
|
||||
}
|
||||
}
|
||||
|
1665
src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java
Normal file
1665
src/main/java/org/apache/tools/bzip2/CBZip2OutputStream.java
Normal file
File diff suppressed because it is too large
Load Diff
167
src/main/java/org/apache/tools/bzip2/CRC.java
Normal file
167
src/main/java/org/apache/tools/bzip2/CRC.java
Normal file
@ -0,0 +1,167 @@
|
||||
/*
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 2001-2002 The Apache Software Foundation. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution, if
|
||||
* any, must include the following acknowlegement:
|
||||
* "This product includes software developed by the
|
||||
* Apache Software Foundation (http://www.apache.org/)."
|
||||
* Alternately, this acknowlegement may appear in the software itself,
|
||||
* if and wherever such third-party acknowlegements normally appear.
|
||||
*
|
||||
* 4. The names "Ant" and "Apache Software
|
||||
* Foundation" must not be used to endorse or promote products derived
|
||||
* from this software without prior written permission. For written
|
||||
* permission, please contact apache@apache.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Apache"
|
||||
* nor may "Apache" appear in their names without prior written
|
||||
* permission of the Apache Group.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* This software consists of voluntary contributions made by many
|
||||
* individuals on behalf of the Apache Software Foundation. For more
|
||||
* information on the Apache Software Foundation, please see
|
||||
* <http://www.apache.org/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This package is based on the work done by Keiron Liddle, Aftex Software
|
||||
* <keiron@aftexsw.com> to whom the Ant project is very grateful for his
|
||||
* great code.
|
||||
*/
|
||||
|
||||
package org.apache.tools.bzip2;
|
||||
|
||||
/**
|
||||
* A simple class to hold and calculate the CRC for sanity checking
|
||||
* of the data.
|
||||
*
|
||||
* @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a>
|
||||
*/
|
||||
class CRC {
    /**
     * Lookup table with one entry per value of the top byte of the
     * register: the register contribution after shifting that byte out,
     * most significant bit first, with polynomial 0x04c11db7.
     */
    public static int crc32Table[] = buildTable();

    /** Running register; see initialiseCRC() and updateCRC(). */
    int globalCrc;

    /** Creates a CRC in its initial state. */
    public CRC() {
        initialiseCRC();
    }

    /** Computes the 256 table entries bit by bit. */
    private static int[] buildTable() {
        final int polynomial = 0x04c11db7;
        final int[] table = new int[256];
        for (int index = 0; index < table.length; index++) {
            int register = index << 24;
            for (int bit = 0; bit < 8; bit++) {
                // A negative int has its top bit set.
                register = (register < 0)
                    ? ((register << 1) ^ polynomial)
                    : (register << 1);
            }
            table[index] = register;
        }
        return table;
    }

    /** Resets the register to all ones. */
    void initialiseCRC() {
        globalCrc = 0xffffffff;
    }

    /** Returns the bitwise complement of the register. */
    int getFinalCRC() {
        return ~globalCrc;
    }

    /** Returns the raw register value. */
    int getGlobalCRC() {
        return globalCrc;
    }

    /** Replaces the raw register value. */
    void setGlobalCRC(int newCrc) {
        globalCrc = newCrc;
    }

    /**
     * Feeds one value into the CRC.
     *
     * @param inCh value to add; callers in this package pass 0..255
     */
    void updateCRC(int inCh) {
        int slot = (globalCrc >> 24) ^ inCh;
        if (slot < 0) {
            // The arithmetic shift sign-extends; bring the index back to 0..255.
            slot += 256;
        }
        globalCrc = (globalCrc << 8) ^ crc32Table[slot];
    }
}
|
||||
|
Loading…
Reference in New Issue
Block a user